File: | gromacs/mdlib/minimize.c |
Location: | line 1891, column 9 |
Description: | Value stored to 'foundlower' is never read |
1 | /* |
2 | * This file is part of the GROMACS molecular simulation package. |
3 | * |
4 | * Copyright (c) 1991-2000, University of Groningen, The Netherlands. |
5 | * Copyright (c) 2001-2004, The GROMACS development team. |
6 | * Copyright (c) 2013,2014, by the GROMACS development team, led by |
7 | * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, |
8 | * and including many others, as listed in the AUTHORS file in the |
9 | * top-level source directory and at http://www.gromacs.org. |
10 | * |
11 | * GROMACS is free software; you can redistribute it and/or |
12 | * modify it under the terms of the GNU Lesser General Public License |
13 | * as published by the Free Software Foundation; either version 2.1 |
14 | * of the License, or (at your option) any later version. |
15 | * |
16 | * GROMACS is distributed in the hope that it will be useful, |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
19 | * Lesser General Public License for more details. |
20 | * |
21 | * You should have received a copy of the GNU Lesser General Public |
22 | * License along with GROMACS; if not, see |
23 | * http://www.gnu.org/licenses, or write to the Free Software Foundation, |
24 | * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
25 | * |
26 | * If you want to redistribute modifications to GROMACS, please |
27 | * consider that scientific software is very special. Version |
28 | * control is crucial - bugs must be traceable. We will be happy to |
29 | * consider code for inclusion in the official distribution, but |
30 | * derived work must not be called official GROMACS. Details are found |
31 | * in the README & COPYING files - if they are missing, get the |
32 | * official version at http://www.gromacs.org. |
33 | * |
34 | * To help us fund GROMACS development, we humbly ask that you cite |
35 | * the research papers on the package. Check out http://www.gromacs.org. |
36 | */ |
37 | #ifdef HAVE_CONFIG_H1 |
38 | #include <config.h> |
39 | #endif |
40 | |
41 | #include <string.h> |
42 | #include <time.h> |
43 | #include <math.h> |
44 | #include "gromacs/utility/cstringutil.h" |
45 | #include "network.h" |
46 | #include "gromacs/utility/smalloc.h" |
47 | #include "nrnb.h" |
48 | #include "force.h" |
49 | #include "macros.h" |
50 | #include "names.h" |
51 | #include "gromacs/utility/fatalerror.h" |
52 | #include "txtdump.h" |
53 | #include "typedefs.h" |
54 | #include "update.h" |
55 | #include "constr.h" |
56 | #include "gromacs/math/vec.h" |
57 | #include "tgroup.h" |
58 | #include "mdebin.h" |
59 | #include "vsite.h" |
60 | #include "force.h" |
61 | #include "mdrun.h" |
62 | #include "md_support.h" |
63 | #include "sim_util.h" |
64 | #include "domdec.h" |
65 | #include "mdatoms.h" |
66 | #include "ns.h" |
67 | #include "mtop_util.h" |
68 | #include "pme.h" |
69 | #include "bondf.h" |
70 | #include "gmx_omp_nthreads.h" |
71 | #include "md_logging.h" |
72 | |
73 | #include "gromacs/fileio/confio.h" |
74 | #include "gromacs/fileio/trajectory_writing.h" |
75 | #include "gromacs/linearalgebra/mtxio.h" |
76 | #include "gromacs/linearalgebra/sparsematrix.h" |
77 | #include "gromacs/timing/wallcycle.h" |
78 | #include "gromacs/timing/walltime_accounting.h" |
79 | #include "gromacs/imd/imd.h" |
80 | |
81 | typedef struct { |
82 | t_state s; |
83 | rvec *f; |
84 | real epot; |
85 | real fnorm; |
86 | real fmax; |
87 | int a_fmax; |
88 | } em_state_t; |
89 | |
90 | static em_state_t *init_em_state() |
91 | { |
92 | em_state_t *ems; |
93 | |
94 | snew(ems, 1)(ems) = save_calloc("ems", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 94, (1), sizeof(*(ems))); |
95 | |
96 | /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? */ |
97 | snew(ems->s.lambda, efptNR)(ems->s.lambda) = save_calloc("ems->s.lambda", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 97, (efptNR), sizeof(*(ems->s.lambda))); |
98 | |
99 | return ems; |
100 | } |
101 | |
102 | static void print_em_start(FILE *fplog, |
103 | t_commrec *cr, |
104 | gmx_walltime_accounting_t walltime_accounting, |
105 | gmx_wallcycle_t wcycle, |
106 | const char *name) |
107 | { |
108 | walltime_accounting_start(walltime_accounting); |
109 | wallcycle_start(wcycle, ewcRUN); |
110 | print_start(fplog, cr, walltime_accounting, name); |
111 | } |
112 | static void em_time_end(gmx_walltime_accounting_t walltime_accounting, |
113 | gmx_wallcycle_t wcycle) |
114 | { |
115 | wallcycle_stop(wcycle, ewcRUN); |
116 | |
117 | walltime_accounting_end(walltime_accounting); |
118 | } |
119 | |
120 | static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps) |
121 | { |
122 | fprintf(out, "\n"); |
123 | fprintf(out, "%s:\n", minimizer); |
124 | fprintf(out, " Tolerance (Fmax) = %12.5e\n", ftol); |
125 | fprintf(out, " Number of steps = %12d\n", nsteps); |
126 | } |
127 | |
128 | static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain) |
129 | { |
130 | char buffer[2048]; |
131 | if (bLastStep) |
132 | { |
133 | sprintf(buffer, |
134 | "\nEnergy minimization reached the maximum number " |
135 | "of steps before the forces reached the requested " |
136 | "precision Fmax < %g.\n", ftol); |
137 | } |
138 | else |
139 | { |
140 | sprintf(buffer, |
141 | "\nEnergy minimization has stopped, but the forces have " |
142 | "not converged to the requested precision Fmax < %g (which " |
143 | "may not be possible for your system). It stopped " |
144 | "because the algorithm tried to make a new step whose size " |
145 | "was too small, or there was no change in the energy since " |
146 | "last step. Either way, we regard the minimization as " |
147 | "converged to within the available machine precision, " |
148 | "given your starting configuration and EM parameters.\n%s%s", |
149 | ftol, |
150 | sizeof(real) < sizeof(double) ? |
151 | "\nDouble precision normally gives you higher accuracy, but " |
152 | "this is often not needed for preparing to run molecular " |
153 | "dynamics.\n" : |
154 | "", |
155 | bConstrain ? |
156 | "You might need to increase your constraint accuracy, or turn\n" |
157 | "off constraints altogether (set constraints = none in mdp file)\n" : |
158 | ""); |
159 | } |
160 | fputs(wrap_lines(buffer, 78, 0, FALSE0), fp); |
161 | } |
162 | |
163 | |
164 | |
165 | static void print_converged(FILE *fp, const char *alg, real ftol, |
166 | gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps, |
167 | real epot, real fmax, int nfmax, real fnorm) |
168 | { |
169 | char buf[STEPSTRSIZE22]; |
170 | |
171 | if (bDone) |
172 | { |
173 | fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", |
174 | alg, ftol, gmx_step_str(count, buf)); |
175 | } |
176 | else if (count < nsteps) |
177 | { |
178 | fprintf(fp, "\n%s converged to machine precision in %s steps,\n" |
179 | "but did not reach the requested Fmax < %g.\n", |
180 | alg, gmx_step_str(count, buf), ftol); |
181 | } |
182 | else |
183 | { |
184 | fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", |
185 | alg, ftol, gmx_step_str(count, buf)); |
186 | } |
187 | |
188 | #ifdef GMX_DOUBLE |
189 | fprintf(fp, "Potential Energy = %21.14e\n", epot); |
190 | fprintf(fp, "Maximum force = %21.14e on atom %d\n", fmax, nfmax+1); |
191 | fprintf(fp, "Norm of force = %21.14e\n", fnorm); |
192 | #else |
193 | fprintf(fp, "Potential Energy = %14.7e\n", epot); |
194 | fprintf(fp, "Maximum force = %14.7e on atom %d\n", fmax, nfmax+1); |
195 | fprintf(fp, "Norm of force = %14.7e\n", fnorm); |
196 | #endif |
197 | } |
198 | |
199 | static void get_f_norm_max(t_commrec *cr, |
200 | t_grpopts *opts, t_mdatoms *mdatoms, rvec *f, |
201 | real *fnorm, real *fmax, int *a_fmax) |
202 | { |
203 | double fnorm2, *sum; |
204 | real fmax2, fmax2_0, fam; |
205 | int la_max, a_max, start, end, i, m, gf; |
206 | |
207 | /* This routine finds the largest force and returns it. |
208 | * On parallel machines the global max is taken. |
209 | */ |
210 | fnorm2 = 0; |
211 | fmax2 = 0; |
212 | la_max = -1; |
213 | gf = 0; |
214 | start = 0; |
215 | end = mdatoms->homenr; |
216 | if (mdatoms->cFREEZE) |
217 | { |
218 | for (i = start; i < end; i++) |
219 | { |
220 | gf = mdatoms->cFREEZE[i]; |
221 | fam = 0; |
222 | for (m = 0; m < DIM3; m++) |
223 | { |
224 | if (!opts->nFreeze[gf][m]) |
225 | { |
226 | fam += sqr(f[i][m]); |
227 | } |
228 | } |
229 | fnorm2 += fam; |
230 | if (fam > fmax2) |
231 | { |
232 | fmax2 = fam; |
233 | la_max = i; |
234 | } |
235 | } |
236 | } |
237 | else |
238 | { |
239 | for (i = start; i < end; i++) |
240 | { |
241 | fam = norm2(f[i]); |
242 | fnorm2 += fam; |
243 | if (fam > fmax2) |
244 | { |
245 | fmax2 = fam; |
246 | la_max = i; |
247 | } |
248 | } |
249 | } |
250 | |
251 | if (la_max >= 0 && DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
252 | { |
253 | a_max = cr->dd->gatindex[la_max]; |
254 | } |
255 | else |
256 | { |
257 | a_max = la_max; |
258 | } |
259 | if (PAR(cr)((cr)->nnodes > 1)) |
260 | { |
261 | snew(sum, 2*cr->nnodes+1)(sum) = save_calloc("sum", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 261, (2*cr->nnodes+1), sizeof(*(sum))); |
262 | sum[2*cr->nodeid] = fmax2; |
263 | sum[2*cr->nodeid+1] = a_max; |
264 | sum[2*cr->nnodes] = fnorm2; |
265 | gmx_sumd(2*cr->nnodes+1, sum, cr); |
266 | fnorm2 = sum[2*cr->nnodes]; |
267 | /* Determine the global maximum */ |
268 | for (i = 0; i < cr->nnodes; i++) |
269 | { |
270 | if (sum[2*i] > fmax2) |
271 | { |
272 | fmax2 = sum[2*i]; |
273 | a_max = (int)(sum[2*i+1] + 0.5); |
274 | } |
275 | } |
276 | sfree(sum)save_free("sum", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 276, (sum)); |
277 | } |
278 | |
279 | if (fnorm) |
280 | { |
281 | *fnorm = sqrt(fnorm2); |
282 | } |
283 | if (fmax) |
284 | { |
285 | *fmax = sqrt(fmax2); |
286 | } |
287 | if (a_fmax) |
288 | { |
289 | *a_fmax = a_max; |
290 | } |
291 | } |
292 | |
293 | static void get_state_f_norm_max(t_commrec *cr, |
294 | t_grpopts *opts, t_mdatoms *mdatoms, |
295 | em_state_t *ems) |
296 | { |
297 | get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax); |
298 | } |
299 | |
300 | void init_em(FILE *fplog, const char *title, |
301 | t_commrec *cr, t_inputrec *ir, |
302 | t_state *state_global, gmx_mtop_t *top_global, |
303 | em_state_t *ems, gmx_localtop_t **top, |
304 | rvec **f, rvec **f_global, |
305 | t_nrnb *nrnb, rvec mu_tot, |
306 | t_forcerec *fr, gmx_enerdata_t **enerd, |
307 | t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat, |
308 | gmx_vsite_t *vsite, gmx_constr_t constr, |
309 | int nfile, const t_filenm fnm[], |
310 | gmx_mdoutf_t *outf, t_mdebin **mdebin, |
311 | int imdport, unsigned long gmx_unused__attribute__ ((unused)) Flags) |
312 | { |
313 | int i; |
314 | real dvdl_constr; |
315 | |
316 | if (fplog) |
317 | { |
318 | fprintf(fplog, "Initiating %s\n", title); |
319 | } |
320 | |
321 | state_global->ngtc = 0; |
322 | |
323 | /* Initialize lambda variables */ |
324 | initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL((void*)0)); |
325 | |
326 | init_nrnb(nrnb); |
327 | |
328 | /* Interactive molecular dynamics */ |
329 | init_IMD(ir, cr, top_global, fplog, 1, state_global->x, |
330 | nfile, fnm, NULL((void*)0), imdport, Flags); |
331 | |
332 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
333 | { |
334 | *top = dd_init_local_top(top_global); |
335 | |
336 | dd_init_local_state(cr->dd, state_global, &ems->s); |
337 | |
338 | *f = NULL((void*)0); |
339 | |
340 | /* Distribute the charge groups over the nodes from the master node */ |
341 | dd_partition_system(fplog, ir->init_step, cr, TRUE1, 1, |
342 | state_global, top_global, ir, |
343 | &ems->s, &ems->f, mdatoms, *top, |
344 | fr, vsite, NULL((void*)0), constr, |
345 | nrnb, NULL((void*)0), FALSE0); |
346 | dd_store_state(cr->dd, &ems->s); |
347 | |
348 | if (ir->nstfout) |
349 | { |
350 | snew(*f_global, top_global->natoms)(*f_global) = save_calloc("*f_global", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 350, (top_global->natoms), sizeof(*(*f_global))); |
351 | } |
352 | else |
353 | { |
354 | *f_global = NULL((void*)0); |
355 | } |
356 | *graph = NULL((void*)0); |
357 | } |
358 | else |
359 | { |
360 | snew(*f, top_global->natoms)(*f) = save_calloc("*f", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 360, (top_global->natoms), sizeof(*(*f))); |
361 | |
362 | /* Just copy the state */ |
363 | ems->s = *state_global; |
364 | snew(ems->s.x, ems->s.nalloc)(ems->s.x) = save_calloc("ems->s.x", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 364, (ems->s.nalloc), sizeof(*(ems->s.x))); |
365 | snew(ems->f, ems->s.nalloc)(ems->f) = save_calloc("ems->f", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 365, (ems->s.nalloc), sizeof(*(ems->f))); |
366 | for (i = 0; i < state_global->natoms; i++) |
367 | { |
368 | copy_rvec(state_global->x[i], ems->s.x[i]); |
369 | } |
370 | copy_mat(state_global->box, ems->s.box); |
371 | |
372 | *top = gmx_mtop_generate_local_top(top_global, ir); |
373 | *f_global = *f; |
374 | |
375 | forcerec_set_excl_load(fr, *top); |
376 | |
377 | setup_bonded_threading(fr, &(*top)->idef); |
378 | |
379 | if (ir->ePBC != epbcNONE && !fr->bMolPBC) |
380 | { |
381 | *graph = mk_graph(fplog, &((*top)->idef), 0, top_global->natoms, FALSE0, FALSE0); |
382 | } |
383 | else |
384 | { |
385 | *graph = NULL((void*)0); |
386 | } |
387 | |
388 | atoms2md(top_global, ir, 0, NULL((void*)0), top_global->natoms, mdatoms); |
389 | update_mdatoms(mdatoms, state_global->lambda[efptFEP]); |
390 | |
391 | if (vsite) |
392 | { |
393 | set_vsite_top(vsite, *top, mdatoms, cr); |
394 | } |
395 | } |
396 | |
397 | if (constr) |
398 | { |
399 | if (ir->eConstrAlg == econtSHAKE && |
400 | gmx_mtop_ftype_count(top_global, F_CONSTR) > 0) |
401 | { |
402 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 402, "Can not do energy minimization with %s, use %s\n", |
403 | econstr_names[econtSHAKE], econstr_names[econtLINCS]); |
404 | } |
405 | |
406 | if (!DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
407 | { |
408 | set_constraints(constr, *top, ir, mdatoms, cr); |
409 | } |
410 | |
411 | if (!ir->bContinuation) |
412 | { |
413 | /* Constrain the starting coordinates */ |
414 | dvdl_constr = 0; |
415 | constrain(PAR(cr)((cr)->nnodes > 1) ? NULL((void*)0) : fplog, TRUE1, TRUE1, constr, &(*top)->idef, |
416 | ir, NULL((void*)0), cr, -1, 0, mdatoms, |
417 | ems->s.x, ems->s.x, NULL((void*)0), fr->bMolPBC, ems->s.box, |
418 | ems->s.lambda[efptFEP], &dvdl_constr, |
419 | NULL((void*)0), NULL((void*)0), nrnb, econqCoord, FALSE0, 0, 0); |
420 | } |
421 | } |
422 | |
423 | if (PAR(cr)((cr)->nnodes > 1)) |
424 | { |
425 | *gstat = global_stat_init(ir); |
426 | } |
427 | |
428 | *outf = init_mdoutf(fplog, nfile, fnm, 0, cr, ir, top_global, NULL((void*)0)); |
429 | |
430 | snew(*enerd, 1)(*enerd) = save_calloc("*enerd", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 430, (1), sizeof(*(*enerd))); |
431 | init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, |
432 | *enerd); |
433 | |
434 | if (mdebin != NULL((void*)0)) |
435 | { |
436 | /* Init bin for energy stuff */ |
437 | *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, NULL((void*)0)); |
438 | } |
439 | |
440 | clear_rvec(mu_tot); |
441 | calc_shifts(ems->s.box, fr->shift_vec); |
442 | } |
443 | |
444 | static void finish_em(t_commrec *cr, gmx_mdoutf_t outf, |
445 | gmx_walltime_accounting_t walltime_accounting, |
446 | gmx_wallcycle_t wcycle) |
447 | { |
448 | if (!(cr->duty & DUTY_PME(1<<1))) |
449 | { |
450 | /* Tell the PME only node to finish */ |
451 | gmx_pme_send_finish(cr); |
452 | } |
453 | |
454 | done_mdoutf(outf); |
455 | |
456 | em_time_end(walltime_accounting, wcycle); |
457 | } |
458 | |
459 | static void swap_em_state(em_state_t *ems1, em_state_t *ems2) |
460 | { |
461 | em_state_t tmp; |
462 | |
463 | tmp = *ems1; |
464 | *ems1 = *ems2; |
465 | *ems2 = tmp; |
466 | } |
467 | |
468 | static void copy_em_coords(em_state_t *ems, t_state *state) |
469 | { |
470 | int i; |
471 | |
472 | for (i = 0; (i < state->natoms); i++) |
473 | { |
474 | copy_rvec(ems->s.x[i], state->x[i]); |
475 | } |
476 | } |
477 | |
478 | static void write_em_traj(FILE *fplog, t_commrec *cr, |
479 | gmx_mdoutf_t outf, |
480 | gmx_bool bX, gmx_bool bF, const char *confout, |
481 | gmx_mtop_t *top_global, |
482 | t_inputrec *ir, gmx_int64_t step, |
483 | em_state_t *state, |
484 | t_state *state_global, rvec *f_global) |
485 | { |
486 | int mdof_flags; |
487 | gmx_bool bIMDout = FALSE0; |
488 | |
489 | |
490 | /* Shall we do IMD output? */ |
491 | if (ir->bIMD) |
492 | { |
493 | bIMDout = do_per_step(step, IMD_get_step(ir->imd->setup)); |
494 | } |
495 | |
496 | if ((bX || bF || bIMDout || confout != NULL((void*)0)) && !DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
497 | { |
498 | copy_em_coords(state, state_global); |
499 | f_global = state->f; |
500 | } |
501 | |
502 | mdof_flags = 0; |
503 | if (bX) |
504 | { |
505 | mdof_flags |= MDOF_X(1<<0); |
506 | } |
507 | if (bF) |
508 | { |
509 | mdof_flags |= MDOF_F(1<<2); |
510 | } |
511 | |
512 | /* If we want IMD output, set appropriate MDOF flag */ |
513 | if (ir->bIMD) |
514 | { |
515 | mdof_flags |= MDOF_IMD(1<<5); |
516 | } |
517 | |
518 | mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, |
519 | top_global, step, (double)step, |
520 | &state->s, state_global, state->f, f_global); |
521 | |
522 | if (confout != NULL((void*)0) && MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
523 | { |
524 | if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
525 | { |
526 | /* Make molecules whole only for confout writing */ |
527 | do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global, |
528 | state_global->x); |
529 | } |
530 | |
531 | write_sto_conf_mtop(confout, |
532 | *top_global->name, top_global, |
533 | state_global->x, NULL((void*)0), ir->ePBC, state_global->box); |
534 | } |
535 | } |
536 | |
537 | static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md, |
538 | gmx_bool bMolPBC, |
539 | em_state_t *ems1, real a, rvec *f, em_state_t *ems2, |
540 | gmx_constr_t constr, gmx_localtop_t *top, |
541 | t_nrnb *nrnb, gmx_wallcycle_t wcycle, |
542 | gmx_int64_t count) |
543 | |
544 | { |
545 | t_state *s1, *s2; |
546 | int i; |
547 | int start, end; |
548 | rvec *x1, *x2; |
549 | real dvdl_constr; |
550 | |
551 | s1 = &ems1->s; |
552 | s2 = &ems2->s; |
553 | |
554 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)) && s1->ddp_count != cr->dd->ddp_count) |
555 | { |
556 | gmx_incons("state mismatch in do_em_step")_gmx_error("incons", "state mismatch in do_em_step", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 556); |
557 | } |
558 | |
559 | s2->flags = s1->flags; |
560 | |
561 | if (s2->nalloc != s1->nalloc) |
562 | { |
563 | s2->nalloc = s1->nalloc; |
564 | srenew(s2->x, s1->nalloc)(s2->x) = save_realloc("s2->x", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 564, (s2->x), (s1->nalloc), sizeof(*(s2->x))); |
565 | srenew(ems2->f, s1->nalloc)(ems2->f) = save_realloc("ems2->f", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 565, (ems2->f), (s1->nalloc), sizeof(*(ems2->f))); |
566 | if (s2->flags & (1<<estCGP)) |
567 | { |
568 | srenew(s2->cg_p, s1->nalloc)(s2->cg_p) = save_realloc("s2->cg_p", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 568, (s2->cg_p), (s1->nalloc), sizeof(*(s2->cg_p)) ); |
569 | } |
570 | } |
571 | |
572 | s2->natoms = s1->natoms; |
573 | copy_mat(s1->box, s2->box); |
574 | /* Copy free energy state */ |
575 | for (i = 0; i < efptNR; i++) |
576 | { |
577 | s2->lambda[i] = s1->lambda[i]; |
578 | } |
579 | copy_mat(s1->box, s2->box); |
580 | |
581 | start = 0; |
582 | end = md->homenr; |
583 | |
584 | x1 = s1->x; |
585 | x2 = s2->x; |
586 | |
587 | #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate)) |
588 | { |
589 | int gf, i, m; |
590 | |
591 | gf = 0; |
592 | #pragma omp for schedule(static) nowait |
593 | for (i = start; i < end; i++) |
594 | { |
595 | if (md->cFREEZE) |
596 | { |
597 | gf = md->cFREEZE[i]; |
598 | } |
599 | for (m = 0; m < DIM3; m++) |
600 | { |
601 | if (ir->opts.nFreeze[gf][m]) |
602 | { |
603 | x2[i][m] = x1[i][m]; |
604 | } |
605 | else |
606 | { |
607 | x2[i][m] = x1[i][m] + a*f[i][m]; |
608 | } |
609 | } |
610 | } |
611 | |
612 | if (s2->flags & (1<<estCGP)) |
613 | { |
614 | /* Copy the CG p vector */ |
615 | x1 = s1->cg_p; |
616 | x2 = s2->cg_p; |
617 | #pragma omp for schedule(static) nowait |
618 | for (i = start; i < end; i++) |
619 | { |
620 | copy_rvec(x1[i], x2[i]); |
621 | } |
622 | } |
623 | |
624 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
625 | { |
626 | s2->ddp_count = s1->ddp_count; |
627 | if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) |
628 | { |
629 | #pragma omp barrier |
630 | s2->cg_gl_nalloc = s1->cg_gl_nalloc; |
631 | srenew(s2->cg_gl, s2->cg_gl_nalloc)(s2->cg_gl) = save_realloc("s2->cg_gl", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 631, (s2->cg_gl), (s2->cg_gl_nalloc), sizeof(*(s2-> cg_gl))); |
632 | #pragma omp barrier |
633 | } |
634 | s2->ncg_gl = s1->ncg_gl; |
635 | #pragma omp for schedule(static) nowait |
636 | for (i = 0; i < s2->ncg_gl; i++) |
637 | { |
638 | s2->cg_gl[i] = s1->cg_gl[i]; |
639 | } |
640 | s2->ddp_count_cg_gl = s1->ddp_count_cg_gl; |
641 | } |
642 | } |
643 | |
644 | if (constr) |
645 | { |
646 | wallcycle_start(wcycle, ewcCONSTR); |
647 | dvdl_constr = 0; |
648 | constrain(NULL((void*)0), TRUE1, TRUE1, constr, &top->idef, |
649 | ir, NULL((void*)0), cr, count, 0, md, |
650 | s1->x, s2->x, NULL((void*)0), bMolPBC, s2->box, |
651 | s2->lambda[efptBONDED], &dvdl_constr, |
652 | NULL((void*)0), NULL((void*)0), nrnb, econqCoord, FALSE0, 0, 0); |
653 | wallcycle_stop(wcycle, ewcCONSTR); |
654 | } |
655 | } |
656 | |
657 | static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr, |
658 | gmx_mtop_t *top_global, t_inputrec *ir, |
659 | em_state_t *ems, gmx_localtop_t *top, |
660 | t_mdatoms *mdatoms, t_forcerec *fr, |
661 | gmx_vsite_t *vsite, gmx_constr_t constr, |
662 | t_nrnb *nrnb, gmx_wallcycle_t wcycle) |
663 | { |
664 | /* Repartition the domain decomposition */ |
665 | wallcycle_start(wcycle, ewcDOMDEC); |
666 | dd_partition_system(fplog, step, cr, FALSE0, 1, |
667 | NULL((void*)0), top_global, ir, |
668 | &ems->s, &ems->f, |
669 | mdatoms, top, fr, vsite, NULL((void*)0), constr, |
670 | nrnb, wcycle, FALSE0); |
671 | dd_store_state(cr->dd, &ems->s); |
672 | wallcycle_stop(wcycle, ewcDOMDEC); |
673 | } |
674 | |
675 | static void evaluate_energy(FILE *fplog, t_commrec *cr, |
676 | gmx_mtop_t *top_global, |
677 | em_state_t *ems, gmx_localtop_t *top, |
678 | t_inputrec *inputrec, |
679 | t_nrnb *nrnb, gmx_wallcycle_t wcycle, |
680 | gmx_global_stat_t gstat, |
681 | gmx_vsite_t *vsite, gmx_constr_t constr, |
682 | t_fcdata *fcd, |
683 | t_graph *graph, t_mdatoms *mdatoms, |
684 | t_forcerec *fr, rvec mu_tot, |
685 | gmx_enerdata_t *enerd, tensor vir, tensor pres, |
686 | gmx_int64_t count, gmx_bool bFirst) |
687 | { |
688 | real t; |
689 | gmx_bool bNS; |
690 | int nabnsb; |
691 | tensor force_vir, shake_vir, ekin; |
692 | real dvdl_constr, prescorr, enercorr, dvdlcorr; |
693 | real terminate = 0; |
694 | |
695 | /* Set the time to the initial time, the time does not change during EM */ |
696 | t = inputrec->init_t; |
697 | |
698 | if (bFirst || |
699 | (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)) && ems->s.ddp_count < cr->dd->ddp_count)) |
700 | { |
701 | /* This is the first state or an old state used before the last ns */ |
702 | bNS = TRUE1; |
703 | } |
704 | else |
705 | { |
706 | bNS = FALSE0; |
707 | if (inputrec->nstlist > 0) |
708 | { |
709 | bNS = TRUE1; |
710 | } |
711 | else if (inputrec->nstlist == -1) |
712 | { |
713 | nabnsb = natoms_beyond_ns_buffer(inputrec, fr, &top->cgs, NULL((void*)0), ems->s.x); |
714 | if (PAR(cr)((cr)->nnodes > 1)) |
715 | { |
716 | gmx_sumi(1, &nabnsb, cr); |
717 | } |
718 | bNS = (nabnsb > 0); |
719 | } |
720 | } |
721 | |
722 | if (vsite) |
723 | { |
724 | construct_vsites(vsite, ems->s.x, 1, NULL((void*)0), |
725 | top->idef.iparams, top->idef.il, |
726 | fr->ePBC, fr->bMolPBC, cr, ems->s.box); |
727 | } |
728 | |
729 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)) && bNS) |
730 | { |
731 | /* Repartition the domain decomposition */ |
732 | em_dd_partition_system(fplog, count, cr, top_global, inputrec, |
733 | ems, top, mdatoms, fr, vsite, constr, |
734 | nrnb, wcycle); |
735 | } |
736 | |
737 | /* Calc force & energy on new trial position */ |
738 | /* do_force always puts the charge groups in the box and shifts again |
739 | * We do not unshift, so molecules are always whole in congrad.c |
740 | */ |
741 | do_force(fplog, cr, inputrec, |
742 | count, nrnb, wcycle, top, &top_global->groups, |
743 | ems->s.box, ems->s.x, &ems->s.hist, |
744 | ems->f, force_vir, mdatoms, enerd, fcd, |
745 | ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL((void*)0), NULL((void*)0), TRUE1, |
746 | GMX_FORCE_STATECHANGED(1<<0) | GMX_FORCE_ALLFORCES((1<<4) | (1<<6) | (1<<7)) | |
747 | GMX_FORCE_VIRIAL(1<<8) | GMX_FORCE_ENERGY(1<<9) | |
748 | (bNS ? GMX_FORCE_NS(1<<2) | GMX_FORCE_DO_LR(1<<11) : 0)); |
749 | |
750 | /* Clear the unused shake virial and pressure */ |
751 | clear_mat(shake_vir); |
752 | clear_mat(pres); |
753 | |
754 | /* Communicate stuff when parallel */ |
755 | if (PAR(cr)((cr)->nnodes > 1) && inputrec->eI != eiNM) |
756 | { |
757 | wallcycle_start(wcycle, ewcMoveE); |
758 | |
759 | global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot, |
760 | inputrec, NULL((void*)0), NULL((void*)0), NULL((void*)0), 1, &terminate, |
761 | top_global, &ems->s, FALSE0, |
762 | CGLO_ENERGY(1<<6) | |
763 | CGLO_PRESSURE(1<<8) | |
764 | CGLO_CONSTRAINT(1<<9) | |
765 | CGLO_FIRSTITERATE(1<<11)); |
766 | |
767 | wallcycle_stop(wcycle, ewcMoveE); |
768 | } |
769 | |
770 | /* Calculate long range corrections to pressure and energy */ |
771 | calc_dispcorr(fplog, inputrec, fr, count, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW], |
772 | pres, force_vir, &prescorr, &enercorr, &dvdlcorr); |
773 | enerd->term[F_DISPCORR] = enercorr; |
774 | enerd->term[F_EPOT] += enercorr; |
775 | enerd->term[F_PRES] += prescorr; |
776 | enerd->term[F_DVDL] += dvdlcorr; |
777 | |
778 | ems->epot = enerd->term[F_EPOT]; |
779 | |
780 | if (constr) |
781 | { |
782 | /* Project out the constraint components of the force */ |
783 | wallcycle_start(wcycle, ewcCONSTR); |
784 | dvdl_constr = 0; |
785 | constrain(NULL((void*)0), FALSE0, FALSE0, constr, &top->idef, |
786 | inputrec, NULL((void*)0), cr, count, 0, mdatoms, |
787 | ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box, |
788 | ems->s.lambda[efptBONDED], &dvdl_constr, |
789 | NULL((void*)0), &shake_vir, nrnb, econqForceDispl, FALSE0, 0, 0); |
790 | if (fr->bSepDVDL && fplog) |
791 | { |
792 | gmx_print_sepdvdl(fplog, "Constraints", t, dvdl_constr); |
793 | } |
794 | enerd->term[F_DVDL_CONSTR] += dvdl_constr; |
795 | m_add(force_vir, shake_vir, vir); |
796 | wallcycle_stop(wcycle, ewcCONSTR); |
797 | } |
798 | else |
799 | { |
800 | copy_mat(force_vir, vir); |
801 | } |
802 | |
803 | clear_mat(ekin); |
804 | enerd->term[F_PRES] = |
805 | calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres); |
806 | |
807 | sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals); |
808 | |
809 | if (EI_ENERGY_MINIMIZATION(inputrec->eI)((inputrec->eI) == eiSteep || (inputrec->eI) == eiCG || (inputrec->eI) == eiLBFGS)) |
810 | { |
811 | get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems); |
812 | } |
813 | } |
814 | |
815 | static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, |
816 | gmx_mtop_t *mtop, |
817 | em_state_t *s_min, em_state_t *s_b) |
818 | { |
819 | rvec *fm, *fb, *fmg; |
820 | t_block *cgs_gl; |
821 | int ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m; |
822 | double partsum; |
823 | unsigned char *grpnrFREEZE; |
824 | |
825 | if (debug) |
826 | { |
827 | fprintf(debug, "Doing reorder_partsum\n"); |
828 | } |
829 | |
830 | fm = s_min->f; |
831 | fb = s_b->f; |
832 | |
833 | cgs_gl = dd_charge_groups_global(cr->dd); |
834 | index = cgs_gl->index; |
835 | |
836 | /* Collect fm in a global vector fmg. |
837 | * This conflicts with the spirit of domain decomposition, |
838 | * but to fully optimize this a much more complicated algorithm is required. |
839 | */ |
840 | snew(fmg, mtop->natoms)(fmg) = save_calloc("fmg", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 840, (mtop->natoms), sizeof(*(fmg))); |
841 | |
842 | ncg = s_min->s.ncg_gl; |
843 | cg_gl = s_min->s.cg_gl; |
844 | i = 0; |
845 | for (c = 0; c < ncg; c++) |
846 | { |
847 | cg = cg_gl[c]; |
848 | a0 = index[cg]; |
849 | a1 = index[cg+1]; |
850 | for (a = a0; a < a1; a++) |
851 | { |
852 | copy_rvec(fm[i], fmg[a]); |
853 | i++; |
854 | } |
855 | } |
856 | gmx_sumgmx_sumf(mtop->natoms*3, fmg[0], cr); |
857 | |
858 | /* Now we will determine the part of the sum for the cgs in state s_b */ |
859 | ncg = s_b->s.ncg_gl; |
860 | cg_gl = s_b->s.cg_gl; |
861 | partsum = 0; |
862 | i = 0; |
863 | gf = 0; |
864 | grpnrFREEZE = mtop->groups.grpnr[egcFREEZE]; |
865 | for (c = 0; c < ncg; c++) |
866 | { |
867 | cg = cg_gl[c]; |
868 | a0 = index[cg]; |
869 | a1 = index[cg+1]; |
870 | for (a = a0; a < a1; a++) |
871 | { |
872 | if (mdatoms->cFREEZE && grpnrFREEZE) |
873 | { |
874 | gf = grpnrFREEZE[i]; |
875 | } |
876 | for (m = 0; m < DIM3; m++) |
877 | { |
878 | if (!opts->nFreeze[gf][m]) |
879 | { |
880 | partsum += (fb[i][m] - fmg[a][m])*fb[i][m]; |
881 | } |
882 | } |
883 | i++; |
884 | } |
885 | } |
886 | |
887 | sfree(fmg)save_free("fmg", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 887, (fmg)); |
888 | |
889 | return partsum; |
890 | } |
891 | |
892 | static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, |
893 | gmx_mtop_t *mtop, |
894 | em_state_t *s_min, em_state_t *s_b) |
895 | { |
896 | rvec *fm, *fb; |
897 | double sum; |
898 | int gf, i, m; |
899 | |
900 | /* This is just the classical Polak-Ribiere calculation of beta; |
901 | * it looks a bit complicated since we take freeze groups into account, |
902 | * and might have to sum it in parallel runs. |
903 | */ |
904 | |
905 | if (!DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)) || |
906 | (s_min->s.ddp_count == cr->dd->ddp_count && |
907 | s_b->s.ddp_count == cr->dd->ddp_count)) |
908 | { |
909 | fm = s_min->f; |
910 | fb = s_b->f; |
911 | sum = 0; |
912 | gf = 0; |
913 | /* This part of code can be incorrect with DD, |
914 | * since the atom ordering in s_b and s_min might differ. |
915 | */ |
916 | for (i = 0; i < mdatoms->homenr; i++) |
917 | { |
918 | if (mdatoms->cFREEZE) |
919 | { |
920 | gf = mdatoms->cFREEZE[i]; |
921 | } |
922 | for (m = 0; m < DIM3; m++) |
923 | { |
924 | if (!opts->nFreeze[gf][m]) |
925 | { |
926 | sum += (fb[i][m] - fm[i][m])*fb[i][m]; |
927 | } |
928 | } |
929 | } |
930 | } |
931 | else |
932 | { |
933 | /* We need to reorder cgs while summing */ |
934 | sum = reorder_partsum(cr, opts, mdatoms, mtop, s_min, s_b); |
935 | } |
936 | if (PAR(cr)((cr)->nnodes > 1)) |
937 | { |
938 | gmx_sumd(1, &sum, cr); |
939 | } |
940 | |
941 | return sum/sqr(s_min->fnorm); |
942 | } |
943 | |
944 | double do_cg(FILE *fplog, t_commrec *cr, |
945 | int nfile, const t_filenm fnm[], |
946 | const output_env_t gmx_unused__attribute__ ((unused)) oenv, gmx_bool bVerbose, gmx_bool gmx_unused__attribute__ ((unused)) bCompact, |
947 | int gmx_unused__attribute__ ((unused)) nstglobalcomm, |
948 | gmx_vsite_t *vsite, gmx_constr_t constr, |
949 | int gmx_unused__attribute__ ((unused)) stepout, |
950 | t_inputrec *inputrec, |
951 | gmx_mtop_t *top_global, t_fcdata *fcd, |
952 | t_state *state_global, |
953 | t_mdatoms *mdatoms, |
954 | t_nrnb *nrnb, gmx_wallcycle_t wcycle, |
955 | gmx_edsam_t gmx_unused__attribute__ ((unused)) ed, |
956 | t_forcerec *fr, |
957 | int gmx_unused__attribute__ ((unused)) repl_ex_nst, int gmx_unused__attribute__ ((unused)) repl_ex_nex, int gmx_unused__attribute__ ((unused)) repl_ex_seed, |
958 | gmx_membed_t gmx_unused__attribute__ ((unused)) membed, |
959 | real gmx_unused__attribute__ ((unused)) cpt_period, real gmx_unused__attribute__ ((unused)) max_hours, |
960 | const char gmx_unused__attribute__ ((unused)) *deviceOptions, |
961 | int imdport, |
962 | unsigned long gmx_unused__attribute__ ((unused)) Flags, |
963 | gmx_walltime_accounting_t walltime_accounting) |
964 | { |
965 | const char *CG = "Polak-Ribiere Conjugate Gradients"; |
966 | |
967 | em_state_t *s_min, *s_a, *s_b, *s_c; |
968 | gmx_localtop_t *top; |
969 | gmx_enerdata_t *enerd; |
970 | rvec *f; |
971 | gmx_global_stat_t gstat; |
972 | t_graph *graph; |
973 | rvec *f_global, *p, *sf, *sfm; |
974 | double gpa, gpb, gpc, tmp, sum[2], minstep; |
975 | real fnormn; |
976 | real stepsize; |
977 | real a, b, c, beta = 0.0; |
978 | real epot_repl = 0; |
979 | real pnorm; |
980 | t_mdebin *mdebin; |
981 | gmx_bool converged, foundlower; |
982 | rvec mu_tot; |
983 | gmx_bool do_log = FALSE0, do_ene = FALSE0, do_x, do_f; |
984 | tensor vir, pres; |
985 | int number_steps, neval = 0, nstcg = inputrec->nstcgsteep; |
986 | gmx_mdoutf_t outf; |
987 | int i, m, gf, step, nminstep; |
988 | real terminate = 0; |
989 | |
990 | step = 0; |
991 | |
992 | s_min = init_em_state(); |
993 | s_a = init_em_state(); |
994 | s_b = init_em_state(); |
995 | s_c = init_em_state(); |
996 | |
997 | /* Init em and store the local state in s_min */ |
998 | init_em(fplog, CG, cr, inputrec, |
999 | state_global, top_global, s_min, &top, &f, &f_global, |
1000 | nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, |
1001 | nfile, fnm, &outf, &mdebin, imdport, Flags); |
1002 | |
1003 | /* Print to log file */ |
1004 | print_em_start(fplog, cr, walltime_accounting, wcycle, CG); |
1005 | |
1006 | /* Max number of steps */ |
1007 | number_steps = inputrec->nsteps; |
1008 | |
1009 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
1010 | { |
1011 | sp_header(stderrstderr, CG, inputrec->em_tol, number_steps); |
1012 | } |
1013 | if (fplog) |
1014 | { |
1015 | sp_header(fplog, CG, inputrec->em_tol, number_steps); |
1016 | } |
1017 | |
1018 | /* Call the force routine and some auxiliary (neighboursearching etc.) */ |
1019 | /* do_force always puts the charge groups in the box and shifts again |
1020 | * We do not unshift, so molecules are always whole in congrad.c |
1021 | */ |
1022 | evaluate_energy(fplog, cr, |
1023 | top_global, s_min, top, |
1024 | inputrec, nrnb, wcycle, gstat, |
1025 | vsite, constr, fcd, graph, mdatoms, fr, |
1026 | mu_tot, enerd, vir, pres, -1, TRUE1); |
1027 | where()_where("/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1027); |
1028 | |
1029 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
1030 | { |
1031 | /* Copy stuff to the energy bin for easy printing etc. */ |
1032 | upd_mdebin(mdebin, FALSE0, FALSE0, (double)step, |
1033 | mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box, |
1034 | NULL((void*)0), NULL((void*)0), vir, pres, NULL((void*)0), mu_tot, constr); |
1035 | |
1036 | print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); |
1037 | print_ebin(mdoutf_get_fp_ene(outf), TRUE1, FALSE0, FALSE0, fplog, step, step, eprNORMAL, |
1038 | TRUE1, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); |
1039 | } |
1040 | where()_where("/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1040); |
1041 | |
1042 | /* Estimate/guess the initial stepsize */ |
1043 | stepsize = inputrec->em_stepsize/s_min->fnorm; |
1044 | |
1045 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
1046 | { |
1047 | fprintf(stderrstderr, " F-max = %12.5e on atom %d\n", |
1048 | s_min->fmax, s_min->a_fmax+1); |
1049 | fprintf(stderrstderr, " F-Norm = %12.5e\n", |
1050 | s_min->fnorm/sqrt(state_global->natoms)); |
1051 | fprintf(stderrstderr, "\n"); |
1052 | /* and copy to the log file too... */ |
1053 | fprintf(fplog, " F-max = %12.5e on atom %d\n", |
1054 | s_min->fmax, s_min->a_fmax+1); |
1055 | fprintf(fplog, " F-Norm = %12.5e\n", |
1056 | s_min->fnorm/sqrt(state_global->natoms)); |
1057 | fprintf(fplog, "\n"); |
1058 | } |
1059 | /* Start the loop over CG steps. |
1060 | * Each successful step is counted, and we continue until |
1061 | * we either converge or reach the max number of steps. |
1062 | */ |
1063 | converged = FALSE0; |
1064 | for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++) |
1065 | { |
1066 | |
1067 | /* start taking steps in a new direction |
1068 | * First time we enter the routine, beta=0, and the direction is |
1069 | * simply the negative gradient. |
1070 | */ |
1071 | |
1072 | /* Calculate the new direction in p, and the gradient in this direction, gpa */ |
1073 | p = s_min->s.cg_p; |
1074 | sf = s_min->f; |
1075 | gpa = 0; |
1076 | gf = 0; |
1077 | for (i = 0; i < mdatoms->homenr; i++) |
1078 | { |
1079 | if (mdatoms->cFREEZE) |
1080 | { |
1081 | gf = mdatoms->cFREEZE[i]; |
1082 | } |
1083 | for (m = 0; m < DIM3; m++) |
1084 | { |
1085 | if (!inputrec->opts.nFreeze[gf][m]) |
1086 | { |
1087 | p[i][m] = sf[i][m] + beta*p[i][m]; |
1088 | gpa -= p[i][m]*sf[i][m]; |
1089 | /* f is negative gradient, thus the sign */ |
1090 | } |
1091 | else |
1092 | { |
1093 | p[i][m] = 0; |
1094 | } |
1095 | } |
1096 | } |
1097 | |
1098 | /* Sum the gradient along the line across CPUs */ |
1099 | if (PAR(cr)((cr)->nnodes > 1)) |
1100 | { |
1101 | gmx_sumd(1, &gpa, cr); |
1102 | } |
1103 | |
1104 | /* Calculate the norm of the search vector */ |
1105 | get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL((void*)0), NULL((void*)0)); |
1106 | |
1107 | /* Just in case stepsize reaches zero due to numerical precision... */ |
1108 | if (stepsize <= 0) |
1109 | { |
1110 | stepsize = inputrec->em_stepsize/pnorm; |
1111 | } |
1112 | |
1113 | /* |
1114 | * Double check the value of the derivative in the search direction. |
1115 | * If it is positive it must be due to the old information in the |
1116 | * CG formula, so just remove that and start over with beta=0. |
1117 | * This corresponds to a steepest descent step. |
1118 | */ |
1119 | if (gpa > 0) |
1120 | { |
1121 | beta = 0; |
1122 | step--; /* Don't count this step since we are restarting */ |
1123 | continue; /* Go back to the beginning of the big for-loop */ |
1124 | } |
1125 | |
1126 | /* Calculate minimum allowed stepsize, before the average (norm) |
1127 | * relative change in coordinate is smaller than precision |
1128 | */ |
1129 | minstep = 0; |
1130 | for (i = 0; i < mdatoms->homenr; i++) |
1131 | { |
1132 | for (m = 0; m < DIM3; m++) |
1133 | { |
1134 | tmp = fabs(s_min->s.x[i][m]); |
1135 | if (tmp < 1.0) |
1136 | { |
1137 | tmp = 1.0; |
1138 | } |
1139 | tmp = p[i][m]/tmp; |
1140 | minstep += tmp*tmp; |
1141 | } |
1142 | } |
1143 | /* Add up from all CPUs */ |
1144 | if (PAR(cr)((cr)->nnodes > 1)) |
1145 | { |
1146 | gmx_sumd(1, &minstep, cr); |
1147 | } |
1148 | |
1149 | minstep = GMX_REAL_EPS5.96046448E-08/sqrt(minstep/(3*state_global->natoms)); |
1150 | |
1151 | if (stepsize < minstep) |
1152 | { |
1153 | converged = TRUE1; |
1154 | break; |
1155 | } |
1156 | |
1157 | /* Write coordinates if necessary */ |
1158 | do_x = do_per_step(step, inputrec->nstxout); |
1159 | do_f = do_per_step(step, inputrec->nstfout); |
1160 | |
1161 | write_em_traj(fplog, cr, outf, do_x, do_f, NULL((void*)0), |
1162 | top_global, inputrec, step, |
1163 | s_min, state_global, f_global); |
1164 | |
1165 | /* Take a step downhill. |
1166 | * In theory, we should minimize the function along this direction. |
1167 | * That is quite possible, but it turns out to take 5-10 function evaluations |
1168 | * for each line. However, we dont really need to find the exact minimum - |
1169 | * it is much better to start a new CG step in a modified direction as soon |
1170 | * as we are close to it. This will save a lot of energy evaluations. |
1171 | * |
1172 | * In practice, we just try to take a single step. |
1173 | * If it worked (i.e. lowered the energy), we increase the stepsize but |
1174 | * the continue straight to the next CG step without trying to find any minimum. |
1175 | * If it didn't work (higher energy), there must be a minimum somewhere between |
1176 | * the old position and the new one. |
1177 | * |
1178 | * Due to the finite numerical accuracy, it turns out that it is a good idea |
1179 | * to even accept a SMALL increase in energy, if the derivative is still downhill. |
1180 | * This leads to lower final energies in the tests I've done. / Erik |
1181 | */ |
1182 | s_a->epot = s_min->epot; |
1183 | a = 0.0; |
1184 | c = a + stepsize; /* reference position along line is zero */ |
1185 | |
1186 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)) && s_min->s.ddp_count < cr->dd->ddp_count) |
1187 | { |
1188 | em_dd_partition_system(fplog, step, cr, top_global, inputrec, |
1189 | s_min, top, mdatoms, fr, vsite, constr, |
1190 | nrnb, wcycle); |
1191 | } |
1192 | |
1193 | /* Take a trial step (new coords in s_c) */ |
1194 | do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c, |
1195 | constr, top, nrnb, wcycle, -1); |
1196 | |
1197 | neval++; |
1198 | /* Calculate energy for the trial step */ |
1199 | evaluate_energy(fplog, cr, |
1200 | top_global, s_c, top, |
1201 | inputrec, nrnb, wcycle, gstat, |
1202 | vsite, constr, fcd, graph, mdatoms, fr, |
1203 | mu_tot, enerd, vir, pres, -1, FALSE0); |
1204 | |
1205 | /* Calc derivative along line */ |
1206 | p = s_c->s.cg_p; |
1207 | sf = s_c->f; |
1208 | gpc = 0; |
1209 | for (i = 0; i < mdatoms->homenr; i++) |
1210 | { |
1211 | for (m = 0; m < DIM3; m++) |
1212 | { |
1213 | gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */ |
1214 | } |
1215 | } |
1216 | /* Sum the gradient along the line across CPUs */ |
1217 | if (PAR(cr)((cr)->nnodes > 1)) |
1218 | { |
1219 | gmx_sumd(1, &gpc, cr); |
1220 | } |
1221 | |
1222 | /* This is the max amount of increase in energy we tolerate */ |
1223 | tmp = sqrt(GMX_REAL_EPS5.96046448E-08)*fabs(s_a->epot); |
1224 | |
1225 | /* Accept the step if the energy is lower, or if it is not significantly higher |
1226 | * and the line derivative is still negative. |
1227 | */ |
1228 | if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) |
1229 | { |
1230 | foundlower = TRUE1; |
1231 | /* Great, we found a better energy. Increase step for next iteration |
1232 | * if we are still going down, decrease it otherwise |
1233 | */ |
1234 | if (gpc < 0) |
1235 | { |
1236 | stepsize *= 1.618034; /* The golden section */ |
1237 | } |
1238 | else |
1239 | { |
1240 | stepsize *= 0.618034; /* 1/golden section */ |
1241 | } |
1242 | } |
1243 | else |
1244 | { |
1245 | /* New energy is the same or higher. We will have to do some work |
1246 | * to find a smaller value in the interval. Take smaller step next time! |
1247 | */ |
1248 | foundlower = FALSE0; |
1249 | stepsize *= 0.618034; |
1250 | } |
1251 | |
1252 | |
1253 | |
1254 | |
1255 | /* OK, if we didn't find a lower value we will have to locate one now - there must |
1256 | * be one in the interval [a=0,c]. |
1257 | * The same thing is valid here, though: Don't spend dozens of iterations to find |
1258 | * the line minimum. We try to interpolate based on the derivative at the endpoints, |
1259 | * and only continue until we find a lower value. In most cases this means 1-2 iterations. |
1260 | * |
1261 | * I also have a safeguard for potentially really patological functions so we never |
1262 | * take more than 20 steps before we give up ... |
1263 | * |
1264 | * If we already found a lower value we just skip this step and continue to the update. |
1265 | */ |
1266 | if (!foundlower) |
1267 | { |
1268 | nminstep = 0; |
1269 | |
1270 | do |
1271 | { |
1272 | /* Select a new trial point. |
1273 | * If the derivatives at points a & c have different sign we interpolate to zero, |
1274 | * otherwise just do a bisection. |
1275 | */ |
1276 | if (gpa < 0 && gpc > 0) |
1277 | { |
1278 | b = a + gpa*(a-c)/(gpc-gpa); |
1279 | } |
1280 | else |
1281 | { |
1282 | b = 0.5*(a+c); |
1283 | } |
1284 | |
1285 | /* safeguard if interpolation close to machine accuracy causes errors: |
1286 | * never go outside the interval |
1287 | */ |
1288 | if (b <= a || b >= c) |
1289 | { |
1290 | b = 0.5*(a+c); |
1291 | } |
1292 | |
1293 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)) && s_min->s.ddp_count != cr->dd->ddp_count) |
1294 | { |
1295 | /* Reload the old state */ |
1296 | em_dd_partition_system(fplog, -1, cr, top_global, inputrec, |
1297 | s_min, top, mdatoms, fr, vsite, constr, |
1298 | nrnb, wcycle); |
1299 | } |
1300 | |
1301 | /* Take a trial step to this new point - new coords in s_b */ |
1302 | do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b, |
1303 | constr, top, nrnb, wcycle, -1); |
1304 | |
1305 | neval++; |
1306 | /* Calculate energy for the trial step */ |
1307 | evaluate_energy(fplog, cr, |
1308 | top_global, s_b, top, |
1309 | inputrec, nrnb, wcycle, gstat, |
1310 | vsite, constr, fcd, graph, mdatoms, fr, |
1311 | mu_tot, enerd, vir, pres, -1, FALSE0); |
1312 | |
1313 | /* p does not change within a step, but since the domain decomposition |
1314 | * might change, we have to use cg_p of s_b here. |
1315 | */ |
1316 | p = s_b->s.cg_p; |
1317 | sf = s_b->f; |
1318 | gpb = 0; |
1319 | for (i = 0; i < mdatoms->homenr; i++) |
1320 | { |
1321 | for (m = 0; m < DIM3; m++) |
1322 | { |
1323 | gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */ |
1324 | } |
1325 | } |
1326 | /* Sum the gradient along the line across CPUs */ |
1327 | if (PAR(cr)((cr)->nnodes > 1)) |
1328 | { |
1329 | gmx_sumd(1, &gpb, cr); |
1330 | } |
1331 | |
1332 | if (debug) |
1333 | { |
1334 | fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", |
1335 | s_a->epot, s_b->epot, s_c->epot, gpb); |
1336 | } |
1337 | |
1338 | epot_repl = s_b->epot; |
1339 | |
1340 | /* Keep one of the intervals based on the value of the derivative at the new point */ |
1341 | if (gpb > 0) |
1342 | { |
1343 | /* Replace c endpoint with b */ |
1344 | swap_em_state(s_b, s_c); |
1345 | c = b; |
1346 | gpc = gpb; |
1347 | } |
1348 | else |
1349 | { |
1350 | /* Replace a endpoint with b */ |
1351 | swap_em_state(s_b, s_a); |
1352 | a = b; |
1353 | gpa = gpb; |
1354 | } |
1355 | |
1356 | /* |
1357 | * Stop search as soon as we find a value smaller than the endpoints. |
1358 | * Never run more than 20 steps, no matter what. |
1359 | */ |
1360 | nminstep++; |
1361 | } |
1362 | while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && |
1363 | (nminstep < 20)); |
1364 | |
1365 | if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS5.96046448E-08 || |
1366 | nminstep >= 20) |
1367 | { |
1368 | /* OK. We couldn't find a significantly lower energy. |
1369 | * If beta==0 this was steepest descent, and then we give up. |
1370 | * If not, set beta=0 and restart with steepest descent before quitting. |
1371 | */ |
1372 | if (beta == 0.0) |
1373 | { |
1374 | /* Converged */ |
1375 | converged = TRUE1; |
1376 | break; |
1377 | } |
1378 | else |
1379 | { |
1380 | /* Reset memory before giving up */ |
1381 | beta = 0.0; |
1382 | continue; |
1383 | } |
1384 | } |
1385 | |
1386 | /* Select min energy state of A & C, put the best in B. |
1387 | */ |
1388 | if (s_c->epot < s_a->epot) |
1389 | { |
1390 | if (debug) |
1391 | { |
1392 | fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", |
1393 | s_c->epot, s_a->epot); |
1394 | } |
1395 | swap_em_state(s_b, s_c); |
1396 | gpb = gpc; |
1397 | b = c; |
1398 | } |
1399 | else |
1400 | { |
1401 | if (debug) |
1402 | { |
1403 | fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", |
1404 | s_a->epot, s_c->epot); |
1405 | } |
1406 | swap_em_state(s_b, s_a); |
1407 | gpb = gpa; |
1408 | b = a; |
1409 | } |
1410 | |
1411 | } |
1412 | else |
1413 | { |
1414 | if (debug) |
1415 | { |
1416 | fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", |
1417 | s_c->epot); |
1418 | } |
1419 | swap_em_state(s_b, s_c); |
1420 | gpb = gpc; |
1421 | b = c; |
1422 | } |
1423 | |
1424 | /* new search direction */ |
1425 | /* beta = 0 means forget all memory and restart with steepest descents. */ |
1426 | if (nstcg && ((step % nstcg) == 0)) |
1427 | { |
1428 | beta = 0.0; |
1429 | } |
1430 | else |
1431 | { |
1432 | /* s_min->fnorm cannot be zero, because then we would have converged |
1433 | * and broken out. |
1434 | */ |
1435 | |
1436 | /* Polak-Ribiere update. |
1437 | * Change to fnorm2/fnorm2_old for Fletcher-Reeves |
1438 | */ |
1439 | beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b); |
1440 | } |
1441 | /* Limit beta to prevent oscillations */ |
1442 | if (fabs(beta) > 5.0) |
1443 | { |
1444 | beta = 0.0; |
1445 | } |
1446 | |
1447 | |
1448 | /* update positions */ |
1449 | swap_em_state(s_min, s_b); |
1450 | gpa = gpb; |
1451 | |
1452 | /* Print it if necessary */ |
1453 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
1454 | { |
1455 | if (bVerbose) |
1456 | { |
1457 | fprintf(stderrstderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", |
1458 | step, s_min->epot, s_min->fnorm/sqrt(state_global->natoms), |
1459 | s_min->fmax, s_min->a_fmax+1); |
1460 | } |
1461 | /* Store the new (lower) energies */ |
1462 | upd_mdebin(mdebin, FALSE0, FALSE0, (double)step, |
1463 | mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box, |
1464 | NULL((void*)0), NULL((void*)0), vir, pres, NULL((void*)0), mu_tot, constr); |
1465 | |
1466 | do_log = do_per_step(step, inputrec->nstlog); |
1467 | do_ene = do_per_step(step, inputrec->nstenergy); |
1468 | |
1469 | /* Prepare IMD energy record, if bIMD is TRUE. */ |
1470 | IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, step, TRUE1); |
1471 | |
1472 | if (do_log) |
1473 | { |
1474 | print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); |
1475 | } |
1476 | print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE0, FALSE0, |
1477 | do_log ? fplog : NULL((void*)0), step, step, eprNORMAL, |
1478 | TRUE1, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); |
1479 | } |
1480 | |
1481 | /* Send energies and positions to the IMD client if bIMD is TRUE. */ |
1482 | if (do_IMD(inputrec->bIMD, step, cr, TRUE1, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
1483 | { |
1484 | IMD_send_positions(inputrec->imd); |
1485 | } |
1486 | |
1487 | /* Stop when the maximum force lies below tolerance. |
1488 | * If we have reached machine precision, converged is already set to true. |
1489 | */ |
1490 | converged = converged || (s_min->fmax < inputrec->em_tol); |
1491 | |
1492 | } /* End of the loop */ |
1493 | |
1494 | /* IMD cleanup, if bIMD is TRUE. */ |
1495 | IMD_finalize(inputrec->bIMD, inputrec->imd); |
1496 | |
1497 | if (converged) |
1498 | { |
1499 | step--; /* we never took that last step in this case */ |
1500 | |
1501 | } |
1502 | if (s_min->fmax > inputrec->em_tol) |
1503 | { |
1504 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
1505 | { |
1506 | warn_step(stderrstderr, inputrec->em_tol, step-1 == number_steps, FALSE0); |
1507 | warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE0); |
1508 | } |
1509 | converged = FALSE0; |
1510 | } |
1511 | |
1512 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
1513 | { |
1514 | /* If we printed energy and/or logfile last step (which was the last step) |
1515 | * we don't have to do it again, but otherwise print the final values. |
1516 | */ |
1517 | if (!do_log) |
1518 | { |
1519 | /* Write final value to log since we didn't do anything the last step */ |
1520 | print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]); |
1521 | } |
1522 | if (!do_ene || !do_log) |
1523 | { |
1524 | /* Write final energy file entries */ |
1525 | print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE0, FALSE0, |
1526 | !do_log ? fplog : NULL((void*)0), step, step, eprNORMAL, |
1527 | TRUE1, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); |
1528 | } |
1529 | } |
1530 | |
1531 | /* Print some stuff... */ |
1532 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
1533 | { |
1534 | fprintf(stderrstderr, "\nwriting lowest energy coordinates.\n"); |
1535 | } |
1536 | |
1537 | /* IMPORTANT! |
1538 | * For accurate normal mode calculation it is imperative that we |
1539 | * store the last conformation into the full precision binary trajectory. |
1540 | * |
1541 | * However, we should only do it if we did NOT already write this step |
1542 | * above (which we did if do_x or do_f was true). |
1543 | */ |
1544 | do_x = !do_per_step(step, inputrec->nstxout); |
1545 | do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout)); |
1546 | |
1547 | write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), |
1548 | top_global, inputrec, step, |
1549 | s_min, state_global, f_global); |
1550 | |
1551 | fnormn = s_min->fnorm/sqrt(state_global->natoms); |
1552 | |
1553 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
1554 | { |
1555 | print_converged(stderrstderr, CG, inputrec->em_tol, step, converged, number_steps, |
1556 | s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); |
1557 | print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, |
1558 | s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); |
1559 | |
1560 | fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); |
1561 | } |
1562 | |
1563 | finish_em(cr, outf, walltime_accounting, wcycle); |
1564 | |
1565 | /* To print the actual number of steps we needed somewhere */ |
1566 | walltime_accounting_set_nsteps_done(walltime_accounting, step); |
1567 | |
1568 | return 0; |
1569 | } /* That's all folks */ |
1570 | |
1571 | |
1572 | double do_lbfgs(FILE *fplog, t_commrec *cr, |
1573 | int nfile, const t_filenm fnm[], |
1574 | const output_env_t gmx_unused__attribute__ ((unused)) oenv, gmx_bool bVerbose, gmx_bool gmx_unused__attribute__ ((unused)) bCompact, |
1575 | int gmx_unused__attribute__ ((unused)) nstglobalcomm, |
1576 | gmx_vsite_t *vsite, gmx_constr_t constr, |
1577 | int gmx_unused__attribute__ ((unused)) stepout, |
1578 | t_inputrec *inputrec, |
1579 | gmx_mtop_t *top_global, t_fcdata *fcd, |
1580 | t_state *state, |
1581 | t_mdatoms *mdatoms, |
1582 | t_nrnb *nrnb, gmx_wallcycle_t wcycle, |
1583 | gmx_edsam_t gmx_unused__attribute__ ((unused)) ed, |
1584 | t_forcerec *fr, |
1585 | int gmx_unused__attribute__ ((unused)) repl_ex_nst, int gmx_unused__attribute__ ((unused)) repl_ex_nex, int gmx_unused__attribute__ ((unused)) repl_ex_seed, |
1586 | gmx_membed_t gmx_unused__attribute__ ((unused)) membed, |
1587 | real gmx_unused__attribute__ ((unused)) cpt_period, real gmx_unused__attribute__ ((unused)) max_hours, |
1588 | const char gmx_unused__attribute__ ((unused)) *deviceOptions, |
1589 | int imdport, |
1590 | unsigned long gmx_unused__attribute__ ((unused)) Flags, |
1591 | gmx_walltime_accounting_t walltime_accounting) |
1592 | { |
1593 | static const char *LBFGS = "Low-Memory BFGS Minimizer"; |
1594 | em_state_t ems; |
1595 | gmx_localtop_t *top; |
1596 | gmx_enerdata_t *enerd; |
1597 | rvec *f; |
1598 | gmx_global_stat_t gstat; |
1599 | t_graph *graph; |
1600 | rvec *f_global; |
1601 | int ncorr, nmaxcorr, point, cp, neval, nminstep; |
1602 | double stepsize, gpa, gpb, gpc, tmp, minstep; |
1603 | real *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg; |
1604 | real *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp; |
1605 | real a, b, c, maxdelta, delta; |
1606 | real diag, Epot0, Epot, EpotA, EpotB, EpotC; |
1607 | real dgdx, dgdg, sq, yr, beta; |
1608 | t_mdebin *mdebin; |
1609 | gmx_bool converged, first; |
1610 | rvec mu_tot; |
1611 | real fnorm, fmax; |
1612 | gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen; |
1613 | tensor vir, pres; |
1614 | int start, end, number_steps; |
1615 | gmx_mdoutf_t outf; |
1616 | int i, k, m, n, nfmax, gf, step; |
1617 | int mdof_flags; |
1618 | /* not used */ |
1619 | real terminate; |
1620 | |
1621 | if (PAR(cr)((cr)->nnodes > 1)) |
1622 | { |
1623 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1623, "Cannot do parallel L-BFGS Minimization - yet.\n"); |
1624 | } |
1625 | |
1626 | if (NULL((void*)0) != constr) |
1627 | { |
1628 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1628, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent)."); |
1629 | } |
1630 | |
1631 | n = 3*state->natoms; |
1632 | nmaxcorr = inputrec->nbfgscorr; |
1633 | |
1634 | /* Allocate memory */ |
1635 | /* Use pointers to real so we dont have to loop over both atoms and |
1636 | * dimensions all the time... |
1637 | * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real |
1638 | * that point to the same memory. |
1639 | */ |
1640 | snew(xa, n)(xa) = save_calloc("xa", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1640, (n), sizeof(*(xa))); |
1641 | snew(xb, n)(xb) = save_calloc("xb", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1641, (n), sizeof(*(xb))); |
1642 | snew(xc, n)(xc) = save_calloc("xc", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1642, (n), sizeof(*(xc))); |
1643 | snew(fa, n)(fa) = save_calloc("fa", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1643, (n), sizeof(*(fa))); |
1644 | snew(fb, n)(fb) = save_calloc("fb", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1644, (n), sizeof(*(fb))); |
1645 | snew(fc, n)(fc) = save_calloc("fc", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1645, (n), sizeof(*(fc))); |
1646 | snew(frozen, n)(frozen) = save_calloc("frozen", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1646, (n), sizeof(*(frozen))); |
1647 | |
1648 | snew(p, n)(p) = save_calloc("p", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1648, (n), sizeof(*(p))); |
1649 | snew(lastx, n)(lastx) = save_calloc("lastx", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1649, (n), sizeof(*(lastx))); |
1650 | snew(lastf, n)(lastf) = save_calloc("lastf", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1650, (n), sizeof(*(lastf))); |
1651 | snew(rho, nmaxcorr)(rho) = save_calloc("rho", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1651, (nmaxcorr), sizeof(*(rho))); |
1652 | snew(alpha, nmaxcorr)(alpha) = save_calloc("alpha", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1652, (nmaxcorr), sizeof(*(alpha))); |
1653 | |
1654 | snew(dx, nmaxcorr)(dx) = save_calloc("dx", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1654, (nmaxcorr), sizeof(*(dx))); |
1655 | for (i = 0; i < nmaxcorr; i++) |
1656 | { |
1657 | snew(dx[i], n)(dx[i]) = save_calloc("dx[i]", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1657, (n), sizeof(*(dx[i]))); |
1658 | } |
1659 | |
1660 | snew(dg, nmaxcorr)(dg) = save_calloc("dg", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1660, (nmaxcorr), sizeof(*(dg))); |
1661 | for (i = 0; i < nmaxcorr; i++) |
1662 | { |
1663 | snew(dg[i], n)(dg[i]) = save_calloc("dg[i]", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1663, (n), sizeof(*(dg[i]))); |
1664 | } |
1665 | |
1666 | step = 0; |
1667 | neval = 0; |
1668 | |
1669 | /* Init em */ |
1670 | init_em(fplog, LBFGS, cr, inputrec, |
1671 | state, top_global, &ems, &top, &f, &f_global, |
1672 | nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, |
1673 | nfile, fnm, &outf, &mdebin, imdport, Flags); |
1674 | /* Do_lbfgs is not completely updated like do_steep and do_cg, |
1675 | * so we free some memory again. |
1676 | */ |
1677 | sfree(ems.s.x)save_free("ems.s.x", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1677, (ems.s.x)); |
1678 | sfree(ems.f)save_free("ems.f", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1678, (ems.f)); |
1679 | |
1680 | xx = (real *)state->x; |
1681 | ff = (real *)f; |
1682 | |
1683 | start = 0; |
1684 | end = mdatoms->homenr; |
1685 | |
1686 | /* Print to log file */ |
1687 | print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS); |
1688 | |
1689 | do_log = do_ene = do_x = do_f = TRUE1; |
1690 | |
1691 | /* Max number of steps */ |
1692 | number_steps = inputrec->nsteps; |
1693 | |
1694 | /* Create a 3*natoms index to tell whether each degree of freedom is frozen */ |
1695 | gf = 0; |
1696 | for (i = start; i < end; i++) |
1697 | { |
1698 | if (mdatoms->cFREEZE) |
1699 | { |
1700 | gf = mdatoms->cFREEZE[i]; |
1701 | } |
1702 | for (m = 0; m < DIM3; m++) |
1703 | { |
1704 | frozen[3*i+m] = inputrec->opts.nFreeze[gf][m]; |
1705 | } |
1706 | } |
1707 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
1708 | { |
1709 | sp_header(stderrstderr, LBFGS, inputrec->em_tol, number_steps); |
1710 | } |
1711 | if (fplog) |
1712 | { |
1713 | sp_header(fplog, LBFGS, inputrec->em_tol, number_steps); |
1714 | } |
1715 | |
1716 | if (vsite) |
1717 | { |
1718 | construct_vsites(vsite, state->x, 1, NULL((void*)0), |
1719 | top->idef.iparams, top->idef.il, |
1720 | fr->ePBC, fr->bMolPBC, cr, state->box); |
1721 | } |
1722 | |
1723 | /* Call the force routine and some auxiliary (neighboursearching etc.) */ |
1724 | /* do_force always puts the charge groups in the box and shifts again |
1725 | * We do not unshift, so molecules are always whole |
1726 | */ |
1727 | neval++; |
1728 | ems.s.x = state->x; |
1729 | ems.f = f; |
1730 | evaluate_energy(fplog, cr, |
1731 | top_global, &ems, top, |
1732 | inputrec, nrnb, wcycle, gstat, |
1733 | vsite, constr, fcd, graph, mdatoms, fr, |
1734 | mu_tot, enerd, vir, pres, -1, TRUE1); |
1735 | where()_where("/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1735); |
1736 | |
1737 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
1738 | { |
1739 | /* Copy stuff to the energy bin for easy printing etc. */ |
1740 | upd_mdebin(mdebin, FALSE0, FALSE0, (double)step, |
1741 | mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box, |
1742 | NULL((void*)0), NULL((void*)0), vir, pres, NULL((void*)0), mu_tot, constr); |
1743 | |
1744 | print_ebin_header(fplog, step, step, state->lambda[efptFEP]); |
1745 | print_ebin(mdoutf_get_fp_ene(outf), TRUE1, FALSE0, FALSE0, fplog, step, step, eprNORMAL, |
1746 | TRUE1, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); |
1747 | } |
1748 | where()_where("/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 1748); |
1749 | |
1750 | /* This is the starting energy */ |
1751 | Epot = enerd->term[F_EPOT]; |
1752 | |
1753 | fnorm = ems.fnorm; |
1754 | fmax = ems.fmax; |
1755 | nfmax = ems.a_fmax; |
1756 | |
1757 | /* Set the initial step. |
1758 | * since it will be multiplied by the non-normalized search direction |
1759 | * vector (force vector the first time), we scale it by the |
1760 | * norm of the force. |
1761 | */ |
1762 | |
1763 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
1764 | { |
1765 | fprintf(stderrstderr, "Using %d BFGS correction steps.\n\n", nmaxcorr); |
1766 | fprintf(stderrstderr, " F-max = %12.5e on atom %d\n", fmax, nfmax+1); |
1767 | fprintf(stderrstderr, " F-Norm = %12.5e\n", fnorm/sqrt(state->natoms)); |
1768 | fprintf(stderrstderr, "\n"); |
1769 | /* and copy to the log file too... */ |
1770 | fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr); |
1771 | fprintf(fplog, " F-max = %12.5e on atom %d\n", fmax, nfmax+1); |
1772 | fprintf(fplog, " F-Norm = %12.5e\n", fnorm/sqrt(state->natoms)); |
1773 | fprintf(fplog, "\n"); |
1774 | } |
1775 | |
1776 | point = 0; |
1777 | for (i = 0; i < n; i++) |
1778 | { |
1779 | if (!frozen[i]) |
1780 | { |
1781 | dx[point][i] = ff[i]; /* Initial search direction */ |
1782 | } |
1783 | else |
1784 | { |
1785 | dx[point][i] = 0; |
1786 | } |
1787 | } |
1788 | |
1789 | stepsize = 1.0/fnorm; |
1790 | converged = FALSE0; |
1791 | |
1792 | /* Start the loop over BFGS steps. |
1793 | * Each successful step is counted, and we continue until |
1794 | * we either converge or reach the max number of steps. |
1795 | */ |
1796 | |
1797 | ncorr = 0; |
1798 | |
1799 | /* Set the gradient from the force */ |
1800 | converged = FALSE0; |
1801 | for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++) |
1802 | { |
1803 | |
1804 | /* Write coordinates if necessary */ |
1805 | do_x = do_per_step(step, inputrec->nstxout); |
1806 | do_f = do_per_step(step, inputrec->nstfout); |
1807 | |
1808 | mdof_flags = 0; |
1809 | if (do_x) |
1810 | { |
1811 | mdof_flags |= MDOF_X(1<<0); |
1812 | } |
1813 | |
1814 | if (do_f) |
1815 | { |
1816 | mdof_flags |= MDOF_F(1<<2); |
1817 | } |
1818 | |
1819 | if (inputrec->bIMD) |
1820 | { |
1821 | mdof_flags |= MDOF_IMD(1<<5); |
1822 | } |
1823 | |
1824 | mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, |
1825 | top_global, step, (real)step, state, state, f, f); |
1826 | |
1827 | /* Do the linesearching in the direction dx[point][0..(n-1)] */ |
1828 | |
1829 | /* pointer to current direction - point=0 first time here */ |
1830 | s = dx[point]; |
1831 | |
1832 | /* calculate line gradient */ |
1833 | for (gpa = 0, i = 0; i < n; i++) |
1834 | { |
1835 | gpa -= s[i]*ff[i]; |
1836 | } |
1837 | |
1838 | /* Calculate minimum allowed stepsize, before the average (norm) |
1839 | * relative change in coordinate is smaller than precision |
1840 | */ |
1841 | for (minstep = 0, i = 0; i < n; i++) |
1842 | { |
1843 | tmp = fabs(xx[i]); |
1844 | if (tmp < 1.0) |
1845 | { |
1846 | tmp = 1.0; |
1847 | } |
1848 | tmp = s[i]/tmp; |
1849 | minstep += tmp*tmp; |
1850 | } |
1851 | minstep = GMX_REAL_EPS5.96046448E-08/sqrt(minstep/n); |
1852 | |
1853 | if (stepsize < minstep) |
1854 | { |
1855 | converged = TRUE1; |
1856 | break; |
1857 | } |
1858 | |
1859 | /* Store old forces and coordinates */ |
1860 | for (i = 0; i < n; i++) |
1861 | { |
1862 | lastx[i] = xx[i]; |
1863 | lastf[i] = ff[i]; |
1864 | } |
1865 | Epot0 = Epot; |
1866 | |
1867 | first = TRUE1; |
1868 | |
1869 | for (i = 0; i < n; i++) |
1870 | { |
1871 | xa[i] = xx[i]; |
1872 | } |
1873 | |
1874 | /* Take a step downhill. |
1875 | * In theory, we should minimize the function along this direction. |
1876 | * That is quite possible, but it turns out to take 5-10 function evaluations |
1877 | * for each line. However, we dont really need to find the exact minimum - |
1878 | * it is much better to start a new BFGS step in a modified direction as soon |
1879 | * as we are close to it. This will save a lot of energy evaluations. |
1880 | * |
1881 | * In practice, we just try to take a single step. |
1882 | * If it worked (i.e. lowered the energy), we increase the stepsize but |
1883 | * the continue straight to the next BFGS step without trying to find any minimum. |
1884 | * If it didn't work (higher energy), there must be a minimum somewhere between |
1885 | * the old position and the new one. |
1886 | * |
1887 | * Due to the finite numerical accuracy, it turns out that it is a good idea |
1888 | * to even accept a SMALL increase in energy, if the derivative is still downhill. |
1889 | * This leads to lower final energies in the tests I've done. / Erik |
1890 | */ |
1891 | foundlower = FALSE0; |
Value stored to 'foundlower' is never read | |
1892 | EpotA = Epot0; |
1893 | a = 0.0; |
1894 | c = a + stepsize; /* reference position along line is zero */ |
1895 | |
1896 | /* Check stepsize first. We do not allow displacements |
1897 | * larger than emstep. |
1898 | */ |
1899 | do |
1900 | { |
1901 | c = a + stepsize; |
1902 | maxdelta = 0; |
1903 | for (i = 0; i < n; i++) |
1904 | { |
1905 | delta = c*s[i]; |
1906 | if (delta > maxdelta) |
1907 | { |
1908 | maxdelta = delta; |
1909 | } |
1910 | } |
1911 | if (maxdelta > inputrec->em_stepsize) |
1912 | { |
1913 | stepsize *= 0.1; |
1914 | } |
1915 | } |
1916 | while (maxdelta > inputrec->em_stepsize); |
1917 | |
1918 | /* Take a trial step */ |
1919 | for (i = 0; i < n; i++) |
1920 | { |
1921 | xc[i] = lastx[i] + c*s[i]; |
1922 | } |
1923 | |
1924 | neval++; |
1925 | /* Calculate energy for the trial step */ |
1926 | ems.s.x = (rvec *)xc; |
1927 | ems.f = (rvec *)fc; |
1928 | evaluate_energy(fplog, cr, |
1929 | top_global, &ems, top, |
1930 | inputrec, nrnb, wcycle, gstat, |
1931 | vsite, constr, fcd, graph, mdatoms, fr, |
1932 | mu_tot, enerd, vir, pres, step, FALSE0); |
1933 | EpotC = ems.epot; |
1934 | |
1935 | /* Calc derivative along line */ |
1936 | for (gpc = 0, i = 0; i < n; i++) |
1937 | { |
1938 | gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */ |
1939 | } |
1940 | /* Sum the gradient along the line across CPUs */ |
1941 | if (PAR(cr)((cr)->nnodes > 1)) |
1942 | { |
1943 | gmx_sumd(1, &gpc, cr); |
1944 | } |
1945 | |
1946 | /* This is the max amount of increase in energy we tolerate */ |
1947 | tmp = sqrt(GMX_REAL_EPS5.96046448E-08)*fabs(EpotA); |
1948 | |
1949 | /* Accept the step if the energy is lower, or if it is not significantly higher |
1950 | * and the line derivative is still negative. |
1951 | */ |
1952 | if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp))) |
1953 | { |
1954 | foundlower = TRUE1; |
1955 | /* Great, we found a better energy. Increase step for next iteration |
1956 | * if we are still going down, decrease it otherwise |
1957 | */ |
1958 | if (gpc < 0) |
1959 | { |
1960 | stepsize *= 1.618034; /* The golden section */ |
1961 | } |
1962 | else |
1963 | { |
1964 | stepsize *= 0.618034; /* 1/golden section */ |
1965 | } |
1966 | } |
1967 | else |
1968 | { |
1969 | /* New energy is the same or higher. We will have to do some work |
1970 | * to find a smaller value in the interval. Take smaller step next time! |
1971 | */ |
1972 | foundlower = FALSE0; |
1973 | stepsize *= 0.618034; |
1974 | } |
1975 | |
1976 | /* OK, if we didn't find a lower value we will have to locate one now - there must |
1977 | * be one in the interval [a=0,c]. |
1978 | * The same thing is valid here, though: Don't spend dozens of iterations to find |
1979 | * the line minimum. We try to interpolate based on the derivative at the endpoints, |
1980 | * and only continue until we find a lower value. In most cases this means 1-2 iterations. |
1981 | * |
1982 | * I also have a safeguard for potentially really patological functions so we never |
1983 | * take more than 20 steps before we give up ... |
1984 | * |
1985 | * If we already found a lower value we just skip this step and continue to the update. |
1986 | */ |
1987 | |
1988 | if (!foundlower) |
1989 | { |
1990 | |
1991 | nminstep = 0; |
1992 | do |
1993 | { |
1994 | /* Select a new trial point. |
1995 | * If the derivatives at points a & c have different sign we interpolate to zero, |
1996 | * otherwise just do a bisection. |
1997 | */ |
1998 | |
1999 | if (gpa < 0 && gpc > 0) |
2000 | { |
2001 | b = a + gpa*(a-c)/(gpc-gpa); |
2002 | } |
2003 | else |
2004 | { |
2005 | b = 0.5*(a+c); |
2006 | } |
2007 | |
2008 | /* safeguard if interpolation close to machine accuracy causes errors: |
2009 | * never go outside the interval |
2010 | */ |
2011 | if (b <= a || b >= c) |
2012 | { |
2013 | b = 0.5*(a+c); |
2014 | } |
2015 | |
2016 | /* Take a trial step */ |
2017 | for (i = 0; i < n; i++) |
2018 | { |
2019 | xb[i] = lastx[i] + b*s[i]; |
2020 | } |
2021 | |
2022 | neval++; |
2023 | /* Calculate energy for the trial step */ |
2024 | ems.s.x = (rvec *)xb; |
2025 | ems.f = (rvec *)fb; |
2026 | evaluate_energy(fplog, cr, |
2027 | top_global, &ems, top, |
2028 | inputrec, nrnb, wcycle, gstat, |
2029 | vsite, constr, fcd, graph, mdatoms, fr, |
2030 | mu_tot, enerd, vir, pres, step, FALSE0); |
2031 | EpotB = ems.epot; |
2032 | |
2033 | fnorm = ems.fnorm; |
2034 | |
2035 | for (gpb = 0, i = 0; i < n; i++) |
2036 | { |
2037 | gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */ |
2038 | |
2039 | } |
2040 | /* Sum the gradient along the line across CPUs */ |
2041 | if (PAR(cr)((cr)->nnodes > 1)) |
2042 | { |
2043 | gmx_sumd(1, &gpb, cr); |
2044 | } |
2045 | |
2046 | /* Keep one of the intervals based on the value of the derivative at the new point */ |
2047 | if (gpb > 0) |
2048 | { |
2049 | /* Replace c endpoint with b */ |
2050 | EpotC = EpotB; |
2051 | c = b; |
2052 | gpc = gpb; |
2053 | /* swap coord pointers b/c */ |
2054 | xtmp = xb; |
2055 | ftmp = fb; |
2056 | xb = xc; |
2057 | fb = fc; |
2058 | xc = xtmp; |
2059 | fc = ftmp; |
2060 | } |
2061 | else |
2062 | { |
2063 | /* Replace a endpoint with b */ |
2064 | EpotA = EpotB; |
2065 | a = b; |
2066 | gpa = gpb; |
2067 | /* swap coord pointers a/b */ |
2068 | xtmp = xb; |
2069 | ftmp = fb; |
2070 | xb = xa; |
2071 | fb = fa; |
2072 | xa = xtmp; |
2073 | fa = ftmp; |
2074 | } |
2075 | |
2076 | /* |
2077 | * Stop search as soon as we find a value smaller than the endpoints, |
2078 | * or if the tolerance is below machine precision. |
2079 | * Never run more than 20 steps, no matter what. |
2080 | */ |
2081 | nminstep++; |
2082 | } |
2083 | while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20)); |
2084 | |
2085 | if (fabs(EpotB-Epot0) < GMX_REAL_EPS5.96046448E-08 || nminstep >= 20) |
2086 | { |
2087 | /* OK. We couldn't find a significantly lower energy. |
2088 | * If ncorr==0 this was steepest descent, and then we give up. |
2089 | * If not, reset memory to restart as steepest descent before quitting. |
2090 | */ |
2091 | if (ncorr == 0) |
2092 | { |
2093 | /* Converged */ |
2094 | converged = TRUE1; |
2095 | break; |
2096 | } |
2097 | else |
2098 | { |
2099 | /* Reset memory */ |
2100 | ncorr = 0; |
2101 | /* Search in gradient direction */ |
2102 | for (i = 0; i < n; i++) |
2103 | { |
2104 | dx[point][i] = ff[i]; |
2105 | } |
2106 | /* Reset stepsize */ |
2107 | stepsize = 1.0/fnorm; |
2108 | continue; |
2109 | } |
2110 | } |
2111 | |
2112 | /* Select min energy state of A & C, put the best in xx/ff/Epot |
2113 | */ |
2114 | if (EpotC < EpotA) |
2115 | { |
2116 | Epot = EpotC; |
2117 | /* Use state C */ |
2118 | for (i = 0; i < n; i++) |
2119 | { |
2120 | xx[i] = xc[i]; |
2121 | ff[i] = fc[i]; |
2122 | } |
2123 | stepsize = c; |
2124 | } |
2125 | else |
2126 | { |
2127 | Epot = EpotA; |
2128 | /* Use state A */ |
2129 | for (i = 0; i < n; i++) |
2130 | { |
2131 | xx[i] = xa[i]; |
2132 | ff[i] = fa[i]; |
2133 | } |
2134 | stepsize = a; |
2135 | } |
2136 | |
2137 | } |
2138 | else |
2139 | { |
2140 | /* found lower */ |
2141 | Epot = EpotC; |
2142 | /* Use state C */ |
2143 | for (i = 0; i < n; i++) |
2144 | { |
2145 | xx[i] = xc[i]; |
2146 | ff[i] = fc[i]; |
2147 | } |
2148 | stepsize = c; |
2149 | } |
2150 | |
2151 | /* Update the memory information, and calculate a new |
2152 | * approximation of the inverse hessian |
2153 | */ |
2154 | |
2155 | /* Have new data in Epot, xx, ff */ |
2156 | if (ncorr < nmaxcorr) |
2157 | { |
2158 | ncorr++; |
2159 | } |
2160 | |
2161 | for (i = 0; i < n; i++) |
2162 | { |
2163 | dg[point][i] = lastf[i]-ff[i]; |
2164 | dx[point][i] *= stepsize; |
2165 | } |
2166 | |
2167 | dgdg = 0; |
2168 | dgdx = 0; |
2169 | for (i = 0; i < n; i++) |
2170 | { |
2171 | dgdg += dg[point][i]*dg[point][i]; |
2172 | dgdx += dg[point][i]*dx[point][i]; |
2173 | } |
2174 | |
2175 | diag = dgdx/dgdg; |
2176 | |
2177 | rho[point] = 1.0/dgdx; |
2178 | point++; |
2179 | |
2180 | if (point >= nmaxcorr) |
2181 | { |
2182 | point = 0; |
2183 | } |
2184 | |
2185 | /* Update */ |
2186 | for (i = 0; i < n; i++) |
2187 | { |
2188 | p[i] = ff[i]; |
2189 | } |
2190 | |
2191 | cp = point; |
2192 | |
2193 | /* Recursive update. First go back over the memory points */ |
2194 | for (k = 0; k < ncorr; k++) |
2195 | { |
2196 | cp--; |
2197 | if (cp < 0) |
2198 | { |
2199 | cp = ncorr-1; |
2200 | } |
2201 | |
2202 | sq = 0; |
2203 | for (i = 0; i < n; i++) |
2204 | { |
2205 | sq += dx[cp][i]*p[i]; |
2206 | } |
2207 | |
2208 | alpha[cp] = rho[cp]*sq; |
2209 | |
2210 | for (i = 0; i < n; i++) |
2211 | { |
2212 | p[i] -= alpha[cp]*dg[cp][i]; |
2213 | } |
2214 | } |
2215 | |
2216 | for (i = 0; i < n; i++) |
2217 | { |
2218 | p[i] *= diag; |
2219 | } |
2220 | |
2221 | /* And then go forward again */ |
2222 | for (k = 0; k < ncorr; k++) |
2223 | { |
2224 | yr = 0; |
2225 | for (i = 0; i < n; i++) |
2226 | { |
2227 | yr += p[i]*dg[cp][i]; |
2228 | } |
2229 | |
2230 | beta = rho[cp]*yr; |
2231 | beta = alpha[cp]-beta; |
2232 | |
2233 | for (i = 0; i < n; i++) |
2234 | { |
2235 | p[i] += beta*dx[cp][i]; |
2236 | } |
2237 | |
2238 | cp++; |
2239 | if (cp >= ncorr) |
2240 | { |
2241 | cp = 0; |
2242 | } |
2243 | } |
2244 | |
2245 | for (i = 0; i < n; i++) |
2246 | { |
2247 | if (!frozen[i]) |
2248 | { |
2249 | dx[point][i] = p[i]; |
2250 | } |
2251 | else |
2252 | { |
2253 | dx[point][i] = 0; |
2254 | } |
2255 | } |
2256 | |
2257 | stepsize = 1.0; |
2258 | |
2259 | /* Test whether the convergence criterion is met */ |
2260 | get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax); |
2261 | |
2262 | /* Print it if necessary */ |
2263 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2264 | { |
2265 | if (bVerbose) |
2266 | { |
2267 | fprintf(stderrstderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", |
2268 | step, Epot, fnorm/sqrt(state->natoms), fmax, nfmax+1); |
2269 | } |
2270 | /* Store the new (lower) energies */ |
2271 | upd_mdebin(mdebin, FALSE0, FALSE0, (double)step, |
2272 | mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box, |
2273 | NULL((void*)0), NULL((void*)0), vir, pres, NULL((void*)0), mu_tot, constr); |
2274 | do_log = do_per_step(step, inputrec->nstlog); |
2275 | do_ene = do_per_step(step, inputrec->nstenergy); |
2276 | if (do_log) |
2277 | { |
2278 | print_ebin_header(fplog, step, step, state->lambda[efptFEP]); |
2279 | } |
2280 | print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE0, FALSE0, |
2281 | do_log ? fplog : NULL((void*)0), step, step, eprNORMAL, |
2282 | TRUE1, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); |
2283 | } |
2284 | |
2285 | /* Send x and E to IMD client, if bIMD is TRUE. */ |
2286 | if (do_IMD(inputrec->bIMD, step, cr, TRUE1, state->box, state->x, inputrec, 0, wcycle) && MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2287 | { |
2288 | IMD_send_positions(inputrec->imd); |
2289 | } |
2290 | |
2291 | /* Stop when the maximum force lies below tolerance. |
2292 | * If we have reached machine precision, converged is already set to true. |
2293 | */ |
2294 | |
2295 | converged = converged || (fmax < inputrec->em_tol); |
2296 | |
2297 | } /* End of the loop */ |
2298 | |
2299 | /* IMD cleanup, if bIMD is TRUE. */ |
2300 | IMD_finalize(inputrec->bIMD, inputrec->imd); |
2301 | |
2302 | if (converged) |
2303 | { |
2304 | step--; /* we never took that last step in this case */ |
2305 | |
2306 | } |
2307 | if (fmax > inputrec->em_tol) |
2308 | { |
2309 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2310 | { |
2311 | warn_step(stderrstderr, inputrec->em_tol, step-1 == number_steps, FALSE0); |
2312 | warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE0); |
2313 | } |
2314 | converged = FALSE0; |
2315 | } |
2316 | |
2317 | /* If we printed energy and/or logfile last step (which was the last step) |
2318 | * we don't have to do it again, but otherwise print the final values. |
2319 | */ |
2320 | if (!do_log) /* Write final value to log since we didn't do anythin last step */ |
2321 | { |
2322 | print_ebin_header(fplog, step, step, state->lambda[efptFEP]); |
2323 | } |
2324 | if (!do_ene || !do_log) /* Write final energy file entries */ |
2325 | { |
2326 | print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE0, FALSE0, |
2327 | !do_log ? fplog : NULL((void*)0), step, step, eprNORMAL, |
2328 | TRUE1, mdebin, fcd, &(top_global->groups), &(inputrec->opts)); |
2329 | } |
2330 | |
2331 | /* Print some stuff... */ |
2332 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2333 | { |
2334 | fprintf(stderrstderr, "\nwriting lowest energy coordinates.\n"); |
2335 | } |
2336 | |
2337 | /* IMPORTANT! |
2338 | * For accurate normal mode calculation it is imperative that we |
2339 | * store the last conformation into the full precision binary trajectory. |
2340 | * |
2341 | * However, we should only do it if we did NOT already write this step |
2342 | * above (which we did if do_x or do_f was true). |
2343 | */ |
2344 | do_x = !do_per_step(step, inputrec->nstxout); |
2345 | do_f = !do_per_step(step, inputrec->nstfout); |
2346 | write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), |
2347 | top_global, inputrec, step, |
2348 | &ems, state, f); |
2349 | |
2350 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2351 | { |
2352 | print_converged(stderrstderr, LBFGS, inputrec->em_tol, step, converged, |
2353 | number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms)); |
2354 | print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, |
2355 | number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms)); |
2356 | |
2357 | fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); |
2358 | } |
2359 | |
2360 | finish_em(cr, outf, walltime_accounting, wcycle); |
2361 | |
2362 | /* To print the actual number of steps we needed somewhere */ |
2363 | walltime_accounting_set_nsteps_done(walltime_accounting, step); |
2364 | |
2365 | return 0; |
2366 | } /* That's all folks */ |
2367 | |
2368 | |
2369 | double do_steep(FILE *fplog, t_commrec *cr, |
2370 | int nfile, const t_filenm fnm[], |
2371 | const output_env_t gmx_unused__attribute__ ((unused)) oenv, gmx_bool bVerbose, gmx_bool gmx_unused__attribute__ ((unused)) bCompact, |
2372 | int gmx_unused__attribute__ ((unused)) nstglobalcomm, |
2373 | gmx_vsite_t *vsite, gmx_constr_t constr, |
2374 | int gmx_unused__attribute__ ((unused)) stepout, |
2375 | t_inputrec *inputrec, |
2376 | gmx_mtop_t *top_global, t_fcdata *fcd, |
2377 | t_state *state_global, |
2378 | t_mdatoms *mdatoms, |
2379 | t_nrnb *nrnb, gmx_wallcycle_t wcycle, |
2380 | gmx_edsam_t gmx_unused__attribute__ ((unused)) ed, |
2381 | t_forcerec *fr, |
2382 | int gmx_unused__attribute__ ((unused)) repl_ex_nst, int gmx_unused__attribute__ ((unused)) repl_ex_nex, int gmx_unused__attribute__ ((unused)) repl_ex_seed, |
2383 | gmx_membed_t gmx_unused__attribute__ ((unused)) membed, |
2384 | real gmx_unused__attribute__ ((unused)) cpt_period, real gmx_unused__attribute__ ((unused)) max_hours, |
2385 | const char gmx_unused__attribute__ ((unused)) *deviceOptions, |
2386 | int imdport, |
2387 | unsigned long gmx_unused__attribute__ ((unused)) Flags, |
2388 | gmx_walltime_accounting_t walltime_accounting) |
2389 | { |
2390 | const char *SD = "Steepest Descents"; |
2391 | em_state_t *s_min, *s_try; |
2392 | rvec *f_global; |
2393 | gmx_localtop_t *top; |
2394 | gmx_enerdata_t *enerd; |
2395 | rvec *f; |
2396 | gmx_global_stat_t gstat; |
2397 | t_graph *graph; |
2398 | real stepsize, constepsize; |
2399 | real ustep, fnormn; |
2400 | gmx_mdoutf_t outf; |
2401 | t_mdebin *mdebin; |
2402 | gmx_bool bDone, bAbort, do_x, do_f; |
2403 | tensor vir, pres; |
2404 | rvec mu_tot; |
2405 | int nsteps; |
2406 | int count = 0; |
2407 | int steps_accepted = 0; |
2408 | /* not used */ |
2409 | real terminate = 0; |
2410 | |
2411 | s_min = init_em_state(); |
2412 | s_try = init_em_state(); |
2413 | |
2414 | /* Init em and store the local state in s_try */ |
2415 | init_em(fplog, SD, cr, inputrec, |
2416 | state_global, top_global, s_try, &top, &f, &f_global, |
2417 | nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, |
2418 | nfile, fnm, &outf, &mdebin, imdport, Flags); |
2419 | |
2420 | /* Print to log file */ |
2421 | print_em_start(fplog, cr, walltime_accounting, wcycle, SD); |
2422 | |
2423 | /* Set variables for stepsize (in nm). This is the largest |
2424 | * step that we are going to make in any direction. |
2425 | */ |
2426 | ustep = inputrec->em_stepsize; |
2427 | stepsize = 0; |
2428 | |
2429 | /* Max number of steps */ |
2430 | nsteps = inputrec->nsteps; |
2431 | |
2432 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2433 | { |
2434 | /* Print to the screen */ |
2435 | sp_header(stderrstderr, SD, inputrec->em_tol, nsteps); |
2436 | } |
2437 | if (fplog) |
2438 | { |
2439 | sp_header(fplog, SD, inputrec->em_tol, nsteps); |
2440 | } |
2441 | |
2442 | /**** HERE STARTS THE LOOP **** |
2443 | * count is the counter for the number of steps |
2444 | * bDone will be TRUE when the minimization has converged |
2445 | * bAbort will be TRUE when nsteps steps have been performed or when |
2446 | * the stepsize becomes smaller than is reasonable for machine precision |
2447 | */ |
2448 | count = 0; |
2449 | bDone = FALSE0; |
2450 | bAbort = FALSE0; |
2451 | while (!bDone && !bAbort) |
2452 | { |
2453 | bAbort = (nsteps >= 0) && (count == nsteps); |
2454 | |
2455 | /* set new coordinates, except for first step */ |
2456 | if (count > 0) |
2457 | { |
2458 | do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, |
2459 | s_min, stepsize, s_min->f, s_try, |
2460 | constr, top, nrnb, wcycle, count); |
2461 | } |
2462 | |
2463 | evaluate_energy(fplog, cr, |
2464 | top_global, s_try, top, |
2465 | inputrec, nrnb, wcycle, gstat, |
2466 | vsite, constr, fcd, graph, mdatoms, fr, |
2467 | mu_tot, enerd, vir, pres, count, count == 0); |
2468 | |
2469 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2470 | { |
2471 | print_ebin_header(fplog, count, count, s_try->s.lambda[efptFEP]); |
2472 | } |
2473 | |
2474 | if (count == 0) |
2475 | { |
2476 | s_min->epot = s_try->epot + 1; |
2477 | } |
2478 | |
2479 | /* Print it if necessary */ |
2480 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2481 | { |
2482 | if (bVerbose) |
2483 | { |
2484 | fprintf(stderrstderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c", |
2485 | count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1, |
2486 | (s_try->epot < s_min->epot) ? '\n' : '\r'); |
2487 | } |
2488 | |
2489 | if (s_try->epot < s_min->epot) |
2490 | { |
2491 | /* Store the new (lower) energies */ |
2492 | upd_mdebin(mdebin, FALSE0, FALSE0, (double)count, |
2493 | mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals, |
2494 | s_try->s.box, NULL((void*)0), NULL((void*)0), vir, pres, NULL((void*)0), mu_tot, constr); |
2495 | |
2496 | /* Prepare IMD energy record, if bIMD is TRUE. */ |
2497 | IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, count, TRUE1); |
2498 | |
2499 | print_ebin(mdoutf_get_fp_ene(outf), TRUE1, |
2500 | do_per_step(steps_accepted, inputrec->nstdisreout), |
2501 | do_per_step(steps_accepted, inputrec->nstorireout), |
2502 | fplog, count, count, eprNORMAL, TRUE1, |
2503 | mdebin, fcd, &(top_global->groups), &(inputrec->opts)); |
2504 | fflush(fplog); |
2505 | } |
2506 | } |
2507 | |
2508 | /* Now if the new energy is smaller than the previous... |
2509 | * or if this is the first step! |
2510 | * or if we did random steps! |
2511 | */ |
2512 | |
2513 | if ( (count == 0) || (s_try->epot < s_min->epot) ) |
2514 | { |
2515 | steps_accepted++; |
2516 | |
2517 | /* Test whether the convergence criterion is met... */ |
2518 | bDone = (s_try->fmax < inputrec->em_tol); |
2519 | |
2520 | /* Copy the arrays for force, positions and energy */ |
2521 | /* The 'Min' array always holds the coords and forces of the minimal |
2522 | sampled energy */ |
2523 | swap_em_state(s_min, s_try); |
2524 | if (count > 0) |
2525 | { |
2526 | ustep *= 1.2; |
2527 | } |
2528 | |
2529 | /* Write to trn, if necessary */ |
2530 | do_x = do_per_step(steps_accepted, inputrec->nstxout); |
2531 | do_f = do_per_step(steps_accepted, inputrec->nstfout); |
2532 | write_em_traj(fplog, cr, outf, do_x, do_f, NULL((void*)0), |
2533 | top_global, inputrec, count, |
2534 | s_min, state_global, f_global); |
2535 | } |
2536 | else |
2537 | { |
2538 | /* If energy is not smaller make the step smaller... */ |
2539 | ustep *= 0.5; |
2540 | |
2541 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)) && s_min->s.ddp_count != cr->dd->ddp_count) |
2542 | { |
2543 | /* Reload the old state */ |
2544 | em_dd_partition_system(fplog, count, cr, top_global, inputrec, |
2545 | s_min, top, mdatoms, fr, vsite, constr, |
2546 | nrnb, wcycle); |
2547 | } |
2548 | } |
2549 | |
2550 | /* Determine new step */ |
2551 | stepsize = ustep/s_min->fmax; |
2552 | |
2553 | /* Check if stepsize is too small, with 1 nm as a characteristic length */ |
2554 | #ifdef GMX_DOUBLE |
2555 | if (count == nsteps || ustep < 1e-12) |
2556 | #else |
2557 | if (count == nsteps || ustep < 1e-6) |
2558 | #endif |
2559 | { |
2560 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2561 | { |
2562 | warn_step(stderrstderr, inputrec->em_tol, count == nsteps, constr != NULL((void*)0)); |
2563 | warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL((void*)0)); |
2564 | } |
2565 | bAbort = TRUE1; |
2566 | } |
2567 | |
2568 | /* Send IMD energies and positions, if bIMD is TRUE. */ |
2569 | if (do_IMD(inputrec->bIMD, count, cr, TRUE1, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2570 | { |
2571 | IMD_send_positions(inputrec->imd); |
2572 | } |
2573 | |
2574 | count++; |
2575 | } /* End of the loop */ |
2576 | |
2577 | /* IMD cleanup, if bIMD is TRUE. */ |
2578 | IMD_finalize(inputrec->bIMD, inputrec->imd); |
2579 | |
2580 | /* Print some data... */ |
2581 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2582 | { |
2583 | fprintf(stderrstderr, "\nwriting lowest energy coordinates.\n"); |
2584 | } |
2585 | write_em_traj(fplog, cr, outf, TRUE1, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm), |
2586 | top_global, inputrec, count, |
2587 | s_min, state_global, f_global); |
2588 | |
2589 | fnormn = s_min->fnorm/sqrt(state_global->natoms); |
2590 | |
2591 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2592 | { |
2593 | print_converged(stderrstderr, SD, inputrec->em_tol, count, bDone, nsteps, |
2594 | s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); |
2595 | print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, |
2596 | s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); |
2597 | } |
2598 | |
2599 | finish_em(cr, outf, walltime_accounting, wcycle); |
2600 | |
2601 | /* To print the actual number of steps we needed somewhere */ |
2602 | inputrec->nsteps = count; |
2603 | |
2604 | walltime_accounting_set_nsteps_done(walltime_accounting, count); |
2605 | |
2606 | return 0; |
2607 | } /* That's all folks */ |
2608 | |
2609 | |
2610 | double do_nm(FILE *fplog, t_commrec *cr, |
2611 | int nfile, const t_filenm fnm[], |
2612 | const output_env_t gmx_unused__attribute__ ((unused)) oenv, gmx_bool bVerbose, gmx_bool gmx_unused__attribute__ ((unused)) bCompact, |
2613 | int gmx_unused__attribute__ ((unused)) nstglobalcomm, |
2614 | gmx_vsite_t *vsite, gmx_constr_t constr, |
2615 | int gmx_unused__attribute__ ((unused)) stepout, |
2616 | t_inputrec *inputrec, |
2617 | gmx_mtop_t *top_global, t_fcdata *fcd, |
2618 | t_state *state_global, |
2619 | t_mdatoms *mdatoms, |
2620 | t_nrnb *nrnb, gmx_wallcycle_t wcycle, |
2621 | gmx_edsam_t gmx_unused__attribute__ ((unused)) ed, |
2622 | t_forcerec *fr, |
2623 | int gmx_unused__attribute__ ((unused)) repl_ex_nst, int gmx_unused__attribute__ ((unused)) repl_ex_nex, int gmx_unused__attribute__ ((unused)) repl_ex_seed, |
2624 | gmx_membed_t gmx_unused__attribute__ ((unused)) membed, |
2625 | real gmx_unused__attribute__ ((unused)) cpt_period, real gmx_unused__attribute__ ((unused)) max_hours, |
2626 | const char gmx_unused__attribute__ ((unused)) *deviceOptions, |
2627 | int imdport, |
2628 | unsigned long gmx_unused__attribute__ ((unused)) Flags, |
2629 | gmx_walltime_accounting_t walltime_accounting) |
2630 | { |
2631 | const char *NM = "Normal Mode Analysis"; |
2632 | gmx_mdoutf_t outf; |
2633 | int natoms, atom, d; |
2634 | int nnodes, node; |
2635 | rvec *f_global; |
2636 | gmx_localtop_t *top; |
2637 | gmx_enerdata_t *enerd; |
2638 | rvec *f; |
2639 | gmx_global_stat_t gstat; |
2640 | t_graph *graph; |
2641 | real t, t0, lambda, lam0; |
2642 | gmx_bool bNS; |
2643 | tensor vir, pres; |
2644 | rvec mu_tot; |
2645 | rvec *fneg, *dfdx; |
2646 | gmx_bool bSparse; /* use sparse matrix storage format */ |
2647 | size_t sz = 0; |
2648 | gmx_sparsematrix_t * sparse_matrix = NULL((void*)0); |
2649 | real * full_matrix = NULL((void*)0); |
2650 | em_state_t * state_work; |
2651 | |
2652 | /* added with respect to mdrun */ |
2653 | int i, j, k, row, col; |
2654 | real der_range = 10.0*sqrt(GMX_REAL_EPS5.96046448E-08); |
2655 | real x_min; |
2656 | real fnorm, fmax; |
2657 | |
2658 | if (constr != NULL((void*)0)) |
2659 | { |
2660 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 2660, "Constraints present with Normal Mode Analysis, this combination is not supported"); |
2661 | } |
2662 | |
2663 | state_work = init_em_state(); |
2664 | |
2665 | /* Init em and store the local state in state_minimum */ |
2666 | init_em(fplog, NM, cr, inputrec, |
2667 | state_global, top_global, state_work, &top, |
2668 | &f, &f_global, |
2669 | nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, |
2670 | nfile, fnm, &outf, NULL((void*)0), imdport, Flags); |
2671 | |
2672 | natoms = top_global->natoms; |
2673 | snew(fneg, natoms)(fneg) = save_calloc("fneg", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 2673, (natoms), sizeof(*(fneg))); |
2674 | snew(dfdx, natoms)(dfdx) = save_calloc("dfdx", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 2674, (natoms), sizeof(*(dfdx))); |
2675 | |
2676 | #ifndef GMX_DOUBLE |
2677 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2678 | { |
2679 | fprintf(stderrstderr, |
2680 | "NOTE: This version of Gromacs has been compiled in single precision,\n" |
2681 | " which MIGHT not be accurate enough for normal mode analysis.\n" |
2682 | " Gromacs now uses sparse matrix storage, so the memory requirements\n" |
2683 | " are fairly modest even if you recompile in double precision.\n\n"); |
2684 | } |
2685 | #endif |
2686 | |
2687 | /* Check if we can/should use sparse storage format. |
2688 | * |
2689 | * Sparse format is only useful when the Hessian itself is sparse, which it |
2690 | * will be when we use a cutoff. |
2691 | * For small systems (n<1000) it is easier to always use full matrix format, though. |
2692 | */ |
2693 | if (EEL_FULL(fr->eeltype)((((fr->eeltype) == eelPME || (fr->eeltype) == eelPMESWITCH || (fr->eeltype) == eelPMEUSER || (fr->eeltype) == eelPMEUSERSWITCH || (fr->eeltype) == eelP3M_AD) || (fr->eeltype) == eelEWALD ) || (fr->eeltype) == eelPOISSON) || fr->rlist == 0.0) |
2694 | { |
2695 | md_print_info(cr, fplog, "Non-cutoff electrostatics used, forcing full Hessian format.\n"); |
2696 | bSparse = FALSE0; |
2697 | } |
2698 | else if (top_global->natoms < 1000) |
2699 | { |
2700 | md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", top_global->natoms); |
2701 | bSparse = FALSE0; |
2702 | } |
2703 | else |
2704 | { |
2705 | md_print_info(cr, fplog, "Using compressed symmetric sparse Hessian format.\n"); |
2706 | bSparse = TRUE1; |
2707 | } |
2708 | |
2709 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2710 | { |
2711 | sz = DIM3*top_global->natoms; |
2712 | |
2713 | fprintf(stderrstderr, "Allocating Hessian memory...\n\n"); |
2714 | |
2715 | if (bSparse) |
2716 | { |
2717 | sparse_matrix = gmx_sparsematrix_init(sz); |
2718 | sparse_matrix->compressed_symmetric = TRUE1; |
2719 | } |
2720 | else |
2721 | { |
2722 | snew(full_matrix, sz*sz)(full_matrix) = save_calloc("full_matrix", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 2722, (sz*sz), sizeof(*(full_matrix))); |
2723 | } |
2724 | } |
2725 | |
2726 | /* Initial values */ |
2727 | t0 = inputrec->init_t; |
2728 | lam0 = inputrec->fepvals->init_lambda; |
2729 | t = t0; |
2730 | lambda = lam0; |
2731 | |
2732 | init_nrnb(nrnb); |
2733 | |
2734 | where()_where("/home/alexxy/Develop/gromacs/src/gromacs/mdlib/minimize.c" , 2734); |
2735 | |
2736 | /* Write start time and temperature */ |
2737 | print_em_start(fplog, cr, walltime_accounting, wcycle, NM); |
2738 | |
2739 | /* fudge nr of steps to nr of atoms */ |
2740 | inputrec->nsteps = natoms*2; |
2741 | |
2742 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2743 | { |
2744 | fprintf(stderrstderr, "starting normal mode calculation '%s'\n%d steps.\n\n", |
2745 | *(top_global->name), (int)inputrec->nsteps); |
2746 | } |
2747 | |
2748 | nnodes = cr->nnodes; |
2749 | |
2750 | /* Make evaluate_energy do a single node force calculation */ |
2751 | cr->nnodes = 1; |
2752 | evaluate_energy(fplog, cr, |
2753 | top_global, state_work, top, |
2754 | inputrec, nrnb, wcycle, gstat, |
2755 | vsite, constr, fcd, graph, mdatoms, fr, |
2756 | mu_tot, enerd, vir, pres, -1, TRUE1); |
2757 | cr->nnodes = nnodes; |
2758 | |
2759 | /* if forces are not small, warn user */ |
2760 | get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work); |
2761 | |
2762 | md_print_info(cr, fplog, "Maximum force:%12.5e\n", state_work->fmax); |
2763 | if (state_work->fmax > 1.0e-3) |
2764 | { |
2765 | md_print_info(cr, fplog, |
2766 | "The force is probably not small enough to " |
2767 | "ensure that you are at a minimum.\n" |
2768 | "Be aware that negative eigenvalues may occur\n" |
2769 | "when the resulting matrix is diagonalized.\n\n"); |
2770 | } |
2771 | |
2772 | /*********************************************************** |
2773 | * |
2774 | * Loop over all pairs in matrix |
2775 | * |
2776 | * do_force called twice. Once with positive and |
2777 | * once with negative displacement |
2778 | * |
2779 | ************************************************************/ |
2780 | |
2781 | /* Steps are divided one by one over the nodes */ |
2782 | for (atom = cr->nodeid; atom < natoms; atom += nnodes) |
2783 | { |
2784 | |
2785 | for (d = 0; d < DIM3; d++) |
2786 | { |
2787 | x_min = state_work->s.x[atom][d]; |
2788 | |
2789 | state_work->s.x[atom][d] = x_min - der_range; |
2790 | |
2791 | /* Make evaluate_energy do a single node force calculation */ |
2792 | cr->nnodes = 1; |
2793 | evaluate_energy(fplog, cr, |
2794 | top_global, state_work, top, |
2795 | inputrec, nrnb, wcycle, gstat, |
2796 | vsite, constr, fcd, graph, mdatoms, fr, |
2797 | mu_tot, enerd, vir, pres, atom*2, FALSE0); |
2798 | |
2799 | for (i = 0; i < natoms; i++) |
2800 | { |
2801 | copy_rvec(state_work->f[i], fneg[i]); |
2802 | } |
2803 | |
2804 | state_work->s.x[atom][d] = x_min + der_range; |
2805 | |
2806 | evaluate_energy(fplog, cr, |
2807 | top_global, state_work, top, |
2808 | inputrec, nrnb, wcycle, gstat, |
2809 | vsite, constr, fcd, graph, mdatoms, fr, |
2810 | mu_tot, enerd, vir, pres, atom*2+1, FALSE0); |
2811 | cr->nnodes = nnodes; |
2812 | |
2813 | /* x is restored to original */ |
2814 | state_work->s.x[atom][d] = x_min; |
2815 | |
2816 | for (j = 0; j < natoms; j++) |
2817 | { |
2818 | for (k = 0; (k < DIM3); k++) |
2819 | { |
2820 | dfdx[j][k] = |
2821 | -(state_work->f[j][k] - fneg[j][k])/(2*der_range); |
2822 | } |
2823 | } |
2824 | |
2825 | if (!MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2826 | { |
2827 | #ifdef GMX_MPI |
2828 | #ifdef GMX_DOUBLE |
2829 | #define mpi_type MPI_DOUBLETMPI_DOUBLE |
2830 | #else |
2831 | #define mpi_type MPI_FLOATTMPI_FLOAT |
2832 | #endif |
2833 | MPI_SendtMPI_Send(dfdx[0], natoms*DIM3, mpi_type, MASTERNODE(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)), cr->nodeid, |
2834 | cr->mpi_comm_mygroup); |
2835 | #endif |
2836 | } |
2837 | else |
2838 | { |
2839 | for (node = 0; (node < nnodes && atom+node < natoms); node++) |
2840 | { |
2841 | if (node > 0) |
2842 | { |
2843 | #ifdef GMX_MPI |
2844 | MPI_Status stat; |
2845 | MPI_RecvtMPI_Recv(dfdx[0], natoms*DIM3, mpi_type, node, node, |
2846 | cr->mpi_comm_mygroup, &stat); |
2847 | #undef mpi_type |
2848 | #endif |
2849 | } |
2850 | |
2851 | row = (atom + node)*DIM3 + d; |
2852 | |
2853 | for (j = 0; j < natoms; j++) |
2854 | { |
2855 | for (k = 0; k < DIM3; k++) |
2856 | { |
2857 | col = j*DIM3 + k; |
2858 | |
2859 | if (bSparse) |
2860 | { |
2861 | if (col >= row && dfdx[j][k] != 0.0) |
2862 | { |
2863 | gmx_sparsematrix_increment_value(sparse_matrix, |
2864 | row, col, dfdx[j][k]); |
2865 | } |
2866 | } |
2867 | else |
2868 | { |
2869 | full_matrix[row*sz+col] = dfdx[j][k]; |
2870 | } |
2871 | } |
2872 | } |
2873 | } |
2874 | } |
2875 | |
2876 | if (bVerbose && fplog) |
2877 | { |
2878 | fflush(fplog); |
2879 | } |
2880 | } |
2881 | /* write progress */ |
2882 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)) && bVerbose) |
2883 | { |
2884 | fprintf(stderrstderr, "\rFinished step %d out of %d", |
2885 | min(atom+nnodes, natoms)(((atom+nnodes) < (natoms)) ? (atom+nnodes) : (natoms) ), natoms); |
2886 | fflush(stderrstderr); |
2887 | } |
2888 | } |
2889 | |
2890 | if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1))) |
2891 | { |
2892 | fprintf(stderrstderr, "\n\nWriting Hessian...\n"); |
2893 | gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix); |
2894 | } |
2895 | |
2896 | finish_em(cr, outf, walltime_accounting, wcycle); |
2897 | |
2898 | walltime_accounting_set_nsteps_done(walltime_accounting, natoms*2); |
2899 | |
2900 | return 0; |
2901 | } |