1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROwing Monsters And Cloning Shrimps
54 #include "gmx_fatal.h"
66 #include "md_support.h"
72 #include "gmx_wallcycle.h"
73 #include "mtop_util.h"
77 #include "gmx_omp_nthreads.h"
80 #include "gromacs/linearalgebra/mtxio.h"
81 #include "gromacs/linearalgebra/sparsematrix.h"
92 static em_state_t *init_em_state()
98 /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? */
99 snew(ems->s.lambda, efptNR);
104 static void print_em_start(FILE *fplog, t_commrec *cr, gmx_runtime_t *runtime,
105 gmx_wallcycle_t wcycle,
110 runtime_start(runtime);
112 sprintf(buf, "Started %s", name);
113 print_date_and_time(fplog, cr->nodeid, buf, NULL);
115 wallcycle_start(wcycle, ewcRUN);
117 static void em_time_end(gmx_runtime_t *runtime,
118 gmx_wallcycle_t wcycle)
120 wallcycle_stop(wcycle, ewcRUN);
122 runtime_end(runtime);
125 static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
128 fprintf(out, "%s:\n", minimizer);
129 fprintf(out, " Tolerance (Fmax) = %12.5e\n", ftol);
130 fprintf(out, " Number of steps = %12d\n", nsteps);
133 static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain)
139 "\nEnergy minimization reached the maximum number"
140 "of steps before the forces reached the requested"
141 "precision Fmax < %g.\n", ftol);
146 "\nEnergy minimization has stopped, but the forces have"
147 "not converged to the requested precision Fmax < %g (which"
148 "may not be possible for your system). It stopped"
149 "because the algorithm tried to make a new step whose size"
150 "was too small, or there was no change in the energy since"
151 "last step. Either way, we regard the minimization as"
152 "converged to within the available machine precision,"
153 "given your starting configuration and EM parameters.\n%s%s",
155 sizeof(real) < sizeof(double) ?
156 "\nDouble precision normally gives you higher accuracy, but"
157 "this is often not needed for preparing to run molecular"
161 "You might need to increase your constraint accuracy, or turn\n"
162 "off constraints altogether (set constraints = none in mdp file)\n" :
165 fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
170 static void print_converged(FILE *fp, const char *alg, real ftol,
171 gmx_large_int_t count, gmx_bool bDone, gmx_large_int_t nsteps,
172 real epot, real fmax, int nfmax, real fnorm)
174 char buf[STEPSTRSIZE];
178 fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n",
179 alg, ftol, gmx_step_str(count, buf));
181 else if (count < nsteps)
183 fprintf(fp, "\n%s converged to machine precision in %s steps,\n"
184 "but did not reach the requested Fmax < %g.\n",
185 alg, gmx_step_str(count, buf), ftol);
189 fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n",
190 alg, ftol, gmx_step_str(count, buf));
194 fprintf(fp, "Potential Energy = %21.14e\n", epot);
195 fprintf(fp, "Maximum force = %21.14e on atom %d\n", fmax, nfmax+1);
196 fprintf(fp, "Norm of force = %21.14e\n", fnorm);
198 fprintf(fp, "Potential Energy = %14.7e\n", epot);
199 fprintf(fp, "Maximum force = %14.7e on atom %d\n", fmax, nfmax+1);
200 fprintf(fp, "Norm of force = %14.7e\n", fnorm);
204 static void get_f_norm_max(t_commrec *cr,
205 t_grpopts *opts, t_mdatoms *mdatoms, rvec *f,
206 real *fnorm, real *fmax, int *a_fmax)
209 real fmax2, fmax2_0, fam;
210 int la_max, a_max, start, end, i, m, gf;
212 /* This routine finds the largest force and returns it.
213 * On parallel machines the global max is taken.
219 start = mdatoms->start;
220 end = mdatoms->homenr + start;
221 if (mdatoms->cFREEZE)
223 for (i = start; i < end; i++)
225 gf = mdatoms->cFREEZE[i];
227 for (m = 0; m < DIM; m++)
229 if (!opts->nFreeze[gf][m])
244 for (i = start; i < end; i++)
256 if (la_max >= 0 && DOMAINDECOMP(cr))
258 a_max = cr->dd->gatindex[la_max];
266 snew(sum, 2*cr->nnodes+1);
267 sum[2*cr->nodeid] = fmax2;
268 sum[2*cr->nodeid+1] = a_max;
269 sum[2*cr->nnodes] = fnorm2;
270 gmx_sumd(2*cr->nnodes+1, sum, cr);
271 fnorm2 = sum[2*cr->nnodes];
272 /* Determine the global maximum */
273 for (i = 0; i < cr->nnodes; i++)
275 if (sum[2*i] > fmax2)
278 a_max = (int)(sum[2*i+1] + 0.5);
286 *fnorm = sqrt(fnorm2);
298 static void get_state_f_norm_max(t_commrec *cr,
299 t_grpopts *opts, t_mdatoms *mdatoms,
302 get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax);
305 void init_em(FILE *fplog, const char *title,
306 t_commrec *cr, t_inputrec *ir,
307 t_state *state_global, gmx_mtop_t *top_global,
308 em_state_t *ems, gmx_localtop_t **top,
309 rvec **f, rvec **f_global,
310 t_nrnb *nrnb, rvec mu_tot,
311 t_forcerec *fr, gmx_enerdata_t **enerd,
312 t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat,
313 gmx_vsite_t *vsite, gmx_constr_t constr,
314 int nfile, const t_filenm fnm[],
315 gmx_mdoutf_t **outf, t_mdebin **mdebin)
317 int start, homenr, i;
322 fprintf(fplog, "Initiating %s\n", title);
325 state_global->ngtc = 0;
327 /* Initialize lambda variables */
328 initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL);
332 if (DOMAINDECOMP(cr))
334 *top = dd_init_local_top(top_global);
336 dd_init_local_state(cr->dd, state_global, &ems->s);
340 /* Distribute the charge groups over the nodes from the master node */
341 dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
342 state_global, top_global, ir,
343 &ems->s, &ems->f, mdatoms, *top,
344 fr, vsite, NULL, constr,
346 dd_store_state(cr->dd, &ems->s);
350 snew(*f_global, top_global->natoms);
360 snew(*f, top_global->natoms);
362 /* Just copy the state */
363 ems->s = *state_global;
364 snew(ems->s.x, ems->s.nalloc);
365 snew(ems->f, ems->s.nalloc);
366 for (i = 0; i < state_global->natoms; i++)
368 copy_rvec(state_global->x[i], ems->s.x[i]);
370 copy_mat(state_global->box, ems->s.box);
372 if (PAR(cr) && ir->eI != eiNM)
374 /* Initialize the particle decomposition and split the topology */
375 *top = split_system(fplog, top_global, ir, cr);
377 pd_cg_range(cr, &fr->cg0, &fr->hcg);
381 *top = gmx_mtop_generate_local_top(top_global, ir);
385 forcerec_set_excl_load(fr, *top, cr);
387 init_bonded_thread_force_reduction(fr, &(*top)->idef);
389 if (ir->ePBC != epbcNONE && !fr->bMolPBC)
391 *graph = mk_graph(fplog, &((*top)->idef), 0, top_global->natoms, FALSE, FALSE);
400 pd_at_range(cr, &start, &homenr);
406 homenr = top_global->natoms;
408 atoms2md(top_global, ir, 0, NULL, start, homenr, mdatoms);
409 update_mdatoms(mdatoms, state_global->lambda[efptFEP]);
413 set_vsite_top(vsite, *top, mdatoms, cr);
419 if (ir->eConstrAlg == econtSHAKE &&
420 gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
422 gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",
423 econstr_names[econtSHAKE], econstr_names[econtLINCS]);
426 if (!DOMAINDECOMP(cr))
428 set_constraints(constr, *top, ir, mdatoms, cr);
431 if (!ir->bContinuation)
433 /* Constrain the starting coordinates */
435 constrain(PAR(cr) ? NULL : fplog, TRUE, TRUE, constr, &(*top)->idef,
436 ir, NULL, cr, -1, 0, mdatoms,
437 ems->s.x, ems->s.x, NULL, fr->bMolPBC, ems->s.box,
438 ems->s.lambda[efptFEP], &dvdl_constr,
439 NULL, NULL, nrnb, econqCoord, FALSE, 0, 0);
445 *gstat = global_stat_init(ir);
448 *outf = init_mdoutf(nfile, fnm, 0, cr, ir, NULL);
451 init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
456 /* Init bin for energy stuff */
457 *mdebin = init_mdebin((*outf)->fp_ene, top_global, ir, NULL);
461 calc_shifts(ems->s.box, fr->shift_vec);
464 static void finish_em(t_commrec *cr, gmx_mdoutf_t *outf,
465 gmx_runtime_t *runtime, gmx_wallcycle_t wcycle)
467 if (!(cr->duty & DUTY_PME))
469 /* Tell the PME only node to finish */
470 gmx_pme_send_finish(cr);
475 em_time_end(runtime, wcycle);
478 static void swap_em_state(em_state_t *ems1, em_state_t *ems2)
487 static void copy_em_coords(em_state_t *ems, t_state *state)
491 for (i = 0; (i < state->natoms); i++)
493 copy_rvec(ems->s.x[i], state->x[i]);
497 static void write_em_traj(FILE *fplog, t_commrec *cr,
499 gmx_bool bX, gmx_bool bF, const char *confout,
500 gmx_mtop_t *top_global,
501 t_inputrec *ir, gmx_large_int_t step,
503 t_state *state_global, rvec *f_global)
507 if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
509 copy_em_coords(state, state_global);
516 mdof_flags |= MDOF_X;
520 mdof_flags |= MDOF_F;
522 write_traj(fplog, cr, outf, mdof_flags,
523 top_global, step, (double)step,
524 &state->s, state_global, state->f, f_global, NULL, NULL);
526 if (confout != NULL && MASTER(cr))
528 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
530 /* Make molecules whole only for confout writing */
531 do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global,
535 write_sto_conf_mtop(confout,
536 *top_global->name, top_global,
537 state_global->x, NULL, ir->ePBC, state_global->box);
541 static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
543 em_state_t *ems1, real a, rvec *f, em_state_t *ems2,
544 gmx_constr_t constr, gmx_localtop_t *top,
545 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
546 gmx_large_int_t count)
558 if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
560 gmx_incons("state mismatch in do_em_step");
563 s2->flags = s1->flags;
565 if (s2->nalloc != s1->nalloc)
567 s2->nalloc = s1->nalloc;
568 srenew(s2->x, s1->nalloc);
569 srenew(ems2->f, s1->nalloc);
570 if (s2->flags & (1<<estCGP))
572 srenew(s2->cg_p, s1->nalloc);
576 s2->natoms = s1->natoms;
577 copy_mat(s1->box, s2->box);
578 /* Copy free energy state */
579 for (i = 0; i < efptNR; i++)
581 s2->lambda[i] = s1->lambda[i];
583 copy_mat(s1->box, s2->box);
586 end = md->start + md->homenr;
591 #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
596 #pragma omp for schedule(static) nowait
597 for (i = start; i < end; i++)
603 for (m = 0; m < DIM; m++)
605 if (ir->opts.nFreeze[gf][m])
611 x2[i][m] = x1[i][m] + a*f[i][m];
616 if (s2->flags & (1<<estCGP))
618 /* Copy the CG p vector */
621 #pragma omp for schedule(static) nowait
622 for (i = start; i < end; i++)
624 copy_rvec(x1[i], x2[i]);
628 if (DOMAINDECOMP(cr))
630 s2->ddp_count = s1->ddp_count;
631 if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
634 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
635 srenew(s2->cg_gl, s2->cg_gl_nalloc);
638 s2->ncg_gl = s1->ncg_gl;
639 #pragma omp for schedule(static) nowait
640 for (i = 0; i < s2->ncg_gl; i++)
642 s2->cg_gl[i] = s1->cg_gl[i];
644 s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
650 wallcycle_start(wcycle, ewcCONSTR);
652 constrain(NULL, TRUE, TRUE, constr, &top->idef,
653 ir, NULL, cr, count, 0, md,
654 s1->x, s2->x, NULL, bMolPBC, s2->box,
655 s2->lambda[efptBONDED], &dvdl_constr,
656 NULL, NULL, nrnb, econqCoord, FALSE, 0, 0);
657 wallcycle_stop(wcycle, ewcCONSTR);
661 static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr,
662 gmx_mtop_t *top_global, t_inputrec *ir,
663 em_state_t *ems, gmx_localtop_t *top,
664 t_mdatoms *mdatoms, t_forcerec *fr,
665 gmx_vsite_t *vsite, gmx_constr_t constr,
666 t_nrnb *nrnb, gmx_wallcycle_t wcycle)
668 /* Repartition the domain decomposition */
669 wallcycle_start(wcycle, ewcDOMDEC);
670 dd_partition_system(fplog, step, cr, FALSE, 1,
671 NULL, top_global, ir,
673 mdatoms, top, fr, vsite, NULL, constr,
674 nrnb, wcycle, FALSE);
675 dd_store_state(cr->dd, &ems->s);
676 wallcycle_stop(wcycle, ewcDOMDEC);
679 static void evaluate_energy(FILE *fplog, t_commrec *cr,
680 gmx_mtop_t *top_global,
681 em_state_t *ems, gmx_localtop_t *top,
682 t_inputrec *inputrec,
683 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
684 gmx_global_stat_t gstat,
685 gmx_vsite_t *vsite, gmx_constr_t constr,
687 t_graph *graph, t_mdatoms *mdatoms,
688 t_forcerec *fr, rvec mu_tot,
689 gmx_enerdata_t *enerd, tensor vir, tensor pres,
690 gmx_large_int_t count, gmx_bool bFirst)
695 tensor force_vir, shake_vir, ekin;
696 real dvdl_constr, prescorr, enercorr, dvdlcorr;
699 /* Set the time to the initial time, the time does not change during EM */
700 t = inputrec->init_t;
703 (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
705 /* This the first state or an old state used before the last ns */
711 if (inputrec->nstlist > 0)
715 else if (inputrec->nstlist == -1)
717 nabnsb = natoms_beyond_ns_buffer(inputrec, fr, &top->cgs, NULL, ems->s.x);
720 gmx_sumi(1, &nabnsb, cr);
728 construct_vsites(vsite, ems->s.x, 1, NULL,
729 top->idef.iparams, top->idef.il,
730 fr->ePBC, fr->bMolPBC, graph, cr, ems->s.box);
733 if (DOMAINDECOMP(cr))
737 /* Repartition the domain decomposition */
738 em_dd_partition_system(fplog, count, cr, top_global, inputrec,
739 ems, top, mdatoms, fr, vsite, constr,
744 /* Calc force & energy on new trial position */
745 /* do_force always puts the charge groups in the box and shifts again
746 * We do not unshift, so molecules are always whole in congrad.c
748 do_force(fplog, cr, inputrec,
749 count, nrnb, wcycle, top, &top_global->groups,
750 ems->s.box, ems->s.x, &ems->s.hist,
751 ems->f, force_vir, mdatoms, enerd, fcd,
752 ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE,
753 GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
754 GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
755 (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
757 /* Clear the unused shake virial and pressure */
758 clear_mat(shake_vir);
761 /* Communicate stuff when parallel */
762 if (PAR(cr) && inputrec->eI != eiNM)
764 wallcycle_start(wcycle, ewcMoveE);
766 global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot,
767 inputrec, NULL, NULL, NULL, 1, &terminate,
768 top_global, &ems->s, FALSE,
774 wallcycle_stop(wcycle, ewcMoveE);
777 /* Calculate long range corrections to pressure and energy */
778 calc_dispcorr(fplog, inputrec, fr, count, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW],
779 pres, force_vir, &prescorr, &enercorr, &dvdlcorr);
780 enerd->term[F_DISPCORR] = enercorr;
781 enerd->term[F_EPOT] += enercorr;
782 enerd->term[F_PRES] += prescorr;
783 enerd->term[F_DVDL] += dvdlcorr;
785 ems->epot = enerd->term[F_EPOT];
789 /* Project out the constraint components of the force */
790 wallcycle_start(wcycle, ewcCONSTR);
792 constrain(NULL, FALSE, FALSE, constr, &top->idef,
793 inputrec, NULL, cr, count, 0, mdatoms,
794 ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box,
795 ems->s.lambda[efptBONDED], &dvdl_constr,
796 NULL, &shake_vir, nrnb, econqForceDispl, FALSE, 0, 0);
797 if (fr->bSepDVDL && fplog)
799 gmx_print_sepdvdl(fplog, "Constraints", t, dvdl_constr);
801 enerd->term[F_DVDL_CONSTR] += dvdl_constr;
802 m_add(force_vir, shake_vir, vir);
803 wallcycle_stop(wcycle, ewcCONSTR);
807 copy_mat(force_vir, vir);
811 enerd->term[F_PRES] =
812 calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres);
814 sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals);
816 if (EI_ENERGY_MINIMIZATION(inputrec->eI))
818 get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems);
822 static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
824 em_state_t *s_min, em_state_t *s_b)
828 int ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m;
830 unsigned char *grpnrFREEZE;
834 fprintf(debug, "Doing reorder_partsum\n");
840 cgs_gl = dd_charge_groups_global(cr->dd);
841 index = cgs_gl->index;
843 /* Collect fm in a global vector fmg.
844 * This conflicts with the spirit of domain decomposition,
845 * but to fully optimize this a much more complicated algorithm is required.
847 snew(fmg, mtop->natoms);
849 ncg = s_min->s.ncg_gl;
850 cg_gl = s_min->s.cg_gl;
852 for (c = 0; c < ncg; c++)
857 for (a = a0; a < a1; a++)
859 copy_rvec(fm[i], fmg[a]);
863 gmx_sum(mtop->natoms*3, fmg[0], cr);
865 /* Now we will determine the part of the sum for the cgs in state s_b */
867 cg_gl = s_b->s.cg_gl;
871 grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
872 for (c = 0; c < ncg; c++)
877 for (a = a0; a < a1; a++)
879 if (mdatoms->cFREEZE && grpnrFREEZE)
883 for (m = 0; m < DIM; m++)
885 if (!opts->nFreeze[gf][m])
887 partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
899 static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
901 em_state_t *s_min, em_state_t *s_b)
907 /* This is just the classical Polak-Ribiere calculation of beta;
908 * it looks a bit complicated since we take freeze groups into account,
909 * and might have to sum it in parallel runs.
912 if (!DOMAINDECOMP(cr) ||
913 (s_min->s.ddp_count == cr->dd->ddp_count &&
914 s_b->s.ddp_count == cr->dd->ddp_count))
920 /* This part of code can be incorrect with DD,
921 * since the atom ordering in s_b and s_min might differ.
923 for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
925 if (mdatoms->cFREEZE)
927 gf = mdatoms->cFREEZE[i];
929 for (m = 0; m < DIM; m++)
931 if (!opts->nFreeze[gf][m])
933 sum += (fb[i][m] - fm[i][m])*fb[i][m];
940 /* We need to reorder cgs while summing */
941 sum = reorder_partsum(cr, opts, mdatoms, mtop, s_min, s_b);
945 gmx_sumd(1, &sum, cr);
948 return sum/sqr(s_min->fnorm);
951 double do_cg(FILE *fplog, t_commrec *cr,
952 int nfile, const t_filenm fnm[],
953 const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
954 int gmx_unused nstglobalcomm,
955 gmx_vsite_t *vsite, gmx_constr_t constr,
956 int gmx_unused stepout,
957 t_inputrec *inputrec,
958 gmx_mtop_t *top_global, t_fcdata *fcd,
959 t_state *state_global,
961 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
962 gmx_edsam_t gmx_unused ed,
964 int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
965 gmx_membed_t gmx_unused membed,
966 real gmx_unused cpt_period, real gmx_unused max_hours,
967 const char gmx_unused *deviceOptions,
968 unsigned long gmx_unused Flags,
969 gmx_runtime_t *runtime)
971 const char *CG = "Polak-Ribiere Conjugate Gradients";
973 em_state_t *s_min, *s_a, *s_b, *s_c;
975 gmx_enerdata_t *enerd;
977 gmx_global_stat_t gstat;
979 rvec *f_global, *p, *sf, *sfm;
980 double gpa, gpb, gpc, tmp, sum[2], minstep;
983 real a, b, c, beta = 0.0;
987 gmx_bool converged, foundlower;
989 gmx_bool do_log = FALSE, do_ene = FALSE, do_x, do_f;
991 int number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
993 int i, m, gf, step, nminstep;
998 s_min = init_em_state();
999 s_a = init_em_state();
1000 s_b = init_em_state();
1001 s_c = init_em_state();
1003 /* Init em and store the local state in s_min */
1004 init_em(fplog, CG, cr, inputrec,
1005 state_global, top_global, s_min, &top, &f, &f_global,
1006 nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
1007 nfile, fnm, &outf, &mdebin);
1009 /* Print to log file */
1010 print_em_start(fplog, cr, runtime, wcycle, CG);
1012 /* Max number of steps */
1013 number_steps = inputrec->nsteps;
1017 sp_header(stderr, CG, inputrec->em_tol, number_steps);
1021 sp_header(fplog, CG, inputrec->em_tol, number_steps);
1024 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1025 /* do_force always puts the charge groups in the box and shifts again
1026 * We do not unshift, so molecules are always whole in congrad.c
1028 evaluate_energy(fplog, cr,
1029 top_global, s_min, top,
1030 inputrec, nrnb, wcycle, gstat,
1031 vsite, constr, fcd, graph, mdatoms, fr,
1032 mu_tot, enerd, vir, pres, -1, TRUE);
1037 /* Copy stuff to the energy bin for easy printing etc. */
1038 upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1039 mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
1040 NULL, NULL, vir, pres, NULL, mu_tot, constr);
1042 print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
1043 print_ebin(outf->fp_ene, TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
1044 TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1048 /* Estimate/guess the initial stepsize */
1049 stepsize = inputrec->em_stepsize/s_min->fnorm;
1053 fprintf(stderr, " F-max = %12.5e on atom %d\n",
1054 s_min->fmax, s_min->a_fmax+1);
1055 fprintf(stderr, " F-Norm = %12.5e\n",
1056 s_min->fnorm/sqrt(state_global->natoms));
1057 fprintf(stderr, "\n");
1058 /* and copy to the log file too... */
1059 fprintf(fplog, " F-max = %12.5e on atom %d\n",
1060 s_min->fmax, s_min->a_fmax+1);
1061 fprintf(fplog, " F-Norm = %12.5e\n",
1062 s_min->fnorm/sqrt(state_global->natoms));
1063 fprintf(fplog, "\n");
1065 /* Start the loop over CG steps.
1066 * Each successful step is counted, and we continue until
1067 * we either converge or reach the max number of steps.
1070 for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
1073 /* start taking steps in a new direction
1074 * First time we enter the routine, beta=0, and the direction is
1075 * simply the negative gradient.
1078 /* Calculate the new direction in p, and the gradient in this direction, gpa */
1083 for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
1085 if (mdatoms->cFREEZE)
1087 gf = mdatoms->cFREEZE[i];
1089 for (m = 0; m < DIM; m++)
1091 if (!inputrec->opts.nFreeze[gf][m])
1093 p[i][m] = sf[i][m] + beta*p[i][m];
1094 gpa -= p[i][m]*sf[i][m];
1095 /* f is negative gradient, thus the sign */
1104 /* Sum the gradient along the line across CPUs */
1107 gmx_sumd(1, &gpa, cr);
1110 /* Calculate the norm of the search vector */
1111 get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL, NULL);
1113 /* Just in case stepsize reaches zero due to numerical precision... */
1116 stepsize = inputrec->em_stepsize/pnorm;
1120 * Double check the value of the derivative in the search direction.
1121 * If it is positive it must be due to the old information in the
1122 * CG formula, so just remove that and start over with beta=0.
1123 * This corresponds to a steepest descent step.
1128 step--; /* Don't count this step since we are restarting */
1129 continue; /* Go back to the beginning of the big for-loop */
1132 /* Calculate minimum allowed stepsize, before the average (norm)
1133 * relative change in coordinate is smaller than precision
1136 for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
1138 for (m = 0; m < DIM; m++)
1140 tmp = fabs(s_min->s.x[i][m]);
1149 /* Add up from all CPUs */
1152 gmx_sumd(1, &minstep, cr);
1155 minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1157 if (stepsize < minstep)
1163 /* Write coordinates if necessary */
1164 do_x = do_per_step(step, inputrec->nstxout);
1165 do_f = do_per_step(step, inputrec->nstfout);
1167 write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
1168 top_global, inputrec, step,
1169 s_min, state_global, f_global);
1171 /* Take a step downhill.
1172 * In theory, we should minimize the function along this direction.
1173 * That is quite possible, but it turns out to take 5-10 function evaluations
1174 * for each line. However, we dont really need to find the exact minimum -
1175 * it is much better to start a new CG step in a modified direction as soon
1176 * as we are close to it. This will save a lot of energy evaluations.
1178 * In practice, we just try to take a single step.
1179 * If it worked (i.e. lowered the energy), we increase the stepsize but
1180 * the continue straight to the next CG step without trying to find any minimum.
1181 * If it didn't work (higher energy), there must be a minimum somewhere between
1182 * the old position and the new one.
1184 * Due to the finite numerical accuracy, it turns out that it is a good idea
1185 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1186 * This leads to lower final energies in the tests I've done. / Erik
1188 s_a->epot = s_min->epot;
1190 c = a + stepsize; /* reference position along line is zero */
1192 if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
1194 em_dd_partition_system(fplog, step, cr, top_global, inputrec,
1195 s_min, top, mdatoms, fr, vsite, constr,
1199 /* Take a trial step (new coords in s_c) */
1200 do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c,
1201 constr, top, nrnb, wcycle, -1);
1204 /* Calculate energy for the trial step */
1205 evaluate_energy(fplog, cr,
1206 top_global, s_c, top,
1207 inputrec, nrnb, wcycle, gstat,
1208 vsite, constr, fcd, graph, mdatoms, fr,
1209 mu_tot, enerd, vir, pres, -1, FALSE);
1211 /* Calc derivative along line */
1215 for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
1217 for (m = 0; m < DIM; m++)
1219 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1222 /* Sum the gradient along the line across CPUs */
1225 gmx_sumd(1, &gpc, cr);
1228 /* This is the max amount of increase in energy we tolerate */
1229 tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1231 /* Accept the step if the energy is lower, or if it is not significantly higher
1232 * and the line derivative is still negative.
1234 if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
1237 /* Great, we found a better energy. Increase step for next iteration
1238 * if we are still going down, decrease it otherwise
1242 stepsize *= 1.618034; /* The golden section */
1246 stepsize *= 0.618034; /* 1/golden section */
1251 /* New energy is the same or higher. We will have to do some work
1252 * to find a smaller value in the interval. Take smaller step next time!
1255 stepsize *= 0.618034;
1261 /* OK, if we didn't find a lower value we will have to locate one now - there must
1262 * be one in the interval [a=0,c].
1263 * The same thing is valid here, though: Don't spend dozens of iterations to find
1264 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1265 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1267 * I also have a safeguard for potentially really patological functions so we never
1268 * take more than 20 steps before we give up ...
1270 * If we already found a lower value we just skip this step and continue to the update.
1278 /* Select a new trial point.
1279 * If the derivatives at points a & c have different sign we interpolate to zero,
1280 * otherwise just do a bisection.
1282 if (gpa < 0 && gpc > 0)
1284 b = a + gpa*(a-c)/(gpc-gpa);
1291 /* safeguard if interpolation close to machine accuracy causes errors:
1292 * never go outside the interval
1294 if (b <= a || b >= c)
1299 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
1301 /* Reload the old state */
1302 em_dd_partition_system(fplog, -1, cr, top_global, inputrec,
1303 s_min, top, mdatoms, fr, vsite, constr,
1307 /* Take a trial step to this new point - new coords in s_b */
1308 do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b,
1309 constr, top, nrnb, wcycle, -1);
1312 /* Calculate energy for the trial step */
1313 evaluate_energy(fplog, cr,
1314 top_global, s_b, top,
1315 inputrec, nrnb, wcycle, gstat,
1316 vsite, constr, fcd, graph, mdatoms, fr,
1317 mu_tot, enerd, vir, pres, -1, FALSE);
1319 /* p does not change within a step, but since the domain decomposition
1320 * might change, we have to use cg_p of s_b here.
1325 for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
1327 for (m = 0; m < DIM; m++)
1329 gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1332 /* Sum the gradient along the line across CPUs */
1335 gmx_sumd(1, &gpb, cr);
1340 fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1341 s_a->epot, s_b->epot, s_c->epot, gpb);
1344 epot_repl = s_b->epot;
1346 /* Keep one of the intervals based on the value of the derivative at the new point */
1349 /* Replace c endpoint with b */
1350 swap_em_state(s_b, s_c);
1356 /* Replace a endpoint with b */
1357 swap_em_state(s_b, s_a);
1363 * Stop search as soon as we find a value smaller than the endpoints.
1364 * Never run more than 20 steps, no matter what.
1368 while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1371 if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1374 /* OK. We couldn't find a significantly lower energy.
1375 * If beta==0 this was steepest descent, and then we give up.
1376 * If not, set beta=0 and restart with steepest descent before quitting.
1386 /* Reset memory before giving up */
1392 /* Select min energy state of A & C, put the best in B.
1394 if (s_c->epot < s_a->epot)
1398 fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n",
1399 s_c->epot, s_a->epot);
1401 swap_em_state(s_b, s_c);
1409 fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n",
1410 s_a->epot, s_c->epot);
1412 swap_em_state(s_b, s_a);
1422 fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n",
1425 swap_em_state(s_b, s_c);
1430 /* new search direction */
1431 /* beta = 0 means forget all memory and restart with steepest descents. */
1432 if (nstcg && ((step % nstcg) == 0))
1438 /* s_min->fnorm cannot be zero, because then we would have converged
1442 /* Polak-Ribiere update.
1443 * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1445 beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b);
1447 /* Limit beta to prevent oscillations */
1448 if (fabs(beta) > 5.0)
1454 /* update positions */
1455 swap_em_state(s_min, s_b);
1458 /* Print it if necessary */
1463 fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1464 step, s_min->epot, s_min->fnorm/sqrt(state_global->natoms),
1465 s_min->fmax, s_min->a_fmax+1);
1467 /* Store the new (lower) energies */
1468 upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1469 mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
1470 NULL, NULL, vir, pres, NULL, mu_tot, constr);
1472 do_log = do_per_step(step, inputrec->nstlog);
1473 do_ene = do_per_step(step, inputrec->nstenergy);
1476 print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
1478 print_ebin(outf->fp_ene, do_ene, FALSE, FALSE,
1479 do_log ? fplog : NULL, step, step, eprNORMAL,
1480 TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1483 /* Stop when the maximum force lies below tolerance.
1484 * If we have reached machine precision, converged is already set to true.
1486 converged = converged || (s_min->fmax < inputrec->em_tol);
1488 } /* End of the loop */
1492 step--; /* we never took that last step in this case */
1495 if (s_min->fmax > inputrec->em_tol)
1499 warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
1500 warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
1507 /* If we printed energy and/or logfile last step (which was the last step)
1508 * we don't have to do it again, but otherwise print the final values.
1512 /* Write final value to log since we didn't do anything the last step */
1513 print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
1515 if (!do_ene || !do_log)
1517 /* Write final energy file entries */
1518 print_ebin(outf->fp_ene, !do_ene, FALSE, FALSE,
1519 !do_log ? fplog : NULL, step, step, eprNORMAL,
1520 TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1524 /* Print some stuff... */
1527 fprintf(stderr, "\nwriting lowest energy coordinates.\n");
1531 * For accurate normal mode calculation it is imperative that we
1532 * store the last conformation into the full precision binary trajectory.
1534 * However, we should only do it if we did NOT already write this step
1535 * above (which we did if do_x or do_f was true).
1537 do_x = !do_per_step(step, inputrec->nstxout);
1538 do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
1540 write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
1541 top_global, inputrec, step,
1542 s_min, state_global, f_global);
1544 fnormn = s_min->fnorm/sqrt(state_global->natoms);
1548 print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps,
1549 s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
1550 print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps,
1551 s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
1553 fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
1556 finish_em(cr, outf, runtime, wcycle);
1558 /* To print the actual number of steps we needed somewhere */
1559 runtime->nsteps_done = step;
1562 } /* That's all folks */
1565 double do_lbfgs(FILE *fplog, t_commrec *cr,
1566 int nfile, const t_filenm fnm[],
1567 const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
1568 int gmx_unused nstglobalcomm,
1569 gmx_vsite_t *vsite, gmx_constr_t constr,
1570 int gmx_unused stepout,
1571 t_inputrec *inputrec,
1572 gmx_mtop_t *top_global, t_fcdata *fcd,
1575 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1576 gmx_edsam_t gmx_unused ed,
1578 int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
1579 gmx_membed_t gmx_unused membed,
1580 real gmx_unused cpt_period, real gmx_unused max_hours,
1581 const char gmx_unused *deviceOptions,
1582 unsigned long gmx_unused Flags,
1583 gmx_runtime_t *runtime)
1585 static const char *LBFGS = "Low-Memory BFGS Minimizer";
1587 gmx_localtop_t *top;
1588 gmx_enerdata_t *enerd;
1590 gmx_global_stat_t gstat;
1593 int ncorr, nmaxcorr, point, cp, neval, nminstep;
1594 double stepsize, gpa, gpb, gpc, tmp, minstep;
1595 real *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg;
1596 real *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp;
1597 real a, b, c, maxdelta, delta;
1598 real diag, Epot0, Epot, EpotA, EpotB, EpotC;
1599 real dgdx, dgdg, sq, yr, beta;
1601 gmx_bool converged, first;
1604 gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen;
1606 int start, end, number_steps;
1608 int i, k, m, n, nfmax, gf, step;
1615 gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n");
1620 gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
1623 n = 3*state->natoms;
1624 nmaxcorr = inputrec->nbfgscorr;
1626 /* Allocate memory */
1627 /* Use pointers to real so we dont have to loop over both atoms and
1628 * dimensions all the time...
1629 * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1630 * that point to the same memory.
1643 snew(rho, nmaxcorr);
1644 snew(alpha, nmaxcorr);
1647 for (i = 0; i < nmaxcorr; i++)
1653 for (i = 0; i < nmaxcorr; i++)
1662 init_em(fplog, LBFGS, cr, inputrec,
1663 state, top_global, &ems, &top, &f, &f_global,
1664 nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
1665 nfile, fnm, &outf, &mdebin);
1666 /* Do_lbfgs is not completely updated like do_steep and do_cg,
1667 * so we free some memory again.
1672 xx = (real *)state->x;
1675 start = mdatoms->start;
1676 end = mdatoms->homenr + start;
1678 /* Print to log file */
1679 print_em_start(fplog, cr, runtime, wcycle, LBFGS);
1681 do_log = do_ene = do_x = do_f = TRUE;
1683 /* Max number of steps */
1684 number_steps = inputrec->nsteps;
1686 /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1688 for (i = start; i < end; i++)
1690 if (mdatoms->cFREEZE)
1692 gf = mdatoms->cFREEZE[i];
1694 for (m = 0; m < DIM; m++)
1696 frozen[3*i+m] = inputrec->opts.nFreeze[gf][m];
1701 sp_header(stderr, LBFGS, inputrec->em_tol, number_steps);
1705 sp_header(fplog, LBFGS, inputrec->em_tol, number_steps);
1710 construct_vsites(vsite, state->x, 1, NULL,
1711 top->idef.iparams, top->idef.il,
1712 fr->ePBC, fr->bMolPBC, graph, cr, state->box);
1715 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1716 /* do_force always puts the charge groups in the box and shifts again
1717 * We do not unshift, so molecules are always whole
1722 evaluate_energy(fplog, cr,
1723 top_global, &ems, top,
1724 inputrec, nrnb, wcycle, gstat,
1725 vsite, constr, fcd, graph, mdatoms, fr,
1726 mu_tot, enerd, vir, pres, -1, TRUE);
1731 /* Copy stuff to the energy bin for easy printing etc. */
1732 upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1733 mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
1734 NULL, NULL, vir, pres, NULL, mu_tot, constr);
1736 print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
1737 print_ebin(outf->fp_ene, TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
1738 TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1742 /* This is the starting energy */
1743 Epot = enerd->term[F_EPOT];
1749 /* Set the initial step.
1750 * since it will be multiplied by the non-normalized search direction
1751 * vector (force vector the first time), we scale it by the
1752 * norm of the force.
1757 fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
1758 fprintf(stderr, " F-max = %12.5e on atom %d\n", fmax, nfmax+1);
1759 fprintf(stderr, " F-Norm = %12.5e\n", fnorm/sqrt(state->natoms));
1760 fprintf(stderr, "\n");
1761 /* and copy to the log file too... */
1762 fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
1763 fprintf(fplog, " F-max = %12.5e on atom %d\n", fmax, nfmax+1);
1764 fprintf(fplog, " F-Norm = %12.5e\n", fnorm/sqrt(state->natoms));
1765 fprintf(fplog, "\n");
1769 for (i = 0; i < n; i++)
1773 dx[point][i] = ff[i]; /* Initial search direction */
1781 stepsize = 1.0/fnorm;
1784 /* Start the loop over BFGS steps.
1785 * Each successful step is counted, and we continue until
1786 * we either converge or reach the max number of steps.
1791 /* Set the gradient from the force */
1793 for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
1796 /* Write coordinates if necessary */
1797 do_x = do_per_step(step, inputrec->nstxout);
1798 do_f = do_per_step(step, inputrec->nstfout);
1803 mdof_flags |= MDOF_X;
1808 mdof_flags |= MDOF_F;
1811 write_traj(fplog, cr, outf, mdof_flags,
1812 top_global, step, (real)step, state, state, f, f, NULL, NULL);
1814 /* Do the linesearching in the direction dx[point][0..(n-1)] */
1816 /* pointer to current direction - point=0 first time here */
1819 /* calculate line gradient */
1820 for (gpa = 0, i = 0; i < n; i++)
1825 /* Calculate minimum allowed stepsize, before the average (norm)
1826 * relative change in coordinate is smaller than precision
1828 for (minstep = 0, i = 0; i < n; i++)
1838 minstep = GMX_REAL_EPS/sqrt(minstep/n);
1840 if (stepsize < minstep)
1846 /* Store old forces and coordinates */
1847 for (i = 0; i < n; i++)
1856 for (i = 0; i < n; i++)
1861 /* Take a step downhill.
1862 * In theory, we should minimize the function along this direction.
1863 * That is quite possible, but it turns out to take 5-10 function evaluations
1864 * for each line. However, we dont really need to find the exact minimum -
1865 * it is much better to start a new BFGS step in a modified direction as soon
1866 * as we are close to it. This will save a lot of energy evaluations.
1868 * In practice, we just try to take a single step.
1869 * If it worked (i.e. lowered the energy), we increase the stepsize but
1870 * the continue straight to the next BFGS step without trying to find any minimum.
1871 * If it didn't work (higher energy), there must be a minimum somewhere between
1872 * the old position and the new one.
1874 * Due to the finite numerical accuracy, it turns out that it is a good idea
1875 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1876 * This leads to lower final energies in the tests I've done. / Erik
1881 c = a + stepsize; /* reference position along line is zero */
1883 /* Check stepsize first. We do not allow displacements
1884 * larger than emstep.
1890 for (i = 0; i < n; i++)
1893 if (delta > maxdelta)
1898 if (maxdelta > inputrec->em_stepsize)
1903 while (maxdelta > inputrec->em_stepsize);
1905 /* Take a trial step */
1906 for (i = 0; i < n; i++)
1908 xc[i] = lastx[i] + c*s[i];
1912 /* Calculate energy for the trial step */
1913 ems.s.x = (rvec *)xc;
1915 evaluate_energy(fplog, cr,
1916 top_global, &ems, top,
1917 inputrec, nrnb, wcycle, gstat,
1918 vsite, constr, fcd, graph, mdatoms, fr,
1919 mu_tot, enerd, vir, pres, step, FALSE);
1922 /* Calc derivative along line */
1923 for (gpc = 0, i = 0; i < n; i++)
1925 gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
1927 /* Sum the gradient along the line across CPUs */
1930 gmx_sumd(1, &gpc, cr);
1933 /* This is the max amount of increase in energy we tolerate */
1934 tmp = sqrt(GMX_REAL_EPS)*fabs(EpotA);
1936 /* Accept the step if the energy is lower, or if it is not significantly higher
1937 * and the line derivative is still negative.
1939 if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp)))
1942 /* Great, we found a better energy. Increase step for next iteration
1943 * if we are still going down, decrease it otherwise
1947 stepsize *= 1.618034; /* The golden section */
1951 stepsize *= 0.618034; /* 1/golden section */
1956 /* New energy is the same or higher. We will have to do some work
1957 * to find a smaller value in the interval. Take smaller step next time!
1960 stepsize *= 0.618034;
1963 /* OK, if we didn't find a lower value we will have to locate one now - there must
1964 * be one in the interval [a=0,c].
1965 * The same thing is valid here, though: Don't spend dozens of iterations to find
1966 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1967 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1969 * I also have a safeguard for potentially really patological functions so we never
1970 * take more than 20 steps before we give up ...
1972 * If we already found a lower value we just skip this step and continue to the update.
1981 /* Select a new trial point.
1982 * If the derivatives at points a & c have different sign we interpolate to zero,
1983 * otherwise just do a bisection.
1986 if (gpa < 0 && gpc > 0)
1988 b = a + gpa*(a-c)/(gpc-gpa);
1995 /* safeguard if interpolation close to machine accuracy causes errors:
1996 * never go outside the interval
1998 if (b <= a || b >= c)
2003 /* Take a trial step */
2004 for (i = 0; i < n; i++)
2006 xb[i] = lastx[i] + b*s[i];
2010 /* Calculate energy for the trial step */
2011 ems.s.x = (rvec *)xb;
2013 evaluate_energy(fplog, cr,
2014 top_global, &ems, top,
2015 inputrec, nrnb, wcycle, gstat,
2016 vsite, constr, fcd, graph, mdatoms, fr,
2017 mu_tot, enerd, vir, pres, step, FALSE);
2022 for (gpb = 0, i = 0; i < n; i++)
2024 gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
2027 /* Sum the gradient along the line across CPUs */
2030 gmx_sumd(1, &gpb, cr);
2033 /* Keep one of the intervals based on the value of the derivative at the new point */
2036 /* Replace c endpoint with b */
2040 /* swap coord pointers b/c */
2050 /* Replace a endpoint with b */
2054 /* swap coord pointers a/b */
2064 * Stop search as soon as we find a value smaller than the endpoints,
2065 * or if the tolerance is below machine precision.
2066 * Never run more than 20 steps, no matter what.
2070 while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20));
2072 if (fabs(EpotB-Epot0) < GMX_REAL_EPS || nminstep >= 20)
2074 /* OK. We couldn't find a significantly lower energy.
2075 * If ncorr==0 this was steepest descent, and then we give up.
2076 * If not, reset memory to restart as steepest descent before quitting.
2088 /* Search in gradient direction */
2089 for (i = 0; i < n; i++)
2091 dx[point][i] = ff[i];
2093 /* Reset stepsize */
2094 stepsize = 1.0/fnorm;
2099 /* Select min energy state of A & C, put the best in xx/ff/Epot
2105 for (i = 0; i < n; i++)
2116 for (i = 0; i < n; i++)
2130 for (i = 0; i < n; i++)
2138 /* Update the memory information, and calculate a new
2139 * approximation of the inverse hessian
2142 /* Have new data in Epot, xx, ff */
2143 if (ncorr < nmaxcorr)
2148 for (i = 0; i < n; i++)
2150 dg[point][i] = lastf[i]-ff[i];
2151 dx[point][i] *= stepsize;
2156 for (i = 0; i < n; i++)
2158 dgdg += dg[point][i]*dg[point][i];
2159 dgdx += dg[point][i]*dx[point][i];
2164 rho[point] = 1.0/dgdx;
2167 if (point >= nmaxcorr)
2173 for (i = 0; i < n; i++)
2180 /* Recursive update. First go back over the memory points */
2181 for (k = 0; k < ncorr; k++)
2190 for (i = 0; i < n; i++)
2192 sq += dx[cp][i]*p[i];
2195 alpha[cp] = rho[cp]*sq;
2197 for (i = 0; i < n; i++)
2199 p[i] -= alpha[cp]*dg[cp][i];
2203 for (i = 0; i < n; i++)
2208 /* And then go forward again */
2209 for (k = 0; k < ncorr; k++)
2212 for (i = 0; i < n; i++)
2214 yr += p[i]*dg[cp][i];
2218 beta = alpha[cp]-beta;
2220 for (i = 0; i < n; i++)
2222 p[i] += beta*dx[cp][i];
2232 for (i = 0; i < n; i++)
2236 dx[point][i] = p[i];
2246 /* Test whether the convergence criterion is met */
2247 get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax);
2249 /* Print it if necessary */
2254 fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
2255 step, Epot, fnorm/sqrt(state->natoms), fmax, nfmax+1);
2257 /* Store the new (lower) energies */
2258 upd_mdebin(mdebin, FALSE, FALSE, (double)step,
2259 mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
2260 NULL, NULL, vir, pres, NULL, mu_tot, constr);
2261 do_log = do_per_step(step, inputrec->nstlog);
2262 do_ene = do_per_step(step, inputrec->nstenergy);
2265 print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
2267 print_ebin(outf->fp_ene, do_ene, FALSE, FALSE,
2268 do_log ? fplog : NULL, step, step, eprNORMAL,
2269 TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2272 /* Stop when the maximum force lies below tolerance.
2273 * If we have reached machine precision, converged is already set to true.
2276 converged = converged || (fmax < inputrec->em_tol);
2278 } /* End of the loop */
2282 step--; /* we never took that last step in this case */
2285 if (fmax > inputrec->em_tol)
2289 warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
2290 warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
2295 /* If we printed energy and/or logfile last step (which was the last step)
2296 * we don't have to do it again, but otherwise print the final values.
2298 if (!do_log) /* Write final value to log since we didn't do anythin last step */
2300 print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
2302 if (!do_ene || !do_log) /* Write final energy file entries */
2304 print_ebin(outf->fp_ene, !do_ene, FALSE, FALSE,
2305 !do_log ? fplog : NULL, step, step, eprNORMAL,
2306 TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2309 /* Print some stuff... */
2312 fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2316 * For accurate normal mode calculation it is imperative that we
2317 * store the last conformation into the full precision binary trajectory.
2319 * However, we should only do it if we did NOT already write this step
2320 * above (which we did if do_x or do_f was true).
2322 do_x = !do_per_step(step, inputrec->nstxout);
2323 do_f = !do_per_step(step, inputrec->nstfout);
2324 write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
2325 top_global, inputrec, step,
2330 print_converged(stderr, LBFGS, inputrec->em_tol, step, converged,
2331 number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms));
2332 print_converged(fplog, LBFGS, inputrec->em_tol, step, converged,
2333 number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms));
2335 fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
2338 finish_em(cr, outf, runtime, wcycle);
2340 /* To print the actual number of steps we needed somewhere */
2341 runtime->nsteps_done = step;
2344 } /* That's all folks */
2347 double do_steep(FILE *fplog, t_commrec *cr,
2348 int nfile, const t_filenm fnm[],
2349 const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
2350 int gmx_unused nstglobalcomm,
2351 gmx_vsite_t *vsite, gmx_constr_t constr,
2352 int gmx_unused stepout,
2353 t_inputrec *inputrec,
2354 gmx_mtop_t *top_global, t_fcdata *fcd,
2355 t_state *state_global,
2357 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2358 gmx_edsam_t gmx_unused ed,
2360 int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
2361 gmx_membed_t gmx_unused membed,
2362 real gmx_unused cpt_period, real gmx_unused max_hours,
2363 const char gmx_unused *deviceOptions,
2364 unsigned long gmx_unused Flags,
2365 gmx_runtime_t *runtime)
2367 const char *SD = "Steepest Descents";
2368 em_state_t *s_min, *s_try;
2370 gmx_localtop_t *top;
2371 gmx_enerdata_t *enerd;
2373 gmx_global_stat_t gstat;
2375 real stepsize, constepsize;
2379 gmx_bool bDone, bAbort, do_x, do_f;
2384 int steps_accepted = 0;
2388 s_min = init_em_state();
2389 s_try = init_em_state();
2391 /* Init em and store the local state in s_try */
2392 init_em(fplog, SD, cr, inputrec,
2393 state_global, top_global, s_try, &top, &f, &f_global,
2394 nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
2395 nfile, fnm, &outf, &mdebin);
2397 /* Print to log file */
2398 print_em_start(fplog, cr, runtime, wcycle, SD);
2400 /* Set variables for stepsize (in nm). This is the largest
2401 * step that we are going to make in any direction.
2403 ustep = inputrec->em_stepsize;
2406 /* Max number of steps */
2407 nsteps = inputrec->nsteps;
2411 /* Print to the screen */
2412 sp_header(stderr, SD, inputrec->em_tol, nsteps);
2416 sp_header(fplog, SD, inputrec->em_tol, nsteps);
2419 /**** HERE STARTS THE LOOP ****
2420 * count is the counter for the number of steps
2421 * bDone will be TRUE when the minimization has converged
2422 * bAbort will be TRUE when nsteps steps have been performed or when
2423 * the stepsize becomes smaller than is reasonable for machine precision
2428 while (!bDone && !bAbort)
2430 bAbort = (nsteps >= 0) && (count == nsteps);
2432 /* set new coordinates, except for first step */
2435 do_em_step(cr, inputrec, mdatoms, fr->bMolPBC,
2436 s_min, stepsize, s_min->f, s_try,
2437 constr, top, nrnb, wcycle, count);
2440 evaluate_energy(fplog, cr,
2441 top_global, s_try, top,
2442 inputrec, nrnb, wcycle, gstat,
2443 vsite, constr, fcd, graph, mdatoms, fr,
2444 mu_tot, enerd, vir, pres, count, count == 0);
2448 print_ebin_header(fplog, count, count, s_try->s.lambda[efptFEP]);
2453 s_min->epot = s_try->epot + 1;
2456 /* Print it if necessary */
2461 fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2462 count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1,
2463 (s_try->epot < s_min->epot) ? '\n' : '\r');
2466 if (s_try->epot < s_min->epot)
2468 /* Store the new (lower) energies */
2469 upd_mdebin(mdebin, FALSE, FALSE, (double)count,
2470 mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals,
2471 s_try->s.box, NULL, NULL, vir, pres, NULL, mu_tot, constr);
2472 print_ebin(outf->fp_ene, TRUE,
2473 do_per_step(steps_accepted, inputrec->nstdisreout),
2474 do_per_step(steps_accepted, inputrec->nstorireout),
2475 fplog, count, count, eprNORMAL, TRUE,
2476 mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2481 /* Now if the new energy is smaller than the previous...
2482 * or if this is the first step!
2483 * or if we did random steps!
2486 if ( (count == 0) || (s_try->epot < s_min->epot) )
2490 /* Test whether the convergence criterion is met... */
2491 bDone = (s_try->fmax < inputrec->em_tol);
2493 /* Copy the arrays for force, positions and energy */
2494 /* The 'Min' array always holds the coords and forces of the minimal
2496 swap_em_state(s_min, s_try);
2502 /* Write to trn, if necessary */
2503 do_x = do_per_step(steps_accepted, inputrec->nstxout);
2504 do_f = do_per_step(steps_accepted, inputrec->nstfout);
2505 write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
2506 top_global, inputrec, count,
2507 s_min, state_global, f_global);
2511 /* If energy is not smaller make the step smaller... */
2514 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
2516 /* Reload the old state */
2517 em_dd_partition_system(fplog, count, cr, top_global, inputrec,
2518 s_min, top, mdatoms, fr, vsite, constr,
2523 /* Determine new step */
2524 stepsize = ustep/s_min->fmax;
2526 /* Check if stepsize is too small, with 1 nm as a characteristic length */
2528 if (count == nsteps || ustep < 1e-12)
2530 if (count == nsteps || ustep < 1e-6)
2535 warn_step(stderr, inputrec->em_tol, count == nsteps, constr != NULL);
2536 warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL);
2542 } /* End of the loop */
2544 /* Print some shit... */
2547 fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2549 write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm),
2550 top_global, inputrec, count,
2551 s_min, state_global, f_global);
2553 fnormn = s_min->fnorm/sqrt(state_global->natoms);
2557 print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps,
2558 s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
2559 print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps,
2560 s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
2563 finish_em(cr, outf, runtime, wcycle);
2565 /* To print the actual number of steps we needed somewhere */
2566 inputrec->nsteps = count;
2568 runtime->nsteps_done = count;
2571 } /* That's all folks */
2574 double do_nm(FILE *fplog, t_commrec *cr,
2575 int nfile, const t_filenm fnm[],
2576 const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
2577 int gmx_unused nstglobalcomm,
2578 gmx_vsite_t *vsite, gmx_constr_t constr,
2579 int gmx_unused stepout,
2580 t_inputrec *inputrec,
2581 gmx_mtop_t *top_global, t_fcdata *fcd,
2582 t_state *state_global,
2584 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2585 gmx_edsam_t gmx_unused ed,
2587 int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
2588 gmx_membed_t gmx_unused membed,
2589 real gmx_unused cpt_period, real gmx_unused max_hours,
2590 const char gmx_unused *deviceOptions,
2591 unsigned long gmx_unused Flags,
2592 gmx_runtime_t *runtime)
2594 const char *NM = "Normal Mode Analysis";
2596 int natoms, atom, d;
2599 gmx_localtop_t *top;
2600 gmx_enerdata_t *enerd;
2602 gmx_global_stat_t gstat;
2604 real t, t0, lambda, lam0;
2609 gmx_bool bSparse; /* use sparse matrix storage format */
2611 gmx_sparsematrix_t * sparse_matrix = NULL;
2612 real * full_matrix = NULL;
2613 em_state_t * state_work;
2615 /* added with respect to mdrun */
2616 int i, j, k, row, col;
2617 real der_range = 10.0*sqrt(GMX_REAL_EPS);
2623 gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported");
2626 state_work = init_em_state();
2628 /* Init em and store the local state in state_minimum */
2629 init_em(fplog, NM, cr, inputrec,
2630 state_global, top_global, state_work, &top,
2632 nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
2633 nfile, fnm, &outf, NULL);
2635 natoms = top_global->natoms;
2643 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2644 " which MIGHT not be accurate enough for normal mode analysis.\n"
2645 " Gromacs now uses sparse matrix storage, so the memory requirements\n"
2646 " are fairly modest even if you recompile in double precision.\n\n");
2650 /* Check if we can/should use sparse storage format.
2652 * Sparse format is only useful when the Hessian itself is sparse, which it
2653 * will be when we use a cutoff.
2654 * For small systems (n<1000) it is easier to always use full matrix format, though.
2656 if (EEL_FULL(fr->eeltype) || fr->rlist == 0.0)
2658 fprintf(stderr, "Non-cutoff electrostatics used, forcing full Hessian format.\n");
2661 else if (top_global->natoms < 1000)
2663 fprintf(stderr, "Small system size (N=%d), using full Hessian format.\n", top_global->natoms);
2668 fprintf(stderr, "Using compressed symmetric sparse Hessian format.\n");
2672 sz = DIM*top_global->natoms;
2674 fprintf(stderr, "Allocating Hessian memory...\n\n");
2678 sparse_matrix = gmx_sparsematrix_init(sz);
2679 sparse_matrix->compressed_symmetric = TRUE;
2683 snew(full_matrix, sz*sz);
2686 /* Initial values */
2687 t0 = inputrec->init_t;
2688 lam0 = inputrec->fepvals->init_lambda;
2696 /* Write start time and temperature */
2697 print_em_start(fplog, cr, runtime, wcycle, NM);
2699 /* fudge nr of steps to nr of atoms */
2700 inputrec->nsteps = natoms*2;
2704 fprintf(stderr, "starting normal mode calculation '%s'\n%d steps.\n\n",
2705 *(top_global->name), (int)inputrec->nsteps);
2708 nnodes = cr->nnodes;
2710 /* Make evaluate_energy do a single node force calculation */
2712 evaluate_energy(fplog, cr,
2713 top_global, state_work, top,
2714 inputrec, nrnb, wcycle, gstat,
2715 vsite, constr, fcd, graph, mdatoms, fr,
2716 mu_tot, enerd, vir, pres, -1, TRUE);
2717 cr->nnodes = nnodes;
2719 /* if forces are not small, warn user */
2720 get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work);
2724 fprintf(stderr, "Maximum force:%12.5e\n", state_work->fmax);
2725 if (state_work->fmax > 1.0e-3)
2727 fprintf(stderr, "Maximum force probably not small enough to");
2728 fprintf(stderr, " ensure that you are in an \nenergy well. ");
2729 fprintf(stderr, "Be aware that negative eigenvalues may occur");
2730 fprintf(stderr, " when the\nresulting matrix is diagonalized.\n");
2734 /***********************************************************
2736 * Loop over all pairs in matrix
2738 * do_force called twice. Once with positive and
2739 * once with negative displacement
2741 ************************************************************/
2743 /* Steps are divided one by one over the nodes */
2744 for (atom = cr->nodeid; atom < natoms; atom += nnodes)
2747 for (d = 0; d < DIM; d++)
2749 x_min = state_work->s.x[atom][d];
2751 state_work->s.x[atom][d] = x_min - der_range;
2753 /* Make evaluate_energy do a single node force calculation */
2755 evaluate_energy(fplog, cr,
2756 top_global, state_work, top,
2757 inputrec, nrnb, wcycle, gstat,
2758 vsite, constr, fcd, graph, mdatoms, fr,
2759 mu_tot, enerd, vir, pres, atom*2, FALSE);
2761 for (i = 0; i < natoms; i++)
2763 copy_rvec(state_work->f[i], fneg[i]);
2766 state_work->s.x[atom][d] = x_min + der_range;
2768 evaluate_energy(fplog, cr,
2769 top_global, state_work, top,
2770 inputrec, nrnb, wcycle, gstat,
2771 vsite, constr, fcd, graph, mdatoms, fr,
2772 mu_tot, enerd, vir, pres, atom*2+1, FALSE);
2773 cr->nnodes = nnodes;
2775 /* x is restored to original */
2776 state_work->s.x[atom][d] = x_min;
2778 for (j = 0; j < natoms; j++)
2780 for (k = 0; (k < DIM); k++)
2783 -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2791 #define mpi_type MPI_DOUBLE
2793 #define mpi_type MPI_FLOAT
2795 MPI_Send(dfdx[0], natoms*DIM, mpi_type, MASTERNODE(cr), cr->nodeid,
2796 cr->mpi_comm_mygroup);
2801 for (node = 0; (node < nnodes && atom+node < natoms); node++)
2807 MPI_Recv(dfdx[0], natoms*DIM, mpi_type, node, node,
2808 cr->mpi_comm_mygroup, &stat);
2813 row = (atom + node)*DIM + d;
2815 for (j = 0; j < natoms; j++)
2817 for (k = 0; k < DIM; k++)
2823 if (col >= row && dfdx[j][k] != 0.0)
2825 gmx_sparsematrix_increment_value(sparse_matrix,
2826 row, col, dfdx[j][k]);
2831 full_matrix[row*sz+col] = dfdx[j][k];
2838 if (bVerbose && fplog)
2843 /* write progress */
2844 if (MASTER(cr) && bVerbose)
2846 fprintf(stderr, "\rFinished step %d out of %d",
2847 min(atom+nnodes, natoms), natoms);
2854 fprintf(stderr, "\n\nWriting Hessian...\n");
2855 gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix);
2858 finish_em(cr, outf, runtime, wcycle);
2860 runtime->nsteps_done = natoms*2;