1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROwing Monsters And Cloning Shrimps
55 #include "gmx_fatal.h"
67 #include "md_support.h"
73 #include "gmx_wallcycle.h"
74 #include "mtop_util.h"
78 #include "gmx_omp_nthreads.h"
81 #include "gromacs/linearalgebra/mtxio.h"
82 #include "gromacs/linearalgebra/sparsematrix.h"
93 static em_state_t *init_em_state()
99 /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? */
100 snew(ems->s.lambda, efptNR);
105 static void print_em_start(FILE *fplog, t_commrec *cr, gmx_runtime_t *runtime,
106 gmx_wallcycle_t wcycle,
111 runtime_start(runtime);
113 sprintf(buf, "Started %s", name);
114 print_date_and_time(fplog, cr->nodeid, buf, NULL);
116 wallcycle_start(wcycle, ewcRUN);
118 static void em_time_end(FILE *fplog, t_commrec *cr, gmx_runtime_t *runtime,
119 gmx_wallcycle_t wcycle)
121 wallcycle_stop(wcycle, ewcRUN);
123 runtime_end(runtime);
126 static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
129 fprintf(out, "%s:\n", minimizer);
130 fprintf(out, " Tolerance (Fmax) = %12.5e\n", ftol);
131 fprintf(out, " Number of steps = %12d\n", nsteps);
134 static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain)
140 "\nEnergy minimization reached the maximum number"
141 "of steps before the forces reached the requested"
142 "precision Fmax < %g.\n", ftol);
147 "\nEnergy minimization has stopped, but the forces have"
148 "not converged to the requested precision Fmax < %g (which"
149 "may not be possible for your system). It stopped"
150 "because the algorithm tried to make a new step whose size"
151 "was too small, or there was no change in the energy since"
152 "last step. Either way, we regard the minimization as"
153 "converged to within the available machine precision,"
154 "given your starting configuration and EM parameters.\n%s%s",
156 sizeof(real) < sizeof(double) ?
157 "\nDouble precision normally gives you higher accuracy, but"
158 "this is often not needed for preparing to run molecular"
162 "You might need to increase your constraint accuracy, or turn\n"
163 "off constraints altogether (set constraints = none in mdp file)\n" :
166 fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
171 static void print_converged(FILE *fp, const char *alg, real ftol,
172 gmx_large_int_t count, gmx_bool bDone, gmx_large_int_t nsteps,
173 real epot, real fmax, int nfmax, real fnorm)
175 char buf[STEPSTRSIZE];
179 fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n",
180 alg, ftol, gmx_step_str(count, buf));
182 else if (count < nsteps)
184 fprintf(fp, "\n%s converged to machine precision in %s steps,\n"
185 "but did not reach the requested Fmax < %g.\n",
186 alg, gmx_step_str(count, buf), ftol);
190 fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n",
191 alg, ftol, gmx_step_str(count, buf));
195 fprintf(fp, "Potential Energy = %21.14e\n", epot);
196 fprintf(fp, "Maximum force = %21.14e on atom %d\n", fmax, nfmax+1);
197 fprintf(fp, "Norm of force = %21.14e\n", fnorm);
199 fprintf(fp, "Potential Energy = %14.7e\n", epot);
200 fprintf(fp, "Maximum force = %14.7e on atom %d\n", fmax, nfmax+1);
201 fprintf(fp, "Norm of force = %14.7e\n", fnorm);
205 static void get_f_norm_max(t_commrec *cr,
206 t_grpopts *opts, t_mdatoms *mdatoms, rvec *f,
207 real *fnorm, real *fmax, int *a_fmax)
210 real fmax2, fmax2_0, fam;
211 int la_max, a_max, start, end, i, m, gf;
213 /* This routine finds the largest force and returns it.
214 * On parallel machines the global max is taken.
220 start = mdatoms->start;
221 end = mdatoms->homenr + start;
222 if (mdatoms->cFREEZE)
224 for (i = start; i < end; i++)
226 gf = mdatoms->cFREEZE[i];
228 for (m = 0; m < DIM; m++)
230 if (!opts->nFreeze[gf][m])
245 for (i = start; i < end; i++)
257 if (la_max >= 0 && DOMAINDECOMP(cr))
259 a_max = cr->dd->gatindex[la_max];
267 snew(sum, 2*cr->nnodes+1);
268 sum[2*cr->nodeid] = fmax2;
269 sum[2*cr->nodeid+1] = a_max;
270 sum[2*cr->nnodes] = fnorm2;
271 gmx_sumd(2*cr->nnodes+1, sum, cr);
272 fnorm2 = sum[2*cr->nnodes];
273 /* Determine the global maximum */
274 for (i = 0; i < cr->nnodes; i++)
276 if (sum[2*i] > fmax2)
279 a_max = (int)(sum[2*i+1] + 0.5);
287 *fnorm = sqrt(fnorm2);
299 static void get_state_f_norm_max(t_commrec *cr,
300 t_grpopts *opts, t_mdatoms *mdatoms,
303 get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax);
306 void init_em(FILE *fplog, const char *title,
307 t_commrec *cr, t_inputrec *ir,
308 t_state *state_global, gmx_mtop_t *top_global,
309 em_state_t *ems, gmx_localtop_t **top,
310 rvec **f, rvec **f_global,
311 t_nrnb *nrnb, rvec mu_tot,
312 t_forcerec *fr, gmx_enerdata_t **enerd,
313 t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat,
314 gmx_vsite_t *vsite, gmx_constr_t constr,
315 int nfile, const t_filenm fnm[],
316 gmx_mdoutf_t **outf, t_mdebin **mdebin)
318 int start, homenr, i;
323 fprintf(fplog, "Initiating %s\n", title);
326 state_global->ngtc = 0;
328 /* Initialize lambda variables */
329 initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL);
333 if (DOMAINDECOMP(cr))
335 *top = dd_init_local_top(top_global);
337 dd_init_local_state(cr->dd, state_global, &ems->s);
341 /* Distribute the charge groups over the nodes from the master node */
342 dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
343 state_global, top_global, ir,
344 &ems->s, &ems->f, mdatoms, *top,
345 fr, vsite, NULL, constr,
347 dd_store_state(cr->dd, &ems->s);
351 snew(*f_global, top_global->natoms);
361 snew(*f, top_global->natoms);
363 /* Just copy the state */
364 ems->s = *state_global;
365 snew(ems->s.x, ems->s.nalloc);
366 snew(ems->f, ems->s.nalloc);
367 for (i = 0; i < state_global->natoms; i++)
369 copy_rvec(state_global->x[i], ems->s.x[i]);
371 copy_mat(state_global->box, ems->s.box);
373 if (PAR(cr) && ir->eI != eiNM)
375 /* Initialize the particle decomposition and split the topology */
376 *top = split_system(fplog, top_global, ir, cr);
378 pd_cg_range(cr, &fr->cg0, &fr->hcg);
382 *top = gmx_mtop_generate_local_top(top_global, ir);
386 forcerec_set_excl_load(fr, *top, cr);
388 init_bonded_thread_force_reduction(fr, &(*top)->idef);
390 if (ir->ePBC != epbcNONE && !fr->bMolPBC)
392 *graph = mk_graph(fplog, &((*top)->idef), 0, top_global->natoms, FALSE, FALSE);
401 pd_at_range(cr, &start, &homenr);
407 homenr = top_global->natoms;
409 atoms2md(top_global, ir, 0, NULL, start, homenr, mdatoms);
410 update_mdatoms(mdatoms, state_global->lambda[efptFEP]);
414 set_vsite_top(vsite, *top, mdatoms, cr);
420 if (ir->eConstrAlg == econtSHAKE &&
421 gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
423 gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",
424 econstr_names[econtSHAKE], econstr_names[econtLINCS]);
427 if (!DOMAINDECOMP(cr))
429 set_constraints(constr, *top, ir, mdatoms, cr);
432 if (!ir->bContinuation)
434 /* Constrain the starting coordinates */
436 constrain(PAR(cr) ? NULL : fplog, TRUE, TRUE, constr, &(*top)->idef,
437 ir, NULL, cr, -1, 0, mdatoms,
438 ems->s.x, ems->s.x, NULL, fr->bMolPBC, ems->s.box,
439 ems->s.lambda[efptFEP], &dvdlambda,
440 NULL, NULL, nrnb, econqCoord, FALSE, 0, 0);
446 *gstat = global_stat_init(ir);
449 *outf = init_mdoutf(nfile, fnm, 0, cr, ir, NULL);
452 init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
457 /* Init bin for energy stuff */
458 *mdebin = init_mdebin((*outf)->fp_ene, top_global, ir, NULL);
462 calc_shifts(ems->s.box, fr->shift_vec);
465 static void finish_em(FILE *fplog, t_commrec *cr, gmx_mdoutf_t *outf,
466 gmx_runtime_t *runtime, gmx_wallcycle_t wcycle)
468 if (!(cr->duty & DUTY_PME))
470 /* Tell the PME only node to finish */
471 gmx_pme_send_finish(cr);
476 em_time_end(fplog, cr, runtime, wcycle);
479 static void swap_em_state(em_state_t *ems1, em_state_t *ems2)
488 static void copy_em_coords(em_state_t *ems, t_state *state)
492 for (i = 0; (i < state->natoms); i++)
494 copy_rvec(ems->s.x[i], state->x[i]);
498 static void write_em_traj(FILE *fplog, t_commrec *cr,
500 gmx_bool bX, gmx_bool bF, const char *confout,
501 gmx_mtop_t *top_global,
502 t_inputrec *ir, gmx_large_int_t step,
504 t_state *state_global, rvec *f_global)
508 if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
510 copy_em_coords(state, state_global);
517 mdof_flags |= MDOF_X;
521 mdof_flags |= MDOF_F;
523 write_traj(fplog, cr, outf, mdof_flags,
524 top_global, step, (double)step,
525 &state->s, state_global, state->f, f_global, NULL, NULL);
527 if (confout != NULL && MASTER(cr))
529 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
531 /* Make molecules whole only for confout writing */
532 do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global,
536 write_sto_conf_mtop(confout,
537 *top_global->name, top_global,
538 state_global->x, NULL, ir->ePBC, state_global->box);
542 static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
544 em_state_t *ems1, real a, rvec *f, em_state_t *ems2,
545 gmx_constr_t constr, gmx_localtop_t *top,
546 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
547 gmx_large_int_t count)
559 if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
561 gmx_incons("state mismatch in do_em_step");
564 s2->flags = s1->flags;
566 if (s2->nalloc != s1->nalloc)
568 s2->nalloc = s1->nalloc;
569 srenew(s2->x, s1->nalloc);
570 srenew(ems2->f, s1->nalloc);
571 if (s2->flags & (1<<estCGP))
573 srenew(s2->cg_p, s1->nalloc);
577 s2->natoms = s1->natoms;
578 copy_mat(s1->box, s2->box);
579 /* Copy free energy state */
580 for (i = 0; i < efptNR; i++)
582 s2->lambda[i] = s1->lambda[i];
584 copy_mat(s1->box, s2->box);
587 end = md->start + md->homenr;
592 #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
597 #pragma omp for schedule(static) nowait
598 for (i = start; i < end; i++)
604 for (m = 0; m < DIM; m++)
606 if (ir->opts.nFreeze[gf][m])
612 x2[i][m] = x1[i][m] + a*f[i][m];
617 if (s2->flags & (1<<estCGP))
619 /* Copy the CG p vector */
622 #pragma omp for schedule(static) nowait
623 for (i = start; i < end; i++)
625 copy_rvec(x1[i], x2[i]);
629 if (DOMAINDECOMP(cr))
631 s2->ddp_count = s1->ddp_count;
632 if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
635 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
636 srenew(s2->cg_gl, s2->cg_gl_nalloc);
639 s2->ncg_gl = s1->ncg_gl;
640 #pragma omp for schedule(static) nowait
641 for (i = 0; i < s2->ncg_gl; i++)
643 s2->cg_gl[i] = s1->cg_gl[i];
645 s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
651 wallcycle_start(wcycle, ewcCONSTR);
653 constrain(NULL, TRUE, TRUE, constr, &top->idef,
654 ir, NULL, cr, count, 0, md,
655 s1->x, s2->x, NULL, bMolPBC, s2->box,
656 s2->lambda[efptBONDED], &dvdlambda,
657 NULL, NULL, nrnb, econqCoord, FALSE, 0, 0);
658 wallcycle_stop(wcycle, ewcCONSTR);
662 static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr,
663 gmx_mtop_t *top_global, t_inputrec *ir,
664 em_state_t *ems, gmx_localtop_t *top,
665 t_mdatoms *mdatoms, t_forcerec *fr,
666 gmx_vsite_t *vsite, gmx_constr_t constr,
667 t_nrnb *nrnb, gmx_wallcycle_t wcycle)
669 /* Repartition the domain decomposition */
670 wallcycle_start(wcycle, ewcDOMDEC);
671 dd_partition_system(fplog, step, cr, FALSE, 1,
672 NULL, top_global, ir,
674 mdatoms, top, fr, vsite, NULL, constr,
675 nrnb, wcycle, FALSE);
676 dd_store_state(cr->dd, &ems->s);
677 wallcycle_stop(wcycle, ewcDOMDEC);
680 static void evaluate_energy(FILE *fplog, gmx_bool bVerbose, t_commrec *cr,
681 t_state *state_global, gmx_mtop_t *top_global,
682 em_state_t *ems, gmx_localtop_t *top,
683 t_inputrec *inputrec,
684 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
685 gmx_global_stat_t gstat,
686 gmx_vsite_t *vsite, gmx_constr_t constr,
688 t_graph *graph, t_mdatoms *mdatoms,
689 t_forcerec *fr, rvec mu_tot,
690 gmx_enerdata_t *enerd, tensor vir, tensor pres,
691 gmx_large_int_t count, gmx_bool bFirst)
696 tensor force_vir, shake_vir, ekin;
697 real dvdlambda, prescorr, enercorr, dvdlcorr;
700 /* Set the time to the initial time, the time does not change during EM */
701 t = inputrec->init_t;
704 (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
706 /* This the first state or an old state used before the last ns */
712 if (inputrec->nstlist > 0)
716 else if (inputrec->nstlist == -1)
718 nabnsb = natoms_beyond_ns_buffer(inputrec, fr, &top->cgs, NULL, ems->s.x);
721 gmx_sumi(1, &nabnsb, cr);
729 construct_vsites(fplog, vsite, ems->s.x, nrnb, 1, NULL,
730 top->idef.iparams, top->idef.il,
731 fr->ePBC, fr->bMolPBC, graph, cr, ems->s.box);
734 if (DOMAINDECOMP(cr))
738 /* Repartition the domain decomposition */
739 em_dd_partition_system(fplog, count, cr, top_global, inputrec,
740 ems, top, mdatoms, fr, vsite, constr,
745 /* Calc force & energy on new trial position */
746 /* do_force always puts the charge groups in the box and shifts again
747 * We do not unshift, so molecules are always whole in congrad.c
749 do_force(fplog, cr, inputrec,
750 count, nrnb, wcycle, top, top_global, &top_global->groups,
751 ems->s.box, ems->s.x, &ems->s.hist,
752 ems->f, force_vir, mdatoms, enerd, fcd,
753 ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE,
754 GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
755 GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
756 (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
758 /* Clear the unused shake virial and pressure */
759 clear_mat(shake_vir);
762 /* Communicate stuff when parallel */
763 if (PAR(cr) && inputrec->eI != eiNM)
765 wallcycle_start(wcycle, ewcMoveE);
767 global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot,
768 inputrec, NULL, NULL, NULL, 1, &terminate,
769 top_global, &ems->s, FALSE,
775 wallcycle_stop(wcycle, ewcMoveE);
778 /* Calculate long range corrections to pressure and energy */
779 calc_dispcorr(fplog, inputrec, fr, count, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW],
780 pres, force_vir, &prescorr, &enercorr, &dvdlcorr);
781 enerd->term[F_DISPCORR] = enercorr;
782 enerd->term[F_EPOT] += enercorr;
783 enerd->term[F_PRES] += prescorr;
784 enerd->term[F_DVDL] += dvdlcorr;
786 ems->epot = enerd->term[F_EPOT];
790 /* Project out the constraint components of the force */
791 wallcycle_start(wcycle, ewcCONSTR);
793 constrain(NULL, FALSE, FALSE, constr, &top->idef,
794 inputrec, NULL, cr, count, 0, mdatoms,
795 ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box,
796 ems->s.lambda[efptBONDED], &dvdlambda,
797 NULL, &shake_vir, nrnb, econqForceDispl, FALSE, 0, 0);
798 if (fr->bSepDVDL && fplog)
800 fprintf(fplog, sepdvdlformat, "Constraints", t, dvdlambda);
802 enerd->term[F_DVDL_BONDED] += dvdlambda;
803 m_add(force_vir, shake_vir, vir);
804 wallcycle_stop(wcycle, ewcCONSTR);
808 copy_mat(force_vir, vir);
812 enerd->term[F_PRES] =
813 calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres);
815 sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals);
817 if (EI_ENERGY_MINIMIZATION(inputrec->eI))
819 get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems);
823 static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
825 em_state_t *s_min, em_state_t *s_b)
829 int ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m;
831 unsigned char *grpnrFREEZE;
835 fprintf(debug, "Doing reorder_partsum\n");
841 cgs_gl = dd_charge_groups_global(cr->dd);
842 index = cgs_gl->index;
844 /* Collect fm in a global vector fmg.
845 * This conflicts with the spirit of domain decomposition,
846 * but to fully optimize this a much more complicated algorithm is required.
848 snew(fmg, mtop->natoms);
850 ncg = s_min->s.ncg_gl;
851 cg_gl = s_min->s.cg_gl;
853 for (c = 0; c < ncg; c++)
858 for (a = a0; a < a1; a++)
860 copy_rvec(fm[i], fmg[a]);
864 gmx_sum(mtop->natoms*3, fmg[0], cr);
866 /* Now we will determine the part of the sum for the cgs in state s_b */
868 cg_gl = s_b->s.cg_gl;
872 grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
873 for (c = 0; c < ncg; c++)
878 for (a = a0; a < a1; a++)
880 if (mdatoms->cFREEZE && grpnrFREEZE)
884 for (m = 0; m < DIM; m++)
886 if (!opts->nFreeze[gf][m])
888 partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
900 static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
902 em_state_t *s_min, em_state_t *s_b)
908 /* This is just the classical Polak-Ribiere calculation of beta;
909 * it looks a bit complicated since we take freeze groups into account,
910 * and might have to sum it in parallel runs.
913 if (!DOMAINDECOMP(cr) ||
914 (s_min->s.ddp_count == cr->dd->ddp_count &&
915 s_b->s.ddp_count == cr->dd->ddp_count))
921 /* This part of code can be incorrect with DD,
922 * since the atom ordering in s_b and s_min might differ.
924 for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
926 if (mdatoms->cFREEZE)
928 gf = mdatoms->cFREEZE[i];
930 for (m = 0; m < DIM; m++)
932 if (!opts->nFreeze[gf][m])
934 sum += (fb[i][m] - fm[i][m])*fb[i][m];
941 /* We need to reorder cgs while summing */
942 sum = reorder_partsum(cr, opts, mdatoms, mtop, s_min, s_b);
946 gmx_sumd(1, &sum, cr);
949 return sum/sqr(s_min->fnorm);
952 double do_cg(FILE *fplog, t_commrec *cr,
953 int nfile, const t_filenm fnm[],
954 const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact,
956 gmx_vsite_t *vsite, gmx_constr_t constr,
958 t_inputrec *inputrec,
959 gmx_mtop_t *top_global, t_fcdata *fcd,
960 t_state *state_global,
962 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
965 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
967 real cpt_period, real max_hours,
968 const char *deviceOptions,
970 gmx_runtime_t *runtime)
972 const char *CG = "Polak-Ribiere Conjugate Gradients";
974 em_state_t *s_min, *s_a, *s_b, *s_c;
976 gmx_enerdata_t *enerd;
978 gmx_global_stat_t gstat;
980 rvec *f_global, *p, *sf, *sfm;
981 double gpa, gpb, gpc, tmp, sum[2], minstep;
984 real a, b, c, beta = 0.0;
988 gmx_bool converged, foundlower;
990 gmx_bool do_log = FALSE, do_ene = FALSE, do_x, do_f;
992 int number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
994 int i, m, gf, step, nminstep;
999 s_min = init_em_state();
1000 s_a = init_em_state();
1001 s_b = init_em_state();
1002 s_c = init_em_state();
1004 /* Init em and store the local state in s_min */
1005 init_em(fplog, CG, cr, inputrec,
1006 state_global, top_global, s_min, &top, &f, &f_global,
1007 nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
1008 nfile, fnm, &outf, &mdebin);
1010 /* Print to log file */
1011 print_em_start(fplog, cr, runtime, wcycle, CG);
1013 /* Max number of steps */
1014 number_steps = inputrec->nsteps;
1018 sp_header(stderr, CG, inputrec->em_tol, number_steps);
1022 sp_header(fplog, CG, inputrec->em_tol, number_steps);
1025 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1026 /* do_force always puts the charge groups in the box and shifts again
1027 * We do not unshift, so molecules are always whole in congrad.c
1029 evaluate_energy(fplog, bVerbose, cr,
1030 state_global, top_global, s_min, top,
1031 inputrec, nrnb, wcycle, gstat,
1032 vsite, constr, fcd, graph, mdatoms, fr,
1033 mu_tot, enerd, vir, pres, -1, TRUE);
1038 /* Copy stuff to the energy bin for easy printing etc. */
1039 upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1040 mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
1041 NULL, NULL, vir, pres, NULL, mu_tot, constr);
1043 print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
1044 print_ebin(outf->fp_ene, TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
1045 TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1049 /* Estimate/guess the initial stepsize */
1050 stepsize = inputrec->em_stepsize/s_min->fnorm;
1054 fprintf(stderr, " F-max = %12.5e on atom %d\n",
1055 s_min->fmax, s_min->a_fmax+1);
1056 fprintf(stderr, " F-Norm = %12.5e\n",
1057 s_min->fnorm/sqrt(state_global->natoms));
1058 fprintf(stderr, "\n");
1059 /* and copy to the log file too... */
1060 fprintf(fplog, " F-max = %12.5e on atom %d\n",
1061 s_min->fmax, s_min->a_fmax+1);
1062 fprintf(fplog, " F-Norm = %12.5e\n",
1063 s_min->fnorm/sqrt(state_global->natoms));
1064 fprintf(fplog, "\n");
1066 /* Start the loop over CG steps.
1067 * Each successful step is counted, and we continue until
1068 * we either converge or reach the max number of steps.
1071 for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
1074 /* start taking steps in a new direction
1075 * First time we enter the routine, beta=0, and the direction is
1076 * simply the negative gradient.
1079 /* Calculate the new direction in p, and the gradient in this direction, gpa */
1084 for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
1086 if (mdatoms->cFREEZE)
1088 gf = mdatoms->cFREEZE[i];
1090 for (m = 0; m < DIM; m++)
1092 if (!inputrec->opts.nFreeze[gf][m])
1094 p[i][m] = sf[i][m] + beta*p[i][m];
1095 gpa -= p[i][m]*sf[i][m];
1096 /* f is negative gradient, thus the sign */
1105 /* Sum the gradient along the line across CPUs */
1108 gmx_sumd(1, &gpa, cr);
1111 /* Calculate the norm of the search vector */
1112 get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL, NULL);
1114 /* Just in case stepsize reaches zero due to numerical precision... */
1117 stepsize = inputrec->em_stepsize/pnorm;
1121 * Double check the value of the derivative in the search direction.
1122 * If it is positive it must be due to the old information in the
1123 * CG formula, so just remove that and start over with beta=0.
1124 * This corresponds to a steepest descent step.
1129 step--; /* Don't count this step since we are restarting */
1130 continue; /* Go back to the beginning of the big for-loop */
1133 /* Calculate minimum allowed stepsize, before the average (norm)
1134 * relative change in coordinate is smaller than precision
1137 for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
1139 for (m = 0; m < DIM; m++)
1141 tmp = fabs(s_min->s.x[i][m]);
1150 /* Add up from all CPUs */
1153 gmx_sumd(1, &minstep, cr);
1156 minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1158 if (stepsize < minstep)
1164 /* Write coordinates if necessary */
1165 do_x = do_per_step(step, inputrec->nstxout);
1166 do_f = do_per_step(step, inputrec->nstfout);
1168 write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
1169 top_global, inputrec, step,
1170 s_min, state_global, f_global);
1172 /* Take a step downhill.
1173 * In theory, we should minimize the function along this direction.
1174 * That is quite possible, but it turns out to take 5-10 function evaluations
1175 * for each line. However, we dont really need to find the exact minimum -
1176 * it is much better to start a new CG step in a modified direction as soon
1177 * as we are close to it. This will save a lot of energy evaluations.
1179 * In practice, we just try to take a single step.
1180 * If it worked (i.e. lowered the energy), we increase the stepsize but
1181 * the continue straight to the next CG step without trying to find any minimum.
1182 * If it didn't work (higher energy), there must be a minimum somewhere between
1183 * the old position and the new one.
1185 * Due to the finite numerical accuracy, it turns out that it is a good idea
1186 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1187 * This leads to lower final energies in the tests I've done. / Erik
1189 s_a->epot = s_min->epot;
1191 c = a + stepsize; /* reference position along line is zero */
1193 if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
1195 em_dd_partition_system(fplog, step, cr, top_global, inputrec,
1196 s_min, top, mdatoms, fr, vsite, constr,
1200 /* Take a trial step (new coords in s_c) */
1201 do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c,
1202 constr, top, nrnb, wcycle, -1);
1205 /* Calculate energy for the trial step */
1206 evaluate_energy(fplog, bVerbose, cr,
1207 state_global, top_global, s_c, top,
1208 inputrec, nrnb, wcycle, gstat,
1209 vsite, constr, fcd, graph, mdatoms, fr,
1210 mu_tot, enerd, vir, pres, -1, FALSE);
1212 /* Calc derivative along line */
1216 for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
1218 for (m = 0; m < DIM; m++)
1220 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1223 /* Sum the gradient along the line across CPUs */
1226 gmx_sumd(1, &gpc, cr);
1229 /* This is the max amount of increase in energy we tolerate */
1230 tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1232 /* Accept the step if the energy is lower, or if it is not significantly higher
1233 * and the line derivative is still negative.
1235 if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
1238 /* Great, we found a better energy. Increase step for next iteration
1239 * if we are still going down, decrease it otherwise
1243 stepsize *= 1.618034; /* The golden section */
1247 stepsize *= 0.618034; /* 1/golden section */
1252 /* New energy is the same or higher. We will have to do some work
1253 * to find a smaller value in the interval. Take smaller step next time!
1256 stepsize *= 0.618034;
1262 /* OK, if we didn't find a lower value we will have to locate one now - there must
1263 * be one in the interval [a=0,c].
1264 * The same thing is valid here, though: Don't spend dozens of iterations to find
1265 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1266 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1268 * I also have a safeguard for potentially really patological functions so we never
1269 * take more than 20 steps before we give up ...
1271 * If we already found a lower value we just skip this step and continue to the update.
1279 /* Select a new trial point.
1280 * If the derivatives at points a & c have different sign we interpolate to zero,
1281 * otherwise just do a bisection.
1283 if (gpa < 0 && gpc > 0)
1285 b = a + gpa*(a-c)/(gpc-gpa);
1292 /* safeguard if interpolation close to machine accuracy causes errors:
1293 * never go outside the interval
1295 if (b <= a || b >= c)
1300 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
1302 /* Reload the old state */
1303 em_dd_partition_system(fplog, -1, cr, top_global, inputrec,
1304 s_min, top, mdatoms, fr, vsite, constr,
1308 /* Take a trial step to this new point - new coords in s_b */
1309 do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b,
1310 constr, top, nrnb, wcycle, -1);
1313 /* Calculate energy for the trial step */
1314 evaluate_energy(fplog, bVerbose, cr,
1315 state_global, top_global, s_b, top,
1316 inputrec, nrnb, wcycle, gstat,
1317 vsite, constr, fcd, graph, mdatoms, fr,
1318 mu_tot, enerd, vir, pres, -1, FALSE);
1320 /* p does not change within a step, but since the domain decomposition
1321 * might change, we have to use cg_p of s_b here.
1326 for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
1328 for (m = 0; m < DIM; m++)
1330 gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1333 /* Sum the gradient along the line across CPUs */
1336 gmx_sumd(1, &gpb, cr);
1341 fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1342 s_a->epot, s_b->epot, s_c->epot, gpb);
1345 epot_repl = s_b->epot;
1347 /* Keep one of the intervals based on the value of the derivative at the new point */
1350 /* Replace c endpoint with b */
1351 swap_em_state(s_b, s_c);
1357 /* Replace a endpoint with b */
1358 swap_em_state(s_b, s_a);
1364 * Stop search as soon as we find a value smaller than the endpoints.
1365 * Never run more than 20 steps, no matter what.
1369 while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1372 if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1375 /* OK. We couldn't find a significantly lower energy.
1376 * If beta==0 this was steepest descent, and then we give up.
1377 * If not, set beta=0 and restart with steepest descent before quitting.
1387 /* Reset memory before giving up */
1393 /* Select min energy state of A & C, put the best in B.
1395 if (s_c->epot < s_a->epot)
1399 fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n",
1400 s_c->epot, s_a->epot);
1402 swap_em_state(s_b, s_c);
1410 fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n",
1411 s_a->epot, s_c->epot);
1413 swap_em_state(s_b, s_a);
1423 fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n",
1426 swap_em_state(s_b, s_c);
1431 /* new search direction */
1432 /* beta = 0 means forget all memory and restart with steepest descents. */
1433 if (nstcg && ((step % nstcg) == 0))
1439 /* s_min->fnorm cannot be zero, because then we would have converged
1443 /* Polak-Ribiere update.
1444 * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1446 beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b);
1448 /* Limit beta to prevent oscillations */
1449 if (fabs(beta) > 5.0)
1455 /* update positions */
1456 swap_em_state(s_min, s_b);
1459 /* Print it if necessary */
1464 fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1465 step, s_min->epot, s_min->fnorm/sqrt(state_global->natoms),
1466 s_min->fmax, s_min->a_fmax+1);
1468 /* Store the new (lower) energies */
1469 upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1470 mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
1471 NULL, NULL, vir, pres, NULL, mu_tot, constr);
1473 do_log = do_per_step(step, inputrec->nstlog);
1474 do_ene = do_per_step(step, inputrec->nstenergy);
1477 print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
1479 print_ebin(outf->fp_ene, do_ene, FALSE, FALSE,
1480 do_log ? fplog : NULL, step, step, eprNORMAL,
1481 TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1484 /* Stop when the maximum force lies below tolerance.
1485 * If we have reached machine precision, converged is already set to true.
1487 converged = converged || (s_min->fmax < inputrec->em_tol);
1489 } /* End of the loop */
1493 step--; /* we never took that last step in this case */
1496 if (s_min->fmax > inputrec->em_tol)
1500 warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
1501 warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
1508 /* If we printed energy and/or logfile last step (which was the last step)
1509 * we don't have to do it again, but otherwise print the final values.
1513 /* Write final value to log since we didn't do anything the last step */
1514 print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
1516 if (!do_ene || !do_log)
1518 /* Write final energy file entries */
1519 print_ebin(outf->fp_ene, !do_ene, FALSE, FALSE,
1520 !do_log ? fplog : NULL, step, step, eprNORMAL,
1521 TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1525 /* Print some stuff... */
1528 fprintf(stderr, "\nwriting lowest energy coordinates.\n");
1532 * For accurate normal mode calculation it is imperative that we
1533 * store the last conformation into the full precision binary trajectory.
1535 * However, we should only do it if we did NOT already write this step
1536 * above (which we did if do_x or do_f was true).
1538 do_x = !do_per_step(step, inputrec->nstxout);
1539 do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
1541 write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
1542 top_global, inputrec, step,
1543 s_min, state_global, f_global);
1545 fnormn = s_min->fnorm/sqrt(state_global->natoms);
1549 print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps,
1550 s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
1551 print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps,
1552 s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
1554 fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
1557 finish_em(fplog, cr, outf, runtime, wcycle);
1559 /* To print the actual number of steps we needed somewhere */
1560 runtime->nsteps_done = step;
1563 } /* That's all folks */
1566 double do_lbfgs(FILE *fplog, t_commrec *cr,
1567 int nfile, const t_filenm fnm[],
1568 const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact,
1570 gmx_vsite_t *vsite, gmx_constr_t constr,
1572 t_inputrec *inputrec,
1573 gmx_mtop_t *top_global, t_fcdata *fcd,
1576 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1579 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1580 gmx_membed_t membed,
1581 real cpt_period, real max_hours,
1582 const char *deviceOptions,
1583 unsigned long Flags,
1584 gmx_runtime_t *runtime)
1586 static const char *LBFGS = "Low-Memory BFGS Minimizer";
1588 gmx_localtop_t *top;
1589 gmx_enerdata_t *enerd;
1591 gmx_global_stat_t gstat;
1594 int ncorr, nmaxcorr, point, cp, neval, nminstep;
1595 double stepsize, gpa, gpb, gpc, tmp, minstep;
1596 real *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg;
1597 real *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp;
1598 real a, b, c, maxdelta, delta;
1599 real diag, Epot0, Epot, EpotA, EpotB, EpotC;
1600 real dgdx, dgdg, sq, yr, beta;
1602 gmx_bool converged, first;
1605 gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen;
1607 int start, end, number_steps;
1609 int i, k, m, n, nfmax, gf, step;
1616 gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n");
1621 gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
1624 n = 3*state->natoms;
1625 nmaxcorr = inputrec->nbfgscorr;
1627 /* Allocate memory */
1628 /* Use pointers to real so we dont have to loop over both atoms and
1629 * dimensions all the time...
1630 * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1631 * that point to the same memory.
1644 snew(rho, nmaxcorr);
1645 snew(alpha, nmaxcorr);
1648 for (i = 0; i < nmaxcorr; i++)
1654 for (i = 0; i < nmaxcorr; i++)
1663 init_em(fplog, LBFGS, cr, inputrec,
1664 state, top_global, &ems, &top, &f, &f_global,
1665 nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
1666 nfile, fnm, &outf, &mdebin);
1667 /* Do_lbfgs is not completely updated like do_steep and do_cg,
1668 * so we free some memory again.
1673 xx = (real *)state->x;
1676 start = mdatoms->start;
1677 end = mdatoms->homenr + start;
1679 /* Print to log file */
1680 print_em_start(fplog, cr, runtime, wcycle, LBFGS);
1682 do_log = do_ene = do_x = do_f = TRUE;
1684 /* Max number of steps */
1685 number_steps = inputrec->nsteps;
1687 /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1689 for (i = start; i < end; i++)
1691 if (mdatoms->cFREEZE)
1693 gf = mdatoms->cFREEZE[i];
1695 for (m = 0; m < DIM; m++)
1697 frozen[3*i+m] = inputrec->opts.nFreeze[gf][m];
1702 sp_header(stderr, LBFGS, inputrec->em_tol, number_steps);
1706 sp_header(fplog, LBFGS, inputrec->em_tol, number_steps);
1711 construct_vsites(fplog, vsite, state->x, nrnb, 1, NULL,
1712 top->idef.iparams, top->idef.il,
1713 fr->ePBC, fr->bMolPBC, graph, cr, state->box);
1716 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1717 /* do_force always puts the charge groups in the box and shifts again
1718 * We do not unshift, so molecules are always whole
1723 evaluate_energy(fplog, bVerbose, cr,
1724 state, top_global, &ems, top,
1725 inputrec, nrnb, wcycle, gstat,
1726 vsite, constr, fcd, graph, mdatoms, fr,
1727 mu_tot, enerd, vir, pres, -1, TRUE);
1732 /* Copy stuff to the energy bin for easy printing etc. */
1733 upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1734 mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
1735 NULL, NULL, vir, pres, NULL, mu_tot, constr);
1737 print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
1738 print_ebin(outf->fp_ene, TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
1739 TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1743 /* This is the starting energy */
1744 Epot = enerd->term[F_EPOT];
1750 /* Set the initial step.
1751 * since it will be multiplied by the non-normalized search direction
1752 * vector (force vector the first time), we scale it by the
1753 * norm of the force.
1758 fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
1759 fprintf(stderr, " F-max = %12.5e on atom %d\n", fmax, nfmax+1);
1760 fprintf(stderr, " F-Norm = %12.5e\n", fnorm/sqrt(state->natoms));
1761 fprintf(stderr, "\n");
1762 /* and copy to the log file too... */
1763 fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
1764 fprintf(fplog, " F-max = %12.5e on atom %d\n", fmax, nfmax+1);
1765 fprintf(fplog, " F-Norm = %12.5e\n", fnorm/sqrt(state->natoms));
1766 fprintf(fplog, "\n");
1770 for (i = 0; i < n; i++)
1774 dx[point][i] = ff[i]; /* Initial search direction */
1782 stepsize = 1.0/fnorm;
1785 /* Start the loop over BFGS steps.
1786 * Each successful step is counted, and we continue until
1787 * we either converge or reach the max number of steps.
1792 /* Set the gradient from the force */
1794 for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
1797 /* Write coordinates if necessary */
1798 do_x = do_per_step(step, inputrec->nstxout);
1799 do_f = do_per_step(step, inputrec->nstfout);
1804 mdof_flags |= MDOF_X;
1809 mdof_flags |= MDOF_F;
1812 write_traj(fplog, cr, outf, mdof_flags,
1813 top_global, step, (real)step, state, state, f, f, NULL, NULL);
1815 /* Do the linesearching in the direction dx[point][0..(n-1)] */
1817 /* pointer to current direction - point=0 first time here */
1820 /* calculate line gradient */
1821 for (gpa = 0, i = 0; i < n; i++)
1826 /* Calculate minimum allowed stepsize, before the average (norm)
1827 * relative change in coordinate is smaller than precision
1829 for (minstep = 0, i = 0; i < n; i++)
1839 minstep = GMX_REAL_EPS/sqrt(minstep/n);
1841 if (stepsize < minstep)
1847 /* Store old forces and coordinates */
1848 for (i = 0; i < n; i++)
1857 for (i = 0; i < n; i++)
1862 /* Take a step downhill.
1863 * In theory, we should minimize the function along this direction.
1864 * That is quite possible, but it turns out to take 5-10 function evaluations
1865 * for each line. However, we dont really need to find the exact minimum -
1866 * it is much better to start a new BFGS step in a modified direction as soon
1867 * as we are close to it. This will save a lot of energy evaluations.
1869 * In practice, we just try to take a single step.
1870 * If it worked (i.e. lowered the energy), we increase the stepsize but
1871 * the continue straight to the next BFGS step without trying to find any minimum.
1872 * If it didn't work (higher energy), there must be a minimum somewhere between
1873 * the old position and the new one.
1875 * Due to the finite numerical accuracy, it turns out that it is a good idea
1876 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1877 * This leads to lower final energies in the tests I've done. / Erik
1882 c = a + stepsize; /* reference position along line is zero */
1884 /* Check stepsize first. We do not allow displacements
1885 * larger than emstep.
1891 for (i = 0; i < n; i++)
1894 if (delta > maxdelta)
1899 if (maxdelta > inputrec->em_stepsize)
1904 while (maxdelta > inputrec->em_stepsize);
1906 /* Take a trial step */
1907 for (i = 0; i < n; i++)
1909 xc[i] = lastx[i] + c*s[i];
1913 /* Calculate energy for the trial step */
1914 ems.s.x = (rvec *)xc;
1916 evaluate_energy(fplog, bVerbose, cr,
1917 state, top_global, &ems, top,
1918 inputrec, nrnb, wcycle, gstat,
1919 vsite, constr, fcd, graph, mdatoms, fr,
1920 mu_tot, enerd, vir, pres, step, FALSE);
1923 /* Calc derivative along line */
1924 for (gpc = 0, i = 0; i < n; i++)
1926 gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
1928 /* Sum the gradient along the line across CPUs */
1931 gmx_sumd(1, &gpc, cr);
1934 /* This is the max amount of increase in energy we tolerate */
1935 tmp = sqrt(GMX_REAL_EPS)*fabs(EpotA);
1937 /* Accept the step if the energy is lower, or if it is not significantly higher
1938 * and the line derivative is still negative.
1940 if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp)))
1943 /* Great, we found a better energy. Increase step for next iteration
1944 * if we are still going down, decrease it otherwise
1948 stepsize *= 1.618034; /* The golden section */
1952 stepsize *= 0.618034; /* 1/golden section */
1957 /* New energy is the same or higher. We will have to do some work
1958 * to find a smaller value in the interval. Take smaller step next time!
1961 stepsize *= 0.618034;
1964 /* OK, if we didn't find a lower value we will have to locate one now - there must
1965 * be one in the interval [a=0,c].
1966 * The same thing is valid here, though: Don't spend dozens of iterations to find
1967 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1968 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1970 * I also have a safeguard for potentially really patological functions so we never
1971 * take more than 20 steps before we give up ...
1973 * If we already found a lower value we just skip this step and continue to the update.
1982 /* Select a new trial point.
1983 * If the derivatives at points a & c have different sign we interpolate to zero,
1984 * otherwise just do a bisection.
1987 if (gpa < 0 && gpc > 0)
1989 b = a + gpa*(a-c)/(gpc-gpa);
1996 /* safeguard if interpolation close to machine accuracy causes errors:
1997 * never go outside the interval
1999 if (b <= a || b >= c)
2004 /* Take a trial step */
2005 for (i = 0; i < n; i++)
2007 xb[i] = lastx[i] + b*s[i];
2011 /* Calculate energy for the trial step */
2012 ems.s.x = (rvec *)xb;
2014 evaluate_energy(fplog, bVerbose, cr,
2015 state, top_global, &ems, top,
2016 inputrec, nrnb, wcycle, gstat,
2017 vsite, constr, fcd, graph, mdatoms, fr,
2018 mu_tot, enerd, vir, pres, step, FALSE);
2023 for (gpb = 0, i = 0; i < n; i++)
2025 gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
2028 /* Sum the gradient along the line across CPUs */
2031 gmx_sumd(1, &gpb, cr);
2034 /* Keep one of the intervals based on the value of the derivative at the new point */
2037 /* Replace c endpoint with b */
2041 /* swap coord pointers b/c */
2051 /* Replace a endpoint with b */
2055 /* swap coord pointers a/b */
2065 * Stop search as soon as we find a value smaller than the endpoints,
2066 * or if the tolerance is below machine precision.
2067 * Never run more than 20 steps, no matter what.
2071 while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20));
2073 if (fabs(EpotB-Epot0) < GMX_REAL_EPS || nminstep >= 20)
2075 /* OK. We couldn't find a significantly lower energy.
2076 * If ncorr==0 this was steepest descent, and then we give up.
2077 * If not, reset memory to restart as steepest descent before quitting.
2089 /* Search in gradient direction */
2090 for (i = 0; i < n; i++)
2092 dx[point][i] = ff[i];
2094 /* Reset stepsize */
2095 stepsize = 1.0/fnorm;
2100 /* Select min energy state of A & C, put the best in xx/ff/Epot
2106 for (i = 0; i < n; i++)
2117 for (i = 0; i < n; i++)
2131 for (i = 0; i < n; i++)
2139 /* Update the memory information, and calculate a new
2140 * approximation of the inverse hessian
2143 /* Have new data in Epot, xx, ff */
2144 if (ncorr < nmaxcorr)
2149 for (i = 0; i < n; i++)
2151 dg[point][i] = lastf[i]-ff[i];
2152 dx[point][i] *= stepsize;
2157 for (i = 0; i < n; i++)
2159 dgdg += dg[point][i]*dg[point][i];
2160 dgdx += dg[point][i]*dx[point][i];
2165 rho[point] = 1.0/dgdx;
2168 if (point >= nmaxcorr)
2174 for (i = 0; i < n; i++)
2181 /* Recursive update. First go back over the memory points */
2182 for (k = 0; k < ncorr; k++)
2191 for (i = 0; i < n; i++)
2193 sq += dx[cp][i]*p[i];
2196 alpha[cp] = rho[cp]*sq;
2198 for (i = 0; i < n; i++)
2200 p[i] -= alpha[cp]*dg[cp][i];
2204 for (i = 0; i < n; i++)
2209 /* And then go forward again */
2210 for (k = 0; k < ncorr; k++)
2213 for (i = 0; i < n; i++)
2215 yr += p[i]*dg[cp][i];
2219 beta = alpha[cp]-beta;
2221 for (i = 0; i < n; i++)
2223 p[i] += beta*dx[cp][i];
2233 for (i = 0; i < n; i++)
2237 dx[point][i] = p[i];
2247 /* Test whether the convergence criterion is met */
2248 get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax);
2250 /* Print it if necessary */
2255 fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
2256 step, Epot, fnorm/sqrt(state->natoms), fmax, nfmax+1);
2258 /* Store the new (lower) energies */
2259 upd_mdebin(mdebin, FALSE, FALSE, (double)step,
2260 mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
2261 NULL, NULL, vir, pres, NULL, mu_tot, constr);
2262 do_log = do_per_step(step, inputrec->nstlog);
2263 do_ene = do_per_step(step, inputrec->nstenergy);
2266 print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
2268 print_ebin(outf->fp_ene, do_ene, FALSE, FALSE,
2269 do_log ? fplog : NULL, step, step, eprNORMAL,
2270 TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2273 /* Stop when the maximum force lies below tolerance.
2274 * If we have reached machine precision, converged is already set to true.
2277 converged = converged || (fmax < inputrec->em_tol);
2279 } /* End of the loop */
2283 step--; /* we never took that last step in this case */
2286 if (fmax > inputrec->em_tol)
2290 warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
2291 warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
2296 /* If we printed energy and/or logfile last step (which was the last step)
2297 * we don't have to do it again, but otherwise print the final values.
2299 if (!do_log) /* Write final value to log since we didn't do anythin last step */
2301 print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
2303 if (!do_ene || !do_log) /* Write final energy file entries */
2305 print_ebin(outf->fp_ene, !do_ene, FALSE, FALSE,
2306 !do_log ? fplog : NULL, step, step, eprNORMAL,
2307 TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2310 /* Print some stuff... */
2313 fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2317 * For accurate normal mode calculation it is imperative that we
2318 * store the last conformation into the full precision binary trajectory.
2320 * However, we should only do it if we did NOT already write this step
2321 * above (which we did if do_x or do_f was true).
2323 do_x = !do_per_step(step, inputrec->nstxout);
2324 do_f = !do_per_step(step, inputrec->nstfout);
2325 write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
2326 top_global, inputrec, step,
2331 print_converged(stderr, LBFGS, inputrec->em_tol, step, converged,
2332 number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms));
2333 print_converged(fplog, LBFGS, inputrec->em_tol, step, converged,
2334 number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms));
2336 fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
2339 finish_em(fplog, cr, outf, runtime, wcycle);
2341 /* To print the actual number of steps we needed somewhere */
2342 runtime->nsteps_done = step;
2345 } /* That's all folks */
2348 double do_steep(FILE *fplog, t_commrec *cr,
2349 int nfile, const t_filenm fnm[],
2350 const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact,
2352 gmx_vsite_t *vsite, gmx_constr_t constr,
2354 t_inputrec *inputrec,
2355 gmx_mtop_t *top_global, t_fcdata *fcd,
2356 t_state *state_global,
2358 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2361 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2362 gmx_membed_t membed,
2363 real cpt_period, real max_hours,
2364 const char *deviceOptions,
2365 unsigned long Flags,
2366 gmx_runtime_t *runtime)
2368 const char *SD = "Steepest Descents";
2369 em_state_t *s_min, *s_try;
2371 gmx_localtop_t *top;
2372 gmx_enerdata_t *enerd;
2374 gmx_global_stat_t gstat;
2376 real stepsize, constepsize;
2377 real ustep, dvdlambda, fnormn;
2380 gmx_bool bDone, bAbort, do_x, do_f;
2385 int steps_accepted = 0;
2389 s_min = init_em_state();
2390 s_try = init_em_state();
2392 /* Init em and store the local state in s_try */
2393 init_em(fplog, SD, cr, inputrec,
2394 state_global, top_global, s_try, &top, &f, &f_global,
2395 nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
2396 nfile, fnm, &outf, &mdebin);
2398 /* Print to log file */
2399 print_em_start(fplog, cr, runtime, wcycle, SD);
2401 /* Set variables for stepsize (in nm). This is the largest
2402 * step that we are going to make in any direction.
2404 ustep = inputrec->em_stepsize;
2407 /* Max number of steps */
2408 nsteps = inputrec->nsteps;
2412 /* Print to the screen */
2413 sp_header(stderr, SD, inputrec->em_tol, nsteps);
2417 sp_header(fplog, SD, inputrec->em_tol, nsteps);
2420 /**** HERE STARTS THE LOOP ****
2421 * count is the counter for the number of steps
2422 * bDone will be TRUE when the minimization has converged
2423 * bAbort will be TRUE when nsteps steps have been performed or when
2424 * the stepsize becomes smaller than is reasonable for machine precision
2429 while (!bDone && !bAbort)
2431 bAbort = (nsteps >= 0) && (count == nsteps);
2433 /* set new coordinates, except for first step */
2436 do_em_step(cr, inputrec, mdatoms, fr->bMolPBC,
2437 s_min, stepsize, s_min->f, s_try,
2438 constr, top, nrnb, wcycle, count);
2441 evaluate_energy(fplog, bVerbose, cr,
2442 state_global, top_global, s_try, top,
2443 inputrec, nrnb, wcycle, gstat,
2444 vsite, constr, fcd, graph, mdatoms, fr,
2445 mu_tot, enerd, vir, pres, count, count == 0);
2449 print_ebin_header(fplog, count, count, s_try->s.lambda[efptFEP]);
2454 s_min->epot = s_try->epot + 1;
2457 /* Print it if necessary */
2462 fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2463 count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1,
2464 (s_try->epot < s_min->epot) ? '\n' : '\r');
2467 if (s_try->epot < s_min->epot)
2469 /* Store the new (lower) energies */
2470 upd_mdebin(mdebin, FALSE, FALSE, (double)count,
2471 mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals,
2472 s_try->s.box, NULL, NULL, vir, pres, NULL, mu_tot, constr);
2473 print_ebin(outf->fp_ene, TRUE,
2474 do_per_step(steps_accepted, inputrec->nstdisreout),
2475 do_per_step(steps_accepted, inputrec->nstorireout),
2476 fplog, count, count, eprNORMAL, TRUE,
2477 mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2482 /* Now if the new energy is smaller than the previous...
2483 * or if this is the first step!
2484 * or if we did random steps!
2487 if ( (count == 0) || (s_try->epot < s_min->epot) )
2491 /* Test whether the convergence criterion is met... */
2492 bDone = (s_try->fmax < inputrec->em_tol);
2494 /* Copy the arrays for force, positions and energy */
2495 /* The 'Min' array always holds the coords and forces of the minimal
2497 swap_em_state(s_min, s_try);
2503 /* Write to trn, if necessary */
2504 do_x = do_per_step(steps_accepted, inputrec->nstxout);
2505 do_f = do_per_step(steps_accepted, inputrec->nstfout);
2506 write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
2507 top_global, inputrec, count,
2508 s_min, state_global, f_global);
2512 /* If energy is not smaller make the step smaller... */
2515 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
2517 /* Reload the old state */
2518 em_dd_partition_system(fplog, count, cr, top_global, inputrec,
2519 s_min, top, mdatoms, fr, vsite, constr,
2524 /* Determine new step */
2525 stepsize = ustep/s_min->fmax;
2527 /* Check if stepsize is too small, with 1 nm as a characteristic length */
2529 if (count == nsteps || ustep < 1e-12)
2531 if (count == nsteps || ustep < 1e-6)
2536 warn_step(stderr, inputrec->em_tol, count == nsteps, constr != NULL);
2537 warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL);
2543 } /* End of the loop */
2545 /* Print some shit... */
2548 fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2550 write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm),
2551 top_global, inputrec, count,
2552 s_min, state_global, f_global);
2554 fnormn = s_min->fnorm/sqrt(state_global->natoms);
2558 print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps,
2559 s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
2560 print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps,
2561 s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
2564 finish_em(fplog, cr, outf, runtime, wcycle);
2566 /* To print the actual number of steps we needed somewhere */
2567 inputrec->nsteps = count;
2569 runtime->nsteps_done = count;
2572 } /* That's all folks */
2575 double do_nm(FILE *fplog, t_commrec *cr,
2576 int nfile, const t_filenm fnm[],
2577 const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact,
2579 gmx_vsite_t *vsite, gmx_constr_t constr,
2581 t_inputrec *inputrec,
2582 gmx_mtop_t *top_global, t_fcdata *fcd,
2583 t_state *state_global,
2585 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2588 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2589 gmx_membed_t membed,
2590 real cpt_period, real max_hours,
2591 const char *deviceOptions,
2592 unsigned long Flags,
2593 gmx_runtime_t *runtime)
2595 const char *NM = "Normal Mode Analysis";
2597 int natoms, atom, d;
2600 gmx_localtop_t *top;
2601 gmx_enerdata_t *enerd;
2603 gmx_global_stat_t gstat;
2605 real t, t0, lambda, lam0;
2610 gmx_bool bSparse; /* use sparse matrix storage format */
2612 gmx_sparsematrix_t * sparse_matrix = NULL;
2613 real * full_matrix = NULL;
2614 em_state_t * state_work;
2616 /* added with respect to mdrun */
2617 int i, j, k, row, col;
2618 real der_range = 10.0*sqrt(GMX_REAL_EPS);
2624 gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported");
2627 state_work = init_em_state();
2629 /* Init em and store the local state in state_minimum */
2630 init_em(fplog, NM, cr, inputrec,
2631 state_global, top_global, state_work, &top,
2633 nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
2634 nfile, fnm, &outf, NULL);
2636 natoms = top_global->natoms;
2644 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2645 " which MIGHT not be accurate enough for normal mode analysis.\n"
2646 " Gromacs now uses sparse matrix storage, so the memory requirements\n"
2647 " are fairly modest even if you recompile in double precision.\n\n");
2651 /* Check if we can/should use sparse storage format.
2653 * Sparse format is only useful when the Hessian itself is sparse, which it
2654 * will be when we use a cutoff.
2655 * For small systems (n<1000) it is easier to always use full matrix format, though.
2657 if (EEL_FULL(fr->eeltype) || fr->rlist == 0.0)
2659 fprintf(stderr, "Non-cutoff electrostatics used, forcing full Hessian format.\n");
2662 else if (top_global->natoms < 1000)
2664 fprintf(stderr, "Small system size (N=%d), using full Hessian format.\n", top_global->natoms);
2669 fprintf(stderr, "Using compressed symmetric sparse Hessian format.\n");
2673 sz = DIM*top_global->natoms;
2675 fprintf(stderr, "Allocating Hessian memory...\n\n");
2679 sparse_matrix = gmx_sparsematrix_init(sz);
2680 sparse_matrix->compressed_symmetric = TRUE;
2684 snew(full_matrix, sz*sz);
2687 /* Initial values */
2688 t0 = inputrec->init_t;
2689 lam0 = inputrec->fepvals->init_lambda;
2697 /* Write start time and temperature */
2698 print_em_start(fplog, cr, runtime, wcycle, NM);
2700 /* fudge nr of steps to nr of atoms */
2701 inputrec->nsteps = natoms*2;
2705 fprintf(stderr, "starting normal mode calculation '%s'\n%d steps.\n\n",
2706 *(top_global->name), (int)inputrec->nsteps);
2709 nnodes = cr->nnodes;
2711 /* Make evaluate_energy do a single node force calculation */
2713 evaluate_energy(fplog, bVerbose, cr,
2714 state_global, top_global, state_work, top,
2715 inputrec, nrnb, wcycle, gstat,
2716 vsite, constr, fcd, graph, mdatoms, fr,
2717 mu_tot, enerd, vir, pres, -1, TRUE);
2718 cr->nnodes = nnodes;
2720 /* if forces are not small, warn user */
2721 get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work);
2725 fprintf(stderr, "Maximum force:%12.5e\n", state_work->fmax);
2726 if (state_work->fmax > 1.0e-3)
2728 fprintf(stderr, "Maximum force probably not small enough to");
2729 fprintf(stderr, " ensure that you are in an \nenergy well. ");
2730 fprintf(stderr, "Be aware that negative eigenvalues may occur");
2731 fprintf(stderr, " when the\nresulting matrix is diagonalized.\n");
2735 /***********************************************************
2737 * Loop over all pairs in matrix
2739 * do_force called twice. Once with positive and
2740 * once with negative displacement
2742 ************************************************************/
2744 /* Steps are divided one by one over the nodes */
2745 for (atom = cr->nodeid; atom < natoms; atom += nnodes)
2748 for (d = 0; d < DIM; d++)
2750 x_min = state_work->s.x[atom][d];
2752 state_work->s.x[atom][d] = x_min - der_range;
2754 /* Make evaluate_energy do a single node force calculation */
2756 evaluate_energy(fplog, bVerbose, cr,
2757 state_global, top_global, state_work, top,
2758 inputrec, nrnb, wcycle, gstat,
2759 vsite, constr, fcd, graph, mdatoms, fr,
2760 mu_tot, enerd, vir, pres, atom*2, FALSE);
2762 for (i = 0; i < natoms; i++)
2764 copy_rvec(state_work->f[i], fneg[i]);
2767 state_work->s.x[atom][d] = x_min + der_range;
2769 evaluate_energy(fplog, bVerbose, cr,
2770 state_global, top_global, state_work, top,
2771 inputrec, nrnb, wcycle, gstat,
2772 vsite, constr, fcd, graph, mdatoms, fr,
2773 mu_tot, enerd, vir, pres, atom*2+1, FALSE);
2774 cr->nnodes = nnodes;
2776 /* x is restored to original */
2777 state_work->s.x[atom][d] = x_min;
2779 for (j = 0; j < natoms; j++)
2781 for (k = 0; (k < DIM); k++)
2784 -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2792 #define mpi_type MPI_DOUBLE
2794 #define mpi_type MPI_FLOAT
2796 MPI_Send(dfdx[0], natoms*DIM, mpi_type, MASTERNODE(cr), cr->nodeid,
2797 cr->mpi_comm_mygroup);
2802 for (node = 0; (node < nnodes && atom+node < natoms); node++)
2808 MPI_Recv(dfdx[0], natoms*DIM, mpi_type, node, node,
2809 cr->mpi_comm_mygroup, &stat);
2814 row = (atom + node)*DIM + d;
2816 for (j = 0; j < natoms; j++)
2818 for (k = 0; k < DIM; k++)
2824 if (col >= row && dfdx[j][k] != 0.0)
2826 gmx_sparsematrix_increment_value(sparse_matrix,
2827 row, col, dfdx[j][k]);
2832 full_matrix[row*sz+col] = dfdx[j][k];
2839 if (bVerbose && fplog)
2844 /* write progress */
2845 if (MASTER(cr) && bVerbose)
2847 fprintf(stderr, "\rFinished step %d out of %d",
2848 min(atom+nnodes, natoms), natoms);
2855 fprintf(stderr, "\n\nWriting Hessian...\n");
2856 gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix);
2859 finish_em(fplog, cr, outf, runtime, wcycle);
2861 runtime->nsteps_done = natoms*2;