2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012,2013, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
57 #include "gmx_fatal.h"
69 #include "md_support.h"
73 #include "sparsematrix.h"
77 #include "gmx_wallcycle.h"
78 #include "mtop_util.h"
82 #include "gmx_omp_nthreads.h"
94 static em_state_t *init_em_state()
100 /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? */
101 snew(ems->s.lambda,efptNR);
106 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
107 gmx_wallcycle_t wcycle,
112 runtime_start(runtime);
114 sprintf(buf,"Started %s",name);
115 print_date_and_time(fplog,cr->nodeid,buf,NULL);
117 wallcycle_start(wcycle,ewcRUN);
119 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
120 gmx_wallcycle_t wcycle)
122 wallcycle_stop(wcycle,ewcRUN);
124 runtime_end(runtime);
127 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
130 fprintf(out,"%s:\n",minimizer);
131 fprintf(out," Tolerance (Fmax) = %12.5e\n",ftol);
132 fprintf(out," Number of steps = %12d\n",nsteps);
135 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
141 "\nEnergy minimization reached the maximum number"
142 "of steps before the forces reached the requested"
143 "precision Fmax < %g.\n",ftol);
148 "\nEnergy minimization has stopped, but the forces have"
149 "not converged to the requested precision Fmax < %g (which"
150 "may not be possible for your system). It stopped"
151 "because the algorithm tried to make a new step whose size"
152 "was too small, or there was no change in the energy since"
153 "last step. Either way, we regard the minimization as"
154 "converged to within the available machine precision,"
155 "given your starting configuration and EM parameters.\n%s%s",
157 sizeof(real)<sizeof(double) ?
158 "\nDouble precision normally gives you higher accuracy, but"
159 "this is often not needed for preparing to run molecular"
163 "You might need to increase your constraint accuracy, or turn\n"
164 "off constraints altogether (set constraints = none in mdp file)\n" :
167 fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
172 static void print_converged(FILE *fp,const char *alg,real ftol,
173 gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
174 real epot,real fmax, int nfmax, real fnorm)
176 char buf[STEPSTRSIZE];
179 fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
180 alg,ftol,gmx_step_str(count,buf));
181 else if(count<nsteps)
182 fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
183 "but did not reach the requested Fmax < %g.\n",
184 alg,gmx_step_str(count,buf),ftol);
186 fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
187 alg,ftol,gmx_step_str(count,buf));
190 fprintf(fp,"Potential Energy = %21.14e\n",epot);
191 fprintf(fp,"Maximum force = %21.14e on atom %d\n",fmax,nfmax+1);
192 fprintf(fp,"Norm of force = %21.14e\n",fnorm);
194 fprintf(fp,"Potential Energy = %14.7e\n",epot);
195 fprintf(fp,"Maximum force = %14.7e on atom %d\n",fmax,nfmax+1);
196 fprintf(fp,"Norm of force = %14.7e\n",fnorm);
200 static void get_f_norm_max(t_commrec *cr,
201 t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
202 real *fnorm,real *fmax,int *a_fmax)
205 real fmax2,fmax2_0,fam;
206 int la_max,a_max,start,end,i,m,gf;
208 /* This routine finds the largest force and returns it.
209 * On parallel machines the global max is taken.
215 start = mdatoms->start;
216 end = mdatoms->homenr + start;
217 if (mdatoms->cFREEZE) {
218 for(i=start; i<end; i++) {
219 gf = mdatoms->cFREEZE[i];
222 if (!opts->nFreeze[gf][m])
231 for(i=start; i<end; i++) {
241 if (la_max >= 0 && DOMAINDECOMP(cr)) {
242 a_max = cr->dd->gatindex[la_max];
247 snew(sum,2*cr->nnodes+1);
248 sum[2*cr->nodeid] = fmax2;
249 sum[2*cr->nodeid+1] = a_max;
250 sum[2*cr->nnodes] = fnorm2;
251 gmx_sumd(2*cr->nnodes+1,sum,cr);
252 fnorm2 = sum[2*cr->nnodes];
253 /* Determine the global maximum */
254 for(i=0; i<cr->nnodes; i++) {
255 if (sum[2*i] > fmax2) {
257 a_max = (int)(sum[2*i+1] + 0.5);
264 *fnorm = sqrt(fnorm2);
271 static void get_state_f_norm_max(t_commrec *cr,
272 t_grpopts *opts,t_mdatoms *mdatoms,
275 get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
278 void init_em(FILE *fplog,const char *title,
279 t_commrec *cr,t_inputrec *ir,
280 t_state *state_global,gmx_mtop_t *top_global,
281 em_state_t *ems,gmx_localtop_t **top,
282 rvec **f,rvec **f_global,
283 t_nrnb *nrnb,rvec mu_tot,
284 t_forcerec *fr,gmx_enerdata_t **enerd,
285 t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
286 gmx_vsite_t *vsite,gmx_constr_t constr,
287 int nfile,const t_filenm fnm[],
288 gmx_mdoutf_t **outf,t_mdebin **mdebin)
295 fprintf(fplog,"Initiating %s\n",title);
298 state_global->ngtc = 0;
300 /* Initialize lambda variables */
301 initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
305 if (DOMAINDECOMP(cr))
307 *top = dd_init_local_top(top_global);
309 dd_init_local_state(cr->dd,state_global,&ems->s);
313 /* Distribute the charge groups over the nodes from the master node */
314 dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
315 state_global,top_global,ir,
316 &ems->s,&ems->f,mdatoms,*top,
317 fr,vsite,NULL,constr,
319 dd_store_state(cr->dd,&ems->s);
323 snew(*f_global,top_global->natoms);
333 snew(*f,top_global->natoms);
335 /* Just copy the state */
336 ems->s = *state_global;
337 snew(ems->s.x,ems->s.nalloc);
338 snew(ems->f,ems->s.nalloc);
339 for(i=0; i<state_global->natoms; i++)
341 copy_rvec(state_global->x[i],ems->s.x[i]);
343 copy_mat(state_global->box,ems->s.box);
345 if (PAR(cr) && ir->eI != eiNM)
347 /* Initialize the particle decomposition and split the topology */
348 *top = split_system(fplog,top_global,ir,cr);
350 pd_cg_range(cr,&fr->cg0,&fr->hcg);
354 *top = gmx_mtop_generate_local_top(top_global,ir);
358 forcerec_set_excl_load(fr,*top,cr);
360 init_bonded_thread_force_reduction(fr,&(*top)->idef);
362 if (ir->ePBC != epbcNONE && !fr->bMolPBC)
364 *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
373 pd_at_range(cr,&start,&homenr);
379 homenr = top_global->natoms;
381 atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
382 update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
386 set_vsite_top(vsite,*top,mdatoms,cr);
392 if (ir->eConstrAlg == econtSHAKE &&
393 gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
395 gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
396 econstr_names[econtSHAKE],econstr_names[econtLINCS]);
399 if (!DOMAINDECOMP(cr))
401 set_constraints(constr,*top,ir,mdatoms,cr);
404 if (!ir->bContinuation)
406 /* Constrain the starting coordinates */
408 constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
409 ir,NULL,cr,-1,0,mdatoms,
410 ems->s.x,ems->s.x,NULL,fr->bMolPBC,ems->s.box,
411 ems->s.lambda[efptFEP],&dvdlambda,
412 NULL,NULL,nrnb,econqCoord,FALSE,0,0);
418 *gstat = global_stat_init(ir);
421 *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
424 init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
429 /* Init bin for energy stuff */
430 *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
434 calc_shifts(ems->s.box,fr->shift_vec);
437 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
438 gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
440 if (!(cr->duty & DUTY_PME)) {
441 /* Tell the PME only node to finish */
442 gmx_pme_send_finish(cr);
447 em_time_end(fplog,cr,runtime,wcycle);
450 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
459 static void copy_em_coords(em_state_t *ems,t_state *state)
463 for(i=0; (i<state->natoms); i++)
465 copy_rvec(ems->s.x[i],state->x[i]);
469 static void write_em_traj(FILE *fplog,t_commrec *cr,
471 gmx_bool bX,gmx_bool bF,const char *confout,
472 gmx_mtop_t *top_global,
473 t_inputrec *ir,gmx_large_int_t step,
475 t_state *state_global,rvec *f_global)
479 if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
481 copy_em_coords(state,state_global);
486 if (bX) { mdof_flags |= MDOF_X; }
487 if (bF) { mdof_flags |= MDOF_F; }
488 write_traj(fplog,cr,outf,mdof_flags,
489 top_global,step,(double)step,
490 &state->s,state_global,state->f,f_global,NULL,NULL);
492 if (confout != NULL && MASTER(cr))
494 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
496 /* Make molecules whole only for confout writing */
497 do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
501 write_sto_conf_mtop(confout,
502 *top_global->name,top_global,
503 state_global->x,NULL,ir->ePBC,state_global->box);
507 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
509 em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
510 gmx_constr_t constr,gmx_localtop_t *top,
511 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
512 gmx_large_int_t count)
524 if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
526 gmx_incons("state mismatch in do_em_step");
529 s2->flags = s1->flags;
531 if (s2->nalloc != s1->nalloc)
533 s2->nalloc = s1->nalloc;
534 srenew(s2->x,s1->nalloc);
535 srenew(ems2->f, s1->nalloc);
536 if (s2->flags & (1<<estCGP))
538 srenew(s2->cg_p, s1->nalloc);
542 s2->natoms = s1->natoms;
543 copy_mat(s1->box,s2->box);
544 /* Copy free energy state */
545 for (i=0;i<efptNR;i++)
547 s2->lambda[i] = s1->lambda[i];
549 copy_mat(s1->box,s2->box);
552 end = md->start + md->homenr;
557 #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
562 #pragma omp for schedule(static) nowait
563 for(i=start; i<end; i++)
571 if (ir->opts.nFreeze[gf][m])
577 x2[i][m] = x1[i][m] + a*f[i][m];
582 if (s2->flags & (1<<estCGP))
584 /* Copy the CG p vector */
587 #pragma omp for schedule(static) nowait
588 for(i=start; i<end; i++)
590 copy_rvec(x1[i],x2[i]);
594 if (DOMAINDECOMP(cr))
596 s2->ddp_count = s1->ddp_count;
597 if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
600 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
601 srenew(s2->cg_gl,s2->cg_gl_nalloc);
604 s2->ncg_gl = s1->ncg_gl;
605 #pragma omp for schedule(static) nowait
606 for(i=0; i<s2->ncg_gl; i++)
608 s2->cg_gl[i] = s1->cg_gl[i];
610 s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
616 wallcycle_start(wcycle,ewcCONSTR);
618 constrain(NULL,TRUE,TRUE,constr,&top->idef,
619 ir,NULL,cr,count,0,md,
620 s1->x,s2->x,NULL,bMolPBC,s2->box,
621 s2->lambda[efptBONDED],&dvdlambda,
622 NULL,NULL,nrnb,econqCoord,FALSE,0,0);
623 wallcycle_stop(wcycle,ewcCONSTR);
627 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
628 gmx_mtop_t *top_global,t_inputrec *ir,
629 em_state_t *ems,gmx_localtop_t *top,
630 t_mdatoms *mdatoms,t_forcerec *fr,
631 gmx_vsite_t *vsite,gmx_constr_t constr,
632 t_nrnb *nrnb,gmx_wallcycle_t wcycle)
634 /* Repartition the domain decomposition */
635 wallcycle_start(wcycle,ewcDOMDEC);
636 dd_partition_system(fplog,step,cr,FALSE,1,
639 mdatoms,top,fr,vsite,NULL,constr,
641 dd_store_state(cr->dd,&ems->s);
642 wallcycle_stop(wcycle,ewcDOMDEC);
645 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
646 t_state *state_global,gmx_mtop_t *top_global,
647 em_state_t *ems,gmx_localtop_t *top,
648 t_inputrec *inputrec,
649 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
650 gmx_global_stat_t gstat,
651 gmx_vsite_t *vsite,gmx_constr_t constr,
653 t_graph *graph,t_mdatoms *mdatoms,
654 t_forcerec *fr,rvec mu_tot,
655 gmx_enerdata_t *enerd,tensor vir,tensor pres,
656 gmx_large_int_t count,gmx_bool bFirst)
661 tensor force_vir,shake_vir,ekin;
662 real dvdlambda,prescorr,enercorr,dvdlcorr;
665 /* Set the time to the initial time, the time does not change during EM */
666 t = inputrec->init_t;
669 (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
670 /* This the first state or an old state used before the last ns */
674 if (inputrec->nstlist > 0) {
676 } else if (inputrec->nstlist == -1) {
677 nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
679 gmx_sumi(1,&nabnsb,cr);
685 construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
686 top->idef.iparams,top->idef.il,
687 fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
689 if (DOMAINDECOMP(cr)) {
691 /* Repartition the domain decomposition */
692 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
693 ems,top,mdatoms,fr,vsite,constr,
698 /* Calc force & energy on new trial position */
699 /* do_force always puts the charge groups in the box and shifts again
700 * We do not unshift, so molecules are always whole in congrad.c
702 do_force(fplog,cr,inputrec,
703 count,nrnb,wcycle,top,top_global,&top_global->groups,
704 ems->s.box,ems->s.x,&ems->s.hist,
705 ems->f,force_vir,mdatoms,enerd,fcd,
706 ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
707 GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
708 GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
709 (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
711 /* Clear the unused shake virial and pressure */
712 clear_mat(shake_vir);
715 /* Communicate stuff when parallel */
716 if (PAR(cr) && inputrec->eI != eiNM)
718 wallcycle_start(wcycle,ewcMoveE);
720 global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
721 inputrec,NULL,NULL,NULL,1,&terminate,
722 top_global,&ems->s,FALSE,
728 wallcycle_stop(wcycle,ewcMoveE);
731 /* Calculate long range corrections to pressure and energy */
732 calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
733 pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
734 enerd->term[F_DISPCORR] = enercorr;
735 enerd->term[F_EPOT] += enercorr;
736 enerd->term[F_PRES] += prescorr;
737 enerd->term[F_DVDL] += dvdlcorr;
739 ems->epot = enerd->term[F_EPOT];
742 /* Project out the constraint components of the force */
743 wallcycle_start(wcycle,ewcCONSTR);
745 constrain(NULL,FALSE,FALSE,constr,&top->idef,
746 inputrec,NULL,cr,count,0,mdatoms,
747 ems->s.x,ems->f,ems->f,fr->bMolPBC,ems->s.box,
748 ems->s.lambda[efptBONDED],&dvdlambda,
749 NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
750 if (fr->bSepDVDL && fplog)
751 fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
752 enerd->term[F_DVDL_BONDED] += dvdlambda;
753 m_add(force_vir,shake_vir,vir);
754 wallcycle_stop(wcycle,ewcCONSTR);
756 copy_mat(force_vir,vir);
760 enerd->term[F_PRES] =
761 calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
763 sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
765 if (EI_ENERGY_MINIMIZATION(inputrec->eI))
767 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
771 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
773 em_state_t *s_min,em_state_t *s_b)
777 int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
779 unsigned char *grpnrFREEZE;
782 fprintf(debug,"Doing reorder_partsum\n");
787 cgs_gl = dd_charge_groups_global(cr->dd);
788 index = cgs_gl->index;
790 /* Collect fm in a global vector fmg.
791 * This conflicts with the spirit of domain decomposition,
792 * but to fully optimize this a much more complicated algorithm is required.
794 snew(fmg,mtop->natoms);
796 ncg = s_min->s.ncg_gl;
797 cg_gl = s_min->s.cg_gl;
799 for(c=0; c<ncg; c++) {
803 for(a=a0; a<a1; a++) {
804 copy_rvec(fm[i],fmg[a]);
808 gmx_sum(mtop->natoms*3,fmg[0],cr);
810 /* Now we will determine the part of the sum for the cgs in state s_b */
812 cg_gl = s_b->s.cg_gl;
816 grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
817 for(c=0; c<ncg; c++) {
821 for(a=a0; a<a1; a++) {
822 if (mdatoms->cFREEZE && grpnrFREEZE) {
825 for(m=0; m<DIM; m++) {
826 if (!opts->nFreeze[gf][m]) {
827 partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
839 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
841 em_state_t *s_min,em_state_t *s_b)
847 /* This is just the classical Polak-Ribiere calculation of beta;
848 * it looks a bit complicated since we take freeze groups into account,
849 * and might have to sum it in parallel runs.
852 if (!DOMAINDECOMP(cr) ||
853 (s_min->s.ddp_count == cr->dd->ddp_count &&
854 s_b->s.ddp_count == cr->dd->ddp_count)) {
859 /* This part of code can be incorrect with DD,
860 * since the atom ordering in s_b and s_min might differ.
862 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
863 if (mdatoms->cFREEZE)
864 gf = mdatoms->cFREEZE[i];
866 if (!opts->nFreeze[gf][m]) {
867 sum += (fb[i][m] - fm[i][m])*fb[i][m];
871 /* We need to reorder cgs while summing */
872 sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
877 return sum/sqr(s_min->fnorm);
880 double do_cg(FILE *fplog,t_commrec *cr,
881 int nfile,const t_filenm fnm[],
882 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
884 gmx_vsite_t *vsite,gmx_constr_t constr,
886 t_inputrec *inputrec,
887 gmx_mtop_t *top_global,t_fcdata *fcd,
888 t_state *state_global,
890 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
893 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
895 real cpt_period,real max_hours,
896 const char *deviceOptions,
898 gmx_runtime_t *runtime)
900 const char *CG="Polak-Ribiere Conjugate Gradients";
902 em_state_t *s_min,*s_a,*s_b,*s_c;
904 gmx_enerdata_t *enerd;
906 gmx_global_stat_t gstat;
908 rvec *f_global,*p,*sf,*sfm;
909 double gpa,gpb,gpc,tmp,sum[2],minstep;
916 gmx_bool converged,foundlower;
918 gmx_bool do_log=FALSE,do_ene=FALSE,do_x,do_f;
920 int number_steps,neval=0,nstcg=inputrec->nstcgsteep;
922 int i,m,gf,step,nminstep;
927 s_min = init_em_state();
928 s_a = init_em_state();
929 s_b = init_em_state();
930 s_c = init_em_state();
932 /* Init em and store the local state in s_min */
933 init_em(fplog,CG,cr,inputrec,
934 state_global,top_global,s_min,&top,&f,&f_global,
935 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
936 nfile,fnm,&outf,&mdebin);
938 /* Print to log file */
939 print_em_start(fplog,cr,runtime,wcycle,CG);
941 /* Max number of steps */
942 number_steps=inputrec->nsteps;
945 sp_header(stderr,CG,inputrec->em_tol,number_steps);
947 sp_header(fplog,CG,inputrec->em_tol,number_steps);
949 /* Call the force routine and some auxiliary (neighboursearching etc.) */
950 /* do_force always puts the charge groups in the box and shifts again
951 * We do not unshift, so molecules are always whole in congrad.c
953 evaluate_energy(fplog,bVerbose,cr,
954 state_global,top_global,s_min,top,
955 inputrec,nrnb,wcycle,gstat,
956 vsite,constr,fcd,graph,mdatoms,fr,
957 mu_tot,enerd,vir,pres,-1,TRUE);
961 /* Copy stuff to the energy bin for easy printing etc. */
962 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
963 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
964 NULL,NULL,vir,pres,NULL,mu_tot,constr);
966 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
967 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
968 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
972 /* Estimate/guess the initial stepsize */
973 stepsize = inputrec->em_stepsize/s_min->fnorm;
976 fprintf(stderr," F-max = %12.5e on atom %d\n",
977 s_min->fmax,s_min->a_fmax+1);
978 fprintf(stderr," F-Norm = %12.5e\n",
979 s_min->fnorm/sqrt(state_global->natoms));
980 fprintf(stderr,"\n");
981 /* and copy to the log file too... */
982 fprintf(fplog," F-max = %12.5e on atom %d\n",
983 s_min->fmax,s_min->a_fmax+1);
984 fprintf(fplog," F-Norm = %12.5e\n",
985 s_min->fnorm/sqrt(state_global->natoms));
988 /* Start the loop over CG steps.
989 * Each successful step is counted, and we continue until
990 * we either converge or reach the max number of steps.
993 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
995 /* start taking steps in a new direction
996 * First time we enter the routine, beta=0, and the direction is
997 * simply the negative gradient.
1000 /* Calculate the new direction in p, and the gradient in this direction, gpa */
1005 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1006 if (mdatoms->cFREEZE)
1007 gf = mdatoms->cFREEZE[i];
1008 for(m=0; m<DIM; m++) {
1009 if (!inputrec->opts.nFreeze[gf][m]) {
1010 p[i][m] = sf[i][m] + beta*p[i][m];
1011 gpa -= p[i][m]*sf[i][m];
1012 /* f is negative gradient, thus the sign */
1019 /* Sum the gradient along the line across CPUs */
1021 gmx_sumd(1,&gpa,cr);
1023 /* Calculate the norm of the search vector */
1024 get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
1026 /* Just in case stepsize reaches zero due to numerical precision... */
1028 stepsize = inputrec->em_stepsize/pnorm;
1031 * Double check the value of the derivative in the search direction.
1032 * If it is positive it must be due to the old information in the
1033 * CG formula, so just remove that and start over with beta=0.
1034 * This corresponds to a steepest descent step.
1038 step--; /* Don't count this step since we are restarting */
1039 continue; /* Go back to the beginning of the big for-loop */
1042 /* Calculate minimum allowed stepsize, before the average (norm)
1043 * relative change in coordinate is smaller than precision
1046 for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1047 for(m=0; m<DIM; m++) {
1048 tmp = fabs(s_min->s.x[i][m]);
1055 /* Add up from all CPUs */
1057 gmx_sumd(1,&minstep,cr);
1059 minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1061 if(stepsize<minstep) {
1066 /* Write coordinates if necessary */
1067 do_x = do_per_step(step,inputrec->nstxout);
1068 do_f = do_per_step(step,inputrec->nstfout);
1070 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1071 top_global,inputrec,step,
1072 s_min,state_global,f_global);
1074 /* Take a step downhill.
1075 * In theory, we should minimize the function along this direction.
1076 * That is quite possible, but it turns out to take 5-10 function evaluations
1077 * for each line. However, we dont really need to find the exact minimum -
1078 * it is much better to start a new CG step in a modified direction as soon
1079 * as we are close to it. This will save a lot of energy evaluations.
1081 * In practice, we just try to take a single step.
1082 * If it worked (i.e. lowered the energy), we increase the stepsize but
1083 * the continue straight to the next CG step without trying to find any minimum.
1084 * If it didn't work (higher energy), there must be a minimum somewhere between
1085 * the old position and the new one.
1087 * Due to the finite numerical accuracy, it turns out that it is a good idea
1088 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1089 * This leads to lower final energies in the tests I've done. / Erik
1091 s_a->epot = s_min->epot;
1093 c = a + stepsize; /* reference position along line is zero */
1095 if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1096 em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1097 s_min,top,mdatoms,fr,vsite,constr,
1101 /* Take a trial step (new coords in s_c) */
1102 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,c,s_min->s.cg_p,s_c,
1103 constr,top,nrnb,wcycle,-1);
1106 /* Calculate energy for the trial step */
1107 evaluate_energy(fplog,bVerbose,cr,
1108 state_global,top_global,s_c,top,
1109 inputrec,nrnb,wcycle,gstat,
1110 vsite,constr,fcd,graph,mdatoms,fr,
1111 mu_tot,enerd,vir,pres,-1,FALSE);
1113 /* Calc derivative along line */
1117 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1118 for(m=0; m<DIM; m++)
1119 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1121 /* Sum the gradient along the line across CPUs */
1123 gmx_sumd(1,&gpc,cr);
1125 /* This is the max amount of increase in energy we tolerate */
1126 tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1128 /* Accept the step if the energy is lower, or if it is not significantly higher
1129 * and the line derivative is still negative.
1131 if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1133 /* Great, we found a better energy. Increase step for next iteration
1134 * if we are still going down, decrease it otherwise
1137 stepsize *= 1.618034; /* The golden section */
1139 stepsize *= 0.618034; /* 1/golden section */
1141 /* New energy is the same or higher. We will have to do some work
1142 * to find a smaller value in the interval. Take smaller step next time!
1145 stepsize *= 0.618034;
1151 /* OK, if we didn't find a lower value we will have to locate one now - there must
1152 * be one in the interval [a=0,c].
1153 * The same thing is valid here, though: Don't spend dozens of iterations to find
1154 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1155 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1157 * I also have a safeguard for potentially really patological functions so we never
1158 * take more than 20 steps before we give up ...
1160 * If we already found a lower value we just skip this step and continue to the update.
1166 /* Select a new trial point.
1167 * If the derivatives at points a & c have different sign we interpolate to zero,
1168 * otherwise just do a bisection.
1171 b = a + gpa*(a-c)/(gpc-gpa);
1175 /* safeguard if interpolation close to machine accuracy causes errors:
1176 * never go outside the interval
1181 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1182 /* Reload the old state */
1183 em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1184 s_min,top,mdatoms,fr,vsite,constr,
1188 /* Take a trial step to this new point - new coords in s_b */
1189 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,b,s_min->s.cg_p,s_b,
1190 constr,top,nrnb,wcycle,-1);
1193 /* Calculate energy for the trial step */
1194 evaluate_energy(fplog,bVerbose,cr,
1195 state_global,top_global,s_b,top,
1196 inputrec,nrnb,wcycle,gstat,
1197 vsite,constr,fcd,graph,mdatoms,fr,
1198 mu_tot,enerd,vir,pres,-1,FALSE);
1200 /* p does not change within a step, but since the domain decomposition
1201 * might change, we have to use cg_p of s_b here.
1206 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1207 for(m=0; m<DIM; m++)
1208 gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1210 /* Sum the gradient along the line across CPUs */
1212 gmx_sumd(1,&gpb,cr);
1215 fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1216 s_a->epot,s_b->epot,s_c->epot,gpb);
1218 epot_repl = s_b->epot;
1220 /* Keep one of the intervals based on the value of the derivative at the new point */
1222 /* Replace c endpoint with b */
1223 swap_em_state(s_b,s_c);
1227 /* Replace a endpoint with b */
1228 swap_em_state(s_b,s_a);
1234 * Stop search as soon as we find a value smaller than the endpoints.
1235 * Never run more than 20 steps, no matter what.
1238 } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1241 if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1243 /* OK. We couldn't find a significantly lower energy.
1244 * If beta==0 this was steepest descent, and then we give up.
1245 * If not, set beta=0 and restart with steepest descent before quitting.
1252 /* Reset memory before giving up */
1258 /* Select min energy state of A & C, put the best in B.
1260 if (s_c->epot < s_a->epot) {
1262 fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1263 s_c->epot,s_a->epot);
1264 swap_em_state(s_b,s_c);
1269 fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1270 s_a->epot,s_c->epot);
1271 swap_em_state(s_b,s_a);
1278 fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1280 swap_em_state(s_b,s_c);
1285 /* new search direction */
1286 /* beta = 0 means forget all memory and restart with steepest descents. */
1287 if (nstcg && ((step % nstcg)==0))
1290 /* s_min->fnorm cannot be zero, because then we would have converged
1294 /* Polak-Ribiere update.
1295 * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1297 beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1299 /* Limit beta to prevent oscillations */
1300 if (fabs(beta) > 5.0)
1304 /* update positions */
1305 swap_em_state(s_min,s_b);
1308 /* Print it if necessary */
1311 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1312 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1313 s_min->fmax,s_min->a_fmax+1);
1314 /* Store the new (lower) energies */
1315 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1316 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
1317 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1319 do_log = do_per_step(step,inputrec->nstlog);
1320 do_ene = do_per_step(step,inputrec->nstenergy);
1322 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1323 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1324 do_log ? fplog : NULL,step,step,eprNORMAL,
1325 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1328 /* Stop when the maximum force lies below tolerance.
1329 * If we have reached machine precision, converged is already set to true.
1331 converged = converged || (s_min->fmax < inputrec->em_tol);
1333 } /* End of the loop */
1336 step--; /* we never took that last step in this case */
1338 if (s_min->fmax > inputrec->em_tol)
1342 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1343 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1349 /* If we printed energy and/or logfile last step (which was the last step)
1350 * we don't have to do it again, but otherwise print the final values.
1353 /* Write final value to log since we didn't do anything the last step */
1354 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1356 if (!do_ene || !do_log) {
1357 /* Write final energy file entries */
1358 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1359 !do_log ? fplog : NULL,step,step,eprNORMAL,
1360 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1364 /* Print some stuff... */
1366 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1369 * For accurate normal mode calculation it is imperative that we
1370 * store the last conformation into the full precision binary trajectory.
1372 * However, we should only do it if we did NOT already write this step
1373 * above (which we did if do_x or do_f was true).
1375 do_x = !do_per_step(step,inputrec->nstxout);
1376 do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1378 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1379 top_global,inputrec,step,
1380 s_min,state_global,f_global);
1382 fnormn = s_min->fnorm/sqrt(state_global->natoms);
1385 print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1386 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1387 print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1388 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1390 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1393 finish_em(fplog,cr,outf,runtime,wcycle);
1395 /* To print the actual number of steps we needed somewhere */
1396 runtime->nsteps_done = step;
1399 } /* That's all folks */
1402 double do_lbfgs(FILE *fplog,t_commrec *cr,
1403 int nfile,const t_filenm fnm[],
1404 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1406 gmx_vsite_t *vsite,gmx_constr_t constr,
1408 t_inputrec *inputrec,
1409 gmx_mtop_t *top_global,t_fcdata *fcd,
1412 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1415 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1416 gmx_membed_t membed,
1417 real cpt_period,real max_hours,
1418 const char *deviceOptions,
1419 unsigned long Flags,
1420 gmx_runtime_t *runtime)
1422 static const char *LBFGS="Low-Memory BFGS Minimizer";
1424 gmx_localtop_t *top;
1425 gmx_enerdata_t *enerd;
1427 gmx_global_stat_t gstat;
1430 int ncorr,nmaxcorr,point,cp,neval,nminstep;
1431 double stepsize,gpa,gpb,gpc,tmp,minstep;
1432 real *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1433 real *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1434 real a,b,c,maxdelta,delta;
1435 real diag,Epot0,Epot,EpotA,EpotB,EpotC;
1436 real dgdx,dgdg,sq,yr,beta;
1438 gmx_bool converged,first;
1441 gmx_bool do_log,do_ene,do_x,do_f,foundlower,*frozen;
1443 int start,end,number_steps;
1445 int i,k,m,n,nfmax,gf,step;
1451 gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1455 gmx_fatal(FARGS,"The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
1458 n = 3*state->natoms;
1459 nmaxcorr = inputrec->nbfgscorr;
1461 /* Allocate memory */
1462 /* Use pointers to real so we dont have to loop over both atoms and
1463 * dimensions all the time...
1464 * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1465 * that point to the same memory.
1479 snew(alpha,nmaxcorr);
1482 for(i=0;i<nmaxcorr;i++)
1486 for(i=0;i<nmaxcorr;i++)
1493 init_em(fplog,LBFGS,cr,inputrec,
1494 state,top_global,&ems,&top,&f,&f_global,
1495 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1496 nfile,fnm,&outf,&mdebin);
1497 /* Do_lbfgs is not completely updated like do_steep and do_cg,
1498 * so we free some memory again.
1503 xx = (real *)state->x;
1506 start = mdatoms->start;
1507 end = mdatoms->homenr + start;
1509 /* Print to log file */
1510 print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1512 do_log = do_ene = do_x = do_f = TRUE;
1514 /* Max number of steps */
1515 number_steps=inputrec->nsteps;
1517 /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1519 for(i=start; i<end; i++) {
1520 if (mdatoms->cFREEZE)
1521 gf = mdatoms->cFREEZE[i];
1522 for(m=0; m<DIM; m++)
1523 frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1526 sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1528 sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1531 construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1532 top->idef.iparams,top->idef.il,
1533 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1535 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1536 /* do_force always puts the charge groups in the box and shifts again
1537 * We do not unshift, so molecules are always whole
1542 evaluate_energy(fplog,bVerbose,cr,
1543 state,top_global,&ems,top,
1544 inputrec,nrnb,wcycle,gstat,
1545 vsite,constr,fcd,graph,mdatoms,fr,
1546 mu_tot,enerd,vir,pres,-1,TRUE);
1550 /* Copy stuff to the energy bin for easy printing etc. */
1551 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1552 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1553 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1555 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1556 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1557 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1561 /* This is the starting energy */
1562 Epot = enerd->term[F_EPOT];
1568 /* Set the initial step.
1569 * since it will be multiplied by the non-normalized search direction
1570 * vector (force vector the first time), we scale it by the
1571 * norm of the force.
1575 fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1576 fprintf(stderr," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1577 fprintf(stderr," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1578 fprintf(stderr,"\n");
1579 /* and copy to the log file too... */
1580 fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1581 fprintf(fplog," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1582 fprintf(fplog," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1583 fprintf(fplog,"\n");
1589 dx[point][i] = ff[i]; /* Initial search direction */
1593 stepsize = 1.0/fnorm;
1596 /* Start the loop over BFGS steps.
1597 * Each successful step is counted, and we continue until
1598 * we either converge or reach the max number of steps.
1603 /* Set the gradient from the force */
1605 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1607 /* Write coordinates if necessary */
1608 do_x = do_per_step(step,inputrec->nstxout);
1609 do_f = do_per_step(step,inputrec->nstfout);
1614 mdof_flags |= MDOF_X;
1619 mdof_flags |= MDOF_F;
1622 write_traj(fplog,cr,outf,mdof_flags,
1623 top_global,step,(real)step,state,state,f,f,NULL,NULL);
1625 /* Do the linesearching in the direction dx[point][0..(n-1)] */
1627 /* pointer to current direction - point=0 first time here */
1630 /* calculate line gradient */
1631 for(gpa=0,i=0;i<n;i++)
1634 /* Calculate minimum allowed stepsize, before the average (norm)
1635 * relative change in coordinate is smaller than precision
1637 for(minstep=0,i=0;i<n;i++) {
1644 minstep = GMX_REAL_EPS/sqrt(minstep/n);
1646 if(stepsize<minstep) {
1651 /* Store old forces and coordinates */
1663 /* Take a step downhill.
1664 * In theory, we should minimize the function along this direction.
1665 * That is quite possible, but it turns out to take 5-10 function evaluations
1666 * for each line. However, we dont really need to find the exact minimum -
1667 * it is much better to start a new BFGS step in a modified direction as soon
1668 * as we are close to it. This will save a lot of energy evaluations.
1670 * In practice, we just try to take a single step.
1671 * If it worked (i.e. lowered the energy), we increase the stepsize but
1672 * the continue straight to the next BFGS step without trying to find any minimum.
1673 * If it didn't work (higher energy), there must be a minimum somewhere between
1674 * the old position and the new one.
1676 * Due to the finite numerical accuracy, it turns out that it is a good idea
1677 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1678 * This leads to lower final energies in the tests I've done. / Erik
1683 c = a + stepsize; /* reference position along line is zero */
1685 /* Check stepsize first. We do not allow displacements
1686 * larger than emstep.
1696 if(maxdelta>inputrec->em_stepsize)
1698 } while(maxdelta>inputrec->em_stepsize);
1700 /* Take a trial step */
1702 xc[i] = lastx[i] + c*s[i];
1705 /* Calculate energy for the trial step */
1706 ems.s.x = (rvec *)xc;
1708 evaluate_energy(fplog,bVerbose,cr,
1709 state,top_global,&ems,top,
1710 inputrec,nrnb,wcycle,gstat,
1711 vsite,constr,fcd,graph,mdatoms,fr,
1712 mu_tot,enerd,vir,pres,step,FALSE);
1715 /* Calc derivative along line */
1716 for(gpc=0,i=0; i<n; i++) {
1717 gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
1719 /* Sum the gradient along the line across CPUs */
1721 gmx_sumd(1,&gpc,cr);
1723 /* This is the max amount of increase in energy we tolerate */
1724 tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1726 /* Accept the step if the energy is lower, or if it is not significantly higher
1727 * and the line derivative is still negative.
1729 if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1731 /* Great, we found a better energy. Increase step for next iteration
1732 * if we are still going down, decrease it otherwise
1735 stepsize *= 1.618034; /* The golden section */
1737 stepsize *= 0.618034; /* 1/golden section */
1739 /* New energy is the same or higher. We will have to do some work
1740 * to find a smaller value in the interval. Take smaller step next time!
1743 stepsize *= 0.618034;
1746 /* OK, if we didn't find a lower value we will have to locate one now - there must
1747 * be one in the interval [a=0,c].
1748 * The same thing is valid here, though: Don't spend dozens of iterations to find
1749 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1750 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1752 * I also have a safeguard for potentially really patological functions so we never
1753 * take more than 20 steps before we give up ...
1755 * If we already found a lower value we just skip this step and continue to the update.
1762 /* Select a new trial point.
1763 * If the derivatives at points a & c have different sign we interpolate to zero,
1764 * otherwise just do a bisection.
1768 b = a + gpa*(a-c)/(gpc-gpa);
1772 /* safeguard if interpolation close to machine accuracy causes errors:
1773 * never go outside the interval
1778 /* Take a trial step */
1780 xb[i] = lastx[i] + b*s[i];
1783 /* Calculate energy for the trial step */
1784 ems.s.x = (rvec *)xb;
1786 evaluate_energy(fplog,bVerbose,cr,
1787 state,top_global,&ems,top,
1788 inputrec,nrnb,wcycle,gstat,
1789 vsite,constr,fcd,graph,mdatoms,fr,
1790 mu_tot,enerd,vir,pres,step,FALSE);
1795 for(gpb=0,i=0; i<n; i++)
1796 gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
1798 /* Sum the gradient along the line across CPUs */
1800 gmx_sumd(1,&gpb,cr);
1802 /* Keep one of the intervals based on the value of the derivative at the new point */
1804 /* Replace c endpoint with b */
1808 /* swap coord pointers b/c */
1816 /* Replace a endpoint with b */
1820 /* swap coord pointers a/b */
1830 * Stop search as soon as we find a value smaller than the endpoints,
1831 * or if the tolerance is below machine precision.
1832 * Never run more than 20 steps, no matter what.
1835 } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1837 if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1838 /* OK. We couldn't find a significantly lower energy.
1839 * If ncorr==0 this was steepest descent, and then we give up.
1840 * If not, reset memory to restart as steepest descent before quitting.
1849 /* Search in gradient direction */
1852 /* Reset stepsize */
1853 stepsize = 1.0/fnorm;
1858 /* Select min energy state of A & C, put the best in xx/ff/Epot
1889 /* Update the memory information, and calculate a new
1890 * approximation of the inverse hessian
1893 /* Have new data in Epot, xx, ff */
1898 dg[point][i]=lastf[i]-ff[i];
1899 dx[point][i]*=stepsize;
1905 dgdg+=dg[point][i]*dg[point][i];
1906 dgdx+=dg[point][i]*dx[point][i];
1911 rho[point]=1.0/dgdx;
1923 /* Recursive update. First go back over the memory points */
1924 for(k=0;k<ncorr;k++) {
1933 alpha[cp]=rho[cp]*sq;
1936 p[i] -= alpha[cp]*dg[cp][i];
1942 /* And then go forward again */
1943 for(k=0;k<ncorr;k++) {
1946 yr += p[i]*dg[cp][i];
1949 beta = alpha[cp]-beta;
1952 p[i] += beta*dx[cp][i];
1961 dx[point][i] = p[i];
1967 /* Test whether the convergence criterion is met */
1968 get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1970 /* Print it if necessary */
1973 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1974 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1975 /* Store the new (lower) energies */
1976 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1977 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1978 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1979 do_log = do_per_step(step,inputrec->nstlog);
1980 do_ene = do_per_step(step,inputrec->nstenergy);
1982 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1983 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1984 do_log ? fplog : NULL,step,step,eprNORMAL,
1985 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1988 /* Stop when the maximum force lies below tolerance.
1989 * If we have reached machine precision, converged is already set to true.
1992 converged = converged || (fmax < inputrec->em_tol);
1994 } /* End of the loop */
1997 step--; /* we never took that last step in this case */
1999 if(fmax>inputrec->em_tol)
2003 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
2004 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
2009 /* If we printed energy and/or logfile last step (which was the last step)
2010 * we don't have to do it again, but otherwise print the final values.
2012 if(!do_log) /* Write final value to log since we didn't do anythin last step */
2013 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
2014 if(!do_ene || !do_log) /* Write final energy file entries */
2015 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
2016 !do_log ? fplog : NULL,step,step,eprNORMAL,
2017 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2019 /* Print some stuff... */
2021 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2024 * For accurate normal mode calculation it is imperative that we
2025 * store the last conformation into the full precision binary trajectory.
2027 * However, we should only do it if we did NOT already write this step
2028 * above (which we did if do_x or do_f was true).
2030 do_x = !do_per_step(step,inputrec->nstxout);
2031 do_f = !do_per_step(step,inputrec->nstfout);
2032 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
2033 top_global,inputrec,step,
2037 print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
2038 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2039 print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
2040 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2042 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
2045 finish_em(fplog,cr,outf,runtime,wcycle);
2047 /* To print the actual number of steps we needed somewhere */
2048 runtime->nsteps_done = step;
2051 } /* That's all folks */
2054 double do_steep(FILE *fplog,t_commrec *cr,
2055 int nfile, const t_filenm fnm[],
2056 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2058 gmx_vsite_t *vsite,gmx_constr_t constr,
2060 t_inputrec *inputrec,
2061 gmx_mtop_t *top_global,t_fcdata *fcd,
2062 t_state *state_global,
2064 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2067 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2068 gmx_membed_t membed,
2069 real cpt_period,real max_hours,
2070 const char *deviceOptions,
2071 unsigned long Flags,
2072 gmx_runtime_t *runtime)
2074 const char *SD="Steepest Descents";
2075 em_state_t *s_min,*s_try;
2077 gmx_localtop_t *top;
2078 gmx_enerdata_t *enerd;
2080 gmx_global_stat_t gstat;
2082 real stepsize,constepsize;
2083 real ustep,dvdlambda,fnormn;
2086 gmx_bool bDone,bAbort,do_x,do_f;
2091 int steps_accepted=0;
2095 s_min = init_em_state();
2096 s_try = init_em_state();
2098 /* Init em and store the local state in s_try */
2099 init_em(fplog,SD,cr,inputrec,
2100 state_global,top_global,s_try,&top,&f,&f_global,
2101 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2102 nfile,fnm,&outf,&mdebin);
2104 /* Print to log file */
2105 print_em_start(fplog,cr,runtime,wcycle,SD);
2107 /* Set variables for stepsize (in nm). This is the largest
2108 * step that we are going to make in any direction.
2110 ustep = inputrec->em_stepsize;
2113 /* Max number of steps */
2114 nsteps = inputrec->nsteps;
2117 /* Print to the screen */
2118 sp_header(stderr,SD,inputrec->em_tol,nsteps);
2120 sp_header(fplog,SD,inputrec->em_tol,nsteps);
2122 /**** HERE STARTS THE LOOP ****
2123 * count is the counter for the number of steps
2124 * bDone will be TRUE when the minimization has converged
2125 * bAbort will be TRUE when nsteps steps have been performed or when
2126 * the stepsize becomes smaller than is reasonable for machine precision
2131 while( !bDone && !bAbort ) {
2132 bAbort = (nsteps >= 0) && (count == nsteps);
2134 /* set new coordinates, except for first step */
2136 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,
2137 s_min,stepsize,s_min->f,s_try,
2138 constr,top,nrnb,wcycle,count);
2141 evaluate_energy(fplog,bVerbose,cr,
2142 state_global,top_global,s_try,top,
2143 inputrec,nrnb,wcycle,gstat,
2144 vsite,constr,fcd,graph,mdatoms,fr,
2145 mu_tot,enerd,vir,pres,count,count==0);
2148 print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
2151 s_min->epot = s_try->epot + 1;
2153 /* Print it if necessary */
2156 fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2157 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2158 (s_try->epot < s_min->epot) ? '\n' : '\r');
2161 if (s_try->epot < s_min->epot) {
2162 /* Store the new (lower) energies */
2163 upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2164 mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
2165 s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
2166 print_ebin(outf->fp_ene,TRUE,
2167 do_per_step(steps_accepted,inputrec->nstdisreout),
2168 do_per_step(steps_accepted,inputrec->nstorireout),
2169 fplog,count,count,eprNORMAL,TRUE,
2170 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2175 /* Now if the new energy is smaller than the previous...
2176 * or if this is the first step!
2177 * or if we did random steps!
2180 if ( (count==0) || (s_try->epot < s_min->epot) ) {
2183 /* Test whether the convergence criterion is met... */
2184 bDone = (s_try->fmax < inputrec->em_tol);
2186 /* Copy the arrays for force, positions and energy */
2187 /* The 'Min' array always holds the coords and forces of the minimal
2189 swap_em_state(s_min,s_try);
2193 /* Write to trn, if necessary */
2194 do_x = do_per_step(steps_accepted,inputrec->nstxout);
2195 do_f = do_per_step(steps_accepted,inputrec->nstfout);
2196 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2197 top_global,inputrec,count,
2198 s_min,state_global,f_global);
2201 /* If energy is not smaller make the step smaller... */
2204 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2205 /* Reload the old state */
2206 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2207 s_min,top,mdatoms,fr,vsite,constr,
2212 /* Determine new step */
2213 stepsize = ustep/s_min->fmax;
2215 /* Check if stepsize is too small, with 1 nm as a characteristic length */
2217 if (count == nsteps || ustep < 1e-12)
2219 if (count == nsteps || ustep < 1e-6)
2224 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2225 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2231 } /* End of the loop */
2233 /* Print some shit... */
2235 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2236 write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2237 top_global,inputrec,count,
2238 s_min,state_global,f_global);
2240 fnormn = s_min->fnorm/sqrt(state_global->natoms);
2243 print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2244 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2245 print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2246 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2249 finish_em(fplog,cr,outf,runtime,wcycle);
2251 /* To print the actual number of steps we needed somewhere */
2252 inputrec->nsteps=count;
2254 runtime->nsteps_done = count;
2257 } /* That's all folks */
2260 double do_nm(FILE *fplog,t_commrec *cr,
2261 int nfile,const t_filenm fnm[],
2262 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2264 gmx_vsite_t *vsite,gmx_constr_t constr,
2266 t_inputrec *inputrec,
2267 gmx_mtop_t *top_global,t_fcdata *fcd,
2268 t_state *state_global,
2270 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2273 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2274 gmx_membed_t membed,
2275 real cpt_period,real max_hours,
2276 const char *deviceOptions,
2277 unsigned long Flags,
2278 gmx_runtime_t *runtime)
2280 const char *NM = "Normal Mode Analysis";
2285 gmx_localtop_t *top;
2286 gmx_enerdata_t *enerd;
2288 gmx_global_stat_t gstat;
2290 real t,t0,lambda,lam0;
2295 gmx_bool bSparse; /* use sparse matrix storage format */
2297 gmx_sparsematrix_t * sparse_matrix = NULL;
2298 real * full_matrix = NULL;
2299 em_state_t * state_work;
2301 /* added with respect to mdrun */
2303 real der_range=10.0*sqrt(GMX_REAL_EPS);
2309 gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2312 state_work = init_em_state();
2314 /* Init em and store the local state in state_minimum */
2315 init_em(fplog,NM,cr,inputrec,
2316 state_global,top_global,state_work,&top,
2318 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2319 nfile,fnm,&outf,NULL);
2321 natoms = top_global->natoms;
2329 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2330 " which MIGHT not be accurate enough for normal mode analysis.\n"
2331 " Gromacs now uses sparse matrix storage, so the memory requirements\n"
2332 " are fairly modest even if you recompile in double precision.\n\n");
2336 /* Check if we can/should use sparse storage format.
2338 * Sparse format is only useful when the Hessian itself is sparse, which it
2339 * will be when we use a cutoff.
2340 * For small systems (n<1000) it is easier to always use full matrix format, though.
2342 if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2344 fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2347 else if(top_global->natoms < 1000)
2349 fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2354 fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2358 sz = DIM*top_global->natoms;
2360 fprintf(stderr,"Allocating Hessian memory...\n\n");
2364 sparse_matrix=gmx_sparsematrix_init(sz);
2365 sparse_matrix->compressed_symmetric = TRUE;
2369 snew(full_matrix,sz*sz);
2372 /* Initial values */
2373 t0 = inputrec->init_t;
2374 lam0 = inputrec->fepvals->init_lambda;
2382 /* Write start time and temperature */
2383 print_em_start(fplog,cr,runtime,wcycle,NM);
2385 /* fudge nr of steps to nr of atoms */
2386 inputrec->nsteps = natoms*2;
2390 fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2391 *(top_global->name),(int)inputrec->nsteps);
2394 nnodes = cr->nnodes;
2396 /* Make evaluate_energy do a single node force calculation */
2398 evaluate_energy(fplog,bVerbose,cr,
2399 state_global,top_global,state_work,top,
2400 inputrec,nrnb,wcycle,gstat,
2401 vsite,constr,fcd,graph,mdatoms,fr,
2402 mu_tot,enerd,vir,pres,-1,TRUE);
2403 cr->nnodes = nnodes;
2405 /* if forces are not small, warn user */
2406 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2410 fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2411 if (state_work->fmax > 1.0e-3)
2413 fprintf(stderr,"Maximum force probably not small enough to");
2414 fprintf(stderr," ensure that you are in an \nenergy well. ");
2415 fprintf(stderr,"Be aware that negative eigenvalues may occur");
2416 fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2420 /***********************************************************
2422 * Loop over all pairs in matrix
2424 * do_force called twice. Once with positive and
2425 * once with negative displacement
2427 ************************************************************/
2429 /* Steps are divided one by one over the nodes */
2430 for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2433 for (d=0; d<DIM; d++)
2435 x_min = state_work->s.x[atom][d];
2437 state_work->s.x[atom][d] = x_min - der_range;
2439 /* Make evaluate_energy do a single node force calculation */
2441 evaluate_energy(fplog,bVerbose,cr,
2442 state_global,top_global,state_work,top,
2443 inputrec,nrnb,wcycle,gstat,
2444 vsite,constr,fcd,graph,mdatoms,fr,
2445 mu_tot,enerd,vir,pres,atom*2,FALSE);
2447 for(i=0; i<natoms; i++)
2449 copy_rvec(state_work->f[i], fneg[i]);
2452 state_work->s.x[atom][d] = x_min + der_range;
2454 evaluate_energy(fplog,bVerbose,cr,
2455 state_global,top_global,state_work,top,
2456 inputrec,nrnb,wcycle,gstat,
2457 vsite,constr,fcd,graph,mdatoms,fr,
2458 mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2459 cr->nnodes = nnodes;
2461 /* x is restored to original */
2462 state_work->s.x[atom][d] = x_min;
2464 for(j=0; j<natoms; j++)
2466 for (k=0; (k<DIM); k++)
2469 -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2477 #define mpi_type MPI_DOUBLE
2479 #define mpi_type MPI_FLOAT
2481 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2482 cr->mpi_comm_mygroup);
2487 for(node=0; (node<nnodes && atom+node<natoms); node++)
2493 MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2494 cr->mpi_comm_mygroup,&stat);
2499 row = (atom + node)*DIM + d;
2501 for(j=0; j<natoms; j++)
2503 for(k=0; k<DIM; k++)
2509 if (col >= row && dfdx[j][k] != 0.0)
2511 gmx_sparsematrix_increment_value(sparse_matrix,
2512 row,col,dfdx[j][k]);
2517 full_matrix[row*sz+col] = dfdx[j][k];
2524 if (bVerbose && fplog)
2529 /* write progress */
2530 if (MASTER(cr) && bVerbose)
2532 fprintf(stderr,"\rFinished step %d out of %d",
2533 min(atom+nnodes,natoms),natoms);
2540 fprintf(stderr,"\n\nWriting Hessian...\n");
2541 gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2544 finish_em(fplog,cr,outf,runtime,wcycle);
2546 runtime->nsteps_done = natoms*2;