1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROwing Monsters And Cloning Shrimps
55 #include "gmx_fatal.h"
70 #include "sparsematrix.h"
74 #include "gmx_wallcycle.h"
75 #include "mtop_util.h"
88 static em_state_t *init_em_state()
94 /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? */
95 snew(ems->s.lambda,efptNR);
100 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
101 gmx_wallcycle_t wcycle,
106 runtime_start(runtime);
108 sprintf(buf,"Started %s",name);
109 print_date_and_time(fplog,cr->nodeid,buf,NULL);
111 wallcycle_start(wcycle,ewcRUN);
113 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
114 gmx_wallcycle_t wcycle)
116 wallcycle_stop(wcycle,ewcRUN);
118 runtime_end(runtime);
121 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
124 fprintf(out,"%s:\n",minimizer);
125 fprintf(out," Tolerance (Fmax) = %12.5e\n",ftol);
126 fprintf(out," Number of steps = %12d\n",nsteps);
129 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
135 "\nEnergy minimization reached the maximum number"
136 "of steps before the forces reached the requested"
137 "precision Fmax < %g.\n",ftol);
142 "\nEnergy minimization has stopped, but the forces have"
143 "not converged to the requested precision Fmax < %g (which"
144 "may not be possible for your system). It stopped"
145 "because the algorithm tried to make a new step whose size"
146 "was too small, or there was no change in the energy since"
147 "last step. Either way, we regard the minimization as"
148 "converged to within the available machine precision,"
149 "given your starting configuration and EM parameters.\n%s%s",
151 sizeof(real)<sizeof(double) ?
152 "\nDouble precision normally gives you higher accuracy, but"
153 "this is often not needed for preparing to run molecular"
157 "You might need to increase your constraint accuracy, or turn\n"
158 "off constraints altogether (set constraints = none in mdp file)\n" :
161 fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
166 static void print_converged(FILE *fp,const char *alg,real ftol,
167 gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
168 real epot,real fmax, int nfmax, real fnorm)
170 char buf[STEPSTRSIZE];
173 fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
174 alg,ftol,gmx_step_str(count,buf));
175 else if(count<nsteps)
176 fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
177 "but did not reach the requested Fmax < %g.\n",
178 alg,gmx_step_str(count,buf),ftol);
180 fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
181 alg,ftol,gmx_step_str(count,buf));
184 fprintf(fp,"Potential Energy = %21.14e\n",epot);
185 fprintf(fp,"Maximum force = %21.14e on atom %d\n",fmax,nfmax+1);
186 fprintf(fp,"Norm of force = %21.14e\n",fnorm);
188 fprintf(fp,"Potential Energy = %14.7e\n",epot);
189 fprintf(fp,"Maximum force = %14.7e on atom %d\n",fmax,nfmax+1);
190 fprintf(fp,"Norm of force = %14.7e\n",fnorm);
194 static void get_f_norm_max(t_commrec *cr,
195 t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
196 real *fnorm,real *fmax,int *a_fmax)
199 real fmax2,fmax2_0,fam;
200 int la_max,a_max,start,end,i,m,gf;
202 /* This routine finds the largest force and returns it.
203 * On parallel machines the global max is taken.
209 start = mdatoms->start;
210 end = mdatoms->homenr + start;
211 if (mdatoms->cFREEZE) {
212 for(i=start; i<end; i++) {
213 gf = mdatoms->cFREEZE[i];
216 if (!opts->nFreeze[gf][m])
225 for(i=start; i<end; i++) {
235 if (la_max >= 0 && DOMAINDECOMP(cr)) {
236 a_max = cr->dd->gatindex[la_max];
241 snew(sum,2*cr->nnodes+1);
242 sum[2*cr->nodeid] = fmax2;
243 sum[2*cr->nodeid+1] = a_max;
244 sum[2*cr->nnodes] = fnorm2;
245 gmx_sumd(2*cr->nnodes+1,sum,cr);
246 fnorm2 = sum[2*cr->nnodes];
247 /* Determine the global maximum */
248 for(i=0; i<cr->nnodes; i++) {
249 if (sum[2*i] > fmax2) {
251 a_max = (int)(sum[2*i+1] + 0.5);
258 *fnorm = sqrt(fnorm2);
265 static void get_state_f_norm_max(t_commrec *cr,
266 t_grpopts *opts,t_mdatoms *mdatoms,
269 get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
272 void init_em(FILE *fplog,const char *title,
273 t_commrec *cr,t_inputrec *ir,
274 t_state *state_global,gmx_mtop_t *top_global,
275 em_state_t *ems,gmx_localtop_t **top,
276 rvec **f,rvec **f_global,
277 t_nrnb *nrnb,rvec mu_tot,
278 t_forcerec *fr,gmx_enerdata_t **enerd,
279 t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
280 gmx_vsite_t *vsite,gmx_constr_t constr,
281 int nfile,const t_filenm fnm[],
282 gmx_mdoutf_t **outf,t_mdebin **mdebin)
289 fprintf(fplog,"Initiating %s\n",title);
292 state_global->ngtc = 0;
294 /* Initialize lambda variables */
295 initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
299 if (DOMAINDECOMP(cr))
301 *top = dd_init_local_top(top_global);
303 dd_init_local_state(cr->dd,state_global,&ems->s);
307 /* Distribute the charge groups over the nodes from the master node */
308 dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
309 state_global,top_global,ir,
310 &ems->s,&ems->f,mdatoms,*top,
311 fr,vsite,NULL,constr,
313 dd_store_state(cr->dd,&ems->s);
317 snew(*f_global,top_global->natoms);
327 snew(*f,top_global->natoms);
329 /* Just copy the state */
330 ems->s = *state_global;
331 snew(ems->s.x,ems->s.nalloc);
332 snew(ems->f,ems->s.nalloc);
333 for(i=0; i<state_global->natoms; i++)
335 copy_rvec(state_global->x[i],ems->s.x[i]);
337 copy_mat(state_global->box,ems->s.box);
339 if (PAR(cr) && ir->eI != eiNM)
341 /* Initialize the particle decomposition and split the topology */
342 *top = split_system(fplog,top_global,ir,cr);
344 pd_cg_range(cr,&fr->cg0,&fr->hcg);
348 *top = gmx_mtop_generate_local_top(top_global,ir);
352 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
354 *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
363 pd_at_range(cr,&start,&homenr);
369 homenr = top_global->natoms;
371 atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
372 update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
376 set_vsite_top(vsite,*top,mdatoms,cr);
382 if (ir->eConstrAlg == econtSHAKE &&
383 gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
385 gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
386 econstr_names[econtSHAKE],econstr_names[econtLINCS]);
389 if (!DOMAINDECOMP(cr))
391 set_constraints(constr,*top,ir,mdatoms,cr);
394 if (!ir->bContinuation)
396 /* Constrain the starting coordinates */
398 constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
399 ir,NULL,cr,-1,0,mdatoms,
400 ems->s.x,ems->s.x,NULL,ems->s.box,
401 ems->s.lambda[efptFEP],&dvdlambda,
402 NULL,NULL,nrnb,econqCoord,FALSE,0,0);
408 *gstat = global_stat_init(ir);
411 *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
414 init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
419 /* Init bin for energy stuff */
420 *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
424 calc_shifts(ems->s.box,fr->shift_vec);
427 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
428 gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
430 if (!(cr->duty & DUTY_PME)) {
431 /* Tell the PME only node to finish */
437 em_time_end(fplog,cr,runtime,wcycle);
440 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
449 static void copy_em_coords(em_state_t *ems,t_state *state)
453 for(i=0; (i<state->natoms); i++)
455 copy_rvec(ems->s.x[i],state->x[i]);
459 static void write_em_traj(FILE *fplog,t_commrec *cr,
461 gmx_bool bX,gmx_bool bF,const char *confout,
462 gmx_mtop_t *top_global,
463 t_inputrec *ir,gmx_large_int_t step,
465 t_state *state_global,rvec *f_global)
469 if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
471 copy_em_coords(state,state_global);
476 if (bX) { mdof_flags |= MDOF_X; }
477 if (bF) { mdof_flags |= MDOF_F; }
478 write_traj(fplog,cr,outf,mdof_flags,
479 top_global,step,(double)step,
480 &state->s,state_global,state->f,f_global,NULL,NULL);
482 if (confout != NULL && MASTER(cr))
484 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
486 /* Make molecules whole only for confout writing */
487 do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
491 write_sto_conf_mtop(confout,
492 *top_global->name,top_global,
493 state_global->x,NULL,ir->ePBC,state_global->box);
497 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
498 em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
499 gmx_constr_t constr,gmx_localtop_t *top,
500 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
501 gmx_large_int_t count)
505 int start,end,gf,i,m;
512 if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
513 gmx_incons("state mismatch in do_em_step");
515 s2->flags = s1->flags;
517 if (s2->nalloc != s1->nalloc) {
518 s2->nalloc = s1->nalloc;
519 srenew(s2->x,s1->nalloc);
520 srenew(ems2->f, s1->nalloc);
521 if (s2->flags & (1<<estCGP))
522 srenew(s2->cg_p, s1->nalloc);
525 s2->natoms = s1->natoms;
526 /* Copy free energy state -> is this necessary? */
527 for (i=0;i<efptNR;i++)
529 s2->lambda[i] = s1->lambda[i];
531 copy_mat(s1->box,s2->box);
534 end = md->start + md->homenr;
539 for(i=start; i<end; i++) {
542 for(m=0; m<DIM; m++) {
543 if (ir->opts.nFreeze[gf][m])
546 x2[i][m] = x1[i][m] + a*f[i][m];
550 if (s2->flags & (1<<estCGP)) {
551 /* Copy the CG p vector */
554 for(i=start; i<end; i++)
555 copy_rvec(x1[i],x2[i]);
558 if (DOMAINDECOMP(cr)) {
559 s2->ddp_count = s1->ddp_count;
560 if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
561 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
562 srenew(s2->cg_gl,s2->cg_gl_nalloc);
564 s2->ncg_gl = s1->ncg_gl;
565 for(i=0; i<s2->ncg_gl; i++)
566 s2->cg_gl[i] = s1->cg_gl[i];
567 s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
571 wallcycle_start(wcycle,ewcCONSTR);
573 constrain(NULL,TRUE,TRUE,constr,&top->idef,
574 ir,NULL,cr,count,0,md,
575 s1->x,s2->x,NULL,s2->box,s2->lambda[efptBONDED],
576 &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
577 wallcycle_stop(wcycle,ewcCONSTR);
581 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
582 gmx_mtop_t *top_global,t_inputrec *ir,
583 em_state_t *ems,gmx_localtop_t *top,
584 t_mdatoms *mdatoms,t_forcerec *fr,
585 gmx_vsite_t *vsite,gmx_constr_t constr,
586 t_nrnb *nrnb,gmx_wallcycle_t wcycle)
588 /* Repartition the domain decomposition */
589 wallcycle_start(wcycle,ewcDOMDEC);
590 dd_partition_system(fplog,step,cr,FALSE,1,
593 mdatoms,top,fr,vsite,NULL,constr,
595 dd_store_state(cr->dd,&ems->s);
596 wallcycle_stop(wcycle,ewcDOMDEC);
599 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
600 t_state *state_global,gmx_mtop_t *top_global,
601 em_state_t *ems,gmx_localtop_t *top,
602 t_inputrec *inputrec,
603 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
604 gmx_global_stat_t gstat,
605 gmx_vsite_t *vsite,gmx_constr_t constr,
607 t_graph *graph,t_mdatoms *mdatoms,
608 t_forcerec *fr,rvec mu_tot,
609 gmx_enerdata_t *enerd,tensor vir,tensor pres,
610 gmx_large_int_t count,gmx_bool bFirst)
615 tensor force_vir,shake_vir,ekin;
616 real dvdlambda,prescorr,enercorr,dvdlcorr;
619 /* Set the time to the initial time, the time does not change during EM */
620 t = inputrec->init_t;
623 (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
624 /* This the first state or an old state used before the last ns */
628 if (inputrec->nstlist > 0) {
630 } else if (inputrec->nstlist == -1) {
631 nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
633 gmx_sumi(1,&nabnsb,cr);
639 construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
640 top->idef.iparams,top->idef.il,
641 fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
643 if (DOMAINDECOMP(cr)) {
645 /* Repartition the domain decomposition */
646 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
647 ems,top,mdatoms,fr,vsite,constr,
652 /* Calc force & energy on new trial position */
653 /* do_force always puts the charge groups in the box and shifts again
654 * We do not unshift, so molecules are always whole in congrad.c
656 do_force(fplog,cr,inputrec,
657 count,nrnb,wcycle,top,top_global,&top_global->groups,
658 ems->s.box,ems->s.x,&ems->s.hist,
659 ems->f,force_vir,mdatoms,enerd,fcd,
660 ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
661 GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
662 (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
664 /* Clear the unused shake virial and pressure */
665 clear_mat(shake_vir);
668 /* Communicate stuff when parallel */
669 if (PAR(cr) && inputrec->eI != eiNM)
671 wallcycle_start(wcycle,ewcMoveE);
673 global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
674 inputrec,NULL,NULL,NULL,1,&terminate,
675 top_global,&ems->s,FALSE,
681 wallcycle_stop(wcycle,ewcMoveE);
684 /* Calculate long range corrections to pressure and energy */
685 calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
686 pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
687 enerd->term[F_DISPCORR] = enercorr;
688 enerd->term[F_EPOT] += enercorr;
689 enerd->term[F_PRES] += prescorr;
690 enerd->term[F_DVDL] += dvdlcorr;
692 ems->epot = enerd->term[F_EPOT];
695 /* Project out the constraint components of the force */
696 wallcycle_start(wcycle,ewcCONSTR);
698 constrain(NULL,FALSE,FALSE,constr,&top->idef,
699 inputrec,NULL,cr,count,0,mdatoms,
700 ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda[efptBONDED],&dvdlambda,
701 NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
702 if (fr->bSepDVDL && fplog)
703 fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
704 enerd->term[F_DVDL_BONDED] += dvdlambda;
705 m_add(force_vir,shake_vir,vir);
706 wallcycle_stop(wcycle,ewcCONSTR);
708 copy_mat(force_vir,vir);
712 enerd->term[F_PRES] =
713 calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
715 sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
717 if (EI_ENERGY_MINIMIZATION(inputrec->eI))
719 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
723 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
725 em_state_t *s_min,em_state_t *s_b)
729 int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
731 unsigned char *grpnrFREEZE;
734 fprintf(debug,"Doing reorder_partsum\n");
739 cgs_gl = dd_charge_groups_global(cr->dd);
740 index = cgs_gl->index;
742 /* Collect fm in a global vector fmg.
743 * This conflicts with the spirit of domain decomposition,
744 * but to fully optimize this a much more complicated algorithm is required.
746 snew(fmg,mtop->natoms);
748 ncg = s_min->s.ncg_gl;
749 cg_gl = s_min->s.cg_gl;
751 for(c=0; c<ncg; c++) {
755 for(a=a0; a<a1; a++) {
756 copy_rvec(fm[i],fmg[a]);
760 gmx_sum(mtop->natoms*3,fmg[0],cr);
762 /* Now we will determine the part of the sum for the cgs in state s_b */
764 cg_gl = s_b->s.cg_gl;
768 grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
769 for(c=0; c<ncg; c++) {
773 for(a=a0; a<a1; a++) {
774 if (mdatoms->cFREEZE && grpnrFREEZE) {
777 for(m=0; m<DIM; m++) {
778 if (!opts->nFreeze[gf][m]) {
779 partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
791 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
793 em_state_t *s_min,em_state_t *s_b)
799 /* This is just the classical Polak-Ribiere calculation of beta;
800 * it looks a bit complicated since we take freeze groups into account,
801 * and might have to sum it in parallel runs.
804 if (!DOMAINDECOMP(cr) ||
805 (s_min->s.ddp_count == cr->dd->ddp_count &&
806 s_b->s.ddp_count == cr->dd->ddp_count)) {
811 /* This part of code can be incorrect with DD,
812 * since the atom ordering in s_b and s_min might differ.
814 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
815 if (mdatoms->cFREEZE)
816 gf = mdatoms->cFREEZE[i];
818 if (!opts->nFreeze[gf][m]) {
819 sum += (fb[i][m] - fm[i][m])*fb[i][m];
823 /* We need to reorder cgs while summing */
824 sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
829 return sum/sqr(s_min->fnorm);
832 double do_cg(FILE *fplog,t_commrec *cr,
833 int nfile,const t_filenm fnm[],
834 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
836 gmx_vsite_t *vsite,gmx_constr_t constr,
838 t_inputrec *inputrec,
839 gmx_mtop_t *top_global,t_fcdata *fcd,
840 t_state *state_global,
842 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
845 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
847 real cpt_period,real max_hours,
848 const char *deviceOptions,
850 gmx_runtime_t *runtime)
852 const char *CG="Polak-Ribiere Conjugate Gradients";
854 em_state_t *s_min,*s_a,*s_b,*s_c;
856 gmx_enerdata_t *enerd;
858 gmx_global_stat_t gstat;
860 rvec *f_global,*p,*sf,*sfm;
861 double gpa,gpb,gpc,tmp,sum[2],minstep;
868 gmx_bool converged,foundlower;
870 gmx_bool do_log=FALSE,do_ene=FALSE,do_x,do_f;
872 int number_steps,neval=0,nstcg=inputrec->nstcgsteep;
874 int i,m,gf,step,nminstep;
879 s_min = init_em_state();
880 s_a = init_em_state();
881 s_b = init_em_state();
882 s_c = init_em_state();
884 /* Init em and store the local state in s_min */
885 init_em(fplog,CG,cr,inputrec,
886 state_global,top_global,s_min,&top,&f,&f_global,
887 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
888 nfile,fnm,&outf,&mdebin);
890 /* Print to log file */
891 print_em_start(fplog,cr,runtime,wcycle,CG);
893 /* Max number of steps */
894 number_steps=inputrec->nsteps;
897 sp_header(stderr,CG,inputrec->em_tol,number_steps);
899 sp_header(fplog,CG,inputrec->em_tol,number_steps);
901 /* Call the force routine and some auxiliary (neighboursearching etc.) */
902 /* do_force always puts the charge groups in the box and shifts again
903 * We do not unshift, so molecules are always whole in congrad.c
905 evaluate_energy(fplog,bVerbose,cr,
906 state_global,top_global,s_min,top,
907 inputrec,nrnb,wcycle,gstat,
908 vsite,constr,fcd,graph,mdatoms,fr,
909 mu_tot,enerd,vir,pres,-1,TRUE);
913 /* Copy stuff to the energy bin for easy printing etc. */
914 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
915 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
916 NULL,NULL,vir,pres,NULL,mu_tot,constr);
918 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
919 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
920 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
924 /* Estimate/guess the initial stepsize */
925 stepsize = inputrec->em_stepsize/s_min->fnorm;
928 fprintf(stderr," F-max = %12.5e on atom %d\n",
929 s_min->fmax,s_min->a_fmax+1);
930 fprintf(stderr," F-Norm = %12.5e\n",
931 s_min->fnorm/sqrt(state_global->natoms));
932 fprintf(stderr,"\n");
933 /* and copy to the log file too... */
934 fprintf(fplog," F-max = %12.5e on atom %d\n",
935 s_min->fmax,s_min->a_fmax+1);
936 fprintf(fplog," F-Norm = %12.5e\n",
937 s_min->fnorm/sqrt(state_global->natoms));
940 /* Start the loop over CG steps.
941 * Each successful step is counted, and we continue until
942 * we either converge or reach the max number of steps.
945 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
947 /* start taking steps in a new direction
948 * First time we enter the routine, beta=0, and the direction is
949 * simply the negative gradient.
952 /* Calculate the new direction in p, and the gradient in this direction, gpa */
957 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
958 if (mdatoms->cFREEZE)
959 gf = mdatoms->cFREEZE[i];
960 for(m=0; m<DIM; m++) {
961 if (!inputrec->opts.nFreeze[gf][m]) {
962 p[i][m] = sf[i][m] + beta*p[i][m];
963 gpa -= p[i][m]*sf[i][m];
964 /* f is negative gradient, thus the sign */
971 /* Sum the gradient along the line across CPUs */
975 /* Calculate the norm of the search vector */
976 get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
978 /* Just in case stepsize reaches zero due to numerical precision... */
980 stepsize = inputrec->em_stepsize/pnorm;
983 * Double check the value of the derivative in the search direction.
984 * If it is positive it must be due to the old information in the
985 * CG formula, so just remove that and start over with beta=0.
986 * This corresponds to a steepest descent step.
990 step--; /* Don't count this step since we are restarting */
991 continue; /* Go back to the beginning of the big for-loop */
994 /* Calculate minimum allowed stepsize, before the average (norm)
995 * relative change in coordinate is smaller than precision
998 for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
999 for(m=0; m<DIM; m++) {
1000 tmp = fabs(s_min->s.x[i][m]);
1007 /* Add up from all CPUs */
1009 gmx_sumd(1,&minstep,cr);
1011 minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1013 if(stepsize<minstep) {
1018 /* Write coordinates if necessary */
1019 do_x = do_per_step(step,inputrec->nstxout);
1020 do_f = do_per_step(step,inputrec->nstfout);
1022 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1023 top_global,inputrec,step,
1024 s_min,state_global,f_global);
1026 /* Take a step downhill.
1027 * In theory, we should minimize the function along this direction.
1028 * That is quite possible, but it turns out to take 5-10 function evaluations
1029 * for each line. However, we dont really need to find the exact minimum -
1030 * it is much better to start a new CG step in a modified direction as soon
1031 * as we are close to it. This will save a lot of energy evaluations.
1033 * In practice, we just try to take a single step.
1034 * If it worked (i.e. lowered the energy), we increase the stepsize but
1035 * the continue straight to the next CG step without trying to find any minimum.
1036 * If it didn't work (higher energy), there must be a minimum somewhere between
1037 * the old position and the new one.
1039 * Due to the finite numerical accuracy, it turns out that it is a good idea
1040 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1041 * This leads to lower final energies in the tests I've done. / Erik
1043 s_a->epot = s_min->epot;
1045 c = a + stepsize; /* reference position along line is zero */
1047 if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1048 em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1049 s_min,top,mdatoms,fr,vsite,constr,
1053 /* Take a trial step (new coords in s_c) */
1054 do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
1055 constr,top,nrnb,wcycle,-1);
1058 /* Calculate energy for the trial step */
1059 evaluate_energy(fplog,bVerbose,cr,
1060 state_global,top_global,s_c,top,
1061 inputrec,nrnb,wcycle,gstat,
1062 vsite,constr,fcd,graph,mdatoms,fr,
1063 mu_tot,enerd,vir,pres,-1,FALSE);
1065 /* Calc derivative along line */
1069 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1070 for(m=0; m<DIM; m++)
1071 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1073 /* Sum the gradient along the line across CPUs */
1075 gmx_sumd(1,&gpc,cr);
1077 /* This is the max amount of increase in energy we tolerate */
1078 tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1080 /* Accept the step if the energy is lower, or if it is not significantly higher
1081 * and the line derivative is still negative.
1083 if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1085 /* Great, we found a better energy. Increase step for next iteration
1086 * if we are still going down, decrease it otherwise
1089 stepsize *= 1.618034; /* The golden section */
1091 stepsize *= 0.618034; /* 1/golden section */
1093 /* New energy is the same or higher. We will have to do some work
1094 * to find a smaller value in the interval. Take smaller step next time!
1097 stepsize *= 0.618034;
1103 /* OK, if we didn't find a lower value we will have to locate one now - there must
1104 * be one in the interval [a=0,c].
1105 * The same thing is valid here, though: Don't spend dozens of iterations to find
1106 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1107 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1109 * I also have a safeguard for potentially really patological functions so we never
1110 * take more than 20 steps before we give up ...
1112 * If we already found a lower value we just skip this step and continue to the update.
1118 /* Select a new trial point.
1119 * If the derivatives at points a & c have different sign we interpolate to zero,
1120 * otherwise just do a bisection.
1123 b = a + gpa*(a-c)/(gpc-gpa);
1127 /* safeguard if interpolation close to machine accuracy causes errors:
1128 * never go outside the interval
1133 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1134 /* Reload the old state */
1135 em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1136 s_min,top,mdatoms,fr,vsite,constr,
1140 /* Take a trial step to this new point - new coords in s_b */
1141 do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
1142 constr,top,nrnb,wcycle,-1);
1145 /* Calculate energy for the trial step */
1146 evaluate_energy(fplog,bVerbose,cr,
1147 state_global,top_global,s_b,top,
1148 inputrec,nrnb,wcycle,gstat,
1149 vsite,constr,fcd,graph,mdatoms,fr,
1150 mu_tot,enerd,vir,pres,-1,FALSE);
1152 /* p does not change within a step, but since the domain decomposition
1153 * might change, we have to use cg_p of s_b here.
1158 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1159 for(m=0; m<DIM; m++)
1160 gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1162 /* Sum the gradient along the line across CPUs */
1164 gmx_sumd(1,&gpb,cr);
1167 fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1168 s_a->epot,s_b->epot,s_c->epot,gpb);
1170 epot_repl = s_b->epot;
1172 /* Keep one of the intervals based on the value of the derivative at the new point */
1174 /* Replace c endpoint with b */
1175 swap_em_state(s_b,s_c);
1179 /* Replace a endpoint with b */
1180 swap_em_state(s_b,s_a);
1186 * Stop search as soon as we find a value smaller than the endpoints.
1187 * Never run more than 20 steps, no matter what.
1190 } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1193 if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1195 /* OK. We couldn't find a significantly lower energy.
1196 * If beta==0 this was steepest descent, and then we give up.
1197 * If not, set beta=0 and restart with steepest descent before quitting.
1204 /* Reset memory before giving up */
1210 /* Select min energy state of A & C, put the best in B.
1212 if (s_c->epot < s_a->epot) {
1214 fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1215 s_c->epot,s_a->epot);
1216 swap_em_state(s_b,s_c);
1221 fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1222 s_a->epot,s_c->epot);
1223 swap_em_state(s_b,s_a);
1230 fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1232 swap_em_state(s_b,s_c);
1237 /* new search direction */
1238 /* beta = 0 means forget all memory and restart with steepest descents. */
1239 if (nstcg && ((step % nstcg)==0))
1242 /* s_min->fnorm cannot be zero, because then we would have converged
1246 /* Polak-Ribiere update.
1247 * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1249 beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1251 /* Limit beta to prevent oscillations */
1252 if (fabs(beta) > 5.0)
1256 /* update positions */
1257 swap_em_state(s_min,s_b);
1260 /* Print it if necessary */
1263 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1264 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1265 s_min->fmax,s_min->a_fmax+1);
1266 /* Store the new (lower) energies */
1267 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1268 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
1269 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1271 do_log = do_per_step(step,inputrec->nstlog);
1272 do_ene = do_per_step(step,inputrec->nstenergy);
1274 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1275 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1276 do_log ? fplog : NULL,step,step,eprNORMAL,
1277 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1280 /* Stop when the maximum force lies below tolerance.
1281 * If we have reached machine precision, converged is already set to true.
1283 converged = converged || (s_min->fmax < inputrec->em_tol);
1285 } /* End of the loop */
1288 step--; /* we never took that last step in this case */
1290 if (s_min->fmax > inputrec->em_tol)
1294 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1295 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1301 /* If we printed energy and/or logfile last step (which was the last step)
1302 * we don't have to do it again, but otherwise print the final values.
1305 /* Write final value to log since we didn't do anything the last step */
1306 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1308 if (!do_ene || !do_log) {
1309 /* Write final energy file entries */
1310 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1311 !do_log ? fplog : NULL,step,step,eprNORMAL,
1312 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1316 /* Print some stuff... */
1318 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1321 * For accurate normal mode calculation it is imperative that we
1322 * store the last conformation into the full precision binary trajectory.
1324 * However, we should only do it if we did NOT already write this step
1325 * above (which we did if do_x or do_f was true).
1327 do_x = !do_per_step(step,inputrec->nstxout);
1328 do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1330 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1331 top_global,inputrec,step,
1332 s_min,state_global,f_global);
1334 fnormn = s_min->fnorm/sqrt(state_global->natoms);
1337 print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1338 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1339 print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1340 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1342 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1345 finish_em(fplog,cr,outf,runtime,wcycle);
1347 /* To print the actual number of steps we needed somewhere */
1348 runtime->nsteps_done = step;
1351 } /* That's all folks */
1354 double do_lbfgs(FILE *fplog,t_commrec *cr,
1355 int nfile,const t_filenm fnm[],
1356 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1358 gmx_vsite_t *vsite,gmx_constr_t constr,
1360 t_inputrec *inputrec,
1361 gmx_mtop_t *top_global,t_fcdata *fcd,
1364 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1367 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1368 gmx_membed_t membed,
1369 real cpt_period,real max_hours,
1370 const char *deviceOptions,
1371 unsigned long Flags,
1372 gmx_runtime_t *runtime)
1374 static const char *LBFGS="Low-Memory BFGS Minimizer";
1376 gmx_localtop_t *top;
1377 gmx_enerdata_t *enerd;
1379 gmx_global_stat_t gstat;
1382 int ncorr,nmaxcorr,point,cp,neval,nminstep;
1383 double stepsize,gpa,gpb,gpc,tmp,minstep;
1384 real *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1385 real *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1386 real a,b,c,maxdelta,delta;
1387 real diag,Epot0,Epot,EpotA,EpotB,EpotC;
1388 real dgdx,dgdg,sq,yr,beta;
1390 gmx_bool converged,first;
1393 gmx_bool do_log,do_ene,do_x,do_f,foundlower,*frozen;
1395 int start,end,number_steps;
1397 int i,k,m,n,nfmax,gf,step;
1403 gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1405 n = 3*state->natoms;
1406 nmaxcorr = inputrec->nbfgscorr;
1408 /* Allocate memory */
1409 /* Use pointers to real so we dont have to loop over both atoms and
1410 * dimensions all the time...
1411 * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1412 * that point to the same memory.
1426 snew(alpha,nmaxcorr);
1429 for(i=0;i<nmaxcorr;i++)
1433 for(i=0;i<nmaxcorr;i++)
1440 init_em(fplog,LBFGS,cr,inputrec,
1441 state,top_global,&ems,&top,&f,&f_global,
1442 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1443 nfile,fnm,&outf,&mdebin);
1444 /* Do_lbfgs is not completely updated like do_steep and do_cg,
1445 * so we free some memory again.
1450 xx = (real *)state->x;
1453 start = mdatoms->start;
1454 end = mdatoms->homenr + start;
1456 /* Print to log file */
1457 print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1459 do_log = do_ene = do_x = do_f = TRUE;
1461 /* Max number of steps */
1462 number_steps=inputrec->nsteps;
1464 /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1466 for(i=start; i<end; i++) {
1467 if (mdatoms->cFREEZE)
1468 gf = mdatoms->cFREEZE[i];
1469 for(m=0; m<DIM; m++)
1470 frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1473 sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1475 sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1478 construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1479 top->idef.iparams,top->idef.il,
1480 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1482 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1483 /* do_force always puts the charge groups in the box and shifts again
1484 * We do not unshift, so molecules are always whole
1489 evaluate_energy(fplog,bVerbose,cr,
1490 state,top_global,&ems,top,
1491 inputrec,nrnb,wcycle,gstat,
1492 vsite,constr,fcd,graph,mdatoms,fr,
1493 mu_tot,enerd,vir,pres,-1,TRUE);
1497 /* Copy stuff to the energy bin for easy printing etc. */
1498 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1499 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1500 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1502 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1503 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1504 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1508 /* This is the starting energy */
1509 Epot = enerd->term[F_EPOT];
1515 /* Set the initial step.
1516 * since it will be multiplied by the non-normalized search direction
1517 * vector (force vector the first time), we scale it by the
1518 * norm of the force.
1522 fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1523 fprintf(stderr," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1524 fprintf(stderr," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1525 fprintf(stderr,"\n");
1526 /* and copy to the log file too... */
1527 fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1528 fprintf(fplog," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1529 fprintf(fplog," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1530 fprintf(fplog,"\n");
1536 dx[point][i] = ff[i]; /* Initial search direction */
1540 stepsize = 1.0/fnorm;
1543 /* Start the loop over BFGS steps.
1544 * Each successful step is counted, and we continue until
1545 * we either converge or reach the max number of steps.
1550 /* Set the gradient from the force */
1552 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1554 /* Write coordinates if necessary */
1555 do_x = do_per_step(step,inputrec->nstxout);
1556 do_f = do_per_step(step,inputrec->nstfout);
1561 mdof_flags |= MDOF_X;
1566 mdof_flags |= MDOF_F;
1569 write_traj(fplog,cr,outf,mdof_flags,
1570 top_global,step,(real)step,state,state,f,f,NULL,NULL);
1572 /* Do the linesearching in the direction dx[point][0..(n-1)] */
1574 /* pointer to current direction - point=0 first time here */
1577 /* calculate line gradient */
1578 for(gpa=0,i=0;i<n;i++)
1581 /* Calculate minimum allowed stepsize, before the average (norm)
1582 * relative change in coordinate is smaller than precision
1584 for(minstep=0,i=0;i<n;i++) {
1591 minstep = GMX_REAL_EPS/sqrt(minstep/n);
1593 if(stepsize<minstep) {
1598 /* Store old forces and coordinates */
1610 /* Take a step downhill.
1611 * In theory, we should minimize the function along this direction.
1612 * That is quite possible, but it turns out to take 5-10 function evaluations
1613 * for each line. However, we dont really need to find the exact minimum -
1614 * it is much better to start a new BFGS step in a modified direction as soon
1615 * as we are close to it. This will save a lot of energy evaluations.
1617 * In practice, we just try to take a single step.
1618 * If it worked (i.e. lowered the energy), we increase the stepsize but
1619 * the continue straight to the next BFGS step without trying to find any minimum.
1620 * If it didn't work (higher energy), there must be a minimum somewhere between
1621 * the old position and the new one.
1623 * Due to the finite numerical accuracy, it turns out that it is a good idea
1624 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1625 * This leads to lower final energies in the tests I've done. / Erik
1630 c = a + stepsize; /* reference position along line is zero */
1632 /* Check stepsize first. We do not allow displacements
1633 * larger than emstep.
1643 if(maxdelta>inputrec->em_stepsize)
1645 } while(maxdelta>inputrec->em_stepsize);
1647 /* Take a trial step */
1649 xc[i] = lastx[i] + c*s[i];
1652 /* Calculate energy for the trial step */
1653 ems.s.x = (rvec *)xc;
1655 evaluate_energy(fplog,bVerbose,cr,
1656 state,top_global,&ems,top,
1657 inputrec,nrnb,wcycle,gstat,
1658 vsite,constr,fcd,graph,mdatoms,fr,
1659 mu_tot,enerd,vir,pres,step,FALSE);
1662 /* Calc derivative along line */
1663 for(gpc=0,i=0; i<n; i++) {
1664 gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
1666 /* Sum the gradient along the line across CPUs */
1668 gmx_sumd(1,&gpc,cr);
1670 /* This is the max amount of increase in energy we tolerate */
1671 tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1673 /* Accept the step if the energy is lower, or if it is not significantly higher
1674 * and the line derivative is still negative.
1676 if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1678 /* Great, we found a better energy. Increase step for next iteration
1679 * if we are still going down, decrease it otherwise
1682 stepsize *= 1.618034; /* The golden section */
1684 stepsize *= 0.618034; /* 1/golden section */
1686 /* New energy is the same or higher. We will have to do some work
1687 * to find a smaller value in the interval. Take smaller step next time!
1690 stepsize *= 0.618034;
1693 /* OK, if we didn't find a lower value we will have to locate one now - there must
1694 * be one in the interval [a=0,c].
1695 * The same thing is valid here, though: Don't spend dozens of iterations to find
1696 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1697 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1699 * I also have a safeguard for potentially really patological functions so we never
1700 * take more than 20 steps before we give up ...
1702 * If we already found a lower value we just skip this step and continue to the update.
1709 /* Select a new trial point.
1710 * If the derivatives at points a & c have different sign we interpolate to zero,
1711 * otherwise just do a bisection.
1715 b = a + gpa*(a-c)/(gpc-gpa);
1719 /* safeguard if interpolation close to machine accuracy causes errors:
1720 * never go outside the interval
1725 /* Take a trial step */
1727 xb[i] = lastx[i] + b*s[i];
1730 /* Calculate energy for the trial step */
1731 ems.s.x = (rvec *)xb;
1733 evaluate_energy(fplog,bVerbose,cr,
1734 state,top_global,&ems,top,
1735 inputrec,nrnb,wcycle,gstat,
1736 vsite,constr,fcd,graph,mdatoms,fr,
1737 mu_tot,enerd,vir,pres,step,FALSE);
1742 for(gpb=0,i=0; i<n; i++)
1743 gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
1745 /* Sum the gradient along the line across CPUs */
1747 gmx_sumd(1,&gpb,cr);
1749 /* Keep one of the intervals based on the value of the derivative at the new point */
1751 /* Replace c endpoint with b */
1755 /* swap coord pointers b/c */
1763 /* Replace a endpoint with b */
1767 /* swap coord pointers a/b */
1777 * Stop search as soon as we find a value smaller than the endpoints,
1778 * or if the tolerance is below machine precision.
1779 * Never run more than 20 steps, no matter what.
1782 } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1784 if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1785 /* OK. We couldn't find a significantly lower energy.
1786 * If ncorr==0 this was steepest descent, and then we give up.
1787 * If not, reset memory to restart as steepest descent before quitting.
1796 /* Search in gradient direction */
1799 /* Reset stepsize */
1800 stepsize = 1.0/fnorm;
1805 /* Select min energy state of A & C, put the best in xx/ff/Epot
1836 /* Update the memory information, and calculate a new
1837 * approximation of the inverse hessian
1840 /* Have new data in Epot, xx, ff */
1845 dg[point][i]=lastf[i]-ff[i];
1846 dx[point][i]*=stepsize;
1852 dgdg+=dg[point][i]*dg[point][i];
1853 dgdx+=dg[point][i]*dx[point][i];
1858 rho[point]=1.0/dgdx;
1870 /* Recursive update. First go back over the memory points */
1871 for(k=0;k<ncorr;k++) {
1880 alpha[cp]=rho[cp]*sq;
1883 p[i] -= alpha[cp]*dg[cp][i];
1889 /* And then go forward again */
1890 for(k=0;k<ncorr;k++) {
1893 yr += p[i]*dg[cp][i];
1896 beta = alpha[cp]-beta;
1899 p[i] += beta*dx[cp][i];
1908 dx[point][i] = p[i];
1914 /* Test whether the convergence criterion is met */
1915 get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1917 /* Print it if necessary */
1920 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1921 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1922 /* Store the new (lower) energies */
1923 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1924 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1925 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1926 do_log = do_per_step(step,inputrec->nstlog);
1927 do_ene = do_per_step(step,inputrec->nstenergy);
1929 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1930 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1931 do_log ? fplog : NULL,step,step,eprNORMAL,
1932 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1935 /* Stop when the maximum force lies below tolerance.
1936 * If we have reached machine precision, converged is already set to true.
1939 converged = converged || (fmax < inputrec->em_tol);
1941 } /* End of the loop */
1944 step--; /* we never took that last step in this case */
1946 if(fmax>inputrec->em_tol)
1950 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1951 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1956 /* If we printed energy and/or logfile last step (which was the last step)
1957 * we don't have to do it again, but otherwise print the final values.
1959 if(!do_log) /* Write final value to log since we didn't do anythin last step */
1960 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1961 if(!do_ene || !do_log) /* Write final energy file entries */
1962 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1963 !do_log ? fplog : NULL,step,step,eprNORMAL,
1964 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1966 /* Print some stuff... */
1968 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1971 * For accurate normal mode calculation it is imperative that we
1972 * store the last conformation into the full precision binary trajectory.
1974 * However, we should only do it if we did NOT already write this step
1975 * above (which we did if do_x or do_f was true).
1977 do_x = !do_per_step(step,inputrec->nstxout);
1978 do_f = !do_per_step(step,inputrec->nstfout);
1979 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1980 top_global,inputrec,step,
1984 print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
1985 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1986 print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
1987 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1989 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1992 finish_em(fplog,cr,outf,runtime,wcycle);
1994 /* To print the actual number of steps we needed somewhere */
1995 runtime->nsteps_done = step;
1998 } /* That's all folks */
2001 double do_steep(FILE *fplog,t_commrec *cr,
2002 int nfile, const t_filenm fnm[],
2003 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2005 gmx_vsite_t *vsite,gmx_constr_t constr,
2007 t_inputrec *inputrec,
2008 gmx_mtop_t *top_global,t_fcdata *fcd,
2009 t_state *state_global,
2011 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2014 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2015 gmx_membed_t membed,
2016 real cpt_period,real max_hours,
2017 const char *deviceOptions,
2018 unsigned long Flags,
2019 gmx_runtime_t *runtime)
2021 const char *SD="Steepest Descents";
2022 em_state_t *s_min,*s_try;
2024 gmx_localtop_t *top;
2025 gmx_enerdata_t *enerd;
2027 gmx_global_stat_t gstat;
2029 real stepsize,constepsize;
2030 real ustep,dvdlambda,fnormn;
2033 gmx_bool bDone,bAbort,do_x,do_f;
2038 int steps_accepted=0;
2042 s_min = init_em_state();
2043 s_try = init_em_state();
2045 /* Init em and store the local state in s_try */
2046 init_em(fplog,SD,cr,inputrec,
2047 state_global,top_global,s_try,&top,&f,&f_global,
2048 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2049 nfile,fnm,&outf,&mdebin);
2051 /* Print to log file */
2052 print_em_start(fplog,cr,runtime,wcycle,SD);
2054 /* Set variables for stepsize (in nm). This is the largest
2055 * step that we are going to make in any direction.
2057 ustep = inputrec->em_stepsize;
2060 /* Max number of steps */
2061 nsteps = inputrec->nsteps;
2064 /* Print to the screen */
2065 sp_header(stderr,SD,inputrec->em_tol,nsteps);
2067 sp_header(fplog,SD,inputrec->em_tol,nsteps);
2069 /**** HERE STARTS THE LOOP ****
2070 * count is the counter for the number of steps
2071 * bDone will be TRUE when the minimization has converged
2072 * bAbort will be TRUE when nsteps steps have been performed or when
2073 * the stepsize becomes smaller than is reasonable for machine precision
2078 while( !bDone && !bAbort ) {
2079 bAbort = (nsteps >= 0) && (count == nsteps);
2081 /* set new coordinates, except for first step */
2083 do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
2084 constr,top,nrnb,wcycle,count);
2087 evaluate_energy(fplog,bVerbose,cr,
2088 state_global,top_global,s_try,top,
2089 inputrec,nrnb,wcycle,gstat,
2090 vsite,constr,fcd,graph,mdatoms,fr,
2091 mu_tot,enerd,vir,pres,count,count==0);
2094 print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
2097 s_min->epot = s_try->epot + 1;
2099 /* Print it if necessary */
2102 fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2103 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2104 (s_try->epot < s_min->epot) ? '\n' : '\r');
2107 if (s_try->epot < s_min->epot) {
2108 /* Store the new (lower) energies */
2109 upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2110 mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
2111 s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
2112 print_ebin(outf->fp_ene,TRUE,
2113 do_per_step(steps_accepted,inputrec->nstdisreout),
2114 do_per_step(steps_accepted,inputrec->nstorireout),
2115 fplog,count,count,eprNORMAL,TRUE,
2116 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2121 /* Now if the new energy is smaller than the previous...
2122 * or if this is the first step!
2123 * or if we did random steps!
2126 if ( (count==0) || (s_try->epot < s_min->epot) ) {
2129 /* Test whether the convergence criterion is met... */
2130 bDone = (s_try->fmax < inputrec->em_tol);
2132 /* Copy the arrays for force, positions and energy */
2133 /* The 'Min' array always holds the coords and forces of the minimal
2135 swap_em_state(s_min,s_try);
2139 /* Write to trn, if necessary */
2140 do_x = do_per_step(steps_accepted,inputrec->nstxout);
2141 do_f = do_per_step(steps_accepted,inputrec->nstfout);
2142 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2143 top_global,inputrec,count,
2144 s_min,state_global,f_global);
2147 /* If energy is not smaller make the step smaller... */
2150 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2151 /* Reload the old state */
2152 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2153 s_min,top,mdatoms,fr,vsite,constr,
2158 /* Determine new step */
2159 stepsize = ustep/s_min->fmax;
2161 /* Check if stepsize is too small, with 1 nm as a characteristic length */
2163 if (count == nsteps || ustep < 1e-12)
2165 if (count == nsteps || ustep < 1e-6)
2170 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2171 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2177 } /* End of the loop */
2179 /* Print some shit... */
2181 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2182 write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2183 top_global,inputrec,count,
2184 s_min,state_global,f_global);
2186 fnormn = s_min->fnorm/sqrt(state_global->natoms);
2189 print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2190 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2191 print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2192 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2195 finish_em(fplog,cr,outf,runtime,wcycle);
2197 /* To print the actual number of steps we needed somewhere */
2198 inputrec->nsteps=count;
2200 runtime->nsteps_done = count;
2203 } /* That's all folks */
2206 double do_nm(FILE *fplog,t_commrec *cr,
2207 int nfile,const t_filenm fnm[],
2208 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2210 gmx_vsite_t *vsite,gmx_constr_t constr,
2212 t_inputrec *inputrec,
2213 gmx_mtop_t *top_global,t_fcdata *fcd,
2214 t_state *state_global,
2216 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2219 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2220 gmx_membed_t membed,
2221 real cpt_period,real max_hours,
2222 const char *deviceOptions,
2223 unsigned long Flags,
2224 gmx_runtime_t *runtime)
2226 const char *NM = "Normal Mode Analysis";
2231 gmx_localtop_t *top;
2232 gmx_enerdata_t *enerd;
2234 gmx_global_stat_t gstat;
2236 real t,t0,lambda,lam0;
2241 gmx_bool bSparse; /* use sparse matrix storage format */
2243 gmx_sparsematrix_t * sparse_matrix = NULL;
2244 real * full_matrix = NULL;
2245 em_state_t * state_work;
2247 /* added with respect to mdrun */
2249 real der_range=10.0*sqrt(GMX_REAL_EPS);
2255 gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2258 state_work = init_em_state();
2260 /* Init em and store the local state in state_minimum */
2261 init_em(fplog,NM,cr,inputrec,
2262 state_global,top_global,state_work,&top,
2264 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2265 nfile,fnm,&outf,NULL);
2267 natoms = top_global->natoms;
2275 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2276 " which MIGHT not be accurate enough for normal mode analysis.\n"
2277 " Gromacs now uses sparse matrix storage, so the memory requirements\n"
2278 " are fairly modest even if you recompile in double precision.\n\n");
2282 /* Check if we can/should use sparse storage format.
2284 * Sparse format is only useful when the Hessian itself is sparse, which it
2285 * will be when we use a cutoff.
2286 * For small systems (n<1000) it is easier to always use full matrix format, though.
2288 if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2290 fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2293 else if(top_global->natoms < 1000)
2295 fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2300 fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2304 sz = DIM*top_global->natoms;
2306 fprintf(stderr,"Allocating Hessian memory...\n\n");
2310 sparse_matrix=gmx_sparsematrix_init(sz);
2311 sparse_matrix->compressed_symmetric = TRUE;
2315 snew(full_matrix,sz*sz);
2318 /* Initial values */
2319 t0 = inputrec->init_t;
2320 lam0 = inputrec->fepvals->init_lambda;
2328 /* Write start time and temperature */
2329 print_em_start(fplog,cr,runtime,wcycle,NM);
2331 /* fudge nr of steps to nr of atoms */
2332 inputrec->nsteps = natoms*2;
2336 fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2337 *(top_global->name),(int)inputrec->nsteps);
2340 nnodes = cr->nnodes;
2342 /* Make evaluate_energy do a single node force calculation */
2344 evaluate_energy(fplog,bVerbose,cr,
2345 state_global,top_global,state_work,top,
2346 inputrec,nrnb,wcycle,gstat,
2347 vsite,constr,fcd,graph,mdatoms,fr,
2348 mu_tot,enerd,vir,pres,-1,TRUE);
2349 cr->nnodes = nnodes;
2351 /* if forces are not small, warn user */
2352 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2356 fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2357 if (state_work->fmax > 1.0e-3)
2359 fprintf(stderr,"Maximum force probably not small enough to");
2360 fprintf(stderr," ensure that you are in an \nenergy well. ");
2361 fprintf(stderr,"Be aware that negative eigenvalues may occur");
2362 fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2366 /***********************************************************
2368 * Loop over all pairs in matrix
2370 * do_force called twice. Once with positive and
2371 * once with negative displacement
2373 ************************************************************/
2375 /* Steps are divided one by one over the nodes */
2376 for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2379 for (d=0; d<DIM; d++)
2381 x_min = state_work->s.x[atom][d];
2383 state_work->s.x[atom][d] = x_min - der_range;
2385 /* Make evaluate_energy do a single node force calculation */
2387 evaluate_energy(fplog,bVerbose,cr,
2388 state_global,top_global,state_work,top,
2389 inputrec,nrnb,wcycle,gstat,
2390 vsite,constr,fcd,graph,mdatoms,fr,
2391 mu_tot,enerd,vir,pres,atom*2,FALSE);
2393 for(i=0; i<natoms; i++)
2395 copy_rvec(state_work->f[i], fneg[i]);
2398 state_work->s.x[atom][d] = x_min + der_range;
2400 evaluate_energy(fplog,bVerbose,cr,
2401 state_global,top_global,state_work,top,
2402 inputrec,nrnb,wcycle,gstat,
2403 vsite,constr,fcd,graph,mdatoms,fr,
2404 mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2405 cr->nnodes = nnodes;
2407 /* x is restored to original */
2408 state_work->s.x[atom][d] = x_min;
2410 for(j=0; j<natoms; j++)
2412 for (k=0; (k<DIM); k++)
2415 -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2423 #define mpi_type MPI_DOUBLE
2425 #define mpi_type MPI_FLOAT
2427 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2428 cr->mpi_comm_mygroup);
2433 for(node=0; (node<nnodes && atom+node<natoms); node++)
2439 MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2440 cr->mpi_comm_mygroup,&stat);
2445 row = (atom + node)*DIM + d;
2447 for(j=0; j<natoms; j++)
2449 for(k=0; k<DIM; k++)
2455 if (col >= row && dfdx[j][k] != 0.0)
2457 gmx_sparsematrix_increment_value(sparse_matrix,
2458 row,col,dfdx[j][k]);
2463 full_matrix[row*sz+col] = dfdx[j][k];
2470 if (bVerbose && fplog)
2475 /* write progress */
2476 if (MASTER(cr) && bVerbose)
2478 fprintf(stderr,"\rFinished step %d out of %d",
2479 min(atom+nnodes,natoms),natoms);
2486 fprintf(stderr,"\n\nWriting Hessian...\n");
2487 gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2490 finish_em(fplog,cr,outf,runtime,wcycle);
2492 runtime->nsteps_done = natoms*2;