2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
57 #include "gmx_fatal.h"
69 #include "md_support.h"
73 #include "sparsematrix.h"
77 #include "gmx_wallcycle.h"
78 #include "mtop_util.h"
82 #include "gmx_omp_nthreads.h"
94 static em_state_t *init_em_state()
100 /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? */
101 snew(ems->s.lambda,efptNR);
106 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
107 gmx_wallcycle_t wcycle,
112 runtime_start(runtime);
114 sprintf(buf,"Started %s",name);
115 print_date_and_time(fplog,cr->nodeid,buf,NULL);
117 wallcycle_start(wcycle,ewcRUN);
119 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
120 gmx_wallcycle_t wcycle)
122 wallcycle_stop(wcycle,ewcRUN);
124 runtime_end(runtime);
127 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
130 fprintf(out,"%s:\n",minimizer);
131 fprintf(out," Tolerance (Fmax) = %12.5e\n",ftol);
132 fprintf(out," Number of steps = %12d\n",nsteps);
135 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
141 "\nEnergy minimization reached the maximum number"
142 "of steps before the forces reached the requested"
143 "precision Fmax < %g.\n",ftol);
148 "\nEnergy minimization has stopped, but the forces have"
149 "not converged to the requested precision Fmax < %g (which"
150 "may not be possible for your system). It stopped"
151 "because the algorithm tried to make a new step whose size"
152 "was too small, or there was no change in the energy since"
153 "last step. Either way, we regard the minimization as"
154 "converged to within the available machine precision,"
155 "given your starting configuration and EM parameters.\n%s%s",
157 sizeof(real)<sizeof(double) ?
158 "\nDouble precision normally gives you higher accuracy, but"
159 "this is often not needed for preparing to run molecular"
163 "You might need to increase your constraint accuracy, or turn\n"
164 "off constraints altogether (set constraints = none in mdp file)\n" :
167 fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
172 static void print_converged(FILE *fp,const char *alg,real ftol,
173 gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
174 real epot,real fmax, int nfmax, real fnorm)
176 char buf[STEPSTRSIZE];
179 fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
180 alg,ftol,gmx_step_str(count,buf));
181 else if(count<nsteps)
182 fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
183 "but did not reach the requested Fmax < %g.\n",
184 alg,gmx_step_str(count,buf),ftol);
186 fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
187 alg,ftol,gmx_step_str(count,buf));
190 fprintf(fp,"Potential Energy = %21.14e\n",epot);
191 fprintf(fp,"Maximum force = %21.14e on atom %d\n",fmax,nfmax+1);
192 fprintf(fp,"Norm of force = %21.14e\n",fnorm);
194 fprintf(fp,"Potential Energy = %14.7e\n",epot);
195 fprintf(fp,"Maximum force = %14.7e on atom %d\n",fmax,nfmax+1);
196 fprintf(fp,"Norm of force = %14.7e\n",fnorm);
200 static void get_f_norm_max(t_commrec *cr,
201 t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
202 real *fnorm,real *fmax,int *a_fmax)
205 real fmax2,fmax2_0,fam;
206 int la_max,a_max,start,end,i,m,gf;
208 /* This routine finds the largest force and returns it.
209 * On parallel machines the global max is taken.
215 start = mdatoms->start;
216 end = mdatoms->homenr + start;
217 if (mdatoms->cFREEZE) {
218 for(i=start; i<end; i++) {
219 gf = mdatoms->cFREEZE[i];
222 if (!opts->nFreeze[gf][m])
231 for(i=start; i<end; i++) {
241 if (la_max >= 0 && DOMAINDECOMP(cr)) {
242 a_max = cr->dd->gatindex[la_max];
247 snew(sum,2*cr->nnodes+1);
248 sum[2*cr->nodeid] = fmax2;
249 sum[2*cr->nodeid+1] = a_max;
250 sum[2*cr->nnodes] = fnorm2;
251 gmx_sumd(2*cr->nnodes+1,sum,cr);
252 fnorm2 = sum[2*cr->nnodes];
253 /* Determine the global maximum */
254 for(i=0; i<cr->nnodes; i++) {
255 if (sum[2*i] > fmax2) {
257 a_max = (int)(sum[2*i+1] + 0.5);
264 *fnorm = sqrt(fnorm2);
271 static void get_state_f_norm_max(t_commrec *cr,
272 t_grpopts *opts,t_mdatoms *mdatoms,
275 get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
278 void init_em(FILE *fplog,const char *title,
279 t_commrec *cr,t_inputrec *ir,
280 t_state *state_global,gmx_mtop_t *top_global,
281 em_state_t *ems,gmx_localtop_t **top,
282 rvec **f,rvec **f_global,
283 t_nrnb *nrnb,rvec mu_tot,
284 t_forcerec *fr,gmx_enerdata_t **enerd,
285 t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
286 gmx_vsite_t *vsite,gmx_constr_t constr,
287 int nfile,const t_filenm fnm[],
288 gmx_mdoutf_t **outf,t_mdebin **mdebin)
295 fprintf(fplog,"Initiating %s\n",title);
298 state_global->ngtc = 0;
300 /* Initialize lambda variables */
301 initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
305 if (DOMAINDECOMP(cr))
307 *top = dd_init_local_top(top_global);
309 dd_init_local_state(cr->dd,state_global,&ems->s);
313 /* Distribute the charge groups over the nodes from the master node */
314 dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
315 state_global,top_global,ir,
316 &ems->s,&ems->f,mdatoms,*top,
317 fr,vsite,NULL,constr,
319 dd_store_state(cr->dd,&ems->s);
323 snew(*f_global,top_global->natoms);
333 snew(*f,top_global->natoms);
335 /* Just copy the state */
336 ems->s = *state_global;
337 snew(ems->s.x,ems->s.nalloc);
338 snew(ems->f,ems->s.nalloc);
339 for(i=0; i<state_global->natoms; i++)
341 copy_rvec(state_global->x[i],ems->s.x[i]);
343 copy_mat(state_global->box,ems->s.box);
345 if (PAR(cr) && ir->eI != eiNM)
347 /* Initialize the particle decomposition and split the topology */
348 *top = split_system(fplog,top_global,ir,cr);
350 pd_cg_range(cr,&fr->cg0,&fr->hcg);
354 *top = gmx_mtop_generate_local_top(top_global,ir);
358 forcerec_set_excl_load(fr,*top,cr);
360 init_bonded_thread_force_reduction(fr,&(*top)->idef);
362 if (ir->ePBC != epbcNONE && !fr->bMolPBC)
364 *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
373 pd_at_range(cr,&start,&homenr);
379 homenr = top_global->natoms;
381 atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
382 update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
386 set_vsite_top(vsite,*top,mdatoms,cr);
392 if (ir->eConstrAlg == econtSHAKE &&
393 gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
395 gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
396 econstr_names[econtSHAKE],econstr_names[econtLINCS]);
399 if (!DOMAINDECOMP(cr))
401 set_constraints(constr,*top,ir,mdatoms,cr);
404 if (!ir->bContinuation)
406 /* Constrain the starting coordinates */
408 constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
409 ir,NULL,cr,-1,0,mdatoms,
410 ems->s.x,ems->s.x,NULL,fr->bMolPBC,ems->s.box,
411 ems->s.lambda[efptFEP],&dvdlambda,
412 NULL,NULL,nrnb,econqCoord,FALSE,0,0);
418 *gstat = global_stat_init(ir);
421 *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
424 init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
429 /* Init bin for energy stuff */
430 *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
434 calc_shifts(ems->s.box,fr->shift_vec);
437 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
438 gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
440 if (!(cr->duty & DUTY_PME)) {
441 /* Tell the PME only node to finish */
442 gmx_pme_send_finish(cr);
447 em_time_end(fplog,cr,runtime,wcycle);
450 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
459 static void copy_em_coords(em_state_t *ems,t_state *state)
463 for(i=0; (i<state->natoms); i++)
465 copy_rvec(ems->s.x[i],state->x[i]);
469 static void write_em_traj(FILE *fplog,t_commrec *cr,
471 gmx_bool bX,gmx_bool bF,const char *confout,
472 gmx_mtop_t *top_global,
473 t_inputrec *ir,gmx_large_int_t step,
475 t_state *state_global,rvec *f_global)
479 if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
481 copy_em_coords(state,state_global);
486 if (bX) { mdof_flags |= MDOF_X; }
487 if (bF) { mdof_flags |= MDOF_F; }
488 write_traj(fplog,cr,outf,mdof_flags,
489 top_global,step,(double)step,
490 &state->s,state_global,state->f,f_global,NULL,NULL);
492 if (confout != NULL && MASTER(cr))
494 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
496 /* Make molecules whole only for confout writing */
497 do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
501 write_sto_conf_mtop(confout,
502 *top_global->name,top_global,
503 state_global->x,NULL,ir->ePBC,state_global->box);
507 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
509 em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
510 gmx_constr_t constr,gmx_localtop_t *top,
511 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
512 gmx_large_int_t count)
524 if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
526 gmx_incons("state mismatch in do_em_step");
529 s2->flags = s1->flags;
531 if (s2->nalloc != s1->nalloc)
533 s2->nalloc = s1->nalloc;
534 srenew(s2->x,s1->nalloc);
535 srenew(ems2->f, s1->nalloc);
536 if (s2->flags & (1<<estCGP))
538 srenew(s2->cg_p, s1->nalloc);
542 s2->natoms = s1->natoms;
543 copy_mat(s1->box,s2->box);
544 /* Copy free energy state */
545 for (i=0;i<efptNR;i++)
547 s2->lambda[i] = s1->lambda[i];
549 copy_mat(s1->box,s2->box);
552 end = md->start + md->homenr;
557 #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
562 #pragma omp for schedule(static) nowait
563 for(i=start; i<end; i++)
571 if (ir->opts.nFreeze[gf][m])
577 x2[i][m] = x1[i][m] + a*f[i][m];
582 if (s2->flags & (1<<estCGP))
584 /* Copy the CG p vector */
587 #pragma omp for schedule(static) nowait
588 for(i=start; i<end; i++)
590 copy_rvec(x1[i],x2[i]);
594 if (DOMAINDECOMP(cr))
596 s2->ddp_count = s1->ddp_count;
597 if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
600 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
601 srenew(s2->cg_gl,s2->cg_gl_nalloc);
604 s2->ncg_gl = s1->ncg_gl;
605 #pragma omp for schedule(static) nowait
606 for(i=0; i<s2->ncg_gl; i++)
608 s2->cg_gl[i] = s1->cg_gl[i];
610 s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
616 wallcycle_start(wcycle,ewcCONSTR);
618 constrain(NULL,TRUE,TRUE,constr,&top->idef,
619 ir,NULL,cr,count,0,md,
620 s1->x,s2->x,NULL,bMolPBC,s2->box,
621 s2->lambda[efptBONDED],&dvdlambda,
622 NULL,NULL,nrnb,econqCoord,FALSE,0,0);
623 wallcycle_stop(wcycle,ewcCONSTR);
627 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
628 gmx_mtop_t *top_global,t_inputrec *ir,
629 em_state_t *ems,gmx_localtop_t *top,
630 t_mdatoms *mdatoms,t_forcerec *fr,
631 gmx_vsite_t *vsite,gmx_constr_t constr,
632 t_nrnb *nrnb,gmx_wallcycle_t wcycle)
634 /* Repartition the domain decomposition */
635 wallcycle_start(wcycle,ewcDOMDEC);
636 dd_partition_system(fplog,step,cr,FALSE,1,
639 mdatoms,top,fr,vsite,NULL,constr,
641 dd_store_state(cr->dd,&ems->s);
642 wallcycle_stop(wcycle,ewcDOMDEC);
645 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
646 t_state *state_global,gmx_mtop_t *top_global,
647 em_state_t *ems,gmx_localtop_t *top,
648 t_inputrec *inputrec,
649 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
650 gmx_global_stat_t gstat,
651 gmx_vsite_t *vsite,gmx_constr_t constr,
653 t_graph *graph,t_mdatoms *mdatoms,
654 t_forcerec *fr,rvec mu_tot,
655 gmx_enerdata_t *enerd,tensor vir,tensor pres,
656 gmx_large_int_t count,gmx_bool bFirst)
661 tensor force_vir,shake_vir,ekin;
662 real dvdlambda,prescorr,enercorr,dvdlcorr;
665 /* Set the time to the initial time, the time does not change during EM */
666 t = inputrec->init_t;
669 (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
670 /* This the first state or an old state used before the last ns */
674 if (inputrec->nstlist > 0) {
676 } else if (inputrec->nstlist == -1) {
677 nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
679 gmx_sumi(1,&nabnsb,cr);
685 construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
686 top->idef.iparams,top->idef.il,
687 fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
689 if (DOMAINDECOMP(cr)) {
691 /* Repartition the domain decomposition */
692 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
693 ems,top,mdatoms,fr,vsite,constr,
698 /* Calc force & energy on new trial position */
699 /* do_force always puts the charge groups in the box and shifts again
700 * We do not unshift, so molecules are always whole in congrad.c
702 do_force(fplog,cr,inputrec,
703 count,nrnb,wcycle,top,top_global,&top_global->groups,
704 ems->s.box,ems->s.x,&ems->s.hist,
705 ems->f,force_vir,mdatoms,enerd,fcd,
706 ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
707 GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
708 GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
709 (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
711 /* Clear the unused shake virial and pressure */
712 clear_mat(shake_vir);
715 /* Communicate stuff when parallel */
716 if (PAR(cr) && inputrec->eI != eiNM)
718 wallcycle_start(wcycle,ewcMoveE);
720 global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
721 inputrec,NULL,NULL,NULL,1,&terminate,
722 top_global,&ems->s,FALSE,
728 wallcycle_stop(wcycle,ewcMoveE);
731 /* Calculate long range corrections to pressure and energy */
732 calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
733 pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
734 enerd->term[F_DISPCORR] = enercorr;
735 enerd->term[F_EPOT] += enercorr;
736 enerd->term[F_PRES] += prescorr;
737 enerd->term[F_DVDL] += dvdlcorr;
739 ems->epot = enerd->term[F_EPOT];
742 /* Project out the constraint components of the force */
743 wallcycle_start(wcycle,ewcCONSTR);
745 constrain(NULL,FALSE,FALSE,constr,&top->idef,
746 inputrec,NULL,cr,count,0,mdatoms,
747 ems->s.x,ems->f,ems->f,fr->bMolPBC,ems->s.box,
748 ems->s.lambda[efptBONDED],&dvdlambda,
749 NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
750 if (fr->bSepDVDL && fplog)
751 fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
752 enerd->term[F_DVDL_BONDED] += dvdlambda;
753 m_add(force_vir,shake_vir,vir);
754 wallcycle_stop(wcycle,ewcCONSTR);
756 copy_mat(force_vir,vir);
760 enerd->term[F_PRES] =
761 calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
763 sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
765 if (EI_ENERGY_MINIMIZATION(inputrec->eI))
767 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
771 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
773 em_state_t *s_min,em_state_t *s_b)
777 int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
779 unsigned char *grpnrFREEZE;
782 fprintf(debug,"Doing reorder_partsum\n");
787 cgs_gl = dd_charge_groups_global(cr->dd);
788 index = cgs_gl->index;
790 /* Collect fm in a global vector fmg.
791 * This conflicts with the spirit of domain decomposition,
792 * but to fully optimize this a much more complicated algorithm is required.
794 snew(fmg,mtop->natoms);
796 ncg = s_min->s.ncg_gl;
797 cg_gl = s_min->s.cg_gl;
799 for(c=0; c<ncg; c++) {
803 for(a=a0; a<a1; a++) {
804 copy_rvec(fm[i],fmg[a]);
808 gmx_sum(mtop->natoms*3,fmg[0],cr);
810 /* Now we will determine the part of the sum for the cgs in state s_b */
812 cg_gl = s_b->s.cg_gl;
816 grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
817 for(c=0; c<ncg; c++) {
821 for(a=a0; a<a1; a++) {
822 if (mdatoms->cFREEZE && grpnrFREEZE) {
825 for(m=0; m<DIM; m++) {
826 if (!opts->nFreeze[gf][m]) {
827 partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
839 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
841 em_state_t *s_min,em_state_t *s_b)
847 /* This is just the classical Polak-Ribiere calculation of beta;
848 * it looks a bit complicated since we take freeze groups into account,
849 * and might have to sum it in parallel runs.
852 if (!DOMAINDECOMP(cr) ||
853 (s_min->s.ddp_count == cr->dd->ddp_count &&
854 s_b->s.ddp_count == cr->dd->ddp_count)) {
859 /* This part of code can be incorrect with DD,
860 * since the atom ordering in s_b and s_min might differ.
862 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
863 if (mdatoms->cFREEZE)
864 gf = mdatoms->cFREEZE[i];
866 if (!opts->nFreeze[gf][m]) {
867 sum += (fb[i][m] - fm[i][m])*fb[i][m];
871 /* We need to reorder cgs while summing */
872 sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
877 return sum/sqr(s_min->fnorm);
880 double do_cg(FILE *fplog,t_commrec *cr,
881 int nfile,const t_filenm fnm[],
882 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
884 gmx_vsite_t *vsite,gmx_constr_t constr,
886 t_inputrec *inputrec,
887 gmx_mtop_t *top_global,t_fcdata *fcd,
888 t_state *state_global,
890 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
893 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
895 real cpt_period,real max_hours,
896 const char *deviceOptions,
898 gmx_runtime_t *runtime)
900 const char *CG="Polak-Ribiere Conjugate Gradients";
902 em_state_t *s_min,*s_a,*s_b,*s_c;
904 gmx_enerdata_t *enerd;
906 gmx_global_stat_t gstat;
908 rvec *f_global,*p,*sf,*sfm;
909 double gpa,gpb,gpc,tmp,sum[2],minstep;
916 gmx_bool converged,foundlower;
918 gmx_bool do_log=FALSE,do_ene=FALSE,do_x,do_f;
920 int number_steps,neval=0,nstcg=inputrec->nstcgsteep;
922 int i,m,gf,step,nminstep;
927 s_min = init_em_state();
928 s_a = init_em_state();
929 s_b = init_em_state();
930 s_c = init_em_state();
932 /* Init em and store the local state in s_min */
933 init_em(fplog,CG,cr,inputrec,
934 state_global,top_global,s_min,&top,&f,&f_global,
935 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
936 nfile,fnm,&outf,&mdebin);
938 /* Print to log file */
939 print_em_start(fplog,cr,runtime,wcycle,CG);
941 /* Max number of steps */
942 number_steps=inputrec->nsteps;
945 sp_header(stderr,CG,inputrec->em_tol,number_steps);
947 sp_header(fplog,CG,inputrec->em_tol,number_steps);
949 /* Call the force routine and some auxiliary (neighboursearching etc.) */
950 /* do_force always puts the charge groups in the box and shifts again
951 * We do not unshift, so molecules are always whole in congrad.c
953 evaluate_energy(fplog,bVerbose,cr,
954 state_global,top_global,s_min,top,
955 inputrec,nrnb,wcycle,gstat,
956 vsite,constr,fcd,graph,mdatoms,fr,
957 mu_tot,enerd,vir,pres,-1,TRUE);
961 /* Copy stuff to the energy bin for easy printing etc. */
962 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
963 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
964 NULL,NULL,vir,pres,NULL,mu_tot,constr);
966 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
967 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
968 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
972 /* Estimate/guess the initial stepsize */
973 stepsize = inputrec->em_stepsize/s_min->fnorm;
976 fprintf(stderr," F-max = %12.5e on atom %d\n",
977 s_min->fmax,s_min->a_fmax+1);
978 fprintf(stderr," F-Norm = %12.5e\n",
979 s_min->fnorm/sqrt(state_global->natoms));
980 fprintf(stderr,"\n");
981 /* and copy to the log file too... */
982 fprintf(fplog," F-max = %12.5e on atom %d\n",
983 s_min->fmax,s_min->a_fmax+1);
984 fprintf(fplog," F-Norm = %12.5e\n",
985 s_min->fnorm/sqrt(state_global->natoms));
988 /* Start the loop over CG steps.
989 * Each successful step is counted, and we continue until
990 * we either converge or reach the max number of steps.
993 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
995 /* start taking steps in a new direction
996 * First time we enter the routine, beta=0, and the direction is
997 * simply the negative gradient.
1000 /* Calculate the new direction in p, and the gradient in this direction, gpa */
1005 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1006 if (mdatoms->cFREEZE)
1007 gf = mdatoms->cFREEZE[i];
1008 for(m=0; m<DIM; m++) {
1009 if (!inputrec->opts.nFreeze[gf][m]) {
1010 p[i][m] = sf[i][m] + beta*p[i][m];
1011 gpa -= p[i][m]*sf[i][m];
1012 /* f is negative gradient, thus the sign */
1019 /* Sum the gradient along the line across CPUs */
1021 gmx_sumd(1,&gpa,cr);
1023 /* Calculate the norm of the search vector */
1024 get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
1026 /* Just in case stepsize reaches zero due to numerical precision... */
1028 stepsize = inputrec->em_stepsize/pnorm;
1031 * Double check the value of the derivative in the search direction.
1032 * If it is positive it must be due to the old information in the
1033 * CG formula, so just remove that and start over with beta=0.
1034 * This corresponds to a steepest descent step.
1038 step--; /* Don't count this step since we are restarting */
1039 continue; /* Go back to the beginning of the big for-loop */
1042 /* Calculate minimum allowed stepsize, before the average (norm)
1043 * relative change in coordinate is smaller than precision
1046 for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1047 for(m=0; m<DIM; m++) {
1048 tmp = fabs(s_min->s.x[i][m]);
1055 /* Add up from all CPUs */
1057 gmx_sumd(1,&minstep,cr);
1059 minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1061 if(stepsize<minstep) {
1066 /* Write coordinates if necessary */
1067 do_x = do_per_step(step,inputrec->nstxout);
1068 do_f = do_per_step(step,inputrec->nstfout);
1070 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1071 top_global,inputrec,step,
1072 s_min,state_global,f_global);
1074 /* Take a step downhill.
1075 * In theory, we should minimize the function along this direction.
1076 * That is quite possible, but it turns out to take 5-10 function evaluations
1077 * for each line. However, we dont really need to find the exact minimum -
1078 * it is much better to start a new CG step in a modified direction as soon
1079 * as we are close to it. This will save a lot of energy evaluations.
1081 * In practice, we just try to take a single step.
1082 * If it worked (i.e. lowered the energy), we increase the stepsize but
1083 * the continue straight to the next CG step without trying to find any minimum.
1084 * If it didn't work (higher energy), there must be a minimum somewhere between
1085 * the old position and the new one.
1087 * Due to the finite numerical accuracy, it turns out that it is a good idea
1088 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1089 * This leads to lower final energies in the tests I've done. / Erik
1091 s_a->epot = s_min->epot;
1093 c = a + stepsize; /* reference position along line is zero */
1095 if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1096 em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1097 s_min,top,mdatoms,fr,vsite,constr,
1101 /* Take a trial step (new coords in s_c) */
1102 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,c,s_min->s.cg_p,s_c,
1103 constr,top,nrnb,wcycle,-1);
1106 /* Calculate energy for the trial step */
1107 evaluate_energy(fplog,bVerbose,cr,
1108 state_global,top_global,s_c,top,
1109 inputrec,nrnb,wcycle,gstat,
1110 vsite,constr,fcd,graph,mdatoms,fr,
1111 mu_tot,enerd,vir,pres,-1,FALSE);
1113 /* Calc derivative along line */
1117 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1118 for(m=0; m<DIM; m++)
1119 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1121 /* Sum the gradient along the line across CPUs */
1123 gmx_sumd(1,&gpc,cr);
1125 /* This is the max amount of increase in energy we tolerate */
1126 tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1128 /* Accept the step if the energy is lower, or if it is not significantly higher
1129 * and the line derivative is still negative.
1131 if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1133 /* Great, we found a better energy. Increase step for next iteration
1134 * if we are still going down, decrease it otherwise
1137 stepsize *= 1.618034; /* The golden section */
1139 stepsize *= 0.618034; /* 1/golden section */
1141 /* New energy is the same or higher. We will have to do some work
1142 * to find a smaller value in the interval. Take smaller step next time!
1145 stepsize *= 0.618034;
1151 /* OK, if we didn't find a lower value we will have to locate one now - there must
1152 * be one in the interval [a=0,c].
1153 * The same thing is valid here, though: Don't spend dozens of iterations to find
1154 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1155 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1157 * I also have a safeguard for potentially really patological functions so we never
1158 * take more than 20 steps before we give up ...
1160 * If we already found a lower value we just skip this step and continue to the update.
1166 /* Select a new trial point.
1167 * If the derivatives at points a & c have different sign we interpolate to zero,
1168 * otherwise just do a bisection.
1171 b = a + gpa*(a-c)/(gpc-gpa);
1175 /* safeguard if interpolation close to machine accuracy causes errors:
1176 * never go outside the interval
1181 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1182 /* Reload the old state */
1183 em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1184 s_min,top,mdatoms,fr,vsite,constr,
1188 /* Take a trial step to this new point - new coords in s_b */
1189 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,b,s_min->s.cg_p,s_b,
1190 constr,top,nrnb,wcycle,-1);
1193 /* Calculate energy for the trial step */
1194 evaluate_energy(fplog,bVerbose,cr,
1195 state_global,top_global,s_b,top,
1196 inputrec,nrnb,wcycle,gstat,
1197 vsite,constr,fcd,graph,mdatoms,fr,
1198 mu_tot,enerd,vir,pres,-1,FALSE);
1200 /* p does not change within a step, but since the domain decomposition
1201 * might change, we have to use cg_p of s_b here.
1206 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1207 for(m=0; m<DIM; m++)
1208 gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1210 /* Sum the gradient along the line across CPUs */
1212 gmx_sumd(1,&gpb,cr);
1215 fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1216 s_a->epot,s_b->epot,s_c->epot,gpb);
1218 epot_repl = s_b->epot;
1220 /* Keep one of the intervals based on the value of the derivative at the new point */
1222 /* Replace c endpoint with b */
1223 swap_em_state(s_b,s_c);
1227 /* Replace a endpoint with b */
1228 swap_em_state(s_b,s_a);
1234 * Stop search as soon as we find a value smaller than the endpoints.
1235 * Never run more than 20 steps, no matter what.
1238 } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1241 if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1243 /* OK. We couldn't find a significantly lower energy.
1244 * If beta==0 this was steepest descent, and then we give up.
1245 * If not, set beta=0 and restart with steepest descent before quitting.
1252 /* Reset memory before giving up */
1258 /* Select min energy state of A & C, put the best in B.
1260 if (s_c->epot < s_a->epot) {
1262 fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1263 s_c->epot,s_a->epot);
1264 swap_em_state(s_b,s_c);
1269 fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1270 s_a->epot,s_c->epot);
1271 swap_em_state(s_b,s_a);
1278 fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1280 swap_em_state(s_b,s_c);
1285 /* new search direction */
1286 /* beta = 0 means forget all memory and restart with steepest descents. */
1287 if (nstcg && ((step % nstcg)==0))
1290 /* s_min->fnorm cannot be zero, because then we would have converged
1294 /* Polak-Ribiere update.
1295 * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1297 beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1299 /* Limit beta to prevent oscillations */
1300 if (fabs(beta) > 5.0)
1304 /* update positions */
1305 swap_em_state(s_min,s_b);
1308 /* Print it if necessary */
1311 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1312 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1313 s_min->fmax,s_min->a_fmax+1);
1314 /* Store the new (lower) energies */
1315 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1316 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
1317 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1319 do_log = do_per_step(step,inputrec->nstlog);
1320 do_ene = do_per_step(step,inputrec->nstenergy);
1322 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1323 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1324 do_log ? fplog : NULL,step,step,eprNORMAL,
1325 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1328 /* Stop when the maximum force lies below tolerance.
1329 * If we have reached machine precision, converged is already set to true.
1331 converged = converged || (s_min->fmax < inputrec->em_tol);
1333 } /* End of the loop */
1336 step--; /* we never took that last step in this case */
1338 if (s_min->fmax > inputrec->em_tol)
1342 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1343 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1349 /* If we printed energy and/or logfile last step (which was the last step)
1350 * we don't have to do it again, but otherwise print the final values.
1353 /* Write final value to log since we didn't do anything the last step */
1354 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1356 if (!do_ene || !do_log) {
1357 /* Write final energy file entries */
1358 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1359 !do_log ? fplog : NULL,step,step,eprNORMAL,
1360 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1364 /* Print some stuff... */
1366 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1369 * For accurate normal mode calculation it is imperative that we
1370 * store the last conformation into the full precision binary trajectory.
1372 * However, we should only do it if we did NOT already write this step
1373 * above (which we did if do_x or do_f was true).
1375 do_x = !do_per_step(step,inputrec->nstxout);
1376 do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1378 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1379 top_global,inputrec,step,
1380 s_min,state_global,f_global);
1382 fnormn = s_min->fnorm/sqrt(state_global->natoms);
1385 print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1386 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1387 print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1388 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1390 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1393 finish_em(fplog,cr,outf,runtime,wcycle);
1395 /* To print the actual number of steps we needed somewhere */
1396 runtime->nsteps_done = step;
1399 } /* That's all folks */
1402 double do_lbfgs(FILE *fplog,t_commrec *cr,
1403 int nfile,const t_filenm fnm[],
1404 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1406 gmx_vsite_t *vsite,gmx_constr_t constr,
1408 t_inputrec *inputrec,
1409 gmx_mtop_t *top_global,t_fcdata *fcd,
1412 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1415 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1416 gmx_membed_t membed,
1417 real cpt_period,real max_hours,
1418 const char *deviceOptions,
1419 unsigned long Flags,
1420 gmx_runtime_t *runtime)
1422 static const char *LBFGS="Low-Memory BFGS Minimizer";
1424 gmx_localtop_t *top;
1425 gmx_enerdata_t *enerd;
1427 gmx_global_stat_t gstat;
1430 int ncorr,nmaxcorr,point,cp,neval,nminstep;
1431 double stepsize,gpa,gpb,gpc,tmp,minstep;
1432 real *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1433 real *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1434 real a,b,c,maxdelta,delta;
1435 real diag,Epot0,Epot,EpotA,EpotB,EpotC;
1436 real dgdx,dgdg,sq,yr,beta;
1438 gmx_bool converged,first;
1441 gmx_bool do_log,do_ene,do_x,do_f,foundlower,*frozen;
1443 int start,end,number_steps;
1445 int i,k,m,n,nfmax,gf,step;
1451 gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1453 n = 3*state->natoms;
1454 nmaxcorr = inputrec->nbfgscorr;
1456 /* Allocate memory */
1457 /* Use pointers to real so we dont have to loop over both atoms and
1458 * dimensions all the time...
1459 * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1460 * that point to the same memory.
1474 snew(alpha,nmaxcorr);
1477 for(i=0;i<nmaxcorr;i++)
1481 for(i=0;i<nmaxcorr;i++)
1488 init_em(fplog,LBFGS,cr,inputrec,
1489 state,top_global,&ems,&top,&f,&f_global,
1490 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1491 nfile,fnm,&outf,&mdebin);
1492 /* Do_lbfgs is not completely updated like do_steep and do_cg,
1493 * so we free some memory again.
1498 xx = (real *)state->x;
1501 start = mdatoms->start;
1502 end = mdatoms->homenr + start;
1504 /* Print to log file */
1505 print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1507 do_log = do_ene = do_x = do_f = TRUE;
1509 /* Max number of steps */
1510 number_steps=inputrec->nsteps;
1512 /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1514 for(i=start; i<end; i++) {
1515 if (mdatoms->cFREEZE)
1516 gf = mdatoms->cFREEZE[i];
1517 for(m=0; m<DIM; m++)
1518 frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1521 sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1523 sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1526 construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1527 top->idef.iparams,top->idef.il,
1528 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1530 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1531 /* do_force always puts the charge groups in the box and shifts again
1532 * We do not unshift, so molecules are always whole
1537 evaluate_energy(fplog,bVerbose,cr,
1538 state,top_global,&ems,top,
1539 inputrec,nrnb,wcycle,gstat,
1540 vsite,constr,fcd,graph,mdatoms,fr,
1541 mu_tot,enerd,vir,pres,-1,TRUE);
1545 /* Copy stuff to the energy bin for easy printing etc. */
1546 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1547 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1548 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1550 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1551 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1552 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1556 /* This is the starting energy */
1557 Epot = enerd->term[F_EPOT];
1563 /* Set the initial step.
1564 * since it will be multiplied by the non-normalized search direction
1565 * vector (force vector the first time), we scale it by the
1566 * norm of the force.
1570 fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1571 fprintf(stderr," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1572 fprintf(stderr," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1573 fprintf(stderr,"\n");
1574 /* and copy to the log file too... */
1575 fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1576 fprintf(fplog," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1577 fprintf(fplog," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1578 fprintf(fplog,"\n");
1584 dx[point][i] = ff[i]; /* Initial search direction */
1588 stepsize = 1.0/fnorm;
1591 /* Start the loop over BFGS steps.
1592 * Each successful step is counted, and we continue until
1593 * we either converge or reach the max number of steps.
1598 /* Set the gradient from the force */
1600 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1602 /* Write coordinates if necessary */
1603 do_x = do_per_step(step,inputrec->nstxout);
1604 do_f = do_per_step(step,inputrec->nstfout);
1609 mdof_flags |= MDOF_X;
1614 mdof_flags |= MDOF_F;
1617 write_traj(fplog,cr,outf,mdof_flags,
1618 top_global,step,(real)step,state,state,f,f,NULL,NULL);
1620 /* Do the linesearching in the direction dx[point][0..(n-1)] */
1622 /* pointer to current direction - point=0 first time here */
1625 /* calculate line gradient */
1626 for(gpa=0,i=0;i<n;i++)
1629 /* Calculate minimum allowed stepsize, before the average (norm)
1630 * relative change in coordinate is smaller than precision
1632 for(minstep=0,i=0;i<n;i++) {
1639 minstep = GMX_REAL_EPS/sqrt(minstep/n);
1641 if(stepsize<minstep) {
1646 /* Store old forces and coordinates */
1658 /* Take a step downhill.
1659 * In theory, we should minimize the function along this direction.
1660 * That is quite possible, but it turns out to take 5-10 function evaluations
1661 * for each line. However, we dont really need to find the exact minimum -
1662 * it is much better to start a new BFGS step in a modified direction as soon
1663 * as we are close to it. This will save a lot of energy evaluations.
1665 * In practice, we just try to take a single step.
1666 * If it worked (i.e. lowered the energy), we increase the stepsize but
1667 * the continue straight to the next BFGS step without trying to find any minimum.
1668 * If it didn't work (higher energy), there must be a minimum somewhere between
1669 * the old position and the new one.
1671 * Due to the finite numerical accuracy, it turns out that it is a good idea
1672 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1673 * This leads to lower final energies in the tests I've done. / Erik
1678 c = a + stepsize; /* reference position along line is zero */
1680 /* Check stepsize first. We do not allow displacements
1681 * larger than emstep.
1691 if(maxdelta>inputrec->em_stepsize)
1693 } while(maxdelta>inputrec->em_stepsize);
1695 /* Take a trial step */
1697 xc[i] = lastx[i] + c*s[i];
1700 /* Calculate energy for the trial step */
1701 ems.s.x = (rvec *)xc;
1703 evaluate_energy(fplog,bVerbose,cr,
1704 state,top_global,&ems,top,
1705 inputrec,nrnb,wcycle,gstat,
1706 vsite,constr,fcd,graph,mdatoms,fr,
1707 mu_tot,enerd,vir,pres,step,FALSE);
1710 /* Calc derivative along line */
1711 for(gpc=0,i=0; i<n; i++) {
1712 gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
1714 /* Sum the gradient along the line across CPUs */
1716 gmx_sumd(1,&gpc,cr);
1718 /* This is the max amount of increase in energy we tolerate */
1719 tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1721 /* Accept the step if the energy is lower, or if it is not significantly higher
1722 * and the line derivative is still negative.
1724 if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1726 /* Great, we found a better energy. Increase step for next iteration
1727 * if we are still going down, decrease it otherwise
1730 stepsize *= 1.618034; /* The golden section */
1732 stepsize *= 0.618034; /* 1/golden section */
1734 /* New energy is the same or higher. We will have to do some work
1735 * to find a smaller value in the interval. Take smaller step next time!
1738 stepsize *= 0.618034;
1741 /* OK, if we didn't find a lower value we will have to locate one now - there must
1742 * be one in the interval [a=0,c].
1743 * The same thing is valid here, though: Don't spend dozens of iterations to find
1744 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1745 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1747 * I also have a safeguard for potentially really patological functions so we never
1748 * take more than 20 steps before we give up ...
1750 * If we already found a lower value we just skip this step and continue to the update.
1757 /* Select a new trial point.
1758 * If the derivatives at points a & c have different sign we interpolate to zero,
1759 * otherwise just do a bisection.
1763 b = a + gpa*(a-c)/(gpc-gpa);
1767 /* safeguard if interpolation close to machine accuracy causes errors:
1768 * never go outside the interval
1773 /* Take a trial step */
1775 xb[i] = lastx[i] + b*s[i];
1778 /* Calculate energy for the trial step */
1779 ems.s.x = (rvec *)xb;
1781 evaluate_energy(fplog,bVerbose,cr,
1782 state,top_global,&ems,top,
1783 inputrec,nrnb,wcycle,gstat,
1784 vsite,constr,fcd,graph,mdatoms,fr,
1785 mu_tot,enerd,vir,pres,step,FALSE);
1790 for(gpb=0,i=0; i<n; i++)
1791 gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
1793 /* Sum the gradient along the line across CPUs */
1795 gmx_sumd(1,&gpb,cr);
1797 /* Keep one of the intervals based on the value of the derivative at the new point */
1799 /* Replace c endpoint with b */
1803 /* swap coord pointers b/c */
1811 /* Replace a endpoint with b */
1815 /* swap coord pointers a/b */
1825 * Stop search as soon as we find a value smaller than the endpoints,
1826 * or if the tolerance is below machine precision.
1827 * Never run more than 20 steps, no matter what.
1830 } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1832 if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1833 /* OK. We couldn't find a significantly lower energy.
1834 * If ncorr==0 this was steepest descent, and then we give up.
1835 * If not, reset memory to restart as steepest descent before quitting.
1844 /* Search in gradient direction */
1847 /* Reset stepsize */
1848 stepsize = 1.0/fnorm;
1853 /* Select min energy state of A & C, put the best in xx/ff/Epot
1884 /* Update the memory information, and calculate a new
1885 * approximation of the inverse hessian
1888 /* Have new data in Epot, xx, ff */
1893 dg[point][i]=lastf[i]-ff[i];
1894 dx[point][i]*=stepsize;
1900 dgdg+=dg[point][i]*dg[point][i];
1901 dgdx+=dg[point][i]*dx[point][i];
1906 rho[point]=1.0/dgdx;
1918 /* Recursive update. First go back over the memory points */
1919 for(k=0;k<ncorr;k++) {
1928 alpha[cp]=rho[cp]*sq;
1931 p[i] -= alpha[cp]*dg[cp][i];
1937 /* And then go forward again */
1938 for(k=0;k<ncorr;k++) {
1941 yr += p[i]*dg[cp][i];
1944 beta = alpha[cp]-beta;
1947 p[i] += beta*dx[cp][i];
1956 dx[point][i] = p[i];
1962 /* Test whether the convergence criterion is met */
1963 get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1965 /* Print it if necessary */
1968 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1969 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1970 /* Store the new (lower) energies */
1971 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1972 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1973 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1974 do_log = do_per_step(step,inputrec->nstlog);
1975 do_ene = do_per_step(step,inputrec->nstenergy);
1977 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1978 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1979 do_log ? fplog : NULL,step,step,eprNORMAL,
1980 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1983 /* Stop when the maximum force lies below tolerance.
1984 * If we have reached machine precision, converged is already set to true.
1987 converged = converged || (fmax < inputrec->em_tol);
1989 } /* End of the loop */
1992 step--; /* we never took that last step in this case */
1994 if(fmax>inputrec->em_tol)
1998 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1999 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
2004 /* If we printed energy and/or logfile last step (which was the last step)
2005 * we don't have to do it again, but otherwise print the final values.
2007 if(!do_log) /* Write final value to log since we didn't do anythin last step */
2008 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
2009 if(!do_ene || !do_log) /* Write final energy file entries */
2010 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
2011 !do_log ? fplog : NULL,step,step,eprNORMAL,
2012 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2014 /* Print some stuff... */
2016 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2019 * For accurate normal mode calculation it is imperative that we
2020 * store the last conformation into the full precision binary trajectory.
2022 * However, we should only do it if we did NOT already write this step
2023 * above (which we did if do_x or do_f was true).
2025 do_x = !do_per_step(step,inputrec->nstxout);
2026 do_f = !do_per_step(step,inputrec->nstfout);
2027 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
2028 top_global,inputrec,step,
2032 print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
2033 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2034 print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
2035 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2037 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
2040 finish_em(fplog,cr,outf,runtime,wcycle);
2042 /* To print the actual number of steps we needed somewhere */
2043 runtime->nsteps_done = step;
2046 } /* That's all folks */
2049 double do_steep(FILE *fplog,t_commrec *cr,
2050 int nfile, const t_filenm fnm[],
2051 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2053 gmx_vsite_t *vsite,gmx_constr_t constr,
2055 t_inputrec *inputrec,
2056 gmx_mtop_t *top_global,t_fcdata *fcd,
2057 t_state *state_global,
2059 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2062 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2063 gmx_membed_t membed,
2064 real cpt_period,real max_hours,
2065 const char *deviceOptions,
2066 unsigned long Flags,
2067 gmx_runtime_t *runtime)
2069 const char *SD="Steepest Descents";
2070 em_state_t *s_min,*s_try;
2072 gmx_localtop_t *top;
2073 gmx_enerdata_t *enerd;
2075 gmx_global_stat_t gstat;
2077 real stepsize,constepsize;
2078 real ustep,dvdlambda,fnormn;
2081 gmx_bool bDone,bAbort,do_x,do_f;
2086 int steps_accepted=0;
2090 s_min = init_em_state();
2091 s_try = init_em_state();
2093 /* Init em and store the local state in s_try */
2094 init_em(fplog,SD,cr,inputrec,
2095 state_global,top_global,s_try,&top,&f,&f_global,
2096 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2097 nfile,fnm,&outf,&mdebin);
2099 /* Print to log file */
2100 print_em_start(fplog,cr,runtime,wcycle,SD);
2102 /* Set variables for stepsize (in nm). This is the largest
2103 * step that we are going to make in any direction.
2105 ustep = inputrec->em_stepsize;
2108 /* Max number of steps */
2109 nsteps = inputrec->nsteps;
2112 /* Print to the screen */
2113 sp_header(stderr,SD,inputrec->em_tol,nsteps);
2115 sp_header(fplog,SD,inputrec->em_tol,nsteps);
2117 /**** HERE STARTS THE LOOP ****
2118 * count is the counter for the number of steps
2119 * bDone will be TRUE when the minimization has converged
2120 * bAbort will be TRUE when nsteps steps have been performed or when
2121 * the stepsize becomes smaller than is reasonable for machine precision
2126 while( !bDone && !bAbort ) {
2127 bAbort = (nsteps >= 0) && (count == nsteps);
2129 /* set new coordinates, except for first step */
2131 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,
2132 s_min,stepsize,s_min->f,s_try,
2133 constr,top,nrnb,wcycle,count);
2136 evaluate_energy(fplog,bVerbose,cr,
2137 state_global,top_global,s_try,top,
2138 inputrec,nrnb,wcycle,gstat,
2139 vsite,constr,fcd,graph,mdatoms,fr,
2140 mu_tot,enerd,vir,pres,count,count==0);
2143 print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
2146 s_min->epot = s_try->epot + 1;
2148 /* Print it if necessary */
2151 fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2152 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2153 (s_try->epot < s_min->epot) ? '\n' : '\r');
2156 if (s_try->epot < s_min->epot) {
2157 /* Store the new (lower) energies */
2158 upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2159 mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
2160 s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
2161 print_ebin(outf->fp_ene,TRUE,
2162 do_per_step(steps_accepted,inputrec->nstdisreout),
2163 do_per_step(steps_accepted,inputrec->nstorireout),
2164 fplog,count,count,eprNORMAL,TRUE,
2165 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2170 /* Now if the new energy is smaller than the previous...
2171 * or if this is the first step!
2172 * or if we did random steps!
2175 if ( (count==0) || (s_try->epot < s_min->epot) ) {
2178 /* Test whether the convergence criterion is met... */
2179 bDone = (s_try->fmax < inputrec->em_tol);
2181 /* Copy the arrays for force, positions and energy */
2182 /* The 'Min' array always holds the coords and forces of the minimal
2184 swap_em_state(s_min,s_try);
2188 /* Write to trn, if necessary */
2189 do_x = do_per_step(steps_accepted,inputrec->nstxout);
2190 do_f = do_per_step(steps_accepted,inputrec->nstfout);
2191 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2192 top_global,inputrec,count,
2193 s_min,state_global,f_global);
2196 /* If energy is not smaller make the step smaller... */
2199 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2200 /* Reload the old state */
2201 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2202 s_min,top,mdatoms,fr,vsite,constr,
2207 /* Determine new step */
2208 stepsize = ustep/s_min->fmax;
2210 /* Check if stepsize is too small, with 1 nm as a characteristic length */
2212 if (count == nsteps || ustep < 1e-12)
2214 if (count == nsteps || ustep < 1e-6)
2219 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2220 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2226 } /* End of the loop */
2228 /* Print some shit... */
2230 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2231 write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2232 top_global,inputrec,count,
2233 s_min,state_global,f_global);
2235 fnormn = s_min->fnorm/sqrt(state_global->natoms);
2238 print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2239 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2240 print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2241 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2244 finish_em(fplog,cr,outf,runtime,wcycle);
2246 /* To print the actual number of steps we needed somewhere */
2247 inputrec->nsteps=count;
2249 runtime->nsteps_done = count;
2252 } /* That's all folks */
2255 double do_nm(FILE *fplog,t_commrec *cr,
2256 int nfile,const t_filenm fnm[],
2257 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2259 gmx_vsite_t *vsite,gmx_constr_t constr,
2261 t_inputrec *inputrec,
2262 gmx_mtop_t *top_global,t_fcdata *fcd,
2263 t_state *state_global,
2265 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2268 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2269 gmx_membed_t membed,
2270 real cpt_period,real max_hours,
2271 const char *deviceOptions,
2272 unsigned long Flags,
2273 gmx_runtime_t *runtime)
2275 const char *NM = "Normal Mode Analysis";
2280 gmx_localtop_t *top;
2281 gmx_enerdata_t *enerd;
2283 gmx_global_stat_t gstat;
2285 real t,t0,lambda,lam0;
2290 gmx_bool bSparse; /* use sparse matrix storage format */
2292 gmx_sparsematrix_t * sparse_matrix = NULL;
2293 real * full_matrix = NULL;
2294 em_state_t * state_work;
2296 /* added with respect to mdrun */
2298 real der_range=10.0*sqrt(GMX_REAL_EPS);
2304 gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2307 state_work = init_em_state();
2309 /* Init em and store the local state in state_minimum */
2310 init_em(fplog,NM,cr,inputrec,
2311 state_global,top_global,state_work,&top,
2313 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2314 nfile,fnm,&outf,NULL);
2316 natoms = top_global->natoms;
2324 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2325 " which MIGHT not be accurate enough for normal mode analysis.\n"
2326 " Gromacs now uses sparse matrix storage, so the memory requirements\n"
2327 " are fairly modest even if you recompile in double precision.\n\n");
2331 /* Check if we can/should use sparse storage format.
2333 * Sparse format is only useful when the Hessian itself is sparse, which it
2334 * will be when we use a cutoff.
2335 * For small systems (n<1000) it is easier to always use full matrix format, though.
2337 if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2339 fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2342 else if(top_global->natoms < 1000)
2344 fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2349 fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2353 sz = DIM*top_global->natoms;
2355 fprintf(stderr,"Allocating Hessian memory...\n\n");
2359 sparse_matrix=gmx_sparsematrix_init(sz);
2360 sparse_matrix->compressed_symmetric = TRUE;
2364 snew(full_matrix,sz*sz);
2367 /* Initial values */
2368 t0 = inputrec->init_t;
2369 lam0 = inputrec->fepvals->init_lambda;
2377 /* Write start time and temperature */
2378 print_em_start(fplog,cr,runtime,wcycle,NM);
2380 /* fudge nr of steps to nr of atoms */
2381 inputrec->nsteps = natoms*2;
2385 fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2386 *(top_global->name),(int)inputrec->nsteps);
2389 nnodes = cr->nnodes;
2391 /* Make evaluate_energy do a single node force calculation */
2393 evaluate_energy(fplog,bVerbose,cr,
2394 state_global,top_global,state_work,top,
2395 inputrec,nrnb,wcycle,gstat,
2396 vsite,constr,fcd,graph,mdatoms,fr,
2397 mu_tot,enerd,vir,pres,-1,TRUE);
2398 cr->nnodes = nnodes;
2400 /* if forces are not small, warn user */
2401 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2405 fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2406 if (state_work->fmax > 1.0e-3)
2408 fprintf(stderr,"Maximum force probably not small enough to");
2409 fprintf(stderr," ensure that you are in an \nenergy well. ");
2410 fprintf(stderr,"Be aware that negative eigenvalues may occur");
2411 fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2415 /***********************************************************
2417 * Loop over all pairs in matrix
2419 * do_force called twice. Once with positive and
2420 * once with negative displacement
2422 ************************************************************/
2424 /* Steps are divided one by one over the nodes */
2425 for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2428 for (d=0; d<DIM; d++)
2430 x_min = state_work->s.x[atom][d];
2432 state_work->s.x[atom][d] = x_min - der_range;
2434 /* Make evaluate_energy do a single node force calculation */
2436 evaluate_energy(fplog,bVerbose,cr,
2437 state_global,top_global,state_work,top,
2438 inputrec,nrnb,wcycle,gstat,
2439 vsite,constr,fcd,graph,mdatoms,fr,
2440 mu_tot,enerd,vir,pres,atom*2,FALSE);
2442 for(i=0; i<natoms; i++)
2444 copy_rvec(state_work->f[i], fneg[i]);
2447 state_work->s.x[atom][d] = x_min + der_range;
2449 evaluate_energy(fplog,bVerbose,cr,
2450 state_global,top_global,state_work,top,
2451 inputrec,nrnb,wcycle,gstat,
2452 vsite,constr,fcd,graph,mdatoms,fr,
2453 mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2454 cr->nnodes = nnodes;
2456 /* x is restored to original */
2457 state_work->s.x[atom][d] = x_min;
2459 for(j=0; j<natoms; j++)
2461 for (k=0; (k<DIM); k++)
2464 -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2472 #define mpi_type MPI_DOUBLE
2474 #define mpi_type MPI_FLOAT
2476 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2477 cr->mpi_comm_mygroup);
2482 for(node=0; (node<nnodes && atom+node<natoms); node++)
2488 MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2489 cr->mpi_comm_mygroup,&stat);
2494 row = (atom + node)*DIM + d;
2496 for(j=0; j<natoms; j++)
2498 for(k=0; k<DIM; k++)
2504 if (col >= row && dfdx[j][k] != 0.0)
2506 gmx_sparsematrix_increment_value(sparse_matrix,
2507 row,col,dfdx[j][k]);
2512 full_matrix[row*sz+col] = dfdx[j][k];
2519 if (bVerbose && fplog)
2524 /* write progress */
2525 if (MASTER(cr) && bVerbose)
2527 fprintf(stderr,"\rFinished step %d out of %d",
2528 min(atom+nnodes,natoms),natoms);
2535 fprintf(stderr,"\n\nWriting Hessian...\n");
2536 gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2539 finish_em(fplog,cr,outf,runtime,wcycle);
2541 runtime->nsteps_done = natoms*2;