1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROwing Monsters And Cloning Shrimps
55 #include "gmx_fatal.h"
67 #include "md_support.h"
73 #include "gmx_wallcycle.h"
74 #include "mtop_util.h"
78 #include "gmx_omp_nthreads.h"
81 #include "gromacs/linearalgebra/mtxio.h"
82 #include "gromacs/linearalgebra/sparsematrix.h"
93 static em_state_t *init_em_state()
99 /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? */
100 snew(ems->s.lambda,efptNR);
105 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
106 gmx_wallcycle_t wcycle,
111 runtime_start(runtime);
113 sprintf(buf,"Started %s",name);
114 print_date_and_time(fplog,cr->nodeid,buf,NULL);
116 wallcycle_start(wcycle,ewcRUN);
118 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
119 gmx_wallcycle_t wcycle)
121 wallcycle_stop(wcycle,ewcRUN);
123 runtime_end(runtime);
126 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
129 fprintf(out,"%s:\n",minimizer);
130 fprintf(out," Tolerance (Fmax) = %12.5e\n",ftol);
131 fprintf(out," Number of steps = %12d\n",nsteps);
134 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
140 "\nEnergy minimization reached the maximum number"
141 "of steps before the forces reached the requested"
142 "precision Fmax < %g.\n",ftol);
147 "\nEnergy minimization has stopped, but the forces have"
148 "not converged to the requested precision Fmax < %g (which"
149 "may not be possible for your system). It stopped"
150 "because the algorithm tried to make a new step whose size"
151 "was too small, or there was no change in the energy since"
152 "last step. Either way, we regard the minimization as"
153 "converged to within the available machine precision,"
154 "given your starting configuration and EM parameters.\n%s%s",
156 sizeof(real)<sizeof(double) ?
157 "\nDouble precision normally gives you higher accuracy, but"
158 "this is often not needed for preparing to run molecular"
162 "You might need to increase your constraint accuracy, or turn\n"
163 "off constraints altogether (set constraints = none in mdp file)\n" :
166 fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
171 static void print_converged(FILE *fp,const char *alg,real ftol,
172 gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
173 real epot,real fmax, int nfmax, real fnorm)
175 char buf[STEPSTRSIZE];
178 fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
179 alg,ftol,gmx_step_str(count,buf));
180 else if(count<nsteps)
181 fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
182 "but did not reach the requested Fmax < %g.\n",
183 alg,gmx_step_str(count,buf),ftol);
185 fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
186 alg,ftol,gmx_step_str(count,buf));
189 fprintf(fp,"Potential Energy = %21.14e\n",epot);
190 fprintf(fp,"Maximum force = %21.14e on atom %d\n",fmax,nfmax+1);
191 fprintf(fp,"Norm of force = %21.14e\n",fnorm);
193 fprintf(fp,"Potential Energy = %14.7e\n",epot);
194 fprintf(fp,"Maximum force = %14.7e on atom %d\n",fmax,nfmax+1);
195 fprintf(fp,"Norm of force = %14.7e\n",fnorm);
199 static void get_f_norm_max(t_commrec *cr,
200 t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
201 real *fnorm,real *fmax,int *a_fmax)
204 real fmax2,fmax2_0,fam;
205 int la_max,a_max,start,end,i,m,gf;
207 /* This routine finds the largest force and returns it.
208 * On parallel machines the global max is taken.
214 start = mdatoms->start;
215 end = mdatoms->homenr + start;
216 if (mdatoms->cFREEZE) {
217 for(i=start; i<end; i++) {
218 gf = mdatoms->cFREEZE[i];
221 if (!opts->nFreeze[gf][m])
230 for(i=start; i<end; i++) {
240 if (la_max >= 0 && DOMAINDECOMP(cr)) {
241 a_max = cr->dd->gatindex[la_max];
246 snew(sum,2*cr->nnodes+1);
247 sum[2*cr->nodeid] = fmax2;
248 sum[2*cr->nodeid+1] = a_max;
249 sum[2*cr->nnodes] = fnorm2;
250 gmx_sumd(2*cr->nnodes+1,sum,cr);
251 fnorm2 = sum[2*cr->nnodes];
252 /* Determine the global maximum */
253 for(i=0; i<cr->nnodes; i++) {
254 if (sum[2*i] > fmax2) {
256 a_max = (int)(sum[2*i+1] + 0.5);
263 *fnorm = sqrt(fnorm2);
270 static void get_state_f_norm_max(t_commrec *cr,
271 t_grpopts *opts,t_mdatoms *mdatoms,
274 get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
277 void init_em(FILE *fplog,const char *title,
278 t_commrec *cr,t_inputrec *ir,
279 t_state *state_global,gmx_mtop_t *top_global,
280 em_state_t *ems,gmx_localtop_t **top,
281 rvec **f,rvec **f_global,
282 t_nrnb *nrnb,rvec mu_tot,
283 t_forcerec *fr,gmx_enerdata_t **enerd,
284 t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
285 gmx_vsite_t *vsite,gmx_constr_t constr,
286 int nfile,const t_filenm fnm[],
287 gmx_mdoutf_t **outf,t_mdebin **mdebin)
294 fprintf(fplog,"Initiating %s\n",title);
297 state_global->ngtc = 0;
299 /* Initialize lambda variables */
300 initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
304 if (DOMAINDECOMP(cr))
306 *top = dd_init_local_top(top_global);
308 dd_init_local_state(cr->dd,state_global,&ems->s);
312 /* Distribute the charge groups over the nodes from the master node */
313 dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
314 state_global,top_global,ir,
315 &ems->s,&ems->f,mdatoms,*top,
316 fr,vsite,NULL,constr,
318 dd_store_state(cr->dd,&ems->s);
322 snew(*f_global,top_global->natoms);
332 snew(*f,top_global->natoms);
334 /* Just copy the state */
335 ems->s = *state_global;
336 snew(ems->s.x,ems->s.nalloc);
337 snew(ems->f,ems->s.nalloc);
338 for(i=0; i<state_global->natoms; i++)
340 copy_rvec(state_global->x[i],ems->s.x[i]);
342 copy_mat(state_global->box,ems->s.box);
344 if (PAR(cr) && ir->eI != eiNM)
346 /* Initialize the particle decomposition and split the topology */
347 *top = split_system(fplog,top_global,ir,cr);
349 pd_cg_range(cr,&fr->cg0,&fr->hcg);
353 *top = gmx_mtop_generate_local_top(top_global,ir);
357 forcerec_set_excl_load(fr,*top,cr);
359 init_bonded_thread_force_reduction(fr,&(*top)->idef);
361 if (ir->ePBC != epbcNONE && !fr->bMolPBC)
363 *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
372 pd_at_range(cr,&start,&homenr);
378 homenr = top_global->natoms;
380 atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
381 update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
385 set_vsite_top(vsite,*top,mdatoms,cr);
391 if (ir->eConstrAlg == econtSHAKE &&
392 gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
394 gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
395 econstr_names[econtSHAKE],econstr_names[econtLINCS]);
398 if (!DOMAINDECOMP(cr))
400 set_constraints(constr,*top,ir,mdatoms,cr);
403 if (!ir->bContinuation)
405 /* Constrain the starting coordinates */
407 constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
408 ir,NULL,cr,-1,0,mdatoms,
409 ems->s.x,ems->s.x,NULL,fr->bMolPBC,ems->s.box,
410 ems->s.lambda[efptFEP],&dvdlambda,
411 NULL,NULL,nrnb,econqCoord,FALSE,0,0);
417 *gstat = global_stat_init(ir);
420 *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
423 init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
428 /* Init bin for energy stuff */
429 *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
433 calc_shifts(ems->s.box,fr->shift_vec);
436 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
437 gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
439 if (!(cr->duty & DUTY_PME)) {
440 /* Tell the PME only node to finish */
441 gmx_pme_send_finish(cr);
446 em_time_end(fplog,cr,runtime,wcycle);
449 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
458 static void copy_em_coords(em_state_t *ems,t_state *state)
462 for(i=0; (i<state->natoms); i++)
464 copy_rvec(ems->s.x[i],state->x[i]);
468 static void write_em_traj(FILE *fplog,t_commrec *cr,
470 gmx_bool bX,gmx_bool bF,const char *confout,
471 gmx_mtop_t *top_global,
472 t_inputrec *ir,gmx_large_int_t step,
474 t_state *state_global,rvec *f_global)
478 if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
480 copy_em_coords(state,state_global);
485 if (bX) { mdof_flags |= MDOF_X; }
486 if (bF) { mdof_flags |= MDOF_F; }
487 write_traj(fplog,cr,outf,mdof_flags,
488 top_global,step,(double)step,
489 &state->s,state_global,state->f,f_global,NULL,NULL);
491 if (confout != NULL && MASTER(cr))
493 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
495 /* Make molecules whole only for confout writing */
496 do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
500 write_sto_conf_mtop(confout,
501 *top_global->name,top_global,
502 state_global->x,NULL,ir->ePBC,state_global->box);
506 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
508 em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
509 gmx_constr_t constr,gmx_localtop_t *top,
510 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
511 gmx_large_int_t count)
523 if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
525 gmx_incons("state mismatch in do_em_step");
528 s2->flags = s1->flags;
530 if (s2->nalloc != s1->nalloc)
532 s2->nalloc = s1->nalloc;
533 srenew(s2->x,s1->nalloc);
534 srenew(ems2->f, s1->nalloc);
535 if (s2->flags & (1<<estCGP))
537 srenew(s2->cg_p, s1->nalloc);
541 s2->natoms = s1->natoms;
542 copy_mat(s1->box,s2->box);
543 /* Copy free energy state */
544 for (i=0;i<efptNR;i++)
546 s2->lambda[i] = s1->lambda[i];
548 copy_mat(s1->box,s2->box);
551 end = md->start + md->homenr;
556 #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
561 #pragma omp for schedule(static) nowait
562 for(i=start; i<end; i++)
570 if (ir->opts.nFreeze[gf][m])
576 x2[i][m] = x1[i][m] + a*f[i][m];
581 if (s2->flags & (1<<estCGP))
583 /* Copy the CG p vector */
586 #pragma omp for schedule(static) nowait
587 for(i=start; i<end; i++)
589 copy_rvec(x1[i],x2[i]);
593 if (DOMAINDECOMP(cr))
595 s2->ddp_count = s1->ddp_count;
596 if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
599 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
600 srenew(s2->cg_gl,s2->cg_gl_nalloc);
603 s2->ncg_gl = s1->ncg_gl;
604 #pragma omp for schedule(static) nowait
605 for(i=0; i<s2->ncg_gl; i++)
607 s2->cg_gl[i] = s1->cg_gl[i];
609 s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
615 wallcycle_start(wcycle,ewcCONSTR);
617 constrain(NULL,TRUE,TRUE,constr,&top->idef,
618 ir,NULL,cr,count,0,md,
619 s1->x,s2->x,NULL,bMolPBC,s2->box,
620 s2->lambda[efptBONDED],&dvdlambda,
621 NULL,NULL,nrnb,econqCoord,FALSE,0,0);
622 wallcycle_stop(wcycle,ewcCONSTR);
626 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
627 gmx_mtop_t *top_global,t_inputrec *ir,
628 em_state_t *ems,gmx_localtop_t *top,
629 t_mdatoms *mdatoms,t_forcerec *fr,
630 gmx_vsite_t *vsite,gmx_constr_t constr,
631 t_nrnb *nrnb,gmx_wallcycle_t wcycle)
633 /* Repartition the domain decomposition */
634 wallcycle_start(wcycle,ewcDOMDEC);
635 dd_partition_system(fplog,step,cr,FALSE,1,
638 mdatoms,top,fr,vsite,NULL,constr,
640 dd_store_state(cr->dd,&ems->s);
641 wallcycle_stop(wcycle,ewcDOMDEC);
644 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
645 t_state *state_global,gmx_mtop_t *top_global,
646 em_state_t *ems,gmx_localtop_t *top,
647 t_inputrec *inputrec,
648 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
649 gmx_global_stat_t gstat,
650 gmx_vsite_t *vsite,gmx_constr_t constr,
652 t_graph *graph,t_mdatoms *mdatoms,
653 t_forcerec *fr,rvec mu_tot,
654 gmx_enerdata_t *enerd,tensor vir,tensor pres,
655 gmx_large_int_t count,gmx_bool bFirst)
660 tensor force_vir,shake_vir,ekin;
661 real dvdlambda,prescorr,enercorr,dvdlcorr;
664 /* Set the time to the initial time, the time does not change during EM */
665 t = inputrec->init_t;
668 (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
669 /* This the first state or an old state used before the last ns */
673 if (inputrec->nstlist > 0) {
675 } else if (inputrec->nstlist == -1) {
676 nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
678 gmx_sumi(1,&nabnsb,cr);
684 construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
685 top->idef.iparams,top->idef.il,
686 fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
688 if (DOMAINDECOMP(cr)) {
690 /* Repartition the domain decomposition */
691 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
692 ems,top,mdatoms,fr,vsite,constr,
697 /* Calc force & energy on new trial position */
698 /* do_force always puts the charge groups in the box and shifts again
699 * We do not unshift, so molecules are always whole in congrad.c
701 do_force(fplog,cr,inputrec,
702 count,nrnb,wcycle,top,top_global,&top_global->groups,
703 ems->s.box,ems->s.x,&ems->s.hist,
704 ems->f,force_vir,mdatoms,enerd,fcd,
705 ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
706 GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
707 GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
708 (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
710 /* Clear the unused shake virial and pressure */
711 clear_mat(shake_vir);
714 /* Communicate stuff when parallel */
715 if (PAR(cr) && inputrec->eI != eiNM)
717 wallcycle_start(wcycle,ewcMoveE);
719 global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
720 inputrec,NULL,NULL,NULL,1,&terminate,
721 top_global,&ems->s,FALSE,
727 wallcycle_stop(wcycle,ewcMoveE);
730 /* Calculate long range corrections to pressure and energy */
731 calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
732 pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
733 enerd->term[F_DISPCORR] = enercorr;
734 enerd->term[F_EPOT] += enercorr;
735 enerd->term[F_PRES] += prescorr;
736 enerd->term[F_DVDL] += dvdlcorr;
738 ems->epot = enerd->term[F_EPOT];
741 /* Project out the constraint components of the force */
742 wallcycle_start(wcycle,ewcCONSTR);
744 constrain(NULL,FALSE,FALSE,constr,&top->idef,
745 inputrec,NULL,cr,count,0,mdatoms,
746 ems->s.x,ems->f,ems->f,fr->bMolPBC,ems->s.box,
747 ems->s.lambda[efptBONDED],&dvdlambda,
748 NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
749 if (fr->bSepDVDL && fplog)
750 fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
751 enerd->term[F_DVDL_BONDED] += dvdlambda;
752 m_add(force_vir,shake_vir,vir);
753 wallcycle_stop(wcycle,ewcCONSTR);
755 copy_mat(force_vir,vir);
759 enerd->term[F_PRES] =
760 calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
762 sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
764 if (EI_ENERGY_MINIMIZATION(inputrec->eI))
766 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
770 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
772 em_state_t *s_min,em_state_t *s_b)
776 int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
778 unsigned char *grpnrFREEZE;
781 fprintf(debug,"Doing reorder_partsum\n");
786 cgs_gl = dd_charge_groups_global(cr->dd);
787 index = cgs_gl->index;
789 /* Collect fm in a global vector fmg.
790 * This conflicts with the spirit of domain decomposition,
791 * but to fully optimize this a much more complicated algorithm is required.
793 snew(fmg,mtop->natoms);
795 ncg = s_min->s.ncg_gl;
796 cg_gl = s_min->s.cg_gl;
798 for(c=0; c<ncg; c++) {
802 for(a=a0; a<a1; a++) {
803 copy_rvec(fm[i],fmg[a]);
807 gmx_sum(mtop->natoms*3,fmg[0],cr);
809 /* Now we will determine the part of the sum for the cgs in state s_b */
811 cg_gl = s_b->s.cg_gl;
815 grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
816 for(c=0; c<ncg; c++) {
820 for(a=a0; a<a1; a++) {
821 if (mdatoms->cFREEZE && grpnrFREEZE) {
824 for(m=0; m<DIM; m++) {
825 if (!opts->nFreeze[gf][m]) {
826 partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
838 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
840 em_state_t *s_min,em_state_t *s_b)
846 /* This is just the classical Polak-Ribiere calculation of beta;
847 * it looks a bit complicated since we take freeze groups into account,
848 * and might have to sum it in parallel runs.
851 if (!DOMAINDECOMP(cr) ||
852 (s_min->s.ddp_count == cr->dd->ddp_count &&
853 s_b->s.ddp_count == cr->dd->ddp_count)) {
858 /* This part of code can be incorrect with DD,
859 * since the atom ordering in s_b and s_min might differ.
861 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
862 if (mdatoms->cFREEZE)
863 gf = mdatoms->cFREEZE[i];
865 if (!opts->nFreeze[gf][m]) {
866 sum += (fb[i][m] - fm[i][m])*fb[i][m];
870 /* We need to reorder cgs while summing */
871 sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
876 return sum/sqr(s_min->fnorm);
879 double do_cg(FILE *fplog,t_commrec *cr,
880 int nfile,const t_filenm fnm[],
881 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
883 gmx_vsite_t *vsite,gmx_constr_t constr,
885 t_inputrec *inputrec,
886 gmx_mtop_t *top_global,t_fcdata *fcd,
887 t_state *state_global,
889 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
892 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
894 real cpt_period,real max_hours,
895 const char *deviceOptions,
897 gmx_runtime_t *runtime)
899 const char *CG="Polak-Ribiere Conjugate Gradients";
901 em_state_t *s_min,*s_a,*s_b,*s_c;
903 gmx_enerdata_t *enerd;
905 gmx_global_stat_t gstat;
907 rvec *f_global,*p,*sf,*sfm;
908 double gpa,gpb,gpc,tmp,sum[2],minstep;
915 gmx_bool converged,foundlower;
917 gmx_bool do_log=FALSE,do_ene=FALSE,do_x,do_f;
919 int number_steps,neval=0,nstcg=inputrec->nstcgsteep;
921 int i,m,gf,step,nminstep;
926 s_min = init_em_state();
927 s_a = init_em_state();
928 s_b = init_em_state();
929 s_c = init_em_state();
931 /* Init em and store the local state in s_min */
932 init_em(fplog,CG,cr,inputrec,
933 state_global,top_global,s_min,&top,&f,&f_global,
934 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
935 nfile,fnm,&outf,&mdebin);
937 /* Print to log file */
938 print_em_start(fplog,cr,runtime,wcycle,CG);
940 /* Max number of steps */
941 number_steps=inputrec->nsteps;
944 sp_header(stderr,CG,inputrec->em_tol,number_steps);
946 sp_header(fplog,CG,inputrec->em_tol,number_steps);
948 /* Call the force routine and some auxiliary (neighboursearching etc.) */
949 /* do_force always puts the charge groups in the box and shifts again
950 * We do not unshift, so molecules are always whole in congrad.c
952 evaluate_energy(fplog,bVerbose,cr,
953 state_global,top_global,s_min,top,
954 inputrec,nrnb,wcycle,gstat,
955 vsite,constr,fcd,graph,mdatoms,fr,
956 mu_tot,enerd,vir,pres,-1,TRUE);
960 /* Copy stuff to the energy bin for easy printing etc. */
961 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
962 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
963 NULL,NULL,vir,pres,NULL,mu_tot,constr);
965 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
966 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
967 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
971 /* Estimate/guess the initial stepsize */
972 stepsize = inputrec->em_stepsize/s_min->fnorm;
975 fprintf(stderr," F-max = %12.5e on atom %d\n",
976 s_min->fmax,s_min->a_fmax+1);
977 fprintf(stderr," F-Norm = %12.5e\n",
978 s_min->fnorm/sqrt(state_global->natoms));
979 fprintf(stderr,"\n");
980 /* and copy to the log file too... */
981 fprintf(fplog," F-max = %12.5e on atom %d\n",
982 s_min->fmax,s_min->a_fmax+1);
983 fprintf(fplog," F-Norm = %12.5e\n",
984 s_min->fnorm/sqrt(state_global->natoms));
987 /* Start the loop over CG steps.
988 * Each successful step is counted, and we continue until
989 * we either converge or reach the max number of steps.
992 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
994 /* start taking steps in a new direction
995 * First time we enter the routine, beta=0, and the direction is
996 * simply the negative gradient.
999 /* Calculate the new direction in p, and the gradient in this direction, gpa */
1004 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1005 if (mdatoms->cFREEZE)
1006 gf = mdatoms->cFREEZE[i];
1007 for(m=0; m<DIM; m++) {
1008 if (!inputrec->opts.nFreeze[gf][m]) {
1009 p[i][m] = sf[i][m] + beta*p[i][m];
1010 gpa -= p[i][m]*sf[i][m];
1011 /* f is negative gradient, thus the sign */
1018 /* Sum the gradient along the line across CPUs */
1020 gmx_sumd(1,&gpa,cr);
1022 /* Calculate the norm of the search vector */
1023 get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
1025 /* Just in case stepsize reaches zero due to numerical precision... */
1027 stepsize = inputrec->em_stepsize/pnorm;
1030 * Double check the value of the derivative in the search direction.
1031 * If it is positive it must be due to the old information in the
1032 * CG formula, so just remove that and start over with beta=0.
1033 * This corresponds to a steepest descent step.
1037 step--; /* Don't count this step since we are restarting */
1038 continue; /* Go back to the beginning of the big for-loop */
1041 /* Calculate minimum allowed stepsize, before the average (norm)
1042 * relative change in coordinate is smaller than precision
1045 for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1046 for(m=0; m<DIM; m++) {
1047 tmp = fabs(s_min->s.x[i][m]);
1054 /* Add up from all CPUs */
1056 gmx_sumd(1,&minstep,cr);
1058 minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1060 if(stepsize<minstep) {
1065 /* Write coordinates if necessary */
1066 do_x = do_per_step(step,inputrec->nstxout);
1067 do_f = do_per_step(step,inputrec->nstfout);
1069 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1070 top_global,inputrec,step,
1071 s_min,state_global,f_global);
1073 /* Take a step downhill.
1074 * In theory, we should minimize the function along this direction.
1075 * That is quite possible, but it turns out to take 5-10 function evaluations
1076 * for each line. However, we dont really need to find the exact minimum -
1077 * it is much better to start a new CG step in a modified direction as soon
1078 * as we are close to it. This will save a lot of energy evaluations.
1080 * In practice, we just try to take a single step.
1081 * If it worked (i.e. lowered the energy), we increase the stepsize but
1082 * the continue straight to the next CG step without trying to find any minimum.
1083 * If it didn't work (higher energy), there must be a minimum somewhere between
1084 * the old position and the new one.
1086 * Due to the finite numerical accuracy, it turns out that it is a good idea
1087 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1088 * This leads to lower final energies in the tests I've done. / Erik
1090 s_a->epot = s_min->epot;
1092 c = a + stepsize; /* reference position along line is zero */
1094 if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1095 em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1096 s_min,top,mdatoms,fr,vsite,constr,
1100 /* Take a trial step (new coords in s_c) */
1101 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,c,s_min->s.cg_p,s_c,
1102 constr,top,nrnb,wcycle,-1);
1105 /* Calculate energy for the trial step */
1106 evaluate_energy(fplog,bVerbose,cr,
1107 state_global,top_global,s_c,top,
1108 inputrec,nrnb,wcycle,gstat,
1109 vsite,constr,fcd,graph,mdatoms,fr,
1110 mu_tot,enerd,vir,pres,-1,FALSE);
1112 /* Calc derivative along line */
1116 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1117 for(m=0; m<DIM; m++)
1118 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1120 /* Sum the gradient along the line across CPUs */
1122 gmx_sumd(1,&gpc,cr);
1124 /* This is the max amount of increase in energy we tolerate */
1125 tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1127 /* Accept the step if the energy is lower, or if it is not significantly higher
1128 * and the line derivative is still negative.
1130 if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1132 /* Great, we found a better energy. Increase step for next iteration
1133 * if we are still going down, decrease it otherwise
1136 stepsize *= 1.618034; /* The golden section */
1138 stepsize *= 0.618034; /* 1/golden section */
1140 /* New energy is the same or higher. We will have to do some work
1141 * to find a smaller value in the interval. Take smaller step next time!
1144 stepsize *= 0.618034;
1150 /* OK, if we didn't find a lower value we will have to locate one now - there must
1151 * be one in the interval [a=0,c].
1152 * The same thing is valid here, though: Don't spend dozens of iterations to find
1153 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1154 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1156 * I also have a safeguard for potentially really patological functions so we never
1157 * take more than 20 steps before we give up ...
1159 * If we already found a lower value we just skip this step and continue to the update.
1165 /* Select a new trial point.
1166 * If the derivatives at points a & c have different sign we interpolate to zero,
1167 * otherwise just do a bisection.
1170 b = a + gpa*(a-c)/(gpc-gpa);
1174 /* safeguard if interpolation close to machine accuracy causes errors:
1175 * never go outside the interval
1180 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1181 /* Reload the old state */
1182 em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1183 s_min,top,mdatoms,fr,vsite,constr,
1187 /* Take a trial step to this new point - new coords in s_b */
1188 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,b,s_min->s.cg_p,s_b,
1189 constr,top,nrnb,wcycle,-1);
1192 /* Calculate energy for the trial step */
1193 evaluate_energy(fplog,bVerbose,cr,
1194 state_global,top_global,s_b,top,
1195 inputrec,nrnb,wcycle,gstat,
1196 vsite,constr,fcd,graph,mdatoms,fr,
1197 mu_tot,enerd,vir,pres,-1,FALSE);
1199 /* p does not change within a step, but since the domain decomposition
1200 * might change, we have to use cg_p of s_b here.
1205 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1206 for(m=0; m<DIM; m++)
1207 gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1209 /* Sum the gradient along the line across CPUs */
1211 gmx_sumd(1,&gpb,cr);
1214 fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1215 s_a->epot,s_b->epot,s_c->epot,gpb);
1217 epot_repl = s_b->epot;
1219 /* Keep one of the intervals based on the value of the derivative at the new point */
1221 /* Replace c endpoint with b */
1222 swap_em_state(s_b,s_c);
1226 /* Replace a endpoint with b */
1227 swap_em_state(s_b,s_a);
1233 * Stop search as soon as we find a value smaller than the endpoints.
1234 * Never run more than 20 steps, no matter what.
1237 } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1240 if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1242 /* OK. We couldn't find a significantly lower energy.
1243 * If beta==0 this was steepest descent, and then we give up.
1244 * If not, set beta=0 and restart with steepest descent before quitting.
1251 /* Reset memory before giving up */
1257 /* Select min energy state of A & C, put the best in B.
1259 if (s_c->epot < s_a->epot) {
1261 fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1262 s_c->epot,s_a->epot);
1263 swap_em_state(s_b,s_c);
1268 fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1269 s_a->epot,s_c->epot);
1270 swap_em_state(s_b,s_a);
1277 fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1279 swap_em_state(s_b,s_c);
1284 /* new search direction */
1285 /* beta = 0 means forget all memory and restart with steepest descents. */
1286 if (nstcg && ((step % nstcg)==0))
1289 /* s_min->fnorm cannot be zero, because then we would have converged
1293 /* Polak-Ribiere update.
1294 * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1296 beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1298 /* Limit beta to prevent oscillations */
1299 if (fabs(beta) > 5.0)
1303 /* update positions */
1304 swap_em_state(s_min,s_b);
1307 /* Print it if necessary */
1310 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1311 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1312 s_min->fmax,s_min->a_fmax+1);
1313 /* Store the new (lower) energies */
1314 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1315 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
1316 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1318 do_log = do_per_step(step,inputrec->nstlog);
1319 do_ene = do_per_step(step,inputrec->nstenergy);
1321 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1322 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1323 do_log ? fplog : NULL,step,step,eprNORMAL,
1324 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1327 /* Stop when the maximum force lies below tolerance.
1328 * If we have reached machine precision, converged is already set to true.
1330 converged = converged || (s_min->fmax < inputrec->em_tol);
1332 } /* End of the loop */
1335 step--; /* we never took that last step in this case */
1337 if (s_min->fmax > inputrec->em_tol)
1341 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1342 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1348 /* If we printed energy and/or logfile last step (which was the last step)
1349 * we don't have to do it again, but otherwise print the final values.
1352 /* Write final value to log since we didn't do anything the last step */
1353 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1355 if (!do_ene || !do_log) {
1356 /* Write final energy file entries */
1357 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1358 !do_log ? fplog : NULL,step,step,eprNORMAL,
1359 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1363 /* Print some stuff... */
1365 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1368 * For accurate normal mode calculation it is imperative that we
1369 * store the last conformation into the full precision binary trajectory.
1371 * However, we should only do it if we did NOT already write this step
1372 * above (which we did if do_x or do_f was true).
1374 do_x = !do_per_step(step,inputrec->nstxout);
1375 do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1377 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1378 top_global,inputrec,step,
1379 s_min,state_global,f_global);
1381 fnormn = s_min->fnorm/sqrt(state_global->natoms);
1384 print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1385 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1386 print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1387 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1389 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1392 finish_em(fplog,cr,outf,runtime,wcycle);
1394 /* To print the actual number of steps we needed somewhere */
1395 runtime->nsteps_done = step;
1398 } /* That's all folks */
1401 double do_lbfgs(FILE *fplog,t_commrec *cr,
1402 int nfile,const t_filenm fnm[],
1403 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1405 gmx_vsite_t *vsite,gmx_constr_t constr,
1407 t_inputrec *inputrec,
1408 gmx_mtop_t *top_global,t_fcdata *fcd,
1411 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1414 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1415 gmx_membed_t membed,
1416 real cpt_period,real max_hours,
1417 const char *deviceOptions,
1418 unsigned long Flags,
1419 gmx_runtime_t *runtime)
1421 static const char *LBFGS="Low-Memory BFGS Minimizer";
1423 gmx_localtop_t *top;
1424 gmx_enerdata_t *enerd;
1426 gmx_global_stat_t gstat;
1429 int ncorr,nmaxcorr,point,cp,neval,nminstep;
1430 double stepsize,gpa,gpb,gpc,tmp,minstep;
1431 real *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1432 real *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1433 real a,b,c,maxdelta,delta;
1434 real diag,Epot0,Epot,EpotA,EpotB,EpotC;
1435 real dgdx,dgdg,sq,yr,beta;
1437 gmx_bool converged,first;
1440 gmx_bool do_log,do_ene,do_x,do_f,foundlower,*frozen;
1442 int start,end,number_steps;
1444 int i,k,m,n,nfmax,gf,step;
1450 gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1452 n = 3*state->natoms;
1453 nmaxcorr = inputrec->nbfgscorr;
1455 /* Allocate memory */
1456 /* Use pointers to real so we dont have to loop over both atoms and
1457 * dimensions all the time...
1458 * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1459 * that point to the same memory.
1473 snew(alpha,nmaxcorr);
1476 for(i=0;i<nmaxcorr;i++)
1480 for(i=0;i<nmaxcorr;i++)
1487 init_em(fplog,LBFGS,cr,inputrec,
1488 state,top_global,&ems,&top,&f,&f_global,
1489 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1490 nfile,fnm,&outf,&mdebin);
1491 /* Do_lbfgs is not completely updated like do_steep and do_cg,
1492 * so we free some memory again.
1497 xx = (real *)state->x;
1500 start = mdatoms->start;
1501 end = mdatoms->homenr + start;
1503 /* Print to log file */
1504 print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1506 do_log = do_ene = do_x = do_f = TRUE;
1508 /* Max number of steps */
1509 number_steps=inputrec->nsteps;
1511 /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1513 for(i=start; i<end; i++) {
1514 if (mdatoms->cFREEZE)
1515 gf = mdatoms->cFREEZE[i];
1516 for(m=0; m<DIM; m++)
1517 frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1520 sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1522 sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1525 construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1526 top->idef.iparams,top->idef.il,
1527 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1529 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1530 /* do_force always puts the charge groups in the box and shifts again
1531 * We do not unshift, so molecules are always whole
1536 evaluate_energy(fplog,bVerbose,cr,
1537 state,top_global,&ems,top,
1538 inputrec,nrnb,wcycle,gstat,
1539 vsite,constr,fcd,graph,mdatoms,fr,
1540 mu_tot,enerd,vir,pres,-1,TRUE);
1544 /* Copy stuff to the energy bin for easy printing etc. */
1545 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1546 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1547 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1549 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1550 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1551 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1555 /* This is the starting energy */
1556 Epot = enerd->term[F_EPOT];
1562 /* Set the initial step.
1563 * since it will be multiplied by the non-normalized search direction
1564 * vector (force vector the first time), we scale it by the
1565 * norm of the force.
1569 fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1570 fprintf(stderr," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1571 fprintf(stderr," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1572 fprintf(stderr,"\n");
1573 /* and copy to the log file too... */
1574 fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1575 fprintf(fplog," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1576 fprintf(fplog," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1577 fprintf(fplog,"\n");
1583 dx[point][i] = ff[i]; /* Initial search direction */
1587 stepsize = 1.0/fnorm;
1590 /* Start the loop over BFGS steps.
1591 * Each successful step is counted, and we continue until
1592 * we either converge or reach the max number of steps.
1597 /* Set the gradient from the force */
1599 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1601 /* Write coordinates if necessary */
1602 do_x = do_per_step(step,inputrec->nstxout);
1603 do_f = do_per_step(step,inputrec->nstfout);
1608 mdof_flags |= MDOF_X;
1613 mdof_flags |= MDOF_F;
1616 write_traj(fplog,cr,outf,mdof_flags,
1617 top_global,step,(real)step,state,state,f,f,NULL,NULL);
1619 /* Do the linesearching in the direction dx[point][0..(n-1)] */
1621 /* pointer to current direction - point=0 first time here */
1624 /* calculate line gradient */
1625 for(gpa=0,i=0;i<n;i++)
1628 /* Calculate minimum allowed stepsize, before the average (norm)
1629 * relative change in coordinate is smaller than precision
1631 for(minstep=0,i=0;i<n;i++) {
1638 minstep = GMX_REAL_EPS/sqrt(minstep/n);
1640 if(stepsize<minstep) {
1645 /* Store old forces and coordinates */
1657 /* Take a step downhill.
1658 * In theory, we should minimize the function along this direction.
1659 * That is quite possible, but it turns out to take 5-10 function evaluations
1660 * for each line. However, we dont really need to find the exact minimum -
1661 * it is much better to start a new BFGS step in a modified direction as soon
1662 * as we are close to it. This will save a lot of energy evaluations.
1664 * In practice, we just try to take a single step.
1665 * If it worked (i.e. lowered the energy), we increase the stepsize but
1666 * the continue straight to the next BFGS step without trying to find any minimum.
1667 * If it didn't work (higher energy), there must be a minimum somewhere between
1668 * the old position and the new one.
1670 * Due to the finite numerical accuracy, it turns out that it is a good idea
1671 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1672 * This leads to lower final energies in the tests I've done. / Erik
1677 c = a + stepsize; /* reference position along line is zero */
1679 /* Check stepsize first. We do not allow displacements
1680 * larger than emstep.
1690 if(maxdelta>inputrec->em_stepsize)
1692 } while(maxdelta>inputrec->em_stepsize);
1694 /* Take a trial step */
1696 xc[i] = lastx[i] + c*s[i];
1699 /* Calculate energy for the trial step */
1700 ems.s.x = (rvec *)xc;
1702 evaluate_energy(fplog,bVerbose,cr,
1703 state,top_global,&ems,top,
1704 inputrec,nrnb,wcycle,gstat,
1705 vsite,constr,fcd,graph,mdatoms,fr,
1706 mu_tot,enerd,vir,pres,step,FALSE);
1709 /* Calc derivative along line */
1710 for(gpc=0,i=0; i<n; i++) {
1711 gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
1713 /* Sum the gradient along the line across CPUs */
1715 gmx_sumd(1,&gpc,cr);
1717 /* This is the max amount of increase in energy we tolerate */
1718 tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1720 /* Accept the step if the energy is lower, or if it is not significantly higher
1721 * and the line derivative is still negative.
1723 if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1725 /* Great, we found a better energy. Increase step for next iteration
1726 * if we are still going down, decrease it otherwise
1729 stepsize *= 1.618034; /* The golden section */
1731 stepsize *= 0.618034; /* 1/golden section */
1733 /* New energy is the same or higher. We will have to do some work
1734 * to find a smaller value in the interval. Take smaller step next time!
1737 stepsize *= 0.618034;
1740 /* OK, if we didn't find a lower value we will have to locate one now - there must
1741 * be one in the interval [a=0,c].
1742 * The same thing is valid here, though: Don't spend dozens of iterations to find
1743 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1744 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1746 * I also have a safeguard for potentially really patological functions so we never
1747 * take more than 20 steps before we give up ...
1749 * If we already found a lower value we just skip this step and continue to the update.
1756 /* Select a new trial point.
1757 * If the derivatives at points a & c have different sign we interpolate to zero,
1758 * otherwise just do a bisection.
1762 b = a + gpa*(a-c)/(gpc-gpa);
1766 /* safeguard if interpolation close to machine accuracy causes errors:
1767 * never go outside the interval
1772 /* Take a trial step */
1774 xb[i] = lastx[i] + b*s[i];
1777 /* Calculate energy for the trial step */
1778 ems.s.x = (rvec *)xb;
1780 evaluate_energy(fplog,bVerbose,cr,
1781 state,top_global,&ems,top,
1782 inputrec,nrnb,wcycle,gstat,
1783 vsite,constr,fcd,graph,mdatoms,fr,
1784 mu_tot,enerd,vir,pres,step,FALSE);
1789 for(gpb=0,i=0; i<n; i++)
1790 gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
1792 /* Sum the gradient along the line across CPUs */
1794 gmx_sumd(1,&gpb,cr);
1796 /* Keep one of the intervals based on the value of the derivative at the new point */
1798 /* Replace c endpoint with b */
1802 /* swap coord pointers b/c */
1810 /* Replace a endpoint with b */
1814 /* swap coord pointers a/b */
1824 * Stop search as soon as we find a value smaller than the endpoints,
1825 * or if the tolerance is below machine precision.
1826 * Never run more than 20 steps, no matter what.
1829 } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1831 if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1832 /* OK. We couldn't find a significantly lower energy.
1833 * If ncorr==0 this was steepest descent, and then we give up.
1834 * If not, reset memory to restart as steepest descent before quitting.
1843 /* Search in gradient direction */
1846 /* Reset stepsize */
1847 stepsize = 1.0/fnorm;
1852 /* Select min energy state of A & C, put the best in xx/ff/Epot
1883 /* Update the memory information, and calculate a new
1884 * approximation of the inverse hessian
1887 /* Have new data in Epot, xx, ff */
1892 dg[point][i]=lastf[i]-ff[i];
1893 dx[point][i]*=stepsize;
1899 dgdg+=dg[point][i]*dg[point][i];
1900 dgdx+=dg[point][i]*dx[point][i];
1905 rho[point]=1.0/dgdx;
1917 /* Recursive update. First go back over the memory points */
1918 for(k=0;k<ncorr;k++) {
1927 alpha[cp]=rho[cp]*sq;
1930 p[i] -= alpha[cp]*dg[cp][i];
1936 /* And then go forward again */
1937 for(k=0;k<ncorr;k++) {
1940 yr += p[i]*dg[cp][i];
1943 beta = alpha[cp]-beta;
1946 p[i] += beta*dx[cp][i];
1955 dx[point][i] = p[i];
1961 /* Test whether the convergence criterion is met */
1962 get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1964 /* Print it if necessary */
1967 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1968 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1969 /* Store the new (lower) energies */
1970 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1971 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1972 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1973 do_log = do_per_step(step,inputrec->nstlog);
1974 do_ene = do_per_step(step,inputrec->nstenergy);
1976 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1977 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1978 do_log ? fplog : NULL,step,step,eprNORMAL,
1979 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1982 /* Stop when the maximum force lies below tolerance.
1983 * If we have reached machine precision, converged is already set to true.
1986 converged = converged || (fmax < inputrec->em_tol);
1988 } /* End of the loop */
1991 step--; /* we never took that last step in this case */
1993 if(fmax>inputrec->em_tol)
1997 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1998 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
2003 /* If we printed energy and/or logfile last step (which was the last step)
2004 * we don't have to do it again, but otherwise print the final values.
2006 if(!do_log) /* Write final value to log since we didn't do anythin last step */
2007 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
2008 if(!do_ene || !do_log) /* Write final energy file entries */
2009 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
2010 !do_log ? fplog : NULL,step,step,eprNORMAL,
2011 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2013 /* Print some stuff... */
2015 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2018 * For accurate normal mode calculation it is imperative that we
2019 * store the last conformation into the full precision binary trajectory.
2021 * However, we should only do it if we did NOT already write this step
2022 * above (which we did if do_x or do_f was true).
2024 do_x = !do_per_step(step,inputrec->nstxout);
2025 do_f = !do_per_step(step,inputrec->nstfout);
2026 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
2027 top_global,inputrec,step,
2031 print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
2032 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2033 print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
2034 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2036 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
2039 finish_em(fplog,cr,outf,runtime,wcycle);
2041 /* To print the actual number of steps we needed somewhere */
2042 runtime->nsteps_done = step;
2045 } /* That's all folks */
2048 double do_steep(FILE *fplog,t_commrec *cr,
2049 int nfile, const t_filenm fnm[],
2050 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2052 gmx_vsite_t *vsite,gmx_constr_t constr,
2054 t_inputrec *inputrec,
2055 gmx_mtop_t *top_global,t_fcdata *fcd,
2056 t_state *state_global,
2058 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2061 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2062 gmx_membed_t membed,
2063 real cpt_period,real max_hours,
2064 const char *deviceOptions,
2065 unsigned long Flags,
2066 gmx_runtime_t *runtime)
2068 const char *SD="Steepest Descents";
2069 em_state_t *s_min,*s_try;
2071 gmx_localtop_t *top;
2072 gmx_enerdata_t *enerd;
2074 gmx_global_stat_t gstat;
2076 real stepsize,constepsize;
2077 real ustep,dvdlambda,fnormn;
2080 gmx_bool bDone,bAbort,do_x,do_f;
2085 int steps_accepted=0;
2089 s_min = init_em_state();
2090 s_try = init_em_state();
2092 /* Init em and store the local state in s_try */
2093 init_em(fplog,SD,cr,inputrec,
2094 state_global,top_global,s_try,&top,&f,&f_global,
2095 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2096 nfile,fnm,&outf,&mdebin);
2098 /* Print to log file */
2099 print_em_start(fplog,cr,runtime,wcycle,SD);
2101 /* Set variables for stepsize (in nm). This is the largest
2102 * step that we are going to make in any direction.
2104 ustep = inputrec->em_stepsize;
2107 /* Max number of steps */
2108 nsteps = inputrec->nsteps;
2111 /* Print to the screen */
2112 sp_header(stderr,SD,inputrec->em_tol,nsteps);
2114 sp_header(fplog,SD,inputrec->em_tol,nsteps);
2116 /**** HERE STARTS THE LOOP ****
2117 * count is the counter for the number of steps
2118 * bDone will be TRUE when the minimization has converged
2119 * bAbort will be TRUE when nsteps steps have been performed or when
2120 * the stepsize becomes smaller than is reasonable for machine precision
2125 while( !bDone && !bAbort ) {
2126 bAbort = (nsteps >= 0) && (count == nsteps);
2128 /* set new coordinates, except for first step */
2130 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,
2131 s_min,stepsize,s_min->f,s_try,
2132 constr,top,nrnb,wcycle,count);
2135 evaluate_energy(fplog,bVerbose,cr,
2136 state_global,top_global,s_try,top,
2137 inputrec,nrnb,wcycle,gstat,
2138 vsite,constr,fcd,graph,mdatoms,fr,
2139 mu_tot,enerd,vir,pres,count,count==0);
2142 print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
2145 s_min->epot = s_try->epot + 1;
2147 /* Print it if necessary */
2150 fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2151 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2152 (s_try->epot < s_min->epot) ? '\n' : '\r');
2155 if (s_try->epot < s_min->epot) {
2156 /* Store the new (lower) energies */
2157 upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2158 mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
2159 s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
2160 print_ebin(outf->fp_ene,TRUE,
2161 do_per_step(steps_accepted,inputrec->nstdisreout),
2162 do_per_step(steps_accepted,inputrec->nstorireout),
2163 fplog,count,count,eprNORMAL,TRUE,
2164 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2169 /* Now if the new energy is smaller than the previous...
2170 * or if this is the first step!
2171 * or if we did random steps!
2174 if ( (count==0) || (s_try->epot < s_min->epot) ) {
2177 /* Test whether the convergence criterion is met... */
2178 bDone = (s_try->fmax < inputrec->em_tol);
2180 /* Copy the arrays for force, positions and energy */
2181 /* The 'Min' array always holds the coords and forces of the minimal
2183 swap_em_state(s_min,s_try);
2187 /* Write to trn, if necessary */
2188 do_x = do_per_step(steps_accepted,inputrec->nstxout);
2189 do_f = do_per_step(steps_accepted,inputrec->nstfout);
2190 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2191 top_global,inputrec,count,
2192 s_min,state_global,f_global);
2195 /* If energy is not smaller make the step smaller... */
2198 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2199 /* Reload the old state */
2200 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2201 s_min,top,mdatoms,fr,vsite,constr,
2206 /* Determine new step */
2207 stepsize = ustep/s_min->fmax;
2209 /* Check if stepsize is too small, with 1 nm as a characteristic length */
2211 if (count == nsteps || ustep < 1e-12)
2213 if (count == nsteps || ustep < 1e-6)
2218 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2219 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2225 } /* End of the loop */
2227 /* Print some shit... */
2229 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2230 write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2231 top_global,inputrec,count,
2232 s_min,state_global,f_global);
2234 fnormn = s_min->fnorm/sqrt(state_global->natoms);
2237 print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2238 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2239 print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2240 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2243 finish_em(fplog,cr,outf,runtime,wcycle);
2245 /* To print the actual number of steps we needed somewhere */
2246 inputrec->nsteps=count;
2248 runtime->nsteps_done = count;
2251 } /* That's all folks */
2254 double do_nm(FILE *fplog,t_commrec *cr,
2255 int nfile,const t_filenm fnm[],
2256 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2258 gmx_vsite_t *vsite,gmx_constr_t constr,
2260 t_inputrec *inputrec,
2261 gmx_mtop_t *top_global,t_fcdata *fcd,
2262 t_state *state_global,
2264 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2267 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2268 gmx_membed_t membed,
2269 real cpt_period,real max_hours,
2270 const char *deviceOptions,
2271 unsigned long Flags,
2272 gmx_runtime_t *runtime)
2274 const char *NM = "Normal Mode Analysis";
2279 gmx_localtop_t *top;
2280 gmx_enerdata_t *enerd;
2282 gmx_global_stat_t gstat;
2284 real t,t0,lambda,lam0;
2289 gmx_bool bSparse; /* use sparse matrix storage format */
2291 gmx_sparsematrix_t * sparse_matrix = NULL;
2292 real * full_matrix = NULL;
2293 em_state_t * state_work;
2295 /* added with respect to mdrun */
2297 real der_range=10.0*sqrt(GMX_REAL_EPS);
2303 gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2306 state_work = init_em_state();
2308 /* Init em and store the local state in state_minimum */
2309 init_em(fplog,NM,cr,inputrec,
2310 state_global,top_global,state_work,&top,
2312 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2313 nfile,fnm,&outf,NULL);
2315 natoms = top_global->natoms;
2323 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2324 " which MIGHT not be accurate enough for normal mode analysis.\n"
2325 " Gromacs now uses sparse matrix storage, so the memory requirements\n"
2326 " are fairly modest even if you recompile in double precision.\n\n");
2330 /* Check if we can/should use sparse storage format.
2332 * Sparse format is only useful when the Hessian itself is sparse, which it
2333 * will be when we use a cutoff.
2334 * For small systems (n<1000) it is easier to always use full matrix format, though.
2336 if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2338 fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2341 else if(top_global->natoms < 1000)
2343 fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2348 fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2352 sz = DIM*top_global->natoms;
2354 fprintf(stderr,"Allocating Hessian memory...\n\n");
2358 sparse_matrix=gmx_sparsematrix_init(sz);
2359 sparse_matrix->compressed_symmetric = TRUE;
2363 snew(full_matrix,sz*sz);
2366 /* Initial values */
2367 t0 = inputrec->init_t;
2368 lam0 = inputrec->fepvals->init_lambda;
2376 /* Write start time and temperature */
2377 print_em_start(fplog,cr,runtime,wcycle,NM);
2379 /* fudge nr of steps to nr of atoms */
2380 inputrec->nsteps = natoms*2;
2384 fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2385 *(top_global->name),(int)inputrec->nsteps);
2388 nnodes = cr->nnodes;
2390 /* Make evaluate_energy do a single node force calculation */
2392 evaluate_energy(fplog,bVerbose,cr,
2393 state_global,top_global,state_work,top,
2394 inputrec,nrnb,wcycle,gstat,
2395 vsite,constr,fcd,graph,mdatoms,fr,
2396 mu_tot,enerd,vir,pres,-1,TRUE);
2397 cr->nnodes = nnodes;
2399 /* if forces are not small, warn user */
2400 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2404 fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2405 if (state_work->fmax > 1.0e-3)
2407 fprintf(stderr,"Maximum force probably not small enough to");
2408 fprintf(stderr," ensure that you are in an \nenergy well. ");
2409 fprintf(stderr,"Be aware that negative eigenvalues may occur");
2410 fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2414 /***********************************************************
2416 * Loop over all pairs in matrix
2418 * do_force called twice. Once with positive and
2419 * once with negative displacement
2421 ************************************************************/
2423 /* Steps are divided one by one over the nodes */
2424 for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2427 for (d=0; d<DIM; d++)
2429 x_min = state_work->s.x[atom][d];
2431 state_work->s.x[atom][d] = x_min - der_range;
2433 /* Make evaluate_energy do a single node force calculation */
2435 evaluate_energy(fplog,bVerbose,cr,
2436 state_global,top_global,state_work,top,
2437 inputrec,nrnb,wcycle,gstat,
2438 vsite,constr,fcd,graph,mdatoms,fr,
2439 mu_tot,enerd,vir,pres,atom*2,FALSE);
2441 for(i=0; i<natoms; i++)
2443 copy_rvec(state_work->f[i], fneg[i]);
2446 state_work->s.x[atom][d] = x_min + der_range;
2448 evaluate_energy(fplog,bVerbose,cr,
2449 state_global,top_global,state_work,top,
2450 inputrec,nrnb,wcycle,gstat,
2451 vsite,constr,fcd,graph,mdatoms,fr,
2452 mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2453 cr->nnodes = nnodes;
2455 /* x is restored to original */
2456 state_work->s.x[atom][d] = x_min;
2458 for(j=0; j<natoms; j++)
2460 for (k=0; (k<DIM); k++)
2463 -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2471 #define mpi_type MPI_DOUBLE
2473 #define mpi_type MPI_FLOAT
2475 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2476 cr->mpi_comm_mygroup);
2481 for(node=0; (node<nnodes && atom+node<natoms); node++)
2487 MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2488 cr->mpi_comm_mygroup,&stat);
2493 row = (atom + node)*DIM + d;
2495 for(j=0; j<natoms; j++)
2497 for(k=0; k<DIM; k++)
2503 if (col >= row && dfdx[j][k] != 0.0)
2505 gmx_sparsematrix_increment_value(sparse_matrix,
2506 row,col,dfdx[j][k]);
2511 full_matrix[row*sz+col] = dfdx[j][k];
2518 if (bVerbose && fplog)
2523 /* write progress */
2524 if (MASTER(cr) && bVerbose)
2526 fprintf(stderr,"\rFinished step %d out of %d",
2527 min(atom+nnodes,natoms),natoms);
2534 fprintf(stderr,"\n\nWriting Hessian...\n");
2535 gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2538 finish_em(fplog,cr,outf,runtime,wcycle);
2540 runtime->nsteps_done = natoms*2;