1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROwing Monsters And Cloning Shrimps
55 #include "gmx_fatal.h"
67 #include "md_support.h"
73 #include "gmx_wallcycle.h"
74 #include "mtop_util.h"
78 #include "gmx_omp_nthreads.h"
81 #include "gromacs/linearalgebra/mtxio.h"
82 #include "gromacs/linearalgebra/sparsematrix.h"
93 static em_state_t *init_em_state()
99 /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? */
100 snew(ems->s.lambda,efptNR);
105 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
106 gmx_wallcycle_t wcycle,
111 runtime_start(runtime);
113 sprintf(buf,"Started %s",name);
114 print_date_and_time(fplog,cr->nodeid,buf,NULL);
116 wallcycle_start(wcycle,ewcRUN);
118 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
119 gmx_wallcycle_t wcycle)
121 wallcycle_stop(wcycle,ewcRUN);
123 runtime_end(runtime);
126 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
129 fprintf(out,"%s:\n",minimizer);
130 fprintf(out," Tolerance (Fmax) = %12.5e\n",ftol);
131 fprintf(out," Number of steps = %12d\n",nsteps);
134 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
140 "\nEnergy minimization reached the maximum number"
141 "of steps before the forces reached the requested"
142 "precision Fmax < %g.\n",ftol);
147 "\nEnergy minimization has stopped, but the forces have"
148 "not converged to the requested precision Fmax < %g (which"
149 "may not be possible for your system). It stopped"
150 "because the algorithm tried to make a new step whose size"
151 "was too small, or there was no change in the energy since"
152 "last step. Either way, we regard the minimization as"
153 "converged to within the available machine precision,"
154 "given your starting configuration and EM parameters.\n%s%s",
156 sizeof(real)<sizeof(double) ?
157 "\nDouble precision normally gives you higher accuracy, but"
158 "this is often not needed for preparing to run molecular"
162 "You might need to increase your constraint accuracy, or turn\n"
163 "off constraints altogether (set constraints = none in mdp file)\n" :
166 fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
171 static void print_converged(FILE *fp,const char *alg,real ftol,
172 gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
173 real epot,real fmax, int nfmax, real fnorm)
175 char buf[STEPSTRSIZE];
178 fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
179 alg,ftol,gmx_step_str(count,buf));
180 else if(count<nsteps)
181 fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
182 "but did not reach the requested Fmax < %g.\n",
183 alg,gmx_step_str(count,buf),ftol);
185 fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
186 alg,ftol,gmx_step_str(count,buf));
189 fprintf(fp,"Potential Energy = %21.14e\n",epot);
190 fprintf(fp,"Maximum force = %21.14e on atom %d\n",fmax,nfmax+1);
191 fprintf(fp,"Norm of force = %21.14e\n",fnorm);
193 fprintf(fp,"Potential Energy = %14.7e\n",epot);
194 fprintf(fp,"Maximum force = %14.7e on atom %d\n",fmax,nfmax+1);
195 fprintf(fp,"Norm of force = %14.7e\n",fnorm);
199 static void get_f_norm_max(t_commrec *cr,
200 t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
201 real *fnorm,real *fmax,int *a_fmax)
204 real fmax2,fmax2_0,fam;
205 int la_max,a_max,start,end,i,m,gf;
207 /* This routine finds the largest force and returns it.
208 * On parallel machines the global max is taken.
214 start = mdatoms->start;
215 end = mdatoms->homenr + start;
216 if (mdatoms->cFREEZE) {
217 for(i=start; i<end; i++) {
218 gf = mdatoms->cFREEZE[i];
221 if (!opts->nFreeze[gf][m])
230 for(i=start; i<end; i++) {
240 if (la_max >= 0 && DOMAINDECOMP(cr)) {
241 a_max = cr->dd->gatindex[la_max];
246 snew(sum,2*cr->nnodes+1);
247 sum[2*cr->nodeid] = fmax2;
248 sum[2*cr->nodeid+1] = a_max;
249 sum[2*cr->nnodes] = fnorm2;
250 gmx_sumd(2*cr->nnodes+1,sum,cr);
251 fnorm2 = sum[2*cr->nnodes];
252 /* Determine the global maximum */
253 for(i=0; i<cr->nnodes; i++) {
254 if (sum[2*i] > fmax2) {
256 a_max = (int)(sum[2*i+1] + 0.5);
263 *fnorm = sqrt(fnorm2);
270 static void get_state_f_norm_max(t_commrec *cr,
271 t_grpopts *opts,t_mdatoms *mdatoms,
274 get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
277 void init_em(FILE *fplog,const char *title,
278 t_commrec *cr,t_inputrec *ir,
279 t_state *state_global,gmx_mtop_t *top_global,
280 em_state_t *ems,gmx_localtop_t **top,
281 rvec **f,rvec **f_global,
282 t_nrnb *nrnb,rvec mu_tot,
283 t_forcerec *fr,gmx_enerdata_t **enerd,
284 t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
285 gmx_vsite_t *vsite,gmx_constr_t constr,
286 int nfile,const t_filenm fnm[],
287 gmx_mdoutf_t **outf,t_mdebin **mdebin)
294 fprintf(fplog,"Initiating %s\n",title);
297 state_global->ngtc = 0;
299 /* Initialize lambda variables */
300 initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
304 if (DOMAINDECOMP(cr))
306 *top = dd_init_local_top(top_global);
308 dd_init_local_state(cr->dd,state_global,&ems->s);
312 /* Distribute the charge groups over the nodes from the master node */
313 dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
314 state_global,top_global,ir,
315 &ems->s,&ems->f,mdatoms,*top,
316 fr,vsite,NULL,constr,
318 dd_store_state(cr->dd,&ems->s);
322 snew(*f_global,top_global->natoms);
332 snew(*f,top_global->natoms);
334 /* Just copy the state */
335 ems->s = *state_global;
336 snew(ems->s.x,ems->s.nalloc);
337 snew(ems->f,ems->s.nalloc);
338 for(i=0; i<state_global->natoms; i++)
340 copy_rvec(state_global->x[i],ems->s.x[i]);
342 copy_mat(state_global->box,ems->s.box);
344 if (PAR(cr) && ir->eI != eiNM)
346 /* Initialize the particle decomposition and split the topology */
347 *top = split_system(fplog,top_global,ir,cr);
349 pd_cg_range(cr,&fr->cg0,&fr->hcg);
353 *top = gmx_mtop_generate_local_top(top_global,ir);
357 forcerec_set_excl_load(fr,*top,cr);
359 init_bonded_thread_force_reduction(fr,&(*top)->idef);
361 if (ir->ePBC != epbcNONE && !fr->bMolPBC)
363 *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
372 pd_at_range(cr,&start,&homenr);
378 homenr = top_global->natoms;
380 atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
381 update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
385 set_vsite_top(vsite,*top,mdatoms,cr);
391 if (ir->eConstrAlg == econtSHAKE &&
392 gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
394 gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
395 econstr_names[econtSHAKE],econstr_names[econtLINCS]);
398 if (!DOMAINDECOMP(cr))
400 set_constraints(constr,*top,ir,mdatoms,cr);
403 if (!ir->bContinuation)
405 /* Constrain the starting coordinates */
407 constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
408 ir,NULL,cr,-1,0,mdatoms,
409 ems->s.x,ems->s.x,NULL,fr->bMolPBC,ems->s.box,
410 ems->s.lambda[efptFEP],&dvdlambda,
411 NULL,NULL,nrnb,econqCoord,FALSE,0,0);
417 *gstat = global_stat_init(ir);
420 *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
423 init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
428 /* Init bin for energy stuff */
429 *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
433 calc_shifts(ems->s.box,fr->shift_vec);
436 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
437 gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
439 if (!(cr->duty & DUTY_PME)) {
440 /* Tell the PME only node to finish */
441 gmx_pme_send_finish(cr);
446 em_time_end(fplog,cr,runtime,wcycle);
449 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
458 static void copy_em_coords(em_state_t *ems,t_state *state)
462 for(i=0; (i<state->natoms); i++)
464 copy_rvec(ems->s.x[i],state->x[i]);
468 static void write_em_traj(FILE *fplog,t_commrec *cr,
470 gmx_bool bX,gmx_bool bF,const char *confout,
471 gmx_mtop_t *top_global,
472 t_inputrec *ir,gmx_large_int_t step,
474 t_state *state_global,rvec *f_global)
478 if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
480 copy_em_coords(state,state_global);
485 if (bX) { mdof_flags |= MDOF_X; }
486 if (bF) { mdof_flags |= MDOF_F; }
487 write_traj(fplog,cr,outf,mdof_flags,
488 top_global,step,(double)step,
489 &state->s,state_global,state->f,f_global,NULL,NULL);
491 if (confout != NULL && MASTER(cr))
493 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
495 /* Make molecules whole only for confout writing */
496 do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
500 write_sto_conf_mtop(confout,
501 *top_global->name,top_global,
502 state_global->x,NULL,ir->ePBC,state_global->box);
506 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
508 em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
509 gmx_constr_t constr,gmx_localtop_t *top,
510 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
511 gmx_large_int_t count)
523 if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
525 gmx_incons("state mismatch in do_em_step");
528 s2->flags = s1->flags;
530 if (s2->nalloc != s1->nalloc)
532 s2->nalloc = s1->nalloc;
533 srenew(s2->x,s1->nalloc);
534 srenew(ems2->f, s1->nalloc);
535 if (s2->flags & (1<<estCGP))
537 srenew(s2->cg_p, s1->nalloc);
541 s2->natoms = s1->natoms;
542 copy_mat(s1->box,s2->box);
543 /* Copy free energy state */
544 for (i=0;i<efptNR;i++)
546 s2->lambda[i] = s1->lambda[i];
548 copy_mat(s1->box,s2->box);
551 end = md->start + md->homenr;
556 #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
561 #pragma omp for schedule(static) nowait
562 for(i=start; i<end; i++)
570 if (ir->opts.nFreeze[gf][m])
576 x2[i][m] = x1[i][m] + a*f[i][m];
581 if (s2->flags & (1<<estCGP))
583 /* Copy the CG p vector */
586 #pragma omp for schedule(static) nowait
587 for(i=start; i<end; i++)
589 copy_rvec(x1[i],x2[i]);
593 if (DOMAINDECOMP(cr))
595 s2->ddp_count = s1->ddp_count;
596 if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
599 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
600 srenew(s2->cg_gl,s2->cg_gl_nalloc);
603 s2->ncg_gl = s1->ncg_gl;
604 #pragma omp for schedule(static) nowait
605 for(i=0; i<s2->ncg_gl; i++)
607 s2->cg_gl[i] = s1->cg_gl[i];
609 s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
615 wallcycle_start(wcycle,ewcCONSTR);
617 constrain(NULL,TRUE,TRUE,constr,&top->idef,
618 ir,NULL,cr,count,0,md,
619 s1->x,s2->x,NULL,bMolPBC,s2->box,
620 s2->lambda[efptBONDED],&dvdlambda,
621 NULL,NULL,nrnb,econqCoord,FALSE,0,0);
622 wallcycle_stop(wcycle,ewcCONSTR);
626 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
627 gmx_mtop_t *top_global,t_inputrec *ir,
628 em_state_t *ems,gmx_localtop_t *top,
629 t_mdatoms *mdatoms,t_forcerec *fr,
630 gmx_vsite_t *vsite,gmx_constr_t constr,
631 t_nrnb *nrnb,gmx_wallcycle_t wcycle)
633 /* Repartition the domain decomposition */
634 wallcycle_start(wcycle,ewcDOMDEC);
635 dd_partition_system(fplog,step,cr,FALSE,1,
638 mdatoms,top,fr,vsite,NULL,constr,
640 dd_store_state(cr->dd,&ems->s);
641 wallcycle_stop(wcycle,ewcDOMDEC);
644 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
645 t_state *state_global,gmx_mtop_t *top_global,
646 em_state_t *ems,gmx_localtop_t *top,
647 t_inputrec *inputrec,
648 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
649 gmx_global_stat_t gstat,
650 gmx_vsite_t *vsite,gmx_constr_t constr,
652 t_graph *graph,t_mdatoms *mdatoms,
653 t_forcerec *fr,rvec mu_tot,
654 gmx_enerdata_t *enerd,tensor vir,tensor pres,
655 gmx_large_int_t count,gmx_bool bFirst)
660 tensor force_vir,shake_vir,ekin;
661 real dvdlambda,prescorr,enercorr,dvdlcorr;
664 /* Set the time to the initial time, the time does not change during EM */
665 t = inputrec->init_t;
668 (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
669 /* This the first state or an old state used before the last ns */
673 if (inputrec->nstlist > 0) {
675 } else if (inputrec->nstlist == -1) {
676 nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
678 gmx_sumi(1,&nabnsb,cr);
684 construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
685 top->idef.iparams,top->idef.il,
686 fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
688 if (DOMAINDECOMP(cr)) {
690 /* Repartition the domain decomposition */
691 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
692 ems,top,mdatoms,fr,vsite,constr,
697 /* Calc force & energy on new trial position */
698 /* do_force always puts the charge groups in the box and shifts again
699 * We do not unshift, so molecules are always whole in congrad.c
701 do_force(fplog,cr,inputrec,
702 count,nrnb,wcycle,top,top_global,&top_global->groups,
703 ems->s.box,ems->s.x,&ems->s.hist,
704 ems->f,force_vir,mdatoms,enerd,fcd,
705 ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
706 GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
707 GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
708 (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
710 /* Clear the unused shake virial and pressure */
711 clear_mat(shake_vir);
714 /* Communicate stuff when parallel */
715 if (PAR(cr) && inputrec->eI != eiNM)
717 wallcycle_start(wcycle,ewcMoveE);
719 global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
720 inputrec,NULL,NULL,NULL,1,&terminate,
721 top_global,&ems->s,FALSE,
727 wallcycle_stop(wcycle,ewcMoveE);
730 /* Calculate long range corrections to pressure and energy */
731 calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
732 pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
733 enerd->term[F_DISPCORR] = enercorr;
734 enerd->term[F_EPOT] += enercorr;
735 enerd->term[F_PRES] += prescorr;
736 enerd->term[F_DVDL] += dvdlcorr;
738 ems->epot = enerd->term[F_EPOT];
741 /* Project out the constraint components of the force */
742 wallcycle_start(wcycle,ewcCONSTR);
744 constrain(NULL,FALSE,FALSE,constr,&top->idef,
745 inputrec,NULL,cr,count,0,mdatoms,
746 ems->s.x,ems->f,ems->f,fr->bMolPBC,ems->s.box,
747 ems->s.lambda[efptBONDED],&dvdlambda,
748 NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
749 if (fr->bSepDVDL && fplog)
750 fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
751 enerd->term[F_DVDL_BONDED] += dvdlambda;
752 m_add(force_vir,shake_vir,vir);
753 wallcycle_stop(wcycle,ewcCONSTR);
755 copy_mat(force_vir,vir);
759 enerd->term[F_PRES] =
760 calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
762 sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
764 if (EI_ENERGY_MINIMIZATION(inputrec->eI))
766 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
770 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
772 em_state_t *s_min,em_state_t *s_b)
776 int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
778 unsigned char *grpnrFREEZE;
781 fprintf(debug,"Doing reorder_partsum\n");
786 cgs_gl = dd_charge_groups_global(cr->dd);
787 index = cgs_gl->index;
789 /* Collect fm in a global vector fmg.
790 * This conflicts with the spirit of domain decomposition,
791 * but to fully optimize this a much more complicated algorithm is required.
793 snew(fmg,mtop->natoms);
795 ncg = s_min->s.ncg_gl;
796 cg_gl = s_min->s.cg_gl;
798 for(c=0; c<ncg; c++) {
802 for(a=a0; a<a1; a++) {
803 copy_rvec(fm[i],fmg[a]);
807 gmx_sum(mtop->natoms*3,fmg[0],cr);
809 /* Now we will determine the part of the sum for the cgs in state s_b */
811 cg_gl = s_b->s.cg_gl;
815 grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
816 for(c=0; c<ncg; c++) {
820 for(a=a0; a<a1; a++) {
821 if (mdatoms->cFREEZE && grpnrFREEZE) {
824 for(m=0; m<DIM; m++) {
825 if (!opts->nFreeze[gf][m]) {
826 partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
838 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
840 em_state_t *s_min,em_state_t *s_b)
846 /* This is just the classical Polak-Ribiere calculation of beta;
847 * it looks a bit complicated since we take freeze groups into account,
848 * and might have to sum it in parallel runs.
851 if (!DOMAINDECOMP(cr) ||
852 (s_min->s.ddp_count == cr->dd->ddp_count &&
853 s_b->s.ddp_count == cr->dd->ddp_count)) {
858 /* This part of code can be incorrect with DD,
859 * since the atom ordering in s_b and s_min might differ.
861 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
862 if (mdatoms->cFREEZE)
863 gf = mdatoms->cFREEZE[i];
865 if (!opts->nFreeze[gf][m]) {
866 sum += (fb[i][m] - fm[i][m])*fb[i][m];
870 /* We need to reorder cgs while summing */
871 sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
876 return sum/sqr(s_min->fnorm);
879 double do_cg(FILE *fplog,t_commrec *cr,
880 int nfile,const t_filenm fnm[],
881 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
883 gmx_vsite_t *vsite,gmx_constr_t constr,
885 t_inputrec *inputrec,
886 gmx_mtop_t *top_global,t_fcdata *fcd,
887 t_state *state_global,
889 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
892 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
894 real cpt_period,real max_hours,
895 const char *deviceOptions,
897 gmx_runtime_t *runtime)
899 const char *CG="Polak-Ribiere Conjugate Gradients";
901 em_state_t *s_min,*s_a,*s_b,*s_c;
903 gmx_enerdata_t *enerd;
905 gmx_global_stat_t gstat;
907 rvec *f_global,*p,*sf,*sfm;
908 double gpa,gpb,gpc,tmp,sum[2],minstep;
915 gmx_bool converged,foundlower;
917 gmx_bool do_log=FALSE,do_ene=FALSE,do_x,do_f;
919 int number_steps,neval=0,nstcg=inputrec->nstcgsteep;
921 int i,m,gf,step,nminstep;
926 s_min = init_em_state();
927 s_a = init_em_state();
928 s_b = init_em_state();
929 s_c = init_em_state();
931 /* Init em and store the local state in s_min */
932 init_em(fplog,CG,cr,inputrec,
933 state_global,top_global,s_min,&top,&f,&f_global,
934 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
935 nfile,fnm,&outf,&mdebin);
937 /* Print to log file */
938 print_em_start(fplog,cr,runtime,wcycle,CG);
940 /* Max number of steps */
941 number_steps=inputrec->nsteps;
944 sp_header(stderr,CG,inputrec->em_tol,number_steps);
946 sp_header(fplog,CG,inputrec->em_tol,number_steps);
948 /* Call the force routine and some auxiliary (neighboursearching etc.) */
949 /* do_force always puts the charge groups in the box and shifts again
950 * We do not unshift, so molecules are always whole in congrad.c
952 evaluate_energy(fplog,bVerbose,cr,
953 state_global,top_global,s_min,top,
954 inputrec,nrnb,wcycle,gstat,
955 vsite,constr,fcd,graph,mdatoms,fr,
956 mu_tot,enerd,vir,pres,-1,TRUE);
960 /* Copy stuff to the energy bin for easy printing etc. */
961 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
962 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
963 NULL,NULL,vir,pres,NULL,mu_tot,constr);
965 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
966 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
967 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
971 /* Estimate/guess the initial stepsize */
972 stepsize = inputrec->em_stepsize/s_min->fnorm;
975 fprintf(stderr," F-max = %12.5e on atom %d\n",
976 s_min->fmax,s_min->a_fmax+1);
977 fprintf(stderr," F-Norm = %12.5e\n",
978 s_min->fnorm/sqrt(state_global->natoms));
979 fprintf(stderr,"\n");
980 /* and copy to the log file too... */
981 fprintf(fplog," F-max = %12.5e on atom %d\n",
982 s_min->fmax,s_min->a_fmax+1);
983 fprintf(fplog," F-Norm = %12.5e\n",
984 s_min->fnorm/sqrt(state_global->natoms));
987 /* Start the loop over CG steps.
988 * Each successful step is counted, and we continue until
989 * we either converge or reach the max number of steps.
992 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
994 /* start taking steps in a new direction
995 * First time we enter the routine, beta=0, and the direction is
996 * simply the negative gradient.
999 /* Calculate the new direction in p, and the gradient in this direction, gpa */
1004 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1005 if (mdatoms->cFREEZE)
1006 gf = mdatoms->cFREEZE[i];
1007 for(m=0; m<DIM; m++) {
1008 if (!inputrec->opts.nFreeze[gf][m]) {
1009 p[i][m] = sf[i][m] + beta*p[i][m];
1010 gpa -= p[i][m]*sf[i][m];
1011 /* f is negative gradient, thus the sign */
1018 /* Sum the gradient along the line across CPUs */
1020 gmx_sumd(1,&gpa,cr);
1022 /* Calculate the norm of the search vector */
1023 get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
1025 /* Just in case stepsize reaches zero due to numerical precision... */
1027 stepsize = inputrec->em_stepsize/pnorm;
1030 * Double check the value of the derivative in the search direction.
1031 * If it is positive it must be due to the old information in the
1032 * CG formula, so just remove that and start over with beta=0.
1033 * This corresponds to a steepest descent step.
1037 step--; /* Don't count this step since we are restarting */
1038 continue; /* Go back to the beginning of the big for-loop */
1041 /* Calculate minimum allowed stepsize, before the average (norm)
1042 * relative change in coordinate is smaller than precision
1045 for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1046 for(m=0; m<DIM; m++) {
1047 tmp = fabs(s_min->s.x[i][m]);
1054 /* Add up from all CPUs */
1056 gmx_sumd(1,&minstep,cr);
1058 minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1060 if(stepsize<minstep) {
1065 /* Write coordinates if necessary */
1066 do_x = do_per_step(step,inputrec->nstxout);
1067 do_f = do_per_step(step,inputrec->nstfout);
1069 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1070 top_global,inputrec,step,
1071 s_min,state_global,f_global);
1073 /* Take a step downhill.
1074 * In theory, we should minimize the function along this direction.
1075 * That is quite possible, but it turns out to take 5-10 function evaluations
1076 * for each line. However, we dont really need to find the exact minimum -
1077 * it is much better to start a new CG step in a modified direction as soon
1078 * as we are close to it. This will save a lot of energy evaluations.
1080 * In practice, we just try to take a single step.
1081 * If it worked (i.e. lowered the energy), we increase the stepsize but
1082 * the continue straight to the next CG step without trying to find any minimum.
1083 * If it didn't work (higher energy), there must be a minimum somewhere between
1084 * the old position and the new one.
1086 * Due to the finite numerical accuracy, it turns out that it is a good idea
1087 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1088 * This leads to lower final energies in the tests I've done. / Erik
1090 s_a->epot = s_min->epot;
1092 c = a + stepsize; /* reference position along line is zero */
1094 if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1095 em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1096 s_min,top,mdatoms,fr,vsite,constr,
1100 /* Take a trial step (new coords in s_c) */
1101 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,c,s_min->s.cg_p,s_c,
1102 constr,top,nrnb,wcycle,-1);
1105 /* Calculate energy for the trial step */
1106 evaluate_energy(fplog,bVerbose,cr,
1107 state_global,top_global,s_c,top,
1108 inputrec,nrnb,wcycle,gstat,
1109 vsite,constr,fcd,graph,mdatoms,fr,
1110 mu_tot,enerd,vir,pres,-1,FALSE);
1112 /* Calc derivative along line */
1116 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1117 for(m=0; m<DIM; m++)
1118 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1120 /* Sum the gradient along the line across CPUs */
1122 gmx_sumd(1,&gpc,cr);
1124 /* This is the max amount of increase in energy we tolerate */
1125 tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1127 /* Accept the step if the energy is lower, or if it is not significantly higher
1128 * and the line derivative is still negative.
1130 if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1132 /* Great, we found a better energy. Increase step for next iteration
1133 * if we are still going down, decrease it otherwise
1136 stepsize *= 1.618034; /* The golden section */
1138 stepsize *= 0.618034; /* 1/golden section */
1140 /* New energy is the same or higher. We will have to do some work
1141 * to find a smaller value in the interval. Take smaller step next time!
1144 stepsize *= 0.618034;
1150 /* OK, if we didn't find a lower value we will have to locate one now - there must
1151 * be one in the interval [a=0,c].
1152 * The same thing is valid here, though: Don't spend dozens of iterations to find
1153 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1154 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1156 * I also have a safeguard for potentially really patological functions so we never
1157 * take more than 20 steps before we give up ...
1159 * If we already found a lower value we just skip this step and continue to the update.
1165 /* Select a new trial point.
1166 * If the derivatives at points a & c have different sign we interpolate to zero,
1167 * otherwise just do a bisection.
1170 b = a + gpa*(a-c)/(gpc-gpa);
1174 /* safeguard if interpolation close to machine accuracy causes errors:
1175 * never go outside the interval
1180 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1181 /* Reload the old state */
1182 em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1183 s_min,top,mdatoms,fr,vsite,constr,
1187 /* Take a trial step to this new point - new coords in s_b */
1188 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,b,s_min->s.cg_p,s_b,
1189 constr,top,nrnb,wcycle,-1);
1192 /* Calculate energy for the trial step */
1193 evaluate_energy(fplog,bVerbose,cr,
1194 state_global,top_global,s_b,top,
1195 inputrec,nrnb,wcycle,gstat,
1196 vsite,constr,fcd,graph,mdatoms,fr,
1197 mu_tot,enerd,vir,pres,-1,FALSE);
1199 /* p does not change within a step, but since the domain decomposition
1200 * might change, we have to use cg_p of s_b here.
1205 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1206 for(m=0; m<DIM; m++)
1207 gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1209 /* Sum the gradient along the line across CPUs */
1211 gmx_sumd(1,&gpb,cr);
1214 fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1215 s_a->epot,s_b->epot,s_c->epot,gpb);
1217 epot_repl = s_b->epot;
1219 /* Keep one of the intervals based on the value of the derivative at the new point */
1221 /* Replace c endpoint with b */
1222 swap_em_state(s_b,s_c);
1226 /* Replace a endpoint with b */
1227 swap_em_state(s_b,s_a);
1233 * Stop search as soon as we find a value smaller than the endpoints.
1234 * Never run more than 20 steps, no matter what.
1237 } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1240 if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1242 /* OK. We couldn't find a significantly lower energy.
1243 * If beta==0 this was steepest descent, and then we give up.
1244 * If not, set beta=0 and restart with steepest descent before quitting.
1251 /* Reset memory before giving up */
1257 /* Select min energy state of A & C, put the best in B.
1259 if (s_c->epot < s_a->epot) {
1261 fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1262 s_c->epot,s_a->epot);
1263 swap_em_state(s_b,s_c);
1268 fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1269 s_a->epot,s_c->epot);
1270 swap_em_state(s_b,s_a);
1277 fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1279 swap_em_state(s_b,s_c);
1284 /* new search direction */
1285 /* beta = 0 means forget all memory and restart with steepest descents. */
1286 if (nstcg && ((step % nstcg)==0))
1289 /* s_min->fnorm cannot be zero, because then we would have converged
1293 /* Polak-Ribiere update.
1294 * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1296 beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1298 /* Limit beta to prevent oscillations */
1299 if (fabs(beta) > 5.0)
1303 /* update positions */
1304 swap_em_state(s_min,s_b);
1307 /* Print it if necessary */
1310 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1311 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1312 s_min->fmax,s_min->a_fmax+1);
1313 /* Store the new (lower) energies */
1314 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1315 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
1316 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1318 do_log = do_per_step(step,inputrec->nstlog);
1319 do_ene = do_per_step(step,inputrec->nstenergy);
1321 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1322 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1323 do_log ? fplog : NULL,step,step,eprNORMAL,
1324 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1327 /* Stop when the maximum force lies below tolerance.
1328 * If we have reached machine precision, converged is already set to true.
1330 converged = converged || (s_min->fmax < inputrec->em_tol);
1332 } /* End of the loop */
1335 step--; /* we never took that last step in this case */
1337 if (s_min->fmax > inputrec->em_tol)
1341 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1342 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1348 /* If we printed energy and/or logfile last step (which was the last step)
1349 * we don't have to do it again, but otherwise print the final values.
1352 /* Write final value to log since we didn't do anything the last step */
1353 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1355 if (!do_ene || !do_log) {
1356 /* Write final energy file entries */
1357 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1358 !do_log ? fplog : NULL,step,step,eprNORMAL,
1359 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1363 /* Print some stuff... */
1365 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1368 * For accurate normal mode calculation it is imperative that we
1369 * store the last conformation into the full precision binary trajectory.
1371 * However, we should only do it if we did NOT already write this step
1372 * above (which we did if do_x or do_f was true).
1374 do_x = !do_per_step(step,inputrec->nstxout);
1375 do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1377 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1378 top_global,inputrec,step,
1379 s_min,state_global,f_global);
1381 fnormn = s_min->fnorm/sqrt(state_global->natoms);
1384 print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1385 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1386 print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1387 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1389 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1392 finish_em(fplog,cr,outf,runtime,wcycle);
1394 /* To print the actual number of steps we needed somewhere */
1395 runtime->nsteps_done = step;
1398 } /* That's all folks */
1401 double do_lbfgs(FILE *fplog,t_commrec *cr,
1402 int nfile,const t_filenm fnm[],
1403 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1405 gmx_vsite_t *vsite,gmx_constr_t constr,
1407 t_inputrec *inputrec,
1408 gmx_mtop_t *top_global,t_fcdata *fcd,
1411 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1414 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1415 gmx_membed_t membed,
1416 real cpt_period,real max_hours,
1417 const char *deviceOptions,
1418 unsigned long Flags,
1419 gmx_runtime_t *runtime)
1421 static const char *LBFGS="Low-Memory BFGS Minimizer";
1423 gmx_localtop_t *top;
1424 gmx_enerdata_t *enerd;
1426 gmx_global_stat_t gstat;
1429 int ncorr,nmaxcorr,point,cp,neval,nminstep;
1430 double stepsize,gpa,gpb,gpc,tmp,minstep;
1431 real *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1432 real *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1433 real a,b,c,maxdelta,delta;
1434 real diag,Epot0,Epot,EpotA,EpotB,EpotC;
1435 real dgdx,dgdg,sq,yr,beta;
1437 gmx_bool converged,first;
1440 gmx_bool do_log,do_ene,do_x,do_f,foundlower,*frozen;
1442 int start,end,number_steps;
1444 int i,k,m,n,nfmax,gf,step;
1450 gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1454 gmx_fatal(FARGS,"The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
1457 n = 3*state->natoms;
1458 nmaxcorr = inputrec->nbfgscorr;
1460 /* Allocate memory */
1461 /* Use pointers to real so we dont have to loop over both atoms and
1462 * dimensions all the time...
1463 * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1464 * that point to the same memory.
1478 snew(alpha,nmaxcorr);
1481 for(i=0;i<nmaxcorr;i++)
1485 for(i=0;i<nmaxcorr;i++)
1492 init_em(fplog,LBFGS,cr,inputrec,
1493 state,top_global,&ems,&top,&f,&f_global,
1494 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1495 nfile,fnm,&outf,&mdebin);
1496 /* Do_lbfgs is not completely updated like do_steep and do_cg,
1497 * so we free some memory again.
1502 xx = (real *)state->x;
1505 start = mdatoms->start;
1506 end = mdatoms->homenr + start;
1508 /* Print to log file */
1509 print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1511 do_log = do_ene = do_x = do_f = TRUE;
1513 /* Max number of steps */
1514 number_steps=inputrec->nsteps;
1516 /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1518 for(i=start; i<end; i++) {
1519 if (mdatoms->cFREEZE)
1520 gf = mdatoms->cFREEZE[i];
1521 for(m=0; m<DIM; m++)
1522 frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1525 sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1527 sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1530 construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1531 top->idef.iparams,top->idef.il,
1532 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1534 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1535 /* do_force always puts the charge groups in the box and shifts again
1536 * We do not unshift, so molecules are always whole
1541 evaluate_energy(fplog,bVerbose,cr,
1542 state,top_global,&ems,top,
1543 inputrec,nrnb,wcycle,gstat,
1544 vsite,constr,fcd,graph,mdatoms,fr,
1545 mu_tot,enerd,vir,pres,-1,TRUE);
1549 /* Copy stuff to the energy bin for easy printing etc. */
1550 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1551 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1552 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1554 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1555 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1556 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1560 /* This is the starting energy */
1561 Epot = enerd->term[F_EPOT];
1567 /* Set the initial step.
1568 * since it will be multiplied by the non-normalized search direction
1569 * vector (force vector the first time), we scale it by the
1570 * norm of the force.
1574 fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1575 fprintf(stderr," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1576 fprintf(stderr," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1577 fprintf(stderr,"\n");
1578 /* and copy to the log file too... */
1579 fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1580 fprintf(fplog," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1581 fprintf(fplog," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1582 fprintf(fplog,"\n");
1588 dx[point][i] = ff[i]; /* Initial search direction */
1592 stepsize = 1.0/fnorm;
1595 /* Start the loop over BFGS steps.
1596 * Each successful step is counted, and we continue until
1597 * we either converge or reach the max number of steps.
1602 /* Set the gradient from the force */
1604 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1606 /* Write coordinates if necessary */
1607 do_x = do_per_step(step,inputrec->nstxout);
1608 do_f = do_per_step(step,inputrec->nstfout);
1613 mdof_flags |= MDOF_X;
1618 mdof_flags |= MDOF_F;
1621 write_traj(fplog,cr,outf,mdof_flags,
1622 top_global,step,(real)step,state,state,f,f,NULL,NULL);
1624 /* Do the linesearching in the direction dx[point][0..(n-1)] */
1626 /* pointer to current direction - point=0 first time here */
1629 /* calculate line gradient */
1630 for(gpa=0,i=0;i<n;i++)
1633 /* Calculate minimum allowed stepsize, before the average (norm)
1634 * relative change in coordinate is smaller than precision
1636 for(minstep=0,i=0;i<n;i++) {
1643 minstep = GMX_REAL_EPS/sqrt(minstep/n);
1645 if(stepsize<minstep) {
1650 /* Store old forces and coordinates */
1662 /* Take a step downhill.
1663 * In theory, we should minimize the function along this direction.
1664 * That is quite possible, but it turns out to take 5-10 function evaluations
1665 * for each line. However, we dont really need to find the exact minimum -
1666 * it is much better to start a new BFGS step in a modified direction as soon
1667 * as we are close to it. This will save a lot of energy evaluations.
1669 * In practice, we just try to take a single step.
1670 * If it worked (i.e. lowered the energy), we increase the stepsize but
1671 * the continue straight to the next BFGS step without trying to find any minimum.
1672 * If it didn't work (higher energy), there must be a minimum somewhere between
1673 * the old position and the new one.
1675 * Due to the finite numerical accuracy, it turns out that it is a good idea
1676 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1677 * This leads to lower final energies in the tests I've done. / Erik
1682 c = a + stepsize; /* reference position along line is zero */
1684 /* Check stepsize first. We do not allow displacements
1685 * larger than emstep.
1695 if(maxdelta>inputrec->em_stepsize)
1697 } while(maxdelta>inputrec->em_stepsize);
1699 /* Take a trial step */
1701 xc[i] = lastx[i] + c*s[i];
1704 /* Calculate energy for the trial step */
1705 ems.s.x = (rvec *)xc;
1707 evaluate_energy(fplog,bVerbose,cr,
1708 state,top_global,&ems,top,
1709 inputrec,nrnb,wcycle,gstat,
1710 vsite,constr,fcd,graph,mdatoms,fr,
1711 mu_tot,enerd,vir,pres,step,FALSE);
1714 /* Calc derivative along line */
1715 for(gpc=0,i=0; i<n; i++) {
1716 gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
1718 /* Sum the gradient along the line across CPUs */
1720 gmx_sumd(1,&gpc,cr);
1722 /* This is the max amount of increase in energy we tolerate */
1723 tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1725 /* Accept the step if the energy is lower, or if it is not significantly higher
1726 * and the line derivative is still negative.
1728 if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1730 /* Great, we found a better energy. Increase step for next iteration
1731 * if we are still going down, decrease it otherwise
1734 stepsize *= 1.618034; /* The golden section */
1736 stepsize *= 0.618034; /* 1/golden section */
1738 /* New energy is the same or higher. We will have to do some work
1739 * to find a smaller value in the interval. Take smaller step next time!
1742 stepsize *= 0.618034;
1745 /* OK, if we didn't find a lower value we will have to locate one now - there must
1746 * be one in the interval [a=0,c].
1747 * The same thing is valid here, though: Don't spend dozens of iterations to find
1748 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1749 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1751 * I also have a safeguard for potentially really patological functions so we never
1752 * take more than 20 steps before we give up ...
1754 * If we already found a lower value we just skip this step and continue to the update.
1761 /* Select a new trial point.
1762 * If the derivatives at points a & c have different sign we interpolate to zero,
1763 * otherwise just do a bisection.
1767 b = a + gpa*(a-c)/(gpc-gpa);
1771 /* safeguard if interpolation close to machine accuracy causes errors:
1772 * never go outside the interval
1777 /* Take a trial step */
1779 xb[i] = lastx[i] + b*s[i];
1782 /* Calculate energy for the trial step */
1783 ems.s.x = (rvec *)xb;
1785 evaluate_energy(fplog,bVerbose,cr,
1786 state,top_global,&ems,top,
1787 inputrec,nrnb,wcycle,gstat,
1788 vsite,constr,fcd,graph,mdatoms,fr,
1789 mu_tot,enerd,vir,pres,step,FALSE);
1794 for(gpb=0,i=0; i<n; i++)
1795 gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
1797 /* Sum the gradient along the line across CPUs */
1799 gmx_sumd(1,&gpb,cr);
1801 /* Keep one of the intervals based on the value of the derivative at the new point */
1803 /* Replace c endpoint with b */
1807 /* swap coord pointers b/c */
1815 /* Replace a endpoint with b */
1819 /* swap coord pointers a/b */
1829 * Stop search as soon as we find a value smaller than the endpoints,
1830 * or if the tolerance is below machine precision.
1831 * Never run more than 20 steps, no matter what.
1834 } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1836 if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1837 /* OK. We couldn't find a significantly lower energy.
1838 * If ncorr==0 this was steepest descent, and then we give up.
1839 * If not, reset memory to restart as steepest descent before quitting.
1848 /* Search in gradient direction */
1851 /* Reset stepsize */
1852 stepsize = 1.0/fnorm;
1857 /* Select min energy state of A & C, put the best in xx/ff/Epot
1888 /* Update the memory information, and calculate a new
1889 * approximation of the inverse hessian
1892 /* Have new data in Epot, xx, ff */
1897 dg[point][i]=lastf[i]-ff[i];
1898 dx[point][i]*=stepsize;
1904 dgdg+=dg[point][i]*dg[point][i];
1905 dgdx+=dg[point][i]*dx[point][i];
1910 rho[point]=1.0/dgdx;
1922 /* Recursive update. First go back over the memory points */
1923 for(k=0;k<ncorr;k++) {
1932 alpha[cp]=rho[cp]*sq;
1935 p[i] -= alpha[cp]*dg[cp][i];
1941 /* And then go forward again */
1942 for(k=0;k<ncorr;k++) {
1945 yr += p[i]*dg[cp][i];
1948 beta = alpha[cp]-beta;
1951 p[i] += beta*dx[cp][i];
1960 dx[point][i] = p[i];
1966 /* Test whether the convergence criterion is met */
1967 get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1969 /* Print it if necessary */
1972 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1973 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1974 /* Store the new (lower) energies */
1975 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1976 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1977 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1978 do_log = do_per_step(step,inputrec->nstlog);
1979 do_ene = do_per_step(step,inputrec->nstenergy);
1981 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1982 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1983 do_log ? fplog : NULL,step,step,eprNORMAL,
1984 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1987 /* Stop when the maximum force lies below tolerance.
1988 * If we have reached machine precision, converged is already set to true.
1991 converged = converged || (fmax < inputrec->em_tol);
1993 } /* End of the loop */
1996 step--; /* we never took that last step in this case */
1998 if(fmax>inputrec->em_tol)
2002 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
2003 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
2008 /* If we printed energy and/or logfile last step (which was the last step)
2009 * we don't have to do it again, but otherwise print the final values.
2011 if(!do_log) /* Write final value to log since we didn't do anythin last step */
2012 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
2013 if(!do_ene || !do_log) /* Write final energy file entries */
2014 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
2015 !do_log ? fplog : NULL,step,step,eprNORMAL,
2016 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2018 /* Print some stuff... */
2020 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2023 * For accurate normal mode calculation it is imperative that we
2024 * store the last conformation into the full precision binary trajectory.
2026 * However, we should only do it if we did NOT already write this step
2027 * above (which we did if do_x or do_f was true).
2029 do_x = !do_per_step(step,inputrec->nstxout);
2030 do_f = !do_per_step(step,inputrec->nstfout);
2031 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
2032 top_global,inputrec,step,
2036 print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
2037 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2038 print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
2039 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2041 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
2044 finish_em(fplog,cr,outf,runtime,wcycle);
2046 /* To print the actual number of steps we needed somewhere */
2047 runtime->nsteps_done = step;
2050 } /* That's all folks */
2053 double do_steep(FILE *fplog,t_commrec *cr,
2054 int nfile, const t_filenm fnm[],
2055 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2057 gmx_vsite_t *vsite,gmx_constr_t constr,
2059 t_inputrec *inputrec,
2060 gmx_mtop_t *top_global,t_fcdata *fcd,
2061 t_state *state_global,
2063 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2066 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2067 gmx_membed_t membed,
2068 real cpt_period,real max_hours,
2069 const char *deviceOptions,
2070 unsigned long Flags,
2071 gmx_runtime_t *runtime)
2073 const char *SD="Steepest Descents";
2074 em_state_t *s_min,*s_try;
2076 gmx_localtop_t *top;
2077 gmx_enerdata_t *enerd;
2079 gmx_global_stat_t gstat;
2081 real stepsize,constepsize;
2082 real ustep,dvdlambda,fnormn;
2085 gmx_bool bDone,bAbort,do_x,do_f;
2090 int steps_accepted=0;
2094 s_min = init_em_state();
2095 s_try = init_em_state();
2097 /* Init em and store the local state in s_try */
2098 init_em(fplog,SD,cr,inputrec,
2099 state_global,top_global,s_try,&top,&f,&f_global,
2100 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2101 nfile,fnm,&outf,&mdebin);
2103 /* Print to log file */
2104 print_em_start(fplog,cr,runtime,wcycle,SD);
2106 /* Set variables for stepsize (in nm). This is the largest
2107 * step that we are going to make in any direction.
2109 ustep = inputrec->em_stepsize;
2112 /* Max number of steps */
2113 nsteps = inputrec->nsteps;
2116 /* Print to the screen */
2117 sp_header(stderr,SD,inputrec->em_tol,nsteps);
2119 sp_header(fplog,SD,inputrec->em_tol,nsteps);
2121 /**** HERE STARTS THE LOOP ****
2122 * count is the counter for the number of steps
2123 * bDone will be TRUE when the minimization has converged
2124 * bAbort will be TRUE when nsteps steps have been performed or when
2125 * the stepsize becomes smaller than is reasonable for machine precision
2130 while( !bDone && !bAbort ) {
2131 bAbort = (nsteps >= 0) && (count == nsteps);
2133 /* set new coordinates, except for first step */
2135 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,
2136 s_min,stepsize,s_min->f,s_try,
2137 constr,top,nrnb,wcycle,count);
2140 evaluate_energy(fplog,bVerbose,cr,
2141 state_global,top_global,s_try,top,
2142 inputrec,nrnb,wcycle,gstat,
2143 vsite,constr,fcd,graph,mdatoms,fr,
2144 mu_tot,enerd,vir,pres,count,count==0);
2147 print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
2150 s_min->epot = s_try->epot + 1;
2152 /* Print it if necessary */
2155 fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2156 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2157 (s_try->epot < s_min->epot) ? '\n' : '\r');
2160 if (s_try->epot < s_min->epot) {
2161 /* Store the new (lower) energies */
2162 upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2163 mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
2164 s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
2165 print_ebin(outf->fp_ene,TRUE,
2166 do_per_step(steps_accepted,inputrec->nstdisreout),
2167 do_per_step(steps_accepted,inputrec->nstorireout),
2168 fplog,count,count,eprNORMAL,TRUE,
2169 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2174 /* Now if the new energy is smaller than the previous...
2175 * or if this is the first step!
2176 * or if we did random steps!
2179 if ( (count==0) || (s_try->epot < s_min->epot) ) {
2182 /* Test whether the convergence criterion is met... */
2183 bDone = (s_try->fmax < inputrec->em_tol);
2185 /* Copy the arrays for force, positions and energy */
2186 /* The 'Min' array always holds the coords and forces of the minimal
2188 swap_em_state(s_min,s_try);
2192 /* Write to trn, if necessary */
2193 do_x = do_per_step(steps_accepted,inputrec->nstxout);
2194 do_f = do_per_step(steps_accepted,inputrec->nstfout);
2195 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2196 top_global,inputrec,count,
2197 s_min,state_global,f_global);
2200 /* If energy is not smaller make the step smaller... */
2203 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2204 /* Reload the old state */
2205 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2206 s_min,top,mdatoms,fr,vsite,constr,
2211 /* Determine new step */
2212 stepsize = ustep/s_min->fmax;
2214 /* Check if stepsize is too small, with 1 nm as a characteristic length */
2216 if (count == nsteps || ustep < 1e-12)
2218 if (count == nsteps || ustep < 1e-6)
2223 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2224 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2230 } /* End of the loop */
2232 /* Print some shit... */
2234 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2235 write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2236 top_global,inputrec,count,
2237 s_min,state_global,f_global);
2239 fnormn = s_min->fnorm/sqrt(state_global->natoms);
2242 print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2243 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2244 print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2245 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2248 finish_em(fplog,cr,outf,runtime,wcycle);
2250 /* To print the actual number of steps we needed somewhere */
2251 inputrec->nsteps=count;
2253 runtime->nsteps_done = count;
2256 } /* That's all folks */
2259 double do_nm(FILE *fplog,t_commrec *cr,
2260 int nfile,const t_filenm fnm[],
2261 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2263 gmx_vsite_t *vsite,gmx_constr_t constr,
2265 t_inputrec *inputrec,
2266 gmx_mtop_t *top_global,t_fcdata *fcd,
2267 t_state *state_global,
2269 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2272 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2273 gmx_membed_t membed,
2274 real cpt_period,real max_hours,
2275 const char *deviceOptions,
2276 unsigned long Flags,
2277 gmx_runtime_t *runtime)
2279 const char *NM = "Normal Mode Analysis";
2284 gmx_localtop_t *top;
2285 gmx_enerdata_t *enerd;
2287 gmx_global_stat_t gstat;
2289 real t,t0,lambda,lam0;
2294 gmx_bool bSparse; /* use sparse matrix storage format */
2296 gmx_sparsematrix_t * sparse_matrix = NULL;
2297 real * full_matrix = NULL;
2298 em_state_t * state_work;
2300 /* added with respect to mdrun */
2302 real der_range=10.0*sqrt(GMX_REAL_EPS);
2308 gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2311 state_work = init_em_state();
2313 /* Init em and store the local state in state_minimum */
2314 init_em(fplog,NM,cr,inputrec,
2315 state_global,top_global,state_work,&top,
2317 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2318 nfile,fnm,&outf,NULL);
2320 natoms = top_global->natoms;
2328 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2329 " which MIGHT not be accurate enough for normal mode analysis.\n"
2330 " Gromacs now uses sparse matrix storage, so the memory requirements\n"
2331 " are fairly modest even if you recompile in double precision.\n\n");
2335 /* Check if we can/should use sparse storage format.
2337 * Sparse format is only useful when the Hessian itself is sparse, which it
2338 * will be when we use a cutoff.
2339 * For small systems (n<1000) it is easier to always use full matrix format, though.
2341 if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2343 fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2346 else if(top_global->natoms < 1000)
2348 fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2353 fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2357 sz = DIM*top_global->natoms;
2359 fprintf(stderr,"Allocating Hessian memory...\n\n");
2363 sparse_matrix=gmx_sparsematrix_init(sz);
2364 sparse_matrix->compressed_symmetric = TRUE;
2368 snew(full_matrix,sz*sz);
2371 /* Initial values */
2372 t0 = inputrec->init_t;
2373 lam0 = inputrec->fepvals->init_lambda;
2381 /* Write start time and temperature */
2382 print_em_start(fplog,cr,runtime,wcycle,NM);
2384 /* fudge nr of steps to nr of atoms */
2385 inputrec->nsteps = natoms*2;
2389 fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2390 *(top_global->name),(int)inputrec->nsteps);
2393 nnodes = cr->nnodes;
2395 /* Make evaluate_energy do a single node force calculation */
2397 evaluate_energy(fplog,bVerbose,cr,
2398 state_global,top_global,state_work,top,
2399 inputrec,nrnb,wcycle,gstat,
2400 vsite,constr,fcd,graph,mdatoms,fr,
2401 mu_tot,enerd,vir,pres,-1,TRUE);
2402 cr->nnodes = nnodes;
2404 /* if forces are not small, warn user */
2405 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2409 fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2410 if (state_work->fmax > 1.0e-3)
2412 fprintf(stderr,"Maximum force probably not small enough to");
2413 fprintf(stderr," ensure that you are in an \nenergy well. ");
2414 fprintf(stderr,"Be aware that negative eigenvalues may occur");
2415 fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2419 /***********************************************************
2421 * Loop over all pairs in matrix
2423 * do_force called twice. Once with positive and
2424 * once with negative displacement
2426 ************************************************************/
2428 /* Steps are divided one by one over the nodes */
2429 for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2432 for (d=0; d<DIM; d++)
2434 x_min = state_work->s.x[atom][d];
2436 state_work->s.x[atom][d] = x_min - der_range;
2438 /* Make evaluate_energy do a single node force calculation */
2440 evaluate_energy(fplog,bVerbose,cr,
2441 state_global,top_global,state_work,top,
2442 inputrec,nrnb,wcycle,gstat,
2443 vsite,constr,fcd,graph,mdatoms,fr,
2444 mu_tot,enerd,vir,pres,atom*2,FALSE);
2446 for(i=0; i<natoms; i++)
2448 copy_rvec(state_work->f[i], fneg[i]);
2451 state_work->s.x[atom][d] = x_min + der_range;
2453 evaluate_energy(fplog,bVerbose,cr,
2454 state_global,top_global,state_work,top,
2455 inputrec,nrnb,wcycle,gstat,
2456 vsite,constr,fcd,graph,mdatoms,fr,
2457 mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2458 cr->nnodes = nnodes;
2460 /* x is restored to original */
2461 state_work->s.x[atom][d] = x_min;
2463 for(j=0; j<natoms; j++)
2465 for (k=0; (k<DIM); k++)
2468 -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2476 #define mpi_type MPI_DOUBLE
2478 #define mpi_type MPI_FLOAT
2480 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2481 cr->mpi_comm_mygroup);
2486 for(node=0; (node<nnodes && atom+node<natoms); node++)
2492 MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2493 cr->mpi_comm_mygroup,&stat);
2498 row = (atom + node)*DIM + d;
2500 for(j=0; j<natoms; j++)
2502 for(k=0; k<DIM; k++)
2508 if (col >= row && dfdx[j][k] != 0.0)
2510 gmx_sparsematrix_increment_value(sparse_matrix,
2511 row,col,dfdx[j][k]);
2516 full_matrix[row*sz+col] = dfdx[j][k];
2523 if (bVerbose && fplog)
2528 /* write progress */
2529 if (MASTER(cr) && bVerbose)
2531 fprintf(stderr,"\rFinished step %d out of %d",
2532 min(atom+nnodes,natoms),natoms);
2539 fprintf(stderr,"\n\nWriting Hessian...\n");
2540 gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2543 finish_em(fplog,cr,outf,runtime,wcycle);
2545 runtime->nsteps_done = natoms*2;