1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROwing Monsters And Cloning Shrimps
55 #include "gmx_fatal.h"
72 #include "gmx_wallcycle.h"
73 #include "mtop_util.h"
77 #include "gromacs/linearalgebra/mtxio.h"
78 #include "gromacs/linearalgebra/sparsematrix.h"
89 static em_state_t *init_em_state()
95 /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? */
96 snew(ems->s.lambda,efptNR);
101 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
102 gmx_wallcycle_t wcycle,
107 runtime_start(runtime);
109 sprintf(buf,"Started %s",name);
110 print_date_and_time(fplog,cr->nodeid,buf,NULL);
112 wallcycle_start(wcycle,ewcRUN);
114 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
115 gmx_wallcycle_t wcycle)
117 wallcycle_stop(wcycle,ewcRUN);
119 runtime_end(runtime);
122 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
125 fprintf(out,"%s:\n",minimizer);
126 fprintf(out," Tolerance (Fmax) = %12.5e\n",ftol);
127 fprintf(out," Number of steps = %12d\n",nsteps);
130 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
136 "\nEnergy minimization reached the maximum number"
137 "of steps before the forces reached the requested"
138 "precision Fmax < %g.\n",ftol);
143 "\nEnergy minimization has stopped, but the forces have"
144 "not converged to the requested precision Fmax < %g (which"
145 "may not be possible for your system). It stopped"
146 "because the algorithm tried to make a new step whose size"
147 "was too small, or there was no change in the energy since"
148 "last step. Either way, we regard the minimization as"
149 "converged to within the available machine precision,"
150 "given your starting configuration and EM parameters.\n%s%s",
152 sizeof(real)<sizeof(double) ?
153 "\nDouble precision normally gives you higher accuracy, but"
154 "this is often not needed for preparing to run molecular"
158 "You might need to increase your constraint accuracy, or turn\n"
159 "off constraints altogether (set constraints = none in mdp file)\n" :
162 fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
167 static void print_converged(FILE *fp,const char *alg,real ftol,
168 gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
169 real epot,real fmax, int nfmax, real fnorm)
171 char buf[STEPSTRSIZE];
174 fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
175 alg,ftol,gmx_step_str(count,buf));
176 else if(count<nsteps)
177 fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
178 "but did not reach the requested Fmax < %g.\n",
179 alg,gmx_step_str(count,buf),ftol);
181 fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
182 alg,ftol,gmx_step_str(count,buf));
185 fprintf(fp,"Potential Energy = %21.14e\n",epot);
186 fprintf(fp,"Maximum force = %21.14e on atom %d\n",fmax,nfmax+1);
187 fprintf(fp,"Norm of force = %21.14e\n",fnorm);
189 fprintf(fp,"Potential Energy = %14.7e\n",epot);
190 fprintf(fp,"Maximum force = %14.7e on atom %d\n",fmax,nfmax+1);
191 fprintf(fp,"Norm of force = %14.7e\n",fnorm);
195 static void get_f_norm_max(t_commrec *cr,
196 t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
197 real *fnorm,real *fmax,int *a_fmax)
200 real fmax2,fmax2_0,fam;
201 int la_max,a_max,start,end,i,m,gf;
203 /* This routine finds the largest force and returns it.
204 * On parallel machines the global max is taken.
210 start = mdatoms->start;
211 end = mdatoms->homenr + start;
212 if (mdatoms->cFREEZE) {
213 for(i=start; i<end; i++) {
214 gf = mdatoms->cFREEZE[i];
217 if (!opts->nFreeze[gf][m])
226 for(i=start; i<end; i++) {
236 if (la_max >= 0 && DOMAINDECOMP(cr)) {
237 a_max = cr->dd->gatindex[la_max];
242 snew(sum,2*cr->nnodes+1);
243 sum[2*cr->nodeid] = fmax2;
244 sum[2*cr->nodeid+1] = a_max;
245 sum[2*cr->nnodes] = fnorm2;
246 gmx_sumd(2*cr->nnodes+1,sum,cr);
247 fnorm2 = sum[2*cr->nnodes];
248 /* Determine the global maximum */
249 for(i=0; i<cr->nnodes; i++) {
250 if (sum[2*i] > fmax2) {
252 a_max = (int)(sum[2*i+1] + 0.5);
259 *fnorm = sqrt(fnorm2);
266 static void get_state_f_norm_max(t_commrec *cr,
267 t_grpopts *opts,t_mdatoms *mdatoms,
270 get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
273 void init_em(FILE *fplog,const char *title,
274 t_commrec *cr,t_inputrec *ir,
275 t_state *state_global,gmx_mtop_t *top_global,
276 em_state_t *ems,gmx_localtop_t **top,
277 rvec **f,rvec **f_global,
278 t_nrnb *nrnb,rvec mu_tot,
279 t_forcerec *fr,gmx_enerdata_t **enerd,
280 t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
281 gmx_vsite_t *vsite,gmx_constr_t constr,
282 int nfile,const t_filenm fnm[],
283 gmx_mdoutf_t **outf,t_mdebin **mdebin)
290 fprintf(fplog,"Initiating %s\n",title);
293 state_global->ngtc = 0;
295 /* Initialize lambda variables */
296 initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
300 if (DOMAINDECOMP(cr))
302 *top = dd_init_local_top(top_global);
304 dd_init_local_state(cr->dd,state_global,&ems->s);
308 /* Distribute the charge groups over the nodes from the master node */
309 dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
310 state_global,top_global,ir,
311 &ems->s,&ems->f,mdatoms,*top,
312 fr,vsite,NULL,constr,
314 dd_store_state(cr->dd,&ems->s);
318 snew(*f_global,top_global->natoms);
328 snew(*f,top_global->natoms);
330 /* Just copy the state */
331 ems->s = *state_global;
332 snew(ems->s.x,ems->s.nalloc);
333 snew(ems->f,ems->s.nalloc);
334 for(i=0; i<state_global->natoms; i++)
336 copy_rvec(state_global->x[i],ems->s.x[i]);
338 copy_mat(state_global->box,ems->s.box);
340 if (PAR(cr) && ir->eI != eiNM)
342 /* Initialize the particle decomposition and split the topology */
343 *top = split_system(fplog,top_global,ir,cr);
345 pd_cg_range(cr,&fr->cg0,&fr->hcg);
349 *top = gmx_mtop_generate_local_top(top_global,ir);
353 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
355 *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
364 pd_at_range(cr,&start,&homenr);
370 homenr = top_global->natoms;
372 atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
373 update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
377 set_vsite_top(vsite,*top,mdatoms,cr);
383 if (ir->eConstrAlg == econtSHAKE &&
384 gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
386 gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
387 econstr_names[econtSHAKE],econstr_names[econtLINCS]);
390 if (!DOMAINDECOMP(cr))
392 set_constraints(constr,*top,ir,mdatoms,cr);
395 if (!ir->bContinuation)
397 /* Constrain the starting coordinates */
399 constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
400 ir,NULL,cr,-1,0,mdatoms,
401 ems->s.x,ems->s.x,NULL,ems->s.box,
402 ems->s.lambda[efptFEP],&dvdlambda,
403 NULL,NULL,nrnb,econqCoord,FALSE,0,0);
409 *gstat = global_stat_init(ir);
412 *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
415 init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
420 /* Init bin for energy stuff */
421 *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
425 calc_shifts(ems->s.box,fr->shift_vec);
428 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
429 gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
431 if (!(cr->duty & DUTY_PME)) {
432 /* Tell the PME only node to finish */
438 em_time_end(fplog,cr,runtime,wcycle);
441 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
450 static void copy_em_coords(em_state_t *ems,t_state *state)
454 for(i=0; (i<state->natoms); i++)
456 copy_rvec(ems->s.x[i],state->x[i]);
460 static void write_em_traj(FILE *fplog,t_commrec *cr,
462 gmx_bool bX,gmx_bool bF,const char *confout,
463 gmx_mtop_t *top_global,
464 t_inputrec *ir,gmx_large_int_t step,
466 t_state *state_global,rvec *f_global)
470 if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
472 copy_em_coords(state,state_global);
477 if (bX) { mdof_flags |= MDOF_X; }
478 if (bF) { mdof_flags |= MDOF_F; }
479 write_traj(fplog,cr,outf,mdof_flags,
480 top_global,step,(double)step,
481 &state->s,state_global,state->f,f_global,NULL,NULL);
483 if (confout != NULL && MASTER(cr))
485 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
487 /* Make molecules whole only for confout writing */
488 do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
492 write_sto_conf_mtop(confout,
493 *top_global->name,top_global,
494 state_global->x,NULL,ir->ePBC,state_global->box);
498 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
499 em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
500 gmx_constr_t constr,gmx_localtop_t *top,
501 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
502 gmx_large_int_t count)
506 int start,end,gf,i,m;
513 if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
514 gmx_incons("state mismatch in do_em_step");
516 s2->flags = s1->flags;
518 if (s2->nalloc != s1->nalloc) {
519 s2->nalloc = s1->nalloc;
520 srenew(s2->x,s1->nalloc);
521 srenew(ems2->f, s1->nalloc);
522 if (s2->flags & (1<<estCGP))
523 srenew(s2->cg_p, s1->nalloc);
526 s2->natoms = s1->natoms;
527 /* Copy free energy state -> is this necessary? */
528 for (i=0;i<efptNR;i++)
530 s2->lambda[i] = s1->lambda[i];
532 copy_mat(s1->box,s2->box);
535 end = md->start + md->homenr;
540 for(i=start; i<end; i++) {
543 for(m=0; m<DIM; m++) {
544 if (ir->opts.nFreeze[gf][m])
547 x2[i][m] = x1[i][m] + a*f[i][m];
551 if (s2->flags & (1<<estCGP)) {
552 /* Copy the CG p vector */
555 for(i=start; i<end; i++)
556 copy_rvec(x1[i],x2[i]);
559 if (DOMAINDECOMP(cr)) {
560 s2->ddp_count = s1->ddp_count;
561 if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
562 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
563 srenew(s2->cg_gl,s2->cg_gl_nalloc);
565 s2->ncg_gl = s1->ncg_gl;
566 for(i=0; i<s2->ncg_gl; i++)
567 s2->cg_gl[i] = s1->cg_gl[i];
568 s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
572 wallcycle_start(wcycle,ewcCONSTR);
574 constrain(NULL,TRUE,TRUE,constr,&top->idef,
575 ir,NULL,cr,count,0,md,
576 s1->x,s2->x,NULL,s2->box,s2->lambda[efptBONDED],
577 &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
578 wallcycle_stop(wcycle,ewcCONSTR);
582 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
583 gmx_mtop_t *top_global,t_inputrec *ir,
584 em_state_t *ems,gmx_localtop_t *top,
585 t_mdatoms *mdatoms,t_forcerec *fr,
586 gmx_vsite_t *vsite,gmx_constr_t constr,
587 t_nrnb *nrnb,gmx_wallcycle_t wcycle)
589 /* Repartition the domain decomposition */
590 wallcycle_start(wcycle,ewcDOMDEC);
591 dd_partition_system(fplog,step,cr,FALSE,1,
594 mdatoms,top,fr,vsite,NULL,constr,
596 dd_store_state(cr->dd,&ems->s);
597 wallcycle_stop(wcycle,ewcDOMDEC);
600 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
601 t_state *state_global,gmx_mtop_t *top_global,
602 em_state_t *ems,gmx_localtop_t *top,
603 t_inputrec *inputrec,
604 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
605 gmx_global_stat_t gstat,
606 gmx_vsite_t *vsite,gmx_constr_t constr,
608 t_graph *graph,t_mdatoms *mdatoms,
609 t_forcerec *fr,rvec mu_tot,
610 gmx_enerdata_t *enerd,tensor vir,tensor pres,
611 gmx_large_int_t count,gmx_bool bFirst)
616 tensor force_vir,shake_vir,ekin;
617 real dvdlambda,prescorr,enercorr,dvdlcorr;
620 /* Set the time to the initial time, the time does not change during EM */
621 t = inputrec->init_t;
624 (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
625 /* This the first state or an old state used before the last ns */
629 if (inputrec->nstlist > 0) {
631 } else if (inputrec->nstlist == -1) {
632 nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
634 gmx_sumi(1,&nabnsb,cr);
640 construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
641 top->idef.iparams,top->idef.il,
642 fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
644 if (DOMAINDECOMP(cr)) {
646 /* Repartition the domain decomposition */
647 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
648 ems,top,mdatoms,fr,vsite,constr,
653 /* Calc force & energy on new trial position */
654 /* do_force always puts the charge groups in the box and shifts again
655 * We do not unshift, so molecules are always whole in congrad.c
657 do_force(fplog,cr,inputrec,
658 count,nrnb,wcycle,top,top_global,&top_global->groups,
659 ems->s.box,ems->s.x,&ems->s.hist,
660 ems->f,force_vir,mdatoms,enerd,fcd,
661 ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
662 GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
663 (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
665 /* Clear the unused shake virial and pressure */
666 clear_mat(shake_vir);
669 /* Communicate stuff when parallel */
670 if (PAR(cr) && inputrec->eI != eiNM)
672 wallcycle_start(wcycle,ewcMoveE);
674 global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
675 inputrec,NULL,NULL,NULL,1,&terminate,
676 top_global,&ems->s,FALSE,
682 wallcycle_stop(wcycle,ewcMoveE);
685 /* Calculate long range corrections to pressure and energy */
686 calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
687 pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
688 enerd->term[F_DISPCORR] = enercorr;
689 enerd->term[F_EPOT] += enercorr;
690 enerd->term[F_PRES] += prescorr;
691 enerd->term[F_DVDL] += dvdlcorr;
693 ems->epot = enerd->term[F_EPOT];
696 /* Project out the constraint components of the force */
697 wallcycle_start(wcycle,ewcCONSTR);
699 constrain(NULL,FALSE,FALSE,constr,&top->idef,
700 inputrec,NULL,cr,count,0,mdatoms,
701 ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda[efptBONDED],&dvdlambda,
702 NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
703 if (fr->bSepDVDL && fplog)
704 fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
705 enerd->term[F_DVDL_BONDED] += dvdlambda;
706 m_add(force_vir,shake_vir,vir);
707 wallcycle_stop(wcycle,ewcCONSTR);
709 copy_mat(force_vir,vir);
713 enerd->term[F_PRES] =
714 calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
716 sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
718 if (EI_ENERGY_MINIMIZATION(inputrec->eI))
720 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
724 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
726 em_state_t *s_min,em_state_t *s_b)
730 int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
732 unsigned char *grpnrFREEZE;
735 fprintf(debug,"Doing reorder_partsum\n");
740 cgs_gl = dd_charge_groups_global(cr->dd);
741 index = cgs_gl->index;
743 /* Collect fm in a global vector fmg.
744 * This conflicts with the spirit of domain decomposition,
745 * but to fully optimize this a much more complicated algorithm is required.
747 snew(fmg,mtop->natoms);
749 ncg = s_min->s.ncg_gl;
750 cg_gl = s_min->s.cg_gl;
752 for(c=0; c<ncg; c++) {
756 for(a=a0; a<a1; a++) {
757 copy_rvec(fm[i],fmg[a]);
761 gmx_sum(mtop->natoms*3,fmg[0],cr);
763 /* Now we will determine the part of the sum for the cgs in state s_b */
765 cg_gl = s_b->s.cg_gl;
769 grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
770 for(c=0; c<ncg; c++) {
774 for(a=a0; a<a1; a++) {
775 if (mdatoms->cFREEZE && grpnrFREEZE) {
778 for(m=0; m<DIM; m++) {
779 if (!opts->nFreeze[gf][m]) {
780 partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
792 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
794 em_state_t *s_min,em_state_t *s_b)
800 /* This is just the classical Polak-Ribiere calculation of beta;
801 * it looks a bit complicated since we take freeze groups into account,
802 * and might have to sum it in parallel runs.
805 if (!DOMAINDECOMP(cr) ||
806 (s_min->s.ddp_count == cr->dd->ddp_count &&
807 s_b->s.ddp_count == cr->dd->ddp_count)) {
812 /* This part of code can be incorrect with DD,
813 * since the atom ordering in s_b and s_min might differ.
815 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
816 if (mdatoms->cFREEZE)
817 gf = mdatoms->cFREEZE[i];
819 if (!opts->nFreeze[gf][m]) {
820 sum += (fb[i][m] - fm[i][m])*fb[i][m];
824 /* We need to reorder cgs while summing */
825 sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
830 return sum/sqr(s_min->fnorm);
833 double do_cg(FILE *fplog,t_commrec *cr,
834 int nfile,const t_filenm fnm[],
835 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
837 gmx_vsite_t *vsite,gmx_constr_t constr,
839 t_inputrec *inputrec,
840 gmx_mtop_t *top_global,t_fcdata *fcd,
841 t_state *state_global,
843 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
846 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
848 real cpt_period,real max_hours,
849 const char *deviceOptions,
851 gmx_runtime_t *runtime)
853 const char *CG="Polak-Ribiere Conjugate Gradients";
855 em_state_t *s_min,*s_a,*s_b,*s_c;
857 gmx_enerdata_t *enerd;
859 gmx_global_stat_t gstat;
861 rvec *f_global,*p,*sf,*sfm;
862 double gpa,gpb,gpc,tmp,sum[2],minstep;
869 gmx_bool converged,foundlower;
871 gmx_bool do_log=FALSE,do_ene=FALSE,do_x,do_f;
873 int number_steps,neval=0,nstcg=inputrec->nstcgsteep;
875 int i,m,gf,step,nminstep;
880 s_min = init_em_state();
881 s_a = init_em_state();
882 s_b = init_em_state();
883 s_c = init_em_state();
885 /* Init em and store the local state in s_min */
886 init_em(fplog,CG,cr,inputrec,
887 state_global,top_global,s_min,&top,&f,&f_global,
888 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
889 nfile,fnm,&outf,&mdebin);
891 /* Print to log file */
892 print_em_start(fplog,cr,runtime,wcycle,CG);
894 /* Max number of steps */
895 number_steps=inputrec->nsteps;
898 sp_header(stderr,CG,inputrec->em_tol,number_steps);
900 sp_header(fplog,CG,inputrec->em_tol,number_steps);
902 /* Call the force routine and some auxiliary (neighboursearching etc.) */
903 /* do_force always puts the charge groups in the box and shifts again
904 * We do not unshift, so molecules are always whole in congrad.c
906 evaluate_energy(fplog,bVerbose,cr,
907 state_global,top_global,s_min,top,
908 inputrec,nrnb,wcycle,gstat,
909 vsite,constr,fcd,graph,mdatoms,fr,
910 mu_tot,enerd,vir,pres,-1,TRUE);
914 /* Copy stuff to the energy bin for easy printing etc. */
915 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
916 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
917 NULL,NULL,vir,pres,NULL,mu_tot,constr);
919 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
920 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
921 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
925 /* Estimate/guess the initial stepsize */
926 stepsize = inputrec->em_stepsize/s_min->fnorm;
929 fprintf(stderr," F-max = %12.5e on atom %d\n",
930 s_min->fmax,s_min->a_fmax+1);
931 fprintf(stderr," F-Norm = %12.5e\n",
932 s_min->fnorm/sqrt(state_global->natoms));
933 fprintf(stderr,"\n");
934 /* and copy to the log file too... */
935 fprintf(fplog," F-max = %12.5e on atom %d\n",
936 s_min->fmax,s_min->a_fmax+1);
937 fprintf(fplog," F-Norm = %12.5e\n",
938 s_min->fnorm/sqrt(state_global->natoms));
941 /* Start the loop over CG steps.
942 * Each successful step is counted, and we continue until
943 * we either converge or reach the max number of steps.
946 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
948 /* start taking steps in a new direction
949 * First time we enter the routine, beta=0, and the direction is
950 * simply the negative gradient.
953 /* Calculate the new direction in p, and the gradient in this direction, gpa */
958 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
959 if (mdatoms->cFREEZE)
960 gf = mdatoms->cFREEZE[i];
961 for(m=0; m<DIM; m++) {
962 if (!inputrec->opts.nFreeze[gf][m]) {
963 p[i][m] = sf[i][m] + beta*p[i][m];
964 gpa -= p[i][m]*sf[i][m];
965 /* f is negative gradient, thus the sign */
972 /* Sum the gradient along the line across CPUs */
976 /* Calculate the norm of the search vector */
977 get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
979 /* Just in case stepsize reaches zero due to numerical precision... */
981 stepsize = inputrec->em_stepsize/pnorm;
984 * Double check the value of the derivative in the search direction.
985 * If it is positive it must be due to the old information in the
986 * CG formula, so just remove that and start over with beta=0.
987 * This corresponds to a steepest descent step.
991 step--; /* Don't count this step since we are restarting */
992 continue; /* Go back to the beginning of the big for-loop */
995 /* Calculate minimum allowed stepsize, before the average (norm)
996 * relative change in coordinate is smaller than precision
999 for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1000 for(m=0; m<DIM; m++) {
1001 tmp = fabs(s_min->s.x[i][m]);
1008 /* Add up from all CPUs */
1010 gmx_sumd(1,&minstep,cr);
1012 minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1014 if(stepsize<minstep) {
1019 /* Write coordinates if necessary */
1020 do_x = do_per_step(step,inputrec->nstxout);
1021 do_f = do_per_step(step,inputrec->nstfout);
1023 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1024 top_global,inputrec,step,
1025 s_min,state_global,f_global);
1027 /* Take a step downhill.
1028 * In theory, we should minimize the function along this direction.
1029 * That is quite possible, but it turns out to take 5-10 function evaluations
1030 * for each line. However, we dont really need to find the exact minimum -
1031 * it is much better to start a new CG step in a modified direction as soon
1032 * as we are close to it. This will save a lot of energy evaluations.
1034 * In practice, we just try to take a single step.
1035 * If it worked (i.e. lowered the energy), we increase the stepsize but
1036 * the continue straight to the next CG step without trying to find any minimum.
1037 * If it didn't work (higher energy), there must be a minimum somewhere between
1038 * the old position and the new one.
1040 * Due to the finite numerical accuracy, it turns out that it is a good idea
1041 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1042 * This leads to lower final energies in the tests I've done. / Erik
1044 s_a->epot = s_min->epot;
1046 c = a + stepsize; /* reference position along line is zero */
1048 if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1049 em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1050 s_min,top,mdatoms,fr,vsite,constr,
1054 /* Take a trial step (new coords in s_c) */
1055 do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
1056 constr,top,nrnb,wcycle,-1);
1059 /* Calculate energy for the trial step */
1060 evaluate_energy(fplog,bVerbose,cr,
1061 state_global,top_global,s_c,top,
1062 inputrec,nrnb,wcycle,gstat,
1063 vsite,constr,fcd,graph,mdatoms,fr,
1064 mu_tot,enerd,vir,pres,-1,FALSE);
1066 /* Calc derivative along line */
1070 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1071 for(m=0; m<DIM; m++)
1072 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1074 /* Sum the gradient along the line across CPUs */
1076 gmx_sumd(1,&gpc,cr);
1078 /* This is the max amount of increase in energy we tolerate */
1079 tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1081 /* Accept the step if the energy is lower, or if it is not significantly higher
1082 * and the line derivative is still negative.
1084 if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1086 /* Great, we found a better energy. Increase step for next iteration
1087 * if we are still going down, decrease it otherwise
1090 stepsize *= 1.618034; /* The golden section */
1092 stepsize *= 0.618034; /* 1/golden section */
1094 /* New energy is the same or higher. We will have to do some work
1095 * to find a smaller value in the interval. Take smaller step next time!
1098 stepsize *= 0.618034;
1104 /* OK, if we didn't find a lower value we will have to locate one now - there must
1105 * be one in the interval [a=0,c].
1106 * The same thing is valid here, though: Don't spend dozens of iterations to find
1107 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1108 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1110 * I also have a safeguard for potentially really patological functions so we never
1111 * take more than 20 steps before we give up ...
1113 * If we already found a lower value we just skip this step and continue to the update.
1119 /* Select a new trial point.
1120 * If the derivatives at points a & c have different sign we interpolate to zero,
1121 * otherwise just do a bisection.
1124 b = a + gpa*(a-c)/(gpc-gpa);
1128 /* safeguard if interpolation close to machine accuracy causes errors:
1129 * never go outside the interval
1134 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1135 /* Reload the old state */
1136 em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1137 s_min,top,mdatoms,fr,vsite,constr,
1141 /* Take a trial step to this new point - new coords in s_b */
1142 do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
1143 constr,top,nrnb,wcycle,-1);
1146 /* Calculate energy for the trial step */
1147 evaluate_energy(fplog,bVerbose,cr,
1148 state_global,top_global,s_b,top,
1149 inputrec,nrnb,wcycle,gstat,
1150 vsite,constr,fcd,graph,mdatoms,fr,
1151 mu_tot,enerd,vir,pres,-1,FALSE);
1153 /* p does not change within a step, but since the domain decomposition
1154 * might change, we have to use cg_p of s_b here.
1159 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1160 for(m=0; m<DIM; m++)
1161 gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1163 /* Sum the gradient along the line across CPUs */
1165 gmx_sumd(1,&gpb,cr);
1168 fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1169 s_a->epot,s_b->epot,s_c->epot,gpb);
1171 epot_repl = s_b->epot;
1173 /* Keep one of the intervals based on the value of the derivative at the new point */
1175 /* Replace c endpoint with b */
1176 swap_em_state(s_b,s_c);
1180 /* Replace a endpoint with b */
1181 swap_em_state(s_b,s_a);
1187 * Stop search as soon as we find a value smaller than the endpoints.
1188 * Never run more than 20 steps, no matter what.
1191 } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1194 if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1196 /* OK. We couldn't find a significantly lower energy.
1197 * If beta==0 this was steepest descent, and then we give up.
1198 * If not, set beta=0 and restart with steepest descent before quitting.
1205 /* Reset memory before giving up */
1211 /* Select min energy state of A & C, put the best in B.
1213 if (s_c->epot < s_a->epot) {
1215 fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1216 s_c->epot,s_a->epot);
1217 swap_em_state(s_b,s_c);
1222 fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1223 s_a->epot,s_c->epot);
1224 swap_em_state(s_b,s_a);
1231 fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1233 swap_em_state(s_b,s_c);
1238 /* new search direction */
1239 /* beta = 0 means forget all memory and restart with steepest descents. */
1240 if (nstcg && ((step % nstcg)==0))
1243 /* s_min->fnorm cannot be zero, because then we would have converged
1247 /* Polak-Ribiere update.
1248 * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1250 beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1252 /* Limit beta to prevent oscillations */
1253 if (fabs(beta) > 5.0)
1257 /* update positions */
1258 swap_em_state(s_min,s_b);
1261 /* Print it if necessary */
1264 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1265 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1266 s_min->fmax,s_min->a_fmax+1);
1267 /* Store the new (lower) energies */
1268 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1269 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
1270 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1272 do_log = do_per_step(step,inputrec->nstlog);
1273 do_ene = do_per_step(step,inputrec->nstenergy);
1275 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1276 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1277 do_log ? fplog : NULL,step,step,eprNORMAL,
1278 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1281 /* Stop when the maximum force lies below tolerance.
1282 * If we have reached machine precision, converged is already set to true.
1284 converged = converged || (s_min->fmax < inputrec->em_tol);
1286 } /* End of the loop */
1289 step--; /* we never took that last step in this case */
1291 if (s_min->fmax > inputrec->em_tol)
1295 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1296 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1302 /* If we printed energy and/or logfile last step (which was the last step)
1303 * we don't have to do it again, but otherwise print the final values.
1306 /* Write final value to log since we didn't do anything the last step */
1307 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1309 if (!do_ene || !do_log) {
1310 /* Write final energy file entries */
1311 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1312 !do_log ? fplog : NULL,step,step,eprNORMAL,
1313 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1317 /* Print some stuff... */
1319 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1322 * For accurate normal mode calculation it is imperative that we
1323 * store the last conformation into the full precision binary trajectory.
1325 * However, we should only do it if we did NOT already write this step
1326 * above (which we did if do_x or do_f was true).
1328 do_x = !do_per_step(step,inputrec->nstxout);
1329 do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1331 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1332 top_global,inputrec,step,
1333 s_min,state_global,f_global);
1335 fnormn = s_min->fnorm/sqrt(state_global->natoms);
1338 print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1339 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1340 print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1341 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1343 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1346 finish_em(fplog,cr,outf,runtime,wcycle);
1348 /* To print the actual number of steps we needed somewhere */
1349 runtime->nsteps_done = step;
1352 } /* That's all folks */
1355 double do_lbfgs(FILE *fplog,t_commrec *cr,
1356 int nfile,const t_filenm fnm[],
1357 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1359 gmx_vsite_t *vsite,gmx_constr_t constr,
1361 t_inputrec *inputrec,
1362 gmx_mtop_t *top_global,t_fcdata *fcd,
1365 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1368 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1369 gmx_membed_t membed,
1370 real cpt_period,real max_hours,
1371 const char *deviceOptions,
1372 unsigned long Flags,
1373 gmx_runtime_t *runtime)
1375 static const char *LBFGS="Low-Memory BFGS Minimizer";
1377 gmx_localtop_t *top;
1378 gmx_enerdata_t *enerd;
1380 gmx_global_stat_t gstat;
1383 int ncorr,nmaxcorr,point,cp,neval,nminstep;
1384 double stepsize,gpa,gpb,gpc,tmp,minstep;
1385 real *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1386 real *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1387 real a,b,c,maxdelta,delta;
1388 real diag,Epot0,Epot,EpotA,EpotB,EpotC;
1389 real dgdx,dgdg,sq,yr,beta;
1391 gmx_bool converged,first;
1394 gmx_bool do_log,do_ene,do_x,do_f,foundlower,*frozen;
1396 int start,end,number_steps;
1398 int i,k,m,n,nfmax,gf,step;
1404 gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1406 n = 3*state->natoms;
1407 nmaxcorr = inputrec->nbfgscorr;
1409 /* Allocate memory */
1410 /* Use pointers to real so we dont have to loop over both atoms and
1411 * dimensions all the time...
1412 * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1413 * that point to the same memory.
1427 snew(alpha,nmaxcorr);
1430 for(i=0;i<nmaxcorr;i++)
1434 for(i=0;i<nmaxcorr;i++)
1441 init_em(fplog,LBFGS,cr,inputrec,
1442 state,top_global,&ems,&top,&f,&f_global,
1443 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1444 nfile,fnm,&outf,&mdebin);
1445 /* Do_lbfgs is not completely updated like do_steep and do_cg,
1446 * so we free some memory again.
1451 xx = (real *)state->x;
1454 start = mdatoms->start;
1455 end = mdatoms->homenr + start;
1457 /* Print to log file */
1458 print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1460 do_log = do_ene = do_x = do_f = TRUE;
1462 /* Max number of steps */
1463 number_steps=inputrec->nsteps;
1465 /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1467 for(i=start; i<end; i++) {
1468 if (mdatoms->cFREEZE)
1469 gf = mdatoms->cFREEZE[i];
1470 for(m=0; m<DIM; m++)
1471 frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1474 sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1476 sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1479 construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1480 top->idef.iparams,top->idef.il,
1481 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1483 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1484 /* do_force always puts the charge groups in the box and shifts again
1485 * We do not unshift, so molecules are always whole
1490 evaluate_energy(fplog,bVerbose,cr,
1491 state,top_global,&ems,top,
1492 inputrec,nrnb,wcycle,gstat,
1493 vsite,constr,fcd,graph,mdatoms,fr,
1494 mu_tot,enerd,vir,pres,-1,TRUE);
1498 /* Copy stuff to the energy bin for easy printing etc. */
1499 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1500 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1501 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1503 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1504 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1505 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1509 /* This is the starting energy */
1510 Epot = enerd->term[F_EPOT];
1516 /* Set the initial step.
1517 * since it will be multiplied by the non-normalized search direction
1518 * vector (force vector the first time), we scale it by the
1519 * norm of the force.
1523 fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1524 fprintf(stderr," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1525 fprintf(stderr," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1526 fprintf(stderr,"\n");
1527 /* and copy to the log file too... */
1528 fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1529 fprintf(fplog," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1530 fprintf(fplog," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1531 fprintf(fplog,"\n");
1537 dx[point][i] = ff[i]; /* Initial search direction */
1541 stepsize = 1.0/fnorm;
1544 /* Start the loop over BFGS steps.
1545 * Each successful step is counted, and we continue until
1546 * we either converge or reach the max number of steps.
1551 /* Set the gradient from the force */
1553 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1555 /* Write coordinates if necessary */
1556 do_x = do_per_step(step,inputrec->nstxout);
1557 do_f = do_per_step(step,inputrec->nstfout);
1562 mdof_flags |= MDOF_X;
1567 mdof_flags |= MDOF_F;
1570 write_traj(fplog,cr,outf,mdof_flags,
1571 top_global,step,(real)step,state,state,f,f,NULL,NULL);
1573 /* Do the linesearching in the direction dx[point][0..(n-1)] */
1575 /* pointer to current direction - point=0 first time here */
1578 /* calculate line gradient */
1579 for(gpa=0,i=0;i<n;i++)
1582 /* Calculate minimum allowed stepsize, before the average (norm)
1583 * relative change in coordinate is smaller than precision
1585 for(minstep=0,i=0;i<n;i++) {
1592 minstep = GMX_REAL_EPS/sqrt(minstep/n);
1594 if(stepsize<minstep) {
1599 /* Store old forces and coordinates */
1611 /* Take a step downhill.
1612 * In theory, we should minimize the function along this direction.
1613 * That is quite possible, but it turns out to take 5-10 function evaluations
1614 * for each line. However, we dont really need to find the exact minimum -
1615 * it is much better to start a new BFGS step in a modified direction as soon
1616 * as we are close to it. This will save a lot of energy evaluations.
1618 * In practice, we just try to take a single step.
1619 * If it worked (i.e. lowered the energy), we increase the stepsize but
1620 * the continue straight to the next BFGS step without trying to find any minimum.
1621 * If it didn't work (higher energy), there must be a minimum somewhere between
1622 * the old position and the new one.
1624 * Due to the finite numerical accuracy, it turns out that it is a good idea
1625 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1626 * This leads to lower final energies in the tests I've done. / Erik
1631 c = a + stepsize; /* reference position along line is zero */
1633 /* Check stepsize first. We do not allow displacements
1634 * larger than emstep.
1644 if(maxdelta>inputrec->em_stepsize)
1646 } while(maxdelta>inputrec->em_stepsize);
1648 /* Take a trial step */
1650 xc[i] = lastx[i] + c*s[i];
1653 /* Calculate energy for the trial step */
1654 ems.s.x = (rvec *)xc;
1656 evaluate_energy(fplog,bVerbose,cr,
1657 state,top_global,&ems,top,
1658 inputrec,nrnb,wcycle,gstat,
1659 vsite,constr,fcd,graph,mdatoms,fr,
1660 mu_tot,enerd,vir,pres,step,FALSE);
1663 /* Calc derivative along line */
1664 for(gpc=0,i=0; i<n; i++) {
1665 gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
1667 /* Sum the gradient along the line across CPUs */
1669 gmx_sumd(1,&gpc,cr);
1671 /* This is the max amount of increase in energy we tolerate */
1672 tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1674 /* Accept the step if the energy is lower, or if it is not significantly higher
1675 * and the line derivative is still negative.
1677 if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1679 /* Great, we found a better energy. Increase step for next iteration
1680 * if we are still going down, decrease it otherwise
1683 stepsize *= 1.618034; /* The golden section */
1685 stepsize *= 0.618034; /* 1/golden section */
1687 /* New energy is the same or higher. We will have to do some work
1688 * to find a smaller value in the interval. Take smaller step next time!
1691 stepsize *= 0.618034;
1694 /* OK, if we didn't find a lower value we will have to locate one now - there must
1695 * be one in the interval [a=0,c].
1696 * The same thing is valid here, though: Don't spend dozens of iterations to find
1697 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1698 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1700 * I also have a safeguard for potentially really patological functions so we never
1701 * take more than 20 steps before we give up ...
1703 * If we already found a lower value we just skip this step and continue to the update.
1710 /* Select a new trial point.
1711 * If the derivatives at points a & c have different sign we interpolate to zero,
1712 * otherwise just do a bisection.
1716 b = a + gpa*(a-c)/(gpc-gpa);
1720 /* safeguard if interpolation close to machine accuracy causes errors:
1721 * never go outside the interval
1726 /* Take a trial step */
1728 xb[i] = lastx[i] + b*s[i];
1731 /* Calculate energy for the trial step */
1732 ems.s.x = (rvec *)xb;
1734 evaluate_energy(fplog,bVerbose,cr,
1735 state,top_global,&ems,top,
1736 inputrec,nrnb,wcycle,gstat,
1737 vsite,constr,fcd,graph,mdatoms,fr,
1738 mu_tot,enerd,vir,pres,step,FALSE);
1743 for(gpb=0,i=0; i<n; i++)
1744 gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
1746 /* Sum the gradient along the line across CPUs */
1748 gmx_sumd(1,&gpb,cr);
1750 /* Keep one of the intervals based on the value of the derivative at the new point */
1752 /* Replace c endpoint with b */
1756 /* swap coord pointers b/c */
1764 /* Replace a endpoint with b */
1768 /* swap coord pointers a/b */
1778 * Stop search as soon as we find a value smaller than the endpoints,
1779 * or if the tolerance is below machine precision.
1780 * Never run more than 20 steps, no matter what.
1783 } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1785 if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1786 /* OK. We couldn't find a significantly lower energy.
1787 * If ncorr==0 this was steepest descent, and then we give up.
1788 * If not, reset memory to restart as steepest descent before quitting.
1797 /* Search in gradient direction */
1800 /* Reset stepsize */
1801 stepsize = 1.0/fnorm;
1806 /* Select min energy state of A & C, put the best in xx/ff/Epot
1837 /* Update the memory information, and calculate a new
1838 * approximation of the inverse hessian
1841 /* Have new data in Epot, xx, ff */
1846 dg[point][i]=lastf[i]-ff[i];
1847 dx[point][i]*=stepsize;
1853 dgdg+=dg[point][i]*dg[point][i];
1854 dgdx+=dg[point][i]*dx[point][i];
1859 rho[point]=1.0/dgdx;
1871 /* Recursive update. First go back over the memory points */
1872 for(k=0;k<ncorr;k++) {
1881 alpha[cp]=rho[cp]*sq;
1884 p[i] -= alpha[cp]*dg[cp][i];
1890 /* And then go forward again */
1891 for(k=0;k<ncorr;k++) {
1894 yr += p[i]*dg[cp][i];
1897 beta = alpha[cp]-beta;
1900 p[i] += beta*dx[cp][i];
1909 dx[point][i] = p[i];
1915 /* Test whether the convergence criterion is met */
1916 get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1918 /* Print it if necessary */
1921 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1922 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1923 /* Store the new (lower) energies */
1924 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1925 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1926 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1927 do_log = do_per_step(step,inputrec->nstlog);
1928 do_ene = do_per_step(step,inputrec->nstenergy);
1930 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1931 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1932 do_log ? fplog : NULL,step,step,eprNORMAL,
1933 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1936 /* Stop when the maximum force lies below tolerance.
1937 * If we have reached machine precision, converged is already set to true.
1940 converged = converged || (fmax < inputrec->em_tol);
1942 } /* End of the loop */
1945 step--; /* we never took that last step in this case */
1947 if(fmax>inputrec->em_tol)
1951 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1952 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1957 /* If we printed energy and/or logfile last step (which was the last step)
1958 * we don't have to do it again, but otherwise print the final values.
1960 if(!do_log) /* Write final value to log since we didn't do anythin last step */
1961 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1962 if(!do_ene || !do_log) /* Write final energy file entries */
1963 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1964 !do_log ? fplog : NULL,step,step,eprNORMAL,
1965 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1967 /* Print some stuff... */
1969 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1972 * For accurate normal mode calculation it is imperative that we
1973 * store the last conformation into the full precision binary trajectory.
1975 * However, we should only do it if we did NOT already write this step
1976 * above (which we did if do_x or do_f was true).
1978 do_x = !do_per_step(step,inputrec->nstxout);
1979 do_f = !do_per_step(step,inputrec->nstfout);
1980 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1981 top_global,inputrec,step,
1985 print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
1986 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1987 print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
1988 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1990 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1993 finish_em(fplog,cr,outf,runtime,wcycle);
1995 /* To print the actual number of steps we needed somewhere */
1996 runtime->nsteps_done = step;
1999 } /* That's all folks */
2002 double do_steep(FILE *fplog,t_commrec *cr,
2003 int nfile, const t_filenm fnm[],
2004 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2006 gmx_vsite_t *vsite,gmx_constr_t constr,
2008 t_inputrec *inputrec,
2009 gmx_mtop_t *top_global,t_fcdata *fcd,
2010 t_state *state_global,
2012 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2015 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2016 gmx_membed_t membed,
2017 real cpt_period,real max_hours,
2018 const char *deviceOptions,
2019 unsigned long Flags,
2020 gmx_runtime_t *runtime)
2022 const char *SD="Steepest Descents";
2023 em_state_t *s_min,*s_try;
2025 gmx_localtop_t *top;
2026 gmx_enerdata_t *enerd;
2028 gmx_global_stat_t gstat;
2030 real stepsize,constepsize;
2031 real ustep,dvdlambda,fnormn;
2034 gmx_bool bDone,bAbort,do_x,do_f;
2039 int steps_accepted=0;
2043 s_min = init_em_state();
2044 s_try = init_em_state();
2046 /* Init em and store the local state in s_try */
2047 init_em(fplog,SD,cr,inputrec,
2048 state_global,top_global,s_try,&top,&f,&f_global,
2049 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2050 nfile,fnm,&outf,&mdebin);
2052 /* Print to log file */
2053 print_em_start(fplog,cr,runtime,wcycle,SD);
2055 /* Set variables for stepsize (in nm). This is the largest
2056 * step that we are going to make in any direction.
2058 ustep = inputrec->em_stepsize;
2061 /* Max number of steps */
2062 nsteps = inputrec->nsteps;
2065 /* Print to the screen */
2066 sp_header(stderr,SD,inputrec->em_tol,nsteps);
2068 sp_header(fplog,SD,inputrec->em_tol,nsteps);
2070 /**** HERE STARTS THE LOOP ****
2071 * count is the counter for the number of steps
2072 * bDone will be TRUE when the minimization has converged
2073 * bAbort will be TRUE when nsteps steps have been performed or when
2074 * the stepsize becomes smaller than is reasonable for machine precision
2079 while( !bDone && !bAbort ) {
2080 bAbort = (nsteps >= 0) && (count == nsteps);
2082 /* set new coordinates, except for first step */
2084 do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
2085 constr,top,nrnb,wcycle,count);
2088 evaluate_energy(fplog,bVerbose,cr,
2089 state_global,top_global,s_try,top,
2090 inputrec,nrnb,wcycle,gstat,
2091 vsite,constr,fcd,graph,mdatoms,fr,
2092 mu_tot,enerd,vir,pres,count,count==0);
2095 print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
2098 s_min->epot = s_try->epot + 1;
2100 /* Print it if necessary */
2103 fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2104 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2105 (s_try->epot < s_min->epot) ? '\n' : '\r');
2108 if (s_try->epot < s_min->epot) {
2109 /* Store the new (lower) energies */
2110 upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2111 mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
2112 s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
2113 print_ebin(outf->fp_ene,TRUE,
2114 do_per_step(steps_accepted,inputrec->nstdisreout),
2115 do_per_step(steps_accepted,inputrec->nstorireout),
2116 fplog,count,count,eprNORMAL,TRUE,
2117 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2122 /* Now if the new energy is smaller than the previous...
2123 * or if this is the first step!
2124 * or if we did random steps!
2127 if ( (count==0) || (s_try->epot < s_min->epot) ) {
2130 /* Test whether the convergence criterion is met... */
2131 bDone = (s_try->fmax < inputrec->em_tol);
2133 /* Copy the arrays for force, positions and energy */
2134 /* The 'Min' array always holds the coords and forces of the minimal
2136 swap_em_state(s_min,s_try);
2140 /* Write to trn, if necessary */
2141 do_x = do_per_step(steps_accepted,inputrec->nstxout);
2142 do_f = do_per_step(steps_accepted,inputrec->nstfout);
2143 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2144 top_global,inputrec,count,
2145 s_min,state_global,f_global);
2148 /* If energy is not smaller make the step smaller... */
2151 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2152 /* Reload the old state */
2153 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2154 s_min,top,mdatoms,fr,vsite,constr,
2159 /* Determine new step */
2160 stepsize = ustep/s_min->fmax;
2162 /* Check if stepsize is too small, with 1 nm as a characteristic length */
2164 if (count == nsteps || ustep < 1e-12)
2166 if (count == nsteps || ustep < 1e-6)
2171 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2172 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2178 } /* End of the loop */
2180 /* Print some shit... */
2182 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2183 write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2184 top_global,inputrec,count,
2185 s_min,state_global,f_global);
2187 fnormn = s_min->fnorm/sqrt(state_global->natoms);
2190 print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2191 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2192 print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2193 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2196 finish_em(fplog,cr,outf,runtime,wcycle);
2198 /* To print the actual number of steps we needed somewhere */
2199 inputrec->nsteps=count;
2201 runtime->nsteps_done = count;
2204 } /* That's all folks */
2207 double do_nm(FILE *fplog,t_commrec *cr,
2208 int nfile,const t_filenm fnm[],
2209 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2211 gmx_vsite_t *vsite,gmx_constr_t constr,
2213 t_inputrec *inputrec,
2214 gmx_mtop_t *top_global,t_fcdata *fcd,
2215 t_state *state_global,
2217 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2220 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2221 gmx_membed_t membed,
2222 real cpt_period,real max_hours,
2223 const char *deviceOptions,
2224 unsigned long Flags,
2225 gmx_runtime_t *runtime)
2227 const char *NM = "Normal Mode Analysis";
2232 gmx_localtop_t *top;
2233 gmx_enerdata_t *enerd;
2235 gmx_global_stat_t gstat;
2237 real t,t0,lambda,lam0;
2242 gmx_bool bSparse; /* use sparse matrix storage format */
2244 gmx_sparsematrix_t * sparse_matrix = NULL;
2245 real * full_matrix = NULL;
2246 em_state_t * state_work;
2248 /* added with respect to mdrun */
2250 real der_range=10.0*sqrt(GMX_REAL_EPS);
2256 gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2259 state_work = init_em_state();
2261 /* Init em and store the local state in state_minimum */
2262 init_em(fplog,NM,cr,inputrec,
2263 state_global,top_global,state_work,&top,
2265 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2266 nfile,fnm,&outf,NULL);
2268 natoms = top_global->natoms;
2276 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2277 " which MIGHT not be accurate enough for normal mode analysis.\n"
2278 " Gromacs now uses sparse matrix storage, so the memory requirements\n"
2279 " are fairly modest even if you recompile in double precision.\n\n");
2283 /* Check if we can/should use sparse storage format.
2285 * Sparse format is only useful when the Hessian itself is sparse, which it
2286 * will be when we use a cutoff.
2287 * For small systems (n<1000) it is easier to always use full matrix format, though.
2289 if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2291 fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2294 else if(top_global->natoms < 1000)
2296 fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2301 fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2305 sz = DIM*top_global->natoms;
2307 fprintf(stderr,"Allocating Hessian memory...\n\n");
2311 sparse_matrix=gmx_sparsematrix_init(sz);
2312 sparse_matrix->compressed_symmetric = TRUE;
2316 snew(full_matrix,sz*sz);
2319 /* Initial values */
2320 t0 = inputrec->init_t;
2321 lam0 = inputrec->fepvals->init_lambda;
2329 /* Write start time and temperature */
2330 print_em_start(fplog,cr,runtime,wcycle,NM);
2332 /* fudge nr of steps to nr of atoms */
2333 inputrec->nsteps = natoms*2;
2337 fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2338 *(top_global->name),(int)inputrec->nsteps);
2341 nnodes = cr->nnodes;
2343 /* Make evaluate_energy do a single node force calculation */
2345 evaluate_energy(fplog,bVerbose,cr,
2346 state_global,top_global,state_work,top,
2347 inputrec,nrnb,wcycle,gstat,
2348 vsite,constr,fcd,graph,mdatoms,fr,
2349 mu_tot,enerd,vir,pres,-1,TRUE);
2350 cr->nnodes = nnodes;
2352 /* if forces are not small, warn user */
2353 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2357 fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2358 if (state_work->fmax > 1.0e-3)
2360 fprintf(stderr,"Maximum force probably not small enough to");
2361 fprintf(stderr," ensure that you are in an \nenergy well. ");
2362 fprintf(stderr,"Be aware that negative eigenvalues may occur");
2363 fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2367 /***********************************************************
2369 * Loop over all pairs in matrix
2371 * do_force called twice. Once with positive and
2372 * once with negative displacement
2374 ************************************************************/
2376 /* Steps are divided one by one over the nodes */
2377 for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2380 for (d=0; d<DIM; d++)
2382 x_min = state_work->s.x[atom][d];
2384 state_work->s.x[atom][d] = x_min - der_range;
2386 /* Make evaluate_energy do a single node force calculation */
2388 evaluate_energy(fplog,bVerbose,cr,
2389 state_global,top_global,state_work,top,
2390 inputrec,nrnb,wcycle,gstat,
2391 vsite,constr,fcd,graph,mdatoms,fr,
2392 mu_tot,enerd,vir,pres,atom*2,FALSE);
2394 for(i=0; i<natoms; i++)
2396 copy_rvec(state_work->f[i], fneg[i]);
2399 state_work->s.x[atom][d] = x_min + der_range;
2401 evaluate_energy(fplog,bVerbose,cr,
2402 state_global,top_global,state_work,top,
2403 inputrec,nrnb,wcycle,gstat,
2404 vsite,constr,fcd,graph,mdatoms,fr,
2405 mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2406 cr->nnodes = nnodes;
2408 /* x is restored to original */
2409 state_work->s.x[atom][d] = x_min;
2411 for(j=0; j<natoms; j++)
2413 for (k=0; (k<DIM); k++)
2416 -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2424 #define mpi_type MPI_DOUBLE
2426 #define mpi_type MPI_FLOAT
2428 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2429 cr->mpi_comm_mygroup);
2434 for(node=0; (node<nnodes && atom+node<natoms); node++)
2440 MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2441 cr->mpi_comm_mygroup,&stat);
2446 row = (atom + node)*DIM + d;
2448 for(j=0; j<natoms; j++)
2450 for(k=0; k<DIM; k++)
2456 if (col >= row && dfdx[j][k] != 0.0)
2458 gmx_sparsematrix_increment_value(sparse_matrix,
2459 row,col,dfdx[j][k]);
2464 full_matrix[row*sz+col] = dfdx[j][k];
2471 if (bVerbose && fplog)
2476 /* write progress */
2477 if (MASTER(cr) && bVerbose)
2479 fprintf(stderr,"\rFinished step %d out of %d",
2480 min(atom+nnodes,natoms),natoms);
2487 fprintf(stderr,"\n\nWriting Hessian...\n");
2488 gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2491 finish_em(fplog,cr,outf,runtime,wcycle);
2493 runtime->nsteps_done = natoms*2;