1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROwing Monsters And Cloning Shrimps
55 #include "gmx_fatal.h"
70 #include "sparsematrix.h"
74 #include "gmx_wallcycle.h"
75 #include "mtop_util.h"
78 #include "gmx_membed.h"
89 static em_state_t *init_em_state()
98 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
99 gmx_wallcycle_t wcycle,
104 runtime_start(runtime);
106 sprintf(buf,"Started %s",name);
107 print_date_and_time(fplog,cr->nodeid,buf,NULL);
109 wallcycle_start(wcycle,ewcRUN);
111 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
112 gmx_wallcycle_t wcycle)
114 wallcycle_stop(wcycle,ewcRUN);
116 runtime_end(runtime);
119 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
122 fprintf(out,"%s:\n",minimizer);
123 fprintf(out," Tolerance (Fmax) = %12.5e\n",ftol);
124 fprintf(out," Number of steps = %12d\n",nsteps);
127 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
131 fprintf(fp,"\nReached the maximum number of steps before reaching Fmax < %g\n",ftol);
135 fprintf(fp,"\nStepsize too small, or no change in energy.\n"
136 "Converged to machine precision,\n"
137 "but not to the requested precision Fmax < %g\n",
139 if (sizeof(real)<sizeof(double))
141 fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
145 fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
146 "off constraints alltogether (set constraints = none in mdp file)\n");
153 static void print_converged(FILE *fp,const char *alg,real ftol,
154 gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
155 real epot,real fmax, int nfmax, real fnorm)
157 char buf[STEPSTRSIZE];
160 fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
161 alg,ftol,gmx_step_str(count,buf));
162 else if(count<nsteps)
163 fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
164 "but did not reach the requested Fmax < %g.\n",
165 alg,gmx_step_str(count,buf),ftol);
167 fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
168 alg,ftol,gmx_step_str(count,buf));
171 fprintf(fp,"Potential Energy = %21.14e\n",epot);
172 fprintf(fp,"Maximum force = %21.14e on atom %d\n",fmax,nfmax+1);
173 fprintf(fp,"Norm of force = %21.14e\n",fnorm);
175 fprintf(fp,"Potential Energy = %14.7e\n",epot);
176 fprintf(fp,"Maximum force = %14.7e on atom %d\n",fmax,nfmax+1);
177 fprintf(fp,"Norm of force = %14.7e\n",fnorm);
181 static void get_f_norm_max(t_commrec *cr,
182 t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
183 real *fnorm,real *fmax,int *a_fmax)
186 real fmax2,fmax2_0,fam;
187 int la_max,a_max,start,end,i,m,gf;
189 /* This routine finds the largest force and returns it.
190 * On parallel machines the global max is taken.
196 start = mdatoms->start;
197 end = mdatoms->homenr + start;
198 if (mdatoms->cFREEZE) {
199 for(i=start; i<end; i++) {
200 gf = mdatoms->cFREEZE[i];
203 if (!opts->nFreeze[gf][m])
212 for(i=start; i<end; i++) {
222 if (la_max >= 0 && DOMAINDECOMP(cr)) {
223 a_max = cr->dd->gatindex[la_max];
228 snew(sum,2*cr->nnodes+1);
229 sum[2*cr->nodeid] = fmax2;
230 sum[2*cr->nodeid+1] = a_max;
231 sum[2*cr->nnodes] = fnorm2;
232 gmx_sumd(2*cr->nnodes+1,sum,cr);
233 fnorm2 = sum[2*cr->nnodes];
234 /* Determine the global maximum */
235 for(i=0; i<cr->nnodes; i++) {
236 if (sum[2*i] > fmax2) {
238 a_max = (int)(sum[2*i+1] + 0.5);
245 *fnorm = sqrt(fnorm2);
252 static void get_state_f_norm_max(t_commrec *cr,
253 t_grpopts *opts,t_mdatoms *mdatoms,
256 get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
259 void init_em(FILE *fplog,const char *title,
260 t_commrec *cr,t_inputrec *ir,
261 t_state *state_global,gmx_mtop_t *top_global,
262 em_state_t *ems,gmx_localtop_t **top,
263 rvec **f,rvec **f_global,
264 t_nrnb *nrnb,rvec mu_tot,
265 t_forcerec *fr,gmx_enerdata_t **enerd,
266 t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
267 gmx_vsite_t *vsite,gmx_constr_t constr,
268 int nfile,const t_filenm fnm[],
269 gmx_mdoutf_t **outf,t_mdebin **mdebin)
276 fprintf(fplog,"Initiating %s\n",title);
279 state_global->ngtc = 0;
281 /* Initiate some variables */
282 if (ir->efep != efepNO)
284 state_global->lambda = ir->init_lambda;
288 state_global->lambda = 0.0;
293 if (DOMAINDECOMP(cr))
295 *top = dd_init_local_top(top_global);
297 dd_init_local_state(cr->dd,state_global,&ems->s);
301 /* Distribute the charge groups over the nodes from the master node */
302 dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
303 state_global,top_global,ir,
304 &ems->s,&ems->f,mdatoms,*top,
305 fr,vsite,NULL,constr,
307 dd_store_state(cr->dd,&ems->s);
311 snew(*f_global,top_global->natoms);
321 snew(*f,top_global->natoms);
323 /* Just copy the state */
324 ems->s = *state_global;
325 snew(ems->s.x,ems->s.nalloc);
326 snew(ems->f,ems->s.nalloc);
327 for(i=0; i<state_global->natoms; i++)
329 copy_rvec(state_global->x[i],ems->s.x[i]);
331 copy_mat(state_global->box,ems->s.box);
333 if (PAR(cr) && ir->eI != eiNM)
335 /* Initialize the particle decomposition and split the topology */
336 *top = split_system(fplog,top_global,ir,cr);
338 pd_cg_range(cr,&fr->cg0,&fr->hcg);
342 *top = gmx_mtop_generate_local_top(top_global,ir);
346 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
348 *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
357 pd_at_range(cr,&start,&homenr);
363 homenr = top_global->natoms;
365 atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
366 update_mdatoms(mdatoms,state_global->lambda);
370 set_vsite_top(vsite,*top,mdatoms,cr);
376 if (ir->eConstrAlg == econtSHAKE &&
377 gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
379 gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
380 econstr_names[econtSHAKE],econstr_names[econtLINCS]);
383 if (!DOMAINDECOMP(cr))
385 set_constraints(constr,*top,ir,mdatoms,cr);
388 if (!ir->bContinuation)
390 /* Constrain the starting coordinates */
392 constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
393 ir,NULL,cr,-1,0,mdatoms,
394 ems->s.x,ems->s.x,NULL,ems->s.box,
395 ems->s.lambda,&dvdlambda,
396 NULL,NULL,nrnb,econqCoord,FALSE,0,0);
402 *gstat = global_stat_init(ir);
405 *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
408 init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,*enerd);
412 /* Init bin for energy stuff */
413 *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
417 calc_shifts(ems->s.box,fr->shift_vec);
420 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
421 gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
423 if (!(cr->duty & DUTY_PME)) {
424 /* Tell the PME only node to finish */
430 em_time_end(fplog,cr,runtime,wcycle);
433 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
442 static void copy_em_coords_back(em_state_t *ems,t_state *state,rvec *f)
446 for(i=0; (i<state->natoms); i++)
447 copy_rvec(ems->s.x[i],state->x[i]);
449 copy_rvec(ems->f[i],f[i]);
452 static void write_em_traj(FILE *fplog,t_commrec *cr,
454 gmx_bool bX,gmx_bool bF,const char *confout,
455 gmx_mtop_t *top_global,
456 t_inputrec *ir,gmx_large_int_t step,
458 t_state *state_global,rvec *f_global)
462 if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
465 copy_em_coords_back(state,state_global,bF ? f_global : NULL);
469 if (bX) { mdof_flags |= MDOF_X; }
470 if (bF) { mdof_flags |= MDOF_F; }
471 write_traj(fplog,cr,outf,mdof_flags,
472 top_global,step,(double)step,
473 &state->s,state_global,state->f,f_global,NULL,NULL);
475 if (confout != NULL && MASTER(cr))
477 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
479 /* Make molecules whole only for confout writing */
480 do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
484 write_sto_conf_mtop(confout,
485 *top_global->name,top_global,
486 state_global->x,NULL,ir->ePBC,state_global->box);
490 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
491 em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
492 gmx_constr_t constr,gmx_localtop_t *top,
493 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
494 gmx_large_int_t count)
498 int start,end,gf,i,m;
505 if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
506 gmx_incons("state mismatch in do_em_step");
508 s2->flags = s1->flags;
510 if (s2->nalloc != s1->nalloc) {
511 s2->nalloc = s1->nalloc;
512 srenew(s2->x,s1->nalloc);
513 srenew(ems2->f, s1->nalloc);
514 if (s2->flags & (1<<estCGP))
515 srenew(s2->cg_p, s1->nalloc);
518 s2->natoms = s1->natoms;
519 s2->lambda = s1->lambda;
520 copy_mat(s1->box,s2->box);
523 end = md->start + md->homenr;
528 for(i=start; i<end; i++) {
531 for(m=0; m<DIM; m++) {
532 if (ir->opts.nFreeze[gf][m])
535 x2[i][m] = x1[i][m] + a*f[i][m];
539 if (s2->flags & (1<<estCGP)) {
540 /* Copy the CG p vector */
543 for(i=start; i<end; i++)
544 copy_rvec(x1[i],x2[i]);
547 if (DOMAINDECOMP(cr)) {
548 s2->ddp_count = s1->ddp_count;
549 if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
550 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
551 srenew(s2->cg_gl,s2->cg_gl_nalloc);
553 s2->ncg_gl = s1->ncg_gl;
554 for(i=0; i<s2->ncg_gl; i++)
555 s2->cg_gl[i] = s1->cg_gl[i];
556 s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
560 wallcycle_start(wcycle,ewcCONSTR);
562 constrain(NULL,TRUE,TRUE,constr,&top->idef,
563 ir,NULL,cr,count,0,md,
564 s1->x,s2->x,NULL,s2->box,s2->lambda,
565 &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
566 wallcycle_stop(wcycle,ewcCONSTR);
570 static void do_x_step(t_commrec *cr,int n,rvec *x1,real a,rvec *f,rvec *x2)
575 if (DOMAINDECOMP(cr)) {
577 end = cr->dd->nat_home;
578 } else if (PARTDECOMP(cr)) {
579 pd_at_range(cr,&start,&end);
585 for(i=start; i<end; i++) {
586 for(m=0; m<DIM; m++) {
587 x2[i][m] = x1[i][m] + a*f[i][m];
592 static void do_x_sub(t_commrec *cr,int n,rvec *x1,rvec *x2,real a,rvec *f)
597 if (DOMAINDECOMP(cr)) {
599 end = cr->dd->nat_home;
600 } else if (PARTDECOMP(cr)) {
601 pd_at_range(cr,&start,&end);
607 for(i=start; i<end; i++) {
608 for(m=0; m<DIM; m++) {
609 f[i][m] = (x1[i][m] - x2[i][m])*a;
614 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
615 gmx_mtop_t *top_global,t_inputrec *ir,
616 em_state_t *ems,gmx_localtop_t *top,
617 t_mdatoms *mdatoms,t_forcerec *fr,
618 gmx_vsite_t *vsite,gmx_constr_t constr,
619 t_nrnb *nrnb,gmx_wallcycle_t wcycle)
621 /* Repartition the domain decomposition */
622 wallcycle_start(wcycle,ewcDOMDEC);
623 dd_partition_system(fplog,step,cr,FALSE,1,
626 mdatoms,top,fr,vsite,NULL,constr,
628 dd_store_state(cr->dd,&ems->s);
629 wallcycle_stop(wcycle,ewcDOMDEC);
632 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
633 t_state *state_global,gmx_mtop_t *top_global,
634 em_state_t *ems,gmx_localtop_t *top,
635 t_inputrec *inputrec,
636 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
637 gmx_global_stat_t gstat,
638 gmx_vsite_t *vsite,gmx_constr_t constr,
640 t_graph *graph,t_mdatoms *mdatoms,
641 t_forcerec *fr,rvec mu_tot,
642 gmx_enerdata_t *enerd,tensor vir,tensor pres,
643 gmx_large_int_t count,gmx_bool bFirst)
648 tensor force_vir,shake_vir,ekin;
649 real dvdl,prescorr,enercorr,dvdlcorr;
652 /* Set the time to the initial time, the time does not change during EM */
653 t = inputrec->init_t;
656 (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
657 /* This the first state or an old state used before the last ns */
661 if (inputrec->nstlist > 0) {
663 } else if (inputrec->nstlist == -1) {
664 nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
666 gmx_sumi(1,&nabnsb,cr);
672 construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
673 top->idef.iparams,top->idef.il,
674 fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
676 if (DOMAINDECOMP(cr)) {
678 /* Repartition the domain decomposition */
679 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
680 ems,top,mdatoms,fr,vsite,constr,
685 /* Calc force & energy on new trial position */
686 /* do_force always puts the charge groups in the box and shifts again
687 * We do not unshift, so molecules are always whole in congrad.c
689 do_force(fplog,cr,inputrec,
690 count,nrnb,wcycle,top,top_global,&top_global->groups,
691 ems->s.box,ems->s.x,&ems->s.hist,
692 ems->f,force_vir,mdatoms,enerd,fcd,
693 ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
694 GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
695 (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
697 /* Clear the unused shake virial and pressure */
698 clear_mat(shake_vir);
701 /* Calculate long range corrections to pressure and energy */
702 calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda,
703 pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
704 /* don't think these next 4 lines can be moved in for now, because we
705 don't always want to write it -- figure out how to clean this up MRS 8/4/2009 */
706 enerd->term[F_DISPCORR] = enercorr;
707 enerd->term[F_EPOT] += enercorr;
708 enerd->term[F_PRES] += prescorr;
709 enerd->term[F_DVDL] += dvdlcorr;
711 /* Communicate stuff when parallel */
712 if (PAR(cr) && inputrec->eI != eiNM)
714 wallcycle_start(wcycle,ewcMoveE);
716 global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
717 inputrec,NULL,NULL,NULL,1,&terminate,
718 top_global,&ems->s,FALSE,
724 wallcycle_stop(wcycle,ewcMoveE);
727 ems->epot = enerd->term[F_EPOT];
730 /* Project out the constraint components of the force */
731 wallcycle_start(wcycle,ewcCONSTR);
733 constrain(NULL,FALSE,FALSE,constr,&top->idef,
734 inputrec,NULL,cr,count,0,mdatoms,
735 ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda,&dvdl,
736 NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
737 if (fr->bSepDVDL && fplog)
738 fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);
739 enerd->term[F_DHDL_CON] += dvdl;
740 m_add(force_vir,shake_vir,vir);
741 wallcycle_stop(wcycle,ewcCONSTR);
743 copy_mat(force_vir,vir);
747 enerd->term[F_PRES] =
748 calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres,
749 (fr->eeltype==eelPPPM)?enerd->term[F_COUL_RECIP]:0.0);
751 sum_dhdl(enerd,ems->s.lambda,inputrec);
753 if (EI_ENERGY_MINIMIZATION(inputrec->eI))
755 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
759 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
761 em_state_t *s_min,em_state_t *s_b)
765 int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
767 unsigned char *grpnrFREEZE;
770 fprintf(debug,"Doing reorder_partsum\n");
775 cgs_gl = dd_charge_groups_global(cr->dd);
776 index = cgs_gl->index;
778 /* Collect fm in a global vector fmg.
779 * This conflicts with the spirit of domain decomposition,
780 * but to fully optimize this a much more complicated algorithm is required.
782 snew(fmg,mtop->natoms);
784 ncg = s_min->s.ncg_gl;
785 cg_gl = s_min->s.cg_gl;
787 for(c=0; c<ncg; c++) {
791 for(a=a0; a<a1; a++) {
792 copy_rvec(fm[i],fmg[a]);
796 gmx_sum(mtop->natoms*3,fmg[0],cr);
798 /* Now we will determine the part of the sum for the cgs in state s_b */
800 cg_gl = s_b->s.cg_gl;
804 grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
805 for(c=0; c<ncg; c++) {
809 for(a=a0; a<a1; a++) {
810 if (mdatoms->cFREEZE && grpnrFREEZE) {
813 for(m=0; m<DIM; m++) {
814 if (!opts->nFreeze[gf][m]) {
815 partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
827 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
829 em_state_t *s_min,em_state_t *s_b)
835 /* This is just the classical Polak-Ribiere calculation of beta;
836 * it looks a bit complicated since we take freeze groups into account,
837 * and might have to sum it in parallel runs.
840 if (!DOMAINDECOMP(cr) ||
841 (s_min->s.ddp_count == cr->dd->ddp_count &&
842 s_b->s.ddp_count == cr->dd->ddp_count)) {
847 /* This part of code can be incorrect with DD,
848 * since the atom ordering in s_b and s_min might differ.
850 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
851 if (mdatoms->cFREEZE)
852 gf = mdatoms->cFREEZE[i];
854 if (!opts->nFreeze[gf][m]) {
855 sum += (fb[i][m] - fm[i][m])*fb[i][m];
859 /* We need to reorder cgs while summing */
860 sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
865 return sum/sqr(s_min->fnorm);
868 double do_cg(FILE *fplog,t_commrec *cr,
869 int nfile,const t_filenm fnm[],
870 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
872 gmx_vsite_t *vsite,gmx_constr_t constr,
874 t_inputrec *inputrec,
875 gmx_mtop_t *top_global,t_fcdata *fcd,
876 t_state *state_global,
878 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
881 int repl_ex_nst,int repl_ex_seed,
882 gmx_membed_t *membed,
883 real cpt_period,real max_hours,
884 const char *deviceOptions,
886 gmx_runtime_t *runtime)
888 const char *CG="Polak-Ribiere Conjugate Gradients";
890 em_state_t *s_min,*s_a,*s_b,*s_c;
892 gmx_enerdata_t *enerd;
894 gmx_global_stat_t gstat;
896 rvec *f_global,*p,*sf,*sfm;
897 double gpa,gpb,gpc,tmp,sum[2],minstep;
904 gmx_bool converged,foundlower;
906 gmx_bool do_log=FALSE,do_ene=FALSE,do_x,do_f;
908 int number_steps,neval=0,nstcg=inputrec->nstcgsteep;
910 int i,m,gf,step,nminstep;
915 s_min = init_em_state();
916 s_a = init_em_state();
917 s_b = init_em_state();
918 s_c = init_em_state();
920 /* Init em and store the local state in s_min */
921 init_em(fplog,CG,cr,inputrec,
922 state_global,top_global,s_min,&top,&f,&f_global,
923 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
924 nfile,fnm,&outf,&mdebin);
926 /* Print to log file */
927 print_em_start(fplog,cr,runtime,wcycle,CG);
929 /* Max number of steps */
930 number_steps=inputrec->nsteps;
933 sp_header(stderr,CG,inputrec->em_tol,number_steps);
935 sp_header(fplog,CG,inputrec->em_tol,number_steps);
937 /* Call the force routine and some auxiliary (neighboursearching etc.) */
938 /* do_force always puts the charge groups in the box and shifts again
939 * We do not unshift, so molecules are always whole in congrad.c
941 evaluate_energy(fplog,bVerbose,cr,
942 state_global,top_global,s_min,top,
943 inputrec,nrnb,wcycle,gstat,
944 vsite,constr,fcd,graph,mdatoms,fr,
945 mu_tot,enerd,vir,pres,-1,TRUE);
949 /* Copy stuff to the energy bin for easy printing etc. */
950 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
951 mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
952 NULL,NULL,vir,pres,NULL,mu_tot,constr);
954 print_ebin_header(fplog,step,step,s_min->s.lambda);
955 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
956 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
960 /* Estimate/guess the initial stepsize */
961 stepsize = inputrec->em_stepsize/s_min->fnorm;
964 fprintf(stderr," F-max = %12.5e on atom %d\n",
965 s_min->fmax,s_min->a_fmax+1);
966 fprintf(stderr," F-Norm = %12.5e\n",
967 s_min->fnorm/sqrt(state_global->natoms));
968 fprintf(stderr,"\n");
969 /* and copy to the log file too... */
970 fprintf(fplog," F-max = %12.5e on atom %d\n",
971 s_min->fmax,s_min->a_fmax+1);
972 fprintf(fplog," F-Norm = %12.5e\n",
973 s_min->fnorm/sqrt(state_global->natoms));
976 /* Start the loop over CG steps.
977 * Each successful step is counted, and we continue until
978 * we either converge or reach the max number of steps.
981 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
983 /* start taking steps in a new direction
984 * First time we enter the routine, beta=0, and the direction is
985 * simply the negative gradient.
988 /* Calculate the new direction in p, and the gradient in this direction, gpa */
993 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
994 if (mdatoms->cFREEZE)
995 gf = mdatoms->cFREEZE[i];
996 for(m=0; m<DIM; m++) {
997 if (!inputrec->opts.nFreeze[gf][m]) {
998 p[i][m] = sf[i][m] + beta*p[i][m];
999 gpa -= p[i][m]*sf[i][m];
1000 /* f is negative gradient, thus the sign */
1007 /* Sum the gradient along the line across CPUs */
1009 gmx_sumd(1,&gpa,cr);
1011 /* Calculate the norm of the search vector */
1012 get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
1014 /* Just in case stepsize reaches zero due to numerical precision... */
1016 stepsize = inputrec->em_stepsize/pnorm;
1019 * Double check the value of the derivative in the search direction.
1020 * If it is positive it must be due to the old information in the
1021 * CG formula, so just remove that and start over with beta=0.
1022 * This corresponds to a steepest descent step.
1026 step--; /* Don't count this step since we are restarting */
1027 continue; /* Go back to the beginning of the big for-loop */
1030 /* Calculate minimum allowed stepsize, before the average (norm)
1031 * relative change in coordinate is smaller than precision
1034 for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1035 for(m=0; m<DIM; m++) {
1036 tmp = fabs(s_min->s.x[i][m]);
1043 /* Add up from all CPUs */
1045 gmx_sumd(1,&minstep,cr);
1047 minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1049 if(stepsize<minstep) {
1054 /* Write coordinates if necessary */
1055 do_x = do_per_step(step,inputrec->nstxout);
1056 do_f = do_per_step(step,inputrec->nstfout);
1058 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1059 top_global,inputrec,step,
1060 s_min,state_global,f_global);
1062 /* Take a step downhill.
1063 * In theory, we should minimize the function along this direction.
1064 * That is quite possible, but it turns out to take 5-10 function evaluations
1065 * for each line. However, we dont really need to find the exact minimum -
1066 * it is much better to start a new CG step in a modified direction as soon
1067 * as we are close to it. This will save a lot of energy evaluations.
1069 * In practice, we just try to take a single step.
1070 * If it worked (i.e. lowered the energy), we increase the stepsize but
1071 * the continue straight to the next CG step without trying to find any minimum.
1072 * If it didn't work (higher energy), there must be a minimum somewhere between
1073 * the old position and the new one.
1075 * Due to the finite numerical accuracy, it turns out that it is a good idea
1076 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1077 * This leads to lower final energies in the tests I've done. / Erik
1079 s_a->epot = s_min->epot;
1081 c = a + stepsize; /* reference position along line is zero */
1083 if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1084 em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1085 s_min,top,mdatoms,fr,vsite,constr,
1089 /* Take a trial step (new coords in s_c) */
1090 do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
1091 constr,top,nrnb,wcycle,-1);
1094 /* Calculate energy for the trial step */
1095 evaluate_energy(fplog,bVerbose,cr,
1096 state_global,top_global,s_c,top,
1097 inputrec,nrnb,wcycle,gstat,
1098 vsite,constr,fcd,graph,mdatoms,fr,
1099 mu_tot,enerd,vir,pres,-1,FALSE);
1101 /* Calc derivative along line */
1105 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1106 for(m=0; m<DIM; m++)
1107 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1109 /* Sum the gradient along the line across CPUs */
1111 gmx_sumd(1,&gpc,cr);
1113 /* This is the max amount of increase in energy we tolerate */
1114 tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1116 /* Accept the step if the energy is lower, or if it is not significantly higher
1117 * and the line derivative is still negative.
1119 if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1121 /* Great, we found a better energy. Increase step for next iteration
1122 * if we are still going down, decrease it otherwise
1125 stepsize *= 1.618034; /* The golden section */
1127 stepsize *= 0.618034; /* 1/golden section */
1129 /* New energy is the same or higher. We will have to do some work
1130 * to find a smaller value in the interval. Take smaller step next time!
1133 stepsize *= 0.618034;
1139 /* OK, if we didn't find a lower value we will have to locate one now - there must
1140 * be one in the interval [a=0,c].
1141 * The same thing is valid here, though: Don't spend dozens of iterations to find
1142 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1143 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1145 * I also have a safeguard for potentially really patological functions so we never
1146 * take more than 20 steps before we give up ...
1148 * If we already found a lower value we just skip this step and continue to the update.
1154 /* Select a new trial point.
1155 * If the derivatives at points a & c have different sign we interpolate to zero,
1156 * otherwise just do a bisection.
1159 b = a + gpa*(a-c)/(gpc-gpa);
1163 /* safeguard if interpolation close to machine accuracy causes errors:
1164 * never go outside the interval
1169 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1170 /* Reload the old state */
1171 em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1172 s_min,top,mdatoms,fr,vsite,constr,
1176 /* Take a trial step to this new point - new coords in s_b */
1177 do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
1178 constr,top,nrnb,wcycle,-1);
1181 /* Calculate energy for the trial step */
1182 evaluate_energy(fplog,bVerbose,cr,
1183 state_global,top_global,s_b,top,
1184 inputrec,nrnb,wcycle,gstat,
1185 vsite,constr,fcd,graph,mdatoms,fr,
1186 mu_tot,enerd,vir,pres,-1,FALSE);
1188 /* p does not change within a step, but since the domain decomposition
1189 * might change, we have to use cg_p of s_b here.
1194 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1195 for(m=0; m<DIM; m++)
1196 gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1198 /* Sum the gradient along the line across CPUs */
1200 gmx_sumd(1,&gpb,cr);
1203 fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1204 s_a->epot,s_b->epot,s_c->epot,gpb);
1206 epot_repl = s_b->epot;
1208 /* Keep one of the intervals based on the value of the derivative at the new point */
1210 /* Replace c endpoint with b */
1211 swap_em_state(s_b,s_c);
1215 /* Replace a endpoint with b */
1216 swap_em_state(s_b,s_a);
1222 * Stop search as soon as we find a value smaller than the endpoints.
1223 * Never run more than 20 steps, no matter what.
1226 } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1229 if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1231 /* OK. We couldn't find a significantly lower energy.
1232 * If beta==0 this was steepest descent, and then we give up.
1233 * If not, set beta=0 and restart with steepest descent before quitting.
1240 /* Reset memory before giving up */
1246 /* Select min energy state of A & C, put the best in B.
1248 if (s_c->epot < s_a->epot) {
1250 fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1251 s_c->epot,s_a->epot);
1252 swap_em_state(s_b,s_c);
1257 fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1258 s_a->epot,s_c->epot);
1259 swap_em_state(s_b,s_a);
1266 fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1268 swap_em_state(s_b,s_c);
1273 /* new search direction */
1274 /* beta = 0 means forget all memory and restart with steepest descents. */
1275 if (nstcg && ((step % nstcg)==0))
1278 /* s_min->fnorm cannot be zero, because then we would have converged
1282 /* Polak-Ribiere update.
1283 * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1285 beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1287 /* Limit beta to prevent oscillations */
1288 if (fabs(beta) > 5.0)
1292 /* update positions */
1293 swap_em_state(s_min,s_b);
1296 /* Print it if necessary */
1299 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1300 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1301 s_min->fmax,s_min->a_fmax+1);
1302 /* Store the new (lower) energies */
1303 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1304 mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
1305 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1306 do_log = do_per_step(step,inputrec->nstlog);
1307 do_ene = do_per_step(step,inputrec->nstenergy);
1309 print_ebin_header(fplog,step,step,s_min->s.lambda);
1310 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1311 do_log ? fplog : NULL,step,step,eprNORMAL,
1312 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1315 /* Stop when the maximum force lies below tolerance.
1316 * If we have reached machine precision, converged is already set to true.
1318 converged = converged || (s_min->fmax < inputrec->em_tol);
1320 } /* End of the loop */
1323 step--; /* we never took that last step in this case */
1325 if (s_min->fmax > inputrec->em_tol)
1329 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1330 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1336 /* If we printed energy and/or logfile last step (which was the last step)
1337 * we don't have to do it again, but otherwise print the final values.
1340 /* Write final value to log since we didn't do anything the last step */
1341 print_ebin_header(fplog,step,step,s_min->s.lambda);
1343 if (!do_ene || !do_log) {
1344 /* Write final energy file entries */
1345 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1346 !do_log ? fplog : NULL,step,step,eprNORMAL,
1347 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1351 /* Print some stuff... */
1353 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1356 * For accurate normal mode calculation it is imperative that we
1357 * store the last conformation into the full precision binary trajectory.
1359 * However, we should only do it if we did NOT already write this step
1360 * above (which we did if do_x or do_f was true).
1362 do_x = !do_per_step(step,inputrec->nstxout);
1363 do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1365 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1366 top_global,inputrec,step,
1367 s_min,state_global,f_global);
1369 fnormn = s_min->fnorm/sqrt(state_global->natoms);
1372 print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1373 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1374 print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1375 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1377 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1380 finish_em(fplog,cr,outf,runtime,wcycle);
1382 /* To print the actual number of steps we needed somewhere */
1383 runtime->nsteps_done = step;
1386 } /* That's all folks */
1389 double do_lbfgs(FILE *fplog,t_commrec *cr,
1390 int nfile,const t_filenm fnm[],
1391 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1393 gmx_vsite_t *vsite,gmx_constr_t constr,
1395 t_inputrec *inputrec,
1396 gmx_mtop_t *top_global,t_fcdata *fcd,
1399 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1402 int repl_ex_nst,int repl_ex_seed,
1403 gmx_membed_t *membed,
1404 real cpt_period,real max_hours,
1405 const char *deviceOptions,
1406 unsigned long Flags,
1407 gmx_runtime_t *runtime)
1409 static const char *LBFGS="Low-Memory BFGS Minimizer";
1411 gmx_localtop_t *top;
1412 gmx_enerdata_t *enerd;
1414 gmx_global_stat_t gstat;
1417 int ncorr,nmaxcorr,point,cp,neval,nminstep;
1418 double stepsize,gpa,gpb,gpc,tmp,minstep;
1419 real *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1420 real *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1421 real a,b,c,maxdelta,delta;
1422 real diag,Epot0,Epot,EpotA,EpotB,EpotC;
1423 real dgdx,dgdg,sq,yr,beta;
1425 gmx_bool converged,first;
1428 gmx_bool do_log,do_ene,do_x,do_f,foundlower,*frozen;
1430 int start,end,number_steps;
1432 int i,k,m,n,nfmax,gf,step;
1437 gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1439 n = 3*state->natoms;
1440 nmaxcorr = inputrec->nbfgscorr;
1442 /* Allocate memory */
1443 /* Use pointers to real so we dont have to loop over both atoms and
1444 * dimensions all the time...
1445 * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1446 * that point to the same memory.
1460 snew(alpha,nmaxcorr);
1463 for(i=0;i<nmaxcorr;i++)
1467 for(i=0;i<nmaxcorr;i++)
1474 init_em(fplog,LBFGS,cr,inputrec,
1475 state,top_global,&ems,&top,&f,&f_global,
1476 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1477 nfile,fnm,&outf,&mdebin);
1478 /* Do_lbfgs is not completely updated like do_steep and do_cg,
1479 * so we free some memory again.
1484 xx = (real *)state->x;
1487 start = mdatoms->start;
1488 end = mdatoms->homenr + start;
1490 /* Print to log file */
1491 print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1493 do_log = do_ene = do_x = do_f = TRUE;
1495 /* Max number of steps */
1496 number_steps=inputrec->nsteps;
1498 /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1500 for(i=start; i<end; i++) {
1501 if (mdatoms->cFREEZE)
1502 gf = mdatoms->cFREEZE[i];
1503 for(m=0; m<DIM; m++)
1504 frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1507 sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1509 sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1512 construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1513 top->idef.iparams,top->idef.il,
1514 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1516 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1517 /* do_force always puts the charge groups in the box and shifts again
1518 * We do not unshift, so molecules are always whole
1523 evaluate_energy(fplog,bVerbose,cr,
1524 state,top_global,&ems,top,
1525 inputrec,nrnb,wcycle,gstat,
1526 vsite,constr,fcd,graph,mdatoms,fr,
1527 mu_tot,enerd,vir,pres,-1,TRUE);
1531 /* Copy stuff to the energy bin for easy printing etc. */
1532 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1533 mdatoms->tmass,enerd,state,state->box,
1534 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1536 print_ebin_header(fplog,step,step,state->lambda);
1537 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1538 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1542 /* This is the starting energy */
1543 Epot = enerd->term[F_EPOT];
1549 /* Set the initial step.
1550 * since it will be multiplied by the non-normalized search direction
1551 * vector (force vector the first time), we scale it by the
1552 * norm of the force.
1556 fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1557 fprintf(stderr," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1558 fprintf(stderr," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1559 fprintf(stderr,"\n");
1560 /* and copy to the log file too... */
1561 fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1562 fprintf(fplog," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1563 fprintf(fplog," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1564 fprintf(fplog,"\n");
1570 dx[point][i] = ff[i]; /* Initial search direction */
1574 stepsize = 1.0/fnorm;
1577 /* Start the loop over BFGS steps.
1578 * Each successful step is counted, and we continue until
1579 * we either converge or reach the max number of steps.
1584 /* Set the gradient from the force */
1586 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1588 /* Write coordinates if necessary */
1589 do_x = do_per_step(step,inputrec->nstxout);
1590 do_f = do_per_step(step,inputrec->nstfout);
1592 write_traj(fplog,cr,outf,MDOF_X | MDOF_F,
1593 top_global,step,(real)step,state,state,f,f,NULL,NULL);
1595 /* Do the linesearching in the direction dx[point][0..(n-1)] */
1597 /* pointer to current direction - point=0 first time here */
1600 /* calculate line gradient */
1601 for(gpa=0,i=0;i<n;i++)
1604 /* Calculate minimum allowed stepsize, before the average (norm)
1605 * relative change in coordinate is smaller than precision
1607 for(minstep=0,i=0;i<n;i++) {
1614 minstep = GMX_REAL_EPS/sqrt(minstep/n);
1616 if(stepsize<minstep) {
1621 /* Store old forces and coordinates */
1633 /* Take a step downhill.
1634 * In theory, we should minimize the function along this direction.
1635 * That is quite possible, but it turns out to take 5-10 function evaluations
1636 * for each line. However, we dont really need to find the exact minimum -
1637 * it is much better to start a new BFGS step in a modified direction as soon
1638 * as we are close to it. This will save a lot of energy evaluations.
1640 * In practice, we just try to take a single step.
1641 * If it worked (i.e. lowered the energy), we increase the stepsize but
1642 * the continue straight to the next BFGS step without trying to find any minimum.
1643 * If it didn't work (higher energy), there must be a minimum somewhere between
1644 * the old position and the new one.
1646 * Due to the finite numerical accuracy, it turns out that it is a good idea
1647 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1648 * This leads to lower final energies in the tests I've done. / Erik
1653 c = a + stepsize; /* reference position along line is zero */
1655 /* Check stepsize first. We do not allow displacements
1656 * larger than emstep.
1666 if(maxdelta>inputrec->em_stepsize)
1668 } while(maxdelta>inputrec->em_stepsize);
1670 /* Take a trial step */
1672 xc[i] = lastx[i] + c*s[i];
1675 /* Calculate energy for the trial step */
1676 ems.s.x = (rvec *)xc;
1678 evaluate_energy(fplog,bVerbose,cr,
1679 state,top_global,&ems,top,
1680 inputrec,nrnb,wcycle,gstat,
1681 vsite,constr,fcd,graph,mdatoms,fr,
1682 mu_tot,enerd,vir,pres,step,FALSE);
1685 /* Calc derivative along line */
1686 for(gpc=0,i=0; i<n; i++) {
1687 gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
1689 /* Sum the gradient along the line across CPUs */
1691 gmx_sumd(1,&gpc,cr);
1693 /* This is the max amount of increase in energy we tolerate */
1694 tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1696 /* Accept the step if the energy is lower, or if it is not significantly higher
1697 * and the line derivative is still negative.
1699 if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1701 /* Great, we found a better energy. Increase step for next iteration
1702 * if we are still going down, decrease it otherwise
1705 stepsize *= 1.618034; /* The golden section */
1707 stepsize *= 0.618034; /* 1/golden section */
1709 /* New energy is the same or higher. We will have to do some work
1710 * to find a smaller value in the interval. Take smaller step next time!
1713 stepsize *= 0.618034;
1716 /* OK, if we didn't find a lower value we will have to locate one now - there must
1717 * be one in the interval [a=0,c].
1718 * The same thing is valid here, though: Don't spend dozens of iterations to find
1719 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1720 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1722 * I also have a safeguard for potentially really patological functions so we never
1723 * take more than 20 steps before we give up ...
1725 * If we already found a lower value we just skip this step and continue to the update.
1732 /* Select a new trial point.
1733 * If the derivatives at points a & c have different sign we interpolate to zero,
1734 * otherwise just do a bisection.
1738 b = a + gpa*(a-c)/(gpc-gpa);
1742 /* safeguard if interpolation close to machine accuracy causes errors:
1743 * never go outside the interval
1748 /* Take a trial step */
1750 xb[i] = lastx[i] + b*s[i];
1753 /* Calculate energy for the trial step */
1754 ems.s.x = (rvec *)xb;
1756 evaluate_energy(fplog,bVerbose,cr,
1757 state,top_global,&ems,top,
1758 inputrec,nrnb,wcycle,gstat,
1759 vsite,constr,fcd,graph,mdatoms,fr,
1760 mu_tot,enerd,vir,pres,step,FALSE);
1765 for(gpb=0,i=0; i<n; i++)
1766 gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
1768 /* Sum the gradient along the line across CPUs */
1770 gmx_sumd(1,&gpb,cr);
1772 /* Keep one of the intervals based on the value of the derivative at the new point */
1774 /* Replace c endpoint with b */
1778 /* swap coord pointers b/c */
1786 /* Replace a endpoint with b */
1790 /* swap coord pointers a/b */
1800 * Stop search as soon as we find a value smaller than the endpoints,
1801 * or if the tolerance is below machine precision.
1802 * Never run more than 20 steps, no matter what.
1805 } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1807 if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1808 /* OK. We couldn't find a significantly lower energy.
1809 * If ncorr==0 this was steepest descent, and then we give up.
1810 * If not, reset memory to restart as steepest descent before quitting.
1819 /* Search in gradient direction */
1822 /* Reset stepsize */
1823 stepsize = 1.0/fnorm;
1828 /* Select min energy state of A & C, put the best in xx/ff/Epot
1859 /* Update the memory information, and calculate a new
1860 * approximation of the inverse hessian
1863 /* Have new data in Epot, xx, ff */
1868 dg[point][i]=lastf[i]-ff[i];
1869 dx[point][i]*=stepsize;
1875 dgdg+=dg[point][i]*dg[point][i];
1876 dgdx+=dg[point][i]*dx[point][i];
1881 rho[point]=1.0/dgdx;
1893 /* Recursive update. First go back over the memory points */
1894 for(k=0;k<ncorr;k++) {
1903 alpha[cp]=rho[cp]*sq;
1906 p[i] -= alpha[cp]*dg[cp][i];
1912 /* And then go forward again */
1913 for(k=0;k<ncorr;k++) {
1916 yr += p[i]*dg[cp][i];
1919 beta = alpha[cp]-beta;
1922 p[i] += beta*dx[cp][i];
1931 dx[point][i] = p[i];
1937 /* Test whether the convergence criterion is met */
1938 get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1940 /* Print it if necessary */
1943 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1944 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1945 /* Store the new (lower) energies */
1946 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1947 mdatoms->tmass,enerd,state,state->box,
1948 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1949 do_log = do_per_step(step,inputrec->nstlog);
1950 do_ene = do_per_step(step,inputrec->nstenergy);
1952 print_ebin_header(fplog,step,step,state->lambda);
1953 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1954 do_log ? fplog : NULL,step,step,eprNORMAL,
1955 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1958 /* Stop when the maximum force lies below tolerance.
1959 * If we have reached machine precision, converged is already set to true.
1962 converged = converged || (fmax < inputrec->em_tol);
1964 } /* End of the loop */
1967 step--; /* we never took that last step in this case */
1969 if(fmax>inputrec->em_tol)
1973 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1974 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1979 /* If we printed energy and/or logfile last step (which was the last step)
1980 * we don't have to do it again, but otherwise print the final values.
1982 if(!do_log) /* Write final value to log since we didn't do anythin last step */
1983 print_ebin_header(fplog,step,step,state->lambda);
1984 if(!do_ene || !do_log) /* Write final energy file entries */
1985 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1986 !do_log ? fplog : NULL,step,step,eprNORMAL,
1987 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1989 /* Print some stuff... */
1991 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1994 * For accurate normal mode calculation it is imperative that we
1995 * store the last conformation into the full precision binary trajectory.
1997 * However, we should only do it if we did NOT already write this step
1998 * above (which we did if do_x or do_f was true).
2000 do_x = !do_per_step(step,inputrec->nstxout);
2001 do_f = !do_per_step(step,inputrec->nstfout);
2002 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
2003 top_global,inputrec,step,
2007 print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
2008 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2009 print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
2010 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2012 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
2015 finish_em(fplog,cr,outf,runtime,wcycle);
2017 /* To print the actual number of steps we needed somewhere */
2018 runtime->nsteps_done = step;
2021 } /* That's all folks */
2024 double do_steep(FILE *fplog,t_commrec *cr,
2025 int nfile, const t_filenm fnm[],
2026 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2028 gmx_vsite_t *vsite,gmx_constr_t constr,
2030 t_inputrec *inputrec,
2031 gmx_mtop_t *top_global,t_fcdata *fcd,
2032 t_state *state_global,
2034 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2037 int repl_ex_nst,int repl_ex_seed,
2038 gmx_membed_t *membed,
2039 real cpt_period,real max_hours,
2040 const char *deviceOptions,
2041 unsigned long Flags,
2042 gmx_runtime_t *runtime)
2044 const char *SD="Steepest Descents";
2045 em_state_t *s_min,*s_try;
2047 gmx_localtop_t *top;
2048 gmx_enerdata_t *enerd;
2050 gmx_global_stat_t gstat;
2052 real stepsize,constepsize;
2053 real ustep,dvdlambda,fnormn;
2056 gmx_bool bDone,bAbort,do_x,do_f;
2061 int steps_accepted=0;
2065 s_min = init_em_state();
2066 s_try = init_em_state();
2068 /* Init em and store the local state in s_try */
2069 init_em(fplog,SD,cr,inputrec,
2070 state_global,top_global,s_try,&top,&f,&f_global,
2071 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2072 nfile,fnm,&outf,&mdebin);
2074 /* Print to log file */
2075 print_em_start(fplog,cr,runtime,wcycle,SD);
2077 /* Set variables for stepsize (in nm). This is the largest
2078 * step that we are going to make in any direction.
2080 ustep = inputrec->em_stepsize;
2083 /* Max number of steps */
2084 nsteps = inputrec->nsteps;
2087 /* Print to the screen */
2088 sp_header(stderr,SD,inputrec->em_tol,nsteps);
2090 sp_header(fplog,SD,inputrec->em_tol,nsteps);
2092 /**** HERE STARTS THE LOOP ****
2093 * count is the counter for the number of steps
2094 * bDone will be TRUE when the minimization has converged
2095 * bAbort will be TRUE when nsteps steps have been performed or when
2096 * the stepsize becomes smaller than is reasonable for machine precision
2101 while( !bDone && !bAbort ) {
2102 bAbort = (nsteps >= 0) && (count == nsteps);
2104 /* set new coordinates, except for first step */
2106 do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
2107 constr,top,nrnb,wcycle,count);
2110 evaluate_energy(fplog,bVerbose,cr,
2111 state_global,top_global,s_try,top,
2112 inputrec,nrnb,wcycle,gstat,
2113 vsite,constr,fcd,graph,mdatoms,fr,
2114 mu_tot,enerd,vir,pres,count,count==0);
2117 print_ebin_header(fplog,count,count,s_try->s.lambda);
2120 s_min->epot = s_try->epot + 1;
2122 /* Print it if necessary */
2125 fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2126 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2127 (s_try->epot < s_min->epot) ? '\n' : '\r');
2130 if (s_try->epot < s_min->epot) {
2131 /* Store the new (lower) energies */
2132 upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2133 mdatoms->tmass,enerd,&s_try->s,s_try->s.box,
2134 NULL,NULL,vir,pres,NULL,mu_tot,constr);
2135 print_ebin(outf->fp_ene,TRUE,
2136 do_per_step(steps_accepted,inputrec->nstdisreout),
2137 do_per_step(steps_accepted,inputrec->nstorireout),
2138 fplog,count,count,eprNORMAL,TRUE,
2139 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2144 /* Now if the new energy is smaller than the previous...
2145 * or if this is the first step!
2146 * or if we did random steps!
2149 if ( (count==0) || (s_try->epot < s_min->epot) ) {
2152 /* Test whether the convergence criterion is met... */
2153 bDone = (s_try->fmax < inputrec->em_tol);
2155 /* Copy the arrays for force, positions and energy */
2156 /* The 'Min' array always holds the coords and forces of the minimal
2158 swap_em_state(s_min,s_try);
2162 /* Write to trn, if necessary */
2163 do_x = do_per_step(steps_accepted,inputrec->nstxout);
2164 do_f = do_per_step(steps_accepted,inputrec->nstfout);
2165 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2166 top_global,inputrec,count,
2167 s_min,state_global,f_global);
2170 /* If energy is not smaller make the step smaller... */
2173 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2174 /* Reload the old state */
2175 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2176 s_min,top,mdatoms,fr,vsite,constr,
2181 /* Determine new step */
2182 stepsize = ustep/s_min->fmax;
2184 /* Check if stepsize is too small, with 1 nm as a characteristic length */
2186 if (count == nsteps || ustep < 1e-12)
2188 if (count == nsteps || ustep < 1e-6)
2193 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2194 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2200 } /* End of the loop */
2202 /* Print some shit... */
2204 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2205 write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2206 top_global,inputrec,count,
2207 s_min,state_global,f_global);
2209 fnormn = s_min->fnorm/sqrt(state_global->natoms);
2212 print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2213 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2214 print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2215 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2218 finish_em(fplog,cr,outf,runtime,wcycle);
2220 /* To print the actual number of steps we needed somewhere */
2221 inputrec->nsteps=count;
2223 runtime->nsteps_done = count;
2226 } /* That's all folks */
2229 double do_nm(FILE *fplog,t_commrec *cr,
2230 int nfile,const t_filenm fnm[],
2231 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2233 gmx_vsite_t *vsite,gmx_constr_t constr,
2235 t_inputrec *inputrec,
2236 gmx_mtop_t *top_global,t_fcdata *fcd,
2237 t_state *state_global,
2239 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2242 int repl_ex_nst,int repl_ex_seed,
2243 gmx_membed_t *membed,
2244 real cpt_period,real max_hours,
2245 const char *deviceOptions,
2246 unsigned long Flags,
2247 gmx_runtime_t *runtime)
2249 const char *NM = "Normal Mode Analysis";
2254 gmx_localtop_t *top;
2255 gmx_enerdata_t *enerd;
2257 gmx_global_stat_t gstat;
2264 gmx_bool bSparse; /* use sparse matrix storage format */
2266 gmx_sparsematrix_t * sparse_matrix = NULL;
2267 real * full_matrix = NULL;
2268 em_state_t * state_work;
2270 /* added with respect to mdrun */
2272 real der_range=10.0*sqrt(GMX_REAL_EPS);
2278 gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2281 state_work = init_em_state();
2283 /* Init em and store the local state in state_minimum */
2284 init_em(fplog,NM,cr,inputrec,
2285 state_global,top_global,state_work,&top,
2287 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2288 nfile,fnm,&outf,NULL);
2290 natoms = top_global->natoms;
2298 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2299 " which MIGHT not be accurate enough for normal mode analysis.\n"
2300 " Gromacs now uses sparse matrix storage, so the memory requirements\n"
2301 " are fairly modest even if you recompile in double precision.\n\n");
2305 /* Check if we can/should use sparse storage format.
2307 * Sparse format is only useful when the Hessian itself is sparse, which it
2308 * will be when we use a cutoff.
2309 * For small systems (n<1000) it is easier to always use full matrix format, though.
2311 if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2313 fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2316 else if(top_global->natoms < 1000)
2318 fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2323 fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2327 sz = DIM*top_global->natoms;
2329 fprintf(stderr,"Allocating Hessian memory...\n\n");
2333 sparse_matrix=gmx_sparsematrix_init(sz);
2334 sparse_matrix->compressed_symmetric = TRUE;
2338 snew(full_matrix,sz*sz);
2341 /* Initial values */
2342 t = inputrec->init_t;
2343 lambda = inputrec->init_lambda;
2349 /* Write start time and temperature */
2350 print_em_start(fplog,cr,runtime,wcycle,NM);
2352 /* fudge nr of steps to nr of atoms */
2353 inputrec->nsteps = natoms*2;
2357 fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2358 *(top_global->name),(int)inputrec->nsteps);
2361 nnodes = cr->nnodes;
2363 /* Make evaluate_energy do a single node force calculation */
2365 evaluate_energy(fplog,bVerbose,cr,
2366 state_global,top_global,state_work,top,
2367 inputrec,nrnb,wcycle,gstat,
2368 vsite,constr,fcd,graph,mdatoms,fr,
2369 mu_tot,enerd,vir,pres,-1,TRUE);
2370 cr->nnodes = nnodes;
2372 /* if forces are not small, warn user */
2373 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2377 fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2378 if (state_work->fmax > 1.0e-3)
2380 fprintf(stderr,"Maximum force probably not small enough to");
2381 fprintf(stderr," ensure that you are in an \nenergy well. ");
2382 fprintf(stderr,"Be aware that negative eigenvalues may occur");
2383 fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2387 /***********************************************************
2389 * Loop over all pairs in matrix
2391 * do_force called twice. Once with positive and
2392 * once with negative displacement
2394 ************************************************************/
2396 /* Steps are divided one by one over the nodes */
2397 for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2400 for (d=0; d<DIM; d++)
2402 x_min = state_work->s.x[atom][d];
2404 state_work->s.x[atom][d] = x_min - der_range;
2406 /* Make evaluate_energy do a single node force calculation */
2408 evaluate_energy(fplog,bVerbose,cr,
2409 state_global,top_global,state_work,top,
2410 inputrec,nrnb,wcycle,gstat,
2411 vsite,constr,fcd,graph,mdatoms,fr,
2412 mu_tot,enerd,vir,pres,atom*2,FALSE);
2414 for(i=0; i<natoms; i++)
2416 copy_rvec(state_work->f[i], fneg[i]);
2419 state_work->s.x[atom][d] = x_min + der_range;
2421 evaluate_energy(fplog,bVerbose,cr,
2422 state_global,top_global,state_work,top,
2423 inputrec,nrnb,wcycle,gstat,
2424 vsite,constr,fcd,graph,mdatoms,fr,
2425 mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2426 cr->nnodes = nnodes;
2428 /* x is restored to original */
2429 state_work->s.x[atom][d] = x_min;
2431 for(j=0; j<natoms; j++)
2433 for (k=0; (k<DIM); k++)
2436 -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2444 #define mpi_type MPI_DOUBLE
2446 #define mpi_type MPI_FLOAT
2448 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2449 cr->mpi_comm_mygroup);
2454 for(node=0; (node<nnodes && atom+node<natoms); node++)
2460 MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2461 cr->mpi_comm_mygroup,&stat);
2466 row = (atom + node)*DIM + d;
2468 for(j=0; j<natoms; j++)
2470 for(k=0; k<DIM; k++)
2476 if (col >= row && dfdx[j][k] != 0.0)
2478 gmx_sparsematrix_increment_value(sparse_matrix,
2479 row,col,dfdx[j][k]);
2484 full_matrix[row*sz+col] = dfdx[j][k];
2491 if (bVerbose && fplog)
2496 /* write progress */
2497 if (MASTER(cr) && bVerbose)
2499 fprintf(stderr,"\rFinished step %d out of %d",
2500 min(atom+nnodes,natoms),natoms);
2507 fprintf(stderr,"\n\nWriting Hessian...\n");
2508 gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2511 finish_em(fplog,cr,outf,runtime,wcycle);
2513 runtime->nsteps_done = natoms*2;