src/gromacs/mdlib/minimize.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <string.h>
  41 #include <time.h>
  42 #include <math.h>
  43 #include "sysstuff.h"
  44 #include "string2.h"
  45 #include "network.h"
  46 #include "confio.h"
  47 #include "copyrite.h"
  48 #include "smalloc.h"
  49 #include "nrnb.h"
  50 #include "main.h"
  51 #include "force.h"
  52 #include "macros.h"
  53 #include "random.h"
  54 #include "names.h"
  55 #include "gmx_fatal.h"
  56 #include "txtdump.h"
  57 #include "typedefs.h"
  58 #include "update.h"
  59 #include "constr.h"
  60 #include "vec.h"
  61 #include "statutil.h"
  62 #include "tgroup.h"
  63 #include "mdebin.h"
  64 #include "vsite.h"
  65 #include "force.h"
  66 #include "mdrun.h"
  67 #include "domdec.h"
  68 #include "partdec.h"
  69 #include "trnio.h"
  70 #include "mdatoms.h"
  71 #include "ns.h"
  72 #include "gmx_wallcycle.h"
  73 #include "mtop_util.h"
  74 #include "gmxfio.h"
  75 #include "pme.h"
  76
  77 #include "gromacs/linearalgebra/mtxio.h"
  78 #include "gromacs/linearalgebra/sparsematrix.h"
  79
  80 typedef struct {
  81   t_state s;
  82   rvec    *f;
  83   real    epot;
  84   real    fnorm;
  85   real    fmax;
  86   int     a_fmax;
  87 } em_state_t;
  88
  89 static em_state_t *init_em_state()
  90 {
  91   em_state_t *ems;
  92
  93   snew(ems,1);
  94
  95   /* does this need to be here?  Should the array be declared differently (staticaly)in the state definition? */
  96   snew(ems->s.lambda,efptNR);
  97
  98   return ems;
  99 }
 100
 101 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 102                            gmx_wallcycle_t wcycle,
 103                            const char *name)
 104 {
 105     char buf[STRLEN];
 106
 107     runtime_start(runtime);
 108
 109     sprintf(buf,"Started %s",name);
 110     print_date_and_time(fplog,cr->nodeid,buf,NULL);
 111
 112     wallcycle_start(wcycle,ewcRUN);
 113 }
 114 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 115                         gmx_wallcycle_t wcycle)
 116 {
 117     wallcycle_stop(wcycle,ewcRUN);
 118
 119     runtime_end(runtime);
 120 }
 121
 122 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
 123 {
 124     fprintf(out,"\n");
 125     fprintf(out,"%s:\n",minimizer);
 126     fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
 127     fprintf(out,"   Number of steps    = %12d\n",nsteps);
 128 }
 129
 130 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
 131 {
 132     if (bLastStep)
 133     {
 134         fprintf(fp,"\nReached the maximum number of steps before reaching Fmax < %g\n",ftol);
 135     }
 136     else
 137     {
 138         fprintf(fp,"\nStepsize too small, or no change in energy.\n"
 139                 "Converged to machine precision,\n"
 140                 "but not to the requested precision Fmax < %g\n",
 141                 ftol);
 142         if (sizeof(real)<sizeof(double))
 143         {
 144             fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
 145         }
 146         if (bConstrain)
 147         {
 148             fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
 149                     "off constraints alltogether (set constraints = none in mdp file)\n");
 150         }
 151     }
 152 }
 153
 154
 155
 156 static void print_converged(FILE *fp,const char *alg,real ftol,
 157                             gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
 158                             real epot,real fmax, int nfmax, real fnorm)
 159 {
 160   char buf[STEPSTRSIZE];
 161
 162   if (bDone)
 163     fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
 164             alg,ftol,gmx_step_str(count,buf));
 165   else if(count<nsteps)
 166     fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
 167                "but did not reach the requested Fmax < %g.\n",
 168             alg,gmx_step_str(count,buf),ftol);
 169   else
 170     fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
 171             alg,ftol,gmx_step_str(count,buf));
 172
 173 #ifdef GMX_DOUBLE
 174   fprintf(fp,"Potential Energy  = %21.14e\n",epot);
 175   fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1);
 176   fprintf(fp,"Norm of force     = %21.14e\n",fnorm);
 177 #else
 178   fprintf(fp,"Potential Energy  = %14.7e\n",epot);
 179   fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1);
 180   fprintf(fp,"Norm of force     = %14.7e\n",fnorm);
 181 #endif
 182 }
 183
 184 static void get_f_norm_max(t_commrec *cr,
 185                            t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
 186                            real *fnorm,real *fmax,int *a_fmax)
 187 {
 188   double fnorm2,*sum;
 189   real fmax2,fmax2_0,fam;
 190   int  la_max,a_max,start,end,i,m,gf;
 191
 192   /* This routine finds the largest force and returns it.
 193    * On parallel machines the global max is taken.
 194    */
 195   fnorm2 = 0;
 196   fmax2 = 0;
 197   la_max = -1;
 198   gf = 0;
 199   start = mdatoms->start;
 200   end   = mdatoms->homenr + start;
 201   if (mdatoms->cFREEZE) {
 202     for(i=start; i<end; i++) {
 203       gf = mdatoms->cFREEZE[i];
 204       fam = 0;
 205       for(m=0; m<DIM; m++)
 206         if (!opts->nFreeze[gf][m])
 207           fam += sqr(f[i][m]);
 208       fnorm2 += fam;
 209       if (fam > fmax2) {
 210         fmax2  = fam;
 211         la_max = i;
 212       }
 213     }
 214   } else {
 215     for(i=start; i<end; i++) {
 216       fam = norm2(f[i]);
 217       fnorm2 += fam;
 218       if (fam > fmax2) {
 219         fmax2  = fam;
 220         la_max = i;
 221       }
 222     }
 223   }
 224
 225   if (la_max >= 0 && DOMAINDECOMP(cr)) {
 226     a_max = cr->dd->gatindex[la_max];
 227   } else {
 228     a_max = la_max;
 229   }
 230   if (PAR(cr)) {
 231     snew(sum,2*cr->nnodes+1);
 232     sum[2*cr->nodeid]   = fmax2;
 233     sum[2*cr->nodeid+1] = a_max;
 234     sum[2*cr->nnodes]   = fnorm2;
 235     gmx_sumd(2*cr->nnodes+1,sum,cr);
 236     fnorm2 = sum[2*cr->nnodes];
 237     /* Determine the global maximum */
 238     for(i=0; i<cr->nnodes; i++) {
 239       if (sum[2*i] > fmax2) {
 240         fmax2 = sum[2*i];
 241         a_max = (int)(sum[2*i+1] + 0.5);
 242       }
 243     }
 244     sfree(sum);
 245   }
 246
 247   if (fnorm)
 248     *fnorm = sqrt(fnorm2);
 249   if (fmax)
 250     *fmax  = sqrt(fmax2);
 251   if (a_fmax)
 252     *a_fmax = a_max;
 253 }
 254
 255 static void get_state_f_norm_max(t_commrec *cr,
 256                            t_grpopts *opts,t_mdatoms *mdatoms,
 257                            em_state_t *ems)
 258 {
 259   get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
 260 }
 261
 262 void init_em(FILE *fplog,const char *title,
 263              t_commrec *cr,t_inputrec *ir,
 264              t_state *state_global,gmx_mtop_t *top_global,
 265              em_state_t *ems,gmx_localtop_t **top,
 266              rvec **f,rvec **f_global,
 267              t_nrnb *nrnb,rvec mu_tot,
 268              t_forcerec *fr,gmx_enerdata_t **enerd,
 269              t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
 270              gmx_vsite_t *vsite,gmx_constr_t constr,
 271              int nfile,const t_filenm fnm[],
 272              gmx_mdoutf_t **outf,t_mdebin **mdebin)
 273 {
 274     int  start,homenr,i;
 275     real dvdlambda;
 276
 277     if (fplog)
 278     {
 279         fprintf(fplog,"Initiating %s\n",title);
 280     }
 281
 282     state_global->ngtc = 0;
 283
 284     /* Initialize lambda variables */
 285     initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
 286
 287     init_nrnb(nrnb);
 288
 289     if (DOMAINDECOMP(cr))
 290     {
 291         *top = dd_init_local_top(top_global);
 292
 293         dd_init_local_state(cr->dd,state_global,&ems->s);
 294
 295         *f = NULL;
 296
 297         /* Distribute the charge groups over the nodes from the master node */
 298         dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 299                             state_global,top_global,ir,
 300                             &ems->s,&ems->f,mdatoms,*top,
 301                             fr,vsite,NULL,constr,
 302                             nrnb,NULL,FALSE);
 303         dd_store_state(cr->dd,&ems->s);
 304
 305         if (ir->nstfout)
 306         {
 307             snew(*f_global,top_global->natoms);
 308         }
 309         else
 310         {
 311             *f_global = NULL;
 312         }
 313         *graph = NULL;
 314     }
 315     else
 316     {
 317         snew(*f,top_global->natoms);
 318
 319         /* Just copy the state */
 320         ems->s = *state_global;
 321         snew(ems->s.x,ems->s.nalloc);
 322         snew(ems->f,ems->s.nalloc);
 323         for(i=0; i<state_global->natoms; i++)
 324         {
 325             copy_rvec(state_global->x[i],ems->s.x[i]);
 326         }
 327         copy_mat(state_global->box,ems->s.box);
 328
 329         if (PAR(cr) && ir->eI != eiNM)
 330         {
 331             /* Initialize the particle decomposition and split the topology */
 332             *top = split_system(fplog,top_global,ir,cr);
 333
 334             pd_cg_range(cr,&fr->cg0,&fr->hcg);
 335         }
 336         else
 337         {
 338             *top = gmx_mtop_generate_local_top(top_global,ir);
 339         }
 340         *f_global = *f;
 341
 342         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
 343         {
 344             *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
 345         }
 346         else
 347         {
 348             *graph = NULL;
 349         }
 350
 351         if (PARTDECOMP(cr))
 352         {
 353             pd_at_range(cr,&start,&homenr);
 354             homenr -= start;
 355         }
 356         else
 357         {
 358             start  = 0;
 359             homenr = top_global->natoms;
 360         }
 361         atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
 362         update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
 363
 364         if (vsite)
 365         {
 366             set_vsite_top(vsite,*top,mdatoms,cr);
 367         }
 368     }
 369
 370     if (constr)
 371     {
 372         if (ir->eConstrAlg == econtSHAKE &&
 373             gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
 374         {
 375             gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
 376                       econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 377         }
 378
 379         if (!DOMAINDECOMP(cr))
 380         {
 381             set_constraints(constr,*top,ir,mdatoms,cr);
 382         }
 383
 384         if (!ir->bContinuation)
 385         {
 386             /* Constrain the starting coordinates */
 387             dvdlambda=0;
 388             constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
 389                       ir,NULL,cr,-1,0,mdatoms,
 390                       ems->s.x,ems->s.x,NULL,ems->s.box,
 391                       ems->s.lambda[efptFEP],&dvdlambda,
 392                       NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 393         }
 394     }
 395
 396     if (PAR(cr))
 397     {
 398         *gstat = global_stat_init(ir);
 399     }
 400
 401     *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
 402
 403     snew(*enerd,1);
 404     init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
 405                   *enerd);
 406
 407     if (mdebin != NULL)
 408     {
 409         /* Init bin for energy stuff */
 410         *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
 411     }
 412
 413     clear_rvec(mu_tot);
 414     calc_shifts(ems->s.box,fr->shift_vec);
 415 }
 416
 417 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
 418                       gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
 419 {
 420   if (!(cr->duty & DUTY_PME)) {
 421     /* Tell the PME only node to finish */
 422     gmx_pme_finish(cr);
 423   }
 424
 425   done_mdoutf(outf);
 426
 427   em_time_end(fplog,cr,runtime,wcycle);
 428 }
 429
 430 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
 431 {
 432   em_state_t tmp;
 433
 434   tmp   = *ems1;
 435   *ems1 = *ems2;
 436   *ems2 = tmp;
 437 }
 438
 439 static void copy_em_coords(em_state_t *ems,t_state *state)
 440 {
 441     int i;
 442
 443     for(i=0; (i<state->natoms); i++)
 444     {
 445         copy_rvec(ems->s.x[i],state->x[i]);
 446     }
 447 }
 448
 449 static void write_em_traj(FILE *fplog,t_commrec *cr,
 450                           gmx_mdoutf_t *outf,
 451                           gmx_bool bX,gmx_bool bF,const char *confout,
 452                           gmx_mtop_t *top_global,
 453                           t_inputrec *ir,gmx_large_int_t step,
 454                           em_state_t *state,
 455                           t_state *state_global,rvec *f_global)
 456 {
 457     int mdof_flags;
 458
 459     if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 460     {
 461         copy_em_coords(state,state_global);
 462         f_global = state->f;
 463     }
 464
 465     mdof_flags = 0;
 466     if (bX) { mdof_flags |= MDOF_X; }
 467     if (bF) { mdof_flags |= MDOF_F; }
 468     write_traj(fplog,cr,outf,mdof_flags,
 469                top_global,step,(double)step,
 470                &state->s,state_global,state->f,f_global,NULL,NULL);
 471
 472     if (confout != NULL && MASTER(cr))
 473     {
 474         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 475         {
 476             /* Make molecules whole only for confout writing */
 477             do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
 478                         state_global->x);
 479         }
 480
 481         write_sto_conf_mtop(confout,
 482                             *top_global->name,top_global,
 483                             state_global->x,NULL,ir->ePBC,state_global->box);
 484     }
 485 }
 486
 487 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
 488                        em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
 489                        gmx_constr_t constr,gmx_localtop_t *top,
 490                        t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 491                        gmx_large_int_t count)
 492
 493 {
 494   t_state *s1,*s2;
 495   int  start,end,gf,i,m;
 496   rvec *x1,*x2;
 497   real dvdlambda;
 498
 499   s1 = &ems1->s;
 500   s2 = &ems2->s;
 501
 502   if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 503     gmx_incons("state mismatch in do_em_step");
 504
 505   s2->flags = s1->flags;
 506
 507   if (s2->nalloc != s1->nalloc) {
 508     s2->nalloc = s1->nalloc;
 509     srenew(s2->x,s1->nalloc);
 510     srenew(ems2->f,  s1->nalloc);
 511     if (s2->flags & (1<<estCGP))
 512       srenew(s2->cg_p,  s1->nalloc);
 513   }
 514
 515   s2->natoms = s1->natoms;
 516   /* Copy free energy state -> is this necessary? */
 517   for (i=0;i<efptNR;i++)
 518   {
 519       s2->lambda[i] = s1->lambda[i];
 520   }
 521   copy_mat(s1->box,s2->box);
 522
 523   start = md->start;
 524   end   = md->start + md->homenr;
 525
 526   x1 = s1->x;
 527   x2 = s2->x;
 528   gf = 0;
 529   for(i=start; i<end; i++) {
 530     if (md->cFREEZE)
 531       gf = md->cFREEZE[i];
 532     for(m=0; m<DIM; m++) {
 533       if (ir->opts.nFreeze[gf][m])
 534         x2[i][m] = x1[i][m];
 535       else
 536         x2[i][m] = x1[i][m] + a*f[i][m];
 537     }
 538   }
 539
 540   if (s2->flags & (1<<estCGP)) {
 541     /* Copy the CG p vector */
 542     x1 = s1->cg_p;
 543     x2 = s2->cg_p;
 544     for(i=start; i<end; i++)
 545       copy_rvec(x1[i],x2[i]);
 546   }
 547
 548   if (DOMAINDECOMP(cr)) {
 549     s2->ddp_count = s1->ddp_count;
 550     if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
 551       s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 552       srenew(s2->cg_gl,s2->cg_gl_nalloc);
 553     }
 554     s2->ncg_gl = s1->ncg_gl;
 555     for(i=0; i<s2->ncg_gl; i++)
 556       s2->cg_gl[i] = s1->cg_gl[i];
 557     s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 558   }
 559
 560   if (constr) {
 561     wallcycle_start(wcycle,ewcCONSTR);
 562     dvdlambda = 0;
 563     constrain(NULL,TRUE,TRUE,constr,&top->idef,
 564               ir,NULL,cr,count,0,md,
 565               s1->x,s2->x,NULL,s2->box,s2->lambda[efptBONDED],
 566               &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 567     wallcycle_stop(wcycle,ewcCONSTR);
 568   }
 569 }
 570
 571 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
 572                                    gmx_mtop_t *top_global,t_inputrec *ir,
 573                                    em_state_t *ems,gmx_localtop_t *top,
 574                                    t_mdatoms *mdatoms,t_forcerec *fr,
 575                                    gmx_vsite_t *vsite,gmx_constr_t constr,
 576                                    t_nrnb *nrnb,gmx_wallcycle_t wcycle)
 577 {
 578     /* Repartition the domain decomposition */
 579     wallcycle_start(wcycle,ewcDOMDEC);
 580     dd_partition_system(fplog,step,cr,FALSE,1,
 581                         NULL,top_global,ir,
 582                         &ems->s,&ems->f,
 583                         mdatoms,top,fr,vsite,NULL,constr,
 584                         nrnb,wcycle,FALSE);
 585     dd_store_state(cr->dd,&ems->s);
 586     wallcycle_stop(wcycle,ewcDOMDEC);
 587 }
 588
 589 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
 590                             t_state *state_global,gmx_mtop_t *top_global,
 591                             em_state_t *ems,gmx_localtop_t *top,
 592                             t_inputrec *inputrec,
 593                             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 594                             gmx_global_stat_t gstat,
 595                             gmx_vsite_t *vsite,gmx_constr_t constr,
 596                             t_fcdata *fcd,
 597                             t_graph *graph,t_mdatoms *mdatoms,
 598                             t_forcerec *fr,rvec mu_tot,
 599                             gmx_enerdata_t *enerd,tensor vir,tensor pres,
 600                             gmx_large_int_t count,gmx_bool bFirst)
 601 {
 602   real t;
 603   gmx_bool bNS;
 604   int  nabnsb;
 605   tensor force_vir,shake_vir,ekin;
 606   real dvdlambda,prescorr,enercorr,dvdlcorr;
 607   real terminate=0;
 608
 609   /* Set the time to the initial time, the time does not change during EM */
 610   t = inputrec->init_t;
 611
 612   if (bFirst ||
 613       (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
 614     /* This the first state or an old state used before the last ns */
 615     bNS = TRUE;
 616   } else {
 617     bNS = FALSE;
 618     if (inputrec->nstlist > 0) {
 619       bNS = TRUE;
 620     } else if (inputrec->nstlist == -1) {
 621       nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
 622       if (PAR(cr))
 623         gmx_sumi(1,&nabnsb,cr);
 624       bNS = (nabnsb > 0);
 625     }
 626   }
 627
 628   if (vsite)
 629     construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
 630                      top->idef.iparams,top->idef.il,
 631                      fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
 632
 633   if (DOMAINDECOMP(cr)) {
 634     if (bNS) {
 635       /* Repartition the domain decomposition */
 636       em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 637                              ems,top,mdatoms,fr,vsite,constr,
 638                              nrnb,wcycle);
 639     }
 640   }
 641
 642     /* Calc force & energy on new trial position  */
 643     /* do_force always puts the charge groups in the box and shifts again
 644      * We do not unshift, so molecules are always whole in congrad.c
 645      */
 646     do_force(fplog,cr,inputrec,
 647              count,nrnb,wcycle,top,top_global,&top_global->groups,
 648              ems->s.box,ems->s.x,&ems->s.hist,
 649              ems->f,force_vir,mdatoms,enerd,fcd,
 650              ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
 651              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
 652              (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
 653
 654     /* Clear the unused shake virial and pressure */
 655     clear_mat(shake_vir);
 656     clear_mat(pres);
 657
 658     /* Communicate stuff when parallel */
 659     if (PAR(cr) && inputrec->eI != eiNM)
 660     {
 661         wallcycle_start(wcycle,ewcMoveE);
 662
 663         global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
 664                     inputrec,NULL,NULL,NULL,1,&terminate,
 665                     top_global,&ems->s,FALSE,
 666                     CGLO_ENERGY |
 667                     CGLO_PRESSURE |
 668                     CGLO_CONSTRAINT |
 669                     CGLO_FIRSTITERATE);
 670
 671         wallcycle_stop(wcycle,ewcMoveE);
 672     }
 673
 674     /* Calculate long range corrections to pressure and energy */
 675     calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
 676                   pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
 677     enerd->term[F_DISPCORR] = enercorr;
 678     enerd->term[F_EPOT] += enercorr;
 679     enerd->term[F_PRES] += prescorr;
 680     enerd->term[F_DVDL] += dvdlcorr;
 681
 682   ems->epot = enerd->term[F_EPOT];
 683
 684   if (constr) {
 685     /* Project out the constraint components of the force */
 686     wallcycle_start(wcycle,ewcCONSTR);
 687     dvdlambda = 0;
 688     constrain(NULL,FALSE,FALSE,constr,&top->idef,
 689               inputrec,NULL,cr,count,0,mdatoms,
 690               ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda[efptBONDED],&dvdlambda,
 691               NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
 692     if (fr->bSepDVDL && fplog)
 693       fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
 694     enerd->term[F_DVDL_BONDED] += dvdlambda;
 695     m_add(force_vir,shake_vir,vir);
 696     wallcycle_stop(wcycle,ewcCONSTR);
 697   } else {
 698     copy_mat(force_vir,vir);
 699   }
 700
 701   clear_mat(ekin);
 702   enerd->term[F_PRES] =
 703     calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
 704
 705   sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
 706
 707     if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 708     {
 709         get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
 710     }
 711 }
 712
 713 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 714                               gmx_mtop_t *mtop,
 715                               em_state_t *s_min,em_state_t *s_b)
 716 {
 717   rvec *fm,*fb,*fmg;
 718   t_block *cgs_gl;
 719   int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
 720   double partsum;
 721   unsigned char *grpnrFREEZE;
 722
 723   if (debug)
 724     fprintf(debug,"Doing reorder_partsum\n");
 725
 726   fm = s_min->f;
 727   fb = s_b->f;
 728
 729   cgs_gl = dd_charge_groups_global(cr->dd);
 730   index = cgs_gl->index;
 731
 732   /* Collect fm in a global vector fmg.
 733    * This conflicts with the spirit of domain decomposition,
 734    * but to fully optimize this a much more complicated algorithm is required.
 735    */
 736   snew(fmg,mtop->natoms);
 737
 738   ncg   = s_min->s.ncg_gl;
 739   cg_gl = s_min->s.cg_gl;
 740   i = 0;
 741   for(c=0; c<ncg; c++) {
 742     cg = cg_gl[c];
 743     a0 = index[cg];
 744     a1 = index[cg+1];
 745     for(a=a0; a<a1; a++) {
 746       copy_rvec(fm[i],fmg[a]);
 747       i++;
 748     }
 749   }
 750   gmx_sum(mtop->natoms*3,fmg[0],cr);
 751
 752   /* Now we will determine the part of the sum for the cgs in state s_b */
 753   ncg   = s_b->s.ncg_gl;
 754   cg_gl = s_b->s.cg_gl;
 755   partsum = 0;
 756   i = 0;
 757   gf = 0;
 758   grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 759   for(c=0; c<ncg; c++) {
 760     cg = cg_gl[c];
 761     a0 = index[cg];
 762     a1 = index[cg+1];
 763     for(a=a0; a<a1; a++) {
 764       if (mdatoms->cFREEZE && grpnrFREEZE) {
 765         gf = grpnrFREEZE[i];
 766       }
 767       for(m=0; m<DIM; m++) {
 768         if (!opts->nFreeze[gf][m]) {
 769           partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 770         }
 771       }
 772       i++;
 773     }
 774   }
 775
 776   sfree(fmg);
 777
 778   return partsum;
 779 }
 780
 781 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 782                     gmx_mtop_t *mtop,
 783                     em_state_t *s_min,em_state_t *s_b)
 784 {
 785   rvec *fm,*fb;
 786   double sum;
 787   int  gf,i,m;
 788
 789   /* This is just the classical Polak-Ribiere calculation of beta;
 790    * it looks a bit complicated since we take freeze groups into account,
 791    * and might have to sum it in parallel runs.
 792    */
 793
 794   if (!DOMAINDECOMP(cr) ||
 795       (s_min->s.ddp_count == cr->dd->ddp_count &&
 796        s_b->s.ddp_count   == cr->dd->ddp_count)) {
 797     fm = s_min->f;
 798     fb = s_b->f;
 799     sum = 0;
 800     gf = 0;
 801     /* This part of code can be incorrect with DD,
 802      * since the atom ordering in s_b and s_min might differ.
 803      */
 804     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 805       if (mdatoms->cFREEZE)
 806         gf = mdatoms->cFREEZE[i];
 807       for(m=0; m<DIM; m++)
 808         if (!opts->nFreeze[gf][m]) {
 809           sum += (fb[i][m] - fm[i][m])*fb[i][m];
 810         }
 811     }
 812   } else {
 813     /* We need to reorder cgs while summing */
 814     sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
 815   }
 816   if (PAR(cr))
 817     gmx_sumd(1,&sum,cr);
 818
 819   return sum/sqr(s_min->fnorm);
 820 }
 821
 822 double do_cg(FILE *fplog,t_commrec *cr,
 823              int nfile,const t_filenm fnm[],
 824              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 825              int nstglobalcomm,
 826              gmx_vsite_t *vsite,gmx_constr_t constr,
 827              int stepout,
 828              t_inputrec *inputrec,
 829              gmx_mtop_t *top_global,t_fcdata *fcd,
 830              t_state *state_global,
 831              t_mdatoms *mdatoms,
 832              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 833              gmx_edsam_t ed,
 834              t_forcerec *fr,
 835              int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 836              gmx_membed_t membed,
 837              real cpt_period,real max_hours,
 838              const char *deviceOptions,
 839              unsigned long Flags,
 840              gmx_runtime_t *runtime)
 841 {
 842   const char *CG="Polak-Ribiere Conjugate Gradients";
 843
 844   em_state_t *s_min,*s_a,*s_b,*s_c;
 845   gmx_localtop_t *top;
 846   gmx_enerdata_t *enerd;
 847   rvec   *f;
 848   gmx_global_stat_t gstat;
 849   t_graph    *graph;
 850   rvec   *f_global,*p,*sf,*sfm;
 851   double gpa,gpb,gpc,tmp,sum[2],minstep;
 852   real   fnormn;
 853   real   stepsize;
 854   real   a,b,c,beta=0.0;
 855   real   epot_repl=0;
 856   real   pnorm;
 857   t_mdebin   *mdebin;
 858   gmx_bool   converged,foundlower;
 859   rvec   mu_tot;
 860   gmx_bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
 861   tensor vir,pres;
 862   int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
 863   gmx_mdoutf_t *outf;
 864   int    i,m,gf,step,nminstep;
 865   real   terminate=0;
 866
 867   step=0;
 868
 869   s_min = init_em_state();
 870   s_a   = init_em_state();
 871   s_b   = init_em_state();
 872   s_c   = init_em_state();
 873
 874   /* Init em and store the local state in s_min */
 875   init_em(fplog,CG,cr,inputrec,
 876           state_global,top_global,s_min,&top,&f,&f_global,
 877           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 878           nfile,fnm,&outf,&mdebin);
 879
 880   /* Print to log file */
 881   print_em_start(fplog,cr,runtime,wcycle,CG);
 882
 883   /* Max number of steps */
 884   number_steps=inputrec->nsteps;
 885
 886   if (MASTER(cr))
 887     sp_header(stderr,CG,inputrec->em_tol,number_steps);
 888   if (fplog)
 889     sp_header(fplog,CG,inputrec->em_tol,number_steps);
 890
 891   /* Call the force routine and some auxiliary (neighboursearching etc.) */
 892   /* do_force always puts the charge groups in the box and shifts again
 893    * We do not unshift, so molecules are always whole in congrad.c
 894    */
 895   evaluate_energy(fplog,bVerbose,cr,
 896                   state_global,top_global,s_min,top,
 897                   inputrec,nrnb,wcycle,gstat,
 898                   vsite,constr,fcd,graph,mdatoms,fr,
 899                   mu_tot,enerd,vir,pres,-1,TRUE);
 900   where();
 901
 902   if (MASTER(cr)) {
 903     /* Copy stuff to the energy bin for easy printing etc. */
 904     upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 905                mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
 906                NULL,NULL,vir,pres,NULL,mu_tot,constr);
 907
 908     print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
 909     print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 910                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 911   }
 912   where();
 913
 914   /* Estimate/guess the initial stepsize */
 915   stepsize = inputrec->em_stepsize/s_min->fnorm;
 916
 917   if (MASTER(cr)) {
 918     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
 919             s_min->fmax,s_min->a_fmax+1);
 920     fprintf(stderr,"   F-Norm            = %12.5e\n",
 921             s_min->fnorm/sqrt(state_global->natoms));
 922     fprintf(stderr,"\n");
 923     /* and copy to the log file too... */
 924     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
 925             s_min->fmax,s_min->a_fmax+1);
 926     fprintf(fplog,"   F-Norm            = %12.5e\n",
 927             s_min->fnorm/sqrt(state_global->natoms));
 928     fprintf(fplog,"\n");
 929   }
 930   /* Start the loop over CG steps.
 931    * Each successful step is counted, and we continue until
 932    * we either converge or reach the max number of steps.
 933    */
 934   converged = FALSE;
 935   for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
 936
 937     /* start taking steps in a new direction
 938      * First time we enter the routine, beta=0, and the direction is
 939      * simply the negative gradient.
 940      */
 941
 942     /* Calculate the new direction in p, and the gradient in this direction, gpa */
 943     p  = s_min->s.cg_p;
 944     sf = s_min->f;
 945     gpa = 0;
 946     gf = 0;
 947     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 948       if (mdatoms->cFREEZE)
 949         gf = mdatoms->cFREEZE[i];
 950       for(m=0; m<DIM; m++) {
 951         if (!inputrec->opts.nFreeze[gf][m]) {
 952           p[i][m] = sf[i][m] + beta*p[i][m];
 953           gpa -= p[i][m]*sf[i][m];
 954           /* f is negative gradient, thus the sign */
 955         } else {
 956           p[i][m] = 0;
 957         }
 958       }
 959     }
 960
 961     /* Sum the gradient along the line across CPUs */
 962     if (PAR(cr))
 963       gmx_sumd(1,&gpa,cr);
 964
 965     /* Calculate the norm of the search vector */
 966     get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
 967
 968     /* Just in case stepsize reaches zero due to numerical precision... */
 969     if(stepsize<=0)
 970       stepsize = inputrec->em_stepsize/pnorm;
 971
 972     /*
 973      * Double check the value of the derivative in the search direction.
 974      * If it is positive it must be due to the old information in the
 975      * CG formula, so just remove that and start over with beta=0.
 976      * This corresponds to a steepest descent step.
 977      */
 978     if(gpa>0) {
 979       beta = 0;
 980       step--; /* Don't count this step since we are restarting */
 981       continue; /* Go back to the beginning of the big for-loop */
 982     }
 983
 984     /* Calculate minimum allowed stepsize, before the average (norm)
 985      * relative change in coordinate is smaller than precision
 986      */
 987     minstep=0;
 988     for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 989       for(m=0; m<DIM; m++) {
 990         tmp = fabs(s_min->s.x[i][m]);
 991         if(tmp < 1.0)
 992           tmp = 1.0;
 993         tmp = p[i][m]/tmp;
 994         minstep += tmp*tmp;
 995       }
 996     }
 997     /* Add up from all CPUs */
 998     if(PAR(cr))
 999       gmx_sumd(1,&minstep,cr);
1000
1001     minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1002
1003     if(stepsize<minstep) {
1004       converged=TRUE;
1005       break;
1006     }
1007
1008     /* Write coordinates if necessary */
1009     do_x = do_per_step(step,inputrec->nstxout);
1010     do_f = do_per_step(step,inputrec->nstfout);
1011
1012     write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1013                   top_global,inputrec,step,
1014                   s_min,state_global,f_global);
1015
1016     /* Take a step downhill.
1017      * In theory, we should minimize the function along this direction.
1018      * That is quite possible, but it turns out to take 5-10 function evaluations
1019      * for each line. However, we dont really need to find the exact minimum -
1020      * it is much better to start a new CG step in a modified direction as soon
1021      * as we are close to it. This will save a lot of energy evaluations.
1022      *
1023      * In practice, we just try to take a single step.
1024      * If it worked (i.e. lowered the energy), we increase the stepsize but
1025      * the continue straight to the next CG step without trying to find any minimum.
1026      * If it didn't work (higher energy), there must be a minimum somewhere between
1027      * the old position and the new one.
1028      *
1029      * Due to the finite numerical accuracy, it turns out that it is a good idea
1030      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1031      * This leads to lower final energies in the tests I've done. / Erik
1032      */
1033     s_a->epot = s_min->epot;
1034     a = 0.0;
1035     c = a + stepsize; /* reference position along line is zero */
1036
1037     if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1038       em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1039                              s_min,top,mdatoms,fr,vsite,constr,
1040                              nrnb,wcycle);
1041     }
1042
1043     /* Take a trial step (new coords in s_c) */
1044     do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
1045                constr,top,nrnb,wcycle,-1);
1046
1047     neval++;
1048     /* Calculate energy for the trial step */
1049     evaluate_energy(fplog,bVerbose,cr,
1050                     state_global,top_global,s_c,top,
1051                     inputrec,nrnb,wcycle,gstat,
1052                     vsite,constr,fcd,graph,mdatoms,fr,
1053                     mu_tot,enerd,vir,pres,-1,FALSE);
1054
1055     /* Calc derivative along line */
1056     p  = s_c->s.cg_p;
1057     sf = s_c->f;
1058     gpc=0;
1059     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1060       for(m=0; m<DIM; m++)
1061           gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
1062     }
1063     /* Sum the gradient along the line across CPUs */
1064     if (PAR(cr))
1065       gmx_sumd(1,&gpc,cr);
1066
1067     /* This is the max amount of increase in energy we tolerate */
1068     tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1069
1070     /* Accept the step if the energy is lower, or if it is not significantly higher
1071      * and the line derivative is still negative.
1072      */
1073     if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1074       foundlower = TRUE;
1075       /* Great, we found a better energy. Increase step for next iteration
1076        * if we are still going down, decrease it otherwise
1077        */
1078       if(gpc<0)
1079         stepsize *= 1.618034;  /* The golden section */
1080       else
1081         stepsize *= 0.618034;  /* 1/golden section */
1082     } else {
1083       /* New energy is the same or higher. We will have to do some work
1084        * to find a smaller value in the interval. Take smaller step next time!
1085        */
1086       foundlower = FALSE;
1087       stepsize *= 0.618034;
1088     }
1089
1090
1091
1092
1093     /* OK, if we didn't find a lower value we will have to locate one now - there must
1094      * be one in the interval [a=0,c].
1095      * The same thing is valid here, though: Don't spend dozens of iterations to find
1096      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1097      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1098      *
1099      * I also have a safeguard for potentially really patological functions so we never
1100      * take more than 20 steps before we give up ...
1101      *
1102      * If we already found a lower value we just skip this step and continue to the update.
1103      */
1104     if (!foundlower) {
1105       nminstep=0;
1106
1107       do {
1108         /* Select a new trial point.
1109          * If the derivatives at points a & c have different sign we interpolate to zero,
1110          * otherwise just do a bisection.
1111          */
1112         if(gpa<0 && gpc>0)
1113           b = a + gpa*(a-c)/(gpc-gpa);
1114         else
1115           b = 0.5*(a+c);
1116
1117         /* safeguard if interpolation close to machine accuracy causes errors:
1118          * never go outside the interval
1119          */
1120         if(b<=a || b>=c)
1121           b = 0.5*(a+c);
1122
1123         if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1124           /* Reload the old state */
1125           em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1126                                  s_min,top,mdatoms,fr,vsite,constr,
1127                                  nrnb,wcycle);
1128         }
1129
1130         /* Take a trial step to this new point - new coords in s_b */
1131         do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
1132                    constr,top,nrnb,wcycle,-1);
1133
1134         neval++;
1135         /* Calculate energy for the trial step */
1136         evaluate_energy(fplog,bVerbose,cr,
1137                         state_global,top_global,s_b,top,
1138                         inputrec,nrnb,wcycle,gstat,
1139                         vsite,constr,fcd,graph,mdatoms,fr,
1140                         mu_tot,enerd,vir,pres,-1,FALSE);
1141
1142         /* p does not change within a step, but since the domain decomposition
1143          * might change, we have to use cg_p of s_b here.
1144          */
1145         p  = s_b->s.cg_p;
1146         sf = s_b->f;
1147         gpb=0;
1148         for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1149           for(m=0; m<DIM; m++)
1150               gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
1151         }
1152         /* Sum the gradient along the line across CPUs */
1153         if (PAR(cr))
1154           gmx_sumd(1,&gpb,cr);
1155
1156         if (debug)
1157           fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1158                   s_a->epot,s_b->epot,s_c->epot,gpb);
1159
1160         epot_repl = s_b->epot;
1161
1162         /* Keep one of the intervals based on the value of the derivative at the new point */
1163         if (gpb > 0) {
1164           /* Replace c endpoint with b */
1165           swap_em_state(s_b,s_c);
1166           c = b;
1167           gpc = gpb;
1168         } else {
1169           /* Replace a endpoint with b */
1170           swap_em_state(s_b,s_a);
1171           a = b;
1172           gpa = gpb;
1173         }
1174
1175         /*
1176          * Stop search as soon as we find a value smaller than the endpoints.
1177          * Never run more than 20 steps, no matter what.
1178          */
1179         nminstep++;
1180       } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1181                (nminstep < 20));
1182
1183       if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1184           nminstep >= 20) {
1185         /* OK. We couldn't find a significantly lower energy.
1186          * If beta==0 this was steepest descent, and then we give up.
1187          * If not, set beta=0 and restart with steepest descent before quitting.
1188          */
1189         if (beta == 0.0) {
1190           /* Converged */
1191           converged = TRUE;
1192           break;
1193         } else {
1194           /* Reset memory before giving up */
1195           beta = 0.0;
1196           continue;
1197         }
1198       }
1199
1200       /* Select min energy state of A & C, put the best in B.
1201        */
1202       if (s_c->epot < s_a->epot) {
1203         if (debug)
1204           fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1205                   s_c->epot,s_a->epot);
1206         swap_em_state(s_b,s_c);
1207         gpb = gpc;
1208         b = c;
1209       } else {
1210         if (debug)
1211           fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1212                   s_a->epot,s_c->epot);
1213         swap_em_state(s_b,s_a);
1214         gpb = gpa;
1215         b = a;
1216       }
1217
1218     } else {
1219       if (debug)
1220         fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1221                 s_c->epot);
1222       swap_em_state(s_b,s_c);
1223       gpb = gpc;
1224       b = c;
1225     }
1226
1227     /* new search direction */
1228     /* beta = 0 means forget all memory and restart with steepest descents. */
1229     if (nstcg && ((step % nstcg)==0))
1230       beta = 0.0;
1231     else {
1232       /* s_min->fnorm cannot be zero, because then we would have converged
1233        * and broken out.
1234        */
1235
1236       /* Polak-Ribiere update.
1237        * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1238        */
1239       beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1240     }
1241     /* Limit beta to prevent oscillations */
1242     if (fabs(beta) > 5.0)
1243       beta = 0.0;
1244
1245
1246     /* update positions */
1247     swap_em_state(s_min,s_b);
1248     gpa = gpb;
1249
1250     /* Print it if necessary */
1251     if (MASTER(cr)) {
1252       if(bVerbose)
1253         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1254                 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1255                 s_min->fmax,s_min->a_fmax+1);
1256       /* Store the new (lower) energies */
1257       upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1258                  mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
1259                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1260
1261       do_log = do_per_step(step,inputrec->nstlog);
1262       do_ene = do_per_step(step,inputrec->nstenergy);
1263       if(do_log)
1264           print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1265       print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1266                  do_log ? fplog : NULL,step,step,eprNORMAL,
1267                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1268     }
1269
1270     /* Stop when the maximum force lies below tolerance.
1271      * If we have reached machine precision, converged is already set to true.
1272      */
1273     converged = converged || (s_min->fmax < inputrec->em_tol);
1274
1275   } /* End of the loop */
1276
1277   if (converged)
1278     step--; /* we never took that last step in this case */
1279
1280     if (s_min->fmax > inputrec->em_tol)
1281     {
1282         if (MASTER(cr))
1283         {
1284             warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1285             warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1286         }
1287         converged = FALSE;
1288     }
1289
1290   if (MASTER(cr)) {
1291     /* If we printed energy and/or logfile last step (which was the last step)
1292      * we don't have to do it again, but otherwise print the final values.
1293      */
1294     if(!do_log) {
1295       /* Write final value to log since we didn't do anything the last step */
1296       print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1297     }
1298     if (!do_ene || !do_log) {
1299       /* Write final energy file entries */
1300       print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1301                  !do_log ? fplog : NULL,step,step,eprNORMAL,
1302                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1303     }
1304   }
1305
1306   /* Print some stuff... */
1307   if (MASTER(cr))
1308     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1309
1310   /* IMPORTANT!
1311    * For accurate normal mode calculation it is imperative that we
1312    * store the last conformation into the full precision binary trajectory.
1313    *
1314    * However, we should only do it if we did NOT already write this step
1315    * above (which we did if do_x or do_f was true).
1316    */
1317   do_x = !do_per_step(step,inputrec->nstxout);
1318   do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1319
1320   write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1321                 top_global,inputrec,step,
1322                 s_min,state_global,f_global);
1323
1324   fnormn = s_min->fnorm/sqrt(state_global->natoms);
1325
1326   if (MASTER(cr)) {
1327     print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1328                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1329     print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1330                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1331
1332     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1333   }
1334
1335   finish_em(fplog,cr,outf,runtime,wcycle);
1336
1337   /* To print the actual number of steps we needed somewhere */
1338   runtime->nsteps_done = step;
1339
1340   return 0;
1341 } /* That's all folks */
1342
1343
1344 double do_lbfgs(FILE *fplog,t_commrec *cr,
1345                 int nfile,const t_filenm fnm[],
1346                 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1347                 int nstglobalcomm,
1348                 gmx_vsite_t *vsite,gmx_constr_t constr,
1349                 int stepout,
1350                 t_inputrec *inputrec,
1351                 gmx_mtop_t *top_global,t_fcdata *fcd,
1352                 t_state *state,
1353                 t_mdatoms *mdatoms,
1354                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1355                 gmx_edsam_t ed,
1356                 t_forcerec *fr,
1357                 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1358                 gmx_membed_t membed,
1359                 real cpt_period,real max_hours,
1360                 const char *deviceOptions,
1361                 unsigned long Flags,
1362                 gmx_runtime_t *runtime)
1363 {
1364   static const char *LBFGS="Low-Memory BFGS Minimizer";
1365   em_state_t ems;
1366   gmx_localtop_t *top;
1367   gmx_enerdata_t *enerd;
1368   rvec   *f;
1369   gmx_global_stat_t gstat;
1370   t_graph    *graph;
1371   rvec   *f_global;
1372   int    ncorr,nmaxcorr,point,cp,neval,nminstep;
1373   double stepsize,gpa,gpb,gpc,tmp,minstep;
1374   real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1375   real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1376   real   a,b,c,maxdelta,delta;
1377   real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
1378   real   dgdx,dgdg,sq,yr,beta;
1379   t_mdebin   *mdebin;
1380   gmx_bool   converged,first;
1381   rvec   mu_tot;
1382   real   fnorm,fmax;
1383   gmx_bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
1384   tensor vir,pres;
1385   int    start,end,number_steps;
1386   gmx_mdoutf_t *outf;
1387   int    i,k,m,n,nfmax,gf,step;
1388   int    mdof_flags;
1389   /* not used */
1390   real   terminate;
1391
1392   if (PAR(cr))
1393     gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1394
1395   n = 3*state->natoms;
1396   nmaxcorr = inputrec->nbfgscorr;
1397
1398   /* Allocate memory */
1399   /* Use pointers to real so we dont have to loop over both atoms and
1400    * dimensions all the time...
1401    * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1402    * that point to the same memory.
1403    */
1404   snew(xa,n);
1405   snew(xb,n);
1406   snew(xc,n);
1407   snew(fa,n);
1408   snew(fb,n);
1409   snew(fc,n);
1410   snew(frozen,n);
1411
1412   snew(p,n);
1413   snew(lastx,n);
1414   snew(lastf,n);
1415   snew(rho,nmaxcorr);
1416   snew(alpha,nmaxcorr);
1417
1418   snew(dx,nmaxcorr);
1419   for(i=0;i<nmaxcorr;i++)
1420     snew(dx[i],n);
1421
1422   snew(dg,nmaxcorr);
1423   for(i=0;i<nmaxcorr;i++)
1424     snew(dg[i],n);
1425
1426   step = 0;
1427   neval = 0;
1428
1429   /* Init em */
1430   init_em(fplog,LBFGS,cr,inputrec,
1431           state,top_global,&ems,&top,&f,&f_global,
1432           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1433           nfile,fnm,&outf,&mdebin);
1434   /* Do_lbfgs is not completely updated like do_steep and do_cg,
1435    * so we free some memory again.
1436    */
1437   sfree(ems.s.x);
1438   sfree(ems.f);
1439
1440   xx = (real *)state->x;
1441   ff = (real *)f;
1442
1443   start = mdatoms->start;
1444   end   = mdatoms->homenr + start;
1445
1446   /* Print to log file */
1447   print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1448
1449   do_log = do_ene = do_x = do_f = TRUE;
1450
1451   /* Max number of steps */
1452   number_steps=inputrec->nsteps;
1453
1454   /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1455   gf = 0;
1456   for(i=start; i<end; i++) {
1457     if (mdatoms->cFREEZE)
1458       gf = mdatoms->cFREEZE[i];
1459      for(m=0; m<DIM; m++)
1460        frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1461   }
1462   if (MASTER(cr))
1463     sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1464   if (fplog)
1465     sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1466
1467   if (vsite)
1468     construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1469                      top->idef.iparams,top->idef.il,
1470                      fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1471
1472   /* Call the force routine and some auxiliary (neighboursearching etc.) */
1473   /* do_force always puts the charge groups in the box and shifts again
1474    * We do not unshift, so molecules are always whole
1475    */
1476   neval++;
1477   ems.s.x = state->x;
1478   ems.f = f;
1479   evaluate_energy(fplog,bVerbose,cr,
1480                   state,top_global,&ems,top,
1481                   inputrec,nrnb,wcycle,gstat,
1482                   vsite,constr,fcd,graph,mdatoms,fr,
1483                   mu_tot,enerd,vir,pres,-1,TRUE);
1484   where();
1485
1486   if (MASTER(cr)) {
1487     /* Copy stuff to the energy bin for easy printing etc. */
1488     upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1489                mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1490                NULL,NULL,vir,pres,NULL,mu_tot,constr);
1491
1492     print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1493     print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1494                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1495   }
1496   where();
1497
1498   /* This is the starting energy */
1499   Epot = enerd->term[F_EPOT];
1500
1501   fnorm = ems.fnorm;
1502   fmax  = ems.fmax;
1503   nfmax = ems.a_fmax;
1504
1505   /* Set the initial step.
1506    * since it will be multiplied by the non-normalized search direction
1507    * vector (force vector the first time), we scale it by the
1508    * norm of the force.
1509    */
1510
1511   if (MASTER(cr)) {
1512     fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1513     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1514     fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1515     fprintf(stderr,"\n");
1516     /* and copy to the log file too... */
1517     fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1518     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1519     fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1520     fprintf(fplog,"\n");
1521   }
1522
1523   point=0;
1524   for(i=0;i<n;i++)
1525     if(!frozen[i])
1526       dx[point][i] = ff[i];  /* Initial search direction */
1527     else
1528       dx[point][i] = 0;
1529
1530   stepsize = 1.0/fnorm;
1531   converged = FALSE;
1532
1533   /* Start the loop over BFGS steps.
1534    * Each successful step is counted, and we continue until
1535    * we either converge or reach the max number of steps.
1536    */
1537
1538   ncorr=0;
1539
1540   /* Set the gradient from the force */
1541   converged = FALSE;
1542   for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1543
1544     /* Write coordinates if necessary */
1545     do_x = do_per_step(step,inputrec->nstxout);
1546     do_f = do_per_step(step,inputrec->nstfout);
1547
1548     mdof_flags = 0;
1549     if (do_x)
1550     {
1551         mdof_flags |= MDOF_X;
1552     }
1553
1554     if (do_f)
1555     {
1556         mdof_flags |= MDOF_F;
1557     }
1558
1559     write_traj(fplog,cr,outf,mdof_flags,
1560                top_global,step,(real)step,state,state,f,f,NULL,NULL);
1561
1562     /* Do the linesearching in the direction dx[point][0..(n-1)] */
1563
1564     /* pointer to current direction - point=0 first time here */
1565     s=dx[point];
1566
1567     /* calculate line gradient */
1568     for(gpa=0,i=0;i<n;i++)
1569         gpa-=s[i]*ff[i];
1570
1571     /* Calculate minimum allowed stepsize, before the average (norm)
1572      * relative change in coordinate is smaller than precision
1573      */
1574     for(minstep=0,i=0;i<n;i++) {
1575       tmp=fabs(xx[i]);
1576       if(tmp<1.0)
1577         tmp=1.0;
1578       tmp = s[i]/tmp;
1579       minstep += tmp*tmp;
1580     }
1581     minstep = GMX_REAL_EPS/sqrt(minstep/n);
1582
1583     if(stepsize<minstep) {
1584       converged=TRUE;
1585       break;
1586     }
1587
1588     /* Store old forces and coordinates */
1589     for(i=0;i<n;i++) {
1590       lastx[i]=xx[i];
1591       lastf[i]=ff[i];
1592     }
1593     Epot0=Epot;
1594
1595     first=TRUE;
1596
1597     for(i=0;i<n;i++)
1598       xa[i]=xx[i];
1599
1600     /* Take a step downhill.
1601      * In theory, we should minimize the function along this direction.
1602      * That is quite possible, but it turns out to take 5-10 function evaluations
1603      * for each line. However, we dont really need to find the exact minimum -
1604      * it is much better to start a new BFGS step in a modified direction as soon
1605      * as we are close to it. This will save a lot of energy evaluations.
1606      *
1607      * In practice, we just try to take a single step.
1608      * If it worked (i.e. lowered the energy), we increase the stepsize but
1609      * the continue straight to the next BFGS step without trying to find any minimum.
1610      * If it didn't work (higher energy), there must be a minimum somewhere between
1611      * the old position and the new one.
1612      *
1613      * Due to the finite numerical accuracy, it turns out that it is a good idea
1614      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1615      * This leads to lower final energies in the tests I've done. / Erik
1616      */
1617     foundlower=FALSE;
1618     EpotA = Epot0;
1619     a = 0.0;
1620     c = a + stepsize; /* reference position along line is zero */
1621
1622     /* Check stepsize first. We do not allow displacements
1623      * larger than emstep.
1624      */
1625     do {
1626       c = a + stepsize;
1627       maxdelta=0;
1628       for(i=0;i<n;i++) {
1629         delta=c*s[i];
1630         if(delta>maxdelta)
1631           maxdelta=delta;
1632       }
1633       if(maxdelta>inputrec->em_stepsize)
1634         stepsize*=0.1;
1635     } while(maxdelta>inputrec->em_stepsize);
1636
1637     /* Take a trial step */
1638     for (i=0; i<n; i++)
1639       xc[i] = lastx[i] + c*s[i];
1640
1641     neval++;
1642     /* Calculate energy for the trial step */
1643     ems.s.x = (rvec *)xc;
1644     ems.f   = (rvec *)fc;
1645     evaluate_energy(fplog,bVerbose,cr,
1646                     state,top_global,&ems,top,
1647                     inputrec,nrnb,wcycle,gstat,
1648                     vsite,constr,fcd,graph,mdatoms,fr,
1649                     mu_tot,enerd,vir,pres,step,FALSE);
1650     EpotC = ems.epot;
1651
1652     /* Calc derivative along line */
1653     for(gpc=0,i=0; i<n; i++) {
1654         gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
1655     }
1656     /* Sum the gradient along the line across CPUs */
1657     if (PAR(cr))
1658       gmx_sumd(1,&gpc,cr);
1659
1660      /* This is the max amount of increase in energy we tolerate */
1661    tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1662
1663     /* Accept the step if the energy is lower, or if it is not significantly higher
1664      * and the line derivative is still negative.
1665      */
1666     if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1667       foundlower = TRUE;
1668       /* Great, we found a better energy. Increase step for next iteration
1669        * if we are still going down, decrease it otherwise
1670        */
1671       if(gpc<0)
1672         stepsize *= 1.618034;  /* The golden section */
1673       else
1674         stepsize *= 0.618034;  /* 1/golden section */
1675     } else {
1676       /* New energy is the same or higher. We will have to do some work
1677        * to find a smaller value in the interval. Take smaller step next time!
1678        */
1679       foundlower = FALSE;
1680       stepsize *= 0.618034;
1681     }
1682
1683     /* OK, if we didn't find a lower value we will have to locate one now - there must
1684      * be one in the interval [a=0,c].
1685      * The same thing is valid here, though: Don't spend dozens of iterations to find
1686      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1687      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1688      *
1689      * I also have a safeguard for potentially really patological functions so we never
1690      * take more than 20 steps before we give up ...
1691      *
1692      * If we already found a lower value we just skip this step and continue to the update.
1693      */
1694
1695     if(!foundlower) {
1696
1697       nminstep=0;
1698       do {
1699         /* Select a new trial point.
1700          * If the derivatives at points a & c have different sign we interpolate to zero,
1701          * otherwise just do a bisection.
1702          */
1703
1704         if(gpa<0 && gpc>0)
1705           b = a + gpa*(a-c)/(gpc-gpa);
1706         else
1707           b = 0.5*(a+c);
1708
1709         /* safeguard if interpolation close to machine accuracy causes errors:
1710          * never go outside the interval
1711          */
1712         if(b<=a || b>=c)
1713           b = 0.5*(a+c);
1714
1715         /* Take a trial step */
1716         for (i=0; i<n; i++)
1717           xb[i] = lastx[i] + b*s[i];
1718
1719         neval++;
1720         /* Calculate energy for the trial step */
1721         ems.s.x = (rvec *)xb;
1722         ems.f   = (rvec *)fb;
1723         evaluate_energy(fplog,bVerbose,cr,
1724                         state,top_global,&ems,top,
1725                         inputrec,nrnb,wcycle,gstat,
1726                         vsite,constr,fcd,graph,mdatoms,fr,
1727                         mu_tot,enerd,vir,pres,step,FALSE);
1728         EpotB = ems.epot;
1729
1730         fnorm = ems.fnorm;
1731
1732         for(gpb=0,i=0; i<n; i++)
1733           gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
1734
1735         /* Sum the gradient along the line across CPUs */
1736         if (PAR(cr))
1737           gmx_sumd(1,&gpb,cr);
1738
1739         /* Keep one of the intervals based on the value of the derivative at the new point */
1740         if(gpb>0) {
1741           /* Replace c endpoint with b */
1742           EpotC = EpotB;
1743           c = b;
1744           gpc = gpb;
1745           /* swap coord pointers b/c */
1746           xtmp = xb;
1747           ftmp = fb;
1748           xb = xc;
1749           fb = fc;
1750           xc = xtmp;
1751           fc = ftmp;
1752         } else {
1753           /* Replace a endpoint with b */
1754           EpotA = EpotB;
1755           a = b;
1756           gpa = gpb;
1757           /* swap coord pointers a/b */
1758           xtmp = xb;
1759           ftmp = fb;
1760           xb = xa;
1761           fb = fa;
1762           xa = xtmp;
1763           fa = ftmp;
1764         }
1765
1766         /*
1767          * Stop search as soon as we find a value smaller than the endpoints,
1768          * or if the tolerance is below machine precision.
1769          * Never run more than 20 steps, no matter what.
1770          */
1771         nminstep++;
1772       } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1773
1774       if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1775         /* OK. We couldn't find a significantly lower energy.
1776          * If ncorr==0 this was steepest descent, and then we give up.
1777          * If not, reset memory to restart as steepest descent before quitting.
1778          */
1779         if(ncorr==0) {
1780         /* Converged */
1781           converged=TRUE;
1782           break;
1783         } else {
1784           /* Reset memory */
1785           ncorr=0;
1786           /* Search in gradient direction */
1787           for(i=0;i<n;i++)
1788             dx[point][i]=ff[i];
1789           /* Reset stepsize */
1790           stepsize = 1.0/fnorm;
1791           continue;
1792         }
1793       }
1794
1795       /* Select min energy state of A & C, put the best in xx/ff/Epot
1796        */
1797       if(EpotC<EpotA) {
1798         Epot = EpotC;
1799         /* Use state C */
1800         for(i=0;i<n;i++) {
1801           xx[i]=xc[i];
1802           ff[i]=fc[i];
1803         }
1804         stepsize=c;
1805       } else {
1806         Epot = EpotA;
1807         /* Use state A */
1808         for(i=0;i<n;i++) {
1809           xx[i]=xa[i];
1810           ff[i]=fa[i];
1811         }
1812         stepsize=a;
1813       }
1814
1815     } else {
1816       /* found lower */
1817       Epot = EpotC;
1818       /* Use state C */
1819       for(i=0;i<n;i++) {
1820         xx[i]=xc[i];
1821         ff[i]=fc[i];
1822       }
1823       stepsize=c;
1824     }
1825
1826     /* Update the memory information, and calculate a new
1827      * approximation of the inverse hessian
1828      */
1829
1830     /* Have new data in Epot, xx, ff */
1831     if(ncorr<nmaxcorr)
1832       ncorr++;
1833
1834     for(i=0;i<n;i++) {
1835       dg[point][i]=lastf[i]-ff[i];
1836       dx[point][i]*=stepsize;
1837     }
1838
1839     dgdg=0;
1840     dgdx=0;
1841     for(i=0;i<n;i++) {
1842       dgdg+=dg[point][i]*dg[point][i];
1843       dgdx+=dg[point][i]*dx[point][i];
1844     }
1845
1846     diag=dgdx/dgdg;
1847
1848     rho[point]=1.0/dgdx;
1849     point++;
1850
1851     if(point>=nmaxcorr)
1852       point=0;
1853
1854     /* Update */
1855     for(i=0;i<n;i++)
1856       p[i]=ff[i];
1857
1858     cp=point;
1859
1860     /* Recursive update. First go back over the memory points */
1861     for(k=0;k<ncorr;k++) {
1862       cp--;
1863       if(cp<0)
1864         cp=ncorr-1;
1865
1866       sq=0;
1867       for(i=0;i<n;i++)
1868         sq+=dx[cp][i]*p[i];
1869
1870       alpha[cp]=rho[cp]*sq;
1871
1872       for(i=0;i<n;i++)
1873         p[i] -= alpha[cp]*dg[cp][i];
1874     }
1875
1876     for(i=0;i<n;i++)
1877       p[i] *= diag;
1878
1879     /* And then go forward again */
1880     for(k=0;k<ncorr;k++) {
1881       yr = 0;
1882       for(i=0;i<n;i++)
1883         yr += p[i]*dg[cp][i];
1884
1885       beta = rho[cp]*yr;
1886       beta = alpha[cp]-beta;
1887
1888       for(i=0;i<n;i++)
1889         p[i] += beta*dx[cp][i];
1890
1891       cp++;
1892       if(cp>=ncorr)
1893         cp=0;
1894     }
1895
1896     for(i=0;i<n;i++)
1897       if(!frozen[i])
1898         dx[point][i] = p[i];
1899       else
1900         dx[point][i] = 0;
1901
1902     stepsize=1.0;
1903
1904     /* Test whether the convergence criterion is met */
1905     get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1906
1907     /* Print it if necessary */
1908     if (MASTER(cr)) {
1909       if(bVerbose)
1910         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1911                 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1912       /* Store the new (lower) energies */
1913       upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1914                  mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1915                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1916       do_log = do_per_step(step,inputrec->nstlog);
1917       do_ene = do_per_step(step,inputrec->nstenergy);
1918       if(do_log)
1919           print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1920       print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1921                  do_log ? fplog : NULL,step,step,eprNORMAL,
1922                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1923     }
1924
1925     /* Stop when the maximum force lies below tolerance.
1926      * If we have reached machine precision, converged is already set to true.
1927      */
1928
1929     converged = converged || (fmax < inputrec->em_tol);
1930
1931   } /* End of the loop */
1932
1933   if(converged)
1934     step--; /* we never took that last step in this case */
1935
1936     if(fmax>inputrec->em_tol)
1937     {
1938         if (MASTER(cr))
1939         {
1940             warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1941             warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1942         }
1943         converged = FALSE;
1944     }
1945
1946   /* If we printed energy and/or logfile last step (which was the last step)
1947    * we don't have to do it again, but otherwise print the final values.
1948    */
1949   if(!do_log) /* Write final value to log since we didn't do anythin last step */
1950     print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1951   if(!do_ene || !do_log) /* Write final energy file entries */
1952     print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1953                !do_log ? fplog : NULL,step,step,eprNORMAL,
1954                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1955
1956   /* Print some stuff... */
1957   if (MASTER(cr))
1958     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1959
1960   /* IMPORTANT!
1961    * For accurate normal mode calculation it is imperative that we
1962    * store the last conformation into the full precision binary trajectory.
1963    *
1964    * However, we should only do it if we did NOT already write this step
1965    * above (which we did if do_x or do_f was true).
1966    */
1967   do_x = !do_per_step(step,inputrec->nstxout);
1968   do_f = !do_per_step(step,inputrec->nstfout);
1969   write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1970                 top_global,inputrec,step,
1971                 &ems,state,f);
1972
1973   if (MASTER(cr)) {
1974     print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
1975                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1976     print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
1977                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1978
1979     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1980   }
1981
1982   finish_em(fplog,cr,outf,runtime,wcycle);
1983
1984   /* To print the actual number of steps we needed somewhere */
1985   runtime->nsteps_done = step;
1986
1987   return 0;
1988 } /* That's all folks */
1989
1990
1991 double do_steep(FILE *fplog,t_commrec *cr,
1992                 int nfile, const t_filenm fnm[],
1993                 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1994                 int nstglobalcomm,
1995                 gmx_vsite_t *vsite,gmx_constr_t constr,
1996                 int stepout,
1997                 t_inputrec *inputrec,
1998                 gmx_mtop_t *top_global,t_fcdata *fcd,
1999                 t_state *state_global,
2000                 t_mdatoms *mdatoms,
2001                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2002                 gmx_edsam_t ed,
2003                 t_forcerec *fr,
2004                 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2005                 gmx_membed_t membed,
2006                 real cpt_period,real max_hours,
2007                 const char *deviceOptions,
2008                 unsigned long Flags,
2009                 gmx_runtime_t *runtime)
2010 {
2011   const char *SD="Steepest Descents";
2012   em_state_t *s_min,*s_try;
2013   rvec       *f_global;
2014   gmx_localtop_t *top;
2015   gmx_enerdata_t *enerd;
2016   rvec   *f;
2017   gmx_global_stat_t gstat;
2018   t_graph    *graph;
2019   real   stepsize,constepsize;
2020   real   ustep,dvdlambda,fnormn;
2021   gmx_mdoutf_t *outf;
2022   t_mdebin   *mdebin;
2023   gmx_bool   bDone,bAbort,do_x,do_f;
2024   tensor vir,pres;
2025   rvec   mu_tot;
2026   int    nsteps;
2027   int    count=0;
2028   int    steps_accepted=0;
2029   /* not used */
2030   real   terminate=0;
2031
2032   s_min = init_em_state();
2033   s_try = init_em_state();
2034
2035   /* Init em and store the local state in s_try */
2036   init_em(fplog,SD,cr,inputrec,
2037           state_global,top_global,s_try,&top,&f,&f_global,
2038           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2039           nfile,fnm,&outf,&mdebin);
2040
2041   /* Print to log file  */
2042   print_em_start(fplog,cr,runtime,wcycle,SD);
2043
2044   /* Set variables for stepsize (in nm). This is the largest
2045    * step that we are going to make in any direction.
2046    */
2047   ustep = inputrec->em_stepsize;
2048   stepsize = 0;
2049
2050   /* Max number of steps  */
2051   nsteps = inputrec->nsteps;
2052
2053   if (MASTER(cr))
2054     /* Print to the screen  */
2055     sp_header(stderr,SD,inputrec->em_tol,nsteps);
2056   if (fplog)
2057     sp_header(fplog,SD,inputrec->em_tol,nsteps);
2058
2059   /**** HERE STARTS THE LOOP ****
2060    * count is the counter for the number of steps
2061    * bDone will be TRUE when the minimization has converged
2062    * bAbort will be TRUE when nsteps steps have been performed or when
2063    * the stepsize becomes smaller than is reasonable for machine precision
2064    */
2065   count  = 0;
2066   bDone  = FALSE;
2067   bAbort = FALSE;
2068   while( !bDone && !bAbort ) {
2069     bAbort = (nsteps >= 0) && (count == nsteps);
2070
2071     /* set new coordinates, except for first step */
2072     if (count > 0) {
2073       do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
2074                  constr,top,nrnb,wcycle,count);
2075     }
2076
2077     evaluate_energy(fplog,bVerbose,cr,
2078                     state_global,top_global,s_try,top,
2079                     inputrec,nrnb,wcycle,gstat,
2080                     vsite,constr,fcd,graph,mdatoms,fr,
2081                     mu_tot,enerd,vir,pres,count,count==0);
2082
2083     if (MASTER(cr))
2084       print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
2085
2086     if (count == 0)
2087       s_min->epot = s_try->epot + 1;
2088
2089     /* Print it if necessary  */
2090     if (MASTER(cr)) {
2091       if (bVerbose) {
2092         fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2093                 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2094                 (s_try->epot < s_min->epot) ? '\n' : '\r');
2095       }
2096
2097       if (s_try->epot < s_min->epot) {
2098         /* Store the new (lower) energies  */
2099         upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2100                    mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
2101                    s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
2102         print_ebin(outf->fp_ene,TRUE,
2103                    do_per_step(steps_accepted,inputrec->nstdisreout),
2104                    do_per_step(steps_accepted,inputrec->nstorireout),
2105                    fplog,count,count,eprNORMAL,TRUE,
2106                    mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2107         fflush(fplog);
2108       }
2109     }
2110
2111     /* Now if the new energy is smaller than the previous...
2112      * or if this is the first step!
2113      * or if we did random steps!
2114      */
2115
2116     if ( (count==0) || (s_try->epot < s_min->epot) ) {
2117       steps_accepted++;
2118
2119       /* Test whether the convergence criterion is met...  */
2120       bDone = (s_try->fmax < inputrec->em_tol);
2121
2122       /* Copy the arrays for force, positions and energy  */
2123       /* The 'Min' array always holds the coords and forces of the minimal
2124          sampled energy  */
2125       swap_em_state(s_min,s_try);
2126       if (count > 0)
2127         ustep *= 1.2;
2128
2129       /* Write to trn, if necessary */
2130       do_x = do_per_step(steps_accepted,inputrec->nstxout);
2131       do_f = do_per_step(steps_accepted,inputrec->nstfout);
2132       write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2133                     top_global,inputrec,count,
2134                     s_min,state_global,f_global);
2135     }
2136     else {
2137       /* If energy is not smaller make the step smaller...  */
2138       ustep *= 0.5;
2139
2140       if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2141         /* Reload the old state */
2142         em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2143                                s_min,top,mdatoms,fr,vsite,constr,
2144                                nrnb,wcycle);
2145       }
2146     }
2147
2148     /* Determine new step  */
2149     stepsize = ustep/s_min->fmax;
2150
2151     /* Check if stepsize is too small, with 1 nm as a characteristic length */
2152 #ifdef GMX_DOUBLE
2153         if (count == nsteps || ustep < 1e-12)
2154 #else
2155         if (count == nsteps || ustep < 1e-6)
2156 #endif
2157         {
2158             if (MASTER(cr))
2159             {
2160                 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2161                 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2162             }
2163             bAbort=TRUE;
2164         }
2165
2166     count++;
2167   } /* End of the loop  */
2168
2169     /* Print some shit...  */
2170   if (MASTER(cr))
2171     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2172   write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2173                 top_global,inputrec,count,
2174                 s_min,state_global,f_global);
2175
2176   fnormn = s_min->fnorm/sqrt(state_global->natoms);
2177
2178   if (MASTER(cr)) {
2179     print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2180                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2181     print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2182                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2183   }
2184
2185   finish_em(fplog,cr,outf,runtime,wcycle);
2186
2187   /* To print the actual number of steps we needed somewhere */
2188   inputrec->nsteps=count;
2189
2190   runtime->nsteps_done = count;
2191
2192   return 0;
2193 } /* That's all folks */
2194
2195
2196 double do_nm(FILE *fplog,t_commrec *cr,
2197              int nfile,const t_filenm fnm[],
2198              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2199              int nstglobalcomm,
2200              gmx_vsite_t *vsite,gmx_constr_t constr,
2201              int stepout,
2202              t_inputrec *inputrec,
2203              gmx_mtop_t *top_global,t_fcdata *fcd,
2204              t_state *state_global,
2205              t_mdatoms *mdatoms,
2206              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2207              gmx_edsam_t ed,
2208              t_forcerec *fr,
2209              int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2210              gmx_membed_t membed,
2211              real cpt_period,real max_hours,
2212              const char *deviceOptions,
2213              unsigned long Flags,
2214              gmx_runtime_t *runtime)
2215 {
2216     const char *NM = "Normal Mode Analysis";
2217     gmx_mdoutf_t *outf;
2218     int        natoms,atom,d;
2219     int        nnodes,node;
2220     rvec       *f_global;
2221     gmx_localtop_t *top;
2222     gmx_enerdata_t *enerd;
2223     rvec       *f;
2224     gmx_global_stat_t gstat;
2225     t_graph    *graph;
2226     real       t,t0,lambda,lam0;
2227     gmx_bool       bNS;
2228     tensor     vir,pres;
2229     rvec       mu_tot;
2230     rvec       *fneg,*dfdx;
2231     gmx_bool       bSparse; /* use sparse matrix storage format */
2232     size_t     sz;
2233     gmx_sparsematrix_t * sparse_matrix = NULL;
2234     real *     full_matrix             = NULL;
2235     em_state_t *   state_work;
2236
2237     /* added with respect to mdrun */
2238     int        i,j,k,row,col;
2239     real       der_range=10.0*sqrt(GMX_REAL_EPS);
2240     real       x_min;
2241     real       fnorm,fmax;
2242
2243     if (constr != NULL)
2244     {
2245         gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2246     }
2247
2248     state_work = init_em_state();
2249
2250     /* Init em and store the local state in state_minimum */
2251     init_em(fplog,NM,cr,inputrec,
2252             state_global,top_global,state_work,&top,
2253             &f,&f_global,
2254             nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2255             nfile,fnm,&outf,NULL);
2256
2257     natoms = top_global->natoms;
2258     snew(fneg,natoms);
2259     snew(dfdx,natoms);
2260
2261 #ifndef GMX_DOUBLE
2262     if (MASTER(cr))
2263     {
2264         fprintf(stderr,
2265                 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2266                 "      which MIGHT not be accurate enough for normal mode analysis.\n"
2267                 "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
2268                 "      are fairly modest even if you recompile in double precision.\n\n");
2269     }
2270 #endif
2271
2272     /* Check if we can/should use sparse storage format.
2273      *
2274      * Sparse format is only useful when the Hessian itself is sparse, which it
2275       * will be when we use a cutoff.
2276       * For small systems (n<1000) it is easier to always use full matrix format, though.
2277       */
2278     if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2279     {
2280         fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2281         bSparse = FALSE;
2282     }
2283     else if(top_global->natoms < 1000)
2284     {
2285         fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2286         bSparse = FALSE;
2287     }
2288     else
2289     {
2290         fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2291         bSparse = TRUE;
2292     }
2293
2294     sz = DIM*top_global->natoms;
2295
2296     fprintf(stderr,"Allocating Hessian memory...\n\n");
2297
2298     if(bSparse)
2299     {
2300         sparse_matrix=gmx_sparsematrix_init(sz);
2301         sparse_matrix->compressed_symmetric = TRUE;
2302     }
2303     else
2304     {
2305         snew(full_matrix,sz*sz);
2306     }
2307
2308     /* Initial values */
2309     t0           = inputrec->init_t;
2310     lam0         = inputrec->fepvals->init_lambda;
2311     t            = t0;
2312     lambda       = lam0;
2313
2314     init_nrnb(nrnb);
2315
2316     where();
2317
2318     /* Write start time and temperature */
2319     print_em_start(fplog,cr,runtime,wcycle,NM);
2320
2321     /* fudge nr of steps to nr of atoms */
2322     inputrec->nsteps = natoms*2;
2323
2324     if (MASTER(cr))
2325     {
2326         fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2327                 *(top_global->name),(int)inputrec->nsteps);
2328     }
2329
2330     nnodes = cr->nnodes;
2331
2332     /* Make evaluate_energy do a single node force calculation */
2333     cr->nnodes = 1;
2334     evaluate_energy(fplog,bVerbose,cr,
2335                     state_global,top_global,state_work,top,
2336                     inputrec,nrnb,wcycle,gstat,
2337                     vsite,constr,fcd,graph,mdatoms,fr,
2338                     mu_tot,enerd,vir,pres,-1,TRUE);
2339     cr->nnodes = nnodes;
2340
2341     /* if forces are not small, warn user */
2342     get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2343
2344     if (MASTER(cr))
2345     {
2346         fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2347         if (state_work->fmax > 1.0e-3)
2348         {
2349             fprintf(stderr,"Maximum force probably not small enough to");
2350             fprintf(stderr," ensure that you are in an \nenergy well. ");
2351             fprintf(stderr,"Be aware that negative eigenvalues may occur");
2352             fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2353         }
2354     }
2355
2356     /***********************************************************
2357      *
2358      *      Loop over all pairs in matrix
2359      *
2360      *      do_force called twice. Once with positive and
2361      *      once with negative displacement
2362      *
2363      ************************************************************/
2364
2365     /* Steps are divided one by one over the nodes */
2366     for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2367     {
2368
2369         for (d=0; d<DIM; d++)
2370         {
2371             x_min = state_work->s.x[atom][d];
2372
2373             state_work->s.x[atom][d] = x_min - der_range;
2374
2375             /* Make evaluate_energy do a single node force calculation */
2376             cr->nnodes = 1;
2377             evaluate_energy(fplog,bVerbose,cr,
2378                             state_global,top_global,state_work,top,
2379                             inputrec,nrnb,wcycle,gstat,
2380                             vsite,constr,fcd,graph,mdatoms,fr,
2381                             mu_tot,enerd,vir,pres,atom*2,FALSE);
2382
2383             for(i=0; i<natoms; i++)
2384             {
2385                 copy_rvec(state_work->f[i], fneg[i]);
2386             }
2387
2388             state_work->s.x[atom][d] = x_min + der_range;
2389
2390             evaluate_energy(fplog,bVerbose,cr,
2391                             state_global,top_global,state_work,top,
2392                             inputrec,nrnb,wcycle,gstat,
2393                             vsite,constr,fcd,graph,mdatoms,fr,
2394                             mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2395             cr->nnodes = nnodes;
2396
2397             /* x is restored to original */
2398             state_work->s.x[atom][d] = x_min;
2399
2400             for(j=0; j<natoms; j++)
2401             {
2402                 for (k=0; (k<DIM); k++)
2403                 {
2404                     dfdx[j][k] =
2405                         -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2406                 }
2407             }
2408
2409             if (!MASTER(cr))
2410             {
2411 #ifdef GMX_MPI
2412 #ifdef GMX_DOUBLE
2413 #define mpi_type MPI_DOUBLE
2414 #else
2415 #define mpi_type MPI_FLOAT
2416 #endif
2417                 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2418                          cr->mpi_comm_mygroup);
2419 #endif
2420             }
2421             else
2422             {
2423                 for(node=0; (node<nnodes && atom+node<natoms); node++)
2424                 {
2425                     if (node > 0)
2426                     {
2427 #ifdef GMX_MPI
2428                         MPI_Status stat;
2429                         MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2430                                  cr->mpi_comm_mygroup,&stat);
2431 #undef mpi_type
2432 #endif
2433                     }
2434
2435                     row = (atom + node)*DIM + d;
2436
2437                     for(j=0; j<natoms; j++)
2438                     {
2439                         for(k=0; k<DIM; k++)
2440                         {
2441                             col = j*DIM + k;
2442
2443                             if (bSparse)
2444                             {
2445                                 if (col >= row && dfdx[j][k] != 0.0)
2446                                 {
2447                                     gmx_sparsematrix_increment_value(sparse_matrix,
2448                                                                      row,col,dfdx[j][k]);
2449                                 }
2450                             }
2451                             else
2452                             {
2453                                 full_matrix[row*sz+col] = dfdx[j][k];
2454                             }
2455                         }
2456                     }
2457                 }
2458             }
2459
2460             if (bVerbose && fplog)
2461             {
2462                 fflush(fplog);
2463             }
2464         }
2465         /* write progress */
2466         if (MASTER(cr) && bVerbose)
2467         {
2468             fprintf(stderr,"\rFinished step %d out of %d",
2469                     min(atom+nnodes,natoms),natoms);
2470             fflush(stderr);
2471         }
2472     }
2473
2474     if (MASTER(cr))
2475     {
2476         fprintf(stderr,"\n\nWriting Hessian...\n");
2477         gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2478     }
2479
2480     finish_em(fplog,cr,outf,runtime,wcycle);
2481
2482     runtime->nsteps_done = natoms*2;
2483
2484     return 0;
2485 }
2486