src/gromacs/mdlib/minimize.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <string.h>
  41 #include <time.h>
  42 #include <math.h>
  43 #include "sysstuff.h"
  44 #include "string2.h"
  45 #include "network.h"
  46 #include "confio.h"
  47 #include "copyrite.h"
  48 #include "smalloc.h"
  49 #include "nrnb.h"
  50 #include "main.h"
  51 #include "force.h"
  52 #include "macros.h"
  53 #include "random.h"
  54 #include "names.h"
  55 #include "gmx_fatal.h"
  56 #include "txtdump.h"
  57 #include "typedefs.h"
  58 #include "update.h"
  59 #include "constr.h"
  60 #include "vec.h"
  61 #include "statutil.h"
  62 #include "tgroup.h"
  63 #include "mdebin.h"
  64 #include "vsite.h"
  65 #include "force.h"
  66 #include "mdrun.h"
  67 #include "domdec.h"
  68 #include "partdec.h"
  69 #include "trnio.h"
  70 #include "sparsematrix.h"
  71 #include "mtxio.h"
  72 #include "mdatoms.h"
  73 #include "ns.h"
  74 #include "gmx_wallcycle.h"
  75 #include "mtop_util.h"
  76 #include "gmxfio.h"
  77 #include "pme.h"
  78
  79 typedef struct {
  80   t_state s;
  81   rvec    *f;
  82   real    epot;
  83   real    fnorm;
  84   real    fmax;
  85   int     a_fmax;
  86 } em_state_t;
  87
  88 static em_state_t *init_em_state()
  89 {
  90   em_state_t *ems;
  91
  92   snew(ems,1);
  93
  94   return ems;
  95 }
  96
  97 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
  98                            gmx_wallcycle_t wcycle,
  99                            const char *name)
 100 {
 101     char buf[STRLEN];
 102
 103     runtime_start(runtime);
 104
 105     sprintf(buf,"Started %s",name);
 106     print_date_and_time(fplog,cr->nodeid,buf,NULL);
 107
 108     wallcycle_start(wcycle,ewcRUN);
 109 }
 110 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 111                         gmx_wallcycle_t wcycle)
 112 {
 113     wallcycle_stop(wcycle,ewcRUN);
 114
 115     runtime_end(runtime);
 116 }
 117
 118 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
 119 {
 120     fprintf(out,"\n");
 121     fprintf(out,"%s:\n",minimizer);
 122     fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
 123     fprintf(out,"   Number of steps    = %12d\n",nsteps);
 124 }
 125
 126 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
 127 {
 128     if (bLastStep)
 129     {
 130         fprintf(fp,"\nReached the maximum number of steps before reaching Fmax < %g\n",ftol);
 131     }
 132     else
 133     {
 134         fprintf(fp,"\nStepsize too small, or no change in energy.\n"
 135                 "Converged to machine precision,\n"
 136                 "but not to the requested precision Fmax < %g\n",
 137                 ftol);
 138         if (sizeof(real)<sizeof(double))
 139         {
 140             fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
 141         }
 142         if (bConstrain)
 143         {
 144             fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
 145                     "off constraints alltogether (set constraints = none in mdp file)\n");
 146         }
 147     }
 148 }
 149
 150
 151
 152 static void print_converged(FILE *fp,const char *alg,real ftol,
 153                             gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
 154                             real epot,real fmax, int nfmax, real fnorm)
 155 {
 156   char buf[STEPSTRSIZE];
 157
 158   if (bDone)
 159     fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
 160             alg,ftol,gmx_step_str(count,buf));
 161   else if(count<nsteps)
 162     fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
 163                "but did not reach the requested Fmax < %g.\n",
 164             alg,gmx_step_str(count,buf),ftol);
 165   else
 166     fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
 167             alg,ftol,gmx_step_str(count,buf));
 168
 169 #ifdef GMX_DOUBLE
 170   fprintf(fp,"Potential Energy  = %21.14e\n",epot);
 171   fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1);
 172   fprintf(fp,"Norm of force     = %21.14e\n",fnorm);
 173 #else
 174   fprintf(fp,"Potential Energy  = %14.7e\n",epot);
 175   fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1);
 176   fprintf(fp,"Norm of force     = %14.7e\n",fnorm);
 177 #endif
 178 }
 179
 180 static void get_f_norm_max(t_commrec *cr,
 181                            t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
 182                            real *fnorm,real *fmax,int *a_fmax)
 183 {
 184   double fnorm2,*sum;
 185   real fmax2,fmax2_0,fam;
 186   int  la_max,a_max,start,end,i,m,gf;
 187
 188   /* This routine finds the largest force and returns it.
 189    * On parallel machines the global max is taken.
 190    */
 191   fnorm2 = 0;
 192   fmax2 = 0;
 193   la_max = -1;
 194   gf = 0;
 195   start = mdatoms->start;
 196   end   = mdatoms->homenr + start;
 197   if (mdatoms->cFREEZE) {
 198     for(i=start; i<end; i++) {
 199       gf = mdatoms->cFREEZE[i];
 200       fam = 0;
 201       for(m=0; m<DIM; m++)
 202         if (!opts->nFreeze[gf][m])
 203           fam += sqr(f[i][m]);
 204       fnorm2 += fam;
 205       if (fam > fmax2) {
 206         fmax2  = fam;
 207         la_max = i;
 208       }
 209     }
 210   } else {
 211     for(i=start; i<end; i++) {
 212       fam = norm2(f[i]);
 213       fnorm2 += fam;
 214       if (fam > fmax2) {
 215         fmax2  = fam;
 216         la_max = i;
 217       }
 218     }
 219   }
 220
 221   if (la_max >= 0 && DOMAINDECOMP(cr)) {
 222     a_max = cr->dd->gatindex[la_max];
 223   } else {
 224     a_max = la_max;
 225   }
 226   if (PAR(cr)) {
 227     snew(sum,2*cr->nnodes+1);
 228     sum[2*cr->nodeid]   = fmax2;
 229     sum[2*cr->nodeid+1] = a_max;
 230     sum[2*cr->nnodes]   = fnorm2;
 231     gmx_sumd(2*cr->nnodes+1,sum,cr);
 232     fnorm2 = sum[2*cr->nnodes];
 233     /* Determine the global maximum */
 234     for(i=0; i<cr->nnodes; i++) {
 235       if (sum[2*i] > fmax2) {
 236         fmax2 = sum[2*i];
 237         a_max = (int)(sum[2*i+1] + 0.5);
 238       }
 239     }
 240     sfree(sum);
 241   }
 242
 243   if (fnorm)
 244     *fnorm = sqrt(fnorm2);
 245   if (fmax)
 246     *fmax  = sqrt(fmax2);
 247   if (a_fmax)
 248     *a_fmax = a_max;
 249 }
 250
 251 static void get_state_f_norm_max(t_commrec *cr,
 252                            t_grpopts *opts,t_mdatoms *mdatoms,
 253                            em_state_t *ems)
 254 {
 255   get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
 256 }
 257
 258 void init_em(FILE *fplog,const char *title,
 259              t_commrec *cr,t_inputrec *ir,
 260              t_state *state_global,gmx_mtop_t *top_global,
 261              em_state_t *ems,gmx_localtop_t **top,
 262              rvec **f,rvec **f_global,
 263              t_nrnb *nrnb,rvec mu_tot,
 264              t_forcerec *fr,gmx_enerdata_t **enerd,
 265              t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
 266              gmx_vsite_t *vsite,gmx_constr_t constr,
 267              int nfile,const t_filenm fnm[],
 268              gmx_mdoutf_t **outf,t_mdebin **mdebin)
 269 {
 270     int  start,homenr,i;
 271     real dvdlambda;
 272
 273     if (fplog)
 274     {
 275         fprintf(fplog,"Initiating %s\n",title);
 276     }
 277
 278     state_global->ngtc = 0;
 279
 280     /* Initiate some variables */
 281     if (ir->efep != efepNO)
 282     {
 283         state_global->lambda = ir->init_lambda;
 284     }
 285     else
 286     {
 287         state_global->lambda = 0.0;
 288     }
 289
 290     init_nrnb(nrnb);
 291
 292     if (DOMAINDECOMP(cr))
 293     {
 294         *top = dd_init_local_top(top_global);
 295
 296         dd_init_local_state(cr->dd,state_global,&ems->s);
 297
 298         *f = NULL;
 299
 300         /* Distribute the charge groups over the nodes from the master node */
 301         dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 302                             state_global,top_global,ir,
 303                             &ems->s,&ems->f,mdatoms,*top,
 304                             fr,vsite,NULL,constr,
 305                             nrnb,NULL,FALSE);
 306         dd_store_state(cr->dd,&ems->s);
 307
 308         if (ir->nstfout)
 309         {
 310             snew(*f_global,top_global->natoms);
 311         }
 312         else
 313         {
 314             *f_global = NULL;
 315         }
 316         *graph = NULL;
 317     }
 318     else
 319     {
 320         snew(*f,top_global->natoms);
 321
 322         /* Just copy the state */
 323         ems->s = *state_global;
 324         snew(ems->s.x,ems->s.nalloc);
 325         snew(ems->f,ems->s.nalloc);
 326         for(i=0; i<state_global->natoms; i++)
 327         {
 328             copy_rvec(state_global->x[i],ems->s.x[i]);
 329         }
 330         copy_mat(state_global->box,ems->s.box);
 331
 332         if (PAR(cr) && ir->eI != eiNM)
 333         {
 334             /* Initialize the particle decomposition and split the topology */
 335             *top = split_system(fplog,top_global,ir,cr);
 336
 337             pd_cg_range(cr,&fr->cg0,&fr->hcg);
 338         }
 339         else
 340         {
 341             *top = gmx_mtop_generate_local_top(top_global,ir);
 342         }
 343         *f_global = *f;
 344
 345         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
 346         {
 347             *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
 348         }
 349         else
 350         {
 351             *graph = NULL;
 352         }
 353
 354         if (PARTDECOMP(cr))
 355         {
 356             pd_at_range(cr,&start,&homenr);
 357             homenr -= start;
 358         }
 359         else
 360         {
 361             start  = 0;
 362             homenr = top_global->natoms;
 363         }
 364         atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
 365         update_mdatoms(mdatoms,state_global->lambda);
 366
 367         if (vsite)
 368         {
 369             set_vsite_top(vsite,*top,mdatoms,cr);
 370         }
 371     }
 372
 373     if (constr)
 374     {
 375         if (ir->eConstrAlg == econtSHAKE &&
 376             gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
 377         {
 378             gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
 379                       econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 380         }
 381
 382         if (!DOMAINDECOMP(cr))
 383         {
 384             set_constraints(constr,*top,ir,mdatoms,cr);
 385         }
 386
 387         if (!ir->bContinuation)
 388         {
 389             /* Constrain the starting coordinates */
 390             dvdlambda=0;
 391             constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
 392                       ir,NULL,cr,-1,0,mdatoms,
 393                       ems->s.x,ems->s.x,NULL,ems->s.box,
 394                       ems->s.lambda,&dvdlambda,
 395                       NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 396         }
 397     }
 398
 399     if (PAR(cr))
 400     {
 401         *gstat = global_stat_init(ir);
 402     }
 403
 404     *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
 405
 406     snew(*enerd,1);
 407     init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,*enerd);
 408
 409     if (mdebin != NULL)
 410     {
 411         /* Init bin for energy stuff */
 412         *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
 413     }
 414
 415     clear_rvec(mu_tot);
 416     calc_shifts(ems->s.box,fr->shift_vec);
 417 }
 418
 419 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
 420                       gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
 421 {
 422   if (!(cr->duty & DUTY_PME)) {
 423     /* Tell the PME only node to finish */
 424     gmx_pme_finish(cr);
 425   }
 426
 427   done_mdoutf(outf);
 428
 429   em_time_end(fplog,cr,runtime,wcycle);
 430 }
 431
 432 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
 433 {
 434   em_state_t tmp;
 435
 436   tmp   = *ems1;
 437   *ems1 = *ems2;
 438   *ems2 = tmp;
 439 }
 440
 441 static void copy_em_coords(em_state_t *ems,t_state *state)
 442 {
 443     int i;
 444
 445     for(i=0; (i<state->natoms); i++)
 446     {
 447         copy_rvec(ems->s.x[i],state->x[i]);
 448     }
 449 }
 450
 451 static void write_em_traj(FILE *fplog,t_commrec *cr,
 452                           gmx_mdoutf_t *outf,
 453                           gmx_bool bX,gmx_bool bF,const char *confout,
 454                           gmx_mtop_t *top_global,
 455                           t_inputrec *ir,gmx_large_int_t step,
 456                           em_state_t *state,
 457                           t_state *state_global,rvec *f_global)
 458 {
 459     int mdof_flags;
 460
 461     if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 462     {
 463         copy_em_coords(state,state_global);
 464         f_global = state->f;
 465     }
 466
 467     mdof_flags = 0;
 468     if (bX) { mdof_flags |= MDOF_X; }
 469     if (bF) { mdof_flags |= MDOF_F; }
 470     write_traj(fplog,cr,outf,mdof_flags,
 471                top_global,step,(double)step,
 472                &state->s,state_global,state->f,f_global,NULL,NULL);
 473
 474     if (confout != NULL && MASTER(cr))
 475     {
 476         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 477         {
 478             /* Make molecules whole only for confout writing */
 479             do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
 480                         state_global->x);
 481         }
 482
 483         write_sto_conf_mtop(confout,
 484                             *top_global->name,top_global,
 485                             state_global->x,NULL,ir->ePBC,state_global->box);
 486     }
 487 }
 488
 489 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
 490                        em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
 491                        gmx_constr_t constr,gmx_localtop_t *top,
 492                        t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 493                        gmx_large_int_t count)
 494
 495 {
 496   t_state *s1,*s2;
 497   int  start,end,gf,i,m;
 498   rvec *x1,*x2;
 499   real dvdlambda;
 500
 501   s1 = &ems1->s;
 502   s2 = &ems2->s;
 503
 504   if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 505     gmx_incons("state mismatch in do_em_step");
 506
 507   s2->flags = s1->flags;
 508
 509   if (s2->nalloc != s1->nalloc) {
 510     s2->nalloc = s1->nalloc;
 511     srenew(s2->x,s1->nalloc);
 512     srenew(ems2->f,  s1->nalloc);
 513     if (s2->flags & (1<<estCGP))
 514       srenew(s2->cg_p,  s1->nalloc);
 515   }
 516
 517   s2->natoms = s1->natoms;
 518   s2->lambda = s1->lambda;
 519   copy_mat(s1->box,s2->box);
 520
 521   start = md->start;
 522   end   = md->start + md->homenr;
 523
 524   x1 = s1->x;
 525   x2 = s2->x;
 526   gf = 0;
 527   for(i=start; i<end; i++) {
 528     if (md->cFREEZE)
 529       gf = md->cFREEZE[i];
 530     for(m=0; m<DIM; m++) {
 531       if (ir->opts.nFreeze[gf][m])
 532         x2[i][m] = x1[i][m];
 533       else
 534         x2[i][m] = x1[i][m] + a*f[i][m];
 535     }
 536   }
 537
 538   if (s2->flags & (1<<estCGP)) {
 539     /* Copy the CG p vector */
 540     x1 = s1->cg_p;
 541     x2 = s2->cg_p;
 542     for(i=start; i<end; i++)
 543       copy_rvec(x1[i],x2[i]);
 544   }
 545
 546   if (DOMAINDECOMP(cr)) {
 547     s2->ddp_count = s1->ddp_count;
 548     if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
 549       s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 550       srenew(s2->cg_gl,s2->cg_gl_nalloc);
 551     }
 552     s2->ncg_gl = s1->ncg_gl;
 553     for(i=0; i<s2->ncg_gl; i++)
 554       s2->cg_gl[i] = s1->cg_gl[i];
 555     s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 556   }
 557
 558   if (constr) {
 559     wallcycle_start(wcycle,ewcCONSTR);
 560     dvdlambda = 0;
 561     constrain(NULL,TRUE,TRUE,constr,&top->idef,
 562               ir,NULL,cr,count,0,md,
 563               s1->x,s2->x,NULL,s2->box,s2->lambda,
 564               &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 565     wallcycle_stop(wcycle,ewcCONSTR);
 566   }
 567 }
 568
 569 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
 570                                    gmx_mtop_t *top_global,t_inputrec *ir,
 571                                    em_state_t *ems,gmx_localtop_t *top,
 572                                    t_mdatoms *mdatoms,t_forcerec *fr,
 573                                    gmx_vsite_t *vsite,gmx_constr_t constr,
 574                                    t_nrnb *nrnb,gmx_wallcycle_t wcycle)
 575 {
 576     /* Repartition the domain decomposition */
 577     wallcycle_start(wcycle,ewcDOMDEC);
 578     dd_partition_system(fplog,step,cr,FALSE,1,
 579                         NULL,top_global,ir,
 580                         &ems->s,&ems->f,
 581                         mdatoms,top,fr,vsite,NULL,constr,
 582                         nrnb,wcycle,FALSE);
 583     dd_store_state(cr->dd,&ems->s);
 584     wallcycle_stop(wcycle,ewcDOMDEC);
 585 }
 586
 587 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
 588                             t_state *state_global,gmx_mtop_t *top_global,
 589                             em_state_t *ems,gmx_localtop_t *top,
 590                             t_inputrec *inputrec,
 591                             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 592                             gmx_global_stat_t gstat,
 593                             gmx_vsite_t *vsite,gmx_constr_t constr,
 594                             t_fcdata *fcd,
 595                             t_graph *graph,t_mdatoms *mdatoms,
 596                             t_forcerec *fr,rvec mu_tot,
 597                             gmx_enerdata_t *enerd,tensor vir,tensor pres,
 598                             gmx_large_int_t count,gmx_bool bFirst)
 599 {
 600   real t;
 601   gmx_bool bNS;
 602   int  nabnsb;
 603   tensor force_vir,shake_vir,ekin;
 604   real dvdl,prescorr,enercorr,dvdlcorr;
 605   real terminate=0;
 606
 607   /* Set the time to the initial time, the time does not change during EM */
 608   t = inputrec->init_t;
 609
 610   if (bFirst ||
 611       (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
 612     /* This the first state or an old state used before the last ns */
 613     bNS = TRUE;
 614   } else {
 615     bNS = FALSE;
 616     if (inputrec->nstlist > 0) {
 617       bNS = TRUE;
 618     } else if (inputrec->nstlist == -1) {
 619       nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
 620       if (PAR(cr))
 621         gmx_sumi(1,&nabnsb,cr);
 622       bNS = (nabnsb > 0);
 623     }
 624   }
 625
 626   if (vsite)
 627     construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
 628                      top->idef.iparams,top->idef.il,
 629                      fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
 630
 631   if (DOMAINDECOMP(cr)) {
 632     if (bNS) {
 633       /* Repartition the domain decomposition */
 634       em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 635                              ems,top,mdatoms,fr,vsite,constr,
 636                              nrnb,wcycle);
 637     }
 638   }
 639
 640     /* Calc force & energy on new trial position  */
 641     /* do_force always puts the charge groups in the box and shifts again
 642      * We do not unshift, so molecules are always whole in congrad.c
 643      */
 644     do_force(fplog,cr,inputrec,
 645              count,nrnb,wcycle,top,top_global,&top_global->groups,
 646              ems->s.box,ems->s.x,&ems->s.hist,
 647              ems->f,force_vir,mdatoms,enerd,fcd,
 648              ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
 649              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
 650              (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
 651
 652   /* Clear the unused shake virial and pressure */
 653   clear_mat(shake_vir);
 654   clear_mat(pres);
 655
 656     /* Communicate stuff when parallel */
 657     if (PAR(cr) && inputrec->eI != eiNM)
 658     {
 659         wallcycle_start(wcycle,ewcMoveE);
 660
 661         global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
 662                     inputrec,NULL,NULL,NULL,1,&terminate,
 663                     top_global,&ems->s,FALSE,
 664                     CGLO_ENERGY |
 665                     CGLO_PRESSURE |
 666                     CGLO_CONSTRAINT |
 667                     CGLO_FIRSTITERATE);
 668
 669         wallcycle_stop(wcycle,ewcMoveE);
 670     }
 671
 672     /* Calculate long range corrections to pressure and energy */
 673     calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda,
 674                   pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
 675     enerd->term[F_DISPCORR] = enercorr;
 676     enerd->term[F_EPOT] += enercorr;
 677     enerd->term[F_PRES] += prescorr;
 678     enerd->term[F_DVDL] += dvdlcorr;
 679
 680   ems->epot = enerd->term[F_EPOT];
 681
 682   if (constr) {
 683     /* Project out the constraint components of the force */
 684     wallcycle_start(wcycle,ewcCONSTR);
 685     dvdl = 0;
 686     constrain(NULL,FALSE,FALSE,constr,&top->idef,
 687               inputrec,NULL,cr,count,0,mdatoms,
 688               ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda,&dvdl,
 689               NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
 690     if (fr->bSepDVDL && fplog)
 691       fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);
 692     enerd->term[F_DHDL_CON] += dvdl;
 693     m_add(force_vir,shake_vir,vir);
 694     wallcycle_stop(wcycle,ewcCONSTR);
 695   } else {
 696     copy_mat(force_vir,vir);
 697   }
 698
 699   clear_mat(ekin);
 700   enerd->term[F_PRES] =
 701     calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
 702
 703   sum_dhdl(enerd,ems->s.lambda,inputrec);
 704
 705     if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 706     {
 707         get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
 708     }
 709 }
 710
 711 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 712                               gmx_mtop_t *mtop,
 713                               em_state_t *s_min,em_state_t *s_b)
 714 {
 715   rvec *fm,*fb,*fmg;
 716   t_block *cgs_gl;
 717   int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
 718   double partsum;
 719   unsigned char *grpnrFREEZE;
 720
 721   if (debug)
 722     fprintf(debug,"Doing reorder_partsum\n");
 723
 724   fm = s_min->f;
 725   fb = s_b->f;
 726
 727   cgs_gl = dd_charge_groups_global(cr->dd);
 728   index = cgs_gl->index;
 729
 730   /* Collect fm in a global vector fmg.
 731    * This conflicts with the spirit of domain decomposition,
 732    * but to fully optimize this a much more complicated algorithm is required.
 733    */
 734   snew(fmg,mtop->natoms);
 735
 736   ncg   = s_min->s.ncg_gl;
 737   cg_gl = s_min->s.cg_gl;
 738   i = 0;
 739   for(c=0; c<ncg; c++) {
 740     cg = cg_gl[c];
 741     a0 = index[cg];
 742     a1 = index[cg+1];
 743     for(a=a0; a<a1; a++) {
 744       copy_rvec(fm[i],fmg[a]);
 745       i++;
 746     }
 747   }
 748   gmx_sum(mtop->natoms*3,fmg[0],cr);
 749
 750   /* Now we will determine the part of the sum for the cgs in state s_b */
 751   ncg   = s_b->s.ncg_gl;
 752   cg_gl = s_b->s.cg_gl;
 753   partsum = 0;
 754   i = 0;
 755   gf = 0;
 756   grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 757   for(c=0; c<ncg; c++) {
 758     cg = cg_gl[c];
 759     a0 = index[cg];
 760     a1 = index[cg+1];
 761     for(a=a0; a<a1; a++) {
 762       if (mdatoms->cFREEZE && grpnrFREEZE) {
 763         gf = grpnrFREEZE[i];
 764       }
 765       for(m=0; m<DIM; m++) {
 766         if (!opts->nFreeze[gf][m]) {
 767           partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 768         }
 769       }
 770       i++;
 771     }
 772   }
 773
 774   sfree(fmg);
 775
 776   return partsum;
 777 }
 778
 779 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 780                     gmx_mtop_t *mtop,
 781                     em_state_t *s_min,em_state_t *s_b)
 782 {
 783   rvec *fm,*fb;
 784   double sum;
 785   int  gf,i,m;
 786
 787   /* This is just the classical Polak-Ribiere calculation of beta;
 788    * it looks a bit complicated since we take freeze groups into account,
 789    * and might have to sum it in parallel runs.
 790    */
 791
 792   if (!DOMAINDECOMP(cr) ||
 793       (s_min->s.ddp_count == cr->dd->ddp_count &&
 794        s_b->s.ddp_count   == cr->dd->ddp_count)) {
 795     fm = s_min->f;
 796     fb = s_b->f;
 797     sum = 0;
 798     gf = 0;
 799     /* This part of code can be incorrect with DD,
 800      * since the atom ordering in s_b and s_min might differ.
 801      */
 802     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 803       if (mdatoms->cFREEZE)
 804         gf = mdatoms->cFREEZE[i];
 805       for(m=0; m<DIM; m++)
 806         if (!opts->nFreeze[gf][m]) {
 807           sum += (fb[i][m] - fm[i][m])*fb[i][m];
 808         }
 809     }
 810   } else {
 811     /* We need to reorder cgs while summing */
 812     sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
 813   }
 814   if (PAR(cr))
 815     gmx_sumd(1,&sum,cr);
 816
 817   return sum/sqr(s_min->fnorm);
 818 }
 819
 820 double do_cg(FILE *fplog,t_commrec *cr,
 821              int nfile,const t_filenm fnm[],
 822              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 823              int nstglobalcomm,
 824              gmx_vsite_t *vsite,gmx_constr_t constr,
 825              int stepout,
 826              t_inputrec *inputrec,
 827              gmx_mtop_t *top_global,t_fcdata *fcd,
 828              t_state *state_global,
 829              t_mdatoms *mdatoms,
 830              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 831              gmx_edsam_t ed,
 832              t_forcerec *fr,
 833              int repl_ex_nst,int repl_ex_seed,
 834              gmx_membed_t membed,
 835              real cpt_period,real max_hours,
 836              const char *deviceOptions,
 837              unsigned long Flags,
 838              gmx_runtime_t *runtime)
 839 {
 840   const char *CG="Polak-Ribiere Conjugate Gradients";
 841
 842   em_state_t *s_min,*s_a,*s_b,*s_c;
 843   gmx_localtop_t *top;
 844   gmx_enerdata_t *enerd;
 845   rvec   *f;
 846   gmx_global_stat_t gstat;
 847   t_graph    *graph;
 848   rvec   *f_global,*p,*sf,*sfm;
 849   double gpa,gpb,gpc,tmp,sum[2],minstep;
 850   real   fnormn;
 851   real   stepsize;
 852   real   a,b,c,beta=0.0;
 853   real   epot_repl=0;
 854   real   pnorm;
 855   t_mdebin   *mdebin;
 856   gmx_bool   converged,foundlower;
 857   rvec   mu_tot;
 858   gmx_bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
 859   tensor vir,pres;
 860   int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
 861   gmx_mdoutf_t *outf;
 862   int    i,m,gf,step,nminstep;
 863   real   terminate=0;
 864
 865   step=0;
 866
 867   s_min = init_em_state();
 868   s_a   = init_em_state();
 869   s_b   = init_em_state();
 870   s_c   = init_em_state();
 871
 872   /* Init em and store the local state in s_min */
 873   init_em(fplog,CG,cr,inputrec,
 874           state_global,top_global,s_min,&top,&f,&f_global,
 875           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 876           nfile,fnm,&outf,&mdebin);
 877
 878   /* Print to log file */
 879   print_em_start(fplog,cr,runtime,wcycle,CG);
 880
 881   /* Max number of steps */
 882   number_steps=inputrec->nsteps;
 883
 884   if (MASTER(cr))
 885     sp_header(stderr,CG,inputrec->em_tol,number_steps);
 886   if (fplog)
 887     sp_header(fplog,CG,inputrec->em_tol,number_steps);
 888
 889   /* Call the force routine and some auxiliary (neighboursearching etc.) */
 890   /* do_force always puts the charge groups in the box and shifts again
 891    * We do not unshift, so molecules are always whole in congrad.c
 892    */
 893   evaluate_energy(fplog,bVerbose,cr,
 894                   state_global,top_global,s_min,top,
 895                   inputrec,nrnb,wcycle,gstat,
 896                   vsite,constr,fcd,graph,mdatoms,fr,
 897                   mu_tot,enerd,vir,pres,-1,TRUE);
 898   where();
 899
 900   if (MASTER(cr)) {
 901     /* Copy stuff to the energy bin for easy printing etc. */
 902     upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 903                mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
 904                NULL,NULL,vir,pres,NULL,mu_tot,constr);
 905
 906     print_ebin_header(fplog,step,step,s_min->s.lambda);
 907     print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 908                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 909   }
 910   where();
 911
 912   /* Estimate/guess the initial stepsize */
 913   stepsize = inputrec->em_stepsize/s_min->fnorm;
 914
 915   if (MASTER(cr)) {
 916     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
 917             s_min->fmax,s_min->a_fmax+1);
 918     fprintf(stderr,"   F-Norm            = %12.5e\n",
 919             s_min->fnorm/sqrt(state_global->natoms));
 920     fprintf(stderr,"\n");
 921     /* and copy to the log file too... */
 922     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
 923             s_min->fmax,s_min->a_fmax+1);
 924     fprintf(fplog,"   F-Norm            = %12.5e\n",
 925             s_min->fnorm/sqrt(state_global->natoms));
 926     fprintf(fplog,"\n");
 927   }
 928   /* Start the loop over CG steps.
 929    * Each successful step is counted, and we continue until
 930    * we either converge or reach the max number of steps.
 931    */
 932   converged = FALSE;
 933   for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
 934
 935     /* start taking steps in a new direction
 936      * First time we enter the routine, beta=0, and the direction is
 937      * simply the negative gradient.
 938      */
 939
 940     /* Calculate the new direction in p, and the gradient in this direction, gpa */
 941     p  = s_min->s.cg_p;
 942     sf = s_min->f;
 943     gpa = 0;
 944     gf = 0;
 945     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 946       if (mdatoms->cFREEZE)
 947         gf = mdatoms->cFREEZE[i];
 948       for(m=0; m<DIM; m++) {
 949         if (!inputrec->opts.nFreeze[gf][m]) {
 950           p[i][m] = sf[i][m] + beta*p[i][m];
 951           gpa -= p[i][m]*sf[i][m];
 952           /* f is negative gradient, thus the sign */
 953         } else {
 954           p[i][m] = 0;
 955         }
 956       }
 957     }
 958
 959     /* Sum the gradient along the line across CPUs */
 960     if (PAR(cr))
 961       gmx_sumd(1,&gpa,cr);
 962
 963     /* Calculate the norm of the search vector */
 964     get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
 965
 966     /* Just in case stepsize reaches zero due to numerical precision... */
 967     if(stepsize<=0)
 968       stepsize = inputrec->em_stepsize/pnorm;
 969
 970     /*
 971      * Double check the value of the derivative in the search direction.
 972      * If it is positive it must be due to the old information in the
 973      * CG formula, so just remove that and start over with beta=0.
 974      * This corresponds to a steepest descent step.
 975      */
 976     if(gpa>0) {
 977       beta = 0;
 978       step--; /* Don't count this step since we are restarting */
 979       continue; /* Go back to the beginning of the big for-loop */
 980     }
 981
 982     /* Calculate minimum allowed stepsize, before the average (norm)
 983      * relative change in coordinate is smaller than precision
 984      */
 985     minstep=0;
 986     for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 987       for(m=0; m<DIM; m++) {
 988         tmp = fabs(s_min->s.x[i][m]);
 989         if(tmp < 1.0)
 990           tmp = 1.0;
 991         tmp = p[i][m]/tmp;
 992         minstep += tmp*tmp;
 993       }
 994     }
 995     /* Add up from all CPUs */
 996     if(PAR(cr))
 997       gmx_sumd(1,&minstep,cr);
 998
 999     minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1000
1001     if(stepsize<minstep) {
1002       converged=TRUE;
1003       break;
1004     }
1005
1006     /* Write coordinates if necessary */
1007     do_x = do_per_step(step,inputrec->nstxout);
1008     do_f = do_per_step(step,inputrec->nstfout);
1009
1010     write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1011                   top_global,inputrec,step,
1012                   s_min,state_global,f_global);
1013
1014     /* Take a step downhill.
1015      * In theory, we should minimize the function along this direction.
1016      * That is quite possible, but it turns out to take 5-10 function evaluations
1017      * for each line. However, we dont really need to find the exact minimum -
1018      * it is much better to start a new CG step in a modified direction as soon
1019      * as we are close to it. This will save a lot of energy evaluations.
1020      *
1021      * In practice, we just try to take a single step.
1022      * If it worked (i.e. lowered the energy), we increase the stepsize but
1023      * the continue straight to the next CG step without trying to find any minimum.
1024      * If it didn't work (higher energy), there must be a minimum somewhere between
1025      * the old position and the new one.
1026      *
1027      * Due to the finite numerical accuracy, it turns out that it is a good idea
1028      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1029      * This leads to lower final energies in the tests I've done. / Erik
1030      */
1031     s_a->epot = s_min->epot;
1032     a = 0.0;
1033     c = a + stepsize; /* reference position along line is zero */
1034
1035     if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1036       em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1037                              s_min,top,mdatoms,fr,vsite,constr,
1038                              nrnb,wcycle);
1039     }
1040
1041     /* Take a trial step (new coords in s_c) */
1042     do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
1043                constr,top,nrnb,wcycle,-1);
1044
1045     neval++;
1046     /* Calculate energy for the trial step */
1047     evaluate_energy(fplog,bVerbose,cr,
1048                     state_global,top_global,s_c,top,
1049                     inputrec,nrnb,wcycle,gstat,
1050                     vsite,constr,fcd,graph,mdatoms,fr,
1051                     mu_tot,enerd,vir,pres,-1,FALSE);
1052
1053     /* Calc derivative along line */
1054     p  = s_c->s.cg_p;
1055     sf = s_c->f;
1056     gpc=0;
1057     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1058       for(m=0; m<DIM; m++)
1059           gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
1060     }
1061     /* Sum the gradient along the line across CPUs */
1062     if (PAR(cr))
1063       gmx_sumd(1,&gpc,cr);
1064
1065     /* This is the max amount of increase in energy we tolerate */
1066     tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1067
1068     /* Accept the step if the energy is lower, or if it is not significantly higher
1069      * and the line derivative is still negative.
1070      */
1071     if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1072       foundlower = TRUE;
1073       /* Great, we found a better energy. Increase step for next iteration
1074        * if we are still going down, decrease it otherwise
1075        */
1076       if(gpc<0)
1077         stepsize *= 1.618034;  /* The golden section */
1078       else
1079         stepsize *= 0.618034;  /* 1/golden section */
1080     } else {
1081       /* New energy is the same or higher. We will have to do some work
1082        * to find a smaller value in the interval. Take smaller step next time!
1083        */
1084       foundlower = FALSE;
1085       stepsize *= 0.618034;
1086     }
1087
1088
1089
1090
1091     /* OK, if we didn't find a lower value we will have to locate one now - there must
1092      * be one in the interval [a=0,c].
1093      * The same thing is valid here, though: Don't spend dozens of iterations to find
1094      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1095      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1096      *
1097      * I also have a safeguard for potentially really patological functions so we never
1098      * take more than 20 steps before we give up ...
1099      *
1100      * If we already found a lower value we just skip this step and continue to the update.
1101      */
1102     if (!foundlower) {
1103       nminstep=0;
1104
1105       do {
1106         /* Select a new trial point.
1107          * If the derivatives at points a & c have different sign we interpolate to zero,
1108          * otherwise just do a bisection.
1109          */
1110         if(gpa<0 && gpc>0)
1111           b = a + gpa*(a-c)/(gpc-gpa);
1112         else
1113           b = 0.5*(a+c);
1114
1115         /* safeguard if interpolation close to machine accuracy causes errors:
1116          * never go outside the interval
1117          */
1118         if(b<=a || b>=c)
1119           b = 0.5*(a+c);
1120
1121         if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1122           /* Reload the old state */
1123           em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1124                                  s_min,top,mdatoms,fr,vsite,constr,
1125                                  nrnb,wcycle);
1126         }
1127
1128         /* Take a trial step to this new point - new coords in s_b */
1129         do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
1130                    constr,top,nrnb,wcycle,-1);
1131
1132         neval++;
1133         /* Calculate energy for the trial step */
1134         evaluate_energy(fplog,bVerbose,cr,
1135                         state_global,top_global,s_b,top,
1136                         inputrec,nrnb,wcycle,gstat,
1137                         vsite,constr,fcd,graph,mdatoms,fr,
1138                         mu_tot,enerd,vir,pres,-1,FALSE);
1139
1140         /* p does not change within a step, but since the domain decomposition
1141          * might change, we have to use cg_p of s_b here.
1142          */
1143         p  = s_b->s.cg_p;
1144         sf = s_b->f;
1145         gpb=0;
1146         for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1147           for(m=0; m<DIM; m++)
1148               gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
1149         }
1150         /* Sum the gradient along the line across CPUs */
1151         if (PAR(cr))
1152           gmx_sumd(1,&gpb,cr);
1153
1154         if (debug)
1155           fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1156                   s_a->epot,s_b->epot,s_c->epot,gpb);
1157
1158         epot_repl = s_b->epot;
1159
1160         /* Keep one of the intervals based on the value of the derivative at the new point */
1161         if (gpb > 0) {
1162           /* Replace c endpoint with b */
1163           swap_em_state(s_b,s_c);
1164           c = b;
1165           gpc = gpb;
1166         } else {
1167           /* Replace a endpoint with b */
1168           swap_em_state(s_b,s_a);
1169           a = b;
1170           gpa = gpb;
1171         }
1172
1173         /*
1174          * Stop search as soon as we find a value smaller than the endpoints.
1175          * Never run more than 20 steps, no matter what.
1176          */
1177         nminstep++;
1178       } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1179                (nminstep < 20));
1180
1181       if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1182           nminstep >= 20) {
1183         /* OK. We couldn't find a significantly lower energy.
1184          * If beta==0 this was steepest descent, and then we give up.
1185          * If not, set beta=0 and restart with steepest descent before quitting.
1186          */
1187         if (beta == 0.0) {
1188           /* Converged */
1189           converged = TRUE;
1190           break;
1191         } else {
1192           /* Reset memory before giving up */
1193           beta = 0.0;
1194           continue;
1195         }
1196       }
1197
1198       /* Select min energy state of A & C, put the best in B.
1199        */
1200       if (s_c->epot < s_a->epot) {
1201         if (debug)
1202           fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1203                   s_c->epot,s_a->epot);
1204         swap_em_state(s_b,s_c);
1205         gpb = gpc;
1206         b = c;
1207       } else {
1208         if (debug)
1209           fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1210                   s_a->epot,s_c->epot);
1211         swap_em_state(s_b,s_a);
1212         gpb = gpa;
1213         b = a;
1214       }
1215
1216     } else {
1217       if (debug)
1218         fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1219                 s_c->epot);
1220       swap_em_state(s_b,s_c);
1221       gpb = gpc;
1222       b = c;
1223     }
1224
1225     /* new search direction */
1226     /* beta = 0 means forget all memory and restart with steepest descents. */
1227     if (nstcg && ((step % nstcg)==0))
1228       beta = 0.0;
1229     else {
1230       /* s_min->fnorm cannot be zero, because then we would have converged
1231        * and broken out.
1232        */
1233
1234       /* Polak-Ribiere update.
1235        * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1236        */
1237       beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1238     }
1239     /* Limit beta to prevent oscillations */
1240     if (fabs(beta) > 5.0)
1241       beta = 0.0;
1242
1243
1244     /* update positions */
1245     swap_em_state(s_min,s_b);
1246     gpa = gpb;
1247
1248     /* Print it if necessary */
1249     if (MASTER(cr)) {
1250       if(bVerbose)
1251         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1252                 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1253                 s_min->fmax,s_min->a_fmax+1);
1254       /* Store the new (lower) energies */
1255       upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1256                  mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
1257                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1258       do_log = do_per_step(step,inputrec->nstlog);
1259       do_ene = do_per_step(step,inputrec->nstenergy);
1260       if(do_log)
1261         print_ebin_header(fplog,step,step,s_min->s.lambda);
1262       print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1263                  do_log ? fplog : NULL,step,step,eprNORMAL,
1264                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1265     }
1266
1267     /* Stop when the maximum force lies below tolerance.
1268      * If we have reached machine precision, converged is already set to true.
1269      */
1270     converged = converged || (s_min->fmax < inputrec->em_tol);
1271
1272   } /* End of the loop */
1273
1274   if (converged)
1275     step--; /* we never took that last step in this case */
1276
1277     if (s_min->fmax > inputrec->em_tol)
1278     {
1279         if (MASTER(cr))
1280         {
1281             warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1282             warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1283         }
1284         converged = FALSE;
1285     }
1286
1287   if (MASTER(cr)) {
1288     /* If we printed energy and/or logfile last step (which was the last step)
1289      * we don't have to do it again, but otherwise print the final values.
1290      */
1291     if(!do_log) {
1292       /* Write final value to log since we didn't do anything the last step */
1293       print_ebin_header(fplog,step,step,s_min->s.lambda);
1294     }
1295     if (!do_ene || !do_log) {
1296       /* Write final energy file entries */
1297       print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1298                  !do_log ? fplog : NULL,step,step,eprNORMAL,
1299                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1300     }
1301   }
1302
1303   /* Print some stuff... */
1304   if (MASTER(cr))
1305     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1306
1307   /* IMPORTANT!
1308    * For accurate normal mode calculation it is imperative that we
1309    * store the last conformation into the full precision binary trajectory.
1310    *
1311    * However, we should only do it if we did NOT already write this step
1312    * above (which we did if do_x or do_f was true).
1313    */
1314   do_x = !do_per_step(step,inputrec->nstxout);
1315   do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1316
1317   write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1318                 top_global,inputrec,step,
1319                 s_min,state_global,f_global);
1320
1321   fnormn = s_min->fnorm/sqrt(state_global->natoms);
1322
1323   if (MASTER(cr)) {
1324     print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1325                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1326     print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1327                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1328
1329     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1330   }
1331
1332   finish_em(fplog,cr,outf,runtime,wcycle);
1333
1334   /* To print the actual number of steps we needed somewhere */
1335   runtime->nsteps_done = step;
1336
1337   return 0;
1338 } /* That's all folks */
1339
1340
1341 double do_lbfgs(FILE *fplog,t_commrec *cr,
1342                 int nfile,const t_filenm fnm[],
1343                 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1344                 int nstglobalcomm,
1345                 gmx_vsite_t *vsite,gmx_constr_t constr,
1346                 int stepout,
1347                 t_inputrec *inputrec,
1348                 gmx_mtop_t *top_global,t_fcdata *fcd,
1349                 t_state *state,
1350                 t_mdatoms *mdatoms,
1351                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1352                 gmx_edsam_t ed,
1353                 t_forcerec *fr,
1354                 int repl_ex_nst,int repl_ex_seed,
1355                 gmx_membed_t membed,
1356                 real cpt_period,real max_hours,
1357                 const char *deviceOptions,
1358                 unsigned long Flags,
1359                 gmx_runtime_t *runtime)
1360 {
1361   static const char *LBFGS="Low-Memory BFGS Minimizer";
1362   em_state_t ems;
1363   gmx_localtop_t *top;
1364   gmx_enerdata_t *enerd;
1365   rvec   *f;
1366   gmx_global_stat_t gstat;
1367   t_graph    *graph;
1368   rvec   *f_global;
1369   int    ncorr,nmaxcorr,point,cp,neval,nminstep;
1370   double stepsize,gpa,gpb,gpc,tmp,minstep;
1371   real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1372   real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1373   real   a,b,c,maxdelta,delta;
1374   real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
1375   real   dgdx,dgdg,sq,yr,beta;
1376   t_mdebin   *mdebin;
1377   gmx_bool   converged,first;
1378   rvec   mu_tot;
1379   real   fnorm,fmax;
1380   gmx_bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
1381   tensor vir,pres;
1382   int    start,end,number_steps;
1383   gmx_mdoutf_t *outf;
1384   int    i,k,m,n,nfmax,gf,step;
1385   /* not used */
1386   real   terminate;
1387
1388   if (PAR(cr))
1389     gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1390
1391   n = 3*state->natoms;
1392   nmaxcorr = inputrec->nbfgscorr;
1393
1394   /* Allocate memory */
1395   /* Use pointers to real so we dont have to loop over both atoms and
1396    * dimensions all the time...
1397    * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1398    * that point to the same memory.
1399    */
1400   snew(xa,n);
1401   snew(xb,n);
1402   snew(xc,n);
1403   snew(fa,n);
1404   snew(fb,n);
1405   snew(fc,n);
1406   snew(frozen,n);
1407
1408   snew(p,n);
1409   snew(lastx,n);
1410   snew(lastf,n);
1411   snew(rho,nmaxcorr);
1412   snew(alpha,nmaxcorr);
1413
1414   snew(dx,nmaxcorr);
1415   for(i=0;i<nmaxcorr;i++)
1416     snew(dx[i],n);
1417
1418   snew(dg,nmaxcorr);
1419   for(i=0;i<nmaxcorr;i++)
1420     snew(dg[i],n);
1421
1422   step = 0;
1423   neval = 0;
1424
1425   /* Init em */
1426   init_em(fplog,LBFGS,cr,inputrec,
1427           state,top_global,&ems,&top,&f,&f_global,
1428           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1429           nfile,fnm,&outf,&mdebin);
1430   /* Do_lbfgs is not completely updated like do_steep and do_cg,
1431    * so we free some memory again.
1432    */
1433   sfree(ems.s.x);
1434   sfree(ems.f);
1435
1436   xx = (real *)state->x;
1437   ff = (real *)f;
1438
1439   start = mdatoms->start;
1440   end   = mdatoms->homenr + start;
1441
1442   /* Print to log file */
1443   print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1444
1445   do_log = do_ene = do_x = do_f = TRUE;
1446
1447   /* Max number of steps */
1448   number_steps=inputrec->nsteps;
1449
1450   /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1451   gf = 0;
1452   for(i=start; i<end; i++) {
1453     if (mdatoms->cFREEZE)
1454       gf = mdatoms->cFREEZE[i];
1455      for(m=0; m<DIM; m++)
1456        frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1457   }
1458   if (MASTER(cr))
1459     sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1460   if (fplog)
1461     sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1462
1463   if (vsite)
1464     construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1465                      top->idef.iparams,top->idef.il,
1466                      fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1467
1468   /* Call the force routine and some auxiliary (neighboursearching etc.) */
1469   /* do_force always puts the charge groups in the box and shifts again
1470    * We do not unshift, so molecules are always whole
1471    */
1472   neval++;
1473   ems.s.x = state->x;
1474   ems.f = f;
1475   evaluate_energy(fplog,bVerbose,cr,
1476                   state,top_global,&ems,top,
1477                   inputrec,nrnb,wcycle,gstat,
1478                   vsite,constr,fcd,graph,mdatoms,fr,
1479                   mu_tot,enerd,vir,pres,-1,TRUE);
1480   where();
1481
1482   if (MASTER(cr)) {
1483     /* Copy stuff to the energy bin for easy printing etc. */
1484     upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1485                mdatoms->tmass,enerd,state,state->box,
1486                NULL,NULL,vir,pres,NULL,mu_tot,constr);
1487
1488     print_ebin_header(fplog,step,step,state->lambda);
1489     print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1490                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1491   }
1492   where();
1493
1494   /* This is the starting energy */
1495   Epot = enerd->term[F_EPOT];
1496
1497   fnorm = ems.fnorm;
1498   fmax  = ems.fmax;
1499   nfmax = ems.a_fmax;
1500
1501   /* Set the initial step.
1502    * since it will be multiplied by the non-normalized search direction
1503    * vector (force vector the first time), we scale it by the
1504    * norm of the force.
1505    */
1506
1507   if (MASTER(cr)) {
1508     fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1509     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1510     fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1511     fprintf(stderr,"\n");
1512     /* and copy to the log file too... */
1513     fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1514     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1515     fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1516     fprintf(fplog,"\n");
1517   }
1518
1519   point=0;
1520   for(i=0;i<n;i++)
1521     if(!frozen[i])
1522       dx[point][i] = ff[i];  /* Initial search direction */
1523     else
1524       dx[point][i] = 0;
1525
1526   stepsize = 1.0/fnorm;
1527   converged = FALSE;
1528
1529   /* Start the loop over BFGS steps.
1530    * Each successful step is counted, and we continue until
1531    * we either converge or reach the max number of steps.
1532    */
1533
1534   ncorr=0;
1535
1536   /* Set the gradient from the force */
1537   converged = FALSE;
1538   for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1539
1540     /* Write coordinates if necessary */
1541     do_x = do_per_step(step,inputrec->nstxout);
1542     do_f = do_per_step(step,inputrec->nstfout);
1543
1544     write_traj(fplog,cr,outf,MDOF_X | MDOF_F,
1545                top_global,step,(real)step,state,state,f,f,NULL,NULL);
1546
1547     /* Do the linesearching in the direction dx[point][0..(n-1)] */
1548
1549     /* pointer to current direction - point=0 first time here */
1550     s=dx[point];
1551
1552     /* calculate line gradient */
1553     for(gpa=0,i=0;i<n;i++)
1554         gpa-=s[i]*ff[i];
1555
1556     /* Calculate minimum allowed stepsize, before the average (norm)
1557      * relative change in coordinate is smaller than precision
1558      */
1559     for(minstep=0,i=0;i<n;i++) {
1560       tmp=fabs(xx[i]);
1561       if(tmp<1.0)
1562         tmp=1.0;
1563       tmp = s[i]/tmp;
1564       minstep += tmp*tmp;
1565     }
1566     minstep = GMX_REAL_EPS/sqrt(minstep/n);
1567
1568     if(stepsize<minstep) {
1569       converged=TRUE;
1570       break;
1571     }
1572
1573     /* Store old forces and coordinates */
1574     for(i=0;i<n;i++) {
1575       lastx[i]=xx[i];
1576       lastf[i]=ff[i];
1577     }
1578     Epot0=Epot;
1579
1580     first=TRUE;
1581
1582     for(i=0;i<n;i++)
1583       xa[i]=xx[i];
1584
1585     /* Take a step downhill.
1586      * In theory, we should minimize the function along this direction.
1587      * That is quite possible, but it turns out to take 5-10 function evaluations
1588      * for each line. However, we dont really need to find the exact minimum -
1589      * it is much better to start a new BFGS step in a modified direction as soon
1590      * as we are close to it. This will save a lot of energy evaluations.
1591      *
1592      * In practice, we just try to take a single step.
1593      * If it worked (i.e. lowered the energy), we increase the stepsize but
1594      * the continue straight to the next BFGS step without trying to find any minimum.
1595      * If it didn't work (higher energy), there must be a minimum somewhere between
1596      * the old position and the new one.
1597      *
1598      * Due to the finite numerical accuracy, it turns out that it is a good idea
1599      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1600      * This leads to lower final energies in the tests I've done. / Erik
1601      */
1602     foundlower=FALSE;
1603     EpotA = Epot0;
1604     a = 0.0;
1605     c = a + stepsize; /* reference position along line is zero */
1606
1607     /* Check stepsize first. We do not allow displacements
1608      * larger than emstep.
1609      */
1610     do {
1611       c = a + stepsize;
1612       maxdelta=0;
1613       for(i=0;i<n;i++) {
1614         delta=c*s[i];
1615         if(delta>maxdelta)
1616           maxdelta=delta;
1617       }
1618       if(maxdelta>inputrec->em_stepsize)
1619         stepsize*=0.1;
1620     } while(maxdelta>inputrec->em_stepsize);
1621
1622     /* Take a trial step */
1623     for (i=0; i<n; i++)
1624       xc[i] = lastx[i] + c*s[i];
1625
1626     neval++;
1627     /* Calculate energy for the trial step */
1628     ems.s.x = (rvec *)xc;
1629     ems.f   = (rvec *)fc;
1630     evaluate_energy(fplog,bVerbose,cr,
1631                     state,top_global,&ems,top,
1632                     inputrec,nrnb,wcycle,gstat,
1633                     vsite,constr,fcd,graph,mdatoms,fr,
1634                     mu_tot,enerd,vir,pres,step,FALSE);
1635     EpotC = ems.epot;
1636
1637     /* Calc derivative along line */
1638     for(gpc=0,i=0; i<n; i++) {
1639         gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
1640     }
1641     /* Sum the gradient along the line across CPUs */
1642     if (PAR(cr))
1643       gmx_sumd(1,&gpc,cr);
1644
1645      /* This is the max amount of increase in energy we tolerate */
1646    tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1647
1648     /* Accept the step if the energy is lower, or if it is not significantly higher
1649      * and the line derivative is still negative.
1650      */
1651     if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1652       foundlower = TRUE;
1653       /* Great, we found a better energy. Increase step for next iteration
1654        * if we are still going down, decrease it otherwise
1655        */
1656       if(gpc<0)
1657         stepsize *= 1.618034;  /* The golden section */
1658       else
1659         stepsize *= 0.618034;  /* 1/golden section */
1660     } else {
1661       /* New energy is the same or higher. We will have to do some work
1662        * to find a smaller value in the interval. Take smaller step next time!
1663        */
1664       foundlower = FALSE;
1665       stepsize *= 0.618034;
1666     }
1667
1668     /* OK, if we didn't find a lower value we will have to locate one now - there must
1669      * be one in the interval [a=0,c].
1670      * The same thing is valid here, though: Don't spend dozens of iterations to find
1671      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1672      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1673      *
1674      * I also have a safeguard for potentially really patological functions so we never
1675      * take more than 20 steps before we give up ...
1676      *
1677      * If we already found a lower value we just skip this step and continue to the update.
1678      */
1679
1680     if(!foundlower) {
1681
1682       nminstep=0;
1683       do {
1684         /* Select a new trial point.
1685          * If the derivatives at points a & c have different sign we interpolate to zero,
1686          * otherwise just do a bisection.
1687          */
1688
1689         if(gpa<0 && gpc>0)
1690           b = a + gpa*(a-c)/(gpc-gpa);
1691         else
1692           b = 0.5*(a+c);
1693
1694         /* safeguard if interpolation close to machine accuracy causes errors:
1695          * never go outside the interval
1696          */
1697         if(b<=a || b>=c)
1698           b = 0.5*(a+c);
1699
1700         /* Take a trial step */
1701         for (i=0; i<n; i++)
1702           xb[i] = lastx[i] + b*s[i];
1703
1704         neval++;
1705         /* Calculate energy for the trial step */
1706         ems.s.x = (rvec *)xb;
1707         ems.f   = (rvec *)fb;
1708         evaluate_energy(fplog,bVerbose,cr,
1709                         state,top_global,&ems,top,
1710                         inputrec,nrnb,wcycle,gstat,
1711                         vsite,constr,fcd,graph,mdatoms,fr,
1712                         mu_tot,enerd,vir,pres,step,FALSE);
1713         EpotB = ems.epot;
1714
1715         fnorm = ems.fnorm;
1716
1717         for(gpb=0,i=0; i<n; i++)
1718           gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
1719
1720         /* Sum the gradient along the line across CPUs */
1721         if (PAR(cr))
1722           gmx_sumd(1,&gpb,cr);
1723
1724         /* Keep one of the intervals based on the value of the derivative at the new point */
1725         if(gpb>0) {
1726           /* Replace c endpoint with b */
1727           EpotC = EpotB;
1728           c = b;
1729           gpc = gpb;
1730           /* swap coord pointers b/c */
1731           xtmp = xb;
1732           ftmp = fb;
1733           xb = xc;
1734           fb = fc;
1735           xc = xtmp;
1736           fc = ftmp;
1737         } else {
1738           /* Replace a endpoint with b */
1739           EpotA = EpotB;
1740           a = b;
1741           gpa = gpb;
1742           /* swap coord pointers a/b */
1743           xtmp = xb;
1744           ftmp = fb;
1745           xb = xa;
1746           fb = fa;
1747           xa = xtmp;
1748           fa = ftmp;
1749         }
1750
1751         /*
1752          * Stop search as soon as we find a value smaller than the endpoints,
1753          * or if the tolerance is below machine precision.
1754          * Never run more than 20 steps, no matter what.
1755          */
1756         nminstep++;
1757       } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1758
1759       if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1760         /* OK. We couldn't find a significantly lower energy.
1761          * If ncorr==0 this was steepest descent, and then we give up.
1762          * If not, reset memory to restart as steepest descent before quitting.
1763          */
1764         if(ncorr==0) {
1765         /* Converged */
1766           converged=TRUE;
1767           break;
1768         } else {
1769           /* Reset memory */
1770           ncorr=0;
1771           /* Search in gradient direction */
1772           for(i=0;i<n;i++)
1773             dx[point][i]=ff[i];
1774           /* Reset stepsize */
1775           stepsize = 1.0/fnorm;
1776           continue;
1777         }
1778       }
1779
1780       /* Select min energy state of A & C, put the best in xx/ff/Epot
1781        */
1782       if(EpotC<EpotA) {
1783         Epot = EpotC;
1784         /* Use state C */
1785         for(i=0;i<n;i++) {
1786           xx[i]=xc[i];
1787           ff[i]=fc[i];
1788         }
1789         stepsize=c;
1790       } else {
1791         Epot = EpotA;
1792         /* Use state A */
1793         for(i=0;i<n;i++) {
1794           xx[i]=xa[i];
1795           ff[i]=fa[i];
1796         }
1797         stepsize=a;
1798       }
1799
1800     } else {
1801       /* found lower */
1802       Epot = EpotC;
1803       /* Use state C */
1804       for(i=0;i<n;i++) {
1805         xx[i]=xc[i];
1806         ff[i]=fc[i];
1807       }
1808       stepsize=c;
1809     }
1810
1811     /* Update the memory information, and calculate a new
1812      * approximation of the inverse hessian
1813      */
1814
1815     /* Have new data in Epot, xx, ff */
1816     if(ncorr<nmaxcorr)
1817       ncorr++;
1818
1819     for(i=0;i<n;i++) {
1820       dg[point][i]=lastf[i]-ff[i];
1821       dx[point][i]*=stepsize;
1822     }
1823
1824     dgdg=0;
1825     dgdx=0;
1826     for(i=0;i<n;i++) {
1827       dgdg+=dg[point][i]*dg[point][i];
1828       dgdx+=dg[point][i]*dx[point][i];
1829     }
1830
1831     diag=dgdx/dgdg;
1832
1833     rho[point]=1.0/dgdx;
1834     point++;
1835
1836     if(point>=nmaxcorr)
1837       point=0;
1838
1839     /* Update */
1840     for(i=0;i<n;i++)
1841       p[i]=ff[i];
1842
1843     cp=point;
1844
1845     /* Recursive update. First go back over the memory points */
1846     for(k=0;k<ncorr;k++) {
1847       cp--;
1848       if(cp<0)
1849         cp=ncorr-1;
1850
1851       sq=0;
1852       for(i=0;i<n;i++)
1853         sq+=dx[cp][i]*p[i];
1854
1855       alpha[cp]=rho[cp]*sq;
1856
1857       for(i=0;i<n;i++)
1858         p[i] -= alpha[cp]*dg[cp][i];
1859     }
1860
1861     for(i=0;i<n;i++)
1862       p[i] *= diag;
1863
1864     /* And then go forward again */
1865     for(k=0;k<ncorr;k++) {
1866       yr = 0;
1867       for(i=0;i<n;i++)
1868         yr += p[i]*dg[cp][i];
1869
1870       beta = rho[cp]*yr;
1871       beta = alpha[cp]-beta;
1872
1873       for(i=0;i<n;i++)
1874         p[i] += beta*dx[cp][i];
1875
1876       cp++;
1877       if(cp>=ncorr)
1878         cp=0;
1879     }
1880
1881     for(i=0;i<n;i++)
1882       if(!frozen[i])
1883         dx[point][i] = p[i];
1884       else
1885         dx[point][i] = 0;
1886
1887     stepsize=1.0;
1888
1889     /* Test whether the convergence criterion is met */
1890     get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1891
1892     /* Print it if necessary */
1893     if (MASTER(cr)) {
1894       if(bVerbose)
1895         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1896                 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1897       /* Store the new (lower) energies */
1898       upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1899                  mdatoms->tmass,enerd,state,state->box,
1900                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1901       do_log = do_per_step(step,inputrec->nstlog);
1902       do_ene = do_per_step(step,inputrec->nstenergy);
1903       if(do_log)
1904         print_ebin_header(fplog,step,step,state->lambda);
1905       print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1906                  do_log ? fplog : NULL,step,step,eprNORMAL,
1907                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1908     }
1909
1910     /* Stop when the maximum force lies below tolerance.
1911      * If we have reached machine precision, converged is already set to true.
1912      */
1913
1914     converged = converged || (fmax < inputrec->em_tol);
1915
1916   } /* End of the loop */
1917
1918   if(converged)
1919     step--; /* we never took that last step in this case */
1920
1921     if(fmax>inputrec->em_tol)
1922     {
1923         if (MASTER(cr))
1924         {
1925             warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1926             warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1927         }
1928         converged = FALSE;
1929     }
1930
1931   /* If we printed energy and/or logfile last step (which was the last step)
1932    * we don't have to do it again, but otherwise print the final values.
1933    */
1934   if(!do_log) /* Write final value to log since we didn't do anythin last step */
1935     print_ebin_header(fplog,step,step,state->lambda);
1936   if(!do_ene || !do_log) /* Write final energy file entries */
1937     print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1938                !do_log ? fplog : NULL,step,step,eprNORMAL,
1939                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1940
1941   /* Print some stuff... */
1942   if (MASTER(cr))
1943     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1944
1945   /* IMPORTANT!
1946    * For accurate normal mode calculation it is imperative that we
1947    * store the last conformation into the full precision binary trajectory.
1948    *
1949    * However, we should only do it if we did NOT already write this step
1950    * above (which we did if do_x or do_f was true).
1951    */
1952   do_x = !do_per_step(step,inputrec->nstxout);
1953   do_f = !do_per_step(step,inputrec->nstfout);
1954   write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1955                 top_global,inputrec,step,
1956                 &ems,state,f);
1957
1958   if (MASTER(cr)) {
1959     print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
1960                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1961     print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
1962                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1963
1964     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1965   }
1966
1967   finish_em(fplog,cr,outf,runtime,wcycle);
1968
1969   /* To print the actual number of steps we needed somewhere */
1970   runtime->nsteps_done = step;
1971
1972   return 0;
1973 } /* That's all folks */
1974
1975
1976 double do_steep(FILE *fplog,t_commrec *cr,
1977                 int nfile, const t_filenm fnm[],
1978                 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1979                 int nstglobalcomm,
1980                 gmx_vsite_t *vsite,gmx_constr_t constr,
1981                 int stepout,
1982                 t_inputrec *inputrec,
1983                 gmx_mtop_t *top_global,t_fcdata *fcd,
1984                 t_state *state_global,
1985                 t_mdatoms *mdatoms,
1986                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1987                 gmx_edsam_t ed,
1988                 t_forcerec *fr,
1989                 int repl_ex_nst,int repl_ex_seed,
1990                 gmx_membed_t membed,
1991                 real cpt_period,real max_hours,
1992                 const char *deviceOptions,
1993                 unsigned long Flags,
1994                 gmx_runtime_t *runtime)
1995 {
1996   const char *SD="Steepest Descents";
1997   em_state_t *s_min,*s_try;
1998   rvec       *f_global;
1999   gmx_localtop_t *top;
2000   gmx_enerdata_t *enerd;
2001   rvec   *f;
2002   gmx_global_stat_t gstat;
2003   t_graph    *graph;
2004   real   stepsize,constepsize;
2005   real   ustep,dvdlambda,fnormn;
2006   gmx_mdoutf_t *outf;
2007   t_mdebin   *mdebin;
2008   gmx_bool   bDone,bAbort,do_x,do_f;
2009   tensor vir,pres;
2010   rvec   mu_tot;
2011   int    nsteps;
2012   int    count=0;
2013   int    steps_accepted=0;
2014   /* not used */
2015   real   terminate=0;
2016
2017   s_min = init_em_state();
2018   s_try = init_em_state();
2019
2020   /* Init em and store the local state in s_try */
2021   init_em(fplog,SD,cr,inputrec,
2022           state_global,top_global,s_try,&top,&f,&f_global,
2023           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2024           nfile,fnm,&outf,&mdebin);
2025
2026   /* Print to log file  */
2027   print_em_start(fplog,cr,runtime,wcycle,SD);
2028
2029   /* Set variables for stepsize (in nm). This is the largest
2030    * step that we are going to make in any direction.
2031    */
2032   ustep = inputrec->em_stepsize;
2033   stepsize = 0;
2034
2035   /* Max number of steps  */
2036   nsteps = inputrec->nsteps;
2037
2038   if (MASTER(cr))
2039     /* Print to the screen  */
2040     sp_header(stderr,SD,inputrec->em_tol,nsteps);
2041   if (fplog)
2042     sp_header(fplog,SD,inputrec->em_tol,nsteps);
2043
2044   /**** HERE STARTS THE LOOP ****
2045    * count is the counter for the number of steps
2046    * bDone will be TRUE when the minimization has converged
2047    * bAbort will be TRUE when nsteps steps have been performed or when
2048    * the stepsize becomes smaller than is reasonable for machine precision
2049    */
2050   count  = 0;
2051   bDone  = FALSE;
2052   bAbort = FALSE;
2053   while( !bDone && !bAbort ) {
2054     bAbort = (nsteps >= 0) && (count == nsteps);
2055
2056     /* set new coordinates, except for first step */
2057     if (count > 0) {
2058       do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
2059                  constr,top,nrnb,wcycle,count);
2060     }
2061
2062     evaluate_energy(fplog,bVerbose,cr,
2063                     state_global,top_global,s_try,top,
2064                     inputrec,nrnb,wcycle,gstat,
2065                     vsite,constr,fcd,graph,mdatoms,fr,
2066                     mu_tot,enerd,vir,pres,count,count==0);
2067
2068     if (MASTER(cr))
2069       print_ebin_header(fplog,count,count,s_try->s.lambda);
2070
2071     if (count == 0)
2072       s_min->epot = s_try->epot + 1;
2073
2074     /* Print it if necessary  */
2075     if (MASTER(cr)) {
2076       if (bVerbose) {
2077         fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2078                 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2079                 (s_try->epot < s_min->epot) ? '\n' : '\r');
2080       }
2081
2082       if (s_try->epot < s_min->epot) {
2083         /* Store the new (lower) energies  */
2084         upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2085                    mdatoms->tmass,enerd,&s_try->s,s_try->s.box,
2086                    NULL,NULL,vir,pres,NULL,mu_tot,constr);
2087         print_ebin(outf->fp_ene,TRUE,
2088                    do_per_step(steps_accepted,inputrec->nstdisreout),
2089                    do_per_step(steps_accepted,inputrec->nstorireout),
2090                    fplog,count,count,eprNORMAL,TRUE,
2091                    mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2092         fflush(fplog);
2093       }
2094     }
2095
2096     /* Now if the new energy is smaller than the previous...
2097      * or if this is the first step!
2098      * or if we did random steps!
2099      */
2100
2101     if ( (count==0) || (s_try->epot < s_min->epot) ) {
2102       steps_accepted++;
2103
2104       /* Test whether the convergence criterion is met...  */
2105       bDone = (s_try->fmax < inputrec->em_tol);
2106
2107       /* Copy the arrays for force, positions and energy  */
2108       /* The 'Min' array always holds the coords and forces of the minimal
2109          sampled energy  */
2110       swap_em_state(s_min,s_try);
2111       if (count > 0)
2112         ustep *= 1.2;
2113
2114       /* Write to trn, if necessary */
2115       do_x = do_per_step(steps_accepted,inputrec->nstxout);
2116       do_f = do_per_step(steps_accepted,inputrec->nstfout);
2117       write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2118                     top_global,inputrec,count,
2119                     s_min,state_global,f_global);
2120     }
2121     else {
2122       /* If energy is not smaller make the step smaller...  */
2123       ustep *= 0.5;
2124
2125       if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2126         /* Reload the old state */
2127         em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2128                                s_min,top,mdatoms,fr,vsite,constr,
2129                                nrnb,wcycle);
2130       }
2131     }
2132
2133     /* Determine new step  */
2134     stepsize = ustep/s_min->fmax;
2135
2136     /* Check if stepsize is too small, with 1 nm as a characteristic length */
2137 #ifdef GMX_DOUBLE
2138         if (count == nsteps || ustep < 1e-12)
2139 #else
2140         if (count == nsteps || ustep < 1e-6)
2141 #endif
2142         {
2143             if (MASTER(cr))
2144             {
2145                 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2146                 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2147             }
2148             bAbort=TRUE;
2149         }
2150
2151     count++;
2152   } /* End of the loop  */
2153
2154     /* Print some shit...  */
2155   if (MASTER(cr))
2156     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2157   write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2158                 top_global,inputrec,count,
2159                 s_min,state_global,f_global);
2160
2161   fnormn = s_min->fnorm/sqrt(state_global->natoms);
2162
2163   if (MASTER(cr)) {
2164     print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2165                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2166     print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2167                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2168   }
2169
2170   finish_em(fplog,cr,outf,runtime,wcycle);
2171
2172   /* To print the actual number of steps we needed somewhere */
2173   inputrec->nsteps=count;
2174
2175   runtime->nsteps_done = count;
2176
2177   return 0;
2178 } /* That's all folks */
2179
2180
2181 double do_nm(FILE *fplog,t_commrec *cr,
2182              int nfile,const t_filenm fnm[],
2183              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2184              int nstglobalcomm,
2185              gmx_vsite_t *vsite,gmx_constr_t constr,
2186              int stepout,
2187              t_inputrec *inputrec,
2188              gmx_mtop_t *top_global,t_fcdata *fcd,
2189              t_state *state_global,
2190              t_mdatoms *mdatoms,
2191              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2192              gmx_edsam_t ed,
2193              t_forcerec *fr,
2194              int repl_ex_nst,int repl_ex_seed,
2195              gmx_membed_t membed,
2196              real cpt_period,real max_hours,
2197              const char *deviceOptions,
2198              unsigned long Flags,
2199              gmx_runtime_t *runtime)
2200 {
2201     const char *NM = "Normal Mode Analysis";
2202     gmx_mdoutf_t *outf;
2203     int        natoms,atom,d;
2204     int        nnodes,node;
2205     rvec       *f_global;
2206     gmx_localtop_t *top;
2207     gmx_enerdata_t *enerd;
2208     rvec       *f;
2209     gmx_global_stat_t gstat;
2210     t_graph    *graph;
2211     real       t,lambda;
2212     gmx_bool       bNS;
2213     tensor     vir,pres;
2214     rvec       mu_tot;
2215     rvec       *fneg,*dfdx;
2216     gmx_bool       bSparse; /* use sparse matrix storage format */
2217     size_t     sz;
2218     gmx_sparsematrix_t * sparse_matrix = NULL;
2219     real *     full_matrix             = NULL;
2220     em_state_t *   state_work;
2221
2222     /* added with respect to mdrun */
2223     int        i,j,k,row,col;
2224     real       der_range=10.0*sqrt(GMX_REAL_EPS);
2225     real       x_min;
2226     real       fnorm,fmax;
2227
2228     if (constr != NULL)
2229     {
2230         gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2231     }
2232
2233     state_work = init_em_state();
2234
2235     /* Init em and store the local state in state_minimum */
2236     init_em(fplog,NM,cr,inputrec,
2237             state_global,top_global,state_work,&top,
2238             &f,&f_global,
2239             nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2240             nfile,fnm,&outf,NULL);
2241
2242     natoms = top_global->natoms;
2243     snew(fneg,natoms);
2244     snew(dfdx,natoms);
2245
2246 #ifndef GMX_DOUBLE
2247     if (MASTER(cr))
2248     {
2249         fprintf(stderr,
2250                 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2251                 "      which MIGHT not be accurate enough for normal mode analysis.\n"
2252                 "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
2253                 "      are fairly modest even if you recompile in double precision.\n\n");
2254     }
2255 #endif
2256
2257     /* Check if we can/should use sparse storage format.
2258      *
2259      * Sparse format is only useful when the Hessian itself is sparse, which it
2260       * will be when we use a cutoff.
2261       * For small systems (n<1000) it is easier to always use full matrix format, though.
2262       */
2263     if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2264     {
2265         fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2266         bSparse = FALSE;
2267     }
2268     else if(top_global->natoms < 1000)
2269     {
2270         fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2271         bSparse = FALSE;
2272     }
2273     else
2274     {
2275         fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2276         bSparse = TRUE;
2277     }
2278
2279     sz = DIM*top_global->natoms;
2280
2281     fprintf(stderr,"Allocating Hessian memory...\n\n");
2282
2283     if(bSparse)
2284     {
2285         sparse_matrix=gmx_sparsematrix_init(sz);
2286         sparse_matrix->compressed_symmetric = TRUE;
2287     }
2288     else
2289     {
2290         snew(full_matrix,sz*sz);
2291     }
2292
2293     /* Initial values */
2294     t      = inputrec->init_t;
2295     lambda = inputrec->init_lambda;
2296
2297     init_nrnb(nrnb);
2298
2299     where();
2300
2301     /* Write start time and temperature */
2302     print_em_start(fplog,cr,runtime,wcycle,NM);
2303
2304     /* fudge nr of steps to nr of atoms */
2305     inputrec->nsteps = natoms*2;
2306
2307     if (MASTER(cr))
2308     {
2309         fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2310                 *(top_global->name),(int)inputrec->nsteps);
2311     }
2312
2313     nnodes = cr->nnodes;
2314
2315     /* Make evaluate_energy do a single node force calculation */
2316     cr->nnodes = 1;
2317     evaluate_energy(fplog,bVerbose,cr,
2318                     state_global,top_global,state_work,top,
2319                     inputrec,nrnb,wcycle,gstat,
2320                     vsite,constr,fcd,graph,mdatoms,fr,
2321                     mu_tot,enerd,vir,pres,-1,TRUE);
2322     cr->nnodes = nnodes;
2323
2324     /* if forces are not small, warn user */
2325     get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2326
2327     if (MASTER(cr))
2328     {
2329         fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2330         if (state_work->fmax > 1.0e-3)
2331         {
2332             fprintf(stderr,"Maximum force probably not small enough to");
2333             fprintf(stderr," ensure that you are in an \nenergy well. ");
2334             fprintf(stderr,"Be aware that negative eigenvalues may occur");
2335             fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2336         }
2337     }
2338
2339     /***********************************************************
2340      *
2341      *      Loop over all pairs in matrix
2342      *
2343      *      do_force called twice. Once with positive and
2344      *      once with negative displacement
2345      *
2346      ************************************************************/
2347
2348     /* Steps are divided one by one over the nodes */
2349     for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2350     {
2351
2352         for (d=0; d<DIM; d++)
2353         {
2354             x_min = state_work->s.x[atom][d];
2355
2356             state_work->s.x[atom][d] = x_min - der_range;
2357
2358             /* Make evaluate_energy do a single node force calculation */
2359             cr->nnodes = 1;
2360             evaluate_energy(fplog,bVerbose,cr,
2361                             state_global,top_global,state_work,top,
2362                             inputrec,nrnb,wcycle,gstat,
2363                             vsite,constr,fcd,graph,mdatoms,fr,
2364                             mu_tot,enerd,vir,pres,atom*2,FALSE);
2365
2366             for(i=0; i<natoms; i++)
2367             {
2368                 copy_rvec(state_work->f[i], fneg[i]);
2369             }
2370
2371             state_work->s.x[atom][d] = x_min + der_range;
2372
2373             evaluate_energy(fplog,bVerbose,cr,
2374                             state_global,top_global,state_work,top,
2375                             inputrec,nrnb,wcycle,gstat,
2376                             vsite,constr,fcd,graph,mdatoms,fr,
2377                             mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2378             cr->nnodes = nnodes;
2379
2380             /* x is restored to original */
2381             state_work->s.x[atom][d] = x_min;
2382
2383             for(j=0; j<natoms; j++)
2384             {
2385                 for (k=0; (k<DIM); k++)
2386                 {
2387                     dfdx[j][k] =
2388                         -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2389                 }
2390             }
2391
2392             if (!MASTER(cr))
2393             {
2394 #ifdef GMX_MPI
2395 #ifdef GMX_DOUBLE
2396 #define mpi_type MPI_DOUBLE
2397 #else
2398 #define mpi_type MPI_FLOAT
2399 #endif
2400                 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2401                          cr->mpi_comm_mygroup);
2402 #endif
2403             }
2404             else
2405             {
2406                 for(node=0; (node<nnodes && atom+node<natoms); node++)
2407                 {
2408                     if (node > 0)
2409                     {
2410 #ifdef GMX_MPI
2411                         MPI_Status stat;
2412                         MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2413                                  cr->mpi_comm_mygroup,&stat);
2414 #undef mpi_type
2415 #endif
2416                     }
2417
2418                     row = (atom + node)*DIM + d;
2419
2420                     for(j=0; j<natoms; j++)
2421                     {
2422                         for(k=0; k<DIM; k++)
2423                         {
2424                             col = j*DIM + k;
2425
2426                             if (bSparse)
2427                             {
2428                                 if (col >= row && dfdx[j][k] != 0.0)
2429                                 {
2430                                     gmx_sparsematrix_increment_value(sparse_matrix,
2431                                                                      row,col,dfdx[j][k]);
2432                                 }
2433                             }
2434                             else
2435                             {
2436                                 full_matrix[row*sz+col] = dfdx[j][k];
2437                             }
2438                         }
2439                     }
2440                 }
2441             }
2442
2443             if (bVerbose && fplog)
2444             {
2445                 fflush(fplog);
2446             }
2447         }
2448         /* write progress */
2449         if (MASTER(cr) && bVerbose)
2450         {
2451             fprintf(stderr,"\rFinished step %d out of %d",
2452                     min(atom+nnodes,natoms),natoms);
2453             fflush(stderr);
2454         }
2455     }
2456
2457     if (MASTER(cr))
2458     {
2459         fprintf(stderr,"\n\nWriting Hessian...\n");
2460         gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2461     }
2462
2463     finish_em(fplog,cr,outf,runtime,wcycle);
2464
2465     runtime->nsteps_done = natoms*2;
2466
2467     return 0;
2468 }