src/mdlib/minimize.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <string.h>
  41 #include <time.h>
  42 #include <math.h>
  43 #include "sysstuff.h"
  44 #include "string2.h"
  45 #include "network.h"
  46 #include "confio.h"
  47 #include "copyrite.h"
  48 #include "smalloc.h"
  49 #include "nrnb.h"
  50 #include "main.h"
  51 #include "force.h"
  52 #include "macros.h"
  53 #include "random.h"
  54 #include "names.h"
  55 #include "gmx_fatal.h"
  56 #include "txtdump.h"
  57 #include "typedefs.h"
  58 #include "update.h"
  59 #include "constr.h"
  60 #include "vec.h"
  61 #include "statutil.h"
  62 #include "tgroup.h"
  63 #include "mdebin.h"
  64 #include "vsite.h"
  65 #include "force.h"
  66 #include "mdrun.h"
  67 #include "domdec.h"
  68 #include "partdec.h"
  69 #include "trnio.h"
  70 #include "sparsematrix.h"
  71 #include "mtxio.h"
  72 #include "mdatoms.h"
  73 #include "ns.h"
  74 #include "gmx_wallcycle.h"
  75 #include "mtop_util.h"
  76 #include "gmxfio.h"
  77 #include "pme.h"
  78
  79 typedef struct {
  80   t_state s;
  81   rvec    *f;
  82   real    epot;
  83   real    fnorm;
  84   real    fmax;
  85   int     a_fmax;
  86 } em_state_t;
  87
  88 static em_state_t *init_em_state()
  89 {
  90   em_state_t *ems;
  91
  92   snew(ems,1);
  93
  94   return ems;
  95 }
  96
  97 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
  98                            gmx_wallcycle_t wcycle,
  99                            const char *name)
 100 {
 101     char buf[STRLEN];
 102
 103     runtime_start(runtime);
 104
 105     sprintf(buf,"Started %s",name);
 106     print_date_and_time(fplog,cr->nodeid,buf,NULL);
 107
 108     wallcycle_start(wcycle,ewcRUN);
 109 }
 110 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 111                         gmx_wallcycle_t wcycle)
 112 {
 113     wallcycle_stop(wcycle,ewcRUN);
 114
 115     runtime_end(runtime);
 116 }
 117
 118 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
 119 {
 120     fprintf(out,"\n");
 121     fprintf(out,"%s:\n",minimizer);
 122     fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
 123     fprintf(out,"   Number of steps    = %12d\n",nsteps);
 124 }
 125
 126 static void warn_step(FILE *fp,real ftol,bool bLastStep,bool bConstrain)
 127 {
 128     if (bLastStep)
 129     {
 130         fprintf(fp,"\nReached the maximum number of steps before reaching Fmax < %g\n",ftol);
 131     }
 132     else
 133     {
 134         fprintf(fp,"\nStepsize too small, or no change in energy.\n"
 135                 "Converged to machine precision,\n"
 136                 "but not to the requested precision Fmax < %g\n",
 137                 ftol);
 138         if (sizeof(real)<sizeof(double))
 139         {
 140             fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
 141         }
 142         if (bConstrain)
 143         {
 144             fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
 145                     "off constraints alltogether (set constraints = none in mdp file)\n");
 146         }
 147     }
 148 }
 149
 150
 151
 152 static void print_converged(FILE *fp,const char *alg,real ftol,
 153                             gmx_large_int_t count,bool bDone,gmx_large_int_t nsteps,
 154                             real epot,real fmax, int nfmax, real fnorm)
 155 {
 156   char buf[STEPSTRSIZE];
 157
 158   if (bDone)
 159     fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
 160             alg,ftol,gmx_step_str(count,buf));
 161   else if(count<nsteps)
 162     fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
 163                "but did not reach the requested Fmax < %g.\n",
 164             alg,gmx_step_str(count,buf),ftol);
 165   else
 166     fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
 167             alg,ftol,gmx_step_str(count,buf));
 168
 169 #ifdef GMX_DOUBLE
 170   fprintf(fp,"Potential Energy  = %21.14e\n",epot);
 171   fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1);
 172   fprintf(fp,"Norm of force     = %21.14e\n",fnorm);
 173 #else
 174   fprintf(fp,"Potential Energy  = %14.7e\n",epot);
 175   fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1);
 176   fprintf(fp,"Norm of force     = %14.7e\n",fnorm);
 177 #endif
 178 }
 179
 180 static void get_f_norm_max(t_commrec *cr,
 181                            t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
 182                            real *fnorm,real *fmax,int *a_fmax)
 183 {
 184   double fnorm2,*sum;
 185   real fmax2,fmax2_0,fam;
 186   int  la_max,a_max,start,end,i,m,gf;
 187
 188   /* This routine finds the largest force and returns it.
 189    * On parallel machines the global max is taken.
 190    */
 191   fnorm2 = 0;
 192   fmax2 = 0;
 193   la_max = -1;
 194   gf = 0;
 195   start = mdatoms->start;
 196   end   = mdatoms->homenr + start;
 197   if (mdatoms->cFREEZE) {
 198     for(i=start; i<end; i++) {
 199       gf = mdatoms->cFREEZE[i];
 200       fam = 0;
 201       for(m=0; m<DIM; m++)
 202         if (!opts->nFreeze[gf][m])
 203           fam += sqr(f[i][m]);
 204       fnorm2 += fam;
 205       if (fam > fmax2) {
 206         fmax2  = fam;
 207         la_max = i;
 208       }
 209     }
 210   } else {
 211     for(i=start; i<end; i++) {
 212       fam = norm2(f[i]);
 213       fnorm2 += fam;
 214       if (fam > fmax2) {
 215         fmax2  = fam;
 216         la_max = i;
 217       }
 218     }
 219   }
 220
 221   if (la_max >= 0 && DOMAINDECOMP(cr)) {
 222     a_max = cr->dd->gatindex[la_max];
 223   } else {
 224     a_max = la_max;
 225   }
 226   if (PAR(cr)) {
 227     snew(sum,2*cr->nnodes+1);
 228     sum[2*cr->nodeid]   = fmax2;
 229     sum[2*cr->nodeid+1] = a_max;
 230     sum[2*cr->nnodes]   = fnorm2;
 231     gmx_sumd(2*cr->nnodes+1,sum,cr);
 232     fnorm2 = sum[2*cr->nnodes];
 233     /* Determine the global maximum */
 234     for(i=0; i<cr->nnodes; i++) {
 235       if (sum[2*i] > fmax2) {
 236         fmax2 = sum[2*i];
 237         a_max = (int)(sum[2*i+1] + 0.5);
 238       }
 239     }
 240     sfree(sum);
 241   }
 242
 243   if (fnorm)
 244     *fnorm = sqrt(fnorm2);
 245   if (fmax)
 246     *fmax  = sqrt(fmax2);
 247   if (a_fmax)
 248     *a_fmax = a_max;
 249 }
 250
 251 static void get_state_f_norm_max(t_commrec *cr,
 252                            t_grpopts *opts,t_mdatoms *mdatoms,
 253                            em_state_t *ems)
 254 {
 255   get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
 256 }
 257
 258 void init_em(FILE *fplog,const char *title,
 259              t_commrec *cr,t_inputrec *ir,
 260              t_state *state_global,gmx_mtop_t *top_global,
 261              em_state_t *ems,gmx_localtop_t **top,
 262              rvec **f,rvec **f_global,
 263              t_nrnb *nrnb,rvec mu_tot,
 264              t_forcerec *fr,gmx_enerdata_t **enerd,
 265              t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
 266              gmx_vsite_t *vsite,gmx_constr_t constr,
 267              int nfile,const t_filenm fnm[],
 268              gmx_mdoutf_t **outf,t_mdebin **mdebin)
 269 {
 270     int  start,homenr,i;
 271     real dvdlambda;
 272
 273     if (fplog)
 274     {
 275         fprintf(fplog,"Initiating %s\n",title);
 276     }
 277
 278     state_global->ngtc = 0;
 279
 280     /* Initiate some variables */
 281     if (ir->efep != efepNO)
 282     {
 283         state_global->lambda = ir->init_lambda;
 284     }
 285     else
 286     {
 287         state_global->lambda = 0.0;
 288     }
 289
 290     init_nrnb(nrnb);
 291
 292     if (DOMAINDECOMP(cr))
 293     {
 294         *top = dd_init_local_top(top_global);
 295
 296         dd_init_local_state(cr->dd,state_global,&ems->s);
 297
 298         *f = NULL;
 299
 300         /* Distribute the charge groups over the nodes from the master node */
 301         dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 302                             state_global,top_global,ir,
 303                             &ems->s,&ems->f,mdatoms,*top,
 304                             fr,vsite,NULL,constr,
 305                             nrnb,NULL,FALSE);
 306         dd_store_state(cr->dd,&ems->s);
 307
 308         if (ir->nstfout)
 309         {
 310             snew(*f_global,top_global->natoms);
 311         }
 312         else
 313         {
 314             *f_global = NULL;
 315         }
 316         *graph = NULL;
 317     }
 318     else
 319     {
 320         snew(*f,top_global->natoms);
 321
 322         /* Just copy the state */
 323         ems->s = *state_global;
 324         snew(ems->s.x,ems->s.nalloc);
 325         snew(ems->f,ems->s.nalloc);
 326         for(i=0; i<state_global->natoms; i++)
 327         {
 328             copy_rvec(state_global->x[i],ems->s.x[i]);
 329         }
 330         copy_mat(state_global->box,ems->s.box);
 331
 332         if (PAR(cr) && ir->eI != eiNM)
 333         {
 334             /* Initialize the particle decomposition and split the topology */
 335             *top = split_system(fplog,top_global,ir,cr);
 336
 337             pd_cg_range(cr,&fr->cg0,&fr->hcg);
 338         }
 339         else
 340         {
 341             *top = gmx_mtop_generate_local_top(top_global,ir);
 342         }
 343         *f_global = *f;
 344
 345         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
 346         {
 347             *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
 348         }
 349         else
 350         {
 351             *graph = NULL;
 352         }
 353
 354         if (PARTDECOMP(cr))
 355         {
 356             pd_at_range(cr,&start,&homenr);
 357             homenr -= start;
 358         }
 359         else
 360         {
 361             start  = 0;
 362             homenr = top_global->natoms;
 363         }
 364         atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
 365         update_mdatoms(mdatoms,state_global->lambda);
 366
 367         if (vsite)
 368         {
 369             set_vsite_top(vsite,*top,mdatoms,cr);
 370         }
 371     }
 372
 373     if (constr)
 374     {
 375         if (ir->eConstrAlg == econtSHAKE &&
 376             gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
 377         {
 378             gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
 379                       econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 380         }
 381
 382         if (!DOMAINDECOMP(cr))
 383         {
 384             set_constraints(constr,*top,ir,mdatoms,cr);
 385         }
 386
 387         if (!ir->bContinuation)
 388         {
 389             /* Constrain the starting coordinates */
 390             dvdlambda=0;
 391             constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
 392                       ir,NULL,cr,-1,0,mdatoms,
 393                       ems->s.x,ems->s.x,NULL,ems->s.box,
 394                       ems->s.lambda,&dvdlambda,
 395                       NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 396         }
 397     }
 398
 399     if (PAR(cr))
 400     {
 401         *gstat = global_stat_init(ir);
 402     }
 403
 404     *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
 405
 406     snew(*enerd,1);
 407     init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,*enerd);
 408
 409     /* Init bin for energy stuff */
 410     *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
 411
 412     clear_rvec(mu_tot);
 413     calc_shifts(ems->s.box,fr->shift_vec);
 414 }
 415
 416 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
 417                       gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
 418 {
 419   if (!(cr->duty & DUTY_PME)) {
 420     /* Tell the PME only node to finish */
 421     gmx_pme_finish(cr);
 422   }
 423
 424   done_mdoutf(outf);
 425
 426   em_time_end(fplog,cr,runtime,wcycle);
 427 }
 428
 429 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
 430 {
 431   em_state_t tmp;
 432
 433   tmp   = *ems1;
 434   *ems1 = *ems2;
 435   *ems2 = tmp;
 436 }
 437
 438 static void copy_em_coords_back(em_state_t *ems,t_state *state,rvec *f)
 439 {
 440   int i;
 441
 442   for(i=0; (i<state->natoms); i++)
 443     copy_rvec(ems->s.x[i],state->x[i]);
 444   if (f != NULL)
 445     copy_rvec(ems->f[i],f[i]);
 446 }
 447
 448 static void write_em_traj(FILE *fplog,t_commrec *cr,
 449                           gmx_mdoutf_t *outf,
 450                           bool bX,bool bF,const char *confout,
 451                           gmx_mtop_t *top_global,
 452                           t_inputrec *ir,gmx_large_int_t step,
 453                           em_state_t *state,
 454                           t_state *state_global,rvec *f_global)
 455 {
 456     int mdof_flags;
 457
 458     if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 459     {
 460         f_global = state->f;
 461         copy_em_coords_back(state,state_global,bF ? f_global : NULL);
 462     }
 463
 464     mdof_flags = 0;
 465     if (bX) { mdof_flags |= MDOF_X; }
 466     if (bF) { mdof_flags |= MDOF_F; }
 467     write_traj(fplog,cr,outf,mdof_flags,
 468                top_global,step,(double)step,
 469                &state->s,state_global,state->f,f_global,NULL,NULL);
 470
 471     if (confout != NULL && MASTER(cr))
 472     {
 473         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 474         {
 475             /* Make molecules whole only for confout writing */
 476             do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
 477                         state_global->x);
 478         }
 479
 480         write_sto_conf_mtop(confout,
 481                             *top_global->name,top_global,
 482                             state_global->x,NULL,ir->ePBC,state_global->box);
 483     }
 484 }
 485
 486 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
 487                        em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
 488                        gmx_constr_t constr,gmx_localtop_t *top,
 489                        t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 490                        gmx_large_int_t count)
 491
 492 {
 493   t_state *s1,*s2;
 494   int  start,end,gf,i,m;
 495   rvec *x1,*x2;
 496   real dvdlambda;
 497
 498   s1 = &ems1->s;
 499   s2 = &ems2->s;
 500
 501   if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 502     gmx_incons("state mismatch in do_em_step");
 503
 504   s2->flags = s1->flags;
 505
 506   if (s2->nalloc != s1->nalloc) {
 507     s2->nalloc = s1->nalloc;
 508     srenew(s2->x,s1->nalloc);
 509     srenew(ems2->f,  s1->nalloc);
 510     if (s2->flags & (1<<estCGP))
 511       srenew(s2->cg_p,  s1->nalloc);
 512   }
 513
 514   s2->natoms = s1->natoms;
 515   s2->lambda = s1->lambda;
 516   copy_mat(s1->box,s2->box);
 517
 518   start = md->start;
 519   end   = md->start + md->homenr;
 520
 521   x1 = s1->x;
 522   x2 = s2->x;
 523   gf = 0;
 524   for(i=start; i<end; i++) {
 525     if (md->cFREEZE)
 526       gf = md->cFREEZE[i];
 527     for(m=0; m<DIM; m++) {
 528       if (ir->opts.nFreeze[gf][m])
 529         x2[i][m] = x1[i][m];
 530       else
 531         x2[i][m] = x1[i][m] + a*f[i][m];
 532     }
 533   }
 534
 535   if (s2->flags & (1<<estCGP)) {
 536     /* Copy the CG p vector */
 537     x1 = s1->cg_p;
 538     x2 = s2->cg_p;
 539     for(i=start; i<end; i++)
 540       copy_rvec(x1[i],x2[i]);
 541   }
 542
 543   if (DOMAINDECOMP(cr)) {
 544     s2->ddp_count = s1->ddp_count;
 545     if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
 546       s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 547       srenew(s2->cg_gl,s2->cg_gl_nalloc);
 548     }
 549     s2->ncg_gl = s1->ncg_gl;
 550     for(i=0; i<s2->ncg_gl; i++)
 551       s2->cg_gl[i] = s1->cg_gl[i];
 552     s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 553   }
 554
 555   if (constr) {
 556     wallcycle_start(wcycle,ewcCONSTR);
 557     dvdlambda = 0;
 558     constrain(NULL,TRUE,TRUE,constr,&top->idef,
 559               ir,NULL,cr,count,0,md,
 560               s1->x,s2->x,NULL,s2->box,s2->lambda,
 561               &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 562     wallcycle_stop(wcycle,ewcCONSTR);
 563   }
 564 }
 565
 566 static void do_x_step(t_commrec *cr,int n,rvec *x1,real a,rvec *f,rvec *x2)
 567
 568 {
 569   int  start,end,i,m;
 570
 571   if (DOMAINDECOMP(cr)) {
 572     start = 0;
 573     end   = cr->dd->nat_home;
 574   } else if (PARTDECOMP(cr)) {
 575     pd_at_range(cr,&start,&end);
 576   } else {
 577     start = 0;
 578     end   = n;
 579   }
 580
 581   for(i=start; i<end; i++) {
 582     for(m=0; m<DIM; m++) {
 583       x2[i][m] = x1[i][m] + a*f[i][m];
 584     }
 585   }
 586 }
 587
 588 static void do_x_sub(t_commrec *cr,int n,rvec *x1,rvec *x2,real a,rvec *f)
 589
 590 {
 591   int  start,end,i,m;
 592
 593   if (DOMAINDECOMP(cr)) {
 594     start = 0;
 595     end   = cr->dd->nat_home;
 596   } else if (PARTDECOMP(cr)) {
 597     pd_at_range(cr,&start,&end);
 598   } else {
 599     start = 0;
 600     end   = n;
 601   }
 602
 603   for(i=start; i<end; i++) {
 604     for(m=0; m<DIM; m++) {
 605       f[i][m] = (x1[i][m] - x2[i][m])*a;
 606     }
 607   }
 608 }
 609
 610 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
 611                                    gmx_mtop_t *top_global,t_inputrec *ir,
 612                                    em_state_t *ems,gmx_localtop_t *top,
 613                                    t_mdatoms *mdatoms,t_forcerec *fr,
 614                                    gmx_vsite_t *vsite,gmx_constr_t constr,
 615                                    t_nrnb *nrnb,gmx_wallcycle_t wcycle)
 616 {
 617     /* Repartition the domain decomposition */
 618     wallcycle_start(wcycle,ewcDOMDEC);
 619     dd_partition_system(fplog,step,cr,FALSE,1,
 620                         NULL,top_global,ir,
 621                         &ems->s,&ems->f,
 622                         mdatoms,top,fr,vsite,NULL,constr,
 623                         nrnb,wcycle,FALSE);
 624     dd_store_state(cr->dd,&ems->s);
 625     wallcycle_stop(wcycle,ewcDOMDEC);
 626 }
 627
 628 static void evaluate_energy(FILE *fplog,bool bVerbose,t_commrec *cr,
 629                             t_state *state_global,gmx_mtop_t *top_global,
 630                             em_state_t *ems,gmx_localtop_t *top,
 631                             t_inputrec *inputrec,
 632                             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 633                             gmx_global_stat_t gstat,
 634                             gmx_vsite_t *vsite,gmx_constr_t constr,
 635                             t_fcdata *fcd,
 636                             t_graph *graph,t_mdatoms *mdatoms,
 637                             t_forcerec *fr,rvec mu_tot,
 638                             gmx_enerdata_t *enerd,tensor vir,tensor pres,
 639                             gmx_large_int_t count,bool bFirst)
 640 {
 641   real t;
 642   bool bNS;
 643   int  nabnsb;
 644   tensor force_vir,shake_vir,ekin;
 645   real dvdl,prescorr,enercorr,dvdlcorr;
 646   real terminate=0;
 647
 648   /* Set the time to the initial time, the time does not change during EM */
 649   t = inputrec->init_t;
 650
 651   if (bFirst ||
 652       (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
 653     /* This the first state or an old state used before the last ns */
 654     bNS = TRUE;
 655   } else {
 656     bNS = FALSE;
 657     if (inputrec->nstlist > 0) {
 658       bNS = TRUE;
 659     } else if (inputrec->nstlist == -1) {
 660       nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
 661       if (PAR(cr))
 662         gmx_sumi(1,&nabnsb,cr);
 663       bNS = (nabnsb > 0);
 664     }
 665   }
 666
 667   if (vsite)
 668     construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
 669                      top->idef.iparams,top->idef.il,
 670                      fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
 671
 672   if (DOMAINDECOMP(cr)) {
 673     if (bNS) {
 674       /* Repartition the domain decomposition */
 675       em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 676                              ems,top,mdatoms,fr,vsite,constr,
 677                              nrnb,wcycle);
 678     }
 679   }
 680
 681     /* Calc force & energy on new trial position  */
 682     /* do_force always puts the charge groups in the box and shifts again
 683      * We do not unshift, so molecules are always whole in congrad.c
 684      */
 685     do_force(fplog,cr,inputrec,
 686              count,nrnb,wcycle,top,top_global,&top_global->groups,
 687              ems->s.box,ems->s.x,&ems->s.hist,
 688              ems->f,force_vir,mdatoms,enerd,fcd,
 689              ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
 690              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
 691              (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
 692
 693   /* Clear the unused shake virial and pressure */
 694   clear_mat(shake_vir);
 695   clear_mat(pres);
 696
 697   /* Calculate long range corrections to pressure and energy */
 698   calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda,
 699                 pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
 700   /* don't think these next 4 lines  can be moved in for now, because we
 701      don't always want to write it -- figure out how to clean this up MRS 8/4/2009 */
 702   enerd->term[F_DISPCORR] = enercorr;
 703   enerd->term[F_EPOT] += enercorr;
 704   enerd->term[F_PRES] += prescorr;
 705   enerd->term[F_DVDL] += dvdlcorr;
 706
 707     /* Communicate stuff when parallel */
 708     if (PAR(cr) && inputrec->eI != eiNM)
 709     {
 710         wallcycle_start(wcycle,ewcMoveE);
 711
 712         global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
 713                     inputrec,NULL,NULL,NULL,1,&terminate,
 714                     top_global,&ems->s,FALSE,
 715                     CGLO_ENERGY |
 716                     CGLO_PRESSURE |
 717                     CGLO_CONSTRAINT |
 718                     CGLO_FIRSTITERATE);
 719
 720         wallcycle_stop(wcycle,ewcMoveE);
 721     }
 722
 723   ems->epot = enerd->term[F_EPOT];
 724
 725   if (constr) {
 726     /* Project out the constraint components of the force */
 727     wallcycle_start(wcycle,ewcCONSTR);
 728     dvdl = 0;
 729     constrain(NULL,FALSE,FALSE,constr,&top->idef,
 730               inputrec,NULL,cr,count,0,mdatoms,
 731               ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda,&dvdl,
 732               NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
 733     if (fr->bSepDVDL && fplog)
 734       fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);
 735     enerd->term[F_DHDL_CON] += dvdl;
 736     m_add(force_vir,shake_vir,vir);
 737     wallcycle_stop(wcycle,ewcCONSTR);
 738   } else {
 739     copy_mat(force_vir,vir);
 740   }
 741
 742   clear_mat(ekin);
 743   enerd->term[F_PRES] =
 744     calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres,
 745               (fr->eeltype==eelPPPM)?enerd->term[F_COUL_RECIP]:0.0);
 746
 747   sum_dhdl(enerd,ems->s.lambda,inputrec);
 748
 749     if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 750     {
 751         get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
 752     }
 753 }
 754
 755 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 756                               gmx_mtop_t *mtop,
 757                               em_state_t *s_min,em_state_t *s_b)
 758 {
 759   rvec *fm,*fb,*fmg;
 760   t_block *cgs_gl;
 761   int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
 762   double partsum;
 763   unsigned char *grpnrFREEZE;
 764
 765   if (debug)
 766     fprintf(debug,"Doing reorder_partsum\n");
 767
 768   fm = s_min->f;
 769   fb = s_b->f;
 770
 771   cgs_gl = dd_charge_groups_global(cr->dd);
 772   index = cgs_gl->index;
 773
 774   /* Collect fm in a global vector fmg.
 775    * This conflicts with the spirit of domain decomposition,
 776    * but to fully optimize this a much more complicated algorithm is required.
 777    */
 778   snew(fmg,mtop->natoms);
 779
 780   ncg   = s_min->s.ncg_gl;
 781   cg_gl = s_min->s.cg_gl;
 782   i = 0;
 783   for(c=0; c<ncg; c++) {
 784     cg = cg_gl[c];
 785     a0 = index[cg];
 786     a1 = index[cg+1];
 787     for(a=a0; a<a1; a++) {
 788       copy_rvec(fm[i],fmg[a]);
 789       i++;
 790     }
 791   }
 792   gmx_sum(mtop->natoms*3,fmg[0],cr);
 793
 794   /* Now we will determine the part of the sum for the cgs in state s_b */
 795   ncg   = s_b->s.ncg_gl;
 796   cg_gl = s_b->s.cg_gl;
 797   partsum = 0;
 798   i = 0;
 799   gf = 0;
 800   grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 801   for(c=0; c<ncg; c++) {
 802     cg = cg_gl[c];
 803     a0 = index[cg];
 804     a1 = index[cg+1];
 805     for(a=a0; a<a1; a++) {
 806       if (mdatoms->cFREEZE && grpnrFREEZE) {
 807         gf = grpnrFREEZE[i];
 808       }
 809       for(m=0; m<DIM; m++) {
 810         if (!opts->nFreeze[gf][m]) {
 811           partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 812         }
 813       }
 814       i++;
 815     }
 816   }
 817
 818   sfree(fmg);
 819
 820   return partsum;
 821 }
 822
 823 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 824                     gmx_mtop_t *mtop,
 825                     em_state_t *s_min,em_state_t *s_b)
 826 {
 827   rvec *fm,*fb;
 828   double sum;
 829   int  gf,i,m;
 830
 831   /* This is just the classical Polak-Ribiere calculation of beta;
 832    * it looks a bit complicated since we take freeze groups into account,
 833    * and might have to sum it in parallel runs.
 834    */
 835
 836   if (!DOMAINDECOMP(cr) ||
 837       (s_min->s.ddp_count == cr->dd->ddp_count &&
 838        s_b->s.ddp_count   == cr->dd->ddp_count)) {
 839     fm = s_min->f;
 840     fb = s_b->f;
 841     sum = 0;
 842     gf = 0;
 843     /* This part of code can be incorrect with DD,
 844      * since the atom ordering in s_b and s_min might differ.
 845      */
 846     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 847       if (mdatoms->cFREEZE)
 848         gf = mdatoms->cFREEZE[i];
 849       for(m=0; m<DIM; m++)
 850         if (!opts->nFreeze[gf][m]) {
 851           sum += (fb[i][m] - fm[i][m])*fb[i][m];
 852         }
 853     }
 854   } else {
 855     /* We need to reorder cgs while summing */
 856     sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
 857   }
 858   if (PAR(cr))
 859     gmx_sumd(1,&sum,cr);
 860
 861   return sum/sqr(s_min->fnorm);
 862 }
 863
 864 double do_cg(FILE *fplog,t_commrec *cr,
 865              int nfile,const t_filenm fnm[],
 866              const output_env_t oenv, bool bVerbose,bool bCompact,
 867              int nstglobalcomm,
 868              gmx_vsite_t *vsite,gmx_constr_t constr,
 869              int stepout,
 870              t_inputrec *inputrec,
 871              gmx_mtop_t *top_global,t_fcdata *fcd,
 872              t_state *state_global,
 873              t_mdatoms *mdatoms,
 874              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 875              gmx_edsam_t ed,
 876              t_forcerec *fr,
 877              int repl_ex_nst,int repl_ex_seed,
 878              real cpt_period,real max_hours,
 879              const char *deviceOptions,
 880              unsigned long Flags,
 881              gmx_runtime_t *runtime)
 882 {
 883   const char *CG="Polak-Ribiere Conjugate Gradients";
 884
 885   em_state_t *s_min,*s_a,*s_b,*s_c;
 886   gmx_localtop_t *top;
 887   gmx_enerdata_t *enerd;
 888   rvec   *f;
 889   gmx_global_stat_t gstat;
 890   t_graph    *graph;
 891   rvec   *f_global,*p,*sf,*sfm;
 892   double gpa,gpb,gpc,tmp,sum[2],minstep;
 893   real   fnormn;
 894   real   stepsize;
 895   real   a,b,c,beta=0.0;
 896   real   epot_repl=0;
 897   real   pnorm;
 898   t_mdebin   *mdebin;
 899   bool   converged,foundlower;
 900   rvec   mu_tot;
 901   bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
 902   tensor vir,pres;
 903   int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
 904   gmx_mdoutf_t *outf;
 905   int    i,m,gf,step,nminstep;
 906   real   terminate=0;
 907
 908   step=0;
 909
 910   s_min = init_em_state();
 911   s_a   = init_em_state();
 912   s_b   = init_em_state();
 913   s_c   = init_em_state();
 914
 915   /* Init em and store the local state in s_min */
 916   init_em(fplog,CG,cr,inputrec,
 917           state_global,top_global,s_min,&top,&f,&f_global,
 918           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 919           nfile,fnm,&outf,&mdebin);
 920
 921   /* Print to log file */
 922   print_em_start(fplog,cr,runtime,wcycle,CG);
 923
 924   /* Max number of steps */
 925   number_steps=inputrec->nsteps;
 926
 927   if (MASTER(cr))
 928     sp_header(stderr,CG,inputrec->em_tol,number_steps);
 929   if (fplog)
 930     sp_header(fplog,CG,inputrec->em_tol,number_steps);
 931
 932   /* Call the force routine and some auxiliary (neighboursearching etc.) */
 933   /* do_force always puts the charge groups in the box and shifts again
 934    * We do not unshift, so molecules are always whole in congrad.c
 935    */
 936   evaluate_energy(fplog,bVerbose,cr,
 937                   state_global,top_global,s_min,top,
 938                   inputrec,nrnb,wcycle,gstat,
 939                   vsite,constr,fcd,graph,mdatoms,fr,
 940                   mu_tot,enerd,vir,pres,-1,TRUE);
 941   where();
 942
 943   if (MASTER(cr)) {
 944     /* Copy stuff to the energy bin for easy printing etc. */
 945     upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 946                mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
 947                NULL,NULL,vir,pres,NULL,mu_tot,constr);
 948
 949     print_ebin_header(fplog,step,step,s_min->s.lambda);
 950     print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 951                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 952   }
 953   where();
 954
 955   /* Estimate/guess the initial stepsize */
 956   stepsize = inputrec->em_stepsize/s_min->fnorm;
 957
 958   if (MASTER(cr)) {
 959     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
 960             s_min->fmax,s_min->a_fmax+1);
 961     fprintf(stderr,"   F-Norm            = %12.5e\n",
 962             s_min->fnorm/sqrt(state_global->natoms));
 963     fprintf(stderr,"\n");
 964     /* and copy to the log file too... */
 965     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
 966             s_min->fmax,s_min->a_fmax+1);
 967     fprintf(fplog,"   F-Norm            = %12.5e\n",
 968             s_min->fnorm/sqrt(state_global->natoms));
 969     fprintf(fplog,"\n");
 970   }
 971   /* Start the loop over CG steps.
 972    * Each successful step is counted, and we continue until
 973    * we either converge or reach the max number of steps.
 974    */
 975   converged = FALSE;
 976   for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
 977
 978     /* start taking steps in a new direction
 979      * First time we enter the routine, beta=0, and the direction is
 980      * simply the negative gradient.
 981      */
 982
 983     /* Calculate the new direction in p, and the gradient in this direction, gpa */
 984     p  = s_min->s.cg_p;
 985     sf = s_min->f;
 986     gpa = 0;
 987     gf = 0;
 988     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 989       if (mdatoms->cFREEZE)
 990         gf = mdatoms->cFREEZE[i];
 991       for(m=0; m<DIM; m++) {
 992         if (!inputrec->opts.nFreeze[gf][m]) {
 993           p[i][m] = sf[i][m] + beta*p[i][m];
 994           gpa -= p[i][m]*sf[i][m];
 995           /* f is negative gradient, thus the sign */
 996         } else {
 997           p[i][m] = 0;
 998         }
 999       }
1000     }
1001
1002     /* Sum the gradient along the line across CPUs */
1003     if (PAR(cr))
1004       gmx_sumd(1,&gpa,cr);
1005
1006     /* Calculate the norm of the search vector */
1007     get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
1008
1009     /* Just in case stepsize reaches zero due to numerical precision... */
1010     if(stepsize<=0)
1011       stepsize = inputrec->em_stepsize/pnorm;
1012
1013     /*
1014      * Double check the value of the derivative in the search direction.
1015      * If it is positive it must be due to the old information in the
1016      * CG formula, so just remove that and start over with beta=0.
1017      * This corresponds to a steepest descent step.
1018      */
1019     if(gpa>0) {
1020       beta = 0;
1021       step--; /* Don't count this step since we are restarting */
1022       continue; /* Go back to the beginning of the big for-loop */
1023     }
1024
1025     /* Calculate minimum allowed stepsize, before the average (norm)
1026      * relative change in coordinate is smaller than precision
1027      */
1028     minstep=0;
1029     for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1030       for(m=0; m<DIM; m++) {
1031         tmp = fabs(s_min->s.x[i][m]);
1032         if(tmp < 1.0)
1033           tmp = 1.0;
1034         tmp = p[i][m]/tmp;
1035         minstep += tmp*tmp;
1036       }
1037     }
1038     /* Add up from all CPUs */
1039     if(PAR(cr))
1040       gmx_sumd(1,&minstep,cr);
1041
1042     minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1043
1044     if(stepsize<minstep) {
1045       converged=TRUE;
1046       break;
1047     }
1048
1049     /* Write coordinates if necessary */
1050     do_x = do_per_step(step,inputrec->nstxout);
1051     do_f = do_per_step(step,inputrec->nstfout);
1052
1053     write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1054                   top_global,inputrec,step,
1055                   s_min,state_global,f_global);
1056
1057     /* Take a step downhill.
1058      * In theory, we should minimize the function along this direction.
1059      * That is quite possible, but it turns out to take 5-10 function evaluations
1060      * for each line. However, we dont really need to find the exact minimum -
1061      * it is much better to start a new CG step in a modified direction as soon
1062      * as we are close to it. This will save a lot of energy evaluations.
1063      *
1064      * In practice, we just try to take a single step.
1065      * If it worked (i.e. lowered the energy), we increase the stepsize but
1066      * the continue straight to the next CG step without trying to find any minimum.
1067      * If it didn't work (higher energy), there must be a minimum somewhere between
1068      * the old position and the new one.
1069      *
1070      * Due to the finite numerical accuracy, it turns out that it is a good idea
1071      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1072      * This leads to lower final energies in the tests I've done. / Erik
1073      */
1074     s_a->epot = s_min->epot;
1075     a = 0.0;
1076     c = a + stepsize; /* reference position along line is zero */
1077
1078     if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1079       em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1080                              s_min,top,mdatoms,fr,vsite,constr,
1081                              nrnb,wcycle);
1082     }
1083
1084     /* Take a trial step (new coords in s_c) */
1085     do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
1086                constr,top,nrnb,wcycle,-1);
1087
1088     neval++;
1089     /* Calculate energy for the trial step */
1090     evaluate_energy(fplog,bVerbose,cr,
1091                     state_global,top_global,s_c,top,
1092                     inputrec,nrnb,wcycle,gstat,
1093                     vsite,constr,fcd,graph,mdatoms,fr,
1094                     mu_tot,enerd,vir,pres,-1,FALSE);
1095
1096     /* Calc derivative along line */
1097     p  = s_c->s.cg_p;
1098     sf = s_c->f;
1099     gpc=0;
1100     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1101       for(m=0; m<DIM; m++)
1102           gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
1103     }
1104     /* Sum the gradient along the line across CPUs */
1105     if (PAR(cr))
1106       gmx_sumd(1,&gpc,cr);
1107
1108     /* This is the max amount of increase in energy we tolerate */
1109     tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1110
1111     /* Accept the step if the energy is lower, or if it is not significantly higher
1112      * and the line derivative is still negative.
1113      */
1114     if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1115       foundlower = TRUE;
1116       /* Great, we found a better energy. Increase step for next iteration
1117        * if we are still going down, decrease it otherwise
1118        */
1119       if(gpc<0)
1120         stepsize *= 1.618034;  /* The golden section */
1121       else
1122         stepsize *= 0.618034;  /* 1/golden section */
1123     } else {
1124       /* New energy is the same or higher. We will have to do some work
1125        * to find a smaller value in the interval. Take smaller step next time!
1126        */
1127       foundlower = FALSE;
1128       stepsize *= 0.618034;
1129     }
1130
1131
1132
1133
1134     /* OK, if we didn't find a lower value we will have to locate one now - there must
1135      * be one in the interval [a=0,c].
1136      * The same thing is valid here, though: Don't spend dozens of iterations to find
1137      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1138      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1139      *
1140      * I also have a safeguard for potentially really patological functions so we never
1141      * take more than 20 steps before we give up ...
1142      *
1143      * If we already found a lower value we just skip this step and continue to the update.
1144      */
1145     if (!foundlower) {
1146       nminstep=0;
1147
1148       do {
1149         /* Select a new trial point.
1150          * If the derivatives at points a & c have different sign we interpolate to zero,
1151          * otherwise just do a bisection.
1152          */
1153         if(gpa<0 && gpc>0)
1154           b = a + gpa*(a-c)/(gpc-gpa);
1155         else
1156           b = 0.5*(a+c);
1157
1158         /* safeguard if interpolation close to machine accuracy causes errors:
1159          * never go outside the interval
1160          */
1161         if(b<=a || b>=c)
1162           b = 0.5*(a+c);
1163
1164         if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1165           /* Reload the old state */
1166           em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1167                                  s_min,top,mdatoms,fr,vsite,constr,
1168                                  nrnb,wcycle);
1169         }
1170
1171         /* Take a trial step to this new point - new coords in s_b */
1172         do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
1173                    constr,top,nrnb,wcycle,-1);
1174
1175         neval++;
1176         /* Calculate energy for the trial step */
1177         evaluate_energy(fplog,bVerbose,cr,
1178                         state_global,top_global,s_b,top,
1179                         inputrec,nrnb,wcycle,gstat,
1180                         vsite,constr,fcd,graph,mdatoms,fr,
1181                         mu_tot,enerd,vir,pres,-1,FALSE);
1182
1183         /* p does not change within a step, but since the domain decomposition
1184          * might change, we have to use cg_p of s_b here.
1185          */
1186         p  = s_b->s.cg_p;
1187         sf = s_b->f;
1188         gpb=0;
1189         for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1190           for(m=0; m<DIM; m++)
1191               gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
1192         }
1193         /* Sum the gradient along the line across CPUs */
1194         if (PAR(cr))
1195           gmx_sumd(1,&gpb,cr);
1196
1197         if (debug)
1198           fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1199                   s_a->epot,s_b->epot,s_c->epot,gpb);
1200
1201         epot_repl = s_b->epot;
1202
1203         /* Keep one of the intervals based on the value of the derivative at the new point */
1204         if (gpb > 0) {
1205           /* Replace c endpoint with b */
1206           swap_em_state(s_b,s_c);
1207           c = b;
1208           gpc = gpb;
1209         } else {
1210           /* Replace a endpoint with b */
1211           swap_em_state(s_b,s_a);
1212           a = b;
1213           gpa = gpb;
1214         }
1215
1216         /*
1217          * Stop search as soon as we find a value smaller than the endpoints.
1218          * Never run more than 20 steps, no matter what.
1219          */
1220         nminstep++;
1221       } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1222                (nminstep < 20));
1223
1224       if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1225           nminstep >= 20) {
1226         /* OK. We couldn't find a significantly lower energy.
1227          * If beta==0 this was steepest descent, and then we give up.
1228          * If not, set beta=0 and restart with steepest descent before quitting.
1229          */
1230         if (beta == 0.0) {
1231           /* Converged */
1232           converged = TRUE;
1233           break;
1234         } else {
1235           /* Reset memory before giving up */
1236           beta = 0.0;
1237           continue;
1238         }
1239       }
1240
1241       /* Select min energy state of A & C, put the best in B.
1242        */
1243       if (s_c->epot < s_a->epot) {
1244         if (debug)
1245           fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1246                   s_c->epot,s_a->epot);
1247         swap_em_state(s_b,s_c);
1248         gpb = gpc;
1249         b = c;
1250       } else {
1251         if (debug)
1252           fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1253                   s_a->epot,s_c->epot);
1254         swap_em_state(s_b,s_a);
1255         gpb = gpa;
1256         b = a;
1257       }
1258
1259     } else {
1260       if (debug)
1261         fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1262                 s_c->epot);
1263       swap_em_state(s_b,s_c);
1264       gpb = gpc;
1265       b = c;
1266     }
1267
1268     /* new search direction */
1269     /* beta = 0 means forget all memory and restart with steepest descents. */
1270     if (nstcg && ((step % nstcg)==0))
1271       beta = 0.0;
1272     else {
1273       /* s_min->fnorm cannot be zero, because then we would have converged
1274        * and broken out.
1275        */
1276
1277       /* Polak-Ribiere update.
1278        * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1279        */
1280       beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1281     }
1282     /* Limit beta to prevent oscillations */
1283     if (fabs(beta) > 5.0)
1284       beta = 0.0;
1285
1286
1287     /* update positions */
1288     swap_em_state(s_min,s_b);
1289     gpa = gpb;
1290
1291     /* Print it if necessary */
1292     if (MASTER(cr)) {
1293       if(bVerbose)
1294         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1295                 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1296                 s_min->fmax,s_min->a_fmax+1);
1297       /* Store the new (lower) energies */
1298       upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1299                  mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
1300                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1301       do_log = do_per_step(step,inputrec->nstlog);
1302       do_ene = do_per_step(step,inputrec->nstenergy);
1303       if(do_log)
1304         print_ebin_header(fplog,step,step,s_min->s.lambda);
1305       print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1306                  do_log ? fplog : NULL,step,step,eprNORMAL,
1307                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1308     }
1309
1310     /* Stop when the maximum force lies below tolerance.
1311      * If we have reached machine precision, converged is already set to true.
1312      */
1313     converged = converged || (s_min->fmax < inputrec->em_tol);
1314
1315   } /* End of the loop */
1316
1317   if (converged)
1318     step--; /* we never took that last step in this case */
1319
1320     if (s_min->fmax > inputrec->em_tol)
1321     {
1322         if (MASTER(cr))
1323         {
1324             warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1325             warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1326         }
1327         converged = FALSE;
1328     }
1329
1330   if (MASTER(cr)) {
1331     /* If we printed energy and/or logfile last step (which was the last step)
1332      * we don't have to do it again, but otherwise print the final values.
1333      */
1334     if(!do_log) {
1335       /* Write final value to log since we didn't do anything the last step */
1336       print_ebin_header(fplog,step,step,s_min->s.lambda);
1337     }
1338     if (!do_ene || !do_log) {
1339       /* Write final energy file entries */
1340       print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1341                  !do_log ? fplog : NULL,step,step,eprNORMAL,
1342                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1343     }
1344   }
1345
1346   /* Print some stuff... */
1347   if (MASTER(cr))
1348     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1349
1350   /* IMPORTANT!
1351    * For accurate normal mode calculation it is imperative that we
1352    * store the last conformation into the full precision binary trajectory.
1353    *
1354    * However, we should only do it if we did NOT already write this step
1355    * above (which we did if do_x or do_f was true).
1356    */
1357   do_x = !do_per_step(step,inputrec->nstxout);
1358   do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1359
1360   write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1361                 top_global,inputrec,step,
1362                 s_min,state_global,f_global);
1363
1364   fnormn = s_min->fnorm/sqrt(state_global->natoms);
1365
1366   if (MASTER(cr)) {
1367     print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1368                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1369     print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1370                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1371
1372     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1373   }
1374
1375   finish_em(fplog,cr,outf,runtime,wcycle);
1376
1377   /* To print the actual number of steps we needed somewhere */
1378   runtime->nsteps_done = step;
1379
1380   return 0;
1381 } /* That's all folks */
1382
1383
1384 double do_lbfgs(FILE *fplog,t_commrec *cr,
1385                 int nfile,const t_filenm fnm[],
1386                 const output_env_t oenv, bool bVerbose,bool bCompact,
1387                 int nstglobalcomm,
1388                 gmx_vsite_t *vsite,gmx_constr_t constr,
1389                 int stepout,
1390                 t_inputrec *inputrec,
1391                 gmx_mtop_t *top_global,t_fcdata *fcd,
1392                 t_state *state,
1393                 t_mdatoms *mdatoms,
1394                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1395                 gmx_edsam_t ed,
1396                 t_forcerec *fr,
1397                 int repl_ex_nst,int repl_ex_seed,
1398                 real cpt_period,real max_hours,
1399                 const char *deviceOptions,
1400                 unsigned long Flags,
1401                 gmx_runtime_t *runtime)
1402 {
1403   static const char *LBFGS="Low-Memory BFGS Minimizer";
1404   em_state_t ems;
1405   gmx_localtop_t *top;
1406   gmx_enerdata_t *enerd;
1407   rvec   *f;
1408   gmx_global_stat_t gstat;
1409   t_graph    *graph;
1410   rvec   *f_global;
1411   int    ncorr,nmaxcorr,point,cp,neval,nminstep;
1412   double stepsize,gpa,gpb,gpc,tmp,minstep;
1413   real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1414   real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1415   real   a,b,c,maxdelta,delta;
1416   real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
1417   real   dgdx,dgdg,sq,yr,beta;
1418   t_mdebin   *mdebin;
1419   bool   converged,first;
1420   rvec   mu_tot;
1421   real   fnorm,fmax;
1422   bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
1423   tensor vir,pres;
1424   int    start,end,number_steps;
1425   gmx_mdoutf_t *outf;
1426   int    i,k,m,n,nfmax,gf,step;
1427   /* not used */
1428   real   terminate;
1429
1430   if (PAR(cr))
1431     gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1432
1433   n = 3*state->natoms;
1434   nmaxcorr = inputrec->nbfgscorr;
1435
1436   /* Allocate memory */
1437   /* Use pointers to real so we dont have to loop over both atoms and
1438    * dimensions all the time...
1439    * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1440    * that point to the same memory.
1441    */
1442   snew(xa,n);
1443   snew(xb,n);
1444   snew(xc,n);
1445   snew(fa,n);
1446   snew(fb,n);
1447   snew(fc,n);
1448   snew(frozen,n);
1449
1450   snew(p,n);
1451   snew(lastx,n);
1452   snew(lastf,n);
1453   snew(rho,nmaxcorr);
1454   snew(alpha,nmaxcorr);
1455
1456   snew(dx,nmaxcorr);
1457   for(i=0;i<nmaxcorr;i++)
1458     snew(dx[i],n);
1459
1460   snew(dg,nmaxcorr);
1461   for(i=0;i<nmaxcorr;i++)
1462     snew(dg[i],n);
1463
1464   step = 0;
1465   neval = 0;
1466
1467   /* Init em */
1468   init_em(fplog,LBFGS,cr,inputrec,
1469           state,top_global,&ems,&top,&f,&f_global,
1470           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1471           nfile,fnm,&outf,&mdebin);
1472   /* Do_lbfgs is not completely updated like do_steep and do_cg,
1473    * so we free some memory again.
1474    */
1475   sfree(ems.s.x);
1476   sfree(ems.f);
1477
1478   xx = (real *)state->x;
1479   ff = (real *)f;
1480
1481   start = mdatoms->start;
1482   end   = mdatoms->homenr + start;
1483
1484   /* Print to log file */
1485   print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1486
1487   do_log = do_ene = do_x = do_f = TRUE;
1488
1489   /* Max number of steps */
1490   number_steps=inputrec->nsteps;
1491
1492   /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1493   gf = 0;
1494   for(i=start; i<end; i++) {
1495     if (mdatoms->cFREEZE)
1496       gf = mdatoms->cFREEZE[i];
1497      for(m=0; m<DIM; m++)
1498        frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1499   }
1500   if (MASTER(cr))
1501     sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1502   if (fplog)
1503     sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1504
1505   if (vsite)
1506     construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1507                      top->idef.iparams,top->idef.il,
1508                      fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1509
1510   /* Call the force routine and some auxiliary (neighboursearching etc.) */
1511   /* do_force always puts the charge groups in the box and shifts again
1512    * We do not unshift, so molecules are always whole
1513    */
1514   neval++;
1515   ems.s.x = state->x;
1516   ems.f = f;
1517   evaluate_energy(fplog,bVerbose,cr,
1518                   state,top_global,&ems,top,
1519                   inputrec,nrnb,wcycle,gstat,
1520                   vsite,constr,fcd,graph,mdatoms,fr,
1521                   mu_tot,enerd,vir,pres,-1,TRUE);
1522   where();
1523
1524   if (MASTER(cr)) {
1525     /* Copy stuff to the energy bin for easy printing etc. */
1526     upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1527                mdatoms->tmass,enerd,state,state->box,
1528                NULL,NULL,vir,pres,NULL,mu_tot,constr);
1529
1530     print_ebin_header(fplog,step,step,state->lambda);
1531     print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1532                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1533   }
1534   where();
1535
1536   /* This is the starting energy */
1537   Epot = enerd->term[F_EPOT];
1538
1539   fnorm = ems.fnorm;
1540   fmax  = ems.fmax;
1541   nfmax = ems.a_fmax;
1542
1543   /* Set the initial step.
1544    * since it will be multiplied by the non-normalized search direction
1545    * vector (force vector the first time), we scale it by the
1546    * norm of the force.
1547    */
1548
1549   if (MASTER(cr)) {
1550     fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1551     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1552     fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1553     fprintf(stderr,"\n");
1554     /* and copy to the log file too... */
1555     fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1556     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1557     fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1558     fprintf(fplog,"\n");
1559   }
1560
1561   point=0;
1562   for(i=0;i<n;i++)
1563     if(!frozen[i])
1564       dx[point][i] = ff[i];  /* Initial search direction */
1565     else
1566       dx[point][i] = 0;
1567
1568   stepsize = 1.0/fnorm;
1569   converged = FALSE;
1570
1571   /* Start the loop over BFGS steps.
1572    * Each successful step is counted, and we continue until
1573    * we either converge or reach the max number of steps.
1574    */
1575
1576   ncorr=0;
1577
1578   /* Set the gradient from the force */
1579   converged = FALSE;
1580   for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1581
1582     /* Write coordinates if necessary */
1583     do_x = do_per_step(step,inputrec->nstxout);
1584     do_f = do_per_step(step,inputrec->nstfout);
1585
1586     write_traj(fplog,cr,outf,MDOF_X | MDOF_F,
1587                top_global,step,(real)step,state,state,f,f,NULL,NULL);
1588
1589     /* Do the linesearching in the direction dx[point][0..(n-1)] */
1590
1591     /* pointer to current direction - point=0 first time here */
1592     s=dx[point];
1593
1594     /* calculate line gradient */
1595     for(gpa=0,i=0;i<n;i++)
1596         gpa-=s[i]*ff[i];
1597
1598     /* Calculate minimum allowed stepsize, before the average (norm)
1599      * relative change in coordinate is smaller than precision
1600      */
1601     for(minstep=0,i=0;i<n;i++) {
1602       tmp=fabs(xx[i]);
1603       if(tmp<1.0)
1604         tmp=1.0;
1605       tmp = s[i]/tmp;
1606       minstep += tmp*tmp;
1607     }
1608     minstep = GMX_REAL_EPS/sqrt(minstep/n);
1609
1610     if(stepsize<minstep) {
1611       converged=TRUE;
1612       break;
1613     }
1614
1615     /* Store old forces and coordinates */
1616     for(i=0;i<n;i++) {
1617       lastx[i]=xx[i];
1618       lastf[i]=ff[i];
1619     }
1620     Epot0=Epot;
1621
1622     first=TRUE;
1623
1624     for(i=0;i<n;i++)
1625       xa[i]=xx[i];
1626
1627     /* Take a step downhill.
1628      * In theory, we should minimize the function along this direction.
1629      * That is quite possible, but it turns out to take 5-10 function evaluations
1630      * for each line. However, we dont really need to find the exact minimum -
1631      * it is much better to start a new BFGS step in a modified direction as soon
1632      * as we are close to it. This will save a lot of energy evaluations.
1633      *
1634      * In practice, we just try to take a single step.
1635      * If it worked (i.e. lowered the energy), we increase the stepsize but
1636      * the continue straight to the next BFGS step without trying to find any minimum.
1637      * If it didn't work (higher energy), there must be a minimum somewhere between
1638      * the old position and the new one.
1639      *
1640      * Due to the finite numerical accuracy, it turns out that it is a good idea
1641      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1642      * This leads to lower final energies in the tests I've done. / Erik
1643      */
1644     foundlower=FALSE;
1645     EpotA = Epot0;
1646     a = 0.0;
1647     c = a + stepsize; /* reference position along line is zero */
1648
1649     /* Check stepsize first. We do not allow displacements
1650      * larger than emstep.
1651      */
1652     do {
1653       c = a + stepsize;
1654       maxdelta=0;
1655       for(i=0;i<n;i++) {
1656         delta=c*s[i];
1657         if(delta>maxdelta)
1658           maxdelta=delta;
1659       }
1660       if(maxdelta>inputrec->em_stepsize)
1661         stepsize*=0.1;
1662     } while(maxdelta>inputrec->em_stepsize);
1663
1664     /* Take a trial step */
1665     for (i=0; i<n; i++)
1666       xc[i] = lastx[i] + c*s[i];
1667
1668     neval++;
1669     /* Calculate energy for the trial step */
1670     ems.s.x = (rvec *)xc;
1671     ems.f   = (rvec *)fc;
1672     evaluate_energy(fplog,bVerbose,cr,
1673                     state,top_global,&ems,top,
1674                     inputrec,nrnb,wcycle,gstat,
1675                     vsite,constr,fcd,graph,mdatoms,fr,
1676                     mu_tot,enerd,vir,pres,step,FALSE);
1677     EpotC = ems.epot;
1678
1679     /* Calc derivative along line */
1680     for(gpc=0,i=0; i<n; i++) {
1681         gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
1682     }
1683     /* Sum the gradient along the line across CPUs */
1684     if (PAR(cr))
1685       gmx_sumd(1,&gpc,cr);
1686
1687      /* This is the max amount of increase in energy we tolerate */
1688    tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1689
1690     /* Accept the step if the energy is lower, or if it is not significantly higher
1691      * and the line derivative is still negative.
1692      */
1693     if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1694       foundlower = TRUE;
1695       /* Great, we found a better energy. Increase step for next iteration
1696        * if we are still going down, decrease it otherwise
1697        */
1698       if(gpc<0)
1699         stepsize *= 1.618034;  /* The golden section */
1700       else
1701         stepsize *= 0.618034;  /* 1/golden section */
1702     } else {
1703       /* New energy is the same or higher. We will have to do some work
1704        * to find a smaller value in the interval. Take smaller step next time!
1705        */
1706       foundlower = FALSE;
1707       stepsize *= 0.618034;
1708     }
1709
1710     /* OK, if we didn't find a lower value we will have to locate one now - there must
1711      * be one in the interval [a=0,c].
1712      * The same thing is valid here, though: Don't spend dozens of iterations to find
1713      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1714      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1715      *
1716      * I also have a safeguard for potentially really patological functions so we never
1717      * take more than 20 steps before we give up ...
1718      *
1719      * If we already found a lower value we just skip this step and continue to the update.
1720      */
1721
1722     if(!foundlower) {
1723
1724       nminstep=0;
1725       do {
1726         /* Select a new trial point.
1727          * If the derivatives at points a & c have different sign we interpolate to zero,
1728          * otherwise just do a bisection.
1729          */
1730
1731         if(gpa<0 && gpc>0)
1732           b = a + gpa*(a-c)/(gpc-gpa);
1733         else
1734           b = 0.5*(a+c);
1735
1736         /* safeguard if interpolation close to machine accuracy causes errors:
1737          * never go outside the interval
1738          */
1739         if(b<=a || b>=c)
1740           b = 0.5*(a+c);
1741
1742         /* Take a trial step */
1743         for (i=0; i<n; i++)
1744           xb[i] = lastx[i] + b*s[i];
1745
1746         neval++;
1747         /* Calculate energy for the trial step */
1748         ems.s.x = (rvec *)xb;
1749         ems.f   = (rvec *)fb;
1750         evaluate_energy(fplog,bVerbose,cr,
1751                         state,top_global,&ems,top,
1752                         inputrec,nrnb,wcycle,gstat,
1753                         vsite,constr,fcd,graph,mdatoms,fr,
1754                         mu_tot,enerd,vir,pres,step,FALSE);
1755         EpotB = ems.epot;
1756
1757         fnorm = ems.fnorm;
1758
1759         for(gpb=0,i=0; i<n; i++)
1760           gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
1761
1762         /* Sum the gradient along the line across CPUs */
1763         if (PAR(cr))
1764           gmx_sumd(1,&gpb,cr);
1765
1766         /* Keep one of the intervals based on the value of the derivative at the new point */
1767         if(gpb>0) {
1768           /* Replace c endpoint with b */
1769           EpotC = EpotB;
1770           c = b;
1771           gpc = gpb;
1772           /* swap coord pointers b/c */
1773           xtmp = xb;
1774           ftmp = fb;
1775           xb = xc;
1776           fb = fc;
1777           xc = xtmp;
1778           fc = ftmp;
1779         } else {
1780           /* Replace a endpoint with b */
1781           EpotA = EpotB;
1782           a = b;
1783           gpa = gpb;
1784           /* swap coord pointers a/b */
1785           xtmp = xb;
1786           ftmp = fb;
1787           xb = xa;
1788           fb = fa;
1789           xa = xtmp;
1790           fa = ftmp;
1791         }
1792
1793         /*
1794          * Stop search as soon as we find a value smaller than the endpoints,
1795          * or if the tolerance is below machine precision.
1796          * Never run more than 20 steps, no matter what.
1797          */
1798         nminstep++;
1799       } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1800
1801       if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1802         /* OK. We couldn't find a significantly lower energy.
1803          * If ncorr==0 this was steepest descent, and then we give up.
1804          * If not, reset memory to restart as steepest descent before quitting.
1805          */
1806         if(ncorr==0) {
1807         /* Converged */
1808           converged=TRUE;
1809           break;
1810         } else {
1811           /* Reset memory */
1812           ncorr=0;
1813           /* Search in gradient direction */
1814           for(i=0;i<n;i++)
1815             dx[point][i]=ff[i];
1816           /* Reset stepsize */
1817           stepsize = 1.0/fnorm;
1818           continue;
1819         }
1820       }
1821
1822       /* Select min energy state of A & C, put the best in xx/ff/Epot
1823        */
1824       if(EpotC<EpotA) {
1825         Epot = EpotC;
1826         /* Use state C */
1827         for(i=0;i<n;i++) {
1828           xx[i]=xc[i];
1829           ff[i]=fc[i];
1830         }
1831         stepsize=c;
1832       } else {
1833         Epot = EpotA;
1834         /* Use state A */
1835         for(i=0;i<n;i++) {
1836           xx[i]=xa[i];
1837           ff[i]=fa[i];
1838         }
1839         stepsize=a;
1840       }
1841
1842     } else {
1843       /* found lower */
1844       Epot = EpotC;
1845       /* Use state C */
1846       for(i=0;i<n;i++) {
1847         xx[i]=xc[i];
1848         ff[i]=fc[i];
1849       }
1850       stepsize=c;
1851     }
1852
1853     /* Update the memory information, and calculate a new
1854      * approximation of the inverse hessian
1855      */
1856
1857     /* Have new data in Epot, xx, ff */
1858     if(ncorr<nmaxcorr)
1859       ncorr++;
1860
1861     for(i=0;i<n;i++) {
1862       dg[point][i]=lastf[i]-ff[i];
1863       dx[point][i]*=stepsize;
1864     }
1865
1866     dgdg=0;
1867     dgdx=0;
1868     for(i=0;i<n;i++) {
1869       dgdg+=dg[point][i]*dg[point][i];
1870       dgdx+=dg[point][i]*dx[point][i];
1871     }
1872
1873     diag=dgdx/dgdg;
1874
1875     rho[point]=1.0/dgdx;
1876     point++;
1877
1878     if(point>=nmaxcorr)
1879       point=0;
1880
1881     /* Update */
1882     for(i=0;i<n;i++)
1883       p[i]=ff[i];
1884
1885     cp=point;
1886
1887     /* Recursive update. First go back over the memory points */
1888     for(k=0;k<ncorr;k++) {
1889       cp--;
1890       if(cp<0)
1891         cp=ncorr-1;
1892
1893       sq=0;
1894       for(i=0;i<n;i++)
1895         sq+=dx[cp][i]*p[i];
1896
1897       alpha[cp]=rho[cp]*sq;
1898
1899       for(i=0;i<n;i++)
1900         p[i] -= alpha[cp]*dg[cp][i];
1901     }
1902
1903     for(i=0;i<n;i++)
1904       p[i] *= diag;
1905
1906     /* And then go forward again */
1907     for(k=0;k<ncorr;k++) {
1908       yr = 0;
1909       for(i=0;i<n;i++)
1910         yr += p[i]*dg[cp][i];
1911
1912       beta = rho[cp]*yr;
1913       beta = alpha[cp]-beta;
1914
1915       for(i=0;i<n;i++)
1916         p[i] += beta*dx[cp][i];
1917
1918       cp++;
1919       if(cp>=ncorr)
1920         cp=0;
1921     }
1922
1923     for(i=0;i<n;i++)
1924       if(!frozen[i])
1925         dx[point][i] = p[i];
1926       else
1927         dx[point][i] = 0;
1928
1929     stepsize=1.0;
1930
1931     /* Test whether the convergence criterion is met */
1932     get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1933
1934     /* Print it if necessary */
1935     if (MASTER(cr)) {
1936       if(bVerbose)
1937         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1938                 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1939       /* Store the new (lower) energies */
1940       upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1941                  mdatoms->tmass,enerd,state,state->box,
1942                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1943       do_log = do_per_step(step,inputrec->nstlog);
1944       do_ene = do_per_step(step,inputrec->nstenergy);
1945       if(do_log)
1946         print_ebin_header(fplog,step,step,state->lambda);
1947       print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1948                  do_log ? fplog : NULL,step,step,eprNORMAL,
1949                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1950     }
1951
1952     /* Stop when the maximum force lies below tolerance.
1953      * If we have reached machine precision, converged is already set to true.
1954      */
1955
1956     converged = converged || (fmax < inputrec->em_tol);
1957
1958   } /* End of the loop */
1959
1960   if(converged)
1961     step--; /* we never took that last step in this case */
1962
1963     if(fmax>inputrec->em_tol)
1964     {
1965         if (MASTER(cr))
1966         {
1967             warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1968             warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1969         }
1970         converged = FALSE;
1971     }
1972
1973   /* If we printed energy and/or logfile last step (which was the last step)
1974    * we don't have to do it again, but otherwise print the final values.
1975    */
1976   if(!do_log) /* Write final value to log since we didn't do anythin last step */
1977     print_ebin_header(fplog,step,step,state->lambda);
1978   if(!do_ene || !do_log) /* Write final energy file entries */
1979     print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1980                !do_log ? fplog : NULL,step,step,eprNORMAL,
1981                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1982
1983   /* Print some stuff... */
1984   if (MASTER(cr))
1985     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1986
1987   /* IMPORTANT!
1988    * For accurate normal mode calculation it is imperative that we
1989    * store the last conformation into the full precision binary trajectory.
1990    *
1991    * However, we should only do it if we did NOT already write this step
1992    * above (which we did if do_x or do_f was true).
1993    */
1994   do_x = !do_per_step(step,inputrec->nstxout);
1995   do_f = !do_per_step(step,inputrec->nstfout);
1996   write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1997                 top_global,inputrec,step,
1998                 &ems,state,f);
1999
2000   if (MASTER(cr)) {
2001     print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
2002                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2003     print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
2004                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2005
2006     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
2007   }
2008
2009   finish_em(fplog,cr,outf,runtime,wcycle);
2010
2011   /* To print the actual number of steps we needed somewhere */
2012   runtime->nsteps_done = step;
2013
2014   return 0;
2015 } /* That's all folks */
2016
2017
2018 double do_steep(FILE *fplog,t_commrec *cr,
2019                 int nfile, const t_filenm fnm[],
2020                 const output_env_t oenv, bool bVerbose,bool bCompact,
2021                 int nstglobalcomm,
2022                 gmx_vsite_t *vsite,gmx_constr_t constr,
2023                 int stepout,
2024                 t_inputrec *inputrec,
2025                 gmx_mtop_t *top_global,t_fcdata *fcd,
2026                 t_state *state_global,
2027                 t_mdatoms *mdatoms,
2028                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2029                 gmx_edsam_t ed,
2030                 t_forcerec *fr,
2031                 int repl_ex_nst,int repl_ex_seed,
2032                 real cpt_period,real max_hours,
2033                 const char *deviceOptions,
2034                 unsigned long Flags,
2035                 gmx_runtime_t *runtime)
2036 {
2037   const char *SD="Steepest Descents";
2038   em_state_t *s_min,*s_try;
2039   rvec       *f_global;
2040   gmx_localtop_t *top;
2041   gmx_enerdata_t *enerd;
2042   rvec   *f;
2043   gmx_global_stat_t gstat;
2044   t_graph    *graph;
2045   real   stepsize,constepsize;
2046   real   ustep,dvdlambda,fnormn;
2047   gmx_mdoutf_t *outf;
2048   t_mdebin   *mdebin;
2049   bool   bDone,bAbort,do_x,do_f;
2050   tensor vir,pres;
2051   rvec   mu_tot;
2052   int    nsteps;
2053   int    count=0;
2054   int    steps_accepted=0;
2055   /* not used */
2056   real   terminate=0;
2057
2058   s_min = init_em_state();
2059   s_try = init_em_state();
2060
2061   /* Init em and store the local state in s_try */
2062   init_em(fplog,SD,cr,inputrec,
2063           state_global,top_global,s_try,&top,&f,&f_global,
2064           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2065           nfile,fnm,&outf,&mdebin);
2066
2067   /* Print to log file  */
2068   print_em_start(fplog,cr,runtime,wcycle,SD);
2069
2070   /* Set variables for stepsize (in nm). This is the largest
2071    * step that we are going to make in any direction.
2072    */
2073   ustep = inputrec->em_stepsize;
2074   stepsize = 0;
2075
2076   /* Max number of steps  */
2077   nsteps = inputrec->nsteps;
2078
2079   if (MASTER(cr))
2080     /* Print to the screen  */
2081     sp_header(stderr,SD,inputrec->em_tol,nsteps);
2082   if (fplog)
2083     sp_header(fplog,SD,inputrec->em_tol,nsteps);
2084
2085   /**** HERE STARTS THE LOOP ****
2086    * count is the counter for the number of steps
2087    * bDone will be TRUE when the minimization has converged
2088    * bAbort will be TRUE when nsteps steps have been performed or when
2089    * the stepsize becomes smaller than is reasonable for machine precision
2090    */
2091   count  = 0;
2092   bDone  = FALSE;
2093   bAbort = FALSE;
2094   while( !bDone && !bAbort ) {
2095     bAbort = (nsteps >= 0) && (count == nsteps);
2096
2097     /* set new coordinates, except for first step */
2098     if (count > 0) {
2099       do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
2100                  constr,top,nrnb,wcycle,count);
2101     }
2102
2103     evaluate_energy(fplog,bVerbose,cr,
2104                     state_global,top_global,s_try,top,
2105                     inputrec,nrnb,wcycle,gstat,
2106                     vsite,constr,fcd,graph,mdatoms,fr,
2107                     mu_tot,enerd,vir,pres,count,count==0);
2108
2109     if (MASTER(cr))
2110       print_ebin_header(fplog,count,count,s_try->s.lambda);
2111
2112     if (count == 0)
2113       s_min->epot = s_try->epot + 1;
2114
2115     /* Print it if necessary  */
2116     if (MASTER(cr)) {
2117       if (bVerbose) {
2118         fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2119                 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2120                 (s_try->epot < s_min->epot) ? '\n' : '\r');
2121       }
2122
2123       if (s_try->epot < s_min->epot) {
2124         /* Store the new (lower) energies  */
2125         upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2126                    mdatoms->tmass,enerd,&s_try->s,s_try->s.box,
2127                    NULL,NULL,vir,pres,NULL,mu_tot,constr);
2128         print_ebin(outf->fp_ene,TRUE,
2129                    do_per_step(steps_accepted,inputrec->nstdisreout),
2130                    do_per_step(steps_accepted,inputrec->nstorireout),
2131                    fplog,count,count,eprNORMAL,TRUE,
2132                    mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2133         fflush(fplog);
2134       }
2135     }
2136
2137     /* Now if the new energy is smaller than the previous...
2138      * or if this is the first step!
2139      * or if we did random steps!
2140      */
2141
2142     if ( (count==0) || (s_try->epot < s_min->epot) ) {
2143       steps_accepted++;
2144
2145       /* Test whether the convergence criterion is met...  */
2146       bDone = (s_try->fmax < inputrec->em_tol);
2147
2148       /* Copy the arrays for force, positions and energy  */
2149       /* The 'Min' array always holds the coords and forces of the minimal
2150          sampled energy  */
2151       swap_em_state(s_min,s_try);
2152       if (count > 0)
2153         ustep *= 1.2;
2154
2155       /* Write to trn, if necessary */
2156       do_x = do_per_step(steps_accepted,inputrec->nstxout);
2157       do_f = do_per_step(steps_accepted,inputrec->nstfout);
2158       write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2159                     top_global,inputrec,count,
2160                     s_min,state_global,f_global);
2161     }
2162     else {
2163       /* If energy is not smaller make the step smaller...  */
2164       ustep *= 0.5;
2165
2166       if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2167         /* Reload the old state */
2168         em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2169                                s_min,top,mdatoms,fr,vsite,constr,
2170                                nrnb,wcycle);
2171       }
2172     }
2173
2174     /* Determine new step  */
2175     stepsize = ustep/s_min->fmax;
2176
2177     /* Check if stepsize is too small, with 1 nm as a characteristic length */
2178 #ifdef GMX_DOUBLE
2179         if (count == nsteps || ustep < 1e-12)
2180 #else
2181         if (count == nsteps || ustep < 1e-6)
2182 #endif
2183         {
2184             if (MASTER(cr))
2185             {
2186                 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2187                 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2188             }
2189             bAbort=TRUE;
2190         }
2191
2192     count++;
2193   } /* End of the loop  */
2194
2195     /* Print some shit...  */
2196   if (MASTER(cr))
2197     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2198   write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2199                 top_global,inputrec,count,
2200                 s_min,state_global,f_global);
2201
2202   fnormn = s_min->fnorm/sqrt(state_global->natoms);
2203
2204   if (MASTER(cr)) {
2205     print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2206                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2207     print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2208                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2209   }
2210
2211   finish_em(fplog,cr,outf,runtime,wcycle);
2212
2213   /* To print the actual number of steps we needed somewhere */
2214   inputrec->nsteps=count;
2215
2216   runtime->nsteps_done = count;
2217
2218   return 0;
2219 } /* That's all folks */
2220
2221
2222 double do_nm(FILE *fplog,t_commrec *cr,
2223              int nfile,const t_filenm fnm[],
2224              const output_env_t oenv, bool bVerbose,bool bCompact,
2225              int nstglobalcomm,
2226              gmx_vsite_t *vsite,gmx_constr_t constr,
2227              int stepout,
2228              t_inputrec *inputrec,
2229              gmx_mtop_t *top_global,t_fcdata *fcd,
2230              t_state *state_global,
2231              t_mdatoms *mdatoms,
2232              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2233              gmx_edsam_t ed,
2234              t_forcerec *fr,
2235              int repl_ex_nst,int repl_ex_seed,
2236              real cpt_period,real max_hours,
2237              const char *deviceOptions,
2238              unsigned long Flags,
2239              gmx_runtime_t *runtime)
2240 {
2241     t_mdebin   *mdebin;
2242     const char *NM = "Normal Mode Analysis";
2243     gmx_mdoutf_t *outf;
2244     int        natoms,atom,d;
2245     int        nnodes,node;
2246     rvec       *f_global;
2247     gmx_localtop_t *top;
2248     gmx_enerdata_t *enerd;
2249     rvec       *f;
2250     gmx_global_stat_t gstat;
2251     t_graph    *graph;
2252     real       t,lambda;
2253     bool       bNS;
2254     tensor     vir,pres;
2255     rvec       mu_tot;
2256     rvec       *fneg,*dfdx;
2257     bool       bSparse; /* use sparse matrix storage format */
2258     size_t     sz;
2259     gmx_sparsematrix_t * sparse_matrix = NULL;
2260     real *     full_matrix             = NULL;
2261     em_state_t *   state_work;
2262
2263     /* added with respect to mdrun */
2264     int        i,j,k,row,col;
2265     real       der_range=10.0*sqrt(GMX_REAL_EPS);
2266     real       x_min;
2267     real       fnorm,fmax;
2268
2269     if (constr != NULL)
2270     {
2271         gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2272     }
2273
2274     state_work = init_em_state();
2275
2276     /* Init em and store the local state in state_minimum */
2277     init_em(fplog,NM,cr,inputrec,
2278             state_global,top_global,state_work,&top,
2279             &f,&f_global,
2280             nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2281             nfile,fnm,&outf,&mdebin);
2282
2283     natoms = top_global->natoms;
2284     snew(fneg,natoms);
2285     snew(dfdx,natoms);
2286
2287 #ifndef GMX_DOUBLE
2288     if (MASTER(cr))
2289     {
2290         fprintf(stderr,
2291                 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2292                 "      which MIGHT not be accurate enough for normal mode analysis.\n"
2293                 "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
2294                 "      are fairly modest even if you recompile in double precision.\n\n");
2295     }
2296 #endif
2297
2298     /* Check if we can/should use sparse storage format.
2299      *
2300      * Sparse format is only useful when the Hessian itself is sparse, which it
2301       * will be when we use a cutoff.
2302       * For small systems (n<1000) it is easier to always use full matrix format, though.
2303       */
2304     if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2305     {
2306         fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2307         bSparse = FALSE;
2308     }
2309     else if(top_global->natoms < 1000)
2310     {
2311         fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2312         bSparse = FALSE;
2313     }
2314     else
2315     {
2316         fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2317         bSparse = TRUE;
2318     }
2319
2320     sz = DIM*top_global->natoms;
2321
2322     fprintf(stderr,"Allocating Hessian memory...\n\n");
2323
2324     if(bSparse)
2325     {
2326         sparse_matrix=gmx_sparsematrix_init(sz);
2327         sparse_matrix->compressed_symmetric = TRUE;
2328     }
2329     else
2330     {
2331         snew(full_matrix,sz*sz);
2332     }
2333
2334     /* Initial values */
2335     t      = inputrec->init_t;
2336     lambda = inputrec->init_lambda;
2337
2338     init_nrnb(nrnb);
2339
2340     where();
2341
2342     /* Write start time and temperature */
2343     print_em_start(fplog,cr,runtime,wcycle,NM);
2344
2345     /* fudge nr of steps to nr of atoms */
2346     inputrec->nsteps = natoms*2;
2347
2348     if (MASTER(cr))
2349     {
2350         fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2351                 *(top_global->name),(int)inputrec->nsteps);
2352     }
2353
2354     nnodes = cr->nnodes;
2355
2356     /* Make evaluate_energy do a single node force calculation */
2357     cr->nnodes = 1;
2358     evaluate_energy(fplog,bVerbose,cr,
2359                     state_global,top_global,state_work,top,
2360                     inputrec,nrnb,wcycle,gstat,
2361                     vsite,constr,fcd,graph,mdatoms,fr,
2362                     mu_tot,enerd,vir,pres,-1,TRUE);
2363     cr->nnodes = nnodes;
2364
2365     /* if forces are not small, warn user */
2366     get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2367
2368     if (MASTER(cr))
2369     {
2370         fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2371         if (state_work->fmax > 1.0e-3)
2372         {
2373             fprintf(stderr,"Maximum force probably not small enough to");
2374             fprintf(stderr," ensure that you are in an \nenergy well. ");
2375             fprintf(stderr,"Be aware that negative eigenvalues may occur");
2376             fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2377         }
2378     }
2379
2380     /***********************************************************
2381      *
2382      *      Loop over all pairs in matrix
2383      *
2384      *      do_force called twice. Once with positive and
2385      *      once with negative displacement
2386      *
2387      ************************************************************/
2388
2389     /* Steps are divided one by one over the nodes */
2390     for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2391     {
2392
2393         for (d=0; d<DIM; d++)
2394         {
2395             x_min = state_work->s.x[atom][d];
2396
2397             state_work->s.x[atom][d] = x_min - der_range;
2398
2399             /* Make evaluate_energy do a single node force calculation */
2400             cr->nnodes = 1;
2401             evaluate_energy(fplog,bVerbose,cr,
2402                             state_global,top_global,state_work,top,
2403                             inputrec,nrnb,wcycle,gstat,
2404                             vsite,constr,fcd,graph,mdatoms,fr,
2405                             mu_tot,enerd,vir,pres,atom*2,FALSE);
2406
2407             for(i=0; i<natoms; i++)
2408             {
2409                 copy_rvec(state_work->f[i], fneg[i]);
2410             }
2411
2412             state_work->s.x[atom][d] = x_min + der_range;
2413
2414             evaluate_energy(fplog,bVerbose,cr,
2415                             state_global,top_global,state_work,top,
2416                             inputrec,nrnb,wcycle,gstat,
2417                             vsite,constr,fcd,graph,mdatoms,fr,
2418                             mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2419             cr->nnodes = nnodes;
2420
2421             /* x is restored to original */
2422             state_work->s.x[atom][d] = x_min;
2423
2424             for(j=0; j<natoms; j++)
2425             {
2426                 for (k=0; (k<DIM); k++)
2427                 {
2428                     dfdx[j][k] =
2429                         -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2430                 }
2431             }
2432
2433             if (!MASTER(cr))
2434             {
2435 #ifdef GMX_MPI
2436 #ifdef GMX_DOUBLE
2437 #define mpi_type MPI_DOUBLE
2438 #else
2439 #define mpi_type MPI_FLOAT
2440 #endif
2441                 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2442                          cr->mpi_comm_mygroup);
2443 #endif
2444             }
2445             else
2446             {
2447                 for(node=0; (node<nnodes && atom+node<natoms); node++)
2448                 {
2449                     if (node > 0)
2450                     {
2451 #ifdef GMX_MPI
2452                         MPI_Status stat;
2453                         MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2454                                  cr->mpi_comm_mygroup,&stat);
2455 #undef mpi_type
2456 #endif
2457                     }
2458
2459                     row = (atom + node)*DIM + d;
2460
2461                     for(j=0; j<natoms; j++)
2462                     {
2463                         for(k=0; k<DIM; k++)
2464                         {
2465                             col = j*DIM + k;
2466
2467                             if (bSparse)
2468                             {
2469                                 if (col >= row && dfdx[j][k] != 0.0)
2470                                 {
2471                                     gmx_sparsematrix_increment_value(sparse_matrix,
2472                                                                      row,col,dfdx[j][k]);
2473                                 }
2474                             }
2475                             else
2476                             {
2477                                 full_matrix[row*sz+col] = dfdx[j][k];
2478                             }
2479                         }
2480                     }
2481                 }
2482             }
2483
2484             if (bVerbose && fplog)
2485             {
2486                 fflush(fplog);
2487             }
2488         }
2489         /* write progress */
2490         if (MASTER(cr) && bVerbose)
2491         {
2492             fprintf(stderr,"\rFinished step %d out of %d",
2493                     min(atom+nnodes,natoms),natoms);
2494             fflush(stderr);
2495         }
2496     }
2497
2498     if (MASTER(cr))
2499     {
2500         print_ebin(NULL,FALSE,FALSE,FALSE,fplog,atom,t,eprAVER,
2501                    FALSE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2502
2503         fprintf(stderr,"\n\nWriting Hessian...\n");
2504         gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2505     }
2506
2507     finish_em(fplog,cr,outf,runtime,wcycle);
2508
2509     runtime->nsteps_done = natoms*2;
2510
2511     return 0;
2512 }