src/mdlib/minimize.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <string.h>
  41 #include <time.h>
  42 #include <math.h>
  43 #include "sysstuff.h"
  44 #include "string2.h"
  45 #include "network.h"
  46 #include "confio.h"
  47 #include "copyrite.h"
  48 #include "smalloc.h"
  49 #include "nrnb.h"
  50 #include "main.h"
  51 #include "force.h"
  52 #include "macros.h"
  53 #include "random.h"
  54 #include "names.h"
  55 #include "gmx_fatal.h"
  56 #include "txtdump.h"
  57 #include "typedefs.h"
  58 #include "update.h"
  59 #include "constr.h"
  60 #include "vec.h"
  61 #include "statutil.h"
  62 #include "tgroup.h"
  63 #include "mdebin.h"
  64 #include "vsite.h"
  65 #include "force.h"
  66 #include "mdrun.h"
  67 #include "domdec.h"
  68 #include "partdec.h"
  69 #include "trnio.h"
  70 #include "sparsematrix.h"
  71 #include "mtxio.h"
  72 #include "mdatoms.h"
  73 #include "ns.h"
  74 #include "gmx_wallcycle.h"
  75 #include "mtop_util.h"
  76 #include "gmxfio.h"
  77 #include "pme.h"
  78 #include "gmx_membed.h"
  79
  80 typedef struct {
  81   t_state s;
  82   rvec    *f;
  83   real    epot;
  84   real    fnorm;
  85   real    fmax;
  86   int     a_fmax;
  87 } em_state_t;
  88
  89 static em_state_t *init_em_state()
  90 {
  91   em_state_t *ems;
  92
  93   snew(ems,1);
  94
  95   return ems;
  96 }
  97
  98 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
  99                            gmx_wallcycle_t wcycle,
 100                            const char *name)
 101 {
 102     char buf[STRLEN];
 103
 104     runtime_start(runtime);
 105
 106     sprintf(buf,"Started %s",name);
 107     print_date_and_time(fplog,cr->nodeid,buf,NULL);
 108
 109     wallcycle_start(wcycle,ewcRUN);
 110 }
 111 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 112                         gmx_wallcycle_t wcycle)
 113 {
 114     wallcycle_stop(wcycle,ewcRUN);
 115
 116     runtime_end(runtime);
 117 }
 118
 119 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
 120 {
 121     fprintf(out,"\n");
 122     fprintf(out,"%s:\n",minimizer);
 123     fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
 124     fprintf(out,"   Number of steps    = %12d\n",nsteps);
 125 }
 126
 127 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
 128 {
 129     if (bLastStep)
 130     {
 131         fprintf(fp,"\nReached the maximum number of steps before reaching Fmax < %g\n",ftol);
 132     }
 133     else
 134     {
 135         fprintf(fp,"\nStepsize too small, or no change in energy.\n"
 136                 "Converged to machine precision,\n"
 137                 "but not to the requested precision Fmax < %g\n",
 138                 ftol);
 139         if (sizeof(real)<sizeof(double))
 140         {
 141             fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
 142         }
 143         if (bConstrain)
 144         {
 145             fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
 146                     "off constraints alltogether (set constraints = none in mdp file)\n");
 147         }
 148     }
 149 }
 150
 151
 152
 153 static void print_converged(FILE *fp,const char *alg,real ftol,
 154                             gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
 155                             real epot,real fmax, int nfmax, real fnorm)
 156 {
 157   char buf[STEPSTRSIZE];
 158
 159   if (bDone)
 160     fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
 161             alg,ftol,gmx_step_str(count,buf));
 162   else if(count<nsteps)
 163     fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
 164                "but did not reach the requested Fmax < %g.\n",
 165             alg,gmx_step_str(count,buf),ftol);
 166   else
 167     fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
 168             alg,ftol,gmx_step_str(count,buf));
 169
 170 #ifdef GMX_DOUBLE
 171   fprintf(fp,"Potential Energy  = %21.14e\n",epot);
 172   fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1);
 173   fprintf(fp,"Norm of force     = %21.14e\n",fnorm);
 174 #else
 175   fprintf(fp,"Potential Energy  = %14.7e\n",epot);
 176   fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1);
 177   fprintf(fp,"Norm of force     = %14.7e\n",fnorm);
 178 #endif
 179 }
 180
 181 static void get_f_norm_max(t_commrec *cr,
 182                            t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
 183                            real *fnorm,real *fmax,int *a_fmax)
 184 {
 185   double fnorm2,*sum;
 186   real fmax2,fmax2_0,fam;
 187   int  la_max,a_max,start,end,i,m,gf;
 188
 189   /* This routine finds the largest force and returns it.
 190    * On parallel machines the global max is taken.
 191    */
 192   fnorm2 = 0;
 193   fmax2 = 0;
 194   la_max = -1;
 195   gf = 0;
 196   start = mdatoms->start;
 197   end   = mdatoms->homenr + start;
 198   if (mdatoms->cFREEZE) {
 199     for(i=start; i<end; i++) {
 200       gf = mdatoms->cFREEZE[i];
 201       fam = 0;
 202       for(m=0; m<DIM; m++)
 203         if (!opts->nFreeze[gf][m])
 204           fam += sqr(f[i][m]);
 205       fnorm2 += fam;
 206       if (fam > fmax2) {
 207         fmax2  = fam;
 208         la_max = i;
 209       }
 210     }
 211   } else {
 212     for(i=start; i<end; i++) {
 213       fam = norm2(f[i]);
 214       fnorm2 += fam;
 215       if (fam > fmax2) {
 216         fmax2  = fam;
 217         la_max = i;
 218       }
 219     }
 220   }
 221
 222   if (la_max >= 0 && DOMAINDECOMP(cr)) {
 223     a_max = cr->dd->gatindex[la_max];
 224   } else {
 225     a_max = la_max;
 226   }
 227   if (PAR(cr)) {
 228     snew(sum,2*cr->nnodes+1);
 229     sum[2*cr->nodeid]   = fmax2;
 230     sum[2*cr->nodeid+1] = a_max;
 231     sum[2*cr->nnodes]   = fnorm2;
 232     gmx_sumd(2*cr->nnodes+1,sum,cr);
 233     fnorm2 = sum[2*cr->nnodes];
 234     /* Determine the global maximum */
 235     for(i=0; i<cr->nnodes; i++) {
 236       if (sum[2*i] > fmax2) {
 237         fmax2 = sum[2*i];
 238         a_max = (int)(sum[2*i+1] + 0.5);
 239       }
 240     }
 241     sfree(sum);
 242   }
 243
 244   if (fnorm)
 245     *fnorm = sqrt(fnorm2);
 246   if (fmax)
 247     *fmax  = sqrt(fmax2);
 248   if (a_fmax)
 249     *a_fmax = a_max;
 250 }
 251
 252 static void get_state_f_norm_max(t_commrec *cr,
 253                            t_grpopts *opts,t_mdatoms *mdatoms,
 254                            em_state_t *ems)
 255 {
 256   get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
 257 }
 258
 259 void init_em(FILE *fplog,const char *title,
 260              t_commrec *cr,t_inputrec *ir,
 261              t_state *state_global,gmx_mtop_t *top_global,
 262              em_state_t *ems,gmx_localtop_t **top,
 263              rvec **f,rvec **f_global,
 264              t_nrnb *nrnb,rvec mu_tot,
 265              t_forcerec *fr,gmx_enerdata_t **enerd,
 266              t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
 267              gmx_vsite_t *vsite,gmx_constr_t constr,
 268              int nfile,const t_filenm fnm[],
 269              gmx_mdoutf_t **outf,t_mdebin **mdebin)
 270 {
 271     int  start,homenr,i;
 272     real dvdlambda;
 273
 274     if (fplog)
 275     {
 276         fprintf(fplog,"Initiating %s\n",title);
 277     }
 278
 279     state_global->ngtc = 0;
 280
 281     /* Initiate some variables */
 282     if (ir->efep != efepNO)
 283     {
 284         state_global->lambda = ir->init_lambda;
 285     }
 286     else
 287     {
 288         state_global->lambda = 0.0;
 289     }
 290
 291     init_nrnb(nrnb);
 292
 293     if (DOMAINDECOMP(cr))
 294     {
 295         *top = dd_init_local_top(top_global);
 296
 297         dd_init_local_state(cr->dd,state_global,&ems->s);
 298
 299         *f = NULL;
 300
 301         /* Distribute the charge groups over the nodes from the master node */
 302         dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 303                             state_global,top_global,ir,
 304                             &ems->s,&ems->f,mdatoms,*top,
 305                             fr,vsite,NULL,constr,
 306                             nrnb,NULL,FALSE);
 307         dd_store_state(cr->dd,&ems->s);
 308
 309         if (ir->nstfout)
 310         {
 311             snew(*f_global,top_global->natoms);
 312         }
 313         else
 314         {
 315             *f_global = NULL;
 316         }
 317         *graph = NULL;
 318     }
 319     else
 320     {
 321         snew(*f,top_global->natoms);
 322
 323         /* Just copy the state */
 324         ems->s = *state_global;
 325         snew(ems->s.x,ems->s.nalloc);
 326         snew(ems->f,ems->s.nalloc);
 327         for(i=0; i<state_global->natoms; i++)
 328         {
 329             copy_rvec(state_global->x[i],ems->s.x[i]);
 330         }
 331         copy_mat(state_global->box,ems->s.box);
 332
 333         if (PAR(cr) && ir->eI != eiNM)
 334         {
 335             /* Initialize the particle decomposition and split the topology */
 336             *top = split_system(fplog,top_global,ir,cr);
 337
 338             pd_cg_range(cr,&fr->cg0,&fr->hcg);
 339         }
 340         else
 341         {
 342             *top = gmx_mtop_generate_local_top(top_global,ir);
 343         }
 344         *f_global = *f;
 345
 346         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
 347         {
 348             *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
 349         }
 350         else
 351         {
 352             *graph = NULL;
 353         }
 354
 355         if (PARTDECOMP(cr))
 356         {
 357             pd_at_range(cr,&start,&homenr);
 358             homenr -= start;
 359         }
 360         else
 361         {
 362             start  = 0;
 363             homenr = top_global->natoms;
 364         }
 365         atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
 366         update_mdatoms(mdatoms,state_global->lambda);
 367
 368         if (vsite)
 369         {
 370             set_vsite_top(vsite,*top,mdatoms,cr);
 371         }
 372     }
 373
 374     if (constr)
 375     {
 376         if (ir->eConstrAlg == econtSHAKE &&
 377             gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
 378         {
 379             gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
 380                       econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 381         }
 382
 383         if (!DOMAINDECOMP(cr))
 384         {
 385             set_constraints(constr,*top,ir,mdatoms,cr);
 386         }
 387
 388         if (!ir->bContinuation)
 389         {
 390             /* Constrain the starting coordinates */
 391             dvdlambda=0;
 392             constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
 393                       ir,NULL,cr,-1,0,mdatoms,
 394                       ems->s.x,ems->s.x,NULL,ems->s.box,
 395                       ems->s.lambda,&dvdlambda,
 396                       NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 397         }
 398     }
 399
 400     if (PAR(cr))
 401     {
 402         *gstat = global_stat_init(ir);
 403     }
 404
 405     *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
 406
 407     snew(*enerd,1);
 408     init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,*enerd);
 409
 410     if (mdebin != NULL)
 411     {
 412         /* Init bin for energy stuff */
 413         *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
 414     }
 415
 416     clear_rvec(mu_tot);
 417     calc_shifts(ems->s.box,fr->shift_vec);
 418 }
 419
 420 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
 421                       gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
 422 {
 423   if (!(cr->duty & DUTY_PME)) {
 424     /* Tell the PME only node to finish */
 425     gmx_pme_finish(cr);
 426   }
 427
 428   done_mdoutf(outf);
 429
 430   em_time_end(fplog,cr,runtime,wcycle);
 431 }
 432
 433 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
 434 {
 435   em_state_t tmp;
 436
 437   tmp   = *ems1;
 438   *ems1 = *ems2;
 439   *ems2 = tmp;
 440 }
 441
 442 static void copy_em_coords_back(em_state_t *ems,t_state *state,rvec *f)
 443 {
 444   int i;
 445
 446   for(i=0; (i<state->natoms); i++)
 447     copy_rvec(ems->s.x[i],state->x[i]);
 448   if (f != NULL)
 449     copy_rvec(ems->f[i],f[i]);
 450 }
 451
 452 static void write_em_traj(FILE *fplog,t_commrec *cr,
 453                           gmx_mdoutf_t *outf,
 454                           gmx_bool bX,gmx_bool bF,const char *confout,
 455                           gmx_mtop_t *top_global,
 456                           t_inputrec *ir,gmx_large_int_t step,
 457                           em_state_t *state,
 458                           t_state *state_global,rvec *f_global)
 459 {
 460     int mdof_flags;
 461
 462     if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 463     {
 464         f_global = state->f;
 465         copy_em_coords_back(state,state_global,bF ? f_global : NULL);
 466     }
 467
 468     mdof_flags = 0;
 469     if (bX) { mdof_flags |= MDOF_X; }
 470     if (bF) { mdof_flags |= MDOF_F; }
 471     write_traj(fplog,cr,outf,mdof_flags,
 472                top_global,step,(double)step,
 473                &state->s,state_global,state->f,f_global,NULL,NULL);
 474
 475     if (confout != NULL && MASTER(cr))
 476     {
 477         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 478         {
 479             /* Make molecules whole only for confout writing */
 480             do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
 481                         state_global->x);
 482         }
 483
 484         write_sto_conf_mtop(confout,
 485                             *top_global->name,top_global,
 486                             state_global->x,NULL,ir->ePBC,state_global->box);
 487     }
 488 }
 489
 490 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
 491                        em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
 492                        gmx_constr_t constr,gmx_localtop_t *top,
 493                        t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 494                        gmx_large_int_t count)
 495
 496 {
 497   t_state *s1,*s2;
 498   int  start,end,gf,i,m;
 499   rvec *x1,*x2;
 500   real dvdlambda;
 501
 502   s1 = &ems1->s;
 503   s2 = &ems2->s;
 504
 505   if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 506     gmx_incons("state mismatch in do_em_step");
 507
 508   s2->flags = s1->flags;
 509
 510   if (s2->nalloc != s1->nalloc) {
 511     s2->nalloc = s1->nalloc;
 512     srenew(s2->x,s1->nalloc);
 513     srenew(ems2->f,  s1->nalloc);
 514     if (s2->flags & (1<<estCGP))
 515       srenew(s2->cg_p,  s1->nalloc);
 516   }
 517
 518   s2->natoms = s1->natoms;
 519   s2->lambda = s1->lambda;
 520   copy_mat(s1->box,s2->box);
 521
 522   start = md->start;
 523   end   = md->start + md->homenr;
 524
 525   x1 = s1->x;
 526   x2 = s2->x;
 527   gf = 0;
 528   for(i=start; i<end; i++) {
 529     if (md->cFREEZE)
 530       gf = md->cFREEZE[i];
 531     for(m=0; m<DIM; m++) {
 532       if (ir->opts.nFreeze[gf][m])
 533         x2[i][m] = x1[i][m];
 534       else
 535         x2[i][m] = x1[i][m] + a*f[i][m];
 536     }
 537   }
 538
 539   if (s2->flags & (1<<estCGP)) {
 540     /* Copy the CG p vector */
 541     x1 = s1->cg_p;
 542     x2 = s2->cg_p;
 543     for(i=start; i<end; i++)
 544       copy_rvec(x1[i],x2[i]);
 545   }
 546
 547   if (DOMAINDECOMP(cr)) {
 548     s2->ddp_count = s1->ddp_count;
 549     if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
 550       s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 551       srenew(s2->cg_gl,s2->cg_gl_nalloc);
 552     }
 553     s2->ncg_gl = s1->ncg_gl;
 554     for(i=0; i<s2->ncg_gl; i++)
 555       s2->cg_gl[i] = s1->cg_gl[i];
 556     s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 557   }
 558
 559   if (constr) {
 560     wallcycle_start(wcycle,ewcCONSTR);
 561     dvdlambda = 0;
 562     constrain(NULL,TRUE,TRUE,constr,&top->idef,
 563               ir,NULL,cr,count,0,md,
 564               s1->x,s2->x,NULL,s2->box,s2->lambda,
 565               &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 566     wallcycle_stop(wcycle,ewcCONSTR);
 567   }
 568 }
 569
 570 static void do_x_step(t_commrec *cr,int n,rvec *x1,real a,rvec *f,rvec *x2)
 571
 572 {
 573   int  start,end,i,m;
 574
 575   if (DOMAINDECOMP(cr)) {
 576     start = 0;
 577     end   = cr->dd->nat_home;
 578   } else if (PARTDECOMP(cr)) {
 579     pd_at_range(cr,&start,&end);
 580   } else {
 581     start = 0;
 582     end   = n;
 583   }
 584
 585   for(i=start; i<end; i++) {
 586     for(m=0; m<DIM; m++) {
 587       x2[i][m] = x1[i][m] + a*f[i][m];
 588     }
 589   }
 590 }
 591
 592 static void do_x_sub(t_commrec *cr,int n,rvec *x1,rvec *x2,real a,rvec *f)
 593
 594 {
 595   int  start,end,i,m;
 596
 597   if (DOMAINDECOMP(cr)) {
 598     start = 0;
 599     end   = cr->dd->nat_home;
 600   } else if (PARTDECOMP(cr)) {
 601     pd_at_range(cr,&start,&end);
 602   } else {
 603     start = 0;
 604     end   = n;
 605   }
 606
 607   for(i=start; i<end; i++) {
 608     for(m=0; m<DIM; m++) {
 609       f[i][m] = (x1[i][m] - x2[i][m])*a;
 610     }
 611   }
 612 }
 613
 614 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
 615                                    gmx_mtop_t *top_global,t_inputrec *ir,
 616                                    em_state_t *ems,gmx_localtop_t *top,
 617                                    t_mdatoms *mdatoms,t_forcerec *fr,
 618                                    gmx_vsite_t *vsite,gmx_constr_t constr,
 619                                    t_nrnb *nrnb,gmx_wallcycle_t wcycle)
 620 {
 621     /* Repartition the domain decomposition */
 622     wallcycle_start(wcycle,ewcDOMDEC);
 623     dd_partition_system(fplog,step,cr,FALSE,1,
 624                         NULL,top_global,ir,
 625                         &ems->s,&ems->f,
 626                         mdatoms,top,fr,vsite,NULL,constr,
 627                         nrnb,wcycle,FALSE);
 628     dd_store_state(cr->dd,&ems->s);
 629     wallcycle_stop(wcycle,ewcDOMDEC);
 630 }
 631
 632 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
 633                             t_state *state_global,gmx_mtop_t *top_global,
 634                             em_state_t *ems,gmx_localtop_t *top,
 635                             t_inputrec *inputrec,
 636                             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 637                             gmx_global_stat_t gstat,
 638                             gmx_vsite_t *vsite,gmx_constr_t constr,
 639                             t_fcdata *fcd,
 640                             t_graph *graph,t_mdatoms *mdatoms,
 641                             t_forcerec *fr,rvec mu_tot,
 642                             gmx_enerdata_t *enerd,tensor vir,tensor pres,
 643                             gmx_large_int_t count,gmx_bool bFirst)
 644 {
 645   real t;
 646   gmx_bool bNS;
 647   int  nabnsb;
 648   tensor force_vir,shake_vir,ekin;
 649   real dvdl,prescorr,enercorr,dvdlcorr;
 650   real terminate=0;
 651
 652   /* Set the time to the initial time, the time does not change during EM */
 653   t = inputrec->init_t;
 654
 655   if (bFirst ||
 656       (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
 657     /* This the first state or an old state used before the last ns */
 658     bNS = TRUE;
 659   } else {
 660     bNS = FALSE;
 661     if (inputrec->nstlist > 0) {
 662       bNS = TRUE;
 663     } else if (inputrec->nstlist == -1) {
 664       nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
 665       if (PAR(cr))
 666         gmx_sumi(1,&nabnsb,cr);
 667       bNS = (nabnsb > 0);
 668     }
 669   }
 670
 671   if (vsite)
 672     construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
 673                      top->idef.iparams,top->idef.il,
 674                      fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
 675
 676   if (DOMAINDECOMP(cr)) {
 677     if (bNS) {
 678       /* Repartition the domain decomposition */
 679       em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 680                              ems,top,mdatoms,fr,vsite,constr,
 681                              nrnb,wcycle);
 682     }
 683   }
 684
 685     /* Calc force & energy on new trial position  */
 686     /* do_force always puts the charge groups in the box and shifts again
 687      * We do not unshift, so molecules are always whole in congrad.c
 688      */
 689     do_force(fplog,cr,inputrec,
 690              count,nrnb,wcycle,top,top_global,&top_global->groups,
 691              ems->s.box,ems->s.x,&ems->s.hist,
 692              ems->f,force_vir,mdatoms,enerd,fcd,
 693              ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
 694              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
 695              (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
 696
 697   /* Clear the unused shake virial and pressure */
 698   clear_mat(shake_vir);
 699   clear_mat(pres);
 700
 701   /* Calculate long range corrections to pressure and energy */
 702   calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda,
 703                 pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
 704   /* don't think these next 4 lines  can be moved in for now, because we
 705      don't always want to write it -- figure out how to clean this up MRS 8/4/2009 */
 706   enerd->term[F_DISPCORR] = enercorr;
 707   enerd->term[F_EPOT] += enercorr;
 708   enerd->term[F_PRES] += prescorr;
 709   enerd->term[F_DVDL] += dvdlcorr;
 710
 711     /* Communicate stuff when parallel */
 712     if (PAR(cr) && inputrec->eI != eiNM)
 713     {
 714         wallcycle_start(wcycle,ewcMoveE);
 715
 716         global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
 717                     inputrec,NULL,NULL,NULL,1,&terminate,
 718                     top_global,&ems->s,FALSE,
 719                     CGLO_ENERGY |
 720                     CGLO_PRESSURE |
 721                     CGLO_CONSTRAINT |
 722                     CGLO_FIRSTITERATE);
 723
 724         wallcycle_stop(wcycle,ewcMoveE);
 725     }
 726
 727   ems->epot = enerd->term[F_EPOT];
 728
 729   if (constr) {
 730     /* Project out the constraint components of the force */
 731     wallcycle_start(wcycle,ewcCONSTR);
 732     dvdl = 0;
 733     constrain(NULL,FALSE,FALSE,constr,&top->idef,
 734               inputrec,NULL,cr,count,0,mdatoms,
 735               ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda,&dvdl,
 736               NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
 737     if (fr->bSepDVDL && fplog)
 738       fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);
 739     enerd->term[F_DHDL_CON] += dvdl;
 740     m_add(force_vir,shake_vir,vir);
 741     wallcycle_stop(wcycle,ewcCONSTR);
 742   } else {
 743     copy_mat(force_vir,vir);
 744   }
 745
 746   clear_mat(ekin);
 747   enerd->term[F_PRES] =
 748     calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres,
 749               (fr->eeltype==eelPPPM)?enerd->term[F_COUL_RECIP]:0.0);
 750
 751   sum_dhdl(enerd,ems->s.lambda,inputrec);
 752
 753     if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 754     {
 755         get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
 756     }
 757 }
 758
 759 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 760                               gmx_mtop_t *mtop,
 761                               em_state_t *s_min,em_state_t *s_b)
 762 {
 763   rvec *fm,*fb,*fmg;
 764   t_block *cgs_gl;
 765   int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
 766   double partsum;
 767   unsigned char *grpnrFREEZE;
 768
 769   if (debug)
 770     fprintf(debug,"Doing reorder_partsum\n");
 771
 772   fm = s_min->f;
 773   fb = s_b->f;
 774
 775   cgs_gl = dd_charge_groups_global(cr->dd);
 776   index = cgs_gl->index;
 777
 778   /* Collect fm in a global vector fmg.
 779    * This conflicts with the spirit of domain decomposition,
 780    * but to fully optimize this a much more complicated algorithm is required.
 781    */
 782   snew(fmg,mtop->natoms);
 783
 784   ncg   = s_min->s.ncg_gl;
 785   cg_gl = s_min->s.cg_gl;
 786   i = 0;
 787   for(c=0; c<ncg; c++) {
 788     cg = cg_gl[c];
 789     a0 = index[cg];
 790     a1 = index[cg+1];
 791     for(a=a0; a<a1; a++) {
 792       copy_rvec(fm[i],fmg[a]);
 793       i++;
 794     }
 795   }
 796   gmx_sum(mtop->natoms*3,fmg[0],cr);
 797
 798   /* Now we will determine the part of the sum for the cgs in state s_b */
 799   ncg   = s_b->s.ncg_gl;
 800   cg_gl = s_b->s.cg_gl;
 801   partsum = 0;
 802   i = 0;
 803   gf = 0;
 804   grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 805   for(c=0; c<ncg; c++) {
 806     cg = cg_gl[c];
 807     a0 = index[cg];
 808     a1 = index[cg+1];
 809     for(a=a0; a<a1; a++) {
 810       if (mdatoms->cFREEZE && grpnrFREEZE) {
 811         gf = grpnrFREEZE[i];
 812       }
 813       for(m=0; m<DIM; m++) {
 814         if (!opts->nFreeze[gf][m]) {
 815           partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 816         }
 817       }
 818       i++;
 819     }
 820   }
 821
 822   sfree(fmg);
 823
 824   return partsum;
 825 }
 826
 827 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 828                     gmx_mtop_t *mtop,
 829                     em_state_t *s_min,em_state_t *s_b)
 830 {
 831   rvec *fm,*fb;
 832   double sum;
 833   int  gf,i,m;
 834
 835   /* This is just the classical Polak-Ribiere calculation of beta;
 836    * it looks a bit complicated since we take freeze groups into account,
 837    * and might have to sum it in parallel runs.
 838    */
 839
 840   if (!DOMAINDECOMP(cr) ||
 841       (s_min->s.ddp_count == cr->dd->ddp_count &&
 842        s_b->s.ddp_count   == cr->dd->ddp_count)) {
 843     fm = s_min->f;
 844     fb = s_b->f;
 845     sum = 0;
 846     gf = 0;
 847     /* This part of code can be incorrect with DD,
 848      * since the atom ordering in s_b and s_min might differ.
 849      */
 850     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 851       if (mdatoms->cFREEZE)
 852         gf = mdatoms->cFREEZE[i];
 853       for(m=0; m<DIM; m++)
 854         if (!opts->nFreeze[gf][m]) {
 855           sum += (fb[i][m] - fm[i][m])*fb[i][m];
 856         }
 857     }
 858   } else {
 859     /* We need to reorder cgs while summing */
 860     sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
 861   }
 862   if (PAR(cr))
 863     gmx_sumd(1,&sum,cr);
 864
 865   return sum/sqr(s_min->fnorm);
 866 }
 867
 868 double do_cg(FILE *fplog,t_commrec *cr,
 869              int nfile,const t_filenm fnm[],
 870              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 871              int nstglobalcomm,
 872              gmx_vsite_t *vsite,gmx_constr_t constr,
 873              int stepout,
 874              t_inputrec *inputrec,
 875              gmx_mtop_t *top_global,t_fcdata *fcd,
 876              t_state *state_global,
 877              t_mdatoms *mdatoms,
 878              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 879              gmx_edsam_t ed,
 880              t_forcerec *fr,
 881              int repl_ex_nst,int repl_ex_seed,
 882              gmx_membed_t *membed,
 883              real cpt_period,real max_hours,
 884              const char *deviceOptions,
 885              unsigned long Flags,
 886              gmx_runtime_t *runtime)
 887 {
 888   const char *CG="Polak-Ribiere Conjugate Gradients";
 889
 890   em_state_t *s_min,*s_a,*s_b,*s_c;
 891   gmx_localtop_t *top;
 892   gmx_enerdata_t *enerd;
 893   rvec   *f;
 894   gmx_global_stat_t gstat;
 895   t_graph    *graph;
 896   rvec   *f_global,*p,*sf,*sfm;
 897   double gpa,gpb,gpc,tmp,sum[2],minstep;
 898   real   fnormn;
 899   real   stepsize;
 900   real   a,b,c,beta=0.0;
 901   real   epot_repl=0;
 902   real   pnorm;
 903   t_mdebin   *mdebin;
 904   gmx_bool   converged,foundlower;
 905   rvec   mu_tot;
 906   gmx_bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
 907   tensor vir,pres;
 908   int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
 909   gmx_mdoutf_t *outf;
 910   int    i,m,gf,step,nminstep;
 911   real   terminate=0;
 912
 913   step=0;
 914
 915   s_min = init_em_state();
 916   s_a   = init_em_state();
 917   s_b   = init_em_state();
 918   s_c   = init_em_state();
 919
 920   /* Init em and store the local state in s_min */
 921   init_em(fplog,CG,cr,inputrec,
 922           state_global,top_global,s_min,&top,&f,&f_global,
 923           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 924           nfile,fnm,&outf,&mdebin);
 925
 926   /* Print to log file */
 927   print_em_start(fplog,cr,runtime,wcycle,CG);
 928
 929   /* Max number of steps */
 930   number_steps=inputrec->nsteps;
 931
 932   if (MASTER(cr))
 933     sp_header(stderr,CG,inputrec->em_tol,number_steps);
 934   if (fplog)
 935     sp_header(fplog,CG,inputrec->em_tol,number_steps);
 936
 937   /* Call the force routine and some auxiliary (neighboursearching etc.) */
 938   /* do_force always puts the charge groups in the box and shifts again
 939    * We do not unshift, so molecules are always whole in congrad.c
 940    */
 941   evaluate_energy(fplog,bVerbose,cr,
 942                   state_global,top_global,s_min,top,
 943                   inputrec,nrnb,wcycle,gstat,
 944                   vsite,constr,fcd,graph,mdatoms,fr,
 945                   mu_tot,enerd,vir,pres,-1,TRUE);
 946   where();
 947
 948   if (MASTER(cr)) {
 949     /* Copy stuff to the energy bin for easy printing etc. */
 950     upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 951                mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
 952                NULL,NULL,vir,pres,NULL,mu_tot,constr);
 953
 954     print_ebin_header(fplog,step,step,s_min->s.lambda);
 955     print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 956                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 957   }
 958   where();
 959
 960   /* Estimate/guess the initial stepsize */
 961   stepsize = inputrec->em_stepsize/s_min->fnorm;
 962
 963   if (MASTER(cr)) {
 964     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
 965             s_min->fmax,s_min->a_fmax+1);
 966     fprintf(stderr,"   F-Norm            = %12.5e\n",
 967             s_min->fnorm/sqrt(state_global->natoms));
 968     fprintf(stderr,"\n");
 969     /* and copy to the log file too... */
 970     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
 971             s_min->fmax,s_min->a_fmax+1);
 972     fprintf(fplog,"   F-Norm            = %12.5e\n",
 973             s_min->fnorm/sqrt(state_global->natoms));
 974     fprintf(fplog,"\n");
 975   }
 976   /* Start the loop over CG steps.
 977    * Each successful step is counted, and we continue until
 978    * we either converge or reach the max number of steps.
 979    */
 980   converged = FALSE;
 981   for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
 982
 983     /* start taking steps in a new direction
 984      * First time we enter the routine, beta=0, and the direction is
 985      * simply the negative gradient.
 986      */
 987
 988     /* Calculate the new direction in p, and the gradient in this direction, gpa */
 989     p  = s_min->s.cg_p;
 990     sf = s_min->f;
 991     gpa = 0;
 992     gf = 0;
 993     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 994       if (mdatoms->cFREEZE)
 995         gf = mdatoms->cFREEZE[i];
 996       for(m=0; m<DIM; m++) {
 997         if (!inputrec->opts.nFreeze[gf][m]) {
 998           p[i][m] = sf[i][m] + beta*p[i][m];
 999           gpa -= p[i][m]*sf[i][m];
1000           /* f is negative gradient, thus the sign */
1001         } else {
1002           p[i][m] = 0;
1003         }
1004       }
1005     }
1006
1007     /* Sum the gradient along the line across CPUs */
1008     if (PAR(cr))
1009       gmx_sumd(1,&gpa,cr);
1010
1011     /* Calculate the norm of the search vector */
1012     get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
1013
1014     /* Just in case stepsize reaches zero due to numerical precision... */
1015     if(stepsize<=0)
1016       stepsize = inputrec->em_stepsize/pnorm;
1017
1018     /*
1019      * Double check the value of the derivative in the search direction.
1020      * If it is positive it must be due to the old information in the
1021      * CG formula, so just remove that and start over with beta=0.
1022      * This corresponds to a steepest descent step.
1023      */
1024     if(gpa>0) {
1025       beta = 0;
1026       step--; /* Don't count this step since we are restarting */
1027       continue; /* Go back to the beginning of the big for-loop */
1028     }
1029
1030     /* Calculate minimum allowed stepsize, before the average (norm)
1031      * relative change in coordinate is smaller than precision
1032      */
1033     minstep=0;
1034     for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1035       for(m=0; m<DIM; m++) {
1036         tmp = fabs(s_min->s.x[i][m]);
1037         if(tmp < 1.0)
1038           tmp = 1.0;
1039         tmp = p[i][m]/tmp;
1040         minstep += tmp*tmp;
1041       }
1042     }
1043     /* Add up from all CPUs */
1044     if(PAR(cr))
1045       gmx_sumd(1,&minstep,cr);
1046
1047     minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1048
1049     if(stepsize<minstep) {
1050       converged=TRUE;
1051       break;
1052     }
1053
1054     /* Write coordinates if necessary */
1055     do_x = do_per_step(step,inputrec->nstxout);
1056     do_f = do_per_step(step,inputrec->nstfout);
1057
1058     write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1059                   top_global,inputrec,step,
1060                   s_min,state_global,f_global);
1061
1062     /* Take a step downhill.
1063      * In theory, we should minimize the function along this direction.
1064      * That is quite possible, but it turns out to take 5-10 function evaluations
1065      * for each line. However, we dont really need to find the exact minimum -
1066      * it is much better to start a new CG step in a modified direction as soon
1067      * as we are close to it. This will save a lot of energy evaluations.
1068      *
1069      * In practice, we just try to take a single step.
1070      * If it worked (i.e. lowered the energy), we increase the stepsize but
1071      * the continue straight to the next CG step without trying to find any minimum.
1072      * If it didn't work (higher energy), there must be a minimum somewhere between
1073      * the old position and the new one.
1074      *
1075      * Due to the finite numerical accuracy, it turns out that it is a good idea
1076      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1077      * This leads to lower final energies in the tests I've done. / Erik
1078      */
1079     s_a->epot = s_min->epot;
1080     a = 0.0;
1081     c = a + stepsize; /* reference position along line is zero */
1082
1083     if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1084       em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1085                              s_min,top,mdatoms,fr,vsite,constr,
1086                              nrnb,wcycle);
1087     }
1088
1089     /* Take a trial step (new coords in s_c) */
1090     do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
1091                constr,top,nrnb,wcycle,-1);
1092
1093     neval++;
1094     /* Calculate energy for the trial step */
1095     evaluate_energy(fplog,bVerbose,cr,
1096                     state_global,top_global,s_c,top,
1097                     inputrec,nrnb,wcycle,gstat,
1098                     vsite,constr,fcd,graph,mdatoms,fr,
1099                     mu_tot,enerd,vir,pres,-1,FALSE);
1100
1101     /* Calc derivative along line */
1102     p  = s_c->s.cg_p;
1103     sf = s_c->f;
1104     gpc=0;
1105     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1106       for(m=0; m<DIM; m++)
1107           gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
1108     }
1109     /* Sum the gradient along the line across CPUs */
1110     if (PAR(cr))
1111       gmx_sumd(1,&gpc,cr);
1112
1113     /* This is the max amount of increase in energy we tolerate */
1114     tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1115
1116     /* Accept the step if the energy is lower, or if it is not significantly higher
1117      * and the line derivative is still negative.
1118      */
1119     if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1120       foundlower = TRUE;
1121       /* Great, we found a better energy. Increase step for next iteration
1122        * if we are still going down, decrease it otherwise
1123        */
1124       if(gpc<0)
1125         stepsize *= 1.618034;  /* The golden section */
1126       else
1127         stepsize *= 0.618034;  /* 1/golden section */
1128     } else {
1129       /* New energy is the same or higher. We will have to do some work
1130        * to find a smaller value in the interval. Take smaller step next time!
1131        */
1132       foundlower = FALSE;
1133       stepsize *= 0.618034;
1134     }
1135
1136
1137
1138
1139     /* OK, if we didn't find a lower value we will have to locate one now - there must
1140      * be one in the interval [a=0,c].
1141      * The same thing is valid here, though: Don't spend dozens of iterations to find
1142      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1143      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1144      *
1145      * I also have a safeguard for potentially really patological functions so we never
1146      * take more than 20 steps before we give up ...
1147      *
1148      * If we already found a lower value we just skip this step and continue to the update.
1149      */
1150     if (!foundlower) {
1151       nminstep=0;
1152
1153       do {
1154         /* Select a new trial point.
1155          * If the derivatives at points a & c have different sign we interpolate to zero,
1156          * otherwise just do a bisection.
1157          */
1158         if(gpa<0 && gpc>0)
1159           b = a + gpa*(a-c)/(gpc-gpa);
1160         else
1161           b = 0.5*(a+c);
1162
1163         /* safeguard if interpolation close to machine accuracy causes errors:
1164          * never go outside the interval
1165          */
1166         if(b<=a || b>=c)
1167           b = 0.5*(a+c);
1168
1169         if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1170           /* Reload the old state */
1171           em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1172                                  s_min,top,mdatoms,fr,vsite,constr,
1173                                  nrnb,wcycle);
1174         }
1175
1176         /* Take a trial step to this new point - new coords in s_b */
1177         do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
1178                    constr,top,nrnb,wcycle,-1);
1179
1180         neval++;
1181         /* Calculate energy for the trial step */
1182         evaluate_energy(fplog,bVerbose,cr,
1183                         state_global,top_global,s_b,top,
1184                         inputrec,nrnb,wcycle,gstat,
1185                         vsite,constr,fcd,graph,mdatoms,fr,
1186                         mu_tot,enerd,vir,pres,-1,FALSE);
1187
1188         /* p does not change within a step, but since the domain decomposition
1189          * might change, we have to use cg_p of s_b here.
1190          */
1191         p  = s_b->s.cg_p;
1192         sf = s_b->f;
1193         gpb=0;
1194         for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1195           for(m=0; m<DIM; m++)
1196               gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
1197         }
1198         /* Sum the gradient along the line across CPUs */
1199         if (PAR(cr))
1200           gmx_sumd(1,&gpb,cr);
1201
1202         if (debug)
1203           fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1204                   s_a->epot,s_b->epot,s_c->epot,gpb);
1205
1206         epot_repl = s_b->epot;
1207
1208         /* Keep one of the intervals based on the value of the derivative at the new point */
1209         if (gpb > 0) {
1210           /* Replace c endpoint with b */
1211           swap_em_state(s_b,s_c);
1212           c = b;
1213           gpc = gpb;
1214         } else {
1215           /* Replace a endpoint with b */
1216           swap_em_state(s_b,s_a);
1217           a = b;
1218           gpa = gpb;
1219         }
1220
1221         /*
1222          * Stop search as soon as we find a value smaller than the endpoints.
1223          * Never run more than 20 steps, no matter what.
1224          */
1225         nminstep++;
1226       } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1227                (nminstep < 20));
1228
1229       if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1230           nminstep >= 20) {
1231         /* OK. We couldn't find a significantly lower energy.
1232          * If beta==0 this was steepest descent, and then we give up.
1233          * If not, set beta=0 and restart with steepest descent before quitting.
1234          */
1235         if (beta == 0.0) {
1236           /* Converged */
1237           converged = TRUE;
1238           break;
1239         } else {
1240           /* Reset memory before giving up */
1241           beta = 0.0;
1242           continue;
1243         }
1244       }
1245
1246       /* Select min energy state of A & C, put the best in B.
1247        */
1248       if (s_c->epot < s_a->epot) {
1249         if (debug)
1250           fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1251                   s_c->epot,s_a->epot);
1252         swap_em_state(s_b,s_c);
1253         gpb = gpc;
1254         b = c;
1255       } else {
1256         if (debug)
1257           fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1258                   s_a->epot,s_c->epot);
1259         swap_em_state(s_b,s_a);
1260         gpb = gpa;
1261         b = a;
1262       }
1263
1264     } else {
1265       if (debug)
1266         fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1267                 s_c->epot);
1268       swap_em_state(s_b,s_c);
1269       gpb = gpc;
1270       b = c;
1271     }
1272
1273     /* new search direction */
1274     /* beta = 0 means forget all memory and restart with steepest descents. */
1275     if (nstcg && ((step % nstcg)==0))
1276       beta = 0.0;
1277     else {
1278       /* s_min->fnorm cannot be zero, because then we would have converged
1279        * and broken out.
1280        */
1281
1282       /* Polak-Ribiere update.
1283        * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1284        */
1285       beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1286     }
1287     /* Limit beta to prevent oscillations */
1288     if (fabs(beta) > 5.0)
1289       beta = 0.0;
1290
1291
1292     /* update positions */
1293     swap_em_state(s_min,s_b);
1294     gpa = gpb;
1295
1296     /* Print it if necessary */
1297     if (MASTER(cr)) {
1298       if(bVerbose)
1299         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1300                 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1301                 s_min->fmax,s_min->a_fmax+1);
1302       /* Store the new (lower) energies */
1303       upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1304                  mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
1305                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1306       do_log = do_per_step(step,inputrec->nstlog);
1307       do_ene = do_per_step(step,inputrec->nstenergy);
1308       if(do_log)
1309         print_ebin_header(fplog,step,step,s_min->s.lambda);
1310       print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1311                  do_log ? fplog : NULL,step,step,eprNORMAL,
1312                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1313     }
1314
1315     /* Stop when the maximum force lies below tolerance.
1316      * If we have reached machine precision, converged is already set to true.
1317      */
1318     converged = converged || (s_min->fmax < inputrec->em_tol);
1319
1320   } /* End of the loop */
1321
1322   if (converged)
1323     step--; /* we never took that last step in this case */
1324
1325     if (s_min->fmax > inputrec->em_tol)
1326     {
1327         if (MASTER(cr))
1328         {
1329             warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1330             warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1331         }
1332         converged = FALSE;
1333     }
1334
1335   if (MASTER(cr)) {
1336     /* If we printed energy and/or logfile last step (which was the last step)
1337      * we don't have to do it again, but otherwise print the final values.
1338      */
1339     if(!do_log) {
1340       /* Write final value to log since we didn't do anything the last step */
1341       print_ebin_header(fplog,step,step,s_min->s.lambda);
1342     }
1343     if (!do_ene || !do_log) {
1344       /* Write final energy file entries */
1345       print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1346                  !do_log ? fplog : NULL,step,step,eprNORMAL,
1347                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1348     }
1349   }
1350
1351   /* Print some stuff... */
1352   if (MASTER(cr))
1353     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1354
1355   /* IMPORTANT!
1356    * For accurate normal mode calculation it is imperative that we
1357    * store the last conformation into the full precision binary trajectory.
1358    *
1359    * However, we should only do it if we did NOT already write this step
1360    * above (which we did if do_x or do_f was true).
1361    */
1362   do_x = !do_per_step(step,inputrec->nstxout);
1363   do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1364
1365   write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1366                 top_global,inputrec,step,
1367                 s_min,state_global,f_global);
1368
1369   fnormn = s_min->fnorm/sqrt(state_global->natoms);
1370
1371   if (MASTER(cr)) {
1372     print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1373                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1374     print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1375                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1376
1377     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1378   }
1379
1380   finish_em(fplog,cr,outf,runtime,wcycle);
1381
1382   /* To print the actual number of steps we needed somewhere */
1383   runtime->nsteps_done = step;
1384
1385   return 0;
1386 } /* That's all folks */
1387
1388
1389 double do_lbfgs(FILE *fplog,t_commrec *cr,
1390                 int nfile,const t_filenm fnm[],
1391                 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1392                 int nstglobalcomm,
1393                 gmx_vsite_t *vsite,gmx_constr_t constr,
1394                 int stepout,
1395                 t_inputrec *inputrec,
1396                 gmx_mtop_t *top_global,t_fcdata *fcd,
1397                 t_state *state,
1398                 t_mdatoms *mdatoms,
1399                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1400                 gmx_edsam_t ed,
1401                 t_forcerec *fr,
1402                 int repl_ex_nst,int repl_ex_seed,
1403                 gmx_membed_t *membed,
1404                 real cpt_period,real max_hours,
1405                 const char *deviceOptions,
1406                 unsigned long Flags,
1407                 gmx_runtime_t *runtime)
1408 {
1409   static const char *LBFGS="Low-Memory BFGS Minimizer";
1410   em_state_t ems;
1411   gmx_localtop_t *top;
1412   gmx_enerdata_t *enerd;
1413   rvec   *f;
1414   gmx_global_stat_t gstat;
1415   t_graph    *graph;
1416   rvec   *f_global;
1417   int    ncorr,nmaxcorr,point,cp,neval,nminstep;
1418   double stepsize,gpa,gpb,gpc,tmp,minstep;
1419   real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1420   real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1421   real   a,b,c,maxdelta,delta;
1422   real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
1423   real   dgdx,dgdg,sq,yr,beta;
1424   t_mdebin   *mdebin;
1425   gmx_bool   converged,first;
1426   rvec   mu_tot;
1427   real   fnorm,fmax;
1428   gmx_bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
1429   tensor vir,pres;
1430   int    start,end,number_steps;
1431   gmx_mdoutf_t *outf;
1432   int    i,k,m,n,nfmax,gf,step;
1433   /* not used */
1434   real   terminate;
1435
1436   if (PAR(cr))
1437     gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1438
1439   n = 3*state->natoms;
1440   nmaxcorr = inputrec->nbfgscorr;
1441
1442   /* Allocate memory */
1443   /* Use pointers to real so we dont have to loop over both atoms and
1444    * dimensions all the time...
1445    * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1446    * that point to the same memory.
1447    */
1448   snew(xa,n);
1449   snew(xb,n);
1450   snew(xc,n);
1451   snew(fa,n);
1452   snew(fb,n);
1453   snew(fc,n);
1454   snew(frozen,n);
1455
1456   snew(p,n);
1457   snew(lastx,n);
1458   snew(lastf,n);
1459   snew(rho,nmaxcorr);
1460   snew(alpha,nmaxcorr);
1461
1462   snew(dx,nmaxcorr);
1463   for(i=0;i<nmaxcorr;i++)
1464     snew(dx[i],n);
1465
1466   snew(dg,nmaxcorr);
1467   for(i=0;i<nmaxcorr;i++)
1468     snew(dg[i],n);
1469
1470   step = 0;
1471   neval = 0;
1472
1473   /* Init em */
1474   init_em(fplog,LBFGS,cr,inputrec,
1475           state,top_global,&ems,&top,&f,&f_global,
1476           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1477           nfile,fnm,&outf,&mdebin);
1478   /* Do_lbfgs is not completely updated like do_steep and do_cg,
1479    * so we free some memory again.
1480    */
1481   sfree(ems.s.x);
1482   sfree(ems.f);
1483
1484   xx = (real *)state->x;
1485   ff = (real *)f;
1486
1487   start = mdatoms->start;
1488   end   = mdatoms->homenr + start;
1489
1490   /* Print to log file */
1491   print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1492
1493   do_log = do_ene = do_x = do_f = TRUE;
1494
1495   /* Max number of steps */
1496   number_steps=inputrec->nsteps;
1497
1498   /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1499   gf = 0;
1500   for(i=start; i<end; i++) {
1501     if (mdatoms->cFREEZE)
1502       gf = mdatoms->cFREEZE[i];
1503      for(m=0; m<DIM; m++)
1504        frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1505   }
1506   if (MASTER(cr))
1507     sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1508   if (fplog)
1509     sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1510
1511   if (vsite)
1512     construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1513                      top->idef.iparams,top->idef.il,
1514                      fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1515
1516   /* Call the force routine and some auxiliary (neighboursearching etc.) */
1517   /* do_force always puts the charge groups in the box and shifts again
1518    * We do not unshift, so molecules are always whole
1519    */
1520   neval++;
1521   ems.s.x = state->x;
1522   ems.f = f;
1523   evaluate_energy(fplog,bVerbose,cr,
1524                   state,top_global,&ems,top,
1525                   inputrec,nrnb,wcycle,gstat,
1526                   vsite,constr,fcd,graph,mdatoms,fr,
1527                   mu_tot,enerd,vir,pres,-1,TRUE);
1528   where();
1529
1530   if (MASTER(cr)) {
1531     /* Copy stuff to the energy bin for easy printing etc. */
1532     upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1533                mdatoms->tmass,enerd,state,state->box,
1534                NULL,NULL,vir,pres,NULL,mu_tot,constr);
1535
1536     print_ebin_header(fplog,step,step,state->lambda);
1537     print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1538                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1539   }
1540   where();
1541
1542   /* This is the starting energy */
1543   Epot = enerd->term[F_EPOT];
1544
1545   fnorm = ems.fnorm;
1546   fmax  = ems.fmax;
1547   nfmax = ems.a_fmax;
1548
1549   /* Set the initial step.
1550    * since it will be multiplied by the non-normalized search direction
1551    * vector (force vector the first time), we scale it by the
1552    * norm of the force.
1553    */
1554
1555   if (MASTER(cr)) {
1556     fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1557     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1558     fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1559     fprintf(stderr,"\n");
1560     /* and copy to the log file too... */
1561     fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1562     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1563     fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1564     fprintf(fplog,"\n");
1565   }
1566
1567   point=0;
1568   for(i=0;i<n;i++)
1569     if(!frozen[i])
1570       dx[point][i] = ff[i];  /* Initial search direction */
1571     else
1572       dx[point][i] = 0;
1573
1574   stepsize = 1.0/fnorm;
1575   converged = FALSE;
1576
1577   /* Start the loop over BFGS steps.
1578    * Each successful step is counted, and we continue until
1579    * we either converge or reach the max number of steps.
1580    */
1581
1582   ncorr=0;
1583
1584   /* Set the gradient from the force */
1585   converged = FALSE;
1586   for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1587
1588     /* Write coordinates if necessary */
1589     do_x = do_per_step(step,inputrec->nstxout);
1590     do_f = do_per_step(step,inputrec->nstfout);
1591
1592     write_traj(fplog,cr,outf,MDOF_X | MDOF_F,
1593                top_global,step,(real)step,state,state,f,f,NULL,NULL);
1594
1595     /* Do the linesearching in the direction dx[point][0..(n-1)] */
1596
1597     /* pointer to current direction - point=0 first time here */
1598     s=dx[point];
1599
1600     /* calculate line gradient */
1601     for(gpa=0,i=0;i<n;i++)
1602         gpa-=s[i]*ff[i];
1603
1604     /* Calculate minimum allowed stepsize, before the average (norm)
1605      * relative change in coordinate is smaller than precision
1606      */
1607     for(minstep=0,i=0;i<n;i++) {
1608       tmp=fabs(xx[i]);
1609       if(tmp<1.0)
1610         tmp=1.0;
1611       tmp = s[i]/tmp;
1612       minstep += tmp*tmp;
1613     }
1614     minstep = GMX_REAL_EPS/sqrt(minstep/n);
1615
1616     if(stepsize<minstep) {
1617       converged=TRUE;
1618       break;
1619     }
1620
1621     /* Store old forces and coordinates */
1622     for(i=0;i<n;i++) {
1623       lastx[i]=xx[i];
1624       lastf[i]=ff[i];
1625     }
1626     Epot0=Epot;
1627
1628     first=TRUE;
1629
1630     for(i=0;i<n;i++)
1631       xa[i]=xx[i];
1632
1633     /* Take a step downhill.
1634      * In theory, we should minimize the function along this direction.
1635      * That is quite possible, but it turns out to take 5-10 function evaluations
1636      * for each line. However, we dont really need to find the exact minimum -
1637      * it is much better to start a new BFGS step in a modified direction as soon
1638      * as we are close to it. This will save a lot of energy evaluations.
1639      *
1640      * In practice, we just try to take a single step.
1641      * If it worked (i.e. lowered the energy), we increase the stepsize but
1642      * the continue straight to the next BFGS step without trying to find any minimum.
1643      * If it didn't work (higher energy), there must be a minimum somewhere between
1644      * the old position and the new one.
1645      *
1646      * Due to the finite numerical accuracy, it turns out that it is a good idea
1647      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1648      * This leads to lower final energies in the tests I've done. / Erik
1649      */
1650     foundlower=FALSE;
1651     EpotA = Epot0;
1652     a = 0.0;
1653     c = a + stepsize; /* reference position along line is zero */
1654
1655     /* Check stepsize first. We do not allow displacements
1656      * larger than emstep.
1657      */
1658     do {
1659       c = a + stepsize;
1660       maxdelta=0;
1661       for(i=0;i<n;i++) {
1662         delta=c*s[i];
1663         if(delta>maxdelta)
1664           maxdelta=delta;
1665       }
1666       if(maxdelta>inputrec->em_stepsize)
1667         stepsize*=0.1;
1668     } while(maxdelta>inputrec->em_stepsize);
1669
1670     /* Take a trial step */
1671     for (i=0; i<n; i++)
1672       xc[i] = lastx[i] + c*s[i];
1673
1674     neval++;
1675     /* Calculate energy for the trial step */
1676     ems.s.x = (rvec *)xc;
1677     ems.f   = (rvec *)fc;
1678     evaluate_energy(fplog,bVerbose,cr,
1679                     state,top_global,&ems,top,
1680                     inputrec,nrnb,wcycle,gstat,
1681                     vsite,constr,fcd,graph,mdatoms,fr,
1682                     mu_tot,enerd,vir,pres,step,FALSE);
1683     EpotC = ems.epot;
1684
1685     /* Calc derivative along line */
1686     for(gpc=0,i=0; i<n; i++) {
1687         gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
1688     }
1689     /* Sum the gradient along the line across CPUs */
1690     if (PAR(cr))
1691       gmx_sumd(1,&gpc,cr);
1692
1693      /* This is the max amount of increase in energy we tolerate */
1694    tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1695
1696     /* Accept the step if the energy is lower, or if it is not significantly higher
1697      * and the line derivative is still negative.
1698      */
1699     if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1700       foundlower = TRUE;
1701       /* Great, we found a better energy. Increase step for next iteration
1702        * if we are still going down, decrease it otherwise
1703        */
1704       if(gpc<0)
1705         stepsize *= 1.618034;  /* The golden section */
1706       else
1707         stepsize *= 0.618034;  /* 1/golden section */
1708     } else {
1709       /* New energy is the same or higher. We will have to do some work
1710        * to find a smaller value in the interval. Take smaller step next time!
1711        */
1712       foundlower = FALSE;
1713       stepsize *= 0.618034;
1714     }
1715
1716     /* OK, if we didn't find a lower value we will have to locate one now - there must
1717      * be one in the interval [a=0,c].
1718      * The same thing is valid here, though: Don't spend dozens of iterations to find
1719      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1720      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1721      *
1722      * I also have a safeguard for potentially really patological functions so we never
1723      * take more than 20 steps before we give up ...
1724      *
1725      * If we already found a lower value we just skip this step and continue to the update.
1726      */
1727
1728     if(!foundlower) {
1729
1730       nminstep=0;
1731       do {
1732         /* Select a new trial point.
1733          * If the derivatives at points a & c have different sign we interpolate to zero,
1734          * otherwise just do a bisection.
1735          */
1736
1737         if(gpa<0 && gpc>0)
1738           b = a + gpa*(a-c)/(gpc-gpa);
1739         else
1740           b = 0.5*(a+c);
1741
1742         /* safeguard if interpolation close to machine accuracy causes errors:
1743          * never go outside the interval
1744          */
1745         if(b<=a || b>=c)
1746           b = 0.5*(a+c);
1747
1748         /* Take a trial step */
1749         for (i=0; i<n; i++)
1750           xb[i] = lastx[i] + b*s[i];
1751
1752         neval++;
1753         /* Calculate energy for the trial step */
1754         ems.s.x = (rvec *)xb;
1755         ems.f   = (rvec *)fb;
1756         evaluate_energy(fplog,bVerbose,cr,
1757                         state,top_global,&ems,top,
1758                         inputrec,nrnb,wcycle,gstat,
1759                         vsite,constr,fcd,graph,mdatoms,fr,
1760                         mu_tot,enerd,vir,pres,step,FALSE);
1761         EpotB = ems.epot;
1762
1763         fnorm = ems.fnorm;
1764
1765         for(gpb=0,i=0; i<n; i++)
1766           gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
1767
1768         /* Sum the gradient along the line across CPUs */
1769         if (PAR(cr))
1770           gmx_sumd(1,&gpb,cr);
1771
1772         /* Keep one of the intervals based on the value of the derivative at the new point */
1773         if(gpb>0) {
1774           /* Replace c endpoint with b */
1775           EpotC = EpotB;
1776           c = b;
1777           gpc = gpb;
1778           /* swap coord pointers b/c */
1779           xtmp = xb;
1780           ftmp = fb;
1781           xb = xc;
1782           fb = fc;
1783           xc = xtmp;
1784           fc = ftmp;
1785         } else {
1786           /* Replace a endpoint with b */
1787           EpotA = EpotB;
1788           a = b;
1789           gpa = gpb;
1790           /* swap coord pointers a/b */
1791           xtmp = xb;
1792           ftmp = fb;
1793           xb = xa;
1794           fb = fa;
1795           xa = xtmp;
1796           fa = ftmp;
1797         }
1798
1799         /*
1800          * Stop search as soon as we find a value smaller than the endpoints,
1801          * or if the tolerance is below machine precision.
1802          * Never run more than 20 steps, no matter what.
1803          */
1804         nminstep++;
1805       } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1806
1807       if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1808         /* OK. We couldn't find a significantly lower energy.
1809          * If ncorr==0 this was steepest descent, and then we give up.
1810          * If not, reset memory to restart as steepest descent before quitting.
1811          */
1812         if(ncorr==0) {
1813         /* Converged */
1814           converged=TRUE;
1815           break;
1816         } else {
1817           /* Reset memory */
1818           ncorr=0;
1819           /* Search in gradient direction */
1820           for(i=0;i<n;i++)
1821             dx[point][i]=ff[i];
1822           /* Reset stepsize */
1823           stepsize = 1.0/fnorm;
1824           continue;
1825         }
1826       }
1827
1828       /* Select min energy state of A & C, put the best in xx/ff/Epot
1829        */
1830       if(EpotC<EpotA) {
1831         Epot = EpotC;
1832         /* Use state C */
1833         for(i=0;i<n;i++) {
1834           xx[i]=xc[i];
1835           ff[i]=fc[i];
1836         }
1837         stepsize=c;
1838       } else {
1839         Epot = EpotA;
1840         /* Use state A */
1841         for(i=0;i<n;i++) {
1842           xx[i]=xa[i];
1843           ff[i]=fa[i];
1844         }
1845         stepsize=a;
1846       }
1847
1848     } else {
1849       /* found lower */
1850       Epot = EpotC;
1851       /* Use state C */
1852       for(i=0;i<n;i++) {
1853         xx[i]=xc[i];
1854         ff[i]=fc[i];
1855       }
1856       stepsize=c;
1857     }
1858
1859     /* Update the memory information, and calculate a new
1860      * approximation of the inverse hessian
1861      */
1862
1863     /* Have new data in Epot, xx, ff */
1864     if(ncorr<nmaxcorr)
1865       ncorr++;
1866
1867     for(i=0;i<n;i++) {
1868       dg[point][i]=lastf[i]-ff[i];
1869       dx[point][i]*=stepsize;
1870     }
1871
1872     dgdg=0;
1873     dgdx=0;
1874     for(i=0;i<n;i++) {
1875       dgdg+=dg[point][i]*dg[point][i];
1876       dgdx+=dg[point][i]*dx[point][i];
1877     }
1878
1879     diag=dgdx/dgdg;
1880
1881     rho[point]=1.0/dgdx;
1882     point++;
1883
1884     if(point>=nmaxcorr)
1885       point=0;
1886
1887     /* Update */
1888     for(i=0;i<n;i++)
1889       p[i]=ff[i];
1890
1891     cp=point;
1892
1893     /* Recursive update. First go back over the memory points */
1894     for(k=0;k<ncorr;k++) {
1895       cp--;
1896       if(cp<0)
1897         cp=ncorr-1;
1898
1899       sq=0;
1900       for(i=0;i<n;i++)
1901         sq+=dx[cp][i]*p[i];
1902
1903       alpha[cp]=rho[cp]*sq;
1904
1905       for(i=0;i<n;i++)
1906         p[i] -= alpha[cp]*dg[cp][i];
1907     }
1908
1909     for(i=0;i<n;i++)
1910       p[i] *= diag;
1911
1912     /* And then go forward again */
1913     for(k=0;k<ncorr;k++) {
1914       yr = 0;
1915       for(i=0;i<n;i++)
1916         yr += p[i]*dg[cp][i];
1917
1918       beta = rho[cp]*yr;
1919       beta = alpha[cp]-beta;
1920
1921       for(i=0;i<n;i++)
1922         p[i] += beta*dx[cp][i];
1923
1924       cp++;
1925       if(cp>=ncorr)
1926         cp=0;
1927     }
1928
1929     for(i=0;i<n;i++)
1930       if(!frozen[i])
1931         dx[point][i] = p[i];
1932       else
1933         dx[point][i] = 0;
1934
1935     stepsize=1.0;
1936
1937     /* Test whether the convergence criterion is met */
1938     get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1939
1940     /* Print it if necessary */
1941     if (MASTER(cr)) {
1942       if(bVerbose)
1943         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1944                 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1945       /* Store the new (lower) energies */
1946       upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1947                  mdatoms->tmass,enerd,state,state->box,
1948                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1949       do_log = do_per_step(step,inputrec->nstlog);
1950       do_ene = do_per_step(step,inputrec->nstenergy);
1951       if(do_log)
1952         print_ebin_header(fplog,step,step,state->lambda);
1953       print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1954                  do_log ? fplog : NULL,step,step,eprNORMAL,
1955                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1956     }
1957
1958     /* Stop when the maximum force lies below tolerance.
1959      * If we have reached machine precision, converged is already set to true.
1960      */
1961
1962     converged = converged || (fmax < inputrec->em_tol);
1963
1964   } /* End of the loop */
1965
1966   if(converged)
1967     step--; /* we never took that last step in this case */
1968
1969     if(fmax>inputrec->em_tol)
1970     {
1971         if (MASTER(cr))
1972         {
1973             warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1974             warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1975         }
1976         converged = FALSE;
1977     }
1978
1979   /* If we printed energy and/or logfile last step (which was the last step)
1980    * we don't have to do it again, but otherwise print the final values.
1981    */
1982   if(!do_log) /* Write final value to log since we didn't do anythin last step */
1983     print_ebin_header(fplog,step,step,state->lambda);
1984   if(!do_ene || !do_log) /* Write final energy file entries */
1985     print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1986                !do_log ? fplog : NULL,step,step,eprNORMAL,
1987                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1988
1989   /* Print some stuff... */
1990   if (MASTER(cr))
1991     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1992
1993   /* IMPORTANT!
1994    * For accurate normal mode calculation it is imperative that we
1995    * store the last conformation into the full precision binary trajectory.
1996    *
1997    * However, we should only do it if we did NOT already write this step
1998    * above (which we did if do_x or do_f was true).
1999    */
2000   do_x = !do_per_step(step,inputrec->nstxout);
2001   do_f = !do_per_step(step,inputrec->nstfout);
2002   write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
2003                 top_global,inputrec,step,
2004                 &ems,state,f);
2005
2006   if (MASTER(cr)) {
2007     print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
2008                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2009     print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
2010                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2011
2012     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
2013   }
2014
2015   finish_em(fplog,cr,outf,runtime,wcycle);
2016
2017   /* To print the actual number of steps we needed somewhere */
2018   runtime->nsteps_done = step;
2019
2020   return 0;
2021 } /* That's all folks */
2022
2023
2024 double do_steep(FILE *fplog,t_commrec *cr,
2025                 int nfile, const t_filenm fnm[],
2026                 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2027                 int nstglobalcomm,
2028                 gmx_vsite_t *vsite,gmx_constr_t constr,
2029                 int stepout,
2030                 t_inputrec *inputrec,
2031                 gmx_mtop_t *top_global,t_fcdata *fcd,
2032                 t_state *state_global,
2033                 t_mdatoms *mdatoms,
2034                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2035                 gmx_edsam_t ed,
2036                 t_forcerec *fr,
2037                 int repl_ex_nst,int repl_ex_seed,
2038                 gmx_membed_t *membed,
2039                 real cpt_period,real max_hours,
2040                 const char *deviceOptions,
2041                 unsigned long Flags,
2042                 gmx_runtime_t *runtime)
2043 {
2044   const char *SD="Steepest Descents";
2045   em_state_t *s_min,*s_try;
2046   rvec       *f_global;
2047   gmx_localtop_t *top;
2048   gmx_enerdata_t *enerd;
2049   rvec   *f;
2050   gmx_global_stat_t gstat;
2051   t_graph    *graph;
2052   real   stepsize,constepsize;
2053   real   ustep,dvdlambda,fnormn;
2054   gmx_mdoutf_t *outf;
2055   t_mdebin   *mdebin;
2056   gmx_bool   bDone,bAbort,do_x,do_f;
2057   tensor vir,pres;
2058   rvec   mu_tot;
2059   int    nsteps;
2060   int    count=0;
2061   int    steps_accepted=0;
2062   /* not used */
2063   real   terminate=0;
2064
2065   s_min = init_em_state();
2066   s_try = init_em_state();
2067
2068   /* Init em and store the local state in s_try */
2069   init_em(fplog,SD,cr,inputrec,
2070           state_global,top_global,s_try,&top,&f,&f_global,
2071           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2072           nfile,fnm,&outf,&mdebin);
2073
2074   /* Print to log file  */
2075   print_em_start(fplog,cr,runtime,wcycle,SD);
2076
2077   /* Set variables for stepsize (in nm). This is the largest
2078    * step that we are going to make in any direction.
2079    */
2080   ustep = inputrec->em_stepsize;
2081   stepsize = 0;
2082
2083   /* Max number of steps  */
2084   nsteps = inputrec->nsteps;
2085
2086   if (MASTER(cr))
2087     /* Print to the screen  */
2088     sp_header(stderr,SD,inputrec->em_tol,nsteps);
2089   if (fplog)
2090     sp_header(fplog,SD,inputrec->em_tol,nsteps);
2091
2092   /**** HERE STARTS THE LOOP ****
2093    * count is the counter for the number of steps
2094    * bDone will be TRUE when the minimization has converged
2095    * bAbort will be TRUE when nsteps steps have been performed or when
2096    * the stepsize becomes smaller than is reasonable for machine precision
2097    */
2098   count  = 0;
2099   bDone  = FALSE;
2100   bAbort = FALSE;
2101   while( !bDone && !bAbort ) {
2102     bAbort = (nsteps >= 0) && (count == nsteps);
2103
2104     /* set new coordinates, except for first step */
2105     if (count > 0) {
2106       do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
2107                  constr,top,nrnb,wcycle,count);
2108     }
2109
2110     evaluate_energy(fplog,bVerbose,cr,
2111                     state_global,top_global,s_try,top,
2112                     inputrec,nrnb,wcycle,gstat,
2113                     vsite,constr,fcd,graph,mdatoms,fr,
2114                     mu_tot,enerd,vir,pres,count,count==0);
2115
2116     if (MASTER(cr))
2117       print_ebin_header(fplog,count,count,s_try->s.lambda);
2118
2119     if (count == 0)
2120       s_min->epot = s_try->epot + 1;
2121
2122     /* Print it if necessary  */
2123     if (MASTER(cr)) {
2124       if (bVerbose) {
2125         fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2126                 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2127                 (s_try->epot < s_min->epot) ? '\n' : '\r');
2128       }
2129
2130       if (s_try->epot < s_min->epot) {
2131         /* Store the new (lower) energies  */
2132         upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2133                    mdatoms->tmass,enerd,&s_try->s,s_try->s.box,
2134                    NULL,NULL,vir,pres,NULL,mu_tot,constr);
2135         print_ebin(outf->fp_ene,TRUE,
2136                    do_per_step(steps_accepted,inputrec->nstdisreout),
2137                    do_per_step(steps_accepted,inputrec->nstorireout),
2138                    fplog,count,count,eprNORMAL,TRUE,
2139                    mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2140         fflush(fplog);
2141       }
2142     }
2143
2144     /* Now if the new energy is smaller than the previous...
2145      * or if this is the first step!
2146      * or if we did random steps!
2147      */
2148
2149     if ( (count==0) || (s_try->epot < s_min->epot) ) {
2150       steps_accepted++;
2151
2152       /* Test whether the convergence criterion is met...  */
2153       bDone = (s_try->fmax < inputrec->em_tol);
2154
2155       /* Copy the arrays for force, positions and energy  */
2156       /* The 'Min' array always holds the coords and forces of the minimal
2157          sampled energy  */
2158       swap_em_state(s_min,s_try);
2159       if (count > 0)
2160         ustep *= 1.2;
2161
2162       /* Write to trn, if necessary */
2163       do_x = do_per_step(steps_accepted,inputrec->nstxout);
2164       do_f = do_per_step(steps_accepted,inputrec->nstfout);
2165       write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2166                     top_global,inputrec,count,
2167                     s_min,state_global,f_global);
2168     }
2169     else {
2170       /* If energy is not smaller make the step smaller...  */
2171       ustep *= 0.5;
2172
2173       if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2174         /* Reload the old state */
2175         em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2176                                s_min,top,mdatoms,fr,vsite,constr,
2177                                nrnb,wcycle);
2178       }
2179     }
2180
2181     /* Determine new step  */
2182     stepsize = ustep/s_min->fmax;
2183
2184     /* Check if stepsize is too small, with 1 nm as a characteristic length */
2185 #ifdef GMX_DOUBLE
2186         if (count == nsteps || ustep < 1e-12)
2187 #else
2188         if (count == nsteps || ustep < 1e-6)
2189 #endif
2190         {
2191             if (MASTER(cr))
2192             {
2193                 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2194                 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2195             }
2196             bAbort=TRUE;
2197         }
2198
2199     count++;
2200   } /* End of the loop  */
2201
2202     /* Print some shit...  */
2203   if (MASTER(cr))
2204     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2205   write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2206                 top_global,inputrec,count,
2207                 s_min,state_global,f_global);
2208
2209   fnormn = s_min->fnorm/sqrt(state_global->natoms);
2210
2211   if (MASTER(cr)) {
2212     print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2213                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2214     print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2215                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2216   }
2217
2218   finish_em(fplog,cr,outf,runtime,wcycle);
2219
2220   /* To print the actual number of steps we needed somewhere */
2221   inputrec->nsteps=count;
2222
2223   runtime->nsteps_done = count;
2224
2225   return 0;
2226 } /* That's all folks */
2227
2228
2229 double do_nm(FILE *fplog,t_commrec *cr,
2230              int nfile,const t_filenm fnm[],
2231              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2232              int nstglobalcomm,
2233              gmx_vsite_t *vsite,gmx_constr_t constr,
2234              int stepout,
2235              t_inputrec *inputrec,
2236              gmx_mtop_t *top_global,t_fcdata *fcd,
2237              t_state *state_global,
2238              t_mdatoms *mdatoms,
2239              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2240              gmx_edsam_t ed,
2241              t_forcerec *fr,
2242              int repl_ex_nst,int repl_ex_seed,
2243              gmx_membed_t *membed,
2244              real cpt_period,real max_hours,
2245              const char *deviceOptions,
2246              unsigned long Flags,
2247              gmx_runtime_t *runtime)
2248 {
2249     const char *NM = "Normal Mode Analysis";
2250     gmx_mdoutf_t *outf;
2251     int        natoms,atom,d;
2252     int        nnodes,node;
2253     rvec       *f_global;
2254     gmx_localtop_t *top;
2255     gmx_enerdata_t *enerd;
2256     rvec       *f;
2257     gmx_global_stat_t gstat;
2258     t_graph    *graph;
2259     real       t,lambda;
2260     gmx_bool       bNS;
2261     tensor     vir,pres;
2262     rvec       mu_tot;
2263     rvec       *fneg,*dfdx;
2264     gmx_bool       bSparse; /* use sparse matrix storage format */
2265     size_t     sz;
2266     gmx_sparsematrix_t * sparse_matrix = NULL;
2267     real *     full_matrix             = NULL;
2268     em_state_t *   state_work;
2269
2270     /* added with respect to mdrun */
2271     int        i,j,k,row,col;
2272     real       der_range=10.0*sqrt(GMX_REAL_EPS);
2273     real       x_min;
2274     real       fnorm,fmax;
2275
2276     if (constr != NULL)
2277     {
2278         gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2279     }
2280
2281     state_work = init_em_state();
2282
2283     /* Init em and store the local state in state_minimum */
2284     init_em(fplog,NM,cr,inputrec,
2285             state_global,top_global,state_work,&top,
2286             &f,&f_global,
2287             nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2288             nfile,fnm,&outf,NULL);
2289
2290     natoms = top_global->natoms;
2291     snew(fneg,natoms);
2292     snew(dfdx,natoms);
2293
2294 #ifndef GMX_DOUBLE
2295     if (MASTER(cr))
2296     {
2297         fprintf(stderr,
2298                 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2299                 "      which MIGHT not be accurate enough for normal mode analysis.\n"
2300                 "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
2301                 "      are fairly modest even if you recompile in double precision.\n\n");
2302     }
2303 #endif
2304
2305     /* Check if we can/should use sparse storage format.
2306      *
2307      * Sparse format is only useful when the Hessian itself is sparse, which it
2308       * will be when we use a cutoff.
2309       * For small systems (n<1000) it is easier to always use full matrix format, though.
2310       */
2311     if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2312     {
2313         fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2314         bSparse = FALSE;
2315     }
2316     else if(top_global->natoms < 1000)
2317     {
2318         fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2319         bSparse = FALSE;
2320     }
2321     else
2322     {
2323         fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2324         bSparse = TRUE;
2325     }
2326
2327     sz = DIM*top_global->natoms;
2328
2329     fprintf(stderr,"Allocating Hessian memory...\n\n");
2330
2331     if(bSparse)
2332     {
2333         sparse_matrix=gmx_sparsematrix_init(sz);
2334         sparse_matrix->compressed_symmetric = TRUE;
2335     }
2336     else
2337     {
2338         snew(full_matrix,sz*sz);
2339     }
2340
2341     /* Initial values */
2342     t      = inputrec->init_t;
2343     lambda = inputrec->init_lambda;
2344
2345     init_nrnb(nrnb);
2346
2347     where();
2348
2349     /* Write start time and temperature */
2350     print_em_start(fplog,cr,runtime,wcycle,NM);
2351
2352     /* fudge nr of steps to nr of atoms */
2353     inputrec->nsteps = natoms*2;
2354
2355     if (MASTER(cr))
2356     {
2357         fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2358                 *(top_global->name),(int)inputrec->nsteps);
2359     }
2360
2361     nnodes = cr->nnodes;
2362
2363     /* Make evaluate_energy do a single node force calculation */
2364     cr->nnodes = 1;
2365     evaluate_energy(fplog,bVerbose,cr,
2366                     state_global,top_global,state_work,top,
2367                     inputrec,nrnb,wcycle,gstat,
2368                     vsite,constr,fcd,graph,mdatoms,fr,
2369                     mu_tot,enerd,vir,pres,-1,TRUE);
2370     cr->nnodes = nnodes;
2371
2372     /* if forces are not small, warn user */
2373     get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2374
2375     if (MASTER(cr))
2376     {
2377         fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2378         if (state_work->fmax > 1.0e-3)
2379         {
2380             fprintf(stderr,"Maximum force probably not small enough to");
2381             fprintf(stderr," ensure that you are in an \nenergy well. ");
2382             fprintf(stderr,"Be aware that negative eigenvalues may occur");
2383             fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2384         }
2385     }
2386
2387     /***********************************************************
2388      *
2389      *      Loop over all pairs in matrix
2390      *
2391      *      do_force called twice. Once with positive and
2392      *      once with negative displacement
2393      *
2394      ************************************************************/
2395
2396     /* Steps are divided one by one over the nodes */
2397     for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2398     {
2399
2400         for (d=0; d<DIM; d++)
2401         {
2402             x_min = state_work->s.x[atom][d];
2403
2404             state_work->s.x[atom][d] = x_min - der_range;
2405
2406             /* Make evaluate_energy do a single node force calculation */
2407             cr->nnodes = 1;
2408             evaluate_energy(fplog,bVerbose,cr,
2409                             state_global,top_global,state_work,top,
2410                             inputrec,nrnb,wcycle,gstat,
2411                             vsite,constr,fcd,graph,mdatoms,fr,
2412                             mu_tot,enerd,vir,pres,atom*2,FALSE);
2413
2414             for(i=0; i<natoms; i++)
2415             {
2416                 copy_rvec(state_work->f[i], fneg[i]);
2417             }
2418
2419             state_work->s.x[atom][d] = x_min + der_range;
2420
2421             evaluate_energy(fplog,bVerbose,cr,
2422                             state_global,top_global,state_work,top,
2423                             inputrec,nrnb,wcycle,gstat,
2424                             vsite,constr,fcd,graph,mdatoms,fr,
2425                             mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2426             cr->nnodes = nnodes;
2427
2428             /* x is restored to original */
2429             state_work->s.x[atom][d] = x_min;
2430
2431             for(j=0; j<natoms; j++)
2432             {
2433                 for (k=0; (k<DIM); k++)
2434                 {
2435                     dfdx[j][k] =
2436                         -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2437                 }
2438             }
2439
2440             if (!MASTER(cr))
2441             {
2442 #ifdef GMX_MPI
2443 #ifdef GMX_DOUBLE
2444 #define mpi_type MPI_DOUBLE
2445 #else
2446 #define mpi_type MPI_FLOAT
2447 #endif
2448                 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2449                          cr->mpi_comm_mygroup);
2450 #endif
2451             }
2452             else
2453             {
2454                 for(node=0; (node<nnodes && atom+node<natoms); node++)
2455                 {
2456                     if (node > 0)
2457                     {
2458 #ifdef GMX_MPI
2459                         MPI_Status stat;
2460                         MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2461                                  cr->mpi_comm_mygroup,&stat);
2462 #undef mpi_type
2463 #endif
2464                     }
2465
2466                     row = (atom + node)*DIM + d;
2467
2468                     for(j=0; j<natoms; j++)
2469                     {
2470                         for(k=0; k<DIM; k++)
2471                         {
2472                             col = j*DIM + k;
2473
2474                             if (bSparse)
2475                             {
2476                                 if (col >= row && dfdx[j][k] != 0.0)
2477                                 {
2478                                     gmx_sparsematrix_increment_value(sparse_matrix,
2479                                                                      row,col,dfdx[j][k]);
2480                                 }
2481                             }
2482                             else
2483                             {
2484                                 full_matrix[row*sz+col] = dfdx[j][k];
2485                             }
2486                         }
2487                     }
2488                 }
2489             }
2490
2491             if (bVerbose && fplog)
2492             {
2493                 fflush(fplog);
2494             }
2495         }
2496         /* write progress */
2497         if (MASTER(cr) && bVerbose)
2498         {
2499             fprintf(stderr,"\rFinished step %d out of %d",
2500                     min(atom+nnodes,natoms),natoms);
2501             fflush(stderr);
2502         }
2503     }
2504
2505     if (MASTER(cr))
2506     {
2507         fprintf(stderr,"\n\nWriting Hessian...\n");
2508         gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2509     }
2510
2511     finish_em(fplog,cr,outf,runtime,wcycle);
2512
2513     runtime->nsteps_done = natoms*2;
2514
2515     return 0;
2516 }