src/mdlib/minimize.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41
  42 #include <string.h>
  43 #include <time.h>
  44 #include <math.h>
  45 #include "sysstuff.h"
  46 #include "string2.h"
  47 #include "network.h"
  48 #include "confio.h"
  49 #include "copyrite.h"
  50 #include "smalloc.h"
  51 #include "nrnb.h"
  52 #include "main.h"
  53 #include "force.h"
  54 #include "macros.h"
  55 #include "random.h"
  56 #include "names.h"
  57 #include "gmx_fatal.h"
  58 #include "txtdump.h"
  59 #include "typedefs.h"
  60 #include "update.h"
  61 #include "constr.h"
  62 #include "vec.h"
  63 #include "statutil.h"
  64 #include "tgroup.h"
  65 #include "mdebin.h"
  66 #include "vsite.h"
  67 #include "force.h"
  68 #include "mdrun.h"
  69 #include "md_support.h"
  70 #include "domdec.h"
  71 #include "partdec.h"
  72 #include "trnio.h"
  73 #include "sparsematrix.h"
  74 #include "mtxio.h"
  75 #include "mdatoms.h"
  76 #include "ns.h"
  77 #include "gmx_wallcycle.h"
  78 #include "mtop_util.h"
  79 #include "gmxfio.h"
  80 #include "pme.h"
  81 #include "bondf.h"
  82 #include "gmx_omp_nthreads.h"
  83
  84
  85 typedef struct {
  86   t_state s;
  87   rvec    *f;
  88   real    epot;
  89   real    fnorm;
  90   real    fmax;
  91   int     a_fmax;
  92 } em_state_t;
  93
  94 static em_state_t *init_em_state()
  95 {
  96   em_state_t *ems;
  97
  98   snew(ems,1);
  99
 100   /* does this need to be here?  Should the array be declared differently (staticaly)in the state definition? */
 101   snew(ems->s.lambda,efptNR);
 102
 103   return ems;
 104 }
 105
 106 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 107                            gmx_wallcycle_t wcycle,
 108                            const char *name)
 109 {
 110     char buf[STRLEN];
 111
 112     runtime_start(runtime);
 113
 114     sprintf(buf,"Started %s",name);
 115     print_date_and_time(fplog,cr->nodeid,buf,NULL);
 116
 117     wallcycle_start(wcycle,ewcRUN);
 118 }
 119 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 120                         gmx_wallcycle_t wcycle)
 121 {
 122     wallcycle_stop(wcycle,ewcRUN);
 123
 124     runtime_end(runtime);
 125 }
 126
 127 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
 128 {
 129     fprintf(out,"\n");
 130     fprintf(out,"%s:\n",minimizer);
 131     fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
 132     fprintf(out,"   Number of steps    = %12d\n",nsteps);
 133 }
 134
 135 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
 136 {
 137     char buffer[2048];
 138     if (bLastStep)
 139     {
 140         sprintf(buffer,
 141                 "\nEnergy minimization reached the maximum number"
 142                 "of steps before the forces reached the requested"
 143                 "precision Fmax < %g.\n",ftol);
 144     }
 145     else
 146     {
 147         sprintf(buffer,
 148                 "\nEnergy minimization has stopped, but the forces have"
 149                 "not converged to the requested precision Fmax < %g (which"
 150                 "may not be possible for your system). It stopped"
 151                 "because the algorithm tried to make a new step whose size"
 152                 "was too small, or there was no change in the energy since"
 153                 "last step. Either way, we regard the minimization as"
 154                 "converged to within the available machine precision,"
 155                 "given your starting configuration and EM parameters.\n%s%s",
 156                 ftol,
 157                 sizeof(real)<sizeof(double) ?
 158                 "\nDouble precision normally gives you higher accuracy, but"
 159                 "this is often not needed for preparing to run molecular"
 160                 "dynamics.\n" :
 161                 "",
 162                 bConstrain ?
 163                 "You might need to increase your constraint accuracy, or turn\n"
 164                 "off constraints altogether (set constraints = none in mdp file)\n" :
 165                 "");
 166     }
 167     fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
 168 }
 169
 170
 171
 172 static void print_converged(FILE *fp,const char *alg,real ftol,
 173                             gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
 174                             real epot,real fmax, int nfmax, real fnorm)
 175 {
 176   char buf[STEPSTRSIZE];
 177
 178   if (bDone)
 179     fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
 180             alg,ftol,gmx_step_str(count,buf));
 181   else if(count<nsteps)
 182     fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
 183                "but did not reach the requested Fmax < %g.\n",
 184             alg,gmx_step_str(count,buf),ftol);
 185   else
 186     fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
 187             alg,ftol,gmx_step_str(count,buf));
 188
 189 #ifdef GMX_DOUBLE
 190   fprintf(fp,"Potential Energy  = %21.14e\n",epot);
 191   fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1);
 192   fprintf(fp,"Norm of force     = %21.14e\n",fnorm);
 193 #else
 194   fprintf(fp,"Potential Energy  = %14.7e\n",epot);
 195   fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1);
 196   fprintf(fp,"Norm of force     = %14.7e\n",fnorm);
 197 #endif
 198 }
 199
 200 static void get_f_norm_max(t_commrec *cr,
 201                            t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
 202                            real *fnorm,real *fmax,int *a_fmax)
 203 {
 204   double fnorm2,*sum;
 205   real fmax2,fmax2_0,fam;
 206   int  la_max,a_max,start,end,i,m,gf;
 207
 208   /* This routine finds the largest force and returns it.
 209    * On parallel machines the global max is taken.
 210    */
 211   fnorm2 = 0;
 212   fmax2 = 0;
 213   la_max = -1;
 214   gf = 0;
 215   start = mdatoms->start;
 216   end   = mdatoms->homenr + start;
 217   if (mdatoms->cFREEZE) {
 218     for(i=start; i<end; i++) {
 219       gf = mdatoms->cFREEZE[i];
 220       fam = 0;
 221       for(m=0; m<DIM; m++)
 222         if (!opts->nFreeze[gf][m])
 223           fam += sqr(f[i][m]);
 224       fnorm2 += fam;
 225       if (fam > fmax2) {
 226         fmax2  = fam;
 227         la_max = i;
 228       }
 229     }
 230   } else {
 231     for(i=start; i<end; i++) {
 232       fam = norm2(f[i]);
 233       fnorm2 += fam;
 234       if (fam > fmax2) {
 235         fmax2  = fam;
 236         la_max = i;
 237       }
 238     }
 239   }
 240
 241   if (la_max >= 0 && DOMAINDECOMP(cr)) {
 242     a_max = cr->dd->gatindex[la_max];
 243   } else {
 244     a_max = la_max;
 245   }
 246   if (PAR(cr)) {
 247     snew(sum,2*cr->nnodes+1);
 248     sum[2*cr->nodeid]   = fmax2;
 249     sum[2*cr->nodeid+1] = a_max;
 250     sum[2*cr->nnodes]   = fnorm2;
 251     gmx_sumd(2*cr->nnodes+1,sum,cr);
 252     fnorm2 = sum[2*cr->nnodes];
 253     /* Determine the global maximum */
 254     for(i=0; i<cr->nnodes; i++) {
 255       if (sum[2*i] > fmax2) {
 256         fmax2 = sum[2*i];
 257         a_max = (int)(sum[2*i+1] + 0.5);
 258       }
 259     }
 260     sfree(sum);
 261   }
 262
 263   if (fnorm)
 264     *fnorm = sqrt(fnorm2);
 265   if (fmax)
 266     *fmax  = sqrt(fmax2);
 267   if (a_fmax)
 268     *a_fmax = a_max;
 269 }
 270
 271 static void get_state_f_norm_max(t_commrec *cr,
 272                            t_grpopts *opts,t_mdatoms *mdatoms,
 273                            em_state_t *ems)
 274 {
 275   get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
 276 }
 277
 278 void init_em(FILE *fplog,const char *title,
 279              t_commrec *cr,t_inputrec *ir,
 280              t_state *state_global,gmx_mtop_t *top_global,
 281              em_state_t *ems,gmx_localtop_t **top,
 282              rvec **f,rvec **f_global,
 283              t_nrnb *nrnb,rvec mu_tot,
 284              t_forcerec *fr,gmx_enerdata_t **enerd,
 285              t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
 286              gmx_vsite_t *vsite,gmx_constr_t constr,
 287              int nfile,const t_filenm fnm[],
 288              gmx_mdoutf_t **outf,t_mdebin **mdebin)
 289 {
 290     int  start,homenr,i;
 291     real dvdlambda;
 292
 293     if (fplog)
 294     {
 295         fprintf(fplog,"Initiating %s\n",title);
 296     }
 297
 298     state_global->ngtc = 0;
 299
 300     /* Initialize lambda variables */
 301     initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
 302
 303     init_nrnb(nrnb);
 304
 305     if (DOMAINDECOMP(cr))
 306     {
 307         *top = dd_init_local_top(top_global);
 308
 309         dd_init_local_state(cr->dd,state_global,&ems->s);
 310
 311         *f = NULL;
 312
 313         /* Distribute the charge groups over the nodes from the master node */
 314         dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 315                             state_global,top_global,ir,
 316                             &ems->s,&ems->f,mdatoms,*top,
 317                             fr,vsite,NULL,constr,
 318                             nrnb,NULL,FALSE);
 319         dd_store_state(cr->dd,&ems->s);
 320
 321         if (ir->nstfout)
 322         {
 323             snew(*f_global,top_global->natoms);
 324         }
 325         else
 326         {
 327             *f_global = NULL;
 328         }
 329         *graph = NULL;
 330     }
 331     else
 332     {
 333         snew(*f,top_global->natoms);
 334
 335         /* Just copy the state */
 336         ems->s = *state_global;
 337         snew(ems->s.x,ems->s.nalloc);
 338         snew(ems->f,ems->s.nalloc);
 339         for(i=0; i<state_global->natoms; i++)
 340         {
 341             copy_rvec(state_global->x[i],ems->s.x[i]);
 342         }
 343         copy_mat(state_global->box,ems->s.box);
 344
 345         if (PAR(cr) && ir->eI != eiNM)
 346         {
 347             /* Initialize the particle decomposition and split the topology */
 348             *top = split_system(fplog,top_global,ir,cr);
 349
 350             pd_cg_range(cr,&fr->cg0,&fr->hcg);
 351         }
 352         else
 353         {
 354             *top = gmx_mtop_generate_local_top(top_global,ir);
 355         }
 356         *f_global = *f;
 357
 358         forcerec_set_excl_load(fr,*top,cr);
 359
 360         init_bonded_thread_force_reduction(fr,&(*top)->idef);
 361
 362         if (ir->ePBC != epbcNONE && !fr->bMolPBC)
 363         {
 364             *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
 365         }
 366         else
 367         {
 368             *graph = NULL;
 369         }
 370
 371         if (PARTDECOMP(cr))
 372         {
 373             pd_at_range(cr,&start,&homenr);
 374             homenr -= start;
 375         }
 376         else
 377         {
 378             start  = 0;
 379             homenr = top_global->natoms;
 380         }
 381         atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
 382         update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
 383
 384         if (vsite)
 385         {
 386             set_vsite_top(vsite,*top,mdatoms,cr);
 387         }
 388     }
 389
 390     if (constr)
 391     {
 392         if (ir->eConstrAlg == econtSHAKE &&
 393             gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
 394         {
 395             gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
 396                       econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 397         }
 398
 399         if (!DOMAINDECOMP(cr))
 400         {
 401             set_constraints(constr,*top,ir,mdatoms,cr);
 402         }
 403
 404         if (!ir->bContinuation)
 405         {
 406             /* Constrain the starting coordinates */
 407             dvdlambda=0;
 408             constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
 409                       ir,NULL,cr,-1,0,mdatoms,
 410                       ems->s.x,ems->s.x,NULL,fr->bMolPBC,ems->s.box,
 411                       ems->s.lambda[efptFEP],&dvdlambda,
 412                       NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 413         }
 414     }
 415
 416     if (PAR(cr))
 417     {
 418         *gstat = global_stat_init(ir);
 419     }
 420
 421     *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
 422
 423     snew(*enerd,1);
 424     init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
 425                   *enerd);
 426
 427     if (mdebin != NULL)
 428     {
 429         /* Init bin for energy stuff */
 430         *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
 431     }
 432
 433     clear_rvec(mu_tot);
 434     calc_shifts(ems->s.box,fr->shift_vec);
 435 }
 436
 437 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
 438                       gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
 439 {
 440   if (!(cr->duty & DUTY_PME)) {
 441     /* Tell the PME only node to finish */
 442     gmx_pme_send_finish(cr);
 443   }
 444
 445   done_mdoutf(outf);
 446
 447   em_time_end(fplog,cr,runtime,wcycle);
 448 }
 449
 450 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
 451 {
 452   em_state_t tmp;
 453
 454   tmp   = *ems1;
 455   *ems1 = *ems2;
 456   *ems2 = tmp;
 457 }
 458
 459 static void copy_em_coords(em_state_t *ems,t_state *state)
 460 {
 461     int i;
 462
 463     for(i=0; (i<state->natoms); i++)
 464     {
 465         copy_rvec(ems->s.x[i],state->x[i]);
 466     }
 467 }
 468
 469 static void write_em_traj(FILE *fplog,t_commrec *cr,
 470                           gmx_mdoutf_t *outf,
 471                           gmx_bool bX,gmx_bool bF,const char *confout,
 472                           gmx_mtop_t *top_global,
 473                           t_inputrec *ir,gmx_large_int_t step,
 474                           em_state_t *state,
 475                           t_state *state_global,rvec *f_global)
 476 {
 477     int mdof_flags;
 478
 479     if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 480     {
 481         copy_em_coords(state,state_global);
 482         f_global = state->f;
 483     }
 484
 485     mdof_flags = 0;
 486     if (bX) { mdof_flags |= MDOF_X; }
 487     if (bF) { mdof_flags |= MDOF_F; }
 488     write_traj(fplog,cr,outf,mdof_flags,
 489                top_global,step,(double)step,
 490                &state->s,state_global,state->f,f_global,NULL,NULL);
 491
 492     if (confout != NULL && MASTER(cr))
 493     {
 494         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 495         {
 496             /* Make molecules whole only for confout writing */
 497             do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
 498                         state_global->x);
 499         }
 500
 501         write_sto_conf_mtop(confout,
 502                             *top_global->name,top_global,
 503                             state_global->x,NULL,ir->ePBC,state_global->box);
 504     }
 505 }
 506
 507 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
 508                        gmx_bool bMolPBC,
 509                        em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
 510                        gmx_constr_t constr,gmx_localtop_t *top,
 511                        t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 512                        gmx_large_int_t count)
 513
 514 {
 515     t_state *s1,*s2;
 516     int  i;
 517     int  start,end;
 518     rvec *x1,*x2;
 519     real dvdlambda;
 520
 521     s1 = &ems1->s;
 522     s2 = &ems2->s;
 523
 524     if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 525     {
 526         gmx_incons("state mismatch in do_em_step");
 527     }
 528
 529     s2->flags = s1->flags;
 530
 531     if (s2->nalloc != s1->nalloc)
 532     {
 533         s2->nalloc = s1->nalloc;
 534         srenew(s2->x,s1->nalloc);
 535         srenew(ems2->f,  s1->nalloc);
 536         if (s2->flags & (1<<estCGP))
 537         {
 538             srenew(s2->cg_p,  s1->nalloc);
 539         }
 540     }
 541
 542     s2->natoms = s1->natoms;
 543     copy_mat(s1->box,s2->box);
 544     /* Copy free energy state */
 545     for (i=0;i<efptNR;i++)
 546     {
 547         s2->lambda[i] = s1->lambda[i];
 548     }
 549     copy_mat(s1->box,s2->box);
 550
 551     start = md->start;
 552     end   = md->start + md->homenr;
 553
 554     x1 = s1->x;
 555     x2 = s2->x;
 556
 557 #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
 558     {
 559         int gf,i,m;
 560
 561         gf = 0;
 562 #pragma omp for schedule(static) nowait
 563         for(i=start; i<end; i++)
 564         {
 565             if (md->cFREEZE)
 566             {
 567                 gf = md->cFREEZE[i];
 568             }
 569             for(m=0; m<DIM; m++)
 570             {
 571                 if (ir->opts.nFreeze[gf][m])
 572                 {
 573                     x2[i][m] = x1[i][m];
 574                 }
 575                 else
 576                 {
 577                     x2[i][m] = x1[i][m] + a*f[i][m];
 578                 }
 579             }
 580         }
 581
 582         if (s2->flags & (1<<estCGP))
 583         {
 584             /* Copy the CG p vector */
 585             x1 = s1->cg_p;
 586             x2 = s2->cg_p;
 587 #pragma omp for schedule(static) nowait
 588             for(i=start; i<end; i++)
 589             {
 590                 copy_rvec(x1[i],x2[i]);
 591             }
 592         }
 593
 594         if (DOMAINDECOMP(cr))
 595         {
 596             s2->ddp_count = s1->ddp_count;
 597             if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
 598             {
 599 #pragma omp barrier
 600                 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 601                 srenew(s2->cg_gl,s2->cg_gl_nalloc);
 602 #pragma omp barrier
 603             }
 604             s2->ncg_gl = s1->ncg_gl;
 605 #pragma omp for schedule(static) nowait
 606             for(i=0; i<s2->ncg_gl; i++)
 607             {
 608                 s2->cg_gl[i] = s1->cg_gl[i];
 609             }
 610             s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 611         }
 612     }
 613
 614     if (constr)
 615     {
 616         wallcycle_start(wcycle,ewcCONSTR);
 617         dvdlambda = 0;
 618         constrain(NULL,TRUE,TRUE,constr,&top->idef,
 619                   ir,NULL,cr,count,0,md,
 620                   s1->x,s2->x,NULL,bMolPBC,s2->box,
 621                   s2->lambda[efptBONDED],&dvdlambda,
 622                   NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 623         wallcycle_stop(wcycle,ewcCONSTR);
 624     }
 625 }
 626
 627 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
 628                                    gmx_mtop_t *top_global,t_inputrec *ir,
 629                                    em_state_t *ems,gmx_localtop_t *top,
 630                                    t_mdatoms *mdatoms,t_forcerec *fr,
 631                                    gmx_vsite_t *vsite,gmx_constr_t constr,
 632                                    t_nrnb *nrnb,gmx_wallcycle_t wcycle)
 633 {
 634     /* Repartition the domain decomposition */
 635     wallcycle_start(wcycle,ewcDOMDEC);
 636     dd_partition_system(fplog,step,cr,FALSE,1,
 637                         NULL,top_global,ir,
 638                         &ems->s,&ems->f,
 639                         mdatoms,top,fr,vsite,NULL,constr,
 640                         nrnb,wcycle,FALSE);
 641     dd_store_state(cr->dd,&ems->s);
 642     wallcycle_stop(wcycle,ewcDOMDEC);
 643 }
 644
 645 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
 646                             t_state *state_global,gmx_mtop_t *top_global,
 647                             em_state_t *ems,gmx_localtop_t *top,
 648                             t_inputrec *inputrec,
 649                             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 650                             gmx_global_stat_t gstat,
 651                             gmx_vsite_t *vsite,gmx_constr_t constr,
 652                             t_fcdata *fcd,
 653                             t_graph *graph,t_mdatoms *mdatoms,
 654                             t_forcerec *fr,rvec mu_tot,
 655                             gmx_enerdata_t *enerd,tensor vir,tensor pres,
 656                             gmx_large_int_t count,gmx_bool bFirst)
 657 {
 658   real t;
 659   gmx_bool bNS;
 660   int  nabnsb;
 661   tensor force_vir,shake_vir,ekin;
 662   real dvdlambda,prescorr,enercorr,dvdlcorr;
 663   real terminate=0;
 664
 665   /* Set the time to the initial time, the time does not change during EM */
 666   t = inputrec->init_t;
 667
 668   if (bFirst ||
 669       (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
 670     /* This the first state or an old state used before the last ns */
 671     bNS = TRUE;
 672   } else {
 673     bNS = FALSE;
 674     if (inputrec->nstlist > 0) {
 675       bNS = TRUE;
 676     } else if (inputrec->nstlist == -1) {
 677       nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
 678       if (PAR(cr))
 679         gmx_sumi(1,&nabnsb,cr);
 680       bNS = (nabnsb > 0);
 681     }
 682   }
 683
 684   if (vsite)
 685     construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
 686                      top->idef.iparams,top->idef.il,
 687                      fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
 688
 689   if (DOMAINDECOMP(cr)) {
 690     if (bNS) {
 691       /* Repartition the domain decomposition */
 692       em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 693                              ems,top,mdatoms,fr,vsite,constr,
 694                              nrnb,wcycle);
 695     }
 696   }
 697
 698     /* Calc force & energy on new trial position  */
 699     /* do_force always puts the charge groups in the box and shifts again
 700      * We do not unshift, so molecules are always whole in congrad.c
 701      */
 702     do_force(fplog,cr,inputrec,
 703              count,nrnb,wcycle,top,top_global,&top_global->groups,
 704              ems->s.box,ems->s.x,&ems->s.hist,
 705              ems->f,force_vir,mdatoms,enerd,fcd,
 706              ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
 707              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
 708              GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
 709              (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
 710
 711     /* Clear the unused shake virial and pressure */
 712     clear_mat(shake_vir);
 713     clear_mat(pres);
 714
 715     /* Communicate stuff when parallel */
 716     if (PAR(cr) && inputrec->eI != eiNM)
 717     {
 718         wallcycle_start(wcycle,ewcMoveE);
 719
 720         global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
 721                     inputrec,NULL,NULL,NULL,1,&terminate,
 722                     top_global,&ems->s,FALSE,
 723                     CGLO_ENERGY |
 724                     CGLO_PRESSURE |
 725                     CGLO_CONSTRAINT |
 726                     CGLO_FIRSTITERATE);
 727
 728         wallcycle_stop(wcycle,ewcMoveE);
 729     }
 730
 731     /* Calculate long range corrections to pressure and energy */
 732     calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
 733                   pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
 734     enerd->term[F_DISPCORR] = enercorr;
 735     enerd->term[F_EPOT] += enercorr;
 736     enerd->term[F_PRES] += prescorr;
 737     enerd->term[F_DVDL] += dvdlcorr;
 738
 739   ems->epot = enerd->term[F_EPOT];
 740
 741   if (constr) {
 742     /* Project out the constraint components of the force */
 743     wallcycle_start(wcycle,ewcCONSTR);
 744     dvdlambda = 0;
 745     constrain(NULL,FALSE,FALSE,constr,&top->idef,
 746               inputrec,NULL,cr,count,0,mdatoms,
 747               ems->s.x,ems->f,ems->f,fr->bMolPBC,ems->s.box,
 748               ems->s.lambda[efptBONDED],&dvdlambda,
 749               NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
 750     if (fr->bSepDVDL && fplog)
 751       fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
 752     enerd->term[F_DVDL_BONDED] += dvdlambda;
 753     m_add(force_vir,shake_vir,vir);
 754     wallcycle_stop(wcycle,ewcCONSTR);
 755   } else {
 756     copy_mat(force_vir,vir);
 757   }
 758
 759   clear_mat(ekin);
 760   enerd->term[F_PRES] =
 761     calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
 762
 763   sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
 764
 765     if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 766     {
 767         get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
 768     }
 769 }
 770
 771 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 772                               gmx_mtop_t *mtop,
 773                               em_state_t *s_min,em_state_t *s_b)
 774 {
 775   rvec *fm,*fb,*fmg;
 776   t_block *cgs_gl;
 777   int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
 778   double partsum;
 779   unsigned char *grpnrFREEZE;
 780
 781   if (debug)
 782     fprintf(debug,"Doing reorder_partsum\n");
 783
 784   fm = s_min->f;
 785   fb = s_b->f;
 786
 787   cgs_gl = dd_charge_groups_global(cr->dd);
 788   index = cgs_gl->index;
 789
 790   /* Collect fm in a global vector fmg.
 791    * This conflicts with the spirit of domain decomposition,
 792    * but to fully optimize this a much more complicated algorithm is required.
 793    */
 794   snew(fmg,mtop->natoms);
 795
 796   ncg   = s_min->s.ncg_gl;
 797   cg_gl = s_min->s.cg_gl;
 798   i = 0;
 799   for(c=0; c<ncg; c++) {
 800     cg = cg_gl[c];
 801     a0 = index[cg];
 802     a1 = index[cg+1];
 803     for(a=a0; a<a1; a++) {
 804       copy_rvec(fm[i],fmg[a]);
 805       i++;
 806     }
 807   }
 808   gmx_sum(mtop->natoms*3,fmg[0],cr);
 809
 810   /* Now we will determine the part of the sum for the cgs in state s_b */
 811   ncg   = s_b->s.ncg_gl;
 812   cg_gl = s_b->s.cg_gl;
 813   partsum = 0;
 814   i = 0;
 815   gf = 0;
 816   grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 817   for(c=0; c<ncg; c++) {
 818     cg = cg_gl[c];
 819     a0 = index[cg];
 820     a1 = index[cg+1];
 821     for(a=a0; a<a1; a++) {
 822       if (mdatoms->cFREEZE && grpnrFREEZE) {
 823         gf = grpnrFREEZE[i];
 824       }
 825       for(m=0; m<DIM; m++) {
 826         if (!opts->nFreeze[gf][m]) {
 827           partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 828         }
 829       }
 830       i++;
 831     }
 832   }
 833
 834   sfree(fmg);
 835
 836   return partsum;
 837 }
 838
 839 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 840                     gmx_mtop_t *mtop,
 841                     em_state_t *s_min,em_state_t *s_b)
 842 {
 843   rvec *fm,*fb;
 844   double sum;
 845   int  gf,i,m;
 846
 847   /* This is just the classical Polak-Ribiere calculation of beta;
 848    * it looks a bit complicated since we take freeze groups into account,
 849    * and might have to sum it in parallel runs.
 850    */
 851
 852   if (!DOMAINDECOMP(cr) ||
 853       (s_min->s.ddp_count == cr->dd->ddp_count &&
 854        s_b->s.ddp_count   == cr->dd->ddp_count)) {
 855     fm = s_min->f;
 856     fb = s_b->f;
 857     sum = 0;
 858     gf = 0;
 859     /* This part of code can be incorrect with DD,
 860      * since the atom ordering in s_b and s_min might differ.
 861      */
 862     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 863       if (mdatoms->cFREEZE)
 864         gf = mdatoms->cFREEZE[i];
 865       for(m=0; m<DIM; m++)
 866         if (!opts->nFreeze[gf][m]) {
 867           sum += (fb[i][m] - fm[i][m])*fb[i][m];
 868         }
 869     }
 870   } else {
 871     /* We need to reorder cgs while summing */
 872     sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
 873   }
 874   if (PAR(cr))
 875     gmx_sumd(1,&sum,cr);
 876
 877   return sum/sqr(s_min->fnorm);
 878 }
 879
 880 double do_cg(FILE *fplog,t_commrec *cr,
 881              int nfile,const t_filenm fnm[],
 882              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 883              int nstglobalcomm,
 884              gmx_vsite_t *vsite,gmx_constr_t constr,
 885              int stepout,
 886              t_inputrec *inputrec,
 887              gmx_mtop_t *top_global,t_fcdata *fcd,
 888              t_state *state_global,
 889              t_mdatoms *mdatoms,
 890              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 891              gmx_edsam_t ed,
 892              t_forcerec *fr,
 893              int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 894              gmx_membed_t membed,
 895              real cpt_period,real max_hours,
 896              const char *deviceOptions,
 897              unsigned long Flags,
 898              gmx_runtime_t *runtime)
 899 {
 900   const char *CG="Polak-Ribiere Conjugate Gradients";
 901
 902   em_state_t *s_min,*s_a,*s_b,*s_c;
 903   gmx_localtop_t *top;
 904   gmx_enerdata_t *enerd;
 905   rvec   *f;
 906   gmx_global_stat_t gstat;
 907   t_graph    *graph;
 908   rvec   *f_global,*p,*sf,*sfm;
 909   double gpa,gpb,gpc,tmp,sum[2],minstep;
 910   real   fnormn;
 911   real   stepsize;
 912   real   a,b,c,beta=0.0;
 913   real   epot_repl=0;
 914   real   pnorm;
 915   t_mdebin   *mdebin;
 916   gmx_bool   converged,foundlower;
 917   rvec   mu_tot;
 918   gmx_bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
 919   tensor vir,pres;
 920   int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
 921   gmx_mdoutf_t *outf;
 922   int    i,m,gf,step,nminstep;
 923   real   terminate=0;
 924
 925   step=0;
 926
 927   s_min = init_em_state();
 928   s_a   = init_em_state();
 929   s_b   = init_em_state();
 930   s_c   = init_em_state();
 931
 932   /* Init em and store the local state in s_min */
 933   init_em(fplog,CG,cr,inputrec,
 934           state_global,top_global,s_min,&top,&f,&f_global,
 935           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 936           nfile,fnm,&outf,&mdebin);
 937
 938   /* Print to log file */
 939   print_em_start(fplog,cr,runtime,wcycle,CG);
 940
 941   /* Max number of steps */
 942   number_steps=inputrec->nsteps;
 943
 944   if (MASTER(cr))
 945     sp_header(stderr,CG,inputrec->em_tol,number_steps);
 946   if (fplog)
 947     sp_header(fplog,CG,inputrec->em_tol,number_steps);
 948
 949   /* Call the force routine and some auxiliary (neighboursearching etc.) */
 950   /* do_force always puts the charge groups in the box and shifts again
 951    * We do not unshift, so molecules are always whole in congrad.c
 952    */
 953   evaluate_energy(fplog,bVerbose,cr,
 954                   state_global,top_global,s_min,top,
 955                   inputrec,nrnb,wcycle,gstat,
 956                   vsite,constr,fcd,graph,mdatoms,fr,
 957                   mu_tot,enerd,vir,pres,-1,TRUE);
 958   where();
 959
 960   if (MASTER(cr)) {
 961     /* Copy stuff to the energy bin for easy printing etc. */
 962     upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 963                mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
 964                NULL,NULL,vir,pres,NULL,mu_tot,constr);
 965
 966     print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
 967     print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 968                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 969   }
 970   where();
 971
 972   /* Estimate/guess the initial stepsize */
 973   stepsize = inputrec->em_stepsize/s_min->fnorm;
 974
 975   if (MASTER(cr)) {
 976     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
 977             s_min->fmax,s_min->a_fmax+1);
 978     fprintf(stderr,"   F-Norm            = %12.5e\n",
 979             s_min->fnorm/sqrt(state_global->natoms));
 980     fprintf(stderr,"\n");
 981     /* and copy to the log file too... */
 982     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
 983             s_min->fmax,s_min->a_fmax+1);
 984     fprintf(fplog,"   F-Norm            = %12.5e\n",
 985             s_min->fnorm/sqrt(state_global->natoms));
 986     fprintf(fplog,"\n");
 987   }
 988   /* Start the loop over CG steps.
 989    * Each successful step is counted, and we continue until
 990    * we either converge or reach the max number of steps.
 991    */
 992   converged = FALSE;
 993   for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
 994
 995     /* start taking steps in a new direction
 996      * First time we enter the routine, beta=0, and the direction is
 997      * simply the negative gradient.
 998      */
 999
1000     /* Calculate the new direction in p, and the gradient in this direction, gpa */
1001     p  = s_min->s.cg_p;
1002     sf = s_min->f;
1003     gpa = 0;
1004     gf = 0;
1005     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1006       if (mdatoms->cFREEZE)
1007         gf = mdatoms->cFREEZE[i];
1008       for(m=0; m<DIM; m++) {
1009         if (!inputrec->opts.nFreeze[gf][m]) {
1010           p[i][m] = sf[i][m] + beta*p[i][m];
1011           gpa -= p[i][m]*sf[i][m];
1012           /* f is negative gradient, thus the sign */
1013         } else {
1014           p[i][m] = 0;
1015         }
1016       }
1017     }
1018
1019     /* Sum the gradient along the line across CPUs */
1020     if (PAR(cr))
1021       gmx_sumd(1,&gpa,cr);
1022
1023     /* Calculate the norm of the search vector */
1024     get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
1025
1026     /* Just in case stepsize reaches zero due to numerical precision... */
1027     if(stepsize<=0)
1028       stepsize = inputrec->em_stepsize/pnorm;
1029
1030     /*
1031      * Double check the value of the derivative in the search direction.
1032      * If it is positive it must be due to the old information in the
1033      * CG formula, so just remove that and start over with beta=0.
1034      * This corresponds to a steepest descent step.
1035      */
1036     if(gpa>0) {
1037       beta = 0;
1038       step--; /* Don't count this step since we are restarting */
1039       continue; /* Go back to the beginning of the big for-loop */
1040     }
1041
1042     /* Calculate minimum allowed stepsize, before the average (norm)
1043      * relative change in coordinate is smaller than precision
1044      */
1045     minstep=0;
1046     for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1047       for(m=0; m<DIM; m++) {
1048         tmp = fabs(s_min->s.x[i][m]);
1049         if(tmp < 1.0)
1050           tmp = 1.0;
1051         tmp = p[i][m]/tmp;
1052         minstep += tmp*tmp;
1053       }
1054     }
1055     /* Add up from all CPUs */
1056     if(PAR(cr))
1057       gmx_sumd(1,&minstep,cr);
1058
1059     minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1060
1061     if(stepsize<minstep) {
1062       converged=TRUE;
1063       break;
1064     }
1065
1066     /* Write coordinates if necessary */
1067     do_x = do_per_step(step,inputrec->nstxout);
1068     do_f = do_per_step(step,inputrec->nstfout);
1069
1070     write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1071                   top_global,inputrec,step,
1072                   s_min,state_global,f_global);
1073
1074     /* Take a step downhill.
1075      * In theory, we should minimize the function along this direction.
1076      * That is quite possible, but it turns out to take 5-10 function evaluations
1077      * for each line. However, we dont really need to find the exact minimum -
1078      * it is much better to start a new CG step in a modified direction as soon
1079      * as we are close to it. This will save a lot of energy evaluations.
1080      *
1081      * In practice, we just try to take a single step.
1082      * If it worked (i.e. lowered the energy), we increase the stepsize but
1083      * the continue straight to the next CG step without trying to find any minimum.
1084      * If it didn't work (higher energy), there must be a minimum somewhere between
1085      * the old position and the new one.
1086      *
1087      * Due to the finite numerical accuracy, it turns out that it is a good idea
1088      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1089      * This leads to lower final energies in the tests I've done. / Erik
1090      */
1091     s_a->epot = s_min->epot;
1092     a = 0.0;
1093     c = a + stepsize; /* reference position along line is zero */
1094
1095     if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1096       em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1097                              s_min,top,mdatoms,fr,vsite,constr,
1098                              nrnb,wcycle);
1099     }
1100
1101     /* Take a trial step (new coords in s_c) */
1102     do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,c,s_min->s.cg_p,s_c,
1103                constr,top,nrnb,wcycle,-1);
1104
1105     neval++;
1106     /* Calculate energy for the trial step */
1107     evaluate_energy(fplog,bVerbose,cr,
1108                     state_global,top_global,s_c,top,
1109                     inputrec,nrnb,wcycle,gstat,
1110                     vsite,constr,fcd,graph,mdatoms,fr,
1111                     mu_tot,enerd,vir,pres,-1,FALSE);
1112
1113     /* Calc derivative along line */
1114     p  = s_c->s.cg_p;
1115     sf = s_c->f;
1116     gpc=0;
1117     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1118       for(m=0; m<DIM; m++)
1119           gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
1120     }
1121     /* Sum the gradient along the line across CPUs */
1122     if (PAR(cr))
1123       gmx_sumd(1,&gpc,cr);
1124
1125     /* This is the max amount of increase in energy we tolerate */
1126     tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1127
1128     /* Accept the step if the energy is lower, or if it is not significantly higher
1129      * and the line derivative is still negative.
1130      */
1131     if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1132       foundlower = TRUE;
1133       /* Great, we found a better energy. Increase step for next iteration
1134        * if we are still going down, decrease it otherwise
1135        */
1136       if(gpc<0)
1137         stepsize *= 1.618034;  /* The golden section */
1138       else
1139         stepsize *= 0.618034;  /* 1/golden section */
1140     } else {
1141       /* New energy is the same or higher. We will have to do some work
1142        * to find a smaller value in the interval. Take smaller step next time!
1143        */
1144       foundlower = FALSE;
1145       stepsize *= 0.618034;
1146     }
1147
1148
1149
1150
1151     /* OK, if we didn't find a lower value we will have to locate one now - there must
1152      * be one in the interval [a=0,c].
1153      * The same thing is valid here, though: Don't spend dozens of iterations to find
1154      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1155      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1156      *
1157      * I also have a safeguard for potentially really patological functions so we never
1158      * take more than 20 steps before we give up ...
1159      *
1160      * If we already found a lower value we just skip this step and continue to the update.
1161      */
1162     if (!foundlower) {
1163       nminstep=0;
1164
1165       do {
1166         /* Select a new trial point.
1167          * If the derivatives at points a & c have different sign we interpolate to zero,
1168          * otherwise just do a bisection.
1169          */
1170         if(gpa<0 && gpc>0)
1171           b = a + gpa*(a-c)/(gpc-gpa);
1172         else
1173           b = 0.5*(a+c);
1174
1175         /* safeguard if interpolation close to machine accuracy causes errors:
1176          * never go outside the interval
1177          */
1178         if(b<=a || b>=c)
1179           b = 0.5*(a+c);
1180
1181         if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1182           /* Reload the old state */
1183           em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1184                                  s_min,top,mdatoms,fr,vsite,constr,
1185                                  nrnb,wcycle);
1186         }
1187
1188         /* Take a trial step to this new point - new coords in s_b */
1189         do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,b,s_min->s.cg_p,s_b,
1190                constr,top,nrnb,wcycle,-1);
1191
1192         neval++;
1193         /* Calculate energy for the trial step */
1194         evaluate_energy(fplog,bVerbose,cr,
1195                         state_global,top_global,s_b,top,
1196                         inputrec,nrnb,wcycle,gstat,
1197                         vsite,constr,fcd,graph,mdatoms,fr,
1198                         mu_tot,enerd,vir,pres,-1,FALSE);
1199
1200         /* p does not change within a step, but since the domain decomposition
1201          * might change, we have to use cg_p of s_b here.
1202          */
1203         p  = s_b->s.cg_p;
1204         sf = s_b->f;
1205         gpb=0;
1206         for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1207           for(m=0; m<DIM; m++)
1208               gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
1209         }
1210         /* Sum the gradient along the line across CPUs */
1211         if (PAR(cr))
1212           gmx_sumd(1,&gpb,cr);
1213
1214         if (debug)
1215           fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1216                   s_a->epot,s_b->epot,s_c->epot,gpb);
1217
1218         epot_repl = s_b->epot;
1219
1220         /* Keep one of the intervals based on the value of the derivative at the new point */
1221         if (gpb > 0) {
1222           /* Replace c endpoint with b */
1223           swap_em_state(s_b,s_c);
1224           c = b;
1225           gpc = gpb;
1226         } else {
1227           /* Replace a endpoint with b */
1228           swap_em_state(s_b,s_a);
1229           a = b;
1230           gpa = gpb;
1231         }
1232
1233         /*
1234          * Stop search as soon as we find a value smaller than the endpoints.
1235          * Never run more than 20 steps, no matter what.
1236          */
1237         nminstep++;
1238       } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1239                (nminstep < 20));
1240
1241       if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1242           nminstep >= 20) {
1243         /* OK. We couldn't find a significantly lower energy.
1244          * If beta==0 this was steepest descent, and then we give up.
1245          * If not, set beta=0 and restart with steepest descent before quitting.
1246          */
1247         if (beta == 0.0) {
1248           /* Converged */
1249           converged = TRUE;
1250           break;
1251         } else {
1252           /* Reset memory before giving up */
1253           beta = 0.0;
1254           continue;
1255         }
1256       }
1257
1258       /* Select min energy state of A & C, put the best in B.
1259        */
1260       if (s_c->epot < s_a->epot) {
1261         if (debug)
1262           fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1263                   s_c->epot,s_a->epot);
1264         swap_em_state(s_b,s_c);
1265         gpb = gpc;
1266         b = c;
1267       } else {
1268         if (debug)
1269           fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1270                   s_a->epot,s_c->epot);
1271         swap_em_state(s_b,s_a);
1272         gpb = gpa;
1273         b = a;
1274       }
1275
1276     } else {
1277       if (debug)
1278         fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1279                 s_c->epot);
1280       swap_em_state(s_b,s_c);
1281       gpb = gpc;
1282       b = c;
1283     }
1284
1285     /* new search direction */
1286     /* beta = 0 means forget all memory and restart with steepest descents. */
1287     if (nstcg && ((step % nstcg)==0))
1288       beta = 0.0;
1289     else {
1290       /* s_min->fnorm cannot be zero, because then we would have converged
1291        * and broken out.
1292        */
1293
1294       /* Polak-Ribiere update.
1295        * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1296        */
1297       beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1298     }
1299     /* Limit beta to prevent oscillations */
1300     if (fabs(beta) > 5.0)
1301       beta = 0.0;
1302
1303
1304     /* update positions */
1305     swap_em_state(s_min,s_b);
1306     gpa = gpb;
1307
1308     /* Print it if necessary */
1309     if (MASTER(cr)) {
1310       if(bVerbose)
1311         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1312                 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1313                 s_min->fmax,s_min->a_fmax+1);
1314       /* Store the new (lower) energies */
1315       upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1316                  mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
1317                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1318
1319       do_log = do_per_step(step,inputrec->nstlog);
1320       do_ene = do_per_step(step,inputrec->nstenergy);
1321       if(do_log)
1322           print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1323       print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1324                  do_log ? fplog : NULL,step,step,eprNORMAL,
1325                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1326     }
1327
1328     /* Stop when the maximum force lies below tolerance.
1329      * If we have reached machine precision, converged is already set to true.
1330      */
1331     converged = converged || (s_min->fmax < inputrec->em_tol);
1332
1333   } /* End of the loop */
1334
1335   if (converged)
1336     step--; /* we never took that last step in this case */
1337
1338     if (s_min->fmax > inputrec->em_tol)
1339     {
1340         if (MASTER(cr))
1341         {
1342             warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1343             warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1344         }
1345         converged = FALSE;
1346     }
1347
1348   if (MASTER(cr)) {
1349     /* If we printed energy and/or logfile last step (which was the last step)
1350      * we don't have to do it again, but otherwise print the final values.
1351      */
1352     if(!do_log) {
1353       /* Write final value to log since we didn't do anything the last step */
1354       print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1355     }
1356     if (!do_ene || !do_log) {
1357       /* Write final energy file entries */
1358       print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1359                  !do_log ? fplog : NULL,step,step,eprNORMAL,
1360                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1361     }
1362   }
1363
1364   /* Print some stuff... */
1365   if (MASTER(cr))
1366     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1367
1368   /* IMPORTANT!
1369    * For accurate normal mode calculation it is imperative that we
1370    * store the last conformation into the full precision binary trajectory.
1371    *
1372    * However, we should only do it if we did NOT already write this step
1373    * above (which we did if do_x or do_f was true).
1374    */
1375   do_x = !do_per_step(step,inputrec->nstxout);
1376   do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1377
1378   write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1379                 top_global,inputrec,step,
1380                 s_min,state_global,f_global);
1381
1382   fnormn = s_min->fnorm/sqrt(state_global->natoms);
1383
1384   if (MASTER(cr)) {
1385     print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1386                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1387     print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1388                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1389
1390     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1391   }
1392
1393   finish_em(fplog,cr,outf,runtime,wcycle);
1394
1395   /* To print the actual number of steps we needed somewhere */
1396   runtime->nsteps_done = step;
1397
1398   return 0;
1399 } /* That's all folks */
1400
1401
1402 double do_lbfgs(FILE *fplog,t_commrec *cr,
1403                 int nfile,const t_filenm fnm[],
1404                 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1405                 int nstglobalcomm,
1406                 gmx_vsite_t *vsite,gmx_constr_t constr,
1407                 int stepout,
1408                 t_inputrec *inputrec,
1409                 gmx_mtop_t *top_global,t_fcdata *fcd,
1410                 t_state *state,
1411                 t_mdatoms *mdatoms,
1412                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1413                 gmx_edsam_t ed,
1414                 t_forcerec *fr,
1415                 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1416                 gmx_membed_t membed,
1417                 real cpt_period,real max_hours,
1418                 const char *deviceOptions,
1419                 unsigned long Flags,
1420                 gmx_runtime_t *runtime)
1421 {
1422   static const char *LBFGS="Low-Memory BFGS Minimizer";
1423   em_state_t ems;
1424   gmx_localtop_t *top;
1425   gmx_enerdata_t *enerd;
1426   rvec   *f;
1427   gmx_global_stat_t gstat;
1428   t_graph    *graph;
1429   rvec   *f_global;
1430   int    ncorr,nmaxcorr,point,cp,neval,nminstep;
1431   double stepsize,gpa,gpb,gpc,tmp,minstep;
1432   real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1433   real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1434   real   a,b,c,maxdelta,delta;
1435   real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
1436   real   dgdx,dgdg,sq,yr,beta;
1437   t_mdebin   *mdebin;
1438   gmx_bool   converged,first;
1439   rvec   mu_tot;
1440   real   fnorm,fmax;
1441   gmx_bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
1442   tensor vir,pres;
1443   int    start,end,number_steps;
1444   gmx_mdoutf_t *outf;
1445   int    i,k,m,n,nfmax,gf,step;
1446   int    mdof_flags;
1447   /* not used */
1448   real   terminate;
1449
1450   if (PAR(cr))
1451     gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1452
1453   if (NULL != constr)
1454   {
1455       gmx_fatal(FARGS,"The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
1456   }
1457
1458   n = 3*state->natoms;
1459   nmaxcorr = inputrec->nbfgscorr;
1460
1461   /* Allocate memory */
1462   /* Use pointers to real so we dont have to loop over both atoms and
1463    * dimensions all the time...
1464    * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1465    * that point to the same memory.
1466    */
1467   snew(xa,n);
1468   snew(xb,n);
1469   snew(xc,n);
1470   snew(fa,n);
1471   snew(fb,n);
1472   snew(fc,n);
1473   snew(frozen,n);
1474
1475   snew(p,n);
1476   snew(lastx,n);
1477   snew(lastf,n);
1478   snew(rho,nmaxcorr);
1479   snew(alpha,nmaxcorr);
1480
1481   snew(dx,nmaxcorr);
1482   for(i=0;i<nmaxcorr;i++)
1483     snew(dx[i],n);
1484
1485   snew(dg,nmaxcorr);
1486   for(i=0;i<nmaxcorr;i++)
1487     snew(dg[i],n);
1488
1489   step = 0;
1490   neval = 0;
1491
1492   /* Init em */
1493   init_em(fplog,LBFGS,cr,inputrec,
1494           state,top_global,&ems,&top,&f,&f_global,
1495           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1496           nfile,fnm,&outf,&mdebin);
1497   /* Do_lbfgs is not completely updated like do_steep and do_cg,
1498    * so we free some memory again.
1499    */
1500   sfree(ems.s.x);
1501   sfree(ems.f);
1502
1503   xx = (real *)state->x;
1504   ff = (real *)f;
1505
1506   start = mdatoms->start;
1507   end   = mdatoms->homenr + start;
1508
1509   /* Print to log file */
1510   print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1511
1512   do_log = do_ene = do_x = do_f = TRUE;
1513
1514   /* Max number of steps */
1515   number_steps=inputrec->nsteps;
1516
1517   /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1518   gf = 0;
1519   for(i=start; i<end; i++) {
1520     if (mdatoms->cFREEZE)
1521       gf = mdatoms->cFREEZE[i];
1522      for(m=0; m<DIM; m++)
1523        frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1524   }
1525   if (MASTER(cr))
1526     sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1527   if (fplog)
1528     sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1529
1530   if (vsite)
1531     construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1532                      top->idef.iparams,top->idef.il,
1533                      fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1534
1535   /* Call the force routine and some auxiliary (neighboursearching etc.) */
1536   /* do_force always puts the charge groups in the box and shifts again
1537    * We do not unshift, so molecules are always whole
1538    */
1539   neval++;
1540   ems.s.x = state->x;
1541   ems.f = f;
1542   evaluate_energy(fplog,bVerbose,cr,
1543                   state,top_global,&ems,top,
1544                   inputrec,nrnb,wcycle,gstat,
1545                   vsite,constr,fcd,graph,mdatoms,fr,
1546                   mu_tot,enerd,vir,pres,-1,TRUE);
1547   where();
1548
1549   if (MASTER(cr)) {
1550     /* Copy stuff to the energy bin for easy printing etc. */
1551     upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1552                mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1553                NULL,NULL,vir,pres,NULL,mu_tot,constr);
1554
1555     print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1556     print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1557                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1558   }
1559   where();
1560
1561   /* This is the starting energy */
1562   Epot = enerd->term[F_EPOT];
1563
1564   fnorm = ems.fnorm;
1565   fmax  = ems.fmax;
1566   nfmax = ems.a_fmax;
1567
1568   /* Set the initial step.
1569    * since it will be multiplied by the non-normalized search direction
1570    * vector (force vector the first time), we scale it by the
1571    * norm of the force.
1572    */
1573
1574   if (MASTER(cr)) {
1575     fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1576     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1577     fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1578     fprintf(stderr,"\n");
1579     /* and copy to the log file too... */
1580     fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1581     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1582     fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1583     fprintf(fplog,"\n");
1584   }
1585
1586   point=0;
1587   for(i=0;i<n;i++)
1588     if(!frozen[i])
1589       dx[point][i] = ff[i];  /* Initial search direction */
1590     else
1591       dx[point][i] = 0;
1592
1593   stepsize = 1.0/fnorm;
1594   converged = FALSE;
1595
1596   /* Start the loop over BFGS steps.
1597    * Each successful step is counted, and we continue until
1598    * we either converge or reach the max number of steps.
1599    */
1600
1601   ncorr=0;
1602
1603   /* Set the gradient from the force */
1604   converged = FALSE;
1605   for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1606
1607     /* Write coordinates if necessary */
1608     do_x = do_per_step(step,inputrec->nstxout);
1609     do_f = do_per_step(step,inputrec->nstfout);
1610
1611     mdof_flags = 0;
1612     if (do_x)
1613     {
1614         mdof_flags |= MDOF_X;
1615     }
1616
1617     if (do_f)
1618     {
1619         mdof_flags |= MDOF_F;
1620     }
1621
1622     write_traj(fplog,cr,outf,mdof_flags,
1623                top_global,step,(real)step,state,state,f,f,NULL,NULL);
1624
1625     /* Do the linesearching in the direction dx[point][0..(n-1)] */
1626
1627     /* pointer to current direction - point=0 first time here */
1628     s=dx[point];
1629
1630     /* calculate line gradient */
1631     for(gpa=0,i=0;i<n;i++)
1632         gpa-=s[i]*ff[i];
1633
1634     /* Calculate minimum allowed stepsize, before the average (norm)
1635      * relative change in coordinate is smaller than precision
1636      */
1637     for(minstep=0,i=0;i<n;i++) {
1638       tmp=fabs(xx[i]);
1639       if(tmp<1.0)
1640         tmp=1.0;
1641       tmp = s[i]/tmp;
1642       minstep += tmp*tmp;
1643     }
1644     minstep = GMX_REAL_EPS/sqrt(minstep/n);
1645
1646     if(stepsize<minstep) {
1647       converged=TRUE;
1648       break;
1649     }
1650
1651     /* Store old forces and coordinates */
1652     for(i=0;i<n;i++) {
1653       lastx[i]=xx[i];
1654       lastf[i]=ff[i];
1655     }
1656     Epot0=Epot;
1657
1658     first=TRUE;
1659
1660     for(i=0;i<n;i++)
1661       xa[i]=xx[i];
1662
1663     /* Take a step downhill.
1664      * In theory, we should minimize the function along this direction.
1665      * That is quite possible, but it turns out to take 5-10 function evaluations
1666      * for each line. However, we dont really need to find the exact minimum -
1667      * it is much better to start a new BFGS step in a modified direction as soon
1668      * as we are close to it. This will save a lot of energy evaluations.
1669      *
1670      * In practice, we just try to take a single step.
1671      * If it worked (i.e. lowered the energy), we increase the stepsize but
1672      * the continue straight to the next BFGS step without trying to find any minimum.
1673      * If it didn't work (higher energy), there must be a minimum somewhere between
1674      * the old position and the new one.
1675      *
1676      * Due to the finite numerical accuracy, it turns out that it is a good idea
1677      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1678      * This leads to lower final energies in the tests I've done. / Erik
1679      */
1680     foundlower=FALSE;
1681     EpotA = Epot0;
1682     a = 0.0;
1683     c = a + stepsize; /* reference position along line is zero */
1684
1685     /* Check stepsize first. We do not allow displacements
1686      * larger than emstep.
1687      */
1688     do {
1689       c = a + stepsize;
1690       maxdelta=0;
1691       for(i=0;i<n;i++) {
1692         delta=c*s[i];
1693         if(delta>maxdelta)
1694           maxdelta=delta;
1695       }
1696       if(maxdelta>inputrec->em_stepsize)
1697         stepsize*=0.1;
1698     } while(maxdelta>inputrec->em_stepsize);
1699
1700     /* Take a trial step */
1701     for (i=0; i<n; i++)
1702       xc[i] = lastx[i] + c*s[i];
1703
1704     neval++;
1705     /* Calculate energy for the trial step */
1706     ems.s.x = (rvec *)xc;
1707     ems.f   = (rvec *)fc;
1708     evaluate_energy(fplog,bVerbose,cr,
1709                     state,top_global,&ems,top,
1710                     inputrec,nrnb,wcycle,gstat,
1711                     vsite,constr,fcd,graph,mdatoms,fr,
1712                     mu_tot,enerd,vir,pres,step,FALSE);
1713     EpotC = ems.epot;
1714
1715     /* Calc derivative along line */
1716     for(gpc=0,i=0; i<n; i++) {
1717         gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
1718     }
1719     /* Sum the gradient along the line across CPUs */
1720     if (PAR(cr))
1721       gmx_sumd(1,&gpc,cr);
1722
1723      /* This is the max amount of increase in energy we tolerate */
1724    tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1725
1726     /* Accept the step if the energy is lower, or if it is not significantly higher
1727      * and the line derivative is still negative.
1728      */
1729     if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1730       foundlower = TRUE;
1731       /* Great, we found a better energy. Increase step for next iteration
1732        * if we are still going down, decrease it otherwise
1733        */
1734       if(gpc<0)
1735         stepsize *= 1.618034;  /* The golden section */
1736       else
1737         stepsize *= 0.618034;  /* 1/golden section */
1738     } else {
1739       /* New energy is the same or higher. We will have to do some work
1740        * to find a smaller value in the interval. Take smaller step next time!
1741        */
1742       foundlower = FALSE;
1743       stepsize *= 0.618034;
1744     }
1745
1746     /* OK, if we didn't find a lower value we will have to locate one now - there must
1747      * be one in the interval [a=0,c].
1748      * The same thing is valid here, though: Don't spend dozens of iterations to find
1749      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1750      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1751      *
1752      * I also have a safeguard for potentially really patological functions so we never
1753      * take more than 20 steps before we give up ...
1754      *
1755      * If we already found a lower value we just skip this step and continue to the update.
1756      */
1757
1758     if(!foundlower) {
1759
1760       nminstep=0;
1761       do {
1762         /* Select a new trial point.
1763          * If the derivatives at points a & c have different sign we interpolate to zero,
1764          * otherwise just do a bisection.
1765          */
1766
1767         if(gpa<0 && gpc>0)
1768           b = a + gpa*(a-c)/(gpc-gpa);
1769         else
1770           b = 0.5*(a+c);
1771
1772         /* safeguard if interpolation close to machine accuracy causes errors:
1773          * never go outside the interval
1774          */
1775         if(b<=a || b>=c)
1776           b = 0.5*(a+c);
1777
1778         /* Take a trial step */
1779         for (i=0; i<n; i++)
1780           xb[i] = lastx[i] + b*s[i];
1781
1782         neval++;
1783         /* Calculate energy for the trial step */
1784         ems.s.x = (rvec *)xb;
1785         ems.f   = (rvec *)fb;
1786         evaluate_energy(fplog,bVerbose,cr,
1787                         state,top_global,&ems,top,
1788                         inputrec,nrnb,wcycle,gstat,
1789                         vsite,constr,fcd,graph,mdatoms,fr,
1790                         mu_tot,enerd,vir,pres,step,FALSE);
1791         EpotB = ems.epot;
1792
1793         fnorm = ems.fnorm;
1794
1795         for(gpb=0,i=0; i<n; i++)
1796           gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
1797
1798         /* Sum the gradient along the line across CPUs */
1799         if (PAR(cr))
1800           gmx_sumd(1,&gpb,cr);
1801
1802         /* Keep one of the intervals based on the value of the derivative at the new point */
1803         if(gpb>0) {
1804           /* Replace c endpoint with b */
1805           EpotC = EpotB;
1806           c = b;
1807           gpc = gpb;
1808           /* swap coord pointers b/c */
1809           xtmp = xb;
1810           ftmp = fb;
1811           xb = xc;
1812           fb = fc;
1813           xc = xtmp;
1814           fc = ftmp;
1815         } else {
1816           /* Replace a endpoint with b */
1817           EpotA = EpotB;
1818           a = b;
1819           gpa = gpb;
1820           /* swap coord pointers a/b */
1821           xtmp = xb;
1822           ftmp = fb;
1823           xb = xa;
1824           fb = fa;
1825           xa = xtmp;
1826           fa = ftmp;
1827         }
1828
1829         /*
1830          * Stop search as soon as we find a value smaller than the endpoints,
1831          * or if the tolerance is below machine precision.
1832          * Never run more than 20 steps, no matter what.
1833          */
1834         nminstep++;
1835       } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1836
1837       if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1838         /* OK. We couldn't find a significantly lower energy.
1839          * If ncorr==0 this was steepest descent, and then we give up.
1840          * If not, reset memory to restart as steepest descent before quitting.
1841          */
1842         if(ncorr==0) {
1843         /* Converged */
1844           converged=TRUE;
1845           break;
1846         } else {
1847           /* Reset memory */
1848           ncorr=0;
1849           /* Search in gradient direction */
1850           for(i=0;i<n;i++)
1851             dx[point][i]=ff[i];
1852           /* Reset stepsize */
1853           stepsize = 1.0/fnorm;
1854           continue;
1855         }
1856       }
1857
1858       /* Select min energy state of A & C, put the best in xx/ff/Epot
1859        */
1860       if(EpotC<EpotA) {
1861         Epot = EpotC;
1862         /* Use state C */
1863         for(i=0;i<n;i++) {
1864           xx[i]=xc[i];
1865           ff[i]=fc[i];
1866         }
1867         stepsize=c;
1868       } else {
1869         Epot = EpotA;
1870         /* Use state A */
1871         for(i=0;i<n;i++) {
1872           xx[i]=xa[i];
1873           ff[i]=fa[i];
1874         }
1875         stepsize=a;
1876       }
1877
1878     } else {
1879       /* found lower */
1880       Epot = EpotC;
1881       /* Use state C */
1882       for(i=0;i<n;i++) {
1883         xx[i]=xc[i];
1884         ff[i]=fc[i];
1885       }
1886       stepsize=c;
1887     }
1888
1889     /* Update the memory information, and calculate a new
1890      * approximation of the inverse hessian
1891      */
1892
1893     /* Have new data in Epot, xx, ff */
1894     if(ncorr<nmaxcorr)
1895       ncorr++;
1896
1897     for(i=0;i<n;i++) {
1898       dg[point][i]=lastf[i]-ff[i];
1899       dx[point][i]*=stepsize;
1900     }
1901
1902     dgdg=0;
1903     dgdx=0;
1904     for(i=0;i<n;i++) {
1905       dgdg+=dg[point][i]*dg[point][i];
1906       dgdx+=dg[point][i]*dx[point][i];
1907     }
1908
1909     diag=dgdx/dgdg;
1910
1911     rho[point]=1.0/dgdx;
1912     point++;
1913
1914     if(point>=nmaxcorr)
1915       point=0;
1916
1917     /* Update */
1918     for(i=0;i<n;i++)
1919       p[i]=ff[i];
1920
1921     cp=point;
1922
1923     /* Recursive update. First go back over the memory points */
1924     for(k=0;k<ncorr;k++) {
1925       cp--;
1926       if(cp<0)
1927         cp=ncorr-1;
1928
1929       sq=0;
1930       for(i=0;i<n;i++)
1931         sq+=dx[cp][i]*p[i];
1932
1933       alpha[cp]=rho[cp]*sq;
1934
1935       for(i=0;i<n;i++)
1936         p[i] -= alpha[cp]*dg[cp][i];
1937     }
1938
1939     for(i=0;i<n;i++)
1940       p[i] *= diag;
1941
1942     /* And then go forward again */
1943     for(k=0;k<ncorr;k++) {
1944       yr = 0;
1945       for(i=0;i<n;i++)
1946         yr += p[i]*dg[cp][i];
1947
1948       beta = rho[cp]*yr;
1949       beta = alpha[cp]-beta;
1950
1951       for(i=0;i<n;i++)
1952         p[i] += beta*dx[cp][i];
1953
1954       cp++;
1955       if(cp>=ncorr)
1956         cp=0;
1957     }
1958
1959     for(i=0;i<n;i++)
1960       if(!frozen[i])
1961         dx[point][i] = p[i];
1962       else
1963         dx[point][i] = 0;
1964
1965     stepsize=1.0;
1966
1967     /* Test whether the convergence criterion is met */
1968     get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1969
1970     /* Print it if necessary */
1971     if (MASTER(cr)) {
1972       if(bVerbose)
1973         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1974                 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1975       /* Store the new (lower) energies */
1976       upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1977                  mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1978                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1979       do_log = do_per_step(step,inputrec->nstlog);
1980       do_ene = do_per_step(step,inputrec->nstenergy);
1981       if(do_log)
1982           print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1983       print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1984                  do_log ? fplog : NULL,step,step,eprNORMAL,
1985                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1986     }
1987
1988     /* Stop when the maximum force lies below tolerance.
1989      * If we have reached machine precision, converged is already set to true.
1990      */
1991
1992     converged = converged || (fmax < inputrec->em_tol);
1993
1994   } /* End of the loop */
1995
1996   if(converged)
1997     step--; /* we never took that last step in this case */
1998
1999     if(fmax>inputrec->em_tol)
2000     {
2001         if (MASTER(cr))
2002         {
2003             warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
2004             warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
2005         }
2006         converged = FALSE;
2007     }
2008
2009   /* If we printed energy and/or logfile last step (which was the last step)
2010    * we don't have to do it again, but otherwise print the final values.
2011    */
2012   if(!do_log) /* Write final value to log since we didn't do anythin last step */
2013     print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
2014   if(!do_ene || !do_log) /* Write final energy file entries */
2015     print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
2016                !do_log ? fplog : NULL,step,step,eprNORMAL,
2017                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2018
2019   /* Print some stuff... */
2020   if (MASTER(cr))
2021     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2022
2023   /* IMPORTANT!
2024    * For accurate normal mode calculation it is imperative that we
2025    * store the last conformation into the full precision binary trajectory.
2026    *
2027    * However, we should only do it if we did NOT already write this step
2028    * above (which we did if do_x or do_f was true).
2029    */
2030   do_x = !do_per_step(step,inputrec->nstxout);
2031   do_f = !do_per_step(step,inputrec->nstfout);
2032   write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
2033                 top_global,inputrec,step,
2034                 &ems,state,f);
2035
2036   if (MASTER(cr)) {
2037     print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
2038                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2039     print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
2040                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2041
2042     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
2043   }
2044
2045   finish_em(fplog,cr,outf,runtime,wcycle);
2046
2047   /* To print the actual number of steps we needed somewhere */
2048   runtime->nsteps_done = step;
2049
2050   return 0;
2051 } /* That's all folks */
2052
2053
2054 double do_steep(FILE *fplog,t_commrec *cr,
2055                 int nfile, const t_filenm fnm[],
2056                 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2057                 int nstglobalcomm,
2058                 gmx_vsite_t *vsite,gmx_constr_t constr,
2059                 int stepout,
2060                 t_inputrec *inputrec,
2061                 gmx_mtop_t *top_global,t_fcdata *fcd,
2062                 t_state *state_global,
2063                 t_mdatoms *mdatoms,
2064                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2065                 gmx_edsam_t ed,
2066                 t_forcerec *fr,
2067                 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2068                 gmx_membed_t membed,
2069                 real cpt_period,real max_hours,
2070                 const char *deviceOptions,
2071                 unsigned long Flags,
2072                 gmx_runtime_t *runtime)
2073 {
2074   const char *SD="Steepest Descents";
2075   em_state_t *s_min,*s_try;
2076   rvec       *f_global;
2077   gmx_localtop_t *top;
2078   gmx_enerdata_t *enerd;
2079   rvec   *f;
2080   gmx_global_stat_t gstat;
2081   t_graph    *graph;
2082   real   stepsize,constepsize;
2083   real   ustep,dvdlambda,fnormn;
2084   gmx_mdoutf_t *outf;
2085   t_mdebin   *mdebin;
2086   gmx_bool   bDone,bAbort,do_x,do_f;
2087   tensor vir,pres;
2088   rvec   mu_tot;
2089   int    nsteps;
2090   int    count=0;
2091   int    steps_accepted=0;
2092   /* not used */
2093   real   terminate=0;
2094
2095   s_min = init_em_state();
2096   s_try = init_em_state();
2097
2098   /* Init em and store the local state in s_try */
2099   init_em(fplog,SD,cr,inputrec,
2100           state_global,top_global,s_try,&top,&f,&f_global,
2101           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2102           nfile,fnm,&outf,&mdebin);
2103
2104   /* Print to log file  */
2105   print_em_start(fplog,cr,runtime,wcycle,SD);
2106
2107   /* Set variables for stepsize (in nm). This is the largest
2108    * step that we are going to make in any direction.
2109    */
2110   ustep = inputrec->em_stepsize;
2111   stepsize = 0;
2112
2113   /* Max number of steps  */
2114   nsteps = inputrec->nsteps;
2115
2116   if (MASTER(cr))
2117     /* Print to the screen  */
2118     sp_header(stderr,SD,inputrec->em_tol,nsteps);
2119   if (fplog)
2120     sp_header(fplog,SD,inputrec->em_tol,nsteps);
2121
2122   /**** HERE STARTS THE LOOP ****
2123    * count is the counter for the number of steps
2124    * bDone will be TRUE when the minimization has converged
2125    * bAbort will be TRUE when nsteps steps have been performed or when
2126    * the stepsize becomes smaller than is reasonable for machine precision
2127    */
2128   count  = 0;
2129   bDone  = FALSE;
2130   bAbort = FALSE;
2131   while( !bDone && !bAbort ) {
2132     bAbort = (nsteps >= 0) && (count == nsteps);
2133
2134     /* set new coordinates, except for first step */
2135     if (count > 0) {
2136         do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,
2137                    s_min,stepsize,s_min->f,s_try,
2138                    constr,top,nrnb,wcycle,count);
2139     }
2140
2141     evaluate_energy(fplog,bVerbose,cr,
2142                     state_global,top_global,s_try,top,
2143                     inputrec,nrnb,wcycle,gstat,
2144                     vsite,constr,fcd,graph,mdatoms,fr,
2145                     mu_tot,enerd,vir,pres,count,count==0);
2146
2147     if (MASTER(cr))
2148       print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
2149
2150     if (count == 0)
2151       s_min->epot = s_try->epot + 1;
2152
2153     /* Print it if necessary  */
2154     if (MASTER(cr)) {
2155       if (bVerbose) {
2156         fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2157                 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2158                 (s_try->epot < s_min->epot) ? '\n' : '\r');
2159       }
2160
2161       if (s_try->epot < s_min->epot) {
2162         /* Store the new (lower) energies  */
2163         upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2164                    mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
2165                    s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
2166         print_ebin(outf->fp_ene,TRUE,
2167                    do_per_step(steps_accepted,inputrec->nstdisreout),
2168                    do_per_step(steps_accepted,inputrec->nstorireout),
2169                    fplog,count,count,eprNORMAL,TRUE,
2170                    mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2171         fflush(fplog);
2172       }
2173     }
2174
2175     /* Now if the new energy is smaller than the previous...
2176      * or if this is the first step!
2177      * or if we did random steps!
2178      */
2179
2180     if ( (count==0) || (s_try->epot < s_min->epot) ) {
2181       steps_accepted++;
2182
2183       /* Test whether the convergence criterion is met...  */
2184       bDone = (s_try->fmax < inputrec->em_tol);
2185
2186       /* Copy the arrays for force, positions and energy  */
2187       /* The 'Min' array always holds the coords and forces of the minimal
2188          sampled energy  */
2189       swap_em_state(s_min,s_try);
2190       if (count > 0)
2191         ustep *= 1.2;
2192
2193       /* Write to trn, if necessary */
2194       do_x = do_per_step(steps_accepted,inputrec->nstxout);
2195       do_f = do_per_step(steps_accepted,inputrec->nstfout);
2196       write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2197                     top_global,inputrec,count,
2198                     s_min,state_global,f_global);
2199     }
2200     else {
2201       /* If energy is not smaller make the step smaller...  */
2202       ustep *= 0.5;
2203
2204       if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2205         /* Reload the old state */
2206         em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2207                                s_min,top,mdatoms,fr,vsite,constr,
2208                                nrnb,wcycle);
2209       }
2210     }
2211
2212     /* Determine new step  */
2213     stepsize = ustep/s_min->fmax;
2214
2215     /* Check if stepsize is too small, with 1 nm as a characteristic length */
2216 #ifdef GMX_DOUBLE
2217         if (count == nsteps || ustep < 1e-12)
2218 #else
2219         if (count == nsteps || ustep < 1e-6)
2220 #endif
2221         {
2222             if (MASTER(cr))
2223             {
2224                 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2225                 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2226             }
2227             bAbort=TRUE;
2228         }
2229
2230     count++;
2231   } /* End of the loop  */
2232
2233     /* Print some shit...  */
2234   if (MASTER(cr))
2235     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2236   write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2237                 top_global,inputrec,count,
2238                 s_min,state_global,f_global);
2239
2240   fnormn = s_min->fnorm/sqrt(state_global->natoms);
2241
2242   if (MASTER(cr)) {
2243     print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2244                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2245     print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2246                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2247   }
2248
2249   finish_em(fplog,cr,outf,runtime,wcycle);
2250
2251   /* To print the actual number of steps we needed somewhere */
2252   inputrec->nsteps=count;
2253
2254   runtime->nsteps_done = count;
2255
2256   return 0;
2257 } /* That's all folks */
2258
2259
2260 double do_nm(FILE *fplog,t_commrec *cr,
2261              int nfile,const t_filenm fnm[],
2262              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2263              int nstglobalcomm,
2264              gmx_vsite_t *vsite,gmx_constr_t constr,
2265              int stepout,
2266              t_inputrec *inputrec,
2267              gmx_mtop_t *top_global,t_fcdata *fcd,
2268              t_state *state_global,
2269              t_mdatoms *mdatoms,
2270              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2271              gmx_edsam_t ed,
2272              t_forcerec *fr,
2273              int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2274              gmx_membed_t membed,
2275              real cpt_period,real max_hours,
2276              const char *deviceOptions,
2277              unsigned long Flags,
2278              gmx_runtime_t *runtime)
2279 {
2280     const char *NM = "Normal Mode Analysis";
2281     gmx_mdoutf_t *outf;
2282     int        natoms,atom,d;
2283     int        nnodes,node;
2284     rvec       *f_global;
2285     gmx_localtop_t *top;
2286     gmx_enerdata_t *enerd;
2287     rvec       *f;
2288     gmx_global_stat_t gstat;
2289     t_graph    *graph;
2290     real       t,t0,lambda,lam0;
2291     gmx_bool       bNS;
2292     tensor     vir,pres;
2293     rvec       mu_tot;
2294     rvec       *fneg,*dfdx;
2295     gmx_bool       bSparse; /* use sparse matrix storage format */
2296     size_t     sz;
2297     gmx_sparsematrix_t * sparse_matrix = NULL;
2298     real *     full_matrix             = NULL;
2299     em_state_t *   state_work;
2300
2301     /* added with respect to mdrun */
2302     int        i,j,k,row,col;
2303     real       der_range=10.0*sqrt(GMX_REAL_EPS);
2304     real       x_min;
2305     real       fnorm,fmax;
2306
2307     if (constr != NULL)
2308     {
2309         gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2310     }
2311
2312     state_work = init_em_state();
2313
2314     /* Init em and store the local state in state_minimum */
2315     init_em(fplog,NM,cr,inputrec,
2316             state_global,top_global,state_work,&top,
2317             &f,&f_global,
2318             nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2319             nfile,fnm,&outf,NULL);
2320
2321     natoms = top_global->natoms;
2322     snew(fneg,natoms);
2323     snew(dfdx,natoms);
2324
2325 #ifndef GMX_DOUBLE
2326     if (MASTER(cr))
2327     {
2328         fprintf(stderr,
2329                 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2330                 "      which MIGHT not be accurate enough for normal mode analysis.\n"
2331                 "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
2332                 "      are fairly modest even if you recompile in double precision.\n\n");
2333     }
2334 #endif
2335
2336     /* Check if we can/should use sparse storage format.
2337      *
2338      * Sparse format is only useful when the Hessian itself is sparse, which it
2339       * will be when we use a cutoff.
2340       * For small systems (n<1000) it is easier to always use full matrix format, though.
2341       */
2342     if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2343     {
2344         fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2345         bSparse = FALSE;
2346     }
2347     else if(top_global->natoms < 1000)
2348     {
2349         fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2350         bSparse = FALSE;
2351     }
2352     else
2353     {
2354         fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2355         bSparse = TRUE;
2356     }
2357
2358     sz = DIM*top_global->natoms;
2359
2360     fprintf(stderr,"Allocating Hessian memory...\n\n");
2361
2362     if(bSparse)
2363     {
2364         sparse_matrix=gmx_sparsematrix_init(sz);
2365         sparse_matrix->compressed_symmetric = TRUE;
2366     }
2367     else
2368     {
2369         snew(full_matrix,sz*sz);
2370     }
2371
2372     /* Initial values */
2373     t0           = inputrec->init_t;
2374     lam0         = inputrec->fepvals->init_lambda;
2375     t            = t0;
2376     lambda       = lam0;
2377
2378     init_nrnb(nrnb);
2379
2380     where();
2381
2382     /* Write start time and temperature */
2383     print_em_start(fplog,cr,runtime,wcycle,NM);
2384
2385     /* fudge nr of steps to nr of atoms */
2386     inputrec->nsteps = natoms*2;
2387
2388     if (MASTER(cr))
2389     {
2390         fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2391                 *(top_global->name),(int)inputrec->nsteps);
2392     }
2393
2394     nnodes = cr->nnodes;
2395
2396     /* Make evaluate_energy do a single node force calculation */
2397     cr->nnodes = 1;
2398     evaluate_energy(fplog,bVerbose,cr,
2399                     state_global,top_global,state_work,top,
2400                     inputrec,nrnb,wcycle,gstat,
2401                     vsite,constr,fcd,graph,mdatoms,fr,
2402                     mu_tot,enerd,vir,pres,-1,TRUE);
2403     cr->nnodes = nnodes;
2404
2405     /* if forces are not small, warn user */
2406     get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2407
2408     if (MASTER(cr))
2409     {
2410         fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2411         if (state_work->fmax > 1.0e-3)
2412         {
2413             fprintf(stderr,"Maximum force probably not small enough to");
2414             fprintf(stderr," ensure that you are in an \nenergy well. ");
2415             fprintf(stderr,"Be aware that negative eigenvalues may occur");
2416             fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2417         }
2418     }
2419
2420     /***********************************************************
2421      *
2422      *      Loop over all pairs in matrix
2423      *
2424      *      do_force called twice. Once with positive and
2425      *      once with negative displacement
2426      *
2427      ************************************************************/
2428
2429     /* Steps are divided one by one over the nodes */
2430     for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2431     {
2432
2433         for (d=0; d<DIM; d++)
2434         {
2435             x_min = state_work->s.x[atom][d];
2436
2437             state_work->s.x[atom][d] = x_min - der_range;
2438
2439             /* Make evaluate_energy do a single node force calculation */
2440             cr->nnodes = 1;
2441             evaluate_energy(fplog,bVerbose,cr,
2442                             state_global,top_global,state_work,top,
2443                             inputrec,nrnb,wcycle,gstat,
2444                             vsite,constr,fcd,graph,mdatoms,fr,
2445                             mu_tot,enerd,vir,pres,atom*2,FALSE);
2446
2447             for(i=0; i<natoms; i++)
2448             {
2449                 copy_rvec(state_work->f[i], fneg[i]);
2450             }
2451
2452             state_work->s.x[atom][d] = x_min + der_range;
2453
2454             evaluate_energy(fplog,bVerbose,cr,
2455                             state_global,top_global,state_work,top,
2456                             inputrec,nrnb,wcycle,gstat,
2457                             vsite,constr,fcd,graph,mdatoms,fr,
2458                             mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2459             cr->nnodes = nnodes;
2460
2461             /* x is restored to original */
2462             state_work->s.x[atom][d] = x_min;
2463
2464             for(j=0; j<natoms; j++)
2465             {
2466                 for (k=0; (k<DIM); k++)
2467                 {
2468                     dfdx[j][k] =
2469                         -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2470                 }
2471             }
2472
2473             if (!MASTER(cr))
2474             {
2475 #ifdef GMX_MPI
2476 #ifdef GMX_DOUBLE
2477 #define mpi_type MPI_DOUBLE
2478 #else
2479 #define mpi_type MPI_FLOAT
2480 #endif
2481                 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2482                          cr->mpi_comm_mygroup);
2483 #endif
2484             }
2485             else
2486             {
2487                 for(node=0; (node<nnodes && atom+node<natoms); node++)
2488                 {
2489                     if (node > 0)
2490                     {
2491 #ifdef GMX_MPI
2492                         MPI_Status stat;
2493                         MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2494                                  cr->mpi_comm_mygroup,&stat);
2495 #undef mpi_type
2496 #endif
2497                     }
2498
2499                     row = (atom + node)*DIM + d;
2500
2501                     for(j=0; j<natoms; j++)
2502                     {
2503                         for(k=0; k<DIM; k++)
2504                         {
2505                             col = j*DIM + k;
2506
2507                             if (bSparse)
2508                             {
2509                                 if (col >= row && dfdx[j][k] != 0.0)
2510                                 {
2511                                     gmx_sparsematrix_increment_value(sparse_matrix,
2512                                                                      row,col,dfdx[j][k]);
2513                                 }
2514                             }
2515                             else
2516                             {
2517                                 full_matrix[row*sz+col] = dfdx[j][k];
2518                             }
2519                         }
2520                     }
2521                 }
2522             }
2523
2524             if (bVerbose && fplog)
2525             {
2526                 fflush(fplog);
2527             }
2528         }
2529         /* write progress */
2530         if (MASTER(cr) && bVerbose)
2531         {
2532             fprintf(stderr,"\rFinished step %d out of %d",
2533                     min(atom+nnodes,natoms),natoms);
2534             fflush(stderr);
2535         }
2536     }
2537
2538     if (MASTER(cr))
2539     {
2540         fprintf(stderr,"\n\nWriting Hessian...\n");
2541         gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2542     }
2543
2544     finish_em(fplog,cr,outf,runtime,wcycle);
2545
2546     runtime->nsteps_done = natoms*2;
2547
2548     return 0;
2549 }
2550