src/mdlib/minimize.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41
  42 #include <string.h>
  43 #include <time.h>
  44 #include <math.h>
  45 #include "sysstuff.h"
  46 #include "string2.h"
  47 #include "network.h"
  48 #include "confio.h"
  49 #include "copyrite.h"
  50 #include "smalloc.h"
  51 #include "nrnb.h"
  52 #include "main.h"
  53 #include "force.h"
  54 #include "macros.h"
  55 #include "random.h"
  56 #include "names.h"
  57 #include "gmx_fatal.h"
  58 #include "txtdump.h"
  59 #include "typedefs.h"
  60 #include "update.h"
  61 #include "constr.h"
  62 #include "vec.h"
  63 #include "statutil.h"
  64 #include "tgroup.h"
  65 #include "mdebin.h"
  66 #include "vsite.h"
  67 #include "force.h"
  68 #include "mdrun.h"
  69 #include "md_support.h"
  70 #include "domdec.h"
  71 #include "partdec.h"
  72 #include "trnio.h"
  73 #include "sparsematrix.h"
  74 #include "mtxio.h"
  75 #include "mdatoms.h"
  76 #include "ns.h"
  77 #include "gmx_wallcycle.h"
  78 #include "mtop_util.h"
  79 #include "gmxfio.h"
  80 #include "pme.h"
  81 #include "bondf.h"
  82 #include "gmx_omp_nthreads.h"
  83
  84
  85 typedef struct {
  86   t_state s;
  87   rvec    *f;
  88   real    epot;
  89   real    fnorm;
  90   real    fmax;
  91   int     a_fmax;
  92 } em_state_t;
  93
  94 static em_state_t *init_em_state()
  95 {
  96   em_state_t *ems;
  97
  98   snew(ems,1);
  99
 100   /* does this need to be here?  Should the array be declared differently (staticaly)in the state definition? */
 101   snew(ems->s.lambda,efptNR);
 102
 103   return ems;
 104 }
 105
 106 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 107                            gmx_wallcycle_t wcycle,
 108                            const char *name)
 109 {
 110     char buf[STRLEN];
 111
 112     runtime_start(runtime);
 113
 114     sprintf(buf,"Started %s",name);
 115     print_date_and_time(fplog,cr->nodeid,buf,NULL);
 116
 117     wallcycle_start(wcycle,ewcRUN);
 118 }
 119 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 120                         gmx_wallcycle_t wcycle)
 121 {
 122     wallcycle_stop(wcycle,ewcRUN);
 123
 124     runtime_end(runtime);
 125 }
 126
 127 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
 128 {
 129     fprintf(out,"\n");
 130     fprintf(out,"%s:\n",minimizer);
 131     fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
 132     fprintf(out,"   Number of steps    = %12d\n",nsteps);
 133 }
 134
 135 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
 136 {
 137     char buffer[2048];
 138     if (bLastStep)
 139     {
 140         sprintf(buffer,
 141                 "\nEnergy minimization reached the maximum number"
 142                 "of steps before the forces reached the requested"
 143                 "precision Fmax < %g.\n",ftol);
 144     }
 145     else
 146     {
 147         sprintf(buffer,
 148                 "\nEnergy minimization has stopped, but the forces have"
 149                 "not converged to the requested precision Fmax < %g (which"
 150                 "may not be possible for your system). It stopped"
 151                 "because the algorithm tried to make a new step whose size"
 152                 "was too small, or there was no change in the energy since"
 153                 "last step. Either way, we regard the minimization as"
 154                 "converged to within the available machine precision,"
 155                 "given your starting configuration and EM parameters.\n%s%s",
 156                 ftol,
 157                 sizeof(real)<sizeof(double) ?
 158                 "\nDouble precision normally gives you higher accuracy, but"
 159                 "this is often not needed for preparing to run molecular"
 160                 "dynamics.\n" :
 161                 "",
 162                 bConstrain ?
 163                 "You might need to increase your constraint accuracy, or turn\n"
 164                 "off constraints altogether (set constraints = none in mdp file)\n" :
 165                 "");
 166     }
 167     fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
 168 }
 169
 170
 171
 172 static void print_converged(FILE *fp,const char *alg,real ftol,
 173                             gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
 174                             real epot,real fmax, int nfmax, real fnorm)
 175 {
 176   char buf[STEPSTRSIZE];
 177
 178   if (bDone)
 179     fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
 180             alg,ftol,gmx_step_str(count,buf));
 181   else if(count<nsteps)
 182     fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
 183                "but did not reach the requested Fmax < %g.\n",
 184             alg,gmx_step_str(count,buf),ftol);
 185   else
 186     fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
 187             alg,ftol,gmx_step_str(count,buf));
 188
 189 #ifdef GMX_DOUBLE
 190   fprintf(fp,"Potential Energy  = %21.14e\n",epot);
 191   fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1);
 192   fprintf(fp,"Norm of force     = %21.14e\n",fnorm);
 193 #else
 194   fprintf(fp,"Potential Energy  = %14.7e\n",epot);
 195   fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1);
 196   fprintf(fp,"Norm of force     = %14.7e\n",fnorm);
 197 #endif
 198 }
 199
 200 static void get_f_norm_max(t_commrec *cr,
 201                            t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
 202                            real *fnorm,real *fmax,int *a_fmax)
 203 {
 204   double fnorm2,*sum;
 205   real fmax2,fmax2_0,fam;
 206   int  la_max,a_max,start,end,i,m,gf;
 207
 208   /* This routine finds the largest force and returns it.
 209    * On parallel machines the global max is taken.
 210    */
 211   fnorm2 = 0;
 212   fmax2 = 0;
 213   la_max = -1;
 214   gf = 0;
 215   start = mdatoms->start;
 216   end   = mdatoms->homenr + start;
 217   if (mdatoms->cFREEZE) {
 218     for(i=start; i<end; i++) {
 219       gf = mdatoms->cFREEZE[i];
 220       fam = 0;
 221       for(m=0; m<DIM; m++)
 222         if (!opts->nFreeze[gf][m])
 223           fam += sqr(f[i][m]);
 224       fnorm2 += fam;
 225       if (fam > fmax2) {
 226         fmax2  = fam;
 227         la_max = i;
 228       }
 229     }
 230   } else {
 231     for(i=start; i<end; i++) {
 232       fam = norm2(f[i]);
 233       fnorm2 += fam;
 234       if (fam > fmax2) {
 235         fmax2  = fam;
 236         la_max = i;
 237       }
 238     }
 239   }
 240
 241   if (la_max >= 0 && DOMAINDECOMP(cr)) {
 242     a_max = cr->dd->gatindex[la_max];
 243   } else {
 244     a_max = la_max;
 245   }
 246   if (PAR(cr)) {
 247     snew(sum,2*cr->nnodes+1);
 248     sum[2*cr->nodeid]   = fmax2;
 249     sum[2*cr->nodeid+1] = a_max;
 250     sum[2*cr->nnodes]   = fnorm2;
 251     gmx_sumd(2*cr->nnodes+1,sum,cr);
 252     fnorm2 = sum[2*cr->nnodes];
 253     /* Determine the global maximum */
 254     for(i=0; i<cr->nnodes; i++) {
 255       if (sum[2*i] > fmax2) {
 256         fmax2 = sum[2*i];
 257         a_max = (int)(sum[2*i+1] + 0.5);
 258       }
 259     }
 260     sfree(sum);
 261   }
 262
 263   if (fnorm)
 264     *fnorm = sqrt(fnorm2);
 265   if (fmax)
 266     *fmax  = sqrt(fmax2);
 267   if (a_fmax)
 268     *a_fmax = a_max;
 269 }
 270
 271 static void get_state_f_norm_max(t_commrec *cr,
 272                            t_grpopts *opts,t_mdatoms *mdatoms,
 273                            em_state_t *ems)
 274 {
 275   get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
 276 }
 277
 278 void init_em(FILE *fplog,const char *title,
 279              t_commrec *cr,t_inputrec *ir,
 280              t_state *state_global,gmx_mtop_t *top_global,
 281              em_state_t *ems,gmx_localtop_t **top,
 282              rvec **f,rvec **f_global,
 283              t_nrnb *nrnb,rvec mu_tot,
 284              t_forcerec *fr,gmx_enerdata_t **enerd,
 285              t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
 286              gmx_vsite_t *vsite,gmx_constr_t constr,
 287              int nfile,const t_filenm fnm[],
 288              gmx_mdoutf_t **outf,t_mdebin **mdebin)
 289 {
 290     int  start,homenr,i;
 291     real dvdlambda;
 292
 293     if (fplog)
 294     {
 295         fprintf(fplog,"Initiating %s\n",title);
 296     }
 297
 298     state_global->ngtc = 0;
 299
 300     /* Initialize lambda variables */
 301     initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
 302
 303     init_nrnb(nrnb);
 304
 305     if (DOMAINDECOMP(cr))
 306     {
 307         *top = dd_init_local_top(top_global);
 308
 309         dd_init_local_state(cr->dd,state_global,&ems->s);
 310
 311         *f = NULL;
 312
 313         /* Distribute the charge groups over the nodes from the master node */
 314         dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 315                             state_global,top_global,ir,
 316                             &ems->s,&ems->f,mdatoms,*top,
 317                             fr,vsite,NULL,constr,
 318                             nrnb,NULL,FALSE);
 319         dd_store_state(cr->dd,&ems->s);
 320
 321         if (ir->nstfout)
 322         {
 323             snew(*f_global,top_global->natoms);
 324         }
 325         else
 326         {
 327             *f_global = NULL;
 328         }
 329         *graph = NULL;
 330     }
 331     else
 332     {
 333         snew(*f,top_global->natoms);
 334
 335         /* Just copy the state */
 336         ems->s = *state_global;
 337         snew(ems->s.x,ems->s.nalloc);
 338         snew(ems->f,ems->s.nalloc);
 339         for(i=0; i<state_global->natoms; i++)
 340         {
 341             copy_rvec(state_global->x[i],ems->s.x[i]);
 342         }
 343         copy_mat(state_global->box,ems->s.box);
 344
 345         if (PAR(cr) && ir->eI != eiNM)
 346         {
 347             /* Initialize the particle decomposition and split the topology */
 348             *top = split_system(fplog,top_global,ir,cr);
 349
 350             pd_cg_range(cr,&fr->cg0,&fr->hcg);
 351         }
 352         else
 353         {
 354             *top = gmx_mtop_generate_local_top(top_global,ir);
 355         }
 356         *f_global = *f;
 357
 358         forcerec_set_excl_load(fr,*top,cr);
 359
 360         init_bonded_thread_force_reduction(fr,&(*top)->idef);
 361
 362         if (ir->ePBC != epbcNONE && !fr->bMolPBC)
 363         {
 364             *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
 365         }
 366         else
 367         {
 368             *graph = NULL;
 369         }
 370
 371         if (PARTDECOMP(cr))
 372         {
 373             pd_at_range(cr,&start,&homenr);
 374             homenr -= start;
 375         }
 376         else
 377         {
 378             start  = 0;
 379             homenr = top_global->natoms;
 380         }
 381         atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
 382         update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
 383
 384         if (vsite)
 385         {
 386             set_vsite_top(vsite,*top,mdatoms,cr);
 387         }
 388     }
 389
 390     if (constr)
 391     {
 392         if (ir->eConstrAlg == econtSHAKE &&
 393             gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
 394         {
 395             gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
 396                       econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 397         }
 398
 399         if (!DOMAINDECOMP(cr))
 400         {
 401             set_constraints(constr,*top,ir,mdatoms,cr);
 402         }
 403
 404         if (!ir->bContinuation)
 405         {
 406             /* Constrain the starting coordinates */
 407             dvdlambda=0;
 408             constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
 409                       ir,NULL,cr,-1,0,mdatoms,
 410                       ems->s.x,ems->s.x,NULL,fr->bMolPBC,ems->s.box,
 411                       ems->s.lambda[efptFEP],&dvdlambda,
 412                       NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 413         }
 414     }
 415
 416     if (PAR(cr))
 417     {
 418         *gstat = global_stat_init(ir);
 419     }
 420
 421     *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
 422
 423     snew(*enerd,1);
 424     init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
 425                   *enerd);
 426
 427     if (mdebin != NULL)
 428     {
 429         /* Init bin for energy stuff */
 430         *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
 431     }
 432
 433     clear_rvec(mu_tot);
 434     calc_shifts(ems->s.box,fr->shift_vec);
 435 }
 436
 437 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
 438                       gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
 439 {
 440   if (!(cr->duty & DUTY_PME)) {
 441     /* Tell the PME only node to finish */
 442     gmx_pme_send_finish(cr);
 443   }
 444
 445   done_mdoutf(outf);
 446
 447   em_time_end(fplog,cr,runtime,wcycle);
 448 }
 449
 450 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
 451 {
 452   em_state_t tmp;
 453
 454   tmp   = *ems1;
 455   *ems1 = *ems2;
 456   *ems2 = tmp;
 457 }
 458
 459 static void copy_em_coords(em_state_t *ems,t_state *state)
 460 {
 461     int i;
 462
 463     for(i=0; (i<state->natoms); i++)
 464     {
 465         copy_rvec(ems->s.x[i],state->x[i]);
 466     }
 467 }
 468
 469 static void write_em_traj(FILE *fplog,t_commrec *cr,
 470                           gmx_mdoutf_t *outf,
 471                           gmx_bool bX,gmx_bool bF,const char *confout,
 472                           gmx_mtop_t *top_global,
 473                           t_inputrec *ir,gmx_large_int_t step,
 474                           em_state_t *state,
 475                           t_state *state_global,rvec *f_global)
 476 {
 477     int mdof_flags;
 478
 479     if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 480     {
 481         copy_em_coords(state,state_global);
 482         f_global = state->f;
 483     }
 484
 485     mdof_flags = 0;
 486     if (bX) { mdof_flags |= MDOF_X; }
 487     if (bF) { mdof_flags |= MDOF_F; }
 488     write_traj(fplog,cr,outf,mdof_flags,
 489                top_global,step,(double)step,
 490                &state->s,state_global,state->f,f_global,NULL,NULL);
 491
 492     if (confout != NULL && MASTER(cr))
 493     {
 494         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 495         {
 496             /* Make molecules whole only for confout writing */
 497             do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
 498                         state_global->x);
 499         }
 500
 501         write_sto_conf_mtop(confout,
 502                             *top_global->name,top_global,
 503                             state_global->x,NULL,ir->ePBC,state_global->box);
 504     }
 505 }
 506
 507 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
 508                        gmx_bool bMolPBC,
 509                        em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
 510                        gmx_constr_t constr,gmx_localtop_t *top,
 511                        t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 512                        gmx_large_int_t count)
 513
 514 {
 515     t_state *s1,*s2;
 516     int  i;
 517     int  start,end;
 518     rvec *x1,*x2;
 519     real dvdlambda;
 520
 521     s1 = &ems1->s;
 522     s2 = &ems2->s;
 523
 524     if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 525     {
 526         gmx_incons("state mismatch in do_em_step");
 527     }
 528
 529     s2->flags = s1->flags;
 530
 531     if (s2->nalloc != s1->nalloc)
 532     {
 533         s2->nalloc = s1->nalloc;
 534         srenew(s2->x,s1->nalloc);
 535         srenew(ems2->f,  s1->nalloc);
 536         if (s2->flags & (1<<estCGP))
 537         {
 538             srenew(s2->cg_p,  s1->nalloc);
 539         }
 540     }
 541
 542     s2->natoms = s1->natoms;
 543     copy_mat(s1->box,s2->box);
 544     /* Copy free energy state */
 545     for (i=0;i<efptNR;i++)
 546     {
 547         s2->lambda[i] = s1->lambda[i];
 548     }
 549     copy_mat(s1->box,s2->box);
 550
 551     start = md->start;
 552     end   = md->start + md->homenr;
 553
 554     x1 = s1->x;
 555     x2 = s2->x;
 556
 557 #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
 558     {
 559         int gf,i,m;
 560
 561         gf = 0;
 562 #pragma omp for schedule(static) nowait
 563         for(i=start; i<end; i++)
 564         {
 565             if (md->cFREEZE)
 566             {
 567                 gf = md->cFREEZE[i];
 568             }
 569             for(m=0; m<DIM; m++)
 570             {
 571                 if (ir->opts.nFreeze[gf][m])
 572                 {
 573                     x2[i][m] = x1[i][m];
 574                 }
 575                 else
 576                 {
 577                     x2[i][m] = x1[i][m] + a*f[i][m];
 578                 }
 579             }
 580         }
 581
 582         if (s2->flags & (1<<estCGP))
 583         {
 584             /* Copy the CG p vector */
 585             x1 = s1->cg_p;
 586             x2 = s2->cg_p;
 587 #pragma omp for schedule(static) nowait
 588             for(i=start; i<end; i++)
 589             {
 590                 copy_rvec(x1[i],x2[i]);
 591             }
 592         }
 593
 594         if (DOMAINDECOMP(cr))
 595         {
 596             s2->ddp_count = s1->ddp_count;
 597             if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
 598             {
 599 #pragma omp barrier
 600                 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 601                 srenew(s2->cg_gl,s2->cg_gl_nalloc);
 602 #pragma omp barrier
 603             }
 604             s2->ncg_gl = s1->ncg_gl;
 605 #pragma omp for schedule(static) nowait
 606             for(i=0; i<s2->ncg_gl; i++)
 607             {
 608                 s2->cg_gl[i] = s1->cg_gl[i];
 609             }
 610             s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 611         }
 612     }
 613
 614     if (constr)
 615     {
 616         wallcycle_start(wcycle,ewcCONSTR);
 617         dvdlambda = 0;
 618         constrain(NULL,TRUE,TRUE,constr,&top->idef,
 619                   ir,NULL,cr,count,0,md,
 620                   s1->x,s2->x,NULL,bMolPBC,s2->box,
 621                   s2->lambda[efptBONDED],&dvdlambda,
 622                   NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 623         wallcycle_stop(wcycle,ewcCONSTR);
 624     }
 625 }
 626
 627 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
 628                                    gmx_mtop_t *top_global,t_inputrec *ir,
 629                                    em_state_t *ems,gmx_localtop_t *top,
 630                                    t_mdatoms *mdatoms,t_forcerec *fr,
 631                                    gmx_vsite_t *vsite,gmx_constr_t constr,
 632                                    t_nrnb *nrnb,gmx_wallcycle_t wcycle)
 633 {
 634     /* Repartition the domain decomposition */
 635     wallcycle_start(wcycle,ewcDOMDEC);
 636     dd_partition_system(fplog,step,cr,FALSE,1,
 637                         NULL,top_global,ir,
 638                         &ems->s,&ems->f,
 639                         mdatoms,top,fr,vsite,NULL,constr,
 640                         nrnb,wcycle,FALSE);
 641     dd_store_state(cr->dd,&ems->s);
 642     wallcycle_stop(wcycle,ewcDOMDEC);
 643 }
 644
 645 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
 646                             t_state *state_global,gmx_mtop_t *top_global,
 647                             em_state_t *ems,gmx_localtop_t *top,
 648                             t_inputrec *inputrec,
 649                             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 650                             gmx_global_stat_t gstat,
 651                             gmx_vsite_t *vsite,gmx_constr_t constr,
 652                             t_fcdata *fcd,
 653                             t_graph *graph,t_mdatoms *mdatoms,
 654                             t_forcerec *fr,rvec mu_tot,
 655                             gmx_enerdata_t *enerd,tensor vir,tensor pres,
 656                             gmx_large_int_t count,gmx_bool bFirst)
 657 {
 658   real t;
 659   gmx_bool bNS;
 660   int  nabnsb;
 661   tensor force_vir,shake_vir,ekin;
 662   real dvdlambda,prescorr,enercorr,dvdlcorr;
 663   real terminate=0;
 664
 665   /* Set the time to the initial time, the time does not change during EM */
 666   t = inputrec->init_t;
 667
 668   if (bFirst ||
 669       (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
 670     /* This the first state or an old state used before the last ns */
 671     bNS = TRUE;
 672   } else {
 673     bNS = FALSE;
 674     if (inputrec->nstlist > 0) {
 675       bNS = TRUE;
 676     } else if (inputrec->nstlist == -1) {
 677       nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
 678       if (PAR(cr))
 679         gmx_sumi(1,&nabnsb,cr);
 680       bNS = (nabnsb > 0);
 681     }
 682   }
 683
 684   if (vsite)
 685     construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
 686                      top->idef.iparams,top->idef.il,
 687                      fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
 688
 689   if (DOMAINDECOMP(cr)) {
 690     if (bNS) {
 691       /* Repartition the domain decomposition */
 692       em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 693                              ems,top,mdatoms,fr,vsite,constr,
 694                              nrnb,wcycle);
 695     }
 696   }
 697
 698     /* Calc force & energy on new trial position  */
 699     /* do_force always puts the charge groups in the box and shifts again
 700      * We do not unshift, so molecules are always whole in congrad.c
 701      */
 702     do_force(fplog,cr,inputrec,
 703              count,nrnb,wcycle,top,top_global,&top_global->groups,
 704              ems->s.box,ems->s.x,&ems->s.hist,
 705              ems->f,force_vir,mdatoms,enerd,fcd,
 706              ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
 707              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
 708              GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
 709              (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
 710
 711     /* Clear the unused shake virial and pressure */
 712     clear_mat(shake_vir);
 713     clear_mat(pres);
 714
 715     /* Communicate stuff when parallel */
 716     if (PAR(cr) && inputrec->eI != eiNM)
 717     {
 718         wallcycle_start(wcycle,ewcMoveE);
 719
 720         global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
 721                     inputrec,NULL,NULL,NULL,1,&terminate,
 722                     top_global,&ems->s,FALSE,
 723                     CGLO_ENERGY |
 724                     CGLO_PRESSURE |
 725                     CGLO_CONSTRAINT |
 726                     CGLO_FIRSTITERATE);
 727
 728         wallcycle_stop(wcycle,ewcMoveE);
 729     }
 730
 731     /* Calculate long range corrections to pressure and energy */
 732     calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
 733                   pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
 734     enerd->term[F_DISPCORR] = enercorr;
 735     enerd->term[F_EPOT] += enercorr;
 736     enerd->term[F_PRES] += prescorr;
 737     enerd->term[F_DVDL] += dvdlcorr;
 738
 739   ems->epot = enerd->term[F_EPOT];
 740
 741   if (constr) {
 742     /* Project out the constraint components of the force */
 743     wallcycle_start(wcycle,ewcCONSTR);
 744     dvdlambda = 0;
 745     constrain(NULL,FALSE,FALSE,constr,&top->idef,
 746               inputrec,NULL,cr,count,0,mdatoms,
 747               ems->s.x,ems->f,ems->f,fr->bMolPBC,ems->s.box,
 748               ems->s.lambda[efptBONDED],&dvdlambda,
 749               NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
 750     if (fr->bSepDVDL && fplog)
 751       fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
 752     enerd->term[F_DVDL_BONDED] += dvdlambda;
 753     m_add(force_vir,shake_vir,vir);
 754     wallcycle_stop(wcycle,ewcCONSTR);
 755   } else {
 756     copy_mat(force_vir,vir);
 757   }
 758
 759   clear_mat(ekin);
 760   enerd->term[F_PRES] =
 761     calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
 762
 763   sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
 764
 765     if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 766     {
 767         get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
 768     }
 769 }
 770
 771 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 772                               gmx_mtop_t *mtop,
 773                               em_state_t *s_min,em_state_t *s_b)
 774 {
 775   rvec *fm,*fb,*fmg;
 776   t_block *cgs_gl;
 777   int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
 778   double partsum;
 779   unsigned char *grpnrFREEZE;
 780
 781   if (debug)
 782     fprintf(debug,"Doing reorder_partsum\n");
 783
 784   fm = s_min->f;
 785   fb = s_b->f;
 786
 787   cgs_gl = dd_charge_groups_global(cr->dd);
 788   index = cgs_gl->index;
 789
 790   /* Collect fm in a global vector fmg.
 791    * This conflicts with the spirit of domain decomposition,
 792    * but to fully optimize this a much more complicated algorithm is required.
 793    */
 794   snew(fmg,mtop->natoms);
 795
 796   ncg   = s_min->s.ncg_gl;
 797   cg_gl = s_min->s.cg_gl;
 798   i = 0;
 799   for(c=0; c<ncg; c++) {
 800     cg = cg_gl[c];
 801     a0 = index[cg];
 802     a1 = index[cg+1];
 803     for(a=a0; a<a1; a++) {
 804       copy_rvec(fm[i],fmg[a]);
 805       i++;
 806     }
 807   }
 808   gmx_sum(mtop->natoms*3,fmg[0],cr);
 809
 810   /* Now we will determine the part of the sum for the cgs in state s_b */
 811   ncg   = s_b->s.ncg_gl;
 812   cg_gl = s_b->s.cg_gl;
 813   partsum = 0;
 814   i = 0;
 815   gf = 0;
 816   grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 817   for(c=0; c<ncg; c++) {
 818     cg = cg_gl[c];
 819     a0 = index[cg];
 820     a1 = index[cg+1];
 821     for(a=a0; a<a1; a++) {
 822       if (mdatoms->cFREEZE && grpnrFREEZE) {
 823         gf = grpnrFREEZE[i];
 824       }
 825       for(m=0; m<DIM; m++) {
 826         if (!opts->nFreeze[gf][m]) {
 827           partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 828         }
 829       }
 830       i++;
 831     }
 832   }
 833
 834   sfree(fmg);
 835
 836   return partsum;
 837 }
 838
 839 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 840                     gmx_mtop_t *mtop,
 841                     em_state_t *s_min,em_state_t *s_b)
 842 {
 843   rvec *fm,*fb;
 844   double sum;
 845   int  gf,i,m;
 846
 847   /* This is just the classical Polak-Ribiere calculation of beta;
 848    * it looks a bit complicated since we take freeze groups into account,
 849    * and might have to sum it in parallel runs.
 850    */
 851
 852   if (!DOMAINDECOMP(cr) ||
 853       (s_min->s.ddp_count == cr->dd->ddp_count &&
 854        s_b->s.ddp_count   == cr->dd->ddp_count)) {
 855     fm = s_min->f;
 856     fb = s_b->f;
 857     sum = 0;
 858     gf = 0;
 859     /* This part of code can be incorrect with DD,
 860      * since the atom ordering in s_b and s_min might differ.
 861      */
 862     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 863       if (mdatoms->cFREEZE)
 864         gf = mdatoms->cFREEZE[i];
 865       for(m=0; m<DIM; m++)
 866         if (!opts->nFreeze[gf][m]) {
 867           sum += (fb[i][m] - fm[i][m])*fb[i][m];
 868         }
 869     }
 870   } else {
 871     /* We need to reorder cgs while summing */
 872     sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
 873   }
 874   if (PAR(cr))
 875     gmx_sumd(1,&sum,cr);
 876
 877   return sum/sqr(s_min->fnorm);
 878 }
 879
 880 double do_cg(FILE *fplog,t_commrec *cr,
 881              int nfile,const t_filenm fnm[],
 882              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 883              int nstglobalcomm,
 884              gmx_vsite_t *vsite,gmx_constr_t constr,
 885              int stepout,
 886              t_inputrec *inputrec,
 887              gmx_mtop_t *top_global,t_fcdata *fcd,
 888              t_state *state_global,
 889              t_mdatoms *mdatoms,
 890              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 891              gmx_edsam_t ed,
 892              t_forcerec *fr,
 893              int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 894              gmx_membed_t membed,
 895              real cpt_period,real max_hours,
 896              const char *deviceOptions,
 897              unsigned long Flags,
 898              gmx_runtime_t *runtime)
 899 {
 900   const char *CG="Polak-Ribiere Conjugate Gradients";
 901
 902   em_state_t *s_min,*s_a,*s_b,*s_c;
 903   gmx_localtop_t *top;
 904   gmx_enerdata_t *enerd;
 905   rvec   *f;
 906   gmx_global_stat_t gstat;
 907   t_graph    *graph;
 908   rvec   *f_global,*p,*sf,*sfm;
 909   double gpa,gpb,gpc,tmp,sum[2],minstep;
 910   real   fnormn;
 911   real   stepsize;
 912   real   a,b,c,beta=0.0;
 913   real   epot_repl=0;
 914   real   pnorm;
 915   t_mdebin   *mdebin;
 916   gmx_bool   converged,foundlower;
 917   rvec   mu_tot;
 918   gmx_bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
 919   tensor vir,pres;
 920   int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
 921   gmx_mdoutf_t *outf;
 922   int    i,m,gf,step,nminstep;
 923   real   terminate=0;
 924
 925   step=0;
 926
 927   s_min = init_em_state();
 928   s_a   = init_em_state();
 929   s_b   = init_em_state();
 930   s_c   = init_em_state();
 931
 932   /* Init em and store the local state in s_min */
 933   init_em(fplog,CG,cr,inputrec,
 934           state_global,top_global,s_min,&top,&f,&f_global,
 935           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 936           nfile,fnm,&outf,&mdebin);
 937
 938   /* Print to log file */
 939   print_em_start(fplog,cr,runtime,wcycle,CG);
 940
 941   /* Max number of steps */
 942   number_steps=inputrec->nsteps;
 943
 944   if (MASTER(cr))
 945     sp_header(stderr,CG,inputrec->em_tol,number_steps);
 946   if (fplog)
 947     sp_header(fplog,CG,inputrec->em_tol,number_steps);
 948
 949   /* Call the force routine and some auxiliary (neighboursearching etc.) */
 950   /* do_force always puts the charge groups in the box and shifts again
 951    * We do not unshift, so molecules are always whole in congrad.c
 952    */
 953   evaluate_energy(fplog,bVerbose,cr,
 954                   state_global,top_global,s_min,top,
 955                   inputrec,nrnb,wcycle,gstat,
 956                   vsite,constr,fcd,graph,mdatoms,fr,
 957                   mu_tot,enerd,vir,pres,-1,TRUE);
 958   where();
 959
 960   if (MASTER(cr)) {
 961     /* Copy stuff to the energy bin for easy printing etc. */
 962     upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 963                mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
 964                NULL,NULL,vir,pres,NULL,mu_tot,constr);
 965
 966     print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
 967     print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 968                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 969   }
 970   where();
 971
 972   /* Estimate/guess the initial stepsize */
 973   stepsize = inputrec->em_stepsize/s_min->fnorm;
 974
 975   if (MASTER(cr)) {
 976     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
 977             s_min->fmax,s_min->a_fmax+1);
 978     fprintf(stderr,"   F-Norm            = %12.5e\n",
 979             s_min->fnorm/sqrt(state_global->natoms));
 980     fprintf(stderr,"\n");
 981     /* and copy to the log file too... */
 982     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
 983             s_min->fmax,s_min->a_fmax+1);
 984     fprintf(fplog,"   F-Norm            = %12.5e\n",
 985             s_min->fnorm/sqrt(state_global->natoms));
 986     fprintf(fplog,"\n");
 987   }
 988   /* Start the loop over CG steps.
 989    * Each successful step is counted, and we continue until
 990    * we either converge or reach the max number of steps.
 991    */
 992   converged = FALSE;
 993   for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
 994
 995     /* start taking steps in a new direction
 996      * First time we enter the routine, beta=0, and the direction is
 997      * simply the negative gradient.
 998      */
 999
1000     /* Calculate the new direction in p, and the gradient in this direction, gpa */
1001     p  = s_min->s.cg_p;
1002     sf = s_min->f;
1003     gpa = 0;
1004     gf = 0;
1005     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1006       if (mdatoms->cFREEZE)
1007         gf = mdatoms->cFREEZE[i];
1008       for(m=0; m<DIM; m++) {
1009         if (!inputrec->opts.nFreeze[gf][m]) {
1010           p[i][m] = sf[i][m] + beta*p[i][m];
1011           gpa -= p[i][m]*sf[i][m];
1012           /* f is negative gradient, thus the sign */
1013         } else {
1014           p[i][m] = 0;
1015         }
1016       }
1017     }
1018
1019     /* Sum the gradient along the line across CPUs */
1020     if (PAR(cr))
1021       gmx_sumd(1,&gpa,cr);
1022
1023     /* Calculate the norm of the search vector */
1024     get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
1025
1026     /* Just in case stepsize reaches zero due to numerical precision... */
1027     if(stepsize<=0)
1028       stepsize = inputrec->em_stepsize/pnorm;
1029
1030     /*
1031      * Double check the value of the derivative in the search direction.
1032      * If it is positive it must be due to the old information in the
1033      * CG formula, so just remove that and start over with beta=0.
1034      * This corresponds to a steepest descent step.
1035      */
1036     if(gpa>0) {
1037       beta = 0;
1038       step--; /* Don't count this step since we are restarting */
1039       continue; /* Go back to the beginning of the big for-loop */
1040     }
1041
1042     /* Calculate minimum allowed stepsize, before the average (norm)
1043      * relative change in coordinate is smaller than precision
1044      */
1045     minstep=0;
1046     for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1047       for(m=0; m<DIM; m++) {
1048         tmp = fabs(s_min->s.x[i][m]);
1049         if(tmp < 1.0)
1050           tmp = 1.0;
1051         tmp = p[i][m]/tmp;
1052         minstep += tmp*tmp;
1053       }
1054     }
1055     /* Add up from all CPUs */
1056     if(PAR(cr))
1057       gmx_sumd(1,&minstep,cr);
1058
1059     minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1060
1061     if(stepsize<minstep) {
1062       converged=TRUE;
1063       break;
1064     }
1065
1066     /* Write coordinates if necessary */
1067     do_x = do_per_step(step,inputrec->nstxout);
1068     do_f = do_per_step(step,inputrec->nstfout);
1069
1070     write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1071                   top_global,inputrec,step,
1072                   s_min,state_global,f_global);
1073
1074     /* Take a step downhill.
1075      * In theory, we should minimize the function along this direction.
1076      * That is quite possible, but it turns out to take 5-10 function evaluations
1077      * for each line. However, we dont really need to find the exact minimum -
1078      * it is much better to start a new CG step in a modified direction as soon
1079      * as we are close to it. This will save a lot of energy evaluations.
1080      *
1081      * In practice, we just try to take a single step.
1082      * If it worked (i.e. lowered the energy), we increase the stepsize but
1083      * the continue straight to the next CG step without trying to find any minimum.
1084      * If it didn't work (higher energy), there must be a minimum somewhere between
1085      * the old position and the new one.
1086      *
1087      * Due to the finite numerical accuracy, it turns out that it is a good idea
1088      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1089      * This leads to lower final energies in the tests I've done. / Erik
1090      */
1091     s_a->epot = s_min->epot;
1092     a = 0.0;
1093     c = a + stepsize; /* reference position along line is zero */
1094
1095     if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1096       em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1097                              s_min,top,mdatoms,fr,vsite,constr,
1098                              nrnb,wcycle);
1099     }
1100
1101     /* Take a trial step (new coords in s_c) */
1102     do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,c,s_min->s.cg_p,s_c,
1103                constr,top,nrnb,wcycle,-1);
1104
1105     neval++;
1106     /* Calculate energy for the trial step */
1107     evaluate_energy(fplog,bVerbose,cr,
1108                     state_global,top_global,s_c,top,
1109                     inputrec,nrnb,wcycle,gstat,
1110                     vsite,constr,fcd,graph,mdatoms,fr,
1111                     mu_tot,enerd,vir,pres,-1,FALSE);
1112
1113     /* Calc derivative along line */
1114     p  = s_c->s.cg_p;
1115     sf = s_c->f;
1116     gpc=0;
1117     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1118       for(m=0; m<DIM; m++)
1119           gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
1120     }
1121     /* Sum the gradient along the line across CPUs */
1122     if (PAR(cr))
1123       gmx_sumd(1,&gpc,cr);
1124
1125     /* This is the max amount of increase in energy we tolerate */
1126     tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1127
1128     /* Accept the step if the energy is lower, or if it is not significantly higher
1129      * and the line derivative is still negative.
1130      */
1131     if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1132       foundlower = TRUE;
1133       /* Great, we found a better energy. Increase step for next iteration
1134        * if we are still going down, decrease it otherwise
1135        */
1136       if(gpc<0)
1137         stepsize *= 1.618034;  /* The golden section */
1138       else
1139         stepsize *= 0.618034;  /* 1/golden section */
1140     } else {
1141       /* New energy is the same or higher. We will have to do some work
1142        * to find a smaller value in the interval. Take smaller step next time!
1143        */
1144       foundlower = FALSE;
1145       stepsize *= 0.618034;
1146     }
1147
1148
1149
1150
1151     /* OK, if we didn't find a lower value we will have to locate one now - there must
1152      * be one in the interval [a=0,c].
1153      * The same thing is valid here, though: Don't spend dozens of iterations to find
1154      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1155      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1156      *
1157      * I also have a safeguard for potentially really patological functions so we never
1158      * take more than 20 steps before we give up ...
1159      *
1160      * If we already found a lower value we just skip this step and continue to the update.
1161      */
1162     if (!foundlower) {
1163       nminstep=0;
1164
1165       do {
1166         /* Select a new trial point.
1167          * If the derivatives at points a & c have different sign we interpolate to zero,
1168          * otherwise just do a bisection.
1169          */
1170         if(gpa<0 && gpc>0)
1171           b = a + gpa*(a-c)/(gpc-gpa);
1172         else
1173           b = 0.5*(a+c);
1174
1175         /* safeguard if interpolation close to machine accuracy causes errors:
1176          * never go outside the interval
1177          */
1178         if(b<=a || b>=c)
1179           b = 0.5*(a+c);
1180
1181         if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1182           /* Reload the old state */
1183           em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1184                                  s_min,top,mdatoms,fr,vsite,constr,
1185                                  nrnb,wcycle);
1186         }
1187
1188         /* Take a trial step to this new point - new coords in s_b */
1189         do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,b,s_min->s.cg_p,s_b,
1190                constr,top,nrnb,wcycle,-1);
1191
1192         neval++;
1193         /* Calculate energy for the trial step */
1194         evaluate_energy(fplog,bVerbose,cr,
1195                         state_global,top_global,s_b,top,
1196                         inputrec,nrnb,wcycle,gstat,
1197                         vsite,constr,fcd,graph,mdatoms,fr,
1198                         mu_tot,enerd,vir,pres,-1,FALSE);
1199
1200         /* p does not change within a step, but since the domain decomposition
1201          * might change, we have to use cg_p of s_b here.
1202          */
1203         p  = s_b->s.cg_p;
1204         sf = s_b->f;
1205         gpb=0;
1206         for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1207           for(m=0; m<DIM; m++)
1208               gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
1209         }
1210         /* Sum the gradient along the line across CPUs */
1211         if (PAR(cr))
1212           gmx_sumd(1,&gpb,cr);
1213
1214         if (debug)
1215           fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1216                   s_a->epot,s_b->epot,s_c->epot,gpb);
1217
1218         epot_repl = s_b->epot;
1219
1220         /* Keep one of the intervals based on the value of the derivative at the new point */
1221         if (gpb > 0) {
1222           /* Replace c endpoint with b */
1223           swap_em_state(s_b,s_c);
1224           c = b;
1225           gpc = gpb;
1226         } else {
1227           /* Replace a endpoint with b */
1228           swap_em_state(s_b,s_a);
1229           a = b;
1230           gpa = gpb;
1231         }
1232
1233         /*
1234          * Stop search as soon as we find a value smaller than the endpoints.
1235          * Never run more than 20 steps, no matter what.
1236          */
1237         nminstep++;
1238       } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1239                (nminstep < 20));
1240
1241       if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1242           nminstep >= 20) {
1243         /* OK. We couldn't find a significantly lower energy.
1244          * If beta==0 this was steepest descent, and then we give up.
1245          * If not, set beta=0 and restart with steepest descent before quitting.
1246          */
1247         if (beta == 0.0) {
1248           /* Converged */
1249           converged = TRUE;
1250           break;
1251         } else {
1252           /* Reset memory before giving up */
1253           beta = 0.0;
1254           continue;
1255         }
1256       }
1257
1258       /* Select min energy state of A & C, put the best in B.
1259        */
1260       if (s_c->epot < s_a->epot) {
1261         if (debug)
1262           fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1263                   s_c->epot,s_a->epot);
1264         swap_em_state(s_b,s_c);
1265         gpb = gpc;
1266         b = c;
1267       } else {
1268         if (debug)
1269           fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1270                   s_a->epot,s_c->epot);
1271         swap_em_state(s_b,s_a);
1272         gpb = gpa;
1273         b = a;
1274       }
1275
1276     } else {
1277       if (debug)
1278         fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1279                 s_c->epot);
1280       swap_em_state(s_b,s_c);
1281       gpb = gpc;
1282       b = c;
1283     }
1284
1285     /* new search direction */
1286     /* beta = 0 means forget all memory and restart with steepest descents. */
1287     if (nstcg && ((step % nstcg)==0))
1288       beta = 0.0;
1289     else {
1290       /* s_min->fnorm cannot be zero, because then we would have converged
1291        * and broken out.
1292        */
1293
1294       /* Polak-Ribiere update.
1295        * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1296        */
1297       beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1298     }
1299     /* Limit beta to prevent oscillations */
1300     if (fabs(beta) > 5.0)
1301       beta = 0.0;
1302
1303
1304     /* update positions */
1305     swap_em_state(s_min,s_b);
1306     gpa = gpb;
1307
1308     /* Print it if necessary */
1309     if (MASTER(cr)) {
1310       if(bVerbose)
1311         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1312                 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1313                 s_min->fmax,s_min->a_fmax+1);
1314       /* Store the new (lower) energies */
1315       upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1316                  mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
1317                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1318
1319       do_log = do_per_step(step,inputrec->nstlog);
1320       do_ene = do_per_step(step,inputrec->nstenergy);
1321       if(do_log)
1322           print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1323       print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1324                  do_log ? fplog : NULL,step,step,eprNORMAL,
1325                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1326     }
1327
1328     /* Stop when the maximum force lies below tolerance.
1329      * If we have reached machine precision, converged is already set to true.
1330      */
1331     converged = converged || (s_min->fmax < inputrec->em_tol);
1332
1333   } /* End of the loop */
1334
1335   if (converged)
1336     step--; /* we never took that last step in this case */
1337
1338     if (s_min->fmax > inputrec->em_tol)
1339     {
1340         if (MASTER(cr))
1341         {
1342             warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1343             warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1344         }
1345         converged = FALSE;
1346     }
1347
1348   if (MASTER(cr)) {
1349     /* If we printed energy and/or logfile last step (which was the last step)
1350      * we don't have to do it again, but otherwise print the final values.
1351      */
1352     if(!do_log) {
1353       /* Write final value to log since we didn't do anything the last step */
1354       print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1355     }
1356     if (!do_ene || !do_log) {
1357       /* Write final energy file entries */
1358       print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1359                  !do_log ? fplog : NULL,step,step,eprNORMAL,
1360                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1361     }
1362   }
1363
1364   /* Print some stuff... */
1365   if (MASTER(cr))
1366     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1367
1368   /* IMPORTANT!
1369    * For accurate normal mode calculation it is imperative that we
1370    * store the last conformation into the full precision binary trajectory.
1371    *
1372    * However, we should only do it if we did NOT already write this step
1373    * above (which we did if do_x or do_f was true).
1374    */
1375   do_x = !do_per_step(step,inputrec->nstxout);
1376   do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1377
1378   write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1379                 top_global,inputrec,step,
1380                 s_min,state_global,f_global);
1381
1382   fnormn = s_min->fnorm/sqrt(state_global->natoms);
1383
1384   if (MASTER(cr)) {
1385     print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1386                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1387     print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1388                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1389
1390     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1391   }
1392
1393   finish_em(fplog,cr,outf,runtime,wcycle);
1394
1395   /* To print the actual number of steps we needed somewhere */
1396   runtime->nsteps_done = step;
1397
1398   return 0;
1399 } /* That's all folks */
1400
1401
1402 double do_lbfgs(FILE *fplog,t_commrec *cr,
1403                 int nfile,const t_filenm fnm[],
1404                 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1405                 int nstglobalcomm,
1406                 gmx_vsite_t *vsite,gmx_constr_t constr,
1407                 int stepout,
1408                 t_inputrec *inputrec,
1409                 gmx_mtop_t *top_global,t_fcdata *fcd,
1410                 t_state *state,
1411                 t_mdatoms *mdatoms,
1412                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1413                 gmx_edsam_t ed,
1414                 t_forcerec *fr,
1415                 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1416                 gmx_membed_t membed,
1417                 real cpt_period,real max_hours,
1418                 const char *deviceOptions,
1419                 unsigned long Flags,
1420                 gmx_runtime_t *runtime)
1421 {
1422   static const char *LBFGS="Low-Memory BFGS Minimizer";
1423   em_state_t ems;
1424   gmx_localtop_t *top;
1425   gmx_enerdata_t *enerd;
1426   rvec   *f;
1427   gmx_global_stat_t gstat;
1428   t_graph    *graph;
1429   rvec   *f_global;
1430   int    ncorr,nmaxcorr,point,cp,neval,nminstep;
1431   double stepsize,gpa,gpb,gpc,tmp,minstep;
1432   real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1433   real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1434   real   a,b,c,maxdelta,delta;
1435   real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
1436   real   dgdx,dgdg,sq,yr,beta;
1437   t_mdebin   *mdebin;
1438   gmx_bool   converged,first;
1439   rvec   mu_tot;
1440   real   fnorm,fmax;
1441   gmx_bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
1442   tensor vir,pres;
1443   int    start,end,number_steps;
1444   gmx_mdoutf_t *outf;
1445   int    i,k,m,n,nfmax,gf,step;
1446   int    mdof_flags;
1447   /* not used */
1448   real   terminate;
1449
1450   if (PAR(cr))
1451     gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1452
1453   n = 3*state->natoms;
1454   nmaxcorr = inputrec->nbfgscorr;
1455
1456   /* Allocate memory */
1457   /* Use pointers to real so we dont have to loop over both atoms and
1458    * dimensions all the time...
1459    * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1460    * that point to the same memory.
1461    */
1462   snew(xa,n);
1463   snew(xb,n);
1464   snew(xc,n);
1465   snew(fa,n);
1466   snew(fb,n);
1467   snew(fc,n);
1468   snew(frozen,n);
1469
1470   snew(p,n);
1471   snew(lastx,n);
1472   snew(lastf,n);
1473   snew(rho,nmaxcorr);
1474   snew(alpha,nmaxcorr);
1475
1476   snew(dx,nmaxcorr);
1477   for(i=0;i<nmaxcorr;i++)
1478     snew(dx[i],n);
1479
1480   snew(dg,nmaxcorr);
1481   for(i=0;i<nmaxcorr;i++)
1482     snew(dg[i],n);
1483
1484   step = 0;
1485   neval = 0;
1486
1487   /* Init em */
1488   init_em(fplog,LBFGS,cr,inputrec,
1489           state,top_global,&ems,&top,&f,&f_global,
1490           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1491           nfile,fnm,&outf,&mdebin);
1492   /* Do_lbfgs is not completely updated like do_steep and do_cg,
1493    * so we free some memory again.
1494    */
1495   sfree(ems.s.x);
1496   sfree(ems.f);
1497
1498   xx = (real *)state->x;
1499   ff = (real *)f;
1500
1501   start = mdatoms->start;
1502   end   = mdatoms->homenr + start;
1503
1504   /* Print to log file */
1505   print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1506
1507   do_log = do_ene = do_x = do_f = TRUE;
1508
1509   /* Max number of steps */
1510   number_steps=inputrec->nsteps;
1511
1512   /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1513   gf = 0;
1514   for(i=start; i<end; i++) {
1515     if (mdatoms->cFREEZE)
1516       gf = mdatoms->cFREEZE[i];
1517      for(m=0; m<DIM; m++)
1518        frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1519   }
1520   if (MASTER(cr))
1521     sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1522   if (fplog)
1523     sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1524
1525   if (vsite)
1526     construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1527                      top->idef.iparams,top->idef.il,
1528                      fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1529
1530   /* Call the force routine and some auxiliary (neighboursearching etc.) */
1531   /* do_force always puts the charge groups in the box and shifts again
1532    * We do not unshift, so molecules are always whole
1533    */
1534   neval++;
1535   ems.s.x = state->x;
1536   ems.f = f;
1537   evaluate_energy(fplog,bVerbose,cr,
1538                   state,top_global,&ems,top,
1539                   inputrec,nrnb,wcycle,gstat,
1540                   vsite,constr,fcd,graph,mdatoms,fr,
1541                   mu_tot,enerd,vir,pres,-1,TRUE);
1542   where();
1543
1544   if (MASTER(cr)) {
1545     /* Copy stuff to the energy bin for easy printing etc. */
1546     upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1547                mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1548                NULL,NULL,vir,pres,NULL,mu_tot,constr);
1549
1550     print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1551     print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1552                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1553   }
1554   where();
1555
1556   /* This is the starting energy */
1557   Epot = enerd->term[F_EPOT];
1558
1559   fnorm = ems.fnorm;
1560   fmax  = ems.fmax;
1561   nfmax = ems.a_fmax;
1562
1563   /* Set the initial step.
1564    * since it will be multiplied by the non-normalized search direction
1565    * vector (force vector the first time), we scale it by the
1566    * norm of the force.
1567    */
1568
1569   if (MASTER(cr)) {
1570     fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1571     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1572     fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1573     fprintf(stderr,"\n");
1574     /* and copy to the log file too... */
1575     fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1576     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1577     fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1578     fprintf(fplog,"\n");
1579   }
1580
1581   point=0;
1582   for(i=0;i<n;i++)
1583     if(!frozen[i])
1584       dx[point][i] = ff[i];  /* Initial search direction */
1585     else
1586       dx[point][i] = 0;
1587
1588   stepsize = 1.0/fnorm;
1589   converged = FALSE;
1590
1591   /* Start the loop over BFGS steps.
1592    * Each successful step is counted, and we continue until
1593    * we either converge or reach the max number of steps.
1594    */
1595
1596   ncorr=0;
1597
1598   /* Set the gradient from the force */
1599   converged = FALSE;
1600   for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1601
1602     /* Write coordinates if necessary */
1603     do_x = do_per_step(step,inputrec->nstxout);
1604     do_f = do_per_step(step,inputrec->nstfout);
1605
1606     mdof_flags = 0;
1607     if (do_x)
1608     {
1609         mdof_flags |= MDOF_X;
1610     }
1611
1612     if (do_f)
1613     {
1614         mdof_flags |= MDOF_F;
1615     }
1616
1617     write_traj(fplog,cr,outf,mdof_flags,
1618                top_global,step,(real)step,state,state,f,f,NULL,NULL);
1619
1620     /* Do the linesearching in the direction dx[point][0..(n-1)] */
1621
1622     /* pointer to current direction - point=0 first time here */
1623     s=dx[point];
1624
1625     /* calculate line gradient */
1626     for(gpa=0,i=0;i<n;i++)
1627         gpa-=s[i]*ff[i];
1628
1629     /* Calculate minimum allowed stepsize, before the average (norm)
1630      * relative change in coordinate is smaller than precision
1631      */
1632     for(minstep=0,i=0;i<n;i++) {
1633       tmp=fabs(xx[i]);
1634       if(tmp<1.0)
1635         tmp=1.0;
1636       tmp = s[i]/tmp;
1637       minstep += tmp*tmp;
1638     }
1639     minstep = GMX_REAL_EPS/sqrt(minstep/n);
1640
1641     if(stepsize<minstep) {
1642       converged=TRUE;
1643       break;
1644     }
1645
1646     /* Store old forces and coordinates */
1647     for(i=0;i<n;i++) {
1648       lastx[i]=xx[i];
1649       lastf[i]=ff[i];
1650     }
1651     Epot0=Epot;
1652
1653     first=TRUE;
1654
1655     for(i=0;i<n;i++)
1656       xa[i]=xx[i];
1657
1658     /* Take a step downhill.
1659      * In theory, we should minimize the function along this direction.
1660      * That is quite possible, but it turns out to take 5-10 function evaluations
1661      * for each line. However, we dont really need to find the exact minimum -
1662      * it is much better to start a new BFGS step in a modified direction as soon
1663      * as we are close to it. This will save a lot of energy evaluations.
1664      *
1665      * In practice, we just try to take a single step.
1666      * If it worked (i.e. lowered the energy), we increase the stepsize but
1667      * the continue straight to the next BFGS step without trying to find any minimum.
1668      * If it didn't work (higher energy), there must be a minimum somewhere between
1669      * the old position and the new one.
1670      *
1671      * Due to the finite numerical accuracy, it turns out that it is a good idea
1672      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1673      * This leads to lower final energies in the tests I've done. / Erik
1674      */
1675     foundlower=FALSE;
1676     EpotA = Epot0;
1677     a = 0.0;
1678     c = a + stepsize; /* reference position along line is zero */
1679
1680     /* Check stepsize first. We do not allow displacements
1681      * larger than emstep.
1682      */
1683     do {
1684       c = a + stepsize;
1685       maxdelta=0;
1686       for(i=0;i<n;i++) {
1687         delta=c*s[i];
1688         if(delta>maxdelta)
1689           maxdelta=delta;
1690       }
1691       if(maxdelta>inputrec->em_stepsize)
1692         stepsize*=0.1;
1693     } while(maxdelta>inputrec->em_stepsize);
1694
1695     /* Take a trial step */
1696     for (i=0; i<n; i++)
1697       xc[i] = lastx[i] + c*s[i];
1698
1699     neval++;
1700     /* Calculate energy for the trial step */
1701     ems.s.x = (rvec *)xc;
1702     ems.f   = (rvec *)fc;
1703     evaluate_energy(fplog,bVerbose,cr,
1704                     state,top_global,&ems,top,
1705                     inputrec,nrnb,wcycle,gstat,
1706                     vsite,constr,fcd,graph,mdatoms,fr,
1707                     mu_tot,enerd,vir,pres,step,FALSE);
1708     EpotC = ems.epot;
1709
1710     /* Calc derivative along line */
1711     for(gpc=0,i=0; i<n; i++) {
1712         gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
1713     }
1714     /* Sum the gradient along the line across CPUs */
1715     if (PAR(cr))
1716       gmx_sumd(1,&gpc,cr);
1717
1718      /* This is the max amount of increase in energy we tolerate */
1719    tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1720
1721     /* Accept the step if the energy is lower, or if it is not significantly higher
1722      * and the line derivative is still negative.
1723      */
1724     if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1725       foundlower = TRUE;
1726       /* Great, we found a better energy. Increase step for next iteration
1727        * if we are still going down, decrease it otherwise
1728        */
1729       if(gpc<0)
1730         stepsize *= 1.618034;  /* The golden section */
1731       else
1732         stepsize *= 0.618034;  /* 1/golden section */
1733     } else {
1734       /* New energy is the same or higher. We will have to do some work
1735        * to find a smaller value in the interval. Take smaller step next time!
1736        */
1737       foundlower = FALSE;
1738       stepsize *= 0.618034;
1739     }
1740
1741     /* OK, if we didn't find a lower value we will have to locate one now - there must
1742      * be one in the interval [a=0,c].
1743      * The same thing is valid here, though: Don't spend dozens of iterations to find
1744      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1745      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1746      *
1747      * I also have a safeguard for potentially really patological functions so we never
1748      * take more than 20 steps before we give up ...
1749      *
1750      * If we already found a lower value we just skip this step and continue to the update.
1751      */
1752
1753     if(!foundlower) {
1754
1755       nminstep=0;
1756       do {
1757         /* Select a new trial point.
1758          * If the derivatives at points a & c have different sign we interpolate to zero,
1759          * otherwise just do a bisection.
1760          */
1761
1762         if(gpa<0 && gpc>0)
1763           b = a + gpa*(a-c)/(gpc-gpa);
1764         else
1765           b = 0.5*(a+c);
1766
1767         /* safeguard if interpolation close to machine accuracy causes errors:
1768          * never go outside the interval
1769          */
1770         if(b<=a || b>=c)
1771           b = 0.5*(a+c);
1772
1773         /* Take a trial step */
1774         for (i=0; i<n; i++)
1775           xb[i] = lastx[i] + b*s[i];
1776
1777         neval++;
1778         /* Calculate energy for the trial step */
1779         ems.s.x = (rvec *)xb;
1780         ems.f   = (rvec *)fb;
1781         evaluate_energy(fplog,bVerbose,cr,
1782                         state,top_global,&ems,top,
1783                         inputrec,nrnb,wcycle,gstat,
1784                         vsite,constr,fcd,graph,mdatoms,fr,
1785                         mu_tot,enerd,vir,pres,step,FALSE);
1786         EpotB = ems.epot;
1787
1788         fnorm = ems.fnorm;
1789
1790         for(gpb=0,i=0; i<n; i++)
1791           gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
1792
1793         /* Sum the gradient along the line across CPUs */
1794         if (PAR(cr))
1795           gmx_sumd(1,&gpb,cr);
1796
1797         /* Keep one of the intervals based on the value of the derivative at the new point */
1798         if(gpb>0) {
1799           /* Replace c endpoint with b */
1800           EpotC = EpotB;
1801           c = b;
1802           gpc = gpb;
1803           /* swap coord pointers b/c */
1804           xtmp = xb;
1805           ftmp = fb;
1806           xb = xc;
1807           fb = fc;
1808           xc = xtmp;
1809           fc = ftmp;
1810         } else {
1811           /* Replace a endpoint with b */
1812           EpotA = EpotB;
1813           a = b;
1814           gpa = gpb;
1815           /* swap coord pointers a/b */
1816           xtmp = xb;
1817           ftmp = fb;
1818           xb = xa;
1819           fb = fa;
1820           xa = xtmp;
1821           fa = ftmp;
1822         }
1823
1824         /*
1825          * Stop search as soon as we find a value smaller than the endpoints,
1826          * or if the tolerance is below machine precision.
1827          * Never run more than 20 steps, no matter what.
1828          */
1829         nminstep++;
1830       } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1831
1832       if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1833         /* OK. We couldn't find a significantly lower energy.
1834          * If ncorr==0 this was steepest descent, and then we give up.
1835          * If not, reset memory to restart as steepest descent before quitting.
1836          */
1837         if(ncorr==0) {
1838         /* Converged */
1839           converged=TRUE;
1840           break;
1841         } else {
1842           /* Reset memory */
1843           ncorr=0;
1844           /* Search in gradient direction */
1845           for(i=0;i<n;i++)
1846             dx[point][i]=ff[i];
1847           /* Reset stepsize */
1848           stepsize = 1.0/fnorm;
1849           continue;
1850         }
1851       }
1852
1853       /* Select min energy state of A & C, put the best in xx/ff/Epot
1854        */
1855       if(EpotC<EpotA) {
1856         Epot = EpotC;
1857         /* Use state C */
1858         for(i=0;i<n;i++) {
1859           xx[i]=xc[i];
1860           ff[i]=fc[i];
1861         }
1862         stepsize=c;
1863       } else {
1864         Epot = EpotA;
1865         /* Use state A */
1866         for(i=0;i<n;i++) {
1867           xx[i]=xa[i];
1868           ff[i]=fa[i];
1869         }
1870         stepsize=a;
1871       }
1872
1873     } else {
1874       /* found lower */
1875       Epot = EpotC;
1876       /* Use state C */
1877       for(i=0;i<n;i++) {
1878         xx[i]=xc[i];
1879         ff[i]=fc[i];
1880       }
1881       stepsize=c;
1882     }
1883
1884     /* Update the memory information, and calculate a new
1885      * approximation of the inverse hessian
1886      */
1887
1888     /* Have new data in Epot, xx, ff */
1889     if(ncorr<nmaxcorr)
1890       ncorr++;
1891
1892     for(i=0;i<n;i++) {
1893       dg[point][i]=lastf[i]-ff[i];
1894       dx[point][i]*=stepsize;
1895     }
1896
1897     dgdg=0;
1898     dgdx=0;
1899     for(i=0;i<n;i++) {
1900       dgdg+=dg[point][i]*dg[point][i];
1901       dgdx+=dg[point][i]*dx[point][i];
1902     }
1903
1904     diag=dgdx/dgdg;
1905
1906     rho[point]=1.0/dgdx;
1907     point++;
1908
1909     if(point>=nmaxcorr)
1910       point=0;
1911
1912     /* Update */
1913     for(i=0;i<n;i++)
1914       p[i]=ff[i];
1915
1916     cp=point;
1917
1918     /* Recursive update. First go back over the memory points */
1919     for(k=0;k<ncorr;k++) {
1920       cp--;
1921       if(cp<0)
1922         cp=ncorr-1;
1923
1924       sq=0;
1925       for(i=0;i<n;i++)
1926         sq+=dx[cp][i]*p[i];
1927
1928       alpha[cp]=rho[cp]*sq;
1929
1930       for(i=0;i<n;i++)
1931         p[i] -= alpha[cp]*dg[cp][i];
1932     }
1933
1934     for(i=0;i<n;i++)
1935       p[i] *= diag;
1936
1937     /* And then go forward again */
1938     for(k=0;k<ncorr;k++) {
1939       yr = 0;
1940       for(i=0;i<n;i++)
1941         yr += p[i]*dg[cp][i];
1942
1943       beta = rho[cp]*yr;
1944       beta = alpha[cp]-beta;
1945
1946       for(i=0;i<n;i++)
1947         p[i] += beta*dx[cp][i];
1948
1949       cp++;
1950       if(cp>=ncorr)
1951         cp=0;
1952     }
1953
1954     for(i=0;i<n;i++)
1955       if(!frozen[i])
1956         dx[point][i] = p[i];
1957       else
1958         dx[point][i] = 0;
1959
1960     stepsize=1.0;
1961
1962     /* Test whether the convergence criterion is met */
1963     get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1964
1965     /* Print it if necessary */
1966     if (MASTER(cr)) {
1967       if(bVerbose)
1968         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1969                 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1970       /* Store the new (lower) energies */
1971       upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1972                  mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1973                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1974       do_log = do_per_step(step,inputrec->nstlog);
1975       do_ene = do_per_step(step,inputrec->nstenergy);
1976       if(do_log)
1977           print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1978       print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1979                  do_log ? fplog : NULL,step,step,eprNORMAL,
1980                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1981     }
1982
1983     /* Stop when the maximum force lies below tolerance.
1984      * If we have reached machine precision, converged is already set to true.
1985      */
1986
1987     converged = converged || (fmax < inputrec->em_tol);
1988
1989   } /* End of the loop */
1990
1991   if(converged)
1992     step--; /* we never took that last step in this case */
1993
1994     if(fmax>inputrec->em_tol)
1995     {
1996         if (MASTER(cr))
1997         {
1998             warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1999             warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
2000         }
2001         converged = FALSE;
2002     }
2003
2004   /* If we printed energy and/or logfile last step (which was the last step)
2005    * we don't have to do it again, but otherwise print the final values.
2006    */
2007   if(!do_log) /* Write final value to log since we didn't do anythin last step */
2008     print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
2009   if(!do_ene || !do_log) /* Write final energy file entries */
2010     print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
2011                !do_log ? fplog : NULL,step,step,eprNORMAL,
2012                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2013
2014   /* Print some stuff... */
2015   if (MASTER(cr))
2016     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2017
2018   /* IMPORTANT!
2019    * For accurate normal mode calculation it is imperative that we
2020    * store the last conformation into the full precision binary trajectory.
2021    *
2022    * However, we should only do it if we did NOT already write this step
2023    * above (which we did if do_x or do_f was true).
2024    */
2025   do_x = !do_per_step(step,inputrec->nstxout);
2026   do_f = !do_per_step(step,inputrec->nstfout);
2027   write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
2028                 top_global,inputrec,step,
2029                 &ems,state,f);
2030
2031   if (MASTER(cr)) {
2032     print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
2033                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2034     print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
2035                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2036
2037     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
2038   }
2039
2040   finish_em(fplog,cr,outf,runtime,wcycle);
2041
2042   /* To print the actual number of steps we needed somewhere */
2043   runtime->nsteps_done = step;
2044
2045   return 0;
2046 } /* That's all folks */
2047
2048
2049 double do_steep(FILE *fplog,t_commrec *cr,
2050                 int nfile, const t_filenm fnm[],
2051                 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2052                 int nstglobalcomm,
2053                 gmx_vsite_t *vsite,gmx_constr_t constr,
2054                 int stepout,
2055                 t_inputrec *inputrec,
2056                 gmx_mtop_t *top_global,t_fcdata *fcd,
2057                 t_state *state_global,
2058                 t_mdatoms *mdatoms,
2059                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2060                 gmx_edsam_t ed,
2061                 t_forcerec *fr,
2062                 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2063                 gmx_membed_t membed,
2064                 real cpt_period,real max_hours,
2065                 const char *deviceOptions,
2066                 unsigned long Flags,
2067                 gmx_runtime_t *runtime)
2068 {
2069   const char *SD="Steepest Descents";
2070   em_state_t *s_min,*s_try;
2071   rvec       *f_global;
2072   gmx_localtop_t *top;
2073   gmx_enerdata_t *enerd;
2074   rvec   *f;
2075   gmx_global_stat_t gstat;
2076   t_graph    *graph;
2077   real   stepsize,constepsize;
2078   real   ustep,dvdlambda,fnormn;
2079   gmx_mdoutf_t *outf;
2080   t_mdebin   *mdebin;
2081   gmx_bool   bDone,bAbort,do_x,do_f;
2082   tensor vir,pres;
2083   rvec   mu_tot;
2084   int    nsteps;
2085   int    count=0;
2086   int    steps_accepted=0;
2087   /* not used */
2088   real   terminate=0;
2089
2090   s_min = init_em_state();
2091   s_try = init_em_state();
2092
2093   /* Init em and store the local state in s_try */
2094   init_em(fplog,SD,cr,inputrec,
2095           state_global,top_global,s_try,&top,&f,&f_global,
2096           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2097           nfile,fnm,&outf,&mdebin);
2098
2099   /* Print to log file  */
2100   print_em_start(fplog,cr,runtime,wcycle,SD);
2101
2102   /* Set variables for stepsize (in nm). This is the largest
2103    * step that we are going to make in any direction.
2104    */
2105   ustep = inputrec->em_stepsize;
2106   stepsize = 0;
2107
2108   /* Max number of steps  */
2109   nsteps = inputrec->nsteps;
2110
2111   if (MASTER(cr))
2112     /* Print to the screen  */
2113     sp_header(stderr,SD,inputrec->em_tol,nsteps);
2114   if (fplog)
2115     sp_header(fplog,SD,inputrec->em_tol,nsteps);
2116
2117   /**** HERE STARTS THE LOOP ****
2118    * count is the counter for the number of steps
2119    * bDone will be TRUE when the minimization has converged
2120    * bAbort will be TRUE when nsteps steps have been performed or when
2121    * the stepsize becomes smaller than is reasonable for machine precision
2122    */
2123   count  = 0;
2124   bDone  = FALSE;
2125   bAbort = FALSE;
2126   while( !bDone && !bAbort ) {
2127     bAbort = (nsteps >= 0) && (count == nsteps);
2128
2129     /* set new coordinates, except for first step */
2130     if (count > 0) {
2131         do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,
2132                    s_min,stepsize,s_min->f,s_try,
2133                    constr,top,nrnb,wcycle,count);
2134     }
2135
2136     evaluate_energy(fplog,bVerbose,cr,
2137                     state_global,top_global,s_try,top,
2138                     inputrec,nrnb,wcycle,gstat,
2139                     vsite,constr,fcd,graph,mdatoms,fr,
2140                     mu_tot,enerd,vir,pres,count,count==0);
2141
2142     if (MASTER(cr))
2143       print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
2144
2145     if (count == 0)
2146       s_min->epot = s_try->epot + 1;
2147
2148     /* Print it if necessary  */
2149     if (MASTER(cr)) {
2150       if (bVerbose) {
2151         fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2152                 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2153                 (s_try->epot < s_min->epot) ? '\n' : '\r');
2154       }
2155
2156       if (s_try->epot < s_min->epot) {
2157         /* Store the new (lower) energies  */
2158         upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2159                    mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
2160                    s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
2161         print_ebin(outf->fp_ene,TRUE,
2162                    do_per_step(steps_accepted,inputrec->nstdisreout),
2163                    do_per_step(steps_accepted,inputrec->nstorireout),
2164                    fplog,count,count,eprNORMAL,TRUE,
2165                    mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2166         fflush(fplog);
2167       }
2168     }
2169
2170     /* Now if the new energy is smaller than the previous...
2171      * or if this is the first step!
2172      * or if we did random steps!
2173      */
2174
2175     if ( (count==0) || (s_try->epot < s_min->epot) ) {
2176       steps_accepted++;
2177
2178       /* Test whether the convergence criterion is met...  */
2179       bDone = (s_try->fmax < inputrec->em_tol);
2180
2181       /* Copy the arrays for force, positions and energy  */
2182       /* The 'Min' array always holds the coords and forces of the minimal
2183          sampled energy  */
2184       swap_em_state(s_min,s_try);
2185       if (count > 0)
2186         ustep *= 1.2;
2187
2188       /* Write to trn, if necessary */
2189       do_x = do_per_step(steps_accepted,inputrec->nstxout);
2190       do_f = do_per_step(steps_accepted,inputrec->nstfout);
2191       write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2192                     top_global,inputrec,count,
2193                     s_min,state_global,f_global);
2194     }
2195     else {
2196       /* If energy is not smaller make the step smaller...  */
2197       ustep *= 0.5;
2198
2199       if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2200         /* Reload the old state */
2201         em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2202                                s_min,top,mdatoms,fr,vsite,constr,
2203                                nrnb,wcycle);
2204       }
2205     }
2206
2207     /* Determine new step  */
2208     stepsize = ustep/s_min->fmax;
2209
2210     /* Check if stepsize is too small, with 1 nm as a characteristic length */
2211 #ifdef GMX_DOUBLE
2212         if (count == nsteps || ustep < 1e-12)
2213 #else
2214         if (count == nsteps || ustep < 1e-6)
2215 #endif
2216         {
2217             if (MASTER(cr))
2218             {
2219                 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2220                 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2221             }
2222             bAbort=TRUE;
2223         }
2224
2225     count++;
2226   } /* End of the loop  */
2227
2228     /* Print some shit...  */
2229   if (MASTER(cr))
2230     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2231   write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2232                 top_global,inputrec,count,
2233                 s_min,state_global,f_global);
2234
2235   fnormn = s_min->fnorm/sqrt(state_global->natoms);
2236
2237   if (MASTER(cr)) {
2238     print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2239                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2240     print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2241                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2242   }
2243
2244   finish_em(fplog,cr,outf,runtime,wcycle);
2245
2246   /* To print the actual number of steps we needed somewhere */
2247   inputrec->nsteps=count;
2248
2249   runtime->nsteps_done = count;
2250
2251   return 0;
2252 } /* That's all folks */
2253
2254
2255 double do_nm(FILE *fplog,t_commrec *cr,
2256              int nfile,const t_filenm fnm[],
2257              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2258              int nstglobalcomm,
2259              gmx_vsite_t *vsite,gmx_constr_t constr,
2260              int stepout,
2261              t_inputrec *inputrec,
2262              gmx_mtop_t *top_global,t_fcdata *fcd,
2263              t_state *state_global,
2264              t_mdatoms *mdatoms,
2265              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2266              gmx_edsam_t ed,
2267              t_forcerec *fr,
2268              int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2269              gmx_membed_t membed,
2270              real cpt_period,real max_hours,
2271              const char *deviceOptions,
2272              unsigned long Flags,
2273              gmx_runtime_t *runtime)
2274 {
2275     const char *NM = "Normal Mode Analysis";
2276     gmx_mdoutf_t *outf;
2277     int        natoms,atom,d;
2278     int        nnodes,node;
2279     rvec       *f_global;
2280     gmx_localtop_t *top;
2281     gmx_enerdata_t *enerd;
2282     rvec       *f;
2283     gmx_global_stat_t gstat;
2284     t_graph    *graph;
2285     real       t,t0,lambda,lam0;
2286     gmx_bool       bNS;
2287     tensor     vir,pres;
2288     rvec       mu_tot;
2289     rvec       *fneg,*dfdx;
2290     gmx_bool       bSparse; /* use sparse matrix storage format */
2291     size_t     sz;
2292     gmx_sparsematrix_t * sparse_matrix = NULL;
2293     real *     full_matrix             = NULL;
2294     em_state_t *   state_work;
2295
2296     /* added with respect to mdrun */
2297     int        i,j,k,row,col;
2298     real       der_range=10.0*sqrt(GMX_REAL_EPS);
2299     real       x_min;
2300     real       fnorm,fmax;
2301
2302     if (constr != NULL)
2303     {
2304         gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2305     }
2306
2307     state_work = init_em_state();
2308
2309     /* Init em and store the local state in state_minimum */
2310     init_em(fplog,NM,cr,inputrec,
2311             state_global,top_global,state_work,&top,
2312             &f,&f_global,
2313             nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2314             nfile,fnm,&outf,NULL);
2315
2316     natoms = top_global->natoms;
2317     snew(fneg,natoms);
2318     snew(dfdx,natoms);
2319
2320 #ifndef GMX_DOUBLE
2321     if (MASTER(cr))
2322     {
2323         fprintf(stderr,
2324                 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2325                 "      which MIGHT not be accurate enough for normal mode analysis.\n"
2326                 "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
2327                 "      are fairly modest even if you recompile in double precision.\n\n");
2328     }
2329 #endif
2330
2331     /* Check if we can/should use sparse storage format.
2332      *
2333      * Sparse format is only useful when the Hessian itself is sparse, which it
2334       * will be when we use a cutoff.
2335       * For small systems (n<1000) it is easier to always use full matrix format, though.
2336       */
2337     if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2338     {
2339         fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2340         bSparse = FALSE;
2341     }
2342     else if(top_global->natoms < 1000)
2343     {
2344         fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2345         bSparse = FALSE;
2346     }
2347     else
2348     {
2349         fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2350         bSparse = TRUE;
2351     }
2352
2353     sz = DIM*top_global->natoms;
2354
2355     fprintf(stderr,"Allocating Hessian memory...\n\n");
2356
2357     if(bSparse)
2358     {
2359         sparse_matrix=gmx_sparsematrix_init(sz);
2360         sparse_matrix->compressed_symmetric = TRUE;
2361     }
2362     else
2363     {
2364         snew(full_matrix,sz*sz);
2365     }
2366
2367     /* Initial values */
2368     t0           = inputrec->init_t;
2369     lam0         = inputrec->fepvals->init_lambda;
2370     t            = t0;
2371     lambda       = lam0;
2372
2373     init_nrnb(nrnb);
2374
2375     where();
2376
2377     /* Write start time and temperature */
2378     print_em_start(fplog,cr,runtime,wcycle,NM);
2379
2380     /* fudge nr of steps to nr of atoms */
2381     inputrec->nsteps = natoms*2;
2382
2383     if (MASTER(cr))
2384     {
2385         fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2386                 *(top_global->name),(int)inputrec->nsteps);
2387     }
2388
2389     nnodes = cr->nnodes;
2390
2391     /* Make evaluate_energy do a single node force calculation */
2392     cr->nnodes = 1;
2393     evaluate_energy(fplog,bVerbose,cr,
2394                     state_global,top_global,state_work,top,
2395                     inputrec,nrnb,wcycle,gstat,
2396                     vsite,constr,fcd,graph,mdatoms,fr,
2397                     mu_tot,enerd,vir,pres,-1,TRUE);
2398     cr->nnodes = nnodes;
2399
2400     /* if forces are not small, warn user */
2401     get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2402
2403     if (MASTER(cr))
2404     {
2405         fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2406         if (state_work->fmax > 1.0e-3)
2407         {
2408             fprintf(stderr,"Maximum force probably not small enough to");
2409             fprintf(stderr," ensure that you are in an \nenergy well. ");
2410             fprintf(stderr,"Be aware that negative eigenvalues may occur");
2411             fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2412         }
2413     }
2414
2415     /***********************************************************
2416      *
2417      *      Loop over all pairs in matrix
2418      *
2419      *      do_force called twice. Once with positive and
2420      *      once with negative displacement
2421      *
2422      ************************************************************/
2423
2424     /* Steps are divided one by one over the nodes */
2425     for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2426     {
2427
2428         for (d=0; d<DIM; d++)
2429         {
2430             x_min = state_work->s.x[atom][d];
2431
2432             state_work->s.x[atom][d] = x_min - der_range;
2433
2434             /* Make evaluate_energy do a single node force calculation */
2435             cr->nnodes = 1;
2436             evaluate_energy(fplog,bVerbose,cr,
2437                             state_global,top_global,state_work,top,
2438                             inputrec,nrnb,wcycle,gstat,
2439                             vsite,constr,fcd,graph,mdatoms,fr,
2440                             mu_tot,enerd,vir,pres,atom*2,FALSE);
2441
2442             for(i=0; i<natoms; i++)
2443             {
2444                 copy_rvec(state_work->f[i], fneg[i]);
2445             }
2446
2447             state_work->s.x[atom][d] = x_min + der_range;
2448
2449             evaluate_energy(fplog,bVerbose,cr,
2450                             state_global,top_global,state_work,top,
2451                             inputrec,nrnb,wcycle,gstat,
2452                             vsite,constr,fcd,graph,mdatoms,fr,
2453                             mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2454             cr->nnodes = nnodes;
2455
2456             /* x is restored to original */
2457             state_work->s.x[atom][d] = x_min;
2458
2459             for(j=0; j<natoms; j++)
2460             {
2461                 for (k=0; (k<DIM); k++)
2462                 {
2463                     dfdx[j][k] =
2464                         -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2465                 }
2466             }
2467
2468             if (!MASTER(cr))
2469             {
2470 #ifdef GMX_MPI
2471 #ifdef GMX_DOUBLE
2472 #define mpi_type MPI_DOUBLE
2473 #else
2474 #define mpi_type MPI_FLOAT
2475 #endif
2476                 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2477                          cr->mpi_comm_mygroup);
2478 #endif
2479             }
2480             else
2481             {
2482                 for(node=0; (node<nnodes && atom+node<natoms); node++)
2483                 {
2484                     if (node > 0)
2485                     {
2486 #ifdef GMX_MPI
2487                         MPI_Status stat;
2488                         MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2489                                  cr->mpi_comm_mygroup,&stat);
2490 #undef mpi_type
2491 #endif
2492                     }
2493
2494                     row = (atom + node)*DIM + d;
2495
2496                     for(j=0; j<natoms; j++)
2497                     {
2498                         for(k=0; k<DIM; k++)
2499                         {
2500                             col = j*DIM + k;
2501
2502                             if (bSparse)
2503                             {
2504                                 if (col >= row && dfdx[j][k] != 0.0)
2505                                 {
2506                                     gmx_sparsematrix_increment_value(sparse_matrix,
2507                                                                      row,col,dfdx[j][k]);
2508                                 }
2509                             }
2510                             else
2511                             {
2512                                 full_matrix[row*sz+col] = dfdx[j][k];
2513                             }
2514                         }
2515                     }
2516                 }
2517             }
2518
2519             if (bVerbose && fplog)
2520             {
2521                 fflush(fplog);
2522             }
2523         }
2524         /* write progress */
2525         if (MASTER(cr) && bVerbose)
2526         {
2527             fprintf(stderr,"\rFinished step %d out of %d",
2528                     min(atom+nnodes,natoms),natoms);
2529             fflush(stderr);
2530         }
2531     }
2532
2533     if (MASTER(cr))
2534     {
2535         fprintf(stderr,"\n\nWriting Hessian...\n");
2536         gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2537     }
2538
2539     finish_em(fplog,cr,outf,runtime,wcycle);
2540
2541     runtime->nsteps_done = natoms*2;
2542
2543     return 0;
2544 }
2545