src/gromacs/mdlib/minimize.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <string.h>
  41 #include <time.h>
  42 #include <math.h>
  43 #include "sysstuff.h"
  44 #include "string2.h"
  45 #include "network.h"
  46 #include "confio.h"
  47 #include "smalloc.h"
  48 #include "nrnb.h"
  49 #include "main.h"
  50 #include "force.h"
  51 #include "macros.h"
  52 #include "random.h"
  53 #include "names.h"
  54 #include "gmx_fatal.h"
  55 #include "txtdump.h"
  56 #include "typedefs.h"
  57 #include "update.h"
  58 #include "constr.h"
  59 #include "vec.h"
  60 #include "statutil.h"
  61 #include "tgroup.h"
  62 #include "mdebin.h"
  63 #include "vsite.h"
  64 #include "force.h"
  65 #include "mdrun.h"
  66 #include "md_support.h"
  67 #include "domdec.h"
  68 #include "partdec.h"
  69 #include "trnio.h"
  70 #include "mdatoms.h"
  71 #include "ns.h"
  72 #include "gmx_wallcycle.h"
  73 #include "mtop_util.h"
  74 #include "gmxfio.h"
  75 #include "pme.h"
  76 #include "bondf.h"
  77 #include "gmx_omp_nthreads.h"
  78
  79
  80 #include "gromacs/linearalgebra/mtxio.h"
  81 #include "gromacs/linearalgebra/sparsematrix.h"
  82
  83 typedef struct {
  84     t_state  s;
  85     rvec    *f;
  86     real     epot;
  87     real     fnorm;
  88     real     fmax;
  89     int      a_fmax;
  90 } em_state_t;
  91
  92 static em_state_t *init_em_state()
  93 {
  94     em_state_t *ems;
  95
  96     snew(ems, 1);
  97
  98     /* does this need to be here?  Should the array be declared differently (staticaly)in the state definition? */
  99     snew(ems->s.lambda, efptNR);
 100
 101     return ems;
 102 }
 103
 104 static void print_em_start(FILE *fplog, t_commrec *cr, gmx_runtime_t *runtime,
 105                            gmx_wallcycle_t wcycle,
 106                            const char *name)
 107 {
 108     char buf[STRLEN];
 109
 110     runtime_start(runtime);
 111
 112     sprintf(buf, "Started %s", name);
 113     print_date_and_time(fplog, cr->nodeid, buf, NULL);
 114
 115     wallcycle_start(wcycle, ewcRUN);
 116 }
 117 static void em_time_end(gmx_runtime_t  *runtime,
 118                         gmx_wallcycle_t wcycle)
 119 {
 120     wallcycle_stop(wcycle, ewcRUN);
 121
 122     runtime_end(runtime);
 123 }
 124
 125 static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
 126 {
 127     fprintf(out, "\n");
 128     fprintf(out, "%s:\n", minimizer);
 129     fprintf(out, "   Tolerance (Fmax)   = %12.5e\n", ftol);
 130     fprintf(out, "   Number of steps    = %12d\n", nsteps);
 131 }
 132
 133 static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain)
 134 {
 135     char buffer[2048];
 136     if (bLastStep)
 137     {
 138         sprintf(buffer,
 139                 "\nEnergy minimization reached the maximum number"
 140                 "of steps before the forces reached the requested"
 141                 "precision Fmax < %g.\n", ftol);
 142     }
 143     else
 144     {
 145         sprintf(buffer,
 146                 "\nEnergy minimization has stopped, but the forces have"
 147                 "not converged to the requested precision Fmax < %g (which"
 148                 "may not be possible for your system). It stopped"
 149                 "because the algorithm tried to make a new step whose size"
 150                 "was too small, or there was no change in the energy since"
 151                 "last step. Either way, we regard the minimization as"
 152                 "converged to within the available machine precision,"
 153                 "given your starting configuration and EM parameters.\n%s%s",
 154                 ftol,
 155                 sizeof(real) < sizeof(double) ?
 156                 "\nDouble precision normally gives you higher accuracy, but"
 157                 "this is often not needed for preparing to run molecular"
 158                 "dynamics.\n" :
 159                 "",
 160                 bConstrain ?
 161                 "You might need to increase your constraint accuracy, or turn\n"
 162                 "off constraints altogether (set constraints = none in mdp file)\n" :
 163                 "");
 164     }
 165     fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
 166 }
 167
 168
 169
 170 static void print_converged(FILE *fp, const char *alg, real ftol,
 171                             gmx_large_int_t count, gmx_bool bDone, gmx_large_int_t nsteps,
 172                             real epot, real fmax, int nfmax, real fnorm)
 173 {
 174     char buf[STEPSTRSIZE];
 175
 176     if (bDone)
 177     {
 178         fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n",
 179                 alg, ftol, gmx_step_str(count, buf));
 180     }
 181     else if (count < nsteps)
 182     {
 183         fprintf(fp, "\n%s converged to machine precision in %s steps,\n"
 184                 "but did not reach the requested Fmax < %g.\n",
 185                 alg, gmx_step_str(count, buf), ftol);
 186     }
 187     else
 188     {
 189         fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n",
 190                 alg, ftol, gmx_step_str(count, buf));
 191     }
 192
 193 #ifdef GMX_DOUBLE
 194     fprintf(fp, "Potential Energy  = %21.14e\n", epot);
 195     fprintf(fp, "Maximum force     = %21.14e on atom %d\n", fmax, nfmax+1);
 196     fprintf(fp, "Norm of force     = %21.14e\n", fnorm);
 197 #else
 198     fprintf(fp, "Potential Energy  = %14.7e\n", epot);
 199     fprintf(fp, "Maximum force     = %14.7e on atom %d\n", fmax, nfmax+1);
 200     fprintf(fp, "Norm of force     = %14.7e\n", fnorm);
 201 #endif
 202 }
 203
 204 static void get_f_norm_max(t_commrec *cr,
 205                            t_grpopts *opts, t_mdatoms *mdatoms, rvec *f,
 206                            real *fnorm, real *fmax, int *a_fmax)
 207 {
 208     double fnorm2, *sum;
 209     real   fmax2, fmax2_0, fam;
 210     int    la_max, a_max, start, end, i, m, gf;
 211
 212     /* This routine finds the largest force and returns it.
 213      * On parallel machines the global max is taken.
 214      */
 215     fnorm2 = 0;
 216     fmax2  = 0;
 217     la_max = -1;
 218     gf     = 0;
 219     start  = mdatoms->start;
 220     end    = mdatoms->homenr + start;
 221     if (mdatoms->cFREEZE)
 222     {
 223         for (i = start; i < end; i++)
 224         {
 225             gf  = mdatoms->cFREEZE[i];
 226             fam = 0;
 227             for (m = 0; m < DIM; m++)
 228             {
 229                 if (!opts->nFreeze[gf][m])
 230                 {
 231                     fam += sqr(f[i][m]);
 232                 }
 233             }
 234             fnorm2 += fam;
 235             if (fam > fmax2)
 236             {
 237                 fmax2  = fam;
 238                 la_max = i;
 239             }
 240         }
 241     }
 242     else
 243     {
 244         for (i = start; i < end; i++)
 245         {
 246             fam     = norm2(f[i]);
 247             fnorm2 += fam;
 248             if (fam > fmax2)
 249             {
 250                 fmax2  = fam;
 251                 la_max = i;
 252             }
 253         }
 254     }
 255
 256     if (la_max >= 0 && DOMAINDECOMP(cr))
 257     {
 258         a_max = cr->dd->gatindex[la_max];
 259     }
 260     else
 261     {
 262         a_max = la_max;
 263     }
 264     if (PAR(cr))
 265     {
 266         snew(sum, 2*cr->nnodes+1);
 267         sum[2*cr->nodeid]   = fmax2;
 268         sum[2*cr->nodeid+1] = a_max;
 269         sum[2*cr->nnodes]   = fnorm2;
 270         gmx_sumd(2*cr->nnodes+1, sum, cr);
 271         fnorm2 = sum[2*cr->nnodes];
 272         /* Determine the global maximum */
 273         for (i = 0; i < cr->nnodes; i++)
 274         {
 275             if (sum[2*i] > fmax2)
 276             {
 277                 fmax2 = sum[2*i];
 278                 a_max = (int)(sum[2*i+1] + 0.5);
 279             }
 280         }
 281         sfree(sum);
 282     }
 283
 284     if (fnorm)
 285     {
 286         *fnorm = sqrt(fnorm2);
 287     }
 288     if (fmax)
 289     {
 290         *fmax  = sqrt(fmax2);
 291     }
 292     if (a_fmax)
 293     {
 294         *a_fmax = a_max;
 295     }
 296 }
 297
 298 static void get_state_f_norm_max(t_commrec *cr,
 299                                  t_grpopts *opts, t_mdatoms *mdatoms,
 300                                  em_state_t *ems)
 301 {
 302     get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax);
 303 }
 304
 305 void init_em(FILE *fplog, const char *title,
 306              t_commrec *cr, t_inputrec *ir,
 307              t_state *state_global, gmx_mtop_t *top_global,
 308              em_state_t *ems, gmx_localtop_t **top,
 309              rvec **f, rvec **f_global,
 310              t_nrnb *nrnb, rvec mu_tot,
 311              t_forcerec *fr, gmx_enerdata_t **enerd,
 312              t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat,
 313              gmx_vsite_t *vsite, gmx_constr_t constr,
 314              int nfile, const t_filenm fnm[],
 315              gmx_mdoutf_t **outf, t_mdebin **mdebin)
 316 {
 317     int  start, homenr, i;
 318     real dvdl_constr;
 319
 320     if (fplog)
 321     {
 322         fprintf(fplog, "Initiating %s\n", title);
 323     }
 324
 325     state_global->ngtc = 0;
 326
 327     /* Initialize lambda variables */
 328     initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL);
 329
 330     init_nrnb(nrnb);
 331
 332     if (DOMAINDECOMP(cr))
 333     {
 334         *top = dd_init_local_top(top_global);
 335
 336         dd_init_local_state(cr->dd, state_global, &ems->s);
 337
 338         *f = NULL;
 339
 340         /* Distribute the charge groups over the nodes from the master node */
 341         dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
 342                             state_global, top_global, ir,
 343                             &ems->s, &ems->f, mdatoms, *top,
 344                             fr, vsite, NULL, constr,
 345                             nrnb, NULL, FALSE);
 346         dd_store_state(cr->dd, &ems->s);
 347
 348         if (ir->nstfout)
 349         {
 350             snew(*f_global, top_global->natoms);
 351         }
 352         else
 353         {
 354             *f_global = NULL;
 355         }
 356         *graph = NULL;
 357     }
 358     else
 359     {
 360         snew(*f, top_global->natoms);
 361
 362         /* Just copy the state */
 363         ems->s = *state_global;
 364         snew(ems->s.x, ems->s.nalloc);
 365         snew(ems->f, ems->s.nalloc);
 366         for (i = 0; i < state_global->natoms; i++)
 367         {
 368             copy_rvec(state_global->x[i], ems->s.x[i]);
 369         }
 370         copy_mat(state_global->box, ems->s.box);
 371
 372         if (PAR(cr) && ir->eI != eiNM)
 373         {
 374             /* Initialize the particle decomposition and split the topology */
 375             *top = split_system(fplog, top_global, ir, cr);
 376
 377             pd_cg_range(cr, &fr->cg0, &fr->hcg);
 378         }
 379         else
 380         {
 381             *top = gmx_mtop_generate_local_top(top_global, ir);
 382         }
 383         *f_global = *f;
 384
 385         forcerec_set_excl_load(fr, *top, cr);
 386
 387         init_bonded_thread_force_reduction(fr, &(*top)->idef);
 388
 389         if (ir->ePBC != epbcNONE && !fr->bMolPBC)
 390         {
 391             *graph = mk_graph(fplog, &((*top)->idef), 0, top_global->natoms, FALSE, FALSE);
 392         }
 393         else
 394         {
 395             *graph = NULL;
 396         }
 397
 398         if (PARTDECOMP(cr))
 399         {
 400             pd_at_range(cr, &start, &homenr);
 401             homenr -= start;
 402         }
 403         else
 404         {
 405             start  = 0;
 406             homenr = top_global->natoms;
 407         }
 408         atoms2md(top_global, ir, 0, NULL, start, homenr, mdatoms);
 409         update_mdatoms(mdatoms, state_global->lambda[efptFEP]);
 410
 411         if (vsite)
 412         {
 413             set_vsite_top(vsite, *top, mdatoms, cr);
 414         }
 415     }
 416
 417     if (constr)
 418     {
 419         if (ir->eConstrAlg == econtSHAKE &&
 420             gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
 421         {
 422             gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",
 423                       econstr_names[econtSHAKE], econstr_names[econtLINCS]);
 424         }
 425
 426         if (!DOMAINDECOMP(cr))
 427         {
 428             set_constraints(constr, *top, ir, mdatoms, cr);
 429         }
 430
 431         if (!ir->bContinuation)
 432         {
 433             /* Constrain the starting coordinates */
 434             dvdl_constr = 0;
 435             constrain(PAR(cr) ? NULL : fplog, TRUE, TRUE, constr, &(*top)->idef,
 436                       ir, NULL, cr, -1, 0, mdatoms,
 437                       ems->s.x, ems->s.x, NULL, fr->bMolPBC, ems->s.box,
 438                       ems->s.lambda[efptFEP], &dvdl_constr,
 439                       NULL, NULL, nrnb, econqCoord, FALSE, 0, 0);
 440         }
 441     }
 442
 443     if (PAR(cr))
 444     {
 445         *gstat = global_stat_init(ir);
 446     }
 447
 448     *outf = init_mdoutf(nfile, fnm, 0, cr, ir, NULL);
 449
 450     snew(*enerd, 1);
 451     init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
 452                   *enerd);
 453
 454     if (mdebin != NULL)
 455     {
 456         /* Init bin for energy stuff */
 457         *mdebin = init_mdebin((*outf)->fp_ene, top_global, ir, NULL);
 458     }
 459
 460     clear_rvec(mu_tot);
 461     calc_shifts(ems->s.box, fr->shift_vec);
 462 }
 463
 464 static void finish_em(t_commrec *cr, gmx_mdoutf_t *outf,
 465                       gmx_runtime_t *runtime, gmx_wallcycle_t wcycle)
 466 {
 467     if (!(cr->duty & DUTY_PME))
 468     {
 469         /* Tell the PME only node to finish */
 470         gmx_pme_send_finish(cr);
 471     }
 472
 473     done_mdoutf(outf);
 474
 475     em_time_end(runtime, wcycle);
 476 }
 477
 478 static void swap_em_state(em_state_t *ems1, em_state_t *ems2)
 479 {
 480     em_state_t tmp;
 481
 482     tmp   = *ems1;
 483     *ems1 = *ems2;
 484     *ems2 = tmp;
 485 }
 486
 487 static void copy_em_coords(em_state_t *ems, t_state *state)
 488 {
 489     int i;
 490
 491     for (i = 0; (i < state->natoms); i++)
 492     {
 493         copy_rvec(ems->s.x[i], state->x[i]);
 494     }
 495 }
 496
 497 static void write_em_traj(FILE *fplog, t_commrec *cr,
 498                           gmx_mdoutf_t *outf,
 499                           gmx_bool bX, gmx_bool bF, const char *confout,
 500                           gmx_mtop_t *top_global,
 501                           t_inputrec *ir, gmx_large_int_t step,
 502                           em_state_t *state,
 503                           t_state *state_global, rvec *f_global)
 504 {
 505     int mdof_flags;
 506
 507     if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 508     {
 509         copy_em_coords(state, state_global);
 510         f_global = state->f;
 511     }
 512
 513     mdof_flags = 0;
 514     if (bX)
 515     {
 516         mdof_flags |= MDOF_X;
 517     }
 518     if (bF)
 519     {
 520         mdof_flags |= MDOF_F;
 521     }
 522     write_traj(fplog, cr, outf, mdof_flags,
 523                top_global, step, (double)step,
 524                &state->s, state_global, state->f, f_global, NULL, NULL);
 525
 526     if (confout != NULL && MASTER(cr))
 527     {
 528         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 529         {
 530             /* Make molecules whole only for confout writing */
 531             do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global,
 532                         state_global->x);
 533         }
 534
 535         write_sto_conf_mtop(confout,
 536                             *top_global->name, top_global,
 537                             state_global->x, NULL, ir->ePBC, state_global->box);
 538     }
 539 }
 540
 541 static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
 542                        gmx_bool bMolPBC,
 543                        em_state_t *ems1, real a, rvec *f, em_state_t *ems2,
 544                        gmx_constr_t constr, gmx_localtop_t *top,
 545                        t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 546                        gmx_large_int_t count)
 547
 548 {
 549     t_state *s1, *s2;
 550     int      i;
 551     int      start, end;
 552     rvec    *x1, *x2;
 553     real     dvdl_constr;
 554
 555     s1 = &ems1->s;
 556     s2 = &ems2->s;
 557
 558     if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 559     {
 560         gmx_incons("state mismatch in do_em_step");
 561     }
 562
 563     s2->flags = s1->flags;
 564
 565     if (s2->nalloc != s1->nalloc)
 566     {
 567         s2->nalloc = s1->nalloc;
 568         srenew(s2->x, s1->nalloc);
 569         srenew(ems2->f,  s1->nalloc);
 570         if (s2->flags & (1<<estCGP))
 571         {
 572             srenew(s2->cg_p,  s1->nalloc);
 573         }
 574     }
 575
 576     s2->natoms = s1->natoms;
 577     copy_mat(s1->box, s2->box);
 578     /* Copy free energy state */
 579     for (i = 0; i < efptNR; i++)
 580     {
 581         s2->lambda[i] = s1->lambda[i];
 582     }
 583     copy_mat(s1->box, s2->box);
 584
 585     start = md->start;
 586     end   = md->start + md->homenr;
 587
 588     x1 = s1->x;
 589     x2 = s2->x;
 590
 591 #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
 592     {
 593         int gf, i, m;
 594
 595         gf = 0;
 596 #pragma omp for schedule(static) nowait
 597         for (i = start; i < end; i++)
 598         {
 599             if (md->cFREEZE)
 600             {
 601                 gf = md->cFREEZE[i];
 602             }
 603             for (m = 0; m < DIM; m++)
 604             {
 605                 if (ir->opts.nFreeze[gf][m])
 606                 {
 607                     x2[i][m] = x1[i][m];
 608                 }
 609                 else
 610                 {
 611                     x2[i][m] = x1[i][m] + a*f[i][m];
 612                 }
 613             }
 614         }
 615
 616         if (s2->flags & (1<<estCGP))
 617         {
 618             /* Copy the CG p vector */
 619             x1 = s1->cg_p;
 620             x2 = s2->cg_p;
 621 #pragma omp for schedule(static) nowait
 622             for (i = start; i < end; i++)
 623             {
 624                 copy_rvec(x1[i], x2[i]);
 625             }
 626         }
 627
 628         if (DOMAINDECOMP(cr))
 629         {
 630             s2->ddp_count = s1->ddp_count;
 631             if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
 632             {
 633 #pragma omp barrier
 634                 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 635                 srenew(s2->cg_gl, s2->cg_gl_nalloc);
 636 #pragma omp barrier
 637             }
 638             s2->ncg_gl = s1->ncg_gl;
 639 #pragma omp for schedule(static) nowait
 640             for (i = 0; i < s2->ncg_gl; i++)
 641             {
 642                 s2->cg_gl[i] = s1->cg_gl[i];
 643             }
 644             s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 645         }
 646     }
 647
 648     if (constr)
 649     {
 650         wallcycle_start(wcycle, ewcCONSTR);
 651         dvdl_constr = 0;
 652         constrain(NULL, TRUE, TRUE, constr, &top->idef,
 653                   ir, NULL, cr, count, 0, md,
 654                   s1->x, s2->x, NULL, bMolPBC, s2->box,
 655                   s2->lambda[efptBONDED], &dvdl_constr,
 656                   NULL, NULL, nrnb, econqCoord, FALSE, 0, 0);
 657         wallcycle_stop(wcycle, ewcCONSTR);
 658     }
 659 }
 660
 661 static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr,
 662                                    gmx_mtop_t *top_global, t_inputrec *ir,
 663                                    em_state_t *ems, gmx_localtop_t *top,
 664                                    t_mdatoms *mdatoms, t_forcerec *fr,
 665                                    gmx_vsite_t *vsite, gmx_constr_t constr,
 666                                    t_nrnb *nrnb, gmx_wallcycle_t wcycle)
 667 {
 668     /* Repartition the domain decomposition */
 669     wallcycle_start(wcycle, ewcDOMDEC);
 670     dd_partition_system(fplog, step, cr, FALSE, 1,
 671                         NULL, top_global, ir,
 672                         &ems->s, &ems->f,
 673                         mdatoms, top, fr, vsite, NULL, constr,
 674                         nrnb, wcycle, FALSE);
 675     dd_store_state(cr->dd, &ems->s);
 676     wallcycle_stop(wcycle, ewcDOMDEC);
 677 }
 678
 679 static void evaluate_energy(FILE *fplog, t_commrec *cr,
 680                             gmx_mtop_t *top_global,
 681                             em_state_t *ems, gmx_localtop_t *top,
 682                             t_inputrec *inputrec,
 683                             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 684                             gmx_global_stat_t gstat,
 685                             gmx_vsite_t *vsite, gmx_constr_t constr,
 686                             t_fcdata *fcd,
 687                             t_graph *graph, t_mdatoms *mdatoms,
 688                             t_forcerec *fr, rvec mu_tot,
 689                             gmx_enerdata_t *enerd, tensor vir, tensor pres,
 690                             gmx_large_int_t count, gmx_bool bFirst)
 691 {
 692     real     t;
 693     gmx_bool bNS;
 694     int      nabnsb;
 695     tensor   force_vir, shake_vir, ekin;
 696     real     dvdl_constr, prescorr, enercorr, dvdlcorr;
 697     real     terminate = 0;
 698
 699     /* Set the time to the initial time, the time does not change during EM */
 700     t = inputrec->init_t;
 701
 702     if (bFirst ||
 703         (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
 704     {
 705         /* This the first state or an old state used before the last ns */
 706         bNS = TRUE;
 707     }
 708     else
 709     {
 710         bNS = FALSE;
 711         if (inputrec->nstlist > 0)
 712         {
 713             bNS = TRUE;
 714         }
 715         else if (inputrec->nstlist == -1)
 716         {
 717             nabnsb = natoms_beyond_ns_buffer(inputrec, fr, &top->cgs, NULL, ems->s.x);
 718             if (PAR(cr))
 719             {
 720                 gmx_sumi(1, &nabnsb, cr);
 721             }
 722             bNS = (nabnsb > 0);
 723         }
 724     }
 725
 726     if (vsite)
 727     {
 728         construct_vsites(vsite, ems->s.x, 1, NULL,
 729                          top->idef.iparams, top->idef.il,
 730                          fr->ePBC, fr->bMolPBC, graph, cr, ems->s.box);
 731     }
 732
 733     if (DOMAINDECOMP(cr))
 734     {
 735         if (bNS)
 736         {
 737             /* Repartition the domain decomposition */
 738             em_dd_partition_system(fplog, count, cr, top_global, inputrec,
 739                                    ems, top, mdatoms, fr, vsite, constr,
 740                                    nrnb, wcycle);
 741         }
 742     }
 743
 744     /* Calc force & energy on new trial position  */
 745     /* do_force always puts the charge groups in the box and shifts again
 746      * We do not unshift, so molecules are always whole in congrad.c
 747      */
 748     do_force(fplog, cr, inputrec,
 749              count, nrnb, wcycle, top, &top_global->groups,
 750              ems->s.box, ems->s.x, &ems->s.hist,
 751              ems->f, force_vir, mdatoms, enerd, fcd,
 752              ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE,
 753              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
 754              GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
 755              (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
 756
 757     /* Clear the unused shake virial and pressure */
 758     clear_mat(shake_vir);
 759     clear_mat(pres);
 760
 761     /* Communicate stuff when parallel */
 762     if (PAR(cr) && inputrec->eI != eiNM)
 763     {
 764         wallcycle_start(wcycle, ewcMoveE);
 765
 766         global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot,
 767                     inputrec, NULL, NULL, NULL, 1, &terminate,
 768                     top_global, &ems->s, FALSE,
 769                     CGLO_ENERGY |
 770                     CGLO_PRESSURE |
 771                     CGLO_CONSTRAINT |
 772                     CGLO_FIRSTITERATE);
 773
 774         wallcycle_stop(wcycle, ewcMoveE);
 775     }
 776
 777     /* Calculate long range corrections to pressure and energy */
 778     calc_dispcorr(fplog, inputrec, fr, count, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW],
 779                   pres, force_vir, &prescorr, &enercorr, &dvdlcorr);
 780     enerd->term[F_DISPCORR] = enercorr;
 781     enerd->term[F_EPOT]    += enercorr;
 782     enerd->term[F_PRES]    += prescorr;
 783     enerd->term[F_DVDL]    += dvdlcorr;
 784
 785     ems->epot = enerd->term[F_EPOT];
 786
 787     if (constr)
 788     {
 789         /* Project out the constraint components of the force */
 790         wallcycle_start(wcycle, ewcCONSTR);
 791         dvdl_constr = 0;
 792         constrain(NULL, FALSE, FALSE, constr, &top->idef,
 793                   inputrec, NULL, cr, count, 0, mdatoms,
 794                   ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box,
 795                   ems->s.lambda[efptBONDED], &dvdl_constr,
 796                   NULL, &shake_vir, nrnb, econqForceDispl, FALSE, 0, 0);
 797         if (fr->bSepDVDL && fplog)
 798         {
 799             gmx_print_sepdvdl(fplog, "Constraints", t, dvdl_constr);
 800         }
 801         enerd->term[F_DVDL_CONSTR] += dvdl_constr;
 802         m_add(force_vir, shake_vir, vir);
 803         wallcycle_stop(wcycle, ewcCONSTR);
 804     }
 805     else
 806     {
 807         copy_mat(force_vir, vir);
 808     }
 809
 810     clear_mat(ekin);
 811     enerd->term[F_PRES] =
 812         calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres);
 813
 814     sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals);
 815
 816     if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 817     {
 818         get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems);
 819     }
 820 }
 821
 822 static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
 823                               gmx_mtop_t *mtop,
 824                               em_state_t *s_min, em_state_t *s_b)
 825 {
 826     rvec          *fm, *fb, *fmg;
 827     t_block       *cgs_gl;
 828     int            ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m;
 829     double         partsum;
 830     unsigned char *grpnrFREEZE;
 831
 832     if (debug)
 833     {
 834         fprintf(debug, "Doing reorder_partsum\n");
 835     }
 836
 837     fm = s_min->f;
 838     fb = s_b->f;
 839
 840     cgs_gl = dd_charge_groups_global(cr->dd);
 841     index  = cgs_gl->index;
 842
 843     /* Collect fm in a global vector fmg.
 844      * This conflicts with the spirit of domain decomposition,
 845      * but to fully optimize this a much more complicated algorithm is required.
 846      */
 847     snew(fmg, mtop->natoms);
 848
 849     ncg   = s_min->s.ncg_gl;
 850     cg_gl = s_min->s.cg_gl;
 851     i     = 0;
 852     for (c = 0; c < ncg; c++)
 853     {
 854         cg = cg_gl[c];
 855         a0 = index[cg];
 856         a1 = index[cg+1];
 857         for (a = a0; a < a1; a++)
 858         {
 859             copy_rvec(fm[i], fmg[a]);
 860             i++;
 861         }
 862     }
 863     gmx_sum(mtop->natoms*3, fmg[0], cr);
 864
 865     /* Now we will determine the part of the sum for the cgs in state s_b */
 866     ncg         = s_b->s.ncg_gl;
 867     cg_gl       = s_b->s.cg_gl;
 868     partsum     = 0;
 869     i           = 0;
 870     gf          = 0;
 871     grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 872     for (c = 0; c < ncg; c++)
 873     {
 874         cg = cg_gl[c];
 875         a0 = index[cg];
 876         a1 = index[cg+1];
 877         for (a = a0; a < a1; a++)
 878         {
 879             if (mdatoms->cFREEZE && grpnrFREEZE)
 880             {
 881                 gf = grpnrFREEZE[i];
 882             }
 883             for (m = 0; m < DIM; m++)
 884             {
 885                 if (!opts->nFreeze[gf][m])
 886                 {
 887                     partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 888                 }
 889             }
 890             i++;
 891         }
 892     }
 893
 894     sfree(fmg);
 895
 896     return partsum;
 897 }
 898
 899 static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
 900                     gmx_mtop_t *mtop,
 901                     em_state_t *s_min, em_state_t *s_b)
 902 {
 903     rvec  *fm, *fb;
 904     double sum;
 905     int    gf, i, m;
 906
 907     /* This is just the classical Polak-Ribiere calculation of beta;
 908      * it looks a bit complicated since we take freeze groups into account,
 909      * and might have to sum it in parallel runs.
 910      */
 911
 912     if (!DOMAINDECOMP(cr) ||
 913         (s_min->s.ddp_count == cr->dd->ddp_count &&
 914          s_b->s.ddp_count   == cr->dd->ddp_count))
 915     {
 916         fm  = s_min->f;
 917         fb  = s_b->f;
 918         sum = 0;
 919         gf  = 0;
 920         /* This part of code can be incorrect with DD,
 921          * since the atom ordering in s_b and s_min might differ.
 922          */
 923         for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
 924         {
 925             if (mdatoms->cFREEZE)
 926             {
 927                 gf = mdatoms->cFREEZE[i];
 928             }
 929             for (m = 0; m < DIM; m++)
 930             {
 931                 if (!opts->nFreeze[gf][m])
 932                 {
 933                     sum += (fb[i][m] - fm[i][m])*fb[i][m];
 934                 }
 935             }
 936         }
 937     }
 938     else
 939     {
 940         /* We need to reorder cgs while summing */
 941         sum = reorder_partsum(cr, opts, mdatoms, mtop, s_min, s_b);
 942     }
 943     if (PAR(cr))
 944     {
 945         gmx_sumd(1, &sum, cr);
 946     }
 947
 948     return sum/sqr(s_min->fnorm);
 949 }
 950
 951 double do_cg(FILE *fplog, t_commrec *cr,
 952              int nfile, const t_filenm fnm[],
 953              const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
 954              int gmx_unused nstglobalcomm,
 955              gmx_vsite_t *vsite, gmx_constr_t constr,
 956              int gmx_unused stepout,
 957              t_inputrec *inputrec,
 958              gmx_mtop_t *top_global, t_fcdata *fcd,
 959              t_state *state_global,
 960              t_mdatoms *mdatoms,
 961              t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 962              gmx_edsam_t gmx_unused ed,
 963              t_forcerec *fr,
 964              int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
 965              gmx_membed_t gmx_unused membed,
 966              real gmx_unused cpt_period, real gmx_unused max_hours,
 967              const char gmx_unused *deviceOptions,
 968              unsigned long gmx_unused Flags,
 969              gmx_runtime_t *runtime)
 970 {
 971     const char       *CG = "Polak-Ribiere Conjugate Gradients";
 972
 973     em_state_t       *s_min, *s_a, *s_b, *s_c;
 974     gmx_localtop_t   *top;
 975     gmx_enerdata_t   *enerd;
 976     rvec             *f;
 977     gmx_global_stat_t gstat;
 978     t_graph          *graph;
 979     rvec             *f_global, *p, *sf, *sfm;
 980     double            gpa, gpb, gpc, tmp, sum[2], minstep;
 981     real              fnormn;
 982     real              stepsize;
 983     real              a, b, c, beta = 0.0;
 984     real              epot_repl = 0;
 985     real              pnorm;
 986     t_mdebin         *mdebin;
 987     gmx_bool          converged, foundlower;
 988     rvec              mu_tot;
 989     gmx_bool          do_log = FALSE, do_ene = FALSE, do_x, do_f;
 990     tensor            vir, pres;
 991     int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
 992     gmx_mdoutf_t     *outf;
 993     int               i, m, gf, step, nminstep;
 994     real              terminate = 0;
 995
 996     step = 0;
 997
 998     s_min = init_em_state();
 999     s_a   = init_em_state();
1000     s_b   = init_em_state();
1001     s_c   = init_em_state();
1002
1003     /* Init em and store the local state in s_min */
1004     init_em(fplog, CG, cr, inputrec,
1005             state_global, top_global, s_min, &top, &f, &f_global,
1006             nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
1007             nfile, fnm, &outf, &mdebin);
1008
1009     /* Print to log file */
1010     print_em_start(fplog, cr, runtime, wcycle, CG);
1011
1012     /* Max number of steps */
1013     number_steps = inputrec->nsteps;
1014
1015     if (MASTER(cr))
1016     {
1017         sp_header(stderr, CG, inputrec->em_tol, number_steps);
1018     }
1019     if (fplog)
1020     {
1021         sp_header(fplog, CG, inputrec->em_tol, number_steps);
1022     }
1023
1024     /* Call the force routine and some auxiliary (neighboursearching etc.) */
1025     /* do_force always puts the charge groups in the box and shifts again
1026      * We do not unshift, so molecules are always whole in congrad.c
1027      */
1028     evaluate_energy(fplog, cr,
1029                     top_global, s_min, top,
1030                     inputrec, nrnb, wcycle, gstat,
1031                     vsite, constr, fcd, graph, mdatoms, fr,
1032                     mu_tot, enerd, vir, pres, -1, TRUE);
1033     where();
1034
1035     if (MASTER(cr))
1036     {
1037         /* Copy stuff to the energy bin for easy printing etc. */
1038         upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1039                    mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
1040                    NULL, NULL, vir, pres, NULL, mu_tot, constr);
1041
1042         print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
1043         print_ebin(outf->fp_ene, TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
1044                    TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1045     }
1046     where();
1047
1048     /* Estimate/guess the initial stepsize */
1049     stepsize = inputrec->em_stepsize/s_min->fnorm;
1050
1051     if (MASTER(cr))
1052     {
1053         fprintf(stderr, "   F-max             = %12.5e on atom %d\n",
1054                 s_min->fmax, s_min->a_fmax+1);
1055         fprintf(stderr, "   F-Norm            = %12.5e\n",
1056                 s_min->fnorm/sqrt(state_global->natoms));
1057         fprintf(stderr, "\n");
1058         /* and copy to the log file too... */
1059         fprintf(fplog, "   F-max             = %12.5e on atom %d\n",
1060                 s_min->fmax, s_min->a_fmax+1);
1061         fprintf(fplog, "   F-Norm            = %12.5e\n",
1062                 s_min->fnorm/sqrt(state_global->natoms));
1063         fprintf(fplog, "\n");
1064     }
1065     /* Start the loop over CG steps.
1066      * Each successful step is counted, and we continue until
1067      * we either converge or reach the max number of steps.
1068      */
1069     converged = FALSE;
1070     for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
1071     {
1072
1073         /* start taking steps in a new direction
1074          * First time we enter the routine, beta=0, and the direction is
1075          * simply the negative gradient.
1076          */
1077
1078         /* Calculate the new direction in p, and the gradient in this direction, gpa */
1079         p   = s_min->s.cg_p;
1080         sf  = s_min->f;
1081         gpa = 0;
1082         gf  = 0;
1083         for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
1084         {
1085             if (mdatoms->cFREEZE)
1086             {
1087                 gf = mdatoms->cFREEZE[i];
1088             }
1089             for (m = 0; m < DIM; m++)
1090             {
1091                 if (!inputrec->opts.nFreeze[gf][m])
1092                 {
1093                     p[i][m] = sf[i][m] + beta*p[i][m];
1094                     gpa    -= p[i][m]*sf[i][m];
1095                     /* f is negative gradient, thus the sign */
1096                 }
1097                 else
1098                 {
1099                     p[i][m] = 0;
1100                 }
1101             }
1102         }
1103
1104         /* Sum the gradient along the line across CPUs */
1105         if (PAR(cr))
1106         {
1107             gmx_sumd(1, &gpa, cr);
1108         }
1109
1110         /* Calculate the norm of the search vector */
1111         get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL, NULL);
1112
1113         /* Just in case stepsize reaches zero due to numerical precision... */
1114         if (stepsize <= 0)
1115         {
1116             stepsize = inputrec->em_stepsize/pnorm;
1117         }
1118
1119         /*
1120          * Double check the value of the derivative in the search direction.
1121          * If it is positive it must be due to the old information in the
1122          * CG formula, so just remove that and start over with beta=0.
1123          * This corresponds to a steepest descent step.
1124          */
1125         if (gpa > 0)
1126         {
1127             beta = 0;
1128             step--;   /* Don't count this step since we are restarting */
1129             continue; /* Go back to the beginning of the big for-loop */
1130         }
1131
1132         /* Calculate minimum allowed stepsize, before the average (norm)
1133          * relative change in coordinate is smaller than precision
1134          */
1135         minstep = 0;
1136         for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
1137         {
1138             for (m = 0; m < DIM; m++)
1139             {
1140                 tmp = fabs(s_min->s.x[i][m]);
1141                 if (tmp < 1.0)
1142                 {
1143                     tmp = 1.0;
1144                 }
1145                 tmp      = p[i][m]/tmp;
1146                 minstep += tmp*tmp;
1147             }
1148         }
1149         /* Add up from all CPUs */
1150         if (PAR(cr))
1151         {
1152             gmx_sumd(1, &minstep, cr);
1153         }
1154
1155         minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1156
1157         if (stepsize < minstep)
1158         {
1159             converged = TRUE;
1160             break;
1161         }
1162
1163         /* Write coordinates if necessary */
1164         do_x = do_per_step(step, inputrec->nstxout);
1165         do_f = do_per_step(step, inputrec->nstfout);
1166
1167         write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
1168                       top_global, inputrec, step,
1169                       s_min, state_global, f_global);
1170
1171         /* Take a step downhill.
1172          * In theory, we should minimize the function along this direction.
1173          * That is quite possible, but it turns out to take 5-10 function evaluations
1174          * for each line. However, we dont really need to find the exact minimum -
1175          * it is much better to start a new CG step in a modified direction as soon
1176          * as we are close to it. This will save a lot of energy evaluations.
1177          *
1178          * In practice, we just try to take a single step.
1179          * If it worked (i.e. lowered the energy), we increase the stepsize but
1180          * the continue straight to the next CG step without trying to find any minimum.
1181          * If it didn't work (higher energy), there must be a minimum somewhere between
1182          * the old position and the new one.
1183          *
1184          * Due to the finite numerical accuracy, it turns out that it is a good idea
1185          * to even accept a SMALL increase in energy, if the derivative is still downhill.
1186          * This leads to lower final energies in the tests I've done. / Erik
1187          */
1188         s_a->epot = s_min->epot;
1189         a         = 0.0;
1190         c         = a + stepsize; /* reference position along line is zero */
1191
1192         if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
1193         {
1194             em_dd_partition_system(fplog, step, cr, top_global, inputrec,
1195                                    s_min, top, mdatoms, fr, vsite, constr,
1196                                    nrnb, wcycle);
1197         }
1198
1199         /* Take a trial step (new coords in s_c) */
1200         do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c,
1201                    constr, top, nrnb, wcycle, -1);
1202
1203         neval++;
1204         /* Calculate energy for the trial step */
1205         evaluate_energy(fplog, cr,
1206                         top_global, s_c, top,
1207                         inputrec, nrnb, wcycle, gstat,
1208                         vsite, constr, fcd, graph, mdatoms, fr,
1209                         mu_tot, enerd, vir, pres, -1, FALSE);
1210
1211         /* Calc derivative along line */
1212         p   = s_c->s.cg_p;
1213         sf  = s_c->f;
1214         gpc = 0;
1215         for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
1216         {
1217             for (m = 0; m < DIM; m++)
1218             {
1219                 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1220             }
1221         }
1222         /* Sum the gradient along the line across CPUs */
1223         if (PAR(cr))
1224         {
1225             gmx_sumd(1, &gpc, cr);
1226         }
1227
1228         /* This is the max amount of increase in energy we tolerate */
1229         tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1230
1231         /* Accept the step if the energy is lower, or if it is not significantly higher
1232          * and the line derivative is still negative.
1233          */
1234         if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
1235         {
1236             foundlower = TRUE;
1237             /* Great, we found a better energy. Increase step for next iteration
1238              * if we are still going down, decrease it otherwise
1239              */
1240             if (gpc < 0)
1241             {
1242                 stepsize *= 1.618034; /* The golden section */
1243             }
1244             else
1245             {
1246                 stepsize *= 0.618034; /* 1/golden section */
1247             }
1248         }
1249         else
1250         {
1251             /* New energy is the same or higher. We will have to do some work
1252              * to find a smaller value in the interval. Take smaller step next time!
1253              */
1254             foundlower = FALSE;
1255             stepsize  *= 0.618034;
1256         }
1257
1258
1259
1260
1261         /* OK, if we didn't find a lower value we will have to locate one now - there must
1262          * be one in the interval [a=0,c].
1263          * The same thing is valid here, though: Don't spend dozens of iterations to find
1264          * the line minimum. We try to interpolate based on the derivative at the endpoints,
1265          * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1266          *
1267          * I also have a safeguard for potentially really patological functions so we never
1268          * take more than 20 steps before we give up ...
1269          *
1270          * If we already found a lower value we just skip this step and continue to the update.
1271          */
1272         if (!foundlower)
1273         {
1274             nminstep = 0;
1275
1276             do
1277             {
1278                 /* Select a new trial point.
1279                  * If the derivatives at points a & c have different sign we interpolate to zero,
1280                  * otherwise just do a bisection.
1281                  */
1282                 if (gpa < 0 && gpc > 0)
1283                 {
1284                     b = a + gpa*(a-c)/(gpc-gpa);
1285                 }
1286                 else
1287                 {
1288                     b = 0.5*(a+c);
1289                 }
1290
1291                 /* safeguard if interpolation close to machine accuracy causes errors:
1292                  * never go outside the interval
1293                  */
1294                 if (b <= a || b >= c)
1295                 {
1296                     b = 0.5*(a+c);
1297                 }
1298
1299                 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
1300                 {
1301                     /* Reload the old state */
1302                     em_dd_partition_system(fplog, -1, cr, top_global, inputrec,
1303                                            s_min, top, mdatoms, fr, vsite, constr,
1304                                            nrnb, wcycle);
1305                 }
1306
1307                 /* Take a trial step to this new point - new coords in s_b */
1308                 do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b,
1309                            constr, top, nrnb, wcycle, -1);
1310
1311                 neval++;
1312                 /* Calculate energy for the trial step */
1313                 evaluate_energy(fplog, cr,
1314                                 top_global, s_b, top,
1315                                 inputrec, nrnb, wcycle, gstat,
1316                                 vsite, constr, fcd, graph, mdatoms, fr,
1317                                 mu_tot, enerd, vir, pres, -1, FALSE);
1318
1319                 /* p does not change within a step, but since the domain decomposition
1320                  * might change, we have to use cg_p of s_b here.
1321                  */
1322                 p   = s_b->s.cg_p;
1323                 sf  = s_b->f;
1324                 gpb = 0;
1325                 for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
1326                 {
1327                     for (m = 0; m < DIM; m++)
1328                     {
1329                         gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1330                     }
1331                 }
1332                 /* Sum the gradient along the line across CPUs */
1333                 if (PAR(cr))
1334                 {
1335                     gmx_sumd(1, &gpb, cr);
1336                 }
1337
1338                 if (debug)
1339                 {
1340                     fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1341                             s_a->epot, s_b->epot, s_c->epot, gpb);
1342                 }
1343
1344                 epot_repl = s_b->epot;
1345
1346                 /* Keep one of the intervals based on the value of the derivative at the new point */
1347                 if (gpb > 0)
1348                 {
1349                     /* Replace c endpoint with b */
1350                     swap_em_state(s_b, s_c);
1351                     c   = b;
1352                     gpc = gpb;
1353                 }
1354                 else
1355                 {
1356                     /* Replace a endpoint with b */
1357                     swap_em_state(s_b, s_a);
1358                     a   = b;
1359                     gpa = gpb;
1360                 }
1361
1362                 /*
1363                  * Stop search as soon as we find a value smaller than the endpoints.
1364                  * Never run more than 20 steps, no matter what.
1365                  */
1366                 nminstep++;
1367             }
1368             while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1369                    (nminstep < 20));
1370
1371             if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1372                 nminstep >= 20)
1373             {
1374                 /* OK. We couldn't find a significantly lower energy.
1375                  * If beta==0 this was steepest descent, and then we give up.
1376                  * If not, set beta=0 and restart with steepest descent before quitting.
1377                  */
1378                 if (beta == 0.0)
1379                 {
1380                     /* Converged */
1381                     converged = TRUE;
1382                     break;
1383                 }
1384                 else
1385                 {
1386                     /* Reset memory before giving up */
1387                     beta = 0.0;
1388                     continue;
1389                 }
1390             }
1391
1392             /* Select min energy state of A & C, put the best in B.
1393              */
1394             if (s_c->epot < s_a->epot)
1395             {
1396                 if (debug)
1397                 {
1398                     fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n",
1399                             s_c->epot, s_a->epot);
1400                 }
1401                 swap_em_state(s_b, s_c);
1402                 gpb = gpc;
1403                 b   = c;
1404             }
1405             else
1406             {
1407                 if (debug)
1408                 {
1409                     fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n",
1410                             s_a->epot, s_c->epot);
1411                 }
1412                 swap_em_state(s_b, s_a);
1413                 gpb = gpa;
1414                 b   = a;
1415             }
1416
1417         }
1418         else
1419         {
1420             if (debug)
1421             {
1422                 fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n",
1423                         s_c->epot);
1424             }
1425             swap_em_state(s_b, s_c);
1426             gpb = gpc;
1427             b   = c;
1428         }
1429
1430         /* new search direction */
1431         /* beta = 0 means forget all memory and restart with steepest descents. */
1432         if (nstcg && ((step % nstcg) == 0))
1433         {
1434             beta = 0.0;
1435         }
1436         else
1437         {
1438             /* s_min->fnorm cannot be zero, because then we would have converged
1439              * and broken out.
1440              */
1441
1442             /* Polak-Ribiere update.
1443              * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1444              */
1445             beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b);
1446         }
1447         /* Limit beta to prevent oscillations */
1448         if (fabs(beta) > 5.0)
1449         {
1450             beta = 0.0;
1451         }
1452
1453
1454         /* update positions */
1455         swap_em_state(s_min, s_b);
1456         gpa = gpb;
1457
1458         /* Print it if necessary */
1459         if (MASTER(cr))
1460         {
1461             if (bVerbose)
1462             {
1463                 fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1464                         step, s_min->epot, s_min->fnorm/sqrt(state_global->natoms),
1465                         s_min->fmax, s_min->a_fmax+1);
1466             }
1467             /* Store the new (lower) energies */
1468             upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1469                        mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
1470                        NULL, NULL, vir, pres, NULL, mu_tot, constr);
1471
1472             do_log = do_per_step(step, inputrec->nstlog);
1473             do_ene = do_per_step(step, inputrec->nstenergy);
1474             if (do_log)
1475             {
1476                 print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
1477             }
1478             print_ebin(outf->fp_ene, do_ene, FALSE, FALSE,
1479                        do_log ? fplog : NULL, step, step, eprNORMAL,
1480                        TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1481         }
1482
1483         /* Stop when the maximum force lies below tolerance.
1484          * If we have reached machine precision, converged is already set to true.
1485          */
1486         converged = converged || (s_min->fmax < inputrec->em_tol);
1487
1488     } /* End of the loop */
1489
1490     if (converged)
1491     {
1492         step--; /* we never took that last step in this case */
1493
1494     }
1495     if (s_min->fmax > inputrec->em_tol)
1496     {
1497         if (MASTER(cr))
1498         {
1499             warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
1500             warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
1501         }
1502         converged = FALSE;
1503     }
1504
1505     if (MASTER(cr))
1506     {
1507         /* If we printed energy and/or logfile last step (which was the last step)
1508          * we don't have to do it again, but otherwise print the final values.
1509          */
1510         if (!do_log)
1511         {
1512             /* Write final value to log since we didn't do anything the last step */
1513             print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
1514         }
1515         if (!do_ene || !do_log)
1516         {
1517             /* Write final energy file entries */
1518             print_ebin(outf->fp_ene, !do_ene, FALSE, FALSE,
1519                        !do_log ? fplog : NULL, step, step, eprNORMAL,
1520                        TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1521         }
1522     }
1523
1524     /* Print some stuff... */
1525     if (MASTER(cr))
1526     {
1527         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
1528     }
1529
1530     /* IMPORTANT!
1531      * For accurate normal mode calculation it is imperative that we
1532      * store the last conformation into the full precision binary trajectory.
1533      *
1534      * However, we should only do it if we did NOT already write this step
1535      * above (which we did if do_x or do_f was true).
1536      */
1537     do_x = !do_per_step(step, inputrec->nstxout);
1538     do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
1539
1540     write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
1541                   top_global, inputrec, step,
1542                   s_min, state_global, f_global);
1543
1544     fnormn = s_min->fnorm/sqrt(state_global->natoms);
1545
1546     if (MASTER(cr))
1547     {
1548         print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps,
1549                         s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
1550         print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps,
1551                         s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
1552
1553         fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
1554     }
1555
1556     finish_em(cr, outf, runtime, wcycle);
1557
1558     /* To print the actual number of steps we needed somewhere */
1559     runtime->nsteps_done = step;
1560
1561     return 0;
1562 } /* That's all folks */
1563
1564
1565 double do_lbfgs(FILE *fplog, t_commrec *cr,
1566                 int nfile, const t_filenm fnm[],
1567                 const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
1568                 int gmx_unused nstglobalcomm,
1569                 gmx_vsite_t *vsite, gmx_constr_t constr,
1570                 int gmx_unused stepout,
1571                 t_inputrec *inputrec,
1572                 gmx_mtop_t *top_global, t_fcdata *fcd,
1573                 t_state *state,
1574                 t_mdatoms *mdatoms,
1575                 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1576                 gmx_edsam_t gmx_unused ed,
1577                 t_forcerec *fr,
1578                 int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
1579                 gmx_membed_t gmx_unused membed,
1580                 real gmx_unused cpt_period, real gmx_unused max_hours,
1581                 const char gmx_unused *deviceOptions,
1582                 unsigned long gmx_unused Flags,
1583                 gmx_runtime_t *runtime)
1584 {
1585     static const char *LBFGS = "Low-Memory BFGS Minimizer";
1586     em_state_t         ems;
1587     gmx_localtop_t    *top;
1588     gmx_enerdata_t    *enerd;
1589     rvec              *f;
1590     gmx_global_stat_t  gstat;
1591     t_graph           *graph;
1592     rvec              *f_global;
1593     int                ncorr, nmaxcorr, point, cp, neval, nminstep;
1594     double             stepsize, gpa, gpb, gpc, tmp, minstep;
1595     real              *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg;
1596     real              *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp;
1597     real               a, b, c, maxdelta, delta;
1598     real               diag, Epot0, Epot, EpotA, EpotB, EpotC;
1599     real               dgdx, dgdg, sq, yr, beta;
1600     t_mdebin          *mdebin;
1601     gmx_bool           converged, first;
1602     rvec               mu_tot;
1603     real               fnorm, fmax;
1604     gmx_bool           do_log, do_ene, do_x, do_f, foundlower, *frozen;
1605     tensor             vir, pres;
1606     int                start, end, number_steps;
1607     gmx_mdoutf_t      *outf;
1608     int                i, k, m, n, nfmax, gf, step;
1609     int                mdof_flags;
1610     /* not used */
1611     real               terminate;
1612
1613     if (PAR(cr))
1614     {
1615         gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n");
1616     }
1617
1618     if (NULL != constr)
1619     {
1620         gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
1621     }
1622
1623     n        = 3*state->natoms;
1624     nmaxcorr = inputrec->nbfgscorr;
1625
1626     /* Allocate memory */
1627     /* Use pointers to real so we dont have to loop over both atoms and
1628      * dimensions all the time...
1629      * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1630      * that point to the same memory.
1631      */
1632     snew(xa, n);
1633     snew(xb, n);
1634     snew(xc, n);
1635     snew(fa, n);
1636     snew(fb, n);
1637     snew(fc, n);
1638     snew(frozen, n);
1639
1640     snew(p, n);
1641     snew(lastx, n);
1642     snew(lastf, n);
1643     snew(rho, nmaxcorr);
1644     snew(alpha, nmaxcorr);
1645
1646     snew(dx, nmaxcorr);
1647     for (i = 0; i < nmaxcorr; i++)
1648     {
1649         snew(dx[i], n);
1650     }
1651
1652     snew(dg, nmaxcorr);
1653     for (i = 0; i < nmaxcorr; i++)
1654     {
1655         snew(dg[i], n);
1656     }
1657
1658     step  = 0;
1659     neval = 0;
1660
1661     /* Init em */
1662     init_em(fplog, LBFGS, cr, inputrec,
1663             state, top_global, &ems, &top, &f, &f_global,
1664             nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
1665             nfile, fnm, &outf, &mdebin);
1666     /* Do_lbfgs is not completely updated like do_steep and do_cg,
1667      * so we free some memory again.
1668      */
1669     sfree(ems.s.x);
1670     sfree(ems.f);
1671
1672     xx = (real *)state->x;
1673     ff = (real *)f;
1674
1675     start = mdatoms->start;
1676     end   = mdatoms->homenr + start;
1677
1678     /* Print to log file */
1679     print_em_start(fplog, cr, runtime, wcycle, LBFGS);
1680
1681     do_log = do_ene = do_x = do_f = TRUE;
1682
1683     /* Max number of steps */
1684     number_steps = inputrec->nsteps;
1685
1686     /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1687     gf = 0;
1688     for (i = start; i < end; i++)
1689     {
1690         if (mdatoms->cFREEZE)
1691         {
1692             gf = mdatoms->cFREEZE[i];
1693         }
1694         for (m = 0; m < DIM; m++)
1695         {
1696             frozen[3*i+m] = inputrec->opts.nFreeze[gf][m];
1697         }
1698     }
1699     if (MASTER(cr))
1700     {
1701         sp_header(stderr, LBFGS, inputrec->em_tol, number_steps);
1702     }
1703     if (fplog)
1704     {
1705         sp_header(fplog, LBFGS, inputrec->em_tol, number_steps);
1706     }
1707
1708     if (vsite)
1709     {
1710         construct_vsites(vsite, state->x, 1, NULL,
1711                          top->idef.iparams, top->idef.il,
1712                          fr->ePBC, fr->bMolPBC, graph, cr, state->box);
1713     }
1714
1715     /* Call the force routine and some auxiliary (neighboursearching etc.) */
1716     /* do_force always puts the charge groups in the box and shifts again
1717      * We do not unshift, so molecules are always whole
1718      */
1719     neval++;
1720     ems.s.x = state->x;
1721     ems.f   = f;
1722     evaluate_energy(fplog, cr,
1723                     top_global, &ems, top,
1724                     inputrec, nrnb, wcycle, gstat,
1725                     vsite, constr, fcd, graph, mdatoms, fr,
1726                     mu_tot, enerd, vir, pres, -1, TRUE);
1727     where();
1728
1729     if (MASTER(cr))
1730     {
1731         /* Copy stuff to the energy bin for easy printing etc. */
1732         upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1733                    mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
1734                    NULL, NULL, vir, pres, NULL, mu_tot, constr);
1735
1736         print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
1737         print_ebin(outf->fp_ene, TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
1738                    TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1739     }
1740     where();
1741
1742     /* This is the starting energy */
1743     Epot = enerd->term[F_EPOT];
1744
1745     fnorm = ems.fnorm;
1746     fmax  = ems.fmax;
1747     nfmax = ems.a_fmax;
1748
1749     /* Set the initial step.
1750      * since it will be multiplied by the non-normalized search direction
1751      * vector (force vector the first time), we scale it by the
1752      * norm of the force.
1753      */
1754
1755     if (MASTER(cr))
1756     {
1757         fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
1758         fprintf(stderr, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
1759         fprintf(stderr, "   F-Norm            = %12.5e\n", fnorm/sqrt(state->natoms));
1760         fprintf(stderr, "\n");
1761         /* and copy to the log file too... */
1762         fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
1763         fprintf(fplog, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
1764         fprintf(fplog, "   F-Norm            = %12.5e\n", fnorm/sqrt(state->natoms));
1765         fprintf(fplog, "\n");
1766     }
1767
1768     point = 0;
1769     for (i = 0; i < n; i++)
1770     {
1771         if (!frozen[i])
1772         {
1773             dx[point][i] = ff[i]; /* Initial search direction */
1774         }
1775         else
1776         {
1777             dx[point][i] = 0;
1778         }
1779     }
1780
1781     stepsize  = 1.0/fnorm;
1782     converged = FALSE;
1783
1784     /* Start the loop over BFGS steps.
1785      * Each successful step is counted, and we continue until
1786      * we either converge or reach the max number of steps.
1787      */
1788
1789     ncorr = 0;
1790
1791     /* Set the gradient from the force */
1792     converged = FALSE;
1793     for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
1794     {
1795
1796         /* Write coordinates if necessary */
1797         do_x = do_per_step(step, inputrec->nstxout);
1798         do_f = do_per_step(step, inputrec->nstfout);
1799
1800         mdof_flags = 0;
1801         if (do_x)
1802         {
1803             mdof_flags |= MDOF_X;
1804         }
1805
1806         if (do_f)
1807         {
1808             mdof_flags |= MDOF_F;
1809         }
1810
1811         write_traj(fplog, cr, outf, mdof_flags,
1812                    top_global, step, (real)step, state, state, f, f, NULL, NULL);
1813
1814         /* Do the linesearching in the direction dx[point][0..(n-1)] */
1815
1816         /* pointer to current direction - point=0 first time here */
1817         s = dx[point];
1818
1819         /* calculate line gradient */
1820         for (gpa = 0, i = 0; i < n; i++)
1821         {
1822             gpa -= s[i]*ff[i];
1823         }
1824
1825         /* Calculate minimum allowed stepsize, before the average (norm)
1826          * relative change in coordinate is smaller than precision
1827          */
1828         for (minstep = 0, i = 0; i < n; i++)
1829         {
1830             tmp = fabs(xx[i]);
1831             if (tmp < 1.0)
1832             {
1833                 tmp = 1.0;
1834             }
1835             tmp      = s[i]/tmp;
1836             minstep += tmp*tmp;
1837         }
1838         minstep = GMX_REAL_EPS/sqrt(minstep/n);
1839
1840         if (stepsize < minstep)
1841         {
1842             converged = TRUE;
1843             break;
1844         }
1845
1846         /* Store old forces and coordinates */
1847         for (i = 0; i < n; i++)
1848         {
1849             lastx[i] = xx[i];
1850             lastf[i] = ff[i];
1851         }
1852         Epot0 = Epot;
1853
1854         first = TRUE;
1855
1856         for (i = 0; i < n; i++)
1857         {
1858             xa[i] = xx[i];
1859         }
1860
1861         /* Take a step downhill.
1862          * In theory, we should minimize the function along this direction.
1863          * That is quite possible, but it turns out to take 5-10 function evaluations
1864          * for each line. However, we dont really need to find the exact minimum -
1865          * it is much better to start a new BFGS step in a modified direction as soon
1866          * as we are close to it. This will save a lot of energy evaluations.
1867          *
1868          * In practice, we just try to take a single step.
1869          * If it worked (i.e. lowered the energy), we increase the stepsize but
1870          * the continue straight to the next BFGS step without trying to find any minimum.
1871          * If it didn't work (higher energy), there must be a minimum somewhere between
1872          * the old position and the new one.
1873          *
1874          * Due to the finite numerical accuracy, it turns out that it is a good idea
1875          * to even accept a SMALL increase in energy, if the derivative is still downhill.
1876          * This leads to lower final energies in the tests I've done. / Erik
1877          */
1878         foundlower = FALSE;
1879         EpotA      = Epot0;
1880         a          = 0.0;
1881         c          = a + stepsize; /* reference position along line is zero */
1882
1883         /* Check stepsize first. We do not allow displacements
1884          * larger than emstep.
1885          */
1886         do
1887         {
1888             c        = a + stepsize;
1889             maxdelta = 0;
1890             for (i = 0; i < n; i++)
1891             {
1892                 delta = c*s[i];
1893                 if (delta > maxdelta)
1894                 {
1895                     maxdelta = delta;
1896                 }
1897             }
1898             if (maxdelta > inputrec->em_stepsize)
1899             {
1900                 stepsize *= 0.1;
1901             }
1902         }
1903         while (maxdelta > inputrec->em_stepsize);
1904
1905         /* Take a trial step */
1906         for (i = 0; i < n; i++)
1907         {
1908             xc[i] = lastx[i] + c*s[i];
1909         }
1910
1911         neval++;
1912         /* Calculate energy for the trial step */
1913         ems.s.x = (rvec *)xc;
1914         ems.f   = (rvec *)fc;
1915         evaluate_energy(fplog, cr,
1916                         top_global, &ems, top,
1917                         inputrec, nrnb, wcycle, gstat,
1918                         vsite, constr, fcd, graph, mdatoms, fr,
1919                         mu_tot, enerd, vir, pres, step, FALSE);
1920         EpotC = ems.epot;
1921
1922         /* Calc derivative along line */
1923         for (gpc = 0, i = 0; i < n; i++)
1924         {
1925             gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
1926         }
1927         /* Sum the gradient along the line across CPUs */
1928         if (PAR(cr))
1929         {
1930             gmx_sumd(1, &gpc, cr);
1931         }
1932
1933         /* This is the max amount of increase in energy we tolerate */
1934         tmp = sqrt(GMX_REAL_EPS)*fabs(EpotA);
1935
1936         /* Accept the step if the energy is lower, or if it is not significantly higher
1937          * and the line derivative is still negative.
1938          */
1939         if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp)))
1940         {
1941             foundlower = TRUE;
1942             /* Great, we found a better energy. Increase step for next iteration
1943              * if we are still going down, decrease it otherwise
1944              */
1945             if (gpc < 0)
1946             {
1947                 stepsize *= 1.618034; /* The golden section */
1948             }
1949             else
1950             {
1951                 stepsize *= 0.618034; /* 1/golden section */
1952             }
1953         }
1954         else
1955         {
1956             /* New energy is the same or higher. We will have to do some work
1957              * to find a smaller value in the interval. Take smaller step next time!
1958              */
1959             foundlower = FALSE;
1960             stepsize  *= 0.618034;
1961         }
1962
1963         /* OK, if we didn't find a lower value we will have to locate one now - there must
1964          * be one in the interval [a=0,c].
1965          * The same thing is valid here, though: Don't spend dozens of iterations to find
1966          * the line minimum. We try to interpolate based on the derivative at the endpoints,
1967          * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1968          *
1969          * I also have a safeguard for potentially really patological functions so we never
1970          * take more than 20 steps before we give up ...
1971          *
1972          * If we already found a lower value we just skip this step and continue to the update.
1973          */
1974
1975         if (!foundlower)
1976         {
1977
1978             nminstep = 0;
1979             do
1980             {
1981                 /* Select a new trial point.
1982                  * If the derivatives at points a & c have different sign we interpolate to zero,
1983                  * otherwise just do a bisection.
1984                  */
1985
1986                 if (gpa < 0 && gpc > 0)
1987                 {
1988                     b = a + gpa*(a-c)/(gpc-gpa);
1989                 }
1990                 else
1991                 {
1992                     b = 0.5*(a+c);
1993                 }
1994
1995                 /* safeguard if interpolation close to machine accuracy causes errors:
1996                  * never go outside the interval
1997                  */
1998                 if (b <= a || b >= c)
1999                 {
2000                     b = 0.5*(a+c);
2001                 }
2002
2003                 /* Take a trial step */
2004                 for (i = 0; i < n; i++)
2005                 {
2006                     xb[i] = lastx[i] + b*s[i];
2007                 }
2008
2009                 neval++;
2010                 /* Calculate energy for the trial step */
2011                 ems.s.x = (rvec *)xb;
2012                 ems.f   = (rvec *)fb;
2013                 evaluate_energy(fplog, cr,
2014                                 top_global, &ems, top,
2015                                 inputrec, nrnb, wcycle, gstat,
2016                                 vsite, constr, fcd, graph, mdatoms, fr,
2017                                 mu_tot, enerd, vir, pres, step, FALSE);
2018                 EpotB = ems.epot;
2019
2020                 fnorm = ems.fnorm;
2021
2022                 for (gpb = 0, i = 0; i < n; i++)
2023                 {
2024                     gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
2025
2026                 }
2027                 /* Sum the gradient along the line across CPUs */
2028                 if (PAR(cr))
2029                 {
2030                     gmx_sumd(1, &gpb, cr);
2031                 }
2032
2033                 /* Keep one of the intervals based on the value of the derivative at the new point */
2034                 if (gpb > 0)
2035                 {
2036                     /* Replace c endpoint with b */
2037                     EpotC = EpotB;
2038                     c     = b;
2039                     gpc   = gpb;
2040                     /* swap coord pointers b/c */
2041                     xtmp = xb;
2042                     ftmp = fb;
2043                     xb   = xc;
2044                     fb   = fc;
2045                     xc   = xtmp;
2046                     fc   = ftmp;
2047                 }
2048                 else
2049                 {
2050                     /* Replace a endpoint with b */
2051                     EpotA = EpotB;
2052                     a     = b;
2053                     gpa   = gpb;
2054                     /* swap coord pointers a/b */
2055                     xtmp = xb;
2056                     ftmp = fb;
2057                     xb   = xa;
2058                     fb   = fa;
2059                     xa   = xtmp;
2060                     fa   = ftmp;
2061                 }
2062
2063                 /*
2064                  * Stop search as soon as we find a value smaller than the endpoints,
2065                  * or if the tolerance is below machine precision.
2066                  * Never run more than 20 steps, no matter what.
2067                  */
2068                 nminstep++;
2069             }
2070             while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20));
2071
2072             if (fabs(EpotB-Epot0) < GMX_REAL_EPS || nminstep >= 20)
2073             {
2074                 /* OK. We couldn't find a significantly lower energy.
2075                  * If ncorr==0 this was steepest descent, and then we give up.
2076                  * If not, reset memory to restart as steepest descent before quitting.
2077                  */
2078                 if (ncorr == 0)
2079                 {
2080                     /* Converged */
2081                     converged = TRUE;
2082                     break;
2083                 }
2084                 else
2085                 {
2086                     /* Reset memory */
2087                     ncorr = 0;
2088                     /* Search in gradient direction */
2089                     for (i = 0; i < n; i++)
2090                     {
2091                         dx[point][i] = ff[i];
2092                     }
2093                     /* Reset stepsize */
2094                     stepsize = 1.0/fnorm;
2095                     continue;
2096                 }
2097             }
2098
2099             /* Select min energy state of A & C, put the best in xx/ff/Epot
2100              */
2101             if (EpotC < EpotA)
2102             {
2103                 Epot = EpotC;
2104                 /* Use state C */
2105                 for (i = 0; i < n; i++)
2106                 {
2107                     xx[i] = xc[i];
2108                     ff[i] = fc[i];
2109                 }
2110                 stepsize = c;
2111             }
2112             else
2113             {
2114                 Epot = EpotA;
2115                 /* Use state A */
2116                 for (i = 0; i < n; i++)
2117                 {
2118                     xx[i] = xa[i];
2119                     ff[i] = fa[i];
2120                 }
2121                 stepsize = a;
2122             }
2123
2124         }
2125         else
2126         {
2127             /* found lower */
2128             Epot = EpotC;
2129             /* Use state C */
2130             for (i = 0; i < n; i++)
2131             {
2132                 xx[i] = xc[i];
2133                 ff[i] = fc[i];
2134             }
2135             stepsize = c;
2136         }
2137
2138         /* Update the memory information, and calculate a new
2139          * approximation of the inverse hessian
2140          */
2141
2142         /* Have new data in Epot, xx, ff */
2143         if (ncorr < nmaxcorr)
2144         {
2145             ncorr++;
2146         }
2147
2148         for (i = 0; i < n; i++)
2149         {
2150             dg[point][i]  = lastf[i]-ff[i];
2151             dx[point][i] *= stepsize;
2152         }
2153
2154         dgdg = 0;
2155         dgdx = 0;
2156         for (i = 0; i < n; i++)
2157         {
2158             dgdg += dg[point][i]*dg[point][i];
2159             dgdx += dg[point][i]*dx[point][i];
2160         }
2161
2162         diag = dgdx/dgdg;
2163
2164         rho[point] = 1.0/dgdx;
2165         point++;
2166
2167         if (point >= nmaxcorr)
2168         {
2169             point = 0;
2170         }
2171
2172         /* Update */
2173         for (i = 0; i < n; i++)
2174         {
2175             p[i] = ff[i];
2176         }
2177
2178         cp = point;
2179
2180         /* Recursive update. First go back over the memory points */
2181         for (k = 0; k < ncorr; k++)
2182         {
2183             cp--;
2184             if (cp < 0)
2185             {
2186                 cp = ncorr-1;
2187             }
2188
2189             sq = 0;
2190             for (i = 0; i < n; i++)
2191             {
2192                 sq += dx[cp][i]*p[i];
2193             }
2194
2195             alpha[cp] = rho[cp]*sq;
2196
2197             for (i = 0; i < n; i++)
2198             {
2199                 p[i] -= alpha[cp]*dg[cp][i];
2200             }
2201         }
2202
2203         for (i = 0; i < n; i++)
2204         {
2205             p[i] *= diag;
2206         }
2207
2208         /* And then go forward again */
2209         for (k = 0; k < ncorr; k++)
2210         {
2211             yr = 0;
2212             for (i = 0; i < n; i++)
2213             {
2214                 yr += p[i]*dg[cp][i];
2215             }
2216
2217             beta = rho[cp]*yr;
2218             beta = alpha[cp]-beta;
2219
2220             for (i = 0; i < n; i++)
2221             {
2222                 p[i] += beta*dx[cp][i];
2223             }
2224
2225             cp++;
2226             if (cp >= ncorr)
2227             {
2228                 cp = 0;
2229             }
2230         }
2231
2232         for (i = 0; i < n; i++)
2233         {
2234             if (!frozen[i])
2235             {
2236                 dx[point][i] = p[i];
2237             }
2238             else
2239             {
2240                 dx[point][i] = 0;
2241             }
2242         }
2243
2244         stepsize = 1.0;
2245
2246         /* Test whether the convergence criterion is met */
2247         get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax);
2248
2249         /* Print it if necessary */
2250         if (MASTER(cr))
2251         {
2252             if (bVerbose)
2253             {
2254                 fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
2255                         step, Epot, fnorm/sqrt(state->natoms), fmax, nfmax+1);
2256             }
2257             /* Store the new (lower) energies */
2258             upd_mdebin(mdebin, FALSE, FALSE, (double)step,
2259                        mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
2260                        NULL, NULL, vir, pres, NULL, mu_tot, constr);
2261             do_log = do_per_step(step, inputrec->nstlog);
2262             do_ene = do_per_step(step, inputrec->nstenergy);
2263             if (do_log)
2264             {
2265                 print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
2266             }
2267             print_ebin(outf->fp_ene, do_ene, FALSE, FALSE,
2268                        do_log ? fplog : NULL, step, step, eprNORMAL,
2269                        TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2270         }
2271
2272         /* Stop when the maximum force lies below tolerance.
2273          * If we have reached machine precision, converged is already set to true.
2274          */
2275
2276         converged = converged || (fmax < inputrec->em_tol);
2277
2278     } /* End of the loop */
2279
2280     if (converged)
2281     {
2282         step--; /* we never took that last step in this case */
2283
2284     }
2285     if (fmax > inputrec->em_tol)
2286     {
2287         if (MASTER(cr))
2288         {
2289             warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
2290             warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
2291         }
2292         converged = FALSE;
2293     }
2294
2295     /* If we printed energy and/or logfile last step (which was the last step)
2296      * we don't have to do it again, but otherwise print the final values.
2297      */
2298     if (!do_log) /* Write final value to log since we didn't do anythin last step */
2299     {
2300         print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
2301     }
2302     if (!do_ene || !do_log) /* Write final energy file entries */
2303     {
2304         print_ebin(outf->fp_ene, !do_ene, FALSE, FALSE,
2305                    !do_log ? fplog : NULL, step, step, eprNORMAL,
2306                    TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2307     }
2308
2309     /* Print some stuff... */
2310     if (MASTER(cr))
2311     {
2312         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2313     }
2314
2315     /* IMPORTANT!
2316      * For accurate normal mode calculation it is imperative that we
2317      * store the last conformation into the full precision binary trajectory.
2318      *
2319      * However, we should only do it if we did NOT already write this step
2320      * above (which we did if do_x or do_f was true).
2321      */
2322     do_x = !do_per_step(step, inputrec->nstxout);
2323     do_f = !do_per_step(step, inputrec->nstfout);
2324     write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
2325                   top_global, inputrec, step,
2326                   &ems, state, f);
2327
2328     if (MASTER(cr))
2329     {
2330         print_converged(stderr, LBFGS, inputrec->em_tol, step, converged,
2331                         number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms));
2332         print_converged(fplog, LBFGS, inputrec->em_tol, step, converged,
2333                         number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms));
2334
2335         fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
2336     }
2337
2338     finish_em(cr, outf, runtime, wcycle);
2339
2340     /* To print the actual number of steps we needed somewhere */
2341     runtime->nsteps_done = step;
2342
2343     return 0;
2344 } /* That's all folks */
2345
2346
2347 double do_steep(FILE *fplog, t_commrec *cr,
2348                 int nfile, const t_filenm fnm[],
2349                 const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
2350                 int gmx_unused nstglobalcomm,
2351                 gmx_vsite_t *vsite, gmx_constr_t constr,
2352                 int gmx_unused stepout,
2353                 t_inputrec *inputrec,
2354                 gmx_mtop_t *top_global, t_fcdata *fcd,
2355                 t_state *state_global,
2356                 t_mdatoms *mdatoms,
2357                 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2358                 gmx_edsam_t gmx_unused  ed,
2359                 t_forcerec *fr,
2360                 int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
2361                 gmx_membed_t gmx_unused membed,
2362                 real gmx_unused cpt_period, real gmx_unused max_hours,
2363                 const char  gmx_unused *deviceOptions,
2364                 unsigned long gmx_unused Flags,
2365                 gmx_runtime_t *runtime)
2366 {
2367     const char       *SD = "Steepest Descents";
2368     em_state_t       *s_min, *s_try;
2369     rvec             *f_global;
2370     gmx_localtop_t   *top;
2371     gmx_enerdata_t   *enerd;
2372     rvec             *f;
2373     gmx_global_stat_t gstat;
2374     t_graph          *graph;
2375     real              stepsize, constepsize;
2376     real              ustep, fnormn;
2377     gmx_mdoutf_t     *outf;
2378     t_mdebin         *mdebin;
2379     gmx_bool          bDone, bAbort, do_x, do_f;
2380     tensor            vir, pres;
2381     rvec              mu_tot;
2382     int               nsteps;
2383     int               count          = 0;
2384     int               steps_accepted = 0;
2385     /* not used */
2386     real              terminate = 0;
2387
2388     s_min = init_em_state();
2389     s_try = init_em_state();
2390
2391     /* Init em and store the local state in s_try */
2392     init_em(fplog, SD, cr, inputrec,
2393             state_global, top_global, s_try, &top, &f, &f_global,
2394             nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
2395             nfile, fnm, &outf, &mdebin);
2396
2397     /* Print to log file  */
2398     print_em_start(fplog, cr, runtime, wcycle, SD);
2399
2400     /* Set variables for stepsize (in nm). This is the largest
2401      * step that we are going to make in any direction.
2402      */
2403     ustep    = inputrec->em_stepsize;
2404     stepsize = 0;
2405
2406     /* Max number of steps  */
2407     nsteps = inputrec->nsteps;
2408
2409     if (MASTER(cr))
2410     {
2411         /* Print to the screen  */
2412         sp_header(stderr, SD, inputrec->em_tol, nsteps);
2413     }
2414     if (fplog)
2415     {
2416         sp_header(fplog, SD, inputrec->em_tol, nsteps);
2417     }
2418
2419     /**** HERE STARTS THE LOOP ****
2420      * count is the counter for the number of steps
2421      * bDone will be TRUE when the minimization has converged
2422      * bAbort will be TRUE when nsteps steps have been performed or when
2423      * the stepsize becomes smaller than is reasonable for machine precision
2424      */
2425     count  = 0;
2426     bDone  = FALSE;
2427     bAbort = FALSE;
2428     while (!bDone && !bAbort)
2429     {
2430         bAbort = (nsteps >= 0) && (count == nsteps);
2431
2432         /* set new coordinates, except for first step */
2433         if (count > 0)
2434         {
2435             do_em_step(cr, inputrec, mdatoms, fr->bMolPBC,
2436                        s_min, stepsize, s_min->f, s_try,
2437                        constr, top, nrnb, wcycle, count);
2438         }
2439
2440         evaluate_energy(fplog, cr,
2441                         top_global, s_try, top,
2442                         inputrec, nrnb, wcycle, gstat,
2443                         vsite, constr, fcd, graph, mdatoms, fr,
2444                         mu_tot, enerd, vir, pres, count, count == 0);
2445
2446         if (MASTER(cr))
2447         {
2448             print_ebin_header(fplog, count, count, s_try->s.lambda[efptFEP]);
2449         }
2450
2451         if (count == 0)
2452         {
2453             s_min->epot = s_try->epot + 1;
2454         }
2455
2456         /* Print it if necessary  */
2457         if (MASTER(cr))
2458         {
2459             if (bVerbose)
2460             {
2461                 fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2462                         count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1,
2463                         (s_try->epot < s_min->epot) ? '\n' : '\r');
2464             }
2465
2466             if (s_try->epot < s_min->epot)
2467             {
2468                 /* Store the new (lower) energies  */
2469                 upd_mdebin(mdebin, FALSE, FALSE, (double)count,
2470                            mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals,
2471                            s_try->s.box, NULL, NULL, vir, pres, NULL, mu_tot, constr);
2472                 print_ebin(outf->fp_ene, TRUE,
2473                            do_per_step(steps_accepted, inputrec->nstdisreout),
2474                            do_per_step(steps_accepted, inputrec->nstorireout),
2475                            fplog, count, count, eprNORMAL, TRUE,
2476                            mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2477                 fflush(fplog);
2478             }
2479         }
2480
2481         /* Now if the new energy is smaller than the previous...
2482          * or if this is the first step!
2483          * or if we did random steps!
2484          */
2485
2486         if ( (count == 0) || (s_try->epot < s_min->epot) )
2487         {
2488             steps_accepted++;
2489
2490             /* Test whether the convergence criterion is met...  */
2491             bDone = (s_try->fmax < inputrec->em_tol);
2492
2493             /* Copy the arrays for force, positions and energy  */
2494             /* The 'Min' array always holds the coords and forces of the minimal
2495                sampled energy  */
2496             swap_em_state(s_min, s_try);
2497             if (count > 0)
2498             {
2499                 ustep *= 1.2;
2500             }
2501
2502             /* Write to trn, if necessary */
2503             do_x = do_per_step(steps_accepted, inputrec->nstxout);
2504             do_f = do_per_step(steps_accepted, inputrec->nstfout);
2505             write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
2506                           top_global, inputrec, count,
2507                           s_min, state_global, f_global);
2508         }
2509         else
2510         {
2511             /* If energy is not smaller make the step smaller...  */
2512             ustep *= 0.5;
2513
2514             if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
2515             {
2516                 /* Reload the old state */
2517                 em_dd_partition_system(fplog, count, cr, top_global, inputrec,
2518                                        s_min, top, mdatoms, fr, vsite, constr,
2519                                        nrnb, wcycle);
2520             }
2521         }
2522
2523         /* Determine new step  */
2524         stepsize = ustep/s_min->fmax;
2525
2526         /* Check if stepsize is too small, with 1 nm as a characteristic length */
2527 #ifdef GMX_DOUBLE
2528         if (count == nsteps || ustep < 1e-12)
2529 #else
2530         if (count == nsteps || ustep < 1e-6)
2531 #endif
2532         {
2533             if (MASTER(cr))
2534             {
2535                 warn_step(stderr, inputrec->em_tol, count == nsteps, constr != NULL);
2536                 warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL);
2537             }
2538             bAbort = TRUE;
2539         }
2540
2541         count++;
2542     } /* End of the loop  */
2543
2544     /* Print some shit...  */
2545     if (MASTER(cr))
2546     {
2547         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2548     }
2549     write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm),
2550                   top_global, inputrec, count,
2551                   s_min, state_global, f_global);
2552
2553     fnormn = s_min->fnorm/sqrt(state_global->natoms);
2554
2555     if (MASTER(cr))
2556     {
2557         print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps,
2558                         s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
2559         print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps,
2560                         s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
2561     }
2562
2563     finish_em(cr, outf, runtime, wcycle);
2564
2565     /* To print the actual number of steps we needed somewhere */
2566     inputrec->nsteps = count;
2567
2568     runtime->nsteps_done = count;
2569
2570     return 0;
2571 } /* That's all folks */
2572
2573
2574 double do_nm(FILE *fplog, t_commrec *cr,
2575              int nfile, const t_filenm fnm[],
2576              const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused  bCompact,
2577              int gmx_unused nstglobalcomm,
2578              gmx_vsite_t *vsite, gmx_constr_t constr,
2579              int gmx_unused stepout,
2580              t_inputrec *inputrec,
2581              gmx_mtop_t *top_global, t_fcdata *fcd,
2582              t_state *state_global,
2583              t_mdatoms *mdatoms,
2584              t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2585              gmx_edsam_t  gmx_unused ed,
2586              t_forcerec *fr,
2587              int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
2588              gmx_membed_t gmx_unused membed,
2589              real gmx_unused cpt_period, real gmx_unused max_hours,
2590              const char gmx_unused *deviceOptions,
2591              unsigned long gmx_unused Flags,
2592              gmx_runtime_t *runtime)
2593 {
2594     const char          *NM = "Normal Mode Analysis";
2595     gmx_mdoutf_t        *outf;
2596     int                  natoms, atom, d;
2597     int                  nnodes, node;
2598     rvec                *f_global;
2599     gmx_localtop_t      *top;
2600     gmx_enerdata_t      *enerd;
2601     rvec                *f;
2602     gmx_global_stat_t    gstat;
2603     t_graph             *graph;
2604     real                 t, t0, lambda, lam0;
2605     gmx_bool             bNS;
2606     tensor               vir, pres;
2607     rvec                 mu_tot;
2608     rvec                *fneg, *dfdx;
2609     gmx_bool             bSparse; /* use sparse matrix storage format */
2610     size_t               sz;
2611     gmx_sparsematrix_t * sparse_matrix           = NULL;
2612     real           *     full_matrix             = NULL;
2613     em_state_t       *   state_work;
2614
2615     /* added with respect to mdrun */
2616     int        i, j, k, row, col;
2617     real       der_range = 10.0*sqrt(GMX_REAL_EPS);
2618     real       x_min;
2619     real       fnorm, fmax;
2620
2621     if (constr != NULL)
2622     {
2623         gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported");
2624     }
2625
2626     state_work = init_em_state();
2627
2628     /* Init em and store the local state in state_minimum */
2629     init_em(fplog, NM, cr, inputrec,
2630             state_global, top_global, state_work, &top,
2631             &f, &f_global,
2632             nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
2633             nfile, fnm, &outf, NULL);
2634
2635     natoms = top_global->natoms;
2636     snew(fneg, natoms);
2637     snew(dfdx, natoms);
2638
2639 #ifndef GMX_DOUBLE
2640     if (MASTER(cr))
2641     {
2642         fprintf(stderr,
2643                 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2644                 "      which MIGHT not be accurate enough for normal mode analysis.\n"
2645                 "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
2646                 "      are fairly modest even if you recompile in double precision.\n\n");
2647     }
2648 #endif
2649
2650     /* Check if we can/should use sparse storage format.
2651      *
2652      * Sparse format is only useful when the Hessian itself is sparse, which it
2653      * will be when we use a cutoff.
2654      * For small systems (n<1000) it is easier to always use full matrix format, though.
2655      */
2656     if (EEL_FULL(fr->eeltype) || fr->rlist == 0.0)
2657     {
2658         fprintf(stderr, "Non-cutoff electrostatics used, forcing full Hessian format.\n");
2659         bSparse = FALSE;
2660     }
2661     else if (top_global->natoms < 1000)
2662     {
2663         fprintf(stderr, "Small system size (N=%d), using full Hessian format.\n", top_global->natoms);
2664         bSparse = FALSE;
2665     }
2666     else
2667     {
2668         fprintf(stderr, "Using compressed symmetric sparse Hessian format.\n");
2669         bSparse = TRUE;
2670     }
2671
2672     sz = DIM*top_global->natoms;
2673
2674     fprintf(stderr, "Allocating Hessian memory...\n\n");
2675
2676     if (bSparse)
2677     {
2678         sparse_matrix = gmx_sparsematrix_init(sz);
2679         sparse_matrix->compressed_symmetric = TRUE;
2680     }
2681     else
2682     {
2683         snew(full_matrix, sz*sz);
2684     }
2685
2686     /* Initial values */
2687     t0           = inputrec->init_t;
2688     lam0         = inputrec->fepvals->init_lambda;
2689     t            = t0;
2690     lambda       = lam0;
2691
2692     init_nrnb(nrnb);
2693
2694     where();
2695
2696     /* Write start time and temperature */
2697     print_em_start(fplog, cr, runtime, wcycle, NM);
2698
2699     /* fudge nr of steps to nr of atoms */
2700     inputrec->nsteps = natoms*2;
2701
2702     if (MASTER(cr))
2703     {
2704         fprintf(stderr, "starting normal mode calculation '%s'\n%d steps.\n\n",
2705                 *(top_global->name), (int)inputrec->nsteps);
2706     }
2707
2708     nnodes = cr->nnodes;
2709
2710     /* Make evaluate_energy do a single node force calculation */
2711     cr->nnodes = 1;
2712     evaluate_energy(fplog, cr,
2713                     top_global, state_work, top,
2714                     inputrec, nrnb, wcycle, gstat,
2715                     vsite, constr, fcd, graph, mdatoms, fr,
2716                     mu_tot, enerd, vir, pres, -1, TRUE);
2717     cr->nnodes = nnodes;
2718
2719     /* if forces are not small, warn user */
2720     get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work);
2721
2722     if (MASTER(cr))
2723     {
2724         fprintf(stderr, "Maximum force:%12.5e\n", state_work->fmax);
2725         if (state_work->fmax > 1.0e-3)
2726         {
2727             fprintf(stderr, "Maximum force probably not small enough to");
2728             fprintf(stderr, " ensure that you are in an \nenergy well. ");
2729             fprintf(stderr, "Be aware that negative eigenvalues may occur");
2730             fprintf(stderr, " when the\nresulting matrix is diagonalized.\n");
2731         }
2732     }
2733
2734     /***********************************************************
2735      *
2736      *      Loop over all pairs in matrix
2737      *
2738      *      do_force called twice. Once with positive and
2739      *      once with negative displacement
2740      *
2741      ************************************************************/
2742
2743     /* Steps are divided one by one over the nodes */
2744     for (atom = cr->nodeid; atom < natoms; atom += nnodes)
2745     {
2746
2747         for (d = 0; d < DIM; d++)
2748         {
2749             x_min = state_work->s.x[atom][d];
2750
2751             state_work->s.x[atom][d] = x_min - der_range;
2752
2753             /* Make evaluate_energy do a single node force calculation */
2754             cr->nnodes = 1;
2755             evaluate_energy(fplog, cr,
2756                             top_global, state_work, top,
2757                             inputrec, nrnb, wcycle, gstat,
2758                             vsite, constr, fcd, graph, mdatoms, fr,
2759                             mu_tot, enerd, vir, pres, atom*2, FALSE);
2760
2761             for (i = 0; i < natoms; i++)
2762             {
2763                 copy_rvec(state_work->f[i], fneg[i]);
2764             }
2765
2766             state_work->s.x[atom][d] = x_min + der_range;
2767
2768             evaluate_energy(fplog, cr,
2769                             top_global, state_work, top,
2770                             inputrec, nrnb, wcycle, gstat,
2771                             vsite, constr, fcd, graph, mdatoms, fr,
2772                             mu_tot, enerd, vir, pres, atom*2+1, FALSE);
2773             cr->nnodes = nnodes;
2774
2775             /* x is restored to original */
2776             state_work->s.x[atom][d] = x_min;
2777
2778             for (j = 0; j < natoms; j++)
2779             {
2780                 for (k = 0; (k < DIM); k++)
2781                 {
2782                     dfdx[j][k] =
2783                         -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2784                 }
2785             }
2786
2787             if (!MASTER(cr))
2788             {
2789 #ifdef GMX_MPI
2790 #ifdef GMX_DOUBLE
2791 #define mpi_type MPI_DOUBLE
2792 #else
2793 #define mpi_type MPI_FLOAT
2794 #endif
2795                 MPI_Send(dfdx[0], natoms*DIM, mpi_type, MASTERNODE(cr), cr->nodeid,
2796                          cr->mpi_comm_mygroup);
2797 #endif
2798             }
2799             else
2800             {
2801                 for (node = 0; (node < nnodes && atom+node < natoms); node++)
2802                 {
2803                     if (node > 0)
2804                     {
2805 #ifdef GMX_MPI
2806                         MPI_Status stat;
2807                         MPI_Recv(dfdx[0], natoms*DIM, mpi_type, node, node,
2808                                  cr->mpi_comm_mygroup, &stat);
2809 #undef mpi_type
2810 #endif
2811                     }
2812
2813                     row = (atom + node)*DIM + d;
2814
2815                     for (j = 0; j < natoms; j++)
2816                     {
2817                         for (k = 0; k < DIM; k++)
2818                         {
2819                             col = j*DIM + k;
2820
2821                             if (bSparse)
2822                             {
2823                                 if (col >= row && dfdx[j][k] != 0.0)
2824                                 {
2825                                     gmx_sparsematrix_increment_value(sparse_matrix,
2826                                                                      row, col, dfdx[j][k]);
2827                                 }
2828                             }
2829                             else
2830                             {
2831                                 full_matrix[row*sz+col] = dfdx[j][k];
2832                             }
2833                         }
2834                     }
2835                 }
2836             }
2837
2838             if (bVerbose && fplog)
2839             {
2840                 fflush(fplog);
2841             }
2842         }
2843         /* write progress */
2844         if (MASTER(cr) && bVerbose)
2845         {
2846             fprintf(stderr, "\rFinished step %d out of %d",
2847                     min(atom+nnodes, natoms), natoms);
2848             fflush(stderr);
2849         }
2850     }
2851
2852     if (MASTER(cr))
2853     {
2854         fprintf(stderr, "\n\nWriting Hessian...\n");
2855         gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix);
2856     }
2857
2858     finish_em(cr, outf, runtime, wcycle);
2859
2860     runtime->nsteps_done = natoms*2;
2861
2862     return 0;
2863 }