src/gromacs/mdlib/minimize.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief This file defines integrators for energy minimization
  40  *
  41  * \author Berk Hess <hess@kth.se>
  42  * \author Erik Lindahl <erik@kth.se>
  43  * \ingroup module_mdlib
  44  */
  45 #include "gmxpre.h"
  46
  47 #include "minimize.h"
  48
  49 #include "config.h"
  50
  51 #include <cmath>
  52 #include <cstring>
  53 #include <ctime>
  54
  55 #include <algorithm>
  56 #include <vector>
  57
  58 #include "gromacs/commandline/filenm.h"
  59 #include "gromacs/domdec/domdec.h"
  60 #include "gromacs/domdec/domdec_struct.h"
  61 #include "gromacs/ewald/pme.h"
  62 #include "gromacs/fileio/confio.h"
  63 #include "gromacs/fileio/mtxio.h"
  64 #include "gromacs/gmxlib/network.h"
  65 #include "gromacs/gmxlib/nrnb.h"
  66 #include "gromacs/imd/imd.h"
  67 #include "gromacs/linearalgebra/sparsematrix.h"
  68 #include "gromacs/listed-forces/manage-threading.h"
  69 #include "gromacs/math/functions.h"
  70 #include "gromacs/math/vec.h"
  71 #include "gromacs/mdlib/constr.h"
  72 #include "gromacs/mdlib/force.h"
  73 #include "gromacs/mdlib/forcerec.h"
  74 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  75 #include "gromacs/mdlib/md_support.h"
  76 #include "gromacs/mdlib/mdatoms.h"
  77 #include "gromacs/mdlib/mdebin.h"
  78 #include "gromacs/mdlib/mdrun.h"
  79 #include "gromacs/mdlib/mdsetup.h"
  80 #include "gromacs/mdlib/ns.h"
  81 #include "gromacs/mdlib/shellfc.h"
  82 #include "gromacs/mdlib/sim_util.h"
  83 #include "gromacs/mdlib/tgroup.h"
  84 #include "gromacs/mdlib/trajectory_writing.h"
  85 #include "gromacs/mdlib/update.h"
  86 #include "gromacs/mdlib/vsite.h"
  87 #include "gromacs/mdtypes/commrec.h"
  88 #include "gromacs/mdtypes/inputrec.h"
  89 #include "gromacs/mdtypes/md_enums.h"
  90 #include "gromacs/pbcutil/mshift.h"
  91 #include "gromacs/pbcutil/pbc.h"
  92 #include "gromacs/timing/wallcycle.h"
  93 #include "gromacs/timing/walltime_accounting.h"
  94 #include "gromacs/topology/mtop_util.h"
  95 #include "gromacs/topology/topology.h"
  96 #include "gromacs/utility/cstringutil.h"
  97 #include "gromacs/utility/exceptions.h"
  98 #include "gromacs/utility/fatalerror.h"
  99 #include "gromacs/utility/logger.h"
 100 #include "gromacs/utility/smalloc.h"
 101
 102 //! Utility structure for manipulating states during EM
 103 typedef struct {
 104     //! Copy of the global state
 105     t_state  s;
 106     //! Force array
 107     rvec    *f;
 108     //! Potential energy
 109     real     epot;
 110     //! Norm of the force
 111     real     fnorm;
 112     //! Maximum force
 113     real     fmax;
 114     //! Direction
 115     int      a_fmax;
 116 } em_state_t;
 117
 118 //! Initiate em_state_t structure and return pointer to it
 119 static em_state_t *init_em_state()
 120 {
 121     em_state_t *ems;
 122
 123     snew(ems, 1);
 124
 125     /* does this need to be here?  Should the array be declared differently (staticaly)in the state definition? */
 126     snew(ems->s.lambda, efptNR);
 127
 128     return ems;
 129 }
 130
 131 //! Print the EM starting conditions
 132 static void print_em_start(FILE                     *fplog,
 133                            t_commrec                *cr,
 134                            gmx_walltime_accounting_t walltime_accounting,
 135                            gmx_wallcycle_t           wcycle,
 136                            const char               *name)
 137 {
 138     walltime_accounting_start(walltime_accounting);
 139     wallcycle_start(wcycle, ewcRUN);
 140     print_start(fplog, cr, walltime_accounting, name);
 141 }
 142
 143 //! Stop counting time for EM
 144 static void em_time_end(gmx_walltime_accounting_t walltime_accounting,
 145                         gmx_wallcycle_t           wcycle)
 146 {
 147     wallcycle_stop(wcycle, ewcRUN);
 148
 149     walltime_accounting_end(walltime_accounting);
 150 }
 151
 152 //! Printing a log file and console header
 153 static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
 154 {
 155     fprintf(out, "\n");
 156     fprintf(out, "%s:\n", minimizer);
 157     fprintf(out, "   Tolerance (Fmax)   = %12.5e\n", ftol);
 158     fprintf(out, "   Number of steps    = %12d\n", nsteps);
 159 }
 160
 161 //! Print warning message
 162 static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain)
 163 {
 164     char buffer[2048];
 165     if (bLastStep)
 166     {
 167         sprintf(buffer,
 168                 "\nEnergy minimization reached the maximum number "
 169                 "of steps before the forces reached the requested "
 170                 "precision Fmax < %g.\n", ftol);
 171     }
 172     else
 173     {
 174         sprintf(buffer,
 175                 "\nEnergy minimization has stopped, but the forces have "
 176                 "not converged to the requested precision Fmax < %g (which "
 177                 "may not be possible for your system). It stopped "
 178                 "because the algorithm tried to make a new step whose size "
 179                 "was too small, or there was no change in the energy since "
 180                 "last step. Either way, we regard the minimization as "
 181                 "converged to within the available machine precision, "
 182                 "given your starting configuration and EM parameters.\n%s%s",
 183                 ftol,
 184                 sizeof(real) < sizeof(double) ?
 185                 "\nDouble precision normally gives you higher accuracy, but "
 186                 "this is often not needed for preparing to run molecular "
 187                 "dynamics.\n" :
 188                 "",
 189                 bConstrain ?
 190                 "You might need to increase your constraint accuracy, or turn\n"
 191                 "off constraints altogether (set constraints = none in mdp file)\n" :
 192                 "");
 193     }
 194     fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
 195 }
 196
 197 //! Print message about convergence of the EM
 198 static void print_converged(FILE *fp, const char *alg, real ftol,
 199                             gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps,
 200                             real epot, real fmax, int nfmax, real fnorm)
 201 {
 202     char buf[STEPSTRSIZE];
 203
 204     if (bDone)
 205     {
 206         fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n",
 207                 alg, ftol, gmx_step_str(count, buf));
 208     }
 209     else if (count < nsteps)
 210     {
 211         fprintf(fp, "\n%s converged to machine precision in %s steps,\n"
 212                 "but did not reach the requested Fmax < %g.\n",
 213                 alg, gmx_step_str(count, buf), ftol);
 214     }
 215     else
 216     {
 217         fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n",
 218                 alg, ftol, gmx_step_str(count, buf));
 219     }
 220
 221 #if GMX_DOUBLE
 222     fprintf(fp, "Potential Energy  = %21.14e\n", epot);
 223     fprintf(fp, "Maximum force     = %21.14e on atom %d\n", fmax, nfmax+1);
 224     fprintf(fp, "Norm of force     = %21.14e\n", fnorm);
 225 #else
 226     fprintf(fp, "Potential Energy  = %14.7e\n", epot);
 227     fprintf(fp, "Maximum force     = %14.7e on atom %d\n", fmax, nfmax+1);
 228     fprintf(fp, "Norm of force     = %14.7e\n", fnorm);
 229 #endif
 230 }
 231
 232 //! Compute the norm and max of the force array in parallel
 233 static void get_f_norm_max(t_commrec *cr,
 234                            t_grpopts *opts, t_mdatoms *mdatoms, rvec *f,
 235                            real *fnorm, real *fmax, int *a_fmax)
 236 {
 237     double fnorm2, *sum;
 238     real   fmax2, fam;
 239     int    la_max, a_max, start, end, i, m, gf;
 240
 241     /* This routine finds the largest force and returns it.
 242      * On parallel machines the global max is taken.
 243      */
 244     fnorm2 = 0;
 245     fmax2  = 0;
 246     la_max = -1;
 247     start  = 0;
 248     end    = mdatoms->homenr;
 249     if (mdatoms->cFREEZE)
 250     {
 251         for (i = start; i < end; i++)
 252         {
 253             gf  = mdatoms->cFREEZE[i];
 254             fam = 0;
 255             for (m = 0; m < DIM; m++)
 256             {
 257                 if (!opts->nFreeze[gf][m])
 258                 {
 259                     fam += gmx::square(f[i][m]);
 260                 }
 261             }
 262             fnorm2 += fam;
 263             if (fam > fmax2)
 264             {
 265                 fmax2  = fam;
 266                 la_max = i;
 267             }
 268         }
 269     }
 270     else
 271     {
 272         for (i = start; i < end; i++)
 273         {
 274             fam     = norm2(f[i]);
 275             fnorm2 += fam;
 276             if (fam > fmax2)
 277             {
 278                 fmax2  = fam;
 279                 la_max = i;
 280             }
 281         }
 282     }
 283
 284     if (la_max >= 0 && DOMAINDECOMP(cr))
 285     {
 286         a_max = cr->dd->gatindex[la_max];
 287     }
 288     else
 289     {
 290         a_max = la_max;
 291     }
 292     if (PAR(cr))
 293     {
 294         snew(sum, 2*cr->nnodes+1);
 295         sum[2*cr->nodeid]   = fmax2;
 296         sum[2*cr->nodeid+1] = a_max;
 297         sum[2*cr->nnodes]   = fnorm2;
 298         gmx_sumd(2*cr->nnodes+1, sum, cr);
 299         fnorm2 = sum[2*cr->nnodes];
 300         /* Determine the global maximum */
 301         for (i = 0; i < cr->nnodes; i++)
 302         {
 303             if (sum[2*i] > fmax2)
 304             {
 305                 fmax2 = sum[2*i];
 306                 a_max = (int)(sum[2*i+1] + 0.5);
 307             }
 308         }
 309         sfree(sum);
 310     }
 311
 312     if (fnorm)
 313     {
 314         *fnorm = sqrt(fnorm2);
 315     }
 316     if (fmax)
 317     {
 318         *fmax  = sqrt(fmax2);
 319     }
 320     if (a_fmax)
 321     {
 322         *a_fmax = a_max;
 323     }
 324 }
 325
 326 //! Compute the norm of the force
 327 static void get_state_f_norm_max(t_commrec *cr,
 328                                  t_grpopts *opts, t_mdatoms *mdatoms,
 329                                  em_state_t *ems)
 330 {
 331     get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax);
 332 }
 333
 334 //! Initialize the energy minimization
 335 void init_em(FILE *fplog, const char *title,
 336              t_commrec *cr, t_inputrec *ir,
 337              t_state *state_global, gmx_mtop_t *top_global,
 338              em_state_t *ems, gmx_localtop_t **top,
 339              rvec **f,
 340              t_nrnb *nrnb, rvec mu_tot,
 341              t_forcerec *fr, gmx_enerdata_t **enerd,
 342              t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat,
 343              gmx_vsite_t *vsite, gmx_constr_t constr, gmx_shellfc_t **shellfc,
 344              int nfile, const t_filenm fnm[],
 345              gmx_mdoutf_t *outf, t_mdebin **mdebin,
 346              int imdport, unsigned long gmx_unused Flags,
 347              gmx_wallcycle_t wcycle)
 348 {
 349     int  i;
 350     real dvdl_constr;
 351
 352     if (fplog)
 353     {
 354         fprintf(fplog, "Initiating %s\n", title);
 355     }
 356
 357     state_global->ngtc = 0;
 358
 359     /* Initialize lambda variables */
 360     initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL);
 361
 362     init_nrnb(nrnb);
 363
 364     /* Interactive molecular dynamics */
 365     init_IMD(ir, cr, top_global, fplog, 1, state_global->x,
 366              nfile, fnm, NULL, imdport, Flags);
 367
 368     if (ir->eI == eiNM)
 369     {
 370         GMX_ASSERT(shellfc != NULL, "With NM we always support shells");
 371
 372         *shellfc = init_shell_flexcon(stdout,
 373                                       top_global,
 374                                       n_flexible_constraints(constr),
 375                                       ir->nstcalcenergy,
 376                                       DOMAINDECOMP(cr));
 377     }
 378     else
 379     {
 380         GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir->eI), "This else currently only handles energy minimizers, consider if your algorithm needs shell/flexible-constraint support");
 381
 382         /* With energy minimization, shells and flexible constraints are
 383          * automatically minimized when treated like normal DOFS.
 384          */
 385         if (shellfc != NULL)
 386         {
 387             *shellfc = NULL;
 388         }
 389     }
 390
 391     if (DOMAINDECOMP(cr))
 392     {
 393         *top = dd_init_local_top(top_global);
 394
 395         dd_init_local_state(cr->dd, state_global, &ems->s);
 396
 397         *f = NULL;
 398
 399         /* Distribute the charge groups over the nodes from the master node */
 400         dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
 401                             state_global, top_global, ir,
 402                             &ems->s, &ems->f, mdatoms, *top,
 403                             fr, vsite, constr,
 404                             nrnb, NULL, FALSE);
 405         dd_store_state(cr->dd, &ems->s);
 406
 407         *graph = NULL;
 408     }
 409     else
 410     {
 411         snew(*f, top_global->natoms);
 412
 413         /* Just copy the state */
 414         ems->s = *state_global;
 415         /* We need to allocate one element extra, since we might use
 416          * (unaligned) 4-wide SIMD loads to access rvec entries.
 417          */
 418         snew(ems->s.x, ems->s.nalloc + 1);
 419         snew(ems->f, ems->s.nalloc+1);
 420         snew(ems->s.v, ems->s.nalloc+1);
 421         for (i = 0; i < state_global->natoms; i++)
 422         {
 423             copy_rvec(state_global->x[i], ems->s.x[i]);
 424         }
 425         copy_mat(state_global->box, ems->s.box);
 426
 427
 428         snew(*top, 1);
 429         mdAlgorithmsSetupAtomData(cr, ir, top_global, *top, fr,
 430                                   graph, mdatoms,
 431                                   vsite, shellfc ? *shellfc : NULL);
 432
 433         update_mdatoms(mdatoms, state_global->lambda[efptFEP]);
 434
 435         if (vsite)
 436         {
 437             set_vsite_top(vsite, *top, mdatoms, cr);
 438         }
 439     }
 440
 441     if (constr)
 442     {
 443         if (ir->eConstrAlg == econtSHAKE &&
 444             gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
 445         {
 446             gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",
 447                       econstr_names[econtSHAKE], econstr_names[econtLINCS]);
 448         }
 449
 450         if (!DOMAINDECOMP(cr))
 451         {
 452             set_constraints(constr, *top, ir, mdatoms, cr);
 453         }
 454
 455         if (!ir->bContinuation)
 456         {
 457             /* Constrain the starting coordinates */
 458             dvdl_constr = 0;
 459             constrain(PAR(cr) ? NULL : fplog, TRUE, TRUE, constr, &(*top)->idef,
 460                       ir, cr, -1, 0, 1.0, mdatoms,
 461                       ems->s.x, ems->s.x, NULL, fr->bMolPBC, ems->s.box,
 462                       ems->s.lambda[efptFEP], &dvdl_constr,
 463                       NULL, NULL, nrnb, econqCoord);
 464         }
 465     }
 466
 467     if (PAR(cr))
 468     {
 469         *gstat = global_stat_init(ir);
 470     }
 471     else
 472     {
 473         *gstat = NULL;
 474     }
 475
 476     *outf = init_mdoutf(fplog, nfile, fnm, 0, cr, ir, top_global, NULL, wcycle);
 477
 478     snew(*enerd, 1);
 479     init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
 480                   *enerd);
 481
 482     if (mdebin != NULL)
 483     {
 484         /* Init bin for energy stuff */
 485         *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, NULL);
 486     }
 487
 488     clear_rvec(mu_tot);
 489     calc_shifts(ems->s.box, fr->shift_vec);
 490 }
 491
 492 //! Finalize the minimization
 493 static void finish_em(t_commrec *cr, gmx_mdoutf_t outf,
 494                       gmx_walltime_accounting_t walltime_accounting,
 495                       gmx_wallcycle_t wcycle)
 496 {
 497     if (!(cr->duty & DUTY_PME))
 498     {
 499         /* Tell the PME only node to finish */
 500         gmx_pme_send_finish(cr);
 501     }
 502
 503     done_mdoutf(outf);
 504
 505     em_time_end(walltime_accounting, wcycle);
 506 }
 507
 508 //! Swap two different EM states during minimization
 509 static void swap_em_state(em_state_t *ems1, em_state_t *ems2)
 510 {
 511     em_state_t tmp;
 512
 513     tmp   = *ems1;
 514     *ems1 = *ems2;
 515     *ems2 = tmp;
 516 }
 517
 518 //! Copy coordinate from an EM state to a "normal" state structure
 519 static void copy_em_coords(em_state_t *ems, t_state *state)
 520 {
 521     int i;
 522
 523     for (i = 0; (i < state->natoms); i++)
 524     {
 525         copy_rvec(ems->s.x[i], state->x[i]);
 526     }
 527 }
 528
 529 //! Save the EM trajectory
 530 static void write_em_traj(FILE *fplog, t_commrec *cr,
 531                           gmx_mdoutf_t outf,
 532                           gmx_bool bX, gmx_bool bF, const char *confout,
 533                           gmx_mtop_t *top_global,
 534                           t_inputrec *ir, gmx_int64_t step,
 535                           em_state_t *state,
 536                           t_state *state_global)
 537 {
 538     int      mdof_flags;
 539     gmx_bool bIMDout = FALSE;
 540
 541
 542     /* Shall we do IMD output? */
 543     if (ir->bIMD)
 544     {
 545         bIMDout = do_per_step(step, IMD_get_step(ir->imd->setup));
 546     }
 547
 548     if ((bX || bF || bIMDout || confout != NULL) && !DOMAINDECOMP(cr))
 549     {
 550         copy_em_coords(state, state_global);
 551     }
 552
 553     mdof_flags = 0;
 554     if (bX)
 555     {
 556         mdof_flags |= MDOF_X;
 557     }
 558     if (bF)
 559     {
 560         mdof_flags |= MDOF_F;
 561     }
 562
 563     /* If we want IMD output, set appropriate MDOF flag */
 564     if (ir->bIMD)
 565     {
 566         mdof_flags |= MDOF_IMD;
 567     }
 568
 569     mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
 570                                      top_global, step, (double)step,
 571                                      &state->s, state_global, state->f);
 572
 573     if (confout != NULL && MASTER(cr))
 574     {
 575         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 576         {
 577             /* Make molecules whole only for confout writing */
 578             do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global,
 579                         state_global->x);
 580         }
 581
 582         write_sto_conf_mtop(confout,
 583                             *top_global->name, top_global,
 584                             state_global->x, NULL, ir->ePBC, state_global->box);
 585     }
 586 }
 587
 588 //! \brief Do one minimization step
 589 //
 590 // \returns true when the step succeeded, false when a constraint error occurred
 591 static bool do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
 592                        gmx_bool bMolPBC,
 593                        em_state_t *ems1, real a, rvec *f, em_state_t *ems2,
 594                        gmx_constr_t constr, gmx_localtop_t *top,
 595                        t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 596                        gmx_int64_t count)
 597
 598 {
 599     t_state *s1, *s2;
 600     int      start, end;
 601     real     dvdl_constr;
 602     int      nthreads gmx_unused;
 603
 604     bool     validStep = true;
 605
 606     s1 = &ems1->s;
 607     s2 = &ems2->s;
 608
 609     if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 610     {
 611         gmx_incons("state mismatch in do_em_step");
 612     }
 613
 614     s2->flags = s1->flags;
 615
 616     if (s2->nalloc != s1->nalloc)
 617     {
 618         s2->nalloc = s1->nalloc;
 619         /* We need to allocate one element extra, since we might use
 620          * (unaligned) 4-wide SIMD loads to access rvec entries.
 621          */
 622         srenew(s2->x, s1->nalloc + 1);
 623         srenew(ems2->f,  s1->nalloc);
 624         if (s2->flags & (1<<estCGP))
 625         {
 626             srenew(s2->cg_p,  s1->nalloc + 1);
 627         }
 628     }
 629
 630     s2->natoms = s1->natoms;
 631     copy_mat(s1->box, s2->box);
 632     /* Copy free energy state */
 633     for (int i = 0; i < efptNR; i++)
 634     {
 635         s2->lambda[i] = s1->lambda[i];
 636     }
 637     copy_mat(s1->box, s2->box);
 638
 639     start = 0;
 640     end   = md->homenr;
 641
 642     // cppcheck-suppress unreadVariable
 643     nthreads = gmx_omp_nthreads_get(emntUpdate);
 644 #pragma omp parallel num_threads(nthreads)
 645     {
 646         rvec *x1 = s1->x;
 647         rvec *x2 = s2->x;
 648
 649         int   gf = 0;
 650 #pragma omp for schedule(static) nowait
 651         for (int i = start; i < end; i++)
 652         {
 653             try
 654             {
 655                 if (md->cFREEZE)
 656                 {
 657                     gf = md->cFREEZE[i];
 658                 }
 659                 for (int m = 0; m < DIM; m++)
 660                 {
 661                     if (ir->opts.nFreeze[gf][m])
 662                     {
 663                         x2[i][m] = x1[i][m];
 664                     }
 665                     else
 666                     {
 667                         x2[i][m] = x1[i][m] + a*f[i][m];
 668                     }
 669                 }
 670             }
 671             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 672         }
 673
 674         if (s2->flags & (1<<estCGP))
 675         {
 676             /* Copy the CG p vector */
 677             rvec *p1 = s1->cg_p;
 678             rvec *p2 = s2->cg_p;
 679 #pragma omp for schedule(static) nowait
 680             for (int i = start; i < end; i++)
 681             {
 682                 // Trivial OpenMP block that does not throw
 683                 copy_rvec(p1[i], p2[i]);
 684             }
 685         }
 686
 687         if (DOMAINDECOMP(cr))
 688         {
 689             s2->ddp_count = s1->ddp_count;
 690             if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
 691             {
 692 #pragma omp barrier
 693                 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 694                 try
 695                 {
 696                     /* We need to allocate one element extra, since we might use
 697                      * (unaligned) 4-wide SIMD loads to access rvec entries.
 698                      */
 699                     srenew(s2->cg_gl, s2->cg_gl_nalloc + 1);
 700                 }
 701                 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 702 #pragma omp barrier
 703             }
 704             s2->ncg_gl = s1->ncg_gl;
 705 #pragma omp for schedule(static) nowait
 706             for (int i = 0; i < s2->ncg_gl; i++)
 707             {
 708                 s2->cg_gl[i] = s1->cg_gl[i];
 709             }
 710             s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 711         }
 712     }
 713
 714     if (constr)
 715     {
 716         wallcycle_start(wcycle, ewcCONSTR);
 717         dvdl_constr = 0;
 718         validStep   =
 719             constrain(NULL, TRUE, TRUE, constr, &top->idef,
 720                       ir, cr, count, 0, 1.0, md,
 721                       s1->x, s2->x, NULL, bMolPBC, s2->box,
 722                       s2->lambda[efptBONDED], &dvdl_constr,
 723                       NULL, NULL, nrnb, econqCoord);
 724         wallcycle_stop(wcycle, ewcCONSTR);
 725
 726         // We should move this check to the different minimizers
 727         if (!validStep && ir->eI != eiSteep)
 728         {
 729             gmx_fatal(FARGS, "The coordinates could not be constrained. Minimizer '%s' can not handle constraint failures, use minimizer '%s' before using '%s'.",
 730                       EI(ir->eI), EI(eiSteep), EI(ir->eI));
 731         }
 732     }
 733
 734     return validStep;
 735 }
 736
 737 //! Prepare EM for using domain decomposition parallellization
 738 static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr,
 739                                    gmx_mtop_t *top_global, t_inputrec *ir,
 740                                    em_state_t *ems, gmx_localtop_t *top,
 741                                    t_mdatoms *mdatoms, t_forcerec *fr,
 742                                    gmx_vsite_t *vsite, gmx_constr_t constr,
 743                                    t_nrnb *nrnb, gmx_wallcycle_t wcycle)
 744 {
 745     /* Repartition the domain decomposition */
 746     dd_partition_system(fplog, step, cr, FALSE, 1,
 747                         NULL, top_global, ir,
 748                         &ems->s, &ems->f,
 749                         mdatoms, top, fr, vsite, constr,
 750                         nrnb, wcycle, FALSE);
 751     dd_store_state(cr->dd, &ems->s);
 752 }
 753
 754 //! De one energy evaluation
 755 static void evaluate_energy(FILE *fplog, t_commrec *cr,
 756                             gmx_mtop_t *top_global,
 757                             em_state_t *ems, gmx_localtop_t *top,
 758                             t_inputrec *inputrec,
 759                             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 760                             gmx_global_stat_t gstat,
 761                             gmx_vsite_t *vsite, gmx_constr_t constr,
 762                             t_fcdata *fcd,
 763                             t_graph *graph, t_mdatoms *mdatoms,
 764                             t_forcerec *fr, rvec mu_tot,
 765                             gmx_enerdata_t *enerd, tensor vir, tensor pres,
 766                             gmx_int64_t count, gmx_bool bFirst)
 767 {
 768     real     t;
 769     gmx_bool bNS;
 770     tensor   force_vir, shake_vir, ekin;
 771     real     dvdl_constr, prescorr, enercorr, dvdlcorr;
 772     real     terminate = 0;
 773
 774     /* Set the time to the initial time, the time does not change during EM */
 775     t = inputrec->init_t;
 776
 777     if (bFirst ||
 778         (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
 779     {
 780         /* This is the first state or an old state used before the last ns */
 781         bNS = TRUE;
 782     }
 783     else
 784     {
 785         bNS = FALSE;
 786         if (inputrec->nstlist > 0)
 787         {
 788             bNS = TRUE;
 789         }
 790     }
 791
 792     if (vsite)
 793     {
 794         construct_vsites(vsite, ems->s.x, 1, NULL,
 795                          top->idef.iparams, top->idef.il,
 796                          fr->ePBC, fr->bMolPBC, cr, ems->s.box);
 797     }
 798
 799     if (DOMAINDECOMP(cr) && bNS)
 800     {
 801         /* Repartition the domain decomposition */
 802         em_dd_partition_system(fplog, count, cr, top_global, inputrec,
 803                                ems, top, mdatoms, fr, vsite, constr,
 804                                nrnb, wcycle);
 805     }
 806
 807     /* Calc force & energy on new trial position  */
 808     /* do_force always puts the charge groups in the box and shifts again
 809      * We do not unshift, so molecules are always whole in congrad.c
 810      */
 811     do_force(fplog, cr, inputrec,
 812              count, nrnb, wcycle, top, &top_global->groups,
 813              ems->s.box, ems->s.x, &ems->s.hist,
 814              ems->f, force_vir, mdatoms, enerd, fcd,
 815              ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE,
 816              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
 817              GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
 818              (bNS ? GMX_FORCE_NS : 0));
 819
 820     /* Clear the unused shake virial and pressure */
 821     clear_mat(shake_vir);
 822     clear_mat(pres);
 823
 824     /* Communicate stuff when parallel */
 825     if (PAR(cr) && inputrec->eI != eiNM)
 826     {
 827         wallcycle_start(wcycle, ewcMoveE);
 828
 829         global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot,
 830                     inputrec, NULL, NULL, NULL, 1, &terminate,
 831                     NULL, FALSE,
 832                     CGLO_ENERGY |
 833                     CGLO_PRESSURE |
 834                     CGLO_CONSTRAINT);
 835
 836         wallcycle_stop(wcycle, ewcMoveE);
 837     }
 838
 839     /* Calculate long range corrections to pressure and energy */
 840     calc_dispcorr(inputrec, fr, ems->s.box, ems->s.lambda[efptVDW],
 841                   pres, force_vir, &prescorr, &enercorr, &dvdlcorr);
 842     enerd->term[F_DISPCORR] = enercorr;
 843     enerd->term[F_EPOT]    += enercorr;
 844     enerd->term[F_PRES]    += prescorr;
 845     enerd->term[F_DVDL]    += dvdlcorr;
 846
 847     ems->epot = enerd->term[F_EPOT];
 848
 849     if (constr)
 850     {
 851         /* Project out the constraint components of the force */
 852         wallcycle_start(wcycle, ewcCONSTR);
 853         dvdl_constr = 0;
 854         constrain(NULL, FALSE, FALSE, constr, &top->idef,
 855                   inputrec, cr, count, 0, 1.0, mdatoms,
 856                   ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box,
 857                   ems->s.lambda[efptBONDED], &dvdl_constr,
 858                   NULL, &shake_vir, nrnb, econqForceDispl);
 859         enerd->term[F_DVDL_CONSTR] += dvdl_constr;
 860         m_add(force_vir, shake_vir, vir);
 861         wallcycle_stop(wcycle, ewcCONSTR);
 862     }
 863     else
 864     {
 865         copy_mat(force_vir, vir);
 866     }
 867
 868     clear_mat(ekin);
 869     enerd->term[F_PRES] =
 870         calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres);
 871
 872     sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals);
 873
 874     if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 875     {
 876         get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems);
 877     }
 878 }
 879
 880 //! Parallel utility summing energies and forces
 881 static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
 882                               gmx_mtop_t *top_global,
 883                               em_state_t *s_min, em_state_t *s_b)
 884 {
 885     rvec          *fm, *fb, *fmg;
 886     t_block       *cgs_gl;
 887     int            ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m;
 888     double         partsum;
 889     unsigned char *grpnrFREEZE;
 890
 891     if (debug)
 892     {
 893         fprintf(debug, "Doing reorder_partsum\n");
 894     }
 895
 896     fm = s_min->f;
 897     fb = s_b->f;
 898
 899     cgs_gl = dd_charge_groups_global(cr->dd);
 900     index  = cgs_gl->index;
 901
 902     /* Collect fm in a global vector fmg.
 903      * This conflicts with the spirit of domain decomposition,
 904      * but to fully optimize this a much more complicated algorithm is required.
 905      */
 906     snew(fmg, top_global->natoms);
 907
 908     ncg   = s_min->s.ncg_gl;
 909     cg_gl = s_min->s.cg_gl;
 910     i     = 0;
 911     for (c = 0; c < ncg; c++)
 912     {
 913         cg = cg_gl[c];
 914         a0 = index[cg];
 915         a1 = index[cg+1];
 916         for (a = a0; a < a1; a++)
 917         {
 918             copy_rvec(fm[i], fmg[a]);
 919             i++;
 920         }
 921     }
 922     gmx_sum(top_global->natoms*3, fmg[0], cr);
 923
 924     /* Now we will determine the part of the sum for the cgs in state s_b */
 925     ncg         = s_b->s.ncg_gl;
 926     cg_gl       = s_b->s.cg_gl;
 927     partsum     = 0;
 928     i           = 0;
 929     gf          = 0;
 930     grpnrFREEZE = top_global->groups.grpnr[egcFREEZE];
 931     for (c = 0; c < ncg; c++)
 932     {
 933         cg = cg_gl[c];
 934         a0 = index[cg];
 935         a1 = index[cg+1];
 936         for (a = a0; a < a1; a++)
 937         {
 938             if (mdatoms->cFREEZE && grpnrFREEZE)
 939             {
 940                 gf = grpnrFREEZE[i];
 941             }
 942             for (m = 0; m < DIM; m++)
 943             {
 944                 if (!opts->nFreeze[gf][m])
 945                 {
 946                     partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 947                 }
 948             }
 949             i++;
 950         }
 951     }
 952
 953     sfree(fmg);
 954
 955     return partsum;
 956 }
 957
 958 //! Print some stuff, like beta, whatever that means.
 959 static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
 960                     gmx_mtop_t *top_global,
 961                     em_state_t *s_min, em_state_t *s_b)
 962 {
 963     rvec  *fm, *fb;
 964     double sum;
 965     int    gf, i, m;
 966
 967     /* This is just the classical Polak-Ribiere calculation of beta;
 968      * it looks a bit complicated since we take freeze groups into account,
 969      * and might have to sum it in parallel runs.
 970      */
 971
 972     if (!DOMAINDECOMP(cr) ||
 973         (s_min->s.ddp_count == cr->dd->ddp_count &&
 974          s_b->s.ddp_count   == cr->dd->ddp_count))
 975     {
 976         fm  = s_min->f;
 977         fb  = s_b->f;
 978         sum = 0;
 979         gf  = 0;
 980         /* This part of code can be incorrect with DD,
 981          * since the atom ordering in s_b and s_min might differ.
 982          */
 983         for (i = 0; i < mdatoms->homenr; i++)
 984         {
 985             if (mdatoms->cFREEZE)
 986             {
 987                 gf = mdatoms->cFREEZE[i];
 988             }
 989             for (m = 0; m < DIM; m++)
 990             {
 991                 if (!opts->nFreeze[gf][m])
 992                 {
 993                     sum += (fb[i][m] - fm[i][m])*fb[i][m];
 994                 }
 995             }
 996         }
 997     }
 998     else
 999     {
1000         /* We need to reorder cgs while summing */
1001         sum = reorder_partsum(cr, opts, mdatoms, top_global, s_min, s_b);
1002     }
1003     if (PAR(cr))
1004     {
1005         gmx_sumd(1, &sum, cr);
1006     }
1007
1008     return sum/gmx::square(s_min->fnorm);
1009 }
1010
1011 namespace gmx
1012 {
1013
1014 /*! \brief Do conjugate gradients minimization
1015     \copydoc integrator_t(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
1016                            int nfile, const t_filenm fnm[],
1017                            const gmx_output_env_t *oenv, gmx_bool bVerbose,
1018                            int nstglobalcomm,
1019                            gmx_vsite_t *vsite, gmx_constr_t constr,
1020                            int stepout,
1021                            t_inputrec *inputrec,
1022                            gmx_mtop_t *top_global, t_fcdata *fcd,
1023                            t_state *state_global,
1024                            t_mdatoms *mdatoms,
1025                            t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1026                            gmx_edsam_t ed,
1027                            t_forcerec *fr,
1028                            int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1029                            gmx_membed_t gmx_unused *membed,
1030                            real cpt_period, real max_hours,
1031                            int imdport,
1032                            unsigned long Flags,
1033                            gmx_walltime_accounting_t walltime_accounting)
1034  */
1035 double do_cg(FILE *fplog, t_commrec *cr, const gmx::MDLogger gmx_unused &mdlog,
1036              int nfile, const t_filenm fnm[],
1037              const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
1038              int gmx_unused nstglobalcomm,
1039              gmx_vsite_t *vsite, gmx_constr_t constr,
1040              int gmx_unused stepout,
1041              t_inputrec *inputrec,
1042              gmx_mtop_t *top_global, t_fcdata *fcd,
1043              t_state *state_global,
1044              t_mdatoms *mdatoms,
1045              t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1046              gmx_edsam_t gmx_unused ed,
1047              t_forcerec *fr,
1048              int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
1049              gmx_membed_t gmx_unused *membed,
1050              real gmx_unused cpt_period, real gmx_unused max_hours,
1051              int imdport,
1052              unsigned long gmx_unused Flags,
1053              gmx_walltime_accounting_t walltime_accounting)
1054 {
1055     const char       *CG = "Polak-Ribiere Conjugate Gradients";
1056
1057     em_state_t       *s_min, *s_a, *s_b, *s_c;
1058     gmx_localtop_t   *top;
1059     gmx_enerdata_t   *enerd;
1060     rvec             *f;
1061     gmx_global_stat_t gstat;
1062     t_graph          *graph;
1063     rvec             *p, *sf;
1064     double            gpa, gpb, gpc, tmp, minstep;
1065     real              fnormn;
1066     real              stepsize;
1067     real              a, b, c, beta = 0.0;
1068     real              epot_repl = 0;
1069     real              pnorm;
1070     t_mdebin         *mdebin;
1071     gmx_bool          converged, foundlower;
1072     rvec              mu_tot;
1073     gmx_bool          do_log = FALSE, do_ene = FALSE, do_x, do_f;
1074     tensor            vir, pres;
1075     int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
1076     gmx_mdoutf_t      outf;
1077     int               i, m, gf, step, nminstep;
1078
1079     step = 0;
1080
1081     s_min = init_em_state();
1082     s_a   = init_em_state();
1083     s_b   = init_em_state();
1084     s_c   = init_em_state();
1085
1086     /* Init em and store the local state in s_min */
1087     init_em(fplog, CG, cr, inputrec,
1088             state_global, top_global, s_min, &top, &f,
1089             nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat,
1090             vsite, constr, NULL,
1091             nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
1092
1093     /* Print to log file */
1094     print_em_start(fplog, cr, walltime_accounting, wcycle, CG);
1095
1096     /* Max number of steps */
1097     number_steps = inputrec->nsteps;
1098
1099     if (MASTER(cr))
1100     {
1101         sp_header(stderr, CG, inputrec->em_tol, number_steps);
1102     }
1103     if (fplog)
1104     {
1105         sp_header(fplog, CG, inputrec->em_tol, number_steps);
1106     }
1107
1108     /* Call the force routine and some auxiliary (neighboursearching etc.) */
1109     /* do_force always puts the charge groups in the box and shifts again
1110      * We do not unshift, so molecules are always whole in congrad.c
1111      */
1112     evaluate_energy(fplog, cr,
1113                     top_global, s_min, top,
1114                     inputrec, nrnb, wcycle, gstat,
1115                     vsite, constr, fcd, graph, mdatoms, fr,
1116                     mu_tot, enerd, vir, pres, -1, TRUE);
1117     where();
1118
1119     if (MASTER(cr))
1120     {
1121         /* Copy stuff to the energy bin for easy printing etc. */
1122         upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1123                    mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
1124                    NULL, NULL, vir, pres, NULL, mu_tot, constr);
1125
1126         print_ebin_header(fplog, step, step);
1127         print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
1128                    mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1129     }
1130     where();
1131
1132     /* Estimate/guess the initial stepsize */
1133     stepsize = inputrec->em_stepsize/s_min->fnorm;
1134
1135     if (MASTER(cr))
1136     {
1137         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1138         fprintf(stderr, "   F-max             = %12.5e on atom %d\n",
1139                 s_min->fmax, s_min->a_fmax+1);
1140         fprintf(stderr, "   F-Norm            = %12.5e\n",
1141                 s_min->fnorm/sqrtNumAtoms);
1142         fprintf(stderr, "\n");
1143         /* and copy to the log file too... */
1144         fprintf(fplog, "   F-max             = %12.5e on atom %d\n",
1145                 s_min->fmax, s_min->a_fmax+1);
1146         fprintf(fplog, "   F-Norm            = %12.5e\n",
1147                 s_min->fnorm/sqrtNumAtoms);
1148         fprintf(fplog, "\n");
1149     }
1150     /* Start the loop over CG steps.
1151      * Each successful step is counted, and we continue until
1152      * we either converge or reach the max number of steps.
1153      */
1154     converged = FALSE;
1155     for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
1156     {
1157
1158         /* start taking steps in a new direction
1159          * First time we enter the routine, beta=0, and the direction is
1160          * simply the negative gradient.
1161          */
1162
1163         /* Calculate the new direction in p, and the gradient in this direction, gpa */
1164         p   = s_min->s.cg_p;
1165         sf  = s_min->f;
1166         gpa = 0;
1167         gf  = 0;
1168         for (i = 0; i < mdatoms->homenr; i++)
1169         {
1170             if (mdatoms->cFREEZE)
1171             {
1172                 gf = mdatoms->cFREEZE[i];
1173             }
1174             for (m = 0; m < DIM; m++)
1175             {
1176                 if (!inputrec->opts.nFreeze[gf][m])
1177                 {
1178                     p[i][m] = sf[i][m] + beta*p[i][m];
1179                     gpa    -= p[i][m]*sf[i][m];
1180                     /* f is negative gradient, thus the sign */
1181                 }
1182                 else
1183                 {
1184                     p[i][m] = 0;
1185                 }
1186             }
1187         }
1188
1189         /* Sum the gradient along the line across CPUs */
1190         if (PAR(cr))
1191         {
1192             gmx_sumd(1, &gpa, cr);
1193         }
1194
1195         /* Calculate the norm of the search vector */
1196         get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL, NULL);
1197
1198         /* Just in case stepsize reaches zero due to numerical precision... */
1199         if (stepsize <= 0)
1200         {
1201             stepsize = inputrec->em_stepsize/pnorm;
1202         }
1203
1204         /*
1205          * Double check the value of the derivative in the search direction.
1206          * If it is positive it must be due to the old information in the
1207          * CG formula, so just remove that and start over with beta=0.
1208          * This corresponds to a steepest descent step.
1209          */
1210         if (gpa > 0)
1211         {
1212             beta = 0;
1213             step--;   /* Don't count this step since we are restarting */
1214             continue; /* Go back to the beginning of the big for-loop */
1215         }
1216
1217         /* Calculate minimum allowed stepsize, before the average (norm)
1218          * relative change in coordinate is smaller than precision
1219          */
1220         minstep = 0;
1221         for (i = 0; i < mdatoms->homenr; i++)
1222         {
1223             for (m = 0; m < DIM; m++)
1224             {
1225                 tmp = fabs(s_min->s.x[i][m]);
1226                 if (tmp < 1.0)
1227                 {
1228                     tmp = 1.0;
1229                 }
1230                 tmp      = p[i][m]/tmp;
1231                 minstep += tmp*tmp;
1232             }
1233         }
1234         /* Add up from all CPUs */
1235         if (PAR(cr))
1236         {
1237             gmx_sumd(1, &minstep, cr);
1238         }
1239
1240         minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1241
1242         if (stepsize < minstep)
1243         {
1244             converged = TRUE;
1245             break;
1246         }
1247
1248         /* Write coordinates if necessary */
1249         do_x = do_per_step(step, inputrec->nstxout);
1250         do_f = do_per_step(step, inputrec->nstfout);
1251
1252         write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
1253                       top_global, inputrec, step,
1254                       s_min, state_global);
1255
1256         /* Take a step downhill.
1257          * In theory, we should minimize the function along this direction.
1258          * That is quite possible, but it turns out to take 5-10 function evaluations
1259          * for each line. However, we dont really need to find the exact minimum -
1260          * it is much better to start a new CG step in a modified direction as soon
1261          * as we are close to it. This will save a lot of energy evaluations.
1262          *
1263          * In practice, we just try to take a single step.
1264          * If it worked (i.e. lowered the energy), we increase the stepsize but
1265          * the continue straight to the next CG step without trying to find any minimum.
1266          * If it didn't work (higher energy), there must be a minimum somewhere between
1267          * the old position and the new one.
1268          *
1269          * Due to the finite numerical accuracy, it turns out that it is a good idea
1270          * to even accept a SMALL increase in energy, if the derivative is still downhill.
1271          * This leads to lower final energies in the tests I've done. / Erik
1272          */
1273         s_a->epot = s_min->epot;
1274         a         = 0.0;
1275         c         = a + stepsize; /* reference position along line is zero */
1276
1277         if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
1278         {
1279             em_dd_partition_system(fplog, step, cr, top_global, inputrec,
1280                                    s_min, top, mdatoms, fr, vsite, constr,
1281                                    nrnb, wcycle);
1282         }
1283
1284         /* Take a trial step (new coords in s_c) */
1285         do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c,
1286                    constr, top, nrnb, wcycle, -1);
1287
1288         neval++;
1289         /* Calculate energy for the trial step */
1290         evaluate_energy(fplog, cr,
1291                         top_global, s_c, top,
1292                         inputrec, nrnb, wcycle, gstat,
1293                         vsite, constr, fcd, graph, mdatoms, fr,
1294                         mu_tot, enerd, vir, pres, -1, FALSE);
1295
1296         /* Calc derivative along line */
1297         p   = s_c->s.cg_p;
1298         sf  = s_c->f;
1299         gpc = 0;
1300         for (i = 0; i < mdatoms->homenr; i++)
1301         {
1302             for (m = 0; m < DIM; m++)
1303             {
1304                 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1305             }
1306         }
1307         /* Sum the gradient along the line across CPUs */
1308         if (PAR(cr))
1309         {
1310             gmx_sumd(1, &gpc, cr);
1311         }
1312
1313         /* This is the max amount of increase in energy we tolerate */
1314         tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1315
1316         /* Accept the step if the energy is lower, or if it is not significantly higher
1317          * and the line derivative is still negative.
1318          */
1319         if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
1320         {
1321             foundlower = TRUE;
1322             /* Great, we found a better energy. Increase step for next iteration
1323              * if we are still going down, decrease it otherwise
1324              */
1325             if (gpc < 0)
1326             {
1327                 stepsize *= 1.618034; /* The golden section */
1328             }
1329             else
1330             {
1331                 stepsize *= 0.618034; /* 1/golden section */
1332             }
1333         }
1334         else
1335         {
1336             /* New energy is the same or higher. We will have to do some work
1337              * to find a smaller value in the interval. Take smaller step next time!
1338              */
1339             foundlower = FALSE;
1340             stepsize  *= 0.618034;
1341         }
1342
1343
1344
1345
1346         /* OK, if we didn't find a lower value we will have to locate one now - there must
1347          * be one in the interval [a=0,c].
1348          * The same thing is valid here, though: Don't spend dozens of iterations to find
1349          * the line minimum. We try to interpolate based on the derivative at the endpoints,
1350          * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1351          *
1352          * I also have a safeguard for potentially really pathological functions so we never
1353          * take more than 20 steps before we give up ...
1354          *
1355          * If we already found a lower value we just skip this step and continue to the update.
1356          */
1357         if (!foundlower)
1358         {
1359             nminstep = 0;
1360
1361             do
1362             {
1363                 /* Select a new trial point.
1364                  * If the derivatives at points a & c have different sign we interpolate to zero,
1365                  * otherwise just do a bisection.
1366                  */
1367                 if (gpa < 0 && gpc > 0)
1368                 {
1369                     b = a + gpa*(a-c)/(gpc-gpa);
1370                 }
1371                 else
1372                 {
1373                     b = 0.5*(a+c);
1374                 }
1375
1376                 /* safeguard if interpolation close to machine accuracy causes errors:
1377                  * never go outside the interval
1378                  */
1379                 if (b <= a || b >= c)
1380                 {
1381                     b = 0.5*(a+c);
1382                 }
1383
1384                 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
1385                 {
1386                     /* Reload the old state */
1387                     em_dd_partition_system(fplog, -1, cr, top_global, inputrec,
1388                                            s_min, top, mdatoms, fr, vsite, constr,
1389                                            nrnb, wcycle);
1390                 }
1391
1392                 /* Take a trial step to this new point - new coords in s_b */
1393                 do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b,
1394                            constr, top, nrnb, wcycle, -1);
1395
1396                 neval++;
1397                 /* Calculate energy for the trial step */
1398                 evaluate_energy(fplog, cr,
1399                                 top_global, s_b, top,
1400                                 inputrec, nrnb, wcycle, gstat,
1401                                 vsite, constr, fcd, graph, mdatoms, fr,
1402                                 mu_tot, enerd, vir, pres, -1, FALSE);
1403
1404                 /* p does not change within a step, but since the domain decomposition
1405                  * might change, we have to use cg_p of s_b here.
1406                  */
1407                 p   = s_b->s.cg_p;
1408                 sf  = s_b->f;
1409                 gpb = 0;
1410                 for (i = 0; i < mdatoms->homenr; i++)
1411                 {
1412                     for (m = 0; m < DIM; m++)
1413                     {
1414                         gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1415                     }
1416                 }
1417                 /* Sum the gradient along the line across CPUs */
1418                 if (PAR(cr))
1419                 {
1420                     gmx_sumd(1, &gpb, cr);
1421                 }
1422
1423                 if (debug)
1424                 {
1425                     fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1426                             s_a->epot, s_b->epot, s_c->epot, gpb);
1427                 }
1428
1429                 epot_repl = s_b->epot;
1430
1431                 /* Keep one of the intervals based on the value of the derivative at the new point */
1432                 if (gpb > 0)
1433                 {
1434                     /* Replace c endpoint with b */
1435                     swap_em_state(s_b, s_c);
1436                     c   = b;
1437                     gpc = gpb;
1438                 }
1439                 else
1440                 {
1441                     /* Replace a endpoint with b */
1442                     swap_em_state(s_b, s_a);
1443                     a   = b;
1444                     gpa = gpb;
1445                 }
1446
1447                 /*
1448                  * Stop search as soon as we find a value smaller than the endpoints.
1449                  * Never run more than 20 steps, no matter what.
1450                  */
1451                 nminstep++;
1452             }
1453             while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1454                    (nminstep < 20));
1455
1456             if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1457                 nminstep >= 20)
1458             {
1459                 /* OK. We couldn't find a significantly lower energy.
1460                  * If beta==0 this was steepest descent, and then we give up.
1461                  * If not, set beta=0 and restart with steepest descent before quitting.
1462                  */
1463                 if (beta == 0.0)
1464                 {
1465                     /* Converged */
1466                     converged = TRUE;
1467                     break;
1468                 }
1469                 else
1470                 {
1471                     /* Reset memory before giving up */
1472                     beta = 0.0;
1473                     continue;
1474                 }
1475             }
1476
1477             /* Select min energy state of A & C, put the best in B.
1478              */
1479             if (s_c->epot < s_a->epot)
1480             {
1481                 if (debug)
1482                 {
1483                     fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n",
1484                             s_c->epot, s_a->epot);
1485                 }
1486                 swap_em_state(s_b, s_c);
1487                 gpb = gpc;
1488             }
1489             else
1490             {
1491                 if (debug)
1492                 {
1493                     fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n",
1494                             s_a->epot, s_c->epot);
1495                 }
1496                 swap_em_state(s_b, s_a);
1497                 gpb = gpa;
1498             }
1499
1500         }
1501         else
1502         {
1503             if (debug)
1504             {
1505                 fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n",
1506                         s_c->epot);
1507             }
1508             swap_em_state(s_b, s_c);
1509             gpb = gpc;
1510         }
1511
1512         /* new search direction */
1513         /* beta = 0 means forget all memory and restart with steepest descents. */
1514         if (nstcg && ((step % nstcg) == 0))
1515         {
1516             beta = 0.0;
1517         }
1518         else
1519         {
1520             /* s_min->fnorm cannot be zero, because then we would have converged
1521              * and broken out.
1522              */
1523
1524             /* Polak-Ribiere update.
1525              * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1526              */
1527             beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b);
1528         }
1529         /* Limit beta to prevent oscillations */
1530         if (fabs(beta) > 5.0)
1531         {
1532             beta = 0.0;
1533         }
1534
1535
1536         /* update positions */
1537         swap_em_state(s_min, s_b);
1538         gpa = gpb;
1539
1540         /* Print it if necessary */
1541         if (MASTER(cr))
1542         {
1543             if (bVerbose)
1544             {
1545                 double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1546                 fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1547                         step, s_min->epot, s_min->fnorm/sqrtNumAtoms,
1548                         s_min->fmax, s_min->a_fmax+1);
1549                 fflush(stderr);
1550             }
1551             /* Store the new (lower) energies */
1552             upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1553                        mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
1554                        NULL, NULL, vir, pres, NULL, mu_tot, constr);
1555
1556             do_log = do_per_step(step, inputrec->nstlog);
1557             do_ene = do_per_step(step, inputrec->nstenergy);
1558
1559             /* Prepare IMD energy record, if bIMD is TRUE. */
1560             IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, step, TRUE);
1561
1562             if (do_log)
1563             {
1564                 print_ebin_header(fplog, step, step);
1565             }
1566             print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
1567                        do_log ? fplog : NULL, step, step, eprNORMAL,
1568                        mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1569         }
1570
1571         /* Send energies and positions to the IMD client if bIMD is TRUE. */
1572         if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr))
1573         {
1574             IMD_send_positions(inputrec->imd);
1575         }
1576
1577         /* Stop when the maximum force lies below tolerance.
1578          * If we have reached machine precision, converged is already set to true.
1579          */
1580         converged = converged || (s_min->fmax < inputrec->em_tol);
1581
1582     }   /* End of the loop */
1583
1584     /* IMD cleanup, if bIMD is TRUE. */
1585     IMD_finalize(inputrec->bIMD, inputrec->imd);
1586
1587     if (converged)
1588     {
1589         step--; /* we never took that last step in this case */
1590
1591     }
1592     if (s_min->fmax > inputrec->em_tol)
1593     {
1594         if (MASTER(cr))
1595         {
1596             warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
1597             warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
1598         }
1599         converged = FALSE;
1600     }
1601
1602     if (MASTER(cr))
1603     {
1604         /* If we printed energy and/or logfile last step (which was the last step)
1605          * we don't have to do it again, but otherwise print the final values.
1606          */
1607         if (!do_log)
1608         {
1609             /* Write final value to log since we didn't do anything the last step */
1610             print_ebin_header(fplog, step, step);
1611         }
1612         if (!do_ene || !do_log)
1613         {
1614             /* Write final energy file entries */
1615             print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
1616                        !do_log ? fplog : NULL, step, step, eprNORMAL,
1617                        mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1618         }
1619     }
1620
1621     /* Print some stuff... */
1622     if (MASTER(cr))
1623     {
1624         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
1625     }
1626
1627     /* IMPORTANT!
1628      * For accurate normal mode calculation it is imperative that we
1629      * store the last conformation into the full precision binary trajectory.
1630      *
1631      * However, we should only do it if we did NOT already write this step
1632      * above (which we did if do_x or do_f was true).
1633      */
1634     do_x = !do_per_step(step, inputrec->nstxout);
1635     do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
1636
1637     write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
1638                   top_global, inputrec, step,
1639                   s_min, state_global);
1640
1641
1642     if (MASTER(cr))
1643     {
1644         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1645         fnormn = s_min->fnorm/sqrtNumAtoms;
1646         print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps,
1647                         s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
1648         print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps,
1649                         s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
1650
1651         fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
1652     }
1653
1654     finish_em(cr, outf, walltime_accounting, wcycle);
1655
1656     /* To print the actual number of steps we needed somewhere */
1657     walltime_accounting_set_nsteps_done(walltime_accounting, step);
1658
1659     return 0;
1660 }   /* That's all folks */
1661
1662
1663 /*! \brief Do L-BFGS conjugate gradients minimization
1664     \copydoc integrator_t(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
1665                           int nfile, const t_filenm fnm[],
1666                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
1667                           int nstglobalcomm,
1668                           gmx_vsite_t *vsite, gmx_constr_t constr,
1669                           int stepout,
1670                           t_inputrec *inputrec,
1671                           gmx_mtop_t *top_global, t_fcdata *fcd,
1672                           t_state *state_global,
1673                           t_mdatoms *mdatoms,
1674                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1675                           gmx_edsam_t ed,
1676                           t_forcerec *fr,
1677                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1678                           real cpt_period, real max_hours,
1679                           int imdport,
1680                           unsigned long Flags,
1681                           gmx_walltime_accounting_t walltime_accounting)
1682  */
1683 double do_lbfgs(FILE *fplog, t_commrec *cr, const gmx::MDLogger gmx_unused &mdlog,
1684                 int nfile, const t_filenm fnm[],
1685                 const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
1686                 int gmx_unused nstglobalcomm,
1687                 gmx_vsite_t *vsite, gmx_constr_t constr,
1688                 int gmx_unused stepout,
1689                 t_inputrec *inputrec,
1690                 gmx_mtop_t *top_global, t_fcdata *fcd,
1691                 t_state *state_global,
1692                 t_mdatoms *mdatoms,
1693                 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1694                 gmx_edsam_t gmx_unused ed,
1695                 t_forcerec *fr,
1696                 int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
1697                 gmx_membed_t gmx_unused *membed,
1698                 real gmx_unused cpt_period, real gmx_unused max_hours,
1699                 int imdport,
1700                 unsigned long gmx_unused Flags,
1701                 gmx_walltime_accounting_t walltime_accounting)
1702 {
1703     static const char *LBFGS = "Low-Memory BFGS Minimizer";
1704     em_state_t         ems;
1705     gmx_localtop_t    *top;
1706     gmx_enerdata_t    *enerd;
1707     rvec              *f;
1708     gmx_global_stat_t  gstat;
1709     t_graph           *graph;
1710     int                ncorr, nmaxcorr, point, cp, neval, nminstep;
1711     double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
1712     real              *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg;
1713     real              *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp;
1714     real               a, b, c, maxdelta, delta;
1715     real               diag, Epot0, Epot, EpotA, EpotB, EpotC;
1716     real               dgdx, dgdg, sq, yr, beta;
1717     t_mdebin          *mdebin;
1718     gmx_bool           converged;
1719     rvec               mu_tot;
1720     real               fnorm, fmax;
1721     gmx_bool           do_log, do_ene, do_x, do_f, foundlower, *frozen;
1722     tensor             vir, pres;
1723     int                start, end, number_steps;
1724     gmx_mdoutf_t       outf;
1725     int                i, k, m, n, nfmax, gf, step;
1726     int                mdof_flags;
1727
1728     if (PAR(cr))
1729     {
1730         gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n");
1731     }
1732
1733     if (NULL != constr)
1734     {
1735         gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
1736     }
1737
1738     n        = 3*state_global->natoms;
1739     nmaxcorr = inputrec->nbfgscorr;
1740
1741     /* Allocate memory */
1742     /* Use pointers to real so we dont have to loop over both atoms and
1743      * dimensions all the time...
1744      * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1745      * that point to the same memory.
1746      */
1747     snew(xa, n);
1748     snew(xb, n);
1749     snew(xc, n);
1750     snew(fa, n);
1751     snew(fb, n);
1752     snew(fc, n);
1753     snew(frozen, n);
1754
1755     snew(p, n);
1756     snew(lastx, n);
1757     snew(lastf, n);
1758     snew(rho, nmaxcorr);
1759     snew(alpha, nmaxcorr);
1760
1761     snew(dx, nmaxcorr);
1762     for (i = 0; i < nmaxcorr; i++)
1763     {
1764         snew(dx[i], n);
1765     }
1766
1767     snew(dg, nmaxcorr);
1768     for (i = 0; i < nmaxcorr; i++)
1769     {
1770         snew(dg[i], n);
1771     }
1772
1773     step  = 0;
1774     neval = 0;
1775
1776     /* Init em */
1777     init_em(fplog, LBFGS, cr, inputrec,
1778             state_global, top_global, &ems, &top, &f,
1779             nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat,
1780             vsite, constr, NULL,
1781             nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
1782     /* Do_lbfgs is not completely updated like do_steep and do_cg,
1783      * so we free some memory again.
1784      */
1785     sfree(ems.s.x);
1786     sfree(ems.f);
1787
1788     xx = (real *)state_global->x;
1789     ff = (real *)f;
1790
1791     start = 0;
1792     end   = mdatoms->homenr;
1793
1794     /* Print to log file */
1795     print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS);
1796
1797     do_log = do_ene = do_x = do_f = TRUE;
1798
1799     /* Max number of steps */
1800     number_steps = inputrec->nsteps;
1801
1802     /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1803     gf = 0;
1804     for (i = start; i < end; i++)
1805     {
1806         if (mdatoms->cFREEZE)
1807         {
1808             gf = mdatoms->cFREEZE[i];
1809         }
1810         for (m = 0; m < DIM; m++)
1811         {
1812             frozen[3*i+m] = inputrec->opts.nFreeze[gf][m];
1813         }
1814     }
1815     if (MASTER(cr))
1816     {
1817         sp_header(stderr, LBFGS, inputrec->em_tol, number_steps);
1818     }
1819     if (fplog)
1820     {
1821         sp_header(fplog, LBFGS, inputrec->em_tol, number_steps);
1822     }
1823
1824     if (vsite)
1825     {
1826         construct_vsites(vsite, state_global->x, 1, NULL,
1827                          top->idef.iparams, top->idef.il,
1828                          fr->ePBC, fr->bMolPBC, cr, state_global->box);
1829     }
1830
1831     /* Call the force routine and some auxiliary (neighboursearching etc.) */
1832     /* do_force always puts the charge groups in the box and shifts again
1833      * We do not unshift, so molecules are always whole
1834      */
1835     neval++;
1836     ems.s.x = state_global->x;
1837     ems.f   = f;
1838     evaluate_energy(fplog, cr,
1839                     top_global, &ems, top,
1840                     inputrec, nrnb, wcycle, gstat,
1841                     vsite, constr, fcd, graph, mdatoms, fr,
1842                     mu_tot, enerd, vir, pres, -1, TRUE);
1843     where();
1844
1845     if (MASTER(cr))
1846     {
1847         /* Copy stuff to the energy bin for easy printing etc. */
1848         upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1849                    mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box,
1850                    NULL, NULL, vir, pres, NULL, mu_tot, constr);
1851
1852         print_ebin_header(fplog, step, step);
1853         print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
1854                    mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1855     }
1856     where();
1857
1858     /* This is the starting energy */
1859     Epot = enerd->term[F_EPOT];
1860
1861     fnorm = ems.fnorm;
1862     fmax  = ems.fmax;
1863     nfmax = ems.a_fmax;
1864
1865     /* Set the initial step.
1866      * since it will be multiplied by the non-normalized search direction
1867      * vector (force vector the first time), we scale it by the
1868      * norm of the force.
1869      */
1870
1871     if (MASTER(cr))
1872     {
1873         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1874         fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
1875         fprintf(stderr, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
1876         fprintf(stderr, "   F-Norm            = %12.5e\n", fnorm/sqrtNumAtoms);
1877         fprintf(stderr, "\n");
1878         /* and copy to the log file too... */
1879         fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
1880         fprintf(fplog, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
1881         fprintf(fplog, "   F-Norm            = %12.5e\n", fnorm/sqrtNumAtoms);
1882         fprintf(fplog, "\n");
1883     }
1884
1885     // Point is an index to the memory of search directions, where 0 is the first one.
1886     point = 0;
1887
1888     // Set initial search direction to the force (-gradient), or 0 for frozen particles.
1889     for (i = 0; i < n; i++)
1890     {
1891         if (!frozen[i])
1892         {
1893             dx[point][i] = ff[i]; /* Initial search direction */
1894         }
1895         else
1896         {
1897             dx[point][i] = 0;
1898         }
1899     }
1900
1901     // Stepsize will be modified during the search, and actually it is not critical
1902     // (the main efficiency in the algorithm comes from changing directions), but
1903     // we still need an initial value, so estimate it as the inverse of the norm
1904     // so we take small steps where the potential fluctuates a lot.
1905     stepsize  = 1.0/fnorm;
1906
1907     /* Start the loop over BFGS steps.
1908      * Each successful step is counted, and we continue until
1909      * we either converge or reach the max number of steps.
1910      */
1911
1912     ncorr = 0;
1913
1914     /* Set the gradient from the force */
1915     converged = FALSE;
1916     for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
1917     {
1918
1919         /* Write coordinates if necessary */
1920         do_x = do_per_step(step, inputrec->nstxout);
1921         do_f = do_per_step(step, inputrec->nstfout);
1922
1923         mdof_flags = 0;
1924         if (do_x)
1925         {
1926             mdof_flags |= MDOF_X;
1927         }
1928
1929         if (do_f)
1930         {
1931             mdof_flags |= MDOF_F;
1932         }
1933
1934         if (inputrec->bIMD)
1935         {
1936             mdof_flags |= MDOF_IMD;
1937         }
1938
1939         mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
1940                                          top_global, step, (real)step, state_global, state_global, f);
1941
1942         /* Do the linesearching in the direction dx[point][0..(n-1)] */
1943
1944         /* make s a pointer to current search direction - point=0 first time we get here */
1945         s = dx[point];
1946
1947         // calculate line gradient in position A
1948         for (gpa = 0, i = 0; i < n; i++)
1949         {
1950             gpa -= s[i]*ff[i];
1951         }
1952
1953         /* Calculate minimum allowed stepsize along the line, before the average (norm)
1954          * relative change in coordinate is smaller than precision
1955          */
1956         for (minstep = 0, i = 0; i < n; i++)
1957         {
1958             tmp = fabs(xx[i]);
1959             if (tmp < 1.0)
1960             {
1961                 tmp = 1.0;
1962             }
1963             tmp      = s[i]/tmp;
1964             minstep += tmp*tmp;
1965         }
1966         minstep = GMX_REAL_EPS/sqrt(minstep/n);
1967
1968         if (stepsize < minstep)
1969         {
1970             converged = TRUE;
1971             break;
1972         }
1973
1974         // Before taking any steps along the line, store the old position
1975         for (i = 0; i < n; i++)
1976         {
1977             lastx[i] = xx[i];
1978             lastf[i] = ff[i];
1979         }
1980         Epot0 = Epot;
1981
1982         for (i = 0; i < n; i++)
1983         {
1984             xa[i] = xx[i];
1985         }
1986
1987         /* Take a step downhill.
1988          * In theory, we should find the actual minimum of the function in this
1989          * direction, somewhere along the line.
1990          * That is quite possible, but it turns out to take 5-10 function evaluations
1991          * for each line. However, we dont really need to find the exact minimum -
1992          * it is much better to start a new BFGS step in a modified direction as soon
1993          * as we are close to it. This will save a lot of energy evaluations.
1994          *
1995          * In practice, we just try to take a single step.
1996          * If it worked (i.e. lowered the energy), we increase the stepsize but
1997          * continue straight to the next BFGS step without trying to find any minimum,
1998          * i.e. we change the search direction too. If the line was smooth, it is
1999          * likely we are in a smooth region, and then it makes sense to take longer
2000          * steps in the modified search direction too.
2001          *
2002          * If it didn't work (higher energy), there must be a minimum somewhere between
2003          * the old position and the new one. Then we need to start by finding a lower
2004          * value before we change search direction. Since the energy was apparently
2005          * quite rough, we need to decrease the step size.
2006          *
2007          * Due to the finite numerical accuracy, it turns out that it is a good idea
2008          * to accept a SMALL increase in energy, if the derivative is still downhill.
2009          * This leads to lower final energies in the tests I've done. / Erik
2010          */
2011
2012         // State "A" is the first position along the line.
2013         // reference position along line is initially zero
2014         EpotA      = Epot0;
2015         a          = 0.0;
2016
2017         // Check stepsize first. We do not allow displacements
2018         // larger than emstep.
2019         //
2020         do
2021         {
2022             // Pick a new position C by adding stepsize to A.
2023             c        = a + stepsize;
2024
2025             // Calculate what the largest change in any individual coordinate
2026             // would be (translation along line * gradient along line)
2027             maxdelta = 0;
2028             for (i = 0; i < n; i++)
2029             {
2030                 delta = c*s[i];
2031                 if (delta > maxdelta)
2032                 {
2033                     maxdelta = delta;
2034                 }
2035             }
2036             // If any displacement is larger than the stepsize limit, reduce the step
2037             if (maxdelta > inputrec->em_stepsize)
2038             {
2039                 stepsize *= 0.1;
2040             }
2041         }
2042         while (maxdelta > inputrec->em_stepsize);
2043
2044         // Take a trial step and move the coordinate array xc[] to position C
2045         for (i = 0; i < n; i++)
2046         {
2047             xc[i] = lastx[i] + c*s[i];
2048         }
2049
2050         neval++;
2051         // Calculate energy for the trial step in position C
2052         ems.s.x = (rvec *)xc;
2053         ems.f   = (rvec *)fc;
2054         evaluate_energy(fplog, cr,
2055                         top_global, &ems, top,
2056                         inputrec, nrnb, wcycle, gstat,
2057                         vsite, constr, fcd, graph, mdatoms, fr,
2058                         mu_tot, enerd, vir, pres, step, FALSE);
2059         EpotC = ems.epot;
2060
2061         // Calc line gradient in position C
2062         for (gpc = 0, i = 0; i < n; i++)
2063         {
2064             gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
2065         }
2066         /* Sum the gradient along the line across CPUs */
2067         if (PAR(cr))
2068         {
2069             gmx_sumd(1, &gpc, cr);
2070         }
2071
2072         // This is the max amount of increase in energy we tolerate.
2073         // By allowing VERY small changes (close to numerical precision) we
2074         // frequently find even better (lower) final energies.
2075         tmp = sqrt(GMX_REAL_EPS)*fabs(EpotA);
2076
2077         // Accept the step if the energy is lower in the new position C (compared to A),
2078         // or if it is not significantly higher and the line derivative is still negative.
2079         if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp)))
2080         {
2081             // Great, we found a better energy. We no longer try to alter the
2082             // stepsize, but simply accept this new better position. The we select a new
2083             // search direction instead, which will be much more efficient than continuing
2084             // to take smaller steps along a line. Set fnorm based on the new C position,
2085             // which will be used to update the stepsize to 1/fnorm further down.
2086             foundlower = TRUE;
2087             fnorm      = ems.fnorm;
2088         }
2089         else
2090         {
2091             // If we got here, the energy is NOT lower in point C, i.e. it will be the same
2092             // or higher than in point A. In this case it is pointless to move to point C,
2093             // so we will have to do more iterations along the same line to find a smaller
2094             // value in the interval [A=0.0,C].
2095             // Here, A is still 0.0, but that will change when we do a search in the interval
2096             // [0.0,C] below. That search we will do by interpolation or bisection rather
2097             // than with the stepsize, so no need to modify it. For the next search direction
2098             // it will be reset to 1/fnorm anyway.
2099             foundlower = FALSE;
2100         }
2101
2102         if (!foundlower)
2103         {
2104             // OK, if we didn't find a lower value we will have to locate one now - there must
2105             // be one in the interval [a,c].
2106             // The same thing is valid here, though: Don't spend dozens of iterations to find
2107             // the line minimum. We try to interpolate based on the derivative at the endpoints,
2108             // and only continue until we find a lower value. In most cases this means 1-2 iterations.
2109             // I also have a safeguard for potentially really pathological functions so we never
2110             // take more than 20 steps before we give up.
2111             // If we already found a lower value we just skip this step and continue to the update.
2112             nminstep = 0;
2113             do
2114             {
2115                 // Select a new trial point B in the interval [A,C].
2116                 // If the derivatives at points a & c have different sign we interpolate to zero,
2117                 // otherwise just do a bisection since there might be multiple minima/maxima
2118                 // inside the interval.
2119                 if (gpa < 0 && gpc > 0)
2120                 {
2121                     b = a + gpa*(a-c)/(gpc-gpa);
2122                 }
2123                 else
2124                 {
2125                     b = 0.5*(a+c);
2126                 }
2127
2128                 /* safeguard if interpolation close to machine accuracy causes errors:
2129                  * never go outside the interval
2130                  */
2131                 if (b <= a || b >= c)
2132                 {
2133                     b = 0.5*(a+c);
2134                 }
2135
2136                 // Take a trial step to point B
2137                 for (i = 0; i < n; i++)
2138                 {
2139                     xb[i] = lastx[i] + b*s[i];
2140                 }
2141
2142                 neval++;
2143                 // Calculate energy for the trial step in point B
2144                 ems.s.x = (rvec *)xb;
2145                 ems.f   = (rvec *)fb;
2146                 evaluate_energy(fplog, cr,
2147                                 top_global, &ems, top,
2148                                 inputrec, nrnb, wcycle, gstat,
2149                                 vsite, constr, fcd, graph, mdatoms, fr,
2150                                 mu_tot, enerd, vir, pres, step, FALSE);
2151                 EpotB = ems.epot;
2152                 fnorm = ems.fnorm;
2153
2154                 // Calculate gradient in point B
2155                 for (gpb = 0, i = 0; i < n; i++)
2156                 {
2157                     gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
2158
2159                 }
2160                 /* Sum the gradient along the line across CPUs */
2161                 if (PAR(cr))
2162                 {
2163                     gmx_sumd(1, &gpb, cr);
2164                 }
2165
2166                 // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative
2167                 // at the new point B, and rename the endpoints of this new interval A and C.
2168                 if (gpb > 0)
2169                 {
2170                     /* Replace c endpoint with b */
2171                     EpotC = EpotB;
2172                     c     = b;
2173                     gpc   = gpb;
2174                     /* swap coord pointers b/c */
2175                     xtmp = xb;
2176                     ftmp = fb;
2177                     xb   = xc;
2178                     fb   = fc;
2179                     xc   = xtmp;
2180                     fc   = ftmp;
2181                 }
2182                 else
2183                 {
2184                     /* Replace a endpoint with b */
2185                     EpotA = EpotB;
2186                     a     = b;
2187                     gpa   = gpb;
2188                     /* swap coord pointers a/b */
2189                     xtmp = xb;
2190                     ftmp = fb;
2191                     xb   = xa;
2192                     fb   = fa;
2193                     xa   = xtmp;
2194                     fa   = ftmp;
2195                 }
2196
2197                 /*
2198                  * Stop search as soon as we find a value smaller than the endpoints,
2199                  * or if the tolerance is below machine precision.
2200                  * Never run more than 20 steps, no matter what.
2201                  */
2202                 nminstep++;
2203             }
2204             while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20));
2205
2206             if (fabs(EpotB-Epot0) < GMX_REAL_EPS || nminstep >= 20)
2207             {
2208                 /* OK. We couldn't find a significantly lower energy.
2209                  * If ncorr==0 this was steepest descent, and then we give up.
2210                  * If not, reset memory to restart as steepest descent before quitting.
2211                  */
2212                 if (ncorr == 0)
2213                 {
2214                     /* Converged */
2215                     converged = TRUE;
2216                     break;
2217                 }
2218                 else
2219                 {
2220                     /* Reset memory */
2221                     ncorr = 0;
2222                     /* Search in gradient direction */
2223                     for (i = 0; i < n; i++)
2224                     {
2225                         dx[point][i] = ff[i];
2226                     }
2227                     /* Reset stepsize */
2228                     stepsize = 1.0/fnorm;
2229                     continue;
2230                 }
2231             }
2232
2233             /* Select min energy state of A & C, put the best in xx/ff/Epot
2234              */
2235             if (EpotC < EpotA)
2236             {
2237                 Epot = EpotC;
2238                 /* Use state C */
2239                 for (i = 0; i < n; i++)
2240                 {
2241                     xx[i] = xc[i];
2242                     ff[i] = fc[i];
2243                 }
2244                 step_taken = c;
2245             }
2246             else
2247             {
2248                 Epot = EpotA;
2249                 /* Use state A */
2250                 for (i = 0; i < n; i++)
2251                 {
2252                     xx[i] = xa[i];
2253                     ff[i] = fa[i];
2254                 }
2255                 step_taken = a;
2256             }
2257
2258         }
2259         else
2260         {
2261             /* found lower */
2262             Epot = EpotC;
2263             /* Use state C */
2264             for (i = 0; i < n; i++)
2265             {
2266                 xx[i] = xc[i];
2267                 ff[i] = fc[i];
2268             }
2269             step_taken = c;
2270         }
2271
2272         /* Update the memory information, and calculate a new
2273          * approximation of the inverse hessian
2274          */
2275
2276         /* Have new data in Epot, xx, ff */
2277         if (ncorr < nmaxcorr)
2278         {
2279             ncorr++;
2280         }
2281
2282         for (i = 0; i < n; i++)
2283         {
2284             dg[point][i]  = lastf[i]-ff[i];
2285             dx[point][i] *= step_taken;
2286         }
2287
2288         dgdg = 0;
2289         dgdx = 0;
2290         for (i = 0; i < n; i++)
2291         {
2292             dgdg += dg[point][i]*dg[point][i];
2293             dgdx += dg[point][i]*dx[point][i];
2294         }
2295
2296         diag = dgdx/dgdg;
2297
2298         rho[point] = 1.0/dgdx;
2299         point++;
2300
2301         if (point >= nmaxcorr)
2302         {
2303             point = 0;
2304         }
2305
2306         /* Update */
2307         for (i = 0; i < n; i++)
2308         {
2309             p[i] = ff[i];
2310         }
2311
2312         cp = point;
2313
2314         /* Recursive update. First go back over the memory points */
2315         for (k = 0; k < ncorr; k++)
2316         {
2317             cp--;
2318             if (cp < 0)
2319             {
2320                 cp = ncorr-1;
2321             }
2322
2323             sq = 0;
2324             for (i = 0; i < n; i++)
2325             {
2326                 sq += dx[cp][i]*p[i];
2327             }
2328
2329             alpha[cp] = rho[cp]*sq;
2330
2331             for (i = 0; i < n; i++)
2332             {
2333                 p[i] -= alpha[cp]*dg[cp][i];
2334             }
2335         }
2336
2337         for (i = 0; i < n; i++)
2338         {
2339             p[i] *= diag;
2340         }
2341
2342         /* And then go forward again */
2343         for (k = 0; k < ncorr; k++)
2344         {
2345             yr = 0;
2346             for (i = 0; i < n; i++)
2347             {
2348                 yr += p[i]*dg[cp][i];
2349             }
2350
2351             beta = rho[cp]*yr;
2352             beta = alpha[cp]-beta;
2353
2354             for (i = 0; i < n; i++)
2355             {
2356                 p[i] += beta*dx[cp][i];
2357             }
2358
2359             cp++;
2360             if (cp >= ncorr)
2361             {
2362                 cp = 0;
2363             }
2364         }
2365
2366         for (i = 0; i < n; i++)
2367         {
2368             if (!frozen[i])
2369             {
2370                 dx[point][i] = p[i];
2371             }
2372             else
2373             {
2374                 dx[point][i] = 0;
2375             }
2376         }
2377
2378         /* Test whether the convergence criterion is met */
2379         get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax);
2380
2381         /* Print it if necessary */
2382         if (MASTER(cr))
2383         {
2384             if (bVerbose)
2385             {
2386                 double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2387                 fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
2388                         step, Epot, fnorm/sqrtNumAtoms, fmax, nfmax+1);
2389                 fflush(stderr);
2390             }
2391             /* Store the new (lower) energies */
2392             upd_mdebin(mdebin, FALSE, FALSE, (double)step,
2393                        mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box,
2394                        NULL, NULL, vir, pres, NULL, mu_tot, constr);
2395             do_log = do_per_step(step, inputrec->nstlog);
2396             do_ene = do_per_step(step, inputrec->nstenergy);
2397             if (do_log)
2398             {
2399                 print_ebin_header(fplog, step, step);
2400             }
2401             print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
2402                        do_log ? fplog : NULL, step, step, eprNORMAL,
2403                        mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2404         }
2405
2406         /* Send x and E to IMD client, if bIMD is TRUE. */
2407         if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr))
2408         {
2409             IMD_send_positions(inputrec->imd);
2410         }
2411
2412         // Reset stepsize in we are doing more iterations
2413         stepsize = 1.0/fnorm;
2414
2415         /* Stop when the maximum force lies below tolerance.
2416          * If we have reached machine precision, converged is already set to true.
2417          */
2418         converged = converged || (fmax < inputrec->em_tol);
2419
2420     }   /* End of the loop */
2421
2422     /* IMD cleanup, if bIMD is TRUE. */
2423     IMD_finalize(inputrec->bIMD, inputrec->imd);
2424
2425     if (converged)
2426     {
2427         step--; /* we never took that last step in this case */
2428
2429     }
2430     if (fmax > inputrec->em_tol)
2431     {
2432         if (MASTER(cr))
2433         {
2434             warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
2435             warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
2436         }
2437         converged = FALSE;
2438     }
2439
2440     /* If we printed energy and/or logfile last step (which was the last step)
2441      * we don't have to do it again, but otherwise print the final values.
2442      */
2443     if (!do_log) /* Write final value to log since we didn't do anythin last step */
2444     {
2445         print_ebin_header(fplog, step, step);
2446     }
2447     if (!do_ene || !do_log) /* Write final energy file entries */
2448     {
2449         print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
2450                    !do_log ? fplog : NULL, step, step, eprNORMAL,
2451                    mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2452     }
2453
2454     /* Print some stuff... */
2455     if (MASTER(cr))
2456     {
2457         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2458     }
2459
2460     /* IMPORTANT!
2461      * For accurate normal mode calculation it is imperative that we
2462      * store the last conformation into the full precision binary trajectory.
2463      *
2464      * However, we should only do it if we did NOT already write this step
2465      * above (which we did if do_x or do_f was true).
2466      */
2467     do_x = !do_per_step(step, inputrec->nstxout);
2468     do_f = !do_per_step(step, inputrec->nstfout);
2469     write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
2470                   top_global, inputrec, step,
2471                   &ems, state_global);
2472
2473     if (MASTER(cr))
2474     {
2475         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2476         print_converged(stderr, LBFGS, inputrec->em_tol, step, converged,
2477                         number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms);
2478         print_converged(fplog, LBFGS, inputrec->em_tol, step, converged,
2479                         number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms);
2480
2481         fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
2482     }
2483
2484     finish_em(cr, outf, walltime_accounting, wcycle);
2485
2486     /* To print the actual number of steps we needed somewhere */
2487     walltime_accounting_set_nsteps_done(walltime_accounting, step);
2488
2489     return 0;
2490 }   /* That's all folks */
2491
2492 /*! \brief Do steepest descents minimization
2493     \copydoc integrator_t(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
2494                           int nfile, const t_filenm fnm[],
2495                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
2496                           int nstglobalcomm,
2497                           gmx_vsite_t *vsite, gmx_constr_t constr,
2498                           int stepout,
2499                           t_inputrec *inputrec,
2500                           gmx_mtop_t *top_global, t_fcdata *fcd,
2501                           t_state *state_global,
2502                           t_mdatoms *mdatoms,
2503                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2504                           gmx_edsam_t ed,
2505                           t_forcerec *fr,
2506                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2507                           real cpt_period, real max_hours,
2508                           int imdport,
2509                           unsigned long Flags,
2510                           gmx_walltime_accounting_t walltime_accounting)
2511  */
2512 double do_steep(FILE *fplog, t_commrec *cr, const gmx::MDLogger gmx_unused &mdlog,
2513                 int nfile, const t_filenm fnm[],
2514                 const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
2515                 int gmx_unused nstglobalcomm,
2516                 gmx_vsite_t *vsite, gmx_constr_t constr,
2517                 int gmx_unused stepout,
2518                 t_inputrec *inputrec,
2519                 gmx_mtop_t *top_global, t_fcdata *fcd,
2520                 t_state *state_global,
2521                 t_mdatoms *mdatoms,
2522                 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2523                 gmx_edsam_t gmx_unused  ed,
2524                 t_forcerec *fr,
2525                 int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
2526                 gmx_membed_t gmx_unused *membed,
2527                 real gmx_unused cpt_period, real gmx_unused max_hours,
2528                 int imdport,
2529                 unsigned long gmx_unused Flags,
2530                 gmx_walltime_accounting_t walltime_accounting)
2531 {
2532     const char       *SD = "Steepest Descents";
2533     em_state_t       *s_min, *s_try;
2534     gmx_localtop_t   *top;
2535     gmx_enerdata_t   *enerd;
2536     rvec             *f;
2537     gmx_global_stat_t gstat;
2538     t_graph          *graph;
2539     real              stepsize;
2540     real              ustep, fnormn;
2541     gmx_mdoutf_t      outf;
2542     t_mdebin         *mdebin;
2543     gmx_bool          bDone, bAbort, do_x, do_f;
2544     tensor            vir, pres;
2545     rvec              mu_tot;
2546     int               nsteps;
2547     int               count          = 0;
2548     int               steps_accepted = 0;
2549
2550     s_min = init_em_state();
2551     s_try = init_em_state();
2552
2553     /* Init em and store the local state in s_try */
2554     init_em(fplog, SD, cr, inputrec,
2555             state_global, top_global, s_try, &top, &f,
2556             nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat,
2557             vsite, constr, NULL,
2558             nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
2559
2560     /* Print to log file  */
2561     print_em_start(fplog, cr, walltime_accounting, wcycle, SD);
2562
2563     /* Set variables for stepsize (in nm). This is the largest
2564      * step that we are going to make in any direction.
2565      */
2566     ustep    = inputrec->em_stepsize;
2567     stepsize = 0;
2568
2569     /* Max number of steps  */
2570     nsteps = inputrec->nsteps;
2571
2572     if (MASTER(cr))
2573     {
2574         /* Print to the screen  */
2575         sp_header(stderr, SD, inputrec->em_tol, nsteps);
2576     }
2577     if (fplog)
2578     {
2579         sp_header(fplog, SD, inputrec->em_tol, nsteps);
2580     }
2581
2582     /**** HERE STARTS THE LOOP ****
2583      * count is the counter for the number of steps
2584      * bDone will be TRUE when the minimization has converged
2585      * bAbort will be TRUE when nsteps steps have been performed or when
2586      * the stepsize becomes smaller than is reasonable for machine precision
2587      */
2588     count  = 0;
2589     bDone  = FALSE;
2590     bAbort = FALSE;
2591     while (!bDone && !bAbort)
2592     {
2593         bAbort = (nsteps >= 0) && (count == nsteps);
2594
2595         /* set new coordinates, except for first step */
2596         bool validStep = true;
2597         if (count > 0)
2598         {
2599             validStep =
2600                 do_em_step(cr, inputrec, mdatoms, fr->bMolPBC,
2601                            s_min, stepsize, s_min->f, s_try,
2602                            constr, top, nrnb, wcycle, count);
2603         }
2604
2605         if (validStep)
2606         {
2607             evaluate_energy(fplog, cr,
2608                             top_global, s_try, top,
2609                             inputrec, nrnb, wcycle, gstat,
2610                             vsite, constr, fcd, graph, mdatoms, fr,
2611                             mu_tot, enerd, vir, pres, count, count == 0);
2612         }
2613         else
2614         {
2615             // Signal constraint error during stepping with energy=inf
2616             s_try->epot = std::numeric_limits<real>::infinity();
2617         }
2618
2619         if (MASTER(cr))
2620         {
2621             print_ebin_header(fplog, count, count);
2622         }
2623
2624         if (count == 0)
2625         {
2626             s_min->epot = s_try->epot;
2627         }
2628
2629         /* Print it if necessary  */
2630         if (MASTER(cr))
2631         {
2632             if (bVerbose)
2633             {
2634                 fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2635                         count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1,
2636                         ( (count == 0) || (s_try->epot < s_min->epot) ) ? '\n' : '\r');
2637                 fflush(stderr);
2638             }
2639
2640             if ( (count == 0) || (s_try->epot < s_min->epot) )
2641             {
2642                 /* Store the new (lower) energies  */
2643                 upd_mdebin(mdebin, FALSE, FALSE, (double)count,
2644                            mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals,
2645                            s_try->s.box, NULL, NULL, vir, pres, NULL, mu_tot, constr);
2646
2647                 /* Prepare IMD energy record, if bIMD is TRUE. */
2648                 IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, count, TRUE);
2649
2650                 print_ebin(mdoutf_get_fp_ene(outf), TRUE,
2651                            do_per_step(steps_accepted, inputrec->nstdisreout),
2652                            do_per_step(steps_accepted, inputrec->nstorireout),
2653                            fplog, count, count, eprNORMAL,
2654                            mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2655                 fflush(fplog);
2656             }
2657         }
2658
2659         /* Now if the new energy is smaller than the previous...
2660          * or if this is the first step!
2661          * or if we did random steps!
2662          */
2663
2664         if ( (count == 0) || (s_try->epot < s_min->epot) )
2665         {
2666             steps_accepted++;
2667
2668             /* Test whether the convergence criterion is met...  */
2669             bDone = (s_try->fmax < inputrec->em_tol);
2670
2671             /* Copy the arrays for force, positions and energy  */
2672             /* The 'Min' array always holds the coords and forces of the minimal
2673                sampled energy  */
2674             swap_em_state(s_min, s_try);
2675             if (count > 0)
2676             {
2677                 ustep *= 1.2;
2678             }
2679
2680             /* Write to trn, if necessary */
2681             do_x = do_per_step(steps_accepted, inputrec->nstxout);
2682             do_f = do_per_step(steps_accepted, inputrec->nstfout);
2683             write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
2684                           top_global, inputrec, count,
2685                           s_min, state_global);
2686         }
2687         else
2688         {
2689             /* If energy is not smaller make the step smaller...  */
2690             ustep *= 0.5;
2691
2692             if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
2693             {
2694                 /* Reload the old state */
2695                 em_dd_partition_system(fplog, count, cr, top_global, inputrec,
2696                                        s_min, top, mdatoms, fr, vsite, constr,
2697                                        nrnb, wcycle);
2698             }
2699         }
2700
2701         /* Determine new step  */
2702         stepsize = ustep/s_min->fmax;
2703
2704         /* Check if stepsize is too small, with 1 nm as a characteristic length */
2705 #if GMX_DOUBLE
2706         if (count == nsteps || ustep < 1e-12)
2707 #else
2708         if (count == nsteps || ustep < 1e-6)
2709 #endif
2710         {
2711             if (MASTER(cr))
2712             {
2713                 warn_step(stderr, inputrec->em_tol, count == nsteps, constr != NULL);
2714                 warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL);
2715             }
2716             bAbort = TRUE;
2717         }
2718
2719         /* Send IMD energies and positions, if bIMD is TRUE. */
2720         if (do_IMD(inputrec->bIMD, count, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr))
2721         {
2722             IMD_send_positions(inputrec->imd);
2723         }
2724
2725         count++;
2726     }   /* End of the loop  */
2727
2728     /* IMD cleanup, if bIMD is TRUE. */
2729     IMD_finalize(inputrec->bIMD, inputrec->imd);
2730
2731     /* Print some data...  */
2732     if (MASTER(cr))
2733     {
2734         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2735     }
2736     write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm),
2737                   top_global, inputrec, count,
2738                   s_min, state_global);
2739
2740     if (MASTER(cr))
2741     {
2742         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2743         fnormn = s_min->fnorm/sqrtNumAtoms;
2744
2745         print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps,
2746                         s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
2747         print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps,
2748                         s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
2749     }
2750
2751     finish_em(cr, outf, walltime_accounting, wcycle);
2752
2753     /* To print the actual number of steps we needed somewhere */
2754     inputrec->nsteps = count;
2755
2756     walltime_accounting_set_nsteps_done(walltime_accounting, count);
2757
2758     return 0;
2759 }   /* That's all folks */
2760
2761 /*! \brief Do normal modes analysis
2762     \copydoc integrator_t(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
2763                           int nfile, const t_filenm fnm[],
2764                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
2765                           int nstglobalcomm,
2766                           gmx_vsite_t *vsite, gmx_constr_t constr,
2767                           int stepout,
2768                           t_inputrec *inputrec,
2769                           gmx_mtop_t *top_global, t_fcdata *fcd,
2770                           t_state *state_global,
2771                           t_mdatoms *mdatoms,
2772                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2773                           gmx_edsam_t ed,
2774                           t_forcerec *fr,
2775                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2776                           real cpt_period, real max_hours,
2777                           int imdport,
2778                           unsigned long Flags,
2779                           gmx_walltime_accounting_t walltime_accounting)
2780  */
2781 double do_nm(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
2782              int nfile, const t_filenm fnm[],
2783              const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
2784              int gmx_unused nstglobalcomm,
2785              gmx_vsite_t *vsite, gmx_constr_t constr,
2786              int gmx_unused stepout,
2787              t_inputrec *inputrec,
2788              gmx_mtop_t *top_global, t_fcdata *fcd,
2789              t_state *state_global,
2790              t_mdatoms *mdatoms,
2791              t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2792              gmx_edsam_t  gmx_unused ed,
2793              t_forcerec *fr,
2794              int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
2795              gmx_membed_t gmx_unused *membed,
2796              real gmx_unused cpt_period, real gmx_unused max_hours,
2797              int imdport,
2798              unsigned long gmx_unused Flags,
2799              gmx_walltime_accounting_t walltime_accounting)
2800 {
2801     const char          *NM = "Normal Mode Analysis";
2802     gmx_mdoutf_t         outf;
2803     int                  nnodes, node;
2804     gmx_localtop_t      *top;
2805     gmx_enerdata_t      *enerd;
2806     rvec                *f;
2807     gmx_global_stat_t    gstat;
2808     t_graph             *graph;
2809     tensor               vir, pres;
2810     rvec                 mu_tot;
2811     rvec                *fneg, *dfdx;
2812     gmx_bool             bSparse; /* use sparse matrix storage format */
2813     size_t               sz;
2814     gmx_sparsematrix_t * sparse_matrix           = NULL;
2815     real           *     full_matrix             = NULL;
2816     em_state_t       *   state_work;
2817
2818     /* added with respect to mdrun */
2819     int                       row, col;
2820     real                      der_range = 10.0*sqrt(GMX_REAL_EPS);
2821     real                      x_min;
2822     bool                      bIsMaster = MASTER(cr);
2823
2824     if (constr != NULL)
2825     {
2826         gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported");
2827     }
2828
2829     state_work = init_em_state();
2830
2831     gmx_shellfc_t *shellfc;
2832
2833     /* Init em and store the local state in state_minimum */
2834     init_em(fplog, NM, cr, inputrec,
2835             state_global, top_global, state_work, &top,
2836             &f,
2837             nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat,
2838             vsite, constr, &shellfc,
2839             nfile, fnm, &outf, NULL, imdport, Flags, wcycle);
2840
2841     std::vector<size_t> atom_index = get_atom_index(top_global);
2842     snew(fneg, atom_index.size());
2843     snew(dfdx, atom_index.size());
2844
2845 #if !GMX_DOUBLE
2846     if (bIsMaster)
2847     {
2848         fprintf(stderr,
2849                 "NOTE: This version of GROMACS has been compiled in single precision,\n"
2850                 "      which MIGHT not be accurate enough for normal mode analysis.\n"
2851                 "      GROMACS now uses sparse matrix storage, so the memory requirements\n"
2852                 "      are fairly modest even if you recompile in double precision.\n\n");
2853     }
2854 #endif
2855
2856     /* Check if we can/should use sparse storage format.
2857      *
2858      * Sparse format is only useful when the Hessian itself is sparse, which it
2859      * will be when we use a cutoff.
2860      * For small systems (n<1000) it is easier to always use full matrix format, though.
2861      */
2862     if (EEL_FULL(fr->eeltype) || fr->rlist == 0.0)
2863     {
2864         GMX_LOG(mdlog.warning).appendText("Non-cutoff electrostatics used, forcing full Hessian format.");
2865         bSparse = FALSE;
2866     }
2867     else if (atom_index.size() < 1000)
2868     {
2869         GMX_LOG(mdlog.warning).appendTextFormatted("Small system size (N=%d), using full Hessian format.",
2870                                                    atom_index.size());
2871         bSparse = FALSE;
2872     }
2873     else
2874     {
2875         GMX_LOG(mdlog.warning).appendText("Using compressed symmetric sparse Hessian format.");
2876         bSparse = TRUE;
2877     }
2878
2879     /* Number of dimensions, based on real atoms, that is not vsites or shell */
2880     sz = DIM*atom_index.size();
2881
2882     fprintf(stderr, "Allocating Hessian memory...\n\n");
2883
2884     if (bSparse)
2885     {
2886         sparse_matrix = gmx_sparsematrix_init(sz);
2887         sparse_matrix->compressed_symmetric = TRUE;
2888     }
2889     else
2890     {
2891         snew(full_matrix, sz*sz);
2892     }
2893
2894     init_nrnb(nrnb);
2895
2896     where();
2897
2898     /* Write start time and temperature */
2899     print_em_start(fplog, cr, walltime_accounting, wcycle, NM);
2900
2901     /* fudge nr of steps to nr of atoms */
2902     inputrec->nsteps = atom_index.size()*2;
2903
2904     if (bIsMaster)
2905     {
2906         fprintf(stderr, "starting normal mode calculation '%s'\n%d steps.\n\n",
2907                 *(top_global->name), (int)inputrec->nsteps);
2908     }
2909
2910     nnodes = cr->nnodes;
2911
2912     /* Make evaluate_energy do a single node force calculation */
2913     cr->nnodes = 1;
2914     evaluate_energy(fplog, cr,
2915                     top_global, state_work, top,
2916                     inputrec, nrnb, wcycle, gstat,
2917                     vsite, constr, fcd, graph, mdatoms, fr,
2918                     mu_tot, enerd, vir, pres, -1, TRUE);
2919     cr->nnodes = nnodes;
2920
2921     /* if forces are not small, warn user */
2922     get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work);
2923
2924     GMX_LOG(mdlog.warning).appendTextFormatted("Maximum force:%12.5e", state_work->fmax);
2925     if (state_work->fmax > 1.0e-3)
2926     {
2927         GMX_LOG(mdlog.warning).appendText(
2928                 "The force is probably not small enough to "
2929                 "ensure that you are at a minimum.\n"
2930                 "Be aware that negative eigenvalues may occur\n"
2931                 "when the resulting matrix is diagonalized.");
2932     }
2933
2934     /***********************************************************
2935      *
2936      *      Loop over all pairs in matrix
2937      *
2938      *      do_force called twice. Once with positive and
2939      *      once with negative displacement
2940      *
2941      ************************************************************/
2942
2943     /* Steps are divided one by one over the nodes */
2944     bool bNS = true;
2945     for (unsigned int aid = cr->nodeid; aid < atom_index.size(); aid += nnodes)
2946     {
2947         size_t atom = atom_index[aid];
2948         for (size_t d = 0; d < DIM; d++)
2949         {
2950             gmx_bool    bBornRadii  = FALSE;
2951             gmx_int64_t step        = 0;
2952             int         force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES;
2953             double      t           = 0;
2954
2955             x_min = state_work->s.x[atom][d];
2956
2957             for (unsigned int dx = 0; (dx < 2); dx++)
2958             {
2959                 if (dx == 0)
2960                 {
2961                     state_work->s.x[atom][d] = x_min - der_range;
2962                 }
2963                 else
2964                 {
2965                     state_work->s.x[atom][d] = x_min + der_range;
2966                 }
2967
2968                 /* Make evaluate_energy do a single node force calculation */
2969                 cr->nnodes = 1;
2970                 if (shellfc)
2971                 {
2972                     /* Now is the time to relax the shells */
2973                     (void) relax_shell_flexcon(fplog, cr, bVerbose, step,
2974                                                inputrec, bNS, force_flags,
2975                                                top,
2976                                                constr, enerd, fcd,
2977                                                &state_work->s, state_work->f, vir, mdatoms,
2978                                                nrnb, wcycle, graph, &top_global->groups,
2979                                                shellfc, fr, bBornRadii, t, mu_tot,
2980                                                vsite, NULL);
2981                     bNS = false;
2982                     step++;
2983                 }
2984                 else
2985                 {
2986                     evaluate_energy(fplog, cr,
2987                                     top_global, state_work, top,
2988                                     inputrec, nrnb, wcycle, gstat,
2989                                     vsite, constr, fcd, graph, mdatoms, fr,
2990                                     mu_tot, enerd, vir, pres, atom*2+dx, FALSE);
2991                 }
2992
2993                 cr->nnodes = nnodes;
2994
2995                 if (dx == 0)
2996                 {
2997                     for (size_t i = 0; i < atom_index.size(); i++)
2998                     {
2999                         copy_rvec(state_work->f[atom_index[i]], fneg[i]);
3000                     }
3001                 }
3002             }
3003
3004             /* x is restored to original */
3005             state_work->s.x[atom][d] = x_min;
3006
3007             for (size_t j = 0; j < atom_index.size(); j++)
3008             {
3009                 for (size_t k = 0; (k < DIM); k++)
3010                 {
3011                     dfdx[j][k] =
3012                         -(state_work->f[atom_index[j]][k] - fneg[j][k])/(2*der_range);
3013                 }
3014             }
3015
3016             if (!bIsMaster)
3017             {
3018 #if GMX_MPI
3019 #define mpi_type GMX_MPI_REAL
3020                 MPI_Send(dfdx[0], atom_index.size()*DIM, mpi_type, MASTER(cr),
3021                          cr->nodeid, cr->mpi_comm_mygroup);
3022 #endif
3023             }
3024             else
3025             {
3026                 for (node = 0; (node < nnodes && atom+node < atom_index.size()); node++)
3027                 {
3028                     if (node > 0)
3029                     {
3030 #if GMX_MPI
3031                         MPI_Status stat;
3032                         MPI_Recv(dfdx[0], atom_index.size()*DIM, mpi_type, node, node,
3033                                  cr->mpi_comm_mygroup, &stat);
3034 #undef mpi_type
3035 #endif
3036                     }
3037
3038                     row = (atom + node)*DIM + d;
3039
3040                     for (size_t j = 0; j < atom_index.size(); j++)
3041                     {
3042                         for (size_t k = 0; k < DIM; k++)
3043                         {
3044                             col = j*DIM + k;
3045
3046                             if (bSparse)
3047                             {
3048                                 if (col >= row && dfdx[j][k] != 0.0)
3049                                 {
3050                                     gmx_sparsematrix_increment_value(sparse_matrix,
3051                                                                      row, col, dfdx[j][k]);
3052                                 }
3053                             }
3054                             else
3055                             {
3056                                 full_matrix[row*sz+col] = dfdx[j][k];
3057                             }
3058                         }
3059                     }
3060                 }
3061             }
3062
3063             if (bVerbose && fplog)
3064             {
3065                 fflush(fplog);
3066             }
3067         }
3068         /* write progress */
3069         if (bIsMaster && bVerbose)
3070         {
3071             fprintf(stderr, "\rFinished step %d out of %d",
3072                     static_cast<int>(std::min(atom+nnodes, atom_index.size())),
3073                     static_cast<int>(atom_index.size()));
3074             fflush(stderr);
3075         }
3076     }
3077
3078     if (bIsMaster)
3079     {
3080         fprintf(stderr, "\n\nWriting Hessian...\n");
3081         gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix);
3082     }
3083
3084     finish_em(cr, outf, walltime_accounting, wcycle);
3085
3086     walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size()*2);
3087
3088     return 0;
3089 }
3090
3091 } // namespace gmx