src/gromacs/mdrun/minimize.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 The GROMACS development team.
   7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 /*! \internal \file
  39  *
  40  * \brief This file defines integrators for energy minimization
  41  *
  42  * \author Berk Hess <hess@kth.se>
  43  * \author Erik Lindahl <erik@kth.se>
  44  * \ingroup module_mdrun
  45  */
  46 #include "gmxpre.h"
  47
  48 #include "config.h"
  49
  50 #include <cmath>
  51 #include <cstring>
  52 #include <ctime>
  53
  54 #include <algorithm>
  55 #include <limits>
  56 #include <vector>
  57
  58 #include "gromacs/commandline/filenm.h"
  59 #include "gromacs/domdec/collect.h"
  60 #include "gromacs/domdec/dlbtiming.h"
  61 #include "gromacs/domdec/domdec.h"
  62 #include "gromacs/domdec/domdec_struct.h"
  63 #include "gromacs/domdec/mdsetup.h"
  64 #include "gromacs/domdec/partition.h"
  65 #include "gromacs/ewald/pme_pp.h"
  66 #include "gromacs/fileio/confio.h"
  67 #include "gromacs/fileio/mtxio.h"
  68 #include "gromacs/gmxlib/network.h"
  69 #include "gromacs/gmxlib/nrnb.h"
  70 #include "gromacs/imd/imd.h"
  71 #include "gromacs/linearalgebra/sparsematrix.h"
  72 #include "gromacs/listed_forces/listed_forces.h"
  73 #include "gromacs/math/functions.h"
  74 #include "gromacs/math/vec.h"
  75 #include "gromacs/mdlib/constr.h"
  76 #include "gromacs/mdlib/coupling.h"
  77 #include "gromacs/mdlib/dispersioncorrection.h"
  78 #include "gromacs/mdlib/ebin.h"
  79 #include "gromacs/mdlib/enerdata_utils.h"
  80 #include "gromacs/mdlib/energyoutput.h"
  81 #include "gromacs/mdlib/force.h"
  82 #include "gromacs/mdlib/force_flags.h"
  83 #include "gromacs/mdlib/forcerec.h"
  84 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  85 #include "gromacs/mdlib/md_support.h"
  86 #include "gromacs/mdlib/mdatoms.h"
  87 #include "gromacs/mdlib/stat.h"
  88 #include "gromacs/mdlib/tgroup.h"
  89 #include "gromacs/mdlib/trajectory_writing.h"
  90 #include "gromacs/mdlib/update.h"
  91 #include "gromacs/mdlib/vsite.h"
  92 #include "gromacs/mdrunutility/handlerestart.h"
  93 #include "gromacs/mdrunutility/printtime.h"
  94 #include "gromacs/mdtypes/checkpointdata.h"
  95 #include "gromacs/mdtypes/commrec.h"
  96 #include "gromacs/mdtypes/forcebuffers.h"
  97 #include "gromacs/mdtypes/forcerec.h"
  98 #include "gromacs/mdtypes/inputrec.h"
  99 #include "gromacs/mdtypes/interaction_const.h"
 100 #include "gromacs/mdtypes/md_enums.h"
 101 #include "gromacs/mdtypes/mdatom.h"
 102 #include "gromacs/mdtypes/mdrunoptions.h"
 103 #include "gromacs/mdtypes/state.h"
 104 #include "gromacs/pbcutil/pbc.h"
 105 #include "gromacs/timing/wallcycle.h"
 106 #include "gromacs/timing/walltime_accounting.h"
 107 #include "gromacs/topology/mtop_util.h"
 108 #include "gromacs/topology/topology.h"
 109 #include "gromacs/utility/cstringutil.h"
 110 #include "gromacs/utility/exceptions.h"
 111 #include "gromacs/utility/fatalerror.h"
 112 #include "gromacs/utility/logger.h"
 113 #include "gromacs/utility/smalloc.h"
 114
 115 #include "legacysimulator.h"
 116 #include "shellfc.h"
 117
 118 using gmx::ArrayRef;
 119 using gmx::MdrunScheduleWorkload;
 120 using gmx::RVec;
 121 using gmx::VirtualSitesHandler;
 122
 123 //! Utility structure for manipulating states during EM
 124 typedef struct em_state
 125 {
 126     //! Copy of the global state
 127     t_state s;
 128     //! Force array
 129     gmx::ForceBuffers f;
 130     //! Potential energy
 131     real epot;
 132     //! Norm of the force
 133     real fnorm;
 134     //! Maximum force
 135     real fmax;
 136     //! Direction
 137     int a_fmax;
 138 } em_state_t;
 139
 140 //! Print the EM starting conditions
 141 static void print_em_start(FILE*                     fplog,
 142                            const t_commrec*          cr,
 143                            gmx_walltime_accounting_t walltime_accounting,
 144                            gmx_wallcycle*            wcycle,
 145                            const char*               name)
 146 {
 147     walltime_accounting_start_time(walltime_accounting);
 148     wallcycle_start(wcycle, WallCycleCounter::Run);
 149     print_start(fplog, cr, walltime_accounting, name);
 150 }
 151
 152 //! Stop counting time for EM
 153 static void em_time_end(gmx_walltime_accounting_t walltime_accounting, gmx_wallcycle* wcycle)
 154 {
 155     wallcycle_stop(wcycle, WallCycleCounter::Run);
 156
 157     walltime_accounting_end_time(walltime_accounting);
 158 }
 159
 160 //! Printing a log file and console header
 161 static void sp_header(FILE* out, const char* minimizer, real ftol, int nsteps)
 162 {
 163     fprintf(out, "\n");
 164     fprintf(out, "%s:\n", minimizer);
 165     fprintf(out, "   Tolerance (Fmax)   = %12.5e\n", ftol);
 166     fprintf(out, "   Number of steps    = %12d\n", nsteps);
 167 }
 168
 169 //! Print warning message
 170 static void warn_step(FILE* fp, real ftol, real fmax, gmx_bool bLastStep, gmx_bool bConstrain)
 171 {
 172     constexpr bool realIsDouble = GMX_DOUBLE;
 173     char           buffer[2048];
 174
 175     if (!std::isfinite(fmax))
 176     {
 177         sprintf(buffer,
 178                 "\nEnergy minimization has stopped because the force "
 179                 "on at least one atom is not finite. This usually means "
 180                 "atoms are overlapping. Modify the input coordinates to "
 181                 "remove atom overlap or use soft-core potentials with "
 182                 "the free energy code to avoid infinite forces.\n%s",
 183                 !realIsDouble ? "You could also be lucky that switching to double precision "
 184                                 "is sufficient to obtain finite forces.\n"
 185                               : "");
 186     }
 187     else if (bLastStep)
 188     {
 189         sprintf(buffer,
 190                 "\nEnergy minimization reached the maximum number "
 191                 "of steps before the forces reached the requested "
 192                 "precision Fmax < %g.\n",
 193                 ftol);
 194     }
 195     else
 196     {
 197         sprintf(buffer,
 198                 "\nEnergy minimization has stopped, but the forces have "
 199                 "not converged to the requested precision Fmax < %g (which "
 200                 "may not be possible for your system). It stopped "
 201                 "because the algorithm tried to make a new step whose size "
 202                 "was too small, or there was no change in the energy since "
 203                 "last step. Either way, we regard the minimization as "
 204                 "converged to within the available machine precision, "
 205                 "given your starting configuration and EM parameters.\n%s%s",
 206                 ftol,
 207                 !realIsDouble ? "\nDouble precision normally gives you higher accuracy, but "
 208                                 "this is often not needed for preparing to run molecular "
 209                                 "dynamics.\n"
 210                               : "",
 211                 bConstrain ? "You might need to increase your constraint accuracy, or turn\n"
 212                              "off constraints altogether (set constraints = none in mdp file)\n"
 213                            : "");
 214     }
 215
 216     fputs(wrap_lines(buffer, 78, 0, FALSE), stderr);
 217     fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
 218 }
 219
 220 //! Print message about convergence of the EM
 221 static void print_converged(FILE*             fp,
 222                             const char*       alg,
 223                             real              ftol,
 224                             int64_t           count,
 225                             gmx_bool          bDone,
 226                             int64_t           nsteps,
 227                             const em_state_t* ems,
 228                             double            sqrtNumAtoms)
 229 {
 230     char buf[STEPSTRSIZE];
 231
 232     if (bDone)
 233     {
 234         fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", alg, ftol, gmx_step_str(count, buf));
 235     }
 236     else if (count < nsteps)
 237     {
 238         fprintf(fp,
 239                 "\n%s converged to machine precision in %s steps,\n"
 240                 "but did not reach the requested Fmax < %g.\n",
 241                 alg,
 242                 gmx_step_str(count, buf),
 243                 ftol);
 244     }
 245     else
 246     {
 247         fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", alg, ftol, gmx_step_str(count, buf));
 248     }
 249
 250 #if GMX_DOUBLE
 251     fprintf(fp, "Potential Energy  = %21.14e\n", ems->epot);
 252     fprintf(fp, "Maximum force     = %21.14e on atom %d\n", ems->fmax, ems->a_fmax + 1);
 253     fprintf(fp, "Norm of force     = %21.14e\n", ems->fnorm / sqrtNumAtoms);
 254 #else
 255     fprintf(fp, "Potential Energy  = %14.7e\n", ems->epot);
 256     fprintf(fp, "Maximum force     = %14.7e on atom %d\n", ems->fmax, ems->a_fmax + 1);
 257     fprintf(fp, "Norm of force     = %14.7e\n", ems->fnorm / sqrtNumAtoms);
 258 #endif
 259 }
 260
 261 //! Compute the norm and max of the force array in parallel
 262 static void get_f_norm_max(const t_commrec*               cr,
 263                            const t_grpopts*               opts,
 264                            t_mdatoms*                     mdatoms,
 265                            gmx::ArrayRef<const gmx::RVec> f,
 266                            real*                          fnorm,
 267                            real*                          fmax,
 268                            int*                           a_fmax)
 269 {
 270     double fnorm2, *sum;
 271     real   fmax2, fam;
 272     int    la_max, a_max, start, end, i, m, gf;
 273
 274     /* This routine finds the largest force and returns it.
 275      * On parallel machines the global max is taken.
 276      */
 277     fnorm2 = 0;
 278     fmax2  = 0;
 279     la_max = -1;
 280     start  = 0;
 281     end    = mdatoms->homenr;
 282     if (mdatoms->cFREEZE)
 283     {
 284         for (i = start; i < end; i++)
 285         {
 286             gf  = mdatoms->cFREEZE[i];
 287             fam = 0;
 288             for (m = 0; m < DIM; m++)
 289             {
 290                 if (!opts->nFreeze[gf][m])
 291                 {
 292                     fam += gmx::square(f[i][m]);
 293                 }
 294             }
 295             fnorm2 += fam;
 296             if (fam > fmax2)
 297             {
 298                 fmax2  = fam;
 299                 la_max = i;
 300             }
 301         }
 302     }
 303     else
 304     {
 305         for (i = start; i < end; i++)
 306         {
 307             fam = norm2(f[i]);
 308             fnorm2 += fam;
 309             if (fam > fmax2)
 310             {
 311                 fmax2  = fam;
 312                 la_max = i;
 313             }
 314         }
 315     }
 316
 317     if (la_max >= 0 && DOMAINDECOMP(cr))
 318     {
 319         a_max = cr->dd->globalAtomIndices[la_max];
 320     }
 321     else
 322     {
 323         a_max = la_max;
 324     }
 325     if (PAR(cr))
 326     {
 327         snew(sum, 2 * cr->nnodes + 1);
 328         sum[2 * cr->nodeid]     = fmax2;
 329         sum[2 * cr->nodeid + 1] = a_max;
 330         sum[2 * cr->nnodes]     = fnorm2;
 331         gmx_sumd(2 * cr->nnodes + 1, sum, cr);
 332         fnorm2 = sum[2 * cr->nnodes];
 333         /* Determine the global maximum */
 334         for (i = 0; i < cr->nnodes; i++)
 335         {
 336             if (sum[2 * i] > fmax2)
 337             {
 338                 fmax2 = sum[2 * i];
 339                 a_max = gmx::roundToInt(sum[2 * i + 1]);
 340             }
 341         }
 342         sfree(sum);
 343     }
 344
 345     if (fnorm)
 346     {
 347         *fnorm = sqrt(fnorm2);
 348     }
 349     if (fmax)
 350     {
 351         *fmax = sqrt(fmax2);
 352     }
 353     if (a_fmax)
 354     {
 355         *a_fmax = a_max;
 356     }
 357 }
 358
 359 //! Compute the norm of the force
 360 static void get_state_f_norm_max(const t_commrec* cr, const t_grpopts* opts, t_mdatoms* mdatoms, em_state_t* ems)
 361 {
 362     get_f_norm_max(cr, opts, mdatoms, ems->f.view().force(), &ems->fnorm, &ems->fmax, &ems->a_fmax);
 363 }
 364
 365 //! Initialize the energy minimization
 366 static void init_em(FILE*                fplog,
 367                     const gmx::MDLogger& mdlog,
 368                     const char*          title,
 369                     const t_commrec*     cr,
 370                     const t_inputrec*    ir,
 371                     gmx::ImdSession*     imdSession,
 372                     pull_t*              pull_work,
 373                     t_state*             state_global,
 374                     const gmx_mtop_t&    top_global,
 375                     em_state_t*          ems,
 376                     gmx_localtop_t*      top,
 377                     t_nrnb*              nrnb,
 378                     t_forcerec*          fr,
 379                     gmx::MDAtoms*        mdAtoms,
 380                     gmx_global_stat_t*   gstat,
 381                     VirtualSitesHandler* vsite,
 382                     gmx::Constraints*    constr,
 383                     gmx_shellfc_t**      shellfc)
 384 {
 385     real dvdl_constr;
 386
 387     if (fplog)
 388     {
 389         fprintf(fplog, "Initiating %s\n", title);
 390     }
 391
 392     if (MASTER(cr))
 393     {
 394         state_global->ngtc = 0;
 395     }
 396     int*                fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
 397     gmx::ArrayRef<real> lambda    = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
 398     initialize_lambdas(fplog,
 399                        ir->efep,
 400                        ir->bSimTemp,
 401                        *ir->fepvals,
 402                        ir->simtempvals->temperatures,
 403                        gmx::arrayRefFromArray(ir->opts.ref_t, ir->opts.ngtc),
 404                        MASTER(cr),
 405                        fep_state,
 406                        lambda);
 407
 408     if (ir->eI == IntegrationAlgorithm::NM)
 409     {
 410         GMX_ASSERT(shellfc != nullptr, "With NM we always support shells");
 411
 412         *shellfc = init_shell_flexcon(stdout,
 413                                       top_global,
 414                                       constr ? constr->numFlexibleConstraints() : 0,
 415                                       ir->nstcalcenergy,
 416                                       DOMAINDECOMP(cr),
 417                                       thisRankHasDuty(cr, DUTY_PME));
 418     }
 419     else
 420     {
 421         GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir->eI),
 422                    "This else currently only handles energy minimizers, consider if your algorithm "
 423                    "needs shell/flexible-constraint support");
 424
 425         /* With energy minimization, shells and flexible constraints are
 426          * automatically minimized when treated like normal DOFS.
 427          */
 428         if (shellfc != nullptr)
 429         {
 430             *shellfc = nullptr;
 431         }
 432     }
 433
 434     if (DOMAINDECOMP(cr))
 435     {
 436         dd_init_local_state(*cr->dd, state_global, &ems->s);
 437
 438         /* Distribute the charge groups over the nodes from the master node */
 439         dd_partition_system(fplog,
 440                             mdlog,
 441                             ir->init_step,
 442                             cr,
 443                             TRUE,
 444                             1,
 445                             state_global,
 446                             top_global,
 447                             *ir,
 448                             imdSession,
 449                             pull_work,
 450                             &ems->s,
 451                             &ems->f,
 452                             mdAtoms,
 453                             top,
 454                             fr,
 455                             vsite,
 456                             constr,
 457                             nrnb,
 458                             nullptr,
 459                             FALSE);
 460         dd_store_state(*cr->dd, &ems->s);
 461     }
 462     else
 463     {
 464         state_change_natoms(state_global, state_global->natoms);
 465         /* Just copy the state */
 466         ems->s = *state_global;
 467         state_change_natoms(&ems->s, ems->s.natoms);
 468
 469         mdAlgorithmsSetupAtomData(
 470                 cr, *ir, top_global, top, fr, &ems->f, mdAtoms, constr, vsite, shellfc ? *shellfc : nullptr);
 471     }
 472
 473     update_mdatoms(mdAtoms->mdatoms(), ems->s.lambda[FreeEnergyPerturbationCouplingType::Mass]);
 474
 475     if (constr)
 476     {
 477         // TODO how should this cross-module support dependency be managed?
 478         if (ir->eConstrAlg == ConstraintAlgorithm::Shake && gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
 479         {
 480             gmx_fatal(FARGS,
 481                       "Can not do energy minimization with %s, use %s\n",
 482                       enumValueToString(ConstraintAlgorithm::Shake),
 483                       enumValueToString(ConstraintAlgorithm::Lincs));
 484         }
 485
 486         if (!ir->bContinuation)
 487         {
 488             /* Constrain the starting coordinates */
 489             bool needsLogging  = true;
 490             bool computeEnergy = true;
 491             bool computeVirial = false;
 492             dvdl_constr        = 0;
 493             constr->apply(needsLogging,
 494                           computeEnergy,
 495                           -1,
 496                           0,
 497                           1.0,
 498                           ems->s.x.arrayRefWithPadding(),
 499                           ems->s.x.arrayRefWithPadding(),
 500                           ArrayRef<RVec>(),
 501                           ems->s.box,
 502                           ems->s.lambda[FreeEnergyPerturbationCouplingType::Fep],
 503                           &dvdl_constr,
 504                           gmx::ArrayRefWithPadding<RVec>(),
 505                           computeVirial,
 506                           nullptr,
 507                           gmx::ConstraintVariable::Positions);
 508         }
 509     }
 510
 511     if (PAR(cr))
 512     {
 513         *gstat = global_stat_init(ir);
 514     }
 515     else
 516     {
 517         *gstat = nullptr;
 518     }
 519
 520     calc_shifts(ems->s.box, fr->shift_vec);
 521 }
 522
 523 //! Finalize the minimization
 524 static void finish_em(const t_commrec*          cr,
 525                       gmx_mdoutf_t              outf,
 526                       gmx_walltime_accounting_t walltime_accounting,
 527                       gmx_wallcycle*            wcycle)
 528 {
 529     if (!thisRankHasDuty(cr, DUTY_PME))
 530     {
 531         /* Tell the PME only node to finish */
 532         gmx_pme_send_finish(cr);
 533     }
 534
 535     done_mdoutf(outf);
 536
 537     em_time_end(walltime_accounting, wcycle);
 538 }
 539
 540 //! Swap two different EM states during minimization
 541 static void swap_em_state(em_state_t** ems1, em_state_t** ems2)
 542 {
 543     em_state_t* tmp;
 544
 545     tmp   = *ems1;
 546     *ems1 = *ems2;
 547     *ems2 = tmp;
 548 }
 549
 550 //! Save the EM trajectory
 551 static void write_em_traj(FILE*               fplog,
 552                           const t_commrec*    cr,
 553                           gmx_mdoutf_t        outf,
 554                           gmx_bool            bX,
 555                           gmx_bool            bF,
 556                           const char*         confout,
 557                           const gmx_mtop_t&   top_global,
 558                           const t_inputrec*   ir,
 559                           int64_t             step,
 560                           em_state_t*         state,
 561                           t_state*            state_global,
 562                           ObservablesHistory* observablesHistory)
 563 {
 564     int mdof_flags = 0;
 565
 566     if (bX)
 567     {
 568         mdof_flags |= MDOF_X;
 569     }
 570     if (bF)
 571     {
 572         mdof_flags |= MDOF_F;
 573     }
 574
 575     /* If we want IMD output, set appropriate MDOF flag */
 576     if (ir->bIMD)
 577     {
 578         mdof_flags |= MDOF_IMD;
 579     }
 580
 581     gmx::WriteCheckpointDataHolder checkpointDataHolder;
 582     mdoutf_write_to_trajectory_files(fplog,
 583                                      cr,
 584                                      outf,
 585                                      mdof_flags,
 586                                      top_global.natoms,
 587                                      step,
 588                                      static_cast<double>(step),
 589                                      &state->s,
 590                                      state_global,
 591                                      observablesHistory,
 592                                      state->f.view().force(),
 593                                      &checkpointDataHolder);
 594
 595     if (confout != nullptr)
 596     {
 597         if (DOMAINDECOMP(cr))
 598         {
 599             /* If bX=true, x was collected to state_global in the call above */
 600             if (!bX)
 601             {
 602                 auto globalXRef = MASTER(cr) ? state_global->x : gmx::ArrayRef<gmx::RVec>();
 603                 dd_collect_vec(
 604                         cr->dd, state->s.ddp_count, state->s.ddp_count_cg_gl, state->s.cg_gl, state->s.x, globalXRef);
 605             }
 606         }
 607         else
 608         {
 609             /* Copy the local state pointer */
 610             state_global = &state->s;
 611         }
 612
 613         if (MASTER(cr))
 614         {
 615             if (ir->pbcType != PbcType::No && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 616             {
 617                 /* Make molecules whole only for confout writing */
 618                 do_pbc_mtop(ir->pbcType, state->s.box, &top_global, state_global->x.rvec_array());
 619             }
 620
 621             write_sto_conf_mtop(confout,
 622                                 *top_global.name,
 623                                 top_global,
 624                                 state_global->x.rvec_array(),
 625                                 nullptr,
 626                                 ir->pbcType,
 627                                 state->s.box);
 628         }
 629     }
 630 }
 631
 632 //! \brief Do one minimization step
 633 //
 634 // \returns true when the step succeeded, false when a constraint error occurred
 635 static bool do_em_step(const t_commrec*                          cr,
 636                        const t_inputrec*                         ir,
 637                        t_mdatoms*                                md,
 638                        em_state_t*                               ems1,
 639                        real                                      a,
 640                        gmx::ArrayRefWithPadding<const gmx::RVec> force,
 641                        em_state_t*                               ems2,
 642                        gmx::Constraints*                         constr,
 643                        int64_t                                   count)
 644
 645 {
 646     t_state *    s1, *s2;
 647     int          start, end;
 648     real         dvdl_constr;
 649     int nthreads gmx_unused;
 650
 651     bool validStep = true;
 652
 653     s1 = &ems1->s;
 654     s2 = &ems2->s;
 655
 656     if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 657     {
 658         gmx_incons("state mismatch in do_em_step");
 659     }
 660
 661     s2->flags = s1->flags;
 662
 663     if (s2->natoms != s1->natoms)
 664     {
 665         state_change_natoms(s2, s1->natoms);
 666         ems2->f.resize(s2->natoms);
 667     }
 668     if (DOMAINDECOMP(cr) && s2->cg_gl.size() != s1->cg_gl.size())
 669     {
 670         s2->cg_gl.resize(s1->cg_gl.size());
 671     }
 672
 673     copy_mat(s1->box, s2->box);
 674     /* Copy free energy state */
 675     s2->lambda = s1->lambda;
 676     copy_mat(s1->box, s2->box);
 677
 678     start = 0;
 679     end   = md->homenr;
 680
 681     nthreads = gmx_omp_nthreads_get(ModuleMultiThread::Update);
 682 #pragma omp parallel num_threads(nthreads)
 683     {
 684         const rvec* x1 = s1->x.rvec_array();
 685         rvec*       x2 = s2->x.rvec_array();
 686         const rvec* f  = as_rvec_array(force.unpaddedArrayRef().data());
 687
 688         int gf = 0;
 689 #pragma omp for schedule(static) nowait
 690         for (int i = start; i < end; i++)
 691         {
 692             try
 693             {
 694                 if (md->cFREEZE)
 695                 {
 696                     gf = md->cFREEZE[i];
 697                 }
 698                 for (int m = 0; m < DIM; m++)
 699                 {
 700                     if (ir->opts.nFreeze[gf][m])
 701                     {
 702                         x2[i][m] = x1[i][m];
 703                     }
 704                     else
 705                     {
 706                         x2[i][m] = x1[i][m] + a * f[i][m];
 707                     }
 708                 }
 709             }
 710             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 711         }
 712
 713         if (s2->flags & enumValueToBitMask(StateEntry::Cgp))
 714         {
 715             /* Copy the CG p vector */
 716             const rvec* p1 = s1->cg_p.rvec_array();
 717             rvec*       p2 = s2->cg_p.rvec_array();
 718 #pragma omp for schedule(static) nowait
 719             for (int i = start; i < end; i++)
 720             {
 721                 // Trivial OpenMP block that does not throw
 722                 copy_rvec(p1[i], p2[i]);
 723             }
 724         }
 725
 726         if (DOMAINDECOMP(cr))
 727         {
 728             /* OpenMP does not supported unsigned loop variables */
 729 #pragma omp for schedule(static) nowait
 730             for (gmx::index i = 0; i < gmx::ssize(s2->cg_gl); i++)
 731             {
 732                 s2->cg_gl[i] = s1->cg_gl[i];
 733             }
 734         }
 735     }
 736
 737     if (DOMAINDECOMP(cr))
 738     {
 739         s2->ddp_count       = s1->ddp_count;
 740         s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 741     }
 742
 743     if (constr)
 744     {
 745         dvdl_constr = 0;
 746         validStep   = constr->apply(TRUE,
 747                                   TRUE,
 748                                   count,
 749                                   0,
 750                                   1.0,
 751                                   s1->x.arrayRefWithPadding(),
 752                                   s2->x.arrayRefWithPadding(),
 753                                   ArrayRef<RVec>(),
 754                                   s2->box,
 755                                   s2->lambda[FreeEnergyPerturbationCouplingType::Bonded],
 756                                   &dvdl_constr,
 757                                   gmx::ArrayRefWithPadding<RVec>(),
 758                                   false,
 759                                   nullptr,
 760                                   gmx::ConstraintVariable::Positions);
 761
 762         if (cr->nnodes > 1)
 763         {
 764             /* This global reduction will affect performance at high
 765              * parallelization, but we can not really avoid it.
 766              * But usually EM is not run at high parallelization.
 767              */
 768             int reductionBuffer = static_cast<int>(!validStep);
 769             gmx_sumi(1, &reductionBuffer, cr);
 770             validStep = (reductionBuffer == 0);
 771         }
 772
 773         // We should move this check to the different minimizers
 774         if (!validStep && ir->eI != IntegrationAlgorithm::Steep)
 775         {
 776             gmx_fatal(FARGS,
 777                       "The coordinates could not be constrained. Minimizer '%s' can not handle "
 778                       "constraint failures, use minimizer '%s' before using '%s'.",
 779                       enumValueToString(ir->eI),
 780                       enumValueToString(IntegrationAlgorithm::Steep),
 781                       enumValueToString(ir->eI));
 782         }
 783     }
 784
 785     return validStep;
 786 }
 787
 788 //! Prepare EM for using domain decomposition parallellization
 789 static void em_dd_partition_system(FILE*                fplog,
 790                                    const gmx::MDLogger& mdlog,
 791                                    int                  step,
 792                                    const t_commrec*     cr,
 793                                    const gmx_mtop_t&    top_global,
 794                                    const t_inputrec*    ir,
 795                                    gmx::ImdSession*     imdSession,
 796                                    pull_t*              pull_work,
 797                                    em_state_t*          ems,
 798                                    gmx_localtop_t*      top,
 799                                    gmx::MDAtoms*        mdAtoms,
 800                                    t_forcerec*          fr,
 801                                    VirtualSitesHandler* vsite,
 802                                    gmx::Constraints*    constr,
 803                                    t_nrnb*              nrnb,
 804                                    gmx_wallcycle*       wcycle)
 805 {
 806     /* Repartition the domain decomposition */
 807     dd_partition_system(fplog,
 808                         mdlog,
 809                         step,
 810                         cr,
 811                         FALSE,
 812                         1,
 813                         nullptr,
 814                         top_global,
 815                         *ir,
 816                         imdSession,
 817                         pull_work,
 818                         &ems->s,
 819                         &ems->f,
 820                         mdAtoms,
 821                         top,
 822                         fr,
 823                         vsite,
 824                         constr,
 825                         nrnb,
 826                         wcycle,
 827                         FALSE);
 828     dd_store_state(*cr->dd, &ems->s);
 829 }
 830
 831 namespace
 832 {
 833
 834 //! Copy coordinates, OpenMP parallelized, from \p refCoords to coords
 835 void setCoordinates(std::vector<RVec>* coords, ArrayRef<const RVec> refCoords)
 836 {
 837     coords->resize(refCoords.size());
 838
 839     const int gmx_unused nthreads = gmx_omp_nthreads_get(ModuleMultiThread::Update);
 840 #pragma omp parallel for num_threads(nthreads) schedule(static)
 841     for (int i = 0; i < ssize(refCoords); i++)
 842     {
 843         (*coords)[i] = refCoords[i];
 844     }
 845 }
 846
 847 //! Returns the maximum difference an atom moved between two coordinate sets, over all ranks
 848 real maxCoordinateDifference(ArrayRef<const RVec> coords1, ArrayRef<const RVec> coords2, MPI_Comm mpiCommMyGroup)
 849 {
 850     GMX_RELEASE_ASSERT(coords1.size() == coords2.size(), "Coordinate counts should match");
 851
 852     real maxDiffSquared = 0;
 853
 854     const int gmx_unused nthreads = gmx_omp_nthreads_get(ModuleMultiThread::Update);
 855 #pragma omp parallel for reduction(max : maxDiffSquared) num_threads(nthreads) schedule(static)
 856     for (int i = 0; i < ssize(coords1); i++)
 857     {
 858         maxDiffSquared = std::max(maxDiffSquared, gmx::norm2(coords1[i] - coords2[i]));
 859     }
 860
 861 #if GMX_MPI
 862     int numRanks = 1;
 863     if (mpiCommMyGroup != MPI_COMM_NULL)
 864     {
 865         MPI_Comm_size(mpiCommMyGroup, &numRanks);
 866     }
 867     if (numRanks > 1)
 868     {
 869         real maxDiffSquaredReduced;
 870         MPI_Allreduce(
 871                 &maxDiffSquared, &maxDiffSquaredReduced, 1, GMX_DOUBLE ? MPI_DOUBLE : MPI_FLOAT, MPI_MAX, mpiCommMyGroup);
 872         maxDiffSquared = maxDiffSquaredReduced;
 873     }
 874 #else
 875     GMX_UNUSED_VALUE(mpiCommMyGroup);
 876 #endif
 877
 878     return std::sqrt(maxDiffSquared);
 879 }
 880
 881 /*! \brief Class to handle the work of setting and doing an energy evaluation.
 882  *
 883  * This class is a mere aggregate of parameters to pass to evaluate an
 884  * energy, so that future changes to names and types of them consume
 885  * less time when refactoring other code.
 886  *
 887  * Aggregate initialization is used, for which the chief risk is that
 888  * if a member is added at the end and not all initializer lists are
 889  * updated, then the member will be value initialized, which will
 890  * typically mean initialization to zero.
 891  *
 892  * Use a braced initializer list to construct one of these. */
 893 class EnergyEvaluator
 894 {
 895 public:
 896     /*! \brief Evaluates an energy on the state in \c ems.
 897      *
 898      * \todo In practice, the same objects mu_tot, vir, and pres
 899      * are always passed to this function, so we would rather have
 900      * them as data members. However, their C-array types are
 901      * unsuited for aggregate initialization. When the types
 902      * improve, the call signature of this method can be reduced.
 903      */
 904     void run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst);
 905     //! Handles logging (deprecated).
 906     FILE* fplog;
 907     //! Handles logging.
 908     const gmx::MDLogger& mdlog;
 909     //! Handles communication.
 910     const t_commrec* cr;
 911     //! Coordinates multi-simulations.
 912     const gmx_multisim_t* ms;
 913     //! Holds the simulation topology.
 914     const gmx_mtop_t& top_global;
 915     //! Holds the domain topology.
 916     gmx_localtop_t* top;
 917     //! User input options.
 918     const t_inputrec* inputrec;
 919     //! The Interactive Molecular Dynamics session.
 920     gmx::ImdSession* imdSession;
 921     //! The pull work object.
 922     pull_t* pull_work;
 923     //! Manages flop accounting.
 924     t_nrnb* nrnb;
 925     //! Manages wall cycle accounting.
 926     gmx_wallcycle* wcycle;
 927     //! Coordinates global reduction.
 928     gmx_global_stat_t gstat;
 929     //! Handles virtual sites.
 930     VirtualSitesHandler* vsite;
 931     //! Handles constraints.
 932     gmx::Constraints* constr;
 933     //! Per-atom data for this domain.
 934     gmx::MDAtoms* mdAtoms;
 935     //! Handles how to calculate the forces.
 936     t_forcerec* fr;
 937     //! Schedule of force-calculation work each step for this task.
 938     MdrunScheduleWorkload* runScheduleWork;
 939     //! Stores the computed energies.
 940     gmx_enerdata_t* enerd;
 941     //! The DD partitioning count at which the pair list was generated
 942     int ddpCountPairSearch;
 943     //! The local coordinates that were used for pair searching, stored for computing displacements
 944     std::vector<RVec> pairSearchCoordinates;
 945 };
 946
 947 void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst)
 948 {
 949     real     t;
 950     gmx_bool bNS;
 951     tensor   force_vir, shake_vir, ekin;
 952     real     dvdl_constr;
 953     real     terminate = 0;
 954
 955     /* Set the time to the initial time, the time does not change during EM */
 956     t = inputrec->init_t;
 957
 958     if (vsite)
 959     {
 960         vsite->construct(ems->s.x, {}, ems->s.box, gmx::VSiteOperation::Positions);
 961     }
 962
 963     // Compute the buffer size of the pair list
 964     const real bufferSize = inputrec->rlist - std::max(inputrec->rcoulomb, inputrec->rvdw);
 965
 966     if (bFirst || bufferSize <= 0 || (DOMAINDECOMP(cr) && ems->s.ddp_count != ddpCountPairSearch))
 967     {
 968         /* This is the first state or an old state used before the last ns */
 969         bNS = TRUE;
 970     }
 971     else
 972     {
 973         // We need to generate a new pairlist when one atom moved more than half the buffer size
 974         ArrayRef<const RVec> localCoordinates =
 975                 ArrayRef<const RVec>(ems->s.x).subArray(0, mdAtoms->mdatoms()->homenr);
 976         bNS = 2 * maxCoordinateDifference(pairSearchCoordinates, localCoordinates, cr->mpi_comm_mygroup)
 977               > bufferSize;
 978     }
 979
 980     if (DOMAINDECOMP(cr) && bNS)
 981     {
 982         /* Repartition the domain decomposition */
 983         em_dd_partition_system(
 984                 fplog, mdlog, count, cr, top_global, inputrec, imdSession, pull_work, ems, top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
 985         ddpCountPairSearch = cr->dd->ddp_count;
 986     }
 987
 988     /* Store the local coordinates that will be used in the pair search, after we re-partitioned */
 989     if (bufferSize > 0 && bNS)
 990     {
 991         ArrayRef<const RVec> localCoordinates =
 992                 constArrayRefFromArray(ems->s.x.data(), mdAtoms->mdatoms()->homenr);
 993         setCoordinates(&pairSearchCoordinates, localCoordinates);
 994     }
 995
 996     /* Calc force & energy on new trial position  */
 997     /* do_force always puts the charge groups in the box and shifts again
 998      * We do not unshift, so molecules are always whole in congrad.c
 999      */
1000     do_force(fplog,
1001              cr,
1002              ms,
1003              *inputrec,
1004              nullptr,
1005              nullptr,
1006              imdSession,
1007              pull_work,
1008              count,
1009              nrnb,
1010              wcycle,
1011              top,
1012              ems->s.box,
1013              ems->s.x.arrayRefWithPadding(),
1014              &ems->s.hist,
1015              &ems->f.view(),
1016              force_vir,
1017              mdAtoms->mdatoms(),
1018              enerd,
1019              ems->s.lambda,
1020              fr,
1021              runScheduleWork,
1022              vsite,
1023              mu_tot,
1024              t,
1025              nullptr,
1026              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY
1027                      | (bNS ? GMX_FORCE_NS : 0),
1028              DDBalanceRegionHandler(cr));
1029
1030     /* Clear the unused shake virial and pressure */
1031     clear_mat(shake_vir);
1032     clear_mat(pres);
1033
1034     /* Communicate stuff when parallel */
1035     if (PAR(cr) && inputrec->eI != IntegrationAlgorithm::NM)
1036     {
1037         wallcycle_start(wcycle, WallCycleCounter::MoveE);
1038
1039         global_stat(*gstat,
1040                     cr,
1041                     enerd,
1042                     force_vir,
1043                     shake_vir,
1044                     *inputrec,
1045                     nullptr,
1046                     gmx::ArrayRef<real>{},
1047                     nullptr,
1048                     std::vector<real>(1, terminate),
1049                     FALSE,
1050                     CGLO_ENERGY | CGLO_PRESSURE | CGLO_CONSTRAINT);
1051
1052         wallcycle_stop(wcycle, WallCycleCounter::MoveE);
1053     }
1054
1055     if (fr->dispersionCorrection)
1056     {
1057         /* Calculate long range corrections to pressure and energy */
1058         const DispersionCorrection::Correction correction = fr->dispersionCorrection->calculate(
1059                 ems->s.box, ems->s.lambda[FreeEnergyPerturbationCouplingType::Vdw]);
1060
1061         enerd->term[F_DISPCORR] = correction.energy;
1062         enerd->term[F_EPOT] += correction.energy;
1063         enerd->term[F_PRES] += correction.pressure;
1064         enerd->term[F_DVDL] += correction.dvdl;
1065     }
1066     else
1067     {
1068         enerd->term[F_DISPCORR] = 0;
1069     }
1070
1071     ems->epot = enerd->term[F_EPOT];
1072
1073     if (constr)
1074     {
1075         /* Project out the constraint components of the force */
1076         bool needsLogging  = false;
1077         bool computeEnergy = false;
1078         bool computeVirial = true;
1079         dvdl_constr        = 0;
1080         auto f             = ems->f.view().forceWithPadding();
1081         constr->apply(needsLogging,
1082                       computeEnergy,
1083                       count,
1084                       0,
1085                       1.0,
1086                       ems->s.x.arrayRefWithPadding(),
1087                       f,
1088                       f.unpaddedArrayRef(),
1089                       ems->s.box,
1090                       ems->s.lambda[FreeEnergyPerturbationCouplingType::Bonded],
1091                       &dvdl_constr,
1092                       gmx::ArrayRefWithPadding<RVec>(),
1093                       computeVirial,
1094                       shake_vir,
1095                       gmx::ConstraintVariable::ForceDispl);
1096         enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1097         m_add(force_vir, shake_vir, vir);
1098     }
1099     else
1100     {
1101         copy_mat(force_vir, vir);
1102     }
1103
1104     clear_mat(ekin);
1105     enerd->term[F_PRES] = calc_pres(fr->pbcType, inputrec->nwall, ems->s.box, ekin, vir, pres);
1106
1107     if (inputrec->efep != FreeEnergyPerturbationType::No)
1108     {
1109         accumulateKineticLambdaComponents(enerd, ems->s.lambda, *inputrec->fepvals);
1110     }
1111
1112     if (EI_ENERGY_MINIMIZATION(inputrec->eI))
1113     {
1114         get_state_f_norm_max(cr, &(inputrec->opts), mdAtoms->mdatoms(), ems);
1115     }
1116 }
1117
1118 } // namespace
1119
1120 //! Parallel utility summing energies and forces
1121 static double reorder_partsum(const t_commrec*  cr,
1122                               const t_grpopts*  opts,
1123                               const gmx_mtop_t& top_global,
1124                               const em_state_t* s_min,
1125                               const em_state_t* s_b)
1126 {
1127     if (debug)
1128     {
1129         fprintf(debug, "Doing reorder_partsum\n");
1130     }
1131
1132     auto fm = s_min->f.view().force();
1133     auto fb = s_b->f.view().force();
1134
1135     /* Collect fm in a global vector fmg.
1136      * This conflicts with the spirit of domain decomposition,
1137      * but to fully optimize this a much more complicated algorithm is required.
1138      */
1139     const int natoms = top_global.natoms;
1140     rvec*     fmg;
1141     snew(fmg, natoms);
1142
1143     gmx::ArrayRef<const int> indicesMin = s_min->s.cg_gl;
1144     int                      i          = 0;
1145     for (int a : indicesMin)
1146     {
1147         copy_rvec(fm[i], fmg[a]);
1148         i++;
1149     }
1150     gmx_sum(top_global.natoms * 3, fmg[0], cr);
1151
1152     /* Now we will determine the part of the sum for the cgs in state s_b */
1153     gmx::ArrayRef<const int> indicesB = s_b->s.cg_gl;
1154
1155     double partsum                        = 0;
1156     i                                     = 0;
1157     int                                gf = 0;
1158     gmx::ArrayRef<const unsigned char> grpnrFREEZE =
1159             top_global.groups.groupNumbers[SimulationAtomGroupType::Freeze];
1160     for (int a : indicesB)
1161     {
1162         if (!grpnrFREEZE.empty())
1163         {
1164             gf = grpnrFREEZE[i];
1165         }
1166         for (int m = 0; m < DIM; m++)
1167         {
1168             if (!opts->nFreeze[gf][m])
1169             {
1170                 partsum += (fb[i][m] - fmg[a][m]) * fb[i][m];
1171             }
1172         }
1173         i++;
1174     }
1175
1176     sfree(fmg);
1177
1178     return partsum;
1179 }
1180
1181 //! Print some stuff, like beta, whatever that means.
1182 static real pr_beta(const t_commrec*  cr,
1183                     const t_grpopts*  opts,
1184                     t_mdatoms*        mdatoms,
1185                     const gmx_mtop_t& top_global,
1186                     const em_state_t* s_min,
1187                     const em_state_t* s_b)
1188 {
1189     double sum;
1190
1191     /* This is just the classical Polak-Ribiere calculation of beta;
1192      * it looks a bit complicated since we take freeze groups into account,
1193      * and might have to sum it in parallel runs.
1194      */
1195
1196     if (!DOMAINDECOMP(cr)
1197         || (s_min->s.ddp_count == cr->dd->ddp_count && s_b->s.ddp_count == cr->dd->ddp_count))
1198     {
1199         auto fm = s_min->f.view().force();
1200         auto fb = s_b->f.view().force();
1201         sum     = 0;
1202         int gf  = 0;
1203         /* This part of code can be incorrect with DD,
1204          * since the atom ordering in s_b and s_min might differ.
1205          */
1206         for (int i = 0; i < mdatoms->homenr; i++)
1207         {
1208             if (mdatoms->cFREEZE)
1209             {
1210                 gf = mdatoms->cFREEZE[i];
1211             }
1212             for (int m = 0; m < DIM; m++)
1213             {
1214                 if (!opts->nFreeze[gf][m])
1215                 {
1216                     sum += (fb[i][m] - fm[i][m]) * fb[i][m];
1217                 }
1218             }
1219         }
1220     }
1221     else
1222     {
1223         /* We need to reorder cgs while summing */
1224         sum = reorder_partsum(cr, opts, top_global, s_min, s_b);
1225     }
1226     if (PAR(cr))
1227     {
1228         gmx_sumd(1, &sum, cr);
1229     }
1230
1231     return sum / gmx::square(s_min->fnorm);
1232 }
1233
1234 namespace gmx
1235 {
1236
1237 void LegacySimulator::do_cg()
1238 {
1239     const char* CG = "Polak-Ribiere Conjugate Gradients";
1240
1241     gmx_localtop_t    top(top_global.ffparams);
1242     gmx_global_stat_t gstat;
1243     double            tmp, minstep;
1244     real              stepsize;
1245     real              a, b, c, beta = 0.0;
1246     real              epot_repl = 0;
1247     real              pnorm;
1248     gmx_bool          converged, foundlower;
1249     rvec              mu_tot = { 0 };
1250     gmx_bool          do_log = FALSE, do_ene = FALSE, do_x, do_f;
1251     tensor            vir, pres;
1252     int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
1253     int               m, step, nminstep;
1254     auto*             mdatoms = mdAtoms->mdatoms();
1255
1256     GMX_LOG(mdlog.info)
1257             .asParagraph()
1258             .appendText(
1259                     "Note that activating conjugate gradient energy minimization via the "
1260                     "integrator .mdp option and the command gmx mdrun may "
1261                     "be available in a different form in a future version of GROMACS, "
1262                     "e.g. gmx minimize and an .mdp option.");
1263
1264     step = 0;
1265
1266     if (MASTER(cr))
1267     {
1268         // In CG, the state is extended with a search direction
1269         state_global->flags |= enumValueToBitMask(StateEntry::Cgp);
1270
1271         // Ensure the extra per-atom state array gets allocated
1272         state_change_natoms(state_global, state_global->natoms);
1273
1274         // Initialize the search direction to zero
1275         for (RVec& cg_p : state_global->cg_p)
1276         {
1277             cg_p = { 0, 0, 0 };
1278         }
1279     }
1280
1281     /* Create 4 states on the stack and extract pointers that we will swap */
1282     em_state_t  s0{}, s1{}, s2{}, s3{};
1283     em_state_t* s_min = &s0;
1284     em_state_t* s_a   = &s1;
1285     em_state_t* s_b   = &s2;
1286     em_state_t* s_c   = &s3;
1287
1288     /* Init em and store the local state in s_min */
1289     init_em(fplog,
1290             mdlog,
1291             CG,
1292             cr,
1293             inputrec,
1294             imdSession,
1295             pull_work,
1296             state_global,
1297             top_global,
1298             s_min,
1299             &top,
1300             nrnb,
1301             fr,
1302             mdAtoms,
1303             &gstat,
1304             vsite,
1305             constr,
1306             nullptr);
1307     const bool        simulationsShareState = false;
1308     gmx_mdoutf*       outf                  = init_mdoutf(fplog,
1309                                    nfile,
1310                                    fnm,
1311                                    mdrunOptions,
1312                                    cr,
1313                                    outputProvider,
1314                                    mdModulesNotifiers,
1315                                    inputrec,
1316                                    top_global,
1317                                    nullptr,
1318                                    wcycle,
1319                                    StartingBehavior::NewSimulation,
1320                                    simulationsShareState,
1321                                    ms);
1322     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
1323                                    top_global,
1324                                    *inputrec,
1325                                    pull_work,
1326                                    nullptr,
1327                                    false,
1328                                    StartingBehavior::NewSimulation,
1329                                    simulationsShareState,
1330                                    mdModulesNotifiers);
1331
1332     /* Print to log file */
1333     print_em_start(fplog, cr, walltime_accounting, wcycle, CG);
1334
1335     /* Max number of steps */
1336     number_steps = inputrec->nsteps;
1337
1338     if (MASTER(cr))
1339     {
1340         sp_header(stderr, CG, inputrec->em_tol, number_steps);
1341     }
1342     if (fplog)
1343     {
1344         sp_header(fplog, CG, inputrec->em_tol, number_steps);
1345     }
1346
1347     EnergyEvaluator energyEvaluator{ fplog,  mdlog,           cr,         ms,        top_global,
1348                                      &top,   inputrec,        imdSession, pull_work, nrnb,
1349                                      wcycle, gstat,           vsite,      constr,    mdAtoms,
1350                                      fr,     runScheduleWork, enerd,      -1,        {} };
1351     /* Call the force routine and some auxiliary (neighboursearching etc.) */
1352     /* do_force always puts the charge groups in the box and shifts again
1353      * We do not unshift, so molecules are always whole in congrad.c
1354      */
1355     energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE);
1356
1357     if (MASTER(cr))
1358     {
1359         /* Copy stuff to the energy bin for easy printing etc. */
1360         matrix nullBox = {};
1361         energyOutput.addDataAtEnergyStep(false,
1362                                          false,
1363                                          static_cast<double>(step),
1364                                          mdatoms->tmass,
1365                                          enerd,
1366                                          nullptr,
1367                                          nullptr,
1368                                          nullBox,
1369                                          PTCouplingArrays(),
1370                                          0,
1371                                          vir,
1372                                          pres,
1373                                          nullptr,
1374                                          mu_tot,
1375                                          constr);
1376
1377         EnergyOutput::printHeader(fplog, step, step);
1378         energyOutput.printStepToEnergyFile(
1379                 mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, fr->fcdata.get(), nullptr);
1380     }
1381
1382     /* Estimate/guess the initial stepsize */
1383     stepsize = inputrec->em_stepsize / s_min->fnorm;
1384
1385     if (MASTER(cr))
1386     {
1387         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1388         fprintf(stderr, "   F-max             = %12.5e on atom %d\n", s_min->fmax, s_min->a_fmax + 1);
1389         fprintf(stderr, "   F-Norm            = %12.5e\n", s_min->fnorm / sqrtNumAtoms);
1390         fprintf(stderr, "\n");
1391         /* and copy to the log file too... */
1392         fprintf(fplog, "   F-max             = %12.5e on atom %d\n", s_min->fmax, s_min->a_fmax + 1);
1393         fprintf(fplog, "   F-Norm            = %12.5e\n", s_min->fnorm / sqrtNumAtoms);
1394         fprintf(fplog, "\n");
1395     }
1396     /* Start the loop over CG steps.
1397      * Each successful step is counted, and we continue until
1398      * we either converge or reach the max number of steps.
1399      */
1400     converged = FALSE;
1401     for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
1402     {
1403
1404         /* start taking steps in a new direction
1405          * First time we enter the routine, beta=0, and the direction is
1406          * simply the negative gradient.
1407          */
1408
1409         /* Calculate the new direction in p, and the gradient in this direction, gpa */
1410         gmx::ArrayRef<gmx::RVec>       pm  = s_min->s.cg_p;
1411         gmx::ArrayRef<const gmx::RVec> sfm = s_min->f.view().force();
1412         double                         gpa = 0;
1413         int                            gf  = 0;
1414         for (int i = 0; i < mdatoms->homenr; i++)
1415         {
1416             if (mdatoms->cFREEZE)
1417             {
1418                 gf = mdatoms->cFREEZE[i];
1419             }
1420             for (m = 0; m < DIM; m++)
1421             {
1422                 if (!inputrec->opts.nFreeze[gf][m])
1423                 {
1424                     pm[i][m] = sfm[i][m] + beta * pm[i][m];
1425                     gpa -= pm[i][m] * sfm[i][m];
1426                     /* f is negative gradient, thus the sign */
1427                 }
1428                 else
1429                 {
1430                     pm[i][m] = 0;
1431                 }
1432             }
1433         }
1434
1435         /* Sum the gradient along the line across CPUs */
1436         if (PAR(cr))
1437         {
1438             gmx_sumd(1, &gpa, cr);
1439         }
1440
1441         /* Calculate the norm of the search vector */
1442         get_f_norm_max(cr, &(inputrec->opts), mdatoms, pm, &pnorm, nullptr, nullptr);
1443
1444         /* Just in case stepsize reaches zero due to numerical precision... */
1445         if (stepsize <= 0)
1446         {
1447             stepsize = inputrec->em_stepsize / pnorm;
1448         }
1449
1450         /*
1451          * Double check the value of the derivative in the search direction.
1452          * If it is positive it must be due to the old information in the
1453          * CG formula, so just remove that and start over with beta=0.
1454          * This corresponds to a steepest descent step.
1455          */
1456         if (gpa > 0)
1457         {
1458             beta = 0;
1459             step--;   /* Don't count this step since we are restarting */
1460             continue; /* Go back to the beginning of the big for-loop */
1461         }
1462
1463         /* Calculate minimum allowed stepsize, before the average (norm)
1464          * relative change in coordinate is smaller than precision
1465          */
1466         minstep      = 0;
1467         auto s_min_x = makeArrayRef(s_min->s.x);
1468         for (int i = 0; i < mdatoms->homenr; i++)
1469         {
1470             for (m = 0; m < DIM; m++)
1471             {
1472                 tmp = fabs(s_min_x[i][m]);
1473                 if (tmp < 1.0)
1474                 {
1475                     tmp = 1.0;
1476                 }
1477                 tmp = pm[i][m] / tmp;
1478                 minstep += tmp * tmp;
1479             }
1480         }
1481         /* Add up from all CPUs */
1482         if (PAR(cr))
1483         {
1484             gmx_sumd(1, &minstep, cr);
1485         }
1486
1487         minstep = GMX_REAL_EPS / sqrt(minstep / (3 * top_global.natoms));
1488
1489         if (stepsize < minstep)
1490         {
1491             converged = TRUE;
1492             break;
1493         }
1494
1495         /* Write coordinates if necessary */
1496         do_x = do_per_step(step, inputrec->nstxout);
1497         do_f = do_per_step(step, inputrec->nstfout);
1498
1499         write_em_traj(
1500                 fplog, cr, outf, do_x, do_f, nullptr, top_global, inputrec, step, s_min, state_global, observablesHistory);
1501
1502         /* Take a step downhill.
1503          * In theory, we should minimize the function along this direction.
1504          * That is quite possible, but it turns out to take 5-10 function evaluations
1505          * for each line. However, we dont really need to find the exact minimum -
1506          * it is much better to start a new CG step in a modified direction as soon
1507          * as we are close to it. This will save a lot of energy evaluations.
1508          *
1509          * In practice, we just try to take a single step.
1510          * If it worked (i.e. lowered the energy), we increase the stepsize but
1511          * the continue straight to the next CG step without trying to find any minimum.
1512          * If it didn't work (higher energy), there must be a minimum somewhere between
1513          * the old position and the new one.
1514          *
1515          * Due to the finite numerical accuracy, it turns out that it is a good idea
1516          * to even accept a SMALL increase in energy, if the derivative is still downhill.
1517          * This leads to lower final energies in the tests I've done. / Erik
1518          */
1519         s_a->epot = s_min->epot;
1520         a         = 0.0;
1521         c         = a + stepsize; /* reference position along line is zero */
1522
1523         if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
1524         {
1525             em_dd_partition_system(fplog,
1526                                    mdlog,
1527                                    step,
1528                                    cr,
1529                                    top_global,
1530                                    inputrec,
1531                                    imdSession,
1532                                    pull_work,
1533                                    s_min,
1534                                    &top,
1535                                    mdAtoms,
1536                                    fr,
1537                                    vsite,
1538                                    constr,
1539                                    nrnb,
1540                                    wcycle);
1541         }
1542
1543         /* Take a trial step (new coords in s_c) */
1544         do_em_step(cr, inputrec, mdatoms, s_min, c, s_min->s.cg_p.constArrayRefWithPadding(), s_c, constr, -1);
1545
1546         neval++;
1547         /* Calculate energy for the trial step */
1548         energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE);
1549
1550         /* Calc derivative along line */
1551         const rvec*                    pc  = s_c->s.cg_p.rvec_array();
1552         gmx::ArrayRef<const gmx::RVec> sfc = s_c->f.view().force();
1553         double                         gpc = 0;
1554         for (int i = 0; i < mdatoms->homenr; i++)
1555         {
1556             for (m = 0; m < DIM; m++)
1557             {
1558                 gpc -= pc[i][m] * sfc[i][m]; /* f is negative gradient, thus the sign */
1559             }
1560         }
1561         /* Sum the gradient along the line across CPUs */
1562         if (PAR(cr))
1563         {
1564             gmx_sumd(1, &gpc, cr);
1565         }
1566
1567         /* This is the max amount of increase in energy we tolerate */
1568         tmp = std::sqrt(GMX_REAL_EPS) * fabs(s_a->epot);
1569
1570         /* Accept the step if the energy is lower, or if it is not significantly higher
1571          * and the line derivative is still negative.
1572          */
1573         if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
1574         {
1575             foundlower = TRUE;
1576             /* Great, we found a better energy. Increase step for next iteration
1577              * if we are still going down, decrease it otherwise
1578              */
1579             if (gpc < 0)
1580             {
1581                 stepsize *= 1.618034; /* The golden section */
1582             }
1583             else
1584             {
1585                 stepsize *= 0.618034; /* 1/golden section */
1586             }
1587         }
1588         else
1589         {
1590             /* New energy is the same or higher. We will have to do some work
1591              * to find a smaller value in the interval. Take smaller step next time!
1592              */
1593             foundlower = FALSE;
1594             stepsize *= 0.618034;
1595         }
1596
1597
1598         /* OK, if we didn't find a lower value we will have to locate one now - there must
1599          * be one in the interval [a=0,c].
1600          * The same thing is valid here, though: Don't spend dozens of iterations to find
1601          * the line minimum. We try to interpolate based on the derivative at the endpoints,
1602          * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1603          *
1604          * I also have a safeguard for potentially really pathological functions so we never
1605          * take more than 20 steps before we give up ...
1606          *
1607          * If we already found a lower value we just skip this step and continue to the update.
1608          */
1609         double gpb;
1610         if (!foundlower)
1611         {
1612             nminstep = 0;
1613
1614             do
1615             {
1616                 /* Select a new trial point.
1617                  * If the derivatives at points a & c have different sign we interpolate to zero,
1618                  * otherwise just do a bisection.
1619                  */
1620                 if (gpa < 0 && gpc > 0)
1621                 {
1622                     b = a + gpa * (a - c) / (gpc - gpa);
1623                 }
1624                 else
1625                 {
1626                     b = 0.5 * (a + c);
1627                 }
1628
1629                 /* safeguard if interpolation close to machine accuracy causes errors:
1630                  * never go outside the interval
1631                  */
1632                 if (b <= a || b >= c)
1633                 {
1634                     b = 0.5 * (a + c);
1635                 }
1636
1637                 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
1638                 {
1639                     /* Reload the old state */
1640                     em_dd_partition_system(fplog,
1641                                            mdlog,
1642                                            -1,
1643                                            cr,
1644                                            top_global,
1645                                            inputrec,
1646                                            imdSession,
1647                                            pull_work,
1648                                            s_min,
1649                                            &top,
1650                                            mdAtoms,
1651                                            fr,
1652                                            vsite,
1653                                            constr,
1654                                            nrnb,
1655                                            wcycle);
1656                 }
1657
1658                 /* Take a trial step to this new point - new coords in s_b */
1659                 do_em_step(cr, inputrec, mdatoms, s_min, b, s_min->s.cg_p.constArrayRefWithPadding(), s_b, constr, -1);
1660
1661                 neval++;
1662                 /* Calculate energy for the trial step */
1663                 energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE);
1664
1665                 /* p does not change within a step, but since the domain decomposition
1666                  * might change, we have to use cg_p of s_b here.
1667                  */
1668                 const rvec*                    pb  = s_b->s.cg_p.rvec_array();
1669                 gmx::ArrayRef<const gmx::RVec> sfb = s_b->f.view().force();
1670                 gpb                                = 0;
1671                 for (int i = 0; i < mdatoms->homenr; i++)
1672                 {
1673                     for (m = 0; m < DIM; m++)
1674                     {
1675                         gpb -= pb[i][m] * sfb[i][m]; /* f is negative gradient, thus the sign */
1676                     }
1677                 }
1678                 /* Sum the gradient along the line across CPUs */
1679                 if (PAR(cr))
1680                 {
1681                     gmx_sumd(1, &gpb, cr);
1682                 }
1683
1684                 if (debug)
1685                 {
1686                     fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", s_a->epot, s_b->epot, s_c->epot, gpb);
1687                 }
1688
1689                 epot_repl = s_b->epot;
1690
1691                 /* Keep one of the intervals based on the value of the derivative at the new point */
1692                 if (gpb > 0)
1693                 {
1694                     /* Replace c endpoint with b */
1695                     swap_em_state(&s_b, &s_c);
1696                     c   = b;
1697                     gpc = gpb;
1698                 }
1699                 else
1700                 {
1701                     /* Replace a endpoint with b */
1702                     swap_em_state(&s_b, &s_a);
1703                     a   = b;
1704                     gpa = gpb;
1705                 }
1706
1707                 /*
1708                  * Stop search as soon as we find a value smaller than the endpoints.
1709                  * Never run more than 20 steps, no matter what.
1710                  */
1711                 nminstep++;
1712             } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && (nminstep < 20));
1713
1714             if (std::fabs(epot_repl - s_min->epot) < fabs(s_min->epot) * GMX_REAL_EPS || nminstep >= 20)
1715             {
1716                 /* OK. We couldn't find a significantly lower energy.
1717                  * If beta==0 this was steepest descent, and then we give up.
1718                  * If not, set beta=0 and restart with steepest descent before quitting.
1719                  */
1720                 if (beta == 0.0)
1721                 {
1722                     /* Converged */
1723                     converged = TRUE;
1724                     break;
1725                 }
1726                 else
1727                 {
1728                     /* Reset memory before giving up */
1729                     beta = 0.0;
1730                     continue;
1731                 }
1732             }
1733
1734             /* Select min energy state of A & C, put the best in B.
1735              */
1736             if (s_c->epot < s_a->epot)
1737             {
1738                 if (debug)
1739                 {
1740                     fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", s_c->epot, s_a->epot);
1741                 }
1742                 swap_em_state(&s_b, &s_c);
1743                 gpb = gpc;
1744             }
1745             else
1746             {
1747                 if (debug)
1748                 {
1749                     fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", s_a->epot, s_c->epot);
1750                 }
1751                 swap_em_state(&s_b, &s_a);
1752                 gpb = gpa;
1753             }
1754         }
1755         else
1756         {
1757             if (debug)
1758             {
1759                 fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", s_c->epot);
1760             }
1761             swap_em_state(&s_b, &s_c);
1762             gpb = gpc;
1763         }
1764
1765         /* new search direction */
1766         /* beta = 0 means forget all memory and restart with steepest descents. */
1767         if (nstcg && ((step % nstcg) == 0))
1768         {
1769             beta = 0.0;
1770         }
1771         else
1772         {
1773             /* s_min->fnorm cannot be zero, because then we would have converged
1774              * and broken out.
1775              */
1776
1777             /* Polak-Ribiere update.
1778              * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1779              */
1780             beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b);
1781         }
1782         /* Limit beta to prevent oscillations */
1783         if (fabs(beta) > 5.0)
1784         {
1785             beta = 0.0;
1786         }
1787
1788
1789         /* update positions */
1790         swap_em_state(&s_min, &s_b);
1791         gpa = gpb;
1792
1793         /* Print it if necessary */
1794         if (MASTER(cr))
1795         {
1796             if (mdrunOptions.verbose)
1797             {
1798                 double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1799                 fprintf(stderr,
1800                         "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1801                         step,
1802                         s_min->epot,
1803                         s_min->fnorm / sqrtNumAtoms,
1804                         s_min->fmax,
1805                         s_min->a_fmax + 1);
1806                 fflush(stderr);
1807             }
1808             /* Store the new (lower) energies */
1809             matrix nullBox = {};
1810             energyOutput.addDataAtEnergyStep(false,
1811                                              false,
1812                                              static_cast<double>(step),
1813                                              mdatoms->tmass,
1814                                              enerd,
1815                                              nullptr,
1816                                              nullptr,
1817                                              nullBox,
1818                                              PTCouplingArrays(),
1819                                              0,
1820                                              vir,
1821                                              pres,
1822                                              nullptr,
1823                                              mu_tot,
1824                                              constr);
1825
1826             do_log = do_per_step(step, inputrec->nstlog);
1827             do_ene = do_per_step(step, inputrec->nstenergy);
1828
1829             imdSession->fillEnergyRecord(step, TRUE);
1830
1831             if (do_log)
1832             {
1833                 EnergyOutput::printHeader(fplog, step, step);
1834             }
1835             energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf),
1836                                                do_ene,
1837                                                FALSE,
1838                                                FALSE,
1839                                                do_log ? fplog : nullptr,
1840                                                step,
1841                                                step,
1842                                                fr->fcdata.get(),
1843                                                nullptr);
1844         }
1845
1846         /* Send energies and positions to the IMD client if bIMD is TRUE. */
1847         if (MASTER(cr) && imdSession->run(step, TRUE, state_global->box, state_global->x, 0))
1848         {
1849             imdSession->sendPositionsAndEnergies();
1850         }
1851
1852         /* Stop when the maximum force lies below tolerance.
1853          * If we have reached machine precision, converged is already set to true.
1854          */
1855         converged = converged || (s_min->fmax < inputrec->em_tol);
1856
1857     } /* End of the loop */
1858
1859     if (converged)
1860     {
1861         step--; /* we never took that last step in this case */
1862     }
1863     if (s_min->fmax > inputrec->em_tol)
1864     {
1865         if (MASTER(cr))
1866         {
1867             warn_step(fplog, inputrec->em_tol, s_min->fmax, step - 1 == number_steps, FALSE);
1868         }
1869         converged = FALSE;
1870     }
1871
1872     if (MASTER(cr))
1873     {
1874         /* If we printed energy and/or logfile last step (which was the last step)
1875          * we don't have to do it again, but otherwise print the final values.
1876          */
1877         if (!do_log)
1878         {
1879             /* Write final value to log since we didn't do anything the last step */
1880             EnergyOutput::printHeader(fplog, step, step);
1881         }
1882         if (!do_ene || !do_log)
1883         {
1884             /* Write final energy file entries */
1885             energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf),
1886                                                !do_ene,
1887                                                FALSE,
1888                                                FALSE,
1889                                                !do_log ? fplog : nullptr,
1890                                                step,
1891                                                step,
1892                                                fr->fcdata.get(),
1893                                                nullptr);
1894         }
1895     }
1896
1897     /* Print some stuff... */
1898     if (MASTER(cr))
1899     {
1900         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
1901     }
1902
1903     /* IMPORTANT!
1904      * For accurate normal mode calculation it is imperative that we
1905      * store the last conformation into the full precision binary trajectory.
1906      *
1907      * However, we should only do it if we did NOT already write this step
1908      * above (which we did if do_x or do_f was true).
1909      */
1910     /* Note that with 0 < nstfout != nstxout we can end up with two frames
1911      * in the trajectory with the same step number.
1912      */
1913     do_x = !do_per_step(step, inputrec->nstxout);
1914     do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
1915
1916     write_em_traj(
1917             fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec, step, s_min, state_global, observablesHistory);
1918
1919
1920     if (MASTER(cr))
1921     {
1922         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1923         print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps, s_min, sqrtNumAtoms);
1924         print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, s_min, sqrtNumAtoms);
1925
1926         fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
1927     }
1928
1929     finish_em(cr, outf, walltime_accounting, wcycle);
1930
1931     /* To print the actual number of steps we needed somewhere */
1932     walltime_accounting_set_nsteps_done(walltime_accounting, step);
1933 }
1934
1935
1936 void LegacySimulator::do_lbfgs()
1937 {
1938     static const char* LBFGS = "Low-Memory BFGS Minimizer";
1939     em_state_t         ems;
1940     gmx_localtop_t     top(top_global.ffparams);
1941     gmx_global_stat_t  gstat;
1942     auto*              mdatoms = mdAtoms->mdatoms();
1943
1944     GMX_LOG(mdlog.info)
1945             .asParagraph()
1946             .appendText(
1947                     "Note that activating L-BFGS energy minimization via the "
1948                     "integrator .mdp option and the command gmx mdrun may "
1949                     "be available in a different form in a future version of GROMACS, "
1950                     "e.g. gmx minimize and an .mdp option.");
1951
1952     if (PAR(cr))
1953     {
1954         gmx_fatal(FARGS, "L-BFGS minimization only supports a single rank");
1955     }
1956
1957     if (nullptr != constr)
1958     {
1959         gmx_fatal(
1960                 FARGS,
1961                 "The combination of constraints and L-BFGS minimization is not implemented. Either "
1962                 "do not use constraints, or use another minimizer (e.g. steepest descent).");
1963     }
1964
1965     const int n        = 3 * state_global->natoms;
1966     const int nmaxcorr = inputrec->nbfgscorr;
1967
1968     std::vector<real> p(n);
1969     std::vector<real> rho(nmaxcorr);
1970     std::vector<real> alpha(nmaxcorr);
1971
1972     std::vector<std::vector<real>> dx(nmaxcorr);
1973     for (auto& dxCorr : dx)
1974     {
1975         dxCorr.resize(n);
1976     }
1977
1978     std::vector<std::vector<real>> dg(nmaxcorr);
1979     for (auto& dgCorr : dg)
1980     {
1981         dgCorr.resize(n);
1982     }
1983
1984     int step  = 0;
1985     int neval = 0;
1986
1987     /* Init em */
1988     init_em(fplog,
1989             mdlog,
1990             LBFGS,
1991             cr,
1992             inputrec,
1993             imdSession,
1994             pull_work,
1995             state_global,
1996             top_global,
1997             &ems,
1998             &top,
1999             nrnb,
2000             fr,
2001             mdAtoms,
2002             &gstat,
2003             vsite,
2004             constr,
2005             nullptr);
2006     const bool        simulationsShareState = false;
2007     gmx_mdoutf*       outf                  = init_mdoutf(fplog,
2008                                    nfile,
2009                                    fnm,
2010                                    mdrunOptions,
2011                                    cr,
2012                                    outputProvider,
2013                                    mdModulesNotifiers,
2014                                    inputrec,
2015                                    top_global,
2016                                    nullptr,
2017                                    wcycle,
2018                                    StartingBehavior::NewSimulation,
2019                                    simulationsShareState,
2020                                    ms);
2021     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
2022                                    top_global,
2023                                    *inputrec,
2024                                    pull_work,
2025                                    nullptr,
2026                                    false,
2027                                    StartingBehavior::NewSimulation,
2028                                    simulationsShareState,
2029                                    mdModulesNotifiers);
2030
2031     const int start = 0;
2032     const int end   = mdatoms->homenr;
2033
2034     /* We need 4 working states */
2035     em_state_t  s0{}, s1{}, s2{}, s3{};
2036     em_state_t* sa   = &s0;
2037     em_state_t* sb   = &s1;
2038     em_state_t* sc   = &s2;
2039     em_state_t* last = &s3;
2040     /* Initialize by copying the state from ems (we could skip x and f here) */
2041     *sa = ems;
2042     *sb = ems;
2043     *sc = ems;
2044
2045     /* Print to log file */
2046     print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS);
2047
2048     /* Max number of steps */
2049     const int number_steps = inputrec->nsteps;
2050
2051     /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
2052     std::vector<bool> frozen(n);
2053     int               gf = 0;
2054     for (int i = start; i < end; i++)
2055     {
2056         if (mdatoms->cFREEZE)
2057         {
2058             gf = mdatoms->cFREEZE[i];
2059         }
2060         for (int m = 0; m < DIM; m++)
2061         {
2062             frozen[3 * i + m] = (inputrec->opts.nFreeze[gf][m] != 0);
2063         }
2064     }
2065     if (MASTER(cr))
2066     {
2067         sp_header(stderr, LBFGS, inputrec->em_tol, number_steps);
2068     }
2069     if (fplog)
2070     {
2071         sp_header(fplog, LBFGS, inputrec->em_tol, number_steps);
2072     }
2073
2074     if (vsite)
2075     {
2076         vsite->construct(state_global->x, {}, state_global->box, VSiteOperation::Positions);
2077     }
2078
2079     /* Call the force routine and some auxiliary (neighboursearching etc.) */
2080     /* do_force always puts the charge groups in the box and shifts again
2081      * We do not unshift, so molecules are always whole
2082      */
2083     neval++;
2084     EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
2085                                      inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
2086                                      vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
2087     rvec            mu_tot;
2088     tensor          vir;
2089     tensor          pres;
2090     energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE);
2091
2092     if (MASTER(cr))
2093     {
2094         /* Copy stuff to the energy bin for easy printing etc. */
2095         matrix nullBox = {};
2096         energyOutput.addDataAtEnergyStep(false,
2097                                          false,
2098                                          static_cast<double>(step),
2099                                          mdatoms->tmass,
2100                                          enerd,
2101                                          nullptr,
2102                                          nullptr,
2103                                          nullBox,
2104                                          PTCouplingArrays(),
2105                                          0,
2106                                          vir,
2107                                          pres,
2108                                          nullptr,
2109                                          mu_tot,
2110                                          constr);
2111
2112         EnergyOutput::printHeader(fplog, step, step);
2113         energyOutput.printStepToEnergyFile(
2114                 mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, fr->fcdata.get(), nullptr);
2115     }
2116
2117     /* Set the initial step.
2118      * since it will be multiplied by the non-normalized search direction
2119      * vector (force vector the first time), we scale it by the
2120      * norm of the force.
2121      */
2122
2123     if (MASTER(cr))
2124     {
2125         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2126         fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
2127         fprintf(stderr, "   F-max             = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1);
2128         fprintf(stderr, "   F-Norm            = %12.5e\n", ems.fnorm / sqrtNumAtoms);
2129         fprintf(stderr, "\n");
2130         /* and copy to the log file too... */
2131         fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
2132         fprintf(fplog, "   F-max             = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1);
2133         fprintf(fplog, "   F-Norm            = %12.5e\n", ems.fnorm / sqrtNumAtoms);
2134         fprintf(fplog, "\n");
2135     }
2136
2137     // Point is an index to the memory of search directions, where 0 is the first one.
2138     int point = 0;
2139
2140     // Set initial search direction to the force (-gradient), or 0 for frozen particles.
2141     real* fInit = static_cast<real*>(ems.f.view().force().data()[0]);
2142     for (int i = 0; i < n; i++)
2143     {
2144         if (!frozen[i])
2145         {
2146             dx[point][i] = fInit[i]; /* Initial search direction */
2147         }
2148         else
2149         {
2150             dx[point][i] = 0;
2151         }
2152     }
2153
2154     // Stepsize will be modified during the search, and actually it is not critical
2155     // (the main efficiency in the algorithm comes from changing directions), but
2156     // we still need an initial value, so estimate it as the inverse of the norm
2157     // so we take small steps where the potential fluctuates a lot.
2158     double stepsize = 1.0 / ems.fnorm;
2159
2160     /* Start the loop over BFGS steps.
2161      * Each successful step is counted, and we continue until
2162      * we either converge or reach the max number of steps.
2163      */
2164
2165     bool do_log = true;
2166     bool do_ene = true;
2167
2168     int ncorr = 0;
2169
2170     /* Set the gradient from the force */
2171     bool converged = false;
2172     for (int step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
2173     {
2174
2175         /* Write coordinates if necessary */
2176         const bool do_x = do_per_step(step, inputrec->nstxout);
2177         const bool do_f = do_per_step(step, inputrec->nstfout);
2178
2179         int mdof_flags = 0;
2180         if (do_x)
2181         {
2182             mdof_flags |= MDOF_X;
2183         }
2184
2185         if (do_f)
2186         {
2187             mdof_flags |= MDOF_F;
2188         }
2189
2190         if (inputrec->bIMD)
2191         {
2192             mdof_flags |= MDOF_IMD;
2193         }
2194
2195         gmx::WriteCheckpointDataHolder checkpointDataHolder;
2196         mdoutf_write_to_trajectory_files(fplog,
2197                                          cr,
2198                                          outf,
2199                                          mdof_flags,
2200                                          top_global.natoms,
2201                                          step,
2202                                          static_cast<real>(step),
2203                                          &ems.s,
2204                                          state_global,
2205                                          observablesHistory,
2206                                          ems.f.view().force(),
2207                                          &checkpointDataHolder);
2208
2209         /* Do the linesearching in the direction dx[point][0..(n-1)] */
2210
2211         /* make s a pointer to current search direction - point=0 first time we get here */
2212         gmx::ArrayRef<const real> s = dx[point];
2213
2214         const real* xx = static_cast<real*>(ems.s.x.rvec_array()[0]);
2215         const real* ff = static_cast<real*>(ems.f.view().force().data()[0]);
2216
2217         // calculate line gradient in position A
2218         double gpa = 0;
2219         for (int i = 0; i < n; i++)
2220         {
2221             gpa -= s[i] * ff[i];
2222         }
2223
2224         /* Calculate minimum allowed stepsize along the line, before the average (norm)
2225          * relative change in coordinate is smaller than precision
2226          */
2227         double minstep = 0;
2228         for (int i = 0; i < n; i++)
2229         {
2230             double tmp = fabs(xx[i]);
2231             if (tmp < 1.0)
2232             {
2233                 tmp = 1.0;
2234             }
2235             tmp = s[i] / tmp;
2236             minstep += tmp * tmp;
2237         }
2238         minstep = GMX_REAL_EPS / sqrt(minstep / n);
2239
2240         if (stepsize < minstep)
2241         {
2242             converged = true;
2243             break;
2244         }
2245
2246         // Before taking any steps along the line, store the old position
2247         *last            = ems;
2248         real*      lastx = static_cast<real*>(last->s.x.data()[0]);
2249         real*      lastf = static_cast<real*>(last->f.view().force().data()[0]);
2250         const real Epot0 = ems.epot;
2251
2252         *sa = ems;
2253
2254         /* Take a step downhill.
2255          * In theory, we should find the actual minimum of the function in this
2256          * direction, somewhere along the line.
2257          * That is quite possible, but it turns out to take 5-10 function evaluations
2258          * for each line. However, we dont really need to find the exact minimum -
2259          * it is much better to start a new BFGS step in a modified direction as soon
2260          * as we are close to it. This will save a lot of energy evaluations.
2261          *
2262          * In practice, we just try to take a single step.
2263          * If it worked (i.e. lowered the energy), we increase the stepsize but
2264          * continue straight to the next BFGS step without trying to find any minimum,
2265          * i.e. we change the search direction too. If the line was smooth, it is
2266          * likely we are in a smooth region, and then it makes sense to take longer
2267          * steps in the modified search direction too.
2268          *
2269          * If it didn't work (higher energy), there must be a minimum somewhere between
2270          * the old position and the new one. Then we need to start by finding a lower
2271          * value before we change search direction. Since the energy was apparently
2272          * quite rough, we need to decrease the step size.
2273          *
2274          * Due to the finite numerical accuracy, it turns out that it is a good idea
2275          * to accept a SMALL increase in energy, if the derivative is still downhill.
2276          * This leads to lower final energies in the tests I've done. / Erik
2277          */
2278
2279         // State "A" is the first position along the line.
2280         // reference position along line is initially zero
2281         real a = 0;
2282
2283         // Check stepsize first. We do not allow displacements
2284         // larger than emstep.
2285         //
2286         real c;
2287         real maxdelta;
2288         do
2289         {
2290             // Pick a new position C by adding stepsize to A.
2291             c = a + stepsize;
2292
2293             // Calculate what the largest change in any individual coordinate
2294             // would be (translation along line * gradient along line)
2295             maxdelta = 0;
2296             for (int i = 0; i < n; i++)
2297             {
2298                 real delta = c * s[i];
2299                 if (delta > maxdelta)
2300                 {
2301                     maxdelta = delta;
2302                 }
2303             }
2304             // If any displacement is larger than the stepsize limit, reduce the step
2305             if (maxdelta > inputrec->em_stepsize)
2306             {
2307                 stepsize *= 0.1;
2308             }
2309         } while (maxdelta > inputrec->em_stepsize);
2310
2311         // Take a trial step and move the coordinate array xc[] to position C
2312         real* xc = static_cast<real*>(sc->s.x.rvec_array()[0]);
2313         for (int i = 0; i < n; i++)
2314         {
2315             xc[i] = lastx[i] + c * s[i];
2316         }
2317
2318         neval++;
2319         // Calculate energy for the trial step in position C
2320         energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE);
2321
2322         // Calc line gradient in position C
2323         real*  fc  = static_cast<real*>(sc->f.view().force()[0]);
2324         double gpc = 0;
2325         for (int i = 0; i < n; i++)
2326         {
2327             gpc -= s[i] * fc[i]; /* f is negative gradient, thus the sign */
2328         }
2329         /* Sum the gradient along the line across CPUs */
2330         if (PAR(cr))
2331         {
2332             gmx_sumd(1, &gpc, cr);
2333         }
2334
2335         // This is the max amount of increase in energy we tolerate.
2336         // By allowing VERY small changes (close to numerical precision) we
2337         // frequently find even better (lower) final energies.
2338         double tmp = std::sqrt(GMX_REAL_EPS) * fabs(sa->epot);
2339
2340         // Accept the step if the energy is lower in the new position C (compared to A),
2341         // or if it is not significantly higher and the line derivative is still negative.
2342         bool foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp));
2343         // If true, great, we found a better energy. We no longer try to alter the
2344         // stepsize, but simply accept this new better position. The we select a new
2345         // search direction instead, which will be much more efficient than continuing
2346         // to take smaller steps along a line. Set fnorm based on the new C position,
2347         // which will be used to update the stepsize to 1/fnorm further down.
2348
2349         // If false, the energy is NOT lower in point C, i.e. it will be the same
2350         // or higher than in point A. In this case it is pointless to move to point C,
2351         // so we will have to do more iterations along the same line to find a smaller
2352         // value in the interval [A=0.0,C].
2353         // Here, A is still 0.0, but that will change when we do a search in the interval
2354         // [0.0,C] below. That search we will do by interpolation or bisection rather
2355         // than with the stepsize, so no need to modify it. For the next search direction
2356         // it will be reset to 1/fnorm anyway.
2357
2358         double step_taken;
2359         if (!foundlower)
2360         {
2361             // OK, if we didn't find a lower value we will have to locate one now - there must
2362             // be one in the interval [a,c].
2363             // The same thing is valid here, though: Don't spend dozens of iterations to find
2364             // the line minimum. We try to interpolate based on the derivative at the endpoints,
2365             // and only continue until we find a lower value. In most cases this means 1-2 iterations.
2366             // I also have a safeguard for potentially really pathological functions so we never
2367             // take more than 20 steps before we give up.
2368             // If we already found a lower value we just skip this step and continue to the update.
2369             real fnorm    = 0;
2370             int  nminstep = 0;
2371             do
2372             {
2373                 // Select a new trial point B in the interval [A,C].
2374                 // If the derivatives at points a & c have different sign we interpolate to zero,
2375                 // otherwise just do a bisection since there might be multiple minima/maxima
2376                 // inside the interval.
2377                 real b;
2378                 if (gpa < 0 && gpc > 0)
2379                 {
2380                     b = a + gpa * (a - c) / (gpc - gpa);
2381                 }
2382                 else
2383                 {
2384                     b = 0.5 * (a + c);
2385                 }
2386
2387                 /* safeguard if interpolation close to machine accuracy causes errors:
2388                  * never go outside the interval
2389                  */
2390                 if (b <= a || b >= c)
2391                 {
2392                     b = 0.5 * (a + c);
2393                 }
2394
2395                 // Take a trial step to point B
2396                 real* xb = static_cast<real*>(sb->s.x.rvec_array()[0]);
2397                 for (int i = 0; i < n; i++)
2398                 {
2399                     xb[i] = lastx[i] + b * s[i];
2400                 }
2401
2402                 neval++;
2403                 // Calculate energy for the trial step in point B
2404                 energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE);
2405                 fnorm = sb->fnorm;
2406
2407                 // Calculate gradient in point B
2408                 real*  fb  = static_cast<real*>(sb->f.view().force()[0]);
2409                 double gpb = 0;
2410                 for (int i = 0; i < n; i++)
2411                 {
2412                     gpb -= s[i] * fb[i]; /* f is negative gradient, thus the sign */
2413                 }
2414                 /* Sum the gradient along the line across CPUs */
2415                 if (PAR(cr))
2416                 {
2417                     gmx_sumd(1, &gpb, cr);
2418                 }
2419
2420                 // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative
2421                 // at the new point B, and rename the endpoints of this new interval A and C.
2422                 if (gpb > 0)
2423                 {
2424                     /* Replace c endpoint with b */
2425                     c = b;
2426                     /* copy state b to c */
2427                     *sc = *sb;
2428                 }
2429                 else
2430                 {
2431                     /* Replace a endpoint with b */
2432                     a = b;
2433                     /* copy state b to a */
2434                     *sa = *sb;
2435                 }
2436
2437                 /*
2438                  * Stop search as soon as we find a value smaller than the endpoints,
2439                  * or if the tolerance is below machine precision.
2440                  * Never run more than 20 steps, no matter what.
2441                  */
2442                 nminstep++;
2443             } while ((sb->epot > sa->epot || sb->epot > sc->epot) && (nminstep < 20));
2444
2445             if (std::fabs(sb->epot - Epot0) < GMX_REAL_EPS || nminstep >= 20)
2446             {
2447                 /* OK. We couldn't find a significantly lower energy.
2448                  * If ncorr==0 this was steepest descent, and then we give up.
2449                  * If not, reset memory to restart as steepest descent before quitting.
2450                  */
2451                 if (ncorr == 0)
2452                 {
2453                     /* Converged */
2454                     converged = true;
2455                     break;
2456                 }
2457                 else
2458                 {
2459                     /* Reset memory */
2460                     ncorr = 0;
2461                     /* Search in gradient direction */
2462                     for (int i = 0; i < n; i++)
2463                     {
2464                         dx[point][i] = ff[i];
2465                     }
2466                     /* Reset stepsize */
2467                     stepsize = 1.0 / fnorm;
2468                     continue;
2469                 }
2470             }
2471
2472             /* Select min energy state of A & C, put the best in xx/ff/Epot
2473              */
2474             if (sc->epot < sa->epot)
2475             {
2476                 /* Use state C */
2477                 ems        = *sc;
2478                 step_taken = c;
2479             }
2480             else
2481             {
2482                 /* Use state A */
2483                 ems        = *sa;
2484                 step_taken = a;
2485             }
2486         }
2487         else
2488         {
2489             /* found lower */
2490             /* Use state C */
2491             ems        = *sc;
2492             step_taken = c;
2493         }
2494
2495         /* Update the memory information, and calculate a new
2496          * approximation of the inverse hessian
2497          */
2498
2499         /* Have new data in Epot, xx, ff */
2500         if (ncorr < nmaxcorr)
2501         {
2502             ncorr++;
2503         }
2504
2505         for (int i = 0; i < n; i++)
2506         {
2507             dg[point][i] = lastf[i] - ff[i];
2508             dx[point][i] *= step_taken;
2509         }
2510
2511         real dgdg = 0;
2512         real dgdx = 0;
2513         for (int i = 0; i < n; i++)
2514         {
2515             dgdg += dg[point][i] * dg[point][i];
2516             dgdx += dg[point][i] * dx[point][i];
2517         }
2518
2519         const real diag = dgdx / dgdg;
2520
2521         rho[point] = 1.0 / dgdx;
2522         point++;
2523
2524         if (point >= nmaxcorr)
2525         {
2526             point = 0;
2527         }
2528
2529         /* Update */
2530         for (int i = 0; i < n; i++)
2531         {
2532             p[i] = ff[i];
2533         }
2534
2535         int cp = point;
2536
2537         /* Recursive update. First go back over the memory points */
2538         for (int k = 0; k < ncorr; k++)
2539         {
2540             cp--;
2541             if (cp < 0)
2542             {
2543                 cp = ncorr - 1;
2544             }
2545
2546             real sq = 0;
2547             for (int i = 0; i < n; i++)
2548             {
2549                 sq += dx[cp][i] * p[i];
2550             }
2551
2552             alpha[cp] = rho[cp] * sq;
2553
2554             for (int i = 0; i < n; i++)
2555             {
2556                 p[i] -= alpha[cp] * dg[cp][i];
2557             }
2558         }
2559
2560         for (int i = 0; i < n; i++)
2561         {
2562             p[i] *= diag;
2563         }
2564
2565         /* And then go forward again */
2566         for (int k = 0; k < ncorr; k++)
2567         {
2568             real yr = 0;
2569             for (int i = 0; i < n; i++)
2570             {
2571                 yr += p[i] * dg[cp][i];
2572             }
2573
2574             real beta = rho[cp] * yr;
2575             beta      = alpha[cp] - beta;
2576
2577             for (int i = 0; i < n; i++)
2578             {
2579                 p[i] += beta * dx[cp][i];
2580             }
2581
2582             cp++;
2583             if (cp >= ncorr)
2584             {
2585                 cp = 0;
2586             }
2587         }
2588
2589         for (int i = 0; i < n; i++)
2590         {
2591             if (!frozen[i])
2592             {
2593                 dx[point][i] = p[i];
2594             }
2595             else
2596             {
2597                 dx[point][i] = 0;
2598             }
2599         }
2600
2601         /* Print it if necessary */
2602         if (MASTER(cr))
2603         {
2604             if (mdrunOptions.verbose)
2605             {
2606                 double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2607                 fprintf(stderr,
2608                         "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
2609                         step,
2610                         ems.epot,
2611                         ems.fnorm / sqrtNumAtoms,
2612                         ems.fmax,
2613                         ems.a_fmax + 1);
2614                 fflush(stderr);
2615             }
2616             /* Store the new (lower) energies */
2617             matrix nullBox = {};
2618             energyOutput.addDataAtEnergyStep(false,
2619                                              false,
2620                                              static_cast<double>(step),
2621                                              mdatoms->tmass,
2622                                              enerd,
2623                                              nullptr,
2624                                              nullptr,
2625                                              nullBox,
2626                                              PTCouplingArrays(),
2627                                              0,
2628                                              vir,
2629                                              pres,
2630                                              nullptr,
2631                                              mu_tot,
2632                                              constr);
2633
2634             do_log = do_per_step(step, inputrec->nstlog);
2635             do_ene = do_per_step(step, inputrec->nstenergy);
2636
2637             imdSession->fillEnergyRecord(step, TRUE);
2638
2639             if (do_log)
2640             {
2641                 EnergyOutput::printHeader(fplog, step, step);
2642             }
2643             energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf),
2644                                                do_ene,
2645                                                FALSE,
2646                                                FALSE,
2647                                                do_log ? fplog : nullptr,
2648                                                step,
2649                                                step,
2650                                                fr->fcdata.get(),
2651                                                nullptr);
2652         }
2653
2654         /* Send x and E to IMD client, if bIMD is TRUE. */
2655         if (imdSession->run(step, TRUE, state_global->box, state_global->x, 0) && MASTER(cr))
2656         {
2657             imdSession->sendPositionsAndEnergies();
2658         }
2659
2660         // Reset stepsize in we are doing more iterations
2661         stepsize = 1.0;
2662
2663         /* Stop when the maximum force lies below tolerance.
2664          * If we have reached machine precision, converged is already set to true.
2665          */
2666         converged = converged || (ems.fmax < inputrec->em_tol);
2667
2668     } /* End of the loop */
2669
2670     if (converged)
2671     {
2672         step--; /* we never took that last step in this case */
2673     }
2674     if (ems.fmax > inputrec->em_tol)
2675     {
2676         if (MASTER(cr))
2677         {
2678             warn_step(fplog, inputrec->em_tol, ems.fmax, step - 1 == number_steps, FALSE);
2679         }
2680         converged = FALSE;
2681     }
2682
2683     /* If we printed energy and/or logfile last step (which was the last step)
2684      * we don't have to do it again, but otherwise print the final values.
2685      */
2686     if (!do_log) /* Write final value to log since we didn't do anythin last step */
2687     {
2688         EnergyOutput::printHeader(fplog, step, step);
2689     }
2690     if (!do_ene || !do_log) /* Write final energy file entries */
2691     {
2692         energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf),
2693                                            !do_ene,
2694                                            FALSE,
2695                                            FALSE,
2696                                            !do_log ? fplog : nullptr,
2697                                            step,
2698                                            step,
2699                                            fr->fcdata.get(),
2700                                            nullptr);
2701     }
2702
2703     /* Print some stuff... */
2704     if (MASTER(cr))
2705     {
2706         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2707     }
2708
2709     /* IMPORTANT!
2710      * For accurate normal mode calculation it is imperative that we
2711      * store the last conformation into the full precision binary trajectory.
2712      *
2713      * However, we should only do it if we did NOT already write this step
2714      * above (which we did if do_x or do_f was true).
2715      */
2716     const bool do_x = !do_per_step(step, inputrec->nstxout);
2717     const bool do_f = !do_per_step(step, inputrec->nstfout);
2718     write_em_traj(
2719             fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec, step, &ems, state_global, observablesHistory);
2720
2721     if (MASTER(cr))
2722     {
2723         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2724         print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, number_steps, &ems, sqrtNumAtoms);
2725         print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, number_steps, &ems, sqrtNumAtoms);
2726
2727         fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
2728     }
2729
2730     finish_em(cr, outf, walltime_accounting, wcycle);
2731
2732     /* To print the actual number of steps we needed somewhere */
2733     walltime_accounting_set_nsteps_done(walltime_accounting, step);
2734 }
2735
2736 void LegacySimulator::do_steep()
2737 {
2738     const char*       SD = "Steepest Descents";
2739     gmx_localtop_t    top(top_global.ffparams);
2740     gmx_global_stat_t gstat;
2741     real              stepsize;
2742     real              ustep;
2743     gmx_bool          bDone, bAbort, do_x, do_f;
2744     tensor            vir, pres;
2745     rvec              mu_tot = { 0 };
2746     int               nsteps;
2747     int               count          = 0;
2748     int               steps_accepted = 0;
2749     auto*             mdatoms        = mdAtoms->mdatoms();
2750
2751     GMX_LOG(mdlog.info)
2752             .asParagraph()
2753             .appendText(
2754                     "Note that activating steepest-descent energy minimization via the "
2755                     "integrator .mdp option and the command gmx mdrun may "
2756                     "be available in a different form in a future version of GROMACS, "
2757                     "e.g. gmx minimize and an .mdp option.");
2758
2759     /* Create 2 states on the stack and extract pointers that we will swap */
2760     em_state_t  s0{}, s1{};
2761     em_state_t* s_min = &s0;
2762     em_state_t* s_try = &s1;
2763
2764     /* Init em and store the local state in s_try */
2765     init_em(fplog,
2766             mdlog,
2767             SD,
2768             cr,
2769             inputrec,
2770             imdSession,
2771             pull_work,
2772             state_global,
2773             top_global,
2774             s_try,
2775             &top,
2776             nrnb,
2777             fr,
2778             mdAtoms,
2779             &gstat,
2780             vsite,
2781             constr,
2782             nullptr);
2783     const bool        simulationsShareState = false;
2784     gmx_mdoutf*       outf                  = init_mdoutf(fplog,
2785                                    nfile,
2786                                    fnm,
2787                                    mdrunOptions,
2788                                    cr,
2789                                    outputProvider,
2790                                    mdModulesNotifiers,
2791                                    inputrec,
2792                                    top_global,
2793                                    nullptr,
2794                                    wcycle,
2795                                    StartingBehavior::NewSimulation,
2796                                    simulationsShareState,
2797                                    ms);
2798     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
2799                                    top_global,
2800                                    *inputrec,
2801                                    pull_work,
2802                                    nullptr,
2803                                    false,
2804                                    StartingBehavior::NewSimulation,
2805                                    simulationsShareState,
2806                                    mdModulesNotifiers);
2807
2808     /* Print to log file  */
2809     print_em_start(fplog, cr, walltime_accounting, wcycle, SD);
2810
2811     /* Set variables for stepsize (in nm). This is the largest
2812      * step that we are going to make in any direction.
2813      */
2814     ustep    = inputrec->em_stepsize;
2815     stepsize = 0;
2816
2817     /* Max number of steps  */
2818     nsteps = inputrec->nsteps;
2819
2820     if (MASTER(cr))
2821     {
2822         /* Print to the screen  */
2823         sp_header(stderr, SD, inputrec->em_tol, nsteps);
2824     }
2825     if (fplog)
2826     {
2827         sp_header(fplog, SD, inputrec->em_tol, nsteps);
2828     }
2829     EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
2830                                      inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
2831                                      vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
2832
2833     /**** HERE STARTS THE LOOP ****
2834      * count is the counter for the number of steps
2835      * bDone will be TRUE when the minimization has converged
2836      * bAbort will be TRUE when nsteps steps have been performed or when
2837      * the stepsize becomes smaller than is reasonable for machine precision
2838      */
2839     count  = 0;
2840     bDone  = FALSE;
2841     bAbort = FALSE;
2842     while (!bDone && !bAbort)
2843     {
2844         bAbort = (nsteps >= 0) && (count == nsteps);
2845
2846         /* set new coordinates, except for first step */
2847         bool validStep = true;
2848         if (count > 0)
2849         {
2850             validStep = do_em_step(
2851                     cr, inputrec, mdatoms, s_min, stepsize, s_min->f.view().forceWithPadding(), s_try, constr, count);
2852         }
2853
2854         if (validStep)
2855         {
2856             energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0);
2857         }
2858         else
2859         {
2860             // Signal constraint error during stepping with energy=inf
2861             s_try->epot = std::numeric_limits<real>::infinity();
2862         }
2863
2864         if (MASTER(cr))
2865         {
2866             EnergyOutput::printHeader(fplog, count, count);
2867         }
2868
2869         if (count == 0)
2870         {
2871             s_min->epot = s_try->epot;
2872         }
2873
2874         /* Print it if necessary  */
2875         if (MASTER(cr))
2876         {
2877             if (mdrunOptions.verbose)
2878             {
2879                 fprintf(stderr,
2880                         "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2881                         count,
2882                         ustep,
2883                         s_try->epot,
2884                         s_try->fmax,
2885                         s_try->a_fmax + 1,
2886                         ((count == 0) || (s_try->epot < s_min->epot)) ? '\n' : '\r');
2887                 fflush(stderr);
2888             }
2889
2890             if ((count == 0) || (s_try->epot < s_min->epot))
2891             {
2892                 /* Store the new (lower) energies  */
2893                 matrix nullBox = {};
2894                 energyOutput.addDataAtEnergyStep(false,
2895                                                  false,
2896                                                  static_cast<double>(count),
2897                                                  mdatoms->tmass,
2898                                                  enerd,
2899                                                  nullptr,
2900                                                  nullptr,
2901                                                  nullBox,
2902                                                  PTCouplingArrays(),
2903                                                  0,
2904                                                  vir,
2905                                                  pres,
2906                                                  nullptr,
2907                                                  mu_tot,
2908                                                  constr);
2909
2910                 imdSession->fillEnergyRecord(count, TRUE);
2911
2912                 const bool do_dr = do_per_step(steps_accepted, inputrec->nstdisreout);
2913                 const bool do_or = do_per_step(steps_accepted, inputrec->nstorireout);
2914                 energyOutput.printStepToEnergyFile(
2915                         mdoutf_get_fp_ene(outf), TRUE, do_dr, do_or, fplog, count, count, fr->fcdata.get(), nullptr);
2916                 fflush(fplog);
2917             }
2918         }
2919
2920         /* Now if the new energy is smaller than the previous...
2921          * or if this is the first step!
2922          * or if we did random steps!
2923          */
2924
2925         if ((count == 0) || (s_try->epot < s_min->epot))
2926         {
2927             steps_accepted++;
2928
2929             /* Test whether the convergence criterion is met...  */
2930             bDone = (s_try->fmax < inputrec->em_tol);
2931
2932             /* Copy the arrays for force, positions and energy  */
2933             /* The 'Min' array always holds the coords and forces of the minimal
2934                sampled energy  */
2935             swap_em_state(&s_min, &s_try);
2936             if (count > 0)
2937             {
2938                 ustep *= 1.2;
2939             }
2940
2941             /* Write to trn, if necessary */
2942             do_x = do_per_step(steps_accepted, inputrec->nstxout);
2943             do_f = do_per_step(steps_accepted, inputrec->nstfout);
2944             write_em_traj(
2945                     fplog, cr, outf, do_x, do_f, nullptr, top_global, inputrec, count, s_min, state_global, observablesHistory);
2946         }
2947         else
2948         {
2949             /* If energy is not smaller make the step smaller...  */
2950             ustep *= 0.5;
2951
2952             if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
2953             {
2954                 /* Reload the old state */
2955                 em_dd_partition_system(fplog,
2956                                        mdlog,
2957                                        count,
2958                                        cr,
2959                                        top_global,
2960                                        inputrec,
2961                                        imdSession,
2962                                        pull_work,
2963                                        s_min,
2964                                        &top,
2965                                        mdAtoms,
2966                                        fr,
2967                                        vsite,
2968                                        constr,
2969                                        nrnb,
2970                                        wcycle);
2971             }
2972         }
2973
2974         // If the force is very small after finishing minimization,
2975         // we risk dividing by zero when calculating the step size.
2976         // So we check first if the minimization has stopped before
2977         // trying to obtain a new step size.
2978         if (!bDone)
2979         {
2980             /* Determine new step  */
2981             stepsize = ustep / s_min->fmax;
2982         }
2983
2984         /* Check if stepsize is too small, with 1 nm as a characteristic length */
2985 #if GMX_DOUBLE
2986         if (count == nsteps || ustep < 1e-12)
2987 #else
2988         if (count == nsteps || ustep < 1e-6)
2989 #endif
2990         {
2991             if (MASTER(cr))
2992             {
2993                 warn_step(fplog, inputrec->em_tol, s_min->fmax, count == nsteps, constr != nullptr);
2994             }
2995             bAbort = TRUE;
2996         }
2997
2998         /* Send IMD energies and positions, if bIMD is TRUE. */
2999         if (imdSession->run(count,
3000                             TRUE,
3001                             MASTER(cr) ? state_global->box : nullptr,
3002                             MASTER(cr) ? state_global->x : gmx::ArrayRef<gmx::RVec>(),
3003                             0)
3004             && MASTER(cr))
3005         {
3006             imdSession->sendPositionsAndEnergies();
3007         }
3008
3009         count++;
3010     } /* End of the loop  */
3011
3012     /* Print some data...  */
3013     if (MASTER(cr))
3014     {
3015         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
3016     }
3017     write_em_traj(fplog,
3018                   cr,
3019                   outf,
3020                   TRUE,
3021                   inputrec->nstfout != 0,
3022                   ftp2fn(efSTO, nfile, fnm),
3023                   top_global,
3024                   inputrec,
3025                   count,
3026                   s_min,
3027                   state_global,
3028                   observablesHistory);
3029
3030     if (MASTER(cr))
3031     {
3032         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
3033
3034         print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, s_min, sqrtNumAtoms);
3035         print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, s_min, sqrtNumAtoms);
3036     }
3037
3038     finish_em(cr, outf, walltime_accounting, wcycle);
3039
3040     /* To print the actual number of steps we needed somewhere */
3041     {
3042         // TODO: Avoid changing inputrec (#3854)
3043         auto* nonConstInputrec   = const_cast<t_inputrec*>(inputrec);
3044         nonConstInputrec->nsteps = count;
3045     }
3046
3047     walltime_accounting_set_nsteps_done(walltime_accounting, count);
3048 }
3049
3050 void LegacySimulator::do_nm()
3051 {
3052     const char*         NM = "Normal Mode Analysis";
3053     int                 nnodes;
3054     gmx_localtop_t      top(top_global.ffparams);
3055     gmx_global_stat_t   gstat;
3056     tensor              vir, pres;
3057     rvec                mu_tot = { 0 };
3058     rvec*               dfdx;
3059     gmx_bool            bSparse; /* use sparse matrix storage format */
3060     size_t              sz;
3061     gmx_sparsematrix_t* sparse_matrix = nullptr;
3062     real*               full_matrix   = nullptr;
3063
3064     /* added with respect to mdrun */
3065     int   row, col;
3066     real  der_range = 10.0 * std::sqrt(GMX_REAL_EPS);
3067     real  x_min;
3068     bool  bIsMaster = MASTER(cr);
3069     auto* mdatoms   = mdAtoms->mdatoms();
3070
3071     GMX_LOG(mdlog.info)
3072             .asParagraph()
3073             .appendText(
3074                     "Note that activating normal-mode analysis via the integrator "
3075                     ".mdp option and the command gmx mdrun may "
3076                     "be available in a different form in a future version of GROMACS, "
3077                     "e.g. gmx normal-modes.");
3078
3079     if (constr != nullptr)
3080     {
3081         gmx_fatal(
3082                 FARGS,
3083                 "Constraints present with Normal Mode Analysis, this combination is not supported");
3084     }
3085
3086     gmx_shellfc_t* shellfc;
3087
3088     em_state_t state_work{};
3089
3090     /* Init em and store the local state in state_minimum */
3091     init_em(fplog,
3092             mdlog,
3093             NM,
3094             cr,
3095             inputrec,
3096             imdSession,
3097             pull_work,
3098             state_global,
3099             top_global,
3100             &state_work,
3101             &top,
3102             nrnb,
3103             fr,
3104             mdAtoms,
3105             &gstat,
3106             vsite,
3107             constr,
3108             &shellfc);
3109     const bool  simulationsShareState = false;
3110     gmx_mdoutf* outf                  = init_mdoutf(fplog,
3111                                    nfile,
3112                                    fnm,
3113                                    mdrunOptions,
3114                                    cr,
3115                                    outputProvider,
3116                                    mdModulesNotifiers,
3117                                    inputrec,
3118                                    top_global,
3119                                    nullptr,
3120                                    wcycle,
3121                                    StartingBehavior::NewSimulation,
3122                                    simulationsShareState,
3123                                    ms);
3124
3125     std::vector<int>       atom_index = get_atom_index(top_global);
3126     std::vector<gmx::RVec> fneg(atom_index.size(), { 0, 0, 0 });
3127     snew(dfdx, atom_index.size());
3128
3129 #if !GMX_DOUBLE
3130     if (bIsMaster)
3131     {
3132         fprintf(stderr,
3133                 "NOTE: This version of GROMACS has been compiled in single precision,\n"
3134                 "      which MIGHT not be accurate enough for normal mode analysis.\n"
3135                 "      GROMACS now uses sparse matrix storage, so the memory requirements\n"
3136                 "      are fairly modest even if you recompile in double precision.\n\n");
3137     }
3138 #endif
3139
3140     /* Check if we can/should use sparse storage format.
3141      *
3142      * Sparse format is only useful when the Hessian itself is sparse, which it
3143      * will be when we use a cutoff.
3144      * For small systems (n<1000) it is easier to always use full matrix format, though.
3145      */
3146     if (EEL_FULL(fr->ic->eeltype) || fr->rlist == 0.0)
3147     {
3148         GMX_LOG(mdlog.warning)
3149                 .appendText("Non-cutoff electrostatics used, forcing full Hessian format.");
3150         bSparse = FALSE;
3151     }
3152     else if (atom_index.size() < 1000)
3153     {
3154         GMX_LOG(mdlog.warning)
3155                 .appendTextFormatted("Small system size (N=%zu), using full Hessian format.",
3156                                      atom_index.size());
3157         bSparse = FALSE;
3158     }
3159     else
3160     {
3161         GMX_LOG(mdlog.warning).appendText("Using compressed symmetric sparse Hessian format.");
3162         bSparse = TRUE;
3163     }
3164
3165     /* Number of dimensions, based on real atoms, that is not vsites or shell */
3166     sz = DIM * atom_index.size();
3167
3168     fprintf(stderr, "Allocating Hessian memory...\n\n");
3169
3170     if (bSparse)
3171     {
3172         sparse_matrix                       = gmx_sparsematrix_init(sz);
3173         sparse_matrix->compressed_symmetric = TRUE;
3174     }
3175     else
3176     {
3177         snew(full_matrix, sz * sz);
3178     }
3179
3180     /* Write start time and temperature */
3181     print_em_start(fplog, cr, walltime_accounting, wcycle, NM);
3182
3183     /* fudge nr of steps to nr of atoms */
3184     {
3185         // TODO: Avoid changing inputrec (#3854)
3186         auto* nonConstInputrec   = const_cast<t_inputrec*>(inputrec);
3187         nonConstInputrec->nsteps = atom_index.size() * 2;
3188     }
3189
3190     if (bIsMaster)
3191     {
3192         fprintf(stderr,
3193                 "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n",
3194                 *(top_global.name),
3195                 inputrec->nsteps);
3196     }
3197
3198     nnodes = cr->nnodes;
3199
3200     /* Make evaluate_energy do a single node force calculation */
3201     cr->nnodes = 1;
3202     EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
3203                                      inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
3204                                      vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
3205     energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE);
3206     cr->nnodes = nnodes;
3207
3208     /* if forces are not small, warn user */
3209     get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, &state_work);
3210
3211     GMX_LOG(mdlog.warning).appendTextFormatted("Maximum force:%12.5e", state_work.fmax);
3212     if (state_work.fmax > 1.0e-3)
3213     {
3214         GMX_LOG(mdlog.warning)
3215                 .appendText(
3216                         "The force is probably not small enough to "
3217                         "ensure that you are at a minimum.\n"
3218                         "Be aware that negative eigenvalues may occur\n"
3219                         "when the resulting matrix is diagonalized.");
3220     }
3221
3222     /***********************************************************
3223      *
3224      *      Loop over all pairs in matrix
3225      *
3226      *      do_force called twice. Once with positive and
3227      *      once with negative displacement
3228      *
3229      ************************************************************/
3230
3231     /* Steps are divided one by one over the nodes */
3232     bool bNS          = true;
3233     auto state_work_x = makeArrayRef(state_work.s.x);
3234     auto state_work_f = state_work.f.view().force();
3235     for (index aid = cr->nodeid; aid < ssize(atom_index); aid += nnodes)
3236     {
3237         size_t atom = atom_index[aid];
3238         for (size_t d = 0; d < DIM; d++)
3239         {
3240             int64_t step        = 0;
3241             int     force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES;
3242             double  t           = 0;
3243
3244             x_min = state_work_x[atom][d];
3245
3246             for (unsigned int dx = 0; (dx < 2); dx++)
3247             {
3248                 if (dx == 0)
3249                 {
3250                     state_work_x[atom][d] = x_min - der_range;
3251                 }
3252                 else
3253                 {
3254                     state_work_x[atom][d] = x_min + der_range;
3255                 }
3256
3257                 /* Make evaluate_energy do a single node force calculation */
3258                 cr->nnodes = 1;
3259                 if (shellfc)
3260                 {
3261                     /* Now is the time to relax the shells */
3262                     relax_shell_flexcon(fplog,
3263                                         cr,
3264                                         ms,
3265                                         mdrunOptions.verbose,
3266                                         nullptr,
3267                                         step,
3268                                         inputrec,
3269                                         imdSession,
3270                                         pull_work,
3271                                         bNS,
3272                                         force_flags,
3273                                         &top,
3274                                         constr,
3275                                         enerd,
3276                                         state_work.s.natoms,
3277                                         state_work.s.x.arrayRefWithPadding(),
3278                                         state_work.s.v.arrayRefWithPadding(),
3279                                         state_work.s.box,
3280                                         state_work.s.lambda,
3281                                         &state_work.s.hist,
3282                                         &state_work.f.view(),
3283                                         vir,
3284                                         *mdatoms,
3285                                         nrnb,
3286                                         wcycle,
3287                                         shellfc,
3288                                         fr,
3289                                         runScheduleWork,
3290                                         t,
3291                                         mu_tot,
3292                                         vsite,
3293                                         DDBalanceRegionHandler(nullptr));
3294                     bNS = false;
3295                     step++;
3296                 }
3297                 else
3298                 {
3299                     energyEvaluator.run(&state_work, mu_tot, vir, pres, aid * 2 + dx, FALSE);
3300                 }
3301
3302                 cr->nnodes = nnodes;
3303
3304                 if (dx == 0)
3305                 {
3306                     std::copy(state_work_f.begin(), state_work_f.begin() + atom_index.size(), fneg.begin());
3307                 }
3308             }
3309
3310             /* x is restored to original */
3311             state_work_x[atom][d] = x_min;
3312
3313             for (size_t j = 0; j < atom_index.size(); j++)
3314             {
3315                 for (size_t k = 0; (k < DIM); k++)
3316                 {
3317                     dfdx[j][k] = -(state_work_f[atom_index[j]][k] - fneg[j][k]) / (2 * der_range);
3318                 }
3319             }
3320
3321             if (!bIsMaster)
3322             {
3323 #if GMX_MPI
3324 #    define mpi_type GMX_MPI_REAL
3325                 MPI_Send(dfdx[0], atom_index.size() * DIM, mpi_type, MASTER(cr), cr->nodeid, cr->mpi_comm_mygroup);
3326 #endif
3327             }
3328             else
3329             {
3330                 for (index node = 0; (node < nnodes && aid + node < ssize(atom_index)); node++)
3331                 {
3332                     if (node > 0)
3333                     {
3334 #if GMX_MPI
3335                         MPI_Status stat;
3336                         MPI_Recv(dfdx[0], atom_index.size() * DIM, mpi_type, node, node, cr->mpi_comm_mygroup, &stat);
3337 #    undef mpi_type
3338 #endif
3339                     }
3340
3341                     row = (aid + node) * DIM + d;
3342
3343                     for (size_t j = 0; j < atom_index.size(); j++)
3344                     {
3345                         for (size_t k = 0; k < DIM; k++)
3346                         {
3347                             col = j * DIM + k;
3348
3349                             if (bSparse)
3350                             {
3351                                 if (col >= row && dfdx[j][k] != 0.0)
3352                                 {
3353                                     gmx_sparsematrix_increment_value(sparse_matrix, row, col, dfdx[j][k]);
3354                                 }
3355                             }
3356                             else
3357                             {
3358                                 full_matrix[row * sz + col] = dfdx[j][k];
3359                             }
3360                         }
3361                     }
3362                 }
3363             }
3364
3365             if (mdrunOptions.verbose && fplog)
3366             {
3367                 fflush(fplog);
3368             }
3369         }
3370         /* write progress */
3371         if (bIsMaster && mdrunOptions.verbose)
3372         {
3373             fprintf(stderr,
3374                     "\rFinished step %d out of %td",
3375                     std::min<int>(atom + nnodes, atom_index.size()),
3376                     ssize(atom_index));
3377             fflush(stderr);
3378         }
3379     }
3380
3381     if (bIsMaster)
3382     {
3383         fprintf(stderr, "\n\nWriting Hessian...\n");
3384         gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix);
3385     }
3386
3387     finish_em(cr, outf, walltime_accounting, wcycle);
3388
3389     walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size() * 2);
3390 }
3391
3392 } // namespace gmx