src/gromacs/mdrun/md.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2011-2019,2020,2021, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief Implements the integrator for normal molecular dynamics simulations
  40  *
  41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
  42  * \ingroup module_mdrun
  43  */
  44 #include "gmxpre.h"
  45
  46 #include <cinttypes>
  47 #include <cmath>
  48 #include <cstdio>
  49 #include <cstdlib>
  50
  51 #include <algorithm>
  52 #include <memory>
  53 #include <numeric>
  54
  55 #include "gromacs/applied_forces/awh/awh.h"
  56 #include "gromacs/commandline/filenm.h"
  57 #include "gromacs/domdec/collect.h"
  58 #include "gromacs/domdec/dlbtiming.h"
  59 #include "gromacs/domdec/domdec.h"
  60 #include "gromacs/domdec/domdec_network.h"
  61 #include "gromacs/domdec/domdec_struct.h"
  62 #include "gromacs/domdec/gpuhaloexchange.h"
  63 #include "gromacs/domdec/mdsetup.h"
  64 #include "gromacs/domdec/partition.h"
  65 #include "gromacs/essentialdynamics/edsam.h"
  66 #include "gromacs/ewald/pme_load_balancing.h"
  67 #include "gromacs/ewald/pme_pp.h"
  68 #include "gromacs/fileio/trxio.h"
  69 #include "gromacs/gmxlib/network.h"
  70 #include "gromacs/gmxlib/nrnb.h"
  71 #include "gromacs/gpu_utils/device_stream_manager.h"
  72 #include "gromacs/gpu_utils/gpu_utils.h"
  73 #include "gromacs/imd/imd.h"
  74 #include "gromacs/listed_forces/listed_forces.h"
  75 #include "gromacs/math/functions.h"
  76 #include "gromacs/math/invertmatrix.h"
  77 #include "gromacs/math/vec.h"
  78 #include "gromacs/math/vectypes.h"
  79 #include "gromacs/mdlib/checkpointhandler.h"
  80 #include "gromacs/mdlib/compute_io.h"
  81 #include "gromacs/mdlib/constr.h"
  82 #include "gromacs/mdlib/coupling.h"
  83 #include "gromacs/mdlib/ebin.h"
  84 #include "gromacs/mdlib/enerdata_utils.h"
  85 #include "gromacs/mdlib/energyoutput.h"
  86 #include "gromacs/mdlib/expanded.h"
  87 #include "gromacs/mdlib/force.h"
  88 #include "gromacs/mdlib/force_flags.h"
  89 #include "gromacs/mdlib/forcerec.h"
  90 #include "gromacs/mdlib/freeenergyparameters.h"
  91 #include "gromacs/mdlib/md_support.h"
  92 #include "gromacs/mdlib/mdatoms.h"
  93 #include "gromacs/mdlib/mdoutf.h"
  94 #include "gromacs/mdlib/membed.h"
  95 #include "gromacs/mdlib/resethandler.h"
  96 #include "gromacs/mdlib/sighandler.h"
  97 #include "gromacs/mdlib/simulationsignal.h"
  98 #include "gromacs/mdlib/stat.h"
  99 #include "gromacs/mdlib/stophandler.h"
 100 #include "gromacs/mdlib/tgroup.h"
 101 #include "gromacs/mdlib/trajectory_writing.h"
 102 #include "gromacs/mdlib/update.h"
 103 #include "gromacs/mdlib/update_constrain_gpu.h"
 104 #include "gromacs/mdlib/update_vv.h"
 105 #include "gromacs/mdlib/vcm.h"
 106 #include "gromacs/mdlib/vsite.h"
 107 #include "gromacs/mdrunutility/handlerestart.h"
 108 #include "gromacs/mdrunutility/multisim.h"
 109 #include "gromacs/mdrunutility/printtime.h"
 110 #include "gromacs/mdtypes/awh_history.h"
 111 #include "gromacs/mdtypes/awh_params.h"
 112 #include "gromacs/mdtypes/commrec.h"
 113 #include "gromacs/mdtypes/df_history.h"
 114 #include "gromacs/mdtypes/energyhistory.h"
 115 #include "gromacs/mdtypes/fcdata.h"
 116 #include "gromacs/mdtypes/forcebuffers.h"
 117 #include "gromacs/mdtypes/forcerec.h"
 118 #include "gromacs/mdtypes/group.h"
 119 #include "gromacs/mdtypes/inputrec.h"
 120 #include "gromacs/mdtypes/interaction_const.h"
 121 #include "gromacs/mdtypes/md_enums.h"
 122 #include "gromacs/mdtypes/mdatom.h"
 123 #include "gromacs/mdtypes/mdrunoptions.h"
 124 #include "gromacs/mdtypes/multipletimestepping.h"
 125 #include "gromacs/mdtypes/observableshistory.h"
 126 #include "gromacs/mdtypes/pullhistory.h"
 127 #include "gromacs/mdtypes/simulation_workload.h"
 128 #include "gromacs/mdtypes/state.h"
 129 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
 130 #include "gromacs/modularsimulator/energydata.h"
 131 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 132 #include "gromacs/nbnxm/nbnxm.h"
 133 #include "gromacs/pbcutil/pbc.h"
 134 #include "gromacs/pulling/output.h"
 135 #include "gromacs/pulling/pull.h"
 136 #include "gromacs/swap/swapcoords.h"
 137 #include "gromacs/timing/wallcycle.h"
 138 #include "gromacs/timing/walltime_accounting.h"
 139 #include "gromacs/topology/atoms.h"
 140 #include "gromacs/topology/idef.h"
 141 #include "gromacs/topology/mtop_util.h"
 142 #include "gromacs/topology/topology.h"
 143 #include "gromacs/trajectory/trajectoryframe.h"
 144 #include "gromacs/utility/basedefinitions.h"
 145 #include "gromacs/utility/cstringutil.h"
 146 #include "gromacs/utility/fatalerror.h"
 147 #include "gromacs/utility/logger.h"
 148 #include "gromacs/utility/real.h"
 149 #include "gromacs/utility/smalloc.h"
 150
 151 #include "legacysimulator.h"
 152 #include "replicaexchange.h"
 153 #include "shellfc.h"
 154
 155 using gmx::SimulationSignaller;
 156
 157 void gmx::LegacySimulator::do_md()
 158 {
 159     // TODO Historically, the EM and MD "integrators" used different
 160     // names for the t_inputrec *parameter, but these must have the
 161     // same name, now that it's a member of a struct. We use this ir
 162     // alias to avoid a large ripple of nearly useless changes.
 163     // t_inputrec is being replaced by IMdpOptionsProvider, so this
 164     // will go away eventually.
 165     const t_inputrec* ir = inputrec;
 166
 167     int64_t      step, step_rel;
 168     double       t, t0 = ir->init_t;
 169     gmx_bool     bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
 170     gmx_bool     bNS = FALSE, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
 171     gmx_bool     bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
 172     gmx_bool     do_ene, do_log, do_verbose;
 173     gmx_bool     bMasterState;
 174     unsigned int force_flags;
 175     tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, pres = { { 0 } };
 176     int    i, m;
 177     rvec   mu_tot;
 178     matrix pressureCouplingMu, M;
 179     gmx_repl_ex_t     repl_ex = nullptr;
 180     gmx_global_stat_t gstat;
 181     gmx_shellfc_t*    shellfc;
 182     gmx_bool          bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
 183     gmx_bool          bTrotter;
 184     real              dvdl_constr;
 185     std::vector<RVec> cbuf;
 186     matrix            lastbox;
 187     int               lamnew = 0;
 188     /* for FEP */
 189     int       nstfep = 0;
 190     double    cycles;
 191     real      saved_conserved_quantity = 0;
 192     real      last_ekin                = 0;
 193     t_extmass MassQ;
 194     char      sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
 195
 196     /* PME load balancing data for GPU kernels */
 197     gmx_bool bPMETune         = FALSE;
 198     gmx_bool bPMETunePrinting = FALSE;
 199
 200     bool bInteractiveMDstep = false;
 201
 202     /* Domain decomposition could incorrectly miss a bonded
 203        interaction, but checking for that requires a global
 204        communication stage, which does not otherwise happen in DD
 205        code. So we do that alongside the first global energy reduction
 206        after a new DD is made. These variables handle whether the
 207        check happens, and the result it returns. */
 208     bool shouldCheckNumberOfBondedInteractions = false;
 209     int  totalNumberOfBondedInteractions       = -1;
 210
 211     SimulationSignals signals;
 212     // Most global communnication stages don't propagate mdrun
 213     // signals, and will use this object to achieve that.
 214     SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
 215
 216     if (!mdrunOptions.writeConfout)
 217     {
 218         // This is on by default, and the main known use case for
 219         // turning it off is for convenience in benchmarking, which is
 220         // something that should not show up in the general user
 221         // interface.
 222         GMX_LOG(mdlog.info)
 223                 .asParagraph()
 224                 .appendText(
 225                         "The -noconfout functionality is deprecated, and may be removed in a "
 226                         "future version.");
 227     }
 228
 229     /* md-vv uses averaged full step velocities for T-control
 230        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
 231        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
 232     bTrotter = (EI_VV(ir->eI)
 233                 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
 234
 235     const bool bRerunMD = false;
 236
 237     int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
 238     bGStatEveryStep   = (nstglobalcomm == 1);
 239
 240     const SimulationGroups* groups = &top_global->groups;
 241
 242     std::unique_ptr<EssentialDynamics> ed = nullptr;
 243     if (opt2bSet("-ei", nfile, fnm))
 244     {
 245         /* Initialize essential dynamics sampling */
 246         ed = init_edsam(mdlog,
 247                         opt2fn_null("-ei", nfile, fnm),
 248                         opt2fn("-eo", nfile, fnm),
 249                         top_global,
 250                         ir,
 251                         cr,
 252                         constr,
 253                         state_global,
 254                         observablesHistory,
 255                         oenv,
 256                         startingBehavior);
 257     }
 258     else if (observablesHistory->edsamHistory)
 259     {
 260         gmx_fatal(FARGS,
 261                   "The checkpoint is from a run with essential dynamics sampling, "
 262                   "but the current run did not specify the -ei option. "
 263                   "Either specify the -ei option to mdrun, or do not use this checkpoint file.");
 264     }
 265
 266     int*                fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
 267     gmx::ArrayRef<real> lambda    = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
 268     initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda);
 269     Update upd(*ir, deform);
 270     bool   doSimulatedAnnealing = false;
 271     {
 272         // TODO: Avoid changing inputrec (#3854)
 273         // Simulated annealing updates the reference temperature.
 274         auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
 275         doSimulatedAnnealing   = initSimulatedAnnealing(nonConstInputrec, &upd);
 276     }
 277     const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
 278
 279     const t_fcdata& fcdata = *fr->fcdata;
 280
 281     bool simulationsShareState = false;
 282     int  nstSignalComm         = nstglobalcomm;
 283     {
 284         // TODO This implementation of ensemble orientation restraints is nasty because
 285         // a user can't just do multi-sim with single-sim orientation restraints.
 286         bool usingEnsembleRestraints =
 287                 (fcdata.disres->nsystems > 1) || ((ms != nullptr) && (fcdata.orires->nr != 0));
 288         bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
 289
 290         // Replica exchange, ensemble restraints and AWH need all
 291         // simulations to remain synchronized, so they need
 292         // checkpoints and stop conditions to act on the same step, so
 293         // the propagation of such signals must take place between
 294         // simulations, not just within simulations.
 295         // TODO: Make algorithm initializers set these flags.
 296         simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
 297
 298         if (simulationsShareState)
 299         {
 300             // Inter-simulation signal communication does not need to happen
 301             // often, so we use a minimum of 200 steps to reduce overhead.
 302             const int c_minimumInterSimulationSignallingInterval = 200;
 303             nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1) / nstglobalcomm)
 304                             * nstglobalcomm;
 305         }
 306     }
 307
 308     if (startingBehavior != StartingBehavior::RestartWithAppending)
 309     {
 310         pleaseCiteCouplingAlgorithms(fplog, *ir);
 311     }
 312     gmx_mdoutf*       outf = init_mdoutf(fplog,
 313                                    nfile,
 314                                    fnm,
 315                                    mdrunOptions,
 316                                    cr,
 317                                    outputProvider,
 318                                    mdModulesNotifier,
 319                                    ir,
 320                                    top_global,
 321                                    oenv,
 322                                    wcycle,
 323                                    startingBehavior,
 324                                    simulationsShareState,
 325                                    ms);
 326     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
 327                                    top_global,
 328                                    ir,
 329                                    pull_work,
 330                                    mdoutf_get_fp_dhdl(outf),
 331                                    false,
 332                                    startingBehavior,
 333                                    simulationsShareState,
 334                                    mdModulesNotifier);
 335
 336     gstat = global_stat_init(ir);
 337
 338     const auto& simulationWork     = runScheduleWork->simulationWork;
 339     const bool  useGpuForPme       = simulationWork.useGpuPme;
 340     const bool  useGpuForNonbonded = simulationWork.useGpuNonbonded;
 341     const bool  useGpuForBufferOps = simulationWork.useGpuBufferOps;
 342     const bool  useGpuForUpdate    = simulationWork.useGpuUpdate;
 343
 344     /* Check for polarizable models and flexible constraints */
 345     shellfc = init_shell_flexcon(fplog,
 346                                  top_global,
 347                                  constr ? constr->numFlexibleConstraints() : 0,
 348                                  ir->nstcalcenergy,
 349                                  DOMAINDECOMP(cr),
 350                                  useGpuForPme);
 351
 352     {
 353         double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
 354         if ((io > 2000) && MASTER(cr))
 355         {
 356             fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io);
 357         }
 358     }
 359
 360     // Local state only becomes valid now.
 361     std::unique_ptr<t_state> stateInstance;
 362     t_state*                 state;
 363
 364     gmx_localtop_t top(top_global->ffparams);
 365
 366     auto mdatoms = mdAtoms->mdatoms();
 367
 368     ForceBuffers f(fr->useMts,
 369                    ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
 370                            ? PinningPolicy::PinnedIfSupported
 371                            : PinningPolicy::CannotBePinned);
 372     if (DOMAINDECOMP(cr))
 373     {
 374         stateInstance = std::make_unique<t_state>();
 375         state         = stateInstance.get();
 376         dd_init_local_state(cr->dd, state_global, state);
 377
 378         /* Distribute the charge groups over the nodes from the master node */
 379         dd_partition_system(fplog,
 380                             mdlog,
 381                             ir->init_step,
 382                             cr,
 383                             TRUE,
 384                             1,
 385                             state_global,
 386                             *top_global,
 387                             ir,
 388                             imdSession,
 389                             pull_work,
 390                             state,
 391                             &f,
 392                             mdAtoms,
 393                             &top,
 394                             fr,
 395                             vsite,
 396                             constr,
 397                             nrnb,
 398                             nullptr,
 399                             FALSE);
 400         shouldCheckNumberOfBondedInteractions = true;
 401         upd.setNumAtoms(state->natoms);
 402     }
 403     else
 404     {
 405         state_change_natoms(state_global, state_global->natoms);
 406         /* Copy the pointer to the global state */
 407         state = state_global;
 408
 409         /* Generate and initialize new topology */
 410         mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &f, mdAtoms, constr, vsite, shellfc);
 411
 412         upd.setNumAtoms(state->natoms);
 413     }
 414
 415     std::unique_ptr<UpdateConstrainGpu> integrator;
 416
 417     StatePropagatorDataGpu* stateGpu = fr->stateGpu;
 418
 419     // TODO: the assertions below should be handled by UpdateConstraintsBuilder.
 420     if (useGpuForUpdate)
 421     {
 422         GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
 423                                    || constr->numConstraintsTotal() == 0,
 424                            "Constraints in domain decomposition are only supported with update "
 425                            "groups if using GPU update.\n");
 426         GMX_RELEASE_ASSERT(ir->eConstrAlg != econtSHAKE || constr == nullptr
 427                                    || constr->numConstraintsTotal() == 0,
 428                            "SHAKE is not supported with GPU update.");
 429         GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
 430                            "Either PME or short-ranged non-bonded interaction tasks must run on "
 431                            "the GPU to use GPU update.\n");
 432         GMX_RELEASE_ASSERT(ir->eI == eiMD,
 433                            "Only the md integrator is supported with the GPU update.\n");
 434         GMX_RELEASE_ASSERT(
 435                 ir->etc != etcNOSEHOOVER,
 436                 "Nose-Hoover temperature coupling is not supported with the GPU update.\n");
 437         GMX_RELEASE_ASSERT(
 438                 ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN || ir->epc == epcBERENDSEN
 439                         || ir->epc == epcCRESCALE,
 440                 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are supported "
 441                 "with the GPU update.\n");
 442         GMX_RELEASE_ASSERT(!mdatoms->haveVsites,
 443                            "Virtual sites are not supported with the GPU update.\n");
 444         GMX_RELEASE_ASSERT(ed == nullptr,
 445                            "Essential dynamics is not supported with the GPU update.\n");
 446         GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(*ir->pull),
 447                            "Constraints pulling is not supported with the GPU update.\n");
 448         GMX_RELEASE_ASSERT(fcdata.orires->nr == 0,
 449                            "Orientation restraints are not supported with the GPU update.\n");
 450         GMX_RELEASE_ASSERT(
 451                 ir->efep == efepNO
 452                         || (!haveFepPerturbedMasses(*top_global) && !havePerturbedConstraints(*top_global)),
 453                 "Free energy perturbation of masses and constraints are not supported with the GPU "
 454                 "update.");
 455
 456         if (constr != nullptr && constr->numConstraintsTotal() > 0)
 457         {
 458             GMX_LOG(mdlog.info)
 459                     .asParagraph()
 460                     .appendText("Updating coordinates and applying constraints on the GPU.");
 461         }
 462         else
 463         {
 464             GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
 465         }
 466         GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
 467                            "Device stream manager should be initialized in order to use GPU "
 468                            "update-constraints.");
 469         GMX_RELEASE_ASSERT(
 470                 fr->deviceStreamManager->streamIsValid(gmx::DeviceStreamType::UpdateAndConstraints),
 471                 "Update stream should be initialized in order to use GPU "
 472                 "update-constraints.");
 473         integrator = std::make_unique<UpdateConstrainGpu>(
 474                 *ir,
 475                 *top_global,
 476                 fr->deviceStreamManager->context(),
 477                 fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints),
 478                 stateGpu->xUpdatedOnDevice(),
 479                 wcycle);
 480
 481         integrator->setPbc(PbcType::Xyz, state->box);
 482     }
 483
 484     if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
 485     {
 486         changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
 487     }
 488     if (useGpuForUpdate)
 489     {
 490         changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported);
 491     }
 492
 493     // NOTE: The global state is no longer used at this point.
 494     // But state_global is still used as temporary storage space for writing
 495     // the global state to file and potentially for replica exchange.
 496     // (Global topology should persist.)
 497
 498     update_mdatoms(mdatoms, state->lambda[efptMASS]);
 499
 500     if (ir->bExpanded)
 501     {
 502         /* Check nstexpanded here, because the grompp check was broken */
 503         if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
 504         {
 505             gmx_fatal(FARGS,
 506                       "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
 507         }
 508         init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation, ir, state->dfhist);
 509     }
 510
 511     if (MASTER(cr))
 512     {
 513         EnergyData::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput);
 514     }
 515
 516     preparePrevStepPullCom(
 517             ir, pull_work, mdatoms->massT, state, state_global, cr, startingBehavior != StartingBehavior::NewSimulation);
 518
 519     // TODO: Remove this by converting AWH into a ForceProvider
 520     auto awh = prepareAwhModule(fplog,
 521                                 *ir,
 522                                 state_global,
 523                                 cr,
 524                                 ms,
 525                                 startingBehavior != StartingBehavior::NewSimulation,
 526                                 shellfc != nullptr,
 527                                 opt2fn("-awh", nfile, fnm),
 528                                 pull_work);
 529
 530     if (useReplicaExchange && MASTER(cr))
 531     {
 532         repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir, replExParams);
 533     }
 534     /* PME tuning is only supported in the Verlet scheme, with PME for
 535      * Coulomb. It is not supported with only LJ PME. */
 536     bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && !mdrunOptions.reproducible
 537                 && ir->cutoff_scheme != ecutsGROUP);
 538
 539     pme_load_balancing_t* pme_loadbal = nullptr;
 540     if (bPMETune)
 541     {
 542         pme_loadbal_init(
 543                 &pme_loadbal, cr, mdlog, *ir, state->box, *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu());
 544     }
 545
 546     if (!ir->bContinuation)
 547     {
 548         if (state->flags & (1U << estV))
 549         {
 550             auto v = makeArrayRef(state->v);
 551             /* Set the velocities of vsites, shells and frozen atoms to zero */
 552             for (i = 0; i < mdatoms->homenr; i++)
 553             {
 554                 if (mdatoms->ptype[i] == eptVSite || mdatoms->ptype[i] == eptShell)
 555                 {
 556                     clear_rvec(v[i]);
 557                 }
 558                 else if (mdatoms->cFREEZE)
 559                 {
 560                     for (m = 0; m < DIM; m++)
 561                     {
 562                         if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 563                         {
 564                             v[i][m] = 0;
 565                         }
 566                     }
 567                 }
 568             }
 569         }
 570
 571         if (constr)
 572         {
 573             /* Constrain the initial coordinates and velocities */
 574             do_constrain_first(fplog,
 575                                constr,
 576                                ir,
 577                                mdatoms->nr,
 578                                mdatoms->homenr,
 579                                state->x.arrayRefWithPadding(),
 580                                state->v.arrayRefWithPadding(),
 581                                state->box,
 582                                state->lambda[efptBONDED]);
 583         }
 584         if (vsite)
 585         {
 586             /* Construct the virtual sites for the initial configuration */
 587             vsite->construct(state->x, ir->delta_t, {}, state->box);
 588         }
 589     }
 590
 591     if (ir->efep != efepNO)
 592     {
 593         /* Set free energy calculation frequency as the greatest common
 594          * denominator of nstdhdl and repl_ex_nst. */
 595         nstfep = ir->fepvals->nstdhdl;
 596         if (ir->bExpanded)
 597         {
 598             nstfep = std::gcd(ir->expandedvals->nstexpanded, nstfep);
 599         }
 600         if (useReplicaExchange)
 601         {
 602             nstfep = std::gcd(replExParams.exchangeInterval, nstfep);
 603         }
 604         if (ir->bDoAwh)
 605         {
 606             nstfep = std::gcd(ir->awhParams->nstSampleCoord, nstfep);
 607         }
 608     }
 609
 610     /* Be REALLY careful about what flags you set here. You CANNOT assume
 611      * this is the first step, since we might be restarting from a checkpoint,
 612      * and in that case we should not do any modifications to the state.
 613      */
 614     bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
 615
 616     // When restarting from a checkpoint, it can be appropriate to
 617     // initialize ekind from quantities in the checkpoint. Otherwise,
 618     // compute_globals must initialize ekind before the simulation
 619     // starts/restarts. However, only the master rank knows what was
 620     // found in the checkpoint file, so we have to communicate in
 621     // order to coordinate the restart.
 622     //
 623     // TODO Consider removing this communication if/when checkpoint
 624     // reading directly follows .tpr reading, because all ranks can
 625     // agree on hasReadEkinState at that time.
 626     bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
 627     if (PAR(cr))
 628     {
 629         gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr->mpi_comm_mygroup);
 630     }
 631     if (hasReadEkinState)
 632     {
 633         restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
 634     }
 635
 636     unsigned int cglo_flags =
 637             (CGLO_TEMPERATURE | CGLO_GSTAT | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
 638              | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) | (hasReadEkinState ? CGLO_READEKIN : 0));
 639
 640     bSumEkinhOld = FALSE;
 641
 642     t_vcm vcm(top_global->groups, *ir);
 643     reportComRemovalInfo(fplog, vcm);
 644
 645     /* To minimize communication, compute_globals computes the COM velocity
 646      * and the kinetic energy for the velocities without COM motion removed.
 647      * Thus to get the kinetic energy without the COM contribution, we need
 648      * to call compute_globals twice.
 649      */
 650     for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
 651     {
 652         unsigned int cglo_flags_iteration = cglo_flags;
 653         if (bStopCM && cgloIteration == 0)
 654         {
 655             cglo_flags_iteration |= CGLO_STOPCM;
 656             cglo_flags_iteration &= ~CGLO_TEMPERATURE;
 657         }
 658         compute_globals(gstat,
 659                         cr,
 660                         ir,
 661                         fr,
 662                         ekind,
 663                         makeConstArrayRef(state->x),
 664                         makeConstArrayRef(state->v),
 665                         state->box,
 666                         mdatoms,
 667                         nrnb,
 668                         &vcm,
 669                         nullptr,
 670                         enerd,
 671                         force_vir,
 672                         shake_vir,
 673                         total_vir,
 674                         pres,
 675                         constr,
 676                         &nullSignaller,
 677                         state->box,
 678                         &totalNumberOfBondedInteractions,
 679                         &bSumEkinhOld,
 680                         cglo_flags_iteration
 681                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
 682                                                                          : 0));
 683         if (cglo_flags_iteration & CGLO_STOPCM)
 684         {
 685             /* At initialization, do not pass x with acceleration-correction mode
 686              * to avoid (incorrect) correction of the initial coordinates.
 687              */
 688             auto x = (vcm.mode == ecmLINEAR_ACCELERATION_CORRECTION) ? ArrayRef<RVec>()
 689                                                                      : makeArrayRef(state->x);
 690             process_and_stopcm_grp(fplog, &vcm, *mdatoms, x, makeArrayRef(state->v));
 691             inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
 692         }
 693     }
 694     checkNumberOfBondedInteractions(mdlog,
 695                                     cr,
 696                                     totalNumberOfBondedInteractions,
 697                                     top_global,
 698                                     &top,
 699                                     makeConstArrayRef(state->x),
 700                                     state->box,
 701                                     &shouldCheckNumberOfBondedInteractions);
 702     if (ir->eI == eiVVAK)
 703     {
 704         /* a second call to get the half step temperature initialized as well */
 705         /* we do the same call as above, but turn the pressure off -- internally to
 706            compute_globals, this is recognized as a velocity verlet half-step
 707            kinetic energy calculation.  This minimized excess variables, but
 708            perhaps loses some logic?*/
 709
 710         compute_globals(gstat,
 711                         cr,
 712                         ir,
 713                         fr,
 714                         ekind,
 715                         makeConstArrayRef(state->x),
 716                         makeConstArrayRef(state->v),
 717                         state->box,
 718                         mdatoms,
 719                         nrnb,
 720                         &vcm,
 721                         nullptr,
 722                         enerd,
 723                         force_vir,
 724                         shake_vir,
 725                         total_vir,
 726                         pres,
 727                         constr,
 728                         &nullSignaller,
 729                         state->box,
 730                         nullptr,
 731                         &bSumEkinhOld,
 732                         cglo_flags & ~CGLO_PRESSURE);
 733     }
 734
 735     /* Calculate the initial half step temperature, and save the ekinh_old */
 736     if (startingBehavior == StartingBehavior::NewSimulation)
 737     {
 738         for (i = 0; (i < ir->opts.ngtc); i++)
 739         {
 740             copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
 741         }
 742     }
 743
 744     /* need to make an initiation call to get the Trotter variables set, as well as other constants
 745        for non-trotter temperature control */
 746     auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
 747
 748     if (MASTER(cr))
 749     {
 750         if (!ir->bContinuation)
 751         {
 752             if (constr && ir->eConstrAlg == econtLINCS)
 753             {
 754                 fprintf(fplog,
 755                         "RMS relative constraint deviation after constraining: %.2e\n",
 756                         constr->rmsd());
 757             }
 758             if (EI_STATE_VELOCITY(ir->eI))
 759             {
 760                 real temp = enerd->term[F_TEMP];
 761                 if (ir->eI != eiVV)
 762                 {
 763                     /* Result of Ekin averaged over velocities of -half
 764                      * and +half step, while we only have -half step here.
 765                      */
 766                     temp *= 2;
 767                 }
 768                 fprintf(fplog, "Initial temperature: %g K\n", temp);
 769             }
 770         }
 771
 772         char tbuf[20];
 773         fprintf(stderr, "starting mdrun '%s'\n", *(top_global->name));
 774         if (ir->nsteps >= 0)
 775         {
 776             sprintf(tbuf, "%8.1f", (ir->init_step + ir->nsteps) * ir->delta_t);
 777         }
 778         else
 779         {
 780             sprintf(tbuf, "%s", "infinite");
 781         }
 782         if (ir->init_step > 0)
 783         {
 784             fprintf(stderr,
 785                     "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 786                     gmx_step_str(ir->init_step + ir->nsteps, sbuf),
 787                     tbuf,
 788                     gmx_step_str(ir->init_step, sbuf2),
 789                     ir->init_step * ir->delta_t);
 790         }
 791         else
 792         {
 793             fprintf(stderr, "%s steps, %s ps.\n", gmx_step_str(ir->nsteps, sbuf), tbuf);
 794         }
 795         fprintf(fplog, "\n");
 796     }
 797
 798     walltime_accounting_start_time(walltime_accounting);
 799     wallcycle_start(wcycle, ewcRUN);
 800     print_start(fplog, cr, walltime_accounting, "mdrun");
 801
 802     /***********************************************************
 803      *
 804      *             Loop over MD steps
 805      *
 806      ************************************************************/
 807
 808     bFirstStep = TRUE;
 809     /* Skip the first Nose-Hoover integration when we get the state from tpx */
 810     bInitStep        = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
 811     bSumEkinhOld     = FALSE;
 812     bExchanged       = FALSE;
 813     bNeedRepartition = FALSE;
 814
 815     step     = ir->init_step;
 816     step_rel = 0;
 817
 818     auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
 819             compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]),
 820             simulationsShareState,
 821             MASTER(cr),
 822             ir->nstlist,
 823             mdrunOptions.reproducible,
 824             nstSignalComm,
 825             mdrunOptions.maximumHoursToRun,
 826             ir->nstlist == 0,
 827             fplog,
 828             step,
 829             bNS,
 830             walltime_accounting);
 831
 832     auto checkpointHandler = std::make_unique<CheckpointHandler>(
 833             compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
 834             simulationsShareState,
 835             ir->nstlist == 0,
 836             MASTER(cr),
 837             mdrunOptions.writeConfout,
 838             mdrunOptions.checkpointOptions.period);
 839
 840     const bool resetCountersIsLocal = true;
 841     auto       resetHandler         = std::make_unique<ResetHandler>(
 842             compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]),
 843             !resetCountersIsLocal,
 844             ir->nsteps,
 845             MASTER(cr),
 846             mdrunOptions.timingOptions.resetHalfway,
 847             mdrunOptions.maximumHoursToRun,
 848             mdlog,
 849             wcycle,
 850             walltime_accounting);
 851
 852     const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
 853
 854     if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
 855     {
 856         logInitialMultisimStatus(ms, cr, mdlog, simulationsShareState, ir->nsteps, ir->init_step);
 857     }
 858
 859     /* and stop now if we should */
 860     bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
 861     while (!bLastStep)
 862     {
 863
 864         /* Determine if this is a neighbor search step */
 865         bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
 866
 867         if (bPMETune && bNStList)
 868         {
 869             // This has to be here because PME load balancing is called so early.
 870             // TODO: Move to after all booleans are defined.
 871             if (useGpuForUpdate && !bFirstStep)
 872             {
 873                 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
 874                 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
 875             }
 876             /* PME grid + cut-off optimization with GPUs or PME nodes */
 877             pme_loadbal_do(pme_loadbal,
 878                            cr,
 879                            (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
 880                            fplog,
 881                            mdlog,
 882                            *ir,
 883                            fr,
 884                            state->box,
 885                            state->x,
 886                            wcycle,
 887                            step,
 888                            step_rel,
 889                            &bPMETunePrinting,
 890                            simulationWork.useGpuPmePpCommunication);
 891         }
 892
 893         wallcycle_start(wcycle, ewcSTEP);
 894
 895         bLastStep = (step_rel == ir->nsteps);
 896         t         = t0 + step * ir->delta_t;
 897
 898         // TODO Refactor this, so that nstfep does not need a default value of zero
 899         if (ir->efep != efepNO || ir->bSimTemp)
 900         {
 901             /* find and set the current lambdas */
 902             state->lambda = currentLambdas(step, *(ir->fepvals), state->fep_state);
 903
 904             bDoDHDL     = do_per_step(step, ir->fepvals->nstdhdl);
 905             bDoFEP      = ((ir->efep != efepNO) && do_per_step(step, nstfep));
 906             bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded)
 907                            && (!bFirstStep));
 908         }
 909
 910         bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep
 911                      && do_per_step(step, replExParams.exchangeInterval));
 912
 913         if (doSimulatedAnnealing)
 914         {
 915             // TODO: Avoid changing inputrec (#3854)
 916             // Simulated annealing updates the reference temperature.
 917             auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
 918             update_annealing_target_temp(nonConstInputrec, t, &upd);
 919         }
 920
 921         /* Stop Center of Mass motion */
 922         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
 923
 924         /* Determine whether or not to do Neighbour Searching */
 925         bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
 926
 927         /* Note that the stopHandler will cause termination at nstglobalcomm
 928          * steps. Since this concides with nstcalcenergy, nsttcouple and/or
 929          * nstpcouple steps, we have computed the half-step kinetic energy
 930          * of the previous step and can always output energies at the last step.
 931          */
 932         bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
 933
 934         /* do_log triggers energy and virial calculation. Because this leads
 935          * to different code paths, forces can be different. Thus for exact
 936          * continuation we should avoid extra log output.
 937          * Note that the || bLastStep can result in non-exact continuation
 938          * beyond the last step. But we don't consider that to be an issue.
 939          */
 940         do_log     = (do_per_step(step, ir->nstlog)
 941                   || (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) || bLastStep);
 942         do_verbose = mdrunOptions.verbose
 943                      && (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
 944
 945         if (useGpuForUpdate && !bFirstStep && bNS)
 946         {
 947             // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
 948             stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
 949             stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
 950             // Copy coordinate from the GPU when needed at the search step.
 951             // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
 952             // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
 953             stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
 954             stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
 955         }
 956
 957         if (bNS && !(bFirstStep && ir->bContinuation))
 958         {
 959             bMasterState = FALSE;
 960             /* Correct the new box if it is too skewed */
 961             if (inputrecDynamicBox(ir))
 962             {
 963                 if (correct_box(fplog, step, state->box))
 964                 {
 965                     bMasterState = TRUE;
 966                     // If update is offloaded, it should be informed about the box size change
 967                     if (useGpuForUpdate)
 968                     {
 969                         integrator->setPbc(PbcType::Xyz, state->box);
 970                     }
 971                 }
 972             }
 973             if (DOMAINDECOMP(cr) && bMasterState)
 974             {
 975                 dd_collect_state(cr->dd, state, state_global);
 976             }
 977
 978             if (DOMAINDECOMP(cr))
 979             {
 980                 /* Repartition the domain decomposition */
 981                 dd_partition_system(fplog,
 982                                     mdlog,
 983                                     step,
 984                                     cr,
 985                                     bMasterState,
 986                                     nstglobalcomm,
 987                                     state_global,
 988                                     *top_global,
 989                                     ir,
 990                                     imdSession,
 991                                     pull_work,
 992                                     state,
 993                                     &f,
 994                                     mdAtoms,
 995                                     &top,
 996                                     fr,
 997                                     vsite,
 998                                     constr,
 999                                     nrnb,
1000                                     wcycle,
1001                                     do_verbose && !bPMETunePrinting);
1002                 shouldCheckNumberOfBondedInteractions = true;
1003                 upd.setNumAtoms(state->natoms);
1004             }
1005         }
1006
1007         // Allocate or re-size GPU halo exchange object, if necessary
1008         if (bNS && havePPDomainDecomposition(cr) && simulationWork.useGpuHaloExchange)
1009         {
1010             GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
1011                                "GPU device manager has to be initialized to use GPU "
1012                                "version of halo exchange.");
1013             constructGpuHaloExchange(mdlog, *cr, *fr->deviceStreamManager, wcycle);
1014         }
1015
1016         if (MASTER(cr) && do_log)
1017         {
1018             gmx::EnergyOutput::printHeader(
1019                     fplog, step, t); /* can we improve the information printed here? */
1020         }
1021
1022         if (ir->efep != efepNO)
1023         {
1024             update_mdatoms(mdatoms, state->lambda[efptMASS]);
1025         }
1026
1027         if (bExchanged)
1028         {
1029             /* We need the kinetic energy at minus the half step for determining
1030              * the full step kinetic energy and possibly for T-coupling.*/
1031             /* This may not be quite working correctly yet . . . . */
1032             compute_globals(gstat,
1033                             cr,
1034                             ir,
1035                             fr,
1036                             ekind,
1037                             makeConstArrayRef(state->x),
1038                             makeConstArrayRef(state->v),
1039                             state->box,
1040                             mdatoms,
1041                             nrnb,
1042                             &vcm,
1043                             wcycle,
1044                             enerd,
1045                             nullptr,
1046                             nullptr,
1047                             nullptr,
1048                             nullptr,
1049                             constr,
1050                             &nullSignaller,
1051                             state->box,
1052                             &totalNumberOfBondedInteractions,
1053                             &bSumEkinhOld,
1054                             CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
1055             checkNumberOfBondedInteractions(mdlog,
1056                                             cr,
1057                                             totalNumberOfBondedInteractions,
1058                                             top_global,
1059                                             &top,
1060                                             makeConstArrayRef(state->x),
1061                                             state->box,
1062                                             &shouldCheckNumberOfBondedInteractions);
1063         }
1064         clear_mat(force_vir);
1065
1066         checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
1067
1068         /* Determine the energy and pressure:
1069          * at nstcalcenergy steps and at energy output steps (set below).
1070          */
1071         if (EI_VV(ir->eI) && (!bInitStep))
1072         {
1073             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
1074             bCalcVir      = bCalcEnerStep
1075                        || (ir->epc != epcNO
1076                            && (do_per_step(step, ir->nstpcouple) || do_per_step(step - 1, ir->nstpcouple)));
1077         }
1078         else
1079         {
1080             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
1081             bCalcVir = bCalcEnerStep || (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
1082         }
1083         bCalcEner = bCalcEnerStep;
1084
1085         do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
1086
1087         if (do_ene || do_log || bDoReplEx)
1088         {
1089             bCalcVir  = TRUE;
1090             bCalcEner = TRUE;
1091         }
1092
1093         /* Do we need global communication ? */
1094         bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm)
1095                   || (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm)));
1096
1097         force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0)
1098                        | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0)
1099                        | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0));
1100         if (fr->useMts && !do_per_step(step, ir->nstfout))
1101         {
1102             force_flags |= GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE;
1103         }
1104
1105         if (shellfc)
1106         {
1107             /* Now is the time to relax the shells */
1108             relax_shell_flexcon(fplog,
1109                                 cr,
1110                                 ms,
1111                                 mdrunOptions.verbose,
1112                                 enforcedRotation,
1113                                 step,
1114                                 ir,
1115                                 imdSession,
1116                                 pull_work,
1117                                 bNS,
1118                                 force_flags,
1119                                 &top,
1120                                 constr,
1121                                 enerd,
1122                                 state->natoms,
1123                                 state->x.arrayRefWithPadding(),
1124                                 state->v.arrayRefWithPadding(),
1125                                 state->box,
1126                                 state->lambda,
1127                                 &state->hist,
1128                                 &f.view(),
1129                                 force_vir,
1130                                 mdatoms,
1131                                 nrnb,
1132                                 wcycle,
1133                                 shellfc,
1134                                 fr,
1135                                 runScheduleWork,
1136                                 t,
1137                                 mu_tot,
1138                                 vsite,
1139                                 ddBalanceRegionHandler);
1140         }
1141         else
1142         {
1143             /* The AWH history need to be saved _before_ doing force calculations where the AWH bias
1144                is updated (or the AWH update will be performed twice for one step when continuing).
1145                It would be best to call this update function from do_md_trajectory_writing but that
1146                would occur after do_force. One would have to divide the update_awh function into one
1147                function applying the AWH force and one doing the AWH bias update. The update AWH
1148                bias function could then be called after do_md_trajectory_writing (then containing
1149                update_awh_history). The checkpointing will in the future probably moved to the start
1150                of the md loop which will rid of this issue. */
1151             if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
1152             {
1153                 awh->updateHistory(state_global->awhHistory.get());
1154             }
1155
1156             /* The coordinates (x) are shifted (to get whole molecules)
1157              * in do_force.
1158              * This is parallellized as well, and does communication too.
1159              * Check comments in sim_util.c
1160              */
1161             do_force(fplog,
1162                      cr,
1163                      ms,
1164                      ir,
1165                      awh.get(),
1166                      enforcedRotation,
1167                      imdSession,
1168                      pull_work,
1169                      step,
1170                      nrnb,
1171                      wcycle,
1172                      &top,
1173                      state->box,
1174                      state->x.arrayRefWithPadding(),
1175                      &state->hist,
1176                      &f.view(),
1177                      force_vir,
1178                      mdatoms,
1179                      enerd,
1180                      state->lambda,
1181                      fr,
1182                      runScheduleWork,
1183                      vsite,
1184                      mu_tot,
1185                      t,
1186                      ed ? ed->getLegacyED() : nullptr,
1187                      (bNS ? GMX_FORCE_NS : 0) | force_flags,
1188                      ddBalanceRegionHandler);
1189         }
1190
1191         // VV integrators do not need the following velocity half step
1192         // if it is the first step after starting from a checkpoint.
1193         // That is, the half step is needed on all other steps, and
1194         // also the first step when starting from a .tpr file.
1195         if (EI_VV(ir->eI))
1196         {
1197             integrateVVFirstStep(step,
1198                                  bFirstStep,
1199                                  bInitStep,
1200                                  startingBehavior,
1201                                  nstglobalcomm,
1202                                  ir,
1203                                  fr,
1204                                  cr,
1205                                  state,
1206                                  mdatoms,
1207                                  fcdata,
1208                                  &MassQ,
1209                                  &vcm,
1210                                  top_global,
1211                                  top,
1212                                  enerd,
1213                                  ekind,
1214                                  gstat,
1215                                  &last_ekin,
1216                                  bCalcVir,
1217                                  total_vir,
1218                                  shake_vir,
1219                                  force_vir,
1220                                  pres,
1221                                  M,
1222                                  do_log,
1223                                  do_ene,
1224                                  bCalcEner,
1225                                  bGStat,
1226                                  bStopCM,
1227                                  bTrotter,
1228                                  bExchanged,
1229                                  &bSumEkinhOld,
1230                                  &shouldCheckNumberOfBondedInteractions,
1231                                  &saved_conserved_quantity,
1232                                  &f,
1233                                  &upd,
1234                                  constr,
1235                                  &nullSignaller,
1236                                  trotter_seq,
1237                                  nrnb,
1238                                  mdlog,
1239                                  fplog,
1240                                  wcycle);
1241         }
1242
1243         /* ########  END FIRST UPDATE STEP  ############## */
1244         /* ########  If doing VV, we now have v(dt) ###### */
1245         if (bDoExpanded)
1246         {
1247             /* perform extended ensemble sampling in lambda - we don't
1248                actually move to the new state before outputting
1249                statistics, but if performing simulated tempering, we
1250                do update the velocities and the tau_t. */
1251             // TODO: Avoid changing inputrec (#3854)
1252             // Simulated tempering updates the reference temperature.
1253             // Expanded ensemble without simulated tempering does not change the inputrec.
1254             auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
1255             lamnew                 = ExpandedEnsembleDynamics(fplog,
1256                                               nonConstInputrec,
1257                                               enerd,
1258                                               state,
1259                                               &MassQ,
1260                                               state->fep_state,
1261                                               state->dfhist,
1262                                               step,
1263                                               state->v.rvec_array(),
1264                                               mdatoms);
1265             /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1266             if (MASTER(cr))
1267             {
1268                 copy_df_history(state_global->dfhist, state->dfhist);
1269             }
1270         }
1271
1272         // Copy coordinate from the GPU for the output/checkpointing if the update is offloaded and
1273         // coordinates have not already been copied for i) search or ii) CPU force tasks.
1274         if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork
1275             && (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)
1276                 || checkpointHandler->isCheckpointingStep()))
1277         {
1278             stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1279             stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1280         }
1281         // Copy velocities if needed for the output/checkpointing.
1282         // NOTE: Copy on the search steps is done at the beginning of the step.
1283         if (useGpuForUpdate && !bNS
1284             && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep()))
1285         {
1286             stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1287             stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1288         }
1289         // Copy forces for the output if the forces were reduced on the GPU (not the case on virial steps)
1290         // and update is offloaded hence forces are kept on the GPU for update and have not been
1291         // already transferred in do_force().
1292         // TODO: There should be an improved, explicit mechanism that ensures this copy is only executed
1293         //       when the forces are ready on the GPU -- the same synchronizer should be used as the one
1294         //       prior to GPU update.
1295         // TODO: When the output flags will be included in step workload, this copy can be combined with the
1296         //       copy call in do_force(...).
1297         // NOTE: The forces should not be copied here if the vsites are present, since they were modified
1298         //       on host after the D2H copy in do_force(...).
1299         if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite)
1300             && do_per_step(step, ir->nstfout))
1301         {
1302             stateGpu->copyForcesFromGpu(f.view().force(), AtomLocality::Local);
1303             stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
1304         }
1305         /* Now we have the energies and forces corresponding to the
1306          * coordinates at time t. We must output all of this before
1307          * the update.
1308          */
1309         do_md_trajectory_writing(fplog,
1310                                  cr,
1311                                  nfile,
1312                                  fnm,
1313                                  step,
1314                                  step_rel,
1315                                  t,
1316                                  ir,
1317                                  state,
1318                                  state_global,
1319                                  observablesHistory,
1320                                  top_global,
1321                                  fr,
1322                                  outf,
1323                                  energyOutput,
1324                                  ekind,
1325                                  f.view().force(),
1326                                  checkpointHandler->isCheckpointingStep(),
1327                                  bRerunMD,
1328                                  bLastStep,
1329                                  mdrunOptions.writeConfout,
1330                                  bSumEkinhOld);
1331         /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1332         bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1333
1334         /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1335         if (startingBehavior != StartingBehavior::NewSimulation && bFirstStep
1336             && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1337         {
1338             copy_mat(state->svir_prev, shake_vir);
1339             copy_mat(state->fvir_prev, force_vir);
1340         }
1341
1342         stopHandler->setSignal();
1343         resetHandler->setSignal(walltime_accounting);
1344
1345         if (bGStat || !PAR(cr))
1346         {
1347             /* In parallel we only have to check for checkpointing in steps
1348              * where we do global communication,
1349              *  otherwise the other nodes don't know.
1350              */
1351             checkpointHandler->setSignal(walltime_accounting);
1352         }
1353
1354         /* #########   START SECOND UPDATE STEP ################# */
1355
1356         /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen
1357            controlled in preprocessing */
1358
1359         if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1360         {
1361             gmx_bool bIfRandomize;
1362             bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1363             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1364             if (constr && bIfRandomize)
1365             {
1366                 constrain_velocities(constr, do_log, do_ene, step, state, nullptr, false, nullptr);
1367             }
1368         }
1369         /* Box is changed in update() when we do pressure coupling,
1370          * but we should still use the old box for energy corrections and when
1371          * writing it to the energy file, so it matches the trajectory files for
1372          * the same timestep above. Make a copy in a separate array.
1373          */
1374         copy_mat(state->box, lastbox);
1375
1376         dvdl_constr = 0;
1377
1378         if (!useGpuForUpdate)
1379         {
1380             wallcycle_start(wcycle, ewcUPDATE);
1381         }
1382         /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1383         if (bTrotter)
1384         {
1385             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1386             /* We can only do Berendsen coupling after we have summed
1387              * the kinetic energy or virial. Since the happens
1388              * in global_state after update, we should only do it at
1389              * step % nstlist = 1 with bGStatEveryStep=FALSE.
1390              */
1391         }
1392         else
1393         {
1394             update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1395             update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep);
1396         }
1397
1398         /* With leap-frog type integrators we compute the kinetic energy
1399          * at a whole time step as the average of the half-time step kinetic
1400          * energies of two subsequent steps. Therefore we need to compute the
1401          * half step kinetic energy also if we need energies at the next step.
1402          */
1403         const bool needHalfStepKineticEnergy =
1404                 (!EI_VV(ir->eI) && (do_per_step(step + 1, nstglobalcomm) || step_rel + 1 == ir->nsteps));
1405
1406         // Parrinello-Rahman requires the pressure to be availible before the update to compute
1407         // the velocity scaling matrix. Hence, it runs one step after the nstpcouple step.
1408         const bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN
1409                                          && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
1410
1411         if (EI_VV(ir->eI))
1412         {
1413             GMX_ASSERT(!useGpuForUpdate, "GPU update is not supported with VVAK integrator.");
1414
1415             integrateVVSecondStep(step,
1416                                   ir,
1417                                   fr,
1418                                   cr,
1419                                   state,
1420                                   mdatoms,
1421                                   fcdata,
1422                                   &MassQ,
1423                                   &vcm,
1424                                   pull_work,
1425                                   enerd,
1426                                   ekind,
1427                                   gstat,
1428                                   &dvdl_constr,
1429                                   bCalcVir,
1430                                   total_vir,
1431                                   shake_vir,
1432                                   force_vir,
1433                                   pres,
1434                                   M,
1435                                   lastbox,
1436                                   do_log,
1437                                   do_ene,
1438                                   bGStat,
1439                                   &bSumEkinhOld,
1440                                   &f,
1441                                   &cbuf,
1442                                   &upd,
1443                                   constr,
1444                                   &nullSignaller,
1445                                   trotter_seq,
1446                                   nrnb,
1447                                   wcycle);
1448         }
1449         else
1450         {
1451             if (useGpuForUpdate)
1452             {
1453
1454                 wallcycle_stop(wcycle, ewcUPDATE);
1455
1456                 if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
1457                 {
1458                     integrator->set(stateGpu->getCoordinates(),
1459                                     stateGpu->getVelocities(),
1460                                     stateGpu->getForces(),
1461                                     top.idef,
1462                                     *mdatoms,
1463                                     ekind->ngtc);
1464
1465                     // Copy data to the GPU after buffers might have being reinitialized
1466                     stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1467                     stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1468                 }
1469
1470                 if (simulationWork.useGpuPme && !runScheduleWork->simulationWork.useGpuPmePpCommunication
1471                     && !thisRankHasDuty(cr, DUTY_PME))
1472                 {
1473                     // The PME forces were recieved to the host, so have to be copied
1474                     stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::All);
1475                 }
1476                 else if (!runScheduleWork->stepWork.useGpuFBufferOps)
1477                 {
1478                     // The buffer ops were not offloaded this step, so the forces are on the
1479                     // host and have to be copied
1480                     stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::Local);
1481                 }
1482
1483                 const bool doTemperatureScaling =
1484                         (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1485
1486                 // This applies Leap-Frog, LINCS and SETTLE in succession
1487                 integrator->integrate(
1488                         stateGpu->getForcesReadyOnDeviceEvent(
1489                                 AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps),
1490                         ir->delta_t,
1491                         true,
1492                         bCalcVir,
1493                         shake_vir,
1494                         doTemperatureScaling,
1495                         ekind->tcstat,
1496                         doParrinelloRahman,
1497                         ir->nstpcouple * ir->delta_t,
1498                         M);
1499
1500                 // Copy velocities D2H after update if:
1501                 // - Globals are computed this step (includes the energy output steps).
1502                 // - Temperature is needed for the next step.
1503                 if (bGStat || needHalfStepKineticEnergy)
1504                 {
1505                     stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1506                     stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1507                 }
1508             }
1509             else
1510             {
1511                 /* With multiple time stepping we need to do an additional normal
1512                  * update step to obtain the virial, as the actual MTS integration
1513                  * using an acceleration where the slow forces are multiplied by mtsFactor.
1514                  * Using that acceleration would result in a virial with the slow
1515                  * force contribution would be a factor mtsFactor too large.
1516                  */
1517                 if (fr->useMts && bCalcVir && constr != nullptr)
1518                 {
1519                     upd.update_for_constraint_virial(
1520                             *ir, *mdatoms, *state, f.view().forceWithPadding(), *ekind);
1521
1522                     constrain_coordinates(constr,
1523                                           do_log,
1524                                           do_ene,
1525                                           step,
1526                                           state,
1527                                           upd.xp()->arrayRefWithPadding(),
1528                                           &dvdl_constr,
1529                                           bCalcVir,
1530                                           shake_vir);
1531                 }
1532
1533                 ArrayRefWithPadding<const RVec> forceCombined =
1534                         (fr->useMts && step % ir->mtsLevels[1].stepFactor == 0)
1535                                 ? f.view().forceMtsCombinedWithPadding()
1536                                 : f.view().forceWithPadding();
1537                 upd.update_coords(
1538                         *ir, step, mdatoms, state, forceCombined, fcdata, ekind, M, etrtPOSITION, cr, constr != nullptr);
1539
1540                 wallcycle_stop(wcycle, ewcUPDATE);
1541
1542                 constrain_coordinates(constr,
1543                                       do_log,
1544                                       do_ene,
1545                                       step,
1546                                       state,
1547                                       upd.xp()->arrayRefWithPadding(),
1548                                       &dvdl_constr,
1549                                       bCalcVir && !fr->useMts,
1550                                       shake_vir);
1551
1552                 upd.update_sd_second_half(
1553                         *ir, step, &dvdl_constr, mdatoms, state, cr, nrnb, wcycle, constr, do_log, do_ene);
1554                 upd.finish_update(*ir, mdatoms, state, wcycle, constr != nullptr);
1555             }
1556
1557             if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1558             {
1559                 updatePrevStepPullCom(pull_work, state);
1560             }
1561
1562             enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1563         }
1564
1565         if (vsite != nullptr)
1566         {
1567             wallcycle_start(wcycle, ewcVSITECONSTR);
1568             vsite->construct(state->x, ir->delta_t, state->v, state->box);
1569             wallcycle_stop(wcycle, ewcVSITECONSTR);
1570         }
1571
1572         /* ############## IF NOT VV, Calculate globals HERE  ############ */
1573         /* With Leap-Frog we can skip compute_globals at
1574          * non-communication steps, but we need to calculate
1575          * the kinetic energy one step before communication.
1576          */
1577         {
1578             // Organize to do inter-simulation signalling on steps if
1579             // and when algorithms require it.
1580             const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1581
1582             if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
1583             {
1584                 // Copy coordinates when needed to stop the CM motion.
1585                 if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
1586                 {
1587                     stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1588                     stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1589                 }
1590                 // Since we're already communicating at this step, we
1591                 // can propagate intra-simulation signals. Note that
1592                 // check_nstglobalcomm has the responsibility for
1593                 // choosing the value of nstglobalcomm that is one way
1594                 // bGStat becomes true, so we can't get into a
1595                 // situation where e.g. checkpointing can't be
1596                 // signalled.
1597                 bool                doIntraSimSignal = true;
1598                 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1599
1600                 compute_globals(gstat,
1601                                 cr,
1602                                 ir,
1603                                 fr,
1604                                 ekind,
1605                                 makeConstArrayRef(state->x),
1606                                 makeConstArrayRef(state->v),
1607                                 state->box,
1608                                 mdatoms,
1609                                 nrnb,
1610                                 &vcm,
1611                                 wcycle,
1612                                 enerd,
1613                                 force_vir,
1614                                 shake_vir,
1615                                 total_vir,
1616                                 pres,
1617                                 constr,
1618                                 &signaller,
1619                                 lastbox,
1620                                 &totalNumberOfBondedInteractions,
1621                                 &bSumEkinhOld,
1622                                 (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1623                                         | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1624                                         | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1625                                         | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT
1626                                         | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
1627                                                                                  : 0));
1628                 checkNumberOfBondedInteractions(mdlog,
1629                                                 cr,
1630                                                 totalNumberOfBondedInteractions,
1631                                                 top_global,
1632                                                 &top,
1633                                                 makeConstArrayRef(state->x),
1634                                                 state->box,
1635                                                 &shouldCheckNumberOfBondedInteractions);
1636                 if (!EI_VV(ir->eI) && bStopCM)
1637                 {
1638                     process_and_stopcm_grp(
1639                             fplog, &vcm, *mdatoms, makeArrayRef(state->x), makeArrayRef(state->v));
1640                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1641
1642                     // TODO: The special case of removing CM motion should be dealt more gracefully
1643                     if (useGpuForUpdate)
1644                     {
1645                         stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1646                         // Here we block until the H2D copy completes because event sync with the
1647                         // force kernels that use the coordinates on the next steps is not implemented
1648                         // (not because of a race on state->x being modified on the CPU while H2D is in progress).
1649                         stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
1650                         // If the COM removal changed the velocities on the CPU, this has to be accounted for.
1651                         if (vcm.mode != ecmNO)
1652                         {
1653                             stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1654                         }
1655                     }
1656                 }
1657             }
1658         }
1659
1660         /* #############  END CALC EKIN AND PRESSURE ################# */
1661
1662         /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1663            the virial that should probably be addressed eventually. state->veta has better properies,
1664            but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1665            generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1666
1667         if (ir->efep != efepNO && !EI_VV(ir->eI))
1668         {
1669             /* Sum up the foreign energy and dK/dl terms for md and sd.
1670                Currently done every step so that dH/dl is correct in the .edr */
1671             accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals);
1672         }
1673
1674         update_pcouple_after_coordinates(
1675                 fplog, step, ir, mdatoms, pres, force_vir, shake_vir, pressureCouplingMu, state, nrnb, upd.deform(), !useGpuForUpdate);
1676
1677         const bool doBerendsenPressureCoupling =
1678                 (inputrec->epc == epcBERENDSEN && do_per_step(step, inputrec->nstpcouple));
1679         const bool doCRescalePressureCoupling =
1680                 (inputrec->epc == epcCRESCALE && do_per_step(step, inputrec->nstpcouple));
1681         if (useGpuForUpdate
1682             && (doBerendsenPressureCoupling || doCRescalePressureCoupling || doParrinelloRahman))
1683         {
1684             integrator->scaleCoordinates(pressureCouplingMu);
1685             if (doCRescalePressureCoupling)
1686             {
1687                 matrix pressureCouplingInvMu;
1688                 gmx::invertBoxMatrix(pressureCouplingMu, pressureCouplingInvMu);
1689                 integrator->scaleVelocities(pressureCouplingInvMu);
1690             }
1691             integrator->setPbc(PbcType::Xyz, state->box);
1692         }
1693
1694         /* ################# END UPDATE STEP 2 ################# */
1695         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1696
1697         /* The coordinates (x) were unshifted in update */
1698         if (!bGStat)
1699         {
1700             /* We will not sum ekinh_old,
1701              * so signal that we still have to do it.
1702              */
1703             bSumEkinhOld = TRUE;
1704         }
1705
1706         if (bCalcEner)
1707         {
1708             /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1709
1710             /* use the directly determined last velocity, not actually the averaged half steps */
1711             if (bTrotter && ir->eI == eiVV)
1712             {
1713                 enerd->term[F_EKIN] = last_ekin;
1714             }
1715             enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1716
1717             if (integratorHasConservedEnergyQuantity(ir))
1718             {
1719                 if (EI_VV(ir->eI))
1720                 {
1721                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1722                 }
1723                 else
1724                 {
1725                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1726                 }
1727             }
1728             /* #########  END PREPARING EDR OUTPUT  ###########  */
1729         }
1730
1731         /* Output stuff */
1732         if (MASTER(cr))
1733         {
1734             if (fplog && do_log && bDoExpanded)
1735             {
1736                 /* only needed if doing expanded ensemble */
1737                 PrintFreeEnergyInfoToFile(fplog,
1738                                           ir->fepvals,
1739                                           ir->expandedvals,
1740                                           ir->bSimTemp ? ir->simtempvals : nullptr,
1741                                           state_global->dfhist,
1742                                           state->fep_state,
1743                                           ir->nstlog,
1744                                           step);
1745             }
1746             if (bCalcEner)
1747             {
1748                 energyOutput.addDataAtEnergyStep(bDoDHDL,
1749                                                  bCalcEnerStep,
1750                                                  t,
1751                                                  mdatoms->tmass,
1752                                                  enerd,
1753                                                  ir->fepvals,
1754                                                  ir->expandedvals,
1755                                                  lastbox,
1756                                                  PTCouplingArrays{ state->boxv,
1757                                                                    state->nosehoover_xi,
1758                                                                    state->nosehoover_vxi,
1759                                                                    state->nhpres_xi,
1760                                                                    state->nhpres_vxi },
1761                                                  state->fep_state,
1762                                                  shake_vir,
1763                                                  force_vir,
1764                                                  total_vir,
1765                                                  pres,
1766                                                  ekind,
1767                                                  mu_tot,
1768                                                  constr);
1769             }
1770             else
1771             {
1772                 energyOutput.recordNonEnergyStep();
1773             }
1774
1775             gmx_bool do_dr = do_per_step(step, ir->nstdisreout);
1776             gmx_bool do_or = do_per_step(step, ir->nstorireout);
1777
1778             if (doSimulatedAnnealing)
1779             {
1780                 gmx::EnergyOutput::printAnnealingTemperatures(
1781                         do_log ? fplog : nullptr, groups, &(ir->opts));
1782             }
1783             if (do_log || do_ene || do_dr || do_or)
1784             {
1785                 energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf),
1786                                                    do_ene,
1787                                                    do_dr,
1788                                                    do_or,
1789                                                    do_log ? fplog : nullptr,
1790                                                    step,
1791                                                    t,
1792                                                    fr->fcdata.get(),
1793                                                    awh.get());
1794             }
1795             if (do_log && ir->bDoAwh && awh->hasFepLambdaDimension())
1796             {
1797                 const bool isInitialOutput = false;
1798                 printLambdaStateToLog(fplog, state->lambda, isInitialOutput);
1799             }
1800
1801             if (ir->bPull)
1802             {
1803                 pull_print_output(pull_work, step, t);
1804             }
1805
1806             if (do_per_step(step, ir->nstlog))
1807             {
1808                 if (fflush(fplog) != 0)
1809                 {
1810                     gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1811                 }
1812             }
1813         }
1814         if (bDoExpanded)
1815         {
1816             /* Have to do this part _after_ outputting the logfile and the edr file */
1817             /* Gets written into the state at the beginning of next loop*/
1818             state->fep_state = lamnew;
1819         }
1820         else if (ir->bDoAwh && awh->needForeignEnergyDifferences(step))
1821         {
1822             state->fep_state = awh->fepLambdaState();
1823         }
1824         /* Print the remaining wall clock time for the run */
1825         if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting)
1826         {
1827             if (shellfc)
1828             {
1829                 fprintf(stderr, "\n");
1830             }
1831             print_time(stderr, walltime_accounting, step, ir, cr);
1832         }
1833
1834         /* Ion/water position swapping.
1835          * Not done in last step since trajectory writing happens before this call
1836          * in the MD loop and exchanges would be lost anyway. */
1837         bNeedRepartition = FALSE;
1838         if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && do_per_step(step, ir->swap->nstswap))
1839         {
1840             bNeedRepartition = do_swapcoords(cr,
1841                                              step,
1842                                              t,
1843                                              ir,
1844                                              swap,
1845                                              wcycle,
1846                                              as_rvec_array(state->x.data()),
1847                                              state->box,
1848                                              MASTER(cr) && mdrunOptions.verbose,
1849                                              bRerunMD);
1850
1851             if (bNeedRepartition && DOMAINDECOMP(cr))
1852             {
1853                 dd_collect_state(cr->dd, state, state_global);
1854             }
1855         }
1856
1857         /* Replica exchange */
1858         bExchanged = FALSE;
1859         if (bDoReplEx)
1860         {
1861             bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t);
1862         }
1863
1864         if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr))
1865         {
1866             dd_partition_system(fplog,
1867                                 mdlog,
1868                                 step,
1869                                 cr,
1870                                 TRUE,
1871                                 1,
1872                                 state_global,
1873                                 *top_global,
1874                                 ir,
1875                                 imdSession,
1876                                 pull_work,
1877                                 state,
1878                                 &f,
1879                                 mdAtoms,
1880                                 &top,
1881                                 fr,
1882                                 vsite,
1883                                 constr,
1884                                 nrnb,
1885                                 wcycle,
1886                                 FALSE);
1887             shouldCheckNumberOfBondedInteractions = true;
1888             upd.setNumAtoms(state->natoms);
1889         }
1890
1891         bFirstStep = FALSE;
1892         bInitStep  = FALSE;
1893
1894         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1895         /* With all integrators, except VV, we need to retain the pressure
1896          * at the current step for coupling at the next step.
1897          */
1898         if ((state->flags & (1U << estPRES_PREV))
1899             && (bGStatEveryStep || (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1900         {
1901             /* Store the pressure in t_state for pressure coupling
1902              * at the next MD step.
1903              */
1904             copy_mat(pres, state->pres_prev);
1905         }
1906
1907         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1908
1909         if ((membed != nullptr) && (!bLastStep))
1910         {
1911             rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1912         }
1913
1914         cycles = wallcycle_stop(wcycle, ewcSTEP);
1915         if (DOMAINDECOMP(cr) && wcycle)
1916         {
1917             dd_cycles_add(cr->dd, cycles, ddCyclStep);
1918         }
1919
1920         /* increase the MD step number */
1921         step++;
1922         step_rel++;
1923
1924 #if GMX_FAHCORE
1925         if (MASTER(cr))
1926         {
1927             fcReportProgress(ir->nsteps + ir->init_step, step);
1928         }
1929 #endif
1930
1931         resetHandler->resetCounters(
1932                 step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1933
1934         /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1935         imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1936     }
1937     /* End of main MD loop */
1938
1939     /* Closing TNG files can include compressing data. Therefore it is good to do that
1940      * before stopping the time measurements. */
1941     mdoutf_tng_close(outf);
1942
1943     /* Stop measuring walltime */
1944     walltime_accounting_end_time(walltime_accounting);
1945
1946     if (!thisRankHasDuty(cr, DUTY_PME))
1947     {
1948         /* Tell the PME only node to finish */
1949         gmx_pme_send_finish(cr);
1950     }
1951
1952     if (MASTER(cr))
1953     {
1954         if (ir->nstcalcenergy > 0)
1955         {
1956             energyOutput.printEnergyConservation(fplog, ir->simulation_part, EI_MD(ir->eI));
1957
1958             gmx::EnergyOutput::printAnnealingTemperatures(fplog, groups, &(ir->opts));
1959             energyOutput.printAverages(fplog, groups);
1960         }
1961     }
1962     done_mdoutf(outf);
1963
1964     if (bPMETune)
1965     {
1966         pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1967     }
1968
1969     done_shellfc(fplog, shellfc, step_rel);
1970
1971     if (useReplicaExchange && MASTER(cr))
1972     {
1973         print_replica_exchange_statistics(fplog, repl_ex);
1974     }
1975
1976     walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1977
1978     global_stat_destroy(gstat);
1979 }