src/gromacs/mdrun/md.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2011-2019,2020, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief Implements the integrator for normal molecular dynamics simulations
  40  *
  41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
  42  * \ingroup module_mdrun
  43  */
  44 #include "gmxpre.h"
  45
  46 #include <cinttypes>
  47 #include <cmath>
  48 #include <cstdio>
  49 #include <cstdlib>
  50
  51 #include <algorithm>
  52 #include <memory>
  53 #include <numeric>
  54
  55 #include "gromacs/applied_forces/awh/awh.h"
  56 #include "gromacs/commandline/filenm.h"
  57 #include "gromacs/domdec/collect.h"
  58 #include "gromacs/domdec/dlbtiming.h"
  59 #include "gromacs/domdec/domdec.h"
  60 #include "gromacs/domdec/domdec_network.h"
  61 #include "gromacs/domdec/domdec_struct.h"
  62 #include "gromacs/domdec/gpuhaloexchange.h"
  63 #include "gromacs/domdec/mdsetup.h"
  64 #include "gromacs/domdec/partition.h"
  65 #include "gromacs/essentialdynamics/edsam.h"
  66 #include "gromacs/ewald/pme_load_balancing.h"
  67 #include "gromacs/ewald/pme_pp.h"
  68 #include "gromacs/fileio/trxio.h"
  69 #include "gromacs/gmxlib/network.h"
  70 #include "gromacs/gmxlib/nrnb.h"
  71 #include "gromacs/gpu_utils/device_stream_manager.h"
  72 #include "gromacs/gpu_utils/gpu_utils.h"
  73 #include "gromacs/imd/imd.h"
  74 #include "gromacs/listed_forces/listed_forces.h"
  75 #include "gromacs/math/functions.h"
  76 #include "gromacs/math/invertmatrix.h"
  77 #include "gromacs/math/vec.h"
  78 #include "gromacs/math/vectypes.h"
  79 #include "gromacs/mdlib/checkpointhandler.h"
  80 #include "gromacs/mdlib/compute_io.h"
  81 #include "gromacs/mdlib/constr.h"
  82 #include "gromacs/mdlib/coupling.h"
  83 #include "gromacs/mdlib/ebin.h"
  84 #include "gromacs/mdlib/enerdata_utils.h"
  85 #include "gromacs/mdlib/energyoutput.h"
  86 #include "gromacs/mdlib/expanded.h"
  87 #include "gromacs/mdlib/force.h"
  88 #include "gromacs/mdlib/force_flags.h"
  89 #include "gromacs/mdlib/forcerec.h"
  90 #include "gromacs/mdlib/freeenergyparameters.h"
  91 #include "gromacs/mdlib/md_support.h"
  92 #include "gromacs/mdlib/mdatoms.h"
  93 #include "gromacs/mdlib/mdoutf.h"
  94 #include "gromacs/mdlib/membed.h"
  95 #include "gromacs/mdlib/resethandler.h"
  96 #include "gromacs/mdlib/sighandler.h"
  97 #include "gromacs/mdlib/simulationsignal.h"
  98 #include "gromacs/mdlib/stat.h"
  99 #include "gromacs/mdlib/stophandler.h"
 100 #include "gromacs/mdlib/tgroup.h"
 101 #include "gromacs/mdlib/trajectory_writing.h"
 102 #include "gromacs/mdlib/update.h"
 103 #include "gromacs/mdlib/update_constrain_gpu.h"
 104 #include "gromacs/mdlib/update_vv.h"
 105 #include "gromacs/mdlib/vcm.h"
 106 #include "gromacs/mdlib/vsite.h"
 107 #include "gromacs/mdrunutility/handlerestart.h"
 108 #include "gromacs/mdrunutility/multisim.h"
 109 #include "gromacs/mdrunutility/printtime.h"
 110 #include "gromacs/mdtypes/awh_history.h"
 111 #include "gromacs/mdtypes/awh_params.h"
 112 #include "gromacs/mdtypes/commrec.h"
 113 #include "gromacs/mdtypes/df_history.h"
 114 #include "gromacs/mdtypes/energyhistory.h"
 115 #include "gromacs/mdtypes/fcdata.h"
 116 #include "gromacs/mdtypes/forcebuffers.h"
 117 #include "gromacs/mdtypes/forcerec.h"
 118 #include "gromacs/mdtypes/group.h"
 119 #include "gromacs/mdtypes/inputrec.h"
 120 #include "gromacs/mdtypes/interaction_const.h"
 121 #include "gromacs/mdtypes/md_enums.h"
 122 #include "gromacs/mdtypes/mdatom.h"
 123 #include "gromacs/mdtypes/mdrunoptions.h"
 124 #include "gromacs/mdtypes/multipletimestepping.h"
 125 #include "gromacs/mdtypes/observableshistory.h"
 126 #include "gromacs/mdtypes/pullhistory.h"
 127 #include "gromacs/mdtypes/simulation_workload.h"
 128 #include "gromacs/mdtypes/state.h"
 129 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
 130 #include "gromacs/modularsimulator/energydata.h"
 131 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 132 #include "gromacs/nbnxm/nbnxm.h"
 133 #include "gromacs/pbcutil/pbc.h"
 134 #include "gromacs/pulling/output.h"
 135 #include "gromacs/pulling/pull.h"
 136 #include "gromacs/swap/swapcoords.h"
 137 #include "gromacs/timing/wallcycle.h"
 138 #include "gromacs/timing/walltime_accounting.h"
 139 #include "gromacs/topology/atoms.h"
 140 #include "gromacs/topology/idef.h"
 141 #include "gromacs/topology/mtop_util.h"
 142 #include "gromacs/topology/topology.h"
 143 #include "gromacs/trajectory/trajectoryframe.h"
 144 #include "gromacs/utility/basedefinitions.h"
 145 #include "gromacs/utility/cstringutil.h"
 146 #include "gromacs/utility/fatalerror.h"
 147 #include "gromacs/utility/logger.h"
 148 #include "gromacs/utility/real.h"
 149 #include "gromacs/utility/smalloc.h"
 150
 151 #include "legacysimulator.h"
 152 #include "replicaexchange.h"
 153 #include "shellfc.h"
 154
 155 using gmx::SimulationSignaller;
 156
 157 void gmx::LegacySimulator::do_md()
 158 {
 159     // TODO Historically, the EM and MD "integrators" used different
 160     // names for the t_inputrec *parameter, but these must have the
 161     // same name, now that it's a member of a struct. We use this ir
 162     // alias to avoid a large ripple of nearly useless changes.
 163     // t_inputrec is being replaced by IMdpOptionsProvider, so this
 164     // will go away eventually.
 165     t_inputrec*  ir = inputrec;
 166     int64_t      step, step_rel;
 167     double       t, t0 = ir->init_t;
 168     gmx_bool     bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
 169     gmx_bool     bNS = FALSE, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
 170     gmx_bool     bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
 171     gmx_bool     do_ene, do_log, do_verbose;
 172     gmx_bool     bMasterState;
 173     unsigned int force_flags;
 174     tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, pres = { { 0 } };
 175     int    i, m;
 176     rvec   mu_tot;
 177     matrix pressureCouplingMu, M;
 178     gmx_repl_ex_t     repl_ex = nullptr;
 179     gmx_global_stat_t gstat;
 180     gmx_shellfc_t*    shellfc;
 181     gmx_bool          bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
 182     gmx_bool          bTrotter;
 183     real              dvdl_constr;
 184     std::vector<RVec> cbuf;
 185     matrix            lastbox;
 186     int               lamnew = 0;
 187     /* for FEP */
 188     int       nstfep = 0;
 189     double    cycles;
 190     real      saved_conserved_quantity = 0;
 191     real      last_ekin                = 0;
 192     t_extmass MassQ;
 193     char      sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
 194
 195     /* PME load balancing data for GPU kernels */
 196     gmx_bool bPMETune         = FALSE;
 197     gmx_bool bPMETunePrinting = FALSE;
 198
 199     bool bInteractiveMDstep = false;
 200
 201     /* Domain decomposition could incorrectly miss a bonded
 202        interaction, but checking for that requires a global
 203        communication stage, which does not otherwise happen in DD
 204        code. So we do that alongside the first global energy reduction
 205        after a new DD is made. These variables handle whether the
 206        check happens, and the result it returns. */
 207     bool shouldCheckNumberOfBondedInteractions = false;
 208     int  totalNumberOfBondedInteractions       = -1;
 209
 210     SimulationSignals signals;
 211     // Most global communnication stages don't propagate mdrun
 212     // signals, and will use this object to achieve that.
 213     SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
 214
 215     if (!mdrunOptions.writeConfout)
 216     {
 217         // This is on by default, and the main known use case for
 218         // turning it off is for convenience in benchmarking, which is
 219         // something that should not show up in the general user
 220         // interface.
 221         GMX_LOG(mdlog.info)
 222                 .asParagraph()
 223                 .appendText(
 224                         "The -noconfout functionality is deprecated, and may be removed in a "
 225                         "future version.");
 226     }
 227
 228     /* md-vv uses averaged full step velocities for T-control
 229        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
 230        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
 231     bTrotter = (EI_VV(ir->eI)
 232                 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
 233
 234     const bool bRerunMD = false;
 235
 236     int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
 237     bGStatEveryStep   = (nstglobalcomm == 1);
 238
 239     const SimulationGroups* groups = &top_global->groups;
 240
 241     std::unique_ptr<EssentialDynamics> ed = nullptr;
 242     if (opt2bSet("-ei", nfile, fnm))
 243     {
 244         /* Initialize essential dynamics sampling */
 245         ed = init_edsam(mdlog, opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm), top_global,
 246                         ir, cr, constr, state_global, observablesHistory, oenv, startingBehavior);
 247     }
 248     else if (observablesHistory->edsamHistory)
 249     {
 250         gmx_fatal(FARGS,
 251                   "The checkpoint is from a run with essential dynamics sampling, "
 252                   "but the current run did not specify the -ei option. "
 253                   "Either specify the -ei option to mdrun, or do not use this checkpoint file.");
 254     }
 255
 256     int*                fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
 257     gmx::ArrayRef<real> lambda    = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
 258     initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda);
 259     Update     upd(*ir, deform);
 260     const bool doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
 261     const bool useReplicaExchange   = (replExParams.exchangeInterval > 0);
 262
 263     const t_fcdata& fcdata = *fr->fcdata;
 264
 265     bool simulationsShareState = false;
 266     int  nstSignalComm         = nstglobalcomm;
 267     {
 268         // TODO This implementation of ensemble orientation restraints is nasty because
 269         // a user can't just do multi-sim with single-sim orientation restraints.
 270         bool usingEnsembleRestraints =
 271                 (fcdata.disres->nsystems > 1) || ((ms != nullptr) && (fcdata.orires->nr != 0));
 272         bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
 273
 274         // Replica exchange, ensemble restraints and AWH need all
 275         // simulations to remain synchronized, so they need
 276         // checkpoints and stop conditions to act on the same step, so
 277         // the propagation of such signals must take place between
 278         // simulations, not just within simulations.
 279         // TODO: Make algorithm initializers set these flags.
 280         simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
 281
 282         if (simulationsShareState)
 283         {
 284             // Inter-simulation signal communication does not need to happen
 285             // often, so we use a minimum of 200 steps to reduce overhead.
 286             const int c_minimumInterSimulationSignallingInterval = 200;
 287             nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1) / nstglobalcomm)
 288                             * nstglobalcomm;
 289         }
 290     }
 291
 292     if (startingBehavior != StartingBehavior::RestartWithAppending)
 293     {
 294         pleaseCiteCouplingAlgorithms(fplog, *ir);
 295     }
 296     gmx_mdoutf* outf =
 297             init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, ir,
 298                         top_global, oenv, wcycle, startingBehavior, simulationsShareState, ms);
 299     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work,
 300                                    mdoutf_get_fp_dhdl(outf), false, startingBehavior,
 301                                    simulationsShareState, mdModulesNotifier);
 302
 303     gstat = global_stat_init(ir);
 304
 305     const auto& simulationWork     = runScheduleWork->simulationWork;
 306     const bool  useGpuForPme       = simulationWork.useGpuPme;
 307     const bool  useGpuForNonbonded = simulationWork.useGpuNonbonded;
 308     const bool  useGpuForBufferOps = simulationWork.useGpuBufferOps;
 309     const bool  useGpuForUpdate    = simulationWork.useGpuUpdate;
 310
 311     /* Check for polarizable models and flexible constraints */
 312     shellfc = init_shell_flexcon(fplog, top_global, constr ? constr->numFlexibleConstraints() : 0,
 313                                  ir->nstcalcenergy, DOMAINDECOMP(cr), useGpuForPme);
 314
 315     {
 316         double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
 317         if ((io > 2000) && MASTER(cr))
 318         {
 319             fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io);
 320         }
 321     }
 322
 323     // Local state only becomes valid now.
 324     std::unique_ptr<t_state> stateInstance;
 325     t_state*                 state;
 326
 327     gmx_localtop_t top(top_global->ffparams);
 328
 329     auto mdatoms = mdAtoms->mdatoms();
 330
 331     ForceBuffers f(fr->useMts, ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
 332                                        ? PinningPolicy::PinnedIfSupported
 333                                        : PinningPolicy::CannotBePinned);
 334     if (DOMAINDECOMP(cr))
 335     {
 336         stateInstance = std::make_unique<t_state>();
 337         state         = stateInstance.get();
 338         dd_init_local_state(cr->dd, state_global, state);
 339
 340         /* Distribute the charge groups over the nodes from the master node */
 341         dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir,
 342                             imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr,
 343                             nrnb, nullptr, FALSE);
 344         shouldCheckNumberOfBondedInteractions = true;
 345         upd.setNumAtoms(state->natoms);
 346     }
 347     else
 348     {
 349         state_change_natoms(state_global, state_global->natoms);
 350         /* Copy the pointer to the global state */
 351         state = state_global;
 352
 353         /* Generate and initialize new topology */
 354         mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &f, mdAtoms, constr, vsite, shellfc);
 355
 356         upd.setNumAtoms(state->natoms);
 357     }
 358
 359     std::unique_ptr<UpdateConstrainGpu> integrator;
 360
 361     StatePropagatorDataGpu* stateGpu = fr->stateGpu;
 362
 363     // TODO: the assertions below should be handled by UpdateConstraintsBuilder.
 364     if (useGpuForUpdate)
 365     {
 366         GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
 367                                    || constr->numConstraintsTotal() == 0,
 368                            "Constraints in domain decomposition are only supported with update "
 369                            "groups if using GPU update.\n");
 370         GMX_RELEASE_ASSERT(ir->eConstrAlg != econtSHAKE || constr == nullptr
 371                                    || constr->numConstraintsTotal() == 0,
 372                            "SHAKE is not supported with GPU update.");
 373         GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
 374                            "Either PME or short-ranged non-bonded interaction tasks must run on "
 375                            "the GPU to use GPU update.\n");
 376         GMX_RELEASE_ASSERT(ir->eI == eiMD,
 377                            "Only the md integrator is supported with the GPU update.\n");
 378         GMX_RELEASE_ASSERT(
 379                 ir->etc != etcNOSEHOOVER,
 380                 "Nose-Hoover temperature coupling is not supported with the GPU update.\n");
 381         GMX_RELEASE_ASSERT(
 382                 ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN || ir->epc == epcBERENDSEN
 383                         || ir->epc == epcCRESCALE,
 384                 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are supported "
 385                 "with the GPU update.\n");
 386         GMX_RELEASE_ASSERT(!mdatoms->haveVsites,
 387                            "Virtual sites are not supported with the GPU update.\n");
 388         GMX_RELEASE_ASSERT(ed == nullptr,
 389                            "Essential dynamics is not supported with the GPU update.\n");
 390         GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(*ir->pull),
 391                            "Constraints pulling is not supported with the GPU update.\n");
 392         GMX_RELEASE_ASSERT(fcdata.orires->nr == 0,
 393                            "Orientation restraints are not supported with the GPU update.\n");
 394         GMX_RELEASE_ASSERT(
 395                 ir->efep == efepNO
 396                         || (!haveFepPerturbedMasses(*top_global) && !havePerturbedConstraints(*top_global)),
 397                 "Free energy perturbation of masses and constraints are not supported with the GPU "
 398                 "update.");
 399
 400         if (constr != nullptr && constr->numConstraintsTotal() > 0)
 401         {
 402             GMX_LOG(mdlog.info)
 403                     .asParagraph()
 404                     .appendText("Updating coordinates and applying constraints on the GPU.");
 405         }
 406         else
 407         {
 408             GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
 409         }
 410         GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
 411                            "Device stream manager should be initialized in order to use GPU "
 412                            "update-constraints.");
 413         GMX_RELEASE_ASSERT(
 414                 fr->deviceStreamManager->streamIsValid(gmx::DeviceStreamType::UpdateAndConstraints),
 415                 "Update stream should be initialized in order to use GPU "
 416                 "update-constraints.");
 417         integrator = std::make_unique<UpdateConstrainGpu>(
 418                 *ir, *top_global, fr->deviceStreamManager->context(),
 419                 fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints),
 420                 stateGpu->xUpdatedOnDevice(), wcycle);
 421
 422         integrator->setPbc(PbcType::Xyz, state->box);
 423     }
 424
 425     if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
 426     {
 427         changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
 428     }
 429     if (useGpuForUpdate)
 430     {
 431         changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported);
 432     }
 433
 434     // NOTE: The global state is no longer used at this point.
 435     // But state_global is still used as temporary storage space for writing
 436     // the global state to file and potentially for replica exchange.
 437     // (Global topology should persist.)
 438
 439     update_mdatoms(mdatoms, state->lambda[efptMASS]);
 440
 441     if (ir->bExpanded)
 442     {
 443         /* Check nstexpanded here, because the grompp check was broken */
 444         if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
 445         {
 446             gmx_fatal(FARGS,
 447                       "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
 448         }
 449         init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation, ir, state->dfhist);
 450     }
 451
 452     if (MASTER(cr))
 453     {
 454         EnergyData::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput);
 455     }
 456
 457     preparePrevStepPullCom(ir, pull_work, mdatoms->massT, state, state_global, cr,
 458                            startingBehavior != StartingBehavior::NewSimulation);
 459
 460     // TODO: Remove this by converting AWH into a ForceProvider
 461     auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms,
 462                                 startingBehavior != StartingBehavior::NewSimulation,
 463                                 shellfc != nullptr, opt2fn("-awh", nfile, fnm), pull_work);
 464
 465     if (useReplicaExchange && MASTER(cr))
 466     {
 467         repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir, replExParams);
 468     }
 469     /* PME tuning is only supported in the Verlet scheme, with PME for
 470      * Coulomb. It is not supported with only LJ PME. */
 471     bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && !mdrunOptions.reproducible
 472                 && ir->cutoff_scheme != ecutsGROUP);
 473
 474     pme_load_balancing_t* pme_loadbal = nullptr;
 475     if (bPMETune)
 476     {
 477         pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box, *fr->ic, *fr->nbv, fr->pmedata,
 478                          fr->nbv->useGpu());
 479     }
 480
 481     if (!ir->bContinuation)
 482     {
 483         if (state->flags & (1U << estV))
 484         {
 485             auto v = makeArrayRef(state->v);
 486             /* Set the velocities of vsites, shells and frozen atoms to zero */
 487             for (i = 0; i < mdatoms->homenr; i++)
 488             {
 489                 if (mdatoms->ptype[i] == eptVSite || mdatoms->ptype[i] == eptShell)
 490                 {
 491                     clear_rvec(v[i]);
 492                 }
 493                 else if (mdatoms->cFREEZE)
 494                 {
 495                     for (m = 0; m < DIM; m++)
 496                     {
 497                         if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 498                         {
 499                             v[i][m] = 0;
 500                         }
 501                     }
 502                 }
 503             }
 504         }
 505
 506         if (constr)
 507         {
 508             /* Constrain the initial coordinates and velocities */
 509             do_constrain_first(fplog, constr, ir, mdatoms->nr, mdatoms->homenr,
 510                                state->x.arrayRefWithPadding(), state->v.arrayRefWithPadding(),
 511                                state->box, state->lambda[efptBONDED]);
 512         }
 513         if (vsite)
 514         {
 515             /* Construct the virtual sites for the initial configuration */
 516             vsite->construct(state->x, ir->delta_t, {}, state->box);
 517         }
 518     }
 519
 520     if (ir->efep != efepNO)
 521     {
 522         /* Set free energy calculation frequency as the greatest common
 523          * denominator of nstdhdl and repl_ex_nst. */
 524         nstfep = ir->fepvals->nstdhdl;
 525         if (ir->bExpanded)
 526         {
 527             nstfep = std::gcd(ir->expandedvals->nstexpanded, nstfep);
 528         }
 529         if (useReplicaExchange)
 530         {
 531             nstfep = std::gcd(replExParams.exchangeInterval, nstfep);
 532         }
 533         if (ir->bDoAwh)
 534         {
 535             nstfep = std::gcd(ir->awhParams->nstSampleCoord, nstfep);
 536         }
 537     }
 538
 539     /* Be REALLY careful about what flags you set here. You CANNOT assume
 540      * this is the first step, since we might be restarting from a checkpoint,
 541      * and in that case we should not do any modifications to the state.
 542      */
 543     bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
 544
 545     // When restarting from a checkpoint, it can be appropriate to
 546     // initialize ekind from quantities in the checkpoint. Otherwise,
 547     // compute_globals must initialize ekind before the simulation
 548     // starts/restarts. However, only the master rank knows what was
 549     // found in the checkpoint file, so we have to communicate in
 550     // order to coordinate the restart.
 551     //
 552     // TODO Consider removing this communication if/when checkpoint
 553     // reading directly follows .tpr reading, because all ranks can
 554     // agree on hasReadEkinState at that time.
 555     bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
 556     if (PAR(cr))
 557     {
 558         gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr->mpi_comm_mygroup);
 559     }
 560     if (hasReadEkinState)
 561     {
 562         restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
 563     }
 564
 565     unsigned int cglo_flags =
 566             (CGLO_TEMPERATURE | CGLO_GSTAT | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
 567              | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) | (hasReadEkinState ? CGLO_READEKIN : 0));
 568
 569     bSumEkinhOld = FALSE;
 570
 571     t_vcm vcm(top_global->groups, *ir);
 572     reportComRemovalInfo(fplog, vcm);
 573
 574     /* To minimize communication, compute_globals computes the COM velocity
 575      * and the kinetic energy for the velocities without COM motion removed.
 576      * Thus to get the kinetic energy without the COM contribution, we need
 577      * to call compute_globals twice.
 578      */
 579     for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
 580     {
 581         unsigned int cglo_flags_iteration = cglo_flags;
 582         if (bStopCM && cgloIteration == 0)
 583         {
 584             cglo_flags_iteration |= CGLO_STOPCM;
 585             cglo_flags_iteration &= ~CGLO_TEMPERATURE;
 586         }
 587         compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
 588                         makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, nullptr,
 589                         enerd, force_vir, shake_vir, total_vir, pres, constr, &nullSignaller,
 590                         state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
 591                         cglo_flags_iteration
 592                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
 593                                                                          : 0));
 594         if (cglo_flags_iteration & CGLO_STOPCM)
 595         {
 596             /* At initialization, do not pass x with acceleration-correction mode
 597              * to avoid (incorrect) correction of the initial coordinates.
 598              */
 599             auto x = (vcm.mode == ecmLINEAR_ACCELERATION_CORRECTION) ? ArrayRef<RVec>()
 600                                                                      : makeArrayRef(state->x);
 601             process_and_stopcm_grp(fplog, &vcm, *mdatoms, x, makeArrayRef(state->v));
 602             inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
 603         }
 604     }
 605     checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, &top,
 606                                     makeConstArrayRef(state->x), state->box,
 607                                     &shouldCheckNumberOfBondedInteractions);
 608     if (ir->eI == eiVVAK)
 609     {
 610         /* a second call to get the half step temperature initialized as well */
 611         /* we do the same call as above, but turn the pressure off -- internally to
 612            compute_globals, this is recognized as a velocity verlet half-step
 613            kinetic energy calculation.  This minimized excess variables, but
 614            perhaps loses some logic?*/
 615
 616         compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
 617                         makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, nullptr,
 618                         enerd, force_vir, shake_vir, total_vir, pres, constr, &nullSignaller,
 619                         state->box, nullptr, &bSumEkinhOld, cglo_flags & ~CGLO_PRESSURE);
 620     }
 621
 622     /* Calculate the initial half step temperature, and save the ekinh_old */
 623     if (startingBehavior == StartingBehavior::NewSimulation)
 624     {
 625         for (i = 0; (i < ir->opts.ngtc); i++)
 626         {
 627             copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
 628         }
 629     }
 630
 631     /* need to make an initiation call to get the Trotter variables set, as well as other constants
 632        for non-trotter temperature control */
 633     auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
 634
 635     if (MASTER(cr))
 636     {
 637         if (!ir->bContinuation)
 638         {
 639             if (constr && ir->eConstrAlg == econtLINCS)
 640             {
 641                 fprintf(fplog, "RMS relative constraint deviation after constraining: %.2e\n",
 642                         constr->rmsd());
 643             }
 644             if (EI_STATE_VELOCITY(ir->eI))
 645             {
 646                 real temp = enerd->term[F_TEMP];
 647                 if (ir->eI != eiVV)
 648                 {
 649                     /* Result of Ekin averaged over velocities of -half
 650                      * and +half step, while we only have -half step here.
 651                      */
 652                     temp *= 2;
 653                 }
 654                 fprintf(fplog, "Initial temperature: %g K\n", temp);
 655             }
 656         }
 657
 658         char tbuf[20];
 659         fprintf(stderr, "starting mdrun '%s'\n", *(top_global->name));
 660         if (ir->nsteps >= 0)
 661         {
 662             sprintf(tbuf, "%8.1f", (ir->init_step + ir->nsteps) * ir->delta_t);
 663         }
 664         else
 665         {
 666             sprintf(tbuf, "%s", "infinite");
 667         }
 668         if (ir->init_step > 0)
 669         {
 670             fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 671                     gmx_step_str(ir->init_step + ir->nsteps, sbuf), tbuf,
 672                     gmx_step_str(ir->init_step, sbuf2), ir->init_step * ir->delta_t);
 673         }
 674         else
 675         {
 676             fprintf(stderr, "%s steps, %s ps.\n", gmx_step_str(ir->nsteps, sbuf), tbuf);
 677         }
 678         fprintf(fplog, "\n");
 679     }
 680
 681     walltime_accounting_start_time(walltime_accounting);
 682     wallcycle_start(wcycle, ewcRUN);
 683     print_start(fplog, cr, walltime_accounting, "mdrun");
 684
 685     /***********************************************************
 686      *
 687      *             Loop over MD steps
 688      *
 689      ************************************************************/
 690
 691     bFirstStep = TRUE;
 692     /* Skip the first Nose-Hoover integration when we get the state from tpx */
 693     bInitStep        = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
 694     bSumEkinhOld     = FALSE;
 695     bExchanged       = FALSE;
 696     bNeedRepartition = FALSE;
 697
 698     step     = ir->init_step;
 699     step_rel = 0;
 700
 701     auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
 702             compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
 703             MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
 704             mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
 705
 706     auto checkpointHandler = std::make_unique<CheckpointHandler>(
 707             compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]), simulationsShareState,
 708             ir->nstlist == 0, MASTER(cr), mdrunOptions.writeConfout,
 709             mdrunOptions.checkpointOptions.period);
 710
 711     const bool resetCountersIsLocal = true;
 712     auto       resetHandler         = std::make_unique<ResetHandler>(
 713             compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]),
 714             !resetCountersIsLocal, ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
 715             mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
 716
 717     const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
 718
 719     if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
 720     {
 721         logInitialMultisimStatus(ms, cr, mdlog, simulationsShareState, ir->nsteps, ir->init_step);
 722     }
 723
 724     /* and stop now if we should */
 725     bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
 726     while (!bLastStep)
 727     {
 728
 729         /* Determine if this is a neighbor search step */
 730         bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
 731
 732         if (bPMETune && bNStList)
 733         {
 734             // This has to be here because PME load balancing is called so early.
 735             // TODO: Move to after all booleans are defined.
 736             if (useGpuForUpdate && !bFirstStep)
 737             {
 738                 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
 739                 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
 740             }
 741             /* PME grid + cut-off optimization with GPUs or PME nodes */
 742             pme_loadbal_do(pme_loadbal, cr, (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
 743                            fplog, mdlog, *ir, fr, state->box, state->x, wcycle, step, step_rel,
 744                            &bPMETunePrinting, simulationWork.useGpuPmePpCommunication);
 745         }
 746
 747         wallcycle_start(wcycle, ewcSTEP);
 748
 749         bLastStep = (step_rel == ir->nsteps);
 750         t         = t0 + step * ir->delta_t;
 751
 752         // TODO Refactor this, so that nstfep does not need a default value of zero
 753         if (ir->efep != efepNO || ir->bSimTemp)
 754         {
 755             /* find and set the current lambdas */
 756             state->lambda = currentLambdas(step, *(ir->fepvals), state->fep_state);
 757
 758             bDoDHDL     = do_per_step(step, ir->fepvals->nstdhdl);
 759             bDoFEP      = ((ir->efep != efepNO) && do_per_step(step, nstfep));
 760             bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded)
 761                            && (!bFirstStep));
 762         }
 763
 764         bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep
 765                      && do_per_step(step, replExParams.exchangeInterval));
 766
 767         if (doSimulatedAnnealing)
 768         {
 769             update_annealing_target_temp(ir, t, &upd);
 770         }
 771
 772         /* Stop Center of Mass motion */
 773         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
 774
 775         /* Determine whether or not to do Neighbour Searching */
 776         bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
 777
 778         /* Note that the stopHandler will cause termination at nstglobalcomm
 779          * steps. Since this concides with nstcalcenergy, nsttcouple and/or
 780          * nstpcouple steps, we have computed the half-step kinetic energy
 781          * of the previous step and can always output energies at the last step.
 782          */
 783         bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
 784
 785         /* do_log triggers energy and virial calculation. Because this leads
 786          * to different code paths, forces can be different. Thus for exact
 787          * continuation we should avoid extra log output.
 788          * Note that the || bLastStep can result in non-exact continuation
 789          * beyond the last step. But we don't consider that to be an issue.
 790          */
 791         do_log     = (do_per_step(step, ir->nstlog)
 792                   || (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) || bLastStep);
 793         do_verbose = mdrunOptions.verbose
 794                      && (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
 795
 796         if (useGpuForUpdate && !bFirstStep && bNS)
 797         {
 798             // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
 799             stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
 800             stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
 801             // Copy coordinate from the GPU when needed at the search step.
 802             // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
 803             // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
 804             stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
 805             stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
 806         }
 807
 808         if (bNS && !(bFirstStep && ir->bContinuation))
 809         {
 810             bMasterState = FALSE;
 811             /* Correct the new box if it is too skewed */
 812             if (inputrecDynamicBox(ir))
 813             {
 814                 if (correct_box(fplog, step, state->box))
 815                 {
 816                     bMasterState = TRUE;
 817                     // If update is offloaded, it should be informed about the box size change
 818                     if (useGpuForUpdate)
 819                     {
 820                         integrator->setPbc(PbcType::Xyz, state->box);
 821                     }
 822                 }
 823             }
 824             if (DOMAINDECOMP(cr) && bMasterState)
 825             {
 826                 dd_collect_state(cr->dd, state, state_global);
 827             }
 828
 829             if (DOMAINDECOMP(cr))
 830             {
 831                 /* Repartition the domain decomposition */
 832                 dd_partition_system(fplog, mdlog, step, cr, bMasterState, nstglobalcomm, state_global,
 833                                     *top_global, ir, imdSession, pull_work, state, &f, mdAtoms, &top,
 834                                     fr, vsite, constr, nrnb, wcycle, do_verbose && !bPMETunePrinting);
 835                 shouldCheckNumberOfBondedInteractions = true;
 836                 upd.setNumAtoms(state->natoms);
 837             }
 838         }
 839
 840         // Allocate or re-size GPU halo exchange object, if necessary
 841         if (bNS && havePPDomainDecomposition(cr) && simulationWork.useGpuHaloExchange)
 842         {
 843             GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
 844                                "GPU device manager has to be initialized to use GPU "
 845                                "version of halo exchange.");
 846             constructGpuHaloExchange(mdlog, *cr, *fr->deviceStreamManager, wcycle);
 847         }
 848
 849         if (MASTER(cr) && do_log)
 850         {
 851             gmx::EnergyOutput::printHeader(fplog, step,
 852                                            t); /* can we improve the information printed here? */
 853         }
 854
 855         if (ir->efep != efepNO)
 856         {
 857             update_mdatoms(mdatoms, state->lambda[efptMASS]);
 858         }
 859
 860         if (bExchanged)
 861         {
 862             /* We need the kinetic energy at minus the half step for determining
 863              * the full step kinetic energy and possibly for T-coupling.*/
 864             /* This may not be quite working correctly yet . . . . */
 865             compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
 866                             makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle,
 867                             enerd, nullptr, nullptr, nullptr, nullptr, constr, &nullSignaller,
 868                             state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
 869                             CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
 870             checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global,
 871                                             &top, makeConstArrayRef(state->x), state->box,
 872                                             &shouldCheckNumberOfBondedInteractions);
 873         }
 874         clear_mat(force_vir);
 875
 876         checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
 877
 878         /* Determine the energy and pressure:
 879          * at nstcalcenergy steps and at energy output steps (set below).
 880          */
 881         if (EI_VV(ir->eI) && (!bInitStep))
 882         {
 883             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
 884             bCalcVir      = bCalcEnerStep
 885                        || (ir->epc != epcNO
 886                            && (do_per_step(step, ir->nstpcouple) || do_per_step(step - 1, ir->nstpcouple)));
 887         }
 888         else
 889         {
 890             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
 891             bCalcVir = bCalcEnerStep || (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
 892         }
 893         bCalcEner = bCalcEnerStep;
 894
 895         do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
 896
 897         if (do_ene || do_log || bDoReplEx)
 898         {
 899             bCalcVir  = TRUE;
 900             bCalcEner = TRUE;
 901         }
 902
 903         /* Do we need global communication ? */
 904         bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm)
 905                   || (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm)));
 906
 907         force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0)
 908                        | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0)
 909                        | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0));
 910         if (fr->useMts && !do_per_step(step, ir->nstfout))
 911         {
 912             force_flags |= GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE;
 913         }
 914
 915         if (shellfc)
 916         {
 917             /* Now is the time to relax the shells */
 918             relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, enforcedRotation, step, ir,
 919                                 imdSession, pull_work, bNS, force_flags, &top, constr, enerd,
 920                                 state->natoms, state->x.arrayRefWithPadding(),
 921                                 state->v.arrayRefWithPadding(), state->box, state->lambda,
 922                                 &state->hist, &f.view(), force_vir, mdatoms, nrnb, wcycle, shellfc,
 923                                 fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler);
 924         }
 925         else
 926         {
 927             /* The AWH history need to be saved _before_ doing force calculations where the AWH bias
 928                is updated (or the AWH update will be performed twice for one step when continuing).
 929                It would be best to call this update function from do_md_trajectory_writing but that
 930                would occur after do_force. One would have to divide the update_awh function into one
 931                function applying the AWH force and one doing the AWH bias update. The update AWH
 932                bias function could then be called after do_md_trajectory_writing (then containing
 933                update_awh_history). The checkpointing will in the future probably moved to the start
 934                of the md loop which will rid of this issue. */
 935             if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
 936             {
 937                 awh->updateHistory(state_global->awhHistory.get());
 938             }
 939
 940             /* The coordinates (x) are shifted (to get whole molecules)
 941              * in do_force.
 942              * This is parallellized as well, and does communication too.
 943              * Check comments in sim_util.c
 944              */
 945             do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession, pull_work, step,
 946                      nrnb, wcycle, &top, state->box, state->x.arrayRefWithPadding(), &state->hist,
 947                      &f.view(), force_vir, mdatoms, enerd, state->lambda, fr, runScheduleWork,
 948                      vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
 949                      (bNS ? GMX_FORCE_NS : 0) | force_flags, ddBalanceRegionHandler);
 950         }
 951
 952         // VV integrators do not need the following velocity half step
 953         // if it is the first step after starting from a checkpoint.
 954         // That is, the half step is needed on all other steps, and
 955         // also the first step when starting from a .tpr file.
 956         if (EI_VV(ir->eI))
 957         {
 958             integrateVVFirstStep(step, bFirstStep, bInitStep, startingBehavior, nstglobalcomm, ir,
 959                                  fr, cr, state, mdatoms, fcdata, &MassQ, &vcm, top_global, top, enerd,
 960                                  ekind, gstat, &last_ekin, bCalcVir, total_vir, shake_vir, force_vir,
 961                                  pres, M, do_log, do_ene, bCalcEner, bGStat, bStopCM, bTrotter,
 962                                  bExchanged, &bSumEkinhOld, &shouldCheckNumberOfBondedInteractions,
 963                                  &saved_conserved_quantity, &f, &upd, constr, &nullSignaller,
 964                                  trotter_seq, nrnb, mdlog, fplog, wcycle);
 965         }
 966
 967         /* ########  END FIRST UPDATE STEP  ############## */
 968         /* ########  If doing VV, we now have v(dt) ###### */
 969         if (bDoExpanded)
 970         {
 971             /* perform extended ensemble sampling in lambda - we don't
 972                actually move to the new state before outputting
 973                statistics, but if performing simulated tempering, we
 974                do update the velocities and the tau_t. */
 975
 976             lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state,
 977                                               state->dfhist, step, state->v.rvec_array(), mdatoms);
 978             /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
 979             if (MASTER(cr))
 980             {
 981                 copy_df_history(state_global->dfhist, state->dfhist);
 982             }
 983         }
 984
 985         // Copy coordinate from the GPU for the output/checkpointing if the update is offloaded and
 986         // coordinates have not already been copied for i) search or ii) CPU force tasks.
 987         if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork
 988             && (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)
 989                 || checkpointHandler->isCheckpointingStep()))
 990         {
 991             stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
 992             stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
 993         }
 994         // Copy velocities if needed for the output/checkpointing.
 995         // NOTE: Copy on the search steps is done at the beginning of the step.
 996         if (useGpuForUpdate && !bNS
 997             && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep()))
 998         {
 999             stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1000             stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1001         }
1002         // Copy forces for the output if the forces were reduced on the GPU (not the case on virial steps)
1003         // and update is offloaded hence forces are kept on the GPU for update and have not been
1004         // already transferred in do_force().
1005         // TODO: There should be an improved, explicit mechanism that ensures this copy is only executed
1006         //       when the forces are ready on the GPU -- the same synchronizer should be used as the one
1007         //       prior to GPU update.
1008         // TODO: When the output flags will be included in step workload, this copy can be combined with the
1009         //       copy call in do_force(...).
1010         // NOTE: The forces should not be copied here if the vsites are present, since they were modified
1011         //       on host after the D2H copy in do_force(...).
1012         if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite)
1013             && do_per_step(step, ir->nstfout))
1014         {
1015             stateGpu->copyForcesFromGpu(f.view().force(), AtomLocality::Local);
1016             stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
1017         }
1018         /* Now we have the energies and forces corresponding to the
1019          * coordinates at time t. We must output all of this before
1020          * the update.
1021          */
1022         do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, ir, state, state_global,
1023                                  observablesHistory, top_global, fr, outf, energyOutput, ekind,
1024                                  f.view().force(), checkpointHandler->isCheckpointingStep(),
1025                                  bRerunMD, bLastStep, mdrunOptions.writeConfout, bSumEkinhOld);
1026         /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1027         bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1028
1029         /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1030         if (startingBehavior != StartingBehavior::NewSimulation && bFirstStep
1031             && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1032         {
1033             copy_mat(state->svir_prev, shake_vir);
1034             copy_mat(state->fvir_prev, force_vir);
1035         }
1036
1037         stopHandler->setSignal();
1038         resetHandler->setSignal(walltime_accounting);
1039
1040         if (bGStat || !PAR(cr))
1041         {
1042             /* In parallel we only have to check for checkpointing in steps
1043              * where we do global communication,
1044              *  otherwise the other nodes don't know.
1045              */
1046             checkpointHandler->setSignal(walltime_accounting);
1047         }
1048
1049         /* #########   START SECOND UPDATE STEP ################# */
1050
1051         /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen
1052            controlled in preprocessing */
1053
1054         if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1055         {
1056             gmx_bool bIfRandomize;
1057             bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1058             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1059             if (constr && bIfRandomize)
1060             {
1061                 constrain_velocities(constr, do_log, do_ene, step, state, nullptr, false, nullptr);
1062             }
1063         }
1064         /* Box is changed in update() when we do pressure coupling,
1065          * but we should still use the old box for energy corrections and when
1066          * writing it to the energy file, so it matches the trajectory files for
1067          * the same timestep above. Make a copy in a separate array.
1068          */
1069         copy_mat(state->box, lastbox);
1070
1071         dvdl_constr = 0;
1072
1073         if (!useGpuForUpdate)
1074         {
1075             wallcycle_start(wcycle, ewcUPDATE);
1076         }
1077         /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1078         if (bTrotter)
1079         {
1080             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1081             /* We can only do Berendsen coupling after we have summed
1082              * the kinetic energy or virial. Since the happens
1083              * in global_state after update, we should only do it at
1084              * step % nstlist = 1 with bGStatEveryStep=FALSE.
1085              */
1086         }
1087         else
1088         {
1089             update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1090             update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep);
1091         }
1092
1093         /* With leap-frog type integrators we compute the kinetic energy
1094          * at a whole time step as the average of the half-time step kinetic
1095          * energies of two subsequent steps. Therefore we need to compute the
1096          * half step kinetic energy also if we need energies at the next step.
1097          */
1098         const bool needHalfStepKineticEnergy =
1099                 (!EI_VV(ir->eI) && (do_per_step(step + 1, nstglobalcomm) || step_rel + 1 == ir->nsteps));
1100
1101         // Parrinello-Rahman requires the pressure to be availible before the update to compute
1102         // the velocity scaling matrix. Hence, it runs one step after the nstpcouple step.
1103         const bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN
1104                                          && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
1105
1106         if (EI_VV(ir->eI))
1107         {
1108             GMX_ASSERT(!useGpuForUpdate, "GPU update is not supported with VVAK integrator.");
1109
1110             integrateVVSecondStep(step, ir, fr, cr, state, mdatoms, fcdata, &MassQ, &vcm, pull_work,
1111                                   enerd, ekind, gstat, &dvdl_constr, bCalcVir, total_vir, shake_vir,
1112                                   force_vir, pres, M, lastbox, do_log, do_ene, bGStat, &bSumEkinhOld,
1113                                   &f, &cbuf, &upd, constr, &nullSignaller, trotter_seq, nrnb, wcycle);
1114         }
1115         else
1116         {
1117             if (useGpuForUpdate)
1118             {
1119
1120                 wallcycle_stop(wcycle, ewcUPDATE);
1121
1122                 if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
1123                 {
1124                     integrator->set(stateGpu->getCoordinates(), stateGpu->getVelocities(),
1125                                     stateGpu->getForces(), top.idef, *mdatoms, ekind->ngtc);
1126
1127                     // Copy data to the GPU after buffers might have being reinitialized
1128                     stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1129                     stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1130                 }
1131
1132                 if (simulationWork.useGpuPme && !runScheduleWork->simulationWork.useGpuPmePpCommunication
1133                     && !thisRankHasDuty(cr, DUTY_PME))
1134                 {
1135                     // The PME forces were recieved to the host, so have to be copied
1136                     stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::All);
1137                 }
1138                 else if (!runScheduleWork->stepWork.useGpuFBufferOps)
1139                 {
1140                     // The buffer ops were not offloaded this step, so the forces are on the
1141                     // host and have to be copied
1142                     stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::Local);
1143                 }
1144
1145                 const bool doTemperatureScaling =
1146                         (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1147
1148                 // This applies Leap-Frog, LINCS and SETTLE in succession
1149                 integrator->integrate(
1150                         stateGpu->getForcesReadyOnDeviceEvent(
1151                                 AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps),
1152                         ir->delta_t, true, bCalcVir, shake_vir, doTemperatureScaling, ekind->tcstat,
1153                         doParrinelloRahman, ir->nstpcouple * ir->delta_t, M);
1154
1155                 // Copy velocities D2H after update if:
1156                 // - Globals are computed this step (includes the energy output steps).
1157                 // - Temperature is needed for the next step.
1158                 if (bGStat || needHalfStepKineticEnergy)
1159                 {
1160                     stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1161                     stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1162                 }
1163             }
1164             else
1165             {
1166                 /* With multiple time stepping we need to do an additional normal
1167                  * update step to obtain the virial, as the actual MTS integration
1168                  * using an acceleration where the slow forces are multiplied by mtsFactor.
1169                  * Using that acceleration would result in a virial with the slow
1170                  * force contribution would be a factor mtsFactor too large.
1171                  */
1172                 if (fr->useMts && bCalcVir && constr != nullptr)
1173                 {
1174                     upd.update_for_constraint_virial(*ir, *mdatoms, *state,
1175                                                      f.view().forceWithPadding(), *ekind);
1176
1177                     constrain_coordinates(constr, do_log, do_ene, step, state,
1178                                           upd.xp()->arrayRefWithPadding(), &dvdl_constr, bCalcVir,
1179                                           shake_vir);
1180                 }
1181
1182                 ArrayRefWithPadding<const RVec> forceCombined =
1183                         (fr->useMts && step % ir->mtsLevels[1].stepFactor == 0)
1184                                 ? f.view().forceMtsCombinedWithPadding()
1185                                 : f.view().forceWithPadding();
1186                 upd.update_coords(*ir, step, mdatoms, state, forceCombined, fcdata, ekind, M,
1187                                   etrtPOSITION, cr, constr != nullptr);
1188
1189                 wallcycle_stop(wcycle, ewcUPDATE);
1190
1191                 constrain_coordinates(constr, do_log, do_ene, step, state,
1192                                       upd.xp()->arrayRefWithPadding(), &dvdl_constr,
1193                                       bCalcVir && !fr->useMts, shake_vir);
1194
1195                 upd.update_sd_second_half(*ir, step, &dvdl_constr, mdatoms, state, cr, nrnb, wcycle,
1196                                           constr, do_log, do_ene);
1197                 upd.finish_update(*ir, mdatoms, state, wcycle, constr != nullptr);
1198             }
1199
1200             if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1201             {
1202                 updatePrevStepPullCom(pull_work, state);
1203             }
1204
1205             enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1206         }
1207
1208         if (vsite != nullptr)
1209         {
1210             wallcycle_start(wcycle, ewcVSITECONSTR);
1211             vsite->construct(state->x, ir->delta_t, state->v, state->box);
1212             wallcycle_stop(wcycle, ewcVSITECONSTR);
1213         }
1214
1215         /* ############## IF NOT VV, Calculate globals HERE  ############ */
1216         /* With Leap-Frog we can skip compute_globals at
1217          * non-communication steps, but we need to calculate
1218          * the kinetic energy one step before communication.
1219          */
1220         {
1221             // Organize to do inter-simulation signalling on steps if
1222             // and when algorithms require it.
1223             const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1224
1225             if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
1226             {
1227                 // Copy coordinates when needed to stop the CM motion.
1228                 if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
1229                 {
1230                     stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1231                     stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1232                 }
1233                 // Since we're already communicating at this step, we
1234                 // can propagate intra-simulation signals. Note that
1235                 // check_nstglobalcomm has the responsibility for
1236                 // choosing the value of nstglobalcomm that is one way
1237                 // bGStat becomes true, so we can't get into a
1238                 // situation where e.g. checkpointing can't be
1239                 // signalled.
1240                 bool                doIntraSimSignal = true;
1241                 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1242
1243                 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
1244                                 makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm,
1245                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, constr,
1246                                 &signaller, lastbox, &totalNumberOfBondedInteractions, &bSumEkinhOld,
1247                                 (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1248                                         | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1249                                         | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1250                                         | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT
1251                                         | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
1252                                                                                  : 0));
1253                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1254                                                 top_global, &top, makeConstArrayRef(state->x),
1255                                                 state->box, &shouldCheckNumberOfBondedInteractions);
1256                 if (!EI_VV(ir->eI) && bStopCM)
1257                 {
1258                     process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x),
1259                                            makeArrayRef(state->v));
1260                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1261
1262                     // TODO: The special case of removing CM motion should be dealt more gracefully
1263                     if (useGpuForUpdate)
1264                     {
1265                         stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1266                         // Here we block until the H2D copy completes because event sync with the
1267                         // force kernels that use the coordinates on the next steps is not implemented
1268                         // (not because of a race on state->x being modified on the CPU while H2D is in progress).
1269                         stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
1270                         // If the COM removal changed the velocities on the CPU, this has to be accounted for.
1271                         if (vcm.mode != ecmNO)
1272                         {
1273                             stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1274                         }
1275                     }
1276                 }
1277             }
1278         }
1279
1280         /* #############  END CALC EKIN AND PRESSURE ################# */
1281
1282         /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1283            the virial that should probably be addressed eventually. state->veta has better properies,
1284            but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1285            generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1286
1287         if (ir->efep != efepNO && !EI_VV(ir->eI))
1288         {
1289             /* Sum up the foreign energy and dK/dl terms for md and sd.
1290                Currently done every step so that dH/dl is correct in the .edr */
1291             accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals);
1292         }
1293
1294         update_pcouple_after_coordinates(fplog, step, ir, mdatoms, pres, force_vir, shake_vir,
1295                                          pressureCouplingMu, state, nrnb, upd.deform(), !useGpuForUpdate);
1296
1297         const bool doBerendsenPressureCoupling =
1298                 (inputrec->epc == epcBERENDSEN && do_per_step(step, inputrec->nstpcouple));
1299         const bool doCRescalePressureCoupling =
1300                 (inputrec->epc == epcCRESCALE && do_per_step(step, inputrec->nstpcouple));
1301         if (useGpuForUpdate
1302             && (doBerendsenPressureCoupling || doCRescalePressureCoupling || doParrinelloRahman))
1303         {
1304             integrator->scaleCoordinates(pressureCouplingMu);
1305             if (doCRescalePressureCoupling)
1306             {
1307                 matrix pressureCouplingInvMu;
1308                 gmx::invertBoxMatrix(pressureCouplingMu, pressureCouplingInvMu);
1309                 integrator->scaleVelocities(pressureCouplingInvMu);
1310             }
1311             integrator->setPbc(PbcType::Xyz, state->box);
1312         }
1313
1314         /* ################# END UPDATE STEP 2 ################# */
1315         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1316
1317         /* The coordinates (x) were unshifted in update */
1318         if (!bGStat)
1319         {
1320             /* We will not sum ekinh_old,
1321              * so signal that we still have to do it.
1322              */
1323             bSumEkinhOld = TRUE;
1324         }
1325
1326         if (bCalcEner)
1327         {
1328             /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1329
1330             /* use the directly determined last velocity, not actually the averaged half steps */
1331             if (bTrotter && ir->eI == eiVV)
1332             {
1333                 enerd->term[F_EKIN] = last_ekin;
1334             }
1335             enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1336
1337             if (integratorHasConservedEnergyQuantity(ir))
1338             {
1339                 if (EI_VV(ir->eI))
1340                 {
1341                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1342                 }
1343                 else
1344                 {
1345                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1346                 }
1347             }
1348             /* #########  END PREPARING EDR OUTPUT  ###########  */
1349         }
1350
1351         /* Output stuff */
1352         if (MASTER(cr))
1353         {
1354             if (fplog && do_log && bDoExpanded)
1355             {
1356                 /* only needed if doing expanded ensemble */
1357                 PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals,
1358                                           ir->bSimTemp ? ir->simtempvals : nullptr,
1359                                           state_global->dfhist, state->fep_state, ir->nstlog, step);
1360             }
1361             if (bCalcEner)
1362             {
1363                 energyOutput.addDataAtEnergyStep(
1364                         bDoDHDL, bCalcEnerStep, t, mdatoms->tmass, enerd, ir->fepvals,
1365                         ir->expandedvals, lastbox,
1366                         PTCouplingArrays{ state->boxv, state->nosehoover_xi, state->nosehoover_vxi,
1367                                           state->nhpres_xi, state->nhpres_vxi },
1368                         state->fep_state, shake_vir, force_vir, total_vir, pres, ekind, mu_tot, constr);
1369             }
1370             else
1371             {
1372                 energyOutput.recordNonEnergyStep();
1373             }
1374
1375             gmx_bool do_dr = do_per_step(step, ir->nstdisreout);
1376             gmx_bool do_or = do_per_step(step, ir->nstorireout);
1377
1378             if (doSimulatedAnnealing)
1379             {
1380                 gmx::EnergyOutput::printAnnealingTemperatures(do_log ? fplog : nullptr, groups,
1381                                                               &(ir->opts));
1382             }
1383             if (do_log || do_ene || do_dr || do_or)
1384             {
1385                 energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
1386                                                    do_log ? fplog : nullptr, step, t,
1387                                                    fr->fcdata.get(), awh.get());
1388             }
1389             if (do_log && ir->bDoAwh && awh->hasFepLambdaDimension())
1390             {
1391                 const bool isInitialOutput = false;
1392                 printLambdaStateToLog(fplog, state->lambda, isInitialOutput);
1393             }
1394
1395             if (ir->bPull)
1396             {
1397                 pull_print_output(pull_work, step, t);
1398             }
1399
1400             if (do_per_step(step, ir->nstlog))
1401             {
1402                 if (fflush(fplog) != 0)
1403                 {
1404                     gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1405                 }
1406             }
1407         }
1408         if (bDoExpanded)
1409         {
1410             /* Have to do this part _after_ outputting the logfile and the edr file */
1411             /* Gets written into the state at the beginning of next loop*/
1412             state->fep_state = lamnew;
1413         }
1414         else if (ir->bDoAwh && awh->needForeignEnergyDifferences(step))
1415         {
1416             state->fep_state = awh->fepLambdaState();
1417         }
1418         /* Print the remaining wall clock time for the run */
1419         if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting)
1420         {
1421             if (shellfc)
1422             {
1423                 fprintf(stderr, "\n");
1424             }
1425             print_time(stderr, walltime_accounting, step, ir, cr);
1426         }
1427
1428         /* Ion/water position swapping.
1429          * Not done in last step since trajectory writing happens before this call
1430          * in the MD loop and exchanges would be lost anyway. */
1431         bNeedRepartition = FALSE;
1432         if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && do_per_step(step, ir->swap->nstswap))
1433         {
1434             bNeedRepartition =
1435                     do_swapcoords(cr, step, t, ir, swap, wcycle, as_rvec_array(state->x.data()),
1436                                   state->box, MASTER(cr) && mdrunOptions.verbose, bRerunMD);
1437
1438             if (bNeedRepartition && DOMAINDECOMP(cr))
1439             {
1440                 dd_collect_state(cr->dd, state, state_global);
1441             }
1442         }
1443
1444         /* Replica exchange */
1445         bExchanged = FALSE;
1446         if (bDoReplEx)
1447         {
1448             bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t);
1449         }
1450
1451         if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr))
1452         {
1453             dd_partition_system(fplog, mdlog, step, cr, TRUE, 1, state_global, *top_global, ir,
1454                                 imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr,
1455                                 nrnb, wcycle, FALSE);
1456             shouldCheckNumberOfBondedInteractions = true;
1457             upd.setNumAtoms(state->natoms);
1458         }
1459
1460         bFirstStep = FALSE;
1461         bInitStep  = FALSE;
1462
1463         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1464         /* With all integrators, except VV, we need to retain the pressure
1465          * at the current step for coupling at the next step.
1466          */
1467         if ((state->flags & (1U << estPRES_PREV))
1468             && (bGStatEveryStep || (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1469         {
1470             /* Store the pressure in t_state for pressure coupling
1471              * at the next MD step.
1472              */
1473             copy_mat(pres, state->pres_prev);
1474         }
1475
1476         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1477
1478         if ((membed != nullptr) && (!bLastStep))
1479         {
1480             rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1481         }
1482
1483         cycles = wallcycle_stop(wcycle, ewcSTEP);
1484         if (DOMAINDECOMP(cr) && wcycle)
1485         {
1486             dd_cycles_add(cr->dd, cycles, ddCyclStep);
1487         }
1488
1489         /* increase the MD step number */
1490         step++;
1491         step_rel++;
1492
1493 #if GMX_FAHCORE
1494         if (MASTER(cr))
1495         {
1496             fcReportProgress(ir->nsteps + ir->init_step, step);
1497         }
1498 #endif
1499
1500         resetHandler->resetCounters(step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb,
1501                                     fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1502
1503         /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1504         imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1505     }
1506     /* End of main MD loop */
1507
1508     /* Closing TNG files can include compressing data. Therefore it is good to do that
1509      * before stopping the time measurements. */
1510     mdoutf_tng_close(outf);
1511
1512     /* Stop measuring walltime */
1513     walltime_accounting_end_time(walltime_accounting);
1514
1515     if (!thisRankHasDuty(cr, DUTY_PME))
1516     {
1517         /* Tell the PME only node to finish */
1518         gmx_pme_send_finish(cr);
1519     }
1520
1521     if (MASTER(cr))
1522     {
1523         if (ir->nstcalcenergy > 0)
1524         {
1525             energyOutput.printEnergyConservation(fplog, ir->simulation_part, EI_MD(ir->eI));
1526
1527             gmx::EnergyOutput::printAnnealingTemperatures(fplog, groups, &(ir->opts));
1528             energyOutput.printAverages(fplog, groups);
1529         }
1530     }
1531     done_mdoutf(outf);
1532
1533     if (bPMETune)
1534     {
1535         pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1536     }
1537
1538     done_shellfc(fplog, shellfc, step_rel);
1539
1540     if (useReplicaExchange && MASTER(cr))
1541     {
1542         print_replica_exchange_statistics(fplog, repl_ex);
1543     }
1544
1545     walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1546
1547     global_stat_destroy(gstat);
1548 }