Merge branch release-2021 into master
[alexxy/gromacs.git] / src / gromacs / mdrun / md.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2011-2019,2020,2021, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 /*! \internal \file
38  *
39  * \brief Implements the integrator for normal molecular dynamics simulations
40  *
41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42  * \ingroup module_mdrun
43  */
44 #include "gmxpre.h"
45
46 #include <cinttypes>
47 #include <cmath>
48 #include <cstdio>
49 #include <cstdlib>
50
51 #include <algorithm>
52 #include <memory>
53 #include <numeric>
54
55 #include "gromacs/applied_forces/awh/awh.h"
56 #include "gromacs/commandline/filenm.h"
57 #include "gromacs/domdec/collect.h"
58 #include "gromacs/domdec/dlbtiming.h"
59 #include "gromacs/domdec/domdec.h"
60 #include "gromacs/domdec/domdec_network.h"
61 #include "gromacs/domdec/domdec_struct.h"
62 #include "gromacs/domdec/gpuhaloexchange.h"
63 #include "gromacs/domdec/mdsetup.h"
64 #include "gromacs/domdec/partition.h"
65 #include "gromacs/essentialdynamics/edsam.h"
66 #include "gromacs/ewald/pme_load_balancing.h"
67 #include "gromacs/ewald/pme_pp.h"
68 #include "gromacs/fileio/trxio.h"
69 #include "gromacs/gmxlib/network.h"
70 #include "gromacs/gmxlib/nrnb.h"
71 #include "gromacs/gpu_utils/device_stream_manager.h"
72 #include "gromacs/gpu_utils/gpu_utils.h"
73 #include "gromacs/imd/imd.h"
74 #include "gromacs/listed_forces/listed_forces.h"
75 #include "gromacs/math/functions.h"
76 #include "gromacs/math/invertmatrix.h"
77 #include "gromacs/math/vec.h"
78 #include "gromacs/math/vectypes.h"
79 #include "gromacs/mdlib/checkpointhandler.h"
80 #include "gromacs/mdlib/compute_io.h"
81 #include "gromacs/mdlib/constr.h"
82 #include "gromacs/mdlib/coupling.h"
83 #include "gromacs/mdlib/ebin.h"
84 #include "gromacs/mdlib/enerdata_utils.h"
85 #include "gromacs/mdlib/energyoutput.h"
86 #include "gromacs/mdlib/expanded.h"
87 #include "gromacs/mdlib/force.h"
88 #include "gromacs/mdlib/force_flags.h"
89 #include "gromacs/mdlib/forcerec.h"
90 #include "gromacs/mdlib/freeenergyparameters.h"
91 #include "gromacs/mdlib/md_support.h"
92 #include "gromacs/mdlib/mdatoms.h"
93 #include "gromacs/mdlib/mdoutf.h"
94 #include "gromacs/mdlib/membed.h"
95 #include "gromacs/mdlib/resethandler.h"
96 #include "gromacs/mdlib/sighandler.h"
97 #include "gromacs/mdlib/simulationsignal.h"
98 #include "gromacs/mdlib/stat.h"
99 #include "gromacs/mdlib/stophandler.h"
100 #include "gromacs/mdlib/tgroup.h"
101 #include "gromacs/mdlib/trajectory_writing.h"
102 #include "gromacs/mdlib/update.h"
103 #include "gromacs/mdlib/update_constrain_gpu.h"
104 #include "gromacs/mdlib/update_vv.h"
105 #include "gromacs/mdlib/vcm.h"
106 #include "gromacs/mdlib/vsite.h"
107 #include "gromacs/mdrunutility/handlerestart.h"
108 #include "gromacs/mdrunutility/multisim.h"
109 #include "gromacs/mdrunutility/printtime.h"
110 #include "gromacs/mdtypes/awh_history.h"
111 #include "gromacs/mdtypes/awh_params.h"
112 #include "gromacs/mdtypes/commrec.h"
113 #include "gromacs/mdtypes/df_history.h"
114 #include "gromacs/mdtypes/energyhistory.h"
115 #include "gromacs/mdtypes/fcdata.h"
116 #include "gromacs/mdtypes/forcebuffers.h"
117 #include "gromacs/mdtypes/forcerec.h"
118 #include "gromacs/mdtypes/group.h"
119 #include "gromacs/mdtypes/inputrec.h"
120 #include "gromacs/mdtypes/interaction_const.h"
121 #include "gromacs/mdtypes/md_enums.h"
122 #include "gromacs/mdtypes/mdatom.h"
123 #include "gromacs/mdtypes/mdrunoptions.h"
124 #include "gromacs/mdtypes/multipletimestepping.h"
125 #include "gromacs/mdtypes/observableshistory.h"
126 #include "gromacs/mdtypes/pullhistory.h"
127 #include "gromacs/mdtypes/simulation_workload.h"
128 #include "gromacs/mdtypes/state.h"
129 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
130 #include "gromacs/modularsimulator/energydata.h"
131 #include "gromacs/nbnxm/gpu_data_mgmt.h"
132 #include "gromacs/nbnxm/nbnxm.h"
133 #include "gromacs/pbcutil/pbc.h"
134 #include "gromacs/pulling/output.h"
135 #include "gromacs/pulling/pull.h"
136 #include "gromacs/swap/swapcoords.h"
137 #include "gromacs/timing/wallcycle.h"
138 #include "gromacs/timing/walltime_accounting.h"
139 #include "gromacs/topology/atoms.h"
140 #include "gromacs/topology/idef.h"
141 #include "gromacs/topology/mtop_util.h"
142 #include "gromacs/topology/topology.h"
143 #include "gromacs/trajectory/trajectoryframe.h"
144 #include "gromacs/utility/basedefinitions.h"
145 #include "gromacs/utility/cstringutil.h"
146 #include "gromacs/utility/fatalerror.h"
147 #include "gromacs/utility/logger.h"
148 #include "gromacs/utility/real.h"
149 #include "gromacs/utility/smalloc.h"
150
151 #include "legacysimulator.h"
152 #include "replicaexchange.h"
153 #include "shellfc.h"
154
155 using gmx::SimulationSignaller;
156
157 void gmx::LegacySimulator::do_md()
158 {
159     // TODO Historically, the EM and MD "integrators" used different
160     // names for the t_inputrec *parameter, but these must have the
161     // same name, now that it's a member of a struct. We use this ir
162     // alias to avoid a large ripple of nearly useless changes.
163     // t_inputrec is being replaced by IMdpOptionsProvider, so this
164     // will go away eventually.
165     const t_inputrec* ir = inputrec;
166
167     int64_t      step, step_rel;
168     double       t, t0 = ir->init_t;
169     gmx_bool     bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
170     gmx_bool     bNS = FALSE, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
171     gmx_bool     bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
172     gmx_bool     do_ene, do_log, do_verbose;
173     gmx_bool     bMasterState;
174     unsigned int force_flags;
175     tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, pres = { { 0 } };
176     int    i, m;
177     rvec   mu_tot;
178     matrix pressureCouplingMu, M;
179     gmx_repl_ex_t     repl_ex = nullptr;
180     gmx_global_stat_t gstat;
181     gmx_shellfc_t*    shellfc;
182     gmx_bool          bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
183     gmx_bool          bTrotter;
184     real              dvdl_constr;
185     std::vector<RVec> cbuf;
186     matrix            lastbox;
187     int               lamnew = 0;
188     /* for FEP */
189     int       nstfep = 0;
190     double    cycles;
191     real      saved_conserved_quantity = 0;
192     real      last_ekin                = 0;
193     t_extmass MassQ;
194     char      sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
195
196     /* PME load balancing data for GPU kernels */
197     gmx_bool bPMETune         = FALSE;
198     gmx_bool bPMETunePrinting = FALSE;
199
200     bool bInteractiveMDstep = false;
201
202     /* Domain decomposition could incorrectly miss a bonded
203        interaction, but checking for that requires a global
204        communication stage, which does not otherwise happen in DD
205        code. So we do that alongside the first global energy reduction
206        after a new DD is made. These variables handle whether the
207        check happens, and the result it returns. */
208     bool shouldCheckNumberOfBondedInteractions = false;
209     int  totalNumberOfBondedInteractions       = -1;
210
211     SimulationSignals signals;
212     // Most global communnication stages don't propagate mdrun
213     // signals, and will use this object to achieve that.
214     SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
215
216     if (!mdrunOptions.writeConfout)
217     {
218         // This is on by default, and the main known use case for
219         // turning it off is for convenience in benchmarking, which is
220         // something that should not show up in the general user
221         // interface.
222         GMX_LOG(mdlog.info)
223                 .asParagraph()
224                 .appendText(
225                         "The -noconfout functionality is deprecated, and may be removed in a "
226                         "future version.");
227     }
228
229     /* md-vv uses averaged full step velocities for T-control
230        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
231        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
232     bTrotter = (EI_VV(ir->eI)
233                 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
234
235     const bool bRerunMD = false;
236
237     int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
238     bGStatEveryStep   = (nstglobalcomm == 1);
239
240     const SimulationGroups* groups = &top_global->groups;
241
242     std::unique_ptr<EssentialDynamics> ed = nullptr;
243     if (opt2bSet("-ei", nfile, fnm))
244     {
245         /* Initialize essential dynamics sampling */
246         ed = init_edsam(mdlog,
247                         opt2fn_null("-ei", nfile, fnm),
248                         opt2fn("-eo", nfile, fnm),
249                         top_global,
250                         ir,
251                         cr,
252                         constr,
253                         state_global,
254                         observablesHistory,
255                         oenv,
256                         startingBehavior);
257     }
258     else if (observablesHistory->edsamHistory)
259     {
260         gmx_fatal(FARGS,
261                   "The checkpoint is from a run with essential dynamics sampling, "
262                   "but the current run did not specify the -ei option. "
263                   "Either specify the -ei option to mdrun, or do not use this checkpoint file.");
264     }
265
266     int*                fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
267     gmx::ArrayRef<real> lambda    = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
268     initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda);
269     Update upd(*ir, deform);
270     bool   doSimulatedAnnealing = false;
271     {
272         // TODO: Avoid changing inputrec (#3854)
273         // Simulated annealing updates the reference temperature.
274         auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
275         doSimulatedAnnealing   = initSimulatedAnnealing(nonConstInputrec, &upd);
276     }
277     const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
278
279     const t_fcdata& fcdata = *fr->fcdata;
280
281     bool simulationsShareState = false;
282     int  nstSignalComm         = nstglobalcomm;
283     {
284         // TODO This implementation of ensemble orientation restraints is nasty because
285         // a user can't just do multi-sim with single-sim orientation restraints.
286         bool usingEnsembleRestraints =
287                 (fcdata.disres->nsystems > 1) || ((ms != nullptr) && (fcdata.orires->nr != 0));
288         bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
289
290         // Replica exchange, ensemble restraints and AWH need all
291         // simulations to remain synchronized, so they need
292         // checkpoints and stop conditions to act on the same step, so
293         // the propagation of such signals must take place between
294         // simulations, not just within simulations.
295         // TODO: Make algorithm initializers set these flags.
296         simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
297
298         if (simulationsShareState)
299         {
300             // Inter-simulation signal communication does not need to happen
301             // often, so we use a minimum of 200 steps to reduce overhead.
302             const int c_minimumInterSimulationSignallingInterval = 200;
303             nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1) / nstglobalcomm)
304                             * nstglobalcomm;
305         }
306     }
307
308     if (startingBehavior != StartingBehavior::RestartWithAppending)
309     {
310         pleaseCiteCouplingAlgorithms(fplog, *ir);
311     }
312     gmx_mdoutf*       outf = init_mdoutf(fplog,
313                                    nfile,
314                                    fnm,
315                                    mdrunOptions,
316                                    cr,
317                                    outputProvider,
318                                    mdModulesNotifier,
319                                    ir,
320                                    top_global,
321                                    oenv,
322                                    wcycle,
323                                    startingBehavior,
324                                    simulationsShareState,
325                                    ms);
326     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
327                                    top_global,
328                                    ir,
329                                    pull_work,
330                                    mdoutf_get_fp_dhdl(outf),
331                                    false,
332                                    startingBehavior,
333                                    simulationsShareState,
334                                    mdModulesNotifier);
335
336     gstat = global_stat_init(ir);
337
338     const auto& simulationWork     = runScheduleWork->simulationWork;
339     const bool  useGpuForPme       = simulationWork.useGpuPme;
340     const bool  useGpuForNonbonded = simulationWork.useGpuNonbonded;
341     const bool  useGpuForBufferOps = simulationWork.useGpuBufferOps;
342     const bool  useGpuForUpdate    = simulationWork.useGpuUpdate;
343
344     /* Check for polarizable models and flexible constraints */
345     shellfc = init_shell_flexcon(fplog,
346                                  top_global,
347                                  constr ? constr->numFlexibleConstraints() : 0,
348                                  ir->nstcalcenergy,
349                                  DOMAINDECOMP(cr),
350                                  useGpuForPme);
351
352     {
353         double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
354         if ((io > 2000) && MASTER(cr))
355         {
356             fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io);
357         }
358     }
359
360     // Local state only becomes valid now.
361     std::unique_ptr<t_state> stateInstance;
362     t_state*                 state;
363
364     gmx_localtop_t top(top_global->ffparams);
365
366     auto mdatoms = mdAtoms->mdatoms();
367
368     ForceBuffers f(fr->useMts,
369                    ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
370                            ? PinningPolicy::PinnedIfSupported
371                            : PinningPolicy::CannotBePinned);
372     if (DOMAINDECOMP(cr))
373     {
374         stateInstance = std::make_unique<t_state>();
375         state         = stateInstance.get();
376         dd_init_local_state(cr->dd, state_global, state);
377
378         /* Distribute the charge groups over the nodes from the master node */
379         dd_partition_system(fplog,
380                             mdlog,
381                             ir->init_step,
382                             cr,
383                             TRUE,
384                             1,
385                             state_global,
386                             *top_global,
387                             ir,
388                             imdSession,
389                             pull_work,
390                             state,
391                             &f,
392                             mdAtoms,
393                             &top,
394                             fr,
395                             vsite,
396                             constr,
397                             nrnb,
398                             nullptr,
399                             FALSE);
400         shouldCheckNumberOfBondedInteractions = true;
401         upd.setNumAtoms(state->natoms);
402     }
403     else
404     {
405         state_change_natoms(state_global, state_global->natoms);
406         /* Copy the pointer to the global state */
407         state = state_global;
408
409         /* Generate and initialize new topology */
410         mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &f, mdAtoms, constr, vsite, shellfc);
411
412         upd.setNumAtoms(state->natoms);
413     }
414
415     std::unique_ptr<UpdateConstrainGpu> integrator;
416
417     StatePropagatorDataGpu* stateGpu = fr->stateGpu;
418
419     // TODO: the assertions below should be handled by UpdateConstraintsBuilder.
420     if (useGpuForUpdate)
421     {
422         GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
423                                    || constr->numConstraintsTotal() == 0,
424                            "Constraints in domain decomposition are only supported with update "
425                            "groups if using GPU update.\n");
426         GMX_RELEASE_ASSERT(ir->eConstrAlg != econtSHAKE || constr == nullptr
427                                    || constr->numConstraintsTotal() == 0,
428                            "SHAKE is not supported with GPU update.");
429         GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
430                            "Either PME or short-ranged non-bonded interaction tasks must run on "
431                            "the GPU to use GPU update.\n");
432         GMX_RELEASE_ASSERT(ir->eI == eiMD,
433                            "Only the md integrator is supported with the GPU update.\n");
434         GMX_RELEASE_ASSERT(
435                 ir->etc != TemperatureCoupling::NoseHoover,
436                 "Nose-Hoover temperature coupling is not supported with the GPU update.\n");
437         GMX_RELEASE_ASSERT(
438                 ir->epc == PressureCoupling::No || ir->epc == PressureCoupling::ParrinelloRahman
439                         || ir->epc == PressureCoupling::Berendsen || ir->epc == PressureCoupling::CRescale,
440                 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are supported "
441                 "with the GPU update.\n");
442         GMX_RELEASE_ASSERT(!mdatoms->haveVsites,
443                            "Virtual sites are not supported with the GPU update.\n");
444         GMX_RELEASE_ASSERT(ed == nullptr,
445                            "Essential dynamics is not supported with the GPU update.\n");
446         GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(*ir->pull),
447                            "Constraints pulling is not supported with the GPU update.\n");
448         GMX_RELEASE_ASSERT(fcdata.orires->nr == 0,
449                            "Orientation restraints are not supported with the GPU update.\n");
450         GMX_RELEASE_ASSERT(
451                 ir->efep == efepNO
452                         || (!haveFepPerturbedMasses(*top_global) && !havePerturbedConstraints(*top_global)),
453                 "Free energy perturbation of masses and constraints are not supported with the GPU "
454                 "update.");
455
456         if (constr != nullptr && constr->numConstraintsTotal() > 0)
457         {
458             GMX_LOG(mdlog.info)
459                     .asParagraph()
460                     .appendText("Updating coordinates and applying constraints on the GPU.");
461         }
462         else
463         {
464             GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
465         }
466         GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
467                            "Device stream manager should be initialized in order to use GPU "
468                            "update-constraints.");
469         GMX_RELEASE_ASSERT(
470                 fr->deviceStreamManager->streamIsValid(gmx::DeviceStreamType::UpdateAndConstraints),
471                 "Update stream should be initialized in order to use GPU "
472                 "update-constraints.");
473         integrator = std::make_unique<UpdateConstrainGpu>(
474                 *ir,
475                 *top_global,
476                 fr->deviceStreamManager->context(),
477                 fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints),
478                 stateGpu->xUpdatedOnDevice(),
479                 wcycle);
480
481         integrator->setPbc(PbcType::Xyz, state->box);
482     }
483
484     if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
485     {
486         changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
487     }
488     if (useGpuForUpdate)
489     {
490         changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported);
491     }
492
493     // NOTE: The global state is no longer used at this point.
494     // But state_global is still used as temporary storage space for writing
495     // the global state to file and potentially for replica exchange.
496     // (Global topology should persist.)
497
498     update_mdatoms(mdatoms, state->lambda[efptMASS]);
499
500     if (ir->bExpanded)
501     {
502         /* Check nstexpanded here, because the grompp check was broken */
503         if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
504         {
505             gmx_fatal(FARGS,
506                       "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
507         }
508         init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation, ir, state->dfhist);
509     }
510
511     if (MASTER(cr))
512     {
513         EnergyData::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput);
514     }
515
516     preparePrevStepPullCom(
517             ir, pull_work, mdatoms->massT, state, state_global, cr, startingBehavior != StartingBehavior::NewSimulation);
518
519     // TODO: Remove this by converting AWH into a ForceProvider
520     auto awh = prepareAwhModule(fplog,
521                                 *ir,
522                                 state_global,
523                                 cr,
524                                 ms,
525                                 startingBehavior != StartingBehavior::NewSimulation,
526                                 shellfc != nullptr,
527                                 opt2fn("-awh", nfile, fnm),
528                                 pull_work);
529
530     if (useReplicaExchange && MASTER(cr))
531     {
532         repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir, replExParams);
533     }
534     /* PME tuning is only supported in the Verlet scheme, with PME for
535      * Coulomb. It is not supported with only LJ PME. */
536     bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && !mdrunOptions.reproducible
537                 && ir->cutoff_scheme != ecutsGROUP);
538
539     pme_load_balancing_t* pme_loadbal = nullptr;
540     if (bPMETune)
541     {
542         pme_loadbal_init(
543                 &pme_loadbal, cr, mdlog, *ir, state->box, *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu());
544     }
545
546     if (!ir->bContinuation)
547     {
548         if (state->flags & (1U << estV))
549         {
550             auto v = makeArrayRef(state->v);
551             /* Set the velocities of vsites, shells and frozen atoms to zero */
552             for (i = 0; i < mdatoms->homenr; i++)
553             {
554                 if (mdatoms->ptype[i] == eptVSite || mdatoms->ptype[i] == eptShell)
555                 {
556                     clear_rvec(v[i]);
557                 }
558                 else if (mdatoms->cFREEZE)
559                 {
560                     for (m = 0; m < DIM; m++)
561                     {
562                         if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
563                         {
564                             v[i][m] = 0;
565                         }
566                     }
567                 }
568             }
569         }
570
571         if (constr)
572         {
573             /* Constrain the initial coordinates and velocities */
574             do_constrain_first(fplog,
575                                constr,
576                                ir,
577                                mdatoms->nr,
578                                mdatoms->homenr,
579                                state->x.arrayRefWithPadding(),
580                                state->v.arrayRefWithPadding(),
581                                state->box,
582                                state->lambda[efptBONDED]);
583         }
584     }
585
586     if (ir->efep != efepNO)
587     {
588         /* Set free energy calculation frequency as the greatest common
589          * denominator of nstdhdl and repl_ex_nst. */
590         nstfep = ir->fepvals->nstdhdl;
591         if (ir->bExpanded)
592         {
593             nstfep = std::gcd(ir->expandedvals->nstexpanded, nstfep);
594         }
595         if (useReplicaExchange)
596         {
597             nstfep = std::gcd(replExParams.exchangeInterval, nstfep);
598         }
599         if (ir->bDoAwh)
600         {
601             nstfep = std::gcd(ir->awhParams->nstSampleCoord, nstfep);
602         }
603     }
604
605     /* Be REALLY careful about what flags you set here. You CANNOT assume
606      * this is the first step, since we might be restarting from a checkpoint,
607      * and in that case we should not do any modifications to the state.
608      */
609     bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
610
611     // When restarting from a checkpoint, it can be appropriate to
612     // initialize ekind from quantities in the checkpoint. Otherwise,
613     // compute_globals must initialize ekind before the simulation
614     // starts/restarts. However, only the master rank knows what was
615     // found in the checkpoint file, so we have to communicate in
616     // order to coordinate the restart.
617     //
618     // TODO Consider removing this communication if/when checkpoint
619     // reading directly follows .tpr reading, because all ranks can
620     // agree on hasReadEkinState at that time.
621     bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
622     if (PAR(cr))
623     {
624         gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr->mpi_comm_mygroup);
625     }
626     if (hasReadEkinState)
627     {
628         restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
629     }
630
631     unsigned int cglo_flags =
632             (CGLO_TEMPERATURE | CGLO_GSTAT | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
633              | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) | (hasReadEkinState ? CGLO_READEKIN : 0));
634
635     bSumEkinhOld = FALSE;
636
637     t_vcm vcm(top_global->groups, *ir);
638     reportComRemovalInfo(fplog, vcm);
639
640     /* To minimize communication, compute_globals computes the COM velocity
641      * and the kinetic energy for the velocities without COM motion removed.
642      * Thus to get the kinetic energy without the COM contribution, we need
643      * to call compute_globals twice.
644      */
645     for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
646     {
647         unsigned int cglo_flags_iteration = cglo_flags;
648         if (bStopCM && cgloIteration == 0)
649         {
650             cglo_flags_iteration |= CGLO_STOPCM;
651             cglo_flags_iteration &= ~CGLO_TEMPERATURE;
652         }
653         compute_globals(gstat,
654                         cr,
655                         ir,
656                         fr,
657                         ekind,
658                         makeConstArrayRef(state->x),
659                         makeConstArrayRef(state->v),
660                         state->box,
661                         mdatoms,
662                         nrnb,
663                         &vcm,
664                         nullptr,
665                         enerd,
666                         force_vir,
667                         shake_vir,
668                         total_vir,
669                         pres,
670                         gmx::ArrayRef<real>{},
671                         &nullSignaller,
672                         state->box,
673                         &totalNumberOfBondedInteractions,
674                         &bSumEkinhOld,
675                         cglo_flags_iteration
676                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
677                                                                          : 0));
678         if (cglo_flags_iteration & CGLO_STOPCM)
679         {
680             /* At initialization, do not pass x with acceleration-correction mode
681              * to avoid (incorrect) correction of the initial coordinates.
682              */
683             auto x = (vcm.mode == ecmLINEAR_ACCELERATION_CORRECTION) ? ArrayRef<RVec>()
684                                                                      : makeArrayRef(state->x);
685             process_and_stopcm_grp(fplog, &vcm, *mdatoms, x, makeArrayRef(state->v));
686             inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
687         }
688     }
689     checkNumberOfBondedInteractions(mdlog,
690                                     cr,
691                                     totalNumberOfBondedInteractions,
692                                     top_global,
693                                     &top,
694                                     makeConstArrayRef(state->x),
695                                     state->box,
696                                     &shouldCheckNumberOfBondedInteractions);
697     if (ir->eI == eiVVAK)
698     {
699         /* a second call to get the half step temperature initialized as well */
700         /* we do the same call as above, but turn the pressure off -- internally to
701            compute_globals, this is recognized as a velocity verlet half-step
702            kinetic energy calculation.  This minimized excess variables, but
703            perhaps loses some logic?*/
704
705         compute_globals(gstat,
706                         cr,
707                         ir,
708                         fr,
709                         ekind,
710                         makeConstArrayRef(state->x),
711                         makeConstArrayRef(state->v),
712                         state->box,
713                         mdatoms,
714                         nrnb,
715                         &vcm,
716                         nullptr,
717                         enerd,
718                         force_vir,
719                         shake_vir,
720                         total_vir,
721                         pres,
722                         gmx::ArrayRef<real>{},
723                         &nullSignaller,
724                         state->box,
725                         nullptr,
726                         &bSumEkinhOld,
727                         cglo_flags & ~CGLO_PRESSURE);
728     }
729
730     /* Calculate the initial half step temperature, and save the ekinh_old */
731     if (startingBehavior == StartingBehavior::NewSimulation)
732     {
733         for (i = 0; (i < ir->opts.ngtc); i++)
734         {
735             copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
736         }
737     }
738
739     /* need to make an initiation call to get the Trotter variables set, as well as other constants
740        for non-trotter temperature control */
741     auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
742
743     if (MASTER(cr))
744     {
745         if (!ir->bContinuation)
746         {
747             if (constr && ir->eConstrAlg == econtLINCS)
748             {
749                 fprintf(fplog,
750                         "RMS relative constraint deviation after constraining: %.2e\n",
751                         constr->rmsd());
752             }
753             if (EI_STATE_VELOCITY(ir->eI))
754             {
755                 real temp = enerd->term[F_TEMP];
756                 if (ir->eI != eiVV)
757                 {
758                     /* Result of Ekin averaged over velocities of -half
759                      * and +half step, while we only have -half step here.
760                      */
761                     temp *= 2;
762                 }
763                 fprintf(fplog, "Initial temperature: %g K\n", temp);
764             }
765         }
766
767         char tbuf[20];
768         fprintf(stderr, "starting mdrun '%s'\n", *(top_global->name));
769         if (ir->nsteps >= 0)
770         {
771             sprintf(tbuf, "%8.1f", (ir->init_step + ir->nsteps) * ir->delta_t);
772         }
773         else
774         {
775             sprintf(tbuf, "%s", "infinite");
776         }
777         if (ir->init_step > 0)
778         {
779             fprintf(stderr,
780                     "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
781                     gmx_step_str(ir->init_step + ir->nsteps, sbuf),
782                     tbuf,
783                     gmx_step_str(ir->init_step, sbuf2),
784                     ir->init_step * ir->delta_t);
785         }
786         else
787         {
788             fprintf(stderr, "%s steps, %s ps.\n", gmx_step_str(ir->nsteps, sbuf), tbuf);
789         }
790         fprintf(fplog, "\n");
791     }
792
793     walltime_accounting_start_time(walltime_accounting);
794     wallcycle_start(wcycle, ewcRUN);
795     print_start(fplog, cr, walltime_accounting, "mdrun");
796
797     /***********************************************************
798      *
799      *             Loop over MD steps
800      *
801      ************************************************************/
802
803     bFirstStep = TRUE;
804     /* Skip the first Nose-Hoover integration when we get the state from tpx */
805     bInitStep        = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
806     bSumEkinhOld     = FALSE;
807     bExchanged       = FALSE;
808     bNeedRepartition = FALSE;
809
810     step     = ir->init_step;
811     step_rel = 0;
812
813     auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
814             compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]),
815             simulationsShareState,
816             MASTER(cr),
817             ir->nstlist,
818             mdrunOptions.reproducible,
819             nstSignalComm,
820             mdrunOptions.maximumHoursToRun,
821             ir->nstlist == 0,
822             fplog,
823             step,
824             bNS,
825             walltime_accounting);
826
827     auto checkpointHandler = std::make_unique<CheckpointHandler>(
828             compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
829             simulationsShareState,
830             ir->nstlist == 0,
831             MASTER(cr),
832             mdrunOptions.writeConfout,
833             mdrunOptions.checkpointOptions.period);
834
835     const bool resetCountersIsLocal = true;
836     auto       resetHandler         = std::make_unique<ResetHandler>(
837             compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]),
838             !resetCountersIsLocal,
839             ir->nsteps,
840             MASTER(cr),
841             mdrunOptions.timingOptions.resetHalfway,
842             mdrunOptions.maximumHoursToRun,
843             mdlog,
844             wcycle,
845             walltime_accounting);
846
847     const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
848
849     if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
850     {
851         logInitialMultisimStatus(ms, cr, mdlog, simulationsShareState, ir->nsteps, ir->init_step);
852     }
853
854     /* and stop now if we should */
855     bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
856     while (!bLastStep)
857     {
858
859         /* Determine if this is a neighbor search step */
860         bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
861
862         if (bPMETune && bNStList)
863         {
864             // This has to be here because PME load balancing is called so early.
865             // TODO: Move to after all booleans are defined.
866             if (useGpuForUpdate && !bFirstStep)
867             {
868                 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
869                 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
870             }
871             /* PME grid + cut-off optimization with GPUs or PME nodes */
872             pme_loadbal_do(pme_loadbal,
873                            cr,
874                            (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
875                            fplog,
876                            mdlog,
877                            *ir,
878                            fr,
879                            state->box,
880                            state->x,
881                            wcycle,
882                            step,
883                            step_rel,
884                            &bPMETunePrinting,
885                            simulationWork.useGpuPmePpCommunication);
886         }
887
888         wallcycle_start(wcycle, ewcSTEP);
889
890         bLastStep = (step_rel == ir->nsteps);
891         t         = t0 + step * ir->delta_t;
892
893         // TODO Refactor this, so that nstfep does not need a default value of zero
894         if (ir->efep != efepNO || ir->bSimTemp)
895         {
896             /* find and set the current lambdas */
897             state->lambda = currentLambdas(step, *(ir->fepvals), state->fep_state);
898
899             bDoDHDL     = do_per_step(step, ir->fepvals->nstdhdl);
900             bDoFEP      = ((ir->efep != efepNO) && do_per_step(step, nstfep));
901             bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded)
902                            && (!bFirstStep));
903         }
904
905         bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep
906                      && do_per_step(step, replExParams.exchangeInterval));
907
908         if (doSimulatedAnnealing)
909         {
910             // TODO: Avoid changing inputrec (#3854)
911             // Simulated annealing updates the reference temperature.
912             auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
913             update_annealing_target_temp(nonConstInputrec, t, &upd);
914         }
915
916         /* Stop Center of Mass motion */
917         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
918
919         /* Determine whether or not to do Neighbour Searching */
920         bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
921
922         /* Note that the stopHandler will cause termination at nstglobalcomm
923          * steps. Since this concides with nstcalcenergy, nsttcouple and/or
924          * nstpcouple steps, we have computed the half-step kinetic energy
925          * of the previous step and can always output energies at the last step.
926          */
927         bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
928
929         /* do_log triggers energy and virial calculation. Because this leads
930          * to different code paths, forces can be different. Thus for exact
931          * continuation we should avoid extra log output.
932          * Note that the || bLastStep can result in non-exact continuation
933          * beyond the last step. But we don't consider that to be an issue.
934          */
935         do_log     = (do_per_step(step, ir->nstlog)
936                   || (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) || bLastStep);
937         do_verbose = mdrunOptions.verbose
938                      && (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
939
940         if (useGpuForUpdate && !bFirstStep && bNS)
941         {
942             // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
943             stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
944             stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
945             // Copy coordinate from the GPU when needed at the search step.
946             // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
947             // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
948             stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
949             stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
950         }
951
952         if (vsite != nullptr)
953         {
954             // Virtual sites need to be updated before domain decomposition and forces are calculated
955             wallcycle_start(wcycle, ewcVSITECONSTR);
956             vsite->construct(state->x, ir->delta_t, state->v, state->box);
957             wallcycle_stop(wcycle, ewcVSITECONSTR);
958         }
959
960         if (bNS && !(bFirstStep && ir->bContinuation))
961         {
962             bMasterState = FALSE;
963             /* Correct the new box if it is too skewed */
964             if (inputrecDynamicBox(ir))
965             {
966                 if (correct_box(fplog, step, state->box))
967                 {
968                     bMasterState = TRUE;
969                     // If update is offloaded, it should be informed about the box size change
970                     if (useGpuForUpdate)
971                     {
972                         integrator->setPbc(PbcType::Xyz, state->box);
973                     }
974                 }
975             }
976             if (DOMAINDECOMP(cr) && bMasterState)
977             {
978                 dd_collect_state(cr->dd, state, state_global);
979             }
980
981             if (DOMAINDECOMP(cr))
982             {
983                 /* Repartition the domain decomposition */
984                 dd_partition_system(fplog,
985                                     mdlog,
986                                     step,
987                                     cr,
988                                     bMasterState,
989                                     nstglobalcomm,
990                                     state_global,
991                                     *top_global,
992                                     ir,
993                                     imdSession,
994                                     pull_work,
995                                     state,
996                                     &f,
997                                     mdAtoms,
998                                     &top,
999                                     fr,
1000                                     vsite,
1001                                     constr,
1002                                     nrnb,
1003                                     wcycle,
1004                                     do_verbose && !bPMETunePrinting);
1005                 shouldCheckNumberOfBondedInteractions = true;
1006                 upd.setNumAtoms(state->natoms);
1007             }
1008         }
1009
1010         // Allocate or re-size GPU halo exchange object, if necessary
1011         if (bNS && havePPDomainDecomposition(cr) && simulationWork.useGpuHaloExchange)
1012         {
1013             GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
1014                                "GPU device manager has to be initialized to use GPU "
1015                                "version of halo exchange.");
1016             constructGpuHaloExchange(mdlog, *cr, *fr->deviceStreamManager, wcycle);
1017         }
1018
1019         if (MASTER(cr) && do_log)
1020         {
1021             gmx::EnergyOutput::printHeader(
1022                     fplog, step, t); /* can we improve the information printed here? */
1023         }
1024
1025         if (ir->efep != efepNO)
1026         {
1027             update_mdatoms(mdatoms, state->lambda[efptMASS]);
1028         }
1029
1030         if (bExchanged)
1031         {
1032             /* We need the kinetic energy at minus the half step for determining
1033              * the full step kinetic energy and possibly for T-coupling.*/
1034             /* This may not be quite working correctly yet . . . . */
1035             compute_globals(gstat,
1036                             cr,
1037                             ir,
1038                             fr,
1039                             ekind,
1040                             makeConstArrayRef(state->x),
1041                             makeConstArrayRef(state->v),
1042                             state->box,
1043                             mdatoms,
1044                             nrnb,
1045                             &vcm,
1046                             wcycle,
1047                             enerd,
1048                             nullptr,
1049                             nullptr,
1050                             nullptr,
1051                             nullptr,
1052                             gmx::ArrayRef<real>{},
1053                             &nullSignaller,
1054                             state->box,
1055                             &totalNumberOfBondedInteractions,
1056                             &bSumEkinhOld,
1057                             CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
1058             checkNumberOfBondedInteractions(mdlog,
1059                                             cr,
1060                                             totalNumberOfBondedInteractions,
1061                                             top_global,
1062                                             &top,
1063                                             makeConstArrayRef(state->x),
1064                                             state->box,
1065                                             &shouldCheckNumberOfBondedInteractions);
1066         }
1067         clear_mat(force_vir);
1068
1069         checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
1070
1071         /* Determine the energy and pressure:
1072          * at nstcalcenergy steps and at energy output steps (set below).
1073          */
1074         if (EI_VV(ir->eI) && (!bInitStep))
1075         {
1076             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
1077             bCalcVir      = bCalcEnerStep
1078                        || (ir->epc != PressureCoupling::No
1079                            && (do_per_step(step, ir->nstpcouple) || do_per_step(step - 1, ir->nstpcouple)));
1080         }
1081         else
1082         {
1083             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
1084             bCalcVir      = bCalcEnerStep
1085                        || (ir->epc != PressureCoupling::No && do_per_step(step, ir->nstpcouple));
1086         }
1087         bCalcEner = bCalcEnerStep;
1088
1089         do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
1090
1091         if (do_ene || do_log || bDoReplEx)
1092         {
1093             bCalcVir  = TRUE;
1094             bCalcEner = TRUE;
1095         }
1096
1097         /* Do we need global communication ? */
1098         bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm)
1099                   || (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm)));
1100
1101         force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0)
1102                        | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0)
1103                        | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0));
1104         if (fr->useMts && !do_per_step(step, ir->nstfout))
1105         {
1106             force_flags |= GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE;
1107         }
1108
1109         if (shellfc)
1110         {
1111             /* Now is the time to relax the shells */
1112             relax_shell_flexcon(fplog,
1113                                 cr,
1114                                 ms,
1115                                 mdrunOptions.verbose,
1116                                 enforcedRotation,
1117                                 step,
1118                                 ir,
1119                                 imdSession,
1120                                 pull_work,
1121                                 bNS,
1122                                 force_flags,
1123                                 &top,
1124                                 constr,
1125                                 enerd,
1126                                 state->natoms,
1127                                 state->x.arrayRefWithPadding(),
1128                                 state->v.arrayRefWithPadding(),
1129                                 state->box,
1130                                 state->lambda,
1131                                 &state->hist,
1132                                 &f.view(),
1133                                 force_vir,
1134                                 mdatoms,
1135                                 nrnb,
1136                                 wcycle,
1137                                 shellfc,
1138                                 fr,
1139                                 runScheduleWork,
1140                                 t,
1141                                 mu_tot,
1142                                 vsite,
1143                                 ddBalanceRegionHandler);
1144         }
1145         else
1146         {
1147             /* The AWH history need to be saved _before_ doing force calculations where the AWH bias
1148                is updated (or the AWH update will be performed twice for one step when continuing).
1149                It would be best to call this update function from do_md_trajectory_writing but that
1150                would occur after do_force. One would have to divide the update_awh function into one
1151                function applying the AWH force and one doing the AWH bias update. The update AWH
1152                bias function could then be called after do_md_trajectory_writing (then containing
1153                update_awh_history). The checkpointing will in the future probably moved to the start
1154                of the md loop which will rid of this issue. */
1155             if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
1156             {
1157                 awh->updateHistory(state_global->awhHistory.get());
1158             }
1159
1160             /* The coordinates (x) are shifted (to get whole molecules)
1161              * in do_force.
1162              * This is parallellized as well, and does communication too.
1163              * Check comments in sim_util.c
1164              */
1165             do_force(fplog,
1166                      cr,
1167                      ms,
1168                      ir,
1169                      awh.get(),
1170                      enforcedRotation,
1171                      imdSession,
1172                      pull_work,
1173                      step,
1174                      nrnb,
1175                      wcycle,
1176                      &top,
1177                      state->box,
1178                      state->x.arrayRefWithPadding(),
1179                      &state->hist,
1180                      &f.view(),
1181                      force_vir,
1182                      mdatoms,
1183                      enerd,
1184                      state->lambda,
1185                      fr,
1186                      runScheduleWork,
1187                      vsite,
1188                      mu_tot,
1189                      t,
1190                      ed ? ed->getLegacyED() : nullptr,
1191                      (bNS ? GMX_FORCE_NS : 0) | force_flags,
1192                      ddBalanceRegionHandler);
1193         }
1194
1195         // VV integrators do not need the following velocity half step
1196         // if it is the first step after starting from a checkpoint.
1197         // That is, the half step is needed on all other steps, and
1198         // also the first step when starting from a .tpr file.
1199         if (EI_VV(ir->eI))
1200         {
1201             integrateVVFirstStep(step,
1202                                  bFirstStep,
1203                                  bInitStep,
1204                                  startingBehavior,
1205                                  nstglobalcomm,
1206                                  ir,
1207                                  fr,
1208                                  cr,
1209                                  state,
1210                                  mdatoms,
1211                                  fcdata,
1212                                  &MassQ,
1213                                  &vcm,
1214                                  top_global,
1215                                  top,
1216                                  enerd,
1217                                  ekind,
1218                                  gstat,
1219                                  &last_ekin,
1220                                  bCalcVir,
1221                                  total_vir,
1222                                  shake_vir,
1223                                  force_vir,
1224                                  pres,
1225                                  M,
1226                                  do_log,
1227                                  do_ene,
1228                                  bCalcEner,
1229                                  bGStat,
1230                                  bStopCM,
1231                                  bTrotter,
1232                                  bExchanged,
1233                                  &bSumEkinhOld,
1234                                  &shouldCheckNumberOfBondedInteractions,
1235                                  &saved_conserved_quantity,
1236                                  &f,
1237                                  &upd,
1238                                  constr,
1239                                  &nullSignaller,
1240                                  trotter_seq,
1241                                  nrnb,
1242                                  mdlog,
1243                                  fplog,
1244                                  wcycle);
1245         }
1246
1247         /* ########  END FIRST UPDATE STEP  ############## */
1248         /* ########  If doing VV, we now have v(dt) ###### */
1249         if (bDoExpanded)
1250         {
1251             /* perform extended ensemble sampling in lambda - we don't
1252                actually move to the new state before outputting
1253                statistics, but if performing simulated tempering, we
1254                do update the velocities and the tau_t. */
1255             // TODO: Avoid changing inputrec (#3854)
1256             // Simulated tempering updates the reference temperature.
1257             // Expanded ensemble without simulated tempering does not change the inputrec.
1258             auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
1259             lamnew                 = ExpandedEnsembleDynamics(fplog,
1260                                               nonConstInputrec,
1261                                               enerd,
1262                                               state,
1263                                               &MassQ,
1264                                               state->fep_state,
1265                                               state->dfhist,
1266                                               step,
1267                                               state->v.rvec_array(),
1268                                               mdatoms);
1269             /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1270             if (MASTER(cr))
1271             {
1272                 copy_df_history(state_global->dfhist, state->dfhist);
1273             }
1274         }
1275
1276         // Copy coordinate from the GPU for the output/checkpointing if the update is offloaded and
1277         // coordinates have not already been copied for i) search or ii) CPU force tasks.
1278         if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork
1279             && (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)
1280                 || checkpointHandler->isCheckpointingStep()))
1281         {
1282             stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1283             stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1284         }
1285         // Copy velocities if needed for the output/checkpointing.
1286         // NOTE: Copy on the search steps is done at the beginning of the step.
1287         if (useGpuForUpdate && !bNS
1288             && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep()))
1289         {
1290             stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1291             stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1292         }
1293         // Copy forces for the output if the forces were reduced on the GPU (not the case on virial steps)
1294         // and update is offloaded hence forces are kept on the GPU for update and have not been
1295         // already transferred in do_force().
1296         // TODO: There should be an improved, explicit mechanism that ensures this copy is only executed
1297         //       when the forces are ready on the GPU -- the same synchronizer should be used as the one
1298         //       prior to GPU update.
1299         // TODO: When the output flags will be included in step workload, this copy can be combined with the
1300         //       copy call in do_force(...).
1301         // NOTE: The forces should not be copied here if the vsites are present, since they were modified
1302         //       on host after the D2H copy in do_force(...).
1303         if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite)
1304             && do_per_step(step, ir->nstfout))
1305         {
1306             stateGpu->copyForcesFromGpu(f.view().force(), AtomLocality::Local);
1307             stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
1308         }
1309         /* Now we have the energies and forces corresponding to the
1310          * coordinates at time t. We must output all of this before
1311          * the update.
1312          */
1313         do_md_trajectory_writing(fplog,
1314                                  cr,
1315                                  nfile,
1316                                  fnm,
1317                                  step,
1318                                  step_rel,
1319                                  t,
1320                                  ir,
1321                                  state,
1322                                  state_global,
1323                                  observablesHistory,
1324                                  top_global,
1325                                  fr,
1326                                  outf,
1327                                  energyOutput,
1328                                  ekind,
1329                                  f.view().force(),
1330                                  checkpointHandler->isCheckpointingStep(),
1331                                  bRerunMD,
1332                                  bLastStep,
1333                                  mdrunOptions.writeConfout,
1334                                  bSumEkinhOld);
1335         /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1336         bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1337
1338         /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1339         if (startingBehavior != StartingBehavior::NewSimulation && bFirstStep
1340             && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1341         {
1342             copy_mat(state->svir_prev, shake_vir);
1343             copy_mat(state->fvir_prev, force_vir);
1344         }
1345
1346         stopHandler->setSignal();
1347         resetHandler->setSignal(walltime_accounting);
1348
1349         if (bGStat || !PAR(cr))
1350         {
1351             /* In parallel we only have to check for checkpointing in steps
1352              * where we do global communication,
1353              *  otherwise the other nodes don't know.
1354              */
1355             checkpointHandler->setSignal(walltime_accounting);
1356         }
1357
1358         /* #########   START SECOND UPDATE STEP ################# */
1359
1360         /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen
1361            controlled in preprocessing */
1362
1363         if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1364         {
1365             gmx_bool bIfRandomize;
1366             bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1367             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1368             if (constr && bIfRandomize)
1369             {
1370                 constrain_velocities(constr, do_log, do_ene, step, state, nullptr, false, nullptr);
1371             }
1372         }
1373         /* Box is changed in update() when we do pressure coupling,
1374          * but we should still use the old box for energy corrections and when
1375          * writing it to the energy file, so it matches the trajectory files for
1376          * the same timestep above. Make a copy in a separate array.
1377          */
1378         copy_mat(state->box, lastbox);
1379
1380         dvdl_constr = 0;
1381
1382         if (!useGpuForUpdate)
1383         {
1384             wallcycle_start(wcycle, ewcUPDATE);
1385         }
1386         /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1387         if (bTrotter)
1388         {
1389             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1390             /* We can only do Berendsen coupling after we have summed
1391              * the kinetic energy or virial. Since the happens
1392              * in global_state after update, we should only do it at
1393              * step % nstlist = 1 with bGStatEveryStep=FALSE.
1394              */
1395         }
1396         else
1397         {
1398             update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1399             update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep);
1400         }
1401
1402         /* With leap-frog type integrators we compute the kinetic energy
1403          * at a whole time step as the average of the half-time step kinetic
1404          * energies of two subsequent steps. Therefore we need to compute the
1405          * half step kinetic energy also if we need energies at the next step.
1406          */
1407         const bool needHalfStepKineticEnergy =
1408                 (!EI_VV(ir->eI) && (do_per_step(step + 1, nstglobalcomm) || step_rel + 1 == ir->nsteps));
1409
1410         // Parrinello-Rahman requires the pressure to be availible before the update to compute
1411         // the velocity scaling matrix. Hence, it runs one step after the nstpcouple step.
1412         const bool doParrinelloRahman = (ir->epc == PressureCoupling::ParrinelloRahman
1413                                          && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
1414
1415         if (EI_VV(ir->eI))
1416         {
1417             GMX_ASSERT(!useGpuForUpdate, "GPU update is not supported with VVAK integrator.");
1418
1419             integrateVVSecondStep(step,
1420                                   ir,
1421                                   fr,
1422                                   cr,
1423                                   state,
1424                                   mdatoms,
1425                                   fcdata,
1426                                   &MassQ,
1427                                   &vcm,
1428                                   pull_work,
1429                                   enerd,
1430                                   ekind,
1431                                   gstat,
1432                                   &dvdl_constr,
1433                                   bCalcVir,
1434                                   total_vir,
1435                                   shake_vir,
1436                                   force_vir,
1437                                   pres,
1438                                   M,
1439                                   lastbox,
1440                                   do_log,
1441                                   do_ene,
1442                                   bGStat,
1443                                   &bSumEkinhOld,
1444                                   &f,
1445                                   &cbuf,
1446                                   &upd,
1447                                   constr,
1448                                   &nullSignaller,
1449                                   trotter_seq,
1450                                   nrnb,
1451                                   wcycle);
1452         }
1453         else
1454         {
1455             if (useGpuForUpdate)
1456             {
1457
1458                 wallcycle_stop(wcycle, ewcUPDATE);
1459
1460                 if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
1461                 {
1462                     integrator->set(stateGpu->getCoordinates(),
1463                                     stateGpu->getVelocities(),
1464                                     stateGpu->getForces(),
1465                                     top.idef,
1466                                     *mdatoms,
1467                                     ekind->ngtc);
1468
1469                     // Copy data to the GPU after buffers might have being reinitialized
1470                     stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1471                     stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1472                 }
1473
1474                 if (simulationWork.useGpuPme && !runScheduleWork->simulationWork.useGpuPmePpCommunication
1475                     && !thisRankHasDuty(cr, DUTY_PME))
1476                 {
1477                     // The PME forces were recieved to the host, so have to be copied
1478                     stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::All);
1479                 }
1480                 else if (!runScheduleWork->stepWork.useGpuFBufferOps)
1481                 {
1482                     // The buffer ops were not offloaded this step, so the forces are on the
1483                     // host and have to be copied
1484                     stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::Local);
1485                 }
1486
1487                 const bool doTemperatureScaling =
1488                         (ir->etc != TemperatureCoupling::No
1489                          && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1490
1491                 // This applies Leap-Frog, LINCS and SETTLE in succession
1492                 integrator->integrate(
1493                         stateGpu->getForcesReadyOnDeviceEvent(
1494                                 AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps),
1495                         ir->delta_t,
1496                         true,
1497                         bCalcVir,
1498                         shake_vir,
1499                         doTemperatureScaling,
1500                         ekind->tcstat,
1501                         doParrinelloRahman,
1502                         ir->nstpcouple * ir->delta_t,
1503                         M);
1504
1505                 // Copy velocities D2H after update if:
1506                 // - Globals are computed this step (includes the energy output steps).
1507                 // - Temperature is needed for the next step.
1508                 if (bGStat || needHalfStepKineticEnergy)
1509                 {
1510                     stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1511                     stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1512                 }
1513             }
1514             else
1515             {
1516                 /* With multiple time stepping we need to do an additional normal
1517                  * update step to obtain the virial, as the actual MTS integration
1518                  * using an acceleration where the slow forces are multiplied by mtsFactor.
1519                  * Using that acceleration would result in a virial with the slow
1520                  * force contribution would be a factor mtsFactor too large.
1521                  */
1522                 if (fr->useMts && bCalcVir && constr != nullptr)
1523                 {
1524                     upd.update_for_constraint_virial(
1525                             *ir, *mdatoms, *state, f.view().forceWithPadding(), *ekind);
1526
1527                     constrain_coordinates(constr,
1528                                           do_log,
1529                                           do_ene,
1530                                           step,
1531                                           state,
1532                                           upd.xp()->arrayRefWithPadding(),
1533                                           &dvdl_constr,
1534                                           bCalcVir,
1535                                           shake_vir);
1536                 }
1537
1538                 ArrayRefWithPadding<const RVec> forceCombined =
1539                         (fr->useMts && step % ir->mtsLevels[1].stepFactor == 0)
1540                                 ? f.view().forceMtsCombinedWithPadding()
1541                                 : f.view().forceWithPadding();
1542                 upd.update_coords(
1543                         *ir, step, mdatoms, state, forceCombined, fcdata, ekind, M, etrtPOSITION, cr, constr != nullptr);
1544
1545                 wallcycle_stop(wcycle, ewcUPDATE);
1546
1547                 constrain_coordinates(constr,
1548                                       do_log,
1549                                       do_ene,
1550                                       step,
1551                                       state,
1552                                       upd.xp()->arrayRefWithPadding(),
1553                                       &dvdl_constr,
1554                                       bCalcVir && !fr->useMts,
1555                                       shake_vir);
1556
1557                 upd.update_sd_second_half(
1558                         *ir, step, &dvdl_constr, mdatoms, state, cr, nrnb, wcycle, constr, do_log, do_ene);
1559                 upd.finish_update(*ir, mdatoms, state, wcycle, constr != nullptr);
1560             }
1561
1562             if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1563             {
1564                 updatePrevStepPullCom(pull_work, state);
1565             }
1566
1567             enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1568         }
1569
1570         /* ############## IF NOT VV, Calculate globals HERE  ############ */
1571         /* With Leap-Frog we can skip compute_globals at
1572          * non-communication steps, but we need to calculate
1573          * the kinetic energy one step before communication.
1574          */
1575         {
1576             // Organize to do inter-simulation signalling on steps if
1577             // and when algorithms require it.
1578             const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1579
1580             if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
1581             {
1582                 // Copy coordinates when needed to stop the CM motion.
1583                 if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
1584                 {
1585                     stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1586                     stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1587                 }
1588                 // Since we're already communicating at this step, we
1589                 // can propagate intra-simulation signals. Note that
1590                 // check_nstglobalcomm has the responsibility for
1591                 // choosing the value of nstglobalcomm that is one way
1592                 // bGStat becomes true, so we can't get into a
1593                 // situation where e.g. checkpointing can't be
1594                 // signalled.
1595                 bool                doIntraSimSignal = true;
1596                 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1597
1598                 compute_globals(
1599                         gstat,
1600                         cr,
1601                         ir,
1602                         fr,
1603                         ekind,
1604                         makeConstArrayRef(state->x),
1605                         makeConstArrayRef(state->v),
1606                         state->box,
1607                         mdatoms,
1608                         nrnb,
1609                         &vcm,
1610                         wcycle,
1611                         enerd,
1612                         force_vir,
1613                         shake_vir,
1614                         total_vir,
1615                         pres,
1616                         (!EI_VV(ir->eI) && bCalcEner && constr != nullptr) ? constr->rmsdData()
1617                                                                            : gmx::ArrayRef<real>{},
1618                         &signaller,
1619                         lastbox,
1620                         &totalNumberOfBondedInteractions,
1621                         &bSumEkinhOld,
1622                         (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1623                                 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1624                                 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1625                                 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT
1626                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
1627                                                                          : 0));
1628                 checkNumberOfBondedInteractions(mdlog,
1629                                                 cr,
1630                                                 totalNumberOfBondedInteractions,
1631                                                 top_global,
1632                                                 &top,
1633                                                 makeConstArrayRef(state->x),
1634                                                 state->box,
1635                                                 &shouldCheckNumberOfBondedInteractions);
1636                 if (!EI_VV(ir->eI) && bStopCM)
1637                 {
1638                     process_and_stopcm_grp(
1639                             fplog, &vcm, *mdatoms, makeArrayRef(state->x), makeArrayRef(state->v));
1640                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1641
1642                     // TODO: The special case of removing CM motion should be dealt more gracefully
1643                     if (useGpuForUpdate)
1644                     {
1645                         stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1646                         // Here we block until the H2D copy completes because event sync with the
1647                         // force kernels that use the coordinates on the next steps is not implemented
1648                         // (not because of a race on state->x being modified on the CPU while H2D is in progress).
1649                         stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
1650                         // If the COM removal changed the velocities on the CPU, this has to be accounted for.
1651                         if (vcm.mode != ecmNO)
1652                         {
1653                             stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1654                         }
1655                     }
1656                 }
1657             }
1658         }
1659
1660         /* #############  END CALC EKIN AND PRESSURE ################# */
1661
1662         /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1663            the virial that should probably be addressed eventually. state->veta has better properies,
1664            but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1665            generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1666
1667         if (ir->efep != efepNO && !EI_VV(ir->eI))
1668         {
1669             /* Sum up the foreign energy and dK/dl terms for md and sd.
1670                Currently done every step so that dH/dl is correct in the .edr */
1671             accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals);
1672         }
1673
1674         update_pcouple_after_coordinates(
1675                 fplog, step, ir, mdatoms, pres, force_vir, shake_vir, pressureCouplingMu, state, nrnb, upd.deform(), !useGpuForUpdate);
1676
1677         const bool doBerendsenPressureCoupling = (inputrec->epc == PressureCoupling::Berendsen
1678                                                   && do_per_step(step, inputrec->nstpcouple));
1679         const bool doCRescalePressureCoupling  = (inputrec->epc == PressureCoupling::CRescale
1680                                                  && do_per_step(step, inputrec->nstpcouple));
1681         if (useGpuForUpdate
1682             && (doBerendsenPressureCoupling || doCRescalePressureCoupling || doParrinelloRahman))
1683         {
1684             integrator->scaleCoordinates(pressureCouplingMu);
1685             if (doCRescalePressureCoupling)
1686             {
1687                 matrix pressureCouplingInvMu;
1688                 gmx::invertBoxMatrix(pressureCouplingMu, pressureCouplingInvMu);
1689                 integrator->scaleVelocities(pressureCouplingInvMu);
1690             }
1691             integrator->setPbc(PbcType::Xyz, state->box);
1692         }
1693
1694         /* ################# END UPDATE STEP 2 ################# */
1695         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1696
1697         /* The coordinates (x) were unshifted in update */
1698         if (!bGStat)
1699         {
1700             /* We will not sum ekinh_old,
1701              * so signal that we still have to do it.
1702              */
1703             bSumEkinhOld = TRUE;
1704         }
1705
1706         if (bCalcEner)
1707         {
1708             /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1709
1710             /* use the directly determined last velocity, not actually the averaged half steps */
1711             if (bTrotter && ir->eI == eiVV)
1712             {
1713                 enerd->term[F_EKIN] = last_ekin;
1714             }
1715             enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1716
1717             if (integratorHasConservedEnergyQuantity(ir))
1718             {
1719                 if (EI_VV(ir->eI))
1720                 {
1721                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1722                 }
1723                 else
1724                 {
1725                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1726                 }
1727             }
1728             /* #########  END PREPARING EDR OUTPUT  ###########  */
1729         }
1730
1731         /* Output stuff */
1732         if (MASTER(cr))
1733         {
1734             if (fplog && do_log && bDoExpanded)
1735             {
1736                 /* only needed if doing expanded ensemble */
1737                 PrintFreeEnergyInfoToFile(fplog,
1738                                           ir->fepvals,
1739                                           ir->expandedvals,
1740                                           ir->bSimTemp ? ir->simtempvals : nullptr,
1741                                           state_global->dfhist,
1742                                           state->fep_state,
1743                                           ir->nstlog,
1744                                           step);
1745             }
1746             if (bCalcEner)
1747             {
1748                 energyOutput.addDataAtEnergyStep(bDoDHDL,
1749                                                  bCalcEnerStep,
1750                                                  t,
1751                                                  mdatoms->tmass,
1752                                                  enerd,
1753                                                  ir->fepvals,
1754                                                  ir->expandedvals,
1755                                                  lastbox,
1756                                                  PTCouplingArrays{ state->boxv,
1757                                                                    state->nosehoover_xi,
1758                                                                    state->nosehoover_vxi,
1759                                                                    state->nhpres_xi,
1760                                                                    state->nhpres_vxi },
1761                                                  state->fep_state,
1762                                                  shake_vir,
1763                                                  force_vir,
1764                                                  total_vir,
1765                                                  pres,
1766                                                  ekind,
1767                                                  mu_tot,
1768                                                  constr);
1769             }
1770             else
1771             {
1772                 energyOutput.recordNonEnergyStep();
1773             }
1774
1775             gmx_bool do_dr = do_per_step(step, ir->nstdisreout);
1776             gmx_bool do_or = do_per_step(step, ir->nstorireout);
1777
1778             if (doSimulatedAnnealing)
1779             {
1780                 gmx::EnergyOutput::printAnnealingTemperatures(
1781                         do_log ? fplog : nullptr, groups, &(ir->opts));
1782             }
1783             if (do_log || do_ene || do_dr || do_or)
1784             {
1785                 energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf),
1786                                                    do_ene,
1787                                                    do_dr,
1788                                                    do_or,
1789                                                    do_log ? fplog : nullptr,
1790                                                    step,
1791                                                    t,
1792                                                    fr->fcdata.get(),
1793                                                    awh.get());
1794             }
1795             if (do_log && ir->bDoAwh && awh->hasFepLambdaDimension())
1796             {
1797                 const bool isInitialOutput = false;
1798                 printLambdaStateToLog(fplog, state->lambda, isInitialOutput);
1799             }
1800
1801             if (ir->bPull)
1802             {
1803                 pull_print_output(pull_work, step, t);
1804             }
1805
1806             if (do_per_step(step, ir->nstlog))
1807             {
1808                 if (fflush(fplog) != 0)
1809                 {
1810                     gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1811                 }
1812             }
1813         }
1814         if (bDoExpanded)
1815         {
1816             /* Have to do this part _after_ outputting the logfile and the edr file */
1817             /* Gets written into the state at the beginning of next loop*/
1818             state->fep_state = lamnew;
1819         }
1820         else if (ir->bDoAwh && awh->needForeignEnergyDifferences(step))
1821         {
1822             state->fep_state = awh->fepLambdaState();
1823         }
1824         /* Print the remaining wall clock time for the run */
1825         if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting)
1826         {
1827             if (shellfc)
1828             {
1829                 fprintf(stderr, "\n");
1830             }
1831             print_time(stderr, walltime_accounting, step, ir, cr);
1832         }
1833
1834         /* Ion/water position swapping.
1835          * Not done in last step since trajectory writing happens before this call
1836          * in the MD loop and exchanges would be lost anyway. */
1837         bNeedRepartition = FALSE;
1838         if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && do_per_step(step, ir->swap->nstswap))
1839         {
1840             bNeedRepartition = do_swapcoords(cr,
1841                                              step,
1842                                              t,
1843                                              ir,
1844                                              swap,
1845                                              wcycle,
1846                                              as_rvec_array(state->x.data()),
1847                                              state->box,
1848                                              MASTER(cr) && mdrunOptions.verbose,
1849                                              bRerunMD);
1850
1851             if (bNeedRepartition && DOMAINDECOMP(cr))
1852             {
1853                 dd_collect_state(cr->dd, state, state_global);
1854             }
1855         }
1856
1857         /* Replica exchange */
1858         bExchanged = FALSE;
1859         if (bDoReplEx)
1860         {
1861             bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t);
1862         }
1863
1864         if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr))
1865         {
1866             dd_partition_system(fplog,
1867                                 mdlog,
1868                                 step,
1869                                 cr,
1870                                 TRUE,
1871                                 1,
1872                                 state_global,
1873                                 *top_global,
1874                                 ir,
1875                                 imdSession,
1876                                 pull_work,
1877                                 state,
1878                                 &f,
1879                                 mdAtoms,
1880                                 &top,
1881                                 fr,
1882                                 vsite,
1883                                 constr,
1884                                 nrnb,
1885                                 wcycle,
1886                                 FALSE);
1887             shouldCheckNumberOfBondedInteractions = true;
1888             upd.setNumAtoms(state->natoms);
1889         }
1890
1891         bFirstStep = FALSE;
1892         bInitStep  = FALSE;
1893
1894         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1895         /* With all integrators, except VV, we need to retain the pressure
1896          * at the current step for coupling at the next step.
1897          */
1898         if ((state->flags & (1U << estPRES_PREV))
1899             && (bGStatEveryStep || (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1900         {
1901             /* Store the pressure in t_state for pressure coupling
1902              * at the next MD step.
1903              */
1904             copy_mat(pres, state->pres_prev);
1905         }
1906
1907         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1908
1909         if ((membed != nullptr) && (!bLastStep))
1910         {
1911             rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1912         }
1913
1914         cycles = wallcycle_stop(wcycle, ewcSTEP);
1915         if (DOMAINDECOMP(cr) && wcycle)
1916         {
1917             dd_cycles_add(cr->dd, cycles, ddCyclStep);
1918         }
1919
1920         /* increase the MD step number */
1921         step++;
1922         step_rel++;
1923
1924 #if GMX_FAHCORE
1925         if (MASTER(cr))
1926         {
1927             fcReportProgress(ir->nsteps + ir->init_step, step);
1928         }
1929 #endif
1930
1931         resetHandler->resetCounters(
1932                 step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1933
1934         /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1935         imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1936     }
1937     /* End of main MD loop */
1938
1939     /* Closing TNG files can include compressing data. Therefore it is good to do that
1940      * before stopping the time measurements. */
1941     mdoutf_tng_close(outf);
1942
1943     /* Stop measuring walltime */
1944     walltime_accounting_end_time(walltime_accounting);
1945
1946     if (!thisRankHasDuty(cr, DUTY_PME))
1947     {
1948         /* Tell the PME only node to finish */
1949         gmx_pme_send_finish(cr);
1950     }
1951
1952     if (MASTER(cr))
1953     {
1954         if (ir->nstcalcenergy > 0)
1955         {
1956             energyOutput.printEnergyConservation(fplog, ir->simulation_part, EI_MD(ir->eI));
1957
1958             gmx::EnergyOutput::printAnnealingTemperatures(fplog, groups, &(ir->opts));
1959             energyOutput.printAverages(fplog, groups);
1960         }
1961     }
1962     done_mdoutf(outf);
1963
1964     if (bPMETune)
1965     {
1966         pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1967     }
1968
1969     done_shellfc(fplog, shellfc, step_rel);
1970
1971     if (useReplicaExchange && MASTER(cr))
1972     {
1973         print_replica_exchange_statistics(fplog, repl_ex);
1974     }
1975
1976     walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1977
1978     global_stat_destroy(gstat);
1979 }