src/gromacs/mdrun/md.cpp (alexxy/gromacs.git, commit 04cc254287673b41b8b8e5476aa03d8863336fce)
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2011-2019,2020,2021, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 /*! \internal \file
38  *
39  * \brief Implements the integrator for normal molecular dynamics simulations
40  *
41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42  * \ingroup module_mdrun
43  */
44 #include "gmxpre.h"
45
46 #include <cinttypes>
47 #include <cmath>
48 #include <cstdio>
49 #include <cstdlib>
50
51 #include <algorithm>
52 #include <memory>
53 #include <numeric>
54
55 #include "gromacs/applied_forces/awh/awh.h"
56 #include "gromacs/commandline/filenm.h"
57 #include "gromacs/domdec/collect.h"
58 #include "gromacs/domdec/dlbtiming.h"
59 #include "gromacs/domdec/domdec.h"
60 #include "gromacs/domdec/domdec_network.h"
61 #include "gromacs/domdec/domdec_struct.h"
62 #include "gromacs/domdec/gpuhaloexchange.h"
63 #include "gromacs/domdec/mdsetup.h"
64 #include "gromacs/domdec/partition.h"
65 #include "gromacs/essentialdynamics/edsam.h"
66 #include "gromacs/ewald/pme_load_balancing.h"
67 #include "gromacs/ewald/pme_pp.h"
68 #include "gromacs/fileio/trxio.h"
69 #include "gromacs/gmxlib/network.h"
70 #include "gromacs/gmxlib/nrnb.h"
71 #include "gromacs/gpu_utils/device_stream_manager.h"
72 #include "gromacs/gpu_utils/gpu_utils.h"
73 #include "gromacs/imd/imd.h"
74 #include "gromacs/listed_forces/listed_forces.h"
75 #include "gromacs/math/functions.h"
76 #include "gromacs/math/invertmatrix.h"
77 #include "gromacs/math/vec.h"
78 #include "gromacs/math/vectypes.h"
79 #include "gromacs/mdlib/checkpointhandler.h"
80 #include "gromacs/mdlib/compute_io.h"
81 #include "gromacs/mdlib/constr.h"
82 #include "gromacs/mdlib/coupling.h"
83 #include "gromacs/mdlib/ebin.h"
84 #include "gromacs/mdlib/enerdata_utils.h"
85 #include "gromacs/mdlib/energyoutput.h"
86 #include "gromacs/mdlib/expanded.h"
87 #include "gromacs/mdlib/force.h"
88 #include "gromacs/mdlib/force_flags.h"
89 #include "gromacs/mdlib/forcerec.h"
90 #include "gromacs/mdlib/freeenergyparameters.h"
91 #include "gromacs/mdlib/md_support.h"
92 #include "gromacs/mdlib/mdatoms.h"
93 #include "gromacs/mdlib/mdoutf.h"
94 #include "gromacs/mdlib/membed.h"
95 #include "gromacs/mdlib/resethandler.h"
96 #include "gromacs/mdlib/sighandler.h"
97 #include "gromacs/mdlib/simulationsignal.h"
98 #include "gromacs/mdlib/stat.h"
99 #include "gromacs/mdlib/stophandler.h"
100 #include "gromacs/mdlib/tgroup.h"
101 #include "gromacs/mdlib/trajectory_writing.h"
102 #include "gromacs/mdlib/update.h"
103 #include "gromacs/mdlib/update_constrain_gpu.h"
104 #include "gromacs/mdlib/update_vv.h"
105 #include "gromacs/mdlib/vcm.h"
106 #include "gromacs/mdlib/vsite.h"
107 #include "gromacs/mdrunutility/handlerestart.h"
108 #include "gromacs/mdrunutility/multisim.h"
109 #include "gromacs/mdrunutility/printtime.h"
110 #include "gromacs/mdtypes/awh_history.h"
111 #include "gromacs/mdtypes/awh_params.h"
112 #include "gromacs/mdtypes/commrec.h"
113 #include "gromacs/mdtypes/df_history.h"
114 #include "gromacs/mdtypes/energyhistory.h"
115 #include "gromacs/mdtypes/fcdata.h"
116 #include "gromacs/mdtypes/forcebuffers.h"
117 #include "gromacs/mdtypes/forcerec.h"
118 #include "gromacs/mdtypes/group.h"
119 #include "gromacs/mdtypes/inputrec.h"
120 #include "gromacs/mdtypes/interaction_const.h"
121 #include "gromacs/mdtypes/md_enums.h"
122 #include "gromacs/mdtypes/mdatom.h"
123 #include "gromacs/mdtypes/mdrunoptions.h"
124 #include "gromacs/mdtypes/multipletimestepping.h"
125 #include "gromacs/mdtypes/observableshistory.h"
126 #include "gromacs/mdtypes/pullhistory.h"
127 #include "gromacs/mdtypes/simulation_workload.h"
128 #include "gromacs/mdtypes/state.h"
129 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
130 #include "gromacs/modularsimulator/energydata.h"
131 #include "gromacs/nbnxm/gpu_data_mgmt.h"
132 #include "gromacs/nbnxm/nbnxm.h"
133 #include "gromacs/pbcutil/pbc.h"
134 #include "gromacs/pulling/output.h"
135 #include "gromacs/pulling/pull.h"
136 #include "gromacs/swap/swapcoords.h"
137 #include "gromacs/timing/wallcycle.h"
138 #include "gromacs/timing/walltime_accounting.h"
139 #include "gromacs/topology/atoms.h"
140 #include "gromacs/topology/idef.h"
141 #include "gromacs/topology/mtop_util.h"
142 #include "gromacs/topology/topology.h"
143 #include "gromacs/trajectory/trajectoryframe.h"
144 #include "gromacs/utility/basedefinitions.h"
145 #include "gromacs/utility/cstringutil.h"
146 #include "gromacs/utility/fatalerror.h"
147 #include "gromacs/utility/logger.h"
148 #include "gromacs/utility/real.h"
149 #include "gromacs/utility/smalloc.h"
150
151 #include "legacysimulator.h"
152 #include "replicaexchange.h"
153 #include "shellfc.h"
154
155 using gmx::SimulationSignaller;
156
157 void gmx::LegacySimulator::do_md()
158 {
159     // TODO Historically, the EM and MD "integrators" used different
160     // names for the t_inputrec *parameter, but these must have the
161     // same name, now that it's a member of a struct. We use this ir
162     // alias to avoid a large ripple of nearly useless changes.
163     // t_inputrec is being replaced by IMdpOptionsProvider, so this
164     // will go away eventually.
165     const t_inputrec* ir = inputrec;
166
167     int64_t      step, step_rel;
168     double       t, t0 = ir->init_t;
169     gmx_bool     bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
170     gmx_bool     bNS = FALSE, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
171     gmx_bool     bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
172     gmx_bool     do_ene, do_log, do_verbose;
173     gmx_bool     bMasterState;
174     unsigned int force_flags;
175     tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, pres = { { 0 } };
176     int    i, m;
177     rvec   mu_tot;
178     matrix pressureCouplingMu, M;
179     gmx_repl_ex_t     repl_ex = nullptr;
180     gmx_global_stat_t gstat;
181     gmx_shellfc_t*    shellfc;
182     gmx_bool          bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
183     gmx_bool          bTrotter;
184     real              dvdl_constr;
185     std::vector<RVec> cbuf;
186     matrix            lastbox;
187     int               lamnew = 0;
188     /* for FEP */
189     int       nstfep = 0;
190     double    cycles;
191     real      saved_conserved_quantity = 0;
192     real      last_ekin                = 0;
193     t_extmass MassQ;
194     char      sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
195
196     /* PME load balancing data for GPU kernels */
197     gmx_bool bPMETune         = FALSE;
198     gmx_bool bPMETunePrinting = FALSE;
199
200     bool bInteractiveMDstep = false;
201
202     /* Domain decomposition could incorrectly miss a bonded
203        interaction, but checking for that requires a global
204        communication stage, which does not otherwise happen in DD
205        code. So we do that alongside the first global energy reduction
206        after a new DD is made. These variables handle whether the
207        check happens, and the result it returns. */
208     bool shouldCheckNumberOfBondedInteractions = false;
209     int  totalNumberOfBondedInteractions       = -1;
210
211     SimulationSignals signals;
212     // Most global communication stages don't propagate mdrun
213     // signals, and will use this object to achieve that.
214     SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
215
216     if (!mdrunOptions.writeConfout)
217     {
218         // This is on by default, and the main known use case for
219         // turning it off is for convenience in benchmarking, which is
220         // something that should not show up in the general user
221         // interface.
222         GMX_LOG(mdlog.info)
223                 .asParagraph()
224                 .appendText(
225                         "The -noconfout functionality is deprecated, and may be removed in a "
226                         "future version.");
227     }
228
229     /* md-vv uses averaged full step velocities for T-control
230        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
231        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
232     bTrotter = (EI_VV(ir->eI)
233                 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
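    // Note (editorial): bTrotter selects the Trotter-decomposition update path used further below
    // (trotter_seq and MassQ set up by init_npt_vars), i.e. md-vv/md-vv-avek combined with
    // Nose-Hoover and/or MTTK-style coupling; the plain md (leap-frog) integrator never takes it.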
234
235     const bool bRerunMD = false;
236
237     int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
238     bGStatEveryStep   = (nstglobalcomm == 1);
239
240     const SimulationGroups* groups = &top_global->groups;
241
242     std::unique_ptr<EssentialDynamics> ed = nullptr;
243     if (opt2bSet("-ei", nfile, fnm))
244     {
245         /* Initialize essential dynamics sampling */
246         ed = init_edsam(mdlog,
247                         opt2fn_null("-ei", nfile, fnm),
248                         opt2fn("-eo", nfile, fnm),
249                         top_global,
250                         ir,
251                         cr,
252                         constr,
253                         state_global,
254                         observablesHistory,
255                         oenv,
256                         startingBehavior);
257     }
258     else if (observablesHistory->edsamHistory)
259     {
260         gmx_fatal(FARGS,
261                   "The checkpoint is from a run with essential dynamics sampling, "
262                   "but the current run did not specify the -ei option. "
263                   "Either specify the -ei option to mdrun, or do not use this checkpoint file.");
264     }
265
266     int*                fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
267     gmx::ArrayRef<real> lambda    = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
268     initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda);
269     Update upd(*ir, deform);
270     bool   doSimulatedAnnealing = false;
271     {
272         // TODO: Avoid changing inputrec (#3854)
273         // Simulated annealing updates the reference temperature.
274         auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
275         doSimulatedAnnealing   = initSimulatedAnnealing(nonConstInputrec, &upd);
276     }
277     const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
278
279     const t_fcdata& fcdata = *fr->fcdata;
280
281     bool simulationsShareState = false;
282     int  nstSignalComm         = nstglobalcomm;
283     {
284         // TODO This implementation of ensemble orientation restraints is nasty because
285         // a user can't just do multi-sim with single-sim orientation restraints.
286         bool usingEnsembleRestraints =
287                 (fcdata.disres->nsystems > 1) || ((ms != nullptr) && (fcdata.orires->nr != 0));
288         bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
289
290         // Replica exchange, ensemble restraints and AWH need all
291         // simulations to remain synchronized, so they need
292         // checkpoints and stop conditions to act on the same step, so
293         // the propagation of such signals must take place between
294         // simulations, not just within simulations.
295         // TODO: Make algorithm initializers set these flags.
296         simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
297
298         if (simulationsShareState)
299         {
300             // Inter-simulation signal communication does not need to happen
301             // often, so we use a minimum of 200 steps to reduce overhead.
302             const int c_minimumInterSimulationSignallingInterval = 200;
303             nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1) / nstglobalcomm)
304                             * nstglobalcomm;
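            // For example, with nstglobalcomm = 100 this gives nstSignalComm = 200, while
            // nstglobalcomm = 150 gives ((200 + 149) / 150) * 150 = 300, i.e. the smallest
            // multiple of nstglobalcomm that is at least 200 steps.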
305         }
306     }
307
308     if (startingBehavior != StartingBehavior::RestartWithAppending)
309     {
310         pleaseCiteCouplingAlgorithms(fplog, *ir);
311     }
312     gmx_mdoutf*       outf = init_mdoutf(fplog,
313                                    nfile,
314                                    fnm,
315                                    mdrunOptions,
316                                    cr,
317                                    outputProvider,
318                                    mdModulesNotifier,
319                                    ir,
320                                    top_global,
321                                    oenv,
322                                    wcycle,
323                                    startingBehavior,
324                                    simulationsShareState,
325                                    ms);
326     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
327                                    top_global,
328                                    ir,
329                                    pull_work,
330                                    mdoutf_get_fp_dhdl(outf),
331                                    false,
332                                    startingBehavior,
333                                    simulationsShareState,
334                                    mdModulesNotifier);
335
336     gstat = global_stat_init(ir);
337
338     const auto& simulationWork     = runScheduleWork->simulationWork;
339     const bool  useGpuForPme       = simulationWork.useGpuPme;
340     const bool  useGpuForNonbonded = simulationWork.useGpuNonbonded;
341     const bool  useGpuForBufferOps = simulationWork.useGpuBufferOps;
342     const bool  useGpuForUpdate    = simulationWork.useGpuUpdate;
343
344     /* Check for polarizable models and flexible constraints */
345     shellfc = init_shell_flexcon(fplog,
346                                  top_global,
347                                  constr ? constr->numFlexibleConstraints() : 0,
348                                  ir->nstcalcenergy,
349                                  DOMAINDECOMP(cr),
350                                  useGpuForPme);
351
352     {
353         double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
354         if ((io > 2000) && MASTER(cr))
355         {
356             fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io);
357         }
358     }
359
360     // Local state only becomes valid now.
361     std::unique_ptr<t_state> stateInstance;
362     t_state*                 state;
363
364     gmx_localtop_t top(top_global->ffparams);
365
366     auto mdatoms = mdAtoms->mdatoms();
367
368     ForceBuffers f(fr->useMts,
369                    ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
370                            ? PinningPolicy::PinnedIfSupported
371                            : PinningPolicy::CannotBePinned);
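    // The host-side force buffer is pinned when it participates in GPU transfers (GPU buffer ops
    // or GPU update); pinned (page-locked) memory typically makes host/device copies faster and
    // lets them overlap with other work. Otherwise ordinary pageable memory suffices.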
372     if (DOMAINDECOMP(cr))
373     {
374         stateInstance = std::make_unique<t_state>();
375         state         = stateInstance.get();
376         dd_init_local_state(cr->dd, state_global, state);
377
378         /* Distribute the charge groups over the nodes from the master node */
379         dd_partition_system(fplog,
380                             mdlog,
381                             ir->init_step,
382                             cr,
383                             TRUE,
384                             1,
385                             state_global,
386                             *top_global,
387                             ir,
388                             imdSession,
389                             pull_work,
390                             state,
391                             &f,
392                             mdAtoms,
393                             &top,
394                             fr,
395                             vsite,
396                             constr,
397                             nrnb,
398                             nullptr,
399                             FALSE);
400         shouldCheckNumberOfBondedInteractions = true;
401         upd.setNumAtoms(state->natoms);
402     }
403     else
404     {
405         state_change_natoms(state_global, state_global->natoms);
406         /* Copy the pointer to the global state */
407         state = state_global;
408
409         /* Generate and initialize new topology */
410         mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &f, mdAtoms, constr, vsite, shellfc);
411
412         upd.setNumAtoms(state->natoms);
413     }
414
415     std::unique_ptr<UpdateConstrainGpu> integrator;
416
417     StatePropagatorDataGpu* stateGpu = fr->stateGpu;
418
419     // TODO: the assertions below should be handled by UpdateConstraintsBuilder.
420     if (useGpuForUpdate)
421     {
422         GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
423                                    || constr->numConstraintsTotal() == 0,
424                            "Constraints in domain decomposition are only supported with update "
425                            "groups if using GPU update.\n");
426         GMX_RELEASE_ASSERT(ir->eConstrAlg != econtSHAKE || constr == nullptr
427                                    || constr->numConstraintsTotal() == 0,
428                            "SHAKE is not supported with GPU update.");
429         GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
430                            "Either PME or short-ranged non-bonded interaction tasks must run on "
431                            "the GPU to use GPU update.\n");
432         GMX_RELEASE_ASSERT(ir->eI == eiMD,
433                            "Only the md integrator is supported with the GPU update.\n");
434         GMX_RELEASE_ASSERT(
435                 ir->etc != etcNOSEHOOVER,
436                 "Nose-Hoover temperature coupling is not supported with the GPU update.\n");
437         GMX_RELEASE_ASSERT(
438                 ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN || ir->epc == epcBERENDSEN
439                         || ir->epc == epcCRESCALE,
440                 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are supported "
441                 "with the GPU update.\n");
442         GMX_RELEASE_ASSERT(!mdatoms->haveVsites,
443                            "Virtual sites are not supported with the GPU update.\n");
444         GMX_RELEASE_ASSERT(ed == nullptr,
445                            "Essential dynamics is not supported with the GPU update.\n");
446         GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(*ir->pull),
447                            "Constraint pulling is not supported with the GPU update.\n");
448         GMX_RELEASE_ASSERT(fcdata.orires->nr == 0,
449                            "Orientation restraints are not supported with the GPU update.\n");
450         GMX_RELEASE_ASSERT(
451                 ir->efep == efepNO
452                         || (!haveFepPerturbedMasses(*top_global) && !havePerturbedConstraints(*top_global)),
453                 "Free energy perturbation of masses and constraints is not supported with the GPU "
454                 "update.");
455
456         if (constr != nullptr && constr->numConstraintsTotal() > 0)
457         {
458             GMX_LOG(mdlog.info)
459                     .asParagraph()
460                     .appendText("Updating coordinates and applying constraints on the GPU.");
461         }
462         else
463         {
464             GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
465         }
466         GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
467                            "Device stream manager should be initialized in order to use GPU "
468                            "update-constraints.");
469         GMX_RELEASE_ASSERT(
470                 fr->deviceStreamManager->streamIsValid(gmx::DeviceStreamType::UpdateAndConstraints),
471                 "Update stream should be initialized in order to use GPU "
472                 "update-constraints.");
473         integrator = std::make_unique<UpdateConstrainGpu>(
474                 *ir,
475                 *top_global,
476                 fr->deviceStreamManager->context(),
477                 fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints),
478                 stateGpu->xUpdatedOnDevice(),
479                 wcycle);
480
481         integrator->setPbc(PbcType::Xyz, state->box);
482     }
483
484     if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
485     {
486         changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
487     }
488     if (useGpuForUpdate)
489     {
490         changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported);
491     }
492
493     // NOTE: The global state is no longer used at this point.
494     // But state_global is still used as temporary storage space for writing
495     // the global state to file and potentially for replica exchange.
496     // (Global topology should persist.)
497
498     update_mdatoms(mdatoms, state->lambda[efptMASS]);
499
500     if (ir->bExpanded)
501     {
502         /* Check nstexpanded here, because the grompp check was broken */
503         if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
504         {
505             gmx_fatal(FARGS,
506                       "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
507         }
508         init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation, ir, state->dfhist);
509     }
510
511     if (MASTER(cr))
512     {
513         EnergyData::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput);
514     }
515
516     preparePrevStepPullCom(
517             ir, pull_work, mdatoms->massT, state, state_global, cr, startingBehavior != StartingBehavior::NewSimulation);
518
519     // TODO: Remove this by converting AWH into a ForceProvider
520     auto awh = prepareAwhModule(fplog,
521                                 *ir,
522                                 state_global,
523                                 cr,
524                                 ms,
525                                 startingBehavior != StartingBehavior::NewSimulation,
526                                 shellfc != nullptr,
527                                 opt2fn("-awh", nfile, fnm),
528                                 pull_work);
529
530     if (useReplicaExchange && MASTER(cr))
531     {
532         repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir, replExParams);
533     }
534     /* PME tuning is only supported in the Verlet scheme, with PME for
535      * Coulomb. It is not supported with only LJ PME. */
536     bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && !mdrunOptions.reproducible
537                 && ir->cutoff_scheme != ecutsGROUP);
538
539     pme_load_balancing_t* pme_loadbal = nullptr;
540     if (bPMETune)
541     {
542         pme_loadbal_init(
543                 &pme_loadbal, cr, mdlog, *ir, state->box, *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu());
544     }
545
546     if (!ir->bContinuation)
547     {
548         if (state->flags & (1U << estV))
549         {
550             auto v = makeArrayRef(state->v);
551             /* Set the velocities of vsites, shells and frozen atoms to zero */
552             for (i = 0; i < mdatoms->homenr; i++)
553             {
554                 if (mdatoms->ptype[i] == eptVSite || mdatoms->ptype[i] == eptShell)
555                 {
556                     clear_rvec(v[i]);
557                 }
558                 else if (mdatoms->cFREEZE)
559                 {
560                     for (m = 0; m < DIM; m++)
561                     {
562                         if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
563                         {
564                             v[i][m] = 0;
565                         }
566                     }
567                 }
568             }
569         }
570
571         if (constr)
572         {
573             /* Constrain the initial coordinates and velocities */
574             do_constrain_first(fplog,
575                                constr,
576                                ir,
577                                mdatoms->nr,
578                                mdatoms->homenr,
579                                state->x.arrayRefWithPadding(),
580                                state->v.arrayRefWithPadding(),
581                                state->box,
582                                state->lambda[efptBONDED]);
583         }
584         if (vsite)
585         {
586             /* Construct the virtual sites for the initial configuration */
587             vsite->construct(state->x, ir->delta_t, {}, state->box);
588         }
589     }
590
591     if (ir->efep != efepNO)
592     {
593         /* Set the free energy calculation frequency as the greatest common
594          * divisor of nstdhdl, nstexpanded, the replica exchange interval and the AWH sampling interval. */
595         nstfep = ir->fepvals->nstdhdl;
596         if (ir->bExpanded)
597         {
598             nstfep = std::gcd(ir->expandedvals->nstexpanded, nstfep);
599         }
600         if (useReplicaExchange)
601         {
602             nstfep = std::gcd(replExParams.exchangeInterval, nstfep);
603         }
604         if (ir->bDoAwh)
605         {
606             nstfep = std::gcd(ir->awhParams->nstSampleCoord, nstfep);
607         }
608     }
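    // Example: with nstdhdl = 50 and an expanded-ensemble nstexpanded = 20 this gives
    // nstfep = gcd(20, 50) = 10, so free-energy quantities are computed on every step where
    // any of the participating algorithms needs them.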
609
610     /* Be REALLY careful about what flags you set here. You CANNOT assume
611      * this is the first step, since we might be restarting from a checkpoint,
612      * and in that case we should not do any modifications to the state.
613      */
614     bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
615
616     // When restarting from a checkpoint, it can be appropriate to
617     // initialize ekind from quantities in the checkpoint. Otherwise,
618     // compute_globals must initialize ekind before the simulation
619     // starts/restarts. However, only the master rank knows what was
620     // found in the checkpoint file, so we have to communicate in
621     // order to coordinate the restart.
622     //
623     // TODO Consider removing this communication if/when checkpoint
624     // reading directly follows .tpr reading, because all ranks can
625     // agree on hasReadEkinState at that time.
626     bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
627     if (PAR(cr))
628     {
629         gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr->mpi_comm_mygroup);
630     }
631     if (hasReadEkinState)
632     {
633         restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
634     }
635
636     unsigned int cglo_flags =
637             (CGLO_TEMPERATURE | CGLO_GSTAT | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
638              | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) | (hasReadEkinState ? CGLO_READEKIN : 0));
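    // These initial compute_globals flags always request the temperature and a global reduction;
    // for md-vv the pressure and constraint virial are requested as well, and CGLO_READEKIN marks
    // that the kinetic energy was restored from the checkpoint rather than recomputed here.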
639
640     bSumEkinhOld = FALSE;
641
642     t_vcm vcm(top_global->groups, *ir);
643     reportComRemovalInfo(fplog, vcm);
644
645     /* To minimize communication, compute_globals computes the COM velocity
646      * and the kinetic energy for the velocities without COM motion removed.
647      * Thus to get the kinetic energy without the COM contribution, we need
648      * to call compute_globals twice.
649      */
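    // First iteration (only when COM motion removal is active): determine and remove the COM
    // velocity without computing the temperature; the following (or only) iteration then computes
    // the kinetic energy and temperature from the COM-free velocities.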
650     for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
651     {
652         unsigned int cglo_flags_iteration = cglo_flags;
653         if (bStopCM && cgloIteration == 0)
654         {
655             cglo_flags_iteration |= CGLO_STOPCM;
656             cglo_flags_iteration &= ~CGLO_TEMPERATURE;
657         }
658         compute_globals(gstat,
659                         cr,
660                         ir,
661                         fr,
662                         ekind,
663                         makeConstArrayRef(state->x),
664                         makeConstArrayRef(state->v),
665                         state->box,
666                         mdatoms,
667                         nrnb,
668                         &vcm,
669                         nullptr,
670                         enerd,
671                         force_vir,
672                         shake_vir,
673                         total_vir,
674                         pres,
675                         gmx::ArrayRef<real>{},
676                         &nullSignaller,
677                         state->box,
678                         &totalNumberOfBondedInteractions,
679                         &bSumEkinhOld,
680                         cglo_flags_iteration
681                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
682                                                                          : 0));
683         if (cglo_flags_iteration & CGLO_STOPCM)
684         {
685             /* At initialization, do not pass x with acceleration-correction mode
686              * to avoid (incorrect) correction of the initial coordinates.
687              */
688             auto x = (vcm.mode == ecmLINEAR_ACCELERATION_CORRECTION) ? ArrayRef<RVec>()
689                                                                      : makeArrayRef(state->x);
690             process_and_stopcm_grp(fplog, &vcm, *mdatoms, x, makeArrayRef(state->v));
691             inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
692         }
693     }
694     checkNumberOfBondedInteractions(mdlog,
695                                     cr,
696                                     totalNumberOfBondedInteractions,
697                                     top_global,
698                                     &top,
699                                     makeConstArrayRef(state->x),
700                                     state->box,
701                                     &shouldCheckNumberOfBondedInteractions);
702     if (ir->eI == eiVVAK)
703     {
704         /* a second call to get the half step temperature initialized as well */
705         /* we do the same call as above, but turn the pressure off -- internally to
706            compute_globals, this is recognized as a velocity verlet half-step
707            kinetic energy calculation.  This minimizes excess variables, but
708            perhaps loses some logic? */
709
710         compute_globals(gstat,
711                         cr,
712                         ir,
713                         fr,
714                         ekind,
715                         makeConstArrayRef(state->x),
716                         makeConstArrayRef(state->v),
717                         state->box,
718                         mdatoms,
719                         nrnb,
720                         &vcm,
721                         nullptr,
722                         enerd,
723                         force_vir,
724                         shake_vir,
725                         total_vir,
726                         pres,
727                         gmx::ArrayRef<real>{},
728                         &nullSignaller,
729                         state->box,
730                         nullptr,
731                         &bSumEkinhOld,
732                         cglo_flags & ~CGLO_PRESSURE);
733     }
734
735     /* Calculate the initial half step temperature, and save the ekinh_old */
736     if (startingBehavior == StartingBehavior::NewSimulation)
737     {
738         for (i = 0; (i < ir->opts.ngtc); i++)
739         {
740             copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
741         }
742     }
743
744     /* We need to make an initialization call to get the Trotter variables set, as well as other
745        constants for non-Trotter temperature control */
746     auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
747
748     if (MASTER(cr))
749     {
750         if (!ir->bContinuation)
751         {
752             if (constr && ir->eConstrAlg == econtLINCS)
753             {
754                 fprintf(fplog,
755                         "RMS relative constraint deviation after constraining: %.2e\n",
756                         constr->rmsd());
757             }
758             if (EI_STATE_VELOCITY(ir->eI))
759             {
760                 real temp = enerd->term[F_TEMP];
761                 if (ir->eI != eiVV)
762                 {
763                     /* Result of Ekin averaged over velocities of -half
764                      * and +half step, while we only have -half step here.
765                      */
766                     temp *= 2;
767                 }
768                 fprintf(fplog, "Initial temperature: %g K\n", temp);
769             }
770         }
771
772         char tbuf[20];
773         fprintf(stderr, "starting mdrun '%s'\n", *(top_global->name));
774         if (ir->nsteps >= 0)
775         {
776             sprintf(tbuf, "%8.1f", (ir->init_step + ir->nsteps) * ir->delta_t);
777         }
778         else
779         {
780             sprintf(tbuf, "%s", "infinite");
781         }
782         if (ir->init_step > 0)
783         {
784             fprintf(stderr,
785                     "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
786                     gmx_step_str(ir->init_step + ir->nsteps, sbuf),
787                     tbuf,
788                     gmx_step_str(ir->init_step, sbuf2),
789                     ir->init_step * ir->delta_t);
790         }
791         else
792         {
793             fprintf(stderr, "%s steps, %s ps.\n", gmx_step_str(ir->nsteps, sbuf), tbuf);
794         }
795         fprintf(fplog, "\n");
796     }
797
798     walltime_accounting_start_time(walltime_accounting);
799     wallcycle_start(wcycle, ewcRUN);
800     print_start(fplog, cr, walltime_accounting, "mdrun");
801
802     /***********************************************************
803      *
804      *             Loop over MD steps
805      *
806      ************************************************************/
807
808     bFirstStep = TRUE;
809     /* Skip the first Nose-Hoover integration when we get the state from tpx */
810     bInitStep        = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
811     bSumEkinhOld     = FALSE;
812     bExchanged       = FALSE;
813     bNeedRepartition = FALSE;
814
815     step     = ir->init_step;
816     step_rel = 0;
817
818     auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
819             compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]),
820             simulationsShareState,
821             MASTER(cr),
822             ir->nstlist,
823             mdrunOptions.reproducible,
824             nstSignalComm,
825             mdrunOptions.maximumHoursToRun,
826             ir->nstlist == 0,
827             fplog,
828             step,
829             bNS,
830             walltime_accounting);
831
832     auto checkpointHandler = std::make_unique<CheckpointHandler>(
833             compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
834             simulationsShareState,
835             ir->nstlist == 0,
836             MASTER(cr),
837             mdrunOptions.writeConfout,
838             mdrunOptions.checkpointOptions.period);
839
840     const bool resetCountersIsLocal = true;
841     auto       resetHandler         = std::make_unique<ResetHandler>(
842             compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]),
843             !resetCountersIsLocal,
844             ir->nsteps,
845             MASTER(cr),
846             mdrunOptions.timingOptions.resetHalfway,
847             mdrunOptions.maximumHoursToRun,
848             mdlog,
849             wcycle,
850             walltime_accounting);
851
852     const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
853
854     if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
855     {
856         logInitialMultisimStatus(ms, cr, mdlog, simulationsShareState, ir->nsteps, ir->init_step);
857     }
858
859     /* and stop now if we should */
860     bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
861     while (!bLastStep)
862     {
863
864         /* Determine if this is a neighbor search step */
865         bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
866
867         if (bPMETune && bNStList)
868         {
869             // This has to be here because PME load balancing is called so early.
870             // TODO: Move to after all booleans are defined.
871             if (useGpuForUpdate && !bFirstStep)
872             {
873                 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
874                 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
875             }
876             /* PME grid + cut-off optimization with GPUs or PME nodes */
877             pme_loadbal_do(pme_loadbal,
878                            cr,
879                            (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
880                            fplog,
881                            mdlog,
882                            *ir,
883                            fr,
884                            state->box,
885                            state->x,
886                            wcycle,
887                            step,
888                            step_rel,
889                            &bPMETunePrinting,
890                            simulationWork.useGpuPmePpCommunication);
891         }
892
893         wallcycle_start(wcycle, ewcSTEP);
894
895         bLastStep = (step_rel == ir->nsteps);
896         t         = t0 + step * ir->delta_t;
897
898         // TODO Refactor this, so that nstfep does not need a default value of zero
899         if (ir->efep != efepNO || ir->bSimTemp)
900         {
901             /* find and set the current lambdas */
902             state->lambda = currentLambdas(step, *(ir->fepvals), state->fep_state);
903
904             bDoDHDL     = do_per_step(step, ir->fepvals->nstdhdl);
905             bDoFEP      = ((ir->efep != efepNO) && do_per_step(step, nstfep));
906             bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded)
907                            && (!bFirstStep));
908         }
909
910         bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep
911                      && do_per_step(step, replExParams.exchangeInterval));
912
913         if (doSimulatedAnnealing)
914         {
915             // TODO: Avoid changing inputrec (#3854)
916             // Simulated annealing updates the reference temperature.
917             auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
918             update_annealing_target_temp(nonConstInputrec, t, &upd);
919         }
920
921         /* Stop Center of Mass motion */
922         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
923
924         /* Determine whether or not to do Neighbour Searching */
925         bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
926
927         /* Note that the stopHandler will cause termination at nstglobalcomm
928          * steps. Since this coincides with nstcalcenergy, nsttcouple and/or
929          * nstpcouple steps, we have computed the half-step kinetic energy
930          * of the previous step and can always output energies at the last step.
931          */
932         bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
933
934         /* do_log triggers energy and virial calculation. Because this leads
935          * to different code paths, forces can be different. Thus for exact
936          * continuation we should avoid extra log output.
937          * Note that the || bLastStep can result in non-exact continuation
938          * beyond the last step. But we don't consider that to be an issue.
939          */
940         do_log     = (do_per_step(step, ir->nstlog)
941                   || (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) || bLastStep);
942         do_verbose = mdrunOptions.verbose
943                      && (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
944
945         if (useGpuForUpdate && !bFirstStep && bNS)
946         {
947             // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
948             stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
949             stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
950             // Copy coordinates from the GPU when needed at the search step.
951             // NOTE: The cases where coordinates are needed on the CPU for force evaluation are handled in sim_utils.
952             // NOTE: If the coordinates are to be written to the output file, they are also copied separately before the output.
953             stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
954             stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
955         }
956
957         if (bNS && !(bFirstStep && ir->bContinuation))
958         {
959             bMasterState = FALSE;
960             /* Correct the new box if it is too skewed */
961             if (inputrecDynamicBox(ir))
962             {
963                 if (correct_box(fplog, step, state->box))
964                 {
965                     bMasterState = TRUE;
966                     // If update is offloaded, it should be informed about the box size change
967                     if (useGpuForUpdate)
968                     {
969                         integrator->setPbc(PbcType::Xyz, state->box);
970                     }
971                 }
972             }
973             if (DOMAINDECOMP(cr) && bMasterState)
974             {
975                 dd_collect_state(cr->dd, state, state_global);
976             }
977
978             if (DOMAINDECOMP(cr))
979             {
980                 /* Repartition the domain decomposition */
981                 dd_partition_system(fplog,
982                                     mdlog,
983                                     step,
984                                     cr,
985                                     bMasterState,
986                                     nstglobalcomm,
987                                     state_global,
988                                     *top_global,
989                                     ir,
990                                     imdSession,
991                                     pull_work,
992                                     state,
993                                     &f,
994                                     mdAtoms,
995                                     &top,
996                                     fr,
997                                     vsite,
998                                     constr,
999                                     nrnb,
1000                                     wcycle,
1001                                     do_verbose && !bPMETunePrinting);
1002                 shouldCheckNumberOfBondedInteractions = true;
1003                 upd.setNumAtoms(state->natoms);
1004             }
1005         }
1006
1007         // Allocate or re-size GPU halo exchange object, if necessary
1008         if (bNS && havePPDomainDecomposition(cr) && simulationWork.useGpuHaloExchange)
1009         {
1010             GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
1011                                "GPU device manager has to be initialized to use GPU "
1012                                "version of halo exchange.");
1013             constructGpuHaloExchange(mdlog, *cr, *fr->deviceStreamManager, wcycle);
1014         }
1015
1016         if (MASTER(cr) && do_log)
1017         {
1018             gmx::EnergyOutput::printHeader(
1019                     fplog, step, t); /* can we improve the information printed here? */
1020         }
1021
1022         if (ir->efep != efepNO)
1023         {
1024             update_mdatoms(mdatoms, state->lambda[efptMASS]);
1025         }
1026
1027         if (bExchanged)
1028         {
1029             /* We need the kinetic energy at minus the half step for determining
1030              * the full step kinetic energy and possibly for T-coupling.*/
1031             /* This may not be quite working correctly yet . . . . */
1032             compute_globals(gstat,
1033                             cr,
1034                             ir,
1035                             fr,
1036                             ekind,
1037                             makeConstArrayRef(state->x),
1038                             makeConstArrayRef(state->v),
1039                             state->box,
1040                             mdatoms,
1041                             nrnb,
1042                             &vcm,
1043                             wcycle,
1044                             enerd,
1045                             nullptr,
1046                             nullptr,
1047                             nullptr,
1048                             nullptr,
1049                             gmx::ArrayRef<real>{},
1050                             &nullSignaller,
1051                             state->box,
1052                             &totalNumberOfBondedInteractions,
1053                             &bSumEkinhOld,
1054                             CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
1055             checkNumberOfBondedInteractions(mdlog,
1056                                             cr,
1057                                             totalNumberOfBondedInteractions,
1058                                             top_global,
1059                                             &top,
1060                                             makeConstArrayRef(state->x),
1061                                             state->box,
1062                                             &shouldCheckNumberOfBondedInteractions);
1063         }
1064         clear_mat(force_vir);
1065
1066         checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
1067
1068         /* Determine the energy and pressure:
1069          * at nstcalcenergy steps and at energy output steps (set below).
1070          */
1071         if (EI_VV(ir->eI) && (!bInitStep))
1072         {
1073             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
1074             bCalcVir      = bCalcEnerStep
1075                        || (ir->epc != epcNO
1076                            && (do_per_step(step, ir->nstpcouple) || do_per_step(step - 1, ir->nstpcouple)));
1077         }
1078         else
1079         {
1080             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
1081             bCalcVir = bCalcEnerStep || (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
1082         }
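        // For velocity-Verlet integrators the virial is also required on the step immediately
        // following a pressure-coupling step (the do_per_step(step - 1, ir->nstpcouple) check
        // above), in addition to the pressure-coupling steps themselves.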
1083         bCalcEner = bCalcEnerStep;
1084
1085         do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
1086
1087         if (do_ene || do_log || bDoReplEx)
1088         {
1089             bCalcVir  = TRUE;
1090             bCalcEner = TRUE;
1091         }
1092
1093         /* Do we need global communication ? */
1094         bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm)
1095                   || (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm)));
1096
1097         force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0)
1098                        | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0)
1099                        | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0));
1100         if (fr->useMts && !do_per_step(step, ir->nstfout))
1101         {
1102             force_flags |= GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE;
1103         }
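        // With multiple time stepping the separately stored "normal" (fast-force) buffer is only
        // needed when forces are written to the trajectory, so on all other steps the force
        // calculation is allowed to skip filling it.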
1104
1105         if (shellfc)
1106         {
1107             /* Now is the time to relax the shells */
1108             relax_shell_flexcon(fplog,
1109                                 cr,
1110                                 ms,
1111                                 mdrunOptions.verbose,
1112                                 enforcedRotation,
1113                                 step,
1114                                 ir,
1115                                 imdSession,
1116                                 pull_work,
1117                                 bNS,
1118                                 force_flags,
1119                                 &top,
1120                                 constr,
1121                                 enerd,
1122                                 state->natoms,
1123                                 state->x.arrayRefWithPadding(),
1124                                 state->v.arrayRefWithPadding(),
1125                                 state->box,
1126                                 state->lambda,
1127                                 &state->hist,
1128                                 &f.view(),
1129                                 force_vir,
1130                                 mdatoms,
1131                                 nrnb,
1132                                 wcycle,
1133                                 shellfc,
1134                                 fr,
1135                                 runScheduleWork,
1136                                 t,
1137                                 mu_tot,
1138                                 vsite,
1139                                 ddBalanceRegionHandler);
1140         }
1141         else
1142         {
1143             /* The AWH history needs to be saved _before_ doing force calculations where the AWH bias
1144                is updated (or the AWH update will be performed twice for one step when continuing).
1145                It would be best to call this update function from do_md_trajectory_writing, but that
1146                would occur after do_force. One would have to divide the update_awh function into one
1147                function applying the AWH force and one doing the AWH bias update. The update AWH
1148                bias function could then be called after do_md_trajectory_writing (then containing
1149                update_awh_history). The checkpointing will in the future probably be moved to the start
1150                of the MD loop, which will get rid of this issue. */
1151             if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
1152             {
1153                 awh->updateHistory(state_global->awhHistory.get());
1154             }
1155
1156             /* The coordinates (x) are shifted (to get whole molecules)
1157              * in do_force.
1158              * This is parallelized as well, and does communication too.
1159              * Check comments in sim_util.c
1160              */
1161             do_force(fplog,
1162                      cr,
1163                      ms,
1164                      ir,
1165                      awh.get(),
1166                      enforcedRotation,
1167                      imdSession,
1168                      pull_work,
1169                      step,
1170                      nrnb,
1171                      wcycle,
1172                      &top,
1173                      state->box,
1174                      state->x.arrayRefWithPadding(),
1175                      &state->hist,
1176                      &f.view(),
1177                      force_vir,
1178                      mdatoms,
1179                      enerd,
1180                      state->lambda,
1181                      fr,
1182                      runScheduleWork,
1183                      vsite,
1184                      mu_tot,
1185                      t,
1186                      ed ? ed->getLegacyED() : nullptr,
1187                      (bNS ? GMX_FORCE_NS : 0) | force_flags,
1188                      ddBalanceRegionHandler);
1189         }
1190
1191         // VV integrators do not need the following velocity half step
1192         // if it is the first step after starting from a checkpoint.
1193         // That is, the half step is needed on all other steps, and
1194         // also the first step when starting from a .tpr file.
1195         if (EI_VV(ir->eI))
1196         {
1197             integrateVVFirstStep(step,
1198                                  bFirstStep,
1199                                  bInitStep,
1200                                  startingBehavior,
1201                                  nstglobalcomm,
1202                                  ir,
1203                                  fr,
1204                                  cr,
1205                                  state,
1206                                  mdatoms,
1207                                  fcdata,
1208                                  &MassQ,
1209                                  &vcm,
1210                                  top_global,
1211                                  top,
1212                                  enerd,
1213                                  ekind,
1214                                  gstat,
1215                                  &last_ekin,
1216                                  bCalcVir,
1217                                  total_vir,
1218                                  shake_vir,
1219                                  force_vir,
1220                                  pres,
1221                                  M,
1222                                  do_log,
1223                                  do_ene,
1224                                  bCalcEner,
1225                                  bGStat,
1226                                  bStopCM,
1227                                  bTrotter,
1228                                  bExchanged,
1229                                  &bSumEkinhOld,
1230                                  &shouldCheckNumberOfBondedInteractions,
1231                                  &saved_conserved_quantity,
1232                                  &f,
1233                                  &upd,
1234                                  constr,
1235                                  &nullSignaller,
1236                                  trotter_seq,
1237                                  nrnb,
1238                                  mdlog,
1239                                  fplog,
1240                                  wcycle);
1241         }
1242
1243         /* ########  END FIRST UPDATE STEP  ############## */
1244         /* ########  If doing VV, we now have v(dt) ###### */
1245         if (bDoExpanded)
1246         {
1247             /* perform extended ensemble sampling in lambda - we don't
1248                actually move to the new state before outputting
1249                statistics, but if performing simulated tempering, we
1250                do update the velocities and the tau_t. */
1251             // TODO: Avoid changing inputrec (#3854)
1252             // Simulated tempering updates the reference temperature.
1253             // Expanded ensemble without simulated tempering does not change the inputrec.
1254             auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
1255             lamnew                 = ExpandedEnsembleDynamics(fplog,
1256                                               nonConstInputrec,
1257                                               enerd,
1258                                               state,
1259                                               &MassQ,
1260                                               state->fep_state,
1261                                               state->dfhist,
1262                                               step,
1263                                               state->v.rvec_array(),
1264                                               mdatoms);
1265             /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1266             if (MASTER(cr))
1267             {
1268                 copy_df_history(state_global->dfhist, state->dfhist);
1269             }
1270         }
1271
1272         // Copy coordinates from the GPU for the output/checkpointing if the update is offloaded and
1273         // coordinates have not already been copied for i) search or ii) CPU force tasks.
1274         if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork
1275             && (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)
1276                 || checkpointHandler->isCheckpointingStep()))
1277         {
1278             stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1279             stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1280         }
1281         // Copy velocities if needed for the output/checkpointing.
1282         // NOTE: Copy on the search steps is done at the beginning of the step.
1283         if (useGpuForUpdate && !bNS
1284             && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep()))
1285         {
1286             stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1287             stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1288         }
1289         // Copy forces for the output if the forces were reduced on the GPU (not the case on virial steps)
1290         // and update is offloaded hence forces are kept on the GPU for update and have not been
1291         // already transferred in do_force().
1292         // TODO: There should be an improved, explicit mechanism that ensures this copy is only executed
1293         //       when the forces are ready on the GPU -- the same synchronizer should be used as the one
1294         //       prior to GPU update.
1295         // TODO: When the output flags will be included in step workload, this copy can be combined with the
1296         //       copy call in do_force(...).
1297         // NOTE: The forces should not be copied here if the vsites are present, since they were modified
1298         //       on the host after the D2H copy in do_force(...).
1299         if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite)
1300             && do_per_step(step, ir->nstfout))
1301         {
1302             stateGpu->copyForcesFromGpu(f.view().force(), AtomLocality::Local);
1303             stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
1304         }
1305         /* Now we have the energies and forces corresponding to the
1306          * coordinates at time t. We must output all of this before
1307          * the update.
1308          */
1309         do_md_trajectory_writing(fplog,
1310                                  cr,
1311                                  nfile,
1312                                  fnm,
1313                                  step,
1314                                  step_rel,
1315                                  t,
1316                                  ir,
1317                                  state,
1318                                  state_global,
1319                                  observablesHistory,
1320                                  top_global,
1321                                  fr,
1322                                  outf,
1323                                  energyOutput,
1324                                  ekind,
1325                                  f.view().force(),
1326                                  checkpointHandler->isCheckpointingStep(),
1327                                  bRerunMD,
1328                                  bLastStep,
1329                                  mdrunOptions.writeConfout,
1330                                  bSumEkinhOld);
1331         /* Check whether this is an IMD step and do IMD communication, if bIMD is TRUE. */
1332         bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1333
1334         /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1335         if (startingBehavior != StartingBehavior::NewSimulation && bFirstStep
1336             && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1337         {
1338             copy_mat(state->svir_prev, shake_vir);
1339             copy_mat(state->fvir_prev, force_vir);
1340         }
1341
1342         stopHandler->setSignal();
1343         resetHandler->setSignal(walltime_accounting);
1344
1345         if (bGStat || !PAR(cr))
1346         {
1347             /* In parallel we only set the checkpointing signal in steps
1348              * where we do global communication,
1349              * otherwise the other ranks would not know about it.
1350              */
1351             checkpointHandler->setSignal(walltime_accounting);
1352         }
1353
1354         /* #########   START SECOND UPDATE STEP ################# */
1355
1356         /* At the start of the step, randomize or scale the velocities (if doing VV);
1357            the restriction for Andersen coupling is enforced in preprocessing */
1358
1359         if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1360         {
1361             gmx_bool bIfRandomize;
1362             bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1363             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1364             if (constr && bIfRandomize)
1365             {
1366                 constrain_velocities(constr, do_log, do_ene, step, state, nullptr, false, nullptr);
1367             }
1368         }
1369         /* Box is changed in update() when we do pressure coupling,
1370          * but we should still use the old box for energy corrections and when
1371          * writing it to the energy file, so it matches the trajectory files for
1372          * the same timestep above. Make a copy in a separate array.
1373          */
1374         copy_mat(state->box, lastbox);
1375
1376         dvdl_constr = 0;
1377
1378         if (!useGpuForUpdate)
1379         {
1380             wallcycle_start(wcycle, ewcUPDATE);
1381         }
1382         /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1383         if (bTrotter)
1384         {
1385             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1386             /* We can only do Berendsen coupling after we have summed
1387              * the kinetic energy or virial. Since this happens
1388              * in global_state after update, we should only do it at
1389              * step % nstlist = 1 with bGStatEveryStep=FALSE.
1390              */
1391         }
1392         else
1393         {
1394             update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1395             update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep);
1396         }
1397
1398         /* With leap-frog type integrators we compute the kinetic energy
1399          * at a whole time step as the average of the half-time step kinetic
1400          * energies of two subsequent steps. Therefore we need to compute the
1401          * half step kinetic energy also if we need energies at the next step.
1402          */
1403         const bool needHalfStepKineticEnergy =
1404                 (!EI_VV(ir->eI) && (do_per_step(step + 1, nstglobalcomm) || step_rel + 1 == ir->nsteps));
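        /* Hedged illustration of the averaging referred to above (not executed code):
         * with leap-frog, the full-step kinetic energy is approximated from the two
         * surrounding half steps,
         *   Ekin(t) ~ 0.5 * ( Ekin(t - dt/2) + Ekin(t + dt/2) ),
         * so the half-step kinetic energy of this step must also be available whenever
         * full-step energies are needed at the next global-communication step.
         */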
1405
1406         // Parrinello-Rahman requires the pressure to be available before the update to compute
1407         // the velocity scaling matrix. Hence, it runs one step after the nstpcouple step.
1408         const bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN
1409                                          && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
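        // Hedged illustration of the one-step offset above (assuming do_per_step(s, n)
        // tests s % n == 0): with nstpcouple = 10 the condition holds on steps 1, 11, 21, ...,
        // i.e. one step after the steps on which the pressure itself was computed.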
1410
1411         if (EI_VV(ir->eI))
1412         {
1413             GMX_ASSERT(!useGpuForUpdate, "GPU update is not supported with VVAK integrator.");
1414
1415             integrateVVSecondStep(step,
1416                                   ir,
1417                                   fr,
1418                                   cr,
1419                                   state,
1420                                   mdatoms,
1421                                   fcdata,
1422                                   &MassQ,
1423                                   &vcm,
1424                                   pull_work,
1425                                   enerd,
1426                                   ekind,
1427                                   gstat,
1428                                   &dvdl_constr,
1429                                   bCalcVir,
1430                                   total_vir,
1431                                   shake_vir,
1432                                   force_vir,
1433                                   pres,
1434                                   M,
1435                                   lastbox,
1436                                   do_log,
1437                                   do_ene,
1438                                   bGStat,
1439                                   &bSumEkinhOld,
1440                                   &f,
1441                                   &cbuf,
1442                                   &upd,
1443                                   constr,
1444                                   &nullSignaller,
1445                                   trotter_seq,
1446                                   nrnb,
1447                                   wcycle);
1448         }
1449         else
1450         {
1451             if (useGpuForUpdate)
1452             {
1453
1454                 wallcycle_stop(wcycle, ewcUPDATE);
1455
1456                 if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
1457                 {
1458                     integrator->set(stateGpu->getCoordinates(),
1459                                     stateGpu->getVelocities(),
1460                                     stateGpu->getForces(),
1461                                     top.idef,
1462                                     *mdatoms,
1463                                     ekind->ngtc);
1464
1465                     // Copy data to the GPU after the buffers might have been reinitialized
1466                     stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1467                     stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1468                 }
1469
1470                 if (simulationWork.useGpuPme && !runScheduleWork->simulationWork.useGpuPmePpCommunication
1471                     && !thisRankHasDuty(cr, DUTY_PME))
1472                 {
1473                     // The PME forces were received on the host, so they have to be copied
1474                     stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::All);
1475                 }
1476                 else if (!runScheduleWork->stepWork.useGpuFBufferOps)
1477                 {
1478                     // The buffer ops were not offloaded this step, so the forces are on the
1479                     // host and have to be copied
1480                     stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::Local);
1481                 }
1482
1483                 const bool doTemperatureScaling =
1484                         (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1485
1486                 // This applies Leap-Frog, LINCS and SETTLE in succession
1487                 integrator->integrate(
1488                         stateGpu->getForcesReadyOnDeviceEvent(
1489                                 AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps),
1490                         ir->delta_t,
1491                         true,
1492                         bCalcVir,
1493                         shake_vir,
1494                         doTemperatureScaling,
1495                         ekind->tcstat,
1496                         doParrinelloRahman,
1497                         ir->nstpcouple * ir->delta_t,
1498                         M);
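                // Hedged per-atom sketch of what the offloaded update above conceptually
                // performs (illustration only; the real kernels also handle T-coupling
                // groups, Parrinello-Rahman scaling and the constraint algorithms, and
                // lambda_T here stands for the temperature-scaling factor as an assumption):
                //   v[i] = lambda_T * v[i] + (dt / m[i]) * f[i];  // leap-frog velocity step
                //   x[i] = x[i] + dt * v[i];                      // position update
                // followed by LINCS and SETTLE to restore the constrained geometry.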
1499
1500                 // Copy velocities D2H after update if:
1501                 // - Globals are computed this step (includes the energy output steps).
1502                 // - Temperature is needed for the next step.
1503                 if (bGStat || needHalfStepKineticEnergy)
1504                 {
1505                     stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1506                     stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1507                 }
1508             }
1509             else
1510             {
1511                 /* With multiple time stepping we need to do an additional, normal
1512                  * update step to obtain the virial, because the actual MTS integration
1513                  * uses an acceleration in which the slow forces are multiplied by mtsFactor.
1514                  * Using that acceleration would result in a virial whose slow-force
1515                  * contribution is a factor mtsFactor too large.
1516                  */
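                /* Hedged sketch of the MTS combination assumed here (illustration only):
                 * on slow-force steps the integration uses
                 *   f_combined = f_fast + mtsFactor * f_slow,
                 * whereas the constraint virial has to come from the plain sum
                 *   f_fast + f_slow, hence the extra update/constraint pass below. */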
1517                 if (fr->useMts && bCalcVir && constr != nullptr)
1518                 {
1519                     upd.update_for_constraint_virial(
1520                             *ir, *mdatoms, *state, f.view().forceWithPadding(), *ekind);
1521
1522                     constrain_coordinates(constr,
1523                                           do_log,
1524                                           do_ene,
1525                                           step,
1526                                           state,
1527                                           upd.xp()->arrayRefWithPadding(),
1528                                           &dvdl_constr,
1529                                           bCalcVir,
1530                                           shake_vir);
1531                 }
1532
1533                 ArrayRefWithPadding<const RVec> forceCombined =
1534                         (fr->useMts && step % ir->mtsLevels[1].stepFactor == 0)
1535                                 ? f.view().forceMtsCombinedWithPadding()
1536                                 : f.view().forceWithPadding();
1537                 upd.update_coords(
1538                         *ir, step, mdatoms, state, forceCombined, fcdata, ekind, M, etrtPOSITION, cr, constr != nullptr);
1539
1540                 wallcycle_stop(wcycle, ewcUPDATE);
1541
1542                 constrain_coordinates(constr,
1543                                       do_log,
1544                                       do_ene,
1545                                       step,
1546                                       state,
1547                                       upd.xp()->arrayRefWithPadding(),
1548                                       &dvdl_constr,
1549                                       bCalcVir && !fr->useMts,
1550                                       shake_vir);
1551
1552                 upd.update_sd_second_half(
1553                         *ir, step, &dvdl_constr, mdatoms, state, cr, nrnb, wcycle, constr, do_log, do_ene);
1554                 upd.finish_update(*ir, mdatoms, state, wcycle, constr != nullptr);
1555             }
1556
1557             if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1558             {
1559                 updatePrevStepPullCom(pull_work, state);
1560             }
1561
1562             enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1563         }
1564
1565         if (vsite != nullptr)
1566         {
1567             wallcycle_start(wcycle, ewcVSITECONSTR);
1568             vsite->construct(state->x, ir->delta_t, state->v, state->box);
1569             wallcycle_stop(wcycle, ewcVSITECONSTR);
1570         }
1571
1572         /* ############## IF NOT VV, Calculate globals HERE  ############ */
1573         /* With Leap-Frog we can skip compute_globals at
1574          * non-communication steps, but we need to calculate
1575          * the kinetic energy one step before communication.
1576          */
1577         {
1578             // Organize to do inter-simulation signalling on steps if
1579             // and when algorithms require it.
1580             const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1581
1582             if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
1583             {
1584                 // Copy coordinates when needed to stop the CM motion.
1585                 if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
1586                 {
1587                     stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1588                     stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1589                 }
1590                 // Since we're already communicating at this step, we
1591                 // can propagate intra-simulation signals. Note that
1592                 // check_nstglobalcomm is responsible for choosing the
1593                 // value of nstglobalcomm, which is one way in which
1594                 // bGStat becomes true, so we can't get into a
1595                 // situation where e.g. checkpointing can't be
1596                 // signalled.
1597                 bool                doIntraSimSignal = true;
1598                 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1599
1600                 compute_globals(
1601                         gstat,
1602                         cr,
1603                         ir,
1604                         fr,
1605                         ekind,
1606                         makeConstArrayRef(state->x),
1607                         makeConstArrayRef(state->v),
1608                         state->box,
1609                         mdatoms,
1610                         nrnb,
1611                         &vcm,
1612                         wcycle,
1613                         enerd,
1614                         force_vir,
1615                         shake_vir,
1616                         total_vir,
1617                         pres,
1618                         (!EI_VV(ir->eI) && bCalcEner && constr != nullptr) ? constr->rmsdData()
1619                                                                            : gmx::ArrayRef<real>{},
1620                         &signaller,
1621                         lastbox,
1622                         &totalNumberOfBondedInteractions,
1623                         &bSumEkinhOld,
1624                         (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1625                                 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1626                                 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1627                                 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT
1628                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
1629                                                                          : 0));
1630                 checkNumberOfBondedInteractions(mdlog,
1631                                                 cr,
1632                                                 totalNumberOfBondedInteractions,
1633                                                 top_global,
1634                                                 &top,
1635                                                 makeConstArrayRef(state->x),
1636                                                 state->box,
1637                                                 &shouldCheckNumberOfBondedInteractions);
1638                 if (!EI_VV(ir->eI) && bStopCM)
1639                 {
1640                     process_and_stopcm_grp(
1641                             fplog, &vcm, *mdatoms, makeArrayRef(state->x), makeArrayRef(state->v));
1642                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1643
1644                     // TODO: The special case of removing CM motion should be dealt with more gracefully
1645                     if (useGpuForUpdate)
1646                     {
1647                         stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1648                         // Here we block until the H2D copy completes because event sync with the
1649                         // force kernels that use the coordinates on the next steps is not implemented
1650                         // (not because of a race on state->x being modified on the CPU while H2D is in progress).
1651                         stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
1652                         // If the COM removal changed the velocities on the CPU, this has to be accounted for.
1653                         if (vcm.mode != ecmNO)
1654                         {
1655                             stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1656                         }
1657                     }
1658                 }
1659             }
1660         }
1661
1662         /* #############  END CALC EKIN AND PRESSURE ################# */
1663
1664         /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1665            the virial that should probably be addressed eventually. state->veta has better properties,
1666            but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1667            generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1668
1669         if (ir->efep != efepNO && !EI_VV(ir->eI))
1670         {
1671             /* Sum up the foreign energy and dK/dl terms for md and sd.
1672                Currently done every step so that dH/dl is correct in the .edr */
1673             accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals);
1674         }
1675
1676         update_pcouple_after_coordinates(
1677                 fplog, step, ir, mdatoms, pres, force_vir, shake_vir, pressureCouplingMu, state, nrnb, upd.deform(), !useGpuForUpdate);
1678
1679         const bool doBerendsenPressureCoupling =
1680                 (inputrec->epc == epcBERENDSEN && do_per_step(step, inputrec->nstpcouple));
1681         const bool doCRescalePressureCoupling =
1682                 (inputrec->epc == epcCRESCALE && do_per_step(step, inputrec->nstpcouple));
1683         if (useGpuForUpdate
1684             && (doBerendsenPressureCoupling || doCRescalePressureCoupling || doParrinelloRahman))
1685         {
1686             integrator->scaleCoordinates(pressureCouplingMu);
1687             if (doCRescalePressureCoupling)
1688             {
1689                 matrix pressureCouplingInvMu;
1690                 gmx::invertBoxMatrix(pressureCouplingMu, pressureCouplingInvMu);
1691                 integrator->scaleVelocities(pressureCouplingInvMu);
1692             }
1693             integrator->setPbc(PbcType::Xyz, state->box);
1694         }
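        // Hedged illustration of the scaling just applied (illustration only): coordinates
        // are scaled as x -> mu * x, and for C-rescale the velocities are counter-scaled as
        // v -> mu^-1 * v, so both GPU buffers and the PBC description must be refreshed here.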
1695
1696         /* ################# END UPDATE STEP 2 ################# */
1697         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1698
1699         /* The coordinates (x) were unshifted in update */
1700         if (!bGStat)
1701         {
1702             /* We will not sum ekinh_old,
1703              * so signal that we still have to do it.
1704              */
1705             bSumEkinhOld = TRUE;
1706         }
1707
1708         if (bCalcEner)
1709         {
1710             /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1711
1712             /* use the kinetic energy determined directly from the last velocities, not the average of the half steps */
1713             if (bTrotter && ir->eI == eiVV)
1714             {
1715                 enerd->term[F_EKIN] = last_ekin;
1716             }
1717             enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1718
1719             if (integratorHasConservedEnergyQuantity(ir))
1720             {
1721                 if (EI_VV(ir->eI))
1722                 {
1723                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1724                 }
1725                 else
1726                 {
1727                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1728                 }
1729             }
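            /* Hedged note on the branch above (broad sketch): the conserved quantity is the
             * total energy plus the energy stored in the thermostat/barostat degrees of
             * freedom; for VV this uses the value saved at the first half step, while for
             * leap-frog it is evaluated here via NPT_energy().
             */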
1730             /* #########  END PREPARING EDR OUTPUT  ###########  */
1731         }
1732
1733         /* Output stuff */
1734         if (MASTER(cr))
1735         {
1736             if (fplog && do_log && bDoExpanded)
1737             {
1738                 /* only needed if doing expanded ensemble */
1739                 PrintFreeEnergyInfoToFile(fplog,
1740                                           ir->fepvals,
1741                                           ir->expandedvals,
1742                                           ir->bSimTemp ? ir->simtempvals : nullptr,
1743                                           state_global->dfhist,
1744                                           state->fep_state,
1745                                           ir->nstlog,
1746                                           step);
1747             }
1748             if (bCalcEner)
1749             {
1750                 energyOutput.addDataAtEnergyStep(bDoDHDL,
1751                                                  bCalcEnerStep,
1752                                                  t,
1753                                                  mdatoms->tmass,
1754                                                  enerd,
1755                                                  ir->fepvals,
1756                                                  ir->expandedvals,
1757                                                  lastbox,
1758                                                  PTCouplingArrays{ state->boxv,
1759                                                                    state->nosehoover_xi,
1760                                                                    state->nosehoover_vxi,
1761                                                                    state->nhpres_xi,
1762                                                                    state->nhpres_vxi },
1763                                                  state->fep_state,
1764                                                  shake_vir,
1765                                                  force_vir,
1766                                                  total_vir,
1767                                                  pres,
1768                                                  ekind,
1769                                                  mu_tot,
1770                                                  constr);
1771             }
1772             else
1773             {
1774                 energyOutput.recordNonEnergyStep();
1775             }
1776
1777             gmx_bool do_dr = do_per_step(step, ir->nstdisreout);
1778             gmx_bool do_or = do_per_step(step, ir->nstorireout);
1779
1780             if (doSimulatedAnnealing)
1781             {
1782                 gmx::EnergyOutput::printAnnealingTemperatures(
1783                         do_log ? fplog : nullptr, groups, &(ir->opts));
1784             }
1785             if (do_log || do_ene || do_dr || do_or)
1786             {
1787                 energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf),
1788                                                    do_ene,
1789                                                    do_dr,
1790                                                    do_or,
1791                                                    do_log ? fplog : nullptr,
1792                                                    step,
1793                                                    t,
1794                                                    fr->fcdata.get(),
1795                                                    awh.get());
1796             }
1797             if (do_log && ir->bDoAwh && awh->hasFepLambdaDimension())
1798             {
1799                 const bool isInitialOutput = false;
1800                 printLambdaStateToLog(fplog, state->lambda, isInitialOutput);
1801             }
1802
1803             if (ir->bPull)
1804             {
1805                 pull_print_output(pull_work, step, t);
1806             }
1807
1808             if (do_per_step(step, ir->nstlog))
1809             {
1810                 if (fflush(fplog) != 0)
1811                 {
1812                     gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1813                 }
1814             }
1815         }
1816         if (bDoExpanded)
1817         {
1818             /* Have to do this part _after_ outputting the logfile and the edr file */
1819             /* Gets written into the state at the beginning of the next loop */
1820             state->fep_state = lamnew;
1821         }
1822         else if (ir->bDoAwh && awh->needForeignEnergyDifferences(step))
1823         {
1824             state->fep_state = awh->fepLambdaState();
1825         }
1826         /* Print the remaining wall clock time for the run */
1827         if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting)
1828         {
1829             if (shellfc)
1830             {
1831                 fprintf(stderr, "\n");
1832             }
1833             print_time(stderr, walltime_accounting, step, ir, cr);
1834         }
1835
1836         /* Ion/water position swapping.
1837          * Not done in the last step since trajectory writing happens before this call
1838          * in the MD loop and exchanges would be lost anyway. */
1839         bNeedRepartition = FALSE;
1840         if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && do_per_step(step, ir->swap->nstswap))
1841         {
1842             bNeedRepartition = do_swapcoords(cr,
1843                                              step,
1844                                              t,
1845                                              ir,
1846                                              swap,
1847                                              wcycle,
1848                                              as_rvec_array(state->x.data()),
1849                                              state->box,
1850                                              MASTER(cr) && mdrunOptions.verbose,
1851                                              bRerunMD);
1852
1853             if (bNeedRepartition && DOMAINDECOMP(cr))
1854             {
1855                 dd_collect_state(cr->dd, state, state_global);
1856             }
1857         }
1858
1859         /* Replica exchange */
1860         bExchanged = FALSE;
1861         if (bDoReplEx)
1862         {
1863             bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t);
1864         }
1865
1866         if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr))
1867         {
1868             dd_partition_system(fplog,
1869                                 mdlog,
1870                                 step,
1871                                 cr,
1872                                 TRUE,
1873                                 1,
1874                                 state_global,
1875                                 *top_global,
1876                                 ir,
1877                                 imdSession,
1878                                 pull_work,
1879                                 state,
1880                                 &f,
1881                                 mdAtoms,
1882                                 &top,
1883                                 fr,
1884                                 vsite,
1885                                 constr,
1886                                 nrnb,
1887                                 wcycle,
1888                                 FALSE);
1889             shouldCheckNumberOfBondedInteractions = true;
1890             upd.setNumAtoms(state->natoms);
1891         }
1892
1893         bFirstStep = FALSE;
1894         bInitStep  = FALSE;
1895
1896         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1897         /* With all integrators, except VV, we need to retain the pressure
1898          * at the current step for coupling at the next step.
1899          */
1900         if ((state->flags & (1U << estPRES_PREV))
1901             && (bGStatEveryStep || (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1902         {
1903             /* Store the pressure in t_state for pressure coupling
1904              * at the next MD step.
1905              */
1906             copy_mat(pres, state->pres_prev);
1907         }
1908
1909         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1910
1911         if ((membed != nullptr) && (!bLastStep))
1912         {
1913             rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1914         }
1915
1916         cycles = wallcycle_stop(wcycle, ewcSTEP);
1917         if (DOMAINDECOMP(cr) && wcycle)
1918         {
1919             dd_cycles_add(cr->dd, cycles, ddCyclStep);
1920         }
1921
1922         /* increase the MD step number */
1923         step++;
1924         step_rel++;
1925
1926 #if GMX_FAHCORE
1927         if (MASTER(cr))
1928         {
1929             fcReportProgress(ir->nsteps + ir->init_step, step);
1930         }
1931 #endif
1932
1933         resetHandler->resetCounters(
1934                 step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1935
1936         /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to the VMD client */
1937         imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1938     }
1939     /* End of main MD loop */
1940
1941     /* Closing TNG files can include compressing data. Therefore it is good to do that
1942      * before stopping the time measurements. */
1943     mdoutf_tng_close(outf);
1944
1945     /* Stop measuring walltime */
1946     walltime_accounting_end_time(walltime_accounting);
1947
1948     if (!thisRankHasDuty(cr, DUTY_PME))
1949     {
1950         /* Tell the PME-only rank to finish */
1951         gmx_pme_send_finish(cr);
1952     }
1953
1954     if (MASTER(cr))
1955     {
1956         if (ir->nstcalcenergy > 0)
1957         {
1958             energyOutput.printEnergyConservation(fplog, ir->simulation_part, EI_MD(ir->eI));
1959
1960             gmx::EnergyOutput::printAnnealingTemperatures(fplog, groups, &(ir->opts));
1961             energyOutput.printAverages(fplog, groups);
1962         }
1963     }
1964     done_mdoutf(outf);
1965
1966     if (bPMETune)
1967     {
1968         pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1969     }
1970
1971     done_shellfc(fplog, shellfc, step_rel);
1972
1973     if (useReplicaExchange && MASTER(cr))
1974     {
1975         print_replica_exchange_statistics(fplog, repl_ex);
1976     }
1977
1978     walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1979
1980     global_stat_destroy(gstat);
1981 }