Fix clang-tidy warnings in pme and sim_utils
[alexxy/gromacs.git] / src / gromacs / mdrun / md.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2011-2019,2020,2021, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 /*! \internal \file
38  *
39  * \brief Implements the integrator for normal molecular dynamics simulations
40  *
41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42  * \ingroup module_mdrun
43  */
44 #include "gmxpre.h"
45
46 #include <cinttypes>
47 #include <cmath>
48 #include <cstdio>
49 #include <cstdlib>
50
51 #include <algorithm>
52 #include <memory>
53 #include <numeric>
54
55 #include "gromacs/applied_forces/awh/awh.h"
56 #include "gromacs/applied_forces/awh/read_params.h"
57 #include "gromacs/commandline/filenm.h"
58 #include "gromacs/domdec/collect.h"
59 #include "gromacs/domdec/dlbtiming.h"
60 #include "gromacs/domdec/domdec.h"
61 #include "gromacs/domdec/domdec_network.h"
62 #include "gromacs/domdec/domdec_struct.h"
63 #include "gromacs/domdec/gpuhaloexchange.h"
64 #include "gromacs/domdec/localtopologychecker.h"
65 #include "gromacs/domdec/mdsetup.h"
66 #include "gromacs/domdec/partition.h"
67 #include "gromacs/essentialdynamics/edsam.h"
68 #include "gromacs/ewald/pme_load_balancing.h"
69 #include "gromacs/ewald/pme_pp.h"
70 #include "gromacs/fileio/trxio.h"
71 #include "gromacs/gmxlib/network.h"
72 #include "gromacs/gmxlib/nrnb.h"
73 #include "gromacs/gpu_utils/device_stream_manager.h"
74 #include "gromacs/gpu_utils/gpu_utils.h"
75 #include "gromacs/imd/imd.h"
76 #include "gromacs/listed_forces/listed_forces.h"
77 #include "gromacs/math/functions.h"
78 #include "gromacs/math/invertmatrix.h"
79 #include "gromacs/math/vec.h"
80 #include "gromacs/math/vectypes.h"
81 #include "gromacs/mdlib/checkpointhandler.h"
82 #include "gromacs/mdlib/compute_io.h"
83 #include "gromacs/mdlib/constr.h"
84 #include "gromacs/mdlib/coupling.h"
85 #include "gromacs/mdlib/ebin.h"
86 #include "gromacs/mdlib/enerdata_utils.h"
87 #include "gromacs/mdlib/energyoutput.h"
88 #include "gromacs/mdlib/expanded.h"
89 #include "gromacs/mdlib/force.h"
90 #include "gromacs/mdlib/force_flags.h"
91 #include "gromacs/mdlib/forcerec.h"
92 #include "gromacs/mdlib/freeenergyparameters.h"
93 #include "gromacs/mdlib/md_support.h"
94 #include "gromacs/mdlib/mdatoms.h"
95 #include "gromacs/mdlib/mdoutf.h"
96 #include "gromacs/mdlib/membed.h"
97 #include "gromacs/mdlib/resethandler.h"
98 #include "gromacs/mdlib/sighandler.h"
99 #include "gromacs/mdlib/simulationsignal.h"
100 #include "gromacs/mdlib/stat.h"
101 #include "gromacs/mdlib/stophandler.h"
102 #include "gromacs/mdlib/tgroup.h"
103 #include "gromacs/mdlib/trajectory_writing.h"
104 #include "gromacs/mdlib/update.h"
105 #include "gromacs/mdlib/update_constrain_gpu.h"
106 #include "gromacs/mdlib/update_vv.h"
107 #include "gromacs/mdlib/vcm.h"
108 #include "gromacs/mdlib/vsite.h"
109 #include "gromacs/mdrunutility/freeenergy.h"
110 #include "gromacs/mdrunutility/handlerestart.h"
111 #include "gromacs/mdrunutility/multisim.h"
112 #include "gromacs/mdrunutility/printtime.h"
113 #include "gromacs/mdtypes/awh_history.h"
114 #include "gromacs/mdtypes/awh_params.h"
115 #include "gromacs/mdtypes/commrec.h"
116 #include "gromacs/mdtypes/df_history.h"
117 #include "gromacs/mdtypes/energyhistory.h"
118 #include "gromacs/mdtypes/fcdata.h"
119 #include "gromacs/mdtypes/forcebuffers.h"
120 #include "gromacs/mdtypes/forcerec.h"
121 #include "gromacs/mdtypes/group.h"
122 #include "gromacs/mdtypes/inputrec.h"
123 #include "gromacs/mdtypes/interaction_const.h"
124 #include "gromacs/mdtypes/md_enums.h"
125 #include "gromacs/mdtypes/mdatom.h"
126 #include "gromacs/mdtypes/mdrunoptions.h"
127 #include "gromacs/mdtypes/multipletimestepping.h"
128 #include "gromacs/mdtypes/observableshistory.h"
129 #include "gromacs/mdtypes/observablesreducer.h"
130 #include "gromacs/mdtypes/pullhistory.h"
131 #include "gromacs/mdtypes/simulation_workload.h"
132 #include "gromacs/mdtypes/state.h"
133 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
134 #include "gromacs/modularsimulator/energydata.h"
135 #include "gromacs/nbnxm/gpu_data_mgmt.h"
136 #include "gromacs/nbnxm/nbnxm.h"
137 #include "gromacs/pbcutil/pbc.h"
138 #include "gromacs/pulling/output.h"
139 #include "gromacs/pulling/pull.h"
140 #include "gromacs/swap/swapcoords.h"
141 #include "gromacs/timing/wallcycle.h"
142 #include "gromacs/timing/walltime_accounting.h"
143 #include "gromacs/topology/atoms.h"
144 #include "gromacs/topology/idef.h"
145 #include "gromacs/topology/mtop_util.h"
146 #include "gromacs/topology/topology.h"
147 #include "gromacs/trajectory/trajectoryframe.h"
148 #include "gromacs/utility/basedefinitions.h"
149 #include "gromacs/utility/cstringutil.h"
150 #include "gromacs/utility/fatalerror.h"
151 #include "gromacs/utility/logger.h"
152 #include "gromacs/utility/real.h"
153 #include "gromacs/utility/smalloc.h"
154
155 #include "legacysimulator.h"
156 #include "replicaexchange.h"
157 #include "shellfc.h"
158
159 using gmx::SimulationSignaller;
160
161 void gmx::LegacySimulator::do_md()
162 {
163     // TODO Historically, the EM and MD "integrators" used different
164     // names for the t_inputrec *parameter, but these must have the
165     // same name, now that it's a member of a struct. We use this ir
166     // alias to avoid a large ripple of nearly useless changes.
167     // t_inputrec is being replaced by IMdpOptionsProvider, so this
168     // will go away eventually.
169     const t_inputrec* ir = inputrec;
170
171     double       t, t0 = ir->init_t;
172     gmx_bool     bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
173     gmx_bool     bNS = FALSE, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
174     gmx_bool     bDoExpanded = FALSE;
175     gmx_bool     do_ene, do_log, do_verbose;
176     gmx_bool     bMasterState;
177     unsigned int force_flags;
178     tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, pres = { { 0 } };
179     int    i, m;
180     rvec   mu_tot;
181     matrix pressureCouplingMu, M;
182     gmx_repl_ex_t     repl_ex = nullptr;
183     gmx_global_stat_t gstat;
184     gmx_shellfc_t*    shellfc;
185     gmx_bool          bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
186     gmx_bool          bTrotter;
187     real              dvdl_constr;
188     std::vector<RVec> cbuf;
189     matrix            lastbox;
190     int               lamnew = 0;
191     /* for FEP */
192     double    cycles;
193     real      saved_conserved_quantity = 0;
194     real      last_ekin                = 0;
195     t_extmass MassQ;
196     char      sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
197
198     /* PME load balancing data for GPU kernels */
199     gmx_bool bPMETune         = FALSE;
200     gmx_bool bPMETunePrinting = FALSE;
201
202     bool bInteractiveMDstep = false;
203
204     SimulationSignals signals;
205     // Most global communnication stages don't propagate mdrun
206     // signals, and will use this object to achieve that.
207     SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
208
209     if (!mdrunOptions.writeConfout)
210     {
211         // This is on by default, and the main known use case for
212         // turning it off is for convenience in benchmarking, which is
213         // something that should not show up in the general user
214         // interface.
215         GMX_LOG(mdlog.info)
216                 .asParagraph()
217                 .appendText(
218                         "The -noconfout functionality is deprecated, and may be removed in a "
219                         "future version.");
220     }
221
222     /* md-vv uses averaged full step velocities for T-control
223        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
224        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
225     bTrotter = (EI_VV(ir->eI)
226                 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
227
228     const bool bRerunMD = false;
229
230     int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
231     bGStatEveryStep   = (nstglobalcomm == 1);
232
233     const SimulationGroups* groups = &top_global.groups;
234
235     std::unique_ptr<EssentialDynamics> ed = nullptr;
236     if (opt2bSet("-ei", nfile, fnm))
237     {
238         /* Initialize essential dynamics sampling */
239         ed = init_edsam(mdlog,
240                         opt2fn_null("-ei", nfile, fnm),
241                         opt2fn("-eo", nfile, fnm),
242                         top_global,
243                         *ir,
244                         cr,
245                         constr,
246                         state_global,
247                         observablesHistory,
248                         oenv,
249                         startingBehavior);
250     }
251     else if (observablesHistory->edsamHistory)
252     {
253         gmx_fatal(FARGS,
254                   "The checkpoint is from a run with essential dynamics sampling, "
255                   "but the current run did not specify the -ei option. "
256                   "Either specify the -ei option to mdrun, or do not use this checkpoint file.");
257     }
258
259     int*                fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
260     gmx::ArrayRef<real> lambda    = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
261     initialize_lambdas(fplog,
262                        ir->efep,
263                        ir->bSimTemp,
264                        *ir->fepvals,
265                        ir->simtempvals->temperatures,
266                        gmx::arrayRefFromArray(ir->opts.ref_t, ir->opts.ngtc),
267                        MASTER(cr),
268                        fep_state,
269                        lambda);
270     Update upd(*ir, deform);
271     bool   doSimulatedAnnealing = false;
272     {
273         // TODO: Avoid changing inputrec (#3854)
274         // Simulated annealing updates the reference temperature.
275         auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
276         doSimulatedAnnealing   = initSimulatedAnnealing(nonConstInputrec, &upd);
277     }
278     const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
279
280     t_fcdata& fcdata = *fr->fcdata;
281
282     bool simulationsShareState = false;
283     int  nstSignalComm         = nstglobalcomm;
284     {
285         // TODO This implementation of ensemble orientation restraints is nasty because
286         // a user can't just do multi-sim with single-sim orientation restraints.
287         bool usingEnsembleRestraints = (fcdata.disres->nsystems > 1) || ((ms != nullptr) && fcdata.orires);
288         bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim() && (ms != nullptr));
289
290         // Replica exchange, ensemble restraints and AWH need all
291         // simulations to remain synchronized, so they need
292         // checkpoints and stop conditions to act on the same step, so
293         // the propagation of such signals must take place between
294         // simulations, not just within simulations.
295         // TODO: Make algorithm initializers set these flags.
296         simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
297
298         if (simulationsShareState)
299         {
300             // Inter-simulation signal communication does not need to happen
301             // often, so we use a minimum of 200 steps to reduce overhead.
302             const int c_minimumInterSimulationSignallingInterval = 200;
303             nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1) / nstglobalcomm)
304                             * nstglobalcomm;
305         }
306     }
307
308     if (startingBehavior != StartingBehavior::RestartWithAppending)
309     {
310         pleaseCiteCouplingAlgorithms(fplog, *ir);
311     }
312     gmx_mdoutf*       outf = init_mdoutf(fplog,
313                                    nfile,
314                                    fnm,
315                                    mdrunOptions,
316                                    cr,
317                                    outputProvider,
318                                    mdModulesNotifiers,
319                                    ir,
320                                    top_global,
321                                    oenv,
322                                    wcycle,
323                                    startingBehavior,
324                                    simulationsShareState,
325                                    ms);
326     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
327                                    top_global,
328                                    *ir,
329                                    pull_work,
330                                    mdoutf_get_fp_dhdl(outf),
331                                    false,
332                                    startingBehavior,
333                                    simulationsShareState,
334                                    mdModulesNotifiers);
335
336     gstat = global_stat_init(ir);
337
338     const auto& simulationWork     = runScheduleWork->simulationWork;
339     const bool  useGpuForPme       = simulationWork.useGpuPme;
340     const bool  useGpuForNonbonded = simulationWork.useGpuNonbonded;
341     const bool  useGpuForBufferOps = simulationWork.useGpuBufferOps;
342     const bool  useGpuForUpdate    = simulationWork.useGpuUpdate;
343
344     /* Check for polarizable models and flexible constraints */
345     shellfc = init_shell_flexcon(fplog,
346                                  top_global,
347                                  constr ? constr->numFlexibleConstraints() : 0,
348                                  ir->nstcalcenergy,
349                                  DOMAINDECOMP(cr),
350                                  useGpuForPme);
351
352     {
353         double io = compute_io(ir, top_global.natoms, *groups, energyOutput.numEnergyTerms(), 1);
354         if ((io > 2000) && MASTER(cr))
355         {
356             fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io);
357         }
358     }
359
360     ObservablesReducer observablesReducer = observablesReducerBuilder->build();
361
362     ForceBuffers     f(simulationWork.useMts,
363                    ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
364                                ? PinningPolicy::PinnedIfSupported
365                                : PinningPolicy::CannotBePinned);
366     const t_mdatoms* md = mdAtoms->mdatoms();
367     if (DOMAINDECOMP(cr))
368     {
369         // Local state only becomes valid now.
370         dd_init_local_state(*cr->dd, state_global, state);
371
372         /* Distribute the charge groups over the nodes from the master node */
373         dd_partition_system(fplog,
374                             mdlog,
375                             ir->init_step,
376                             cr,
377                             TRUE,
378                             1,
379                             state_global,
380                             top_global,
381                             *ir,
382                             imdSession,
383                             pull_work,
384                             state,
385                             &f,
386                             mdAtoms,
387                             top,
388                             fr,
389                             vsite,
390                             constr,
391                             nrnb,
392                             nullptr,
393                             FALSE);
394         upd.updateAfterPartition(state->natoms,
395                                  md->cFREEZE ? gmx::arrayRefFromArray(md->cFREEZE, md->nr)
396                                              : gmx::ArrayRef<const unsigned short>(),
397                                  md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
398                                          : gmx::ArrayRef<const unsigned short>());
399         fr->longRangeNonbondeds->updateAfterPartition(*md);
400     }
401     else
402     {
403         state_change_natoms(state_global, state_global->natoms);
404
405         /* Generate and initialize new topology */
406         mdAlgorithmsSetupAtomData(cr, *ir, top_global, top, fr, &f, mdAtoms, constr, vsite, shellfc);
407
408         upd.updateAfterPartition(state->natoms,
409                                  md->cFREEZE ? gmx::arrayRefFromArray(md->cFREEZE, md->nr)
410                                              : gmx::ArrayRef<const unsigned short>(),
411                                  md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
412                                          : gmx::ArrayRef<const unsigned short>());
413         fr->longRangeNonbondeds->updateAfterPartition(*md);
414     }
415
416     std::unique_ptr<UpdateConstrainGpu> integrator;
417
418     StatePropagatorDataGpu* stateGpu = fr->stateGpu;
419
420     // TODO: the assertions below should be handled by UpdateConstraintsBuilder.
421     if (useGpuForUpdate)
422     {
423         GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
424                                    || constr->numConstraintsTotal() == 0,
425                            "Constraints in domain decomposition are only supported with update "
426                            "groups if using GPU update.\n");
427         GMX_RELEASE_ASSERT(ir->eConstrAlg != ConstraintAlgorithm::Shake || constr == nullptr
428                                    || constr->numConstraintsTotal() == 0,
429                            "SHAKE is not supported with GPU update.");
430         GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
431                            "Either PME or short-ranged non-bonded interaction tasks must run on "
432                            "the GPU to use GPU update.\n");
433         GMX_RELEASE_ASSERT(ir->eI == IntegrationAlgorithm::MD,
434                            "Only the md integrator is supported with the GPU update.\n");
435         GMX_RELEASE_ASSERT(
436                 ir->etc != TemperatureCoupling::NoseHoover,
437                 "Nose-Hoover temperature coupling is not supported with the GPU update.\n");
438         GMX_RELEASE_ASSERT(
439                 ir->epc == PressureCoupling::No || ir->epc == PressureCoupling::ParrinelloRahman
440                         || ir->epc == PressureCoupling::Berendsen || ir->epc == PressureCoupling::CRescale,
441                 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are supported "
442                 "with the GPU update.\n");
443         GMX_RELEASE_ASSERT(!md->haveVsites,
444                            "Virtual sites are not supported with the GPU update.\n");
445         GMX_RELEASE_ASSERT(ed == nullptr,
446                            "Essential dynamics is not supported with the GPU update.\n");
447         GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(*ir->pull),
448                            "Constraints pulling is not supported with the GPU update.\n");
449         GMX_RELEASE_ASSERT(fcdata.orires == nullptr,
450                            "Orientation restraints are not supported with the GPU update.\n");
451         GMX_RELEASE_ASSERT(
452                 ir->efep == FreeEnergyPerturbationType::No
453                         || (!haveFepPerturbedMasses(top_global) && !havePerturbedConstraints(top_global)),
454                 "Free energy perturbation of masses and constraints are not supported with the GPU "
455                 "update.");
456
457         if (constr != nullptr && constr->numConstraintsTotal() > 0)
458         {
459             GMX_LOG(mdlog.info)
460                     .asParagraph()
461                     .appendText("Updating coordinates and applying constraints on the GPU.");
462         }
463         else
464         {
465             GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
466         }
467         GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
468                            "Device stream manager should be initialized in order to use GPU "
469                            "update-constraints.");
470         GMX_RELEASE_ASSERT(
471                 fr->deviceStreamManager->streamIsValid(gmx::DeviceStreamType::UpdateAndConstraints),
472                 "Update stream should be initialized in order to use GPU "
473                 "update-constraints.");
474         integrator = std::make_unique<UpdateConstrainGpu>(
475                 *ir,
476                 top_global,
477                 ekind->ngtc,
478                 fr->deviceStreamManager->context(),
479                 fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints),
480                 wcycle);
481
482         stateGpu->setXUpdatedOnDeviceEvent(integrator->xUpdatedOnDeviceEvent());
483
484         integrator->setPbc(PbcType::Xyz, state->box);
485     }
486
487     if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
488     {
489         changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
490     }
491     if (useGpuForUpdate)
492     {
493         changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported);
494     }
495
496     // NOTE: The global state is no longer used at this point.
497     // But state_global is still used as temporary storage space for writing
498     // the global state to file and potentially for replica exchange.
499     // (Global topology should persist.)
500
501     update_mdatoms(mdAtoms->mdatoms(), state->lambda[FreeEnergyPerturbationCouplingType::Mass]);
502
503     if (ir->bExpanded)
504     {
505         /* Check nstexpanded here, because the grompp check was broken */
506         if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
507         {
508             gmx_fatal(FARGS,
509                       "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
510         }
511         init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation, ir, state->dfhist);
512     }
513
514     if (MASTER(cr))
515     {
516         EnergyData::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput);
517     }
518
519     preparePrevStepPullCom(ir,
520                            pull_work,
521                            gmx::arrayRefFromArray(md->massT, md->nr),
522                            state,
523                            state_global,
524                            cr,
525                            startingBehavior != StartingBehavior::NewSimulation);
526
527     // TODO: Remove this by converting AWH into a ForceProvider
528     auto awh = prepareAwhModule(fplog,
529                                 *ir,
530                                 state_global,
531                                 cr,
532                                 ms,
533                                 startingBehavior != StartingBehavior::NewSimulation,
534                                 shellfc != nullptr,
535                                 opt2fn("-awh", nfile, fnm),
536                                 pull_work);
537
538     if (useReplicaExchange && MASTER(cr))
539     {
540         repl_ex = init_replica_exchange(fplog, ms, top_global.natoms, ir, replExParams);
541     }
542     /* PME tuning is only supported in the Verlet scheme, with PME for
543      * Coulomb. It is not supported with only LJ PME. */
544     bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && !mdrunOptions.reproducible
545                 && ir->cutoff_scheme != CutoffScheme::Group);
546
547     pme_load_balancing_t* pme_loadbal = nullptr;
548     if (bPMETune)
549     {
550         pme_loadbal_init(
551                 &pme_loadbal, cr, mdlog, *ir, state->box, *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu());
552     }
553
554     if (!ir->bContinuation)
555     {
556         if (state->flags & enumValueToBitMask(StateEntry::V))
557         {
558             auto v = makeArrayRef(state->v);
559             /* Set the velocities of vsites, shells and frozen atoms to zero */
560             for (i = 0; i < md->homenr; i++)
561             {
562                 if (md->ptype[i] == ParticleType::Shell)
563                 {
564                     clear_rvec(v[i]);
565                 }
566                 else if (md->cFREEZE)
567                 {
568                     for (m = 0; m < DIM; m++)
569                     {
570                         if (ir->opts.nFreeze[md->cFREEZE[i]][m])
571                         {
572                             v[i][m] = 0;
573                         }
574                     }
575                 }
576             }
577         }
578
579         if (constr)
580         {
581             /* Constrain the initial coordinates and velocities */
582             do_constrain_first(fplog,
583                                constr,
584                                ir,
585                                md->nr,
586                                md->homenr,
587                                state->x.arrayRefWithPadding(),
588                                state->v.arrayRefWithPadding(),
589                                state->box,
590                                state->lambda[FreeEnergyPerturbationCouplingType::Bonded]);
591         }
592     }
593
594     const int nstfep = computeFepPeriod(*ir, replExParams);
595
596     /* Be REALLY careful about what flags you set here. You CANNOT assume
597      * this is the first step, since we might be restarting from a checkpoint,
598      * and in that case we should not do any modifications to the state.
599      */
600     bStopCM = (ir->comm_mode != ComRemovalAlgorithm::No && !ir->bContinuation);
601
602     // When restarting from a checkpoint, it can be appropriate to
603     // initialize ekind from quantities in the checkpoint. Otherwise,
604     // compute_globals must initialize ekind before the simulation
605     // starts/restarts. However, only the master rank knows what was
606     // found in the checkpoint file, so we have to communicate in
607     // order to coordinate the restart.
608     //
609     // TODO Consider removing this communication if/when checkpoint
610     // reading directly follows .tpr reading, because all ranks can
611     // agree on hasReadEkinState at that time.
612     bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
613     if (PAR(cr))
614     {
615         gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr->mpi_comm_mygroup);
616     }
617     if (hasReadEkinState)
618     {
619         restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
620     }
621
622     unsigned int cglo_flags =
623             (CGLO_TEMPERATURE | CGLO_GSTAT | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
624              | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) | (hasReadEkinState ? CGLO_READEKIN : 0));
625
626     bSumEkinhOld = FALSE;
627
628     t_vcm vcm(top_global.groups, *ir);
629     reportComRemovalInfo(fplog, vcm);
630
631     int64_t step     = ir->init_step;
632     int64_t step_rel = 0;
633
634     /* To minimize communication, compute_globals computes the COM velocity
635      * and the kinetic energy for the velocities without COM motion removed.
636      * Thus to get the kinetic energy without the COM contribution, we need
637      * to call compute_globals twice.
638      */
639     for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
640     {
641         unsigned int cglo_flags_iteration = cglo_flags;
642         if (bStopCM && cgloIteration == 0)
643         {
644             cglo_flags_iteration |= CGLO_STOPCM;
645             cglo_flags_iteration &= ~CGLO_TEMPERATURE;
646         }
647         compute_globals(gstat,
648                         cr,
649                         ir,
650                         fr,
651                         ekind,
652                         makeConstArrayRef(state->x),
653                         makeConstArrayRef(state->v),
654                         state->box,
655                         md,
656                         nrnb,
657                         &vcm,
658                         nullptr,
659                         enerd,
660                         force_vir,
661                         shake_vir,
662                         total_vir,
663                         pres,
664                         gmx::ArrayRef<real>{},
665                         &nullSignaller,
666                         state->box,
667                         &bSumEkinhOld,
668                         cglo_flags_iteration,
669                         step,
670                         &observablesReducer);
671         if (cglo_flags_iteration & CGLO_STOPCM)
672         {
673             /* At initialization, do not pass x with acceleration-correction mode
674              * to avoid (incorrect) correction of the initial coordinates.
675              */
676             auto x = (vcm.mode == ComRemovalAlgorithm::LinearAccelerationCorrection)
677                              ? ArrayRef<RVec>()
678                              : makeArrayRef(state->x);
679             process_and_stopcm_grp(fplog, &vcm, *md, x, makeArrayRef(state->v));
680             inc_nrnb(nrnb, eNR_STOPCM, md->homenr);
681         }
682     }
683     if (ir->eI == IntegrationAlgorithm::VVAK)
684     {
685         /* a second call to get the half step temperature initialized as well */
686         /* we do the same call as above, but turn the pressure off -- internally to
687            compute_globals, this is recognized as a velocity verlet half-step
688            kinetic energy calculation.  This minimized excess variables, but
689            perhaps loses some logic?*/
690
691         compute_globals(gstat,
692                         cr,
693                         ir,
694                         fr,
695                         ekind,
696                         makeConstArrayRef(state->x),
697                         makeConstArrayRef(state->v),
698                         state->box,
699                         md,
700                         nrnb,
701                         &vcm,
702                         nullptr,
703                         enerd,
704                         force_vir,
705                         shake_vir,
706                         total_vir,
707                         pres,
708                         gmx::ArrayRef<real>{},
709                         &nullSignaller,
710                         state->box,
711                         &bSumEkinhOld,
712                         cglo_flags & ~CGLO_PRESSURE,
713                         step,
714                         &observablesReducer);
715     }
716
717     /* Calculate the initial half step temperature, and save the ekinh_old */
718     if (startingBehavior == StartingBehavior::NewSimulation)
719     {
720         for (i = 0; (i < ir->opts.ngtc); i++)
721         {
722             copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
723         }
724     }
725
726     /* need to make an initiation call to get the Trotter variables set, as well as other constants
727        for non-trotter temperature control */
728     auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
729
730     if (MASTER(cr))
731     {
732         if (!ir->bContinuation)
733         {
734             if (constr && ir->eConstrAlg == ConstraintAlgorithm::Lincs)
735             {
736                 fprintf(fplog,
737                         "RMS relative constraint deviation after constraining: %.2e\n",
738                         constr->rmsd());
739             }
740             if (EI_STATE_VELOCITY(ir->eI))
741             {
742                 real temp = enerd->term[F_TEMP];
743                 if (ir->eI != IntegrationAlgorithm::VV)
744                 {
745                     /* Result of Ekin averaged over velocities of -half
746                      * and +half step, while we only have -half step here.
747                      */
748                     temp *= 2;
749                 }
750                 fprintf(fplog, "Initial temperature: %g K\n", temp);
751             }
752         }
753
754         char tbuf[20];
755         fprintf(stderr, "starting mdrun '%s'\n", *(top_global.name));
756         if (ir->nsteps >= 0)
757         {
758             sprintf(tbuf, "%8.1f", (ir->init_step + ir->nsteps) * ir->delta_t);
759         }
760         else
761         {
762             sprintf(tbuf, "%s", "infinite");
763         }
764         if (ir->init_step > 0)
765         {
766             fprintf(stderr,
767                     "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
768                     gmx_step_str(ir->init_step + ir->nsteps, sbuf),
769                     tbuf,
770                     gmx_step_str(ir->init_step, sbuf2),
771                     ir->init_step * ir->delta_t);
772         }
773         else
774         {
775             fprintf(stderr, "%s steps, %s ps.\n", gmx_step_str(ir->nsteps, sbuf), tbuf);
776         }
777         fprintf(fplog, "\n");
778     }
779
780     walltime_accounting_start_time(walltime_accounting);
781     wallcycle_start(wcycle, WallCycleCounter::Run);
782     print_start(fplog, cr, walltime_accounting, "mdrun");
783
784     /***********************************************************
785      *
786      *             Loop over MD steps
787      *
788      ************************************************************/
789
790     bFirstStep = TRUE;
791     /* Skip the first Nose-Hoover integration when we get the state from tpx */
792     bInitStep        = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
793     bSumEkinhOld     = FALSE;
794     bExchanged       = FALSE;
795     bNeedRepartition = FALSE;
796
797     auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
798             compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]),
799             simulationsShareState,
800             MASTER(cr),
801             ir->nstlist,
802             mdrunOptions.reproducible,
803             nstSignalComm,
804             mdrunOptions.maximumHoursToRun,
805             ir->nstlist == 0,
806             fplog,
807             step,
808             bNS,
809             walltime_accounting);
810
811     auto checkpointHandler = std::make_unique<CheckpointHandler>(
812             compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
813             simulationsShareState,
814             ir->nstlist == 0,
815             MASTER(cr),
816             mdrunOptions.writeConfout,
817             mdrunOptions.checkpointOptions.period);
818
819     const bool resetCountersIsLocal = true;
820     auto       resetHandler         = std::make_unique<ResetHandler>(
821             compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]),
822             !resetCountersIsLocal,
823             ir->nsteps,
824             MASTER(cr),
825             mdrunOptions.timingOptions.resetHalfway,
826             mdrunOptions.maximumHoursToRun,
827             mdlog,
828             wcycle,
829             walltime_accounting);
830
831     const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
832
833     if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
834     {
835         logInitialMultisimStatus(ms, cr, mdlog, simulationsShareState, ir->nsteps, ir->init_step);
836     }
837
838     /* and stop now if we should */
839     bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
840     while (!bLastStep)
841     {
842
843         /* Determine if this is a neighbor search step */
844         bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
845
846         if (bPMETune && bNStList)
847         {
848             // This has to be here because PME load balancing is called so early.
849             // TODO: Move to after all booleans are defined.
850             if (useGpuForUpdate && !bFirstStep)
851             {
852                 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
853                 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
854             }
855             /* PME grid + cut-off optimization with GPUs or PME nodes */
856             pme_loadbal_do(pme_loadbal,
857                            cr,
858                            (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
859                            fplog,
860                            mdlog,
861                            *ir,
862                            fr,
863                            state->box,
864                            state->x,
865                            wcycle,
866                            step,
867                            step_rel,
868                            &bPMETunePrinting,
869                            simulationWork.useGpuPmePpCommunication);
870         }
871
872         wallcycle_start(wcycle, WallCycleCounter::Step);
873
874         bLastStep = (step_rel == ir->nsteps);
875         t         = t0 + step * ir->delta_t;
876
877         // TODO Refactor this, so that nstfep does not need a default value of zero
878         if (ir->efep != FreeEnergyPerturbationType::No || ir->bSimTemp)
879         {
880             /* find and set the current lambdas */
881             state->lambda = currentLambdas(step, *(ir->fepvals), state->fep_state);
882
883             bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded)
884                            && (!bFirstStep));
885         }
886
887         bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep
888                      && do_per_step(step, replExParams.exchangeInterval));
889
890         if (doSimulatedAnnealing)
891         {
892             // TODO: Avoid changing inputrec (#3854)
893             // Simulated annealing updates the reference temperature.
894             auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
895             update_annealing_target_temp(nonConstInputrec, t, &upd);
896         }
897
898         /* Stop Center of Mass motion */
899         bStopCM = (ir->comm_mode != ComRemovalAlgorithm::No && do_per_step(step, ir->nstcomm));
900
901         /* Determine whether or not to do Neighbour Searching */
902         bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
903
904         /* Note that the stopHandler will cause termination at nstglobalcomm
905          * steps. Since this concides with nstcalcenergy, nsttcouple and/or
906          * nstpcouple steps, we have computed the half-step kinetic energy
907          * of the previous step and can always output energies at the last step.
908          */
909         bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
910
911         /* do_log triggers energy and virial calculation. Because this leads
912          * to different code paths, forces can be different. Thus for exact
913          * continuation we should avoid extra log output.
914          * Note that the || bLastStep can result in non-exact continuation
915          * beyond the last step. But we don't consider that to be an issue.
916          */
917         do_log     = (do_per_step(step, ir->nstlog)
918                   || (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) || bLastStep);
919         do_verbose = mdrunOptions.verbose
920                      && (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
921
922         if (useGpuForUpdate && !bFirstStep && bNS)
923         {
924             // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
925             stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
926             stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
927             // Copy coordinate from the GPU when needed at the search step.
928             // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
929             // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
930             stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
931             stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
932         }
933
934         // We only need to calculate virtual velocities if we are writing them in the current step
935         const bool needVirtualVelocitiesThisStep =
936                 (vsite != nullptr)
937                 && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep());
938
939         if (vsite != nullptr)
940         {
941             // Virtual sites need to be updated before domain decomposition and forces are calculated
942             wallcycle_start(wcycle, WallCycleCounter::VsiteConstr);
943             // md-vv calculates virtual velocities once it has full-step real velocities
944             vsite->construct(state->x,
945                              state->v,
946                              state->box,
947                              (!EI_VV(inputrec->eI) && needVirtualVelocitiesThisStep)
948                                      ? VSiteOperation::PositionsAndVelocities
949                                      : VSiteOperation::Positions);
950             wallcycle_stop(wcycle, WallCycleCounter::VsiteConstr);
951         }
952
953         if (bNS && !(bFirstStep && ir->bContinuation))
954         {
955             bMasterState = FALSE;
956             /* Correct the new box if it is too skewed */
957             if (inputrecDynamicBox(ir))
958             {
959                 if (correct_box(fplog, step, state->box))
960                 {
961                     bMasterState = TRUE;
962                     // If update is offloaded, it should be informed about the box size change
963                     if (useGpuForUpdate)
964                     {
965                         integrator->setPbc(PbcType::Xyz, state->box);
966                     }
967                 }
968             }
969             if (DOMAINDECOMP(cr) && bMasterState)
970             {
971                 dd_collect_state(cr->dd, state, state_global);
972             }
973
974             if (DOMAINDECOMP(cr))
975             {
976                 /* Repartition the domain decomposition */
977                 dd_partition_system(fplog,
978                                     mdlog,
979                                     step,
980                                     cr,
981                                     bMasterState,
982                                     nstglobalcomm,
983                                     state_global,
984                                     top_global,
985                                     *ir,
986                                     imdSession,
987                                     pull_work,
988                                     state,
989                                     &f,
990                                     mdAtoms,
991                                     top,
992                                     fr,
993                                     vsite,
994                                     constr,
995                                     nrnb,
996                                     wcycle,
997                                     do_verbose && !bPMETunePrinting);
998                 upd.updateAfterPartition(state->natoms,
999                                          md->cFREEZE ? gmx::arrayRefFromArray(md->cFREEZE, md->nr)
1000                                                      : gmx::ArrayRef<const unsigned short>(),
1001                                          md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
1002                                                  : gmx::ArrayRef<const unsigned short>());
1003                 fr->longRangeNonbondeds->updateAfterPartition(*md);
1004             }
1005         }
1006
1007         // Allocate or re-size GPU halo exchange object, if necessary
1008         if (bNS && simulationWork.havePpDomainDecomposition && simulationWork.useGpuHaloExchange)
1009         {
1010             GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
1011                                "GPU device manager has to be initialized to use GPU "
1012                                "version of halo exchange.");
1013             constructGpuHaloExchange(mdlog, *cr, *fr->deviceStreamManager, wcycle);
1014         }
1015
1016         if (MASTER(cr) && do_log)
1017         {
1018             gmx::EnergyOutput::printHeader(
1019                     fplog, step, t); /* can we improve the information printed here? */
1020         }
1021
1022         if (ir->efep != FreeEnergyPerturbationType::No)
1023         {
1024             update_mdatoms(mdAtoms->mdatoms(), state->lambda[FreeEnergyPerturbationCouplingType::Mass]);
1025         }
1026
1027         if (bExchanged)
1028         {
1029             /* We need the kinetic energy at minus the half step for determining
1030              * the full step kinetic energy and possibly for T-coupling.*/
1031             /* This may not be quite working correctly yet . . . . */
1032             int cglo_flags = CGLO_GSTAT | CGLO_TEMPERATURE;
1033             compute_globals(gstat,
1034                             cr,
1035                             ir,
1036                             fr,
1037                             ekind,
1038                             makeConstArrayRef(state->x),
1039                             makeConstArrayRef(state->v),
1040                             state->box,
1041                             md,
1042                             nrnb,
1043                             &vcm,
1044                             wcycle,
1045                             enerd,
1046                             nullptr,
1047                             nullptr,
1048                             nullptr,
1049                             nullptr,
1050                             gmx::ArrayRef<real>{},
1051                             &nullSignaller,
1052                             state->box,
1053                             &bSumEkinhOld,
1054                             cglo_flags,
1055                             step,
1056                             &observablesReducer);
1057         }
1058         clear_mat(force_vir);
1059
1060         checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
1061
1062         /* Determine the energy and pressure:
1063          * at nstcalcenergy steps and at energy output steps (set below).
1064          */
1065         if (EI_VV(ir->eI) && (!bInitStep))
1066         {
1067             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
1068             bCalcVir      = bCalcEnerStep
1069                        || (ir->epc != PressureCoupling::No
1070                            && (do_per_step(step, ir->nstpcouple) || do_per_step(step - 1, ir->nstpcouple)));
1071         }
1072         else
1073         {
1074             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
1075             bCalcVir      = bCalcEnerStep
1076                        || (ir->epc != PressureCoupling::No && do_per_step(step, ir->nstpcouple));
1077         }
1078         bCalcEner = bCalcEnerStep;
1079
1080         do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
1081
1082         if (do_ene || do_log || bDoReplEx)
1083         {
1084             bCalcVir  = TRUE;
1085             bCalcEner = TRUE;
1086         }
1087
1088         // bCalcEner is only here for when the last step is not a mulitple of nstfep
1089         const bool computeDHDL = ((ir->efep != FreeEnergyPerturbationType::No || ir->bSimTemp)
1090                                   && (do_per_step(step, nstfep) || bCalcEner));
1091
1092         /* Do we need global communication ? */
1093         bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm)
1094                   || (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm)));
1095
1096         force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0)
1097                        | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0)
1098                        | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (computeDHDL ? GMX_FORCE_DHDL : 0));
1099         if (simulationWork.useMts && !do_per_step(step, ir->nstfout))
1100         {
1101             // TODO: merge this with stepWork.useOnlyMtsCombinedForceBuffer
1102             force_flags |= GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE;
1103         }
1104
1105         if (shellfc)
1106         {
1107             /* Now is the time to relax the shells */
1108             relax_shell_flexcon(fplog,
1109                                 cr,
1110                                 ms,
1111                                 mdrunOptions.verbose,
1112                                 enforcedRotation,
1113                                 step,
1114                                 ir,
1115                                 imdSession,
1116                                 pull_work,
1117                                 bNS,
1118                                 force_flags,
1119                                 top,
1120                                 constr,
1121                                 enerd,
1122                                 state->natoms,
1123                                 state->x.arrayRefWithPadding(),
1124                                 state->v.arrayRefWithPadding(),
1125                                 state->box,
1126                                 state->lambda,
1127                                 &state->hist,
1128                                 &f.view(),
1129                                 force_vir,
1130                                 *md,
1131                                 fr->longRangeNonbondeds.get(),
1132                                 nrnb,
1133                                 wcycle,
1134                                 shellfc,
1135                                 fr,
1136                                 runScheduleWork,
1137                                 t,
1138                                 mu_tot,
1139                                 vsite,
1140                                 ddBalanceRegionHandler);
1141         }
1142         else
1143         {
1144             /* The AWH history need to be saved _before_ doing force calculations where the AWH bias
1145                is updated (or the AWH update will be performed twice for one step when continuing).
1146                It would be best to call this update function from do_md_trajectory_writing but that
1147                would occur after do_force. One would have to divide the update_awh function into one
1148                function applying the AWH force and one doing the AWH bias update. The update AWH
1149                bias function could then be called after do_md_trajectory_writing (then containing
1150                update_awh_history). The checkpointing will in the future probably moved to the start
1151                of the md loop which will rid of this issue. */
1152             if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
1153             {
1154                 awh->updateHistory(state_global->awhHistory.get());
1155             }
1156
1157             /* The coordinates (x) are shifted (to get whole molecules)
1158              * in do_force.
1159              * This is parallellized as well, and does communication too.
1160              * Check comments in sim_util.c
1161              */
1162             do_force(fplog,
1163                      cr,
1164                      ms,
1165                      *ir,
1166                      awh.get(),
1167                      enforcedRotation,
1168                      imdSession,
1169                      pull_work,
1170                      step,
1171                      nrnb,
1172                      wcycle,
1173                      top,
1174                      state->box,
1175                      state->x.arrayRefWithPadding(),
1176                      &state->hist,
1177                      &f.view(),
1178                      force_vir,
1179                      md,
1180                      enerd,
1181                      state->lambda,
1182                      fr,
1183                      runScheduleWork,
1184                      vsite,
1185                      mu_tot,
1186                      t,
1187                      ed ? ed->getLegacyED() : nullptr,
1188                      fr->longRangeNonbondeds.get(),
1189                      (bNS ? GMX_FORCE_NS : 0) | force_flags,
1190                      ddBalanceRegionHandler);
1191         }
1192
1193         // VV integrators do not need the following velocity half step
1194         // if it is the first step after starting from a checkpoint.
1195         // That is, the half step is needed on all other steps, and
1196         // also the first step when starting from a .tpr file.
1197         if (EI_VV(ir->eI))
1198         {
1199             integrateVVFirstStep(step,
1200                                  bFirstStep,
1201                                  bInitStep,
1202                                  startingBehavior,
1203                                  nstglobalcomm,
1204                                  ir,
1205                                  fr,
1206                                  cr,
1207                                  state,
1208                                  mdAtoms->mdatoms(),
1209                                  &fcdata,
1210                                  &MassQ,
1211                                  &vcm,
1212                                  enerd,
1213                                  &observablesReducer,
1214                                  ekind,
1215                                  gstat,
1216                                  &last_ekin,
1217                                  bCalcVir,
1218                                  total_vir,
1219                                  shake_vir,
1220                                  force_vir,
1221                                  pres,
1222                                  M,
1223                                  do_log,
1224                                  do_ene,
1225                                  bCalcEner,
1226                                  bGStat,
1227                                  bStopCM,
1228                                  bTrotter,
1229                                  bExchanged,
1230                                  &bSumEkinhOld,
1231                                  &saved_conserved_quantity,
1232                                  &f,
1233                                  &upd,
1234                                  constr,
1235                                  &nullSignaller,
1236                                  trotter_seq,
1237                                  nrnb,
1238                                  fplog,
1239                                  wcycle);
1240             if (vsite != nullptr && needVirtualVelocitiesThisStep)
1241             {
1242                 // Positions were calculated earlier
1243                 wallcycle_start(wcycle, WallCycleCounter::VsiteConstr);
1244                 vsite->construct(state->x, state->v, state->box, VSiteOperation::Velocities);
1245                 wallcycle_stop(wcycle, WallCycleCounter::VsiteConstr);
1246             }
1247         }
1248
1249         /* ########  END FIRST UPDATE STEP  ############## */
1250         /* ########  If doing VV, we now have v(dt) ###### */
1251         if (bDoExpanded)
1252         {
1253             /* perform extended ensemble sampling in lambda - we don't
1254                actually move to the new state before outputting
1255                statistics, but if performing simulated tempering, we
1256                do update the velocities and the tau_t. */
1257             // TODO: Avoid changing inputrec (#3854)
1258             // Simulated tempering updates the reference temperature.
1259             // Expanded ensemble without simulated tempering does not change the inputrec.
1260             auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
1261             lamnew                 = ExpandedEnsembleDynamics(fplog,
1262                                               nonConstInputrec,
1263                                               enerd,
1264                                               state,
1265                                               &MassQ,
1266                                               state->fep_state,
1267                                               state->dfhist,
1268                                               step,
1269                                               state->v.rvec_array(),
1270                                               md->homenr,
1271                                               md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
1272                                                                       : gmx::ArrayRef<const unsigned short>());
1273             /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1274             if (MASTER(cr))
1275             {
1276                 copy_df_history(state_global->dfhist, state->dfhist);
1277             }
1278         }
1279
1280         // Copy coordinate from the GPU for the output/checkpointing if the update is offloaded and
1281         // coordinates have not already been copied for i) search or ii) CPU force tasks.
1282         if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork
1283             && (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)
1284                 || checkpointHandler->isCheckpointingStep()))
1285         {
1286             stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1287             stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1288         }
1289         // Copy velocities if needed for the output/checkpointing.
1290         // NOTE: Copy on the search steps is done at the beginning of the step.
1291         if (useGpuForUpdate && !bNS
1292             && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep()))
1293         {
1294             stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1295             stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1296         }
1297         // Copy forces for the output if the forces were reduced on the GPU (not the case on virial steps)
1298         // and update is offloaded hence forces are kept on the GPU for update and have not been
1299         // already transferred in do_force().
1300         // TODO: There should be an improved, explicit mechanism that ensures this copy is only executed
1301         //       when the forces are ready on the GPU -- the same synchronizer should be used as the one
1302         //       prior to GPU update.
1303         // TODO: When the output flags will be included in step workload, this copy can be combined with the
1304         //       copy call in do_force(...).
1305         // NOTE: The forces should not be copied here if the vsites are present, since they were modified
1306         //       on host after the D2H copy in do_force(...).
1307         if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite)
1308             && do_per_step(step, ir->nstfout))
1309         {
1310             stateGpu->copyForcesFromGpu(f.view().force(), AtomLocality::Local);
1311             stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
1312         }
1313         /* Now we have the energies and forces corresponding to the
1314          * coordinates at time t. We must output all of this before
1315          * the update.
1316          */
1317         do_md_trajectory_writing(fplog,
1318                                  cr,
1319                                  nfile,
1320                                  fnm,
1321                                  step,
1322                                  step_rel,
1323                                  t,
1324                                  ir,
1325                                  state,
1326                                  state_global,
1327                                  observablesHistory,
1328                                  top_global,
1329                                  fr,
1330                                  outf,
1331                                  energyOutput,
1332                                  ekind,
1333                                  f.view().force(),
1334                                  checkpointHandler->isCheckpointingStep(),
1335                                  bRerunMD,
1336                                  bLastStep,
1337                                  mdrunOptions.writeConfout,
1338                                  bSumEkinhOld);
1339         /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1340         bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x, t);
1341
1342         /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1343         if (startingBehavior != StartingBehavior::NewSimulation && bFirstStep
1344             && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1345         {
1346             copy_mat(state->svir_prev, shake_vir);
1347             copy_mat(state->fvir_prev, force_vir);
1348         }
1349
1350         stopHandler->setSignal();
1351         resetHandler->setSignal(walltime_accounting);
1352
1353         if (bGStat || !PAR(cr))
1354         {
1355             /* In parallel we only have to check for checkpointing in steps
1356              * where we do global communication,
1357              *  otherwise the other nodes don't know.
1358              */
1359             checkpointHandler->setSignal(walltime_accounting);
1360         }
1361
1362         /* #########   START SECOND UPDATE STEP ################# */
1363
1364         /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen
1365            controlled in preprocessing */
1366
1367         if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1368         {
1369             gmx_bool bIfRandomize;
1370             bIfRandomize = update_randomize_velocities(ir,
1371                                                        step,
1372                                                        cr,
1373                                                        md->homenr,
1374                                                        md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
1375                                                                : gmx::ArrayRef<const unsigned short>(),
1376                                                        gmx::arrayRefFromArray(md->invmass, md->nr),
1377                                                        state->v,
1378                                                        &upd,
1379                                                        constr);
1380             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1381             if (constr && bIfRandomize)
1382             {
1383                 constrain_velocities(constr, do_log, do_ene, step, state, nullptr, false, nullptr);
1384             }
1385         }
1386         /* Box is changed in update() when we do pressure coupling,
1387          * but we should still use the old box for energy corrections and when
1388          * writing it to the energy file, so it matches the trajectory files for
1389          * the same timestep above. Make a copy in a separate array.
1390          */
1391         copy_mat(state->box, lastbox);
1392
1393         dvdl_constr = 0;
1394
1395         if (!useGpuForUpdate)
1396         {
1397             wallcycle_start(wcycle, WallCycleCounter::Update);
1398         }
1399         /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1400         if (bTrotter)
1401         {
1402             trotter_update(ir,
1403                            step,
1404                            ekind,
1405                            enerd,
1406                            state,
1407                            total_vir,
1408                            md->homenr,
1409                            md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
1410                                    : gmx::ArrayRef<const unsigned short>(),
1411                            gmx::arrayRefFromArray(md->invmass, md->nr),
1412                            &MassQ,
1413                            trotter_seq,
1414                            TrotterSequence::Three);
1415             /* We can only do Berendsen coupling after we have summed
1416              * the kinetic energy or virial. Since the happens
1417              * in global_state after update, we should only do it at
1418              * step % nstlist = 1 with bGStatEveryStep=FALSE.
1419              */
1420         }
1421         else
1422         {
1423             update_tcouple(step,
1424                            ir,
1425                            state,
1426                            ekind,
1427                            &MassQ,
1428                            md->homenr,
1429                            md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
1430                                    : gmx::ArrayRef<const unsigned short>());
1431             update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep);
1432         }
1433
1434         /* With leap-frog type integrators we compute the kinetic energy
1435          * at a whole time step as the average of the half-time step kinetic
1436          * energies of two subsequent steps. Therefore we need to compute the
1437          * half step kinetic energy also if we need energies at the next step.
1438          */
1439         const bool needHalfStepKineticEnergy =
1440                 (!EI_VV(ir->eI) && (do_per_step(step + 1, nstglobalcomm) || step_rel + 1 == ir->nsteps));
1441
1442         // Parrinello-Rahman requires the pressure to be availible before the update to compute
1443         // the velocity scaling matrix. Hence, it runs one step after the nstpcouple step.
1444         const bool doParrinelloRahman = (ir->epc == PressureCoupling::ParrinelloRahman
1445                                          && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
1446
1447         if (EI_VV(ir->eI))
1448         {
1449             GMX_ASSERT(!useGpuForUpdate, "GPU update is not supported with VVAK integrator.");
1450
1451             integrateVVSecondStep(step,
1452                                   ir,
1453                                   fr,
1454                                   cr,
1455                                   state,
1456                                   mdAtoms->mdatoms(),
1457                                   &fcdata,
1458                                   &MassQ,
1459                                   &vcm,
1460                                   pull_work,
1461                                   enerd,
1462                                   &observablesReducer,
1463                                   ekind,
1464                                   gstat,
1465                                   &dvdl_constr,
1466                                   bCalcVir,
1467                                   total_vir,
1468                                   shake_vir,
1469                                   force_vir,
1470                                   pres,
1471                                   M,
1472                                   lastbox,
1473                                   do_log,
1474                                   do_ene,
1475                                   bGStat,
1476                                   &bSumEkinhOld,
1477                                   &f,
1478                                   &cbuf,
1479                                   &upd,
1480                                   constr,
1481                                   &nullSignaller,
1482                                   trotter_seq,
1483                                   nrnb,
1484                                   wcycle);
1485         }
1486         else
1487         {
1488             if (useGpuForUpdate)
1489             {
1490                 if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
1491                 {
1492                     integrator->set(stateGpu->getCoordinates(),
1493                                     stateGpu->getVelocities(),
1494                                     stateGpu->getForces(),
1495                                     top->idef,
1496                                     *md);
1497
1498                     // Copy data to the GPU after buffers might have being reinitialized
1499                     /* The velocity copy is redundant if we had Center-of-Mass motion removed on
1500                      * the previous step. We don't check that now. */
1501                     stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1502                     if (!runScheduleWork->stepWork.haveGpuPmeOnThisRank
1503                         && !runScheduleWork->stepWork.useGpuXBufferOps)
1504                     {
1505                         stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1506                     }
1507                 }
1508
1509                 if ((simulationWork.useGpuPme && simulationWork.useCpuPmePpCommunication)
1510                     || (!runScheduleWork->stepWork.useGpuFBufferOps))
1511                 {
1512                     // The PME forces were recieved to the host, and reduced on the CPU with the
1513                     // rest of the forces computed on the GPU, so the final forces have to be copied
1514                     // back to the GPU. Or the buffer ops were not offloaded this step, so the
1515                     // forces are on the host and have to be copied
1516                     stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::Local);
1517                 }
1518                 const bool doTemperatureScaling =
1519                         (ir->etc != TemperatureCoupling::No
1520                          && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1521
1522                 // This applies Leap-Frog, LINCS and SETTLE in succession
1523                 integrator->integrate(
1524                         stateGpu->getForcesReadyOnDeviceEvent(
1525                                 AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps),
1526                         ir->delta_t,
1527                         true,
1528                         bCalcVir,
1529                         shake_vir,
1530                         doTemperatureScaling,
1531                         ekind->tcstat,
1532                         doParrinelloRahman,
1533                         ir->nstpcouple * ir->delta_t,
1534                         M);
1535
1536                 // Copy velocities D2H after update if:
1537                 // - Globals are computed this step (includes the energy output steps).
1538                 // - Temperature is needed for the next step.
1539                 if (bGStat || needHalfStepKineticEnergy)
1540                 {
1541                     stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1542                     stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1543                 }
1544             }
1545             else
1546             {
1547                 /* With multiple time stepping we need to do an additional normal
1548                  * update step to obtain the virial, as the actual MTS integration
1549                  * using an acceleration where the slow forces are multiplied by mtsFactor.
1550                  * Using that acceleration would result in a virial with the slow
1551                  * force contribution would be a factor mtsFactor too large.
1552                  */
1553                 if (simulationWork.useMts && bCalcVir && constr != nullptr)
1554                 {
1555                     upd.update_for_constraint_virial(*ir,
1556                                                      md->homenr,
1557                                                      md->havePartiallyFrozenAtoms,
1558                                                      gmx::arrayRefFromArray(md->invmass, md->nr),
1559                                                      gmx::arrayRefFromArray(md->invMassPerDim, md->nr),
1560                                                      *state,
1561                                                      f.view().forceWithPadding(),
1562                                                      *ekind);
1563
1564                     constrain_coordinates(constr,
1565                                           do_log,
1566                                           do_ene,
1567                                           step,
1568                                           state,
1569                                           upd.xp()->arrayRefWithPadding(),
1570                                           &dvdl_constr,
1571                                           bCalcVir,
1572                                           shake_vir);
1573                 }
1574
1575                 ArrayRefWithPadding<const RVec> forceCombined =
1576                         (simulationWork.useMts && step % ir->mtsLevels[1].stepFactor == 0)
1577                                 ? f.view().forceMtsCombinedWithPadding()
1578                                 : f.view().forceWithPadding();
1579                 upd.update_coords(*ir,
1580                                   step,
1581                                   md->homenr,
1582                                   md->havePartiallyFrozenAtoms,
1583                                   gmx::arrayRefFromArray(md->ptype, md->nr),
1584                                   gmx::arrayRefFromArray(md->invmass, md->nr),
1585                                   gmx::arrayRefFromArray(md->invMassPerDim, md->nr),
1586                                   state,
1587                                   forceCombined,
1588                                   &fcdata,
1589                                   ekind,
1590                                   M,
1591                                   etrtPOSITION,
1592                                   cr,
1593                                   constr != nullptr);
1594
1595                 wallcycle_stop(wcycle, WallCycleCounter::Update);
1596
1597                 constrain_coordinates(constr,
1598                                       do_log,
1599                                       do_ene,
1600                                       step,
1601                                       state,
1602                                       upd.xp()->arrayRefWithPadding(),
1603                                       &dvdl_constr,
1604                                       bCalcVir && !simulationWork.useMts,
1605                                       shake_vir);
1606
1607                 upd.update_sd_second_half(*ir,
1608                                           step,
1609                                           &dvdl_constr,
1610                                           md->homenr,
1611                                           gmx::arrayRefFromArray(md->ptype, md->nr),
1612                                           gmx::arrayRefFromArray(md->invmass, md->nr),
1613                                           state,
1614                                           cr,
1615                                           nrnb,
1616                                           wcycle,
1617                                           constr,
1618                                           do_log,
1619                                           do_ene);
1620                 upd.finish_update(
1621                         *ir, md->havePartiallyFrozenAtoms, md->homenr, state, wcycle, constr != nullptr);
1622             }
1623
1624             if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1625             {
1626                 updatePrevStepPullCom(pull_work, state);
1627             }
1628
1629             enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1630         }
1631
1632         /* ############## IF NOT VV, Calculate globals HERE  ############ */
1633         /* With Leap-Frog we can skip compute_globals at
1634          * non-communication steps, but we need to calculate
1635          * the kinetic energy one step before communication.
1636          */
1637         {
1638             // Organize to do inter-simulation signalling on steps if
1639             // and when algorithms require it.
1640             const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1641
1642             if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
1643             {
1644                 // Copy coordinates when needed to stop the CM motion.
1645                 if (useGpuForUpdate && (bDoReplEx || (!EI_VV(ir->eI) && bStopCM)))
1646                 {
1647                     stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1648                     stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1649                 }
1650                 // Since we're already communicating at this step, we
1651                 // can propagate intra-simulation signals. Note that
1652                 // check_nstglobalcomm has the responsibility for
1653                 // choosing the value of nstglobalcomm that is one way
1654                 // bGStat becomes true, so we can't get into a
1655                 // situation where e.g. checkpointing can't be
1656                 // signalled.
1657                 bool                doIntraSimSignal = true;
1658                 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1659
1660                 compute_globals(
1661                         gstat,
1662                         cr,
1663                         ir,
1664                         fr,
1665                         ekind,
1666                         makeConstArrayRef(state->x),
1667                         makeConstArrayRef(state->v),
1668                         state->box,
1669                         md,
1670                         nrnb,
1671                         &vcm,
1672                         wcycle,
1673                         enerd,
1674                         force_vir,
1675                         shake_vir,
1676                         total_vir,
1677                         pres,
1678                         (!EI_VV(ir->eI) && bCalcEner && constr != nullptr) ? constr->rmsdData()
1679                                                                            : gmx::ArrayRef<real>{},
1680                         &signaller,
1681                         lastbox,
1682                         &bSumEkinhOld,
1683                         (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1684                                 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1685                                 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1686                                 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT,
1687                         step,
1688                         &observablesReducer);
1689                 if (!EI_VV(ir->eI) && bStopCM)
1690                 {
1691                     process_and_stopcm_grp(
1692                             fplog, &vcm, *md, makeArrayRef(state->x), makeArrayRef(state->v));
1693                     inc_nrnb(nrnb, eNR_STOPCM, md->homenr);
1694
1695                     // TODO: The special case of removing CM motion should be dealt more gracefully
1696                     if (useGpuForUpdate)
1697                     {
1698                         stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1699                         // Here we block until the H2D copy completes because event sync with the
1700                         // force kernels that use the coordinates on the next steps is not implemented
1701                         // (not because of a race on state->x being modified on the CPU while H2D is in progress).
1702                         stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
1703                         // If the COM removal changed the velocities on the CPU, this has to be accounted for.
1704                         if (vcm.mode != ComRemovalAlgorithm::No)
1705                         {
1706                             stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1707                         }
1708                     }
1709                 }
1710             }
1711         }
1712
1713         /* #############  END CALC EKIN AND PRESSURE ################# */
1714
1715         /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1716            the virial that should probably be addressed eventually. state->veta has better properies,
1717            but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1718            generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1719
1720         if (ir->efep != FreeEnergyPerturbationType::No && !EI_VV(ir->eI))
1721         {
1722             /* Sum up the foreign energy and dK/dl terms for md and sd.
1723                Currently done every step so that dH/dl is correct in the .edr */
1724             accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals);
1725         }
1726
1727         update_pcouple_after_coordinates(fplog,
1728                                          step,
1729                                          ir,
1730                                          md->homenr,
1731                                          md->cFREEZE ? gmx::arrayRefFromArray(md->cFREEZE, md->nr)
1732                                                      : gmx::ArrayRef<const unsigned short>(),
1733                                          pres,
1734                                          force_vir,
1735                                          shake_vir,
1736                                          pressureCouplingMu,
1737                                          state,
1738                                          nrnb,
1739                                          upd.deform(),
1740                                          !useGpuForUpdate);
1741
1742         const bool doBerendsenPressureCoupling = (inputrec->epc == PressureCoupling::Berendsen
1743                                                   && do_per_step(step, inputrec->nstpcouple));
1744         const bool doCRescalePressureCoupling  = (inputrec->epc == PressureCoupling::CRescale
1745                                                  && do_per_step(step, inputrec->nstpcouple));
1746         if (useGpuForUpdate
1747             && (doBerendsenPressureCoupling || doCRescalePressureCoupling || doParrinelloRahman))
1748         {
1749             integrator->scaleCoordinates(pressureCouplingMu);
1750             if (doCRescalePressureCoupling)
1751             {
1752                 matrix pressureCouplingInvMu;
1753                 gmx::invertBoxMatrix(pressureCouplingMu, pressureCouplingInvMu);
1754                 integrator->scaleVelocities(pressureCouplingInvMu);
1755             }
1756             integrator->setPbc(PbcType::Xyz, state->box);
1757         }
1758
1759         /* ################# END UPDATE STEP 2 ################# */
1760         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1761
1762         /* The coordinates (x) were unshifted in update */
1763         if (!bGStat)
1764         {
1765             /* We will not sum ekinh_old,
1766              * so signal that we still have to do it.
1767              */
1768             bSumEkinhOld = TRUE;
1769         }
1770
1771         if (bCalcEner)
1772         {
1773             /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1774
1775             /* use the directly determined last velocity, not actually the averaged half steps */
1776             if (bTrotter && ir->eI == IntegrationAlgorithm::VV)
1777             {
1778                 enerd->term[F_EKIN] = last_ekin;
1779             }
1780             enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1781
1782             if (integratorHasConservedEnergyQuantity(ir))
1783             {
1784                 if (EI_VV(ir->eI))
1785                 {
1786                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1787                 }
1788                 else
1789                 {
1790                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1791                 }
1792             }
1793             /* #########  END PREPARING EDR OUTPUT  ###########  */
1794         }
1795
1796         /* Output stuff */
1797         if (MASTER(cr))
1798         {
1799             if (fplog && do_log && bDoExpanded)
1800             {
1801                 /* only needed if doing expanded ensemble */
1802                 PrintFreeEnergyInfoToFile(fplog,
1803                                           ir->fepvals.get(),
1804                                           ir->expandedvals.get(),
1805                                           ir->bSimTemp ? ir->simtempvals.get() : nullptr,
1806                                           state_global->dfhist,
1807                                           state->fep_state,
1808                                           ir->nstlog,
1809                                           step);
1810             }
1811             if (bCalcEner)
1812             {
1813                 const bool outputDHDL = (computeDHDL && do_per_step(step, ir->fepvals->nstdhdl));
1814
1815                 energyOutput.addDataAtEnergyStep(outputDHDL,
1816                                                  bCalcEnerStep,
1817                                                  t,
1818                                                  md->tmass,
1819                                                  enerd,
1820                                                  ir->fepvals.get(),
1821                                                  ir->expandedvals.get(),
1822                                                  lastbox,
1823                                                  PTCouplingArrays{ state->boxv,
1824                                                                    state->nosehoover_xi,
1825                                                                    state->nosehoover_vxi,
1826                                                                    state->nhpres_xi,
1827                                                                    state->nhpres_vxi },
1828                                                  state->fep_state,
1829                                                  total_vir,
1830                                                  pres,
1831                                                  ekind,
1832                                                  mu_tot,
1833                                                  constr);
1834             }
1835             else
1836             {
1837                 energyOutput.recordNonEnergyStep();
1838             }
1839
1840             gmx_bool do_dr = do_per_step(step, ir->nstdisreout);
1841             gmx_bool do_or = do_per_step(step, ir->nstorireout);
1842
1843             if (doSimulatedAnnealing)
1844             {
1845                 gmx::EnergyOutput::printAnnealingTemperatures(
1846                         do_log ? fplog : nullptr, groups, &(ir->opts));
1847             }
1848             if (do_log || do_ene || do_dr || do_or)
1849             {
1850                 energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf),
1851                                                    do_ene,
1852                                                    do_dr,
1853                                                    do_or,
1854                                                    do_log ? fplog : nullptr,
1855                                                    step,
1856                                                    t,
1857                                                    fr->fcdata.get(),
1858                                                    awh.get());
1859             }
1860             if (do_log && ir->bDoAwh && awh->hasFepLambdaDimension())
1861             {
1862                 const bool isInitialOutput = false;
1863                 printLambdaStateToLog(fplog, state->lambda, isInitialOutput);
1864             }
1865
1866             if (ir->bPull)
1867             {
1868                 pull_print_output(pull_work, step, t);
1869             }
1870
1871             if (do_per_step(step, ir->nstlog))
1872             {
1873                 if (fflush(fplog) != 0)
1874                 {
1875                     gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1876                 }
1877             }
1878         }
1879         if (bDoExpanded)
1880         {
1881             /* Have to do this part _after_ outputting the logfile and the edr file */
1882             /* Gets written into the state at the beginning of next loop*/
1883             state->fep_state = lamnew;
1884         }
1885         else if (ir->bDoAwh && awh->needForeignEnergyDifferences(step))
1886         {
1887             state->fep_state = awh->fepLambdaState();
1888         }
1889         /* Print the remaining wall clock time for the run */
1890         if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting)
1891         {
1892             if (shellfc)
1893             {
1894                 fprintf(stderr, "\n");
1895             }
1896             print_time(stderr, walltime_accounting, step, ir, cr);
1897         }
1898
1899         /* Ion/water position swapping.
1900          * Not done in last step since trajectory writing happens before this call
1901          * in the MD loop and exchanges would be lost anyway. */
1902         bNeedRepartition = FALSE;
1903         if ((ir->eSwapCoords != SwapType::No) && (step > 0) && !bLastStep
1904             && do_per_step(step, ir->swap->nstswap))
1905         {
1906             bNeedRepartition = do_swapcoords(cr,
1907                                              step,
1908                                              t,
1909                                              ir,
1910                                              swap,
1911                                              wcycle,
1912                                              as_rvec_array(state->x.data()),
1913                                              state->box,
1914                                              MASTER(cr) && mdrunOptions.verbose,
1915                                              bRerunMD);
1916
1917             if (bNeedRepartition && DOMAINDECOMP(cr))
1918             {
1919                 dd_collect_state(cr->dd, state, state_global);
1920             }
1921         }
1922
1923         /* Replica exchange */
1924         bExchanged = FALSE;
1925         if (bDoReplEx)
1926         {
1927             bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t);
1928         }
1929
1930         if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr))
1931         {
1932             dd_partition_system(fplog,
1933                                 mdlog,
1934                                 step,
1935                                 cr,
1936                                 TRUE,
1937                                 1,
1938                                 state_global,
1939                                 top_global,
1940                                 *ir,
1941                                 imdSession,
1942                                 pull_work,
1943                                 state,
1944                                 &f,
1945                                 mdAtoms,
1946                                 top,
1947                                 fr,
1948                                 vsite,
1949                                 constr,
1950                                 nrnb,
1951                                 wcycle,
1952                                 FALSE);
1953             upd.updateAfterPartition(state->natoms,
1954                                      md->cFREEZE ? gmx::arrayRefFromArray(md->cFREEZE, md->nr)
1955                                                  : gmx::ArrayRef<const unsigned short>(),
1956                                      md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
1957                                              : gmx::ArrayRef<const unsigned short>());
1958             fr->longRangeNonbondeds->updateAfterPartition(*md);
1959         }
1960
1961         bFirstStep = FALSE;
1962         bInitStep  = FALSE;
1963
1964         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1965         /* With all integrators, except VV, we need to retain the pressure
1966          * at the current step for coupling at the next step.
1967          */
1968         if ((state->flags & enumValueToBitMask(StateEntry::PressurePrevious))
1969             && (bGStatEveryStep || (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1970         {
1971             /* Store the pressure in t_state for pressure coupling
1972              * at the next MD step.
1973              */
1974             copy_mat(pres, state->pres_prev);
1975         }
1976
1977         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1978
1979         if ((membed != nullptr) && (!bLastStep))
1980         {
1981             rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1982         }
1983
1984         cycles = wallcycle_stop(wcycle, WallCycleCounter::Step);
1985         if (DOMAINDECOMP(cr) && wcycle)
1986         {
1987             dd_cycles_add(cr->dd, cycles, ddCyclStep);
1988         }
1989
1990         /* increase the MD step number */
1991         step++;
1992         step_rel++;
1993
1994 #if GMX_FAHCORE
1995         if (MASTER(cr))
1996         {
1997             fcReportProgress(ir->nsteps + ir->init_step, step);
1998         }
1999 #endif
2000
2001         resetHandler->resetCounters(
2002                 step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
2003
2004         /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
2005         imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
2006     }
2007     /* End of main MD loop */
2008
2009     /* Closing TNG files can include compressing data. Therefore it is good to do that
2010      * before stopping the time measurements. */
2011     mdoutf_tng_close(outf);
2012
2013     /* Stop measuring walltime */
2014     walltime_accounting_end_time(walltime_accounting);
2015
2016     if (simulationWork.haveSeparatePmeRank)
2017     {
2018         /* Tell the PME only node to finish */
2019         gmx_pme_send_finish(cr);
2020     }
2021
2022     if (MASTER(cr))
2023     {
2024         if (ir->nstcalcenergy > 0)
2025         {
2026             energyOutput.printEnergyConservation(fplog, ir->simulation_part, EI_MD(ir->eI));
2027
2028             gmx::EnergyOutput::printAnnealingTemperatures(fplog, groups, &(ir->opts));
2029             energyOutput.printAverages(fplog, groups);
2030         }
2031     }
2032     done_mdoutf(outf);
2033
2034     if (bPMETune)
2035     {
2036         pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
2037     }
2038
2039     done_shellfc(fplog, shellfc, step_rel);
2040
2041     if (useReplicaExchange && MASTER(cr))
2042     {
2043         print_replica_exchange_statistics(fplog, repl_ex);
2044     }
2045
2046     walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
2047
2048     global_stat_destroy(gstat);
2049 }