2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2011-2019,2020,2021, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implements the integrator for normal molecular dynamics simulations
41 * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42 * \ingroup module_mdrun
55 #include "gromacs/applied_forces/awh/awh.h"
56 #include "gromacs/applied_forces/awh/read_params.h"
57 #include "gromacs/commandline/filenm.h"
58 #include "gromacs/domdec/collect.h"
59 #include "gromacs/domdec/dlbtiming.h"
60 #include "gromacs/domdec/domdec.h"
61 #include "gromacs/domdec/domdec_network.h"
62 #include "gromacs/domdec/domdec_struct.h"
63 #include "gromacs/domdec/gpuhaloexchange.h"
64 #include "gromacs/domdec/mdsetup.h"
65 #include "gromacs/domdec/partition.h"
66 #include "gromacs/essentialdynamics/edsam.h"
67 #include "gromacs/ewald/pme_load_balancing.h"
68 #include "gromacs/ewald/pme_pp.h"
69 #include "gromacs/fileio/trxio.h"
70 #include "gromacs/gmxlib/network.h"
71 #include "gromacs/gmxlib/nrnb.h"
72 #include "gromacs/gpu_utils/device_stream_manager.h"
73 #include "gromacs/gpu_utils/gpu_utils.h"
74 #include "gromacs/imd/imd.h"
75 #include "gromacs/listed_forces/listed_forces.h"
76 #include "gromacs/math/functions.h"
77 #include "gromacs/math/invertmatrix.h"
78 #include "gromacs/math/vec.h"
79 #include "gromacs/math/vectypes.h"
80 #include "gromacs/mdlib/checkpointhandler.h"
81 #include "gromacs/mdlib/compute_io.h"
82 #include "gromacs/mdlib/constr.h"
83 #include "gromacs/mdlib/coupling.h"
84 #include "gromacs/mdlib/ebin.h"
85 #include "gromacs/mdlib/enerdata_utils.h"
86 #include "gromacs/mdlib/energyoutput.h"
87 #include "gromacs/mdlib/expanded.h"
88 #include "gromacs/mdlib/force.h"
89 #include "gromacs/mdlib/force_flags.h"
90 #include "gromacs/mdlib/forcerec.h"
91 #include "gromacs/mdlib/freeenergyparameters.h"
92 #include "gromacs/mdlib/md_support.h"
93 #include "gromacs/mdlib/mdatoms.h"
94 #include "gromacs/mdlib/mdoutf.h"
95 #include "gromacs/mdlib/membed.h"
96 #include "gromacs/mdlib/resethandler.h"
97 #include "gromacs/mdlib/sighandler.h"
98 #include "gromacs/mdlib/simulationsignal.h"
99 #include "gromacs/mdlib/stat.h"
100 #include "gromacs/mdlib/stophandler.h"
101 #include "gromacs/mdlib/tgroup.h"
102 #include "gromacs/mdlib/trajectory_writing.h"
103 #include "gromacs/mdlib/update.h"
104 #include "gromacs/mdlib/update_constrain_gpu.h"
105 #include "gromacs/mdlib/update_vv.h"
106 #include "gromacs/mdlib/vcm.h"
107 #include "gromacs/mdlib/vsite.h"
108 #include "gromacs/mdrunutility/handlerestart.h"
109 #include "gromacs/mdrunutility/multisim.h"
110 #include "gromacs/mdrunutility/printtime.h"
111 #include "gromacs/mdtypes/awh_history.h"
112 #include "gromacs/mdtypes/awh_params.h"
113 #include "gromacs/mdtypes/commrec.h"
114 #include "gromacs/mdtypes/df_history.h"
115 #include "gromacs/mdtypes/energyhistory.h"
116 #include "gromacs/mdtypes/fcdata.h"
117 #include "gromacs/mdtypes/forcebuffers.h"
118 #include "gromacs/mdtypes/forcerec.h"
119 #include "gromacs/mdtypes/group.h"
120 #include "gromacs/mdtypes/inputrec.h"
121 #include "gromacs/mdtypes/interaction_const.h"
122 #include "gromacs/mdtypes/md_enums.h"
123 #include "gromacs/mdtypes/mdatom.h"
124 #include "gromacs/mdtypes/mdrunoptions.h"
125 #include "gromacs/mdtypes/multipletimestepping.h"
126 #include "gromacs/mdtypes/observableshistory.h"
127 #include "gromacs/mdtypes/pullhistory.h"
128 #include "gromacs/mdtypes/simulation_workload.h"
129 #include "gromacs/mdtypes/state.h"
130 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
131 #include "gromacs/modularsimulator/energydata.h"
132 #include "gromacs/nbnxm/gpu_data_mgmt.h"
133 #include "gromacs/nbnxm/nbnxm.h"
134 #include "gromacs/pbcutil/pbc.h"
135 #include "gromacs/pulling/output.h"
136 #include "gromacs/pulling/pull.h"
137 #include "gromacs/swap/swapcoords.h"
138 #include "gromacs/timing/wallcycle.h"
139 #include "gromacs/timing/walltime_accounting.h"
140 #include "gromacs/topology/atoms.h"
141 #include "gromacs/topology/idef.h"
142 #include "gromacs/topology/mtop_util.h"
143 #include "gromacs/topology/topology.h"
144 #include "gromacs/trajectory/trajectoryframe.h"
145 #include "gromacs/utility/basedefinitions.h"
146 #include "gromacs/utility/cstringutil.h"
147 #include "gromacs/utility/fatalerror.h"
148 #include "gromacs/utility/logger.h"
149 #include "gromacs/utility/real.h"
150 #include "gromacs/utility/smalloc.h"
152 #include "legacysimulator.h"
153 #include "replicaexchange.h"
156 using gmx::SimulationSignaller;
158 void gmx::LegacySimulator::do_md()
160 // TODO Historically, the EM and MD "integrators" used different
161 // names for the t_inputrec *parameter, but these must have the
162 // same name, now that it's a member of a struct. We use this ir
163 // alias to avoid a large ripple of nearly useless changes.
164 // t_inputrec is being replaced by IMdpOptionsProvider, so this
165 // will go away eventually.
166 const t_inputrec* ir = inputrec;
168 int64_t step, step_rel;
169 double t, t0 = ir->init_t;
170 gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
171 gmx_bool bNS = FALSE, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
172 gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
173 gmx_bool do_ene, do_log, do_verbose;
174 gmx_bool bMasterState;
175 unsigned int force_flags;
176 tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, pres = { { 0 } };
179 matrix pressureCouplingMu, M;
180 gmx_repl_ex_t repl_ex = nullptr;
181 gmx_global_stat_t gstat;
182 gmx_shellfc_t* shellfc;
183 gmx_bool bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
186 std::vector<RVec> cbuf;
192 real saved_conserved_quantity = 0;
195 char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
197 /* PME load balancing data for GPU kernels */
198 gmx_bool bPMETune = FALSE;
199 gmx_bool bPMETunePrinting = FALSE;
201 bool bInteractiveMDstep = false;
203 SimulationSignals signals;
204 // Most global communication stages don't propagate mdrun
205 // signals, and will use this object to achieve that.
206 SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
208 if (!mdrunOptions.writeConfout)
210 // This is on by default, and the main known use case for
211 // turning it off is for convenience in benchmarking, which is
212 // something that should not show up in the general user
217 "The -noconfout functionality is deprecated, and may be removed in a "
221 /* md-vv uses averaged full step velocities for T-control
222 md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
223 md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
224 bTrotter = (EI_VV(ir->eI)
225 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
227 const bool bRerunMD = false;
229 int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
230 bGStatEveryStep = (nstglobalcomm == 1);
232 const SimulationGroups* groups = &top_global.groups;
234 std::unique_ptr<EssentialDynamics> ed = nullptr;
235 if (opt2bSet("-ei", nfile, fnm))
237 /* Initialize essential dynamics sampling */
238 ed = init_edsam(mdlog,
239 opt2fn_null("-ei", nfile, fnm),
240 opt2fn("-eo", nfile, fnm),
250 else if (observablesHistory->edsamHistory)
253 "The checkpoint is from a run with essential dynamics sampling, "
254 "but the current run did not specify the -ei option. "
255 "Either specify the -ei option to mdrun, or do not use this checkpoint file.");
258 int* fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
259 gmx::ArrayRef<real> lambda = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
261 fplog, *ir, gmx::arrayRefFromArray(ir->opts.ref_t, ir->opts.ngtc), MASTER(cr), fep_state, lambda);
262 Update upd(*ir, deform);
263 bool doSimulatedAnnealing = false;
265 // TODO: Avoid changing inputrec (#3854)
266 // Simulated annealing updates the reference temperature.
267 auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
268 doSimulatedAnnealing = initSimulatedAnnealing(nonConstInputrec, &upd);
270 const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
272 const t_fcdata& fcdata = *fr->fcdata;
274 bool simulationsShareState = false;
275 int nstSignalComm = nstglobalcomm;
277 // TODO This implementation of ensemble orientation restraints is nasty because
278 // a user can't just do multi-sim with single-sim orientation restraints.
279 bool usingEnsembleRestraints =
280 (fcdata.disres->nsystems > 1) || ((ms != nullptr) && (fcdata.orires->nr != 0));
281 bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim() && (ms != nullptr));
283 // Replica exchange, ensemble restraints and AWH need all
284 // simulations to remain synchronized, so they need
285 // checkpoints and stop conditions to act on the same step, so
286 // the propagation of such signals must take place between
287 // simulations, not just within simulations.
288 // TODO: Make algorithm initializers set these flags.
289 simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
291 if (simulationsShareState)
293 // Inter-simulation signal communication does not need to happen
294 // often, so we use a minimum of 200 steps to reduce overhead.
295 const int c_minimumInterSimulationSignallingInterval = 200;
296 nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1) / nstglobalcomm)
301 if (startingBehavior != StartingBehavior::RestartWithAppending)
303 pleaseCiteCouplingAlgorithms(fplog, *ir);
305 gmx_mdoutf* outf = init_mdoutf(fplog,
317 simulationsShareState,
319 gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
323 mdoutf_get_fp_dhdl(outf),
326 simulationsShareState,
329 gstat = global_stat_init(ir);
331 const auto& simulationWork = runScheduleWork->simulationWork;
332 const bool useGpuForPme = simulationWork.useGpuPme;
333 const bool useGpuForNonbonded = simulationWork.useGpuNonbonded;
334 const bool useGpuForBufferOps = simulationWork.useGpuBufferOps;
335 const bool useGpuForUpdate = simulationWork.useGpuUpdate;
337 /* Check for polarizable models and flexible constraints */
338 shellfc = init_shell_flexcon(fplog,
340 constr ? constr->numFlexibleConstraints() : 0,
346 double io = compute_io(ir, top_global.natoms, *groups, energyOutput.numEnergyTerms(), 1);
347 if ((io > 2000) && MASTER(cr))
349 fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io);
353 // Local state only becomes valid now.
354 std::unique_ptr<t_state> stateInstance;
357 gmx_localtop_t top(top_global.ffparams);
359 auto mdatoms = mdAtoms->mdatoms();
361 ForceBuffers f(fr->useMts,
362 ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
363 ? PinningPolicy::PinnedIfSupported
364 : PinningPolicy::CannotBePinned);
365 if (DOMAINDECOMP(cr))
367 stateInstance = std::make_unique<t_state>();
368 state = stateInstance.get();
369 dd_init_local_state(*cr->dd, state_global, state);
371 /* Distribute the charge groups over the nodes from the master node */
372 dd_partition_system(fplog,
393 upd.setNumAtoms(state->natoms);
397 state_change_natoms(state_global, state_global->natoms);
398 /* Copy the pointer to the global state */
399 state = state_global;
401 /* Generate and initialize new topology */
402 mdAlgorithmsSetupAtomData(cr, *ir, top_global, &top, fr, &f, mdAtoms, constr, vsite, shellfc);
404 upd.setNumAtoms(state->natoms);
407 std::unique_ptr<UpdateConstrainGpu> integrator;
409 StatePropagatorDataGpu* stateGpu = fr->stateGpu;
411 // TODO: the assertions below should be handled by UpdateConstraintsBuilder.
414 GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
415 || constr->numConstraintsTotal() == 0,
416 "Constraints in domain decomposition are only supported with update "
417 "groups if using GPU update.\n");
418 GMX_RELEASE_ASSERT(ir->eConstrAlg != ConstraintAlgorithm::Shake || constr == nullptr
419 || constr->numConstraintsTotal() == 0,
420 "SHAKE is not supported with GPU update.");
421 GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
422 "Either PME or short-ranged non-bonded interaction tasks must run on "
423 "the GPU to use GPU update.\n");
424 GMX_RELEASE_ASSERT(ir->eI == IntegrationAlgorithm::MD,
425 "Only the md integrator is supported with the GPU update.\n");
427 ir->etc != TemperatureCoupling::NoseHoover,
428 "Nose-Hoover temperature coupling is not supported with the GPU update.\n");
430 ir->epc == PressureCoupling::No || ir->epc == PressureCoupling::ParrinelloRahman
431 || ir->epc == PressureCoupling::Berendsen || ir->epc == PressureCoupling::CRescale,
432 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are supported "
433 "with the GPU update.\n");
434 GMX_RELEASE_ASSERT(!mdatoms->haveVsites,
435 "Virtual sites are not supported with the GPU update.\n");
436 GMX_RELEASE_ASSERT(ed == nullptr,
437 "Essential dynamics is not supported with the GPU update.\n");
438 GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(*ir->pull),
439 "Constraints pulling is not supported with the GPU update.\n");
440 GMX_RELEASE_ASSERT(fcdata.orires->nr == 0,
441 "Orientation restraints are not supported with the GPU update.\n");
443 ir->efep == FreeEnergyPerturbationType::No
444 || (!haveFepPerturbedMasses(top_global) && !havePerturbedConstraints(top_global)),
445 "Free energy perturbation of masses and constraints are not supported with the GPU "
448 if (constr != nullptr && constr->numConstraintsTotal() > 0)
452 .appendText("Updating coordinates and applying constraints on the GPU.");
456 GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
458 GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
459 "Device stream manager should be initialized in order to use GPU "
460 "update-constraints.");
462 fr->deviceStreamManager->streamIsValid(gmx::DeviceStreamType::UpdateAndConstraints),
463 "Update stream should be initialized in order to use GPU "
464 "update-constraints.");
465 integrator = std::make_unique<UpdateConstrainGpu>(
469 fr->deviceStreamManager->context(),
470 fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints),
471 stateGpu->xUpdatedOnDevice(),
474 integrator->setPbc(PbcType::Xyz, state->box);
477 if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
479 changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
483 changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported);
486 // NOTE: The global state is no longer used at this point.
487 // But state_global is still used as temporary storage space for writing
488 // the global state to file and potentially for replica exchange.
489 // (Global topology should persist.)
491 update_mdatoms(mdatoms, state->lambda[FreeEnergyPerturbationCouplingType::Mass]);
495 /* Check nstexpanded here, because the grompp check was broken */
496 if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
499 "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
501 init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation, ir, state->dfhist);
506 EnergyData::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput);
509 preparePrevStepPullCom(ir,
511 gmx::arrayRefFromArray(mdatoms->massT, mdatoms->nr),
515 startingBehavior != StartingBehavior::NewSimulation);
517 // TODO: Remove this by converting AWH into a ForceProvider
518 auto awh = prepareAwhModule(fplog,
523 startingBehavior != StartingBehavior::NewSimulation,
525 opt2fn("-awh", nfile, fnm),
528 if (useReplicaExchange && MASTER(cr))
530 repl_ex = init_replica_exchange(fplog, ms, top_global.natoms, ir, replExParams);
532 /* PME tuning is only supported in the Verlet scheme, with PME for
533 * Coulomb. It is not supported with only LJ PME. */
534 bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && !mdrunOptions.reproducible
535 && ir->cutoff_scheme != CutoffScheme::Group);
537 pme_load_balancing_t* pme_loadbal = nullptr;
541 &pme_loadbal, cr, mdlog, *ir, state->box, *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu());
544 if (!ir->bContinuation)
546 if (state->flags & enumValueToBitMask(StateEntry::V))
548 auto v = makeArrayRef(state->v);
549 /* Set the velocities of vsites, shells and frozen atoms to zero */
550 for (i = 0; i < mdatoms->homenr; i++)
552 if (mdatoms->ptype[i] == ParticleType::Shell)
556 else if (mdatoms->cFREEZE)
558 for (m = 0; m < DIM; m++)
560 if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
571 /* Constrain the initial coordinates and velocities */
572 do_constrain_first(fplog,
577 state->x.arrayRefWithPadding(),
578 state->v.arrayRefWithPadding(),
580 state->lambda[FreeEnergyPerturbationCouplingType::Bonded]);
584 if (ir->efep != FreeEnergyPerturbationType::No)
586 /* Set free energy calculation frequency as the greatest common
587 * divisor of nstdhdl and repl_ex_nst. */
588 nstfep = ir->fepvals->nstdhdl;
591 nstfep = std::gcd(ir->expandedvals->nstexpanded, nstfep);
593 if (useReplicaExchange)
595 nstfep = std::gcd(replExParams.exchangeInterval, nstfep);
599 nstfep = std::gcd(ir->awhParams->nstSampleCoord(), nstfep);
603 /* Be REALLY careful about what flags you set here. You CANNOT assume
604 * this is the first step, since we might be restarting from a checkpoint,
605 * and in that case we should not do any modifications to the state.
607 bStopCM = (ir->comm_mode != ComRemovalAlgorithm::No && !ir->bContinuation);
609 // When restarting from a checkpoint, it can be appropriate to
610 // initialize ekind from quantities in the checkpoint. Otherwise,
611 // compute_globals must initialize ekind before the simulation
612 // starts/restarts. However, only the master rank knows what was
613 // found in the checkpoint file, so we have to communicate in
614 // order to coordinate the restart.
616 // TODO Consider removing this communication if/when checkpoint
617 // reading directly follows .tpr reading, because all ranks can
618 // agree on hasReadEkinState at that time.
619 bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
622 gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr->mpi_comm_mygroup);
624 if (hasReadEkinState)
626 restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
629 unsigned int cglo_flags =
630 (CGLO_TEMPERATURE | CGLO_GSTAT | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
631 | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) | (hasReadEkinState ? CGLO_READEKIN : 0));
633 bSumEkinhOld = FALSE;
635 t_vcm vcm(top_global.groups, *ir);
636 reportComRemovalInfo(fplog, vcm);
638 /* To minimize communication, compute_globals computes the COM velocity
639 * and the kinetic energy for the velocities without COM motion removed.
640 * Thus to get the kinetic energy without the COM contribution, we need
641 * to call compute_globals twice.
643 for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
645 unsigned int cglo_flags_iteration = cglo_flags;
646 if (bStopCM && cgloIteration == 0)
648 cglo_flags_iteration |= CGLO_STOPCM;
649 cglo_flags_iteration &= ~CGLO_TEMPERATURE;
651 if (DOMAINDECOMP(cr) && shouldCheckNumberOfBondedInteractions(*cr->dd) && cgloIteration == 0)
653 cglo_flags_iteration |= CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS;
655 compute_globals(gstat,
660 makeConstArrayRef(state->x),
661 makeConstArrayRef(state->v),
672 gmx::ArrayRef<real>{},
676 cglo_flags_iteration);
677 if (cglo_flags_iteration & CGLO_STOPCM)
679 /* At initialization, do not pass x with acceleration-correction mode
680 * to avoid (incorrect) correction of the initial coordinates.
682 auto x = (vcm.mode == ComRemovalAlgorithm::LinearAccelerationCorrection)
684 : makeArrayRef(state->x);
685 process_and_stopcm_grp(fplog, &vcm, *mdatoms, x, makeArrayRef(state->v));
686 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
689 if (DOMAINDECOMP(cr))
691 checkNumberOfBondedInteractions(
692 mdlog, cr, top_global, &top, makeConstArrayRef(state->x), state->box);
694 if (ir->eI == IntegrationAlgorithm::VVAK)
696 /* a second call to get the half step temperature initialized as well */
697 /* we do the same call as above, but turn the pressure off -- internally to
698 compute_globals, this is recognized as a velocity verlet half-step
699 kinetic energy calculation. This minimizes extra variables, but
700 perhaps loses some logic?*/
702 compute_globals(gstat,
707 makeConstArrayRef(state->x),
708 makeConstArrayRef(state->v),
719 gmx::ArrayRef<real>{},
723 cglo_flags & ~CGLO_PRESSURE);
726 /* Calculate the initial half step temperature, and save the ekinh_old */
727 if (startingBehavior == StartingBehavior::NewSimulation)
729 for (i = 0; (i < ir->opts.ngtc); i++)
731 copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
735 /* need to make an initiation call to get the Trotter variables set, as well as other constants
736 for non-trotter temperature control */
737 auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
741 if (!ir->bContinuation)
743 if (constr && ir->eConstrAlg == ConstraintAlgorithm::Lincs)
746 "RMS relative constraint deviation after constraining: %.2e\n",
749 if (EI_STATE_VELOCITY(ir->eI))
751 real temp = enerd->term[F_TEMP];
752 if (ir->eI != IntegrationAlgorithm::VV)
754 /* Result of Ekin averaged over velocities of -half
755 * and +half step, while we only have -half step here.
759 fprintf(fplog, "Initial temperature: %g K\n", temp);
764 fprintf(stderr, "starting mdrun '%s'\n", *(top_global.name));
767 sprintf(tbuf, "%8.1f", (ir->init_step + ir->nsteps) * ir->delta_t);
771 sprintf(tbuf, "%s", "infinite");
773 if (ir->init_step > 0)
776 "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
777 gmx_step_str(ir->init_step + ir->nsteps, sbuf),
779 gmx_step_str(ir->init_step, sbuf2),
780 ir->init_step * ir->delta_t);
784 fprintf(stderr, "%s steps, %s ps.\n", gmx_step_str(ir->nsteps, sbuf), tbuf);
786 fprintf(fplog, "\n");
789 walltime_accounting_start_time(walltime_accounting);
790 wallcycle_start(wcycle, ewcRUN);
791 print_start(fplog, cr, walltime_accounting, "mdrun");
793 /***********************************************************
797 ************************************************************/
800 /* Skip the first Nose-Hoover integration when we get the state from tpx */
801 bInitStep = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
802 bSumEkinhOld = FALSE;
804 bNeedRepartition = FALSE;
806 step = ir->init_step;
809 auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
810 compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]),
811 simulationsShareState,
814 mdrunOptions.reproducible,
816 mdrunOptions.maximumHoursToRun,
821 walltime_accounting);
823 auto checkpointHandler = std::make_unique<CheckpointHandler>(
824 compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
825 simulationsShareState,
828 mdrunOptions.writeConfout,
829 mdrunOptions.checkpointOptions.period);
831 const bool resetCountersIsLocal = true;
832 auto resetHandler = std::make_unique<ResetHandler>(
833 compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]),
834 !resetCountersIsLocal,
837 mdrunOptions.timingOptions.resetHalfway,
838 mdrunOptions.maximumHoursToRun,
841 walltime_accounting);
843 const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
845 if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
847 logInitialMultisimStatus(ms, cr, mdlog, simulationsShareState, ir->nsteps, ir->init_step);
850 /* and stop now if we should */
851 bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
855 /* Determine if this is a neighbor search step */
856 bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
858 if (bPMETune && bNStList)
860 // This has to be here because PME load balancing is called so early.
861 // TODO: Move to after all booleans are defined.
862 if (useGpuForUpdate && !bFirstStep)
864 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
865 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
867 /* PME grid + cut-off optimization with GPUs or PME nodes */
868 pme_loadbal_do(pme_loadbal,
870 (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
881 simulationWork.useGpuPmePpCommunication);
884 wallcycle_start(wcycle, ewcSTEP);
886 bLastStep = (step_rel == ir->nsteps);
887 t = t0 + step * ir->delta_t;
889 // TODO Refactor this, so that nstfep does not need a default value of zero
890 if (ir->efep != FreeEnergyPerturbationType::No || ir->bSimTemp)
892 /* find and set the current lambdas */
893 state->lambda = currentLambdas(step, *(ir->fepvals), state->fep_state);
895 bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl);
896 bDoFEP = ((ir->efep != FreeEnergyPerturbationType::No) && do_per_step(step, nstfep));
897 bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded)
901 bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep
902 && do_per_step(step, replExParams.exchangeInterval));
904 if (doSimulatedAnnealing)
906 // TODO: Avoid changing inputrec (#3854)
907 // Simulated annealing updates the reference temperature.
908 auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
909 update_annealing_target_temp(nonConstInputrec, t, &upd);
912 /* Stop Center of Mass motion */
913 bStopCM = (ir->comm_mode != ComRemovalAlgorithm::No && do_per_step(step, ir->nstcomm));
915 /* Determine whether or not to do Neighbour Searching */
916 bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
918 /* Note that the stopHandler will cause termination at nstglobalcomm
919 * steps. Since this coincides with nstcalcenergy, nsttcouple and/or
920 * nstpcouple steps, we have computed the half-step kinetic energy
921 * of the previous step and can always output energies at the last step.
923 bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
925 /* do_log triggers energy and virial calculation. Because this leads
926 * to different code paths, forces can be different. Thus for exact
927 * continuation we should avoid extra log output.
928 * Note that the || bLastStep can result in non-exact continuation
929 * beyond the last step. But we don't consider that to be an issue.
931 do_log = (do_per_step(step, ir->nstlog)
932 || (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) || bLastStep);
933 do_verbose = mdrunOptions.verbose
934 && (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
936 if (useGpuForUpdate && !bFirstStep && bNS)
938 // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
939 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
940 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
941 // Copy coordinate from the GPU when needed at the search step.
942 // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
943 // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
944 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
945 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
948 // We only need to calculate virtual velocities if we are writing them in the current step
949 const bool needVirtualVelocitiesThisStep =
951 && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep());
953 if (vsite != nullptr)
955 // Virtual sites need to be updated before domain decomposition and forces are calculated
956 wallcycle_start(wcycle, ewcVSITECONSTR);
957 // md-vv calculates virtual velocities once it has full-step real velocities
958 vsite->construct(state->x,
961 (!EI_VV(inputrec->eI) && needVirtualVelocitiesThisStep)
962 ? VSiteOperation::PositionsAndVelocities
963 : VSiteOperation::Positions);
964 wallcycle_stop(wcycle, ewcVSITECONSTR);
967 if (bNS && !(bFirstStep && ir->bContinuation))
969 bMasterState = FALSE;
970 /* Correct the new box if it is too skewed */
971 if (inputrecDynamicBox(ir))
973 if (correct_box(fplog, step, state->box))
976 // If update is offloaded, it should be informed about the box size change
979 integrator->setPbc(PbcType::Xyz, state->box);
983 if (DOMAINDECOMP(cr) && bMasterState)
985 dd_collect_state(cr->dd, state, state_global);
988 if (DOMAINDECOMP(cr))
990 /* Repartition the domain decomposition */
991 dd_partition_system(fplog,
1011 do_verbose && !bPMETunePrinting);
1012 upd.setNumAtoms(state->natoms);
1016 // Allocate or re-size GPU halo exchange object, if necessary
1017 if (bNS && havePPDomainDecomposition(cr) && simulationWork.useGpuHaloExchange)
1019 GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
1020 "GPU device manager has to be initialized to use GPU "
1021 "version of halo exchange.");
1022 constructGpuHaloExchange(mdlog, *cr, *fr->deviceStreamManager, wcycle);
1025 if (MASTER(cr) && do_log)
1027 gmx::EnergyOutput::printHeader(
1028 fplog, step, t); /* can we improve the information printed here? */
1031 if (ir->efep != FreeEnergyPerturbationType::No)
1033 update_mdatoms(mdatoms, state->lambda[FreeEnergyPerturbationCouplingType::Mass]);
1038 /* We need the kinetic energy at minus the half step for determining
1039 * the full step kinetic energy and possibly for T-coupling.*/
1040 /* This may not be quite working correctly yet . . . . */
1041 int cglo_flags = CGLO_GSTAT | CGLO_TEMPERATURE;
1042 if (DOMAINDECOMP(cr) && shouldCheckNumberOfBondedInteractions(*cr->dd))
1044 cglo_flags |= CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS;
1046 compute_globals(gstat,
1051 makeConstArrayRef(state->x),
1052 makeConstArrayRef(state->v),
1063 gmx::ArrayRef<real>{},
1068 if (DOMAINDECOMP(cr))
1070 checkNumberOfBondedInteractions(
1071 mdlog, cr, top_global, &top, makeConstArrayRef(state->x), state->box);
1074 clear_mat(force_vir);
1076 checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
1078 /* Determine the energy and pressure:
1079 * at nstcalcenergy steps and at energy output steps (set below).
1081 if (EI_VV(ir->eI) && (!bInitStep))
1083 bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
1084 bCalcVir = bCalcEnerStep
1085 || (ir->epc != PressureCoupling::No
1086 && (do_per_step(step, ir->nstpcouple) || do_per_step(step - 1, ir->nstpcouple)));
1090 bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
1091 bCalcVir = bCalcEnerStep
1092 || (ir->epc != PressureCoupling::No && do_per_step(step, ir->nstpcouple));
1094 bCalcEner = bCalcEnerStep;
1096 do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
1098 if (do_ene || do_log || bDoReplEx)
1104 /* Do we need global communication ? */
1105 bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm)
1106 || (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm)));
1108 force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0)
1109 | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0)
1110 | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0));
1111 if (fr->useMts && !do_per_step(step, ir->nstfout))
1113 force_flags |= GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE;
1118 /* Now is the time to relax the shells */
1119 relax_shell_flexcon(fplog,
1122 mdrunOptions.verbose,
1134 state->x.arrayRefWithPadding(),
1135 state->v.arrayRefWithPadding(),
1150 ddBalanceRegionHandler);
1154 /* The AWH history need to be saved _before_ doing force calculations where the AWH bias
1155 is updated (or the AWH update will be performed twice for one step when continuing).
1156 It would be best to call this update function from do_md_trajectory_writing but that
1157 would occur after do_force. One would have to divide the update_awh function into one
1158 function applying the AWH force and one doing the AWH bias update. The update AWH
1159 bias function could then be called after do_md_trajectory_writing (then containing
1160 update_awh_history). The checkpointing will in the future probably moved to the start
1161 of the md loop which will get rid of this issue. */
1162 if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
1164 awh->updateHistory(state_global->awhHistory.get());
1167 /* The coordinates (x) are shifted (to get whole molecules)
1169 * This is parallelized as well, and does communication too.
1170 * Check comments in sim_util.c
1185 state->x.arrayRefWithPadding(),
1197 ed ? ed->getLegacyED() : nullptr,
1198 (bNS ? GMX_FORCE_NS : 0) | force_flags,
1199 ddBalanceRegionHandler);
1202 // VV integrators do not need the following velocity half step
1203 // if it is the first step after starting from a checkpoint.
1204 // That is, the half step is needed on all other steps, and
1205 // also the first step when starting from a .tpr file.
1208 integrateVVFirstStep(step,
1241 &saved_conserved_quantity,
1251 if (vsite != nullptr && needVirtualVelocitiesThisStep)
1253 // Positions were calculated earlier
1254 wallcycle_start(wcycle, ewcVSITECONSTR);
1255 vsite->construct(state->x, state->v, state->box, VSiteOperation::Velocities);
1256 wallcycle_stop(wcycle, ewcVSITECONSTR);
1260 /* ######## END FIRST UPDATE STEP ############## */
1261 /* ######## If doing VV, we now have v(dt) ###### */
1264 /* perform extended ensemble sampling in lambda - we don't
1265 actually move to the new state before outputting
1266 statistics, but if performing simulated tempering, we
1267 do update the velocities and the tau_t. */
1268 // TODO: Avoid changing inputrec (#3854)
1269 // Simulated tempering updates the reference temperature.
1270 // Expanded ensemble without simulated tempering does not change the inputrec.
1271 auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
1272 lamnew = ExpandedEnsembleDynamics(fplog,
1280 state->v.rvec_array(),
1282 /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1285 copy_df_history(state_global->dfhist, state->dfhist);
1289 // Copy coordinate from the GPU for the output/checkpointing if the update is offloaded and
1290 // coordinates have not already been copied for i) search or ii) CPU force tasks.
1291 if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork
1292 && (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)
1293 || checkpointHandler->isCheckpointingStep()))
1295 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1296 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1298 // Copy velocities if needed for the output/checkpointing.
1299 // NOTE: Copy on the search steps is done at the beginning of the step.
1300 if (useGpuForUpdate && !bNS
1301 && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep()))
1303 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1304 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1306 // Copy forces for the output if the forces were reduced on the GPU (not the case on virial steps)
1307 // and update is offloaded hence forces are kept on the GPU for update and have not been
1308 // already transferred in do_force().
1309 // TODO: There should be an improved, explicit mechanism that ensures this copy is only executed
1310 // when the forces are ready on the GPU -- the same synchronizer should be used as the one
1311 // prior to GPU update.
1312 // TODO: When the output flags will be included in step workload, this copy can be combined with the
1313 // copy call in do_force(...).
1314 // NOTE: The forces should not be copied here if the vsites are present, since they were modified
1315 // on host after the D2H copy in do_force(...).
1316 if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite)
1317 && do_per_step(step, ir->nstfout))
1319 stateGpu->copyForcesFromGpu(f.view().force(), AtomLocality::Local);
1320 stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
1322 /* Now we have the energies and forces corresponding to the
1323 * coordinates at time t. We must output all of this before
1326 do_md_trajectory_writing(fplog,
1343 checkpointHandler->isCheckpointingStep(),
1346 mdrunOptions.writeConfout,
1348 /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1349 bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x, t);
1351 /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1352 if (startingBehavior != StartingBehavior::NewSimulation && bFirstStep
1353 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1355 copy_mat(state->svir_prev, shake_vir);
1356 copy_mat(state->fvir_prev, force_vir);
1359 stopHandler->setSignal();
1360 resetHandler->setSignal(walltime_accounting);
1362 if (bGStat || !PAR(cr))
1364 /* In parallel we only have to check for checkpointing in steps
1365 * where we do global communication,
1366 * otherwise the other nodes don't know.
1368 checkpointHandler->setSignal(walltime_accounting);
1371 /* ######### START SECOND UPDATE STEP ################# */
1373 /* at the start of step, randomize or scale the velocities (if vv). Restriction of Andersen
1374 controlled in preprocessing */
1376 if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1378 gmx_bool bIfRandomize;
1379 bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1380 /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1381 if (constr && bIfRandomize)
1383 constrain_velocities(constr, do_log, do_ene, step, state, nullptr, false, nullptr);
1386 /* Box is changed in update() when we do pressure coupling,
1387 * but we should still use the old box for energy corrections and when
1388 * writing it to the energy file, so it matches the trajectory files for
1389 * the same timestep above. Make a copy in a separate array.
1391 copy_mat(state->box, lastbox);
1395 if (!useGpuForUpdate)
1397 wallcycle_start(wcycle, ewcUPDATE);
1399 /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1402 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1403 /* We can only do Berendsen coupling after we have summed
1404 * the kinetic energy or virial. Since this happens
1405 * in global_state after update, we should only do it at
1406 * step % nstlist = 1 with bGStatEveryStep=FALSE.
1411 update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1412 update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep);
1415 /* With leap-frog type integrators we compute the kinetic energy
1416 * at a whole time step as the average of the half-time step kinetic
1417 * energies of two subsequent steps. Therefore we need to compute the
1418 * half step kinetic energy also if we need energies at the next step.
1420 const bool needHalfStepKineticEnergy =
1421 (!EI_VV(ir->eI) && (do_per_step(step + 1, nstglobalcomm) || step_rel + 1 == ir->nsteps));
1423 // Parrinello-Rahman requires the pressure to be available before the update to compute
1424 // the velocity scaling matrix. Hence, it runs one step after the nstpcouple step.
1425 const bool doParrinelloRahman = (ir->epc == PressureCoupling::ParrinelloRahman
1426 && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
1430 GMX_ASSERT(!useGpuForUpdate, "GPU update is not supported with VVAK integrator.");
1432 integrateVVSecondStep(step,
1468 if (useGpuForUpdate)
1470 if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
1472 integrator->set(stateGpu->getCoordinates(),
1473 stateGpu->getVelocities(),
1474 stateGpu->getForces(),
1478 // Copy data to the GPU after buffers might have been reinitialized
1479 stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1480 stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1483 if (simulationWork.useGpuPme && !runScheduleWork->simulationWork.useGpuPmePpCommunication
1484 && !thisRankHasDuty(cr, DUTY_PME))
1486 // The PME forces were received by the host, so have to be copied
1487 stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::All);
1489 else if (!runScheduleWork->stepWork.useGpuFBufferOps)
1491 // The buffer ops were not offloaded this step, so the forces are on the
1492 // host and have to be copied
1493 stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::Local);
1496 const bool doTemperatureScaling =
1497 (ir->etc != TemperatureCoupling::No
1498 && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1500 // This applies Leap-Frog, LINCS and SETTLE in succession
1501 integrator->integrate(
1502 stateGpu->getForcesReadyOnDeviceEvent(
1503 AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps),
1508 doTemperatureScaling,
1511 ir->nstpcouple * ir->delta_t,
1514 // Copy velocities D2H after update if:
1515 // - Globals are computed this step (includes the energy output steps).
1516 // - Temperature is needed for the next step.
1517 if (bGStat || needHalfStepKineticEnergy)
1519 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1520 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1525 /* With multiple time stepping we need to do an additional normal
1526 * update step to obtain the virial, as the actual MTS integration
1527 * using an acceleration where the slow forces are multiplied by mtsFactor.
1528 * Using that acceleration would result in a virial where the slow
1529 * force contribution is a factor mtsFactor too large.
1531 if (fr->useMts && bCalcVir && constr != nullptr)
1533 upd.update_for_constraint_virial(
1534 *ir, *mdatoms, *state, f.view().forceWithPadding(), *ekind);
1536 constrain_coordinates(constr,
1541 upd.xp()->arrayRefWithPadding(),
1547 ArrayRefWithPadding<const RVec> forceCombined =
1548 (fr->useMts && step % ir->mtsLevels[1].stepFactor == 0)
1549 ? f.view().forceMtsCombinedWithPadding()
1550 : f.view().forceWithPadding();
1552 *ir, step, mdatoms, state, forceCombined, fcdata, ekind, M, etrtPOSITION, cr, constr != nullptr);
1554 wallcycle_stop(wcycle, ewcUPDATE);
1556 constrain_coordinates(constr,
1561 upd.xp()->arrayRefWithPadding(),
1563 bCalcVir && !fr->useMts,
1566 upd.update_sd_second_half(
1567 *ir, step, &dvdl_constr, mdatoms, state, cr, nrnb, wcycle, constr, do_log, do_ene);
1568 upd.finish_update(*ir, mdatoms, state, wcycle, constr != nullptr);
1571 if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1573 updatePrevStepPullCom(pull_work, state);
1576 enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1579 /* ############## IF NOT VV, Calculate globals HERE ############ */
1580 /* With Leap-Frog we can skip compute_globals at
1581 * non-communication steps, but we need to calculate
1582 * the kinetic energy one step before communication.
1585 // Organize to do inter-simulation signalling on steps if
1586 // and when algorithms require it.
1587 const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1589 if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
1591 // Copy coordinates when needed to stop the CM motion.
1592 if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
1594 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1595 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1597 // Since we're already communicating at this step, we
1598 // can propagate intra-simulation signals. Note that
1599 // check_nstglobalcomm has the responsibility for
1600 // choosing the value of nstglobalcomm that is one way
1601 // bGStat becomes true, so we can't get into a
1602 // situation where e.g. checkpointing can't be
1604 bool doIntraSimSignal = true;
1605 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1613 makeConstArrayRef(state->x),
1614 makeConstArrayRef(state->v),
1625 (!EI_VV(ir->eI) && bCalcEner && constr != nullptr) ? constr->rmsdData()
1626 : gmx::ArrayRef<real>{},
1630 (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1631 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1632 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1633 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT
1634 | (DOMAINDECOMP(cr) && shouldCheckNumberOfBondedInteractions(*cr->dd)
1635 ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
1637 if (DOMAINDECOMP(cr))
1639 checkNumberOfBondedInteractions(
1640 mdlog, cr, top_global, &top, makeConstArrayRef(state->x), state->box);
1642 if (!EI_VV(ir->eI) && bStopCM)
1644 process_and_stopcm_grp(
1645 fplog, &vcm, *mdatoms, makeArrayRef(state->x), makeArrayRef(state->v));
1646 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1648 // TODO: The special case of removing CM motion should be dealt more gracefully
1649 if (useGpuForUpdate)
1651 stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1652 // Here we block until the H2D copy completes because event sync with the
1653 // force kernels that use the coordinates on the next steps is not implemented
1654 // (not because of a race on state->x being modified on the CPU while H2D is in progress).
1655 stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
1656 // If the COM removal changed the velocities on the CPU, this has to be accounted for.
1657 if (vcm.mode != ComRemovalAlgorithm::No)
1659 stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1666 /* ############# END CALC EKIN AND PRESSURE ################# */
1668 /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1669 the virial that should probably be addressed eventually. state->veta has better properties,
1670 but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1671 generate the new shake_vir, but test the veta value for convergence. This will take some thought. */
1673 if (ir->efep != FreeEnergyPerturbationType::No && !EI_VV(ir->eI))
1675 /* Sum up the foreign energy and dK/dl terms for md and sd.
1676 Currently done every step so that dH/dl is correct in the .edr */
1677 accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals);
1680 update_pcouple_after_coordinates(
1681 fplog, step, ir, mdatoms, pres, force_vir, shake_vir, pressureCouplingMu, state, nrnb, upd.deform(), !useGpuForUpdate);
1683 const bool doBerendsenPressureCoupling = (inputrec->epc == PressureCoupling::Berendsen
1684 && do_per_step(step, inputrec->nstpcouple));
1685 const bool doCRescalePressureCoupling = (inputrec->epc == PressureCoupling::CRescale
1686 && do_per_step(step, inputrec->nstpcouple));
1688 && (doBerendsenPressureCoupling || doCRescalePressureCoupling || doParrinelloRahman))
1690 integrator->scaleCoordinates(pressureCouplingMu);
1691 if (doCRescalePressureCoupling)
1693 matrix pressureCouplingInvMu;
1694 gmx::invertBoxMatrix(pressureCouplingMu, pressureCouplingInvMu);
1695 integrator->scaleVelocities(pressureCouplingInvMu);
1697 integrator->setPbc(PbcType::Xyz, state->box);
1700 /* ################# END UPDATE STEP 2 ################# */
1701 /* #### We now have r(t+dt) and v(t+dt/2) ############# */
1703 /* The coordinates (x) were unshifted in update */
1706 /* We will not sum ekinh_old,
1707 * so signal that we still have to do it.
1709 bSumEkinhOld = TRUE;
1714 /* ######### BEGIN PREPARING EDR OUTPUT ########### */
1716 /* use the directly determined last velocity, not actually the averaged half steps */
1717 if (bTrotter && ir->eI == IntegrationAlgorithm::VV)
1719 enerd->term[F_EKIN] = last_ekin;
1721 enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1723 if (integratorHasConservedEnergyQuantity(ir))
1727 enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1731 enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1734 /* ######### END PREPARING EDR OUTPUT ########### */
1740 if (fplog && do_log && bDoExpanded)
1742 /* only needed if doing expanded ensemble */
1743 PrintFreeEnergyInfoToFile(fplog,
1745 ir->expandedvals.get(),
1746 ir->bSimTemp ? ir->simtempvals.get() : nullptr,
1747 state_global->dfhist,
1754 energyOutput.addDataAtEnergyStep(bDoDHDL,
1760 ir->expandedvals.get(),
1762 PTCouplingArrays{ state->boxv,
1763 state->nosehoover_xi,
1764 state->nosehoover_vxi,
1766 state->nhpres_vxi },
1778 energyOutput.recordNonEnergyStep();
1781 gmx_bool do_dr = do_per_step(step, ir->nstdisreout);
1782 gmx_bool do_or = do_per_step(step, ir->nstorireout);
1784 if (doSimulatedAnnealing)
1786 gmx::EnergyOutput::printAnnealingTemperatures(
1787 do_log ? fplog : nullptr, groups, &(ir->opts));
1789 if (do_log || do_ene || do_dr || do_or)
1791 energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf),
1795 do_log ? fplog : nullptr,
1801 if (do_log && ir->bDoAwh && awh->hasFepLambdaDimension())
1803 const bool isInitialOutput = false;
1804 printLambdaStateToLog(fplog, state->lambda, isInitialOutput);
1809 pull_print_output(pull_work, step, t);
1812 if (do_per_step(step, ir->nstlog))
1814 if (fflush(fplog) != 0)
1816 gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1822 /* Have to do this part _after_ outputting the logfile and the edr file */
1823 /* Gets written into the state at the beginning of next loop*/
1824 state->fep_state = lamnew;
1826 else if (ir->bDoAwh && awh->needForeignEnergyDifferences(step))
1828 state->fep_state = awh->fepLambdaState();
1830 /* Print the remaining wall clock time for the run */
1831 if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting)
1835 fprintf(stderr, "\n");
1837 print_time(stderr, walltime_accounting, step, ir, cr);
1840 /* Ion/water position swapping.
1841 * Not done in last step since trajectory writing happens before this call
1842 * in the MD loop and exchanges would be lost anyway. */
1843 bNeedRepartition = FALSE;
1844 if ((ir->eSwapCoords != SwapType::No) && (step > 0) && !bLastStep
1845 && do_per_step(step, ir->swap->nstswap))
1847 bNeedRepartition = do_swapcoords(cr,
1853 as_rvec_array(state->x.data()),
1855 MASTER(cr) && mdrunOptions.verbose,
1858 if (bNeedRepartition && DOMAINDECOMP(cr))
1860 dd_collect_state(cr->dd, state, state_global);
1864 /* Replica exchange */
1868 bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t);
1871 if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr))
1873 dd_partition_system(fplog,
1894 upd.setNumAtoms(state->natoms);
1900 /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1901 /* With all integrators, except VV, we need to retain the pressure
1902 * at the current step for coupling at the next step.
1904 if ((state->flags & enumValueToBitMask(StateEntry::PressurePrevious))
1905 && (bGStatEveryStep || (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1907 /* Store the pressure in t_state for pressure coupling
1908 * at the next MD step.
1910 copy_mat(pres, state->pres_prev);
1913 /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */
1915 if ((membed != nullptr) && (!bLastStep))
1917 rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1920 cycles = wallcycle_stop(wcycle, ewcSTEP);
1921 if (DOMAINDECOMP(cr) && wcycle)
1923 dd_cycles_add(cr->dd, cycles, ddCyclStep);
1926 /* increase the MD step number */
1933 fcReportProgress(ir->nsteps + ir->init_step, step);
1937 resetHandler->resetCounters(
1938 step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1940 /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1941 imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1943 /* End of main MD loop */
1945 /* Closing TNG files can include compressing data. Therefore it is good to do that
1946 * before stopping the time measurements. */
1947 mdoutf_tng_close(outf);
1949 /* Stop measuring walltime */
1950 walltime_accounting_end_time(walltime_accounting);
1952 if (!thisRankHasDuty(cr, DUTY_PME))
1954 /* Tell the PME only node to finish */
1955 gmx_pme_send_finish(cr);
1960 if (ir->nstcalcenergy > 0)
1962 energyOutput.printEnergyConservation(fplog, ir->simulation_part, EI_MD(ir->eI));
1964 gmx::EnergyOutput::printAnnealingTemperatures(fplog, groups, &(ir->opts));
1965 energyOutput.printAverages(fplog, groups);
1972 pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1975 done_shellfc(fplog, shellfc, step_rel);
1977 if (useReplicaExchange && MASTER(cr))
1979 print_replica_exchange_statistics(fplog, repl_ex);
1982 walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1984 global_stat_destroy(gstat);