2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2011-2019,2020,2021, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implements the integrator for normal molecular dynamics simulations
41 * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42 * \ingroup module_mdrun
55 #include "gromacs/applied_forces/awh/awh.h"
56 #include "gromacs/applied_forces/awh/read_params.h"
57 #include "gromacs/commandline/filenm.h"
58 #include "gromacs/domdec/collect.h"
59 #include "gromacs/domdec/dlbtiming.h"
60 #include "gromacs/domdec/domdec.h"
61 #include "gromacs/domdec/domdec_network.h"
62 #include "gromacs/domdec/domdec_struct.h"
63 #include "gromacs/domdec/gpuhaloexchange.h"
64 #include "gromacs/domdec/mdsetup.h"
65 #include "gromacs/domdec/partition.h"
66 #include "gromacs/essentialdynamics/edsam.h"
67 #include "gromacs/ewald/pme_load_balancing.h"
68 #include "gromacs/ewald/pme_pp.h"
69 #include "gromacs/fileio/trxio.h"
70 #include "gromacs/gmxlib/network.h"
71 #include "gromacs/gmxlib/nrnb.h"
72 #include "gromacs/gpu_utils/device_stream_manager.h"
73 #include "gromacs/gpu_utils/gpu_utils.h"
74 #include "gromacs/imd/imd.h"
75 #include "gromacs/listed_forces/listed_forces.h"
76 #include "gromacs/math/functions.h"
77 #include "gromacs/math/invertmatrix.h"
78 #include "gromacs/math/vec.h"
79 #include "gromacs/math/vectypes.h"
80 #include "gromacs/mdlib/checkpointhandler.h"
81 #include "gromacs/mdlib/compute_io.h"
82 #include "gromacs/mdlib/constr.h"
83 #include "gromacs/mdlib/coupling.h"
84 #include "gromacs/mdlib/ebin.h"
85 #include "gromacs/mdlib/enerdata_utils.h"
86 #include "gromacs/mdlib/energyoutput.h"
87 #include "gromacs/mdlib/expanded.h"
88 #include "gromacs/mdlib/force.h"
89 #include "gromacs/mdlib/force_flags.h"
90 #include "gromacs/mdlib/forcerec.h"
91 #include "gromacs/mdlib/freeenergyparameters.h"
92 #include "gromacs/mdlib/md_support.h"
93 #include "gromacs/mdlib/mdatoms.h"
94 #include "gromacs/mdlib/mdoutf.h"
95 #include "gromacs/mdlib/membed.h"
96 #include "gromacs/mdlib/resethandler.h"
97 #include "gromacs/mdlib/sighandler.h"
98 #include "gromacs/mdlib/simulationsignal.h"
99 #include "gromacs/mdlib/stat.h"
100 #include "gromacs/mdlib/stophandler.h"
101 #include "gromacs/mdlib/tgroup.h"
102 #include "gromacs/mdlib/trajectory_writing.h"
103 #include "gromacs/mdlib/update.h"
104 #include "gromacs/mdlib/update_constrain_gpu.h"
105 #include "gromacs/mdlib/update_vv.h"
106 #include "gromacs/mdlib/vcm.h"
107 #include "gromacs/mdlib/vsite.h"
108 #include "gromacs/mdrunutility/handlerestart.h"
109 #include "gromacs/mdrunutility/multisim.h"
110 #include "gromacs/mdrunutility/printtime.h"
111 #include "gromacs/mdtypes/awh_history.h"
112 #include "gromacs/mdtypes/awh_params.h"
113 #include "gromacs/mdtypes/commrec.h"
114 #include "gromacs/mdtypes/df_history.h"
115 #include "gromacs/mdtypes/energyhistory.h"
116 #include "gromacs/mdtypes/fcdata.h"
117 #include "gromacs/mdtypes/forcebuffers.h"
118 #include "gromacs/mdtypes/forcerec.h"
119 #include "gromacs/mdtypes/group.h"
120 #include "gromacs/mdtypes/inputrec.h"
121 #include "gromacs/mdtypes/interaction_const.h"
122 #include "gromacs/mdtypes/md_enums.h"
123 #include "gromacs/mdtypes/mdatom.h"
124 #include "gromacs/mdtypes/mdrunoptions.h"
125 #include "gromacs/mdtypes/multipletimestepping.h"
126 #include "gromacs/mdtypes/observableshistory.h"
127 #include "gromacs/mdtypes/pullhistory.h"
128 #include "gromacs/mdtypes/simulation_workload.h"
129 #include "gromacs/mdtypes/state.h"
130 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
131 #include "gromacs/modularsimulator/energydata.h"
132 #include "gromacs/nbnxm/gpu_data_mgmt.h"
133 #include "gromacs/nbnxm/nbnxm.h"
134 #include "gromacs/pbcutil/pbc.h"
135 #include "gromacs/pulling/output.h"
136 #include "gromacs/pulling/pull.h"
137 #include "gromacs/swap/swapcoords.h"
138 #include "gromacs/timing/wallcycle.h"
139 #include "gromacs/timing/walltime_accounting.h"
140 #include "gromacs/topology/atoms.h"
141 #include "gromacs/topology/idef.h"
142 #include "gromacs/topology/mtop_util.h"
143 #include "gromacs/topology/topology.h"
144 #include "gromacs/trajectory/trajectoryframe.h"
145 #include "gromacs/utility/basedefinitions.h"
146 #include "gromacs/utility/cstringutil.h"
147 #include "gromacs/utility/fatalerror.h"
148 #include "gromacs/utility/logger.h"
149 #include "gromacs/utility/real.h"
150 #include "gromacs/utility/smalloc.h"
152 #include "legacysimulator.h"
153 #include "replicaexchange.h"
156 using gmx::SimulationSignaller;
158 void gmx::LegacySimulator::do_md()
160 // TODO Historically, the EM and MD "integrators" used different
161 // names for the t_inputrec *parameter, but these must have the
162 // same name, now that it's a member of a struct. We use this ir
163 // alias to avoid a large ripple of nearly useless changes.
164 // t_inputrec is being replaced by IMdpOptionsProvider, so this
165 // will go away eventually.
166 const t_inputrec* ir = inputrec;
168 int64_t step, step_rel;
169 double t, t0 = ir->init_t;
170 gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
171 gmx_bool bNS = FALSE, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
172 gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
173 gmx_bool do_ene, do_log, do_verbose;
174 gmx_bool bMasterState;
175 unsigned int force_flags;
176 tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, pres = { { 0 } };
179 matrix pressureCouplingMu, M;
180 gmx_repl_ex_t repl_ex = nullptr;
181 gmx_global_stat_t gstat;
182 gmx_shellfc_t* shellfc;
183 gmx_bool bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
186 std::vector<RVec> cbuf;
192 real saved_conserved_quantity = 0;
195 char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
197 /* PME load balancing data for GPU kernels */
198 gmx_bool bPMETune = FALSE;
199 gmx_bool bPMETunePrinting = FALSE;
201 bool bInteractiveMDstep = false;
203 SimulationSignals signals;
204 // Most global communnication stages don't propagate mdrun
205 // signals, and will use this object to achieve that.
206 SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
208 if (!mdrunOptions.writeConfout)
210 // This is on by default, and the main known use case for
211 // turning it off is for convenience in benchmarking, which is
212 // something that should not show up in the general user
217 "The -noconfout functionality is deprecated, and may be removed in a "
221 /* md-vv uses averaged full step velocities for T-control
222 md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
223 md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
224 bTrotter = (EI_VV(ir->eI)
225 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
227 const bool bRerunMD = false;
229 int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
230 bGStatEveryStep = (nstglobalcomm == 1);
232 const SimulationGroups* groups = &top_global.groups;
234 std::unique_ptr<EssentialDynamics> ed = nullptr;
235 if (opt2bSet("-ei", nfile, fnm))
237 /* Initialize essential dynamics sampling */
238 ed = init_edsam(mdlog,
239 opt2fn_null("-ei", nfile, fnm),
240 opt2fn("-eo", nfile, fnm),
250 else if (observablesHistory->edsamHistory)
253 "The checkpoint is from a run with essential dynamics sampling, "
254 "but the current run did not specify the -ei option. "
255 "Either specify the -ei option to mdrun, or do not use this checkpoint file.");
258 int* fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
259 gmx::ArrayRef<real> lambda = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
260 initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda);
261 Update upd(*ir, deform);
262 bool doSimulatedAnnealing = false;
264 // TODO: Avoid changing inputrec (#3854)
265 // Simulated annealing updates the reference temperature.
266 auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
267 doSimulatedAnnealing = initSimulatedAnnealing(nonConstInputrec, &upd);
269 const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
271 const t_fcdata& fcdata = *fr->fcdata;
273 bool simulationsShareState = false;
274 int nstSignalComm = nstglobalcomm;
276 // TODO This implementation of ensemble orientation restraints is nasty because
277 // a user can't just do multi-sim with single-sim orientation restraints.
278 bool usingEnsembleRestraints =
279 (fcdata.disres->nsystems > 1) || ((ms != nullptr) && (fcdata.orires->nr != 0));
280 bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim() && (ms != nullptr));
282 // Replica exchange, ensemble restraints and AWH need all
283 // simulations to remain synchronized, so they need
284 // checkpoints and stop conditions to act on the same step, so
285 // the propagation of such signals must take place between
286 // simulations, not just within simulations.
287 // TODO: Make algorithm initializers set these flags.
288 simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
290 if (simulationsShareState)
292 // Inter-simulation signal communication does not need to happen
293 // often, so we use a minimum of 200 steps to reduce overhead.
294 const int c_minimumInterSimulationSignallingInterval = 200;
295 nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1) / nstglobalcomm)
300 if (startingBehavior != StartingBehavior::RestartWithAppending)
302 pleaseCiteCouplingAlgorithms(fplog, *ir);
304 gmx_mdoutf* outf = init_mdoutf(fplog,
316 simulationsShareState,
318 gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
322 mdoutf_get_fp_dhdl(outf),
325 simulationsShareState,
328 gstat = global_stat_init(ir);
330 const auto& simulationWork = runScheduleWork->simulationWork;
331 const bool useGpuForPme = simulationWork.useGpuPme;
332 const bool useGpuForNonbonded = simulationWork.useGpuNonbonded;
333 const bool useGpuForBufferOps = simulationWork.useGpuBufferOps;
334 const bool useGpuForUpdate = simulationWork.useGpuUpdate;
336 /* Check for polarizable models and flexible constraints */
337 shellfc = init_shell_flexcon(fplog,
339 constr ? constr->numFlexibleConstraints() : 0,
345 double io = compute_io(ir, top_global.natoms, *groups, energyOutput.numEnergyTerms(), 1);
346 if ((io > 2000) && MASTER(cr))
348 fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io);
352 // Local state only becomes valid now.
353 std::unique_ptr<t_state> stateInstance;
356 gmx_localtop_t top(top_global.ffparams);
358 auto mdatoms = mdAtoms->mdatoms();
360 ForceBuffers f(fr->useMts,
361 ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
362 ? PinningPolicy::PinnedIfSupported
363 : PinningPolicy::CannotBePinned);
364 if (DOMAINDECOMP(cr))
366 stateInstance = std::make_unique<t_state>();
367 state = stateInstance.get();
368 dd_init_local_state(*cr->dd, state_global, state);
370 /* Distribute the charge groups over the nodes from the master node */
371 dd_partition_system(fplog,
392 upd.setNumAtoms(state->natoms);
396 state_change_natoms(state_global, state_global->natoms);
397 /* Copy the pointer to the global state */
398 state = state_global;
400 /* Generate and initialize new topology */
401 mdAlgorithmsSetupAtomData(cr, *ir, top_global, &top, fr, &f, mdAtoms, constr, vsite, shellfc);
403 upd.setNumAtoms(state->natoms);
406 std::unique_ptr<UpdateConstrainGpu> integrator;
408 StatePropagatorDataGpu* stateGpu = fr->stateGpu;
410 // TODO: the assertions below should be handled by UpdateConstraintsBuilder.
413 GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
414 || constr->numConstraintsTotal() == 0,
415 "Constraints in domain decomposition are only supported with update "
416 "groups if using GPU update.\n");
417 GMX_RELEASE_ASSERT(ir->eConstrAlg != ConstraintAlgorithm::Shake || constr == nullptr
418 || constr->numConstraintsTotal() == 0,
419 "SHAKE is not supported with GPU update.");
420 GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
421 "Either PME or short-ranged non-bonded interaction tasks must run on "
422 "the GPU to use GPU update.\n");
423 GMX_RELEASE_ASSERT(ir->eI == IntegrationAlgorithm::MD,
424 "Only the md integrator is supported with the GPU update.\n");
426 ir->etc != TemperatureCoupling::NoseHoover,
427 "Nose-Hoover temperature coupling is not supported with the GPU update.\n");
429 ir->epc == PressureCoupling::No || ir->epc == PressureCoupling::ParrinelloRahman
430 || ir->epc == PressureCoupling::Berendsen || ir->epc == PressureCoupling::CRescale,
431 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are supported "
432 "with the GPU update.\n");
433 GMX_RELEASE_ASSERT(!mdatoms->haveVsites,
434 "Virtual sites are not supported with the GPU update.\n");
435 GMX_RELEASE_ASSERT(ed == nullptr,
436 "Essential dynamics is not supported with the GPU update.\n");
437 GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(*ir->pull),
438 "Constraints pulling is not supported with the GPU update.\n");
439 GMX_RELEASE_ASSERT(fcdata.orires->nr == 0,
440 "Orientation restraints are not supported with the GPU update.\n");
442 ir->efep == FreeEnergyPerturbationType::No
443 || (!haveFepPerturbedMasses(top_global) && !havePerturbedConstraints(top_global)),
444 "Free energy perturbation of masses and constraints are not supported with the GPU "
447 if (constr != nullptr && constr->numConstraintsTotal() > 0)
451 .appendText("Updating coordinates and applying constraints on the GPU.");
455 GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
457 GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
458 "Device stream manager should be initialized in order to use GPU "
459 "update-constraints.");
461 fr->deviceStreamManager->streamIsValid(gmx::DeviceStreamType::UpdateAndConstraints),
462 "Update stream should be initialized in order to use GPU "
463 "update-constraints.");
464 integrator = std::make_unique<UpdateConstrainGpu>(
468 fr->deviceStreamManager->context(),
469 fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints),
470 stateGpu->xUpdatedOnDevice(),
473 integrator->setPbc(PbcType::Xyz, state->box);
476 if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
478 changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
482 changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported);
485 // NOTE: The global state is no longer used at this point.
486 // But state_global is still used as temporary storage space for writing
487 // the global state to file and potentially for replica exchange.
488 // (Global topology should persist.)
490 update_mdatoms(mdatoms, state->lambda[FreeEnergyPerturbationCouplingType::Mass]);
494 /* Check nstexpanded here, because the grompp check was broken */
495 if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
498 "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
500 init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation, ir, state->dfhist);
505 EnergyData::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput);
508 preparePrevStepPullCom(ir,
510 gmx::arrayRefFromArray(mdatoms->massT, mdatoms->nr),
514 startingBehavior != StartingBehavior::NewSimulation);
516 // TODO: Remove this by converting AWH into a ForceProvider
517 auto awh = prepareAwhModule(fplog,
522 startingBehavior != StartingBehavior::NewSimulation,
524 opt2fn("-awh", nfile, fnm),
527 if (useReplicaExchange && MASTER(cr))
529 repl_ex = init_replica_exchange(fplog, ms, top_global.natoms, ir, replExParams);
531 /* PME tuning is only supported in the Verlet scheme, with PME for
532 * Coulomb. It is not supported with only LJ PME. */
533 bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && !mdrunOptions.reproducible
534 && ir->cutoff_scheme != CutoffScheme::Group);
536 pme_load_balancing_t* pme_loadbal = nullptr;
540 &pme_loadbal, cr, mdlog, *ir, state->box, *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu());
543 if (!ir->bContinuation)
545 if (state->flags & enumValueToBitMask(StateEntry::V))
547 auto v = makeArrayRef(state->v);
548 /* Set the velocities of vsites, shells and frozen atoms to zero */
549 for (i = 0; i < mdatoms->homenr; i++)
551 if (mdatoms->ptype[i] == ParticleType::Shell)
555 else if (mdatoms->cFREEZE)
557 for (m = 0; m < DIM; m++)
559 if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
570 /* Constrain the initial coordinates and velocities */
571 do_constrain_first(fplog,
576 state->x.arrayRefWithPadding(),
577 state->v.arrayRefWithPadding(),
579 state->lambda[FreeEnergyPerturbationCouplingType::Bonded]);
583 if (ir->efep != FreeEnergyPerturbationType::No)
585 /* Set free energy calculation frequency as the greatest common
586 * denominator of nstdhdl and repl_ex_nst. */
587 nstfep = ir->fepvals->nstdhdl;
590 nstfep = std::gcd(ir->expandedvals->nstexpanded, nstfep);
592 if (useReplicaExchange)
594 nstfep = std::gcd(replExParams.exchangeInterval, nstfep);
598 nstfep = std::gcd(ir->awhParams->nstSampleCoord(), nstfep);
602 /* Be REALLY careful about what flags you set here. You CANNOT assume
603 * this is the first step, since we might be restarting from a checkpoint,
604 * and in that case we should not do any modifications to the state.
606 bStopCM = (ir->comm_mode != ComRemovalAlgorithm::No && !ir->bContinuation);
608 // When restarting from a checkpoint, it can be appropriate to
609 // initialize ekind from quantities in the checkpoint. Otherwise,
610 // compute_globals must initialize ekind before the simulation
611 // starts/restarts. However, only the master rank knows what was
612 // found in the checkpoint file, so we have to communicate in
613 // order to coordinate the restart.
615 // TODO Consider removing this communication if/when checkpoint
616 // reading directly follows .tpr reading, because all ranks can
617 // agree on hasReadEkinState at that time.
618 bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
621 gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr->mpi_comm_mygroup);
623 if (hasReadEkinState)
625 restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
628 unsigned int cglo_flags =
629 (CGLO_TEMPERATURE | CGLO_GSTAT | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
630 | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) | (hasReadEkinState ? CGLO_READEKIN : 0));
632 bSumEkinhOld = FALSE;
634 t_vcm vcm(top_global.groups, *ir);
635 reportComRemovalInfo(fplog, vcm);
637 /* To minimize communication, compute_globals computes the COM velocity
638 * and the kinetic energy for the velocities without COM motion removed.
639 * Thus to get the kinetic energy without the COM contribution, we need
640 * to call compute_globals twice.
642 for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
644 unsigned int cglo_flags_iteration = cglo_flags;
645 if (bStopCM && cgloIteration == 0)
647 cglo_flags_iteration |= CGLO_STOPCM;
648 cglo_flags_iteration &= ~CGLO_TEMPERATURE;
650 if (DOMAINDECOMP(cr) && shouldCheckNumberOfBondedInteractions(*cr->dd) && cgloIteration == 0)
652 cglo_flags_iteration |= CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS;
654 compute_globals(gstat,
659 makeConstArrayRef(state->x),
660 makeConstArrayRef(state->v),
671 gmx::ArrayRef<real>{},
675 cglo_flags_iteration);
676 if (cglo_flags_iteration & CGLO_STOPCM)
678 /* At initialization, do not pass x with acceleration-correction mode
679 * to avoid (incorrect) correction of the initial coordinates.
681 auto x = (vcm.mode == ComRemovalAlgorithm::LinearAccelerationCorrection)
683 : makeArrayRef(state->x);
684 process_and_stopcm_grp(fplog, &vcm, *mdatoms, x, makeArrayRef(state->v));
685 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
688 if (DOMAINDECOMP(cr))
690 checkNumberOfBondedInteractions(
691 mdlog, cr, top_global, &top, makeConstArrayRef(state->x), state->box);
693 if (ir->eI == IntegrationAlgorithm::VVAK)
695 /* a second call to get the half step temperature initialized as well */
696 /* we do the same call as above, but turn the pressure off -- internally to
697 compute_globals, this is recognized as a velocity verlet half-step
698 kinetic energy calculation. This minimized excess variables, but
699 perhaps loses some logic?*/
701 compute_globals(gstat,
706 makeConstArrayRef(state->x),
707 makeConstArrayRef(state->v),
718 gmx::ArrayRef<real>{},
722 cglo_flags & ~CGLO_PRESSURE);
725 /* Calculate the initial half step temperature, and save the ekinh_old */
726 if (startingBehavior == StartingBehavior::NewSimulation)
728 for (i = 0; (i < ir->opts.ngtc); i++)
730 copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
734 /* need to make an initiation call to get the Trotter variables set, as well as other constants
735 for non-trotter temperature control */
736 auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
740 if (!ir->bContinuation)
742 if (constr && ir->eConstrAlg == ConstraintAlgorithm::Lincs)
745 "RMS relative constraint deviation after constraining: %.2e\n",
748 if (EI_STATE_VELOCITY(ir->eI))
750 real temp = enerd->term[F_TEMP];
751 if (ir->eI != IntegrationAlgorithm::VV)
753 /* Result of Ekin averaged over velocities of -half
754 * and +half step, while we only have -half step here.
758 fprintf(fplog, "Initial temperature: %g K\n", temp);
763 fprintf(stderr, "starting mdrun '%s'\n", *(top_global.name));
766 sprintf(tbuf, "%8.1f", (ir->init_step + ir->nsteps) * ir->delta_t);
770 sprintf(tbuf, "%s", "infinite");
772 if (ir->init_step > 0)
775 "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
776 gmx_step_str(ir->init_step + ir->nsteps, sbuf),
778 gmx_step_str(ir->init_step, sbuf2),
779 ir->init_step * ir->delta_t);
783 fprintf(stderr, "%s steps, %s ps.\n", gmx_step_str(ir->nsteps, sbuf), tbuf);
785 fprintf(fplog, "\n");
788 walltime_accounting_start_time(walltime_accounting);
789 wallcycle_start(wcycle, ewcRUN);
790 print_start(fplog, cr, walltime_accounting, "mdrun");
792 /***********************************************************
796 ************************************************************/
799 /* Skip the first Nose-Hoover integration when we get the state from tpx */
800 bInitStep = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
801 bSumEkinhOld = FALSE;
803 bNeedRepartition = FALSE;
805 step = ir->init_step;
808 auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
809 compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]),
810 simulationsShareState,
813 mdrunOptions.reproducible,
815 mdrunOptions.maximumHoursToRun,
820 walltime_accounting);
822 auto checkpointHandler = std::make_unique<CheckpointHandler>(
823 compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
824 simulationsShareState,
827 mdrunOptions.writeConfout,
828 mdrunOptions.checkpointOptions.period);
830 const bool resetCountersIsLocal = true;
831 auto resetHandler = std::make_unique<ResetHandler>(
832 compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]),
833 !resetCountersIsLocal,
836 mdrunOptions.timingOptions.resetHalfway,
837 mdrunOptions.maximumHoursToRun,
840 walltime_accounting);
842 const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
844 if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
846 logInitialMultisimStatus(ms, cr, mdlog, simulationsShareState, ir->nsteps, ir->init_step);
849 /* and stop now if we should */
850 bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
854 /* Determine if this is a neighbor search step */
855 bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
857 if (bPMETune && bNStList)
859 // This has to be here because PME load balancing is called so early.
860 // TODO: Move to after all booleans are defined.
861 if (useGpuForUpdate && !bFirstStep)
863 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
864 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
866 /* PME grid + cut-off optimization with GPUs or PME nodes */
867 pme_loadbal_do(pme_loadbal,
869 (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
880 simulationWork.useGpuPmePpCommunication);
883 wallcycle_start(wcycle, ewcSTEP);
885 bLastStep = (step_rel == ir->nsteps);
886 t = t0 + step * ir->delta_t;
888 // TODO Refactor this, so that nstfep does not need a default value of zero
889 if (ir->efep != FreeEnergyPerturbationType::No || ir->bSimTemp)
891 /* find and set the current lambdas */
892 state->lambda = currentLambdas(step, *(ir->fepvals), state->fep_state);
894 bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl);
895 bDoFEP = ((ir->efep != FreeEnergyPerturbationType::No) && do_per_step(step, nstfep));
896 bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded)
900 bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep
901 && do_per_step(step, replExParams.exchangeInterval));
903 if (doSimulatedAnnealing)
905 // TODO: Avoid changing inputrec (#3854)
906 // Simulated annealing updates the reference temperature.
907 auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
908 update_annealing_target_temp(nonConstInputrec, t, &upd);
911 /* Stop Center of Mass motion */
912 bStopCM = (ir->comm_mode != ComRemovalAlgorithm::No && do_per_step(step, ir->nstcomm));
914 /* Determine whether or not to do Neighbour Searching */
915 bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
917 /* Note that the stopHandler will cause termination at nstglobalcomm
918 * steps. Since this concides with nstcalcenergy, nsttcouple and/or
919 * nstpcouple steps, we have computed the half-step kinetic energy
920 * of the previous step and can always output energies at the last step.
922 bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
924 /* do_log triggers energy and virial calculation. Because this leads
925 * to different code paths, forces can be different. Thus for exact
926 * continuation we should avoid extra log output.
927 * Note that the || bLastStep can result in non-exact continuation
928 * beyond the last step. But we don't consider that to be an issue.
930 do_log = (do_per_step(step, ir->nstlog)
931 || (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) || bLastStep);
932 do_verbose = mdrunOptions.verbose
933 && (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
935 if (useGpuForUpdate && !bFirstStep && bNS)
937 // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
938 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
939 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
940 // Copy coordinate from the GPU when needed at the search step.
941 // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
942 // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
943 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
944 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
947 // We only need to calculate virtual velocities if we are writing them in the current step
948 const bool needVirtualVelocitiesThisStep =
950 && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep());
952 if (vsite != nullptr)
954 // Virtual sites need to be updated before domain decomposition and forces are calculated
955 wallcycle_start(wcycle, ewcVSITECONSTR);
956 // md-vv calculates virtual velocities once it has full-step real velocities
957 vsite->construct(state->x,
960 (!EI_VV(inputrec->eI) && needVirtualVelocitiesThisStep)
961 ? VSiteOperation::PositionsAndVelocities
962 : VSiteOperation::Positions);
963 wallcycle_stop(wcycle, ewcVSITECONSTR);
966 if (bNS && !(bFirstStep && ir->bContinuation))
968 bMasterState = FALSE;
969 /* Correct the new box if it is too skewed */
970 if (inputrecDynamicBox(ir))
972 if (correct_box(fplog, step, state->box))
975 // If update is offloaded, it should be informed about the box size change
978 integrator->setPbc(PbcType::Xyz, state->box);
982 if (DOMAINDECOMP(cr) && bMasterState)
984 dd_collect_state(cr->dd, state, state_global);
987 if (DOMAINDECOMP(cr))
989 /* Repartition the domain decomposition */
990 dd_partition_system(fplog,
1010 do_verbose && !bPMETunePrinting);
1011 upd.setNumAtoms(state->natoms);
1015 // Allocate or re-size GPU halo exchange object, if necessary
1016 if (bNS && havePPDomainDecomposition(cr) && simulationWork.useGpuHaloExchange)
1018 GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
1019 "GPU device manager has to be initialized to use GPU "
1020 "version of halo exchange.");
1021 constructGpuHaloExchange(mdlog, *cr, *fr->deviceStreamManager, wcycle);
1024 if (MASTER(cr) && do_log)
1026 gmx::EnergyOutput::printHeader(
1027 fplog, step, t); /* can we improve the information printed here? */
1030 if (ir->efep != FreeEnergyPerturbationType::No)
1032 update_mdatoms(mdatoms, state->lambda[FreeEnergyPerturbationCouplingType::Mass]);
1037 /* We need the kinetic energy at minus the half step for determining
1038 * the full step kinetic energy and possibly for T-coupling.*/
1039 /* This may not be quite working correctly yet . . . . */
1040 int cglo_flags = CGLO_GSTAT | CGLO_TEMPERATURE;
1041 if (DOMAINDECOMP(cr) && shouldCheckNumberOfBondedInteractions(*cr->dd))
1043 cglo_flags |= CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS;
1045 compute_globals(gstat,
1050 makeConstArrayRef(state->x),
1051 makeConstArrayRef(state->v),
1062 gmx::ArrayRef<real>{},
1067 if (DOMAINDECOMP(cr))
1069 checkNumberOfBondedInteractions(
1070 mdlog, cr, top_global, &top, makeConstArrayRef(state->x), state->box);
1073 clear_mat(force_vir);
1075 checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
1077 /* Determine the energy and pressure:
1078 * at nstcalcenergy steps and at energy output steps (set below).
1080 if (EI_VV(ir->eI) && (!bInitStep))
1082 bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
1083 bCalcVir = bCalcEnerStep
1084 || (ir->epc != PressureCoupling::No
1085 && (do_per_step(step, ir->nstpcouple) || do_per_step(step - 1, ir->nstpcouple)));
1089 bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
1090 bCalcVir = bCalcEnerStep
1091 || (ir->epc != PressureCoupling::No && do_per_step(step, ir->nstpcouple));
1093 bCalcEner = bCalcEnerStep;
1095 do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
1097 if (do_ene || do_log || bDoReplEx)
1103 /* Do we need global communication ? */
1104 bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm)
1105 || (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm)));
1107 force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0)
1108 | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0)
1109 | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0));
1110 if (fr->useMts && !do_per_step(step, ir->nstfout))
1112 force_flags |= GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE;
1117 /* Now is the time to relax the shells */
1118 relax_shell_flexcon(fplog,
1121 mdrunOptions.verbose,
1133 state->x.arrayRefWithPadding(),
1134 state->v.arrayRefWithPadding(),
1149 ddBalanceRegionHandler);
1153 /* The AWH history need to be saved _before_ doing force calculations where the AWH bias
1154 is updated (or the AWH update will be performed twice for one step when continuing).
1155 It would be best to call this update function from do_md_trajectory_writing but that
1156 would occur after do_force. One would have to divide the update_awh function into one
1157 function applying the AWH force and one doing the AWH bias update. The update AWH
1158 bias function could then be called after do_md_trajectory_writing (then containing
1159 update_awh_history). The checkpointing will in the future probably moved to the start
1160 of the md loop which will rid of this issue. */
1161 if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
1163 awh->updateHistory(state_global->awhHistory.get());
1166 /* The coordinates (x) are shifted (to get whole molecules)
1168 * This is parallellized as well, and does communication too.
1169 * Check comments in sim_util.c
1184 state->x.arrayRefWithPadding(),
1196 ed ? ed->getLegacyED() : nullptr,
1197 (bNS ? GMX_FORCE_NS : 0) | force_flags,
1198 ddBalanceRegionHandler);
1201 // VV integrators do not need the following velocity half step
1202 // if it is the first step after starting from a checkpoint.
1203 // That is, the half step is needed on all other steps, and
1204 // also the first step when starting from a .tpr file.
1207 integrateVVFirstStep(step,
1240 &saved_conserved_quantity,
1250 if (vsite != nullptr && needVirtualVelocitiesThisStep)
1252 // Positions were calculated earlier
1253 wallcycle_start(wcycle, ewcVSITECONSTR);
1254 vsite->construct(state->x, state->v, state->box, VSiteOperation::Velocities);
1255 wallcycle_stop(wcycle, ewcVSITECONSTR);
1259 /* ######## END FIRST UPDATE STEP ############## */
1260 /* ######## If doing VV, we now have v(dt) ###### */
1263 /* perform extended ensemble sampling in lambda - we don't
1264 actually move to the new state before outputting
1265 statistics, but if performing simulated tempering, we
1266 do update the velocities and the tau_t. */
1267 // TODO: Avoid changing inputrec (#3854)
1268 // Simulated tempering updates the reference temperature.
1269 // Expanded ensemble without simulated tempering does not change the inputrec.
1270 auto* nonConstInputrec = const_cast<t_inputrec*>(inputrec);
1271 lamnew = ExpandedEnsembleDynamics(fplog,
1279 state->v.rvec_array(),
1281 /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1284 copy_df_history(state_global->dfhist, state->dfhist);
1288 // Copy coordinate from the GPU for the output/checkpointing if the update is offloaded and
1289 // coordinates have not already been copied for i) search or ii) CPU force tasks.
1290 if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork
1291 && (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)
1292 || checkpointHandler->isCheckpointingStep()))
1294 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1295 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1297 // Copy velocities if needed for the output/checkpointing.
1298 // NOTE: Copy on the search steps is done at the beginning of the step.
1299 if (useGpuForUpdate && !bNS
1300 && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep()))
1302 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1303 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1305 // Copy forces for the output if the forces were reduced on the GPU (not the case on virial steps)
1306 // and update is offloaded hence forces are kept on the GPU for update and have not been
1307 // already transferred in do_force().
1308 // TODO: There should be an improved, explicit mechanism that ensures this copy is only executed
1309 // when the forces are ready on the GPU -- the same synchronizer should be used as the one
1310 // prior to GPU update.
1311 // TODO: When the output flags will be included in step workload, this copy can be combined with the
1312 // copy call in do_force(...).
1313 // NOTE: The forces should not be copied here if the vsites are present, since they were modified
1314 // on host after the D2H copy in do_force(...).
1315 if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite)
1316 && do_per_step(step, ir->nstfout))
1318 stateGpu->copyForcesFromGpu(f.view().force(), AtomLocality::Local);
1319 stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
1321 /* Now we have the energies and forces corresponding to the
1322 * coordinates at time t. We must output all of this before
1325 do_md_trajectory_writing(fplog,
1342 checkpointHandler->isCheckpointingStep(),
1345 mdrunOptions.writeConfout,
1347 /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1348 bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x, t);
1350 /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1351 if (startingBehavior != StartingBehavior::NewSimulation && bFirstStep
1352 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1354 copy_mat(state->svir_prev, shake_vir);
1355 copy_mat(state->fvir_prev, force_vir);
1358 stopHandler->setSignal();
1359 resetHandler->setSignal(walltime_accounting);
1361 if (bGStat || !PAR(cr))
1363 /* In parallel we only have to check for checkpointing in steps
1364 * where we do global communication,
1365 * otherwise the other nodes don't know.
1367 checkpointHandler->setSignal(walltime_accounting);
1370 /* ######### START SECOND UPDATE STEP ################# */
1372 /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen
1373 controlled in preprocessing */
1375 if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1377 gmx_bool bIfRandomize;
1378 bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1379 /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1380 if (constr && bIfRandomize)
1382 constrain_velocities(constr, do_log, do_ene, step, state, nullptr, false, nullptr);
1385 /* Box is changed in update() when we do pressure coupling,
1386 * but we should still use the old box for energy corrections and when
1387 * writing it to the energy file, so it matches the trajectory files for
1388 * the same timestep above. Make a copy in a separate array.
1390 copy_mat(state->box, lastbox);
1394 if (!useGpuForUpdate)
1396 wallcycle_start(wcycle, ewcUPDATE);
1398 /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1401 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1402 /* We can only do Berendsen coupling after we have summed
1403 * the kinetic energy or virial. Since the happens
1404 * in global_state after update, we should only do it at
1405 * step % nstlist = 1 with bGStatEveryStep=FALSE.
1410 update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1411 update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep);
1414 /* With leap-frog type integrators we compute the kinetic energy
1415 * at a whole time step as the average of the half-time step kinetic
1416 * energies of two subsequent steps. Therefore we need to compute the
1417 * half step kinetic energy also if we need energies at the next step.
1419 const bool needHalfStepKineticEnergy =
1420 (!EI_VV(ir->eI) && (do_per_step(step + 1, nstglobalcomm) || step_rel + 1 == ir->nsteps));
1422 // Parrinello-Rahman requires the pressure to be availible before the update to compute
1423 // the velocity scaling matrix. Hence, it runs one step after the nstpcouple step.
1424 const bool doParrinelloRahman = (ir->epc == PressureCoupling::ParrinelloRahman
1425 && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
1429 GMX_ASSERT(!useGpuForUpdate, "GPU update is not supported with VVAK integrator.");
1431 integrateVVSecondStep(step,
1467 if (useGpuForUpdate)
1469 if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
1471 integrator->set(stateGpu->getCoordinates(),
1472 stateGpu->getVelocities(),
1473 stateGpu->getForces(),
1477 // Copy data to the GPU after buffers might have being reinitialized
1478 stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1479 stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1482 if (simulationWork.useGpuPme && !runScheduleWork->simulationWork.useGpuPmePpCommunication
1483 && !thisRankHasDuty(cr, DUTY_PME))
1485 // The PME forces were recieved to the host, so have to be copied
1486 stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::All);
1488 else if (!runScheduleWork->stepWork.useGpuFBufferOps)
1490 // The buffer ops were not offloaded this step, so the forces are on the
1491 // host and have to be copied
1492 stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::Local);
1495 const bool doTemperatureScaling =
1496 (ir->etc != TemperatureCoupling::No
1497 && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1499 // This applies Leap-Frog, LINCS and SETTLE in succession
1500 integrator->integrate(
1501 stateGpu->getForcesReadyOnDeviceEvent(
1502 AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps),
1507 doTemperatureScaling,
1510 ir->nstpcouple * ir->delta_t,
1513 // Copy velocities D2H after update if:
1514 // - Globals are computed this step (includes the energy output steps).
1515 // - Temperature is needed for the next step.
1516 if (bGStat || needHalfStepKineticEnergy)
1518 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1519 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1524 /* With multiple time stepping we need to do an additional normal
1525 * update step to obtain the virial, as the actual MTS integration
1526 * using an acceleration where the slow forces are multiplied by mtsFactor.
1527 * Using that acceleration would result in a virial with the slow
1528 * force contribution would be a factor mtsFactor too large.
1530 if (fr->useMts && bCalcVir && constr != nullptr)
1532 upd.update_for_constraint_virial(
1533 *ir, *mdatoms, *state, f.view().forceWithPadding(), *ekind);
1535 constrain_coordinates(constr,
1540 upd.xp()->arrayRefWithPadding(),
1546 ArrayRefWithPadding<const RVec> forceCombined =
1547 (fr->useMts && step % ir->mtsLevels[1].stepFactor == 0)
1548 ? f.view().forceMtsCombinedWithPadding()
1549 : f.view().forceWithPadding();
1551 *ir, step, mdatoms, state, forceCombined, fcdata, ekind, M, etrtPOSITION, cr, constr != nullptr);
1553 wallcycle_stop(wcycle, ewcUPDATE);
1555 constrain_coordinates(constr,
1560 upd.xp()->arrayRefWithPadding(),
1562 bCalcVir && !fr->useMts,
1565 upd.update_sd_second_half(
1566 *ir, step, &dvdl_constr, mdatoms, state, cr, nrnb, wcycle, constr, do_log, do_ene);
1567 upd.finish_update(*ir, mdatoms, state, wcycle, constr != nullptr);
1570 if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1572 updatePrevStepPullCom(pull_work, state);
1575 enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1578 /* ############## IF NOT VV, Calculate globals HERE ############ */
1579 /* With Leap-Frog we can skip compute_globals at
1580 * non-communication steps, but we need to calculate
1581 * the kinetic energy one step before communication.
1584 // Organize to do inter-simulation signalling on steps if
1585 // and when algorithms require it.
1586 const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1588 if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
1590 // Copy coordinates when needed to stop the CM motion.
1591 if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
1593 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1594 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1596 // Since we're already communicating at this step, we
1597 // can propagate intra-simulation signals. Note that
1598 // check_nstglobalcomm has the responsibility for
1599 // choosing the value of nstglobalcomm that is one way
1600 // bGStat becomes true, so we can't get into a
1601 // situation where e.g. checkpointing can't be
1603 bool doIntraSimSignal = true;
1604 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1612 makeConstArrayRef(state->x),
1613 makeConstArrayRef(state->v),
1624 (!EI_VV(ir->eI) && bCalcEner && constr != nullptr) ? constr->rmsdData()
1625 : gmx::ArrayRef<real>{},
1629 (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1630 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1631 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1632 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT
1633 | (DOMAINDECOMP(cr) && shouldCheckNumberOfBondedInteractions(*cr->dd)
1634 ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
1636 if (DOMAINDECOMP(cr))
1638 checkNumberOfBondedInteractions(
1639 mdlog, cr, top_global, &top, makeConstArrayRef(state->x), state->box);
1641 if (!EI_VV(ir->eI) && bStopCM)
1643 process_and_stopcm_grp(
1644 fplog, &vcm, *mdatoms, makeArrayRef(state->x), makeArrayRef(state->v));
1645 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1647 // TODO: The special case of removing CM motion should be dealt more gracefully
1648 if (useGpuForUpdate)
1650 stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1651 // Here we block until the H2D copy completes because event sync with the
1652 // force kernels that use the coordinates on the next steps is not implemented
1653 // (not because of a race on state->x being modified on the CPU while H2D is in progress).
1654 stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
1655 // If the COM removal changed the velocities on the CPU, this has to be accounted for.
1656 if (vcm.mode != ComRemovalAlgorithm::No)
1658 stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1665 /* ############# END CALC EKIN AND PRESSURE ################# */
1667 /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1668 the virial that should probably be addressed eventually. state->veta has better properies,
1669 but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1670 generate the new shake_vir, but test the veta value for convergence. This will take some thought. */
1672 if (ir->efep != FreeEnergyPerturbationType::No && !EI_VV(ir->eI))
1674 /* Sum up the foreign energy and dK/dl terms for md and sd.
1675 Currently done every step so that dH/dl is correct in the .edr */
1676 accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals);
1679 update_pcouple_after_coordinates(
1680 fplog, step, ir, mdatoms, pres, force_vir, shake_vir, pressureCouplingMu, state, nrnb, upd.deform(), !useGpuForUpdate);
1682 const bool doBerendsenPressureCoupling = (inputrec->epc == PressureCoupling::Berendsen
1683 && do_per_step(step, inputrec->nstpcouple));
1684 const bool doCRescalePressureCoupling = (inputrec->epc == PressureCoupling::CRescale
1685 && do_per_step(step, inputrec->nstpcouple));
1687 && (doBerendsenPressureCoupling || doCRescalePressureCoupling || doParrinelloRahman))
1689 integrator->scaleCoordinates(pressureCouplingMu);
1690 if (doCRescalePressureCoupling)
1692 matrix pressureCouplingInvMu;
1693 gmx::invertBoxMatrix(pressureCouplingMu, pressureCouplingInvMu);
1694 integrator->scaleVelocities(pressureCouplingInvMu);
1696 integrator->setPbc(PbcType::Xyz, state->box);
1699 /* ################# END UPDATE STEP 2 ################# */
1700 /* #### We now have r(t+dt) and v(t+dt/2) ############# */
1702 /* The coordinates (x) were unshifted in update */
1705 /* We will not sum ekinh_old,
1706 * so signal that we still have to do it.
1708 bSumEkinhOld = TRUE;
1713 /* ######### BEGIN PREPARING EDR OUTPUT ########### */
1715 /* use the directly determined last velocity, not actually the averaged half steps */
1716 if (bTrotter && ir->eI == IntegrationAlgorithm::VV)
1718 enerd->term[F_EKIN] = last_ekin;
1720 enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1722 if (integratorHasConservedEnergyQuantity(ir))
1726 enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1730 enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1733 /* ######### END PREPARING EDR OUTPUT ########### */
1739 if (fplog && do_log && bDoExpanded)
1741 /* only needed if doing expanded ensemble */
1742 PrintFreeEnergyInfoToFile(fplog,
1744 ir->expandedvals.get(),
1745 ir->bSimTemp ? ir->simtempvals.get() : nullptr,
1746 state_global->dfhist,
1753 energyOutput.addDataAtEnergyStep(bDoDHDL,
1759 ir->expandedvals.get(),
1761 PTCouplingArrays{ state->boxv,
1762 state->nosehoover_xi,
1763 state->nosehoover_vxi,
1765 state->nhpres_vxi },
1777 energyOutput.recordNonEnergyStep();
1780 gmx_bool do_dr = do_per_step(step, ir->nstdisreout);
1781 gmx_bool do_or = do_per_step(step, ir->nstorireout);
1783 if (doSimulatedAnnealing)
1785 gmx::EnergyOutput::printAnnealingTemperatures(
1786 do_log ? fplog : nullptr, groups, &(ir->opts));
1788 if (do_log || do_ene || do_dr || do_or)
1790 energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf),
1794 do_log ? fplog : nullptr,
1800 if (do_log && ir->bDoAwh && awh->hasFepLambdaDimension())
1802 const bool isInitialOutput = false;
1803 printLambdaStateToLog(fplog, state->lambda, isInitialOutput);
1808 pull_print_output(pull_work, step, t);
1811 if (do_per_step(step, ir->nstlog))
1813 if (fflush(fplog) != 0)
1815 gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1821 /* Have to do this part _after_ outputting the logfile and the edr file */
1822 /* Gets written into the state at the beginning of next loop*/
1823 state->fep_state = lamnew;
1825 else if (ir->bDoAwh && awh->needForeignEnergyDifferences(step))
1827 state->fep_state = awh->fepLambdaState();
1829 /* Print the remaining wall clock time for the run */
1830 if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting)
1834 fprintf(stderr, "\n");
1836 print_time(stderr, walltime_accounting, step, ir, cr);
1839 /* Ion/water position swapping.
1840 * Not done in last step since trajectory writing happens before this call
1841 * in the MD loop and exchanges would be lost anyway. */
1842 bNeedRepartition = FALSE;
1843 if ((ir->eSwapCoords != SwapType::No) && (step > 0) && !bLastStep
1844 && do_per_step(step, ir->swap->nstswap))
1846 bNeedRepartition = do_swapcoords(cr,
1852 as_rvec_array(state->x.data()),
1854 MASTER(cr) && mdrunOptions.verbose,
1857 if (bNeedRepartition && DOMAINDECOMP(cr))
1859 dd_collect_state(cr->dd, state, state_global);
1863 /* Replica exchange */
1867 bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t);
1870 if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr))
1872 dd_partition_system(fplog,
1893 upd.setNumAtoms(state->natoms);
1899 /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1900 /* With all integrators, except VV, we need to retain the pressure
1901 * at the current step for coupling at the next step.
1903 if ((state->flags & enumValueToBitMask(StateEntry::PressurePrevious))
1904 && (bGStatEveryStep || (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1906 /* Store the pressure in t_state for pressure coupling
1907 * at the next MD step.
1909 copy_mat(pres, state->pres_prev);
1912 /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */
1914 if ((membed != nullptr) && (!bLastStep))
1916 rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1919 cycles = wallcycle_stop(wcycle, ewcSTEP);
1920 if (DOMAINDECOMP(cr) && wcycle)
1922 dd_cycles_add(cr->dd, cycles, ddCyclStep);
1925 /* increase the MD step number */
1932 fcReportProgress(ir->nsteps + ir->init_step, step);
1936 resetHandler->resetCounters(
1937 step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1939 /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1940 imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1942 /* End of main MD loop */
1944 /* Closing TNG files can include compressing data. Therefore it is good to do that
1945 * before stopping the time measurements. */
1946 mdoutf_tng_close(outf);
1948 /* Stop measuring walltime */
1949 walltime_accounting_end_time(walltime_accounting);
1951 if (!thisRankHasDuty(cr, DUTY_PME))
1953 /* Tell the PME only node to finish */
1954 gmx_pme_send_finish(cr);
1959 if (ir->nstcalcenergy > 0)
1961 energyOutput.printEnergyConservation(fplog, ir->simulation_part, EI_MD(ir->eI));
1963 gmx::EnergyOutput::printAnnealingTemperatures(fplog, groups, &(ir->opts));
1964 energyOutput.printAverages(fplog, groups);
1971 pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1974 done_shellfc(fplog, shellfc, step_rel);
1976 if (useReplicaExchange && MASTER(cr))
1978 print_replica_exchange_statistics(fplog, repl_ex);
1981 walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1983 global_stat_destroy(gstat);