2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2011-2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implements the integrator for normal molecular dynamics simulations
41 * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42 * \ingroup module_mdrun
54 #include "gromacs/awh/awh.h"
55 #include "gromacs/commandline/filenm.h"
56 #include "gromacs/domdec/collect.h"
57 #include "gromacs/domdec/dlbtiming.h"
58 #include "gromacs/domdec/domdec.h"
59 #include "gromacs/domdec/domdec_network.h"
60 #include "gromacs/domdec/domdec_struct.h"
61 #include "gromacs/domdec/gpuhaloexchange.h"
62 #include "gromacs/domdec/mdsetup.h"
63 #include "gromacs/domdec/partition.h"
64 #include "gromacs/essentialdynamics/edsam.h"
65 #include "gromacs/ewald/pme_load_balancing.h"
66 #include "gromacs/ewald/pme_pp.h"
67 #include "gromacs/fileio/trxio.h"
68 #include "gromacs/gmxlib/network.h"
69 #include "gromacs/gmxlib/nrnb.h"
70 #include "gromacs/gpu_utils/gpu_utils.h"
71 #include "gromacs/imd/imd.h"
72 #include "gromacs/listed_forces/manage_threading.h"
73 #include "gromacs/math/functions.h"
74 #include "gromacs/math/utilities.h"
75 #include "gromacs/math/vec.h"
76 #include "gromacs/math/vectypes.h"
77 #include "gromacs/mdlib/checkpointhandler.h"
78 #include "gromacs/mdlib/compute_io.h"
79 #include "gromacs/mdlib/constr.h"
80 #include "gromacs/mdlib/ebin.h"
81 #include "gromacs/mdlib/enerdata_utils.h"
82 #include "gromacs/mdlib/energyoutput.h"
83 #include "gromacs/mdlib/expanded.h"
84 #include "gromacs/mdlib/force.h"
85 #include "gromacs/mdlib/force_flags.h"
86 #include "gromacs/mdlib/forcerec.h"
87 #include "gromacs/mdlib/md_support.h"
88 #include "gromacs/mdlib/mdatoms.h"
89 #include "gromacs/mdlib/mdoutf.h"
90 #include "gromacs/mdlib/membed.h"
91 #include "gromacs/mdlib/resethandler.h"
92 #include "gromacs/mdlib/sighandler.h"
93 #include "gromacs/mdlib/simulationsignal.h"
94 #include "gromacs/mdlib/stat.h"
95 #include "gromacs/mdlib/stophandler.h"
96 #include "gromacs/mdlib/tgroup.h"
97 #include "gromacs/mdlib/trajectory_writing.h"
98 #include "gromacs/mdlib/update.h"
99 #include "gromacs/mdlib/update_constrain_gpu.h"
100 #include "gromacs/mdlib/vcm.h"
101 #include "gromacs/mdlib/vsite.h"
102 #include "gromacs/mdrunutility/handlerestart.h"
103 #include "gromacs/mdrunutility/multisim.h"
104 #include "gromacs/mdrunutility/printtime.h"
105 #include "gromacs/mdtypes/awh_history.h"
106 #include "gromacs/mdtypes/awh_params.h"
107 #include "gromacs/mdtypes/commrec.h"
108 #include "gromacs/mdtypes/df_history.h"
109 #include "gromacs/mdtypes/energyhistory.h"
110 #include "gromacs/mdtypes/fcdata.h"
111 #include "gromacs/mdtypes/forcerec.h"
112 #include "gromacs/mdtypes/group.h"
113 #include "gromacs/mdtypes/inputrec.h"
114 #include "gromacs/mdtypes/interaction_const.h"
115 #include "gromacs/mdtypes/md_enums.h"
116 #include "gromacs/mdtypes/mdatom.h"
117 #include "gromacs/mdtypes/mdrunoptions.h"
118 #include "gromacs/mdtypes/observableshistory.h"
119 #include "gromacs/mdtypes/pullhistory.h"
120 #include "gromacs/mdtypes/simulation_workload.h"
121 #include "gromacs/mdtypes/state.h"
122 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
123 #include "gromacs/modularsimulator/energyelement.h"
124 #include "gromacs/nbnxm/gpu_data_mgmt.h"
125 #include "gromacs/nbnxm/nbnxm.h"
126 #include "gromacs/pbcutil/mshift.h"
127 #include "gromacs/pbcutil/pbc.h"
128 #include "gromacs/pulling/output.h"
129 #include "gromacs/pulling/pull.h"
130 #include "gromacs/swap/swapcoords.h"
131 #include "gromacs/timing/wallcycle.h"
132 #include "gromacs/timing/walltime_accounting.h"
133 #include "gromacs/topology/atoms.h"
134 #include "gromacs/topology/idef.h"
135 #include "gromacs/topology/mtop_util.h"
136 #include "gromacs/topology/topology.h"
137 #include "gromacs/trajectory/trajectoryframe.h"
138 #include "gromacs/utility/basedefinitions.h"
139 #include "gromacs/utility/cstringutil.h"
140 #include "gromacs/utility/fatalerror.h"
141 #include "gromacs/utility/logger.h"
142 #include "gromacs/utility/real.h"
143 #include "gromacs/utility/smalloc.h"
145 #include "legacysimulator.h"
146 #include "replicaexchange.h"
150 # include "corewrap.h"
153 using gmx::SimulationSignaller;
155 void gmx::LegacySimulator::do_md()
157 // TODO Historically, the EM and MD "integrators" used different
158 // names for the t_inputrec *parameter, but these must have the
159 // same name, now that it's a member of a struct. We use this ir
160 // alias to avoid a large ripple of nearly useless changes.
161 // t_inputrec is being replaced by IMdpOptionsProvider, so this
162 // will go away eventually.
163 t_inputrec* ir = inputrec;
164 int64_t step, step_rel;
165 double t, t0 = ir->init_t, lam0[efptNR];
166 gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
167 gmx_bool bNS, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
168 gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
169 gmx_bool do_ene, do_log, do_verbose;
170 gmx_bool bMasterState;
171 unsigned int force_flags;
172 tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, tmp_vir = { { 0 } },
176 matrix pressureCouplingMu, M;
177 gmx_repl_ex_t repl_ex = nullptr;
178 PaddedHostVector<gmx::RVec> f{};
179 gmx_global_stat_t gstat;
180 t_graph* graph = nullptr;
181 gmx_shellfc_t* shellfc;
182 gmx_bool bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
183 gmx_bool bTemp, bPres, bTrotter;
185 std::vector<RVec> cbuf;
191 real saved_conserved_quantity = 0;
194 char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
196 /* PME load balancing data for GPU kernels */
197 gmx_bool bPMETune = FALSE;
198 gmx_bool bPMETunePrinting = FALSE;
200 bool bInteractiveMDstep = false;
202 /* Domain decomposition could incorrectly miss a bonded
203 interaction, but checking for that requires a global
204 communication stage, which does not otherwise happen in DD
205 code. So we do that alongside the first global energy reduction
206 after a new DD is made. These variables handle whether the
207 check happens, and the result it returns. */
208 bool shouldCheckNumberOfBondedInteractions = false;
209 int totalNumberOfBondedInteractions = -1;
211 SimulationSignals signals;
212 // Most global communnication stages don't propagate mdrun
213 // signals, and will use this object to achieve that.
214 SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
216 if (!mdrunOptions.writeConfout)
218 // This is on by default, and the main known use case for
219 // turning it off is for convenience in benchmarking, which is
220 // something that should not show up in the general user
225 "The -noconfout functionality is deprecated, and may be removed in a "
229 /* md-vv uses averaged full step velocities for T-control
230 md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
231 md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
232 bTrotter = (EI_VV(ir->eI)
233 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
235 const bool bRerunMD = false;
237 int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
238 bGStatEveryStep = (nstglobalcomm == 1);
240 SimulationGroups* groups = &top_global->groups;
242 std::unique_ptr<EssentialDynamics> ed = nullptr;
243 if (opt2bSet("-ei", nfile, fnm))
245 /* Initialize essential dynamics sampling */
246 ed = init_edsam(mdlog, opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm), top_global,
247 ir, cr, constr, state_global, observablesHistory, oenv, startingBehavior);
249 else if (observablesHistory->edsamHistory)
252 "The checkpoint is from a run with essential dynamics sampling, "
253 "but the current run did not specify the -ei option. "
254 "Either specify the -ei option to mdrun, or do not use this checkpoint file.");
257 initialize_lambdas(fplog, *ir, MASTER(cr), &state_global->fep_state, state_global->lambda, lam0);
258 Update upd(ir, deform);
259 const bool doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
260 const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
262 bool simulationsShareState = false;
263 int nstSignalComm = nstglobalcomm;
265 // TODO This implementation of ensemble orientation restraints is nasty because
266 // a user can't just do multi-sim with single-sim orientation restraints.
267 bool usingEnsembleRestraints =
268 (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0));
269 bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
271 // Replica exchange, ensemble restraints and AWH need all
272 // simulations to remain synchronized, so they need
273 // checkpoints and stop conditions to act on the same step, so
274 // the propagation of such signals must take place between
275 // simulations, not just within simulations.
276 // TODO: Make algorithm initializers set these flags.
277 simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
279 if (simulationsShareState)
281 // Inter-simulation signal communication does not need to happen
282 // often, so we use a minimum of 200 steps to reduce overhead.
283 const int c_minimumInterSimulationSignallingInterval = 200;
284 nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1) / nstglobalcomm)
289 if (startingBehavior != StartingBehavior::RestartWithAppending)
291 pleaseCiteCouplingAlgorithms(fplog, *ir);
294 init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, ir,
295 top_global, oenv, wcycle, startingBehavior, simulationsShareState, ms);
296 gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work,
297 mdoutf_get_fp_dhdl(outf), false, startingBehavior, mdModulesNotifier);
299 gstat = global_stat_init(ir);
301 /* Check for polarizable models and flexible constraints */
302 shellfc = init_shell_flexcon(fplog, top_global, constr ? constr->numFlexibleConstraints() : 0,
303 ir->nstcalcenergy, DOMAINDECOMP(cr));
306 double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
307 if ((io > 2000) && MASTER(cr))
309 fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io);
313 // Local state only becomes valid now.
314 std::unique_ptr<t_state> stateInstance;
318 gmx_localtop_t top(top_global->ffparams);
320 auto mdatoms = mdAtoms->mdatoms();
322 std::unique_ptr<UpdateConstrainGpu> integrator;
324 if (DOMAINDECOMP(cr))
326 stateInstance = std::make_unique<t_state>();
327 state = stateInstance.get();
328 dd_init_local_state(cr->dd, state_global, state);
330 /* Distribute the charge groups over the nodes from the master node */
331 dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir,
332 imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr,
333 nrnb, nullptr, FALSE);
334 shouldCheckNumberOfBondedInteractions = true;
335 upd.setNumAtoms(state->natoms);
339 state_change_natoms(state_global, state_global->natoms);
340 f.resizeWithPadding(state_global->natoms);
341 /* Copy the pointer to the global state */
342 state = state_global;
344 /* Generate and initialize new topology */
345 mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &graph, mdAtoms, constr, vsite, shellfc);
347 upd.setNumAtoms(state->natoms);
350 const auto& simulationWork = runScheduleWork->simulationWork;
351 const bool useGpuForPme = simulationWork.useGpuPme;
352 const bool useGpuForNonbonded = simulationWork.useGpuNonbonded;
353 const bool useGpuForBufferOps = simulationWork.useGpuBufferOps;
354 const bool useGpuForUpdate = simulationWork.useGpuUpdate;
356 StatePropagatorDataGpu* stateGpu = fr->stateGpu;
360 GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
361 || constr->numConstraintsTotal() == 0,
362 "Constraints in domain decomposition are only supported with update "
363 "groups if using GPU update.\n");
364 GMX_RELEASE_ASSERT(ir->eConstrAlg != econtSHAKE || constr == nullptr
365 || constr->numConstraintsTotal() == 0,
366 "SHAKE is not supported with GPU update.");
367 GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
368 "Either PME or short-ranged non-bonded interaction tasks must run on "
369 "the GPU to use GPU update.\n");
370 GMX_RELEASE_ASSERT(ir->eI == eiMD,
371 "Only the md integrator is supported with the GPU update.\n");
373 ir->etc != etcNOSEHOOVER,
374 "Nose-Hoover temperature coupling is not supported with the GPU update.\n");
375 GMX_RELEASE_ASSERT(ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN || ir->epc == epcBERENDSEN,
376 "Only Parrinello-Rahman and Berendsen pressure coupling are supported "
377 "with the GPU update.\n");
378 GMX_RELEASE_ASSERT(!mdatoms->haveVsites,
379 "Virtual sites are not supported with the GPU update.\n");
380 GMX_RELEASE_ASSERT(ed == nullptr,
381 "Essential dynamics is not supported with the GPU update.\n");
382 GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(ir->pull),
383 "Constraints pulling is not supported with the GPU update.\n");
384 GMX_RELEASE_ASSERT(fcd->orires.nr == 0,
385 "Orientation restraints are not supported with the GPU update.\n");
388 || (!haveFreeEnergyType(*ir, efptBONDED) && !haveFreeEnergyType(*ir, efptMASS)),
389 "Free energy perturbation of masses and constraints are not supported with the GPU "
391 GMX_RELEASE_ASSERT(graph == nullptr, "The graph is not supported with GPU update.");
393 if (constr != nullptr && constr->numConstraintsTotal() > 0)
397 .appendText("Updating coordinates and applying constraints on the GPU.");
401 GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
404 GMX_RELEASE_ASSERT(fr->deviceContext != nullptr,
405 "GPU device context should be initialized to use GPU update.");
407 integrator = std::make_unique<UpdateConstrainGpu>(*ir, *top_global, *fr->deviceContext,
408 stateGpu->getUpdateStream(),
409 stateGpu->xUpdatedOnDevice());
411 integrator->setPbc(PbcType::Xyz, state->box);
414 if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
416 changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
418 if ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
420 changePinningPolicy(&f, PinningPolicy::PinnedIfSupported);
424 changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported);
427 // NOTE: The global state is no longer used at this point.
428 // But state_global is still used as temporary storage space for writing
429 // the global state to file and potentially for replica exchange.
430 // (Global topology should persist.)
432 update_mdatoms(mdatoms, state->lambda[efptMASS]);
436 /* Check nstexpanded here, because the grompp check was broken */
437 if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
440 "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
442 init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation, ir, state->dfhist);
447 EnergyElement::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput);
450 preparePrevStepPullCom(ir, pull_work, mdatoms, state, state_global, cr,
451 startingBehavior != StartingBehavior::NewSimulation);
453 // TODO: Remove this by converting AWH into a ForceProvider
454 auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms,
455 startingBehavior != StartingBehavior::NewSimulation,
456 shellfc != nullptr, opt2fn("-awh", nfile, fnm), pull_work);
458 if (useReplicaExchange && MASTER(cr))
460 repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir, replExParams);
462 /* PME tuning is only supported in the Verlet scheme, with PME for
463 * Coulomb. It is not supported with only LJ PME. */
464 bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && !mdrunOptions.reproducible
465 && ir->cutoff_scheme != ecutsGROUP);
467 pme_load_balancing_t* pme_loadbal = nullptr;
470 pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box, *fr->ic, *fr->nbv, fr->pmedata,
474 if (!ir->bContinuation)
476 if (state->flags & (1U << estV))
478 auto v = makeArrayRef(state->v);
479 /* Set the velocities of vsites, shells and frozen atoms to zero */
480 for (i = 0; i < mdatoms->homenr; i++)
482 if (mdatoms->ptype[i] == eptVSite || mdatoms->ptype[i] == eptShell)
486 else if (mdatoms->cFREEZE)
488 for (m = 0; m < DIM; m++)
490 if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
501 /* Constrain the initial coordinates and velocities */
502 do_constrain_first(fplog, constr, ir, mdatoms, state->natoms, state->x.arrayRefWithPadding(),
503 state->v.arrayRefWithPadding(), state->box, state->lambda[efptBONDED]);
507 /* Construct the virtual sites for the initial configuration */
508 construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr, top.idef.iparams,
509 top.idef.il, fr->pbcType, fr->bMolPBC, cr, state->box);
513 if (ir->efep != efepNO)
515 /* Set free energy calculation frequency as the greatest common
516 * denominator of nstdhdl and repl_ex_nst. */
517 nstfep = ir->fepvals->nstdhdl;
520 nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
522 if (useReplicaExchange)
524 nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep);
528 /* Be REALLY careful about what flags you set here. You CANNOT assume
529 * this is the first step, since we might be restarting from a checkpoint,
530 * and in that case we should not do any modifications to the state.
532 bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
534 // When restarting from a checkpoint, it can be appropriate to
535 // initialize ekind from quantities in the checkpoint. Otherwise,
536 // compute_globals must initialize ekind before the simulation
537 // starts/restarts. However, only the master rank knows what was
538 // found in the checkpoint file, so we have to communicate in
539 // order to coordinate the restart.
541 // TODO Consider removing this communication if/when checkpoint
542 // reading directly follows .tpr reading, because all ranks can
543 // agree on hasReadEkinState at that time.
544 bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
547 gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr);
549 if (hasReadEkinState)
551 restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
554 unsigned int cglo_flags =
555 (CGLO_TEMPERATURE | CGLO_GSTAT | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
556 | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) | (hasReadEkinState ? CGLO_READEKIN : 0));
558 bSumEkinhOld = FALSE;
560 t_vcm vcm(top_global->groups, *ir);
561 reportComRemovalInfo(fplog, vcm);
563 /* To minimize communication, compute_globals computes the COM velocity
564 * and the kinetic energy for the velocities without COM motion removed.
565 * Thus to get the kinetic energy without the COM contribution, we need
566 * to call compute_globals twice.
568 for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
570 unsigned int cglo_flags_iteration = cglo_flags;
571 if (bStopCM && cgloIteration == 0)
573 cglo_flags_iteration |= CGLO_STOPCM;
574 cglo_flags_iteration &= ~CGLO_TEMPERATURE;
576 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
577 makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
578 nrnb, &vcm, nullptr, enerd, force_vir, shake_vir, total_vir, pres, constr,
579 &nullSignaller, state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
581 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
583 if (cglo_flags_iteration & CGLO_STOPCM)
585 /* At initialization, do not pass x with acceleration-correction mode
586 * to avoid (incorrect) correction of the initial coordinates.
588 auto x = (vcm.mode == ecmLINEAR_ACCELERATION_CORRECTION) ? ArrayRef<RVec>()
589 : makeArrayRef(state->x);
590 process_and_stopcm_grp(fplog, &vcm, *mdatoms, x, makeArrayRef(state->v));
591 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
594 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, &top,
595 makeConstArrayRef(state->x), state->box,
596 &shouldCheckNumberOfBondedInteractions);
597 if (ir->eI == eiVVAK)
599 /* a second call to get the half step temperature initialized as well */
600 /* we do the same call as above, but turn the pressure off -- internally to
601 compute_globals, this is recognized as a velocity verlet half-step
602 kinetic energy calculation. This minimized excess variables, but
603 perhaps loses some logic?*/
605 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
606 makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
607 nrnb, &vcm, nullptr, enerd, force_vir, shake_vir, total_vir, pres, constr,
608 &nullSignaller, state->box, nullptr, &bSumEkinhOld, cglo_flags & ~CGLO_PRESSURE);
611 /* Calculate the initial half step temperature, and save the ekinh_old */
612 if (startingBehavior == StartingBehavior::NewSimulation)
614 for (i = 0; (i < ir->opts.ngtc); i++)
616 copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
620 /* need to make an initiation call to get the Trotter variables set, as well as other constants
621 for non-trotter temperature control */
622 auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
626 if (!ir->bContinuation)
628 if (constr && ir->eConstrAlg == econtLINCS)
630 fprintf(fplog, "RMS relative constraint deviation after constraining: %.2e\n",
633 if (EI_STATE_VELOCITY(ir->eI))
635 real temp = enerd->term[F_TEMP];
638 /* Result of Ekin averaged over velocities of -half
639 * and +half step, while we only have -half step here.
643 fprintf(fplog, "Initial temperature: %g K\n", temp);
648 fprintf(stderr, "starting mdrun '%s'\n", *(top_global->name));
651 sprintf(tbuf, "%8.1f", (ir->init_step + ir->nsteps) * ir->delta_t);
655 sprintf(tbuf, "%s", "infinite");
657 if (ir->init_step > 0)
659 fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
660 gmx_step_str(ir->init_step + ir->nsteps, sbuf), tbuf,
661 gmx_step_str(ir->init_step, sbuf2), ir->init_step * ir->delta_t);
665 fprintf(stderr, "%s steps, %s ps.\n", gmx_step_str(ir->nsteps, sbuf), tbuf);
667 fprintf(fplog, "\n");
670 walltime_accounting_start_time(walltime_accounting);
671 wallcycle_start(wcycle, ewcRUN);
672 print_start(fplog, cr, walltime_accounting, "mdrun");
675 /* safest point to do file checkpointing is here. More general point would be immediately before integrator call */
676 int chkpt_ret = fcCheckPointParallel(cr->nodeid, NULL, 0);
679 gmx_fatal(3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0);
683 /***********************************************************
687 ************************************************************/
690 /* Skip the first Nose-Hoover integration when we get the state from tpx */
691 bInitStep = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
692 bSumEkinhOld = FALSE;
694 bNeedRepartition = FALSE;
696 auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
697 compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
698 MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
699 mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
701 auto checkpointHandler = std::make_unique<CheckpointHandler>(
702 compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]), simulationsShareState,
703 ir->nstlist == 0, MASTER(cr), mdrunOptions.writeConfout,
704 mdrunOptions.checkpointOptions.period);
706 const bool resetCountersIsLocal = true;
707 auto resetHandler = std::make_unique<ResetHandler>(
708 compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]),
709 !resetCountersIsLocal, ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
710 mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
712 const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
714 step = ir->init_step;
717 // TODO extract this to new multi-simulation module
718 if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
720 if (!multisim_int_all_are_equal(ms, ir->nsteps))
722 GMX_LOG(mdlog.warning)
724 "Note: The number of steps is not consistent across multi "
726 "but we are proceeding anyway!");
728 if (!multisim_int_all_are_equal(ms, ir->init_step))
730 if (simulationsShareState)
735 "The initial step is not consistent across multi simulations which "
742 GMX_LOG(mdlog.warning)
744 "Note: The initial step is not consistent across multi "
746 "but we are proceeding anyway!");
751 /* and stop now if we should */
752 bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
756 /* Determine if this is a neighbor search step */
757 bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
759 if (bPMETune && bNStList)
761 // This has to be here because PME load balancing is called so early.
762 // TODO: Move to after all booleans are defined.
763 if (useGpuForUpdate && !bFirstStep)
765 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
766 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
768 /* PME grid + cut-off optimization with GPUs or PME nodes */
769 pme_loadbal_do(pme_loadbal, cr, (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
770 fplog, mdlog, *ir, fr, state->box, state->x, wcycle, step, step_rel,
771 &bPMETunePrinting, simulationWork.useGpuPmePpCommunication);
774 wallcycle_start(wcycle, ewcSTEP);
776 bLastStep = (step_rel == ir->nsteps);
777 t = t0 + step * ir->delta_t;
779 // TODO Refactor this, so that nstfep does not need a default value of zero
780 if (ir->efep != efepNO || ir->bSimTemp)
782 /* find and set the current lambdas */
783 setCurrentLambdasLocal(step, ir->fepvals, lam0, state->lambda, state->fep_state);
785 bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl);
786 bDoFEP = ((ir->efep != efepNO) && do_per_step(step, nstfep));
787 bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded)
788 && (step > 0) && (startingBehavior == StartingBehavior::NewSimulation));
791 bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep
792 && do_per_step(step, replExParams.exchangeInterval));
794 if (doSimulatedAnnealing)
796 update_annealing_target_temp(ir, t, &upd);
799 /* Stop Center of Mass motion */
800 bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
802 /* Determine whether or not to do Neighbour Searching */
803 bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
805 /* Note that the stopHandler will cause termination at nstglobalcomm
806 * steps. Since this concides with nstcalcenergy, nsttcouple and/or
807 * nstpcouple steps, we have computed the half-step kinetic energy
808 * of the previous step and can always output energies at the last step.
810 bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
812 /* do_log triggers energy and virial calculation. Because this leads
813 * to different code paths, forces can be different. Thus for exact
814 * continuation we should avoid extra log output.
815 * Note that the || bLastStep can result in non-exact continuation
816 * beyond the last step. But we don't consider that to be an issue.
818 do_log = (do_per_step(step, ir->nstlog)
819 || (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) || bLastStep);
820 do_verbose = mdrunOptions.verbose
821 && (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
823 if (useGpuForUpdate && !bFirstStep && bNS)
825 // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
826 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
827 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
828 // Copy coordinate from the GPU when needed at the search step.
829 // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
830 // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
831 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
832 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
835 if (bNS && !(bFirstStep && ir->bContinuation))
837 bMasterState = FALSE;
838 /* Correct the new box if it is too skewed */
839 if (inputrecDynamicBox(ir))
841 if (correct_box(fplog, step, state->box, graph))
844 // If update is offloaded, it should be informed about the box size change
847 integrator->setPbc(PbcType::Xyz, state->box);
851 if (DOMAINDECOMP(cr) && bMasterState)
853 dd_collect_state(cr->dd, state, state_global);
856 if (DOMAINDECOMP(cr))
858 /* Repartition the domain decomposition */
859 dd_partition_system(fplog, mdlog, step, cr, bMasterState, nstglobalcomm, state_global,
860 *top_global, ir, imdSession, pull_work, state, &f, mdAtoms, &top,
861 fr, vsite, constr, nrnb, wcycle, do_verbose && !bPMETunePrinting);
862 shouldCheckNumberOfBondedInteractions = true;
863 upd.setNumAtoms(state->natoms);
865 // Allocate or re-size GPU halo exchange object, if necessary
866 if (havePPDomainDecomposition(cr) && simulationWork.useGpuHaloExchange
867 && useGpuForNonbonded && is1D(*cr->dd))
869 // TODO remove need to pass local stream into GPU halo exchange - Redmine #3093
871 Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local);
872 void* streamNonLocal = Nbnxm::gpu_get_command_stream(
873 fr->nbv->gpu_nbv, InteractionLocality::NonLocal);
875 fr->deviceContext != nullptr,
876 "GPU device context should be initialized to use GPU halo exchange.");
877 constructGpuHaloExchange(mdlog, *cr, *fr->deviceContext, streamLocal, streamNonLocal);
882 if (MASTER(cr) && do_log)
884 energyOutput.printHeader(fplog, step, t); /* can we improve the information printed here? */
887 if (ir->efep != efepNO)
889 update_mdatoms(mdatoms, state->lambda[efptMASS]);
895 /* We need the kinetic energy at minus the half step for determining
896 * the full step kinetic energy and possibly for T-coupling.*/
897 /* This may not be quite working correctly yet . . . . */
898 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
899 makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
900 nrnb, &vcm, wcycle, enerd, nullptr, nullptr, nullptr, nullptr, constr,
901 &nullSignaller, state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
902 CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
903 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global,
904 &top, makeConstArrayRef(state->x), state->box,
905 &shouldCheckNumberOfBondedInteractions);
907 clear_mat(force_vir);
909 checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
911 /* Determine the energy and pressure:
912 * at nstcalcenergy steps and at energy output steps (set below).
914 if (EI_VV(ir->eI) && (!bInitStep))
916 bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
917 bCalcVir = bCalcEnerStep
919 && (do_per_step(step, ir->nstpcouple) || do_per_step(step - 1, ir->nstpcouple)));
923 bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
924 bCalcVir = bCalcEnerStep || (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
926 bCalcEner = bCalcEnerStep;
928 do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
930 if (do_ene || do_log || bDoReplEx)
936 /* Do we need global communication ? */
937 bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm)
938 || (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm)));
940 force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0)
941 | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0)
942 | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0));
946 /* Now is the time to relax the shells */
947 relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, enforcedRotation, step, ir,
948 imdSession, pull_work, bNS, force_flags, &top, constr, enerd, fcd,
949 state->natoms, state->x.arrayRefWithPadding(),
950 state->v.arrayRefWithPadding(), state->box, state->lambda, &state->hist,
951 f.arrayRefWithPadding(), force_vir, mdatoms, nrnb, wcycle, graph,
952 shellfc, fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler);
956 /* The AWH history need to be saved _before_ doing force calculations where the AWH bias
957 is updated (or the AWH update will be performed twice for one step when continuing).
958 It would be best to call this update function from do_md_trajectory_writing but that
959 would occur after do_force. One would have to divide the update_awh function into one
960 function applying the AWH force and one doing the AWH bias update. The update AWH
961 bias function could then be called after do_md_trajectory_writing (then containing
962 update_awh_history). The checkpointing will in the future probably moved to the start
963 of the md loop which will rid of this issue. */
964 if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
966 awh->updateHistory(state_global->awhHistory.get());
969 /* The coordinates (x) are shifted (to get whole molecules)
971 * This is parallellized as well, and does communication too.
972 * Check comments in sim_util.c
974 do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession, pull_work, step,
975 nrnb, wcycle, &top, state->box, state->x.arrayRefWithPadding(), &state->hist,
976 f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd, state->lambda, graph,
977 fr, runScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
978 (bNS ? GMX_FORCE_NS : 0) | force_flags, ddBalanceRegionHandler);
981 // VV integrators do not need the following velocity half step
982 // if it is the first step after starting from a checkpoint.
983 // That is, the half step is needed on all other steps, and
984 // also the first step when starting from a .tpr file.
985 if (EI_VV(ir->eI) && (!bFirstStep || startingBehavior == StartingBehavior::NewSimulation))
986 /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
988 rvec* vbuf = nullptr;
990 wallcycle_start(wcycle, ewcUPDATE);
991 if (ir->eI == eiVV && bInitStep)
993 /* if using velocity verlet with full time step Ekin,
994 * take the first half step only to compute the
995 * virial for the first step. From there,
996 * revert back to the initial coordinates
997 * so that the input is actually the initial step.
999 snew(vbuf, state->natoms);
1000 copy_rvecn(state->v.rvec_array(), vbuf, 0,
1001 state->natoms); /* should make this better for parallelizing? */
1005 /* this is for NHC in the Ekin(t+dt/2) version of vv */
1006 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ,
1007 trotter_seq, ettTSEQ1);
1010 update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
1011 etrtVELOCITY1, cr, constr);
1013 wallcycle_stop(wcycle, ewcUPDATE);
1014 constrain_velocities(step, nullptr, state, shake_vir, constr, bCalcVir, do_log, do_ene);
1015 wallcycle_start(wcycle, ewcUPDATE);
1016 /* if VV, compute the pressure and constraints */
1017 /* For VV2, we strictly only need this if using pressure
1018 * control, but we really would like to have accurate pressures
1020 * Think about ways around this in the future?
1021 * For now, keep this choice in comments.
1023 /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
1024 /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
1026 bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
1027 if (bCalcEner && ir->eI == eiVVAK)
1029 bSumEkinhOld = TRUE;
1031 /* for vv, the first half of the integration actually corresponds to the previous step.
1032 So we need information from the last step in the first half of the integration */
1033 if (bGStat || do_per_step(step - 1, nstglobalcomm))
1035 wallcycle_stop(wcycle, ewcUPDATE);
1037 gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
1038 makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
1039 nrnb, &vcm, wcycle, enerd, force_vir, shake_vir, total_vir, pres, constr,
1040 &nullSignaller, state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
1041 (bGStat ? CGLO_GSTAT : 0) | (bCalcEner ? CGLO_ENERGY : 0)
1042 | (bTemp ? CGLO_TEMPERATURE : 0) | (bPres ? CGLO_PRESSURE : 0)
1043 | (bPres ? CGLO_CONSTRAINT : 0) | (bStopCM ? CGLO_STOPCM : 0)
1044 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
1047 /* explanation of above:
1048 a) We compute Ekin at the full time step
1049 if 1) we are using the AveVel Ekin, and it's not the
1050 initial step, or 2) if we are using AveEkin, but need the full
1051 time step kinetic energy for the pressure (always true now, since we want accurate statistics).
1052 b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
1053 EkinAveVel because it's needed for the pressure */
1054 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1055 top_global, &top, makeConstArrayRef(state->x),
1056 state->box, &shouldCheckNumberOfBondedInteractions);
1059 process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x),
1060 makeArrayRef(state->v));
1061 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1063 wallcycle_start(wcycle, ewcUPDATE);
1065 /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
1070 m_add(force_vir, shake_vir,
1071 total_vir); /* we need the un-dispersion corrected total vir here */
1072 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ,
1073 trotter_seq, ettTSEQ2);
1075 /* TODO This is only needed when we're about to write
1076 * a checkpoint, because we use it after the restart
1077 * (in a kludge?). But what should we be doing if
1078 * the startingBehavior is NewSimulation or bInitStep are true? */
1079 if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))
1081 copy_mat(shake_vir, state->svir_prev);
1082 copy_mat(force_vir, state->fvir_prev);
1084 if (inputrecNvtTrotter(ir) && ir->eI == eiVV)
1086 /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
1087 enerd->term[F_TEMP] =
1088 sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE);
1089 enerd->term[F_EKIN] = trace(ekind->ekin);
1092 else if (bExchanged)
1094 wallcycle_stop(wcycle, ewcUPDATE);
1095 /* We need the kinetic energy at minus the half step for determining
1096 * the full step kinetic energy and possibly for T-coupling.*/
1097 /* This may not be quite working correctly yet . . . . */
1098 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
1099 makeConstArrayRef(state->v), state->box, state->lambda[efptVDW],
1100 mdatoms, nrnb, &vcm, wcycle, enerd, nullptr, nullptr, nullptr,
1101 nullptr, constr, &nullSignaller, state->box, nullptr,
1102 &bSumEkinhOld, CGLO_GSTAT | CGLO_TEMPERATURE);
1103 wallcycle_start(wcycle, ewcUPDATE);
1106 /* if it's the initial step, we performed this first step just to get the constraint virial */
1107 if (ir->eI == eiVV && bInitStep)
1109 copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
1112 wallcycle_stop(wcycle, ewcUPDATE);
1115 /* compute the conserved quantity */
1118 saved_conserved_quantity = NPT_energy(ir, state, &MassQ);
1121 last_ekin = enerd->term[F_EKIN];
1123 if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
1125 saved_conserved_quantity -= enerd->term[F_DISPCORR];
1127 /* sum up the foreign energy and dhdl terms for vv. currently done every step so that dhdl is correct in the .edr */
1128 if (ir->efep != efepNO)
1130 sum_dhdl(enerd, state->lambda, *ir->fepvals);
1134 /* ######## END FIRST UPDATE STEP ############## */
1135 /* ######## If doing VV, we now have v(dt) ###### */
1138 /* perform extended ensemble sampling in lambda - we don't
1139 actually move to the new state before outputting
1140 statistics, but if performing simulated tempering, we
1141 do update the velocities and the tau_t. */
1143 lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state,
1144 state->dfhist, step, state->v.rvec_array(), mdatoms);
1145 /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1148 copy_df_history(state_global->dfhist, state->dfhist);
1152 // Copy coordinate from the GPU for the output/checkpointing if the update is offloaded and
1153 // coordinates have not already been copied for i) search or ii) CPU force tasks.
1154 if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork
1155 && (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)
1156 || checkpointHandler->isCheckpointingStep()))
1158 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1159 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1161 // Copy velocities if needed for the output/checkpointing.
1162 // NOTE: Copy on the search steps is done at the beginning of the step.
1163 if (useGpuForUpdate && !bNS
1164 && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep()))
1166 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1167 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1169 // Copy forces for the output if the forces were reduced on the GPU (not the case on virial steps)
1170 // and update is offloaded hence forces are kept on the GPU for update and have not been
1171 // already transferred in do_force().
1172 // TODO: There should be an improved, explicit mechanism that ensures this copy is only executed
1173 // when the forces are ready on the GPU -- the same synchronizer should be used as the one
1174 // prior to GPU update.
1175 // TODO: When the output flags will be included in step workload, this copy can be combined with the
1176 // copy call in do_force(...).
1177 // NOTE: The forces should not be copied here if the vsites are present, since they were modified
1178 // on host after the D2H copy in do_force(...).
1179 if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite)
1180 && do_per_step(step, ir->nstfout))
1182 stateGpu->copyForcesFromGpu(ArrayRef<RVec>(f), AtomLocality::Local);
1183 stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
1185 /* Now we have the energies and forces corresponding to the
1186 * coordinates at time t. We must output all of this before
1189 do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, ir, state, state_global,
1190 observablesHistory, top_global, fr, outf, energyOutput, ekind, f,
1191 checkpointHandler->isCheckpointingStep(), bRerunMD, bLastStep,
1192 mdrunOptions.writeConfout, bSumEkinhOld);
1193 /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1194 bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1196 /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1197 if (startingBehavior != StartingBehavior::NewSimulation
1198 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1200 copy_mat(state->svir_prev, shake_vir);
1201 copy_mat(state->fvir_prev, force_vir);
1204 stopHandler->setSignal();
1205 resetHandler->setSignal(walltime_accounting);
1207 if (bGStat || !PAR(cr))
1209 /* In parallel we only have to check for checkpointing in steps
1210 * where we do global communication,
1211 * otherwise the other nodes don't know.
1213 checkpointHandler->setSignal(walltime_accounting);
1216 /* ######### START SECOND UPDATE STEP ################# */
1218 /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen
1219 controlled in preprocessing */
1221 if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1223 gmx_bool bIfRandomize;
1224 bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1225 /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1226 if (constr && bIfRandomize)
1228 constrain_velocities(step, nullptr, state, tmp_vir, constr, bCalcVir, do_log, do_ene);
1231 /* Box is changed in update() when we do pressure coupling,
1232 * but we should still use the old box for energy corrections and when
1233 * writing it to the energy file, so it matches the trajectory files for
1234 * the same timestep above. Make a copy in a separate array.
1236 copy_mat(state->box, lastbox);
1240 wallcycle_start(wcycle, ewcUPDATE);
1241 /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1244 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1245 /* We can only do Berendsen coupling after we have summed
1246 * the kinetic energy or virial. Since the happens
1247 * in global_state after update, we should only do it at
1248 * step % nstlist = 1 with bGStatEveryStep=FALSE.
1253 update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1254 update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep);
1259 /* velocity half-step update */
1260 update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
1261 etrtVELOCITY2, cr, constr);
1264 /* Above, initialize just copies ekinh into ekin,
1265 * it doesn't copy position (for VV),
1266 * and entire integrator for MD.
1269 if (ir->eI == eiVVAK)
1271 cbuf.resize(state->x.size());
1272 std::copy(state->x.begin(), state->x.end(), cbuf.begin());
1275 /* With leap-frog type integrators we compute the kinetic energy
1276 * at a whole time step as the average of the half-time step kinetic
1277 * energies of two subsequent steps. Therefore we need to compute the
1278 * half step kinetic energy also if we need energies at the next step.
1280 const bool needHalfStepKineticEnergy =
1281 (!EI_VV(ir->eI) && (do_per_step(step + 1, nstglobalcomm) || step_rel + 1 == ir->nsteps));
1283 // Parrinello-Rahman requires the pressure to be availible before the update to compute
1284 // the velocity scaling matrix. Hence, it runs one step after the nstpcouple step.
1285 const bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN
1286 && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
1288 if (useGpuForUpdate)
1290 if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
1292 integrator->set(stateGpu->getCoordinates(), stateGpu->getVelocities(),
1293 stateGpu->getForces(), top.idef, *mdatoms, ekind->ngtc);
1295 // Copy data to the GPU after buffers might have being reinitialized
1296 stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1297 stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1300 // If the buffer ops were not offloaded this step, the forces are on the host and have to be copied
1301 if (!runScheduleWork->stepWork.useGpuFBufferOps)
1303 stateGpu->copyForcesToGpu(ArrayRef<RVec>(f), AtomLocality::Local);
1306 const bool doTemperatureScaling =
1307 (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1309 // This applies Leap-Frog, LINCS and SETTLE in succession
1310 integrator->integrate(stateGpu->getForcesReadyOnDeviceEvent(
1311 AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps),
1312 ir->delta_t, true, bCalcVir, shake_vir, doTemperatureScaling,
1313 ekind->tcstat, doParrinelloRahman, ir->nstpcouple * ir->delta_t, M);
1315 // Copy velocities D2H after update if:
1316 // - Globals are computed this step (includes the energy output steps).
1317 // - Temperature is needed for the next step.
1318 if (bGStat || needHalfStepKineticEnergy)
1320 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1321 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1326 update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
1327 etrtPOSITION, cr, constr);
1329 wallcycle_stop(wcycle, ewcUPDATE);
1331 constrain_coordinates(step, &dvdl_constr, state, shake_vir, &upd, constr, bCalcVir,
1334 update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state, cr, nrnb, wcycle, &upd,
1335 constr, do_log, do_ene);
1336 finish_update(ir, mdatoms, state, graph, nrnb, wcycle, &upd, constr);
1339 if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1341 updatePrevStepPullCom(pull_work, state);
1344 if (ir->eI == eiVVAK)
1346 /* erase F_EKIN and F_TEMP here? */
1347 /* just compute the kinetic energy at the half step to perform a trotter step */
1348 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
1349 makeConstArrayRef(state->v), state->box, state->lambda[efptVDW],
1350 mdatoms, nrnb, &vcm, wcycle, enerd, force_vir, shake_vir, total_vir,
1351 pres, constr, &nullSignaller, lastbox, nullptr, &bSumEkinhOld,
1352 (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE);
1353 wallcycle_start(wcycle, ewcUPDATE);
1354 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
1355 /* now we know the scaling, we can compute the positions again */
1356 std::copy(cbuf.begin(), cbuf.end(), state->x.begin());
1358 update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
1359 etrtPOSITION, cr, constr);
1360 wallcycle_stop(wcycle, ewcUPDATE);
1362 /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
1363 /* are the small terms in the shake_vir here due
1364 * to numerical errors, or are they important
1365 * physically? I'm thinking they are just errors, but not completely sure.
1366 * For now, will call without actually constraining, constr=NULL*/
1367 finish_update(ir, mdatoms, state, graph, nrnb, wcycle, &upd, nullptr);
1371 /* this factor or 2 correction is necessary
1372 because half of the constraint force is removed
1373 in the vv step, so we have to double it. See
1374 the Redmine issue #1255. It is not yet clear
1375 if the factor of 2 is exact, or just a very
1376 good approximation, and this will be
1377 investigated. The next step is to see if this
1378 can be done adding a dhdl contribution from the
1379 rattle step, but this is somewhat more
1380 complicated with the current code. Will be
1381 investigated, hopefully for 4.6.3. However,
1382 this current solution is much better than
1383 having it completely wrong.
1385 enerd->term[F_DVDL_CONSTR] += 2 * dvdl_constr;
1389 enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1392 if (vsite != nullptr)
1394 wallcycle_start(wcycle, ewcVSITECONSTR);
1395 if (graph != nullptr)
1397 shift_self(graph, state->box, state->x.rvec_array());
1399 construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
1400 top.idef.iparams, top.idef.il, fr->pbcType, fr->bMolPBC, cr, state->box);
1402 if (graph != nullptr)
1404 unshift_self(graph, state->box, state->x.rvec_array());
1406 wallcycle_stop(wcycle, ewcVSITECONSTR);
1409 /* ############## IF NOT VV, Calculate globals HERE ############ */
1410 /* With Leap-Frog we can skip compute_globals at
1411 * non-communication steps, but we need to calculate
1412 * the kinetic energy one step before communication.
1415 // Organize to do inter-simulation signalling on steps if
1416 // and when algorithms require it.
1417 const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1419 if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
1421 // Copy coordinates when needed to stop the CM motion.
1422 if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
1424 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1425 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1427 // Since we're already communicating at this step, we
1428 // can propagate intra-simulation signals. Note that
1429 // check_nstglobalcomm has the responsibility for
1430 // choosing the value of nstglobalcomm that is one way
1431 // bGStat becomes true, so we can't get into a
1432 // situation where e.g. checkpointing can't be
1434 bool doIntraSimSignal = true;
1435 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1438 gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
1439 makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
1440 nrnb, &vcm, wcycle, enerd, force_vir, shake_vir, total_vir, pres, constr,
1441 &signaller, lastbox, &totalNumberOfBondedInteractions, &bSumEkinhOld,
1442 (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1443 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1444 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1445 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT
1446 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
1448 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1449 top_global, &top, makeConstArrayRef(state->x),
1450 state->box, &shouldCheckNumberOfBondedInteractions);
1451 if (!EI_VV(ir->eI) && bStopCM)
1453 process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x),
1454 makeArrayRef(state->v));
1455 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1457 // TODO: The special case of removing CM motion should be dealt more gracefully
1458 if (useGpuForUpdate)
1460 stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1461 // Here we block until the H2D copy completes because event sync with the
1462 // force kernels that use the coordinates on the next steps is not implemented
1463 // (not because of a race on state->x being modified on the CPU while H2D is in progress).
1464 stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
1470 /* ############# END CALC EKIN AND PRESSURE ################# */
1472 /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1473 the virial that should probably be addressed eventually. state->veta has better properies,
1474 but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1475 generate the new shake_vir, but test the veta value for convergence. This will take some thought. */
1477 if (ir->efep != efepNO && !EI_VV(ir->eI))
1479 /* Sum up the foreign energy and dhdl terms for md and sd.
1480 Currently done every step so that dhdl is correct in the .edr */
1481 sum_dhdl(enerd, state->lambda, *ir->fepvals);
1484 update_pcouple_after_coordinates(fplog, step, ir, mdatoms, pres, force_vir, shake_vir,
1485 pressureCouplingMu, state, nrnb, &upd, !useGpuForUpdate);
1487 const bool doBerendsenPressureCoupling =
1488 (inputrec->epc == epcBERENDSEN && do_per_step(step, inputrec->nstpcouple));
1489 if (useGpuForUpdate && (doBerendsenPressureCoupling || doParrinelloRahman))
1491 integrator->scaleCoordinates(pressureCouplingMu);
1492 integrator->setPbc(PbcType::Xyz, state->box);
1495 /* ################# END UPDATE STEP 2 ################# */
1496 /* #### We now have r(t+dt) and v(t+dt/2) ############# */
1498 /* The coordinates (x) were unshifted in update */
1501 /* We will not sum ekinh_old,
1502 * so signal that we still have to do it.
1504 bSumEkinhOld = TRUE;
1509 /* ######### BEGIN PREPARING EDR OUTPUT ########### */
1511 /* use the directly determined last velocity, not actually the averaged half steps */
1512 if (bTrotter && ir->eI == eiVV)
1514 enerd->term[F_EKIN] = last_ekin;
1516 enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1518 if (integratorHasConservedEnergyQuantity(ir))
1522 enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1526 enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1529 /* ######### END PREPARING EDR OUTPUT ########### */
1535 if (fplog && do_log && bDoExpanded)
1537 /* only needed if doing expanded ensemble */
1538 PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals,
1539 ir->bSimTemp ? ir->simtempvals : nullptr,
1540 state_global->dfhist, state->fep_state, ir->nstlog, step);
1544 energyOutput.addDataAtEnergyStep(bDoDHDL, bCalcEnerStep, t, mdatoms->tmass, enerd, state,
1545 ir->fepvals, ir->expandedvals, lastbox, shake_vir,
1546 force_vir, total_vir, pres, ekind, mu_tot, constr);
1550 energyOutput.recordNonEnergyStep();
1553 gmx_bool do_dr = do_per_step(step, ir->nstdisreout);
1554 gmx_bool do_or = do_per_step(step, ir->nstorireout);
1556 if (doSimulatedAnnealing)
1558 energyOutput.printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts));
1560 if (do_log || do_ene || do_dr || do_or)
1562 energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
1563 do_log ? fplog : nullptr, step, t, fcd, awh.get());
1568 pull_print_output(pull_work, step, t);
1571 if (do_per_step(step, ir->nstlog))
1573 if (fflush(fplog) != 0)
1575 gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1581 /* Have to do this part _after_ outputting the logfile and the edr file */
1582 /* Gets written into the state at the beginning of next loop*/
1583 state->fep_state = lamnew;
1585 /* Print the remaining wall clock time for the run */
1586 if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting)
1590 fprintf(stderr, "\n");
1592 print_time(stderr, walltime_accounting, step, ir, cr);
1595 /* Ion/water position swapping.
1596 * Not done in last step since trajectory writing happens before this call
1597 * in the MD loop and exchanges would be lost anyway. */
1598 bNeedRepartition = FALSE;
1599 if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && do_per_step(step, ir->swap->nstswap))
1602 do_swapcoords(cr, step, t, ir, swap, wcycle, as_rvec_array(state->x.data()),
1603 state->box, MASTER(cr) && mdrunOptions.verbose, bRerunMD);
1605 if (bNeedRepartition && DOMAINDECOMP(cr))
1607 dd_collect_state(cr->dd, state, state_global);
1611 /* Replica exchange */
1615 bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t);
1618 if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr))
1620 dd_partition_system(fplog, mdlog, step, cr, TRUE, 1, state_global, *top_global, ir,
1621 imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr,
1622 nrnb, wcycle, FALSE);
1623 shouldCheckNumberOfBondedInteractions = true;
1624 upd.setNumAtoms(state->natoms);
1630 /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1631 /* With all integrators, except VV, we need to retain the pressure
1632 * at the current step for coupling at the next step.
1634 if ((state->flags & (1U << estPRES_PREV))
1635 && (bGStatEveryStep || (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1637 /* Store the pressure in t_state for pressure coupling
1638 * at the next MD step.
1640 copy_mat(pres, state->pres_prev);
1643 /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */
1645 if ((membed != nullptr) && (!bLastStep))
1647 rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1650 cycles = wallcycle_stop(wcycle, ewcSTEP);
1651 if (DOMAINDECOMP(cr) && wcycle)
1653 dd_cycles_add(cr->dd, cycles, ddCyclStep);
1656 /* increase the MD step number */
1660 resetHandler->resetCounters(step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb,
1661 fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1663 /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1664 imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1666 /* End of main MD loop */
1668 /* Closing TNG files can include compressing data. Therefore it is good to do that
1669 * before stopping the time measurements. */
1670 mdoutf_tng_close(outf);
1672 /* Stop measuring walltime */
1673 walltime_accounting_end_time(walltime_accounting);
1675 if (!thisRankHasDuty(cr, DUTY_PME))
1677 /* Tell the PME only node to finish */
1678 gmx_pme_send_finish(cr);
1683 if (ir->nstcalcenergy > 0)
1685 energyOutput.printAnnealingTemperatures(fplog, groups, &(ir->opts));
1686 energyOutput.printAverages(fplog, groups);
1693 pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1696 done_shellfc(fplog, shellfc, step_rel);
1698 if (useReplicaExchange && MASTER(cr))
1700 print_replica_exchange_statistics(fplog, repl_ex);
1703 walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1705 global_stat_destroy(gstat);