2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2011-2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * \brief Implements the integrator for normal molecular dynamics simulations
41 * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42 * \ingroup module_mdrun
54 #include "gromacs/awh/awh.h"
55 #include "gromacs/commandline/filenm.h"
56 #include "gromacs/domdec/collect.h"
57 #include "gromacs/domdec/dlbtiming.h"
58 #include "gromacs/domdec/domdec.h"
59 #include "gromacs/domdec/domdec_network.h"
60 #include "gromacs/domdec/domdec_struct.h"
61 #include "gromacs/domdec/gpuhaloexchange.h"
62 #include "gromacs/domdec/mdsetup.h"
63 #include "gromacs/domdec/partition.h"
64 #include "gromacs/essentialdynamics/edsam.h"
65 #include "gromacs/ewald/pme_load_balancing.h"
66 #include "gromacs/ewald/pme_pp.h"
67 #include "gromacs/fileio/trxio.h"
68 #include "gromacs/gmxlib/network.h"
69 #include "gromacs/gmxlib/nrnb.h"
70 #include "gromacs/gpu_utils/gpu_utils.h"
71 #include "gromacs/imd/imd.h"
72 #include "gromacs/listed_forces/manage_threading.h"
73 #include "gromacs/math/functions.h"
74 #include "gromacs/math/utilities.h"
75 #include "gromacs/math/vec.h"
76 #include "gromacs/math/vectypes.h"
77 #include "gromacs/mdlib/checkpointhandler.h"
78 #include "gromacs/mdlib/compute_io.h"
79 #include "gromacs/mdlib/constr.h"
80 #include "gromacs/mdlib/ebin.h"
81 #include "gromacs/mdlib/enerdata_utils.h"
82 #include "gromacs/mdlib/energyoutput.h"
83 #include "gromacs/mdlib/expanded.h"
84 #include "gromacs/mdlib/force.h"
85 #include "gromacs/mdlib/force_flags.h"
86 #include "gromacs/mdlib/forcerec.h"
87 #include "gromacs/mdlib/md_support.h"
88 #include "gromacs/mdlib/mdatoms.h"
89 #include "gromacs/mdlib/mdoutf.h"
90 #include "gromacs/mdlib/membed.h"
91 #include "gromacs/mdlib/resethandler.h"
92 #include "gromacs/mdlib/sighandler.h"
93 #include "gromacs/mdlib/simulationsignal.h"
94 #include "gromacs/mdlib/stat.h"
95 #include "gromacs/mdlib/stophandler.h"
96 #include "gromacs/mdlib/tgroup.h"
97 #include "gromacs/mdlib/trajectory_writing.h"
98 #include "gromacs/mdlib/update.h"
99 #include "gromacs/mdlib/update_constrain_gpu.h"
100 #include "gromacs/mdlib/vcm.h"
101 #include "gromacs/mdlib/vsite.h"
102 #include "gromacs/mdrunutility/handlerestart.h"
103 #include "gromacs/mdrunutility/multisim.h"
104 #include "gromacs/mdrunutility/printtime.h"
105 #include "gromacs/mdtypes/awh_history.h"
106 #include "gromacs/mdtypes/awh_params.h"
107 #include "gromacs/mdtypes/commrec.h"
108 #include "gromacs/mdtypes/df_history.h"
109 #include "gromacs/mdtypes/energyhistory.h"
110 #include "gromacs/mdtypes/fcdata.h"
111 #include "gromacs/mdtypes/forcerec.h"
112 #include "gromacs/mdtypes/group.h"
113 #include "gromacs/mdtypes/inputrec.h"
114 #include "gromacs/mdtypes/interaction_const.h"
115 #include "gromacs/mdtypes/md_enums.h"
116 #include "gromacs/mdtypes/mdatom.h"
117 #include "gromacs/mdtypes/mdrunoptions.h"
118 #include "gromacs/mdtypes/observableshistory.h"
119 #include "gromacs/mdtypes/pullhistory.h"
120 #include "gromacs/mdtypes/simulation_workload.h"
121 #include "gromacs/mdtypes/state.h"
122 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
123 #include "gromacs/modularsimulator/energyelement.h"
124 #include "gromacs/nbnxm/gpu_data_mgmt.h"
125 #include "gromacs/nbnxm/nbnxm.h"
126 #include "gromacs/pbcutil/mshift.h"
127 #include "gromacs/pbcutil/pbc.h"
128 #include "gromacs/pulling/output.h"
129 #include "gromacs/pulling/pull.h"
130 #include "gromacs/swap/swapcoords.h"
131 #include "gromacs/timing/wallcycle.h"
132 #include "gromacs/timing/walltime_accounting.h"
133 #include "gromacs/topology/atoms.h"
134 #include "gromacs/topology/idef.h"
135 #include "gromacs/topology/mtop_util.h"
136 #include "gromacs/topology/topology.h"
137 #include "gromacs/trajectory/trajectoryframe.h"
138 #include "gromacs/utility/basedefinitions.h"
139 #include "gromacs/utility/cstringutil.h"
140 #include "gromacs/utility/fatalerror.h"
141 #include "gromacs/utility/logger.h"
142 #include "gromacs/utility/real.h"
143 #include "gromacs/utility/smalloc.h"
145 #include "legacysimulator.h"
146 #include "replicaexchange.h"
150 # include "corewrap.h"
153 using gmx::SimulationSignaller;
155 void gmx::LegacySimulator::do_md()
157 // TODO Historically, the EM and MD "integrators" used different
158 // names for the t_inputrec *parameter, but these must have the
159 // same name, now that it's a member of a struct. We use this ir
160 // alias to avoid a large ripple of nearly useless changes.
161 // t_inputrec is being replaced by IMdpOptionsProvider, so this
162 // will go away eventually.
163 t_inputrec* ir = inputrec;
164 int64_t step, step_rel;
165 double t, t0 = ir->init_t, lam0[efptNR];
166 gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
167 gmx_bool bNS, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
168 gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
169 gmx_bool do_ene, do_log, do_verbose;
170 gmx_bool bMasterState;
171 unsigned int force_flags;
172 tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, tmp_vir = { { 0 } },
176 matrix pressureCouplingMu, M;
177 gmx_repl_ex_t repl_ex = nullptr;
178 PaddedHostVector<gmx::RVec> f{};
179 gmx_global_stat_t gstat;
180 t_graph* graph = nullptr;
181 gmx_shellfc_t* shellfc;
182 gmx_bool bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
183 gmx_bool bTemp, bPres, bTrotter;
185 std::vector<RVec> cbuf;
191 real saved_conserved_quantity = 0;
194 char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
196 /* PME load balancing data for GPU kernels */
197 gmx_bool bPMETune = FALSE;
198 gmx_bool bPMETunePrinting = FALSE;
200 bool bInteractiveMDstep = false;
202 /* Domain decomposition could incorrectly miss a bonded
203 interaction, but checking for that requires a global
204 communication stage, which does not otherwise happen in DD
205 code. So we do that alongside the first global energy reduction
206 after a new DD is made. These variables handle whether the
207 check happens, and the result it returns. */
208 bool shouldCheckNumberOfBondedInteractions = false;
209 int totalNumberOfBondedInteractions = -1;
211 SimulationSignals signals;
212 // Most global communnication stages don't propagate mdrun
213 // signals, and will use this object to achieve that.
214 SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
216 if (!mdrunOptions.writeConfout)
218 // This is on by default, and the main known use case for
219 // turning it off is for convenience in benchmarking, which is
220 // something that should not show up in the general user
225 "The -noconfout functionality is deprecated, and may be removed in a "
229 /* md-vv uses averaged full step velocities for T-control
230 md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
231 md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
232 bTrotter = (EI_VV(ir->eI)
233 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
235 const bool bRerunMD = false;
237 int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
238 bGStatEveryStep = (nstglobalcomm == 1);
240 SimulationGroups* groups = &top_global->groups;
242 std::unique_ptr<EssentialDynamics> ed = nullptr;
243 if (opt2bSet("-ei", nfile, fnm))
245 /* Initialize essential dynamics sampling */
246 ed = init_edsam(mdlog, opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm), top_global,
247 ir, cr, constr, state_global, observablesHistory, oenv, startingBehavior);
249 else if (observablesHistory->edsamHistory)
252 "The checkpoint is from a run with essential dynamics sampling, "
253 "but the current run did not specify the -ei option. "
254 "Either specify the -ei option to mdrun, or do not use this checkpoint file.");
257 initialize_lambdas(fplog, *ir, MASTER(cr), &state_global->fep_state, state_global->lambda, lam0);
258 Update upd(ir, deform);
259 const bool doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
260 const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
262 bool simulationsShareState = false;
263 int nstSignalComm = nstglobalcomm;
265 // TODO This implementation of ensemble orientation restraints is nasty because
266 // a user can't just do multi-sim with single-sim orientation restraints.
267 bool usingEnsembleRestraints =
268 (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0));
269 bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
271 // Replica exchange, ensemble restraints and AWH need all
272 // simulations to remain synchronized, so they need
273 // checkpoints and stop conditions to act on the same step, so
274 // the propagation of such signals must take place between
275 // simulations, not just within simulations.
276 // TODO: Make algorithm initializers set these flags.
277 simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
279 if (simulationsShareState)
281 // Inter-simulation signal communication does not need to happen
282 // often, so we use a minimum of 200 steps to reduce overhead.
283 const int c_minimumInterSimulationSignallingInterval = 200;
284 nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1) / nstglobalcomm)
289 if (startingBehavior != StartingBehavior::RestartWithAppending)
291 pleaseCiteCouplingAlgorithms(fplog, *ir);
294 init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, ir,
295 top_global, oenv, wcycle, startingBehavior, simulationsShareState, ms);
296 gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work,
297 mdoutf_get_fp_dhdl(outf), false, startingBehavior, mdModulesNotifier);
299 gstat = global_stat_init(ir);
301 /* Check for polarizable models and flexible constraints */
302 shellfc = init_shell_flexcon(fplog, top_global, constr ? constr->numFlexibleConstraints() : 0,
303 ir->nstcalcenergy, DOMAINDECOMP(cr));
306 double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
307 if ((io > 2000) && MASTER(cr))
309 fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io);
313 // Local state only becomes valid now.
314 std::unique_ptr<t_state> stateInstance;
318 gmx_localtop_t top(top_global->ffparams);
320 auto mdatoms = mdAtoms->mdatoms();
322 std::unique_ptr<UpdateConstrainGpu> integrator;
324 if (DOMAINDECOMP(cr))
326 stateInstance = std::make_unique<t_state>();
327 state = stateInstance.get();
328 dd_init_local_state(cr->dd, state_global, state);
330 /* Distribute the charge groups over the nodes from the master node */
331 dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir,
332 imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr,
333 nrnb, nullptr, FALSE);
334 shouldCheckNumberOfBondedInteractions = true;
335 upd.setNumAtoms(state->natoms);
339 state_change_natoms(state_global, state_global->natoms);
340 f.resizeWithPadding(state_global->natoms);
341 /* Copy the pointer to the global state */
342 state = state_global;
344 /* Generate and initialize new topology */
345 mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &graph, mdAtoms, constr, vsite, shellfc);
347 upd.setNumAtoms(state->natoms);
350 const auto& simulationWork = runScheduleWork->simulationWork;
351 const bool useGpuForPme = simulationWork.useGpuPme;
352 const bool useGpuForNonbonded = simulationWork.useGpuNonbonded;
353 const bool useGpuForBufferOps = simulationWork.useGpuBufferOps;
354 const bool useGpuForUpdate = simulationWork.useGpuUpdate;
356 StatePropagatorDataGpu* stateGpu = fr->stateGpu;
360 GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
361 || constr->numConstraintsTotal() == 0,
362 "Constraints in domain decomposition are only supported with update "
363 "groups if using GPU update.\n");
364 GMX_RELEASE_ASSERT(ir->eConstrAlg != econtSHAKE || constr == nullptr
365 || constr->numConstraintsTotal() == 0,
366 "SHAKE is not supported with GPU update.");
367 GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
368 "Either PME or short-ranged non-bonded interaction tasks must run on "
369 "the GPU to use GPU update.\n");
370 GMX_RELEASE_ASSERT(ir->eI == eiMD,
371 "Only the md integrator is supported with the GPU update.\n");
373 ir->etc != etcNOSEHOOVER,
374 "Nose-Hoover temperature coupling is not supported with the GPU update.\n");
375 GMX_RELEASE_ASSERT(ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN || ir->epc == epcBERENDSEN,
376 "Only Parrinello-Rahman and Berendsen pressure coupling are supported "
377 "with the GPU update.\n");
378 GMX_RELEASE_ASSERT(!mdatoms->haveVsites,
379 "Virtual sites are not supported with the GPU update.\n");
380 GMX_RELEASE_ASSERT(ed == nullptr,
381 "Essential dynamics is not supported with the GPU update.\n");
382 GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(ir->pull),
383 "Constraints pulling is not supported with the GPU update.\n");
384 GMX_RELEASE_ASSERT(fcd->orires.nr == 0,
385 "Orientation restraints are not supported with the GPU update.\n");
388 || (!haveFreeEnergyType(*ir, efptBONDED) && !haveFreeEnergyType(*ir, efptMASS)),
389 "Free energy perturbation of masses and constraints are not supported with the GPU "
391 GMX_RELEASE_ASSERT(graph == nullptr, "The graph is not supported with GPU update.");
393 if (constr != nullptr && constr->numConstraintsTotal() > 0)
397 .appendText("Updating coordinates and applying constraints on the GPU.");
401 GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
403 integrator = std::make_unique<UpdateConstrainGpu>(
404 *ir, *top_global, stateGpu->getUpdateStream(), stateGpu->xUpdatedOnDevice());
406 integrator->setPbc(PbcType::Xyz, state->box);
409 if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
411 changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
413 if ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
415 changePinningPolicy(&f, PinningPolicy::PinnedIfSupported);
419 changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported);
422 // NOTE: The global state is no longer used at this point.
423 // But state_global is still used as temporary storage space for writing
424 // the global state to file and potentially for replica exchange.
425 // (Global topology should persist.)
427 update_mdatoms(mdatoms, state->lambda[efptMASS]);
431 /* Check nstexpanded here, because the grompp check was broken */
432 if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
435 "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
437 init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation, ir, state->dfhist);
442 EnergyElement::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput);
445 preparePrevStepPullCom(ir, pull_work, mdatoms, state, state_global, cr,
446 startingBehavior != StartingBehavior::NewSimulation);
448 // TODO: Remove this by converting AWH into a ForceProvider
449 auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms,
450 startingBehavior != StartingBehavior::NewSimulation,
451 shellfc != nullptr, opt2fn("-awh", nfile, fnm), pull_work);
453 if (useReplicaExchange && MASTER(cr))
455 repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir, replExParams);
457 /* PME tuning is only supported in the Verlet scheme, with PME for
458 * Coulomb. It is not supported with only LJ PME. */
459 bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && !mdrunOptions.reproducible
460 && ir->cutoff_scheme != ecutsGROUP);
462 pme_load_balancing_t* pme_loadbal = nullptr;
465 pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box, *fr->ic, *fr->nbv, fr->pmedata,
469 if (!ir->bContinuation)
471 if (state->flags & (1U << estV))
473 auto v = makeArrayRef(state->v);
474 /* Set the velocities of vsites, shells and frozen atoms to zero */
475 for (i = 0; i < mdatoms->homenr; i++)
477 if (mdatoms->ptype[i] == eptVSite || mdatoms->ptype[i] == eptShell)
481 else if (mdatoms->cFREEZE)
483 for (m = 0; m < DIM; m++)
485 if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
496 /* Constrain the initial coordinates and velocities */
497 do_constrain_first(fplog, constr, ir, mdatoms, state->natoms, state->x.arrayRefWithPadding(),
498 state->v.arrayRefWithPadding(), state->box, state->lambda[efptBONDED]);
502 /* Construct the virtual sites for the initial configuration */
503 construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr, top.idef.iparams,
504 top.idef.il, fr->pbcType, fr->bMolPBC, cr, state->box);
508 if (ir->efep != efepNO)
510 /* Set free energy calculation frequency as the greatest common
511 * denominator of nstdhdl and repl_ex_nst. */
512 nstfep = ir->fepvals->nstdhdl;
515 nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
517 if (useReplicaExchange)
519 nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep);
523 /* Be REALLY careful about what flags you set here. You CANNOT assume
524 * this is the first step, since we might be restarting from a checkpoint,
525 * and in that case we should not do any modifications to the state.
527 bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
529 // When restarting from a checkpoint, it can be appropriate to
530 // initialize ekind from quantities in the checkpoint. Otherwise,
531 // compute_globals must initialize ekind before the simulation
532 // starts/restarts. However, only the master rank knows what was
533 // found in the checkpoint file, so we have to communicate in
534 // order to coordinate the restart.
536 // TODO Consider removing this communication if/when checkpoint
537 // reading directly follows .tpr reading, because all ranks can
538 // agree on hasReadEkinState at that time.
539 bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
542 gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr);
544 if (hasReadEkinState)
546 restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
549 unsigned int cglo_flags =
550 (CGLO_TEMPERATURE | CGLO_GSTAT | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
551 | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) | (hasReadEkinState ? CGLO_READEKIN : 0));
553 bSumEkinhOld = FALSE;
555 t_vcm vcm(top_global->groups, *ir);
556 reportComRemovalInfo(fplog, vcm);
558 /* To minimize communication, compute_globals computes the COM velocity
559 * and the kinetic energy for the velocities without COM motion removed.
560 * Thus to get the kinetic energy without the COM contribution, we need
561 * to call compute_globals twice.
563 for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
565 unsigned int cglo_flags_iteration = cglo_flags;
566 if (bStopCM && cgloIteration == 0)
568 cglo_flags_iteration |= CGLO_STOPCM;
569 cglo_flags_iteration &= ~CGLO_TEMPERATURE;
571 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
572 makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
573 nrnb, &vcm, nullptr, enerd, force_vir, shake_vir, total_vir, pres, constr,
574 &nullSignaller, state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
576 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
578 if (cglo_flags_iteration & CGLO_STOPCM)
580 /* At initialization, do not pass x with acceleration-correction mode
581 * to avoid (incorrect) correction of the initial coordinates.
583 auto x = (vcm.mode == ecmLINEAR_ACCELERATION_CORRECTION) ? ArrayRef<RVec>()
584 : makeArrayRef(state->x);
585 process_and_stopcm_grp(fplog, &vcm, *mdatoms, x, makeArrayRef(state->v));
586 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
589 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, &top,
590 makeConstArrayRef(state->x), state->box,
591 &shouldCheckNumberOfBondedInteractions);
592 if (ir->eI == eiVVAK)
594 /* a second call to get the half step temperature initialized as well */
595 /* we do the same call as above, but turn the pressure off -- internally to
596 compute_globals, this is recognized as a velocity verlet half-step
597 kinetic energy calculation. This minimized excess variables, but
598 perhaps loses some logic?*/
600 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
601 makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
602 nrnb, &vcm, nullptr, enerd, force_vir, shake_vir, total_vir, pres, constr,
603 &nullSignaller, state->box, nullptr, &bSumEkinhOld, cglo_flags & ~CGLO_PRESSURE);
606 /* Calculate the initial half step temperature, and save the ekinh_old */
607 if (startingBehavior == StartingBehavior::NewSimulation)
609 for (i = 0; (i < ir->opts.ngtc); i++)
611 copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
615 /* need to make an initiation call to get the Trotter variables set, as well as other constants
616 for non-trotter temperature control */
617 auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
621 if (!ir->bContinuation)
623 if (constr && ir->eConstrAlg == econtLINCS)
625 fprintf(fplog, "RMS relative constraint deviation after constraining: %.2e\n",
628 if (EI_STATE_VELOCITY(ir->eI))
630 real temp = enerd->term[F_TEMP];
633 /* Result of Ekin averaged over velocities of -half
634 * and +half step, while we only have -half step here.
638 fprintf(fplog, "Initial temperature: %g K\n", temp);
643 fprintf(stderr, "starting mdrun '%s'\n", *(top_global->name));
646 sprintf(tbuf, "%8.1f", (ir->init_step + ir->nsteps) * ir->delta_t);
650 sprintf(tbuf, "%s", "infinite");
652 if (ir->init_step > 0)
654 fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
655 gmx_step_str(ir->init_step + ir->nsteps, sbuf), tbuf,
656 gmx_step_str(ir->init_step, sbuf2), ir->init_step * ir->delta_t);
660 fprintf(stderr, "%s steps, %s ps.\n", gmx_step_str(ir->nsteps, sbuf), tbuf);
662 fprintf(fplog, "\n");
665 walltime_accounting_start_time(walltime_accounting);
666 wallcycle_start(wcycle, ewcRUN);
667 print_start(fplog, cr, walltime_accounting, "mdrun");
670 /* safest point to do file checkpointing is here. More general point would be immediately before integrator call */
671 int chkpt_ret = fcCheckPointParallel(cr->nodeid, NULL, 0);
674 gmx_fatal(3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0);
678 /***********************************************************
682 ************************************************************/
685 /* Skip the first Nose-Hoover integration when we get the state from tpx */
686 bInitStep = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
687 bSumEkinhOld = FALSE;
689 bNeedRepartition = FALSE;
691 auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
692 compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
693 MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
694 mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
696 auto checkpointHandler = std::make_unique<CheckpointHandler>(
697 compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]), simulationsShareState,
698 ir->nstlist == 0, MASTER(cr), mdrunOptions.writeConfout,
699 mdrunOptions.checkpointOptions.period);
701 const bool resetCountersIsLocal = true;
702 auto resetHandler = std::make_unique<ResetHandler>(
703 compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]),
704 !resetCountersIsLocal, ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
705 mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
707 const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
709 step = ir->init_step;
712 // TODO extract this to new multi-simulation module
713 if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
715 if (!multisim_int_all_are_equal(ms, ir->nsteps))
717 GMX_LOG(mdlog.warning)
719 "Note: The number of steps is not consistent across multi "
721 "but we are proceeding anyway!");
723 if (!multisim_int_all_are_equal(ms, ir->init_step))
725 if (simulationsShareState)
730 "The initial step is not consistent across multi simulations which "
737 GMX_LOG(mdlog.warning)
739 "Note: The initial step is not consistent across multi "
741 "but we are proceeding anyway!");
746 /* and stop now if we should */
747 bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
751 /* Determine if this is a neighbor search step */
752 bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
754 if (bPMETune && bNStList)
756 // This has to be here because PME load balancing is called so early.
757 // TODO: Move to after all booleans are defined.
758 if (useGpuForUpdate && !bFirstStep)
760 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
761 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
763 /* PME grid + cut-off optimization with GPUs or PME nodes */
764 pme_loadbal_do(pme_loadbal, cr, (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
765 fplog, mdlog, *ir, fr, state->box, state->x, wcycle, step, step_rel,
766 &bPMETunePrinting, simulationWork.useGpuPmePpCommunication);
769 wallcycle_start(wcycle, ewcSTEP);
771 bLastStep = (step_rel == ir->nsteps);
772 t = t0 + step * ir->delta_t;
774 // TODO Refactor this, so that nstfep does not need a default value of zero
775 if (ir->efep != efepNO || ir->bSimTemp)
777 /* find and set the current lambdas */
778 setCurrentLambdasLocal(step, ir->fepvals, lam0, state->lambda, state->fep_state);
780 bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl);
781 bDoFEP = ((ir->efep != efepNO) && do_per_step(step, nstfep));
782 bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded)
783 && (step > 0) && (startingBehavior == StartingBehavior::NewSimulation));
786 bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep
787 && do_per_step(step, replExParams.exchangeInterval));
789 if (doSimulatedAnnealing)
791 update_annealing_target_temp(ir, t, &upd);
794 /* Stop Center of Mass motion */
795 bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
797 /* Determine whether or not to do Neighbour Searching */
798 bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
800 /* Note that the stopHandler will cause termination at nstglobalcomm
801 * steps. Since this concides with nstcalcenergy, nsttcouple and/or
802 * nstpcouple steps, we have computed the half-step kinetic energy
803 * of the previous step and can always output energies at the last step.
805 bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
807 /* do_log triggers energy and virial calculation. Because this leads
808 * to different code paths, forces can be different. Thus for exact
809 * continuation we should avoid extra log output.
810 * Note that the || bLastStep can result in non-exact continuation
811 * beyond the last step. But we don't consider that to be an issue.
813 do_log = (do_per_step(step, ir->nstlog)
814 || (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) || bLastStep);
815 do_verbose = mdrunOptions.verbose
816 && (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
818 if (useGpuForUpdate && !bFirstStep && bNS)
820 // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
821 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
822 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
823 // Copy coordinate from the GPU when needed at the search step.
824 // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
825 // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
826 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
827 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
830 if (bNS && !(bFirstStep && ir->bContinuation))
832 bMasterState = FALSE;
833 /* Correct the new box if it is too skewed */
834 if (inputrecDynamicBox(ir))
836 if (correct_box(fplog, step, state->box, graph))
839 // If update is offloaded, it should be informed about the box size change
842 integrator->setPbc(PbcType::Xyz, state->box);
846 if (DOMAINDECOMP(cr) && bMasterState)
848 dd_collect_state(cr->dd, state, state_global);
851 if (DOMAINDECOMP(cr))
853 /* Repartition the domain decomposition */
854 dd_partition_system(fplog, mdlog, step, cr, bMasterState, nstglobalcomm, state_global,
855 *top_global, ir, imdSession, pull_work, state, &f, mdAtoms, &top,
856 fr, vsite, constr, nrnb, wcycle, do_verbose && !bPMETunePrinting);
857 shouldCheckNumberOfBondedInteractions = true;
858 upd.setNumAtoms(state->natoms);
860 // Allocate or re-size GPU halo exchange object, if necessary
861 if (havePPDomainDecomposition(cr) && simulationWork.useGpuHaloExchange
862 && useGpuForNonbonded && is1D(*cr->dd))
864 // TODO remove need to pass local stream into GPU halo exchange - Redmine #3093
866 Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local);
867 void* streamNonLocal = Nbnxm::gpu_get_command_stream(
868 fr->nbv->gpu_nbv, InteractionLocality::NonLocal);
869 constructGpuHaloExchange(mdlog, *cr, streamLocal, streamNonLocal);
874 if (MASTER(cr) && do_log)
876 energyOutput.printHeader(fplog, step, t); /* can we improve the information printed here? */
879 if (ir->efep != efepNO)
881 update_mdatoms(mdatoms, state->lambda[efptMASS]);
887 /* We need the kinetic energy at minus the half step for determining
888 * the full step kinetic energy and possibly for T-coupling.*/
889 /* This may not be quite working correctly yet . . . . */
890 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
891 makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
892 nrnb, &vcm, wcycle, enerd, nullptr, nullptr, nullptr, nullptr, constr,
893 &nullSignaller, state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
894 CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
895 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global,
896 &top, makeConstArrayRef(state->x), state->box,
897 &shouldCheckNumberOfBondedInteractions);
899 clear_mat(force_vir);
901 checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
903 /* Determine the energy and pressure:
904 * at nstcalcenergy steps and at energy output steps (set below).
906 if (EI_VV(ir->eI) && (!bInitStep))
908 bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
909 bCalcVir = bCalcEnerStep
911 && (do_per_step(step, ir->nstpcouple) || do_per_step(step - 1, ir->nstpcouple)));
915 bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
916 bCalcVir = bCalcEnerStep || (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
918 bCalcEner = bCalcEnerStep;
920 do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
922 if (do_ene || do_log || bDoReplEx)
928 /* Do we need global communication ? */
929 bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm)
930 || (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm)));
932 force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0)
933 | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0)
934 | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0));
938 /* Now is the time to relax the shells */
939 relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, enforcedRotation, step, ir,
940 imdSession, pull_work, bNS, force_flags, &top, constr, enerd, fcd,
941 state->natoms, state->x.arrayRefWithPadding(),
942 state->v.arrayRefWithPadding(), state->box, state->lambda, &state->hist,
943 f.arrayRefWithPadding(), force_vir, mdatoms, nrnb, wcycle, graph,
944 shellfc, fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler);
948 /* The AWH history need to be saved _before_ doing force calculations where the AWH bias
949 is updated (or the AWH update will be performed twice for one step when continuing).
950 It would be best to call this update function from do_md_trajectory_writing but that
951 would occur after do_force. One would have to divide the update_awh function into one
952 function applying the AWH force and one doing the AWH bias update. The update AWH
953 bias function could then be called after do_md_trajectory_writing (then containing
954 update_awh_history). The checkpointing will in the future probably moved to the start
955 of the md loop which will rid of this issue. */
956 if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
958 awh->updateHistory(state_global->awhHistory.get());
961 /* The coordinates (x) are shifted (to get whole molecules)
963 * This is parallellized as well, and does communication too.
964 * Check comments in sim_util.c
966 do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession, pull_work, step,
967 nrnb, wcycle, &top, state->box, state->x.arrayRefWithPadding(), &state->hist,
968 f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd, state->lambda, graph,
969 fr, runScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
970 (bNS ? GMX_FORCE_NS : 0) | force_flags, ddBalanceRegionHandler);
973 // VV integrators do not need the following velocity half step
974 // if it is the first step after starting from a checkpoint.
975 // That is, the half step is needed on all other steps, and
976 // also the first step when starting from a .tpr file.
977 if (EI_VV(ir->eI) && (!bFirstStep || startingBehavior == StartingBehavior::NewSimulation))
978 /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
980 rvec* vbuf = nullptr;
982 wallcycle_start(wcycle, ewcUPDATE);
983 if (ir->eI == eiVV && bInitStep)
985 /* if using velocity verlet with full time step Ekin,
986 * take the first half step only to compute the
987 * virial for the first step. From there,
988 * revert back to the initial coordinates
989 * so that the input is actually the initial step.
991 snew(vbuf, state->natoms);
992 copy_rvecn(state->v.rvec_array(), vbuf, 0,
993 state->natoms); /* should make this better for parallelizing? */
997 /* this is for NHC in the Ekin(t+dt/2) version of vv */
998 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ,
999 trotter_seq, ettTSEQ1);
1002 update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
1003 etrtVELOCITY1, cr, constr);
1005 wallcycle_stop(wcycle, ewcUPDATE);
1006 constrain_velocities(step, nullptr, state, shake_vir, constr, bCalcVir, do_log, do_ene);
1007 wallcycle_start(wcycle, ewcUPDATE);
1008 /* if VV, compute the pressure and constraints */
1009 /* For VV2, we strictly only need this if using pressure
1010 * control, but we really would like to have accurate pressures
1012 * Think about ways around this in the future?
1013 * For now, keep this choice in comments.
1015 /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
1016 /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
1018 bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
1019 if (bCalcEner && ir->eI == eiVVAK)
1021 bSumEkinhOld = TRUE;
1023 /* for vv, the first half of the integration actually corresponds to the previous step.
1024 So we need information from the last step in the first half of the integration */
1025 if (bGStat || do_per_step(step - 1, nstglobalcomm))
1027 wallcycle_stop(wcycle, ewcUPDATE);
1029 gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
1030 makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
1031 nrnb, &vcm, wcycle, enerd, force_vir, shake_vir, total_vir, pres, constr,
1032 &nullSignaller, state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
1033 (bGStat ? CGLO_GSTAT : 0) | (bCalcEner ? CGLO_ENERGY : 0)
1034 | (bTemp ? CGLO_TEMPERATURE : 0) | (bPres ? CGLO_PRESSURE : 0)
1035 | (bPres ? CGLO_CONSTRAINT : 0) | (bStopCM ? CGLO_STOPCM : 0)
1036 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
1039 /* explanation of above:
1040 a) We compute Ekin at the full time step
1041 if 1) we are using the AveVel Ekin, and it's not the
1042 initial step, or 2) if we are using AveEkin, but need the full
1043 time step kinetic energy for the pressure (always true now, since we want accurate statistics).
1044 b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
1045 EkinAveVel because it's needed for the pressure */
1046 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1047 top_global, &top, makeConstArrayRef(state->x),
1048 state->box, &shouldCheckNumberOfBondedInteractions);
1051 process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x),
1052 makeArrayRef(state->v));
1053 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1055 wallcycle_start(wcycle, ewcUPDATE);
1057 /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
1062 m_add(force_vir, shake_vir,
1063 total_vir); /* we need the un-dispersion corrected total vir here */
1064 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ,
1065 trotter_seq, ettTSEQ2);
1067 /* TODO This is only needed when we're about to write
1068 * a checkpoint, because we use it after the restart
1069 * (in a kludge?). But what should we be doing if
1070 * the startingBehavior is NewSimulation or bInitStep are true? */
1071 if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))
1073 copy_mat(shake_vir, state->svir_prev);
1074 copy_mat(force_vir, state->fvir_prev);
1076 if (inputrecNvtTrotter(ir) && ir->eI == eiVV)
1078 /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
1079 enerd->term[F_TEMP] =
1080 sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE);
1081 enerd->term[F_EKIN] = trace(ekind->ekin);
1084 else if (bExchanged)
1086 wallcycle_stop(wcycle, ewcUPDATE);
1087 /* We need the kinetic energy at minus the half step for determining
1088 * the full step kinetic energy and possibly for T-coupling.*/
1089 /* This may not be quite working correctly yet . . . . */
1090 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
1091 makeConstArrayRef(state->v), state->box, state->lambda[efptVDW],
1092 mdatoms, nrnb, &vcm, wcycle, enerd, nullptr, nullptr, nullptr,
1093 nullptr, constr, &nullSignaller, state->box, nullptr,
1094 &bSumEkinhOld, CGLO_GSTAT | CGLO_TEMPERATURE);
1095 wallcycle_start(wcycle, ewcUPDATE);
1098 /* if it's the initial step, we performed this first step just to get the constraint virial */
1099 if (ir->eI == eiVV && bInitStep)
1101 copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
1104 wallcycle_stop(wcycle, ewcUPDATE);
1107 /* compute the conserved quantity */
1110 saved_conserved_quantity = NPT_energy(ir, state, &MassQ);
1113 last_ekin = enerd->term[F_EKIN];
1115 if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
1117 saved_conserved_quantity -= enerd->term[F_DISPCORR];
1119 /* sum up the foreign energy and dhdl terms for vv. currently done every step so that dhdl is correct in the .edr */
1120 if (ir->efep != efepNO)
1122 sum_dhdl(enerd, state->lambda, *ir->fepvals);
1126 /* ######## END FIRST UPDATE STEP ############## */
1127 /* ######## If doing VV, we now have v(dt) ###### */
1130 /* perform extended ensemble sampling in lambda - we don't
1131 actually move to the new state before outputting
1132 statistics, but if performing simulated tempering, we
1133 do update the velocities and the tau_t. */
1135 lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state,
1136 state->dfhist, step, state->v.rvec_array(), mdatoms);
1137 /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1140 copy_df_history(state_global->dfhist, state->dfhist);
1144 // Copy coordinate from the GPU for the output/checkpointing if the update is offloaded and
1145 // coordinates have not already been copied for i) search or ii) CPU force tasks.
1146 if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork
1147 && (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)
1148 || checkpointHandler->isCheckpointingStep()))
1150 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1151 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1153 // Copy velocities if needed for the output/checkpointing.
1154 // NOTE: Copy on the search steps is done at the beginning of the step.
1155 if (useGpuForUpdate && !bNS
1156 && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep()))
1158 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1159 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1161 // Copy forces for the output if the forces were reduced on the GPU (not the case on virial steps)
1162 // and update is offloaded hence forces are kept on the GPU for update and have not been
1163 // already transferred in do_force().
1164 // TODO: There should be an improved, explicit mechanism that ensures this copy is only executed
1165 // when the forces are ready on the GPU -- the same synchronizer should be used as the one
1166 // prior to GPU update.
1167 // TODO: When the output flags will be included in step workload, this copy can be combined with the
1168 // copy call in do_force(...).
1169 // NOTE: The forces should not be copied here if the vsites are present, since they were modified
1170 // on host after the D2H copy in do_force(...).
1171 if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite)
1172 && do_per_step(step, ir->nstfout))
1174 stateGpu->copyForcesFromGpu(ArrayRef<RVec>(f), AtomLocality::Local);
1175 stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
1177 /* Now we have the energies and forces corresponding to the
1178 * coordinates at time t. We must output all of this before
1181 do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, ir, state, state_global,
1182 observablesHistory, top_global, fr, outf, energyOutput, ekind, f,
1183 checkpointHandler->isCheckpointingStep(), bRerunMD, bLastStep,
1184 mdrunOptions.writeConfout, bSumEkinhOld);
1185 /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1186 bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1188 /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1189 if (startingBehavior != StartingBehavior::NewSimulation
1190 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1192 copy_mat(state->svir_prev, shake_vir);
1193 copy_mat(state->fvir_prev, force_vir);
1196 stopHandler->setSignal();
1197 resetHandler->setSignal(walltime_accounting);
1199 if (bGStat || !PAR(cr))
1201 /* In parallel we only have to check for checkpointing in steps
1202 * where we do global communication,
1203 * otherwise the other nodes don't know.
1205 checkpointHandler->setSignal(walltime_accounting);
1208 /* ######### START SECOND UPDATE STEP ################# */
1210 /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen
1211 controlled in preprocessing */
1213 if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1215 gmx_bool bIfRandomize;
1216 bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1217 /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1218 if (constr && bIfRandomize)
1220 constrain_velocities(step, nullptr, state, tmp_vir, constr, bCalcVir, do_log, do_ene);
1223 /* Box is changed in update() when we do pressure coupling,
1224 * but we should still use the old box for energy corrections and when
1225 * writing it to the energy file, so it matches the trajectory files for
1226 * the same timestep above. Make a copy in a separate array.
1228 copy_mat(state->box, lastbox);
1232 wallcycle_start(wcycle, ewcUPDATE);
1233 /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1236 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1237 /* We can only do Berendsen coupling after we have summed
1238 * the kinetic energy or virial. Since the happens
1239 * in global_state after update, we should only do it at
1240 * step % nstlist = 1 with bGStatEveryStep=FALSE.
1245 update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1246 update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep);
1251 /* velocity half-step update */
1252 update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
1253 etrtVELOCITY2, cr, constr);
1256 /* Above, initialize just copies ekinh into ekin,
1257 * it doesn't copy position (for VV),
1258 * and entire integrator for MD.
1261 if (ir->eI == eiVVAK)
1263 cbuf.resize(state->x.size());
1264 std::copy(state->x.begin(), state->x.end(), cbuf.begin());
1267 /* With leap-frog type integrators we compute the kinetic energy
1268 * at a whole time step as the average of the half-time step kinetic
1269 * energies of two subsequent steps. Therefore we need to compute the
1270 * half step kinetic energy also if we need energies at the next step.
1272 const bool needHalfStepKineticEnergy =
1273 (!EI_VV(ir->eI) && (do_per_step(step + 1, nstglobalcomm) || step_rel + 1 == ir->nsteps));
1275 // Parrinello-Rahman requires the pressure to be availible before the update to compute
1276 // the velocity scaling matrix. Hence, it runs one step after the nstpcouple step.
1277 const bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN
1278 && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
1280 if (useGpuForUpdate)
1282 if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
1284 integrator->set(stateGpu->getCoordinates(), stateGpu->getVelocities(),
1285 stateGpu->getForces(), top.idef, *mdatoms, ekind->ngtc);
1287 // Copy data to the GPU after buffers might have being reinitialized
1288 stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1289 stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1292 // If the buffer ops were not offloaded this step, the forces are on the host and have to be copied
1293 if (!runScheduleWork->stepWork.useGpuFBufferOps)
1295 stateGpu->copyForcesToGpu(ArrayRef<RVec>(f), AtomLocality::Local);
1298 const bool doTemperatureScaling =
1299 (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1301 // This applies Leap-Frog, LINCS and SETTLE in succession
1302 integrator->integrate(stateGpu->getForcesReadyOnDeviceEvent(
1303 AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps),
1304 ir->delta_t, true, bCalcVir, shake_vir, doTemperatureScaling,
1305 ekind->tcstat, doParrinelloRahman, ir->nstpcouple * ir->delta_t, M);
1307 // Copy velocities D2H after update if:
1308 // - Globals are computed this step (includes the energy output steps).
1309 // - Temperature is needed for the next step.
1310 if (bGStat || needHalfStepKineticEnergy)
1312 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1313 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1318 update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
1319 etrtPOSITION, cr, constr);
1321 wallcycle_stop(wcycle, ewcUPDATE);
1323 constrain_coordinates(step, &dvdl_constr, state, shake_vir, &upd, constr, bCalcVir,
1326 update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state, cr, nrnb, wcycle, &upd,
1327 constr, do_log, do_ene);
1328 finish_update(ir, mdatoms, state, graph, nrnb, wcycle, &upd, constr);
1331 if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1333 updatePrevStepPullCom(pull_work, state);
1336 if (ir->eI == eiVVAK)
1338 /* erase F_EKIN and F_TEMP here? */
1339 /* just compute the kinetic energy at the half step to perform a trotter step */
1340 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
1341 makeConstArrayRef(state->v), state->box, state->lambda[efptVDW],
1342 mdatoms, nrnb, &vcm, wcycle, enerd, force_vir, shake_vir, total_vir,
1343 pres, constr, &nullSignaller, lastbox, nullptr, &bSumEkinhOld,
1344 (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE);
1345 wallcycle_start(wcycle, ewcUPDATE);
1346 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
1347 /* now we know the scaling, we can compute the positions again */
1348 std::copy(cbuf.begin(), cbuf.end(), state->x.begin());
1350 update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
1351 etrtPOSITION, cr, constr);
1352 wallcycle_stop(wcycle, ewcUPDATE);
1354 /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
1355 /* are the small terms in the shake_vir here due
1356 * to numerical errors, or are they important
1357 * physically? I'm thinking they are just errors, but not completely sure.
1358 * For now, will call without actually constraining, constr=NULL*/
1359 finish_update(ir, mdatoms, state, graph, nrnb, wcycle, &upd, nullptr);
1363 /* this factor or 2 correction is necessary
1364 because half of the constraint force is removed
1365 in the vv step, so we have to double it. See
1366 the Redmine issue #1255. It is not yet clear
1367 if the factor of 2 is exact, or just a very
1368 good approximation, and this will be
1369 investigated. The next step is to see if this
1370 can be done adding a dhdl contribution from the
1371 rattle step, but this is somewhat more
1372 complicated with the current code. Will be
1373 investigated, hopefully for 4.6.3. However,
1374 this current solution is much better than
1375 having it completely wrong.
1377 enerd->term[F_DVDL_CONSTR] += 2 * dvdl_constr;
1381 enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1384 if (vsite != nullptr)
1386 wallcycle_start(wcycle, ewcVSITECONSTR);
1387 if (graph != nullptr)
1389 shift_self(graph, state->box, state->x.rvec_array());
1391 construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
1392 top.idef.iparams, top.idef.il, fr->pbcType, fr->bMolPBC, cr, state->box);
1394 if (graph != nullptr)
1396 unshift_self(graph, state->box, state->x.rvec_array());
1398 wallcycle_stop(wcycle, ewcVSITECONSTR);
1401 /* ############## IF NOT VV, Calculate globals HERE ############ */
1402 /* With Leap-Frog we can skip compute_globals at
1403 * non-communication steps, but we need to calculate
1404 * the kinetic energy one step before communication.
1407 // Organize to do inter-simulation signalling on steps if
1408 // and when algorithms require it.
1409 const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1411 if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
1413 // Copy coordinates when needed to stop the CM motion.
1414 if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
1416 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1417 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1419 // Since we're already communicating at this step, we
1420 // can propagate intra-simulation signals. Note that
1421 // check_nstglobalcomm has the responsibility for
1422 // choosing the value of nstglobalcomm that is one way
1423 // bGStat becomes true, so we can't get into a
1424 // situation where e.g. checkpointing can't be
1426 bool doIntraSimSignal = true;
1427 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1430 gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
1431 makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
1432 nrnb, &vcm, wcycle, enerd, force_vir, shake_vir, total_vir, pres, constr,
1433 &signaller, lastbox, &totalNumberOfBondedInteractions, &bSumEkinhOld,
1434 (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1435 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1436 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1437 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT
1438 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
1440 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1441 top_global, &top, makeConstArrayRef(state->x),
1442 state->box, &shouldCheckNumberOfBondedInteractions);
1443 if (!EI_VV(ir->eI) && bStopCM)
1445 process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x),
1446 makeArrayRef(state->v));
1447 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1449 // TODO: The special case of removing CM motion should be dealt more gracefully
1450 if (useGpuForUpdate)
1452 stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1453 // Here we block until the H2D copy completes because event sync with the
1454 // force kernels that use the coordinates on the next steps is not implemented
1455 // (not because of a race on state->x being modified on the CPU while H2D is in progress).
1456 stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
1462 /* ############# END CALC EKIN AND PRESSURE ################# */
1464 /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1465 the virial that should probably be addressed eventually. state->veta has better properies,
1466 but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1467 generate the new shake_vir, but test the veta value for convergence. This will take some thought. */
1469 if (ir->efep != efepNO && !EI_VV(ir->eI))
1471 /* Sum up the foreign energy and dhdl terms for md and sd.
1472 Currently done every step so that dhdl is correct in the .edr */
1473 sum_dhdl(enerd, state->lambda, *ir->fepvals);
1476 update_pcouple_after_coordinates(fplog, step, ir, mdatoms, pres, force_vir, shake_vir,
1477 pressureCouplingMu, state, nrnb, &upd, !useGpuForUpdate);
1479 const bool doBerendsenPressureCoupling =
1480 (inputrec->epc == epcBERENDSEN && do_per_step(step, inputrec->nstpcouple));
1481 if (useGpuForUpdate && (doBerendsenPressureCoupling || doParrinelloRahman))
1483 integrator->scaleCoordinates(pressureCouplingMu);
1484 integrator->setPbc(PbcType::Xyz, state->box);
1487 /* ################# END UPDATE STEP 2 ################# */
1488 /* #### We now have r(t+dt) and v(t+dt/2) ############# */
1490 /* The coordinates (x) were unshifted in update */
1493 /* We will not sum ekinh_old,
1494 * so signal that we still have to do it.
1496 bSumEkinhOld = TRUE;
1501 /* ######### BEGIN PREPARING EDR OUTPUT ########### */
1503 /* use the directly determined last velocity, not actually the averaged half steps */
1504 if (bTrotter && ir->eI == eiVV)
1506 enerd->term[F_EKIN] = last_ekin;
1508 enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1510 if (integratorHasConservedEnergyQuantity(ir))
1514 enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1518 enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1521 /* ######### END PREPARING EDR OUTPUT ########### */
1527 if (fplog && do_log && bDoExpanded)
1529 /* only needed if doing expanded ensemble */
1530 PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals,
1531 ir->bSimTemp ? ir->simtempvals : nullptr,
1532 state_global->dfhist, state->fep_state, ir->nstlog, step);
1536 energyOutput.addDataAtEnergyStep(bDoDHDL, bCalcEnerStep, t, mdatoms->tmass, enerd, state,
1537 ir->fepvals, ir->expandedvals, lastbox, shake_vir,
1538 force_vir, total_vir, pres, ekind, mu_tot, constr);
1542 energyOutput.recordNonEnergyStep();
1545 gmx_bool do_dr = do_per_step(step, ir->nstdisreout);
1546 gmx_bool do_or = do_per_step(step, ir->nstorireout);
1548 if (doSimulatedAnnealing)
1550 energyOutput.printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts));
1552 if (do_log || do_ene || do_dr || do_or)
1554 energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
1555 do_log ? fplog : nullptr, step, t, fcd, awh.get());
1560 pull_print_output(pull_work, step, t);
1563 if (do_per_step(step, ir->nstlog))
1565 if (fflush(fplog) != 0)
1567 gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1573 /* Have to do this part _after_ outputting the logfile and the edr file */
1574 /* Gets written into the state at the beginning of next loop*/
1575 state->fep_state = lamnew;
1577 /* Print the remaining wall clock time for the run */
1578 if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting)
1582 fprintf(stderr, "\n");
1584 print_time(stderr, walltime_accounting, step, ir, cr);
1587 /* Ion/water position swapping.
1588 * Not done in last step since trajectory writing happens before this call
1589 * in the MD loop and exchanges would be lost anyway. */
1590 bNeedRepartition = FALSE;
1591 if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && do_per_step(step, ir->swap->nstswap))
1594 do_swapcoords(cr, step, t, ir, swap, wcycle, as_rvec_array(state->x.data()),
1595 state->box, MASTER(cr) && mdrunOptions.verbose, bRerunMD);
1597 if (bNeedRepartition && DOMAINDECOMP(cr))
1599 dd_collect_state(cr->dd, state, state_global);
1603 /* Replica exchange */
1607 bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t);
1610 if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr))
1612 dd_partition_system(fplog, mdlog, step, cr, TRUE, 1, state_global, *top_global, ir,
1613 imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr,
1614 nrnb, wcycle, FALSE);
1615 shouldCheckNumberOfBondedInteractions = true;
1616 upd.setNumAtoms(state->natoms);
1622 /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1623 /* With all integrators, except VV, we need to retain the pressure
1624 * at the current step for coupling at the next step.
1626 if ((state->flags & (1U << estPRES_PREV))
1627 && (bGStatEveryStep || (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1629 /* Store the pressure in t_state for pressure coupling
1630 * at the next MD step.
1632 copy_mat(pres, state->pres_prev);
1635 /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */
1637 if ((membed != nullptr) && (!bLastStep))
1639 rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1642 cycles = wallcycle_stop(wcycle, ewcSTEP);
1643 if (DOMAINDECOMP(cr) && wcycle)
1645 dd_cycles_add(cr->dd, cycles, ddCyclStep);
1648 /* increase the MD step number */
1652 resetHandler->resetCounters(step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb,
1653 fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1655 /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1656 imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1658 /* End of main MD loop */
1660 /* Closing TNG files can include compressing data. Therefore it is good to do that
1661 * before stopping the time measurements. */
1662 mdoutf_tng_close(outf);
1664 /* Stop measuring walltime */
1665 walltime_accounting_end_time(walltime_accounting);
1667 if (!thisRankHasDuty(cr, DUTY_PME))
1669 /* Tell the PME only node to finish */
1670 gmx_pme_send_finish(cr);
1675 if (ir->nstcalcenergy > 0)
1677 energyOutput.printAnnealingTemperatures(fplog, groups, &(ir->opts));
1678 energyOutput.printAverages(fplog, groups);
1685 pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1688 done_shellfc(fplog, shellfc, step_rel);
1690 if (useReplicaExchange && MASTER(cr))
1692 print_replica_exchange_statistics(fplog, repl_ex);
1695 walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1697 global_stat_destroy(gstat);