const auto& simulationWork = runScheduleWork->simulationWork;
const bool useGpuForPme = simulationWork.useGpuPme;
const bool useGpuForNonbonded = simulationWork.useGpuNonbonded;
- const bool useGpuForBufferOps = simulationWork.useGpuBufferOps;
const bool useGpuForUpdate = simulationWork.useGpuUpdate;
/* Check for polarizable models and flexible constraints */
ObservablesReducer observablesReducer = observablesReducerBuilder->build();
ForceBuffers f(simulationWork.useMts,
- ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
- ? PinningPolicy::PinnedIfSupported
- : PinningPolicy::CannotBePinned);
+ (simulationWork.useGpuFBufferOps || useGpuForUpdate) ? PinningPolicy::PinnedIfSupported
+ : PinningPolicy::CannotBePinned);
const t_mdatoms* md = mdAtoms->mdatoms();
if (haveDDAtomOrdering(*cr))
{
md->cFREEZE ? gmx::arrayRefFromArray(md->cFREEZE, md->nr)
: gmx::ArrayRef<const unsigned short>(),
md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
- : gmx::ArrayRef<const unsigned short>());
+ : gmx::ArrayRef<const unsigned short>(),
+ md->cACC ? gmx::arrayRefFromArray(md->cACC, md->nr)
+ : gmx::ArrayRef<const unsigned short>());
fr->longRangeNonbondeds->updateAfterPartition(*md);
}
else
md->cFREEZE ? gmx::arrayRefFromArray(md->cFREEZE, md->nr)
: gmx::ArrayRef<const unsigned short>(),
md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
- : gmx::ArrayRef<const unsigned short>());
+ : gmx::ArrayRef<const unsigned short>(),
+ md->cACC ? gmx::arrayRefFromArray(md->cACC, md->nr)
+ : gmx::ArrayRef<const unsigned short>());
fr->longRangeNonbondeds->updateAfterPartition(*md);
}
GMX_RELEASE_ASSERT(ir->eConstrAlg != ConstraintAlgorithm::Shake || constr == nullptr
|| constr->numConstraintsTotal() == 0,
"SHAKE is not supported with GPU update.");
- GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
+ GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuXBufferOps),
"Either PME or short-ranged non-bonded interaction tasks must run on "
"the GPU to use GPU update.\n");
GMX_RELEASE_ASSERT(ir->eI == IntegrationAlgorithm::MD,
integrator->setPbc(PbcType::Xyz, state->box);
}
- if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
+ if (useGpuForPme || simulationWork.useGpuXBufferOps || useGpuForUpdate)
{
changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
}
do_verbose = mdrunOptions.verbose
&& (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
- if (useGpuForUpdate && !bFirstStep && bNS)
+ // On search steps, when doing the update on the GPU, copy
+ // the coordinates and velocities to the host unless they are
+ // already there (ie on the first step and after replica
+ // exchange).
+ if (useGpuForUpdate && bNS && !bFirstStep && !bExchanged)
{
- // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
- stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
- // Copy coordinate from the GPU when needed at the search step.
- // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
- // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
+ stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
if (correct_box(fplog, step, state->box))
{
bMasterState = TRUE;
- // If update is offloaded, it should be informed about the box size change
- if (useGpuForUpdate)
- {
- integrator->setPbc(PbcType::Xyz, state->box);
- }
}
}
+ // If update is offloaded, and the box was changed either
+ // above or in a replica exchange on the previous step,
+ // the GPU Update object should be informed
+ if (useGpuForUpdate && (bMasterState || bExchanged))
+ {
+ integrator->setPbc(PbcType::Xyz, state->box);
+ }
if (haveDDAtomOrdering(*cr) && bMasterState)
{
dd_collect_state(cr->dd, state, state_global);
md->cFREEZE ? gmx::arrayRefFromArray(md->cFREEZE, md->nr)
: gmx::ArrayRef<const unsigned short>(),
md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
- : gmx::ArrayRef<const unsigned short>());
+ : gmx::ArrayRef<const unsigned short>(),
+ md->cACC ? gmx::arrayRefFromArray(md->cACC, md->nr)
+ : gmx::ArrayRef<const unsigned short>());
fr->longRangeNonbondeds->updateAfterPartition(*md);
}
}
{
if (useGpuForUpdate)
{
- if (bNS && (bFirstStep || haveDDAtomOrdering(*cr)))
+ // On search steps, update handles to device vectors
+ if (bNS && (bFirstStep || haveDDAtomOrdering(*cr) || bExchanged))
{
integrator->set(stateGpu->getCoordinates(),
stateGpu->getVelocities(),
/* The velocity copy is redundant if we had Center-of-Mass motion removed on
* the previous step. We don't check that now. */
stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
- if (!runScheduleWork->stepWork.haveGpuPmeOnThisRank
- && !runScheduleWork->stepWork.useGpuXBufferOps)
+ if (bExchanged
+ || (!runScheduleWork->stepWork.haveGpuPmeOnThisRank
+ && !runScheduleWork->stepWork.useGpuXBufferOps))
{
stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
}
&& do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
// This applies Leap-Frog, LINCS and SETTLE in succession
- integrator->integrate(
- stateGpu->getForcesReadyOnDeviceEvent(
- AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps),
- ir->delta_t,
- true,
- bCalcVir,
- shake_vir,
- doTemperatureScaling,
- ekind->tcstat,
- doParrinelloRahman,
- ir->nstpcouple * ir->delta_t,
- M);
-
- // Copy velocities D2H after update if:
- // - Globals are computed this step (includes the energy output steps).
- // - Temperature is needed for the next step.
- if (bGStat || needHalfStepKineticEnergy)
- {
- stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
- stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
- }
+ integrator->integrate(stateGpu->getLocalForcesReadyOnDeviceEvent(
+ runScheduleWork->stepWork, runScheduleWork->simulationWork),
+ ir->delta_t,
+ true,
+ bCalcVir,
+ shake_vir,
+ doTemperatureScaling,
+ ekind->tcstat,
+ doParrinelloRahman,
+ ir->nstpcouple * ir->delta_t,
+ M);
}
else
{
if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
{
- updatePrevStepPullCom(pull_work, state);
+ updatePrevStepPullCom(pull_work, state->pull_com_prev_step);
}
enerd->term[F_DVDL_CONSTR] += dvdl_constr;
// and when algorithms require it.
const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
- if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
+ if (useGpuForUpdate)
{
- // Copy coordinates when needed to stop the CM motion.
- if (useGpuForUpdate && (bDoReplEx || (!EI_VV(ir->eI) && bStopCM)))
+ const bool coordinatesRequiredForStopCM =
+ bStopCM && (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
+ && !EI_VV(ir->eI);
+
+ // Copy coordinates when needed to stop the CM motion or for replica exchange
+ if (coordinatesRequiredForStopCM || bDoReplEx)
{
stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
+
+ // Copy velocities back to the host if:
+ // - Globals are computed this step (includes the energy output steps).
+ // - Temperature is needed for the next step.
+ // - This is a replica exchange step (even though we will only need
+ // the velocities if an exchange succeeds)
+ if (bGStat || needHalfStepKineticEnergy || bDoReplEx)
+ {
+ stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
+ stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
+ }
+ }
+
+ if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
+ {
// Since we're already communicating at this step, we
// can propagate intra-simulation signals. Note that
// check_nstglobalcomm has the responsibility for
// TODO: The special case of removing CM motion should be dealt more gracefully
if (useGpuForUpdate)
{
+ // Issue #3988, #4106.
+ stateGpu->resetCoordinatesCopiedToDeviceEvent(AtomLocality::Local);
stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
// Here we block until the H2D copy completes because event sync with the
// force kernels that use the coordinates on the next steps is not implemented
accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals);
}
+ bool scaleCoordinates = !useGpuForUpdate || bDoReplEx;
update_pcouple_after_coordinates(fplog,
step,
ir,
state,
nrnb,
upd.deform(),
- !useGpuForUpdate);
+ scaleCoordinates);
const bool doBerendsenPressureCoupling = (inputrec->epc == PressureCoupling::Berendsen
&& do_per_step(step, inputrec->nstpcouple));
md->tmass,
enerd,
ir->fepvals.get(),
- ir->expandedvals.get(),
lastbox,
PTCouplingArrays{ state->boxv,
state->nosehoover_xi,
md->cFREEZE ? gmx::arrayRefFromArray(md->cFREEZE, md->nr)
: gmx::ArrayRef<const unsigned short>(),
md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
- : gmx::ArrayRef<const unsigned short>());
+ : gmx::ArrayRef<const unsigned short>(),
+ md->cACC ? gmx::arrayRefFromArray(md->cACC, md->nr)
+ : gmx::ArrayRef<const unsigned short>());
fr->longRangeNonbondeds->updateAfterPartition(*md);
}