If update is offloaded:
The H2D copy of velocities is done:
1. At the search step after the device buffers were reinitialized.
The D2H copy is done:
1. At the beginning of the step on search steps (to back up before the
device buffers are reinitialized).
2. At the beginning of the velocity output step.
3. After update when globals are computed.
4. After update when temperature is needed for the next step.
The Local locality is used for the copies when update is offloaded, in
anticipation of the multi-GPU case.
REMD simulations are no longer supported when update is offloaded.
Change-Id: Ifbb9636cafba8980a4a781d942420c5c2c1bcdfd
const bool useGpuForPme = simulationWork.usePmeGpu;
const bool useGpuForNonbonded = simulationWork.useGpuNonbonded;
// Temporary solution to make sure that the buffer ops are offloaded when update is offloaded
const bool useGpuForPme = simulationWork.usePmeGpu;
const bool useGpuForNonbonded = simulationWork.useGpuNonbonded;
// Temporary solution to make sure that the buffer ops are offloaded when update is offloaded
- const bool useGpuForBufferOps = simulationWork.useGpuBufferOps;
- const bool useGpuForUpdate = simulationWork.useGpuUpdate;
+ const bool useGpuForBufferOps = simulationWork.useGpuBufferOps;
+ const bool useGpuForUpdate = simulationWork.useGpuUpdate;
+
+
+ StatePropagatorDataGpu *stateGpu = fr->stateGpu;
GMX_LOG(mdlog.info).asParagraph().
appendText("Updating coordinates on the GPU.");
}
GMX_LOG(mdlog.info).asParagraph().
appendText("Updating coordinates on the GPU.");
}
- integrator = std::make_unique<UpdateConstrainCuda>(*ir, *top_global, fr->stateGpu->getUpdateStream(), fr->stateGpu->xUpdatedOnDevice());
+ integrator = std::make_unique<UpdateConstrainCuda>(*ir, *top_global, stateGpu->getUpdateStream(), stateGpu->xUpdatedOnDevice());
}
if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
}
if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
do_verbose = mdrunOptions.verbose &&
(step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
do_verbose = mdrunOptions.verbose &&
(step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
+ // Copy velocities from the GPU when needed:
+ // - On search steps to keep a copy on the host (device buffers are reinitialized).
+ // - When needed for the output.
+ if (useGpuForUpdate && !bFirstStep)
+ {
+ if (bNS || do_per_step(step, ir->nstvout))
+ {
+ stateGpu->copyVelocitiesFromGpu(state->v, StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->waitVelocitiesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local);
+ }
+ }
+
+
if (bNS && !(bFirstStep && ir->bContinuation))
{
bMasterState = FALSE;
if (bNS && !(bFirstStep && ir->bContinuation))
{
bMasterState = FALSE;
std::copy(state->x.begin(), state->x.end(), cbuf.begin());
}
std::copy(state->x.begin(), state->x.end(), cbuf.begin());
}
+ /* With leap-frog type integrators we compute the kinetic energy
+ * at a whole time step as the average of the half-time step kinetic
+ * energies of two subsequent steps. Therefore we need to compute the
+ * half step kinetic energy also if we need energies at the next step.
+ */
+ const bool needHalfStepKineticEnergy = (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm));
+
- StatePropagatorDataGpu *stateGpu = fr->stateGpu;
if (bNS)
{
integrator->set(stateGpu->getCoordinates(), stateGpu->getVelocities(), stateGpu->getForces(),
if (bNS)
{
integrator->set(stateGpu->getCoordinates(), stateGpu->getVelocities(), stateGpu->getForces(),
t_pbc pbc;
set_pbc(&pbc, epbcXYZ, state->box);
integrator->setPbc(&pbc);
t_pbc pbc;
set_pbc(&pbc, epbcXYZ, state->box);
integrator->setPbc(&pbc);
+
+ // Copy data to the GPU after buffers might have been reinitialized
+ stateGpu->copyVelocitiesToGpu(state->v, StatePropagatorDataGpu::AtomLocality::Local);
}
stateGpu->copyCoordinatesToGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::All);
}
stateGpu->copyCoordinatesToGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::All);
- stateGpu->copyVelocitiesToGpu(state->v, StatePropagatorDataGpu::AtomLocality::All);
stateGpu->copyForcesToGpu(ArrayRef<RVec>(f), StatePropagatorDataGpu::AtomLocality::All);
stateGpu->copyForcesToGpu(ArrayRef<RVec>(f), StatePropagatorDataGpu::AtomLocality::All);
- bool doTempCouple = (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
- bool doPressureCouple = (ir->epc == epcPARRINELLORAHMAN && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
-
// TODO: Use StepWorkload fields.
bool useGpuFBufferOps = simulationWork.useGpuBufferOps && !(bCalcVir || bCalcEner);
// TODO: Use StepWorkload fields.
bool useGpuFBufferOps = simulationWork.useGpuBufferOps && !(bCalcVir || bCalcEner);
+ bool doTempCouple = (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
+ bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
+
// This applies Leap-Frog, LINCS and SETTLE in succession
integrator->integrate(stateGpu->getForcesReadyOnDeviceEvent(StatePropagatorDataGpu::AtomLocality::Local, useGpuFBufferOps),
ir->delta_t, true, bCalcVir, shake_vir,
doTempCouple, ekind->tcstat,
// This applies Leap-Frog, LINCS and SETTLE in succession
integrator->integrate(stateGpu->getForcesReadyOnDeviceEvent(StatePropagatorDataGpu::AtomLocality::Local, useGpuFBufferOps),
ir->delta_t, true, bCalcVir, shake_vir,
doTempCouple, ekind->tcstat,
- doPressureCouple, ir->nstpcouple*ir->delta_t, M);
+ doParrinelloRahman, ir->nstpcouple*ir->delta_t, M);
stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::All);
stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::All);
- stateGpu->copyVelocitiesFromGpu(state->v, StatePropagatorDataGpu::AtomLocality::All);
+
+ // Copy velocities D2H after update if:
+ // - Globals are computed this step (includes the energy output steps).
+ // - Temperature is needed for the next step.
+ if (bGStat || needHalfStepKineticEnergy)
+ {
+ stateGpu->copyVelocitiesFromGpu(state->v, StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->waitVelocitiesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local);
+ }
// TODO: replace with stateGpu->waitForCopyCoordinatesFromGpu(...)
integrator->waitCoordinatesReadyOnDevice();
// TODO: replace with stateGpu->waitForCopyCoordinatesFromGpu(...)
integrator->waitCoordinatesReadyOnDevice();
// and when algorithms require it.
bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
// and when algorithms require it.
bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
- if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)) || doInterSimSignal)
+ if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
{
// Since we're already communicating at this step, we
// can propagate intra-simulation signals. Note that
{
// Since we're already communicating at this step, we
// can propagate intra-simulation signals. Note that
*mdAtoms,
doEssentialDynamics,
fcd->orires.nr != 0,
*mdAtoms,
doEssentialDynamics,
fcd->orires.nr != 0,
- fcd->disres.nsystems != 0);
+ fcd->disres.nsystems != 0,
+ replExParams.exchangeInterval > 0);
const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
false,
const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
false,
const MDAtoms &mdatoms,
bool useEssentialDynamics,
bool doOrientationRestraints,
const MDAtoms &mdatoms,
bool useEssentialDynamics,
bool doOrientationRestraints,
- bool doDistanceRestraints)
+ bool doDistanceRestraints,
+ bool useReplicaExchange)
{
if (updateTarget == TaskTarget::Cpu)
{
{
if (updateTarget == TaskTarget::Cpu)
{
{
errorMessage += "Free energy perturbations are not supported.\n";
}
{
errorMessage += "Free energy perturbations are not supported.\n";
}
+ if (useReplicaExchange)
+ {
+ errorMessage += "Replica exchange simulations are not supported.\n";
+ }
if (!errorMessage.empty())
{
if (updateTarget == TaskTarget::Gpu)
if (!errorMessage.empty())
{
if (updateTarget == TaskTarget::Gpu)
* \param[in] useEssentialDynamics If essential dynamics is active.
* \param[in] doOrientationRestraints If orientation restraints are enabled.
* \param[in] doDistanceRestraints If distance restraints are enabled.
* \param[in] useEssentialDynamics If essential dynamics is active.
* \param[in] doOrientationRestraints If orientation restraints are enabled.
* \param[in] doDistanceRestraints If distance restraints are enabled.
+ * \param[in] useReplicaExchange If this is a REMD simulation.
*
* \returns Whether complete simulation can be run on GPU.
* \throws std::bad_alloc If out of memory
*
* \returns Whether complete simulation can be run on GPU.
* \throws std::bad_alloc If out of memory
const MDAtoms &mdatoms,
bool useEssentialDynamics,
bool doOrientationRestraints,
const MDAtoms &mdatoms,
bool useEssentialDynamics,
bool doOrientationRestraints,
- bool doDistanceRestraints);
+ bool doDistanceRestraints,
+ bool useReplicaExchange);