do_verbose = mdrunOptions.verbose
&& (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
- if (useGpuForUpdate && !bFirstStep && bNS)
+ // On search steps, when doing the update on the GPU, copy
+ // the coordinates and velocities to the host unless they are
+ // already there (i.e. on the first step and after replica
+ // exchange).
+ if (useGpuForUpdate && bNS && !bFirstStep && !bExchanged)
{
- // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
- stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
- // Copy coordinate from the GPU when needed at the search step.
- // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
- // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
+ stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
if (correct_box(fplog, step, state->box))
{
bMasterState = TRUE;
- // If update is offloaded, it should be informed about the box size change
- if (useGpuForUpdate)
- {
- integrator->setPbc(PbcType::Xyz, state->box);
- }
}
}
+ // If update is offloaded, and the box was changed either
+ // above or in a replica exchange on the previous step,
+ // the GPU Update object should be informed of the new box.
+ if (useGpuForUpdate && (bMasterState || bExchanged))
+ {
+ integrator->setPbc(PbcType::Xyz, state->box);
+ }
if (haveDDAtomOrdering(*cr) && bMasterState)
{
dd_collect_state(cr->dd, state, state_global);
{
if (useGpuForUpdate)
{
- if (bNS && (bFirstStep || haveDDAtomOrdering(*cr)))
+ // On search steps, update handles to device vectors
+ if (bNS && (bFirstStep || haveDDAtomOrdering(*cr) || bExchanged))
{
integrator->set(stateGpu->getCoordinates(),
stateGpu->getVelocities(),
/* The velocity copy is redundant if we had Center-of-Mass motion removed on
* the previous step. We don't check that now. */
stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
- if (!runScheduleWork->stepWork.haveGpuPmeOnThisRank
- && !runScheduleWork->stepWork.useGpuXBufferOps)
+ if (bExchanged
+ || (!runScheduleWork->stepWork.haveGpuPmeOnThisRank
+ && !runScheduleWork->stepWork.useGpuXBufferOps))
{
stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
}
doParrinelloRahman,
ir->nstpcouple * ir->delta_t,
M);
-
- // Copy velocities D2H after update if:
- // - Globals are computed this step (includes the energy output steps).
- // - Temperature is needed for the next step.
- if (bGStat || needHalfStepKineticEnergy)
- {
- stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
- stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
- }
}
else
{
if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
{
- updatePrevStepPullCom(pull_work, state);
+ updatePrevStepPullCom(pull_work, state->pull_com_prev_step);
}
enerd->term[F_DVDL_CONSTR] += dvdl_constr;
// and when algorithms require it.
const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
- if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
+ if (useGpuForUpdate)
{
- // Copy coordinates when needed to stop the CM motion.
- if (useGpuForUpdate && (bDoReplEx || (!EI_VV(ir->eI) && bStopCM)))
+ const bool coordinatesRequiredForStopCM =
+ bStopCM && (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
+ && !EI_VV(ir->eI);
+
+ // Copy coordinates when needed to stop the CM motion or for replica exchange
+ if (coordinatesRequiredForStopCM || bDoReplEx)
{
stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
+
+ // Copy velocities back to the host if:
+ // - Globals are computed this step (includes the energy output steps).
+ // - Temperature is needed for the next step.
+ // - This is a replica exchange step (even though we will only need
+ // the velocities if an exchange succeeds)
+ if (bGStat || needHalfStepKineticEnergy || bDoReplEx)
+ {
+ stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
+ stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
+ }
+ }
+
+ if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
+ {
// Since we're already communicating at this step, we
// can propagate intra-simulation signals. Note that
// check_nstglobalcomm has the responsibility for
// TODO: The special case of removing CM motion should be dealt more gracefully
if (useGpuForUpdate)
{
+ // Issue #3988, #4106.
+ stateGpu->resetCoordinatesCopiedToDeviceEvent(AtomLocality::Local);
stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
// Here we block until the H2D copy completes because event sync with the
// force kernels that use the coordinates on the next steps is not implemented
accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals);
}
+ bool scaleCoordinates = !useGpuForUpdate || bDoReplEx;
update_pcouple_after_coordinates(fplog,
step,
ir,
state,
nrnb,
upd.deform(),
- !useGpuForUpdate);
+ scaleCoordinates);
const bool doBerendsenPressureCoupling = (inputrec->epc == PressureCoupling::Berendsen
&& do_per_step(step, inputrec->nstpcouple));
md->tmass,
enerd,
ir->fepvals.get(),
- ir->expandedvals.get(),
lastbox,
PTCouplingArrays{ state->boxv,
state->nosehoover_xi,