if (bPMETune && bNStList)
{
+ // This has to be here because PME load balancing is called so early.
+ // TODO: Move to after all booleans are defined.
+ if (useGpuForUpdate && !bFirstStep)
+ {
+ stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->waitCoordinatesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local);
+ }
/* PME grid + cut-off optimization with GPUs or PME nodes */
pme_loadbal_do(pme_loadbal, cr,
(mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
do_verbose = mdrunOptions.verbose &&
(step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
- // Copy velocities from the GPU when needed:
- // - On search steps to keep copy on host (device buffers are reinitialized).
- // - When needed for the output.
if (useGpuForUpdate && !bFirstStep)
{
+ // Copy velocities from the GPU when needed:
+ // - On search steps to keep copy on host (device buffers are reinitialized).
+ // - When needed for the output.
if (bNS || do_per_step(step, ir->nstvout))
{
stateGpu->copyVelocitiesFromGpu(state->v, StatePropagatorDataGpu::AtomLocality::Local);
stateGpu->waitVelocitiesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local);
}
- }
+ // Copy coordinates from the GPU when needed:
+ // - On search steps to keep copy on host (device buffers are reinitialized).
+ // - There are CPU bonded forces that need current coordinates
+ // - When needed for the output.
+ if (bNS ||
+ (runScheduleWork->domainWork.haveCpuBondedWork || runScheduleWork->domainWork.haveFreeEnergyWork) ||
+ do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed))
+ {
+ stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->waitCoordinatesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local);
+ }
+ }
if (bNS && !(bFirstStep && ir->bContinuation))
{
// Copy data to the GPU after buffers might have been reinitialized
stateGpu->copyVelocitiesToGpu(state->v, StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->copyCoordinatesToGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::Local);
}
- stateGpu->copyCoordinatesToGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::All);
stateGpu->copyForcesToGpu(ArrayRef<RVec>(f), StatePropagatorDataGpu::AtomLocality::All);
// TODO: Use StepWorkload fields.
ir->delta_t, true, bCalcVir, shake_vir,
doTempCouple, ekind->tcstat,
doParrinelloRahman, ir->nstpcouple*ir->delta_t, M);
- stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::All);
// Copy velocities D2H after update if:
// - Globals are computed this step (includes the energy output steps).
{
stateGpu->copyVelocitiesFromGpu(state->v, StatePropagatorDataGpu::AtomLocality::Local);
stateGpu->waitVelocitiesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->waitCoordinatesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local);
}
- // TODO: replace with stateGpu->waitForCopyCoordinatesFromGpu(...)
- integrator->waitCoordinatesReadyOnDevice();
}
else
{
{
process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
+
+ // TODO: The special case of removing CM motion should be dealt with more gracefully
+ if (useGpuForUpdate)
+ {
+ stateGpu->copyCoordinatesToGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->waitCoordinatesCopiedToDevice(StatePropagatorDataGpu::AtomLocality::Local);
+ }
}
}
}