&& !(simulationWork.computeNonbondedAtMtsLevel1 && !computeSlowForces);
flags.computeDhdl = ((legacyFlags & GMX_FORCE_DHDL) != 0);
- if (simulationWork.useGpuBufferOps)
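+ // X and F buffer ops are controlled by separate flags; either one requires
+ // the nonbonded computation to run on the GPU.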
+ if (simulationWork.useGpuXBufferOps || simulationWork.useGpuFBufferOps)
{
GMX_ASSERT(simulationWork.useGpuNonbonded,
"Can only offload buffer ops if nonbonded computation is also offloaded");
}
- flags.useGpuXBufferOps = simulationWork.useGpuBufferOps;
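+ // On search steps the coordinates are converted to the nbat layout on the
+ // CPU as part of pair search, so the GPU X buffer ops path is not taken.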
+ flags.useGpuXBufferOps = simulationWork.useGpuXBufferOps && !flags.doNeighborSearch;
// on virial steps the CPU reduction path is taken
- flags.useGpuFBufferOps = simulationWork.useGpuBufferOps && !flags.computeVirial;
+ flags.useGpuFBufferOps = simulationWork.useGpuFBufferOps && !flags.computeVirial;
const bool rankHasGpuPmeTask = simulationWork.useGpuPme && !simulationWork.haveSeparatePmeRank;
flags.useGpuPmeFReduction = flags.computeSlowForces && flags.useGpuFBufferOps
&& (rankHasGpuPmeTask || simulationWork.useGpuPmePpCommunication);
const bool reinitGpuPmePpComms =
simulationWork.useGpuPmePpCommunication && (stepWork.doNeighborSearch);
- auto* localXReadyOnDevice = (stepWork.haveGpuPmeOnThisRank || simulationWork.useGpuBufferOps)
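+ // The coordinates-ready event is consumed by GPU PME and by the X buffer
+ // ops; F buffer ops do not need it.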
+ auto* localXReadyOnDevice = (stepWork.haveGpuPmeOnThisRank || simulationWork.useGpuXBufferOps)
? stateGpu->getCoordinatesReadyOnDeviceEvent(
AtomLocality::Local, simulationWork, stepWork)
: nullptr;
haveCopiedXFromGpu = true;
}
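+ // On search steps, reinitialize the GPU state buffers whenever any GPU path
+ // needs them: PME on this rank, or X or F buffer ops.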
+ if (stepWork.doNeighborSearch
+ && (stepWork.haveGpuPmeOnThisRank || simulationWork.useGpuXBufferOps || simulationWork.useGpuFBufferOps))
+ {
+ // TODO refactor this to do_md, after partitioning.
+ stateGpu->reinit(mdatoms->homenr,
+ getLocalAtomCount(cr->dd, *mdatoms, simulationWork.havePpDomainDecomposition));
+ if (stepWork.haveGpuPmeOnThisRank)
+ {
+ // TODO: This should be moved into PME setup function ( pme_gpu_prepare_computation(...) )
+ pme_gpu_set_device_x(fr->pmedata, stateGpu->getCoordinates());
+ }
+ }
+
// Coordinates on the device are needed if PME or BufferOps are offloaded.
// The local coordinates can be copied right away.
// NOTE: Consider moving this copy to right after they are updated and constrained,
// if the latter is not offloaded.
if (stepWork.haveGpuPmeOnThisRank || stepWork.useGpuXBufferOps)
{
- if (stepWork.doNeighborSearch)
- {
- // TODO refactor this to do_md, after partitioning.
- stateGpu->reinit(mdatoms->homenr,
- getLocalAtomCount(cr->dd, *mdatoms, simulationWork.havePpDomainDecomposition));
- if (stepWork.haveGpuPmeOnThisRank)
- {
- // TODO: This should be moved into PME setup function ( pme_gpu_prepare_computation(...) )
- pme_gpu_set_device_x(fr->pmedata, stateGpu->getCoordinates());
- }
- }
// We need to copy coordinates when:
// 1. Update is not offloaded
// 2. The buffers were reinitialized on a search step
wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSSearchLocal);
wallcycle_stop(wcycle, WallCycleCounter::NS);
- if (stepWork.useGpuXBufferOps)
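+ // stepWork.useGpuXBufferOps is always false on search steps, so the
+ // initialization must check whether the simulation uses X buffer ops at all.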
+ if (simulationWork.useGpuXBufferOps)
{
nbv->atomdata_init_copy_x_to_nbat_x_gpu();
}
- if (simulationWork.useGpuBufferOps)
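+ // The local GPU force reduction is only needed when F buffer ops are
+ // offloaded.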
+ if (simulationWork.useGpuFBufferOps)
{
setupLocalGpuForceReduction(runScheduleWork,
fr->nbv.get(),
wallcycle_start(wcycle, WallCycleCounter::LaunchGpu);
wallcycle_sub_start(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat.get());
- if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps)
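+ // stepWork.useGpuXBufferOps already excludes search steps, so the explicit
+ // doNeighborSearch check is redundant.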
+ if (!stepWork.useGpuXBufferOps)
{
Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), AtomLocality::Local);
}
if (simulationWork.useGpuNonbonded)
{
- if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps)
+ if (!stepWork.useGpuXBufferOps)
{
wallcycle_start(wcycle, WallCycleCounter::LaunchGpu);
wallcycle_sub_start(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);