using gmx::StepWorkload;
using gmx::DomainLifetimeWorkload;
using gmx::SimulationWorkload;
+using gmx::AtomLocality;
+using gmx::InteractionLocality;
// TODO: this environment variable allows us to verify before release
// that on less common architectures the total cost of polling is not larger than
const interaction_const_t *ic,
gmx_enerdata_t *enerd,
const StepWorkload &stepWork,
- const Nbnxm::InteractionLocality ilocality,
+ const InteractionLocality ilocality,
const int clearF,
const int64_t step,
t_nrnb *nrnb,
GpuTaskCompletion completionType = (isPmeGpuDone) ? GpuTaskCompletion::Wait : GpuTaskCompletion::Check;
isNbGpuDone = Nbnxm::gpu_try_finish_task(nbv->gpu_nbv,
stepWork,
- Nbnxm::AtomLocality::Local,
+ AtomLocality::Local,
enerd->grpp.ener[egLJSR].data(),
enerd->grpp.ener[egCOULSR].data(),
forceWithShiftForces.shiftForces(), completionType, wcycle);
if (isNbGpuDone)
{
- nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local,
+ nbv->atomdata_add_nbat_f_to_f(AtomLocality::Local,
forceWithShiftForces.force());
}
}
// 2. The buffers were reinitialized on search step
if (!simulationWork.useGpuUpdate || stepWork.doNeighborSearch)
{
- stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), gmx::StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), AtomLocality::Local);
}
}
}
#endif /* GMX_MPI */
- const auto localXReadyOnDevice = (stateGpu != nullptr) ? stateGpu->getCoordinatesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::Local,
+ const auto localXReadyOnDevice = (stateGpu != nullptr) ? stateGpu->getCoordinatesReadyOnDeviceEvent(AtomLocality::Local,
simulationWork, stepWork) : nullptr;
if (useGpuPmeOnThisRank)
{
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
/* Note that with a GPU the launch overhead of the list transfer is not timed separately */
- nbv->constructPairlist(Nbnxm::InteractionLocality::Local,
+ nbv->constructPairlist(InteractionLocality::Local,
&top->excls, step, nrnb);
- nbv->setupGpuShortRangeWork(fr->gpuBonded, Nbnxm::InteractionLocality::Local);
+ nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::Local);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
wallcycle_stop(wcycle, ewcNS);
if (useGpuXBufOps == BufferOpsUseGpu::True)
{
GMX_ASSERT(stateGpu, "stateGpu should be valid when buffer ops are offloaded");
- nbv->convertCoordinatesGpu(Nbnxm::AtomLocality::Local, false,
+ nbv->convertCoordinatesGpu(AtomLocality::Local, false,
stateGpu->getCoordinates(),
localXReadyOnDevice);
}
else
{
- nbv->convertCoordinates(Nbnxm::AtomLocality::Local, false,
+ nbv->convertCoordinates(AtomLocality::Local, false,
x.unpaddedArrayRef());
}
}
if (stepWork.doNeighborSearch || (useGpuXBufOps == BufferOpsUseGpu::False))
{
Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(),
- Nbnxm::AtomLocality::Local);
+ AtomLocality::Local);
}
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
// with X buffer ops offloaded to the GPU on all but the search steps
/* launch local nonbonded work on GPU */
wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local, enbvClearFNo,
+ do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, enbvClearFNo,
step, nrnb, wcycle);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
wallcycle_stop(wcycle, ewcLAUNCH_GPU);
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
/* Note that with a GPU the launch overhead of the list transfer is not timed separately */
- nbv->constructPairlist(Nbnxm::InteractionLocality::NonLocal,
+ nbv->constructPairlist(InteractionLocality::NonLocal,
&top->excls, step, nrnb);
- nbv->setupGpuShortRangeWork(fr->gpuBonded, Nbnxm::InteractionLocality::NonLocal);
+ nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::NonLocal);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
wallcycle_stop(wcycle, ewcNS);
if (ddUsesGpuDirectCommunication)
if (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork)
{
// Non-local part of the coordinate buffer must be copied back to the host for CPU work
- stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
+ stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), AtomLocality::NonLocal);
}
}
else
// The condition here was (pme != nullptr && pme_gpu_get_device_x(fr->pmedata) != nullptr)
if (!useGpuPmeOnThisRank && !ddUsesGpuDirectCommunication)
{
- stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
+ stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), AtomLocality::NonLocal);
}
- nbv->convertCoordinatesGpu(Nbnxm::AtomLocality::NonLocal, false,
+ nbv->convertCoordinatesGpu(AtomLocality::NonLocal, false,
stateGpu->getCoordinates(),
- stateGpu->getCoordinatesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal,
+ stateGpu->getCoordinatesReadyOnDeviceEvent(AtomLocality::NonLocal,
simulationWork, stepWork));
}
else
{
- nbv->convertCoordinates(Nbnxm::AtomLocality::NonLocal, false,
+ nbv->convertCoordinates(AtomLocality::NonLocal, false,
x.unpaddedArrayRef());
}
{
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(),
- Nbnxm::AtomLocality::NonLocal);
+ AtomLocality::NonLocal);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
}
/* launch non-local nonbonded tasks on GPU */
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo,
+ do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFNo,
step, nrnb, wcycle);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
if (havePPDomainDecomposition(cr))
{
Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(),
- stepWork, Nbnxm::AtomLocality::NonLocal);
+ stepWork, AtomLocality::NonLocal);
}
Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(),
- stepWork, Nbnxm::AtomLocality::Local);
+ stepWork, AtomLocality::Local);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
if (domainWork.haveGpuBondedWork && stepWork.computeEnergy)
if (!useOrEmulateGpuNb)
{
- do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local, enbvClearFYes,
+ do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, enbvClearFYes,
step, nrnb, wcycle);
}
/* Calculate the local and non-local free energy interactions here.
* Happens here on the CPU both with and without GPU.
*/
- nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::Local,
+ nbv->dispatchFreeEnergyKernel(InteractionLocality::Local,
fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms,
inputrec->fepvals, lambda.data(),
enerd, stepWork, nrnb);
if (havePPDomainDecomposition(cr))
{
- nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::NonLocal,
+ nbv->dispatchFreeEnergyKernel(InteractionLocality::NonLocal,
fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms,
inputrec->fepvals, lambda.data(),
enerd, stepWork, nrnb);
{
if (havePPDomainDecomposition(cr))
{
- do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo,
+ do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFNo,
step, nrnb, wcycle);
}
* communication with calculation with domain decomposition.
*/
wallcycle_stop(wcycle, ewcFORCE);
- nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, forceOut.forceWithShiftForces().force());
+ nbv->atomdata_add_nbat_f_to_f(AtomLocality::All, forceOut.forceWithShiftForces().force());
wallcycle_start_nocount(wcycle, ewcFORCE);
}
if (simulationWork.useGpuNonbonded)
{
cycles_wait_gpu += Nbnxm::gpu_wait_finish_task(nbv->gpu_nbv,
- stepWork, Nbnxm::AtomLocality::NonLocal,
+ stepWork, AtomLocality::NonLocal,
enerd->grpp.ener[egLJSR].data(),
enerd->grpp.ener[egCOULSR].data(),
forceWithShiftForces.shiftForces(),
else
{
wallcycle_start_nocount(wcycle, ewcFORCE);
- do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFYes,
+ do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFYes,
step, nrnb, wcycle);
wallcycle_stop(wcycle, ewcFORCE);
}
if (haveNonLocalForceContribInCpuBuffer)
{
- stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
- dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal,
+ stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), AtomLocality::NonLocal);
+ dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::NonLocal,
useGpuFBufOps == BufferOpsUseGpu::True));
}
- nbv->atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality::NonLocal,
+ nbv->atomdata_add_nbat_f_to_f_gpu(AtomLocality::NonLocal,
stateGpu->getForces(),
pme_gpu_get_device_f(fr->pmedata),
dependencyList,
if (!useGpuForcesHaloExchange)
{
// copy from GPU input for dd_move_f()
- stateGpu->copyForcesFromGpu(forceOut.forceWithShiftForces().force(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
+ stateGpu->copyForcesFromGpu(forceOut.forceWithShiftForces().force(), AtomLocality::NonLocal);
}
}
else
{
- nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::NonLocal,
+ nbv->atomdata_add_nbat_f_to_f(AtomLocality::NonLocal,
forceWithShiftForces.force());
}
{
if (haveCpuLocalForces)
{
- stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), gmx::StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), AtomLocality::Local);
}
gpuHaloExchange->communicateHaloForces(haveCpuLocalForces);
}
{
if (useGpuFBufOps == BufferOpsUseGpu::True)
{
- stateGpu->waitForcesReadyOnHost(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
+ stateGpu->waitForcesReadyOnHost(AtomLocality::NonLocal);
}
dd_move_f(cr->dd, &forceOut.forceWithShiftForces(), wcycle);
}
const float gpuWaitApiOverheadMargin = 2e6F; /* cycles */
const float waitCycles =
Nbnxm::gpu_wait_finish_task(nbv->gpu_nbv,
- stepWork, Nbnxm::AtomLocality::Local,
+ stepWork, AtomLocality::Local,
enerd->grpp.ener[egLJSR].data(),
enerd->grpp.ener[egCOULSR].data(),
forceOut.forceWithShiftForces().shiftForces(),
// NOTE: emulation kernel is not included in the balancing region,
// but emulation mode does not target performance anyway
wallcycle_start_nocount(wcycle, ewcFORCE);
- do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local,
+ do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local,
DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes,
step, nrnb, wcycle);
wallcycle_stop(wcycle, ewcFORCE);
// These should be unified.
if (haveLocalForceContribInCpuBuffer && !useGpuForcesHaloExchange)
{
- stateGpu->copyForcesToGpu(forceWithShift, gmx::StatePropagatorDataGpu::AtomLocality::Local);
- dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::Local,
+ stateGpu->copyForcesToGpu(forceWithShift, AtomLocality::Local);
+ dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::Local,
useGpuFBufOps == BufferOpsUseGpu::True));
}
if (useGpuForcesHaloExchange)
// push the event into the dependencyList
nbv->stream_local_wait_for_nonlocal();
}
- nbv->atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality::Local,
+ nbv->atomdata_add_nbat_f_to_f_gpu(AtomLocality::Local,
stateGpu->getForces(),
pmeForcePtr,
dependencyList,
stepWork.useGpuPmeFReduction, haveLocalForceContribInCpuBuffer);
- stateGpu->copyForcesFromGpu(forceWithShift, gmx::StatePropagatorDataGpu::AtomLocality::Local);
- stateGpu->waitForcesReadyOnHost(gmx::StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->copyForcesFromGpu(forceWithShift, AtomLocality::Local);
+ stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
}
else
{
- nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local, forceWithShift);
+ nbv->atomdata_add_nbat_f_to_f(AtomLocality::Local, forceWithShift);
}
}