pme_gpu_prepare_computation(pme, boxChanged, box, wcycle, pmeFlags, useGpuPmeForceReduction);
if (!pme_pp->useGpuDirectComm)
{
- stateGpu->copyCoordinatesToGpu(gmx::ArrayRef<gmx::RVec>(pme_pp->x), gmx::StatePropagatorDataGpu::AtomLocality::All);
+ stateGpu->copyCoordinatesToGpu(gmx::ArrayRef<gmx::RVec>(pme_pp->x), gmx::AtomLocality::All);
}
// On the separate PME rank we do not need a synchronizer as we schedule everything in a single stream
// TODO: with pme on GPU the receive should make a list of synchronizers and pass it here #3157
gmx_pme_reinit_atoms(pme, atomCount, charges.data());
stateGpu->reinit(atomCount, atomCount);
- stateGpu->copyCoordinatesToGpu(arrayRefFromArray(coordinates.data(), coordinates.size()), gmx::StatePropagatorDataGpu::AtomLocality::All);
+ stateGpu->copyCoordinatesToGpu(arrayRefFromArray(coordinates.data(), coordinates.size()), gmx::AtomLocality::All);
pme_gpu_set_kernelparam_coordinates(pme->gpu, stateGpu->getCoordinates());
break;
if (useGpuForBonded)
{
auto stream = DOMAINDECOMP(cr) ?
- Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal) :
- Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::Local);
+ Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, gmx::InteractionLocality::NonLocal) :
+ Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, gmx::InteractionLocality::Local);
// TODO the heap allocation is only needed while
// t_forcerec lacks a constructor.
fr->gpuBonded = new gmx::GpuBonded(mtop->ffparams,
using gmx::StepWorkload;
using gmx::DomainLifetimeWorkload;
using gmx::SimulationWorkload;
+using gmx::AtomLocality;
+using gmx::InteractionLocality;
// TODO: this environment variable allows us to verify before release
// that on less common architectures the total cost of polling is not larger than
const interaction_const_t *ic,
gmx_enerdata_t *enerd,
const StepWorkload &stepWork,
- const Nbnxm::InteractionLocality ilocality,
+ const InteractionLocality ilocality,
const int clearF,
const int64_t step,
t_nrnb *nrnb,
GpuTaskCompletion completionType = (isPmeGpuDone) ? GpuTaskCompletion::Wait : GpuTaskCompletion::Check;
isNbGpuDone = Nbnxm::gpu_try_finish_task(nbv->gpu_nbv,
stepWork,
- Nbnxm::AtomLocality::Local,
+ AtomLocality::Local,
enerd->grpp.ener[egLJSR].data(),
enerd->grpp.ener[egCOULSR].data(),
forceWithShiftForces.shiftForces(), completionType, wcycle);
if (isNbGpuDone)
{
- nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local,
+ nbv->atomdata_add_nbat_f_to_f(AtomLocality::Local,
forceWithShiftForces.force());
}
}
// 2. The buffers were reinitialized on search step
if (!simulationWork.useGpuUpdate || stepWork.doNeighborSearch)
{
- stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), gmx::StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), AtomLocality::Local);
}
}
}
#endif /* GMX_MPI */
- const auto localXReadyOnDevice = (stateGpu != nullptr) ? stateGpu->getCoordinatesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::Local,
+ const auto localXReadyOnDevice = (stateGpu != nullptr) ? stateGpu->getCoordinatesReadyOnDeviceEvent(AtomLocality::Local,
simulationWork, stepWork) : nullptr;
if (useGpuPmeOnThisRank)
{
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
/* Note that with a GPU the launch overhead of the list transfer is not timed separately */
- nbv->constructPairlist(Nbnxm::InteractionLocality::Local,
+ nbv->constructPairlist(InteractionLocality::Local,
&top->excls, step, nrnb);
- nbv->setupGpuShortRangeWork(fr->gpuBonded, Nbnxm::InteractionLocality::Local);
+ nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::Local);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
wallcycle_stop(wcycle, ewcNS);
if (useGpuXBufOps == BufferOpsUseGpu::True)
{
GMX_ASSERT(stateGpu, "stateGpu should be valid when buffer ops are offloaded");
- nbv->convertCoordinatesGpu(Nbnxm::AtomLocality::Local, false,
+ nbv->convertCoordinatesGpu(AtomLocality::Local, false,
stateGpu->getCoordinates(),
localXReadyOnDevice);
}
else
{
- nbv->convertCoordinates(Nbnxm::AtomLocality::Local, false,
+ nbv->convertCoordinates(AtomLocality::Local, false,
x.unpaddedArrayRef());
}
}
if (stepWork.doNeighborSearch || (useGpuXBufOps == BufferOpsUseGpu::False))
{
Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(),
- Nbnxm::AtomLocality::Local);
+ AtomLocality::Local);
}
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
// with X buffer ops offloaded to the GPU on all but the search steps
/* launch local nonbonded work on GPU */
wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local, enbvClearFNo,
+ do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, enbvClearFNo,
step, nrnb, wcycle);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
wallcycle_stop(wcycle, ewcLAUNCH_GPU);
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
/* Note that with a GPU the launch overhead of the list transfer is not timed separately */
- nbv->constructPairlist(Nbnxm::InteractionLocality::NonLocal,
+ nbv->constructPairlist(InteractionLocality::NonLocal,
&top->excls, step, nrnb);
- nbv->setupGpuShortRangeWork(fr->gpuBonded, Nbnxm::InteractionLocality::NonLocal);
+ nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::NonLocal);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
wallcycle_stop(wcycle, ewcNS);
if (ddUsesGpuDirectCommunication)
if (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork)
{
//non-local part of coordinate buffer must be copied back to host for CPU work
- stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
+ stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), AtomLocality::NonLocal);
}
}
else
// The condition here was (pme != nullptr && pme_gpu_get_device_x(fr->pmedata) != nullptr)
if (!useGpuPmeOnThisRank && !ddUsesGpuDirectCommunication)
{
- stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
+ stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), AtomLocality::NonLocal);
}
- nbv->convertCoordinatesGpu(Nbnxm::AtomLocality::NonLocal, false,
+ nbv->convertCoordinatesGpu(AtomLocality::NonLocal, false,
stateGpu->getCoordinates(),
- stateGpu->getCoordinatesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal,
+ stateGpu->getCoordinatesReadyOnDeviceEvent(AtomLocality::NonLocal,
simulationWork, stepWork));
}
else
{
- nbv->convertCoordinates(Nbnxm::AtomLocality::NonLocal, false,
+ nbv->convertCoordinates(AtomLocality::NonLocal, false,
x.unpaddedArrayRef());
}
{
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(),
- Nbnxm::AtomLocality::NonLocal);
+ AtomLocality::NonLocal);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
}
/* launch non-local nonbonded tasks on GPU */
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
- do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo,
+ do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFNo,
step, nrnb, wcycle);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
if (havePPDomainDecomposition(cr))
{
Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(),
- stepWork, Nbnxm::AtomLocality::NonLocal);
+ stepWork, AtomLocality::NonLocal);
}
Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(),
- stepWork, Nbnxm::AtomLocality::Local);
+ stepWork, AtomLocality::Local);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
if (domainWork.haveGpuBondedWork && stepWork.computeEnergy)
if (!useOrEmulateGpuNb)
{
- do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local, enbvClearFYes,
+ do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, enbvClearFYes,
step, nrnb, wcycle);
}
/* Calculate the local and non-local free energy interactions here.
* Happens here on the CPU both with and without GPU.
*/
- nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::Local,
+ nbv->dispatchFreeEnergyKernel(InteractionLocality::Local,
fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms,
inputrec->fepvals, lambda.data(),
enerd, stepWork, nrnb);
if (havePPDomainDecomposition(cr))
{
- nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::NonLocal,
+ nbv->dispatchFreeEnergyKernel(InteractionLocality::NonLocal,
fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms,
inputrec->fepvals, lambda.data(),
enerd, stepWork, nrnb);
{
if (havePPDomainDecomposition(cr))
{
- do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo,
+ do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFNo,
step, nrnb, wcycle);
}
* communication with calculation with domain decomposition.
*/
wallcycle_stop(wcycle, ewcFORCE);
- nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, forceOut.forceWithShiftForces().force());
+ nbv->atomdata_add_nbat_f_to_f(AtomLocality::All, forceOut.forceWithShiftForces().force());
wallcycle_start_nocount(wcycle, ewcFORCE);
}
if (simulationWork.useGpuNonbonded)
{
cycles_wait_gpu += Nbnxm::gpu_wait_finish_task(nbv->gpu_nbv,
- stepWork, Nbnxm::AtomLocality::NonLocal,
+ stepWork, AtomLocality::NonLocal,
enerd->grpp.ener[egLJSR].data(),
enerd->grpp.ener[egCOULSR].data(),
forceWithShiftForces.shiftForces(),
else
{
wallcycle_start_nocount(wcycle, ewcFORCE);
- do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFYes,
+ do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFYes,
step, nrnb, wcycle);
wallcycle_stop(wcycle, ewcFORCE);
}
if (haveNonLocalForceContribInCpuBuffer)
{
- stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
- dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal,
+ stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), AtomLocality::NonLocal);
+ dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::NonLocal,
useGpuFBufOps == BufferOpsUseGpu::True));
}
- nbv->atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality::NonLocal,
+ nbv->atomdata_add_nbat_f_to_f_gpu(AtomLocality::NonLocal,
stateGpu->getForces(),
pme_gpu_get_device_f(fr->pmedata),
dependencyList,
if (!useGpuForcesHaloExchange)
{
// copy from GPU input for dd_move_f()
- stateGpu->copyForcesFromGpu(forceOut.forceWithShiftForces().force(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
+ stateGpu->copyForcesFromGpu(forceOut.forceWithShiftForces().force(), AtomLocality::NonLocal);
}
}
else
{
- nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::NonLocal,
+ nbv->atomdata_add_nbat_f_to_f(AtomLocality::NonLocal,
forceWithShiftForces.force());
}
{
if (haveCpuLocalForces)
{
- stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), gmx::StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), AtomLocality::Local);
}
gpuHaloExchange->communicateHaloForces(haveCpuLocalForces);
}
{
if (useGpuFBufOps == BufferOpsUseGpu::True)
{
- stateGpu->waitForcesReadyOnHost(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
+ stateGpu->waitForcesReadyOnHost(AtomLocality::NonLocal);
}
dd_move_f(cr->dd, &forceOut.forceWithShiftForces(), wcycle);
}
const float gpuWaitApiOverheadMargin = 2e6F; /* cycles */
const float waitCycles =
Nbnxm::gpu_wait_finish_task(nbv->gpu_nbv,
- stepWork, Nbnxm::AtomLocality::Local,
+ stepWork, AtomLocality::Local,
enerd->grpp.ener[egLJSR].data(),
enerd->grpp.ener[egCOULSR].data(),
forceOut.forceWithShiftForces().shiftForces(),
// NOTE: emulation kernel is not included in the balancing region,
// but emulation mode does not target performance anyway
wallcycle_start_nocount(wcycle, ewcFORCE);
- do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local,
+ do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local,
DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes,
step, nrnb, wcycle);
wallcycle_stop(wcycle, ewcFORCE);
// These should be unified.
if (haveLocalForceContribInCpuBuffer && !useGpuForcesHaloExchange)
{
- stateGpu->copyForcesToGpu(forceWithShift, gmx::StatePropagatorDataGpu::AtomLocality::Local);
- dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::Local,
+ stateGpu->copyForcesToGpu(forceWithShift, AtomLocality::Local);
+ dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::Local,
useGpuFBufOps == BufferOpsUseGpu::True));
}
if (useGpuForcesHaloExchange)
// push the event into the dependencyList
nbv->stream_local_wait_for_nonlocal();
}
- nbv->atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality::Local,
+ nbv->atomdata_add_nbat_f_to_f_gpu(AtomLocality::Local,
stateGpu->getForces(),
pmeForcePtr,
dependencyList,
stepWork.useGpuPmeFReduction, haveLocalForceContribInCpuBuffer);
- stateGpu->copyForcesFromGpu(forceWithShift, gmx::StatePropagatorDataGpu::AtomLocality::Local);
- stateGpu->waitForcesReadyOnHost(gmx::StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->copyForcesFromGpu(forceWithShift, AtomLocality::Local);
+ stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
}
else
{
- nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local, forceWithShift);
+ nbv->atomdata_add_nbat_f_to_f(AtomLocality::Local, forceWithShift);
}
}
// TODO: Move to after all booleans are defined.
if (useGpuForUpdate && !bFirstStep)
{
- stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::Local);
- stateGpu->waitCoordinatesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
+ stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
/* PME grid + cut-off optimization with GPUs or PME nodes */
pme_loadbal_do(pme_loadbal, cr,
// - When needed for the output.
if (bNS || do_per_step(step, ir->nstvout))
{
- stateGpu->copyVelocitiesFromGpu(state->v, StatePropagatorDataGpu::AtomLocality::Local);
- stateGpu->waitVelocitiesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
+ stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
}
// Copy coordinate from the GPU when needed:
(runScheduleWork->domainWork.haveCpuBondedWork || runScheduleWork->domainWork.haveFreeEnergyWork) ||
do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed))
{
- stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::Local);
- stateGpu->waitCoordinatesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
+ stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
}
integrator->setPbc(&pbc);
// Copy data to the GPU after buffers might have been reinitialized
- stateGpu->copyVelocitiesToGpu(state->v, StatePropagatorDataGpu::AtomLocality::Local);
- stateGpu->copyCoordinatesToGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
+ stateGpu->copyCoordinatesToGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
}
- stateGpu->copyForcesToGpu(ArrayRef<RVec>(f), StatePropagatorDataGpu::AtomLocality::All);
+ stateGpu->copyForcesToGpu(ArrayRef<RVec>(f), AtomLocality::All);
// TODO: Use StepWorkload fields.
bool useGpuFBufferOps = simulationWork.useGpuBufferOps && !(bCalcVir || bCalcEner);
bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
// This applies Leap-Frog, LINCS and SETTLE in succession
- integrator->integrate(stateGpu->getForcesReadyOnDeviceEvent(StatePropagatorDataGpu::AtomLocality::Local, useGpuFBufferOps),
+ integrator->integrate(stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::Local, useGpuFBufferOps),
ir->delta_t, true, bCalcVir, shake_vir,
doTempCouple, ekind->tcstat,
doParrinelloRahman, ir->nstpcouple*ir->delta_t, M);
// - Temperature is needed for the next step.
if (bGStat || needHalfStepKineticEnergy)
{
- stateGpu->copyVelocitiesFromGpu(state->v, StatePropagatorDataGpu::AtomLocality::Local);
- stateGpu->waitVelocitiesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local);
- stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::Local);
- stateGpu->waitCoordinatesReadyOnHost(StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
+ stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
+ stateGpu->copyCoordinatesFromGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
+ stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
-
}
else
{
// TODO: The special case of removing CM motion should be dealt more gracefully
if (useGpuForUpdate)
{
- stateGpu->copyCoordinatesToGpu(ArrayRef<RVec>(state->x), StatePropagatorDataGpu::AtomLocality::Local);
- stateGpu->waitCoordinatesCopiedToDevice(StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->copyCoordinatesToGpu(ArrayRef<RVec>(state->x), AtomLocality::Local);
+ stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
}
}
}
if (havePPDomainDecomposition(cr) && prefer1DAnd1PulseDD && is1DAnd1PulseDD(*cr->dd))
{
GMX_RELEASE_ASSERT(devFlags.enableGpuBufferOps, "Must use GMX_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
- void *streamLocal = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::Local);
- void *streamNonLocal = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal);
+ void *streamLocal = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local);
+ void *streamNonLocal = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal);
void *coordinatesOnDeviceEvent = fr->nbv->get_x_on_device_event();
GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
"NOTE: This run uses the 'GPU halo exchange' feature, enabled by the GMX_GPU_DD_COMMS environment variable.");
if (gpusWereDetected && ((useGpuForPme && thisRankHasDuty(cr, DUTY_PME)) || devFlags.enableGpuBufferOps))
{
const void *pmeStream = pme_gpu_get_device_stream(fr->pmedata);
- const void *localStream = fr->nbv->gpu_nbv != nullptr ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::Local) : nullptr;
- const void *nonLocalStream = fr->nbv->gpu_nbv != nullptr ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal) : nullptr;
+ const void *localStream = fr->nbv->gpu_nbv != nullptr ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local) : nullptr;
+ const void *nonLocalStream = fr->nbv->gpu_nbv != nullptr ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal) : nullptr;
const void *deviceContext = pme_gpu_get_device_context(fr->pmedata);
const int paddingSize = pme_gpu_get_padding_size(fr->pmedata);
GpuApiCallBehavior transferKind = (inputrec->eI == eiMD && !doRerun && !useModularSimulator) ? GpuApiCallBehavior::Async : GpuApiCallBehavior::Sync;
/* TODO: Avoid updating all atoms at every bNS step */
fr->nbv->setAtomProperties(*mdatoms, fr->cginfo);
- fr->nbv->constructPairlist(Nbnxm::InteractionLocality::Local,
+ fr->nbv->constructPairlist(InteractionLocality::Local,
&top.excls, step, nrnb);
bNS = FALSE;
}
/* Note: NonLocal refers to the inserted molecule */
- fr->nbv->convertCoordinates(Nbnxm::AtomLocality::NonLocal, false, x);
+ fr->nbv->convertCoordinates(AtomLocality::NonLocal, false, x);
/* Clear some matrix variables */
clear_mat(force_vir);
*/
/*! \libinternal \file
- * \brief Defines nbnxn locality enums
+ * \brief Defines atom and atom interaction locality enums
*
* \author Berk Hess <hess@kth.se>
- * \ingroup module_nbnxm
+ * \ingroup module_mdtypes
*/
-#ifndef GMX_NBNXM_LOCALITY_H
-#define GMX_NBNXM_LOCALITY_H
+#ifndef GMX_MDTYPES_LOCALITY_H
+#define GMX_MDTYPES_LOCALITY_H
-namespace Nbnxm
+#include "gromacs/utility/enumerationhelpers.h"
+
+namespace gmx
{
/*! \brief Atom locality indicator: local, non-local, all.
Count = 3 //!< The number of atom locality types
};
+/*! \brief Descriptive strings for atom localities */
+static const EnumerationArray<AtomLocality, const char *> c_atomLocalityNames = { "local", "non-local", "all" };
+
/*! \brief Interaction locality indicator: local, non-local.
*
* Used for calls to:
Count = 2 //!< The number of interaction locality types
};
-} // namespace Nbnxm
+/*! \brief Descriptive strings for interaction localities */
+static const EnumerationArray<InteractionLocality, const char *> c_interactionLocalityNames = { "local", "non-local" };
+
+} // namespace gmx
-#endif // GMX_NBNXM_LOCALITY_H
+#endif // GMX_MDTYPES_LOCALITY_H
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/classhelpers.h"
+#include "locality.h"
+
class GpuEventSynchronizer;
namespace gmx
{
public:
- /*! \brief Atom locality indicator: local, non-local, all.
- *
- * \todo This should be managed by a separate object, since the localities
- * are used here and in buffer ops.
- */
- enum class AtomLocality : int
- {
- Local = 0, //!< Local atoms
- NonLocal = 1, //!< Non-local atoms
- All = 2, //!< Both local and non-local atoms
- Count = 3 //!< The number of atom locality types
- };
-
/*! \brief Constructor
*
* The buffers are reallocated only at the reinit call, the padding is
// This is slightly different from nbnxn_get_atom_range(...) at the end of the file
// TODO: Combine if possible
static void getAtomRanges(const Nbnxm::GridSet &gridSet,
- const Nbnxm::AtomLocality locality,
+ const gmx::AtomLocality locality,
int *gridBegin,
int *gridEnd)
{
switch (locality)
{
- case Nbnxm::AtomLocality::All:
+ case gmx::AtomLocality::All:
*gridBegin = 0;
*gridEnd = gridSet.grids().size();
break;
- case Nbnxm::AtomLocality::Local:
+ case gmx::AtomLocality::Local:
*gridBegin = 0;
*gridEnd = 1;
break;
- case Nbnxm::AtomLocality::NonLocal:
+ case gmx::AtomLocality::NonLocal:
*gridBegin = 1;
*gridEnd = gridSet.grids().size();
break;
- case Nbnxm::AtomLocality::Count:
+ case gmx::AtomLocality::Count:
GMX_ASSERT(false, "Count is invalid locality specifier");
break;
}
/* Copies (and reorders) the coordinates to nbnxn_atomdata_t */
void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet &gridSet,
- const Nbnxm::AtomLocality locality,
+ const gmx::AtomLocality locality,
bool fillLocal,
const rvec *coordinates,
nbnxn_atomdata_t *nbat)
/* Copies (and reorders) the coordinates to nbnxn_atomdata_t on the GPU*/
void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet &gridSet,
- const Nbnxm::AtomLocality locality,
+ const gmx::AtomLocality locality,
bool fillLocal,
gmx_nbnxn_gpu_t *gpu_nbv,
DeviceBuffer<float> d_x,
/* Add the force array(s) from nbnxn_atomdata_t to f */
void reduceForces(nbnxn_atomdata_t *nbat,
- const Nbnxm::AtomLocality locality,
+ const gmx::AtomLocality locality,
const Nbnxm::GridSet &gridSet,
rvec *f)
{
if (nbat->out.size() > 1)
{
- if (locality != Nbnxm::AtomLocality::All)
+ if (locality != gmx::AtomLocality::All)
{
gmx_incons("add_f_to_f called with nout>1 and locality!=eatAll");
}
}
/* Add the force array(s) from nbnxn_atomdata_t to f */
-void reduceForcesGpu(const Nbnxm::AtomLocality locality,
+void reduceForcesGpu(const gmx::AtomLocality locality,
DeviceBuffer<float> totalForcesDevice,
const Nbnxm::GridSet &gridSet,
void *pmeForcesDevice,
}
}
-void nbnxn_get_atom_range(const Nbnxm::AtomLocality atomLocality,
+void nbnxn_get_atom_range(const gmx::AtomLocality atomLocality,
const Nbnxm::GridSet &gridSet,
int *atomStart,
int *nAtoms)
switch (atomLocality)
{
- case Nbnxm::AtomLocality::All:
+ case gmx::AtomLocality::All:
*atomStart = 0;
*nAtoms = gridSet.numRealAtomsTotal();
break;
- case Nbnxm::AtomLocality::Local:
+ case gmx::AtomLocality::Local:
*atomStart = 0;
*nAtoms = gridSet.numRealAtomsLocal();
break;
- case Nbnxm::AtomLocality::NonLocal:
+ case gmx::AtomLocality::NonLocal:
*atomStart = gridSet.numRealAtomsLocal();
*nAtoms = gridSet.numRealAtomsTotal() - gridSet.numRealAtomsLocal();
break;
- case Nbnxm::AtomLocality::Count:
+ case gmx::AtomLocality::Count:
GMX_ASSERT(false, "Count is invalid locality specifier");
break;
}
#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/gpu_utils/hostallocator.h"
#include "gromacs/math/vectypes.h"
+#include "gromacs/mdtypes/locality.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/bitmask.h"
#include "gromacs/utility/real.h"
#include "gpu_types.h"
-#include "locality.h"
namespace gmx
{
* \param[in,out] nbat Data in NBNXM format, used for mapping formats and to locate the output buffer.
*/
void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet &gridSet,
- Nbnxm::AtomLocality locality,
+ gmx::AtomLocality locality,
bool fillLocal,
const rvec *coordinates,
nbnxn_atomdata_t *nbat);
* \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory.
*/
void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet &gridSet,
- Nbnxm::AtomLocality locality,
+ gmx::AtomLocality locality,
bool fillLocal,
gmx_nbnxn_gpu_t *gpu_nbv,
DeviceBuffer<float> d_x,
* \param[out] totalForce Buffer to accumulate resulting force
*/
void reduceForces(nbnxn_atomdata_t *nbat,
- Nbnxm::AtomLocality locality,
+ gmx::AtomLocality locality,
const Nbnxm::GridSet &gridSet,
rvec *totalForce);
* \param[in] useGpuFPmeReduction Whether PME forces should be added.
* \param[in] accumulateForce Whether there are useful data already in the total force buffer.
*/
-void reduceForcesGpu(Nbnxm::AtomLocality locality,
+void reduceForcesGpu(gmx::AtomLocality locality,
DeviceBuffer<float> totalForcesDevice,
const Nbnxm::GridSet &gridSet,
void *pmeForcesDevice,
gmx::ArrayRef<gmx::RVec> fshift);
/* Get the atom start index and number of atoms for a given locality */
-void nbnxn_get_atom_range(Nbnxm::AtomLocality atomLocality,
+void nbnxn_get_atom_range(gmx::AtomLocality atomLocality,
const Nbnxm::GridSet &gridSet,
int *atomStart,
int *nAtoms);
atomInfo, system.coordinates,
0, nullptr);
- nbv->constructPairlist(Nbnxm::InteractionLocality::Local,
+ nbv->constructPairlist(gmx::InteractionLocality::Local,
&system.excls, 0, &nrnb);
t_mdatoms mdatoms;
// Run pre-iteration to avoid cache misses
for (int iter = 0; iter < options.numPreIterations; iter++)
{
- nbv->dispatchNonbondedKernel(InteractionLocality::Local,
+ nbv->dispatchNonbondedKernel(gmx::InteractionLocality::Local,
ic, stepWork, enbvClearFYes, system.forceRec,
&enerd,
&nrnb);
}
const int numIterations = (doWarmup ? options.numWarmupIterations : options.numIterations);
- const PairlistSet &pairlistSet = nbv->pairlistSets().pairlistSet(InteractionLocality::Local);
+ const PairlistSet &pairlistSet = nbv->pairlistSets().pairlistSet(gmx::InteractionLocality::Local);
const gmx::index numPairs = pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_ + pairlistSet.natpair_q_;
gmx_cycles_t cycles = gmx_cycles_read();
for (int iter = 0; iter < numIterations; iter++)
{
// Run the kernel without force clearing
- nbv->dispatchNonbondedKernel(InteractionLocality::Local,
+ nbv->dispatchNonbondedKernel(gmx::InteractionLocality::Local,
ic, stepWork, enbvClearFNo, system.forceRec,
&enerd,
&nrnb);
//NOLINTNEXTLINE(misc-definitions-in-headers)
void setupGpuShortRangeWork(gmx_nbnxn_gpu_t *nb,
const gmx::GpuBonded *gpuBonded,
- const Nbnxm::InteractionLocality iLocality)
+ const gmx::InteractionLocality iLocality)
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
*/
static bool
haveGpuShortRangeWork(const gmx_nbnxn_gpu_t &nb,
- const Nbnxm::InteractionLocality iLocality)
+ const gmx::InteractionLocality iLocality)
{
return nb.haveWork[iLocality];
}
//NOLINTNEXTLINE(misc-definitions-in-headers)
bool haveGpuShortRangeWork(const gmx_nbnxn_gpu_t *nb,
- const Nbnxm::AtomLocality aLocality)
+ const gmx::AtomLocality aLocality)
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
}
}
-//TODO: move into shared source file with gmx_compile_cpp_as_cuda
+/*! \brief Attempts to complete nonbonded GPU task.
+ *
+ * See documentation in nbnxm_gpu.h for details.
+ *
+ * \todo Move into shared source file with gmx_compile_cpp_as_cuda
+ */
//NOLINTNEXTLINE(misc-definitions-in-headers)
bool gpu_try_finish_task(gmx_nbnxn_gpu_t *nb,
const gmx::StepWorkload &stepWork,
#include "gromacs/gpu_utils/gpu_macros.h"
#include "gromacs/mdtypes/interaction_const.h"
+#include "gromacs/mdtypes/locality.h"
#include "gpu_types.h"
-#include "locality.h"
struct NbnxnPairlistGpu;
struct nbnxn_atomdata_t;
GPU_FUNC_QUALIFIER
void gpu_init_pairlist(gmx_nbnxn_gpu_t gmx_unused *nb,
const struct NbnxnPairlistGpu gmx_unused *h_nblist,
- InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
+ gmx::InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
/** Initializes atom-data on the GPU, called at every pair search step. */
GPU_FUNC_QUALIFIER
* Note: CUDA only.
*/
CUDA_FUNC_QUALIFIER
-void *gpu_get_command_stream(gmx_nbnxn_gpu_t gmx_unused *nb,
- InteractionLocality gmx_unused iloc) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+void *gpu_get_command_stream(gmx_nbnxn_gpu_t gmx_unused *nb,
+ gmx::InteractionLocality gmx_unused iloc) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
/** Returns an opaque pointer to the GPU coordinate+charge array
* Note: CUDA only.
#include "config.h"
+#include "gromacs/mdtypes/locality.h"
#include "gromacs/utility/enumerationhelpers.h"
-#include "locality.h"
#include "pairlist.h"
#if GMX_GPU == GMX_GPU_OPENCL
namespace Nbnxm
{
+using gmx::AtomLocality;
+using gmx::InteractionLocality;
+
/*! \internal
* \brief GPU region timers used for timing GPU kernels and H2D/D2H transfers.
*
}
void
-nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality,
+nonbonded_verlet_t::dispatchNonbondedKernel(gmx::InteractionLocality iLocality,
const interaction_const_t &ic,
const gmx::StepWorkload &stepWork,
int clearF,
}
void
-nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocality,
+nonbonded_verlet_t::dispatchFreeEnergyKernel(gmx::InteractionLocality iLocality,
const t_forcerec *fr,
rvec x[],
gmx::ForceWithShiftForces *forceWithShiftForces,
nbnxn_atomdata_set(nbat.get(), pairSearch_->gridSet(), &mdatoms, atomInfo.data());
}
-void nonbonded_verlet_t::convertCoordinates(const Nbnxm::AtomLocality locality,
+void nonbonded_verlet_t::convertCoordinates(const gmx::AtomLocality locality,
const bool fillLocal,
gmx::ArrayRef<const gmx::RVec> coordinates)
{
wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
}
-void nonbonded_verlet_t::convertCoordinatesGpu(const Nbnxm::AtomLocality locality,
+void nonbonded_verlet_t::convertCoordinatesGpu(const gmx::AtomLocality locality,
const bool fillLocal,
DeviceBuffer<float> d_x,
GpuEventSynchronizer *xReadyOnDevice)
}
void
-nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality locality,
- gmx::ArrayRef<gmx::RVec> force)
+nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const gmx::AtomLocality locality,
+ gmx::ArrayRef<gmx::RVec> force)
{
/* Skip the reduction if there was no short-range GPU work to do
}
void
-nonbonded_verlet_t::atomdata_add_nbat_f_to_f_gpu(const Nbnxm::AtomLocality locality,
+nonbonded_verlet_t::atomdata_add_nbat_f_to_f_gpu(const gmx::AtomLocality locality,
DeviceBuffer<float> totalForcesDevice,
void *forcesPmeDevice,
gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
Nbnxm::nbnxn_gpu_init_x_to_nbat_x(pairSearch_->gridSet(), gpu_nbv);
}
-void nonbonded_verlet_t::insertNonlocalGpuDependency(const Nbnxm::InteractionLocality interactionLocality)
+void nonbonded_verlet_t::insertNonlocalGpuDependency(const gmx::InteractionLocality interactionLocality)
{
Nbnxm::nbnxnInsertNonlocalGpuDependency(gpu_nbv, interactionLocality);
}
#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/math/vectypes.h"
+#include "gromacs/mdtypes/locality.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/enumerationhelpers.h"
#include "gromacs/utility/range.h"
#include "gromacs/utility/real.h"
-#include "locality.h"
-
-// TODO: Remove this include and the two nbnxm includes above
+// TODO: Remove this include
#include "nbnxm_gpu.h"
struct gmx_device_info_t;
gmx::ArrayRef<const int> getGridIndices() const;
//! Constructs the pairlist for the given locality
- void constructPairlist(Nbnxm::InteractionLocality iLocality,
+ void constructPairlist(gmx::InteractionLocality iLocality,
const t_blocka *excl,
int64_t step,
t_nrnb *nrnb);
* \param[in] fillLocal If the coordinates for filler particles should be zeroed.
* \param[in] coordinates Coordinates in plain rvec format to be transformed.
*/
- void convertCoordinates(Nbnxm::AtomLocality locality,
+ void convertCoordinates(gmx::AtomLocality locality,
bool fillLocal,
gmx::ArrayRef<const gmx::RVec> coordinates);
* \param[in] d_x GPU coordinates buffer in plain rvec format to be transformed.
* \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory.
*/
- void convertCoordinatesGpu(Nbnxm::AtomLocality locality,
+ void convertCoordinatesGpu(gmx::AtomLocality locality,
bool fillLocal,
DeviceBuffer<float> d_x,
GpuEventSynchronizer *xReadyOnDevice);
void atomdata_init_copy_x_to_nbat_x_gpu();
//! Sync the nonlocal GPU stream with dependent tasks in the local queue.
- void insertNonlocalGpuDependency(Nbnxm::InteractionLocality interactionLocality);
+ void insertNonlocalGpuDependency(gmx::InteractionLocality interactionLocality);
//! Returns a reference to the pairlist sets
const PairlistSets &pairlistSets() const
bool isDynamicPruningStepGpu(int64_t step) const;
//! Dispatches the dynamic pruning kernel for the given locality, for CPU lists
- void dispatchPruneKernelCpu(Nbnxm::InteractionLocality iLocality,
+ void dispatchPruneKernelCpu(gmx::InteractionLocality iLocality,
const rvec *shift_vec);
//! Dispatches the dynamic pruning kernel for GPU lists
void dispatchPruneKernelGpu(int64_t step);
//! \brief Executes the non-bonded kernel on the CPU or launches it on the GPU
- void dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality,
+ void dispatchNonbondedKernel(gmx::InteractionLocality iLocality,
const interaction_const_t &ic,
const gmx::StepWorkload &stepWork,
int clearF,
t_nrnb *nrnb);
//! Executes the non-bonded free-energy kernel, always runs on the CPU
- void dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocality,
+ void dispatchFreeEnergyKernel(gmx::InteractionLocality iLocality,
const t_forcerec *fr,
rvec x[],
gmx::ForceWithShiftForces *forceWithShiftForces,
* \param [in] locality Local or non-local
* \param [inout] force Force to be added to
*/
- void atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality locality,
+ void atomdata_add_nbat_f_to_f(gmx::AtomLocality locality,
gmx::ArrayRef<gmx::RVec> force);
/*! \brief Add the forces stored in nbat to total force using GPU buffer ops
* \param [in] useGpuFPmeReduction Whether PME forces should be added
* \param [in] accumulateForce If the total force buffer already contains data
*/
- void atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality locality,
+ void atomdata_add_nbat_f_to_f_gpu(gmx::AtomLocality locality,
DeviceBuffer<float> totalForcesDevice,
void *forcesPmeDevice,
gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
real rlistInner);
//! Set up internal flags that indicate what type of short-range work there is.
- void setupGpuShortRangeWork(const gmx::GpuBonded *gpuBonded,
- const Nbnxm::InteractionLocality iLocality)
+ void setupGpuShortRangeWork(const gmx::GpuBonded *gpuBonded,
+ const gmx::InteractionLocality iLocality)
{
if (useGpu() && !emulateGpu())
{
}
//! Returns true if there is GPU short-range work for the given atom locality.
- bool haveGpuShortRangeWork(const Nbnxm::AtomLocality aLocality)
+ bool haveGpuShortRangeWork(const gmx::AtomLocality aLocality)
{
return ((useGpu() && !emulateGpu()) &&
Nbnxm::haveGpuShortRangeWork(gpu_nbv, aLocality));
#include "gromacs/gpu_utils/gpu_macros.h"
#include "gromacs/math/vectypes.h"
+#include "gromacs/mdtypes/locality.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/real.h"
#include "atomdata.h"
#include "gpu_types.h"
-#include "locality.h"
struct interaction_const_t;
struct nbnxn_atomdata_t;
GPU_FUNC_QUALIFIER
void gpu_copy_xq_to_gpu(gmx_nbnxn_gpu_t gmx_unused *nb,
const struct nbnxn_atomdata_t gmx_unused *nbdata,
- AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
+ gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
/*! \brief
* Launch asynchronously the nonbonded force calculations.
GPU_FUNC_QUALIFIER
void gpu_launch_kernel(gmx_nbnxn_gpu_t gmx_unused *nb,
const gmx::StepWorkload gmx_unused &stepWork,
- InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
+ gmx::InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
/*! \brief
* Launch asynchronously the nonbonded prune-only kernel.
* \param [in] numParts Number of parts the pair list is split into in the rolling kernel.
*/
GPU_FUNC_QUALIFIER
-void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t gmx_unused *nb,
- InteractionLocality gmx_unused iloc,
- int gmx_unused numParts) GPU_FUNC_TERM;
+void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t gmx_unused *nb,
+ gmx::InteractionLocality gmx_unused iloc,
+ int gmx_unused numParts) GPU_FUNC_TERM;
/*! \brief
* Launch asynchronously the download of short-range forces from the GPU
void gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused *nb,
nbnxn_atomdata_t gmx_unused *nbatom,
const gmx::StepWorkload gmx_unused &stepWork,
- AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
+ gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
/*! \brief Attempts to complete nonbonded GPU task.
*
GPU_FUNC_QUALIFIER
bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb,
const gmx::StepWorkload gmx_unused &stepWork,
- AtomLocality gmx_unused aloc,
+ gmx::AtomLocality gmx_unused aloc,
real gmx_unused *e_lj,
real gmx_unused *e_el,
gmx::ArrayRef<gmx::RVec> gmx_unused shiftForces,
* \param[out] e_lj Pointer to the LJ energy output to accumulate into
* \param[out] e_el Pointer to the electrostatics energy output to accumulate into
* \param[out] shiftForces Shift forces buffer to accumulate into
- */
+ * \param[out] wcycle Pointer to wallcycle data structure */
GPU_FUNC_QUALIFIER
float gpu_wait_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb,
const gmx::StepWorkload gmx_unused &stepWork,
- AtomLocality gmx_unused aloc,
+ gmx::AtomLocality gmx_unused aloc,
real gmx_unused *e_lj,
real gmx_unused *e_el,
gmx::ArrayRef<gmx::RVec> gmx_unused shiftForces,
gmx_nbnxn_gpu_t gmx_unused *gpu_nbv,
DeviceBuffer<float> gmx_unused d_x,
GpuEventSynchronizer gmx_unused *xReadyOnDevice,
- Nbnxm::AtomLocality gmx_unused locality,
+ gmx::AtomLocality gmx_unused locality,
int gmx_unused gridId,
int gmx_unused numColumnsMax) CUDA_FUNC_TERM;
*/
CUDA_FUNC_QUALIFIER
void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxn_gpu_t gmx_unused *nb,
- InteractionLocality gmx_unused interactionLocality) CUDA_FUNC_TERM;
+ gmx::InteractionLocality gmx_unused interactionLocality) CUDA_FUNC_TERM;
/*! \brief Set up internal flags that indicate what type of short-range work there is.
*
GPU_FUNC_QUALIFIER
void setupGpuShortRangeWork(gmx_nbnxn_gpu_t gmx_unused *nb,
const gmx::GpuBonded gmx_unused *gpuBonded,
- Nbnxm::InteractionLocality gmx_unused iLocality) GPU_FUNC_TERM;
+ gmx::InteractionLocality gmx_unused iLocality) GPU_FUNC_TERM;
/*! \brief Returns true if there is GPU short-range work for the given atom locality.
*
*/
GPU_FUNC_QUALIFIER
bool haveGpuShortRangeWork(const gmx_nbnxn_gpu_t gmx_unused *nb,
- Nbnxm::AtomLocality gmx_unused aLocality) GPU_FUNC_TERM_WITH_RETURN(false);
+ gmx::AtomLocality gmx_unused aLocality) GPU_FUNC_TERM_WITH_RETURN(false);
/*! \brief Initialization for F buffer operations on GPU */
CUDA_FUNC_QUALIFIER
*
*/
CUDA_FUNC_QUALIFIER
-void nbnxn_gpu_add_nbat_f_to_f(AtomLocality gmx_unused atomLocality,
+void nbnxn_gpu_add_nbat_f_to_f(gmx::AtomLocality gmx_unused atomLocality,
DeviceBuffer<float> gmx_unused totalForcesDevice,
gmx_nbnxn_gpu_t gmx_unused *gpu_nbv,
void gmx_unused *pmeForcesDevice,
params_(pairlistParams),
minimumIlistCountForGpuBalancing_(minimumIlistCountForGpuBalancing)
{
- localSet_ = std::make_unique<PairlistSet>(Nbnxm::InteractionLocality::Local,
+ localSet_ = std::make_unique<PairlistSet>(gmx::InteractionLocality::Local,
params_);
if (haveMultipleDomains)
{
- nonlocalSet_ = std::make_unique<PairlistSet>(Nbnxm::InteractionLocality::NonLocal,
+ nonlocalSet_ = std::make_unique<PairlistSet>(gmx::InteractionLocality::NonLocal,
params_);
}
}
return shmem;
}
+/*! \brief
+ * Launch the pairlist prune only kernel for the given locality.
+ * \p numParts tells in how many parts, i.e. calls, the list will be pruned.
+ */
void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t *nb,
const InteractionLocality iloc,
const int numParts)
using Grid = Nbnxm::Grid; // TODO: Remove when refactoring this file
// Convenience alias for partial Nbnxm namespace usage
-using InteractionLocality = Nbnxm::InteractionLocality;
+using InteractionLocality = gmx::InteractionLocality;
/* We shift the i-particles backward for PBC.
* This leads to more conditionals than shifting forward.
}
// TODO: Move to pairlistset.cpp
-PairlistSet::PairlistSet(const Nbnxm::InteractionLocality locality,
- const PairlistParams &pairlistParams) :
+PairlistSet::PairlistSet(const InteractionLocality locality,
+ const PairlistParams &pairlistParams) :
locality_(locality),
params_(pairlistParams)
{
/* Returns the i-zone range for pairlist construction for the given locality */
static Range<int>
getIZoneRange(const Nbnxm::GridSet::DomainSetup &domainSetup,
- const Nbnxm::InteractionLocality locality)
+ const InteractionLocality locality)
{
if (domainSetup.doTestParticleInsertion)
{
/* Returns the j-zone range for pairlist construction for the given locality and i-zone */
static Range<int>
-getJZoneRange(const gmx_domdec_zones_t &ddZones,
- const Nbnxm::InteractionLocality locality,
- const int iZone)
+getJZoneRange(const gmx_domdec_zones_t &ddZones,
+ const InteractionLocality locality,
+ const int iZone)
{
if (locality == InteractionLocality::Local)
{
nbat, excl, minimumIlistCountForGpuBalancing_,
nrnb, &pairSearch->cycleCounting_);
- if (iLocality == Nbnxm::InteractionLocality::Local)
+ if (iLocality == InteractionLocality::Local)
{
outerListCreationStep_ = step;
}
}
void
-nonbonded_verlet_t::constructPairlist(const Nbnxm::InteractionLocality iLocality,
- const t_blocka *excl,
- int64_t step,
- t_nrnb *nrnb)
+nonbonded_verlet_t::constructPairlist(const InteractionLocality iLocality,
+ const t_blocka *excl,
+ int64_t step,
+ t_nrnb *nrnb)
{
pairlistSets_->construct(iLocality, pairSearch_.get(), nbat.get(), excl,
step, nrnb);
#include "gromacs/gpu_utils/hostallocator.h"
#include "gromacs/math/vectypes.h"
+#include "gromacs/mdtypes/locality.h"
#include "gromacs/mdtypes/nblist.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/defaultinitializationallocator.h"
// This file with constants is separate from this file to be able
// to include it during OpenCL jitting without including config.h
#include "constants.h"
-#include "locality.h"
#include "pairlistparams.h"
struct NbnxnPairlistCpuWork;
#include "config.h"
+#include "gromacs/mdtypes/locality.h"
#include "gromacs/utility/enumerationhelpers.h"
#include "gromacs/utility/real.h"
-#include "locality.h"
-
namespace Nbnxm
{
enum class KernelType;
#include <memory>
#include "gromacs/math/vectypes.h"
+#include "gromacs/mdtypes/locality.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/real.h"
-#include "locality.h"
#include "pairlist.h"
struct nbnxn_atomdata_t;
{
public:
//! Constructor: initializes the pairlist set as empty
- PairlistSet(Nbnxm::InteractionLocality locality,
+ PairlistSet(gmx::InteractionLocality locality,
const PairlistParams &listParams);
~PairlistSet();
const rvec *shift_vec);
//! Returns the interaction locality of this pairlist set, as stored in locality_
- Nbnxm::InteractionLocality locality() const
+ gmx::InteractionLocality locality() const
{
return locality_;
}
private:
//! The locality of the pairlist set
- Nbnxm::InteractionLocality locality_;
+ gmx::InteractionLocality locality_;
//! List of pairlists in CPU layout
std::vector<NbnxnPairlistCpu> cpuLists_;
//! List of working list for rebalancing CPU lists
#include <memory>
-#include "locality.h"
+#include "gromacs/mdtypes/locality.h"
+
#include "pairlistparams.h"
struct nbnxn_atomdata_t;
int minimumIlistCountForGpuBalancing);
//! Construct the pairlist set for the given locality
- void construct(Nbnxm::InteractionLocality iLocality,
+ void construct(gmx::InteractionLocality iLocality,
PairSearch *pairSearch,
nbnxn_atomdata_t *nbat,
const t_blocka *excl,
t_nrnb *nrnb);
//! Dispatches the dynamic pruning kernel for the given locality
- void dispatchPruneKernel(Nbnxm::InteractionLocality iLocality,
+ void dispatchPruneKernel(gmx::InteractionLocality iLocality,
const nbnxn_atomdata_t *nbat,
const rvec *shift_vec);
}
//! Returns the pair-list set for the given locality
- const PairlistSet &pairlistSet(Nbnxm::InteractionLocality iLocality) const
+ const PairlistSet &pairlistSet(gmx::InteractionLocality iLocality) const
{
- if (iLocality == Nbnxm::InteractionLocality::Local)
+ if (iLocality == gmx::InteractionLocality::Local)
{
return *localSet_;
}
private:
//! Returns the pair-list set for the given locality
- PairlistSet &pairlistSet(Nbnxm::InteractionLocality iLocality)
+ PairlistSet &pairlistSet(gmx::InteractionLocality iLocality)
{
- if (iLocality == Nbnxm::InteractionLocality::Local)
+ if (iLocality == gmx::InteractionLocality::Local)
{
return *localSet_;
}
#include "kernels_simd_4xm/kernel_prune.h"
/* Dispatches the pruning kernel for the pair-list set of the given locality.
 *
 * Looks up the set with pairlistSet(iLocality) and forwards \p nbat and
 * \p shift_vec to that set's own dispatchPruneKernel().
 */
void
-PairlistSets::dispatchPruneKernel(const Nbnxm::InteractionLocality iLocality,
- const nbnxn_atomdata_t *nbat,
- const rvec *shift_vec)
+PairlistSets::dispatchPruneKernel(const gmx::InteractionLocality iLocality,
+ const nbnxn_atomdata_t *nbat,
+ const rvec *shift_vec)
{
pairlistSet(iLocality).dispatchPruneKernel(nbat, shift_vec);
}
}
/* Dispatches the pruning kernel for the given locality via pairlistSets_.
 *
 * Forwards \p iLocality and \p shift_vec to PairlistSets::dispatchPruneKernel(),
 * supplying this object's atom data through nbat.get().
 */
void
-nonbonded_verlet_t::dispatchPruneKernelCpu(const Nbnxm::InteractionLocality iLocality,
- const rvec *shift_vec)
+nonbonded_verlet_t::dispatchPruneKernelCpu(const gmx::InteractionLocality iLocality,
+ const rvec *shift_vec)
{
pairlistSets_->dispatchPruneKernel(iLocality, nbat.get(), shift_vec);
}
const bool stepIsEven = (pairlistSets().numStepsWithPairlist(step) % 2 == 0);
Nbnxm::gpu_launch_kernel_pruneonly(gpu_nbv,
- stepIsEven ? Nbnxm::InteractionLocality::Local : Nbnxm::InteractionLocality::NonLocal,
+ stepIsEven ? gmx::InteractionLocality::Local : gmx::InteractionLocality::NonLocal,
pairlistSets().params().numRollingPruningParts);
wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_NONBONDED);