const InteractionLocality iloc = gpuAtomToInteractionLocality(atomLocality);
- int adat_begin, adat_len; /* local/nonlocal offset and length used for xq and f */
NBAtomData* adat = nb->atdat;
gpu_plist* plist = nb->plist[iloc];
return;
}
- /* calculate the atom data index range based on locality */
- if (atomLocality == AtomLocality::Local)
- {
- adat_begin = 0;
- adat_len = adat->numAtomsLocal;
- }
- else
- {
- adat_begin = adat->numAtomsLocal;
- adat_len = adat->numAtoms - adat->numAtomsLocal;
- }
+ /* local/nonlocal offset and length used for xq and f */
+ auto atomsRange = getGpuAtomRange(adat, atomLocality);
/* beginning of timed HtoD section */
if (bDoTime)
static_assert(sizeof(adat->xq[0]) == sizeof(Float4),
"The size of the xyzq buffer element should be equal to the size of float4.");
copyToDeviceBuffer(&adat->xq,
- reinterpret_cast<const Float4*>(nbatom->x().data()) + adat_begin,
- adat_begin,
- adat_len,
+ reinterpret_cast<const Float4*>(nbatom->x().data()) + atomsRange.begin(),
+ atomsRange.begin(),
+ atomsRange.size(),
deviceStream,
GpuApiCallBehavior::Async,
nullptr);
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
- int adat_begin, adat_len; /* local/nonlocal offset and length used for xq and f */
-
/* determine interaction locality from atom locality */
const InteractionLocality iloc = gpuAtomToInteractionLocality(atomLocality);
GMX_ASSERT(iloc == InteractionLocality::Local
return;
}
- getGpuAtomRange(adat, atomLocality, &adat_begin, &adat_len);
+ /* local/nonlocal offset and length used for xq and f */
+ auto atomsRange = getGpuAtomRange(adat, atomLocality);
/* beginning of timed D2H section */
if (bDoTime)
static_assert(
sizeof(adat->f[0]) == sizeof(Float3),
"The size of the force buffer element should be equal to the size of float3.");
- copyFromDeviceBuffer(reinterpret_cast<Float3*>(nbatom->out[0].f.data()) + adat_begin,
+ copyFromDeviceBuffer(reinterpret_cast<Float3*>(nbatom->out[0].f.data()) + atomsRange.begin(),
&adat->f,
- adat_begin,
- adat_len,
+ atomsRange.begin(),
+ atomsRange.size(),
deviceStream,
GpuApiCallBehavior::Async,
nullptr);
#include "gromacs/timing/gpu_timing.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/range.h"
#include "gromacs/utility/stringutil.h"
#include "gpu_common_utils.h"
*
* \param[in] atomData Atom descriptor data structure
* \param[in] atomLocality Atom locality specifier
- * \param[out] atomRangeBegin Starting index of the atom range in the atom data array.
- * \param[out] atomRangeLen Atom range length in the atom data array.
+ * \returns Range of atom indices [begin, end) in the atom data array for the selected locality.
*/
-template<typename AtomDataT>
-static inline void getGpuAtomRange(const AtomDataT* atomData,
- const AtomLocality atomLocality,
- int* atomRangeBegin,
- int* atomRangeLen)
+static inline gmx::Range<int> getGpuAtomRange(const NBAtomData* atomData, const AtomLocality atomLocality)
{
/* atomData is dereferenced below, so it must not be null */
assert(atomData);
/* NOTE(review): validateGpuAtomLocality is defined elsewhere; presumably it
 * rejects localities that are not valid for GPU transfers - confirm. */
validateGpuAtomLocality(atomLocality);
/* Calculate the atom data index range based on locality:
 * local atoms come first, occupying [0, numAtomsLocal); any other locality
 * maps to the remainder, [numAtomsLocal, numAtoms). */
if (atomLocality == AtomLocality::Local)
{
- *atomRangeBegin = 0;
- *atomRangeLen = atomData->numAtomsLocal;
/* the range is built from (begin, end) rather than (begin, length) */
+ return gmx::Range<int>(0, atomData->numAtomsLocal);
}
else
{
- *atomRangeBegin = atomData->numAtomsLocal;
- *atomRangeLen = atomData->numAtoms - atomData->numAtomsLocal;
/* end is numAtoms, so the range size equals the former
 * numAtoms - numAtomsLocal length */
+ return gmx::Range<int>(atomData->numAtomsLocal, atomData->numAtoms);
}
}
const InteractionLocality iloc = gpuAtomToInteractionLocality(atomLocality);
- /* local/nonlocal offset and length used for xq and f */
- int adat_begin, adat_len;
-
NBAtomData* adat = nb->atdat;
gpu_plist* plist = nb->plist[iloc];
cl_timers_t* t = nb->timers;
return;
}
- /* calculate the atom data index range based on locality */
- if (atomLocality == AtomLocality::Local)
- {
- adat_begin = 0;
- adat_len = adat->numAtomsLocal;
- }
- else
- {
- adat_begin = adat->numAtomsLocal;
- adat_len = adat->numAtoms - adat->numAtomsLocal;
- }
+ /* local/nonlocal offset and length used for xq and f */
+ auto atomsRange = getGpuAtomRange(adat, atomLocality);
/* beginning of timed HtoD section */
if (bDoTime)
static_assert(sizeof(float) == sizeof(*nbatom->x().data()),
"The size of the xyzq buffer element should be equal to the size of float4.");
copyToDeviceBuffer(&adat->xq,
- reinterpret_cast<const Float4*>(nbatom->x().data()) + adat_begin,
- adat_begin,
- adat_len,
+ reinterpret_cast<const Float4*>(nbatom->x().data()) + atomsRange.begin(),
+ atomsRange.begin(),
+ atomsRange.size(),
deviceStream,
GpuApiCallBehavior::Async,
bDoTime ? t->xf[atomLocality].nb_h2d.fetchNextEvent() : nullptr);
void gpu_launch_cpyback(NbnxmGpu* nb,
struct nbnxn_atomdata_t* nbatom,
const gmx::StepWorkload& stepWork,
- const AtomLocality aloc)
+ const AtomLocality atomLocality)
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
cl_int gmx_unused cl_error;
- int adat_begin, adat_len; /* local/nonlocal offset and length used for xq and f */
/* determine interaction locality from atom locality */
- const InteractionLocality iloc = gpuAtomToInteractionLocality(aloc);
+ const InteractionLocality iloc = gpuAtomToInteractionLocality(atomLocality);
GMX_ASSERT(iloc == InteractionLocality::Local
|| (iloc == InteractionLocality::NonLocal && nb->bNonLocalStreamDoneMarked == false),
"Non-local stream is indicating that the copy back event is enqueued at the "
return;
}
- getGpuAtomRange(adat, aloc, &adat_begin, &adat_len);
+ /* local/nonlocal offset and length used for xq and f */
+ auto atomsRange = getGpuAtomRange(adat, atomLocality);
/* beginning of timed D2H section */
if (bDoTime)
{
- t->xf[aloc].nb_d2h.openTimingRegion(deviceStream);
+ t->xf[atomLocality].nb_d2h.openTimingRegion(deviceStream);
}
/* With DD the local D2H transfer can only start after the non-local
/* DtoH f */
GMX_ASSERT(sizeof(*nbatom->out[0].f.data()) == sizeof(float),
"The host force buffer should be in single precision to match device data size.");
- copyFromDeviceBuffer(reinterpret_cast<Float3*>(nbatom->out[0].f.data()) + adat_begin,
+ copyFromDeviceBuffer(reinterpret_cast<Float3*>(nbatom->out[0].f.data()) + atomsRange.begin(),
&adat->f,
- adat_begin,
- adat_len,
+ atomsRange.begin(),
+ atomsRange.size(),
deviceStream,
GpuApiCallBehavior::Async,
- bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr);
+ bDoTime ? t->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr);
/* kick off work */
cl_error = clFlush(deviceStream.stream());
SHIFTS,
deviceStream,
GpuApiCallBehavior::Async,
- bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr);
+ bDoTime ? t->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr);
}
/* DtoH energies */
1,
deviceStream,
GpuApiCallBehavior::Async,
- bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr);
+ bDoTime ? t->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr);
static_assert(sizeof(*nb->nbst.eElec) == sizeof(float),
"Sizes of host- and device-side electrostatic energy terms should be the "
"same.");
1,
deviceStream,
GpuApiCallBehavior::Async,
- bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr);
+ bDoTime ? t->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr);
}
}
if (bDoTime)
{
- t->xf[aloc].nb_d2h.closeTimingRegion(deviceStream);
+ t->xf[atomLocality].nb_d2h.closeTimingRegion(deviceStream);
}
}
return;
}
- int adatBegin, adatLen;
- getGpuAtomRange(adat, atomLocality, &adatBegin, &adatLen);
+ /* local/nonlocal offset and length used for xq and f */
+ auto atomsRange = getGpuAtomRange(adat, atomLocality);
// With DD the local D2H transfer can only start after the non-local kernel has finished.
if (iloc == InteractionLocality::Local && nb->bNonLocalStreamDoneMarked)
{
GMX_ASSERT(adat->f.elementSize() == sizeof(Float3),
"The size of the force buffer element should be equal to the size of float3.");
- copyFromDeviceBuffer(reinterpret_cast<Float3*>(nbatom->out[0].f.data()) + adatBegin,
+ copyFromDeviceBuffer(reinterpret_cast<Float3*>(nbatom->out[0].f.data()) + atomsRange.begin(),
&adat->f,
- adatBegin,
- adatLen,
+ atomsRange.begin(),
+ atomsRange.size(),
deviceStream,
GpuApiCallBehavior::Async,
nullptr);
return;
}
- int adatBegin, adatLen;
- getGpuAtomRange(adat, atomLocality, &adatBegin, &adatLen);
+ /* local/nonlocal offset and length used for xq and f */
+ auto atomsRange = getGpuAtomRange(adat, atomLocality);
/* HtoD x, q */
GMX_ASSERT(adat->xq.elementSize() == sizeof(Float4),
"The size of the xyzq buffer element should be equal to the size of float4.");
copyToDeviceBuffer(&adat->xq,
- reinterpret_cast<const Float4*>(nbatom->x().data()) + adatBegin,
- adatBegin,
- adatLen,
+ reinterpret_cast<const Float4*>(nbatom->x().data()) + atomsRange.begin(),
+ atomsRange.begin(),
+ atomsRange.size(),
deviceStream,
GpuApiCallBehavior::Async,
nullptr);