return reinterpret_cast<DeviceBuffer<gmx::RVec>>(nb->atdat->fShift);
}
-/* Initialization for X buffer operations on GPU. */
-/* TODO Remove explicit pinning from host arrays from here and manage in a more natural way*/
-void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, NbnxmGpu* gpu_nbv)
-{
- const DeviceStream& localStream = *gpu_nbv->deviceStreams[InteractionLocality::Local];
- bool bDoTime = gpu_nbv->bDoTime;
- const int maxNumColumns = gridSet.numColumnsMax();
-
- reallocateDeviceBuffer(&gpu_nbv->cxy_na,
- maxNumColumns * gridSet.grids().size(),
- &gpu_nbv->ncxy_na,
- &gpu_nbv->ncxy_na_alloc,
- *gpu_nbv->deviceContext_);
- reallocateDeviceBuffer(&gpu_nbv->cxy_ind,
- maxNumColumns * gridSet.grids().size(),
- &gpu_nbv->ncxy_ind,
- &gpu_nbv->ncxy_ind_alloc,
- *gpu_nbv->deviceContext_);
-
- for (unsigned int g = 0; g < gridSet.grids().size(); g++)
- {
-
- const Nbnxm::Grid& grid = gridSet.grids()[g];
-
- const int numColumns = grid.numColumns();
- const int* atomIndices = gridSet.atomIndices().data();
- const int atomIndicesSize = gridSet.atomIndices().size();
- const int* cxy_na = grid.cxy_na().data();
- const int* cxy_ind = grid.cxy_ind().data();
-
- reallocateDeviceBuffer(&gpu_nbv->atomIndices,
- atomIndicesSize,
- &gpu_nbv->atomIndicesSize,
- &gpu_nbv->atomIndicesSize_alloc,
- *gpu_nbv->deviceContext_);
-
- if (atomIndicesSize > 0)
- {
-
- if (bDoTime)
- {
- gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(localStream);
- }
-
- copyToDeviceBuffer(&gpu_nbv->atomIndices,
- atomIndices,
- 0,
- atomIndicesSize,
- localStream,
- GpuApiCallBehavior::Async,
- nullptr);
-
- if (bDoTime)
- {
- gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(localStream);
- }
- }
-
- if (numColumns > 0)
- {
- if (bDoTime)
- {
- gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(localStream);
- }
-
- int* destPtr = &gpu_nbv->cxy_na[maxNumColumns * g];
- copyToDeviceBuffer(
- &destPtr, cxy_na, 0, numColumns, localStream, GpuApiCallBehavior::Async, nullptr);
-
- if (bDoTime)
- {
- gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(localStream);
- }
-
- if (bDoTime)
- {
- gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(localStream);
- }
-
- destPtr = &gpu_nbv->cxy_ind[maxNumColumns * g];
- copyToDeviceBuffer(
- &destPtr, cxy_ind, 0, numColumns, localStream, GpuApiCallBehavior::Async, nullptr);
-
- if (bDoTime)
- {
- gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(localStream);
- }
- }
- }
-
- if (gpu_nbv->bUseTwoStreams)
- {
- // The above data is transferred on the local stream but is a
- // dependency of the nonlocal stream (specifically the nonlocal X
- // buf ops kernel). We therefore set a dependency to ensure
- // that the nonlocal stream waits on the local stream here.
- // This call records an event in the local stream:
- gpu_nbv->misc_ops_and_local_H2D_done.markEvent(
- *gpu_nbv->deviceStreams[Nbnxm::InteractionLocality::Local]);
- // ...and this call instructs the nonlocal stream to wait on that event:
- gpu_nbv->misc_ops_and_local_H2D_done.enqueueWaitEvent(
- *gpu_nbv->deviceStreams[Nbnxm::InteractionLocality::NonLocal]);
- }
-
- return;
-}
-
} // namespace Nbnxm