/* TODO Remove explicit pinning from host arrays from here and manage in a more natural way*/
// NOTE(review): this region contained unresolved inline diff markers ('-'/'+' lines).
// Resolved here to the post-change ('+') side, which renames the local stream handle
// 'deviceStream' -> 'localStream'; no other tokens were altered.
void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, NbnxmGpu* gpu_nbv)
{
    // Stream used for every H2D copy below: the Local interaction-locality stream.
    const DeviceStream& localStream   = *gpu_nbv->deviceStreams[InteractionLocality::Local];
    bool                bDoTime       = gpu_nbv->bDoTime;
    const int           maxNumColumns = gridSet.numColumnsMax();
    if (bDoTime)
    {
        gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(localStream);
    }
    // NOTE(review): the declarations of 'atomIndices'/'atomIndicesSize' are elided in this
    // chunk (diff hunks were concatenated without context) — confirm against the full file.
    copyToDeviceBuffer(&gpu_nbv->atomIndices,
                       atomIndices,
                       0,
                       atomIndicesSize,
                       localStream,
                       GpuApiCallBehavior::Async,
                       nullptr);
    if (bDoTime)
    {
        gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(localStream);
    }
}
// NOTE(review): a loop header (apparently over grid index 'g', providing 'cxy_na',
// 'cxy_ind' and 'numColumns') is elided between the hunks — the braces below belong
// to that elided enclosing scope; verify against the full file before compiling.
{
    if (bDoTime)
    {
        gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(localStream);
    }
    // Per-column atom counts for grid 'g', packed at a fixed per-grid stride of maxNumColumns.
    int* destPtr = &gpu_nbv->cxy_na[maxNumColumns * g];
    copyToDeviceBuffer(
            &destPtr, cxy_na, 0, numColumns, localStream, GpuApiCallBehavior::Async, nullptr);
    if (bDoTime)
    {
        gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(localStream);
    }
    if (bDoTime)
    {
        gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(localStream);
    }
    // Per-column index data for grid 'g', same per-grid packing as cxy_na above.
    destPtr = &gpu_nbv->cxy_ind[maxNumColumns * g];
    copyToDeviceBuffer(
            &destPtr, cxy_ind, 0, numColumns, localStream, GpuApiCallBehavior::Async, nullptr);
    if (bDoTime)
    {
        gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(localStream);
    }
}
}
//! This function is documented in the header file
// NOTE(review): this region contained unresolved inline diff markers ('-'/'+' lines).
// Resolved here to the post-change ('+') side ('deviceStream' -> 'localStream').
// The chunk also appears to concatenate hunks from a SECOND function (an atom-data
// init: 'timers', 'd_atdat', 'nbat', 'natoms', 'realloced', 'bDoTime', 'cl_error'
// are not declared in this view) whose signature and context lines are elided —
// reconcile against the full file.
void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
{
    cl_atomdata_t*      adat        = nb->atdat;
    const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
    /* only if we have a dynamic box */
    if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
        // NOTE(review): the opening of the copyToDeviceBuffer(...) call (and its first
        // argument, presumably &adat->shift_vec) is elided here — these are the call's
        // trailing arguments only.
        reinterpret_cast<const float*>(nbatom->shift_vec.data()),
        0,
        SHIFTS * DIM,
        localStream,
        GpuApiCallBehavior::Async,
        nullptr);
    adat->bShiftVecUploaded = CL_TRUE;
    // --- fragments of the elided second function begin here ---
    cl_timers_t*         timers        = nb->timers;
    cl_atomdata_t*       d_atdat       = nb->atdat;
    const DeviceContext& deviceContext = *nb->deviceContext_;
    const DeviceStream&  localStream   = *nb->deviceStreams[InteractionLocality::Local];
    natoms    = nbat->numAtoms();
    realloced = false;
    if (bDoTime)
    {
        /* time async copy */
        timers->atdat.openTimingRegion(localStream);
    }
    /* need to reallocate if we have to copy more atoms than the amount of space
                       nbat->params().lj_comb.data(),
                       0,
                       2 * natoms,
                       localStream,
                       GpuApiCallBehavior::Async,
                       bDoTime ? timers->atdat.fetchNextEvent() : nullptr);
    }
                       nbat->params().type.data(),
                       0,
                       natoms,
                       localStream,
                       GpuApiCallBehavior::Async,
                       bDoTime ? timers->atdat.fetchNextEvent() : nullptr);
    }
    if (bDoTime)
    {
        timers->atdat.closeTimingRegion(localStream);
    }
    /* kick off the tasks enqueued above to ensure concurrency with the search */
    cl_error = clFlush(localStream.stream());
    GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                       ("clFlush failed: " + ocl_get_error_string(cl_error)).c_str());
}