From 8e97087ba208f04eea87e4a53f303fcd04115e13 Mon Sep 17 00:00:00 2001
From: Artem Zhmurov
Date: Tue, 9 Feb 2021 15:21:03 +0300
Subject: [PATCH] Name streams after locality

This renames the deviceStream variable to localStream so that the name
reflects the locality of the stream it refers to.
---
 .../nbnxm/cuda/nbnxm_cuda_data_mgmt.cu   | 20 +++++++++----------
 .../nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp | 18 ++++++++---------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
index f9c590e9db..e442fde24f 100644
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
@@ -524,7 +524,7 @@ DeviceBuffer gpu_get_fshift(NbnxmGpu* nb)
 /* TODO Remove explicit pinning from host arrays from here and manage in a more natural way*/
 void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, NbnxmGpu* gpu_nbv)
 {
-    const DeviceStream& deviceStream = *gpu_nbv->deviceStreams[InteractionLocality::Local];
+    const DeviceStream& localStream = *gpu_nbv->deviceStreams[InteractionLocality::Local];
 
     bool bDoTime = gpu_nbv->bDoTime;
     const int maxNumColumns = gridSet.numColumnsMax();
@@ -561,20 +561,20 @@ void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, NbnxmGpu* gpu_nbv
 
         if (bDoTime)
         {
-            gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(deviceStream);
+            gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(localStream);
         }
 
         copyToDeviceBuffer(&gpu_nbv->atomIndices,
                            atomIndices,
                            0,
                            atomIndicesSize,
-                           deviceStream,
+                           localStream,
                            GpuApiCallBehavior::Async,
                            nullptr);
 
         if (bDoTime)
         {
-            gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(deviceStream);
+            gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(localStream);
         }
     }
 
@@ -582,30 +582,30 @@ void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, NbnxmGpu* gpu_nbv
     {
         if (bDoTime)
         {
-            gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(deviceStream);
+            gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(localStream);
         }
 
         int* destPtr = &gpu_nbv->cxy_na[maxNumColumns * g];
         copyToDeviceBuffer(
-                &destPtr, cxy_na, 0, numColumns, deviceStream, GpuApiCallBehavior::Async, nullptr);
+                &destPtr, cxy_na, 0, numColumns, localStream, GpuApiCallBehavior::Async, nullptr);
 
         if (bDoTime)
         {
-            gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(deviceStream);
+            gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(localStream);
         }
 
         if (bDoTime)
        {
-            gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(deviceStream);
+            gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(localStream);
         }
 
         destPtr = &gpu_nbv->cxy_ind[maxNumColumns * g];
         copyToDeviceBuffer(
-                &destPtr, cxy_ind, 0, numColumns, deviceStream, GpuApiCallBehavior::Async, nullptr);
+                &destPtr, cxy_ind, 0, numColumns, localStream, GpuApiCallBehavior::Async, nullptr);
 
         if (bDoTime)
         {
-            gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(deviceStream);
+            gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(localStream);
         }
     }
 }
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
index 1badb5c04a..a70b2b8a71 100644
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
@@ -398,8 +398,8 @@ void gpu_clear_outputs(NbnxmGpu* nb, bool computeVirial)
 //! This function is documented in the header file
 void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
 {
-    cl_atomdata_t*      adat         = nb->atdat;
-    const DeviceStream& deviceStream = *nb->deviceStreams[InteractionLocality::Local];
+    cl_atomdata_t*      adat        = nb->atdat;
+    const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
 
     /* only if we have a dynamic box */
     if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
@@ -410,7 +410,7 @@ void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
                            reinterpret_cast(nbatom->shift_vec.data()),
                            0,
                            SHIFTS * DIM,
-                           deviceStream,
+                           localStream,
                            GpuApiCallBehavior::Async,
                            nullptr);
         adat->bShiftVecUploaded = CL_TRUE;
@@ -427,7 +427,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
     cl_timers_t*         timers        = nb->timers;
     cl_atomdata_t*       d_atdat       = nb->atdat;
     const DeviceContext& deviceContext = *nb->deviceContext_;
-    const DeviceStream&  deviceStream  = *nb->deviceStreams[InteractionLocality::Local];
+    const DeviceStream&  localStream   = *nb->deviceStreams[InteractionLocality::Local];
 
     natoms    = nbat->numAtoms();
     realloced = false;
@@ -435,7 +435,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
     if (bDoTime)
     {
         /* time async copy */
-        timers->atdat.openTimingRegion(deviceStream);
+        timers->atdat.openTimingRegion(localStream);
     }
 
     /* need to reallocate if we have to copy more atoms than the amount of space
@@ -488,7 +488,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
                            nbat->params().lj_comb.data(),
                            0,
                            2 * natoms,
-                           deviceStream,
+                           localStream,
                            GpuApiCallBehavior::Async,
                            bDoTime ? timers->atdat.fetchNextEvent() : nullptr);
     }
@@ -500,18 +500,18 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
                            nbat->params().type.data(),
                            0,
                            natoms,
-                           deviceStream,
+                           localStream,
                            GpuApiCallBehavior::Async,
                            bDoTime ? timers->atdat.fetchNextEvent() : nullptr);
     }
 
     if (bDoTime)
     {
-        timers->atdat.closeTimingRegion(deviceStream);
+        timers->atdat.closeTimingRegion(localStream);
     }
 
     /* kick off the tasks enqueued above to ensure concurrency with the search */
-    cl_error = clFlush(deviceStream.stream());
+    cl_error = clFlush(localStream.stream());
     GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                        ("clFlush failed: " + ocl_get_error_string(cl_error)).c_str());
 }
-- 
2.22.0
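
For context only (not part of the patch): the pattern being renamed above can be illustrated with a minimal stand-alone C++ sketch. The names below (FakeStream, StreamsByLocality, streamFor) are invented stand-ins, not GROMACS API; only the idea of storing one stream per interaction locality and naming the selected reference after its locality mirrors what the patch does.

// Illustrative sketch with made-up types, assuming a per-locality stream store.
#include <array>
#include <cstdio>

enum class InteractionLocality : int { Local = 0, NonLocal = 1, Count = 2 };

struct FakeStream  // stand-in for a device stream handle
{
    const char* label;
};

using StreamsByLocality = std::array<FakeStream, static_cast<int>(InteractionLocality::Count)>;

// Select the stream for a given locality; the caller binds the result to a
// name that states the locality (localStream) rather than a generic name.
const FakeStream& streamFor(const StreamsByLocality& streams, InteractionLocality locality)
{
    return streams[static_cast<int>(locality)];
}

int main()
{
    StreamsByLocality streams = { FakeStream{ "local" }, FakeStream{ "nonlocal" } };
    const FakeStream& localStream = streamFor(streams, InteractionLocality::Local);
    std::printf("using %s stream\n", localStream.label);
    return 0;
}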