/* TODO Remove explicit pinning from host arrays from here and manage in a more natural way*/
// NOTE(review): this region contained unresolved inline diff markers ('-'/'+' lines).
// Resolved here to the post-change ('+') side, which renames the local stream handle
// 'deviceStream' -> 'localStream'; no other tokens were altered.
void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, NbnxmGpu* gpu_nbv)
{
    // Stream used for every H2D copy below: the Local interaction-locality stream.
    const DeviceStream& localStream   = *gpu_nbv->deviceStreams[InteractionLocality::Local];
    bool                bDoTime       = gpu_nbv->bDoTime;
    const int           maxNumColumns = gridSet.numColumnsMax();
    if (bDoTime)
    {
        gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(localStream);
    }
    // NOTE(review): the declarations of 'atomIndices'/'atomIndicesSize' are elided in this
    // chunk (diff hunks were concatenated without context) — confirm against the full file.
    copyToDeviceBuffer(&gpu_nbv->atomIndices,
                       atomIndices,
                       0,
                       atomIndicesSize,
                       localStream,
                       GpuApiCallBehavior::Async,
                       nullptr);
    if (bDoTime)
    {
        gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(localStream);
    }
}
// NOTE(review): a loop header (apparently over grid index 'g', providing 'cxy_na',
// 'cxy_ind' and 'numColumns') is elided between the hunks — the braces below belong
// to that elided enclosing scope; verify against the full file before compiling.
{
    if (bDoTime)
    {
        gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(localStream);
    }
    // Per-column atom counts for grid 'g', packed at a fixed per-grid stride of maxNumColumns.
    int* destPtr = &gpu_nbv->cxy_na[maxNumColumns * g];
    copyToDeviceBuffer(
            &destPtr, cxy_na, 0, numColumns, localStream, GpuApiCallBehavior::Async, nullptr);
    if (bDoTime)
    {
        gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(localStream);
    }
    if (bDoTime)
    {
        gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.openTimingRegion(localStream);
    }
    // Per-column index data for grid 'g', same per-grid packing as cxy_na above.
    destPtr = &gpu_nbv->cxy_ind[maxNumColumns * g];
    copyToDeviceBuffer(
            &destPtr, cxy_ind, 0, numColumns, localStream, GpuApiCallBehavior::Async, nullptr);
    if (bDoTime)
    {
        gpu_nbv->timers->xf[AtomLocality::Local].nb_h2d.closeTimingRegion(localStream);
    }
}
}
//! This function is documented in the header file
// NOTE(review): this region contained unresolved inline diff markers ('-'/'+' lines).
// Resolved here to the post-change ('+') side ('deviceStream' -> 'localStream').
// The chunk also appears to concatenate hunks from a SECOND function (an atom-data
// init: 'timers', 'd_atdat', 'nbat', 'natoms', 'realloced', 'bDoTime', 'cl_error'
// are not declared in this view) whose signature and context lines are elided —
// reconcile against the full file.
void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
{
    cl_atomdata_t*      adat        = nb->atdat;
    const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
    /* only if we have a dynamic box */
    if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
        // NOTE(review): the opening of the copyToDeviceBuffer(...) call (and its first
        // argument, presumably &adat->shift_vec) is elided here — these are the call's
        // trailing arguments only.
        reinterpret_cast<const float*>(nbatom->shift_vec.data()),
        0,
        SHIFTS * DIM,
        localStream,
        GpuApiCallBehavior::Async,
        nullptr);
    adat->bShiftVecUploaded = CL_TRUE;
    // --- fragments of the elided second function begin here ---
    cl_timers_t*         timers        = nb->timers;
    cl_atomdata_t*       d_atdat       = nb->atdat;
    const DeviceContext& deviceContext = *nb->deviceContext_;
    const DeviceStream&  localStream   = *nb->deviceStreams[InteractionLocality::Local];
    natoms    = nbat->numAtoms();
    realloced = false;
    if (bDoTime)
    {
        /* time async copy */
        timers->atdat.openTimingRegion(localStream);
    }
    /* need to reallocate if we have to copy more atoms than the amount of space
                       nbat->params().lj_comb.data(),
                       0,
                       2 * natoms,
                       localStream,
                       GpuApiCallBehavior::Async,
                       bDoTime ? timers->atdat.fetchNextEvent() : nullptr);
    }
                       nbat->params().type.data(),
                       0,
                       natoms,
                       localStream,
                       GpuApiCallBehavior::Async,
                       bDoTime ? timers->atdat.fetchNextEvent() : nullptr);
    }
    if (bDoTime)
    {
        timers->atdat.closeTimingRegion(localStream);
    }
    /* kick off the tasks enqueued above to ensure concurrency with the search */
    cl_error = clFlush(localStream.stream());
    GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                       ("clFlush failed: " + ocl_get_error_string(cl_error)).c_str());
}