Use DeviceStream init(...) function to create streams

author Artem Zhmurov <zhmurov@gmail.com>

Thu, 20 Feb 2020 14:27:19 +0000 (15:27 +0100)

committer Paul Bauer <paul.bauer.q@gmail.com>

Fri, 13 Mar 2020 07:36:51 +0000 (08:36 +0100)
author Artem Zhmurov <zhmurov@gmail.com>
Thu, 20 Feb 2020 14:27:19 +0000 (15:27 +0100)
committer Paul Bauer <paul.bauer.q@gmail.com>
Fri, 13 Mar 2020 07:36:51 +0000 (08:36 +0100)
diff --git a/src/gromacs/ewald/pme_gpu_internal.cpp b/src/gromacs/ewald/pme_gpu_internal.cpp

index 822109de4c28b96b25e36d4b36d5a3722599d4aa..ae308bf57066aa91853a777d46b6bf96c4881710 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_internal.cpp
+++ b/src/gromacs/ewald/pme_gpu_internal.cpp
@@ -524,32 +524,8 @@ void pme_gpu_init_internal(PmeGpu* pmeGpu)
       * - default high priority with CUDA
       * - no priorities implemented yet with OpenCL; see #2532
       */
-#if GMX_GPU == GMX_GPU_CUDA
-    cudaError_t stat;
-    int         highest_priority, lowest_priority;
-    stat = cudaDeviceGetStreamPriorityRange(&lowest_priority, &highest_priority);
-    CU_RET_ERR(stat, "PME cudaDeviceGetStreamPriorityRange failed");
-    cudaStream_t stream;
-    stat = cudaStreamCreateWithPriority(&stream,
-                                        cudaStreamDefault, // cudaStreamNonBlocking,
-                                        highest_priority);
-    pmeGpu->archSpecific->pmeStream_.setStream(stream);
-    CU_RET_ERR(stat, "cudaStreamCreateWithPriority on the PME stream failed");
-#elif GMX_GPU == GMX_GPU_OPENCL
-    cl_command_queue_properties queueProperties =
-            pmeGpu->archSpecific->useTiming ? CL_QUEUE_PROFILING_ENABLE : 0;
-    cl_device_id device_id = pmeGpu->deviceInfo->oclDeviceId;
-    cl_int       clError;
-    pmeGpu->archSpecific->pmeStream_.setStream(clCreateCommandQueue(
-            pmeGpu->archSpecific->deviceContext_.context(), device_id, queueProperties, &clError));
-
-
-    if (clError != CL_SUCCESS)
-    {
-        GMX_THROW(gmx::InternalError(
-                gmx::formatString("Failed to create PME command queue (OpenCL error %d)", clError).c_str()));
-    }
-#endif
+    pmeGpu->archSpecific->pmeStream_.init(*pmeGpu->deviceInfo, pmeGpu->archSpecific->deviceContext_,
+                                          DeviceStreamPriority::High, pmeGpu->archSpecific->useTiming);
  }
  
  void pme_gpu_reinit_3dfft(const PmeGpu* pmeGpu)
@@ -812,6 +788,8 @@ static void pme_gpu_select_best_performing_pme_spreadgather_kernels(PmeGpu* pmeG
   */
  static void pme_gpu_init(gmx_pme_t* pme, const DeviceInformation* deviceInfo, const PmeGpuProgram* pmeGpuProgram)
  {
+    GMX_ASSERT(deviceInfo != nullptr,
+               "Device information can not be nullptr when GPU is used for PME.");
      pme->gpu       = new PmeGpu();
      PmeGpu* pmeGpu = pme->gpu;
      changePinningPolicy(&pmeGpu->staging.h_forces, pme_get_pinning_policy());
diff --git a/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu b/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu

index f5aac2981ab844ddeb097c425d8c8b63b568a796..32a752746af53c4ce05cb7bf9d69ba15be2eabbd 100644 (file)
--- a/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu
+++ b/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu
@@ -64,9 +64,10 @@ PmePpCommGpu::Impl::Impl(MPI_Comm comm, int pmeRank, const DeviceContext& device
      GMX_RELEASE_ASSERT(
              GMX_THREAD_MPI,
              "PME-PP GPU Communication is currently only supported with thread-MPI enabled");
-    cudaStream_t stream;
-    cudaStreamCreate(&stream);
-    pmePpCommStream_.setStream(stream);
+
+    // In CUDA we only need priority to create stream.
+    // (note that this will be moved from here in the follow-up patch)
+    pmePpCommStream_.init(DeviceInformation(), DeviceContext(), DeviceStreamPriority::Normal, false);
  }
  
  PmePpCommGpu::Impl::~Impl() = default;
diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp

index b1fefd34a5809ec56a05281a9669afb02cdb79b0..d0027852eea08101e98810cbfdca251894d2c1cb 100644 (file)
--- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp
+++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp
@@ -109,12 +109,10 @@ StatePropagatorDataGpu::Impl::Impl(const DeviceStream*  pmeStream,
  
          // TODO: The update stream should be created only when it is needed.
  #    if (GMX_GPU == GMX_GPU_CUDA)
-        cudaError_t  stat;
-        cudaStream_t stream;
-        stat = cudaStreamCreate(&stream);
-        updateStreamOwn_.setStream(stream);
+        // In CUDA we only need priority to create stream.
+        // (note that this will be moved from here in the follow-up patch)
+        updateStreamOwn_.init(DeviceInformation(), DeviceContext(), DeviceStreamPriority::Normal, false);
          updateStream_ = &updateStreamOwn_;
-        CU_RET_ERR(stat, "CUDA stream creation failed in StatePropagatorDataGpu");
  #    endif
      }
  
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu

index cbd5f8ce0197b3b434f06c58846f5d392f5384c6..36342b935fe7f5d941dce847ba797b62b345f258 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
@@ -418,8 +418,7 @@ NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo,
                     const interaction_const_t* ic,
                     const PairlistParams&      listParams,
                     const nbnxn_atomdata_t*    nbat,
-                   int /*rank*/,
-                   bool bLocalAndNonlocal)
+                   bool                       bLocalAndNonlocal)
  {
      cudaError_t stat;
  
@@ -448,10 +447,8 @@ NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo,
      nb->deviceInfo = deviceInfo;
  
      /* local/non-local GPU streams */
-    cudaStream_t localStream;
-    stat = cudaStreamCreate(&localStream);
-    nb->deviceStreams[InteractionLocality::Local].setStream(localStream);
-    CU_RET_ERR(stat, "cudaStreamCreate on stream[InterationLocality::Local] failed");
+    nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceInfo, DeviceContext(),
+                                                       DeviceStreamPriority::Normal, nb->bDoTime);
      if (nb->bUseTwoStreams)
      {
          init_plist(nb->plist[InteractionLocality::NonLocal]);
@@ -460,15 +457,8 @@ NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo,
           * priorities, because we are querying the priority range which in this
           * case will be a single value.
           */
-        int highest_priority;
-        stat = cudaDeviceGetStreamPriorityRange(nullptr, &highest_priority);
-        CU_RET_ERR(stat, "cudaDeviceGetStreamPriorityRange failed");
-
-        cudaStream_t nonLocalStream;
-        stat = cudaStreamCreateWithPriority(&nonLocalStream, cudaStreamDefault, highest_priority);
-        nb->deviceStreams[InteractionLocality::NonLocal].setStream(nonLocalStream);
-        CU_RET_ERR(stat,
-                   "cudaStreamCreateWithPriority on stream[InteractionLocality::NonLocal] failed");
+        nb->deviceStreams[InteractionLocality::NonLocal].init(
+                *nb->deviceInfo, DeviceContext(), DeviceStreamPriority::High, nb->bDoTime);
      }
  
      /* init events for sychronization (timing disabled for performance reasons!) */
diff --git a/src/gromacs/nbnxm/gpu_data_mgmt.h b/src/gromacs/nbnxm/gpu_data_mgmt.h

index e242771862b439eca885acad1fda3f77b82b496f..574588b39ac87bb6549e195eb8ca3807796331cf 100644 (file)
--- a/src/gromacs/nbnxm/gpu_data_mgmt.h
+++ b/src/gromacs/nbnxm/gpu_data_mgmt.h
@@ -72,7 +72,6 @@ NbnxmGpu* gpu_init(const DeviceInformation gmx_unused* deviceInfo,
                     const interaction_const_t gmx_unused* ic,
                     const PairlistParams gmx_unused& listParams,
                     const nbnxn_atomdata_t gmx_unused* nbat,
-                   int gmx_unused rank,
                     /* true if both local and non-local are done on GPU */
                     bool gmx_unused bLocalAndNonlocal) GPU_FUNC_TERM_WITH_RETURN(nullptr);
  
diff --git a/src/gromacs/nbnxm/nbnxm_setup.cpp b/src/gromacs/nbnxm/nbnxm_setup.cpp

index f7c7f6dd16ac75b964263a5e3f1a60038627e787..d854ede57282d8eabc40e3d4a054159960b05d9a 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm_setup.cpp
+++ b/src/gromacs/nbnxm/nbnxm_setup.cpp
@@ -452,7 +452,7 @@ std::unique_ptr<nonbonded_verlet_t> init_nb_verlet(const gmx::MDLogger&     mdlo
          /* init the NxN GPU data; the last argument tells whether we'll have
           * both local and non-local NB calculation on GPU */
          gpu_nbv = gpu_init(deviceInfo, *deviceContext, fr->ic, pairlistParams, nbat.get(),
-                           cr->nodeid, haveMultipleDomains);
+                           haveMultipleDomains);
  
          minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(gpu_nbv);
      }
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp

index fa37263a5b16f695d83db5dbbb059d5c8e333174..adc60b0a390385fe95d2ba5cfdc0f24bdfa720c2 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
@@ -560,12 +560,8 @@ NbnxmGpu* gpu_init(const DeviceInformation*   deviceInfo,
                     const interaction_const_t* ic,
                     const PairlistParams&      listParams,
                     const nbnxn_atomdata_t*    nbat,
-                   const int                  rank,
                     const bool                 bLocalAndNonlocal)
  {
-    cl_int                      cl_error;
-    cl_command_queue_properties queue_properties;
-
      GMX_ASSERT(ic, "Need a valid interaction constants object");
  
      auto nb = new NbnxmGpu;
@@ -596,42 +592,16 @@ NbnxmGpu* gpu_init(const DeviceInformation*   deviceInfo,
      /* OpenCL timing disabled if GMX_DISABLE_GPU_TIMING is defined. */
      nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
  
-    /* Create queues only after bDoTime has been initialized */
-    if (nb->bDoTime)
-    {
-        queue_properties = CL_QUEUE_PROFILING_ENABLE;
-    }
-    else
-    {
-        queue_properties = 0;
-    }
-
-    cl_command_queue localStream =
-            clCreateCommandQueue(nb->dev_rundata->deviceContext_.context(),
-                                 nb->deviceInfo->oclDeviceId, queue_properties, &cl_error);
      /* local/non-local GPU streams */
-    nb->deviceStreams[InteractionLocality::Local].setStream(localStream);
-
-    if (CL_SUCCESS != cl_error)
-    {
-        gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s: OpenCL error %d", rank,
-                  nb->deviceInfo->device_name, cl_error);
-    }
+    nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceInfo, nb->dev_rundata->deviceContext_,
+                                                       DeviceStreamPriority::Normal, nb->bDoTime);
  
      if (nb->bUseTwoStreams)
      {
          init_plist(nb->plist[InteractionLocality::NonLocal]);
  
-        cl_command_queue nonLocalStream =
-                clCreateCommandQueue(nb->dev_rundata->deviceContext_.context(),
-                                     nb->deviceInfo->oclDeviceId, queue_properties, &cl_error);
-        nb->deviceStreams[InteractionLocality::NonLocal].setStream(nonLocalStream);
-
-        if (CL_SUCCESS != cl_error)
-        {
-            gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s: OpenCL error %d",
-                      rank, nb->deviceInfo->device_name, cl_error);
-        }
+        nb->deviceStreams[InteractionLocality::NonLocal].init(
+                *nb->deviceInfo, nb->dev_rundata->deviceContext_, DeviceStreamPriority::High, nb->bDoTime);
      }
  
      if (nb->bDoTime)
author	Artem Zhmurov <zhmurov@gmail.com>
	Thu, 20 Feb 2020 14:27:19 +0000 (15:27 +0100)
committer	Paul Bauer <paul.bauer.q@gmail.com>
	Fri, 13 Mar 2020 07:36:51 +0000 (08:36 +0100)
src/gromacs/ewald/pme_gpu_internal.cpp		patch \| blob \| history
src/gromacs/ewald/pme_pp_comm_gpu_impl.cu		patch \| blob \| history
src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu		patch \| blob \| history
src/gromacs/nbnxm/gpu_data_mgmt.h		patch \| blob \| history
src/gromacs/nbnxm/nbnxm_setup.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp		patch \| blob \| history