Take over management of OpenCL context from PME and NBNXM

author Artem Zhmurov <zhmurov@gmail.com>

Sat, 1 Feb 2020 15:40:13 +0000 (16:40 +0100)

committer Christian Blau <cblau@gerrit.gromacs.org>

Wed, 11 Mar 2020 14:59:29 +0000 (15:59 +0100)
author Artem Zhmurov <zhmurov@gmail.com>
Sat, 1 Feb 2020 15:40:13 +0000 (16:40 +0100)
committer Christian Blau <cblau@gerrit.gromacs.org>
Wed, 11 Mar 2020 14:59:29 +0000 (15:59 +0100)
diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp

index e42765cc8f6f81e1f815caf0c8649276d52b7ef2..e020a1405ba3dd9069e2c30d3e2f3be8e5344eef 100644 (file)
--- a/src/gromacs/domdec/domdec.cpp
+++ b/src/gromacs/domdec/domdec.cpp
@@ -3200,7 +3200,11 @@ gmx_bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const g
      return bCutoffAllowed;
  }
  
-void constructGpuHaloExchange(const gmx::MDLogger& mdlog, const t_commrec& cr, void* streamLocal, void* streamNonLocal)
+void constructGpuHaloExchange(const gmx::MDLogger& mdlog,
+                              const t_commrec&     cr,
+                              const DeviceContext& deviceContext,
+                              void*                streamLocal,
+                              void*                streamNonLocal)
  {
  
      int gpuHaloExchangeSize = 0;
@@ -3224,7 +3228,7 @@ void constructGpuHaloExchange(const gmx::MDLogger& mdlog, const t_commrec& cr, v
          for (int pulse = pulseStart; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
          {
              cr.dd->gpuHaloExchange.push_back(std::make_unique<gmx::GpuHaloExchange>(
-                    cr.dd, cr.mpi_comm_mysim, streamLocal, streamNonLocal, pulse));
+                    cr.dd, cr.mpi_comm_mysim, deviceContext, streamLocal, streamNonLocal, pulse));
          }
      }
  }
diff --git a/src/gromacs/domdec/domdec.h b/src/gromacs/domdec/domdec.h

index 51aba44e5cdad4647dced7a925539ea81297f063..0a7aa3202ef0c6eb57957a3540f95671bb96592a 100644 (file)
--- a/src/gromacs/domdec/domdec.h
+++ b/src/gromacs/domdec/domdec.h
@@ -85,6 +85,7 @@ struct t_nrnb;
  struct gmx_wallcycle;
  enum class PbcType : int;
  class t_state;
+class DeviceContext;
  class GpuEventSynchronizer;
  
  namespace gmx
@@ -314,10 +315,15 @@ void dd_bonded_cg_distance(const gmx::MDLogger& mdlog,
  /*! \brief Construct the GPU halo exchange object(s)
   * \param[in] mdlog          The logger object
   * \param[in] cr             The commrec object
+ * \param[in] deviceContext  GPU device context
   * \param[in] streamLocal    The local GPU stream
   * \param[in] streamNonLocal The non-local GPU stream
   */
-void constructGpuHaloExchange(const gmx::MDLogger& mdlog, const t_commrec& cr, void* streamLocal, void* streamNonLocal);
+void constructGpuHaloExchange(const gmx::MDLogger& mdlog,
+                              const t_commrec&     cr,
+                              const DeviceContext& deviceContext,
+                              void*                streamLocal,
+                              void*                streamNonLocal);
  
  /*! \brief
   * (Re-) Initialization for GPU halo exchange
diff --git a/src/gromacs/domdec/gpuhaloexchange.h b/src/gromacs/domdec/gpuhaloexchange.h

index dc65cb93d3e9d6fb66f0209e9208235898672cca..851e3d19833031fd2e669039ae1731eaa8dd4365 100644 (file)
--- a/src/gromacs/domdec/gpuhaloexchange.h
+++ b/src/gromacs/domdec/gpuhaloexchange.h
@@ -49,6 +49,7 @@
  #include "gromacs/utility/gmxmpi.h"
  
  struct gmx_domdec_t;
+class DeviceContext;
  class GpuEventSynchronizer;
  
  namespace gmx
@@ -80,11 +81,17 @@ public:
       *
       * \param [inout] dd                       domdec structure
       * \param [in]    mpi_comm_mysim           communicator used for simulation
+     * \param [in]    deviceContext            GPU device context
       * \param [in]    streamLocal              local NB CUDA stream.
       * \param [in]    streamNonLocal           non-local NB CUDA stream.
       * \param [in]    pulse                    the communication pulse for this instance
       */
-    GpuHaloExchange(gmx_domdec_t* dd, MPI_Comm mpi_comm_mysim, void* streamLocal, void* streamNonLocal, int pulse);
+    GpuHaloExchange(gmx_domdec_t*        dd,
+                    MPI_Comm             mpi_comm_mysim,
+                    const DeviceContext& deviceContext,
+                    void*                streamLocal,
+                    void*                streamNonLocal,
+                    int                  pulse);
      ~GpuHaloExchange();
  
      /*! \brief
diff --git a/src/gromacs/domdec/gpuhaloexchange_impl.cpp b/src/gromacs/domdec/gpuhaloexchange_impl.cpp

index 1ce9a9d93e7490421c58feb8ef32b65a471692d3..c8ca5df8c21381c18dfcf7d86e97a9029fb920ff 100644 (file)
--- a/src/gromacs/domdec/gpuhaloexchange_impl.cpp
+++ b/src/gromacs/domdec/gpuhaloexchange_impl.cpp
@@ -62,6 +62,7 @@ class GpuHaloExchange::Impl
  /*!\brief Constructor stub. */
  GpuHaloExchange::GpuHaloExchange(gmx_domdec_t* /* dd */,
                                   MPI_Comm /* mpi_comm_mysim */,
+                                 const DeviceContext& /* deviceContext */,
                                   void* /*streamLocal */,
                                   void* /*streamNonLocal */,
                                   int /*pulse */) :
diff --git a/src/gromacs/domdec/gpuhaloexchange_impl.cu b/src/gromacs/domdec/gpuhaloexchange_impl.cu

index 92a1d9f3d5d2d2235099eb0ae230e01331da19e0..4a44beb3e69945b3fc2517d38edbec9b656e716e 100644 (file)
--- a/src/gromacs/domdec/gpuhaloexchange_impl.cu
+++ b/src/gromacs/domdec/gpuhaloexchange_impl.cu
@@ -54,6 +54,7 @@
  #include "gromacs/domdec/domdec_struct.h"
  #include "gromacs/domdec/gpuhaloexchange.h"
  #include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/device_context.h"
  #include "gromacs/gpu_utils/devicebuffer.h"
  #include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
  #include "gromacs/gpu_utils/typecasts.cuh"
@@ -415,11 +416,12 @@ GpuEventSynchronizer* GpuHaloExchange::Impl::getForcesReadyOnDeviceEvent()
  }
  
  /*! \brief Create Domdec GPU object */
-GpuHaloExchange::Impl::Impl(gmx_domdec_t* dd,
-                            MPI_Comm      mpi_comm_mysim,
-                            void*         localStream,
-                            void*         nonLocalStream,
-                            int           pulse) :
+GpuHaloExchange::Impl::Impl(gmx_domdec_t*        dd,
+                            MPI_Comm             mpi_comm_mysim,
+                            const DeviceContext& deviceContext,
+                            void*                localStream,
+                            void*                nonLocalStream,
+                            int                  pulse) :
      dd_(dd),
      sendRankX_(dd->neighbor[0][1]),
      recvRankX_(dd->neighbor[0][0]),
@@ -428,6 +430,7 @@ GpuHaloExchange::Impl::Impl(gmx_domdec_t* dd,
      usePBC_(dd->ci[dd->dim[0]] == 0),
      haloDataTransferLaunched_(new GpuEventSynchronizer()),
      mpi_comm_mysim_(mpi_comm_mysim),
+    deviceContext_(deviceContext),
      localStream_(*static_cast<cudaStream_t*>(localStream)),
      nonLocalStream_(*static_cast<cudaStream_t*>(nonLocalStream)),
      pulse_(pulse)
@@ -460,12 +463,13 @@ GpuHaloExchange::Impl::~Impl()
      delete haloDataTransferLaunched_;
  }
  
-GpuHaloExchange::GpuHaloExchange(gmx_domdec_t* dd,
-                                 MPI_Comm      mpi_comm_mysim,
-                                 void*         localStream,
-                                 void*         nonLocalStream,
-                                 int           pulse) :
-    impl_(new Impl(dd, mpi_comm_mysim, localStream, nonLocalStream, pulse))
+GpuHaloExchange::GpuHaloExchange(gmx_domdec_t*        dd,
+                                 MPI_Comm             mpi_comm_mysim,
+                                 const DeviceContext& deviceContext,
+                                 void*                localStream,
+                                 void*                nonLocalStream,
+                                 int                  pulse) :
+    impl_(new Impl(dd, mpi_comm_mysim, deviceContext, localStream, nonLocalStream, pulse))
  {
  }
  
diff --git a/src/gromacs/domdec/gpuhaloexchange_impl.cuh b/src/gromacs/domdec/gpuhaloexchange_impl.cuh

index a8d2f9204c590894d489d266a221292bd3b6d056..ba22bc5262abcc5551dd66e1978e9b59c11a3d16 100644 (file)
--- a/src/gromacs/domdec/gpuhaloexchange_impl.cuh
+++ b/src/gromacs/domdec/gpuhaloexchange_impl.cuh
@@ -71,11 +71,17 @@ public:
       *
       * \param [inout] dd                       domdec structure
       * \param [in]    mpi_comm_mysim           communicator used for simulation
+     * \param [in]    deviceContext            GPU device context
       * \param [in]    localStream              local NB CUDA stream
       * \param [in]    nonLocalStream           non-local NB CUDA stream
       * \param [in]    pulse                    the communication pulse for this instance
       */
-    Impl(gmx_domdec_t* dd, MPI_Comm mpi_comm_mysim, void* localStream, void* nonLocalStream, int pulse);
+    Impl(gmx_domdec_t*        dd,
+         MPI_Comm             mpi_comm_mysim,
+         const DeviceContext& deviceContext,
+         void*                localStream,
+         void*                nonLocalStream,
+         int                  pulse);
      ~Impl();
  
      /*! \brief
@@ -176,8 +182,8 @@ private:
      GpuEventSynchronizer* haloDataTransferLaunched_ = nullptr;
      //! MPI communicator used for simulation
      MPI_Comm mpi_comm_mysim_;
-    //! Dummy GPU context object
-    const DeviceContext deviceContext_;
+    //! GPU context object
+    const DeviceContext& deviceContext_;
      //! CUDA stream for local non-bonded calculations
      cudaStream_t localStream_ = nullptr;
      //! CUDA stream for non-local non-bonded calculations
diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h

index 40a34682c00f173841f415722d165e038e44676c..1c3cb9b77460c2e2803914c93b1e0b6e7465e66d 100644 (file)
--- a/src/gromacs/ewald/pme.h
+++ b/src/gromacs/ewald/pme.h
@@ -436,13 +436,6 @@ GPU_FUNC_QUALIFIER void* pme_gpu_get_device_f(const gmx_pme_t* GPU_FUNC_ARGUMENT
  GPU_FUNC_QUALIFIER void* pme_gpu_get_device_stream(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
          GPU_FUNC_TERM_WITH_RETURN(nullptr);
  
-/*! \brief Returns the pointer to the GPU context.
- *  \param[in] pme            The PME data structure.
- *  \returns                  Pointer to GPU context object.
- */
-GPU_FUNC_QUALIFIER const DeviceContext* pme_gpu_get_device_context(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
-        GPU_FUNC_TERM_WITH_RETURN(nullptr);
-
  /*! \brief Get pointer to the device synchronizer object that allows syncing on PME force calculation completion
   * \param[in] pme            The PME data structure.
- * \returns                  Pointer to sychronizer
+ * \returns                  Pointer to synchronizer
diff --git a/src/gromacs/ewald/pme_gpu.cpp b/src/gromacs/ewald/pme_gpu.cpp

index b4cec47135c8af61d2f66a368ba635290b4647ac..4c4ed4851d8cea883a6b1531a6fdce4296276c45 100644 (file)
--- a/src/gromacs/ewald/pme_gpu.cpp
+++ b/src/gromacs/ewald/pme_gpu.cpp
@@ -442,14 +442,6 @@ void* pme_gpu_get_device_stream(const gmx_pme_t* pme)
      return pme_gpu_get_stream(pme->gpu);
  }
  
-const DeviceContext* pme_gpu_get_device_context(const gmx_pme_t* pme)
-{
-    GMX_RELEASE_ASSERT(pme, "GPU context requested from PME before PME was constructed.");
-    GMX_RELEASE_ASSERT(pme_gpu_active(pme),
-                       "GPU context requested from PME, but PME is running on the CPU.");
-    return pme_gpu_get_context(pme->gpu);
-}
-
  GpuEventSynchronizer* pme_gpu_get_f_ready_synchronizer(const gmx_pme_t* pme)
  {
      if (!pme || !pme_gpu_active(pme))
diff --git a/src/gromacs/ewald/pme_gpu_internal.cpp b/src/gromacs/ewald/pme_gpu_internal.cpp

index bd3b25e1cdf1cfa2cf4e07c5f0fcf166fd5c5fc3..dd62e8c4cdfe4306527066936af4042a2edc4744 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_internal.cpp
+++ b/src/gromacs/ewald/pme_gpu_internal.cpp
@@ -1527,14 +1527,6 @@ void* pme_gpu_get_stream(const PmeGpu* pmeGpu)
      }
  }
  
-const DeviceContext* pme_gpu_get_context(const PmeGpu* pmeGpu)
-{
-    GMX_RELEASE_ASSERT(
-            pmeGpu,
-            "GPU context object was requested, but PME GPU object was not (yet) initialized.");
-    return &pmeGpu->archSpecific->deviceContext_;
-}
-
  GpuEventSynchronizer* pme_gpu_get_forces_ready_synchronizer(const PmeGpu* pmeGpu)
  {
      if (pmeGpu && pmeGpu->kernelParams)
diff --git a/src/gromacs/ewald/pme_gpu_internal.h b/src/gromacs/ewald/pme_gpu_internal.h

index a9dc9677ce53a55405a43ce5564aa9c43d3fa96a..67a1bc3d1c2a53f267ea722f9fe752f87833176d 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_internal.h
+++ b/src/gromacs/ewald/pme_gpu_internal.h
@@ -408,13 +408,6 @@ GPU_FUNC_QUALIFIER void* pme_gpu_get_kernelparam_forces(const PmeGpu* GPU_FUNC_A
  GPU_FUNC_QUALIFIER void* pme_gpu_get_stream(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
          GPU_FUNC_TERM_WITH_RETURN(nullptr);
  
-/*! \brief Return pointer to GPU context (for OpenCL builds).
- * \param[in] pmeGpu         The PME GPU structure.
- * \returns                  Pointer to context object.
- */
-GPU_FUNC_QUALIFIER const DeviceContext* pme_gpu_get_context(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
-        GPU_FUNC_TERM_WITH_RETURN(nullptr);
-
  /*! \brief Return pointer to the sync object triggered after the PME force calculation completion
   * \param[in] pmeGpu         The PME GPU structure.
   * \returns                  Pointer to sync object
diff --git a/src/gromacs/ewald/pme_gpu_program.cpp b/src/gromacs/ewald/pme_gpu_program.cpp

index 6b34a41c4c467aea8436a5c6b73be590ae62eedd..23981a661b83bcf6c3a0eeafa189b38bfe7bd772 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_program.cpp
+++ b/src/gromacs/ewald/pme_gpu_program.cpp
@@ -53,17 +53,14 @@
  
  #include "pme_gpu_program_impl.h"
  
-PmeGpuProgram::PmeGpuProgram(const DeviceInformation& deviceInfo) :
-    impl_(std::make_unique<PmeGpuProgramImpl>(deviceInfo))
+PmeGpuProgram::PmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext) :
+    impl_(std::make_unique<PmeGpuProgramImpl>(deviceInfo, deviceContext))
  {
  }
  
  PmeGpuProgram::~PmeGpuProgram() = default;
  
-PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation* deviceInfo)
+PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext)
  {
-    GMX_RELEASE_ASSERT(
-            deviceInfo != nullptr,
-            "Device information can not be nullptr when building PME GPU program object.");
-    return std::make_unique<PmeGpuProgram>(*deviceInfo);
+    return std::make_unique<PmeGpuProgram>(deviceInfo, deviceContext);
  }
diff --git a/src/gromacs/ewald/pme_gpu_program.h b/src/gromacs/ewald/pme_gpu_program.h

index 32c33442eb5f79c4f1308ccfd009cebe57974e4e..d4dbdf449d9b17eba6edd7b5b5b0af8868a8efdd 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_program.h
+++ b/src/gromacs/ewald/pme_gpu_program.h
@@ -49,13 +49,15 @@
  
  #include <memory>
  
+class DeviceContext;
+
  struct PmeGpuProgramImpl;
  struct DeviceInformation;
  
  class PmeGpuProgram
  {
  public:
-    explicit PmeGpuProgram(const DeviceInformation& deviceInfo);
+    explicit PmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext);
      ~PmeGpuProgram();
  
      // TODO: design getters for information inside, if needed for PME, and make this private?
@@ -69,6 +71,7 @@ using PmeGpuProgramStorage = std::unique_ptr<PmeGpuProgram>;
  /*! \brief
   * Factory function used to build persistent PME GPU program for the device at once.
   */
-PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation* /*deviceInfo*/);
+PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation& /*deviceInfo*/,
+                                        const DeviceContext& /* deviceContext */);
  
  #endif
diff --git a/src/gromacs/ewald/pme_gpu_program_impl.cpp b/src/gromacs/ewald/pme_gpu_program_impl.cpp

index af57c03e9f4e421c0983f96f75efe0832522e9b0..ccaffa5acdc03ddfb0a568c013e1ed661aa051d1 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_program_impl.cpp
+++ b/src/gromacs/ewald/pme_gpu_program_impl.cpp
@@ -45,7 +45,9 @@
  
  #include "pme_gpu_program_impl.h"
  
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */) :
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */,
+                                     const DeviceContext& deviceContext) :
+    deviceContext_(deviceContext),
      warpSize(0),
      spreadWorkGroupSize(0),
      gatherWorkGroupSize(0),
diff --git a/src/gromacs/ewald/pme_gpu_program_impl.cu b/src/gromacs/ewald/pme_gpu_program_impl.cu

index d17e18f50c51ff5740caaae3843374459df9389c..53bf2f0d1eab0df4cd293cc82176ee700c3425b3 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_program_impl.cu
+++ b/src/gromacs/ewald/pme_gpu_program_impl.cu
@@ -98,8 +98,9 @@ extern template void pme_gather_kernel<c_pmeOrder, c_wrapX, c_wrapY, false, true
  extern template void pme_gather_kernel<c_pmeOrder, c_wrapX, c_wrapY, true, false>(const PmeGpuCudaKernelParams);
  extern template void pme_gather_kernel<c_pmeOrder, c_wrapX, c_wrapY, false, false>(const PmeGpuCudaKernelParams);
  
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& deviceInfo) :
-    deviceContext_(deviceInfo)
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */,
+                                     const DeviceContext& deviceContext) :
+    deviceContext_(deviceContext)
  {
      // kernel parameters
      warpSize              = warp_size;
diff --git a/src/gromacs/ewald/pme_gpu_program_impl.h b/src/gromacs/ewald/pme_gpu_program_impl.h

index 1de5014821c7f91722cea9589ff1d70edf59d568..cb1471abf17be0ec925c638221039466083ade2f 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_program_impl.h
+++ b/src/gromacs/ewald/pme_gpu_program_impl.h
@@ -48,6 +48,7 @@
  #include "gromacs/gpu_utils/gputraits.h"
  #include "gromacs/utility/classhelpers.h"
  
+class DeviceContext;
  struct DeviceInformation;
  
  /*! \internal
@@ -75,10 +76,8 @@ struct PmeGpuProgramImpl
      /*! \brief
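-     * This is a handle to the GPU context, which is just a dummy in CUDA,
-     * but is created/destroyed by this class in OpenCL.
-     * TODO: Later we want to be able to own the context at a higher level and not here,
-     * but this class would still need the non-owning context handle to build the kernels.
+     * This is a non-owning handle to the GPU context, which is just a dummy in CUDA.
+     * The context is created and destroyed outside of this class and must outlive it.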
       */
-    DeviceContext deviceContext_;
+    const DeviceContext& deviceContext_;
  
      //! Conveniently all the PME kernels use the same single argument type
  #if GMX_GPU == GMX_GPU_CUDA
@@ -147,7 +146,7 @@ struct PmeGpuProgramImpl
  
      PmeGpuProgramImpl() = delete;
      //! Constructor for the given device
-    explicit PmeGpuProgramImpl(const DeviceInformation& deviceInfo);
+    explicit PmeGpuProgramImpl(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext);
      ~PmeGpuProgramImpl();
      GMX_DISALLOW_COPY_AND_ASSIGN(PmeGpuProgramImpl);
  
diff --git a/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp b/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp

index 4071beebdbc10039c147ecd1896044a1401bc315..1fa443ee4e916ffd6a00a15b290a0af2aa5bb007 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp
+++ b/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp
@@ -53,8 +53,8 @@
  #include "pme_gpu_types_host.h"
  #include "pme_grid.h"
  
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& deviceInfo) :
-    deviceContext_(deviceInfo)
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext) :
+    deviceContext_(deviceContext)
  {
      // kernel parameters
      warpSize = gmx::ocl::getDeviceWarpSize(deviceContext_.context(), deviceInfo.oclDeviceId);
diff --git a/src/gromacs/ewald/pme_only.cpp b/src/gromacs/ewald/pme_only.cpp

index 2ee17b32674959e1d78363a29ed7d87420a8d1e3..845b1a33ecf45b99b78d846f0107533647e4f485 100644 (file)
--- a/src/gromacs/ewald/pme_only.cpp
+++ b/src/gromacs/ewald/pme_only.cpp
@@ -603,7 +603,8 @@ int gmx_pmeonly(struct gmx_pme_t*         pme,
                  gmx_wallcycle*            wcycle,
                  gmx_walltime_accounting_t walltime_accounting,
                  t_inputrec*               ir,
-                PmeRunMode                runMode)
+                PmeRunMode                runMode,
+                const DeviceContext*      deviceContext)
  {
      int     ret;
      int     natoms = 0;
@@ -628,8 +629,7 @@ int gmx_pmeonly(struct gmx_pme_t*         pme,
      const bool useGpuForPme = (runMode == PmeRunMode::GPU) || (runMode == PmeRunMode::Mixed);
      if (useGpuForPme)
      {
-        const void*          commandStream = pme_gpu_get_device_stream(pme);
-        const DeviceContext& deviceContext = *pme_gpu_get_device_context(pme);
+        const void* commandStream = pme_gpu_get_device_stream(pme);
  
          changePinningPolicy(&pme_pp->chargeA, pme_get_pinning_policy());
          changePinningPolicy(&pme_pp->x, pme_get_pinning_policy());
@@ -640,10 +640,13 @@ int gmx_pmeonly(struct gmx_pme_t*         pme,
              pme_pp->pmeForceSenderGpu = std::make_unique<gmx::PmeForceSenderGpu>(
                      commandStream, pme_pp->mpi_comm_mysim, pme_pp->ppRanks);
          }
+        GMX_RELEASE_ASSERT(
+                deviceContext != nullptr,
+                "Device context can not be nullptr when building GPU propagator data object.");
          // TODO: Special PME-only constructor is used here. There is no mechanism to prevent from using the other constructor here.
          //       This should be made safer.
          stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
-                commandStream, deviceContext, GpuApiCallBehavior::Async,
+                commandStream, *deviceContext, GpuApiCallBehavior::Async,
                  pme_gpu_get_padding_size(pme), wcycle);
      }
  
diff --git a/src/gromacs/ewald/pme_only.h b/src/gromacs/ewald/pme_only.h

index 0ed37f1e2e95af00e7e06a75c1a01e375af23ce7..18edbb9b43ddf5bcdbe23127f58a2e79ece71027 100644 (file)
--- a/src/gromacs/ewald/pme_only.h
+++ b/src/gromacs/ewald/pme_only.h
@@ -55,6 +55,7 @@ struct t_nrnb;
  struct gmx_pme_t;
  struct gmx_wallcycle;
  
+class DeviceContext;
  enum class PmeRunMode;
  
  /*! \brief Called on the nodes that do PME exclusively */
@@ -64,6 +65,7 @@ int gmx_pmeonly(gmx_pme_t*                pme,
                  gmx_wallcycle*            wcycle,
                  gmx_walltime_accounting_t walltime_accounting,
                  t_inputrec*               ir,
-                PmeRunMode                runMode);
+                PmeRunMode                runMode,
+                const DeviceContext*      deviceContext);
  
  #endif
diff --git a/src/gromacs/ewald/pme_pp_comm_gpu.h b/src/gromacs/ewald/pme_pp_comm_gpu.h

index e9d8c4ff697c921ba82a704d3171ba9ae71a49c9..ea750cc17c1052fb95a639dd2512da2d30b905e8 100644 (file)
--- a/src/gromacs/ewald/pme_pp_comm_gpu.h
+++ b/src/gromacs/ewald/pme_pp_comm_gpu.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -45,6 +45,7 @@
  #include "gromacs/utility/classhelpers.h"
  #include "gromacs/utility/gmxmpi.h"
  
+class DeviceContext;
  class GpuEventSynchronizer;
  
  namespace gmx
@@ -61,8 +62,9 @@ public:
      /*! \brief Creates PME-PP GPU communication object
       * \param[in] comm            Communicator used for simulation
       * \param[in] pmeRank         Rank of PME task
+     * \param[in] deviceContext   GPU context.
       */
-    PmePpCommGpu(MPI_Comm comm, int pmeRank);
+    PmePpCommGpu(MPI_Comm comm, int pmeRank, const DeviceContext& deviceContext);
      ~PmePpCommGpu();
  
      /*! \brief Perform steps required when buffer size changes
diff --git a/src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp b/src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp

index 0b59ff921201b919324f08af032b6306e9b740fd..b8befc5311033680740c329c99d588b823226383 100644 (file)
--- a/src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp
+++ b/src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -62,7 +62,8 @@ class PmePpCommGpu::Impl
  };
  
  /*!\brief Constructor stub. */
-PmePpCommGpu::PmePpCommGpu(MPI_Comm gmx_unused comm, int gmx_unused pmeRank) : impl_(nullptr)
+PmePpCommGpu::PmePpCommGpu(MPI_Comm /* comm */, int /* pmeRank */, const DeviceContext& /* deviceContext */) :
+    impl_(nullptr)
  {
      GMX_ASSERT(false,
                 "A CPU stub for PME-PP GPU communication was called instead of the correct "
@@ -72,26 +73,26 @@ PmePpCommGpu::PmePpCommGpu(MPI_Comm gmx_unused comm, int gmx_unused pmeRank) : i
  PmePpCommGpu::~PmePpCommGpu() = default;
  
  /*!\brief init PME-PP GPU communication stub */
-void PmePpCommGpu::reinit(int gmx_unused size)
+void PmePpCommGpu::reinit(int /* size */)
  {
      GMX_ASSERT(false,
                 "A CPU stub for PME-PP GPU communication initialization was called instead of the "
                 "correct implementation.");
  }
  
-void PmePpCommGpu::receiveForceFromPmeCudaDirect(void gmx_unused* recvPtr,
-                                                 int gmx_unused recvSize,
-                                                 bool gmx_unused receivePmeForceToGpu)
+void PmePpCommGpu::receiveForceFromPmeCudaDirect(void* /* recvPtr */,
+                                                 int /* recvSize */,
+                                                 bool /* receivePmeForceToGpu */)
  {
      GMX_ASSERT(false,
                 "A CPU stub for PME-PP GPU communication was called instead of the correct "
                 "implementation.");
  }
  
-void PmePpCommGpu::sendCoordinatesToPmeCudaDirect(void gmx_unused* sendPtr,
-                                                  int gmx_unused sendSize,
-                                                  bool gmx_unused sendPmeCoordinatesFromGpu,
-                                                  GpuEventSynchronizer gmx_unused* coordinatesOnDeviceEvent)
+void PmePpCommGpu::sendCoordinatesToPmeCudaDirect(void* /* sendPtr */,
+                                                  int /* sendSize */,
+                                                  bool /* sendPmeCoordinatesFromGpu */,
+                                                  GpuEventSynchronizer* /* coordinatesOnDeviceEvent */)
  {
      GMX_ASSERT(false,
                 "A CPU stub for PME-PP GPU communication was called instead of the correct "
diff --git a/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu b/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu

index 29cb73d0cabeadfe92db0e5d8a1a3e7f58d5dbc8..e6e5bacd16d4f61059c4b1bbbe957cb596eb7696 100644 (file)
--- a/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu
+++ b/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu
@@ -48,6 +48,7 @@
  #include "config.h"
  
  #include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/device_context.h"
  #include "gromacs/gpu_utils/devicebuffer.h"
  #include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
  #include "gromacs/utility/gmxmpi.h"
@@ -55,7 +56,10 @@
  namespace gmx
  {
  
-PmePpCommGpu::Impl::Impl(MPI_Comm comm, int pmeRank) : comm_(comm), pmeRank_(pmeRank)
+PmePpCommGpu::Impl::Impl(MPI_Comm comm, int pmeRank, const DeviceContext& deviceContext) :
+    comm_(comm),
+    pmeRank_(pmeRank),
+    deviceContext_(deviceContext)
  {
      GMX_RELEASE_ASSERT(
              GMX_THREAD_MPI,
@@ -152,7 +156,10 @@ void* PmePpCommGpu::Impl::getForcesReadySynchronizer()
      return static_cast<void*>(&forcesReadySynchronizer_);
  }
  
-PmePpCommGpu::PmePpCommGpu(MPI_Comm comm, int pmeRank) : impl_(new Impl(comm, pmeRank)) {}
+PmePpCommGpu::PmePpCommGpu(MPI_Comm comm, int pmeRank, const DeviceContext& deviceContext) :
+    impl_(new Impl(comm, pmeRank, deviceContext))
+{
+}
  
  PmePpCommGpu::~PmePpCommGpu() = default;
  
diff --git a/src/gromacs/ewald/pme_pp_comm_gpu_impl.h b/src/gromacs/ewald/pme_pp_comm_gpu_impl.h

index 5565bea3705d5284499aca13a51abad17709209b..c791ea5b4011ace1b20c5ace0a127fbaea0b92fb 100644 (file)
--- a/src/gromacs/ewald/pme_pp_comm_gpu_impl.h
+++ b/src/gromacs/ewald/pme_pp_comm_gpu_impl.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -59,8 +59,9 @@ public:
      /*! \brief Creates PME-PP GPU communication object.
       * \param[in] comm            Communicator used for simulation
       * \param[in] pmeRank         Rank of PME task
+     * \param[in] deviceContext   GPU context.
       */
-    Impl(MPI_Comm comm, int pmeRank);
+    Impl(MPI_Comm comm, int pmeRank, const DeviceContext& deviceContext);
      ~Impl();
  
      /*! \brief Perform steps required when buffer size changes
@@ -115,6 +116,8 @@ public:
      void* getForcesReadySynchronizer();
  
  private:
+    //! Device context object
+    const DeviceContext& deviceContext_;
      //! CUDA stream used for the communication operations in this class
      cudaStream_t pmePpCommStream_ = nullptr;
      //! Remote location of PME coordinate data buffer
diff --git a/src/gromacs/ewald/tests/pmegathertest.cpp b/src/gromacs/ewald/tests/pmegathertest.cpp

index 08d2716e20ce8b0d2ac7a7d142e18d7f3aca6bb6..59efd56faccc460f8cf0a6bea446c63a1ff5da9a 100644 (file)
--- a/src/gromacs/ewald/tests/pmegathertest.cpp
+++ b/src/gromacs/ewald/tests/pmegathertest.cpp
@@ -300,7 +300,9 @@ public:
              PmeSafePointer pmeSafe = pmeInitWrapper(&inputRec, codePath, context->getDeviceInfo(),
                                                      context->getPmeGpuProgram(), box);
              std::unique_ptr<StatePropagatorDataGpu> stateGpu =
-                    (codePath == CodePath::GPU) ? makeStatePropagatorDataGpu(*pmeSafe.get()) : nullptr;
+                    (codePath == CodePath::GPU)
+                            ? makeStatePropagatorDataGpu(*pmeSafe.get(), context->deviceContext())
+                            : nullptr;
  
              pmeInitAtoms(pmeSafe.get(), stateGpu.get(), codePath, inputAtomData.coordinates,
                           inputAtomData.charges);
diff --git a/src/gromacs/ewald/tests/pmesplinespreadtest.cpp b/src/gromacs/ewald/tests/pmesplinespreadtest.cpp

index fba028fa8c1fcf377eedaa0dc5b4ce44fdd06172..8f2935b9f321b50b3f99ccbc83a084a2b244b600 100644 (file)
--- a/src/gromacs/ewald/tests/pmesplinespreadtest.cpp
+++ b/src/gromacs/ewald/tests/pmesplinespreadtest.cpp
@@ -152,7 +152,9 @@ public:
                  PmeSafePointer pmeSafe = pmeInitWrapper(&inputRec, codePath, context->getDeviceInfo(),
                                                          context->getPmeGpuProgram(), box);
                  std::unique_ptr<StatePropagatorDataGpu> stateGpu =
-                        (codePath == CodePath::GPU) ? makeStatePropagatorDataGpu(*pmeSafe.get()) : nullptr;
+                        (codePath == CodePath::GPU)
+                                ? makeStatePropagatorDataGpu(*pmeSafe.get(), context->deviceContext())
+                                : nullptr;
  
                  pmeInitAtoms(pmeSafe.get(), stateGpu.get(), codePath, coordinates, charges);
  
diff --git a/src/gromacs/ewald/tests/pmetestcommon.cpp b/src/gromacs/ewald/tests/pmetestcommon.cpp

index 81edf195feb98a8a58d4c5a12d5ab2df89163ac1..787f3e9f424951df85d7fe26d7d72f3ec217112d 100644 (file)
--- a/src/gromacs/ewald/tests/pmetestcommon.cpp
+++ b/src/gromacs/ewald/tests/pmetestcommon.cpp
@@ -160,14 +160,15 @@ PmeSafePointer pmeInitEmpty(const t_inputrec*        inputRec,
  }
  
  //! Make a GPU state-propagator manager
-std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme_t& pme)
+std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme_t&     pme,
+                                                                   const DeviceContext& deviceContext)
  {
      // TODO: Pin the host buffer and use async memory copies
      // TODO: Special constructor for PME-only rank / PME-tests is used here. There should be a mechanism to
      //       restrict one from using other constructor here.
-    return std::make_unique<StatePropagatorDataGpu>(
-            pme_gpu_get_device_stream(&pme), *pme_gpu_get_device_context(&pme),
-            GpuApiCallBehavior::Sync, pme_gpu_get_padding_size(&pme), nullptr);
+    return std::make_unique<StatePropagatorDataGpu>(pme_gpu_get_device_stream(&pme), deviceContext,
+                                                    GpuApiCallBehavior::Sync,
+                                                    pme_gpu_get_padding_size(&pme), nullptr);
  }
  
  //! PME initialization with atom data
diff --git a/src/gromacs/ewald/tests/pmetestcommon.h b/src/gromacs/ewald/tests/pmetestcommon.h

index ed919e80f852aad9d9de407567d04feaa542ada2..c67f78bacf265251dc9a772bfff2ec8eaa98ea60 100644 (file)
--- a/src/gromacs/ewald/tests/pmetestcommon.h
+++ b/src/gromacs/ewald/tests/pmetestcommon.h
@@ -135,7 +135,8 @@ PmeSafePointer pmeInitEmpty(const t_inputrec*        inputRec,
                              real             ewaldCoeff_q  = 0.0F,
                              real             ewaldCoeff_lj = 0.0F);
  //! Make a GPU state-propagator manager
-std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme_t& pme);
+std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme_t&     pme,
+                                                                   const DeviceContext& deviceContext);
  //! PME initialization with atom data and system box
  void pmeInitAtoms(gmx_pme_t*               pme,
                    StatePropagatorDataGpu*  stateGpu,
diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.cpp b/src/gromacs/ewald/tests/testhardwarecontexts.cpp

index 9747d0376e968d29349dae7b4d6333740d3e3c6a..661f0fa4bb8c448d10432cdbf4dc17c204c8dc7d 100644 (file)
--- a/src/gromacs/ewald/tests/testhardwarecontexts.cpp
+++ b/src/gromacs/ewald/tests/testhardwarecontexts.cpp
@@ -108,7 +108,7 @@ static gmx_hw_info_t* hardwareInit()
  
  void PmeTestEnvironment::SetUp()
  {
-    hardwareContexts_.emplace_back(std::make_unique<TestHardwareContext>(CodePath::CPU, "(CPU) ", nullptr));
+    hardwareContexts_.emplace_back(std::make_unique<TestHardwareContext>(CodePath::CPU, "(CPU) "));
  
      hardwareInfo_ = hardwareInit();
      if (!pme_gpu_supports_build(nullptr) || !pme_gpu_supports_hardware(*hardwareInfo_, nullptr))
@@ -120,13 +120,15 @@ void PmeTestEnvironment::SetUp()
      for (int gpuIndex : getCompatibleGpus(hardwareInfo_->gpu_info))
      {
          const DeviceInformation* deviceInfo = getDeviceInfo(hardwareInfo_->gpu_info, gpuIndex);
+        GMX_RELEASE_ASSERT(deviceInfo != nullptr,
+                           "Device information should be provided for the GPU builds.");
          init_gpu(deviceInfo);
  
          char stmp[200] = {};
          get_gpu_device_info_string(stmp, hardwareInfo_->gpu_info, gpuIndex);
          std::string description = "(GPU " + std::string(stmp) + ") ";
          hardwareContexts_.emplace_back(std::make_unique<TestHardwareContext>(
-                CodePath::GPU, description.c_str(), deviceInfo));
+                CodePath::GPU, description.c_str(), *deviceInfo));
      }
  }
  
diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.h b/src/gromacs/ewald/tests/testhardwarecontexts.h

index 9846cbbb0713ab93184a3eaf4b105fd14eb2e270..03df38671cbeca40c09a0b0fedfb4b42588b7e33 100644 (file)
--- a/src/gromacs/ewald/tests/testhardwarecontexts.h
+++ b/src/gromacs/ewald/tests/testhardwarecontexts.h
@@ -49,6 +49,7 @@
  #include <gtest/gtest.h>
  
  #include "gromacs/ewald/pme_gpu_program.h"
+#include "gromacs/gpu_utils/device_context.h"
  #include "gromacs/hardware/gpu_hw_info.h"
  #include "gromacs/utility/gmxassert.h"
  
@@ -80,6 +81,8 @@ struct TestHardwareContext
      std::string description_;
      //! Device information pointer
      const DeviceInformation* deviceInfo_;
+    //! Device context object, held by value
+    DeviceContext deviceContext_;
      //! Persistent compiled GPU kernels for PME.
      PmeGpuProgramStorage program_;
  
@@ -88,20 +91,33 @@ public:
      CodePath getCodePath() const { return codePath_; }
      //! Returns a human-readable context description line
      std::string getDescription() const { return description_; }
+    //! Getter for the DeviceContext
+    const DeviceContext& deviceContext() const { return deviceContext_; }
      //! Returns the device info pointer
      const DeviceInformation* getDeviceInfo() const { return deviceInfo_; }
      //! Returns the persistent PME GPU kernels
      const PmeGpuProgram* getPmeGpuProgram() const { return program_.get(); }
-    //! Constructs the context
-    TestHardwareContext(CodePath codePath, const char* description, const DeviceInformation* deviceInfo) :
+    //! Constructs the context for the CPU code path, without device information
+    TestHardwareContext(CodePath codePath, const char* description) :
+        codePath_(codePath),
+        description_(description),
+        deviceInfo_(nullptr) // no device on the CPU path; keeps getDeviceInfo() well-defined
+    {
+        GMX_RELEASE_ASSERT(codePath == CodePath::CPU,
+                           "A GPU code path should provide DeviceInformation to the "
+                           "TestHardwareContext constructor.");
+    }
+    //! Constructs the context for the GPU code path from the device information
+    TestHardwareContext(CodePath codePath, const char* description, const DeviceInformation& deviceInfo) :
          codePath_(codePath),
          description_(description),
-        deviceInfo_(deviceInfo)
+        deviceInfo_(&deviceInfo),
+        deviceContext_(deviceInfo),
+        program_(buildPmeGpuProgram(deviceInfo, deviceContext_))
      {
-        if (codePath == CodePath::GPU)
-        {
-            program_ = buildPmeGpuProgram(deviceInfo_);
-        }
+        GMX_RELEASE_ASSERT(codePath == CodePath::GPU,
+                           "TestHardwareContext tries to construct DeviceContext and PmeGpuProgram "
+                           "in CPU build.");
      }
      ~TestHardwareContext();
  };
diff --git a/src/gromacs/gpu_utils/device_context.h b/src/gromacs/gpu_utils/device_context.h

index d192b5543fd90967595f6597e46ad2c4b52f9171..84fc076708966df6df766102f3782f3c59fae0b7 100644 (file)
--- a/src/gromacs/gpu_utils/device_context.h
+++ b/src/gromacs/gpu_utils/device_context.h
@@ -61,18 +61,10 @@ struct DeviceInformation;
  class DeviceContext
  {
  public:
-    //! Default constructor. In OpenCL leaves context \c nullptr.
+    //! Default constructor.
      DeviceContext() {}
-    /*! \brief Second stage of construction. Creates the \c cl_context in OpenCL, does nothing in CUDA.
-     *
-     * \param[in] deviceInfo Platform-specific device information.
-     */
-    void init(const DeviceInformation& /*deviceInfo*/) {}
-    /*! \brief Construct the object and call \c init(...) .
-     *
-     * \param[in] deviceInfo Platform-specific device information.
-     */
-    DeviceContext(const DeviceInformation& deviceInfo) { init(deviceInfo); }
+    //! Constructor.
+    DeviceContext(const DeviceInformation& /* deviceInfo */) {}
      //! Destructor
      ~DeviceContext() = default;
  
diff --git a/src/gromacs/gpu_utils/device_context_ocl.cpp b/src/gromacs/gpu_utils/device_context_ocl.cpp

index 6f86b1744488ca623d64a6cb9f712e13a87269e2..1cd66239030b69b361d190a4dace27eafb9f5e38 100644 (file)
--- a/src/gromacs/gpu_utils/device_context_ocl.cpp
+++ b/src/gromacs/gpu_utils/device_context_ocl.cpp
@@ -60,12 +60,7 @@
  #define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL 0x4
  /**@}*/
  
-DeviceContext::DeviceContext()
-{
-    context_ = nullptr;
-}
-
-void DeviceContext::init(const DeviceInformation& deviceInfo)
+DeviceContext::DeviceContext(const DeviceInformation& deviceInfo)
  {
      cl_platform_id                     platformId = deviceInfo.oclPlatformId;
      cl_device_id                       deviceId   = deviceInfo.oclDeviceId;
@@ -92,11 +87,6 @@ void DeviceContext::init(const DeviceInformation& deviceInfo)
      }
  }
  
-DeviceContext::DeviceContext(const DeviceInformation& deviceInfo)
-{
-    init(deviceInfo);
-}
-
  DeviceContext::~DeviceContext()
  {
      cl_int clError;
diff --git a/src/gromacs/gpu_utils/device_context_ocl.h b/src/gromacs/gpu_utils/device_context_ocl.h

index 70e3927cf80f0ba729170fcf278acb725167c5bc..a9b84b2f8e2c1867c89eed5a619da9e1ee8829c7 100644 (file)
--- a/src/gromacs/gpu_utils/device_context_ocl.h
+++ b/src/gromacs/gpu_utils/device_context_ocl.h
@@ -57,16 +57,9 @@ struct DeviceInformation;
  class DeviceContext
  {
  public:
-    //! Default constructor. Sets \c context_ to \c nullptr.
-    DeviceContext();
-    /*! \brief Second stage of construction. Creates the \c cl_context.
-     *
-     * \param[in] deviceInfo Platform-specific device information.
-     *
-     * \throws InternalError if context creation failed.
-     */
-    void init(const DeviceInformation& deviceInfo);
-    /*! \brief Construct the object and call \c init(...) .
+    //! Default constructor.
+    DeviceContext() {}
+    /*! \brief Constructor that creates the \c cl_context
       *
       * \param[in] deviceInfo Platform-specific device information.
       *
diff --git a/src/gromacs/gpu_utils/oclutils.h b/src/gromacs/gpu_utils/oclutils.h

index 230b3ff94e1b0b43f56faf92a5c2cde897272671..90f5b04bfe88888664627eb33f7b30ed35421aa2 100644 (file)
--- a/src/gromacs/gpu_utils/oclutils.h
+++ b/src/gromacs/gpu_utils/oclutils.h
@@ -64,8 +64,11 @@ enum class GpuApiCallBehavior;
   */
  struct gmx_device_runtime_data_t
  {
+    //! Constructor; keeps a reference to \c deviceContext, which must outlive this object
+    gmx_device_runtime_data_t(const DeviceContext& deviceContext) : deviceContext_(deviceContext) {}
+
      //! OpenCL context
-    DeviceContext deviceContext;
+    const DeviceContext& deviceContext_;
      //! OpenCL program
      cl_program program;
  };
diff --git a/src/gromacs/listed_forces/gpubonded.h b/src/gromacs/listed_forces/gpubonded.h

index 1a231d2c2cc83289379bed789dd9a0247e1cddfa..b1c69d45721fd00ec7aa8b67e45578e93a479fb9 100644 (file)
--- a/src/gromacs/listed_forces/gpubonded.h
+++ b/src/gromacs/listed_forces/gpubonded.h
@@ -55,6 +55,7 @@
  #include "gromacs/utility/arrayref.h"
  #include "gromacs/utility/classhelpers.h"
  
+class DeviceContext;
  struct gmx_enerdata_t;
  struct gmx_ffparams_t;
  struct gmx_mtop_t;
@@ -106,7 +107,10 @@ class GpuBonded
  {
  public:
      //! Construct the manager with constant data and the stream to use.
-    GpuBonded(const gmx_ffparams_t& ffparams, void* streamPtr, gmx_wallcycle* wcycle);
+    GpuBonded(const gmx_ffparams_t& ffparams,
+              const DeviceContext&  deviceContext,
+              void*                 streamPtr,
+              gmx_wallcycle*        wcycle);
      //! Destructor
      ~GpuBonded();
  
diff --git a/src/gromacs/listed_forces/gpubonded_impl.cpp b/src/gromacs/listed_forces/gpubonded_impl.cpp

index 94a2b5d42b26c6a7428f6e9353d5e702e75a5584..f24103229be93815e61855a81288dfa7c8572cd4 100644 (file)
--- a/src/gromacs/listed_forces/gpubonded_impl.cpp
+++ b/src/gromacs/listed_forces/gpubonded_impl.cpp
@@ -160,7 +160,10 @@ class GpuBonded::Impl
  {
  };
  
-GpuBonded::GpuBonded(const gmx_ffparams_t& /* ffparams */, void* /*streamPtr */, gmx_wallcycle* /* wcycle */) :
+GpuBonded::GpuBonded(const gmx_ffparams_t& /* ffparams */,
+                     const DeviceContext& /* deviceContext */,
+                     void* /*streamPtr */,
+                     gmx_wallcycle* /* wcycle */) :
      impl_(nullptr)
  {
  }
diff --git a/src/gromacs/listed_forces/gpubonded_impl.cu b/src/gromacs/listed_forces/gpubonded_impl.cu

index ff7092f40cd1719b0a4ffd4cb8f49bd875a417a9..763550c5c9e52d891cf574a92edb09c1bfc92b59 100644 (file)
--- a/src/gromacs/listed_forces/gpubonded_impl.cu
+++ b/src/gromacs/listed_forces/gpubonded_impl.cu
@@ -50,6 +50,7 @@
  
  #include "gromacs/gpu_utils/cuda_arch_utils.cuh"
  #include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/device_context.h"
  #include "gromacs/gpu_utils/devicebuffer.h"
  #include "gromacs/gpu_utils/typecasts.cuh"
  #include "gromacs/mdtypes/enerdata.h"
@@ -63,7 +64,11 @@ namespace gmx
  
  // ---- GpuBonded::Impl
  
-GpuBonded::Impl::Impl(const gmx_ffparams_t& ffparams, void* streamPtr, gmx_wallcycle* wcycle)
+GpuBonded::Impl::Impl(const gmx_ffparams_t& ffparams,
+                      const DeviceContext&  deviceContext,
+                      void*                 streamPtr,
+                      gmx_wallcycle*        wcycle) :
+    deviceContext_(deviceContext)
  {
      stream_ = *static_cast<CommandStream*>(streamPtr);
      wcycle_ = wcycle;
@@ -306,8 +311,11 @@ void GpuBonded::Impl::clearEnergies()
  
  // ---- GpuBonded
  
-GpuBonded::GpuBonded(const gmx_ffparams_t& ffparams, void* streamPtr, gmx_wallcycle* wcycle) :
-    impl_(new Impl(ffparams, streamPtr, wcycle))
+GpuBonded::GpuBonded(const gmx_ffparams_t& ffparams,
+                     const DeviceContext&  deviceContext,
+                     void*                 streamPtr,
+                     gmx_wallcycle*        wcycle) :
+    impl_(new Impl(ffparams, deviceContext, streamPtr, wcycle))
  {
  }
  
diff --git a/src/gromacs/listed_forces/gpubonded_impl.h b/src/gromacs/listed_forces/gpubonded_impl.h

index 0532b40315a0b893c6ec4457d5a4714690f7cac6..a0da918893fa0340fc2b78bc81967c80e528b20f 100644 (file)
--- a/src/gromacs/listed_forces/gpubonded_impl.h
+++ b/src/gromacs/listed_forces/gpubonded_impl.h
@@ -126,7 +126,7 @@ class GpuBonded::Impl
  {
  public:
      //! Constructor
-    Impl(const gmx_ffparams_t& ffparams, void* streamPtr, gmx_wallcycle* wcycle);
+    Impl(const gmx_ffparams_t& ffparams, const DeviceContext& deviceContext, void* streamPtr, gmx_wallcycle* wcycle);
      /*! \brief Destructor, non-default needed for freeing
       * device-side buffers */
      ~Impl();
@@ -180,8 +180,8 @@ private:
      //! \brief Device-side total virial
      float* d_vTot_ = nullptr;
  
-    //! Dummy GPU context object
-    const DeviceContext deviceContext_;
+    //! GPU context object
+    const DeviceContext& deviceContext_;
      //! \brief Bonded GPU stream, not owned by this module
      CommandStream stream_;
  
diff --git a/src/gromacs/mdlib/forcerec.h b/src/gromacs/mdlib/forcerec.h

index c7f38cb5f5b800a11bce29de1ef13dfe5c5b58bc..fea69c594f03732d140012eff569eb3393ffb5b2 100644 (file)
--- a/src/gromacs/mdlib/forcerec.h
+++ b/src/gromacs/mdlib/forcerec.h
@@ -42,7 +42,6 @@
  #include "gromacs/timing/wallcycle.h"
  #include "gromacs/utility/arrayref.h"
  
-struct DeviceInformation;
  struct gmx_hw_info_t;
  struct t_commrec;
  struct t_fcdata;
diff --git a/src/gromacs/mdlib/leapfrog_gpu.cuh b/src/gromacs/mdlib/leapfrog_gpu.cuh

index 98703c05b94121b8dcaaff96910d6e8b0451a909..26a6fc7399a4c373dcacbcad524f2198d8e91150 100644 (file)
--- a/src/gromacs/mdlib/leapfrog_gpu.cuh
+++ b/src/gromacs/mdlib/leapfrog_gpu.cuh
@@ -112,7 +112,7 @@ public:
      class Impl;
  
  private:
-    //! Dummy GPU context object
+    //! GPU context object
      const DeviceContext& deviceContext_;
      //! GPU stream
      CommandStream commandStream_;
diff --git a/src/gromacs/mdlib/lincs_gpu.cuh b/src/gromacs/mdlib/lincs_gpu.cuh

index 77423dc3231fee737151501664f2ef9f40d7dfe9..4817573b8098806b24e8a1216ef441aaa8f01c81 100644 (file)
--- a/src/gromacs/mdlib/lincs_gpu.cuh
+++ b/src/gromacs/mdlib/lincs_gpu.cuh
@@ -169,7 +169,7 @@ public:
      static bool isNumCoupledConstraintsSupported(const gmx_mtop_t& mtop);
  
  private:
-    //! Dummy GPU context object
+    //! GPU context object
      const DeviceContext& deviceContext_;
      //! GPU stream
      CommandStream commandStream_;
diff --git a/src/gromacs/mdlib/settle_gpu.cuh b/src/gromacs/mdlib/settle_gpu.cuh

index f07af017e33fdaba6b53382465b6da76d340a7a3..da8bafd8dfbf45ae0dcbcb13874525fa483da6a4 100644 (file)
--- a/src/gromacs/mdlib/settle_gpu.cuh
+++ b/src/gromacs/mdlib/settle_gpu.cuh
@@ -252,7 +252,7 @@ public:
      void set(const InteractionDefinitions& idef, const t_mdatoms& md);
  
  private:
-    //! Dummy GPU context object
+    //! GPU context object
      const DeviceContext& deviceContext_;
      //! GPU stream
      CommandStream commandStream_;
diff --git a/src/gromacs/mdlib/update_constrain_gpu.h b/src/gromacs/mdlib/update_constrain_gpu.h

index 09f0bbecc125216b361ffda8e210be14950967ff..61f8537efa24bc052851f016b86f8583bfb0a087 100644 (file)
--- a/src/gromacs/mdlib/update_constrain_gpu.h
+++ b/src/gromacs/mdlib/update_constrain_gpu.h
@@ -49,6 +49,7 @@
  #include "gromacs/utility/arrayref.h"
  #include "gromacs/utility/classhelpers.h"
  
+class DeviceContext;
  class GpuEventSynchronizer;
  
  struct gmx_mtop_t;
@@ -77,11 +78,13 @@ public:
       *                              projection from it.
       * \param[in] mtop              Topology of the system: SETTLE gets the masses for O and H atoms
       *                              and target O-H and H-H distances from this object.
+     * \param[in] deviceContext     GPU device context.
       * \param[in] commandStream     GPU stream to use. Can be nullptr.
       * \param[in] xUpdatedOnDevice  The event synchronizer to use to mark that update is done on the GPU.
       */
      UpdateConstrainGpu(const t_inputrec&     ir,
                         const gmx_mtop_t&     mtop,
+                       const DeviceContext&  deviceContext,
                         const void*           commandStream,
                         GpuEventSynchronizer* xUpdatedOnDevice);
  
diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.cpp b/src/gromacs/mdlib/update_constrain_gpu_impl.cpp

index 3e10f8a403aba73d38fcf16f61aca848ecc43080..45a0743384e36e00108f9b4ef4fe5fe7106789df 100644 (file)
--- a/src/gromacs/mdlib/update_constrain_gpu_impl.cpp
+++ b/src/gromacs/mdlib/update_constrain_gpu_impl.cpp
@@ -57,6 +57,7 @@ class UpdateConstrainGpu::Impl
  
  UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec& /* ir   */,
                                         const gmx_mtop_t& /* mtop */,
+                                       const DeviceContext& /* deviceContext */,
                                         const void* /* commandStream */,
                                         GpuEventSynchronizer* /* xUpdatedOnDevice */) :
      impl_(nullptr)
diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.cu b/src/gromacs/mdlib/update_constrain_gpu_impl.cu

index a8e5a94cc6d87bbf649d84743379744cf47ea860..41f75723324aca24e314dade2f70eae39756a43c 100644 (file)
--- a/src/gromacs/mdlib/update_constrain_gpu_impl.cu
+++ b/src/gromacs/mdlib/update_constrain_gpu_impl.cu
@@ -57,6 +57,7 @@
  #include <algorithm>
  
  #include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/device_context.h"
  #include "gromacs/gpu_utils/devicebuffer.h"
  #include "gromacs/gpu_utils/gputraits.cuh"
  #include "gromacs/gpu_utils/vectype_ops.cuh"
@@ -166,8 +167,10 @@ void UpdateConstrainGpu::Impl::scaleCoordinates(const matrix scalingMatrix)
  
  UpdateConstrainGpu::Impl::Impl(const t_inputrec&     ir,
                                 const gmx_mtop_t&     mtop,
+                               const DeviceContext&  deviceContext,
                                 const void*           commandStream,
                                 GpuEventSynchronizer* xUpdatedOnDevice) :
+    deviceContext_(deviceContext),
      coordinatesReady_(xUpdatedOnDevice)
  {
      GMX_ASSERT(xUpdatedOnDevice != nullptr, "The event synchronizer can not be nullptr.");
@@ -231,9 +234,10 @@ GpuEventSynchronizer* UpdateConstrainGpu::Impl::getCoordinatesReadySync()
  
  UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec&     ir,
                                         const gmx_mtop_t&     mtop,
+                                       const DeviceContext&  deviceContext,
                                         const void*           commandStream,
                                         GpuEventSynchronizer* xUpdatedOnDevice) :
-    impl_(new Impl(ir, mtop, commandStream, xUpdatedOnDevice))
+    impl_(new Impl(ir, mtop, deviceContext, commandStream, xUpdatedOnDevice))
  {
  }
  
diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.h b/src/gromacs/mdlib/update_constrain_gpu_impl.h

index 75b6814de0066627d4d1ed9e1d7d24ce95f2f9fc..dd46010e93015920529d7100fce04579ddcca594 100644 (file)
--- a/src/gromacs/mdlib/update_constrain_gpu_impl.h
+++ b/src/gromacs/mdlib/update_constrain_gpu_impl.h
@@ -75,10 +75,15 @@ public:
       *                              projection from it.
       * \param[in] mtop              Topology of the system: SETTLE gets the masses for O and H atoms
       *                              and target O-H and H-H distances from this object.
+     * \param[in] deviceContext     GPU device context.
       * \param[in] commandStream     GPU stream to use. Can be nullptr.
       * \param[in] xUpdatedOnDevice  The event synchronizer to use to mark that update is done on the GPU.
       */
-    Impl(const t_inputrec& ir, const gmx_mtop_t& mtop, const void* commandStream, GpuEventSynchronizer* xUpdatedOnDevice);
+    Impl(const t_inputrec&     ir,
+         const gmx_mtop_t&     mtop,
+         const DeviceContext&  deviceContext,
+         const void*           commandStream,
+         GpuEventSynchronizer* xUpdatedOnDevice);
  
      ~Impl();
  
@@ -163,8 +168,8 @@ public:
      static bool isNumCoupledConstraintsSupported(const gmx_mtop_t& mtop);
  
  private:
-    //! Dummy GPU context object
-    const DeviceContext deviceContext_;
+    //! GPU context object
+    const DeviceContext& deviceContext_;
      //! GPU stream
      CommandStream commandStream_ = nullptr;
      //! GPU kernel launch config
diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp

index 2cb1388dcabe0b8a1175e9dff2b611f5e155a720..941a7030c9eca738ea4dee3201bbefd4c0628db7 100644 (file)
--- a/src/gromacs/mdrun/md.cpp
+++ b/src/gromacs/mdrun/md.cpp
@@ -400,8 +400,13 @@ void gmx::LegacySimulator::do_md()
          {
              GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
          }
-        integrator = std::make_unique<UpdateConstrainGpu>(
-                *ir, *top_global, stateGpu->getUpdateStream(), stateGpu->xUpdatedOnDevice());
+
+        GMX_RELEASE_ASSERT(fr->deviceContext != nullptr,
+                           "GPU device context should be initialized to use GPU update.");
+
+        integrator = std::make_unique<UpdateConstrainGpu>(*ir, *top_global, *fr->deviceContext,
+                                                          stateGpu->getUpdateStream(),
+                                                          stateGpu->xUpdatedOnDevice());
  
          integrator->setPbc(PbcType::Xyz, state->box);
      }
@@ -866,7 +871,10 @@ void gmx::LegacySimulator::do_md()
                              Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local);
                      void* streamNonLocal = Nbnxm::gpu_get_command_stream(
                              fr->nbv->gpu_nbv, InteractionLocality::NonLocal);
-                    constructGpuHaloExchange(mdlog, *cr, streamLocal, streamNonLocal);
+                    GMX_RELEASE_ASSERT(
+                            fr->deviceContext != nullptr,
+                            "GPU device context should be initialized to use GPU halo exchange.");
+                    constructGpuHaloExchange(mdlog, *cr, *fr->deviceContext, streamLocal, streamNonLocal);
                  }
              }
          }
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp

index b233b0737c9540c019f12b869fa4d906a2333ca4..081501bfff528aef8447c9dc2ec0799915b200e1 100644 (file)
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -73,6 +73,7 @@
  #include "gromacs/fileio/tpxio.h"
  #include "gromacs/gmxlib/network.h"
  #include "gromacs/gmxlib/nrnb.h"
+#include "gromacs/gpu_utils/device_context.h"
  #include "gromacs/gpu_utils/gpu_utils.h"
  #include "gromacs/hardware/cpuinfo.h"
  #include "gromacs/hardware/detecthardware.h"
@@ -1140,9 +1141,20 @@ int Mdrunner::mdrunner()
              EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
  
      // Get the device handles for the modules, nullptr when no task is assigned.
+    // TODO: There should be only one DeviceInformation.
      DeviceInformation* nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr);
      DeviceInformation* pmeDeviceInfo       = gpuTaskAssignments.initPmeDevice();
  
+    std::unique_ptr<DeviceContext> deviceContext = nullptr;
+    if (pmeDeviceInfo)
+    {
+        deviceContext = std::make_unique<DeviceContext>(*pmeDeviceInfo);
+    }
+    else if (nonbondedDeviceInfo)
+    {
+        deviceContext = std::make_unique<DeviceContext>(*nonbondedDeviceInfo);
+    }
+
      // TODO Initialize GPU streams here.
  
      // TODO Currently this is always built, yet DD partition code
@@ -1338,13 +1350,19 @@ int Mdrunner::mdrunner()
                        opt2fn("-tablep", filenames.size(), filenames.data()),
                        opt2fns("-tableb", filenames.size(), filenames.data()), pforce);
  
+        fr->deviceContext = deviceContext.get();
+
          if (devFlags.enableGpuPmePPComm && !thisRankHasDuty(cr, DUTY_PME))
          {
-            fr->pmePpCommGpu = std::make_unique<gmx::PmePpCommGpu>(cr->mpi_comm_mysim, cr->dd->pme_nodeid);
+            GMX_RELEASE_ASSERT(
+                    deviceContext != nullptr,
+                    "Device context can not be nullptr when PME-PP direct communications object.");
+            fr->pmePpCommGpu = std::make_unique<gmx::PmePpCommGpu>(
+                    cr->mpi_comm_mysim, cr->dd->pme_nodeid, *deviceContext);
          }
  
          fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec, fr, cr, *hwinfo, nonbondedDeviceInfo,
-                                        &mtop, box, wcycle);
+                                        fr->deviceContext, &mtop, box, wcycle);
          if (useGpuForBonded)
          {
              auto stream = havePPDomainDecomposition(cr)
@@ -1352,7 +1370,10 @@ int Mdrunner::mdrunner()
                                              fr->nbv->gpu_nbv, gmx::InteractionLocality::NonLocal)
                                    : Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv,
                                                                    gmx::InteractionLocality::Local);
-            gpuBonded     = std::make_unique<GpuBonded>(mtop.ffparams, stream, wcycle);
+            GMX_RELEASE_ASSERT(
+                    fr->deviceContext != nullptr,
+                    "Device context can not be nullptr when computing bonded interactions on GPU.");
+            gpuBonded = std::make_unique<GpuBonded>(mtop.ffparams, *fr->deviceContext, stream, wcycle);
              fr->gpuBonded = gpuBonded.get();
          }
  
@@ -1428,7 +1449,13 @@ int Mdrunner::mdrunner()
      PmeGpuProgramStorage pmeGpuProgram;
      if (thisRankHasPmeGpuTask)
      {
-        pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
+        GMX_RELEASE_ASSERT(
+                pmeDeviceInfo != nullptr,
+                "Device information can not be nullptr when building PME GPU program object.");
+        GMX_RELEASE_ASSERT(
+                deviceContext != nullptr,
+                "Device context can not be nullptr when building PME GPU program object.");
+        pmeGpuProgram = buildPmeGpuProgram(*pmeDeviceInfo, *deviceContext);
      }
  
      /* Initiate PME if necessary,
@@ -1566,14 +1593,16 @@ int Mdrunner::mdrunner()
                      fr->nbv->gpu_nbv != nullptr
                              ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal)
                              : nullptr;
-            const DeviceContext& deviceContext = *pme_gpu_get_device_context(fr->pmedata);
-            const int            paddingSize   = pme_gpu_get_padding_size(fr->pmedata);
+            const int          paddingSize = pme_gpu_get_padding_size(fr->pmedata);
              GpuApiCallBehavior transferKind = (inputrec->eI == eiMD && !doRerun && !useModularSimulator)
                                                        ? GpuApiCallBehavior::Async
                                                        : GpuApiCallBehavior::Sync;
-
+            GMX_RELEASE_ASSERT(
+                    deviceContext != nullptr,
+                    "Device context can not be nullptr when building GPU propagator data object.");
              stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
-                    pmeStream, localStream, nonLocalStream, deviceContext, transferKind, paddingSize, wcycle);
+                    pmeStream, localStream, nonLocalStream, *deviceContext, transferKind,
+                    paddingSize, wcycle);
              fr->stateGpu = stateGpu.get();
          }
  
@@ -1608,7 +1637,8 @@ int Mdrunner::mdrunner()
          GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
          /* do PME only */
          walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
-        gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
+        gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode,
+                    deviceContext.get());
      }
  
      wallcycle_stop(wcycle, ewcRUN);
@@ -1670,6 +1700,7 @@ int Mdrunner::mdrunner()
  
      free_gpu(nonbondedDeviceInfo);
      free_gpu(pmeDeviceInfo);
+    deviceContext.reset(nullptr);
      sfree(fcd);
  
      if (doMembed)
diff --git a/src/gromacs/mdtypes/forcerec.h b/src/gromacs/mdtypes/forcerec.h

index 8c4f5d2f01ffcf25ab0a6f5a99f0606942cdda54..d53b5e571cf6a9c5d2e8e9299d33daf91471c940 100644 (file)
--- a/src/gromacs/mdtypes/forcerec.h
+++ b/src/gromacs/mdtypes/forcerec.h
@@ -52,6 +52,7 @@
  struct gmx_pme_t;
  struct nonbonded_verlet_t;
  struct bonded_threading_t;
+class DeviceContext;
  class DispersionCorrection;
  struct t_forcetable;
  struct t_QMMMrec;
@@ -289,6 +290,9 @@ struct t_forcerec
      //       general StatePropagatorData object that is passed around
      gmx::StatePropagatorDataGpu* stateGpu = nullptr;
  
+    //! GPU device context
+    DeviceContext* deviceContext = nullptr;
+
      /* For PME-PP GPU communication */
      std::unique_ptr<gmx::PmePpCommGpu> pmePpCommGpu;
  };
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu

index 7467f95b69596f4f0cb63e19195ad510fb43a6a8..666aefc62910b21babf656313f8bd17af9c25385 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
@@ -413,7 +413,8 @@ static void cuda_init_const(NbnxmGpu*                       nb,
      nbnxn_cuda_clear_e_fshift(nb);
  }
  
-NbnxmGpu* gpu_init(const DeviceInformation*   deviceInfo,
+NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo,
+                   const DeviceContext& /* deviceContext */,
                     const interaction_const_t* ic,
                     const PairlistParams&      listParams,
                     const nbnxn_atomdata_t*    nbat,
diff --git a/src/gromacs/nbnxm/gpu_data_mgmt.h b/src/gromacs/nbnxm/gpu_data_mgmt.h

index 9eac3f7c78977948d58a545e589c5f413dbd5b7c..822852786c80e0417c320b84b6b76d41aab46a0c 100644 (file)
--- a/src/gromacs/nbnxm/gpu_data_mgmt.h
+++ b/src/gromacs/nbnxm/gpu_data_mgmt.h
@@ -50,6 +50,8 @@
  #include "gromacs/gpu_utils/gpu_macros.h"
  #include "gromacs/mdtypes/locality.h"
  
+class DeviceContext;
+
  struct NbnxmGpu;
  struct gmx_gpu_info_t;
  struct DeviceInformation;
@@ -65,6 +67,7 @@ namespace Nbnxm
  /** Initializes the data structures related to GPU nonbonded calculations. */
  GPU_FUNC_QUALIFIER
  NbnxmGpu* gpu_init(const DeviceInformation gmx_unused* deviceInfo,
+                   const DeviceContext gmx_unused& deviceContext,
                     const interaction_const_t gmx_unused* ic,
                     const PairlistParams gmx_unused& listParams,
                     const nbnxn_atomdata_t gmx_unused* nbat,
diff --git a/src/gromacs/nbnxm/nbnxm.h b/src/gromacs/nbnxm/nbnxm.h

index 2fa353a8486a5aaa8f8081dd9969f373eb7e2abf..a15f646ed98eb38925e8c4671397701965d63700 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm.h
+++ b/src/gromacs/nbnxm/nbnxm.h
@@ -120,6 +120,7 @@
  #include "gromacs/utility/enumerationhelpers.h"
  #include "gromacs/utility/real.h"
  
+class DeviceContext;
  struct DeviceInformation;
  struct gmx_domdec_zones_t;
  struct gmx_enerdata_t;
@@ -409,6 +410,7 @@ std::unique_ptr<nonbonded_verlet_t> init_nb_verlet(const gmx::MDLogger&     mdlo
                                                     const t_commrec*         cr,
                                                     const gmx_hw_info_t&     hardwareInfo,
                                                     const DeviceInformation* deviceInfo,
+                                                   const DeviceContext*     deviceContext,
                                                     const gmx_mtop_t*        mtop,
                                                     matrix                   box,
                                                     gmx_wallcycle*           wcycle);
diff --git a/src/gromacs/nbnxm/nbnxm_setup.cpp b/src/gromacs/nbnxm/nbnxm_setup.cpp

index 58fee75e6a831e5125475b573f19900e0f701f78..f7c7f6dd16ac75b964263a5e3f1a60038627e787 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm_setup.cpp
+++ b/src/gromacs/nbnxm/nbnxm_setup.cpp
@@ -364,6 +364,7 @@ std::unique_ptr<nonbonded_verlet_t> init_nb_verlet(const gmx::MDLogger&     mdlo
                                                     const t_commrec*         cr,
                                                     const gmx_hw_info_t&     hardwareInfo,
                                                     const DeviceInformation* deviceInfo,
+                                                   const DeviceContext*     deviceContext,
                                                     const gmx_mtop_t*        mtop,
                                                     matrix                   box,
                                                     gmx_wallcycle*           wcycle)
@@ -445,9 +446,13 @@ std::unique_ptr<nonbonded_verlet_t> init_nb_verlet(const gmx::MDLogger&     mdlo
      int       minimumIlistCountForGpuBalancing = 0;
      if (useGpu)
      {
+        GMX_RELEASE_ASSERT(
+                deviceContext != nullptr,
+                "Device context can not be nullptr when to use GPU for non-bonded forces.");
          /* init the NxN GPU data; the last argument tells whether we'll have
           * both local and non-local NB calculation on GPU */
-        gpu_nbv = gpu_init(deviceInfo, fr->ic, pairlistParams, nbat.get(), cr->nodeid, haveMultipleDomains);
+        gpu_nbv = gpu_init(deviceInfo, *deviceContext, fr->ic, pairlistParams, nbat.get(),
+                           cr->nodeid, haveMultipleDomains);
  
          minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(gpu_nbv);
      }
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp

index 5b5911941d86f1d7bd8467b5f823a81f9f35e401..eb1234d5122a0ad1abc040d003a7bc9a58c93249 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
@@ -138,7 +138,7 @@ static void init_ewald_coulomb_force_table(const EwaldCorrectionTables&     tabl
         CL_MEM_COPY_HOST_PTR, &array_format, tabsize, 1, 0, ftmp, &cl_error);
       */
  
-    coul_tab = clCreateBuffer(runData->deviceContext.context(),
+    coul_tab = clCreateBuffer(runData->deviceContext_.context(),
                                CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
                                tables.tableF.size() * sizeof(cl_float),
                                const_cast<real*>(tables.tableF.data()), &cl_error);
@@ -160,23 +160,23 @@ static void init_atomdata_first(cl_atomdata_t* ad, int ntypes, gmx_device_runtim
      ad->ntypes = ntypes;
  
      ad->shift_vec =
-            clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
+            clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
                             SHIFTS * sizeof(nbnxn_atomdata_t::shift_vec[0]), nullptr, &cl_error);
      GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                         ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
      ad->bShiftVecUploaded = CL_FALSE;
  
-    ad->fshift = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
+    ad->fshift = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
                                  SHIFTS * sizeof(nb_staging_t::fshift[0]), nullptr, &cl_error);
      GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                         ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
  
-    ad->e_lj = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
+    ad->e_lj = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
                                sizeof(float), nullptr, &cl_error);
      GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                         ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
  
-    ad->e_el = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
+    ad->e_el = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
                                sizeof(float), nullptr, &cl_error);
      GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                         ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
@@ -336,7 +336,7 @@ static void init_nbparam(cl_nbparam_t*                    nbp,
             CL_MEM_READ_WRITE, &array_format, 1, 1, 0, nullptr, &cl_error);
           */
  
-        nbp->coulomb_tab_climg2d = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_ONLY,
+        nbp->coulomb_tab_climg2d = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_ONLY,
                                                    sizeof(cl_float), nullptr, &cl_error);
          GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                             ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
@@ -354,12 +354,12 @@ static void init_nbparam(cl_nbparam_t*                    nbp,
             array_format.image_channel_data_type = CL_FLOAT;
             array_format.image_channel_order     = CL_R;
  
-           nbp->nbfp_climg2d = clCreateImage2D(runData->deviceContext.context(), CL_MEM_READ_ONLY |
+           nbp->nbfp_climg2d = clCreateImage2D(runData->deviceContext_.context(), CL_MEM_READ_ONLY |
             CL_MEM_COPY_HOST_PTR, &array_format, nnbfp, 1, 0, nbat->nbfp, &cl_error);
           */
  
          nbp->nbfp_climg2d = clCreateBuffer(
-                runData->deviceContext.context(),
+                runData->deviceContext_.context(),
                  CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
                  nnbfp * sizeof(cl_float), const_cast<float*>(nbatParams.nbfp.data()), &cl_error);
          GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
@@ -372,7 +372,7 @@ static void init_nbparam(cl_nbparam_t*                    nbp,
              /*  nbp->nbfp_comb_climg2d = clCreateImage2D(runData->deviceContext.context(), CL_MEM_READ_WRITE |
                 CL_MEM_COPY_HOST_PTR, &array_format, nnbfp_comb, 1, 0, nbat->nbfp_comb, &cl_error);*/
              nbp->nbfp_comb_climg2d =
-                    clCreateBuffer(runData->deviceContext.context(),
+                    clCreateBuffer(runData->deviceContext_.context(),
                                     CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
                                     nnbfp_comb * sizeof(cl_float),
                                     const_cast<float*>(nbatParams.nbfp_comb.data()), &cl_error);
@@ -388,7 +388,7 @@ static void init_nbparam(cl_nbparam_t*                    nbp,
              // TODO: decide which alternative is most efficient - textures or buffers.
              /* nbp->nbfp_comb_climg2d = clCreateImage2D(runData->deviceContext.context(),
                 CL_MEM_READ_WRITE, &array_format, 1, 1, 0, nullptr, &cl_error);*/
-            nbp->nbfp_comb_climg2d = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_ONLY,
+            nbp->nbfp_comb_climg2d = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_ONLY,
                                                      sizeof(cl_float), nullptr, &cl_error);
              GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                                 ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
@@ -556,6 +556,7 @@ static void nbnxn_ocl_init_const(NbnxmGpu*                       nb,
  
  //! This function is documented in the header file
  NbnxmGpu* gpu_init(const DeviceInformation*   deviceInfo,
+                   const DeviceContext&       deviceContext,
                     const interaction_const_t* ic,
                     const PairlistParams&      listParams,
                     const nbnxn_atomdata_t*    nbat,
@@ -583,7 +584,7 @@ NbnxmGpu* gpu_init(const DeviceInformation*   deviceInfo,
  
      /* set device info, just point it to the right GPU among the detected ones */
      nb->deviceInfo  = deviceInfo;
-    nb->dev_rundata = new gmx_device_runtime_data_t();
+    nb->dev_rundata = new gmx_device_runtime_data_t(deviceContext);
  
      /* init nbst */
      pmalloc(reinterpret_cast<void**>(&nb->nbst.e_lj), sizeof(*nb->nbst.e_lj));
@@ -605,11 +606,9 @@ NbnxmGpu* gpu_init(const DeviceInformation*   deviceInfo,
          queue_properties = 0;
      }
  
-    nb->dev_rundata->deviceContext.init(*deviceInfo);
-
      /* local/non-local GPU streams */
      nb->stream[InteractionLocality::Local] =
-            clCreateCommandQueue(nb->dev_rundata->deviceContext.context(),
+            clCreateCommandQueue(nb->dev_rundata->deviceContext_.context(),
                                   nb->deviceInfo->oclDeviceId, queue_properties, &cl_error);
      if (CL_SUCCESS != cl_error)
      {
@@ -622,7 +621,7 @@ NbnxmGpu* gpu_init(const DeviceInformation*   deviceInfo,
          init_plist(nb->plist[InteractionLocality::NonLocal]);
  
          nb->stream[InteractionLocality::NonLocal] =
-                clCreateCommandQueue(nb->dev_rundata->deviceContext.context(),
+                clCreateCommandQueue(nb->dev_rundata->deviceContext_.context(),
                                       nb->deviceInfo->oclDeviceId, queue_properties, &cl_error);
          if (CL_SUCCESS != cl_error)
          {
@@ -736,7 +735,7 @@ void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const Inte
      }
  
      // TODO most of this function is same in CUDA and OpenCL, move into the header
-    const DeviceContext& deviceContext = nb->dev_rundata->deviceContext;
+    const DeviceContext& deviceContext = nb->dev_rundata->deviceContext_;
  
      reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
                             deviceContext);
@@ -815,13 +814,13 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
              freeDeviceBuffer(&d_atdat->atom_types);
          }
  
-        d_atdat->f = clCreateBuffer(nb->dev_rundata->deviceContext.context(),
+        d_atdat->f = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
                                      CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
                                      nalloc * DIM * sizeof(nbat->out[0].f[0]), nullptr, &cl_error);
          GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                             ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
  
-        d_atdat->xq = clCreateBuffer(nb->dev_rundata->deviceContext.context(),
+        d_atdat->xq = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
                                       CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
                                       nalloc * sizeof(cl_float4), nullptr, &cl_error);
          GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
@@ -829,7 +828,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
  
          if (useLjCombRule(nb->nbparam->vdwtype))
          {
-            d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->deviceContext.context(),
+            d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
                                                CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
                                                nalloc * sizeof(cl_float2), nullptr, &cl_error);
              GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
@@ -837,7 +836,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
          }
          else
          {
-            d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->deviceContext.context(),
+            d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
                                                   CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
                                                   nalloc * sizeof(int), nullptr, &cl_error);
              GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp

index 0ba3345780533b3f7d5881fc81c814e995324b6e..9c1c759880d0cca050b09d69f28192cbc8eb6983 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
@@ -200,10 +200,10 @@ void nbnxn_gpu_compile_kernels(NbnxmGpu* nb)
          {
              /* TODO when we have a proper MPI-aware logging module,
                 the log output here should be written there */
-            program =
-                    gmx::ocl::compileProgram(stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl",
-                                             extraDefines, nb->dev_rundata->deviceContext.context(),
-                                             nb->deviceInfo->oclDeviceId, nb->deviceInfo->deviceVendor);
+            program = gmx::ocl::compileProgram(
+                    stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines,
+                    nb->dev_rundata->deviceContext_.context(), nb->deviceInfo->oclDeviceId,
+                    nb->deviceInfo->deviceVendor);
          }
          catch (gmx::GromacsException& e)
          {
author	Artem Zhmurov <zhmurov@gmail.com>
	Sat, 1 Feb 2020 15:40:13 +0000 (16:40 +0100)
committer	Christian Blau <cblau@gerrit.gromacs.org>
	Wed, 11 Mar 2020 14:59:29 +0000 (15:59 +0100)
src/gromacs/domdec/domdec.cpp		patch \| blob \| history
src/gromacs/domdec/domdec.h		patch \| blob \| history
src/gromacs/domdec/gpuhaloexchange.h		patch \| blob \| history
src/gromacs/domdec/gpuhaloexchange_impl.cpp		patch \| blob \| history
src/gromacs/domdec/gpuhaloexchange_impl.cu		patch \| blob \| history
src/gromacs/domdec/gpuhaloexchange_impl.cuh		patch \| blob \| history
src/gromacs/ewald/pme.h		patch \| blob \| history
src/gromacs/ewald/pme_gpu.cpp		patch \| blob \| history
src/gromacs/ewald/pme_gpu_internal.cpp		patch \| blob \| history
src/gromacs/ewald/pme_gpu_internal.h		patch \| blob \| history
src/gromacs/ewald/pme_gpu_program.cpp		patch \| blob \| history
src/gromacs/ewald/pme_gpu_program.h		patch \| blob \| history
src/gromacs/ewald/pme_gpu_program_impl.cpp		patch \| blob \| history
src/gromacs/ewald/pme_gpu_program_impl.cu		patch \| blob \| history
src/gromacs/ewald/pme_gpu_program_impl.h		patch \| blob \| history
src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp		patch \| blob \| history
src/gromacs/ewald/pme_only.cpp		patch \| blob \| history
src/gromacs/ewald/pme_only.h		patch \| blob \| history
src/gromacs/ewald/pme_pp_comm_gpu.h		patch \| blob \| history
src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp		patch \| blob \| history
src/gromacs/ewald/pme_pp_comm_gpu_impl.cu		patch \| blob \| history
src/gromacs/ewald/pme_pp_comm_gpu_impl.h		patch \| blob \| history
src/gromacs/ewald/tests/pmegathertest.cpp		patch \| blob \| history
src/gromacs/ewald/tests/pmesplinespreadtest.cpp		patch \| blob \| history
src/gromacs/ewald/tests/pmetestcommon.cpp		patch \| blob \| history
src/gromacs/ewald/tests/pmetestcommon.h		patch \| blob \| history
src/gromacs/ewald/tests/testhardwarecontexts.cpp		patch \| blob \| history
src/gromacs/ewald/tests/testhardwarecontexts.h		patch \| blob \| history
src/gromacs/gpu_utils/device_context.h		patch \| blob \| history
src/gromacs/gpu_utils/device_context_ocl.cpp		patch \| blob \| history
src/gromacs/gpu_utils/device_context_ocl.h		patch \| blob \| history
src/gromacs/gpu_utils/oclutils.h		patch \| blob \| history
src/gromacs/listed_forces/gpubonded.h		patch \| blob \| history
src/gromacs/listed_forces/gpubonded_impl.cpp		patch \| blob \| history
src/gromacs/listed_forces/gpubonded_impl.cu		patch \| blob \| history
src/gromacs/listed_forces/gpubonded_impl.h		patch \| blob \| history
src/gromacs/mdlib/forcerec.h		patch \| blob \| history
src/gromacs/mdlib/leapfrog_gpu.cuh		patch \| blob \| history
src/gromacs/mdlib/lincs_gpu.cuh		patch \| blob \| history
src/gromacs/mdlib/settle_gpu.cuh		patch \| blob \| history
src/gromacs/mdlib/update_constrain_gpu.h		patch \| blob \| history
src/gromacs/mdlib/update_constrain_gpu_impl.cpp		patch \| blob \| history
src/gromacs/mdlib/update_constrain_gpu_impl.cu		patch \| blob \| history
src/gromacs/mdlib/update_constrain_gpu_impl.h		patch \| blob \| history
src/gromacs/mdrun/md.cpp		patch \| blob \| history
src/gromacs/mdrun/runner.cpp		patch \| blob \| history
src/gromacs/mdtypes/forcerec.h		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu		patch \| blob \| history
src/gromacs/nbnxm/gpu_data_mgmt.h		patch \| blob \| history
src/gromacs/nbnxm/nbnxm.h		patch \| blob \| history
src/gromacs/nbnxm/nbnxm_setup.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp		patch \| blob \| history