Take over management of OpenCL context from PME and NBNXM
authorArtem Zhmurov <zhmurov@gmail.com>
Sat, 1 Feb 2020 15:40:13 +0000 (16:40 +0100)
committerChristian Blau <cblau@gerrit.gromacs.org>
Wed, 11 Mar 2020 14:59:29 +0000 (15:59 +0100)
This patch set creates the DeviceContext in runner and passes it to the
consumers (PME and NBNXM). This removes unnecessary management code
duplication and makes the device buffers in the two modules compatible.

Fixes #2522
Fixes #3315
Refs #3311

Change-Id: I10358cfaced5b5c7dbdddf95679c9a9703f3a2c0

53 files changed:
src/gromacs/domdec/domdec.cpp
src/gromacs/domdec/domdec.h
src/gromacs/domdec/gpuhaloexchange.h
src/gromacs/domdec/gpuhaloexchange_impl.cpp
src/gromacs/domdec/gpuhaloexchange_impl.cu
src/gromacs/domdec/gpuhaloexchange_impl.cuh
src/gromacs/ewald/pme.h
src/gromacs/ewald/pme_gpu.cpp
src/gromacs/ewald/pme_gpu_internal.cpp
src/gromacs/ewald/pme_gpu_internal.h
src/gromacs/ewald/pme_gpu_program.cpp
src/gromacs/ewald/pme_gpu_program.h
src/gromacs/ewald/pme_gpu_program_impl.cpp
src/gromacs/ewald/pme_gpu_program_impl.cu
src/gromacs/ewald/pme_gpu_program_impl.h
src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp
src/gromacs/ewald/pme_only.cpp
src/gromacs/ewald/pme_only.h
src/gromacs/ewald/pme_pp_comm_gpu.h
src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp
src/gromacs/ewald/pme_pp_comm_gpu_impl.cu
src/gromacs/ewald/pme_pp_comm_gpu_impl.h
src/gromacs/ewald/tests/pmegathertest.cpp
src/gromacs/ewald/tests/pmesplinespreadtest.cpp
src/gromacs/ewald/tests/pmetestcommon.cpp
src/gromacs/ewald/tests/pmetestcommon.h
src/gromacs/ewald/tests/testhardwarecontexts.cpp
src/gromacs/ewald/tests/testhardwarecontexts.h
src/gromacs/gpu_utils/device_context.h
src/gromacs/gpu_utils/device_context_ocl.cpp
src/gromacs/gpu_utils/device_context_ocl.h
src/gromacs/gpu_utils/oclutils.h
src/gromacs/listed_forces/gpubonded.h
src/gromacs/listed_forces/gpubonded_impl.cpp
src/gromacs/listed_forces/gpubonded_impl.cu
src/gromacs/listed_forces/gpubonded_impl.h
src/gromacs/mdlib/forcerec.h
src/gromacs/mdlib/leapfrog_gpu.cuh
src/gromacs/mdlib/lincs_gpu.cuh
src/gromacs/mdlib/settle_gpu.cuh
src/gromacs/mdlib/update_constrain_gpu.h
src/gromacs/mdlib/update_constrain_gpu_impl.cpp
src/gromacs/mdlib/update_constrain_gpu_impl.cu
src/gromacs/mdlib/update_constrain_gpu_impl.h
src/gromacs/mdrun/md.cpp
src/gromacs/mdrun/runner.cpp
src/gromacs/mdtypes/forcerec.h
src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
src/gromacs/nbnxm/gpu_data_mgmt.h
src/gromacs/nbnxm/nbnxm.h
src/gromacs/nbnxm/nbnxm_setup.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp

index e42765cc8f6f81e1f815caf0c8649276d52b7ef2..e020a1405ba3dd9069e2c30d3e2f3be8e5344eef 100644 (file)
@@ -3200,7 +3200,11 @@ gmx_bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const g
     return bCutoffAllowed;
 }
 
-void constructGpuHaloExchange(const gmx::MDLogger& mdlog, const t_commrec& cr, void* streamLocal, void* streamNonLocal)
+void constructGpuHaloExchange(const gmx::MDLogger& mdlog,
+                              const t_commrec&     cr,
+                              const DeviceContext& deviceContext,
+                              void*                streamLocal,
+                              void*                streamNonLocal)
 {
 
     int gpuHaloExchangeSize = 0;
@@ -3224,7 +3228,7 @@ void constructGpuHaloExchange(const gmx::MDLogger& mdlog, const t_commrec& cr, v
         for (int pulse = pulseStart; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
         {
             cr.dd->gpuHaloExchange.push_back(std::make_unique<gmx::GpuHaloExchange>(
-                    cr.dd, cr.mpi_comm_mysim, streamLocal, streamNonLocal, pulse));
+                    cr.dd, cr.mpi_comm_mysim, deviceContext, streamLocal, streamNonLocal, pulse));
         }
     }
 }
index 51aba44e5cdad4647dced7a925539ea81297f063..0a7aa3202ef0c6eb57957a3540f95671bb96592a 100644 (file)
@@ -85,6 +85,7 @@ struct t_nrnb;
 struct gmx_wallcycle;
 enum class PbcType : int;
 class t_state;
+class DeviceContext;
 class GpuEventSynchronizer;
 
 namespace gmx
@@ -314,10 +315,15 @@ void dd_bonded_cg_distance(const gmx::MDLogger& mdlog,
 /*! \brief Construct the GPU halo exchange object(s)
  * \param[in] mdlog          The logger object
  * \param[in] cr             The commrec object
+ * \param[in] deviceContext  GPU device context
  * \param[in] streamLocal    The local GPU stream
  * \param[in] streamNonLocal The non-local GPU stream
  */
-void constructGpuHaloExchange(const gmx::MDLogger& mdlog, const t_commrec& cr, void* streamLocal, void* streamNonLocal);
+void constructGpuHaloExchange(const gmx::MDLogger& mdlog,
+                              const t_commrec&     cr,
+                              const DeviceContext& deviceContext,
+                              void*                streamLocal,
+                              void*                streamNonLocal);
 
 /*! \brief
  * (Re-) Initialization for GPU halo exchange
index dc65cb93d3e9d6fb66f0209e9208235898672cca..851e3d19833031fd2e669039ae1731eaa8dd4365 100644 (file)
@@ -49,6 +49,7 @@
 #include "gromacs/utility/gmxmpi.h"
 
 struct gmx_domdec_t;
+class DeviceContext;
 class GpuEventSynchronizer;
 
 namespace gmx
@@ -80,11 +81,17 @@ public:
      *
      * \param [inout] dd                       domdec structure
      * \param [in]    mpi_comm_mysim           communicator used for simulation
+     * \param [in]    deviceContext            GPU device context
      * \param [in]    streamLocal              local NB CUDA stream.
      * \param [in]    streamNonLocal           non-local NB CUDA stream.
      * \param [in]    pulse                    the communication pulse for this instance
      */
-    GpuHaloExchange(gmx_domdec_t* dd, MPI_Comm mpi_comm_mysim, void* streamLocal, void* streamNonLocal, int pulse);
+    GpuHaloExchange(gmx_domdec_t*        dd,
+                    MPI_Comm             mpi_comm_mysim,
+                    const DeviceContext& deviceContext,
+                    void*                streamLocal,
+                    void*                streamNonLocal,
+                    int                  pulse);
     ~GpuHaloExchange();
 
     /*! \brief
index 1ce9a9d93e7490421c58feb8ef32b65a471692d3..c8ca5df8c21381c18dfcf7d86e97a9029fb920ff 100644 (file)
@@ -62,6 +62,7 @@ class GpuHaloExchange::Impl
 /*!\brief Constructor stub. */
 GpuHaloExchange::GpuHaloExchange(gmx_domdec_t* /* dd */,
                                  MPI_Comm /* mpi_comm_mysim */,
+                                 const DeviceContext& /* deviceContext */,
                                  void* /*streamLocal */,
                                  void* /*streamNonLocal */,
                                  int /*pulse */) :
index 92a1d9f3d5d2d2235099eb0ae230e01331da19e0..4a44beb3e69945b3fc2517d38edbec9b656e716e 100644 (file)
@@ -54,6 +54,7 @@
 #include "gromacs/domdec/domdec_struct.h"
 #include "gromacs/domdec/gpuhaloexchange.h"
 #include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/device_context.h"
 #include "gromacs/gpu_utils/devicebuffer.h"
 #include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
 #include "gromacs/gpu_utils/typecasts.cuh"
@@ -415,11 +416,12 @@ GpuEventSynchronizer* GpuHaloExchange::Impl::getForcesReadyOnDeviceEvent()
 }
 
 /*! \brief Create Domdec GPU object */
-GpuHaloExchange::Impl::Impl(gmx_domdec_t* dd,
-                            MPI_Comm      mpi_comm_mysim,
-                            void*         localStream,
-                            void*         nonLocalStream,
-                            int           pulse) :
+GpuHaloExchange::Impl::Impl(gmx_domdec_t*        dd,
+                            MPI_Comm             mpi_comm_mysim,
+                            const DeviceContext& deviceContext,
+                            void*                localStream,
+                            void*                nonLocalStream,
+                            int                  pulse) :
     dd_(dd),
     sendRankX_(dd->neighbor[0][1]),
     recvRankX_(dd->neighbor[0][0]),
@@ -428,6 +430,7 @@ GpuHaloExchange::Impl::Impl(gmx_domdec_t* dd,
     usePBC_(dd->ci[dd->dim[0]] == 0),
     haloDataTransferLaunched_(new GpuEventSynchronizer()),
     mpi_comm_mysim_(mpi_comm_mysim),
+    deviceContext_(deviceContext),
     localStream_(*static_cast<cudaStream_t*>(localStream)),
     nonLocalStream_(*static_cast<cudaStream_t*>(nonLocalStream)),
     pulse_(pulse)
@@ -460,12 +463,13 @@ GpuHaloExchange::Impl::~Impl()
     delete haloDataTransferLaunched_;
 }
 
-GpuHaloExchange::GpuHaloExchange(gmx_domdec_t* dd,
-                                 MPI_Comm      mpi_comm_mysim,
-                                 void*         localStream,
-                                 void*         nonLocalStream,
-                                 int           pulse) :
-    impl_(new Impl(dd, mpi_comm_mysim, localStream, nonLocalStream, pulse))
+GpuHaloExchange::GpuHaloExchange(gmx_domdec_t*        dd,
+                                 MPI_Comm             mpi_comm_mysim,
+                                 const DeviceContext& deviceContext,
+                                 void*                localStream,
+                                 void*                nonLocalStream,
+                                 int                  pulse) :
+    impl_(new Impl(dd, mpi_comm_mysim, deviceContext, localStream, nonLocalStream, pulse))
 {
 }
 
index a8d2f9204c590894d489d266a221292bd3b6d056..ba22bc5262abcc5551dd66e1978e9b59c11a3d16 100644 (file)
@@ -71,11 +71,17 @@ public:
      *
      * \param [inout] dd                       domdec structure
      * \param [in]    mpi_comm_mysim           communicator used for simulation
+     * \param [in]    deviceContext            GPU device context
      * \param [in]    localStream              local NB CUDA stream
      * \param [in]    nonLocalStream           non-local NB CUDA stream
      * \param [in]    pulse                    the communication pulse for this instance
      */
-    Impl(gmx_domdec_t* dd, MPI_Comm mpi_comm_mysim, void* localStream, void* nonLocalStream, int pulse);
+    Impl(gmx_domdec_t*        dd,
+         MPI_Comm             mpi_comm_mysim,
+         const DeviceContext& deviceContext,
+         void*                localStream,
+         void*                nonLocalStream,
+         int                  pulse);
     ~Impl();
 
     /*! \brief
@@ -176,8 +182,8 @@ private:
     GpuEventSynchronizer* haloDataTransferLaunched_ = nullptr;
     //! MPI communicator used for simulation
     MPI_Comm mpi_comm_mysim_;
-    //! Dummy GPU context object
-    const DeviceContext deviceContext_;
+    //! GPU context object
+    const DeviceContext& deviceContext_;
     //! CUDA stream for local non-bonded calculations
     cudaStream_t localStream_ = nullptr;
     //! CUDA stream for non-local non-bonded calculations
index 40a34682c00f173841f415722d165e038e44676c..1c3cb9b77460c2e2803914c93b1e0b6e7465e66d 100644 (file)
@@ -436,13 +436,6 @@ GPU_FUNC_QUALIFIER void* pme_gpu_get_device_f(const gmx_pme_t* GPU_FUNC_ARGUMENT
 GPU_FUNC_QUALIFIER void* pme_gpu_get_device_stream(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
         GPU_FUNC_TERM_WITH_RETURN(nullptr);
 
-/*! \brief Returns the pointer to the GPU context.
- *  \param[in] pme            The PME data structure.
- *  \returns                  Pointer to GPU context object.
- */
-GPU_FUNC_QUALIFIER const DeviceContext* pme_gpu_get_device_context(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
-        GPU_FUNC_TERM_WITH_RETURN(nullptr);
-
 /*! \brief Get pointer to the device synchronizer object that allows syncing on PME force calculation completion
  * \param[in] pme            The PME data structure.
  * \returns                  Pointer to sychronizer
index b4cec47135c8af61d2f66a368ba635290b4647ac..4c4ed4851d8cea883a6b1531a6fdce4296276c45 100644 (file)
@@ -442,14 +442,6 @@ void* pme_gpu_get_device_stream(const gmx_pme_t* pme)
     return pme_gpu_get_stream(pme->gpu);
 }
 
-const DeviceContext* pme_gpu_get_device_context(const gmx_pme_t* pme)
-{
-    GMX_RELEASE_ASSERT(pme, "GPU context requested from PME before PME was constructed.");
-    GMX_RELEASE_ASSERT(pme_gpu_active(pme),
-                       "GPU context requested from PME, but PME is running on the CPU.");
-    return pme_gpu_get_context(pme->gpu);
-}
-
 GpuEventSynchronizer* pme_gpu_get_f_ready_synchronizer(const gmx_pme_t* pme)
 {
     if (!pme || !pme_gpu_active(pme))
index bd3b25e1cdf1cfa2cf4e07c5f0fcf166fd5c5fc3..dd62e8c4cdfe4306527066936af4042a2edc4744 100644 (file)
@@ -1527,14 +1527,6 @@ void* pme_gpu_get_stream(const PmeGpu* pmeGpu)
     }
 }
 
-const DeviceContext* pme_gpu_get_context(const PmeGpu* pmeGpu)
-{
-    GMX_RELEASE_ASSERT(
-            pmeGpu,
-            "GPU context object was requested, but PME GPU object was not (yet) initialized.");
-    return &pmeGpu->archSpecific->deviceContext_;
-}
-
 GpuEventSynchronizer* pme_gpu_get_forces_ready_synchronizer(const PmeGpu* pmeGpu)
 {
     if (pmeGpu && pmeGpu->kernelParams)
index a9dc9677ce53a55405a43ce5564aa9c43d3fa96a..67a1bc3d1c2a53f267ea722f9fe752f87833176d 100644 (file)
@@ -408,13 +408,6 @@ GPU_FUNC_QUALIFIER void* pme_gpu_get_kernelparam_forces(const PmeGpu* GPU_FUNC_A
 GPU_FUNC_QUALIFIER void* pme_gpu_get_stream(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
         GPU_FUNC_TERM_WITH_RETURN(nullptr);
 
-/*! \brief Return pointer to GPU context (for OpenCL builds).
- * \param[in] pmeGpu         The PME GPU structure.
- * \returns                  Pointer to context object.
- */
-GPU_FUNC_QUALIFIER const DeviceContext* pme_gpu_get_context(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
-        GPU_FUNC_TERM_WITH_RETURN(nullptr);
-
 /*! \brief Return pointer to the sync object triggered after the PME force calculation completion
  * \param[in] pmeGpu         The PME GPU structure.
  * \returns                  Pointer to sync object
index 6b34a41c4c467aea8436a5c6b73be590ae62eedd..23981a661b83bcf6c3a0eeafa189b38bfe7bd772 100644 (file)
 
 #include "pme_gpu_program_impl.h"
 
-PmeGpuProgram::PmeGpuProgram(const DeviceInformation& deviceInfo) :
-    impl_(std::make_unique<PmeGpuProgramImpl>(deviceInfo))
+PmeGpuProgram::PmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext) :
+    impl_(std::make_unique<PmeGpuProgramImpl>(deviceInfo, deviceContext))
 {
 }
 
 PmeGpuProgram::~PmeGpuProgram() = default;
 
-PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation* deviceInfo)
+PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext)
 {
-    GMX_RELEASE_ASSERT(
-            deviceInfo != nullptr,
-            "Device information can not be nullptr when building PME GPU program object.");
-    return std::make_unique<PmeGpuProgram>(*deviceInfo);
+    return std::make_unique<PmeGpuProgram>(deviceInfo, deviceContext);
 }
index 32c33442eb5f79c4f1308ccfd009cebe57974e4e..d4dbdf449d9b17eba6edd7b5b5b0af8868a8efdd 100644 (file)
 
 #include <memory>
 
+class DeviceContext;
+
 struct PmeGpuProgramImpl;
 struct DeviceInformation;
 
 class PmeGpuProgram
 {
 public:
-    explicit PmeGpuProgram(const DeviceInformation& deviceInfo);
+    explicit PmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext);
     ~PmeGpuProgram();
 
     // TODO: design getters for information inside, if needed for PME, and make this private?
@@ -69,6 +71,7 @@ using PmeGpuProgramStorage = std::unique_ptr<PmeGpuProgram>;
 /*! \brief
  * Factory function used to build persistent PME GPU program for the device at once.
  */
-PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation* /*deviceInfo*/);
+PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation& /*deviceInfo*/,
+                                        const DeviceContext& /* deviceContext */);
 
 #endif
index af57c03e9f4e421c0983f96f75efe0832522e9b0..ccaffa5acdc03ddfb0a568c013e1ed661aa051d1 100644 (file)
@@ -45,7 +45,9 @@
 
 #include "pme_gpu_program_impl.h"
 
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */) :
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */,
+                                     const DeviceContext& deviceContext) :
+    deviceContext_(deviceContext),
     warpSize(0),
     spreadWorkGroupSize(0),
     gatherWorkGroupSize(0),
index d17e18f50c51ff5740caaae3843374459df9389c..53bf2f0d1eab0df4cd293cc82176ee700c3425b3 100644 (file)
@@ -98,8 +98,9 @@ extern template void pme_gather_kernel<c_pmeOrder, c_wrapX, c_wrapY, false, true
 extern template void pme_gather_kernel<c_pmeOrder, c_wrapX, c_wrapY, true, false>(const PmeGpuCudaKernelParams);
 extern template void pme_gather_kernel<c_pmeOrder, c_wrapX, c_wrapY, false, false>(const PmeGpuCudaKernelParams);
 
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& deviceInfo) :
-    deviceContext_(deviceInfo)
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */,
+                                     const DeviceContext& deviceContext) :
+    deviceContext_(deviceContext)
 {
     // kernel parameters
     warpSize              = warp_size;
index 1de5014821c7f91722cea9589ff1d70edf59d568..cb1471abf17be0ec925c638221039466083ade2f 100644 (file)
@@ -48,6 +48,7 @@
 #include "gromacs/gpu_utils/gputraits.h"
 #include "gromacs/utility/classhelpers.h"
 
+class DeviceContext;
 struct DeviceInformation;
 
 /*! \internal
@@ -75,10 +76,8 @@ struct PmeGpuProgramImpl
     /*! \brief
      * This is a handle to the GPU context, which is just a dummy in CUDA,
      * but is created/destroyed by this class in OpenCL.
-     * TODO: Later we want to be able to own the context at a higher level and not here,
-     * but this class would still need the non-owning context handle to build the kernels.
      */
-    DeviceContext deviceContext_;
+    const DeviceContext& deviceContext_;
 
     //! Conveniently all the PME kernels use the same single argument type
 #if GMX_GPU == GMX_GPU_CUDA
@@ -147,7 +146,7 @@ struct PmeGpuProgramImpl
 
     PmeGpuProgramImpl() = delete;
     //! Constructor for the given device
-    explicit PmeGpuProgramImpl(const DeviceInformation& deviceInfo);
+    explicit PmeGpuProgramImpl(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext);
     ~PmeGpuProgramImpl();
     GMX_DISALLOW_COPY_AND_ASSIGN(PmeGpuProgramImpl);
 
index 4071beebdbc10039c147ecd1896044a1401bc315..1fa443ee4e916ffd6a00a15b290a0af2aa5bb007 100644 (file)
@@ -53,8 +53,8 @@
 #include "pme_gpu_types_host.h"
 #include "pme_grid.h"
 
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& deviceInfo) :
-    deviceContext_(deviceInfo)
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext) :
+    deviceContext_(deviceContext)
 {
     // kernel parameters
     warpSize = gmx::ocl::getDeviceWarpSize(deviceContext_.context(), deviceInfo.oclDeviceId);
index 2ee17b32674959e1d78363a29ed7d87420a8d1e3..845b1a33ecf45b99b78d846f0107533647e4f485 100644 (file)
@@ -603,7 +603,8 @@ int gmx_pmeonly(struct gmx_pme_t*         pme,
                 gmx_wallcycle*            wcycle,
                 gmx_walltime_accounting_t walltime_accounting,
                 t_inputrec*               ir,
-                PmeRunMode                runMode)
+                PmeRunMode                runMode,
+                const DeviceContext*      deviceContext)
 {
     int     ret;
     int     natoms = 0;
@@ -628,8 +629,7 @@ int gmx_pmeonly(struct gmx_pme_t*         pme,
     const bool useGpuForPme = (runMode == PmeRunMode::GPU) || (runMode == PmeRunMode::Mixed);
     if (useGpuForPme)
     {
-        const void*          commandStream = pme_gpu_get_device_stream(pme);
-        const DeviceContext& deviceContext = *pme_gpu_get_device_context(pme);
+        const void* commandStream = pme_gpu_get_device_stream(pme);
 
         changePinningPolicy(&pme_pp->chargeA, pme_get_pinning_policy());
         changePinningPolicy(&pme_pp->x, pme_get_pinning_policy());
@@ -640,10 +640,13 @@ int gmx_pmeonly(struct gmx_pme_t*         pme,
             pme_pp->pmeForceSenderGpu = std::make_unique<gmx::PmeForceSenderGpu>(
                     commandStream, pme_pp->mpi_comm_mysim, pme_pp->ppRanks);
         }
+        GMX_RELEASE_ASSERT(
+                deviceContext != nullptr,
+                "Device context can not be nullptr when building GPU propagator data object.");
         // TODO: Special PME-only constructor is used here. There is no mechanism to prevent from using the other constructor here.
         //       This should be made safer.
         stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
-                commandStream, deviceContext, GpuApiCallBehavior::Async,
+                commandStream, *deviceContext, GpuApiCallBehavior::Async,
                 pme_gpu_get_padding_size(pme), wcycle);
     }
 
index 0ed37f1e2e95af00e7e06a75c1a01e375af23ce7..18edbb9b43ddf5bcdbe23127f58a2e79ece71027 100644 (file)
@@ -55,6 +55,7 @@ struct t_nrnb;
 struct gmx_pme_t;
 struct gmx_wallcycle;
 
+class DeviceContext;
 enum class PmeRunMode;
 
 /*! \brief Called on the nodes that do PME exclusively */
@@ -64,6 +65,7 @@ int gmx_pmeonly(gmx_pme_t*                pme,
                 gmx_wallcycle*            wcycle,
                 gmx_walltime_accounting_t walltime_accounting,
                 t_inputrec*               ir,
-                PmeRunMode                runMode);
+                PmeRunMode                runMode,
+                const DeviceContext*      deviceContext);
 
 #endif
index e9d8c4ff697c921ba82a704d3171ba9ae71a49c9..ea750cc17c1052fb95a639dd2512da2d30b905e8 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -45,6 +45,7 @@
 #include "gromacs/utility/classhelpers.h"
 #include "gromacs/utility/gmxmpi.h"
 
+class DeviceContext;
 class GpuEventSynchronizer;
 
 namespace gmx
@@ -61,8 +62,9 @@ public:
     /*! \brief Creates PME-PP GPU communication object
      * \param[in] comm            Communicator used for simulation
      * \param[in] pmeRank         Rank of PME task
+     * \param[in] deviceContext   GPU context.
      */
-    PmePpCommGpu(MPI_Comm comm, int pmeRank);
+    PmePpCommGpu(MPI_Comm comm, int pmeRank, const DeviceContext& deviceContext);
     ~PmePpCommGpu();
 
     /*! \brief Perform steps required when buffer size changes
index 0b59ff921201b919324f08af032b6306e9b740fd..b8befc5311033680740c329c99d588b823226383 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -62,7 +62,8 @@ class PmePpCommGpu::Impl
 };
 
 /*!\brief Constructor stub. */
-PmePpCommGpu::PmePpCommGpu(MPI_Comm gmx_unused comm, int gmx_unused pmeRank) : impl_(nullptr)
+PmePpCommGpu::PmePpCommGpu(MPI_Comm /* comm */, int /* pmeRank */, const DeviceContext& /* deviceContext */) :
+    impl_(nullptr)
 {
     GMX_ASSERT(false,
                "A CPU stub for PME-PP GPU communication was called instead of the correct "
@@ -72,26 +73,26 @@ PmePpCommGpu::PmePpCommGpu(MPI_Comm gmx_unused comm, int gmx_unused pmeRank) : i
 PmePpCommGpu::~PmePpCommGpu() = default;
 
 /*!\brief init PME-PP GPU communication stub */
-void PmePpCommGpu::reinit(int gmx_unused size)
+void PmePpCommGpu::reinit(int /* size */)
 {
     GMX_ASSERT(false,
                "A CPU stub for PME-PP GPU communication initialization was called instead of the "
                "correct implementation.");
 }
 
-void PmePpCommGpu::receiveForceFromPmeCudaDirect(void gmx_unused* recvPtr,
-                                                 int gmx_unused recvSize,
-                                                 bool gmx_unused receivePmeForceToGpu)
+void PmePpCommGpu::receiveForceFromPmeCudaDirect(void* /* recvPtr */,
+                                                 int /* recvSize */,
+                                                 bool /* receivePmeForceToGpu */)
 {
     GMX_ASSERT(false,
                "A CPU stub for PME-PP GPU communication was called instead of the correct "
                "implementation.");
 }
 
-void PmePpCommGpu::sendCoordinatesToPmeCudaDirect(void gmx_unused* sendPtr,
-                                                  int gmx_unused sendSize,
-                                                  bool gmx_unused sendPmeCoordinatesFromGpu,
-                                                  GpuEventSynchronizer gmx_unused* coordinatesOnDeviceEvent)
+void PmePpCommGpu::sendCoordinatesToPmeCudaDirect(void* /* sendPtr */,
+                                                  int /* sendSize */,
+                                                  bool /* sendPmeCoordinatesFromGpu */,
+                                                  GpuEventSynchronizer* /* coordinatesOnDeviceEvent */)
 {
     GMX_ASSERT(false,
                "A CPU stub for PME-PP GPU communication was called instead of the correct "
index 29cb73d0cabeadfe92db0e5d8a1a3e7f58d5dbc8..e6e5bacd16d4f61059c4b1bbbe957cb596eb7696 100644 (file)
@@ -48,6 +48,7 @@
 #include "config.h"
 
 #include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/device_context.h"
 #include "gromacs/gpu_utils/devicebuffer.h"
 #include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
 #include "gromacs/utility/gmxmpi.h"
 namespace gmx
 {
 
-PmePpCommGpu::Impl::Impl(MPI_Comm comm, int pmeRank) : comm_(comm), pmeRank_(pmeRank)
+PmePpCommGpu::Impl::Impl(MPI_Comm comm, int pmeRank, const DeviceContext& deviceContext) :
+    comm_(comm),
+    pmeRank_(pmeRank),
+    deviceContext_(deviceContext)
 {
     GMX_RELEASE_ASSERT(
             GMX_THREAD_MPI,
@@ -152,7 +156,10 @@ void* PmePpCommGpu::Impl::getForcesReadySynchronizer()
     return static_cast<void*>(&forcesReadySynchronizer_);
 }
 
-PmePpCommGpu::PmePpCommGpu(MPI_Comm comm, int pmeRank) : impl_(new Impl(comm, pmeRank)) {}
+PmePpCommGpu::PmePpCommGpu(MPI_Comm comm, int pmeRank, const DeviceContext& deviceContext) :
+    impl_(new Impl(comm, pmeRank, deviceContext))
+{
+}
 
 PmePpCommGpu::~PmePpCommGpu() = default;
 
index 5565bea3705d5284499aca13a51abad17709209b..c791ea5b4011ace1b20c5ace0a127fbaea0b92fb 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -59,8 +59,9 @@ public:
     /*! \brief Creates PME-PP GPU communication object.
      * \param[in] comm            Communicator used for simulation
      * \param[in] pmeRank         Rank of PME task
+     * \param[in] deviceContext   GPU context.
      */
-    Impl(MPI_Comm comm, int pmeRank);
+    Impl(MPI_Comm comm, int pmeRank, const DeviceContext& deviceContext);
     ~Impl();
 
     /*! \brief Perform steps required when buffer size changes
@@ -115,6 +116,8 @@ public:
     void* getForcesReadySynchronizer();
 
 private:
+    //! Device context object
+    const DeviceContext& deviceContext_;
     //! CUDA stream used for the communication operations in this class
     cudaStream_t pmePpCommStream_ = nullptr;
     //! Remote location of PME coordinate data buffer
index 08d2716e20ce8b0d2ac7a7d142e18d7f3aca6bb6..59efd56faccc460f8cf0a6bea446c63a1ff5da9a 100644 (file)
@@ -300,7 +300,9 @@ public:
             PmeSafePointer pmeSafe = pmeInitWrapper(&inputRec, codePath, context->getDeviceInfo(),
                                                     context->getPmeGpuProgram(), box);
             std::unique_ptr<StatePropagatorDataGpu> stateGpu =
-                    (codePath == CodePath::GPU) ? makeStatePropagatorDataGpu(*pmeSafe.get()) : nullptr;
+                    (codePath == CodePath::GPU)
+                            ? makeStatePropagatorDataGpu(*pmeSafe.get(), context->deviceContext())
+                            : nullptr;
 
             pmeInitAtoms(pmeSafe.get(), stateGpu.get(), codePath, inputAtomData.coordinates,
                          inputAtomData.charges);
index fba028fa8c1fcf377eedaa0dc5b4ce44fdd06172..8f2935b9f321b50b3f99ccbc83a084a2b244b600 100644 (file)
@@ -152,7 +152,9 @@ public:
                 PmeSafePointer pmeSafe = pmeInitWrapper(&inputRec, codePath, context->getDeviceInfo(),
                                                         context->getPmeGpuProgram(), box);
                 std::unique_ptr<StatePropagatorDataGpu> stateGpu =
-                        (codePath == CodePath::GPU) ? makeStatePropagatorDataGpu(*pmeSafe.get()) : nullptr;
+                        (codePath == CodePath::GPU)
+                                ? makeStatePropagatorDataGpu(*pmeSafe.get(), context->deviceContext())
+                                : nullptr;
 
                 pmeInitAtoms(pmeSafe.get(), stateGpu.get(), codePath, coordinates, charges);
 
index 81edf195feb98a8a58d4c5a12d5ab2df89163ac1..787f3e9f424951df85d7fe26d7d72f3ec217112d 100644 (file)
@@ -160,14 +160,15 @@ PmeSafePointer pmeInitEmpty(const t_inputrec*        inputRec,
 }
 
 //! Make a GPU state-propagator manager
-std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme_t& pme)
+std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme_t&     pme,
+                                                                   const DeviceContext& deviceContext)
 {
     // TODO: Pin the host buffer and use async memory copies
     // TODO: Special constructor for PME-only rank / PME-tests is used here. There should be a mechanism to
     //       restrict one from using other constructor here.
-    return std::make_unique<StatePropagatorDataGpu>(
-            pme_gpu_get_device_stream(&pme), *pme_gpu_get_device_context(&pme),
-            GpuApiCallBehavior::Sync, pme_gpu_get_padding_size(&pme), nullptr);
+    return std::make_unique<StatePropagatorDataGpu>(pme_gpu_get_device_stream(&pme), deviceContext,
+                                                    GpuApiCallBehavior::Sync,
+                                                    pme_gpu_get_padding_size(&pme), nullptr);
 }
 
 //! PME initialization with atom data
index ed919e80f852aad9d9de407567d04feaa542ada2..c67f78bacf265251dc9a772bfff2ec8eaa98ea60 100644 (file)
@@ -135,7 +135,8 @@ PmeSafePointer pmeInitEmpty(const t_inputrec*        inputRec,
                             real             ewaldCoeff_q  = 0.0F,
                             real             ewaldCoeff_lj = 0.0F);
 //! Make a GPU state-propagator manager
-std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme_t& pme);
+std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme_t&     pme,
+                                                                   const DeviceContext& deviceContext);
 //! PME initialization with atom data and system box
 void pmeInitAtoms(gmx_pme_t*               pme,
                   StatePropagatorDataGpu*  stateGpu,
index 9747d0376e968d29349dae7b4d6333740d3e3c6a..661f0fa4bb8c448d10432cdbf4dc17c204c8dc7d 100644 (file)
@@ -108,7 +108,7 @@ static gmx_hw_info_t* hardwareInit()
 
 void PmeTestEnvironment::SetUp()
 {
-    hardwareContexts_.emplace_back(std::make_unique<TestHardwareContext>(CodePath::CPU, "(CPU) ", nullptr));
+    hardwareContexts_.emplace_back(std::make_unique<TestHardwareContext>(CodePath::CPU, "(CPU) "));
 
     hardwareInfo_ = hardwareInit();
     if (!pme_gpu_supports_build(nullptr) || !pme_gpu_supports_hardware(*hardwareInfo_, nullptr))
@@ -120,13 +120,15 @@ void PmeTestEnvironment::SetUp()
     for (int gpuIndex : getCompatibleGpus(hardwareInfo_->gpu_info))
     {
         const DeviceInformation* deviceInfo = getDeviceInfo(hardwareInfo_->gpu_info, gpuIndex);
+        GMX_RELEASE_ASSERT(deviceInfo != nullptr,
+                           "Device information should be provided for the GPU builds.");
         init_gpu(deviceInfo);
 
         char stmp[200] = {};
         get_gpu_device_info_string(stmp, hardwareInfo_->gpu_info, gpuIndex);
         std::string description = "(GPU " + std::string(stmp) + ") ";
         hardwareContexts_.emplace_back(std::make_unique<TestHardwareContext>(
-                CodePath::GPU, description.c_str(), deviceInfo));
+                CodePath::GPU, description.c_str(), *deviceInfo));
     }
 }
 
index 9846cbbb0713ab93184a3eaf4b105fd14eb2e270..03df38671cbeca40c09a0b0fedfb4b42588b7e33 100644 (file)
@@ -49,6 +49,7 @@
 #include <gtest/gtest.h>
 
 #include "gromacs/ewald/pme_gpu_program.h"
+#include "gromacs/gpu_utils/device_context.h"
 #include "gromacs/hardware/gpu_hw_info.h"
 #include "gromacs/utility/gmxassert.h"
 
@@ -80,6 +81,8 @@ struct TestHardwareContext
     std::string description_;
     //! Device information pointer
     const DeviceInformation* deviceInfo_;
+    //! Device context object owned by this test hardware context
+    DeviceContext deviceContext_;
     //! Persistent compiled GPU kernels for PME.
     PmeGpuProgramStorage program_;
 
@@ -88,20 +91,32 @@ public:
     CodePath getCodePath() const { return codePath_; }
     //! Returns a human-readable context description line
     std::string getDescription() const { return description_; }
+    //! Getter for the DeviceContext
+    const DeviceContext& deviceContext() const { return deviceContext_; }
     //! Returns the device info pointer
     const DeviceInformation* getDeviceInfo() const { return deviceInfo_; }
     //! Returns the persistent PME GPU kernels
     const PmeGpuProgram* getPmeGpuProgram() const { return program_.get(); }
-    //! Constructs the context
-    TestHardwareContext(CodePath codePath, const char* description, const DeviceInformation* deviceInfo) :
+    //! Constructs the context for the CPU code path. NOTE(review): deviceInfo_ is not initialized by this constructor.
+    TestHardwareContext(CodePath codePath, const char* description) :
+        codePath_(codePath),
+        description_(description)
+    {
+        GMX_RELEASE_ASSERT(codePath == CodePath::CPU,
+                           "A GPU code path should provide DeviceInformation to the "
+                           "TestHerdwareContext constructor.");
+    }
+    //! Constructs the context for the GPU code path: creates the device context and builds the PME GPU program
+    TestHardwareContext(CodePath codePath, const char* description, const DeviceInformation& deviceInfo) :
         codePath_(codePath),
         description_(description),
-        deviceInfo_(deviceInfo)
+        deviceInfo_(&deviceInfo),
+        deviceContext_(deviceInfo),
+        program_(buildPmeGpuProgram(deviceInfo, deviceContext_))
     {
-        if (codePath == CodePath::GPU)
-        {
-            program_ = buildPmeGpuProgram(deviceInfo_);
-        }
+        GMX_RELEASE_ASSERT(codePath == CodePath::GPU,
+                           "TestHerdwareContext tries to construct DeviceContext and PmeGpuProgram "
+                           "in CPU build.");
     }
     ~TestHardwareContext();
 };
index d192b5543fd90967595f6597e46ad2c4b52f9171..84fc076708966df6df766102f3782f3c59fae0b7 100644 (file)
@@ -61,18 +61,10 @@ struct DeviceInformation;
 class DeviceContext
 {
 public:
-    //! Default constructor. In OpenCL leaves context \c nullptr.
+    //! Default constructor.
     DeviceContext() {}
-    /*! \brief Second stage of construction. Creates the \c cl_context in OpenCL, does nothing in CUDA.
-     *
-     * \param[in] deviceInfo Platform-specific device information.
-     */
-    void init(const DeviceInformation& /*deviceInfo*/) {}
-    /*! \brief Construct the object and call \c init(...) .
-     *
-     * \param[in] deviceInfo Platform-specific device information.
-     */
-    DeviceContext(const DeviceInformation& deviceInfo) { init(deviceInfo); }
+    //! Constructor.
+    DeviceContext(const DeviceInformation& /* deviceInfo */) {}
     //! Destructor
     ~DeviceContext() = default;
 
index 6f86b1744488ca623d64a6cb9f712e13a87269e2..1cd66239030b69b361d190a4dace27eafb9f5e38 100644 (file)
 #define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL 0x4
 /**@}*/
 
-DeviceContext::DeviceContext()
-{
-    context_ = nullptr;
-}
-
-void DeviceContext::init(const DeviceInformation& deviceInfo)
+DeviceContext::DeviceContext(const DeviceInformation& deviceInfo)
 {
     cl_platform_id                     platformId = deviceInfo.oclPlatformId;
     cl_device_id                       deviceId   = deviceInfo.oclDeviceId;
@@ -92,11 +87,6 @@ void DeviceContext::init(const DeviceInformation& deviceInfo)
     }
 }
 
-DeviceContext::DeviceContext(const DeviceInformation& deviceInfo)
-{
-    init(deviceInfo);
-}
-
 DeviceContext::~DeviceContext()
 {
     cl_int clError;
index 70e3927cf80f0ba729170fcf278acb725167c5bc..a9b84b2f8e2c1867c89eed5a619da9e1ee8829c7 100644 (file)
@@ -57,16 +57,9 @@ struct DeviceInformation;
 class DeviceContext
 {
 public:
-    //! Default constructor. Sets \c context_ to \c nullptr.
-    DeviceContext();
-    /*! \brief Second stage of construction. Creates the \c cl_context.
-     *
-     * \param[in] deviceInfo Platform-specific device information.
-     *
-     * \throws InternalError if context creation failed.
-     */
-    void init(const DeviceInformation& deviceInfo);
-    /*! \brief Construct the object and call \c init(...) .
+    //! Default constructor.
+    DeviceContext() {}
+    /*! \brief Constructor that creates the \c cl_context
      *
      * \param[in] deviceInfo Platform-specific device information.
      *
index 230b3ff94e1b0b43f56faf92a5c2cde897272671..90f5b04bfe88888664627eb33f7b30ed35421aa2 100644 (file)
@@ -64,8 +64,11 @@ enum class GpuApiCallBehavior;
  */
 struct gmx_device_runtime_data_t
 {
+    //! Constructor
+    gmx_device_runtime_data_t(const DeviceContext& deviceContext) : deviceContext_(deviceContext) {}
+
     //! OpenCL context
-    DeviceContext deviceContext;
+    const DeviceContext& deviceContext_;
     //! OpenCL program
     cl_program program;
 };
index 1a231d2c2cc83289379bed789dd9a0247e1cddfa..b1c69d45721fd00ec7aa8b67e45578e93a479fb9 100644 (file)
@@ -55,6 +55,7 @@
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/classhelpers.h"
 
+class DeviceContext;
 struct gmx_enerdata_t;
 struct gmx_ffparams_t;
 struct gmx_mtop_t;
@@ -106,7 +107,10 @@ class GpuBonded
 {
 public:
     //! Construct the manager with constant data and the stream to use.
-    GpuBonded(const gmx_ffparams_t& ffparams, void* streamPtr, gmx_wallcycle* wcycle);
+    GpuBonded(const gmx_ffparams_t& ffparams,
+              const DeviceContext&  deviceContext,
+              void*                 streamPtr,
+              gmx_wallcycle*        wcycle);
     //! Destructor
     ~GpuBonded();
 
index 94a2b5d42b26c6a7428f6e9353d5e702e75a5584..f24103229be93815e61855a81288dfa7c8572cd4 100644 (file)
@@ -160,7 +160,10 @@ class GpuBonded::Impl
 {
 };
 
-GpuBonded::GpuBonded(const gmx_ffparams_t& /* ffparams */, void* /*streamPtr */, gmx_wallcycle* /* wcycle */) :
+GpuBonded::GpuBonded(const gmx_ffparams_t& /* ffparams */,
+                     const DeviceContext& /* deviceContext */,
+                     void* /*streamPtr */,
+                     gmx_wallcycle* /* wcycle */) :
     impl_(nullptr)
 {
 }
index ff7092f40cd1719b0a4ffd4cb8f49bd875a417a9..763550c5c9e52d891cf574a92edb09c1bfc92b59 100644 (file)
@@ -50,6 +50,7 @@
 
 #include "gromacs/gpu_utils/cuda_arch_utils.cuh"
 #include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/device_context.h"
 #include "gromacs/gpu_utils/devicebuffer.h"
 #include "gromacs/gpu_utils/typecasts.cuh"
 #include "gromacs/mdtypes/enerdata.h"
@@ -63,7 +64,11 @@ namespace gmx
 
 // ---- GpuBonded::Impl
 
-GpuBonded::Impl::Impl(const gmx_ffparams_t& ffparams, void* streamPtr, gmx_wallcycle* wcycle)
+GpuBonded::Impl::Impl(const gmx_ffparams_t& ffparams,
+                      const DeviceContext&  deviceContext,
+                      void*                 streamPtr,
+                      gmx_wallcycle*        wcycle) :
+    deviceContext_(deviceContext)
 {
     stream_ = *static_cast<CommandStream*>(streamPtr);
     wcycle_ = wcycle;
@@ -306,8 +311,11 @@ void GpuBonded::Impl::clearEnergies()
 
 // ---- GpuBonded
 
-GpuBonded::GpuBonded(const gmx_ffparams_t& ffparams, void* streamPtr, gmx_wallcycle* wcycle) :
-    impl_(new Impl(ffparams, streamPtr, wcycle))
+GpuBonded::GpuBonded(const gmx_ffparams_t& ffparams,
+                     const DeviceContext&  deviceContext,
+                     void*                 streamPtr,
+                     gmx_wallcycle*        wcycle) :
+    impl_(new Impl(ffparams, deviceContext, streamPtr, wcycle))
 {
 }
 
index 0532b40315a0b893c6ec4457d5a4714690f7cac6..a0da918893fa0340fc2b78bc81967c80e528b20f 100644 (file)
@@ -126,7 +126,7 @@ class GpuBonded::Impl
 {
 public:
     //! Constructor
-    Impl(const gmx_ffparams_t& ffparams, void* streamPtr, gmx_wallcycle* wcycle);
+    Impl(const gmx_ffparams_t& ffparams, const DeviceContext& deviceContext, void* streamPtr, gmx_wallcycle* wcycle);
     /*! \brief Destructor, non-default needed for freeing
      * device-side buffers */
     ~Impl();
@@ -180,8 +180,8 @@ private:
     //! \brief Device-side total virial
     float* d_vTot_ = nullptr;
 
-    //! Dummy GPU context object
-    const DeviceContext deviceContext_;
+    //! GPU context object
+    const DeviceContext& deviceContext_;
     //! \brief Bonded GPU stream, not owned by this module
     CommandStream stream_;
 
index c7f38cb5f5b800a11bce29de1ef13dfe5c5b58bc..fea69c594f03732d140012eff569eb3393ffb5b2 100644 (file)
@@ -42,7 +42,6 @@
 #include "gromacs/timing/wallcycle.h"
 #include "gromacs/utility/arrayref.h"
 
-struct DeviceInformation;
 struct gmx_hw_info_t;
 struct t_commrec;
 struct t_fcdata;
index 98703c05b94121b8dcaaff96910d6e8b0451a909..26a6fc7399a4c373dcacbcad524f2198d8e91150 100644 (file)
@@ -112,7 +112,7 @@ public:
     class Impl;
 
 private:
-    //! Dummy GPU context object
+    //! GPU context object
     const DeviceContext& deviceContext_;
     //! GPU stream
     CommandStream commandStream_;
index 77423dc3231fee737151501664f2ef9f40d7dfe9..4817573b8098806b24e8a1216ef441aaa8f01c81 100644 (file)
@@ -169,7 +169,7 @@ public:
     static bool isNumCoupledConstraintsSupported(const gmx_mtop_t& mtop);
 
 private:
-    //! Dummy GPU context object
+    //! GPU context object
     const DeviceContext& deviceContext_;
     //! GPU stream
     CommandStream commandStream_;
index f07af017e33fdaba6b53382465b6da76d340a7a3..da8bafd8dfbf45ae0dcbcb13874525fa483da6a4 100644 (file)
@@ -252,7 +252,7 @@ public:
     void set(const InteractionDefinitions& idef, const t_mdatoms& md);
 
 private:
-    //! Dummy GPU context object
+    //! GPU context object
     const DeviceContext& deviceContext_;
     //! GPU stream
     CommandStream commandStream_;
index 09f0bbecc125216b361ffda8e210be14950967ff..61f8537efa24bc052851f016b86f8583bfb0a087 100644 (file)
@@ -49,6 +49,7 @@
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/classhelpers.h"
 
+class DeviceContext;
 class GpuEventSynchronizer;
 
 struct gmx_mtop_t;
@@ -77,11 +78,13 @@ public:
      *                              projection from it.
      * \param[in] mtop              Topology of the system: SETTLE gets the masses for O and H atoms
      *                              and target O-H and H-H distances from this object.
+     * \param[in] deviceContext     GPU device context.
      * \param[in] commandStream     GPU stream to use. Can be nullptr.
      * \param[in] xUpdatedOnDevice  The event synchronizer to use to mark that update is done on the GPU.
      */
     UpdateConstrainGpu(const t_inputrec&     ir,
                        const gmx_mtop_t&     mtop,
+                       const DeviceContext&  deviceContext,
                        const void*           commandStream,
                        GpuEventSynchronizer* xUpdatedOnDevice);
 
index 3e10f8a403aba73d38fcf16f61aca848ecc43080..45a0743384e36e00108f9b4ef4fe5fe7106789df 100644 (file)
@@ -57,6 +57,7 @@ class UpdateConstrainGpu::Impl
 
 UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec& /* ir   */,
                                        const gmx_mtop_t& /* mtop */,
+                                       const DeviceContext& /* deviceContext */,
                                        const void* /* commandStream */,
                                        GpuEventSynchronizer* /* xUpdatedOnDevice */) :
     impl_(nullptr)
index a8e5a94cc6d87bbf649d84743379744cf47ea860..41f75723324aca24e314dade2f70eae39756a43c 100644 (file)
@@ -57,6 +57,7 @@
 #include <algorithm>
 
 #include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/device_context.h"
 #include "gromacs/gpu_utils/devicebuffer.h"
 #include "gromacs/gpu_utils/gputraits.cuh"
 #include "gromacs/gpu_utils/vectype_ops.cuh"
@@ -166,8 +167,10 @@ void UpdateConstrainGpu::Impl::scaleCoordinates(const matrix scalingMatrix)
 
 UpdateConstrainGpu::Impl::Impl(const t_inputrec&     ir,
                                const gmx_mtop_t&     mtop,
+                               const DeviceContext&  deviceContext,
                                const void*           commandStream,
                                GpuEventSynchronizer* xUpdatedOnDevice) :
+    deviceContext_(deviceContext),
     coordinatesReady_(xUpdatedOnDevice)
 {
     GMX_ASSERT(xUpdatedOnDevice != nullptr, "The event synchronizer can not be nullptr.");
@@ -231,9 +234,10 @@ GpuEventSynchronizer* UpdateConstrainGpu::Impl::getCoordinatesReadySync()
 
 UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec&     ir,
                                        const gmx_mtop_t&     mtop,
+                                       const DeviceContext&  deviceContext,
                                        const void*           commandStream,
                                        GpuEventSynchronizer* xUpdatedOnDevice) :
-    impl_(new Impl(ir, mtop, commandStream, xUpdatedOnDevice))
+    impl_(new Impl(ir, mtop, deviceContext, commandStream, xUpdatedOnDevice))
 {
 }
 
index 75b6814de0066627d4d1ed9e1d7d24ce95f2f9fc..dd46010e93015920529d7100fce04579ddcca594 100644 (file)
@@ -75,10 +75,15 @@ public:
      *                              projection from it.
      * \param[in] mtop              Topology of the system: SETTLE gets the masses for O and H atoms
      *                              and target O-H and H-H distances from this object.
+     * \param[in] deviceContext     GPU device context.
      * \param[in] commandStream     GPU stream to use. Can be nullptr.
      * \param[in] xUpdatedOnDevice  The event synchronizer to use to mark that update is done on the GPU.
      */
-    Impl(const t_inputrec& ir, const gmx_mtop_t& mtop, const void* commandStream, GpuEventSynchronizer* xUpdatedOnDevice);
+    Impl(const t_inputrec&     ir,
+         const gmx_mtop_t&     mtop,
+         const DeviceContext&  deviceContext,
+         const void*           commandStream,
+         GpuEventSynchronizer* xUpdatedOnDevice);
 
     ~Impl();
 
@@ -163,8 +168,8 @@ public:
     static bool isNumCoupledConstraintsSupported(const gmx_mtop_t& mtop);
 
 private:
-    //! Dummy GPU context object
-    const DeviceContext deviceContext_;
+    //! GPU context object
+    const DeviceContext& deviceContext_;
     //! GPU stream
     CommandStream commandStream_ = nullptr;
     //! GPU kernel launch config
index 2cb1388dcabe0b8a1175e9dff2b611f5e155a720..941a7030c9eca738ea4dee3201bbefd4c0628db7 100644 (file)
@@ -400,8 +400,13 @@ void gmx::LegacySimulator::do_md()
         {
             GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
         }
-        integrator = std::make_unique<UpdateConstrainGpu>(
-                *ir, *top_global, stateGpu->getUpdateStream(), stateGpu->xUpdatedOnDevice());
+
+        GMX_RELEASE_ASSERT(fr->deviceContext != nullptr,
+                           "GPU device context should be initialized to use GPU update.");
+
+        integrator = std::make_unique<UpdateConstrainGpu>(*ir, *top_global, *fr->deviceContext,
+                                                          stateGpu->getUpdateStream(),
+                                                          stateGpu->xUpdatedOnDevice());
 
         integrator->setPbc(PbcType::Xyz, state->box);
     }
@@ -866,7 +871,10 @@ void gmx::LegacySimulator::do_md()
                             Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local);
                     void* streamNonLocal = Nbnxm::gpu_get_command_stream(
                             fr->nbv->gpu_nbv, InteractionLocality::NonLocal);
-                    constructGpuHaloExchange(mdlog, *cr, streamLocal, streamNonLocal);
+                    GMX_RELEASE_ASSERT(
+                            fr->deviceContext != nullptr,
+                            "GPU device context should be initialized to use GPU halo exchange.");
+                    constructGpuHaloExchange(mdlog, *cr, *fr->deviceContext, streamLocal, streamNonLocal);
                 }
             }
         }
index b233b0737c9540c019f12b869fa4d906a2333ca4..081501bfff528aef8447c9dc2ec0799915b200e1 100644 (file)
@@ -73,6 +73,7 @@
 #include "gromacs/fileio/tpxio.h"
 #include "gromacs/gmxlib/network.h"
 #include "gromacs/gmxlib/nrnb.h"
+#include "gromacs/gpu_utils/device_context.h"
 #include "gromacs/gpu_utils/gpu_utils.h"
 #include "gromacs/hardware/cpuinfo.h"
 #include "gromacs/hardware/detecthardware.h"
@@ -1140,9 +1141,20 @@ int Mdrunner::mdrunner()
             EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
 
     // Get the device handles for the modules, nullptr when no task is assigned.
+    // TODO: There should be only one DeviceInformation.
     DeviceInformation* nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr);
     DeviceInformation* pmeDeviceInfo       = gpuTaskAssignments.initPmeDevice();
 
+    std::unique_ptr<DeviceContext> deviceContext = nullptr;
+    if (pmeDeviceInfo)
+    {
+        deviceContext = std::make_unique<DeviceContext>(*pmeDeviceInfo);
+    }
+    else if (nonbondedDeviceInfo)
+    {
+        deviceContext = std::make_unique<DeviceContext>(*nonbondedDeviceInfo);
+    }
+
     // TODO Initialize GPU streams here.
 
     // TODO Currently this is always built, yet DD partition code
@@ -1338,13 +1350,19 @@ int Mdrunner::mdrunner()
                       opt2fn("-tablep", filenames.size(), filenames.data()),
                       opt2fns("-tableb", filenames.size(), filenames.data()), pforce);
 
+        fr->deviceContext = deviceContext.get();
+
         if (devFlags.enableGpuPmePPComm && !thisRankHasDuty(cr, DUTY_PME))
         {
-            fr->pmePpCommGpu = std::make_unique<gmx::PmePpCommGpu>(cr->mpi_comm_mysim, cr->dd->pme_nodeid);
+            GMX_RELEASE_ASSERT(
+                    deviceContext != nullptr,
+                    "Device context can not be nullptr when PME-PP direct communications object.");
+            fr->pmePpCommGpu = std::make_unique<gmx::PmePpCommGpu>(
+                    cr->mpi_comm_mysim, cr->dd->pme_nodeid, *deviceContext);
         }
 
         fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec, fr, cr, *hwinfo, nonbondedDeviceInfo,
-                                        &mtop, box, wcycle);
+                                        fr->deviceContext, &mtop, box, wcycle);
         if (useGpuForBonded)
         {
             auto stream = havePPDomainDecomposition(cr)
@@ -1352,7 +1370,10 @@ int Mdrunner::mdrunner()
                                             fr->nbv->gpu_nbv, gmx::InteractionLocality::NonLocal)
                                   : Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv,
                                                                   gmx::InteractionLocality::Local);
-            gpuBonded     = std::make_unique<GpuBonded>(mtop.ffparams, stream, wcycle);
+            GMX_RELEASE_ASSERT(
+                    fr->deviceContext != nullptr,
+                    "Device context can not be nullptr when computing bonded interactions on GPU.");
+            gpuBonded = std::make_unique<GpuBonded>(mtop.ffparams, *fr->deviceContext, stream, wcycle);
             fr->gpuBonded = gpuBonded.get();
         }
 
@@ -1428,7 +1449,13 @@ int Mdrunner::mdrunner()
     PmeGpuProgramStorage pmeGpuProgram;
     if (thisRankHasPmeGpuTask)
     {
-        pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
+        GMX_RELEASE_ASSERT(
+                pmeDeviceInfo != nullptr,
+                "Device information can not be nullptr when building PME GPU program object.");
+        GMX_RELEASE_ASSERT(
+                deviceContext != nullptr,
+                "Device context can not be nullptr when building PME GPU program object.");
+        pmeGpuProgram = buildPmeGpuProgram(*pmeDeviceInfo, *deviceContext);
     }
 
     /* Initiate PME if necessary,
@@ -1566,14 +1593,16 @@ int Mdrunner::mdrunner()
                     fr->nbv->gpu_nbv != nullptr
                             ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal)
                             : nullptr;
-            const DeviceContext& deviceContext = *pme_gpu_get_device_context(fr->pmedata);
-            const int            paddingSize   = pme_gpu_get_padding_size(fr->pmedata);
+            const int          paddingSize = pme_gpu_get_padding_size(fr->pmedata);
             GpuApiCallBehavior transferKind = (inputrec->eI == eiMD && !doRerun && !useModularSimulator)
                                                       ? GpuApiCallBehavior::Async
                                                       : GpuApiCallBehavior::Sync;
-
+            GMX_RELEASE_ASSERT(
+                    deviceContext != nullptr,
+                    "Device context can not be nullptr when building GPU propagator data object.");
             stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
-                    pmeStream, localStream, nonLocalStream, deviceContext, transferKind, paddingSize, wcycle);
+                    pmeStream, localStream, nonLocalStream, *deviceContext, transferKind,
+                    paddingSize, wcycle);
             fr->stateGpu = stateGpu.get();
         }
 
@@ -1608,7 +1637,8 @@ int Mdrunner::mdrunner()
         GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
         /* do PME only */
         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
-        gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
+        gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode,
+                    deviceContext.get());
     }
 
     wallcycle_stop(wcycle, ewcRUN);
@@ -1670,6 +1700,7 @@ int Mdrunner::mdrunner()
 
     free_gpu(nonbondedDeviceInfo);
     free_gpu(pmeDeviceInfo);
+    deviceContext.reset(nullptr);
     sfree(fcd);
 
     if (doMembed)
index 8c4f5d2f01ffcf25ab0a6f5a99f0606942cdda54..d53b5e571cf6a9c5d2e8e9299d33daf91471c940 100644 (file)
@@ -52,6 +52,7 @@
 struct gmx_pme_t;
 struct nonbonded_verlet_t;
 struct bonded_threading_t;
+class DeviceContext;
 class DispersionCorrection;
 struct t_forcetable;
 struct t_QMMMrec;
@@ -289,6 +290,9 @@ struct t_forcerec
     //       general StatePropagatorData object that is passed around
     gmx::StatePropagatorDataGpu* stateGpu = nullptr;
 
+    //! GPU device context
+    DeviceContext* deviceContext = nullptr;
+
     /* For PME-PP GPU communication */
     std::unique_ptr<gmx::PmePpCommGpu> pmePpCommGpu;
 };
index 7467f95b69596f4f0cb63e19195ad510fb43a6a8..666aefc62910b21babf656313f8bd17af9c25385 100644 (file)
@@ -413,7 +413,8 @@ static void cuda_init_const(NbnxmGpu*                       nb,
     nbnxn_cuda_clear_e_fshift(nb);
 }
 
-NbnxmGpu* gpu_init(const DeviceInformation*   deviceInfo,
+NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo,
+                   const DeviceContext& /* deviceContext */,
                    const interaction_const_t* ic,
                    const PairlistParams&      listParams,
                    const nbnxn_atomdata_t*    nbat,
index 9eac3f7c78977948d58a545e589c5f413dbd5b7c..822852786c80e0417c320b84b6b76d41aab46a0c 100644 (file)
@@ -50,6 +50,8 @@
 #include "gromacs/gpu_utils/gpu_macros.h"
 #include "gromacs/mdtypes/locality.h"
 
+class DeviceContext;
+
 struct NbnxmGpu;
 struct gmx_gpu_info_t;
 struct DeviceInformation;
@@ -65,6 +67,7 @@ namespace Nbnxm
 /** Initializes the data structures related to GPU nonbonded calculations. */
 GPU_FUNC_QUALIFIER
 NbnxmGpu* gpu_init(const DeviceInformation gmx_unused* deviceInfo,
+                   const DeviceContext gmx_unused& deviceContext,
                    const interaction_const_t gmx_unused* ic,
                    const PairlistParams gmx_unused& listParams,
                    const nbnxn_atomdata_t gmx_unused* nbat,
index 2fa353a8486a5aaa8f8081dd9969f373eb7e2abf..a15f646ed98eb38925e8c4671397701965d63700 100644 (file)
 #include "gromacs/utility/enumerationhelpers.h"
 #include "gromacs/utility/real.h"
 
+class DeviceContext;
 struct DeviceInformation;
 struct gmx_domdec_zones_t;
 struct gmx_enerdata_t;
@@ -409,6 +410,7 @@ std::unique_ptr<nonbonded_verlet_t> init_nb_verlet(const gmx::MDLogger&     mdlo
                                                    const t_commrec*         cr,
                                                    const gmx_hw_info_t&     hardwareInfo,
                                                    const DeviceInformation* deviceInfo,
+                                                   const DeviceContext*     deviceContext,
                                                    const gmx_mtop_t*        mtop,
                                                    matrix                   box,
                                                    gmx_wallcycle*           wcycle);
index 58fee75e6a831e5125475b573f19900e0f701f78..f7c7f6dd16ac75b964263a5e3f1a60038627e787 100644 (file)
@@ -364,6 +364,7 @@ std::unique_ptr<nonbonded_verlet_t> init_nb_verlet(const gmx::MDLogger&     mdlo
                                                    const t_commrec*         cr,
                                                    const gmx_hw_info_t&     hardwareInfo,
                                                    const DeviceInformation* deviceInfo,
+                                                   const DeviceContext*     deviceContext,
                                                    const gmx_mtop_t*        mtop,
                                                    matrix                   box,
                                                    gmx_wallcycle*           wcycle)
@@ -445,9 +446,13 @@ std::unique_ptr<nonbonded_verlet_t> init_nb_verlet(const gmx::MDLogger&     mdlo
     int       minimumIlistCountForGpuBalancing = 0;
     if (useGpu)
     {
+        GMX_RELEASE_ASSERT(
+                deviceContext != nullptr,
+                "Device context can not be nullptr when to use GPU for non-bonded forces.");
         /* init the NxN GPU data; the last argument tells whether we'll have
          * both local and non-local NB calculation on GPU */
-        gpu_nbv = gpu_init(deviceInfo, fr->ic, pairlistParams, nbat.get(), cr->nodeid, haveMultipleDomains);
+        gpu_nbv = gpu_init(deviceInfo, *deviceContext, fr->ic, pairlistParams, nbat.get(),
+                           cr->nodeid, haveMultipleDomains);
 
         minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(gpu_nbv);
     }
index 5b5911941d86f1d7bd8467b5f823a81f9f35e401..eb1234d5122a0ad1abc040d003a7bc9a58c93249 100644 (file)
@@ -138,7 +138,7 @@ static void init_ewald_coulomb_force_table(const EwaldCorrectionTables&     tabl
        CL_MEM_COPY_HOST_PTR, &array_format, tabsize, 1, 0, ftmp, &cl_error);
      */
 
-    coul_tab = clCreateBuffer(runData->deviceContext.context(),
+    coul_tab = clCreateBuffer(runData->deviceContext_.context(),
                               CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
                               tables.tableF.size() * sizeof(cl_float),
                               const_cast<real*>(tables.tableF.data()), &cl_error);
@@ -160,23 +160,23 @@ static void init_atomdata_first(cl_atomdata_t* ad, int ntypes, gmx_device_runtim
     ad->ntypes = ntypes;
 
     ad->shift_vec =
-            clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
+            clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
                            SHIFTS * sizeof(nbnxn_atomdata_t::shift_vec[0]), nullptr, &cl_error);
     GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                        ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
     ad->bShiftVecUploaded = CL_FALSE;
 
-    ad->fshift = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
+    ad->fshift = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
                                 SHIFTS * sizeof(nb_staging_t::fshift[0]), nullptr, &cl_error);
     GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                        ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
 
-    ad->e_lj = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
+    ad->e_lj = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
                               sizeof(float), nullptr, &cl_error);
     GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                        ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
 
-    ad->e_el = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
+    ad->e_el = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
                               sizeof(float), nullptr, &cl_error);
     GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                        ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
@@ -336,7 +336,7 @@ static void init_nbparam(cl_nbparam_t*                    nbp,
            CL_MEM_READ_WRITE, &array_format, 1, 1, 0, nullptr, &cl_error);
          */
 
-        nbp->coulomb_tab_climg2d = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_ONLY,
+        nbp->coulomb_tab_climg2d = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_ONLY,
                                                   sizeof(cl_float), nullptr, &cl_error);
         GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                            ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
@@ -354,12 +354,12 @@ static void init_nbparam(cl_nbparam_t*                    nbp,
            array_format.image_channel_data_type = CL_FLOAT;
            array_format.image_channel_order     = CL_R;
 
-           nbp->nbfp_climg2d = clCreateImage2D(runData->deviceContext.context(), CL_MEM_READ_ONLY |
+           nbp->nbfp_climg2d = clCreateImage2D(runData->deviceContext_.context(), CL_MEM_READ_ONLY |
            CL_MEM_COPY_HOST_PTR, &array_format, nnbfp, 1, 0, nbat->nbfp, &cl_error);
          */
 
         nbp->nbfp_climg2d = clCreateBuffer(
-                runData->deviceContext.context(),
+                runData->deviceContext_.context(),
                 CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
                 nnbfp * sizeof(cl_float), const_cast<float*>(nbatParams.nbfp.data()), &cl_error);
         GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
@@ -372,7 +372,7 @@ static void init_nbparam(cl_nbparam_t*                    nbp,
             /*  nbp->nbfp_comb_climg2d = clCreateImage2D(runData->deviceContext.context(), CL_MEM_READ_WRITE |
                CL_MEM_COPY_HOST_PTR, &array_format, nnbfp_comb, 1, 0, nbat->nbfp_comb, &cl_error);*/
             nbp->nbfp_comb_climg2d =
-                    clCreateBuffer(runData->deviceContext.context(),
+                    clCreateBuffer(runData->deviceContext_.context(),
                                    CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
                                    nnbfp_comb * sizeof(cl_float),
                                    const_cast<float*>(nbatParams.nbfp_comb.data()), &cl_error);
@@ -388,7 +388,7 @@ static void init_nbparam(cl_nbparam_t*                    nbp,
             // TODO: decide which alternative is most efficient - textures or buffers.
             /* nbp->nbfp_comb_climg2d = clCreateImage2D(runData->deviceContext.context(),
                CL_MEM_READ_WRITE, &array_format, 1, 1, 0, nullptr, &cl_error);*/
-            nbp->nbfp_comb_climg2d = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_ONLY,
+            nbp->nbfp_comb_climg2d = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_ONLY,
                                                     sizeof(cl_float), nullptr, &cl_error);
             GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                                ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
@@ -556,6 +556,7 @@ static void nbnxn_ocl_init_const(NbnxmGpu*                       nb,
 
 //! This function is documented in the header file
 NbnxmGpu* gpu_init(const DeviceInformation*   deviceInfo,
+                   const DeviceContext&       deviceContext,
                    const interaction_const_t* ic,
                    const PairlistParams&      listParams,
                    const nbnxn_atomdata_t*    nbat,
@@ -583,7 +584,7 @@ NbnxmGpu* gpu_init(const DeviceInformation*   deviceInfo,
 
     /* set device info, just point it to the right GPU among the detected ones */
     nb->deviceInfo  = deviceInfo;
-    nb->dev_rundata = new gmx_device_runtime_data_t();
+    nb->dev_rundata = new gmx_device_runtime_data_t(deviceContext);
 
     /* init nbst */
     pmalloc(reinterpret_cast<void**>(&nb->nbst.e_lj), sizeof(*nb->nbst.e_lj));
@@ -605,11 +606,9 @@ NbnxmGpu* gpu_init(const DeviceInformation*   deviceInfo,
         queue_properties = 0;
     }
 
-    nb->dev_rundata->deviceContext.init(*deviceInfo);
-
     /* local/non-local GPU streams */
     nb->stream[InteractionLocality::Local] =
-            clCreateCommandQueue(nb->dev_rundata->deviceContext.context(),
+            clCreateCommandQueue(nb->dev_rundata->deviceContext_.context(),
                                  nb->deviceInfo->oclDeviceId, queue_properties, &cl_error);
     if (CL_SUCCESS != cl_error)
     {
@@ -622,7 +621,7 @@ NbnxmGpu* gpu_init(const DeviceInformation*   deviceInfo,
         init_plist(nb->plist[InteractionLocality::NonLocal]);
 
         nb->stream[InteractionLocality::NonLocal] =
-                clCreateCommandQueue(nb->dev_rundata->deviceContext.context(),
+                clCreateCommandQueue(nb->dev_rundata->deviceContext_.context(),
                                      nb->deviceInfo->oclDeviceId, queue_properties, &cl_error);
         if (CL_SUCCESS != cl_error)
         {
@@ -736,7 +735,7 @@ void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const Inte
     }
 
     // TODO most of this function is same in CUDA and OpenCL, move into the header
-    const DeviceContext& deviceContext = nb->dev_rundata->deviceContext;
+    const DeviceContext& deviceContext = nb->dev_rundata->deviceContext_;
 
     reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
                            deviceContext);
@@ -815,13 +814,13 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
             freeDeviceBuffer(&d_atdat->atom_types);
         }
 
-        d_atdat->f = clCreateBuffer(nb->dev_rundata->deviceContext.context(),
+        d_atdat->f = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
                                     CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
                                     nalloc * DIM * sizeof(nbat->out[0].f[0]), nullptr, &cl_error);
         GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
                            ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
 
-        d_atdat->xq = clCreateBuffer(nb->dev_rundata->deviceContext.context(),
+        d_atdat->xq = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
                                      CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
                                      nalloc * sizeof(cl_float4), nullptr, &cl_error);
         GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
@@ -829,7 +828,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
 
         if (useLjCombRule(nb->nbparam->vdwtype))
         {
-            d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->deviceContext.context(),
+            d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
                                               CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
                                               nalloc * sizeof(cl_float2), nullptr, &cl_error);
             GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
@@ -837,7 +836,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
         }
         else
         {
-            d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->deviceContext.context(),
+            d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
                                                  CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
                                                  nalloc * sizeof(int), nullptr, &cl_error);
             GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
index 0ba3345780533b3f7d5881fc81c814e995324b6e..9c1c759880d0cca050b09d69f28192cbc8eb6983 100644 (file)
@@ -200,10 +200,10 @@ void nbnxn_gpu_compile_kernels(NbnxmGpu* nb)
         {
             /* TODO when we have a proper MPI-aware logging module,
                the log output here should be written there */
-            program =
-                    gmx::ocl::compileProgram(stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl",
-                                             extraDefines, nb->dev_rundata->deviceContext.context(),
-                                             nb->deviceInfo->oclDeviceId, nb->deviceInfo->deviceVendor);
+            program = gmx::ocl::compileProgram(
+                    stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines,
+                    nb->dev_rundata->deviceContext_.context(), nb->deviceInfo->oclDeviceId,
+                    nb->deviceInfo->deviceVendor);
         }
         catch (gmx::GromacsException& e)
         {