From 87f193973e4263f8b81bb8d46015440d27cd6987 Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Wed, 11 Mar 2020 14:21:24 +0100 Subject: [PATCH] Store DeviceInformation inside the DeviceContext class The device information with which the context was created is now stored inside the DeviceContext object. This allows passing fewer arguments when information from DeviceInformation is needed (e.g. for OpenCL optimization). The empty constructor for the DeviceContext was also removed to make having an invalid context less likely. Change-Id: Ie9600a89c21327246251c891807c37084f626f76 --- src/gromacs/ewald/pme_gpu_internal.cpp | 2 +- src/gromacs/ewald/pme_gpu_program.cpp | 8 ++-- src/gromacs/ewald/pme_gpu_program.h | 11 ++++-- src/gromacs/ewald/pme_gpu_program_impl.cpp | 3 +- src/gromacs/ewald/pme_gpu_program_impl.cu | 3 +- src/gromacs/ewald/pme_gpu_program_impl.h | 2 +- .../ewald/pme_gpu_program_impl_ocl.cpp | 3 +- src/gromacs/ewald/pme_pp_comm_gpu_impl.cu | 2 +- .../ewald/tests/testhardwarecontexts.h | 20 ++++++---- src/gromacs/gpu_utils/device_context.h | 11 ++++-- src/gromacs/gpu_utils/device_context_ocl.cpp | 2 +- src/gromacs/gpu_utils/device_context_ocl.h | 10 +++-- src/gromacs/gpu_utils/device_stream.cpp | 3 +- src/gromacs/gpu_utils/device_stream.cu | 3 +- src/gromacs/gpu_utils/device_stream.h | 14 ++----- src/gromacs/gpu_utils/device_stream_ocl.cpp | 6 +-- src/gromacs/gpu_utils/gpu_utils.cu | 2 +- src/gromacs/gpu_utils/oclutils.h | 5 --- .../gpu_utils/tests/typecasts_runner.cu | 2 +- src/gromacs/mdlib/tests/constrtestrunners.cu | 2 +- .../mdlib/tests/leapfrogtestrunners.cu | 2 +- src/gromacs/mdlib/tests/settletestrunners.cu | 2 +- src/gromacs/mdrun/runner.cpp | 5 +-- .../state_propagator_data_gpu_impl_gpu.cpp | 2 +- src/gromacs/nbnxm/cuda/nbnxm_cuda.cu | 26 +++++++------ .../nbnxm/cuda/nbnxm_cuda_data_mgmt.cu | 34 ++++++++-------- src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h | 7 +++- src/gromacs/nbnxm/gpu_data_mgmt.h | 3 +- src/gromacs/nbnxm/nbnxm_setup.cpp | 3 +- src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp | 4 +- .../nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp | 39 ++++++++----------- .../nbnxm/opencl/nbnxm_ocl_jit_support.cpp | 6 +-- src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h | 7 +++- 33 files changed, 125 insertions(+), 129 deletions(-) diff --git a/src/gromacs/ewald/pme_gpu_internal.cpp b/src/gromacs/ewald/pme_gpu_internal.cpp index d6caa84f95..c7a6df563a 100644 --- a/src/gromacs/ewald/pme_gpu_internal.cpp +++ b/src/gromacs/ewald/pme_gpu_internal.cpp @@ -505,7 +505,7 @@ void pme_gpu_init_internal(PmeGpu* pmeGpu) * - default high priority with CUDA * - no priorities implemented yet with OpenCL; see #2532 */ - pmeGpu->archSpecific->pmeStream_.init(*pmeGpu->deviceInfo, pmeGpu->archSpecific->deviceContext_, + pmeGpu->archSpecific->pmeStream_.init(pmeGpu->archSpecific->deviceContext_, DeviceStreamPriority::High, pmeGpu->archSpecific->useTiming); } diff --git a/src/gromacs/ewald/pme_gpu_program.cpp b/src/gromacs/ewald/pme_gpu_program.cpp index 72711c91ae..efc754530a 100644 --- a/src/gromacs/ewald/pme_gpu_program.cpp +++ b/src/gromacs/ewald/pme_gpu_program.cpp @@ -53,8 +53,8 @@ #include "pme_gpu_program_impl.h" -PmeGpuProgram::PmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext) : - impl_(std::make_unique(deviceInfo, deviceContext)) +PmeGpuProgram::PmeGpuProgram(const DeviceContext& deviceContext) : + impl_(std::make_unique(deviceContext)) { } @@ -65,7 +65,7 @@ int PmeGpuProgram::warpSize() const return impl_->warpSize(); } -PmeGpuProgramStorage buildPmeGpuProgram(const 
DeviceInformation& deviceInfo, const DeviceContext& deviceContext) +PmeGpuProgramStorage buildPmeGpuProgram(const DeviceContext& deviceContext) { - return std::make_unique(deviceInfo, deviceContext); + return std::make_unique(deviceContext); } diff --git a/src/gromacs/ewald/pme_gpu_program.h b/src/gromacs/ewald/pme_gpu_program.h index c4888d97c7..f73bd4d0dd 100644 --- a/src/gromacs/ewald/pme_gpu_program.h +++ b/src/gromacs/ewald/pme_gpu_program.h @@ -64,8 +64,12 @@ struct DeviceInformation; class PmeGpuProgram { public: - //! Constructor - explicit PmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext); + /*! \brief Construct a PME GPU program. + * + * \param[in] deviceContext GPU context. + */ + explicit PmeGpuProgram(const DeviceContext& deviceContext); + //! Destructor ~PmeGpuProgram(); //! Return the warp size for which the kernels were compiled @@ -83,7 +87,6 @@ using PmeGpuProgramStorage = std::unique_ptr; /*! \brief * Factory function used to build persistent PME GPU program for the device at once. */ -PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation& /*deviceInfo*/, - const DeviceContext& /* deviceContext */); +PmeGpuProgramStorage buildPmeGpuProgram(const DeviceContext& /* deviceContext */); #endif diff --git a/src/gromacs/ewald/pme_gpu_program_impl.cpp b/src/gromacs/ewald/pme_gpu_program_impl.cpp index 9056881227..a6ceac16ee 100644 --- a/src/gromacs/ewald/pme_gpu_program_impl.cpp +++ b/src/gromacs/ewald/pme_gpu_program_impl.cpp @@ -45,8 +45,7 @@ #include "pme_gpu_program_impl.h" -PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */, - const DeviceContext& deviceContext) : +PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceContext& deviceContext) : deviceContext_(deviceContext), warpSize_(0), spreadWorkGroupSize(0), diff --git a/src/gromacs/ewald/pme_gpu_program_impl.cu b/src/gromacs/ewald/pme_gpu_program_impl.cu index fb74182b66..84bac0a467 100644 --- a/src/gromacs/ewald/pme_gpu_program_impl.cu +++ b/src/gromacs/ewald/pme_gpu_program_impl.cu @@ -104,8 +104,7 @@ extern template void pme_gather_kernel(const PmeGpuCudaKernelParams); // clang-format on -PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */, - const DeviceContext& deviceContext) : +PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceContext& deviceContext) : deviceContext_(deviceContext) { // kernel parameters diff --git a/src/gromacs/ewald/pme_gpu_program_impl.h b/src/gromacs/ewald/pme_gpu_program_impl.h index b9d1adc0d4..f1b9559d80 100644 --- a/src/gromacs/ewald/pme_gpu_program_impl.h +++ b/src/gromacs/ewald/pme_gpu_program_impl.h @@ -146,7 +146,7 @@ struct PmeGpuProgramImpl PmeGpuProgramImpl() = delete; //! 
Constructor for the given device - explicit PmeGpuProgramImpl(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext); + explicit PmeGpuProgramImpl(const DeviceContext& deviceContext); ~PmeGpuProgramImpl(); GMX_DISALLOW_COPY_AND_ASSIGN(PmeGpuProgramImpl); diff --git a/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp b/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp index 6672812a31..6be367b6b3 100644 --- a/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp +++ b/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp @@ -53,9 +53,10 @@ #include "pme_gpu_types_host.h" #include "pme_grid.h" -PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext) : +PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceContext& deviceContext) : deviceContext_(deviceContext) { + const DeviceInformation& deviceInfo = deviceContext.deviceInfo(); // kernel parameters warpSize_ = gmx::ocl::getDeviceWarpSize(deviceContext_.context(), deviceInfo.oclDeviceId); // TODO: for Intel ideally we'd want to set these based on the compiler warp size diff --git a/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu b/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu index 32a752746a..0e78978865 100644 --- a/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu +++ b/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu @@ -67,7 +67,7 @@ PmePpCommGpu::Impl::Impl(MPI_Comm comm, int pmeRank, const DeviceContext& device // In CUDA we only need priority to create stream. // (note that this will be moved from here in the follow-up patch) - pmePpCommStream_.init(DeviceInformation(), DeviceContext(), DeviceStreamPriority::Normal, false); + pmePpCommStream_.init(deviceContext, DeviceStreamPriority::Normal, false); } PmePpCommGpu::Impl::~Impl() = default; diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.h b/src/gromacs/ewald/tests/testhardwarecontexts.h index 03df38671c..0af2343795 100644 --- a/src/gromacs/ewald/tests/testhardwarecontexts.h +++ b/src/gromacs/ewald/tests/testhardwarecontexts.h @@ -82,7 +82,7 @@ struct TestHardwareContext //! Device information pointer const DeviceInformation* deviceInfo_; //! Local copy of the device context pointer - DeviceContext deviceContext_; + std::unique_ptr deviceContext_; //! Persistent compiled GPU kernels for PME. PmeGpuProgramStorage program_; @@ -92,7 +92,13 @@ public: //! Returns a human-readable context description line std::string getDescription() const { return description_; } //! Getter for the DeviceContext - const DeviceContext& deviceContext() const { return deviceContext_; } + const DeviceContext& deviceContext() const + { + GMX_RELEASE_ASSERT(deviceContext_ != nullptr, + "Trying to get device context before it was initialized or in builds " + "without GPU support."); + return *deviceContext_; + } //! Returns the device info pointer const DeviceInformation* getDeviceInfo() const { return deviceInfo_; } //! Returns the persistent PME GPU kernels @@ -104,19 +110,19 @@ public: { GMX_RELEASE_ASSERT(codePath == CodePath::CPU, "A GPU code path should provide DeviceInformation to the " - "TestHerdwareContext constructor."); + "TestHardwareContext constructor."); } //! 
Constructs the context for GPU builds TestHardwareContext(CodePath codePath, const char* description, const DeviceInformation& deviceInfo) : codePath_(codePath), description_(description), - deviceInfo_(&deviceInfo), - deviceContext_(deviceInfo), - program_(buildPmeGpuProgram(deviceInfo, deviceContext_)) + deviceInfo_(&deviceInfo) { GMX_RELEASE_ASSERT(codePath == CodePath::GPU, - "TestHerdwareContext tries to construct DeviceContext and PmeGpuProgram " + "TestHardwareContext tries to construct DeviceContext and PmeGpuProgram " "in CPU build."); + deviceContext_ = std::make_unique(deviceInfo); + program_ = buildPmeGpuProgram(*deviceContext_); } ~TestHardwareContext(); }; diff --git a/src/gromacs/gpu_utils/device_context.h b/src/gromacs/gpu_utils/device_context.h index 84fc076708..e1eb23255a 100644 --- a/src/gromacs/gpu_utils/device_context.h +++ b/src/gromacs/gpu_utils/device_context.h @@ -61,13 +61,18 @@ struct DeviceInformation; class DeviceContext { public: - //! Default constructor. - DeviceContext() {} //! Constructor. - DeviceContext(const DeviceInformation& /* deviceInfo */) {} + DeviceContext(const DeviceInformation& deviceInfo) : deviceInfo_(deviceInfo) {} //! Destructor ~DeviceContext() = default; + //! Get the associated device information + const DeviceInformation& deviceInfo() const { return deviceInfo_; } + +private: + //! A reference to the device information used upon context creation + const DeviceInformation& deviceInfo_; + GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext); }; #endif // GMX_GPU != GMX_GPU_OPENCL diff --git a/src/gromacs/gpu_utils/device_context_ocl.cpp b/src/gromacs/gpu_utils/device_context_ocl.cpp index 1cd6623903..2f7babd320 100644 --- a/src/gromacs/gpu_utils/device_context_ocl.cpp +++ b/src/gromacs/gpu_utils/device_context_ocl.cpp @@ -60,7 +60,7 @@ #define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL 0x4 /**@}*/ -DeviceContext::DeviceContext(const DeviceInformation& deviceInfo) +DeviceContext::DeviceContext(const DeviceInformation& deviceInfo) : deviceInfo_(deviceInfo) { cl_platform_id platformId = deviceInfo.oclPlatformId; cl_device_id deviceId = deviceInfo.oclDeviceId; diff --git a/src/gromacs/gpu_utils/device_context_ocl.h b/src/gromacs/gpu_utils/device_context_ocl.h index a9b84b2f8e..090943962d 100644 --- a/src/gromacs/gpu_utils/device_context_ocl.h +++ b/src/gromacs/gpu_utils/device_context_ocl.h @@ -57,8 +57,6 @@ struct DeviceInformation; class DeviceContext { public: - //! Default constructor. - DeviceContext() {} /*! \brief Constructor that creates the \c cl_context * * \param[in] deviceInfo Platform-specific device information. @@ -69,14 +67,18 @@ public: //! Destructor ~DeviceContext(); + //! Get the associated device information + const DeviceInformation& deviceInfo() const { return deviceInfo_; } //! Getter cl_context context() const; - GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext); - private: + //! A reference to the device information used upon context creation + const DeviceInformation& deviceInfo_; //! 
OpenCL context object cl_context context_ = nullptr; + + GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext); }; #endif // GMX_GPU_UTILS_DEVICE_CONTEXT_OCL_H diff --git a/src/gromacs/gpu_utils/device_stream.cpp b/src/gromacs/gpu_utils/device_stream.cpp index 1b5b016fba..e1db889d72 100644 --- a/src/gromacs/gpu_utils/device_stream.cpp +++ b/src/gromacs/gpu_utils/device_stream.cpp @@ -46,8 +46,7 @@ DeviceStream::DeviceStream() = default; -void DeviceStream::init(const DeviceInformation& /* deviceInfo */, - const DeviceContext& /* deviceContext */, +void DeviceStream::init(const DeviceContext& /* deviceContext */, DeviceStreamPriority /* priority */, const bool /* useTiming */) { diff --git a/src/gromacs/gpu_utils/device_stream.cu b/src/gromacs/gpu_utils/device_stream.cu index 8d0b484846..2f4ebb9474 100644 --- a/src/gromacs/gpu_utils/device_stream.cu +++ b/src/gromacs/gpu_utils/device_stream.cu @@ -54,8 +54,7 @@ DeviceStream::DeviceStream() stream_ = nullptr; } -void DeviceStream::init(const DeviceInformation& /* deviceInfo */, - const DeviceContext& /* deviceContext */, +void DeviceStream::init(const DeviceContext& /* deviceContext */, DeviceStreamPriority priority, const bool /* useTiming */) { diff --git a/src/gromacs/gpu_utils/device_stream.h b/src/gromacs/gpu_utils/device_stream.h index 2e654e529b..185309c905 100644 --- a/src/gromacs/gpu_utils/device_stream.h +++ b/src/gromacs/gpu_utils/device_stream.h @@ -78,29 +78,21 @@ public: /*! \brief Initialize * - * \param[in] deviceInfo Platform-specific device information (only used in OpenCL). * \param[in] deviceContext Device context (not used in CUDA). * \param[in] priority Stream priority: high or normal. * \param[in] useTiming If the timing should be enabled (not used in CUDA). */ - void init(const DeviceInformation& deviceInfo, - const DeviceContext& deviceContext, - DeviceStreamPriority priority, - const bool useTiming); + void init(const DeviceContext& deviceContext, DeviceStreamPriority priority, const bool useTiming); /*! \brief Construct and init. * - * \param[in] deviceInfo Platform-specific device information (only used in OpenCL). * \param[in] deviceContext Device context (only used in OpenCL). * \param[in] priority Stream priority: high or normal (only used in CUDA). * \param[in] useTiming If the timing should be enabled (only used in OpenCL). */ - DeviceStream(const DeviceInformation& deviceInfo, - const DeviceContext& deviceContext, - DeviceStreamPriority priority, - const bool useTiming) + DeviceStream(const DeviceContext& deviceContext, DeviceStreamPriority priority, const bool useTiming) { - init(deviceInfo, deviceContext, priority, useTiming); + init(deviceContext, priority, useTiming); } //! Synchronize the steam diff --git a/src/gromacs/gpu_utils/device_stream_ocl.cpp b/src/gromacs/gpu_utils/device_stream_ocl.cpp index 013480aacf..358ef65a15 100644 --- a/src/gromacs/gpu_utils/device_stream_ocl.cpp +++ b/src/gromacs/gpu_utils/device_stream_ocl.cpp @@ -54,11 +54,9 @@ DeviceStream::DeviceStream() stream_ = nullptr; } -void DeviceStream::init(const DeviceInformation& deviceInfo, - const DeviceContext& deviceContext, - DeviceStreamPriority /* priority */, - const bool useTiming) +void DeviceStream::init(const DeviceContext& deviceContext, DeviceStreamPriority /* priority */, const bool useTiming) { + const DeviceInformation& deviceInfo = deviceContext.deviceInfo(); cl_command_queue_properties queueProperties = useTiming ? 
CL_QUEUE_PROFILING_ENABLE : 0; cl_device_id deviceId = deviceInfo.oclDeviceId; cl_int clError; diff --git a/src/gromacs/gpu_utils/gpu_utils.cu b/src/gromacs/gpu_utils/gpu_utils.cu index 1fcbdb2423..b5c16c46e8 100644 --- a/src/gromacs/gpu_utils/gpu_utils.cu +++ b/src/gromacs/gpu_utils/gpu_utils.cu @@ -220,7 +220,7 @@ static int do_sanity_checks(int dev_id, const cudaDeviceProp& dev_prop) const auto dummyArguments = prepareGpuKernelArguments(k_dummy_test, config); DeviceInformation deviceInfo; const DeviceContext deviceContext(deviceInfo); - const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false); + const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false); launchGpuKernel(k_dummy_test, config, deviceStream, nullptr, "Dummy kernel", dummyArguments); } catch (gmx::GromacsException& ex) diff --git a/src/gromacs/gpu_utils/oclutils.h b/src/gromacs/gpu_utils/oclutils.h index ada961aa04..ee445047fa 100644 --- a/src/gromacs/gpu_utils/oclutils.h +++ b/src/gromacs/gpu_utils/oclutils.h @@ -65,11 +65,6 @@ enum class GpuApiCallBehavior; */ struct gmx_device_runtime_data_t { - //! Constructor - gmx_device_runtime_data_t(const DeviceContext& deviceContext) : deviceContext_(deviceContext) {} - - //! OpenCL context - const DeviceContext& deviceContext_; //! OpenCL program cl_program program; }; diff --git a/src/gromacs/gpu_utils/tests/typecasts_runner.cu b/src/gromacs/gpu_utils/tests/typecasts_runner.cu index d38212a28b..1488edbed9 100644 --- a/src/gromacs/gpu_utils/tests/typecasts_runner.cu +++ b/src/gromacs/gpu_utils/tests/typecasts_runner.cu @@ -112,7 +112,7 @@ void convertRVecToFloat3OnDevice(std::vector& h_rVecOutput, const std { DeviceInformation deviceInfo; const DeviceContext deviceContext(deviceInfo); - const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false); + const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false); const int numElements = h_rVecInput.size(); diff --git a/src/gromacs/mdlib/tests/constrtestrunners.cu b/src/gromacs/mdlib/tests/constrtestrunners.cu index 00672af606..8c2385daab 100644 --- a/src/gromacs/mdlib/tests/constrtestrunners.cu +++ b/src/gromacs/mdlib/tests/constrtestrunners.cu @@ -72,7 +72,7 @@ void applyLincsGpu(ConstraintsTestData* testData, t_pbc pbc) { DeviceInformation deviceInfo; const DeviceContext deviceContext(deviceInfo); - const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false); + const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false); auto lincsGpu = std::make_unique(testData->ir_.nLincsIter, testData->ir_.nProjOrder, deviceContext, deviceStream); diff --git a/src/gromacs/mdlib/tests/leapfrogtestrunners.cu b/src/gromacs/mdlib/tests/leapfrogtestrunners.cu index 7b2e22aac2..2edab59438 100644 --- a/src/gromacs/mdlib/tests/leapfrogtestrunners.cu +++ b/src/gromacs/mdlib/tests/leapfrogtestrunners.cu @@ -68,7 +68,7 @@ void integrateLeapFrogGpu(LeapFrogTestData* testData, int numSteps) { DeviceInformation deviceInfo; const DeviceContext deviceContext(deviceInfo); - const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false); + const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false); int numAtoms = testData->numAtoms_; diff --git a/src/gromacs/mdlib/tests/settletestrunners.cu b/src/gromacs/mdlib/tests/settletestrunners.cu index 741d2951aa..6f22af87fb 100644 --- a/src/gromacs/mdlib/tests/settletestrunners.cu +++ 
b/src/gromacs/mdlib/tests/settletestrunners.cu @@ -88,7 +88,7 @@ void applySettleGpu(SettleTestData* testData, DeviceInformation deviceInfo; const DeviceContext deviceContext(deviceInfo); - const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false); + const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false); auto settleGpu = std::make_unique(testData->mtop_, deviceContext, deviceStream); diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 4348a151de..604ff0ed7b 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -1449,13 +1449,10 @@ int Mdrunner::mdrunner() PmeGpuProgramStorage pmeGpuProgram; if (thisRankHasPmeGpuTask) { - GMX_RELEASE_ASSERT( - deviceInfo != nullptr, - "Device information can not be nullptr when building PME GPU program object."); GMX_RELEASE_ASSERT( deviceContext != nullptr, "Device context can not be nullptr when building PME GPU program object."); - pmeGpuProgram = buildPmeGpuProgram(*deviceInfo, *deviceContext); + pmeGpuProgram = buildPmeGpuProgram(*deviceContext); } /* Initiate PME if necessary, diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp index fca3ae474d..e60e9fa73b 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp +++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp @@ -111,7 +111,7 @@ StatePropagatorDataGpu::Impl::Impl(const DeviceStream* pmeStream, # if (GMX_GPU == GMX_GPU_CUDA) // In CUDA we only need priority to create stream. // (note that this will be moved from here in the follow-up patch) - updateStreamOwn_.init(DeviceInformation(), DeviceContext(), DeviceStreamPriority::Normal, false); + updateStreamOwn_.init(deviceContext, DeviceStreamPriority::Normal, false); updateStream_ = &updateStreamOwn_; # endif } diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu index f674c9259a..c015326e8d 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu @@ -532,19 +532,20 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In * - The 1D block-grid contains as many blocks as super-clusters. */ int num_threads_z = 1; - if (nb->deviceInfo->prop.major == 3 && nb->deviceInfo->prop.minor == 7) + if (nb->deviceContext_->deviceInfo().prop.major == 3 && nb->deviceContext_->deviceInfo().prop.minor == 7) { num_threads_z = 2; } - int nblock = calc_nb_kernel_nblock(plist->nsci, nb->deviceInfo); + int nblock = calc_nb_kernel_nblock(plist->nsci, &nb->deviceContext_->deviceInfo()); KernelLaunchConfig config; - config.blockSize[0] = c_clSize; - config.blockSize[1] = c_clSize; - config.blockSize[2] = num_threads_z; - config.gridSize[0] = nblock; - config.sharedMemorySize = calc_shmem_required_nonbonded(num_threads_z, nb->deviceInfo, nbp); + config.blockSize[0] = c_clSize; + config.blockSize[1] = c_clSize; + config.blockSize[2] = num_threads_z; + config.gridSize[0] = nblock; + config.sharedMemorySize = + calc_shmem_required_nonbonded(num_threads_z, &nb->deviceContext_->deviceInfo(), nbp); if (debug) { @@ -558,9 +559,10 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In } auto* timingEvent = bDoTime ? 
t->interaction[iloc].nb_k.fetchNextEvent() : nullptr; - const auto kernel = select_nbnxn_kernel( - nbp->eeltype, nbp->vdwtype, stepWork.computeEnergy, - (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune), nb->deviceInfo); + const auto kernel = + select_nbnxn_kernel(nbp->eeltype, nbp->vdwtype, stepWork.computeEnergy, + (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune), + &nb->deviceContext_->deviceInfo()); const auto kernelArgs = prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &stepWork.computeVirial); launchGpuKernel(kernel, config, deviceStream, timingEvent, "k_calc_nb", kernelArgs); @@ -660,8 +662,8 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c * and j-cluster concurrency, in x, y, and z, respectively. * - The 1D block-grid contains as many blocks as super-clusters. */ - int num_threads_z = c_cudaPruneKernelJ4Concurrency; - int nblock = calc_nb_kernel_nblock(numSciInPart, nb->deviceInfo); + int num_threads_z = c_cudaPruneKernelJ4Concurrency; + int nblock = calc_nb_kernel_nblock(numSciInPart, &nb->deviceContext_->deviceInfo()); KernelLaunchConfig config; config.blockSize[0] = c_clSize; config.blockSize[1] = c_clSize; diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu index 36342b935f..a76880b17e 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu @@ -413,8 +413,7 @@ static void cuda_init_const(NbnxmGpu* nb, nbnxn_cuda_clear_e_fshift(nb); } -NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo, - const DeviceContext& /* deviceContext */, +NbnxmGpu* gpu_init(const DeviceContext& deviceContext, const interaction_const_t* ic, const PairlistParams& listParams, const nbnxn_atomdata_t* nbat, @@ -422,7 +421,8 @@ NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo, { cudaError_t stat; - auto nb = new NbnxmGpu; + auto nb = new NbnxmGpu(); + nb->deviceContext_ = &deviceContext; snew(nb->atdat, 1); snew(nb->nbparam, 1); snew(nb->plist[InteractionLocality::Local], 1); @@ -443,11 +443,8 @@ NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo, init_plist(nb->plist[InteractionLocality::Local]); - /* set device info, just point it to the right GPU among the detected ones */ - nb->deviceInfo = deviceInfo; - /* local/non-local GPU streams */ - nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceInfo, DeviceContext(), + nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceContext_, DeviceStreamPriority::Normal, nb->bDoTime); if (nb->bUseTwoStreams) { @@ -458,7 +455,7 @@ NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo, * case will be a single value. */ nb->deviceStreams[InteractionLocality::NonLocal].init( - *nb->deviceInfo, DeviceContext(), DeviceStreamPriority::High, nb->bDoTime); + *nb->deviceContext_, DeviceStreamPriority::High, nb->bDoTime); } /* init events for sychronization (timing disabled for performance reasons!) */ @@ -532,21 +529,23 @@ void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const Inte iTimers.didPairlistH2D = true; } + const DeviceContext& deviceContext = *nb->deviceContext_; + reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc, - DeviceContext()); + deviceContext); copyToDeviceBuffer(&d_plist->sci, h_plist->sci.data(), 0, h_plist->sci.size(), deviceStream, GpuApiCallBehavior::Async, bDoTime ? 
iTimers.pl_h2d.fetchNextEvent() : nullptr); reallocateDeviceBuffer(&d_plist->cj4, h_plist->cj4.size(), &d_plist->ncj4, &d_plist->cj4_nalloc, - DeviceContext()); + deviceContext); copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4.data(), 0, h_plist->cj4.size(), deviceStream, GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr); reallocateDeviceBuffer(&d_plist->imask, h_plist->cj4.size() * c_nbnxnGpuClusterpairSplit, - &d_plist->nimask, &d_plist->imask_nalloc, DeviceContext()); + &d_plist->nimask, &d_plist->imask_nalloc, deviceContext); reallocateDeviceBuffer(&d_plist->excl, h_plist->excl.size(), &d_plist->nexcl, - &d_plist->excl_nalloc, DeviceContext()); + &d_plist->excl_nalloc, deviceContext); copyToDeviceBuffer(&d_plist->excl, h_plist->excl.data(), 0, h_plist->excl.size(), deviceStream, GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr); @@ -798,7 +797,8 @@ void gpu_reset_timings(nonbonded_verlet_t* nbv) int gpu_min_ci_balanced(NbnxmGpu* nb) { - return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceInfo->prop.multiProcessorCount : 0; + return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceContext_->deviceInfo().prop.multiProcessorCount + : 0; } gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb) @@ -843,9 +843,9 @@ void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, NbnxmGpu* gpu_nbv const int maxNumColumns = gridSet.numColumnsMax(); reallocateDeviceBuffer(&gpu_nbv->cxy_na, maxNumColumns * gridSet.grids().size(), - &gpu_nbv->ncxy_na, &gpu_nbv->ncxy_na_alloc, DeviceContext()); + &gpu_nbv->ncxy_na, &gpu_nbv->ncxy_na_alloc, *gpu_nbv->deviceContext_); reallocateDeviceBuffer(&gpu_nbv->cxy_ind, maxNumColumns * gridSet.grids().size(), - &gpu_nbv->ncxy_ind, &gpu_nbv->ncxy_ind_alloc, DeviceContext()); + &gpu_nbv->ncxy_ind, &gpu_nbv->ncxy_ind_alloc, *gpu_nbv->deviceContext_); for (unsigned int g = 0; g < gridSet.grids().size(); g++) { @@ -859,7 +859,7 @@ void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, NbnxmGpu* gpu_nbv const int* cxy_ind = grid.cxy_ind().data(); reallocateDeviceBuffer(&gpu_nbv->atomIndices, atomIndicesSize, &gpu_nbv->atomIndicesSize, - &gpu_nbv->atomIndicesSize_alloc, DeviceContext()); + &gpu_nbv->atomIndicesSize_alloc, *gpu_nbv->deviceContext_); if (atomIndicesSize > 0) { @@ -937,7 +937,7 @@ void nbnxn_gpu_init_add_nbat_f_to_f(const int* cell, if (natoms_total > 0) { reallocateDeviceBuffer(&gpu_nbv->cell, natoms_total, &gpu_nbv->ncell, &gpu_nbv->ncell_alloc, - DeviceContext()); + *gpu_nbv->deviceContext_); copyToDeviceBuffer(&gpu_nbv->cell, cell, 0, natoms_total, deviceStream, GpuApiCallBehavior::Async, nullptr); } diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h b/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h index d2bbfa6b8e..de5241a5fe 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h @@ -266,8 +266,11 @@ class GpuEventSynchronizer; */ struct NbnxmGpu { - /*! \brief CUDA device information */ - const DeviceInformation* deviceInfo = nullptr; + /*! \brief GPU device context. + * + * \todo Make it constant reference, once NbnxmGpu is a proper class. + */ + const DeviceContext* deviceContext_; /*! \brief true if doing both local/non-local NB work on GPU */ bool bUseTwoStreams = false; /*! 
\brief atom data */ diff --git a/src/gromacs/nbnxm/gpu_data_mgmt.h b/src/gromacs/nbnxm/gpu_data_mgmt.h index 574588b39a..8e114d1c65 100644 --- a/src/gromacs/nbnxm/gpu_data_mgmt.h +++ b/src/gromacs/nbnxm/gpu_data_mgmt.h @@ -67,8 +67,7 @@ namespace Nbnxm /** Initializes the data structures related to GPU nonbonded calculations. */ GPU_FUNC_QUALIFIER -NbnxmGpu* gpu_init(const DeviceInformation gmx_unused* deviceInfo, - const DeviceContext gmx_unused& deviceContext, +NbnxmGpu* gpu_init(const DeviceContext gmx_unused& deviceContext, const interaction_const_t gmx_unused* ic, const PairlistParams gmx_unused& listParams, const nbnxn_atomdata_t gmx_unused* nbat, diff --git a/src/gromacs/nbnxm/nbnxm_setup.cpp b/src/gromacs/nbnxm/nbnxm_setup.cpp index d854ede572..d65c59c91d 100644 --- a/src/gromacs/nbnxm/nbnxm_setup.cpp +++ b/src/gromacs/nbnxm/nbnxm_setup.cpp @@ -451,8 +451,7 @@ std::unique_ptr init_nb_verlet(const gmx::MDLogger& mdlo "Device context can not be nullptr when to use GPU for non-bonded forces."); /* init the NxN GPU data; the last argument tells whether we'll have * both local and non-local NB calculation on GPU */ - gpu_nbv = gpu_init(deviceInfo, *deviceContext, fr->ic, pairlistParams, nbat.get(), - haveMultipleDomains); + gpu_nbv = gpu_init(*deviceContext, fr->ic, pairlistParams, nbat.get(), haveMultipleDomains); minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(gpu_nbv); } diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp index e4d571e943..ca6d9e4b19 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp @@ -639,7 +639,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb config.blockSize[1] = c_clSize; config.gridSize[0] = plist->nsci; - validate_global_work_size(config, 3, nb->deviceInfo); + validate_global_work_size(config, 3, &nb->deviceContext_->deviceInfo()); if (debug) { @@ -799,7 +799,7 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c config.blockSize[2] = num_threads_z; config.gridSize[0] = numSciInPart; - validate_global_work_size(config, 3, nb->deviceInfo); + validate_global_work_size(config, 3, &nb->deviceContext_->deviceInfo()); if (debug) { diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp index 8f39ffb36d..f11aa2d807 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp @@ -409,7 +409,7 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interacti nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic); GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); - init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, nb->dev_rundata->deviceContext_); + init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_); } /*! \brief Initializes the pair list data structure. @@ -472,7 +472,7 @@ static cl_kernel nbnxn_gpu_create_kernel(NbnxmGpu* nb, const char* kernel_name) if (CL_SUCCESS != cl_error) { gmx_fatal(FARGS, "Failed to create kernel '%s' for GPU #%s: OpenCL error %d", kernel_name, - nb->deviceInfo->device_name, cl_error); + nb->deviceContext_->deviceInfo().device_name, cl_error); } return kernel; @@ -555,8 +555,7 @@ static void nbnxn_ocl_init_const(cl_atomdata_t* atomData, //! 
This function is documented in the header file -NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo, - const DeviceContext& deviceContext, +NbnxmGpu* gpu_init(const DeviceContext& deviceContext, const interaction_const_t* ic, const PairlistParams& listParams, const nbnxn_atomdata_t* nbat, @@ -564,7 +563,8 @@ NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo, { GMX_ASSERT(ic, "Need a valid interaction constants object"); - auto nb = new NbnxmGpu; + auto nb = new NbnxmGpu(); + nb->deviceContext_ = &deviceContext; snew(nb->atdat, 1); snew(nb->nbparam, 1); snew(nb->plist[InteractionLocality::Local], 1); @@ -578,9 +578,7 @@ NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo, nb->timers = new cl_timers_t(); snew(nb->timings, 1); - /* set device info, just point it to the right GPU among the detected ones */ - nb->deviceInfo = deviceInfo; - nb->dev_rundata = new gmx_device_runtime_data_t(deviceContext); + nb->dev_rundata = new gmx_device_runtime_data_t(); /* init nbst */ pmalloc(reinterpret_cast(&nb->nbst.e_lj), sizeof(*nb->nbst.e_lj)); @@ -593,7 +591,7 @@ NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo, nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr); /* local/non-local GPU streams */ - nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceInfo, nb->dev_rundata->deviceContext_, + nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceContext_, DeviceStreamPriority::Normal, nb->bDoTime); if (nb->bUseTwoStreams) @@ -601,7 +599,7 @@ NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo, init_plist(nb->plist[InteractionLocality::NonLocal]); nb->deviceStreams[InteractionLocality::NonLocal].init( - *nb->deviceInfo, nb->dev_rundata->deviceContext_, DeviceStreamPriority::High, nb->bDoTime); + *nb->deviceContext_, DeviceStreamPriority::High, nb->bDoTime); } if (nb->bDoTime) @@ -609,15 +607,14 @@ NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo, init_timings(nb->timings); } - nbnxn_ocl_init_const(nb->atdat, nb->nbparam, ic, listParams, nbat->params(), - nb->dev_rundata->deviceContext_); + nbnxn_ocl_init_const(nb->atdat, nb->nbparam, ic, listParams, nbat->params(), *nb->deviceContext_); /* Enable LJ param manual prefetch for AMD or Intel or if we request through env. var. 
* TODO: decide about NVIDIA */ nb->bPrefetchLjParam = (getenv("GMX_OCL_DISABLE_I_PREFETCH") == nullptr) - && ((nb->deviceInfo->deviceVendor == DeviceVendor::Amd) - || (nb->deviceInfo->deviceVendor == DeviceVendor::Intel) + && ((nb->deviceContext_->deviceInfo().deviceVendor == DeviceVendor::Amd) + || (nb->deviceContext_->deviceInfo().deviceVendor == DeviceVendor::Intel) || (getenv("GMX_OCL_ENABLE_I_PREFETCH") != nullptr)); /* NOTE: in CUDA we pick L1 cache configuration for the nbnxn kernels here, @@ -710,7 +707,7 @@ void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const Inte } // TODO most of this function is same in CUDA and OpenCL, move into the header - const DeviceContext& deviceContext = nb->dev_rundata->deviceContext_; + const DeviceContext& deviceContext = *nb->deviceContext_; reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc, deviceContext); @@ -789,21 +786,19 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat) freeDeviceBuffer(&d_atdat->atom_types); } - d_atdat->f = clCreateBuffer(nb->dev_rundata->deviceContext_.context(), - CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY, + d_atdat->f = clCreateBuffer(nb->deviceContext_->context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY, nalloc * DIM * sizeof(nbat->out[0].f[0]), nullptr, &cl_error); GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS, ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str()); - d_atdat->xq = clCreateBuffer(nb->dev_rundata->deviceContext_.context(), - CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, + d_atdat->xq = clCreateBuffer(nb->deviceContext_->context(), CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, nalloc * sizeof(cl_float4), nullptr, &cl_error); GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS, ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str()); if (useLjCombRule(nb->nbparam->vdwtype)) { - d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->deviceContext_.context(), + d_atdat->lj_comb = clCreateBuffer(nb->deviceContext_->context(), CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, nalloc * sizeof(cl_float2), nullptr, &cl_error); GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS, @@ -811,7 +806,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat) } else { - d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->deviceContext_.context(), + d_atdat->atom_types = clCreateBuffer(nb->deviceContext_->context(), CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, nalloc * sizeof(int), nullptr, &cl_error); GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS, @@ -1010,7 +1005,7 @@ void gpu_reset_timings(nonbonded_verlet_t* nbv) //! This function is documented in the header file int gpu_min_ci_balanced(NbnxmGpu* nb) { - return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceInfo->compute_units : 0; + return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceContext_->deviceInfo().compute_units : 0; } //! 
This function is documented in the header file diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp index 9c1c759880..0f9c24ba55 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp @@ -202,13 +202,13 @@ void nbnxn_gpu_compile_kernels(NbnxmGpu* nb) the log output here should be written there */ program = gmx::ocl::compileProgram( stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines, - nb->dev_rundata->deviceContext_.context(), nb->deviceInfo->oclDeviceId, - nb->deviceInfo->deviceVendor); + nb->deviceContext_->context(), nb->deviceContext_->deviceInfo().oclDeviceId, + nb->deviceContext_->deviceInfo().deviceVendor); } catch (gmx::GromacsException& e) { e.prependContext(gmx::formatString("Failed to compile NBNXN kernels for GPU #%s\n", - nb->deviceInfo->device_name)); + nb->deviceContext_->deviceInfo().device_name)); throw; } } diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h index a3583761fa..6702907523 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h @@ -319,8 +319,11 @@ typedef struct Nbnxm::gpu_timers_t cl_timers_t; */ struct NbnxmGpu { - //! OpenCL device information - const DeviceInformation* deviceInfo = nullptr; + /* \brief OpenCL device context + * + * \todo Make it constant reference, once NbnxmGpu is a proper class. + */ + const DeviceContext* deviceContext_; //! OpenCL runtime data (context, kernels) struct gmx_device_runtime_data_t* dev_rundata = nullptr; -- 2.22.0
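Illustrative usage sketch (not part of the patch). The types below are simplified, self-contained stand-ins rather than the real GROMACS headers; the placeholder id field and the useDevice() helper are invented for illustration. They only mirror the calling convention the patch establishes: DeviceContext is constructed from a DeviceInformation, keeps a reference to it, and exposes it via deviceInfo(), so consumers such as DeviceStream::init() or buildPmeGpuProgram() can take the context alone.

#include <cstdio>

// Simplified stand-in for the GROMACS DeviceInformation struct.
struct DeviceInformation
{
    int id = 0; // placeholder field, for illustration only
};

// Simplified stand-in for DeviceContext after this patch: no default
// constructor, and the device information is kept inside the context.
class DeviceContext
{
public:
    explicit DeviceContext(const DeviceInformation& deviceInfo) : deviceInfo_(deviceInfo) {}
    // Consumers recover the device information from the context itself.
    const DeviceInformation& deviceInfo() const { return deviceInfo_; }

    DeviceContext(const DeviceContext&) = delete;
    DeviceContext& operator=(const DeviceContext&) = delete;

private:
    const DeviceInformation& deviceInfo_; // reference to the info used at creation
};

// Hypothetical consumer: before the patch it would have taken both a
// DeviceInformation and a DeviceContext; now the context alone suffices.
static void useDevice(const DeviceContext& deviceContext)
{
    std::printf("running on device %d\n", deviceContext.deviceInfo().id);
}

int main()
{
    DeviceInformation deviceInfo;                // filled in by GPU detection in GROMACS
    DeviceContext     deviceContext(deviceInfo); // DeviceContext() would no longer compile
    useDevice(deviceContext);
    return 0;
}

Keeping the reference inside the context is what lets the nbnxm and PME call sites in the hunks above drop their separate DeviceInformation members and arguments.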