The device information with which the context was created is
now stored inside the DeviceContext object. This allows passing
fewer arguments when information from DeviceInformation is needed
(e.g. for OpenCL optimization). The empty constructor for the
DeviceContext was also removed to make having an invalid context
less probable.
Change-Id: Ie9600a89c21327246251c891807c37084f626f76
33 files changed:
* - default high priority with CUDA
* - no priorities implemented yet with OpenCL; see #2532
*/
* - default high priority with CUDA
* - no priorities implemented yet with OpenCL; see #2532
*/
- pmeGpu->archSpecific->pmeStream_.init(*pmeGpu->deviceInfo, pmeGpu->archSpecific->deviceContext_,
+ pmeGpu->archSpecific->pmeStream_.init(pmeGpu->archSpecific->deviceContext_,
DeviceStreamPriority::High, pmeGpu->archSpecific->useTiming);
}
DeviceStreamPriority::High, pmeGpu->archSpecific->useTiming);
}
#include "pme_gpu_program_impl.h"
#include "pme_gpu_program_impl.h"
-PmeGpuProgram::PmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext) :
- impl_(std::make_unique<PmeGpuProgramImpl>(deviceInfo, deviceContext))
+PmeGpuProgram::PmeGpuProgram(const DeviceContext& deviceContext) :
+ impl_(std::make_unique<PmeGpuProgramImpl>(deviceContext))
return impl_->warpSize();
}
return impl_->warpSize();
}
-PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext)
+PmeGpuProgramStorage buildPmeGpuProgram(const DeviceContext& deviceContext)
- return std::make_unique<PmeGpuProgram>(deviceInfo, deviceContext);
+ return std::make_unique<PmeGpuProgram>(deviceContext);
class PmeGpuProgram
{
public:
class PmeGpuProgram
{
public:
- //! Constructor
- explicit PmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext);
+ /*! \brief Construct a PME GPU program.
+ *
+ * \param[in] deviceContext GPU context.
+ */
+ explicit PmeGpuProgram(const DeviceContext& deviceContext);
+ //! Destructor
~PmeGpuProgram();
//! Return the warp size for which the kernels were compiled
~PmeGpuProgram();
//! Return the warp size for which the kernels were compiled
/*! \brief
* Factory function used to build persistent PME GPU program for the device at once.
*/
/*! \brief
* Factory function used to build persistent PME GPU program for the device at once.
*/
-PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation& /*deviceInfo*/,
- const DeviceContext& /* deviceContext */);
+PmeGpuProgramStorage buildPmeGpuProgram(const DeviceContext& /* deviceContext */);
#include "pme_gpu_program_impl.h"
#include "pme_gpu_program_impl.h"
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */,
- const DeviceContext& deviceContext) :
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceContext& deviceContext) :
deviceContext_(deviceContext),
warpSize_(0),
spreadWorkGroupSize(0),
deviceContext_(deviceContext),
warpSize_(0),
spreadWorkGroupSize(0),
extern template void pme_gather_kernel<c_pmeOrder, c_wrapX, c_wrapY, false, ThreadsPerAtom::OrderSquared>(const PmeGpuCudaKernelParams);
// clang-format on
extern template void pme_gather_kernel<c_pmeOrder, c_wrapX, c_wrapY, false, ThreadsPerAtom::OrderSquared>(const PmeGpuCudaKernelParams);
// clang-format on
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */,
- const DeviceContext& deviceContext) :
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceContext& deviceContext) :
deviceContext_(deviceContext)
{
// kernel parameters
deviceContext_(deviceContext)
{
// kernel parameters
PmeGpuProgramImpl() = delete;
//! Constructor for the given device
PmeGpuProgramImpl() = delete;
//! Constructor for the given device
- explicit PmeGpuProgramImpl(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext);
+ explicit PmeGpuProgramImpl(const DeviceContext& deviceContext);
~PmeGpuProgramImpl();
GMX_DISALLOW_COPY_AND_ASSIGN(PmeGpuProgramImpl);
~PmeGpuProgramImpl();
GMX_DISALLOW_COPY_AND_ASSIGN(PmeGpuProgramImpl);
#include "pme_gpu_types_host.h"
#include "pme_grid.h"
#include "pme_gpu_types_host.h"
#include "pme_grid.h"
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext) :
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceContext& deviceContext) :
deviceContext_(deviceContext)
{
deviceContext_(deviceContext)
{
+ const DeviceInformation& deviceInfo = deviceContext.deviceInfo();
// kernel parameters
warpSize_ = gmx::ocl::getDeviceWarpSize(deviceContext_.context(), deviceInfo.oclDeviceId);
// TODO: for Intel ideally we'd want to set these based on the compiler warp size
// kernel parameters
warpSize_ = gmx::ocl::getDeviceWarpSize(deviceContext_.context(), deviceInfo.oclDeviceId);
// TODO: for Intel ideally we'd want to set these based on the compiler warp size
// In CUDA we only need priority to create stream.
// (note that this will be moved from here in the follow-up patch)
// In CUDA we only need priority to create stream.
// (note that this will be moved from here in the follow-up patch)
- pmePpCommStream_.init(DeviceInformation(), DeviceContext(), DeviceStreamPriority::Normal, false);
+ pmePpCommStream_.init(deviceContext, DeviceStreamPriority::Normal, false);
}
PmePpCommGpu::Impl::~Impl() = default;
}
PmePpCommGpu::Impl::~Impl() = default;
//! Device information pointer
const DeviceInformation* deviceInfo_;
//! Local copy of the device context pointer
//! Device information pointer
const DeviceInformation* deviceInfo_;
//! Local copy of the device context pointer
- DeviceContext deviceContext_;
+ std::unique_ptr<DeviceContext> deviceContext_;
//! Persistent compiled GPU kernels for PME.
PmeGpuProgramStorage program_;
//! Persistent compiled GPU kernels for PME.
PmeGpuProgramStorage program_;
//! Returns a human-readable context description line
std::string getDescription() const { return description_; }
//! Getter for the DeviceContext
//! Returns a human-readable context description line
std::string getDescription() const { return description_; }
//! Getter for the DeviceContext
- const DeviceContext& deviceContext() const { return deviceContext_; }
+ const DeviceContext& deviceContext() const
+ {
+ GMX_RELEASE_ASSERT(deviceContext_ != nullptr,
+ "Trying to get device context before it was initialized or in builds "
+ "without GPU support.");
+ return *deviceContext_;
+ }
//! Returns the device info pointer
const DeviceInformation* getDeviceInfo() const { return deviceInfo_; }
//! Returns the persistent PME GPU kernels
//! Returns the device info pointer
const DeviceInformation* getDeviceInfo() const { return deviceInfo_; }
//! Returns the persistent PME GPU kernels
{
GMX_RELEASE_ASSERT(codePath == CodePath::CPU,
"A GPU code path should provide DeviceInformation to the "
{
GMX_RELEASE_ASSERT(codePath == CodePath::CPU,
"A GPU code path should provide DeviceInformation to the "
- "TestHerdwareContext constructor.");
+ "TestHardwareContext constructor.");
}
//! Constructs the context for GPU builds
TestHardwareContext(CodePath codePath, const char* description, const DeviceInformation& deviceInfo) :
codePath_(codePath),
description_(description),
}
//! Constructs the context for GPU builds
TestHardwareContext(CodePath codePath, const char* description, const DeviceInformation& deviceInfo) :
codePath_(codePath),
description_(description),
- deviceInfo_(&deviceInfo),
- deviceContext_(deviceInfo),
- program_(buildPmeGpuProgram(deviceInfo, deviceContext_))
+ deviceInfo_(&deviceInfo)
{
GMX_RELEASE_ASSERT(codePath == CodePath::GPU,
{
GMX_RELEASE_ASSERT(codePath == CodePath::GPU,
- "TestHerdwareContext tries to construct DeviceContext and PmeGpuProgram "
+ "TestHardwareContext tries to construct DeviceContext and PmeGpuProgram "
+ deviceContext_ = std::make_unique<DeviceContext>(deviceInfo);
+ program_ = buildPmeGpuProgram(*deviceContext_);
}
~TestHardwareContext();
};
}
~TestHardwareContext();
};
class DeviceContext
{
public:
class DeviceContext
{
public:
- //! Default constructor.
- DeviceContext() {}
- DeviceContext(const DeviceInformation& /* deviceInfo */) {}
+ DeviceContext(const DeviceInformation& deviceInfo) : deviceInfo_(deviceInfo) {}
//! Destructor
~DeviceContext() = default;
//! Destructor
~DeviceContext() = default;
+ //! Get the associated device information
+ const DeviceInformation& deviceInfo() const { return deviceInfo_; }
+
+private:
+ //! A reference to the device information used upon context creation
+ const DeviceInformation& deviceInfo_;
+
GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext);
};
#endif // GMX_GPU != GMX_GPU_OPENCL
GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext);
};
#endif // GMX_GPU != GMX_GPU_OPENCL
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL 0x4
/**@}*/
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL 0x4
/**@}*/
-DeviceContext::DeviceContext(const DeviceInformation& deviceInfo)
+DeviceContext::DeviceContext(const DeviceInformation& deviceInfo) : deviceInfo_(deviceInfo)
{
cl_platform_id platformId = deviceInfo.oclPlatformId;
cl_device_id deviceId = deviceInfo.oclDeviceId;
{
cl_platform_id platformId = deviceInfo.oclPlatformId;
cl_device_id deviceId = deviceInfo.oclDeviceId;
class DeviceContext
{
public:
class DeviceContext
{
public:
- //! Default constructor.
- DeviceContext() {}
/*! \brief Constructor that creates the \c cl_context
*
* \param[in] deviceInfo Platform-specific device information.
/*! \brief Constructor that creates the \c cl_context
*
* \param[in] deviceInfo Platform-specific device information.
//! Destructor
~DeviceContext();
//! Destructor
~DeviceContext();
+ //! Get the associated device information
+ const DeviceInformation& deviceInfo() const { return deviceInfo_; }
//! Getter
cl_context context() const;
//! Getter
cl_context context() const;
- GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext);
-
+ //! A reference to the device information used upon context creation
+ const DeviceInformation& deviceInfo_;
//! OpenCL context object
cl_context context_ = nullptr;
//! OpenCL context object
cl_context context_ = nullptr;
+
+ GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext);
};
#endif // GMX_GPU_UTILS_DEVICE_CONTEXT_OCL_H
};
#endif // GMX_GPU_UTILS_DEVICE_CONTEXT_OCL_H
DeviceStream::DeviceStream() = default;
DeviceStream::DeviceStream() = default;
-void DeviceStream::init(const DeviceInformation& /* deviceInfo */,
- const DeviceContext& /* deviceContext */,
+void DeviceStream::init(const DeviceContext& /* deviceContext */,
DeviceStreamPriority /* priority */,
const bool /* useTiming */)
{
DeviceStreamPriority /* priority */,
const bool /* useTiming */)
{
-void DeviceStream::init(const DeviceInformation& /* deviceInfo */,
- const DeviceContext& /* deviceContext */,
+void DeviceStream::init(const DeviceContext& /* deviceContext */,
DeviceStreamPriority priority,
const bool /* useTiming */)
{
DeviceStreamPriority priority,
const bool /* useTiming */)
{
- * \param[in] deviceInfo Platform-specific device information (only used in OpenCL).
* \param[in] deviceContext Device context (not used in CUDA).
* \param[in] priority Stream priority: high or normal.
* \param[in] useTiming If the timing should be enabled (not used in CUDA).
*/
* \param[in] deviceContext Device context (not used in CUDA).
* \param[in] priority Stream priority: high or normal.
* \param[in] useTiming If the timing should be enabled (not used in CUDA).
*/
- void init(const DeviceInformation& deviceInfo,
- const DeviceContext& deviceContext,
- DeviceStreamPriority priority,
- const bool useTiming);
+ void init(const DeviceContext& deviceContext, DeviceStreamPriority priority, const bool useTiming);
/*! \brief Construct and init.
*
/*! \brief Construct and init.
*
- * \param[in] deviceInfo Platform-specific device information (only used in OpenCL).
* \param[in] deviceContext Device context (only used in OpenCL).
* \param[in] priority Stream priority: high or normal (only used in CUDA).
* \param[in] useTiming If the timing should be enabled (only used in OpenCL).
*/
* \param[in] deviceContext Device context (only used in OpenCL).
* \param[in] priority Stream priority: high or normal (only used in CUDA).
* \param[in] useTiming If the timing should be enabled (only used in OpenCL).
*/
- DeviceStream(const DeviceInformation& deviceInfo,
- const DeviceContext& deviceContext,
- DeviceStreamPriority priority,
- const bool useTiming)
+ DeviceStream(const DeviceContext& deviceContext, DeviceStreamPriority priority, const bool useTiming)
- init(deviceInfo, deviceContext, priority, useTiming);
+ init(deviceContext, priority, useTiming);
}
//! Synchronize the stream
}
//! Synchronize the stream
-void DeviceStream::init(const DeviceInformation& deviceInfo,
- const DeviceContext& deviceContext,
- DeviceStreamPriority /* priority */,
- const bool useTiming)
+void DeviceStream::init(const DeviceContext& deviceContext, DeviceStreamPriority /* priority */, const bool useTiming)
+ const DeviceInformation& deviceInfo = deviceContext.deviceInfo();
cl_command_queue_properties queueProperties = useTiming ? CL_QUEUE_PROFILING_ENABLE : 0;
cl_device_id deviceId = deviceInfo.oclDeviceId;
cl_int clError;
cl_command_queue_properties queueProperties = useTiming ? CL_QUEUE_PROFILING_ENABLE : 0;
cl_device_id deviceId = deviceInfo.oclDeviceId;
cl_int clError;
const auto dummyArguments = prepareGpuKernelArguments(k_dummy_test, config);
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
const auto dummyArguments = prepareGpuKernelArguments(k_dummy_test, config);
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
- const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false);
+ const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
launchGpuKernel(k_dummy_test, config, deviceStream, nullptr, "Dummy kernel", dummyArguments);
}
catch (gmx::GromacsException& ex)
launchGpuKernel(k_dummy_test, config, deviceStream, nullptr, "Dummy kernel", dummyArguments);
}
catch (gmx::GromacsException& ex)
*/
struct gmx_device_runtime_data_t
{
*/
struct gmx_device_runtime_data_t
{
- //! Constructor
- gmx_device_runtime_data_t(const DeviceContext& deviceContext) : deviceContext_(deviceContext) {}
-
- //! OpenCL context
- const DeviceContext& deviceContext_;
//! OpenCL program
cl_program program;
};
//! OpenCL program
cl_program program;
};
{
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
{
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
- const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false);
+ const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
const int numElements = h_rVecInput.size();
const int numElements = h_rVecInput.size();
{
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
{
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
- const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false);
+ const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
auto lincsGpu = std::make_unique<LincsGpu>(testData->ir_.nLincsIter, testData->ir_.nProjOrder,
deviceContext, deviceStream);
auto lincsGpu = std::make_unique<LincsGpu>(testData->ir_.nLincsIter, testData->ir_.nProjOrder,
deviceContext, deviceStream);
{
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
{
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
- const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false);
+ const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
int numAtoms = testData->numAtoms_;
int numAtoms = testData->numAtoms_;
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
- const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false);
+ const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
auto settleGpu = std::make_unique<SettleGpu>(testData->mtop_, deviceContext, deviceStream);
auto settleGpu = std::make_unique<SettleGpu>(testData->mtop_, deviceContext, deviceStream);
PmeGpuProgramStorage pmeGpuProgram;
if (thisRankHasPmeGpuTask)
{
PmeGpuProgramStorage pmeGpuProgram;
if (thisRankHasPmeGpuTask)
{
- GMX_RELEASE_ASSERT(
- deviceInfo != nullptr,
- "Device information can not be nullptr when building PME GPU program object.");
GMX_RELEASE_ASSERT(
deviceContext != nullptr,
"Device context can not be nullptr when building PME GPU program object.");
GMX_RELEASE_ASSERT(
deviceContext != nullptr,
"Device context can not be nullptr when building PME GPU program object.");
- pmeGpuProgram = buildPmeGpuProgram(*deviceInfo, *deviceContext);
+ pmeGpuProgram = buildPmeGpuProgram(*deviceContext);
}
/* Initiate PME if necessary,
}
/* Initiate PME if necessary,
# if (GMX_GPU == GMX_GPU_CUDA)
// In CUDA we only need priority to create stream.
// (note that this will be moved from here in the follow-up patch)
# if (GMX_GPU == GMX_GPU_CUDA)
// In CUDA we only need priority to create stream.
// (note that this will be moved from here in the follow-up patch)
- updateStreamOwn_.init(DeviceInformation(), DeviceContext(), DeviceStreamPriority::Normal, false);
+ updateStreamOwn_.init(deviceContext, DeviceStreamPriority::Normal, false);
updateStream_ = &updateStreamOwn_;
# endif
}
updateStream_ = &updateStreamOwn_;
# endif
}
* - The 1D block-grid contains as many blocks as super-clusters.
*/
int num_threads_z = 1;
* - The 1D block-grid contains as many blocks as super-clusters.
*/
int num_threads_z = 1;
- if (nb->deviceInfo->prop.major == 3 && nb->deviceInfo->prop.minor == 7)
+ if (nb->deviceContext_->deviceInfo().prop.major == 3 && nb->deviceContext_->deviceInfo().prop.minor == 7)
- int nblock = calc_nb_kernel_nblock(plist->nsci, nb->deviceInfo);
+ int nblock = calc_nb_kernel_nblock(plist->nsci, &nb->deviceContext_->deviceInfo());
KernelLaunchConfig config;
KernelLaunchConfig config;
- config.blockSize[0] = c_clSize;
- config.blockSize[1] = c_clSize;
- config.blockSize[2] = num_threads_z;
- config.gridSize[0] = nblock;
- config.sharedMemorySize = calc_shmem_required_nonbonded(num_threads_z, nb->deviceInfo, nbp);
+ config.blockSize[0] = c_clSize;
+ config.blockSize[1] = c_clSize;
+ config.blockSize[2] = num_threads_z;
+ config.gridSize[0] = nblock;
+ config.sharedMemorySize =
+ calc_shmem_required_nonbonded(num_threads_z, &nb->deviceContext_->deviceInfo(), nbp);
}
auto* timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
}
auto* timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
- const auto kernel = select_nbnxn_kernel(
- nbp->eeltype, nbp->vdwtype, stepWork.computeEnergy,
- (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune), nb->deviceInfo);
+ const auto kernel =
+ select_nbnxn_kernel(nbp->eeltype, nbp->vdwtype, stepWork.computeEnergy,
+ (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune),
+ &nb->deviceContext_->deviceInfo());
const auto kernelArgs =
prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &stepWork.computeVirial);
launchGpuKernel(kernel, config, deviceStream, timingEvent, "k_calc_nb", kernelArgs);
const auto kernelArgs =
prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &stepWork.computeVirial);
launchGpuKernel(kernel, config, deviceStream, timingEvent, "k_calc_nb", kernelArgs);
* and j-cluster concurrency, in x, y, and z, respectively.
* - The 1D block-grid contains as many blocks as super-clusters.
*/
* and j-cluster concurrency, in x, y, and z, respectively.
* - The 1D block-grid contains as many blocks as super-clusters.
*/
- int num_threads_z = c_cudaPruneKernelJ4Concurrency;
- int nblock = calc_nb_kernel_nblock(numSciInPart, nb->deviceInfo);
+ int num_threads_z = c_cudaPruneKernelJ4Concurrency;
+ int nblock = calc_nb_kernel_nblock(numSciInPart, &nb->deviceContext_->deviceInfo());
KernelLaunchConfig config;
config.blockSize[0] = c_clSize;
config.blockSize[1] = c_clSize;
KernelLaunchConfig config;
config.blockSize[0] = c_clSize;
config.blockSize[1] = c_clSize;
nbnxn_cuda_clear_e_fshift(nb);
}
nbnxn_cuda_clear_e_fshift(nb);
}
-NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo,
- const DeviceContext& /* deviceContext */,
+NbnxmGpu* gpu_init(const DeviceContext& deviceContext,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
- auto nb = new NbnxmGpu;
+ auto nb = new NbnxmGpu();
+ nb->deviceContext_ = &deviceContext;
snew(nb->atdat, 1);
snew(nb->nbparam, 1);
snew(nb->plist[InteractionLocality::Local], 1);
snew(nb->atdat, 1);
snew(nb->nbparam, 1);
snew(nb->plist[InteractionLocality::Local], 1);
init_plist(nb->plist[InteractionLocality::Local]);
init_plist(nb->plist[InteractionLocality::Local]);
- /* set device info, just point it to the right GPU among the detected ones */
- nb->deviceInfo = deviceInfo;
-
/* local/non-local GPU streams */
/* local/non-local GPU streams */
- nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceInfo, DeviceContext(),
+ nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceContext_,
DeviceStreamPriority::Normal, nb->bDoTime);
if (nb->bUseTwoStreams)
{
DeviceStreamPriority::Normal, nb->bDoTime);
if (nb->bUseTwoStreams)
{
* case will be a single value.
*/
nb->deviceStreams[InteractionLocality::NonLocal].init(
* case will be a single value.
*/
nb->deviceStreams[InteractionLocality::NonLocal].init(
- *nb->deviceInfo, DeviceContext(), DeviceStreamPriority::High, nb->bDoTime);
+ *nb->deviceContext_, DeviceStreamPriority::High, nb->bDoTime);
}
/* init events for sychronization (timing disabled for performance reasons!) */
}
/* init events for sychronization (timing disabled for performance reasons!) */
iTimers.didPairlistH2D = true;
}
iTimers.didPairlistH2D = true;
}
+ const DeviceContext& deviceContext = *nb->deviceContext_;
+
reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
copyToDeviceBuffer(&d_plist->sci, h_plist->sci.data(), 0, h_plist->sci.size(), deviceStream,
GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
reallocateDeviceBuffer(&d_plist->cj4, h_plist->cj4.size(), &d_plist->ncj4, &d_plist->cj4_nalloc,
copyToDeviceBuffer(&d_plist->sci, h_plist->sci.data(), 0, h_plist->sci.size(), deviceStream,
GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
reallocateDeviceBuffer(&d_plist->cj4, h_plist->cj4.size(), &d_plist->ncj4, &d_plist->cj4_nalloc,
copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4.data(), 0, h_plist->cj4.size(), deviceStream,
GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
reallocateDeviceBuffer(&d_plist->imask, h_plist->cj4.size() * c_nbnxnGpuClusterpairSplit,
copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4.data(), 0, h_plist->cj4.size(), deviceStream,
GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
reallocateDeviceBuffer(&d_plist->imask, h_plist->cj4.size() * c_nbnxnGpuClusterpairSplit,
- &d_plist->nimask, &d_plist->imask_nalloc, DeviceContext());
+ &d_plist->nimask, &d_plist->imask_nalloc, deviceContext);
reallocateDeviceBuffer(&d_plist->excl, h_plist->excl.size(), &d_plist->nexcl,
reallocateDeviceBuffer(&d_plist->excl, h_plist->excl.size(), &d_plist->nexcl,
- &d_plist->excl_nalloc, DeviceContext());
+ &d_plist->excl_nalloc, deviceContext);
copyToDeviceBuffer(&d_plist->excl, h_plist->excl.data(), 0, h_plist->excl.size(), deviceStream,
GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
copyToDeviceBuffer(&d_plist->excl, h_plist->excl.data(), 0, h_plist->excl.size(), deviceStream,
GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
int gpu_min_ci_balanced(NbnxmGpu* nb)
{
int gpu_min_ci_balanced(NbnxmGpu* nb)
{
- return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceInfo->prop.multiProcessorCount : 0;
+ return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceContext_->deviceInfo().prop.multiProcessorCount
+ : 0;
}
gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
}
gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
const int maxNumColumns = gridSet.numColumnsMax();
reallocateDeviceBuffer(&gpu_nbv->cxy_na, maxNumColumns * gridSet.grids().size(),
const int maxNumColumns = gridSet.numColumnsMax();
reallocateDeviceBuffer(&gpu_nbv->cxy_na, maxNumColumns * gridSet.grids().size(),
- &gpu_nbv->ncxy_na, &gpu_nbv->ncxy_na_alloc, DeviceContext());
+ &gpu_nbv->ncxy_na, &gpu_nbv->ncxy_na_alloc, *gpu_nbv->deviceContext_);
reallocateDeviceBuffer(&gpu_nbv->cxy_ind, maxNumColumns * gridSet.grids().size(),
reallocateDeviceBuffer(&gpu_nbv->cxy_ind, maxNumColumns * gridSet.grids().size(),
- &gpu_nbv->ncxy_ind, &gpu_nbv->ncxy_ind_alloc, DeviceContext());
+ &gpu_nbv->ncxy_ind, &gpu_nbv->ncxy_ind_alloc, *gpu_nbv->deviceContext_);
for (unsigned int g = 0; g < gridSet.grids().size(); g++)
{
for (unsigned int g = 0; g < gridSet.grids().size(); g++)
{
const int* cxy_ind = grid.cxy_ind().data();
reallocateDeviceBuffer(&gpu_nbv->atomIndices, atomIndicesSize, &gpu_nbv->atomIndicesSize,
const int* cxy_ind = grid.cxy_ind().data();
reallocateDeviceBuffer(&gpu_nbv->atomIndices, atomIndicesSize, &gpu_nbv->atomIndicesSize,
- &gpu_nbv->atomIndicesSize_alloc, DeviceContext());
+ &gpu_nbv->atomIndicesSize_alloc, *gpu_nbv->deviceContext_);
if (atomIndicesSize > 0)
{
if (atomIndicesSize > 0)
{
if (natoms_total > 0)
{
reallocateDeviceBuffer(&gpu_nbv->cell, natoms_total, &gpu_nbv->ncell, &gpu_nbv->ncell_alloc,
if (natoms_total > 0)
{
reallocateDeviceBuffer(&gpu_nbv->cell, natoms_total, &gpu_nbv->ncell, &gpu_nbv->ncell_alloc,
+ *gpu_nbv->deviceContext_);
copyToDeviceBuffer(&gpu_nbv->cell, cell, 0, natoms_total, deviceStream,
GpuApiCallBehavior::Async, nullptr);
}
copyToDeviceBuffer(&gpu_nbv->cell, cell, 0, natoms_total, deviceStream,
GpuApiCallBehavior::Async, nullptr);
}
- /*! \brief CUDA device information */
- const DeviceInformation* deviceInfo = nullptr;
+ /*! \brief GPU device context.
+ *
+ * \todo Make it constant reference, once NbnxmGpu is a proper class.
+ */
+ const DeviceContext* deviceContext_;
/*! \brief true if doing both local/non-local NB work on GPU */
bool bUseTwoStreams = false;
/*! \brief atom data */
/*! \brief true if doing both local/non-local NB work on GPU */
bool bUseTwoStreams = false;
/*! \brief atom data */
/** Initializes the data structures related to GPU nonbonded calculations. */
GPU_FUNC_QUALIFIER
/** Initializes the data structures related to GPU nonbonded calculations. */
GPU_FUNC_QUALIFIER
-NbnxmGpu* gpu_init(const DeviceInformation gmx_unused* deviceInfo,
- const DeviceContext gmx_unused& deviceContext,
+NbnxmGpu* gpu_init(const DeviceContext gmx_unused& deviceContext,
const interaction_const_t gmx_unused* ic,
const PairlistParams gmx_unused& listParams,
const nbnxn_atomdata_t gmx_unused* nbat,
const interaction_const_t gmx_unused* ic,
const PairlistParams gmx_unused& listParams,
const nbnxn_atomdata_t gmx_unused* nbat,
"Device context can not be nullptr when to use GPU for non-bonded forces.");
/* init the NxN GPU data; the last argument tells whether we'll have
* both local and non-local NB calculation on GPU */
"Device context can not be nullptr when to use GPU for non-bonded forces.");
/* init the NxN GPU data; the last argument tells whether we'll have
* both local and non-local NB calculation on GPU */
- gpu_nbv = gpu_init(deviceInfo, *deviceContext, fr->ic, pairlistParams, nbat.get(),
- haveMultipleDomains);
+ gpu_nbv = gpu_init(*deviceContext, fr->ic, pairlistParams, nbat.get(), haveMultipleDomains);
minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(gpu_nbv);
}
minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(gpu_nbv);
}
config.blockSize[1] = c_clSize;
config.gridSize[0] = plist->nsci;
config.blockSize[1] = c_clSize;
config.gridSize[0] = plist->nsci;
- validate_global_work_size(config, 3, nb->deviceInfo);
+ validate_global_work_size(config, 3, &nb->deviceContext_->deviceInfo());
config.blockSize[2] = num_threads_z;
config.gridSize[0] = numSciInPart;
config.blockSize[2] = num_threads_z;
config.gridSize[0] = numSciInPart;
- validate_global_work_size(config, 3, nb->deviceInfo);
+ validate_global_work_size(config, 3, &nb->deviceContext_->deviceInfo());
nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
- init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, nb->dev_rundata->deviceContext_);
+ init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_);
}
/*! \brief Initializes the pair list data structure.
}
/*! \brief Initializes the pair list data structure.
if (CL_SUCCESS != cl_error)
{
gmx_fatal(FARGS, "Failed to create kernel '%s' for GPU #%s: OpenCL error %d", kernel_name,
if (CL_SUCCESS != cl_error)
{
gmx_fatal(FARGS, "Failed to create kernel '%s' for GPU #%s: OpenCL error %d", kernel_name,
- nb->deviceInfo->device_name, cl_error);
+ nb->deviceContext_->deviceInfo().device_name, cl_error);
//! This function is documented in the header file
//! This function is documented in the header file
-NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo,
- const DeviceContext& deviceContext,
+NbnxmGpu* gpu_init(const DeviceContext& deviceContext,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
{
GMX_ASSERT(ic, "Need a valid interaction constants object");
{
GMX_ASSERT(ic, "Need a valid interaction constants object");
- auto nb = new NbnxmGpu;
+ auto nb = new NbnxmGpu();
+ nb->deviceContext_ = &deviceContext;
snew(nb->atdat, 1);
snew(nb->nbparam, 1);
snew(nb->plist[InteractionLocality::Local], 1);
snew(nb->atdat, 1);
snew(nb->nbparam, 1);
snew(nb->plist[InteractionLocality::Local], 1);
nb->timers = new cl_timers_t();
snew(nb->timings, 1);
nb->timers = new cl_timers_t();
snew(nb->timings, 1);
- /* set device info, just point it to the right GPU among the detected ones */
- nb->deviceInfo = deviceInfo;
- nb->dev_rundata = new gmx_device_runtime_data_t(deviceContext);
+ nb->dev_rundata = new gmx_device_runtime_data_t();
/* init nbst */
pmalloc(reinterpret_cast<void**>(&nb->nbst.e_lj), sizeof(*nb->nbst.e_lj));
/* init nbst */
pmalloc(reinterpret_cast<void**>(&nb->nbst.e_lj), sizeof(*nb->nbst.e_lj));
nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
/* local/non-local GPU streams */
nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
/* local/non-local GPU streams */
- nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceInfo, nb->dev_rundata->deviceContext_,
+ nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceContext_,
DeviceStreamPriority::Normal, nb->bDoTime);
if (nb->bUseTwoStreams)
DeviceStreamPriority::Normal, nb->bDoTime);
if (nb->bUseTwoStreams)
init_plist(nb->plist[InteractionLocality::NonLocal]);
nb->deviceStreams[InteractionLocality::NonLocal].init(
init_plist(nb->plist[InteractionLocality::NonLocal]);
nb->deviceStreams[InteractionLocality::NonLocal].init(
- *nb->deviceInfo, nb->dev_rundata->deviceContext_, DeviceStreamPriority::High, nb->bDoTime);
+ *nb->deviceContext_, DeviceStreamPriority::High, nb->bDoTime);
init_timings(nb->timings);
}
init_timings(nb->timings);
}
- nbnxn_ocl_init_const(nb->atdat, nb->nbparam, ic, listParams, nbat->params(),
- nb->dev_rundata->deviceContext_);
+ nbnxn_ocl_init_const(nb->atdat, nb->nbparam, ic, listParams, nbat->params(), *nb->deviceContext_);
/* Enable LJ param manual prefetch for AMD or Intel or if we request through env. var.
* TODO: decide about NVIDIA
*/
nb->bPrefetchLjParam = (getenv("GMX_OCL_DISABLE_I_PREFETCH") == nullptr)
/* Enable LJ param manual prefetch for AMD or Intel or if we request through env. var.
* TODO: decide about NVIDIA
*/
nb->bPrefetchLjParam = (getenv("GMX_OCL_DISABLE_I_PREFETCH") == nullptr)
- && ((nb->deviceInfo->deviceVendor == DeviceVendor::Amd)
- || (nb->deviceInfo->deviceVendor == DeviceVendor::Intel)
+ && ((nb->deviceContext_->deviceInfo().deviceVendor == DeviceVendor::Amd)
+ || (nb->deviceContext_->deviceInfo().deviceVendor == DeviceVendor::Intel)
|| (getenv("GMX_OCL_ENABLE_I_PREFETCH") != nullptr));
/* NOTE: in CUDA we pick L1 cache configuration for the nbnxn kernels here,
|| (getenv("GMX_OCL_ENABLE_I_PREFETCH") != nullptr));
/* NOTE: in CUDA we pick L1 cache configuration for the nbnxn kernels here,
}
// TODO most of this function is same in CUDA and OpenCL, move into the header
}
// TODO most of this function is same in CUDA and OpenCL, move into the header
- const DeviceContext& deviceContext = nb->dev_rundata->deviceContext_;
+ const DeviceContext& deviceContext = *nb->deviceContext_;
reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
deviceContext);
reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
deviceContext);
freeDeviceBuffer(&d_atdat->atom_types);
}
freeDeviceBuffer(&d_atdat->atom_types);
}
- d_atdat->f = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
- CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
+ d_atdat->f = clCreateBuffer(nb->deviceContext_->context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
nalloc * DIM * sizeof(nbat->out[0].f[0]), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
nalloc * DIM * sizeof(nbat->out[0].f[0]), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
- d_atdat->xq = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
- CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
+ d_atdat->xq = clCreateBuffer(nb->deviceContext_->context(), CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
nalloc * sizeof(cl_float4), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
if (useLjCombRule(nb->nbparam->vdwtype))
{
nalloc * sizeof(cl_float4), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
if (useLjCombRule(nb->nbparam->vdwtype))
{
- d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
+ d_atdat->lj_comb = clCreateBuffer(nb->deviceContext_->context(),
CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
nalloc * sizeof(cl_float2), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
nalloc * sizeof(cl_float2), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
- d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
+ d_atdat->atom_types = clCreateBuffer(nb->deviceContext_->context(),
CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
nalloc * sizeof(int), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
nalloc * sizeof(int), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
//! This function is documented in the header file
int gpu_min_ci_balanced(NbnxmGpu* nb)
{
//! This function is documented in the header file
int gpu_min_ci_balanced(NbnxmGpu* nb)
{
- return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceInfo->compute_units : 0;
+ return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceContext_->deviceInfo().compute_units : 0;
}
//! This function is documented in the header file
}
//! This function is documented in the header file
the log output here should be written there */
program = gmx::ocl::compileProgram(
stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines,
the log output here should be written there */
program = gmx::ocl::compileProgram(
stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines,
- nb->dev_rundata->deviceContext_.context(), nb->deviceInfo->oclDeviceId,
- nb->deviceInfo->deviceVendor);
+ nb->deviceContext_->context(), nb->deviceContext_->deviceInfo().oclDeviceId,
+ nb->deviceContext_->deviceInfo().deviceVendor);
}
catch (gmx::GromacsException& e)
{
e.prependContext(gmx::formatString("Failed to compile NBNXN kernels for GPU #%s\n",
}
catch (gmx::GromacsException& e)
{
e.prependContext(gmx::formatString("Failed to compile NBNXN kernels for GPU #%s\n",
- nb->deviceInfo->device_name));
+ nb->deviceContext_->deviceInfo().device_name));
- //! OpenCL device information
- const DeviceInformation* deviceInfo = nullptr;
+ /*! \brief OpenCL device context
+ *
+ * \todo Make it constant reference, once NbnxmGpu is a proper class.
+ */
+ const DeviceContext* deviceContext_;
//! OpenCL runtime data (context, kernels)
struct gmx_device_runtime_data_t* dev_rundata = nullptr;
//! OpenCL runtime data (context, kernels)
struct gmx_device_runtime_data_t* dev_rundata = nullptr;