Use DeviceBuffer<RVec> in GPU force reduction and PME code
author    Andrey Alekseenko <al42and@gmail.com>
          Tue, 16 Mar 2021 10:03:24 +0000 (11:03 +0100)
committer Paul Bauer <paul.bauer.q@gmail.com>
          Fri, 19 Mar 2021 15:27:49 +0000 (15:27 +0000)
... instead of raw device pointers.

Preparation for #3932. The PME changes are incidental; the main focus is
GpuForceReduction.
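
For context, a minimal sketch of the pattern this change applies across the
APIs below (hedged: DeviceBuffer is GROMACS's typed device-memory handle, so
under CUDA this is a type-safety change rather than a behavioral one):

    // Before: untyped device pointers crossed the API boundary.
    void* pme_gpu_get_device_f(const gmx_pme_t* pme);
    void  GpuForceReduction::registerRvecForce(void* forcePtr);

    // After: the element type travels with the handle, and a
    // default-constructed (empty) buffer replaces nullptr as the
    // "no data" return value.
    DeviceBuffer<gmx::RVec> pme_gpu_get_device_f(const gmx_pme_t* pme);
    void GpuForceReduction::registerRvecForce(DeviceBuffer<gmx::RVec> forcePtr);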

20 files changed:
src/gromacs/ewald/pme.h
src/gromacs/ewald/pme_force_sender_gpu.h
src/gromacs/ewald/pme_force_sender_gpu_impl.cpp
src/gromacs/ewald/pme_force_sender_gpu_impl.cu
src/gromacs/ewald/pme_force_sender_gpu_impl.h
src/gromacs/ewald/pme_gpu.cpp
src/gromacs/ewald/pme_gpu_internal.cpp
src/gromacs/ewald/pme_gpu_internal.h
src/gromacs/ewald/pme_only.cpp
src/gromacs/ewald/pme_pp_comm_gpu.h
src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp
src/gromacs/ewald/pme_pp_comm_gpu_impl.cu
src/gromacs/ewald/pme_pp_comm_gpu_impl.h
src/gromacs/mdlib/gpuforcereduction.h
src/gromacs/mdlib/gpuforcereduction_impl.cpp
src/gromacs/mdlib/gpuforcereduction_impl.cu
src/gromacs/mdlib/gpuforcereduction_impl.h [moved from src/gromacs/mdlib/gpuforcereduction_impl.cuh with 95% similarity]
src/gromacs/mdlib/sim_util.cpp
src/gromacs/mdlib/update_constrain_gpu_impl.cu
src/gromacs/mdlib/update_constrain_gpu_impl.h

index c9f1a6e2809e342327916ef1ba26a007614e7748..d897c0fb0caf08004fa44908da06c5738d09d859 100644 (file)
@@ -467,12 +467,12 @@ GPU_FUNC_QUALIFIER void pme_gpu_set_device_x(const gmx_pme_t*        GPU_FUNC_AR
  * \param[in] pme            The PME data structure.
  * \returns                  Pointer to force data
  */
-GPU_FUNC_QUALIFIER void* pme_gpu_get_device_f(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
-        GPU_FUNC_TERM_WITH_RETURN(nullptr);
+GPU_FUNC_QUALIFIER DeviceBuffer<gmx::RVec> pme_gpu_get_device_f(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
+        GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer<gmx::RVec>{});
 
 /*! \brief Get pointer to the device synchronizer object that allows syncing on PME force calculation completion
  * \param[in] pme            The PME data structure.
- * \returns                  Pointer to sychronizer
+ * \returns                  Pointer to synchronizer
  */
 GPU_FUNC_QUALIFIER GpuEventSynchronizer* pme_gpu_get_f_ready_synchronizer(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
         GPU_FUNC_TERM_WITH_RETURN(nullptr);
index 081ba454e63ae688f05f470d7f7f4781e05970ba..261260977993847c232c726f5778007944890a1d 100644 (file)
@@ -45,6 +45,7 @@
 #include <memory>
 
 #include "gromacs/math/vectypes.h"
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
 #include "gromacs/utility/gmxmpi.h"
 
 class GpuEventSynchronizer;
@@ -83,7 +84,7 @@ public:
      * Initialization of GPU PME Force sender
      * \param[in] d_f   force buffer in GPU memory
      */
-    void sendForceBufferAddressToPpRanks(rvec* d_f);
+    void sendForceBufferAddressToPpRanks(DeviceBuffer<RVec> d_f);
 
     /*! \brief
      * Send force synchronizer to PP rank
index 8d8b97f5c5712330f90a6688c697b91bc260b133..69974b31196ab9ca0cc6269158669c608cd20d62 100644 (file)
@@ -48,6 +48,7 @@
 #include "config.h"
 
 #include "gromacs/ewald/pme_force_sender_gpu.h"
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/gmxassert.h"
 
@@ -75,7 +76,7 @@ PmeForceSenderGpu::PmeForceSenderGpu(GpuEventSynchronizer* /*pmeForcesReady */,
 PmeForceSenderGpu::~PmeForceSenderGpu() = default;
 
 /*!\brief init PME-PP GPU communication stub */
-void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(rvec* /* d_f */)
+void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(DeviceBuffer<RVec> /* d_f */)
 {
     GMX_ASSERT(!impl_,
                "A CPU stub for PME-PP GPU communication initialization was called instead of the "
index 44a2e30de33111941ea49abaa02b5d6d7dcc0078..509a624d041bccad080957ae53542c6dcf066b32 100644 (file)
@@ -70,7 +70,7 @@ PmeForceSenderGpu::Impl::Impl(GpuEventSynchronizer*  pmeForcesReady,
 PmeForceSenderGpu::Impl::~Impl() = default;
 
 /*! \brief  sends force buffer address to PP ranks */
-void PmeForceSenderGpu::Impl::sendForceBufferAddressToPpRanks(rvec* d_f)
+void PmeForceSenderGpu::Impl::sendForceBufferAddressToPpRanks(DeviceBuffer<Float3> d_f)
 {
     int ind_start = 0;
     int ind_end   = 0;
@@ -113,7 +113,7 @@ PmeForceSenderGpu::PmeForceSenderGpu(GpuEventSynchronizer*  pmeForcesReady,
 
 PmeForceSenderGpu::~PmeForceSenderGpu() = default;
 
-void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(rvec* d_f)
+void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(DeviceBuffer<RVec> d_f)
 {
     impl_->sendForceBufferAddressToPpRanks(d_f);
 }
index ad9718c4685b55d18cdf47edd78ff7993de562c7..c7d4c0d76c98fdf8f6e94d83d311d938a27a7692 100644 (file)
 #define GMX_PMEFORCESENDERGPU_IMPL_H
 
 #include "gromacs/ewald/pme_force_sender_gpu.h"
-#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
+#include "gromacs/gpu_utils/gputraits.h"
 #include "gromacs/utility/arrayref.h"
 
+class GpuEventSynchronizer;
+
 namespace gmx
 {
 
@@ -68,7 +71,7 @@ public:
      * sends force buffer address to PP rank
      * \param[in] d_f   force buffer in GPU memory
      */
-    void sendForceBufferAddressToPpRanks(rvec* d_f);
+    void sendForceBufferAddressToPpRanks(DeviceBuffer<Float3> d_f);
 
     /*! \brief
      * Send force synchronizer to PP rank
index e0c32e207d1b46edcf1365c6750581f980c013d3..225fb1050a281a33cfc3cff9bec8dea32d03e7b9 100644 (file)
@@ -440,11 +440,11 @@ void pme_gpu_reinit_computation(const gmx_pme_t* pme, gmx_wallcycle* wcycle)
     wallcycle_stop(wcycle, ewcLAUNCH_GPU);
 }
 
-void* pme_gpu_get_device_f(const gmx_pme_t* pme)
+DeviceBuffer<gmx::RVec> pme_gpu_get_device_f(const gmx_pme_t* pme)
 {
     if (!pme || !pme_gpu_active(pme))
     {
-        return nullptr;
+        return DeviceBuffer<gmx::RVec>{};
     }
     return pme_gpu_get_kernelparam_forces(pme->gpu);
 }
index 8eea806ac1934177f4f10364b09017354983efe6..798a9be9ef3563485370ca9d39bd7b18c37b3398 100644 (file)
@@ -1701,7 +1701,7 @@ void pme_gpu_gather(PmeGpu* pmeGpu, real** h_grids, const float lambda)
     }
 }
 
-void* pme_gpu_get_kernelparam_forces(const PmeGpu* pmeGpu)
+DeviceBuffer<gmx::RVec> pme_gpu_get_kernelparam_forces(const PmeGpu* pmeGpu)
 {
     if (pmeGpu && pmeGpu->kernelParams)
     {
@@ -1709,7 +1709,7 @@ void* pme_gpu_get_kernelparam_forces(const PmeGpu* pmeGpu)
     }
     else
     {
-        return nullptr;
+        return DeviceBuffer<gmx::RVec>{};
     }
 }
 
index 41b912e2b6a7385ac8ee102ebe3ab198b52e043f..7baa6bd3475eb1a79ff5e7f498c0554a7eb5f735 100644 (file)
@@ -405,8 +405,8 @@ GPU_FUNC_QUALIFIER void pme_gpu_set_kernelparam_coordinates(const PmeGpu* GPU_FU
  * \param[in] pmeGpu         The PME GPU structure.
  * \returns                  Pointer to force data
  */
-GPU_FUNC_QUALIFIER void* pme_gpu_get_kernelparam_forces(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
-        GPU_FUNC_TERM_WITH_RETURN(nullptr);
+GPU_FUNC_QUALIFIER DeviceBuffer<gmx::RVec> pme_gpu_get_kernelparam_forces(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
+        GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer<gmx::RVec>{});
 
 /*! \brief Return pointer to the sync object triggered after the PME force calculation completion
  * \param[in] pmeGpu         The PME GPU structure.
index 9ba22e26207b00f76218c0d153620f66f13f3b15..138711440c505085f1b358706209baacebf0f686 100644 (file)
@@ -445,8 +445,7 @@ static int gmx_pme_recv_coeffs_coords(struct gmx_pme_t*            pme,
                     // This rank will have its data accessed directly by PP rank, so needs to send the remote addresses.
                     pme_pp->pmeCoordinateReceiverGpu->sendCoordinateBufferAddressToPpRanks(
                             stateGpu->getCoordinates());
-                    pme_pp->pmeForceSenderGpu->sendForceBufferAddressToPpRanks(
-                            reinterpret_cast<rvec*>(pme_gpu_get_device_f(pme)));
+                    pme_pp->pmeForceSenderGpu->sendForceBufferAddressToPpRanks(pme_gpu_get_device_f(pme));
                 }
             }
 
index 3e56da9af3e8604c3030e7b04772a18be7b62c83..886e0c221b8f8a456b99ef4e40fcea45070da3e9 100644 (file)
@@ -44,6 +44,7 @@
 
 #include <memory>
 
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
 #include "gromacs/utility/gmxmpi.h"
 
 class DeviceContext;
@@ -99,7 +100,7 @@ public:
     /*! \brief
      * Return pointer to buffer used for staging PME force on GPU
      */
-    void* getGpuForceStagingPtr();
+    DeviceBuffer<gmx::RVec> getGpuForceStagingPtr();
 
     /*! \brief
      * Return pointer to event recorded when forces are ready
index c5f92aa53f01cbba57da1682e4df4d28a60a2836..d31b976c10f5a9de691420b747c26d9c0574d8ec 100644 (file)
@@ -102,12 +102,12 @@ void PmePpCommGpu::sendCoordinatesToPmeCudaDirect(void* /* sendPtr */,
                "implementation.");
 }
 
-void* PmePpCommGpu::getGpuForceStagingPtr()
+DeviceBuffer<gmx::RVec> PmePpCommGpu::getGpuForceStagingPtr()
 {
     GMX_ASSERT(!impl_,
                "A CPU stub for PME-PP GPU communication was called instead of the correct "
                "implementation.");
-    return nullptr;
+    return DeviceBuffer<gmx::RVec>{};
 }
 
 GpuEventSynchronizer* PmePpCommGpu::getForcesReadySynchronizer()
index 0ecf0281333003926fb8a875fd61eb60ca4337ff..cb9e787c446836d27549d3e1c6775017520bc07f 100644 (file)
@@ -64,7 +64,8 @@ PmePpCommGpu::Impl::Impl(MPI_Comm             comm,
     deviceContext_(deviceContext),
     pmePpCommStream_(deviceStream),
     comm_(comm),
-    pmeRank_(pmeRank)
+    pmeRank_(pmeRank),
+    d_pmeForces_(nullptr)
 {
     GMX_RELEASE_ASSERT(
             GMX_THREAD_MPI,
@@ -155,9 +156,10 @@ void PmePpCommGpu::Impl::sendCoordinatesToPmeCudaDirect(void* sendPtr,
     GMX_UNUSED_VALUE(coordinatesReadyOnDeviceEvent);
 #endif
 }
-void* PmePpCommGpu::Impl::getGpuForceStagingPtr()
+
+DeviceBuffer<Float3> PmePpCommGpu::Impl::getGpuForceStagingPtr()
 {
-    return static_cast<void*>(d_pmeForces_);
+    return d_pmeForces_;
 }
 
 GpuEventSynchronizer* PmePpCommGpu::Impl::getForcesReadySynchronizer()
@@ -194,7 +196,7 @@ void PmePpCommGpu::sendCoordinatesToPmeCudaDirect(void*                 sendPtr,
             sendPtr, sendSize, sendPmeCoordinatesFromGpu, coordinatesReadyOnDeviceEvent);
 }
 
-void* PmePpCommGpu::getGpuForceStagingPtr()
+DeviceBuffer<gmx::RVec> PmePpCommGpu::getGpuForceStagingPtr()
 {
     return impl_->getGpuForceStagingPtr();
 }
index 042891a04d3aaf044914e9cbf666c4c3254712f1..70ef8f937c3476b20e91e2d048fb759d4b36f9fb 100644 (file)
@@ -44,7 +44,9 @@
 #define GMX_PME_PP_COMM_GPU_IMPL_H
 
 #include "gromacs/ewald/pme_pp_comm_gpu.h"
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
 #include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
+#include "gromacs/gpu_utils/gputraits.h"
 #include "gromacs/math/vectypes.h"
 #include "gromacs/utility/gmxmpi.h"
 
@@ -110,7 +112,7 @@ public:
     /*! \brief
      * Return pointer to buffer used for staging PME force on GPU
      */
-    void* getGpuForceStagingPtr();
+    DeviceBuffer<Float3> getGpuForceStagingPtr();
 
     /*! \brief
      * Return pointer to event recorded when forces are ready
@@ -131,7 +133,7 @@ private:
     //! Rank of PME task
     int pmeRank_ = -1;
     //! Buffer for staging PME force on GPU
-    rvec* d_pmeForces_ = nullptr;
+    DeviceBuffer<gmx::RVec> d_pmeForces_;
     //! number of atoms in PME force staging array
     int d_pmeForcesSize_ = -1;
     //! number of atoms allocated in recvbuf array
index b23df660ed3fc1fb5ecdce731463e304c6538ac7..2955dd60c2013e48ac126ec1c97ae7f6ed58065d 100644 (file)
@@ -92,7 +92,7 @@ public:
      *
      * \param [in] forcePtr  Pointer to force to be reduced
      */
-    void registerRvecForce(void* forcePtr);
+    void registerRvecForce(DeviceBuffer<gmx::RVec> forcePtr);
 
     /*! \brief Add a dependency for this force reduction
      *
index b431fbad495c950c63cd568c287cf3305810ac52..6d826d66757c6936e60e8451790debedcf89954d 100644 (file)
@@ -82,7 +82,7 @@ void GpuForceReduction::registerNbnxmForce(DeviceBuffer<RVec> /* forcePtr */)
 }
 
 // NOLINTNEXTLINE readability-convert-member-functions-to-static
-void GpuForceReduction::registerRvecForce(void* /* forcePtr */)
+void GpuForceReduction::registerRvecForce(DeviceBuffer<gmx::RVec> /* forcePtr */)
 {
     GMX_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
 }
index dab7d4da0cbd8c9938a85766211d47d7a2ce9938..f62ec44d467239343115881c5472b89ad01f44d5 100644 (file)
@@ -43,7 +43,7 @@
 
 #include "gmxpre.h"
 
-#include "gpuforcereduction_impl.cuh"
+#include "gpuforcereduction_impl.h"
 
 #include <stdio.h>
 
@@ -108,11 +108,14 @@ static __global__ void reduceKernel(const float3* __restrict__ gm_nbnxmForce,
 GpuForceReduction::Impl::Impl(const DeviceContext& deviceContext,
                               const DeviceStream&  deviceStream,
                               gmx_wallcycle*       wcycle) :
+    baseForce_(nullptr),
     deviceContext_(deviceContext),
     deviceStream_(deviceStream),
+    nbnxmForceToAdd_(nullptr),
+    rvecForceToAdd_(nullptr),
     wcycle_(wcycle){};
 
-void GpuForceReduction::Impl::reinit(float3*               baseForcePtr,
+void GpuForceReduction::Impl::reinit(DeviceBuffer<Float3>  baseForcePtr,
                                      const int             numAtoms,
                                      ArrayRef<const int>   cell,
                                      const int             atomStart,
@@ -223,9 +226,9 @@ void GpuForceReduction::registerNbnxmForce(DeviceBuffer<Float3> forcePtr)
     impl_->registerNbnxmForce(forcePtr);
 }
 
-void GpuForceReduction::registerRvecForce(void* forcePtr)
+void GpuForceReduction::registerRvecForce(DeviceBuffer<gmx::RVec> forcePtr)
 {
-    impl_->registerRvecForce(reinterpret_cast<DeviceBuffer<RVec>>(forcePtr));
+    impl_->registerRvecForce(forcePtr);
 }
 
 void GpuForceReduction::addDependency(GpuEventSynchronizer* const dependency)
@@ -240,7 +243,7 @@ void GpuForceReduction::reinit(DeviceBuffer<RVec>    baseForcePtr,
                                const bool            accumulate,
                                GpuEventSynchronizer* completionMarker)
 {
-    impl_->reinit(asFloat3(baseForcePtr), numAtoms, cell, atomStart, accumulate, completionMarker);
+    impl_->reinit(baseForcePtr, numAtoms, cell, atomStart, accumulate, completionMarker);
 }
 void GpuForceReduction::execute()
 {
similarity index 95%
rename from src/gromacs/mdlib/gpuforcereduction_impl.cuh
rename to src/gromacs/mdlib/gpuforcereduction_impl.h
index c7d9493c821129515e44db0cb811ec3b3d6aef61..98c8ca2c7be9140babcbb234893a099a442a6429 100644 (file)
@@ -59,7 +59,7 @@ struct cellInfo
     //! cell index mapping for any nbat-format forces
     const int* cell = nullptr;
     //! device copy of cell index mapping for any nbat-format forces
-    int* d_cell = nullptr;
+    DeviceBuffer<int> d_cell;
     //! number of atoms in cell array
     int cellSize = -1;
     //! number of atoms allocated in cell array
@@ -76,7 +76,7 @@ public:
      * \param [in] deviceContext GPU device context
      * \param [in] wcycle        The wallclock counter
      */
-    Impl(const DeviceContext& deviceContext, const DeviceStream& deviceStreami, gmx_wallcycle* wcycle);
+    Impl(const DeviceContext& deviceContext, const DeviceStream& deviceStream, gmx_wallcycle* wcycle);
     ~Impl();
 
     /*! \brief Register a nbnxm-format force to be reduced
@@ -106,7 +106,7 @@ public:
      * \param [in] accumulate       Whether reduction should be accumulated
      * \param [in] completionMarker Event to be marked when launch of reduction is complete
      */
-    void reinit(float3*               baseForcePtr,
+    void reinit(DeviceBuffer<Float3>  baseForcePtr,
                 const int             numAtoms,
                 ArrayRef<const int>   cell,
                 const int             atomStart,
@@ -118,7 +118,7 @@ public:
 
 private:
     //! force to be used as a base for this reduction
-    float3* baseForce_ = nullptr;
+    DeviceBuffer<Float3> baseForce_;
     //! starting atom
     int atomStart_ = 0;
     //! number of atoms
@@ -134,9 +134,9 @@ private:
     //! stream to be used for this reduction
     const DeviceStream& deviceStream_;
     //! Nbnxm force to be added in this reduction
-    DeviceBuffer<RVec> nbnxmForceToAdd_ = nullptr;
+    DeviceBuffer<RVec> nbnxmForceToAdd_;
     //! Rvec-format force to be added in this reduction
-    DeviceBuffer<RVec> rvecForceToAdd_ = nullptr;
+    DeviceBuffer<RVec> rvecForceToAdd_;
     //! event to be marked when reduction launch has been completed
     GpuEventSynchronizer* completionMarker_ = nullptr;
     //! The wallclock counter
index 99dc4758c4070546394ece82f252eecca21dc6c4..ec10bb27a0fa7dd3fc09c0c7aa819b2e6c97b6d7 100644 (file)
@@ -1123,9 +1123,10 @@ static void setupGpuForceReductions(gmx::MdrunScheduleWorkload* runScheduleWork,
     if (runScheduleWork->simulationWork.useGpuPme
         && (thisRankHasDuty(cr, DUTY_PME) || runScheduleWork->simulationWork.useGpuPmePpCommunication))
     {
-        void* forcePtr = thisRankHasDuty(cr, DUTY_PME) ? pme_gpu_get_device_f(fr->pmedata)
-                                                       : // PME force buffer on same GPU
-                                 fr->pmePpCommGpu->getGpuForceStagingPtr(); // buffer received from other GPU
+        DeviceBuffer<gmx::RVec> forcePtr =
+                thisRankHasDuty(cr, DUTY_PME) ? pme_gpu_get_device_f(fr->pmedata)
+                                              :                    // PME force buffer on same GPU
+                        fr->pmePpCommGpu->getGpuForceStagingPtr(); // buffer received from other GPU
         fr->gpuForceReduction[gmx::AtomLocality::Local]->registerRvecForce(forcePtr);
 
         GpuEventSynchronizer* const pmeSynchronizer =
index b56185651170a6895d88cffe31ac20f3e2760bde..3b428a183b591af953d6100f31ec8b7975dd3b1a 100644 (file)
@@ -60,6 +60,7 @@
 #include "gromacs/gpu_utils/device_context.h"
 #include "gromacs/gpu_utils/device_stream.h"
 #include "gromacs/gpu_utils/devicebuffer.h"
+#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
 #include "gromacs/gpu_utils/gputraits.cuh"
 #include "gromacs/gpu_utils/vectype_ops.cuh"
 #include "gromacs/mdlib/leapfrog_gpu.h"
index 76e41398e762aaa941d41b24e7968bd0e242421e..1d1a8fd0de1a8453e6a20a022fedcf5da60c54bb 100644 (file)
 
 #include "gmxpre.h"
 
-#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
 #include "gromacs/mdlib/leapfrog_gpu.h"
 #include "gromacs/mdlib/lincs_gpu.cuh"
 #include "gromacs/mdlib/settle_gpu.cuh"
 #include "gromacs/mdlib/update_constrain_gpu.h"
 #include "gromacs/mdtypes/inputrec.h"
 
+class GpuEventSynchronizer;
+
 namespace gmx
 {
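
With the typed signatures in place, the wiring in setupGpuForceReductions no
longer needs casts. A condensed sketch of the resulting call flow (names taken
from the sim_util.cpp hunk above; control flow simplified):

    // Pick the PME force buffer: local if this rank runs PME,
    // otherwise the staging buffer received from the PME rank.
    DeviceBuffer<gmx::RVec> pmeForce = thisRankHasDuty(cr, DUTY_PME)
            ? pme_gpu_get_device_f(fr->pmedata)           // same-GPU PME force buffer
            : fr->pmePpCommGpu->getGpuForceStagingPtr();  // staged from the PME rank
    fr->gpuForceReduction[gmx::AtomLocality::Local]->registerRvecForce(pmeForce);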