Remove duplicating pointers to device buffers in GPU listed forces
authorArtem Zhmurov <zhmurov@gmail.com>
Mon, 7 Jun 2021 10:53:30 +0000 (13:53 +0300)
committerArtem Zhmurov <zhmurov@gmail.com>
Mon, 7 Jun 2021 12:00:15 +0000 (12:00 +0000)
XYZQ, forces and shift forces are saved in both ListedForces class and
in BondedCudaKernelParameters class. This removes the later and
make it so the buffers are passed directly to the kernel.

src/gromacs/listed_forces/listed_forces_gpu_impl.cu
src/gromacs/listed_forces/listed_forces_gpu_impl.h
src/gromacs/listed_forces/listed_forces_gpu_internal.cu

index d02f53562364c219f3fd1dd58f9babac2043afed..1c321ce3b7cb1fd0119886279ca2036925febda5 100644 (file)
@@ -101,9 +101,6 @@ ListedForcesGpu::Impl::Impl(const gmx_ffparams_t& ffparams,
 
     kernelParams_.electrostaticsScaleFactor = electrostaticsScaleFactor;
     kernelParams_.d_forceParams             = d_forceParams_;
-    kernelParams_.d_xq                      = d_xq_;
-    kernelParams_.d_f                       = d_f_;
-    kernelParams_.d_fShift                  = d_fShift_;
     kernelParams_.d_vTot                    = d_vTot_;
     for (int i = 0; i < numFTypesOnGpu; i++)
     {
@@ -284,9 +281,6 @@ void ListedForcesGpu::Impl::updateInteractionListsAndDeviceBuffers(ArrayRef<cons
     d_f_      = asFloat3(d_fPtr);
     d_fShift_ = asFloat3(d_fShiftPtr);
 
-    kernelParams_.d_xq          = d_xq_;
-    kernelParams_.d_f           = d_f_;
-    kernelParams_.d_fShift      = d_fShift_;
     kernelParams_.d_forceParams = d_forceParams_;
     kernelParams_.d_vTot        = d_vTot_;
 
index 902ceb24c400194c0185596874ccde83220d8de6..ea9e8b822cfa56868c4b208fa8329226d8574b64 100644 (file)
@@ -95,12 +95,6 @@ struct BondedCudaKernelParameters
 
     //! Force parameters (on GPU)
     t_iparams* d_forceParams;
-    //! Coordinates before the timestep (on GPU)
-    const float4* d_xq;
-    //! Forces on atoms (on GPU)
-    float3* d_f;
-    //! Force shifts on atoms (on GPU)
-    float3* d_fShift;
     //! Total Energy (on GPU)
     float* d_vTot;
     //! Interaction list atoms (on GPU)
@@ -114,9 +108,6 @@ struct BondedCudaKernelParameters
 
         electrostaticsScaleFactor = 1.0;
         d_forceParams             = nullptr;
-        d_xq                      = nullptr;
-        d_f                       = nullptr;
-        d_fShift                  = nullptr;
         d_vTot                    = nullptr;
     }
 };
index 9b35ea6077440dbde4350fe3939d686766a89e99..7797f2c11e9252e2f3b2a5b19bdf2868658fa9f6 100644 (file)
@@ -727,7 +727,7 @@ namespace gmx
 {
 
 template<bool calcVir, bool calcEner>
-__global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams)
+__global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams, float4* gm_xq, float3* gm_f, float3* gm_fShift)
 {
     assert(blockDim.y == 1 && blockDim.z == 1);
     const int tid          = blockIdx.x * blockDim.x + threadIdx.x;
@@ -773,8 +773,8 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams)
                                                  numBonds,
                                                  iatoms,
                                                  kernelParams.d_forceParams,
-                                                 kernelParams.d_xq,
-                                                 kernelParams.d_f,
+                                                 gm_xq,
+                                                 gm_f,
                                                  sm_fShiftLoc,
                                                  kernelParams.pbcAiuc);
                     break;
@@ -784,8 +784,8 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams)
                                                   numBonds,
                                                   iatoms,
                                                   kernelParams.d_forceParams,
-                                                  kernelParams.d_xq,
-                                                  kernelParams.d_f,
+                                                  gm_xq,
+                                                  gm_f,
                                                   sm_fShiftLoc,
                                                   kernelParams.pbcAiuc);
                     break;
@@ -795,8 +795,8 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams)
                                                         numBonds,
                                                         iatoms,
                                                         kernelParams.d_forceParams,
-                                                        kernelParams.d_xq,
-                                                        kernelParams.d_f,
+                                                        gm_xq,
+                                                        gm_f,
                                                         sm_fShiftLoc,
                                                         kernelParams.pbcAiuc);
                     break;
@@ -807,8 +807,8 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams)
                                                  numBonds,
                                                  iatoms,
                                                  kernelParams.d_forceParams,
-                                                 kernelParams.d_xq,
-                                                 kernelParams.d_f,
+                                                 gm_xq,
+                                                 gm_f,
                                                  sm_fShiftLoc,
                                                  kernelParams.pbcAiuc);
                     break;
@@ -818,8 +818,8 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams)
                                                   numBonds,
                                                   iatoms,
                                                   kernelParams.d_forceParams,
-                                                  kernelParams.d_xq,
-                                                  kernelParams.d_f,
+                                                  gm_xq,
+                                                  gm_f,
                                                   sm_fShiftLoc,
                                                   kernelParams.pbcAiuc);
                     break;
@@ -829,8 +829,8 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams)
                                                  numBonds,
                                                  iatoms,
                                                  kernelParams.d_forceParams,
-                                                 kernelParams.d_xq,
-                                                 kernelParams.d_f,
+                                                 gm_xq,
+                                                 gm_f,
                                                  sm_fShiftLoc,
                                                  kernelParams.pbcAiuc);
                     break;
@@ -839,8 +839,8 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams)
                                                  numBonds,
                                                  iatoms,
                                                  kernelParams.d_forceParams,
-                                                 kernelParams.d_xq,
-                                                 kernelParams.d_f,
+                                                 gm_xq,
+                                                 gm_f,
                                                  sm_fShiftLoc,
                                                  kernelParams.pbcAiuc,
                                                  kernelParams.electrostaticsScaleFactor,
@@ -899,7 +899,7 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams)
         __syncthreads();
         if (threadIdx.x < c_numShiftVectors)
         {
-            atomicAdd(kernelParams.d_fShift[threadIdx.x], sm_fShiftLoc[threadIdx.x]);
+            atomicAdd(gm_fShift[threadIdx.x], sm_fShiftLoc[threadIdx.x]);
         }
     }
 }
@@ -926,7 +926,8 @@ void ListedForcesGpu::Impl::launchKernel()
 
     auto kernelPtr = exec_kernel_gpu<calcVir, calcEner>;
 
-    const auto kernelArgs = prepareGpuKernelArguments(kernelPtr, kernelLaunchConfig_, &kernelParams_);
+    const auto kernelArgs = prepareGpuKernelArguments(
+            kernelPtr, kernelLaunchConfig_, &kernelParams_, &d_xq_, &d_f_, &d_fShift_);
 
     launchGpuKernel(kernelPtr,
                     kernelLaunchConfig_,