From: Artem Zhmurov Date: Mon, 7 Jun 2021 10:53:30 +0000 (+0300) Subject: Remove duplicating pointers to device buffers in GPU listed forces X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=40bcedcdb9abb1dcbc7e5f873b9d6462c361f9e6;p=alexxy%2Fgromacs.git Remove duplicating pointers to device buffers in GPU listed forces XYZQ, forces and shift forces are saved in both ListedForces class and in BondedCudaKernelParameters class. This removes the later and make it so the buffers are passed directly to the kernel. --- diff --git a/src/gromacs/listed_forces/listed_forces_gpu_impl.cu b/src/gromacs/listed_forces/listed_forces_gpu_impl.cu index d02f535623..1c321ce3b7 100644 --- a/src/gromacs/listed_forces/listed_forces_gpu_impl.cu +++ b/src/gromacs/listed_forces/listed_forces_gpu_impl.cu @@ -101,9 +101,6 @@ ListedForcesGpu::Impl::Impl(const gmx_ffparams_t& ffparams, kernelParams_.electrostaticsScaleFactor = electrostaticsScaleFactor; kernelParams_.d_forceParams = d_forceParams_; - kernelParams_.d_xq = d_xq_; - kernelParams_.d_f = d_f_; - kernelParams_.d_fShift = d_fShift_; kernelParams_.d_vTot = d_vTot_; for (int i = 0; i < numFTypesOnGpu; i++) { @@ -284,9 +281,6 @@ void ListedForcesGpu::Impl::updateInteractionListsAndDeviceBuffers(ArrayRef -__global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams) +__global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams, float4* gm_xq, float3* gm_f, float3* gm_fShift) { assert(blockDim.y == 1 && blockDim.z == 1); const int tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -773,8 +773,8 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams) numBonds, iatoms, kernelParams.d_forceParams, - kernelParams.d_xq, - kernelParams.d_f, + gm_xq, + gm_f, sm_fShiftLoc, kernelParams.pbcAiuc); break; @@ -784,8 +784,8 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams) numBonds, iatoms, kernelParams.d_forceParams, - kernelParams.d_xq, - kernelParams.d_f, + gm_xq, + gm_f, sm_fShiftLoc, kernelParams.pbcAiuc); break; @@ -795,8 +795,8 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams) numBonds, iatoms, kernelParams.d_forceParams, - kernelParams.d_xq, - kernelParams.d_f, + gm_xq, + gm_f, sm_fShiftLoc, kernelParams.pbcAiuc); break; @@ -807,8 +807,8 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams) numBonds, iatoms, kernelParams.d_forceParams, - kernelParams.d_xq, - kernelParams.d_f, + gm_xq, + gm_f, sm_fShiftLoc, kernelParams.pbcAiuc); break; @@ -818,8 +818,8 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams) numBonds, iatoms, kernelParams.d_forceParams, - kernelParams.d_xq, - kernelParams.d_f, + gm_xq, + gm_f, sm_fShiftLoc, kernelParams.pbcAiuc); break; @@ -829,8 +829,8 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams) numBonds, iatoms, kernelParams.d_forceParams, - kernelParams.d_xq, - kernelParams.d_f, + gm_xq, + gm_f, sm_fShiftLoc, kernelParams.pbcAiuc); break; @@ -839,8 +839,8 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams) numBonds, iatoms, kernelParams.d_forceParams, - kernelParams.d_xq, - kernelParams.d_f, + gm_xq, + gm_f, sm_fShiftLoc, kernelParams.pbcAiuc, kernelParams.electrostaticsScaleFactor, @@ -899,7 +899,7 @@ __global__ void exec_kernel_gpu(BondedCudaKernelParameters kernelParams) __syncthreads(); if (threadIdx.x < c_numShiftVectors) { - atomicAdd(kernelParams.d_fShift[threadIdx.x], sm_fShiftLoc[threadIdx.x]); + atomicAdd(gm_fShift[threadIdx.x], sm_fShiftLoc[threadIdx.x]); } } } @@ -926,7 +926,8 @@ void ListedForcesGpu::Impl::launchKernel() auto kernelPtr = exec_kernel_gpu; - const auto kernelArgs = prepareGpuKernelArguments(kernelPtr, kernelLaunchConfig_, &kernelParams_); + const auto kernelArgs = prepareGpuKernelArguments( + kernelPtr, kernelLaunchConfig_, &kernelParams_, &d_xq_, &d_f_, &d_fShift_); launchGpuKernel(kernelPtr, kernelLaunchConfig_,