From a2084d8287a631ba554fd4fd674f3502d9746c90 Mon Sep 17 00:00:00 2001 From: Magnus Lundborg Date: Mon, 13 Sep 2021 18:08:15 +0000 Subject: [PATCH] Calculate FEP (GPU) splines for all atoms. Since charges can be 0 in one state, but not in the other, the splines still need to be calculated. This was already ensured by c_skipNeutralAtoms being false, but this commit makes that more clear. Refs: #4139 --- src/gromacs/ewald/pme_gather.cu | 4 ++-- .../ewald/pme_gpu_calculate_splines.cuh | 18 +++++++++++++----- src/gromacs/ewald/pme_spread.clh | 7 +++++-- src/gromacs/ewald/pme_spread.cu | 4 ++-- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/gromacs/ewald/pme_gather.cu b/src/gromacs/ewald/pme_gather.cu index 160f544d3e..40eea2d2dc 100644 --- a/src/gromacs/ewald/pme_gather.cu +++ b/src/gromacs/ewald/pme_gather.cu @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -459,7 +459,7 @@ __launch_bounds__(c_gatherMaxThreadsPerBlock, c_gatherMinBlocksPerMP) __global__ atomX = gm_coordinates[atomIndexGlobal]; atomCharge = gm_coefficientsA[atomIndexGlobal]; } - calculate_splines( + calculate_splines( kernelParams, atomIndexOffset, atomX, atomCharge, sm_theta, sm_dtheta, sm_gridlineIndices); __syncwarp(); } diff --git a/src/gromacs/ewald/pme_gpu_calculate_splines.cuh b/src/gromacs/ewald/pme_gpu_calculate_splines.cuh index fc319eb1c2..1e3ccefb01 100644 --- a/src/gromacs/ewald/pme_gpu_calculate_splines.cuh +++ b/src/gromacs/ewald/pme_gpu_calculate_splines.cuh @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -173,8 +173,12 @@ __device__ __forceinline__ void pme_gpu_stage_atom_data(T* __restrict__ sm_desti * \tparam[in] atomsPerBlock Number of atoms processed by a block - should be accounted for * in the sizes of the shared memory arrays. * \tparam[in] atomsPerWarp Number of atoms processed by a warp - * \tparam[in] writeSmDtheta Bool controling if the theta derivative should be written to shared memory. Enables calculation of dtheta if set. - * \tparam[in] writeGlobal A boolean which tells if the theta values and gridlines should be written to global memory. Enables calculation of dtheta if set. + * \tparam[in] writeSmDtheta Bool controling if the theta derivative should be written to + * shared memory. Enables calculation of dtheta if set. + * \tparam[in] writeGlobal A boolean which tells if the theta values and gridlines should + * be written to global memory. Enables calculation of dtheta if + * set. + * \tparam[in] numGrids The number of grids using the splines. * \param[in] kernelParams Input PME CUDA data in constant memory. * \param[in] atomIndexOffset Starting atom index for the execution block w.r.t. global memory. * \param[in] atomX Atom coordinate of atom processed by thread. @@ -184,7 +188,7 @@ __device__ __forceinline__ void pme_gpu_stage_atom_data(T* __restrict__ sm_desti * \param[out] sm_gridlineIndices Atom gridline indices in the shared memory. */ -template +template __device__ __forceinline__ void calculate_splines(const PmeGpuCudaKernelParams kernelParams, const int atomIndexOffset, const float3 atomX, @@ -193,6 +197,9 @@ __device__ __forceinline__ void calculate_splines(const PmeGpuCudaKernelParams k float* __restrict__ sm_dtheta, int* __restrict__ sm_gridlineIndices) { + assert(numGrids == 1 || numGrids == 2); + assert(numGrids == 1 || c_skipNeutralAtoms == false); + /* Global memory pointers for output */ float* __restrict__ gm_theta = kernelParams.atoms.d_theta; float* __restrict__ gm_dtheta = kernelParams.atoms.d_dtheta; @@ -293,7 +300,8 @@ __device__ __forceinline__ void calculate_splines(const PmeGpuCudaKernelParams k /* B-spline calculation */ const int chargeCheck = pme_gpu_check_atom_charge(atomCharge); - if (chargeCheck) + /* With FEP (numGrids == 2), we might have 0 charge in state A, but !=0 in state B, so we always calculate splines */ + if (numGrids == 2 || chargeCheck) { float div; int o = orderIndex; // This is an index that is set once for PME_GPU_PARALLEL_SPLINE == 1 diff --git a/src/gromacs/ewald/pme_spread.clh b/src/gromacs/ewald/pme_spread.clh index 9e887a9e6e..7f6f4beccd 100644 --- a/src/gromacs/ewald/pme_spread.clh +++ b/src/gromacs/ewald/pme_spread.clh @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -132,6 +132,8 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern /* Thread index w.r.t. block */ assert((get_local_id(2) * get_local_size(1) + get_local_id(1)) * get_local_size(0) + get_local_id(0) < MAX_INT); + assert(numGrids == 1 || numGrids == 2); + assert(numGrids == 1 || c_skipNeutralAtoms == false); const int threadLocalIndex = (int)((get_local_id(2) * get_local_size(1) + get_local_id(1)) * get_local_size(0) + get_local_id(0)); @@ -238,7 +240,8 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern /* B-spline calculation */ const int chargeCheck = pme_gpu_check_atom_charge(sm_coefficients[atomIndexLocal]); - if (chargeCheck) + /* With FEP (numGrids == 2), we might have 0 charge in state A, but !=0 in state B, so we always calculate splines */ + if (numGrids == 2 || chargeCheck) { int o = orderIndex; // This is an index that is set once for PME_GPU_PARALLEL_SPLINE == 1 diff --git a/src/gromacs/ewald/pme_spread.cu b/src/gromacs/ewald/pme_spread.cu index 3cf3a1f7ca..38f41ec5b2 100644 --- a/src/gromacs/ewald/pme_spread.cu +++ b/src/gromacs/ewald/pme_spread.cu @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013-2016,2017,2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2013-2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -252,7 +252,7 @@ __launch_bounds__(c_spreadMaxThreadsPerBlock) CLANG_DISABLE_OPTIMIZATION_ATTRIBU { atomX = gm_coordinates[atomIndexGlobal]; } - calculate_splines( + calculate_splines( kernelParams, atomIndexOffset, atomX, atomCharge, sm_theta, &dtheta, sm_gridlineIndices); __syncwarp(); } -- 2.22.0