Calculate FEP (GPU) splines for all atoms.

author Magnus Lundborg <magnus.lundborg@scilifelab.se>

Mon, 13 Sep 2021 18:08:15 +0000 (18:08 +0000)

committer Magnus Lundborg <magnus.lundborg@scilifelab.se>

Mon, 13 Sep 2021 18:08:15 +0000 (18:08 +0000)
author Magnus Lundborg <magnus.lundborg@scilifelab.se>
Mon, 13 Sep 2021 18:08:15 +0000 (18:08 +0000)
committer Magnus Lundborg <magnus.lundborg@scilifelab.se>
Mon, 13 Sep 2021 18:08:15 +0000 (18:08 +0000)
diff --git a/src/gromacs/ewald/pme_gather.cu b/src/gromacs/ewald/pme_gather.cu

index 160f544d3ecb3ace27be9eba5a7265f5a5845697..40eea2d2dc6a30a527fb9616cf7a636d7c4dc906 100644 (file)
--- a/src/gromacs/ewald/pme_gather.cu
+++ b/src/gromacs/ewald/pme_gather.cu
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -459,7 +459,7 @@ __launch_bounds__(c_gatherMaxThreadsPerBlock, c_gatherMinBlocksPerMP) __global__
              atomX      = gm_coordinates[atomIndexGlobal];
              atomCharge = gm_coefficientsA[atomIndexGlobal];
          }
-        calculate_splines<order, atomsPerBlock, atomsPerWarp, true, false>(
+        calculate_splines<order, atomsPerBlock, atomsPerWarp, true, false, numGrids>(
                  kernelParams, atomIndexOffset, atomX, atomCharge, sm_theta, sm_dtheta, sm_gridlineIndices);
          __syncwarp();
      }
diff --git a/src/gromacs/ewald/pme_gpu_calculate_splines.cuh b/src/gromacs/ewald/pme_gpu_calculate_splines.cuh

index fc319eb1c23a78e53f18505bc8f1c9a41a5f9a37..1e3ccefb01aa57bf1aaa268efcbe16aee13ecf03 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_calculate_splines.cuh
+++ b/src/gromacs/ewald/pme_gpu_calculate_splines.cuh
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -173,8 +173,12 @@ __device__ __forceinline__ void pme_gpu_stage_atom_data(T* __restrict__ sm_desti
   * \tparam[in] atomsPerBlock        Number of atoms processed by a block - should be accounted for
   *                                  in the sizes of the shared memory arrays.
   * \tparam[in] atomsPerWarp         Number of atoms processed by a warp
- * \tparam[in] writeSmDtheta        Bool controling if the theta derivative should be written to shared memory. Enables calculation of dtheta if set.
- * \tparam[in] writeGlobal          A boolean which tells if the theta values and gridlines should be written to global memory. Enables calculation of dtheta if set.
+ * \tparam[in] writeSmDtheta        Bool controling if the theta derivative should be written to
+ *                                  shared memory. Enables calculation of dtheta if set.
+ * \tparam[in] writeGlobal          A boolean which tells if the theta values and gridlines should
+ *                                  be written to global memory. Enables calculation of dtheta if
+ *                                  set.
+ * \tparam[in] numGrids             The number of grids using the splines.
   * \param[in]  kernelParams         Input PME CUDA data in constant memory.
   * \param[in]  atomIndexOffset      Starting atom index for the execution block w.r.t. global memory.
   * \param[in]  atomX                Atom coordinate of atom processed by thread.
@@ -184,7 +188,7 @@ __device__ __forceinline__ void pme_gpu_stage_atom_data(T* __restrict__ sm_desti
   * \param[out] sm_gridlineIndices   Atom gridline indices in the shared memory.
   */
  
-template<int order, int atomsPerBlock, int atomsPerWarp, bool writeSmDtheta, bool writeGlobal>
+template<int order, int atomsPerBlock, int atomsPerWarp, bool writeSmDtheta, bool writeGlobal, int numGrids>
  __device__ __forceinline__ void calculate_splines(const PmeGpuCudaKernelParams kernelParams,
                                                    const int                    atomIndexOffset,
                                                    const float3                 atomX,
@@ -193,6 +197,9 @@ __device__ __forceinline__ void calculate_splines(const PmeGpuCudaKernelParams k
                                                    float* __restrict__ sm_dtheta,
                                                    int* __restrict__ sm_gridlineIndices)
  {
+    assert(numGrids == 1 || numGrids == 2);
+    assert(numGrids == 1 || c_skipNeutralAtoms == false);
+
      /* Global memory pointers for output */
      float* __restrict__ gm_theta         = kernelParams.atoms.d_theta;
      float* __restrict__ gm_dtheta        = kernelParams.atoms.d_dtheta;
@@ -293,7 +300,8 @@ __device__ __forceinline__ void calculate_splines(const PmeGpuCudaKernelParams k
          /* B-spline calculation */
  
          const int chargeCheck = pme_gpu_check_atom_charge(atomCharge);
-        if (chargeCheck)
+        /* With FEP (numGrids == 2), we might have 0 charge in state A, but !=0 in state B, so we always calculate splines */
+        if (numGrids == 2 || chargeCheck)
          {
              float div;
              int o = orderIndex; // This is an index that is set once for PME_GPU_PARALLEL_SPLINE == 1
diff --git a/src/gromacs/ewald/pme_spread.clh b/src/gromacs/ewald/pme_spread.clh

index 9e887a9e6efb7fe14eab0007065baf83b7f46ac4..7f6f4beccdf4b224100d49c9953ec88721ca0d08 100644 (file)
--- a/src/gromacs/ewald/pme_spread.clh
+++ b/src/gromacs/ewald/pme_spread.clh
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -132,6 +132,8 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
      /* Thread index w.r.t. block */
      assert((get_local_id(2) * get_local_size(1) + get_local_id(1)) * get_local_size(0) + get_local_id(0)
             < MAX_INT);
+    assert(numGrids == 1 || numGrids == 2);
+    assert(numGrids == 1 || c_skipNeutralAtoms == false);
      const int threadLocalIndex =
              (int)((get_local_id(2) * get_local_size(1) + get_local_id(1)) * get_local_size(0)
                    + get_local_id(0));
@@ -238,7 +240,8 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
          /* B-spline calculation */
  
          const int chargeCheck = pme_gpu_check_atom_charge(sm_coefficients[atomIndexLocal]);
-        if (chargeCheck)
+        /* With FEP (numGrids == 2), we might have 0 charge in state A, but !=0 in state B, so we always calculate splines */
+        if (numGrids == 2 || chargeCheck)
          {
              int o = orderIndex; // This is an index that is set once for PME_GPU_PARALLEL_SPLINE == 1
  
diff --git a/src/gromacs/ewald/pme_spread.cu b/src/gromacs/ewald/pme_spread.cu

index 3cf3a1f7ca77599297e51adb4a0d57b043f7117c..38f41ec5b26e7504d2f1c6586a0d26ff37a077f2 100644 (file)
--- a/src/gromacs/ewald/pme_spread.cu
+++ b/src/gromacs/ewald/pme_spread.cu
@@ -3,7 +3,7 @@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013-2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2013-2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -252,7 +252,7 @@ __launch_bounds__(c_spreadMaxThreadsPerBlock) CLANG_DISABLE_OPTIMIZATION_ATTRIBU
          {
              atomX = gm_coordinates[atomIndexGlobal];
          }
-        calculate_splines<order, atomsPerBlock, atomsPerWarp, false, writeGlobal>(
+        calculate_splines<order, atomsPerBlock, atomsPerWarp, false, writeGlobal, numGrids>(
                  kernelParams, atomIndexOffset, atomX, atomCharge, sm_theta, &dtheta, sm_gridlineIndices);
          __syncwarp();
      }
author	Magnus Lundborg <magnus.lundborg@scilifelab.se>
	Mon, 13 Sep 2021 18:08:15 +0000 (18:08 +0000)
committer	Magnus Lundborg <magnus.lundborg@scilifelab.se>
	Mon, 13 Sep 2021 18:08:15 +0000 (18:08 +0000)
src/gromacs/ewald/pme_gather.cu		patch \| blob \| history
src/gromacs/ewald/pme_gpu_calculate_splines.cuh		patch \| blob \| history
src/gromacs/ewald/pme_spread.clh		patch \| blob \| history
src/gromacs/ewald/pme_spread.cu		patch \| blob \| history