Use RVec instead of float for x, v and f device buffers

[alexxy/gromacs.git] / src / gromacs / ewald / pme_gather.cu
diff --git a/src/gromacs/ewald/pme_gather.cu b/src/gromacs/ewald/pme_gather.cu

index 8b2ff5f80e7e0a3e1623bc6c10501eb50cfe48a5..616516df2335214cb16b118cb84d796d1e7a5b33 100644 (file)
--- a/src/gromacs/ewald/pme_gather.cu
+++ b/src/gromacs/ewald/pme_gather.cu
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -44,6 +44,7 @@
  #include <cassert>
  
  #include "gromacs/gpu_utils/cuda_kernel_utils.cuh"
+#include "gromacs/gpu_utils/cudautils.cuh"
  
  #include "pme.cuh"
  #include "pme_calculate_splines.cuh"
@@ -321,32 +322,27 @@ __launch_bounds__(c_gatherMaxThreadsPerBlock, c_gatherMinBlocksPerMP) __global__
      }
      else
      {
+        const float3* __restrict__ gm_coordinates = asFloat3(kernelParams.atoms.d_coordinates);
          /* Recaclulate  Splines  */
          if (c_useAtomDataPrefetch)
          {
              // charges
              __shared__ float sm_coefficients[atomsPerBlock];
              // Coordinates
-            __shared__ float sm_coordinates[DIM * atomsPerBlock];
+            __shared__ float3 sm_coordinates[atomsPerBlock];
              /* Staging coefficients/charges */
-            pme_gpu_stage_atom_data<float, atomsPerBlock, 1>(kernelParams, sm_coefficients,
-                                                             kernelParams.atoms.d_coefficients);
+            pme_gpu_stage_atom_data<float, atomsPerBlock, 1>(kernelParams, sm_coefficients, gm_coefficients);
  
              /* Staging coordinates */
-            pme_gpu_stage_atom_data<float, atomsPerBlock, DIM>(kernelParams, sm_coordinates,
-                                                               kernelParams.atoms.d_coordinates);
+            pme_gpu_stage_atom_data<float3, atomsPerBlock, 1>(kernelParams, sm_coordinates, gm_coordinates);
              __syncthreads();
-            atomX.x    = sm_coordinates[atomIndexLocal * DIM + XX];
-            atomX.y    = sm_coordinates[atomIndexLocal * DIM + YY];
-            atomX.z    = sm_coordinates[atomIndexLocal * DIM + ZZ];
+            atomX      = sm_coordinates[atomIndexLocal];
              atomCharge = sm_coefficients[atomIndexLocal];
          }
          else
          {
+            atomX      = gm_coordinates[atomIndexGlobal];
              atomCharge = gm_coefficients[atomIndexGlobal];
-            atomX.x    = kernelParams.atoms.d_coordinates[atomIndexGlobal * DIM + XX];
-            atomX.y    = kernelParams.atoms.d_coordinates[atomIndexGlobal * DIM + YY];
-            atomX.z    = kernelParams.atoms.d_coordinates[atomIndexGlobal * DIM + ZZ];
          }
          calculate_splines<order, atomsPerBlock, atomsPerWarp, true, false>(
                  kernelParams, atomIndexOffset, atomX, atomCharge, sm_theta, sm_dtheta, sm_gridlineIndices);