Use RVec instead of float for x, v and f device buffers
[alexxy/gromacs.git] / src / gromacs / ewald / pme_gather.cu
index 8b2ff5f80e7e0a3e1623bc6c10501eb50cfe48a5..616516df2335214cb16b118cb84d796d1e7a5b33 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -44,6 +44,7 @@
 #include <cassert>
 
 #include "gromacs/gpu_utils/cuda_kernel_utils.cuh"
+#include "gromacs/gpu_utils/cudautils.cuh"
 
 #include "pme.cuh"
 #include "pme_calculate_splines.cuh"
@@ -321,32 +322,27 @@ __launch_bounds__(c_gatherMaxThreadsPerBlock, c_gatherMinBlocksPerMP) __global__
     }
     else
     {
+        const float3* __restrict__ gm_coordinates = asFloat3(kernelParams.atoms.d_coordinates);
         /* Recaclulate  Splines  */
         if (c_useAtomDataPrefetch)
         {
             // charges
             __shared__ float sm_coefficients[atomsPerBlock];
             // Coordinates
-            __shared__ float sm_coordinates[DIM * atomsPerBlock];
+            __shared__ float3 sm_coordinates[atomsPerBlock];
             /* Staging coefficients/charges */
-            pme_gpu_stage_atom_data<float, atomsPerBlock, 1>(kernelParams, sm_coefficients,
-                                                             kernelParams.atoms.d_coefficients);
+            pme_gpu_stage_atom_data<float, atomsPerBlock, 1>(kernelParams, sm_coefficients, gm_coefficients);
 
             /* Staging coordinates */
-            pme_gpu_stage_atom_data<float, atomsPerBlock, DIM>(kernelParams, sm_coordinates,
-                                                               kernelParams.atoms.d_coordinates);
+            pme_gpu_stage_atom_data<float3, atomsPerBlock, 1>(kernelParams, sm_coordinates, gm_coordinates);
             __syncthreads();
-            atomX.x    = sm_coordinates[atomIndexLocal * DIM + XX];
-            atomX.y    = sm_coordinates[atomIndexLocal * DIM + YY];
-            atomX.z    = sm_coordinates[atomIndexLocal * DIM + ZZ];
+            atomX      = sm_coordinates[atomIndexLocal];
             atomCharge = sm_coefficients[atomIndexLocal];
         }
         else
         {
+            atomX      = gm_coordinates[atomIndexGlobal];
             atomCharge = gm_coefficients[atomIndexGlobal];
-            atomX.x    = kernelParams.atoms.d_coordinates[atomIndexGlobal * DIM + XX];
-            atomX.y    = kernelParams.atoms.d_coordinates[atomIndexGlobal * DIM + YY];
-            atomX.z    = kernelParams.atoms.d_coordinates[atomIndexGlobal * DIM + ZZ];
         }
         calculate_splines<order, atomsPerBlock, atomsPerWarp, true, false>(
                 kernelParams, atomIndexOffset, atomX, atomCharge, sm_theta, sm_dtheta, sm_gridlineIndices);