Use RVec instead of float for x, v and f device buffers

[alexxy/gromacs.git] / src / gromacs / nbnxm / cuda / nbnxm_cuda.cu
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu

index 94e99879db0e3c54976e19a24cf030885143ca7e..2defa174d647f2a34f4fad32acba32523df1615e 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
@@ -818,7 +818,7 @@ void cuda_set_cacheconfig()
  void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid&        grid,
                             bool                      setFillerCoords,
                             NbnxmGpu*                 nb,
-                           DeviceBuffer<float>       d_x,
+                           DeviceBuffer<gmx::RVec>   d_x,
                             GpuEventSynchronizer*     xReadyOnDevice,
                             const Nbnxm::AtomLocality locality,
                             int                       gridId,
@@ -862,12 +862,13 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid&        grid,
          auto kernelFn = setFillerCoords ? nbnxn_gpu_x_to_nbat_x_kernel<true>
                                          : nbnxn_gpu_x_to_nbat_x_kernel<false>;
          float4*    d_xq          = adat->xq;
+        float3*    d_xFloat3     = asFloat3(d_x);
          const int* d_atomIndices = nb->atomIndices;
          const int* d_cxy_na      = &nb->cxy_na[numColumnsMax * gridId];
          const int* d_cxy_ind     = &nb->cxy_ind[numColumnsMax * gridId];
-        const auto kernelArgs =
-                prepareGpuKernelArguments(kernelFn, config, &numColumns, &d_xq, &d_x, &d_atomIndices,
-                                          &d_cxy_na, &d_cxy_ind, &cellOffset, &numAtomsPerCell);
+        const auto kernelArgs    = prepareGpuKernelArguments(kernelFn, config, &numColumns, &d_xq,
+                                                          &d_xFloat3, &d_atomIndices, &d_cxy_na,
+                                                          &d_cxy_ind, &cellOffset, &numAtomsPerCell);
          launchGpuKernel(kernelFn, config, nullptr, "XbufferOps", kernelArgs);
      }
  
@@ -884,7 +885,7 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid&        grid,
   *       forces only after Local stream already done so.
   */
  void nbnxn_gpu_add_nbat_f_to_f(const AtomLocality                         atomLocality,
-                               DeviceBuffer<float>                        totalForcesDevice,
+                               DeviceBuffer<gmx::RVec>                    totalForcesDevice,
                                 NbnxmGpu*                                  nb,
                                 void*                                      pmeForcesDevice,
                                 gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
@@ -935,8 +936,8 @@ void nbnxn_gpu_add_nbat_f_to_f(const AtomLocality                         atomLo
      }
  
      const float3* d_fNB    = adat->f;
-    const float3* d_fPme   = (float3*)pmeForcesDevice;
-    float3*       d_fTotal = (float3*)totalForcesDevice;
+    const float3* d_fPme   = static_cast<float3*>(pmeForcesDevice);
+    float3*       d_fTotal = asFloat3(totalForcesDevice);
      const int*    d_cell   = nb->cell;
  
      const auto kernelArgs = prepareGpuKernelArguments(kernelFn, config, &d_fNB, &d_fPme, &d_fTotal,