Add FloatN aliases to CUDA and use them in NBNXM
authorArtem Zhmurov <zhmurov@gmail.com>
Sat, 20 Feb 2021 08:12:02 +0000 (11:12 +0300)
committerPaul Bauer <paul.bauer.q@gmail.com>
Mon, 22 Feb 2021 08:19:42 +0000 (08:19 +0000)
These aliases are necessary to unify the OpenCL, CUDA, and SYCL code.

Refs #3312, #2608, #3311

src/gromacs/gpu_utils/gputraits.cuh
src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel.cuh
src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cuh
src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h

index fec113b4b4c1f4bcd1414e1ee8cfd4d4478b687f..656e8a231969a5bc85da806e01ab120b04504799 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -45,6 +45,7 @@
  * \ingroup module_gpu_utils
  */
 #include <cuda_runtime.h>
+#include "gromacs/math/vectypes.h"
 
 //! Device texture for fast read-only data fetching
 using DeviceTexture = cudaTextureObject_t;
@@ -52,6 +53,15 @@ using DeviceTexture = cudaTextureObject_t;
 //! \brief Single GPU call timing event - meaningless in CUDA
 using CommandEvent = void;
 
+//! Convenience alias for 2-wide float.
+using Float2 = float2;
+
+//! Convenience alias for 3-wide float.
+using Float3 = gmx::RVec;
+
+//! Convenience alias for 4-wide float.
+using Float4 = float4;
+
 /*! \internal \brief
  * GPU kernel scheduling description. This is the same in OpenCL/CUDA.
  * Provides reasonable defaults, one typically only needs to set the GPU stream
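
Why the aliases matter: the intent is that each backend's gputraits header (OpenCL, SYCL, and now CUDA) supplies its own Float2/Float3/Float4 definitions, so shared code can be written once against the alias. Note that Float3 maps to gmx::RVec here, which is why the vectypes.h include is added, while Float2 and Float4 remain CUDA's native vector types. A minimal sketch of the intent (hypothetical function name, not part of this commit):

    #include "gromacs/gpu_utils/gputraits.cuh" // CUDA build: Float3 = gmx::RVec, Float4 = float4

    // Backend-neutral declaration: the same prototype compiles under CUDA,
    // OpenCL, and SYCL because each backend defines the FloatN aliases.
    void fillXqBuffer(Float4* xq, int numAtoms);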
index 6796da5aac6660b63ea165d0397aadcc779d1ca4..62d50f039d40271de79585e8ac13379af5508ba9 100644 (file)
@@ -496,17 +496,17 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
         adat_len   = adat->natoms - adat->natoms_local;
     }
 
-    /* HtoD x, q */
     /* beginning of timed HtoD section */
     if (bDoTime)
     {
         t->xf[atomLocality].nb_h2d.openTimingRegion(deviceStream);
     }
 
-    static_assert(sizeof(adat->xq[0]) == sizeof(float4),
+    /* HtoD x, q */
+    static_assert(sizeof(adat->xq[0]) == sizeof(Float4),
                   "The size of the xyzq buffer element should be equal to the size of float4.");
     copyToDeviceBuffer(&adat->xq,
-                       reinterpret_cast<const float4*>(nbatom->x().data()) + adat_begin,
+                       reinterpret_cast<const Float4*>(nbatom->x().data()) + adat_begin,
                        adat_begin,
                        adat_len,
                        deviceStream,
@@ -845,9 +845,9 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
     if (!stepWork.useGpuFBufferOps)
     {
         static_assert(
-                sizeof(adat->f[0]) == sizeof(float3),
+                sizeof(adat->f[0]) == sizeof(Float3),
                 "The size of the force buffer element should be equal to the size of float3.");
-        copyFromDeviceBuffer(reinterpret_cast<float3*>(nbatom->out[0].f.data()) + adat_begin,
+        copyFromDeviceBuffer(reinterpret_cast<Float3*>(nbatom->out[0].f.data()) + adat_begin,
                              &adat->f,
                              adat_begin,
                              adat_len,
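
The pattern above, a static_assert on element size followed by a reinterpret_cast, is what keeps the alias swap safe: the cast from the host's flat float storage is only legal if the device element has the expected packed layout. The idiom in isolation, with a hypothetical host buffer standing in for nbatom->x():

    // Guard-then-cast idiom (sketch; hostXq is an illustrative name):
    static_assert(sizeof(Float4) == 4 * sizeof(float), "Float4 must be four packed floats");
    const Float4* xqPacked = reinterpret_cast<const Float4*>(hostXq.data()); // xyzq per atom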
index 7d1334144fc4342db4db557226593b4ef4ae56bf..dff7e4d21cf8e68f98c1a6e7852b7b9a67fe4ef0 100644 (file)
@@ -268,7 +268,7 @@ void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
         static_assert(sizeof(adat->shift_vec[0]) == sizeof(nbatom->shift_vec[0]),
                       "Sizes of host- and device-side shift vectors should be the same.");
         copyToDeviceBuffer(&adat->shift_vec,
-                           reinterpret_cast<const float3*>(nbatom->shift_vec.data()),
+                           reinterpret_cast<const Float3*>(nbatom->shift_vec.data()),
                            0,
                            SHIFTS,
                            localStream,
@@ -368,10 +368,10 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
 
     if (useLjCombRule(nb->nbparam->vdwType))
     {
-        static_assert(sizeof(d_atdat->lj_comb[0]) == sizeof(float2),
+        static_assert(sizeof(d_atdat->lj_comb[0]) == sizeof(Float2),
                       "Size of the LJ parameters element should be equal to the size of float2.");
         copyToDeviceBuffer(&d_atdat->lj_comb,
-                           reinterpret_cast<const float2*>(nbat->params().lj_comb.data()),
+                           reinterpret_cast<const Float2*>(nbat->params().lj_comb.data()),
                            0,
                            natoms,
                            localStream,
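
The same guard protects the combination-rule parameters: the host stores two floats per atom, so natoms elements of Float2 alias the same memory. In isolation (hypothetical name for the host array):

    // Host layout assumed by the cast above: [sqrt(c6_0), sqrt(c12_0), sqrt(c6_1), ...]
    static_assert(sizeof(Float2) == 2 * sizeof(float), "Float2 must be two packed floats");
    const Float2* ljPairs = reinterpret_cast<const Float2*>(hostLjComb.data());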
index 0ff57b25b120cff746e1d06d00a2a1177eb729b4..688e094715ea6912efb5bb918637ffa153a052dd 100644 (file)
@@ -48,6 +48,7 @@
 
 #include "gromacs/gpu_utils/cuda_arch_utils.cuh"
 #include "gromacs/gpu_utils/cuda_kernel_utils.cuh"
+#include "gromacs/gpu_utils/typecasts.cuh"
 #include "gromacs/math/utilities.h"
 #include "gromacs/pbcutil/ishift.h"
 /* Note that floating-point constants in CUDA code should be suffixed
@@ -178,8 +179,8 @@ __launch_bounds__(THREADS_PER_BLOCK)
     float2        ljcp_i, ljcp_j;
 #    endif
     const float4*        xq          = atdat.xq;
-    float3*              f           = atdat.f;
-    const float3*        shift_vec   = atdat.shift_vec;
+    float3*              f           = asFloat3(atdat.f);
+    const float3*        shift_vec   = asFloat3(atdat.shift_vec);
     float                rcoulomb_sq = nbparam.rcoulomb_sq;
 #    ifdef VDW_CUTOFF_CHECK
     float                rvdw_sq     = nbparam.rvdw_sq;
@@ -648,7 +649,8 @@ __launch_bounds__(THREADS_PER_BLOCK)
     /* add up local shift forces into global mem, tidxj indexes x,y,z */
     if (bCalcFshift && (tidxj & 3) < 3)
     {
-        atomicAdd(&(atdat.fshift[nb_sci.shift].x) + (tidxj & 3), fshift_buf);
+        float3* fshift = asFloat3(atdat.fshift);
+        atomicAdd(&(fshift[nb_sci.shift].x) + (tidxj & 3), fshift_buf);
     }
 
 #    ifdef CALC_ENERGIES
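
Inside the kernels the buffers still need CUDA's native float3 (the .x member indexing in the atomicAdd above relies on it), so the new typecasts.cuh include provides asFloat3() to cast back at the kernel boundary. Its implementation is not shown in this diff; a plausible sketch, assuming gmx::RVec and float3 share layout:

    // Hedged sketch of an asFloat3 helper; the real one lives in
    // gromacs/gpu_utils/typecasts.cuh and may differ in detail.
    static inline __host__ __device__ float3* asFloat3(gmx::RVec* in)
    {
        static_assert(sizeof(gmx::RVec) == sizeof(float3),
                      "gmx::RVec and float3 must have identical layout");
        return reinterpret_cast<float3*>(in);
    }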
index 563e1edc0c74b4faa13492f3d2af70b19c5a5d4d..11c51227f4e8b04598ea9ac7257b2a856c12a23d 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -47,6 +47,7 @@
 #include "gmxpre.h"
 
 #include "gromacs/gpu_utils/cuda_arch_utils.cuh"
+#include "gromacs/gpu_utils/typecasts.cuh"
 #include "gromacs/math/utilities.h"
 #include "gromacs/pbcutil/ishift.h"
 
@@ -124,7 +125,7 @@ nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const NBParamGpu, const Nbnx
     const nbnxn_sci_t* pl_sci    = plist.sci;
     nbnxn_cj4_t*       pl_cj4    = plist.cj4;
     const float4*      xq        = atdat.xq;
-    const float3*      shift_vec = atdat.shift_vec;
+    const float3*      shift_vec = asFloat3(atdat.shift_vec);
 
     float rlistOuter_sq = nbparam.rlistOuter_sq;
     float rlistInner_sq = nbparam.rlistInner_sq;
index 65a247ad08408a498790eca6d971d8f1fd361df5..f1b1a6db8178a2a20aeb71ae4b251d815edf08a8 100644 (file)
@@ -84,7 +84,7 @@ struct nb_staging_t
     //! electrostatic energy
     float* e_el = nullptr;
     //! shift forces
-    float3* fshift = nullptr;
+    Float3* fshift = nullptr;
 };
 
 /** \internal
@@ -100,9 +100,9 @@ struct cu_atomdata
     int nalloc;
 
     //! atom coordinates + charges, size natoms
-    DeviceBuffer<float4> xq;
+    DeviceBuffer<Float4> xq;
     //! force output array, size natoms
-    DeviceBuffer<float3> f;
+    DeviceBuffer<Float3> f;
 
     //! LJ energy output, size 1
     DeviceBuffer<float> e_lj;
@@ -110,17 +110,17 @@ struct cu_atomdata
     DeviceBuffer<float> e_el;
 
     //! shift forces
-    DeviceBuffer<float3> fshift;
+    DeviceBuffer<Float3> fshift;
 
     //! number of atom types
     int ntypes;
     //! atom type indices, size natoms
     DeviceBuffer<int> atom_types;
     //! sqrt(c6),sqrt(c12) size natoms
-    DeviceBuffer<float2> lj_comb;
+    DeviceBuffer<Float2> lj_comb;
 
     //! shifts
-    DeviceBuffer<float3> shift_vec;
+    DeviceBuffer<Float3> shift_vec;
     //! true if the shift vector has been uploaded
     bool bShiftVecUploaded;
 };
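
With cu_atomdata expressed in the FloatN aliases, the allocation and transfer layer needs no per-backend specialization; only the kernels cast back at the boundary. A hedged usage sketch, assuming the existing allocateDeviceBuffer template:

    // DeviceBuffer<Float3>/<Float4> go through the same templated allocator
    // as any other element type (sizes illustrative):
    allocateDeviceBuffer(&d_atdat->xq, numAtoms, deviceContext);   // Float4: x, y, z, q
    allocateDeviceBuffer(&d_atdat->f, numAtoms, deviceContext);    // Float3: fx, fy, fz
    allocateDeviceBuffer(&d_atdat->fshift, SHIFTS, deviceContext); // one Float3 per shift vector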