Unify NB atoms and staging data structures in OpenCL, CUDA and SYCL

author Artem Zhmurov <zhmurov@gmail.com>

Mon, 22 Feb 2021 14:15:16 +0000 (14:15 +0000)

committer Joe Jordan <ejjordan12@gmail.com>

Mon, 22 Feb 2021 14:15:16 +0000 (14:15 +0000)
author Artem Zhmurov <zhmurov@gmail.com>
Mon, 22 Feb 2021 14:15:16 +0000 (14:15 +0000)
committer Joe Jordan <ejjordan12@gmail.com>
Mon, 22 Feb 2021 14:15:16 +0000 (14:15 +0000)
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu

index 62d50f039d40271de79585e8ac13379af5508ba9..594c4ca2910b44304eaa766c828758fbfec771e0 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
@@ -122,7 +122,7 @@ namespace Nbnxm
  constexpr static int c_bufOpsThreadsPerBlock = 128;
  
  /*! Nonbonded kernel function pointer type */
-typedef void (*nbnxn_cu_kfunc_ptr_t)(const cu_atomdata_t, const NBParamGpu, const gpu_plist, bool);
+typedef void (*nbnxn_cu_kfunc_ptr_t)(const NBAtomData, const NBParamGpu, const gpu_plist, bool);
  
  /*********************************/
  
@@ -456,7 +456,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
  
      int adat_begin, adat_len; /* local/nonlocal offset and length used for xq and f */
  
-    cu_atomdata_t*      adat         = nb->atdat;
+    NBAtomData*         adat         = nb->atdat;
      gpu_plist*          plist        = nb->plist[iloc];
      cu_timers_t*        t            = nb->timers;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
@@ -488,12 +488,12 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
      if (atomLocality == AtomLocality::Local)
      {
          adat_begin = 0;
-        adat_len   = adat->natoms_local;
+        adat_len   = adat->numAtomsLocal;
      }
      else
      {
-        adat_begin = adat->natoms_local;
-        adat_len   = adat->natoms - adat->natoms_local;
+        adat_begin = adat->numAtomsLocal;
+        adat_len   = adat->numAtoms - adat->numAtomsLocal;
      }
  
      /* beginning of timed HtoD section */
@@ -546,7 +546,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
   */
  void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const InteractionLocality iloc)
  {
-    cu_atomdata_t*      adat         = nb->atdat;
+    NBAtomData*         adat         = nb->atdat;
      NBParamGpu*         nbp          = nb->nbparam;
      gpu_plist*          plist        = nb->plist[iloc];
      cu_timers_t*        t            = nb->timers;
@@ -667,7 +667,7 @@ static inline int calc_shmem_required_prune(const int num_threads_z)
  
  void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, const int numParts)
  {
-    cu_atomdata_t*      adat         = nb->atdat;
+    NBAtomData*         adat         = nb->atdat;
      NBParamGpu*         nbp          = nb->nbparam;
      gpu_plist*          plist        = nb->plist[iloc];
      cu_timers_t*        t            = nb->timers;
@@ -811,7 +811,7 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
                 "beginning of the copy back function.");
  
      /* extract the data */
-    cu_atomdata_t*      adat         = nb->atdat;
+    NBAtomData*         adat         = nb->atdat;
      cu_timers_t*        t            = nb->timers;
      bool                bDoTime      = nb->bDoTime;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
@@ -872,24 +872,24 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
          /* DtoH fshift when virial is needed */
          if (stepWork.computeVirial)
          {
-            static_assert(sizeof(nb->nbst.fshift[0]) == sizeof(adat->fshift[0]),
+            static_assert(sizeof(nb->nbst.fShift[0]) == sizeof(adat->fShift[0]),
                            "Sizes of host- and device-side shift vectors should be the same.");
              copyFromDeviceBuffer(
-                    nb->nbst.fshift, &adat->fshift, 0, SHIFTS, deviceStream, GpuApiCallBehavior::Async, nullptr);
+                    nb->nbst.fShift, &adat->fShift, 0, SHIFTS, deviceStream, GpuApiCallBehavior::Async, nullptr);
          }
  
          /* DtoH energies */
          if (stepWork.computeEnergy)
          {
-            static_assert(sizeof(nb->nbst.e_lj[0]) == sizeof(adat->e_lj[0]),
+            static_assert(sizeof(nb->nbst.eLJ[0]) == sizeof(adat->eLJ[0]),
                            "Sizes of host- and device-side LJ energy terms should be the same.");
              copyFromDeviceBuffer(
-                    nb->nbst.e_lj, &adat->e_lj, 0, 1, deviceStream, GpuApiCallBehavior::Async, nullptr);
-            static_assert(sizeof(nb->nbst.e_el[0]) == sizeof(adat->e_el[0]),
+                    nb->nbst.eLJ, &adat->eLJ, 0, 1, deviceStream, GpuApiCallBehavior::Async, nullptr);
+            static_assert(sizeof(nb->nbst.eElec[0]) == sizeof(adat->eElec[0]),
                            "Sizes of host- and device-side electrostatic energy terms should be the "
                            "same.");
              copyFromDeviceBuffer(
-                    nb->nbst.e_el, &adat->e_el, 0, 1, deviceStream, GpuApiCallBehavior::Async, nullptr);
+                    nb->nbst.eElec, &adat->eElec, 0, 1, deviceStream, GpuApiCallBehavior::Async, nullptr);
          }
      }
  
@@ -929,7 +929,7 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid&        grid,
  {
      GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
  
-    cu_atomdata_t* adat = nb->atdat;
+    NBAtomData* adat = nb->atdat;
  
      const int                  numColumns      = grid.numColumns();
      const int                  cellOffset      = grid.cellOffset();
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu

index db8654440e14116102bca32b6e555ed2ea72e48d..2505422927b705a17a3b6b97f7670d548741110e 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
@@ -97,15 +97,15 @@ static void nbnxn_cuda_clear_e_fshift(NbnxmGpu* nb);
  
  /*! Initializes the atomdata structure first time, it only gets filled at
      pair-search. */
-static void init_atomdata_first(cu_atomdata_t* ad, int ntypes, const DeviceContext& deviceContext)
+static void init_atomdata_first(NBAtomData* ad, int ntypes, const DeviceContext& deviceContext)
  {
-    ad->ntypes = ntypes;
-    allocateDeviceBuffer(&ad->shift_vec, SHIFTS, deviceContext);
-    ad->bShiftVecUploaded = false;
+    ad->numTypes = ntypes;
+    allocateDeviceBuffer(&ad->shiftVec, SHIFTS, deviceContext);
+    ad->shiftVecUploaded = false;
  
-    allocateDeviceBuffer(&ad->fshift, SHIFTS, deviceContext);
-    allocateDeviceBuffer(&ad->e_lj, 1, deviceContext);
-    allocateDeviceBuffer(&ad->e_el, 1, deviceContext);
+    allocateDeviceBuffer(&ad->fShift, SHIFTS, deviceContext);
+    allocateDeviceBuffer(&ad->eLJ, 1, deviceContext);
+    allocateDeviceBuffer(&ad->eElec, 1, deviceContext);
  
      /* initialize to nullptr poiters to data that is not allocated here and will
         need reallocation in nbnxn_cuda_init_atomdata */
@@ -113,8 +113,8 @@ static void init_atomdata_first(cu_atomdata_t* ad, int ntypes, const DeviceConte
      ad->f  = nullptr;
  
      /* size -1 indicates that the respective array hasn't been initialized yet */
-    ad->natoms = -1;
-    ad->nalloc = -1;
+    ad->numAtoms      = -1;
+    ad->numAtomsAlloc = -1;
  }
  
  /*! Initializes the nonbonded parameter data structure. */
@@ -198,9 +198,9 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager,
      snew(nb->timings, 1);
  
      /* init nbst */
-    pmalloc((void**)&nb->nbst.e_lj, sizeof(*nb->nbst.e_lj));
-    pmalloc((void**)&nb->nbst.e_el, sizeof(*nb->nbst.e_el));
-    pmalloc((void**)&nb->nbst.fshift, SHIFTS * sizeof(*nb->nbst.fshift));
+    pmalloc((void**)&nb->nbst.eLJ, sizeof(*nb->nbst.eLJ));
+    pmalloc((void**)&nb->nbst.eElec, sizeof(*nb->nbst.eElec));
+    pmalloc((void**)&nb->nbst.fShift, SHIFTS * sizeof(*nb->nbst.fShift));
  
      init_plist(nb->plist[InteractionLocality::Local]);
  
@@ -259,29 +259,29 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager,
  
  void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
  {
-    cu_atomdata_t*      adat        = nb->atdat;
+    NBAtomData*         adat        = nb->atdat;
      const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
  
      /* only if we have a dynamic box */
-    if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
+    if (nbatom->bDynamicBox || !adat->shiftVecUploaded)
      {
-        static_assert(sizeof(adat->shift_vec[0]) == sizeof(nbatom->shift_vec[0]),
+        static_assert(sizeof(adat->shiftVec[0]) == sizeof(nbatom->shift_vec[0]),
                        "Sizes of host- and device-side shift vectors should be the same.");
-        copyToDeviceBuffer(&adat->shift_vec,
+        copyToDeviceBuffer(&adat->shiftVec,
                             reinterpret_cast<const Float3*>(nbatom->shift_vec.data()),
                             0,
                             SHIFTS,
                             localStream,
                             GpuApiCallBehavior::Async,
                             nullptr);
-        adat->bShiftVecUploaded = true;
+        adat->shiftVecUploaded = true;
      }
  }
  
  /*! Clears the first natoms_clear elements of the GPU nonbonded force output array. */
  static void nbnxn_cuda_clear_f(NbnxmGpu* nb, int natoms_clear)
  {
-    cu_atomdata_t*      adat        = nb->atdat;
+    NBAtomData*         adat        = nb->atdat;
      const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
      clearDeviceBufferAsync(&adat->f, 0, natoms_clear, localStream);
  }
@@ -289,17 +289,17 @@ static void nbnxn_cuda_clear_f(NbnxmGpu* nb, int natoms_clear)
  /*! Clears nonbonded shift force output array and energy outputs on the GPU. */
  static void nbnxn_cuda_clear_e_fshift(NbnxmGpu* nb)
  {
-    cu_atomdata_t*      adat        = nb->atdat;
+    NBAtomData*         adat        = nb->atdat;
      const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
  
-    clearDeviceBufferAsync(&adat->fshift, 0, SHIFTS, localStream);
-    clearDeviceBufferAsync(&adat->e_lj, 0, 1, localStream);
-    clearDeviceBufferAsync(&adat->e_el, 0, 1, localStream);
+    clearDeviceBufferAsync(&adat->fShift, 0, SHIFTS, localStream);
+    clearDeviceBufferAsync(&adat->eLJ, 0, 1, localStream);
+    clearDeviceBufferAsync(&adat->eElec, 0, 1, localStream);
  }
  
  void gpu_clear_outputs(NbnxmGpu* nb, bool computeVirial)
  {
-    nbnxn_cuda_clear_f(nb, nb->atdat->natoms);
+    nbnxn_cuda_clear_f(nb, nb->atdat->numAtoms);
      /* clear shift force array and energies if the outputs were
         used in the current step */
      if (computeVirial)
@@ -314,7 +314,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
      bool                 realloced;
      bool                 bDoTime       = nb->bDoTime;
      cu_timers_t*         timers        = nb->timers;
-    cu_atomdata_t*       d_atdat       = nb->atdat;
+    NBAtomData*          d_atdat       = nb->atdat;
      const DeviceContext& deviceContext = *nb->deviceContext_;
      const DeviceStream&  localStream   = *nb->deviceStreams[InteractionLocality::Local];
  
@@ -329,36 +329,36 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
  
      /* need to reallocate if we have to copy more atoms than the amount of space
         available and only allocate if we haven't initialized yet, i.e d_atdat->natoms == -1 */
-    if (natoms > d_atdat->nalloc)
+    if (natoms > d_atdat->numAtomsAlloc)
      {
          nalloc = over_alloc_small(natoms);
  
          /* free up first if the arrays have already been initialized */
-        if (d_atdat->nalloc != -1)
+        if (d_atdat->numAtomsAlloc != -1)
          {
              freeDeviceBuffer(&d_atdat->f);
              freeDeviceBuffer(&d_atdat->xq);
-            freeDeviceBuffer(&d_atdat->atom_types);
-            freeDeviceBuffer(&d_atdat->lj_comb);
+            freeDeviceBuffer(&d_atdat->atomTypes);
+            freeDeviceBuffer(&d_atdat->ljComb);
          }
  
          allocateDeviceBuffer(&d_atdat->f, nalloc, deviceContext);
          allocateDeviceBuffer(&d_atdat->xq, nalloc, deviceContext);
          if (useLjCombRule(nb->nbparam->vdwType))
          {
-            allocateDeviceBuffer(&d_atdat->lj_comb, nalloc, deviceContext);
+            allocateDeviceBuffer(&d_atdat->ljComb, nalloc, deviceContext);
          }
          else
          {
-            allocateDeviceBuffer(&d_atdat->atom_types, nalloc, deviceContext);
+            allocateDeviceBuffer(&d_atdat->atomTypes, nalloc, deviceContext);
          }
  
-        d_atdat->nalloc = nalloc;
-        realloced       = true;
+        d_atdat->numAtomsAlloc = nalloc;
+        realloced              = true;
      }
  
-    d_atdat->natoms       = natoms;
-    d_atdat->natoms_local = nbat->natoms_local;
+    d_atdat->numAtoms      = natoms;
+    d_atdat->numAtomsLocal = nbat->natoms_local;
  
      /* need to clear GPU f output if realloc happened */
      if (realloced)
@@ -368,9 +368,9 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
  
      if (useLjCombRule(nb->nbparam->vdwType))
      {
-        static_assert(sizeof(d_atdat->lj_comb[0]) == sizeof(Float2),
+        static_assert(sizeof(d_atdat->ljComb[0]) == sizeof(Float2),
                        "Size of the LJ parameters element should be equal to the size of float2.");
-        copyToDeviceBuffer(&d_atdat->lj_comb,
+        copyToDeviceBuffer(&d_atdat->ljComb,
                             reinterpret_cast<const Float2*>(nbat->params().lj_comb.data()),
                             0,
                             natoms,
@@ -380,9 +380,9 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
      }
      else
      {
-        static_assert(sizeof(d_atdat->atom_types[0]) == sizeof(nbat->params().type[0]),
+        static_assert(sizeof(d_atdat->atomTypes[0]) == sizeof(nbat->params().type[0]),
                        "Sizes of host- and device-side atom types should be the same.");
-        copyToDeviceBuffer(&d_atdat->atom_types,
+        copyToDeviceBuffer(&d_atdat->atomTypes,
                             nbat->params().type.data(),
                             0,
                             natoms,
@@ -399,16 +399,13 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
  
  void gpu_free(NbnxmGpu* nb)
  {
-    cu_atomdata_t* atdat;
-    NBParamGpu*    nbparam;
-
      if (nb == nullptr)
      {
          return;
      }
  
-    atdat   = nb->atdat;
-    nbparam = nb->nbparam;
+    NBAtomData* atdat   = nb->atdat;
+    NBParamGpu* nbparam = nb->nbparam;
  
      if ((!nbparam->coulomb_tab)
          && (nbparam->elecType == ElecType::EwaldTab || nbparam->elecType == ElecType::EwaldTabTwin))
@@ -428,16 +425,16 @@ void gpu_free(NbnxmGpu* nb)
          destroyParamLookupTable(&nbparam->nbfp_comb, nbparam->nbfp_comb_texobj);
      }
  
-    freeDeviceBuffer(&atdat->shift_vec);
-    freeDeviceBuffer(&atdat->fshift);
+    freeDeviceBuffer(&atdat->shiftVec);
+    freeDeviceBuffer(&atdat->fShift);
  
-    freeDeviceBuffer(&atdat->e_lj);
-    freeDeviceBuffer(&atdat->e_el);
+    freeDeviceBuffer(&atdat->eLJ);
+    freeDeviceBuffer(&atdat->eElec);
  
      freeDeviceBuffer(&atdat->f);
      freeDeviceBuffer(&atdat->xq);
-    freeDeviceBuffer(&atdat->atom_types);
-    freeDeviceBuffer(&atdat->lj_comb);
+    freeDeviceBuffer(&atdat->atomTypes);
+    freeDeviceBuffer(&atdat->ljComb);
  
      /* Free plist */
      auto* plist = nb->plist[InteractionLocality::Local];
@@ -457,14 +454,14 @@ void gpu_free(NbnxmGpu* nb)
      }
  
      /* Free nbst */
-    pfree(nb->nbst.e_lj);
-    nb->nbst.e_lj = nullptr;
+    pfree(nb->nbst.eLJ);
+    nb->nbst.eLJ = nullptr;
  
-    pfree(nb->nbst.e_el);
-    nb->nbst.e_el = nullptr;
+    pfree(nb->nbst.eElec);
+    nb->nbst.eElec = nullptr;
  
-    pfree(nb->nbst.fshift);
-    nb->nbst.fshift = nullptr;
+    pfree(nb->nbst.fShift);
+    nb->nbst.fShift = nullptr;
  
      sfree(atdat);
      sfree(nbparam);
@@ -501,7 +498,7 @@ DeviceBuffer<gmx::RVec> gpu_get_fshift(NbnxmGpu* nb)
  {
      assert(nb);
  
-    return reinterpret_cast<DeviceBuffer<gmx::RVec>>(nb->atdat->fshift);
+    return reinterpret_cast<DeviceBuffer<gmx::RVec>>(nb->atdat->fShift);
  }
  
  /* Initialization for X buffer operations on GPU. */
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel.cuh b/src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel.cuh

index 688e094715ea6912efb5bb918637ffa153a052dd..344e971c845afeb7e5a25abb65e7af8e3dcf8b32 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel.cuh
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel.cuh
@@ -159,7 +159,7 @@ __launch_bounds__(THREADS_PER_BLOCK)
          __global__ void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_cuda)
  #    endif /* CALC_ENERGIES */
  #endif     /* PRUNE_NBL */
-                (const cu_atomdata_t atdat, const NBParamGpu nbparam, const Nbnxm::gpu_plist plist, bool bCalcFshift)
+                (const NBAtomData atdat, const NBParamGpu nbparam, const Nbnxm::gpu_plist plist, bool bCalcFshift)
  #ifdef FUNCTION_DECLARATION_ONLY
                          ; /* Only do function declaration, omit the function body. */
  #else
@@ -172,15 +172,15 @@ __launch_bounds__(THREADS_PER_BLOCK)
              nbnxn_cj4_t* pl_cj4      = plist.cj4;
      const nbnxn_excl_t*  excl        = plist.excl;
  #    ifndef LJ_COMB
-    const int*           atom_types  = atdat.atom_types;
-    int                  ntypes      = atdat.ntypes;
+    const int*           atom_types  = atdat.atomTypes;
+    int                  ntypes      = atdat.numTypes;
  #    else
-    const float2* lj_comb = atdat.lj_comb;
+    const float2* lj_comb = atdat.ljComb;
      float2        ljcp_i, ljcp_j;
  #    endif
      const float4*        xq          = atdat.xq;
      float3*              f           = asFloat3(atdat.f);
-    const float3*        shift_vec   = asFloat3(atdat.shift_vec);
+    const float3*        shift_vec   = asFloat3(atdat.shiftVec);
      float                rcoulomb_sq = nbparam.rcoulomb_sq;
  #    ifdef VDW_CUTOFF_CHECK
      float                rvdw_sq     = nbparam.rvdw_sq;
@@ -207,8 +207,8 @@ __launch_bounds__(THREADS_PER_BLOCK)
  #        else
      float reactionFieldShift = nbparam.c_rf;
  #        endif /* EL_EWALD_ANY */
-    float*               e_lj        = atdat.e_lj;
-    float*               e_el        = atdat.e_el;
+    float*               e_lj        = atdat.eLJ;
+    float*               e_el        = atdat.eElec;
  #    endif     /* CALC_ENERGIES */
  
      /* thread/block/warp id-s */
@@ -649,8 +649,8 @@ __launch_bounds__(THREADS_PER_BLOCK)
      /* add up local shift forces into global mem, tidxj indexes x,y,z */
      if (bCalcFshift && (tidxj & 3) < 3)
      {
-        float3* fshift = asFloat3(atdat.fshift);
-        atomicAdd(&(fshift[nb_sci.shift].x) + (tidxj & 3), fshift_buf);
+        float3* fShift = asFloat3(atdat.fShift);
+        atomicAdd(&(fShift[nb_sci.shift].x) + (tidxj & 3), fshift_buf);
      }
  
  #    ifdef CALC_ENERGIES
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cu

index fb8ebb2e766fec2574438098b1ef3efe34b52758..2ff980a6b0cf0dd32f13d521a98316a6a765698d 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cu
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -39,7 +39,7 @@
  #ifndef FUNCTION_DECLARATION_ONLY
  /* Instantiate external template functions */
  template __global__ void
-nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const NBParamGpu, const Nbnxm::gpu_plist, int, int);
+nbnxn_kernel_prune_cuda<false>(const NBAtomData, const NBParamGpu, const Nbnxm::gpu_plist, int, int);
  template __global__ void
-nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const NBParamGpu, const Nbnxm::gpu_plist, int, int);
+nbnxn_kernel_prune_cuda<true>(const NBAtomData, const NBParamGpu, const Nbnxm::gpu_plist, int, int);
  #endif
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cuh b/src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cuh

index 11c51227f4e8b04598ea9ac7257b2a856c12a23d..c5c55e667f6531969c4f6c770f5342578463585d 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cuh
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cuh
@@ -104,7 +104,7 @@
   */
  template<bool haveFreshList>
  __launch_bounds__(THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP) __global__
-        void nbnxn_kernel_prune_cuda(const cu_atomdata_t    atdat,
+        void nbnxn_kernel_prune_cuda(const NBAtomData       atdat,
                                       const NBParamGpu       nbparam,
                                       const Nbnxm::gpu_plist plist,
                                       int                    numParts,
@@ -115,9 +115,9 @@ __launch_bounds__(THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP) __global__
  // Add extern declarations so each translation unit understands that
  // there will be a definition provided.
  extern template __global__ void
-nbnxn_kernel_prune_cuda<true>(const cu_atomdata_t, const NBParamGpu, const Nbnxm::gpu_plist, int, int);
+nbnxn_kernel_prune_cuda<true>(const NBAtomData, const NBParamGpu, const Nbnxm::gpu_plist, int, int);
  extern template __global__ void
-nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const NBParamGpu, const Nbnxm::gpu_plist, int, int);
+nbnxn_kernel_prune_cuda<false>(const NBAtomData, const NBParamGpu, const Nbnxm::gpu_plist, int, int);
  #else
  {
  
@@ -125,7 +125,7 @@ nbnxn_kernel_prune_cuda<false>(const cu_atomdata_t, const NBParamGpu, const Nbnx
      const nbnxn_sci_t* pl_sci    = plist.sci;
      nbnxn_cj4_t*       pl_cj4    = plist.cj4;
      const float4*      xq        = atdat.xq;
-    const float3*      shift_vec = asFloat3(atdat.shift_vec);
+    const float3*      shift_vec = asFloat3(atdat.shiftVec);
  
      float rlistOuter_sq = nbparam.rlistOuter_sq;
      float rlistInner_sq = nbparam.rlistInner_sq;
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h b/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h

index f1b1a6db8178a2a20aeb71ae4b251d815edf08a8..08d96de90f2b507885214c06002c113e9bfe8486 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h
@@ -64,67 +64,6 @@
  /*! \brief cluster size = number of atoms per cluster. */
  static constexpr int c_clSize = c_nbnxnGpuClusterSize;
  
-/* All structs prefixed with "cu_" hold data used in GPU calculations and
- * are passed to the kernels, except cu_timers_t. */
-/*! \cond */
-typedef struct cu_atomdata cu_atomdata_t;
-/*! \endcond */
-
-
-/** \internal
- * \brief Staging area for temporary data downloaded from the GPU.
- *
- *  The energies/shift forces get downloaded here first, before getting added
- *  to the CPU-side aggregate values.
- */
-struct nb_staging_t
-{
-    //! LJ energy
-    float* e_lj = nullptr;
-    //! electrostatic energy
-    float* e_el = nullptr;
-    //! shift forces
-    Float3* fshift = nullptr;
-};
-
-/** \internal
- * \brief Nonbonded atom data - both inputs and outputs.
- */
-struct cu_atomdata
-{
-    //! number of atoms
-    int natoms;
-    //! number of local atoms
-    int natoms_local;
-    //! allocation size for the atom data (xq, f)
-    int nalloc;
-
-    //! atom coordinates + charges, size natoms
-    DeviceBuffer<Float4> xq;
-    //! force output array, size natoms
-    DeviceBuffer<Float3> f;
-
-    //! LJ energy output, size 1
-    DeviceBuffer<float> e_lj;
-    //! Electrostatics energy input, size 1
-    DeviceBuffer<float> e_el;
-
-    //! shift forces
-    DeviceBuffer<Float3> fshift;
-
-    //! number of atom types
-    int ntypes;
-    //! atom type indices, size natoms
-    DeviceBuffer<int> atom_types;
-    //! sqrt(c6),sqrt(c12) size natoms
-    DeviceBuffer<Float2> lj_comb;
-
-    //! shifts
-    DeviceBuffer<Float3> shift_vec;
-    //! true if the shift vector has been uploaded
-    bool bShiftVecUploaded;
-};
-
  /** \internal
   * \brief Typedef of actual timer type.
   */
@@ -146,7 +85,7 @@ struct NbnxmGpu
      bool bNonLocalStreamDoneMarked = false;
  
      /*! \brief atom data */
-    cu_atomdata_t* atdat = nullptr;
+    NBAtomData* atdat = nullptr;
      /*! \brief array of atom indices */
      int* atomIndices = nullptr;
      /*! \brief size of atom indices */
@@ -170,7 +109,7 @@ struct NbnxmGpu
      /*! \brief pair-list data structures (local and non-local) */
      gmx::EnumerationArray<Nbnxm::InteractionLocality, Nbnxm::gpu_plist*> plist = { { nullptr } };
      /*! \brief staging area where fshift/energies get downloaded */
-    nb_staging_t nbst;
+    NBStagingData nbst;
      /*! \brief local and non-local GPU streams */
      gmx::EnumerationArray<Nbnxm::InteractionLocality, const DeviceStream*> deviceStreams;
  
diff --git a/src/gromacs/nbnxm/gpu_common.h b/src/gromacs/nbnxm/gpu_common.h

index bb1eb874aefc4da4ddf6f105a2efdb261a6d94bb..dccfe1eed03ac6fa2ea38c4038921c90ca9f085b 100644 (file)
--- a/src/gromacs/nbnxm/gpu_common.h
+++ b/src/gromacs/nbnxm/gpu_common.h
@@ -183,12 +183,12 @@ static inline void getGpuAtomRange(const AtomDataT*   atomData,
      if (atomLocality == AtomLocality::Local)
      {
          *atomRangeBegin = 0;
-        *atomRangeLen   = atomData->natoms_local;
+        *atomRangeLen   = atomData->numAtomsLocal;
      }
      else
      {
-        *atomRangeBegin = atomData->natoms_local;
-        *atomRangeLen   = atomData->natoms - atomData->natoms_local;
+        *atomRangeBegin = atomData->numAtomsLocal;
+        *atomRangeLen   = atomData->numAtoms - atomData->numAtomsLocal;
      }
  }
  
@@ -241,7 +241,6 @@ static void countPruneKernelTime(GpuTimers*                 timers,
   * Note that this function should always be called after the transfers into the
   * staging buffers has completed.
   *
- * \tparam     StagingData    Type of staging data
   * \param[in]  nbst           Nonbonded staging data
   * \param[in]  iLocality      Interaction locality specifier
   * \param[in]  reduceEnergies True if energy reduction should be done
@@ -250,7 +249,7 @@ static void countPruneKernelTime(GpuTimers*                 timers,
   * \param[out] e_el           Variable to accumulate electrostatic energy into
   * \param[out] fshift         Pointer to the array of shift forces to accumulate into
   */
-static inline void gpu_reduce_staged_outputs(const nb_staging_t&       nbst,
+static inline void gpu_reduce_staged_outputs(const NBStagingData&      nbst,
                                               const InteractionLocality iLocality,
                                               const bool                reduceEnergies,
                                               const bool                reduceFshift,
@@ -263,15 +262,15 @@ static inline void gpu_reduce_staged_outputs(const nb_staging_t&       nbst,
      {
          if (reduceEnergies)
          {
-            *e_lj += *nbst.e_lj;
-            *e_el += *nbst.e_el;
+            *e_lj += *nbst.eLJ;
+            *e_el += *nbst.eElec;
          }
  
          if (reduceFshift)
          {
              for (int i = 0; i < SHIFTS; i++)
              {
-                rvec_inc(fshift[i], nbst.fshift[i]);
+                rvec_inc(fshift[i], nbst.fShift[i]);
              }
          }
      }
diff --git a/src/gromacs/nbnxm/gpu_types_common.h b/src/gromacs/nbnxm/gpu_types_common.h

index ebd5db9b343981ae46000064eefe8f58b3b58472..85c5853ebd4de1cbae1ef6e1209c90ca9ee56035 100644 (file)
--- a/src/gromacs/nbnxm/gpu_types_common.h
+++ b/src/gromacs/nbnxm/gpu_types_common.h
@@ -73,6 +73,60 @@
  //! Default for the prune kernel's j4 processing concurrency.
  static constexpr int c_pruneKernelJ4Concurrency = GMX_NBNXN_PRUNE_KERNEL_J4_CONCURRENCY;
  
+/*! \internal
+ * \brief Staging area for temporary data downloaded from the GPU.
+ *
+ * Since SYCL buffers already have host-side storage, this is a bit redundant.
+ * But it allows prefetching of the data from GPU, and brings GPU backends closer together.
+ */
+struct NBStagingData
+{
+    //! LJ energy
+    float* eLJ = nullptr;
+    //! electrostatic energy
+    float* eElec = nullptr;
+    //! shift forces
+    Float3* fShift = nullptr;
+};
+
+/** \internal
+ * \brief Nonbonded atom data - both inputs and outputs.
+ */
+struct NBAtomData
+{
+    //! number of atoms
+    int numAtoms;
+    //! number of local atoms
+    int numAtomsLocal;
+    //! allocation size for the atom data (xq, f)
+    int numAtomsAlloc;
+
+    //! atom coordinates + charges, size \ref numAtoms
+    DeviceBuffer<Float4> xq;
+    //! force output array, size \ref numAtoms
+    DeviceBuffer<Float3> f;
+
+    //! LJ energy output, size 1
+    DeviceBuffer<float> eLJ;
+    //! Electrostatics energy input, size 1
+    DeviceBuffer<float> eElec;
+
+    //! shift forces
+    DeviceBuffer<Float3> fShift;
+
+    //! number of atom types
+    int numTypes;
+    //! atom type indices, size \ref numAtoms
+    DeviceBuffer<int> atomTypes;
+    //! sqrt(c6),sqrt(c12) size \ref numAtoms
+    DeviceBuffer<Float2> ljComb;
+
+    //! shifts
+    DeviceBuffer<Float3> shiftVec;
+    //! true if the shift vector has been uploaded
+    bool shiftVecUploaded;
+};
+
  /** \internal
   * \brief Parameters required for the GPU nonbonded calculations.
   */
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp

index 6cdad010199c3150f96e86d147b192217c491a06..e00874a30b6a78761a3c4b12c83ef9185b52243e 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
@@ -530,7 +530,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
      /* local/nonlocal offset and length used for xq and f */
      int adat_begin, adat_len;
  
-    cl_atomdata_t*      adat         = nb->atdat;
+    NBAtomData*         adat         = nb->atdat;
      gpu_plist*          plist        = nb->plist[iloc];
      cl_timers_t*        t            = nb->timers;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
@@ -562,12 +562,12 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
      if (atomLocality == AtomLocality::Local)
      {
          adat_begin = 0;
-        adat_len   = adat->natoms_local;
+        adat_len   = adat->numAtomsLocal;
      }
      else
      {
-        adat_begin = adat->natoms_local;
-        adat_len   = adat->natoms - adat->natoms_local;
+        adat_begin = adat->numAtomsLocal;
+        adat_len   = adat->numAtoms - adat->numAtomsLocal;
      }
  
      /* beginning of timed HtoD section */
@@ -622,7 +622,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
   */
  void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nbnxm::InteractionLocality iloc)
  {
-    cl_atomdata_t*      adat         = nb->atdat;
+    NBAtomData*         adat         = nb->atdat;
      NBParamGpu*         nbp          = nb->nbparam;
      gpu_plist*          plist        = nb->plist[iloc];
      cl_timers_t*        t            = nb->timers;
@@ -717,11 +717,11 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb
                                                            &nbparams_params,
                                                            &adat->xq,
                                                            &adat->f,
-                                                          &adat->e_lj,
-                                                          &adat->e_el,
-                                                          &adat->fshift,
-                                                          &adat->lj_comb,
-                                                          &adat->shift_vec,
+                                                          &adat->eLJ,
+                                                          &adat->eElec,
+                                                          &adat->fShift,
+                                                          &adat->ljComb,
+                                                          &adat->shiftVec,
                                                            &nbp->nbfp,
                                                            &nbp->nbfp_comb,
                                                            &nbp->coulomb_tab,
@@ -736,15 +736,15 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb
      {
          const auto kernelArgs = prepareGpuKernelArguments(kernel,
                                                            config,
-                                                          &adat->ntypes,
+                                                          &adat->numTypes,
                                                            &nbparams_params,
                                                            &adat->xq,
                                                            &adat->f,
-                                                          &adat->e_lj,
-                                                          &adat->e_el,
-                                                          &adat->fshift,
-                                                          &adat->atom_types,
-                                                          &adat->shift_vec,
+                                                          &adat->eLJ,
+                                                          &adat->eElec,
+                                                          &adat->fShift,
+                                                          &adat->atomTypes,
+                                                          &adat->shiftVec,
                                                            &nbp->nbfp,
                                                            &nbp->nbfp_comb,
                                                            &nbp->coulomb_tab,
@@ -793,7 +793,7 @@ static inline int calc_shmem_required_prune(const int num_threads_z)
   */
  void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, const int numParts)
  {
-    cl_atomdata_t*      adat         = nb->atdat;
+    NBAtomData*         adat         = nb->atdat;
      NBParamGpu*         nbp          = nb->nbparam;
      gpu_plist*          plist        = nb->plist[iloc];
      cl_timers_t*        t            = nb->timers;
@@ -898,7 +898,7 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c
                                                        config,
                                                        &nbparams_params,
                                                        &adat->xq,
-                                                      &adat->shift_vec,
+                                                      &adat->shiftVec,
                                                        &plist->sci,
                                                        &plist->cj4,
                                                        &plist->imask,
@@ -945,7 +945,7 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
                 "Non-local stream is indicating that the copy back event is enqueued at the "
                 "beginning of the copy back function.");
  
-    cl_atomdata_t*      adat         = nb->atdat;
+    NBAtomData*         adat         = nb->atdat;
      cl_timers_t*        t            = nb->timers;
      bool                bDoTime      = nb->bDoTime;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
@@ -1013,10 +1013,10 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
          if (stepWork.computeVirial)
          {
              static_assert(
-                    sizeof(*nb->nbst.fshift) == sizeof(Float3),
+                    sizeof(*nb->nbst.fShift) == sizeof(Float3),
                      "Sizes of host- and device-side shift vector elements should be the same.");
-            copyFromDeviceBuffer(nb->nbst.fshift,
-                                 &adat->fshift,
+            copyFromDeviceBuffer(nb->nbst.fShift,
+                                 &adat->fShift,
                                   0,
                                   SHIFTS,
                                   deviceStream,
@@ -1027,20 +1027,20 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
          /* DtoH energies */
          if (stepWork.computeEnergy)
          {
-            static_assert(sizeof(*nb->nbst.e_lj) == sizeof(float),
+            static_assert(sizeof(*nb->nbst.eLJ) == sizeof(float),
                            "Sizes of host- and device-side LJ energy terms should be the same.");
-            copyFromDeviceBuffer(nb->nbst.e_lj,
-                                 &adat->e_lj,
+            copyFromDeviceBuffer(nb->nbst.eLJ,
+                                 &adat->eLJ,
                                   0,
                                   1,
                                   deviceStream,
                                   GpuApiCallBehavior::Async,
                                   bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr);
-            static_assert(sizeof(*nb->nbst.e_el) == sizeof(float),
+            static_assert(sizeof(*nb->nbst.eElec) == sizeof(float),
                            "Sizes of host- and device-side electrostatic energy terms should be the "
                            "same.");
-            copyFromDeviceBuffer(nb->nbst.e_el,
-                                 &adat->e_el,
+            copyFromDeviceBuffer(nb->nbst.eElec,
+                                 &adat->eElec,
                                   0,
                                   1,
                                   deviceStream,
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp

index da998182cab678c5ae77f15393bca429baf34bec..25cb3158b2b58f26209bd10cafe5794438c8195d 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
@@ -103,16 +103,16 @@ static unsigned int gpu_min_ci_balanced_factor = 50;
  /*! \brief Initializes the atomdata structure first time, it only gets filled at
      pair-search.
   */
-static void init_atomdata_first(cl_atomdata_t* ad, int ntypes, const DeviceContext& deviceContext)
+static void init_atomdata_first(NBAtomData* ad, int ntypes, const DeviceContext& deviceContext)
  {
-    ad->ntypes = ntypes;
+    ad->numTypes = ntypes;
  
-    allocateDeviceBuffer(&ad->shift_vec, SHIFTS * DIM, deviceContext);
-    ad->bShiftVecUploaded = CL_FALSE;
+    allocateDeviceBuffer(&ad->shiftVec, SHIFTS * DIM, deviceContext);
+    ad->shiftVecUploaded = false;
  
-    allocateDeviceBuffer(&ad->fshift, SHIFTS * DIM, deviceContext);
-    allocateDeviceBuffer(&ad->e_lj, 1, deviceContext);
-    allocateDeviceBuffer(&ad->e_el, 1, deviceContext);
+    allocateDeviceBuffer(&ad->fShift, SHIFTS * DIM, deviceContext);
+    allocateDeviceBuffer(&ad->eLJ, 1, deviceContext);
+    allocateDeviceBuffer(&ad->eElec, 1, deviceContext);
  
      /* initialize to nullptr pointers to data that is not allocated here and will
         need reallocation in nbnxn_gpu_init_atomdata */
@@ -120,8 +120,8 @@ static void init_atomdata_first(cl_atomdata_t* ad, int ntypes, const DeviceConte
      ad->f  = nullptr;
  
      /* size -1 indicates that the respective array hasn't been initialized yet */
-    ad->natoms = -1;
-    ad->nalloc = -1;
+    ad->numAtoms      = -1;
+    ad->numAtomsAlloc = -1;
  }
  
  
@@ -206,7 +206,7 @@ static void nbnxn_ocl_clear_e_fshift(NbnxmGpu* nb)
  {
  
      cl_int           cl_error;
-    cl_atomdata_t*   adat = nb->atdat;
+    NBAtomData*      adat = nb->atdat;
      cl_command_queue ls   = nb->deviceStreams[InteractionLocality::Local]->stream();
  
      size_t local_work_size[3]  = { 1, 1, 1 };
@@ -223,9 +223,9 @@ static void nbnxn_ocl_clear_e_fshift(NbnxmGpu* nb)
      global_work_size[0] = ((shifts + local_work_size[0] - 1) / local_work_size[0]) * local_work_size[0];
  
      arg_no   = 0;
-    cl_error = clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_mem), &(adat->fshift));
-    cl_error |= clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_mem), &(adat->e_lj));
-    cl_error |= clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_mem), &(adat->e_el));
+    cl_error = clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_mem), &(adat->fShift));
+    cl_error |= clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_mem), &(adat->eLJ));
+    cl_error |= clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_mem), &(adat->eElec));
      cl_error |= clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_uint), &shifts);
      GMX_ASSERT(cl_error == CL_SUCCESS, ocl_get_error_string(cl_error).c_str());
  
@@ -264,7 +264,7 @@ static void nbnxn_gpu_init_kernels(NbnxmGpu* nb)
   *  Initializes members of the atomdata and nbparam structs and
   *  clears e/fshift output buffers.
   */
-static void nbnxn_ocl_init_const(cl_atomdata_t*                  atomData,
+static void nbnxn_ocl_init_const(NBAtomData*                     atomData,
                                   NBParamGpu*                     nbParams,
                                   const interaction_const_t*      ic,
                                   const PairlistParams&           listParams,
@@ -304,9 +304,9 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager,
      nb->dev_rundata = new gmx_device_runtime_data_t();
  
      /* init nbst */
-    pmalloc(reinterpret_cast<void**>(&nb->nbst.e_lj), sizeof(*nb->nbst.e_lj));
-    pmalloc(reinterpret_cast<void**>(&nb->nbst.e_el), sizeof(*nb->nbst.e_el));
-    pmalloc(reinterpret_cast<void**>(&nb->nbst.fshift), SHIFTS * sizeof(*nb->nbst.fshift));
+    pmalloc(reinterpret_cast<void**>(&nb->nbst.eLJ), sizeof(*nb->nbst.eLJ));
+    pmalloc(reinterpret_cast<void**>(&nb->nbst.eElec), sizeof(*nb->nbst.eElec));
+    pmalloc(reinterpret_cast<void**>(&nb->nbst.fShift), SHIFTS * sizeof(*nb->nbst.fShift));
  
      init_plist(nb->plist[InteractionLocality::Local]);
  
@@ -372,7 +372,7 @@ static void nbnxn_ocl_clear_f(NbnxmGpu* nb, int natoms_clear)
          return;
      }
  
-    cl_atomdata_t*      atomData    = nb->atdat;
+    NBAtomData*         atomData    = nb->atdat;
      const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
  
      clearDeviceBufferAsync(&atomData->f, 0, natoms_clear, localStream);
@@ -381,7 +381,7 @@ static void nbnxn_ocl_clear_f(NbnxmGpu* nb, int natoms_clear)
  //! This function is documented in the header file
  void gpu_clear_outputs(NbnxmGpu* nb, bool computeVirial)
  {
-    nbnxn_ocl_clear_f(nb, nb->atdat->natoms);
+    nbnxn_ocl_clear_f(nb, nb->atdat->numAtoms);
      /* clear shift force array and energies if the outputs were
         used in the current step */
      if (computeVirial)
@@ -398,22 +398,22 @@ void gpu_clear_outputs(NbnxmGpu* nb, bool computeVirial)
  //! This function is documented in the header file
  void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
  {
-    cl_atomdata_t*      adat        = nb->atdat;
+    NBAtomData*         adat        = nb->atdat;
      const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
  
      /* only if we have a dynamic box */
-    if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
+    if (nbatom->bDynamicBox || !adat->shiftVecUploaded)
      {
          static_assert(sizeof(Float3) == sizeof(nbatom->shift_vec[0]),
                        "Sizes of host- and device-side shift vectors should be the same.");
-        copyToDeviceBuffer(&adat->shift_vec,
+        copyToDeviceBuffer(&adat->shiftVec,
                             reinterpret_cast<const Float3*>(nbatom->shift_vec.data()),
                             0,
                             SHIFTS,
                             localStream,
                             GpuApiCallBehavior::Async,
                             nullptr);
-        adat->bShiftVecUploaded = CL_TRUE;
+        adat->shiftVecUploaded = true;
      }
  }
  
@@ -425,7 +425,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
      bool                 realloced;
      bool                 bDoTime       = nb->bDoTime;
      cl_timers_t*         timers        = nb->timers;
-    cl_atomdata_t*       d_atdat       = nb->atdat;
+    NBAtomData*          d_atdat       = nb->atdat;
      const DeviceContext& deviceContext = *nb->deviceContext_;
      const DeviceStream&  localStream   = *nb->deviceStreams[InteractionLocality::Local];
  
@@ -440,17 +440,17 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
  
      /* need to reallocate if we have to copy more atoms than the amount of space
         available and only allocate if we haven't initialized yet, i.e d_atdat->natoms == -1 */
-    if (natoms > d_atdat->nalloc)
+    if (natoms > d_atdat->numAtomsAlloc)
      {
          nalloc = over_alloc_small(natoms);
  
          /* free up first if the arrays have already been initialized */
-        if (d_atdat->nalloc != -1)
+        if (d_atdat->numAtomsAlloc != -1)
          {
              freeDeviceBuffer(&d_atdat->f);
              freeDeviceBuffer(&d_atdat->xq);
-            freeDeviceBuffer(&d_atdat->lj_comb);
-            freeDeviceBuffer(&d_atdat->atom_types);
+            freeDeviceBuffer(&d_atdat->ljComb);
+            freeDeviceBuffer(&d_atdat->atomTypes);
          }
  
  
@@ -460,19 +460,19 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
          if (useLjCombRule(nb->nbparam->vdwType))
          {
              // Two Lennard-Jones parameters per atom
-            allocateDeviceBuffer(&d_atdat->lj_comb, nalloc, deviceContext);
+            allocateDeviceBuffer(&d_atdat->ljComb, nalloc, deviceContext);
          }
          else
          {
-            allocateDeviceBuffer(&d_atdat->atom_types, nalloc, deviceContext);
+            allocateDeviceBuffer(&d_atdat->atomTypes, nalloc, deviceContext);
          }
  
-        d_atdat->nalloc = nalloc;
-        realloced       = true;
+        d_atdat->numAtomsAlloc = nalloc;
+        realloced              = true;
      }
  
-    d_atdat->natoms       = natoms;
-    d_atdat->natoms_local = nbat->natoms_local;
+    d_atdat->numAtoms      = natoms;
+    d_atdat->numAtomsLocal = nbat->natoms_local;
  
      /* need to clear GPU f output if realloc happened */
      if (realloced)
@@ -484,7 +484,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
      {
          static_assert(sizeof(float) == sizeof(*nbat->params().lj_comb.data()),
                        "Size of the LJ parameters element should be equal to the size of float2.");
-        copyToDeviceBuffer(&d_atdat->lj_comb,
+        copyToDeviceBuffer(&d_atdat->ljComb,
                             reinterpret_cast<const Float2*>(nbat->params().lj_comb.data()),
                             0,
                             natoms,
@@ -496,7 +496,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
      {
          static_assert(sizeof(int) == sizeof(*nbat->params().type.data()),
                        "Sizes of host- and device-side atom types should be the same.");
-        copyToDeviceBuffer(&d_atdat->atom_types,
+        copyToDeviceBuffer(&d_atdat->atomTypes,
                             nbat->params().type.data(),
                             0,
                             natoms,
@@ -592,12 +592,12 @@ void gpu_free(NbnxmGpu* nb)
      /* Free atdat */
      freeDeviceBuffer(&(nb->atdat->xq));
      freeDeviceBuffer(&(nb->atdat->f));
-    freeDeviceBuffer(&(nb->atdat->e_lj));
-    freeDeviceBuffer(&(nb->atdat->e_el));
-    freeDeviceBuffer(&(nb->atdat->fshift));
-    freeDeviceBuffer(&(nb->atdat->lj_comb));
-    freeDeviceBuffer(&(nb->atdat->atom_types));
-    freeDeviceBuffer(&(nb->atdat->shift_vec));
+    freeDeviceBuffer(&(nb->atdat->eLJ));
+    freeDeviceBuffer(&(nb->atdat->eElec));
+    freeDeviceBuffer(&(nb->atdat->fShift));
+    freeDeviceBuffer(&(nb->atdat->ljComb));
+    freeDeviceBuffer(&(nb->atdat->atomTypes));
+    freeDeviceBuffer(&(nb->atdat->shiftVec));
      sfree(nb->atdat);
  
      /* Free nbparam */
@@ -624,14 +624,14 @@ void gpu_free(NbnxmGpu* nb)
      }
  
      /* Free nbst */
-    pfree(nb->nbst.e_lj);
-    nb->nbst.e_lj = nullptr;
+    pfree(nb->nbst.eLJ);
+    nb->nbst.eLJ = nullptr;
  
-    pfree(nb->nbst.e_el);
-    nb->nbst.e_el = nullptr;
+    pfree(nb->nbst.eElec);
+    nb->nbst.eElec = nullptr;
  
-    pfree(nb->nbst.fshift);
-    nb->nbst.fshift = nullptr;
+    pfree(nb->nbst.fShift);
+    nb->nbst.fShift = nullptr;
  
      freeGpuProgram(nb->dev_rundata->program);
      delete nb->dev_rundata;
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h

index 751e3529624533bb33f025dd175ff58df4cb61ff..474d90700d95d3d6aba8600a571f3f235a43884a 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h
@@ -74,62 +74,6 @@ enum ePruneKind
      ePruneNR
  };
  
-/*! \internal
- * \brief Staging area for temporary data downloaded from the GPU.
- *
- *  The energies/shift forces get downloaded here first, before getting added
- *  to the CPU-side aggregate values.
- */
-struct nb_staging_t
-{
-    //! LJ energy
-    float* e_lj = nullptr;
-    //! electrostatic energy
-    float* e_el = nullptr;
-    //! float3 buffer with shift forces
-    Float3* fshift = nullptr;
-};
-
-/*! \internal
- * \brief Nonbonded atom data - both inputs and outputs.
- */
-typedef struct cl_atomdata
-{
-    //! number of atoms
-    int natoms;
-    //! number of local atoms
-    int natoms_local;
-    //! allocation size for the atom data (xq, f)
-    int nalloc;
-
-    //! float4 buffer with atom coordinates + charges, size natoms
-    DeviceBuffer<Float4> xq;
-
-    //! float3 buffer with force output array, size natoms
-    DeviceBuffer<Float3> f;
-
-    //! LJ energy output, size 1
-    DeviceBuffer<float> e_lj;
-    //! Electrostatics energy input, size 1
-    DeviceBuffer<float> e_el;
-
-    //! float3 buffer with shift forces
-    DeviceBuffer<Float3> fshift;
-
-    //! number of atom types
-    int ntypes;
-    //! int buffer with atom type indices, size natoms
-    DeviceBuffer<int> atom_types;
-    //! float2 buffer with sqrt(c6),sqrt(c12), size natoms
-    DeviceBuffer<Float2> lj_comb;
-
-    //! float3 buffer with shifts values
-    DeviceBuffer<Float3> shift_vec;
-
-    //! true if the shift vector has been uploaded
-    bool bShiftVecUploaded;
-} cl_atomdata_t;
-
  /*! \internal
   * \brief Data structure shared between the OpenCL device code and OpenCL host code
   *
@@ -229,13 +173,13 @@ struct NbnxmGpu
      bool bNonLocalStreamDoneMarked = false;
  
      //! atom data
-    cl_atomdata_t* atdat = nullptr;
+    NBAtomData* atdat = nullptr;
      //! parameters required for the non-bonded calc.
      NBParamGpu* nbparam = nullptr;
      //! pair-list data structures (local and non-local)
      gmx::EnumerationArray<Nbnxm::InteractionLocality, Nbnxm::gpu_plist*> plist = { nullptr };
      //! staging area where fshift/energies get downloaded
-    nb_staging_t nbst;
+    NBStagingData nbst;
  
      //! local and non-local GPU queues
      gmx::EnumerationArray<Nbnxm::InteractionLocality, const DeviceStream*> deviceStreams;
diff --git a/src/gromacs/nbnxm/sycl/nbnxm_sycl.cpp b/src/gromacs/nbnxm/sycl/nbnxm_sycl.cpp

index c313deb1106215f7db59f6b1618240231ebf1fbb..60ba8b32f483d33ddfca6700c1194c3771fe8566 100644 (file)
--- a/src/gromacs/nbnxm/sycl/nbnxm_sycl.cpp
+++ b/src/gromacs/nbnxm/sycl/nbnxm_sycl.cpp
@@ -87,7 +87,7 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
                 "beginning of the copy back function.");
  
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
-    sycl_atomdata_t*    adat         = nb->atdat;
+    NBAtomData*         adat         = nb->atdat;
  
      /* don't launch non-local copy-back if there was no non-local work to do */
      if ((iloc == InteractionLocality::NonLocal) && !haveGpuShortRangeWork(*nb, iloc))
@@ -138,24 +138,24 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
          /* DtoH fshift when virial is needed */
          if (stepWork.computeVirial)
          {
-            GMX_ASSERT(sizeof(*nb->nbst.fshift) == adat->fShift.elementSize(),
+            GMX_ASSERT(sizeof(*nb->nbst.fShift) == adat->fShift.elementSize(),
                         "Sizes of host- and device-side shift vector elements should be the same.");
              copyFromDeviceBuffer(
-                    nb->nbst.fshift, &adat->fShift, 0, SHIFTS, deviceStream, GpuApiCallBehavior::Async, nullptr);
+                    nb->nbst.fShift, &adat->fShift, 0, SHIFTS, deviceStream, GpuApiCallBehavior::Async, nullptr);
          }
  
          /* DtoH energies */
          if (stepWork.computeEnergy)
          {
-            GMX_ASSERT(sizeof(*nb->nbst.e_lj) == sizeof(float),
+            GMX_ASSERT(sizeof(*nb->nbst.eLJ) == sizeof(float),
                         "Sizes of host- and device-side LJ energy terms should be the same.");
              copyFromDeviceBuffer(
-                    nb->nbst.e_lj, &adat->eLJ, 0, 1, deviceStream, GpuApiCallBehavior::Async, nullptr);
-            GMX_ASSERT(sizeof(*nb->nbst.e_el) == sizeof(float),
+                    nb->nbst.eLJ, &adat->eLJ, 0, 1, deviceStream, GpuApiCallBehavior::Async, nullptr);
+            GMX_ASSERT(sizeof(*nb->nbst.eElec) == sizeof(float),
                         "Sizes of host- and device-side electrostatic energy terms should be the "
                         "same.");
              copyFromDeviceBuffer(
-                    nb->nbst.e_el, &adat->eElec, 0, 1, deviceStream, GpuApiCallBehavior::Async, nullptr);
+                    nb->nbst.eElec, &adat->eElec, 0, 1, deviceStream, GpuApiCallBehavior::Async, nullptr);
          }
      }
  }
@@ -168,7 +168,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
  
      const InteractionLocality iloc = gpuAtomToInteractionLocality(atomLocality);
  
-    sycl_atomdata_t*    adat         = nb->atdat;
+    NBAtomData*         adat         = nb->atdat;
      gpu_plist*          plist        = nb->plist[iloc];
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
diff --git a/src/gromacs/nbnxm/sycl/nbnxm_sycl_data_mgmt.cpp b/src/gromacs/nbnxm/sycl/nbnxm_sycl_data_mgmt.cpp

index e033a507e8f44698bf3dc26dac55f8e5f9a8f255..0f67f04f0a0bf181ef5dc48c085bd53145d5dc7f 100644 (file)
--- a/src/gromacs/nbnxm/sycl/nbnxm_sycl_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/sycl/nbnxm_sycl_data_mgmt.cpp
@@ -59,10 +59,10 @@ namespace Nbnxm
  //! This function is documented in the header file
  void gpu_clear_outputs(NbnxmGpu* nb, bool computeVirial)
  {
-    sycl_atomdata_t*    adat        = nb->atdat;
+    NBAtomData*         adat        = nb->atdat;
      const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
      // Clear forces
-    clearDeviceBufferAsync(&adat->f, 0, nb->atdat->natoms, localStream);
+    clearDeviceBufferAsync(&adat->f, 0, nb->atdat->numAtoms, localStream);
      // Clear shift force array and energies if the outputs were used in the current step
      if (computeVirial)
      {
@@ -76,7 +76,7 @@ void gpu_clear_outputs(NbnxmGpu* nb, bool computeVirial)
  static void initAtomdataFirst(NbnxmGpu* nb, int numTypes, const DeviceContext& deviceContext)
  {
      const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
-    sycl_atomdata_t*    atomdata    = nb->atdat;
+    NBAtomData*         atomdata    = nb->atdat;
      atomdata->numTypes              = numTypes;
      allocateDeviceBuffer(&atomdata->shiftVec, SHIFTS, deviceContext);
      atomdata->shiftVecUploaded = false;
@@ -95,8 +95,8 @@ static void initAtomdataFirst(NbnxmGpu* nb, int numTypes, const DeviceContext& d
      atomdata->f  = nullptr;
  
      /* size -1 indicates that the respective array hasn't been initialized yet */
-    atomdata->natoms   = -1;
-    atomdata->numAlloc = -1;
+    atomdata->numAtoms      = -1;
+    atomdata->numAtomsAlloc = -1;
  }
  
  /*! \brief Initialize the nonbonded parameter data structure. */
@@ -144,7 +144,7 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager,
  {
      auto* nb                              = new NbnxmGpu();
      nb->deviceContext_                    = &deviceStreamManager.context();
-    nb->atdat                             = new sycl_atomdata_t;
+    nb->atdat                             = new NBAtomData;
      nb->nbparam                           = new NBParamGpu;
      nb->plist[InteractionLocality::Local] = new Nbnxm::gpu_plist;
      if (bLocalAndNonlocal)
@@ -158,9 +158,9 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager,
      nb->timings = nullptr;
  
      /* init nbst */
-    pmalloc(reinterpret_cast<void**>(&nb->nbst.e_lj), sizeof(*nb->nbst.e_lj));
-    pmalloc(reinterpret_cast<void**>(&nb->nbst.e_el), sizeof(*nb->nbst.e_el));
-    pmalloc(reinterpret_cast<void**>(&nb->nbst.fshift), SHIFTS * sizeof(*nb->nbst.fshift));
+    pmalloc(reinterpret_cast<void**>(&nb->nbst.eLJ), sizeof(*nb->nbst.eLJ));
+    pmalloc(reinterpret_cast<void**>(&nb->nbst.eElec), sizeof(*nb->nbst.eElec));
+    pmalloc(reinterpret_cast<void**>(&nb->nbst.fShift), SHIFTS * sizeof(*nb->nbst.fShift));
  
      init_plist(nb->plist[InteractionLocality::Local]);
  
@@ -195,7 +195,7 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager,
  
  void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
  {
-    sycl_atomdata_t*    adat        = nb->atdat;
+    NBAtomData*         adat        = nb->atdat;
      const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
  
      /* only if we have a dynamic box */
@@ -217,18 +217,18 @@ void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
  void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
  {
      GMX_ASSERT(!nb->bDoTime, "Timing on SYCL not supported yet");
-    sycl_atomdata_t*     atdat         = nb->atdat;
+    NBAtomData*          atdat         = nb->atdat;
      const DeviceContext& deviceContext = *nb->deviceContext_;
      const DeviceStream&  localStream   = *nb->deviceStreams[InteractionLocality::Local];
  
      int  numAtoms    = nbat->numAtoms();
      bool reallocated = false;
-    if (numAtoms > atdat->numAlloc)
+    if (numAtoms > atdat->numAtomsAlloc)
      {
          int numAlloc = over_alloc_small(numAtoms);
  
          /* free up first if the arrays have already been initialized */
-        if (atdat->numAlloc != -1)
+        if (atdat->numAtomsAlloc != -1)
          {
              freeDeviceBuffer(&atdat->f);
              freeDeviceBuffer(&atdat->xq);
@@ -247,17 +247,17 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
              allocateDeviceBuffer(&atdat->atomTypes, numAlloc, deviceContext);
          }
  
-        atdat->numAlloc = numAlloc;
-        reallocated     = true;
+        atdat->numAtomsAlloc = numAlloc;
+        reallocated          = true;
      }
  
-    atdat->natoms       = numAtoms;
-    atdat->natoms_local = nbat->natoms_local;
+    atdat->numAtoms      = numAtoms;
+    atdat->numAtomsLocal = nbat->natoms_local;
  
      /* need to clear GPU f output if realloc happened */
      if (reallocated)
      {
-        clearDeviceBufferAsync(&atdat->f, 0, atdat->numAlloc, localStream);
+        clearDeviceBufferAsync(&atdat->f, 0, atdat->numAtomsAlloc, localStream);
      }
  
      if (useLjCombRule(nb->nbparam->vdwType))
@@ -293,8 +293,8 @@ void gpu_free(NbnxmGpu* nb)
          return;
      }
  
-    sycl_atomdata_t* atdat   = nb->atdat;
-    NBParamGpu*      nbparam = nb->nbparam;
+    NBAtomData* atdat   = nb->atdat;
+    NBParamGpu* nbparam = nb->nbparam;
  
      if ((!nbparam->coulomb_tab)
          && (nbparam->elecType == ElecType::EwaldTab || nbparam->elecType == ElecType::EwaldTabTwin))
@@ -322,14 +322,14 @@ void gpu_free(NbnxmGpu* nb)
      }
  
      /* Free nbst */
-    pfree(nb->nbst.e_lj);
-    nb->nbst.e_lj = nullptr;
+    pfree(nb->nbst.eLJ);
+    nb->nbst.eLJ = nullptr;
  
-    pfree(nb->nbst.e_el);
-    nb->nbst.e_el = nullptr;
+    pfree(nb->nbst.eElec);
+    nb->nbst.eElec = nullptr;
  
-    pfree(nb->nbst.fshift);
-    nb->nbst.fshift = nullptr;
+    pfree(nb->nbst.fShift);
+    nb->nbst.fShift = nullptr;
  
      delete atdat;
      delete nbparam;
diff --git a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp

index 946eb2dd4d955b9680908ddaed0f2a8d018841a7..770732c70cc9eda458b02717ce98c64135425642 100644 (file)
--- a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
+++ b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
@@ -962,7 +962,7 @@ cl::sycl::event chooseAndLaunchNbnxmKernel(bool          doPruneNBL,
  
  void launchNbnxmKernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const InteractionLocality iloc)
  {
-    sycl_atomdata_t*    adat         = nb->atdat;
+    NBAtomData*         adat         = nb->atdat;
      NBParamGpu*         nbp          = nb->nbparam;
      gpu_plist*          plist        = nb->plist[iloc];
      const bool          doPruneNBL   = (plist->haveFreshList && !nb->didPrune[iloc]);
diff --git a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel_pruneonly.cpp b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel_pruneonly.cpp

index cf9ce2f67d45d0fb602b002fdc046718456b4ed9..62f4bb7592c835e07733cd37c4648a194cbc3a6f 100644 (file)
--- a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel_pruneonly.cpp
+++ b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel_pruneonly.cpp
@@ -264,7 +264,7 @@ void launchNbnxmKernelPruneOnly(NbnxmGpu*                 nb,
                                  const int                 part,
                                  const int                 numSciInPart)
  {
-    sycl_atomdata_t*    adat          = nb->atdat;
+    NBAtomData*         adat          = nb->atdat;
      NBParamGpu*         nbp           = nb->nbparam;
      gpu_plist*          plist         = nb->plist[iloc];
      const bool          haveFreshList = plist->haveFreshList;
diff --git a/src/gromacs/nbnxm/sycl/nbnxm_sycl_types.h b/src/gromacs/nbnxm/sycl/nbnxm_sycl_types.h

index ba14d9b86740813616545e01bdbfdc92c626e4c6..c1e23c1a7420f94bf59801e59a12f75f35c26759 100644 (file)
--- a/src/gromacs/nbnxm/sycl/nbnxm_sycl_types.h
+++ b/src/gromacs/nbnxm/sycl/nbnxm_sycl_types.h
@@ -55,60 +55,6 @@
  #include "gromacs/timing/gpu_timing.h"
  #include "gromacs/utility/enumerationhelpers.h"
  
-/*! \internal
- * \brief Staging area for temporary data downloaded from the GPU.
- *
- * Since SYCL buffers already have host-side storage, this is a bit redundant.
- * But it allows prefetching of the data from GPU, and brings GPU backends closer together.
- */
-struct nb_staging_t
-{
-    //! LJ energy
-    float* e_lj = nullptr;
-    //! electrostatic energy
-    float* e_el = nullptr;
-    //! shift forces
-    Float3* fshift = nullptr;
-};
-
-/** \internal
- * \brief Nonbonded atom data - both inputs and outputs.
- */
-struct sycl_atomdata_t
-{
-    //! number of atoms
-    int natoms;
-    //! number of local atoms
-    int natoms_local; //
-    //! allocation size for the atom data (xq, f)
-    int numAlloc;
-
-    //! atom coordinates + charges, size \ref natoms
-    DeviceBuffer<Float4> xq;
-    //! force output array, size \ref natoms
-    DeviceBuffer<Float3> f;
-
-    //! LJ energy output, size 1
-    DeviceBuffer<float> eLJ;
-    //! Electrostatics energy input, size 1
-    DeviceBuffer<float> eElec;
-
-    //! shift forces
-    DeviceBuffer<Float3> fShift;
-
-    //! number of atom types
-    int numTypes;
-    //! atom type indices, size \ref natoms
-    DeviceBuffer<int> atomTypes;
-    //! sqrt(c6),sqrt(c12) size \ref natoms
-    DeviceBuffer<Float2> ljComb;
-
-    //! shifts
-    DeviceBuffer<Float3> shiftVec;
-    //! true if the shift vector has been uploaded
-    bool shiftVecUploaded;
-};
-
  class GpuEventSynchronizer;
  
  /*! \internal
@@ -126,13 +72,13 @@ struct NbnxmGpu
      /*! \brief true indicates that the nonlocal_done event was marked */
      bool bNonLocalStreamDoneMarked = false;
      /*! \brief atom data */
-    sycl_atomdata_t* atdat = nullptr;
+    NBAtomData* atdat = nullptr;
  
      NBParamGpu* nbparam = nullptr;
      /*! \brief pair-list data structures (local and non-local) */
      gmx::EnumerationArray<Nbnxm::InteractionLocality, Nbnxm::gpu_plist*> plist = { { nullptr } };
      /*! \brief staging area where fshift/energies get downloaded. Will be removed in SYCL. */
-    nb_staging_t nbst;
+    NBStagingData nbst;
      /*! \brief local and non-local GPU streams */
      gmx::EnumerationArray<Nbnxm::InteractionLocality, const DeviceStream*> deviceStreams;
author	Artem Zhmurov <zhmurov@gmail.com>
	Mon, 22 Feb 2021 14:15:16 +0000 (14:15 +0000)
committer	Joe Jordan <ejjordan12@gmail.com>
	Mon, 22 Feb 2021 14:15:16 +0000 (14:15 +0000)
src/gromacs/nbnxm/cuda/nbnxm_cuda.cu		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel.cuh		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cu		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda_kernel_pruneonly.cuh		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h		patch \| blob \| history
src/gromacs/nbnxm/gpu_common.h		patch \| blob \| history
src/gromacs/nbnxm/gpu_types_common.h		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h		patch \| blob \| history
src/gromacs/nbnxm/sycl/nbnxm_sycl.cpp		patch \| blob \| history
src/gromacs/nbnxm/sycl/nbnxm_sycl_data_mgmt.cpp		patch \| blob \| history
src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp		patch \| blob \| history
src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel_pruneonly.cpp		patch \| blob \| history
src/gromacs/nbnxm/sycl/nbnxm_sycl_types.h		patch \| blob \| history