Unify more functions in CUDA and OpenCL implementations of NBNXM

author Artem Zhmurov <zhmurov@gmail.com>

Thu, 10 Sep 2020 05:39:44 +0000 (05:39 +0000)

committer Joe Jordan <ejjordan12@gmail.com>

Thu, 10 Sep 2020 05:39:44 +0000 (05:39 +0000)
author Artem Zhmurov <zhmurov@gmail.com>
Thu, 10 Sep 2020 05:39:44 +0000 (05:39 +0000)
committer Joe Jordan <ejjordan12@gmail.com>
Thu, 10 Sep 2020 05:39:44 +0000 (05:39 +0000)
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu

index ea1261ee2f2fc45d9750c668979f91e77a96cbd9..f754cc4795c1890b9c719759d66f4bfadeefa41e 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
@@ -226,25 +226,6 @@ static void init_nbparam(NBParamGpu*                     nbp,
      }
  }
  
-/*! Re-generate the GPU Ewald force table, resets rlist, and update the
- *  electrostatic type switching to twin cut-off (or back) if needed. */
-void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interaction_const_t* ic)
-{
-    if (!nbv || !nbv->useGpu())
-    {
-        return;
-    }
-    NbnxmGpu*   nb  = nbv->gpu_nbv;
-    NBParamGpu* nbp = nbv->gpu_nbv->nbparam;
-
-    set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
-
-    nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
-
-    GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
-    init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_);
-}
-
  /*! Initializes simulation constant data. */
  static void cuda_init_const(NbnxmGpu*                       nb,
                              const interaction_const_t*      ic,
@@ -351,64 +332,6 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager,
      return nb;
  }
  
-void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
-{
-    char                sbuf[STRLEN];
-    bool                bDoTime      = (nb->bDoTime && !h_plist->sci.empty());
-    const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
-    gpu_plist*          d_plist      = nb->plist[iloc];
-
-    if (d_plist->na_c < 0)
-    {
-        d_plist->na_c = h_plist->na_ci;
-    }
-    else
-    {
-        if (d_plist->na_c != h_plist->na_ci)
-        {
-            sprintf(sbuf, "In init_plist: the #atoms per cell has changed (from %d to %d)",
-                    d_plist->na_c, h_plist->na_ci);
-            gmx_incons(sbuf);
-        }
-    }
-
-    gpu_timers_t::Interaction& iTimers = nb->timers->interaction[iloc];
-
-    if (bDoTime)
-    {
-        iTimers.pl_h2d.openTimingRegion(deviceStream);
-        iTimers.didPairlistH2D = true;
-    }
-
-    const DeviceContext& deviceContext = *nb->deviceContext_;
-
-    reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
-                           deviceContext);
-    copyToDeviceBuffer(&d_plist->sci, h_plist->sci.data(), 0, h_plist->sci.size(), deviceStream,
-                       GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
-
-    reallocateDeviceBuffer(&d_plist->cj4, h_plist->cj4.size(), &d_plist->ncj4, &d_plist->cj4_nalloc,
-                           deviceContext);
-    copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4.data(), 0, h_plist->cj4.size(), deviceStream,
-                       GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
-
-    reallocateDeviceBuffer(&d_plist->imask, h_plist->cj4.size() * c_nbnxnGpuClusterpairSplit,
-                           &d_plist->nimask, &d_plist->imask_nalloc, deviceContext);
-
-    reallocateDeviceBuffer(&d_plist->excl, h_plist->excl.size(), &d_plist->nexcl,
-                           &d_plist->excl_nalloc, deviceContext);
-    copyToDeviceBuffer(&d_plist->excl, h_plist->excl.data(), 0, h_plist->excl.size(), deviceStream,
-                       GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
-
-    if (bDoTime)
-    {
-        iTimers.pl_h2d.closeTimingRegion(deviceStream);
-    }
-
-    /* the next use of thist list we be the first one, so we need to prune */
-    d_plist->haveFreshList = true;
-}
-
  void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
  {
      cu_atomdata_t*      adat        = nb->atdat;
@@ -621,31 +544,12 @@ void gpu_free(NbnxmGpu* nb)
      }
  }
  
-//! This function is documented in the header file
-gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(NbnxmGpu* nb)
-{
-    return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr;
-}
-
-void gpu_reset_timings(nonbonded_verlet_t* nbv)
-{
-    if (nbv->gpu_nbv && nbv->gpu_nbv->bDoTime)
-    {
-        init_timings(nbv->gpu_nbv->timings);
-    }
-}
-
  int gpu_min_ci_balanced(NbnxmGpu* nb)
  {
      return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceContext_->deviceInfo().prop.multiProcessorCount
                           : 0;
  }
  
-gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
-{
-    return ((nb->nbparam->eeltype == eelTypeEWALD_ANA) || (nb->nbparam->eeltype == eelTypeEWALD_ANA_TWIN));
-}
-
  void* gpu_get_xq(NbnxmGpu* nb)
  {
      assert(nb);
diff --git a/src/gromacs/nbnxm/gpu_data_mgmt.h b/src/gromacs/nbnxm/gpu_data_mgmt.h

index a1ee291ae87fade1b49fc10c424803052cd81329..a472cb437dabaf08cc4b8603cdf692614000ca85 100644 (file)
--- a/src/gromacs/nbnxm/gpu_data_mgmt.h
+++ b/src/gromacs/nbnxm/gpu_data_mgmt.h
@@ -122,7 +122,7 @@ int gpu_min_ci_balanced(NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(-1);
  
  /** Returns if analytical Ewald GPU kernels are used. */
  GPU_FUNC_QUALIFIER
-gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(FALSE);
+bool gpu_is_kernel_ewald_analytical(const NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(FALSE);
  
  /** Returns an opaque pointer to the GPU command stream
   *  Note: CUDA only.
diff --git a/src/gromacs/nbnxm/kerneldispatch.cpp b/src/gromacs/nbnxm/kerneldispatch.cpp

index ac8fec7f51a40146a33aadf962ababec707510bd..aaf9bd863e04a50c198bf840f3998fc9a176165e 100644 (file)
--- a/src/gromacs/nbnxm/kerneldispatch.cpp
+++ b/src/gromacs/nbnxm/kerneldispatch.cpp
@@ -61,6 +61,7 @@
  
  #include "kernel_common.h"
  #include "nbnxm_gpu.h"
+#include "nbnxm_gpu_data_mgmt.h"
  #include "nbnxm_simd.h"
  #include "pairlistset.h"
  #include "pairlistsets.h"
diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp

index 105ceefb724c4fba642aa45bf0881aa6506a4b46..53ee861fd2a144848d4ef6935eeb547c254c1b36 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp
@@ -58,9 +58,12 @@
  
  #include "nbnxm_gpu_data_mgmt.h"
  
+#include "gromacs/nbnxm/gpu_data_mgmt.h"
  #include "gromacs/timing/gpu_timing.h"
+#include "gromacs/utility/cstringutil.h"
  
  #include "nbnxm_gpu.h"
+#include "pairlistsets.h"
  
  namespace Nbnxm
  {
@@ -155,6 +158,23 @@ void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const
      nbp->vdw_switch       = ic->vdw_switch;
  }
  
+void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interaction_const_t* ic)
+{
+    if (!nbv || !nbv->useGpu())
+    {
+        return;
+    }
+    NbnxmGpu*   nb  = nbv->gpu_nbv;
+    NBParamGpu* nbp = nb->nbparam;
+
+    set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
+
+    nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
+
+    GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
+    init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_);
+}
+
  void init_plist(gpu_plist* pl)
  {
      /* initialize to nullptr pointers to data that is not allocated here and will
@@ -200,4 +220,87 @@ void init_timings(gmx_wallclock_gpu_nbnxn_t* t)
      t->dynamicPruneTime.t = 0.0;
  }
  
+//! This function is documented in the header file
+void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
+{
+    char sbuf[STRLEN];
+    // Timing accumulation should happen only if there was work to do
+    // because getLastRangeTime() gets skipped with empty lists later
+    // which leads to the counter not being reset.
+    bool                bDoTime      = (nb->bDoTime && !h_plist->sci.empty());
+    const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
+    gpu_plist*          d_plist      = nb->plist[iloc];
+
+    if (d_plist->na_c < 0)
+    {
+        d_plist->na_c = h_plist->na_ci;
+    }
+    else
+    {
+        if (d_plist->na_c != h_plist->na_ci)
+        {
+            sprintf(sbuf, "In init_plist: the #atoms per cell has changed (from %d to %d)",
+                    d_plist->na_c, h_plist->na_ci);
+            gmx_incons(sbuf);
+        }
+    }
+
+    gpu_timers_t::Interaction& iTimers = nb->timers->interaction[iloc];
+
+    if (bDoTime)
+    {
+        iTimers.pl_h2d.openTimingRegion(deviceStream);
+        iTimers.didPairlistH2D = true;
+    }
+
+    // TODO most of this function is same in CUDA and OpenCL, move into the header
+    const DeviceContext& deviceContext = *nb->deviceContext_;
+
+    reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
+                           deviceContext);
+    copyToDeviceBuffer(&d_plist->sci, h_plist->sci.data(), 0, h_plist->sci.size(), deviceStream,
+                       GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
+
+    reallocateDeviceBuffer(&d_plist->cj4, h_plist->cj4.size(), &d_plist->ncj4, &d_plist->cj4_nalloc,
+                           deviceContext);
+    copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4.data(), 0, h_plist->cj4.size(), deviceStream,
+                       GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
+
+    reallocateDeviceBuffer(&d_plist->imask, h_plist->cj4.size() * c_nbnxnGpuClusterpairSplit,
+                           &d_plist->nimask, &d_plist->imask_nalloc, deviceContext);
+
+    reallocateDeviceBuffer(&d_plist->excl, h_plist->excl.size(), &d_plist->nexcl,
+                           &d_plist->excl_nalloc, deviceContext);
+    copyToDeviceBuffer(&d_plist->excl, h_plist->excl.data(), 0, h_plist->excl.size(), deviceStream,
+                       GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
+
+    if (bDoTime)
+    {
+        iTimers.pl_h2d.closeTimingRegion(deviceStream);
+    }
+
+    /* need to prune the pair list during the next step */
+    d_plist->haveFreshList = true;
+}
+
+//! This function is documented in the header file
+gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(NbnxmGpu* nb)
+{
+    return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr;
+}
+
+//! This function is documented in the header file
+void gpu_reset_timings(nonbonded_verlet_t* nbv)
+{
+    if (nbv->gpu_nbv && nbv->gpu_nbv->bDoTime)
+    {
+        init_timings(nbv->gpu_nbv->timings);
+    }
+}
+
+bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
+{
+    return ((nb->nbparam->eeltype == eelTypeEWALD_ANA) || (nb->nbparam->eeltype == eelTypeEWALD_ANA_TWIN));
+}
+
  } // namespace Nbnxm
diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h

index 8c17e7749df2ad680dcc627036b239c6306c6805..761737ddf0c50a2f19a1c335796bb4e0434ac85c 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h
+++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h
@@ -48,6 +48,11 @@ struct interaction_const_t;
  struct NBParamGpu;
  struct PairlistParams;
  
+namespace gmx
+{
+enum class InteractionLocality;
+}
+
  namespace Nbnxm
  {
  
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp

index f47d754e2f5da8b2ca7829ff025e6053e4e81ede..19b861db0ce02ce59408146b07e406cdd20ad2b5 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
@@ -249,24 +249,6 @@ static void init_nbparam(NBParamGpu*                     nbp,
      }
  }
  
-//! This function is documented in the header file
-void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interaction_const_t* ic)
-{
-    if (!nbv || !nbv->useGpu())
-    {
-        return;
-    }
-    NbnxmGpu*   nb  = nbv->gpu_nbv;
-    NBParamGpu* nbp = nb->nbparam;
-
-    set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
-
-    nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
-
-    GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
-    init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_);
-}
-
  /*! \brief Initializes the OpenCL kernel pointers of the nbnxn_ocl_ptr_t input data structure. */
  static cl_kernel nbnxn_gpu_create_kernel(NbnxmGpu* nb, const char* kernel_name)
  {
@@ -478,69 +460,6 @@ void gpu_clear_outputs(NbnxmGpu* nb, bool computeVirial)
      GMX_ASSERT(cl_error == CL_SUCCESS, ("clFlush failed: " + ocl_get_error_string(cl_error)).c_str());
  }
  
-//! This function is documented in the header file
-void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
-{
-    char sbuf[STRLEN];
-    // Timing accumulation should happen only if there was work to do
-    // because getLastRangeTime() gets skipped with empty lists later
-    // which leads to the counter not being reset.
-    bool                bDoTime      = (nb->bDoTime && !h_plist->sci.empty());
-    const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
-    gpu_plist*          d_plist      = nb->plist[iloc];
-
-    if (d_plist->na_c < 0)
-    {
-        d_plist->na_c = h_plist->na_ci;
-    }
-    else
-    {
-        if (d_plist->na_c != h_plist->na_ci)
-        {
-            sprintf(sbuf, "In init_plist: the #atoms per cell has changed (from %d to %d)",
-                    d_plist->na_c, h_plist->na_ci);
-            gmx_incons(sbuf);
-        }
-    }
-
-    gpu_timers_t::Interaction& iTimers = nb->timers->interaction[iloc];
-
-    if (bDoTime)
-    {
-        iTimers.pl_h2d.openTimingRegion(deviceStream);
-        iTimers.didPairlistH2D = true;
-    }
-
-    // TODO most of this function is same in CUDA and OpenCL, move into the header
-    const DeviceContext& deviceContext = *nb->deviceContext_;
-
-    reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
-                           deviceContext);
-    copyToDeviceBuffer(&d_plist->sci, h_plist->sci.data(), 0, h_plist->sci.size(), deviceStream,
-                       GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
-
-    reallocateDeviceBuffer(&d_plist->cj4, h_plist->cj4.size(), &d_plist->ncj4, &d_plist->cj4_nalloc,
-                           deviceContext);
-    copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4.data(), 0, h_plist->cj4.size(), deviceStream,
-                       GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
-
-    reallocateDeviceBuffer(&d_plist->imask, h_plist->cj4.size() * c_nbnxnGpuClusterpairSplit,
-                           &d_plist->nimask, &d_plist->imask_nalloc, deviceContext);
-
-    reallocateDeviceBuffer(&d_plist->excl, h_plist->excl.size(), &d_plist->nexcl,
-                           &d_plist->excl_nalloc, deviceContext);
-    copyToDeviceBuffer(&d_plist->excl, h_plist->excl.data(), 0, h_plist->excl.size(), deviceStream,
-                       GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
-
-    if (bDoTime)
-    {
-        iTimers.pl_h2d.closeTimingRegion(deviceStream);
-    }
-
-    /* need to prune the pair list during the next step */
-    d_plist->haveFreshList = true;
-}
-
  //! This function is documented in the header file
  void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
  {
@@ -787,31 +706,10 @@ void gpu_free(NbnxmGpu* nb)
      }
  }
  
-//! This function is documented in the header file
-gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(NbnxmGpu* nb)
-{
-    return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr;
-}
-
-//! This function is documented in the header file
-void gpu_reset_timings(nonbonded_verlet_t* nbv)
-{
-    if (nbv->gpu_nbv && nbv->gpu_nbv->bDoTime)
-    {
-        init_timings(nbv->gpu_nbv->timings);
-    }
-}
-
  //! This function is documented in the header file
  int gpu_min_ci_balanced(NbnxmGpu* nb)
  {
      return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceContext_->deviceInfo().compute_units : 0;
  }
  
-//! This function is documented in the header file
-gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
-{
-    return ((nb->nbparam->eeltype == eelTypeEWALD_ANA) || (nb->nbparam->eeltype == eelTypeEWALD_ANA_TWIN));
-}
-
  } // namespace Nbnxm
author	Artem Zhmurov <zhmurov@gmail.com>
	Thu, 10 Sep 2020 05:39:44 +0000 (05:39 +0000)
committer	Joe Jordan <ejjordan12@gmail.com>
	Thu, 10 Sep 2020 05:39:44 +0000 (05:39 +0000)
src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu		patch \| blob \| history
src/gromacs/nbnxm/gpu_data_mgmt.h		patch \| blob \| history
src/gromacs/nbnxm/kerneldispatch.cpp		patch \| blob \| history
src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp		patch \| blob \| history
src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp		patch \| blob \| history