Clean some headers in NBNXM GPU
author Artem Zhmurov <zhmurov@gmail.com>
Sun, 11 Apr 2021 17:30:41 +0000 (20:30 +0300)
committer Paul Bauer <paul.bauer.q@gmail.com>
Mon, 12 Apr 2021 06:54:46 +0000 (06:54 +0000)
1. Remove declarations of non-existing functions.
2. Remove declarations of internal helper functions,
   move some of the helper functions up in the
   respective sources so that their declaration can
   be removed from the headers.
3. Remove unnecessary includes.

Refs #2608

src/gromacs/nbnxm/gpu_common_utils.h
src/gromacs/nbnxm/gpu_data_mgmt.h
src/gromacs/nbnxm/kerneldispatch.cpp
src/gromacs/nbnxm/nbnxm_gpu.h
src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp
src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h

index a3baa742d56e15f0f2e35c390e19863a5893c7ff..0d81e2f05a4dcb786aa91ec5752ea10db65b27f6 100644 (file)
 #ifndef GMX_NBNXM_GPU_COMMON_UTILS_H
 #define GMX_NBNXM_GPU_COMMON_UTILS_H
 
-#include "config.h"
-
 #include "gromacs/listed_forces/gpubonded.h"
+#include "gromacs/mdtypes/locality.h"
+#include "gromacs/nbnxm/gpu_types_common.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/range.h"
-#include "gromacs/nbnxm/nbnxm_gpu.h"
-
-#if GMX_GPU_CUDA
-#    include "cuda/nbnxm_cuda_types.h"
-#endif
-
-#if GMX_GPU_OPENCL
-#    include "opencl/nbnxm_ocl_types.h"
-#endif
 
 namespace Nbnxm
 {
index d9f81f1d8deb0a27080b61d5518a844fc63a35a3..3f04782c4f52a95158763a9945bfac2800dc958b 100644 (file)
@@ -127,25 +127,6 @@ int gpu_min_ci_balanced(NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(-1);
 GPU_FUNC_QUALIFIER
 bool gpu_is_kernel_ewald_analytical(const NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(FALSE);
 
-/** Return the enum value of electrostatics kernel type for given interaction parameters \p ic. */
-GPU_FUNC_QUALIFIER
-enum ElecType nbnxmGpuPickElectrostaticsKernelType(const interaction_const_t gmx_unused& ic,
-                                                   const DeviceInformation gmx_unused& deviceInfo)
-        GPU_FUNC_TERM_WITH_RETURN(ElecType::Count);
-
-/** Return the enum value of VdW kernel type for given \p ic and \p combRule. */
-GPU_FUNC_QUALIFIER
-enum VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t gmx_unused& ic,
-                                       LJCombinationRule gmx_unused ljCombinationRule)
-        GPU_FUNC_TERM_WITH_RETURN(VdwType::Count);
-
-/** Returns an opaque pointer to the GPU command stream
- *  Note: CUDA only.
- */
-CUDA_FUNC_QUALIFIER
-const DeviceStream* gpu_get_command_stream(NbnxmGpu gmx_unused* nb, gmx::InteractionLocality gmx_unused iloc)
-        CUDA_FUNC_TERM_WITH_RETURN(nullptr);
-
 /** Returns an opaque pointer to the GPU coordinate+charge array
  *  Note: CUDA only.
  */
index 9b026403c9f5aab52ca98062a07c6443822bd077..fa689d422761968173669b0f0493e92801c7fd40 100644 (file)
@@ -63,7 +63,6 @@
 
 #include "kernel_common.h"
 #include "nbnxm_gpu.h"
-#include "nbnxm_gpu_data_mgmt.h"
 #include "nbnxm_simd.h"
 #include "pairlistset.h"
 #include "pairlistsets.h"
index 7ea060af495603074283b717469b9880f5d1b4ea..eb1becbf7d466605e365337e50d72b59e175f027 100644 (file)
@@ -306,12 +306,6 @@ GPU_FUNC_QUALIFIER
 bool haveGpuShortRangeWork(const NbnxmGpu gmx_unused* nb, gmx::InteractionLocality gmx_unused interactionLocality)
         GPU_FUNC_TERM_WITH_RETURN(false);
 
-/*! \brief sync CPU thread on coordinate copy to device
- * \param[in] nb                   The nonbonded data GPU structure
- */
-CUDA_FUNC_QUALIFIER
-void nbnxn_wait_x_on_device(NbnxmGpu gmx_unused* nb) CUDA_FUNC_TERM;
-
 /*! \brief Get the pointer to the GPU nonbonded force buffer
  *
  * \param[in] nb  The nonbonded data GPU structure
index bd0284963688a2c17524504dd4cc4207adb13514..e1df9d0a418e479f66b0e36d994d4bce46261083 100644 (file)
@@ -84,7 +84,7 @@
 namespace Nbnxm
 {
 
-inline void issueClFlushInStream(const DeviceStream& deviceStream)
+static inline void issueClFlushInStream(const DeviceStream& deviceStream)
 {
 #if GMX_GPU_OPENCL
     /* Based on the v1.2 section 5.13 of the OpenCL spec, a flush is needed
@@ -101,9 +101,9 @@ inline void issueClFlushInStream(const DeviceStream& deviceStream)
 #endif
 }
 
-void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
-                                    NBParamGpu*                  nbp,
-                                    const DeviceContext&         deviceContext)
+static inline void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
+                                                  NBParamGpu*                  nbp,
+                                                  const DeviceContext&         deviceContext)
 {
     if (nbp->coulomb_tab)
     {
@@ -115,8 +115,8 @@ void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
             &nbp->coulomb_tab, &nbp->coulomb_tab_texobj, tables.tableF.data(), tables.tableF.size(), deviceContext);
 }
 
-enum ElecType nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic,
-                                               const DeviceInformation gmx_unused& deviceInfo)
+static inline ElecType nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic,
+                                                        const DeviceInformation gmx_unused& deviceInfo)
 {
     bool bTwinCut = (ic.rcoulomb != ic.rvdw);
 
@@ -173,7 +173,9 @@ enum ElecType nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic,
     }
 }
 
-void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t& ic, const PairlistParams& listParams)
+static inline void set_cutoff_parameters(NBParamGpu*                nbp,
+                                         const interaction_const_t& ic,
+                                         const PairlistParams&      listParams)
 {
     nbp->ewald_beta        = ic.ewaldcoeff_q;
     nbp->sh_ewald          = ic.sh_ewald;
@@ -195,24 +197,7 @@ void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t& ic, const
     nbp->vdw_switch       = ic.vdw_switch;
 }
 
-void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interaction_const_t& ic)
-{
-    if (!nbv || !nbv->useGpu())
-    {
-        return;
-    }
-    NbnxmGpu*   nb  = nbv->gpu_nbv;
-    NBParamGpu* nbp = nb->nbparam;
-
-    set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
-
-    nbp->elecType = nbnxn_gpu_pick_ewald_kernel_type(ic, nb->deviceContext_->deviceInfo());
-
-    GMX_RELEASE_ASSERT(ic.coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
-    init_ewald_coulomb_force_table(*ic.coulombEwaldTables, nbp, *nb->deviceContext_);
-}
-
-void init_plist(gpu_plist* pl)
+static inline void init_plist(gpu_plist* pl)
 {
     /* initialize to nullptr pointers to data that is not allocated here and will
        need reallocation in nbnxn_gpu_init_pairlist */
@@ -236,7 +221,7 @@ void init_plist(gpu_plist* pl)
     pl->rollingPruningPart     = 0;
 }
 
-void init_timings(gmx_wallclock_gpu_nbnxn_t* t)
+static inline void init_timings(gmx_wallclock_gpu_nbnxn_t* t)
 {
     t->nb_h2d_t = 0.0;
     t->nb_d2h_t = 0.0;
@@ -258,10 +243,10 @@ void init_timings(gmx_wallclock_gpu_nbnxn_t* t)
 }
 
 /*! \brief Initialize \p atomdata first time; it only gets filled at pair-search. */
-static void initAtomdataFirst(NBAtomDataGpu*       atomdata,
-                              int                  numTypes,
-                              const DeviceContext& deviceContext,
-                              const DeviceStream&  localStream)
+static inline void initAtomdataFirst(NBAtomDataGpu*       atomdata,
+                                     int                  numTypes,
+                                     const DeviceContext& deviceContext,
+                                     const DeviceStream&  localStream)
 {
     atomdata->numTypes = numTypes;
     allocateDeviceBuffer(&atomdata->shiftVec, SHIFTS, deviceContext);
@@ -285,12 +270,91 @@ static void initAtomdataFirst(NBAtomDataGpu*       atomdata,
     atomdata->numAtomsAlloc = -1;
 }
 
+static inline VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t& ic,
+                                                LJCombinationRule          ljCombinationRule)
+{
+    if (ic.vdwtype == VanDerWaalsType::Cut)
+    {
+        switch (ic.vdw_modifier)
+        {
+            case InteractionModifiers::None:
+            case InteractionModifiers::PotShift:
+                switch (ljCombinationRule)
+                {
+                    case LJCombinationRule::None: return VdwType::Cut;
+                    case LJCombinationRule::Geometric: return VdwType::CutCombGeom;
+                    case LJCombinationRule::LorentzBerthelot: return VdwType::CutCombLB;
+                    default:
+                        GMX_THROW(gmx::InconsistentInputError(gmx::formatString(
+                                "The requested LJ combination rule %s is not implemented in "
+                                "the GPU accelerated kernels!",
+                                enumValueToString(ljCombinationRule))));
+                }
+            case InteractionModifiers::ForceSwitch: return VdwType::FSwitch;
+            case InteractionModifiers::PotSwitch: return VdwType::PSwitch;
+            default:
+                GMX_THROW(gmx::InconsistentInputError(
+                        gmx::formatString("The requested VdW interaction modifier %s is not "
+                                          "implemented in the GPU accelerated kernels!",
+                                          enumValueToString(ic.vdw_modifier))));
+        }
+    }
+    else if (ic.vdwtype == VanDerWaalsType::Pme)
+    {
+        if (ic.ljpme_comb_rule == LongRangeVdW::Geom)
+        {
+            GMX_RELEASE_ASSERT(
+                    ljCombinationRule == LJCombinationRule::Geometric,
+                    "Combination rules for long- and short-range interactions should match.");
+            return VdwType::EwaldGeom;
+        }
+        else
+        {
+            GMX_RELEASE_ASSERT(
+                    ljCombinationRule == LJCombinationRule::LorentzBerthelot,
+                    "Combination rules for long- and short-range interactions should match.");
+            return VdwType::EwaldLB;
+        }
+    }
+    else
+    {
+        GMX_THROW(gmx::InconsistentInputError(gmx::formatString(
+                "The requested VdW type %s is not implemented in the GPU accelerated kernels!",
+                enumValueToString(ic.vdwtype))));
+    }
+}
+
+static inline ElecType nbnxmGpuPickElectrostaticsKernelType(const interaction_const_t& ic,
+                                                            const DeviceInformation&   deviceInfo)
+{
+    if (ic.eeltype == CoulombInteractionType::Cut)
+    {
+        return ElecType::Cut;
+    }
+    else if (EEL_RF(ic.eeltype))
+    {
+        return ElecType::RF;
+    }
+    else if ((EEL_PME(ic.eeltype) || ic.eeltype == CoulombInteractionType::Ewald))
+    {
+        return nbnxn_gpu_pick_ewald_kernel_type(ic, deviceInfo);
+    }
+    else
+    {
+        /* Shouldn't happen, as this is checked when choosing Verlet-scheme */
+        GMX_THROW(gmx::InconsistentInputError(
+                gmx::formatString("The requested electrostatics type %s is not implemented in "
+                                  "the GPU accelerated kernels!",
+                                  enumValueToString(ic.eeltype))));
+    }
+}
+
 /*! \brief Initialize the nonbonded parameter data structure. */
-static void initNbparam(NBParamGpu*                     nbp,
-                        const interaction_const_t&      ic,
-                        const PairlistParams&           listParams,
-                        const nbnxn_atomdata_t::Params& nbatParams,
-                        const DeviceContext&            deviceContext)
+static inline void initNbparam(NBParamGpu*                     nbp,
+                               const interaction_const_t&      ic,
+                               const PairlistParams&           listParams,
+                               const nbnxn_atomdata_t::Params& nbatParams,
+                               const DeviceContext&            deviceContext)
 {
     const int numTypes = nbatParams.numTypes;
 
@@ -426,6 +490,23 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager,
     return nb;
 }
 
+void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interaction_const_t& ic)
+{
+    if (!nbv || !nbv->useGpu())
+    {
+        return;
+    }
+    NbnxmGpu*   nb  = nbv->gpu_nbv;
+    NBParamGpu* nbp = nb->nbparam;
+
+    set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
+
+    nbp->elecType = nbnxn_gpu_pick_ewald_kernel_type(ic, nb->deviceContext_->deviceInfo());
+
+    GMX_RELEASE_ASSERT(ic.coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
+    init_ewald_coulomb_force_table(*ic.coulombEwaldTables, nbp, *nb->deviceContext_);
+}
+
 void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
 {
     NBAtomDataGpu*      adat        = nb->atdat;
@@ -665,81 +746,6 @@ bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
             || (nb->nbparam->elecType == ElecType::EwaldAnaTwin));
 }
 
-enum ElecType nbnxmGpuPickElectrostaticsKernelType(const interaction_const_t& ic,
-                                                   const DeviceInformation&   deviceInfo)
-{
-    if (ic.eeltype == CoulombInteractionType::Cut)
-    {
-        return ElecType::Cut;
-    }
-    else if (EEL_RF(ic.eeltype))
-    {
-        return ElecType::RF;
-    }
-    else if ((EEL_PME(ic.eeltype) || ic.eeltype == CoulombInteractionType::Ewald))
-    {
-        return nbnxn_gpu_pick_ewald_kernel_type(ic, deviceInfo);
-    }
-    else
-    {
-        /* Shouldn't happen, as this is checked when choosing Verlet-scheme */
-        GMX_THROW(gmx::InconsistentInputError(
-                gmx::formatString("The requested electrostatics type %s is not implemented in "
-                                  "the GPU accelerated kernels!",
-                                  enumValueToString(ic.eeltype))));
-    }
-}
-
-
-enum VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t& ic, LJCombinationRule ljCombinationRule)
-{
-    if (ic.vdwtype == VanDerWaalsType::Cut)
-    {
-        switch (ic.vdw_modifier)
-        {
-            case InteractionModifiers::None:
-            case InteractionModifiers::PotShift:
-                switch (ljCombinationRule)
-                {
-                    case LJCombinationRule::None: return VdwType::Cut;
-                    case LJCombinationRule::Geometric: return VdwType::CutCombGeom;
-                    case LJCombinationRule::LorentzBerthelot: return VdwType::CutCombLB;
-                    default:
-                        GMX_THROW(gmx::InconsistentInputError(gmx::formatString(
-                                "The requested LJ combination rule %s is not implemented in "
-                                "the GPU accelerated kernels!",
-                                enumValueToString(ljCombinationRule))));
-                }
-            case InteractionModifiers::ForceSwitch: return VdwType::FSwitch;
-            case InteractionModifiers::PotSwitch: return VdwType::PSwitch;
-            default:
-                GMX_THROW(gmx::InconsistentInputError(
-                        gmx::formatString("The requested VdW interaction modifier %s is not "
-                                          "implemented in the GPU accelerated kernels!",
-                                          enumValueToString(ic.vdw_modifier))));
-        }
-    }
-    else if (ic.vdwtype == VanDerWaalsType::Pme)
-    {
-        if (ic.ljpme_comb_rule == LongRangeVdW::Geom)
-        {
-            assert(ljCombinationRule == LJCombinationRule::Geometric);
-            return VdwType::EwaldGeom;
-        }
-        else
-        {
-            assert(ljCombinationRule == LJCombinationRule::LorentzBerthelot);
-            return VdwType::EwaldLB;
-        }
-    }
-    else
-    {
-        GMX_THROW(gmx::InconsistentInputError(gmx::formatString(
-                "The requested VdW type %s is not implemented in the GPU accelerated kernels!",
-                enumValueToString(ic.vdwtype))));
-    }
-}
-
 void setupGpuShortRangeWork(NbnxmGpu* nb, const gmx::GpuBonded* gpuBonded, const gmx::InteractionLocality iLocality)
 {
     GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
index 41b38027330bf4f49ec6cbb808da0a23c8c6cad3..19652c712a21d09e67119ddbb897d595d6a60a6d 100644 (file)
@@ -59,30 +59,6 @@ namespace Nbnxm
 
 struct gpu_plist;
 
-/*! \brief Tabulates the Ewald Coulomb force and initializes the size/scale and the table GPU array.
- *
- * If called with an already allocated table, it just re-uploads the
- * table.
- */
-void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
-                                    NBParamGpu*                  nbp,
-                                    const DeviceContext&         deviceContext);
-
-/*! \brief Selects the Ewald kernel type, analytical or tabulated, single or twin cut-off. */
-enum ElecType nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused& ic,
-                                               const DeviceInformation&              deviceInfo);
-
-/*! \brief Copies all parameters related to the cut-off from ic to nbp
- */
-void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t& ic, const PairlistParams& listParams);
-
-/*! \brief Initializes the pair list data structure.
- */
-void init_plist(gpu_plist* pl);
-
-/*! \brief Initializes the timings data structure. */
-void init_timings(gmx_wallclock_gpu_nbnxn_t* t);
-
 /*! \brief Initializes the NBNXM GPU data structures. */
 void gpu_init_platform_specific(NbnxmGpu* nb);