Clean up nbnxm enums
author    Berk Hess <hess@kth.se>
Thu, 17 Jan 2019 13:25:50 +0000 (14:25 +0100)
committer Mark Abraham <mark.j.abraham@gmail.com>
Thu, 21 Feb 2019 09:13:40 +0000 (10:13 +0100)
Converted the nbnxm kernel and Ewald exclusion enums to enum classes.
Added resource, kernel setup and pairlist type enums.
Also made pairlistSets_ private.
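
For illustration only (a minimal sketch, not part of the change itself; nbv is a
nonbonded_verlet_t*), the new scoped enums and the KernelSetup struct replace the
former plain-int kernel type and Ewald exclusion fields like this:

    // Old style (removed): plain int enums
    //   int kernel_type = nbnxnk4xN_SIMD_4xN;
    //   int ewald_excl  = ewaldexclTable;

    // New style: type-safe enum classes bundled into one setup struct
    Nbnxm::KernelSetup setup;
    setup.kernelType         = Nbnxm::KernelType::Cpu4xN_Simd_4xN;
    setup.ewaldExclusionType = Nbnxm::EwaldExclusionType::Table;
    nbv->setKernelSetup(setup);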

Change-Id: Ic8ff8c59cf8a72d3462b8a0f7382cd1636136c40

20 files changed:
src/gromacs/mdlib/calc_verletbuf.cpp
src/gromacs/mdlib/calc_verletbuf.h
src/gromacs/mdlib/sim_util.cpp
src/gromacs/nbnxm/atomdata.cpp
src/gromacs/nbnxm/atomdata.h
src/gromacs/nbnxm/grid.cpp
src/gromacs/nbnxm/kerneldispatch.cpp
src/gromacs/nbnxm/nbnxm.h
src/gromacs/nbnxm/nbnxm_geometry.cpp
src/gromacs/nbnxm/nbnxm_geometry.h
src/gromacs/nbnxm/nbnxm_setup.cpp
src/gromacs/nbnxm/pairlist.cpp
src/gromacs/nbnxm/pairlist.h
src/gromacs/nbnxm/pairlist_tuning.cpp
src/gromacs/nbnxm/pairlist_tuning.h
src/gromacs/nbnxm/pairlistset.cpp
src/gromacs/nbnxm/pairlistset.h
src/gromacs/nbnxm/prunekerneldispatch.cpp
src/gromacs/taskassignment/decidegpuusage.cpp
src/gromacs/taskassignment/decidegpuusage.h

diff --git a/src/gromacs/mdlib/calc_verletbuf.cpp b/src/gromacs/mdlib/calc_verletbuf.cpp
index 7bee75dae4b7a09644f87d79ac1194635e6fafff..b494e1665214a86d5754d005a4a436797a0e284a 100644 (file)
@@ -107,18 +107,17 @@ struct pot_derivatives_t
     real  md3; // -V''' at the cutoff
 };
 
-VerletbufListSetup verletbufGetListSetup(int nbnxnKernelType)
+VerletbufListSetup verletbufGetListSetup(Nbnxm::KernelType nbnxnKernelType)
 {
     /* Note that the current buffer estimation code only handles clusters
      * of size 1, 2 or 4, so for 4x8 or 8x8 we use the estimate for 4x4.
      */
     VerletbufListSetup listSetup;
 
-    listSetup.cluster_size_i = nbnxn_kernel_to_cluster_i_size(nbnxnKernelType);
-    listSetup.cluster_size_j = nbnxn_kernel_to_cluster_j_size(nbnxnKernelType);
+    listSetup.cluster_size_i = Nbnxm::IClusterSizePerKernelType[nbnxnKernelType];
+    listSetup.cluster_size_j = Nbnxm::JClusterSizePerKernelType[nbnxnKernelType];
 
-    if (nbnxnKernelType == nbnxnk8x8x8_GPU ||
-        nbnxnKernelType == nbnxnk8x8x8_PlainC)
+    if (!Nbnxm::kernelTypeUsesSimplePairlist(nbnxnKernelType))
     {
         /* The GPU kernels (except for OpenCL) split the j-clusters in two halves */
         listSetup.cluster_size_j /= 2;
@@ -134,24 +133,24 @@ VerletbufListSetup verletbufGetSafeListSetup(ListSetupType listType)
      * i- and j-cluster sizes, so we potentially overestimate, but never
      * underestimate, the buffer drift.
      */
-    int nbnxnKernelType;
+    Nbnxm::KernelType nbnxnKernelType;
 
     if (listType == ListSetupType::Gpu)
     {
-        nbnxnKernelType = nbnxnk8x8x8_GPU;
+        nbnxnKernelType = Nbnxm::KernelType::Gpu8x8x8;
     }
     else if (GMX_SIMD && listType == ListSetupType::CpuSimdWhenSupported)
     {
 #ifdef GMX_NBNXN_SIMD_2XNN
         /* We use the smallest cluster size to be on the safe side */
-        nbnxnKernelType = nbnxnk4xN_SIMD_2xNN;
+        nbnxnKernelType = Nbnxm::KernelType::Cpu4xN_Simd_2xNN;
 #else
-        nbnxnKernelType = nbnxnk4xN_SIMD_4xN;
+        nbnxnKernelType = Nbnxm::KernelType::Cpu4xN_Simd_4xN;
 #endif
     }
     else
     {
-        nbnxnKernelType = nbnxnk4x4_PlainC;
+        nbnxnKernelType = Nbnxm::KernelType::Cpu4x4_PlainC;
     }
 
     return verletbufGetListSetup(nbnxnKernelType);
diff --git a/src/gromacs/mdlib/calc_verletbuf.h b/src/gromacs/mdlib/calc_verletbuf.h
index a444b2e097d7c3e5195b372c99b45ee42f218953..1a8cdc34eeecc8b04e63bfe5b1042077bfd617c0 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -48,6 +48,12 @@ namespace gmx
 class RangePartitioning;
 } // namespace gmx
 
+namespace Nbnxm
+{
+enum class KernelType;
+} // namespace Nbnxm
+
+
 struct VerletbufListSetup
 {
     int  cluster_size_i;  /* Cluster pair-list i-cluster size atom count */
@@ -68,7 +74,7 @@ static const real verlet_buffer_ratio_NVE_T0     = 0.10;
 
 /* Returns the pair-list setup for the given nbnxn kernel type.
  */
-VerletbufListSetup verletbufGetListSetup(int nbnxnKernelType);
+VerletbufListSetup verletbufGetListSetup(Nbnxm::KernelType nbnxnKernelType);
 
 /* Enum for choosing the list type for verletbufGetSafeListSetup() */
 enum class ListSetupType
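
A usage sketch of the updated verletbufGetListSetup() declaration above (assumed
caller code, not part of the patch):

    #include "gromacs/mdlib/calc_verletbuf.h"

    // The buffer-estimation setup is now queried with the scoped kernel type.
    const VerletbufListSetup listSetup =
        verletbufGetListSetup(Nbnxm::KernelType::Cpu4xN_Simd_4xN);
    // listSetup.cluster_size_i and listSetup.cluster_size_j hold the i- and
    // j-cluster sizes in atoms used for the Verlet buffer estimate.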
diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp
index f68d7c841a891da63fbd9046da8587e95d050d7d..b89f57fd5ba1228926147b65673d158e5d9f385d 100644 (file)
@@ -423,7 +423,7 @@ static void do_nb_verlet(t_forcerec                       *fr,
              * the current coordinates of the atoms.
              */
             wallcycle_sub_start(wcycle, ewcsNONBONDED_PRUNING);
-            NbnxnDispatchPruneKernel(nbv, ilocality, fr->shift_vec);
+            nbv->dispatchPruneKernel(ilocality, fr->shift_vec);
             wallcycle_sub_stop(wcycle, ewcsNONBONDED_PRUNING);
         }
 
@@ -438,23 +438,23 @@ static void do_nb_verlet(t_forcerec                       *fr,
     }
 }
 
-static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
-                             t_forcerec           *fr,
-                             rvec                  x[],
-                             rvec                  f[],
-                             const t_mdatoms      *mdatoms,
-                             t_lambda             *fepvals,
-                             real                 *lambda,
-                             gmx_enerdata_t       *enerd,
-                             int                   flags,
-                             t_nrnb               *nrnb,
-                             gmx_wallcycle_t       wcycle)
+static void do_nb_verlet_fep(const nonbonded_verlet_t         &nbv,
+                             const Nbnxm::InteractionLocality  iLocality,
+                             t_forcerec                       *fr,
+                             rvec                              x[],
+                             rvec                              f[],
+                             const t_mdatoms                  *mdatoms,
+                             t_lambda                         *fepvals,
+                             real                             *lambda,
+                             gmx_enerdata_t                   *enerd,
+                             int                               flags,
+                             t_nrnb                           *nrnb,
+                             gmx_wallcycle_t                   wcycle)
 {
     int              donb_flags;
     nb_kernel_data_t kernel_data;
     real             lam_i[efptNR];
     real             dvdl_nb[efptNR];
-    int              th;
     int              i, j;
 
     donb_flags = 0;
@@ -488,15 +488,17 @@ static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
         dvdl_nb[i]  = 0;
     }
 
-    GMX_ASSERT(gmx_omp_nthreads_get(emntNonbonded) == nbl_lists->nnbl, "Number of lists should be same as number of NB threads");
+    const gmx::ArrayRef<t_nblist const * const > nbl_fep = nbv.freeEnergyPairlistSet(iLocality);
+
+    GMX_ASSERT(gmx_omp_nthreads_get(emntNonbonded) == nbl_fep.ssize(), "Number of lists should be same as number of NB threads");
 
     wallcycle_sub_start(wcycle, ewcsNONBONDED);
-#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
-    for (th = 0; th < nbl_lists->nnbl; th++)
+#pragma omp parallel for schedule(static) num_threads(nbl_fep.ssize())
+    for (int th = 0; th < nbl_fep.ssize(); th++)
     {
         try
         {
-            gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
+            gmx_nb_free_energy_kernel(nbl_fep[th],
                                       x, f, fr, mdatoms, &kernel_data, nrnb);
         }
         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
@@ -531,12 +533,12 @@ static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
                 lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
             }
             reset_foreign_enerdata(enerd);
-#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
-            for (th = 0; th < nbl_lists->nnbl; th++)
+#pragma omp parallel for schedule(static) num_threads(nbl_fep.ssize())
+            for (int th = 0; th < nbl_fep.ssize(); th++)
             {
                 try
                 {
-                    gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
+                    gmx_nb_free_energy_kernel(nbl_fep[th],
                                               x, f, fr, mdatoms, &kernel_data, nrnb);
                 }
                 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
@@ -1138,8 +1140,8 @@ static void do_force_cutsVERLET(FILE *fplog,
         wallcycle_start_nocount(wcycle, ewcNS);
         wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
         /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
-        nbnxn_make_pairlist(nbv, Nbnxm::InteractionLocality::Local,
-                            &top->excls, step, nrnb);
+        nbv->constructPairlist(Nbnxm::InteractionLocality::Local,
+                               &top->excls, step, nrnb);
         wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
         wallcycle_stop(wcycle, ewcNS);
     }
@@ -1195,8 +1197,8 @@ static void do_force_cutsVERLET(FILE *fplog,
             wallcycle_start_nocount(wcycle, ewcNS);
             wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
             /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
-            nbnxn_make_pairlist(nbv, Nbnxm::InteractionLocality::NonLocal,
-                                &top->excls, step, nrnb);
+            nbv->constructPairlist(Nbnxm::InteractionLocality::NonLocal,
+                                   &top->excls, step, nrnb);
             wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
             wallcycle_stop(wcycle, ewcNS);
         }
@@ -1362,18 +1364,18 @@ static void do_force_cutsVERLET(FILE *fplog,
         /* Calculate the local and non-local free energy interactions here.
          * Happens here on the CPU both with and without GPU.
          */
-        if (fr->nbv->pairlistSets[Nbnxm::InteractionLocality::Local].nbl_fep[0]->nrj > 0)
+        if (fr->nbv->freeEnergyPairlistSet(Nbnxm::InteractionLocality::Local)[0]->nrj > 0)
         {
-            do_nb_verlet_fep(&fr->nbv->pairlistSets[Nbnxm::InteractionLocality::Local],
+            do_nb_verlet_fep(*nbv, Nbnxm::InteractionLocality::Local,
                              fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
                              inputrec->fepvals, lambda,
                              enerd, flags, nrnb, wcycle);
         }
 
-        if (DOMAINDECOMP(cr) &&
-            fr->nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal].nbl_fep[0]->nrj > 0)
+        if (havePPDomainDecomposition(cr) &&
+            fr->nbv->freeEnergyPairlistSet(Nbnxm::InteractionLocality::NonLocal)[0]->nrj > 0)
         {
-            do_nb_verlet_fep(&fr->nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal],
+            do_nb_verlet_fep(*nbv, Nbnxm::InteractionLocality::NonLocal,
                              fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
                              inputrec->fepvals, lambda,
                              enerd, flags, nrnb, wcycle);
@@ -1403,7 +1405,7 @@ static void do_force_cutsVERLET(FILE *fplog,
 
         /* if there are multiple fshift output buffers reduce them */
         if ((flags & GMX_FORCE_VIRIAL) &&
-            nbv->pairlistSets[iloc].nnbl > 1)
+            nbv->pairlistSet(iloc).nnbl > 1)
         {
             /* This is not in a subcounter because it takes a
                negligible and constant-sized amount of time */
@@ -1458,7 +1460,7 @@ static void do_force_cutsVERLET(FILE *fplog,
             }
 
             /* skip the reduction if there was no non-local work to do */
-            if (!nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal].nblGpu[0]->sci.empty())
+            if (!nbv->pairlistSet(Nbnxm::InteractionLocality::NonLocal).nblGpu[0]->sci.empty())
             {
                 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::NonLocal,
                                                nbv->nbat, f, wcycle);
diff --git a/src/gromacs/nbnxm/atomdata.cpp b/src/gromacs/nbnxm/atomdata.cpp
index 62ba0c401436fa47863b22194b52e4e66c7df061..0fad12d1f8b7542115ef9e17b44d9a411cdac784 100644 (file)
@@ -91,7 +91,7 @@ void nbnxn_atomdata_t::resizeForceBuffers()
 }
 
 /* Initializes an nbnxn_atomdata_output_t data structure */
-nbnxn_atomdata_output_t::nbnxn_atomdata_output_t(int                nb_kernel_type,
+nbnxn_atomdata_output_t::nbnxn_atomdata_output_t(Nbnxm::KernelType  kernelType,
                                                  int                numEnergyGroups,
                                                  int                simdEnergyBufferStride,
                                                  gmx::PinningPolicy pinningPolicy) :
@@ -104,10 +104,9 @@ nbnxn_atomdata_output_t::nbnxn_atomdata_output_t(int                nb_kernel_ty
     Vvdw.resize(numEnergyGroups*numEnergyGroups);
     Vc.resize(numEnergyGroups*numEnergyGroups);
 
-    if (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
-        nb_kernel_type == nbnxnk4xN_SIMD_2xNN)
+    if (Nbnxm::kernelTypeIsSimd(kernelType))
     {
-        int cj_size     = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
+        int cj_size     = Nbnxm::JClusterSizePerKernelType[kernelType];
         int numElements = numEnergyGroups*numEnergyGroups*simdEnergyBufferStride*(cj_size/2)*cj_size;
         VSvdw.resize(numElements);
         VSc.resize(numElements);
@@ -438,7 +437,7 @@ nbnxn_atomdata_t::nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy) :
 /* Initializes an nbnxn_atomdata_t::Params data structure */
 static void nbnxn_atomdata_params_init(const gmx::MDLogger &mdlog,
                                        nbnxn_atomdata_t::Params *params,
-                                       int nb_kernel_type,
+                                       const Nbnxm::KernelType kernelType,
                                        int enbnxninitcombrule,
                                        int ntype, const real *nbfp,
                                        int n_energygroups)
@@ -538,7 +537,7 @@ static void nbnxn_atomdata_params_init(const gmx::MDLogger &mdlog,
                 gmx::boolToString(bCombGeom), gmx::boolToString(bCombLB));
     }
 
-    simple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
+    simple = Nbnxm::kernelTypeUsesSimplePairlist(kernelType);
 
     switch (enbnxninitcombrule)
     {
@@ -590,8 +589,7 @@ static void nbnxn_atomdata_params_init(const gmx::MDLogger &mdlog,
             gmx_incons("Unknown enbnxninitcombrule");
     }
 
-    bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
-             nb_kernel_type == nbnxnk4xN_SIMD_2xNN);
+    bSIMD = Nbnxm::kernelTypeIsSimd(kernelType);
 
     set_lj_parameter_data(params, bSIMD);
 
@@ -616,18 +614,17 @@ static void nbnxn_atomdata_params_init(const gmx::MDLogger &mdlog,
 /* Initializes an nbnxn_atomdata_t data structure */
 void nbnxn_atomdata_init(const gmx::MDLogger &mdlog,
                          nbnxn_atomdata_t *nbat,
-                         int nb_kernel_type,
+                         const Nbnxm::KernelType kernelType,
                          int enbnxninitcombrule,
                          int ntype, const real *nbfp,
                          int n_energygroups,
                          int nout)
 {
-    nbnxn_atomdata_params_init(mdlog, &nbat->paramsDeprecated(), nb_kernel_type,
+    nbnxn_atomdata_params_init(mdlog, &nbat->paramsDeprecated(), kernelType,
                                enbnxninitcombrule, ntype, nbfp, n_energygroups);
 
-    const gmx_bool simple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
-    const gmx_bool bSIMD  = (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
-                             nb_kernel_type == nbnxnk4xN_SIMD_2xNN);
+    const bool simple = Nbnxm::kernelTypeUsesSimplePairlist(kernelType);
+    const bool bSIMD  = Nbnxm::kernelTypeIsSimd(kernelType);
 
     if (simple)
     {
@@ -636,7 +633,7 @@ void nbnxn_atomdata_init(const gmx::MDLogger &mdlog,
         if (bSIMD)
         {
             pack_x = std::max(c_nbnxnCpuIClusterSize,
-                              nbnxn_kernel_to_cluster_j_size(nb_kernel_type));
+                              Nbnxm::JClusterSizePerKernelType[kernelType]);
             switch (pack_x)
             {
                 case 4:
@@ -671,7 +668,7 @@ void nbnxn_atomdata_init(const gmx::MDLogger &mdlog,
     for (int i = 0; i < nout; i++)
     {
         const auto &pinningPolicy = nbat->params().type.get_allocator().pinningPolicy();
-        nbat->out.emplace_back(nb_kernel_type, nbat->params().nenergrp, 1 << nbat->params().neg_2log,
+        nbat->out.emplace_back(kernelType, nbat->params().nenergrp, 1 << nbat->params().neg_2log,
                                pinningPolicy);
     }
 
diff --git a/src/gromacs/nbnxm/atomdata.h b/src/gromacs/nbnxm/atomdata.h
index bd210ea01444e107ed783a131748831213e86f08..fbd4dd3c00aa634a6ba48cfeab51b335791ea342 100644 (file)
@@ -54,6 +54,11 @@ struct nbnxn_search;
 struct t_mdatoms;
 struct gmx_wallcycle;
 
+namespace Nbnxm
+{
+enum class KernelType;
+}
+
 /* Reallocate the nbnxn_atomdata_t for a size of n atoms */
 void nbnxn_atomdata_realloc(nbnxn_atomdata_t *nbat, int n);
 
@@ -76,7 +81,7 @@ enum {
  */
 void nbnxn_atomdata_init(const gmx::MDLogger &mdlog,
                          nbnxn_atomdata_t *nbat,
-                         int nb_kernel_type,
+                         Nbnxm::KernelType kernelType,
                          int enbnxninitcombrule,
                          int ntype, const real *nbfp,
                          int n_energygroups,
diff --git a/src/gromacs/nbnxm/grid.cpp b/src/gromacs/nbnxm/grid.cpp
index f4e514824852e82d2cc62fed740925c625f12e40..7ccf6d18c57a64b7ac9d0005123aeda8be12a4ed 100644 (file)
@@ -1428,8 +1428,8 @@ void nbnxn_put_on_grid(nonbonded_verlet_t             *nbv,
 
     grid->bSimple = nbv->pairlistIsSimple();
 
-    grid->na_c      = nbnxn_kernel_to_cluster_i_size(nbv->kernelType_);
-    grid->na_cj     = nbnxn_kernel_to_cluster_j_size(nbv->kernelType_);
+    grid->na_c      = IClusterSizePerListType[nbv->listParams->pairlistType];
+    grid->na_cj     = JClusterSizePerListType[nbv->listParams->pairlistType];
     grid->na_sc     = (grid->bSimple ? 1 : c_gpuNumClusterPerCell)*grid->na_c;
     grid->na_c_2log = get_2log(grid->na_c);
 
diff --git a/src/gromacs/nbnxm/kerneldispatch.cpp b/src/gromacs/nbnxm/kerneldispatch.cpp
index a1ad25208ab981762c6ed29b9b2b79c8164c44fa..5c302d7c8849e18f8ceeec4aa718addb97d2f748 100644 (file)
@@ -128,8 +128,7 @@ reduceGroupEnergySimdBuffers(int                       numGroups,
  * within this function.
  *
  * \param[in]     pairlistSet   Pairlists with local or non-local interactions to compute
- * \param[in]     kernel_type   The non-bonded kernel type
- * \param[in]     ewald_excl    The Ewald exclusion treatment
+ * \param[in]     kernelSetup   The non-bonded kernel setup
  * \param[in,out] nbat          The atomdata for the interactions
  * \param[in]     ic            Non-bonded interaction constants
  * \param[in]     shiftVectors  The PBC shift vectors
@@ -141,8 +140,7 @@ reduceGroupEnergySimdBuffers(int                       numGroups,
  */
 static void
 nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
-                 const int                       kernel_type,
-                 const int                       ewald_excl,
+                 const Nbnxm::KernelSetup       &kernelSetup,
                  nbnxn_atomdata_t               *nbat,
                  const interaction_const_t      &ic,
                  rvec                           *shiftVectors,
@@ -160,7 +158,7 @@ nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
     }
     else
     {
-        if (ewald_excl == ewaldexclTable)
+        if (kernelSetup.ewaldExclusionType == Nbnxm::EwaldExclusionType::Table)
         {
             if (ic.rcoulomb == ic.rvdw)
             {
@@ -222,7 +220,7 @@ nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
         {
             vdwkt = vdwktLJEWALDCOMBLB;
             /* At setup we (should have) selected the C reference kernel */
-            GMX_RELEASE_ASSERT(kernel_type == nbnxnk4x4_PlainC, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
+            GMX_RELEASE_ASSERT(kernelSetup.kernelType == Nbnxm::KernelType::Cpu4x4_PlainC, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
         }
     }
     else
@@ -264,9 +262,9 @@ nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
         if (!(forceFlags & GMX_FORCE_ENERGY))
         {
             /* Don't calculate energies */
-            switch (kernel_type)
+            switch (kernelSetup.kernelType)
             {
-                case nbnxnk4x4_PlainC:
+                case Nbnxm::KernelType::Cpu4x4_PlainC:
                     nbnxn_kernel_noener_ref[coulkt][vdwkt](nbl[nb], nbat,
                                                            &ic,
                                                            shiftVectors,
@@ -274,7 +272,7 @@ nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
                                                            fshift_p);
                     break;
 #ifdef GMX_NBNXN_SIMD_2XNN
-                case nbnxnk4xN_SIMD_2xNN:
+                case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
                     nbnxm_kernel_noener_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
                                                                  &ic,
                                                                  shiftVectors,
@@ -283,7 +281,7 @@ nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
                     break;
 #endif
 #ifdef GMX_NBNXN_SIMD_4XN
-                case nbnxnk4xN_SIMD_4xN:
+                case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
                     nbnxm_kernel_noener_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
                                                                 &ic,
                                                                 shiftVectors,
@@ -301,9 +299,9 @@ nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
             out->Vvdw[0] = 0;
             out->Vc[0]   = 0;
 
-            switch (kernel_type)
+            switch (kernelSetup.kernelType)
             {
-                case nbnxnk4x4_PlainC:
+                case Nbnxm::KernelType::Cpu4x4_PlainC:
                     nbnxn_kernel_ener_ref[coulkt][vdwkt](nbl[nb], nbat,
                                                          &ic,
                                                          shiftVectors,
@@ -313,7 +311,7 @@ nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
                                                          out->Vc.data());
                     break;
 #ifdef GMX_NBNXN_SIMD_2XNN
-                case nbnxnk4xN_SIMD_2xNN:
+                case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
                     nbnxm_kernel_ener_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
                                                                &ic,
                                                                shiftVectors,
@@ -324,7 +322,7 @@ nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
                     break;
 #endif
 #ifdef GMX_NBNXN_SIMD_4XN
-                case nbnxnk4xN_SIMD_4xN:
+                case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
                     nbnxm_kernel_ener_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
                                                               &ic,
                                                               shiftVectors,
@@ -345,9 +343,9 @@ nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
 
             int unrollj = 0;
 
-            switch (kernel_type)
+            switch (kernelSetup.kernelType)
             {
-                case nbnxnk4x4_PlainC:
+                case Nbnxm::KernelType::Cpu4x4_PlainC:
                     unrollj = c_nbnxnCpuIClusterSize;
                     nbnxn_kernel_energrp_ref[coulkt][vdwkt](nbl[nb], nbat,
                                                             &ic,
@@ -358,7 +356,7 @@ nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
                                                             out->Vc.data());
                     break;
 #ifdef GMX_NBNXN_SIMD_2XNN
-                case nbnxnk4xN_SIMD_2xNN:
+                case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
                     unrollj = GMX_SIMD_REAL_WIDTH/2;
                     nbnxm_kernel_energrp_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
                                                                   &ic,
@@ -370,7 +368,7 @@ nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
                     break;
 #endif
 #ifdef GMX_NBNXN_SIMD_4XN
-                case nbnxnk4xN_SIMD_4xN:
+                case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
                     unrollj = GMX_SIMD_REAL_WIDTH;
                     nbnxm_kernel_energrp_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
                                                                  &ic,
@@ -385,7 +383,7 @@ nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
                     GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
             }
 
-            if (kernel_type != nbnxnk4x4_PlainC)
+            if (kernelSetup.kernelType != Nbnxm::KernelType::Cpu4x4_PlainC)
             {
                 switch (unrollj)
                 {
@@ -423,7 +421,7 @@ static void accountFlops(t_nrnb                           *nrnb,
                          const interaction_const_t        &ic,
                          const int                         forceFlags)
 {
-    const nbnxn_pairlist_set_t &pairlistSet     = nbv.pairlistSets[iLocality];
+    const nbnxn_pairlist_set_t &pairlistSet     = nbv.pairlistSet(iLocality);
     const bool                  usingGpuKernels = nbv.useGpu();
 
     int enr_nbnxn_kernel_ljc;
@@ -431,7 +429,7 @@ static void accountFlops(t_nrnb                           *nrnb,
     {
         enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
     }
-    else if ((!usingGpuKernels && nbv.ewaldExclusionType_ == ewaldexclAnalytical) ||
+    else if ((!usingGpuKernels && nbv.kernelSetup().ewaldExclusionType == Nbnxm::EwaldExclusionType::Analytical) ||
              (usingGpuKernels && Nbnxm::gpu_is_kernel_ewald_analytical(nbv.gpu_nbv)))
     {
         enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD;
@@ -486,16 +484,15 @@ void NbnxnDispatchKernel(nonbonded_verlet_t        *nbv,
                          gmx_enerdata_t            *enerd,
                          t_nrnb                    *nrnb)
 {
-    const nbnxn_pairlist_set_t &pairlistSet = nbv->pairlistSets[iLocality];
+    const nbnxn_pairlist_set_t &pairlistSet = nbv->pairlistSet(iLocality);
 
-    switch (nbv->kernelType_)
+    switch (nbv->kernelSetup().kernelType)
     {
-        case nbnxnk4x4_PlainC:
-        case nbnxnk4xN_SIMD_4xN:
-        case nbnxnk4xN_SIMD_2xNN:
+        case Nbnxm::KernelType::Cpu4x4_PlainC:
+        case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
+        case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
             nbnxn_kernel_cpu(pairlistSet,
-                             nbv->kernelType_,
-                             nbv->ewaldExclusionType_,
+                             nbv->kernelSetup(),
                              nbv->nbat,
                              ic,
                              fr->shift_vec,
@@ -508,11 +505,11 @@ void NbnxnDispatchKernel(nonbonded_verlet_t        *nbv,
                              enerd->grpp.ener[egLJSR]);
             break;
 
-        case nbnxnk8x8x8_GPU:
+        case Nbnxm::KernelType::Gpu8x8x8:
             Nbnxm::gpu_launch_kernel(nbv->gpu_nbv, forceFlags, iLocality);
             break;
 
-        case nbnxnk8x8x8_PlainC:
+        case Nbnxm::KernelType::Cpu8x8x8_PlainC:
             nbnxn_kernel_gpu_ref(pairlistSet.nblGpu[0],
                                  nbv->nbat, &ic,
                                  fr->shift_vec,
diff --git a/src/gromacs/nbnxm/nbnxm.h b/src/gromacs/nbnxm/nbnxm.h
index 908c6bdc68a4396fc158ad7e35303787bee073b7..640d782b931d529ef9e092c978ea19fa4ff2ba4b 100644 (file)
@@ -118,6 +118,7 @@ struct gmx_hw_info_t;
 struct gmx_mtop_t;
 struct interaction_const_t;
 struct nbnxn_pairlist_set_t;
+struct nonbonded_verlet_t;
 struct t_blocka;
 struct t_commrec;
 struct t_nrnb;
@@ -130,87 +131,173 @@ class MDLogger;
 class UpdateGroupsCog;
 }
 
-//! Help pass GPU-emulation parameters with type safety.
-enum class EmulateGpuNonbonded : bool
+/*! \brief Resources that can be used to execute non-bonded kernels on */
+enum class NonbondedResource : int
 {
-    //! Do not emulate GPUs.
-    No,
-    //! Do emulate GPUs.
-    Yes
+    Cpu,
+    Gpu,
+    EmulateGpu
 };
 
+namespace Nbnxm
+{
 
 /*! \brief Nonbonded NxN kernel types: plain C, CPU SIMD, GPU, GPU emulation */
-typedef enum
+enum class KernelType : int
 {
-    nbnxnkNotSet = 0,
-    nbnxnk4x4_PlainC,
-    nbnxnk4xN_SIMD_4xN,
-    nbnxnk4xN_SIMD_2xNN,
-    nbnxnk8x8x8_GPU,
-    nbnxnk8x8x8_PlainC,
-    nbnxnkNR
-} nbnxn_kernel_type;
+    NotSet = 0,
+    Cpu4x4_PlainC,
+    Cpu4xN_Simd_4xN,
+    Cpu4xN_Simd_2xNN,
+    Gpu8x8x8,
+    Cpu8x8x8_PlainC,
+    Count
+};
 
-namespace Nbnxm
+/*! \brief Ewald exclusion types */
+enum class EwaldExclusionType : int
+{
+    NotSet = 0,
+    Table,
+    Analytical,
+    DecidedByGpuModule
+};
+
+/*! \brief The non-bonded setup, also affects the pairlist construction kernel */
+struct KernelSetup
 {
+    //! The non-bonded type, also affects the pairlist construction kernel
+    KernelType         kernelType = KernelType::NotSet;
+    //! Ewald exclusion computation handling type, currently only used for CPU
+    EwaldExclusionType ewaldExclusionType = EwaldExclusionType::NotSet;
+};
 
 /*! \brief Return a string identifying the kernel type.
  *
- * \param [in] kernel_type   nonbonded kernel types, takes values from the nbnxn_kernel_type enum
- * \returns                  a string identifying the kernel corresponding to the type passed as argument
+ * \param [in] kernelType   nonbonded kernel type, takes values from the KernelType enum
+ * \returns                 a string identifying the kernel corresponding to the type passed as argument
  */
-const char *lookup_kernel_name(int kernel_type);
+const char *lookup_kernel_name(Nbnxm::KernelType kernelType);
 
 } // namespace Nbnxm
 
-/*! \brief Ewald exclusion types */
-enum {
-    ewaldexclTable, ewaldexclAnalytical
-};
-
 /*! \brief Flag to tell the nonbonded kernels whether to clear the force output buffers */
 enum {
     enbvClearFNo, enbvClearFYes
 };
 
+/*! \brief Generates a pair-list for the given locality.
+ *
+ * With perturbed particles, also a group scheme style nbl_fep list is made.
+ */
+void nbnxn_make_pairlist(nonbonded_verlet_t         *nbv,
+                         Nbnxm::InteractionLocality  iLocality,
+                         nbnxn_pairlist_set_t       *pairlistSet,
+                         const t_blocka             *excl,
+                         int64_t                     step,
+                         t_nrnb                     *nrnb);
+
+/*! \brief Prune all pair-lists with given locality (currently CPU only)
+ *
+ * For all pair-lists with given locality, takes the outer list and prunes out
+ * pairs beyond the pairlist inner radius and writes the result to a list that is
+ * to be consumed by the non-bonded kernel.
+ */
+void NbnxnDispatchPruneKernel(nbnxn_pairlist_set_t   *pairlistSet,
+                              Nbnxm::KernelType       kernelType,
+                              const nbnxn_atomdata_t *nbat,
+                              const rvec             *shift_vec);
+
 /*! \libinternal
  *  \brief Top-level non-bonded data structure for the Verlet-type cut-off scheme. */
 struct nonbonded_verlet_t
 {
-    //! Returns whether a GPU is used for the non-bonded calculations
-    bool useGpu() const
-    {
-        return kernelType_ == nbnxnk8x8x8_GPU;
-    }
-
-    //! Returns whether a GPU is emulated for the non-bonded calculations
-    bool emulateGpu() const
-    {
-        return kernelType_ == nbnxnk8x8x8_PlainC;
-    }
-
-    //! Return whether the pairlist is of simple, CPU type
-    bool pairlistIsSimple() const
-    {
-        return !useGpu() && !emulateGpu();
-    }
-
-    std::unique_ptr<NbnxnListParameters>                                        listParams; /**< Parameters for the search and list pruning setup */
-    std::unique_ptr<nbnxn_search>                                               nbs;        /**< n vs n atom pair searching data       */
-    int                                                                         ngrp;       /**< number of interaction groups          */
-    //! Local and non-local pairlist sets
-    gmx::EnumerationArray<Nbnxm::InteractionLocality, nbnxn_pairlist_set_t>     pairlistSets;
-    //! Atom data
-    nbnxn_atomdata_t                                                           *nbat;
-
-    //! Non-bonded kernel - see enum above
-    int                  kernelType_;
-    //! Ewald exclusion - see enum above
-    int                  ewaldExclusionType_;
-
-    gmx_nbnxn_gpu_t     *gpu_nbv;         /**< pointer to GPU nb verlet data     */
-    int                  min_ci_balanced; /**< pair list balancing parameter used for the 8x8x8 GPU kernels    */
+    public:
+        //! Returns whether a GPU is used for the non-bonded calculations
+        bool useGpu() const
+        {
+            return kernelSetup_.kernelType == Nbnxm::KernelType::Gpu8x8x8;
+        }
+
+        //! Returns whether a GPU is emulated for the non-bonded calculations
+        bool emulateGpu() const
+        {
+            return kernelSetup_.kernelType == Nbnxm::KernelType::Cpu8x8x8_PlainC;
+        }
+
+        //! Return whether the pairlist is of simple, CPU type
+        bool pairlistIsSimple() const
+        {
+            return !useGpu() && !emulateGpu();
+        }
+
+        //! Initialize the pair list sets, TODO this should be private
+        void initPairlistSets(bool haveMultipleDomains);
+
+        //! Returns a reference to the pairlist set for the requested locality
+        const nbnxn_pairlist_set_t &pairlistSet(Nbnxm::InteractionLocality iLocality) const
+        {
+            GMX_ASSERT(static_cast<size_t>(iLocality) < pairlistSets_.size(),
+                       "The requested locality should be in the list");
+            return pairlistSets_[static_cast<int>(iLocality)];
+        }
+
+        //! Constructs the pairlist for the given locality
+        void constructPairlist(Nbnxm::InteractionLocality  iLocality,
+                               const t_blocka             *excl,
+                               int64_t                     step,
+                               t_nrnb                     *nrnb)
+        {
+            nbnxn_make_pairlist(this, iLocality, &pairlistSets_[static_cast<int>(iLocality)], excl, step, nrnb);
+        }
+
+        //! Dispatches the dynamic pruning kernel for the given locality
+        void dispatchPruneKernel(Nbnxm::InteractionLocality  iLocality,
+                                 const rvec                 *shift_vec)
+        {
+            GMX_ASSERT(static_cast<size_t>(iLocality) < pairlistSets_.size(),
+                       "The requested locality should be in the list");
+            NbnxnDispatchPruneKernel(&pairlistSets_[static_cast<int>(iLocality)],
+                                     kernelSetup_.kernelType, nbat, shift_vec);
+        }
+
+        //! Return the kernel setup
+        const Nbnxm::KernelSetup &kernelSetup() const
+        {
+            return kernelSetup_;
+        }
+
+        //! Sets the kernel setup, TODO: make private
+        void setKernelSetup(const Nbnxm::KernelSetup &kernelSetup)
+        {
+            kernelSetup_ = kernelSetup;
+        }
+
+        //! Returns the list of free-energy pairlists for the given locality
+        const gmx::ArrayRef<t_nblist const * const>
+        freeEnergyPairlistSet(Nbnxm::InteractionLocality iLocality) const
+        {
+            return pairlistSet(iLocality).nbl_fep;
+        }
+
+        //! Parameters for the search and list pruning setup
+        std::unique_ptr<NbnxnListParameters>  listParams;
+        //! Working data for constructing the pairlists
+        std::unique_ptr<nbnxn_search>         nbs;
+    private:
+        //! Local and, optionally, non-local pairlist sets
+        std::vector<nbnxn_pairlist_set_t>     pairlistSets_;
+    public:
+        //! Atom data
+        nbnxn_atomdata_t                     *nbat;
+
+    private:
+        //! The non-bonded setup, also affects the pairlist construction kernel
+        Nbnxm::KernelSetup   kernelSetup_;
+    public:
+
+        gmx_nbnxn_gpu_t     *gpu_nbv;         /**< pointer to GPU nb verlet data     */
+        int                  min_ci_balanced; /**< pair list balancing parameter used for the 8x8x8 GPU kernels    */
 };
 
 namespace Nbnxm
@@ -276,16 +363,6 @@ void nbnxn_set_atomorder(nbnxn_search_t nbs);
 /*! \brief Returns the index position of the atoms on the pairlist search grid */
 gmx::ArrayRef<const int> nbnxn_get_gridindices(const nbnxn_search* nbs);
 
-/*! \brief Generates a pair-list for the given locality.
- *
- * With perturbed particles, also a group scheme style nbl_fep list is made.
- */
-void nbnxn_make_pairlist(nonbonded_verlet_t         *nbv,
-                         Nbnxm::InteractionLocality  iLocality,
-                         const t_blocka             *excl,
-                         int64_t                     step,
-                         t_nrnb                     *nrnb);
-
 /*! \brief Returns the number of steps performed with the current pair list */
 int nbnxnNumStepsWithPairlist(const nonbonded_verlet_t   &nbv,
                               Nbnxm::InteractionLocality  ilocality,
@@ -296,16 +373,6 @@ bool nbnxnIsDynamicPairlistPruningStep(const nonbonded_verlet_t   &nbv,
                                        Nbnxm::InteractionLocality  ilocality,
                                        int64_t                     step);
 
-/*! \brief Prune all pair-lists with given locality (currently CPU only)
- *
- * For all pair-lists with given locality, takes the outer list and prunes out
- * pairs beyond the pairlist inner radius and writes the result to a list that is
- * to be consumed by the non-bonded kernel.
- */
-void NbnxnDispatchPruneKernel(nonbonded_verlet_t         *nbv,
-                              Nbnxm::InteractionLocality  iLocality,
-                              const rvec                 *shift_vec);
-
 /*! \brief Executes the non-bonded kernel of the GPU or launches it on the GPU */
 void NbnxnDispatchKernel(nonbonded_verlet_t         *nbv,
                          Nbnxm::InteractionLocality  iLocality,
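
A usage sketch of the new nonbonded_verlet_t member functions declared above
(hypothetical caller code; nbv, excl, step, nrnb and shiftVec are assumed to exist):

    #include "gromacs/nbnxm/nbnxm.h"

    // Construct the local pairlist; later, prune it with the dynamic pruning kernel.
    nbv->constructPairlist(Nbnxm::InteractionLocality::Local, excl, step, nrnb);
    nbv->dispatchPruneKernel(Nbnxm::InteractionLocality::Local, shiftVec);

    // Kernel information is queried through the KernelSetup accessor instead of
    // the former public kernelType_/ewaldExclusionType_ ints.
    if (nbv->kernelSetup().kernelType == Nbnxm::KernelType::Gpu8x8x8)
    {
        // GPU-specific path
    }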
diff --git a/src/gromacs/nbnxm/nbnxm_geometry.cpp b/src/gromacs/nbnxm/nbnxm_geometry.cpp
index 4c55452e9caf49c95e83aaab8048d8b68407c6cd..c164034c790106afd67036eeb5b3c510914cef56 100644 (file)
 
 #include "gromacs/nbnxm/nbnxm.h"
 #include "gromacs/nbnxm/pairlist.h"
-#include "gromacs/simd/simd.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/real.h"
 
-bool nbnxn_kernel_pairlist_simple(int nb_kernel_type)
-{
-    if (nb_kernel_type == nbnxnkNotSet)
-    {
-        gmx_fatal(FARGS, "Non-bonded kernel type not set for Verlet-style pair-list.");
-    }
-
-    switch (nb_kernel_type)
-    {
-        case nbnxnk8x8x8_GPU:
-        case nbnxnk8x8x8_PlainC:
-            return false;
-
-        case nbnxnk4x4_PlainC:
-        case nbnxnk4xN_SIMD_4xN:
-        case nbnxnk4xN_SIMD_2xNN:
-            return true;
-
-        default:
-            gmx_incons("Invalid nonbonded kernel type passed!");
-            return false;
-    }
-}
-
-int nbnxn_kernel_to_cluster_i_size(int nb_kernel_type)
-{
-    switch (nb_kernel_type)
-    {
-        case nbnxnk4x4_PlainC:
-        case nbnxnk4xN_SIMD_4xN:
-        case nbnxnk4xN_SIMD_2xNN:
-            return c_nbnxnCpuIClusterSize;
-        case nbnxnk8x8x8_GPU:
-        case nbnxnk8x8x8_PlainC:
-            /* The cluster size for super/sub lists is only set here.
-             * Any value should work for the pair-search and atomdata code.
-             * The kernels, of course, might require a particular value.
-             */
-            return c_nbnxnGpuClusterSize;
-        default:
-            gmx_incons("unknown kernel type");
-    }
-}
-
-int nbnxn_kernel_to_cluster_j_size(int nb_kernel_type)
-{
-    int nbnxn_simd_width = 0;
-    int cj_size          = 0;
-
-#if GMX_SIMD
-    nbnxn_simd_width = GMX_SIMD_REAL_WIDTH;
-#endif
-
-    switch (nb_kernel_type)
-    {
-        case nbnxnk4x4_PlainC:
-            cj_size = c_nbnxnCpuIClusterSize;
-            break;
-        case nbnxnk4xN_SIMD_4xN:
-            cj_size = nbnxn_simd_width;
-            break;
-        case nbnxnk4xN_SIMD_2xNN:
-            cj_size = nbnxn_simd_width/2;
-            break;
-        case nbnxnk8x8x8_GPU:
-        case nbnxnk8x8x8_PlainC:
-            cj_size = nbnxn_kernel_to_cluster_i_size(nb_kernel_type);
-            break;
-        default:
-            gmx_incons("unknown kernel type");
-    }
-
-    return cj_size;
-}
-
 /* Clusters at the cut-off only increase rlist by 60% of their size */
 static constexpr real c_nbnxnRlistIncreaseOutsideFactor = 0.6;
 
diff --git a/src/gromacs/nbnxm/nbnxm_geometry.h b/src/gromacs/nbnxm/nbnxm_geometry.h
index c683ee2b1fef79875b7ca98981442481ba5cf2e3..e364ab706008e3dcf06f60802b2b0f12e7fca2a3 100644 (file)
@@ -37,6 +37,9 @@
 #define GMX_NBNXM_NBNXM_GEOMETRY_H
 
 #include "gromacs/math/vectypes.h"
+#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/pairlist.h"
+#include "gromacs/simd/simd.h"
 #include "gromacs/utility/fatalerror.h"
 
 /* Returns the base-2 log of n.
@@ -59,14 +62,51 @@ static inline int get_2log(int n)
     return log2;
 }
 
+namespace Nbnxm
+{
+
+/* The nbnxn i-cluster size in atoms for each nbnxn kernel type */
+static constexpr gmx::EnumerationArray<KernelType, int> IClusterSizePerKernelType =
+{
+    0,
+    c_nbnxnCpuIClusterSize,
+    c_nbnxnCpuIClusterSize,
+    c_nbnxnCpuIClusterSize,
+    c_nbnxnGpuClusterSize,
+    c_nbnxnGpuClusterSize
+};
+
+/* The nbnxn j-cluster size in atoms for each nbnxn kernel type */
+static constexpr gmx::EnumerationArray<KernelType, int> JClusterSizePerKernelType =
+{
+    0,
+    c_nbnxnCpuIClusterSize,
+#if GMX_SIMD
+    GMX_SIMD_REAL_WIDTH,
+    GMX_SIMD_REAL_WIDTH/2,
+#else
+    0,
+    0,
+#endif
+    c_nbnxnGpuClusterSize,
+    c_nbnxnGpuClusterSize
+};
+
 /* Returns whether the pair-list corresponding to nb_kernel_type is simple */
-bool nbnxn_kernel_pairlist_simple(int nb_kernel_type);
+static inline bool kernelTypeUsesSimplePairlist(const KernelType kernelType)
+{
+    return (kernelType == KernelType::Cpu4x4_PlainC ||
+            kernelType == KernelType::Cpu4xN_Simd_4xN ||
+            kernelType == KernelType::Cpu4xN_Simd_2xNN);
+}
 
-/* Returns the nbnxn i-cluster size in atoms for the nbnxn kernel type */
-int nbnxn_kernel_to_cluster_i_size(int nb_kernel_type);
+static inline bool kernelTypeIsSimd(const KernelType kernelType)
+{
+    return (kernelType == KernelType::Cpu4xN_Simd_4xN ||
+            kernelType == KernelType::Cpu4xN_Simd_2xNN);
+}
 
-/* Returns the nbnxn i-cluster size in atoms for the nbnxn kernel type */
-int nbnxn_kernel_to_cluster_j_size(int nb_kernel_type);
+} // namespace Nbnxm
 
 /* Returns the effective list radius of the pair-list
  *
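
A sketch of how the new lookup tables and predicates above replace the removed
switch-based helpers (caller code, not part of the patch):

    #include "gromacs/nbnxm/nbnxm_geometry.h"

    const Nbnxm::KernelType kernelType = Nbnxm::KernelType::Cpu4xN_Simd_2xNN;

    // Cluster sizes come from compile-time lookup tables indexed by the enum.
    const int  iClusterSize = Nbnxm::IClusterSizePerKernelType[kernelType];
    const int  jClusterSize = Nbnxm::JClusterSizePerKernelType[kernelType];

    // Kernel-class predicates replace the old switch statements.
    const bool simpleList   = Nbnxm::kernelTypeUsesSimplePairlist(kernelType);
    const bool isSimd       = Nbnxm::kernelTypeIsSimd(kernelType);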
diff --git a/src/gromacs/nbnxm/nbnxm_setup.cpp b/src/gromacs/nbnxm/nbnxm_setup.cpp
index 44a3d632ca15c9c76eaabdc2e0692737a583a85e..eef7c9a3eddefaf39ceafa2649942b84e2c75561 100644 (file)
@@ -54,6 +54,7 @@
 #include "gromacs/nbnxm/nbnxm.h"
 #include "gromacs/nbnxm/nbnxm_geometry.h"
 #include "gromacs/nbnxm/nbnxm_simd.h"
+#include "gromacs/nbnxm/pairlist.h"
 #include "gromacs/nbnxm/pairlist_tuning.h"
 #include "gromacs/nbnxm/pairlistset.h"
 #include "gromacs/simd/simd.h"
@@ -87,21 +88,24 @@ static gmx_bool nbnxn_simd_supported(const gmx::MDLogger &mdlog,
 }
 
 /*! \brief Returns the most suitable CPU kernel type and Ewald handling */
-static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused    *ir,
-                                  int                            *kernel_type,
-                                  int                            *ewald_excl,
-                                  const gmx_hw_info_t gmx_unused &hardwareInfo)
+static KernelSetup
+pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused    *ir,
+                      const gmx_hw_info_t gmx_unused &hardwareInfo)
 {
-    *kernel_type = nbnxnk4x4_PlainC;
-    *ewald_excl  = ewaldexclTable;
+    KernelSetup kernelSetup;
 
-#if GMX_SIMD
+    if (!GMX_SIMD)
+    {
+        kernelSetup.kernelType         = KernelType::Cpu4x4_PlainC;
+        kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
+    }
+    else
     {
 #ifdef GMX_NBNXN_SIMD_4XN
-        *kernel_type = nbnxnk4xN_SIMD_4xN;
+        kernelSetup.kernelType = KernelType::Cpu4xN_Simd_4xN;
 #endif
 #ifdef GMX_NBNXN_SIMD_2XNN
-        *kernel_type = nbnxnk4xN_SIMD_2xNN;
+        kernelSetup.kernelType = KernelType::Cpu4xN_Simd_2xNN;
 #endif
 
 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
@@ -121,22 +125,21 @@ static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused    *ir,
          * use of HT, use 4x8 to avoid a potential performance hit.
          * On Intel Haswell 4x8 is always faster.
          */
-        *kernel_type = nbnxnk4xN_SIMD_4xN;
+        kernelSetup.kernelType = KernelType::Cpu4xN_Simd_4xN;
 
-#if !GMX_SIMD_HAVE_FMA
-        if (EEL_PME_EWALD(ir->coulombtype) ||
-            EVDW_PME(ir->vdwtype))
+        if (!GMX_SIMD_HAVE_FMA && (EEL_PME_EWALD(ir->coulombtype) ||
+                                   EVDW_PME(ir->vdwtype)))
         {
             /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
              * There are enough instructions to make 2x(4+4) efficient.
              */
-            *kernel_type = nbnxnk4xN_SIMD_2xNN;
+            kernelSetup.kernelType = KernelType::Cpu4xN_Simd_2xNN;
         }
-#endif
+
         if (hardwareInfo.haveAmdZenCpu)
         {
             /* One 256-bit FMA per cycle makes 2xNN faster */
-            *kernel_type = nbnxnk4xN_SIMD_2xNN;
+            kernelSetup.kernelType = KernelType::Cpu4xN_Simd_2xNN;
         }
 #endif      /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
 
@@ -144,7 +147,7 @@ static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused    *ir,
         if (getenv("GMX_NBNXN_SIMD_4XN") != nullptr)
         {
 #ifdef GMX_NBNXN_SIMD_4XN
-            *kernel_type = nbnxnk4xN_SIMD_4xN;
+            kernelSetup.kernelType = KernelType::Cpu4xN_Simd_4xN;
 #else
             gmx_fatal(FARGS, "SIMD 4xN kernels requested, but GROMACS has been compiled without support for these kernels");
 #endif
@@ -152,7 +155,7 @@ static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused    *ir,
         if (getenv("GMX_NBNXN_SIMD_2XNN") != nullptr)
         {
 #ifdef GMX_NBNXN_SIMD_2XNN
-            *kernel_type = nbnxnk4xN_SIMD_2xNN;
+            kernelSetup.kernelType = KernelType::Cpu4xN_Simd_2xNN;
 #else
             gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but GROMACS has been compiled without support for these kernels");
 #endif
@@ -164,53 +167,58 @@ static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused    *ir,
          * will probably always be faster for a SIMD width of 8 or more.
          * With FMA analytical is sometimes faster for a width if 4 as well.
          * In single precision, this is faster on Bulldozer.
-         */
-#if GMX_SIMD_REAL_WIDTH >= 8 || \
-        (GMX_SIMD_REAL_WIDTH >= 4 && GMX_SIMD_HAVE_FMA && !GMX_DOUBLE)
-        /* On AMD Zen, tabulated Ewald kernels are faster on all 4 combinations
+         * On AMD Zen, tabulated Ewald kernels are faster on all 4 combinations
          * of single or double precision and 128 or 256-bit AVX2.
          */
-        if (!hardwareInfo.haveAmdZenCpu)
+        if (
+#if GMX_SIMD
+            (GMX_SIMD_REAL_WIDTH >= 8 ||
+             (GMX_SIMD_REAL_WIDTH >= 4 && GMX_SIMD_HAVE_FMA && !GMX_DOUBLE)) &&
+#endif
+            !hardwareInfo.haveAmdZenCpu)
         {
-            *ewald_excl = ewaldexclAnalytical;
+            kernelSetup.ewaldExclusionType = EwaldExclusionType::Analytical;
+        }
+        else
+        {
+            kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
         }
-#endif
         if (getenv("GMX_NBNXN_EWALD_TABLE") != nullptr)
         {
-            *ewald_excl = ewaldexclTable;
+            kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
         }
         if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != nullptr)
         {
-            *ewald_excl = ewaldexclAnalytical;
+            kernelSetup.ewaldExclusionType = EwaldExclusionType::Analytical;
         }
 
     }
-#endif  // GMX_SIMD
+
+    return kernelSetup;
 }
 
-const char *lookup_kernel_name(int kernel_type)
+const char *lookup_kernel_name(const KernelType kernelType)
 {
     const char *returnvalue = nullptr;
-    switch (kernel_type)
+    switch (kernelType)
     {
-        case nbnxnkNotSet:
+        case KernelType::NotSet:
             returnvalue = "not set";
             break;
-        case nbnxnk4x4_PlainC:
+        case KernelType::Cpu4x4_PlainC:
             returnvalue = "plain C";
             break;
-        case nbnxnk4xN_SIMD_4xN:
-        case nbnxnk4xN_SIMD_2xNN:
+        case KernelType::Cpu4xN_Simd_4xN:
+        case KernelType::Cpu4xN_Simd_2xNN:
 #if GMX_SIMD
             returnvalue = "SIMD";
 #else  // GMX_SIMD
             returnvalue = "not available";
 #endif // GMX_SIMD
             break;
-        case nbnxnk8x8x8_GPU: returnvalue    = "GPU"; break;
-        case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
+        case KernelType::Gpu8x8x8: returnvalue        = "GPU"; break;
+        case KernelType::Cpu8x8x8_PlainC: returnvalue = "plain C"; break;
 
-        case nbnxnkNR:
         default:
             gmx_fatal(FARGS, "Illegal kernel type selected");
     }
@@ -218,45 +226,42 @@ const char *lookup_kernel_name(int kernel_type)
 };
 
 /*! \brief Returns the most suitable kernel type and Ewald handling */
-static void pick_nbnxn_kernel(const gmx::MDLogger &mdlog,
-                              gmx_bool             use_simd_kernels,
-                              const gmx_hw_info_t &hardwareInfo,
-                              bool                 useGpu,
-                              EmulateGpuNonbonded  emulateGpu,
-                              const t_inputrec    *ir,
-                              int                 *kernel_type,
-                              int                 *ewald_excl,
-                              gmx_bool             bDoNonbonded)
+static KernelSetup
+pick_nbnxn_kernel(const gmx::MDLogger     &mdlog,
+                  gmx_bool                 use_simd_kernels,
+                  const gmx_hw_info_t     &hardwareInfo,
+                  const NonbondedResource &nonbondedResource,
+                  const t_inputrec        *ir,
+                  gmx_bool                 bDoNonbonded)
 {
-    GMX_RELEASE_ASSERT(kernel_type, "Need a valid kernel_type pointer");
+    KernelSetup kernelSetup;
 
-    *kernel_type = nbnxnkNotSet;
-    *ewald_excl  = ewaldexclTable;
-
-    if (emulateGpu == EmulateGpuNonbonded::Yes)
+    if (nonbondedResource == NonbondedResource::EmulateGpu)
     {
-        *kernel_type = nbnxnk8x8x8_PlainC;
+        kernelSetup.kernelType         = KernelType::Cpu8x8x8_PlainC;
+        kernelSetup.ewaldExclusionType = EwaldExclusionType::DecidedByGpuModule;
 
         if (bDoNonbonded)
         {
             GMX_LOG(mdlog.warning).asParagraph().appendText("Emulating a GPU run on the CPU (slow)");
         }
     }
-    else if (useGpu)
+    else if (nonbondedResource == NonbondedResource::Gpu)
     {
-        *kernel_type = nbnxnk8x8x8_GPU;
+        kernelSetup.kernelType         = KernelType::Gpu8x8x8;
+        kernelSetup.ewaldExclusionType = EwaldExclusionType::DecidedByGpuModule;
     }
-
-    if (*kernel_type == nbnxnkNotSet)
+    else
     {
         if (use_simd_kernels &&
             nbnxn_simd_supported(mdlog, ir))
         {
-            pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl, hardwareInfo);
+            kernelSetup = pick_nbnxn_kernel_cpu(ir, hardwareInfo);
         }
         else
         {
-            *kernel_type = nbnxnk4x4_PlainC;
+            kernelSetup.kernelType         = KernelType::Cpu4x4_PlainC;
+            kernelSetup.ewaldExclusionType = EwaldExclusionType::Analytical;
         }
     }
 
@@ -264,21 +269,41 @@ static void pick_nbnxn_kernel(const gmx::MDLogger &mdlog,
     {
         GMX_LOG(mdlog.info).asParagraph().appendTextFormatted(
                 "Using %s %dx%d nonbonded short-range kernels",
-                lookup_kernel_name(*kernel_type),
-                nbnxn_kernel_to_cluster_i_size(*kernel_type),
-                nbnxn_kernel_to_cluster_j_size(*kernel_type));
+                lookup_kernel_name(kernelSetup.kernelType),
+                IClusterSizePerKernelType[kernelSetup.kernelType],
+                JClusterSizePerKernelType[kernelSetup.kernelType]);
 
-        if (nbnxnk4x4_PlainC == *kernel_type ||
-            nbnxnk8x8x8_PlainC == *kernel_type)
+        if (KernelType::Cpu4x4_PlainC == kernelSetup.kernelType ||
+            KernelType::Cpu8x8x8_PlainC == kernelSetup.kernelType)
         {
             GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
                     "WARNING: Using the slow %s kernels. This should\n"
                     "not happen during routine usage on supported platforms.",
-                    lookup_kernel_name(*kernel_type));
+                    lookup_kernel_name(kernelSetup.kernelType));
         }
     }
+
+    GMX_RELEASE_ASSERT(kernelSetup.kernelType != KernelType::NotSet &&
+                       kernelSetup.ewaldExclusionType != EwaldExclusionType::NotSet,
+                       "All kernel setup parameters should be set here");
+
+    return kernelSetup;
 }
 
+} // namespace Nbnxm
+
+void nonbonded_verlet_t::initPairlistSets(const bool haveMultipleDomains)
+{
+    pairlistSets_.emplace_back(*listParams);
+    if (haveMultipleDomains)
+    {
+        pairlistSets_.emplace_back(*listParams);
+    }
+}
+
+namespace Nbnxm
+{
+
 void init_nb_verlet(const gmx::MDLogger     &mdlog,
                     nonbonded_verlet_t     **nb_verlet,
                     gmx_bool                 bFEP_NonBonded,
@@ -290,38 +315,42 @@ void init_nb_verlet(const gmx::MDLogger     &mdlog,
                     const gmx_mtop_t        *mtop,
                     matrix                   box)
 {
-    nonbonded_verlet_t        *nbv        = new nonbonded_verlet_t();
+    nonbonded_verlet_t *nbv        = new nonbonded_verlet_t();
 
-    const EmulateGpuNonbonded  emulateGpu =
-        ((getenv("GMX_EMULATE_GPU") != nullptr) ? EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
-    bool                       useGpu     = deviceInfo != nullptr;
+    const bool          emulateGpu = (getenv("GMX_EMULATE_GPU") != nullptr);
+    const bool          useGpu     = deviceInfo != nullptr;
 
-    GMX_RELEASE_ASSERT(!(emulateGpu == EmulateGpuNonbonded::Yes && useGpu), "When GPU emulation is active, there cannot be a GPU assignment");
+    GMX_RELEASE_ASSERT(!(emulateGpu && useGpu), "When GPU emulation is active, there cannot be a GPU assignment");
+
+    NonbondedResource nonbondedResource;
+    if (useGpu)
+    {
+        nonbondedResource = NonbondedResource::Gpu;
+    }
+    else if (emulateGpu)
+    {
+        nonbondedResource = NonbondedResource::EmulateGpu;
+    }
+    else
+    {
+        nonbondedResource = NonbondedResource::Cpu;
+    }
 
     nbv->nbs             = nullptr;
 
-    pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo,
-                      useGpu, emulateGpu, ir,
-                      &nbv->kernelType_,
-                      &nbv->ewaldExclusionType_,
-                      fr->bNonbonded);
+    nbv->setKernelSetup(pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo,
+                                          nonbondedResource, ir,
+                                          fr->bNonbonded));
 
     const bool haveMultipleDomains = (DOMAINDECOMP(cr) && cr->dd->nnodes > 1);
 
-    const bool pairlistIsSimple = nbv->pairlistIsSimple();
-    for (nbnxn_pairlist_set_t &pairlistSet : nbv->pairlistSets)
-    {
-        // TODO Change this to a constructor
-        /* The second parameter tells whether lists should be combined,
-         * this is currently only and always done for GPU lists.
-         */
-        nbnxn_init_pairlist_set(&pairlistSet, pairlistIsSimple, !pairlistIsSimple);
-    }
+    nbv->listParams = std::make_unique<NbnxnListParameters>(nbv->kernelSetup().kernelType,
+                                                            ir->rlist);
+    nbv->initPairlistSets(haveMultipleDomains);
 
     nbv->min_ci_balanced = 0;
 
-    nbv->listParams = std::make_unique<NbnxnListParameters>(ir->rlist);
-    setupDynamicPairlistPruning(mdlog, ir, mtop, box, nbv->kernelType_, fr->ic,
+    setupDynamicPairlistPruning(mdlog, ir, mtop, box, fr->ic,
                                 nbv->listParams.get());
 
     nbv->nbs = std::make_unique<nbnxn_search>(ir->ePBC,
@@ -369,11 +398,11 @@ void init_nb_verlet(const gmx::MDLogger     &mdlog,
     }
     nbnxn_atomdata_init(mdlog,
                         nbv->nbat,
-                        nbv->kernelType_,
+                        nbv->kernelSetup().kernelType,
                         enbnxninitcombrule,
                         fr->ntype, fr->nbfp,
                         mimimumNumEnergyGroupNonbonded,
-                        pairlistIsSimple ? gmx_omp_nthreads_get(emntNonbonded) : 1);
+                        nbv->pairlistIsSimple() ? gmx_omp_nthreads_get(emntNonbonded) : 1);
 
     if (useGpu)
     {
index 9a4b667756f2dc8ffe7a1bd8c604d90e19d718c9..06293a02619b8176166a21b18fbc21691e7dbe82 100644 (file)
@@ -804,13 +804,14 @@ NbnxnPairlistGpu::NbnxnPairlistGpu(gmx::PinningPolicy pinningPolicy) :
     work = new NbnxnPairlistGpuWork();
 }
 
-void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list,
-                             gmx_bool bSimple, gmx_bool bCombined)
+void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list)
 {
-    GMX_RELEASE_ASSERT(!bSimple || !bCombined, "Can only combine non-simple lists");
-
-    nbl_list->bSimple   = bSimple;
-    nbl_list->bCombined = bCombined;
+    nbl_list->bSimple   =
+        (nbl_list->params.pairlistType == PairlistType::Simple4x2 ||
+         nbl_list->params.pairlistType == PairlistType::Simple4x4 ||
+         nbl_list->params.pairlistType == PairlistType::Simple4x8);
+    // Currently GPU lists are always combined
+    nbl_list->bCombined = !nbl_list->bSimple;
 
     nbl_list->nnbl = gmx_omp_nthreads_get(emntNonbonded);
 
@@ -821,7 +822,7 @@ void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list,
                   nbl_list->nnbl, NBNXN_BUFFERFLAG_MAX_THREADS, NBNXN_BUFFERFLAG_MAX_THREADS);
     }
 
-    if (bSimple)
+    if (nbl_list->bSimple)
     {
         snew(nbl_list->nbl, nbl_list->nnbl);
         if (nbl_list->nnbl > 1)
@@ -833,7 +834,7 @@ void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list,
     {
         snew(nbl_list->nblGpu, nbl_list->nnbl);
     }
-    snew(nbl_list->nbl_fep, nbl_list->nnbl);
+    nbl_list->nbl_fep.resize(nbl_list->nnbl);
     /* Execute in order to avoid memory interleaving between threads */
 #pragma omp parallel for num_threads(nbl_list->nnbl) schedule(static)
     for (int i = 0; i < nbl_list->nnbl; i++)
@@ -843,7 +844,7 @@ void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list,
             /* Allocate the nblist data structure locally on each thread
              * to optimize memory access for NUMA architectures.
              */
-            if (bSimple)
+            if (nbl_list->bSimple)
             {
                 nbl_list->nbl[i] = new NbnxnPairlistCpu();
 
@@ -2455,24 +2456,24 @@ static void icell_set_x_simple(int ci,
 static void icell_set_x(int ci,
                         real shx, real shy, real shz,
                         int stride, const real *x,
-                        int nb_kernel_type,
+                        const Nbnxm::KernelType kernelType,
                         NbnxnPairlistCpuWork *work)
 {
-    switch (nb_kernel_type)
+    switch (kernelType)
     {
 #if GMX_SIMD
 #ifdef GMX_NBNXN_SIMD_4XN
-        case nbnxnk4xN_SIMD_4xN:
+        case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
             icell_set_x_simd_4xn(ci, shx, shy, shz, stride, x, work);
             break;
 #endif
 #ifdef GMX_NBNXN_SIMD_2XNN
-        case nbnxnk4xN_SIMD_2xNN:
+        case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
             icell_set_x_simd_2xnn(ci, shx, shy, shz, stride, x, work);
             break;
 #endif
 #endif
-        case nbnxnk4x4_PlainC:
+        case Nbnxm::KernelType::Cpu4x4_PlainC:
             icell_set_x_simple(ci, shx, shy, shz, stride, x, &work->iClusterData);
             break;
         default:
@@ -2485,7 +2486,7 @@ static void icell_set_x(int ci,
 static void icell_set_x(int ci,
                         real shx, real shy, real shz,
                         int stride, const real *x,
-                        int gmx_unused nb_kernel_type,
+                        Nbnxm::KernelType gmx_unused kernelType,
                         NbnxnPairlistGpuWork *work)
 {
 #if !GMX_SIMD4_HAVE_REAL
@@ -3116,12 +3117,12 @@ static void makeClusterListWrapper(NbnxnPairlistCpu              *nbl,
                                    const nbnxn_atomdata_t        *nbat,
                                    const real                     rlist2,
                                    const real                     rbb2,
-                                   const int                      nb_kernel_type,
+                                   const Nbnxm::KernelType        kernelType,
                                    int                           *numDistanceChecks)
 {
-    switch (nb_kernel_type)
+    switch (kernelType)
     {
-        case nbnxnk4x4_PlainC:
+        case Nbnxm::KernelType::Cpu4x4_PlainC:
             makeClusterListSimple(jGrid,
                                   nbl, ci, firstCell, lastCell,
                                   excludeSubDiagonal,
@@ -3130,7 +3131,7 @@ static void makeClusterListWrapper(NbnxnPairlistCpu              *nbl,
                                   numDistanceChecks);
             break;
 #ifdef GMX_NBNXN_SIMD_4XN
-        case nbnxnk4xN_SIMD_4xN:
+        case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
             makeClusterListSimd4xn(jGrid,
                                    nbl, ci, firstCell, lastCell,
                                    excludeSubDiagonal,
@@ -3140,7 +3141,7 @@ static void makeClusterListWrapper(NbnxnPairlistCpu              *nbl,
             break;
 #endif
 #ifdef GMX_NBNXN_SIMD_2XNN
-        case nbnxnk4xN_SIMD_2xNN:
+        case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
             makeClusterListSimd2xnn(jGrid,
                                     nbl, ci, firstCell, lastCell,
                                     excludeSubDiagonal,
@@ -3149,6 +3150,8 @@ static void makeClusterListWrapper(NbnxnPairlistCpu              *nbl,
                                     numDistanceChecks);
             break;
 #endif
+        default:
+            GMX_ASSERT(false, "Unhandled kernel type");
     }
 }
 
@@ -3162,7 +3165,7 @@ static void makeClusterListWrapper(NbnxnPairlistGpu              *nbl,
                                    const nbnxn_atomdata_t        *nbat,
                                    const real                     rlist2,
                                    const real                     rbb2,
-                                   const int gmx_unused           nb_kernel_type,
+                                   Nbnxm::KernelType gmx_unused   kernelType,
                                    int                           *numDistanceChecks)
 {
     for (int cj = firstCell; cj <= lastCell; cj++)
@@ -3248,7 +3251,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs,
                                      const nbnxn_atomdata_t *nbat,
                                      const t_blocka &exclusions,
                                      real rlist,
-                                     int nb_kernel_type,
+                                     const Nbnxm::KernelType kernelType,
                                      int ci_block,
                                      gmx_bool bFBufferFlag,
                                      int nsubpair_max,
@@ -3283,7 +3286,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs,
 
     sync_work(nbl);
     GMX_ASSERT(nbl->na_ci == jGrid.na_c, "The cluster sizes in the list and grid should match");
-    nbl->na_cj = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
+    nbl->na_cj = Nbnxm::JClusterSizePerKernelType[kernelType];
     na_cj_2log = get_2log(nbl->na_cj);
 
     nbl->rlist  = rlist;
@@ -3542,7 +3545,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs,
 
                     icell_set_x(cell0_i+ci, shx, shy, shz,
                                 nbat->xstride, nbat->x().data(),
-                                nb_kernel_type,
+                                kernelType,
                                 nbl->work);
 
                     for (int cx = cxf; cx <= cxl; cx++)
@@ -3687,7 +3690,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs,
                                                            excludeSubDiagonal,
                                                            nbat,
                                                            rlist2, rbb2,
-                                                           nb_kernel_type,
+                                                           kernelType,
                                                            &numDistanceChecks);
 
                                     if (bFBufferFlag)
@@ -4034,6 +4037,7 @@ static void sort_sci(NbnxnPairlistGpu *nbl)
 
 void nbnxn_make_pairlist(nonbonded_verlet_t        *nbv,
                          const InteractionLocality  iLocality,
+                         nbnxn_pairlist_set_t      *nbl_list,
                          const t_blocka            *excl,
                          const int64_t              step,
                          t_nrnb                    *nrnb)
@@ -4041,7 +4045,6 @@ void nbnxn_make_pairlist(nonbonded_verlet_t        *nbv,
     nbnxn_search         *nbs      = nbv->nbs.get();
     nbnxn_atomdata_t     *nbat     = nbv->nbat;
     const real            rlist    = nbv->listParams->rlistOuter;
-    nbnxn_pairlist_set_t *nbl_list = &nbv->pairlistSets[iLocality];
 
     int                nsubpair_target;
     float              nsubpair_tot_est;
@@ -4170,7 +4173,7 @@ void nbnxn_make_pairlist(nonbonded_verlet_t        *nbv,
                         nbnxn_make_pairlist_part(nbs, iGrid, jGrid,
                                                  &nbs->work[th], nbat, *excl,
                                                  rlist,
-                                                 nbv->kernelType_,
+                                                 nbv->kernelSetup().kernelType,
                                                  ci_block,
                                                  nbat->bUseBufferFlags,
                                                  nsubpair_target,
@@ -4184,7 +4187,7 @@ void nbnxn_make_pairlist(nonbonded_verlet_t        *nbv,
                         nbnxn_make_pairlist_part(nbs, iGrid, jGrid,
                                                  &nbs->work[th], nbat, *excl,
                                                  rlist,
-                                                 nbv->kernelType_,
+                                                 nbv->kernelSetup().kernelType,
                                                  ci_block,
                                                  nbat->bUseBufferFlags,
                                                  nsubpair_target,
index 74bf78fc8fd71bc6483d49c025ccccd31277e7a9..71d90b38830866c1e685b05cb9a958e0f0159b3f 100644 (file)
@@ -46,6 +46,7 @@
 #include "gromacs/utility/basedefinitions.h"
 #include "gromacs/utility/bitmask.h"
 #include "gromacs/utility/defaultinitializationallocator.h"
+#include "gromacs/utility/enumerationhelpers.h"
 #include "gromacs/utility/real.h"
 
 // This file with constants is separate from this file to be able
@@ -56,6 +57,11 @@ struct NbnxnPairlistCpuWork;
 struct NbnxnPairlistGpuWork;
 struct tMPI_Atomic;
 
+namespace Nbnxm
+{
+enum class KernelType;
+}
+
 /* Convenience type for vector with aligned memory */
 template<typename T>
 using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>;
@@ -64,6 +70,18 @@ using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>;
 template<typename T>
 using FastVector = std::vector < T, gmx::DefaultInitializationAllocator < T>>;
 
+enum class PairlistType : int
+{
+    Simple4x2,
+    Simple4x4,
+    Simple4x8,
+    Hierarchical8x8,
+    Count
+};
+
+static constexpr gmx::EnumerationArray<PairlistType, int> IClusterSizePerListType = { 4, 4, 4, 8 };
+static constexpr gmx::EnumerationArray<PairlistType, int> JClusterSizePerListType = { 2, 4, 8, 8 };
+
 /*! \cond INTERNAL */
 
 /*! \brief The setup for generating and pruning the nbnxn pair list.
@@ -74,20 +92,15 @@ struct NbnxnListParameters
 {
     /*! \brief Constructor producing a struct with dynamic pruning disabled
      */
-    NbnxnListParameters(real rlist) :
-        useDynamicPruning(false),
-        nstlistPrune(-1),
-        rlistOuter(rlist),
-        rlistInner(rlist),
-        numRollingParts(1)
-    {
-    }
-
-    bool useDynamicPruning; //!< Are we using dynamic pair-list pruning
-    int  nstlistPrune;      //!< Pair-list dynamic pruning interval
-    real rlistOuter;        //!< Cut-off of the larger, outer pair-list
-    real rlistInner;        //!< Cut-off of the smaller, inner pair-list
-    int  numRollingParts;   //!< The number parts to divide the pair-list into for rolling pruning, a value of 1 gives no rolling pruning
+    NbnxnListParameters(Nbnxm::KernelType kernelType,
+                        real              rlist);
+
+    PairlistType pairlistType;      //!< The type of cluster-pair list
+    bool         useDynamicPruning; //!< Are we using dynamic pair-list pruning
+    int          nstlistPrune;      //!< Pair-list dynamic pruning interval
+    real         rlistOuter;        //!< Cut-off of the larger, outer pair-list
+    real         rlistInner;        //!< Cut-off of the smaller, inner pair-list
+    int          numRollingParts;   //!< The number of parts to divide the pair-list into for rolling pruning; a value of 1 gives no rolling pruning
 };
 
 /*! \endcond */
@@ -292,18 +305,23 @@ struct NbnxnPairlistGpu
 
 struct nbnxn_pairlist_set_t
 {
-    int                nnbl;                  /* number of lists */
-    NbnxnPairlistCpu **nbl;                   /* lists for CPU */
-    NbnxnPairlistCpu **nbl_work;              /* work space for rebalancing lists */
-    NbnxnPairlistGpu **nblGpu;                /* lists for GPU */
-    gmx_bool           bCombined;             /* TRUE if lists get combined into one (the 1st) */
-    gmx_bool           bSimple;               /* TRUE if the list of of type "simple"
+    nbnxn_pairlist_set_t(const NbnxnListParameters &listParams);
+
+    int                         nnbl;         /* number of lists */
+    NbnxnPairlistCpu          **nbl;          /* lists for CPU */
+    NbnxnPairlistCpu          **nbl_work;     /* work space for rebalancing lists */
+    NbnxnPairlistGpu          **nblGpu;       /* lists for GPU */
+    const NbnxnListParameters  &params;       /* Pairlist parameters describing setup and ranges */
+    gmx_bool                    bCombined;    /* TRUE if lists get combined into one (the 1st) */
+    gmx_bool                    bSimple;      /* TRUE if the list is of type "simple"
                                                  (na_sc=na_s, no super-clusters used) */
-    int                natpair_ljq;           /* Total number of atom pairs for LJ+Q kernel */
-    int                natpair_lj;            /* Total number of atom pairs for LJ kernel   */
-    int                natpair_q;             /* Total number of atom pairs for Q kernel    */
-    t_nblist         **nbl_fep;               /* List of free-energy atom pair interactions */
-    int64_t            outerListCreationStep; /* Step at which the outer list was created */
+
+    /* Counts for debug printing */
+    int                     natpair_ljq;           /* Total number of atom pairs for LJ+Q kernel */
+    int                     natpair_lj;            /* Total number of atom pairs for LJ kernel   */
+    int                     natpair_q;             /* Total number of atom pairs for Q kernel    */
+    std::vector<t_nblist *> nbl_fep;               /* List of free-energy atom pair interactions */
+    int64_t                 outerListCreationStep; /* Step at which the outer list was created */
 };
 
 enum {
@@ -315,12 +333,12 @@ struct nbnxn_atomdata_output_t
 {
     /* Constructor
      *
-     * \param[in] nb_kernel_type          Type of non-bonded kernel
+     * \param[in] kernelType              Type of non-bonded kernel
      * \param[in] numEnergyGroups         The number of energy groups
      * \param[in] simdEnergyBufferStride  Stride for entries in the energy buffers for SIMD kernels
      * \param[in] pinningPolicy           Sets the pinning policy for all buffers used on the GPU
      */
-    nbnxn_atomdata_output_t(int                nb_kernel_type,
+    nbnxn_atomdata_output_t(Nbnxm::KernelType  kernelType,
                             int                numEnergyGroups,
                             int                simdEnergyBUfferStride,
                             gmx::PinningPolicy pinningPolicy);
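The new PairlistType enum and its gmx::EnumerationArray constants let callers look up cluster sizes directly by list type instead of going through nbnxn_kernel_to_cluster_j_size(). A minimal sketch of that use, matching the pairlist_tuning.cpp hunk below; jClusterSizeFor is a hypothetical helper, not part of this change:

    #include "gromacs/nbnxm/pairlist.h"

    // Hypothetical helper: look up the j-cluster size for a pairlist type.
    // The EnumerationArray is indexed directly by the enum class value.
    static int jClusterSizeFor(const PairlistType pairlistType)
    {
        return JClusterSizePerListType[pairlistType];
    }
    // e.g. jClusterSizeFor(PairlistType::Simple4x2) yields 2 and
    //      jClusterSizeFor(PairlistType::Hierarchical8x8) yields 8.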
index a8924efefc1656d7845d27c03a2cd48c55c73015..d3ee3bc2b31f4c37384d24b2cbfa30c0c389cd72 100644 (file)
@@ -483,7 +483,6 @@ void setupDynamicPairlistPruning(const gmx::MDLogger       &mdlog,
                                  const t_inputrec          *ir,
                                  const gmx_mtop_t          *mtop,
                                  matrix                     box,
-                                 int                        nbnxnKernelType,
                                  const interaction_const_t *ic,
                                  NbnxnListParameters       *listParams)
 {
@@ -492,10 +491,14 @@ void setupDynamicPairlistPruning(const gmx::MDLogger       &mdlog,
     /* Initialize the parameters to no dynamic list pruning */
     listParams->useDynamicPruning = false;
 
-    const VerletbufListSetup ls   = verletbufGetListSetup(nbnxnKernelType);
+    const VerletbufListSetup ls   =
+    {
+        IClusterSizePerListType[listParams->pairlistType],
+        JClusterSizePerListType[listParams->pairlistType]
+    };
 
     /* Currently emulation mode does not support dual pair-lists */
-    const bool useGpu             = (nbnxnKernelType == nbnxnk8x8x8_GPU);
+    const bool useGpu             = (listParams->pairlistType == PairlistType::Hierarchical8x8);
 
     if (supportsDynamicPairlistGenerationInterval(*ir) &&
         getenv("GMX_DISABLE_DYNAMICPRUNING") == nullptr)
index a6f82fcddf75b66de5959fa6a62b865601c02d95..3c2227dea99e8225ed34691499403793fab5870d 100644 (file)
@@ -86,7 +86,6 @@ void increaseNstlist(FILE *fplog, t_commrec *cr,
  * \param[in]     ir               The input parameter record
  * \param[in]     mtop             The global topology
  * \param[in]     box              The unit cell
- * \param[in]     nbnxnKernelType  The type of nbnxn kernel used
  * \param[in]     ic               The nonbonded interactions constants
  * \param[in,out] listParams       The list setup parameters
  */
@@ -94,7 +93,6 @@ void setupDynamicPairlistPruning(const gmx::MDLogger       &mdlog,
                                  const t_inputrec          *ir,
                                  const gmx_mtop_t          *mtop,
                                  matrix                     box,
-                                 int                        nbnxnKernelType,
                                  const interaction_const_t *ic,
                                  NbnxnListParameters       *listParams);
 
index 8454b4921c22b0f3f215bf92b8baebcd2f62f71d..18b0b12a5c18fb23e2f50d0c7482f092be900cc6 100644 (file)
  * the research papers on the package. Check out http://www.gromacs.org.
  */
 
+/*! \internal \file
+ * \brief
+ * Implements functionality for nbnxn_pairlist_set_t.
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_nbnxm
+ */
+
 #include "gmxpre.h"
 
+#include "pairlistset.h"
+
 #include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/nbnxm_geometry.h"
 #include "gromacs/nbnxm/pairlist.h"
+#include "gromacs/utility/gmxassert.h"
+
+/*! \cond INTERNAL */
+
+NbnxnListParameters::NbnxnListParameters(const Nbnxm::KernelType kernelType,
+                                         const real              rlist) :
+    useDynamicPruning(false),
+    nstlistPrune(-1),
+    rlistOuter(rlist),
+    rlistInner(rlist),
+    numRollingParts(1)
+{
+    if (!Nbnxm::kernelTypeUsesSimplePairlist(kernelType))
+    {
+        pairlistType = PairlistType::Hierarchical8x8;
+    }
+    else
+    {
+        switch (Nbnxm::JClusterSizePerKernelType[kernelType])
+        {
+            case 2:
+                pairlistType = PairlistType::Simple4x2;
+                break;
+            case 4:
+                pairlistType = PairlistType::Simple4x4;
+                break;
+            case 8:
+                pairlistType = PairlistType::Simple4x8;
+                break;
+            default:
+                GMX_RELEASE_ASSERT(false, "Kernel type does not have a pairlist type");
+        }
+    }
+}
+
+nbnxn_pairlist_set_t::nbnxn_pairlist_set_t(const NbnxnListParameters &listParams) :
+    params(listParams)
+{
+    // TODO: Move the work of nbnxn_init_pairlist_set() into this constructor
+    nbnxn_init_pairlist_set(this);
+}
 
 int nbnxnNumStepsWithPairlist(const nonbonded_verlet_t         &nbv,
                               const Nbnxm::InteractionLocality  iLocality,
                               const int64_t                     step)
 {
-    return step - nbv.pairlistSets[iLocality].outerListCreationStep;
+    return step - nbv.pairlistSet(iLocality).outerListCreationStep;
 }
 
 bool nbnxnIsDynamicPairlistPruningStep(const nonbonded_verlet_t         &nbv,
@@ -51,3 +103,5 @@ bool nbnxnIsDynamicPairlistPruningStep(const nonbonded_verlet_t         &nbv,
 {
     return nbnxnNumStepsWithPairlist(nbv, iLocality, step) % nbv.listParams->nstlistPrune == 0;
 }
+
+/*! \endcond */
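With pairlistSets_ now private, callers reach a list set through nonbonded_verlet_t::pairlistSet(), as nbnxnNumStepsWithPairlist() above does. A plausible sketch of such an accessor, assuming the local set sits at index 0 and the non-local set (present only with multiple domains) at index 1; the actual implementation is not shown in this change:

    // Sketch only: map the interaction locality onto the private vector.
    const nbnxn_pairlist_set_t &
    nonbonded_verlet_t::pairlistSet(const Nbnxm::InteractionLocality iLocality) const
    {
        return pairlistSets_[static_cast<int>(iLocality)];
    }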
index b366b797307692b43bc080782775a3a48aa2e192..e2da79e853776209a3f7f92958311f9ac633fa86 100644 (file)
@@ -68,9 +68,11 @@ nbnxn_search *nbnxn_init_search(int                        ePBC,
                                 gmx_bool                   bFEP,
                                 int                        nthread_max);
 
-/* Initializes a set of pair lists stored in nbnxn_pairlist_set_t */
-void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list,
-                             gmx_bool simple, gmx_bool combined);
+/* Initializes a set of pair lists stored in nbnxn_pairlist_set_t
+ *
+ * TODO: Merge into the constructor
+ */
+void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list);
 
 /*! \brief Prepare the list-set produced by the search for dynamic pruning
  *
index 38d62fad5c61725b5b606f6354792d897c499355..8caddc494e6d857754c5e5b9ee82e30ce0ac3f8c 100644 (file)
 #include "kernels_simd_4xm/kernel_prune.h"
 
 
-void NbnxnDispatchPruneKernel(nonbonded_verlet_t               *nbv,
-                              const Nbnxm::InteractionLocality  ilocality,
-                              const rvec                       *shift_vec)
+void NbnxnDispatchPruneKernel(nbnxn_pairlist_set_t   *nbl_lists,
+                              const Nbnxm::KernelType kernelType,
+                              const nbnxn_atomdata_t *nbat,
+                              const rvec             *shift_vec)
 {
-    nbnxn_pairlist_set_t     *nbl_lists  = &nbv->pairlistSets[ilocality];
-    const nbnxn_atomdata_t   *nbat       = nbv->nbat;
-    const real                rlistInner = nbv->listParams->rlistInner;
+    const real rlistInner = nbl_lists->params.rlistInner;
 
     GMX_ASSERT(nbl_lists->nbl[0]->ciOuter.size() >= nbl_lists->nbl[0]->ci.size(),
                "Here we should either have an empty ci list or ciOuter should be >= ci");
@@ -62,15 +61,15 @@ void NbnxnDispatchPruneKernel(nonbonded_verlet_t               *nbv,
     {
         NbnxnPairlistCpu *nbl = nbl_lists->nbl[i];
 
-        switch (nbv->kernelType_)
+        switch (kernelType)
         {
-            case nbnxnk4xN_SIMD_4xN:
+            case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
                 nbnxn_kernel_prune_4xn(nbl, nbat, shift_vec, rlistInner);
                 break;
-            case nbnxnk4xN_SIMD_2xNN:
+            case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
                 nbnxn_kernel_prune_2xnn(nbl, nbat, shift_vec, rlistInner);
                 break;
-            case nbnxnk4x4_PlainC:
+            case Nbnxm::KernelType::Cpu4x4_PlainC:
                 nbnxn_kernel_prune_ref(nbl, nbat, shift_vec, rlistInner);
                 break;
             default:
index 09f4ebc27cf8e7f01587394f964da4be470f13c1..fbb3683047f8f068016547e577522bff90fef390 100644 (file)
@@ -60,7 +60,6 @@
 #include "gromacs/mdtypes/commrec.h"
 #include "gromacs/mdtypes/inputrec.h"
 #include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/nbnxm/nbnxm.h"
 #include "gromacs/taskassignment/taskassignment.h"
 #include "gromacs/topology/topology.h"
 #include "gromacs/utility/baseversion.h"
index 02dcac5512571f55d3b46188d015d8406d483e8d..df0a50d2317eb66a9db68ff7324435e6fe72e20f 100644 (file)
@@ -49,8 +49,6 @@ struct gmx_hw_info_t;
 struct gmx_mtop_t;
 struct t_inputrec;
 
-enum class EmulateGpuNonbonded : bool;
-
 namespace gmx
 {
 
@@ -62,6 +60,15 @@ enum class TaskTarget : int
     Gpu
 };
 
+//! Help pass GPU-emulation parameters with type safety.
+enum class EmulateGpuNonbonded : bool
+{
+    //! Do not emulate GPUs.
+    No,
+    //! Do emulate GPUs.
+    Yes
+};
+
 /*! \brief Decide whether this thread-MPI simulation will run
  * nonbonded tasks on GPUs.
  *