Converted the nbnxm kernel type and Ewald exclusion enums to enum classes.
Added resource, kernel setup and pairlist type enums.
Also made pairlistSets_ private.
Change-Id: Ic8ff8c59cf8a72d3462b8a0f7382cd1636136c40
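For orientation, a minimal hypothetical snippet (not part of the change itself) showing the effect of the conversion: the scoped enumerators no longer convert implicitly to int, so the compiler now catches mix-ups the old constants allowed.

    // Illustration only: scoped enumerators replace the old int-valued constants.
    Nbnxm::KernelType kernelType = Nbnxm::KernelType::Cpu4xN_Simd_4xN;
    // int width = kernelType;            // error: no implicit conversion to int
    // kernelType = nbnxnk4xN_SIMD_4xN;   // error: the old unscoped constants are gone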
real md3; // -V''' at the cutoff
};
-VerletbufListSetup verletbufGetListSetup(int nbnxnKernelType)
+VerletbufListSetup verletbufGetListSetup(Nbnxm::KernelType nbnxnKernelType)
{
/* Note that the current buffer estimation code only handles clusters
* of size 1, 2 or 4, so for 4x8 or 8x8 we use the estimate for 4x4.
*/
VerletbufListSetup listSetup;
- listSetup.cluster_size_i = nbnxn_kernel_to_cluster_i_size(nbnxnKernelType);
- listSetup.cluster_size_j = nbnxn_kernel_to_cluster_j_size(nbnxnKernelType);
+ listSetup.cluster_size_i = Nbnxm::IClusterSizePerKernelType[nbnxnKernelType];
+ listSetup.cluster_size_j = Nbnxm::JClusterSizePerKernelType[nbnxnKernelType];
- if (nbnxnKernelType == nbnxnk8x8x8_GPU ||
- nbnxnKernelType == nbnxnk8x8x8_PlainC)
+ if (!Nbnxm::kernelTypeUsesSimplePairlist(nbnxnKernelType))
{
/* The GPU kernels (except for OpenCL) split the j-clusters in two halves */
listSetup.cluster_size_j /= 2;
* i- and j-cluster sizes, so we potentially overestimate, but never
* underestimate, the buffer drift.
*/
- int nbnxnKernelType;
+ Nbnxm::KernelType nbnxnKernelType;
if (listType == ListSetupType::Gpu)
{
- nbnxnKernelType = nbnxnk8x8x8_GPU;
+ nbnxnKernelType = Nbnxm::KernelType::Gpu8x8x8;
}
else if (GMX_SIMD && listType == ListSetupType::CpuSimdWhenSupported)
{
#ifdef GMX_NBNXN_SIMD_2XNN
/* We use the smallest cluster size to be on the safe side */
- nbnxnKernelType = nbnxnk4xN_SIMD_2xNN;
+ nbnxnKernelType = Nbnxm::KernelType::Cpu4xN_Simd_2xNN;
#else
- nbnxnKernelType = nbnxnk4xN_SIMD_4xN;
+ nbnxnKernelType = Nbnxm::KernelType::Cpu4xN_Simd_4xN;
#endif
}
else
{
- nbnxnKernelType = nbnxnk4x4_PlainC;
+ nbnxnKernelType = Nbnxm::KernelType::Cpu4x4_PlainC;
}
return verletbufGetListSetup(nbnxnKernelType);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
class RangePartitioning;
} // namespace gmx
+namespace Nbnxm
+{
+enum class KernelType;
+} // namespace Nbnxm
+
+
struct VerletbufListSetup
{
int cluster_size_i; /* Cluster pair-list i-cluster size atom count */
/* Returns the pair-list setup for the given nbnxn kernel type.
*/
-VerletbufListSetup verletbufGetListSetup(int nbnxnKernelType);
+VerletbufListSetup verletbufGetListSetup(Nbnxm::KernelType nbnxnKernelType);
/* Enum for choosing the list type for verletbufGetSafeListSetup() */
enum class ListSetupType
* the current coordinates of the atoms.
*/
wallcycle_sub_start(wcycle, ewcsNONBONDED_PRUNING);
- NbnxnDispatchPruneKernel(nbv, ilocality, fr->shift_vec);
+ nbv->dispatchPruneKernel(ilocality, fr->shift_vec);
wallcycle_sub_stop(wcycle, ewcsNONBONDED_PRUNING);
}
}
}
-static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
- t_forcerec *fr,
- rvec x[],
- rvec f[],
- const t_mdatoms *mdatoms,
- t_lambda *fepvals,
- real *lambda,
- gmx_enerdata_t *enerd,
- int flags,
- t_nrnb *nrnb,
- gmx_wallcycle_t wcycle)
+static void do_nb_verlet_fep(const nonbonded_verlet_t &nbv,
+ const Nbnxm::InteractionLocality iLocality,
+ t_forcerec *fr,
+ rvec x[],
+ rvec f[],
+ const t_mdatoms *mdatoms,
+ t_lambda *fepvals,
+ real *lambda,
+ gmx_enerdata_t *enerd,
+ int flags,
+ t_nrnb *nrnb,
+ gmx_wallcycle_t wcycle)
{
int donb_flags;
nb_kernel_data_t kernel_data;
real lam_i[efptNR];
real dvdl_nb[efptNR];
- int th;
int i, j;
donb_flags = 0;
dvdl_nb[i] = 0;
}
- GMX_ASSERT(gmx_omp_nthreads_get(emntNonbonded) == nbl_lists->nnbl, "Number of lists should be same as number of NB threads");
+ const gmx::ArrayRef<t_nblist const * const > nbl_fep = nbv.freeEnergyPairlistSet(iLocality);
+
+ GMX_ASSERT(gmx_omp_nthreads_get(emntNonbonded) == nbl_fep.ssize(), "Number of lists should be same as number of NB threads");
wallcycle_sub_start(wcycle, ewcsNONBONDED);
-#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
- for (th = 0; th < nbl_lists->nnbl; th++)
+#pragma omp parallel for schedule(static) num_threads(nbl_fep.ssize())
+ for (int th = 0; th < nbl_fep.ssize(); th++)
{
try
{
- gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
+ gmx_nb_free_energy_kernel(nbl_fep[th],
x, f, fr, mdatoms, &kernel_data, nrnb);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
}
reset_foreign_enerdata(enerd);
-#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
- for (th = 0; th < nbl_lists->nnbl; th++)
+#pragma omp parallel for schedule(static) num_threads(nbl_fep.ssize())
+ for (int th = 0; th < nbl_fep.ssize(); th++)
{
try
{
- gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
+ gmx_nb_free_energy_kernel(nbl_fep[th],
x, f, fr, mdatoms, &kernel_data, nrnb);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
/* Note that with a GPU the launch overhead of the list transfer is not timed separately */
- nbnxn_make_pairlist(nbv, Nbnxm::InteractionLocality::Local,
- &top->excls, step, nrnb);
+ nbv->constructPairlist(Nbnxm::InteractionLocality::Local,
+ &top->excls, step, nrnb);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
wallcycle_stop(wcycle, ewcNS);
}
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
/* Note that with a GPU the launch overhead of the list transfer is not timed separately */
- nbnxn_make_pairlist(nbv, Nbnxm::InteractionLocality::NonLocal,
- &top->excls, step, nrnb);
+ nbv->constructPairlist(Nbnxm::InteractionLocality::NonLocal,
+ &top->excls, step, nrnb);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
wallcycle_stop(wcycle, ewcNS);
}
/* Calculate the local and non-local free energy interactions here.
* Happens here on the CPU both with and without GPU.
*/
- if (fr->nbv->pairlistSets[Nbnxm::InteractionLocality::Local].nbl_fep[0]->nrj > 0)
+ if (fr->nbv->freeEnergyPairlistSet(Nbnxm::InteractionLocality::Local)[0]->nrj > 0)
{
- do_nb_verlet_fep(&fr->nbv->pairlistSets[Nbnxm::InteractionLocality::Local],
+ do_nb_verlet_fep(*nbv, Nbnxm::InteractionLocality::Local,
fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
inputrec->fepvals, lambda,
enerd, flags, nrnb, wcycle);
}
- if (DOMAINDECOMP(cr) &&
- fr->nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal].nbl_fep[0]->nrj > 0)
+ if (havePPDomainDecomposition(cr) &&
+ fr->nbv->freeEnergyPairlistSet(Nbnxm::InteractionLocality::NonLocal)[0]->nrj > 0)
{
- do_nb_verlet_fep(&fr->nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal],
+ do_nb_verlet_fep(*nbv, Nbnxm::InteractionLocality::NonLocal,
fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
inputrec->fepvals, lambda,
enerd, flags, nrnb, wcycle);
/* if there are multiple fshift output buffers reduce them */
if ((flags & GMX_FORCE_VIRIAL) &&
- nbv->pairlistSets[iloc].nnbl > 1)
+ nbv->pairlistSet(iloc).nnbl > 1)
{
/* This is not in a subcounter because it takes a
negligible and constant-sized amount of time */
}
/* skip the reduction if there was no non-local work to do */
- if (!nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal].nblGpu[0]->sci.empty())
+ if (!nbv->pairlistSet(Nbnxm::InteractionLocality::NonLocal).nblGpu[0]->sci.empty())
{
nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::NonLocal,
nbv->nbat, f, wcycle);
}
/* Initializes an nbnxn_atomdata_output_t data structure */
-nbnxn_atomdata_output_t::nbnxn_atomdata_output_t(int nb_kernel_type,
+nbnxn_atomdata_output_t::nbnxn_atomdata_output_t(Nbnxm::KernelType kernelType,
int numEnergyGroups,
int simdEnergyBufferStride,
gmx::PinningPolicy pinningPolicy) :
Vvdw.resize(numEnergyGroups*numEnergyGroups);
Vc.resize(numEnergyGroups*numEnergyGroups);
- if (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
- nb_kernel_type == nbnxnk4xN_SIMD_2xNN)
+ if (Nbnxm::kernelTypeIsSimd(kernelType))
{
- int cj_size = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
+ int cj_size = Nbnxm::JClusterSizePerKernelType[kernelType];
int numElements = numEnergyGroups*numEnergyGroups*simdEnergyBufferStride*(cj_size/2)*cj_size;
VSvdw.resize(numElements);
VSc.resize(numElements);
/* Initializes an nbnxn_atomdata_t::Params data structure */
static void nbnxn_atomdata_params_init(const gmx::MDLogger &mdlog,
nbnxn_atomdata_t::Params *params,
- int nb_kernel_type,
+ const Nbnxm::KernelType kernelType,
int enbnxninitcombrule,
int ntype, const real *nbfp,
int n_energygroups)
gmx::boolToString(bCombGeom), gmx::boolToString(bCombLB));
}
- simple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
+ simple = Nbnxm::kernelTypeUsesSimplePairlist(kernelType);
switch (enbnxninitcombrule)
{
gmx_incons("Unknown enbnxninitcombrule");
}
- bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
- nb_kernel_type == nbnxnk4xN_SIMD_2xNN);
+ bSIMD = Nbnxm::kernelTypeIsSimd(kernelType);
set_lj_parameter_data(params, bSIMD);
/* Initializes an nbnxn_atomdata_t data structure */
void nbnxn_atomdata_init(const gmx::MDLogger &mdlog,
nbnxn_atomdata_t *nbat,
- int nb_kernel_type,
+ const Nbnxm::KernelType kernelType,
int enbnxninitcombrule,
int ntype, const real *nbfp,
int n_energygroups,
int nout)
{
- nbnxn_atomdata_params_init(mdlog, &nbat->paramsDeprecated(), nb_kernel_type,
+ nbnxn_atomdata_params_init(mdlog, &nbat->paramsDeprecated(), kernelType,
enbnxninitcombrule, ntype, nbfp, n_energygroups);
- const gmx_bool simple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
- const gmx_bool bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
- nb_kernel_type == nbnxnk4xN_SIMD_2xNN);
+ const bool simple = Nbnxm::kernelTypeUsesSimplePairlist(kernelType);
+ const bool bSIMD = Nbnxm::kernelTypeIsSimd(kernelType);
if (simple)
{
if (bSIMD)
{
pack_x = std::max(c_nbnxnCpuIClusterSize,
- nbnxn_kernel_to_cluster_j_size(nb_kernel_type));
+ Nbnxm::JClusterSizePerKernelType[kernelType]);
switch (pack_x)
{
case 4:
for (int i = 0; i < nout; i++)
{
const auto &pinningPolicy = nbat->params().type.get_allocator().pinningPolicy();
- nbat->out.emplace_back(nb_kernel_type, nbat->params().nenergrp, 1 << nbat->params().neg_2log,
+ nbat->out.emplace_back(kernelType, nbat->params().nenergrp, 1 << nbat->params().neg_2log,
pinningPolicy);
}
struct t_mdatoms;
struct gmx_wallcycle;
+namespace Nbnxm
+{
+enum class KernelType;
+}
+
/* Reallocate the nbnxn_atomdata_t for a size of n atoms */
void nbnxn_atomdata_realloc(nbnxn_atomdata_t *nbat, int n);
*/
void nbnxn_atomdata_init(const gmx::MDLogger &mdlog,
nbnxn_atomdata_t *nbat,
- int nb_kernel_type,
+ Nbnxm::KernelType kernelType,
int enbnxninitcombrule,
int ntype, const real *nbfp,
int n_energygroups,
grid->bSimple = nbv->pairlistIsSimple();
- grid->na_c = nbnxn_kernel_to_cluster_i_size(nbv->kernelType_);
- grid->na_cj = nbnxn_kernel_to_cluster_j_size(nbv->kernelType_);
+ grid->na_c = IClusterSizePerListType[nbv->listParams->pairlistType];
+ grid->na_cj = JClusterSizePerListType[nbv->listParams->pairlistType];
grid->na_sc = (grid->bSimple ? 1 : c_gpuNumClusterPerCell)*grid->na_c;
grid->na_c_2log = get_2log(grid->na_c);
* within this function.
*
* \param[in] pairlistSet Pairlists with local or non-local interactions to compute
- * \param[in] kernel_type The non-bonded kernel type
- * \param[in] ewald_excl The Ewald exclusion treatment
+ * \param[in] kernelSetup The non-bonded kernel setup
* \param[in,out] nbat The atomdata for the interactions
* \param[in] ic Non-bonded interaction constants
* \param[in] shiftVectors The PBC shift vectors
*/
static void
nbnxn_kernel_cpu(const nbnxn_pairlist_set_t &pairlistSet,
- const int kernel_type,
- const int ewald_excl,
+ const Nbnxm::KernelSetup &kernelSetup,
nbnxn_atomdata_t *nbat,
const interaction_const_t &ic,
rvec *shiftVectors,
}
else
{
- if (ewald_excl == ewaldexclTable)
+ if (kernelSetup.ewaldExclusionType == Nbnxm::EwaldExclusionType::Table)
{
if (ic.rcoulomb == ic.rvdw)
{
{
vdwkt = vdwktLJEWALDCOMBLB;
/* At setup we (should have) selected the C reference kernel */
- GMX_RELEASE_ASSERT(kernel_type == nbnxnk4x4_PlainC, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
+ GMX_RELEASE_ASSERT(kernelSetup.kernelType == Nbnxm::KernelType::Cpu4x4_PlainC, "Only the plain-C reference nbnxn kernel supports LJ-PME with LB combination rules");
}
}
else
if (!(forceFlags & GMX_FORCE_ENERGY))
{
/* Don't calculate energies */
- switch (kernel_type)
+ switch (kernelSetup.kernelType)
{
- case nbnxnk4x4_PlainC:
+ case Nbnxm::KernelType::Cpu4x4_PlainC:
nbnxn_kernel_noener_ref[coulkt][vdwkt](nbl[nb], nbat,
&ic,
shiftVectors,
fshift_p);
break;
#ifdef GMX_NBNXN_SIMD_2XNN
- case nbnxnk4xN_SIMD_2xNN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
nbnxm_kernel_noener_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
&ic,
shiftVectors,
break;
#endif
#ifdef GMX_NBNXN_SIMD_4XN
- case nbnxnk4xN_SIMD_4xN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
nbnxm_kernel_noener_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
&ic,
shiftVectors,
out->Vvdw[0] = 0;
out->Vc[0] = 0;
- switch (kernel_type)
+ switch (kernelSetup.kernelType)
{
- case nbnxnk4x4_PlainC:
+ case Nbnxm::KernelType::Cpu4x4_PlainC:
nbnxn_kernel_ener_ref[coulkt][vdwkt](nbl[nb], nbat,
&ic,
shiftVectors,
out->Vc.data());
break;
#ifdef GMX_NBNXN_SIMD_2XNN
- case nbnxnk4xN_SIMD_2xNN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
nbnxm_kernel_ener_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
&ic,
shiftVectors,
break;
#endif
#ifdef GMX_NBNXN_SIMD_4XN
- case nbnxnk4xN_SIMD_4xN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
nbnxm_kernel_ener_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
&ic,
shiftVectors,
int unrollj = 0;
- switch (kernel_type)
+ switch (kernelSetup.kernelType)
{
- case nbnxnk4x4_PlainC:
+ case Nbnxm::KernelType::Cpu4x4_PlainC:
unrollj = c_nbnxnCpuIClusterSize;
nbnxn_kernel_energrp_ref[coulkt][vdwkt](nbl[nb], nbat,
&ic,
out->Vc.data());
break;
#ifdef GMX_NBNXN_SIMD_2XNN
- case nbnxnk4xN_SIMD_2xNN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
unrollj = GMX_SIMD_REAL_WIDTH/2;
nbnxm_kernel_energrp_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
&ic,
break;
#endif
#ifdef GMX_NBNXN_SIMD_4XN
- case nbnxnk4xN_SIMD_4xN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
unrollj = GMX_SIMD_REAL_WIDTH;
nbnxm_kernel_energrp_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
&ic,
GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
}
- if (kernel_type != nbnxnk4x4_PlainC)
+ if (kernelSetup.kernelType != Nbnxm::KernelType::Cpu4x4_PlainC)
{
switch (unrollj)
{
const interaction_const_t &ic,
const int forceFlags)
{
- const nbnxn_pairlist_set_t &pairlistSet = nbv.pairlistSets[iLocality];
+ const nbnxn_pairlist_set_t &pairlistSet = nbv.pairlistSet(iLocality);
const bool usingGpuKernels = nbv.useGpu();
int enr_nbnxn_kernel_ljc;
{
enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
}
- else if ((!usingGpuKernels && nbv.ewaldExclusionType_ == ewaldexclAnalytical) ||
+ else if ((!usingGpuKernels && nbv.kernelSetup().ewaldExclusionType == Nbnxm::EwaldExclusionType::Analytical) ||
(usingGpuKernels && Nbnxm::gpu_is_kernel_ewald_analytical(nbv.gpu_nbv)))
{
enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD;
gmx_enerdata_t *enerd,
t_nrnb *nrnb)
{
- const nbnxn_pairlist_set_t &pairlistSet = nbv->pairlistSets[iLocality];
+ const nbnxn_pairlist_set_t &pairlistSet = nbv->pairlistSet(iLocality);
- switch (nbv->kernelType_)
+ switch (nbv->kernelSetup().kernelType)
{
- case nbnxnk4x4_PlainC:
- case nbnxnk4xN_SIMD_4xN:
- case nbnxnk4xN_SIMD_2xNN:
+ case Nbnxm::KernelType::Cpu4x4_PlainC:
+ case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
nbnxn_kernel_cpu(pairlistSet,
- nbv->kernelType_,
- nbv->ewaldExclusionType_,
+ nbv->kernelSetup(),
nbv->nbat,
ic,
fr->shift_vec,
enerd->grpp.ener[egLJSR]);
break;
- case nbnxnk8x8x8_GPU:
+ case Nbnxm::KernelType::Gpu8x8x8:
Nbnxm::gpu_launch_kernel(nbv->gpu_nbv, forceFlags, iLocality);
break;
- case nbnxnk8x8x8_PlainC:
+ case Nbnxm::KernelType::Cpu8x8x8_PlainC:
nbnxn_kernel_gpu_ref(pairlistSet.nblGpu[0],
nbv->nbat, &ic,
fr->shift_vec,
struct gmx_mtop_t;
struct interaction_const_t;
struct nbnxn_pairlist_set_t;
+struct nonbonded_verlet_t;
struct t_blocka;
struct t_commrec;
struct t_nrnb;
class UpdateGroupsCog;
}
-//! Help pass GPU-emulation parameters with type safety.
-enum class EmulateGpuNonbonded : bool
+/*! \brief Resources on which the non-bonded kernels can be executed */
+enum class NonbondedResource : int
{
- //! Do not emulate GPUs.
- No,
- //! Do emulate GPUs.
- Yes
+ Cpu,
+ Gpu,
+ EmulateGpu
};
+namespace Nbnxm
+{
/*! \brief Nonbonded NxN kernel types: plain C, CPU SIMD, GPU, GPU emulation */
-typedef enum
+enum class KernelType : int
{
- nbnxnkNotSet = 0,
- nbnxnk4x4_PlainC,
- nbnxnk4xN_SIMD_4xN,
- nbnxnk4xN_SIMD_2xNN,
- nbnxnk8x8x8_GPU,
- nbnxnk8x8x8_PlainC,
- nbnxnkNR
-} nbnxn_kernel_type;
+ NotSet = 0,
+ Cpu4x4_PlainC,
+ Cpu4xN_Simd_4xN,
+ Cpu4xN_Simd_2xNN,
+ Gpu8x8x8,
+ Cpu8x8x8_PlainC,
+ Count
+};
-namespace Nbnxm
+/*! \brief Ewald exclusion types */
+enum class EwaldExclusionType : int
+{
+ NotSet = 0,
+ Table,
+ Analytical,
+ DecidedByGpuModule
+};
+
+/*! \brief The non-bonded kernel setup, which also affects the pairlist construction kernel */
+struct KernelSetup
{
+ //! The non-bonded kernel type, which also affects the pairlist construction kernel
+ KernelType kernelType = KernelType::NotSet;
+ //! Ewald exclusion computation handling type, currently only used for CPU
+ EwaldExclusionType ewaldExclusionType = EwaldExclusionType::NotSet;
+};
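A rough usage sketch, assuming the kernelSetup() accessor added to nonbonded_verlet_t further down in this change; the helper name is hypothetical.

    // Sketch only: dispatch code branches on the combined setup rather than on
    // two separate int members.
    static bool usesTabulatedEwaldExclusions(const nonbonded_verlet_t &nbv)
    {
        const Nbnxm::KernelSetup &setup = nbv.kernelSetup();
        return (setup.kernelType != Nbnxm::KernelType::Gpu8x8x8 &&
                setup.ewaldExclusionType == Nbnxm::EwaldExclusionType::Table);
    }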
/*! \brief Return a string identifying the kernel type.
*
- * \param [in] kernel_type nonbonded kernel types, takes values from the nbnxn_kernel_type enum
- * \returns a string identifying the kernel corresponding to the type passed as argument
+ * \param [in] kernelType nonbonded kernel type, takes values from the KernelType enum class
+ * \returns a string identifying the kernel corresponding to the type passed as argument
*/
-const char *lookup_kernel_name(int kernel_type);
+const char *lookup_kernel_name(Nbnxm::KernelType kernelType);
} // namespace Nbnxm
-/*! \brief Ewald exclusion types */
-enum {
- ewaldexclTable, ewaldexclAnalytical
-};
-
/*! \brief Flag to tell the nonbonded kernels whether to clear the force output buffers */
enum {
enbvClearFNo, enbvClearFYes
};
+/*! \brief Generates a pair-list for the given locality.
+ *
+ * With perturbed particles, a group-scheme style nbl_fep list is also made.
+ */
+void nbnxn_make_pairlist(nonbonded_verlet_t *nbv,
+ Nbnxm::InteractionLocality iLocality,
+ nbnxn_pairlist_set_t *pairlistSet,
+ const t_blocka *excl,
+ int64_t step,
+ t_nrnb *nrnb);
+
+/*! \brief Prune all pair-lists with the given locality (currently CPU only)
+ *
+ * For all pair-lists with the given locality, takes the outer list, prunes out
+ * pairs beyond the pairlist inner radius, and writes the result to a list that is
+ * to be consumed by the non-bonded kernel.
+ */
+void NbnxnDispatchPruneKernel(nbnxn_pairlist_set_t *pairlistSet,
+ Nbnxm::KernelType kernelType,
+ const nbnxn_atomdata_t *nbat,
+ const rvec *shift_vec);
+
/*! \libinternal
* \brief Top-level non-bonded data structure for the Verlet-type cut-off scheme. */
struct nonbonded_verlet_t
{
- //! Returns whether a GPU is used for the non-bonded calculations
- bool useGpu() const
- {
- return kernelType_ == nbnxnk8x8x8_GPU;
- }
-
- //! Returns whether a GPU is emulated for the non-bonded calculations
- bool emulateGpu() const
- {
- return kernelType_ == nbnxnk8x8x8_PlainC;
- }
-
- //! Return whether the pairlist is of simple, CPU type
- bool pairlistIsSimple() const
- {
- return !useGpu() && !emulateGpu();
- }
-
- std::unique_ptr<NbnxnListParameters> listParams; /**< Parameters for the search and list pruning setup */
- std::unique_ptr<nbnxn_search> nbs; /**< n vs n atom pair searching data */
- int ngrp; /**< number of interaction groups */
- //! Local and non-local pairlist sets
- gmx::EnumerationArray<Nbnxm::InteractionLocality, nbnxn_pairlist_set_t> pairlistSets;
- //! Atom data
- nbnxn_atomdata_t *nbat;
-
- //! Non-bonded kernel - see enum above
- int kernelType_;
- //! Ewald exclusion - see enum above
- int ewaldExclusionType_;
-
- gmx_nbnxn_gpu_t *gpu_nbv; /**< pointer to GPU nb verlet data */
- int min_ci_balanced; /**< pair list balancing parameter used for the 8x8x8 GPU kernels */
+ public:
+ //! Returns whether a GPU is used for the non-bonded calculations
+ bool useGpu() const
+ {
+ return kernelSetup_.kernelType == Nbnxm::KernelType::Gpu8x8x8;
+ }
+
+ //! Returns whether a GPU is emulated for the non-bonded calculations
+ bool emulateGpu() const
+ {
+ return kernelSetup_.kernelType == Nbnxm::KernelType::Cpu8x8x8_PlainC;
+ }
+
+ //! Return whether the pairlist is of simple, CPU type
+ bool pairlistIsSimple() const
+ {
+ return !useGpu() && !emulateGpu();
+ }
+
+ //! Initializes the pair list sets; TODO: this should be private
+ void initPairlistSets(bool haveMultipleDomains);
+
+ //! Returns a reference to the pairlist set for the requested locality
+ const nbnxn_pairlist_set_t &pairlistSet(Nbnxm::InteractionLocality iLocality) const
+ {
+ GMX_ASSERT(static_cast<size_t>(iLocality) < pairlistSets_.size(),
+ "The requested locality should be in the list");
+ return pairlistSets_[static_cast<int>(iLocality)];
+ }
+
+ //! Constructs the pairlist for the given locality
+ void constructPairlist(Nbnxm::InteractionLocality iLocality,
+ const t_blocka *excl,
+ int64_t step,
+ t_nrnb *nrnb)
+ {
+ nbnxn_make_pairlist(this, iLocality, &pairlistSets_[static_cast<int>(iLocality)], excl, step, nrnb);
+ }
+
+ //! Dispatches the dynamic pruning kernel for the given locality
+ void dispatchPruneKernel(Nbnxm::InteractionLocality iLocality,
+ const rvec *shift_vec)
+ {
+ GMX_ASSERT(static_cast<size_t>(iLocality) < pairlistSets_.size(),
+ "The requested locality should be in the list");
+ NbnxnDispatchPruneKernel(&pairlistSets_[static_cast<int>(iLocality)],
+ kernelSetup_.kernelType, nbat, shift_vec);
+ }
+
+ //! Return the kernel setup
+ const Nbnxm::KernelSetup &kernelSetup() const
+ {
+ return kernelSetup_;
+ }
+
+ //! Sets the kernel setup, TODO: make private
+ void setKernelSetup(const Nbnxm::KernelSetup &kernelSetup)
+ {
+ kernelSetup_ = kernelSetup;
+ }
+
+ //! Returns the list of free-energy pairlists for the given locality
+ const gmx::ArrayRef<t_nblist const * const>
+ freeEnergyPairlistSet(Nbnxm::InteractionLocality iLocality) const
+ {
+ return pairlistSet(iLocality).nbl_fep;
+ }
+
+ //! Parameters for the search and list pruning setup
+ std::unique_ptr<NbnxnListParameters> listParams;
+ //! Working data for constructing the pairlists
+ std::unique_ptr<nbnxn_search> nbs;
+ private:
+ //! Local and, optionally, non-local pairlist sets
+ std::vector<nbnxn_pairlist_set_t> pairlistSets_;
+ public:
+ //! Atom data
+ nbnxn_atomdata_t *nbat;
+
+ private:
+ //! The non-bonded setup, also affects the pairlist construction kernel
+ Nbnxm::KernelSetup kernelSetup_;
+ public:
+
+ gmx_nbnxn_gpu_t *gpu_nbv; /**< pointer to GPU nb verlet data */
+ int min_ci_balanced; /**< pair list balancing parameter used for the 8x8x8 GPU kernels */
};
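Since pairlistSets_ is now private, callers go through the member functions instead of indexing the array directly; a sketch of the intended call sites (the helper and its parameter list are hypothetical, the member functions are the ones declared above).

    // Sketch only: construction, pruning and read-only access all go through
    // nonbonded_verlet_t; in mdrun these calls happen on different steps.
    static void exerciseLocalPairlist(nonbonded_verlet_t *nbv,
                                      const t_blocka     *excl,
                                      int64_t             step,
                                      t_nrnb             *nrnb,
                                      const rvec         *shiftVec)
    {
        nbv->constructPairlist(Nbnxm::InteractionLocality::Local, excl, step, nrnb);
        nbv->dispatchPruneKernel(Nbnxm::InteractionLocality::Local, shiftVec);
        const nbnxn_pairlist_set_t &lists = nbv->pairlistSet(Nbnxm::InteractionLocality::Local);
        GMX_UNUSED_VALUE(lists);
    }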
namespace Nbnxm
/*! \brief Returns the index position of the atoms on the pairlist search grid */
gmx::ArrayRef<const int> nbnxn_get_gridindices(const nbnxn_search* nbs);
-/*! \brief Generates a pair-list for the given locality.
- *
- * With perturbed particles, also a group scheme style nbl_fep list is made.
- */
-void nbnxn_make_pairlist(nonbonded_verlet_t *nbv,
- Nbnxm::InteractionLocality iLocality,
- const t_blocka *excl,
- int64_t step,
- t_nrnb *nrnb);
-
/*! \brief Returns the number of steps performed with the current pair list */
int nbnxnNumStepsWithPairlist(const nonbonded_verlet_t &nbv,
Nbnxm::InteractionLocality ilocality,
Nbnxm::InteractionLocality ilocality,
int64_t step);
-/*! \brief Prune all pair-lists with given locality (currently CPU only)
- *
- * For all pair-lists with given locality, takes the outer list and prunes out
- * pairs beyond the pairlist inner radius and writes the result to a list that is
- * to be consumed by the non-bonded kernel.
- */
-void NbnxnDispatchPruneKernel(nonbonded_verlet_t *nbv,
- Nbnxm::InteractionLocality iLocality,
- const rvec *shift_vec);
-
/*! \brief Executes the non-bonded kernel of the GPU or launches it on the GPU */
void NbnxnDispatchKernel(nonbonded_verlet_t *nbv,
Nbnxm::InteractionLocality iLocality,
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/nbnxm/pairlist.h"
-#include "gromacs/simd/simd.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/real.h"
-bool nbnxn_kernel_pairlist_simple(int nb_kernel_type)
-{
- if (nb_kernel_type == nbnxnkNotSet)
- {
- gmx_fatal(FARGS, "Non-bonded kernel type not set for Verlet-style pair-list.");
- }
-
- switch (nb_kernel_type)
- {
- case nbnxnk8x8x8_GPU:
- case nbnxnk8x8x8_PlainC:
- return false;
-
- case nbnxnk4x4_PlainC:
- case nbnxnk4xN_SIMD_4xN:
- case nbnxnk4xN_SIMD_2xNN:
- return true;
-
- default:
- gmx_incons("Invalid nonbonded kernel type passed!");
- return false;
- }
-}
-
-int nbnxn_kernel_to_cluster_i_size(int nb_kernel_type)
-{
- switch (nb_kernel_type)
- {
- case nbnxnk4x4_PlainC:
- case nbnxnk4xN_SIMD_4xN:
- case nbnxnk4xN_SIMD_2xNN:
- return c_nbnxnCpuIClusterSize;
- case nbnxnk8x8x8_GPU:
- case nbnxnk8x8x8_PlainC:
- /* The cluster size for super/sub lists is only set here.
- * Any value should work for the pair-search and atomdata code.
- * The kernels, of course, might require a particular value.
- */
- return c_nbnxnGpuClusterSize;
- default:
- gmx_incons("unknown kernel type");
- }
-}
-
-int nbnxn_kernel_to_cluster_j_size(int nb_kernel_type)
-{
- int nbnxn_simd_width = 0;
- int cj_size = 0;
-
-#if GMX_SIMD
- nbnxn_simd_width = GMX_SIMD_REAL_WIDTH;
-#endif
-
- switch (nb_kernel_type)
- {
- case nbnxnk4x4_PlainC:
- cj_size = c_nbnxnCpuIClusterSize;
- break;
- case nbnxnk4xN_SIMD_4xN:
- cj_size = nbnxn_simd_width;
- break;
- case nbnxnk4xN_SIMD_2xNN:
- cj_size = nbnxn_simd_width/2;
- break;
- case nbnxnk8x8x8_GPU:
- case nbnxnk8x8x8_PlainC:
- cj_size = nbnxn_kernel_to_cluster_i_size(nb_kernel_type);
- break;
- default:
- gmx_incons("unknown kernel type");
- }
-
- return cj_size;
-}
-
/* Clusters at the cut-off only increase rlist by 60% of their size */
static constexpr real c_nbnxnRlistIncreaseOutsideFactor = 0.6;
#define GMX_NBNXM_NBNXM_GEOMETRY_H
#include "gromacs/math/vectypes.h"
+#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/pairlist.h"
+#include "gromacs/simd/simd.h"
#include "gromacs/utility/fatalerror.h"
/* Returns the base-2 log of n.
return log2;
}
+namespace Nbnxm
+{
+
+/* The nbnxn i-cluster size in atoms for each nbnxn kernel type */
+static constexpr gmx::EnumerationArray<KernelType, int> IClusterSizePerKernelType =
+{
+ 0,
+ c_nbnxnCpuIClusterSize,
+ c_nbnxnCpuIClusterSize,
+ c_nbnxnCpuIClusterSize,
+ c_nbnxnGpuClusterSize,
+ c_nbnxnGpuClusterSize
+};
+
+/* The nbnxn j-cluster size in atoms for each nbnxn kernel type */
+static constexpr gmx::EnumerationArray<KernelType, int> JClusterSizePerKernelType =
+{
+ 0,
+ c_nbnxnCpuIClusterSize,
+#if GMX_SIMD
+ GMX_SIMD_REAL_WIDTH,
+ GMX_SIMD_REAL_WIDTH/2,
+#else
+ 0,
+ 0,
+#endif
+ c_nbnxnGpuClusterSize,
+ c_nbnxnGpuClusterSize
+};
+
/* Returns whether the pair-list corresponding to nb_kernel_type is simple */
-bool nbnxn_kernel_pairlist_simple(int nb_kernel_type);
+static inline bool kernelTypeUsesSimplePairlist(const KernelType kernelType)
+{
+ return (kernelType == KernelType::Cpu4x4_PlainC ||
+ kernelType == KernelType::Cpu4xN_Simd_4xN ||
+ kernelType == KernelType::Cpu4xN_Simd_2xNN);
+}
-/* Returns the nbnxn i-cluster size in atoms for the nbnxn kernel type */
-int nbnxn_kernel_to_cluster_i_size(int nb_kernel_type);
+/* Returns whether the kernel type is a CPU SIMD kernel */
+static inline bool kernelTypeIsSimd(const KernelType kernelType)
+{
+ return (kernelType == KernelType::Cpu4xN_Simd_4xN ||
+ kernelType == KernelType::Cpu4xN_Simd_2xNN);
+}
-/* Returns the nbnxn i-cluster size in atoms for the nbnxn kernel type */
-int nbnxn_kernel_to_cluster_j_size(int nb_kernel_type);
+} // namespace Nbnxm
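A small sketch of the lookups that replace the removed nbnxn_kernel_to_cluster_*_size() helpers; the kernel type value is chosen only for illustration.

    // Sketch only: cluster sizes are table lookups on the scoped kernel type.
    const Nbnxm::KernelType kernelType   = Nbnxm::KernelType::Cpu4x4_PlainC;
    const int               clusterSizeI = Nbnxm::IClusterSizePerKernelType[kernelType];
    const int               clusterSizeJ = Nbnxm::JClusterSizePerKernelType[kernelType];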
/* Returns the effective list radius of the pair-list
*
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/nbnxm/nbnxm_geometry.h"
#include "gromacs/nbnxm/nbnxm_simd.h"
+#include "gromacs/nbnxm/pairlist.h"
#include "gromacs/nbnxm/pairlist_tuning.h"
#include "gromacs/nbnxm/pairlistset.h"
#include "gromacs/simd/simd.h"
}
/*! \brief Returns the most suitable CPU kernel type and Ewald handling */
-static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
- int *kernel_type,
- int *ewald_excl,
- const gmx_hw_info_t gmx_unused &hardwareInfo)
+static KernelSetup
+pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
+ const gmx_hw_info_t gmx_unused &hardwareInfo)
{
- *kernel_type = nbnxnk4x4_PlainC;
- *ewald_excl = ewaldexclTable;
+ KernelSetup kernelSetup;
-#if GMX_SIMD
+ if (!GMX_SIMD)
+ {
+ kernelSetup.kernelType = KernelType::Cpu4x4_PlainC;
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
+ }
+ else
{
#ifdef GMX_NBNXN_SIMD_4XN
- *kernel_type = nbnxnk4xN_SIMD_4xN;
+ kernelSetup.kernelType = KernelType::Cpu4xN_Simd_4xN;
#endif
#ifdef GMX_NBNXN_SIMD_2XNN
- *kernel_type = nbnxnk4xN_SIMD_2xNN;
+ kernelSetup.kernelType = KernelType::Cpu4xN_Simd_2xNN;
#endif
#if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
* use of HT, use 4x8 to avoid a potential performance hit.
* On Intel Haswell 4x8 is always faster.
*/
- *kernel_type = nbnxnk4xN_SIMD_4xN;
+ kernelSetup.kernelType = KernelType::Cpu4xN_Simd_4xN;
-#if !GMX_SIMD_HAVE_FMA
- if (EEL_PME_EWALD(ir->coulombtype) ||
- EVDW_PME(ir->vdwtype))
+ if (!GMX_SIMD_HAVE_FMA && (EEL_PME_EWALD(ir->coulombtype) ||
+ EVDW_PME(ir->vdwtype)))
{
/* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
* There are enough instructions to make 2x(4+4) efficient.
*/
- *kernel_type = nbnxnk4xN_SIMD_2xNN;
+ kernelSetup.kernelType = KernelType::Cpu4xN_Simd_2xNN;
}
-#endif
+
if (hardwareInfo.haveAmdZenCpu)
{
/* One 256-bit FMA per cycle makes 2xNN faster */
- *kernel_type = nbnxnk4xN_SIMD_2xNN;
+ kernelSetup.kernelType = KernelType::Cpu4xN_Simd_2xNN;
}
#endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
if (getenv("GMX_NBNXN_SIMD_4XN") != nullptr)
{
#ifdef GMX_NBNXN_SIMD_4XN
- *kernel_type = nbnxnk4xN_SIMD_4xN;
+ kernelSetup.kernelType = KernelType::Cpu4xN_Simd_4xN;
#else
gmx_fatal(FARGS, "SIMD 4xN kernels requested, but GROMACS has been compiled without support for these kernels");
#endif
if (getenv("GMX_NBNXN_SIMD_2XNN") != nullptr)
{
#ifdef GMX_NBNXN_SIMD_2XNN
- *kernel_type = nbnxnk4xN_SIMD_2xNN;
+ kernelSetup.kernelType = KernelType::Cpu4xN_Simd_2xNN;
#else
gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but GROMACS has been compiled without support for these kernels");
#endif
* will probably always be faster for a SIMD width of 8 or more.
* With FMA analytical is sometimes faster for a width of 4 as well.
* In single precision, this is faster on Bulldozer.
- */
-#if GMX_SIMD_REAL_WIDTH >= 8 || \
- (GMX_SIMD_REAL_WIDTH >= 4 && GMX_SIMD_HAVE_FMA && !GMX_DOUBLE)
- /* On AMD Zen, tabulated Ewald kernels are faster on all 4 combinations
+ * On AMD Zen, tabulated Ewald kernels are faster on all 4 combinations
* of single or double precision and 128 or 256-bit AVX2.
*/
- if (!hardwareInfo.haveAmdZenCpu)
+ if (
+#if GMX_SIMD
+ (GMX_SIMD_REAL_WIDTH >= 8 ||
+ (GMX_SIMD_REAL_WIDTH >= 4 && GMX_SIMD_HAVE_FMA && !GMX_DOUBLE)) &&
+#endif
+ !hardwareInfo.haveAmdZenCpu)
{
- *ewald_excl = ewaldexclAnalytical;
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::Analytical;
+ }
+ else
+ {
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
}
-#endif
if (getenv("GMX_NBNXN_EWALD_TABLE") != nullptr)
{
- *ewald_excl = ewaldexclTable;
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
}
if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != nullptr)
{
- *ewald_excl = ewaldexclAnalytical;
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::Analytical;
}
}
-#endif // GMX_SIMD
+
+ return kernelSetup;
}
-const char *lookup_kernel_name(int kernel_type)
+const char *lookup_kernel_name(const KernelType kernelType)
{
const char *returnvalue = nullptr;
- switch (kernel_type)
+ switch (kernelType)
{
- case nbnxnkNotSet:
+ case KernelType::NotSet:
returnvalue = "not set";
break;
- case nbnxnk4x4_PlainC:
+ case KernelType::Cpu4x4_PlainC:
returnvalue = "plain C";
break;
- case nbnxnk4xN_SIMD_4xN:
- case nbnxnk4xN_SIMD_2xNN:
+ case KernelType::Cpu4xN_Simd_4xN:
+ case KernelType::Cpu4xN_Simd_2xNN:
#if GMX_SIMD
returnvalue = "SIMD";
#else // GMX_SIMD
returnvalue = "not available";
#endif // GMX_SIMD
break;
- case nbnxnk8x8x8_GPU: returnvalue = "GPU"; break;
- case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
+ case KernelType::Gpu8x8x8: returnvalue = "GPU"; break;
+ case KernelType::Cpu8x8x8_PlainC: returnvalue = "plain C"; break;
- case nbnxnkNR:
default:
gmx_fatal(FARGS, "Illegal kernel type selected");
}
};
/*! \brief Returns the most suitable kernel type and Ewald handling */
-static void pick_nbnxn_kernel(const gmx::MDLogger &mdlog,
- gmx_bool use_simd_kernels,
- const gmx_hw_info_t &hardwareInfo,
- bool useGpu,
- EmulateGpuNonbonded emulateGpu,
- const t_inputrec *ir,
- int *kernel_type,
- int *ewald_excl,
- gmx_bool bDoNonbonded)
+static KernelSetup
+pick_nbnxn_kernel(const gmx::MDLogger &mdlog,
+ gmx_bool use_simd_kernels,
+ const gmx_hw_info_t &hardwareInfo,
+ const NonbondedResource &nonbondedResource,
+ const t_inputrec *ir,
+ gmx_bool bDoNonbonded)
{
- GMX_RELEASE_ASSERT(kernel_type, "Need a valid kernel_type pointer");
+ KernelSetup kernelSetup;
- *kernel_type = nbnxnkNotSet;
- *ewald_excl = ewaldexclTable;
-
- if (emulateGpu == EmulateGpuNonbonded::Yes)
+ if (nonbondedResource == NonbondedResource::EmulateGpu)
{
- *kernel_type = nbnxnk8x8x8_PlainC;
+ kernelSetup.kernelType = KernelType::Cpu8x8x8_PlainC;
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::DecidedByGpuModule;
if (bDoNonbonded)
{
GMX_LOG(mdlog.warning).asParagraph().appendText("Emulating a GPU run on the CPU (slow)");
}
}
- else if (useGpu)
+ else if (nonbondedResource == NonbondedResource::Gpu)
{
- *kernel_type = nbnxnk8x8x8_GPU;
+ kernelSetup.kernelType = KernelType::Gpu8x8x8;
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::DecidedByGpuModule;
}
-
- if (*kernel_type == nbnxnkNotSet)
+ else
{
if (use_simd_kernels &&
nbnxn_simd_supported(mdlog, ir))
{
- pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl, hardwareInfo);
+ kernelSetup = pick_nbnxn_kernel_cpu(ir, hardwareInfo);
}
else
{
- *kernel_type = nbnxnk4x4_PlainC;
+ kernelSetup.kernelType = KernelType::Cpu4x4_PlainC;
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::Analytical;
}
}
{
GMX_LOG(mdlog.info).asParagraph().appendTextFormatted(
"Using %s %dx%d nonbonded short-range kernels",
- lookup_kernel_name(*kernel_type),
- nbnxn_kernel_to_cluster_i_size(*kernel_type),
- nbnxn_kernel_to_cluster_j_size(*kernel_type));
+ lookup_kernel_name(kernelSetup.kernelType),
+ IClusterSizePerKernelType[kernelSetup.kernelType],
+ JClusterSizePerKernelType[kernelSetup.kernelType]);
- if (nbnxnk4x4_PlainC == *kernel_type ||
- nbnxnk8x8x8_PlainC == *kernel_type)
+ if (KernelType::Cpu4x4_PlainC == kernelSetup.kernelType ||
+ KernelType::Cpu8x8x8_PlainC == kernelSetup.kernelType)
{
GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
"WARNING: Using the slow %s kernels. This should\n"
"not happen during routine usage on supported platforms.",
- lookup_kernel_name(*kernel_type));
+ lookup_kernel_name(kernelSetup.kernelType));
}
}
+
+ GMX_RELEASE_ASSERT(kernelSetup.kernelType != KernelType::NotSet &&
+ kernelSetup.ewaldExclusionType != EwaldExclusionType::NotSet,
+ "All kernel setup parameters should be set here");
+
+ return kernelSetup;
}
+} // namespace Nbnxm
+
+void nonbonded_verlet_t::initPairlistSets(const bool haveMultipleDomains)
+{
+ pairlistSets_.emplace_back(*listParams);
+ if (haveMultipleDomains)
+ {
+ pairlistSets_.emplace_back(*listParams);
+ }
+}
+
+namespace Nbnxm
+{
+
void init_nb_verlet(const gmx::MDLogger &mdlog,
nonbonded_verlet_t **nb_verlet,
gmx_bool bFEP_NonBonded,
const gmx_mtop_t *mtop,
matrix box)
{
- nonbonded_verlet_t *nbv = new nonbonded_verlet_t();
+ nonbonded_verlet_t *nbv = new nonbonded_verlet_t();
- const EmulateGpuNonbonded emulateGpu =
- ((getenv("GMX_EMULATE_GPU") != nullptr) ? EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
- bool useGpu = deviceInfo != nullptr;
+ const bool emulateGpu = (getenv("GMX_EMULATE_GPU") != nullptr);
+ const bool useGpu = deviceInfo != nullptr;
- GMX_RELEASE_ASSERT(!(emulateGpu == EmulateGpuNonbonded::Yes && useGpu), "When GPU emulation is active, there cannot be a GPU assignment");
+ GMX_RELEASE_ASSERT(!(emulateGpu && useGpu), "When GPU emulation is active, there cannot be a GPU assignment");
+
+ NonbondedResource nonbondedResource;
+ if (useGpu)
+ {
+ nonbondedResource = NonbondedResource::Gpu;
+ }
+ else if (emulateGpu)
+ {
+ nonbondedResource = NonbondedResource::EmulateGpu;
+ }
+ else
+ {
+ nonbondedResource = NonbondedResource::Cpu;
+ }
nbv->nbs = nullptr;
- pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo,
- useGpu, emulateGpu, ir,
- &nbv->kernelType_,
- &nbv->ewaldExclusionType_,
- fr->bNonbonded);
+ nbv->setKernelSetup(pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo,
+ nonbondedResource, ir,
+ fr->bNonbonded));
const bool haveMultipleDomains = (DOMAINDECOMP(cr) && cr->dd->nnodes > 1);
- const bool pairlistIsSimple = nbv->pairlistIsSimple();
- for (nbnxn_pairlist_set_t &pairlistSet : nbv->pairlistSets)
- {
- // TODO Change this to a constructor
- /* The second parameter tells whether lists should be combined,
- * this is currently only and always done for GPU lists.
- */
- nbnxn_init_pairlist_set(&pairlistSet, pairlistIsSimple, !pairlistIsSimple);
- }
+ nbv->listParams = std::make_unique<NbnxnListParameters>(nbv->kernelSetup().kernelType,
+ ir->rlist);
+ nbv->initPairlistSets(haveMultipleDomains);
nbv->min_ci_balanced = 0;
- nbv->listParams = std::make_unique<NbnxnListParameters>(ir->rlist);
- setupDynamicPairlistPruning(mdlog, ir, mtop, box, nbv->kernelType_, fr->ic,
+ setupDynamicPairlistPruning(mdlog, ir, mtop, box, fr->ic,
nbv->listParams.get());
nbv->nbs = std::make_unique<nbnxn_search>(ir->ePBC,
}
nbnxn_atomdata_init(mdlog,
nbv->nbat,
- nbv->kernelType_,
+ nbv->kernelSetup().kernelType,
enbnxninitcombrule,
fr->ntype, fr->nbfp,
mimimumNumEnergyGroupNonbonded,
- pairlistIsSimple ? gmx_omp_nthreads_get(emntNonbonded) : 1);
+ nbv->pairlistIsSimple() ? gmx_omp_nthreads_get(emntNonbonded) : 1);
if (useGpu)
{
work = new NbnxnPairlistGpuWork();
}
-void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list,
- gmx_bool bSimple, gmx_bool bCombined)
+void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list)
{
- GMX_RELEASE_ASSERT(!bSimple || !bCombined, "Can only combine non-simple lists");
-
- nbl_list->bSimple = bSimple;
- nbl_list->bCombined = bCombined;
+ nbl_list->bSimple =
+ (nbl_list->params.pairlistType == PairlistType::Simple4x2 ||
+ nbl_list->params.pairlistType == PairlistType::Simple4x4 ||
+ nbl_list->params.pairlistType == PairlistType::Simple4x8);
+ // Currently GPU lists are always combined
+ nbl_list->bCombined = !nbl_list->bSimple;
nbl_list->nnbl = gmx_omp_nthreads_get(emntNonbonded);
nbl_list->nnbl, NBNXN_BUFFERFLAG_MAX_THREADS, NBNXN_BUFFERFLAG_MAX_THREADS);
}
- if (bSimple)
+ if (nbl_list->bSimple)
{
snew(nbl_list->nbl, nbl_list->nnbl);
if (nbl_list->nnbl > 1)
{
snew(nbl_list->nblGpu, nbl_list->nnbl);
}
- snew(nbl_list->nbl_fep, nbl_list->nnbl);
+ nbl_list->nbl_fep.resize(nbl_list->nnbl);
/* Execute in order to avoid memory interleaving between threads */
#pragma omp parallel for num_threads(nbl_list->nnbl) schedule(static)
for (int i = 0; i < nbl_list->nnbl; i++)
/* Allocate the nblist data structure locally on each thread
* to optimize memory access for NUMA architectures.
*/
- if (bSimple)
+ if (nbl_list->bSimple)
{
nbl_list->nbl[i] = new NbnxnPairlistCpu();
static void icell_set_x(int ci,
real shx, real shy, real shz,
int stride, const real *x,
- int nb_kernel_type,
+ const Nbnxm::KernelType kernelType,
NbnxnPairlistCpuWork *work)
{
- switch (nb_kernel_type)
+ switch (kernelType)
{
#if GMX_SIMD
#ifdef GMX_NBNXN_SIMD_4XN
- case nbnxnk4xN_SIMD_4xN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
icell_set_x_simd_4xn(ci, shx, shy, shz, stride, x, work);
break;
#endif
#ifdef GMX_NBNXN_SIMD_2XNN
- case nbnxnk4xN_SIMD_2xNN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
icell_set_x_simd_2xnn(ci, shx, shy, shz, stride, x, work);
break;
#endif
#endif
- case nbnxnk4x4_PlainC:
+ case Nbnxm::KernelType::Cpu4x4_PlainC:
icell_set_x_simple(ci, shx, shy, shz, stride, x, &work->iClusterData);
break;
default:
static void icell_set_x(int ci,
real shx, real shy, real shz,
int stride, const real *x,
- int gmx_unused nb_kernel_type,
+ Nbnxm::KernelType gmx_unused kernelType,
NbnxnPairlistGpuWork *work)
{
#if !GMX_SIMD4_HAVE_REAL
const nbnxn_atomdata_t *nbat,
const real rlist2,
const real rbb2,
- const int nb_kernel_type,
+ const Nbnxm::KernelType kernelType,
int *numDistanceChecks)
{
- switch (nb_kernel_type)
+ switch (kernelType)
{
- case nbnxnk4x4_PlainC:
+ case Nbnxm::KernelType::Cpu4x4_PlainC:
makeClusterListSimple(jGrid,
nbl, ci, firstCell, lastCell,
excludeSubDiagonal,
numDistanceChecks);
break;
#ifdef GMX_NBNXN_SIMD_4XN
- case nbnxnk4xN_SIMD_4xN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
makeClusterListSimd4xn(jGrid,
nbl, ci, firstCell, lastCell,
excludeSubDiagonal,
break;
#endif
#ifdef GMX_NBNXN_SIMD_2XNN
- case nbnxnk4xN_SIMD_2xNN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
makeClusterListSimd2xnn(jGrid,
nbl, ci, firstCell, lastCell,
excludeSubDiagonal,
numDistanceChecks);
break;
#endif
+ default:
+ GMX_ASSERT(false, "Unhandled kernel type");
}
}
const nbnxn_atomdata_t *nbat,
const real rlist2,
const real rbb2,
- const int gmx_unused nb_kernel_type,
+ Nbnxm::KernelType gmx_unused kernelType,
int *numDistanceChecks)
{
for (int cj = firstCell; cj <= lastCell; cj++)
const nbnxn_atomdata_t *nbat,
const t_blocka &exclusions,
real rlist,
- int nb_kernel_type,
+ const Nbnxm::KernelType kernelType,
int ci_block,
gmx_bool bFBufferFlag,
int nsubpair_max,
sync_work(nbl);
GMX_ASSERT(nbl->na_ci == jGrid.na_c, "The cluster sizes in the list and grid should match");
- nbl->na_cj = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
+ nbl->na_cj = Nbnxm::JClusterSizePerKernelType[kernelType];
na_cj_2log = get_2log(nbl->na_cj);
nbl->rlist = rlist;
icell_set_x(cell0_i+ci, shx, shy, shz,
nbat->xstride, nbat->x().data(),
- nb_kernel_type,
+ kernelType,
nbl->work);
for (int cx = cxf; cx <= cxl; cx++)
excludeSubDiagonal,
nbat,
rlist2, rbb2,
- nb_kernel_type,
+ kernelType,
&numDistanceChecks);
if (bFBufferFlag)
void nbnxn_make_pairlist(nonbonded_verlet_t *nbv,
const InteractionLocality iLocality,
+ nbnxn_pairlist_set_t *nbl_list,
const t_blocka *excl,
const int64_t step,
t_nrnb *nrnb)
nbnxn_search *nbs = nbv->nbs.get();
nbnxn_atomdata_t *nbat = nbv->nbat;
const real rlist = nbv->listParams->rlistOuter;
- nbnxn_pairlist_set_t *nbl_list = &nbv->pairlistSets[iLocality];
int nsubpair_target;
float nsubpair_tot_est;
nbnxn_make_pairlist_part(nbs, iGrid, jGrid,
&nbs->work[th], nbat, *excl,
rlist,
- nbv->kernelType_,
+ nbv->kernelSetup().kernelType,
ci_block,
nbat->bUseBufferFlags,
nsubpair_target,
nbnxn_make_pairlist_part(nbs, iGrid, jGrid,
&nbs->work[th], nbat, *excl,
rlist,
- nbv->kernelType_,
+ nbv->kernelSetup().kernelType,
ci_block,
nbat->bUseBufferFlags,
nsubpair_target,
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/bitmask.h"
#include "gromacs/utility/defaultinitializationallocator.h"
+#include "gromacs/utility/enumerationhelpers.h"
#include "gromacs/utility/real.h"
// This file with constants is separate from this file to be able
struct NbnxnPairlistGpuWork;
struct tMPI_Atomic;
+namespace Nbnxm
+{
+enum class KernelType;
+}
+
/* Convenience type for vector with aligned memory */
template<typename T>
using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>;
template<typename T>
using FastVector = std::vector < T, gmx::DefaultInitializationAllocator < T>>;
+enum class PairlistType : int
+{
+ Simple4x2,
+ Simple4x4,
+ Simple4x8,
+ Hierarchical8x8,
+ Count
+};
+
+static constexpr gmx::EnumerationArray<PairlistType, int> IClusterSizePerListType = { 4, 4, 4, 8 };
+static constexpr gmx::EnumerationArray<PairlistType, int> JClusterSizePerListType = { 2, 4, 8, 8 };
+
/*! \cond INTERNAL */
/*! \brief The setup for generating and pruning the nbnxn pair list.
{
/*! \brief Constructor producing a struct with dynamic pruning disabled
*/
- NbnxnListParameters(real rlist) :
- useDynamicPruning(false),
- nstlistPrune(-1),
- rlistOuter(rlist),
- rlistInner(rlist),
- numRollingParts(1)
- {
- }
-
- bool useDynamicPruning; //!< Are we using dynamic pair-list pruning
- int nstlistPrune; //!< Pair-list dynamic pruning interval
- real rlistOuter; //!< Cut-off of the larger, outer pair-list
- real rlistInner; //!< Cut-off of the smaller, inner pair-list
- int numRollingParts; //!< The number parts to divide the pair-list into for rolling pruning, a value of 1 gives no rolling pruning
+ NbnxnListParameters(Nbnxm::KernelType kernelType,
+ real rlist);
+
+ PairlistType pairlistType; //!< The type of cluster-pair list
+ bool useDynamicPruning; //!< Are we using dynamic pair-list pruning
+ int nstlistPrune; //!< Pair-list dynamic pruning interval
+ real rlistOuter; //!< Cut-off of the larger, outer pair-list
+ real rlistInner; //!< Cut-off of the smaller, inner pair-list
+ int numRollingParts; //!< The number of parts to divide the pair-list into for rolling pruning; a value of 1 gives no rolling pruning
};
/*! \endcond */
struct nbnxn_pairlist_set_t
{
- int nnbl; /* number of lists */
- NbnxnPairlistCpu **nbl; /* lists for CPU */
- NbnxnPairlistCpu **nbl_work; /* work space for rebalancing lists */
- NbnxnPairlistGpu **nblGpu; /* lists for GPU */
- gmx_bool bCombined; /* TRUE if lists get combined into one (the 1st) */
- gmx_bool bSimple; /* TRUE if the list of of type "simple"
+ nbnxn_pairlist_set_t(const NbnxnListParameters &listParams);
+
+ int nnbl; /* number of lists */
+ NbnxnPairlistCpu **nbl; /* lists for CPU */
+ NbnxnPairlistCpu **nbl_work; /* work space for rebalancing lists */
+ NbnxnPairlistGpu **nblGpu; /* lists for GPU */
+ const NbnxnListParameters &params; /* Pairlist parameters describing setup and ranges */
+ gmx_bool bCombined; /* TRUE if lists get combined into one (the 1st) */
+ gmx_bool bSimple; /* TRUE if the list is of type "simple"
(na_sc=na_s, no super-clusters used) */
- int natpair_ljq; /* Total number of atom pairs for LJ+Q kernel */
- int natpair_lj; /* Total number of atom pairs for LJ kernel */
- int natpair_q; /* Total number of atom pairs for Q kernel */
- t_nblist **nbl_fep; /* List of free-energy atom pair interactions */
- int64_t outerListCreationStep; /* Step at which the outer list was created */
+
+ /* Counts for debug printing */
+ int natpair_ljq; /* Total number of atom pairs for LJ+Q kernel */
+ int natpair_lj; /* Total number of atom pairs for LJ kernel */
+ int natpair_q; /* Total number of atom pairs for Q kernel */
+ std::vector<t_nblist *> nbl_fep; /* List of free-energy atom pair interactions */
+ int64_t outerListCreationStep; /* Step at which the outer list was created */
};
enum {
{
/* Constructor
*
- * \param[in] nb_kernel_type Type of non-bonded kernel
+ * \param[in] kernelType Type of non-bonded kernel
* \param[in] numEnergyGroups The number of energy groups
* \param[in] simdEnergyBufferStride Stride for entries in the energy buffers for SIMD kernels
* \param[in] pinningPolicy Sets the pinning policy for all buffers used on the GPU
*/
- nbnxn_atomdata_output_t(int nb_kernel_type,
+ nbnxn_atomdata_output_t(Nbnxm::KernelType kernelType,
int numEnergyGroups,
int simdEnergyBufferStride,
gmx::PinningPolicy pinningPolicy);
const t_inputrec *ir,
const gmx_mtop_t *mtop,
matrix box,
- int nbnxnKernelType,
const interaction_const_t *ic,
NbnxnListParameters *listParams)
{
/* Initialize the parameters to no dynamic list pruning */
listParams->useDynamicPruning = false;
- const VerletbufListSetup ls = verletbufGetListSetup(nbnxnKernelType);
+ const VerletbufListSetup ls =
+ {
+ IClusterSizePerListType[listParams->pairlistType],
+ JClusterSizePerListType[listParams->pairlistType]
+ };
/* Currently emulation mode does not support dual pair-lists */
- const bool useGpu = (nbnxnKernelType == nbnxnk8x8x8_GPU);
+ const bool useGpu = (listParams->pairlistType == PairlistType::Hierarchical8x8);
if (supportsDynamicPairlistGenerationInterval(*ir) &&
getenv("GMX_DISABLE_DYNAMICPRUNING") == nullptr)
* \param[in] ir The input parameter record
* \param[in] mtop The global topology
* \param[in] box The unit cell
- * \param[in] nbnxnKernelType The type of nbnxn kernel used
* \param[in] ic The nonbonded interactions constants
* \param[in,out] listParams The list setup parameters
*/
const t_inputrec *ir,
const gmx_mtop_t *mtop,
matrix box,
- int nbnxnKernelType,
const interaction_const_t *ic,
NbnxnListParameters *listParams);
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \internal \file
+ * \brief
+ * Implements functionality for nbnxn_pairlist_set_t.
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_nbnxm
+ */
+
#include "gmxpre.h"
+#include "pairlistset.h"
+
#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/nbnxm_geometry.h"
#include "gromacs/nbnxm/pairlist.h"
+#include "gromacs/utility/gmxassert.h"
+
+/*! \cond INTERNAL */
+
+NbnxnListParameters::NbnxnListParameters(const Nbnxm::KernelType kernelType,
+ const real rlist) :
+ useDynamicPruning(false),
+ nstlistPrune(-1),
+ rlistOuter(rlist),
+ rlistInner(rlist),
+ numRollingParts(1)
+{
+ if (!Nbnxm::kernelTypeUsesSimplePairlist(kernelType))
+ {
+ pairlistType = PairlistType::Hierarchical8x8;
+ }
+ else
+ {
+ switch (Nbnxm::JClusterSizePerKernelType[kernelType])
+ {
+ case 2:
+ pairlistType = PairlistType::Simple4x2;
+ break;
+ case 4:
+ pairlistType = PairlistType::Simple4x4;
+ break;
+ case 8:
+ pairlistType = PairlistType::Simple4x8;
+ break;
+ default:
+ GMX_RELEASE_ASSERT(false, "Kernel type does not have a pairlist type");
+ }
+ }
+}
+
+nbnxn_pairlist_set_t::nbnxn_pairlist_set_t(const NbnxnListParameters &listParams) :
+ params(listParams)
+{
+ // TODO: move the body of nbnxn_init_pairlist_set() into this constructor
+ nbnxn_init_pairlist_set(this);
+}
int nbnxnNumStepsWithPairlist(const nonbonded_verlet_t &nbv,
const Nbnxm::InteractionLocality iLocality,
const int64_t step)
{
- return step - nbv.pairlistSets[iLocality].outerListCreationStep;
+ return step - nbv.pairlistSet(iLocality).outerListCreationStep;
}
bool nbnxnIsDynamicPairlistPruningStep(const nonbonded_verlet_t &nbv,
{
return nbnxnNumStepsWithPairlist(nbv, iLocality, step) % nbv.listParams->nstlistPrune == 0;
}
+
+/*! \endcond */
gmx_bool bFEP,
int nthread_max);
-/* Initializes a set of pair lists stored in nbnxn_pairlist_set_t */
-void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list,
- gmx_bool simple, gmx_bool combined);
+/* Initializes a set of pair lists stored in nbnxn_pairlist_set_t
+ *
+ * TODO: Merge into the constructor
+ */
+void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list);
/*! \brief Prepare the list-set produced by the search for dynamic pruning
*
#include "kernels_simd_4xm/kernel_prune.h"
-void NbnxnDispatchPruneKernel(nonbonded_verlet_t *nbv,
- const Nbnxm::InteractionLocality ilocality,
- const rvec *shift_vec)
+void NbnxnDispatchPruneKernel(nbnxn_pairlist_set_t *nbl_lists,
+ const Nbnxm::KernelType kernelType,
+ const nbnxn_atomdata_t *nbat,
+ const rvec *shift_vec)
{
- nbnxn_pairlist_set_t *nbl_lists = &nbv->pairlistSets[ilocality];
- const nbnxn_atomdata_t *nbat = nbv->nbat;
- const real rlistInner = nbv->listParams->rlistInner;
+ const real rlistInner = nbl_lists->params.rlistInner;
GMX_ASSERT(nbl_lists->nbl[0]->ciOuter.size() >= nbl_lists->nbl[0]->ci.size(),
"Here we should either have an empty ci list or ciOuter should be >= ci");
{
NbnxnPairlistCpu *nbl = nbl_lists->nbl[i];
- switch (nbv->kernelType_)
+ switch (kernelType)
{
- case nbnxnk4xN_SIMD_4xN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
nbnxn_kernel_prune_4xn(nbl, nbat, shift_vec, rlistInner);
break;
- case nbnxnk4xN_SIMD_2xNN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
nbnxn_kernel_prune_2xnn(nbl, nbat, shift_vec, rlistInner);
break;
- case nbnxnk4x4_PlainC:
+ case Nbnxm::KernelType::Cpu4x4_PlainC:
nbnxn_kernel_prune_ref(nbl, nbat, shift_vec, rlistInner);
break;
default:
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/taskassignment/taskassignment.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/baseversion.h"
struct gmx_mtop_t;
struct t_inputrec;
-enum class EmulateGpuNonbonded : bool;
-
namespace gmx
{
Gpu
};
+//! Help pass GPU-emulation parameters with type safety.
+enum class EmulateGpuNonbonded : bool
+{
+ //! Do not emulate GPUs.
+ No,
+ //! Do emulate GPUs.
+ Yes
+};
+
/*! \brief Decide whether this thread-MPI simulation will run
* nonbonded tasks on GPUs.
*