Hide internals of nbnxm pairlist
author Berk Hess <hess@kth.se>
Tue, 19 Feb 2019 14:02:27 +0000 (15:02 +0100)
committer Mark Abraham <mark.j.abraham@gmail.com>
Sat, 23 Feb 2019 11:14:24 +0000 (12:14 +0100)
Introduced the PairlistSets class, which holds all data related
to the nbnxm pair lists. The actual details of the lists are
no longer available outside the nbnxm module.

Change-Id: If4f36a379f2a6a133435b8ee82de8abfed5c63fd

14 files changed:
src/gromacs/ewald/pme_load_balancing.cpp
src/gromacs/mdlib/sim_util.cpp
src/gromacs/mdrun/md.cpp
src/gromacs/nbnxm/atomdata.cpp
src/gromacs/nbnxm/atomdata.h
src/gromacs/nbnxm/grid.cpp
src/gromacs/nbnxm/kerneldispatch.cpp
src/gromacs/nbnxm/nbnxm.h
src/gromacs/nbnxm/nbnxm_setup.cpp
src/gromacs/nbnxm/pairlist.cpp
src/gromacs/nbnxm/pairlist.h
src/gromacs/nbnxm/pairlistset.cpp
src/gromacs/nbnxm/pairlistset.h
src/gromacs/nbnxm/prunekerneldispatch.cpp

index 778c7c2c4818e5d2ff0a7c268b9039a36382dfdd..2960abf33fc31c54c2faca73dc9fd6867c8d5b31 100644 (file)
@@ -793,11 +793,8 @@ pme_load_balance(pme_load_balancing_t      *pme_lb,
 
     set = &pme_lb->setup[pme_lb->cur];
 
-    NbnxnListParameters *listParams = nbv->listParams.get();
-
     ic->rcoulomb           = set->rcut_coulomb;
-    listParams->rlistOuter = set->rlistOuter;
-    listParams->rlistInner = set->rlistInner;
+    nbv->pairlistSets_->changeRadii(set->rlistOuter, set->rlistInner);
     ic->ewaldcoeff_q       = set->ewaldcoeff_q;
     /* TODO: centralize the code that sets the potentials shifts */
     if (ic->coulomb_modifier == eintmodPOTSHIFT)
@@ -825,7 +822,7 @@ pme_load_balance(pme_load_balancing_t      *pme_lb,
     /* We always re-initialize the tables whether they are used or not */
     init_interaction_const_tables(nullptr, ic, rtab);
 
-    Nbnxm::gpu_pme_loadbal_update_param(nbv, ic, listParams);
+    Nbnxm::gpu_pme_loadbal_update_param(nbv, ic, &nbv->pairlistSets().params());
 
     if (!pme_lb->bSepPMERanks)
     {
@@ -997,7 +994,7 @@ void pme_loadbal_do(pme_load_balancing_t *pme_lb,
              * This also ensures that we won't disable the currently
              * optimal setting during a second round of PME balancing.
              */
-            set_dd_dlb_max_cutoff(cr, fr->nbv->listParams->rlistOuter);
+            set_dd_dlb_max_cutoff(cr, fr->nbv->pairlistSets().params().rlistOuter);
         }
     }
 
@@ -1014,7 +1011,7 @@ void pme_loadbal_do(pme_load_balancing_t *pme_lb,
                          step);
 
         /* Update deprecated rlist in forcerec to stay in sync with fr->nbv */
-        fr->rlist         = fr->nbv->listParams->rlistOuter;
+        fr->rlist         = fr->nbv->pairlistSets().params().rlistOuter;
 
         if (ir.eDispCorr != edispcNO)
         {
index 6f3f2e3150de955cc8cad4a1eff14544cfbf06bb..f8f7c388aeb2503fd46a93443046828c1fa56214 100644 (file)
@@ -416,8 +416,7 @@ static void do_nb_verlet(t_forcerec                       *fr,
         /* When dynamic pair-list  pruning is requested, we need to prune
          * at nstlistPrune steps.
          */
-        if (nbv->listParams->useDynamicPruning &&
-            nbnxnIsDynamicPairlistPruningStep(*nbv, ilocality, step))
+        if (nbv->pairlistSets().isDynamicPairlistPruningStep(step))
         {
             /* Prune the pair-list beyond fr->ic->rlistPrune using
              * the current coordinates of the atoms.
@@ -430,7 +429,7 @@ static void do_nb_verlet(t_forcerec                       *fr,
         wallcycle_sub_start(wcycle, ewcsNONBONDED);
     }
 
-    NbnxnDispatchKernel(nbv, ilocality, *ic, flags, clearF, fr, enerd, nrnb);
+    nbv->dispatchNonbondedKernel(ilocality, *ic, flags, clearF, fr, enerd, nrnb);
 
     if (!nbv->useGpu())
     {
@@ -761,8 +760,8 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t                  *nbv
                 wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
                 wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
 
-                nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::Local,
-                                               nbv->nbat, as_rvec_array(force->unpaddedArrayRef().data()), wcycle);
+                nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local,
+                                              as_rvec_array(force->unpaddedArrayRef().data()), wcycle);
             }
         }
     }
@@ -790,9 +789,11 @@ static inline void launchGpuRollingPruning(const t_commrec          *cr,
      * With domain decomposition we alternate local and non-local
      * pruning at even and odd steps.
      */
-    int  numRollingParts     = nbv->listParams->numRollingParts;
-    GMX_ASSERT(numRollingParts == nbv->listParams->nstlistPrune/2, "Since we alternate local/non-local at even/odd steps, we need numRollingParts<=nstlistPrune/2 for correctness and == for efficiency");
-    int  stepWithCurrentList = nbnxnNumStepsWithPairlist(*nbv, Nbnxm::InteractionLocality::Local, step);
+    int  numRollingParts     = nbv->pairlistSets().params().numRollingParts;
+    GMX_ASSERT(numRollingParts == nbv->pairlistSets().params().nstlistPrune/2,
+               "Since we alternate local/non-local at even/odd steps, "
+               "we need numRollingParts<=nstlistPrune/2 for correctness and == for efficiency");
+    int  stepWithCurrentList = nbv->pairlistSets().numStepsWithPairlist(step);
     bool stepIsEven          = ((stepWithCurrentList & 1) == 0);
     if (stepWithCurrentList > 0 &&
         stepWithCurrentList < inputrec->nstlist - 1 &&
@@ -1274,22 +1275,18 @@ static void do_force_cutsVERLET(FILE *fplog,
                          step, nrnb, wcycle);
         }
 
-        const Nbnxm::InteractionLocality iloc =
-            (!bUseOrEmulGPU ? Nbnxm::InteractionLocality::Local : Nbnxm::InteractionLocality::NonLocal);
-
         /* Add all the non-bonded force to the normal force array.
          * This can be split into a local and a non-local part when overlapping
          * communication with calculation with domain decomposition.
          */
         wallcycle_stop(wcycle, ewcFORCE);
 
-        nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::All, nbv->nbat, f, wcycle);
+        nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, f, wcycle);
 
         wallcycle_start_nocount(wcycle, ewcFORCE);
 
-        /* if there are multiple fshift output buffers reduce them */
-        if ((flags & GMX_FORCE_VIRIAL) &&
-            nbv->pairlistSet(iloc).nnbl > 1)
+        /* If there are multiple fshift output buffers we need to reduce them */
+        if (flags & GMX_FORCE_VIRIAL)
         {
             /* This is not in a subcounter because it takes a
                negligible and constant-sized amount of time */
@@ -1343,12 +1340,8 @@ static void do_force_cutsVERLET(FILE *fplog,
                 wallcycle_stop(wcycle, ewcFORCE);
             }
 
-            /* skip the reduction if there was no non-local work to do */
-            if (!nbv->pairlistSet(Nbnxm::InteractionLocality::NonLocal).nblGpu[0]->sci.empty())
-            {
-                nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::NonLocal,
-                                               nbv->nbat, f, wcycle);
-            }
+            nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::NonLocal,
+                                          f, wcycle);
         }
     }
 
@@ -1438,7 +1431,7 @@ static void do_force_cutsVERLET(FILE *fplog,
         Nbnxm::gpu_clear_outputs(nbv->gpu_nbv, flags);
 
         /* Is dynamic pair-list pruning activated? */
-        if (nbv->listParams->useDynamicPruning)
+        if (nbv->pairlistSets().params().useDynamicPruning)
         {
             launchGpuRollingPruning(cr, nbv, inputrec, step);
         }
@@ -1466,8 +1459,8 @@ static void do_force_cutsVERLET(FILE *fplog,
      * on the non-alternating path. */
     if (bUseOrEmulGPU && !alternateGpuWait)
     {
-        nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::Local,
-                                       nbv->nbat, f, wcycle);
+        nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local,
+                                      f, wcycle);
     }
     if (DOMAINDECOMP(cr))
     {
index 09dd5ace38f83fcea3aa485e669298d93f20c67e..1d3113172f648169648ce0bfceab49d959ff4ba4 100644 (file)
@@ -412,7 +412,7 @@ void gmx::Integrator::do_md()
     if (bPMETune)
     {
         pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box,
-                         *fr->ic, *fr->nbv->listParams, fr->pmedata, use_GPU(fr->nbv),
+                         *fr->ic, fr->nbv->pairlistSets().params(), fr->pmedata, use_GPU(fr->nbv),
                          &bPMETunePrinting);
     }
 
index 0fad12d1f8b7542115ef9e17b44d9a411cdac784..1de1af8f114bc236af7bfe55a5fa75592d75118e 100644 (file)
@@ -54,6 +54,7 @@
 #include "gromacs/mdtypes/mdatom.h"
 #include "gromacs/nbnxm/nbnxm.h"
 #include "gromacs/nbnxm/nbnxm_geometry.h"
+#include "gromacs/nbnxm/pairlist.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/simd/simd.h"
 #include "gromacs/timing/wallcycle.h"
@@ -1171,7 +1172,7 @@ nbnxn_atomdata_reduce_reals_simd(real gmx_unused * gmx_restrict dest,
 static void
 nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search *nbs,
                                     const nbnxn_atomdata_t *nbat,
-                                    gmx::ArrayRef<nbnxn_atomdata_output_t> out,
+                                    gmx::ArrayRef<const nbnxn_atomdata_output_t> out,
                                     int nfa,
                                     int a0, int a1,
                                     rvec *f)
@@ -1465,12 +1466,18 @@ static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbnxn_atomdata_t *nbat,
 }
 
 /* Add the force array(s) from nbnxn_atomdata_t to f */
-void nbnxn_atomdata_add_nbat_f_to_f(nbnxn_search             *nbs,
-                                    const Nbnxm::AtomLocality locality,
-                                    nbnxn_atomdata_t         *nbat,
-                                    rvec                     *f,
-                                    gmx_wallcycle            *wcycle)
+void
+nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality  locality,
+                                             rvec                      *f,
+                                             gmx_wallcycle             *wcycle)
 {
+    /* Skip the non-local reduction if there was no non-local work to do */
+    if (locality == Nbnxm::AtomLocality::NonLocal &&
+        pairlistSets().pairlistSet(Nbnxm::InteractionLocality::NonLocal).nblGpu[0]->sci.empty())
+    {
+        return;
+    }
+
     wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
     wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
 
@@ -1521,7 +1528,7 @@ void nbnxn_atomdata_add_nbat_f_to_f(nbnxn_search             *nbs,
     {
         try
         {
-            nbnxn_atomdata_add_nbat_f_to_f_part(nbs, nbat,
+            nbnxn_atomdata_add_nbat_f_to_f_part(nbs.get(), nbat,
                                                 nbat->out,
                                                 1,
                                                 a0+((th+0)*na)/nth,
@@ -1543,6 +1550,15 @@ void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat,
 {
     gmx::ArrayRef<const nbnxn_atomdata_output_t> outputBuffers = nbat->out;
 
+    if (outputBuffers.size() == 1)
+    {
+        /* When there is a single output object, with CPU or GPU, shift forces
+         * have been written directly to the main buffer instead of to the
+         * (single) thread local output object. There is nothing to reduce.
+         */
+        return;
+    }
+
     for (int s = 0; s < SHIFTS; s++)
     {
         rvec sum;
index fbd4dd3c00aa634a6ba48cfeab51b335791ea342..7deeb2c1810911e28d4dbee9f833384691f32505 100644 (file)
@@ -51,6 +51,7 @@ class MDLogger;
 
 struct nbnxn_atomdata_t;
 struct nbnxn_search;
+struct nonbonded_verlet_t;
 struct t_mdatoms;
 struct gmx_wallcycle;
 
@@ -107,13 +108,6 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search  *nbs,
                                      nbnxn_atomdata_t    *nbat,
                                      gmx_wallcycle       *wcycle);
 
-/* Add the forces stored in nbat to f, zeros the forces in nbat */
-void nbnxn_atomdata_add_nbat_f_to_f(nbnxn_search           *nbs,
-                                    Nbnxm::AtomLocality     locality,
-                                    nbnxn_atomdata_t       *nbat,
-                                    rvec                   *f,
-                                    gmx_wallcycle          *wcycle);
-
 /* Add the fshift force stored in nbat to fshift */
 void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat,
                                               rvec                   *fshift);
index 4b98d53a3cc32bf9d04075369317144be2891c10..19ed3d1021e9d01e09534e7086bee727e2c841da 100644 (file)
@@ -1428,8 +1428,8 @@ void nbnxn_put_on_grid(nonbonded_verlet_t             *nbv,
 
     grid->bSimple = nbv->pairlistIsSimple();
 
-    grid->na_c      = IClusterSizePerListType[nbv->listParams->pairlistType];
-    grid->na_cj     = JClusterSizePerListType[nbv->listParams->pairlistType];
+    grid->na_c      = IClusterSizePerListType[nbv->pairlistSets().params().pairlistType];
+    grid->na_cj     = JClusterSizePerListType[nbv->pairlistSets().params().pairlistType];
     grid->na_sc     = (grid->bSimple ? 1 : c_gpuNumClusterPerCell)*grid->na_c;
     grid->na_c_2log = get_2log(grid->na_c);
 
index fcac007c027352930d409970db694e842855c081..b4b79b0b11fabed001234dbe33fc8d10fc19c9d0 100644 (file)
@@ -422,15 +422,14 @@ nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
 }
 
 static void accountFlops(t_nrnb                           *nrnb,
+                         const nbnxn_pairlist_set_t       &pairlistSet,
                          const nonbonded_verlet_t         &nbv,
-                         const Nbnxm::InteractionLocality  iLocality,
                          const interaction_const_t        &ic,
                          const int                         forceFlags)
 {
-    const nbnxn_pairlist_set_t &pairlistSet     = nbv.pairlistSet(iLocality);
-    const bool                  usingGpuKernels = nbv.useGpu();
+    const bool usingGpuKernels = nbv.useGpu();
 
-    int enr_nbnxn_kernel_ljc;
+    int        enr_nbnxn_kernel_ljc;
     if (EEL_RF(ic.eeltype) || ic.eeltype == eelCUT)
     {
         enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
@@ -481,25 +480,25 @@ static void accountFlops(t_nrnb                           *nrnb,
     }
 }
 
-void NbnxnDispatchKernel(nonbonded_verlet_t        *nbv,
-                         Nbnxm::InteractionLocality iLocality,
-                         const interaction_const_t &ic,
-                         int                        forceFlags,
-                         int                        clearF,
-                         t_forcerec                *fr,
-                         gmx_enerdata_t            *enerd,
-                         t_nrnb                    *nrnb)
+void
+nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality,
+                                            const interaction_const_t &ic,
+                                            int                        forceFlags,
+                                            int                        clearF,
+                                            t_forcerec                *fr,
+                                            gmx_enerdata_t            *enerd,
+                                            t_nrnb                    *nrnb)
 {
-    const nbnxn_pairlist_set_t &pairlistSet = nbv->pairlistSet(iLocality);
+    const nbnxn_pairlist_set_t &pairlistSet = pairlistSets().pairlistSet(iLocality);
 
-    switch (nbv->kernelSetup().kernelType)
+    switch (kernelSetup().kernelType)
     {
         case Nbnxm::KernelType::Cpu4x4_PlainC:
         case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
         case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
             nbnxn_kernel_cpu(pairlistSet,
-                             nbv->kernelSetup(),
-                             nbv->nbat,
+                             kernelSetup(),
+                             nbat,
                              ic,
                              fr->shift_vec,
                              forceFlags,
@@ -512,16 +511,16 @@ void NbnxnDispatchKernel(nonbonded_verlet_t        *nbv,
             break;
 
         case Nbnxm::KernelType::Gpu8x8x8:
-            Nbnxm::gpu_launch_kernel(nbv->gpu_nbv, forceFlags, iLocality);
+            Nbnxm::gpu_launch_kernel(gpu_nbv, forceFlags, iLocality);
             break;
 
         case Nbnxm::KernelType::Cpu8x8x8_PlainC:
             nbnxn_kernel_gpu_ref(pairlistSet.nblGpu[0],
-                                 nbv->nbat, &ic,
+                                 nbat, &ic,
                                  fr->shift_vec,
                                  forceFlags,
                                  clearF,
-                                 nbv->nbat->out[0].f,
+                                 nbat->out[0].f,
                                  fr->fshift[0],
                                  enerd->grpp.ener[egCOULSR],
                                  fr->bBHAM ?
@@ -534,7 +533,7 @@ void NbnxnDispatchKernel(nonbonded_verlet_t        *nbv,
 
     }
 
-    accountFlops(nrnb, *nbv, iLocality, ic, forceFlags);
+    accountFlops(nrnb, pairlistSet, *this, ic, forceFlags);
 }
 
 void
@@ -549,7 +548,7 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocali
                                              const int                   forceFlags,
                                              t_nrnb                     *nrnb)
 {
-    const gmx::ArrayRef<t_nblist const * const > nbl_fep = pairlistSet(iLocality).nbl_fep;
+    const gmx::ArrayRef<t_nblist const * const > nbl_fep = pairlistSets().pairlistSet(iLocality).nbl_fep;
 
     /* When the first list is empty, all are empty and there is nothing to do */
     if (nbl_fep[0]->nrj == 0)
index 1b020d5916d256fc35034849fb51f69cb6a1940b..c15c0270c68b105ba30460509ff94a3d3e2969e4 100644 (file)
 #include <memory>
 
 #include "gromacs/math/vectypes.h"
-#include "gromacs/nbnxm/pairlist.h"
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/enumerationhelpers.h"
 #include "gromacs/utility/real.h"
@@ -116,10 +115,12 @@ struct gmx_domdec_zones_t;
 struct gmx_enerdata_t;
 struct gmx_hw_info_t;
 struct gmx_mtop_t;
+struct gmx_wallcycle;
 struct interaction_const_t;
 struct nbnxn_pairlist_set_t;
 struct nbnxn_search;
 struct nonbonded_verlet_t;
+enum class PairlistType;
 struct t_blocka;
 struct t_commrec;
 struct t_lambda;
@@ -134,6 +135,31 @@ class MDLogger;
 class UpdateGroupsCog;
 }
 
+namespace Nbnxm
+{
+enum class KernelType;
+}
+
+/*! \libinternal
+ * \brief The setup for generating and pruning the nbnxn pair list.
+ *
+ * Without dynamic pruning rlistOuter=rlistInner.
+ */
+struct NbnxnListParameters
+{
+    /*! \brief Constructor producing a struct with dynamic pruning disabled
+     */
+    NbnxnListParameters(Nbnxm::KernelType kernelType,
+                        real              rlist);
+
+    PairlistType pairlistType;      //!< The type of cluster-pair list
+    bool         useDynamicPruning; //!< Are we using dynamic pair-list pruning
+    int          nstlistPrune;      //!< Pair-list dynamic pruning interval
+    real         rlistOuter;        //!< Cut-off of the larger, outer pair-list
+    real         rlistInner;        //!< Cut-off of the smaller, inner pair-list
+    int          numRollingParts;   //!< The number parts to divide the pair-list into for rolling pruning, a value of 1 gives no rolling pruning
+};
+
 /*! \brief Resources that can be used to execute non-bonded kernels on */
 enum class NonbondedResource : int
 {
@@ -216,6 +242,96 @@ void NbnxnDispatchPruneKernel(nbnxn_pairlist_set_t   *pairlistSet,
 struct nonbonded_verlet_t
 {
     public:
+        class PairlistSets
+        {
+            public:
+                PairlistSets(const NbnxnListParameters  &listParams,
+                             bool                        haveMultipleDomains,
+                             int                         minimumIlistCountForGpuBalancing);
+
+                //! Construct the pairlist set for the given locality
+                void construct(Nbnxm::InteractionLocality  iLocality,
+                               nbnxn_search               *nbs,
+                               nbnxn_atomdata_t           *nbat,
+                               const t_blocka             *excl,
+                               Nbnxm::KernelType           kernelbType,
+                               int64_t                     step,
+                               t_nrnb                     *nrnb);
+
+                //! Dispatches the dynamic pruning kernel for the given locality
+                void dispatchPruneKernel(Nbnxm::InteractionLocality  iLocality,
+                                         const nbnxn_atomdata_t     *nbat,
+                                         const rvec                 *shift_vec,
+                                         Nbnxm::KernelType           kernelbType);
+
+                //! Returns the pair list parameters
+                const NbnxnListParameters &params() const
+                {
+                    return params_;
+                }
+
+                //! Returns the number of steps performed with the current pair list
+                int numStepsWithPairlist(int64_t step) const
+                {
+                    return step - outerListCreationStep_;
+                }
+
+                //! Returns whether step is a dynamic list pruning step, for CPU lists only
+                bool isDynamicPairlistPruningStep(int64_t step) const
+                {
+                    return (params_.useDynamicPruning &&
+                            numStepsWithPairlist(step) % params_.nstlistPrune == 0);
+                }
+
+                //! Changes the pair-list outer and inner radius
+                void changeRadii(real rlistOuter,
+                                 real rlistInner)
+                {
+                    params_.rlistOuter = rlistOuter;
+                    params_.rlistInner = rlistInner;
+                }
+
+                //! Returns the pair-list set for the given locality
+                const nbnxn_pairlist_set_t &pairlistSet(Nbnxm::InteractionLocality iLocality) const
+                {
+                    if (iLocality == Nbnxm::InteractionLocality::Local)
+                    {
+                        return *localSet_;
+                    }
+                    else
+                    {
+                        GMX_ASSERT(nonlocalSet_, "Need a non-local set when requesting access");
+                        return *nonlocalSet_;
+                    }
+                }
+
+            private:
+                //! Returns the pair-list set for the given locality
+                nbnxn_pairlist_set_t &pairlistSet(Nbnxm::InteractionLocality iLocality)
+                {
+                    if (iLocality == Nbnxm::InteractionLocality::Local)
+                    {
+                        return *localSet_;
+                    }
+                    else
+                    {
+                        GMX_ASSERT(nonlocalSet_, "Need a non-local set when requesting access");
+                        return *nonlocalSet_;
+                    }
+                }
+
+                //! Parameters for the search and list pruning setup
+                NbnxnListParameters                   params_;
+                //! Pair list balancing parameter for use with GPU
+                int                                   minimumIlistCountForGpuBalancing_;
+                //! Local pairlist set
+                std::unique_ptr<nbnxn_pairlist_set_t> localSet_;
+                //! Non-local pairlist set
+                std::unique_ptr<nbnxn_pairlist_set_t> nonlocalSet_;
+                //! MD step at which the outer lists in pairlistSets_ were created
+                int64_t                               outerListCreationStep_;
+        };
+
         //! Returns whether a GPU is use for the non-bonded calculations
         bool useGpu() const
         {
@@ -237,34 +353,32 @@ struct nonbonded_verlet_t
         //! Initialize the pair list sets, TODO this should be private
         void initPairlistSets(bool haveMultipleDomains);
 
-        //! Returns a reference to the pairlist set for the requested locality
-        const nbnxn_pairlist_set_t &pairlistSet(Nbnxm::InteractionLocality iLocality) const
-        {
-            GMX_ASSERT(static_cast<size_t>(iLocality) < pairlistSets_.size(),
-                       "The requested locality should be in the list");
-            return pairlistSets_[static_cast<int>(iLocality)];
-        }
-
         //! Constructs the pairlist for the given locality
         void constructPairlist(Nbnxm::InteractionLocality  iLocality,
                                const t_blocka             *excl,
                                int64_t                     step,
-                               t_nrnb                     *nrnb)
+                               t_nrnb                     *nrnb);
+
+        //! Returns a reference to the pairlist sets
+        const PairlistSets &pairlistSets() const
         {
-            nbnxn_make_pairlist(this, iLocality, &pairlistSets_[static_cast<int>(iLocality)], excl, step, nrnb);
+            return *pairlistSets_;
         }
 
         //! Dispatches the dynamic pruning kernel for the given locality
         void dispatchPruneKernel(Nbnxm::InteractionLocality  iLocality,
-                                 const rvec                 *shift_vec)
-        {
-            GMX_ASSERT(static_cast<size_t>(iLocality) < pairlistSets_.size(),
-                       "The requested locality should be in the list");
-            NbnxnDispatchPruneKernel(&pairlistSets_[static_cast<int>(iLocality)],
-                                     kernelSetup_.kernelType, nbat, shift_vec);
-        }
-
-        //! Dispatches the non-bonded free-energy kernel, always runs on the CPU
+                                 const rvec                 *shift_vec);
+
+        //! \brief Executes the non-bonded kernel on the CPU or launches it on the GPU
+        void dispatchNonbondedKernel(Nbnxm::InteractionLocality  iLocality,
+                                     const interaction_const_t  &ic,
+                                     int                         forceFlags,
+                                     int                         clearF,
+                                     t_forcerec                 *fr,
+                                     gmx_enerdata_t             *enerd,
+                                     t_nrnb                     *nrnb);
+
+        //! Executes the non-bonded free-energy kernel, always runs on the CPU
         void dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocality,
                                       t_forcerec                 *fr,
                                       rvec                        x[],
@@ -276,6 +390,11 @@ struct nonbonded_verlet_t
                                       int                         forceFlags,
                                       t_nrnb                     *nrnb);
 
+        //! Add the forces stored in nbat to f, zeros the forces in nbat
+        void atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality  locality,
+                                      rvec                *f,
+                                      gmx_wallcycle       *wcycle);
+
         //! Return the kernel setup
         const Nbnxm::KernelSetup &kernelSetup() const
         {
@@ -288,14 +407,12 @@ struct nonbonded_verlet_t
             kernelSetup_ = kernelSetup;
         }
 
-        //! Parameters for the search and list pruning setup
-        std::unique_ptr<NbnxnListParameters>  listParams;
+        // TODO: Make all data members private
+    public:
+        //! All data related to the pair lists
+        std::unique_ptr<PairlistSets>         pairlistSets_;
         //! Working data for constructing the pairlists
         std::unique_ptr<nbnxn_search>         nbs;
-    private:
-        //! Local and, optionally, non-local pairlist sets
-        std::vector<nbnxn_pairlist_set_t>     pairlistSets_;
-    public:
         //! Atom data
         nbnxn_atomdata_t                     *nbat;
 
@@ -305,7 +422,6 @@ struct nonbonded_verlet_t
     public:
 
         gmx_nbnxn_gpu_t     *gpu_nbv;         /**< pointer to GPU nb verlet data     */
-        int                  min_ci_balanced; /**< pair list balancing parameter used for the 8x8x8 GPU kernels    */
 };
 
 namespace Nbnxm
@@ -371,24 +487,4 @@ void nbnxn_set_atomorder(nbnxn_search *nbs);
 /*! \brief Returns the index position of the atoms on the pairlist search grid */
 gmx::ArrayRef<const int> nbnxn_get_gridindices(const nbnxn_search* nbs);
 
-/*! \brief Returns the number of steps performed with the current pair list */
-int nbnxnNumStepsWithPairlist(const nonbonded_verlet_t   &nbv,
-                              Nbnxm::InteractionLocality  ilocality,
-                              int64_t                     step);
-
-/*! \brief Returns whether step is a dynamic list pruning step */
-bool nbnxnIsDynamicPairlistPruningStep(const nonbonded_verlet_t   &nbv,
-                                       Nbnxm::InteractionLocality  ilocality,
-                                       int64_t                     step);
-
-/*! \brief Executes the non-bonded kernel of the GPU or launches it on the GPU */
-void NbnxnDispatchKernel(nonbonded_verlet_t         *nbv,
-                         Nbnxm::InteractionLocality  iLocality,
-                         const interaction_const_t  &ic,
-                         int                         forceFlags,
-                         int                         clearF,
-                         t_forcerec                 *fr,
-                         gmx_enerdata_t             *enerd,
-                         t_nrnb                     *nrnb);
-
 #endif // GMX_NBNXN_NBNXN_H
index eef7c9a3eddefaf39ceafa2649942b84e2c75561..7b149544478a8ce30c839683557aef31bcacd9a3 100644 (file)
@@ -292,18 +292,57 @@ pick_nbnxn_kernel(const gmx::MDLogger     &mdlog,
 
 } // namespace Nbnxm
 
-void nonbonded_verlet_t::initPairlistSets(const bool haveMultipleDomains)
+nonbonded_verlet_t::PairlistSets::PairlistSets(const NbnxnListParameters &listParams,
+                                               const bool                 haveMultipleDomains,
+                                               const int                  minimumIlistCountForGpuBalancing) :
+    params_(listParams),
+    minimumIlistCountForGpuBalancing_(minimumIlistCountForGpuBalancing)
 {
-    pairlistSets_.emplace_back(*listParams);
+    localSet_ = std::make_unique<nbnxn_pairlist_set_t>(params_);
+
     if (haveMultipleDomains)
     {
-        pairlistSets_.emplace_back(*listParams);
+        nonlocalSet_ = std::make_unique<nbnxn_pairlist_set_t>(params_);
     }
 }
 
 namespace Nbnxm
 {
 
+/*! \brief Gets and returns the minimum i-list count for balancing based on the GPU used or env.var. when set */
+static int getMinimumIlistCountForGpuBalancing(gmx_nbnxn_gpu_t *nbnxmGpu)
+{
+    int minimumIlistCount;
+
+    if (const char *env = getenv("GMX_NB_MIN_CI"))
+    {
+        char *end;
+
+        minimumIlistCount = strtol(env, &end, 10);
+        if (!end || (*end != 0) || minimumIlistCount < 0)
+        {
+            gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, non-negative integer required", env);
+        }
+
+        if (debug)
+        {
+            fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
+                    minimumIlistCount);
+        }
+    }
+    else
+    {
+        minimumIlistCount = gpu_min_ci_balanced(nbnxmGpu);
+        if (debug)
+        {
+            fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
+                    minimumIlistCount);
+        }
+    }
+
+    return minimumIlistCount;
+}
+
 void init_nb_verlet(const gmx::MDLogger     &mdlog,
                     nonbonded_verlet_t     **nb_verlet,
                     gmx_bool                 bFEP_NonBonded,
@@ -342,22 +381,12 @@ void init_nb_verlet(const gmx::MDLogger     &mdlog,
                                           nonbondedResource, ir,
                                           fr->bNonbonded));
 
-    const bool haveMultipleDomains = (DOMAINDECOMP(cr) && cr->dd->nnodes > 1);
-
-    nbv->listParams = std::make_unique<NbnxnListParameters>(nbv->kernelSetup().kernelType,
-                                                            ir->rlist);
-    nbv->initPairlistSets(haveMultipleDomains);
+    const bool          haveMultipleDomains = (DOMAINDECOMP(cr) && cr->dd->nnodes > 1);
 
-    nbv->min_ci_balanced = 0;
+    NbnxnListParameters listParams(nbv->kernelSetup().kernelType, ir->rlist);
 
     setupDynamicPairlistPruning(mdlog, ir, mtop, box, fr->ic,
-                                nbv->listParams.get());
-
-    nbv->nbs = std::make_unique<nbnxn_search>(ir->ePBC,
-                                              DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr,
-                                              DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : nullptr,
-                                              bFEP_NonBonded,
-                                              gmx_omp_nthreads_get(emntPairsearch));
+                                &listParams);
 
     int      enbnxninitcombrule;
     if (fr->ic->vdwtype == evdwCUT &&
@@ -404,6 +433,7 @@ void init_nb_verlet(const gmx::MDLogger     &mdlog,
                         mimimumNumEnergyGroupNonbonded,
                         nbv->pairlistIsSimple() ? gmx_omp_nthreads_get(emntNonbonded) : 1);
 
+    int minimumIlistCountForGpuBalancing = 0;
     if (useGpu)
     {
         /* init the NxN GPU data; the last argument tells whether we'll have
@@ -411,39 +441,25 @@ void init_nb_verlet(const gmx::MDLogger     &mdlog,
         gpu_init(&nbv->gpu_nbv,
                  deviceInfo,
                  fr->ic,
-                 nbv->listParams.get(),
+                 &listParams,
                  nbv->nbat,
                  cr->nodeid,
                  haveMultipleDomains);
 
-        if (const char *env = getenv("GMX_NB_MIN_CI"))
-        {
-            char *end;
-
-            nbv->min_ci_balanced = strtol(env, &end, 10);
-            if (!end || (*end != 0) || nbv->min_ci_balanced < 0)
-            {
-                gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, non-negative integer required", env);
-            }
-
-            if (debug)
-            {
-                fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
-                        nbv->min_ci_balanced);
-            }
-        }
-        else
-        {
-            nbv->min_ci_balanced = gpu_min_ci_balanced(nbv->gpu_nbv);
-            if (debug)
-            {
-                fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
-                        nbv->min_ci_balanced);
-            }
-        }
-
+        minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(nbv->gpu_nbv);
     }
 
+    nbv->pairlistSets_ =
+        std::make_unique<nonbonded_verlet_t::PairlistSets>(listParams,
+                                                           haveMultipleDomains,
+                                                           minimumIlistCountForGpuBalancing);
+
+    nbv->nbs = std::make_unique<nbnxn_search>(ir->ePBC,
+                                              DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr,
+                                              DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : nullptr,
+                                              bFEP_NonBonded,
+                                              gmx_omp_nthreads_get(emntPairsearch));
+
     *nb_verlet = nbv;
 }
 
index 8365163e24c38c983da38ad89a0195c7310de175..d2327f94bd46a65b72cff93b5c289d65c85352bf 100644 (file)
@@ -4026,16 +4026,18 @@ static void sort_sci(NbnxnPairlistGpu *nbl)
     std::swap(nbl->sci, work.sci_sort);
 }
 
-void nbnxn_make_pairlist(nonbonded_verlet_t        *nbv,
-                         const InteractionLocality  iLocality,
-                         nbnxn_pairlist_set_t      *nbl_list,
-                         const t_blocka            *excl,
-                         const int64_t              step,
-                         t_nrnb                    *nrnb)
+void
+nonbonded_verlet_t::PairlistSets::construct(const InteractionLocality  iLocality,
+                                            nbnxn_search              *nbs,
+                                            nbnxn_atomdata_t          *nbat,
+                                            const t_blocka            *excl,
+                                            const Nbnxm::KernelType    kernelType,
+                                            const int64_t              step,
+                                            t_nrnb                    *nrnb)
 {
-    nbnxn_search         *nbs      = nbv->nbs.get();
-    nbnxn_atomdata_t     *nbat     = nbv->nbat;
-    const real            rlist    = nbv->listParams->rlistOuter;
+    nbnxn_pairlist_set_t *nbl_list = &pairlistSet(iLocality);
+
+    const real            rlist    = nbl_list->params.rlistOuter;
 
     int                nsubpair_target;
     float              nsubpair_tot_est;
@@ -4071,9 +4073,9 @@ void nbnxn_make_pairlist(nonbonded_verlet_t        *nbv,
         nzi = nbs->zones->nizone;
     }
 
-    if (!nbl_list->bSimple && nbv->min_ci_balanced > 0)
+    if (!nbl_list->bSimple && minimumIlistCountForGpuBalancing_ > 0)
     {
-        get_nsubpair_target(nbs, iLocality, rlist, nbv->min_ci_balanced,
+        get_nsubpair_target(nbs, iLocality, rlist, minimumIlistCountForGpuBalancing_,
                             &nsubpair_target, &nsubpair_tot_est);
     }
     else
@@ -4164,7 +4166,7 @@ void nbnxn_make_pairlist(nonbonded_verlet_t        *nbv,
                         nbnxn_make_pairlist_part(nbs, iGrid, jGrid,
                                                  &nbs->work[th], nbat, *excl,
                                                  rlist,
-                                                 nbv->kernelSetup().kernelType,
+                                                 kernelType,
                                                  ci_block,
                                                  nbat->bUseBufferFlags,
                                                  nsubpair_target,
@@ -4178,7 +4180,7 @@ void nbnxn_make_pairlist(nonbonded_verlet_t        *nbv,
                         nbnxn_make_pairlist_part(nbs, iGrid, jGrid,
                                                  &nbs->work[th], nbat, *excl,
                                                  rlist,
-                                                 nbv->kernelSetup().kernelType,
+                                                 kernelType,
                                                  ci_block,
                                                  nbat->bUseBufferFlags,
                                                  nsubpair_target,
@@ -4291,7 +4293,15 @@ void nbnxn_make_pairlist(nonbonded_verlet_t        *nbv,
         GMX_ASSERT(nbl_list->nbl[0]->ciOuter.empty(), "ciOuter is invalid so it should be empty");
     }
 
-    nbl_list->outerListCreationStep = step;
+    if (iLocality == Nbnxm::InteractionLocality::Local)
+    {
+        outerListCreationStep_ = step;
+    }
+    else
+    {
+        GMX_RELEASE_ASSERT(outerListCreationStep_ == step,
+                           "Outer list should be created at the same step as the inner list");
+    }
 
     /* Special performance logging stuff (env.var. GMX_NBNXN_CYCLE) */
     if (iLocality == InteractionLocality::Local)
@@ -4346,19 +4356,30 @@ void nbnxn_make_pairlist(nonbonded_verlet_t        *nbv,
         }
     }
 
-    if (nbv->listParams->useDynamicPruning && !nbv->useGpu())
+    if (params_.useDynamicPruning && nbl_list->bSimple)
     {
         nbnxnPrepareListForDynamicPruning(nbl_list);
     }
+}
+
+void
+nonbonded_verlet_t::constructPairlist(const Nbnxm::InteractionLocality  iLocality,
+                                      const t_blocka                   *excl,
+                                      int64_t                           step,
+                                      t_nrnb                           *nrnb)
+{
+    pairlistSets_->construct(iLocality, nbs.get(), nbat, excl,
+                             kernelSetup_.kernelType,
+                             step, nrnb);
 
-    if (nbv->useGpu())
+    if (useGpu())
     {
         /* Launch the transfer of the pairlist to the GPU.
          *
          * NOTE: The launch overhead is currently not timed separately
          */
-        Nbnxm::gpu_init_pairlist(nbv->gpu_nbv,
-                                 nbl_list->nblGpu[0],
+        Nbnxm::gpu_init_pairlist(gpu_nbv,
+                                 pairlistSets().pairlistSet(iLocality).nblGpu[0],
                                  iLocality);
     }
 }
index d54708150e3b396dfb493e847b1d86e933dc79cb..3f3bde3ad1fccf3e79b7f6f6218e538e5240b47c 100644 (file)
@@ -53,6 +53,7 @@
 // to include it during OpenCL jitting without including config.h
 #include "gromacs/nbnxm/constants.h"
 
+struct NbnxnListParameters;
 struct NbnxnPairlistCpuWork;
 struct NbnxnPairlistGpuWork;
 struct tMPI_Atomic;
@@ -82,29 +83,6 @@ enum class PairlistType : int
 static constexpr gmx::EnumerationArray<PairlistType, int> IClusterSizePerListType = { 4, 4, 4, 8 };
 static constexpr gmx::EnumerationArray<PairlistType, int> JClusterSizePerListType = { 2, 4, 8, 8 };
 
-/*! \cond INTERNAL */
-
-/*! \brief The setup for generating and pruning the nbnxn pair list.
- *
- * Without dynamic pruning rlistOuter=rlistInner.
- */
-struct NbnxnListParameters
-{
-    /*! \brief Constructor producing a struct with dynamic pruning disabled
-     */
-    NbnxnListParameters(Nbnxm::KernelType kernelType,
-                        real              rlist);
-
-    PairlistType pairlistType;      //!< The type of cluster-pair list
-    bool         useDynamicPruning; //!< Are we using dynamic pair-list pruning
-    int          nstlistPrune;      //!< Pair-list dynamic pruning interval
-    real         rlistOuter;        //!< Cut-off of the larger, outer pair-list
-    real         rlistInner;        //!< Cut-off of the smaller, inner pair-list
-    int          numRollingParts;   //!< The number parts to divide the pair-list into for rolling pruning, a value of 1 gives no rolling pruning
-};
-
-/*! \endcond */
-
 /* With CPU kernels the i-cluster size is always 4 atoms. */
 static constexpr int c_nbnxnCpuIClusterSize = 4;
 
@@ -307,7 +285,6 @@ struct nbnxn_pairlist_set_t
     int                     natpair_lj;            /* Total number of atom pairs for LJ kernel   */
     int                     natpair_q;             /* Total number of atom pairs for Q kernel    */
     std::vector<t_nblist *> nbl_fep;               /* List of free-energy atom pair interactions */
-    int64_t                 outerListCreationStep; /* Step at which the outer list was created */
 };
 
 enum {
index 18b0b12a5c18fb23e2f50d0c7482f092be900cc6..11473517dfb0ffa90fc20dd1820eee9afb252330 100644 (file)
@@ -90,18 +90,4 @@ nbnxn_pairlist_set_t::nbnxn_pairlist_set_t(const NbnxnListParameters &listParams
     nbnxn_init_pairlist_set(this);
 }
 
-int nbnxnNumStepsWithPairlist(const nonbonded_verlet_t         &nbv,
-                              const Nbnxm::InteractionLocality  iLocality,
-                              const int64_t                     step)
-{
-    return step - nbv.pairlistSet(iLocality).outerListCreationStep;
-}
-
-bool nbnxnIsDynamicPairlistPruningStep(const nonbonded_verlet_t         &nbv,
-                                       const Nbnxm::InteractionLocality  iLocality,
-                                       const int64_t                     step)
-{
-    return nbnxnNumStepsWithPairlist(nbv, iLocality, step) % nbv.listParams->nstlistPrune == 0;
-}
-
 /*! \endcond */
index 82bcdef6e1f91f0ec5befd49f854133ea6e09c90..d550537f5bf5e26fa792c90e96a82313dc701856 100644 (file)
 #define GMX_NBNXM_PAIRLISTSET_H
 
 #include "gromacs/math/vectypes.h"
+#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/pairlist.h"
 #include "gromacs/utility/basedefinitions.h"
 #include "gromacs/utility/real.h"
 
 #include "locality.h"
 
-struct nbnxn_pairlist_set_t;
-
 /* Initializes a set of pair lists stored in nbnxn_pairlist_set_t
  *
  * TODO: Merge into the constructor
index 8caddc494e6d857754c5e5b9ee82e30ce0ac3f8c..6ae794fbff4c8e2f69256663efd59ff378464d54 100644 (file)
 #include "kernels_simd_4xm/kernel_prune.h"
 
 
-void NbnxnDispatchPruneKernel(nbnxn_pairlist_set_t   *nbl_lists,
-                              const Nbnxm::KernelType kernelType,
-                              const nbnxn_atomdata_t *nbat,
-                              const rvec             *shift_vec)
+void
+nonbonded_verlet_t::PairlistSets::dispatchPruneKernel(const Nbnxm::InteractionLocality  iLocality,
+                                                      const nbnxn_atomdata_t           *nbat,
+                                                      const rvec                       *shift_vec,
+                                                      const Nbnxm::KernelType           kernelType)
 {
-    const real rlistInner = nbl_lists->params.rlistInner;
+    nbnxn_pairlist_set_t *nbl_lists  = &pairlistSet(iLocality);
+
+    const real            rlistInner = nbl_lists->params.rlistInner;
 
     GMX_ASSERT(nbl_lists->nbl[0]->ciOuter.size() >= nbl_lists->nbl[0]->ci.size(),
                "Here we should either have an empty ci list or ciOuter should be >= ci");
@@ -77,3 +80,10 @@ void NbnxnDispatchPruneKernel(nbnxn_pairlist_set_t   *nbl_lists,
         }
     }
 }
+
+void
+nonbonded_verlet_t::dispatchPruneKernel(const Nbnxm::InteractionLocality  iLocality,
+                                        const rvec                       *shift_vec)
+{
+    pairlistSets_->dispatchPruneKernel(iLocality, nbat, shift_vec, kernelSetup_.kernelType);
+}