Move all nbnxm pruning details into module
author Berk Hess <hess@kth.se>
Fri, 22 Feb 2019 21:41:01 +0000 (22:41 +0100)
committer Mark Abraham <mark.j.abraham@gmail.com>
Sat, 23 Feb 2019 11:14:44 +0000 (12:14 +0100)
Change-Id: I906624a83992d1da81e5b2aef0f73c296986842c

src/gromacs/mdlib/sim_util.cpp
src/gromacs/nbnxm/nbnxm.h
src/gromacs/nbnxm/nbnxm_setup.cpp
src/gromacs/nbnxm/pairlist_tuning.cpp
src/gromacs/nbnxm/pairlistset.cpp
src/gromacs/nbnxm/prunekerneldispatch.cpp

index f8f7c388aeb2503fd46a93443046828c1fa56214..b00c446a303595cdac2e8b3a5d7fe5576dd69e22 100644 (file)
@@ -416,13 +416,13 @@ static void do_nb_verlet(t_forcerec                       *fr,
         /* When dynamic pair-list  pruning is requested, we need to prune
          * at nstlistPrune steps.
          */
-        if (nbv->pairlistSets().isDynamicPairlistPruningStep(step))
+        if (nbv->pairlistSets().isDynamicPruningStepCpu(step))
         {
             /* Prune the pair-list beyond fr->ic->rlistPrune using
              * the current coordinates of the atoms.
              */
             wallcycle_sub_start(wcycle, ewcsNONBONDED_PRUNING);
-            nbv->dispatchPruneKernel(ilocality, fr->shift_vec);
+            nbv->dispatchPruneKernelCpu(ilocality, fr->shift_vec);
             wallcycle_sub_stop(wcycle, ewcsNONBONDED_PRUNING);
         }
 
@@ -767,44 +767,6 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t                  *nbv
     }
 }
 
-/*! \brief
- *  Launch the dynamic rolling pruning GPU task.
- *
- *  We currently alternate local/non-local list pruning in odd-even steps
- *  (only pruning every second step without DD).
- *
- * \param[in]     cr               The communication record
- * \param[in]     nbv              Nonbonded verlet structure
- * \param[in]     inputrec         The input record
- * \param[in]     step             The current MD step
- */
-static inline void launchGpuRollingPruning(const t_commrec          *cr,
-                                           const nonbonded_verlet_t *nbv,
-                                           const t_inputrec         *inputrec,
-                                           const int64_t             step)
-{
-    /* We should not launch the rolling pruning kernel at a search
-     * step or just before search steps, since that's useless.
-     * Without domain decomposition we prune at even steps.
-     * With domain decomposition we alternate local and non-local
-     * pruning at even and odd steps.
-     */
-    int  numRollingParts     = nbv->pairlistSets().params().numRollingParts;
-    GMX_ASSERT(numRollingParts == nbv->pairlistSets().params().nstlistPrune/2,
-               "Since we alternate local/non-local at even/odd steps, "
-               "we need numRollingParts<=nstlistPrune/2 for correctness and == for efficiency");
-    int  stepWithCurrentList = nbv->pairlistSets().numStepsWithPairlist(step);
-    bool stepIsEven          = ((stepWithCurrentList & 1) == 0);
-    if (stepWithCurrentList > 0 &&
-        stepWithCurrentList < inputrec->nstlist - 1 &&
-        (stepIsEven || havePPDomainDecomposition(cr)))
-    {
-        Nbnxm::gpu_launch_kernel_pruneonly(nbv->gpu_nbv,
-                                           stepIsEven ? Nbnxm::InteractionLocality::Local : Nbnxm::InteractionLocality::NonLocal,
-                                           numRollingParts);
-    }
-}
-
 static void do_force_cutsVERLET(FILE *fplog,
                                 const t_commrec *cr,
                                 const gmx_multisim_t *ms,
@@ -1430,10 +1392,9 @@ static void do_force_cutsVERLET(FILE *fplog,
         wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
         Nbnxm::gpu_clear_outputs(nbv->gpu_nbv, flags);
 
-        /* Is dynamic pair-list pruning activated? */
-        if (nbv->pairlistSets().params().useDynamicPruning)
+        if (nbv->pairlistSets().isDynamicPruningStepGpu(step))
         {
-            launchGpuRollingPruning(cr, nbv, inputrec, step);
+            nbv->dispatchPruneKernelGpu(step);
         }
         wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
         wallcycle_stop(wcycle, ewcLAUNCH_GPU);
index c15c0270c68b105ba30460509ff94a3d3e2969e4..a3fce4a01d573389c1082b2432847763d18e6e5b 100644 (file)
@@ -150,14 +150,17 @@ struct NbnxnListParameters
     /*! \brief Constructor producing a struct with dynamic pruning disabled
      */
     NbnxnListParameters(Nbnxm::KernelType kernelType,
-                        real              rlist);
-
-    PairlistType pairlistType;      //!< The type of cluster-pair list
-    bool         useDynamicPruning; //!< Are we using dynamic pair-list pruning
-    int          nstlistPrune;      //!< Pair-list dynamic pruning interval
-    real         rlistOuter;        //!< Cut-off of the larger, outer pair-list
-    real         rlistInner;        //!< Cut-off of the smaller, inner pair-list
-    int          numRollingParts;   //!< The number parts to divide the pair-list into for rolling pruning, a value of 1 gives no rolling pruning
+                        real              rlist,
+                        bool              haveMultipleDomains);
+
+    PairlistType pairlistType;           //!< The type of cluster-pair list
+    real         rlistOuter;             //!< Cut-off of the larger, outer pair-list
+    real         rlistInner;             //!< Cut-off of the smaller, inner pair-list
+    bool         haveMultipleDomains;    //!< True when using DD with multiple domains
+    bool         useDynamicPruning;      //!< Are we using dynamic pair-list pruning
+    int          nstlistPrune;           //!< Pair-list dynamic pruning interval
+    int          numRollingPruningParts; //!< The number of parts to divide the pair-list into for rolling pruning, a value of 1 gives no rolling pruning
+    int          lifetime;               //!< Lifetime in steps of the pair-list
 };
 
 /*! \brief Resources that can be used to execute non-bonded kernels on */
@@ -276,13 +279,24 @@ struct nonbonded_verlet_t
                     return step - outerListCreationStep_;
                 }
 
-                //! Returns whether step is a dynamic list pruning step, for CPU lists only
-                bool isDynamicPairlistPruningStep(int64_t step) const
+                //! Returns whether step is a dynamic list pruning step, for CPU lists
+                bool isDynamicPruningStepCpu(int64_t step) const
                 {
                     return (params_.useDynamicPruning &&
                             numStepsWithPairlist(step) % params_.nstlistPrune == 0);
                 }
 
+                //! Returns whether step is a dynamic list pruning step, for GPU lists
+                bool isDynamicPruningStepGpu(int64_t step) const
+                {
+                    const int age = numStepsWithPairlist(step);
+
+                    return (params_.useDynamicPruning &&
+                            age > 0 &&
+                            age < params_.lifetime &&
+                            (params_.haveMultipleDomains || age % 2 == 0));
+                }
+
                 //! Changes the pair-list outer and inner radius
                 void changeRadii(real rlistOuter,
                                  real rlistInner)
@@ -365,9 +379,19 @@ struct nonbonded_verlet_t
             return *pairlistSets_;
         }
 
-        //! Dispatches the dynamic pruning kernel for the given locality
-        void dispatchPruneKernel(Nbnxm::InteractionLocality  iLocality,
-                                 const rvec                 *shift_vec);
+        //! Dispatches the dynamic pruning kernel for the given locality, for CPU lists
+        void dispatchPruneKernelCpu(Nbnxm::InteractionLocality  iLocality,
+                                    const rvec                 *shift_vec);
+
+        //! Dispatches the dynamic pruning kernel for GPU lists
+        void dispatchPruneKernelGpu(int64_t step)
+        {
+            const bool stepIsEven = (pairlistSets().numStepsWithPairlist(step) % 2 == 0);
+
+            Nbnxm::gpu_launch_kernel_pruneonly(gpu_nbv,
+                                               stepIsEven ? Nbnxm::InteractionLocality::Local : Nbnxm::InteractionLocality::NonLocal,
+                                               pairlistSets().params().numRollingPruningParts);
+        }
 
         //! \brief Executes the non-bonded kernel of the GPU or launches it on the GPU
         void dispatchNonbondedKernel(Nbnxm::InteractionLocality  iLocality,
index 7b149544478a8ce30c839683557aef31bcacd9a3..da474ae8ba0b15474d6f1757649e32e2b21db02c 100644 (file)
@@ -383,7 +383,9 @@ void init_nb_verlet(const gmx::MDLogger     &mdlog,
 
     const bool          haveMultipleDomains = (DOMAINDECOMP(cr) && cr->dd->nnodes > 1);
 
-    NbnxnListParameters listParams(nbv->kernelSetup().kernelType, ir->rlist);
+    NbnxnListParameters listParams(nbv->kernelSetup().kernelType,
+                                   ir->rlist,
+                                   havePPDomainDecomposition(cr));
 
     setupDynamicPairlistPruning(mdlog, ir, mtop, box, fr->ic,
                                 &listParams);
index cf12e6d3f5ba2adcba7d01eee8f628eddff889fb..26ec9039183fdd4f56085615cf0952c5b0926bd0 100644 (file)
@@ -388,6 +388,8 @@ setDynamicPairlistPruningParameters(const t_inputrec             *ir,
                                     const interaction_const_t    *ic,
                                     NbnxnListParameters          *listParams)
 {
+    listParams->lifetime = ir->nstlist - 1;
+
     /* When nstlistPrune was set by the user, we need to execute one loop
      * iteration to determine rlistInner.
      * Otherwise we compute rlistInner and increase nstlist as long as
@@ -439,7 +441,7 @@ setDynamicPairlistPruningParameters(const t_inputrec             *ir,
          */
         listParams->useDynamicPruning =
             (listParams->rlistInner + rlistInc < 0.99*(listParams->rlistOuter + rlistInc) &&
-             listParams->nstlistPrune < ir->nstlist - 1);
+             listParams->nstlistPrune < listParams->lifetime);
     }
 
     if (!listParams->useDynamicPruning)
@@ -543,11 +545,11 @@ void setupDynamicPairlistPruning(const gmx::MDLogger       &mdlog,
                                ( "With dynamic list pruning on GPUs pruning frequency must be at least as large as the rolling pruning interval (" +
                                  std::to_string(c_nbnxnGpuRollingListPruningInterval) +
                                  ").").c_str() );
-            listParams->numRollingParts = listParams->nstlistPrune/c_nbnxnGpuRollingListPruningInterval;
+            listParams->numRollingPruningParts = listParams->nstlistPrune/c_nbnxnGpuRollingListPruningInterval;
         }
         else
         {
-            listParams->numRollingParts = 1;
+            listParams->numRollingPruningParts = 1;
         }
     }
 
@@ -558,7 +560,7 @@ void setupDynamicPairlistPruning(const gmx::MDLogger       &mdlog,
     {
         mesg += gmx::formatString("Using a dual %dx%d pair-list setup updated with dynamic%s pruning:\n",
                                   ls.cluster_size_i, ls.cluster_size_j,
-                                  listParams->numRollingParts > 1 ? ", rolling" : "");
+                                  listParams->numRollingPruningParts > 1 ? ", rolling" : "");
         mesg += formatListSetup("outer", ir->nstlist, ir->nstlist, listParams->rlistOuter, interactionCutoff);
         mesg += formatListSetup("inner", listParams->nstlistPrune, ir->nstlist, listParams->rlistInner, interactionCutoff);
     }
index 11473517dfb0ffa90fc20dd1820eee9afb252330..de04304177a0439f8f4480a2f2171fc9849173d1 100644 (file)
 /*! \cond INTERNAL */
 
 NbnxnListParameters::NbnxnListParameters(const Nbnxm::KernelType kernelType,
-                                         const real              rlist) :
-    useDynamicPruning(false),
-    nstlistPrune(-1),
+                                         const real              rlist,
+                                         const bool              haveMultipleDomains) :
     rlistOuter(rlist),
     rlistInner(rlist),
-    numRollingParts(1)
+    haveMultipleDomains(haveMultipleDomains),
+    useDynamicPruning(false),
+    nstlistPrune(-1),
+    numRollingPruningParts(1),
+    lifetime(-1)
 {
     if (!Nbnxm::kernelTypeUsesSimplePairlist(kernelType))
     {
index 6ae794fbff4c8e2f69256663efd59ff378464d54..1b89ebbf835f0884bad51b35f5918e6008cbd2b8 100644 (file)
@@ -82,8 +82,8 @@ nonbonded_verlet_t::PairlistSets::dispatchPruneKernel(const Nbnxm::InteractionLo
 }
 
 void
-nonbonded_verlet_t::dispatchPruneKernel(const Nbnxm::InteractionLocality  iLocality,
-                                        const rvec                       *shift_vec)
+nonbonded_verlet_t::dispatchPruneKernelCpu(const Nbnxm::InteractionLocality  iLocality,
+                                           const rvec                       *shift_vec)
 {
     pairlistSets_->dispatchPruneKernel(iLocality, nbat, shift_vec, kernelSetup_.kernelType);
 }