Simplify make_pairlist() call signature
[alexxy/gromacs.git] / src / gromacs / mdlib / sim_util.cpp
index c057604c94f4927def8c7c6a4212383fc8b3e0b1..f68d7c841a891da63fbd9046da8587e95d050d7d 100644 (file)
@@ -403,8 +403,7 @@ static void do_nb_verlet(t_forcerec                       *fr,
         return;
     }
 
-    nonbonded_verlet_t       *nbv  = fr->nbv;
-    nonbonded_verlet_group_t *nbvg = &nbv->grp[ilocality];
+    nonbonded_verlet_t *nbv  = fr->nbv;
 
     /* GPU kernel launch overhead is already timed separately */
     if (fr->cutoff_scheme != ecutsVERLET)
@@ -412,15 +411,13 @@ static void do_nb_verlet(t_forcerec                       *fr,
         gmx_incons("Invalid cut-off scheme passed!");
     }
 
-    bool bUsingGpuKernels = (nbvg->kernel_type == nbnxnk8x8x8_GPU);
-
-    if (!bUsingGpuKernels)
+    if (!nbv->useGpu())
     {
         /* When dynamic pair-list  pruning is requested, we need to prune
          * at nstlistPrune steps.
          */
         if (nbv->listParams->useDynamicPruning &&
-            (step - nbvg->nbl_lists.outerListCreationStep) % nbv->listParams->nstlistPrune == 0)
+            nbnxnIsDynamicPairlistPruningStep(*nbv, ilocality, step))
         {
             /* Prune the pair-list beyond fr->ic->rlistPrune using
              * the current coordinates of the atoms.
@@ -435,7 +432,7 @@ static void do_nb_verlet(t_forcerec                       *fr,
 
     NbnxnDispatchKernel(nbv, ilocality, *ic, flags, clearF, fr, enerd, nrnb);
 
-    if (!bUsingGpuKernels)
+    if (!nbv->useGpu())
     {
         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
     }
@@ -555,7 +552,7 @@ static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
 
 gmx_bool use_GPU(const nonbonded_verlet_t *nbv)
 {
-    return nbv != nullptr && nbv->bUseGPU;
+    return nbv != nullptr && nbv->useGpu();
 }
 
 static inline void clear_rvecs_omp(int n, rvec v[])
@@ -907,7 +904,7 @@ static inline void launchGpuRollingPruning(const t_commrec          *cr,
      */
     int  numRollingParts     = nbv->listParams->numRollingParts;
     GMX_ASSERT(numRollingParts == nbv->listParams->nstlistPrune/2, "Since we alternate local/non-local at even/odd steps, we need numRollingParts<=nstlistPrune/2 for correctness and == for efficiency");
-    int  stepWithCurrentList = step - nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists.outerListCreationStep;
+    int  stepWithCurrentList = nbnxnNumStepsWithPairlist(*nbv, Nbnxm::InteractionLocality::Local, step);
     bool stepIsEven          = ((stepWithCurrentList & 1) == 0);
     if (stepWithCurrentList > 0 &&
         stepWithCurrentList < inputrec->nstlist - 1 &&
@@ -961,8 +958,8 @@ static void do_force_cutsVERLET(FILE *fplog,
     bFillGrid     = (bNS && bStateChanged);
     bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
     bDoForces     = ((flags & GMX_FORCE_FORCES) != 0);
-    bUseGPU       = fr->nbv->bUseGPU;
-    bUseOrEmulGPU = bUseGPU || (fr->nbv->emulateGpu == EmulateGpuNonbonded::Yes);
+    bUseGPU       = fr->nbv->useGpu();
+    bUseOrEmulGPU = bUseGPU || fr->nbv->emulateGpu();
 
     const auto pmeRunMode = fr->pmedata ? pme_run_mode(fr->pmedata) : PmeRunMode::CPU;
     // TODO slim this conditional down - inputrec and duty checks should mean the same in proper code!
@@ -1079,22 +1076,18 @@ static void do_force_cutsVERLET(FILE *fplog,
         if (!DOMAINDECOMP(cr))
         {
             wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL);
-            nbnxn_put_on_grid(nbv->nbs.get(), fr->ePBC, box,
+            nbnxn_put_on_grid(nbv, box,
                               0, vzero, box_diag,
                               nullptr, 0, mdatoms->homenr, -1,
                               fr->cginfo, x.unpaddedArrayRef(),
-                              0, nullptr,
-                              nbv->grp[Nbnxm::InteractionLocality::Local].kernel_type,
-                              nbv->nbat);
+                              0, nullptr);
             wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL);
         }
         else
         {
             wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL);
-            nbnxn_put_on_grid_nonlocal(nbv->nbs.get(), domdec_zones(cr->dd),
-                                       fr->cginfo, x.unpaddedArrayRef(),
-                                       nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type,
-                                       nbv->nbat);
+            nbnxn_put_on_grid_nonlocal(nbv, domdec_zones(cr->dd),
+                                       fr->cginfo, x.unpaddedArrayRef());
             wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
         }
 
@@ -1142,32 +1135,12 @@ static void do_force_cutsVERLET(FILE *fplog,
     /* do local pair search */
     if (bNS)
     {
-        nbnxn_pairlist_set_t &pairlistSet = nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists;
-
         wallcycle_start_nocount(wcycle, ewcNS);
         wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
-        nbnxn_make_pairlist(nbv->nbs.get(), nbv->nbat,
-                            &top->excls,
-                            nbv->listParams->rlistOuter,
-                            nbv->min_ci_balanced,
-                            &pairlistSet,
-                            Nbnxm::InteractionLocality::Local,
-                            nbv->grp[Nbnxm::InteractionLocality::Local].kernel_type,
-                            nrnb);
-        pairlistSet.outerListCreationStep = step;
-        if (nbv->listParams->useDynamicPruning && !bUseGPU)
-        {
-            nbnxnPrepareListForDynamicPruning(&pairlistSet);
-        }
+        /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
+        nbnxn_make_pairlist(nbv, Nbnxm::InteractionLocality::Local,
+                            &top->excls, step, nrnb);
         wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
-
-        if (bUseGPU)
-        {
-            /* initialize local pair-list on the GPU */
-            Nbnxm::gpu_init_pairlist(nbv->gpu_nbv,
-                                     pairlistSet.nblGpu[0],
-                                     Nbnxm::InteractionLocality::Local);
-        }
         wallcycle_stop(wcycle, ewcNS);
     }
     else
@@ -1217,35 +1190,14 @@ static void do_force_cutsVERLET(FILE *fplog,
        do non-local pair search */
     if (havePPDomainDecomposition(cr))
     {
-        nbnxn_pairlist_set_t &pairlistSet = nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists;
-
         if (bNS)
         {
             wallcycle_start_nocount(wcycle, ewcNS);
             wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
-
-            nbnxn_make_pairlist(nbv->nbs.get(), nbv->nbat,
-                                &top->excls,
-                                nbv->listParams->rlistOuter,
-                                nbv->min_ci_balanced,
-                                &pairlistSet,
-                                Nbnxm::InteractionLocality::NonLocal,
-                                nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type,
-                                nrnb);
-            pairlistSet.outerListCreationStep = step;
-            if (nbv->listParams->useDynamicPruning && !bUseGPU)
-            {
-                nbnxnPrepareListForDynamicPruning(&pairlistSet);
-            }
+            /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
+            nbnxn_make_pairlist(nbv, Nbnxm::InteractionLocality::NonLocal,
+                                &top->excls, step, nrnb);
             wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
-
-            if (nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type == nbnxnk8x8x8_GPU)
-            {
-                /* initialize non-local pair-list on the GPU */
-                Nbnxm::gpu_init_pairlist(nbv->gpu_nbv,
-                                         pairlistSet.nblGpu[0],
-                                         Nbnxm::InteractionLocality::NonLocal);
-            }
             wallcycle_stop(wcycle, ewcNS);
         }
         else
@@ -1410,18 +1362,18 @@ static void do_force_cutsVERLET(FILE *fplog,
         /* Calculate the local and non-local free energy interactions here.
          * Happens here on the CPU both with and without GPU.
          */
-        if (fr->nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists.nbl_fep[0]->nrj > 0)
+        if (fr->nbv->pairlistSets[Nbnxm::InteractionLocality::Local].nbl_fep[0]->nrj > 0)
         {
-            do_nb_verlet_fep(&fr->nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists,
+            do_nb_verlet_fep(&fr->nbv->pairlistSets[Nbnxm::InteractionLocality::Local],
                              fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
                              inputrec->fepvals, lambda,
                              enerd, flags, nrnb, wcycle);
         }
 
         if (DOMAINDECOMP(cr) &&
-            fr->nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists.nbl_fep[0]->nrj > 0)
+            fr->nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal].nbl_fep[0]->nrj > 0)
         {
-            do_nb_verlet_fep(&fr->nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists,
+            do_nb_verlet_fep(&fr->nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal],
                              fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
                              inputrec->fepvals, lambda,
                              enerd, flags, nrnb, wcycle);
@@ -1451,7 +1403,7 @@ static void do_force_cutsVERLET(FILE *fplog,
 
         /* if there are multiple fshift output buffers reduce them */
         if ((flags & GMX_FORCE_VIRIAL) &&
-            nbv->grp[iloc].nbl_lists.nnbl > 1)
+            nbv->pairlistSets[iloc].nnbl > 1)
         {
             /* This is not in a subcounter because it takes a
                negligible and constant-sized amount of time */
@@ -1506,7 +1458,7 @@ static void do_force_cutsVERLET(FILE *fplog,
             }
 
             /* skip the reduction if there was no non-local work to do */
-            if (!nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists.nblGpu[0]->sci.empty())
+            if (!nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal].nblGpu[0]->sci.empty())
             {
                 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::NonLocal,
                                                nbv->nbat, f, wcycle);
@@ -1576,7 +1528,7 @@ static void do_force_cutsVERLET(FILE *fplog,
         }
     }
 
-    if (fr->nbv->emulateGpu == EmulateGpuNonbonded::Yes)
+    if (fr->nbv->emulateGpu())
     {
         // NOTE: emulation kernel is not included in the balancing region,
         // but emulation mode does not target performance anyway