Simplify make_pairlist() call signature
author Berk Hess <hess@kth.se>
Fri, 11 Jan 2019 15:34:13 +0000 (16:34 +0100)
committer Mark Abraham <mark.j.abraham@gmail.com>
Thu, 21 Feb 2019 05:36:24 +0000 (06:36 +0100)
The nonbonded_verlet_group_t struct is no longer needed now that the
local and non-local interactions always use the same kernel types.
This simplifies the make_pairlist() and put_on_grid() call signatures.
Removed direct access to kernel and ewald exclusion types from outside
the nbnxn module.
Merged the pruning setup and the transfer of the pairlist to the GPU
into nbnxn_make_pairlist().
Also removed the ePBC argument from nbnxn_put_on_grid().

Change-Id: Id96d7e5aa6ce846e9d38614f7edcdced89687799

19 files changed:
src/gromacs/domdec/partition.cpp
src/gromacs/mdlib/forcerec.cpp
src/gromacs/mdlib/forcerec.h
src/gromacs/mdlib/perf_est.cpp
src/gromacs/mdlib/sim_util.cpp
src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
src/gromacs/nbnxm/grid.cpp
src/gromacs/nbnxm/internal.h
src/gromacs/nbnxm/kerneldispatch.cpp
src/gromacs/nbnxm/nbnxm.h
src/gromacs/nbnxm/nbnxm_geometry.cpp
src/gromacs/nbnxm/nbnxm_geometry.h
src/gromacs/nbnxm/nbnxm_setup.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
src/gromacs/nbnxm/pairlist.cpp
src/gromacs/nbnxm/pairlist_tuning.cpp
src/gromacs/nbnxm/pairlistset.cpp [new file with mode: 0644]
src/gromacs/nbnxm/pairlistset.h
src/gromacs/nbnxm/prunekerneldispatch.cpp

index d2764efd62f3497826aa2de90bd72f79279f44c7..02b0d214f5bb10885bb4608692e58af482475066 100644 (file)
@@ -3415,7 +3415,7 @@ void dd_partition_system(FILE                    *fplog,
             case ecutsVERLET:
                 set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
 
-                nbnxn_put_on_grid(fr->nbv->nbs.get(), fr->ePBC, state_local->box,
+                nbnxn_put_on_grid(fr->nbv, state_local->box,
                                   0,
                                   comm->zones.size[0].bb_x0,
                                   comm->zones.size[0].bb_x1,
@@ -3424,9 +3424,7 @@ void dd_partition_system(FILE                    *fplog,
                                   comm->zones.dens_zone0,
                                   fr->cginfo,
                                   state_local->x,
-                                  ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr,
-                                  fr->nbv->grp[Nbnxm::InteractionLocality::Local].kernel_type,
-                                  fr->nbv->nbat);
+                                  ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr);
 
                 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_new[XX], &ncells_new[YY]);
                 break;
index 1dda3a2c67708d42dabe9a49e43752d421fa43d5..d4927425b35f17c6a4d548dcd3b2c3d723dda27e 100644 (file)
@@ -82,6 +82,7 @@
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 #include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/nbnxm_geometry.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/pbcutil/pbc.h"
 #include "gromacs/tables/forcetable.h"
@@ -1595,12 +1596,10 @@ static void initVdwEwaldParameters(FILE *fp, const t_inputrec *ir,
     }
 }
 
-gmx_bool uses_simple_tables(int                 cutoff_scheme,
-                            nonbonded_verlet_t *nbv,
-                            int                 group)
+gmx_bool uses_simple_tables(int                       cutoff_scheme,
+                            const nonbonded_verlet_t *nbv)
 {
     gmx_bool bUsesSimpleTables = TRUE;
-    int      grp_index;
 
     switch (cutoff_scheme)
     {
@@ -1608,9 +1607,8 @@ gmx_bool uses_simple_tables(int                 cutoff_scheme,
             bUsesSimpleTables = TRUE;
             break;
         case ecutsVERLET:
-            assert(nullptr != nbv);
-            grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
-            bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
+            GMX_RELEASE_ASSERT(nullptr != nbv, "A non-bonded verlet object is required with the Verlet cutoff-scheme");
+            bUsesSimpleTables = nbv->pairlistIsSimple();
             break;
         default:
             gmx_incons("unimplemented");
@@ -2677,7 +2675,7 @@ void init_forcerec(FILE                             *fp,
 void free_gpu_resources(t_forcerec                          *fr,
                         const gmx::PhysicalNodeCommunicator &physicalNodeCommunicator)
 {
-    bool isPPrankUsingGPU = (fr != nullptr) && (fr->nbv != nullptr) && fr->nbv->bUseGPU;
+    bool isPPrankUsingGPU = (fr != nullptr) && (fr->nbv != nullptr) && fr->nbv->useGpu();
 
     /* stop the GPU profiler (only CUDA) */
     stopGpuProfiler();
index b5a60c933b971ce978d8440a7b46d80b1709228a..fb16ca765a4acfe8faf4da8cff167f48659f9fe7 100644 (file)
@@ -153,9 +153,8 @@ void forcerec_set_excl_load(t_forcerec           *fr,
  */
 void update_forcerec(t_forcerec *fr, matrix box);
 
-gmx_bool uses_simple_tables(int                 cutoff_scheme,
-                            nonbonded_verlet_t *nbv,
-                            int                 group);
+gmx_bool uses_simple_tables(int                       cutoff_scheme,
+                            const nonbonded_verlet_t *nbv);
 /* Returns whether simple tables (i.e. not for use with GPUs) are used
  * with the type of kernel indicated.
  */
index 961eb50b343afda1e46ae81aeb305fe81d3392c4..8c5bc33de8cbe3fbab3e2dd99b2e5498fb414162 100644 (file)
@@ -46,7 +46,7 @@
 #include "gromacs/mdtypes/commrec.h"
 #include "gromacs/mdtypes/inputrec.h"
 #include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/nbnxm_geometry.h"
 #include "gromacs/simd/simd.h"
 #include "gromacs/topology/ifunc.h"
 #include "gromacs/topology/topology.h"
index c057604c94f4927def8c7c6a4212383fc8b3e0b1..f68d7c841a891da63fbd9046da8587e95d050d7d 100644 (file)
@@ -403,8 +403,7 @@ static void do_nb_verlet(t_forcerec                       *fr,
         return;
     }
 
-    nonbonded_verlet_t       *nbv  = fr->nbv;
-    nonbonded_verlet_group_t *nbvg = &nbv->grp[ilocality];
+    nonbonded_verlet_t *nbv  = fr->nbv;
 
     /* GPU kernel launch overhead is already timed separately */
     if (fr->cutoff_scheme != ecutsVERLET)
@@ -412,15 +411,13 @@ static void do_nb_verlet(t_forcerec                       *fr,
         gmx_incons("Invalid cut-off scheme passed!");
     }
 
-    bool bUsingGpuKernels = (nbvg->kernel_type == nbnxnk8x8x8_GPU);
-
-    if (!bUsingGpuKernels)
+    if (!nbv->useGpu())
     {
         /* When dynamic pair-list  pruning is requested, we need to prune
          * at nstlistPrune steps.
          */
         if (nbv->listParams->useDynamicPruning &&
-            (step - nbvg->nbl_lists.outerListCreationStep) % nbv->listParams->nstlistPrune == 0)
+            nbnxnIsDynamicPairlistPruningStep(*nbv, ilocality, step))
         {
             /* Prune the pair-list beyond fr->ic->rlistPrune using
              * the current coordinates of the atoms.
@@ -435,7 +432,7 @@ static void do_nb_verlet(t_forcerec                       *fr,
 
     NbnxnDispatchKernel(nbv, ilocality, *ic, flags, clearF, fr, enerd, nrnb);
 
-    if (!bUsingGpuKernels)
+    if (!nbv->useGpu())
     {
         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
     }
@@ -555,7 +552,7 @@ static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
 
 gmx_bool use_GPU(const nonbonded_verlet_t *nbv)
 {
-    return nbv != nullptr && nbv->bUseGPU;
+    return nbv != nullptr && nbv->useGpu();
 }
 
 static inline void clear_rvecs_omp(int n, rvec v[])
@@ -907,7 +904,7 @@ static inline void launchGpuRollingPruning(const t_commrec          *cr,
      */
     int  numRollingParts     = nbv->listParams->numRollingParts;
     GMX_ASSERT(numRollingParts == nbv->listParams->nstlistPrune/2, "Since we alternate local/non-local at even/odd steps, we need numRollingParts<=nstlistPrune/2 for correctness and == for efficiency");
-    int  stepWithCurrentList = step - nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists.outerListCreationStep;
+    int  stepWithCurrentList = nbnxnNumStepsWithPairlist(*nbv, Nbnxm::InteractionLocality::Local, step);
     bool stepIsEven          = ((stepWithCurrentList & 1) == 0);
     if (stepWithCurrentList > 0 &&
         stepWithCurrentList < inputrec->nstlist - 1 &&
@@ -961,8 +958,8 @@ static void do_force_cutsVERLET(FILE *fplog,
     bFillGrid     = (bNS && bStateChanged);
     bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
     bDoForces     = ((flags & GMX_FORCE_FORCES) != 0);
-    bUseGPU       = fr->nbv->bUseGPU;
-    bUseOrEmulGPU = bUseGPU || (fr->nbv->emulateGpu == EmulateGpuNonbonded::Yes);
+    bUseGPU       = fr->nbv->useGpu();
+    bUseOrEmulGPU = bUseGPU || fr->nbv->emulateGpu();
 
     const auto pmeRunMode = fr->pmedata ? pme_run_mode(fr->pmedata) : PmeRunMode::CPU;
     // TODO slim this conditional down - inputrec and duty checks should mean the same in proper code!
@@ -1079,22 +1076,18 @@ static void do_force_cutsVERLET(FILE *fplog,
         if (!DOMAINDECOMP(cr))
         {
             wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL);
-            nbnxn_put_on_grid(nbv->nbs.get(), fr->ePBC, box,
+            nbnxn_put_on_grid(nbv, box,
                               0, vzero, box_diag,
                               nullptr, 0, mdatoms->homenr, -1,
                               fr->cginfo, x.unpaddedArrayRef(),
-                              0, nullptr,
-                              nbv->grp[Nbnxm::InteractionLocality::Local].kernel_type,
-                              nbv->nbat);
+                              0, nullptr);
             wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL);
         }
         else
         {
             wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL);
-            nbnxn_put_on_grid_nonlocal(nbv->nbs.get(), domdec_zones(cr->dd),
-                                       fr->cginfo, x.unpaddedArrayRef(),
-                                       nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type,
-                                       nbv->nbat);
+            nbnxn_put_on_grid_nonlocal(nbv, domdec_zones(cr->dd),
+                                       fr->cginfo, x.unpaddedArrayRef());
             wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
         }
 
@@ -1142,32 +1135,12 @@ static void do_force_cutsVERLET(FILE *fplog,
     /* do local pair search */
     if (bNS)
     {
-        nbnxn_pairlist_set_t &pairlistSet = nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists;
-
         wallcycle_start_nocount(wcycle, ewcNS);
         wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
-        nbnxn_make_pairlist(nbv->nbs.get(), nbv->nbat,
-                            &top->excls,
-                            nbv->listParams->rlistOuter,
-                            nbv->min_ci_balanced,
-                            &pairlistSet,
-                            Nbnxm::InteractionLocality::Local,
-                            nbv->grp[Nbnxm::InteractionLocality::Local].kernel_type,
-                            nrnb);
-        pairlistSet.outerListCreationStep = step;
-        if (nbv->listParams->useDynamicPruning && !bUseGPU)
-        {
-            nbnxnPrepareListForDynamicPruning(&pairlistSet);
-        }
+        /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
+        nbnxn_make_pairlist(nbv, Nbnxm::InteractionLocality::Local,
+                            &top->excls, step, nrnb);
         wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
-
-        if (bUseGPU)
-        {
-            /* initialize local pair-list on the GPU */
-            Nbnxm::gpu_init_pairlist(nbv->gpu_nbv,
-                                     pairlistSet.nblGpu[0],
-                                     Nbnxm::InteractionLocality::Local);
-        }
         wallcycle_stop(wcycle, ewcNS);
     }
     else
@@ -1217,35 +1190,14 @@ static void do_force_cutsVERLET(FILE *fplog,
        do non-local pair search */
     if (havePPDomainDecomposition(cr))
     {
-        nbnxn_pairlist_set_t &pairlistSet = nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists;
-
         if (bNS)
         {
             wallcycle_start_nocount(wcycle, ewcNS);
             wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
-
-            nbnxn_make_pairlist(nbv->nbs.get(), nbv->nbat,
-                                &top->excls,
-                                nbv->listParams->rlistOuter,
-                                nbv->min_ci_balanced,
-                                &pairlistSet,
-                                Nbnxm::InteractionLocality::NonLocal,
-                                nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type,
-                                nrnb);
-            pairlistSet.outerListCreationStep = step;
-            if (nbv->listParams->useDynamicPruning && !bUseGPU)
-            {
-                nbnxnPrepareListForDynamicPruning(&pairlistSet);
-            }
+            /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
+            nbnxn_make_pairlist(nbv, Nbnxm::InteractionLocality::NonLocal,
+                                &top->excls, step, nrnb);
             wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
-
-            if (nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type == nbnxnk8x8x8_GPU)
-            {
-                /* initialize non-local pair-list on the GPU */
-                Nbnxm::gpu_init_pairlist(nbv->gpu_nbv,
-                                         pairlistSet.nblGpu[0],
-                                         Nbnxm::InteractionLocality::NonLocal);
-            }
             wallcycle_stop(wcycle, ewcNS);
         }
         else
@@ -1410,18 +1362,18 @@ static void do_force_cutsVERLET(FILE *fplog,
         /* Calculate the local and non-local free energy interactions here.
          * Happens here on the CPU both with and without GPU.
          */
-        if (fr->nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists.nbl_fep[0]->nrj > 0)
+        if (fr->nbv->pairlistSets[Nbnxm::InteractionLocality::Local].nbl_fep[0]->nrj > 0)
         {
-            do_nb_verlet_fep(&fr->nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists,
+            do_nb_verlet_fep(&fr->nbv->pairlistSets[Nbnxm::InteractionLocality::Local],
                              fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
                              inputrec->fepvals, lambda,
                              enerd, flags, nrnb, wcycle);
         }
 
         if (DOMAINDECOMP(cr) &&
-            fr->nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists.nbl_fep[0]->nrj > 0)
+            fr->nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal].nbl_fep[0]->nrj > 0)
         {
-            do_nb_verlet_fep(&fr->nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists,
+            do_nb_verlet_fep(&fr->nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal],
                              fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
                              inputrec->fepvals, lambda,
                              enerd, flags, nrnb, wcycle);
@@ -1451,7 +1403,7 @@ static void do_force_cutsVERLET(FILE *fplog,
 
         /* if there are multiple fshift output buffers reduce them */
         if ((flags & GMX_FORCE_VIRIAL) &&
-            nbv->grp[iloc].nbl_lists.nnbl > 1)
+            nbv->pairlistSets[iloc].nnbl > 1)
         {
             /* This is not in a subcounter because it takes a
                negligible and constant-sized amount of time */
@@ -1506,7 +1458,7 @@ static void do_force_cutsVERLET(FILE *fplog,
             }
 
             /* skip the reduction if there was no non-local work to do */
-            if (!nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists.nblGpu[0]->sci.empty())
+            if (!nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal].nblGpu[0]->sci.empty())
             {
                 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::NonLocal,
                                                nbv->nbat, f, wcycle);
@@ -1576,7 +1528,7 @@ static void do_force_cutsVERLET(FILE *fplog,
         }
     }
 
-    if (fr->nbv->emulateGpu == EmulateGpuNonbonded::Yes)
+    if (fr->nbv->emulateGpu())
     {
         // NOTE: emulation kernel is not included in the balancing region,
         // but emulation mode does not target performance anyway
index f75b1c080f45c6a8335a300a4d5cdd0137ffdc33..c711136b470a9577da0c7e8ee460dc73ec399f51 100644 (file)
@@ -341,7 +341,7 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t    *nbv,
                                   const interaction_const_t   *ic,
                                   const NbnxnListParameters   *listParams)
 {
-    if (!nbv || nbv->grp[InteractionLocality::Local].kernel_type != nbnxnk8x8x8_GPU)
+    if (!nbv || !nbv->useGpu())
     {
         return;
     }
index 7128d74bf93ebeaccabc02eb4b27bcee7acd034f..f4e514824852e82d2cc62fed740925c625f12e40 100644 (file)
@@ -1407,8 +1407,7 @@ calc_cell_indices(nbnxn_search                   *nbs,
  * This function only operates on one domain of the domain decompostion.
  * Note that without domain decomposition there is only one domain.
  */
-void nbnxn_put_on_grid(nbnxn_search_t                  nbs,
-                       int                             ePBC,
+void nbnxn_put_on_grid(nonbonded_verlet_t             *nbv,
                        const matrix                    box,
                        int                             ddZone,
                        const rvec                      lowerCorner,
@@ -1420,18 +1419,17 @@ void nbnxn_put_on_grid(nbnxn_search_t                  nbs,
                        const int                      *atinfo,
                        gmx::ArrayRef<const gmx::RVec>  x,
                        int                             numAtomsMoved,
-                       const int                      *move,
-                       int                             nb_kernel_type,
-                       nbnxn_atomdata_t               *nbat)
+                       const int                      *move)
 {
+    nbnxn_search *nbs  = nbv->nbs.get();
     nbnxn_grid_t *grid = &nbs->grid[ddZone];
 
     nbs_cycle_start(&nbs->cc[enbsCCgrid]);
 
-    grid->bSimple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
+    grid->bSimple = nbv->pairlistIsSimple();
 
-    grid->na_c      = nbnxn_kernel_to_cluster_i_size(nb_kernel_type);
-    grid->na_cj     = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
+    grid->na_c      = nbnxn_kernel_to_cluster_i_size(nbv->kernelType_);
+    grid->na_cj     = nbnxn_kernel_to_cluster_j_size(nbv->kernelType_);
     grid->na_sc     = (grid->bSimple ? 1 : c_gpuNumClusterPerCell)*grid->na_c;
     grid->na_c_2log = get_2log(grid->na_c);
 
@@ -1450,7 +1448,6 @@ void nbnxn_put_on_grid(nbnxn_search_t                  nbs,
 
     if (ddZone == 0)
     {
-        nbs->ePBC = ePBC;
         copy_mat(box, nbs->box);
 
         /* Avoid zero density */
@@ -1498,6 +1495,8 @@ void nbnxn_put_on_grid(nbnxn_search_t                  nbs,
                      lowerCorner, upperCorner,
                      nbs->grid[0].atom_density);
 
+    nbnxn_atomdata_t *nbat = nbv->nbat;
+
     calc_cell_indices(nbs, ddZone, grid, updateGroupsCog, atomStart, atomEnd, atinfo, x, numAtomsMoved, move, nbat);
 
     if (ddZone == 0)
@@ -1514,12 +1513,10 @@ void nbnxn_put_on_grid(nbnxn_search_t                  nbs,
 }
 
 /* Calls nbnxn_put_on_grid for all non-local domains */
-void nbnxn_put_on_grid_nonlocal(nbnxn_search_t                   nbs,
+void nbnxn_put_on_grid_nonlocal(nonbonded_verlet_t              *nbv,
                                 const struct gmx_domdec_zones_t *zones,
                                 const int                       *atinfo,
-                                gmx::ArrayRef<const gmx::RVec>   x,
-                                int                              nb_kernel_type,
-                                nbnxn_atomdata_t                *nbat)
+                                gmx::ArrayRef<const gmx::RVec>   x)
 {
     for (int zone = 1; zone < zones->n; zone++)
     {
@@ -1530,7 +1527,7 @@ void nbnxn_put_on_grid_nonlocal(nbnxn_search_t                   nbs,
             c1[d] = zones->size[zone].bb_x1[d];
         }
 
-        nbnxn_put_on_grid(nbs, nbs->ePBC, nullptr,
+        nbnxn_put_on_grid(nbv, nullptr,
                           zone, c0, c1,
                           nullptr,
                           zones->cg_range[zone],
@@ -1538,9 +1535,7 @@ void nbnxn_put_on_grid_nonlocal(nbnxn_search_t                   nbs,
                           -1,
                           atinfo,
                           x,
-                          0, nullptr,
-                          nb_kernel_type,
-                          nbat);
+                          0, nullptr);
     }
 }
 
index 55066e8059e110e65f86756f135296a943a7620b..68e07759b453f08087b585a424114815368be330 100644 (file)
@@ -139,13 +139,15 @@ struct nbnxn_search
 {
     /* \brief Constructor
      *
+     * \param[in] ePBC         The periodic boundary conditions
      * \param[in] n_dd_cells   The number of domain decomposition cells per dimension, without DD nullptr should be passed
      * \param[in] zones        The domain decomposition zone setup, without DD nullptr should be passed
      * \param[in] bFEP         Tells whether non-bonded interactions are perturbed
      * \param[in] nthread_max  The maximum number of threads used in the search
      */
 
-    nbnxn_search(const ivec               *n_dd_cells,
+    nbnxn_search(int                       ePBC,
+                 const ivec               *n_dd_cells,
                  const gmx_domdec_zones_t *zones,
                  gmx_bool                  bFEP,
                  int                       nthread_max);
index 14402aca91c080e2f080d9c2db47e39b9e3d3359..a1ad25208ab981762c6ed29b9b2b79c8164c44fa 100644 (file)
@@ -127,7 +127,9 @@ reduceGroupEnergySimdBuffers(int                       numGroups,
  * Energy reduction, but not force and shift force reduction, is performed
  * within this function.
  *
- * \param[in]     nbvg          The group (local/non-local) to compute interaction for
+ * \param[in]     pairlistSet   Pairlists with local or non-local interactions to compute
+ * \param[in]     kernel_type   The non-bonded kernel type
+ * \param[in]     ewald_excl    The Ewald exclusion treatment
  * \param[in,out] nbat          The atomdata for the interactions
  * \param[in]     ic            Non-bonded interaction constants
  * \param[in]     shiftVectors  The PBC shift vectors
@@ -138,7 +140,9 @@ reduceGroupEnergySimdBuffers(int                       numGroups,
  * \param[out]    vVdw          Output buffer for Van der Waals energies
  */
 static void
-nbnxn_kernel_cpu(const nonbonded_verlet_group_t *nbvg,
+nbnxn_kernel_cpu(const nbnxn_pairlist_set_t     &pairlistSet,
+                 const int                       kernel_type,
+                 const int                       ewald_excl,
                  nbnxn_atomdata_t               *nbat,
                  const interaction_const_t      &ic,
                  rvec                           *shiftVectors,
@@ -156,7 +160,7 @@ nbnxn_kernel_cpu(const nonbonded_verlet_group_t *nbvg,
     }
     else
     {
-        if (nbvg->ewald_excl == ewaldexclTable)
+        if (ewald_excl == ewaldexclTable)
         {
             if (ic.rcoulomb == ic.rvdw)
             {
@@ -218,7 +222,7 @@ nbnxn_kernel_cpu(const nonbonded_verlet_group_t *nbvg,
         {
             vdwkt = vdwktLJEWALDCOMBLB;
             /* At setup we (should have) selected the C reference kernel */
-            GMX_RELEASE_ASSERT(nbvg->kernel_type == nbnxnk4x4_PlainC, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
+            GMX_RELEASE_ASSERT(kernel_type == nbnxnk4x4_PlainC, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
         }
     }
     else
@@ -226,8 +230,8 @@ nbnxn_kernel_cpu(const nonbonded_verlet_group_t *nbvg,
         GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction type");
     }
 
-    int                        nnbl = nbvg->nbl_lists.nnbl;
-    NbnxnPairlistCpu * const * nbl  = nbvg->nbl_lists.nbl;
+    int                        nnbl = pairlistSet.nnbl;
+    NbnxnPairlistCpu * const * nbl  = pairlistSet.nbl;
 
     int gmx_unused             nthreads = gmx_omp_nthreads_get(emntNonbonded);
 #pragma omp parallel for schedule(static) num_threads(nthreads)
@@ -260,7 +264,7 @@ nbnxn_kernel_cpu(const nonbonded_verlet_group_t *nbvg,
         if (!(forceFlags & GMX_FORCE_ENERGY))
         {
             /* Don't calculate energies */
-            switch (nbvg->kernel_type)
+            switch (kernel_type)
             {
                 case nbnxnk4x4_PlainC:
                     nbnxn_kernel_noener_ref[coulkt][vdwkt](nbl[nb], nbat,
@@ -297,7 +301,7 @@ nbnxn_kernel_cpu(const nonbonded_verlet_group_t *nbvg,
             out->Vvdw[0] = 0;
             out->Vc[0]   = 0;
 
-            switch (nbvg->kernel_type)
+            switch (kernel_type)
             {
                 case nbnxnk4x4_PlainC:
                     nbnxn_kernel_ener_ref[coulkt][vdwkt](nbl[nb], nbat,
@@ -341,7 +345,7 @@ nbnxn_kernel_cpu(const nonbonded_verlet_group_t *nbvg,
 
             int unrollj = 0;
 
-            switch (nbvg->kernel_type)
+            switch (kernel_type)
             {
                 case nbnxnk4x4_PlainC:
                     unrollj = c_nbnxnCpuIClusterSize;
@@ -381,7 +385,7 @@ nbnxn_kernel_cpu(const nonbonded_verlet_group_t *nbvg,
                     GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
             }
 
-            if (nbvg->kernel_type != nbnxnk4x4_PlainC)
+            if (kernel_type != nbnxnk4x4_PlainC)
             {
                 switch (unrollj)
                 {
@@ -419,15 +423,15 @@ static void accountFlops(t_nrnb                           *nrnb,
                          const interaction_const_t        &ic,
                          const int                         forceFlags)
 {
-    const nonbonded_verlet_group_t &nbvg            = nbv.grp[iLocality];
-    const bool                      usingGpuKernels = (nbvg.kernel_type == nbnxnk8x8x8_GPU);
+    const nbnxn_pairlist_set_t &pairlistSet     = nbv.pairlistSets[iLocality];
+    const bool                  usingGpuKernels = nbv.useGpu();
 
     int enr_nbnxn_kernel_ljc;
     if (EEL_RF(ic.eeltype) || ic.eeltype == eelCUT)
     {
         enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
     }
-    else if ((!usingGpuKernels && nbvg.ewald_excl == ewaldexclAnalytical) ||
+    else if ((!usingGpuKernels && nbv.ewaldExclusionType_ == ewaldexclAnalytical) ||
              (usingGpuKernels && Nbnxm::gpu_is_kernel_ewald_analytical(nbv.gpu_nbv)))
     {
         enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD;
@@ -445,31 +449,31 @@ static void accountFlops(t_nrnb                           *nrnb,
     }
 
     inc_nrnb(nrnb, enr_nbnxn_kernel_ljc,
-             nbvg.nbl_lists.natpair_ljq);
+             pairlistSet.natpair_ljq);
     inc_nrnb(nrnb, enr_nbnxn_kernel_lj,
-             nbvg.nbl_lists.natpair_lj);
+             pairlistSet.natpair_lj);
     /* The Coulomb-only kernels are offset -eNR_NBNXN_LJ_RF+eNR_NBNXN_RF */
     inc_nrnb(nrnb, enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF,
-             nbvg.nbl_lists.natpair_q);
+             pairlistSet.natpair_q);
 
     const bool calcEnergy = ((forceFlags & GMX_FORCE_ENERGY) != 0);
     if (ic.vdw_modifier == eintmodFORCESWITCH)
     {
         /* We add up the switch cost separately */
         inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW + (calcEnergy ? 1 : 0),
-                 nbvg.nbl_lists.natpair_ljq + nbvg.nbl_lists.natpair_lj);
+                 pairlistSet.natpair_ljq + pairlistSet.natpair_lj);
     }
     if (ic.vdw_modifier == eintmodPOTSWITCH)
     {
         /* We add up the switch cost separately */
         inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW + (calcEnergy ? 1 : 0),
-                 nbvg.nbl_lists.natpair_ljq + nbvg.nbl_lists.natpair_lj);
+                 pairlistSet.natpair_ljq + pairlistSet.natpair_lj);
     }
     if (ic.vdwtype == evdwPME)
     {
         /* We add up the LJ Ewald cost separately */
         inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD + (calcEnergy ? 1 : 0),
-                 nbvg.nbl_lists.natpair_ljq + nbvg.nbl_lists.natpair_lj);
+                 pairlistSet.natpair_ljq + pairlistSet.natpair_lj);
     }
 }
 
@@ -482,14 +486,16 @@ void NbnxnDispatchKernel(nonbonded_verlet_t        *nbv,
                          gmx_enerdata_t            *enerd,
                          t_nrnb                    *nrnb)
 {
-    const nonbonded_verlet_group_t &nbvg = nbv->grp[iLocality];
+    const nbnxn_pairlist_set_t &pairlistSet = nbv->pairlistSets[iLocality];
 
-    switch (nbvg.kernel_type)
+    switch (nbv->kernelType_)
     {
         case nbnxnk4x4_PlainC:
         case nbnxnk4xN_SIMD_4xN:
         case nbnxnk4xN_SIMD_2xNN:
-            nbnxn_kernel_cpu(&nbvg,
+            nbnxn_kernel_cpu(pairlistSet,
+                             nbv->kernelType_,
+                             nbv->ewaldExclusionType_,
                              nbv->nbat,
                              ic,
                              fr->shift_vec,
@@ -507,7 +513,7 @@ void NbnxnDispatchKernel(nonbonded_verlet_t        *nbv,
             break;
 
         case nbnxnk8x8x8_PlainC:
-            nbnxn_kernel_gpu_ref(nbvg.nbl_lists.nblGpu[0],
+            nbnxn_kernel_gpu_ref(pairlistSet.nblGpu[0],
                                  nbv->nbat, &ic,
                                  fr->shift_vec,
                                  forceFlags,
index 9e0138e162a22273c50dfd2b916e79a715e69abd..908c6bdc68a4396fc158ad7e35303787bee073b7 100644 (file)
 
 #include "gromacs/math/vectypes.h"
 #include "gromacs/nbnxm/pairlist.h"
-#include "gromacs/nbnxm/pairlistset.h"
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/enumerationhelpers.h"
 #include "gromacs/utility/real.h"
@@ -118,6 +117,8 @@ struct gmx_enerdata_t;
 struct gmx_hw_info_t;
 struct gmx_mtop_t;
 struct interaction_const_t;
+struct nbnxn_pairlist_set_t;
+struct t_blocka;
 struct t_commrec;
 struct t_nrnb;
 struct t_forcerec;
@@ -173,31 +174,43 @@ enum {
     enbvClearFNo, enbvClearFYes
 };
 
-/*! \libinternal
- *  \brief Non-bonded interaction group data structure. */
-typedef struct nonbonded_verlet_group_t {
-    nbnxn_pairlist_set_t  nbl_lists;   /**< pair list(s)                       */
-    int                   kernel_type; /**< non-bonded kernel - see enum above */
-    int                   ewald_excl;  /**< Ewald exclusion - see enum above   */
-} nonbonded_verlet_group_t;
-
 /*! \libinternal
  *  \brief Top-level non-bonded data structure for the Verlet-type cut-off scheme. */
 struct nonbonded_verlet_t
 {
+    //! Returns whether a GPU is used for the non-bonded calculations
+    bool useGpu() const
+    {
+        return kernelType_ == nbnxnk8x8x8_GPU;
+    }
+
+    //! Returns whether a GPU is emulated for the non-bonded calculations
+    bool emulateGpu() const
+    {
+        return kernelType_ == nbnxnk8x8x8_PlainC;
+    }
+
+    //! Return whether the pairlist is of simple, CPU type
+    bool pairlistIsSimple() const
+    {
+        return !useGpu() && !emulateGpu();
+    }
+
     std::unique_ptr<NbnxnListParameters>                                        listParams; /**< Parameters for the search and list pruning setup */
     std::unique_ptr<nbnxn_search>                                               nbs;        /**< n vs n atom pair searching data       */
     int                                                                         ngrp;       /**< number of interaction groups          */
-    //! Local and non-local interaction group
-    gmx::EnumerationArray<Nbnxm::InteractionLocality, nonbonded_verlet_group_t> grp;
+    //! Local and non-local pairlist sets
+    gmx::EnumerationArray<Nbnxm::InteractionLocality, nbnxn_pairlist_set_t>     pairlistSets;
     //! Atom data
     nbnxn_atomdata_t                                                           *nbat;
 
-    gmx_bool                                                                    bUseGPU;         /**< TRUE when non-bonded interactions are computed on a physical GPU */
-    EmulateGpuNonbonded                                                         emulateGpu;      /**< true when non-bonded interactions are computed on the CPU using GPU-style pair lists */
-    gmx_nbnxn_gpu_t                                                            *gpu_nbv;         /**< pointer to GPU nb verlet data     */
-    int                                                                         min_ci_balanced; /**< pair list balancing parameter
-                                                                                                      used for the 8x8x8 GPU kernels    */
+    //! Non-bonded kernel - see enum above
+    int                  kernelType_;
+    //! Ewald exclusion - see enum above
+    int                  ewaldExclusionType_;
+
+    gmx_nbnxn_gpu_t     *gpu_nbv;         /**< pointer to GPU nb verlet data     */
+    int                  min_ci_balanced; /**< pair list balancing parameter used for the 8x8x8 GPU kernels    */
 };
 
 namespace Nbnxm
@@ -227,8 +240,7 @@ void init_nb_verlet(const gmx::MDLogger     &mdlog,
  * When move[i] < 0 particle i has migrated and will not be put on the grid.
  * Without domain decomposition move will be NULL.
  */
-void nbnxn_put_on_grid(nbnxn_search_t                  nbs,
-                       int                             ePBC,
+void nbnxn_put_on_grid(nonbonded_verlet_t             *nb_verlet,
                        const matrix                    box,
                        int                             ddZone,
                        const rvec                      lowerCorner,
@@ -240,21 +252,17 @@ void nbnxn_put_on_grid(nbnxn_search_t                  nbs,
                        const int                      *atinfo,
                        gmx::ArrayRef<const gmx::RVec>  x,
                        int                             numAtomsMoved,
-                       const int                      *move,
-                       int                             nb_kernel_type,
-                       nbnxn_atomdata_t               *nbat);
+                       const int                      *move);
 
 /*! \brief As nbnxn_put_on_grid, but for the non-local atoms
  *
  * with domain decomposition. Should be called after calling
  * nbnxn_search_put_on_grid for the local atoms / home zone.
  */
-void nbnxn_put_on_grid_nonlocal(nbnxn_search_t                   nbs,
+void nbnxn_put_on_grid_nonlocal(nonbonded_verlet_t              *nb_verlet,
                                 const struct gmx_domdec_zones_t *zones,
                                 const int                       *atinfo,
-                                gmx::ArrayRef<const gmx::RVec>   x,
-                                int                              nb_kernel_type,
-                                nbnxn_atomdata_t                *nbat);
+                                gmx::ArrayRef<const gmx::RVec>   x);
 
 /*! \brief Returns the number of x and y cells in the local grid */
 void nbnxn_get_ncells(nbnxn_search_t nbs, int *ncx, int *ncy);
@@ -268,6 +276,26 @@ void nbnxn_set_atomorder(nbnxn_search_t nbs);
 /*! \brief Returns the index position of the atoms on the pairlist search grid */
 gmx::ArrayRef<const int> nbnxn_get_gridindices(const nbnxn_search* nbs);
 
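+/*! \brief Generates a pair-list for the given locality and stores \p step as its creation step.
+ *
+ * With perturbed particles, also a group scheme style nbl_fep list is made. With a GPU the transfer of the list to the GPU is launched; without a GPU and with dynamic pruning the list is prepared for pruning.
+ */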
+void nbnxn_make_pairlist(nonbonded_verlet_t         *nbv,
+                         Nbnxm::InteractionLocality  iLocality,
+                         const t_blocka             *excl,
+                         int64_t                     step,
+                         t_nrnb                     *nrnb);
+
+/*! \brief Returns the number of steps performed with the current pair list */
+int nbnxnNumStepsWithPairlist(const nonbonded_verlet_t   &nbv,
+                              Nbnxm::InteractionLocality  ilocality,
+                              int64_t                     step);
+
+/*! \brief Returns whether step is a dynamic list pruning step */
+bool nbnxnIsDynamicPairlistPruningStep(const nonbonded_verlet_t   &nbv,
+                                       Nbnxm::InteractionLocality  ilocality,
+                                       int64_t                     step);
+
 /*! \brief Prune all pair-lists with given locality (currently CPU only)
  *
  * For all pair-lists with given locality, takes the outer list and prunes out
index 7b1c5b16cd9f9b6514288d9678d5d4c1642ddab4..4c55452e9caf49c95e83aaab8048d8b68407c6cd 100644 (file)
 #include "gromacs/nbnxm/pairlist.h"
 #include "gromacs/simd/simd.h"
 #include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/real.h"
+
+bool nbnxn_kernel_pairlist_simple(int nb_kernel_type) /* true: simple pair-list, false: non-simple (super-sub) pair-list */
+{
+    if (nb_kernel_type == nbnxnkNotSet) /* the list type is only defined once a kernel type has been chosen */
+    {
+        gmx_fatal(FARGS, "Non-bonded kernel type not set for Verlet-style pair-list.");
+    }
+
+    switch (nb_kernel_type)
+    {
+        case nbnxnk8x8x8_GPU:
+        case nbnxnk8x8x8_PlainC:
+            return false; /* both 8x8x8 kernel types use the non-simple list */
+
+        case nbnxnk4x4_PlainC:
+        case nbnxnk4xN_SIMD_4xN:
+        case nbnxnk4xN_SIMD_2xNN:
+            return true; /* the 4x4 and 4xN kernel types use the simple list */
+
+        default:
+            gmx_incons("Invalid nonbonded kernel type passed!");
+            return false; /* NOTE(review): presumably only here to satisfy the compiler if gmx_incons does not return - confirm */
+    }
+}
 
 int nbnxn_kernel_to_cluster_i_size(int nb_kernel_type)
 {
@@ -92,3 +117,32 @@ int nbnxn_kernel_to_cluster_j_size(int nb_kernel_type)
 
     return cj_size;
 }
+
+/* Clusters at the cut-off only increase rlist by 60% of their size */
+static constexpr real c_nbnxnRlistIncreaseOutsideFactor = 0.6;
+
+real nbnxn_get_rlist_effective_inc(const int  jClusterSize,
+                                   const real atomDensity)
+{
+    /* The i-cluster size should come from the setup, but currently
+     * it is the same for all setups, including GPUs.
+     */
+    const real iClusterSize    = c_nbnxnCpuIClusterSize;
+
+    const real iVolumeIncrease = (iClusterSize - 1)/atomDensity; /* the extra (cluster size - 1) atoms divided by the density */
+    const real jVolumeIncrease = (jClusterSize - 1)/atomDensity;
+
+    return c_nbnxnRlistIncreaseOutsideFactor*std::cbrt(iVolumeIncrease +
+                                                       jVolumeIncrease); /* the cube root converts the summed volume into a distance */
+}
+
+real nbnxn_get_rlist_effective_inc(const int        clusterSize,
+                                   const gmx::RVec &averageClusterBoundingBox)
+{
+    /* The average length of the diagonal of a sub cell */
+    const real diagonal    = std::sqrt(norm2(averageClusterBoundingBox));
+
+    const real volumeRatio = (clusterSize - 1.0_real)/clusterSize; /* (N - 1)/N, with N used for both the i- and j-cluster size */
+
+    return c_nbnxnRlistIncreaseOutsideFactor*gmx::square(volumeRatio)*0.5_real*diagonal; /* scales half the diagonal by the outside factor and the squared ratio */
+}
index 250a9e1b3ae1d0e2ada1dab7fbe14a6d699189cb..c683ee2b1fef79875b7ca98981442481ba5cf2e3 100644 (file)
@@ -36,6 +36,7 @@
 #ifndef GMX_NBNXM_NBNXM_GEOMETRY_H
 #define GMX_NBNXM_NBNXM_GEOMETRY_H
 
+#include "gromacs/math/vectypes.h"
 #include "gromacs/utility/fatalerror.h"
 
 /* Returns the base-2 log of n.
@@ -58,10 +59,33 @@ static inline int get_2log(int n)
     return log2;
 }
 
+/* Returns whether the pair-list corresponding to nb_kernel_type is simple */
+bool nbnxn_kernel_pairlist_simple(int nb_kernel_type);
+
 /* Returns the nbnxn i-cluster size in atoms for the nbnxn kernel type */
 int nbnxn_kernel_to_cluster_i_size(int nb_kernel_type);
 
 /* Returns the nbnxn i-cluster size in atoms for the nbnxn kernel type */
 int nbnxn_kernel_to_cluster_j_size(int nb_kernel_type);
 
+/* Returns the increase of the effective list radius of the pair-list
+ *
+ * Due to the cluster size the effective pair-list is longer than
+ * that of a simple atom pair-list. This function gives the extra distance.
+ *
+ * NOTE: If the i- and j-cluster sizes are identical and you know
+ *       the physical dimensions of the clusters, use the next function
+ *       for more accurate results
+ */
+real nbnxn_get_rlist_effective_inc(int  jClusterSize,
+                                   real atomDensity);
+
+/* Returns the increase of the effective list radius of the pair-list
+ *
+ * Due to the cluster size the effective pair-list is longer than
+ * that of a simple atom pair-list. This function gives the extra distance.
+ */
+real nbnxn_get_rlist_effective_inc(int              clusterSize,
+                                   const gmx::RVec &averageClusterBoundingBox);
+
 #endif
index 0565d3faf0b11854e38d35974d713939415b39e3..44a3d632ca15c9c76eaabdc2e0692737a583a85e 100644 (file)
@@ -55,6 +55,7 @@
 #include "gromacs/nbnxm/nbnxm_geometry.h"
 #include "gromacs/nbnxm/nbnxm_simd.h"
 #include "gromacs/nbnxm/pairlist_tuning.h"
+#include "gromacs/nbnxm/pairlistset.h"
 #include "gromacs/simd/simd.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/logger.h"
@@ -220,7 +221,7 @@ const char *lookup_kernel_name(int kernel_type)
 static void pick_nbnxn_kernel(const gmx::MDLogger &mdlog,
                               gmx_bool             use_simd_kernels,
                               const gmx_hw_info_t &hardwareInfo,
-                              gmx_bool             bUseGPU,
+                              bool                 useGpu,
                               EmulateGpuNonbonded  emulateGpu,
                               const t_inputrec    *ir,
                               int                 *kernel_type,
@@ -241,7 +242,7 @@ static void pick_nbnxn_kernel(const gmx::MDLogger &mdlog,
             GMX_LOG(mdlog.warning).asParagraph().appendText("Emulating a GPU run on the CPU (slow)");
         }
     }
-    else if (bUseGPU)
+    else if (useGpu)
     {
         *kernel_type = nbnxnk8x8x8_GPU;
     }
@@ -289,58 +290,46 @@ void init_nb_verlet(const gmx::MDLogger     &mdlog,
                     const gmx_mtop_t        *mtop,
                     matrix                   box)
 {
-    nonbonded_verlet_t *nbv;
-    char               *env;
+    nonbonded_verlet_t        *nbv        = new nonbonded_verlet_t();
 
-    nbv = new nonbonded_verlet_t();
+    const EmulateGpuNonbonded  emulateGpu =
+        ((getenv("GMX_EMULATE_GPU") != nullptr) ? EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
+    bool                       useGpu     = deviceInfo != nullptr;
 
-    nbv->emulateGpu = ((getenv("GMX_EMULATE_GPU") != nullptr) ? EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
-    nbv->bUseGPU    = deviceInfo != nullptr;
-
-    GMX_RELEASE_ASSERT(!(nbv->emulateGpu == EmulateGpuNonbonded::Yes && nbv->bUseGPU), "When GPU emulation is active, there cannot be a GPU assignment");
+    GMX_RELEASE_ASSERT(!(emulateGpu == EmulateGpuNonbonded::Yes && useGpu), "When GPU emulation is active, there cannot be a GPU assignment");
 
     nbv->nbs             = nullptr;
-    nbv->min_ci_balanced = 0;
 
-    nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
-    for (int i = 0; i < nbv->ngrp; i++)
-    {
-        nbv->grp[i].nbl_lists.nnbl = 0;
-        nbv->grp[i].kernel_type    = nbnxnkNotSet;
+    pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo,
+                      useGpu, emulateGpu, ir,
+                      &nbv->kernelType_,
+                      &nbv->ewaldExclusionType_,
+                      fr->bNonbonded);
 
-        if (i == 0) /* local */
-        {
-            pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo,
-                              nbv->bUseGPU, nbv->emulateGpu, ir,
-                              &nbv->grp[i].kernel_type,
-                              &nbv->grp[i].ewald_excl,
-                              fr->bNonbonded);
-        }
-        else /* non-local */
-        {
-            /* Use the same kernel for local and non-local interactions */
-            nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
-            nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
-        }
+    const bool haveMultipleDomains = (DOMAINDECOMP(cr) && cr->dd->nnodes > 1);
+
+    const bool pairlistIsSimple = nbv->pairlistIsSimple();
+    for (nbnxn_pairlist_set_t &pairlistSet : nbv->pairlistSets)
+    {
+        // TODO Change this to a constructor
+        /* The last argument tells whether lists should be combined,
+         * this is currently only and always done for non-simple (GPU-style) lists.
+         */
+        nbnxn_init_pairlist_set(&pairlistSet, pairlistIsSimple, !pairlistIsSimple);
     }
 
+    nbv->min_ci_balanced = 0;
+
     nbv->listParams = std::make_unique<NbnxnListParameters>(ir->rlist);
-    setupDynamicPairlistPruning(mdlog, ir, mtop, box, nbv->grp[0].kernel_type, fr->ic,
+    setupDynamicPairlistPruning(mdlog, ir, mtop, box, nbv->kernelType_, fr->ic,
                                 nbv->listParams.get());
 
-    nbv->nbs = std::make_unique<nbnxn_search>(DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr,
+    nbv->nbs = std::make_unique<nbnxn_search>(ir->ePBC,
+                                              DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr,
                                               DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : nullptr,
                                               bFEP_NonBonded,
                                               gmx_omp_nthreads_get(emntPairsearch));
 
-    for (int i = 0; i < nbv->ngrp; i++)
-    {
-        nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
-                                nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
-                                /* 8x8x8 "non-simple" lists are ATM always combined */
-                                !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type));
-    }
-
     int      enbnxninitcombrule;
     if (fr->ic->vdwtype == evdwCUT &&
         (fr->ic->vdw_modifier == eintmodNONE ||
@@ -368,7 +357,7 @@ void init_nb_verlet(const gmx::MDLogger     &mdlog,
         enbnxninitcombrule = enbnxninitcombruleNONE;
     }
 
-    nbv->nbat = new nbnxn_atomdata_t(nbv->bUseGPU ? gmx::PinningPolicy::PinnedIfSupported : gmx::PinningPolicy::CannotBePinned);
+    nbv->nbat = new nbnxn_atomdata_t(useGpu ? gmx::PinningPolicy::PinnedIfSupported : gmx::PinningPolicy::CannotBePinned);
     int mimimumNumEnergyGroupNonbonded = ir->opts.ngener;
     if (ir->opts.ngener - ir->nwall == 1)
     {
@@ -378,16 +367,15 @@ void init_nb_verlet(const gmx::MDLogger     &mdlog,
          */
         mimimumNumEnergyGroupNonbonded = 1;
     }
-    bool bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[0].kernel_type);
     nbnxn_atomdata_init(mdlog,
                         nbv->nbat,
-                        nbv->grp[0].kernel_type,
+                        nbv->kernelType_,
                         enbnxninitcombrule,
                         fr->ntype, fr->nbfp,
                         mimimumNumEnergyGroupNonbonded,
-                        bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1);
+                        pairlistIsSimple ? gmx_omp_nthreads_get(emntNonbonded) : 1);
 
-    if (nbv->bUseGPU)
+    if (useGpu)
     {
         /* init the NxN GPU data; the last argument tells whether we'll have
          * both local and non-local NB calculation on GPU */
@@ -397,9 +385,9 @@ void init_nb_verlet(const gmx::MDLogger     &mdlog,
                  nbv->listParams.get(),
                  nbv->nbat,
                  cr->nodeid,
-                 (nbv->ngrp > 1));
+                 haveMultipleDomains);
 
-        if ((env = getenv("GMX_NB_MIN_CI")) != nullptr)
+        if (const char *env = getenv("GMX_NB_MIN_CI"))
         {
             char *end;
 
index 3ee147cfd47deb954208fdfc0d23f97f81b05597..d040b67c221a8f6c3089f2f137423067f84632eb 100644 (file)
@@ -418,7 +418,7 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t    *nbv,
                                   const interaction_const_t   *ic,
                                   const NbnxnListParameters   *listParams)
 {
-    if (!nbv || nbv->grp[InteractionLocality::Local].kernel_type != nbnxnk8x8x8_GPU)
+    if (!nbv || !nbv->useGpu())
     {
         return;
     }
index 16f940a71a23fc093298f8d96ae21b03c562465f..9a4b667756f2dc8ffe7a1bd8c604d90e19d718c9 100644 (file)
@@ -55,6 +55,7 @@
 #include "gromacs/mdtypes/group.h"
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/nbnxm/atomdata.h"
+#include "gromacs/nbnxm/gpu_data_mgmt.h"
 #include "gromacs/nbnxm/nbnxm.h"
 #include "gromacs/nbnxm/nbnxm_geometry.h"
 #include "gromacs/nbnxm/nbnxm_simd.h"
@@ -239,30 +240,6 @@ static inline int xIndexFromCj(int cj)
 }
 #endif //GMX_SIMD
 
-gmx_bool nbnxn_kernel_pairlist_simple(int nb_kernel_type)
-{
-    if (nb_kernel_type == nbnxnkNotSet)
-    {
-        gmx_fatal(FARGS, "Non-bonded kernel type not set for Verlet-style pair-list.");
-    }
-
-    switch (nb_kernel_type)
-    {
-        case nbnxnk8x8x8_GPU:
-        case nbnxnk8x8x8_PlainC:
-            return FALSE;
-
-        case nbnxnk4x4_PlainC:
-        case nbnxnk4xN_SIMD_4xN:
-        case nbnxnk4xN_SIMD_2xNN:
-            return TRUE;
-
-        default:
-            gmx_incons("Invalid nonbonded kernel type passed!");
-            return FALSE;
-    }
-}
-
 /* Initializes a single nbnxn_pairlist_t data structure */
 static void nbnxn_init_pairlist_fep(t_nblist *nl)
 {
@@ -317,12 +294,13 @@ nbnxn_search_work_t::~nbnxn_search_work_t()
     free_nblist(nbl_fep.get());
 }
 
-nbnxn_search::nbnxn_search(const ivec               *n_dd_cells,
+nbnxn_search::nbnxn_search(int                       ePBC,
+                           const ivec               *n_dd_cells,
                            const gmx_domdec_zones_t *zones,
                            gmx_bool                  bFEP,
                            int                       nthread_max) :
     bFEP(bFEP),
-    ePBC(epbcNONE), // The correct value will be set during the gridding
+    ePBC(ePBC),
     zones(zones),
     natoms_local(0),
     natoms_nonlocal(0),
@@ -354,12 +332,13 @@ nbnxn_search::nbnxn_search(const ivec               *n_dd_cells,
     nbs_cycle_clear(cc);
 }
 
-nbnxn_search *nbnxn_init_search(const ivec                *n_dd_cells,
+nbnxn_search *nbnxn_init_search(int                        ePBC,
+                                const ivec                *n_dd_cells,
                                 const gmx_domdec_zones_t  *zones,
                                 gmx_bool                   bFEP,
                                 int                        nthread_max)
 {
-    return new nbnxn_search(n_dd_cells, zones, bFEP, nthread_max);
+    return new nbnxn_search(ePBC, n_dd_cells, zones, bFEP, nthread_max);
 }
 
 static void init_buffer_flags(nbnxn_buffer_flags_t *flags,
@@ -2582,28 +2561,6 @@ static real effective_buffer_1x1_vs_MxN(const nbnxn_grid_t &iGrid,
                                        minimum_subgrid_size_xy(jGrid));
 }
 
-/* Clusters at the cut-off only increase rlist by 60% of their size */
-static real nbnxn_rlist_inc_outside_fac = 0.6;
-
-/* Due to the cluster size the effective pair-list is longer than
- * that of a simple atom pair-list. This function gives the extra distance.
- */
-real nbnxn_get_rlist_effective_inc(int cluster_size_j, real atom_density)
-{
-    int  cluster_size_i;
-    real vol_inc_i, vol_inc_j;
-
-    /* We should get this from the setup, but currently it's the same for
-     * all setups, including GPUs.
-     */
-    cluster_size_i = c_nbnxnCpuIClusterSize;
-
-    vol_inc_i = (cluster_size_i - 1)/atom_density;
-    vol_inc_j = (cluster_size_j - 1)/atom_density;
-
-    return nbnxn_rlist_inc_outside_fac*std::cbrt(vol_inc_i + vol_inc_j);
-}
-
 /* Estimates the interaction volume^2 for non-local interactions */
 static real nonlocal_vol2(const struct gmx_domdec_zones_t *zones, const rvec ls, real r)
 {
@@ -2663,7 +2620,6 @@ static void get_nsubpair_target(const nbnxn_search        *nbs,
      * Maxwell is less sensitive to the exact value.
      */
     const int           nsubpair_target_min = 36;
-    rvec                ls;
     real                r_eff_sup, vol_est, nsp_est, nsp_est_nl;
 
     const nbnxn_grid_t &grid = nbs->grid[0];
@@ -2683,15 +2639,13 @@ static void get_nsubpair_target(const nbnxn_search        *nbs,
         return;
     }
 
+    gmx::RVec ls;
     ls[XX] = (grid.c1[XX] - grid.c0[XX])/(grid.numCells[XX]*c_gpuNumClusterPerCellX);
     ls[YY] = (grid.c1[YY] - grid.c0[YY])/(grid.numCells[YY]*c_gpuNumClusterPerCellY);
     ls[ZZ] = grid.na_c/(grid.atom_density*ls[XX]*ls[YY]);
 
-    /* The average length of the diagonal of a sub cell */
-    real diagonal = std::sqrt(ls[XX]*ls[XX] + ls[YY]*ls[YY] + ls[ZZ]*ls[ZZ]);
-
     /* The formulas below are a heuristic estimate of the average nsj per si*/
-    r_eff_sup = rlist + nbnxn_rlist_inc_outside_fac*gmx::square((grid.na_c - 1.0)/grid.na_c)*0.5*diagonal;
+    r_eff_sup = rlist + nbnxn_get_rlist_effective_inc(grid.na_c, ls);
 
     if (!nbs->DomDec || nbs->zones->n == 1)
     {
@@ -4078,17 +4032,17 @@ static void sort_sci(NbnxnPairlistGpu *nbl)
     std::swap(nbl->sci, work.sci_sort);
 }
 
-/* Make a local or non-local pair-list, depending on iloc */
-void nbnxn_make_pairlist(nbnxn_search              *nbs,
-                         nbnxn_atomdata_t          *nbat,
+void nbnxn_make_pairlist(nonbonded_verlet_t        *nbv,
+                         const InteractionLocality  iLocality,
                          const t_blocka            *excl,
-                         const real                 rlist,
-                         const int                  min_ci_balanced,
-                         nbnxn_pairlist_set_t      *nbl_list,
-                         const InteractionLocality  iloc,
-                         const int                  nb_kernel_type,
+                         const int64_t              step,
                          t_nrnb                    *nrnb)
 {
+    nbnxn_search         *nbs      = nbv->nbs.get();
+    nbnxn_atomdata_t     *nbat     = nbv->nbat;
+    const real            rlist    = nbv->listParams->rlistOuter;
+    nbnxn_pairlist_set_t *nbl_list = &nbv->pairlistSets[iLocality];
+
     int                nsubpair_target;
     float              nsubpair_tot_est;
     int                nnbl;
@@ -4107,13 +4061,13 @@ void nbnxn_make_pairlist(nbnxn_search              *nbs,
 
     nbat->bUseBufferFlags = (nbat->out.size() > 1);
     /* We should re-init the flags before making the first list */
-    if (nbat->bUseBufferFlags && iloc == InteractionLocality::Local)
+    if (nbat->bUseBufferFlags && iLocality == InteractionLocality::Local)
     {
         init_buffer_flags(&nbat->buffer_flags, nbat->numAtoms());
     }
 
     int nzi;
-    if (iloc == InteractionLocality::Local)
+    if (iLocality == InteractionLocality::Local)
     {
         /* Only zone (grid) 0 vs 0 */
         nzi = 1;
@@ -4123,9 +4077,9 @@ void nbnxn_make_pairlist(nbnxn_search              *nbs,
         nzi = nbs->zones->nizone;
     }
 
-    if (!nbl_list->bSimple && min_ci_balanced > 0)
+    if (!nbl_list->bSimple && nbv->min_ci_balanced > 0)
     {
-        get_nsubpair_target(nbs, iloc, rlist, min_ci_balanced,
+        get_nsubpair_target(nbs, iLocality, rlist, nbv->min_ci_balanced,
                             &nsubpair_target, &nsubpair_tot_est);
     }
     else
@@ -4158,7 +4112,7 @@ void nbnxn_make_pairlist(nbnxn_search              *nbs,
 
         int                 zj0;
         int                 zj1;
-        if (iloc == InteractionLocality::Local)
+        if (iLocality == InteractionLocality::Local)
         {
             zj0 = 0;
             zj1 = 1;
@@ -4188,7 +4142,7 @@ void nbnxn_make_pairlist(nbnxn_search              *nbs,
             /* With GPU: generate progressively smaller lists for
              * load balancing for local only or non-local with 2 zones.
              */
-            progBal = (iloc == InteractionLocality::Local || nbs->zones->n <= 2);
+            progBal = (iLocality == InteractionLocality::Local || nbs->zones->n <= 2);
 
 #pragma omp parallel for num_threads(nnbl) schedule(static)
             for (int th = 0; th < nnbl; th++)
@@ -4216,7 +4170,7 @@ void nbnxn_make_pairlist(nbnxn_search              *nbs,
                         nbnxn_make_pairlist_part(nbs, iGrid, jGrid,
                                                  &nbs->work[th], nbat, *excl,
                                                  rlist,
-                                                 nb_kernel_type,
+                                                 nbv->kernelType_,
                                                  ci_block,
                                                  nbat->bUseBufferFlags,
                                                  nsubpair_target,
@@ -4230,7 +4184,7 @@ void nbnxn_make_pairlist(nbnxn_search              *nbs,
                         nbnxn_make_pairlist_part(nbs, iGrid, jGrid,
                                                  &nbs->work[th], nbat, *excl,
                                                  rlist,
-                                                 nb_kernel_type,
+                                                 nbv->kernelType_,
                                                  ci_block,
                                                  nbat->bUseBufferFlags,
                                                  nsubpair_target,
@@ -4343,13 +4297,15 @@ void nbnxn_make_pairlist(nbnxn_search              *nbs,
         GMX_ASSERT(nbl_list->nbl[0]->ciOuter.empty(), "ciOuter is invalid so it should be empty");
     }
 
+    nbl_list->outerListCreationStep = step;
+
     /* Special performance logging stuff (env.var. GMX_NBNXN_CYCLE) */
-    if (iloc == InteractionLocality::Local)
+    if (iLocality == InteractionLocality::Local)
     {
         nbs->search_count++;
     }
     if (nbs->print_cycles &&
-        (!nbs->DomDec || iloc == InteractionLocality::NonLocal) &&
+        (!nbs->DomDec || iLocality == InteractionLocality::NonLocal) &&
         nbs->search_count % 100 == 0)
     {
         nbs_cycle_print(stderr, nbs);
@@ -4395,6 +4351,22 @@ void nbnxn_make_pairlist(nbnxn_search              *nbs,
             print_reduction_cost(&nbat->buffer_flags, nbl_list->nnbl);
         }
     }
+
+    if (nbv->listParams->useDynamicPruning && !nbv->useGpu())
+    {
+        nbnxnPrepareListForDynamicPruning(nbl_list);
+    }
+
+    if (nbv->useGpu())
+    {
+        /* Launch the transfer of the pairlist to the GPU.
+         *
+         * NOTE: The launch overhead is currently not timed separately
+         */
+        Nbnxm::gpu_init_pairlist(nbv->gpu_nbv,
+                                 nbl_list->nblGpu[0],
+                                 iLocality);
+    }
 }
 
 void nbnxnPrepareListForDynamicPruning(nbnxn_pairlist_set_t *listSet)
index ae3a38d3baa0de6bef4eb2ded199dc5c7ec43d47..a8924efefc1656d7845d27c03a2cd48c55c73015 100644 (file)
@@ -61,6 +61,7 @@
 #include "gromacs/mdtypes/interaction_const.h"
 #include "gromacs/mdtypes/state.h"
 #include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/nbnxm_geometry.h"
 #include "gromacs/pbcutil/pbc.h"
 #include "gromacs/topology/topology.h"
 #include "gromacs/utility/cstringutil.h"
diff --git a/src/gromacs/nbnxm/pairlistset.cpp b/src/gromacs/nbnxm/pairlistset.cpp
new file mode 100644 (file)
index 0000000..8454b49
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#include "gmxpre.h"
+
+#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/pairlist.h"
+
+int nbnxnNumStepsWithPairlist(const nonbonded_verlet_t         &nbv,
+                              const Nbnxm::InteractionLocality  iLocality,
+                              const int64_t                     step)
+{
+    return step - nbv.pairlistSets[iLocality].outerListCreationStep; /* outerListCreationStep is the step passed to the last nbnxn_make_pairlist() call for this locality; NOTE(review): the int64_t difference is narrowed to int - presumably always small, confirm */
+}
+
+bool nbnxnIsDynamicPairlistPruningStep(const nonbonded_verlet_t         &nbv,
+                                       const Nbnxm::InteractionLocality  iLocality,
+                                       const int64_t                     step)
+{
+    return nbnxnNumStepsWithPairlist(nbv, iLocality, step) % nbv.listParams->nstlistPrune == 0; /* true every nstlistPrune steps counted from list creation, including the creation step itself; NOTE(review): assumes nstlistPrune is non-zero - confirm against callers */
+}
index 885e8f07be42b643b005cb5acab4abe5e1d4d91d..b366b797307692b43bc080782775a3a48aa2e192 100644 (file)
@@ -61,18 +61,9 @@ typedef void nbnxn_alloc_t (void **ptr, size_t nbytes);
  */
 typedef void nbnxn_free_t (void *ptr);
 
-/* Tells if the pair-list corresponding to nb_kernel_type is simple.
- * Returns FALSE for super-sub type pair-list.
- */
-gmx_bool nbnxn_kernel_pairlist_simple(int nb_kernel_type);
-
-/* Due to the cluster size the effective pair-list is longer than
- * that of a simple atom pair-list. This function gives the extra distance.
- */
-real nbnxn_get_rlist_effective_inc(int cluster_size, real atom_density);
-
 /* Allocates and initializes a pair search data structure */
-nbnxn_search *nbnxn_init_search(const ivec                *n_dd_cells,
+nbnxn_search *nbnxn_init_search(int                        ePBC,
+                                const ivec                *n_dd_cells,
                                 const gmx_domdec_zones_t  *zones,
                                 gmx_bool                   bFEP,
                                 int                        nthread_max);
@@ -81,23 +72,6 @@ nbnxn_search *nbnxn_init_search(const ivec                *n_dd_cells,
 void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list,
                              gmx_bool simple, gmx_bool combined);
 
-/* Make a pair-list with radius rlist, store it in nbl.
- * The parameter min_ci_balanced sets the minimum required
- * number or roughly equally sized ci blocks in nbl.
- * When set >0 ci lists will be chopped up when the estimate
- * for the number of equally sized lists is below min_ci_balanced.
- * With perturbed particles, also a group scheme style nbl_fep list is made.
- */
-void nbnxn_make_pairlist(nbnxn_search               *nbs,
-                         nbnxn_atomdata_t           *nbat,
-                         const t_blocka             *excl,
-                         real                        rlist,
-                         int                         min_ci_balanced,
-                         nbnxn_pairlist_set_t       *nbl_list,
-                         Nbnxm::InteractionLocality  iloc,
-                         int                         nb_kernel_type,
-                         t_nrnb                     *nrnb);
-
 /*! \brief Prepare the list-set produced by the search for dynamic pruning
  *
  * \param[in,out] listSet  The list-set to prepare for dynamic pruning.
index 76938e744df0acb5190a0dc754b6607d840c573d..38d62fad5c61725b5b606f6354792d897c499355 100644 (file)
@@ -49,8 +49,7 @@ void NbnxnDispatchPruneKernel(nonbonded_verlet_t               *nbv,
                               const Nbnxm::InteractionLocality  ilocality,
                               const rvec                       *shift_vec)
 {
-    nonbonded_verlet_group_t &nbvg       = nbv->grp[ilocality];
-    nbnxn_pairlist_set_t     *nbl_lists  = &nbvg.nbl_lists;
+    nbnxn_pairlist_set_t     *nbl_lists  = &nbv->pairlistSets[ilocality];
     const nbnxn_atomdata_t   *nbat       = nbv->nbat;
     const real                rlistInner = nbv->listParams->rlistInner;
 
@@ -63,7 +62,7 @@ void NbnxnDispatchPruneKernel(nonbonded_verlet_t               *nbv,
     {
         NbnxnPairlistCpu *nbl = nbl_lists->nbl[i];
 
-        switch (nbvg.kernel_type)
+        switch (nbv->kernelType_)
         {
             case nbnxnk4xN_SIMD_4xN:
                 nbnxn_kernel_prune_4xn(nbl, nbat, shift_vec, rlistInner);