Fix random typos
diff --git a/src/gromacs/nbnxm/pairlist.cpp b/src/gromacs/nbnxm/pairlist.cpp
index b76ad3e9a75a5537ef9a3870ab286fda58ff2329..cd44bc180a691aa563781b218f622d569814150a 100644
@@ -2,7 +2,7 @@
  * This file is part of the GROMACS molecular simulation package.
  *
  * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
- * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #include "gromacs/domdec/domdec_struct.h"
 #include "gromacs/gmxlib/nrnb.h"
 #include "gromacs/math/functions.h"
+#include "gromacs/math/units.h"
 #include "gromacs/math/utilities.h"
 #include "gromacs/math/vec.h"
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
 #include "gromacs/mdtypes/group.h"
 #include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/mdtypes/nblist.h"
 #include "gromacs/nbnxm/atomdata.h"
 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 #include "gromacs/pbcutil/ishift.h"
@@ -83,7 +85,7 @@ using BoundingBox1D = Nbnxm::BoundingBox1D; // TODO: Remove when refactoring thi
 
 using Grid = Nbnxm::Grid; // TODO: Remove when refactoring this file
 
-// Convience alias for partial Nbnxn namespace usage
+// Convenience alias for partial Nbnxn namespace usage
 using InteractionLocality = gmx::InteractionLocality;
 
 /* We shift the i-particles backward for PBC.
@@ -101,7 +103,7 @@ enum class NbnxnLayout
     Gpu8x8x8   // i-cluster size 8, j-cluster size 8 + super-clustering
 };
 
-#if GMX_SIMD
+#if defined(GMX_NBNXN_SIMD_4XN) || defined(GMX_NBNXN_SIMD_2XNN)
 /* Returns the j-cluster size */
 template<NbnxnLayout layout>
 static constexpr int jClusterSize()
@@ -119,7 +121,8 @@ static constexpr int jClusterSize()
  *
  * \tparam    jClusterSize      The number of atoms in a j-cluster
  * \tparam    jSubClusterIndex  The j-sub-cluster index (0/1), used when size(j-cluster) <
- * size(i-cluster) \param[in] ci                The i-cluster index
+ *                              size(i-cluster)
+ * \param[in] ci                The i-cluster index
  */
 template<int jClusterSize, int jSubClusterIndex>
 static inline int cjFromCi(int ci)
@@ -156,7 +159,8 @@ static inline int cjFromCi(int ci)
  *
  * \tparam    layout            The pair-list layout
  * \tparam    jSubClusterIndex  The j-sub-cluster index (0/1), used when size(j-cluster) <
- * size(i-cluster) \param[in] ci                The i-cluster index
+ *                              size(i-cluster)
+ * \param[in] ci                The i-cluster index
  */
 template<NbnxnLayout layout, int jSubClusterIndex>
 static inline int cjFromCi(int ci)
@@ -214,45 +218,21 @@ static inline int xIndexFromCj(int cj)
         return cj * STRIDE_P8;
     }
 }
-#endif // GMX_SIMD
-
+#endif // defined(GMX_NBNXN_SIMD_4XN) || defined(GMX_NBNXN_SIMD_2XNN)
 
-void nbnxn_init_pairlist_fep(t_nblist* nl)
+static constexpr int sizeNeededForBufferFlags(const int numAtoms)
 {
-    nl->type      = GMX_NBLIST_INTERACTION_FREE_ENERGY;
-    nl->igeometry = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE;
-    /* The interaction functions are set in the free energy kernel fuction */
-    nl->ivdw     = -1;
-    nl->ivdwmod  = -1;
-    nl->ielec    = -1;
-    nl->ielecmod = -1;
-
-    nl->maxnri   = 0;
-    nl->maxnrj   = 0;
-    nl->nri      = 0;
-    nl->nrj      = 0;
-    nl->iinr     = nullptr;
-    nl->gid      = nullptr;
-    nl->shift    = nullptr;
-    nl->jindex   = nullptr;
-    nl->jjnr     = nullptr;
-    nl->excl_fep = nullptr;
+    return (numAtoms + NBNXN_BUFFERFLAG_SIZE - 1) / NBNXN_BUFFERFLAG_SIZE;
 }
 
-static void init_buffer_flags(nbnxn_buffer_flags_t* flags, int natoms)
+// Resets current flags to 0 and adds more flags if needed.
+static void resizeAndZeroBufferFlags(std::vector<gmx_bitmask_t>* flags, const int numAtoms)
 {
-    flags->nflag = (natoms + NBNXN_BUFFERFLAG_SIZE - 1) / NBNXN_BUFFERFLAG_SIZE;
-    if (flags->nflag > flags->flag_nalloc)
-    {
-        flags->flag_nalloc = over_alloc_large(flags->nflag);
-        srenew(flags->flag, flags->flag_nalloc);
-    }
-    for (int b = 0; b < flags->nflag; b++)
-    {
-        bitmask_clear(&(flags->flag[b]));
-    }
+    flags->clear();
+    flags->resize(sizeNeededForBufferFlags(numAtoms), gmx_bitmask_t{ 0 });
 }
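 
 /* Editor's note (illustrative, not part of this change): assuming
  * NBNXN_BUFFERFLAG_SIZE is 16, sizeNeededForBufferFlags(100) returns
  * (100 + 15)/16 = 7, i.e. the block count is rounded up so that every
  * atom maps into exactly one gmx_bitmask_t flag block. */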
 
+
 /* Returns the pair-list cutoff between a bounding box and a grid cell given an atom-to-atom pair-list cutoff
  *
  * Given a cutoff distance between atoms, this function returns the cutoff
@@ -515,7 +495,7 @@ clusterpair_in_range(const NbnxnPairlistGpuWork& work, int si, int csj, int stri
     static_assert(c_nbnxnGpuClusterSize == 8 || c_nbnxnGpuClusterSize == 4,
                   "A cluster is hard-coded to 4/8 atoms.");
 
-    Simd4Real rc2_S = Simd4Real(rlist2);
+    Simd4Real rc2_S{ rlist2 };
 
     const real* x_i = work.iSuperClusterData.xSimd.data();
 
@@ -677,17 +657,13 @@ NbnxnPairlistGpu::NbnxnPairlistGpu(gmx::PinningPolicy pinningPolicy) :
 }
 
 // TODO: Move to pairlistset.cpp
-PairlistSet::PairlistSet(const InteractionLocality locality, const PairlistParams& pairlistParams) :
-    locality_(locality),
-    params_(pairlistParams)
+PairlistSet::PairlistSet(const PairlistParams& pairlistParams) :
+    params_(pairlistParams),
+    combineLists_(sc_isGpuPairListType[pairlistParams.pairlistType]), // Currently GPU lists are always combined
+    isCpuType_(!sc_isGpuPairListType[pairlistParams.pairlistType])
 {
-    isCpuType_ = (params_.pairlistType == PairlistType::Simple4x2
-                  || params_.pairlistType == PairlistType::Simple4x4
-                  || params_.pairlistType == PairlistType::Simple4x8);
-    // Currently GPU lists are always combined
-    combineLists_ = !isCpuType_;
 
-    const int numLists = gmx_omp_nthreads_get(emntNonbonded);
+    const int numLists = gmx_omp_nthreads_get(ModuleMultiThread::Nonbonded);
 
     if (!combineLists_ && numLists > NBNXN_BUFFERFLAG_MAX_THREADS)
     {
@@ -695,7 +671,9 @@ PairlistSet::PairlistSet(const InteractionLocality locality, const PairlistParam
                   "%d OpenMP threads were requested. Since the non-bonded force buffer reduction "
                   "is prohibitively slow with more than %d threads, we do not allow this. Use %d "
                   "or less OpenMP threads.",
-                  numLists, NBNXN_BUFFERFLAG_MAX_THREADS, NBNXN_BUFFERFLAG_MAX_THREADS);
+                  numLists,
+                  NBNXN_BUFFERFLAG_MAX_THREADS,
+                  NBNXN_BUFFERFLAG_MAX_THREADS);
     }
 
     if (isCpuType_)
@@ -734,7 +712,6 @@ PairlistSet::PairlistSet(const InteractionLocality locality, const PairlistParam
                  * impacts performance.
                  */
                 fepLists_[i] = std::make_unique<t_nblist>();
-                nbnxn_init_pairlist_fep(fepLists_[i].get());
             }
             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
         }
@@ -752,18 +729,30 @@ static void print_nblist_statistics(FILE*                   fp,
 
     fprintf(fp, "nbl nci %zu ncj %d\n", nbl.ci.size(), nbl.ncjInUse);
     const int numAtomsJCluster = grid.geometry().numAtomsJCluster;
+
+    if (grid.numCells() == 0)
+    {
+        return;
+    }
+
     const double numAtomsPerCell = nbl.ncjInUse / static_cast<double>(grid.numCells()) * numAtomsJCluster;
-    fprintf(fp, "nbl na_cj %d rl %g ncp %d per cell %.1f atoms %.1f ratio %.2f\n", nbl.na_cj, rl,
-            nbl.ncjInUse, nbl.ncjInUse / static_cast<double>(grid.numCells()), numAtomsPerCell,
+    fprintf(fp,
+            "nbl na_cj %d rl %g ncp %d per cell %.1f atoms %.1f ratio %.2f\n",
+            nbl.na_cj,
+            rl,
+            nbl.ncjInUse,
+            nbl.ncjInUse / static_cast<double>(grid.numCells()),
+            numAtomsPerCell,
             numAtomsPerCell
                     / (0.5 * 4.0 / 3.0 * M_PI * rl * rl * rl * grid.numCells() * numAtomsJCluster
                        / (dims.gridSize[XX] * dims.gridSize[YY] * dims.gridSize[ZZ])));
 
-    fprintf(fp, "nbl average j cell list length %.1f\n",
+    fprintf(fp,
+            "nbl average j cell list length %.1f\n",
             0.25 * nbl.ncjInUse / std::max(static_cast<double>(nbl.ci.size()), 1.0));
 
-    int cs[SHIFTS] = { 0 };
-    int npexcl     = 0;
+    int cs[gmx::c_numShiftVectors] = { 0 };
+    int npexcl                     = 0;
     for (const nbnxn_ci_t& ciEntry : nbl.ci)
     {
         cs[ciEntry.shift & NBNXN_CI_SHIFT] += ciEntry.cj_ind_end - ciEntry.cj_ind_start;
@@ -775,9 +764,12 @@ static void print_nblist_statistics(FILE*                   fp,
             j++;
         }
     }
-    fprintf(fp, "nbl cell pairs, total: %zu excl: %d %.1f%%\n", nbl.cj.size(), npexcl,
+    fprintf(fp,
+            "nbl cell pairs, total: %zu excl: %d %.1f%%\n",
+            nbl.cj.size(),
+            npexcl,
             100 * npexcl / std::max(static_cast<double>(nbl.cj.size()), 1.0));
-    for (int s = 0; s < SHIFTS; s++)
+    for (int s = 0; s < gmx::c_numShiftVectors; s++)
     {
         if (cs[s] > 0)
         {
@@ -795,12 +787,21 @@ static void print_nblist_statistics(FILE*                   fp,
     const Grid&             grid = gridSet.grids()[0];
     const Grid::Dimensions& dims = grid.dimensions();
 
-    fprintf(fp, "nbl nsci %zu ncj4 %zu nsi %d excl4 %zu\n", nbl.sci.size(), nbl.cj4.size(),
-            nbl.nci_tot, nbl.excl.size());
+    fprintf(fp,
+            "nbl nsci %zu ncj4 %zu nsi %d excl4 %zu\n",
+            nbl.sci.size(),
+            nbl.cj4.size(),
+            nbl.nci_tot,
+            nbl.excl.size());
     const int numAtomsCluster = grid.geometry().numAtomsICluster;
     const double numAtomsPerCell = nbl.nci_tot / static_cast<double>(grid.numClusters()) * numAtomsCluster;
-    fprintf(fp, "nbl na_c %d rl %g ncp %d per cell %.1f atoms %.1f ratio %.2f\n", nbl.na_ci, rl,
-            nbl.nci_tot, nbl.nci_tot / static_cast<double>(grid.numClusters()), numAtomsPerCell,
+    fprintf(fp,
+            "nbl na_c %d rl %g ncp %d per cell %.1f atoms %.1f ratio %.2f\n",
+            nbl.na_ci,
+            rl,
+            nbl.nci_tot,
+            nbl.nci_tot / static_cast<double>(grid.numClusters()),
+            numAtomsPerCell,
             numAtomsPerCell
                     / (0.5 * 4.0 / 3.0 * M_PI * rl * rl * rl * grid.numClusters() * numAtomsCluster
                        / (dims.gridSize[XX] * dims.gridSize[YY] * dims.gridSize[ZZ])));
@@ -837,14 +838,20 @@ static void print_nblist_statistics(FILE*                   fp,
         sum_nsp /= nbl.sci.size();
         sum_nsp2 /= nbl.sci.size();
     }
-    fprintf(fp, "nbl #cluster-pairs: av %.1f stddev %.1f max %d\n", sum_nsp,
-            std::sqrt(sum_nsp2 - sum_nsp * sum_nsp), nsp_max);
+    fprintf(fp,
+            "nbl #cluster-pairs: av %.1f stddev %.1f max %d\n",
+            sum_nsp,
+            std::sqrt(sum_nsp2 - sum_nsp * sum_nsp),
+            nsp_max);
 
     if (!nbl.cj4.empty())
     {
         for (int b = 0; b <= c_gpuNumClusterPerCell; b++)
         {
-            fprintf(fp, "nbl j-list #i-subcell %d %7d %4.1f\n", b, c[b],
+            fprintf(fp,
+                    "nbl j-list #i-subcell %d %7d %4.1f\n",
+                    b,
+                    c[b],
                     100.0 * c[b] / size_t{ nbl.cj4.size() * c_nbnxnGpuJgroupSize });
         }
     }
@@ -960,23 +967,21 @@ gmx_unused static unsigned int get_imask_simd_j8(gmx_bool rdiag, int ci, int cj)
  * \param[in]     rbb2                The squared cut-off for putting cluster-pairs in the list based on bounding box distance only
  * \param[in,out] numDistanceChecks   The number of distance checks performed
  */
-static void makeClusterListSimple(const Grid&       jGrid,
-                                  NbnxnPairlistCpu* nbl,
-                                  int               icluster,
-                                  int               jclusterFirst,
-                                  int               jclusterLast,
-                                  bool              excludeSubDiagonal,
+static void makeClusterListSimple(const Grid&              jGrid,
+                                  NbnxnPairlistCpu*        nbl,
+                                  int                      icluster,
+                                  int                      jclusterFirst,
+                                  int                      jclusterLast,
+                                  bool                     excludeSubDiagonal,
                                   const real* gmx_restrict x_j,
                                   real                     rlist2,
                                   float                    rbb2,
-                                  int* gmx_restrict numDistanceChecks)
+                                  int* gmx_restrict        numDistanceChecks)
 {
     const BoundingBox* gmx_restrict bb_ci = nbl->work->iClusterData.bb.data();
-    const real* gmx_restrict x_ci         = nbl->work->iClusterData.x.data();
-
-    gmx_bool InRange;
+    const real* gmx_restrict        x_ci  = nbl->work->iClusterData.x.data();
 
-    InRange = FALSE;
+    bool InRange = false;
     while (!InRange && jclusterFirst <= jclusterLast)
     {
         real d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterFirst]);
@@ -989,7 +994,7 @@ static void makeClusterListSimple(const Grid&       jGrid,
          */
         if (d2 < rbb2)
         {
-            InRange = TRUE;
+            InRange = true;
         }
         else if (d2 < rlist2)
         {
@@ -1021,7 +1026,7 @@ static void makeClusterListSimple(const Grid&       jGrid,
         return;
     }
 
-    InRange = FALSE;
+    InRange = false;
     while (!InRange && jclusterLast > jclusterFirst)
     {
         real d2 = clusterBoundingBoxDistance2(bb_ci[0], jGrid.jBoundingBoxes()[jclusterLast]);
@@ -1034,7 +1039,7 @@ static void makeClusterListSimple(const Grid&       jGrid,
          */
         if (d2 < rbb2)
         {
-            InRange = TRUE;
+            InRange = true;
         }
         else if (d2 < rlist2)
         {
@@ -1132,21 +1137,14 @@ static void make_cluster_list_supersub(const Grid&       iGrid,
 
         const int cj_gl = jGrid.cellOffset() * c_gpuNumClusterPerCell + cj;
 
-        int ci1;
-        if (excludeSubDiagonal && sci == scj)
-        {
-            ci1 = subc + 1;
-        }
-        else
-        {
-            ci1 = iGrid.numClustersPerCell()[sci];
-        }
+        int ci1 = (excludeSubDiagonal && sci == scj) ? subc + 1 : iGrid.numClustersPerCell()[sci];
+
 
 #if NBNXN_BBXXXX
         /* Determine all ci1 bb distances in one call with SIMD4 */
         const int offset = packedBoundingBoxesIndex(cj) + (cj & (c_packedBoundingBoxesDimSize - 1));
-        clusterBoundingBoxDistance2_xxxx_simd4(jGrid.packedBoundingBoxes().data() + offset, ci1,
-                                               pbb_ci, d2l);
+        clusterBoundingBoxDistance2_xxxx_simd4(
+                jGrid.packedBoundingBoxes().data() + offset, ci1, pbb_ci, d2l);
         *numDistanceChecks += c_nbnxnGpuClusterSize * 2;
 #endif
 
@@ -1281,8 +1279,7 @@ struct JListRanges
 #ifndef DOXYGEN
 template<typename CjListType>
 JListRanges::JListRanges(int cjIndexStart, int cjIndexEnd, gmx::ArrayRef<const CjListType> cjList) :
-    cjIndexStart(cjIndexStart),
-    cjIndexEnd(cjIndexEnd)
+    cjIndexStart(cjIndexStart), cjIndexEnd(cjIndexEnd)
 {
     GMX_ASSERT(cjIndexEnd > cjIndexStart, "JListRanges should only be called with non-empty lists");
 
@@ -1307,23 +1304,20 @@ static inline int findJClusterInJList(int                             jCluster,
                                       const JListRanges&              ranges,
                                       gmx::ArrayRef<const CjListType> cjList)
 {
-    int index;
-
     if (jCluster < ranges.cjFirst + ranges.numDirect)
     {
         /* We can calculate the index directly using the offset */
-        index = ranges.cjIndexStart + jCluster - ranges.cjFirst;
+        return ranges.cjIndexStart + jCluster - ranges.cjFirst;
     }
     else
     {
         /* Search for jCluster using bisection */
-        index          = -1;
+        int index      = -1;
         int rangeStart = ranges.cjIndexStart + ranges.numDirect;
         int rangeEnd   = ranges.cjIndexEnd;
-        int rangeMiddle;
         while (index == -1 && rangeStart < rangeEnd)
         {
-            rangeMiddle = (rangeStart + rangeEnd) >> 1;
+            int rangeMiddle = (rangeStart + rangeEnd) >> 1;
 
             const int clusterMiddle = nblCj(cjList, rangeMiddle);
 
@@ -1340,9 +1334,8 @@ static inline int findJClusterInJList(int                             jCluster,
                 rangeStart = rangeMiddle + 1;
             }
         }
+        return index;
     }
-
-    return index;
 }
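 
 /* Editor's note on findJClusterInJList(): the first numDirect j-clusters
  * are stored contiguously, so their list index is an O(1) offset from
  * cjFirst; later entries are located by the O(log n) bisection above, and
  * -1 means jCluster is not present. */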
 
 // TODO: Get rid of the two functions below by renaming sci to ci (or something better)
@@ -1452,16 +1445,10 @@ static inline void fep_list_new_nri_copy(t_nblist* nlist)
 /* Reallocate FEP list for size nl->maxnri, TODO: replace by C++ */
 static void reallocate_nblist(t_nblist* nl)
 {
-    if (gmx_debug_at)
-    {
-        fprintf(debug,
-                "reallocating neigborlist (ielec=%d, ivdw=%d, igeometry=%d, type=%d), maxnri=%d\n",
-                nl->ielec, nl->ivdw, nl->igeometry, nl->type, nl->maxnri);
-    }
-    srenew(nl->iinr, nl->maxnri);
-    srenew(nl->gid, nl->maxnri);
-    srenew(nl->shift, nl->maxnri);
-    srenew(nl->jindex, nl->maxnri + 1);
+    nl->iinr.resize(nl->maxnri);
+    nl->gid.resize(nl->maxnri);
+    nl->shift.resize(nl->maxnri);
+    nl->jindex.resize(nl->maxnri + 1);
 }
 
 /* For load balancing of the free-energy lists over threads, we set
@@ -1483,22 +1470,16 @@ static void make_fep_list(gmx::ArrayRef<const int> atomIndices,
                           NbnxnPairlistCpu*        nbl,
                           gmx_bool                 bDiagRemoved,
                           nbnxn_ci_t*              nbl_ci,
-                          real gmx_unused shx,
-                          real gmx_unused shy,
-                          real gmx_unused shz,
-                          real gmx_unused rlist_fep2,
-                          const Grid&     iGrid,
-                          const Grid&     jGrid,
-                          t_nblist*       nlist)
+                          real gmx_unused          shx,
+                          real gmx_unused          shy,
+                          real gmx_unused          shz,
+                          real gmx_unused          rlist_fep2,
+                          const Grid&              iGrid,
+                          const Grid&              jGrid,
+                          t_nblist*                nlist)
 {
-    int      ci, cj_ind_start, cj_ind_end, cja, cjr;
-    int      nri_max;
-    int      gid_i = 0, gid_j, gid;
-    int      egp_shift, egp_mask;
-    int      gid_cj = 0;
-    int      ind_i, ind_j, ai, aj;
-    int      nri;
-    gmx_bool bFEP_i, bFEP_i_all;
+    int gid_i  = 0;
+    int gid_cj = 0;
 
     if (nbl_ci->cj_ind_end == nbl_ci->cj_ind_start)
     {
@@ -1506,16 +1487,16 @@ static void make_fep_list(gmx::ArrayRef<const int> atomIndices,
         return;
     }
 
-    ci = nbl_ci->ci;
+    const int ci = nbl_ci->ci;
 
-    cj_ind_start = nbl_ci->cj_ind_start;
-    cj_ind_end   = nbl_ci->cj_ind_end;
+    const int cj_ind_start = nbl_ci->cj_ind_start;
+    const int cj_ind_end   = nbl_ci->cj_ind_end;
 
     /* In worst case we have alternating energy groups
      * and create one list entry per atom pair, which means we need the size
      * of a cluster pair (na_ci*na_cj) times the number of cj's.
      */
-    nri_max = nbl->na_ci * nbl->na_cj * (cj_ind_end - cj_ind_start);
+    const int nri_max = nbl->na_ci * nbl->na_cj * (cj_ind_end - cj_ind_start);
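+    /* Editor's sketch (hypothetical numbers): for 4x4 clusters with 8
+     * j-clusters in this i-entry, nri_max = 4*4*8 = 128. */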
     if (nlist->nri + nri_max > nlist->maxnri)
     {
         nlist->maxnri = over_alloc_large(nlist->nri + nri_max);
@@ -1535,37 +1516,38 @@ static void make_fep_list(gmx::ArrayRef<const int> atomIndices,
         gmx_fatal(FARGS,
                   "The Verlet scheme with %dx%d kernels and free-energy only supports up to %zu "
                   "energy groups",
-                  iGrid.geometry().numAtomsICluster, numAtomsJCluster,
+                  iGrid.geometry().numAtomsICluster,
+                  numAtomsJCluster,
                   (sizeof(gid_cj) * 8) / numAtomsJCluster);
     }
 
-    egp_shift = nbatParams.neg_2log;
-    egp_mask  = (1 << egp_shift) - 1;
+    const int egp_shift = nbatParams.neg_2log;
+    const int egp_mask  = (1 << egp_shift) - 1;
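+    /* Editor's sketch (hypothetical numbers): with neg_2log = 2, i.e. up to
+     * four energy groups, egp_mask = 0x3 and atom j's group is extracted
+     * further down as (gid_cj >> (j * egp_shift)) & egp_mask. */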
 
     /* Loop over the atoms in the i sub-cell */
-    bFEP_i_all = TRUE;
+    bool bFEP_i_all = true;
     for (int i = 0; i < nbl->na_ci; i++)
     {
-        ind_i = ci * nbl->na_ci + i;
-        ai    = atomIndices[ind_i];
+        const int ind_i = ci * nbl->na_ci + i;
+        const int ai    = atomIndices[ind_i];
         if (ai >= 0)
         {
-            nri                    = nlist->nri;
+            int nri                = nlist->nri;
             nlist->jindex[nri + 1] = nlist->jindex[nri];
             nlist->iinr[nri]       = ai;
             /* The actual energy group pair index is set later */
             nlist->gid[nri]   = 0;
             nlist->shift[nri] = nbl_ci->shift & NBNXN_CI_SHIFT;
 
-            bFEP_i = iGrid.atomIsPerturbed(ci - iGrid.cellOffset(), i);
+            bool bFEP_i = iGrid.atomIsPerturbed(ci - iGrid.cellOffset(), i);
 
             bFEP_i_all = bFEP_i_all && bFEP_i;
 
             if (nlist->nrj + (cj_ind_end - cj_ind_start) * nbl->na_cj > nlist->maxnrj)
             {
                 nlist->maxnrj = over_alloc_small(nlist->nrj + (cj_ind_end - cj_ind_start) * nbl->na_cj);
-                srenew(nlist->jjnr, nlist->maxnrj);
-                srenew(nlist->excl_fep, nlist->maxnrj);
+                nlist->jjnr.resize(nlist->maxnrj);
+                nlist->excl_fep.resize(nlist->maxnrj);
             }
 
             if (ngid > 1)
@@ -1575,14 +1557,15 @@ static void make_fep_list(gmx::ArrayRef<const int> atomIndices,
 
             for (int cj_ind = cj_ind_start; cj_ind < cj_ind_end; cj_ind++)
             {
-                unsigned int fep_cj;
+                unsigned int fep_cj = 0U;
+                gid_cj              = 0;
 
-                cja = nbl->cj[cj_ind].cj;
+                const int cja = nbl->cj[cj_ind].cj;
 
                 if (numAtomsJCluster == jGrid.geometry().numAtomsICluster)
                 {
-                    cjr    = cja - jGrid.cellOffset();
-                    fep_cj = jGrid.fepBits(cjr);
+                    const int cjr = cja - jGrid.cellOffset();
+                    fep_cj        = jGrid.fepBits(cjr);
                     if (ngid > 1)
                     {
                         gid_cj = nbatParams.energrp[cja];
@@ -1590,7 +1573,7 @@ static void make_fep_list(gmx::ArrayRef<const int> atomIndices,
                 }
                 else if (2 * numAtomsJCluster == jGrid.geometry().numAtomsICluster)
                 {
-                    cjr = cja - jGrid.cellOffset() * 2;
+                    const int cjr = cja - jGrid.cellOffset() * 2;
                     /* Extract half of the ci fep/energrp mask */
                     fep_cj = (jGrid.fepBits(cjr >> 1) >> ((cjr & 1) * numAtomsJCluster))
                              & ((1 << numAtomsJCluster) - 1);
@@ -1602,7 +1585,7 @@ static void make_fep_list(gmx::ArrayRef<const int> atomIndices,
                 }
                 else
                 {
-                    cjr = cja - (jGrid.cellOffset() >> 1);
+                    const int cjr = cja - (jGrid.cellOffset() >> 1);
                     /* Combine two ci fep masks/energrp */
                     fep_cj = jGrid.fepBits(cjr * 2)
                              + (jGrid.fepBits(cjr * 2 + 1) << jGrid.geometry().numAtomsICluster);
@@ -1619,14 +1602,14 @@ static void make_fep_list(gmx::ArrayRef<const int> atomIndices,
                     for (int j = 0; j < nbl->na_cj; j++)
                     {
                         /* Is this interaction perturbed and not excluded? */
-                        ind_j = cja * nbl->na_cj + j;
-                        aj    = atomIndices[ind_j];
+                        const int ind_j = cja * nbl->na_cj + j;
+                        const int aj    = atomIndices[ind_j];
                         if (aj >= 0 && (bFEP_i || (fep_cj & (1 << j))) && (!bDiagRemoved || ind_j >= ind_i))
                         {
                             if (ngid > 1)
                             {
-                                gid_j = (gid_cj >> (j * egp_shift)) & egp_mask;
-                                gid   = GID(gid_i, gid_j, ngid);
+                                const int gid_j = (gid_cj >> (j * egp_shift)) & egp_mask;
+                                const int gid   = GID(gid_i, gid_j, ngid);
 
                                 if (nlist->nrj > nlist->jindex[nri] && nlist->gid[nri] != gid)
                                 {
@@ -1709,14 +1692,6 @@ static void make_fep_list(gmx::ArrayRef<const int> atomIndices,
                           const Grid&              jGrid,
                           t_nblist*                nlist)
 {
-    int                nri_max;
-    int                c_abs;
-    int                ind_i, ind_j, ai, aj;
-    int                nri;
-    gmx_bool           bFEP_i;
-    real               xi, yi, zi;
-    const nbnxn_cj4_t* cj4;
-
     const int numJClusterGroups = nbl_sci->numJClusterGroups();
     if (numJClusterGroups == 0)
     {
@@ -1736,7 +1711,8 @@ static void make_fep_list(gmx::ArrayRef<const int> atomIndices,
      * So for each of the na_sc i-atoms, we need at most one FEP list
      * entry for every max_nrj_fep j-atoms.
      */
-    nri_max = nbl->na_sc * nbl->na_cj * (1 + (numJClusterGroups * c_nbnxnGpuJgroupSize) / max_nrj_fep);
+    const int nri_max =
+            nbl->na_sc * nbl->na_cj * (1 + (numJClusterGroups * c_nbnxnGpuJgroupSize) / max_nrj_fep);
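+    /* Editor's sketch (hypothetical numbers): with na_sc = 64, na_cj = 8,
+     * 4 j-cluster groups of size 4 and max_nrj_fep = 40, the integer
+     * division gives nri_max = 64*8*(1 + 16/40) = 64*8*1 = 512. */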
     if (nlist->nri + nri_max > nlist->maxnri)
     {
         nlist->maxnri = over_alloc_large(nlist->nri + nri_max);
@@ -1746,38 +1722,39 @@ static void make_fep_list(gmx::ArrayRef<const int> atomIndices,
     /* Loop over the atoms in the i super-cluster */
     for (int c = 0; c < c_gpuNumClusterPerCell; c++)
     {
-        c_abs = sci * c_gpuNumClusterPerCell + c;
+        const int c_abs = sci * c_gpuNumClusterPerCell + c;
 
         for (int i = 0; i < nbl->na_ci; i++)
         {
-            ind_i = c_abs * nbl->na_ci + i;
-            ai    = atomIndices[ind_i];
+            const int ind_i = c_abs * nbl->na_ci + i;
+            const int ai    = atomIndices[ind_i];
             if (ai >= 0)
             {
-                nri                    = nlist->nri;
+                int nri                = nlist->nri;
                 nlist->jindex[nri + 1] = nlist->jindex[nri];
                 nlist->iinr[nri]       = ai;
                 /* With GPUs, energy groups are not supported */
                 nlist->gid[nri]   = 0;
                 nlist->shift[nri] = nbl_sci->shift & NBNXN_CI_SHIFT;
 
-                bFEP_i = iGrid.atomIsPerturbed(c_abs - iGrid.cellOffset() * c_gpuNumClusterPerCell, i);
+                const bool bFEP_i =
+                        iGrid.atomIsPerturbed(c_abs - iGrid.cellOffset() * c_gpuNumClusterPerCell, i);
 
-                xi = nbat->x()[ind_i * nbat->xstride + XX] + shx;
-                yi = nbat->x()[ind_i * nbat->xstride + YY] + shy;
-                zi = nbat->x()[ind_i * nbat->xstride + ZZ] + shz;
+                real xi = nbat->x()[ind_i * nbat->xstride + XX] + shx;
+                real yi = nbat->x()[ind_i * nbat->xstride + YY] + shy;
+                real zi = nbat->x()[ind_i * nbat->xstride + ZZ] + shz;
 
                 const int nrjMax = nlist->nrj + numJClusterGroups * c_nbnxnGpuJgroupSize * nbl->na_cj;
                 if (nrjMax > nlist->maxnrj)
                 {
                     nlist->maxnrj = over_alloc_small(nrjMax);
-                    srenew(nlist->jjnr, nlist->maxnrj);
-                    srenew(nlist->excl_fep, nlist->maxnrj);
+                    nlist->jjnr.resize(nlist->maxnrj);
+                    nlist->excl_fep.resize(nlist->maxnrj);
                 }
 
                 for (int cj4_ind = cj4_ind_start; cj4_ind < cj4_ind_end; cj4_ind++)
                 {
-                    cj4 = &nbl->cj4[cj4_ind];
+                    const nbnxn_cj4_t* cj4 = &nbl->cj4[cj4_ind];
 
                     for (int gcj = 0; gcj < c_nbnxnGpuJgroupSize; gcj++)
                     {
@@ -1794,25 +1771,22 @@ static void make_fep_list(gmx::ArrayRef<const int> atomIndices,
                             for (int j = 0; j < nbl->na_cj; j++)
                             {
                                 /* Is this interaction perturbed and not excluded? */
-                                ind_j = (jGrid.cellOffset() * c_gpuNumClusterPerCell + cjr) * nbl->na_cj + j;
-                                aj = atomIndices[ind_j];
+                                const int ind_j =
+                                        (jGrid.cellOffset() * c_gpuNumClusterPerCell + cjr) * nbl->na_cj + j;
+                                const int aj = atomIndices[ind_j];
                                 if (aj >= 0 && (bFEP_i || jGrid.atomIsPerturbed(cjr, j))
                                     && (!bDiagRemoved || ind_j >= ind_i))
                                 {
-                                    int          excl_pair;
-                                    unsigned int excl_bit;
-                                    real         dx, dy, dz;
-
                                     const int jHalf =
                                             j / (c_nbnxnGpuClusterSize / c_nbnxnGpuClusterpairSplit);
                                     nbnxn_excl_t& excl = get_exclusion_mask(nbl, cj4_ind, jHalf);
 
-                                    excl_pair = a_mod_wj(j) * nbl->na_ci + i;
-                                    excl_bit  = (1U << (gcj * c_gpuNumClusterPerCell + c));
+                                    int          excl_pair = a_mod_wj(j) * nbl->na_ci + i;
+                                    unsigned int excl_bit = (1U << (gcj * c_gpuNumClusterPerCell + c));
 
-                                    dx = nbat->x()[ind_j * nbat->xstride + XX] - xi;
-                                    dy = nbat->x()[ind_j * nbat->xstride + YY] - yi;
-                                    dz = nbat->x()[ind_j * nbat->xstride + ZZ] - zi;
+                                    real dx = nbat->x()[ind_j * nbat->xstride + XX] - xi;
+                                    real dy = nbat->x()[ind_j * nbat->xstride + YY] - yi;
+                                    real dz = nbat->x()[ind_j * nbat->xstride + ZZ] - zi;
 
                                     /* The unpruned GPU list has more than 2/3
                                      * of the atom pairs beyond rlist. Using
@@ -1871,9 +1845,9 @@ static void make_fep_list(gmx::ArrayRef<const int> atomIndices,
  * Sets all atom-pair exclusions from the topology stored in exclusions
  * as masks in the pair-list for i-super-cluster list entry iEntry.
  */
-static void setExclusionsForIEntry(const Nbnxm::GridSet& gridSet,
-                                   NbnxnPairlistGpu*     nbl,
-                                   gmx_bool              diagRemoved,
+static void setExclusionsForIEntry(const Nbnxm::GridSet&   gridSet,
+                                   NbnxnPairlistGpu*       nbl,
+                                   gmx_bool                diagRemoved,
                                    int gmx_unused          na_cj_2log,
                                    const nbnxn_sci_t&      iEntry,
                                    const ListOfLists<int>& exclusions)
@@ -1888,7 +1862,8 @@ static void setExclusionsForIEntry(const Nbnxm::GridSet& gridSet,
      * Note that here we can not use cj4_ind_end, since the last cj4
      * can be only partially filled, so we use cj_ind.
      */
-    const JListRanges ranges(iEntry.cj4_ind_start * c_nbnxnGpuJgroupSize, nbl->work->cj_ind,
+    const JListRanges ranges(iEntry.cj4_ind_start * c_nbnxnGpuJgroupSize,
+                             nbl->work->cj_ind,
                              gmx::makeConstArrayRef(nbl->cj4));
 
     GMX_ASSERT(nbl->na_ci == c_nbnxnGpuClusterSize, "na_ci should match the GPU cluster size");
@@ -2030,12 +2005,12 @@ static void sort_cj_excl(nbnxn_cj_t* cj, int ncj, NbnxnPairlistCpuWork* work)
 }
 
 /* Close this simple list i entry */
-static void closeIEntry(NbnxnPairlistCpu* nbl,
-                        int gmx_unused sp_max_av,
+static void closeIEntry(NbnxnPairlistCpu*   nbl,
+                        int gmx_unused      sp_max_av,
                         gmx_bool gmx_unused progBal,
-                        float gmx_unused nsp_tot_est,
-                        int gmx_unused thread,
-                        int gmx_unused nthread)
+                        float gmx_unused    nsp_tot_est,
+                        int gmx_unused      thread,
+                        int gmx_unused      nthread)
 {
     nbnxn_ci_t& ciEntry = nbl->ci.back();
 
@@ -2081,16 +2056,15 @@ static void split_sci_entry(NbnxnPairlistGpu* nbl,
                             int               thread,
                             int               nthread)
 {
-    int nsp_max;
+
+    int nsp_max = nsp_target_av;
 
     if (progBal)
     {
-        float nsp_est;
-
         /* Estimate the total numbers of ci's of the nblist combined
          * over all threads using the target number of ci's.
          */
-        nsp_est = (nsp_tot_est * thread) / nthread + nbl->nci_tot;
+        float nsp_est = (nsp_tot_est * thread) / nthread + nbl->nci_tot;
 
         /* The first ci blocks should be larger, to avoid overhead.
          * The last ci blocks should be smaller, to improve load balancing.
@@ -2100,10 +2074,6 @@ static void split_sci_entry(NbnxnPairlistGpu* nbl,
          */
         nsp_max = static_cast<int>(nsp_target_av * (nsp_tot_est * 1.5 / (nsp_est + nsp_tot_est)));
     }
-    else
-    {
-        nsp_max = nsp_target_av;
-    }
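 
     /* Editor's note (illustrative): on thread 0 nsp_est is small, so nsp_max
      * can reach ~1.5 * nsp_target_av; on the last thread nsp_est approaches
      * nsp_tot_est and nsp_max falls toward ~0.75 * nsp_target_av, giving the
      * large-first, small-last blocks described above. */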
 
     const int cj4_start = nbl->sci.back().cj4_ind_start;
     const int cj4_end   = nbl->sci.back().cj4_ind_end;
@@ -2230,9 +2200,9 @@ static void clear_pairlist_fep(t_nblist* nl)
 {
     nl->nri = 0;
     nl->nrj = 0;
-    if (nl->jindex == nullptr)
+    if (nl->jindex.empty())
     {
-        snew(nl->jindex, 1);
+        nl->jindex.resize(1);
     }
     nl->jindex[0] = 0;
 }
@@ -2295,8 +2265,8 @@ gmx_unused static void set_icell_bb_supersub(gmx::ArrayRef<const BoundingBox> bb
 gmx_unused static void set_icell_bb(const Grid& iGrid, int ci, real shx, real shy, real shz, NbnxnPairlistGpuWork* work)
 {
 #if NBNXN_BBXXXX
-    set_icell_bbxxxx_supersub(iGrid.packedBoundingBoxes(), ci, shx, shy, shz,
-                              work->iSuperClusterData.bbPacked.data());
+    set_icell_bbxxxx_supersub(
+            iGrid.packedBoundingBoxes(), ci, shx, shy, shz, work->iSuperClusterData.bbPacked.data());
 #else
     set_icell_bb_supersub(iGrid.iBoundingBoxes(), ci, shx, shy, shz, work->iSuperClusterData.bb.data());
 #endif
@@ -2352,12 +2322,12 @@ static void icell_set_x(int                             ci,
 }
 
 /* Copies PBC shifted super-cell atom coordinates x,y,z to working array */
-static void icell_set_x(int                       ci,
-                        real                      shx,
-                        real                      shy,
-                        real                      shz,
-                        int                       stride,
-                        const real*               x,
+static void icell_set_x(int                                  ci,
+                        real                                 shx,
+                        real                                 shy,
+                        real                                 shz,
+                        int                                  stride,
+                        const real*                          x,
                         ClusterDistanceKernelType gmx_unused kernelType,
                         NbnxnPairlistGpuWork*                work)
 {
@@ -2437,11 +2407,7 @@ static real effective_buffer_1x1_vs_MxN(const Grid& iGrid, const Grid& jGrid)
 /* Estimates the interaction volume^2 for non-local interactions */
 static real nonlocal_vol2(const struct gmx_domdec_zones_t* zones, const rvec ls, real r)
 {
-    real cl, ca, za;
-    real vold_est;
-    real vol2_est_tot;
-
-    vol2_est_tot = 0;
+    real vol2_est_tot = 0;
 
     /* Here we simply add up the non-home interaction volume^2 of the 1, 2 or
      * 3 1D decomposition zones. As these volumes are not additive,
@@ -2454,9 +2420,9 @@ static real nonlocal_vol2(const struct gmx_domdec_zones_t* zones, const rvec ls,
     {
         if (zones->shift[z][XX] + zones->shift[z][YY] + zones->shift[z][ZZ] == 1)
         {
-            cl = 0;
-            ca = 1;
-            za = 1;
+            real cl = 0;
+            real ca = 1;
+            real za = 1;
             for (int d = 0; d < DIM; d++)
             {
                 if (zones->shift[z][d] == 0)
@@ -2468,7 +2434,7 @@ static real nonlocal_vol2(const struct gmx_domdec_zones_t* zones, const rvec ls,
             }
 
             /* 4 octants of a sphere */
-            vold_est = 0.25 * M_PI * r * r * r * r;
+            real vold_est = 0.25 * M_PI * r * r * r * r;
             /* 4 quarter pie slices on the edges */
             vold_est += 4 * cl * M_PI / 6.0 * r * r * r;
             /* One rectangular volume on a face */
@@ -2493,7 +2459,6 @@ static void get_nsubpair_target(const Nbnxm::GridSet&     gridSet,
      * Maxwell is less sensitive to the exact value.
      */
     const int nsubpair_target_min = 36;
-    real      r_eff_sup, vol_est, nsp_est, nsp_est_nl;
 
     const Grid& grid = gridSet.grids()[0];
 
@@ -2521,22 +2486,20 @@ static void get_nsubpair_target(const Nbnxm::GridSet&     gridSet,
     ls[ZZ] = numAtomsCluster / (dims.atomDensity * ls[XX] * ls[YY]);
 
     /* The formulas below are a heuristic estimate of the average nsj per si */
-    r_eff_sup = rlist + nbnxn_get_rlist_effective_inc(numAtomsCluster, ls);
+    const real r_eff_sup = rlist + nbnxn_get_rlist_effective_inc(numAtomsCluster, ls);
 
-    if (!gridSet.domainSetup().haveMultipleDomains || gridSet.domainSetup().zones->n == 1)
-    {
-        nsp_est_nl = 0;
-    }
-    else
+    real nsp_est_nl = 0;
+    if (gridSet.domainSetup().haveMultipleDomains && gridSet.domainSetup().zones->n != 1)
     {
         nsp_est_nl = gmx::square(dims.atomDensity / numAtomsCluster)
                      * nonlocal_vol2(gridSet.domainSetup().zones, ls, r_eff_sup);
     }
 
+    real nsp_est = nsp_est_nl;
     if (iloc == InteractionLocality::Local)
     {
         /* Sub-cell interacts with itself */
-        vol_est = ls[XX] * ls[YY] * ls[ZZ];
+        real vol_est = ls[XX] * ls[YY] * ls[ZZ];
         /* 6/2 rectangular volume on the faces */
         vol_est += (ls[XX] * ls[YY] + ls[XX] * ls[ZZ] + ls[YY] * ls[ZZ]) * r_eff_sup;
         /* 12/2 quarter pie slices on the edges */
@@ -2552,7 +2515,7 @@ static void get_nsubpair_target(const Nbnxm::GridSet&     gridSet,
         /* Subtract the non-local pair count */
         nsp_est -= nsp_est_nl;
 
-        /* For small cut-offs nsp_est will be an underesimate.
+        /* For small cut-offs nsp_est will be an underestimate.
          * With DD nsp_est_nl is an overestimate so nsp_est can get negative.
          * So to avoid too small or negative nsp_est we set a minimum of
          * all cells interacting with all 3^3 direct neighbors (3^3-1)/2+1=14.
@@ -2568,10 +2531,6 @@ static void get_nsubpair_target(const Nbnxm::GridSet&     gridSet,
             fprintf(debug, "nsp_est local %5.1f non-local %5.1f\n", nsp_est, nsp_est_nl);
         }
     }
-    else
-    {
-        nsp_est = nsp_est_nl;
-    }
 
     /* Thus the (average) maximum j-list size should be as follows.
      * Since there is overhead, we shouldn't make the lists too small
@@ -2591,8 +2550,7 @@ static void print_nblist_ci_cj(FILE* fp, const NbnxnPairlistCpu& nbl)
 {
     for (const nbnxn_ci_t& ciEntry : nbl.ci)
     {
-        fprintf(fp, "ci %4d  shift %2d  ncj %3d\n", ciEntry.ci, ciEntry.shift,
-                ciEntry.cj_ind_end - ciEntry.cj_ind_start);
+        fprintf(fp, "ci %4d  shift %2d  ncj %3d\n", ciEntry.ci, ciEntry.shift, ciEntry.cj_ind_end - ciEntry.cj_ind_start);
 
         for (int j = ciEntry.cj_ind_start; j < ciEntry.cj_ind_end; j++)
         {
@@ -2623,8 +2581,7 @@ static void print_nblist_sci_cj(FILE* fp, const NbnxnPairlistGpu& nbl)
                 }
             }
         }
-        fprintf(fp, "ci %4d  shift %2d  ncj4 %2d ncp %3d\n", sci.sci, sci.shift,
-                sci.numJClusterGroups(), ncp);
+        fprintf(fp, "ci %4d  shift %2d  ncj4 %2d ncp %3d\n", sci.sci, sci.shift, sci.numJClusterGroups(), ncp);
     }
 }
 
@@ -2634,7 +2591,7 @@ static void combine_nblists(gmx::ArrayRef<const NbnxnPairlistGpu> nbls, NbnxnPai
     int nsci  = nblc->sci.size();
     int ncj4  = nblc->cj4.size();
     int nexcl = nblc->excl.size();
-    for (auto& nbl : nbls)
+    for (const auto& nbl : nbls)
     {
         nsci += nbl.sci.size();
         ncj4 += nbl.cj4.size();
@@ -2650,7 +2607,7 @@ static void combine_nblists(gmx::ArrayRef<const NbnxnPairlistGpu> nbls, NbnxnPai
     /* Each thread should copy its own data to the combined arrays,
      * as otherwise data will go back and forth between different caches.
      */
-    const int gmx_unused nthreads = gmx_omp_nthreads_get(emntPairsearch);
+    const int gmx_unused nthreads = gmx_omp_nthreads_get(ModuleMultiThread::Pairsearch);
 
 #pragma omp parallel for num_threads(nthreads) schedule(static)
     for (gmx::index n = 0; n < nbls.ssize(); n++)
@@ -2695,7 +2652,7 @@ static void combine_nblists(gmx::ArrayRef<const NbnxnPairlistGpu> nbls, NbnxnPai
         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
     }
 
-    for (auto& nbl : nbls)
+    for (const auto& nbl : nbls)
     {
         nblc->nci_tot += nbl.nci_tot;
     }
@@ -2723,7 +2680,7 @@ static void balance_fep_lists(gmx::ArrayRef<std::unique_ptr<t_nblist>> fepLists,
 
     const int nrj_target = (nrj_tot + numLists - 1) / numLists;
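     /* Editor's sketch (hypothetical numbers): nrj_tot = 1007 pairs over
      * numLists = 8 gives nrj_target = (1007 + 7)/8 = 126, rounding up. */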
 
-    GMX_ASSERT(gmx_omp_nthreads_get(emntNonbonded) == numLists,
+    GMX_ASSERT(gmx_omp_nthreads_get(ModuleMultiThread::Nonbonded) == numLists,
                "We should have as many work objects as FEP lists");
 
 #pragma omp parallel for schedule(static) num_threads(numLists)
@@ -2744,8 +2701,8 @@ static void balance_fep_lists(gmx::ArrayRef<std::unique_ptr<t_nblist>> fepLists,
             if (nri_tot > nbl->maxnri || nrj_tot > nbl->maxnrj)
             {
                 nbl->maxnrj = over_alloc_small(nrj_tot);
-                srenew(nbl->jjnr, nbl->maxnrj);
-                srenew(nbl->excl_fep, nbl->maxnrj);
+                nbl->jjnr.resize(nbl->maxnrj);
+                nbl->excl_fep.resize(nbl->maxnrj);
             }
 
             clear_pairlist_fep(nbl);
@@ -2762,10 +2719,8 @@ static void balance_fep_lists(gmx::ArrayRef<std::unique_ptr<t_nblist>> fepLists,
 
         for (int i = 0; i < nbls->nri; i++)
         {
-            int nrj;
-
             /* The number of pairs in this i-entry */
-            nrj = nbls->jindex[i + 1] - nbls->jindex[i];
+            const int nrj = nbls->jindex[i + 1] - nbls->jindex[i];
 
            /* Decide if list th_dest is too large and we should proceed
              * to the next destination list.
@@ -2856,24 +2811,22 @@ static float boundingbox_only_distance2(const Grid::Dimensions& iGridDims,
      * is only performed when only 1 out of 8 sub-cells is within range;
      * this is because the GPU is much faster than the CPU.
      */
-    real bbx, bby;
-    real rbb2;
 
-    bbx = 0.5 * (iGridDims.cellSize[XX] + jGridDims.cellSize[XX]);
-    bby = 0.5 * (iGridDims.cellSize[YY] + jGridDims.cellSize[YY]);
+    real bbx = 0.5 * (iGridDims.cellSize[XX] + jGridDims.cellSize[XX]);
+    real bby = 0.5 * (iGridDims.cellSize[YY] + jGridDims.cellSize[YY]);
     if (!simple)
     {
         bbx /= c_gpuNumClusterPerCellX;
         bby /= c_gpuNumClusterPerCellY;
     }
 
-    rbb2 = std::max(0.0, rlist - 0.5 * std::sqrt(bbx * bbx + bby * bby));
-    rbb2 = rbb2 * rbb2;
+    real rbb2 = std::max(0.0, rlist - 0.5 * std::sqrt(bbx * bbx + bby * bby));
+    rbb2      = rbb2 * rbb2;
 
 #if !GMX_DOUBLE
     return rbb2;
 #else
-    return (float)((1 + GMX_FLOAT_EPS) * rbb2);
+    return static_cast<float>((1 + GMX_FLOAT_EPS) * rbb2);
 #endif
 }
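 
 /* Editor's sketch (hypothetical numbers): with rlist = 1.0 nm and average
  * cell sizes bbx = bby = 0.5 nm, the half diagonal is ~0.354 nm, so
  * rbb2 = (1.0 - 0.354)^2 ~ 0.42 nm^2; cluster pairs whose bounding boxes
  * are closer than this enter the list without per-atom distance checks. */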
 
@@ -2882,7 +2835,6 @@ static int get_ci_block_size(const Grid& iGrid, const bool haveMultipleDomains,
     const int ci_block_enum      = 5;
     const int ci_block_denom     = 11;
     const int ci_block_min_atoms = 16;
-    int       ci_block;
 
     /* Here we decide how to distribute the blocks over the threads.
      * We use prime numbers to try to avoid that the grid size becomes
@@ -2897,8 +2849,8 @@ static int get_ci_block_size(const Grid& iGrid, const bool haveMultipleDomains,
      */
     GMX_ASSERT(iGrid.dimensions().numCells[XX] > 0, "Grid can't be empty");
     GMX_ASSERT(numLists > 0, "We need at least one list");
-    ci_block = (iGrid.numCells() * ci_block_enum)
-               / (ci_block_denom * iGrid.dimensions().numCells[XX] * numLists);
+    int ci_block = (iGrid.numCells() * ci_block_enum)
+                   / (ci_block_denom * iGrid.dimensions().numCells[XX] * numLists);
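+    /* Editor's sketch (hypothetical numbers): a 20x20x20-cell grid (8000
+     * cells, numCells[XX] = 20) with numLists = 8 gives
+     * ci_block = 8000*5 / (11*20*8) = 22 cells per block, before the
+     * block-size corrections below. */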
 
     const int numAtomsPerCell = iGrid.geometry().numAtomsPerCell;
 
@@ -2972,42 +2924,42 @@ static void makeClusterListWrapper(NbnxnPairlistCpu* nbl,
     switch (kernelType)
     {
         case ClusterDistanceKernelType::CpuPlainC:
-            makeClusterListSimple(jGrid, nbl, ci, firstCell, lastCell, excludeSubDiagonal,
-                                  nbat->x().data(), rlist2, rbb2, numDistanceChecks);
+            makeClusterListSimple(
+                    jGrid, nbl, ci, firstCell, lastCell, excludeSubDiagonal, nbat->x().data(), rlist2, rbb2, numDistanceChecks);
             break;
 #ifdef GMX_NBNXN_SIMD_4XN
         case ClusterDistanceKernelType::CpuSimd_4xM:
-            makeClusterListSimd4xn(jGrid, nbl, ci, firstCell, lastCell, excludeSubDiagonal,
-                                   nbat->x().data(), rlist2, rbb2, numDistanceChecks);
+            makeClusterListSimd4xn(
+                    jGrid, nbl, ci, firstCell, lastCell, excludeSubDiagonal, nbat->x().data(), rlist2, rbb2, numDistanceChecks);
             break;
 #endif
 #ifdef GMX_NBNXN_SIMD_2XNN
         case ClusterDistanceKernelType::CpuSimd_2xMM:
-            makeClusterListSimd2xnn(jGrid, nbl, ci, firstCell, lastCell, excludeSubDiagonal,
-                                    nbat->x().data(), rlist2, rbb2, numDistanceChecks);
+            makeClusterListSimd2xnn(
+                    jGrid, nbl, ci, firstCell, lastCell, excludeSubDiagonal, nbat->x().data(), rlist2, rbb2, numDistanceChecks);
             break;
 #endif
         default: GMX_ASSERT(false, "Unhandled kernel type");
     }
 }
 
-static void makeClusterListWrapper(NbnxnPairlistGpu* nbl,
-                                   const Grid& gmx_unused    iGrid,
-                                   const int                 ci,
-                                   const Grid&               jGrid,
-                                   const int                 firstCell,
-                                   const int                 lastCell,
-                                   const bool                excludeSubDiagonal,
-                                   const nbnxn_atomdata_t*   nbat,
-                                   const real                rlist2,
-                                   const real                rbb2,
+static void makeClusterListWrapper(NbnxnPairlistGpu*                    nbl,
+                                   const Grid& gmx_unused               iGrid,
+                                   const int                            ci,
+                                   const Grid&                          jGrid,
+                                   const int                            firstCell,
+                                   const int                            lastCell,
+                                   const bool                           excludeSubDiagonal,
+                                   const nbnxn_atomdata_t*              nbat,
+                                   const real                           rlist2,
+                                   const real                           rbb2,
                                    ClusterDistanceKernelType gmx_unused kernelType,
                                    int*                                 numDistanceChecks)
 {
     for (int cj = firstCell; cj <= lastCell; cj++)
     {
-        make_cluster_list_supersub(iGrid, jGrid, nbl, ci, cj, excludeSubDiagonal, nbat->xstride,
-                                   nbat->x().data(), rlist2, rbb2, numDistanceChecks);
+        make_cluster_list_supersub(
+                iGrid, jGrid, nbl, ci, cj, excludeSubDiagonal, nbat->xstride, nbat->x().data(), rlist2, rbb2, numDistanceChecks);
     }
 }
 
@@ -3063,10 +3015,10 @@ static void setBufferFlags(const NbnxnPairlistCpu& nbl,
 }
 
 static void setBufferFlags(const NbnxnPairlistGpu gmx_unused& nbl,
-                           int gmx_unused ncj_old_j,
-                           int gmx_unused gridj_flag_shift,
+                           int gmx_unused                     ncj_old_j,
+                           int gmx_unused                     gridj_flag_shift,
                            gmx_bitmask_t gmx_unused* gridj_flag,
-                           int gmx_unused th)
+                           int gmx_unused            th)
 {
     GMX_ASSERT(false, "This function should never be called");
 }
@@ -3091,20 +3043,11 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
                                      T*                      nbl,
                                      t_nblist*               nbl_fep)
 {
-    int            na_cj_2log;
     matrix         box;
     real           rl_fep2 = 0;
-    float          rbb2;
-    int            ci_b, ci, ci_x, ci_y, ci_xy;
     ivec           shp;
-    real           bx0, bx1, by0, by1, bz0, bz1;
-    real           bz1_frac;
-    real           d2cx, d2z, d2z_cx, d2z_cy, d2zx, d2zxy, d2xy;
-    int            cxf, cxl, cyf, cyf_x, cyl;
-    int            numDistanceChecks;
     int            gridi_flag_shift = 0, gridj_flag_shift = 0;
     gmx_bitmask_t* gridj_flag = nullptr;
-    int            ncj_old_i, ncj_old_j;
 
     if (jGrid.geometry().isSimple != pairlistIsSimple(*nbl)
         || iGrid.geometry().isSimple != pairlistIsSimple(*nbl))
@@ -3115,8 +3058,8 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
     sync_work(nbl);
     GMX_ASSERT(nbl->na_ci == jGrid.geometry().numAtomsICluster,
                "The cluster sizes in the list and grid should match");
-    nbl->na_cj = JClusterSizePerListType[pairlistType];
-    na_cj_2log = get_2log(nbl->na_cj);
+    nbl->na_cj           = JClusterSizePerListType[pairlistType];
+    const int na_cj_2log = get_2log(nbl->na_cj);
 
     nbl->rlist = rlist;
 
@@ -3126,7 +3069,7 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
         gridi_flag_shift = getBufferFlagShift(nbl->na_ci);
         gridj_flag_shift = getBufferFlagShift(nbl->na_cj);
 
-        gridj_flag = work->buffer_flags.flag;
+        gridj_flag = work->buffer_flags.data();
     }
 
     gridSet.getBox(box);
@@ -3144,7 +3087,7 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
          * We should not simply use rlist, since then we would not have
          * the small, effective buffering of the NxN lists.
         * The buffer is an overestimate, but the resulting cost for pairs
-         * beyond rlist is neglible compared to the FEP pairs within rlist.
+         * beyond rlist is negligible compared to the FEP pairs within rlist.
          */
         rl_fep2 = nbl->rlist + effective_buffer_1x1_vs_MxN(iGrid, jGrid);
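 
         /* Editor's note: effective_buffer_1x1_vs_MxN() supplies the extra
          * buffer a 1x1 atom-pair list needs to match the implicit buffering
          * of the MxN cluster list, so rl_fep2 >= nbl->rlist here. */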
 
@@ -3158,7 +3101,8 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
     const Grid::Dimensions& iGridDims = iGrid.dimensions();
     const Grid::Dimensions& jGridDims = jGrid.dimensions();
 
-    rbb2 = boundingbox_only_distance2(iGridDims, jGridDims, nbl->rlist, pairlistIsSimple(*nbl));
+    const float rbb2 =
+            boundingbox_only_distance2(iGridDims, jGridDims, nbl->rlist, pairlistIsSimple(*nbl));
 
     if (debug)
     {
@@ -3215,11 +3159,14 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
 
     if (debug)
     {
-        fprintf(debug, "nbl nc_i %d col.av. %.1f ci_block %d\n", iGrid.numCells(),
-                iGrid.numCells() / static_cast<double>(iGrid.numColumns()), ci_block);
+        fprintf(debug,
+                "nbl nc_i %d col.av. %.1f ci_block %d\n",
+                iGrid.numCells(),
+                iGrid.numCells() / static_cast<double>(iGrid.numColumns()),
+                ci_block);
     }
 
-    numDistanceChecks = 0;
+    int numDistanceChecks = 0;
 
     const real listRangeBBToJCell2 =
             gmx::square(listRangeForBoundingBoxToGridCell(rlist, jGrid.dimensions()));
@@ -3227,29 +3174,24 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
     /* Initialize ci_b and ci to one position before where we want them to start,
      * as they will both be incremented in next_ci.
      */
-    ci_b = -1;
-    ci   = th * ci_block - 1;
-    ci_x = 0;
-    ci_y = 0;
+    int ci_b = -1;
+    int ci   = th * ci_block - 1;
+    int ci_x = 0;
+    int ci_y = 0;
     while (next_ci(iGrid, nth, ci_block, &ci_x, &ci_y, &ci_b, &ci))
     {
         if (bSimple && flags_i[ci] == 0)
         {
             continue;
         }
-        ncj_old_i = getNumSimpleJClustersInList(*nbl);
+        const int ncj_old_i = getNumSimpleJClustersInList(*nbl);
 
-        d2cx = 0;
+        real d2cx = 0;
         if (!isIntraGridList && shp[XX] == 0)
         {
-            if (bSimple)
-            {
-                bx1 = bb_i[ci].upper.x;
-            }
-            else
-            {
-                bx1 = iGridDims.lowerCorner[XX] + (real(ci_x) + 1) * iGridDims.cellSize[XX];
-            }
+            const real bx1 =
+                    bSimple ? bb_i[ci].upper.x
+                            : iGridDims.lowerCorner[XX] + (real(ci_x) + 1) * iGridDims.cellSize[XX];
             if (bx1 < jGridDims.lowerCorner[XX])
             {
                 d2cx = gmx::square(jGridDims.lowerCorner[XX] - bx1);
@@ -3261,37 +3203,34 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
             }
         }
 
-        ci_xy = ci_x * iGridDims.numCells[YY] + ci_y;
+        int ci_xy = ci_x * iGridDims.numCells[YY] + ci_y;
 
         /* Loop over shift vectors in three dimensions */
         for (int tz = -shp[ZZ]; tz <= shp[ZZ]; tz++)
         {
             const real shz = real(tz) * box[ZZ][ZZ];
 
-            bz0 = bbcz_i[ci].lower + shz;
-            bz1 = bbcz_i[ci].upper + shz;
+            real bz0 = bbcz_i[ci].lower + shz;
+            real bz1 = bbcz_i[ci].upper + shz;
 
-            if (tz == 0)
-            {
-                d2z = 0;
-            }
-            else if (tz < 0)
+            real d2z = 0;
+            if (tz < 0)
             {
                 d2z = gmx::square(bz1);
             }
-            else
+            else if (tz > 0)
             {
                 d2z = gmx::square(bz0 - box[ZZ][ZZ]);
             }
 
-            d2z_cx = d2z + d2cx;
+            const real d2z_cx = d2z + d2cx;
 
             if (d2z_cx >= rlist2)
             {
                 continue;
             }
 
-            bz1_frac = bz1 / real(iGrid.numCellsInColumn(ci_xy));
+            real bz1_frac = bz1 / real(iGrid.numCellsInColumn(ci_xy));
             if (bz1_frac < 0)
             {
                 bz1_frac = 0;
@@ -3302,17 +3241,14 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
             {
                 const real shy = real(ty) * box[YY][YY] + real(tz) * box[ZZ][YY];
 
-                if (bSimple)
-                {
-                    by0 = bb_i[ci].lower.y + shy;
-                    by1 = bb_i[ci].upper.y + shy;
-                }
-                else
-                {
-                    by0 = iGridDims.lowerCorner[YY] + (real(ci_y)) * iGridDims.cellSize[YY] + shy;
-                    by1 = iGridDims.lowerCorner[YY] + (real(ci_y) + 1) * iGridDims.cellSize[YY] + shy;
-                }
+                const real by0 = bSimple ? bb_i[ci].lower.y + shy
+                                         : iGridDims.lowerCorner[YY]
+                                                   + (real(ci_y)) * iGridDims.cellSize[YY] + shy;
+                const real by1 = bSimple ? bb_i[ci].upper.y + shy
+                                         : iGridDims.lowerCorner[YY]
+                                                   + (real(ci_y) + 1) * iGridDims.cellSize[YY] + shy;
 
+                int cyf, cyl; // NOLINT(cppcoreguidelines-init-variables)
                 get_cell_range<YY>(by0, by1, jGridDims, d2z_cx, rlist, &cyf, &cyl);
 
                 if (cyf > cyl)
@@ -3320,7 +3256,7 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
                     continue;
                 }
 
-                d2z_cy = d2z;
+                real d2z_cy = d2z;
                 if (by1 < jGridDims.lowerCorner[YY])
                 {
                     d2z_cy += gmx::square(jGridDims.lowerCorner[YY] - by1);
@@ -3332,11 +3268,11 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
 
                 for (int tx = -shp[XX]; tx <= shp[XX]; tx++)
                 {
-                    const int shift = XYZ2IS(tx, ty, tz);
+                    const int shift = xyzToShiftIndex(tx, ty, tz);
 
-                    const bool excludeSubDiagonal = (isIntraGridList && shift == CENTRAL);
+                    const bool excludeSubDiagonal = (isIntraGridList && shift == gmx::c_centralShiftIndex);
 
-                    if (c_pbcShiftBackward && isIntraGridList && shift > CENTRAL)
+                    if (c_pbcShiftBackward && isIntraGridList && shift > gmx::c_centralShiftIndex)
                     {
                         continue;
                     }
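
The shift bookkeeping above moves from the XYZ2IS/CENTRAL macros to the typed gmx::c_centralShiftIndex constant and the xyzToShiftIndex function. A simplified constexpr sketch of such an indexing scheme, assuming a hypothetical cubic (2*D+1)^3 shift grid rather than the real GROMACS shift-table dimensions:

    namespace sketch
    {
    constexpr int c_dBox = 2;              // shifts range over [-c_dBox, c_dBox] (assumed)
    constexpr int c_dim  = 2 * c_dBox + 1; // shift-grid extent per dimension
    constexpr int c_centralShiftIndex = (c_dim * c_dim * c_dim) / 2;

    constexpr int xyzToShiftIndex(int x, int y, int z)
    {
        return (c_dim * (z + c_dBox) + (y + c_dBox)) * c_dim + (x + c_dBox);
    }
    // The zero shift must land exactly in the middle of the table.
    static_assert(xyzToShiftIndex(0, 0, 0) == c_centralShiftIndex, "central index mismatch");
    } // namespace sketch
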
@@ -3344,17 +3280,14 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
                     const real shx =
                             real(tx) * box[XX][XX] + real(ty) * box[YY][XX] + real(tz) * box[ZZ][XX];
 
-                    if (bSimple)
-                    {
-                        bx0 = bb_i[ci].lower.x + shx;
-                        bx1 = bb_i[ci].upper.x + shx;
-                    }
-                    else
-                    {
-                        bx0 = iGridDims.lowerCorner[XX] + (real(ci_x)) * iGridDims.cellSize[XX] + shx;
-                        bx1 = iGridDims.lowerCorner[XX] + (real(ci_x) + 1) * iGridDims.cellSize[XX] + shx;
-                    }
+                    const real bx0 = bSimple ? bb_i[ci].lower.x + shx
+                                             : iGridDims.lowerCorner[XX]
+                                                       + (real(ci_x)) * iGridDims.cellSize[XX] + shx;
+                    const real bx1 = bSimple ? bb_i[ci].upper.x + shx
+                                             : iGridDims.lowerCorner[XX]
+                                                       + (real(ci_x) + 1) * iGridDims.cellSize[XX] + shx;
 
+                    int cxf, cxl; // NOLINT(cppcoreguidelines-init-variables)
                     get_cell_range<XX>(bx0, bx1, jGridDims, d2z_cy, rlist, &cxf, &cxl);
 
                     if (cxf > cxl)
@@ -3374,13 +3307,19 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
 
                     set_icell_bb(iGrid, ci, shx, shy, shz, nbl->work.get());
 
-                    icell_set_x(cell0_i + ci, shx, shy, shz, nbat->xstride, nbat->x().data(),
-                                kernelType, nbl->work.get());
+                    icell_set_x(cell0_i + ci,
+                                shx,
+                                shy,
+                                shz,
+                                nbat->xstride,
+                                nbat->x().data(),
+                                kernelType,
+                                nbl->work.get());
 
                     for (int cx = cxf; cx <= cxl; cx++)
                     {
                         const real cx_real = cx;
-                        d2zx               = d2z;
+                        real       d2zx    = d2z;
                         if (jGridDims.lowerCorner[XX] + cx_real * jGridDims.cellSize[XX] > bx1)
                         {
                             d2zx += gmx::square(jGridDims.lowerCorner[XX]
@@ -3392,18 +3331,13 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
                                                 + (cx_real + 1) * jGridDims.cellSize[XX] - bx0);
                         }
 
-                        if (isIntraGridList && cx == 0 && (!c_pbcShiftBackward || shift == CENTRAL)
-                            && cyf < ci_y)
-                        {
-                            /* Leave the pairs with i > j.
-                             * Skip half of y when i and j have the same x.
-                             */
-                            cyf_x = ci_y;
-                        }
-                        else
-                        {
-                            cyf_x = cyf;
-                        }
+                        /* When true, leave out the pairs with i > j:
+                         * skip half of the y-range when i and j have the same x.
+                         */
+                        const bool skipHalfY = (isIntraGridList && cx == 0
+                                                && (!c_pbcShiftBackward || shift == gmx::c_centralShiftIndex)
+                                                && cyf < ci_y);
+                        const int  cyf_x     = skipHalfY ? ci_y : cyf;
 
                         for (int cy = cyf_x; cy <= cyl; cy++)
                         {
@@ -3413,7 +3347,7 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
                                     jGrid.firstCellInColumn(cx * jGridDims.numCells[YY] + cy + 1);
 
                             const real cy_real = cy;
-                            d2zxy              = d2zx;
+                            real       d2zxy   = d2zx;
                             if (jGridDims.lowerCorner[YY] + cy_real * jGridDims.cellSize[YY] > by1)
                             {
                                 d2zxy += gmx::square(jGridDims.lowerCorner[YY]
@@ -3441,14 +3375,14 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
                                  */
                                 int midCell =
                                         columnStart
-                                        + static_cast<int>(bz1_frac
-                                                           * static_cast<real>(columnEnd - columnStart));
+                                        + static_cast<int>(
+                                                bz1_frac * static_cast<real>(columnEnd - columnStart));
                                 if (midCell >= columnEnd)
                                 {
                                     midCell = columnEnd - 1;
                                 }
 
-                                d2xy = d2zxy - d2z;
+                                const real d2xy = d2zxy - d2z;
 
                                 /* Find the lowest cell that can possibly
                                  * be within range.
@@ -3509,7 +3443,7 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
                                     /* We want each atom/cell pair only once,
                                      * only use cj >= ci.
                                      */
-                                    if (!c_pbcShiftBackward || shift == CENTRAL)
+                                    if (!c_pbcShiftBackward || shift == gmx::c_centralShiftIndex)
                                     {
                                         firstCell = std::max(firstCell, ci);
                                     }
@@ -3522,11 +3456,20 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
                                                "column");
 
                                     /* For f buffer flags with simple lists */
-                                    ncj_old_j = getNumSimpleJClustersInList(*nbl);
-
-                                    makeClusterListWrapper(nbl, iGrid, ci, jGrid, firstCell, lastCell,
-                                                           excludeSubDiagonal, nbat, rlist2, rbb2,
-                                                           kernelType, &numDistanceChecks);
+                                    const int ncj_old_j = getNumSimpleJClustersInList(*nbl);
+
+                                    makeClusterListWrapper(nbl,
+                                                           iGrid,
+                                                           ci,
+                                                           jGrid,
+                                                           firstCell,
+                                                           lastCell,
+                                                           excludeSubDiagonal,
+                                                           nbat,
+                                                           rlist2,
+                                                           rbb2,
+                                                           kernelType,
+                                                           &numDistanceChecks);
 
                                     if (bFBufferFlag)
                                     {
@@ -3542,14 +3485,24 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
                     if (!exclusions.empty())
                     {
                         /* Set the exclusions for this ci list */
-                        setExclusionsForIEntry(gridSet, nbl, excludeSubDiagonal, na_cj_2log,
-                                               *getOpenIEntry(nbl), exclusions);
+                        setExclusionsForIEntry(
+                                gridSet, nbl, excludeSubDiagonal, na_cj_2log, *getOpenIEntry(nbl), exclusions);
                     }
 
                     if (haveFep)
                     {
-                        make_fep_list(gridSet.atomIndices(), nbat, nbl, excludeSubDiagonal,
-                                      getOpenIEntry(nbl), shx, shy, shz, rl_fep2, iGrid, jGrid, nbl_fep);
+                        make_fep_list(gridSet.atomIndices(),
+                                      nbat,
+                                      nbl,
+                                      excludeSubDiagonal,
+                                      getOpenIEntry(nbl),
+                                      shx,
+                                      shy,
+                                      shz,
+                                      rl_fep2,
+                                      iGrid,
+                                      jGrid,
+                                      nbl_fep);
                     }
 
                     /* Close this ci list */
@@ -3560,7 +3513,7 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
 
         if (bFBufferFlag && getNumSimpleJClustersInList(*nbl) > ncj_old_i)
         {
-            bitmask_init_bit(&(work->buffer_flags.flag[(iGrid.cellOffset() + ci) >> gridi_flag_shift]), th);
+            bitmask_init_bit(&(work->buffer_flags[(iGrid.cellOffset() + ci) >> gridi_flag_shift]), th);
         }
     }
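
The bitmask_init_bit call above marks, per thread, which coarse force-buffer blocks this pairlist wrote to. A minimal sketch of that marking step, with std::uint64_t standing in for gmx_bitmask_t and illustrative names:

    #include <cstdint>
    #include <vector>

    // Cells map to flag blocks via a right shift; each thread owns one bit.
    static void markCellForThread(std::vector<std::uint64_t>* bufferFlags,
                                  int cell, int flagShift, int thread)
    {
        const int block = cell >> flagShift;                 // flag block containing the cell
        (*bufferFlags)[block] |= std::uint64_t(1) << thread; // set this thread's bit
    }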
 
@@ -3583,43 +3536,43 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet&   gridSet,
 
 static void reduce_buffer_flags(gmx::ArrayRef<PairsearchWork> searchWork,
                                 int                           nsrc,
-                                const nbnxn_buffer_flags_t*   dest)
+                                gmx::ArrayRef<gmx_bitmask_t>  dest)
 {
     for (int s = 0; s < nsrc; s++)
     {
-        gmx_bitmask_t* flag = searchWork[s].buffer_flags.flag;
+        gmx::ArrayRef<gmx_bitmask_t> flags(searchWork[s].buffer_flags);
 
-        for (int b = 0; b < dest->nflag; b++)
+        for (size_t b = 0; b < dest.size(); b++)
         {
-            bitmask_union(&(dest->flag[b]), flag[b]);
+            gmx_bitmask_t& flag = dest[b];
+            bitmask_union(&flag, flags[b]);
         }
     }
 }
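
With the flags now held in containers, the reduction is a plain OR over equally sized arrays. A sketch of the same union using std::uint64_t in place of gmx_bitmask_t:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    static void reduceFlags(const std::vector<std::vector<std::uint64_t>>& perThreadFlags,
                            std::vector<std::uint64_t>*                    dest)
    {
        for (const auto& src : perThreadFlags) // one source mask array per search thread
        {
            for (std::size_t b = 0; b < dest->size(); b++)
            {
                (*dest)[b] |= src[b]; // bitmask_union equivalent for plain integers
            }
        }
    }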
 
-static void print_reduction_cost(const nbnxn_buffer_flags_t* flags, int nout)
+static void print_reduction_cost(gmx::ArrayRef<const gmx_bitmask_t> flags, int nout)
 {
-    int           nelem, nkeep, ncopy, nred, out;
-    gmx_bitmask_t mask_0;
+    int nelem = 0;
+    int nkeep = 0;
+    int ncopy = 0;
+    int nred  = 0;
 
-    nelem = 0;
-    nkeep = 0;
-    ncopy = 0;
-    nred  = 0;
+    gmx_bitmask_t mask_0; // NOLINT(cppcoreguidelines-init-variables)
     bitmask_init_bit(&mask_0, 0);
-    for (int b = 0; b < flags->nflag; b++)
+    for (const gmx_bitmask_t& flag_mask : flags)
     {
-        if (bitmask_is_equal(flags->flag[b], mask_0))
+        if (bitmask_is_equal(flag_mask, mask_0))
         {
             /* Only flag 0 is set, no copy or reduction required */
             nelem++;
             nkeep++;
         }
-        else if (!bitmask_is_zero(flags->flag[b]))
+        else if (!bitmask_is_zero(flag_mask))
         {
             int c = 0;
-            for (out = 0; out < nout; out++)
+            for (int out = 0; out < nout; out++)
             {
-                if (bitmask_is_set(flags->flag[b], out))
+                if (bitmask_is_set(flag_mask, out))
                 {
                     c++;
                 }
@@ -3635,12 +3588,15 @@ static void print_reduction_cost(const nbnxn_buffer_flags_t* flags, int nout)
             }
         }
     }
-
+    const auto numFlags = static_cast<double>(flags.size());
     fprintf(debug,
-            "nbnxn reduction: #flag %d #list %d elem %4.2f, keep %4.2f copy %4.2f red %4.2f\n",
-            flags->nflag, nout, nelem / static_cast<double>(flags->nflag),
-            nkeep / static_cast<double>(flags->nflag), ncopy / static_cast<double>(flags->nflag),
-            nred / static_cast<double>(flags->nflag));
+            "nbnxn reduction: #flag %zu #list %d elem %4.2f, keep %4.2f copy %4.2f red %4.2f\n",
+            flags.size(),
+            nout,
+            nelem / numFlags,
+            nkeep / numFlags,
+            ncopy / numFlags,
+            nred / numFlags);
 }
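
The statistics above classify each flag block by how many outputs wrote to it: only output 0 (keep in place), exactly one writer (a copy suffices), or several (a real reduction). A hedged sketch of that classification with std::uint64_t masks:

    #include <bitset>
    #include <cstdint>
    #include <vector>

    struct ReductionCost
    {
        int nkeep = 0, ncopy = 0, nred = 0;
    };

    static ReductionCost classifyFlags(const std::vector<std::uint64_t>& flags)
    {
        ReductionCost cost;
        for (const std::uint64_t mask : flags)
        {
            if (mask == 1) // only bit 0 set: no copy or reduction required
            {
                cost.nkeep++;
            }
            else if (mask != 0)
            {
                const int c = static_cast<int>(std::bitset<64>(mask).count());
                (c == 1 ? cost.ncopy : cost.nred)++; // one writer: copy; several: reduce
            }
        }
        return cost;
    }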
 
 /* Copies the list entries from src to dest when cjStart <= *cjGlobal < cjEnd.
@@ -3648,13 +3604,13 @@ static void print_reduction_cost(const nbnxn_buffer_flags_t* flags, int nout)
  * When setFlags==true, flag bit t is set in flag for all i and j clusters.
  */
 template<bool setFlags>
-static void copySelectedListRange(const nbnxn_ci_t* gmx_restrict srcCi,
+static void copySelectedListRange(const nbnxn_ci_t* gmx_restrict       srcCi,
                                   const NbnxnPairlistCpu* gmx_restrict src,
-                                  NbnxnPairlistCpu* gmx_restrict dest,
-                                  gmx_bitmask_t*                 flag,
-                                  int                            iFlagShift,
-                                  int                            jFlagShift,
-                                  int                            t)
+                                  NbnxnPairlistCpu* gmx_restrict       dest,
+                                  gmx_bitmask_t*                       flag,
+                                  int                                  iFlagShift,
+                                  int                                  jFlagShift,
+                                  int                                  t)
 {
     const int ncj = srcCi->cj_ind_end - srcCi->cj_ind_start;
 
@@ -3683,7 +3639,7 @@ static void copySelectedListRange(const nbnxn_ci_t* gmx_restrict srcCi,
     }
 }
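
copySelectedListRange takes setFlags as a template parameter, so the flag-marking work is resolved at compile time and the common no-flags instantiation carries no runtime branch. A generic sketch of the pattern, with illustrative names:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    template<bool setFlags>
    static void copyRange(const std::vector<int>&     src,
                          std::vector<int>*           dest,
                          std::vector<std::uint64_t>* flags,
                          int                         flagShift,
                          int                         thread)
    {
        for (std::size_t i = 0; i < src.size(); i++)
        {
            dest->push_back(src[i]);
            if (setFlags) // compile-time constant: dead code is eliminated
            {
                (*flags)[(dest->size() - 1) >> flagShift] |= std::uint64_t(1) << thread;
            }
        }
    }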
 
-#if defined(__GNUC__) && !defined(__clang__) && !defined(__ICC) && __GNUC__ == 7
+#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ == 7
 /* Avoid gcc 7 avx512 loop vectorization bug (actually only needed with -mavx512f) */
 #    pragma GCC push_options
 #    pragma GCC optimize("no-tree-vectorize")
@@ -3704,7 +3660,7 @@ static int countClusterpairs(gmx::ArrayRef<const NbnxnPairlistCpu> pairlists)
     return ncjTotal;
 }
 
-#if defined(__GNUC__) && !defined(__clang__) && !defined(__ICC) && __GNUC__ == 7
+#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ == 7
 #    pragma GCC pop_options
 #endif
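
Dropping the __ICC test narrows the workaround to compilers that really are gcc 7. The push_options/pop_options pair scopes the disabled optimization to just the affected functions; a self-contained sketch of the pattern:

    #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ == 7
    #    pragma GCC push_options
    #    pragma GCC optimize("no-tree-vectorize")
    #endif

    // A loop of the shape a buggy vectorizer could mis-compile.
    static int sumPositive(const int* values, int n)
    {
        int total = 0;
        for (int i = 0; i < n; i++)
        {
            total += (values[i] > 0) ? values[i] : 0;
        }
        return total;
    }

    #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ == 7
    #    pragma GCC pop_options
    #endif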
 
@@ -3740,7 +3696,7 @@ static void rebalanceSimpleLists(gmx::ArrayRef<const NbnxnPairlistCpu> srcSet,
         /* Note that the flags in the work struct (still) contain flags
          * for all entries that are present in srcSet->nbl[t].
          */
-        gmx_bitmask_t* flag = searchWork[t].buffer_flags.flag;
+        gmx_bitmask_t* flag = &searchWork[t].buffer_flags[0];
 
         int iFlagShift = getBufferFlagShift(dest.na_ci);
         int jFlagShift = getBufferFlagShift(dest.na_cj);
@@ -3767,8 +3723,8 @@ static void rebalanceSimpleLists(gmx::ArrayRef<const NbnxnPairlistCpu> srcSet,
                         }
                         else
                         {
-                            copySelectedListRange<false>(srcCi, src, &dest, flag, iFlagShift,
-                                                         jFlagShift, t);
+                            copySelectedListRange<false>(
+                                    srcCi, src, &dest, flag, iFlagShift, jFlagShift, t);
                         }
                     }
                     cjGlobal += ncj;
@@ -3895,7 +3851,7 @@ static Range<int> getIZoneRange(const Nbnxm::GridSet::DomainSetup& domainSetup,
 }
 
 /* Returns the j-zone range for pairlist construction for the given locality and i-zone */
-static Range<int> getJZoneRange(const gmx_domdec_zones_t& ddZones,
+static Range<int> getJZoneRange(const gmx_domdec_zones_t* ddZones,
                                 const InteractionLocality locality,
                                 const int                 iZone)
 {
@@ -3907,19 +3863,20 @@ static Range<int> getJZoneRange(const gmx_domdec_zones_t& ddZones,
     else if (iZone == 0)
     {
         /* Non-local: we need to avoid the local (zone 0 vs 0) interactions */
-        return { 1, *ddZones.iZones[iZone].jZoneRange.end() };
+        return { 1, *ddZones->iZones[iZone].jZoneRange.end() };
     }
     else
     {
         /* Non-local with non-local i-zone: use all j-zones */
-        return ddZones.iZones[iZone].jZoneRange;
+        return ddZones->iZones[iZone].jZoneRange;
     }
 }
 
 //! Prepares CPU lists produced by the search for dynamic pruning
 static void prepareListsForDynamicPruning(gmx::ArrayRef<NbnxnPairlistCpu> lists);
 
-void PairlistSet::constructPairlists(const Nbnxm::GridSet&         gridSet,
+void PairlistSet::constructPairlists(gmx::InteractionLocality      locality,
+                                     const Nbnxm::GridSet&         gridSet,
                                      gmx::ArrayRef<PairsearchWork> searchWork,
                                      nbnxn_atomdata_t*             nbat,
                                      const ListOfLists<int>&       exclusions,
@@ -3929,12 +3886,6 @@ void PairlistSet::constructPairlists(const Nbnxm::GridSet&         gridSet,
 {
     const real rlist = params_.rlistOuter;
 
-    int      nsubpair_target;
-    float    nsubpair_tot_est;
-    int      ci_block;
-    gmx_bool progBal;
-    int      np_tot, np_noq, np_hlj, nap;
-
     const int numLists = (isCpuType_ ? cpuLists_.size() : gpuLists_.size());
 
     if (debug)
@@ -3944,20 +3895,17 @@ void PairlistSet::constructPairlists(const Nbnxm::GridSet&         gridSet,
 
     nbat->bUseBufferFlags = (nbat->out.size() > 1);
     /* We should re-init the flags before making the first list */
-    if (nbat->bUseBufferFlags && locality_ == InteractionLocality::Local)
+    if (nbat->bUseBufferFlags && locality == InteractionLocality::Local)
     {
-        init_buffer_flags(&nbat->buffer_flags, nbat->numAtoms());
+        resizeAndZeroBufferFlags(&nbat->buffer_flags, nbat->numAtoms());
     }
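
resizeAndZeroBufferFlags replaces the old C-style init_buffer_flags and operates on the flag vector directly. A hedged guess at the shape of such a helper; the block size, mask type, and names here are assumptions, not the GROMACS definitions:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    constexpr int c_bufferFlagSize = 8; // assumed atoms per flag block

    static void resizeAndZeroFlags(std::vector<std::uint64_t>* flags, int numAtoms)
    {
        flags->resize((numAtoms + c_bufferFlagSize - 1) / c_bufferFlagSize);
        std::fill(flags->begin(), flags->end(), std::uint64_t(0)); // clear reused entries too
    }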
 
+    int   nsubpair_target  = 0;
+    float nsubpair_tot_est = 0.0F;
     if (!isCpuType_ && minimumIlistCountForGpuBalancing > 0)
     {
-        get_nsubpair_target(gridSet, locality_, rlist, minimumIlistCountForGpuBalancing,
-                            &nsubpair_target, &nsubpair_tot_est);
-    }
-    else
-    {
-        nsubpair_target  = 0;
-        nsubpair_tot_est = 0;
+        get_nsubpair_target(
+                gridSet, locality, rlist, minimumIlistCountForGpuBalancing, &nsubpair_target, &nsubpair_tot_est);
     }
 
     /* Clear all pair-lists */
@@ -3978,15 +3926,17 @@ void PairlistSet::constructPairlists(const Nbnxm::GridSet&         gridSet,
         }
     }
 
-    const gmx_domdec_zones_t& ddZones = *gridSet.domainSetup().zones;
+    const gmx_domdec_zones_t* ddZones = gridSet.domainSetup().zones;
+    GMX_ASSERT(locality == InteractionLocality::Local || ddZones != nullptr,
+               "Nonlocal interaction locality with null ddZones.");
 
-    const auto iZoneRange = getIZoneRange(gridSet.domainSetup(), locality_);
+    const auto iZoneRange = getIZoneRange(gridSet.domainSetup(), locality);
 
     for (const int iZone : iZoneRange)
     {
         const Grid& iGrid = gridSet.grids()[iZone];
 
-        const auto jZoneRange = getJZoneRange(ddZones, locality_, iZone);
+        const auto jZoneRange = getJZoneRange(ddZones, locality, iZone);
 
         for (int jZone : jZoneRange)
         {
@@ -3999,12 +3949,13 @@ void PairlistSet::constructPairlists(const Nbnxm::GridSet&         gridSet,
 
             searchCycleCounting->start(enbsCCsearch);
 
-            ci_block = get_ci_block_size(iGrid, gridSet.domainSetup().haveMultipleDomains, numLists);
+            const int ci_block =
+                    get_ci_block_size(iGrid, gridSet.domainSetup().haveMultipleDomains, numLists);
 
             /* With GPU: generate progressively smaller lists for
              * load balancing, for local-only or non-local with 2 zones.
              */
-            progBal = (locality_ == InteractionLocality::Local || ddZones.n <= 2);
+            const bool progBal = (locality == InteractionLocality::Local || ddZones->n <= 2);
 
 #pragma omp parallel for num_threads(numLists) schedule(static)
             for (int th = 0; th < numLists; th++)
@@ -4016,7 +3967,7 @@ void PairlistSet::constructPairlists(const Nbnxm::GridSet&         gridSet,
                      */
                     if (nbat->bUseBufferFlags && (iZone == 0 && jZone == 0))
                     {
-                        init_buffer_flags(&searchWork[th].buffer_flags, nbat->numAtoms());
+                        resizeAndZeroBufferFlags(&searchWork[th].buffer_flags, nbat->numAtoms());
                     }
 
                     if (combineLists_ && th > 0)
@@ -4035,17 +3986,43 @@ void PairlistSet::constructPairlists(const Nbnxm::GridSet&         gridSet,
                     /* Divide the i cells equally over the pairlists */
                     if (isCpuType_)
                     {
-                        nbnxn_make_pairlist_part(gridSet, iGrid, jGrid, &work, nbat, exclusions, rlist,
-                                                 params_.pairlistType, ci_block, nbat->bUseBufferFlags,
-                                                 nsubpair_target, progBal, nsubpair_tot_est, th,
-                                                 numLists, &cpuLists_[th], fepListPtr);
+                        nbnxn_make_pairlist_part(gridSet,
+                                                 iGrid,
+                                                 jGrid,
+                                                 &work,
+                                                 nbat,
+                                                 exclusions,
+                                                 rlist,
+                                                 params_.pairlistType,
+                                                 ci_block,
+                                                 nbat->bUseBufferFlags,
+                                                 nsubpair_target,
+                                                 progBal,
+                                                 nsubpair_tot_est,
+                                                 th,
+                                                 numLists,
+                                                 &cpuLists_[th],
+                                                 fepListPtr);
                     }
                     else
                     {
-                        nbnxn_make_pairlist_part(gridSet, iGrid, jGrid, &work, nbat, exclusions, rlist,
-                                                 params_.pairlistType, ci_block, nbat->bUseBufferFlags,
-                                                 nsubpair_target, progBal, nsubpair_tot_est, th,
-                                                 numLists, &gpuLists_[th], fepListPtr);
+                        nbnxn_make_pairlist_part(gridSet,
+                                                 iGrid,
+                                                 jGrid,
+                                                 &work,
+                                                 nbat,
+                                                 exclusions,
+                                                 rlist,
+                                                 params_.pairlistType,
+                                                 ci_block,
+                                                 nbat->bUseBufferFlags,
+                                                 nsubpair_target,
+                                                 progBal,
+                                                 nsubpair_tot_est,
+                                                 th,
+                                                 numLists,
+                                                 &gpuLists_[th],
+                                                 fepListPtr);
                     }
 
                     work.cycleCounter.stop();
@@ -4054,9 +4031,9 @@ void PairlistSet::constructPairlists(const Nbnxm::GridSet&         gridSet,
             }
             searchCycleCounting->stop(enbsCCsearch);
 
-            np_tot = 0;
-            np_noq = 0;
-            np_hlj = 0;
+            int np_tot = 0;
+            int np_noq = 0;
+            int np_hlj = 0;
             for (int th = 0; th < numLists; th++)
             {
                 inc_nrnb(nrnb, eNR_NBNXN_DIST2, searchWork[th].ndistc);
@@ -4075,14 +4052,9 @@ void PairlistSet::constructPairlists(const Nbnxm::GridSet&         gridSet,
                     np_tot += nbl.nci_tot;
                 }
             }
-            if (isCpuType_)
-            {
-                nap = cpuLists_[0].na_ci * cpuLists_[0].na_cj;
-            }
-            else
-            {
-                nap = gmx::square(gpuLists_[0].na_ci);
-            }
+            const int nap = isCpuType_ ? cpuLists_[0].na_ci * cpuLists_[0].na_cj
+                                       : gmx::square(gpuLists_[0].na_ci);
+
             natpair_ljq_ = (np_tot - np_noq) * nap - np_hlj * nap / 2;
             natpair_lj_  = np_noq * nap;
             natpair_q_   = np_hlj * nap / 2;
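
A worked example of this accounting, with made-up counts: for a CPU 4x4 layout, nap = 4 * 4 = 16 atom pairs per cluster pair, and half-LJ pairs contribute at half weight to the LJ+q total:

    #include <cstdio>

    int main()
    {
        const int nap    = 4 * 4;                             // na_ci * na_cj
        const int np_tot = 1000, np_noq = 200, np_hlj = 100;  // illustrative counts
        std::printf("ljq %d lj %d q %d\n",
                    (np_tot - np_noq) * nap - np_hlj * nap / 2, // 12800 - 800 = 12000
                    np_noq * nap,                               // 3200
                    np_hlj * nap / 2);                          // 800
        return 0;
    }
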
@@ -4133,7 +4105,7 @@ void PairlistSet::constructPairlists(const Nbnxm::GridSet&         gridSet,
 
     if (nbat->bUseBufferFlags)
     {
-        reduce_buffer_flags(searchWork, numLists, &nbat->buffer_flags);
+        reduce_buffer_flags(searchWork, numLists, nbat->buffer_flags);
     }
 
     if (gridSet.haveFep())
@@ -4187,7 +4159,7 @@ void PairlistSet::constructPairlists(const Nbnxm::GridSet&         gridSet,
 
         if (nbat->bUseBufferFlags)
         {
-            print_reduction_cost(&nbat->buffer_flags, numLists);
+            print_reduction_cost(nbat->buffer_flags, numLists);
         }
     }
 
@@ -4216,8 +4188,13 @@ void PairlistSets::construct(const InteractionLocality iLocality,
             "exclusions should either be empty or the number of lists should match the number of "
             "local i-atoms");
 
-    pairlistSet(iLocality).constructPairlists(gridSet, pairSearch->work(), nbat, exclusions,
-                                              minimumIlistCountForGpuBalancing_, nrnb,
+    pairlistSet(iLocality).constructPairlists(iLocality,
+                                              gridSet,
+                                              pairSearch->work(),
+                                              nbat,
+                                              exclusions,
+                                              minimumIlistCountForGpuBalancing_,
+                                              nrnb,
                                               &pairSearch->cycleCounting_);
 
     if (iLocality == InteractionLocality::Local)
@@ -4246,7 +4223,7 @@ void PairlistSets::construct(const InteractionLocality iLocality,
 void nonbonded_verlet_t::constructPairlist(const InteractionLocality iLocality,
                                            const ListOfLists<int>&   exclusions,
                                            int64_t                   step,
-                                           t_nrnb*                   nrnb)
+                                           t_nrnb*                   nrnb) const
 {
     pairlistSets_->construct(iLocality, pairSearch_.get(), nbat.get(), exclusions, step, nrnb);
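
constructPairlist can become const because every piece of mutated state (pairlistSets_, pairSearch_, nbat) lives behind an owning pointer: the wrapper object itself is unchanged, only the pointees are. A minimal sketch of this logical-constness pattern, with illustrative names:

    #include <memory>

    struct Worker
    {
        int callCount = 0;
        void run() { callCount++; }
    };

    class Facade
    {
    public:
        Facade() : worker_(std::make_unique<Worker>()) {}
        // const: never reseats worker_, but may mutate *worker_ through it
        void run() const { worker_->run(); }

    private:
        std::unique_ptr<Worker> worker_;
    };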