Move dynamic pull groups and pull nthreads
authorBerk Hess <hess@kth.se>
Tue, 30 Mar 2021 20:21:13 +0000 (20:21 +0000)
committerPaul Bauer <paul.bauer.q@gmail.com>
Tue, 30 Mar 2021 20:21:13 +0000 (20:21 +0000)
Moved dynamic pull groups into their respective coordinates.
Set the number of threads per pull group instead of globally.
Now all thread parallel regions run on 1 thread with less than 100
local atoms.

src/gromacs/pulling/output.cpp
src/gromacs/pulling/pull.cpp
src/gromacs/pulling/pull_internal.h
src/gromacs/pulling/pullutil.cpp

index 691a2488722b129d0ddf500caef90c89dc0f1d4d..220b2d606340d7a2fb926809cf929a6358ffc9ed 100644 (file)
@@ -95,7 +95,7 @@ static void addToPullxHistory(pull_t* pull)
         {
             for (int m = 0; m < DIM; m++)
             {
-                pcrdHistory.dynaX[m] += pull->dyna[c].x[m];
+                pcrdHistory.dynaX[m] += pcrd.dynamicGroup0->x[m];
             }
         }
     }
@@ -236,7 +236,7 @@ static void pull_print_x(FILE* out, pull_t* pull, double t)
                         }
                         else
                         {
-                            fprintf(out, "\t%g", pull->dyna[c].x[m]);
+                            fprintf(out, "\t%g", pcrd.dynamicGroup0->x[m]);
                         }
                     }
                 }
index 388c563f0c99ab211f07c86ff3c0ea9e6c41e445..6d5ea22148c886d7c377ba22a4bfa942ffa953a3 100644 (file)
@@ -89,6 +89,8 @@ namespace gmx
 extern template LocalAtomSet LocalAtomSetManager::add<void, void>(ArrayRef<const int> globalAtomIndex);
 } // namespace gmx
 
+using gmx::ArrayRef;
+
 static int groupPbcFromParams(const t_pull_group& params, bool setPbcRefToPrevStepCOM)
 {
     if (params.ind.size() <= 1)
@@ -120,9 +122,11 @@ static int groupPbcFromParams(const t_pull_group& params, bool setPbcRefToPrevSt
  */
 pull_group_work_t::pull_group_work_t(const t_pull_group& params,
                                      gmx::LocalAtomSet   atomSet,
-                                     bool                bSetPbcRefToPrevStepCOM) :
+                                     bool                bSetPbcRefToPrevStepCOM,
+                                     int                 maxNumThreads) :
     params(params),
     epgrppbc(groupPbcFromParams(params, bSetPbcRefToPrevStepCOM)),
+    maxNumThreads(maxNumThreads),
     needToCalcCom(false),
     atomSet(atomSet),
     mwscale(0),
@@ -176,24 +180,24 @@ double pull_conversion_factor_internal2userinput(const t_pull_coord* pcrd)
 }
 
 /* Apply forces in a mass weighted fashion for part of the pull group */
-static void apply_forces_grp_part(const pull_group_work_t*  pgrp,
+static void apply_forces_grp_part(const pull_group_work_t&  pgrp,
                                   int                       ind_start,
                                   int                       ind_end,
                                   gmx::ArrayRef<const real> masses,
                                   const dvec                f_pull,
-                                  int                       sign,
+                                  const int                 sign,
                                   rvec*                     f)
 {
-    double inv_wm = pgrp->mwscale;
+    const double inv_wm = pgrp.mwscale;
 
-    auto localAtomIndices = pgrp->atomSet.localIndex();
+    auto localAtomIndices = pgrp.atomSet.localIndex();
     for (int i = ind_start; i < ind_end; i++)
     {
         int    ii    = localAtomIndices[i];
         double wmass = masses[ii];
-        if (!pgrp->localWeights.empty())
+        if (!pgrp.localWeights.empty())
         {
-            wmass *= pgrp->localWeights[i];
+            wmass *= pgrp.localWeights[i];
         }
 
         for (int d = 0; d < DIM; d++)
@@ -204,16 +208,15 @@ static void apply_forces_grp_part(const pull_group_work_t*  pgrp,
 }
 
 /* Apply forces in a mass weighted fashion */
-static void apply_forces_grp(const pull_group_work_t*  pgrp,
+static void apply_forces_grp(const pull_group_work_t&  pgrp,
                              gmx::ArrayRef<const real> masses,
                              const dvec                f_pull,
-                             int                       sign,
-                             rvec*                     f,
-                             int                       nthreads)
+                             const int                 sign,
+                             rvec*                     f)
 {
-    auto localAtomIndices = pgrp->atomSet.localIndex();
+    auto localAtomIndices = pgrp.atomSet.localIndex();
 
-    if (pgrp->params.ind.size() == 1 && pgrp->atomSet.numAtomsLocal() == 1)
+    if (pgrp.params.ind.size() == 1 && pgrp.atomSet.numAtomsLocal() == 1)
     {
         /* Only one atom and our rank has this atom: we can skip
          * the mass weighting, which means that this code also works
@@ -226,17 +229,18 @@ static void apply_forces_grp(const pull_group_work_t*  pgrp,
     }
     else
     {
-        if (localAtomIndices.size() <= c_pullMaxNumLocalAtomsSingleThreaded)
+        const int numThreads = pgrp.numThreads();
+        if (numThreads == 1)
         {
             apply_forces_grp_part(pgrp, 0, localAtomIndices.size(), masses, f_pull, sign, f);
         }
         else
         {
-#pragma omp parallel for num_threads(nthreads) schedule(static)
-            for (int th = 0; th < nthreads; th++)
+#pragma omp parallel for num_threads(numThreads) schedule(static)
+            for (int th = 0; th < numThreads; th++)
             {
-                int ind_start = (localAtomIndices.size() * (th + 0)) / nthreads;
-                int ind_end   = (localAtomIndices.size() * (th + 1)) / nthreads;
+                int ind_start = (localAtomIndices.size() * (th + 0)) / numThreads;
+                int ind_end   = (localAtomIndices.size() * (th + 1)) / numThreads;
                 apply_forces_grp_part(pgrp, ind_start, ind_end, masses, f_pull, sign, f);
             }
         }
@@ -244,27 +248,27 @@ static void apply_forces_grp(const pull_group_work_t*  pgrp,
 }
 
 /* Apply forces in a mass weighted fashion to a cylinder group */
-static void apply_forces_cyl_grp(const pull_group_work_t*  pgrp,
+static void apply_forces_cyl_grp(const pull_group_work_t&  pgrp,
                                  const double              dv_corr,
                                  gmx::ArrayRef<const real> masses,
                                  const dvec                f_pull,
                                  double                    f_scal,
-                                 int                       sign,
-                                 rvec*                     f,
-                                 int gmx_unused nthreads)
+                                 const int                 sign,
+                                 rvec*                     f)
 {
-    double inv_wm = pgrp->mwscale;
+    const double inv_wm = pgrp.mwscale;
 
-    auto localAtomIndices = pgrp->atomSet.localIndex();
+    auto localAtomIndices = pgrp.atomSet.localIndex();
 
     /* The cylinder group is always a slab in the system, thus large.
      * Therefore we always thread-parallelize this group.
      */
-    int numAtomsLocal = localAtomIndices.size();
-#pragma omp parallel for num_threads(nthreads) schedule(static)
+    const int numAtomsLocal         = localAtomIndices.size();
+    const int gmx_unused numThreads = pgrp.numThreads();
+#pragma omp parallel for num_threads(numThreads) schedule(static)
     for (int i = 0; i < numAtomsLocal; i++)
     {
-        double weight = pgrp->localWeights[i];
+        double weight = pgrp.localWeights[i];
         if (weight == 0)
         {
             continue;
@@ -275,7 +279,7 @@ static void apply_forces_cyl_grp(const pull_group_work_t*  pgrp,
          * to be corrected for an offset (dv_corr), which was unknown when
          * we calculated dv.
          */
-        double dv_com = pgrp->dv[i] + dv_corr;
+        double dv_com = pgrp.dv[i] + dv_corr;
 
         /* Here we not only add the pull force working along vec (f_pull),
          * but also a radial component, due to the dependence of the weights
@@ -283,7 +287,7 @@ static void apply_forces_cyl_grp(const pull_group_work_t*  pgrp,
          */
         for (int m = 0; m < DIM; m++)
         {
-            f[ii][m] += sign * inv_wm * (mass * weight * f_pull[m] + pgrp->mdw[i][m] * dv_com * f_scal);
+            f[ii][m] += sign * inv_wm * (mass * weight * f_pull[m] + pgrp.mdw[i][m] * dv_com * f_scal);
         }
     }
 }
@@ -291,12 +295,12 @@ static void apply_forces_cyl_grp(const pull_group_work_t*  pgrp,
 /* Apply torque forces in a mass weighted fashion to the groups that define
  * the pull vector direction for pull coordinate pcrd.
  */
-static void apply_forces_vec_torque(const struct pull_t*      pull,
-                                    const pull_coord_work_t*  pcrd,
-                                    gmx::ArrayRef<const real> masses,
-                                    rvec*                     f)
+static void apply_forces_vec_torque(const pull_coord_work_t&          pcrd,
+                                    ArrayRef<const pull_group_work_t> pullGroups,
+                                    gmx::ArrayRef<const real>         masses,
+                                    rvec*                             f)
 {
-    const PullCoordSpatialData& spatialData = pcrd->spatialData;
+    const PullCoordSpatialData& spatialData = pcrd.spatialData;
 
     /* The component inpr along the pull vector is accounted for in the usual
      * way. Here we account for the component perpendicular to vec.
@@ -315,20 +319,20 @@ static void apply_forces_vec_torque(const struct pull_t*      pull,
     dvec f_perp;
     for (int m = 0; m < DIM; m++)
     {
-        f_perp[m] = (spatialData.dr01[m] - inpr * spatialData.vec[m]) / spatialData.vec_len * pcrd->scalarForce;
+        f_perp[m] = (spatialData.dr01[m] - inpr * spatialData.vec[m]) / spatialData.vec_len * pcrd.scalarForce;
     }
 
     /* Apply the force to the groups defining the vector using opposite signs */
-    apply_forces_grp(&pull->group[pcrd->params.group[2]], masses, f_perp, -1, f, pull->nthreads);
-    apply_forces_grp(&pull->group[pcrd->params.group[3]], masses, f_perp, 1, f, pull->nthreads);
+    apply_forces_grp(pullGroups[pcrd.params.group[2]], masses, f_perp, -1, f);
+    apply_forces_grp(pullGroups[pcrd.params.group[3]], masses, f_perp, 1, f);
 }
 
 /* Apply forces in a mass weighted fashion */
-static void apply_forces_coord(pull_t*                      pull,
-                               const pull_coord_work_t&     pcrd,
-                               const PullCoordVectorForces& forces,
-                               gmx::ArrayRef<const real>    masses,
-                               rvec*                        f)
+static void apply_forces_coord(const pull_coord_work_t&          pcrd,
+                               ArrayRef<const pull_group_work_t> pullGroups,
+                               const PullCoordVectorForces&      forces,
+                               gmx::ArrayRef<const real>         masses,
+                               rvec*                             f)
 {
     /* Here it would be more efficient to use one large thread-parallel
      * region instead of potential parallel regions within apply_forces_grp.
@@ -338,14 +342,8 @@ static void apply_forces_coord(pull_t*                      pull,
 
     if (pcrd.params.eGeom == PullGroupGeometry::Cylinder)
     {
-        apply_forces_cyl_grp(&pull->dyna[pcrd.params.coordIndex],
-                             pcrd.spatialData.cyl_dev,
-                             masses,
-                             forces.force01,
-                             pcrd.scalarForce,
-                             -1,
-                             f,
-                             pull->nthreads);
+        apply_forces_cyl_grp(
+                *pcrd.dynamicGroup0, pcrd.spatialData.cyl_dev, masses, forces.force01, pcrd.scalarForce, -1, f);
 
         /* Sum the force along the vector and the radial force */
         dvec f_tot;
@@ -353,7 +351,7 @@ static void apply_forces_coord(pull_t*                      pull,
         {
             f_tot[m] = forces.force01[m] + pcrd.scalarForce * pcrd.spatialData.ffrad[m];
         }
-        apply_forces_grp(&pull->group[pcrd.params.group[1]], masses, f_tot, 1, f, pull->nthreads);
+        apply_forces_grp(pullGroups[pcrd.params.group[1]], masses, f_tot, 1, f);
     }
     else
     {
@@ -362,27 +360,24 @@ static void apply_forces_coord(pull_t*                      pull,
             /* We need to apply the torque forces to the pull groups
              * that define the pull vector.
              */
-            apply_forces_vec_torque(pull, &pcrd, masses, f);
+            apply_forces_vec_torque(pcrd, pullGroups, masses, f);
         }
 
-        if (!pull->group[pcrd.params.group[0]].params.ind.empty())
+        if (!pullGroups[pcrd.params.group[0]].params.ind.empty())
         {
-            apply_forces_grp(
-                    &pull->group[pcrd.params.group[0]], masses, forces.force01, -1, f, pull->nthreads);
+            apply_forces_grp(pullGroups[pcrd.params.group[0]], masses, forces.force01, -1, f);
         }
-        apply_forces_grp(&pull->group[pcrd.params.group[1]], masses, forces.force01, 1, f, pull->nthreads);
+        apply_forces_grp(pullGroups[pcrd.params.group[1]], masses, forces.force01, 1, f);
 
         if (pcrd.params.ngroup >= 4)
         {
-            apply_forces_grp(
-                    &pull->group[pcrd.params.group[2]], masses, forces.force23, -1, f, pull->nthreads);
-            apply_forces_grp(&pull->group[pcrd.params.group[3]], masses, forces.force23, 1, f, pull->nthreads);
+            apply_forces_grp(pullGroups[pcrd.params.group[2]], masses, forces.force23, -1, f);
+            apply_forces_grp(pullGroups[pcrd.params.group[3]], masses, forces.force23, 1, f);
         }
         if (pcrd.params.ngroup >= 6)
         {
-            apply_forces_grp(
-                    &pull->group[pcrd.params.group[4]], masses, forces.force45, -1, f, pull->nthreads);
-            apply_forces_grp(&pull->group[pcrd.params.group[5]], masses, forces.force45, 1, f, pull->nthreads);
+            apply_forces_grp(pullGroups[pcrd.params.group[4]], masses, forces.force45, -1, f);
+            apply_forces_grp(pullGroups[pcrd.params.group[5]], masses, forces.force45, 1, f);
         }
     }
 }
@@ -607,7 +602,7 @@ static void get_pull_coord_dr(const pull_t& pull, pull_coord_work_t* pcrd, const
             pcrd,
             pbc,
             pgrp1.x,
-            pcrd->params.eGeom == PullGroupGeometry::Cylinder ? pull.dyna[coord_ind].x : pgrp0.x,
+            pcrd->params.eGeom == PullGroupGeometry::Cylinder ? pcrd->dynamicGroup0->x : pgrp0.x,
             0,
             1,
             md2,
@@ -1569,7 +1564,7 @@ void apply_external_pull_coord_force(struct pull_t*            pull,
         }
 
         apply_forces_coord(
-                pull, *pcrd, pullCoordForces, masses, as_rvec_array(forceWithVirial->force_.data()));
+                *pcrd, pull->group, pullCoordForces, masses, as_rvec_array(forceWithVirial->force_.data()));
     }
 
     pull->numExternalPotentialsStillToBeAppliedThisStep--;
@@ -1646,7 +1641,7 @@ real pull_potential(struct pull_t*                 pull,
                     *pull, pcrd, pbc, t, lambda, &V, computeVirial ? virial : nullptr, &dVdl);
 
             /* Distribute the force over the atoms in the pulled groups */
-            apply_forces_coord(pull, *pcrd, pullCoordForces, masses, f);
+            apply_forces_coord(*pcrd, pull->group, pullCoordForces, masses, f);
         }
 
         if (MASTER(cr))
@@ -1993,11 +1988,15 @@ struct pull_t* init_pull(FILE*                     fplog,
     /* Copy the pull parameters */
     pull->params = *pull_params;
 
+    /* The gmx_omp_nthreads module might not be initialized here, so max(1,) */
+    const int maxNumThreads = std::max(1, gmx_omp_nthreads_get(emntDefault));
+
     for (int i = 0; i < pull_params->ngroup; ++i)
     {
         pull->group.emplace_back(pull_params->group[i],
                                  atomSets->add(pull_params->group[i].ind),
-                                 pull_params->bSetPbcRefToPrevStepCOM);
+                                 pull_params->bSetPbcRefToPrevStepCOM,
+                                 maxNumThreads);
     }
 
     if (cr != nullptr && DOMAINDECOMP(cr))
@@ -2323,7 +2322,7 @@ struct pull_t* init_pull(FILE*                     fplog,
     /* If we use cylinder coordinates, do some initialising for them */
     if (pull->bCylinder)
     {
-        for (const pull_coord_work_t& coord : pull->coord)
+        for (pull_coord_work_t& coord : pull->coord)
         {
             if (coord.params.eGeom == PullGroupGeometry::Cylinder)
             {
@@ -2334,15 +2333,13 @@ struct pull_t* init_pull(FILE*                     fplog,
                               "reference!\n");
                 }
             }
-            const auto& referenceGroup = pull->group[coord.params.group[0]];
-            pull->dyna.emplace_back(
-                    referenceGroup.params, referenceGroup.atomSet, pull->params.bSetPbcRefToPrevStepCOM);
+            const auto& group0  = pull->group[coord.params.group[0]];
+            coord.dynamicGroup0 = std::make_unique<pull_group_work_t>(
+                    group0.params, group0.atomSet, pull->params.bSetPbcRefToPrevStepCOM, maxNumThreads);
         }
     }
 
-    /* The gmx_omp_nthreads module might not be initialized here, so max(1,) */
-    pull->nthreads = std::max(1, gmx_omp_nthreads_get(emntDefault));
-    pull->comSums.resize(pull->nthreads);
+    pull->comSums.resize(maxNumThreads);
 
     comm = &pull->comm;
 
index 1973e5f116034ecdb0c190e1b487ace9b6e22610..749d70379c8df9265c726ed9140dd2e49ab3120d 100644 (file)
@@ -4,7 +4,7 @@
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
  * Copyright (c) 2013,2014,2015,2016,2018 by the GROMACS development team.
- * Copyright (c) 2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -91,14 +91,25 @@ struct pull_group_work_t
      * \param[in] params                  The group parameters set by the user
      * \param[in] atomSet                 The global to local atom set manager
      * \param[in] setPbcRefToPrevStepCOM Does this pull group use the COM from the previous step as reference position?
+     * \param[in] maxNumThreads           Use either this number of threads of 1 for operations on x and f
      */
-    pull_group_work_t(const t_pull_group& params, gmx::LocalAtomSet atomSet, bool setPbcRefToPrevStepCOM);
+    pull_group_work_t(const t_pull_group& params,
+                      gmx::LocalAtomSet   atomSet,
+                      bool                setPbcRefToPrevStepCOM,
+                      int                 maxNumThreads);
+
+    //! Returns the number of threads to use for local atom operations based on the local atom count
+    int numThreads() const
+    {
+        return atomSet.numAtomsLocal() <= c_pullMaxNumLocalAtomsSingleThreaded ? 1 : maxNumThreads;
+    }
 
     /* Data only modified at initialization */
     const t_pull_group params;   /**< The pull group parameters */
     const int          epgrppbc; /**< The type of pbc for this pull group, see enum above */
-    bool               needToCalcCom; /**< Do we need to calculate the COM? (Not for group 0 or if only used as cylinder group) */
-    std::vector<real>  globalWeights; /**< Weights per atom set by the user and/or mass/friction coefficients, if empty all weights are equal */
+    const int maxNumThreads; /**< The maximum number of threads to use for operations on x and f */
+    bool      needToCalcCom; /**< Do we need to calculate the COM? (Not for group 0 or if only used as cylinder group) */
+    std::vector<real> globalWeights; /**< Weights per atom set by the user and/or mass/friction coefficients, if empty all weights are equal */
 
     /* Data modified only at init or at domain decomposition */
     gmx::LocalAtomSet                  atomSet;      /**< Global to local atom set mapper */
@@ -149,6 +160,9 @@ struct pull_coord_work_t
 
     const t_pull_coord params; /* Pull coordinate parameters */
 
+    /* Dynamic pull group 0 for this coordinate with dynamic weights, only present when needed */
+    std::unique_ptr<pull_group_work_t> dynamicGroup0;
+
     double value_ref; /* The reference value, usually init+rate*t, units of nm or rad */
 
     PullCoordSpatialData spatialData; /* Data defining the current geometry */
@@ -236,7 +250,6 @@ struct pull_t
 
     /* Parameters + dynamic data for groups */
     std::vector<pull_group_work_t> group; /* The pull group param and work data */
-    std::vector<pull_group_work_t> dyna;  /* Dynamic groups for geom=cylinder */
 
     /* Parameters + dynamic data for coordinates */
     std::vector<pull_coord_work_t> coord; /* The pull group param and work data */
@@ -244,8 +257,7 @@ struct pull_t
     /* Global dynamic data */
     gmx_bool bSetPBCatoms; /* Do we need to set x_pbc for the groups? */
 
-    int                  nthreads; /* Number of threads used by the pull code */
-    std::vector<ComSums> comSums;  /* Work array for summing for COM, 1 entry per thread */
+    std::vector<ComSums> comSums; /* Work array for summing for COM, 1 entry per thread */
 
     pull_comm_t comm; /* Communication parameters, communicator and buffers */
 
index b9dbb2d68ea509637955801aa9147d6036774254..d332a93bc7bf16fc95f831f996966204bd3835d1 100644 (file)
@@ -186,13 +186,11 @@ static void make_cyl_refgrps(const t_commrec*               cr,
     double inv_cyl_r2 = 1.0 / gmx::square(pull->params.cylinder_r);
 
     /* loop over all groups to make a reference group for each*/
-    for (size_t c = 0; c < pull->coord.size(); c++)
+    int bufferOffset = 0;
+    for (pull_coord_work_t& pcrd : pull->coord)
     {
-        pull_coord_work_t* pcrd;
-        double             sum_a, wmass, wwmass;
-        dvec               radf_fac0, radf_fac1;
-
-        pcrd = &pull->coord[c];
+        double sum_a, wmass, wwmass;
+        dvec   radf_fac0, radf_fac1;
 
         sum_a  = 0;
         wmass  = 0;
@@ -200,14 +198,14 @@ static void make_cyl_refgrps(const t_commrec*               cr,
         clear_dvec(radf_fac0);
         clear_dvec(radf_fac1);
 
-        if (pcrd->params.eGeom == PullGroupGeometry::Cylinder)
+        if (pcrd.params.eGeom == PullGroupGeometry::Cylinder)
         {
             /* pref will be the same group for all pull coordinates */
-            const pull_group_work_t& pref  = pull->group[pcrd->params.group[0]];
-            const pull_group_work_t& pgrp  = pull->group[pcrd->params.group[1]];
-            pull_group_work_t&       pdyna = pull->dyna[c];
+            const pull_group_work_t& pref  = pull->group[pcrd.params.group[0]];
+            const pull_group_work_t& pgrp  = pull->group[pcrd.params.group[1]];
+            pull_group_work_t&       pdyna = *pcrd.dynamicGroup0;
             rvec                     direction;
-            copy_dvec_to_rvec(pcrd->spatialData.vec, direction);
+            copy_dvec_to_rvec(pcrd.spatialData.vec, direction);
 
             /* Since we have not calculated the COM of the cylinder group yet,
              * we calculate distances with respect to location of the pull
@@ -215,15 +213,15 @@ static void make_cyl_refgrps(const t_commrec*               cr,
              * here we already have the COM of the pull group. This resolves
              * any PBC issues and we don't need to use a PBC-atom here.
              */
-            if (pcrd->params.rate != 0)
+            if (pcrd.params.rate != 0)
             {
                 /* With rate=0, value_ref is set initially */
-                pcrd->value_ref = pcrd->params.init + pcrd->params.rate * t;
+                pcrd.value_ref = pcrd.params.init + pcrd.params.rate * t;
             }
             rvec reference;
             for (int m = 0; m < DIM; m++)
             {
-                reference[m] = pgrp.x[m] - pcrd->spatialData.vec[m] * pcrd->value_ref;
+                reference[m] = pgrp.x[m] - pcrd.spatialData.vec[m] * pcrd.value_ref;
             }
 
             auto localAtomIndices = pref.atomSet.localIndex();
@@ -290,8 +288,9 @@ static void make_cyl_refgrps(const t_commrec*               cr,
             }
         }
 
-        auto buffer = gmx::arrayRefFromArray(
-                comm->cylinderBuffer.data() + c * c_cylinderBufferStride, c_cylinderBufferStride);
+        auto buffer = gmx::arrayRefFromArray(comm->cylinderBuffer.data() + bufferOffset,
+                                             c_cylinderBufferStride);
+        bufferOffset += c_cylinderBufferStride;
 
         buffer[0] = wmass;
         buffer[1] = wwmass;
@@ -312,28 +311,26 @@ static void make_cyl_refgrps(const t_commrec*               cr,
         pullAllReduce(cr, comm, pull->coord.size() * c_cylinderBufferStride, comm->cylinderBuffer.data());
     }
 
-    for (size_t c = 0; c < pull->coord.size(); c++)
+    bufferOffset = 0;
+    for (pull_coord_work_t& pcrd : pull->coord)
     {
-        pull_coord_work_t* pcrd;
-
-        pcrd = &pull->coord[c];
-
-        if (pcrd->params.eGeom == PullGroupGeometry::Cylinder)
+        if (pcrd.params.eGeom == PullGroupGeometry::Cylinder)
         {
-            pull_group_work_t*    pdyna       = &pull->dyna[c];
-            pull_group_work_t*    pgrp        = &pull->group[pcrd->params.group[1]];
-            PullCoordSpatialData& spatialData = pcrd->spatialData;
-
-            auto buffer = gmx::constArrayRefFromArray(
-                    comm->cylinderBuffer.data() + c * c_cylinderBufferStride, c_cylinderBufferStride);
-            double wmass   = buffer[0];
-            double wwmass  = buffer[1];
-            pdyna->mwscale = 1.0 / wmass;
+            pull_group_work_t&    dynamicGroup0 = *pcrd.dynamicGroup0;
+            pull_group_work_t&    group1        = pull->group[pcrd.params.group[1]];
+            PullCoordSpatialData& spatialData   = pcrd.spatialData;
+
+            auto buffer = gmx::constArrayRefFromArray(comm->cylinderBuffer.data() + bufferOffset,
+                                                      c_cylinderBufferStride);
+            bufferOffset += c_cylinderBufferStride;
+            double wmass          = buffer[0];
+            double wwmass         = buffer[1];
+            dynamicGroup0.mwscale = 1.0 / wmass;
             /* Cylinder pulling can't be used with constraints, but we set
              * wscale and invtm anyhow, in case someone would like to use them.
              */
-            pdyna->wscale = wmass / wwmass;
-            pdyna->invtm  = wwmass / (wmass * wmass);
+            dynamicGroup0.wscale = wmass / wwmass;
+            dynamicGroup0.invtm  = wwmass / (wmass * wmass);
 
             /* We store the deviation of the COM from the reference location
              * used above, since we need it when we apply the radial forces
@@ -342,9 +339,9 @@ static void make_cyl_refgrps(const t_commrec*               cr,
             spatialData.cyl_dev = 0;
             for (int m = 0; m < DIM; m++)
             {
-                double reference = pgrp->x[m] - spatialData.vec[m] * pcrd->value_ref;
-                double dist      = -spatialData.vec[m] * buffer[2] * pdyna->mwscale;
-                pdyna->x[m]      = reference - dist;
+                double reference   = group1.x[m] - spatialData.vec[m] * pcrd.value_ref;
+                double dist        = -spatialData.vec[m] * buffer[2] * dynamicGroup0.mwscale;
+                dynamicGroup0.x[m] = reference - dist;
                 spatialData.cyl_dev += dist;
             }
             /* Now we know the exact COM of the cylinder reference group,
@@ -360,12 +357,12 @@ static void make_cyl_refgrps(const t_commrec*               cr,
             if (debug)
             {
                 fprintf(debug,
-                        "Pull cylinder group %zu:%8.3f%8.3f%8.3f m:%8.3f\n",
-                        c,
-                        pdyna->x[0],
-                        pdyna->x[1],
-                        pdyna->x[2],
-                        1.0 / pdyna->invtm);
+                        "Pull cylinder group %d:%8.3f%8.3f%8.3f m:%8.3f\n",
+                        pcrd.params.coordIndex,
+                        dynamicGroup0.x[0],
+                        dynamicGroup0.x[1],
+                        dynamicGroup0.x[2],
+                        1.0 / dynamicGroup0.invtm);
                 fprintf(debug,
                         "ffrad %8.3f %8.3f %8.3f\n",
                         spatialData.ffrad[XX],
@@ -638,17 +635,18 @@ void pull_calc_coms(const t_commrec*               cr,
                 }
                 else
                 {
-#pragma omp parallel for num_threads(pull->nthreads) schedule(static)
-                    for (int t = 0; t < pull->nthreads; t++)
+                    const int numThreads = pgrp->numThreads();
+#pragma omp parallel for num_threads(numThreads) schedule(static)
+                    for (int t = 0; t < numThreads; t++)
                     {
-                        int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
-                        int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
+                        int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / numThreads;
+                        int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / numThreads;
                         sum_com_part(
                                 pgrp, ind_start, ind_end, x, xp, masses, pbc, x_pbc, &pull->comSums[t]);
                     }
 
                     /* Reduce the thread contributions to sum_com[0] */
-                    for (int t = 1; t < pull->nthreads; t++)
+                    for (int t = 1; t < numThreads; t++)
                     {
                         comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
                         comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;
@@ -677,18 +675,19 @@ void pull_calc_coms(const t_commrec*               cr,
                  * This uses a slab of the system, thus we always have many
                  * atoms in the pull groups. Therefore, always use threads.
                  */
-#pragma omp parallel for num_threads(pull->nthreads) schedule(static)
-                for (int t = 0; t < pull->nthreads; t++)
+                const int numThreads = pgrp->numThreads();
+#pragma omp parallel for num_threads(numThreads) schedule(static)
+                for (int t = 0; t < numThreads; t++)
                 {
-                    int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
-                    int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
+                    int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / numThreads;
+                    int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / numThreads;
                     sum_com_part_cosweight(
                             pgrp, ind_start, ind_end, pull->cosdim, twopi_box, x, xp, masses, &pull->comSums[t]);
                 }
 
                 /* Reduce the thread contributions to comSums[0] */
                 ComSums& comSumsTotal = pull->comSums[0];
-                for (int t = 1; t < pull->nthreads; t++)
+                for (int t = 1; t < numThreads; t++)
                 {
                     comSumsTotal.sum_cm += pull->comSums[t].sum_cm;
                     comSumsTotal.sum_sm += pull->comSums[t].sum_sm;
@@ -919,9 +918,9 @@ int pullCheckPbcWithinGroups(const pull_t& pull, gmx::ArrayRef<const gmx::RVec>
 
     /* Determine what dimensions are used for each group by pull coordinates */
     std::vector<BoolVec> dimUsed(pull.group.size(), { false, false, false });
-    for (size_t c = 0; c < pull.coord.size(); c++)
+    for (const pull_coord_work_t& pcrd : pull.coord)
     {
-        const t_pull_coord& coordParams = pull.coord[c].params;
+        const t_pull_coord& coordParams = pcrd.params;
         for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
         {
             for (int d = 0; d < DIM; d++)
@@ -970,9 +969,9 @@ bool pullCheckPbcWithinGroup(const pull_t&                  pull,
 
     /* Determine what dimensions are used for each group by pull coordinates */
     BoolVec dimUsed = { false, false, false };
-    for (size_t c = 0; c < pull.coord.size(); c++)
+    for (const pull_coord_work_t& pcrd : pull.coord)
     {
-        const t_pull_coord& coordParams = pull.coord[c].params;
+        const t_pull_coord& coordParams = pcrd.params;
         for (int groupIndex = 0; groupIndex < coordParams.ngroup; groupIndex++)
         {
             if (coordParams.group[groupIndex] == groupNr)
@@ -1090,16 +1089,17 @@ void initPullComFromPrevStep(const t_commrec*               cr,
             }
             else
             {
-#pragma omp parallel for num_threads(pull->nthreads) schedule(static)
-                for (int t = 0; t < pull->nthreads; t++)
+                const int numThreads = pgrp->numThreads();
+#pragma omp parallel for num_threads(numThreads) schedule(static)
+                for (int t = 0; t < numThreads; t++)
                 {
-                    int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / pull->nthreads;
-                    int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / pull->nthreads;
+                    int ind_start = (pgrp->atomSet.numAtomsLocal() * (t + 0)) / numThreads;
+                    int ind_end   = (pgrp->atomSet.numAtomsLocal() * (t + 1)) / numThreads;
                     sum_com_part(pgrp, ind_start, ind_end, x, {}, masses, pbc, x_pbc, &pull->comSums[t]);
                 }
 
                 /* Reduce the thread contributions to sum_com[0] */
-                for (int t = 1; t < pull->nthreads; t++)
+                for (int t = 1; t < numThreads; t++)
                 {
                     comSumsTotal.sum_wm += pull->comSums[t].sum_wm;
                     comSumsTotal.sum_wwm += pull->comSums[t].sum_wwm;