Apply clang-format to source tree
[alexxy/gromacs.git] / src / gromacs / ewald / pme_load_balancing.cpp
index 60611d8f6e1db45e363fdcdbc2cb4fcd8c932a66..98d0d9e2fcc1a616f36d63ef8e72f3a51ff7f95b 100644
 #include "pme_internal.h"
 
 /*! \brief Parameters and settings for one PP-PME setup */
-struct pme_setup_t {
-    real              rcut_coulomb;    /**< Coulomb cut-off                              */
-    real              rlistOuter;      /**< cut-off for the outer pair-list              */
-    real              rlistInner;      /**< cut-off for the inner pair-list              */
-    real              spacing;         /**< (largest) PME grid spacing                   */
-    ivec              grid;            /**< the PME grid dimensions                      */
-    real              grid_efficiency; /**< inefficiency factor for non-uniform grids <= 1 */
-    real              ewaldcoeff_q;    /**< Electrostatic Ewald coefficient            */
-    real              ewaldcoeff_lj;   /**< LJ Ewald coefficient, only for the call to send_switchgrid */
-    struct gmx_pme_t *pmedata;         /**< the data structure used in the PME code      */
-    int               count;           /**< number of times this setup has been timed    */
-    double            cycles;          /**< the fastest time for this setup in cycles    */
+struct pme_setup_t
+{
+    real rcut_coulomb;         /**< Coulomb cut-off                              */
+    real rlistOuter;           /**< cut-off for the outer pair-list              */
+    real rlistInner;           /**< cut-off for the inner pair-list              */
+    real spacing;              /**< (largest) PME grid spacing                   */
+    ivec grid;                 /**< the PME grid dimensions                      */
+    real grid_efficiency;      /**< inefficiency factor for non-uniform grids <= 1 */
+    real ewaldcoeff_q;         /**< Electrostatic Ewald coefficient            */
+    real ewaldcoeff_lj;        /**< LJ Ewald coefficient, only for the call to send_switchgrid */
+    struct gmx_pme_t* pmedata; /**< the data structure used in the PME code      */
+    int               count;   /**< number of times this setup has been timed    */
+    double            cycles;  /**< the fastest time for this setup in cycles    */
 };
 
 /*! \brief After 50 nstlist periods of not observing imbalance: never tune PME */
-const int  PMETunePeriod = 50;
+const int PMETunePeriod = 50;
 /*! \brief Trigger PME load balancing at more than 5% PME overload */
 const real loadBalanceTriggerFactor = 1.05;
 /*! \brief Scale the grid by at most a factor 1.7.
@@ -124,21 +125,29 @@ const real maxRelativeSlowdownAccepted = 1.12;
 const real maxFluctuationAccepted = 1.02;
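
For orientation, these constants encode the whole tuning policy: balancing starts once the PME ranks measure more than 5% overloaded relative to PP, a setup is dropped once it is more than 12% slower than the fastest one seen, and a re-measurement more than 2% faster than the stored minimum is taken as a sign that external conditions (e.g. DD load balancing) changed. A minimal sketch of these threshold checks, with hypothetical helper names that are not part of the GROMACS API:

    #include <cassert>

    // Illustrative only; the constants mirror the values in the diff above.
    constexpr double loadBalanceTriggerFactor    = 1.05;
    constexpr double maxRelativeSlowdownAccepted = 1.12;
    constexpr double maxFluctuationAccepted      = 1.02;

    bool shouldStartBalancing(double pmeToPpForceTimeRatio)
    {
        return pmeToPpForceTimeRatio >= loadBalanceTriggerFactor;
    }

    bool setupTooSlow(double setupCycles, double fastestCycles)
    {
        return setupCycles > fastestCycles * maxRelativeSlowdownAccepted;
    }

    bool externalSpeedupDetected(double newCycles, double storedCycles)
    {
        return newCycles * maxFluctuationAccepted < storedCycles;
    }

    int main()
    {
        assert(shouldStartBalancing(1.10));
        assert(setupTooSlow(1.2e6, 1.0e6));
        assert(externalSpeedupDetected(0.9e6, 1.0e6));
    }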
 
 /*! \brief Enumeration whose values describe the effect limiting the load balancing */
-enum epmelb {
-    epmelblimNO, epmelblimBOX, epmelblimDD, epmelblimPMEGRID, epmelblimMAXSCALING, epmelblimNR
+enum epmelb
+{
+    epmelblimNO,
+    epmelblimBOX,
+    epmelblimDD,
+    epmelblimPMEGRID,
+    epmelblimMAXSCALING,
+    epmelblimNR
 };
 
 /*! \brief Descriptive strings matching ::epmelb */
-static const char *pmelblim_str[epmelblimNR] =
-{ "no", "box size", "domain decompostion", "PME grid restriction", "maximum allowed grid scaling" };
+static const char* pmelblim_str[epmelblimNR] = { "no", "box size", "domain decompostion",
+                                                 "PME grid restriction",
+                                                 "maximum allowed grid scaling" };
 
-struct pme_load_balancing_t {
-    gmx_bool                 bSepPMERanks;       /**< do we have separate PME ranks? */
-    gmx_bool                 bActive;            /**< is PME tuning active? */
-    int64_t                  step_rel_stop;      /**< stop the tuning after this value of step_rel */
-    gmx_bool                 bTriggerOnDLB;      /**< trigger balancing only on DD DLB */
-    gmx_bool                 bBalance;           /**< are we in the balancing phase, i.e. trying different setups? */
-    int                      nstage;             /**< the current maximum number of stages */
+struct pme_load_balancing_t
+{
+    gmx_bool bSepPMERanks;  /**< do we have separate PME ranks? */
+    gmx_bool bActive;       /**< is PME tuning active? */
+    int64_t  step_rel_stop; /**< stop the tuning after this value of step_rel */
+    gmx_bool bTriggerOnDLB; /**< trigger balancing only on DD DLB */
+    gmx_bool bBalance;      /**< are we in the balancing phase, i.e. trying different setups? */
+    int      nstage;        /**< the current maximum number of stages */
 
     real                     cut_spacing;        /**< the minimum cutoff / PME grid spacing ratio */
     real                     rcut_vdw;           /**< Vdw cutoff (does not change) */
@@ -152,58 +161,60 @@ struct pme_load_balancing_t {
     int                      cur;                /**< the index (in setup) of the current setup */
     int                      fastest;            /**< index of the fastest setup up till now */
     int                      lower_limit;        /**< don't go below this setup index */
-    int                      start;              /**< start of setup index range to consider in stage>0 */
-    int                      end;                /**< end   of setup index range to consider in stage>0 */
-    int                      elimited;           /**< was the balancing limited, uses enum above */
-    int                      cutoff_scheme;      /**< Verlet or group cut-offs */
+    int                      start;    /**< start of setup index range to consider in stage>0 */
+    int                      end;      /**< end   of setup index range to consider in stage>0 */
+    int                      elimited; /**< was the balancing limited, uses enum above */
+    int                      cutoff_scheme; /**< Verlet or group cut-offs */
 
-    int                      stage;              /**< the current stage */
+    int stage; /**< the current stage */
 
-    int                      cycles_n;           /**< step cycle counter cumulative count */
-    double                   cycles_c;           /**< step cycle counter cumulative cycles */
+    int    cycles_n; /**< step cycle counter cumulative count */
+    double cycles_c; /**< step cycle counter cumulative cycles */
 };
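
The stage/start/end/fastest fields implement a staged search over the setup list, which is ordered by increasing Coulomb cut-off: stage 0 appends and times ever larger cut-offs, while later stages re-time only the window of setups that stayed competitive. A sketch of that window narrowing under simplified assumptions (illustrative types, not the actual GROMACS code):

    #include <vector>

    struct Setup
    {
        int    count  = 0; // number of timings taken for this setup
        double cycles = 0; // fastest time observed, in cycles
    };

    // Keep [start, end) restricted to setups that were timed and are within
    // the accepted slowdown of the fastest one (cf. switch_to_stage1 below).
    void narrowWindow(const std::vector<Setup>& setups, double fastestCycles, int& start, int& end)
    {
        const double limit = fastestCycles * 1.12; // maxRelativeSlowdownAccepted
        while (start + 1 < end && (setups[start].count == 0 || setups[start].cycles > limit))
        {
            start++;
        }
        if (end > start + 1 && setups[end - 1].count > 0 && setups[end - 1].cycles > limit)
        {
            end--;
        }
    }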
 
 /* TODO The code in this file should call this getter, rather than
  * read bActive anywhere */
-bool pme_loadbal_is_active(const pme_load_balancing_t *pme_lb)
+bool pme_loadbal_is_active(const pme_load_balancing_t* pme_lb)
 {
     return pme_lb != nullptr && pme_lb->bActive;
 }
 
 // TODO Return a unique_ptr to pme_load_balancing_t
-void pme_loadbal_init(pme_load_balancing_t     **pme_lb_p,
-                      t_commrec                 *cr,
-                      const gmx::MDLogger       &mdlog,
-                      const t_inputrec          &ir,
+void pme_loadbal_init(pme_load_balancing_t**     pme_lb_p,
+                      t_commrec*                 cr,
+                      const gmx::MDLogger&       mdlog,
+                      const t_inputrec&          ir,
                       const matrix               box,
-                      const interaction_const_t &ic,
-                      const nonbonded_verlet_t  &nbv,
-                      gmx_pme_t                 *pmedata,
+                      const interaction_const_t& ic,
+                      const nonbonded_verlet_t&  nbv,
+                      gmx_pme_t*                 pmedata,
                       gmx_bool                   bUseGPU,
-                      gmx_bool                  *bPrinting)
+                      gmx_bool*                  bPrinting)
 {
 
-    pme_load_balancing_t *pme_lb;
+    pme_load_balancing_t* pme_lb;
     real                  spm, sp;
     int                   d;
 
     // Note that we don't (yet) support PME load balancing with LJ-PME only.
-    GMX_RELEASE_ASSERT(EEL_PME(ir.coulombtype), "pme_loadbal_init called without PME electrostatics");
+    GMX_RELEASE_ASSERT(EEL_PME(ir.coulombtype),
+                       "pme_loadbal_init called without PME electrostatics");
     // To avoid complexity, we require a single cut-off with PME for q+LJ.
     // This is checked by grompp, but it doesn't hurt to check again.
-    GMX_RELEASE_ASSERT(!(EEL_PME(ir.coulombtype) && EVDW_PME(ir.vdwtype) && ir.rcoulomb != ir.rvdw), "With Coulomb and LJ PME, rcoulomb should be equal to rvdw");
+    GMX_RELEASE_ASSERT(!(EEL_PME(ir.coulombtype) && EVDW_PME(ir.vdwtype) && ir.rcoulomb != ir.rvdw),
+                       "With Coulomb and LJ PME, rcoulomb should be equal to rvdw");
 
     pme_lb = new pme_load_balancing_t;
 
-    pme_lb->bSepPMERanks      = !thisRankHasDuty(cr, DUTY_PME);
+    pme_lb->bSepPMERanks = !thisRankHasDuty(cr, DUTY_PME);
 
     /* Initially we turn balancing on directly, based on PP/PME imbalance */
-    pme_lb->bTriggerOnDLB     = FALSE;
+    pme_lb->bTriggerOnDLB = FALSE;
 
     /* Any number of stages >= 2 is supported */
-    pme_lb->nstage            = 2;
+    pme_lb->nstage = 2;
 
-    pme_lb->cutoff_scheme     = ir.cutoff_scheme;
+    pme_lb->cutoff_scheme = ir.cutoff_scheme;
 
     pme_lb->rbufOuter_coulomb = nbv.pairlistOuterRadius() - ic.rcoulomb;
     pme_lb->rbufOuter_vdw     = nbv.pairlistOuterRadius() - ic.rvdw;
@@ -218,29 +229,30 @@ void pme_loadbal_init(pme_load_balancing_t     **pme_lb_p,
 
     pme_lb->setup.resize(1);
 
-    pme_lb->rcut_vdw                 = ic.rvdw;
-    pme_lb->rcut_coulomb_start       = ir.rcoulomb;
+    pme_lb->rcut_vdw           = ic.rvdw;
+    pme_lb->rcut_coulomb_start = ir.rcoulomb;
 
-    pme_lb->cur                      = 0;
-    pme_lb->setup[0].rcut_coulomb    = ic.rcoulomb;
-    pme_lb->setup[0].rlistOuter      = nbv.pairlistOuterRadius();
-    pme_lb->setup[0].rlistInner      = nbv.pairlistInnerRadius();
-    pme_lb->setup[0].grid[XX]        = ir.nkx;
-    pme_lb->setup[0].grid[YY]        = ir.nky;
-    pme_lb->setup[0].grid[ZZ]        = ir.nkz;
-    pme_lb->setup[0].ewaldcoeff_q    = ic.ewaldcoeff_q;
-    pme_lb->setup[0].ewaldcoeff_lj   = ic.ewaldcoeff_lj;
+    pme_lb->cur                    = 0;
+    pme_lb->setup[0].rcut_coulomb  = ic.rcoulomb;
+    pme_lb->setup[0].rlistOuter    = nbv.pairlistOuterRadius();
+    pme_lb->setup[0].rlistInner    = nbv.pairlistInnerRadius();
+    pme_lb->setup[0].grid[XX]      = ir.nkx;
+    pme_lb->setup[0].grid[YY]      = ir.nky;
+    pme_lb->setup[0].grid[ZZ]      = ir.nkz;
+    pme_lb->setup[0].ewaldcoeff_q  = ic.ewaldcoeff_q;
+    pme_lb->setup[0].ewaldcoeff_lj = ic.ewaldcoeff_lj;
 
     if (!pme_lb->bSepPMERanks)
     {
-        GMX_RELEASE_ASSERT(pmedata, "On ranks doing both PP and PME we need a valid pmedata object");
-        pme_lb->setup[0].pmedata     = pmedata;
+        GMX_RELEASE_ASSERT(pmedata,
+                           "On ranks doing both PP and PME we need a valid pmedata object");
+        pme_lb->setup[0].pmedata = pmedata;
     }
 
     spm = 0;
     for (d = 0; d < DIM; d++)
     {
-        sp = norm(pme_lb->box_start[d])/pme_lb->setup[0].grid[d];
+        sp = norm(pme_lb->box_start[d]) / pme_lb->setup[0].grid[d];
         if (sp > spm)
         {
             spm = sp;
@@ -250,11 +262,11 @@ void pme_loadbal_init(pme_load_balancing_t     **pme_lb_p,
 
     if (ir.fourier_spacing > 0)
     {
-        pme_lb->cut_spacing = ir.rcoulomb/ir.fourier_spacing;
+        pme_lb->cut_spacing = ir.rcoulomb / ir.fourier_spacing;
     }
     else
     {
-        pme_lb->cut_spacing = ir.rcoulomb/pme_lb->setup[0].spacing;
+        pme_lb->cut_spacing = ir.rcoulomb / pme_lb->setup[0].spacing;
     }
 
     pme_lb->stage = 0;
@@ -270,15 +282,18 @@ void pme_loadbal_init(pme_load_balancing_t     **pme_lb_p,
 
     if (!wallcycle_have_counter())
     {
-        GMX_LOG(mdlog.warning).asParagraph().appendText("NOTE: Cycle counters unsupported or not enabled in kernel. Cannot use PME-PP balancing.");
+        GMX_LOG(mdlog.warning)
+                .asParagraph()
+                .appendText(
+                        "NOTE: Cycle counters unsupported or not enabled in kernel. Cannot use "
+                        "PME-PP balancing.");
     }
 
     /* Tune with GPUs and/or separate PME ranks.
      * When running only on a CPU without PME ranks, PME tuning will only help
      * with small numbers of atoms in the cut-off sphere.
      */
-    pme_lb->bActive  = (wallcycle_have_counter() && (bUseGPU ||
-                                                     pme_lb->bSepPMERanks));
+    pme_lb->bActive = (wallcycle_have_counter() && (bUseGPU || pme_lb->bSepPMERanks));
 
     /* With GPUs and no separate PME ranks we can't measure the PP/PME
      * imbalance, so we start balancing right away.
@@ -286,7 +301,7 @@ void pme_loadbal_init(pme_load_balancing_t     **pme_lb_p,
      */
     pme_lb->bBalance = (pme_lb->bActive && (bUseGPU && !pme_lb->bSepPMERanks));
 
-    pme_lb->step_rel_stop = PMETunePeriod*ir.nstlist;
+    pme_lb->step_rel_stop = PMETunePeriod * ir.nstlist;
 
     /* Delay DD load balancing when GPUs are used */
     if (pme_lb->bActive && DOMAINDECOMP(cr) && cr->dd->nnodes > 1 && bUseGPU)
@@ -300,7 +315,9 @@ void pme_loadbal_init(pme_load_balancing_t     **pme_lb_p,
         dd_dlb_lock(cr->dd);
         if (dd_dlb_is_locked(cr->dd))
         {
-            GMX_LOG(mdlog.warning).asParagraph().appendText("NOTE: DLB will not turn on during the first phase of PME tuning");
+            GMX_LOG(mdlog.warning)
+                    .asParagraph()
+                    .appendText("NOTE: DLB will not turn on during the first phase of PME tuning");
         }
     }
 
@@ -310,14 +327,12 @@ void pme_loadbal_init(pme_load_balancing_t     **pme_lb_p,
 }
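
At this point pme_loadbal_init has fixed the two reference quantities the tuner scales against: the initial grid spacing (the largest per-dimension box length divided by the grid size) and cut_spacing, the cut-off/spacing ratio that is held constant when new setups are generated. Worked through with illustrative numbers (a hypothetical 4 nm cubic box, not from a real run):

    #include <algorithm>

    int main()
    {
        const double boxNorm[3] = { 4.0, 4.0, 4.0 }; // |box[d]| in nm
        const int    grid[3]    = { 40, 40, 40 };    // ir.nkx, ir.nky, ir.nkz
        const double rcoulomb   = 1.0;               // nm

        double spacing = 0;
        for (int d = 0; d < 3; d++)
        {
            spacing = std::max(spacing, boxNorm[d] / grid[d]); // largest spacing
        }
        // With fourier_spacing unset this gives cut_spacing = 1.0 / 0.1 = 10.
        const double cutSpacing = rcoulomb / spacing;
        (void)cutSpacing;
    }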
 
 /*! \brief Try to increase the cutoff during load balancing */
-static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t *pme_lb,
-                                            int                   pme_order,
-                                            const gmx_domdec_t   *dd)
+static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t* pme_lb, int pme_order, const gmx_domdec_t* dd)
 {
-    real         fac, sp;
-    real         tmpr_coulomb, tmpr_vdw;
-    int          d;
-    bool         grid_ok;
+    real fac, sp;
+    real tmpr_coulomb, tmpr_vdw;
+    int  d;
+    bool grid_ok;
 
     /* Try to add a new setup with next larger cut-off to the list */
     pme_setup_t set;
@@ -341,27 +356,19 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t *pme_lb,
 
         fac *= 1.01;
         clear_ivec(set.grid);
-        sp = calcFftGrid(nullptr, pme_lb->box_start,
-                         fac*pme_lb->setup[pme_lb->cur].spacing,
-                         minimalPmeGridSize(pme_order),
-                         &set.grid[XX],
-                         &set.grid[YY],
-                         &set.grid[ZZ]);
+        sp = calcFftGrid(nullptr, pme_lb->box_start, fac * pme_lb->setup[pme_lb->cur].spacing,
+                         minimalPmeGridSize(pme_order), &set.grid[XX], &set.grid[YY], &set.grid[ZZ]);
 
         /* As here we can't easily check if one of the PME ranks
          * uses threading, we do a conservative grid check.
          * This means we can't use pme_order or less grid lines
          * per PME rank along x, which is not a strong restriction.
          */
-        grid_ok = gmx_pme_check_restrictions(pme_order,
-                                             set.grid[XX], set.grid[YY], set.grid[ZZ],
-                                             numPmeDomains.x,
-                                             true,
-                                             false);
-    }
-    while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing || !grid_ok);
+        grid_ok = gmx_pme_check_restrictions(pme_order, set.grid[XX], set.grid[YY], set.grid[ZZ],
+                                             numPmeDomains.x, true, false);
+    } while (sp <= 1.001 * pme_lb->setup[pme_lb->cur].spacing || !grid_ok);
 
-    set.rcut_coulomb = pme_lb->cut_spacing*sp;
+    set.rcut_coulomb = pme_lb->cut_spacing * sp;
     if (set.rcut_coulomb < pme_lb->rcut_coulomb_start)
     {
         /* This is unlikely, but can happen when e.g. continuing from
@@ -375,63 +382,56 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t *pme_lb,
     if (pme_lb->cutoff_scheme == ecutsVERLET)
     {
         /* Never decrease the Coulomb and VdW list buffers */
-        set.rlistOuter  = std::max(set.rcut_coulomb + pme_lb->rbufOuter_coulomb,
-                                   pme_lb->rcut_vdw + pme_lb->rbufOuter_vdw);
-        set.rlistInner  = std::max(set.rcut_coulomb + pme_lb->rbufInner_coulomb,
-                                   pme_lb->rcut_vdw + pme_lb->rbufInner_vdw);
+        set.rlistOuter = std::max(set.rcut_coulomb + pme_lb->rbufOuter_coulomb,
+                                  pme_lb->rcut_vdw + pme_lb->rbufOuter_vdw);
+        set.rlistInner = std::max(set.rcut_coulomb + pme_lb->rbufInner_coulomb,
+                                  pme_lb->rcut_vdw + pme_lb->rbufInner_vdw);
     }
     else
     {
         /* TODO Remove these lines and pme_lb->cutoff_scheme */
-        tmpr_coulomb     = set.rcut_coulomb + pme_lb->rbufOuter_coulomb;
-        tmpr_vdw         = pme_lb->rcut_vdw + pme_lb->rbufOuter_vdw;
+        tmpr_coulomb = set.rcut_coulomb + pme_lb->rbufOuter_coulomb;
+        tmpr_vdw     = pme_lb->rcut_vdw + pme_lb->rbufOuter_vdw;
         /* Two (known) bugs with cutoff-scheme=group here:
          * - This modification of rlist results in incorrect DD communication.
          * - We should set fr->bTwinRange = (fr->rlistlong > fr->rlist).
          */
-        set.rlistOuter  = std::min(tmpr_coulomb, tmpr_vdw);
-        set.rlistInner  = set.rlistOuter;
+        set.rlistOuter = std::min(tmpr_coulomb, tmpr_vdw);
+        set.rlistInner = set.rlistOuter;
     }
 
-    set.spacing         = sp;
+    set.spacing = sp;
     /* The grid efficiency is the size wrt a grid with uniform x/y/z spacing */
     set.grid_efficiency = 1;
     for (d = 0; d < DIM; d++)
     {
-        set.grid_efficiency *= (set.grid[d]*sp)/norm(pme_lb->box_start[d]);
+        set.grid_efficiency *= (set.grid[d] * sp) / norm(pme_lb->box_start[d]);
     }
     /* The Ewald coefficient is inversely proportional to the cut-off */
-    set.ewaldcoeff_q =
-        pme_lb->setup[0].ewaldcoeff_q*pme_lb->setup[0].rcut_coulomb/set.rcut_coulomb;
+    set.ewaldcoeff_q = pme_lb->setup[0].ewaldcoeff_q * pme_lb->setup[0].rcut_coulomb / set.rcut_coulomb;
     /* We set ewaldcoeff_lj in set, even when LJ-PME is not used */
-    set.ewaldcoeff_lj =
-        pme_lb->setup[0].ewaldcoeff_lj*pme_lb->setup[0].rcut_coulomb/set.rcut_coulomb;
+    set.ewaldcoeff_lj = pme_lb->setup[0].ewaldcoeff_lj * pme_lb->setup[0].rcut_coulomb / set.rcut_coulomb;
 
-    set.count   = 0;
-    set.cycles  = 0;
+    set.count  = 0;
+    set.cycles = 0;
 
     if (debug)
     {
-        fprintf(debug, "PME loadbal: grid %d %d %d, coulomb cutoff %f\n",
-                set.grid[XX], set.grid[YY], set.grid[ZZ], set.rcut_coulomb);
+        fprintf(debug, "PME loadbal: grid %d %d %d, coulomb cutoff %f\n", set.grid[XX],
+                set.grid[YY], set.grid[ZZ], set.rcut_coulomb);
     }
     pme_lb->setup.push_back(set);
     return TRUE;
 }
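
Each new setup thus scales the grid spacing up, derives the cut-off from the fixed cut_spacing ratio, and scales the Ewald coefficients down in inverse proportion, so the product ewaldcoeff_q * rcut_coulomb, and with it the neglected real-space tail erfc(beta*rc), stays constant across setups. A small standalone check of that invariant with illustrative values:

    #include <cassert>
    #include <cmath>

    int main()
    {
        const double rcut0      = 1.0;  // nm, setup[0].rcut_coulomb
        const double beta0      = 3.12; // 1/nm, setup[0].ewaldcoeff_q
        const double cutSpacing = 10.0; // rcut/spacing ratio fixed at init
        const double sp         = 0.12; // new (largest) grid spacing, nm

        const double rcutNew = cutSpacing * sp;         // 1.2 nm
        const double betaNew = beta0 * rcut0 / rcutNew; // inverse scaling

        // beta * rcut is invariant, so erfc(beta * rcut) is too.
        assert(std::fabs(betaNew * rcutNew - beta0 * rcut0) < 1e-12);
    }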
 
 /*! \brief Print the PME grid */
-static void print_grid(FILE *fp_err, FILE *fp_log,
-                       const char *pre,
-                       const char *desc,
-                       const pme_setup_t *set,
-                       double cycles)
+static void print_grid(FILE* fp_err, FILE* fp_log, const char* pre, const char* desc, const pme_setup_t* set, double cycles)
 {
-    auto buf = gmx::formatString("%-11s%10s pme grid %d %d %d, coulomb cutoff %.3f",
-                                 pre, desc,
+    auto buf = gmx::formatString("%-11s%10s pme grid %d %d %d, coulomb cutoff %.3f", pre, desc,
                                  set->grid[XX], set->grid[YY], set->grid[ZZ], set->rcut_coulomb);
     if (cycles >= 0)
     {
-        buf += gmx::formatString(": %.1f M-cycles", cycles*1e-6);
+        buf += gmx::formatString(": %.1f M-cycles", cycles * 1e-6);
     }
     if (fp_err != nullptr)
     {
@@ -445,7 +445,7 @@ static void print_grid(FILE *fp_err, FILE *fp_log,
 }
 
 /*! \brief Return the index of the last setup used in PME load balancing */
-static int pme_loadbal_end(pme_load_balancing_t *pme_lb)
+static int pme_loadbal_end(pme_load_balancing_t* pme_lb)
 {
     /* In the initial stage only n is set; end is not set yet */
     if (pme_lb->end > 0)
@@ -459,14 +459,12 @@ static int pme_loadbal_end(pme_load_balancing_t *pme_lb)
 }
 
 /*! \brief Print descriptive string about what limits PME load balancing */
-static void print_loadbal_limited(FILE *fp_err, FILE *fp_log,
-                                  int64_t step,
-                                  pme_load_balancing_t *pme_lb)
+static void print_loadbal_limited(FILE* fp_err, FILE* fp_log, int64_t step, pme_load_balancing_t* pme_lb)
 {
-    auto buf = gmx::formatString("step %4s: the %s limits the PME load balancing to a coulomb cut-off of %.3f",
-                                 gmx::int64ToString(step).c_str(),
-                                 pmelblim_str[pme_lb->elimited],
-                                 pme_lb->setup[pme_loadbal_end(pme_lb)-1].rcut_coulomb);
+    auto buf = gmx::formatString(
+            "step %4s: the %s limits the PME load balancing to a coulomb cut-off of %.3f",
+            gmx::int64ToString(step).c_str(), pmelblim_str[pme_lb->elimited],
+            pme_lb->setup[pme_loadbal_end(pme_lb) - 1].rcut_coulomb);
     if (fp_err != nullptr)
     {
         fprintf(fp_err, "\r%s\n", buf.c_str());
@@ -481,16 +479,16 @@ static void print_loadbal_limited(FILE *fp_err, FILE *fp_log,
 /*! \brief Switch load balancing to stage 1
  *
  * In this stage, only reasonably fast setups are run again. */
-static void switch_to_stage1(pme_load_balancing_t *pme_lb)
+static void switch_to_stage1(pme_load_balancing_t* pme_lb)
 {
     /* Increase start until we find a setup that is not slower than
      * maxRelativeSlowdownAccepted times the fastest setup.
      */
     pme_lb->start = pme_lb->lower_limit;
-    while (pme_lb->start + 1 < gmx::ssize(pme_lb->setup) &&
-           (pme_lb->setup[pme_lb->start].count == 0 ||
-            pme_lb->setup[pme_lb->start].cycles >
-            pme_lb->setup[pme_lb->fastest].cycles*maxRelativeSlowdownAccepted))
+    while (pme_lb->start + 1 < gmx::ssize(pme_lb->setup)
+           && (pme_lb->setup[pme_lb->start].count == 0
+               || pme_lb->setup[pme_lb->start].cycles
+                          > pme_lb->setup[pme_lb->fastest].cycles * maxRelativeSlowdownAccepted))
     {
         pme_lb->start++;
     }
@@ -499,17 +497,16 @@ static void switch_to_stage1(pme_load_balancing_t *pme_lb)
      * any skipped setups that lie between setups that were measured to be
      * acceptably fast and too slow.
      */
-    while (pme_lb->start > pme_lb->lower_limit &&
-           pme_lb->setup[pme_lb->start - 1].count == 0)
+    while (pme_lb->start > pme_lb->lower_limit && pme_lb->setup[pme_lb->start - 1].count == 0)
     {
         pme_lb->start--;
     }
 
     /* Decrease end only with setups that we timed and that are slow. */
     pme_lb->end = pme_lb->setup.size();
-    if (pme_lb->setup[pme_lb->end - 1].count > 0 &&
-        pme_lb->setup[pme_lb->end - 1].cycles >
-        pme_lb->setup[pme_lb->fastest].cycles*maxRelativeSlowdownAccepted)
+    if (pme_lb->setup[pme_lb->end - 1].count > 0
+        && pme_lb->setup[pme_lb->end - 1].cycles
+                   > pme_lb->setup[pme_lb->fastest].cycles * maxRelativeSlowdownAccepted)
     {
         pme_lb->end--;
     }
@@ -534,23 +531,22 @@ static void switch_to_stage1(pme_load_balancing_t *pme_lb)
  * Here we try to take into account fluctuations and changes due to external
  * factors as well as DD load balancing.
  */
-static void
-pme_load_balance(pme_load_balancing_t          *pme_lb,
-                 t_commrec                     *cr,
-                 FILE                          *fp_err,
-                 FILE                          *fp_log,
-                 const gmx::MDLogger           &mdlog,
-                 const t_inputrec              &ir,
-                 const matrix                   box,
-                 gmx::ArrayRef<const gmx::RVec> x,
-                 double                         cycles,
-                 interaction_const_t           *ic,
-                 struct nonbonded_verlet_t     *nbv,
-                 struct gmx_pme_t     **        pmedata,
-                 int64_t                        step)
+static void pme_load_balance(pme_load_balancing_t*          pme_lb,
+                             t_commrec*                     cr,
+                             FILE*                          fp_err,
+                             FILE*                          fp_log,
+                             const gmx::MDLogger&           mdlog,
+                             const t_inputrec&              ir,
+                             const matrix                   box,
+                             gmx::ArrayRef<const gmx::RVec> x,
+                             double                         cycles,
+                             interaction_const_t*           ic,
+                             struct nonbonded_verlet_t*     nbv,
+                             struct gmx_pme_t**             pmedata,
+                             int64_t                        step)
 {
     gmx_bool     OK;
-    pme_setup_t *set;
+    pme_setup_t* set;
     double       cycles_fast;
     char         buf[STRLEN], sbuf[22];
 
@@ -580,8 +576,7 @@ pme_load_balance(pme_load_balancing_t          *pme_lb,
     }
     else
     {
-        if (cycles*maxFluctuationAccepted < set->cycles &&
-            pme_lb->stage == pme_lb->nstage - 1)
+        if (cycles * maxFluctuationAccepted < set->cycles && pme_lb->stage == pme_lb->nstage - 1)
         {
             /* The performance went up a lot (due to e.g. DD load balancing).
              * Add a stage, keep the minima, but rescan all setups.
@@ -590,12 +585,13 @@ pme_load_balance(pme_load_balancing_t          *pme_lb,
 
             if (debug)
             {
-                fprintf(debug, "The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n"
+                fprintf(debug,
+                        "The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this "
+                        "is more than %f\n"
                         "Increased the number stages to %d"
                         " and ignoring the previous performance\n",
-                        set->grid[XX], set->grid[YY], set->grid[ZZ],
-                        set->cycles*1e-6, cycles*1e-6, maxFluctuationAccepted,
-                        pme_lb->nstage);
+                        set->grid[XX], set->grid[YY], set->grid[ZZ], set->cycles * 1e-6,
+                        cycles * 1e-6, maxFluctuationAccepted, pme_lb->nstage);
             }
         }
         set->cycles = std::min(set->cycles, cycles);
@@ -627,8 +623,8 @@ pme_load_balance(pme_load_balancing_t          *pme_lb,
     /* Check in stage 0 if we should stop scanning grids.
      * Stop when the time is more than maxRelativeSlowdownAccepted longer than the fastest.
      */
-    if (pme_lb->stage == 0 && pme_lb->cur > 0 &&
-        cycles > pme_lb->setup[pme_lb->fastest].cycles*maxRelativeSlowdownAccepted)
+    if (pme_lb->stage == 0 && pme_lb->cur > 0
+        && cycles > pme_lb->setup[pme_lb->fastest].cycles * maxRelativeSlowdownAccepted)
     {
         pme_lb->setup.resize(pme_lb->cur + 1);
         /* Done with scanning, go to stage 1 */
@@ -639,11 +635,11 @@ pme_load_balance(pme_load_balancing_t          *pme_lb,
     {
         int gridsize_start;
 
-        gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ];
+        gridsize_start = set->grid[XX] * set->grid[YY] * set->grid[ZZ];
 
         do
         {
-            if (pme_lb->cur+1 < gmx::ssize(pme_lb->setup))
+            if (pme_lb->cur + 1 < gmx::ssize(pme_lb->setup))
             {
                 /* We had already generated the next setup */
                 OK = TRUE;
@@ -659,8 +655,8 @@ pme_load_balance(pme_load_balancing_t          *pme_lb,
                 }
             }
 
-            if (OK &&
-                pme_lb->setup[pme_lb->cur+1].spacing > c_maxSpacingScaling*pme_lb->setup[0].spacing)
+            if (OK
+                && pme_lb->setup[pme_lb->cur + 1].spacing > c_maxSpacingScaling * pme_lb->setup[0].spacing)
             {
                 OK               = FALSE;
                 pme_lb->elimited = epmelblimMAXSCALING;
@@ -668,8 +664,7 @@ pme_load_balance(pme_load_balancing_t          *pme_lb,
 
             if (OK && ir.ePBC != epbcNONE)
             {
-                OK = (gmx::square(pme_lb->setup[pme_lb->cur+1].rlistOuter)
-                      <= max_cutoff2(ir.ePBC, box));
+                OK = (gmx::square(pme_lb->setup[pme_lb->cur + 1].rlistOuter) <= max_cutoff2(ir.ePBC, box));
                 if (!OK)
                 {
                     pme_lb->elimited = epmelblimBOX;
@@ -682,8 +677,7 @@ pme_load_balance(pme_load_balancing_t          *pme_lb,
 
                 if (DOMAINDECOMP(cr))
                 {
-                    OK = change_dd_cutoff(cr, box, x,
-                                          pme_lb->setup[pme_lb->cur].rlistOuter);
+                    OK = change_dd_cutoff(cr, box, x, pme_lb->setup[pme_lb->cur].rlistOuter);
                     if (!OK)
                     {
                         /* Failed: do not use this setup */
@@ -702,15 +696,12 @@ pme_load_balance(pme_load_balancing_t          *pme_lb,
                 /* Switch to the next stage */
                 switch_to_stage1(pme_lb);
             }
-        }
-        while (OK &&
-               !(pme_lb->setup[pme_lb->cur].grid[XX]*
-                 pme_lb->setup[pme_lb->cur].grid[YY]*
-                 pme_lb->setup[pme_lb->cur].grid[ZZ] <
-                 gridsize_start*gridpointsScaleFactor
-                 &&
-                 pme_lb->setup[pme_lb->cur].grid_efficiency <
-                 pme_lb->setup[pme_lb->cur-1].grid_efficiency*relativeEfficiencyFactor));
+        } while (OK
+                 && !(pme_lb->setup[pme_lb->cur].grid[XX] * pme_lb->setup[pme_lb->cur].grid[YY]
+                                      * pme_lb->setup[pme_lb->cur].grid[ZZ]
+                              < gridsize_start * gridpointsScaleFactor
+                      && pme_lb->setup[pme_lb->cur].grid_efficiency
+                                 < pme_lb->setup[pme_lb->cur - 1].grid_efficiency * relativeEfficiencyFactor));
     }
 
     if (pme_lb->stage > 0 && pme_lb->end == 1)
@@ -741,10 +732,8 @@ pme_load_balance(pme_load_balancing_t          *pme_lb,
 
                 pme_lb->cur = pme_lb->end - 1;
             }
-        }
-        while (pme_lb->stage == pme_lb->nstage - 1 &&
-               pme_lb->setup[pme_lb->cur].count > 0 &&
-               pme_lb->setup[pme_lb->cur].cycles > cycles_fast*maxRelativeSlowdownAccepted);
+        } while (pme_lb->stage == pme_lb->nstage - 1 && pme_lb->setup[pme_lb->cur].count > 0
+                 && pme_lb->setup[pme_lb->cur].cycles > cycles_fast * maxRelativeSlowdownAccepted);
 
         if (pme_lb->stage == pme_lb->nstage)
         {
@@ -773,15 +762,19 @@ pme_load_balance(pme_load_balancing_t          *pme_lb,
                 /* This should not happen, as we set limits on the DLB bounds.
                  * But we implement a complete failsafe solution anyhow.
                  */
-                GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
-                        "The fastest PP/PME load balancing setting (cutoff %.3d nm) is no longer available due to DD DLB or box size limitations", pme_lb->fastest);
+                GMX_LOG(mdlog.warning)
+                        .asParagraph()
+                        .appendTextFormatted(
+                                "The fastest PP/PME load balancing setting (cutoff %.3d nm) is no "
+                                "longer available due to DD DLB or box size limitations",
+                                pme_lb->fastest);
                 pme_lb->fastest = pme_lb->lower_limit;
                 pme_lb->start   = pme_lb->lower_limit;
             }
             /* Limit the range to below the current cut-off, scan from start */
-            pme_lb->end         = pme_lb->cur;
-            pme_lb->cur         = pme_lb->start;
-            pme_lb->elimited    = epmelblimDD;
+            pme_lb->end      = pme_lb->cur;
+            pme_lb->cur      = pme_lb->start;
+            pme_lb->elimited = epmelblimDD;
             print_loadbal_limited(fp_err, fp_log, step, pme_lb);
         }
     }
@@ -790,28 +783,29 @@ pme_load_balance(pme_load_balancing_t          *pme_lb,
 
     set = &pme_lb->setup[pme_lb->cur];
 
-    ic->rcoulomb           = set->rcut_coulomb;
+    ic->rcoulomb = set->rcut_coulomb;
     nbv->changePairlistRadii(set->rlistOuter, set->rlistInner);
-    ic->ewaldcoeff_q       = set->ewaldcoeff_q;
+    ic->ewaldcoeff_q = set->ewaldcoeff_q;
     /* TODO: centralize the code that sets the potentials shifts */
     if (ic->coulomb_modifier == eintmodPOTSHIFT)
     {
         GMX_RELEASE_ASSERT(ic->rcoulomb != 0, "Cutoff radius cannot be zero");
-        ic->sh_ewald = std::erfc(ic->ewaldcoeff_q*ic->rcoulomb) / ic->rcoulomb;
+        ic->sh_ewald = std::erfc(ic->ewaldcoeff_q * ic->rcoulomb) / ic->rcoulomb;
     }
     if (EVDW_PME(ic->vdwtype))
     {
         /* We have PME for both Coulomb and VdW, set rvdw equal to rcoulomb */
-        ic->rvdw            = set->rcut_coulomb;
-        ic->ewaldcoeff_lj   = set->ewaldcoeff_lj;
+        ic->rvdw          = set->rcut_coulomb;
+        ic->ewaldcoeff_lj = set->ewaldcoeff_lj;
         if (ic->vdw_modifier == eintmodPOTSHIFT)
         {
-            real       crc2;
+            real crc2;
 
-            ic->dispersion_shift.cpot = -1.0/gmx::power6(static_cast<double>(ic->rvdw));
-            ic->repulsion_shift.cpot  = -1.0/gmx::power12(static_cast<double>(ic->rvdw));
-            crc2                      = gmx::square(ic->ewaldcoeff_lj*ic->rvdw);
-            ic->sh_lj_ewald           = (std::exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)/gmx::power6(ic->rvdw);
+            ic->dispersion_shift.cpot = -1.0 / gmx::power6(static_cast<double>(ic->rvdw));
+            ic->repulsion_shift.cpot  = -1.0 / gmx::power12(static_cast<double>(ic->rvdw));
+            crc2                      = gmx::square(ic->ewaldcoeff_lj * ic->rvdw);
+            ic->sh_lj_ewald =
+                    (std::exp(-crc2) * (1 + crc2 + 0.5 * crc2 * crc2) - 1) / gmx::power6(ic->rvdw);
         }
     }
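
Once a setup is adopted, the potential shifts must follow the new cut-off: with a potential-shift modifier the Coulomb shift becomes erfc(beta*rc)/rc, and with LJ-PME the dispersion/repulsion shifts and sh_lj_ewald are recomputed from rvdw. The same formulas in standalone form, with illustrative parameter values:

    #include <cmath>

    int main()
    {
        const double beta    = 2.6; // ewaldcoeff_q in 1/nm (illustrative)
        const double rc      = 1.2; // new rcoulomb in nm
        const double shEwald = std::erfc(beta * rc) / rc;

        const double betaLj = 2.0; // ewaldcoeff_lj (illustrative)
        const double rvdw   = rc;  // with LJ-PME, rvdw equals rcoulomb
        const double crc2   = (betaLj * rvdw) * (betaLj * rvdw);

        const double dispersionShift = -1.0 / std::pow(rvdw, 6);
        const double repulsionShift  = -1.0 / std::pow(rvdw, 12);
        const double shLjEwald =
                (std::exp(-crc2) * (1 + crc2 + 0.5 * crc2 * crc2) - 1) / std::pow(rvdw, 6);

        (void)shEwald;
        (void)dispersionShift;
        (void)repulsionShift;
        (void)shLjEwald;
    }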
 
@@ -829,14 +823,14 @@ pme_load_balance(pme_load_balancing_t          *pme_lb,
          * This can lead to a lot of reallocations for PME GPU.
          * Would be nicer if the allocated grid list was hidden within a single pmedata structure.
          */
-        if ((pme_lb->setup[pme_lb->cur].pmedata == nullptr) || pme_gpu_task_enabled(pme_lb->setup[pme_lb->cur].pmedata))
+        if ((pme_lb->setup[pme_lb->cur].pmedata == nullptr)
+            || pme_gpu_task_enabled(pme_lb->setup[pme_lb->cur].pmedata))
         {
             /* Generate a new PME data structure,
              * copying part of the old pointers.
              */
-            gmx_pme_reinit(&set->pmedata,
-                           cr, pme_lb->setup[0].pmedata, &ir,
-                           set->grid, set->ewaldcoeff_q, set->ewaldcoeff_lj);
+            gmx_pme_reinit(&set->pmedata, cr, pme_lb->setup[0].pmedata, &ir, set->grid,
+                           set->ewaldcoeff_q, set->ewaldcoeff_lj);
         }
         *pmedata = set->pmedata;
     }
@@ -866,35 +860,34 @@ pme_load_balance(pme_load_balancing_t          *pme_lb,
  * the PP/PME balance might change and re-balancing can improve performance.
  * This function adds 2 stages and adjusts the considered setup range.
  */
-static void continue_pme_loadbal(pme_load_balancing_t *pme_lb,
-                                 gmx_bool              bDlbUnlocked)
+static void continue_pme_loadbal(pme_load_balancing_t* pme_lb, gmx_bool bDlbUnlocked)
 {
     /* Add 2 tuning stages, keep the detected end of the setup range */
-    pme_lb->nstage          += 2;
+    pme_lb->nstage += 2;
     if (bDlbUnlocked && pme_lb->bSepPMERanks)
     {
         /* With separate PME ranks, DLB should always lower the PP load and
          * can only increase the PME load (more communication and imbalance),
          * so we only need to scan longer cut-off's.
          */
-        pme_lb->lower_limit  = pme_lb->cur;
+        pme_lb->lower_limit = pme_lb->cur;
     }
-    pme_lb->start            = pme_lb->lower_limit;
+    pme_lb->start = pme_lb->lower_limit;
 }
 
-void pme_loadbal_do(pme_load_balancing_t          *pme_lb,
-                    t_commrec                     *cr,
-                    FILE                          *fp_err,
-                    FILE                          *fp_log,
-                    const gmx::MDLogger           &mdlog,
-                    const t_inputrec              &ir,
-                    t_forcerec                    *fr,
+void pme_loadbal_do(pme_load_balancing_t*          pme_lb,
+                    t_commrec*                     cr,
+                    FILE*                          fp_err,
+                    FILE*                          fp_log,
+                    const gmx::MDLogger&           mdlog,
+                    const t_inputrec&              ir,
+                    t_forcerec*                    fr,
                     const matrix                   box,
                     gmx::ArrayRef<const gmx::RVec> x,
                     gmx_wallcycle_t                wcycle,
                     int64_t                        step,
                     int64_t                        step_rel,
-                    gmx_bool                      *bPrinting)
+                    gmx_bool*                      bPrinting)
 {
     int    n_prev;
     double cycles_prev;
@@ -937,19 +930,17 @@ void pme_loadbal_do(pme_load_balancing_t          *pme_lb,
          * is not over the last nstlist steps, but the nstlist steps before
          * that. So the first useful ratio is available at step_rel=3*nstlist.
          */
-        else if (step_rel >= 3*ir.nstlist)
+        else if (step_rel >= 3 * ir.nstlist)
         {
             if (DDMASTER(cr->dd))
             {
                 /* If PME rank load is too high, start tuning */
-                pme_lb->bBalance =
-                    (dd_pme_f_ratio(cr->dd) >= loadBalanceTriggerFactor);
+                pme_lb->bBalance = (dd_pme_f_ratio(cr->dd) >= loadBalanceTriggerFactor);
             }
             dd_bcast(cr->dd, sizeof(gmx_bool), &pme_lb->bBalance);
         }
 
-        pme_lb->bActive = (pme_lb->bBalance ||
-                           step_rel <= pme_lb->step_rel_stop);
+        pme_lb->bActive = (pme_lb->bBalance || step_rel <= pme_lb->step_rel_stop);
     }
 
     /* The location in the code of this balancing termination is strange.
@@ -969,14 +960,16 @@ void pme_loadbal_do(pme_load_balancing_t          *pme_lb,
         {
             /* Unlock the DLB=auto, DLB is allowed to activate */
             dd_dlb_unlock(cr->dd);
-            GMX_LOG(mdlog.warning).asParagraph().appendText("NOTE: DLB can now turn on, when beneficial");
+            GMX_LOG(mdlog.warning)
+                    .asParagraph()
+                    .appendText("NOTE: DLB can now turn on, when beneficial");
 
             /* We don't deactivate the tuning yet, since we will balance again
              * after DLB gets turned on, if it does within PMETunePeriod.
              */
             continue_pme_loadbal(pme_lb, TRUE);
             pme_lb->bTriggerOnDLB = TRUE;
-            pme_lb->step_rel_stop = step_rel + PMETunePeriod*ir.nstlist;
+            pme_lb->step_rel_stop = step_rel + PMETunePeriod * ir.nstlist;
         }
         else
         {
@@ -1001,14 +994,11 @@ void pme_loadbal_do(pme_load_balancing_t          *pme_lb,
          * since init_step might not be a multiple of nstlist,
          * but the first data collected is skipped anyhow.
          */
-        pme_load_balance(pme_lb, cr,
-                         fp_err, fp_log, mdlog,
-                         ir, box, x, pme_lb->cycles_c - cycles_prev,
-                         fr->ic, fr->nbv.get(), &fr->pmedata,
-                         step);
+        pme_load_balance(pme_lb, cr, fp_err, fp_log, mdlog, ir, box, x,
+                         pme_lb->cycles_c - cycles_prev, fr->ic, fr->nbv.get(), &fr->pmedata, step);
 
         /* Update deprecated rlist in forcerec to stay in sync with fr->nbv */
-        fr->rlist         = fr->nbv->pairlistOuterRadius();
+        fr->rlist = fr->nbv->pairlistOuterRadius();
 
         if (ir.eDispCorr != edispcNO)
         {
@@ -1016,8 +1006,7 @@ void pme_loadbal_do(pme_load_balancing_t          *pme_lb,
         }
     }
 
-    if (!pme_lb->bBalance &&
-        (!pme_lb->bSepPMERanks || step_rel > pme_lb->step_rel_stop))
+    if (!pme_lb->bBalance && (!pme_lb->bSepPMERanks || step_rel > pme_lb->step_rel_stop))
     {
         /* We have just deactivated the balancing and we're not measuring PP/PME
          * imbalance during the first steps of the run: deactivate the tuning.
@@ -1029,51 +1018,47 @@ void pme_loadbal_do(pme_load_balancing_t          *pme_lb,
     {
         /* Make sure DLB is allowed when we deactivate PME tuning */
         dd_dlb_unlock(cr->dd);
-        GMX_LOG(mdlog.warning).asParagraph().appendText("NOTE: DLB can now turn on, when beneficial");
+        GMX_LOG(mdlog.warning)
+                .asParagraph()
+                .appendText("NOTE: DLB can now turn on, when beneficial");
     }
 
     *bPrinting = pme_lb->bBalance;
 }
 
 /*! \brief Return product of the number of PME grid points in each dimension */
-static int pme_grid_points(const pme_setup_t *setup)
+static int pme_grid_points(const pme_setup_t* setup)
 {
-    return setup->grid[XX]*setup->grid[YY]*setup->grid[ZZ];
+    return setup->grid[XX] * setup->grid[YY] * setup->grid[ZZ];
 }
 
 /*! \brief Print one load-balancing setting */
-static void print_pme_loadbal_setting(FILE              *fplog,
-                                      const char        *name,
-                                      const pme_setup_t *setup)
+static void print_pme_loadbal_setting(FILE* fplog, const char* name, const pme_setup_t* setup)
 {
-    fprintf(fplog,
-            "   %-7s %6.3f nm %6.3f nm     %3d %3d %3d   %5.3f nm  %5.3f nm\n",
-            name,
-            setup->rcut_coulomb, setup->rlistInner,
-            setup->grid[XX], setup->grid[YY], setup->grid[ZZ],
-            setup->spacing, 1/setup->ewaldcoeff_q);
+    fprintf(fplog, "   %-7s %6.3f nm %6.3f nm     %3d %3d %3d   %5.3f nm  %5.3f nm\n", name,
+            setup->rcut_coulomb, setup->rlistInner, setup->grid[XX], setup->grid[YY],
+            setup->grid[ZZ], setup->spacing, 1 / setup->ewaldcoeff_q);
 }
 
 /*! \brief Print all load-balancing settings */
-static void print_pme_loadbal_settings(pme_load_balancing_t *pme_lb,
-                                       FILE                 *fplog,
-                                       const gmx::MDLogger  &mdlog,
+static void print_pme_loadbal_settings(pme_load_balancing_t* pme_lb,
+                                       FILE*                 fplog,
+                                       const gmx::MDLogger&  mdlog,
                                        gmx_bool              bNonBondedOnGPU)
 {
-    double     pp_ratio, grid_ratio;
-    real       pp_ratio_temporary;
+    double pp_ratio, grid_ratio;
+    real   pp_ratio_temporary;
 
     pp_ratio_temporary = pme_lb->setup[pme_lb->cur].rlistInner / pme_lb->setup[0].rlistInner;
     pp_ratio           = gmx::power3(pp_ratio_temporary);
-    grid_ratio         = pme_grid_points(&pme_lb->setup[pme_lb->cur])/
-        static_cast<double>(pme_grid_points(&pme_lb->setup[0]));
+    grid_ratio         = pme_grid_points(&pme_lb->setup[pme_lb->cur])
+                 / static_cast<double>(pme_grid_points(&pme_lb->setup[0]));
 
     fprintf(fplog, "\n");
     fprintf(fplog, "       P P   -   P M E   L O A D   B A L A N C I N G\n");
     fprintf(fplog, "\n");
     /* Here we only warn when the optimal setting is the last one */
-    if (pme_lb->elimited != epmelblimNO &&
-        pme_lb->cur == pme_loadbal_end(pme_lb)-1)
+    if (pme_lb->elimited != epmelblimNO && pme_lb->cur == pme_loadbal_end(pme_lb) - 1)
     {
         fprintf(fplog, " NOTE: The PP/PME load balancing was limited by the %s,\n",
                 pmelblim_str[pme_lb->elimited]);
@@ -1089,16 +1074,19 @@ static void print_pme_loadbal_settings(pme_load_balancing_t *pme_lb,
     fprintf(fplog, "            rcoulomb  rlist            grid      spacing   1/beta\n");
     print_pme_loadbal_setting(fplog, "initial", &pme_lb->setup[0]);
     print_pme_loadbal_setting(fplog, "final", &pme_lb->setup[pme_lb->cur]);
-    fprintf(fplog, " cost-ratio           %4.2f             %4.2f\n",
-            pp_ratio, grid_ratio);
+    fprintf(fplog, " cost-ratio           %4.2f             %4.2f\n", pp_ratio, grid_ratio);
     fprintf(fplog, " (note that these numbers concern only part of the total PP and PME load)\n");
 
     if (pp_ratio > 1.5 && !bNonBondedOnGPU)
     {
-        GMX_LOG(mdlog.warning).asParagraph().appendText(
-                "NOTE: PME load balancing increased the non-bonded workload by more than 50%.\n"
-                "      For better performance, use (more) PME ranks (mdrun -npme),\n"
-                "      or if you are beyond the scaling limit, use fewer total ranks (or nodes).");
+        GMX_LOG(mdlog.warning)
+                .asParagraph()
+                .appendText(
+                        "NOTE: PME load balancing increased the non-bonded workload by more than "
+                        "50%.\n"
+                        "      For better performance, use (more) PME ranks (mdrun -npme),\n"
+                        "      or if you are beyond the scaling limit, use fewer total ranks (or "
+                        "nodes).");
     }
     else
     {
@@ -1106,10 +1094,7 @@ static void print_pme_loadbal_settings(pme_load_balancing_t *pme_lb,
     }
 }
 
-void pme_loadbal_done(pme_load_balancing_t *pme_lb,
-                      FILE                 *fplog,
-                      const gmx::MDLogger  &mdlog,
-                      gmx_bool              bNonBondedOnGPU)
+void pme_loadbal_done(pme_load_balancing_t* pme_lb, FILE* fplog, const gmx::MDLogger& mdlog, gmx_bool bNonBondedOnGPU)
 {
     if (fplog != nullptr && (pme_lb->cur > 0 || pme_lb->elimited != epmelblimNO))
     {