#include "gromacs/utility/logger.h"
#include "gromacs/utility/real.h"
#include "gromacs/utility/smalloc.h"
-#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/message_string_collector.h"
#include "gromacs/utility/unique_cptr.h"
#include "calculate_spline_moduli.h"
#include "pme_spline_work.h"
#include "pme_spread.h"
-/*! \brief Help build a descriptive message in \c error if there are
- * \c errorReasons why PME on GPU is not supported.
- *
- * \returns Whether the lack of errorReasons indicate there is support. */
-static bool addMessageIfNotSupported(const std::list<std::string>& errorReasons, std::string* error)
-{
- bool isSupported = errorReasons.empty();
- if (!isSupported && error)
- {
- std::string regressionTestMarker = "PME GPU does not support";
- // this prefix is tested for in the regression tests script gmxtest.pl
- *error = regressionTestMarker;
- if (errorReasons.size() == 1)
- {
- *error += " " + errorReasons.back();
- }
- else
- {
- *error += ": " + gmx::joinStrings(errorReasons, "; ");
- }
- *error += ".";
- }
- return isSupported;
-}
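+//! Whether PME on a SYCL build is allowed to run, intended only for testing (checked below alongside GMX_GPU_SYCL)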
+//NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
+bool g_allowPmeWithSyclForTesting = false;
bool pme_gpu_supports_build(std::string* error)
{
- std::list<std::string> errorReasons;
- if (GMX_DOUBLE)
- {
- errorReasons.emplace_back("a double-precision build");
- }
- if (!GMX_GPU)
- {
- errorReasons.emplace_back("a non-GPU build");
- }
- if (GMX_GPU_SYCL)
- {
- errorReasons.emplace_back("SYCL build"); // SYCL-TODO
- }
- return addMessageIfNotSupported(errorReasons, error);
+ gmx::MessageStringCollector errorReasons;
+ // Before changing the prefix string, make sure that it is not searched for in regression tests.
+ errorReasons.startContext("PME GPU does not support:");
+ errorReasons.appendIf(GMX_DOUBLE, "Double-precision build of GROMACS.");
+ errorReasons.appendIf(!GMX_GPU, "Non-GPU build of GROMACS.");
+ errorReasons.appendIf(GMX_GPU_SYCL && !g_allowPmeWithSyclForTesting, "SYCL build."); // SYCL-TODO
+ errorReasons.finishContext();
+ if (error != nullptr)
+ {
+ *error = errorReasons.toString();
+ }
+ return errorReasons.isEmpty();
}
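+// A minimal sketch of the collector pattern used above (a simplified stand-in,
+// not the real gmx::MessageStringCollector): appendIf() records a message only
+// when its condition holds, startContext()/finishContext() bracket the messages
+// with a shared prefix, and isEmpty() reports whether any reason fired.
+//
+//   class MessageCollectorSketch
+//   {
+//   public:
+//       void startContext(const std::string& header) { header_ = header; }
+//       void appendIf(bool condition, const std::string& message)
+//       {
+//           if (condition)
+//           {
+//               messages_.push_back(message);
+//           }
+//       }
+//       void finishContext() {}
+//       bool        isEmpty() const { return messages_.empty(); }
+//       std::string toString() const
+//       {
+//           std::string result = messages_.empty() ? std::string{} : header_;
+//           for (const std::string& m : messages_)
+//           {
+//               result += "\n  " + m;
+//           }
+//           return result;
+//       }
+//
+//   private:
+//       std::string              header_;
+//       std::vector<std::string> messages_;
+//   };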
bool pme_gpu_supports_hardware(const gmx_hw_info_t gmx_unused& hwinfo, std::string* error)
{
- std::list<std::string> errorReasons;
-
- if (GMX_GPU_OPENCL)
- {
+ gmx::MessageStringCollector errorReasons;
+ // Before changing the prefix string, make sure that it is not searched for in regression tests.
+ errorReasons.startContext("PME GPU does not support:");
#ifdef __APPLE__
- errorReasons.emplace_back("Apple OS X operating system");
+ errorReasons.appendIf(GMX_GPU_OPENCL, "Apple OS X operating system");
#endif
+ errorReasons.finishContext();
+ if (error != nullptr)
+ {
+ *error = errorReasons.toString();
}
- return addMessageIfNotSupported(errorReasons, error);
+ return errorReasons.isEmpty();
}
bool pme_gpu_supports_input(const t_inputrec& ir, std::string* error)
{
- std::list<std::string> errorReasons;
- if (!EEL_PME(ir.coulombtype))
- {
- errorReasons.emplace_back("systems that do not use PME for electrostatics");
- }
- if (ir.pme_order != 4)
- {
- errorReasons.emplace_back("interpolation orders other than 4");
- }
- if (EVDW_PME(ir.vdwtype))
- {
- errorReasons.emplace_back("Lennard-Jones PME");
- }
- if (!EI_DYNAMICS(ir.eI))
- {
- errorReasons.emplace_back(
- "Cannot compute PME interactions on a GPU, because PME GPU requires a dynamical "
- "integrator (md, sd, etc).");
- }
- return addMessageIfNotSupported(errorReasons, error);
+ gmx::MessageStringCollector errorReasons;
+ // Before changing the prefix string, make sure that it is not searched for in regression tests.
+ errorReasons.startContext("PME GPU does not support:");
+ errorReasons.appendIf(!EEL_PME(ir.coulombtype),
+ "Systems that do not use PME for electrostatics.");
+ errorReasons.appendIf((ir.pme_order != 4), "Interpolation orders other than 4.");
+ errorReasons.appendIf(EVDW_PME(ir.vdwtype), "Lennard-Jones PME.");
+ errorReasons.appendIf(!EI_DYNAMICS(ir.eI), "Non-dynamical integrator (use md, sd, etc).");
+ errorReasons.finishContext();
+ if (error != nullptr)
+ {
+ *error = errorReasons.toString();
+ }
+ return errorReasons.isEmpty();
}
bool pme_gpu_mixed_mode_supports_input(const t_inputrec& ir, std::string* error)
{
- std::list<std::string> errorReasons;
- if (ir.efep != efepNO)
+ gmx::MessageStringCollector errorReasons;
+ // Before changing the prefix string, make sure that it is not searched for in regression tests.
+ errorReasons.startContext("PME GPU in Mixed mode does not support:");
+ errorReasons.appendIf(ir.efep != FreeEnergyPerturbationType::No, "Free Energy Perturbation.");
+ errorReasons.finishContext();
+ if (error != nullptr)
{
- errorReasons.emplace_back("Free Energy Perturbation (in PME GPU mixed mode)");
+ *error = errorReasons.toString();
}
- return addMessageIfNotSupported(errorReasons, error);
+ return errorReasons.isEmpty();
}
/*! \brief \libinternal
 * Finds out if PME with given inputs is possible to run on GPU.
 */
static bool pme_gpu_check_restrictions(const gmx_pme_t* pme, std::string* error)
{
- std::list<std::string> errorReasons;
- if (pme->nnodes != 1)
- {
- errorReasons.emplace_back("PME decomposition");
- }
- if (pme->pme_order != 4)
- {
- errorReasons.emplace_back("interpolation orders other than 4");
- }
- if (pme->doLJ)
- {
- errorReasons.emplace_back("Lennard-Jones PME");
- }
- if (GMX_DOUBLE)
- {
- errorReasons.emplace_back("double precision");
- }
- if (!GMX_GPU)
- {
- errorReasons.emplace_back("non-GPU build of GROMACS");
- }
- if (GMX_GPU_SYCL)
- {
- errorReasons.emplace_back("SYCL build of GROMACS"); // SYCL-TODO
- }
- return addMessageIfNotSupported(errorReasons, error);
+ gmx::MessageStringCollector errorReasons;
+ // Before changing the prefix string, make sure that it is not searched for in regression tests.
+ errorReasons.startContext("PME GPU does not support:");
+ errorReasons.appendIf((pme->nnodes != 1), "PME decomposition.");
+ errorReasons.appendIf((pme->pme_order != 4), "Interpolation orders other than 4.");
+ errorReasons.appendIf(pme->doLJ, "Lennard-Jones PME.");
+ errorReasons.appendIf(GMX_DOUBLE, "Double-precision build of GROMACS.");
+ errorReasons.appendIf(!GMX_GPU, "Non-GPU build of GROMACS.");
+ errorReasons.appendIf(GMX_GPU_SYCL && !g_allowPmeWithSyclForTesting, "SYCL build of GROMACS."); // SYCL-TODO
+ errorReasons.finishContext();
+ if (error != nullptr)
+ {
+ *error = errorReasons.toString();
+ }
+ return errorReasons.isEmpty();
}
PmeRunMode pme_run_mode(const gmx_pme_t* pme)
const int pmeOrder,
const int dimIndex,
const bool doSpread) :
- dimind(dimIndex),
- bSpread(doSpread),
- pme_order(pmeOrder),
- nthread(numThreads),
- spline(nthread)
+ dimind(dimIndex), bSpread(doSpread), pme_order(pmeOrder), nthread(numThreads), spline(nthread)
{
if (PmeMpiCommunicator != MPI_COMM_NULL)
{
MPI_Status stat;
for (size_t b = 0; b < ol->comm_data.size(); b++)
{
- MPI_Sendrecv(&ol->send_size, 1, MPI_INT, ol->comm_data[b].send_id, b, &ol->comm_data[b].recv_size,
- 1, MPI_INT, ol->comm_data[b].recv_id, b, ol->mpi_comm, &stat);
+ MPI_Sendrecv(&ol->send_size,
+ 1,
+ MPI_INT,
+ ol->comm_data[b].send_id,
+ b,
+ &ol->comm_data[b].recv_size,
+ 1,
+ MPI_INT,
+ ol->comm_data[b].recv_id,
+ b,
+ ol->mpi_comm,
+ &stat);
}
#endif
std::string message = gmx::formatString(
"pme_order (%d) is larger than the maximum allowed value (%d). Modify and "
"recompile the code if you really need such a high order.",
- pme_order, PME_ORDER_MAX);
+ pme_order,
+ PME_ORDER_MAX);
GMX_THROW(gmx::InconsistentInputError(message));
}
"threads, the number of grid lines per rank along x should be >= pme_order (%d) "
"or = pmeorder-1. To resolve this issue, use fewer ranks along x (and possibly "
"more along y and/or z) by specifying -dd manually.",
- nkx / static_cast<double>(numPmeDomainsAlongX), pme_order);
+ nkx / static_cast<double>(numPmeDomainsAlongX),
+ pme_order);
}
return true;
gmx::unique_cptr<gmx_pme_t, gmx_pme_destroy> pme(new gmx_pme_t());
- pme->sum_qgrid_tmp = nullptr;
- pme->sum_qgrid_dd_tmp = nullptr;
-
pme->buf_nalloc = 0;
pme->nnodes = 1;
pme->ndecompdim = 0;
#if GMX_MPI
- MPI_Comm_split(pme->mpi_comm, pme->nodeid % numPmeDomains.y, pme->nodeid,
+ MPI_Comm_split(pme->mpi_comm,
+ pme->nodeid % numPmeDomains.y,
+ pme->nodeid,
&pme->mpi_comm_d[0]); /* My communicator along major dimension */
- MPI_Comm_split(pme->mpi_comm, pme->nodeid / numPmeDomains.y, pme->nodeid,
+ MPI_Comm_split(pme->mpi_comm,
+ pme->nodeid / numPmeDomains.y,
+ pme->nodeid,
&pme->mpi_comm_d[1]); /* My communicator along minor dimension */
MPI_Comm_rank(pme->mpi_comm_d[0], &pme->nodeid_major);
* not calculating free-energy for Coulomb and/or LJ while gmx_pme_init()
* configures with free-energy, but that has never been tested.
*/
- pme->doCoulomb = EEL_PME(ir->coulombtype);
- pme->doLJ = EVDW_PME(ir->vdwtype);
- pme->bFEP_q = ((ir->efep != efepNO) && bFreeEnergy_q);
- pme->bFEP_lj = ((ir->efep != efepNO) && bFreeEnergy_lj);
- pme->bFEP = (pme->bFEP_q || pme->bFEP_lj);
- pme->nkx = ir->nkx;
- pme->nky = ir->nky;
- pme->nkz = ir->nkz;
- pme->bP3M = (ir->coulombtype == eelP3M_AD || getenv("GMX_PME_P3M") != nullptr);
+ pme->doCoulomb = EEL_PME(ir->coulombtype);
+ pme->doLJ = EVDW_PME(ir->vdwtype);
+ pme->bFEP_q = ((ir->efep != FreeEnergyPerturbationType::No) && bFreeEnergy_q);
+ pme->bFEP_lj = ((ir->efep != FreeEnergyPerturbationType::No) && bFreeEnergy_lj);
+ pme->bFEP = (pme->bFEP_q || pme->bFEP_lj);
+ pme->nkx = ir->nkx;
+ pme->nky = ir->nky;
+ pme->nkz = ir->nkz;
+ pme->bP3M = (ir->coulombtype == CoulombInteractionType::P3mAD || getenv("GMX_PME_P3M") != nullptr);
pme->pme_order = ir->pme_order;
pme->ewaldcoeff_q = ewaldcoeff_q;
pme->ewaldcoeff_lj = ewaldcoeff_lj;
// The box requires scaling with nwalls = 2, we store that condition as well
// as the scaling factor
- delete pme->boxScaler;
- pme->boxScaler = new EwaldBoxZScaler(*ir);
+ pme->boxScaler = std::make_unique<EwaldBoxZScaler>(
+ EwaldBoxZScaler(inputrecPbcXY2Walls(ir), ir->wall_ewald_zfac));
/* If we violate restrictions, generate a fatal error here */
- gmx_pme_check_restrictions(pme->pme_order, pme->nkx, pme->nky, pme->nkz, pme->nnodes_major,
- pme->bUseThreads, true);
+ gmx_pme_check_restrictions(
+ pme->pme_order, pme->nkx, pme->nky, pme->nkz, pme->nnodes_major, pme->bUseThreads, true);
if (pme->nnodes > 1)
{
" and PME grid_y (%d) and grid_z (%d) should be divisible by "
"#PME_ranks_y "
"(%d)",
- gmx::roundToInt((imbal - 1) * 100), pme->nkx, pme->nky,
- pme->nnodes_major, pme->nky, pme->nkz, pme->nnodes_minor);
+ gmx::roundToInt((imbal - 1) * 100),
+ pme->nkx,
+ pme->nky,
+ pme->nnodes_major,
+ pme->nky,
+ pme->nkz,
+ pme->nnodes_minor);
}
}
* y is always copied through a buffer: we don't need padding in z,
* but we do need the overlap in x because of the communication order.
*/
- init_overlap_comm(&pme->overlap[0], pme->pme_order, pme->mpi_comm_d[0], pme->nnodes_major,
- pme->nodeid_major, pme->nkx,
+ init_overlap_comm(&pme->overlap[0],
+ pme->pme_order,
+ pme->mpi_comm_d[0],
+ pme->nnodes_major,
+ pme->nodeid_major,
+ pme->nkx,
(div_round_up(pme->nky, pme->nnodes_minor) + pme->pme_order)
* (pme->nkz + pme->pme_order - 1));
* We do this with an offset buffer of equal size, so we need to allocate
* extra for the offset. That's what the (+1)*pme->nkz is for.
*/
- init_overlap_comm(&pme->overlap[1], pme->pme_order, pme->mpi_comm_d[1], pme->nnodes_minor,
- pme->nodeid_minor, pme->nky,
+ init_overlap_comm(&pme->overlap[1],
+ pme->pme_order,
+ pme->mpi_comm_d[1],
+ pme->nnodes_minor,
+ pme->nodeid_minor,
+ pme->nky,
(div_round_up(pme->nkx, pme->nnodes_major) + pme->pme_order + 1) * pme->nkz);
/* Double-check for a limitation of the (current) sum_fftgrid_dd code.
pme->pmegrid_start_iy = pme->overlap[1].s2g0[pme->nodeid_minor];
pme->pmegrid_start_iz = 0;
- make_gridindex_to_localindex(pme->nkx, pme->pmegrid_start_ix,
- pme->pmegrid_nx - (pme->pme_order - 1), &pme->nnx, &pme->fshx);
- make_gridindex_to_localindex(pme->nky, pme->pmegrid_start_iy,
- pme->pmegrid_ny - (pme->pme_order - 1), &pme->nny, &pme->fshy);
- make_gridindex_to_localindex(pme->nkz, pme->pmegrid_start_iz, pme->pmegrid_nz_base, &pme->nnz,
- &pme->fshz);
+ make_gridindex_to_localindex(
+ pme->nkx, pme->pmegrid_start_ix, pme->pmegrid_nx - (pme->pme_order - 1), &pme->nnx, &pme->fshx);
+ make_gridindex_to_localindex(
+ pme->nky, pme->pmegrid_start_iy, pme->pmegrid_ny - (pme->pme_order - 1), &pme->nny, &pme->fshy);
+ make_gridindex_to_localindex(
+ pme->nkz, pme->pmegrid_start_iz, pme->pmegrid_nz_base, &pme->nnz, &pme->fshz);
pme->spline_work = make_pme_spline_work(pme->pme_order);
*/
if (pme->doLJ)
{
- pme->ngrids = ((ir->ljpme_combination_rule == eljpmeLB) ? DO_Q_AND_LJ_LB : DO_Q_AND_LJ);
+ pme->ngrids = ((ir->ljpme_combination_rule == LongRangeVdW::LB) ? DO_Q_AND_LJ_LB : DO_Q_AND_LJ);
}
else
{
{
if ((i < DO_Q && pme->doCoulomb && (i == 0 || bFreeEnergy_q))
|| (i >= DO_Q && pme->doLJ
- && (i == 2 || bFreeEnergy_lj || ir->ljpme_combination_rule == eljpmeLB)))
+ && (i == 2 || bFreeEnergy_lj || ir->ljpme_combination_rule == LongRangeVdW::LB)))
{
- pmegrids_init(&pme->pmegrid[i], pme->pmegrid_nx, pme->pmegrid_ny, pme->pmegrid_nz,
- pme->pmegrid_nz_base, pme->pme_order, pme->bUseThreads, pme->nthread,
+ pmegrids_init(&pme->pmegrid[i],
+ pme->pmegrid_nx,
+ pme->pmegrid_ny,
+ pme->pmegrid_nz,
+ pme->pmegrid_nz_base,
+ pme->pme_order,
+ pme->bUseThreads,
+ pme->nthread,
pme->overlap[0].s2g1[pme->nodeid_major]
- pme->overlap[0].s2g0[pme->nodeid_major + 1],
pme->overlap[1].s2g1[pme->nodeid_minor]
const auto allocateRealGridForGpu = (pme->runMode == PmeRunMode::Mixed)
? gmx::PinningPolicy::PinnedIfSupported
: gmx::PinningPolicy::CannotBePinned;
- gmx_parallel_3dfft_init(&pme->pfft_setup[i], ndata, &pme->fftgrid[i], &pme->cfftgrid[i],
- pme->mpi_comm_d, bReproducible, pme->nthread, allocateRealGridForGpu);
+ gmx_parallel_3dfft_init(&pme->pfft_setup[i],
+ ndata,
+ &pme->fftgrid[i],
+ &pme->cfftgrid[i],
+ pme->mpi_comm_d,
+ bReproducible,
+ pme->nthread,
+ allocateRealGridForGpu);
}
}
const gmx::MDLogger dummyLogger;
GMX_ASSERT(pmedata, "Invalid PME pointer");
NumPmeDomains numPmeDomains = { pme_src->nnodes_major, pme_src->nnodes_minor };
- *pmedata = gmx_pme_init(cr, numPmeDomains, &irc, pme_src->bFEP_q, pme_src->bFEP_lj, FALSE,
- ewaldcoeff_q, ewaldcoeff_lj, pme_src->nthread, pme_src->runMode,
- pme_src->gpu, nullptr, nullptr, nullptr, dummyLogger);
+ *pmedata = gmx_pme_init(cr,
+ numPmeDomains,
+ &irc,
+ pme_src->bFEP_q,
+ pme_src->bFEP_lj,
+ FALSE,
+ ewaldcoeff_q,
+ ewaldcoeff_lj,
+ pme_src->nthread,
+ pme_src->runMode,
+ pme_src->gpu,
+ nullptr,
+ nullptr,
+ nullptr,
+ dummyLogger);
/* When running PME on the CPU not using domain decomposition,
* the atom data is allocated once only in gmx_pme_(re)init().
*/
if (!pme_src->gpu && pme_src->nnodes == 1)
{
- gmx_pme_reinit_atoms(*pmedata, pme_src->atc[0].numAtoms(), nullptr, nullptr);
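+ // {} default-constructs empty ArrayRef views, matching the former nullptr arguments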
+ gmx_pme_reinit_atoms(*pmedata, pme_src->atc[0].numAtoms(), {}, {});
}
// TODO this is mostly passing around current values
}
/* We would like to reuse the fft grids, but that's harder */
}
-void gmx_pme_calc_energy(gmx_pme_t* pme, gmx::ArrayRef<const gmx::RVec> x, gmx::ArrayRef<const real> q, real* V)
+real gmx_pme_calc_energy(gmx_pme_t* pme, gmx::ArrayRef<const gmx::RVec> x, gmx::ArrayRef<const real> q)
{
pmegrids_t* grid;
/* Only calculate the spline coefficients, don't actually spread */
spread_on_grid(pme, atc, nullptr, TRUE, FALSE, pme->fftgrid[PME_GRID_QA], FALSE, PME_GRID_QA);
- *V = gather_energy_bsplines(pme, grid->grid.grid, atc);
+ return gather_energy_bsplines(pme, grid->grid.grid, atc);
}
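+// Callers move from an output parameter to the return value, e.g. (hypothetical
+// call site):
+//
+//   // before:  real V;  gmx_pme_calc_energy(pme, x, q, &V);
+//   // after:
+//   const real V = gmx_pme_calc_energy(pme, x, q);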
/*! \brief Calculate initial Lorentz-Berthelot coefficients for LJ-PME */
-static void calc_initial_lb_coeffs(gmx::ArrayRef<real> coefficient, const real* local_c6, const real* local_sigma)
+static void calc_initial_lb_coeffs(gmx::ArrayRef<real> coefficient,
+ gmx::ArrayRef<const real> local_c6,
+ gmx::ArrayRef<const real> local_sigma)
{
for (gmx::index i = 0; i < coefficient.ssize(); ++i)
{
}
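+// gmx::ArrayRef<const real> is a non-owning view, so callers can keep passing
+// std::vector<real> buffers or C arrays without copies while gaining size
+// information (ssize()). The same pattern with the standard C++20 analogue,
+// std::span (shown only for illustration, not GROMACS code):
+//
+//   #include <span>
+//   static void scaleBySigma(std::span<float> coefficient, std::span<const float> sigma)
+//   {
+//       for (std::size_t i = 0; i < coefficient.size(); ++i)
+//       {
+//           coefficient[i] *= sigma[i];
+//       }
+//   }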
/*! \brief Calculate next Lorentz-Berthelot coefficients for LJ-PME */
-static void calc_next_lb_coeffs(gmx::ArrayRef<real> coefficient, const real* local_sigma)
+static void calc_next_lb_coeffs(gmx::ArrayRef<real> coefficient, gmx::ArrayRef<const real> local_sigma)
{
for (gmx::index i = 0; i < coefficient.ssize(); ++i)
{
int gmx_pme_do(struct gmx_pme_t* pme,
gmx::ArrayRef<const gmx::RVec> coordinates,
gmx::ArrayRef<gmx::RVec> forces,
- real chargeA[],
- real chargeB[],
- real c6A[],
- real c6B[],
- real sigmaA[],
- real sigmaB[],
+ gmx::ArrayRef<const real> chargeA,
+ gmx::ArrayRef<const real> chargeB,
+ gmx::ArrayRef<const real> c6A,
+ gmx::ArrayRef<const real> c6B,
+ gmx::ArrayRef<const real> sigmaA,
+ gmx::ArrayRef<const real> sigmaB,
const matrix box,
const t_commrec* cr,
int maxshift_x,
GMX_ASSERT(pme->runMode == PmeRunMode::CPU,
"gmx_pme_do should not be called on the GPU PME run.");
- int d, npme, grid_index, max_grid_index;
- PmeAtomComm& atc = pme->atc[0];
- pmegrids_t* pmegrid = nullptr;
- real* grid = nullptr;
- real* coefficient = nullptr;
- PmeOutput output[2]; // The second is used for the B state with FEP
- real scale, lambda;
- gmx_bool bClearF;
- gmx_parallel_3dfft_t pfft_setup;
- real* fftgrid;
- t_complex* cfftgrid;
- int thread;
- gmx_bool bFirst, bDoSplines;
- int fep_state;
- int fep_states_lj = pme->bFEP_lj ? 2 : 1;
+ PmeAtomComm& atc = pme->atc[0];
+ pmegrids_t* pmegrid = nullptr;
+ real* grid = nullptr;
+ gmx::ArrayRef<const real> coefficient;
+ std::array<PmeOutput, 2> output; // The second is used for the B state with FEP
+ gmx_parallel_3dfft_t pfft_setup;
+ real* fftgrid;
+ t_complex* cfftgrid;
+ int thread;
+ const int fep_states_lj = pme->bFEP_lj ? 2 : 1;
// There's no support for computing energy without virial, or vice versa
const bool computeEnergyAndVirial = (stepWork.computeEnergy || stepWork.computeVirial);
pme->boxScaler->scaleBox(box, scaledBox);
gmx::invertBoxMatrix(scaledBox, pme->recipbox);
- bFirst = TRUE;
+ bool bFirst = true;
/* For simplicity, we construct the splines for all particles if
* more than one PME calculations is needed. Some optimization
* that don't yet have them.
*/
- bDoSplines = pme->bFEP || (pme->doCoulomb && pme->doLJ);
+ bool bDoSplines = pme->bFEP || (pme->doCoulomb && pme->doLJ);
/* We need a maximum of four separate PME calculations:
* grid_index=0: Coulomb PME with charges from state A
*/
/* If we are doing LJ-PME with LB, we only do Q here */
- max_grid_index = (pme->ljpme_combination_rule == eljpmeLB) ? DO_Q : DO_Q_AND_LJ;
-
- for (grid_index = 0; grid_index < max_grid_index; ++grid_index)
+ const int max_grid_index = (pme->ljpme_combination_rule == LongRangeVdW::LB) ? DO_Q : DO_Q_AND_LJ;
+ bool bClearF;
+ for (int grid_index = 0; grid_index < max_grid_index; ++grid_index)
{
/* Check if we should do calculations at this grid_index
* If grid_index is odd we should be doing FEP
grid = pmegrid->grid.grid;
- if (debug)
- {
- fprintf(debug, "PME: number of ranks = %d, rank = %d\n", cr->nnodes, cr->nodeid);
- fprintf(debug, "Grid = %p\n", static_cast<void*>(grid));
- if (grid == nullptr)
- {
- gmx_fatal(FARGS, "No grid!");
- }
- }
-
if (pme->nnodes == 1)
{
- atc.coefficient = gmx::arrayRefFromArray(coefficient, coordinates.size());
+ atc.coefficient = coefficient;
}
else
{
- wallcycle_start(wcycle, ewcPME_REDISTXF);
+ wallcycle_start(wcycle, WallCycleCounter::PmeRedistXF);
do_redist_pos_coeffs(pme, cr, bFirst, coordinates, coefficient);
- wallcycle_stop(wcycle, ewcPME_REDISTXF);
+ wallcycle_stop(wcycle, WallCycleCounter::PmeRedistXF);
}
- if (debug)
- {
- fprintf(debug, "Rank= %6d, pme local particles=%6d\n", cr->nodeid, atc.numAtoms());
- }
-
- wallcycle_start(wcycle, ewcPME_SPREAD);
+ wallcycle_start(wcycle, WallCycleCounter::PmeSpread);
/* Spread the coefficients on a grid */
spread_on_grid(pme, &atc, pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index);
}
- wallcycle_stop(wcycle, ewcPME_SPREAD);
+ wallcycle_stop(wcycle, WallCycleCounter::PmeSpread);
/* TODO If the OpenMP and single-threaded implementations
converge, then spread_on_grid() and
/* do 3d-fft */
if (thread == 0)
{
- wallcycle_start(wcycle, ewcPME_FFT);
+ wallcycle_start(wcycle, WallCycleCounter::PmeFft);
}
gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX, thread, wcycle);
if (thread == 0)
{
- wallcycle_stop(wcycle, ewcPME_FFT);
+ wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
}
/* solve in k-space for our local cells */
if (thread == 0)
{
- wallcycle_start(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME));
+ wallcycle_start(
+ wcycle,
+ (grid_index < DO_Q ? WallCycleCounter::PmeSolve : WallCycleCounter::LJPme));
}
if (grid_index < DO_Q)
{
- loop_count = solve_pme_yzx(
- pme, cfftgrid, scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ],
- computeEnergyAndVirial, pme->nthread, thread);
+ loop_count = solve_pme_yzx(pme,
+ cfftgrid,
+ scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ],
+ computeEnergyAndVirial,
+ pme->nthread,
+ thread);
}
else
{
loop_count =
- solve_pme_lj_yzx(pme, &cfftgrid, FALSE,
+ solve_pme_lj_yzx(pme,
+ &cfftgrid,
+ FALSE,
scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ],
- computeEnergyAndVirial, pme->nthread, thread);
+ computeEnergyAndVirial,
+ pme->nthread,
+ thread);
}
if (thread == 0)
{
- wallcycle_stop(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME));
+ wallcycle_stop(
+ wcycle,
+ (grid_index < DO_Q ? WallCycleCounter::PmeSolve : WallCycleCounter::LJPme));
inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
}
/* do 3d-invfft */
if (thread == 0)
{
- wallcycle_start(wcycle, ewcPME_FFT);
+ wallcycle_start(wcycle, WallCycleCounter::PmeFft);
}
gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL, thread, wcycle);
if (thread == 0)
{
- wallcycle_stop(wcycle, ewcPME_FFT);
+ wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
if (pme->nodeid == 0)
{
- real ntot = pme->nkx * pme->nky * pme->nkz;
- npme = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
+ real ntot = pme->nkx * pme->nky * pme->nkz;
+ const int npme = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
inc_nrnb(nrnb, eNR_FFT, 2 * npme);
}
/* Note: this wallcycle region is closed below
outside an OpenMP region, so take care if
refactoring code here. */
- wallcycle_start(wcycle, ewcPME_GATHER);
+ wallcycle_start(wcycle, WallCycleCounter::PmeGather);
}
copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread);
* atc->f is the actual force array, not a buffer,
* therefore we should not clear it.
*/
- lambda = grid_index < DO_Q ? lambda_q : lambda_lj;
- bClearF = (bFirst && PAR(cr));
+ real lambda = grid_index < DO_Q ? lambda_q : lambda_lj;
+ bClearF = (bFirst && PAR(cr));
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
for (thread = 0; thread < pme->nthread; thread++)
{
try
{
- gather_f_bsplines(pme, grid, bClearF, &atc, &atc.spline[thread],
+ gather_f_bsplines(pme,
+ grid,
+ bClearF,
+ &atc,
+ &atc.spline[thread],
pme->bFEP ? (grid_index % 2 == 0 ? 1.0 - lambda : lambda) : 1.0);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
}
- inc_nrnb(nrnb, eNR_GATHERFBSP,
- pme->pme_order * pme->pme_order * pme->pme_order * atc.numAtoms());
+ inc_nrnb(nrnb, eNR_GATHERFBSP, pme->pme_order * pme->pme_order * pme->pme_order * atc.numAtoms());
/* Note: this wallcycle region is opened above inside an OpenMP
region, so take care if refactoring code here. */
- wallcycle_stop(wcycle, ewcPME_GATHER);
+ wallcycle_stop(wcycle, WallCycleCounter::PmeGather);
}
if (computeEnergyAndVirial)
/* For Lorentz-Berthelot combination rules in LJ-PME, we need to calculate
* seven terms. */
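+// With LB rules, sigma_ij = (sigma_i + sigma_j)/2, so the cross dispersion
+// coefficient contains (sigma_i + sigma_j)^6, which the binomial theorem
+// expands into seven sigma_i^k * sigma_j^(6-k) terms (k = 0..6); each term
+// gets its own grid (grid_index 2..8 below), built by repeatedly multiplying
+// the per-atom coefficients by sigma in calc_next_lb_coeffs().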
- if (pme->doLJ && pme->ljpme_combination_rule == eljpmeLB)
+ if (pme->doLJ && pme->ljpme_combination_rule == LongRangeVdW::LB)
{
/* Loop over A- and B-state if we are doing FEP */
- for (fep_state = 0; fep_state < fep_states_lj; ++fep_state)
+ for (int fep_state = 0; fep_state < fep_states_lj; ++fep_state)
{
- real *local_c6 = nullptr, *local_sigma = nullptr, *RedistC6 = nullptr, *RedistSigma = nullptr;
- gmx::ArrayRef<real> coefficientBuffer;
+ std::vector<real> local_c6;
+ std::vector<real> local_sigma;
+ gmx::ArrayRef<const real> RedistC6;
+ gmx::ArrayRef<const real> RedistSigma;
+ gmx::ArrayRef<real> coefficientBuffer;
if (pme->nnodes == 1)
{
pme->lb_buf1.resize(atc.numAtoms());
switch (fep_state)
{
case 0:
- local_c6 = c6A;
- local_sigma = sigmaA;
+ local_c6.assign(c6A.begin(), c6A.end());
+ local_sigma.assign(sigmaA.begin(), sigmaA.end());
break;
case 1:
- local_c6 = c6B;
- local_sigma = sigmaB;
+ local_c6.assign(c6B.begin(), c6B.end());
+ local_sigma.assign(sigmaB.begin(), sigmaB.end());
break;
default: gmx_incons("Trying to access wrong FEP-state in LJ-PME routine");
}
break;
default: gmx_incons("Trying to access wrong FEP-state in LJ-PME routine");
}
- wallcycle_start(wcycle, ewcPME_REDISTXF);
+ wallcycle_start(wcycle, WallCycleCounter::PmeRedistXF);
do_redist_pos_coeffs(pme, cr, bFirst, coordinates, RedistC6);
pme->lb_buf1.resize(atc.numAtoms());
pme->lb_buf2.resize(atc.numAtoms());
- local_c6 = pme->lb_buf1.data();
+ local_c6.assign(pme->lb_buf1.begin(), pme->lb_buf1.end());
for (int i = 0; i < atc.numAtoms(); ++i)
{
local_c6[i] = atc.coefficient[i];
}
do_redist_pos_coeffs(pme, cr, FALSE, coordinates, RedistSigma);
- local_sigma = pme->lb_buf2.data();
+ local_sigma.assign(pme->lb_buf2.begin(), pme->lb_buf2.end());
for (int i = 0; i < atc.numAtoms(); ++i)
{
local_sigma[i] = atc.coefficient[i];
}
- wallcycle_stop(wcycle, ewcPME_REDISTXF);
+ wallcycle_stop(wcycle, WallCycleCounter::PmeRedistXF);
}
atc.coefficient = coefficientBuffer;
calc_initial_lb_coeffs(coefficientBuffer, local_c6, local_sigma);
/*Seven terms in LJ-PME with LB, grid_index < 2 reserved for electrostatics*/
- for (grid_index = 2; grid_index < 9; ++grid_index)
+ for (int grid_index = 2; grid_index < 9; ++grid_index)
{
/* Unpack structure */
pmegrid = &pme->pmegrid[grid_index];
calc_next_lb_coeffs(coefficientBuffer, local_sigma);
grid = pmegrid->grid.grid;
- wallcycle_start(wcycle, ewcPME_SPREAD);
+ wallcycle_start(wcycle, WallCycleCounter::PmeSpread);
/* Spread the c6 on a grid */
spread_on_grid(pme, &atc, pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
inc_nrnb(nrnb, eNR_WEIGHTS, DIM * atc.numAtoms());
}
- inc_nrnb(nrnb, eNR_SPREADBSP,
+ inc_nrnb(nrnb,
+ eNR_SPREADBSP,
pme->pme_order * pme->pme_order * pme->pme_order * atc.numAtoms());
if (pme->nthread == 1)
{
}
copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index);
}
- wallcycle_stop(wcycle, ewcPME_SPREAD);
+ wallcycle_stop(wcycle, WallCycleCounter::PmeSpread);
/*Here we start a large thread parallel region*/
#pragma omp parallel num_threads(pme->nthread) private(thread)
/* do 3d-fft */
if (thread == 0)
{
- wallcycle_start(wcycle, ewcPME_FFT);
+ wallcycle_start(wcycle, WallCycleCounter::PmeFft);
}
gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX, thread, wcycle);
if (thread == 0)
{
- wallcycle_stop(wcycle, ewcPME_FFT);
+ wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
}
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
thread = gmx_omp_get_thread_num();
if (thread == 0)
{
- wallcycle_start(wcycle, ewcLJPME);
+ wallcycle_start(wcycle, WallCycleCounter::LJPme);
}
loop_count =
- solve_pme_lj_yzx(pme, &pme->cfftgrid[2], TRUE,
+ solve_pme_lj_yzx(pme,
+ &pme->cfftgrid[2],
+ TRUE,
scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ],
- computeEnergyAndVirial, pme->nthread, thread);
+ computeEnergyAndVirial,
+ pme->nthread,
+ thread);
if (thread == 0)
{
- wallcycle_stop(wcycle, ewcLJPME);
+ wallcycle_stop(wcycle, WallCycleCounter::LJPme);
inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
}
}
bFirst = !pme->doCoulomb;
calc_initial_lb_coeffs(coefficientBuffer, local_c6, local_sigma);
- for (grid_index = 8; grid_index >= 2; --grid_index)
+ for (int grid_index = 8; grid_index >= 2; --grid_index)
{
/* Unpack structure */
pmegrid = &pme->pmegrid[grid_index];
/* do 3d-invfft */
if (thread == 0)
{
- wallcycle_start(wcycle, ewcPME_FFT);
+ wallcycle_start(wcycle, WallCycleCounter::PmeFft);
}
gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL, thread, wcycle);
if (thread == 0)
{
- wallcycle_stop(wcycle, ewcPME_FFT);
+ wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
if (pme->nodeid == 0)
{
- real ntot = pme->nkx * pme->nky * pme->nkz;
- npme = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
+ real ntot = pme->nkx * pme->nky * pme->nkz;
+ const int npme = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
inc_nrnb(nrnb, eNR_FFT, 2 * npme);
}
- wallcycle_start(wcycle, ewcPME_GATHER);
+ wallcycle_start(wcycle, WallCycleCounter::PmeGather);
}
copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread);
if (stepWork.computeForces)
{
/* interpolate forces for our local atoms */
- bClearF = (bFirst && PAR(cr));
- scale = pme->bFEP ? (fep_state < 1 ? 1.0 - lambda_lj : lambda_lj) : 1.0;
+ bClearF = (bFirst && PAR(cr));
+ real scale = pme->bFEP ? (fep_state < 1 ? 1.0 - lambda_lj : lambda_lj) : 1.0;
scale *= lb_scale_factor[grid_index - 2];
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
{
try
{
- gather_f_bsplines(pme, grid, bClearF, &pme->atc[0],
- &pme->atc[0].spline[thread], scale);
+ gather_f_bsplines(
+ pme, grid, bClearF, &pme->atc[0], &pme->atc[0].spline[thread], scale);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
}
- inc_nrnb(nrnb, eNR_GATHERFBSP,
+ inc_nrnb(nrnb,
+ eNR_GATHERFBSP,
pme->pme_order * pme->pme_order * pme->pme_order * pme->atc[0].numAtoms());
}
- wallcycle_stop(wcycle, ewcPME_GATHER);
+ wallcycle_stop(wcycle, WallCycleCounter::PmeGather);
bFirst = FALSE;
} /* for (grid_index = 8; grid_index >= 2; --grid_index) */
} /* for (fep_state = 0; fep_state < fep_states_lj; ++fep_state) */
- } /* if (pme->doLJ && pme->ljpme_combination_rule == eljpmeLB) */
+ } /* if (pme->doLJ && pme->ljpme_combination_rule == LongRangeVdW::LB) */
if (stepWork.computeForces && pme->nnodes > 1)
{
- wallcycle_start(wcycle, ewcPME_REDISTXF);
- for (d = 0; d < pme->ndecompdim; d++)
+ wallcycle_start(wcycle, WallCycleCounter::PmeRedistXF);
+ for (int d = 0; d < pme->ndecompdim; d++)
{
gmx::ArrayRef<gmx::RVec> forcesRef;
if (d < pme->ndecompdim - 1)
{
    /* Redistribute into the next level's buffer; the last level writes to the output forces */
    forcesRef = pme->atc[d + 1].f;
}
- if (DOMAINDECOMP(cr))
+ if (haveDDAtomOrdering(*cr))
{
dd_pmeredist_f(pme, &pme->atc[d], forcesRef, d == pme->ndecompdim - 1 && pme->bPPnode);
}
}
- wallcycle_stop(wcycle, ewcPME_REDISTXF);
+ wallcycle_stop(wcycle, WallCycleCounter::PmeRedistXF);
}
if (computeEnergyAndVirial)
}
}
}
- if (debug)
- {
- fprintf(debug, "Electrostatic PME mesh energy: %g\n", *energy_q);
- }
}
else
{
}
}
}
- if (debug)
- {
- fprintf(debug, "Lennard-Jones PME mesh energy: %g\n", *energy_lj);
- }
}
else
{
return;
}
- delete pme->boxScaler;
-
sfree(pme->nnx);
sfree(pme->nny);
sfree(pme->nnz);
pme_free_all_work(&pme->solve_work, pme->nthread);
}
- sfree(pme->sum_qgrid_tmp);
- sfree(pme->sum_qgrid_dd_tmp);
-
destroy_pme_spline_work(pme->spline_work);
if (pme->gpu != nullptr)
delete pme;
}
-void gmx_pme_reinit_atoms(gmx_pme_t* pme, const int numAtoms, const real* chargesA, const real* chargesB)
+void gmx_pme_reinit_atoms(gmx_pme_t* pme,
+ const int numAtoms,
+ gmx::ArrayRef<const real> chargesA,
+ gmx::ArrayRef<const real> chargesB)
{
if (pme->gpu != nullptr)
{
- GMX_ASSERT(!(pme->bFEP_q && chargesB == nullptr),
+ GMX_ASSERT(!(pme->bFEP_q && chargesB.empty()),
"B state charges must be specified if running Coulomb FEP on the GPU");
- pme_gpu_reinit_atoms(pme->gpu, numAtoms, chargesA, pme->bFEP_q ? chargesB : nullptr);
+ pme_gpu_reinit_atoms(pme->gpu, numAtoms, chargesA.data(), pme->bFEP_q ? chargesB.data() : nullptr);
}
else
{
{
return (pme.nkx == grid_size[XX] && pme.nky == grid_size[YY] && pme.nkz == grid_size[ZZ]);
}
+
+void gmx::SeparatePmeRanksPermitted::disablePmeRanks(const std::string& reason)
+{
+ permitSeparatePmeRanks_ = false;
+
+ if (!reason.empty())
+ {
+ reasons_.push_back(reason);
+ }
+}
+
+bool gmx::SeparatePmeRanksPermitted::permitSeparatePmeRanks() const
+{
+ return permitSeparatePmeRanks_;
+}
+
+std::string gmx::SeparatePmeRanksPermitted::reasonsWhyDisabled() const
+{
+ return joinStrings(reasons_, "; ");
+}
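+
+// A sketch of intended use (the call-site names here are illustrative, not
+// taken from this patch): multiple subsystems can each veto separate PME
+// ranks, and the accumulated reasons can be reported together.
+//
+//   gmx::SeparatePmeRanksPermitted separatePmeRanksPermitted;
+//   separatePmeRanksPermitted.disablePmeRanks("PME is running on a CPU");
+//   separatePmeRanksPermitted.disablePmeRanks("the simulation uses multiple time stepping");
+//   if (!separatePmeRanksPermitted.permitSeparatePmeRanks())
+//   {
+//       fprintf(stderr, "Separate PME ranks are not used because: %s\n",
+//               separatePmeRanksPermitted.reasonsWhyDisabled().c_str());
+//   }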