X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=blobdiff_plain;f=src%2Fgromacs%2Fewald%2Fpme.cpp;h=e2c87459a0589403888d57fdfead803bbabb869f;hb=8073360f849809b7ce4fa5b5f14ae16209ca087f;hp=b0b59e6eedbaf7fe8f37134805eab50a5280b711;hpb=2f224c7322e2e2e948590e1e73c82ff2b3ed568f;p=alexxy%2Fgromacs.git

diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp
index b0b59e6eed..e2c87459a0 100644
--- a/src/gromacs/ewald/pme.cpp
+++ b/src/gromacs/ewald/pme.cpp
@@ -114,7 +114,7 @@
 #include "gromacs/utility/logger.h"
 #include "gromacs/utility/real.h"
 #include "gromacs/utility/smalloc.h"
-#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/message_string_collector.h"
 #include "gromacs/utility/unique_cptr.h"
 
 #include "calculate_spline_moduli.h"
@@ -127,94 +127,71 @@
 #include "pme_spline_work.h"
 #include "pme_spread.h"
 
-/*! \brief Help build a descriptive message in \c error if there are
- * \c errorReasons why PME on GPU is not supported.
- *
- * \returns Whether the lack of errorReasons indicate there is support. */
-static bool addMessageIfNotSupported(const std::list<std::string>& errorReasons, std::string* error)
-{
-    bool isSupported = errorReasons.empty();
-    if (!isSupported && error)
-    {
-        std::string regressionTestMarker = "PME GPU does not support";
-        // this prefix is tested for in the regression tests script gmxtest.pl
-        *error = regressionTestMarker;
-        if (errorReasons.size() == 1)
-        {
-            *error += " " + errorReasons.back();
-        }
-        else
-        {
-            *error += ": " + gmx::joinStrings(errorReasons, "; ");
-        }
-        *error += ".";
-    }
-    return isSupported;
-}
+//NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
+bool g_allowPmeWithSyclForTesting = false;
 
 bool pme_gpu_supports_build(std::string* error)
 {
-    std::list<std::string> errorReasons;
-    if (GMX_DOUBLE)
-    {
-        errorReasons.emplace_back("a double-precision build");
-    }
-    if (!GMX_GPU)
-    {
-        errorReasons.emplace_back("a non-GPU build");
-    }
-    if (GMX_GPU_SYCL)
-    {
-        errorReasons.emplace_back("SYCL build"); // SYCL-TODO
-    }
-    return addMessageIfNotSupported(errorReasons, error);
+    gmx::MessageStringCollector errorReasons;
+    // Before changing the prefix string, make sure that it is not searched for in regression tests.
+    errorReasons.startContext("PME GPU does not support:");
+    errorReasons.appendIf(GMX_DOUBLE, "Double-precision build of GROMACS.");
+    errorReasons.appendIf(!GMX_GPU, "Non-GPU build of GROMACS.");
+    errorReasons.appendIf(GMX_GPU_SYCL && !g_allowPmeWithSyclForTesting, "SYCL build."); // SYCL-TODO
+    errorReasons.finishContext();
+    if (error != nullptr)
+    {
+        *error = errorReasons.toString();
+    }
+    return errorReasons.isEmpty();
 }
 
 bool pme_gpu_supports_hardware(const gmx_hw_info_t gmx_unused& hwinfo, std::string* error)
 {
-    std::list<std::string> errorReasons;
-
-    if (GMX_GPU_OPENCL)
-    {
+    gmx::MessageStringCollector errorReasons;
+    // Before changing the prefix string, make sure that it is not searched for in regression tests.
+    errorReasons.startContext("PME GPU does not support:");
 #ifdef __APPLE__
-        errorReasons.emplace_back("Apple OS X operating system");
+    errorReasons.appendIf(GMX_GPU_OPENCL, "Apple OS X operating system");
 #endif
+    errorReasons.finishContext();
+    if (error != nullptr)
+    {
+        *error = errorReasons.toString();
     }
-    return addMessageIfNotSupported(errorReasons, error);
+    return errorReasons.isEmpty();
 }
 
 bool pme_gpu_supports_input(const t_inputrec& ir, std::string* error)
 {
-    std::list<std::string> errorReasons;
-    if (!EEL_PME(ir.coulombtype))
-    {
-        errorReasons.emplace_back("systems that do not use PME for electrostatics");
-    }
-    if (ir.pme_order != 4)
-    {
-        errorReasons.emplace_back("interpolation orders other than 4");
-    }
-    if (EVDW_PME(ir.vdwtype))
-    {
-        errorReasons.emplace_back("Lennard-Jones PME");
-    }
-    if (!EI_DYNAMICS(ir.eI))
-    {
-        errorReasons.emplace_back(
-                "Cannot compute PME interactions on a GPU, because PME GPU requires a dynamical "
-                "integrator (md, sd, etc).");
-    }
-    return addMessageIfNotSupported(errorReasons, error);
+    gmx::MessageStringCollector errorReasons;
+    // Before changing the prefix string, make sure that it is not searched for in regression tests.
+    errorReasons.startContext("PME GPU does not support:");
+    errorReasons.appendIf(!EEL_PME(ir.coulombtype),
+                          "Systems that do not use PME for electrostatics.");
+    errorReasons.appendIf((ir.pme_order != 4), "Interpolation orders other than 4.");
+    errorReasons.appendIf(EVDW_PME(ir.vdwtype), "Lennard-Jones PME.");
+    errorReasons.appendIf(!EI_DYNAMICS(ir.eI), "Non-dynamical integrator (use md, sd, etc).");
+    errorReasons.finishContext();
+    if (error != nullptr)
+    {
+        *error = errorReasons.toString();
+    }
+    return errorReasons.isEmpty();
 }
 
 bool pme_gpu_mixed_mode_supports_input(const t_inputrec& ir, std::string* error)
 {
-    std::list<std::string> errorReasons;
-    if (ir.efep != efepNO)
+    gmx::MessageStringCollector errorReasons;
+    // Before changing the prefix string, make sure that it is not searched for in regression tests.
+    errorReasons.startContext("PME GPU in Mixed mode does not support:");
+    errorReasons.appendIf(ir.efep != FreeEnergyPerturbationType::No, "Free Energy Perturbation.");
+    errorReasons.finishContext();
+    if (error != nullptr)
     {
-        errorReasons.emplace_back("Free Energy Perturbation (in PME GPU mixed mode)");
+        *error = errorReasons.toString();
    }
-    return addMessageIfNotSupported(errorReasons, error);
+    return errorReasons.isEmpty();
 }
 
 /*! \brief \libinternal
@@ -228,32 +205,21 @@ bool pme_gpu_mixed_mode_supports_input(const t_inputrec& ir, std::string* error)
  */
 static bool pme_gpu_check_restrictions(const gmx_pme_t* pme, std::string* error)
 {
-    std::list<std::string> errorReasons;
-    if (pme->nnodes != 1)
-    {
-        errorReasons.emplace_back("PME decomposition");
-    }
-    if (pme->pme_order != 4)
-    {
-        errorReasons.emplace_back("interpolation orders other than 4");
-    }
-    if (pme->doLJ)
-    {
-        errorReasons.emplace_back("Lennard-Jones PME");
-    }
-    if (GMX_DOUBLE)
-    {
-        errorReasons.emplace_back("double precision");
-    }
-    if (!GMX_GPU)
-    {
-        errorReasons.emplace_back("non-GPU build of GROMACS");
-    }
-    if (GMX_GPU_SYCL)
-    {
-        errorReasons.emplace_back("SYCL build of GROMACS"); // SYCL-TODO
-    }
-    return addMessageIfNotSupported(errorReasons, error);
+    gmx::MessageStringCollector errorReasons;
+    // Before changing the prefix string, make sure that it is not searched for in regression tests.
+    errorReasons.startContext("PME GPU does not support:");
+    errorReasons.appendIf((pme->nnodes != 1), "PME decomposition.");
+    errorReasons.appendIf((pme->pme_order != 4), "interpolation orders other than 4.");
+    errorReasons.appendIf(pme->doLJ, "Lennard-Jones PME.");
+    errorReasons.appendIf(GMX_DOUBLE, "Double precision build of GROMACS.");
+    errorReasons.appendIf(!GMX_GPU, "Non-GPU build of GROMACS.");
+    errorReasons.appendIf(GMX_GPU_SYCL && !g_allowPmeWithSyclForTesting, "SYCL build of GROMACS."); // SYCL-TODO
+    errorReasons.finishContext();
+    if (error != nullptr)
+    {
+        *error = errorReasons.toString();
+    }
+    return errorReasons.isEmpty();
 }
 
 PmeRunMode pme_run_mode(const gmx_pme_t* pme)
@@ -333,11 +299,7 @@ PmeAtomComm::PmeAtomComm(MPI_Comm   PmeMpiCommunicator,
                          const int  pmeOrder,
                          const int  dimIndex,
                          const bool doSpread) :
-    dimind(dimIndex),
-    bSpread(doSpread),
-    pme_order(pmeOrder),
-    nthread(numThreads),
-    spline(nthread)
+    dimind(dimIndex), bSpread(doSpread), pme_order(pmeOrder), nthread(numThreads), spline(nthread)
 {
     if (PmeMpiCommunicator != MPI_COMM_NULL)
     {
@@ -485,8 +447,18 @@ static void init_overlap_comm(pme_overlap_t* ol, int norder, MPI_Comm comm, int
     MPI_Status stat;
     for (size_t b = 0; b < ol->comm_data.size(); b++)
     {
-        MPI_Sendrecv(&ol->send_size, 1, MPI_INT, ol->comm_data[b].send_id, b, &ol->comm_data[b].recv_size,
-                     1, MPI_INT, ol->comm_data[b].recv_id, b, ol->mpi_comm, &stat);
+        MPI_Sendrecv(&ol->send_size,
+                     1,
+                     MPI_INT,
+                     ol->comm_data[b].send_id,
+                     b,
+                     &ol->comm_data[b].recv_size,
+                     1,
+                     MPI_INT,
+                     ol->comm_data[b].recv_id,
+                     b,
+                     ol->mpi_comm,
+                     &stat);
     }
 #endif
 
@@ -524,7 +496,8 @@ bool gmx_pme_check_restrictions(int pme_order, int nkx, int nky, int nkz, int nu
         std::string message = gmx::formatString(
                 "pme_order (%d) is larger than the maximum allowed value (%d). Modify and "
                 "recompile the code if you really need such a high order.",
-                pme_order, PME_ORDER_MAX);
+                pme_order,
+                PME_ORDER_MAX);
         GMX_THROW(gmx::InconsistentInputError(message));
     }
 
@@ -555,7 +528,8 @@ bool gmx_pme_check_restrictions(int pme_order, int nkx, int nky, int nkz, int nu
                 "threads, the number of grid lines per rank along x should be >= pme_order (%d) "
                 "or = pmeorder-1. To resolve this issue, use fewer ranks along x (and possibly "
                 "more along y and/or z) by specifying -dd manually.",
-                nkx / static_cast<double>(numPmeDomainsAlongX), pme_order);
+                nkx / static_cast<double>(numPmeDomainsAlongX),
+                pme_order);
     }
 
     return true;
@@ -593,9 +567,6 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
 
     gmx::unique_cptr<gmx_pme_t, gmx_pme_destroy> pme(new gmx_pme_t());
 
-    pme->sum_qgrid_tmp    = nullptr;
-    pme->sum_qgrid_dd_tmp = nullptr;
-    pme->buf_nalloc       = 0;
 
     pme->nnodes = 1;
@@ -659,9 +630,13 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
             pme->ndecompdim = 2;
 
 #if GMX_MPI
-            MPI_Comm_split(pme->mpi_comm, pme->nodeid % numPmeDomains.y, pme->nodeid,
+            MPI_Comm_split(pme->mpi_comm,
+                           pme->nodeid % numPmeDomains.y,
+                           pme->nodeid,
                            &pme->mpi_comm_d[0]); /* My communicator along major dimension */
-            MPI_Comm_split(pme->mpi_comm, pme->nodeid / numPmeDomains.y, pme->nodeid,
+            MPI_Comm_split(pme->mpi_comm,
+                           pme->nodeid / numPmeDomains.y,
+                           pme->nodeid,
                            &pme->mpi_comm_d[1]); /* My communicator along minor dimension */
 
             MPI_Comm_rank(pme->mpi_comm_d[0], &pme->nodeid_major);
@@ -704,15 +679,15 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
      * not calculating free-energy for Coulomb and/or LJ while gmx_pme_init()
      * configures with free-energy, but that has never been tested.
      */
-    pme->doCoulomb     = EEL_PME(ir->coulombtype);
-    pme->doLJ          = EVDW_PME(ir->vdwtype);
-    pme->bFEP_q        = ((ir->efep != efepNO) && bFreeEnergy_q);
-    pme->bFEP_lj       = ((ir->efep != efepNO) && bFreeEnergy_lj);
-    pme->bFEP          = (pme->bFEP_q || pme->bFEP_lj);
-    pme->nkx           = ir->nkx;
-    pme->nky           = ir->nky;
-    pme->nkz           = ir->nkz;
-    pme->bP3M          = (ir->coulombtype == eelP3M_AD || getenv("GMX_PME_P3M") != nullptr);
+    pme->doCoulomb = EEL_PME(ir->coulombtype);
+    pme->doLJ      = EVDW_PME(ir->vdwtype);
+    pme->bFEP_q    = ((ir->efep != FreeEnergyPerturbationType::No) && bFreeEnergy_q);
+    pme->bFEP_lj   = ((ir->efep != FreeEnergyPerturbationType::No) && bFreeEnergy_lj);
+    pme->bFEP      = (pme->bFEP_q || pme->bFEP_lj);
+    pme->nkx       = ir->nkx;
+    pme->nky       = ir->nky;
+    pme->nkz       = ir->nkz;
+    pme->bP3M = (ir->coulombtype == CoulombInteractionType::P3mAD || getenv("GMX_PME_P3M") != nullptr);
     pme->pme_order     = ir->pme_order;
     pme->ewaldcoeff_q  = ewaldcoeff_q;
     pme->ewaldcoeff_lj = ewaldcoeff_lj;
@@ -725,12 +700,12 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
 
     // The box requires scaling with nwalls = 2, we store that condition as well
     // as the scaling factor
-    delete pme->boxScaler;
-    pme->boxScaler = new EwaldBoxZScaler(*ir);
+    pme->boxScaler = std::make_unique<EwaldBoxZScaler>(
+            EwaldBoxZScaler(inputrecPbcXY2Walls(ir), ir->wall_ewald_zfac));
 
     /* If we violate restrictions, generate a fatal error here */
-    gmx_pme_check_restrictions(pme->pme_order, pme->nkx, pme->nky, pme->nkz, pme->nnodes_major,
-                               pme->bUseThreads, true);
+    gmx_pme_check_restrictions(
+            pme->pme_order, pme->nkx, pme->nky, pme->nkz, pme->nnodes_major, pme->bUseThreads, true);
 
     if (pme->nnodes > 1)
     {
@@ -761,8 +736,13 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
                       " and PME grid_y (%d) and grid_z (%d) should be divisible by "
                       "#PME_ranks_y "
                       "(%d)",
-                      gmx::roundToInt((imbal - 1) * 100), pme->nkx, pme->nky,
-                      pme->nnodes_major, pme->nky, pme->nkz, pme->nnodes_minor);
+                      gmx::roundToInt((imbal - 1) * 100),
+                      pme->nkx,
+                      pme->nky,
+                      pme->nnodes_major,
+                      pme->nky,
+                      pme->nkz,
+                      pme->nnodes_minor);
         }
     }
 
@@ -771,8 +751,12 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
      * y is always copied through a buffer: we don't need padding in z,
     * but we do need the overlap in x because of the communication order.
      */
-    init_overlap_comm(&pme->overlap[0], pme->pme_order, pme->mpi_comm_d[0], pme->nnodes_major,
-                      pme->nodeid_major, pme->nkx,
+    init_overlap_comm(&pme->overlap[0],
+                      pme->pme_order,
+                      pme->mpi_comm_d[0],
+                      pme->nnodes_major,
+                      pme->nodeid_major,
+                      pme->nkx,
                       (div_round_up(pme->nky, pme->nnodes_minor) + pme->pme_order)
                               * (pme->nkz + pme->pme_order - 1));
 
@@ -780,8 +764,12 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
      * We do this with an offset buffer of equal size, so we need to allocate
      * extra for the offset. That's what the (+1)*pme->nkz is for.
      */
-    init_overlap_comm(&pme->overlap[1], pme->pme_order, pme->mpi_comm_d[1], pme->nnodes_minor,
-                      pme->nodeid_minor, pme->nky,
+    init_overlap_comm(&pme->overlap[1],
+                      pme->pme_order,
+                      pme->mpi_comm_d[1],
+                      pme->nnodes_minor,
+                      pme->nodeid_minor,
+                      pme->nky,
                       (div_round_up(pme->nkx, pme->nnodes_major) + pme->pme_order + 1) * pme->nkz);
 
     /* Double-check for a limitation of the (current) sum_fftgrid_dd code.
@@ -813,12 +801,12 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
     pme->pmegrid_start_iy = pme->overlap[1].s2g0[pme->nodeid_minor];
     pme->pmegrid_start_iz = 0;
 
-    make_gridindex_to_localindex(pme->nkx, pme->pmegrid_start_ix,
-                                 pme->pmegrid_nx - (pme->pme_order - 1), &pme->nnx, &pme->fshx);
-    make_gridindex_to_localindex(pme->nky, pme->pmegrid_start_iy,
-                                 pme->pmegrid_ny - (pme->pme_order - 1), &pme->nny, &pme->fshy);
-    make_gridindex_to_localindex(pme->nkz, pme->pmegrid_start_iz, pme->pmegrid_nz_base, &pme->nnz,
-                                 &pme->fshz);
+    make_gridindex_to_localindex(
+            pme->nkx, pme->pmegrid_start_ix, pme->pmegrid_nx - (pme->pme_order - 1), &pme->nnx, &pme->fshx);
+    make_gridindex_to_localindex(
+            pme->nky, pme->pmegrid_start_iy, pme->pmegrid_ny - (pme->pme_order - 1), &pme->nny, &pme->fshy);
+    make_gridindex_to_localindex(
+            pme->nkz, pme->pmegrid_start_iz, pme->pmegrid_nz_base, &pme->nnz, &pme->fshz);
 
     pme->spline_work = make_pme_spline_work(pme->pme_order);
 
@@ -830,7 +818,7 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
      */
     if (pme->doLJ)
     {
-        pme->ngrids = ((ir->ljpme_combination_rule == eljpmeLB) ? DO_Q_AND_LJ_LB : DO_Q_AND_LJ);
+        pme->ngrids = ((ir->ljpme_combination_rule == LongRangeVdW::LB) ? DO_Q_AND_LJ_LB : DO_Q_AND_LJ);
     }
     else
     {
@@ -844,10 +832,16 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
     {
         if ((i < DO_Q && pme->doCoulomb && (i == 0 || bFreeEnergy_q))
             || (i >= DO_Q && pme->doLJ
-                && (i == 2 || bFreeEnergy_lj || ir->ljpme_combination_rule == eljpmeLB)))
+                && (i == 2 || bFreeEnergy_lj || ir->ljpme_combination_rule == LongRangeVdW::LB)))
         {
-            pmegrids_init(&pme->pmegrid[i], pme->pmegrid_nx, pme->pmegrid_ny, pme->pmegrid_nz,
-                          pme->pmegrid_nz_base, pme->pme_order, pme->bUseThreads, pme->nthread,
+            pmegrids_init(&pme->pmegrid[i],
+                          pme->pmegrid_nx,
+                          pme->pmegrid_ny,
+                          pme->pmegrid_nz,
+                          pme->pmegrid_nz_base,
+                          pme->pme_order,
+                          pme->bUseThreads,
+                          pme->nthread,
                           pme->overlap[0].s2g1[pme->nodeid_major]
                                   - pme->overlap[0].s2g0[pme->nodeid_major + 1],
                           pme->overlap[1].s2g1[pme->nodeid_minor]
@@ -856,8 +850,14 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
             const auto allocateRealGridForGpu = (pme->runMode == PmeRunMode::Mixed)
                                                         ? gmx::PinningPolicy::PinnedIfSupported
                                                         : gmx::PinningPolicy::CannotBePinned;
-            gmx_parallel_3dfft_init(&pme->pfft_setup[i], ndata, &pme->fftgrid[i], &pme->cfftgrid[i],
-                                    pme->mpi_comm_d, bReproducible, pme->nthread, allocateRealGridForGpu);
+            gmx_parallel_3dfft_init(&pme->pfft_setup[i],
+                                    ndata,
+                                    &pme->fftgrid[i],
+                                    &pme->cfftgrid[i],
+                                    pme->mpi_comm_d,
+                                    bReproducible,
+                                    pme->nthread,
+                                    allocateRealGridForGpu);
         }
     }
 
@@ -938,15 +938,27 @@ void gmx_pme_reinit(struct gmx_pme_t** pmedata,
     const gmx::MDLogger dummyLogger;
     GMX_ASSERT(pmedata, "Invalid PME pointer");
     NumPmeDomains numPmeDomains = { pme_src->nnodes_major, pme_src->nnodes_minor };
-    *pmedata = gmx_pme_init(cr, numPmeDomains, &irc, pme_src->bFEP_q, pme_src->bFEP_lj, FALSE,
-                            ewaldcoeff_q, ewaldcoeff_lj, pme_src->nthread, pme_src->runMode,
-                            pme_src->gpu, nullptr, nullptr, nullptr, dummyLogger);
+    *pmedata = gmx_pme_init(cr,
+                            numPmeDomains,
+                            &irc,
+                            pme_src->bFEP_q,
+                            pme_src->bFEP_lj,
+                            FALSE,
+                            ewaldcoeff_q,
+                            ewaldcoeff_lj,
+                            pme_src->nthread,
+                            pme_src->runMode,
+                            pme_src->gpu,
+                            nullptr,
+                            nullptr,
+                            nullptr,
+                            dummyLogger);
 
     /* When running PME on the CPU not using domain decomposition,
      * the atom data is allocated once only in gmx_pme_(re)init().
      */
     if (!pme_src->gpu && pme_src->nnodes == 1)
     {
-        gmx_pme_reinit_atoms(*pmedata, pme_src->atc[0].numAtoms(), nullptr, nullptr);
+        gmx_pme_reinit_atoms(*pmedata, pme_src->atc[0].numAtoms(), {}, {});
     }
     // TODO this is mostly passing around current values
 }
@@ -957,7 +969,7 @@ void gmx_pme_reinit(struct gmx_pme_t** pmedata,
     /* We would like to reuse the fft grids, but that's harder */
 }
 
-void gmx_pme_calc_energy(gmx_pme_t* pme, gmx::ArrayRef<const gmx::RVec> x, gmx::ArrayRef<const real> q, real* V)
+real gmx_pme_calc_energy(gmx_pme_t* pme, gmx::ArrayRef<const gmx::RVec> x, gmx::ArrayRef<const real> q)
 {
     pmegrids_t* grid;
 
@@ -985,11 +997,13 @@ void gmx_pme_calc_energy(gmx_pme_t* pme, gmx::ArrayRef<const gmx::RVec> x, gmx::
     /* Only calculate the spline coefficients, don't actually spread */
     spread_on_grid(pme, atc, nullptr, TRUE, FALSE, pme->fftgrid[PME_GRID_QA], FALSE, PME_GRID_QA);
 
-    *V = gather_energy_bsplines(pme, grid->grid.grid, atc);
+    return gather_energy_bsplines(pme, grid->grid.grid, atc);
 }
 
 /*! \brief Calculate initial Lorentz-Berthelot coefficients for LJ-PME */
-static void calc_initial_lb_coeffs(gmx::ArrayRef<real> coefficient, const real* local_c6, const real* local_sigma)
+static void calc_initial_lb_coeffs(gmx::ArrayRef<real>       coefficient,
+                                   gmx::ArrayRef<const real> local_c6,
+                                   gmx::ArrayRef<const real> local_sigma)
 {
     for (gmx::index i = 0; i < coefficient.ssize(); ++i)
     {
@@ -1001,7 +1015,7 @@ static void calc_initial_lb_coeffs(gmx::ArrayRef<real> coefficient, const real*
 }
 
 /*! \brief Calculate next Lorentz-Berthelot coefficients for LJ-PME */
-static void calc_next_lb_coeffs(gmx::ArrayRef<real> coefficient, const real* local_sigma)
+static void calc_next_lb_coeffs(gmx::ArrayRef<real> coefficient, gmx::ArrayRef<const real> local_sigma)
 {
     for (gmx::index i = 0; i < coefficient.ssize(); ++i)
     {
@@ -1012,12 +1026,12 @@ static void calc_next_lb_coeffs(gmx::ArrayRef<real> coefficient, const real* loc
 int gmx_pme_do(struct gmx_pme_t*              pme,
                gmx::ArrayRef<const gmx::RVec> coordinates,
                gmx::ArrayRef<gmx::RVec>       forces,
-               real                           chargeA[],
-               real                           chargeB[],
-               real                           c6A[],
-               real                           c6B[],
-               real                           sigmaA[],
-               real                           sigmaB[],
+               gmx::ArrayRef<const real>      chargeA,
+               gmx::ArrayRef<const real>      chargeB,
+               gmx::ArrayRef<const real>      c6A,
+               gmx::ArrayRef<const real>      c6B,
+               gmx::ArrayRef<const real>      sigmaA,
+               gmx::ArrayRef<const real>      sigmaB,
                const matrix                   box,
                const t_commrec*               cr,
                int                            maxshift_x,
@@ -1037,21 +1051,16 @@ int gmx_pme_do(struct gmx_pme_t* pme,
     GMX_ASSERT(pme->runMode == PmeRunMode::CPU,
                "gmx_pme_do should not be called on the GPU PME run.");
 
-    int                  d, npme, grid_index, max_grid_index;
-    PmeAtomComm&         atc         = pme->atc[0];
-    pmegrids_t*          pmegrid     = nullptr;
-    real*                grid        = nullptr;
-    real*                coefficient = nullptr;
-    PmeOutput            output[2]; // The second is used for the B state with FEP
-    real                 scale, lambda;
-    gmx_bool             bClearF;
-    gmx_parallel_3dfft_t pfft_setup;
-    real*                fftgrid;
-    t_complex*           cfftgrid;
-    int                  thread;
-    gmx_bool             bFirst, bDoSplines;
-    int                  fep_state;
-    int                  fep_states_lj = pme->bFEP_lj ? 2 : 1;
+    PmeAtomComm&              atc     = pme->atc[0];
+    pmegrids_t*               pmegrid = nullptr;
+    real*                     grid    = nullptr;
+    gmx::ArrayRef<const real> coefficient;
+    std::array<PmeOutput, 2>  output; // The second is used for the B state with FEP
+    gmx_parallel_3dfft_t      pfft_setup;
+    real*                     fftgrid;
+    t_complex*                cfftgrid;
+    int                       thread;
+    const int                 fep_states_lj = pme->bFEP_lj ? 2 : 1;
 
     // There's no support for computing energy without virial, or vice versa
     const bool computeEnergyAndVirial = (stepWork.computeEnergy || stepWork.computeVirial);
@@ -1091,7 +1100,7 @@ int gmx_pme_do(struct gmx_pme_t* pme,
     pme->boxScaler->scaleBox(box, scaledBox);
 
     gmx::invertBoxMatrix(scaledBox, pme->recipbox);
-    bFirst = TRUE;
+    bool bFirst = true;
 
     /* For simplicity, we construct the splines for all particles if
      * more than one PME calculations is needed. Some optimization
      * could be done by keeping track of which atoms
@@ -1100,7 +1109,7 @@ int gmx_pme_do(struct gmx_pme_t* pme,
      * that don't yet have them.
      */
 
-    bDoSplines = pme->bFEP || (pme->doCoulomb && pme->doLJ);
+    bool bDoSplines = pme->bFEP || (pme->doCoulomb && pme->doLJ);
 
     /* We need a maximum of four separate PME calculations:
      * grid_index=0: Coulomb PME with charges from state A
     * grid_index=1: Coulomb PME with charges from state B
     * grid_index=2: LJ PME with C6 from state A
     * grid_index=3: LJ PME with C6 from state B
     */
 
     /* If we are doing LJ-PME with LB, we only do Q here */
-    max_grid_index = (pme->ljpme_combination_rule == eljpmeLB) ? DO_Q : DO_Q_AND_LJ;
-
-    for (grid_index = 0; grid_index < max_grid_index; ++grid_index)
+    const int max_grid_index = (pme->ljpme_combination_rule == LongRangeVdW::LB) ? DO_Q : DO_Q_AND_LJ;
+    bool      bClearF;
+    for (int grid_index = 0; grid_index < max_grid_index; ++grid_index)
     {
         /* Check if we should do calculations at this grid_index
          * If grid_index is odd we should be doing FEP
@@ -1141,34 +1150,19 @@ int gmx_pme_do(struct gmx_pme_t* pme,
 
         grid = pmegrid->grid.grid;
 
-        if (debug)
-        {
-            fprintf(debug, "PME: number of ranks = %d, rank = %d\n", cr->nnodes, cr->nodeid);
-            fprintf(debug, "Grid = %p\n", static_cast<void*>(grid));
-            if (grid == nullptr)
-            {
-                gmx_fatal(FARGS, "No grid!");
-            }
-        }
-
         if (pme->nnodes == 1)
         {
-            atc.coefficient = gmx::arrayRefFromArray(coefficient, coordinates.size());
+            atc.coefficient = coefficient;
        }
         else
         {
-            wallcycle_start(wcycle, ewcPME_REDISTXF);
+            wallcycle_start(wcycle, WallCycleCounter::PmeRedistXF);
             do_redist_pos_coeffs(pme, cr, bFirst, coordinates, coefficient);
 
-            wallcycle_stop(wcycle, ewcPME_REDISTXF);
-        }
-
-        if (debug)
-        {
-            fprintf(debug, "Rank= %6d, pme local particles=%6d\n", cr->nodeid, atc.numAtoms());
+            wallcycle_stop(wcycle, WallCycleCounter::PmeRedistXF);
         }
 
-        wallcycle_start(wcycle, ewcPME_SPREAD);
+        wallcycle_start(wcycle, WallCycleCounter::PmeSpread);
 
         /* Spread the coefficients on a grid */
         spread_on_grid(pme, &atc, pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
@@ -1192,7 +1186,7 @@ int gmx_pme_do(struct gmx_pme_t* pme,
             copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index);
         }
 
-        wallcycle_stop(wcycle, ewcPME_SPREAD);
+        wallcycle_stop(wcycle, WallCycleCounter::PmeSpread);
 
         /* TODO If the OpenMP and single-threaded implementations
            converge, then spread_on_grid() and
@@ -1211,61 +1205,72 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                 /* do 3d-fft */
                 if (thread == 0)
                 {
-                    wallcycle_start(wcycle, ewcPME_FFT);
+                    wallcycle_start(wcycle, WallCycleCounter::PmeFft);
                 }
                 gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX, thread, wcycle);
                 if (thread == 0)
                 {
-                    wallcycle_stop(wcycle, ewcPME_FFT);
+                    wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
                 }
 
                 /* solve in k-space for our local cells */
                 if (thread == 0)
                 {
-                    wallcycle_start(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME));
+                    wallcycle_start(
+                            wcycle,
+                            (grid_index < DO_Q ? WallCycleCounter::PmeSolve : WallCycleCounter::LJPme));
                 }
                 if (grid_index < DO_Q)
                 {
-                    loop_count = solve_pme_yzx(
-                            pme, cfftgrid, scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ],
-                            computeEnergyAndVirial, pme->nthread, thread);
+                    loop_count = solve_pme_yzx(pme,
+                                               cfftgrid,
+                                               scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ],
+                                               computeEnergyAndVirial,
+                                               pme->nthread,
+                                               thread);
                 }
                 else
                 {
                     loop_count =
-                            solve_pme_lj_yzx(pme, &cfftgrid, FALSE,
+                            solve_pme_lj_yzx(pme,
+                                             &cfftgrid,
+                                             FALSE,
                                              scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ],
-                                             computeEnergyAndVirial, pme->nthread, thread);
+                                             computeEnergyAndVirial,
+                                             pme->nthread,
+                                             thread);
                 }
 
                 if (thread == 0)
                 {
-                    wallcycle_stop(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME));
+                    wallcycle_stop(
+                            wcycle,
+                            (grid_index < DO_Q ? WallCycleCounter::PmeSolve : WallCycleCounter::LJPme));
                     inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
                 }
 
                 /* do 3d-invfft */
                 if (thread == 0)
                 {
-                    wallcycle_start(wcycle, ewcPME_FFT);
+                    wallcycle_start(wcycle, WallCycleCounter::PmeFft);
                 }
                 gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL, thread, wcycle);
                 if (thread == 0)
                 {
-                    wallcycle_stop(wcycle, ewcPME_FFT);
+                    wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
 
                     if (pme->nodeid == 0)
                     {
-                        real ntot = pme->nkx * pme->nky * pme->nkz;
-                        npme      = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
+                        real      ntot = pme->nkx * pme->nky * pme->nkz;
+                        const int npme = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
                         inc_nrnb(nrnb, eNR_FFT, 2 * npme);
                     }
 
                     /* Note: this wallcycle region is closed below
                        outside an OpenMP region, so take care if
                        refactoring code here. */
-                    wallcycle_start(wcycle, ewcPME_GATHER);
+                    wallcycle_start(wcycle, WallCycleCounter::PmeGather);
                 }
 
                 copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread);
@@ -1293,25 +1298,28 @@ int gmx_pme_do(struct gmx_pme_t* pme,
              * atc->f is the actual force array, not a buffer,
             * therefore we should not clear it.
             */
-            lambda  = grid_index < DO_Q ? lambda_q : lambda_lj;
-            bClearF = (bFirst && PAR(cr));
+            real lambda = grid_index < DO_Q ? lambda_q : lambda_lj;
+            bClearF     = (bFirst && PAR(cr));
 #pragma omp parallel for num_threads(pme->nthread) schedule(static)
             for (thread = 0; thread < pme->nthread; thread++)
             {
                 try
                 {
-                    gather_f_bsplines(pme, grid, bClearF, &atc, &atc.spline[thread],
+                    gather_f_bsplines(pme,
+                                      grid,
+                                      bClearF,
+                                      &atc,
+                                      &atc.spline[thread],
                                       pme->bFEP ? (grid_index % 2 == 0 ? 1.0 - lambda : lambda) : 1.0);
                 }
                 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
             }
 
-            inc_nrnb(nrnb, eNR_GATHERFBSP,
-                     pme->pme_order * pme->pme_order * pme->pme_order * atc.numAtoms());
+            inc_nrnb(nrnb, eNR_GATHERFBSP, pme->pme_order * pme->pme_order * pme->pme_order * atc.numAtoms());
             /* Note: this wallcycle region is opened above inside an OpenMP
                region, so take care if refactoring code here. */
-            wallcycle_stop(wcycle, ewcPME_GATHER);
+            wallcycle_stop(wcycle, WallCycleCounter::PmeGather);
        }
 
         if (computeEnergyAndVirial)
         {
@@ -1334,13 +1342,16 @@ int gmx_pme_do(struct gmx_pme_t* pme,
     /* For Lorentz-Berthelot combination rules in LJ-PME, we need to calculate
      * seven terms.
      */
-    if (pme->doLJ && pme->ljpme_combination_rule == eljpmeLB)
+    if (pme->doLJ && pme->ljpme_combination_rule == LongRangeVdW::LB)
     {
         /* Loop over A- and B-state if we are doing FEP */
-        for (fep_state = 0; fep_state < fep_states_lj; ++fep_state)
+        for (int fep_state = 0; fep_state < fep_states_lj; ++fep_state)
         {
-            real *local_c6 = nullptr, *local_sigma = nullptr, *RedistC6 = nullptr, *RedistSigma = nullptr;
-            gmx::ArrayRef<real> coefficientBuffer;
+            std::vector<real>         local_c6;
+            std::vector<real>         local_sigma;
+            gmx::ArrayRef<const real> RedistC6;
+            gmx::ArrayRef<const real> RedistSigma;
+            gmx::ArrayRef<real>       coefficientBuffer;
             if (pme->nnodes == 1)
             {
                 pme->lb_buf1.resize(atc.numAtoms());
@@ -1348,12 +1359,12 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                 switch (fep_state)
                 {
                     case 0:
-                        local_c6    = c6A;
-                        local_sigma = sigmaA;
+                        local_c6.assign(c6A.begin(), c6A.end());
+                        local_sigma.assign(sigmaA.begin(), sigmaA.end());
                         break;
                     case 1:
-                        local_c6    = c6B;
-                        local_sigma = sigmaB;
+                        local_c6.assign(c6B.begin(), c6B.end());
+                        local_sigma.assign(sigmaB.begin(), sigmaB.end());
                         break;
                     default: gmx_incons("Trying to access wrong FEP-state in LJ-PME routine");
                }
@@ -1373,31 +1384,31 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                         break;
                     default: gmx_incons("Trying to access wrong FEP-state in LJ-PME routine");
                 }
-                wallcycle_start(wcycle, ewcPME_REDISTXF);
+                wallcycle_start(wcycle, WallCycleCounter::PmeRedistXF);
 
                 do_redist_pos_coeffs(pme, cr, bFirst, coordinates, RedistC6);
                 pme->lb_buf1.resize(atc.numAtoms());
                 pme->lb_buf2.resize(atc.numAtoms());
-                local_c6 = pme->lb_buf1.data();
+                local_c6.assign(pme->lb_buf1.begin(), pme->lb_buf1.end());
                 for (int i = 0; i < atc.numAtoms(); ++i)
                 {
                     local_c6[i] = atc.coefficient[i];
                 }
 
                 do_redist_pos_coeffs(pme, cr, FALSE, coordinates, RedistSigma);
-                local_sigma = pme->lb_buf2.data();
+                local_sigma.assign(pme->lb_buf2.begin(), pme->lb_buf2.end());
                 for (int i = 0; i < atc.numAtoms(); ++i)
                 {
                     local_sigma[i] = atc.coefficient[i];
                 }
 
-                wallcycle_stop(wcycle, ewcPME_REDISTXF);
+                wallcycle_stop(wcycle, WallCycleCounter::PmeRedistXF);
             }
             atc.coefficient = coefficientBuffer;
             calc_initial_lb_coeffs(coefficientBuffer, local_c6, local_sigma);
 
             /*Seven terms in LJ-PME with LB, grid_index < 2 reserved for electrostatics*/
-            for (grid_index = 2; grid_index < 9; ++grid_index)
+            for (int grid_index = 2; grid_index < 9; ++grid_index)
             {
                 /* Unpack structure */
                 pmegrid    = &pme->pmegrid[grid_index];
@@ -1406,7 +1417,7 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                 calc_next_lb_coeffs(coefficientBuffer, local_sigma);
                 grid = pmegrid->grid.grid;
 
-                wallcycle_start(wcycle, ewcPME_SPREAD);
+                wallcycle_start(wcycle, WallCycleCounter::PmeSpread);
                 /* Spread the c6 on a grid */
                 spread_on_grid(pme, &atc, pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
 
                 if (bFirst)
                 {
                     inc_nrnb(nrnb, eNR_WEIGHTS, DIM * atc.numAtoms());
                 }
 
-                inc_nrnb(nrnb, eNR_SPREADBSP,
+                inc_nrnb(nrnb,
+                         eNR_SPREADBSP,
                          pme->pme_order * pme->pme_order * pme->pme_order * atc.numAtoms());
                 if (pme->nthread == 1)
                 {
@@ -1427,7 +1439,7 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                     }
                     copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index);
                 }
-                wallcycle_stop(wcycle, ewcPME_SPREAD);
+                wallcycle_stop(wcycle, WallCycleCounter::PmeSpread);
 
                 /*Here we start a large thread parallel region*/
#pragma omp parallel num_threads(pme->nthread) private(thread)
                 {
                     try
                     {
                         thread = gmx_omp_get_thread_num();
                         /* do 3d-fft */
                         if (thread == 0)
                         {
-                            wallcycle_start(wcycle, ewcPME_FFT);
+                            wallcycle_start(wcycle, WallCycleCounter::PmeFft);
                         }
 
                         gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX, thread, wcycle);
                         if (thread == 0)
                         {
-                            wallcycle_stop(wcycle, ewcPME_FFT);
+                            wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
                         }
                     }
                     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
                 }
@@ -1460,16 +1472,20 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                         thread = gmx_omp_get_thread_num();
                         if (thread == 0)
                         {
-                            wallcycle_start(wcycle, ewcLJPME);
+                            wallcycle_start(wcycle, WallCycleCounter::LJPme);
                         }
 
                         loop_count =
-                                solve_pme_lj_yzx(pme, &pme->cfftgrid[2], TRUE,
+                                solve_pme_lj_yzx(pme,
+                                                 &pme->cfftgrid[2],
+                                                 TRUE,
                                                  scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ],
-                                                 computeEnergyAndVirial, pme->nthread, thread);
+                                                 computeEnergyAndVirial,
+                                                 pme->nthread,
+                                                 thread);
                         if (thread == 0)
                         {
-                            wallcycle_stop(wcycle, ewcLJPME);
+                            wallcycle_stop(wcycle, WallCycleCounter::LJPme);
                             inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
                         }
                     }
@@ -1486,7 +1502,7 @@ int gmx_pme_do(struct gmx_pme_t* pme,
 
             bFirst = !pme->doCoulomb;
             calc_initial_lb_coeffs(coefficientBuffer, local_c6, local_sigma);
-            for (grid_index = 8; grid_index >= 2; --grid_index)
+            for (int grid_index = 8; grid_index >= 2; --grid_index)
             {
                 /* Unpack structure */
                 pmegrid    = &pme->pmegrid[grid_index];
@@ -1502,22 +1518,22 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                         /* do 3d-invfft */
                         if (thread == 0)
                         {
-                            wallcycle_start(wcycle, ewcPME_FFT);
+                            wallcycle_start(wcycle, WallCycleCounter::PmeFft);
                         }
 
                         gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL, thread, wcycle);
                         if (thread == 0)
                         {
-                            wallcycle_stop(wcycle, ewcPME_FFT);
+                            wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
 
                             if (pme->nodeid == 0)
                             {
-                                real ntot = pme->nkx * pme->nky * pme->nkz;
-                                npme      = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
+                                real      ntot = pme->nkx * pme->nky * pme->nkz;
+                                const int npme = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
                                 inc_nrnb(nrnb, eNR_FFT, 2 * npme);
                             }
-                            wallcycle_start(wcycle, ewcPME_GATHER);
+                            wallcycle_start(wcycle, WallCycleCounter::PmeGather);
                         }
 
                         copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread);
@@ -1536,8 +1552,8 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                 if (stepWork.computeForces)
                 {
                     /* interpolate forces for our local atoms */
-                    bClearF = (bFirst && PAR(cr));
-                    scale   = pme->bFEP ? (fep_state < 1 ? 1.0 - lambda_lj : lambda_lj) : 1.0;
+                    bClearF    = (bFirst && PAR(cr));
+                    real scale = pme->bFEP ? (fep_state < 1 ? 1.0 - lambda_lj : lambda_lj) : 1.0;
                     scale *= lb_scale_factor[grid_index - 2];
 
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
@@ -1545,27 +1561,28 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                     {
                         try
                         {
-                            gather_f_bsplines(pme, grid, bClearF, &pme->atc[0],
-                                              &pme->atc[0].spline[thread], scale);
+                            gather_f_bsplines(
+                                    pme, grid, bClearF, &pme->atc[0], &pme->atc[0].spline[thread], scale);
                         }
                         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
                     }
 
-                    inc_nrnb(nrnb, eNR_GATHERFBSP,
+                    inc_nrnb(nrnb,
+                             eNR_GATHERFBSP,
                              pme->pme_order * pme->pme_order * pme->pme_order * pme->atc[0].numAtoms());
                 }
 
-                wallcycle_stop(wcycle, ewcPME_GATHER);
+                wallcycle_stop(wcycle, WallCycleCounter::PmeGather);
 
                 bFirst = FALSE;
             } /* for (grid_index = 8; grid_index >= 2; --grid_index) */
         }     /* for (fep_state = 0; fep_state < fep_states_lj; ++fep_state) */
-    }         /* if (pme->doLJ && pme->ljpme_combination_rule == eljpmeLB) */
+    }         /* if (pme->doLJ && pme->ljpme_combination_rule == LongRangeVdW::LB) */
 
     if (stepWork.computeForces && pme->nnodes > 1)
     {
-        wallcycle_start(wcycle, ewcPME_REDISTXF);
-        for (d = 0; d < pme->ndecompdim; d++)
+        wallcycle_start(wcycle, WallCycleCounter::PmeRedistXF);
+        for (int d = 0; d < pme->ndecompdim; d++)
         {
             gmx::ArrayRef<gmx::RVec> forcesRef;
             if (d == pme->ndecompdim - 1)
@@ -1578,13 +1595,13 @@ int gmx_pme_do(struct gmx_pme_t* pme,
             {
                 forcesRef = pme->atc[d + 1].f;
             }
-            if (DOMAINDECOMP(cr))
+            if (haveDDAtomOrdering(*cr))
             {
                 dd_pmeredist_f(pme, &pme->atc[d], forcesRef, d == pme->ndecompdim - 1 && pme->bPPnode);
             }
         }
-        wallcycle_stop(wcycle, ewcPME_REDISTXF);
+        wallcycle_stop(wcycle, WallCycleCounter::PmeRedistXF);
     }
 
     if (computeEnergyAndVirial)
    {
@@ -1609,10 +1626,6 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                 }
             }
         }
-        if (debug)
-        {
-            fprintf(debug, "Electrostatic PME mesh energy: %g\n", *energy_q);
-        }
     }
     else
     {
@@ -1640,10 +1653,6 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                 }
             }
         }
-        if (debug)
-        {
-            fprintf(debug, "Lennard-Jones PME mesh energy: %g\n", *energy_lj);
-        }
     }
     else
     {
@@ -1660,8 +1669,6 @@ void gmx_pme_destroy(gmx_pme_t* pme)
         return;
     }
 
-    delete pme->boxScaler;
-
     sfree(pme->nnx);
     sfree(pme->nny);
     sfree(pme->nnz);
@@ -1697,9 +1704,6 @@ void gmx_pme_destroy(gmx_pme_t* pme)
         pme_free_all_work(&pme->solve_work, pme->nthread);
     }
 
-    sfree(pme->sum_qgrid_tmp);
-    sfree(pme->sum_qgrid_dd_tmp);
-
     destroy_pme_spline_work(pme->spline_work);
 
     if (pme->gpu != nullptr)
@@ -1710,13 +1714,16 @@ void gmx_pme_destroy(gmx_pme_t* pme)
     delete pme;
 }
 
-void gmx_pme_reinit_atoms(gmx_pme_t* pme, const int numAtoms, const real* chargesA, const real* chargesB)
+void gmx_pme_reinit_atoms(gmx_pme_t*                pme,
+                          const int                 numAtoms,
+                          gmx::ArrayRef<const real> chargesA,
+                          gmx::ArrayRef<const real> chargesB)
 {
     if (pme->gpu != nullptr)
     {
-        GMX_ASSERT(!(pme->bFEP_q && chargesB == nullptr),
+        GMX_ASSERT(!(pme->bFEP_q && chargesB.empty()),
                    "B state charges must be specified if running Coulomb FEP on the GPU");
-        pme_gpu_reinit_atoms(pme->gpu, numAtoms, chargesA, pme->bFEP_q ? chargesB : nullptr);
+        pme_gpu_reinit_atoms(pme->gpu, numAtoms, chargesA.data(), pme->bFEP_q ? chargesB.data() : nullptr);
     }
     else
     {
@@ -1729,3 +1736,23 @@ bool gmx_pme_grid_matches(const gmx_pme_t& pme, const ivec grid_size)
 {
     return (pme.nkx == grid_size[XX] && pme.nky == grid_size[YY] && pme.nkz == grid_size[ZZ]);
 }
+
+void gmx::SeparatePmeRanksPermitted::disablePmeRanks(const std::string& reason)
+{
+    permitSeparatePmeRanks_ = false;
+
+    if (!reason.empty())
+    {
+        reasons_.push_back(reason);
+    }
+}
+
+bool gmx::SeparatePmeRanksPermitted::permitSeparatePmeRanks() const
+{
+    return permitSeparatePmeRanks_;
+}
+
+std::string gmx::SeparatePmeRanksPermitted::reasonsWhyDisabled() const
+{
+    return joinStrings(reasons_, "; ");
+}
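
The central refactoring in this diff replaces ad-hoc std::list<std::string> error accumulation (the removed addMessageIfNotSupported() helper) with gmx::MessageStringCollector, whose startContext()/appendIf()/finishContext()/isEmpty()/toString() calls appear in pme_gpu_supports_build() and friends above. The following is a minimal standalone sketch of that collector pattern; the stand-in class here is illustrative only and assumes just the five methods exercised by this diff, while the real implementation lives in src/gromacs/utility/message_string_collector.h and differs in detail.

// Minimal stand-in for gmx::MessageStringCollector (assumption: only the
// methods used in this diff; not the real GROMACS implementation).
#include <iostream>
#include <string>
#include <vector>

class MessageStringCollector
{
public:
    // Open a context: its title is emitted once if any message follows it.
    void startContext(const std::string& context) { pendingContexts_.push_back(context); }

    // Record a message only when the condition holds.
    void appendIf(bool condition, const std::string& message)
    {
        if (condition)
        {
            // Emit pending context titles before the first message under them.
            for (const std::string& c : pendingContexts_)
            {
                text_ += c + '\n';
            }
            pendingContexts_.clear();
            text_ += message + '\n';
        }
    }

    // Close the innermost context; an unused context title is never emitted.
    void finishContext()
    {
        if (!pendingContexts_.empty())
        {
            pendingContexts_.pop_back();
        }
    }

    bool        isEmpty() const { return text_.empty(); }
    std::string toString() const { return text_; }

private:
    std::vector<std::string> pendingContexts_;
    std::string              text_;
};

// Usage mirroring pme_gpu_supports_build() above: callers test isEmpty()
// to decide support and read toString() for the human-readable reasons.
int main()
{
    MessageStringCollector errorReasons;
    errorReasons.startContext("PME GPU does not support:");
    errorReasons.appendIf(true, "Double-precision build of GROMACS.");
    errorReasons.appendIf(false, "Non-GPU build of GROMACS.");
    errorReasons.finishContext();
    if (!errorReasons.isEmpty())
    {
        std::cout << errorReasons.toString();
    }
    return 0;
}

This shape explains why every rewritten function ends with the same tail: the condition checks themselves become one appendIf() per restriction, and the old "is it supported?" boolean falls out of isEmpty().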
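The diff also appends method definitions for a new gmx::SeparatePmeRanksPermitted helper, which lets independent subsystems veto the use of separate PME ranks while accumulating human-readable reasons. A hypothetical caller-side sketch follows; the class body is assumed from the method definitions shown in the diff (a boolean flag plus a list of reasons), and the real declaration lives in a GROMACS header not shown here.

// Assumed class shape, inferred from the definitions at the end of the diff.
#include <iostream>
#include <string>
#include <vector>

class SeparatePmeRanksPermitted
{
public:
    void disablePmeRanks(const std::string& reason)
    {
        permitSeparatePmeRanks_ = false;
        if (!reason.empty())
        {
            reasons_.push_back(reason);
        }
    }
    bool permitSeparatePmeRanks() const { return permitSeparatePmeRanks_; }
    // Joins the accumulated reasons, like joinStrings(reasons_, "; ") above.
    std::string reasonsWhyDisabled() const
    {
        std::string joined;
        for (const std::string& r : reasons_)
        {
            joined += (joined.empty() ? "" : "; ") + r;
        }
        return joined;
    }

private:
    bool                     permitSeparatePmeRanks_ = true;
    std::vector<std::string> reasons_;
};

int main()
{
    SeparatePmeRanksPermitted separatePmeRanksPermitted;
    // Each subsystem can veto separate PME ranks with its own reason:
    separatePmeRanksPermitted.disablePmeRanks("hypothetical reason for illustration");
    if (!separatePmeRanksPermitted.permitSeparatePmeRanks())
    {
        std::cout << "Separate PME ranks disabled: "
                  << separatePmeRanksPermitted.reasonsWhyDisabled() << '\n';
    }
    return 0;
}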