Merge release-2021 into master
diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp
index fe488dc50844bde64ba5623693ce8de0946a610e..e2c87459a0589403888d57fdfead803bbabb869f 100644
--- a/src/gromacs/ewald/pme.cpp
+++ b/src/gromacs/ewald/pme.cpp
 #include "pme_spline_work.h"
 #include "pme_spread.h"
 
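+// Intended to be set only by tests, to bypass the SYCL restrictions checked below.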
+//NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
+bool g_allowPmeWithSyclForTesting = false;
+
 bool pme_gpu_supports_build(std::string* error)
 {
     gmx::MessageStringCollector errorReasons;
@@ -134,7 +137,7 @@ bool pme_gpu_supports_build(std::string* error)
     errorReasons.startContext("PME GPU does not support:");
     errorReasons.appendIf(GMX_DOUBLE, "Double-precision build of GROMACS.");
     errorReasons.appendIf(!GMX_GPU, "Non-GPU build of GROMACS.");
-    errorReasons.appendIf(GMX_GPU_SYCL, "SYCL build."); // SYCL-TODO
+    errorReasons.appendIf(GMX_GPU_SYCL && !g_allowPmeWithSyclForTesting, "SYCL build."); // SYCL-TODO
     errorReasons.finishContext();
     if (error != nullptr)
     {
@@ -177,6 +180,20 @@ bool pme_gpu_supports_input(const t_inputrec& ir, std::string* error)
     return errorReasons.isEmpty();
 }
 
+bool pme_gpu_mixed_mode_supports_input(const t_inputrec& ir, std::string* error)
+{
+    gmx::MessageStringCollector errorReasons;
+    // Before changing the prefix string, make sure that it is not searched for in regression tests.
+    errorReasons.startContext("PME GPU in Mixed mode does not support:");
+    errorReasons.appendIf(ir.efep != FreeEnergyPerturbationType::No, "Free Energy Perturbation.");
+    errorReasons.finishContext();
+    if (error != nullptr)
+    {
+        *error = errorReasons.toString();
+    }
+    return errorReasons.isEmpty();
+}
+
 /*! \brief \libinternal
  * Finds out whether PME with the given inputs can run on a GPU.
  * This function is an internal final check, validating the whole PME structure on creation,
@@ -196,7 +213,7 @@ static bool pme_gpu_check_restrictions(const gmx_pme_t* pme, std::string* error)
     errorReasons.appendIf(pme->doLJ, "Lennard-Jones PME.");
     errorReasons.appendIf(GMX_DOUBLE, "Double precision build of GROMACS.");
     errorReasons.appendIf(!GMX_GPU, "Non-GPU build of GROMACS.");
-    errorReasons.appendIf(GMX_GPU_SYCL, "SYCL build of GROMACS."); // SYCL-TODO
+    errorReasons.appendIf(GMX_GPU_SYCL && !g_allowPmeWithSyclForTesting, "SYCL build of GROMACS."); // SYCL-TODO
     errorReasons.finishContext();
     if (error != nullptr)
     {
@@ -550,9 +567,6 @@ gmx_pme_t* gmx_pme_init(const t_commrec*     cr,
 
     gmx::unique_cptr<gmx_pme_t, gmx_pme_destroy> pme(new gmx_pme_t());
 
-    pme->sum_qgrid_tmp    = nullptr;
-    pme->sum_qgrid_dd_tmp = nullptr;
-
     pme->buf_nalloc = 0;
 
     pme->nnodes  = 1;
@@ -686,8 +700,8 @@ gmx_pme_t* gmx_pme_init(const t_commrec*     cr,
 
     // The box requires scaling with nwalls = 2; we store that condition as well
     // as the scaling factor.
-    delete pme->boxScaler;
-    pme->boxScaler = new EwaldBoxZScaler(inputrecPbcXY2Walls(ir), ir->wall_ewald_zfac);
+    pme->boxScaler =
+            std::make_unique<EwaldBoxZScaler>(inputrecPbcXY2Walls(ir), ir->wall_ewald_zfac);
 
     /* If we violate restrictions, generate a fatal error here */
     gmx_pme_check_restrictions(
@@ -1037,21 +1051,16 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
     GMX_ASSERT(pme->runMode == PmeRunMode::CPU,
                "gmx_pme_do should not be called on the GPU PME run.");
 
-    int                       d, npme, grid_index, max_grid_index;
     PmeAtomComm&              atc     = pme->atc[0];
     pmegrids_t*               pmegrid = nullptr;
     real*                     grid    = nullptr;
     gmx::ArrayRef<const real> coefficient;
-    PmeOutput                 output[2]; // The second is used for the B state with FEP
-    real                      scale, lambda;
-    gmx_bool                  bClearF;
+    std::array<PmeOutput, 2>  output; // The second is used for the B state with FEP
     gmx_parallel_3dfft_t      pfft_setup;
     real*                     fftgrid;
     t_complex*                cfftgrid;
     int                       thread;
-    gmx_bool                  bFirst, bDoSplines;
-    int                       fep_state;
-    int                       fep_states_lj = pme->bFEP_lj ? 2 : 1;
+    const int                 fep_states_lj = pme->bFEP_lj ? 2 : 1;
     // There's no support for computing energy without virial, or vice versa
     const bool computeEnergyAndVirial = (stepWork.computeEnergy || stepWork.computeVirial);
 
@@ -1091,7 +1100,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
     pme->boxScaler->scaleBox(box, scaledBox);
 
     gmx::invertBoxMatrix(scaledBox, pme->recipbox);
-    bFirst = TRUE;
+    bool bFirst = true;
 
     /* For simplicity, we construct the splines for all particles if
      * more than one PME calculation is needed. Some optimization
@@ -1100,7 +1109,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
      * that don't yet have them.
      */
 
-    bDoSplines = pme->bFEP || (pme->doCoulomb && pme->doLJ);
+    bool bDoSplines = pme->bFEP || (pme->doCoulomb && pme->doLJ);
 
     /* We need a maximum of four separate PME calculations:
      * grid_index=0: Coulomb PME with charges from state A
@@ -1112,9 +1121,9 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
      */
 
     /* If we are doing LJ-PME with LB, we only do Q here */
-    max_grid_index = (pme->ljpme_combination_rule == LongRangeVdW::LB) ? DO_Q : DO_Q_AND_LJ;
-
-    for (grid_index = 0; grid_index < max_grid_index; ++grid_index)
+    const int max_grid_index = (pme->ljpme_combination_rule == LongRangeVdW::LB) ? DO_Q : DO_Q_AND_LJ;
+    bool      bClearF;
+    for (int grid_index = 0; grid_index < max_grid_index; ++grid_index)
     {
         /* Check if we should do calculations at this grid_index
          * If grid_index is odd we should be doing FEP
@@ -1141,16 +1150,6 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
 
         grid = pmegrid->grid.grid;
 
-        if (debug)
-        {
-            fprintf(debug, "PME: number of ranks = %d, rank = %d\n", cr->nnodes, cr->nodeid);
-            fprintf(debug, "Grid = %p\n", static_cast<void*>(grid));
-            if (grid == nullptr)
-            {
-                gmx_fatal(FARGS, "No grid!");
-            }
-        }
-
         if (pme->nnodes == 1)
         {
             atc.coefficient = coefficient;
@@ -1163,11 +1162,6 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
             wallcycle_stop(wcycle, WallCycleCounter::PmeRedistXF);
         }
 
-        if (debug)
-        {
-            fprintf(debug, "Rank= %6d, pme local particles=%6d\n", cr->nodeid, atc.numAtoms());
-        }
-
         wallcycle_start(wcycle, WallCycleCounter::PmeSpread);
 
         /* Spread the coefficients on a grid */
@@ -1268,8 +1262,8 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
 
                     if (pme->nodeid == 0)
                     {
-                        real ntot = pme->nkx * pme->nky * pme->nkz;
-                        npme      = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
+                        real      ntot = pme->nkx * pme->nky * pme->nkz;
+                        const int npme = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
                         inc_nrnb(nrnb, eNR_FFT, 2 * npme);
                     }
 
@@ -1304,8 +1298,8 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
              * atc->f is the actual force array, not a buffer,
              * therefore we should not clear it.
              */
-            lambda  = grid_index < DO_Q ? lambda_q : lambda_lj;
-            bClearF = (bFirst && PAR(cr));
+            real lambda = grid_index < DO_Q ? lambda_q : lambda_lj;
+            bClearF     = (bFirst && PAR(cr));
 #pragma omp parallel for num_threads(pme->nthread) schedule(static)
             for (thread = 0; thread < pme->nthread; thread++)
             {
@@ -1351,7 +1345,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
     if (pme->doLJ && pme->ljpme_combination_rule == LongRangeVdW::LB)
     {
         /* Loop over A- and B-state if we are doing FEP */
-        for (fep_state = 0; fep_state < fep_states_lj; ++fep_state)
+        for (int fep_state = 0; fep_state < fep_states_lj; ++fep_state)
         {
             std::vector<real>         local_c6;
             std::vector<real>         local_sigma;
@@ -1414,7 +1408,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
             calc_initial_lb_coeffs(coefficientBuffer, local_c6, local_sigma);
 
             /* Seven terms in LJ-PME with LB; grid_index < 2 is reserved for electrostatics */
-            for (grid_index = 2; grid_index < 9; ++grid_index)
+            for (int grid_index = 2; grid_index < 9; ++grid_index)
             {
                 /* Unpack structure */
                 pmegrid    = &pme->pmegrid[grid_index];
@@ -1508,7 +1502,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
 
             bFirst = !pme->doCoulomb;
             calc_initial_lb_coeffs(coefficientBuffer, local_c6, local_sigma);
-            for (grid_index = 8; grid_index >= 2; --grid_index)
+            for (int grid_index = 8; grid_index >= 2; --grid_index)
             {
                 /* Unpack structure */
                 pmegrid    = &pme->pmegrid[grid_index];
@@ -1535,8 +1529,8 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
 
                             if (pme->nodeid == 0)
                             {
-                                real ntot = pme->nkx * pme->nky * pme->nkz;
-                                npme      = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
+                                real      ntot = pme->nkx * pme->nky * pme->nkz;
+                                const int npme = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
                                 inc_nrnb(nrnb, eNR_FFT, 2 * npme);
                             }
                             wallcycle_start(wcycle, WallCycleCounter::PmeGather);
@@ -1558,8 +1552,8 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                 if (stepWork.computeForces)
                 {
                     /* interpolate forces for our local atoms */
-                    bClearF = (bFirst && PAR(cr));
-                    scale   = pme->bFEP ? (fep_state < 1 ? 1.0 - lambda_lj : lambda_lj) : 1.0;
+                    bClearF    = (bFirst && PAR(cr));
+                    real scale = pme->bFEP ? (fep_state < 1 ? 1.0 - lambda_lj : lambda_lj) : 1.0;
                     scale *= lb_scale_factor[grid_index - 2];
 
 #pragma omp parallel for num_threads(pme->nthread) schedule(static)
@@ -1588,7 +1582,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
     if (stepWork.computeForces && pme->nnodes > 1)
     {
         wallcycle_start(wcycle, WallCycleCounter::PmeRedistXF);
-        for (d = 0; d < pme->ndecompdim; d++)
+        for (int d = 0; d < pme->ndecompdim; d++)
         {
             gmx::ArrayRef<gmx::RVec> forcesRef;
             if (d == pme->ndecompdim - 1)
@@ -1601,7 +1595,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
             {
                 forcesRef = pme->atc[d + 1].f;
             }
-            if (DOMAINDECOMP(cr))
+            if (haveDDAtomOrdering(*cr))
             {
                 dd_pmeredist_f(pme, &pme->atc[d], forcesRef, d == pme->ndecompdim - 1 && pme->bPPnode);
             }
@@ -1632,10 +1626,6 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                     }
                 }
             }
-            if (debug)
-            {
-                fprintf(debug, "Electrostatic PME mesh energy: %g\n", *energy_q);
-            }
         }
         else
         {
@@ -1663,10 +1653,6 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                     }
                 }
             }
-            if (debug)
-            {
-                fprintf(debug, "Lennard-Jones PME mesh energy: %g\n", *energy_lj);
-            }
         }
         else
         {
@@ -1683,8 +1669,6 @@ void gmx_pme_destroy(gmx_pme_t* pme)
         return;
     }
 
-    delete pme->boxScaler;
-
     sfree(pme->nnx);
     sfree(pme->nny);
     sfree(pme->nnz);
@@ -1720,9 +1704,6 @@ void gmx_pme_destroy(gmx_pme_t* pme)
         pme_free_all_work(&pme->solve_work, pme->nthread);
     }
 
-    sfree(pme->sum_qgrid_tmp);
-    sfree(pme->sum_qgrid_dd_tmp);
-
     destroy_pme_spline_work(pme->spline_work);
 
     if (pme->gpu != nullptr)