Set up workload data structures
author    Szilárd Páll <pall.szilard@gmail.com>
          Fri, 13 Sep 2019 00:02:45 +0000 (02:02 +0200)
committer Szilárd Páll <pall.szilard@gmail.com>
          Wed, 18 Sep 2019 15:17:40 +0000 (17:17 +0200)
This change establishes three data structures for describing the
computational workload within a run. Flags related to the workload are
categorized into three groups based on their lifetime:
- per-step flags in the StepWorkload class
- flags with domain lifetime (valid for nstlist steps) in the
  DomainLifetimeWorkload class
- flags constant over the entire simulation in the SimulationWorkload
  class

The present change only introduces the new naming, by renaming the
former ForceFlags class to StepWorkload, PpForceWorkload to
DomainLifetimeWorkload, and the MdScheduleWorkload container to
MdrunScheduleWorkload.

Moving flags from, e.g., the force schedule into these classes will
follow in later changes.

Change-Id: I2bcc911091e2dd8ca0bcbf53a40dfcda09ba368b
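
[Editor's note] For orientation, below is a minimal sketch of the
resulting layout. It is reconstructed from the uses visible in the
diff that follows, not copied from
src/gromacs/mdtypes/simulation_workload.h; in particular, the contents
of SimulationWorkload (only named in this change) and any members of
MdrunScheduleWorkload beyond stepWork and domainWork are assumptions.

    namespace gmx
    {

    //! Flags describing the work to do each step (formerly ForceFlags).
    class StepWorkload
    {
        public:
            bool stateChanged           = false;
            bool haveDynamicBox         = false;
            bool doNeighborSearch       = false;
            bool computeVirial          = false;
            bool computeEnergy          = false;
            bool computeForces          = false;
            bool computeListedForces    = false;
            bool computeNonbondedForces = false;
            bool computeDhdl            = false;
    };

    //! Flags with domain lifetime, updated at neighbor-search steps
    //! (formerly PpForceWorkload).
    class DomainLifetimeWorkload
    {
        public:
            bool haveSpecialForces      = false;
            bool haveCpuBondedWork      = false;
            bool haveGpuBondedWork      = false;
            bool haveRestraintsWork     = false;
            bool haveCpuListedForceWork = false;
    };

    //! Flags constant over the entire simulation; not populated by
    //! this change, so left as a placeholder here.
    class SimulationWorkload
    {
    };

    //! Container passed through the force schedule
    //! (formerly MdScheduleWorkload).
    class MdrunScheduleWorkload
    {
        public:
            StepWorkload           stepWork;
            DomainLifetimeWorkload domainWork;
    };

    }      // namespace gmx

Call sites then consult runScheduleWork->stepWork every step and
runScheduleWork->domainWork once per domain lifetime, as the hunks
below show.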

36 files changed:
src/gromacs/ewald/pme_gpu.cpp
src/gromacs/listed_forces/gpubonded.h
src/gromacs/listed_forces/gpubonded_impl.cpp
src/gromacs/listed_forces/gpubondedkernels.cu
src/gromacs/listed_forces/listed_forces.cpp
src/gromacs/listed_forces/listed_forces.h
src/gromacs/listed_forces/pairs.cpp
src/gromacs/listed_forces/pairs.h
src/gromacs/mdlib/force.cpp
src/gromacs/mdlib/force.h
src/gromacs/mdlib/sim_util.cpp
src/gromacs/mdrun/isimulator.h
src/gromacs/mdrun/md.cpp
src/gromacs/mdrun/mimic.cpp
src/gromacs/mdrun/minimize.cpp
src/gromacs/mdrun/rerun.cpp
src/gromacs/mdrun/runner.cpp
src/gromacs/mdrun/shellfc.cpp
src/gromacs/mdrun/shellfc.h
src/gromacs/mdrun/simulatorbuilder.h
src/gromacs/mdrun/tpi.cpp
src/gromacs/mdtypes/simulation_workload.h [moved from src/gromacs/mdlib/ppforceworkload.h with 60% similarity]
src/gromacs/modularsimulator/forceelement.cpp
src/gromacs/modularsimulator/forceelement.h
src/gromacs/modularsimulator/modularsimulator.cpp
src/gromacs/modularsimulator/shellfcelement.cpp
src/gromacs/modularsimulator/shellfcelement.h
src/gromacs/nbnxm/benchmark/bench_setup.cpp
src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
src/gromacs/nbnxm/gpu_common.h
src/gromacs/nbnxm/kerneldispatch.cpp
src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.cpp
src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.h
src/gromacs/nbnxm/nbnxm.h
src/gromacs/nbnxm/nbnxm_gpu.h
src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp

index ed7cd19f2e89447f3ff1d7fac728ce5fd1e7356f..e61121d49880f126ced5ab55267369458d4f4f41 100644 (file)
@@ -51,7 +51,6 @@
 #include "gromacs/fft/parallel_3dfft.h"
 #include "gromacs/math/invertmatrix.h"
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/ppforceworkload.h"
 #include "gromacs/mdtypes/enerdata.h"
 #include "gromacs/mdtypes/forceoutput.h"
 #include "gromacs/mdtypes/inputrec.h"
index b7a13fcae28d3f42e10a55e3c2d5a990244cf28f..8fe7b490214c2947365c499387e14364e9af6f01 100644 (file)
@@ -65,7 +65,7 @@ struct gmx_wallcycle;
 namespace gmx
 {
 
-class ForceFlags;
+class StepWorkload;
 
 /*! \brief The number of bonded function types supported on GPUs */
 static constexpr int numFTypesOnGpu = 8;
@@ -137,9 +137,9 @@ class GpuBonded
          * assigned to the GPU */
         bool haveInteractions() const;
         /*! \brief Launches bonded kernel on a GPU */
-        void launchKernel(const t_forcerec      *fr,
-                          const gmx::ForceFlags &forceFlags,
-                          const matrix           box);
+        void launchKernel(const t_forcerec        *fr,
+                          const gmx::StepWorkload &stepWork,
+                          const matrix             box);
         /*! \brief Launches the transfer of computed bonded energies. */
         void launchEnergyTransfer();
         /*! \brief Waits on the energy transfer, and accumulates bonded energies to \c enerd. */
index 3de01e305524cc426ceed8964d4f73cef37c4c43..13a7d372833d210351c73c587d1378254b40d9e1 100644 (file)
@@ -190,7 +190,7 @@ GpuBonded::haveInteractions() const
 
 void
 GpuBonded::launchKernel(const t_forcerec           * /* fr */,
-                        const gmx::ForceFlags      & /* forceFlags */,
+                        const gmx::StepWorkload    & /* stepWork */,
                         const matrix   /* box */)
 {
 }
index 709d99cc9afa892536ba32c8ab78ae40464e413e..e78b32c6fd66cf6a778a0ff271fd5624f6a2ff7a 100644 (file)
@@ -57,8 +57,8 @@
 #include "gromacs/listed_forces/gpubonded.h"
 #include "gromacs/math/units.h"
 #include "gromacs/mdlib/force_flags.h"
-#include "gromacs/mdlib/ppforceworkload.h"
 #include "gromacs/mdtypes/forcerec.h"
+#include "gromacs/mdtypes/simulation_workload.h"
 #include "gromacs/pbcutil/pbc.h"
 #include "gromacs/pbcutil/pbc_aiuc_cuda.cuh"
 #include "gromacs/utility/gmxassert.h"
@@ -862,17 +862,17 @@ GpuBonded::Impl::launchKernel(const t_forcerec *fr,
 }
 
 void
-GpuBonded::launchKernel(const t_forcerec      *fr,
-                        const gmx::ForceFlags &forceFlags,
-                        const matrix           box)
+GpuBonded::launchKernel(const t_forcerec        *fr,
+                        const gmx::StepWorkload &stepWork,
+                        const matrix             box)
 {
-    if (forceFlags.computeEnergy)
+    if (stepWork.computeEnergy)
     {
         // When we need the energy, we also need the virial
         impl_->launchKernel<true, true>
             (fr, box);
     }
-    else if (forceFlags.computeVirial)
+    else if (stepWork.computeVirial)
     {
         impl_->launchKernel<true, false>
             (fr, box);
index 9e6e7001b746264ba510c552aed3c7d2f16ff676..9a469f8f318f2434f1aed85a60d5154b39b0f00e 100644 (file)
 #include "gromacs/math/vec.h"
 #include "gromacs/mdlib/enerdata_utils.h"
 #include "gromacs/mdlib/force.h"
-#include "gromacs/mdlib/ppforceworkload.h"
 #include "gromacs/mdtypes/commrec.h"
 #include "gromacs/mdtypes/fcdata.h"
 #include "gromacs/mdtypes/forcerec.h"
 #include "gromacs/mdtypes/inputrec.h"
 #include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/mdtypes/simulation_workload.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/pbcutil/pbc.h"
 #include "gromacs/timing/wallcycle.h"
@@ -199,7 +199,7 @@ void
 reduce_thread_output(int n, gmx::ForceWithShiftForces *forceWithShiftForces,
                      real *ener, gmx_grppairener_t *grpp, real *dvdl,
                      const bonded_threading_t *bt,
-                     const gmx::ForceFlags &forceFlags)
+                     const gmx::StepWorkload  &stepWork)
 {
     assert(bt->haveBondeds);
 
@@ -212,12 +212,12 @@ reduce_thread_output(int n, gmx::ForceWithShiftForces *forceWithShiftForces,
     rvec * gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());
 
     /* When necessary, reduce energy and virial using one thread only */
-    if ((forceFlags.computeEnergy || forceFlags.computeVirial || forceFlags.computeDhdl) &&
+    if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) &&
         bt->nthreads > 1)
     {
         gmx::ArrayRef < const std::unique_ptr < f_thread_t>> f_t = bt->f_t;
 
-        if (forceFlags.computeVirial)
+        if (stepWork.computeVirial)
         {
             for (int i = 0; i < SHIFTS; i++)
             {
@@ -227,7 +227,7 @@ reduce_thread_output(int n, gmx::ForceWithShiftForces *forceWithShiftForces,
                 }
             }
         }
-        if (forceFlags.computeEnergy)
+        if (stepWork.computeEnergy)
         {
             for (int i = 0; i < F_NRE; i++)
             {
@@ -247,7 +247,7 @@ reduce_thread_output(int n, gmx::ForceWithShiftForces *forceWithShiftForces,
                 }
             }
         }
-        if (forceFlags.computeDhdl)
+        if (stepWork.computeDhdl)
         {
             for (int i = 0; i < efptNR; i++)
             {
@@ -268,14 +268,14 @@ reduce_thread_output(int n, gmx::ForceWithShiftForces *forceWithShiftForces,
  * Note that currently we do not have bonded kernels that
  * do not compute forces.
  */
-BondedKernelFlavor selectBondedKernelFlavor(const gmx::ForceFlags &forceFlags,
-                                            const bool             useSimdKernels,
-                                            const bool             havePerturbedInteractions)
+BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload &stepWork,
+                                            const bool               useSimdKernels,
+                                            const bool               havePerturbedInteractions)
 {
     BondedKernelFlavor flavor;
-    if (forceFlags.computeEnergy || forceFlags.computeVirial)
+    if (stepWork.computeEnergy || stepWork.computeVirial)
     {
-        if (forceFlags.computeVirial)
+        if (stepWork.computeVirial)
         {
             flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
         }
@@ -312,7 +312,7 @@ calc_one_bond(int thread,
               t_nrnb *nrnb,
               const real *lambda, real *dvdl,
               const t_mdatoms *md, t_fcdata *fcd,
-              const gmx::ForceFlags &forceFlags,
+              const gmx::StepWorkload &stepWork,
               int *global_atom_index)
 {
     GMX_ASSERT(idef->ilsort == ilsortNO_FE || idef->ilsort == ilsortFE_SORTED,
@@ -322,7 +322,7 @@ calc_one_bond(int thread,
         (idef->ilsort == ilsortFE_SORTED &&
          idef->il[ftype].nr_nonperturbed < idef->il[ftype].nr);
     BondedKernelFlavor flavor =
-        selectBondedKernelFlavor(forceFlags, fr->use_simd_kernels, havePerturbedInteractions);
+        selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
     int                efptFTYPE;
     if (IS_RESTRAINT_TYPE(ftype))
     {
@@ -375,7 +375,7 @@ calc_one_bond(int thread,
            extended to support calling from multiple threads. */
         do_pairs(ftype, nbn, iatoms+nb0, idef->iparams, x, f, fshift,
                  pbc, g, lambda, dvdl, md, fr,
-                 havePerturbedInteractions, forceFlags,
+                 havePerturbedInteractions, stepWork,
                  grpp, global_atom_index);
     }
 
@@ -392,20 +392,20 @@ calc_one_bond(int thread,
 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
  */
 static void
-calcBondedForces(const t_idef          *idef,
-                 const rvec             x[],
-                 const t_forcerec      *fr,
-                 const t_pbc           *pbc_null,
-                 const t_graph         *g,
-                 rvec                  *fshiftMasterBuffer,
-                 gmx_enerdata_t        *enerd,
-                 t_nrnb                *nrnb,
-                 const real            *lambda,
-                 real                  *dvdl,
-                 const t_mdatoms       *md,
-                 t_fcdata              *fcd,
-                 const gmx::ForceFlags &forceFlags,
-                 int                   *global_atom_index)
+calcBondedForces(const t_idef            *idef,
+                 const rvec               x[],
+                 const t_forcerec        *fr,
+                 const t_pbc             *pbc_null,
+                 const t_graph           *g,
+                 rvec                    *fshiftMasterBuffer,
+                 gmx_enerdata_t          *enerd,
+                 t_nrnb                  *nrnb,
+                 const real              *lambda,
+                 real                    *dvdl,
+                 const t_mdatoms         *md,
+                 t_fcdata                *fcd,
+                 const gmx::StepWorkload &stepWork,
+                 int                     *global_atom_index)
 {
     bonded_threading_t *bt = fr->bondedThreading;
 
@@ -452,7 +452,7 @@ calcBondedForces(const t_idef          *idef,
                                       fr->bondedThreading->workDivision, x,
                                       ft, fshift, fr, pbc_null, g, grpp,
                                       nrnb, lambda, dvdlt,
-                                      md, fcd, forceFlags,
+                                      md, fcd, stepWork,
                                       global_atom_index);
                     epot[ftype] += v;
                 }
@@ -498,7 +498,7 @@ void calc_listed(const t_commrec             *cr,
                  const real *lambda,
                  const t_mdatoms *md,
                  t_fcdata *fcd, int *global_atom_index,
-                 const gmx::ForceFlags &forceFlags)
+                 const gmx::StepWorkload &stepWork)
 {
     const  t_pbc              *pbc_null;
     bonded_threading_t        *bt  = fr->bondedThreading;
@@ -572,16 +572,16 @@ void calc_listed(const t_commrec             *cr,
         calcBondedForces(idef, x, fr, pbc_null, g,
                          as_rvec_array(forceWithShiftForces.shiftForces().data()),
                          enerd, nrnb, lambda, dvdl, md,
-                         fcd, forceFlags, global_atom_index);
+                         fcd, stepWork, global_atom_index);
         wallcycle_sub_stop(wcycle, ewcsLISTED);
 
         wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
         reduce_thread_output(fr->natoms_force, &forceWithShiftForces,
                              enerd->term, &enerd->grpp, dvdl,
                              bt,
-                             forceFlags);
+                             stepWork);
 
-        if (forceFlags.computeDhdl)
+        if (stepWork.computeDhdl)
         {
             for (int i = 0; i < efptNR; i++)
             {
@@ -650,7 +650,7 @@ void calc_listed_lambda(const t_idef *idef,
 
             if (ilist_fe.nr > 0)
             {
-                gmx::ForceFlags tempFlags;
+                gmx::StepWorkload tempFlags;
                 tempFlags.computeEnergy  = true;
                 v = calc_one_bond(0, ftype, &idef_fe, workDivision,
                                   x, f, fshift, fr, pbc_null, g,
@@ -667,29 +667,29 @@ void calc_listed_lambda(const t_idef *idef,
 }
 
 void
-do_force_listed(struct gmx_wallcycle        *wcycle,
-                const matrix                 box,
-                const t_lambda              *fepvals,
-                const t_commrec             *cr,
-                const gmx_multisim_t        *ms,
-                const t_idef                *idef,
-                const rvec                   x[],
-                history_t                   *hist,
-                gmx::ForceOutputs           *forceOutputs,
-                const t_forcerec            *fr,
-                const struct t_pbc          *pbc,
-                const struct t_graph        *graph,
-                gmx_enerdata_t              *enerd,
-                t_nrnb                      *nrnb,
-                const real                  *lambda,
-                const t_mdatoms             *md,
-                t_fcdata                    *fcd,
-                int                         *global_atom_index,
-                const gmx::ForceFlags       &forceFlags)
+do_force_listed(struct gmx_wallcycle          *wcycle,
+                const matrix                   box,
+                const t_lambda                *fepvals,
+                const t_commrec               *cr,
+                const gmx_multisim_t          *ms,
+                const t_idef                  *idef,
+                const rvec                     x[],
+                history_t                     *hist,
+                gmx::ForceOutputs             *forceOutputs,
+                const t_forcerec              *fr,
+                const struct t_pbc            *pbc,
+                const struct t_graph          *graph,
+                gmx_enerdata_t                *enerd,
+                t_nrnb                        *nrnb,
+                const real                    *lambda,
+                const t_mdatoms               *md,
+                t_fcdata                      *fcd,
+                int                           *global_atom_index,
+                const gmx::StepWorkload       &stepWork)
 {
     t_pbc pbc_full; /* Full PBC is needed for position restraints */
 
-    if (!forceFlags.computeListedForces)
+    if (!stepWork.computeListedForces)
     {
         return;
     }
@@ -704,12 +704,12 @@ do_force_listed(struct gmx_wallcycle        *wcycle,
                 forceOutputs,
                 fr, pbc, &pbc_full,
                 graph, enerd, nrnb, lambda, md, fcd,
-                global_atom_index, forceFlags);
+                global_atom_index, stepWork);
 
     /* Check if we have to determine energy differences
      * at foreign lambda's.
      */
-    if (fepvals->n_lambda > 0 && forceFlags.computeDhdl)
+    if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
     {
         posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
 
index 885f654ea635e79ca8be96d0bf99175ad4af9cf9..86e1a4e0bfed1c55430b892085aab41058f0814f 100644 (file)
@@ -87,7 +87,7 @@ class t_state;
 namespace gmx
 {
 class ForceOutputs;
-class ForceFlags;
+class StepWorkload;
 }
 
 //! Type of CPU function to compute a bonded interaction.
@@ -118,7 +118,7 @@ void calc_listed(const t_commrec *cr,
                  gmx_enerdata_t *enerd, t_nrnb *nrnb, const real *lambda,
                  const t_mdatoms *md,
                  struct t_fcdata *fcd, int *ddgatindex,
-                 const gmx::ForceFlags &forceFlags);
+                 const gmx::StepWorkload &stepWork);
 
 /*! \brief As calc_listed(), but only determines the potential energy
  * for the perturbed interactions.
@@ -154,7 +154,7 @@ do_force_listed(struct gmx_wallcycle           *wcycle,
                 const t_mdatoms                *md,
                 struct t_fcdata                *fcd,
                 int                            *global_atom_index,
-                const gmx::ForceFlags          &forceFlags);
+                const gmx::StepWorkload        &stepWork);
 
 /*! \brief Returns true if there are position restraints. */
 bool havePositionRestraints(const t_idef   &idef,
index cc05b482cde09b49540ee90c52b00c5b12b904bc..cede2598d1ebaca96a24dd27a045813142071411 100644 (file)
 #include "gromacs/listed_forces/bonded.h"
 #include "gromacs/math/functions.h"
 #include "gromacs/math/vec.h"
-#include "gromacs/mdlib/ppforceworkload.h"
 #include "gromacs/mdtypes/group.h"
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/mdtypes/nblist.h"
+#include "gromacs/mdtypes/simulation_workload.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/pbcutil/mshift.h"
 #include "gromacs/pbcutil/pbc.h"
@@ -652,14 +652,14 @@ do_pairs(int ftype, int nbonds,
          const t_mdatoms *md,
          const t_forcerec *fr,
          const bool havePerturbedInteractions,
-         const gmx::ForceFlags &forceFlags,
+         const gmx::StepWorkload &stepWork,
          gmx_grppairener_t *grppener,
          int *global_atom_index)
 {
     if (ftype == F_LJ14 &&
         fr->ic->vdwtype != evdwUSER && !EEL_USER(fr->ic->eeltype) &&
         !havePerturbedInteractions &&
-        (!forceFlags.computeVirial && !forceFlags.computeEnergy))
+        (!stepWork.computeVirial && !stepWork.computeEnergy))
     {
         /* We use a fast code-path for plain LJ 1-4 without FEP.
          *
@@ -703,7 +703,7 @@ do_pairs(int ftype, int nbonds,
                                            md, fr->ic->epsfac*fr->fudgeQQ);
         }
     }
-    else if (forceFlags.computeVirial)
+    else if (stepWork.computeVirial)
     {
         do_pairs_general<BondedKernelFlavor::ForcesAndVirialAndEnergy>(
                 ftype, nbonds, iatoms, iparams,
index 0dd7b9baa52dccba8be80d2d151ebd766010dcb9..4bf0b96e745ee432b3f7ffea814ac09a43c05e82 100644 (file)
@@ -57,7 +57,7 @@ struct t_pbc;
 
 namespace gmx
 {
-class ForceFlags;
+class StepWorkload;
 }
 
 /*! \brief Calculate VdW/charge listed pair interactions (usually 1-4
@@ -71,7 +71,7 @@ do_pairs(int ftype, int nbonds, const t_iatom iatoms[], const t_iparams iparams[
          const struct t_pbc *pbc, const struct t_graph *g,
          const real *lambda, real *dvdl, const t_mdatoms *md, const t_forcerec *fr,
          bool havePerturbedPairs,
-         const gmx::ForceFlags &forceFlags,
+         const gmx::StepWorkload &stepWork,
          gmx_grppairener_t *grppener,
          int *global_atom_index);
 
index a9b0dcb915db9fcc6772dd62859b1b171b653684..9c184b7435e5e529d7a9b99549664f3e7c8dfc6f 100644 (file)
@@ -54,7 +54,6 @@
 #include "gromacs/math/vec.h"
 #include "gromacs/math/vecdump.h"
 #include "gromacs/mdlib/forcerec_threading.h"
-#include "gromacs/mdlib/ppforceworkload.h"
 #include "gromacs/mdlib/qmmm.h"
 #include "gromacs/mdlib/rf_util.h"
 #include "gromacs/mdlib/wall.h"
@@ -65,6 +64,7 @@
 #include "gromacs/mdtypes/inputrec.h"
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/mdtypes/mdatom.h"
+#include "gromacs/mdtypes/simulation_workload.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/pbcutil/mshift.h"
 #include "gromacs/pbcutil/pbc.h"
@@ -116,7 +116,7 @@ do_force_lowlevel(t_forcerec                               *fr,
                   const real                               *lambda,
                   const t_graph                            *graph,
                   const rvec                               *mu_tot,
-                  const gmx::ForceFlags                    &forceFlags,
+                  const gmx::StepWorkload                  &stepWork,
                   const DDBalanceRegionHandler             &ddBalanceRegionHandler)
 {
     // TODO: Replace all uses of x by const coordinates
@@ -172,7 +172,7 @@ do_force_lowlevel(t_forcerec                               *fr,
         t_pbc      pbc;
 
         /* Check whether we need to take into account PBC in listed interactions. */
-        const auto needPbcForListedForces = fr->bMolPBC && forceFlags.computeListedForces && haveCpuListedForces(*fr, *idef, *fcd);
+        const auto needPbcForListedForces = fr->bMolPBC && stepWork.computeListedForces && haveCpuListedForces(*fr, *idef, *fcd);
         if (needPbcForListedForces)
         {
             /* Since all atoms are in the rectangular or triclinic unit-cell,
@@ -187,7 +187,7 @@ do_force_lowlevel(t_forcerec                               *fr,
                         forceOutputs,
                         fr, &pbc, graph, enerd, nrnb, lambda, md, fcd,
                         DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr,
-                        forceFlags);
+                        stepWork);
     }
 
     const bool computePmeOnCpu =
@@ -278,15 +278,15 @@ do_force_lowlevel(t_forcerec                               *fr,
             {
                 /* Do reciprocal PME for Coulomb and/or LJ. */
                 assert(fr->n_tpi >= 0);
-                if (fr->n_tpi == 0 || forceFlags.stateChanged)
+                if (fr->n_tpi == 0 || stepWork.stateChanged)
                 {
                     int pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
 
-                    if (forceFlags.computeForces)
+                    if (stepWork.computeForces)
                     {
                         pme_flags |= GMX_PME_CALC_F;
                     }
-                    if (forceFlags.computeVirial)
+                    if (stepWork.computeVirial)
                     {
                         pme_flags |= GMX_PME_CALC_ENER_VIR;
                     }
index 1cd8354bcdef1dab9a7cc9c4c81d088fdf890d9b..cc28a009393405ec0c1fa40e36adc8dfb7e8fcf0 100644 (file)
@@ -68,9 +68,9 @@ namespace gmx
 class Awh;
 class ForceWithVirial;
 class ImdSession;
-class MdScheduleWorkload;
+class MdrunScheduleWorkload;
 class MDLogger;
-class ForceFlags;
+class StepWorkload;
 }
 
 void do_force(FILE                                     *log,
@@ -96,7 +96,7 @@ void do_force(FILE                                     *log,
               gmx::ArrayRef<real>                       lambda,
               t_graph                                  *graph,
               t_forcerec                               *fr,
-              gmx::MdScheduleWorkload                  *mdScheduleWork,
+              gmx::MdrunScheduleWorkload               *runScheduleWork,
               const gmx_vsite_t                        *vsite,
               rvec                                      mu_tot,
               double                                    t,
@@ -132,7 +132,7 @@ do_force_lowlevel(t_forcerec                               *fr,
                   const real                               *lambda,
                   const t_graph                            *graph,
                   const rvec                               *mu_tot,
-                  const gmx::ForceFlags                    &forceFlags,
+                  const gmx::StepWorkload                  &stepWork,
                   const DDBalanceRegionHandler             &ddBalanceRegionHandler);
 /* Call all the force routines */
 
index 881b9974ac29d95dcd353a23ad22be4a97aa2acc..4d0b1595ae82c5f7e0825de8df64e13bcd452e29 100644 (file)
@@ -77,7 +77,6 @@
 #include "gromacs/mdlib/force.h"
 #include "gromacs/mdlib/forcerec.h"
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/ppforceworkload.h"
 #include "gromacs/mdlib/qmmm.h"
 #include "gromacs/mdlib/update.h"
 #include "gromacs/mdtypes/commrec.h"
@@ -86,6 +85,7 @@
 #include "gromacs/mdtypes/iforceprovider.h"
 #include "gromacs/mdtypes/inputrec.h"
 #include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/mdtypes/simulation_workload.h"
 #include "gromacs/mdtypes/state.h"
 #include "gromacs/nbnxm/atomdata.h"
 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 #include "gromacs/utility/sysinfo.h"
 
 using gmx::ForceOutputs;
+using gmx::StepWorkload;
+using gmx::DomainLifetimeWorkload;
 
 // TODO: this environment variable allows us to verify before release
 // that on less common architectures the total cost of polling is not larger than
@@ -271,7 +273,7 @@ static void post_process_forces(const t_commrec       *cr,
                                 const t_graph         *graph,
                                 const t_forcerec      *fr,
                                 const gmx_vsite_t     *vsite,
-                                const gmx::ForceFlags &forceFlags)
+                                const StepWorkload    &stepWork)
 {
     rvec *f = as_rvec_array(forceOutputs->forceWithShiftForces().force().data());
 
@@ -288,13 +290,13 @@ static void post_process_forces(const t_commrec       *cr,
              */
             matrix virial = { { 0 } };
             spread_vsite_f(vsite, x, fDirectVir, nullptr,
-                           forceFlags.computeVirial, virial,
+                           stepWork.computeVirial, virial,
                            nrnb,
                            &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr, wcycle);
             forceWithVirial.addVirialContribution(virial);
         }
 
-        if (forceFlags.computeVirial)
+        if (stepWork.computeVirial)
         {
             /* Now add the forces, this is local */
             sum_forces(f, forceWithVirial.force_);
@@ -319,14 +321,14 @@ static void post_process_forces(const t_commrec       *cr,
 static void do_nb_verlet(t_forcerec                       *fr,
                          const interaction_const_t        *ic,
                          gmx_enerdata_t                   *enerd,
-                         const gmx::ForceFlags            &forceFlags,
+                         const StepWorkload               &stepWork,
                          const Nbnxm::InteractionLocality  ilocality,
                          const int                         clearF,
                          const int64_t                     step,
                          t_nrnb                           *nrnb,
                          gmx_wallcycle_t                   wcycle)
 {
-    if (!forceFlags.computeNonbondedForces)
+    if (!stepWork.computeNonbondedForces)
     {
         /* skip non-bonded calculation */
         return;
@@ -356,7 +358,7 @@ static void do_nb_verlet(t_forcerec                       *fr,
         }
     }
 
-    nbv->dispatchNonbondedKernel(ilocality, *ic, forceFlags, clearF, *fr, enerd, nrnb);
+    nbv->dispatchNonbondedKernel(ilocality, *ic, stepWork, clearF, *fr, enerd, nrnb);
 }
 
 static inline void clear_rvecs_omp(int n, rvec v[])
@@ -514,7 +516,7 @@ haveSpecialForces(const t_inputrec              *inputrec,
  * \param[in]     x                The coordinates
  * \param[in]     mdatoms          Per atom properties
  * \param[in]     lambda           Array of free-energy lambda values
- * \param[in]     forceFlags       Force schedule flags
+ * \param[in]     stepWork         Step schedule flags
  * \param[in,out] forceWithVirial  Force and virial buffers
  * \param[in,out] enerd            Energy buffer
  * \param[in,out] ed               Essential dynamics pointer
@@ -539,7 +541,7 @@ computeSpecialForces(FILE                          *fplog,
                      gmx::ArrayRef<const gmx::RVec> x,
                      const t_mdatoms               *mdatoms,
                      real                          *lambda,
-                     const gmx::ForceFlags         &forceFlags,
+                     const StepWorkload            &stepWork,
                      gmx::ForceWithVirial          *forceWithVirial,
                      gmx_enerdata_t                *enerd,
                      gmx_edsam                     *ed,
@@ -548,7 +550,7 @@ computeSpecialForces(FILE                          *fplog,
     /* NOTE: Currently all ForceProviders only provide forces.
      *       When they also provide energies, remove this conditional.
      */
-    if (forceFlags.computeForces)
+    if (stepWork.computeForces)
     {
         gmx::ForceProviderInput  forceProviderInput(x, *mdatoms, t, box, *cr);
         gmx::ForceProviderOutput forceProviderOutput(forceWithVirial, enerd);
@@ -594,7 +596,7 @@ computeSpecialForces(FILE                          *fplog,
     }
 
     /* Add forces from interactive molecular dynamics (IMD), if any */
-    if (inputrec->bIMD && forceFlags.computeForces)
+    if (inputrec->bIMD && stepWork.computeForces)
     {
         imdSession->applyForces(f);
     }
@@ -605,20 +607,20 @@ computeSpecialForces(FILE                          *fplog,
  * \param[in]  pmedata              The PME structure
  * \param[in]  box                  The box matrix
  * \param[in]  x                    Coordinate array
- * \param[in]  forceFlags           Force schedule flags
+ * \param[in]  stepWork             Step schedule flags
  * \param[in]  pmeFlags             PME flags
  * \param[in]  useGpuForceReduction True if GPU-based force reduction is active this step
  * \param[in]  wcycle               The wallcycle structure
  */
-static inline void launchPmeGpuSpread(gmx_pme_t             *pmedata,
-                                      const matrix           box,
-                                      const rvec             x[],
-                                      const gmx::ForceFlags &forceFlags,
-                                      int                    pmeFlags,
-                                      bool                   useGpuForceReduction,
-                                      gmx_wallcycle_t        wcycle)
+static inline void launchPmeGpuSpread(gmx_pme_t          *pmedata,
+                                      const matrix        box,
+                                      const rvec          x[],
+                                      const StepWorkload &stepWork,
+                                      int                 pmeFlags,
+                                      bool                useGpuForceReduction,
+                                      gmx_wallcycle_t     wcycle)
 {
-    pme_gpu_prepare_computation(pmedata, forceFlags.haveDynamicBox, box, wcycle, pmeFlags, useGpuForceReduction);
+    pme_gpu_prepare_computation(pmedata, stepWork.haveDynamicBox, box, wcycle, pmeFlags, useGpuForceReduction);
     pme_gpu_copy_coordinates_to_gpu(pmedata, x, wcycle);
     pme_gpu_launch_spread(pmedata, wcycle);
 }
@@ -650,17 +652,17 @@ static void launchPmeGpuFftAndGather(gmx_pme_t        *pmedata,
  * \param[in,out] pmedata          PME module data
  * \param[in,out] forceOutputs     Output buffer for the forces and virial
  * \param[in,out] enerd            Energy data structure results are reduced into
- * \param[in]     forceFlags       Force schedule flags
+ * \param[in]     stepWork         Step schedule flags
  * \param[in]     pmeFlags         PME flags
  * \param[in]     wcycle           The wallcycle structure
  */
-static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t                  *nbv,
-                                        gmx_pme_t                           *pmedata,
-                                        gmx::ForceOutputs                   *forceOutputs,
-                                        gmx_enerdata_t                      *enerd,
-                                        const gmx::ForceFlags               &forceFlags,
-                                        int                                  pmeFlags,
-                                        gmx_wallcycle_t                      wcycle)
+static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t *nbv,
+                                        gmx_pme_t          *pmedata,
+                                        gmx::ForceOutputs  *forceOutputs,
+                                        gmx_enerdata_t     *enerd,
+                                        const StepWorkload &stepWork,
+                                        int                 pmeFlags,
+                                        gmx_wallcycle_t     wcycle)
 {
     bool isPmeGpuDone = false;
     bool isNbGpuDone  = false;
@@ -684,7 +686,7 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t                  *nbv
         {
             GpuTaskCompletion completionType = (isPmeGpuDone) ? GpuTaskCompletion::Wait : GpuTaskCompletion::Check;
             isNbGpuDone = Nbnxm::gpu_try_finish_task(nbv->gpu_nbv,
-                                                     forceFlags,
+                                                     stepWork,
                                                      Nbnxm::AtomLocality::Local,
                                                      enerd->grpp.ener[egLJSR].data(),
                                                      enerd->grpp.ener[egCOULSR].data(),
@@ -705,7 +707,7 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t                  *nbv
  * \param[in] pull_work The pull work object.
  * \param[in] inputrec  input record
  * \param[in] force     force array
- * \param[in] forceFlags Force schedule flags
+ * \param[in] stepWork  Step schedule flags
  * \param[out] wcycle   wallcycle recording structure
  *
  * \returns             Cleared force output structure
@@ -715,15 +717,15 @@ setupForceOutputs(t_forcerec                          *fr,
                   pull_t                              *pull_work,
                   const t_inputrec                    &inputrec,
                   gmx::ArrayRefWithPadding<gmx::RVec>  force,
-                  const gmx::ForceFlags               &forceFlags,
+                  const StepWorkload                  &stepWork,
                   gmx_wallcycle_t                      wcycle)
 {
     wallcycle_sub_start(wcycle, ewcsCLEAR_FORCE_BUFFER);
 
     /* NOTE: We assume fr->shiftForces is all zeros here */
-    gmx::ForceWithShiftForces forceWithShiftForces(force, forceFlags.computeVirial, fr->shiftForces);
+    gmx::ForceWithShiftForces forceWithShiftForces(force, stepWork.computeVirial, fr->shiftForces);
 
-    if (forceFlags.computeForces)
+    if (stepWork.computeForces)
     {
         /* Clear the short- and long-range forces */
         clear_rvecs_omp(fr->natoms_force_constr,
@@ -735,12 +737,12 @@ setupForceOutputs(t_forcerec                          *fr,
      * directly, such as PME. Otherwise, forceWithVirial uses the
      * the same force (f in legacy calls) buffer as other algorithms.
      */
-    const bool useSeparateForceWithVirialBuffer = (forceFlags.computeForces &&
-                                                   (forceFlags.computeVirial && fr->haveDirectVirialContributions));
+    const bool useSeparateForceWithVirialBuffer = (stepWork.computeForces &&
+                                                   (stepWork.computeVirial && fr->haveDirectVirialContributions));
     /* forceWithVirial uses the local atom range only */
     gmx::ForceWithVirial forceWithVirial(useSeparateForceWithVirialBuffer ?
                                          fr->forceBufferForDirectVirialContributions : force.unpaddedArrayRef(),
-                                         forceFlags.computeVirial);
+                                         stepWork.computeVirial);
 
     if (useSeparateForceWithVirialBuffer)
     {
@@ -763,30 +765,23 @@ setupForceOutputs(t_forcerec                          *fr,
 }
 
 
-/*! \brief Set up flags that indicate what type of work is there to compute.
- *
- * Currently we only update it at search steps,
- * but some properties may change more frequently (e.g. virial/non-virial step),
- * so when including those either the frequency of update (per-step) or the scope
- * of a flag will change (i.e. a set of flags for nstlist steps).
- *
+/*! \brief Set up flags that have the lifetime of the domain indicating what type of work is there to compute.
  */
 static void
-setupForceWorkload(gmx::PpForceWorkload  *forceWork,
-                   const t_inputrec      *inputrec,
-                   const t_forcerec      *fr,
-                   const pull_t          *pull_work,
-                   const gmx_edsam       *ed,
-                   const t_idef          &idef,
-                   const t_fcdata        *fcd,
-                   const gmx::ForceFlags &forceFlags
-                   )
+setupDomainLifetimeWorkload(DomainLifetimeWorkload *domainWork,
+                            const t_inputrec       *inputrec,
+                            const t_forcerec       *fr,
+                            const pull_t           *pull_work,
+                            const gmx_edsam        *ed,
+                            const t_idef           &idef,
+                            const t_fcdata         *fcd,
+                            const StepWorkload     &stepWork)
 {
-    forceWork->haveSpecialForces      = haveSpecialForces(inputrec, fr->forceProviders, pull_work, forceFlags.computeForces, ed);
-    forceWork->haveCpuBondedWork      = haveCpuBondeds(*fr);
-    forceWork->haveGpuBondedWork      = ((fr->gpuBonded != nullptr) && fr->gpuBonded->haveInteractions());
-    forceWork->haveRestraintsWork     = havePositionRestraints(idef, *fcd);
-    forceWork->haveCpuListedForceWork = haveCpuListedForces(*fr, idef, *fcd);
+    domainWork->haveSpecialForces      = haveSpecialForces(inputrec, fr->forceProviders, pull_work, stepWork.computeForces, ed);
+    domainWork->haveCpuBondedWork      = haveCpuBondeds(*fr);
+    domainWork->haveGpuBondedWork      = ((fr->gpuBonded != nullptr) && fr->gpuBonded->haveInteractions());
+    domainWork->haveRestraintsWork     = havePositionRestraints(idef, *fcd);
+    domainWork->haveCpuListedForceWork = haveCpuListedForces(*fr, idef, *fcd);
 }
 
 /*! \brief Set up force flag struct from the force bitmask.
@@ -796,9 +791,9 @@ setupForceWorkload(gmx::PpForceWorkload  *forceWork,
  * \param[in]      isNonbondedOn        Global override, if false forces to turn off all nonbonded calculation.
  */
 static void
-setupForceFlags(gmx::ForceFlags *flags,
-                const int        legacyFlags,
-                const bool       isNonbondedOn)
+setupStepWorkload(StepWorkload *flags,
+                  const int     legacyFlags,
+                  const bool    isNonbondedOn)
 {
     flags->stateChanged           = ((legacyFlags & GMX_FORCE_STATECHANGED) != 0);
     flags->haveDynamicBox         = ((legacyFlags & GMX_FORCE_DYNAMICBOX) != 0);
@@ -815,18 +810,18 @@ setupForceFlags(gmx::ForceFlags *flags,
 /* \brief Launch end-of-step GPU tasks: buffer clearing and rolling pruning.
  *
  * TODO: eliminate the \p useGpuNonbonded and \p useGpuPme when these are
- * incorporated in PpForceWorkload.
+ * incorporated in DomainLifetimeWorkload.
  */
 static void
-launchGpuEndOfStepTasks(nonbonded_verlet_t            *nbv,
-                        gmx::GpuBonded                *gpuBonded,
-                        gmx_pme_t                     *pmedata,
-                        gmx_enerdata_t                *enerd,
-                        const gmx::MdScheduleWorkload &mdScheduleWork,
-                        bool                           useGpuNonbonded,
-                        bool                           useGpuPme,
-                        int64_t                        step,
-                        gmx_wallcycle_t                wcycle)
+launchGpuEndOfStepTasks(nonbonded_verlet_t               *nbv,
+                        gmx::GpuBonded                   *gpuBonded,
+                        gmx_pme_t                        *pmedata,
+                        gmx_enerdata_t                   *enerd,
+                        const gmx::MdrunScheduleWorkload &runScheduleWork,
+                        bool                              useGpuNonbonded,
+                        bool                              useGpuPme,
+                        int64_t                           step,
+                        gmx_wallcycle_t                   wcycle)
 {
     if (useGpuNonbonded)
     {
@@ -842,7 +837,7 @@ launchGpuEndOfStepTasks(nonbonded_verlet_t            *nbv,
         /* now clear the GPU outputs while we finish the step on the CPU */
         wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
         wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-        Nbnxm::gpu_clear_outputs(nbv->gpu_nbv, mdScheduleWork.forceFlags.computeVirial);
+        Nbnxm::gpu_clear_outputs(nbv->gpu_nbv, runScheduleWork.stepWork.computeVirial);
         wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
         wallcycle_stop(wcycle, ewcLAUNCH_GPU);
     }
@@ -852,7 +847,7 @@ launchGpuEndOfStepTasks(nonbonded_verlet_t            *nbv,
         pme_gpu_reinit_computation(pmedata, wcycle);
     }
 
-    if (mdScheduleWork.forceWork.haveGpuBondedWork && mdScheduleWork.forceFlags.computeEnergy)
+    if (runScheduleWork.domainWork.haveGpuBondedWork && runScheduleWork.stepWork.computeEnergy)
     {
         // in principle this should be included in the DD balancing region,
         // but generally it is infrequent so we'll omit it for the sake of
@@ -887,7 +882,7 @@ void do_force(FILE                                     *fplog,
               gmx::ArrayRef<real>                       lambda,
               t_graph                                  *graph,
               t_forcerec                               *fr,
-              gmx::MdScheduleWorkload                  *mdScheduleWork,
+              gmx::MdrunScheduleWorkload               *runScheduleWork,
               const gmx_vsite_t                        *vsite,
               rvec                                      mu_tot,
               double                                    t,
@@ -908,11 +903,11 @@ void do_force(FILE                                     *fplog,
     {
         legacyFlags &= ~GMX_FORCE_NONBONDED;
     }
-    setupForceFlags(&mdScheduleWork->forceFlags, legacyFlags, fr->bNonbonded);
+    setupStepWorkload(&runScheduleWork->stepWork, legacyFlags, fr->bNonbonded);
 
-    const gmx::ForceFlags &forceFlags = mdScheduleWork->forceFlags;
+    const gmx::StepWorkload &stepWork = runScheduleWork->stepWork;
 
-    bFillGrid     = (forceFlags.doNeighborSearch && forceFlags.stateChanged);
+    bFillGrid     = (stepWork.doNeighborSearch && stepWork.stateChanged);
     bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
     bUseGPU       = fr->nbv->useGpu();
     bUseOrEmulGPU = bUseGPU || fr->nbv->emulateGpu();
@@ -922,9 +917,9 @@ void do_force(FILE                                     *fplog,
     const bool useGpuPme  = EEL_PME(fr->ic->eeltype) && thisRankHasDuty(cr, DUTY_PME) &&
         ((pmeRunMode == PmeRunMode::GPU) || (pmeRunMode == PmeRunMode::Mixed));
     const int  pmeFlags = GMX_PME_SPREAD | GMX_PME_SOLVE |
-        (forceFlags.computeVirial   ? GMX_PME_CALC_ENER_VIR : 0) |
-        (forceFlags.computeEnergy ? GMX_PME_CALC_ENER_VIR : 0) |
-        (forceFlags.computeForces   ? GMX_PME_CALC_F : 0);
+        (stepWork.computeVirial   ? GMX_PME_CALC_ENER_VIR : 0) |
+        (stepWork.computeEnergy ? GMX_PME_CALC_ENER_VIR : 0) |
+        (stepWork.computeForces   ? GMX_PME_CALC_F : 0);
 
     // Switches on whether to use GPU for position and force buffer operations
     // TODO consider all possible combinations of triggers, and how to combine optimally in each case.
@@ -932,7 +927,7 @@ void do_force(FILE                                     *fplog,
         BufferOpsUseGpu::True : BufferOpsUseGpu::False;;
     // GPU Force buffer ops are disabled on virial steps, because the virial calc is not yet ported to GPU
     const BufferOpsUseGpu useGpuFBufOps = (c_enableGpuBufOps && bUseGPU && (GMX_GPU == GMX_GPU_CUDA))
-        && !(forceFlags.computeVirial || forceFlags.computeEnergy) ?
+        && !(stepWork.computeVirial || stepWork.computeEnergy) ?
         BufferOpsUseGpu::True : BufferOpsUseGpu::False;
     // TODO: move / add this flag to the internal PME GPU data structures
     const bool useGpuPmeFReduction = (useGpuFBufOps == BufferOpsUseGpu::True) &&
@@ -942,7 +937,7 @@ void do_force(FILE                                     *fplog,
      * somewhere early inside the step after communication during domain
      * decomposition (and not during the previous step as usual).
      */
-    if (forceFlags.doNeighborSearch)
+    if (stepWork.doNeighborSearch)
     {
         ddBalanceRegionHandler.openBeforeForceComputationCpu(DdAllowBalanceRegionReopen::yes);
     }
@@ -952,7 +947,7 @@ void do_force(FILE                                     *fplog,
 
     clear_mat(vir_force);
 
-    if (forceFlags.stateChanged)
+    if (stepWork.stateChanged)
     {
         if (inputrecNeedMutot(inputrec))
         {
@@ -970,7 +965,7 @@ void do_force(FILE                                     *fplog,
         /* Compute shift vectors every step,
          * because of pressure coupling or box deformation!
          */
-        if (forceFlags.haveDynamicBox && forceFlags.stateChanged)
+        if (stepWork.haveDynamicBox && stepWork.stateChanged)
         {
             calc_shifts(box, fr->shift_vec);
         }
@@ -986,7 +981,7 @@ void do_force(FILE                                     *fplog,
         }
     }
 
-    nbnxn_atomdata_copy_shiftvec(forceFlags.haveDynamicBox,
+    nbnxn_atomdata_copy_shiftvec(stepWork.haveDynamicBox,
                                  fr->shift_vec, nbv->nbat.get());
 
 #if GMX_MPI
@@ -999,20 +994,20 @@ void do_force(FILE                                     *fplog,
          */
         gmx_pme_send_coordinates(cr, box, as_rvec_array(x.unpaddedArrayRef().data()),
                                  lambda[efptCOUL], lambda[efptVDW],
-                                 (forceFlags.computeVirial || forceFlags.computeEnergy),
+                                 (stepWork.computeVirial || stepWork.computeEnergy),
                                  step, wcycle);
     }
 #endif /* GMX_MPI */
 
     if (useGpuPme)
     {
-        launchPmeGpuSpread(fr->pmedata, box, as_rvec_array(x.unpaddedArrayRef().data()), forceFlags, pmeFlags, useGpuPmeFReduction, wcycle);
+        launchPmeGpuSpread(fr->pmedata, box, as_rvec_array(x.unpaddedArrayRef().data()), stepWork, pmeFlags, useGpuPmeFReduction, wcycle);
     }
 
     /* do gridding for pair search */
-    if (forceFlags.doNeighborSearch)
+    if (stepWork.doNeighborSearch)
     {
-        if (graph && forceFlags.stateChanged)
+        if (graph && stepWork.stateChanged)
         {
             /* Calculate intramolecular shift vectors to make molecules whole */
             mk_mshift(fplog, graph, fr->ePBC, box, as_rvec_array(x.unpaddedArrayRef().data()));
@@ -1082,24 +1077,26 @@ void do_force(FILE                                     *fplog,
         }
     }
 
-    // Call it per-step as force-flags can change.
-    // Need to run after the GPU-offload bonded interaction lists
-    // are set up to be able to determine whether there is bonded work.
-    setupForceWorkload(&mdScheduleWork->forceWork,
-                       inputrec,
-                       fr,
-                       pull_work,
-                       ed,
-                       top->idef,
-                       fcd,
-                       forceFlags);
+    if (stepWork.doNeighborSearch)
+    {
+        // Need to run after the GPU-offload bonded interaction lists
+        // are set up to be able to determine whether there is bonded work.
+        setupDomainLifetimeWorkload(&runScheduleWork->domainWork,
+                                    inputrec,
+                                    fr,
+                                    pull_work,
+                                    ed,
+                                    top->idef,
+                                    fcd,
+                                    stepWork);
+    }
 
-    const gmx::PpForceWorkload &forceWork = mdScheduleWork->forceWork;
+    const gmx::DomainLifetimeWorkload &domainWork = runScheduleWork->domainWork;
 
     /* do local pair search */
-    if (forceFlags.doNeighborSearch)
+    if (stepWork.doNeighborSearch)
     {
-        // TODO: fuse this branch with the above forceFlags.doNeighborSearch block
+        // TODO: fuse this branch with the above stepWork.doNeighborSearch block
         wallcycle_start_nocount(wcycle, ewcNS);
         wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
         /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
@@ -1151,7 +1148,7 @@ void do_force(FILE                                     *fplog,
 
         wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
         Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat.get());
-        if (forceFlags.doNeighborSearch || (useGpuXBufOps == BufferOpsUseGpu::False))
+        if (stepWork.doNeighborSearch || (useGpuXBufOps == BufferOpsUseGpu::False))
         {
             Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(),
                                       Nbnxm::AtomLocality::Local);
@@ -1161,16 +1158,16 @@ void do_force(FILE                                     *fplog,
 
         // bonded work not split into separate local and non-local, so with DD
         // we can only launch the kernel after non-local coordinates have been received.
-        if (forceWork.haveGpuBondedWork && !havePPDomainDecomposition(cr))
+        if (domainWork.haveGpuBondedWork && !havePPDomainDecomposition(cr))
         {
             wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_BONDED);
-            fr->gpuBonded->launchKernel(fr, forceFlags, box);
+            fr->gpuBonded->launchKernel(fr, stepWork, box);
             wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED);
         }
 
         /* launch local nonbonded work on GPU */
         wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-        do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::Local, enbvClearFNo,
+        do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local, enbvClearFNo,
                      step, nrnb, wcycle);
         wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
         wallcycle_stop(wcycle, ewcLAUNCH_GPU);
@@ -1195,9 +1192,9 @@ void do_force(FILE                                     *fplog,
        do non-local pair search */
     if (havePPDomainDecomposition(cr))
     {
-        if (forceFlags.doNeighborSearch)
+        if (stepWork.doNeighborSearch)
         {
-            // TODO: fuse this branch with the above large forceFlags.doNeighborSearch block
+            // TODO: fuse this branch with the above large stepWork.doNeighborSearch block
             wallcycle_start_nocount(wcycle, ewcNS);
             wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
             /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
@@ -1223,7 +1220,7 @@ void do_force(FILE                                     *fplog,
                 gpuHaloExchange->communicateHaloCoordinates(box);
 
                 // TODO Force flags should include haveFreeEnergyWork for this domain
-                if (forceWork.haveCpuBondedWork || (fr->efep != efepNO))
+                if (domainWork.haveCpuBondedWork || (fr->efep != efepNO))
                 {
                     //non-local part of coordinate buffer must be copied back to host for CPU work
                     nbv->launch_copy_x_from_gpu(as_rvec_array(x.unpaddedArrayRef().data()), Nbnxm::AtomLocality::NonLocal);
@@ -1257,7 +1254,7 @@ void do_force(FILE                                     *fplog,
         {
             wallcycle_start(wcycle, ewcLAUNCH_GPU);
 
-            if (forceFlags.doNeighborSearch || (useGpuXBufOps == BufferOpsUseGpu::False))
+            if (stepWork.doNeighborSearch || (useGpuXBufOps == BufferOpsUseGpu::False))
             {
                 wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
                 Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(),
@@ -1265,16 +1262,16 @@ void do_force(FILE                                     *fplog,
                 wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
             }
 
-            if (forceWork.haveGpuBondedWork)
+            if (domainWork.haveGpuBondedWork)
             {
                 wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_BONDED);
-                fr->gpuBonded->launchKernel(fr, forceFlags, box);
+                fr->gpuBonded->launchKernel(fr, stepWork, box);
                 wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED);
             }
 
             /* launch non-local nonbonded tasks on GPU */
             wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-            do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo,
+            do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo,
                          step, nrnb, wcycle);
             wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
 
@@ -1293,20 +1290,20 @@ void do_force(FILE                                     *fplog,
         if (havePPDomainDecomposition(cr))
         {
             Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(),
-                                      forceFlags, Nbnxm::AtomLocality::NonLocal, copyBackNbForce);
+                                      stepWork, Nbnxm::AtomLocality::NonLocal, copyBackNbForce);
         }
         Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(),
-                                  forceFlags, Nbnxm::AtomLocality::Local, copyBackNbForce);
+                                  stepWork, Nbnxm::AtomLocality::Local, copyBackNbForce);
         wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
 
-        if (forceWork.haveGpuBondedWork && forceFlags.computeEnergy)
+        if (domainWork.haveGpuBondedWork && stepWork.computeEnergy)
         {
             fr->gpuBonded->launchEnergyTransfer();
         }
         wallcycle_stop(wcycle, ewcLAUNCH_GPU);
     }
 
-    if (forceFlags.stateChanged && inputrecNeedMutot(inputrec))
+    if (stepWork.stateChanged && inputrecNeedMutot(inputrec))
     {
         if (PAR(cr))
         {
@@ -1355,7 +1352,7 @@ void do_force(FILE                                     *fplog,
     if (inputrec->bRot)
     {
         wallcycle_start(wcycle, ewcROT);
-        do_rotation(cr, enforcedRotation, box, as_rvec_array(x.unpaddedArrayRef().data()), t, step, forceFlags.doNeighborSearch);
+        do_rotation(cr, enforcedRotation, box, as_rvec_array(x.unpaddedArrayRef().data()), t, step, stepWork.doNeighborSearch);
         wallcycle_stop(wcycle, ewcROT);
     }
 
@@ -1366,7 +1363,7 @@ void do_force(FILE                                     *fplog,
 
     // Set up and clear force outputs.
     // We use std::move to keep the compiler happy, it has no effect.
-    ForceOutputs forceOut = setupForceOutputs(fr, pull_work, *inputrec, std::move(force), forceFlags, wcycle);
+    ForceOutputs forceOut = setupForceOutputs(fr, pull_work, *inputrec, std::move(force), stepWork, wcycle);
 
     /* We calculate the non-bonded forces, when done on the CPU, here.
      * We do this before calling do_force_lowlevel, because in that
@@ -1378,7 +1375,7 @@ void do_force(FILE                                     *fplog,
 
     if (!bUseOrEmulGPU)
     {
-        do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::Local, enbvClearFYes,
+        do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local, enbvClearFYes,
                      step, nrnb, wcycle);
     }
 
@@ -1390,14 +1387,14 @@ void do_force(FILE                                     *fplog,
         nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::Local,
                                       fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms,
                                       inputrec->fepvals, lambda.data(),
-                                      enerd, forceFlags, nrnb);
+                                      enerd, stepWork, nrnb);
 
         if (havePPDomainDecomposition(cr))
         {
             nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::NonLocal,
                                           fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms,
                                           inputrec->fepvals, lambda.data(),
-                                          enerd, forceFlags, nrnb);
+                                          enerd, stepWork, nrnb);
         }
     }
 
@@ -1405,11 +1402,11 @@ void do_force(FILE                                     *fplog,
     {
         if (havePPDomainDecomposition(cr))
         {
-            do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo,
+            do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo,
                          step, nrnb, wcycle);
         }
 
-        if (forceFlags.computeForces)
+        if (stepWork.computeForces)
         {
             /* Add all the non-bonded force to the normal force array.
              * This can be split into a local and a non-local part when overlapping
@@ -1421,7 +1418,7 @@ void do_force(FILE                                     *fplog,
         }
 
         /* If there are multiple fshift output buffers we need to reduce them */
-        if (forceFlags.computeVirial)
+        if (stepWork.computeVirial)
         {
             /* This is not in a subcounter because it takes a
                negligible and constant-sized amount of time */
@@ -1438,7 +1435,7 @@ void do_force(FILE                                     *fplog,
 
     // TODO Force flags should include haveFreeEnergyWork for this domain
     if (ddUsesGpuDirectCommunication &&
-        (forceWork.haveCpuBondedWork || (fr->efep != efepNO)))
+        (domainWork.haveCpuBondedWork || (fr->efep != efepNO)))
     {
         /* Wait for non-local coordinate data to be copied from device */
         nbv->wait_nonlocal_x_copy_D2H_done();
@@ -1448,7 +1445,7 @@ void do_force(FILE                                     *fplog,
                       cr, ms, nrnb, wcycle, mdatoms,
                       x, hist, &forceOut, enerd, fcd,
                       box, lambda.data(), graph, fr->mu_tot,
-                      forceFlags,
+                      stepWork,
                       ddBalanceRegionHandler);
 
     wallcycle_stop(wcycle, ewcFORCE);
@@ -1456,8 +1453,9 @@ void do_force(FILE                                     *fplog,
     computeSpecialForces(fplog, cr, inputrec, awh, enforcedRotation,
                          imdSession, pull_work, step, t, wcycle,
                          fr->forceProviders, box, x.unpaddedArrayRef(), mdatoms, lambda.data(),
-                         forceFlags, &forceOut.forceWithVirial(), enerd,
-                         ed, forceFlags.doNeighborSearch);
+                         stepWork, &forceOut.forceWithVirial(), enerd,
+                         ed, stepWork.doNeighborSearch);
+
 
     // Will store the amount of cycles spent waiting for the GPU that
     // will be later used in the DLB accounting.
@@ -1472,7 +1470,7 @@ void do_force(FILE                                     *fplog,
             if (bUseGPU)
             {
                 cycles_wait_gpu += Nbnxm::gpu_wait_finish_task(nbv->gpu_nbv,
-                                                               forceFlags, Nbnxm::AtomLocality::NonLocal,
+                                                               stepWork, Nbnxm::AtomLocality::NonLocal,
                                                                enerd->grpp.ener[egLJSR].data(),
                                                                enerd->grpp.ener[egCOULSR].data(),
                                                                forceWithShiftForces.shiftForces(),
@@ -1481,7 +1479,7 @@ void do_force(FILE                                     *fplog,
             else
             {
                 wallcycle_start_nocount(wcycle, ewcFORCE);
-                do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFYes,
+                do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFYes,
                              step, nrnb, wcycle);
                 wallcycle_stop(wcycle, ewcFORCE);
             }
@@ -1491,7 +1489,7 @@ void do_force(FILE                                     *fplog,
                 // TODO: move this into DomainLifetimeWorkload, including the second part of the condition
                 // The bonded and free energy CPU tasks can have non-local force contributions
                 // which are a dependency for the GPU force reduction.
-                bool  haveNonLocalForceContribInCpuBuffer = forceWork.haveCpuBondedWork || (fr->efep != efepNO);
+                bool  haveNonLocalForceContribInCpuBuffer = domainWork.haveCpuBondedWork || (fr->efep != efepNO);
 
                 rvec *f = as_rvec_array(forceWithShiftForces.force().data());
                 if (haveNonLocalForceContribInCpuBuffer)
@@ -1512,7 +1510,7 @@ void do_force(FILE                                     *fplog,
             }
 
 
-            if (fr->nbv->emulateGpu() && forceFlags.computeVirial)
+            if (fr->nbv->emulateGpu() && stepWork.computeVirial)
             {
                 nbnxn_atomdata_add_nbat_fshift_to_fshift(*nbv->nbat,
                                                          forceWithShiftForces.shiftForces());
@@ -1523,7 +1521,7 @@ void do_force(FILE                                     *fplog,
     const bool useGpuForcesHaloExchange = ddUsesGpuDirectCommunication && (useGpuFBufOps == BufferOpsUseGpu::True);
     const bool useCpuPmeFReduction      = thisRankHasDuty(cr, DUTY_PME) && !useGpuPmeFReduction;
     // TODO: move this into DomainLifetimeWorkload, including the second part of the condition
-    const bool haveCpuLocalForces     = (forceWork.haveSpecialForces || forceWork.haveCpuListedForceWork || useCpuPmeFReduction ||
+    const bool haveCpuLocalForces     = (domainWork.haveSpecialForces || domainWork.haveCpuListedForceWork || useCpuPmeFReduction ||
                                          (fr->efep != efepNO));
 
     if (havePPDomainDecomposition(cr))
@@ -1535,7 +1533,7 @@ void do_force(FILE                                     *fplog,
          */
         ddBalanceRegionHandler.closeAfterForceComputationCpu();
 
-        if (forceFlags.computeForces)
+        if (stepWork.computeForces)
         {
             gmx::ArrayRef<gmx::RVec>  force  = forceOut.forceWithShiftForces().force();
             rvec                     *f      = as_rvec_array(force.data());
@@ -1568,7 +1566,7 @@ void do_force(FILE                                     *fplog,
     if (alternateGpuWait)
     {
         alternatePmeNbGpuWaitReduce(fr->nbv.get(), fr->pmedata, &forceOut, enerd,
-                                    forceFlags, pmeFlags, wcycle);
+                                    stepWork, pmeFlags, wcycle);
     }
 
     if (!alternateGpuWait && useGpuPme)
@@ -1587,7 +1585,7 @@ void do_force(FILE                                     *fplog,
         const float gpuWaitApiOverheadMargin = 2e6F; /* cycles */
         const float waitCycles               =
             Nbnxm::gpu_wait_finish_task(nbv->gpu_nbv,
-                                        forceFlags, Nbnxm::AtomLocality::Local,
+                                        stepWork, Nbnxm::AtomLocality::Local,
                                         enerd->grpp.ener[egLJSR].data(),
                                         enerd->grpp.ener[egCOULSR].data(),
                                         forceOut.forceWithShiftForces().shiftForces(),
@@ -1596,7 +1594,7 @@ void do_force(FILE                                     *fplog,
         if (ddBalanceRegionHandler.useBalancingRegion())
         {
             DdBalanceRegionWaitedForGpu waitedForGpu = DdBalanceRegionWaitedForGpu::yes;
-            if (forceFlags.computeForces &&  waitCycles <= gpuWaitApiOverheadMargin)
+            if (stepWork.computeForces &&  waitCycles <= gpuWaitApiOverheadMargin)
             {
                 /* We measured few cycles, it could be that the kernel
                  * and transfer finished earlier and there was no actual
@@ -1615,7 +1613,7 @@ void do_force(FILE                                     *fplog,
         // NOTE: emulation kernel is not included in the balancing region,
         // but emulation mode does not target performance anyway
         wallcycle_start_nocount(wcycle, ewcFORCE);
-        do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::Local,
+        do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local,
                      DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes,
                      step, nrnb, wcycle);
         wallcycle_stop(wcycle, ewcFORCE);
@@ -1672,7 +1670,7 @@ void do_force(FILE                                     *fplog,
     }
 
     launchGpuEndOfStepTasks(nbv, fr->gpuBonded, fr->pmedata, enerd,
-                            *mdScheduleWork,
+                            *runScheduleWork,
                             bUseGPU, useGpuPme,
                             step,
                             wcycle);
@@ -1682,21 +1680,21 @@ void do_force(FILE                                     *fplog,
         dd_force_flop_stop(cr->dd, nrnb);
     }
 
-    if (forceFlags.computeForces)
+    if (stepWork.computeForces)
     {
         rvec *f = as_rvec_array(forceOut.forceWithShiftForces().force().data());
 
         /* If we have NoVirSum forces, but we do not calculate the virial,
          * we sum fr->f_novirsum=forceOut.f later.
          */
-        if (vsite && !(fr->haveDirectVirialContributions && !forceFlags.computeVirial))
+        if (vsite && !(fr->haveDirectVirialContributions && !stepWork.computeVirial))
         {
             rvec *fshift = as_rvec_array(forceOut.forceWithShiftForces().shiftForces().data());
             spread_vsite_f(vsite, as_rvec_array(x.unpaddedArrayRef().data()), f, fshift, FALSE, nullptr, nrnb,
                            &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr, wcycle);
         }
 
-        if (forceFlags.computeVirial)
+        if (stepWork.computeVirial)
         {
             /* Calculation of the virial must be done after vsites! */
             calc_virial(0, mdatoms->homenr, as_rvec_array(x.unpaddedArrayRef().data()),
@@ -1713,15 +1711,15 @@ void do_force(FILE                                     *fplog,
         pme_receive_force_ener(cr, &forceOut.forceWithVirial(), enerd, wcycle);
     }
 
-    if (forceFlags.computeForces)
+    if (stepWork.computeForces)
     {
         post_process_forces(cr, step, nrnb, wcycle,
                             top, box, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut,
                             vir_force, mdatoms, graph, fr, vsite,
-                            forceFlags);
+                            stepWork);
     }
 
-    if (forceFlags.computeEnergy)
+    if (stepWork.computeEnergy)
     {
         /* Sum the potential energy terms from group contributions */
         sum_epot(&(enerd->grpp), enerd->term);
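
Taken together, the sim_util.cpp hunks above are a mechanical substitution: every per-step query moves from forceFlags to stepWork, and every domain-lifetime query from forceWork to domainWork. A self-contained sketch of the resulting gating pattern, using stand-in structs that carry only the flags visible in this diff (not the real GROMACS types):

    // Sketch only: minimal stand-ins for the two flag tiers used in do_force().
    struct StepWorkload
    {
        bool computeForces = false; // set anew every MD step
        bool computeVirial = false;
        bool computeEnergy = false;
    };

    struct DomainLifetimeWorkload
    {
        bool haveGpuBondedWork = false; // fixed for an nstlist range of steps
    };

    // Mirrors the condition guarding launchEnergyTransfer() above; the
    // helper name is ours, for illustration only.
    bool shouldLaunchBondedEnergyTransfer(const DomainLifetimeWorkload &domainWork,
                                          const StepWorkload           &stepWork)
    {
        return domainWork.haveGpuBondedWork && stepWork.computeEnergy;
    }

The split makes the update cadence explicit at each call site: stepWork may change every step, domainWork only at repartitioning.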
index 86cd154c9c2955291667bf0f877dee9154b64a0e..9ae093de7070b9558f40e0c1441eb2156c8ac0aa 100644 (file)
@@ -71,7 +71,7 @@ namespace gmx
 enum class StartingBehavior;
 class BoxDeformation;
 class Constraints;
-class MdScheduleWorkload;
+class MdrunScheduleWorkload;
 class IMDOutputProvider;
 struct MdModulesNotifier;
 class ImdSession;
@@ -129,7 +129,7 @@ class ISimulator
             t_forcerec                         *fr,
             gmx_enerdata_t                     *enerd,
             gmx_ekindata_t                     *ekind,
-            MdScheduleWorkload                 *mdScheduleWork,
+            MdrunScheduleWorkload              *runScheduleWork,
             const ReplicaExchangeParameters    &replExParams,
             gmx_membed_t                       *membed,
             gmx_walltime_accounting            *walltime_accounting,
@@ -164,7 +164,7 @@ class ISimulator
             fr(fr),
             enerd(enerd),
             ekind(ekind),
-            mdScheduleWork(mdScheduleWork),
+            runScheduleWork(runScheduleWork),
             replExParams(replExParams),
             membed(membed),
             walltime_accounting(walltime_accounting),
@@ -232,7 +232,7 @@ class ISimulator
         //! Kinetic energy data.
         gmx_ekindata_t                     *ekind;
         //! Schedule of work for each MD step for this task.
-        MdScheduleWorkload                 *mdScheduleWork;
+        MdrunScheduleWorkload              *runScheduleWork;
         //! Parameters for replica exchange algorithms.
         const ReplicaExchangeParameters    &replExParams;
         //! Parameters for membrane embedding.
index f0e36b335009032ab86a829526f5c6b6dd1b5c50..30580ccfb15b84b9a6217737c879a2b9c84dad48 100644 (file)
@@ -875,7 +875,7 @@ void gmx::LegacySimulator::do_md()
                                 &state->hist,
                                 f.arrayRefWithPadding(), force_vir, mdatoms,
                                 nrnb, wcycle, graph,
-                                shellfc, fr, mdScheduleWork, t, mu_tot,
+                                shellfc, fr, runScheduleWork, t, mu_tot,
                                 vsite,
                                 ddBalanceRegionHandler);
         }
@@ -905,7 +905,7 @@ void gmx::LegacySimulator::do_md()
                      state->box, state->x.arrayRefWithPadding(), &state->hist,
                      f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd,
                      state->lambda, graph,
-                     fr, mdScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
+                     fr, runScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
                      (bNS ? GMX_FORCE_NS : 0) | force_flags,
                      ddBalanceRegionHandler);
         }
index af7a542a80aad4192372302f12c85b32c19efa95..2b2f9e9ada232df4e8030e959f0531ef8987ce0c 100644 (file)
@@ -436,7 +436,7 @@ void gmx::LegacySimulator::do_mimic()
                                 &state->hist,
                                 f.arrayRefWithPadding(), force_vir, mdatoms,
                                 nrnb, wcycle, graph,
-                                shellfc, fr, mdScheduleWork, t, mu_tot,
+                                shellfc, fr, runScheduleWork, t, mu_tot,
                                 vsite,
                                 ddBalanceRegionHandler);
         }
@@ -455,7 +455,7 @@ void gmx::LegacySimulator::do_mimic()
                      state->box, state->x.arrayRefWithPadding(), &state->hist,
                      f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd,
                      state->lambda, graph,
-                     fr, mdScheduleWork, vsite, mu_tot, t, ed,
+                     fr, runScheduleWork, vsite, mu_tot, t, ed,
                      GMX_FORCE_NS | force_flags,
                      ddBalanceRegionHandler);
         }
index 263410c6cc15cf3de045d6e4a9f1396dca442d79..82a2e2fa747b18b295a9432e4f3aa17c570d041b 100644 (file)
 #include "legacysimulator.h"
 #include "shellfc.h"
 
+using gmx::MdrunScheduleWorkload;
+
 //! Utility structure for manipulating states during EM
 typedef struct {
     //! Copy of the global state
@@ -790,7 +792,7 @@ class EnergyEvaluator
         //! Handles how to calculate the forces.
         t_forcerec              *fr;
         //! Schedule of force-calculation work each step for this task.
-        gmx::MdScheduleWorkload *mdScheduleWork;
+        MdrunScheduleWorkload   *runScheduleWork;
         //! Stores the computed energies.
         gmx_enerdata_t          *enerd;
 };
@@ -849,7 +851,7 @@ EnergyEvaluator::run(em_state_t *ems, rvec mu_tot,
              count, nrnb, wcycle, top,
              ems->s.box, ems->s.x.arrayRefWithPadding(), &ems->s.hist,
              ems->f.arrayRefWithPadding(), force_vir, mdAtoms->mdatoms(), enerd, fcd,
-             ems->s.lambda, graph, fr, mdScheduleWork, vsite, mu_tot, t, nullptr,
+             ems->s.lambda, graph, fr, runScheduleWork, vsite, mu_tot, t, nullptr,
              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
              GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
              (bNS ? GMX_FORCE_NS : 0),
@@ -1137,7 +1139,7 @@ LegacySimulator::do_cg()
         top_global, &top,
         inputrec, imdSession, pull_work, nrnb, wcycle, gstat,
         vsite, constr, fcd, graph,
-        mdAtoms, fr, mdScheduleWork, enerd
+        mdAtoms, fr, runScheduleWork, enerd
     };
     /* Call the force routine and some auxiliary (neighboursearching etc.) */
     /* do_force always puts the charge groups in the box and shifts again
@@ -1817,7 +1819,7 @@ LegacySimulator::do_lbfgs()
         top_global, &top,
         inputrec, imdSession, pull_work, nrnb, wcycle, gstat,
         vsite, constr, fcd, graph,
-        mdAtoms, fr, mdScheduleWork, enerd
+        mdAtoms, fr, runScheduleWork, enerd
     };
     energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE);
 
@@ -2477,7 +2479,7 @@ LegacySimulator::do_steep()
         top_global, &top,
         inputrec, imdSession, pull_work, nrnb, wcycle, gstat,
         vsite, constr, fcd, graph,
-        mdAtoms, fr, mdScheduleWork, enerd
+        mdAtoms, fr, runScheduleWork, enerd
     };
 
     /**** HERE STARTS THE LOOP ****
@@ -2782,7 +2784,7 @@ LegacySimulator::do_nm()
         top_global, &top,
         inputrec, imdSession, pull_work, nrnb, wcycle, gstat,
         vsite, constr, fcd, graph,
-        mdAtoms, fr, mdScheduleWork, enerd
+        mdAtoms, fr, runScheduleWork, enerd
     };
     energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE);
     cr->nnodes = nnodes;
@@ -2869,7 +2871,7 @@ LegacySimulator::do_nm()
                                         graph,
                                         shellfc,
                                         fr,
-                                        mdScheduleWork,
+                                        runScheduleWork,
                                         t,
                                         mu_tot,
                                         vsite,
index 2cc56466142f269a3d95f74aced5990a9e37dbf0..c8453f7683a46dc8eee5e3209b790bdbc96a196b 100644 (file)
@@ -558,7 +558,7 @@ void gmx::LegacySimulator::do_rerun()
                                 &state->hist,
                                 f.arrayRefWithPadding(), force_vir, mdatoms,
                                 nrnb, wcycle, graph,
-                                shellfc, fr, mdScheduleWork, t, mu_tot,
+                                shellfc, fr, runScheduleWork, t, mu_tot,
                                 vsite,
                                 ddBalanceRegionHandler);
         }
@@ -577,7 +577,7 @@ void gmx::LegacySimulator::do_rerun()
                      state->box, state->x.arrayRefWithPadding(), &state->hist,
                      f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd,
                      state->lambda, graph,
-                     fr, mdScheduleWork, vsite, mu_tot, t, ed,
+                     fr, runScheduleWork, vsite, mu_tot, t, ed,
                      GMX_FORCE_NS | force_flags,
                      ddBalanceRegionHandler);
         }
index 8d2f549c9dc0c00209ed9ca481ddf55e775b88f2..f6659972c151bc482192243043391e0138eff6c1 100644 (file)
@@ -94,7 +94,6 @@
 #include "gromacs/mdlib/md_support.h"
 #include "gromacs/mdlib/mdatoms.h"
 #include "gromacs/mdlib/membed.h"
-#include "gromacs/mdlib/ppforceworkload.h"
 #include "gromacs/mdlib/qmmm.h"
 #include "gromacs/mdlib/sighandler.h"
 #include "gromacs/mdlib/stophandler.h"
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/mdtypes/mdrunoptions.h"
 #include "gromacs/mdtypes/observableshistory.h"
+#include "gromacs/mdtypes/simulation_workload.h"
 #include "gromacs/mdtypes/state.h"
 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 #include "gromacs/nbnxm/nbnxm.h"
@@ -1546,7 +1546,7 @@ int Mdrunner::mdrunner()
         // TODO This is not the right place to manage the lifetime of
         // this data structure, but currently it's the easiest way to
         // make it work.
-        MdScheduleWorkload mdScheduleWork;
+        MdrunScheduleWorkload runScheduleWork;
 
         GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
         SimulatorBuilder simulatorBuilder;
@@ -1575,7 +1575,7 @@ int Mdrunner::mdrunner()
                     mdAtoms.get(), &nrnb, wcycle, fr,
                     &enerd,
                     &ekind,
-                    &mdScheduleWork,
+                    &runScheduleWork,
                     replExParams,
                     membed,
                     walltime_accounting,
index bd79c56b8f8c9060b28a3fda6d5ea9b9b4a51bb7..876e4a71f1378b1030829c7a8a4fa4b9f71dd0f8 100644 (file)
@@ -998,7 +998,7 @@ void relax_shell_flexcon(FILE                                     *fplog,
                          t_graph                                  *graph,
                          gmx_shellfc_t                            *shfc,
                          t_forcerec                               *fr,
-                         gmx::MdScheduleWorkload                  *mdScheduleWork,
+                         gmx::MdrunScheduleWorkload               *runScheduleWork,
                          double                                    t,
                          rvec                                      mu_tot,
                          const gmx_vsite_t                        *vsite,
@@ -1134,7 +1134,7 @@ void relax_shell_flexcon(FILE                                     *fplog,
              box, x, hist,
              forceWithPadding[Min], force_vir, md, enerd, fcd,
              lambda, graph,
-             fr, mdScheduleWork, vsite, mu_tot, t, nullptr,
+             fr, runScheduleWork, vsite, mu_tot, t, nullptr,
              (bDoNS ? GMX_FORCE_NS : 0) | shellfc_flags,
              ddBalanceRegionHandler);
 
@@ -1245,7 +1245,7 @@ void relax_shell_flexcon(FILE                                     *fplog,
                  top, box, posWithPadding[Try], hist,
                  forceWithPadding[Try], force_vir,
                  md, enerd, fcd, lambda, graph,
-                 fr, mdScheduleWork, vsite, mu_tot, t, nullptr,
+                 fr, runScheduleWork, vsite, mu_tot, t, nullptr,
                  shellfc_flags,
                  ddBalanceRegionHandler);
         sum_epot(&(enerd->grpp), enerd->term);
index 778288566962bf5ef83e9103213de919857458dc..6ec6dc99d8c23e6a1eed882d18212b3ec55e5dec 100644 (file)
@@ -62,7 +62,7 @@ namespace gmx
 {
 class Constraints;
 class ImdSession;
-class MdScheduleWorkload;
+class MdrunScheduleWorkload;
 }
 
 /* Initialization function, also predicts the initial shell positions.
@@ -102,7 +102,7 @@ void relax_shell_flexcon(FILE                                     *log,
                          t_graph                                  *graph,
                          gmx_shellfc_t                            *shfc,
                          t_forcerec                               *fr,
-                         gmx::MdScheduleWorkload                  *mdScheduleWork,
+                         gmx::MdrunScheduleWorkload               *runScheduleWork,
                          double                                    t,
                          rvec                                      mu_tot,
                          const gmx_vsite_t                        *vsite,
index 739664554ae7550a917b6e2a1c245029526d73e2..b7847bf93a5a417b751a16f80192e7c1f2ee6872 100644 (file)
@@ -74,7 +74,7 @@ namespace gmx
 enum class StartingBehavior;
 class BoxDeformation;
 class Constraints;
-class MdScheduleWorkload;
+class MdrunScheduleWorkload;
 class IMDOutputProvider;
 class ImdSession;
 class MDLogger;
index a72e3607b8dc38a537d38d3ad8d4b58427950aba..b7434f0529e18a1b924a1e885ba4e6a274449690 100644 (file)
@@ -754,7 +754,7 @@ LegacySimulator::do_tpi()
                      state_global->box, state_global->x.arrayRefWithPadding(), &state_global->hist,
                      f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd,
                      state_global->lambda,
-                     nullptr, fr, mdScheduleWork, nullptr, mu_tot, t, nullptr,
+                     nullptr, fr, runScheduleWork, nullptr, mu_tot, t, nullptr,
                      GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY |
                      (bStateChanged ? GMX_FORCE_STATECHANGED : 0),
                      DDBalanceRegionHandler(nullptr));
similarity index 60%
rename from src/gromacs/mdlib/ppforceworkload.h
rename to src/gromacs/mdtypes/simulation_workload.h
index 0ff7d990e166d9ffa3fee54e5f6713f332af3b43..7ae8cccab2cfbd2a7ec99bf21bdc6fca7ad38826 100644 (file)
  * the research papers on the package. Check out http://www.gromacs.org.
  */
 /*! \libinternal \file
- * \brief Declares force calculation workload manager.
+ * \brief Declares step, domain-lifetime, and run workload managers.
  *
  * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \author Szilárd Páll <pall.szilard@gmail.com>
  * \ingroup module_mdlib
  * \inlibraryapi
  */
-#ifndef GMX_MDLIB_PPFORCEWORKLOAD_H
-#define GMX_MDLIB_PPFORCEWORKLOAD_H
+#ifndef GMX_MDTYPES_SIMULATION_WORKLOAD_H
+#define GMX_MDTYPES_SIMULATION_WORKLOAD_H
 
 namespace gmx
 {
 
 /*! \libinternal
- * \brief Data structure to map force flags to booleans that have the role of
- *  directing per-step tasks undertaken by a PP rank.
+ * \brief Data structure that describes work that can change per-step.
  *
- * Note that the contents of this class have a lifetime of a single step and
- * are expected to be set every step.
+ * Note that the contents of an object of this type have a lifetime
+ * of a single step and are expected to be set at the beginning of each step.
+ *
+ * The initial set of flags maps the legacy force flags to boolean flags;
+ * these have the role of directing per-step compute tasks undertaken by a PP rank.
  *
  */
-class ForceFlags
+class StepWorkload
 {
     public:
         //! Whether the state has changed, always set unless TPI is used.
@@ -77,49 +80,60 @@ class ForceFlags
 };
 
 /*! \libinternal
- * \brief Manage what force calculation work is required each step.
+ * \brief Manage computational work that has the lifetime of a domain decomposition.
  *
- * An object of this type is updated every neighbour search stage to
- * reflect what work is required during normal MD steps, e.g. whether
- * there are bonded interactions in this PP task.
+ * An object of this type is updated at every decomposition step
+ * (i.e. domain decomposition / neighbour search),
+ * reflecting the work required during the lifetime of a domain,
+ * e.g. whether there are bonded interactions in this PP task.
  *
  * This will remove the desire for inline getters from modules that
  * describe whether they have work to do, because that can be set up
  * once per simulation or neighborlist lifetime and not changed
  * thereafter.
- *
- * \todo Add more responsibilities, including whether GPUs are in use,
- * whether there is PME work, whether DD is active, whether NB
- * local/nonlocal regions have work, whether forces/virial/energy are
- * required.
- *
- * TODO rename
  */
-class PpForceWorkload
+class DomainLifetimeWorkload
 {
     public:
-        //! Whether this MD step has bonded work to run on a GPU.
+        //! Whether the current nstlist step-range has bonded work to run on a GPU.
         bool haveGpuBondedWork = false;
-        //! Whether this MD step has bonded work to run on he CPU.
+        //! Whether the current nstlist step-range has bonded work to run on the CPU.
         bool haveCpuBondedWork = false;
-        //! Whether this MD step has restraints work to run on he CPU.
+        //! Whether the current nstlist step-range has restraints work to run on the CPU.
         bool haveRestraintsWork = false;
-        //! Whether this MD step has listed forces work to run on he CPU.
+        //! Whether the current nstlist step-range has listed forces work to run on the CPU.
         //  Note: currently this is haveCpuBondedWork | haveRestraintsWork
         bool haveCpuListedForceWork = false;
-        //! Whether this MD step has special forces on the CPU.
+        //! Whether the current nstlist step-range has special forces on the CPU.
         bool haveSpecialForces = false;
 };
 
-class MdScheduleWorkload
+/*! \libinternal
+ * \brief Manage what computation is required during the simulation.
+ *
+ * Holds information on the type of workload constant for the entire
+ * simulation.
+ *
+ * An object of this type is constructed at the beginning of the
+ * simulation and is not expected to change.
+ */
+class SimulationWorkload
+{
+};
+
+class MdrunScheduleWorkload
 {
     public:
-        //! Force schedule workload descriptor constant for an nstlist range
-        gmx::PpForceWorkload forceWork;
-        //! Force flags changing per-step
-        gmx::ForceFlags      forceFlags;
+        //! Workload descriptor for information constant for an entire run
+        gmx::SimulationWorkload     simulationWork;
+
+        //! Workload descriptor for information constant for an nstlist range of steps
+        gmx::DomainLifetimeWorkload domainWork;
+
+        //! Workload descriptor for information that may change per-step
+        gmx::StepWorkload           stepWork;
 };
 
-} // namespace gmx
+}      // namespace gmx
 
-#endif
+#endif // GMX_MDTYPES_SIMULATION_WORKLOAD_H
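
A minimal usage sketch of the nesting defined above, assuming the new header is on the include path; the update cadences follow the doxygen comments, and the wrapper function is illustrative, not part of the change:

    #include "gromacs/mdtypes/simulation_workload.h"

    // Illustrative only: when each tier of MdrunScheduleWorkload is refreshed.
    void scheduleSketch()
    {
        gmx::MdrunScheduleWorkload work;  // one object per run (see runner.cpp)

        // simulationWork: filled once at setup; carries no flags yet in this change.

        // domainWork: refreshed at each domain (re)partitioning / pair search.
        work.domainWork.haveGpuBondedWork = true;

        // stepWork: refreshed at the beginning of every MD step.
        work.stepWork.computeForces = true;
        work.stepWork.computeEnergy = true;
    }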
index 366a07539ebfe444eb4757c7d4d509d2efd85af2..0fb0ef019c535d710c0183b4ccae495c11cd7266 100644 (file)
@@ -60,21 +60,21 @@ struct t_graph;
 namespace gmx
 {
 ForceElement::ForceElement(
-        StatePropagatorData *statePropagatorData,
-        EnergyElement       *energyElement,
-        bool                 isDynamicBox,
-        FILE                *fplog,
-        const t_commrec     *cr,
-        const t_inputrec    *inputrec,
-        const MDAtoms       *mdAtoms,
-        t_nrnb              *nrnb,
-        t_forcerec          *fr,
-        t_fcdata            *fcd,
-        gmx_wallcycle       *wcycle,
-        MdScheduleWorkload  *mdScheduleWork,
-        gmx_vsite_t         *vsite,
-        ImdSession          *imdSession,
-        pull_t              *pull_work) :
+        StatePropagatorData   *statePropagatorData,
+        EnergyElement         *energyElement,
+        bool                   isDynamicBox,
+        FILE                  *fplog,
+        const t_commrec       *cr,
+        const t_inputrec      *inputrec,
+        const MDAtoms         *mdAtoms,
+        t_nrnb                *nrnb,
+        t_forcerec            *fr,
+        t_fcdata              *fcd,
+        gmx_wallcycle         *wcycle,
+        MdrunScheduleWorkload *runScheduleWork,
+        gmx_vsite_t           *vsite,
+        ImdSession            *imdSession,
+        pull_t                *pull_work) :
     nextNSStep_(-1),
     nextEnergyCalculationStep_(-1),
     nextVirialCalculationStep_(-1),
@@ -96,7 +96,7 @@ ForceElement::ForceElement(
     imdSession_(imdSession),
     pull_work_(pull_work),
     fcd_(fcd),
-    mdScheduleWork_(mdScheduleWork)
+    runScheduleWork_(runScheduleWork)
 {
     lambda_.fill(0);
 }
@@ -152,7 +152,7 @@ void ForceElement::run(Step step, Time time, unsigned int flags)
              box, x, hist,
              forces, force_vir, mdAtoms_->mdatoms(), energyElement_->enerdata(), fcd_,
              lambda_, graph,
-             fr_, mdScheduleWork_, vsite_, energyElement_->muTot(), time, ed,
+             fr_, runScheduleWork_, vsite_, energyElement_->muTot(), time, ed,
              static_cast<int>(flags), ddBalanceRegionHandler_);
     energyElement_->addToForceVirial(force_vir, step);
 }
index 86aca839f82de1ae38a1519a1519c428781868db..4334dfdaa4062b8a90e193bb9fa42ad99d0460a1 100644 (file)
@@ -62,7 +62,7 @@ class Awh;
 class EnergyElement;
 class ImdSession;
 class MDAtoms;
-class MdScheduleWorkload;
+class MdrunScheduleWorkload;
 class StatePropagatorData;
 
 //! \addtogroup module_modularsimulator
@@ -82,21 +82,21 @@ class ForceElement final :
     public:
         //! Constructor
         ForceElement(
-            StatePropagatorData *statePropagatorData,
-            EnergyElement       *energyElement,
-            bool                 isDynamicBox,
-            FILE                *fplog,
-            const t_commrec     *cr,
-            const t_inputrec    *inputrec,
-            const MDAtoms       *mdAtoms,
-            t_nrnb              *nrnb,
-            t_forcerec          *fr,
-            t_fcdata            *fcd,
-            gmx_wallcycle       *wcycle,
-            MdScheduleWorkload  *mdScheduleWork,
-            gmx_vsite_t         *vsite,
-            ImdSession          *imdSession,
-            pull_t              *pull_work);
+            StatePropagatorData   *statePropagatorData,
+            EnergyElement         *energyElement,
+            bool                   isDynamicBox,
+            FILE                  *fplog,
+            const t_commrec       *cr,
+            const t_inputrec      *inputrec,
+            const MDAtoms         *mdAtoms,
+            t_nrnb                *nrnb,
+            t_forcerec            *fr,
+            t_fcdata              *fcd,
+            gmx_wallcycle         *wcycle,
+            MdrunScheduleWorkload *runScheduleWork,
+            gmx_vsite_t           *vsite,
+            ImdSession            *imdSession,
+            pull_t                *pull_work);
 
         /*! \brief Register force calculation for step / time
          *
@@ -151,29 +151,29 @@ class ForceElement final :
 
         // Access to ISimulator data
         //! Handles logging.
-        FILE               *fplog_;
+        FILE                  *fplog_;
         //! Handles communication.
-        const t_commrec    *cr_;
+        const t_commrec       *cr_;
         //! Contains user input mdp options.
-        const t_inputrec   *inputrec_;
+        const t_inputrec      *inputrec_;
         //! Atom parameters for this domain.
-        const MDAtoms      *mdAtoms_;
+        const MDAtoms         *mdAtoms_;
         //! Manages flop accounting.
-        t_nrnb             *nrnb_;
+        t_nrnb                *nrnb_;
         //! Manages wall cycle accounting.
-        gmx_wallcycle      *wcycle_;
+        gmx_wallcycle         *wcycle_;
         //! Parameters for force calculations.
-        t_forcerec         *fr_;
+        t_forcerec            *fr_;
         //! Handles virtual sites.
-        gmx_vsite_t        *vsite_;
+        gmx_vsite_t           *vsite_;
         //! The Interactive Molecular Dynamics session.
-        ImdSession         *imdSession_;
+        ImdSession            *imdSession_;
         //! The pull work object.
-        pull_t             *pull_work_;
+        pull_t                *pull_work_;
         //! Helper struct for force calculations.
-        t_fcdata           *fcd_;
+        t_fcdata              *fcd_;
         //! Schedule of work for each MD step for this task.
-        MdScheduleWorkload *mdScheduleWork_;
+        MdrunScheduleWorkload *runScheduleWork_;
 };
 
 //! \}
index 64e2de68f31ca26d0ece32af4e0b9a34ee5eb654..885c3b204eba60135ae294ceff146b101711b0d4 100644 (file)
@@ -545,7 +545,7 @@ std::unique_ptr<ISimulatorElement> ModularSimulator::buildForces(
     {
         auto shellFCElement = std::make_unique<ShellFCElement>(
                     statePropagatorDataPtr, energyElementPtr, isVerbose, isDynamicBox, fplog,
-                    cr, inputrec, mdAtoms, nrnb, fr, fcd, wcycle, mdScheduleWork,
+                    cr, inputrec, mdAtoms, nrnb, fr, fcd, wcycle, runScheduleWork,
                     vsite, imdSession, pull_work, constr, &topologyHolder_->globalTopology());
         topologyHolder_->registerClient(shellFCElement.get());
         neighborSearchSignallerBuilder->registerSignallerClient(compat::make_not_null(shellFCElement.get()));
@@ -559,7 +559,7 @@ std::unique_ptr<ISimulatorElement> ModularSimulator::buildForces(
         auto forceElement = std::make_unique<ForceElement>(
                     statePropagatorDataPtr, energyElementPtr, isDynamicBox, fplog,
                     cr, inputrec, mdAtoms, nrnb, fr, fcd, wcycle,
-                    mdScheduleWork, vsite, imdSession, pull_work);
+                    runScheduleWork, vsite, imdSession, pull_work);
         topologyHolder_->registerClient(forceElement.get());
         neighborSearchSignallerBuilder->registerSignallerClient(compat::make_not_null(forceElement.get()));
         energySignallerBuilder->registerSignallerClient(compat::make_not_null(forceElement.get()));
index 7a82459f2de3ef96819db47e6954a429e3888533..d1e82a42799eb35efa7de0f14d8eb0e676b593d2 100644 (file)
@@ -76,24 +76,24 @@ bool ShellFCElement::doShellsOrFlexConstraints(
 }
 
 ShellFCElement::ShellFCElement(
-        StatePropagatorData *statePropagatorData,
-        EnergyElement       *energyElement,
-        bool                 isVerbose,
-        bool                 isDynamicBox,
-        FILE                *fplog,
-        const t_commrec     *cr,
-        const t_inputrec    *inputrec,
-        const MDAtoms       *mdAtoms,
-        t_nrnb              *nrnb,
-        t_forcerec          *fr,
-        t_fcdata            *fcd,
-        gmx_wallcycle       *wcycle,
-        MdScheduleWorkload  *mdScheduleWork,
-        gmx_vsite_t         *vsite,
-        ImdSession          *imdSession,
-        pull_t              *pull_work,
-        Constraints         *constr,
-        const gmx_mtop_t    *globalTopology) :
+        StatePropagatorData   *statePropagatorData,
+        EnergyElement         *energyElement,
+        bool                   isVerbose,
+        bool                   isDynamicBox,
+        FILE                  *fplog,
+        const t_commrec       *cr,
+        const t_inputrec      *inputrec,
+        const MDAtoms         *mdAtoms,
+        t_nrnb                *nrnb,
+        t_forcerec            *fr,
+        t_fcdata              *fcd,
+        gmx_wallcycle         *wcycle,
+        MdrunScheduleWorkload *runScheduleWork,
+        gmx_vsite_t           *vsite,
+        ImdSession            *imdSession,
+        pull_t                *pull_work,
+        Constraints           *constr,
+        const gmx_mtop_t      *globalTopology) :
     nextNSStep_(-1),
     nextEnergyCalculationStep_(-1),
     nextVirialCalculationStep_(-1),
@@ -116,7 +116,7 @@ ShellFCElement::ShellFCElement(
     imdSession_(imdSession),
     pull_work_(pull_work),
     fcd_(fcd),
-    mdScheduleWork_(mdScheduleWork),
+    runScheduleWork_(runScheduleWork),
     constr_(constr)
 {
     shellfc_ = init_shell_flexcon(
@@ -182,7 +182,7 @@ void ShellFCElement::run(Step step, Time time, unsigned int flags)
                         statePropagatorData_->localNumAtoms(),
                         x, v, box, lambda, hist, forces, force_vir,
                         mdAtoms_->mdatoms(), nrnb_, wcycle_, graph,
-                        shellfc_, fr_, mdScheduleWork_, time,
+                        shellfc_, fr_, runScheduleWork_, time,
                         energyElement_->muTot(), vsite_,
                         ddBalanceRegionHandler_);
     energyElement_->addToForceVirial(force_vir, step);
index 81cae5617974cffe990f9277783b0afa02978f40..4c6b225b2706d95bb969623251e67ef17070d70e 100644 (file)
@@ -59,7 +59,7 @@ class Awh;
 class EnergyElement;
 class ImdSession;
 class MDAtoms;
-class MdScheduleWorkload;
+class MdrunScheduleWorkload;
 class StatePropagatorData;
 
 //! \addtogroup module_modularsimulator
@@ -79,24 +79,24 @@ class ShellFCElement final :
     public:
         //! Constructor
         ShellFCElement(
-            StatePropagatorData *statePropagatorData,
-            EnergyElement       *energyElement,
-            bool                 isVerbose,
-            bool                 isDynamicBox,
-            FILE                *fplog,
-            const t_commrec     *cr,
-            const t_inputrec    *inputrec,
-            const MDAtoms       *mdAtoms,
-            t_nrnb              *nrnb,
-            t_forcerec          *fr,
-            t_fcdata            *fcd,
-            gmx_wallcycle       *wcycle,
-            MdScheduleWorkload  *mdScheduleWork,
-            gmx_vsite_t         *vsite,
-            ImdSession          *imdSession,
-            pull_t              *pull_work,
-            Constraints         *constr,
-            const gmx_mtop_t    *globalTopology);
+            StatePropagatorData   *statePropagatorData,
+            EnergyElement         *energyElement,
+            bool                   isVerbose,
+            bool                   isDynamicBox,
+            FILE                  *fplog,
+            const t_commrec       *cr,
+            const t_inputrec      *inputrec,
+            const MDAtoms         *mdAtoms,
+            t_nrnb                *nrnb,
+            t_forcerec            *fr,
+            t_fcdata              *fcd,
+            gmx_wallcycle         *wcycle,
+            MdrunScheduleWorkload *runScheduleWork,
+            gmx_vsite_t           *vsite,
+            ImdSession            *imdSession,
+            pull_t                *pull_work,
+            Constraints           *constr,
+            const gmx_mtop_t      *globalTopology);
 
         /*! \brief Register shell / flex constraint calculation for step / time
          *
@@ -158,31 +158,31 @@ class ShellFCElement final :
 
         // Access to ISimulator data
         //! Handles logging.
-        FILE               *fplog_;
+        FILE                  *fplog_;
         //! Handles communication.
-        const t_commrec    *cr_;
+        const t_commrec       *cr_;
         //! Contains user input mdp options.
-        const t_inputrec   *inputrec_;
+        const t_inputrec      *inputrec_;
         //! Atom parameters for this domain.
-        const MDAtoms      *mdAtoms_;
+        const MDAtoms         *mdAtoms_;
         //! Manages flop accounting.
-        t_nrnb             *nrnb_;
+        t_nrnb                *nrnb_;
         //! Manages wall cycle accounting.
-        gmx_wallcycle      *wcycle_;
+        gmx_wallcycle         *wcycle_;
         //! Parameters for force calculations.
-        t_forcerec         *fr_;
+        t_forcerec            *fr_;
         //! Handles virtual sites.
-        gmx_vsite_t        *vsite_;
+        gmx_vsite_t           *vsite_;
         //! The Interactive Molecular Dynamics session.
-        ImdSession         *imdSession_;
+        ImdSession            *imdSession_;
         //! The pull work object.
-        pull_t             *pull_work_;
+        pull_t                *pull_work_;
         //! Helper struct for force calculations.
-        t_fcdata           *fcd_;
+        t_fcdata              *fcd_;
         //! Schedule of work for each MD step for this task.
-        MdScheduleWorkload *mdScheduleWork_;
+        MdrunScheduleWorkload *runScheduleWork_;
         //! Handles constraints.
-        Constraints        *constr_;
+        Constraints           *constr_;
 };
 
 //! \}
index a7bf5d59f0ba84624069682d44e54b2b26618745..941772cae0f820a13b93f7aca6f3cff946e3e4f1 100644 (file)
 #include "gromacs/mdlib/force_flags.h"
 #include "gromacs/mdlib/forcerec.h"
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/ppforceworkload.h"
 #include "gromacs/mdtypes/enerdata.h"
 #include "gromacs/mdtypes/forcerec.h"
 #include "gromacs/mdtypes/interaction_const.h"
 #include "gromacs/mdtypes/mdatom.h"
+#include "gromacs/mdtypes/simulation_workload.h"
 #include "gromacs/nbnxm/atomdata.h"
 #include "gromacs/nbnxm/gridset.h"
 #include "gromacs/nbnxm/nbnxm.h"
@@ -298,12 +298,12 @@ static void setupAndRunInstance(const gmx::BenchmarkSystem &system,
 
     gmx_enerdata_t        enerd(1, 0);
 
-    gmx::ForceFlags       forceFlags;
-    forceFlags.computeForces = true;
+    gmx::StepWorkload     stepWork;
+    stepWork.computeForces = true;
     if (options.computeVirialAndEnergy)
     {
-        forceFlags.computeVirial = true;
-        forceFlags.computeEnergy = true;
+        stepWork.computeVirial = true;
+        stepWork.computeEnergy = true;
     }
 
     const gmx::EnumerationArray<BenchMarkKernels, std::string>  kernelNames = { "auto", "no", "4xM", "2xMM" };
@@ -323,7 +323,7 @@ static void setupAndRunInstance(const gmx::BenchmarkSystem &system,
     for (int iter = 0; iter < options.numPreIterations; iter++)
     {
         nbv->dispatchNonbondedKernel(InteractionLocality::Local,
-                                     ic, forceFlags, enbvClearFYes, system.forceRec,
+                                     ic, stepWork, enbvClearFYes, system.forceRec,
                                      &enerd,
                                      &nrnb);
     }
@@ -336,7 +336,7 @@ static void setupAndRunInstance(const gmx::BenchmarkSystem &system,
     {
         // Run the kernel without force clearing
         nbv->dispatchNonbondedKernel(InteractionLocality::Local,
-                                     ic, forceFlags, enbvClearFNo, system.forceRec,
+                                     ic, stepWork, enbvClearFNo, system.forceRec,
                                      &enerd,
                                      &nrnb);
     }
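
The benchmark hunk above is the simplest consumer of the renamed class: it default-constructs a StepWorkload and enables only the flags it needs. The same setup factored into a helper, as a sketch (the helper name is ours, not in the source):

    #include "gromacs/mdtypes/simulation_workload.h"

    // Hypothetical helper mirroring the benchmark setup above.
    gmx::StepWorkload makeBenchmarkStepWork(bool computeVirialAndEnergy)
    {
        gmx::StepWorkload stepWork;
        stepWork.computeForces = true; // the benchmark always computes forces
        if (computeVirialAndEnergy)
        {
            stepWork.computeVirial = true;
            stepWork.computeEnergy = true;
        }
        return stepWork;
    }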
index 05c0278abc9c71878138f137e0efc2d4819d07fa..10a6f0a0d37b5f1c248869b89188ddf242c0e406 100644 (file)
@@ -56,7 +56,7 @@
 #include "gromacs/gpu_utils/cudautils.cuh"
 #include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
 #include "gromacs/gpu_utils/vectype_ops.cuh"
-#include "gromacs/mdlib/ppforceworkload.h"
+#include "gromacs/mdtypes/simulation_workload.h"
 #include "gromacs/nbnxm/atomdata.h"
 #include "gromacs/nbnxm/gpu_common.h"
 #include "gromacs/nbnxm/gpu_common_utils.h"
@@ -402,7 +402,7 @@ void gpu_copy_xq_to_gpu(gmx_nbnxn_cuda_t       *nb,
    with this event in the non-local stream before launching the non-bonded kernel.
  */
 void gpu_launch_kernel(gmx_nbnxn_cuda_t          *nb,
-                       const gmx::ForceFlags     &forceFlags,
+                       const gmx::StepWorkload   &stepWork,
                        const InteractionLocality  iloc)
 {
     cu_atomdata_t       *adat    = nb->atdat;
@@ -485,10 +485,10 @@ void gpu_launch_kernel(gmx_nbnxn_cuda_t          *nb,
     auto       *timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
     const auto  kernel      = select_nbnxn_kernel(nbp->eeltype,
                                                   nbp->vdwtype,
-                                                  forceFlags.computeEnergy,
+                                                  stepWork.computeEnergy,
                                                   (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune),
                                                   nb->dev_info);
-    const auto kernelArgs  = prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &forceFlags.computeVirial);
+    const auto kernelArgs  = prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &stepWork.computeVirial);
     launchGpuKernel(kernel, config, timingEvent, "k_calc_nb", kernelArgs);
 
     if (bDoTime)
@@ -640,11 +640,11 @@ void gpu_launch_kernel_pruneonly(gmx_nbnxn_cuda_t          *nb,
     }
 }
 
-void gpu_launch_cpyback(gmx_nbnxn_cuda_t       *nb,
-                        nbnxn_atomdata_t       *nbatom,
-                        const gmx::ForceFlags  &forceFlags,
-                        const AtomLocality      atomLocality,
-                        const bool              copyBackNbForce)
+void gpu_launch_cpyback(gmx_nbnxn_cuda_t        *nb,
+                        nbnxn_atomdata_t        *nbatom,
+                        const gmx::StepWorkload &stepWork,
+                        const AtomLocality       atomLocality,
+                        const bool               copyBackNbForce)
 {
     GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
 
@@ -703,14 +703,14 @@ void gpu_launch_cpyback(gmx_nbnxn_cuda_t       *nb,
     if (iloc == InteractionLocality::Local)
     {
         /* DtoH fshift when virial is needed */
-        if (forceFlags.computeVirial)
+        if (stepWork.computeVirial)
         {
             cu_copy_D2H_async(nb->nbst.fshift, adat->fshift,
                               SHIFTS * sizeof(*nb->nbst.fshift), stream);
         }
 
         /* DtoH energies */
-        if (forceFlags.computeEnergy)
+        if (stepWork.computeEnergy)
         {
             cu_copy_D2H_async(nb->nbst.e_lj, adat->e_lj,
                               sizeof(*nb->nbst.e_lj), stream);
index 599c97edd4d249c5d9980b9abc601a1baf2205ce..5e670d4e1673872c4ef1cdaa69ae42f84b864e36 100644 (file)
@@ -58,7 +58,7 @@
 #include "gromacs/gpu_utils/gpu_utils.h"
 #include "gromacs/listed_forces/gpubonded.h"
 #include "gromacs/math/vec.h"
-#include "gromacs/mdlib/ppforceworkload.h"
+#include "gromacs/mdtypes/simulation_workload.h"
 #include "gromacs/nbnxm/nbnxm.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/timing/gpu_timing.h"
@@ -367,7 +367,7 @@ gpu_accumulate_timings(gmx_wallclock_gpu_nbnxn_t *timings,
 //TODO: move into shared source file with gmx_compile_cpp_as_cuda
 //NOLINTNEXTLINE(misc-definitions-in-headers)
 bool gpu_try_finish_task(gmx_nbnxn_gpu_t          *nb,
-                         const gmx::ForceFlags    &forceFlags,
+                         const gmx::StepWorkload  &stepWork,
                          const AtomLocality        aloc,
                          real                     *e_lj,
                          real                     *e_el,
@@ -410,10 +410,10 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t          *nb,
             gpuStreamSynchronize(nb->stream[iLocality]);
         }
 
-        gpu_accumulate_timings(nb->timings, nb->timers, nb->plist[iLocality], aloc, forceFlags.computeEnergy,
+        gpu_accumulate_timings(nb->timings, nb->timers, nb->plist[iLocality], aloc, stepWork.computeEnergy,
                                nb->bDoTime != 0);
 
-        gpu_reduce_staged_outputs(nb->nbst, iLocality, forceFlags.computeEnergy, forceFlags.computeVirial,
+        gpu_reduce_staged_outputs(nb->nbst, iLocality, stepWork.computeEnergy, stepWork.computeVirial,
                                   e_lj, e_el, as_rvec_array(shiftForces.data()));
     }
 
@@ -435,7 +435,7 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t          *nb,
  * pruning flags.
  *
  * \param[in] nb The nonbonded data GPU structure
- * \param[in]  forceFlags     Force schedule flags
+ * \param[in]  stepWork     Force schedule flags
  * \param[in] aloc Atom locality identifier
  * \param[out] e_lj Pointer to the LJ energy output to accumulate into
  * \param[out] e_el Pointer to the electrostatics energy output to accumulate into
@@ -445,7 +445,7 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t          *nb,
  */
 //NOLINTNEXTLINE(misc-definitions-in-headers) TODO: move into source file
 float gpu_wait_finish_task(gmx_nbnxn_gpu_t         *nb,
-                           const gmx::ForceFlags   &forceFlags,
+                           const gmx::StepWorkload &stepWork,
                            AtomLocality             aloc,
                            real                    *e_lj,
                            real                    *e_el,
@@ -456,7 +456,7 @@ float gpu_wait_finish_task(gmx_nbnxn_gpu_t         *nb,
         (gpuAtomToInteractionLocality(aloc) == InteractionLocality::Local) ? ewcWAIT_GPU_NB_L : ewcWAIT_GPU_NB_NL;
 
     wallcycle_start(wcycle, cycleCounter);
-    gpu_try_finish_task(nb, forceFlags, aloc, e_lj, e_el, shiftForces,
+    gpu_try_finish_task(nb, stepWork, aloc, e_lj, e_el, shiftForces,
                         GpuTaskCompletion::Wait, wcycle);
     float waitTime = wallcycle_stop(wcycle, cycleCounter);
 
index 2d53a92dc752691fca58e0ee902d8f577a22c68e..24d693b47a88576ca2148c3023a234ed4c1f13c3 100644 (file)
 #include "gromacs/mdlib/enerdata_utils.h"
 #include "gromacs/mdlib/force.h"
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/ppforceworkload.h"
 #include "gromacs/mdtypes/enerdata.h"
 #include "gromacs/mdtypes/forceoutput.h"
 #include "gromacs/mdtypes/inputrec.h"
 #include "gromacs/mdtypes/interaction_const.h"
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/mdtypes/mdatom.h"
+#include "gromacs/mdtypes/simulation_workload.h"
 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 #include "gromacs/nbnxm/nbnxm.h"
 #include "gromacs/nbnxm/nbnxm_simd.h"
@@ -143,7 +143,7 @@ reduceGroupEnergySimdBuffers(int                       numGroups,
  * \param[in,out] nbat          The atomdata for the interactions
  * \param[in]     ic            Non-bonded interaction constants
  * \param[in]     shiftVectors  The PBC shift vectors
- * \param[in]     forceFlags    Flags that tell what to compute
+ * \param[in]     stepWork      Flags that tell what to compute
  * \param[in]     clearF        Enum that tells if to clear the force output buffer
  * \param[out]    vCoulomb      Output buffer for Coulomb energies
  * \param[out]    vVdw          Output buffer for Van der Waals energies
@@ -155,7 +155,7 @@ nbnxn_kernel_cpu(const PairlistSet              &pairlistSet,
                  nbnxn_atomdata_t               *nbat,
                  const interaction_const_t      &ic,
                  rvec                           *shiftVectors,
-                 const gmx::ForceFlags          &forceFlags,
+                 const gmx::StepWorkload        &stepWork,
                  int                             clearF,
                  real                           *vCoulomb,
                  real                           *vVdw,
@@ -266,7 +266,7 @@ nbnxn_kernel_cpu(const PairlistSet              &pairlistSet,
         // TODO: Change to reference
         const NbnxnPairlistCpu *pairlist = &pairlists[nb];
 
-        if (!forceFlags.computeEnergy)
+        if (!stepWork.computeEnergy)
         {
             /* Don't calculate energies */
             switch (kernelSetup.kernelType)
@@ -396,7 +396,7 @@ nbnxn_kernel_cpu(const PairlistSet              &pairlistSet,
     }
     wallcycle_sub_stop(wcycle, ewcsNONBONDED_KERNEL);
 
-    if (forceFlags.computeEnergy)
+    if (stepWork.computeEnergy)
     {
         reduce_energies_over_lists(nbat, pairlists.ssize(), vVdw, vCoulomb);
     }
@@ -406,7 +406,7 @@ static void accountFlops(t_nrnb                           *nrnb,
                          const PairlistSet                &pairlistSet,
                          const nonbonded_verlet_t         &nbv,
                          const interaction_const_t        &ic,
-                         const gmx::ForceFlags            &forceFlags)
+                         const gmx::StepWorkload          &stepWork)
 {
     const bool usingGpuKernels = nbv.useGpu();
 
@@ -425,7 +425,7 @@ static void accountFlops(t_nrnb                           *nrnb,
         enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_TAB;
     }
     int enr_nbnxn_kernel_lj = eNR_NBNXN_LJ;
-    if (forceFlags.computeEnergy)
+    if (stepWork.computeEnergy)
     {
         /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
         enr_nbnxn_kernel_ljc += 1;
@@ -443,19 +443,19 @@ static void accountFlops(t_nrnb                           *nrnb,
     if (ic.vdw_modifier == eintmodFORCESWITCH)
     {
         /* We add up the switch cost separately */
-        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW + (forceFlags.computeEnergy ? 1 : 0),
+        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW + (stepWork.computeEnergy ? 1 : 0),
                  pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_);
     }
     if (ic.vdw_modifier == eintmodPOTSWITCH)
     {
         /* We add up the switch cost separately */
-        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW + (forceFlags.computeEnergy ? 1 : 0),
+        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW + (stepWork.computeEnergy ? 1 : 0),
                  pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_);
     }
     if (ic.vdwtype == evdwPME)
     {
         /* We add up the LJ Ewald cost separately */
-        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD + (forceFlags.computeEnergy ? 1 : 0),
+        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD + (stepWork.computeEnergy ? 1 : 0),
                  pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_);
     }
 }
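
The "+ 1" offsets above rely on the eNR counter layout, in which each F+E
kernel counter immediately follows its force-only counterpart; schematically
(the _E counter name below is hypothetical, shown only to illustrate):

    // With a layout like
    //     eNR_NBNXN_LJ_TAB,      // F-only kernel counter
    //     eNR_NBNXN_LJ_TAB_E,    // F+E kernel counter, always F + 1
    // the energy variant is selected by a constant offset:
    inc_nrnb(nrnb, enr_nbnxn_kernel_ljc + (stepWork.computeEnergy ? 1 : 0),
             pairlistSet.natpair_ljq_);
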
@@ -463,7 +463,7 @@ static void accountFlops(t_nrnb                           *nrnb,
 void
 nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality,
                                             const interaction_const_t &ic,
-                                            const gmx::ForceFlags     &forceFlags,
+                                            const gmx::StepWorkload   &stepWork,
                                             int                        clearF,
                                             const t_forcerec          &fr,
                                             gmx_enerdata_t            *enerd,
@@ -481,7 +481,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality
                              nbat.get(),
                              ic,
                              fr.shift_vec,
-                             forceFlags,
+                             stepWork,
                              clearF,
                              enerd->grpp.ener[egCOULSR].data(),
                              fr.bBHAM ?
@@ -491,14 +491,14 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality
             break;
 
         case Nbnxm::KernelType::Gpu8x8x8:
-            Nbnxm::gpu_launch_kernel(gpu_nbv, forceFlags, iLocality);
+            Nbnxm::gpu_launch_kernel(gpu_nbv, stepWork, iLocality);
             break;
 
         case Nbnxm::KernelType::Cpu8x8x8_PlainC:
             nbnxn_kernel_gpu_ref(pairlistSet.gpuList(),
                                  nbat.get(), &ic,
                                  fr.shift_vec,
-                                 forceFlags,
+                                 stepWork,
                                  clearF,
                                  nbat->out[0].f,
                                  nbat->out[0].fshift.data(),
@@ -513,7 +513,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality
 
     }
 
-    accountFlops(nrnb, pairlistSet, *this, ic, forceFlags);
+    accountFlops(nrnb, pairlistSet, *this, ic, stepWork);
 }
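
A hedged sketch of a call site after the rename; the setup of nbv, fr, ic,
enerd and nrnb is assumed from the surrounding force schedule, and the
per-step booleans on the right-hand side are placeholders:

    gmx::StepWorkload stepWork;
    stepWork.computeForces = true;
    stepWork.computeEnergy = doEnergyThisStep;   // hypothetical flag
    stepWork.computeVirial = doVirialThisStep;   // hypothetical flag

    nbv->dispatchNonbondedKernel(Nbnxm::InteractionLocality::Local,
                                 ic, stepWork, enbvClearFYes,
                                 *fr, enerd, nrnb);
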
 
 void
@@ -525,7 +525,7 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocali
                                              t_lambda                   *fepvals,
                                              real                       *lambda,
                                              gmx_enerdata_t             *enerd,
-                                             const gmx::ForceFlags      &forceFlags,
+                                             const gmx::StepWorkload    &stepWork,
                                              t_nrnb                     *nrnb)
 {
     const auto nbl_fep = pairlistSets().pairlistSet(iLocality).fepLists();
@@ -541,15 +541,15 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocali
     donb_flags |= GMX_NONBONDED_DO_SR;
 
     /* Currently all group scheme kernels always calculate (shift-)forces */
-    if (forceFlags.computeForces)
+    if (stepWork.computeForces)
     {
         donb_flags |= GMX_NONBONDED_DO_FORCE;
     }
-    if (forceFlags.computeVirial)
+    if (stepWork.computeVirial)
     {
         donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
     }
-    if (forceFlags.computeEnergy)
+    if (stepWork.computeEnergy)
     {
         donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
     }
@@ -592,7 +592,7 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocali
     /* If we do foreign lambda and we have soft-core interactions
      * we have to recalculate the (non-linear) energy contributions.
      */
-    if (fepvals->n_lambda > 0 && forceFlags.computeDhdl && fepvals->sc_alpha != 0)
+    if (fepvals->n_lambda > 0 && stepWork.computeDhdl && fepvals->sc_alpha != 0)
     {
         real lam_i[efptNR];
         kernel_data.flags          = (donb_flags & ~(GMX_NONBONDED_DO_FORCE | GMX_NONBONDED_DO_SHIFTFORCE)) | GMX_NONBONDED_DO_FOREIGNLAMBDA;
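
Factored out for clarity, the translation this function performs from per-step
workload flags to the legacy group-scheme flag bits is the following (a sketch
mirroring the code above, not a helper that exists in this change):

    static int donbFlagsFromStepWorkload(const gmx::StepWorkload &stepWork)
    {
        int donb_flags = GMX_NONBONDED_DO_SR;
        if (stepWork.computeForces)
        {
            donb_flags |= GMX_NONBONDED_DO_FORCE;
        }
        if (stepWork.computeVirial)
        {
            donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
        }
        if (stepWork.computeEnergy)
        {
            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
        }
        return donb_flags;
    }
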
index 16a439711295b359e5276ee0508ad8462e0b13a4..d9489ec746205252b2fd72167f24defa4bf8fdc5 100644 (file)
@@ -43,8 +43,8 @@
 #include "gromacs/math/functions.h"
 #include "gromacs/math/utilities.h"
 #include "gromacs/math/vec.h"
-#include "gromacs/mdlib/ppforceworkload.h"
 #include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/mdtypes/simulation_workload.h"
 #include "gromacs/nbnxm/atomdata.h"
 #include "gromacs/nbnxm/nbnxm.h"
 #include "gromacs/nbnxm/pairlist.h"
@@ -59,7 +59,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu     *nbl,
                      const nbnxn_atomdata_t     *nbat,
                      const interaction_const_t  *iconst,
                      rvec                       *shift_vec,
-                     const gmx::ForceFlags      &forceFlags,
+                     const gmx::StepWorkload    &stepWork,
                      int                         clearF,
                      gmx::ArrayRef<real>         f,
                      real  *                     fshift,
@@ -262,7 +262,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu     *nbl,
                                     /* Reaction-field */
                                     krsq  = iconst->k_rf*rsq;
                                     fscal = qq*(int_bit*rinv - 2*krsq)*rinvsq;
-                                    if (forceFlags.computeEnergy)
+                                    if (stepWork.computeEnergy)
                                     {
                                         vcoul = qq*(int_bit*rinv + krsq - iconst->c_rf);
                                     }
@@ -278,7 +278,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu     *nbl,
 
                                     fscal = qq*(int_bit*rinvsq - fexcl)*rinv;
 
-                                    if (forceFlags.computeEnergy)
+                                    if (stepWork.computeEnergy)
                                     {
                                         vcoul = qq*((int_bit - std::erf(iconst->ewaldcoeff_q*r))*rinv - int_bit*iconst->sh_ewald);
                                     }
@@ -297,7 +297,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu     *nbl,
                                     Vvdw_rep  = c12*rinvsix*rinvsix;
                                     fscal    += (Vvdw_rep - Vvdw_disp)*rinvsq;
 
-                                    if (forceFlags.computeEnergy)
+                                    if (stepWork.computeEnergy)
                                     {
                                         vctot   += vcoul;
 
@@ -347,7 +347,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu     *nbl,
             }
         }
 
-        if (forceFlags.computeEnergy)
+        if (stepWork.computeEnergy)
         {
             ggid             = 0;
             Vc[ggid]         = Vc[ggid]   + vctot;
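
For reference, the reaction-field branch above evaluates per pair, with the
exclusion bit folded in as int_bit and q = q_i q_j:

    V_c(r)      = q \left( \frac{int\_bit}{r} + k_{rf} r^2 - c_{rf} \right)
    f_{scal}(r) = q \left( \frac{int\_bit}{r} - 2 k_{rf} r^2 \right) \frac{1}{r^2}

Here f_{scal} is F(r)/r, the scalar multiplied into the pair distance vector;
these expressions match fscal and vcoul as computed in the kernel.
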
index d26747e0c7e5664d67d3755f88c2718fcd7e56db..c508f1e34c5b00f1b323a4f57e0f9a1b957f1988 100644 (file)
@@ -46,7 +46,7 @@ struct nbnxn_atomdata_t;
 
 namespace gmx
 {
-class ForceFlags;
+class StepWorkload;
 }
 
 /* Reference (slow) kernel for nonbonded n-vs-n GPU-type pair lists */
@@ -55,7 +55,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu     *nbl,
                      const nbnxn_atomdata_t     *nbat,
                      const interaction_const_t  *iconst,
                      rvec                       *shift_vec,
-                     const gmx::ForceFlags      &forceFlags,
+                     const gmx::StepWorkload    &stepWork,
                      int                         clearF,
                      gmx::ArrayRef<real>         f,
                      real  *                     fshift,
index caa21d9c7f0a4a72e4e09ee14db334c64a354fd3..c6d12d2e87231a6d4f14c7b19e676b22ce371a5f 100644 (file)
@@ -329,7 +329,7 @@ struct nonbonded_verlet_t
         //! \brief Executes the non-bonded kernel of the GPU or launches it on the GPU
         void dispatchNonbondedKernel(Nbnxm::InteractionLocality  iLocality,
                                      const interaction_const_t  &ic,
-                                     const gmx::ForceFlags      &forceFlags,
+                                     const gmx::StepWorkload    &stepWork,
                                      int                         clearF,
                                      const t_forcerec           &fr,
                                      gmx_enerdata_t             *enerd,
@@ -344,7 +344,7 @@ struct nonbonded_verlet_t
                                       t_lambda                   *fepvals,
                                       real                       *lambda,
                                       gmx_enerdata_t             *enerd,
-                                      const gmx::ForceFlags      &forceFlags,
+                                      const gmx::StepWorkload    &stepWork,
                                       t_nrnb                     *nrnb);
 
         /*! \brief Add the forces stored in nbat to f, zeros the forces in nbat
index 635b9d97908bd23068f75c7f450825763b4e926f..fef2e749bc6d21d235dae9fced165031d4ee7d91 100644 (file)
@@ -59,7 +59,7 @@ enum class GpuTaskCompletion;
 namespace gmx
 {
 class GpuBonded;
-class ForceFlags;
+class StepWorkload;
 }
 
 namespace Nbnxm
@@ -93,9 +93,9 @@ void gpu_copy_xq_to_gpu(gmx_nbnxn_gpu_t gmx_unused               *nb,
  *
  */
 GPU_FUNC_QUALIFIER
-void gpu_launch_kernel(gmx_nbnxn_gpu_t gmx_unused      *nb,
-                       const gmx::ForceFlags gmx_unused &forceFlags,
-                       InteractionLocality gmx_unused    iloc) GPU_FUNC_TERM;
+void gpu_launch_kernel(gmx_nbnxn_gpu_t gmx_unused         *nb,
+                       const gmx::StepWorkload gmx_unused &stepWork,
+                       InteractionLocality gmx_unused      iloc) GPU_FUNC_TERM;
 
 /*! \brief
  * Launch asynchronously the nonbonded prune-only kernel.
@@ -142,11 +142,11 @@ void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t gmx_unused     *nb,
  * (and energies/shift forces if required).
  */
 GPU_FUNC_QUALIFIER
-void gpu_launch_cpyback(gmx_nbnxn_gpu_t       gmx_unused *nb,
-                        nbnxn_atomdata_t      gmx_unused *nbatom,
-                        const gmx::ForceFlags gmx_unused  &forceFlags,
-                        AtomLocality          gmx_unused  aloc,
-                        bool                  gmx_unused  copyBackNbForce) GPU_FUNC_TERM;
+void gpu_launch_cpyback(gmx_nbnxn_gpu_t         gmx_unused *nb,
+                        nbnxn_atomdata_t        gmx_unused *nbatom,
+                        const gmx::StepWorkload gmx_unused &stepWork,
+                        AtomLocality            gmx_unused aloc,
+                        bool                    gmx_unused copyBackNbForce) GPU_FUNC_TERM;
 
 /*! \brief Attempts to complete nonbonded GPU task.
  *
@@ -176,7 +176,7 @@ void gpu_launch_cpyback(gmx_nbnxn_gpu_t       gmx_unused *nb,
  *  the energy and Fshift contributions for some external/centralized reduction.
  *
  * \param[in]  nb             The nonbonded data GPU structure
- * \param[in]  forceFlags     Force schedule flags
+ * \param[in]  stepWork       Step schedule flags
  * \param[in]  aloc           Atom locality identifier
  * \param[out] e_lj           Pointer to the LJ energy output to accumulate into
  * \param[out] e_el           Pointer to the electrostatics energy output to accumulate into
@@ -186,8 +186,8 @@ void gpu_launch_cpyback(gmx_nbnxn_gpu_t       gmx_unused *nb,
  * \returns                   True if the nonbonded tasks associated with \p aloc locality have completed
  */
 GPU_FUNC_QUALIFIER
-bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused           *nb,
-                         const gmx::ForceFlags gmx_unused     &forceFlags,
+bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused          *nb,
+                         const gmx::StepWorkload gmx_unused  &stepWork,
                          AtomLocality    gmx_unused           aloc,
                          real            gmx_unused          *e_lj,
                          real            gmx_unused          *e_el,
@@ -203,7 +203,7 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused           *nb,
  * pruning flags.
  *
  * \param[in] nb The nonbonded data GPU structure
- * \param[in]  forceFlags     Force schedule flags
+ * \param[in] stepWork Step schedule flags
  * \param[in] aloc Atom locality identifier
  * \param[out] e_lj Pointer to the LJ energy output to accumulate into
  * \param[out] e_el Pointer to the electrostatics energy output to accumulate into
@@ -211,7 +211,7 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused           *nb,
  */
 GPU_FUNC_QUALIFIER
 float gpu_wait_finish_task(gmx_nbnxn_gpu_t          gmx_unused *nb,
-                           const gmx::ForceFlags    gmx_unused &forceFlags,
+                           const gmx::StepWorkload  gmx_unused &stepWork,
                            AtomLocality             gmx_unused  aloc,
                            real                     gmx_unused *e_lj,
                            real                     gmx_unused *e_el,
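
A hedged sketch of the intended host-side sequence for these entry points;
only arguments whose declarations are fully visible in this header are spelled
out, and nb, nbatom and stepWork are assumed to exist:

    // Launch the nonbonded kernel, then the device-to-host copy-back
    Nbnxm::gpu_launch_kernel(nb, stepWork, Nbnxm::InteractionLocality::Local);
    Nbnxm::gpu_launch_cpyback(nb, nbatom, stepWork, Nbnxm::AtomLocality::Local,
                              /* copyBackNbForce = */ true);
    // Completion is then either polled with gpu_try_finish_task() so CPU work
    // can overlap, or enforced with gpu_wait_finish_task(); both receive the
    // same stepWork so they know whether energies and shift forces must be
    // reduced.
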
index 1a0fbd77051b01efd2e51cc8865385366c76093d..634801dc444b5930127888e7e9393676ded9e649 100644 (file)
@@ -72,7 +72,7 @@
 #include "gromacs/gpu_utils/gputraits_ocl.h"
 #include "gromacs/gpu_utils/oclutils.h"
 #include "gromacs/hardware/hw_info.h"
-#include "gromacs/mdlib/ppforceworkload.h"
+#include "gromacs/mdtypes/simulation_workload.h"
 #include "gromacs/nbnxm/atomdata.h"
 #include "gromacs/nbnxm/gpu_common.h"
 #include "gromacs/nbnxm/gpu_common_utils.h"
@@ -468,7 +468,7 @@ void gpu_copy_xq_to_gpu(gmx_nbnxn_ocl_t        *nb,
    are finished and synchronize with this event in the non-local stream.
  */
 void gpu_launch_kernel(gmx_nbnxn_ocl_t                  *nb,
-                       const gmx::ForceFlags            &forceFlags,
+                       const gmx::StepWorkload          &stepWork,
                        const Nbnxm::InteractionLocality  iloc)
 {
     cl_atomdata_t       *adat    = nb->atdat;
@@ -546,13 +546,13 @@ void gpu_launch_kernel(gmx_nbnxn_ocl_t                  *nb,
     const auto     kernel       = select_nbnxn_kernel(nb,
                                                       nbp->eeltype,
                                                       nbp->vdwtype,
-                                                      forceFlags.computeEnergy,
+                                                      stepWork.computeEnergy,
                                                       (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune));
 
 
     // The OpenCL kernel takes int as second to last argument because bool is
     // not supported as a kernel argument type (sizeof(bool) is implementation defined).
-    const int computeFshift = static_cast<int>(forceFlags.computeVirial);
+    const int computeFshift = static_cast<int>(stepWork.computeVirial);
     if (useLjCombRule(nb->nbparam->vdwtype))
     {
         const auto kernelArgs = prepareGpuKernelArguments(kernel, config,
@@ -734,7 +734,7 @@ void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t           *nb,
  */
 void gpu_launch_cpyback(gmx_nbnxn_ocl_t                          *nb,
                         struct nbnxn_atomdata_t                  *nbatom,
-                        const gmx::ForceFlags                    &forceFlags,
+                        const gmx::StepWorkload                  &stepWork,
                         const AtomLocality                        aloc,
                         const bool                     gmx_unused copyBackNbForce)
 {
@@ -804,14 +804,14 @@ void gpu_launch_cpyback(gmx_nbnxn_ocl_t                          *nb,
     if (iloc == InteractionLocality::Local)
     {
         /* DtoH fshift when virial is needed */
-        if (forceFlags.computeVirial)
+        if (stepWork.computeVirial)
         {
             ocl_copy_D2H_async(nb->nbst.fshift, adat->fshift, 0,
                                SHIFTS * adat->fshift_elem_size, stream, bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr);
         }
 
         /* DtoH energies */
-        if (forceFlags.computeEnergy)
+        if (stepWork.computeEnergy)
         {
             ocl_copy_D2H_async(nb->nbst.e_lj, adat->e_lj, 0,
                                sizeof(float), stream, bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr);