From 9ee5f77f56ecdbaa9e57cf65e40631f7b6065af0 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Szil=C3=A1rd=20P=C3=A1ll?=
Date: Fri, 13 Sep 2019 02:02:45 +0200
Subject: [PATCH] Set up workload data structures

This change establishes three data structures for describing the
computational workload within a run. Flags related to the workload are
categorized into three groups based on their lifetime:
- per-step flags in the StepWorkload class
- domain lifetime / nstlist steps in DomainLifetimeWorkload
- constant over the entire simulation in SimulationWorkload

The present change only introduces the new naming, by renaming the
former ForceFlags to StepWorkload and PpForceWorkload to
DomainLifetimeWorkload. Moving flags from e.g. the force schedule into
these structures will follow.

Change-Id: I2bcc911091e2dd8ca0bcbf53a40dfcda09ba368b
---
 src/gromacs/ewald/pme_gpu.cpp | 1 -
 src/gromacs/listed_forces/gpubonded.h | 8 +-
 src/gromacs/listed_forces/gpubonded_impl.cpp | 2 +-
 src/gromacs/listed_forces/gpubondedkernels.cu | 12 +-
 src/gromacs/listed_forces/listed_forces.cpp | 112 +++---
 src/gromacs/listed_forces/listed_forces.h | 6 +-
 src/gromacs/listed_forces/pairs.cpp | 8 +-
 src/gromacs/listed_forces/pairs.h | 4 +-
 src/gromacs/mdlib/force.cpp | 14 +-
 src/gromacs/mdlib/force.h | 8 +-
 src/gromacs/mdlib/sim_util.cpp | 290 +++++++++---------
 src/gromacs/mdrun/isimulator.h | 8 +-
 src/gromacs/mdrun/md.cpp | 4 +-
 src/gromacs/mdrun/mimic.cpp | 4 +-
 src/gromacs/mdrun/minimize.cpp | 16 +-
 src/gromacs/mdrun/rerun.cpp | 4 +-
 src/gromacs/mdrun/runner.cpp | 6 +-
 src/gromacs/mdrun/shellfc.cpp | 6 +-
 src/gromacs/mdrun/shellfc.h | 4 +-
 src/gromacs/mdrun/simulatorbuilder.h | 2 +-
 src/gromacs/mdrun/tpi.cpp | 2 +-
 .../simulation_workload.h} | 78 +++--
 src/gromacs/modularsimulator/forceelement.cpp | 34 +-
 src/gromacs/modularsimulator/forceelement.h | 56 ++--
 .../modularsimulator/modularsimulator.cpp | 4 +-
 .../modularsimulator/shellfcelement.cpp | 40 +--
 src/gromacs/modularsimulator/shellfcelement.h | 64 ++--
 src/gromacs/nbnxm/benchmark/bench_setup.cpp | 14 +-
 src/gromacs/nbnxm/cuda/nbnxm_cuda.cu | 22 +-
 src/gromacs/nbnxm/gpu_common.h | 14 +-
 src/gromacs/nbnxm/kerneldispatch.cpp | 40 +--
 .../kernels_reference/kernel_gpu_ref.cpp | 12 +-
 .../nbnxm/kernels_reference/kernel_gpu_ref.h | 4 +-
 src/gromacs/nbnxm/nbnxm.h | 4 +-
 src/gromacs/nbnxm/nbnxm_gpu.h | 28 +-
 src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp | 14 +-
 36 files changed, 481 insertions(+), 468 deletions(-)
 rename src/gromacs/{mdlib/ppforceworkload.h => mdtypes/simulation_workload.h} (60%)

diff --git a/src/gromacs/ewald/pme_gpu.cpp b/src/gromacs/ewald/pme_gpu.cpp
index ed7cd19f2e..e61121d498 100644
--- a/src/gromacs/ewald/pme_gpu.cpp
+++ b/src/gromacs/ewald/pme_gpu.cpp
@@ -51,7 +51,6 @@
 #include "gromacs/fft/parallel_3dfft.h"
 #include "gromacs/math/invertmatrix.h"
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/ppforceworkload.h"
 #include "gromacs/mdtypes/enerdata.h"
 #include "gromacs/mdtypes/forceoutput.h"
 #include "gromacs/mdtypes/inputrec.h"
diff --git a/src/gromacs/listed_forces/gpubonded.h b/src/gromacs/listed_forces/gpubonded.h
index b7a13fcae2..8fe7b49021 100644
--- a/src/gromacs/listed_forces/gpubonded.h
+++ b/src/gromacs/listed_forces/gpubonded.h
@@ -65,7 +65,7 @@
 struct gmx_wallcycle;
 namespace gmx
 {
-class ForceFlags;
+class StepWorkload;
 /*! \brief The number on bonded function types supported on GPUs */
 static constexpr int numFTypesOnGpu = 8;
@@ -137,9 +137,9 @@ class GpuBonded
 * assigned to the GPU */
 bool haveInteractions() const;
 /*!
\brief Launches bonded kernel on a GPU */ - void launchKernel(const t_forcerec *fr, - const gmx::ForceFlags &forceFlags, - const matrix box); + void launchKernel(const t_forcerec *fr, + const gmx::StepWorkload &stepWork, + const matrix box); /*! \brief Launches the transfer of computed bonded energies. */ void launchEnergyTransfer(); /*! \brief Waits on the energy transfer, and accumulates bonded energies to \c enerd. */ diff --git a/src/gromacs/listed_forces/gpubonded_impl.cpp b/src/gromacs/listed_forces/gpubonded_impl.cpp index 3de01e3055..13a7d37283 100644 --- a/src/gromacs/listed_forces/gpubonded_impl.cpp +++ b/src/gromacs/listed_forces/gpubonded_impl.cpp @@ -190,7 +190,7 @@ GpuBonded::haveInteractions() const void GpuBonded::launchKernel(const t_forcerec * /* fr */, - const gmx::ForceFlags & /* forceFlags */, + const gmx::StepWorkload & /* stepWork */, const matrix /* box */) { } diff --git a/src/gromacs/listed_forces/gpubondedkernels.cu b/src/gromacs/listed_forces/gpubondedkernels.cu index 709d99cc9a..e78b32c6fd 100644 --- a/src/gromacs/listed_forces/gpubondedkernels.cu +++ b/src/gromacs/listed_forces/gpubondedkernels.cu @@ -57,8 +57,8 @@ #include "gromacs/listed_forces/gpubonded.h" #include "gromacs/math/units.h" #include "gromacs/mdlib/force_flags.h" -#include "gromacs/mdlib/ppforceworkload.h" #include "gromacs/mdtypes/forcerec.h" +#include "gromacs/mdtypes/simulation_workload.h" #include "gromacs/pbcutil/pbc.h" #include "gromacs/pbcutil/pbc_aiuc_cuda.cuh" #include "gromacs/utility/gmxassert.h" @@ -862,17 +862,17 @@ GpuBonded::Impl::launchKernel(const t_forcerec *fr, } void -GpuBonded::launchKernel(const t_forcerec *fr, - const gmx::ForceFlags &forceFlags, - const matrix box) +GpuBonded::launchKernel(const t_forcerec *fr, + const gmx::StepWorkload &stepWork, + const matrix box) { - if (forceFlags.computeEnergy) + if (stepWork.computeEnergy) { // When we need the energy, we also need the virial impl_->launchKernel (fr, box); } - else if (forceFlags.computeVirial) + else if (stepWork.computeVirial) { impl_->launchKernel (fr, box); diff --git a/src/gromacs/listed_forces/listed_forces.cpp b/src/gromacs/listed_forces/listed_forces.cpp index 9e6e7001b7..9a469f8f31 100644 --- a/src/gromacs/listed_forces/listed_forces.cpp +++ b/src/gromacs/listed_forces/listed_forces.cpp @@ -60,12 +60,12 @@ #include "gromacs/math/vec.h" #include "gromacs/mdlib/enerdata_utils.h" #include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/ppforceworkload.h" #include "gromacs/mdtypes/commrec.h" #include "gromacs/mdtypes/fcdata.h" #include "gromacs/mdtypes/forcerec.h" #include "gromacs/mdtypes/inputrec.h" #include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/simulation_workload.h" #include "gromacs/pbcutil/ishift.h" #include "gromacs/pbcutil/pbc.h" #include "gromacs/timing/wallcycle.h" @@ -199,7 +199,7 @@ void reduce_thread_output(int n, gmx::ForceWithShiftForces *forceWithShiftForces, real *ener, gmx_grppairener_t *grpp, real *dvdl, const bonded_threading_t *bt, - const gmx::ForceFlags &forceFlags) + const gmx::StepWorkload &stepWork) { assert(bt->haveBondeds); @@ -212,12 +212,12 @@ reduce_thread_output(int n, gmx::ForceWithShiftForces *forceWithShiftForces, rvec * gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data()); /* When necessary, reduce energy and virial using one thread only */ - if ((forceFlags.computeEnergy || forceFlags.computeVirial || forceFlags.computeDhdl) && + if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1) 
{ gmx::ArrayRef < const std::unique_ptr < f_thread_t>> f_t = bt->f_t; - if (forceFlags.computeVirial) + if (stepWork.computeVirial) { for (int i = 0; i < SHIFTS; i++) { @@ -227,7 +227,7 @@ reduce_thread_output(int n, gmx::ForceWithShiftForces *forceWithShiftForces, } } } - if (forceFlags.computeEnergy) + if (stepWork.computeEnergy) { for (int i = 0; i < F_NRE; i++) { @@ -247,7 +247,7 @@ reduce_thread_output(int n, gmx::ForceWithShiftForces *forceWithShiftForces, } } } - if (forceFlags.computeDhdl) + if (stepWork.computeDhdl) { for (int i = 0; i < efptNR; i++) { @@ -268,14 +268,14 @@ reduce_thread_output(int n, gmx::ForceWithShiftForces *forceWithShiftForces, * Note that currently we do not have bonded kernels that * do not compute forces. */ -BondedKernelFlavor selectBondedKernelFlavor(const gmx::ForceFlags &forceFlags, - const bool useSimdKernels, - const bool havePerturbedInteractions) +BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload &stepWork, + const bool useSimdKernels, + const bool havePerturbedInteractions) { BondedKernelFlavor flavor; - if (forceFlags.computeEnergy || forceFlags.computeVirial) + if (stepWork.computeEnergy || stepWork.computeVirial) { - if (forceFlags.computeVirial) + if (stepWork.computeVirial) { flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy; } @@ -312,7 +312,7 @@ calc_one_bond(int thread, t_nrnb *nrnb, const real *lambda, real *dvdl, const t_mdatoms *md, t_fcdata *fcd, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, int *global_atom_index) { GMX_ASSERT(idef->ilsort == ilsortNO_FE || idef->ilsort == ilsortFE_SORTED, @@ -322,7 +322,7 @@ calc_one_bond(int thread, (idef->ilsort == ilsortFE_SORTED && idef->il[ftype].nr_nonperturbed < idef->il[ftype].nr); BondedKernelFlavor flavor = - selectBondedKernelFlavor(forceFlags, fr->use_simd_kernels, havePerturbedInteractions); + selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions); int efptFTYPE; if (IS_RESTRAINT_TYPE(ftype)) { @@ -375,7 +375,7 @@ calc_one_bond(int thread, extended to support calling from multiple threads. */ do_pairs(ftype, nbn, iatoms+nb0, idef->iparams, x, f, fshift, pbc, g, lambda, dvdl, md, fr, - havePerturbedInteractions, forceFlags, + havePerturbedInteractions, stepWork, grpp, global_atom_index); } @@ -392,20 +392,20 @@ calc_one_bond(int thread, /*! 
\brief Compute the bonded part of the listed forces, parallelized over threads */ static void -calcBondedForces(const t_idef *idef, - const rvec x[], - const t_forcerec *fr, - const t_pbc *pbc_null, - const t_graph *g, - rvec *fshiftMasterBuffer, - gmx_enerdata_t *enerd, - t_nrnb *nrnb, - const real *lambda, - real *dvdl, - const t_mdatoms *md, - t_fcdata *fcd, - const gmx::ForceFlags &forceFlags, - int *global_atom_index) +calcBondedForces(const t_idef *idef, + const rvec x[], + const t_forcerec *fr, + const t_pbc *pbc_null, + const t_graph *g, + rvec *fshiftMasterBuffer, + gmx_enerdata_t *enerd, + t_nrnb *nrnb, + const real *lambda, + real *dvdl, + const t_mdatoms *md, + t_fcdata *fcd, + const gmx::StepWorkload &stepWork, + int *global_atom_index) { bonded_threading_t *bt = fr->bondedThreading; @@ -452,7 +452,7 @@ calcBondedForces(const t_idef *idef, fr->bondedThreading->workDivision, x, ft, fshift, fr, pbc_null, g, grpp, nrnb, lambda, dvdlt, - md, fcd, forceFlags, + md, fcd, stepWork, global_atom_index); epot[ftype] += v; } @@ -498,7 +498,7 @@ void calc_listed(const t_commrec *cr, const real *lambda, const t_mdatoms *md, t_fcdata *fcd, int *global_atom_index, - const gmx::ForceFlags &forceFlags) + const gmx::StepWorkload &stepWork) { const t_pbc *pbc_null; bonded_threading_t *bt = fr->bondedThreading; @@ -572,16 +572,16 @@ void calc_listed(const t_commrec *cr, calcBondedForces(idef, x, fr, pbc_null, g, as_rvec_array(forceWithShiftForces.shiftForces().data()), enerd, nrnb, lambda, dvdl, md, - fcd, forceFlags, global_atom_index); + fcd, stepWork, global_atom_index); wallcycle_sub_stop(wcycle, ewcsLISTED); wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS); reduce_thread_output(fr->natoms_force, &forceWithShiftForces, enerd->term, &enerd->grpp, dvdl, bt, - forceFlags); + stepWork); - if (forceFlags.computeDhdl) + if (stepWork.computeDhdl) { for (int i = 0; i < efptNR; i++) { @@ -650,7 +650,7 @@ void calc_listed_lambda(const t_idef *idef, if (ilist_fe.nr > 0) { - gmx::ForceFlags tempFlags; + gmx::StepWorkload tempFlags; tempFlags.computeEnergy = true; v = calc_one_bond(0, ftype, &idef_fe, workDivision, x, f, fshift, fr, pbc_null, g, @@ -667,29 +667,29 @@ void calc_listed_lambda(const t_idef *idef, } void -do_force_listed(struct gmx_wallcycle *wcycle, - const matrix box, - const t_lambda *fepvals, - const t_commrec *cr, - const gmx_multisim_t *ms, - const t_idef *idef, - const rvec x[], - history_t *hist, - gmx::ForceOutputs *forceOutputs, - const t_forcerec *fr, - const struct t_pbc *pbc, - const struct t_graph *graph, - gmx_enerdata_t *enerd, - t_nrnb *nrnb, - const real *lambda, - const t_mdatoms *md, - t_fcdata *fcd, - int *global_atom_index, - const gmx::ForceFlags &forceFlags) +do_force_listed(struct gmx_wallcycle *wcycle, + const matrix box, + const t_lambda *fepvals, + const t_commrec *cr, + const gmx_multisim_t *ms, + const t_idef *idef, + const rvec x[], + history_t *hist, + gmx::ForceOutputs *forceOutputs, + const t_forcerec *fr, + const struct t_pbc *pbc, + const struct t_graph *graph, + gmx_enerdata_t *enerd, + t_nrnb *nrnb, + const real *lambda, + const t_mdatoms *md, + t_fcdata *fcd, + int *global_atom_index, + const gmx::StepWorkload &stepWork) { t_pbc pbc_full; /* Full PBC is needed for position restraints */ - if (!forceFlags.computeListedForces) + if (!stepWork.computeListedForces) { return; } @@ -704,12 +704,12 @@ do_force_listed(struct gmx_wallcycle *wcycle, forceOutputs, fr, pbc, &pbc_full, graph, enerd, nrnb, lambda, md, fcd, - global_atom_index, forceFlags); + 
global_atom_index, stepWork); /* Check if we have to determine energy differences * at foreign lambda's. */ - if (fepvals->n_lambda > 0 && forceFlags.computeDhdl) + if (fepvals->n_lambda > 0 && stepWork.computeDhdl) { posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr); diff --git a/src/gromacs/listed_forces/listed_forces.h b/src/gromacs/listed_forces/listed_forces.h index 885f654ea6..86e1a4e0bf 100644 --- a/src/gromacs/listed_forces/listed_forces.h +++ b/src/gromacs/listed_forces/listed_forces.h @@ -87,7 +87,7 @@ class t_state; namespace gmx { class ForceOutputs; -class ForceFlags; +class StepWorkload; } //! Type of CPU function to compute a bonded interaction. @@ -118,7 +118,7 @@ void calc_listed(const t_commrec *cr, gmx_enerdata_t *enerd, t_nrnb *nrnb, const real *lambda, const t_mdatoms *md, struct t_fcdata *fcd, int *ddgatindex, - const gmx::ForceFlags &forceFlags); + const gmx::StepWorkload &stepWork); /*! \brief As calc_listed(), but only determines the potential energy * for the perturbed interactions. @@ -154,7 +154,7 @@ do_force_listed(struct gmx_wallcycle *wcycle, const t_mdatoms *md, struct t_fcdata *fcd, int *global_atom_index, - const gmx::ForceFlags &forceFlags); + const gmx::StepWorkload &stepWork); /*! \brief Returns true if there are position restraints. */ bool havePositionRestraints(const t_idef &idef, diff --git a/src/gromacs/listed_forces/pairs.cpp b/src/gromacs/listed_forces/pairs.cpp index cc05b482cd..cede2598d1 100644 --- a/src/gromacs/listed_forces/pairs.cpp +++ b/src/gromacs/listed_forces/pairs.cpp @@ -50,10 +50,10 @@ #include "gromacs/listed_forces/bonded.h" #include "gromacs/math/functions.h" #include "gromacs/math/vec.h" -#include "gromacs/mdlib/ppforceworkload.h" #include "gromacs/mdtypes/group.h" #include "gromacs/mdtypes/md_enums.h" #include "gromacs/mdtypes/nblist.h" +#include "gromacs/mdtypes/simulation_workload.h" #include "gromacs/pbcutil/ishift.h" #include "gromacs/pbcutil/mshift.h" #include "gromacs/pbcutil/pbc.h" @@ -652,14 +652,14 @@ do_pairs(int ftype, int nbonds, const t_mdatoms *md, const t_forcerec *fr, const bool havePerturbedInteractions, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, gmx_grppairener_t *grppener, int *global_atom_index) { if (ftype == F_LJ14 && fr->ic->vdwtype != evdwUSER && !EEL_USER(fr->ic->eeltype) && !havePerturbedInteractions && - (!forceFlags.computeVirial && !forceFlags.computeEnergy)) + (!stepWork.computeVirial && !stepWork.computeEnergy)) { /* We use a fast code-path for plain LJ 1-4 without FEP. * @@ -703,7 +703,7 @@ do_pairs(int ftype, int nbonds, md, fr->ic->epsfac*fr->fudgeQQ); } } - else if (forceFlags.computeVirial) + else if (stepWork.computeVirial) { do_pairs_general( ftype, nbonds, iatoms, iparams, diff --git a/src/gromacs/listed_forces/pairs.h b/src/gromacs/listed_forces/pairs.h index 0dd7b9baa5..4bf0b96e74 100644 --- a/src/gromacs/listed_forces/pairs.h +++ b/src/gromacs/listed_forces/pairs.h @@ -57,7 +57,7 @@ struct t_pbc; namespace gmx { -class ForceFlags; +class StepWorkload; } /*! 
\brief Calculate VdW/charge listed pair interactions (usually 1-4 @@ -71,7 +71,7 @@ do_pairs(int ftype, int nbonds, const t_iatom iatoms[], const t_iparams iparams[ const struct t_pbc *pbc, const struct t_graph *g, const real *lambda, real *dvdl, const t_mdatoms *md, const t_forcerec *fr, bool havePerturbedPairs, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, gmx_grppairener_t *grppener, int *global_atom_index); diff --git a/src/gromacs/mdlib/force.cpp b/src/gromacs/mdlib/force.cpp index a9b0dcb915..9c184b7435 100644 --- a/src/gromacs/mdlib/force.cpp +++ b/src/gromacs/mdlib/force.cpp @@ -54,7 +54,6 @@ #include "gromacs/math/vec.h" #include "gromacs/math/vecdump.h" #include "gromacs/mdlib/forcerec_threading.h" -#include "gromacs/mdlib/ppforceworkload.h" #include "gromacs/mdlib/qmmm.h" #include "gromacs/mdlib/rf_util.h" #include "gromacs/mdlib/wall.h" @@ -65,6 +64,7 @@ #include "gromacs/mdtypes/inputrec.h" #include "gromacs/mdtypes/md_enums.h" #include "gromacs/mdtypes/mdatom.h" +#include "gromacs/mdtypes/simulation_workload.h" #include "gromacs/pbcutil/ishift.h" #include "gromacs/pbcutil/mshift.h" #include "gromacs/pbcutil/pbc.h" @@ -116,7 +116,7 @@ do_force_lowlevel(t_forcerec *fr, const real *lambda, const t_graph *graph, const rvec *mu_tot, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, const DDBalanceRegionHandler &ddBalanceRegionHandler) { // TODO: Replace all uses of x by const coordinates @@ -172,7 +172,7 @@ do_force_lowlevel(t_forcerec *fr, t_pbc pbc; /* Check whether we need to take into account PBC in listed interactions. */ - const auto needPbcForListedForces = fr->bMolPBC && forceFlags.computeListedForces && haveCpuListedForces(*fr, *idef, *fcd); + const auto needPbcForListedForces = fr->bMolPBC && stepWork.computeListedForces && haveCpuListedForces(*fr, *idef, *fcd); if (needPbcForListedForces) { /* Since all atoms are in the rectangular or triclinic unit-cell, @@ -187,7 +187,7 @@ do_force_lowlevel(t_forcerec *fr, forceOutputs, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr, - forceFlags); + stepWork); } const bool computePmeOnCpu = @@ -278,15 +278,15 @@ do_force_lowlevel(t_forcerec *fr, { /* Do reciprocal PME for Coulomb and/or LJ. 
*/ assert(fr->n_tpi >= 0); - if (fr->n_tpi == 0 || forceFlags.stateChanged) + if (fr->n_tpi == 0 || stepWork.stateChanged) { int pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE; - if (forceFlags.computeForces) + if (stepWork.computeForces) { pme_flags |= GMX_PME_CALC_F; } - if (forceFlags.computeVirial) + if (stepWork.computeVirial) { pme_flags |= GMX_PME_CALC_ENER_VIR; } diff --git a/src/gromacs/mdlib/force.h b/src/gromacs/mdlib/force.h index 1cd8354bcd..cc28a00939 100644 --- a/src/gromacs/mdlib/force.h +++ b/src/gromacs/mdlib/force.h @@ -68,9 +68,9 @@ namespace gmx class Awh; class ForceWithVirial; class ImdSession; -class MdScheduleWorkload; +class MdrunScheduleWorkload; class MDLogger; -class ForceFlags; +class StepWorkload; } void do_force(FILE *log, @@ -96,7 +96,7 @@ void do_force(FILE *log, gmx::ArrayRef lambda, t_graph *graph, t_forcerec *fr, - gmx::MdScheduleWorkload *mdScheduleWork, + gmx::MdrunScheduleWorkload *runScheduleWork, const gmx_vsite_t *vsite, rvec mu_tot, double t, @@ -132,7 +132,7 @@ do_force_lowlevel(t_forcerec *fr, const real *lambda, const t_graph *graph, const rvec *mu_tot, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, const DDBalanceRegionHandler &ddBalanceRegionHandler); /* Call all the force routines */ diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 881b9974ac..4d0b1595ae 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -77,7 +77,6 @@ #include "gromacs/mdlib/force.h" #include "gromacs/mdlib/forcerec.h" #include "gromacs/mdlib/gmx_omp_nthreads.h" -#include "gromacs/mdlib/ppforceworkload.h" #include "gromacs/mdlib/qmmm.h" #include "gromacs/mdlib/update.h" #include "gromacs/mdtypes/commrec.h" @@ -86,6 +85,7 @@ #include "gromacs/mdtypes/iforceprovider.h" #include "gromacs/mdtypes/inputrec.h" #include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/simulation_workload.h" #include "gromacs/mdtypes/state.h" #include "gromacs/nbnxm/atomdata.h" #include "gromacs/nbnxm/gpu_data_mgmt.h" @@ -114,6 +114,8 @@ #include "gromacs/utility/sysinfo.h" using gmx::ForceOutputs; +using gmx::StepWorkload; +using gmx::DomainLifetimeWorkload; // TODO: this environment variable allows us to verify before release // that on less common architectures the total cost of polling is not larger than @@ -271,7 +273,7 @@ static void post_process_forces(const t_commrec *cr, const t_graph *graph, const t_forcerec *fr, const gmx_vsite_t *vsite, - const gmx::ForceFlags &forceFlags) + const StepWorkload &stepWork) { rvec *f = as_rvec_array(forceOutputs->forceWithShiftForces().force().data()); @@ -288,13 +290,13 @@ static void post_process_forces(const t_commrec *cr, */ matrix virial = { { 0 } }; spread_vsite_f(vsite, x, fDirectVir, nullptr, - forceFlags.computeVirial, virial, + stepWork.computeVirial, virial, nrnb, &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr, wcycle); forceWithVirial.addVirialContribution(virial); } - if (forceFlags.computeVirial) + if (stepWork.computeVirial) { /* Now add the forces, this is local */ sum_forces(f, forceWithVirial.force_); @@ -319,14 +321,14 @@ static void post_process_forces(const t_commrec *cr, static void do_nb_verlet(t_forcerec *fr, const interaction_const_t *ic, gmx_enerdata_t *enerd, - const gmx::ForceFlags &forceFlags, + const StepWorkload &stepWork, const Nbnxm::InteractionLocality ilocality, const int clearF, const int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle) { - if (!forceFlags.computeNonbondedForces) + if (!stepWork.computeNonbondedForces) { 
/* skip non-bonded calculation */ return; @@ -356,7 +358,7 @@ static void do_nb_verlet(t_forcerec *fr, } } - nbv->dispatchNonbondedKernel(ilocality, *ic, forceFlags, clearF, *fr, enerd, nrnb); + nbv->dispatchNonbondedKernel(ilocality, *ic, stepWork, clearF, *fr, enerd, nrnb); } static inline void clear_rvecs_omp(int n, rvec v[]) @@ -514,7 +516,7 @@ haveSpecialForces(const t_inputrec *inputrec, * \param[in] x The coordinates * \param[in] mdatoms Per atom properties * \param[in] lambda Array of free-energy lambda values - * \param[in] forceFlags Force schedule flags + * \param[in] stepWork Step schedule flags * \param[in,out] forceWithVirial Force and virial buffers * \param[in,out] enerd Energy buffer * \param[in,out] ed Essential dynamics pointer @@ -539,7 +541,7 @@ computeSpecialForces(FILE *fplog, gmx::ArrayRef x, const t_mdatoms *mdatoms, real *lambda, - const gmx::ForceFlags &forceFlags, + const StepWorkload &stepWork, gmx::ForceWithVirial *forceWithVirial, gmx_enerdata_t *enerd, gmx_edsam *ed, @@ -548,7 +550,7 @@ computeSpecialForces(FILE *fplog, /* NOTE: Currently all ForceProviders only provide forces. * When they also provide energies, remove this conditional. */ - if (forceFlags.computeForces) + if (stepWork.computeForces) { gmx::ForceProviderInput forceProviderInput(x, *mdatoms, t, box, *cr); gmx::ForceProviderOutput forceProviderOutput(forceWithVirial, enerd); @@ -594,7 +596,7 @@ computeSpecialForces(FILE *fplog, } /* Add forces from interactive molecular dynamics (IMD), if any */ - if (inputrec->bIMD && forceFlags.computeForces) + if (inputrec->bIMD && stepWork.computeForces) { imdSession->applyForces(f); } @@ -605,20 +607,20 @@ computeSpecialForces(FILE *fplog, * \param[in] pmedata The PME structure * \param[in] box The box matrix * \param[in] x Coordinate array - * \param[in] forceFlags Force schedule flags + * \param[in] stepWork Step schedule flags * \param[in] pmeFlags PME flags * \param[in] useGpuForceReduction True if GPU-based force reduction is active this step * \param[in] wcycle The wallcycle structure */ -static inline void launchPmeGpuSpread(gmx_pme_t *pmedata, - const matrix box, - const rvec x[], - const gmx::ForceFlags &forceFlags, - int pmeFlags, - bool useGpuForceReduction, - gmx_wallcycle_t wcycle) +static inline void launchPmeGpuSpread(gmx_pme_t *pmedata, + const matrix box, + const rvec x[], + const StepWorkload &stepWork, + int pmeFlags, + bool useGpuForceReduction, + gmx_wallcycle_t wcycle) { - pme_gpu_prepare_computation(pmedata, forceFlags.haveDynamicBox, box, wcycle, pmeFlags, useGpuForceReduction); + pme_gpu_prepare_computation(pmedata, stepWork.haveDynamicBox, box, wcycle, pmeFlags, useGpuForceReduction); pme_gpu_copy_coordinates_to_gpu(pmedata, x, wcycle); pme_gpu_launch_spread(pmedata, wcycle); } @@ -650,17 +652,17 @@ static void launchPmeGpuFftAndGather(gmx_pme_t *pmedata, * \param[in,out] pmedata PME module data * \param[in,out] forceOutputs Output buffer for the forces and virial * \param[in,out] enerd Energy data structure results are reduced into - * \param[in] forceFlags Force schedule flags + * \param[in] stepWork Step schedule flags * \param[in] pmeFlags PME flags * \param[in] wcycle The wallcycle structure */ -static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t *nbv, - gmx_pme_t *pmedata, - gmx::ForceOutputs *forceOutputs, - gmx_enerdata_t *enerd, - const gmx::ForceFlags &forceFlags, - int pmeFlags, - gmx_wallcycle_t wcycle) +static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t *nbv, + gmx_pme_t *pmedata, + gmx::ForceOutputs 
*forceOutputs, + gmx_enerdata_t *enerd, + const StepWorkload &stepWork, + int pmeFlags, + gmx_wallcycle_t wcycle) { bool isPmeGpuDone = false; bool isNbGpuDone = false; @@ -684,7 +686,7 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t *nbv { GpuTaskCompletion completionType = (isPmeGpuDone) ? GpuTaskCompletion::Wait : GpuTaskCompletion::Check; isNbGpuDone = Nbnxm::gpu_try_finish_task(nbv->gpu_nbv, - forceFlags, + stepWork, Nbnxm::AtomLocality::Local, enerd->grpp.ener[egLJSR].data(), enerd->grpp.ener[egCOULSR].data(), @@ -705,7 +707,7 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t *nbv * \param[in] pull_work The pull work object. * \param[in] inputrec input record * \param[in] force force array - * \param[in] forceFlags Force schedule flags + * \param[in] stepWork Step schedule flags * \param[out] wcycle wallcycle recording structure * * \returns Cleared force output structure @@ -715,15 +717,15 @@ setupForceOutputs(t_forcerec *fr, pull_t *pull_work, const t_inputrec &inputrec, gmx::ArrayRefWithPadding force, - const gmx::ForceFlags &forceFlags, + const StepWorkload &stepWork, gmx_wallcycle_t wcycle) { wallcycle_sub_start(wcycle, ewcsCLEAR_FORCE_BUFFER); /* NOTE: We assume fr->shiftForces is all zeros here */ - gmx::ForceWithShiftForces forceWithShiftForces(force, forceFlags.computeVirial, fr->shiftForces); + gmx::ForceWithShiftForces forceWithShiftForces(force, stepWork.computeVirial, fr->shiftForces); - if (forceFlags.computeForces) + if (stepWork.computeForces) { /* Clear the short- and long-range forces */ clear_rvecs_omp(fr->natoms_force_constr, @@ -735,12 +737,12 @@ setupForceOutputs(t_forcerec *fr, * directly, such as PME. Otherwise, forceWithVirial uses the * the same force (f in legacy calls) buffer as other algorithms. */ - const bool useSeparateForceWithVirialBuffer = (forceFlags.computeForces && - (forceFlags.computeVirial && fr->haveDirectVirialContributions)); + const bool useSeparateForceWithVirialBuffer = (stepWork.computeForces && + (stepWork.computeVirial && fr->haveDirectVirialContributions)); /* forceWithVirial uses the local atom range only */ gmx::ForceWithVirial forceWithVirial(useSeparateForceWithVirialBuffer ? fr->forceBufferForDirectVirialContributions : force.unpaddedArrayRef(), - forceFlags.computeVirial); + stepWork.computeVirial); if (useSeparateForceWithVirialBuffer) { @@ -763,30 +765,23 @@ setupForceOutputs(t_forcerec *fr, } -/*! \brief Set up flags that indicate what type of work is there to compute. - * - * Currently we only update it at search steps, - * but some properties may change more frequently (e.g. virial/non-virial step), - * so when including those either the frequency of update (per-step) or the scope - * of a flag will change (i.e. a set of flags for nstlist steps). - * +/*! \brief Set up flags that have the lifetime of the domain indicating what type of work is there to compute. 
*/ static void -setupForceWorkload(gmx::PpForceWorkload *forceWork, - const t_inputrec *inputrec, - const t_forcerec *fr, - const pull_t *pull_work, - const gmx_edsam *ed, - const t_idef &idef, - const t_fcdata *fcd, - const gmx::ForceFlags &forceFlags - ) +setupDomainLifetimeWorkload(DomainLifetimeWorkload *domainWork, + const t_inputrec *inputrec, + const t_forcerec *fr, + const pull_t *pull_work, + const gmx_edsam *ed, + const t_idef &idef, + const t_fcdata *fcd, + const StepWorkload &stepWork) { - forceWork->haveSpecialForces = haveSpecialForces(inputrec, fr->forceProviders, pull_work, forceFlags.computeForces, ed); - forceWork->haveCpuBondedWork = haveCpuBondeds(*fr); - forceWork->haveGpuBondedWork = ((fr->gpuBonded != nullptr) && fr->gpuBonded->haveInteractions()); - forceWork->haveRestraintsWork = havePositionRestraints(idef, *fcd); - forceWork->haveCpuListedForceWork = haveCpuListedForces(*fr, idef, *fcd); + domainWork->haveSpecialForces = haveSpecialForces(inputrec, fr->forceProviders, pull_work, stepWork.computeForces, ed); + domainWork->haveCpuBondedWork = haveCpuBondeds(*fr); + domainWork->haveGpuBondedWork = ((fr->gpuBonded != nullptr) && fr->gpuBonded->haveInteractions()); + domainWork->haveRestraintsWork = havePositionRestraints(idef, *fcd); + domainWork->haveCpuListedForceWork = haveCpuListedForces(*fr, idef, *fcd); } /*! \brief Set up force flag stuct from the force bitmask. @@ -796,9 +791,9 @@ setupForceWorkload(gmx::PpForceWorkload *forceWork, * \param[in] isNonbondedOn Global override, if false forces to turn off all nonbonded calculation. */ static void -setupForceFlags(gmx::ForceFlags *flags, - const int legacyFlags, - const bool isNonbondedOn) +setupStepWorkload(StepWorkload *flags, + const int legacyFlags, + const bool isNonbondedOn) { flags->stateChanged = ((legacyFlags & GMX_FORCE_STATECHANGED) != 0); flags->haveDynamicBox = ((legacyFlags & GMX_FORCE_DYNAMICBOX) != 0); @@ -815,18 +810,18 @@ setupForceFlags(gmx::ForceFlags *flags, /* \brief Launch end-of-step GPU tasks: buffer clearing and rolling pruning. * * TODO: eliminate the \p useGpuNonbonded and \p useGpuNonbonded when these are - * incorporated in PpForceWorkload. + * incorporated in DomainLifetimeWorkload. 
*/ static void -launchGpuEndOfStepTasks(nonbonded_verlet_t *nbv, - gmx::GpuBonded *gpuBonded, - gmx_pme_t *pmedata, - gmx_enerdata_t *enerd, - const gmx::MdScheduleWorkload &mdScheduleWork, - bool useGpuNonbonded, - bool useGpuPme, - int64_t step, - gmx_wallcycle_t wcycle) +launchGpuEndOfStepTasks(nonbonded_verlet_t *nbv, + gmx::GpuBonded *gpuBonded, + gmx_pme_t *pmedata, + gmx_enerdata_t *enerd, + const gmx::MdrunScheduleWorkload &runScheduleWork, + bool useGpuNonbonded, + bool useGpuPme, + int64_t step, + gmx_wallcycle_t wcycle) { if (useGpuNonbonded) { @@ -842,7 +837,7 @@ launchGpuEndOfStepTasks(nonbonded_verlet_t *nbv, /* now clear the GPU outputs while we finish the step on the CPU */ wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED); - Nbnxm::gpu_clear_outputs(nbv->gpu_nbv, mdScheduleWork.forceFlags.computeVirial); + Nbnxm::gpu_clear_outputs(nbv->gpu_nbv, runScheduleWork.stepWork.computeVirial); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); wallcycle_stop(wcycle, ewcLAUNCH_GPU); } @@ -852,7 +847,7 @@ launchGpuEndOfStepTasks(nonbonded_verlet_t *nbv, pme_gpu_reinit_computation(pmedata, wcycle); } - if (mdScheduleWork.forceWork.haveGpuBondedWork && mdScheduleWork.forceFlags.computeEnergy) + if (runScheduleWork.domainWork.haveGpuBondedWork && runScheduleWork.stepWork.computeEnergy) { // in principle this should be included in the DD balancing region, // but generally it is infrequent so we'll omit it for the sake of @@ -887,7 +882,7 @@ void do_force(FILE *fplog, gmx::ArrayRef lambda, t_graph *graph, t_forcerec *fr, - gmx::MdScheduleWorkload *mdScheduleWork, + gmx::MdrunScheduleWorkload *runScheduleWork, const gmx_vsite_t *vsite, rvec mu_tot, double t, @@ -908,11 +903,11 @@ void do_force(FILE *fplog, { legacyFlags &= ~GMX_FORCE_NONBONDED; } - setupForceFlags(&mdScheduleWork->forceFlags, legacyFlags, fr->bNonbonded); + setupStepWorkload(&runScheduleWork->stepWork, legacyFlags, fr->bNonbonded); - const gmx::ForceFlags &forceFlags = mdScheduleWork->forceFlags; + const gmx::StepWorkload &stepWork = runScheduleWork->stepWork; - bFillGrid = (forceFlags.doNeighborSearch && forceFlags.stateChanged); + bFillGrid = (stepWork.doNeighborSearch && stepWork.stateChanged); bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr)); bUseGPU = fr->nbv->useGpu(); bUseOrEmulGPU = bUseGPU || fr->nbv->emulateGpu(); @@ -922,9 +917,9 @@ void do_force(FILE *fplog, const bool useGpuPme = EEL_PME(fr->ic->eeltype) && thisRankHasDuty(cr, DUTY_PME) && ((pmeRunMode == PmeRunMode::GPU) || (pmeRunMode == PmeRunMode::Mixed)); const int pmeFlags = GMX_PME_SPREAD | GMX_PME_SOLVE | - (forceFlags.computeVirial ? GMX_PME_CALC_ENER_VIR : 0) | - (forceFlags.computeEnergy ? GMX_PME_CALC_ENER_VIR : 0) | - (forceFlags.computeForces ? GMX_PME_CALC_F : 0); + (stepWork.computeVirial ? GMX_PME_CALC_ENER_VIR : 0) | + (stepWork.computeEnergy ? GMX_PME_CALC_ENER_VIR : 0) | + (stepWork.computeForces ? GMX_PME_CALC_F : 0); // Switches on whether to use GPU for position and force buffer operations // TODO consider all possible combinations of triggers, and how to combine optimally in each case. @@ -932,7 +927,7 @@ void do_force(FILE *fplog, BufferOpsUseGpu::True : BufferOpsUseGpu::False;; // GPU Force buffer ops are disabled on virial steps, because the virial calc is not yet ported to GPU const BufferOpsUseGpu useGpuFBufOps = (c_enableGpuBufOps && bUseGPU && (GMX_GPU == GMX_GPU_CUDA)) - && !(forceFlags.computeVirial || forceFlags.computeEnergy) ? 
+ && !(stepWork.computeVirial || stepWork.computeEnergy) ? BufferOpsUseGpu::True : BufferOpsUseGpu::False; // TODO: move / add this flag to the internal PME GPU data structures const bool useGpuPmeFReduction = (useGpuFBufOps == BufferOpsUseGpu::True) && @@ -942,7 +937,7 @@ void do_force(FILE *fplog, * somewhere early inside the step after communication during domain * decomposition (and not during the previous step as usual). */ - if (forceFlags.doNeighborSearch) + if (stepWork.doNeighborSearch) { ddBalanceRegionHandler.openBeforeForceComputationCpu(DdAllowBalanceRegionReopen::yes); } @@ -952,7 +947,7 @@ void do_force(FILE *fplog, clear_mat(vir_force); - if (forceFlags.stateChanged) + if (stepWork.stateChanged) { if (inputrecNeedMutot(inputrec)) { @@ -970,7 +965,7 @@ void do_force(FILE *fplog, /* Compute shift vectors every step, * because of pressure coupling or box deformation! */ - if (forceFlags.haveDynamicBox && forceFlags.stateChanged) + if (stepWork.haveDynamicBox && stepWork.stateChanged) { calc_shifts(box, fr->shift_vec); } @@ -986,7 +981,7 @@ void do_force(FILE *fplog, } } - nbnxn_atomdata_copy_shiftvec(forceFlags.haveDynamicBox, + nbnxn_atomdata_copy_shiftvec(stepWork.haveDynamicBox, fr->shift_vec, nbv->nbat.get()); #if GMX_MPI @@ -999,20 +994,20 @@ void do_force(FILE *fplog, */ gmx_pme_send_coordinates(cr, box, as_rvec_array(x.unpaddedArrayRef().data()), lambda[efptCOUL], lambda[efptVDW], - (forceFlags.computeVirial || forceFlags.computeEnergy), + (stepWork.computeVirial || stepWork.computeEnergy), step, wcycle); } #endif /* GMX_MPI */ if (useGpuPme) { - launchPmeGpuSpread(fr->pmedata, box, as_rvec_array(x.unpaddedArrayRef().data()), forceFlags, pmeFlags, useGpuPmeFReduction, wcycle); + launchPmeGpuSpread(fr->pmedata, box, as_rvec_array(x.unpaddedArrayRef().data()), stepWork, pmeFlags, useGpuPmeFReduction, wcycle); } /* do gridding for pair search */ - if (forceFlags.doNeighborSearch) + if (stepWork.doNeighborSearch) { - if (graph && forceFlags.stateChanged) + if (graph && stepWork.stateChanged) { /* Calculate intramolecular shift vectors to make molecules whole */ mk_mshift(fplog, graph, fr->ePBC, box, as_rvec_array(x.unpaddedArrayRef().data())); @@ -1082,24 +1077,26 @@ void do_force(FILE *fplog, } } - // Call it per-step as force-flags can change. - // Need to run after the GPU-offload bonded interaction lists - // are set up to be able to determine whether there is bonded work. - setupForceWorkload(&mdScheduleWork->forceWork, - inputrec, - fr, - pull_work, - ed, - top->idef, - fcd, - forceFlags); + if (stepWork.doNeighborSearch) + { + // Need to run after the GPU-offload bonded interaction lists + // are set up to be able to determine whether there is bonded work. 
+ setupDomainLifetimeWorkload(&runScheduleWork->domainWork, + inputrec, + fr, + pull_work, + ed, + top->idef, + fcd, + stepWork); + } - const gmx::PpForceWorkload &forceWork = mdScheduleWork->forceWork; + const gmx::DomainLifetimeWorkload &domainWork = runScheduleWork->domainWork; /* do local pair search */ - if (forceFlags.doNeighborSearch) + if (stepWork.doNeighborSearch) { - // TODO: fuse this branch with the above forceFlags.doNeighborSearch block + // TODO: fuse this branch with the above stepWork.doNeighborSearch block wallcycle_start_nocount(wcycle, ewcNS); wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL); /* Note that with a GPU the launch overhead of the list transfer is not timed separately */ @@ -1151,7 +1148,7 @@ void do_force(FILE *fplog, wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED); Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat.get()); - if (forceFlags.doNeighborSearch || (useGpuXBufOps == BufferOpsUseGpu::False)) + if (stepWork.doNeighborSearch || (useGpuXBufOps == BufferOpsUseGpu::False)) { Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), Nbnxm::AtomLocality::Local); @@ -1161,16 +1158,16 @@ void do_force(FILE *fplog, // bonded work not split into separate local and non-local, so with DD // we can only launch the kernel after non-local coordinates have been received. - if (forceWork.haveGpuBondedWork && !havePPDomainDecomposition(cr)) + if (domainWork.haveGpuBondedWork && !havePPDomainDecomposition(cr)) { wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_BONDED); - fr->gpuBonded->launchKernel(fr, forceFlags, box); + fr->gpuBonded->launchKernel(fr, stepWork, box); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED); } /* launch local nonbonded work on GPU */ wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED); - do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::Local, enbvClearFNo, + do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local, enbvClearFNo, step, nrnb, wcycle); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); wallcycle_stop(wcycle, ewcLAUNCH_GPU); @@ -1195,9 +1192,9 @@ void do_force(FILE *fplog, do non-local pair search */ if (havePPDomainDecomposition(cr)) { - if (forceFlags.doNeighborSearch) + if (stepWork.doNeighborSearch) { - // TODO: fuse this branch with the above large forceFlags.doNeighborSearch block + // TODO: fuse this branch with the above large stepWork.doNeighborSearch block wallcycle_start_nocount(wcycle, ewcNS); wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL); /* Note that with a GPU the launch overhead of the list transfer is not timed separately */ @@ -1223,7 +1220,7 @@ void do_force(FILE *fplog, gpuHaloExchange->communicateHaloCoordinates(box); // TODO Force flags should include haveFreeEnergyWork for this domain - if (forceWork.haveCpuBondedWork || (fr->efep != efepNO)) + if (domainWork.haveCpuBondedWork || (fr->efep != efepNO)) { //non-local part of coordinate buffer must be copied back to host for CPU work nbv->launch_copy_x_from_gpu(as_rvec_array(x.unpaddedArrayRef().data()), Nbnxm::AtomLocality::NonLocal); @@ -1257,7 +1254,7 @@ void do_force(FILE *fplog, { wallcycle_start(wcycle, ewcLAUNCH_GPU); - if (forceFlags.doNeighborSearch || (useGpuXBufOps == BufferOpsUseGpu::False)) + if (stepWork.doNeighborSearch || (useGpuXBufOps == BufferOpsUseGpu::False)) { wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED); Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), @@ -1265,16 +1262,16 @@ void do_force(FILE *fplog, wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); } - if 
(forceWork.haveGpuBondedWork) + if (domainWork.haveGpuBondedWork) { wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_BONDED); - fr->gpuBonded->launchKernel(fr, forceFlags, box); + fr->gpuBonded->launchKernel(fr, stepWork, box); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED); } /* launch non-local nonbonded tasks on GPU */ wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED); - do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo, + do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo, step, nrnb, wcycle); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); @@ -1293,20 +1290,20 @@ void do_force(FILE *fplog, if (havePPDomainDecomposition(cr)) { Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), - forceFlags, Nbnxm::AtomLocality::NonLocal, copyBackNbForce); + stepWork, Nbnxm::AtomLocality::NonLocal, copyBackNbForce); } Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), - forceFlags, Nbnxm::AtomLocality::Local, copyBackNbForce); + stepWork, Nbnxm::AtomLocality::Local, copyBackNbForce); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); - if (forceWork.haveGpuBondedWork && forceFlags.computeEnergy) + if (domainWork.haveGpuBondedWork && stepWork.computeEnergy) { fr->gpuBonded->launchEnergyTransfer(); } wallcycle_stop(wcycle, ewcLAUNCH_GPU); } - if (forceFlags.stateChanged && inputrecNeedMutot(inputrec)) + if (stepWork.stateChanged && inputrecNeedMutot(inputrec)) { if (PAR(cr)) { @@ -1355,7 +1352,7 @@ void do_force(FILE *fplog, if (inputrec->bRot) { wallcycle_start(wcycle, ewcROT); - do_rotation(cr, enforcedRotation, box, as_rvec_array(x.unpaddedArrayRef().data()), t, step, forceFlags.doNeighborSearch); + do_rotation(cr, enforcedRotation, box, as_rvec_array(x.unpaddedArrayRef().data()), t, step, stepWork.doNeighborSearch); wallcycle_stop(wcycle, ewcROT); } @@ -1366,7 +1363,7 @@ void do_force(FILE *fplog, // Set up and clear force outputs. // We use std::move to keep the compiler happy, it has no effect. - ForceOutputs forceOut = setupForceOutputs(fr, pull_work, *inputrec, std::move(force), forceFlags, wcycle); + ForceOutputs forceOut = setupForceOutputs(fr, pull_work, *inputrec, std::move(force), stepWork, wcycle); /* We calculate the non-bonded forces, when done on the CPU, here. 
* We do this before calling do_force_lowlevel, because in that @@ -1378,7 +1375,7 @@ void do_force(FILE *fplog, if (!bUseOrEmulGPU) { - do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::Local, enbvClearFYes, + do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local, enbvClearFYes, step, nrnb, wcycle); } @@ -1390,14 +1387,14 @@ void do_force(FILE *fplog, nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::Local, fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms, inputrec->fepvals, lambda.data(), - enerd, forceFlags, nrnb); + enerd, stepWork, nrnb); if (havePPDomainDecomposition(cr)) { nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::NonLocal, fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms, inputrec->fepvals, lambda.data(), - enerd, forceFlags, nrnb); + enerd, stepWork, nrnb); } } @@ -1405,11 +1402,11 @@ void do_force(FILE *fplog, { if (havePPDomainDecomposition(cr)) { - do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo, + do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo, step, nrnb, wcycle); } - if (forceFlags.computeForces) + if (stepWork.computeForces) { /* Add all the non-bonded force to the normal force array. * This can be split into a local and a non-local part when overlapping @@ -1421,7 +1418,7 @@ void do_force(FILE *fplog, } /* If there are multiple fshift output buffers we need to reduce them */ - if (forceFlags.computeVirial) + if (stepWork.computeVirial) { /* This is not in a subcounter because it takes a negligible and constant-sized amount of time */ @@ -1438,7 +1435,7 @@ void do_force(FILE *fplog, // TODO Force flags should include haveFreeEnergyWork for this domain if (ddUsesGpuDirectCommunication && - (forceWork.haveCpuBondedWork || (fr->efep != efepNO))) + (domainWork.haveCpuBondedWork || (fr->efep != efepNO))) { /* Wait for non-local coordinate data to be copied from device */ nbv->wait_nonlocal_x_copy_D2H_done(); @@ -1448,7 +1445,7 @@ void do_force(FILE *fplog, cr, ms, nrnb, wcycle, mdatoms, x, hist, &forceOut, enerd, fcd, box, lambda.data(), graph, fr->mu_tot, - forceFlags, + stepWork, ddBalanceRegionHandler); wallcycle_stop(wcycle, ewcFORCE); @@ -1456,8 +1453,9 @@ void do_force(FILE *fplog, computeSpecialForces(fplog, cr, inputrec, awh, enforcedRotation, imdSession, pull_work, step, t, wcycle, fr->forceProviders, box, x.unpaddedArrayRef(), mdatoms, lambda.data(), - forceFlags, &forceOut.forceWithVirial(), enerd, - ed, forceFlags.doNeighborSearch); + stepWork, &forceOut.forceWithVirial(), enerd, + ed, stepWork.doNeighborSearch); + // Will store the amount of cycles spent waiting for the GPU that // will be later used in the DLB accounting. 
@@ -1472,7 +1470,7 @@ void do_force(FILE *fplog, if (bUseGPU) { cycles_wait_gpu += Nbnxm::gpu_wait_finish_task(nbv->gpu_nbv, - forceFlags, Nbnxm::AtomLocality::NonLocal, + stepWork, Nbnxm::AtomLocality::NonLocal, enerd->grpp.ener[egLJSR].data(), enerd->grpp.ener[egCOULSR].data(), forceWithShiftForces.shiftForces(), @@ -1481,7 +1479,7 @@ void do_force(FILE *fplog, else { wallcycle_start_nocount(wcycle, ewcFORCE); - do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFYes, + do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::NonLocal, enbvClearFYes, step, nrnb, wcycle); wallcycle_stop(wcycle, ewcFORCE); } @@ -1491,7 +1489,7 @@ void do_force(FILE *fplog, // TODO: move this into DomainLifetimeWorkload, including the second part of the condition // The bonded and free energy CPU tasks can have non-local force contributions // which are a dependency for the GPU force reduction. - bool haveNonLocalForceContribInCpuBuffer = forceWork.haveCpuBondedWork || (fr->efep != efepNO); + bool haveNonLocalForceContribInCpuBuffer = domainWork.haveCpuBondedWork || (fr->efep != efepNO); rvec *f = as_rvec_array(forceWithShiftForces.force().data()); if (haveNonLocalForceContribInCpuBuffer) @@ -1512,7 +1510,7 @@ void do_force(FILE *fplog, } - if (fr->nbv->emulateGpu() && forceFlags.computeVirial) + if (fr->nbv->emulateGpu() && stepWork.computeVirial) { nbnxn_atomdata_add_nbat_fshift_to_fshift(*nbv->nbat, forceWithShiftForces.shiftForces()); @@ -1523,7 +1521,7 @@ void do_force(FILE *fplog, const bool useGpuForcesHaloExchange = ddUsesGpuDirectCommunication && (useGpuFBufOps == BufferOpsUseGpu::True); const bool useCpuPmeFReduction = thisRankHasDuty(cr, DUTY_PME) && !useGpuPmeFReduction; // TODO: move this into DomainLifetimeWorkload, including the second part of the condition - const bool haveCpuLocalForces = (forceWork.haveSpecialForces || forceWork.haveCpuListedForceWork || useCpuPmeFReduction || + const bool haveCpuLocalForces = (domainWork.haveSpecialForces || domainWork.haveCpuListedForceWork || useCpuPmeFReduction || (fr->efep != efepNO)); if (havePPDomainDecomposition(cr)) @@ -1535,7 +1533,7 @@ void do_force(FILE *fplog, */ ddBalanceRegionHandler.closeAfterForceComputationCpu(); - if (forceFlags.computeForces) + if (stepWork.computeForces) { gmx::ArrayRef force = forceOut.forceWithShiftForces().force(); rvec *f = as_rvec_array(force.data()); @@ -1568,7 +1566,7 @@ void do_force(FILE *fplog, if (alternateGpuWait) { alternatePmeNbGpuWaitReduce(fr->nbv.get(), fr->pmedata, &forceOut, enerd, - forceFlags, pmeFlags, wcycle); + stepWork, pmeFlags, wcycle); } if (!alternateGpuWait && useGpuPme) @@ -1587,7 +1585,7 @@ void do_force(FILE *fplog, const float gpuWaitApiOverheadMargin = 2e6F; /* cycles */ const float waitCycles = Nbnxm::gpu_wait_finish_task(nbv->gpu_nbv, - forceFlags, Nbnxm::AtomLocality::Local, + stepWork, Nbnxm::AtomLocality::Local, enerd->grpp.ener[egLJSR].data(), enerd->grpp.ener[egCOULSR].data(), forceOut.forceWithShiftForces().shiftForces(), @@ -1596,7 +1594,7 @@ void do_force(FILE *fplog, if (ddBalanceRegionHandler.useBalancingRegion()) { DdBalanceRegionWaitedForGpu waitedForGpu = DdBalanceRegionWaitedForGpu::yes; - if (forceFlags.computeForces && waitCycles <= gpuWaitApiOverheadMargin) + if (stepWork.computeForces && waitCycles <= gpuWaitApiOverheadMargin) { /* We measured few cycles, it could be that the kernel * and transfer finished earlier and there was no actual @@ -1615,7 +1613,7 @@ void do_force(FILE *fplog, // NOTE: emulation kernel is not 
included in the balancing region, // but emulation mode does not target performance anyway wallcycle_start_nocount(wcycle, ewcFORCE); - do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::Local, + do_nb_verlet(fr, ic, enerd, stepWork, Nbnxm::InteractionLocality::Local, DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes, step, nrnb, wcycle); wallcycle_stop(wcycle, ewcFORCE); @@ -1672,7 +1670,7 @@ void do_force(FILE *fplog, } launchGpuEndOfStepTasks(nbv, fr->gpuBonded, fr->pmedata, enerd, - *mdScheduleWork, + *runScheduleWork, bUseGPU, useGpuPme, step, wcycle); @@ -1682,21 +1680,21 @@ void do_force(FILE *fplog, dd_force_flop_stop(cr->dd, nrnb); } - if (forceFlags.computeForces) + if (stepWork.computeForces) { rvec *f = as_rvec_array(forceOut.forceWithShiftForces().force().data()); /* If we have NoVirSum forces, but we do not calculate the virial, * we sum fr->f_novirsum=forceOut.f later. */ - if (vsite && !(fr->haveDirectVirialContributions && !forceFlags.computeVirial)) + if (vsite && !(fr->haveDirectVirialContributions && !stepWork.computeVirial)) { rvec *fshift = as_rvec_array(forceOut.forceWithShiftForces().shiftForces().data()); spread_vsite_f(vsite, as_rvec_array(x.unpaddedArrayRef().data()), f, fshift, FALSE, nullptr, nrnb, &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr, wcycle); } - if (forceFlags.computeVirial) + if (stepWork.computeVirial) { /* Calculation of the virial must be done after vsites! */ calc_virial(0, mdatoms->homenr, as_rvec_array(x.unpaddedArrayRef().data()), @@ -1713,15 +1711,15 @@ void do_force(FILE *fplog, pme_receive_force_ener(cr, &forceOut.forceWithVirial(), enerd, wcycle); } - if (forceFlags.computeForces) + if (stepWork.computeForces) { post_process_forces(cr, step, nrnb, wcycle, top, box, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut, vir_force, mdatoms, graph, fr, vsite, - forceFlags); + stepWork); } - if (forceFlags.computeEnergy) + if (stepWork.computeEnergy) { /* Sum the potential energy terms from group contributions */ sum_epot(&(enerd->grpp), enerd->term); diff --git a/src/gromacs/mdrun/isimulator.h b/src/gromacs/mdrun/isimulator.h index 86cd154c9c..9ae093de70 100644 --- a/src/gromacs/mdrun/isimulator.h +++ b/src/gromacs/mdrun/isimulator.h @@ -71,7 +71,7 @@ namespace gmx enum class StartingBehavior; class BoxDeformation; class Constraints; -class MdScheduleWorkload; +class MdrunScheduleWorkload; class IMDOutputProvider; struct MdModulesNotifier; class ImdSession; @@ -129,7 +129,7 @@ class ISimulator t_forcerec *fr, gmx_enerdata_t *enerd, gmx_ekindata_t *ekind, - MdScheduleWorkload *mdScheduleWork, + MdrunScheduleWorkload *runScheduleWork, const ReplicaExchangeParameters &replExParams, gmx_membed_t *membed, gmx_walltime_accounting *walltime_accounting, @@ -164,7 +164,7 @@ class ISimulator fr(fr), enerd(enerd), ekind(ekind), - mdScheduleWork(mdScheduleWork), + runScheduleWork(runScheduleWork), replExParams(replExParams), membed(membed), walltime_accounting(walltime_accounting), @@ -232,7 +232,7 @@ class ISimulator //! Kinetic energy data. gmx_ekindata_t *ekind; //! Schedule of work for each MD step for this task. - MdScheduleWorkload *mdScheduleWork; + MdrunScheduleWorkload *runScheduleWork; //! Parameters for replica exchange algorihtms. const ReplicaExchangeParameters &replExParams; //! Parameters for membrane embedding. 
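The isimulator.h hunk above shows how the renamed container reaches the simulators: each ISimulator holds a pointer to the single MdrunScheduleWorkload owned by the runner. The sketch below of that ownership pattern is illustrative only and not part of the patch; the pointer type and the runScheduleWork member name come from the diff, while the simulator shell and its method are assumed simplifications.

    // Sketch (not from the patch): how a simulator carries the run-lifetime workload object.
    namespace gmx
    {
    class MdrunScheduleWorkload; // declared in mdtypes/simulation_workload.h after this change
    }

    class SimulatorShell // stand-in for ISimulator / LegacySimulator
    {
        public:
            explicit SimulatorShell(gmx::MdrunScheduleWorkload* runScheduleWork) :
                runScheduleWork(runScheduleWork)
            {
            }

            void step()
            {
                // Every force computation receives the same pointer; do_force()
                // refreshes stepWork each step and domainWork on search steps.
                // do_force(..., runScheduleWork, ...);
            }

        private:
            gmx::MdrunScheduleWorkload* runScheduleWork; // owned for the whole run by Mdrunner::mdrunner()
    };
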
diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp index f0e36b3350..30580ccfb1 100644 --- a/src/gromacs/mdrun/md.cpp +++ b/src/gromacs/mdrun/md.cpp @@ -875,7 +875,7 @@ void gmx::LegacySimulator::do_md() &state->hist, f.arrayRefWithPadding(), force_vir, mdatoms, nrnb, wcycle, graph, - shellfc, fr, mdScheduleWork, t, mu_tot, + shellfc, fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler); } @@ -905,7 +905,7 @@ void gmx::LegacySimulator::do_md() state->box, state->x.arrayRefWithPadding(), &state->hist, f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd, state->lambda, graph, - fr, mdScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr, + fr, runScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr, (bNS ? GMX_FORCE_NS : 0) | force_flags, ddBalanceRegionHandler); } diff --git a/src/gromacs/mdrun/mimic.cpp b/src/gromacs/mdrun/mimic.cpp index af7a542a80..2b2f9e9ada 100644 --- a/src/gromacs/mdrun/mimic.cpp +++ b/src/gromacs/mdrun/mimic.cpp @@ -436,7 +436,7 @@ void gmx::LegacySimulator::do_mimic() &state->hist, f.arrayRefWithPadding(), force_vir, mdatoms, nrnb, wcycle, graph, - shellfc, fr, mdScheduleWork, t, mu_tot, + shellfc, fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler); } @@ -455,7 +455,7 @@ void gmx::LegacySimulator::do_mimic() state->box, state->x.arrayRefWithPadding(), &state->hist, f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd, state->lambda, graph, - fr, mdScheduleWork, vsite, mu_tot, t, ed, + fr, runScheduleWork, vsite, mu_tot, t, ed, GMX_FORCE_NS | force_flags, ddBalanceRegionHandler); } diff --git a/src/gromacs/mdrun/minimize.cpp b/src/gromacs/mdrun/minimize.cpp index 263410c6cc..82a2e2fa74 100644 --- a/src/gromacs/mdrun/minimize.cpp +++ b/src/gromacs/mdrun/minimize.cpp @@ -107,6 +107,8 @@ #include "legacysimulator.h" #include "shellfc.h" +using gmx::MdrunScheduleWorkload; + //! Utility structure for manipulating states during EM typedef struct { //! Copy of the global state @@ -790,7 +792,7 @@ class EnergyEvaluator //! Handles how to calculate the forces. t_forcerec *fr; //! Schedule of force-calculation work each step for this task. - gmx::MdScheduleWorkload *mdScheduleWork; + MdrunScheduleWorkload *runScheduleWork; //! Stores the computed energies. gmx_enerdata_t *enerd; }; @@ -849,7 +851,7 @@ EnergyEvaluator::run(em_state_t *ems, rvec mu_tot, count, nrnb, wcycle, top, ems->s.box, ems->s.x.arrayRefWithPadding(), &ems->s.hist, ems->f.arrayRefWithPadding(), force_vir, mdAtoms->mdatoms(), enerd, fcd, - ems->s.lambda, graph, fr, mdScheduleWork, vsite, mu_tot, t, nullptr, + ems->s.lambda, graph, fr, runScheduleWork, vsite, mu_tot, t, nullptr, GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY | (bNS ? GMX_FORCE_NS : 0), @@ -1137,7 +1139,7 @@ LegacySimulator::do_cg() top_global, &top, inputrec, imdSession, pull_work, nrnb, wcycle, gstat, vsite, constr, fcd, graph, - mdAtoms, fr, mdScheduleWork, enerd + mdAtoms, fr, runScheduleWork, enerd }; /* Call the force routine and some auxiliary (neighboursearching etc.) 
*/ /* do_force always puts the charge groups in the box and shifts again @@ -1817,7 +1819,7 @@ LegacySimulator::do_lbfgs() top_global, &top, inputrec, imdSession, pull_work, nrnb, wcycle, gstat, vsite, constr, fcd, graph, - mdAtoms, fr, mdScheduleWork, enerd + mdAtoms, fr, runScheduleWork, enerd }; energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE); @@ -2477,7 +2479,7 @@ LegacySimulator::do_steep() top_global, &top, inputrec, imdSession, pull_work, nrnb, wcycle, gstat, vsite, constr, fcd, graph, - mdAtoms, fr, mdScheduleWork, enerd + mdAtoms, fr, runScheduleWork, enerd }; /**** HERE STARTS THE LOOP **** @@ -2782,7 +2784,7 @@ LegacySimulator::do_nm() top_global, &top, inputrec, imdSession, pull_work, nrnb, wcycle, gstat, vsite, constr, fcd, graph, - mdAtoms, fr, mdScheduleWork, enerd + mdAtoms, fr, runScheduleWork, enerd }; energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE); cr->nnodes = nnodes; @@ -2869,7 +2871,7 @@ LegacySimulator::do_nm() graph, shellfc, fr, - mdScheduleWork, + runScheduleWork, t, mu_tot, vsite, diff --git a/src/gromacs/mdrun/rerun.cpp b/src/gromacs/mdrun/rerun.cpp index 2cc5646614..c8453f7683 100644 --- a/src/gromacs/mdrun/rerun.cpp +++ b/src/gromacs/mdrun/rerun.cpp @@ -558,7 +558,7 @@ void gmx::LegacySimulator::do_rerun() &state->hist, f.arrayRefWithPadding(), force_vir, mdatoms, nrnb, wcycle, graph, - shellfc, fr, mdScheduleWork, t, mu_tot, + shellfc, fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler); } @@ -577,7 +577,7 @@ void gmx::LegacySimulator::do_rerun() state->box, state->x.arrayRefWithPadding(), &state->hist, f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd, state->lambda, graph, - fr, mdScheduleWork, vsite, mu_tot, t, ed, + fr, runScheduleWork, vsite, mu_tot, t, ed, GMX_FORCE_NS | force_flags, ddBalanceRegionHandler); } diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 8d2f549c9d..f6659972c1 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -94,7 +94,6 @@ #include "gromacs/mdlib/md_support.h" #include "gromacs/mdlib/mdatoms.h" #include "gromacs/mdlib/membed.h" -#include "gromacs/mdlib/ppforceworkload.h" #include "gromacs/mdlib/qmmm.h" #include "gromacs/mdlib/sighandler.h" #include "gromacs/mdlib/stophandler.h" @@ -113,6 +112,7 @@ #include "gromacs/mdtypes/md_enums.h" #include "gromacs/mdtypes/mdrunoptions.h" #include "gromacs/mdtypes/observableshistory.h" +#include "gromacs/mdtypes/simulation_workload.h" #include "gromacs/mdtypes/state.h" #include "gromacs/nbnxm/gpu_data_mgmt.h" #include "gromacs/nbnxm/nbnxm.h" @@ -1546,7 +1546,7 @@ int Mdrunner::mdrunner() // TODO This is not the right place to manage the lifetime of // this data structure, but currently it's the easiest way to // make it work. 
- MdScheduleWorkload mdScheduleWork; + MdrunScheduleWorkload runScheduleWork; GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator."); SimulatorBuilder simulatorBuilder; @@ -1575,7 +1575,7 @@ int Mdrunner::mdrunner() mdAtoms.get(), &nrnb, wcycle, fr, &enerd, &ekind, - &mdScheduleWork, + &runScheduleWork, replExParams, membed, walltime_accounting, diff --git a/src/gromacs/mdrun/shellfc.cpp b/src/gromacs/mdrun/shellfc.cpp index bd79c56b8f..876e4a71f1 100644 --- a/src/gromacs/mdrun/shellfc.cpp +++ b/src/gromacs/mdrun/shellfc.cpp @@ -998,7 +998,7 @@ void relax_shell_flexcon(FILE *fplog, t_graph *graph, gmx_shellfc_t *shfc, t_forcerec *fr, - gmx::MdScheduleWorkload *mdScheduleWork, + gmx::MdrunScheduleWorkload *runScheduleWork, double t, rvec mu_tot, const gmx_vsite_t *vsite, @@ -1134,7 +1134,7 @@ void relax_shell_flexcon(FILE *fplog, box, x, hist, forceWithPadding[Min], force_vir, md, enerd, fcd, lambda, graph, - fr, mdScheduleWork, vsite, mu_tot, t, nullptr, + fr, runScheduleWork, vsite, mu_tot, t, nullptr, (bDoNS ? GMX_FORCE_NS : 0) | shellfc_flags, ddBalanceRegionHandler); @@ -1245,7 +1245,7 @@ void relax_shell_flexcon(FILE *fplog, top, box, posWithPadding[Try], hist, forceWithPadding[Try], force_vir, md, enerd, fcd, lambda, graph, - fr, mdScheduleWork, vsite, mu_tot, t, nullptr, + fr, runScheduleWork, vsite, mu_tot, t, nullptr, shellfc_flags, ddBalanceRegionHandler); sum_epot(&(enerd->grpp), enerd->term); diff --git a/src/gromacs/mdrun/shellfc.h b/src/gromacs/mdrun/shellfc.h index 7782885669..6ec6dc99d8 100644 --- a/src/gromacs/mdrun/shellfc.h +++ b/src/gromacs/mdrun/shellfc.h @@ -62,7 +62,7 @@ namespace gmx { class Constraints; class ImdSession; -class MdScheduleWorkload; +class MdrunScheduleWorkload; } /* Initialization function, also predicts the initial shell postions. @@ -102,7 +102,7 @@ void relax_shell_flexcon(FILE *log, t_graph *graph, gmx_shellfc_t *shfc, t_forcerec *fr, - gmx::MdScheduleWorkload *mdScheduleWork, + gmx::MdrunScheduleWorkload *runScheduleWork, double t, rvec mu_tot, const gmx_vsite_t *vsite, diff --git a/src/gromacs/mdrun/simulatorbuilder.h b/src/gromacs/mdrun/simulatorbuilder.h index 739664554a..b7847bf93a 100644 --- a/src/gromacs/mdrun/simulatorbuilder.h +++ b/src/gromacs/mdrun/simulatorbuilder.h @@ -74,7 +74,7 @@ namespace gmx enum class StartingBehavior; class BoxDeformation; class Constraints; -class MdScheduleWorkload; +class MdrunScheduleWorkload; class IMDOutputProvider; class ImdSession; class MDLogger; diff --git a/src/gromacs/mdrun/tpi.cpp b/src/gromacs/mdrun/tpi.cpp index a72e3607b8..b7434f0529 100644 --- a/src/gromacs/mdrun/tpi.cpp +++ b/src/gromacs/mdrun/tpi.cpp @@ -754,7 +754,7 @@ LegacySimulator::do_tpi() state_global->box, state_global->x.arrayRefWithPadding(), &state_global->hist, f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd, state_global->lambda, - nullptr, fr, mdScheduleWork, nullptr, mu_tot, t, nullptr, + nullptr, fr, runScheduleWork, nullptr, mu_tot, t, nullptr, GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY | (bStateChanged ? GMX_FORCE_STATECHANGED : 0), DDBalanceRegionHandler(nullptr)); diff --git a/src/gromacs/mdlib/ppforceworkload.h b/src/gromacs/mdtypes/simulation_workload.h similarity index 60% rename from src/gromacs/mdlib/ppforceworkload.h rename to src/gromacs/mdtypes/simulation_workload.h index 0ff7d990e1..7ae8cccab2 100644 --- a/src/gromacs/mdlib/ppforceworkload.h +++ b/src/gromacs/mdtypes/simulation_workload.h @@ -33,27 +33,30 @@ * the research papers on the package. 
Check out http://www.gromacs.org. */
 /*! \libinternal \file
- * \brief Declares force calculation workload manager.
+ * \brief Declares step, domain-lifetime, and run workload managers.
  *
  * \author Mark Abraham
+ * \author Szilárd Páll
  * \ingroup module_mdlib
  * \inlibraryapi
  */
-#ifndef GMX_MDLIB_PPFORCEWORKLOAD_H
-#define GMX_MDLIB_PPFORCEWORKLOAD_H
+#ifndef GMX_MDTYPES_SIMULATION_WORKLOAD_H
+#define GMX_MDTYPES_SIMULATION_WORKLOAD_H
 namespace gmx
 {
 /*! \libinternal
- * \brief Data structure to map force flags to booleans that have the role of
- * directing per-step tasks undertaken by a PP rank.
+ * \brief Data structure that describes work that can change per-step.
  *
- * Note that the contents of this class have a lifetime of a single step and
- * are expected to be set every step.
+ * Note that the contents of an object of this type have a lifetime
+ * of a single step and are expected to be set at the beginning of each step.
+ *
+ * The initial set of flags maps the legacy force flags to boolean flags;
+ * these have the role of directing per-step compute tasks undertaken by a PP rank.
  *
  */
-class ForceFlags
+class StepWorkload
 {
     public:
         //! Whether the state has changed, always set unless TPI is used.
@@ -77,49 +80,60 @@ class ForceFlags
 };
 /*! \libinternal
- * \brief Manage what force calculation work is required each step.
+ * \brief Manage computational work that has the lifetime of decomposition.
  *
- * An object of this type is updated every neighbour search stage to
- * reflect what work is required during normal MD steps, e.g. whether
- * there are bonded interactions in this PP task.
+ * An object of this type is updated every decomposition step
+ * (i.e. domain decomposition / neighbour search),
+ * reflecting what work is required during the lifetime of a domain,
+ * e.g. whether there are bonded interactions in this PP task.
  *
  * This will remove the desire for inline getters from modules that
  * describe whether they have work to do, because that can be set up
  * once per simulation or neighborlist lifetime and not changed
  * thereafter.
- *
- * \todo Add more responsibilities, including whether GPUs are in use,
- * whether there is PME work, whether DD is active, whether NB
- * local/nonlocal regions have work, whether forces/virial/energy are
- * required.
- *
- * TODO rename
  */
-class PpForceWorkload
+class DomainLifetimeWorkload
 {
     public:
-        //! Whether this MD step has bonded work to run on a GPU.
+        //! Whether the current nstlist step-range has bonded work to run on a GPU.
         bool haveGpuBondedWork = false;
-        //! Whether this MD step has bonded work to run on he CPU.
+        //! Whether the current nstlist step-range has bonded work to run on the CPU.
         bool haveCpuBondedWork = false;
-        //! Whether this MD step has restraints work to run on he CPU.
+        //! Whether the current nstlist step-range has restraints work to run on the CPU.
         bool haveRestraintsWork = false;
-        //! Whether this MD step has listed forces work to run on he CPU.
+        //! Whether the current nstlist step-range has listed forces work to run on the CPU.
         // Note: currently this is haveCpuBondedWork | haveRestraintsWork
         bool haveCpuListedForceWork = false;
-        //! Whether this MD step has special forces on the CPU.
+        //! Whether the current nstlist step-range has special forces on the CPU.
         bool haveSpecialForces = false;
 };
-class MdScheduleWorkload
+/*! \libinternal
+ * \brief Manage what computation is required during the simulation.
+ *
+ * Holds information on the type of workload constant for the entire
+ * simulation.
+ * + * An object of this type is constructed at the beginning of the + * simulation and is expected to not change. + */ +class SimulationWorkload +{ +}; + +class MdrunScheduleWorkload { public: - //! Force schedule workload descriptor constant for an nstlist range - gmx::PpForceWorkload forceWork; - //! Force flags changing per-step - gmx::ForceFlags forceFlags; + //! Workload descriptor for information constant for an entire run + gmx::SimulationWorkload simulationWork; + + //! Workload descriptor for information constant for an nstlist range of steps + gmx::DomainLifetimeWorkload domainWork; + + //! Workload descriptor for information that may change per-step + gmx::StepWorkload stepWork; }; -} // namespace gmx +} // namespace gmx -#endif +#endif // GMX_MDTYPES_SIMULATION_WORKLOAD_H diff --git a/src/gromacs/modularsimulator/forceelement.cpp b/src/gromacs/modularsimulator/forceelement.cpp index 366a07539e..0fb0ef019c 100644 --- a/src/gromacs/modularsimulator/forceelement.cpp +++ b/src/gromacs/modularsimulator/forceelement.cpp @@ -60,21 +60,21 @@ struct t_graph; namespace gmx { ForceElement::ForceElement( - StatePropagatorData *statePropagatorData, - EnergyElement *energyElement, - bool isDynamicBox, - FILE *fplog, - const t_commrec *cr, - const t_inputrec *inputrec, - const MDAtoms *mdAtoms, - t_nrnb *nrnb, - t_forcerec *fr, - t_fcdata *fcd, - gmx_wallcycle *wcycle, - MdScheduleWorkload *mdScheduleWork, - gmx_vsite_t *vsite, - ImdSession *imdSession, - pull_t *pull_work) : + StatePropagatorData *statePropagatorData, + EnergyElement *energyElement, + bool isDynamicBox, + FILE *fplog, + const t_commrec *cr, + const t_inputrec *inputrec, + const MDAtoms *mdAtoms, + t_nrnb *nrnb, + t_forcerec *fr, + t_fcdata *fcd, + gmx_wallcycle *wcycle, + MdrunScheduleWorkload *runScheduleWork, + gmx_vsite_t *vsite, + ImdSession *imdSession, + pull_t *pull_work) : nextNSStep_(-1), nextEnergyCalculationStep_(-1), nextVirialCalculationStep_(-1), @@ -96,7 +96,7 @@ ForceElement::ForceElement( imdSession_(imdSession), pull_work_(pull_work), fcd_(fcd), - mdScheduleWork_(mdScheduleWork) + runScheduleWork_(runScheduleWork) { lambda_.fill(0); } @@ -152,7 +152,7 @@ void ForceElement::run(Step step, Time time, unsigned int flags) box, x, hist, forces, force_vir, mdAtoms_->mdatoms(), energyElement_->enerdata(), fcd_, lambda_, graph, - fr_, mdScheduleWork_, vsite_, energyElement_->muTot(), time, ed, + fr_, runScheduleWork_, vsite_, energyElement_->muTot(), time, ed, static_cast(flags), ddBalanceRegionHandler_); energyElement_->addToForceVirial(force_vir, step); } diff --git a/src/gromacs/modularsimulator/forceelement.h b/src/gromacs/modularsimulator/forceelement.h index 86aca839f8..4334dfdaa4 100644 --- a/src/gromacs/modularsimulator/forceelement.h +++ b/src/gromacs/modularsimulator/forceelement.h @@ -62,7 +62,7 @@ class Awh; class EnergyElement; class ImdSession; class MDAtoms; -class MdScheduleWorkload; +class MdrunScheduleWorkload; class StatePropagatorData; //! \addtogroup module_modularsimulator @@ -82,21 +82,21 @@ class ForceElement final : public: //! 
Constructor ForceElement( - StatePropagatorData *statePropagatorData, - EnergyElement *energyElement, - bool isDynamicBox, - FILE *fplog, - const t_commrec *cr, - const t_inputrec *inputrec, - const MDAtoms *mdAtoms, - t_nrnb *nrnb, - t_forcerec *fr, - t_fcdata *fcd, - gmx_wallcycle *wcycle, - MdScheduleWorkload *mdScheduleWork, - gmx_vsite_t *vsite, - ImdSession *imdSession, - pull_t *pull_work); + StatePropagatorData *statePropagatorData, + EnergyElement *energyElement, + bool isDynamicBox, + FILE *fplog, + const t_commrec *cr, + const t_inputrec *inputrec, + const MDAtoms *mdAtoms, + t_nrnb *nrnb, + t_forcerec *fr, + t_fcdata *fcd, + gmx_wallcycle *wcycle, + MdrunScheduleWorkload *runScheduleWork, + gmx_vsite_t *vsite, + ImdSession *imdSession, + pull_t *pull_work); /*! \brief Register force calculation for step / time * @@ -151,29 +151,29 @@ class ForceElement final : // Access to ISimulator data //! Handles logging. - FILE *fplog_; + FILE *fplog_; //! Handles communication. - const t_commrec *cr_; + const t_commrec *cr_; //! Contains user input mdp options. - const t_inputrec *inputrec_; + const t_inputrec *inputrec_; //! Atom parameters for this domain. - const MDAtoms *mdAtoms_; + const MDAtoms *mdAtoms_; //! Manages flop accounting. - t_nrnb *nrnb_; + t_nrnb *nrnb_; //! Manages wall cycle accounting. - gmx_wallcycle *wcycle_; + gmx_wallcycle *wcycle_; //! Parameters for force calculations. - t_forcerec *fr_; + t_forcerec *fr_; //! Handles virtual sites. - gmx_vsite_t *vsite_; + gmx_vsite_t *vsite_; //! The Interactive Molecular Dynamics session. - ImdSession *imdSession_; + ImdSession *imdSession_; //! The pull work object. - pull_t *pull_work_; + pull_t *pull_work_; //! Helper struct for force calculations. - t_fcdata *fcd_; + t_fcdata *fcd_; //! Schedule of work for each MD step for this task. - MdScheduleWorkload *mdScheduleWork_; + MdrunScheduleWorkload *runScheduleWork_; }; //! 
\} diff --git a/src/gromacs/modularsimulator/modularsimulator.cpp b/src/gromacs/modularsimulator/modularsimulator.cpp index 64e2de68f3..885c3b204e 100644 --- a/src/gromacs/modularsimulator/modularsimulator.cpp +++ b/src/gromacs/modularsimulator/modularsimulator.cpp @@ -545,7 +545,7 @@ std::unique_ptr ModularSimulator::buildForces( { auto shellFCElement = std::make_unique( statePropagatorDataPtr, energyElementPtr, isVerbose, isDynamicBox, fplog, - cr, inputrec, mdAtoms, nrnb, fr, fcd, wcycle, mdScheduleWork, + cr, inputrec, mdAtoms, nrnb, fr, fcd, wcycle, runScheduleWork, vsite, imdSession, pull_work, constr, &topologyHolder_->globalTopology()); topologyHolder_->registerClient(shellFCElement.get()); neighborSearchSignallerBuilder->registerSignallerClient(compat::make_not_null(shellFCElement.get())); @@ -559,7 +559,7 @@ std::unique_ptr ModularSimulator::buildForces( auto forceElement = std::make_unique( statePropagatorDataPtr, energyElementPtr, isDynamicBox, fplog, cr, inputrec, mdAtoms, nrnb, fr, fcd, wcycle, - mdScheduleWork, vsite, imdSession, pull_work); + runScheduleWork, vsite, imdSession, pull_work); topologyHolder_->registerClient(forceElement.get()); neighborSearchSignallerBuilder->registerSignallerClient(compat::make_not_null(forceElement.get())); energySignallerBuilder->registerSignallerClient(compat::make_not_null(forceElement.get())); diff --git a/src/gromacs/modularsimulator/shellfcelement.cpp b/src/gromacs/modularsimulator/shellfcelement.cpp index 7a82459f2d..d1e82a4279 100644 --- a/src/gromacs/modularsimulator/shellfcelement.cpp +++ b/src/gromacs/modularsimulator/shellfcelement.cpp @@ -76,24 +76,24 @@ bool ShellFCElement::doShellsOrFlexConstraints( } ShellFCElement::ShellFCElement( - StatePropagatorData *statePropagatorData, - EnergyElement *energyElement, - bool isVerbose, - bool isDynamicBox, - FILE *fplog, - const t_commrec *cr, - const t_inputrec *inputrec, - const MDAtoms *mdAtoms, - t_nrnb *nrnb, - t_forcerec *fr, - t_fcdata *fcd, - gmx_wallcycle *wcycle, - MdScheduleWorkload *mdScheduleWork, - gmx_vsite_t *vsite, - ImdSession *imdSession, - pull_t *pull_work, - Constraints *constr, - const gmx_mtop_t *globalTopology) : + StatePropagatorData *statePropagatorData, + EnergyElement *energyElement, + bool isVerbose, + bool isDynamicBox, + FILE *fplog, + const t_commrec *cr, + const t_inputrec *inputrec, + const MDAtoms *mdAtoms, + t_nrnb *nrnb, + t_forcerec *fr, + t_fcdata *fcd, + gmx_wallcycle *wcycle, + MdrunScheduleWorkload *runScheduleWork, + gmx_vsite_t *vsite, + ImdSession *imdSession, + pull_t *pull_work, + Constraints *constr, + const gmx_mtop_t *globalTopology) : nextNSStep_(-1), nextEnergyCalculationStep_(-1), nextVirialCalculationStep_(-1), @@ -116,7 +116,7 @@ ShellFCElement::ShellFCElement( imdSession_(imdSession), pull_work_(pull_work), fcd_(fcd), - mdScheduleWork_(mdScheduleWork), + runScheduleWork_(runScheduleWork), constr_(constr) { shellfc_ = init_shell_flexcon( @@ -182,7 +182,7 @@ void ShellFCElement::run(Step step, Time time, unsigned int flags) statePropagatorData_->localNumAtoms(), x, v, box, lambda, hist, forces, force_vir, mdAtoms_->mdatoms(), nrnb_, wcycle_, graph, - shellfc_, fr_, mdScheduleWork_, time, + shellfc_, fr_, runScheduleWork_, time, energyElement_->muTot(), vsite_, ddBalanceRegionHandler_); energyElement_->addToForceVirial(force_vir, step); diff --git a/src/gromacs/modularsimulator/shellfcelement.h b/src/gromacs/modularsimulator/shellfcelement.h index 81cae56179..4c6b225b27 100644 --- a/src/gromacs/modularsimulator/shellfcelement.h +++ 
b/src/gromacs/modularsimulator/shellfcelement.h @@ -59,7 +59,7 @@ class Awh; class EnergyElement; class ImdSession; class MDAtoms; -class MdScheduleWorkload; +class MdrunScheduleWorkload; class StatePropagatorData; //! \addtogroup module_modularsimulator @@ -79,24 +79,24 @@ class ShellFCElement final : public: //! Constructor ShellFCElement( - StatePropagatorData *statePropagatorData, - EnergyElement *energyElement, - bool isVerbose, - bool isDynamicBox, - FILE *fplog, - const t_commrec *cr, - const t_inputrec *inputrec, - const MDAtoms *mdAtoms, - t_nrnb *nrnb, - t_forcerec *fr, - t_fcdata *fcd, - gmx_wallcycle *wcycle, - MdScheduleWorkload *mdScheduleWork, - gmx_vsite_t *vsite, - ImdSession *imdSession, - pull_t *pull_work, - Constraints *constr, - const gmx_mtop_t *globalTopology); + StatePropagatorData *statePropagatorData, + EnergyElement *energyElement, + bool isVerbose, + bool isDynamicBox, + FILE *fplog, + const t_commrec *cr, + const t_inputrec *inputrec, + const MDAtoms *mdAtoms, + t_nrnb *nrnb, + t_forcerec *fr, + t_fcdata *fcd, + gmx_wallcycle *wcycle, + MdrunScheduleWorkload *runScheduleWork, + gmx_vsite_t *vsite, + ImdSession *imdSession, + pull_t *pull_work, + Constraints *constr, + const gmx_mtop_t *globalTopology); /*! \brief Register shell / flex constraint calculation for step / time * @@ -158,31 +158,31 @@ class ShellFCElement final : // Access to ISimulator data //! Handles logging. - FILE *fplog_; + FILE *fplog_; //! Handles communication. - const t_commrec *cr_; + const t_commrec *cr_; //! Contains user input mdp options. - const t_inputrec *inputrec_; + const t_inputrec *inputrec_; //! Atom parameters for this domain. - const MDAtoms *mdAtoms_; + const MDAtoms *mdAtoms_; //! Manages flop accounting. - t_nrnb *nrnb_; + t_nrnb *nrnb_; //! Manages wall cycle accounting. - gmx_wallcycle *wcycle_; + gmx_wallcycle *wcycle_; //! Parameters for force calculations. - t_forcerec *fr_; + t_forcerec *fr_; //! Handles virtual sites. - gmx_vsite_t *vsite_; + gmx_vsite_t *vsite_; //! The Interactive Molecular Dynamics session. - ImdSession *imdSession_; + ImdSession *imdSession_; //! The pull work object. - pull_t *pull_work_; + pull_t *pull_work_; //! Helper struct for force calculations. - t_fcdata *fcd_; + t_fcdata *fcd_; //! Schedule of work for each MD step for this task. - MdScheduleWorkload *mdScheduleWork_; + MdrunScheduleWorkload *runScheduleWork_; //! Handles constraints. - Constraints *constr_; + Constraints *constr_; }; //! 
\} diff --git a/src/gromacs/nbnxm/benchmark/bench_setup.cpp b/src/gromacs/nbnxm/benchmark/bench_setup.cpp index a7bf5d59f0..941772cae0 100644 --- a/src/gromacs/nbnxm/benchmark/bench_setup.cpp +++ b/src/gromacs/nbnxm/benchmark/bench_setup.cpp @@ -51,11 +51,11 @@ #include "gromacs/mdlib/force_flags.h" #include "gromacs/mdlib/forcerec.h" #include "gromacs/mdlib/gmx_omp_nthreads.h" -#include "gromacs/mdlib/ppforceworkload.h" #include "gromacs/mdtypes/enerdata.h" #include "gromacs/mdtypes/forcerec.h" #include "gromacs/mdtypes/interaction_const.h" #include "gromacs/mdtypes/mdatom.h" +#include "gromacs/mdtypes/simulation_workload.h" #include "gromacs/nbnxm/atomdata.h" #include "gromacs/nbnxm/gridset.h" #include "gromacs/nbnxm/nbnxm.h" @@ -298,12 +298,12 @@ static void setupAndRunInstance(const gmx::BenchmarkSystem &system, gmx_enerdata_t enerd(1, 0); - gmx::ForceFlags forceFlags; - forceFlags.computeForces = true; + gmx::StepWorkload stepWork; + stepWork.computeForces = true; if (options.computeVirialAndEnergy) { - forceFlags.computeVirial = true; - forceFlags.computeEnergy = true; + stepWork.computeVirial = true; + stepWork.computeEnergy = true; } const gmx::EnumerationArray kernelNames = { "auto", "no", "4xM", "2xMM" }; @@ -323,7 +323,7 @@ static void setupAndRunInstance(const gmx::BenchmarkSystem &system, for (int iter = 0; iter < options.numPreIterations; iter++) { nbv->dispatchNonbondedKernel(InteractionLocality::Local, - ic, forceFlags, enbvClearFYes, system.forceRec, + ic, stepWork, enbvClearFYes, system.forceRec, &enerd, &nrnb); } @@ -336,7 +336,7 @@ static void setupAndRunInstance(const gmx::BenchmarkSystem &system, { // Run the kernel without force clearing nbv->dispatchNonbondedKernel(InteractionLocality::Local, - ic, forceFlags, enbvClearFNo, system.forceRec, + ic, stepWork, enbvClearFNo, system.forceRec, &enerd, &nrnb); } diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu index 05c0278abc..10a6f0a0d3 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu @@ -56,7 +56,7 @@ #include "gromacs/gpu_utils/cudautils.cuh" #include "gromacs/gpu_utils/gpueventsynchronizer.cuh" #include "gromacs/gpu_utils/vectype_ops.cuh" -#include "gromacs/mdlib/ppforceworkload.h" +#include "gromacs/mdtypes/simulation_workload.h" #include "gromacs/nbnxm/atomdata.h" #include "gromacs/nbnxm/gpu_common.h" #include "gromacs/nbnxm/gpu_common_utils.h" @@ -402,7 +402,7 @@ void gpu_copy_xq_to_gpu(gmx_nbnxn_cuda_t *nb, with this event in the non-local stream before launching the non-bonded kernel. */ void gpu_launch_kernel(gmx_nbnxn_cuda_t *nb, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, const InteractionLocality iloc) { cu_atomdata_t *adat = nb->atdat; @@ -485,10 +485,10 @@ void gpu_launch_kernel(gmx_nbnxn_cuda_t *nb, auto *timingEvent = bDoTime ? 
t->interaction[iloc].nb_k.fetchNextEvent() : nullptr; const auto kernel = select_nbnxn_kernel(nbp->eeltype, nbp->vdwtype, - forceFlags.computeEnergy, + stepWork.computeEnergy, (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune), nb->dev_info); - const auto kernelArgs = prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &forceFlags.computeVirial); + const auto kernelArgs = prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &stepWork.computeVirial); launchGpuKernel(kernel, config, timingEvent, "k_calc_nb", kernelArgs); if (bDoTime) @@ -640,11 +640,11 @@ void gpu_launch_kernel_pruneonly(gmx_nbnxn_cuda_t *nb, } } -void gpu_launch_cpyback(gmx_nbnxn_cuda_t *nb, - nbnxn_atomdata_t *nbatom, - const gmx::ForceFlags &forceFlags, - const AtomLocality atomLocality, - const bool copyBackNbForce) +void gpu_launch_cpyback(gmx_nbnxn_cuda_t *nb, + nbnxn_atomdata_t *nbatom, + const gmx::StepWorkload &stepWork, + const AtomLocality atomLocality, + const bool copyBackNbForce) { GMX_ASSERT(nb, "Need a valid nbnxn_gpu object"); @@ -703,14 +703,14 @@ void gpu_launch_cpyback(gmx_nbnxn_cuda_t *nb, if (iloc == InteractionLocality::Local) { /* DtoH fshift when virial is needed */ - if (forceFlags.computeVirial) + if (stepWork.computeVirial) { cu_copy_D2H_async(nb->nbst.fshift, adat->fshift, SHIFTS * sizeof(*nb->nbst.fshift), stream); } /* DtoH energies */ - if (forceFlags.computeEnergy) + if (stepWork.computeEnergy) { cu_copy_D2H_async(nb->nbst.e_lj, adat->e_lj, sizeof(*nb->nbst.e_lj), stream); diff --git a/src/gromacs/nbnxm/gpu_common.h b/src/gromacs/nbnxm/gpu_common.h index 599c97edd4..5e670d4e16 100644 --- a/src/gromacs/nbnxm/gpu_common.h +++ b/src/gromacs/nbnxm/gpu_common.h @@ -58,7 +58,7 @@ #include "gromacs/gpu_utils/gpu_utils.h" #include "gromacs/listed_forces/gpubonded.h" #include "gromacs/math/vec.h" -#include "gromacs/mdlib/ppforceworkload.h" +#include "gromacs/mdtypes/simulation_workload.h" #include "gromacs/nbnxm/nbnxm.h" #include "gromacs/pbcutil/ishift.h" #include "gromacs/timing/gpu_timing.h" @@ -367,7 +367,7 @@ gpu_accumulate_timings(gmx_wallclock_gpu_nbnxn_t *timings, //TODO: move into shared source file with gmx_compile_cpp_as_cuda //NOLINTNEXTLINE(misc-definitions-in-headers) bool gpu_try_finish_task(gmx_nbnxn_gpu_t *nb, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, const AtomLocality aloc, real *e_lj, real *e_el, @@ -410,10 +410,10 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t *nb, gpuStreamSynchronize(nb->stream[iLocality]); } - gpu_accumulate_timings(nb->timings, nb->timers, nb->plist[iLocality], aloc, forceFlags.computeEnergy, + gpu_accumulate_timings(nb->timings, nb->timers, nb->plist[iLocality], aloc, stepWork.computeEnergy, nb->bDoTime != 0); - gpu_reduce_staged_outputs(nb->nbst, iLocality, forceFlags.computeEnergy, forceFlags.computeVirial, + gpu_reduce_staged_outputs(nb->nbst, iLocality, stepWork.computeEnergy, stepWork.computeVirial, e_lj, e_el, as_rvec_array(shiftForces.data())); } @@ -435,7 +435,7 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t *nb, * pruning flags. 
* * \param[in] nb The nonbonded data GPU structure - * \param[in] forceFlags Force schedule flags + * \param[in] stepWork Force schedule flags * \param[in] aloc Atom locality identifier * \param[out] e_lj Pointer to the LJ energy output to accumulate into * \param[out] e_el Pointer to the electrostatics energy output to accumulate into @@ -445,7 +445,7 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t *nb, */ //NOLINTNEXTLINE(misc-definitions-in-headers) TODO: move into source file float gpu_wait_finish_task(gmx_nbnxn_gpu_t *nb, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, AtomLocality aloc, real *e_lj, real *e_el, @@ -456,7 +456,7 @@ float gpu_wait_finish_task(gmx_nbnxn_gpu_t *nb, (gpuAtomToInteractionLocality(aloc) == InteractionLocality::Local) ? ewcWAIT_GPU_NB_L : ewcWAIT_GPU_NB_NL; wallcycle_start(wcycle, cycleCounter); - gpu_try_finish_task(nb, forceFlags, aloc, e_lj, e_el, shiftForces, + gpu_try_finish_task(nb, stepWork, aloc, e_lj, e_el, shiftForces, GpuTaskCompletion::Wait, wcycle); float waitTime = wallcycle_stop(wcycle, cycleCounter); diff --git a/src/gromacs/nbnxm/kerneldispatch.cpp b/src/gromacs/nbnxm/kerneldispatch.cpp index 2d53a92dc7..24d693b47a 100644 --- a/src/gromacs/nbnxm/kerneldispatch.cpp +++ b/src/gromacs/nbnxm/kerneldispatch.cpp @@ -43,13 +43,13 @@ #include "gromacs/mdlib/enerdata_utils.h" #include "gromacs/mdlib/force.h" #include "gromacs/mdlib/gmx_omp_nthreads.h" -#include "gromacs/mdlib/ppforceworkload.h" #include "gromacs/mdtypes/enerdata.h" #include "gromacs/mdtypes/forceoutput.h" #include "gromacs/mdtypes/inputrec.h" #include "gromacs/mdtypes/interaction_const.h" #include "gromacs/mdtypes/md_enums.h" #include "gromacs/mdtypes/mdatom.h" +#include "gromacs/mdtypes/simulation_workload.h" #include "gromacs/nbnxm/gpu_data_mgmt.h" #include "gromacs/nbnxm/nbnxm.h" #include "gromacs/nbnxm/nbnxm_simd.h" @@ -143,7 +143,7 @@ reduceGroupEnergySimdBuffers(int numGroups, * \param[in,out] nbat The atomdata for the interactions * \param[in] ic Non-bonded interaction constants * \param[in] shiftVectors The PBC shift vectors - * \param[in] forceFlags Flags that tell what to compute + * \param[in] stepWork Flags that tell what to compute * \param[in] clearF Enum that tells if to clear the force output buffer * \param[out] vCoulomb Output buffer for Coulomb energies * \param[out] vVdw Output buffer for Van der Waals energies @@ -155,7 +155,7 @@ nbnxn_kernel_cpu(const PairlistSet &pairlistSet, nbnxn_atomdata_t *nbat, const interaction_const_t &ic, rvec *shiftVectors, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, int clearF, real *vCoulomb, real *vVdw, @@ -266,7 +266,7 @@ nbnxn_kernel_cpu(const PairlistSet &pairlistSet, // TODO: Change to reference const NbnxnPairlistCpu *pairlist = &pairlists[nb]; - if (!forceFlags.computeEnergy) + if (!stepWork.computeEnergy) { /* Don't calculate energies */ switch (kernelSetup.kernelType) @@ -396,7 +396,7 @@ nbnxn_kernel_cpu(const PairlistSet &pairlistSet, } wallcycle_sub_stop(wcycle, ewcsNONBONDED_KERNEL); - if (forceFlags.computeEnergy) + if (stepWork.computeEnergy) { reduce_energies_over_lists(nbat, pairlists.ssize(), vVdw, vCoulomb); } @@ -406,7 +406,7 @@ static void accountFlops(t_nrnb *nrnb, const PairlistSet &pairlistSet, const nonbonded_verlet_t &nbv, const interaction_const_t &ic, - const gmx::ForceFlags &forceFlags) + const gmx::StepWorkload &stepWork) { const bool usingGpuKernels = nbv.useGpu(); @@ -425,7 +425,7 @@ static void accountFlops(t_nrnb *nrnb, enr_nbnxn_kernel_ljc = 
eNR_NBNXN_LJ_TAB; } int enr_nbnxn_kernel_lj = eNR_NBNXN_LJ; - if (forceFlags.computeEnergy) + if (stepWork.computeEnergy) { /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */ enr_nbnxn_kernel_ljc += 1; @@ -443,19 +443,19 @@ static void accountFlops(t_nrnb *nrnb, if (ic.vdw_modifier == eintmodFORCESWITCH) { /* We add up the switch cost separately */ - inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW + (forceFlags.computeEnergy ? 1 : 0), + inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW + (stepWork.computeEnergy ? 1 : 0), pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_); } if (ic.vdw_modifier == eintmodPOTSWITCH) { /* We add up the switch cost separately */ - inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW + (forceFlags.computeEnergy ? 1 : 0), + inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW + (stepWork.computeEnergy ? 1 : 0), pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_); } if (ic.vdwtype == evdwPME) { /* We add up the LJ Ewald cost separately */ - inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD + (forceFlags.computeEnergy ? 1 : 0), + inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD + (stepWork.computeEnergy ? 1 : 0), pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_); } } @@ -463,7 +463,7 @@ static void accountFlops(t_nrnb *nrnb, void nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality, const interaction_const_t &ic, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, int clearF, const t_forcerec &fr, gmx_enerdata_t *enerd, @@ -481,7 +481,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality nbat.get(), ic, fr.shift_vec, - forceFlags, + stepWork, clearF, enerd->grpp.ener[egCOULSR].data(), fr.bBHAM ? @@ -491,14 +491,14 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality break; case Nbnxm::KernelType::Gpu8x8x8: - Nbnxm::gpu_launch_kernel(gpu_nbv, forceFlags, iLocality); + Nbnxm::gpu_launch_kernel(gpu_nbv, stepWork, iLocality); break; case Nbnxm::KernelType::Cpu8x8x8_PlainC: nbnxn_kernel_gpu_ref(pairlistSet.gpuList(), nbat.get(), &ic, fr.shift_vec, - forceFlags, + stepWork, clearF, nbat->out[0].f, nbat->out[0].fshift.data(), @@ -513,7 +513,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality } - accountFlops(nrnb, pairlistSet, *this, ic, forceFlags); + accountFlops(nrnb, pairlistSet, *this, ic, stepWork); } void @@ -525,7 +525,7 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocali t_lambda *fepvals, real *lambda, gmx_enerdata_t *enerd, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, t_nrnb *nrnb) { const auto nbl_fep = pairlistSets().pairlistSet(iLocality).fepLists(); @@ -541,15 +541,15 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocali donb_flags |= GMX_NONBONDED_DO_SR; /* Currently all group scheme kernels always calculate (shift-)forces */ - if (forceFlags.computeForces) + if (stepWork.computeForces) { donb_flags |= GMX_NONBONDED_DO_FORCE; } - if (forceFlags.computeVirial) + if (stepWork.computeVirial) { donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; } - if (forceFlags.computeEnergy) + if (stepWork.computeEnergy) { donb_flags |= GMX_NONBONDED_DO_POTENTIAL; } @@ -592,7 +592,7 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocali /* If we do foreign lambda and we have soft-core interactions * we have to recalculate the (non-linear) energies contributions. 
*/ - if (fepvals->n_lambda > 0 && forceFlags.computeDhdl && fepvals->sc_alpha != 0) + if (fepvals->n_lambda > 0 && stepWork.computeDhdl && fepvals->sc_alpha != 0) { real lam_i[efptNR]; kernel_data.flags = (donb_flags & ~(GMX_NONBONDED_DO_FORCE | GMX_NONBONDED_DO_SHIFTFORCE)) | GMX_NONBONDED_DO_FOREIGNLAMBDA; diff --git a/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.cpp b/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.cpp index 16a4397112..d9489ec746 100644 --- a/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.cpp +++ b/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.cpp @@ -43,8 +43,8 @@ #include "gromacs/math/functions.h" #include "gromacs/math/utilities.h" #include "gromacs/math/vec.h" -#include "gromacs/mdlib/ppforceworkload.h" #include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/simulation_workload.h" #include "gromacs/nbnxm/atomdata.h" #include "gromacs/nbnxm/nbnxm.h" #include "gromacs/nbnxm/pairlist.h" @@ -59,7 +59,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, const nbnxn_atomdata_t *nbat, const interaction_const_t *iconst, rvec *shift_vec, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, int clearF, gmx::ArrayRef f, real * fshift, @@ -262,7 +262,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, /* Reaction-field */ krsq = iconst->k_rf*rsq; fscal = qq*(int_bit*rinv - 2*krsq)*rinvsq; - if (forceFlags.computeEnergy) + if (stepWork.computeEnergy) { vcoul = qq*(int_bit*rinv + krsq - iconst->c_rf); } @@ -278,7 +278,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, fscal = qq*(int_bit*rinvsq - fexcl)*rinv; - if (forceFlags.computeEnergy) + if (stepWork.computeEnergy) { vcoul = qq*((int_bit - std::erf(iconst->ewaldcoeff_q*r))*rinv - int_bit*iconst->sh_ewald); } @@ -297,7 +297,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, Vvdw_rep = c12*rinvsix*rinvsix; fscal += (Vvdw_rep - Vvdw_disp)*rinvsq; - if (forceFlags.computeEnergy) + if (stepWork.computeEnergy) { vctot += vcoul; @@ -347,7 +347,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, } } - if (forceFlags.computeEnergy) + if (stepWork.computeEnergy) { ggid = 0; Vc[ggid] = Vc[ggid] + vctot; diff --git a/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.h b/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.h index d26747e0c7..c508f1e34c 100644 --- a/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.h +++ b/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.h @@ -46,7 +46,7 @@ struct nbnxn_atomdata_t; namespace gmx { -class ForceFlags; +class StepWorkload; } /* Reference (slow) kernel for nb n vs n GPU type pair lists */ @@ -55,7 +55,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, const nbnxn_atomdata_t *nbat, const interaction_const_t *iconst, rvec *shift_vec, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, int clearF, gmx::ArrayRef f, real * fshift, diff --git a/src/gromacs/nbnxm/nbnxm.h b/src/gromacs/nbnxm/nbnxm.h index caa21d9c7f..c6d12d2e87 100644 --- a/src/gromacs/nbnxm/nbnxm.h +++ b/src/gromacs/nbnxm/nbnxm.h @@ -329,7 +329,7 @@ struct nonbonded_verlet_t //! 
\brief Executes the non-bonded kernel of the GPU or launches it on the GPU void dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality, const interaction_const_t &ic, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, int clearF, const t_forcerec &fr, gmx_enerdata_t *enerd, @@ -344,7 +344,7 @@ struct nonbonded_verlet_t t_lambda *fepvals, real *lambda, gmx_enerdata_t *enerd, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, t_nrnb *nrnb); /*! \brief Add the forces stored in nbat to f, zeros the forces in nbat diff --git a/src/gromacs/nbnxm/nbnxm_gpu.h b/src/gromacs/nbnxm/nbnxm_gpu.h index 635b9d9790..fef2e749bc 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu.h +++ b/src/gromacs/nbnxm/nbnxm_gpu.h @@ -59,7 +59,7 @@ enum class GpuTaskCompletion; namespace gmx { class GpuBonded; -class ForceFlags; +class StepWorkload; } namespace Nbnxm @@ -93,9 +93,9 @@ void gpu_copy_xq_to_gpu(gmx_nbnxn_gpu_t gmx_unused *nb, * */ GPU_FUNC_QUALIFIER -void gpu_launch_kernel(gmx_nbnxn_gpu_t gmx_unused *nb, - const gmx::ForceFlags gmx_unused &forceFlags, - InteractionLocality gmx_unused iloc) GPU_FUNC_TERM; +void gpu_launch_kernel(gmx_nbnxn_gpu_t gmx_unused *nb, + const gmx::StepWorkload gmx_unused &stepWork, + InteractionLocality gmx_unused iloc) GPU_FUNC_TERM; /*! \brief * Launch asynchronously the nonbonded prune-only kernel. @@ -142,11 +142,11 @@ void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t gmx_unused *nb, * (and energies/shift forces if required). */ GPU_FUNC_QUALIFIER -void gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused *nb, - nbnxn_atomdata_t gmx_unused *nbatom, - const gmx::ForceFlags gmx_unused &forceFlags, - AtomLocality gmx_unused aloc, - bool gmx_unused copyBackNbForce) GPU_FUNC_TERM; +void gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused *nb, + nbnxn_atomdata_t gmx_unused *nbatom, + const gmx::StepWorkload gmx_unused &stepWork, + AtomLocality gmx_unused aloc, + bool gmx_unused copyBackNbForce) GPU_FUNC_TERM; /*! \brief Attempts to complete nonbonded GPU task. * @@ -176,7 +176,7 @@ void gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused *nb, * the energy and Fshift contributions for some external/centralized reduction. * * \param[in] nb The nonbonded data GPU structure - * \param[in] forceFlags Force schedule flags + * \param[in] stepWork Step schedule flags * \param[in] aloc Atom locality identifier * \param[out] e_lj Pointer to the LJ energy output to accumulate into * \param[out] e_el Pointer to the electrostatics energy output to accumulate into @@ -186,8 +186,8 @@ void gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused *nb, * \returns True if the nonbonded tasks associated with \p aloc locality have completed */ GPU_FUNC_QUALIFIER -bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb, - const gmx::ForceFlags gmx_unused &forceFlags, +bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb, + const gmx::StepWorkload gmx_unused &stepWork, AtomLocality gmx_unused aloc, real gmx_unused *e_lj, real gmx_unused *e_el, @@ -203,7 +203,7 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb, * pruning flags. 
* * \param[in] nb The nonbonded data GPU structure - * \param[in] forceFlags Force schedule flags + * \param[in] stepWork Step schedule flags * \param[in] aloc Atom locality identifier * \param[out] e_lj Pointer to the LJ energy output to accumulate into * \param[out] e_el Pointer to the electrostatics energy output to accumulate into @@ -211,7 +211,7 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb, */ GPU_FUNC_QUALIFIER float gpu_wait_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb, - const gmx::ForceFlags gmx_unused &forceFlags, + const gmx::StepWorkload gmx_unused &stepWork, AtomLocality gmx_unused aloc, real gmx_unused *e_lj, real gmx_unused *e_el, diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp index 1a0fbd7705..634801dc44 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp @@ -72,7 +72,7 @@ #include "gromacs/gpu_utils/gputraits_ocl.h" #include "gromacs/gpu_utils/oclutils.h" #include "gromacs/hardware/hw_info.h" -#include "gromacs/mdlib/ppforceworkload.h" +#include "gromacs/mdtypes/simulation_workload.h" #include "gromacs/nbnxm/atomdata.h" #include "gromacs/nbnxm/gpu_common.h" #include "gromacs/nbnxm/gpu_common_utils.h" @@ -468,7 +468,7 @@ void gpu_copy_xq_to_gpu(gmx_nbnxn_ocl_t *nb, are finished and synchronize with this event in the non-local stream. */ void gpu_launch_kernel(gmx_nbnxn_ocl_t *nb, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, const Nbnxm::InteractionLocality iloc) { cl_atomdata_t *adat = nb->atdat; @@ -546,13 +546,13 @@ void gpu_launch_kernel(gmx_nbnxn_ocl_t *nb, const auto kernel = select_nbnxn_kernel(nb, nbp->eeltype, nbp->vdwtype, - forceFlags.computeEnergy, + stepWork.computeEnergy, (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune)); // The OpenCL kernel takes int as second to last argument because bool is // not supported as a kernel argument type (sizeof(bool) is implementation defined). - const int computeFshift = static_cast(forceFlags.computeVirial); + const int computeFshift = static_cast(stepWork.computeVirial); if (useLjCombRule(nb->nbparam->vdwtype)) { const auto kernelArgs = prepareGpuKernelArguments(kernel, config, @@ -734,7 +734,7 @@ void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t *nb, */ void gpu_launch_cpyback(gmx_nbnxn_ocl_t *nb, struct nbnxn_atomdata_t *nbatom, - const gmx::ForceFlags &forceFlags, + const gmx::StepWorkload &stepWork, const AtomLocality aloc, const bool gmx_unused copyBackNbForce) { @@ -804,14 +804,14 @@ void gpu_launch_cpyback(gmx_nbnxn_ocl_t *nb, if (iloc == InteractionLocality::Local) { /* DtoH fshift when virial is needed */ - if (forceFlags.computeVirial) + if (stepWork.computeVirial) { ocl_copy_D2H_async(nb->nbst.fshift, adat->fshift, 0, SHIFTS * adat->fshift_elem_size, stream, bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr); } /* DtoH energies */ - if (forceFlags.computeEnergy) + if (stepWork.computeEnergy) { ocl_copy_D2H_async(nb->nbst.e_lj, adat->e_lj, 0, sizeof(float), stream, bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr); -- 2.22.0
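
Usage sketch (not part of the patch): the stand-in structs below mirror the classes declared by this change in src/gromacs/mdtypes/simulation_workload.h; the driver loop and the setupStepWorkload() helper are hypothetical. It illustrates how the per-run, per-domain-lifetime, and per-step descriptors are grouped in MdrunScheduleWorkload, and how consumers branch on the per-step flags the way the kernel dispatch code above does.

// Minimal standalone sketch; class and member names follow the new header,
// everything else is illustrative only.
#include <cstdio>

// Simplified stand-ins for the classes in simulation_workload.h.
struct SimulationWorkload
{
};

struct DomainLifetimeWorkload
{
    bool haveGpuBondedWork      = false;
    bool haveCpuListedForceWork = false;
};

struct StepWorkload
{
    bool computeForces = false;
    bool computeVirial = false;
    bool computeEnergy = false;
};

struct MdrunScheduleWorkload
{
    SimulationWorkload     simulationWork; // constant for the entire run
    DomainLifetimeWorkload domainWork;     // reset on every domain decomposition / pair search
    StepWorkload           stepWork;       // reset at the beginning of every step
};

// Hypothetical per-step setup: translate this step's schedule into flags,
// mirroring how the legacy force flags map onto StepWorkload.
static void setupStepWorkload(MdrunScheduleWorkload* work, bool isEnergyStep, bool isVirialStep)
{
    work->stepWork               = StepWorkload(); // contents live for a single step
    work->stepWork.computeForces = true;
    work->stepWork.computeEnergy = isEnergyStep;
    work->stepWork.computeVirial = isVirialStep;
}

int main()
{
    MdrunScheduleWorkload runScheduleWork;

    // Domain-lifetime flags would be set once per nstlist/DD interval by the
    // task-assignment code; a value is picked here only for illustration.
    runScheduleWork.domainWork.haveCpuListedForceWork = true;

    for (int step = 0; step < 4; step++)
    {
        const bool isEnergyStep = (step % 2 == 0);
        setupStepWorkload(&runScheduleWork, isEnergyStep, isEnergyStep);

        // Consumers branch on the per-step flags, as the nonbonded and bonded
        // dispatch code does after this change.
        if (runScheduleWork.stepWork.computeEnergy)
        {
            std::printf("step %d: launching force+energy kernels\n", step);
        }
        else
        {
            std::printf("step %d: launching force-only kernels\n", step);
        }
    }
    return 0;
}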