From 75361276956e747cfd2f4bc6e5cf2a77dd578541 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Szil=C3=A1rd=20P=C3=A1ll?= Date: Fri, 4 Jun 2021 06:46:49 +0000 Subject: [PATCH] Move useMts flag from forcerec to simulationWorkload --- src/gromacs/mdlib/forcerec.cpp | 15 ++++----- src/gromacs/mdlib/forcerec.h | 3 ++ src/gromacs/mdlib/sim_util.cpp | 32 +++++++++++-------- src/gromacs/mdrun/md.cpp | 10 +++--- src/gromacs/mdrun/runner.cpp | 1 + src/gromacs/mdtypes/forcerec.h | 3 -- src/gromacs/mdtypes/simulation_workload.h | 4 +++ .../decidesimulationworkload.cpp | 3 +- 8 files changed, 40 insertions(+), 31 deletions(-) diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp index afbb29366b..1862b75125 100644 --- a/src/gromacs/mdlib/forcerec.cpp +++ b/src/gromacs/mdlib/forcerec.cpp @@ -80,6 +80,7 @@ #include "gromacs/mdtypes/md_enums.h" #include "gromacs/mdtypes/multipletimestepping.h" #include "gromacs/mdtypes/nblist.h" +#include "gromacs/mdtypes/simulation_workload.h" #include "gromacs/nbnxm/nbnxm.h" #include "gromacs/pbcutil/ishift.h" #include "gromacs/pbcutil/pbc.h" @@ -647,6 +648,7 @@ real cutoff_inf(real cutoff) void init_forcerec(FILE* fplog, const gmx::MDLogger& mdlog, + const gmx::SimulationWorkload& simulationWork, t_forcerec* forcerec, const t_inputrec& inputrec, const gmx_mtop_t& mtop, @@ -871,10 +873,7 @@ void init_forcerec(FILE* fplog, /* 1-4 interaction electrostatics */ forcerec->fudgeQQ = mtop.ffparams.fudgeQQ; - // Multiple time stepping - forcerec->useMts = inputrec.useMts; - - if (forcerec->useMts) + if (simulationWork.useMts) { GMX_ASSERT(gmx::checkMtsRequirements(inputrec).empty(), "All MTS requirements should be met here"); @@ -886,11 +885,11 @@ void init_forcerec(FILE* fplog, || inputrec.bRot || inputrec.bIMD; const bool haveDirectVirialContributionsSlow = EEL_FULL(interactionConst->eeltype) || EVDW_PME(interactionConst->vdwtype); - for (int i = 0; i < (forcerec->useMts ? 2 : 1); i++) + for (int i = 0; i < (simulationWork.useMts ? 2 : 1); i++) { bool haveDirectVirialContributions = - (((!forcerec->useMts || i == 0) && haveDirectVirialContributionsFast) - || ((!forcerec->useMts || i == 1) && haveDirectVirialContributionsSlow)); + (((!simulationWork.useMts || i == 0) && haveDirectVirialContributionsFast) + || ((!simulationWork.useMts || i == 1) && haveDirectVirialContributionsSlow)); forcerec->forceHelperBuffers.emplace_back(haveDirectVirialContributions); } @@ -1005,7 +1004,7 @@ void init_forcerec(FILE* fplog, } /* Initialize the thread working data for bonded interactions */ - if (forcerec->useMts) + if (simulationWork.useMts) { // Add one ListedForces object for each MTS level bool isFirstLevel = true; diff --git a/src/gromacs/mdlib/forcerec.h b/src/gromacs/mdlib/forcerec.h index 88ab95af87..11d0e42510 100644 --- a/src/gromacs/mdlib/forcerec.h +++ b/src/gromacs/mdlib/forcerec.h @@ -58,6 +58,7 @@ namespace gmx { class MDLogger; class PhysicalNodeCommunicator; +class SimulationWorkload; } // namespace gmx /*! \brief Create nonbonded parameter lists @@ -107,6 +108,7 @@ void init_interaction_const_tables(FILE* fp, interaction_const_t* ic, real rlist * \param[in] fplog File for printing * \param[in] mdlog File for printing * \param[out] forcerec The forcerec + * \param[in] simulationWork Simulation workload flags * \param[in] inputrec Inputrec structure * \param[in] mtop Molecular topology * \param[in] commrec Communication structures @@ -118,6 +120,7 @@ void init_interaction_const_tables(FILE* fp, interaction_const_t* ic, real rlist */ void init_forcerec(FILE* fplog, const gmx::MDLogger& mdlog, + const gmx::SimulationWorkload& simulationWork, t_forcerec* forcerec, const t_inputrec& inputrec, const gmx_mtop_t& mtop, diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 1439c40199..8795ec6188 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -992,6 +992,10 @@ static StepWorkload setupStepWorkload(const int legacyFlags, flags.useGpuXHalo = simulationWork.useGpuHaloExchange; flags.useGpuFHalo = simulationWork.useGpuHaloExchange && flags.useGpuFBufferOps; flags.haveGpuPmeOnThisRank = simulationWork.useGpuPme && rankHasPmeDuty && flags.computeSlowForces; + flags.combineMtsForcesBeforeHaloExchange = + (flags.computeForces && simulationWork.useMts && flags.computeSlowForces + && flags.useOnlyMtsCombinedForceBuffer + && !(flags.computeVirial || simulationWork.useGpuNonbonded || flags.haveGpuPmeOnThisRank)); return flags; } @@ -1202,7 +1206,8 @@ static void setupGpuForceReductions(gmx::MdrunScheduleWorkload* runScheduleWork, */ static int getLocalAtomCount(const gmx_domdec_t* dd, const t_mdatoms& mdatoms, bool havePPDomainDecomposition) { - GMX_ASSERT(!(havePPDomainDecomposition && (dd == nullptr)), "Can't have PP decomposition with dd uninitialized!"); + GMX_ASSERT(!(havePPDomainDecomposition && (dd == nullptr)), + "Can't have PP decomposition with dd uninitialized!"); return havePPDomainDecomposition ? dd_numAtomsZones(*dd) : mdatoms.homenr; } @@ -1735,7 +1740,7 @@ void do_force(FILE* fplog, // Force output for MTS combined forces, only set at level1 MTS steps std::optional forceOutMts = - (fr->useMts && stepWork.computeSlowForces) + (simulationWork.useMts && stepWork.computeSlowForces) ? std::optional(setupForceOutputs(&fr->forceHelperBuffers[1], forceView->forceMtsCombinedWithPadding(), domainWork, @@ -1745,7 +1750,8 @@ void do_force(FILE* fplog, : std::nullopt; ForceOutputs* forceOutMtsLevel1 = - fr->useMts ? (stepWork.computeSlowForces ? &forceOutMts.value() : nullptr) : &forceOutMtsLevel0; + simulationWork.useMts ? (stepWork.computeSlowForces ? &forceOutMts.value() : nullptr) + : &forceOutMtsLevel0; const bool nonbondedAtMtsLevel1 = runScheduleWork->simulationWork.computeNonbondedAtMtsLevel1; @@ -1914,7 +1920,8 @@ void do_force(FILE* fplog, set_pbc_dd(&pbc, fr->pbcType, DOMAINDECOMP(cr) ? cr->dd->numCells : nullptr, TRUE, box); } - for (int mtsIndex = 0; mtsIndex < (fr->useMts && stepWork.computeSlowForces ? 2 : 1); mtsIndex++) + for (int mtsIndex = 0; mtsIndex < (simulationWork.useMts && stepWork.computeSlowForces ? 2 : 1); + mtsIndex++) { ListedForces& listedForces = fr->listedForces[mtsIndex]; ForceOutputs& forceOut = (mtsIndex == 0 ? forceOutMtsLevel0 : *forceOutMtsLevel1); @@ -2073,10 +2080,7 @@ void do_force(FILE* fplog, /* Combining the forces for multiple time stepping before the halo exchange, when possible, * avoids an extra halo exchange (when DD is used) and post-processing step. */ - const bool combineMtsForcesBeforeHaloExchange = - (stepWork.computeForces && fr->useMts && stepWork.computeSlowForces && stepWork.useOnlyMtsCombinedForceBuffer - && !(stepWork.computeVirial || simulationWork.useGpuNonbonded || stepWork.haveGpuPmeOnThisRank)); - if (combineMtsForcesBeforeHaloExchange) + if (stepWork.combineMtsForcesBeforeHaloExchange) { combineMtsForces(getLocalAtomCount(cr->dd, *mdatoms, havePPDomainDecomposition(cr)), force.unpaddedArrayRef(), @@ -2117,12 +2121,12 @@ void do_force(FILE* fplog, // Without MTS or with MTS at slow steps with uncombined forces we need to // communicate the fast forces - if (!fr->useMts || !combineMtsForcesBeforeHaloExchange) + if (!simulationWork.useMts || !stepWork.combineMtsForcesBeforeHaloExchange) { dd_move_f(cr->dd, &forceOutMtsLevel0.forceWithShiftForces(), wcycle); } // With MTS we need to communicate the slow or combined (in forceOutMtsLevel1) forces - if (fr->useMts && stepWork.computeSlowForces) + if (simulationWork.useMts && stepWork.computeSlowForces) { dd_move_f(cr->dd, &forceOutMtsLevel1->forceWithShiftForces(), wcycle); } @@ -2288,14 +2292,14 @@ void do_force(FILE* fplog, dd_force_flop_stop(cr->dd, nrnb); } - const bool haveCombinedMtsForces = (stepWork.computeForces && fr->useMts && stepWork.computeSlowForces - && combineMtsForcesBeforeHaloExchange); + const bool haveCombinedMtsForces = (stepWork.computeForces && simulationWork.useMts && stepWork.computeSlowForces + && stepWork.combineMtsForcesBeforeHaloExchange); if (stepWork.computeForces) { postProcessForceWithShiftForces( nrnb, wcycle, box, x.unpaddedArrayRef(), &forceOutMtsLevel0, vir_force, *mdatoms, *fr, vsite, stepWork); - if (fr->useMts && stepWork.computeSlowForces && !haveCombinedMtsForces) + if (simulationWork.useMts && stepWork.computeSlowForces && !haveCombinedMtsForces) { postProcessForceWithShiftForces( nrnb, wcycle, box, x.unpaddedArrayRef(), forceOutMtsLevel1, vir_force, *mdatoms, *fr, vsite, stepWork); @@ -2328,7 +2332,7 @@ void do_force(FILE* fplog, postProcessForces( cr, step, nrnb, wcycle, box, x.unpaddedArrayRef(), &forceOutCombined, vir_force, mdatoms, fr, vsite, stepWork); - if (fr->useMts && stepWork.computeSlowForces && !haveCombinedMtsForces) + if (simulationWork.useMts && stepWork.computeSlowForces && !haveCombinedMtsForces) { postProcessForces( cr, step, nrnb, wcycle, box, x.unpaddedArrayRef(), forceOutMtsLevel1, vir_force, mdatoms, fr, vsite, stepWork); diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp index a6a3263c54..a7d344c4a0 100644 --- a/src/gromacs/mdrun/md.cpp +++ b/src/gromacs/mdrun/md.cpp @@ -363,7 +363,7 @@ void gmx::LegacySimulator::do_md() gmx_localtop_t top(top_global.ffparams); - ForceBuffers f(fr->useMts, + ForceBuffers f(simulationWork.useMts, ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate) ? PinningPolicy::PinnedIfSupported : PinningPolicy::CannotBePinned); @@ -1126,7 +1126,7 @@ void gmx::LegacySimulator::do_md() force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0) | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0) | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0)); - if (fr->useMts && !do_per_step(step, ir->nstfout)) + if (simulationWork.useMts && !do_per_step(step, ir->nstfout)) { // TODO: merge this with stepWork.useOnlyMtsCombinedForceBuffer force_flags |= GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE; @@ -1584,7 +1584,7 @@ void gmx::LegacySimulator::do_md() * Using that acceleration would result in a virial with the slow * force contribution would be a factor mtsFactor too large. */ - if (fr->useMts && bCalcVir && constr != nullptr) + if (simulationWork.useMts && bCalcVir && constr != nullptr) { upd.update_for_constraint_virial(*ir, md->homenr, @@ -1607,7 +1607,7 @@ void gmx::LegacySimulator::do_md() } ArrayRefWithPadding forceCombined = - (fr->useMts && step % ir->mtsLevels[1].stepFactor == 0) + (simulationWork.useMts && step % ir->mtsLevels[1].stepFactor == 0) ? f.view().forceMtsCombinedWithPadding() : f.view().forceWithPadding(); upd.update_coords(*ir, @@ -1635,7 +1635,7 @@ void gmx::LegacySimulator::do_md() state, upd.xp()->arrayRefWithPadding(), &dvdl_constr, - bCalcVir && !fr->useMts, + bCalcVir && !simulationWork.useMts, shake_vir); upd.update_sd_second_half(*ir, diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 0fdd4d8f7c..a1cc472958 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -1626,6 +1626,7 @@ int Mdrunner::mdrunner() fr->forceProviders = mdModules_->initForceProviders(); init_forcerec(fplog, mdlog, + runScheduleWork.simulationWork, fr.get(), *inputrec, mtop, diff --git a/src/gromacs/mdtypes/forcerec.h b/src/gromacs/mdtypes/forcerec.h index 44477aa6f3..838988ce35 100644 --- a/src/gromacs/mdtypes/forcerec.h +++ b/src/gromacs/mdtypes/forcerec.h @@ -251,9 +251,6 @@ struct t_forcerec real userreal3 = 0; real userreal4 = 0; - /* Tells whether we use multiple time stepping, computing some forces less frequently */ - bool useMts = false; - /* Data for special listed force calculations */ std::unique_ptr fcdata; diff --git a/src/gromacs/mdtypes/simulation_workload.h b/src/gromacs/mdtypes/simulation_workload.h index 73fd19a54a..66efe112ac 100644 --- a/src/gromacs/mdtypes/simulation_workload.h +++ b/src/gromacs/mdtypes/simulation_workload.h @@ -101,6 +101,8 @@ public: bool useGpuFHalo = false; //! Whether GPU PME work is compute this step (can be false also on fast steps with MTS) bool haveGpuPmeOnThisRank = false; + //! Whether to combine the forces for multiple time stepping before the halo exchange + bool combineMtsForcesBeforeHaloExchange = false; }; /*! \libinternal @@ -186,6 +188,8 @@ public: bool useGpuDirectCommunication = false; //! If there is an Ewald surface (dipole) term to compute bool haveEwaldSurfaceContribution = false; + //! Whether to use multiple time stepping + bool useMts = false; }; class MdrunScheduleWorkload diff --git a/src/gromacs/taskassignment/decidesimulationworkload.cpp b/src/gromacs/taskassignment/decidesimulationworkload.cpp index 5c5cdaeb3f..bd0c5a55a6 100644 --- a/src/gromacs/taskassignment/decidesimulationworkload.cpp +++ b/src/gromacs/taskassignment/decidesimulationworkload.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2019,2020, by the GROMACS development team, led by + * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -82,6 +82,7 @@ SimulationWorkload createSimulationWorkload(const t_inputrec& inputrec, simulationWorkload.useGpuDirectCommunication = devFlags.enableGpuHaloExchange || devFlags.enableGpuPmePPComm; simulationWorkload.haveEwaldSurfaceContribution = haveEwaldSurfaceContribution(inputrec); + simulationWorkload.useMts = inputrec.useMts; return simulationWorkload; } -- 2.22.0