From 65d00d307b80996b49ae2ce08b54b31b1e8a49dd Mon Sep 17 00:00:00 2001 From: =?utf8?q?Szil=C3=A1rd=20P=C3=A1ll?= Date: Thu, 19 Aug 2021 20:16:20 +0000 Subject: [PATCH] Avoid using cr->duty in setupStepWorkload --- src/gromacs/mdlib/sim_util.cpp | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 51777faf13..6ecc338f70 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -950,15 +950,13 @@ static DomainLifetimeWorkload setupDomainLifetimeWorkload(const t_inputrec& * \param[in] mtsLevels The multiple time-stepping levels, either empty or 2 levels * \param[in] step The current MD step * \param[in] simulationWork Simulation workload description. - * \param[in] rankHasPmeDuty If this rank computes PME. * * \returns New Stepworkload description. */ static StepWorkload setupStepWorkload(const int legacyFlags, ArrayRef<const gmx::MtsLevel> mtsLevels, const int64_t step, - const SimulationWorkload& simulationWork, - const bool rankHasPmeDuty) + const SimulationWorkload& simulationWork) { GMX_ASSERT(mtsLevels.empty() || mtsLevels.size() == 2, "Expect 0 or 2 MTS levels"); const bool computeSlowForces = (mtsLevels.empty() || step % mtsLevels[1].stepFactor == 0); @@ -985,15 +983,13 @@ static StepWorkload setupStepWorkload(const int legacyFlags, } flags.useGpuXBufferOps = simulationWork.useGpuBufferOps; // on virial steps the CPU reduction path is taken - flags.useGpuFBufferOps = simulationWork.useGpuBufferOps && !flags.computeVirial; - flags.useGpuPmeFReduction = flags.computeSlowForces && flags.useGpuFBufferOps && simulationWork.useGpuPme - && (rankHasPmeDuty || simulationWork.useGpuPmePpCommunication); - flags.useGpuXHalo = simulationWork.useGpuHaloExchange; - flags.useGpuFHalo = simulationWork.useGpuHaloExchange && flags.useGpuFBufferOps; - // Note that rankHasPmeDuty is used confusingly due to the way cr->duty is set up (can be true even for non-PME runs), 
- // but the haveGpuPmeOnThisRank still ends up correct as simulationWork.useGpuPme == false in such cases. - // TODO: improve this when duty-reliance is eliminated - flags.haveGpuPmeOnThisRank = simulationWork.useGpuPme && rankHasPmeDuty && flags.computeSlowForces; + flags.useGpuFBufferOps = simulationWork.useGpuBufferOps && !flags.computeVirial; + const bool rankHasGpuPmeTask = simulationWork.useGpuPme && !simulationWork.haveSeparatePmeRank; + flags.useGpuPmeFReduction = flags.computeSlowForces && flags.useGpuFBufferOps + && (rankHasGpuPmeTask || simulationWork.useGpuPmePpCommunication); + flags.useGpuXHalo = simulationWork.useGpuHaloExchange; + flags.useGpuFHalo = simulationWork.useGpuHaloExchange && flags.useGpuFBufferOps; + flags.haveGpuPmeOnThisRank = rankHasGpuPmeTask && flags.computeSlowForces; flags.combineMtsForcesBeforeHaloExchange = (flags.computeForces && simulationWork.useMts && flags.computeSlowForces && flags.useOnlyMtsCombinedForceBuffer @@ -1255,8 +1251,7 @@ void do_force(FILE* fplog, const SimulationWorkload& simulationWork = runScheduleWork->simulationWork; - runScheduleWork->stepWork = setupStepWorkload( - legacyFlags, inputrec.mtsLevels, step, simulationWork, thisRankHasDuty(cr, DUTY_PME)); + runScheduleWork->stepWork = setupStepWorkload(legacyFlags, inputrec.mtsLevels, step, simulationWork); const StepWorkload& stepWork = runScheduleWork->stepWork; /* At a search step we need to start the first balancing region -- 2.22.0