From 6ecc09b00ff348aeafb9a230ad0b694907de4902 Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Tue, 22 Oct 2019 20:30:22 +0200 Subject: [PATCH] Add haveCpuLocalForces flag to DomainLifetimeWorkload The flag haveCpuLocalForces is moved to DomainLifetimeWorkload. The name and meaning is also slightly changed, haveCpuLocalForceWork now now indicates whether any forces in the local domain are computed on the CPU. Consequently, the currently redundant PME f reduction conditional has been removed from the haveCpuLocalForceWork initialization. The relationship between local force work and existence of local forces on the CPU however needs to be clarified in relationship to #3160 and how the PP-PME direct communication is improved/simplified. Change-Id: Idaed4da41bf5b72435c47bab8aeb7130b36b03c7 --- src/gromacs/mdlib/sim_util.cpp | 34 +++++++++++------------ src/gromacs/mdtypes/simulation_workload.h | 2 ++ 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 249af42f5f..ba80dc084c 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -777,14 +777,15 @@ setupForceOutputs(t_forcerec *fr, /*! \brief Set up flags that have the lifetime of the domain indicating what type of work is there to compute. */ static DomainLifetimeWorkload -setupDomainLifetimeWorkload(const t_inputrec &inputrec, - const t_forcerec &fr, - const pull_t *pull_work, - const gmx_edsam *ed, - const t_idef &idef, - const t_fcdata &fcd, - const t_mdatoms &mdatoms, - const StepWorkload &stepWork) +setupDomainLifetimeWorkload(const t_inputrec &inputrec, + const t_forcerec &fr, + const pull_t *pull_work, + const gmx_edsam *ed, + const t_idef &idef, + const t_fcdata &fcd, + const t_mdatoms &mdatoms, + const SimulationWorkload &simulationWork, + const StepWorkload &stepWork) { DomainLifetimeWorkload domainWork; // Note that haveSpecialForces is constant over the whole run @@ -795,6 +796,10 @@ setupDomainLifetimeWorkload(const t_inputrec &inputrec, domainWork.haveCpuListedForceWork = haveCpuListedForces(fr, idef, fcd); // Note that haveFreeEnergyWork is constant over the whole run domainWork.haveFreeEnergyWork = (fr.efep != efepNO && mdatoms.nPerturbed != 0); + // We assume we have local force work if there are CPU + // force tasks including PME or nonbondeds. + domainWork.haveCpuLocalForceWork = domainWork.haveSpecialForces || domainWork.haveCpuListedForceWork || domainWork.haveFreeEnergyWork || + simulationWork.useCpuNonbonded || simulationWork.useCpuPme; return domainWork; } @@ -1140,6 +1145,7 @@ void do_force(FILE *fplog, top->idef, *fcd, *mdatoms, + simulationWork, stepWork); wallcycle_start_nocount(wcycle, ewcNS); @@ -1573,12 +1579,6 @@ void do_force(FILE *fplog, } } - // TODO move this into StepWorkload - const bool useCpuPmeFReduction = thisRankHasDuty(cr, DUTY_PME) && !stepWork.useGpuPmeFReduction; - // TODO: move this into DomainLifetimeWorkload, including the second part of the condition - const bool haveCpuLocalForces = (domainWork.haveSpecialForces || domainWork.haveCpuListedForceWork || useCpuPmeFReduction || - (fr->efep != efepNO)); - if (havePPDomainDecomposition(cr)) { /* We are done with the CPU compute. @@ -1593,11 +1593,11 @@ void do_force(FILE *fplog, if (useGpuForcesHaloExchange) { - if (haveCpuLocalForces) + if (domainWork.haveCpuLocalForceWork) { stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), AtomLocality::Local); } - gpuHaloExchange->communicateHaloForces(haveCpuLocalForces); + gpuHaloExchange->communicateHaloForces(domainWork.haveCpuLocalForceWork); } else { @@ -1715,7 +1715,7 @@ void do_force(FILE *fplog, // local atoms. This depends on whether there are CPU-based force tasks // or when DD is active the halo exchange has resulted in contributions // from the non-local part. - const bool haveLocalForceContribInCpuBuffer = (haveCpuLocalForces || havePPDomainDecomposition(cr)); + const bool haveLocalForceContribInCpuBuffer = (domainWork.haveCpuLocalForceWork || havePPDomainDecomposition(cr)); // TODO: move these steps as early as possible: // - CPU f H2D should be as soon as all CPU-side forces are done diff --git a/src/gromacs/mdtypes/simulation_workload.h b/src/gromacs/mdtypes/simulation_workload.h index a95ae9591b..6594081c50 100644 --- a/src/gromacs/mdtypes/simulation_workload.h +++ b/src/gromacs/mdtypes/simulation_workload.h @@ -116,6 +116,8 @@ class DomainLifetimeWorkload bool haveCpuListedForceWork = false; //! Whether the current nstlist step-range has special forces on the CPU. bool haveSpecialForces = false; + //! Whether there are currently any local forces to be computed on the CPU + bool haveCpuLocalForceWork = false; // TODO //! Whether the current nstlist step-range Free energy work on the CPU. -- 2.22.0