From 2e4259ae7c3f011b30fb03f14b3b9426ce39e088 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Szil=C3=A1rd=20P=C3=A1ll?= Date: Wed, 23 Oct 2019 19:17:46 +0200 Subject: [PATCH] Extend SimulationWorkload with CPU flags Added flags for PME and Nonbondeds to indicate whether there is CPU workload; this is useful as the lack of GPU work does not imply the existence of CPU work. Also made createSimulationWorkload() take the PME runmode class enum instead of a bunch of bools. Made some naming consistency improvements. Refs #3181 Change-Id: I66233f1c790fc5092fb1babaed2ec3ebf16416de --- src/gromacs/mdlib/sim_util.cpp | 20 +++++----- src/gromacs/mdrun/md.cpp | 2 +- src/gromacs/mdrun/runner.cpp | 3 +- src/gromacs/mdtypes/simulation_workload.h | 10 +++-- .../decidesimulationworkload.cpp | 39 ++++++++++--------- .../taskassignment/decidesimulationworkload.h | 20 +++++----- 6 files changed, 49 insertions(+), 45 deletions(-) diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index f9fd797164..249af42f5f 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -832,8 +832,8 @@ setupStepWorkload(const int legacyFlags, // on virial steps the CPU reduction path is taken // TODO: remove flags.computeEnergy, ref #3128 flags.useGpuFBufferOps = simulationWork.useGpuBufferOps && !(flags.computeVirial || flags.computeEnergy); - flags.useGpuPmeFReduction = flags.useGpuFBufferOps && (simulationWork.usePmeGpu && - (rankHasPmeDuty || simulationWork.useGpuPmePPCommunication)); + flags.useGpuPmeFReduction = flags.useGpuFBufferOps && (simulationWork.useGpuPme && + (rankHasPmeDuty || simulationWork.useGpuPmePpCommunication)); return flags; } @@ -943,7 +943,7 @@ void do_force(FILE *fplog, const StepWorkload &stepWork = runScheduleWork->stepWork; - const bool useGpuPmeOnThisRank = simulationWork.usePmeGpu && thisRankHasDuty(cr, DUTY_PME); + const bool useGpuPmeOnThisRank = simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME); const int pmeFlags = makePmeFlags(stepWork); // Switches on whether to use GPU for position and force buffer operations @@ -1037,12 +1037,12 @@ void do_force(FILE *fplog, * and domain decomposition does not use the graph, * we do not need to worry about shifting. */ - bool reinitGpuPmePpComms = simulationWork.useGpuPmePPCommunication && (stepWork.doNeighborSearch); - bool sendCoordinatesFromGpu = simulationWork.useGpuPmePPCommunication && !(stepWork.doNeighborSearch); + bool reinitGpuPmePpComms = simulationWork.useGpuPmePpCommunication && (stepWork.doNeighborSearch); + bool sendCoordinatesFromGpu = simulationWork.useGpuPmePpCommunication && !(stepWork.doNeighborSearch); gmx_pme_send_coordinates(fr, cr, box, as_rvec_array(x.unpaddedArrayRef().data()), lambda[efptCOUL], lambda[efptVDW], (stepWork.computeVirial || stepWork.computeEnergy), - step, simulationWork.useGpuPmePPCommunication, reinitGpuPmePpComms, + step, simulationWork.useGpuPmePpCommunication, reinitGpuPmePpComms, sendCoordinatesFromGpu, wcycle); } #endif /* GMX_MPI */ @@ -1673,12 +1673,12 @@ void do_force(FILE *fplog, // If on GPU PME-PP comms path, receive forces from PME before GPU buffer ops // TODO refoactor this and unify with below default-path call to the same function - if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && simulationWork.useGpuPmePPCommunication) + if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && simulationWork.useGpuPmePpCommunication) { /* In case of node-splitting, the PP nodes receive the long-range * forces, virial and energy from the PME nodes here. */ - pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd, simulationWork.useGpuPmePPCommunication, stepWork.useGpuPmeFReduction, wcycle); + pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd, simulationWork.useGpuPmePpCommunication, stepWork.useGpuPmeFReduction, wcycle); } @@ -1791,13 +1791,13 @@ void do_force(FILE *fplog, } // TODO refoactor this and unify with above PME-PP GPU communication path call to the same function - if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePPCommunication) + if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication) { /* In case of node-splitting, the PP nodes receive the long-range * forces, virial and energy from the PME nodes here. */ pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd, - simulationWork.useGpuPmePPCommunication, false, wcycle); + simulationWork.useGpuPmePpCommunication, false, wcycle); } if (stepWork.computeForces) diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp index 803dfb1103..db4548c946 100644 --- a/src/gromacs/mdrun/md.cpp +++ b/src/gromacs/mdrun/md.cpp @@ -331,7 +331,7 @@ void gmx::LegacySimulator::do_md() // 2. The proper GPU syncronization is introduced, so that the H2D and D2H data copies can be performed in the separate // stream owned by the StatePropagatorDataGpu const auto &simulationWork = runScheduleWork->simulationWork; - const bool useGpuForPme = simulationWork.usePmeGpu; + const bool useGpuForPme = simulationWork.useGpuPme; const bool useGpuForNonbonded = simulationWork.useGpuNonbonded; // Temporary solution to make sure that the buffer ops are offloaded when update is offloaded const bool useGpuForBufferOps = simulationWork.useGpuBufferOps; diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 5b7788ee67..bf6532b3e0 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -1593,8 +1593,7 @@ int Mdrunner::mdrunner() MdrunScheduleWorkload runScheduleWork; // Also populates the simulation constant workload description. runScheduleWork.simulationWork = createSimulationWorkload(useGpuForNonbonded, - useGpuForPme, - (pmeRunMode == PmeRunMode::GPU), + pmeRunMode, useGpuForBonded, useGpuForUpdate, devFlags.enableGpuBufferOps, diff --git a/src/gromacs/mdtypes/simulation_workload.h b/src/gromacs/mdtypes/simulation_workload.h index 147199771c..a95ae9591b 100644 --- a/src/gromacs/mdtypes/simulation_workload.h +++ b/src/gromacs/mdtypes/simulation_workload.h @@ -134,12 +134,16 @@ class DomainLifetimeWorkload class SimulationWorkload { public: + //! If we have calculation of short range nonbondeds on CPU + bool useCpuNonbonded = false; //! If we have calculation of short range nonbondeds on GPU bool useGpuNonbonded = false; //! If we have calculation of long range PME in GPU - bool usePmeGpu = false; + bool useCpuPme = false; + //! If we have calculation of long range PME in GPU + bool useGpuPme = false; //! If PME FFT solving is done on GPU. - bool usePmeFftGpu = false; + bool useGpuPmeFft = false; //! If bonded interactions are calculated on GPU. bool useGpuBonded = false; //! If update and constraint solving is performed on GPU. @@ -149,7 +153,7 @@ class SimulationWorkload //! If domain decomposition halo exchange is performed on GPU. bool useGpuHaloExchange = false; //! If direct PP-PME communication between GPU is used. - bool useGpuPmePPCommunication = false; + bool useGpuPmePpCommunication = false; //! If direct GPU-GPU communication is enabled. bool useGpuDirectCommunication = false; }; diff --git a/src/gromacs/taskassignment/decidesimulationworkload.cpp b/src/gromacs/taskassignment/decidesimulationworkload.cpp index 17c3dc06af..597cf1a815 100644 --- a/src/gromacs/taskassignment/decidesimulationworkload.cpp +++ b/src/gromacs/taskassignment/decidesimulationworkload.cpp @@ -43,32 +43,33 @@ #include "decidesimulationworkload.h" +#include "gromacs/ewald/pme.h" #include "gromacs/taskassignment/taskassignment.h" #include "gromacs/utility/arrayref.h" namespace gmx { -SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded, - bool useGpuForPme, - bool useGpuForPmeFft, - bool useGpuForBonded, - bool useGpuForUpdateConstraints, - bool useGpuForBufferOps, - bool useGpuHaloExchange, - bool useGpuPmePpComm) +SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded, + PmeRunMode pmeRunMode, + bool useGpuForBonded, + bool useGpuForUpdateConstraints, + bool useGpuForBufferOps, + bool useGpuHaloExchange, + bool useGpuPmePpComm) { - SimulationWorkload simulationWorkload { - useGpuForNonbonded, - useGpuForPme, - useGpuForPmeFft, - useGpuForBonded, - useGpuForUpdateConstraints, - useGpuForBufferOps, - useGpuHaloExchange, - useGpuPmePpComm, - useGpuHaloExchange || useGpuPmePpComm - }; + SimulationWorkload simulationWorkload; + simulationWorkload.useCpuNonbonded = !useGpuForNonbonded; + simulationWorkload.useGpuNonbonded = useGpuForNonbonded; + simulationWorkload.useCpuPme = (pmeRunMode == PmeRunMode::CPU); + simulationWorkload.useGpuPme = (pmeRunMode == PmeRunMode::GPU || pmeRunMode == PmeRunMode::Mixed); + simulationWorkload.useGpuPmeFft = (pmeRunMode == PmeRunMode::Mixed); + simulationWorkload.useGpuBonded = useGpuForBonded; + simulationWorkload.useGpuUpdate = useGpuForUpdateConstraints; + simulationWorkload.useGpuBufferOps = useGpuForBufferOps; + simulationWorkload.useGpuHaloExchange = useGpuHaloExchange; + simulationWorkload.useGpuPmePpCommunication = useGpuPmePpComm; + simulationWorkload.useGpuDirectCommunication = useGpuHaloExchange || useGpuPmePpComm; return simulationWorkload; } diff --git a/src/gromacs/taskassignment/decidesimulationworkload.h b/src/gromacs/taskassignment/decidesimulationworkload.h index 664a307b93..0e87da68c5 100644 --- a/src/gromacs/taskassignment/decidesimulationworkload.h +++ b/src/gromacs/taskassignment/decidesimulationworkload.h @@ -46,6 +46,8 @@ #include "gromacs/mdtypes/simulation_workload.h" +enum class PmeRunMode; + namespace gmx { @@ -55,8 +57,7 @@ namespace gmx * * \param[in] useGpuForNonbonded If we have short-range nonbonded interactions * calculations on GPU(s). - * \param[in] useGpuForPme If long range PME interactions are calculated on GPU(s). - * \param[in] useGpuForPmeFft If FFT solving for PME is done on the GPU. + * \param[in] pmeRunMode Run mode indicating what resource is PME execured on. * \param[in] useGpuForBonded If bonded interactions are calculated on GPU(s). * \param[in] useGpuForUpdateConstraints If coordinate update and constraint solving is performed on * GPU(s). @@ -65,14 +66,13 @@ namespace gmx * \param[in] useGpuPmePpComm If GPu direct communication is used in PME-PP communication. * \returns Simulation lifetime constant workload description. */ -SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded, - bool useGpuForPme, - bool useGpuForPmeFft, - bool useGpuForBonded, - bool useGpuForUpdateConstraints, - bool useGpuForBufferOps, - bool useGpuHaloExchange, - bool useGpuPmePpComm); +SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded, + PmeRunMode pmeRunMode, + bool useGpuForBonded, + bool useGpuForUpdateConstraints, + bool useGpuForBufferOps, + bool useGpuHaloExchange, + bool useGpuPmePpComm); } // namespace gmx -- 2.22.0