Extend SimulationWorkload with CPU flags
author    Szilárd Páll <pall.szilard@gmail.com>
Wed, 23 Oct 2019 17:17:46 +0000 (19:17 +0200)
committer Artem Zhmurov <zhmurov@gmail.com>
Mon, 28 Oct 2019 16:49:26 +0000 (17:49 +0100)
Added flags for PME and nonbondeds to indicate whether there is CPU
workload; this is useful because the absence of GPU work does not
imply the presence of CPU work.

Also made createSimulationWorkload() take the PmeRunMode enum class
instead of a set of booleans.

Made some naming consistency improvements.

Refs #3181

Change-Id: I66233f1c790fc5092fb1babaed2ec3ebf16416de
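
For orientation, a minimal standalone sketch of how the new CPU/GPU flags
relate to PmeRunMode, mirroring the mapping introduced in
decidesimulationworkload.cpp below (the struct, the helper name and main()
are illustrative only and not part of this change; the PmeRunMode
enumerators follow gromacs/ewald/pme.h):

    #include <cstdio>

    // Sketch of the relevant run modes; Mixed runs PME spread/gather on the
    // GPU with FFT and solve on the CPU.
    enum class PmeRunMode { None, CPU, GPU, Mixed };

    // Minimal stand-in for the SimulationWorkload fields touched here.
    struct WorkloadSketch
    {
        bool useCpuNonbonded = false;
        bool useGpuNonbonded = false;
        bool useCpuPme       = false;
        bool useGpuPme       = false;
    };

    // Mirrors the flag derivation in createSimulationWorkload().
    WorkloadSketch deriveFlags(bool useGpuForNonbonded, PmeRunMode pmeRunMode)
    {
        WorkloadSketch w;
        w.useCpuNonbonded = !useGpuForNonbonded;
        w.useGpuNonbonded = useGpuForNonbonded;
        w.useCpuPme       = (pmeRunMode == PmeRunMode::CPU);
        w.useGpuPme       = (pmeRunMode == PmeRunMode::GPU || pmeRunMode == PmeRunMode::Mixed);
        return w;
    }

    int main()
    {
        // With PmeRunMode::None both PME flags are false: the absence of GPU
        // PME work says nothing about whether CPU PME work exists, hence the
        // explicit CPU flags.
        const WorkloadSketch w = deriveFlags(/*useGpuForNonbonded=*/true, PmeRunMode::None);
        std::printf("useCpuNonbonded=%d useCpuPme=%d useGpuPme=%d\n",
                    w.useCpuNonbonded, w.useCpuPme, w.useGpuPme);
        return 0;
    }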

src/gromacs/mdlib/sim_util.cpp
src/gromacs/mdrun/md.cpp
src/gromacs/mdrun/runner.cpp
src/gromacs/mdtypes/simulation_workload.h
src/gromacs/taskassignment/decidesimulationworkload.cpp
src/gromacs/taskassignment/decidesimulationworkload.h

index f9fd797164b6bb6281909cccc4d494d23e39827c..249af42f5feaa601c8bb2a69bedc79f50a7c293a 100644 (file)
@@ -832,8 +832,8 @@ setupStepWorkload(const int                 legacyFlags,
     // on virial steps the CPU reduction path is taken
     // TODO: remove flags.computeEnergy, ref #3128
     flags.useGpuFBufferOps    = simulationWork.useGpuBufferOps && !(flags.computeVirial || flags.computeEnergy);
-    flags.useGpuPmeFReduction = flags.useGpuFBufferOps && (simulationWork.usePmeGpu &&
-                                                           (rankHasPmeDuty || simulationWork.useGpuPmePPCommunication));
+    flags.useGpuPmeFReduction = flags.useGpuFBufferOps && (simulationWork.useGpuPme &&
+                                                           (rankHasPmeDuty || simulationWork.useGpuPmePpCommunication));
 
     return flags;
 }
@@ -943,7 +943,7 @@ void do_force(FILE                                     *fplog,
     const StepWorkload &stepWork = runScheduleWork->stepWork;
 
 
-    const bool useGpuPmeOnThisRank = simulationWork.usePmeGpu && thisRankHasDuty(cr, DUTY_PME);
+    const bool useGpuPmeOnThisRank = simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME);
     const int  pmeFlags            = makePmeFlags(stepWork);
 
     // Switches on whether to use GPU for position and force buffer operations
@@ -1037,12 +1037,12 @@ void do_force(FILE                                     *fplog,
          * and domain decomposition does not use the graph,
          * we do not need to worry about shifting.
          */
-        bool reinitGpuPmePpComms    = simulationWork.useGpuPmePPCommunication && (stepWork.doNeighborSearch);
-        bool sendCoordinatesFromGpu = simulationWork.useGpuPmePPCommunication && !(stepWork.doNeighborSearch);
+        bool reinitGpuPmePpComms    = simulationWork.useGpuPmePpCommunication && (stepWork.doNeighborSearch);
+        bool sendCoordinatesFromGpu = simulationWork.useGpuPmePpCommunication && !(stepWork.doNeighborSearch);
         gmx_pme_send_coordinates(fr, cr, box, as_rvec_array(x.unpaddedArrayRef().data()),
                                  lambda[efptCOUL], lambda[efptVDW],
                                  (stepWork.computeVirial || stepWork.computeEnergy),
-                                 step, simulationWork.useGpuPmePPCommunication, reinitGpuPmePpComms,
+                                 step, simulationWork.useGpuPmePpCommunication, reinitGpuPmePpComms,
                                  sendCoordinatesFromGpu, wcycle);
     }
 #endif /* GMX_MPI */
@@ -1673,12 +1673,12 @@ void do_force(FILE                                     *fplog,
 
     // If on GPU PME-PP comms path, receive forces from PME before GPU buffer ops
     // TODO refactor this and unify with below default-path call to the same function
-    if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && simulationWork.useGpuPmePPCommunication)
+    if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && simulationWork.useGpuPmePpCommunication)
     {
         /* In case of node-splitting, the PP nodes receive the long-range
          * forces, virial and energy from the PME nodes here.
          */
-        pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd, simulationWork.useGpuPmePPCommunication, stepWork.useGpuPmeFReduction, wcycle);
+        pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd, simulationWork.useGpuPmePpCommunication, stepWork.useGpuPmeFReduction, wcycle);
     }
 
 
@@ -1791,13 +1791,13 @@ void do_force(FILE                                     *fplog,
     }
 
     // TODO refactor this and unify with above PME-PP GPU communication path call to the same function
-    if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePPCommunication)
+    if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication)
     {
         /* In case of node-splitting, the PP nodes receive the long-range
          * forces, virial and energy from the PME nodes here.
          */
         pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd,
-                               simulationWork.useGpuPmePPCommunication, false, wcycle);
+                               simulationWork.useGpuPmePpCommunication, false, wcycle);
     }
 
     if (stepWork.computeForces)
index 803dfb1103222ed3c3b3da796f41d08331b4cb76..db4548c9462653321916f9dfa886b26cd9ed7133 100644 (file)
@@ -331,7 +331,7 @@ void gmx::LegacySimulator::do_md()
 //       2. The proper GPU synchronization is introduced, so that the H2D and D2H data copies can be performed in the separate
 //          stream owned by the StatePropagatorDataGpu
     const auto &simulationWork     = runScheduleWork->simulationWork;
-    const bool  useGpuForPme       = simulationWork.usePmeGpu;
+    const bool  useGpuForPme       = simulationWork.useGpuPme;
     const bool  useGpuForNonbonded = simulationWork.useGpuNonbonded;
     // Temporary solution to make sure that the buffer ops are offloaded when update is offloaded
     const bool  useGpuForBufferOps = simulationWork.useGpuBufferOps;
index 5b7788ee67e5270e677f2d3bbab2017286969fb3..bf6532b3e01d38baba2934422e9f4de34bebc198 100644 (file)
@@ -1593,8 +1593,7 @@ int Mdrunner::mdrunner()
         MdrunScheduleWorkload runScheduleWork;
         // Also populates the simulation constant workload description.
         runScheduleWork.simulationWork = createSimulationWorkload(useGpuForNonbonded,
-                                                                  useGpuForPme,
-                                                                  (pmeRunMode == PmeRunMode::GPU),
+                                                                  pmeRunMode,
                                                                   useGpuForBonded,
                                                                   useGpuForUpdate,
                                                                   devFlags.enableGpuBufferOps,
index 147199771cb7ed5d5ff396b7e90fb4c945a9d2c6..a95ae9591bffdc82052af8d65124a972b24a9959 100644 (file)
@@ -134,12 +134,16 @@ class DomainLifetimeWorkload
 class SimulationWorkload
 {
     public:
+        //! If we have calculation of short range nonbondeds on CPU
+        bool useCpuNonbonded           = false;
         //! If we have calculation of short range nonbondeds on GPU
         bool useGpuNonbonded           = false;
-        //! If we have calculation of long range PME in GPU
-        bool usePmeGpu                 = false;
+        //! If we have calculation of long range PME on CPU
+        bool useCpuPme                 = false;
+        //! If we have calculation of long range PME on GPU
+        bool useGpuPme                 = false;
         //! If PME FFT solving is done on GPU.
-        bool usePmeFftGpu              = false;
+        bool useGpuPmeFft              = false;
         //! If bonded interactions are calculated on GPU.
         bool useGpuBonded              = false;
         //! If update and constraint solving is performed on GPU.
@@ -149,7 +153,7 @@ class SimulationWorkload
         //! If domain decomposition halo exchange is performed on GPU.
         bool useGpuHaloExchange        = false;
         //! If direct PP-PME communication between GPU is used.
-        bool useGpuPmePPCommunication  = false;
+        bool useGpuPmePpCommunication  = false;
         //! If direct GPU-GPU communication is enabled.
         bool useGpuDirectCommunication = false;
 };
index 17c3dc06af349f9a5620d797952335ba15cb712a..597cf1a81580119c6a48907574c9aa9f43661953 100644 (file)
 
 #include "decidesimulationworkload.h"
 
+#include "gromacs/ewald/pme.h"
 #include "gromacs/taskassignment/taskassignment.h"
 #include "gromacs/utility/arrayref.h"
 
 namespace gmx
 {
 
-SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
-                                            bool useGpuForPme,
-                                            bool useGpuForPmeFft,
-                                            bool useGpuForBonded,
-                                            bool useGpuForUpdateConstraints,
-                                            bool useGpuForBufferOps,
-                                            bool useGpuHaloExchange,
-                                            bool useGpuPmePpComm)
+SimulationWorkload createSimulationWorkload(bool       useGpuForNonbonded,
+                                            PmeRunMode pmeRunMode,
+                                            bool       useGpuForBonded,
+                                            bool       useGpuForUpdateConstraints,
+                                            bool       useGpuForBufferOps,
+                                            bool       useGpuHaloExchange,
+                                            bool       useGpuPmePpComm)
 {
-    SimulationWorkload simulationWorkload {
-        useGpuForNonbonded,
-        useGpuForPme,
-        useGpuForPmeFft,
-        useGpuForBonded,
-        useGpuForUpdateConstraints,
-        useGpuForBufferOps,
-        useGpuHaloExchange,
-        useGpuPmePpComm,
-        useGpuHaloExchange || useGpuPmePpComm
-    };
+    SimulationWorkload simulationWorkload;
+    simulationWorkload.useCpuNonbonded           = !useGpuForNonbonded;
+    simulationWorkload.useGpuNonbonded           = useGpuForNonbonded;
+    simulationWorkload.useCpuPme                 = (pmeRunMode == PmeRunMode::CPU);
+    simulationWorkload.useGpuPme                 = (pmeRunMode == PmeRunMode::GPU || pmeRunMode == PmeRunMode::Mixed);
+    simulationWorkload.useGpuPmeFft              = (pmeRunMode == PmeRunMode::GPU);
+    simulationWorkload.useGpuBonded              = useGpuForBonded;
+    simulationWorkload.useGpuUpdate              = useGpuForUpdateConstraints;
+    simulationWorkload.useGpuBufferOps           = useGpuForBufferOps;
+    simulationWorkload.useGpuHaloExchange        = useGpuHaloExchange;
+    simulationWorkload.useGpuPmePpCommunication  = useGpuPmePpComm;
+    simulationWorkload.useGpuDirectCommunication = useGpuHaloExchange || useGpuPmePpComm;
 
     return simulationWorkload;
 }
index 664a307b93c632fccd0de1f0e1a5701a1be7adef..0e87da68c599e5520b71618064f8b6f55c39d029 100644 (file)
@@ -46,6 +46,8 @@
 
 #include "gromacs/mdtypes/simulation_workload.h"
 
+enum class PmeRunMode;
+
 namespace gmx
 {
 
@@ -55,8 +57,7 @@ namespace gmx
  *
  * \param[in] useGpuForNonbonded If we have short-range nonbonded interactions
  *                               calculations on GPU(s).
- * \param[in] useGpuForPme       If long range PME interactions are calculated on GPU(s).
- * \param[in] useGpuForPmeFft    If FFT solving for PME is done on the GPU.
+ * \param[in] pmeRunMode         Run mode indicating on which resource PME is executed.
  * \param[in] useGpuForBonded    If bonded interactions are calculated on GPU(s).
  * \param[in] useGpuForUpdateConstraints If coordinate update and constraint solving is performed on
  *                                       GPU(s).
@@ -65,14 +66,13 @@ namespace gmx
  * \param[in] useGpuPmePpComm    If GPU direct communication is used in PME-PP communication.
  * \returns Simulation lifetime constant workload description.
  */
-SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
-                                            bool useGpuForPme,
-                                            bool useGpuForPmeFft,
-                                            bool useGpuForBonded,
-                                            bool useGpuForUpdateConstraints,
-                                            bool useGpuForBufferOps,
-                                            bool useGpuHaloExchange,
-                                            bool useGpuPmePpComm);
+SimulationWorkload createSimulationWorkload(bool       useGpuForNonbonded,
+                                            PmeRunMode pmeRunMode,
+                                            bool       useGpuForBonded,
+                                            bool       useGpuForUpdateConstraints,
+                                            bool       useGpuForBufferOps,
+                                            bool       useGpuHaloExchange,
+                                            bool       useGpuPmePpComm);
 
 
 }  // namespace gmx
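
For reference, an illustrative call with the new signature (the local
variable names below are placeholders matching the parameter names above,
not necessarily the variables used at the call site in runner.cpp):

    SimulationWorkload simulationWork =
            createSimulationWorkload(useGpuForNonbonded,
                                     pmeRunMode,            // PmeRunMode::CPU, ::GPU or ::Mixed
                                     useGpuForBonded,
                                     useGpuForUpdateConstraints,
                                     useGpuForBufferOps,
                                     useGpuHaloExchange,
                                     useGpuPmePpComm);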