// on virial steps the CPU reduction path is taken
// TODO: remove flags.computeEnergy, ref #3128
flags.useGpuFBufferOps = simulationWork.useGpuBufferOps && !(flags.computeVirial || flags.computeEnergy);
- flags.useGpuPmeFReduction = flags.useGpuFBufferOps && (simulationWork.usePmeGpu &&
- (rankHasPmeDuty || simulationWork.useGpuPmePPCommunication));
+ flags.useGpuPmeFReduction = flags.useGpuFBufferOps && (simulationWork.useGpuPme &&
+ (rankHasPmeDuty || simulationWork.useGpuPmePpCommunication));
return flags;
}
const StepWorkload &stepWork = runScheduleWork->stepWork;
- const bool useGpuPmeOnThisRank = simulationWork.usePmeGpu && thisRankHasDuty(cr, DUTY_PME);
+ const bool useGpuPmeOnThisRank = simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME);
const int pmeFlags = makePmeFlags(stepWork);
// Switches on whether to use GPU for position and force buffer operations
* and domain decomposition does not use the graph,
* we do not need to worry about shifting.
*/
- bool reinitGpuPmePpComms = simulationWork.useGpuPmePPCommunication && (stepWork.doNeighborSearch);
- bool sendCoordinatesFromGpu = simulationWork.useGpuPmePPCommunication && !(stepWork.doNeighborSearch);
+ bool reinitGpuPmePpComms = simulationWork.useGpuPmePpCommunication && (stepWork.doNeighborSearch);
+ bool sendCoordinatesFromGpu = simulationWork.useGpuPmePpCommunication && !(stepWork.doNeighborSearch);
gmx_pme_send_coordinates(fr, cr, box, as_rvec_array(x.unpaddedArrayRef().data()),
lambda[efptCOUL], lambda[efptVDW],
(stepWork.computeVirial || stepWork.computeEnergy),
- step, simulationWork.useGpuPmePPCommunication, reinitGpuPmePpComms,
+ step, simulationWork.useGpuPmePpCommunication, reinitGpuPmePpComms,
sendCoordinatesFromGpu, wcycle);
}
#endif /* GMX_MPI */
// If on GPU PME-PP comms path, receive forces from PME before GPU buffer ops
// TODO refactor this and unify with below default-path call to the same function
- if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && simulationWork.useGpuPmePPCommunication)
+ if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && simulationWork.useGpuPmePpCommunication)
{
/* In case of node-splitting, the PP nodes receive the long-range
* forces, virial and energy from the PME nodes here.
*/
- pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd, simulationWork.useGpuPmePPCommunication, stepWork.useGpuPmeFReduction, wcycle);
+ pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd, simulationWork.useGpuPmePpCommunication, stepWork.useGpuPmeFReduction, wcycle);
}
}
// TODO refactor this and unify with above PME-PP GPU communication path call to the same function
- if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePPCommunication)
+ if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication)
{
/* In case of node-splitting, the PP nodes receive the long-range
* forces, virial and energy from the PME nodes here.
*/
pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd,
- simulationWork.useGpuPmePPCommunication, false, wcycle);
+ simulationWork.useGpuPmePpCommunication, false, wcycle);
}
if (stepWork.computeForces)
// 2. The proper GPU synchronization is introduced, so that the H2D and D2H data copies can be performed in the separate
// stream owned by the StatePropagatorDataGpu
const auto &simulationWork = runScheduleWork->simulationWork;
- const bool useGpuForPme = simulationWork.usePmeGpu;
+ const bool useGpuForPme = simulationWork.useGpuPme;
const bool useGpuForNonbonded = simulationWork.useGpuNonbonded;
// Temporary solution to make sure that the buffer ops are offloaded when update is offloaded
const bool useGpuForBufferOps = simulationWork.useGpuBufferOps;
MdrunScheduleWorkload runScheduleWork;
// Also populates the simulation constant workload description.
runScheduleWork.simulationWork = createSimulationWorkload(useGpuForNonbonded,
- useGpuForPme,
- (pmeRunMode == PmeRunMode::GPU),
+ pmeRunMode,
useGpuForBonded,
useGpuForUpdate,
devFlags.enableGpuBufferOps,
class SimulationWorkload
{
public:
+ //! If we have calculation of short range nonbondeds on CPU
+ bool useCpuNonbonded = false;
//! If we have calculation of short range nonbondeds on GPU
bool useGpuNonbonded = false;
- //! If we have calculation of long range PME in GPU
+ //! If we have calculation of long range PME on CPU
- bool usePmeGpu = false;
+ bool useCpuPme = false;
+ //! If we have calculation of long range PME in GPU
+ bool useGpuPme = false;
//! If PME FFT solving is done on GPU.
- bool usePmeFftGpu = false;
+ bool useGpuPmeFft = false;
//! If bonded interactions are calculated on GPU.
bool useGpuBonded = false;
//! If update and constraint solving is performed on GPU.
//! If domain decomposition halo exchange is performed on GPU.
bool useGpuHaloExchange = false;
//! If direct PP-PME communication between GPU is used.
- bool useGpuPmePPCommunication = false;
+ bool useGpuPmePpCommunication = false;
//! If direct GPU-GPU communication is enabled.
bool useGpuDirectCommunication = false;
};
#include "decidesimulationworkload.h"
+#include "gromacs/ewald/pme.h"
#include "gromacs/taskassignment/taskassignment.h"
#include "gromacs/utility/arrayref.h"
namespace gmx
{
-SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
- bool useGpuForPme,
- bool useGpuForPmeFft,
- bool useGpuForBonded,
- bool useGpuForUpdateConstraints,
- bool useGpuForBufferOps,
- bool useGpuHaloExchange,
- bool useGpuPmePpComm)
+SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
+ PmeRunMode pmeRunMode,
+ bool useGpuForBonded,
+ bool useGpuForUpdateConstraints,
+ bool useGpuForBufferOps,
+ bool useGpuHaloExchange,
+ bool useGpuPmePpComm)
{
- SimulationWorkload simulationWorkload {
- useGpuForNonbonded,
- useGpuForPme,
- useGpuForPmeFft,
- useGpuForBonded,
- useGpuForUpdateConstraints,
- useGpuForBufferOps,
- useGpuHaloExchange,
- useGpuPmePpComm,
- useGpuHaloExchange || useGpuPmePpComm
- };
+ SimulationWorkload simulationWorkload;
+ simulationWorkload.useCpuNonbonded = !useGpuForNonbonded;
+ simulationWorkload.useGpuNonbonded = useGpuForNonbonded;
+ simulationWorkload.useCpuPme = (pmeRunMode == PmeRunMode::CPU);
+ simulationWorkload.useGpuPme = (pmeRunMode == PmeRunMode::GPU || pmeRunMode == PmeRunMode::Mixed);
+ simulationWorkload.useGpuPmeFft = (pmeRunMode == PmeRunMode::Mixed);
+ simulationWorkload.useGpuBonded = useGpuForBonded;
+ simulationWorkload.useGpuUpdate = useGpuForUpdateConstraints;
+ simulationWorkload.useGpuBufferOps = useGpuForBufferOps;
+ simulationWorkload.useGpuHaloExchange = useGpuHaloExchange;
+ simulationWorkload.useGpuPmePpCommunication = useGpuPmePpComm;
+ simulationWorkload.useGpuDirectCommunication = useGpuHaloExchange || useGpuPmePpComm;
return simulationWorkload;
}
#include "gromacs/mdtypes/simulation_workload.h"
+enum class PmeRunMode;
+
namespace gmx
{
*
* \param[in] useGpuForNonbonded If we have short-range nonbonded interactions
* calculations on GPU(s).
- * \param[in] useGpuForPme If long range PME interactions are calculated on GPU(s).
- * \param[in] useGpuForPmeFft If FFT solving for PME is done on the GPU.
* \param[in] pmeRunMode Run mode indicating what resource PME is executed on.
* \param[in] useGpuForBonded If bonded interactions are calculated on GPU(s).
* \param[in] useGpuForUpdateConstraints If coordinate update and constraint solving is performed on
* GPU(s).
* \param[in] useGpuPmePpComm If GPU direct communication is used in PME-PP communication.
* \returns Simulation lifetime constant workload description.
*/
-SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
- bool useGpuForPme,
- bool useGpuForPmeFft,
- bool useGpuForBonded,
- bool useGpuForUpdateConstraints,
- bool useGpuForBufferOps,
- bool useGpuHaloExchange,
- bool useGpuPmePpComm);
+SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
+ PmeRunMode pmeRunMode,
+ bool useGpuForBonded,
+ bool useGpuForUpdateConstraints,
+ bool useGpuForBufferOps,
+ bool useGpuHaloExchange,
+ bool useGpuPmePpComm);
} // namespace gmx