Split simulationWork.useGpuBufferOps into separate x and f flags
[alexxy/gromacs.git] / src / gromacs / taskassignment / decidesimulationworkload.cpp
index b9ba44e94caeacc89e45f68272b4fc44002e2026..a6a98046241c7c93c8d5c69c6aa079ba1b0741bd 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -44,6 +44,8 @@
 #include "decidesimulationworkload.h"
 
 #include "gromacs/ewald/pme.h"
+#include "gromacs/mdtypes/multipletimestepping.h"
+#include "gromacs/taskassignment/decidegpuusage.h"
 #include "gromacs/taskassignment/taskassignment.h"
 #include "gromacs/utility/arrayref.h"
 
@@ -51,28 +53,57 @@ namespace gmx
 {
 
 SimulationWorkload createSimulationWorkload(const t_inputrec& inputrec,
-                                            bool              useGpuForNonbonded,
-                                            PmeRunMode        pmeRunMode,
-                                            bool              useGpuForBonded,
-                                            bool              useGpuForUpdate,
-                                            bool              useGpuForBufferOps,
-                                            bool              useGpuHaloExchange,
-                                            bool              useGpuPmePpComm)
+                                            const bool        disableNonbondedCalculation,
+                                            const DevelopmentFeatureFlags& devFlags,
+                                            bool       havePpDomainDecomposition,
+                                            bool       haveSeparatePmeRank,
+                                            bool       useGpuForNonbonded,
+                                            PmeRunMode pmeRunMode,
+                                            bool       useGpuForBonded,
+                                            bool       useGpuForUpdate,
+                                            bool       useGpuDirectHalo)
 {
     SimulationWorkload simulationWorkload;
+    simulationWorkload.computeNonbonded = !disableNonbondedCalculation;
+    simulationWorkload.computeNonbondedAtMtsLevel1 =
+            simulationWorkload.computeNonbonded && inputrec.useMts
+            && inputrec.mtsLevels.back().forceGroups[static_cast<int>(MtsForceGroups::Nonbonded)];
     simulationWorkload.computeMuTot    = inputrecNeedMutot(&inputrec);
     simulationWorkload.useCpuNonbonded = !useGpuForNonbonded;
     simulationWorkload.useGpuNonbonded = useGpuForNonbonded;
     simulationWorkload.useCpuPme       = (pmeRunMode == PmeRunMode::CPU);
     simulationWorkload.useGpuPme = (pmeRunMode == PmeRunMode::GPU || pmeRunMode == PmeRunMode::Mixed);
-    simulationWorkload.useGpuPmeFft             = (pmeRunMode == PmeRunMode::Mixed);
-    simulationWorkload.useGpuBonded             = useGpuForBonded;
-    simulationWorkload.useGpuUpdate             = useGpuForUpdate;
-    simulationWorkload.useGpuBufferOps          = useGpuForBufferOps || useGpuForUpdate;
-    simulationWorkload.useGpuHaloExchange       = useGpuHaloExchange;
-    simulationWorkload.useGpuPmePpCommunication = useGpuPmePpComm && (pmeRunMode == PmeRunMode::GPU);
-    simulationWorkload.useGpuDirectCommunication    = useGpuHaloExchange || useGpuPmePpComm;
+    simulationWorkload.useGpuPmeFft = (pmeRunMode == PmeRunMode::Mixed);
+    simulationWorkload.useGpuBonded = useGpuForBonded;
+    simulationWorkload.useGpuUpdate = useGpuForUpdate;
+    simulationWorkload.useGpuXBufferOps =
+            (devFlags.enableGpuBufferOps || useGpuForUpdate) && !inputrec.useMts;
+    simulationWorkload.useGpuFBufferOps =
+            (devFlags.enableGpuBufferOps || useGpuForUpdate) && !inputrec.useMts;
+    if (simulationWorkload.useGpuXBufferOps || simulationWorkload.useGpuFBufferOps)
+    {
+        GMX_ASSERT(simulationWorkload.useGpuNonbonded,
+                   "Can only offload X/F buffer ops if nonbonded computation is also offloaded");
+    }
+    simulationWorkload.havePpDomainDecomposition = havePpDomainDecomposition;
+    simulationWorkload.useCpuHaloExchange        = havePpDomainDecomposition && !useGpuDirectHalo;
+    simulationWorkload.useGpuHaloExchange        = useGpuDirectHalo;
+    if (pmeRunMode == PmeRunMode::None)
+    {
+        GMX_RELEASE_ASSERT(!haveSeparatePmeRank, "Can not have separate PME rank(s) without PME.");
+    }
+    simulationWorkload.haveSeparatePmeRank = haveSeparatePmeRank;
+    simulationWorkload.useGpuPmePpCommunication =
+            haveSeparatePmeRank && devFlags.enableGpuPmePPComm && (pmeRunMode == PmeRunMode::GPU);
+    simulationWorkload.useCpuPmePpCommunication =
+            haveSeparatePmeRank && !simulationWorkload.useGpuPmePpCommunication;
+    GMX_RELEASE_ASSERT(!(simulationWorkload.useGpuPmePpCommunication
+                         && simulationWorkload.useCpuPmePpCommunication),
+                       "Cannot do PME-PP communication on both CPU and GPU");
+    simulationWorkload.useGpuDirectCommunication =
+            devFlags.enableGpuHaloExchange || devFlags.enableGpuPmePPComm;
     simulationWorkload.haveEwaldSurfaceContribution = haveEwaldSurfaceContribution(inputrec);
+    simulationWorkload.useMts                       = inputrec.useMts;
 
     return simulationWorkload;
 }