Split simulationWork.useGpuBufferOps into separate x and f flags
[alexxy/gromacs.git] / src / gromacs / taskassignment / decidesimulationworkload.cpp
index be1b5d6002d2604f9bfb5f3878199a93efeb43d6..a6a98046241c7c93c8d5c69c6aa079ba1b0741bd 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 
 #include "decidesimulationworkload.h"
 
+#include "gromacs/ewald/pme.h"
+#include "gromacs/mdtypes/multipletimestepping.h"
+#include "gromacs/taskassignment/decidegpuusage.h"
 #include "gromacs/taskassignment/taskassignment.h"
 #include "gromacs/utility/arrayref.h"
 
 namespace gmx
 {
 
-SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
-                                            bool useGpuForPme,
-                                            bool useGpuForPmeFft,
-                                            bool useGpuForBonded,
-                                            bool useGpuForUpdateConstraints)
+SimulationWorkload createSimulationWorkload(const t_inputrec& inputrec,
+                                            const bool        disableNonbondedCalculation,
+                                            const DevelopmentFeatureFlags& devFlags,
+                                            bool       havePpDomainDecomposition,
+                                            bool       haveSeparatePmeRank,
+                                            bool       useGpuForNonbonded,
+                                            PmeRunMode pmeRunMode,
+                                            bool       useGpuForBonded,
+                                            bool       useGpuForUpdate,
+                                            bool       useGpuDirectHalo)
 {
-    SimulationWorkload simulationWorkload {
-        useGpuForNonbonded,
-        useGpuForPme,
-        useGpuForPmeFft,
-        useGpuForBonded,
-        useGpuForUpdateConstraints,
-        (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr),
-        (getenv("GMX_GPU_DD_COMMS") != nullptr),
-        (getenv("GMX_GPU_PME_PP_COMMS") != nullptr),
-        (getenv("GMX_GPU_DD_COMMS") != nullptr) || (getenv("GMX_GPU_PME_PP_COMMS") != nullptr)
-    };
+    SimulationWorkload simulationWorkload; // Fields are assigned one by one below; returned by value at the end.
+    simulationWorkload.computeNonbonded = !disableNonbondedCalculation;
+    simulationWorkload.computeNonbondedAtMtsLevel1 = // True only when nonbonded is computed, MTS is on, and the last mtsLevels entry has the Nonbonded group set.
+            simulationWorkload.computeNonbonded && inputrec.useMts
+            && inputrec.mtsLevels.back().forceGroups[static_cast<int>(MtsForceGroups::Nonbonded)];
+    simulationWorkload.computeMuTot    = inputrecNeedMutot(&inputrec);
+    simulationWorkload.useCpuNonbonded = !useGpuForNonbonded; // Exact complement of useGpuNonbonded.
+    simulationWorkload.useGpuNonbonded = useGpuForNonbonded;
+    simulationWorkload.useCpuPme       = (pmeRunMode == PmeRunMode::CPU);
+    simulationWorkload.useGpuPme = (pmeRunMode == PmeRunMode::GPU || pmeRunMode == PmeRunMode::Mixed); // Mixed mode also counts as GPU PME.
+    simulationWorkload.useGpuPmeFft = (pmeRunMode == PmeRunMode::Mixed); // NOTE(review): true only for Mixed, not for GPU - confirm against the meaning of PmeRunMode::Mixed.
+    simulationWorkload.useGpuBonded = useGpuForBonded;
+    simulationWorkload.useGpuUpdate = useGpuForUpdate;
+    simulationWorkload.useGpuXBufferOps = // Same condition as useGpuFBufferOps: dev flag or GPU update, and never together with MTS.
+            (devFlags.enableGpuBufferOps || useGpuForUpdate) && !inputrec.useMts;
+    simulationWorkload.useGpuFBufferOps =
+            (devFlags.enableGpuBufferOps || useGpuForUpdate) && !inputrec.useMts;
+    if (simulationWorkload.useGpuXBufferOps || simulationWorkload.useGpuFBufferOps) // The only effect of this branch is the assertion below.
+    {
+        GMX_ASSERT(simulationWorkload.useGpuNonbonded,
+                   "Can only offload X/F buffer ops if nonbonded computation is also offloaded");
+    }
+    simulationWorkload.havePpDomainDecomposition = havePpDomainDecomposition;
+    simulationWorkload.useCpuHaloExchange        = havePpDomainDecomposition && !useGpuDirectHalo; // CPU halo exchange only with PP domain decomposition and no direct GPU halo.
+    simulationWorkload.useGpuHaloExchange        = useGpuDirectHalo; // NOTE(review): not gated on havePpDomainDecomposition here - presumably the caller guarantees it; confirm.
+    if (pmeRunMode == PmeRunMode::None) // Without PME there must be no separate PME rank.
+    {
+        GMX_RELEASE_ASSERT(!haveSeparatePmeRank, "Can not have separate PME rank(s) without PME.");
+    }
+    simulationWorkload.haveSeparatePmeRank = haveSeparatePmeRank;
+    simulationWorkload.useGpuPmePpCommunication = // Needs a separate PME rank, the dev flag, and PmeRunMode::GPU (Mixed does not qualify).
+            haveSeparatePmeRank && devFlags.enableGpuPmePPComm && (pmeRunMode == PmeRunMode::GPU);
+    simulationWorkload.useCpuPmePpCommunication = // Selected whenever a separate PME rank exists but the GPU path is not used.
+            haveSeparatePmeRank && !simulationWorkload.useGpuPmePpCommunication;
+    GMX_RELEASE_ASSERT(!(simulationWorkload.useGpuPmePpCommunication // Holds by construction of the two assignments above; kept as a sanity check.
+                         && simulationWorkload.useCpuPmePpCommunication),
+                       "Cannot do PME-PP communication on both CPU and GPU");
+    simulationWorkload.useGpuDirectCommunication = // Depends only on the dev flags, not on the halo / PME-PP choices made above.
+            devFlags.enableGpuHaloExchange || devFlags.enableGpuPmePPComm;
+    simulationWorkload.haveEwaldSurfaceContribution = haveEwaldSurfaceContribution(inputrec);
+    simulationWorkload.useMts                       = inputrec.useMts;
 
     return simulationWorkload;
 }
 
-}  // namespace gmx
+} // namespace gmx