Split simulationWork.useGpuBufferOps into separate x and f flags

[alexxy/gromacs.git] / src / gromacs / mdtypes / simulation_workload.h
diff --git a/src/gromacs/mdtypes/simulation_workload.h b/src/gromacs/mdtypes/simulation_workload.h

index 9bf230a5938222e00dff0449441eb7351a502b9d..d871be32de47fcc88bad175c640312c393cadffd 100644 (file)
--- a/src/gromacs/mdtypes/simulation_workload.h
+++ b/src/gromacs/mdtypes/simulation_workload.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -47,18 +47,19 @@ namespace gmx
  {
  
  /*! \libinternal
- * \brief Describes work done on this domain that may change per-step.
+ * \brief Describes work done on this domain by the current rank that may change per-step.
   *
   * This work description is based on the SimulationWorkload in the context of the
   * current particle interactions assigned to this domain as well as other
   * factors that may change during the lifetime of a domain.
   *
+ * Note that unlike the other workload descriptors, these flags are also used on
+ * dedicated PME ranks, hence the content is rank-specific (at least when it
+ * comes to flags related to PME).
+ *
   * Note that the contents of an object of this type are valid for
   * a single step and are expected to be set at the beginning of each step.
   *
- * The initial set of flags map the legacy force flags to boolean flags;
- * these have the role of directing per-step compute tasks undertaken by a PP rank.
- *
   */
  class StepWorkload
  {
@@ -69,12 +70,16 @@ public:
      bool haveDynamicBox = false;
      //! Whether neighbor searching needs to be done this step
      bool doNeighborSearch = false;
+    //! Whether the slow forces need to be computed this step (in addition to the faster forces)
+    bool computeSlowForces = false;
      //! Whether virial needs to be computed this step
      bool computeVirial = false;
      //! Whether energies need to be computed this step
      bool computeEnergy = false;
      //! Whether (any) forces need to be computed this step, not only energies
      bool computeForces = false;
+    //! Whether only the MTS combined force buffers are needed and not the separate normal force buffer.
+    bool useOnlyMtsCombinedForceBuffer = false;
      //! Whether nonbonded forces need to be computed this step
      bool computeNonbondedForces = false;
      //! Whether listed forces need to be computed this step
@@ -91,6 +96,14 @@ public:
      bool useGpuFBufferOps = false;
      //! Whether PME forces are reduced with other contributions on the GPU this step
      bool useGpuPmeFReduction = false; // TODO: add this flag to the internal PME GPU data structures too
+    //! Whether GPU coordinates halo exchange is active this step
+    bool useGpuXHalo = false;
+    //! Whether GPU forces halo exchange is active this step
+    bool useGpuFHalo = false;
+    //! Whether GPU PME work is computed on the current rank this step (can be false on PP-only ranks or on fast steps with MTS)
+    bool haveGpuPmeOnThisRank = false;
+    //! Whether to combine the forces for multiple time stepping before the halo exchange
+    bool combineMtsForcesBeforeHaloExchange = false;
  };
  
  /*! \libinternal
@@ -111,12 +124,9 @@ class DomainLifetimeWorkload
  public:
      //! Whether the current nstlist step-range has bonded work to run on a GPU.
      bool haveGpuBondedWork = false;
-    //! Whether the current nstlist step-range has bonded work to run on he CPU.
+    //! Whether the current nstlist step-range has bonded work to run on the CPU.
      bool haveCpuBondedWork = false;
-    //! Whether the current nstlist step-range has restraints work to run on he CPU.
-    bool haveRestraintsWork = false;
-    //! Whether the current nstlist step-range has listed forces work to run on he CPU.
-    //  Note: currently this is haveCpuBondedWork | haveRestraintsWork
+    //! Whether the current nstlist step-range has listed (bonded + restraints) forces work to run on the CPU.
      bool haveCpuListedForceWork = false;
      //! Whether the current nstlist step-range has special forces on the CPU.
      bool haveSpecialForces = false;
@@ -125,6 +135,13 @@ public:
  
      //! Whether the current nstlist step-range has free energy work to run on the CPU.
      bool haveFreeEnergyWork = false;
+    //! Whether the CPU force buffer has contributions to local atoms that need to be reduced on the GPU (with DD).
+    // This depends on whether there are CPU-based force tasks or,
+    // when DD is active, whether the halo exchange has resulted in
+    // contributions from the non-local part.
+    bool haveLocalForceContribInCpuBuffer = false;
+    //! Whether the CPU force buffer has contributions to nonlocal atoms that need to be reduced on the GPU (with DD).
+    bool haveNonLocalForceContribInCpuBuffer = false;
  };
  
  /*! \libinternal
@@ -142,6 +159,12 @@ public:
  class SimulationWorkload
  {
  public:
+    //! Whether to compute nonbonded pair interactions
+    bool computeNonbonded = false;
+    //! Whether nonbonded pair forces are to be computed at slow MTS steps only
+    bool computeNonbondedAtMtsLevel1 = false;
+    //! Whether total dipole needs to be computed
+    bool computeMuTot = false;
      //! If we have calculation of short range nonbondeds on CPU
      bool useCpuNonbonded = false;
      //! If we have calculation of short range nonbondeds on GPU
@@ -156,16 +179,28 @@ public:
      bool useGpuBonded = false;
      //! If update and constraint solving is performed on GPU.
      bool useGpuUpdate = false;
-    //! If buffer operations are performed on GPU.
-    bool useGpuBufferOps = false;
+    //! If X buffer operations are performed on GPU.
+    bool useGpuXBufferOps = false;
+    //! If F buffer operations are performed on GPU.
+    bool useGpuFBufferOps = false;
+    //! If PP domain decomposition is active.
+    bool havePpDomainDecomposition = false;
+    //! If domain decomposition halo exchange is performed on CPU (in CPU-only runs or with staged GPU communication).
+    bool useCpuHaloExchange = false;
      //! If domain decomposition halo exchange is performed on GPU.
      bool useGpuHaloExchange = false;
+    //! If separate PME rank(s) are used.
+    bool haveSeparatePmeRank = false;
+    //! If PP-PME communication is done purely on CPU (in CPU-only runs or with staged GPU communication).
+    bool useCpuPmePpCommunication = false;
      //! If direct PP-PME communication between GPU is used.
      bool useGpuPmePpCommunication = false;
      //! If direct GPU-GPU communication is enabled.
      bool useGpuDirectCommunication = false;
      //! If there is an Ewald surface (dipole) term to compute
      bool haveEwaldSurfaceContribution = false;
+    //! Whether to use multiple time stepping
+    bool useMts = false;
  };
  
  class MdrunScheduleWorkload