Add haveCpuLocalForces flag to DomainLifetimeWorkload
authorArtem Zhmurov <zhmurov@gmail.com>
Tue, 22 Oct 2019 18:30:22 +0000 (20:30 +0200)
committerSzilárd Páll <pall.szilard@gmail.com>
Mon, 28 Oct 2019 16:49:38 +0000 (17:49 +0100)
The flag haveCpuLocalForces is moved to DomainLifetimeWorkload.
The name and meaning is also slightly changed, haveCpuLocalForceWork
now now indicates whether any forces in the local domain are computed
on the CPU.
Consequently, the currently redundant PME f reduction conditional has
been removed from the haveCpuLocalForceWork initialization. The
relationship between local force work and existence of local forces on
the CPU however needs to be clarified in relationship to #3160 and how
the PP-PME direct communication is improved/simplified.

Change-Id: Idaed4da41bf5b72435c47bab8aeb7130b36b03c7

src/gromacs/mdlib/sim_util.cpp
src/gromacs/mdtypes/simulation_workload.h

index 249af42f5feaa601c8bb2a69bedc79f50a7c293a..ba80dc084c152d018694d9b55fb096bca18b6fea 100644 (file)
@@ -777,14 +777,15 @@ setupForceOutputs(t_forcerec                          *fr,
 /*! \brief Set up flags that have the lifetime of the domain indicating what type of work is there to compute.
  */
 static DomainLifetimeWorkload
-setupDomainLifetimeWorkload(const t_inputrec       &inputrec,
-                            const t_forcerec       &fr,
-                            const pull_t           *pull_work,
-                            const gmx_edsam        *ed,
-                            const t_idef           &idef,
-                            const t_fcdata         &fcd,
-                            const t_mdatoms        &mdatoms,
-                            const StepWorkload     &stepWork)
+setupDomainLifetimeWorkload(const t_inputrec         &inputrec,
+                            const t_forcerec         &fr,
+                            const pull_t             *pull_work,
+                            const gmx_edsam          *ed,
+                            const t_idef             &idef,
+                            const t_fcdata           &fcd,
+                            const t_mdatoms          &mdatoms,
+                            const SimulationWorkload &simulationWork,
+                            const StepWorkload       &stepWork)
 {
     DomainLifetimeWorkload domainWork;
     // Note that haveSpecialForces is constant over the whole run
@@ -795,6 +796,10 @@ setupDomainLifetimeWorkload(const t_inputrec       &inputrec,
     domainWork.haveCpuListedForceWork = haveCpuListedForces(fr, idef, fcd);
     // Note that haveFreeEnergyWork is constant over the whole run
     domainWork.haveFreeEnergyWork     = (fr.efep != efepNO && mdatoms.nPerturbed != 0);
+    // We assume we have local force work if there are CPU
+    // force tasks including PME or nonbondeds.
+    domainWork.haveCpuLocalForceWork  = domainWork.haveSpecialForces || domainWork.haveCpuListedForceWork || domainWork.haveFreeEnergyWork ||
+        simulationWork.useCpuNonbonded || simulationWork.useCpuPme;
     return domainWork;
 }
 
@@ -1140,6 +1145,7 @@ void do_force(FILE                                     *fplog,
                                         top->idef,
                                         *fcd,
                                         *mdatoms,
+                                        simulationWork,
                                         stepWork);
 
         wallcycle_start_nocount(wcycle, ewcNS);
@@ -1573,12 +1579,6 @@ void do_force(FILE                                     *fplog,
         }
     }
 
-    // TODO move this into StepWorkload
-    const bool useCpuPmeFReduction      = thisRankHasDuty(cr, DUTY_PME) && !stepWork.useGpuPmeFReduction;
-    // TODO: move this into DomainLifetimeWorkload, including the second part of the condition
-    const bool haveCpuLocalForces     = (domainWork.haveSpecialForces || domainWork.haveCpuListedForceWork || useCpuPmeFReduction ||
-                                         (fr->efep != efepNO));
-
     if (havePPDomainDecomposition(cr))
     {
         /* We are done with the CPU compute.
@@ -1593,11 +1593,11 @@ void do_force(FILE                                     *fplog,
 
             if (useGpuForcesHaloExchange)
             {
-                if (haveCpuLocalForces)
+                if (domainWork.haveCpuLocalForceWork)
                 {
                     stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), AtomLocality::Local);
                 }
-                gpuHaloExchange->communicateHaloForces(haveCpuLocalForces);
+                gpuHaloExchange->communicateHaloForces(domainWork.haveCpuLocalForceWork);
             }
             else
             {
@@ -1715,7 +1715,7 @@ void do_force(FILE                                     *fplog,
             // local atoms. This depends on whether there are CPU-based force tasks
             // or when DD is active the halo exchange has resulted in contributions
             // from the non-local part.
-            const bool haveLocalForceContribInCpuBuffer = (haveCpuLocalForces || havePPDomainDecomposition(cr));
+            const bool haveLocalForceContribInCpuBuffer = (domainWork.haveCpuLocalForceWork || havePPDomainDecomposition(cr));
 
             // TODO: move these steps as early as possible:
             // - CPU f H2D should be as soon as all CPU-side forces are done
index a95ae9591bffdc82052af8d65124a972b24a9959..6594081c509036399646ee972944d7f6278be54d 100644 (file)
@@ -116,6 +116,8 @@ class DomainLifetimeWorkload
         bool haveCpuListedForceWork = false;
         //! Whether the current nstlist step-range has special forces on the CPU.
         bool haveSpecialForces = false;
+        //! Whether there are currently any local forces to be computed on the CPU
+        bool haveCpuLocalForceWork = false;
 
         // TODO
         //! Whether the current nstlist step-range Free energy work on the CPU.