Fix GPU atom data init timer issue
author: Berk Hess <hess@kth.se>
Wed, 2 Oct 2019 09:28:06 +0000 (11:28 +0200)
committer: Szilárd Páll <pall.szilard@gmail.com>
Thu, 3 Oct 2019 14:36:18 +0000 (16:36 +0200)
The GPU atom data init timer was only read when the local pairlist
transfer had been timed. However, the local pairlist transfer is not
timed when the list is empty, which left the timer in an inconsistent
state.

Change-Id: Ifc2a63c7273ae65ae66708c6a8b0fb526041ee38

src/gromacs/nbnxm/gpu_common.h

index 5e670d4e1673872c4ef1cdaa69ae42f84b864e36..f39e40ea8a2dd251cbf501f22d1a6034363f6b0b 100644 (file)
@@ -302,7 +302,7 @@ gpu_reduce_staged_outputs(const StagingData         &nbst,
  * \param[in]  timers            Pointer to GPU timers data
  * \param[in]  plist             Pointer to the pair list data
  * \param[in]  atomLocality      Atom locality specifier
- * \param[in]  didEnergyKernels  True if energy kernels have been called in the current step
+ * \param[in]  stepWork          Force schedule flags
  * \param[in]  doTiming          True if timing is enabled.
  *
  */
@@ -312,7 +312,7 @@ gpu_accumulate_timings(gmx_wallclock_gpu_nbnxn_t *timings,
                        GpuTimers                 *timers,
                        const GpuPairlist         *plist,
                        AtomLocality               atomLocality,
-                       bool                       didEnergyKernels,
+                       const gmx::StepWorkload   &stepWork,
                        bool                       doTiming)
 {
     /* timing data accumulation */
@@ -322,7 +322,8 @@ gpu_accumulate_timings(gmx_wallclock_gpu_nbnxn_t *timings,
     }
 
     /* determine interaction locality from atom locality */
-    const InteractionLocality iLocality = gpuAtomToInteractionLocality(atomLocality);
+    const InteractionLocality iLocality        = gpuAtomToInteractionLocality(atomLocality);
+    const bool                didEnergyKernels = stepWork.computeEnergy;
 
     /* only increase counter once (at local F wait) */
     if (iLocality == InteractionLocality::Local)
@@ -347,16 +348,17 @@ gpu_accumulate_timings(gmx_wallclock_gpu_nbnxn_t *timings,
        for the force D2H). */
     countPruneKernelTime(timers, timings, iLocality);
 
-    /* only count atdat and pair-list H2D at pair-search step */
-    if (timers->interaction[iLocality].didPairlistH2D)
+    /* only count atdat at pair-search steps (add only once, at local F wait) */
+    if (stepWork.doNeighborSearch && atomLocality == AtomLocality::Local)
     {
-        /* atdat transfer timing (add only once, at local F wait) */
-        if (atomLocality == AtomLocality::Local)
-        {
-            timings->pl_h2d_c++;
-            timings->pl_h2d_t += timers->atdat.getLastRangeTime();
-        }
+        /* atdat transfer timing */
+        timings->pl_h2d_c++;
+        timings->pl_h2d_t += timers->atdat.getLastRangeTime();
+    }
 
+    /* only count pair-list H2D when actually performed */
+    if (timers->interaction[iLocality].didPairlistH2D)
+    {
         timings->pl_h2d_t += timers->interaction[iLocality].pl_h2d.getLastRangeTime();
 
         /* Clear the timing flag for the next step */
@@ -410,7 +412,7 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t          *nb,
             gpuStreamSynchronize(nb->stream[iLocality]);
         }
 
-        gpu_accumulate_timings(nb->timings, nb->timers, nb->plist[iLocality], aloc, stepWork.computeEnergy,
+        gpu_accumulate_timings(nb->timings, nb->timers, nb->plist[iLocality], aloc, stepWork,
                                nb->bDoTime != 0);
 
         gpu_reduce_staged_outputs(nb->nbst, iLocality, stepWork.computeEnergy, stepWork.computeVirial,