Add Bonded GPU launch overhead cycle subcounter
author Szilárd Páll <pall.szilard@gmail.com>
Mon, 15 Oct 2018 18:35:17 +0000 (20:35 +0200)
committer Berk Hess <hess@kth.se>
Thu, 6 Dec 2018 10:06:24 +0000 (11:06 +0100)
Also fixed missing counters around the transfer/clearing launch.

Change-Id: Ib3f18b8285b979b818ab79713253bc7f7bb89e2a

src/gromacs/mdlib/sim_util.cpp
src/gromacs/timing/wallcycle.cpp
src/gromacs/timing/wallcycle.h

index 763ca3ec3f74c5ca6271291f645a97382ab97dbe..e5b269f6c8cbd4a400102c52698eda2634610095 100644 (file)
@@ -1284,7 +1284,9 @@ static void do_force_cutsVERLET(FILE *fplog,
         // we can only launch the kernel after non-local coordinates have been received.
         if (ppForceWorkload->haveGpuBondedWork && !DOMAINDECOMP(cr))
         {
+            wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_BONDED);
             fr->gpuBonded->launchKernels(fr, flags, box);
+            wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED);
         }
 
         /* launch local nonbonded work on GPU */
@@ -1356,7 +1358,9 @@ static void do_force_cutsVERLET(FILE *fplog,
 
             if (ppForceWorkload->haveGpuBondedWork)
             {
+                wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_BONDED);
                 fr->gpuBonded->launchKernels(fr, flags, box);
+                wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED);
             }
 
             wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
@@ -1705,13 +1709,15 @@ static void do_force_cutsVERLET(FILE *fplog,
 
     if (ppForceWorkload->haveGpuBondedWork && (flags & GMX_FORCE_ENERGY))
     {
-        // TODO The launch call could come earlier in the
-        // force-calculation sequence.
+        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
+        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_BONDED);
         fr->gpuBonded->launchEnergyTransfer();
         fr->gpuBonded->accumulateEnergyTerms(enerd);
         // TODO The clearing call could come later in the
         // force-calculation sequence.
         fr->gpuBonded->clearEnergies();
+        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED);
+        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
     }
 
     if (DOMAINDECOMP(cr))
index e521c47010a6e27a5a0d15eb4a35e7a18f994596..99da970cdaeb8c43a61b1831c7ce0502004e0aca 100644 (file)
@@ -128,6 +128,7 @@ static const char *wcsn[ewcsNR] =
     "Nonbonded pruning",
     "Nonbonded F",
     "Launch NB GPU tasks",
+    "Launch Bonded GPU tasks",
     "Launch PME GPU tasks",
     "Ewald F correction",
     "NB X buffer ops.",
index 42897a001ec7d1b706e379ea601ae701713212e1..123feda178a9a00da382ad58ccd5fe00d9eb6366 100644 (file)
@@ -74,6 +74,7 @@ enum {
     ewcsNONBONDED_PRUNING,
     ewcsNONBONDED,
     ewcsLAUNCH_GPU_NONBONDED,
+    ewcsLAUNCH_GPU_BONDED,
     ewcsLAUNCH_GPU_PME,
     ewcsEWALD_CORRECTION,
     ewcsNB_X_BUF_OPS,