Add missing cycle counting for GPU halo exchange calls
author Szilárd Páll <pall.szilard@gmail.com>
Tue, 20 Oct 2020 22:41:59 +0000 (00:41 +0200)
committer Paul Bauer <paul.bauer.q@gmail.com>
Wed, 21 Oct 2020 15:17:21 +0000 (15:17 +0000)
Calls left un-timed that were leaking into the "Rest" timer are now
accounted for.

src/gromacs/domdec/gpuhaloexchange_impl.cu
src/gromacs/timing/wallcycle.cpp
src/gromacs/timing/wallcycle.h

index d519445f74d48d0dd2fbea7b1f623e3b7c8c5371..9efebf69046f39b924781df34245549857c83cd1 100644 (file)
@@ -132,6 +132,8 @@ __global__ void unpackRecvBufKernel(float3* __restrict__ data,
 
 void GpuHaloExchange::Impl::reinitHalo(float3* d_coordinatesBuffer, float3* d_forcesBuffer)
 {
+    wallcycle_start(wcycle_, ewcDOMDEC);
+    wallcycle_sub_start(wcycle_, ewcsDD_GPU);
 
     d_x_ = d_coordinatesBuffer;
     d_f_ = d_forcesBuffer;
@@ -209,6 +211,9 @@ void GpuHaloExchange::Impl::reinitHalo(float3* d_coordinatesBuffer, float3* d_fo
                  MPI_BYTE, sendRankF_, 0, mpi_comm_mysim_, MPI_STATUS_IGNORE);
 #endif
 
+    wallcycle_sub_stop(wcycle_, ewcsDD_GPU);
+    wallcycle_stop(wcycle_, ewcDOMDEC);
+
     return;
 }
 
@@ -216,13 +221,13 @@ void GpuHaloExchange::Impl::communicateHaloCoordinates(const matrix          box
                                                        GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
 {
 
+    wallcycle_start(wcycle_, ewcLAUNCH_GPU);
     if (pulse_ == 0)
     {
         // ensure stream waits until coordinate data is available on device
         coordinatesReadyOnDeviceEvent->enqueueWaitEvent(nonLocalStream_);
     }
 
-    wallcycle_start(wcycle_, ewcLAUNCH_GPU);
     wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_MOVEX);
 
     // launch kernel to pack send buffer
index 45714a1da2cca611d7ea0709687ed15dae5aff0b..e69df666c1d9f40c340856b8774fbd5b07c3a852 100644 (file)
@@ -155,6 +155,7 @@ static const char* wcsn[ewcsNR] = {
     "DD make top.",
     "DD make constr.",
     "DD top. other",
+    "DD GPU ops.",
     "NS grid local",
     "NS grid non-loc.",
     "NS search local",
index 83be456671553574acb6d265310769794f342278..0bc53f0fe69f9db152776eaf2230b9c4b36001ea 100644 (file)
@@ -107,6 +107,7 @@ enum
     ewcsDD_MAKETOP,
     ewcsDD_MAKECONSTR,
     ewcsDD_TOPOTHER,
+    ewcsDD_GPU,
     ewcsNBS_GRID_LOCAL,
     ewcsNBS_GRID_NONLOCAL,
     ewcsNBS_SEARCH_LOCAL,