Modernize wallcycle counting

[alexxy/gromacs.git] / src / gromacs / domdec / gpuhaloexchange_impl.cu
diff --git a/src/gromacs/domdec/gpuhaloexchange_impl.cu b/src/gromacs/domdec/gpuhaloexchange_impl.cu

index 65af08d35d73aee7b8a594a0f832962e9bdd44e1..f32a95bae7b8d9a1e331cc4f19cf96afe317f8e9 100644 (file)
--- a/src/gromacs/domdec/gpuhaloexchange_impl.cu
+++ b/src/gromacs/domdec/gpuhaloexchange_impl.cu
@@ -134,8 +134,8 @@ __global__ void unpackRecvBufKernel(float3* __restrict__ data,
  
  void GpuHaloExchange::Impl::reinitHalo(float3* d_coordinatesBuffer, float3* d_forcesBuffer)
  {
-    wallcycle_start(wcycle_, ewcDOMDEC);
-    wallcycle_sub_start(wcycle_, ewcsDD_GPU);
+    wallcycle_start(wcycle_, WallCycleCounter::Domdec);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::DDGpu);
  
      d_x_ = d_coordinatesBuffer;
      d_f_ = d_forcesBuffer;
@@ -249,8 +249,8 @@ void GpuHaloExchange::Impl::reinitHalo(float3* d_coordinatesBuffer, float3* d_fo
      }
  #endif
  
-    wallcycle_sub_stop(wcycle_, ewcsDD_GPU);
-    wallcycle_stop(wcycle_, ewcDOMDEC);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::DDGpu);
+    wallcycle_stop(wcycle_, WallCycleCounter::Domdec);
  
      return;
  }
@@ -283,14 +283,14 @@ void GpuHaloExchange::Impl::communicateHaloCoordinates(const matrix          box
                                                         GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
  {
  
-    wallcycle_start(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_start(wcycle_, WallCycleCounter::LaunchGpu);
      if (pulse_ == 0)
      {
          // ensure stream waits until coordinate data is available on device
          coordinatesReadyOnDeviceEvent->enqueueWaitEvent(nonLocalStream_);
      }
  
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_MOVEX);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuMoveX);
  
      // launch kernel to pack send buffer
      KernelLaunchConfig config;
@@ -328,12 +328,12 @@ void GpuHaloExchange::Impl::communicateHaloCoordinates(const matrix          box
                  kernelFn, config, nonLocalStream_, nullptr, "Domdec GPU Apply X Halo Exchange", kernelArgs);
      }
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_MOVEX);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuMoveX);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  
      // Consider time spent in communicateHaloData as Comm.X counter
      // ToDo: We need further refinement here as communicateHaloData includes launch time for cudamemcpyasync
-    wallcycle_start(wcycle_, ewcMOVEX);
+    wallcycle_start(wcycle_, WallCycleCounter::MoveX);
  
      // wait for remote co-ordinates is implicit with process-MPI as non-local stream is synchronized before MPI calls
      // and MPI_Waitall call makes sure both neighboring ranks' non-local stream is synchronized before data transfer is initiated
@@ -345,7 +345,7 @@ void GpuHaloExchange::Impl::communicateHaloCoordinates(const matrix          box
      float3* recvPtr = GMX_THREAD_MPI ? remoteXPtr_ : &d_x_[atomOffset_];
      communicateHaloData(d_sendBuf_, xSendSize_, sendRankX_, recvPtr, xRecvSize_, recvRankX_);
  
-    wallcycle_stop(wcycle_, ewcMOVEX);
+    wallcycle_stop(wcycle_, WallCycleCounter::MoveX);
  
      return;
  }
@@ -356,17 +356,17 @@ void GpuHaloExchange::Impl::communicateHaloForces(bool accumulateForces)
  {
      // Consider time spent in communicateHaloData as Comm.F counter
      // ToDo: We need further refinement here as communicateHaloData includes launch time for cudamemcpyasync
-    wallcycle_start(wcycle_, ewcMOVEF);
+    wallcycle_start(wcycle_, WallCycleCounter::MoveF);
  
      float3* recvPtr = GMX_THREAD_MPI ? remoteFPtr_ : d_recvBuf_;
  
      // Communicate halo data (in non-local stream)
      communicateHaloData(&(d_f_[atomOffset_]), fSendSize_, sendRankF_, recvPtr, fRecvSize_, recvRankF_);
  
-    wallcycle_stop(wcycle_, ewcMOVEF);
+    wallcycle_stop(wcycle_, WallCycleCounter::MoveF);
  
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_MOVEF);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuMoveF);
  
      float3* d_f = d_f_;
      // If this is the last pulse and index (noting the force halo
@@ -422,8 +422,8 @@ void GpuHaloExchange::Impl::communicateHaloForces(bool accumulateForces)
          fReadyOnDevice_.markEvent(nonLocalStream_);
      }
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_MOVEF);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuMoveF);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  void GpuHaloExchange::Impl::communicateHaloData(float3* sendPtr,