Modernize wallcycle counting

author Paul Bauer <paul.bauer.q@gmail.com>

Wed, 14 Apr 2021 13:52:51 +0000 (13:52 +0000)

committer Mark Abraham <mark.j.abraham@gmail.com>

Wed, 14 Apr 2021 13:52:51 +0000 (13:52 +0000)
author Paul Bauer <paul.bauer.q@gmail.com>
Wed, 14 Apr 2021 13:52:51 +0000 (13:52 +0000)
committer Mark Abraham <mark.j.abraham@gmail.com>
Wed, 14 Apr 2021 13:52:51 +0000 (13:52 +0000)
diff --git a/api/nblib/gmxsetup.cpp b/api/nblib/gmxsetup.cpp

index fd3cfd2a878eda8f0bf826062f882b9ddb83ba92..007e0cf3dbefdd72707b3e4b7efbc2cf95012cef 100644 (file)
--- a/api/nblib/gmxsetup.cpp
+++ b/api/nblib/gmxsetup.cpp
@@ -206,7 +206,7 @@ void NbvSetupUtil::setupNbnxmInstance(const size_t numParticleTypes, const NBKer
  
      // Put everything together
      auto nbv = std::make_unique<nonbonded_verlet_t>(
-            std::move(pairlistSets), std::move(pairSearch), std::move(atomData), kernelSetup, nullptr, nullWallcycle);
+            std::move(pairlistSets), std::move(pairSearch), std::move(atomData), kernelSetup, nullptr, nullptr);
  
      gmxForceCalculator_->nbv_ = std::move(nbv);
  }
diff --git a/src/gromacs/applied_forces/awh/awh.cpp b/src/gromacs/applied_forces/awh/awh.cpp

index 51948de02b57aea04faa77a4ec1003afa68466b3..6f2a8db5ea06a5e9da7b55bed29bd175d3d5e491 100644 (file)
--- a/src/gromacs/applied_forces/awh/awh.cpp
+++ b/src/gromacs/applied_forces/awh/awh.cpp
@@ -304,7 +304,7 @@ real Awh::applyBiasForcesAndUpdateBias(PbcType                pbcType,
          GMX_ASSERT(forceWithVirial, "Need a valid ForceWithVirial object");
      }
  
-    wallcycle_start(wallcycle, ewcAWH);
+    wallcycle_start(wallcycle, WallCycleCounter::Awh);
  
      t_pbc pbc;
      set_pbc(&pbc, pbcType, box);
@@ -394,7 +394,7 @@ real Awh::applyBiasForcesAndUpdateBias(PbcType                pbcType,
          }
      }
  
-    wallcycle_stop(wallcycle, ewcAWH);
+    wallcycle_stop(wallcycle, WallCycleCounter::Awh);
  
      return MASTER(commRecord_) ? static_cast<real>(awhPotential) : 0;
  }
diff --git a/src/gromacs/domdec/cellsizes.cpp b/src/gromacs/domdec/cellsizes.cpp

index 37813d8aba745d42d55f92da42c0c8e5f20d98a6..ed1e6dbbcd389ed62f103df083cfdef73b86194e 100644 (file)
--- a/src/gromacs/domdec/cellsizes.cpp
+++ b/src/gromacs/domdec/cellsizes.cpp
@@ -848,15 +848,15 @@ static void set_dd_cell_sizes_dlb(gmx_domdec_t*      dd,
                                    gmx_bool           bUniform,
                                    gmx_bool           bDoDLB,
                                    int64_t            step,
-                                  gmx_wallcycle_t    wcycle)
+                                  gmx_wallcycle*     wcycle)
  {
      gmx_domdec_comm_t* comm = dd->comm;
  
      if (bDoDLB)
      {
-        wallcycle_start(wcycle, ewcDDCOMMBOUND);
+        wallcycle_start(wcycle, WallCycleCounter::DDCommBound);
          set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
-        wallcycle_stop(wcycle, ewcDDCOMMBOUND);
+        wallcycle_stop(wcycle, WallCycleCounter::DDCommBound);
      }
      else if (bDynamicBox)
      {
@@ -885,7 +885,7 @@ void set_dd_cell_sizes(gmx_domdec_t*      dd,
                         gmx_bool           bUniform,
                         gmx_bool           bDoDLB,
                         int64_t            step,
-                       gmx_wallcycle_t    wcycle)
+                       gmx_wallcycle*     wcycle)
  {
      gmx_domdec_comm_t* comm = dd->comm;
  
diff --git a/src/gromacs/domdec/cellsizes.h b/src/gromacs/domdec/cellsizes.h

index cf07585e89c62133cae9a41c677865a55608d826..794eb27457972125d3ccf940868e547df6d8a952 100644 (file)
--- a/src/gromacs/domdec/cellsizes.h
+++ b/src/gromacs/domdec/cellsizes.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -84,6 +84,6 @@ void set_dd_cell_sizes(gmx_domdec_t*      dd,
                         gmx_bool           bUniform,
                         gmx_bool           bDoDLB,
                         int64_t            step,
-                       gmx_wallcycle_t    wcycle);
+                       gmx_wallcycle*     wcycle);
  
  #endif
diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp

index 088189b6027ef82b8c483c9c1b4754725e2b9a47..4f7cab78622be62aed00e2b0406a9b1a1f61644d 100644 (file)
--- a/src/gromacs/domdec/domdec.cpp
+++ b/src/gromacs/domdec/domdec.cpp
@@ -258,7 +258,7 @@ void dd_get_constraint_range(const gmx_domdec_t& dd, int* at_start, int* at_end)
  
  void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
  {
-    wallcycle_start(wcycle, ewcMOVEX);
+    wallcycle_start(wcycle, WallCycleCounter::MoveX);
  
      rvec shift = { 0, 0, 0 };
  
@@ -347,12 +347,12 @@ void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, g
          nzone += nzone;
      }
  
-    wallcycle_stop(wcycle, ewcMOVEX);
+    wallcycle_stop(wcycle, WallCycleCounter::MoveX);
  }
  
  void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
  {
-    wallcycle_start(wcycle, ewcMOVEF);
+    wallcycle_start(wcycle, WallCycleCounter::MoveF);
  
      gmx::ArrayRef<gmx::RVec> f      = forceWithShiftForces->force();
      gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
@@ -456,7 +456,7 @@ void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces
          }
          nzone /= 2;
      }
-    wallcycle_stop(wcycle, ewcMOVEF);
+    wallcycle_stop(wcycle, WallCycleCounter::MoveF);
  }
  
  /* Convenience function for extracting a real buffer from an rvec buffer
diff --git a/src/gromacs/domdec/gpuhaloexchange_impl.cu b/src/gromacs/domdec/gpuhaloexchange_impl.cu

index 65af08d35d73aee7b8a594a0f832962e9bdd44e1..f32a95bae7b8d9a1e331cc4f19cf96afe317f8e9 100644 (file)
--- a/src/gromacs/domdec/gpuhaloexchange_impl.cu
+++ b/src/gromacs/domdec/gpuhaloexchange_impl.cu
@@ -134,8 +134,8 @@ __global__ void unpackRecvBufKernel(float3* __restrict__ data,
  
  void GpuHaloExchange::Impl::reinitHalo(float3* d_coordinatesBuffer, float3* d_forcesBuffer)
  {
-    wallcycle_start(wcycle_, ewcDOMDEC);
-    wallcycle_sub_start(wcycle_, ewcsDD_GPU);
+    wallcycle_start(wcycle_, WallCycleCounter::Domdec);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::DDGpu);
  
      d_x_ = d_coordinatesBuffer;
      d_f_ = d_forcesBuffer;
@@ -249,8 +249,8 @@ void GpuHaloExchange::Impl::reinitHalo(float3* d_coordinatesBuffer, float3* d_fo
      }
  #endif
  
-    wallcycle_sub_stop(wcycle_, ewcsDD_GPU);
-    wallcycle_stop(wcycle_, ewcDOMDEC);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::DDGpu);
+    wallcycle_stop(wcycle_, WallCycleCounter::Domdec);
  
      return;
  }
@@ -283,14 +283,14 @@ void GpuHaloExchange::Impl::communicateHaloCoordinates(const matrix          box
                                                         GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
  {
  
-    wallcycle_start(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_start(wcycle_, WallCycleCounter::LaunchGpu);
      if (pulse_ == 0)
      {
          // ensure stream waits until coordinate data is available on device
          coordinatesReadyOnDeviceEvent->enqueueWaitEvent(nonLocalStream_);
      }
  
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_MOVEX);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuMoveX);
  
      // launch kernel to pack send buffer
      KernelLaunchConfig config;
@@ -328,12 +328,12 @@ void GpuHaloExchange::Impl::communicateHaloCoordinates(const matrix          box
                  kernelFn, config, nonLocalStream_, nullptr, "Domdec GPU Apply X Halo Exchange", kernelArgs);
      }
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_MOVEX);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuMoveX);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  
      // Consider time spent in communicateHaloData as Comm.X counter
      // ToDo: We need further refinement here as communicateHaloData includes launch time for cudamemcpyasync
-    wallcycle_start(wcycle_, ewcMOVEX);
+    wallcycle_start(wcycle_, WallCycleCounter::MoveX);
  
      // wait for remote co-ordinates is implicit with process-MPI as non-local stream is synchronized before MPI calls
      // and MPI_Waitall call makes sure both neighboring ranks' non-local stream is synchronized before data transfer is initiated
@@ -345,7 +345,7 @@ void GpuHaloExchange::Impl::communicateHaloCoordinates(const matrix          box
      float3* recvPtr = GMX_THREAD_MPI ? remoteXPtr_ : &d_x_[atomOffset_];
      communicateHaloData(d_sendBuf_, xSendSize_, sendRankX_, recvPtr, xRecvSize_, recvRankX_);
  
-    wallcycle_stop(wcycle_, ewcMOVEX);
+    wallcycle_stop(wcycle_, WallCycleCounter::MoveX);
  
      return;
  }
@@ -356,17 +356,17 @@ void GpuHaloExchange::Impl::communicateHaloForces(bool accumulateForces)
  {
      // Consider time spent in communicateHaloData as Comm.F counter
      // ToDo: We need further refinement here as communicateHaloData includes launch time for cudamemcpyasync
-    wallcycle_start(wcycle_, ewcMOVEF);
+    wallcycle_start(wcycle_, WallCycleCounter::MoveF);
  
      float3* recvPtr = GMX_THREAD_MPI ? remoteFPtr_ : d_recvBuf_;
  
      // Communicate halo data (in non-local stream)
      communicateHaloData(&(d_f_[atomOffset_]), fSendSize_, sendRankF_, recvPtr, fRecvSize_, recvRankF_);
  
-    wallcycle_stop(wcycle_, ewcMOVEF);
+    wallcycle_stop(wcycle_, WallCycleCounter::MoveF);
  
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_MOVEF);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuMoveF);
  
      float3* d_f = d_f_;
      // If this is the last pulse and index (noting the force halo
@@ -422,8 +422,8 @@ void GpuHaloExchange::Impl::communicateHaloForces(bool accumulateForces)
          fReadyOnDevice_.markEvent(nonLocalStream_);
      }
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_MOVEF);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuMoveF);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  void GpuHaloExchange::Impl::communicateHaloData(float3* sendPtr,
diff --git a/src/gromacs/domdec/partition.cpp b/src/gromacs/domdec/partition.cpp

index c87a3077fd315ede89af0f8d5830fd607b2cc531..2aa7c35c971ab31db63640950601ffcbbc185bb2 100644 (file)
--- a/src/gromacs/domdec/partition.cpp
+++ b/src/gromacs/domdec/partition.cpp
@@ -754,7 +754,7 @@ static void comm_dd_ns_cell_sizes(gmx_domdec_t* dd, gmx_ddbox_t* ddbox, rvec cel
  }
  
  //! Compute and communicate to determine the load distribution across PP ranks.
-static void get_load_distribution(gmx_domdec_t* dd, gmx_wallcycle_t wcycle)
+static void get_load_distribution(gmx_domdec_t* dd, gmx_wallcycle* wcycle)
  {
      gmx_domdec_comm_t* comm;
      domdec_load_t*     load;
@@ -766,7 +766,7 @@ static void get_load_distribution(gmx_domdec_t* dd, gmx_wallcycle_t wcycle)
          fprintf(debug, "get_load_distribution start\n");
      }
  
-    wallcycle_start(wcycle, ewcDDCOMMLOAD);
+    wallcycle_start(wcycle, WallCycleCounter::DDCommLoad);
  
      comm = dd->comm;
  
@@ -937,7 +937,7 @@ static void get_load_distribution(gmx_domdec_t* dd, gmx_wallcycle_t wcycle)
          }
      }
  
-    wallcycle_stop(wcycle, ewcDDCOMMLOAD);
+    wallcycle_stop(wcycle, WallCycleCounter::DDCommLoad);
  
      if (debug)
      {
@@ -2760,7 +2760,7 @@ void dd_partition_system(FILE*                     fplog,
      int         ncgindex_set;
      char        sbuf[22];
  
-    wallcycle_start(wcycle, ewcDOMDEC);
+    wallcycle_start(wcycle, WallCycleCounter::Domdec);
  
      gmx_domdec_t*      dd   = cr->dd;
      gmx_domdec_comm_t* comm = dd->comm;
@@ -3059,7 +3059,7 @@ void dd_partition_system(FILE*                     fplog,
      int ncg_moved = 0;
      if (bRedist)
      {
-        wallcycle_sub_start(wcycle, ewcsDD_REDIST);
+        wallcycle_sub_start(wcycle, WallCycleSubCounter::DDRedist);
  
          ncgindex_set = dd->ncg_home;
          dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir, state_local, fr, nrnb, &ncg_moved);
@@ -3073,7 +3073,7 @@ void dd_partition_system(FILE*                     fplog,
                      state_local->x);
          }
  
-        wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::DDRedist);
      }
  
      RVec cell_ns_x0, cell_ns_x1;
@@ -3095,7 +3095,7 @@ void dd_partition_system(FILE*                     fplog,
  
      if (bSortCG)
      {
-        wallcycle_sub_start(wcycle, ewcsDD_GRID);
+        wallcycle_sub_start(wcycle, WallCycleSubCounter::DDGrid);
  
          /* Sort the state on charge group position.
           * This enables exact restarts from this step.
@@ -3136,7 +3136,7 @@ void dd_partition_system(FILE*                     fplog,
          dd->ga2la->clear();
          ncgindex_set = 0;
  
-        wallcycle_sub_stop(wcycle, ewcsDD_GRID);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::DDGrid);
      }
      else
      {
@@ -3157,7 +3157,7 @@ void dd_partition_system(FILE*                     fplog,
          comm->updateGroupsCog->clear();
      }
  
-    wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
+    wallcycle_sub_start(wcycle, WallCycleSubCounter::DDSetupComm);
  
      /* Set the induces for the home atoms */
      set_zones_ncg_home(dd);
@@ -3175,14 +3175,14 @@ void dd_partition_system(FILE*                     fplog,
      /* When bSortCG=true, we have already set the size for zone 0 */
      set_zones_size(dd, state_local->box, &ddbox, bSortCG ? 1 : 0, comm->zones.n, 0);
  
-    wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
+    wallcycle_sub_stop(wcycle, WallCycleSubCounter::DDSetupComm);
  
      /*
         write_dd_pdb("dd_home",step,"dump",top_global,cr,
                   -1,state_local->x.rvec_array(),state_local->box);
       */
  
-    wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
+    wallcycle_sub_start(wcycle, WallCycleSubCounter::DDMakeTop);
  
      /* Extract a local topology from the global topology */
      IVec numPulses;
@@ -3201,9 +3201,9 @@ void dd_partition_system(FILE*                     fplog,
                        top_global,
                        top_local);
  
-    wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
+    wallcycle_sub_stop(wcycle, WallCycleSubCounter::DDMakeTop);
  
-    wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
+    wallcycle_sub_start(wcycle, WallCycleSubCounter::DDMakeConstr);
  
      /* Set up the special atom communication */
      int n = comm->atomRanges.end(DDAtomRanges::Type::Zones);
@@ -3238,9 +3238,9 @@ void dd_partition_system(FILE*                     fplog,
          comm->atomRanges.setEnd(range, n);
      }
  
-    wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
+    wallcycle_sub_stop(wcycle, WallCycleSubCounter::DDMakeConstr);
  
-    wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
+    wallcycle_sub_start(wcycle, WallCycleSubCounter::DDTopOther);
  
      /* Make space for the extra coordinates for virtual site
       * or constraint communication.
@@ -3325,11 +3325,11 @@ void dd_partition_system(FILE*                     fplog,
       */
      dd_move_x_vsites(*dd, state_local->box, state_local->x.rvec_array());
  
-    wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
+    wallcycle_sub_stop(wcycle, WallCycleSubCounter::DDTopOther);
  
      if (comm->ddSettings.nstDDDump > 0 && step % comm->ddSettings.nstDDDump == 0)
      {
-        dd_move_x(dd, state_local->box, state_local->x, nullWallcycle);
+        dd_move_x(dd, state_local->box, state_local->x, nullptr);
          write_dd_pdb("dd_dump",
                       step,
                       "dump",
@@ -3361,7 +3361,7 @@ void dd_partition_system(FILE*                     fplog,
          check_index_consistency(dd, top_global.natoms, "after partitioning");
      }
  
-    wallcycle_stop(wcycle, ewcDOMDEC);
+    wallcycle_stop(wcycle, WallCycleCounter::Domdec);
  }
  
  } // namespace gmx
diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp

index bd96d590703f15e8a91c7d3f90966d799b72dae7..5f9ffb42d964eb589e4669839abaa4cca967f35a 100644 (file)
--- a/src/gromacs/ewald/pme.cpp
+++ b/src/gromacs/ewald/pme.cpp
@@ -1202,10 +1202,10 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
          }
          else
          {
-            wallcycle_start(wcycle, ewcPME_REDISTXF);
+            wallcycle_start(wcycle, WallCycleCounter::PmeRedistXF);
              do_redist_pos_coeffs(pme, cr, bFirst, coordinates, coefficient);
  
-            wallcycle_stop(wcycle, ewcPME_REDISTXF);
+            wallcycle_stop(wcycle, WallCycleCounter::PmeRedistXF);
          }
  
          if (debug)
@@ -1213,7 +1213,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
              fprintf(debug, "Rank= %6d, pme local particles=%6d\n", cr->nodeid, atc.numAtoms());
          }
  
-        wallcycle_start(wcycle, ewcPME_SPREAD);
+        wallcycle_start(wcycle, WallCycleCounter::PmeSpread);
  
          /* Spread the coefficients on a grid */
          spread_on_grid(pme, &atc, pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
@@ -1237,7 +1237,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
              copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index);
          }
  
-        wallcycle_stop(wcycle, ewcPME_SPREAD);
+        wallcycle_stop(wcycle, WallCycleCounter::PmeSpread);
  
          /* TODO If the OpenMP and single-threaded implementations
             converge, then spread_on_grid() and
@@ -1256,18 +1256,20 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                  /* do 3d-fft */
                  if (thread == 0)
                  {
-                    wallcycle_start(wcycle, ewcPME_FFT);
+                    wallcycle_start(wcycle, WallCycleCounter::PmeFft);
                  }
                  gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX, thread, wcycle);
                  if (thread == 0)
                  {
-                    wallcycle_stop(wcycle, ewcPME_FFT);
+                    wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
                  }
  
                  /* solve in k-space for our local cells */
                  if (thread == 0)
                  {
-                    wallcycle_start(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME));
+                    wallcycle_start(
+                            wcycle,
+                            (grid_index < DO_Q ? WallCycleCounter::PmeSolve : WallCycleCounter::LJPme));
                  }
                  if (grid_index < DO_Q)
                  {
@@ -1292,19 +1294,21 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
  
                  if (thread == 0)
                  {
-                    wallcycle_stop(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME));
+                    wallcycle_stop(
+                            wcycle,
+                            (grid_index < DO_Q ? WallCycleCounter::PmeSolve : WallCycleCounter::LJPme));
                      inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
                  }
  
                  /* do 3d-invfft */
                  if (thread == 0)
                  {
-                    wallcycle_start(wcycle, ewcPME_FFT);
+                    wallcycle_start(wcycle, WallCycleCounter::PmeFft);
                  }
                  gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL, thread, wcycle);
                  if (thread == 0)
                  {
-                    wallcycle_stop(wcycle, ewcPME_FFT);
+                    wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
  
  
                      if (pme->nodeid == 0)
@@ -1317,7 +1321,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                      /* Note: this wallcycle region is closed below
                         outside an OpenMP region, so take care if
                         refactoring code here. */
-                    wallcycle_start(wcycle, ewcPME_GATHER);
+                    wallcycle_start(wcycle, WallCycleCounter::PmeGather);
                  }
  
                  copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread);
@@ -1366,7 +1370,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
              inc_nrnb(nrnb, eNR_GATHERFBSP, pme->pme_order * pme->pme_order * pme->pme_order * atc.numAtoms());
              /* Note: this wallcycle region is opened above inside an OpenMP
                 region, so take care if refactoring code here. */
-            wallcycle_stop(wcycle, ewcPME_GATHER);
+            wallcycle_stop(wcycle, WallCycleCounter::PmeGather);
          }
  
          if (computeEnergyAndVirial)
@@ -1431,7 +1435,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                          break;
                      default: gmx_incons("Trying to access wrong FEP-state in LJ-PME routine");
                  }
-                wallcycle_start(wcycle, ewcPME_REDISTXF);
+                wallcycle_start(wcycle, WallCycleCounter::PmeRedistXF);
  
                  do_redist_pos_coeffs(pme, cr, bFirst, coordinates, RedistC6);
                  pme->lb_buf1.resize(atc.numAtoms());
@@ -1449,7 +1453,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                      local_sigma[i] = atc.coefficient[i];
                  }
  
-                wallcycle_stop(wcycle, ewcPME_REDISTXF);
+                wallcycle_stop(wcycle, WallCycleCounter::PmeRedistXF);
              }
              atc.coefficient = coefficientBuffer;
              calc_initial_lb_coeffs(coefficientBuffer, local_c6, local_sigma);
@@ -1464,7 +1468,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                  calc_next_lb_coeffs(coefficientBuffer, local_sigma);
                  grid = pmegrid->grid.grid;
  
-                wallcycle_start(wcycle, ewcPME_SPREAD);
+                wallcycle_start(wcycle, WallCycleCounter::PmeSpread);
                  /* Spread the c6 on a grid */
                  spread_on_grid(pme, &atc, pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
  
@@ -1486,7 +1490,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                      }
                      copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index);
                  }
-                wallcycle_stop(wcycle, ewcPME_SPREAD);
+                wallcycle_stop(wcycle, WallCycleCounter::PmeSpread);
  
                  /*Here we start a large thread parallel region*/
  #pragma omp parallel num_threads(pme->nthread) private(thread)
@@ -1497,13 +1501,13 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                          /* do 3d-fft */
                          if (thread == 0)
                          {
-                            wallcycle_start(wcycle, ewcPME_FFT);
+                            wallcycle_start(wcycle, WallCycleCounter::PmeFft);
                          }
  
                          gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX, thread, wcycle);
                          if (thread == 0)
                          {
-                            wallcycle_stop(wcycle, ewcPME_FFT);
+                            wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
                          }
                      }
                      GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
@@ -1519,7 +1523,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                      thread = gmx_omp_get_thread_num();
                      if (thread == 0)
                      {
-                        wallcycle_start(wcycle, ewcLJPME);
+                        wallcycle_start(wcycle, WallCycleCounter::LJPme);
                      }
  
                      loop_count =
@@ -1532,7 +1536,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                                               thread);
                      if (thread == 0)
                      {
-                        wallcycle_stop(wcycle, ewcLJPME);
+                        wallcycle_stop(wcycle, WallCycleCounter::LJPme);
                          inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
                      }
                  }
@@ -1565,13 +1569,13 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                          /* do 3d-invfft */
                          if (thread == 0)
                          {
-                            wallcycle_start(wcycle, ewcPME_FFT);
+                            wallcycle_start(wcycle, WallCycleCounter::PmeFft);
                          }
  
                          gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL, thread, wcycle);
                          if (thread == 0)
                          {
-                            wallcycle_stop(wcycle, ewcPME_FFT);
+                            wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
  
  
                              if (pme->nodeid == 0)
@@ -1580,7 +1584,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                                  npme      = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
                                  inc_nrnb(nrnb, eNR_FFT, 2 * npme);
                              }
-                            wallcycle_start(wcycle, ewcPME_GATHER);
+                            wallcycle_start(wcycle, WallCycleCounter::PmeGather);
                          }
  
                          copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread);
@@ -1619,7 +1623,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                               eNR_GATHERFBSP,
                               pme->pme_order * pme->pme_order * pme->pme_order * pme->atc[0].numAtoms());
                  }
-                wallcycle_stop(wcycle, ewcPME_GATHER);
+                wallcycle_stop(wcycle, WallCycleCounter::PmeGather);
  
                  bFirst = FALSE;
              } /* for (grid_index = 8; grid_index >= 2; --grid_index) */
@@ -1628,7 +1632,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
  
      if (stepWork.computeForces && pme->nnodes > 1)
      {
-        wallcycle_start(wcycle, ewcPME_REDISTXF);
+        wallcycle_start(wcycle, WallCycleCounter::PmeRedistXF);
          for (d = 0; d < pme->ndecompdim; d++)
          {
              gmx::ArrayRef<gmx::RVec> forcesRef;
@@ -1648,7 +1652,7 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
              }
          }
  
-        wallcycle_stop(wcycle, ewcPME_REDISTXF);
+        wallcycle_stop(wcycle, WallCycleCounter::PmeRedistXF);
      }
  
      if (computeEnergyAndVirial)
diff --git a/src/gromacs/ewald/pme_gpu.cpp b/src/gromacs/ewald/pme_gpu.cpp

index 225fb1050a281a33cfc3cff9bec8dea32d03e7b9..564e213af9a7918fa4cc29992d040dd47685ff59 100644 (file)
--- a/src/gromacs/ewald/pme_gpu.cpp
+++ b/src/gromacs/ewald/pme_gpu.cpp
@@ -123,25 +123,25 @@ int pme_gpu_get_block_size(const gmx_pme_t* pme)
  void inline parallel_3dfft_execute_gpu_wrapper(gmx_pme_t*             pme,
                                                 const int              gridIndex,
                                                 enum gmx_fft_direction dir,
-                                               gmx_wallcycle_t        wcycle)
+                                               gmx_wallcycle*         wcycle)
  {
      if (pme_gpu_settings(pme->gpu).performGPUFFT)
      {
-        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+        wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuPme);
          pme_gpu_3dfft(pme->gpu, dir, gridIndex);
-        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
-        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuPme);
+        wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
      }
      else
      {
-        wallcycle_start(wcycle, ewcPME_FFT_MIXED_MODE);
+        wallcycle_start(wcycle, WallCycleCounter::PmeFftMixedMode);
  #pragma omp parallel for num_threads(pme->nthread) schedule(static)
          for (int thread = 0; thread < pme->nthread; thread++)
          {
              gmx_parallel_3dfft_execute(pme->pfft_setup[gridIndex], dir, thread, wcycle);
          }
-        wallcycle_stop(wcycle, ewcPME_FFT_MIXED_MODE);
+        wallcycle_stop(wcycle, WallCycleCounter::PmeFftMixedMode);
      }
  }
  
@@ -172,11 +172,11 @@ void pme_gpu_prepare_computation(gmx_pme_t*               pme,
  
      if (stepWork.haveDynamicBox || shouldUpdateBox) // || is to make the first computation always update
      {
-        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+        wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuPme);
          pme_gpu_update_input_box(pmeGpu, box);
-        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
-        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuPme);
+        wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
  
          if (!pme_gpu_settings(pmeGpu).performGPUSolve)
          {
@@ -213,11 +213,11 @@ void pme_gpu_launch_spread(gmx_pme_t*            pme,
      /* Spread the coefficients on a grid */
      const bool computeSplines = true;
      const bool spreadCharges  = true;
-    wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-    wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
+    wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuPme);
      pme_gpu_spread(pmeGpu, xReadyOnDevice, fftgrids, computeSplines, spreadCharges, lambdaQ);
-    wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
-    wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuPme);
+    wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
  }
  
  void pme_gpu_launch_complex_transforms(gmx_pme_t* pme, gmx_wallcycle* wcycle, const gmx::StepWorkload& stepWork)
@@ -228,9 +228,9 @@ void pme_gpu_launch_complex_transforms(gmx_pme_t* pme, gmx_wallcycle* wcycle, co
      const bool computeEnergyAndVirial = stepWork.computeEnergy || stepWork.computeVirial;
      if (!settings.performGPUFFT)
      {
-        wallcycle_start(wcycle, ewcWAIT_GPU_PME_SPREAD);
+        wallcycle_start(wcycle, WallCycleCounter::WaitGpuPmeSpread);
          pme_gpu_sync_spread_grid(pme->gpu);
-        wallcycle_stop(wcycle, ewcWAIT_GPU_PME_SPREAD);
+        wallcycle_stop(wcycle, WallCycleCounter::WaitGpuPmeSpread);
      }
  
      try
@@ -248,21 +248,21 @@ void pme_gpu_launch_complex_transforms(gmx_pme_t* pme, gmx_wallcycle* wcycle, co
              {
                  const auto gridOrdering =
                          settings.useDecomposition ? GridOrdering::YZX : GridOrdering::XYZ;
-                wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-                wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
+                wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+                wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuPme);
                  pme_gpu_solve(pmeGpu, gridIndex, cfftgrid, gridOrdering, computeEnergyAndVirial);
-                wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
-                wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+                wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuPme);
+                wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
              }
              else
              {
-                wallcycle_start(wcycle, ewcPME_SOLVE_MIXED_MODE);
+                wallcycle_start(wcycle, WallCycleCounter::PmeSolveMixedMode);
  #pragma omp parallel for num_threads(pme->nthread) schedule(static)
                  for (int thread = 0; thread < pme->nthread; thread++)
                  {
                      solve_pme_yzx(pme, cfftgrid, pme->boxVolume, computeEnergyAndVirial, pme->nthread, thread);
                  }
-                wallcycle_stop(wcycle, ewcPME_SOLVE_MIXED_MODE);
+                wallcycle_stop(wcycle, WallCycleCounter::PmeSolveMixedMode);
              }
  
              parallel_3dfft_execute_gpu_wrapper(pme, gridIndex, GMX_FFT_COMPLEX_TO_REAL, wcycle);
@@ -280,13 +280,13 @@ void pme_gpu_launch_gather(const gmx_pme_t* pme, gmx_wallcycle gmx_unused* wcycl
          return;
      }
  
-    wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-    wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
+    wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuPme);
  
      float** fftgrids = pme->fftgrid;
      pme_gpu_gather(pme->gpu, fftgrids, lambdaQ);
-    wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
-    wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuPme);
+    wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
  }
  
  //! Accumulate the \c forcesToAdd to \c f, using the available threads.
@@ -309,7 +309,7 @@ static void pme_gpu_reduce_outputs(const bool            computeEnergyAndVirial,
                                     gmx::ForceWithVirial* forceWithVirial,
                                     gmx_enerdata_t*       enerd)
  {
-    wallcycle_start(wcycle, ewcPME_GPU_F_REDUCTION);
+    wallcycle_start(wcycle, WallCycleCounter::PmeGpuFReduction);
      GMX_ASSERT(forceWithVirial, "Invalid force pointer");
  
      if (computeEnergyAndVirial)
@@ -323,7 +323,7 @@ static void pme_gpu_reduce_outputs(const bool            computeEnergyAndVirial,
      {
          sum_forces(forceWithVirial->force_, output.forces_);
      }
-    wallcycle_stop(wcycle, ewcPME_GPU_F_REDUCTION);
+    wallcycle_stop(wcycle, WallCycleCounter::PmeGpuFReduction);
  }
  
  bool pme_gpu_try_finish_task(gmx_pme_t*               pme,
@@ -348,11 +348,11 @@ bool pme_gpu_try_finish_task(gmx_pme_t*               pme,
      // TODO: implement c_streamQuerySupported with an additional GpuEventSynchronizer per stream (#2521)
      if ((completionKind == GpuTaskCompletion::Check) && c_streamQuerySupported)
      {
-        wallcycle_start_nocount(wcycle, ewcWAIT_GPU_PME_GATHER);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::WaitGpuPmeGather);
          // Query the PME stream for completion of all tasks enqueued and
          // if we're not done, stop the timer before early return.
          const bool pmeGpuDone = pme_gpu_stream_query(pme->gpu);
-        wallcycle_stop(wcycle, ewcWAIT_GPU_PME_GATHER);
+        wallcycle_stop(wcycle, WallCycleCounter::WaitGpuPmeGather);
  
          if (!pmeGpuDone)
          {
@@ -361,7 +361,7 @@ bool pme_gpu_try_finish_task(gmx_pme_t*               pme,
          needToSynchronize = false;
      }
  
-    wallcycle_start(wcycle, ewcWAIT_GPU_PME_GATHER);
+    wallcycle_start(wcycle, WallCycleCounter::WaitGpuPmeGather);
      // If the above check passed, then there is no need to make an
      // explicit synchronization call.
      if (needToSynchronize)
@@ -374,7 +374,7 @@ bool pme_gpu_try_finish_task(gmx_pme_t*               pme,
      const bool computeEnergyAndVirial = stepWork.computeEnergy || stepWork.computeVirial;
      PmeOutput  output                 = pme_gpu_getOutput(
              *pme, computeEnergyAndVirial, pme->gpu->common->ngrids > 1 ? lambdaQ : 1.0);
-    wallcycle_stop(wcycle, ewcWAIT_GPU_PME_GATHER);
+    wallcycle_stop(wcycle, WallCycleCounter::WaitGpuPmeGather);
  
      GMX_ASSERT(pme->gpu->settings.useGpuForceReduction == !output.haveForceOutput_,
                 "When forces are reduced on the CPU, there needs to be force output");
@@ -391,7 +391,7 @@ PmeOutput pme_gpu_wait_finish_task(gmx_pme_t*     pme,
  {
      GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");
  
-    wallcycle_start(wcycle, ewcWAIT_GPU_PME_GATHER);
+    wallcycle_start(wcycle, WallCycleCounter::WaitGpuPmeGather);
  
      // Synchronize the whole PME stream at once, including D2H result transfers
      // if there are outputs we need to wait for at this step; we still call getOutputs
@@ -403,7 +403,7 @@ PmeOutput pme_gpu_wait_finish_task(gmx_pme_t*     pme,
  
      PmeOutput output = pme_gpu_getOutput(
              *pme, computeEnergyAndVirial, pme->gpu->common->ngrids > 1 ? lambdaQ : 1.0);
-    wallcycle_stop(wcycle, ewcWAIT_GPU_PME_GATHER);
+    wallcycle_stop(wcycle, WallCycleCounter::WaitGpuPmeGather);
      return output;
  }
  
@@ -428,16 +428,16 @@ void pme_gpu_reinit_computation(const gmx_pme_t* pme, gmx_wallcycle* wcycle)
  {
      GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");
  
-    wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-    wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
+    wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuPme);
  
      pme_gpu_update_timings(pme->gpu);
  
      pme_gpu_clear_grids(pme->gpu);
      pme_gpu_clear_energy_virial(pme->gpu);
  
-    wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
-    wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuPme);
+    wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
  }
  
  DeviceBuffer<gmx::RVec> pme_gpu_get_device_f(const gmx_pme_t* pme)
diff --git a/src/gromacs/ewald/pme_load_balancing.cpp b/src/gromacs/ewald/pme_load_balancing.cpp

index 19778b8999b897ac9ea29cbbcb454a5f2a5548c2..2e900d3bd4fa6550535707b4e6ad46040017f9d6 100644 (file)
--- a/src/gromacs/ewald/pme_load_balancing.cpp
+++ b/src/gromacs/ewald/pme_load_balancing.cpp
@@ -928,7 +928,7 @@ void pme_loadbal_do(pme_load_balancing_t*          pme_lb,
                      t_forcerec*                    fr,
                      const matrix                   box,
                      gmx::ArrayRef<const gmx::RVec> x,
-                    gmx_wallcycle_t                wcycle,
+                    gmx_wallcycle*                 wcycle,
                      int64_t                        step,
                      int64_t                        step_rel,
                      gmx_bool*                      bPrinting,
@@ -946,7 +946,7 @@ void pme_loadbal_do(pme_load_balancing_t*          pme_lb,
  
      n_prev      = pme_lb->cycles_n;
      cycles_prev = pme_lb->cycles_c;
-    wallcycle_get(wcycle, ewcSTEP, &pme_lb->cycles_n, &pme_lb->cycles_c);
+    wallcycle_get(wcycle, WallCycleCounter::Step, &pme_lb->cycles_n, &pme_lb->cycles_c);
  
      /* Before the first step we haven't done any steps yet.
       * Also handle cases where ir.init_step % ir.nstlist != 0.
diff --git a/src/gromacs/ewald/pme_load_balancing.h b/src/gromacs/ewald/pme_load_balancing.h

index bb98635ca239238b9fd262317641f5508dc15d22..38ba5c74e64674ac4ef06d9a9dc7055dad260463 100644 (file)
--- a/src/gromacs/ewald/pme_load_balancing.h
+++ b/src/gromacs/ewald/pme_load_balancing.h
@@ -2,7 +2,7 @@
   * This file is part of the GROMACS molecular simulation package.
   *
   * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
- * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -92,7 +92,7 @@ void pme_loadbal_init(pme_load_balancing_t**     pme_lb_p,
   *
   * Process the cycles measured over the last nstlist steps and then
   * either continue balancing or check if we need to trigger balancing.
- * Should be called after the ewcSTEP cycle counter has been stopped.
+ * Should be called after the WallCycleCounter::Step cycle counter has been stopped.
   * Returns if the load balancing is printing to fp_err.
   */
  void pme_loadbal_do(pme_load_balancing_t*          pme_lb,
@@ -104,7 +104,7 @@ void pme_loadbal_do(pme_load_balancing_t*          pme_lb,
                      t_forcerec*                    fr,
                      const matrix                   box,
                      gmx::ArrayRef<const gmx::RVec> x,
-                    gmx_wallcycle_t                wcycle,
+                    gmx_wallcycle*                 wcycle,
                      int64_t                        step,
                      int64_t                        step_rel,
                      gmx_bool*                      bPrinting,
diff --git a/src/gromacs/ewald/pme_only.cpp b/src/gromacs/ewald/pme_only.cpp

index 4542a05f4291463cad63cffa6ea79d609aa7a66f..5130034a9fb3c06b2fc1a1ba565c91ac2e743674 100644 (file)
--- a/src/gromacs/ewald/pme_only.cpp
+++ b/src/gromacs/ewald/pme_only.cpp
@@ -167,17 +167,17 @@ static std::unique_ptr<gmx_pme_pp> gmx_pme_pp_init(const t_commrec* cr)
      return pme_pp;
  }
  
-static void reset_pmeonly_counters(gmx_wallcycle_t           wcycle,
+static void reset_pmeonly_counters(gmx_wallcycle*            wcycle,
                                     gmx_walltime_accounting_t walltime_accounting,
                                     t_nrnb*                   nrnb,
                                     int64_t                   step,
                                     bool                      useGpuForPme)
  {
      /* Reset all the counters related to performance over the run */
-    wallcycle_stop(wcycle, ewcRUN);
+    wallcycle_stop(wcycle, WallCycleCounter::Run);
      wallcycle_reset_all(wcycle);
      *nrnb = { 0 };
-    wallcycle_start(wcycle, ewcRUN);
+    wallcycle_start(wcycle, WallCycleCounter::Run);
      walltime_accounting_reset_time(walltime_accounting, step);
  
      if (useGpuForPme)
@@ -708,11 +708,11 @@ int gmx_pmeonly(struct gmx_pme_t*               pme,
  
          if (count == 0)
          {
-            wallcycle_start(wcycle, ewcRUN);
+            wallcycle_start(wcycle, WallCycleCounter::Run);
              walltime_accounting_start_time(walltime_accounting);
          }
  
-        wallcycle_start(wcycle, ewcPMEMESH);
+        wallcycle_start(wcycle, WallCycleCounter::PmeMesh);
  
          dvdlambda_q  = 0;
          dvdlambda_lj = 0;
@@ -779,7 +779,7 @@ int gmx_pmeonly(struct gmx_pme_t*               pme,
              output.forces_ = pme_pp->f;
          }
  
-        cycles = wallcycle_stop(wcycle, ewcPMEMESH);
+        cycles = wallcycle_stop(wcycle, WallCycleCounter::PmeMesh);
          gmx_pme_send_force_vir_ener(pme_pp.get(), output, dvdlambda_q, dvdlambda_lj, cycles);
  
          count++;
diff --git a/src/gromacs/ewald/pme_pp.cpp b/src/gromacs/ewald/pme_pp.cpp

index 3a929daf1f5437e337fa8f0122ed1990316c6923..63693ed6a631eefa32d71151cc46d928b5cb89ec 100644 (file)
--- a/src/gromacs/ewald/pme_pp.cpp
+++ b/src/gromacs/ewald/pme_pp.cpp
@@ -371,7 +371,7 @@ void gmx_pme_send_coordinates(t_forcerec*           fr,
                                GpuEventSynchronizer* coordinatesReadyOnDeviceEvent,
                                gmx_wallcycle*        wcycle)
  {
-    wallcycle_start(wcycle, ewcPP_PMESENDX);
+    wallcycle_start(wcycle, WallCycleCounter::PpPmeSendX);
  
      unsigned int flags = PP_PME_COORD;
      if (computeEnergyAndVirial)
@@ -399,7 +399,7 @@ void gmx_pme_send_coordinates(t_forcerec*           fr,
                                 sendCoordinatesFromGpu,
                                 coordinatesReadyOnDeviceEvent);
  
-    wallcycle_stop(wcycle, ewcPP_PMESENDX);
+    wallcycle_stop(wcycle, WallCycleCounter::PpPmeSendX);
  }
  
  void gmx_pme_send_finish(const t_commrec* cr)
diff --git a/src/gromacs/fft/fft5d.cpp b/src/gromacs/fft/fft5d.cpp

index 71020eec6ba83823df27f78bc7346e4f6648b9b5..be74cbaf6dbf951854f7323d2479ba139679756f 100644 (file)
--- a/src/gromacs/fft/fft5d.cpp
+++ b/src/gromacs/fft/fft5d.cpp
@@ -1286,7 +1286,7 @@ void fft5d_execute(fft5d_plan plan, int thread, fft5d_time times)
                      time = MPI_Wtime();
                  }
  #else
-                wallcycle_start(times, ewcPME_FFTCOMM);
+                wallcycle_start(times, WallCycleCounter::PmeFftComm);
  #endif
  #ifdef FFT5D_MPI_TRANSPOSE
                  FFTW(execute)(mpip[s]);
@@ -1323,7 +1323,7 @@ void fft5d_execute(fft5d_plan plan, int thread, fft5d_time times)
                      time_mpi[s] = MPI_Wtime() - time;
                  }
  #else
-                wallcycle_stop(times, ewcPME_FFTCOMM);
+                wallcycle_stop(times, WallCycleCounter::PmeFftComm);
  #endif
              }       /*master*/
          }           /* bPrallelDim */
diff --git a/src/gromacs/fft/fft5d.h b/src/gromacs/fft/fft5d.h

index 71d86817a07e1079ad62a523fca175d0d46716fb..b4e5d008e4d7aba64416cc27d44331b4ee1bdea9 100644 (file)
--- a/src/gromacs/fft/fft5d.h
+++ b/src/gromacs/fft/fft5d.h
@@ -2,7 +2,7 @@
   * This file is part of the GROMACS molecular simulation package.
   *
   * Copyright (c) 2009-2017, The GROMACS development team.
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -73,7 +73,7 @@ struct fft5d_time_t
  typedef struct fft5d_time_t* fft5d_time;
  #else
  #    include "gromacs/timing/wallcycle.h"
-typedef gmx_wallcycle_t fft5d_time;
+typedef gmx_wallcycle* fft5d_time;
  #endif
  
  namespace gmx
diff --git a/src/gromacs/fft/parallel_3dfft.cpp b/src/gromacs/fft/parallel_3dfft.cpp

index 24a679fc301efbd80e49c57e9a651d8d95fba4ce..33d916ec20bf731fab6033a0f4f54ce552bcdf64 100644 (file)
--- a/src/gromacs/fft/parallel_3dfft.cpp
+++ b/src/gromacs/fft/parallel_3dfft.cpp
@@ -2,7 +2,7 @@
   * This file is part of the GROMACS molecular simulation package.
   *
   * Copyright (c) 1991-2005 David van der Spoel, Erik Lindahl, University of Groningen.
- * Copyright (c) 2013,2014,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2017,2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -170,7 +170,7 @@ int gmx_parallel_3dfft_complex_limits(gmx_parallel_3dfft_t pfft_setup,
  int gmx_parallel_3dfft_execute(gmx_parallel_3dfft_t   pfft_setup,
                                 enum gmx_fft_direction dir,
                                 int                    thread,
-                               gmx_wallcycle_t        wcycle)
+                               gmx_wallcycle*         wcycle)
  {
      if (((pfft_setup->p1->flags & FFT5D_REALCOMPLEX) == 0)
          ^ (dir == GMX_FFT_FORWARD || dir == GMX_FFT_BACKWARD))
diff --git a/src/gromacs/fft/parallel_3dfft.h b/src/gromacs/fft/parallel_3dfft.h

index 91cd66103f9ec39f37d5dd72ae1b024815155e46..5652f4c42237bd4f19e04046b76a97c7de0ad09b 100644 (file)
--- a/src/gromacs/fft/parallel_3dfft.h
+++ b/src/gromacs/fft/parallel_3dfft.h
@@ -2,7 +2,7 @@
   * This file is part of the GROMACS molecular simulation package.
   *
   * Copyright (c) 1991-2005 David van der Spoel, Erik Lindahl, University of Groningen.
- * Copyright (c) 2013,2014,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2017,2018,2019,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -104,7 +104,7 @@ int gmx_parallel_3dfft_complex_limits(gmx_parallel_3dfft_t pfft_setup,
  int gmx_parallel_3dfft_execute(gmx_parallel_3dfft_t   pfft_setup,
                                 enum gmx_fft_direction dir,
                                 int                    thread,
-                               gmx_wallcycle_t        wcycle);
+                               gmx_wallcycle*         wcycle);
  
  
  /*! \brief Release all data in parallel fft setup
diff --git a/src/gromacs/imd/imd.cpp b/src/gromacs/imd/imd.cpp

index cf475be7ccfa5e6378938e0ae9a9d28ad729501c..33285ca106d2ac30dbe3249f8185fc8c2d743153 100644 (file)
--- a/src/gromacs/imd/imd.cpp
+++ b/src/gromacs/imd/imd.cpp
@@ -1525,7 +1525,7 @@ bool ImdSession::Impl::run(int64_t step, bool bNS, const matrix box, gmx::ArrayR
          return false;
      }
  
-    wallcycle_start(wcycle, ewcIMD);
+    wallcycle_start(wcycle, WallCycleCounter::Imd);
  
      /* read command from client and check if new incoming connection */
      if (MASTER(cr))
@@ -1576,7 +1576,7 @@ bool ImdSession::Impl::run(int64_t step, bool bNS, const matrix box, gmx::ArrayR
          }
      }
  
-    wallcycle_stop(wcycle, ewcIMD);
+    wallcycle_stop(wcycle, WallCycleCounter::Imd);
  
      return imdstep;
  }
@@ -1641,7 +1641,7 @@ void ImdSession::updateEnergyRecordAndSendPositionsAndEnergies(bool bIMDstep, in
          return;
      }
  
-    wallcycle_start(impl_->wcycle, ewcIMD);
+    wallcycle_start(impl_->wcycle, WallCycleCounter::Imd);
  
      /* Update time step for IMD and prepare IMD energy record if we have new energies. */
      fillEnergyRecord(step, bHaveNewEnergies);
@@ -1652,7 +1652,7 @@ void ImdSession::updateEnergyRecordAndSendPositionsAndEnergies(bool bIMDstep, in
          sendPositionsAndEnergies();
      }
  
-    wallcycle_stop(impl_->wcycle, ewcIMD);
+    wallcycle_stop(impl_->wcycle, WallCycleCounter::Imd);
  }
  
  void ImdSession::applyForces(gmx::ArrayRef<gmx::RVec> force)
@@ -1662,7 +1662,7 @@ void ImdSession::applyForces(gmx::ArrayRef<gmx::RVec> force)
          return;
      }
  
-    wallcycle_start(impl_->wcycle, ewcIMD);
+    wallcycle_start(impl_->wcycle, WallCycleCounter::Imd);
  
      for (int i = 0; i < impl_->nforces; i++)
      {
@@ -1680,7 +1680,7 @@ void ImdSession::applyForces(gmx::ArrayRef<gmx::RVec> force)
          rvec_inc(force[j], impl_->f[i]);
      }
  
-    wallcycle_stop(impl_->wcycle, ewcIMD);
+    wallcycle_stop(impl_->wcycle, WallCycleCounter::Imd);
  }
  
  ImdSession::ImdSession(const MDLogger& mdlog) : impl_(new Impl(mdlog)) {}
diff --git a/src/gromacs/listed_forces/gpubonded_impl.cu b/src/gromacs/listed_forces/gpubonded_impl.cu

index 4a8bbe41ceb89ef73ae2906d162ed4885075886f..c5fbd00e4669d0b46f918bb250fdd3236e4d4c73 100644 (file)
--- a/src/gromacs/listed_forces/gpubonded_impl.cu
+++ b/src/gromacs/listed_forces/gpubonded_impl.cu
@@ -311,11 +311,11 @@ void GpuBonded::Impl::launchEnergyTransfer()
      GMX_ASSERT(haveInteractions_,
                 "No GPU bonded interactions, so no energies will be computed, so transfer should "
                 "not be called");
-    wallcycle_sub_start_nocount(wcycle_, ewcsLAUNCH_GPU_BONDED);
+    wallcycle_sub_start_nocount(wcycle_, WallCycleSubCounter::LaunchGpuBonded);
      // TODO add conditional on whether there has been any compute (and make sure host buffer doesn't contain garbage)
      float* h_vTot = vTot_.data();
      copyFromDeviceBuffer(h_vTot, &d_vTot_, 0, F_NRE, deviceStream_, GpuApiCallBehavior::Async, nullptr);
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_BONDED);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuBonded);
  }
  
  void GpuBonded::Impl::waitAccumulateEnergyTerms(gmx_enerdata_t* enerd)
@@ -324,10 +324,10 @@ void GpuBonded::Impl::waitAccumulateEnergyTerms(gmx_enerdata_t* enerd)
                 "No GPU bonded interactions, so no energies will be computed or transferred, so "
                 "accumulation should not occur");
  
-    wallcycle_start(wcycle_, ewcWAIT_GPU_BONDED);
+    wallcycle_start(wcycle_, WallCycleCounter::WaitGpuBonded);
      cudaError_t stat = cudaStreamSynchronize(deviceStream_.stream());
      CU_RET_ERR(stat, "D2H transfer of bonded energies failed");
-    wallcycle_stop(wcycle_, ewcWAIT_GPU_BONDED);
+    wallcycle_stop(wcycle_, WallCycleCounter::WaitGpuBonded);
  
      for (int fType : fTypesOnGpu)
      {
@@ -346,11 +346,11 @@ void GpuBonded::Impl::waitAccumulateEnergyTerms(gmx_enerdata_t* enerd)
  
  void GpuBonded::Impl::clearEnergies()
  {
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start_nocount(wcycle_, ewcsLAUNCH_GPU_BONDED);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start_nocount(wcycle_, WallCycleSubCounter::LaunchGpuBonded);
      clearDeviceBufferAsync(&d_vTot_, 0, F_NRE, deviceStream_);
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_BONDED);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuBonded);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  // ---- GpuBonded
diff --git a/src/gromacs/listed_forces/gpubondedkernels.cu b/src/gromacs/listed_forces/gpubondedkernels.cu

index 57fbbc1be4b03ca5e7e3a3124c58127e9dbabb98..1537c58d58b653252441cfe63547f3c06f527ade 100644 (file)
--- a/src/gromacs/listed_forces/gpubondedkernels.cu
+++ b/src/gromacs/listed_forces/gpubondedkernels.cu
@@ -914,8 +914,8 @@ void GpuBonded::Impl::launchKernel()
      GMX_ASSERT(haveInteractions_,
                 "Cannot launch bonded GPU kernels unless bonded GPU work was scheduled");
  
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_BONDED);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuBonded);
  
      int fTypeRangeEnd = kernelParams_.fTypeRangeEnd[numFTypesOnGpu - 1];
  
@@ -935,8 +935,8 @@ void GpuBonded::Impl::launchKernel()
                      "exec_kernel_gpu<calcVir, calcEner>",
                      kernelArgs);
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_BONDED);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuBonded);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  void GpuBonded::launchKernel(const gmx::StepWorkload& stepWork)
diff --git a/src/gromacs/listed_forces/listed_forces.cpp b/src/gromacs/listed_forces/listed_forces.cpp

index b0d83be8434bc645160c111a79e65af32eacb95c..a35e58911cbda26533b7de417c3206291df4045a 100644 (file)
--- a/src/gromacs/listed_forces/listed_forces.cpp
+++ b/src/gromacs/listed_forces/listed_forces.cpp
@@ -657,7 +657,7 @@ void calc_listed(struct gmx_wallcycle*         wcycle,
      {
          gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();
  
-        wallcycle_sub_start(wcycle, ewcsLISTED);
+        wallcycle_sub_start(wcycle, WallCycleSubCounter::Listed);
          /* The dummy array is to have a place to store the dhdl at other values
             of lambda, which will be thrown away in the end */
          gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> dvdl = { 0 };
@@ -675,9 +675,9 @@ void calc_listed(struct gmx_wallcycle*         wcycle,
                           fcd,
                           stepWork,
                           global_atom_index);
-        wallcycle_sub_stop(wcycle, ewcsLISTED);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::Listed);
  
-        wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
+        wallcycle_sub_start(wcycle, WallCycleSubCounter::ListedBufOps);
          reduce_thread_output(&forceWithShiftForces, enerd->term.data(), &enerd->grpp, dvdl, bt, stepWork);
  
          if (stepWork.computeDhdl)
@@ -687,7 +687,7 @@ void calc_listed(struct gmx_wallcycle*         wcycle,
                  enerd->dvdl_nonlin[i] += dvdl[i];
              }
          }
-        wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::ListedBufOps);
      }
  
      /* Copy the sum of violations for the distance restraints from fcd */
@@ -829,7 +829,7 @@ void ListedForces::calculate(struct gmx_wallcycle*                     wcycle,
             awkward to account to this subtimer properly in the present
             code. We don't test / care much about performance with
             restraints, anyway. */
-        wallcycle_sub_start(wcycle, ewcsRESTRAINTS);
+        wallcycle_sub_start(wcycle, WallCycleSubCounter::Restraints);
  
          if (!idef.il[F_POSRES].empty())
          {
@@ -868,7 +868,7 @@ void ListedForces::calculate(struct gmx_wallcycle*                     wcycle,
                              hist);
          }
  
-        wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::Restraints);
      }
  
      calc_listed(wcycle, idef, threading_.get(), x, forceOutputs, fr, pbc, enerd, nrnb, lambda, md, fcdata, global_atom_index, stepWork);
@@ -885,7 +885,7 @@ void ListedForces::calculate(struct gmx_wallcycle*                     wcycle,
          }
          if (idef.ilsort != ilsortNO_FE)
          {
-            wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
+            wallcycle_sub_start(wcycle, WallCycleSubCounter::ListedFep);
              if (idef.ilsort != ilsortFE_SORTED)
              {
                  gmx_incons("The bonded interactions are not sorted for free energy");
@@ -919,7 +919,7 @@ void ListedForces::calculate(struct gmx_wallcycle*                     wcycle,
                  std::fill(std::begin(dvdl), std::end(dvdl), 0.0);
                  enerd->foreignLambdaTerms.accumulate(i, enerd->foreign_term[F_EPOT], dvdlSum);
              }
-            wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);
+            wallcycle_sub_stop(wcycle, WallCycleSubCounter::ListedFep);
          }
      }
  }
diff --git a/src/gromacs/listed_forces/position_restraints.cpp b/src/gromacs/listed_forces/position_restraints.cpp

index aa881af20d6c3e0ec7b556797bb533666b959863..165b8932083fe8952b2ad79db54e1d505610874a 100644 (file)
--- a/src/gromacs/listed_forces/position_restraints.cpp
+++ b/src/gromacs/listed_forces/position_restraints.cpp
@@ -463,7 +463,7 @@ void posres_wrapper_lambda(struct gmx_wallcycle*         wcycle,
                             gmx::ArrayRef<const real>     lambda,
                             const t_forcerec*             fr)
  {
-    wallcycle_sub_start_nocount(wcycle, ewcsRESTRAINTS);
+    wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::Restraints);
  
      auto& foreignTerms = enerd->foreignLambdaTerms;
      for (int i = 0; i < 1 + foreignTerms.numLambdas(); i++)
@@ -487,7 +487,7 @@ void posres_wrapper_lambda(struct gmx_wallcycle*         wcycle,
                                       fr->posres_comB);
          foreignTerms.accumulate(i, v, dvdl);
      }
-    wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
+    wallcycle_sub_stop(wcycle, WallCycleSubCounter::Restraints);
  }
  
  /*! \brief Helper function that wraps calls to fbposres for
diff --git a/src/gromacs/mdlib/constr.cpp b/src/gromacs/mdlib/constr.cpp

index 398841179cf4d26e5744ada8e2fc9aeaec2bd921..dca3379ce84772ce7516badb877449754012460e 100644 (file)
--- a/src/gromacs/mdlib/constr.cpp
+++ b/src/gromacs/mdlib/constr.cpp
@@ -414,7 +414,7 @@ bool Constraints::Impl::apply(bool                      bLog,
      char  buf[22];
      int   nth;
  
-    wallcycle_start(wcycle, ewcCONSTR);
+    wallcycle_start(wcycle, WallCycleCounter::Constr);
  
      if (econq == ConstraintVariable::ForceDispl && !EI_ENERGY_MINIMIZATION(ir.eI))
      {
@@ -781,7 +781,7 @@ bool Constraints::Impl::apply(bool                      bLog,
              do_edsam(&ir, step, cr, xprime.unpaddedArrayRef(), v.unpaddedArrayRef(), box, ed);
          }
      }
-    wallcycle_stop(wcycle, ewcCONSTR);
+    wallcycle_stop(wcycle, WallCycleCounter::Constr);
  
      const bool haveVelocities = (!v.empty() || econq == ConstraintVariable::Velocities);
      if (haveVelocities && !cFREEZE_.empty())
diff --git a/src/gromacs/mdlib/force.cpp b/src/gromacs/mdlib/force.cpp

index 081d603dc1e5f39c508f1468087a3af1e705630d..ad08c862fb026093653ad7fe820fb36de2ff75a6 100644 (file)
--- a/src/gromacs/mdlib/force.cpp
+++ b/src/gromacs/mdlib/force.cpp
@@ -104,7 +104,7 @@ void calculateLongRangeNonbondeds(t_forcerec*                    fr,
                                    const t_inputrec&              ir,
                                    const t_commrec*               cr,
                                    t_nrnb*                        nrnb,
-                                  gmx_wallcycle_t                wcycle,
+                                  gmx_wallcycle*                 wcycle,
                                    const t_mdatoms*               md,
                                    gmx::ArrayRef<const RVec>      coordinates,
                                    gmx::ForceWithVirial*          forceWithVirial,
@@ -141,7 +141,7 @@ void calculateLongRangeNonbondeds(t_forcerec*                    fr,
              /* Calculate the Ewald surface force and energy contributions, when necessary */
              if (haveEwaldSurfaceTerm)
              {
-                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
+                wallcycle_sub_start(wcycle, WallCycleSubCounter::EwaldCorrection);
  
                  int nthreads = fr->nthread_ewc;
  #pragma omp parallel for num_threads(nthreads) schedule(static)
@@ -184,7 +184,7 @@ void calculateLongRangeNonbondeds(t_forcerec*                    fr,
                  {
                      reduceEwaldThreadOuput(nthreads, fr->ewc_t);
                  }
-                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
+                wallcycle_sub_stop(wcycle, WallCycleSubCounter::EwaldCorrection);
              }
  
              if (EEL_PME_EWALD(fr->ic->eeltype) && fr->n_tpi == 0)
@@ -212,7 +212,7 @@ void calculateLongRangeNonbondeds(t_forcerec*                    fr,
                       */
                      ddBalanceRegionHandler.closeAfterForceComputationCpu();
  
-                    wallcycle_start(wcycle, ewcPMEMESH);
+                    wallcycle_start(wcycle, WallCycleCounter::PmeMesh);
                      status = gmx_pme_do(
                              fr->pmedata,
                              gmx::constArrayRefFromArray(coordinates.data(), md->homenr - fr->n_tpi),
@@ -238,7 +238,7 @@ void calculateLongRangeNonbondeds(t_forcerec*                    fr,
                              &ewaldOutput.dvdl[FreeEnergyPerturbationCouplingType::Coul],
                              &ewaldOutput.dvdl[FreeEnergyPerturbationCouplingType::Vdw],
                              stepWork);
-                    wallcycle_stop(wcycle, ewcPMEMESH);
+                    wallcycle_stop(wcycle, WallCycleCounter::PmeMesh);
                      if (status != 0)
                      {
                          gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
diff --git a/src/gromacs/mdlib/gpuforcereduction_impl.cu b/src/gromacs/mdlib/gpuforcereduction_impl.cu

index 80aad0ed38b648eaa81dfe05bea0c7fc78de21a2..471cc1d5c7d3d30880e37b73bef4b9c889c7794c 100644 (file)
--- a/src/gromacs/mdlib/gpuforcereduction_impl.cu
+++ b/src/gromacs/mdlib/gpuforcereduction_impl.cu
@@ -130,7 +130,7 @@ void GpuForceReduction::Impl::reinit(DeviceBuffer<Float3>  baseForcePtr,
      completionMarker_ = completionMarker;
      cellInfo_.cell    = cell.data();
  
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
      reallocateDeviceBuffer(
              &cellInfo_.d_cell, numAtoms_, &cellInfo_.cellSize, &cellInfo_.cellSizeAlloc, deviceContext_);
      copyToDeviceBuffer(&cellInfo_.d_cell,
@@ -140,7 +140,7 @@ void GpuForceReduction::Impl::reinit(DeviceBuffer<Float3>  baseForcePtr,
                         deviceStream_,
                         GpuApiCallBehavior::Async,
                         nullptr);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  
      dependencyList_.clear();
  };
@@ -164,8 +164,8 @@ void GpuForceReduction::Impl::addDependency(GpuEventSynchronizer* const dependen
  
  void GpuForceReduction::Impl::execute()
  {
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_NB_F_BUF_OPS);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuNBFBufOps);
  
      if (numAtoms_ == 0)
      {
@@ -209,8 +209,8 @@ void GpuForceReduction::Impl::execute()
          completionMarker_->markEvent(deviceStream_);
      }
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_NB_F_BUF_OPS);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuNBFBufOps);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  GpuForceReduction::Impl::~Impl(){};
diff --git a/src/gromacs/mdlib/md_support.cpp b/src/gromacs/mdlib/md_support.cpp

index 24ae64beec043192ed4645b0b68ba4e9ab58704a..b991293a5661b8daa4577e8c8b685c7617f30913 100644 (file)
--- a/src/gromacs/mdlib/md_support.cpp
+++ b/src/gromacs/mdlib/md_support.cpp
@@ -292,7 +292,7 @@ void compute_globals(gmx_global_stat*               gstat,
                       const t_mdatoms*               mdatoms,
                       t_nrnb*                        nrnb,
                       t_vcm*                         vcm,
-                     gmx_wallcycle_t                wcycle,
+                     gmx_wallcycle*                 wcycle,
                       gmx_enerdata_t*                enerd,
                       tensor                         force_vir,
                       tensor                         shake_vir,
@@ -359,7 +359,7 @@ void compute_globals(gmx_global_stat*               gstat,
              gmx::ArrayRef<real> signalBuffer = signalCoordinator->getCommunicationBuffer();
              if (PAR(cr))
              {
-                wallcycle_start(wcycle, ewcMoveE);
+                wallcycle_start(wcycle, WallCycleCounter::MoveE);
                  global_stat(*gstat,
                              cr,
                              enerd,
@@ -372,7 +372,7 @@ void compute_globals(gmx_global_stat*               gstat,
                              signalBuffer,
                              *bSumEkinhOld,
                              flags);
-                wallcycle_stop(wcycle, ewcMoveE);
+                wallcycle_stop(wcycle, WallCycleCounter::MoveE);
              }
              signalCoordinator->finalizeSignals();
              *bSumEkinhOld = FALSE;
diff --git a/src/gromacs/mdlib/md_support.h b/src/gromacs/mdlib/md_support.h

index 40e1437c13beeef9e176dc4e5fc01b58a39b2cad..da949e10d1165f0e573936a14e59f7f06c28093f 100644 (file)
--- a/src/gromacs/mdlib/md_support.h
+++ b/src/gromacs/mdlib/md_support.h
@@ -123,7 +123,7 @@ void compute_globals(gmx_global_stat*               gstat,
                       const t_mdatoms*               mdatoms,
                       t_nrnb*                        nrnb,
                       t_vcm*                         vcm,
-                     gmx_wallcycle_t                wcycle,
+                     gmx_wallcycle*                 wcycle,
                       gmx_enerdata_t*                enerd,
                       tensor                         force_vir,
                       tensor                         shake_vir,
diff --git a/src/gromacs/mdlib/mdoutf.cpp b/src/gromacs/mdlib/mdoutf.cpp

index 8f8fd006a41fe8155f3c5d067b4bfff060820c0b..9769bac325fb30a21757bcfc817211ea89db09fd 100644 (file)
--- a/src/gromacs/mdlib/mdoutf.cpp
+++ b/src/gromacs/mdlib/mdoutf.cpp
@@ -91,7 +91,7 @@ struct gmx_mdoutf
      int                            natoms_global;
      int                            natoms_x_compressed;
      const SimulationGroups*        groups; /* for compressed position writing */
-    gmx_wallcycle_t                wcycle;
+    gmx_wallcycle*                 wcycle;
      rvec*                          f_global;
      gmx::IMDOutputProvider*        outputProvider;
      const gmx::MDModulesNotifiers* mdModulesNotifiers;
@@ -110,7 +110,7 @@ gmx_mdoutf_t init_mdoutf(FILE*                          fplog,
                           const t_inputrec*              ir,
                           const gmx_mtop_t&              top_global,
                           const gmx_output_env_t*        oenv,
-                         gmx_wallcycle_t                wcycle,
+                         gmx_wallcycle*                 wcycle,
                           const gmx::StartingBehavior    startingBehavior,
                           bool                           simulationsShareState,
                           const gmx_multisim_t*          ms)
@@ -261,7 +261,7 @@ FILE* mdoutf_get_fp_dhdl(gmx_mdoutf_t of)
      return of->fp_dhdl;
  }
  
-gmx_wallcycle_t mdoutf_get_wcycle(gmx_mdoutf_t of)
+gmx_wallcycle* mdoutf_get_wcycle(gmx_mdoutf_t of)
  {
      return of->wcycle;
  }
@@ -753,10 +753,10 @@ void mdoutf_tng_close(gmx_mdoutf_t of)
  {
      if (of->tng || of->tng_low_prec)
      {
-        wallcycle_start(of->wcycle, ewcTRAJ);
+        wallcycle_start(of->wcycle, WallCycleCounter::Traj);
          gmx_tng_close(&of->tng);
          gmx_tng_close(&of->tng_low_prec);
-        wallcycle_stop(of->wcycle, ewcTRAJ);
+        wallcycle_stop(of->wcycle, WallCycleCounter::Traj);
      }
  }
  
diff --git a/src/gromacs/mdlib/mdoutf.h b/src/gromacs/mdlib/mdoutf.h

index 6461f9c74789c5ba0fdecad5b5d88aaee5901050..308f08cecc898d1b47be9ef1a074ceb720083248 100644 (file)
--- a/src/gromacs/mdlib/mdoutf.h
+++ b/src/gromacs/mdlib/mdoutf.h
@@ -79,7 +79,7 @@ gmx_mdoutf_t init_mdoutf(FILE*                          fplog,
                           const t_inputrec*              ir,
                           const gmx_mtop_t&              mtop,
                           const gmx_output_env_t*        oenv,
-                         gmx_wallcycle_t                wcycle,
+                         gmx_wallcycle*                 wcycle,
                           gmx::StartingBehavior          startingBehavior,
                           bool                           simulationsShareState,
                           const gmx_multisim_t*          ms);
@@ -91,7 +91,7 @@ ener_file_t mdoutf_get_fp_ene(gmx_mdoutf_t of);
  FILE* mdoutf_get_fp_dhdl(gmx_mdoutf_t of);
  
  /*! \brief Getter for wallcycle timer */
-gmx_wallcycle_t mdoutf_get_wcycle(gmx_mdoutf_t of);
+gmx_wallcycle* mdoutf_get_wcycle(gmx_mdoutf_t of);
  
  /*! \brief Close TNG files if they are open.
   *
diff --git a/src/gromacs/mdlib/resethandler.cpp b/src/gromacs/mdlib/resethandler.cpp

index 7e807061ac970663c1c93cc43519a220dbb447db..a471a6c828721422171c68b0347fd28556d380b3 100644 (file)
--- a/src/gromacs/mdlib/resethandler.cpp
+++ b/src/gromacs/mdlib/resethandler.cpp
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -80,7 +80,7 @@ ResetHandler::ResetHandler(compat::not_null<SimulationSignal*> signal,
                             bool                                resetHalfway,
                             real                                maximumHoursToRun,
                             const MDLogger&                     mdlog,
-                           gmx_wallcycle_t                     wcycle,
+                           gmx_wallcycle*                      wcycle,
                             gmx_walltime_accounting_t           walltime_accounting) :
      signal_(*signal),
      rankCanSetSignal_(false),
@@ -144,7 +144,7 @@ bool ResetHandler::resetCountersImpl(int64_t                     step,
                                       t_nrnb*                     nrnb,
                                       const gmx_pme_t*            pme,
                                       const pme_load_balancing_t* pme_loadbal,
-                                     gmx_wallcycle_t             wcycle,
+                                     gmx_wallcycle*              wcycle,
                                       gmx_walltime_accounting_t   walltime_accounting)
  {
      /* Reset either if signal has been passed, or if reset step has been reached */
@@ -194,14 +194,14 @@ bool ResetHandler::resetCountersImpl(int64_t                     step,
              resetGpuProfiler();
          }
  
-        wallcycle_stop(wcycle, ewcRUN);
+        wallcycle_stop(wcycle, WallCycleCounter::Run);
          wallcycle_reset_all(wcycle);
          if (DOMAINDECOMP(cr))
          {
              reset_dd_statistics_counters(cr->dd);
          }
          clear_nrnb(nrnb);
-        wallcycle_start(wcycle, ewcRUN);
+        wallcycle_start(wcycle, WallCycleCounter::Run);
          walltime_accounting_reset_time(walltime_accounting, step);
          print_date_and_time(fplog, cr->nodeid, "Restarted time", gmx_gettime());
  
diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp

index 3da4bc541c493761f074a58523c5a34676cc65db..105245ebfdd6c037b8d3c7c331507bdfb11d5e07 100644 (file)
--- a/src/gromacs/mdlib/sim_util.cpp
+++ b/src/gromacs/mdlib/sim_util.cpp
@@ -193,7 +193,7 @@ static void pull_potential_wrapper(const t_commrec*               cr,
                                     pull_t*                        pull_work,
                                     const real*                    lambda,
                                     double                         t,
-                                   gmx_wallcycle_t                wcycle)
+                                   gmx_wallcycle*                 wcycle)
  {
      t_pbc pbc;
      real  dvdl;
@@ -201,7 +201,7 @@ static void pull_potential_wrapper(const t_commrec*               cr,
      /* Calculate the center of mass forces, this requires communication,
       * which is why pull_potential is called close to other communication.
       */
-    wallcycle_start(wcycle, ewcPULLPOT);
+    wallcycle_start(wcycle, WallCycleCounter::PullPot);
      set_pbc(&pbc, ir.pbcType, box);
      dvdl = 0;
      enerd->term[F_COM_PULL] +=
@@ -215,7 +215,7 @@ static void pull_potential_wrapper(const t_commrec*               cr,
                             force,
                             &dvdl);
      enerd->dvdl_lin[FreeEnergyPerturbationCouplingType::Restraint] += dvdl;
-    wallcycle_stop(wcycle, ewcPULLPOT);
+    wallcycle_stop(wcycle, WallCycleCounter::PullPot);
  }
  
  static void pme_receive_force_ener(t_forcerec*           fr,
@@ -224,18 +224,18 @@ static void pme_receive_force_ener(t_forcerec*           fr,
                                     gmx_enerdata_t*       enerd,
                                     bool                  useGpuPmePpComms,
                                     bool                  receivePmeForceToGpu,
-                                   gmx_wallcycle_t       wcycle)
+                                   gmx_wallcycle*        wcycle)
  {
      real  e_q, e_lj, dvdl_q, dvdl_lj;
      float cycles_ppdpme, cycles_seppme;
  
-    cycles_ppdpme = wallcycle_stop(wcycle, ewcPPDURINGPME);
+    cycles_ppdpme = wallcycle_stop(wcycle, WallCycleCounter::PpDuringPme);
      dd_cycles_add(cr->dd, cycles_ppdpme, ddCyclPPduringPME);
  
      /* In case of node-splitting, the PP nodes receive the long-range
       * forces, virial and energy from the PME nodes here.
       */
-    wallcycle_start(wcycle, ewcPP_PMEWAITRECVF);
+    wallcycle_start(wcycle, WallCycleCounter::PpPmeWaitRecvF);
      dvdl_q  = 0;
      dvdl_lj = 0;
      gmx_pme_receive_f(fr->pmePpCommGpu.get(),
@@ -257,7 +257,7 @@ static void pme_receive_force_ener(t_forcerec*           fr,
      {
          dd_cycles_add(cr->dd, cycles_seppme, ddCyclPME);
      }
-    wallcycle_stop(wcycle, ewcPP_PMEWAITRECVF);
+    wallcycle_stop(wcycle, WallCycleCounter::PpPmeWaitRecvF);
  }
  
  static void print_large_forces(FILE*                fp,
@@ -302,7 +302,7 @@ static void print_large_forces(FILE*                fp,
  
  //! When necessary, spreads forces on vsites and computes the virial for \p forceOutputs->forceWithShiftForces()
  static void postProcessForceWithShiftForces(t_nrnb*                   nrnb,
-                                            gmx_wallcycle_t           wcycle,
+                                            gmx_wallcycle*            wcycle,
                                              const matrix              box,
                                              ArrayRef<const RVec>      x,
                                              ForceOutputs*             forceOutputs,
@@ -342,7 +342,7 @@ static void postProcessForceWithShiftForces(t_nrnb*                   nrnb,
  static void postProcessForces(const t_commrec*          cr,
                                int64_t                   step,
                                t_nrnb*                   nrnb,
-                              gmx_wallcycle_t           wcycle,
+                              gmx_wallcycle*            wcycle,
                                const matrix              box,
                                ArrayRef<const RVec>      x,
                                ForceOutputs*             forceOutputs,
@@ -417,7 +417,7 @@ static void do_nb_verlet(t_forcerec*                fr,
                           const int                  clearF,
                           const int64_t              step,
                           t_nrnb*                    nrnb,
-                         gmx_wallcycle_t            wcycle)
+                         gmx_wallcycle*             wcycle)
  {
      if (!stepWork.computeNonbondedForces)
      {
@@ -438,9 +438,9 @@ static void do_nb_verlet(t_forcerec*                fr,
              /* Prune the pair-list beyond fr->ic->rlistPrune using
               * the current coordinates of the atoms.
               */
-            wallcycle_sub_start(wcycle, ewcsNONBONDED_PRUNING);
+            wallcycle_sub_start(wcycle, WallCycleSubCounter::NonbondedPruning);
              nbv->dispatchPruneKernelCpu(ilocality, fr->shift_vec);
-            wallcycle_sub_stop(wcycle, ewcsNONBONDED_PRUNING);
+            wallcycle_sub_stop(wcycle, WallCycleSubCounter::NonbondedPruning);
          }
      }
  
@@ -624,7 +624,7 @@ static void computeSpecialForces(FILE*                          fplog,
                                   pull_t*                        pull_work,
                                   int64_t                        step,
                                   double                         t,
-                                 gmx_wallcycle_t                wcycle,
+                                 gmx_wallcycle*                 wcycle,
                                   gmx::ForceProviders*           forceProviders,
                                   const matrix                   box,
                                   gmx::ArrayRef<const gmx::RVec> x,
@@ -697,10 +697,10 @@ static void computeSpecialForces(FILE*                          fplog,
      /* Add the forces from enforced rotation potentials (if any) */
      if (inputrec.bRot)
      {
-        wallcycle_start(wcycle, ewcROTadd);
+        wallcycle_start(wcycle, WallCycleCounter::RotAdd);
          enerd->term[F_COM_PULL] +=
                  add_rot_forces(enforcedRotation, forceWithVirialMtsLevel0->force_, cr, step, t);
-        wallcycle_stop(wcycle, ewcROTadd);
+        wallcycle_stop(wcycle, WallCycleCounter::RotAdd);
      }
  
      if (ed)
@@ -734,7 +734,7 @@ static inline void launchPmeGpuSpread(gmx_pme_t*            pmedata,
                                        const StepWorkload&   stepWork,
                                        GpuEventSynchronizer* xReadyOnDevice,
                                        const real            lambdaQ,
-                                      gmx_wallcycle_t       wcycle)
+                                      gmx_wallcycle*        wcycle)
  {
      pme_gpu_prepare_computation(pmedata, box, wcycle, stepWork);
      pme_gpu_launch_spread(pmedata, xReadyOnDevice, wcycle, lambdaQ);
@@ -751,7 +751,7 @@ static inline void launchPmeGpuSpread(gmx_pme_t*            pmedata,
   */
  static void launchPmeGpuFftAndGather(gmx_pme_t*               pmedata,
                                       const real               lambdaQ,
-                                     gmx_wallcycle_t          wcycle,
+                                     gmx_wallcycle*           wcycle,
                                       const gmx::StepWorkload& stepWork)
  {
      pme_gpu_launch_complex_transforms(pmedata, wcycle, stepWork);
@@ -783,7 +783,7 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t* nbv,
                                          gmx_enerdata_t*     enerd,
                                          const real          lambdaQ,
                                          const StepWorkload& stepWork,
-                                        gmx_wallcycle_t     wcycle)
+                                        gmx_wallcycle*      wcycle)
  {
      bool isPmeGpuDone = false;
      bool isNbGpuDone  = false;
@@ -839,9 +839,9 @@ static ForceOutputs setupForceOutputs(ForceHelperBuffers*                 forceH
                                        const DomainLifetimeWorkload&       domainWork,
                                        const StepWorkload&                 stepWork,
                                        const bool                          havePpDomainDecomposition,
-                                      gmx_wallcycle_t                     wcycle)
+                                      gmx_wallcycle*                      wcycle)
  {
-    wallcycle_sub_start(wcycle, ewcsCLEAR_FORCE_BUFFER);
+    wallcycle_sub_start(wcycle, WallCycleSubCounter::ClearForceBuffer);
  
      /* NOTE: We assume fr->shiftForces is all zeros here */
      gmx::ForceWithShiftForces forceWithShiftForces(
@@ -882,7 +882,7 @@ static ForceOutputs setupForceOutputs(ForceHelperBuffers*                 forceH
          clearRVecs(forceWithVirial.force_, true);
      }
  
-    wallcycle_sub_stop(wcycle, ewcsCLEAR_FORCE_BUFFER);
+    wallcycle_sub_stop(wcycle, WallCycleSubCounter::ClearForceBuffer);
  
      return ForceOutputs(
              forceWithShiftForces, forceHelperBuffers->haveDirectVirialContributions(), forceWithVirial);
@@ -992,7 +992,7 @@ static void launchGpuEndOfStepTasks(nonbonded_verlet_t*               nbv,
                                      const gmx::MdrunScheduleWorkload& runScheduleWork,
                                      bool                              useGpuPmeOnThisRank,
                                      int64_t                           step,
-                                    gmx_wallcycle_t                   wcycle)
+                                    gmx_wallcycle*                    wcycle)
  {
      if (runScheduleWork.simulationWork.useGpuNonbonded && runScheduleWork.stepWork.computeNonbondedForces)
      {
@@ -1006,11 +1006,11 @@ static void launchGpuEndOfStepTasks(nonbonded_verlet_t*               nbv,
          }
  
          /* now clear the GPU outputs while we finish the step on the CPU */
-        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+        wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
          Nbnxm::gpu_clear_outputs(nbv->gpu_nbv, runScheduleWork.stepWork.computeVirial);
-        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+        wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
      }
  
      if (useGpuPmeOnThisRank)
@@ -1195,7 +1195,7 @@ void do_force(FILE*                               fplog,
                pull_t*                             pull_work,
                int64_t                             step,
                t_nrnb*                             nrnb,
-              gmx_wallcycle_t                     wcycle,
+              gmx_wallcycle*                      wcycle,
                const gmx_localtop_t*               top,
                const matrix                        box,
                gmx::ArrayRefWithPadding<gmx::RVec> x,
@@ -1374,12 +1374,12 @@ void do_force(FILE*                               fplog,
              fr->wholeMoleculeTransform->updateForAtomPbcJumps(x.unpaddedArrayRef(), box);
          }
  
-        wallcycle_start(wcycle, ewcNS);
+        wallcycle_start(wcycle, WallCycleCounter::NS);
          if (!DOMAINDECOMP(cr))
          {
              const rvec vzero       = { 0.0_real, 0.0_real, 0.0_real };
              const rvec boxDiagonal = { box[XX][XX], box[YY][YY], box[ZZ][ZZ] };
-            wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL);
+            wallcycle_sub_start(wcycle, WallCycleSubCounter::NBSGridLocal);
              nbnxn_put_on_grid(nbv,
                                box,
                                0,
@@ -1392,30 +1392,30 @@ void do_force(FILE*                               fplog,
                                x.unpaddedArrayRef(),
                                0,
                                nullptr);
-            wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL);
+            wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSGridLocal);
          }
          else
          {
-            wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL);
+            wallcycle_sub_start(wcycle, WallCycleSubCounter::NBSGridNonLocal);
              nbnxn_put_on_grid_nonlocal(nbv, domdec_zones(cr->dd), fr->cginfo, x.unpaddedArrayRef());
-            wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
+            wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSGridNonLocal);
          }
  
          nbv->setAtomProperties(gmx::constArrayRefFromArray(mdatoms->typeA, mdatoms->nr),
                                 gmx::constArrayRefFromArray(mdatoms->chargeA, mdatoms->nr),
                                 fr->cginfo);
  
-        wallcycle_stop(wcycle, ewcNS);
+        wallcycle_stop(wcycle, WallCycleCounter::NS);
  
          /* initialize the GPU nbnxm atom data and bonded data structures */
          if (simulationWork.useGpuNonbonded)
          {
              // Note: cycle counting only nononbondeds, gpuBonded counts internally
-            wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-            wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+            wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+            wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
              Nbnxm::gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat.get());
-            wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-            wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+            wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+            wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
  
              if (fr->gpuBonded)
              {
@@ -1440,15 +1440,15 @@ void do_force(FILE*                               fplog,
          runScheduleWork->domainWork = setupDomainLifetimeWorkload(
                  inputrec, *fr, pull_work, ed, *mdatoms, simulationWork, stepWork);
  
-        wallcycle_start_nocount(wcycle, ewcNS);
-        wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::NS);
+        wallcycle_sub_start(wcycle, WallCycleSubCounter::NBSSearchLocal);
          /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
          nbv->constructPairlist(InteractionLocality::Local, top->excls, step, nrnb);
  
          nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::Local);
  
-        wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
-        wallcycle_stop(wcycle, ewcNS);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSSearchLocal);
+        wallcycle_stop(wcycle, WallCycleCounter::NS);
  
          if (stepWork.useGpuXBufferOps)
          {
@@ -1484,15 +1484,15 @@ void do_force(FILE*                               fplog,
      {
          ddBalanceRegionHandler.openBeforeForceComputationGpu();
  
-        wallcycle_start(wcycle, ewcLAUNCH_GPU);
-        wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+        wallcycle_start(wcycle, WallCycleCounter::LaunchGpu);
+        wallcycle_sub_start(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
          Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat.get());
          if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps)
          {
              Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), AtomLocality::Local);
          }
-        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+        wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
          // with X buffer ops offloaded to the GPU on all but the search steps
  
          // bonded work not split into separate local and non-local, so with DD
@@ -1503,11 +1503,11 @@ void do_force(FILE*                               fplog,
          }
  
          /* launch local nonbonded work on GPU */
-        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+        wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
          do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, enbvClearFNo, step, nrnb, wcycle);
-        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+        wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
      }
  
      if (useGpuPmeOnThisRank)
@@ -1529,14 +1529,14 @@ void do_force(FILE*                               fplog,
          if (stepWork.doNeighborSearch)
          {
              // TODO: fuse this branch with the above large stepWork.doNeighborSearch block
-            wallcycle_start_nocount(wcycle, ewcNS);
-            wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
+            wallcycle_start_nocount(wcycle, WallCycleCounter::NS);
+            wallcycle_sub_start(wcycle, WallCycleSubCounter::NBSSearchNonLocal);
              /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
              nbv->constructPairlist(InteractionLocality::NonLocal, top->excls, step, nrnb);
  
              nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::NonLocal);
-            wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
-            wallcycle_stop(wcycle, ewcNS);
+            wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSSearchNonLocal);
+            wallcycle_stop(wcycle, WallCycleCounter::NS);
              // TODO refactor this GPU halo exchange re-initialisation
              // to location in do_md where GPU halo exchange is
              // constructed at partitioning, after above stateGpu
@@ -1593,11 +1593,11 @@ void do_force(FILE*                               fplog,
  
              if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps)
              {
-                wallcycle_start(wcycle, ewcLAUNCH_GPU);
-                wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+                wallcycle_start(wcycle, WallCycleCounter::LaunchGpu);
+                wallcycle_sub_start(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
                  Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), AtomLocality::NonLocal);
-                wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-                wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+                wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+                wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
              }
  
              if (domainWork.haveGpuBondedWork)
@@ -1606,32 +1606,32 @@ void do_force(FILE*                               fplog,
              }
  
              /* launch non-local nonbonded tasks on GPU */
-            wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-            wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+            wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+            wallcycle_sub_start(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
              do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFNo, step, nrnb, wcycle);
-            wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-            wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+            wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+            wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
          }
      }
  
      if (simulationWork.useGpuNonbonded && stepWork.computeNonbondedForces)
      {
          /* launch D2H copy-back F */
-        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+        wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
  
          if (havePPDomainDecomposition(cr))
          {
              Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), stepWork, AtomLocality::NonLocal);
          }
          Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), stepWork, AtomLocality::Local);
-        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
  
          if (domainWork.haveGpuBondedWork && stepWork.computeEnergy)
          {
              fr->gpuBonded->launchEnergyTransfer();
          }
-        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+        wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
      }
  
      gmx::ArrayRef<const gmx::RVec> xWholeMolecules;
@@ -1676,7 +1676,7 @@ void do_force(FILE*                               fplog,
  
      if (DOMAINDECOMP(cr) && !thisRankHasDuty(cr, DUTY_PME))
      {
-        wallcycle_start(wcycle, ewcPPDURINGPME);
+        wallcycle_start(wcycle, WallCycleCounter::PpDuringPme);
          dd_force_flop_start(cr->dd, nrnb);
      }
  
@@ -1691,15 +1691,15 @@ void do_force(FILE*                               fplog,
  
      if (inputrec.bRot)
      {
-        wallcycle_start(wcycle, ewcROT);
+        wallcycle_start(wcycle, WallCycleCounter::Rot);
          do_rotation(cr, enforcedRotation, box, x.unpaddedConstArrayRef(), t, step, stepWork.doNeighborSearch);
-        wallcycle_stop(wcycle, ewcROT);
+        wallcycle_stop(wcycle, WallCycleCounter::Rot);
      }
  
      /* Start the force cycle counter.
       * Note that a different counter is used for dynamic load balancing.
       */
-    wallcycle_start(wcycle, ewcFORCE);
+    wallcycle_start(wcycle, WallCycleCounter::Force);
  
      /* Set up and clear force outputs:
       * forceOutMtsLevel0:  everything except what is in the other two outputs
@@ -1799,10 +1799,10 @@ void do_force(FILE*                               fplog,
               * This can be split into a local and a non-local part when overlapping
               * communication with calculation with domain decomposition.
               */
-            wallcycle_stop(wcycle, ewcFORCE);
+            wallcycle_stop(wcycle, WallCycleCounter::Force);
              nbv->atomdata_add_nbat_f_to_f(AtomLocality::All,
                                            forceOutNonbonded->forceWithShiftForces().force());
-            wallcycle_start_nocount(wcycle, ewcFORCE);
+            wallcycle_start_nocount(wcycle, WallCycleCounter::Force);
          }
  
          /* If there are multiple fshift output buffers we need to reduce them */
@@ -1818,10 +1818,10 @@ void do_force(FILE*                               fplog,
      // TODO Force flags should include haveFreeEnergyWork for this domain
      if (stepWork.useGpuXHalo && (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork))
      {
-        wallcycle_stop(wcycle, ewcFORCE);
+        wallcycle_stop(wcycle, WallCycleCounter::Force);
          /* Wait for non-local coordinate data to be copied from device */
          stateGpu->waitCoordinatesReadyOnHost(AtomLocality::NonLocal);
-        wallcycle_start_nocount(wcycle, ewcFORCE);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::Force);
      }
  
      // Compute wall interactions, when present.
@@ -1906,7 +1906,7 @@ void do_force(FILE*                               fplog,
                                       ddBalanceRegionHandler);
      }
  
-    wallcycle_stop(wcycle, ewcFORCE);
+    wallcycle_stop(wcycle, WallCycleCounter::Force);
  
      // VdW dispersion correction, only computed on master rank to avoid double counting
      if ((stepWork.computeEnergy || stepWork.computeVirial) && fr->dispersionCorrection && MASTER(cr))
@@ -1983,10 +1983,10 @@ void do_force(FILE*                               fplog,
              }
              else
              {
-                wallcycle_start_nocount(wcycle, ewcFORCE);
+                wallcycle_start_nocount(wcycle, WallCycleCounter::Force);
                  do_nb_verlet(
                          fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFYes, step, nrnb, wcycle);
-                wallcycle_stop(wcycle, ewcFORCE);
+                wallcycle_stop(wcycle, WallCycleCounter::Force);
              }
  
              if (stepWork.useGpuFBufferOps)
@@ -2152,7 +2152,7 @@ void do_force(FILE*                               fplog,
      {
          // NOTE: emulation kernel is not included in the balancing region,
          // but emulation mode does not target performance anyway
-        wallcycle_start_nocount(wcycle, ewcFORCE);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::Force);
          do_nb_verlet(fr,
                       ic,
                       enerd,
@@ -2162,7 +2162,7 @@ void do_force(FILE*                               fplog,
                       step,
                       nrnb,
                       wcycle);
-        wallcycle_stop(wcycle, ewcFORCE);
+        wallcycle_stop(wcycle, WallCycleCounter::Force);
      }
  
      // If on GPU PME-PP comms path, receive forces from PME before GPU buffer ops
diff --git a/src/gromacs/mdlib/trajectory_writing.cpp b/src/gromacs/mdlib/trajectory_writing.cpp

index a7787ea97d5430c1a65ab376a11aeb4070411464..53f1e7ff6ddfc6dfc9b517ffbc64cc4f6f6d269b 100644 (file)
--- a/src/gromacs/mdlib/trajectory_writing.cpp
+++ b/src/gromacs/mdlib/trajectory_writing.cpp
@@ -121,7 +121,7 @@ void do_md_trajectory_writing(FILE*                          fplog,
  
      if (mdof_flags != 0)
      {
-        wallcycle_start(mdoutf_get_wcycle(outf), ewcTRAJ);
+        wallcycle_start(mdoutf_get_wcycle(outf), WallCycleCounter::Traj);
          if (bCPT)
          {
              if (MASTER(cr))
@@ -191,7 +191,7 @@ void do_md_trajectory_writing(FILE*                          fplog,
                  sfree(x_for_confout);
              }
          }
-        wallcycle_stop(mdoutf_get_wcycle(outf), ewcTRAJ);
+        wallcycle_stop(mdoutf_get_wcycle(outf), WallCycleCounter::Traj);
      }
  #if GMX_FAHCORE
      if (MASTER(cr))
diff --git a/src/gromacs/mdlib/update.cpp b/src/gromacs/mdlib/update.cpp

index 14f03826edd06cbd0d758faa22887f166ca775e5..a2876a3123240d047142a32b83da41582d48cce3 100644 (file)
--- a/src/gromacs/mdlib/update.cpp
+++ b/src/gromacs/mdlib/update.cpp
@@ -139,7 +139,7 @@ public:
      void finish_update(const t_inputrec& inputRecord,
                         const t_mdatoms*  md,
                         t_state*          state,
-                       gmx_wallcycle_t   wcycle,
+                       gmx_wallcycle*    wcycle,
                         bool              haveConstraints);
  
      void update_sd_second_half(const t_inputrec&                 inputRecord,
@@ -151,7 +151,7 @@ public:
                                 t_state*                          state,
                                 const t_commrec*                  cr,
                                 t_nrnb*                           nrnb,
-                               gmx_wallcycle_t                   wcycle,
+                               gmx_wallcycle*                    wcycle,
                                 gmx::Constraints*                 constr,
                                 bool                              do_log,
                                 bool                              do_ene);
@@ -246,7 +246,7 @@ void Update::update_coords(const t_inputrec&                                inpu
  void Update::finish_update(const t_inputrec& inputRecord,
                             const t_mdatoms*  md,
                             t_state*          state,
-                           gmx_wallcycle_t   wcycle,
+                           gmx_wallcycle*    wcycle,
                             const bool        haveConstraints)
  {
      return impl_->finish_update(inputRecord, md, state, wcycle, haveConstraints);
@@ -259,7 +259,7 @@ void Update::update_sd_second_half(const t_inputrec& inputRecord,
                                     t_state*          state,
                                     const t_commrec*  cr,
                                     t_nrnb*           nrnb,
-                                   gmx_wallcycle_t   wcycle,
+                                   gmx_wallcycle*    wcycle,
                                     gmx::Constraints* constr,
                                     bool              do_log,
                                     bool              do_ene)
@@ -1394,7 +1394,7 @@ void Update::Impl::update_sd_second_half(const t_inputrec&                 input
                                           t_state*                          state,
                                           const t_commrec*                  cr,
                                           t_nrnb*                           nrnb,
-                                         gmx_wallcycle_t                   wcycle,
+                                         gmx_wallcycle*                    wcycle,
                                           gmx::Constraints*                 constr,
                                           bool                              do_log,
                                           bool                              do_ene)
@@ -1415,7 +1415,7 @@ void Update::Impl::update_sd_second_half(const t_inputrec&                 input
           */
          real dt = inputRecord.delta_t;
  
-        wallcycle_start(wcycle, ewcUPDATE);
+        wallcycle_start(wcycle, WallCycleCounter::Update);
  
          int nth = gmx_omp_nthreads_get(emntUpdate);
  
@@ -1448,7 +1448,7 @@ void Update::Impl::update_sd_second_half(const t_inputrec&                 input
              GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
          }
          inc_nrnb(nrnb, eNR_UPDATE, homenr);
-        wallcycle_stop(wcycle, ewcUPDATE);
+        wallcycle_stop(wcycle, WallCycleCounter::Update);
  
          /* Constrain the coordinates upd->xp for half a time step */
          bool computeVirial = false;
@@ -1473,14 +1473,14 @@ void Update::Impl::update_sd_second_half(const t_inputrec&                 input
  void Update::Impl::finish_update(const t_inputrec& inputRecord,
                                   const t_mdatoms*  md,
                                   t_state*          state,
-                                 gmx_wallcycle_t   wcycle,
+                                 gmx_wallcycle*    wcycle,
                                   const bool        haveConstraints)
  {
      /* NOTE: Currently we always integrate to a temporary buffer and
       * then copy the results back here.
       */
  
-    wallcycle_start_nocount(wcycle, ewcUPDATE);
+    wallcycle_start_nocount(wcycle, WallCycleCounter::Update);
  
      const int homenr = md->homenr;
      auto      xp     = makeConstArrayRef(xp_).subArray(0, homenr);
@@ -1521,7 +1521,7 @@ void Update::Impl::finish_update(const t_inputrec& inputRecord,
          }
      }
  
-    wallcycle_stop(wcycle, ewcUPDATE);
+    wallcycle_stop(wcycle, WallCycleCounter::Update);
  }
  
  void Update::Impl::update_coords(const t_inputrec&                 inputRecord,
diff --git a/src/gromacs/mdlib/update.h b/src/gromacs/mdlib/update.h

index 10a93faa51871728db8f856be1f8f56326e0b88a..5992a69e45b9b96aad000803a9e1be70f143bcf7 100644 (file)
--- a/src/gromacs/mdlib/update.h
+++ b/src/gromacs/mdlib/update.h
@@ -144,7 +144,7 @@ public:
      void finish_update(const t_inputrec& inputRecord,
                         const t_mdatoms*  md,
                         t_state*          state,
-                       gmx_wallcycle_t   wcycle,
+                       gmx_wallcycle*    wcycle,
                         bool              haveConstraints);
  
      /*! \brief Secong part of the SD integrator.
@@ -172,7 +172,7 @@ public:
                                 t_state*          state,
                                 const t_commrec*  cr,
                                 t_nrnb*           nrnb,
-                               gmx_wallcycle_t   wcycle,
+                               gmx_wallcycle*    wcycle,
                                 gmx::Constraints* constr,
                                 bool              do_log,
                                 bool              do_ene);
diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.cu b/src/gromacs/mdlib/update_constrain_gpu_impl.cu

index 5fc4fb86092938a64cfcd6687904792916728b6e..5a950d34e5f1f559072cc0cc519d13ffc01c7901 100644 (file)
--- a/src/gromacs/mdlib/update_constrain_gpu_impl.cu
+++ b/src/gromacs/mdlib/update_constrain_gpu_impl.cu
@@ -109,8 +109,8 @@ void UpdateConstrainGpu::Impl::integrate(GpuEventSynchronizer*             fRead
                                           const float                       dtPressureCouple,
                                           const matrix                      prVelocityScalingMatrix)
  {
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
  
      // Clearing virial matrix
      // TODO There is no point in having separate virial matrix for constraints
@@ -141,16 +141,16 @@ void UpdateConstrainGpu::Impl::integrate(GpuEventSynchronizer*             fRead
  
      coordinatesReady_->markEvent(deviceStream_);
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  
      return;
  }
  
  void UpdateConstrainGpu::Impl::scaleCoordinates(const matrix scalingMatrix)
  {
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
  
      ScalingMatrix mu(scalingMatrix);
  
@@ -166,14 +166,14 @@ void UpdateConstrainGpu::Impl::scaleCoordinates(const matrix scalingMatrix)
      //       can affect the performance if nstpcouple is small.
      deviceStream_.synchronize();
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  void UpdateConstrainGpu::Impl::scaleVelocities(const matrix scalingMatrix)
  {
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
  
      ScalingMatrix mu(scalingMatrix);
  
@@ -189,8 +189,8 @@ void UpdateConstrainGpu::Impl::scaleVelocities(const matrix scalingMatrix)
      //       can affect the performance if nstpcouple is small.
      deviceStream_.synchronize();
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  UpdateConstrainGpu::Impl::Impl(const t_inputrec&     ir,
@@ -227,8 +227,8 @@ void UpdateConstrainGpu::Impl::set(DeviceBuffer<Float3>          d_x,
                                     const t_mdatoms&              md)
  {
      // TODO wallcycle
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
  
      GMX_ASSERT(d_x != nullptr, "Coordinates device buffer should not be null.");
      GMX_ASSERT(d_v != nullptr, "Velocities device buffer should not be null.");
@@ -253,8 +253,8 @@ void UpdateConstrainGpu::Impl::set(DeviceBuffer<Float3>          d_x,
      coordinateScalingKernelLaunchConfig_.gridSize[0] =
              (numAtoms_ + c_threadsPerBlock - 1) / c_threadsPerBlock;
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  void UpdateConstrainGpu::Impl::setPbc(const PbcType pbcType, const matrix box)
diff --git a/src/gromacs/mdlib/update_vv.cpp b/src/gromacs/mdlib/update_vv.cpp

index 76334de0c5d9c7c07b390115f2a97b617f3fa8c0..0db7c2479194dee70833ae2117f5c9674f7ca43a 100644 (file)
--- a/src/gromacs/mdlib/update_vv.cpp
+++ b/src/gromacs/mdlib/update_vv.cpp
@@ -119,7 +119,7 @@ void integrateVVFirstStep(int64_t                                  step,
          /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
          rvec* vbuf = nullptr;
  
-        wallcycle_start(wcycle, ewcUPDATE);
+        wallcycle_start(wcycle, WallCycleCounter::Update);
          if (ir->eI == IntegrationAlgorithm::VV && bInitStep)
          {
              /* if using velocity verlet with full time step Ekin,
@@ -140,9 +140,9 @@ void integrateVVFirstStep(int64_t                                  step,
          upd->update_coords(
                  *ir, step, mdatoms, state, f->view().forceWithPadding(), fcdata, ekind, M, etrtVELOCITY1, cr, constr != nullptr);
  
-        wallcycle_stop(wcycle, ewcUPDATE);
+        wallcycle_stop(wcycle, WallCycleCounter::Update);
          constrain_velocities(constr, do_log, do_ene, step, state, nullptr, bCalcVir, shake_vir);
-        wallcycle_start(wcycle, ewcUPDATE);
+        wallcycle_start(wcycle, WallCycleCounter::Update);
          /* if VV, compute the pressure and constraints */
          /* For VV2, we strictly only need this if using pressure
           * control, but we really would like to have accurate pressures
@@ -163,7 +163,7 @@ void integrateVVFirstStep(int64_t                                  step,
              So we need information from the last step in the first half of the integration */
          if (bGStat || do_per_step(step - 1, nstglobalcomm))
          {
-            wallcycle_stop(wcycle, ewcUPDATE);
+            wallcycle_stop(wcycle, WallCycleCounter::Update);
              int cglo_flags =
                      ((bGStat ? CGLO_GSTAT : 0) | (bCalcEner ? CGLO_ENERGY : 0)
                       | (bTemp ? CGLO_TEMPERATURE : 0) | (bPres ? CGLO_PRESSURE : 0)
@@ -212,7 +212,7 @@ void integrateVVFirstStep(int64_t                                  step,
                          fplog, vcm, *mdatoms, makeArrayRef(state->x), makeArrayRef(state->v));
                  inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
              }
-            wallcycle_start(wcycle, ewcUPDATE);
+            wallcycle_start(wcycle, WallCycleCounter::Update);
          }
          /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
          if (!bInitStep)
@@ -241,7 +241,7 @@ void integrateVVFirstStep(int64_t                                  step,
              }
              else if (bExchanged)
              {
-                wallcycle_stop(wcycle, ewcUPDATE);
+                wallcycle_stop(wcycle, WallCycleCounter::Update);
                  /* We need the kinetic energy at minus the half step for determining
                   * the full step kinetic energy and possibly for T-coupling.*/
                  /* This may not be quite working correctly yet . . . . */
@@ -267,7 +267,7 @@ void integrateVVFirstStep(int64_t                                  step,
                                  state->box,
                                  bSumEkinhOld,
                                  CGLO_GSTAT | CGLO_TEMPERATURE);
-                wallcycle_start(wcycle, ewcUPDATE);
+                wallcycle_start(wcycle, WallCycleCounter::Update);
              }
          }
          /* if it's the initial step, we performed this first step just to get the constraint virial */
@@ -276,7 +276,7 @@ void integrateVVFirstStep(int64_t                                  step,
              copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
              sfree(vbuf);
          }
-        wallcycle_stop(wcycle, ewcUPDATE);
+        wallcycle_stop(wcycle, WallCycleCounter::Update);
      }
  
      /* compute the conserved quantity */
@@ -355,7 +355,7 @@ void integrateVVSecondStep(int64_t                                  step,
      upd->update_coords(
              *ir, step, mdatoms, state, f->view().forceWithPadding(), fcdata, ekind, M, etrtPOSITION, cr, constr != nullptr);
  
-    wallcycle_stop(wcycle, ewcUPDATE);
+    wallcycle_stop(wcycle, WallCycleCounter::Update);
  
      constrain_coordinates(
              constr, do_log, do_ene, step, state, upd->xp()->arrayRefWithPadding(), dvdl_constr, bCalcVir, shake_vir);
@@ -390,14 +390,14 @@ void integrateVVSecondStep(int64_t                                  step,
                          lastbox,
                          bSumEkinhOld,
                          (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE);
-        wallcycle_start(wcycle, ewcUPDATE);
+        wallcycle_start(wcycle, WallCycleCounter::Update);
          trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, MassQ, trotter_seq, ettTSEQ4);
          /* now we know the scaling, we can compute the positions again */
          std::copy(cbuf->begin(), cbuf->end(), state->x.begin());
  
          upd->update_coords(
                  *ir, step, mdatoms, state, f->view().forceWithPadding(), fcdata, ekind, M, etrtPOSITION, cr, constr != nullptr);
-        wallcycle_stop(wcycle, ewcUPDATE);
+        wallcycle_stop(wcycle, WallCycleCounter::Update);
  
          /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
          /* are the small terms in the shake_vir here due
diff --git a/src/gromacs/mdlib/vsite.cpp b/src/gromacs/mdlib/vsite.cpp

index 6dd3c6f104db804e0911330d095b7c9c44a88cc8..9be178c243ddc82bbde88639c1c54b1df3f651ad 100644 (file)
--- a/src/gromacs/mdlib/vsite.cpp
+++ b/src/gromacs/mdlib/vsite.cpp
@@ -2283,7 +2283,7 @@ void VirtualSitesHandler::Impl::spreadForces(ArrayRef<const RVec> x,
                                               const matrix         box,
                                               gmx_wallcycle*       wcycle)
  {
-    wallcycle_start(wcycle, ewcVSITESPREAD);
+    wallcycle_start(wcycle, WallCycleCounter::VsiteSpread);
  
      const bool useDomdec = domainInfo_.useDomdec();
  
@@ -2477,7 +2477,7 @@ void VirtualSitesHandler::Impl::spreadForces(ArrayRef<const RVec> x,
      inc_nrnb(nrnb, eNR_VSITE4FDN, vsite_count(ilists_, F_VSITE4FDN));
      inc_nrnb(nrnb, eNR_VSITEN, vsite_count(ilists_, F_VSITEN));
  
-    wallcycle_stop(wcycle, ewcVSITESPREAD);
+    wallcycle_stop(wcycle, WallCycleCounter::VsiteSpread);
  }
  
  /*! \brief Returns the an array with group indices for each atom
diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp

index e398e7693ee77aef4699e453d981c6340636751a..ecb0778652dad663adeba59e78eb2e7bbced75bd 100644 (file)
--- a/src/gromacs/mdrun/md.cpp
+++ b/src/gromacs/mdrun/md.cpp
@@ -796,7 +796,7 @@ void gmx::LegacySimulator::do_md()
      }
  
      walltime_accounting_start_time(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
+    wallcycle_start(wcycle, WallCycleCounter::Run);
      print_start(fplog, cr, walltime_accounting, "mdrun");
  
      /***********************************************************
@@ -890,7 +890,7 @@ void gmx::LegacySimulator::do_md()
                             simulationWork.useGpuPmePpCommunication);
          }
  
-        wallcycle_start(wcycle, ewcSTEP);
+        wallcycle_start(wcycle, WallCycleCounter::Step);
  
          bLastStep = (step_rel == ir->nsteps);
          t         = t0 + step * ir->delta_t;
@@ -962,7 +962,7 @@ void gmx::LegacySimulator::do_md()
          if (vsite != nullptr)
          {
              // Virtual sites need to be updated before domain decomposition and forces are calculated
-            wallcycle_start(wcycle, ewcVSITECONSTR);
+            wallcycle_start(wcycle, WallCycleCounter::VsiteConstr);
              // md-vv calculates virtual velocities once it has full-step real velocities
              vsite->construct(state->x,
                               state->v,
@@ -970,7 +970,7 @@ void gmx::LegacySimulator::do_md()
                               (!EI_VV(inputrec->eI) && needVirtualVelocitiesThisStep)
                                       ? VSiteOperation::PositionsAndVelocities
                                       : VSiteOperation::Positions);
-            wallcycle_stop(wcycle, ewcVSITECONSTR);
+            wallcycle_stop(wcycle, WallCycleCounter::VsiteConstr);
          }
  
          if (bNS && !(bFirstStep && ir->bContinuation))
@@ -1264,9 +1264,9 @@ void gmx::LegacySimulator::do_md()
              if (vsite != nullptr && needVirtualVelocitiesThisStep)
              {
                  // Positions were calculated earlier
-                wallcycle_start(wcycle, ewcVSITECONSTR);
+                wallcycle_start(wcycle, WallCycleCounter::VsiteConstr);
                  vsite->construct(state->x, state->v, state->box, VSiteOperation::Velocities);
-                wallcycle_stop(wcycle, ewcVSITECONSTR);
+                wallcycle_stop(wcycle, WallCycleCounter::VsiteConstr);
              }
          }
  
@@ -1407,7 +1407,7 @@ void gmx::LegacySimulator::do_md()
  
          if (!useGpuForUpdate)
          {
-            wallcycle_start(wcycle, ewcUPDATE);
+            wallcycle_start(wcycle, WallCycleCounter::Update);
          }
          /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
          if (bTrotter)
@@ -1564,7 +1564,7 @@ void gmx::LegacySimulator::do_md()
                  upd.update_coords(
                          *ir, step, mdatoms, state, forceCombined, fcdata, ekind, M, etrtPOSITION, cr, constr != nullptr);
  
-                wallcycle_stop(wcycle, ewcUPDATE);
+                wallcycle_stop(wcycle, WallCycleCounter::Update);
  
                  constrain_coordinates(constr,
                                        do_log,
@@ -1934,7 +1934,7 @@ void gmx::LegacySimulator::do_md()
              rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
          }
  
-        cycles = wallcycle_stop(wcycle, ewcSTEP);
+        cycles = wallcycle_stop(wcycle, WallCycleCounter::Step);
          if (DOMAINDECOMP(cr) && wcycle)
          {
              dd_cycles_add(cr->dd, cycles, ddCyclStep);
diff --git a/src/gromacs/mdrun/mimic.cpp b/src/gromacs/mdrun/mimic.cpp

index d5403f19454b787b670876be4ca5b487e2e948cc..25feef03cd6163637adb110a880d630486b97d55 100644 (file)
--- a/src/gromacs/mdrun/mimic.cpp
+++ b/src/gromacs/mdrun/mimic.cpp
@@ -387,7 +387,7 @@ void gmx::LegacySimulator::do_mimic()
      }
  
      walltime_accounting_start_time(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
+    wallcycle_start(wcycle, WallCycleCounter::Run);
      print_start(fplog, cr, walltime_accounting, "mdrun");
  
      /***********************************************************
@@ -438,7 +438,7 @@ void gmx::LegacySimulator::do_mimic()
      while (!isLastStep)
      {
          isLastStep = (isLastStep || (ir->nsteps >= 0 && step_rel == ir->nsteps));
-        wallcycle_start(wcycle, ewcSTEP);
+        wallcycle_start(wcycle, WallCycleCounter::Step);
  
          t = step;
  
@@ -464,9 +464,9 @@ void gmx::LegacySimulator::do_mimic()
              }
              if (constructVsites)
              {
-                wallcycle_start(wcycle, ewcVSITECONSTR);
+                wallcycle_start(wcycle, WallCycleCounter::VsiteConstr);
                  vsite->construct(state->x, state->v, state->box, VSiteOperation::PositionsAndVelocities);
-                wallcycle_stop(wcycle, ewcVSITECONSTR);
+                wallcycle_stop(wcycle, WallCycleCounter::VsiteConstr);
              }
          }
  
@@ -756,7 +756,7 @@ void gmx::LegacySimulator::do_mimic()
              print_time(stderr, walltime_accounting, step, ir, cr);
          }
  
-        cycles = wallcycle_stop(wcycle, ewcSTEP);
+        cycles = wallcycle_stop(wcycle, WallCycleCounter::Step);
          if (DOMAINDECOMP(cr) && wcycle)
          {
              dd_cycles_add(cr->dd, cycles, ddCyclStep);
diff --git a/src/gromacs/mdrun/minimize.cpp b/src/gromacs/mdrun/minimize.cpp

index e564223b81cfafb6fde6eb33b88b9495f10db50c..2725353d5c0cf8bba0a6db93c12450eb1e4e2b72 100644 (file)
--- a/src/gromacs/mdrun/minimize.cpp
+++ b/src/gromacs/mdrun/minimize.cpp
@@ -140,18 +140,18 @@ typedef struct em_state
  static void print_em_start(FILE*                     fplog,
                             const t_commrec*          cr,
                             gmx_walltime_accounting_t walltime_accounting,
-                           gmx_wallcycle_t           wcycle,
+                           gmx_wallcycle*            wcycle,
                             const char*               name)
  {
      walltime_accounting_start_time(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
+    wallcycle_start(wcycle, WallCycleCounter::Run);
      print_start(fplog, cr, walltime_accounting, name);
  }
  
  //! Stop counting time for EM
-static void em_time_end(gmx_walltime_accounting_t walltime_accounting, gmx_wallcycle_t wcycle)
+static void em_time_end(gmx_walltime_accounting_t walltime_accounting, gmx_wallcycle* wcycle)
  {
-    wallcycle_stop(wcycle, ewcRUN);
+    wallcycle_stop(wcycle, WallCycleCounter::Run);
  
      walltime_accounting_end_time(walltime_accounting);
  }
@@ -516,7 +516,7 @@ static void init_em(FILE*                fplog,
  static void finish_em(const t_commrec*          cr,
                        gmx_mdoutf_t              outf,
                        gmx_walltime_accounting_t walltime_accounting,
-                      gmx_wallcycle_t           wcycle)
+                      gmx_wallcycle*            wcycle)
  {
      if (!thisRankHasDuty(cr, DUTY_PME))
      {
@@ -793,7 +793,7 @@ static void em_dd_partition_system(FILE*                fplog,
                                     VirtualSitesHandler* vsite,
                                     gmx::Constraints*    constr,
                                     t_nrnb*              nrnb,
-                                   gmx_wallcycle_t      wcycle)
+                                   gmx_wallcycle*       wcycle)
  {
      /* Repartition the domain decomposition */
      dd_partition_system(fplog,
@@ -915,7 +915,7 @@ public:
      //! Manages flop accounting.
      t_nrnb* nrnb;
      //! Manages wall cycle accounting.
-    gmx_wallcycle_t wcycle;
+    gmx_wallcycle* wcycle;
      //! Coordinates global reduction.
      gmx_global_stat_t gstat;
      //! Handles virtual sites.
@@ -1026,7 +1026,7 @@ void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres,
      /* Communicate stuff when parallel */
      if (PAR(cr) && inputrec->eI != IntegrationAlgorithm::NM)
      {
-        wallcycle_start(wcycle, ewcMoveE);
+        wallcycle_start(wcycle, WallCycleCounter::MoveE);
  
          global_stat(*gstat,
                      cr,
@@ -1041,7 +1041,7 @@ void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres,
                      FALSE,
                      CGLO_ENERGY | CGLO_PRESSURE | CGLO_CONSTRAINT);
  
-        wallcycle_stop(wcycle, ewcMoveE);
+        wallcycle_stop(wcycle, WallCycleCounter::MoveE);
      }
  
      if (fr->dispersionCorrection)
diff --git a/src/gromacs/mdrun/rerun.cpp b/src/gromacs/mdrun/rerun.cpp

index e1234ade76079ca051257b9d52b6ab225868e260..157d18e55bcf548dc19d4bcf9731587ae35c659a 100644 (file)
--- a/src/gromacs/mdrun/rerun.cpp
+++ b/src/gromacs/mdrun/rerun.cpp
@@ -431,7 +431,7 @@ void gmx::LegacySimulator::do_rerun()
      }
  
      walltime_accounting_start_time(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
+    wallcycle_start(wcycle, WallCycleCounter::Run);
      print_start(fplog, cr, walltime_accounting, "mdrun");
  
      /***********************************************************
@@ -528,7 +528,7 @@ void gmx::LegacySimulator::do_rerun()
      isLastStep = (isLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
      while (!isLastStep)
      {
-        wallcycle_start(wcycle, ewcSTEP);
+        wallcycle_start(wcycle, WallCycleCounter::Step);
  
          if (rerun_fr.bStep)
          {
@@ -863,7 +863,7 @@ void gmx::LegacySimulator::do_rerun()
              rerun_parallel_comm(cr, &rerun_fr, &isLastStep);
          }
  
-        cycles = wallcycle_stop(wcycle, ewcSTEP);
+        cycles = wallcycle_stop(wcycle, WallCycleCounter::Step);
          if (DOMAINDECOMP(cr) && wcycle)
          {
              dd_cycles_add(cr->dd, cycles, ddCyclStep);
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp

index 33f9145889d2ce1899c38971f230e823691561e3..45fbace426dd5d59b788f85f29ace0233511cabc 100644 (file)
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -653,7 +653,7 @@ static void finish_run(FILE*                     fplog,
                         const t_commrec*          cr,
                         const t_inputrec&         inputrec,
                         t_nrnb                    nrnb[],
-                       gmx_wallcycle_t           wcycle,
+                       gmx_wallcycle*            wcycle,
                         gmx_walltime_accounting_t walltime_accounting,
                         nonbonded_verlet_t*       nbv,
                         const gmx_pme_t*          pme,
@@ -803,7 +803,6 @@ int Mdrunner::mdrunner()
      real                        ewaldcoeff_q     = 0;
      real                        ewaldcoeff_lj    = 0;
      int                         nChargePerturbed = -1, nTypePerturbed = 0;
-    gmx_wallcycle_t             wcycle;
      gmx_walltime_accounting_t   walltime_accounting = nullptr;
      MembedHolder                membedHolder(filenames.size(), filenames.data());
  
@@ -1598,15 +1597,16 @@ int Mdrunner::mdrunner()
                          "The -resetstep functionality is deprecated, and may be removed in a "
                          "future version.");
      }
-    wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
+    std::unique_ptr<gmx_wallcycle> wcycle =
+            wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
  
      if (PAR(cr))
      {
          /* Master synchronizes its value of reset_counters with all nodes
           * including PME only nodes */
-        int64_t reset_counters = wcycle_get_reset_counters(wcycle);
+        int64_t reset_counters = wcycle_get_reset_counters(wcycle.get());
          gmx_bcast(sizeof(reset_counters), &reset_counters, cr->mpi_comm_mysim);
-        wcycle_set_reset_counters(wcycle, reset_counters);
+        wcycle_set_reset_counters(wcycle.get(), reset_counters);
      }
  
      // Membrane embedding must be initialized before we call init_forcerec()
@@ -1680,7 +1680,7 @@ int Mdrunner::mdrunner()
                                          deviceStreamManager.get(),
                                          mtop,
                                          box,
-                                        wcycle);
+                                        wcycle.get());
          // TODO: Move the logic below to a GPU bonded builder
          if (runScheduleWork.simulationWork.useGpuBonded)
          {
@@ -1692,7 +1692,7 @@ int Mdrunner::mdrunner()
                      fr->ic->epsfac * fr->fudgeQQ,
                      deviceStreamManager->context(),
                      deviceStreamManager->bondedStream(havePPDomainDecomposition(cr)),
-                    wcycle);
+                    wcycle.get());
              fr->gpuBonded = gpuBonded.get();
          }
  
@@ -1910,7 +1910,7 @@ int Mdrunner::mdrunner()
  
          /* Let makeConstraints know whether we have essential dynamics constraints. */
          auto constr = makeConstraints(
-                mtop, *inputrec, pull_work, doEssentialDynamics, fplog, cr, ms, &nrnb, wcycle, fr->bMolPBC);
+                mtop, *inputrec, pull_work, doEssentialDynamics, fplog, cr, ms, &nrnb, wcycle.get(), fr->bMolPBC);
  
          /* Energy terms and groups */
          gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(),
@@ -1927,7 +1927,7 @@ int Mdrunner::mdrunner()
          /* Set up interactive MD (IMD) */
          auto imdSession = makeImdSession(inputrec.get(),
                                           cr,
-                                         wcycle,
+                                         wcycle.get(),
                                           &enerd,
                                           ms,
                                           mtop,
@@ -1960,11 +1960,11 @@ int Mdrunner::mdrunner()
              fr->gpuForceReduction[gmx::AtomLocality::Local] = std::make_unique<gmx::GpuForceReduction>(
                      deviceStreamManager->context(),
                      deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedLocal),
-                    wcycle);
+                    wcycle.get());
              fr->gpuForceReduction[gmx::AtomLocality::NonLocal] = std::make_unique<gmx::GpuForceReduction>(
                      deviceStreamManager->context(),
                      deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedNonLocal),
-                    wcycle);
+                    wcycle.get());
          }
  
          std::unique_ptr<gmx::StatePropagatorDataGpu> stateGpu;
@@ -1979,7 +1979,7 @@ int Mdrunner::mdrunner()
              GMX_RELEASE_ASSERT(deviceStreamManager != nullptr,
                                 "GPU device stream manager should be initialized to use GPU.");
              stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
-                    *deviceStreamManager, transferKind, pme_gpu_get_block_size(fr->pmedata), wcycle);
+                    *deviceStreamManager, transferKind, pme_gpu_get_block_size(fr->pmedata), wcycle.get());
              fr->stateGpu = stateGpu.get();
          }
  
@@ -1993,7 +1993,7 @@ int Mdrunner::mdrunner()
  
  
          simulatorBuilder.add(SimulatorEnv(fplog, cr, ms, mdlog, oenv));
-        simulatorBuilder.add(Profiling(&nrnb, walltime_accounting, wcycle));
+        simulatorBuilder.add(Profiling(&nrnb, walltime_accounting, wcycle.get()));
          simulatorBuilder.add(ConstraintsParam(
                  constr.get(), enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr, vsite.get()));
          // TODO: Separate `fr` to a separate add, and make the `build` handle the coupling sensibly.
@@ -2033,14 +2033,14 @@ int Mdrunner::mdrunner()
          gmx_pmeonly(pmedata,
                      cr,
                      &nrnb,
-                    wcycle,
+                    wcycle.get(),
                      walltime_accounting,
                      inputrec.get(),
                      pmeRunMode,
                      deviceStreamManager.get());
      }
  
-    wallcycle_stop(wcycle, ewcRUN);
+    wallcycle_stop(wcycle.get(), WallCycleCounter::Run);
  
      /* Finish up, write some stuff
       * if rerunMD, don't write last frame again
@@ -2050,14 +2050,12 @@ int Mdrunner::mdrunner()
                 cr,
                 *inputrec,
                 &nrnb,
-               wcycle,
+               wcycle.get(),
                 walltime_accounting,
                 fr ? fr->nbv.get() : nullptr,
                 pmedata,
                 EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
  
-    // clean up cycle counter
-    wallcycle_destroy(wcycle);
  
      deviceStreamManager.reset(nullptr);
      // Free PME data
diff --git a/src/gromacs/mdrun/shellfc.cpp b/src/gromacs/mdrun/shellfc.cpp

index 0b630b6a5292a234de64df43505c9933df9ca8dd..af47d79566dfe08425692341b5a65f473dd0146b 100644 (file)
--- a/src/gromacs/mdrun/shellfc.cpp
+++ b/src/gromacs/mdrun/shellfc.cpp
@@ -957,7 +957,7 @@ void relax_shell_flexcon(FILE*                         fplog,
                           tensor                        force_vir,
                           const t_mdatoms&              md,
                           t_nrnb*                       nrnb,
-                         gmx_wallcycle_t               wcycle,
+                         gmx_wallcycle*                wcycle,
                           gmx_shellfc_t*                shfc,
                           t_forcerec*                   fr,
                           gmx::MdrunScheduleWorkload*   runScheduleWork,
diff --git a/src/gromacs/mdrun/shellfc.h b/src/gromacs/mdrun/shellfc.h

index 6d3120fd98c7f58c9a43a13d9dc5875352958726..b8a314679c3638fb3d0b5235b0721ce073c09ee6 100644 (file)
--- a/src/gromacs/mdrun/shellfc.h
+++ b/src/gromacs/mdrun/shellfc.h
@@ -116,7 +116,7 @@ void relax_shell_flexcon(FILE*                               log,
                           tensor                              force_vir,
                           const t_mdatoms&                    md,
                           t_nrnb*                             nrnb,
-                         gmx_wallcycle_t                     wcycle,
+                         gmx_wallcycle*                      wcycle,
                           gmx_shellfc_t*                      shfc,
                           t_forcerec*                         fr,
                           gmx::MdrunScheduleWorkload*         runScheduleWork,
diff --git a/src/gromacs/mdrun/tpi.cpp b/src/gromacs/mdrun/tpi.cpp

index 056cbb29d4c3dbf277f8071ea1f2a39cf7f3b744..758e2ae6917d9adc9162469b77a7b86724538b5c 100644 (file)
--- a/src/gromacs/mdrun/tpi.cpp
+++ b/src/gromacs/mdrun/tpi.cpp
@@ -305,7 +305,7 @@ void LegacySimulator::do_tpi()
  
      /* Print to log file  */
      walltime_accounting_start_time(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
+    wallcycle_start(wcycle, WallCycleCounter::Run);
      print_start(fplog, cr, walltime_accounting, "Test Particle Insertion");
  
      /* The last charge group is the group to be inserted */
diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp

index 16fbe131f86ad1c733b25a05b3f96079ff835cea..69e11d69c0671f3ce2765f900f1a0baa462e6a11 100644 (file)
--- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp
+++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp
@@ -136,8 +136,8 @@ StatePropagatorDataGpu::Impl::~Impl() {}
  
  void StatePropagatorDataGpu::Impl::reinit(int numAtomsLocal, int numAtomsAll)
  {
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start_nocount(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start_nocount(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
  
      numAtomsLocal_ = numAtomsLocal;
      numAtomsAll_   = numAtomsAll;
@@ -174,8 +174,8 @@ void StatePropagatorDataGpu::Impl::reinit(int numAtomsLocal, int numAtomsAll)
          clearDeviceBufferAsync(&d_f_, 0, d_fCapacity_, *localStream_);
      }
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  std::tuple<int, int> StatePropagatorDataGpu::Impl::getAtomRangesFromAtomLocality(AtomLocality atomLocality)
@@ -316,8 +316,8 @@ void StatePropagatorDataGpu::Impl::copyCoordinatesToGpu(const gmx::ArrayRef<cons
      GMX_ASSERT(deviceStream != nullptr,
                 "No stream is valid for copying positions with given atom locality.");
  
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
  
      copyToDevice(d_x_, h_x, d_xSize_, atomLocality, *deviceStream);
  
@@ -330,8 +330,8 @@ void StatePropagatorDataGpu::Impl::copyCoordinatesToGpu(const gmx::ArrayRef<cons
          xReadyOnDevice_[atomLocality].markEvent(*deviceStream);
      }
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  GpuEventSynchronizer*
@@ -363,10 +363,10 @@ StatePropagatorDataGpu::Impl::getCoordinatesReadyOnDeviceEvent(AtomLocality atom
  
  void StatePropagatorDataGpu::Impl::waitCoordinatesCopiedToDevice(AtomLocality atomLocality)
  {
-    wallcycle_start(wcycle_, ewcWAIT_GPU_STATE_PROPAGATOR_DATA);
+    wallcycle_start(wcycle_, WallCycleCounter::WaitGpuStatePropagatorData);
      GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
      xReadyOnDevice_[atomLocality].waitForEvent();
-    wallcycle_stop(wcycle_, ewcWAIT_GPU_STATE_PROPAGATOR_DATA);
+    wallcycle_stop(wcycle_, WallCycleCounter::WaitGpuStatePropagatorData);
  }
  
  GpuEventSynchronizer* StatePropagatorDataGpu::Impl::xUpdatedOnDevice()
@@ -381,22 +381,22 @@ void StatePropagatorDataGpu::Impl::copyCoordinatesFromGpu(gmx::ArrayRef<gmx::RVe
      GMX_ASSERT(deviceStream != nullptr,
                 "No stream is valid for copying positions with given atom locality.");
  
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
  
      copyFromDevice(h_x, d_x_, d_xSize_, atomLocality, *deviceStream);
      // Note: unlike copyCoordinatesToGpu this is not used in OpenCL, and the conditional is not needed.
      xReadyOnHost_[atomLocality].markEvent(*deviceStream);
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  void StatePropagatorDataGpu::Impl::waitCoordinatesReadyOnHost(AtomLocality atomLocality)
  {
-    wallcycle_start(wcycle_, ewcWAIT_GPU_STATE_PROPAGATOR_DATA);
+    wallcycle_start(wcycle_, WallCycleCounter::WaitGpuStatePropagatorData);
      xReadyOnHost_[atomLocality].waitForEvent();
-    wallcycle_stop(wcycle_, ewcWAIT_GPU_STATE_PROPAGATOR_DATA);
+    wallcycle_stop(wcycle_, WallCycleCounter::WaitGpuStatePropagatorData);
  }
  
  
@@ -413,14 +413,14 @@ void StatePropagatorDataGpu::Impl::copyVelocitiesToGpu(const gmx::ArrayRef<const
      GMX_ASSERT(deviceStream != nullptr,
                 "No stream is valid for copying velocities with given atom locality.");
  
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
  
      copyToDevice(d_v_, h_v, d_vSize_, atomLocality, *deviceStream);
      vReadyOnDevice_[atomLocality].markEvent(*deviceStream);
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality)
@@ -436,21 +436,21 @@ void StatePropagatorDataGpu::Impl::copyVelocitiesFromGpu(gmx::ArrayRef<gmx::RVec
      GMX_ASSERT(deviceStream != nullptr,
                 "No stream is valid for copying velocities with given atom locality.");
  
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
  
      copyFromDevice(h_v, d_v_, d_vSize_, atomLocality, *deviceStream);
      vReadyOnHost_[atomLocality].markEvent(*deviceStream);
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  void StatePropagatorDataGpu::Impl::waitVelocitiesReadyOnHost(AtomLocality atomLocality)
  {
-    wallcycle_start(wcycle_, ewcWAIT_GPU_STATE_PROPAGATOR_DATA);
+    wallcycle_start(wcycle_, WallCycleCounter::WaitGpuStatePropagatorData);
      vReadyOnHost_[atomLocality].waitForEvent();
-    wallcycle_stop(wcycle_, ewcWAIT_GPU_STATE_PROPAGATOR_DATA);
+    wallcycle_stop(wcycle_, WallCycleCounter::WaitGpuStatePropagatorData);
  }
  
  
@@ -467,14 +467,14 @@ void StatePropagatorDataGpu::Impl::copyForcesToGpu(const gmx::ArrayRef<const gmx
      GMX_ASSERT(deviceStream != nullptr,
                 "No stream is valid for copying forces with given atom locality.");
  
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
  
      copyToDevice(d_f_, h_f, d_fSize_, atomLocality, *deviceStream);
      fReadyOnDevice_[atomLocality].markEvent(*deviceStream);
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  void StatePropagatorDataGpu::Impl::clearForcesOnGpu(AtomLocality atomLocality)
@@ -484,13 +484,13 @@ void StatePropagatorDataGpu::Impl::clearForcesOnGpu(AtomLocality atomLocality)
      GMX_ASSERT(deviceStream != nullptr,
                 "No stream is valid for clearing forces with given atom locality.");
  
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
  
      clearOnDevice(d_f_, d_fSize_, atomLocality, *deviceStream);
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getForcesReadyOnDeviceEvent(AtomLocality atomLocality,
@@ -518,21 +518,21 @@ void StatePropagatorDataGpu::Impl::copyForcesFromGpu(gmx::ArrayRef<gmx::RVec> h_
      GMX_ASSERT(deviceStream != nullptr,
                 "No stream is valid for copying forces with given atom locality.");
  
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
  
      copyFromDevice(h_f, d_f_, d_fSize_, atomLocality, *deviceStream);
      fReadyOnHost_[atomLocality].markEvent(*deviceStream);
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_STATE_PROPAGATOR_DATA);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  void StatePropagatorDataGpu::Impl::waitForcesReadyOnHost(AtomLocality atomLocality)
  {
-    wallcycle_start(wcycle_, ewcWAIT_GPU_STATE_PROPAGATOR_DATA);
+    wallcycle_start(wcycle_, WallCycleCounter::WaitGpuStatePropagatorData);
      fReadyOnHost_[atomLocality].waitForEvent();
-    wallcycle_stop(wcycle_, ewcWAIT_GPU_STATE_PROPAGATOR_DATA);
+    wallcycle_stop(wcycle_, WallCycleCounter::WaitGpuStatePropagatorData);
  }
  
  const DeviceStream* StatePropagatorDataGpu::Impl::getUpdateStream()
diff --git a/src/gromacs/modularsimulator/propagator.cpp b/src/gromacs/modularsimulator/propagator.cpp

index 7a6074d0cf9c1ba2776fa1ac35d34118d6adcfc7..09edcd0890f84a972b27ecc8aad04a7c37f0a153 100644 (file)
--- a/src/gromacs/modularsimulator/propagator.cpp
+++ b/src/gromacs/modularsimulator/propagator.cpp
@@ -154,7 +154,7 @@ template<NumVelocityScalingValues        numStartVelocityScalingValues,
           NumVelocityScalingValues        numEndVelocityScalingValues>
  void Propagator<IntegrationStep::PositionsOnly>::run()
  {
-    wallcycle_start(wcycle_, ewcUPDATE);
+    wallcycle_start(wcycle_, WallCycleCounter::Update);
  
      auto xp = as_rvec_array(statePropagatorData_->positionsView().paddedArrayRef().data());
      auto x  = as_rvec_array(statePropagatorData_->constPositionsView().paddedArrayRef().data());
@@ -178,7 +178,7 @@ void Propagator<IntegrationStep::PositionsOnly>::run()
          }
          GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
      }
-    wallcycle_stop(wcycle_, ewcUPDATE);
+    wallcycle_stop(wcycle_, WallCycleCounter::Update);
  }
  
  //! Propagation (velocity only)
@@ -188,7 +188,7 @@ template<NumVelocityScalingValues        numStartVelocityScalingValues,
           NumVelocityScalingValues        numEndVelocityScalingValues>
  void Propagator<IntegrationStep::VelocitiesOnly>::run()
  {
-    wallcycle_start(wcycle_, ewcUPDATE);
+    wallcycle_start(wcycle_, WallCycleCounter::Update);
  
      auto v = as_rvec_array(statePropagatorData_->velocitiesView().paddedArrayRef().data());
      auto f = as_rvec_array(statePropagatorData_->constForcesView().force().data());
@@ -258,7 +258,7 @@ void Propagator<IntegrationStep::VelocitiesOnly>::run()
          }
          GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
      }
-    wallcycle_stop(wcycle_, ewcUPDATE);
+    wallcycle_stop(wcycle_, WallCycleCounter::Update);
  }
  
  //! Propagation (leapfrog case - position and velocity)
@@ -268,7 +268,7 @@ template<NumVelocityScalingValues        numStartVelocityScalingValues,
           NumVelocityScalingValues        numEndVelocityScalingValues>
  void Propagator<IntegrationStep::LeapFrog>::run()
  {
-    wallcycle_start(wcycle_, ewcUPDATE);
+    wallcycle_start(wcycle_, WallCycleCounter::Update);
  
      auto xp = as_rvec_array(statePropagatorData_->positionsView().paddedArrayRef().data());
      auto x  = as_rvec_array(statePropagatorData_->constPositionsView().paddedArrayRef().data());
@@ -342,7 +342,7 @@ void Propagator<IntegrationStep::LeapFrog>::run()
          }
          GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
      }
-    wallcycle_stop(wcycle_, ewcUPDATE);
+    wallcycle_stop(wcycle_, WallCycleCounter::Update);
  }
  
  //! Propagation (velocity verlet stage 2 - velocity and position)
@@ -352,7 +352,7 @@ template<NumVelocityScalingValues        numStartVelocityScalingValues,
           NumVelocityScalingValues        numEndVelocityScalingValues>
  void Propagator<IntegrationStep::VelocityVerletPositionsAndVelocities>::run()
  {
-    wallcycle_start(wcycle_, ewcUPDATE);
+    wallcycle_start(wcycle_, WallCycleCounter::Update);
  
      auto xp = as_rvec_array(statePropagatorData_->positionsView().paddedArrayRef().data());
      auto x  = as_rvec_array(statePropagatorData_->constPositionsView().paddedArrayRef().data());
@@ -426,7 +426,7 @@ void Propagator<IntegrationStep::VelocityVerletPositionsAndVelocities>::run()
          }
          GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
      }
-    wallcycle_stop(wcycle_, ewcUPDATE);
+    wallcycle_stop(wcycle_, WallCycleCounter::Update);
  }
  
  template<IntegrationStep algorithm>
diff --git a/src/gromacs/modularsimulator/simulatoralgorithm.cpp b/src/gromacs/modularsimulator/simulatoralgorithm.cpp

index afce03938b06058c0db5913c5efe100d819d836d..ff20ef7cabd0c4157f2f60f4fa046aa66660a405 100644 (file)
--- a/src/gromacs/modularsimulator/simulatoralgorithm.cpp
+++ b/src/gromacs/modularsimulator/simulatoralgorithm.cpp
@@ -228,7 +228,7 @@ void ModularSimulatorAlgorithm::simulatorSetup()
      }
  
      walltime_accounting_start_time(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
+    wallcycle_start(wcycle, WallCycleCounter::Run);
      print_start(fplog, cr, walltime_accounting, "mdrun");
  
      step_ = inputrec->init_step;
@@ -275,7 +275,7 @@ void ModularSimulatorAlgorithm::preStep(Step step, Time gmx_unused time, bool is
      stophandlerCurrentStep_ = step;
      stopHandler_->setSignal();
  
-    wallcycle_start(wcycle, ewcSTEP);
+    wallcycle_start(wcycle, WallCycleCounter::Step);
  }
  
  void ModularSimulatorAlgorithm::postStep(Step step, Time gmx_unused time)
@@ -301,7 +301,7 @@ void ModularSimulatorAlgorithm::postStep(Step step, Time gmx_unused time)
          print_time(stderr, walltime_accounting, step, inputrec, cr);
      }
  
-    double cycles = wallcycle_stop(wcycle, ewcSTEP);
+    double cycles = wallcycle_stop(wcycle, WallCycleCounter::Step);
      if (DOMAINDECOMP(cr) && wcycle)
      {
          dd_cycles_add(cr->dd, static_cast<float>(cycles), ddCyclStep);
diff --git a/src/gromacs/modularsimulator/statepropagatordata.cpp b/src/gromacs/modularsimulator/statepropagatordata.cpp

index 1cf9df615ecd4f2cfeb28725ade97e7df243d98f..6c1d1a6ad92f41218407d62251b5b25f0ebcfd4d 100644 (file)
--- a/src/gromacs/modularsimulator/statepropagatordata.cpp
+++ b/src/gromacs/modularsimulator/statepropagatordata.cpp
@@ -393,7 +393,7 @@ StatePropagatorData::Element::registerTrajectoryWriterCallback(TrajectoryEvent e
  
  void StatePropagatorData::Element::write(gmx_mdoutf_t outf, Step currentStep, Time currentTime)
  {
-    wallcycle_start(mdoutf_get_wcycle(outf), ewcTRAJ);
+    wallcycle_start(mdoutf_get_wcycle(outf), WallCycleCounter::Traj);
      unsigned int mdof_flags = 0;
      if (do_per_step(currentStep, nstxout_))
      {
@@ -430,7 +430,7 @@ void StatePropagatorData::Element::write(gmx_mdoutf_t outf, Step currentStep, Ti
  
      if (mdof_flags == 0)
      {
-        wallcycle_stop(mdoutf_get_wcycle(outf), ewcTRAJ);
+        wallcycle_stop(mdoutf_get_wcycle(outf), WallCycleCounter::Traj);
          return;
      }
      GMX_ASSERT(localStateBackup_, "Trajectory writing called, but no state saved.");
@@ -455,7 +455,7 @@ void StatePropagatorData::Element::write(gmx_mdoutf_t outf, Step currentStep, Ti
      {
          localStateBackup_.reset();
      }
-    wallcycle_stop(mdoutf_get_wcycle(outf), ewcTRAJ);
+    wallcycle_stop(mdoutf_get_wcycle(outf), WallCycleCounter::Traj);
  }
  
  void StatePropagatorData::Element::elementSetup()
@@ -619,7 +619,7 @@ void StatePropagatorData::Element::trajectoryWriterTeardown(gmx_mdoutf* gmx_unus
  
      GMX_ASSERT(localStateBackup_, "Final trajectory writing called, but no state saved.");
  
-    wallcycle_start(mdoutf_get_wcycle(outf), ewcTRAJ);
+    wallcycle_start(mdoutf_get_wcycle(outf), WallCycleCounter::Traj);
      if (DOMAINDECOMP(cr_))
      {
          auto globalXRef =
@@ -664,7 +664,7 @@ void StatePropagatorData::Element::trajectoryWriterTeardown(gmx_mdoutf* gmx_unus
                              pbcType_,
                              localStateBackup_->box);
      }
-    wallcycle_stop(mdoutf_get_wcycle(outf), ewcTRAJ);
+    wallcycle_stop(mdoutf_get_wcycle(outf), WallCycleCounter::Traj);
  }
  
  std::optional<SignallerCallback> StatePropagatorData::Element::registerLastStepCallback()
diff --git a/src/gromacs/nbnxm/gpu_common.h b/src/gromacs/nbnxm/gpu_common.h

index cdc35d093aff7cb89498ce7a7c41531932724194..bf9c25a5e70a16db2942bebf4822ee763a200fce 100644 (file)
--- a/src/gromacs/nbnxm/gpu_common.h
+++ b/src/gromacs/nbnxm/gpu_common.h
@@ -286,18 +286,18 @@ bool gpu_try_finish_task(NbnxmGpu*                nb,
              // we start without counting and only when the task finished we issue a
              // start/stop to increment.
              // GpuTaskCompletion::Wait mode the timing is expected to be done in the caller.
-            wallcycle_start_nocount(wcycle, ewcWAIT_GPU_NB_L);
+            wallcycle_start_nocount(wcycle, WallCycleCounter::WaitGpuNbL);
  
              if (!haveStreamTasksCompleted(*nb->deviceStreams[iLocality]))
              {
-                wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
+                wallcycle_stop(wcycle, WallCycleCounter::WaitGpuNbL);
  
                  // Early return to skip the steps below that we have to do only
                  // after the NB task completed
                  return false;
              }
  
-            wallcycle_increment_event_count(wcycle, ewcWAIT_GPU_NB_L);
+            wallcycle_increment_event_count(wcycle, WallCycleCounter::WaitGpuNbL);
          }
          else if (haveResultToWaitFor)
          {
@@ -361,8 +361,8 @@ float gpu_wait_finish_task(NbnxmGpu*                nb,
                             gmx_wallcycle*           wcycle)
  {
      auto cycleCounter = (atomToInteractionLocality(aloc) == InteractionLocality::Local)
-                                ? ewcWAIT_GPU_NB_L
-                                : ewcWAIT_GPU_NB_NL;
+                                ? WallCycleCounter::WaitGpuNbL
+                                : WallCycleCounter::WaitGpuNbNL;
  
      wallcycle_start(wcycle, cycleCounter);
      gpu_try_finish_task(nb, stepWork, aloc, e_lj, e_el, shiftForces, GpuTaskCompletion::Wait, wcycle);
diff --git a/src/gromacs/nbnxm/kerneldispatch.cpp b/src/gromacs/nbnxm/kerneldispatch.cpp

index 846e2bbdb32454cc85d818122bcaa6feb449fa3d..93e28f8487da8041679dc4c6578aee1544727626 100644 (file)
--- a/src/gromacs/nbnxm/kerneldispatch.cpp
+++ b/src/gromacs/nbnxm/kerneldispatch.cpp
@@ -259,7 +259,7 @@ static void nbnxn_kernel_cpu(const PairlistSet&             pairlistSet,
      auto shiftVecPointer = as_rvec_array(shiftVectors.data());
  
      int gmx_unused nthreads = gmx_omp_nthreads_get(emntNonbonded);
-    wallcycle_sub_start(wcycle, ewcsNONBONDED_CLEAR);
+    wallcycle_sub_start(wcycle, WallCycleSubCounter::NonbondedClear);
  #pragma omp parallel for schedule(static) num_threads(nthreads)
      for (gmx::index nb = 0; nb < pairlists.ssize(); nb++)
      {
@@ -276,8 +276,8 @@ static void nbnxn_kernel_cpu(const PairlistSet&             pairlistSet,
  
          if (nb == 0)
          {
-            wallcycle_sub_stop(wcycle, ewcsNONBONDED_CLEAR);
-            wallcycle_sub_start(wcycle, ewcsNONBONDED_KERNEL);
+            wallcycle_sub_stop(wcycle, WallCycleSubCounter::NonbondedClear);
+            wallcycle_sub_start(wcycle, WallCycleSubCounter::NonbondedKernel);
          }
  
          // TODO: Change to reference
@@ -375,7 +375,7 @@ static void nbnxn_kernel_cpu(const PairlistSet&             pairlistSet,
              }
          }
      }
-    wallcycle_sub_stop(wcycle, ewcsNONBONDED_KERNEL);
+    wallcycle_sub_stop(wcycle, WallCycleSubCounter::NonbondedKernel);
  
      if (stepWork.computeEnergy)
      {
@@ -547,7 +547,7 @@ void nonbonded_verlet_t::dispatchFreeEnergyKernel(gmx::InteractionLocality
      GMX_ASSERT(gmx_omp_nthreads_get(emntNonbonded) == nbl_fep.ssize(),
                 "Number of lists should be same as number of NB threads");
  
-    wallcycle_sub_start(wcycle_, ewcsNONBONDED_FEP);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::NonbondedFep);
  #pragma omp parallel for schedule(static) num_threads(nbl_fep.ssize())
      for (gmx::index th = 0; th < nbl_fep.ssize(); th++)
      {
@@ -640,5 +640,5 @@ void nonbonded_verlet_t::dispatchFreeEnergyKernel(gmx::InteractionLocality
                              + dvdl_nb[FreeEnergyPerturbationCouplingType::Coul]);
          }
      }
-    wallcycle_sub_stop(wcycle_, ewcsNONBONDED_FEP);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::NonbondedFep);
  }
diff --git a/src/gromacs/nbnxm/nbnxm.cpp b/src/gromacs/nbnxm/nbnxm.cpp

index 62febc366f525bd4c42c984220912c6efe4ea225..ecc0ecc2ca6c9ee5dcd9e1bb84fa0216ec69b156 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm.cpp
+++ b/src/gromacs/nbnxm/nbnxm.cpp
@@ -147,27 +147,27 @@ void nonbonded_verlet_t::setAtomProperties(gmx::ArrayRef<const int>  atomTypes,
  void nonbonded_verlet_t::convertCoordinates(const gmx::AtomLocality        locality,
                                              gmx::ArrayRef<const gmx::RVec> coordinates)
  {
-    wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
-    wallcycle_sub_start(wcycle_, ewcsNB_X_BUF_OPS);
+    wallcycle_start(wcycle_, WallCycleCounter::NbXFBufOps);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::NBXBufOps);
  
      nbnxn_atomdata_copy_x_to_nbat_x(
              pairSearch_->gridSet(), locality, as_rvec_array(coordinates.data()), nbat.get());
  
-    wallcycle_sub_stop(wcycle_, ewcsNB_X_BUF_OPS);
-    wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::NBXBufOps);
+    wallcycle_stop(wcycle_, WallCycleCounter::NbXFBufOps);
  }
  
  void nonbonded_verlet_t::convertCoordinatesGpu(const gmx::AtomLocality locality,
                                                 DeviceBuffer<gmx::RVec> d_x,
                                                 GpuEventSynchronizer*   xReadyOnDevice)
  {
-    wallcycle_start(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_NB_X_BUF_OPS);
+    wallcycle_start(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuNBXBufOps);
  
      nbnxn_atomdata_x_to_nbat_x_gpu(pairSearch_->gridSet(), locality, gpu_nbv, d_x, xReadyOnDevice);
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_NB_X_BUF_OPS);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuNBXBufOps);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
  
  gmx::ArrayRef<const int> nonbonded_verlet_t::getGridIndices() const
@@ -186,13 +186,13 @@ void nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const gmx::AtomLocality  local
          return;
      }
  
-    wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
-    wallcycle_sub_start(wcycle_, ewcsNB_F_BUF_OPS);
+    wallcycle_start(wcycle_, WallCycleCounter::NbXFBufOps);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::NBFBufOps);
  
      reduceForces(nbat.get(), locality, pairSearch_->gridSet(), as_rvec_array(force.data()));
  
-    wallcycle_sub_stop(wcycle_, ewcsNB_F_BUF_OPS);
-    wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::NBFBufOps);
+    wallcycle_stop(wcycle_, WallCycleCounter::NbXFBufOps);
  }
  
  int nonbonded_verlet_t::getNumAtoms(const gmx::AtomLocality locality) const
diff --git a/src/gromacs/nbnxm/prunekerneldispatch.cpp b/src/gromacs/nbnxm/prunekerneldispatch.cpp

index f3e4dee503f8e732ac046feab005adf5fce5027f..98d05e13b1884e57c90d5d9f3f91c0ff14b74532 100644 (file)
--- a/src/gromacs/nbnxm/prunekerneldispatch.cpp
+++ b/src/gromacs/nbnxm/prunekerneldispatch.cpp
@@ -100,8 +100,8 @@ void nonbonded_verlet_t::dispatchPruneKernelCpu(const gmx::InteractionLocality i
  
  void nonbonded_verlet_t::dispatchPruneKernelGpu(int64_t step)
  {
-    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
-    wallcycle_sub_start_nocount(wcycle_, ewcsLAUNCH_GPU_NONBONDED);
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start_nocount(wcycle_, WallCycleSubCounter::LaunchGpuNonBonded);
  
      const bool stepIsEven =
              (pairlistSets().numStepsWithPairlist(step) % (2 * pairlistSets().params().mtsFactor) == 0);
@@ -111,6 +111,6 @@ void nonbonded_verlet_t::dispatchPruneKernelGpu(int64_t step)
              stepIsEven ? gmx::InteractionLocality::Local : gmx::InteractionLocality::NonLocal,
              pairlistSets().params().numRollingPruningParts);
  
-    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_NONBONDED);
-    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuNonBonded);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
  }
diff --git a/src/gromacs/swap/swapcoords.cpp b/src/gromacs/swap/swapcoords.cpp

index 11c152313cdb12faa5f00ddfec4c4a7e1d53ec8f..941496387347c6440056e2e6019d3c328e0ce60c 100644 (file)
--- a/src/gromacs/swap/swapcoords.cpp
+++ b/src/gromacs/swap/swapcoords.cpp
@@ -2053,7 +2053,7 @@ gmx_bool do_swapcoords(t_commrec*        cr,
      rvec                com_solvent, com_particle; /* solvent and swap molecule's center of mass */
  
  
-    wallcycle_start(wcycle, ewcSWAP);
+    wallcycle_start(wcycle, WallCycleCounter::Swap);
  
      set_pbc(s->pbc, ir->pbcType, box);
  
@@ -2242,7 +2242,7 @@ gmx_bool do_swapcoords(t_commrec*        cr,
  
      } /* end of if(bSwap) */
  
-    wallcycle_stop(wcycle, ewcSWAP);
+    wallcycle_stop(wcycle, WallCycleCounter::Swap);
  
      return bSwap;
  }
diff --git a/src/gromacs/timing/tests/timing.cpp b/src/gromacs/timing/tests/timing.cpp

index 158d6aba69c9a7e8c81cbfa18e190cd630b10c0c..3155ad4e144949d95db7de440a1a443165fcf7c7 100644 (file)
--- a/src/gromacs/timing/tests/timing.cpp
+++ b/src/gromacs/timing/tests/timing.cpp
@@ -44,6 +44,7 @@
  #include "config.h"
  
  #include <chrono>
+#include <memory>
  #include <thread>
  
  #include "gromacs/timing/cyclecounter.h"
@@ -72,31 +73,30 @@ class TimingTest : public ::testing::Test
  {
  public:
      TimingTest() : wcycle(wallcycle_init(nullptr, 0, nullptr)) {}
-    ~TimingTest() override { wallcycle_destroy(wcycle); }
  
  protected:
-    const int       delayInMilliseconds = 1;
-    gmx_wallcycle_t wcycle;
+    const int                      delayInMilliseconds = 1;
+    std::unique_ptr<gmx_wallcycle> wcycle;
  };
  
  
  //! Test whether the we can run the cycle counter.
  TEST_F(TimingTest, RunWallCycle)
  {
-    int    probe = 0, ref = 1;
-    int    n1, n2;
-    double c1, c2;
+    WallCycleCounter probe = WallCycleCounter::Run, ref = WallCycleCounter::Step;
+    int              n1, n2;
+    double           c1, c2;
  
      //! credit cycles from enclosing call to the ref field of wcycle
-    wallcycle_start(wcycle, ref);
+    wallcycle_start(wcycle.get(), ref);
      //! cycles from the probe call
-    wallcycle_start(wcycle, probe);
+    wallcycle_start(wcycle.get(), probe);
      sleepForMilliseconds(delayInMilliseconds);
-    wallcycle_stop(wcycle, probe);
-    wallcycle_stop(wcycle, ref);
+    wallcycle_stop(wcycle.get(), probe);
+    wallcycle_stop(wcycle.get(), ref);
      //! extract both
-    wallcycle_get(wcycle, probe, &n1, &c1);
-    wallcycle_get(wcycle, ref, &n2, &c2);
+    wallcycle_get(wcycle.get(), probe, &n1, &c1);
+    wallcycle_get(wcycle.get(), ref, &n2, &c2);
  
      EXPECT_EQ(n1, n2);
      EXPECT_DOUBLE_EQ_TOL(c1, c2, relativeToleranceAsFloatingPoint(c1, 5e-3));
@@ -108,17 +108,17 @@ TEST_F(TimingTest, RunWallCycleSub)
  {
      if (useCycleSubcounters)
      {
-        int    probe = 0;
-        int    ref   = 1;
-        int    n1, n2;
-        double c1, c2;
-        wallcycle_sub_start(wcycle, ref);
-        wallcycle_sub_start(wcycle, probe);
+        WallCycleSubCounter probe = WallCycleSubCounter::DDRedist;
+        WallCycleSubCounter ref   = WallCycleSubCounter::DDGrid;
+        int                 n1, n2;
+        double              c1, c2;
+        wallcycle_sub_start(wcycle.get(), ref);
+        wallcycle_sub_start(wcycle.get(), probe);
          sleepForMilliseconds(delayInMilliseconds);
-        wallcycle_sub_stop(wcycle, probe);
-        wallcycle_sub_stop(wcycle, ref);
-        wallcycle_sub_get(wcycle, probe, &n1, &c1);
-        wallcycle_sub_get(wcycle, ref, &n2, &c2);
+        wallcycle_sub_stop(wcycle.get(), probe);
+        wallcycle_sub_stop(wcycle.get(), ref);
+        wallcycle_sub_get(wcycle.get(), probe, &n1, &c1);
+        wallcycle_sub_get(wcycle.get(), ref, &n2, &c2);
  
          EXPECT_EQ(n1, n2);
          EXPECT_DOUBLE_EQ_TOL(c1, c2, relativeToleranceAsFloatingPoint(c1, 5e-3));
diff --git a/src/gromacs/timing/wallcycle.cpp b/src/gromacs/timing/wallcycle.cpp

index 4ea520268ccbef468a4fd4eec9ffaf214582ab17..6f499587133707491ee5fe710e9f4e5d6ab04729 100644 (file)
--- a/src/gromacs/timing/wallcycle.cpp
+++ b/src/gromacs/timing/wallcycle.cpp
@@ -44,6 +44,7 @@
  #include <cstdlib>
  
  #include <array>
+#include <memory>
  #include <vector>
  
  #include "gromacs/math/functions.h"
@@ -51,12 +52,15 @@
  #include "gromacs/timing/cyclecounter.h"
  #include "gromacs/timing/gpu_timing.h"
  #include "gromacs/timing/wallcyclereporting.h"
+#include "gromacs/utility/arrayref.h"
  #include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/enumerationhelpers.h"
  #include "gromacs/utility/gmxassert.h"
  #include "gromacs/utility/gmxmpi.h"
  #include "gromacs/utility/logger.h"
  #include "gromacs/utility/smalloc.h"
  #include "gromacs/utility/snprintf.h"
+#include "gromacs/utility/stringutil.h"
  
  //! Whether wallcycle debugging is enabled
  constexpr bool gmx_unused enableWallcycleDebug = (DEBUG_WCYCLE != 0);
@@ -71,87 +75,97 @@ constexpr bool gmx_unused debugPrintDepth = false /* enableWallcycleDebug */;
  
  /* Each name should not exceed 19 printing characters
     (ie. terminating null can be twentieth) */
-static const char* wcn[ewcNR] = { "Run",
-                                  "Step",
-                                  "PP during PME",
-                                  "Domain decomp.",
-                                  "DD comm. load",
-                                  "DD comm. bounds",
-                                  "Vsite constr.",
-                                  "Send X to PME",
-                                  "Neighbor search",
-                                  "Launch GPU ops.",
-                                  "Comm. coord.",
-                                  "Force",
-                                  "Wait + Comm. F",
-                                  "PME mesh",
-                                  "PME redist. X/F",
-                                  "PME spread",
-                                  "PME gather",
-                                  "PME 3D-FFT",
-                                  "PME 3D-FFT Comm.",
-                                  "PME solve LJ",
-                                  "PME solve Elec",
-                                  "PME wait for PP",
-                                  "Wait + Recv. PME F",
-                                  "Wait PME GPU spread",
-                                  "PME 3D-FFT",
-                                  "PME solve", /* the strings for FFT/solve are repeated here for mixed mode counters */
-                                  "Wait PME GPU gather",
-                                  "Wait Bonded GPU",
-                                  "Reduce GPU PME F",
-                                  "Wait GPU NB nonloc.",
-                                  "Wait GPU NB local",
-                                  "Wait GPU state copy",
-                                  "NB X/F buffer ops.",
-                                  "Vsite spread",
-                                  "COM pull force",
-                                  "AWH",
-                                  "Write traj.",
-                                  "Update",
-                                  "Constraints",
-                                  "Comm. energies",
-                                  "Enforced rotation",
-                                  "Add rot. forces",
-                                  "Position swapping",
-                                  "IMD",
-                                  "Test" };
-
-static const char* wcsn[ewcsNR] = {
-    "DD redist.",
-    "DD NS grid + sort",
-    "DD setup comm.",
-    "DD make top.",
-    "DD make constr.",
-    "DD top. other",
-    "DD GPU ops.",
-    "NS grid local",
-    "NS grid non-loc.",
-    "NS search local",
-    "NS search non-loc.",
-    "Bonded F",
-    "Bonded-FEP F",
-    "Restraints F",
-    "Listed buffer ops.",
-    "Nonbonded pruning",
-    "Nonbonded F kernel",
-    "Nonbonded F clear",
-    "Nonbonded FEP",
-    "Launch NB GPU tasks",
-    "Launch Bonded GPU tasks",
-    "Launch PME GPU tasks",
-    "Launch state copy",
-    "Ewald F correction",
-    "NB X buffer ops.",
-    "NB F buffer ops.",
-    "Clear force buffer",
-    "Launch GPU NB X buffer ops.",
-    "Launch GPU NB F buffer ops.",
-    "Launch GPU Comm. coord.",
-    "Launch GPU Comm. force.",
-    "Launch GPU update",
-    "Test subcounter",
-};
+static const char* enumValuetoString(WallCycleCounter enumValue)
+{
+    constexpr gmx::EnumerationArray<WallCycleCounter, const char*> wallCycleCounterNames = {
+        "Run",
+        "Step",
+        "PP during PME",
+        "Domain decomp.",
+        "DD comm. load",
+        "DD comm. bounds",
+        "Vsite constr.",
+        "Send X to PME",
+        "Neighbor search",
+        "Launch GPU ops.",
+        "Comm. coord.",
+        "Force",
+        "Wait + Comm. F",
+        "PME mesh",
+        "PME redist. X/F",
+        "PME spread",
+        "PME gather",
+        "PME 3D-FFT",
+        "PME 3D-FFT Comm.",
+        "PME solve LJ",
+        "PME solve Elec",
+        "PME wait for PP",
+        "Wait + Recv. PME F",
+        "Wait PME GPU spread",
+        "PME 3D-FFT",
+        "PME solve", /* the strings for FFT/solve are repeated here for mixed mode counters */
+        "Wait PME GPU gather",
+        "Wait Bonded GPU",
+        "Reduce GPU PME F",
+        "Wait GPU NB nonloc.",
+        "Wait GPU NB local",
+        "Wait GPU state copy",
+        "NB X/F buffer ops.",
+        "Vsite spread",
+        "COM pull force",
+        "AWH",
+        "Write traj.",
+        "Update",
+        "Constraints",
+        "Comm. energies",
+        "Enforced rotation",
+        "Add rot. forces",
+        "Position swapping",
+        "IMD",
+        "Test"
+    };
+    return wallCycleCounterNames[enumValue];
+}
+
+static const char* enumValuetoString(WallCycleSubCounter enumValue)
+{
+    constexpr gmx::EnumerationArray<WallCycleSubCounter, const char*> wallCycleSubCounterNames = {
+        "DD redist.",
+        "DD NS grid + sort",
+        "DD setup comm.",
+        "DD make top.",
+        "DD make constr.",
+        "DD top. other",
+        "DD GPU ops.",
+        "NS grid local",
+        "NS grid non-loc.",
+        "NS search local",
+        "NS search non-loc.",
+        "Bonded F",
+        "Bonded-FEP F",
+        "Restraints F",
+        "Listed buffer ops.",
+        "Nonbonded pruning",
+        "Nonbonded F kernel",
+        "Nonbonded F clear",
+        "Nonbonded FEP",
+        "Launch NB GPU tasks",
+        "Launch Bonded GPU tasks",
+        "Launch PME GPU tasks",
+        "Launch state copy",
+        "Ewald F correction",
+        "NB X buffer ops.",
+        "NB F buffer ops.",
+        "Clear force buffer",
+        "Launch GPU NB X buffer ops.",
+        "Launch GPU NB F buffer ops.",
+        "Launch GPU Comm. coord.",
+        "Launch GPU Comm. force.",
+        "Launch GPU update",
+        "Test subcounter"
+    };
+    return wallCycleSubCounterNames[enumValue];
+}
  
  /* PME GPU timing events' names - correspond to the enum in the gpu_timing.h */
  static const char* enumValuetoString(PmeStage enumValue)
@@ -168,23 +182,22 @@ bool wallcycle_have_counter()
      return gmx_cycles_have_counter();
  }
  
-gmx_wallcycle_t wallcycle_init(FILE* fplog, int resetstep, const t_commrec* cr)
+std::unique_ptr<gmx_wallcycle> wallcycle_init(FILE* fplog, int resetstep, const t_commrec* cr)
  {
-    gmx_wallcycle_t wc;
+    std::unique_ptr<gmx_wallcycle> wc;
  
  
      if (!wallcycle_have_counter())
      {
-        return nullptr;
+        return wc;
      }
  
-    snew(wc, 1);
+    wc = std::make_unique<gmx_wallcycle>();
  
-    wc->haveInvalidCount = FALSE;
-    wc->wc_barrier       = FALSE;
-    wc->wcc_all          = nullptr;
+    wc->haveInvalidCount = false;
+    wc->wc_barrier       = false;
      wc->wc_depth         = 0;
-    wc->ewc_prev         = -1;
+    wc->ewc_prev         = WallCycleCounter::Count;
      wc->reset_counters   = resetstep;
      wc->cr               = cr;
  
@@ -196,23 +209,17 @@ gmx_wallcycle_t wallcycle_init(FILE* fplog, int resetstep, const t_commrec* cr)
          {
              fprintf(fplog, "\nWill call MPI_Barrier before each cycle start/stop call\n\n");
          }
-        wc->wc_barrier = TRUE;
+        wc->wc_barrier = true;
      }
  #endif
  
-    snew(wc->wcc, ewcNR);
      if (getenv("GMX_CYCLE_ALL") != nullptr)
      {
          if (fplog)
          {
              fprintf(fplog, "\nWill time all the code during the run\n\n");
          }
-        snew(wc->wcc_all, ewcNR * ewcNR);
-    }
-
-    if (sc_useCycleSubcounters)
-    {
-        snew(wc->wcsc, ewcsNR);
+        wc->wcc_all.resize(sc_numWallCycleCountersSquared);
      }
  
  #if DEBUG_WCYCLE
@@ -223,32 +230,10 @@ gmx_wallcycle_t wallcycle_init(FILE* fplog, int resetstep, const t_commrec* cr)
      return wc;
  }
  
-void wallcycle_destroy(gmx_wallcycle_t wc)
-{
-    if (wc == nullptr)
-    {
-        return;
-    }
-
-    if (wc->wcc != nullptr)
-    {
-        sfree(wc->wcc);
-    }
-    if (wc->wcc_all != nullptr)
-    {
-        sfree(wc->wcc_all);
-    }
-    if (wc->wcsc != nullptr)
-    {
-        sfree(wc->wcsc);
-    }
-    sfree(wc);
-}
-
  #if DEBUG_WCYCLE
-static void debug_start_check(gmx_wallcycle_t wc, int ewc)
+static void debug_start_check(gmx_wallcycle* wc, WallCycleCounter ewc)
  {
-    if (wc->count_depth < 0 || wc->count_depth >= DEPTH_MAX)
+    if (wc->count_depth < 0 || wc->count_depth >= c_MaxWallCycleDepth)
      {
          gmx_fatal(FARGS, "wallcycle counter depth out of range: %d", wc->count_depth + 1);
      }
@@ -258,41 +243,44 @@ static void debug_start_check(gmx_wallcycle_t wc, int ewc)
      if (debugPrintDepth && (!onlyMasterDebugPrints || wc->isMasterRank))
      {
          std::string indentStr(4 * wc->count_depth, ' ');
-        fprintf(stderr, "%swcycle_start depth %d, %s\n", indentStr.c_str(), wc->count_depth, wcn[ewc]);
+        fprintf(stderr, "%swcycle_start depth %d, %s\n", indentStr.c_str(), wc->count_depth, enumValuetoString(ewc));
      }
  }
  
-static void debug_stop_check(gmx_wallcycle_t wc, int ewc)
+static void debug_stop_check(gmx_wallcycle* wc, WallCycleCounter ewc)
  {
      if (debugPrintDepth && (!onlyMasterDebugPrints || wc->isMasterRank))
      {
          std::string indentStr(4 * wc->count_depth, ' ');
-        fprintf(stderr, "%swcycle_stop  depth %d, %s\n", indentStr.c_str(), wc->count_depth, wcn[ewc]);
+        fprintf(stderr, "%swcycle_stop  depth %d, %s\n", indentStr.c_str(), wc->count_depth, enumValuetoString(ewc));
      }
  
      wc->count_depth--;
  
      if (wc->count_depth < 0)
      {
-        gmx_fatal(FARGS, "wallcycle counter depth out of range when stopping %s: %d", wcn[ewc], wc->count_depth);
+        gmx_fatal(FARGS,
+                  "wallcycle counter depth out of range when stopping %s: %d",
+                  enumValuetoString(ewc),
+                  wc->count_depth);
      }
      if (wc->counterlist[wc->count_depth] != ewc)
      {
          gmx_fatal(FARGS,
                    "wallcycle mismatch at stop, start %s, stop %s",
-                  wcn[wc->counterlist[wc->count_depth]],
-                  wcn[ewc]);
+                  enumValuetoString(wc->counterlist[wc->count_depth]),
+                  enumValuetoString(ewc));
      }
  }
  #endif
  
-void wallcycle_get(gmx_wallcycle_t wc, int ewc, int* n, double* c)
+void wallcycle_get(gmx_wallcycle* wc, WallCycleCounter ewc, int* n, double* c)
  {
      *n = wc->wcc[ewc].n;
      *c = static_cast<double>(wc->wcc[ewc].c);
  }
  
-void wallcycle_sub_get(gmx_wallcycle_t wc, int ewcs, int* n, double* c)
+void wallcycle_sub_get(gmx_wallcycle* wc, WallCycleSubCounter ewcs, int* n, double* c)
  {
      if (sc_useCycleSubcounters && wc != nullptr)
      {
@@ -301,48 +289,43 @@ void wallcycle_sub_get(gmx_wallcycle_t wc, int ewcs, int* n, double* c)
      }
  }
  
-void wallcycle_reset_all(gmx_wallcycle_t wc)
+void wallcycle_reset_all(gmx_wallcycle* wc)
  {
-    int i;
-
      if (wc == nullptr)
      {
          return;
      }
  
-    for (i = 0; i < ewcNR; i++)
+    for (auto& counter : wc->wcc)
      {
-        wc->wcc[i].n = 0;
-        wc->wcc[i].c = 0;
+        counter.n = 0;
+        counter.c = 0;
      }
-    wc->haveInvalidCount = FALSE;
+    wc->haveInvalidCount = false;
  
-    if (wc->wcc_all)
+    if (!wc->wcc_all.empty())
      {
-        for (i = 0; i < ewcNR * ewcNR; i++)
+        for (int i = 0; i < sc_numWallCycleCountersSquared; i++)
          {
              wc->wcc_all[i].n = 0;
              wc->wcc_all[i].c = 0;
          }
      }
-    if (wc->wcsc)
+    for (auto& counter : wc->wcsc)
      {
-        for (i = 0; i < ewcsNR; i++)
-        {
-            wc->wcsc[i].n = 0;
-            wc->wcsc[i].c = 0;
-        }
+        counter.n = 0;
+        counter.c = 0;
      }
  }
  
-static gmx_bool is_pme_counter(int ewc)
+static bool is_pme_counter(WallCycleCounter ewc)
  {
-    return (ewc >= ewcPMEMESH && ewc <= ewcPMEWAITCOMM);
+    return (ewc >= WallCycleCounter::PmeMesh && ewc <= WallCycleCounter::PmeWaitComm);
  }
  
-static gmx_bool is_pme_subcounter(int ewc)
+static bool is_pme_subcounter(WallCycleCounter ewc)
  {
-    return (ewc >= ewcPME_REDISTXF && ewc < ewcPMEWAITCOMM);
+    return (ewc >= WallCycleCounter::PmeRedistXF && ewc < WallCycleCounter::PmeWaitComm);
  }
  
  void wallcycleBarrier(gmx_wallcycle* wc)
@@ -358,7 +341,10 @@ void wallcycleBarrier(gmx_wallcycle* wc)
  }
  
  /* Subtract counter ewc_sub timed inside a timing block for ewc_main */
-static void subtract_cycles(wallcc_t* wcc, int ewc_main, int ewc_sub)
+// NOLINTNEXTLINE(google-runtime-references)
+static void subtract_cycles(gmx::EnumerationArray<WallCycleCounter, wallcc_t>& wcc,
+                            WallCycleCounter                                   ewc_main,
+                            WallCycleCounter                                   ewc_sub)
  {
      if (wcc[ewc_sub].n > 0)
      {
@@ -374,45 +360,47 @@ static void subtract_cycles(wallcc_t* wcc, int ewc_main, int ewc_sub)
      }
  }
  
-void wallcycle_scale_by_num_threads(gmx_wallcycle_t wc, bool isPmeRank, int nthreads_pp, int nthreads_pme)
+void wallcycle_scale_by_num_threads(gmx_wallcycle* wc, bool isPmeRank, int nthreads_pp, int nthreads_pme)
  {
      if (wc == nullptr)
      {
          return;
      }
  
-    for (int i = 0; i < ewcNR; i++)
+    for (auto key : keysOf(wc->wcc))
      {
-        if (is_pme_counter(i) || (i == ewcRUN && isPmeRank))
+        if (is_pme_counter(key) || (key == WallCycleCounter::Run && isPmeRank))
          {
-            wc->wcc[i].c *= nthreads_pme;
+            wc->wcc[key].c *= nthreads_pme;
  
-            if (wc->wcc_all)
+            if (!wc->wcc_all.empty())
              {
-                for (int j = 0; j < ewcNR; j++)
+                const int current = static_cast<int>(key);
+                for (int j = 0; j < sc_numWallCycleCounters; j++)
                  {
-                    wc->wcc_all[i * ewcNR + j].c *= nthreads_pme;
+                    wc->wcc_all[current * sc_numWallCycleCounters + j].c *= nthreads_pme;
                  }
              }
          }
          else
          {
-            wc->wcc[i].c *= nthreads_pp;
+            wc->wcc[key].c *= nthreads_pp;
  
-            if (wc->wcc_all)
+            if (!wc->wcc_all.empty())
              {
-                for (int j = 0; j < ewcNR; j++)
+                const int current = static_cast<int>(key);
+                for (int j = 0; j < sc_numWallCycleCounters; j++)
                  {
-                    wc->wcc_all[i * ewcNR + j].c *= nthreads_pp;
+                    wc->wcc_all[current * sc_numWallCycleCounters + j].c *= nthreads_pp;
                  }
              }
          }
      }
-    if (sc_useCycleSubcounters && wc->wcsc && !isPmeRank)
+    if (sc_useCycleSubcounters && !isPmeRank)
      {
-        for (int i = 0; i < ewcsNR; i++)
+        for (auto counter : wc->wcsc)
          {
-            wc->wcsc[i].c *= nthreads_pp;
+            counter.c *= nthreads_pp;
          }
      }
  }
@@ -429,16 +417,15 @@ void wallcycle_scale_by_num_threads(gmx_wallcycle_t wc, bool isPmeRank, int nthr
   * wcc_all are unused by the GPU reporting, but it is not satisfactory
   * for the future. Also, there's no need for MPI_Allreduce, since
   * only MASTERRANK uses any of the results. */
-WallcycleCounts wallcycle_sum(const t_commrec* cr, gmx_wallcycle_t wc)
+WallcycleCounts wallcycle_sum(const t_commrec* cr, gmx_wallcycle* wc)
  {
-    WallcycleCounts cycles_sum;
-    wallcc_t*       wcc;
-    double          cycles[int(ewcNR) + int(ewcsNR)];
+    WallcycleCounts                                    cycles_sum;
+    gmx::EnumerationArray<WallCycleCounter, double>    cyclesMain;
+    gmx::EnumerationArray<WallCycleSubCounter, double> cyclesSub;
  #if GMX_MPI
-    double cycles_n[int(ewcNR) + int(ewcsNR) + 1];
+    gmx::EnumerationArray<WallCycleCounter, double>    cyclesMainOnNode;
+    gmx::EnumerationArray<WallCycleSubCounter, double> cyclesSubOnNode;
  #endif
-    int i;
-    int nsum;
  
      if (wc == nullptr)
      {
@@ -448,104 +435,128 @@ WallcycleCounts wallcycle_sum(const t_commrec* cr, gmx_wallcycle_t wc)
          return cycles_sum;
      }
  
-    wcc = wc->wcc;
+    auto& wcc = wc->wcc;
  
-    subtract_cycles(wcc, ewcDOMDEC, ewcDDCOMMLOAD);
-    subtract_cycles(wcc, ewcDOMDEC, ewcDDCOMMBOUND);
+    subtract_cycles(wcc, WallCycleCounter::Domdec, WallCycleCounter::DDCommLoad);
+    subtract_cycles(wcc, WallCycleCounter::Domdec, WallCycleCounter::DDCommBound);
  
-    subtract_cycles(wcc, ewcPME_FFT, ewcPME_FFTCOMM);
+    subtract_cycles(wcc, WallCycleCounter::PmeFft, WallCycleCounter::PmeFftComm);
  
      if (cr->npmenodes == 0)
      {
          /* All nodes do PME (or no PME at all) */
-        subtract_cycles(wcc, ewcFORCE, ewcPMEMESH);
+        subtract_cycles(wcc, WallCycleCounter::Force, WallCycleCounter::PmeMesh);
      }
      else
      {
          /* The are PME-only nodes */
-        if (wcc[ewcPMEMESH].n > 0)
+        if (wcc[WallCycleCounter::PmeMesh].n > 0)
          {
              /* This must be a PME only node, calculate the Wait + Comm. time */
-            GMX_ASSERT(wcc[ewcRUN].c >= wcc[ewcPMEMESH].c,
+            GMX_ASSERT(wcc[WallCycleCounter::Run].c >= wcc[WallCycleCounter::PmeMesh].c,
                         "Total run ticks must be greater than PME-only ticks");
-            wcc[ewcPMEWAITCOMM].c = wcc[ewcRUN].c - wcc[ewcPMEMESH].c;
+            wcc[WallCycleCounter::PmeWaitComm].c =
+                    wcc[WallCycleCounter::Run].c - wcc[WallCycleCounter::PmeMesh].c;
          }
      }
  
      /* Store the cycles in a double buffer for summing */
-    for (i = 0; i < ewcNR; i++)
+    for (auto key : keysOf(wcc))
      {
  #if GMX_MPI
-        cycles_n[i] = static_cast<double>(wcc[i].n);
+        cyclesMainOnNode[key] = static_cast<double>(wcc[key].n);
  #endif
-        cycles[i] = static_cast<double>(wcc[i].c);
+        cyclesMain[key] = static_cast<double>(wcc[key].c);
      }
-    nsum = ewcNR;
-    if (wc->wcsc)
+    if (sc_useCycleSubcounters)
      {
-        for (i = 0; i < ewcsNR; i++)
+        for (auto key : keysOf(wc->wcsc))
          {
  #if GMX_MPI
-            cycles_n[ewcNR + i] = static_cast<double>(wc->wcsc[i].n);
+            cyclesSubOnNode[key] = static_cast<double>(wc->wcsc[key].n);
  #endif
-            cycles[ewcNR + i] = static_cast<double>(wc->wcsc[i].c);
+            cyclesSub[key] = static_cast<double>(wc->wcsc[key].c);
          }
-        nsum += ewcsNR;
      }
  
  #if GMX_MPI
      if (cr->nnodes > 1)
      {
-        double buf[int(ewcNR) + int(ewcsNR) + 1];
+        gmx::EnumerationArray<WallCycleCounter, double>    bufMain;
+        gmx::EnumerationArray<WallCycleSubCounter, double> bufSub;
  
          // TODO this code is used only at the end of the run, so we
          // can just do a simple reduce of haveInvalidCount in
          // wallcycle_print, and avoid bugs
-        cycles_n[nsum] = (wc->haveInvalidCount ? 1 : 0);
+        double haveInvalidCount = (wc->haveInvalidCount ? 1 : 0);
          // TODO Use MPI_Reduce
-        MPI_Allreduce(cycles_n, buf, nsum + 1, MPI_DOUBLE, MPI_MAX, cr->mpi_comm_mysim);
-        for (i = 0; i < ewcNR; i++)
+        MPI_Allreduce(cyclesMainOnNode.data(), bufMain.data(), bufMain.size(), MPI_DOUBLE, MPI_MAX, cr->mpi_comm_mysim);
+        if (sc_useCycleSubcounters)
+        {
+            MPI_Allreduce(cyclesSubOnNode.data(), bufSub.data(), bufSub.size(), MPI_DOUBLE, MPI_MAX, cr->mpi_comm_mysim);
+        }
+        MPI_Allreduce(MPI_IN_PLACE, &haveInvalidCount, 1, MPI_DOUBLE, MPI_MAX, cr->mpi_comm_mysim);
+        for (auto key : keysOf(wcc))
          {
-            wcc[i].n = gmx::roundToInt(buf[i]);
+            wcc[key].n = gmx::roundToInt(bufMain[key]);
          }
-        wc->haveInvalidCount = (buf[nsum] > 0);
-        if (wc->wcsc)
+        wc->haveInvalidCount = (haveInvalidCount > 0);
+        if (sc_useCycleSubcounters)
          {
-            for (i = 0; i < ewcsNR; i++)
+            for (auto key : keysOf(wc->wcsc))
              {
-                wc->wcsc[i].n = gmx::roundToInt(buf[ewcNR + i]);
+                wc->wcsc[key].n = gmx::roundToInt(bufSub[key]);
              }
          }
  
          // TODO Use MPI_Reduce
-        MPI_Allreduce(cycles, cycles_sum.data(), nsum, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
+        MPI_Allreduce(cyclesMain.data(), cycles_sum.data(), cyclesMain.size(), MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
+        if (sc_useCycleSubcounters)
+        {
+            MPI_Allreduce(cyclesSub.data(),
+                          cycles_sum.data() + sc_numWallCycleCounters,
+                          cyclesSub.size(),
+                          MPI_DOUBLE,
+                          MPI_SUM,
+                          cr->mpi_comm_mysim);
+        }
  
-        if (wc->wcc_all != nullptr)
+        if (!wc->wcc_all.empty())
          {
-            double *buf_all, *cyc_all;
+            std::array<double, sc_numWallCycleCountersSquared> cyc_all;
+            std::array<double, sc_numWallCycleCountersSquared> buf_all;
  
-            snew(cyc_all, ewcNR * ewcNR);
-            snew(buf_all, ewcNR * ewcNR);
-            for (i = 0; i < ewcNR * ewcNR; i++)
+            for (int i = 0; i < sc_numWallCycleCountersSquared; i++)
              {
                  cyc_all[i] = wc->wcc_all[i].c;
              }
              // TODO Use MPI_Reduce
-            MPI_Allreduce(cyc_all, buf_all, ewcNR * ewcNR, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
-            for (i = 0; i < ewcNR * ewcNR; i++)
+            MPI_Allreduce(cyc_all.data(),
+                          buf_all.data(),
+                          sc_numWallCycleCountersSquared,
+                          MPI_DOUBLE,
+                          MPI_SUM,
+                          cr->mpi_comm_mysim);
+            for (int i = 0; i < sc_numWallCycleCountersSquared; i++)
              {
                  wc->wcc_all[i].c = static_cast<gmx_cycles_t>(buf_all[i]);
              }
-            sfree(buf_all);
-            sfree(cyc_all);
          }
      }
      else
  #endif
      {
-        for (i = 0; i < nsum; i++)
+        for (auto key : keysOf(cyclesMain))
          {
-            cycles_sum[i] = cycles[i];
+            cycles_sum[static_cast<int>(key)] = cyclesMain[key];
+        }
+        if (sc_useCycleSubcounters)
+        {
+            for (auto key : keysOf(cyclesSub))
+            {
+                const int offset   = static_cast<int>(key) + sc_numWallCycleCounters;
+                cycles_sum[offset] = cyclesSub[key];
+            }
          }
      }
  
@@ -669,14 +680,14 @@ void wallcycle_print(FILE*                            fplog,
                       int                              nth_pp,
                       int                              nth_pme,
                       double                           realtime,
-                     gmx_wallcycle_t                  wc,
+                     gmx_wallcycle*                   wc,
                       const WallcycleCounts&           cyc_sum,
                       const gmx_wallclock_gpu_nbnxn_t* gpu_nbnxn_t,
                       const gmx_wallclock_gpu_pme_t*   gpu_pme_t)
  {
      double      tot, tot_for_pp, tot_for_rest, tot_cpu_overlap, gpu_cpu_ratio;
      double      c2t, c2t_pp, c2t_pme = 0;
-    int         i, j, npp, nth_tot;
+    int         npp, nth_tot;
      char        buf[STRLEN];
      const char* hline =
              "-----------------------------------------------------------------------------";
@@ -699,7 +710,7 @@ void wallcycle_print(FILE*                            fplog,
      /* When using PME-only nodes, the next line is valid for both
         PP-only and PME-only nodes because they started ewcRUN at the
         same time. */
-    tot        = cyc_sum[ewcRUN];
+    tot        = cyc_sum[static_cast<int>(WallCycleCounter::Run)];
      tot_for_pp = 0;
  
      if (tot <= 0.0)
@@ -749,44 +760,63 @@ void wallcycle_print(FILE*                            fplog,
      print_header(fplog, npp, nth_pp, npme, nth_pme);
  
      fprintf(fplog, "%s\n", hline);
-    for (i = ewcPPDURINGPME + 1; i < ewcNR; i++)
+    gmx::EnumerationWrapper<WallCycleCounter> iter;
+    for (auto key = gmx::EnumerationIterator<WallCycleCounter>(WallCycleCounter::Domdec);
+         key != iter.end();
+         ++key)
      {
-        if (is_pme_subcounter(i))
+
+        if (is_pme_subcounter(*key))
          {
              /* Do not count these at all */
          }
-        else if (npme > 0 && is_pme_counter(i))
+        else if (npme > 0 && is_pme_counter(*key))
          {
              /* Print timing information for PME-only nodes, but add an
               * asterisk so the reader of the table can know that the
               * walltimes are not meant to add up. The asterisk still
               * fits in the required maximum of 19 characters. */
-            char buffer[STRLEN];
-            snprintf(buffer, STRLEN, "%s *", wcn[i]);
-            print_cycles(fplog, c2t_pme, buffer, npme, nth_pme, wc->wcc[i].n, cyc_sum[i], tot);
+            std::string message = gmx::formatString("%s *", enumValuetoString(*key));
+            print_cycles(fplog,
+                         c2t_pme,
+                         message.c_str(),
+                         npme,
+                         nth_pme,
+                         wc->wcc[*key].n,
+                         cyc_sum[static_cast<int>(*key)],
+                         tot);
          }
          else
          {
              /* Print timing information when it is for a PP or PP+PME
                 node */
-            print_cycles(fplog, c2t_pp, wcn[i], npp, nth_pp, wc->wcc[i].n, cyc_sum[i], tot);
-            tot_for_pp += cyc_sum[i];
+            print_cycles(fplog,
+                         c2t_pp,
+                         enumValuetoString(*key),
+                         npp,
+                         nth_pp,
+                         wc->wcc[*key].n,
+                         cyc_sum[static_cast<int>(*key)],
+                         tot);
+            tot_for_pp += cyc_sum[static_cast<int>(*key)];
          }
      }
-    if (wc->wcc_all != nullptr)
+    if (!wc->wcc_all.empty())
      {
-        for (i = 0; i < ewcNR; i++)
+        for (auto i : keysOf(wc->wcc))
          {
-            for (j = 0; j < ewcNR; j++)
+            const int countI = static_cast<int>(i);
+            for (auto j : keysOf(wc->wcc))
              {
-                snprintf(buf, 20, "%-9.9s %-9.9s", wcn[i], wcn[j]);
+                const int countJ = static_cast<int>(j);
+                snprintf(buf, 20, "%-9.9s %-9.9s", enumValuetoString(i), enumValuetoString(j));
                  print_cycles(fplog,
                               c2t_pp,
                               buf,
                               npp,
                               nth_pp,
-                             wc->wcc_all[i * ewcNR + j].n,
-                             wc->wcc_all[i * ewcNR + j].c,
+                             wc->wcc_all[countI * sc_numWallCycleCounters + countJ].n,
+                             wc->wcc_all[countI * sc_numWallCycleCounters + countJ].c,
                               tot);
              }
          }
@@ -806,16 +836,18 @@ void wallcycle_print(FILE*                            fplog,
                  hline);
      }
  
-    if (wc->wcc[ewcPMEMESH].n > 0)
+    if (wc->wcc[WallCycleCounter::PmeMesh].n > 0)
      {
          // A workaround to not print breakdown when no subcounters were recorded.
          // TODO: figure out and record PME GPU counters (what to do with the waiting ones?)
-        std::vector<int> validPmeSubcounterIndices;
-        for (i = ewcPPDURINGPME + 1; i < ewcNR; i++)
+        std::vector<WallCycleCounter> validPmeSubcounterIndices;
+        for (auto key = gmx::EnumerationIterator<WallCycleCounter>(WallCycleCounter::Domdec);
+             key != iter.end();
+             key++)
          {
-            if (is_pme_subcounter(i) && wc->wcc[i].n > 0)
+            if (is_pme_subcounter(*key) && wc->wcc[*key].n > 0)
              {
-                validPmeSubcounterIndices.push_back(i);
+                validPmeSubcounterIndices.push_back(*key);
              }
          }
  
@@ -827,24 +859,31 @@ void wallcycle_print(FILE*                            fplog,
              {
                  print_cycles(fplog,
                               npme > 0 ? c2t_pme : c2t_pp,
-                             wcn[i],
+                             enumValuetoString(i),
                               npme > 0 ? npme : npp,
                               nth_pme,
                               wc->wcc[i].n,
-                             cyc_sum[i],
+                             cyc_sum[static_cast<int>(i)],
                               tot);
              }
              fprintf(fplog, "%s\n", hline);
          }
      }
  
-    if (sc_useCycleSubcounters && wc->wcsc)
+    if (sc_useCycleSubcounters)
      {
          fprintf(fplog, " Breakdown of PP computation\n");
          fprintf(fplog, "%s\n", hline);
-        for (i = 0; i < ewcsNR; i++)
+        for (auto key : keysOf(wc->wcsc))
          {
-            print_cycles(fplog, c2t_pp, wcsn[i], npp, nth_pp, wc->wcsc[i].n, cyc_sum[ewcNR + i], tot);
+            print_cycles(fplog,
+                         c2t_pp,
+                         enumValuetoString(key),
+                         npp,
+                         nth_pp,
+                         wc->wcsc[key].n,
+                         cyc_sum[sc_numWallCycleCounters + static_cast<int>(key)],
+                         tot);
          }
          fprintf(fplog, "%s\n", hline);
      }
@@ -865,19 +904,19 @@ void wallcycle_print(FILE*                            fplog,
          tot_gpu += gpu_nbnxn_t->pl_h2d_t + gpu_nbnxn_t->nb_h2d_t + gpu_nbnxn_t->nb_d2h_t;
  
          /* add up the kernel timings */
-        for (i = 0; i < 2; i++)
+        for (int i = 0; i < 2; i++)
          {
-            for (j = 0; j < 2; j++)
+            for (int j = 0; j < 2; j++)
              {
                  tot_gpu += gpu_nbnxn_t->ktime[i][j].t;
              }
          }
          tot_gpu += gpu_nbnxn_t->pruneTime.t;
  
-        tot_cpu_overlap = wc->wcc[ewcFORCE].c;
-        if (wc->wcc[ewcPMEMESH].n > 0)
+        tot_cpu_overlap = wc->wcc[WallCycleCounter::Force].c;
+        if (wc->wcc[WallCycleCounter::PmeMesh].n > 0)
          {
-            tot_cpu_overlap += wc->wcc[ewcPMEMESH].c;
+            tot_cpu_overlap += wc->wcc[WallCycleCounter::PmeMesh].c;
          }
          tot_cpu_overlap *= realtime * 1000 / tot; /* convert s to ms */
  
@@ -889,9 +928,9 @@ void wallcycle_print(FILE*                            fplog,
          print_gputimes(fplog, "Pair list H2D", gpu_nbnxn_t->pl_h2d_c, gpu_nbnxn_t->pl_h2d_t, tot_gpu);
          print_gputimes(fplog, "X / q H2D", gpu_nbnxn_t->nb_c, gpu_nbnxn_t->nb_h2d_t, tot_gpu);
  
-        for (i = 0; i < 2; i++)
+        for (int i = 0; i < 2; i++)
          {
-            for (j = 0; j < 2; j++)
+            for (int j = 0; j < 2; j++)
              {
                  if (gpu_nbnxn_t->ktime[i][j].c)
                  {
@@ -939,18 +978,18 @@ void wallcycle_print(FILE*                            fplog,
              fprintf(fplog, "%s\n", hline);
          }
          gpu_cpu_ratio = tot_gpu / tot_cpu_overlap;
-        if (gpu_nbnxn_t->nb_c > 0 && wc->wcc[ewcFORCE].n > 0)
+        if (gpu_nbnxn_t->nb_c > 0 && wc->wcc[WallCycleCounter::Force].n > 0)
          {
              fprintf(fplog,
                      "\nAverage per-step force GPU/CPU evaluation time ratio: %.3f ms/%.3f ms = "
                      "%.3f\n",
                      tot_gpu / gpu_nbnxn_t->nb_c,
-                    tot_cpu_overlap / wc->wcc[ewcFORCE].n,
+                    tot_cpu_overlap / wc->wcc[WallCycleCounter::Force].n,
                      gpu_cpu_ratio);
          }
  
          /* only print notes related to CPU-GPU load balance with PME */
-        if (wc->wcc[ewcPMEMESH].n > 0)
+        if (wc->wcc[WallCycleCounter::PmeMesh].n > 0)
          {
              fprintf(fplog, "For optimal resource utilization this ratio should be close to 1\n");
  
@@ -1011,10 +1050,12 @@ void wallcycle_print(FILE*                            fplog,
                          "call, so timings are not those of real runs.");
      }
  
-    if (wc->wcc[ewcNB_XF_BUF_OPS].n > 0 && (cyc_sum[ewcDOMDEC] > tot * 0.1 || cyc_sum[ewcNS] > tot * 0.1))
+    if (wc->wcc[WallCycleCounter::NbXFBufOps].n > 0
+        && (cyc_sum[static_cast<int>(WallCycleCounter::Domdec)] > tot * 0.1
+            || cyc_sum[static_cast<int>(WallCycleCounter::NS)] > tot * 0.1))
      {
          /* Only the sim master calls this function, so always print to stderr */
-        if (wc->wcc[ewcDOMDEC].n == 0)
+        if (wc->wcc[WallCycleCounter::Domdec].n == 0)
          {
              GMX_LOG(mdlog.warning)
                      .asParagraph()
@@ -1022,7 +1063,7 @@ void wallcycle_print(FILE*                            fplog,
                              "NOTE: %d %% of the run time was spent in pair search,\n"
                              "      you might want to increase nstlist (this has no effect on "
                              "accuracy)\n",
-                            gmx::roundToInt(100 * cyc_sum[ewcNS] / tot));
+                            gmx::roundToInt(100 * cyc_sum[static_cast<int>(WallCycleCounter::NS)] / tot));
          }
          else
          {
@@ -1033,38 +1074,36 @@ void wallcycle_print(FILE*                            fplog,
                              "      %d %% of the run time was spent in pair search,\n"
                              "      you might want to increase nstlist (this has no effect on "
                              "accuracy)\n",
-                            gmx::roundToInt(100 * cyc_sum[ewcDOMDEC] / tot),
-                            gmx::roundToInt(100 * cyc_sum[ewcNS] / tot));
+                            gmx::roundToInt(100 * cyc_sum[static_cast<int>(WallCycleCounter::Domdec)] / tot),
+                            gmx::roundToInt(100 * cyc_sum[static_cast<int>(WallCycleCounter::NS)] / tot));
          }
      }
  
-    if (cyc_sum[ewcMoveE] > tot * 0.05)
+    if (cyc_sum[static_cast<int>(WallCycleCounter::MoveE)] > tot * 0.05)
      {
          GMX_LOG(mdlog.warning)
                  .asParagraph()
                  .appendTextFormatted(
                          "NOTE: %d %% of the run time was spent communicating energies,\n"
                          "      you might want to increase some nst* mdp options\n",
-                        gmx::roundToInt(100 * cyc_sum[ewcMoveE] / tot));
+                        gmx::roundToInt(100 * cyc_sum[static_cast<int>(WallCycleCounter::MoveE)] / tot));
      }
  }
  
-extern int64_t wcycle_get_reset_counters(gmx_wallcycle_t wc)
+int64_t wcycle_get_reset_counters(gmx_wallcycle* wc)
  {
      if (wc == nullptr)
      {
          return -1;
      }
-
      return wc->reset_counters;
  }
  
-extern void wcycle_set_reset_counters(gmx_wallcycle_t wc, int64_t reset_counters)
+void wcycle_set_reset_counters(gmx_wallcycle* wc, int64_t reset_counters)
  {
      if (wc == nullptr)
      {
          return;
      }
-
      wc->reset_counters = reset_counters;
  }
diff --git a/src/gromacs/timing/wallcycle.h b/src/gromacs/timing/wallcycle.h

index 133b90216cb37f5b57fb1ecae18863672cc5d229..f68035f2a53db83c78bcbeb93318c2f65594e9b4 100644 (file)
--- a/src/gromacs/timing/wallcycle.h
+++ b/src/gromacs/timing/wallcycle.h
@@ -43,12 +43,24 @@
  
  #include <stdio.h>
  
+#include <array>
+#include <memory>
+#include <vector>
+
  #include "gromacs/timing/cyclecounter.h"
  #include "gromacs/utility/basedefinitions.h"
+#include "gromacs/utility/enumerationhelpers.h"
+
+#ifndef DEBUG_WCYCLE
+/*! \brief Enables consistency checking for the counters.
+ *
+ * If the macro is set to 1, code checks if you stop a counter different from the last
+ * one that was opened and if you do nest too deep.
+ */
+#    define DEBUG_WCYCLE 0
+#endif
  
-typedef struct gmx_wallcycle* gmx_wallcycle_t;
  struct t_commrec;
-static constexpr gmx_wallcycle* nullWallcycle = nullptr;
  
  #ifndef DEBUG_WCYCLE
  /*! \brief Enables consistency checking for the counters.
@@ -59,95 +71,98 @@ static constexpr gmx_wallcycle* nullWallcycle = nullptr;
  #    define DEBUG_WCYCLE 0
  #endif
  
-enum
+enum class WallCycleCounter : int
  {
-    ewcRUN,
-    ewcSTEP,
-    ewcPPDURINGPME,
-    ewcDOMDEC,
-    ewcDDCOMMLOAD,
-    ewcDDCOMMBOUND,
-    ewcVSITECONSTR,
-    ewcPP_PMESENDX,
-    ewcNS,
-    ewcLAUNCH_GPU,
-    ewcMOVEX,
-    ewcFORCE,
-    ewcMOVEF,
-    ewcPMEMESH,
-    ewcPME_REDISTXF,
-    ewcPME_SPREAD,
-    ewcPME_GATHER,
-    ewcPME_FFT,
-    ewcPME_FFTCOMM,
-    ewcLJPME,
-    ewcPME_SOLVE,
-    ewcPMEWAITCOMM,
-    ewcPP_PMEWAITRECVF,
-    ewcWAIT_GPU_PME_SPREAD,
-    ewcPME_FFT_MIXED_MODE,
-    ewcPME_SOLVE_MIXED_MODE,
-    ewcWAIT_GPU_PME_GATHER,
-    ewcWAIT_GPU_BONDED,
-    ewcPME_GPU_F_REDUCTION,
-    ewcWAIT_GPU_NB_NL,
-    ewcWAIT_GPU_NB_L,
-    ewcWAIT_GPU_STATE_PROPAGATOR_DATA,
-    ewcNB_XF_BUF_OPS,
-    ewcVSITESPREAD,
-    ewcPULLPOT,
-    ewcAWH,
-    ewcTRAJ,
-    ewcUPDATE,
-    ewcCONSTR,
-    ewcMoveE,
-    ewcROT,
-    ewcROTadd,
-    ewcSWAP,
-    ewcIMD,
-    ewcTEST,
-    ewcNR
+    Run,
+    Step,
+    PpDuringPme,
+    Domdec,
+    DDCommLoad,
+    DDCommBound,
+    VsiteConstr,
+    PpPmeSendX,
+    NS,
+    LaunchGpu,
+    MoveX,
+    Force,
+    MoveF,
+    PmeMesh,
+    PmeRedistXF,
+    PmeSpread,
+    PmeGather,
+    PmeFft,
+    PmeFftComm,
+    LJPme,
+    PmeSolve,
+    PmeWaitComm,
+    PpPmeWaitRecvF,
+    WaitGpuPmeSpread,
+    PmeFftMixedMode,
+    PmeSolveMixedMode,
+    WaitGpuPmeGather,
+    WaitGpuBonded,
+    PmeGpuFReduction,
+    WaitGpuNbNL,
+    WaitGpuNbL,
+    WaitGpuStatePropagatorData,
+    NbXFBufOps,
+    VsiteSpread,
+    PullPot,
+    Awh,
+    Traj,
+    Update,
+    Constr,
+    MoveE,
+    Rot,
+    RotAdd,
+    Swap,
+    Imd,
+    Test,
+    Count
  };
  
-enum
+enum class WallCycleSubCounter : int
  {
-    ewcsDD_REDIST,
-    ewcsDD_GRID,
-    ewcsDD_SETUPCOMM,
-    ewcsDD_MAKETOP,
-    ewcsDD_MAKECONSTR,
-    ewcsDD_TOPOTHER,
-    ewcsDD_GPU,
-    ewcsNBS_GRID_LOCAL,
-    ewcsNBS_GRID_NONLOCAL,
-    ewcsNBS_SEARCH_LOCAL,
-    ewcsNBS_SEARCH_NONLOCAL,
-    ewcsLISTED,
-    ewcsLISTED_FEP,
-    ewcsRESTRAINTS,
-    ewcsLISTED_BUF_OPS,
-    ewcsNONBONDED_PRUNING,
-    ewcsNONBONDED_KERNEL,
-    ewcsNONBONDED_CLEAR,
-    ewcsNONBONDED_FEP,
-    ewcsLAUNCH_GPU_NONBONDED,
-    ewcsLAUNCH_GPU_BONDED,
-    ewcsLAUNCH_GPU_PME,
-    ewcsLAUNCH_STATE_PROPAGATOR_DATA,
-    ewcsEWALD_CORRECTION,
-    ewcsNB_X_BUF_OPS,
-    ewcsNB_F_BUF_OPS,
-    ewcsCLEAR_FORCE_BUFFER,
-    ewcsLAUNCH_GPU_NB_X_BUF_OPS,
-    ewcsLAUNCH_GPU_NB_F_BUF_OPS,
-    ewcsLAUNCH_GPU_MOVEX,
-    ewcsLAUNCH_GPU_MOVEF,
-    ewcsLAUNCH_GPU_UPDATE_CONSTRAIN,
-    ewcsTEST,
-    ewcsNR
+    DDRedist,
+    DDGrid,
+    DDSetupComm,
+    DDMakeTop,
+    DDMakeConstr,
+    DDTopOther,
+    DDGpu,
+    NBSGridLocal,
+    NBSGridNonLocal,
+    NBSSearchLocal,
+    NBSSearchNonLocal,
+    Listed,
+    ListedFep,
+    Restraints,
+    ListedBufOps,
+    NonbondedPruning,
+    NonbondedKernel,
+    NonbondedClear,
+    NonbondedFep,
+    LaunchGpuNonBonded,
+    LaunchGpuBonded,
+    LaunchGpuPme,
+    LaunchStatePropagatorData,
+    EwaldCorrection,
+    NBXBufOps,
+    NBFBufOps,
+    ClearForceBuffer,
+    LaunchGpuNBXBufOps,
+    LaunchGpuNBFBufOps,
+    LaunchGpuMoveX,
+    LaunchGpuMoveF,
+    LaunchGpuUpdateConstrain,
+    Test,
+    Count
  };
  
-static constexpr const bool sc_useCycleSubcounters = GMX_CYCLE_SUBCOUNTERS;
+static constexpr int sc_numWallCycleCounters        = static_cast<int>(WallCycleCounter::Count);
+static constexpr int sc_numWallCycleSubCounters     = static_cast<int>(WallCycleSubCounter::Count);
+static constexpr int sc_numWallCycleCountersSquared = sc_numWallCycleCounters * sc_numWallCycleCounters;
+static constexpr bool sc_useCycleSubcounters        = GMX_CYCLE_SUBCOUNTERS;
  
  struct wallcc_t
  {
@@ -163,57 +178,53 @@ static constexpr int c_MaxWallCycleDepth = 6;
  
  struct gmx_wallcycle
  {
-    wallcc_t* wcc;
+    gmx::EnumerationArray<WallCycleCounter, wallcc_t> wcc;
      /* did we detect one or more invalid cycle counts */
      bool haveInvalidCount;
      /* variables for testing/debugging */
-    bool      wc_barrier;
-    wallcc_t* wcc_all;
-    int       wc_depth;
+    bool                  wc_barrier;
+    std::vector<wallcc_t> wcc_all;
+    int                   wc_depth;
  #if DEBUG_WCYCLE
-    int* counterlist;
-    int  count_depth;
-    bool isMasterRank;
+    std::array<WallCycleCounter, c_MaxWallCycleDepth> counterlist;
+    int                                               count_depth;
+    bool                                              isMasterRank;
  #endif
-    int              ewc_prev;
-    gmx_cycles_t     cycle_prev;
-    int64_t          reset_counters;
-    const t_commrec* cr;
-    wallcc_t*        wcsc;
+    WallCycleCounter                                     ewc_prev;
+    gmx_cycles_t                                         cycle_prev;
+    int64_t                                              reset_counters;
+    const t_commrec*                                     cr;
+    gmx::EnumerationArray<WallCycleSubCounter, wallcc_t> wcsc;
  };
  
-//! Returns whether cycle counting is supported.
+//! Returns if cycle counting is supported
  bool wallcycle_have_counter();
  
-/*! \brief
- * Returns a wallcycle datastructure.
- *
- * If cycle counting is not supported, returns nullptr instead.
- */
-gmx_wallcycle_t wallcycle_init(FILE* fplog, int resetstep, const t_commrec* cr);
-
-//! Cleans up wallcycle structure.
-void wallcycle_destroy(gmx_wallcycle_t wc);
+//! Returns the wall cycle structure.
+std::unique_ptr<gmx_wallcycle> wallcycle_init(FILE* fplog, int resetstep, const t_commrec* cr);
  
  //! Adds custom barrier for wallcycle counting.
  void wallcycleBarrier(gmx_wallcycle* wc);
  
-inline void wallcycle_all_start(gmx_wallcycle* wc, int ewc, gmx_cycles_t cycle)
+void wallcycle_sub_get(gmx_wallcycle* wc, WallCycleSubCounter ewcs, int* n, double* c);
+/* Returns the cumulative count and sub cycle count for ewcs */
+
+inline void wallcycle_all_start(gmx_wallcycle* wc, WallCycleCounter ewc, gmx_cycles_t cycle)
  {
      wc->ewc_prev   = ewc;
      wc->cycle_prev = cycle;
  }
  
-inline void wallcycle_all_stop(gmx_wallcycle* wc, int ewc, gmx_cycles_t cycle)
+inline void wallcycle_all_stop(gmx_wallcycle* wc, WallCycleCounter ewc, gmx_cycles_t cycle)
  {
-    const int prev    = wc->ewc_prev;
-    const int current = ewc;
-    wc->wcc_all[prev * ewcNR + current].n += 1;
-    wc->wcc_all[prev * ewcNR + current].c += cycle - wc->cycle_prev;
+    const int prev    = static_cast<int>(wc->ewc_prev);
+    const int current = static_cast<int>(ewc);
+    wc->wcc_all[prev * sc_numWallCycleCounters + current].n += 1;
+    wc->wcc_all[prev * sc_numWallCycleCounters + current].c += cycle - wc->cycle_prev;
  }
  
-//! Starts the cycle counter for \c ewc (and increases the call count).
-inline void wallcycle_start(gmx_wallcycle_t wc, int ewc)
+//! Starts the cycle counter (and increases the call count)
+inline void wallcycle_start(gmx_wallcycle* wc, WallCycleCounter ewc)
  {
      if (wc == nullptr)
      {
@@ -227,10 +238,10 @@ inline void wallcycle_start(gmx_wallcycle_t wc, int ewc)
  #endif
      gmx_cycles_t cycle = gmx_cycles_read();
      wc->wcc[ewc].start = cycle;
-    if (wc->wcc_all)
+    if (!wc->wcc_all.empty())
      {
          wc->wc_depth++;
-        if (ewc == ewcRUN)
+        if (ewc == WallCycleCounter::Run)
          {
              wallcycle_all_start(wc, ewc, cycle);
          }
@@ -241,8 +252,8 @@ inline void wallcycle_start(gmx_wallcycle_t wc, int ewc)
      }
  }
  
-//! Starts the cycle counter for \c ewc without increasing the call count.
-inline void wallcycle_start_nocount(gmx_wallcycle_t wc, int ewc)
+//! Starts the cycle counter without increasing the call count
+inline void wallcycle_start_nocount(gmx_wallcycle* wc, WallCycleCounter ewc)
  {
      if (wc == nullptr)
      {
@@ -251,8 +262,8 @@ inline void wallcycle_start_nocount(gmx_wallcycle_t wc, int ewc)
      wc->wcc[ewc].n++;
  }
  
-//! Stop the cycle count for \c ewc, returns the last cycle count.
-inline double wallcycle_stop(gmx_wallcycle_t wc, int ewc)
+//! Stop the cycle count for ewc , returns the last cycle count
+inline double wallcycle_stop(gmx_wallcycle* wc, WallCycleCounter ewc)
  {
      gmx_cycles_t cycle, last;
  
@@ -287,10 +298,10 @@ inline double wallcycle_stop(gmx_wallcycle_t wc, int ewc)
      }
      wc->wcc[ewc].c += last;
      wc->wcc[ewc].n++;
-    if (wc->wcc_all)
+    if (!wc->wcc_all.empty())
      {
          wc->wc_depth--;
-        if (ewc == ewcRUN)
+        if (ewc == WallCycleCounter::Run)
          {
              wallcycle_all_stop(wc, ewc, cycle);
          }
@@ -303,8 +314,8 @@ inline double wallcycle_stop(gmx_wallcycle_t wc, int ewc)
      return last;
  }
  
-//! Only increment call count for \c ewc by one.
-inline void wallcycle_increment_event_count(gmx_wallcycle_t wc, int ewc)
+//! Only increment call count for ewc by one
+inline void wallcycle_increment_event_count(gmx_wallcycle* wc, WallCycleCounter ewc)
  {
      if (wc == nullptr)
      {
@@ -313,26 +324,23 @@ inline void wallcycle_increment_event_count(gmx_wallcycle_t wc, int ewc)
      wc->wcc[ewc].n++;
  }
  
-//! Returns the cumulative count and cycle count for \c ewc.
-void wallcycle_get(gmx_wallcycle_t wc, int ewc, int* n, double* c);
-
-//! Returns the cumulative count and sub cycle count for \c ewcs.
-void wallcycle_sub_get(gmx_wallcycle_t wc, int ewcs, int* n, double* c);
+//! Returns the cumulative count and cycle count for ewc
+void wallcycle_get(gmx_wallcycle* wc, WallCycleCounter ewc, int* n, double* c);
  
-//! Resets all cycle counters to zero.
-void wallcycle_reset_all(gmx_wallcycle_t wc);
+//! Resets all cycle counters to zero
+void wallcycle_reset_all(gmx_wallcycle* wc);
  
-//! Scale the cycle counts to reflect how many threads run for that number of cycles.
-void wallcycle_scale_by_num_threads(gmx_wallcycle_t wc, bool isPmeRank, int nthreads_pp, int nthreads_pme);
+//! Scale the cycle counts to reflect how many threads run for that number of cycles
+void wallcycle_scale_by_num_threads(gmx_wallcycle* wc, bool isPmeRank, int nthreads_pp, int nthreads_pme);
  
-//! Return reset_counters.
-int64_t wcycle_get_reset_counters(gmx_wallcycle_t wc);
+//! Return reset_counters from wc struct
+int64_t wcycle_get_reset_counters(gmx_wallcycle* wc);
  
-//! Set reset_counters.
-void wcycle_set_reset_counters(gmx_wallcycle_t wc, int64_t reset_counters);
+//! Set reset_counters
+void wcycle_set_reset_counters(gmx_wallcycle* wc, int64_t reset_counters);
  
-//! Set the start sub cycle count for \c ewcs.
-inline void wallcycle_sub_start(gmx_wallcycle_t wc, int ewcs)
+//! Set the start sub cycle count for ewcs
+inline void wallcycle_sub_start(gmx_wallcycle* wc, WallCycleSubCounter ewcs)
  {
      if (sc_useCycleSubcounters && wc != nullptr)
      {
@@ -340,8 +348,8 @@ inline void wallcycle_sub_start(gmx_wallcycle_t wc, int ewcs)
      }
  }
  
-//! Set the start sub cycle count for \c ewcs without increasing the call count.
-inline void wallcycle_sub_start_nocount(gmx_wallcycle_t wc, int ewcs)
+//! Set the start sub cycle count for ewcs without increasing the call count
+inline void wallcycle_sub_start_nocount(gmx_wallcycle* wc, WallCycleSubCounter ewcs)
  {
      if (sc_useCycleSubcounters && wc != nullptr)
      {
@@ -349,8 +357,8 @@ inline void wallcycle_sub_start_nocount(gmx_wallcycle_t wc, int ewcs)
      }
  }
  
-//! Stop the sub cycle count for \c ewcs.
-inline void wallcycle_sub_stop(gmx_wallcycle_t wc, int ewcs)
+//! Stop the sub cycle count for ewcs
+inline void wallcycle_sub_stop(gmx_wallcycle* wc, WallCycleSubCounter ewcs)
  {
      if (sc_useCycleSubcounters && wc != nullptr)
      {
diff --git a/src/gromacs/timing/wallcyclereporting.h b/src/gromacs/timing/wallcyclereporting.h

index 1bf3096fea4871a5e8d23d8565f3373123be4557..950b5b8af660596426f5fb7cc86763f6182d16aa 100644 (file)
--- a/src/gromacs/timing/wallcyclereporting.h
+++ b/src/gromacs/timing/wallcyclereporting.h
@@ -4,7 +4,7 @@
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2008, The GROMACS development team.
   * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -45,6 +45,7 @@
  
  #include <array>
  
+#include "gromacs/timing/wallcycle.h"
  #include "gromacs/utility/basedefinitions.h"
  
  struct t_commrec;
@@ -54,14 +55,13 @@ namespace gmx
  class MDLogger;
  }
  
-typedef struct gmx_wallcycle* gmx_wallcycle_t;
  struct gmx_wallclock_gpu_nbnxn_t;
  struct gmx_wallclock_gpu_pme_t;
  
-typedef std::array<double, int(ewcNR) + int(ewcsNR)> WallcycleCounts;
+using WallcycleCounts = std::array<double, sc_numWallCycleCounters + sc_numWallCycleSubCounters>;
  /* Convenience typedef */
  
-WallcycleCounts wallcycle_sum(const t_commrec* cr, gmx_wallcycle_t wc);
+WallcycleCounts wallcycle_sum(const t_commrec* cr, gmx_wallcycle* wc);
  /* Return a vector of the sum of cycle counts over the nodes in
     cr->mpi_comm_mysim. */
  
@@ -72,7 +72,7 @@ void wallcycle_print(FILE*                            fplog,
                       int                              nth_pp,
                       int                              nth_pme,
                       double                           realtime,
-                     gmx_wallcycle_t                  wc,
+                     gmx_wallcycle*                   wc,
                       const WallcycleCounts&           cyc_sum,
                       const gmx_wallclock_gpu_nbnxn_t* gpu_nbnxn_t,
                       const gmx_wallclock_gpu_pme_t*   gpu_pme_t);
author	Paul Bauer <paul.bauer.q@gmail.com>
	Wed, 14 Apr 2021 13:52:51 +0000 (13:52 +0000)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Wed, 14 Apr 2021 13:52:51 +0000 (13:52 +0000)
api/nblib/gmxsetup.cpp		patch \| blob \| history
src/gromacs/applied_forces/awh/awh.cpp		patch \| blob \| history
src/gromacs/domdec/cellsizes.cpp		patch \| blob \| history
src/gromacs/domdec/cellsizes.h		patch \| blob \| history
src/gromacs/domdec/domdec.cpp		patch \| blob \| history
src/gromacs/domdec/gpuhaloexchange_impl.cu		patch \| blob \| history
src/gromacs/domdec/partition.cpp		patch \| blob \| history
src/gromacs/ewald/pme.cpp		patch \| blob \| history
src/gromacs/ewald/pme_gpu.cpp		patch \| blob \| history
src/gromacs/ewald/pme_load_balancing.cpp		patch \| blob \| history
src/gromacs/ewald/pme_load_balancing.h		patch \| blob \| history
src/gromacs/ewald/pme_only.cpp		patch \| blob \| history
src/gromacs/ewald/pme_pp.cpp		patch \| blob \| history
src/gromacs/fft/fft5d.cpp		patch \| blob \| history
src/gromacs/fft/fft5d.h		patch \| blob \| history
src/gromacs/fft/parallel_3dfft.cpp		patch \| blob \| history
src/gromacs/fft/parallel_3dfft.h		patch \| blob \| history
src/gromacs/imd/imd.cpp		patch \| blob \| history
src/gromacs/listed_forces/gpubonded_impl.cu		patch \| blob \| history
src/gromacs/listed_forces/gpubondedkernels.cu		patch \| blob \| history
src/gromacs/listed_forces/listed_forces.cpp		patch \| blob \| history
src/gromacs/listed_forces/position_restraints.cpp		patch \| blob \| history
src/gromacs/mdlib/constr.cpp		patch \| blob \| history
src/gromacs/mdlib/force.cpp		patch \| blob \| history
src/gromacs/mdlib/gpuforcereduction_impl.cu		patch \| blob \| history
src/gromacs/mdlib/md_support.cpp		patch \| blob \| history
src/gromacs/mdlib/md_support.h		patch \| blob \| history
src/gromacs/mdlib/mdoutf.cpp		patch \| blob \| history
src/gromacs/mdlib/mdoutf.h		patch \| blob \| history
src/gromacs/mdlib/resethandler.cpp		patch \| blob \| history
src/gromacs/mdlib/sim_util.cpp		patch \| blob \| history
src/gromacs/mdlib/trajectory_writing.cpp		patch \| blob \| history
src/gromacs/mdlib/update.cpp		patch \| blob \| history
src/gromacs/mdlib/update.h		patch \| blob \| history
src/gromacs/mdlib/update_constrain_gpu_impl.cu		patch \| blob \| history
src/gromacs/mdlib/update_vv.cpp		patch \| blob \| history
src/gromacs/mdlib/vsite.cpp		patch \| blob \| history
src/gromacs/mdrun/md.cpp		patch \| blob \| history
src/gromacs/mdrun/mimic.cpp		patch \| blob \| history
src/gromacs/mdrun/minimize.cpp		patch \| blob \| history
src/gromacs/mdrun/rerun.cpp		patch \| blob \| history
src/gromacs/mdrun/runner.cpp		patch \| blob \| history
src/gromacs/mdrun/shellfc.cpp		patch \| blob \| history
src/gromacs/mdrun/shellfc.h		patch \| blob \| history
src/gromacs/mdrun/tpi.cpp		patch \| blob \| history
src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp		patch \| blob \| history
src/gromacs/modularsimulator/propagator.cpp		patch \| blob \| history
src/gromacs/modularsimulator/simulatoralgorithm.cpp		patch \| blob \| history
src/gromacs/modularsimulator/statepropagatordata.cpp		patch \| blob \| history
src/gromacs/nbnxm/gpu_common.h		patch \| blob \| history
src/gromacs/nbnxm/kerneldispatch.cpp		patch \| blob \| history
src/gromacs/nbnxm/nbnxm.cpp		patch \| blob \| history
src/gromacs/nbnxm/prunekerneldispatch.cpp		patch \| blob \| history
src/gromacs/swap/swapcoords.cpp		patch \| blob \| history
src/gromacs/timing/tests/timing.cpp		patch \| blob \| history
src/gromacs/timing/wallcycle.cpp		patch \| blob \| history
src/gromacs/timing/wallcycle.h		patch \| blob \| history
src/gromacs/timing/wallcyclereporting.h		patch \| blob \| history