Modernize wallcycle counting
[alexxy/gromacs.git] / src / gromacs / mdlib / sim_util.cpp
index 3da4bc541c493761f074a58523c5a34676cc65db..105245ebfdd6c037b8d3c7c331507bdfb11d5e07 100644 (file)
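This commit replaces the old C-style wall-cycle enums (ewcFORCE, ewcsNBS_GRID_LOCAL, ...) with the scoped enums WallCycleCounter and WallCycleSubCounter, and passes the counting state explicitly as gmx_wallcycle* instead of through the gmx_wallcycle_t pointer typedef. The sketch below shows the pattern in isolation; it is a minimal illustration, not the real gromacs/timing/wallcycle.h interface, and the array-backed storage, the Count sentinel, and the main() driver are assumptions made purely for the example.

#include <array>
#include <cstdio>

// Scoped replacement for the old int-convertible ewc* constants.
enum class WallCycleCounter : int
{
    Force,
    PullPot,
    LaunchGpu,
    Count // assumed sentinel, used here only to size the storage
};

// Stand-in for the counting state; callers now receive it as gmx_wallcycle*.
struct gmx_wallcycle
{
    std::array<double, static_cast<int>(WallCycleCounter::Count)> total{};
};

static void wallcycle_start(gmx_wallcycle* wc, WallCycleCounter ewc)
{
    // The real code records a cycle-counter timestamp; omitted in this sketch.
    (void)wc;
    (void)ewc;
}

static double wallcycle_stop(gmx_wallcycle* wc, WallCycleCounter ewc)
{
    // The real code accumulates elapsed cycles; here we bump the slot by one
    // so the example has observable behaviour.
    double& slot = wc->total[static_cast<int>(ewc)];
    slot += 1.0;
    return slot;
}

int main()
{
    gmx_wallcycle wc;
    wallcycle_start(&wc, WallCycleCounter::Force);
    std::printf("Force total: %g\n", wallcycle_stop(&wc, WallCycleCounter::Force));
    // wallcycle_start(&wc, 3); // no longer compiles: int is not a WallCycleCounter
    return 0;
}

The practical gain is type safety: a plain enum converted silently to int, so a sub-counter could be passed where a main counter was expected; with enum class, wallcycle_start(wcycle, WallCycleSubCounter::NonbondedPruning) fails to compile. The hunks below apply this one mechanical pattern throughout sim_util.cpp.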
@@ -193,7 +193,7 @@ static void pull_potential_wrapper(const t_commrec*               cr,
                                    pull_t*                        pull_work,
                                    const real*                    lambda,
                                    double                         t,
-                                   gmx_wallcycle_t                wcycle)
+                                   gmx_wallcycle*                 wcycle)
 {
     t_pbc pbc;
     real  dvdl;
@@ -201,7 +201,7 @@ static void pull_potential_wrapper(const t_commrec*               cr,
     /* Calculate the center of mass forces, this requires communication,
      * which is why pull_potential is called close to other communication.
      */
-    wallcycle_start(wcycle, ewcPULLPOT);
+    wallcycle_start(wcycle, WallCycleCounter::PullPot);
     set_pbc(&pbc, ir.pbcType, box);
     dvdl = 0;
     enerd->term[F_COM_PULL] +=
@@ -215,7 +215,7 @@ static void pull_potential_wrapper(const t_commrec*               cr,
                            force,
                            &dvdl);
     enerd->dvdl_lin[FreeEnergyPerturbationCouplingType::Restraint] += dvdl;
-    wallcycle_stop(wcycle, ewcPULLPOT);
+    wallcycle_stop(wcycle, WallCycleCounter::PullPot);
 }
 
 static void pme_receive_force_ener(t_forcerec*           fr,
@@ -224,18 +224,18 @@ static void pme_receive_force_ener(t_forcerec*           fr,
                                    gmx_enerdata_t*       enerd,
                                    bool                  useGpuPmePpComms,
                                    bool                  receivePmeForceToGpu,
-                                   gmx_wallcycle_t       wcycle)
+                                   gmx_wallcycle*        wcycle)
 {
     real  e_q, e_lj, dvdl_q, dvdl_lj;
     float cycles_ppdpme, cycles_seppme;
 
-    cycles_ppdpme = wallcycle_stop(wcycle, ewcPPDURINGPME);
+    cycles_ppdpme = wallcycle_stop(wcycle, WallCycleCounter::PpDuringPme);
     dd_cycles_add(cr->dd, cycles_ppdpme, ddCyclPPduringPME);
 
     /* In case of node-splitting, the PP nodes receive the long-range
      * forces, virial and energy from the PME nodes here.
      */
-    wallcycle_start(wcycle, ewcPP_PMEWAITRECVF);
+    wallcycle_start(wcycle, WallCycleCounter::PpPmeWaitRecvF);
     dvdl_q  = 0;
     dvdl_lj = 0;
     gmx_pme_receive_f(fr->pmePpCommGpu.get(),
@@ -257,7 +257,7 @@ static void pme_receive_force_ener(t_forcerec*           fr,
     {
         dd_cycles_add(cr->dd, cycles_seppme, ddCyclPME);
     }
-    wallcycle_stop(wcycle, ewcPP_PMEWAITRECVF);
+    wallcycle_stop(wcycle, WallCycleCounter::PpPmeWaitRecvF);
 }
 
 static void print_large_forces(FILE*                fp,
@@ -302,7 +302,7 @@ static void print_large_forces(FILE*                fp,
 
 //! When necessary, spreads forces on vsites and computes the virial for \p forceOutputs->forceWithShiftForces()
 static void postProcessForceWithShiftForces(t_nrnb*                   nrnb,
-                                            gmx_wallcycle_t           wcycle,
+                                            gmx_wallcycle*            wcycle,
                                             const matrix              box,
                                             ArrayRef<const RVec>      x,
                                             ForceOutputs*             forceOutputs,
@@ -342,7 +342,7 @@ static void postProcessForceWithShiftForces(t_nrnb*                   nrnb,
 static void postProcessForces(const t_commrec*          cr,
                               int64_t                   step,
                               t_nrnb*                   nrnb,
-                              gmx_wallcycle_t           wcycle,
+                              gmx_wallcycle*            wcycle,
                               const matrix              box,
                               ArrayRef<const RVec>      x,
                               ForceOutputs*             forceOutputs,
@@ -417,7 +417,7 @@ static void do_nb_verlet(t_forcerec*                fr,
                          const int                  clearF,
                          const int64_t              step,
                          t_nrnb*                    nrnb,
-                         gmx_wallcycle_t            wcycle)
+                         gmx_wallcycle*             wcycle)
 {
     if (!stepWork.computeNonbondedForces)
     {
@@ -438,9 +438,9 @@ static void do_nb_verlet(t_forcerec*                fr,
             /* Prune the pair-list beyond fr->ic->rlistPrune using
              * the current coordinates of the atoms.
              */
-            wallcycle_sub_start(wcycle, ewcsNONBONDED_PRUNING);
+            wallcycle_sub_start(wcycle, WallCycleSubCounter::NonbondedPruning);
             nbv->dispatchPruneKernelCpu(ilocality, fr->shift_vec);
-            wallcycle_sub_stop(wcycle, ewcsNONBONDED_PRUNING);
+            wallcycle_sub_stop(wcycle, WallCycleSubCounter::NonbondedPruning);
         }
     }
 
@@ -624,7 +624,7 @@ static void computeSpecialForces(FILE*                          fplog,
                                  pull_t*                        pull_work,
                                  int64_t                        step,
                                  double                         t,
-                                 gmx_wallcycle_t                wcycle,
+                                 gmx_wallcycle*                 wcycle,
                                  gmx::ForceProviders*           forceProviders,
                                  const matrix                   box,
                                  gmx::ArrayRef<const gmx::RVec> x,
@@ -697,10 +697,10 @@ static void computeSpecialForces(FILE*                          fplog,
     /* Add the forces from enforced rotation potentials (if any) */
     if (inputrec.bRot)
     {
-        wallcycle_start(wcycle, ewcROTadd);
+        wallcycle_start(wcycle, WallCycleCounter::RotAdd);
         enerd->term[F_COM_PULL] +=
                 add_rot_forces(enforcedRotation, forceWithVirialMtsLevel0->force_, cr, step, t);
-        wallcycle_stop(wcycle, ewcROTadd);
+        wallcycle_stop(wcycle, WallCycleCounter::RotAdd);
     }
 
     if (ed)
@@ -734,7 +734,7 @@ static inline void launchPmeGpuSpread(gmx_pme_t*            pmedata,
                                       const StepWorkload&   stepWork,
                                       GpuEventSynchronizer* xReadyOnDevice,
                                       const real            lambdaQ,
-                                      gmx_wallcycle_t       wcycle)
+                                      gmx_wallcycle*        wcycle)
 {
     pme_gpu_prepare_computation(pmedata, box, wcycle, stepWork);
     pme_gpu_launch_spread(pmedata, xReadyOnDevice, wcycle, lambdaQ);
@@ -751,7 +751,7 @@ static inline void launchPmeGpuSpread(gmx_pme_t*            pmedata,
  */
 static void launchPmeGpuFftAndGather(gmx_pme_t*               pmedata,
                                      const real               lambdaQ,
-                                     gmx_wallcycle_t          wcycle,
+                                     gmx_wallcycle*           wcycle,
                                      const gmx::StepWorkload& stepWork)
 {
     pme_gpu_launch_complex_transforms(pmedata, wcycle, stepWork);
@@ -783,7 +783,7 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t* nbv,
                                         gmx_enerdata_t*     enerd,
                                         const real          lambdaQ,
                                         const StepWorkload& stepWork,
-                                        gmx_wallcycle_t     wcycle)
+                                        gmx_wallcycle*      wcycle)
 {
     bool isPmeGpuDone = false;
     bool isNbGpuDone  = false;
@@ -839,9 +839,9 @@ static ForceOutputs setupForceOutputs(ForceHelperBuffers*                 forceH
                                       const DomainLifetimeWorkload&       domainWork,
                                       const StepWorkload&                 stepWork,
                                       const bool                          havePpDomainDecomposition,
-                                      gmx_wallcycle_t                     wcycle)
+                                      gmx_wallcycle*                      wcycle)
 {
-    wallcycle_sub_start(wcycle, ewcsCLEAR_FORCE_BUFFER);
+    wallcycle_sub_start(wcycle, WallCycleSubCounter::ClearForceBuffer);
 
     /* NOTE: We assume fr->shiftForces is all zeros here */
     gmx::ForceWithShiftForces forceWithShiftForces(
@@ -882,7 +882,7 @@ static ForceOutputs setupForceOutputs(ForceHelperBuffers*                 forceH
         clearRVecs(forceWithVirial.force_, true);
     }
 
-    wallcycle_sub_stop(wcycle, ewcsCLEAR_FORCE_BUFFER);
+    wallcycle_sub_stop(wcycle, WallCycleSubCounter::ClearForceBuffer);
 
     return ForceOutputs(
             forceWithShiftForces, forceHelperBuffers->haveDirectVirialContributions(), forceWithVirial);
@@ -992,7 +992,7 @@ static void launchGpuEndOfStepTasks(nonbonded_verlet_t*               nbv,
                                     const gmx::MdrunScheduleWorkload& runScheduleWork,
                                     bool                              useGpuPmeOnThisRank,
                                     int64_t                           step,
-                                    gmx_wallcycle_t                   wcycle)
+                                    gmx_wallcycle*                    wcycle)
 {
     if (runScheduleWork.simulationWork.useGpuNonbonded && runScheduleWork.stepWork.computeNonbondedForces)
     {
@@ -1006,11 +1006,11 @@ static void launchGpuEndOfStepTasks(nonbonded_verlet_t*               nbv,
         }
 
         /* now clear the GPU outputs while we finish the step on the CPU */
-        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+        wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
         Nbnxm::gpu_clear_outputs(nbv->gpu_nbv, runScheduleWork.stepWork.computeVirial);
-        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+        wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
     }
 
     if (useGpuPmeOnThisRank)
@@ -1195,7 +1195,7 @@ void do_force(FILE*                               fplog,
               pull_t*                             pull_work,
               int64_t                             step,
               t_nrnb*                             nrnb,
-              gmx_wallcycle_t                     wcycle,
+              gmx_wallcycle*                      wcycle,
               const gmx_localtop_t*               top,
               const matrix                        box,
               gmx::ArrayRefWithPadding<gmx::RVec> x,
@@ -1374,12 +1374,12 @@ void do_force(FILE*                               fplog,
             fr->wholeMoleculeTransform->updateForAtomPbcJumps(x.unpaddedArrayRef(), box);
         }
 
-        wallcycle_start(wcycle, ewcNS);
+        wallcycle_start(wcycle, WallCycleCounter::NS);
         if (!DOMAINDECOMP(cr))
         {
             const rvec vzero       = { 0.0_real, 0.0_real, 0.0_real };
             const rvec boxDiagonal = { box[XX][XX], box[YY][YY], box[ZZ][ZZ] };
-            wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL);
+            wallcycle_sub_start(wcycle, WallCycleSubCounter::NBSGridLocal);
             nbnxn_put_on_grid(nbv,
                               box,
                               0,
@@ -1392,30 +1392,30 @@ void do_force(FILE*                               fplog,
                               x.unpaddedArrayRef(),
                               0,
                               nullptr);
-            wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL);
+            wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSGridLocal);
         }
         else
         {
-            wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL);
+            wallcycle_sub_start(wcycle, WallCycleSubCounter::NBSGridNonLocal);
             nbnxn_put_on_grid_nonlocal(nbv, domdec_zones(cr->dd), fr->cginfo, x.unpaddedArrayRef());
-            wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
+            wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSGridNonLocal);
         }
 
         nbv->setAtomProperties(gmx::constArrayRefFromArray(mdatoms->typeA, mdatoms->nr),
                                gmx::constArrayRefFromArray(mdatoms->chargeA, mdatoms->nr),
                                fr->cginfo);
 
-        wallcycle_stop(wcycle, ewcNS);
+        wallcycle_stop(wcycle, WallCycleCounter::NS);
 
         /* initialize the GPU nbnxm atom data and bonded data structures */
         if (simulationWork.useGpuNonbonded)
         {
             // Note: cycle counting covers only the nonbondeds here; gpuBonded counts internally
-            wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-            wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+            wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+            wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
             Nbnxm::gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat.get());
-            wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-            wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+            wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+            wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
 
             if (fr->gpuBonded)
             {
@@ -1440,15 +1440,15 @@ void do_force(FILE*                               fplog,
         runScheduleWork->domainWork = setupDomainLifetimeWorkload(
                 inputrec, *fr, pull_work, ed, *mdatoms, simulationWork, stepWork);
 
-        wallcycle_start_nocount(wcycle, ewcNS);
-        wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::NS);
+        wallcycle_sub_start(wcycle, WallCycleSubCounter::NBSSearchLocal);
         /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
         nbv->constructPairlist(InteractionLocality::Local, top->excls, step, nrnb);
 
         nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::Local);
 
-        wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
-        wallcycle_stop(wcycle, ewcNS);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSSearchLocal);
+        wallcycle_stop(wcycle, WallCycleCounter::NS);
 
         if (stepWork.useGpuXBufferOps)
         {
@@ -1484,15 +1484,15 @@ void do_force(FILE*                               fplog,
     {
         ddBalanceRegionHandler.openBeforeForceComputationGpu();
 
-        wallcycle_start(wcycle, ewcLAUNCH_GPU);
-        wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+        wallcycle_start(wcycle, WallCycleCounter::LaunchGpu);
+        wallcycle_sub_start(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
         Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat.get());
         if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps)
         {
             Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), AtomLocality::Local);
         }
-        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+        wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
         // with X buffer ops offloaded to the GPU on all but the search steps
 
         // bonded work not split into separate local and non-local, so with DD
@@ -1503,11 +1503,11 @@ void do_force(FILE*                               fplog,
         }
 
         /* launch local nonbonded work on GPU */
-        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+        wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
         do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::Local, enbvClearFNo, step, nrnb, wcycle);
-        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+        wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
     }
 
     if (useGpuPmeOnThisRank)
@@ -1529,14 +1529,14 @@ void do_force(FILE*                               fplog,
         if (stepWork.doNeighborSearch)
         {
             // TODO: fuse this branch with the above large stepWork.doNeighborSearch block
-            wallcycle_start_nocount(wcycle, ewcNS);
-            wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
+            wallcycle_start_nocount(wcycle, WallCycleCounter::NS);
+            wallcycle_sub_start(wcycle, WallCycleSubCounter::NBSSearchNonLocal);
             /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
             nbv->constructPairlist(InteractionLocality::NonLocal, top->excls, step, nrnb);
 
             nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::NonLocal);
-            wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
-            wallcycle_stop(wcycle, ewcNS);
+            wallcycle_sub_stop(wcycle, WallCycleSubCounter::NBSSearchNonLocal);
+            wallcycle_stop(wcycle, WallCycleCounter::NS);
             // TODO refactor this GPU halo exchange re-initialisation
             // to location in do_md where GPU halo exchange is
             // constructed at partitioning, after above stateGpu
@@ -1593,11 +1593,11 @@ void do_force(FILE*                               fplog,
 
             if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps)
             {
-                wallcycle_start(wcycle, ewcLAUNCH_GPU);
-                wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+                wallcycle_start(wcycle, WallCycleCounter::LaunchGpu);
+                wallcycle_sub_start(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
                 Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), AtomLocality::NonLocal);
-                wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-                wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+                wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+                wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
             }
 
             if (domainWork.haveGpuBondedWork)
@@ -1606,32 +1606,32 @@ void do_force(FILE*                               fplog,
             }
 
             /* launch non-local nonbonded tasks on GPU */
-            wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-            wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+            wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+            wallcycle_sub_start(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
             do_nb_verlet(fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFNo, step, nrnb, wcycle);
-            wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-            wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+            wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
+            wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
         }
     }
 
     if (simulationWork.useGpuNonbonded && stepWork.computeNonbondedForces)
     {
         /* launch D2H copy-back F */
-        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::LaunchGpu);
+        wallcycle_sub_start_nocount(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
 
         if (havePPDomainDecomposition(cr))
         {
             Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), stepWork, AtomLocality::NonLocal);
         }
         Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), stepWork, AtomLocality::Local);
-        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
+        wallcycle_sub_stop(wcycle, WallCycleSubCounter::LaunchGpuNonBonded);
 
         if (domainWork.haveGpuBondedWork && stepWork.computeEnergy)
         {
             fr->gpuBonded->launchEnergyTransfer();
         }
-        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
+        wallcycle_stop(wcycle, WallCycleCounter::LaunchGpu);
     }
 
     gmx::ArrayRef<const gmx::RVec> xWholeMolecules;
@@ -1676,7 +1676,7 @@ void do_force(FILE*                               fplog,
 
     if (DOMAINDECOMP(cr) && !thisRankHasDuty(cr, DUTY_PME))
     {
-        wallcycle_start(wcycle, ewcPPDURINGPME);
+        wallcycle_start(wcycle, WallCycleCounter::PpDuringPme);
         dd_force_flop_start(cr->dd, nrnb);
     }
 
@@ -1691,15 +1691,15 @@ void do_force(FILE*                               fplog,
 
     if (inputrec.bRot)
     {
-        wallcycle_start(wcycle, ewcROT);
+        wallcycle_start(wcycle, WallCycleCounter::Rot);
         do_rotation(cr, enforcedRotation, box, x.unpaddedConstArrayRef(), t, step, stepWork.doNeighborSearch);
-        wallcycle_stop(wcycle, ewcROT);
+        wallcycle_stop(wcycle, WallCycleCounter::Rot);
     }
 
     /* Start the force cycle counter.
      * Note that a different counter is used for dynamic load balancing.
      */
-    wallcycle_start(wcycle, ewcFORCE);
+    wallcycle_start(wcycle, WallCycleCounter::Force);
 
     /* Set up and clear force outputs:
      * forceOutMtsLevel0:  everything except what is in the other two outputs
@@ -1799,10 +1799,10 @@ void do_force(FILE*                               fplog,
              * This can be split into a local and a non-local part when overlapping
              * communication with calculation with domain decomposition.
              */
-            wallcycle_stop(wcycle, ewcFORCE);
+            wallcycle_stop(wcycle, WallCycleCounter::Force);
             nbv->atomdata_add_nbat_f_to_f(AtomLocality::All,
                                           forceOutNonbonded->forceWithShiftForces().force());
-            wallcycle_start_nocount(wcycle, ewcFORCE);
+            wallcycle_start_nocount(wcycle, WallCycleCounter::Force);
         }
 
         /* If there are multiple fshift output buffers we need to reduce them */
@@ -1818,10 +1818,10 @@ void do_force(FILE*                               fplog,
     // TODO Force flags should include haveFreeEnergyWork for this domain
     if (stepWork.useGpuXHalo && (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork))
     {
-        wallcycle_stop(wcycle, ewcFORCE);
+        wallcycle_stop(wcycle, WallCycleCounter::Force);
         /* Wait for non-local coordinate data to be copied from device */
         stateGpu->waitCoordinatesReadyOnHost(AtomLocality::NonLocal);
-        wallcycle_start_nocount(wcycle, ewcFORCE);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::Force);
     }
 
     // Compute wall interactions, when present.
@@ -1906,7 +1906,7 @@ void do_force(FILE*                               fplog,
                                      ddBalanceRegionHandler);
     }
 
-    wallcycle_stop(wcycle, ewcFORCE);
+    wallcycle_stop(wcycle, WallCycleCounter::Force);
 
     // VdW dispersion correction, only computed on master rank to avoid double counting
     if ((stepWork.computeEnergy || stepWork.computeVirial) && fr->dispersionCorrection && MASTER(cr))
@@ -1983,10 +1983,10 @@ void do_force(FILE*                               fplog,
             }
             else
             {
-                wallcycle_start_nocount(wcycle, ewcFORCE);
+                wallcycle_start_nocount(wcycle, WallCycleCounter::Force);
                 do_nb_verlet(
                         fr, ic, enerd, stepWork, InteractionLocality::NonLocal, enbvClearFYes, step, nrnb, wcycle);
-                wallcycle_stop(wcycle, ewcFORCE);
+                wallcycle_stop(wcycle, WallCycleCounter::Force);
             }
 
             if (stepWork.useGpuFBufferOps)
@@ -2152,7 +2152,7 @@ void do_force(FILE*                               fplog,
     {
         // NOTE: emulation kernel is not included in the balancing region,
         // but emulation mode does not target performance anyway
-        wallcycle_start_nocount(wcycle, ewcFORCE);
+        wallcycle_start_nocount(wcycle, WallCycleCounter::Force);
         do_nb_verlet(fr,
                      ic,
                      enerd,
@@ -2162,7 +2162,7 @@ void do_force(FILE*                               fplog,
                      step,
                      nrnb,
                      wcycle);
-        wallcycle_stop(wcycle, ewcFORCE);
+        wallcycle_stop(wcycle, WallCycleCounter::Force);
     }
 
     // If on GPU PME-PP comms path, receive forces from PME before GPU buffer ops