From bb53feb3a44ffe7ff85b3632a6149689518581c1 Mon Sep 17 00:00:00 2001 From: Paul Bauer Date: Fri, 12 Mar 2021 11:44:11 +0000 Subject: [PATCH] Modernize PME GPU timing enums Part of general modernization of enums to enum classes. Used enumerationarray in places where it made sense. --- src/gromacs/ewald/pme_gpu_internal.cpp | 24 ++++++------- src/gromacs/ewald/pme_gpu_timings.cpp | 38 ++++++++++----------- src/gromacs/ewald/pme_gpu_timings.h | 12 ++++--- src/gromacs/ewald/pme_gpu_types_host_impl.h | 5 +-- src/gromacs/timing/gpu_timing.h | 24 +++++++------ src/gromacs/timing/wallcycle.cpp | 25 +++++++++----- 6 files changed, 69 insertions(+), 59 deletions(-) diff --git a/src/gromacs/ewald/pme_gpu_internal.cpp b/src/gromacs/ewald/pme_gpu_internal.cpp index d66865569b..5f4a4fd289 100644 --- a/src/gromacs/ewald/pme_gpu_internal.cpp +++ b/src/gromacs/ewald/pme_gpu_internal.cpp @@ -1016,23 +1016,23 @@ void pme_gpu_reinit_atoms(PmeGpu* pmeGpu, const int nAtoms, const real* chargesA * In CUDA result can be nullptr stub, per GpuRegionTimer implementation. * * \param[in] pmeGpu The PME GPU data structure. - * \param[in] PMEStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h + * \param[in] pmeStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h */ -static CommandEvent* pme_gpu_fetch_timing_event(const PmeGpu* pmeGpu, size_t PMEStageId) +static CommandEvent* pme_gpu_fetch_timing_event(const PmeGpu* pmeGpu, PmeStage pmeStageId) { CommandEvent* timingEvent = nullptr; if (pme_gpu_timings_enabled(pmeGpu)) { - GMX_ASSERT(PMEStageId < pmeGpu->archSpecific->timingEvents.size(), - "Wrong PME GPU timing event index"); - timingEvent = pmeGpu->archSpecific->timingEvents[PMEStageId].fetchNextEvent(); + GMX_ASSERT(pmeStageId < PmeStage::Count, "Wrong PME GPU timing event index"); + timingEvent = pmeGpu->archSpecific->timingEvents[pmeStageId].fetchNextEvent(); } return timingEvent; } void pme_gpu_3dfft(const PmeGpu* pmeGpu, gmx_fft_direction dir, const int grid_index) { - int timerId = (dir == GMX_FFT_REAL_TO_COMPLEX) ? gtPME_FFT_R2C : gtPME_FFT_C2R; + PmeStage timerId = (dir == GMX_FFT_REAL_TO_COMPLEX) ? 
PmeStage::FftTransformR2C + : PmeStage::FftTransformC2R; pme_gpu_start_timing(pmeGpu, timerId); pmeGpu->archSpecific->fftSetup[grid_index]->perform3dFft( @@ -1313,13 +1313,13 @@ void pme_gpu_spread(const PmeGpu* pmeGpu, config.gridSize[0] = dimGrid.first; config.gridSize[1] = dimGrid.second; - int timingId; + PmeStage timingId; PmeGpuProgramImpl::PmeKernelHandle kernelPtr = nullptr; if (computeSplines) { if (spreadCharges) { - timingId = gtPME_SPLINEANDSPREAD; + timingId = PmeStage::SplineAndSpread; kernelPtr = selectSplineAndSpreadKernelPtr(pmeGpu, pmeGpu->settings.threadsPerAtom, writeGlobal || (!recalculateSplines), @@ -1327,7 +1327,7 @@ void pme_gpu_spread(const PmeGpu* pmeGpu, } else { - timingId = gtPME_SPLINE; + timingId = PmeStage::Spline; kernelPtr = selectSplineKernelPtr(pmeGpu, pmeGpu->settings.threadsPerAtom, writeGlobal || (!recalculateSplines), @@ -1336,7 +1336,7 @@ void pme_gpu_spread(const PmeGpu* pmeGpu, } else { - timingId = gtPME_SPREAD; + timingId = PmeStage::Spread; kernelPtr = selectSpreadKernelPtr(pmeGpu, pmeGpu->settings.threadsPerAtom, writeGlobal || (!recalculateSplines), @@ -1463,7 +1463,7 @@ void pme_gpu_solve(const PmeGpu* pmeGpu, / gridLinesPerBlock; config.gridSize[2] = pmeGpu->kernelParams->grid.complexGridSize[majorDim]; - int timingId = gtPME_SOLVE; + PmeStage timingId = PmeStage::Solve; PmeGpuProgramImpl::PmeKernelHandle kernelPtr = nullptr; if (gridOrdering == GridOrdering::YZX) { @@ -1655,7 +1655,7 @@ void pme_gpu_gather(PmeGpu* pmeGpu, real** h_grids, const float lambda) // TODO test different cache configs - int timingId = gtPME_GATHER; + PmeStage timingId = PmeStage::Gather; PmeGpuProgramImpl::PmeKernelHandle kernelPtr = selectGatherKernelPtr(pmeGpu, pmeGpu->settings.threadsPerAtom, diff --git a/src/gromacs/ewald/pme_gpu_timings.cpp b/src/gromacs/ewald/pme_gpu_timings.cpp index 3a1f457468..205564b6a5 100644 --- a/src/gromacs/ewald/pme_gpu_timings.cpp +++ b/src/gromacs/ewald/pme_gpu_timings.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
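
For readers following the conversion above, here is a minimal standalone sketch (not GROMACS code; FftDirection is a hypothetical stand-in for gmx_fft_direction) of the pattern the patch adopts in pme_gpu_fetch_timing_event() and pme_gpu_3dfft(): a scoped enum with a trailing Count sentinel, selected with a ternary and bounds-checked without consulting an array's size().

#include <cassert>
#include <cstdio>

// Mirrors the PmeStage enumerators introduced by this patch.
enum class PmeStage : int
{
    Spline = 0,
    Spread,
    SplineAndSpread,
    FftTransformR2C,
    Solve,
    FftTransformC2R,
    Gather,
    Count // static array size, not a real stage
};

// Hypothetical stand-in for gmx_fft_direction, only for this sketch.
enum class FftDirection
{
    RealToComplex,
    ComplexToReal
};

int main()
{
    const FftDirection dir = FftDirection::RealToComplex;

    // Same selection pattern as pme_gpu_3dfft() after the patch.
    const PmeStage timerId = (dir == FftDirection::RealToComplex) ? PmeStage::FftTransformR2C
                                                                  : PmeStage::FftTransformC2R;

    // Relational operators are defined between values of the same scoped enum,
    // so the bounds check no longer needs timingEvents.size().
    assert(timerId < PmeStage::Count);

    // A scoped enum does not convert to int implicitly; any place that still
    // needs a plain index has to say so explicitly.
    std::printf("timer index = %d\n", static_cast<int>(timerId));
    return 0;
}
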
@@ -55,23 +55,21 @@ bool pme_gpu_timings_enabled(const PmeGpu* pmeGpu) return pmeGpu->archSpecific->useTiming; } -void pme_gpu_start_timing(const PmeGpu* pmeGpu, size_t PMEStageId) +void pme_gpu_start_timing(const PmeGpu* pmeGpu, PmeStage pmeStageId) { if (pme_gpu_timings_enabled(pmeGpu)) { - GMX_ASSERT(PMEStageId < pmeGpu->archSpecific->timingEvents.size(), - "Wrong PME GPU timing event index"); - pmeGpu->archSpecific->timingEvents[PMEStageId].openTimingRegion(pmeGpu->archSpecific->pmeStream_); + GMX_ASSERT(pmeStageId < PmeStage::Count, "Wrong PME GPU timing event index"); + pmeGpu->archSpecific->timingEvents[pmeStageId].openTimingRegion(pmeGpu->archSpecific->pmeStream_); } } -void pme_gpu_stop_timing(const PmeGpu* pmeGpu, size_t PMEStageId) +void pme_gpu_stop_timing(const PmeGpu* pmeGpu, PmeStage pmeStageId) { if (pme_gpu_timings_enabled(pmeGpu)) { - GMX_ASSERT(PMEStageId < pmeGpu->archSpecific->timingEvents.size(), - "Wrong PME GPU timing event index"); - pmeGpu->archSpecific->timingEvents[PMEStageId].closeTimingRegion(pmeGpu->archSpecific->pmeStream_); + GMX_ASSERT(pmeStageId < PmeStage::Count, "Wrong PME GPU timing event index"); + pmeGpu->archSpecific->timingEvents[pmeStageId].closeTimingRegion(pmeGpu->archSpecific->pmeStream_); } } @@ -80,10 +78,10 @@ void pme_gpu_get_timings(const PmeGpu* pmeGpu, gmx_wallclock_gpu_pme_t* timings) if (pme_gpu_timings_enabled(pmeGpu)) { GMX_RELEASE_ASSERT(timings, "Null GPU timing pointer"); - for (size_t i = 0; i < pmeGpu->archSpecific->timingEvents.size(); i++) + for (auto key : keysOf(timings->timing)) { - timings->timing[i].t = pmeGpu->archSpecific->timingEvents[i].getTotalTime(); - timings->timing[i].c = pmeGpu->archSpecific->timingEvents[i].getCallCount(); + timings->timing[key].t = pmeGpu->archSpecific->timingEvents[key].getTotalTime(); + timings->timing[key].c = pmeGpu->archSpecific->timingEvents[key].getCallCount(); } } } @@ -92,7 +90,7 @@ void pme_gpu_update_timings(const PmeGpu* pmeGpu) { if (pme_gpu_timings_enabled(pmeGpu)) { - for (const size_t& activeTimer : pmeGpu->archSpecific->activeTimers) + for (const auto& activeTimer : pmeGpu->archSpecific->activeTimers) { pmeGpu->archSpecific->timingEvents[activeTimer].getLastRangeTime(); } @@ -104,21 +102,21 @@ void pme_gpu_reinit_timings(const PmeGpu* pmeGpu) if (pme_gpu_timings_enabled(pmeGpu)) { pmeGpu->archSpecific->activeTimers.clear(); - pmeGpu->archSpecific->activeTimers.insert(gtPME_SPLINEANDSPREAD); + pmeGpu->archSpecific->activeTimers.insert(PmeStage::SplineAndSpread); const auto& settings = pme_gpu_settings(pmeGpu); // TODO: no separate gtPME_SPLINE and gtPME_SPREAD as they are not used currently if (settings.performGPUFFT) { - pmeGpu->archSpecific->activeTimers.insert(gtPME_FFT_C2R); - pmeGpu->archSpecific->activeTimers.insert(gtPME_FFT_R2C); + pmeGpu->archSpecific->activeTimers.insert(PmeStage::FftTransformC2R); + pmeGpu->archSpecific->activeTimers.insert(PmeStage::FftTransformR2C); } if (settings.performGPUSolve) { - pmeGpu->archSpecific->activeTimers.insert(gtPME_SOLVE); + pmeGpu->archSpecific->activeTimers.insert(PmeStage::Solve); } if (settings.performGPUGather) { - pmeGpu->archSpecific->activeTimers.insert(gtPME_GATHER); + pmeGpu->archSpecific->activeTimers.insert(PmeStage::Gather); } } } @@ -127,9 +125,9 @@ void pme_gpu_reset_timings(const PmeGpu* pmeGpu) { if (pme_gpu_timings_enabled(pmeGpu)) { - for (size_t i = 0; i < pmeGpu->archSpecific->timingEvents.size(); i++) + for (auto key : keysOf(pmeGpu->archSpecific->timingEvents)) { - 
pmeGpu->archSpecific->timingEvents[i].reset(); + pmeGpu->archSpecific->timingEvents[key].reset(); } } } diff --git a/src/gromacs/ewald/pme_gpu_timings.h b/src/gromacs/ewald/pme_gpu_timings.h index f7c222b6b2..d57452215e 100644 --- a/src/gromacs/ewald/pme_gpu_timings.h +++ b/src/gromacs/ewald/pme_gpu_timings.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -48,21 +48,23 @@ struct gmx_wallclock_gpu_pme_t; struct PmeGpu; +enum class PmeStage : int; + /*! \libinternal \brief * Starts timing the certain PME GPU stage during a single computation (if timings are enabled). * * \param[in] pmeGpu The PME GPU data structure. - * \param[in] PMEStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h + * \param[in] pmeStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h */ -void pme_gpu_start_timing(const PmeGpu* pmeGpu, size_t PMEStageId); +void pme_gpu_start_timing(const PmeGpu* pmeGpu, PmeStage pmeStageId); /*! \libinternal \brief * Stops timing the certain PME GPU stage during a single computation (if timings are enabled). * * \param[in] pmeGpu The PME GPU data structure. - * \param[in] PMEStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h + * \param[in] pmeStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h */ -void pme_gpu_stop_timing(const PmeGpu* pmeGpu, size_t PMEStageId); +void pme_gpu_stop_timing(const PmeGpu* pmeGpu, PmeStage pmeStageId); /*! \brief * Tells if CUDA-based performance tracking is enabled for PME. diff --git a/src/gromacs/ewald/pme_gpu_types_host_impl.h b/src/gromacs/ewald/pme_gpu_types_host_impl.h index 976e064cb2..8204a9123f 100644 --- a/src/gromacs/ewald/pme_gpu_types_host_impl.h +++ b/src/gromacs/ewald/pme_gpu_types_host_impl.h @@ -45,6 +45,7 @@ #define PMEGPUTYPESHOSTIMPL_H #include "config.h" +#include "gromacs/utility/enumerationhelpers.h" #include #include @@ -119,10 +120,10 @@ struct PmeGpuSpecific std::vector> fftSetup; //! All the timers one might use - std::array timingEvents; + gmx::EnumerationArray timingEvents; //! Indices of timingEvents actually used - std::set activeTimers; + std::set activeTimers; /* GPU arrays element counts (not the arrays sizes in bytes!). * They might be larger than the actual meaningful data sizes. diff --git a/src/gromacs/timing/gpu_timing.h b/src/gromacs/timing/gpu_timing.h index bcefcac5ba..24768bc59a 100644 --- a/src/gromacs/timing/gpu_timing.h +++ b/src/gromacs/timing/gpu_timing.h @@ -2,7 +2,7 @@ * This file is part of the GROMACS molecular simulation package. * * Copyright (c) 2012,2014,2015,2016,2017 by the GROMACS development team. - * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
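
The member change above swaps std::array<GpuRegionTimer, gtPME_EVENT_COUNT> for gmx::EnumerationArray<PmeStage, GpuRegionTimer>, which is indexed directly by PmeStage values and iterated with keysOf(), as in the pme_gpu_get_timings() and pme_gpu_reset_timings() loops earlier in this patch. Below is a rough, self-contained sketch of that idea; EnumerationArrayLike and its keysOf() are simplified stand-ins written for this note, not the real gmx::EnumerationArray from gromacs/utility/enumerationhelpers.h.

#include <array>
#include <cstddef>
#include <cstdio>

enum class PmeStage : int { Spline, Spread, SplineAndSpread, FftTransformR2C,
                            Solve, FftTransformC2R, Gather, Count };

// Simplified stand-in: a fixed-size array indexed by the enum itself.
template<typename Enum, typename T>
struct EnumerationArrayLike
{
    std::array<T, static_cast<std::size_t>(Enum::Count)> data_;

    T&       operator[](Enum e)       { return data_[static_cast<std::size_t>(e)]; }
    const T& operator[](Enum e) const { return data_[static_cast<std::size_t>(e)]; }
};

// Minimal keysOf(): yields every enumerator before Count, mirroring the
// range-for loops the patch uses instead of raw size_t counters.
template<typename Enum, typename T>
std::array<Enum, static_cast<std::size_t>(Enum::Count)> keysOf(const EnumerationArrayLike<Enum, T>&)
{
    std::array<Enum, static_cast<std::size_t>(Enum::Count)> keys{};
    for (std::size_t i = 0; i < keys.size(); ++i)
    {
        keys[i] = static_cast<Enum>(i);
    }
    return keys;
}

int main()
{
    // Plays the role of PmeGpuSpecific::timingEvents (plain floats here).
    EnumerationArrayLike<PmeStage, float> timingEvents{};
    timingEvents[PmeStage::Gather] = 1.5F;

    double total = 0.0;
    for (PmeStage key : keysOf(timingEvents)) // type-safe: no raw index in sight
    {
        total += timingEvents[key];
    }
    std::printf("total = %g\n", total);
    return 0;
}
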
@@ -44,6 +44,8 @@ #ifndef GMX_TIMING_GPU_TIMING_H #define GMX_TIMING_GPU_TIMING_H +#include "gromacs/utility/enumerationhelpers.h" + /*! \internal \brief GPU kernel time and call count. */ struct gmx_kernel_timing_data_t { @@ -54,16 +56,16 @@ struct gmx_kernel_timing_data_t /*! \internal \brief * PME GPU stages timing events indices, corresponding to the string in PMEStageNames in wallcycle.cpp. */ -enum +enum class PmeStage : int { - gtPME_SPLINE = 0, - gtPME_SPREAD, - gtPME_SPLINEANDSPREAD, - gtPME_FFT_R2C, - gtPME_SOLVE, - gtPME_FFT_C2R, - gtPME_GATHER, - gtPME_EVENT_COUNT /* not a stage ID but a static array size */ + Spline = 0, + Spread, + SplineAndSpread, + FftTransformR2C, + Solve, + FftTransformC2R, + Gather, + Count /* not a stage ID but a static array size */ }; /*! \internal \brief GPU timings for PME. */ @@ -73,7 +75,7 @@ struct gmx_wallclock_gpu_pme_t * TODO: devise a better GPU timing data structuring. */ /*! \brief Array of PME GPU timing data. */ - gmx_kernel_timing_data_t timing[gtPME_EVENT_COUNT]; + gmx::EnumerationArray timing; }; /*! \internal \brief GPU NB timings for kernels and H2d/D2H transfers. */ diff --git a/src/gromacs/timing/wallcycle.cpp b/src/gromacs/timing/wallcycle.cpp index 899c7ea709..22f0c45e4a 100644 --- a/src/gromacs/timing/wallcycle.cpp +++ b/src/gromacs/timing/wallcycle.cpp @@ -195,9 +195,13 @@ static const char* wcsn[ewcsNR] = { }; /* PME GPU timing events' names - correspond to the enum in the gpu_timing.h */ -static const char* PMEStageNames[] = { - "PME spline", "PME spread", "PME spline + spread", "PME 3D-FFT r2c", - "PME solve", "PME 3D-FFT c2r", "PME gather", +static const char* enumValuetoString(PmeStage enumValue) +{ + constexpr gmx::EnumerationArray pmeStageNames = { + "PME spline", "PME spread", "PME spline + spread", "PME 3D-FFT r2c", + "PME solve", "PME 3D-FFT c2r", "PME gather" + }; + return pmeStageNames[enumValue]; }; gmx_bool wallcycle_have_counter() @@ -994,9 +998,9 @@ void wallcycle_print(FILE* fplog, double tot_gpu = 0.0; if (gpu_pme_t) { - for (size_t k = 0; k < gtPME_EVENT_COUNT; k++) + for (auto key : keysOf(gpu_pme_t->timing)) { - tot_gpu += gpu_pme_t->timing[k].t; + tot_gpu += gpu_pme_t->timing[key].t; } } if (gpu_nbnxn_t) @@ -1046,12 +1050,15 @@ void wallcycle_print(FILE* fplog, } if (gpu_pme_t) { - for (size_t k = 0; k < gtPME_EVENT_COUNT; k++) + for (auto key : keysOf(gpu_pme_t->timing)) { - if (gpu_pme_t->timing[k].c) + if (gpu_pme_t->timing[key].c) { - print_gputimes( - fplog, PMEStageNames[k], gpu_pme_t->timing[k].c, gpu_pme_t->timing[k].t, tot_gpu); + print_gputimes(fplog, + enumValuetoString(key), + gpu_pme_t->timing[key].c, + gpu_pme_t->timing[key].t, + tot_gpu); } } } -- 2.22.0
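
As a closing illustration, the wallcycle.cpp hunk above replaces the free-standing PMEStageNames[] table with a lookup function, so a stage name can only be fetched through a PmeStage value rather than a loose integer. The sketch below is standalone (std::array in place of gmx::EnumerationArray, which is an assumption made only to keep it compilable on its own) but keeps the function name and strings from the patch.

#include <array>
#include <cstddef>
#include <cstdio>

enum class PmeStage : int
{
    Spline = 0,
    Spread,
    SplineAndSpread,
    FftTransformR2C,
    Solve,
    FftTransformC2R,
    Gather,
    Count
};

static const char* enumValuetoString(PmeStage enumValue)
{
    // One entry per enumerator before Count; strings as in the patch.
    static constexpr std::array<const char*, static_cast<std::size_t>(PmeStage::Count)> pmeStageNames = {
        "PME spline", "PME spread", "PME spline + spread", "PME 3D-FFT r2c",
        "PME solve",  "PME 3D-FFT c2r", "PME gather"
    };
    return pmeStageNames[static_cast<std::size_t>(enumValue)];
}

int main()
{
    std::printf("%s\n", enumValuetoString(PmeStage::Gather)); // prints: PME gather
    return 0;
}
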