Part of the general modernization of enums to enum classes: the gtPME_ timing stage indices become the PmeStage enum class.
Used gmx::EnumerationArray in places where it made sense.
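
For context, a minimal sketch of the pattern (hypothetical names, not code from this change; it assumes only gmx::EnumerationArray and gmx::keysOf from gromacs/utility/enumerationhelpers.h): an enum class ending in a Count sentinel indexes a fixed-size array and is iterated over typed keys instead of raw size_t indices.

    #include "gromacs/utility/enumerationhelpers.h"

    // Hypothetical stage enum; Count is not a real stage, it only sizes the array.
    enum class Stage : int
    {
        First,
        Second,
        Count
    };

    // EnumerationArray<Stage, double> holds one double per Stage value and is
    // indexed by the enum itself, so raw integer indices no longer compile.
    double totalTime(const gmx::EnumerationArray<Stage, double>& perStageTime)
    {
        double total = 0.0;
        // keysOf() yields each enum key in declaration order, replacing
        // "for (size_t i = 0; i < Count; i++)" loops over the array.
        for (auto key : gmx::keysOf(perStageTime))
        {
            total += perStageTime[key];
        }
        return total;
    }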
* In CUDA, the result can be a nullptr stub, per the GpuRegionTimer implementation.
*
* \param[in] pmeGpu The PME GPU data structure.
- * \param[in] PMEStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h
+ * \param[in] pmeStageId The PME GPU stage, from the PmeStage enum in src/gromacs/timing/gpu_timing.h
*/
-static CommandEvent* pme_gpu_fetch_timing_event(const PmeGpu* pmeGpu, size_t PMEStageId)
+static CommandEvent* pme_gpu_fetch_timing_event(const PmeGpu* pmeGpu, PmeStage pmeStageId)
{
CommandEvent* timingEvent = nullptr;
if (pme_gpu_timings_enabled(pmeGpu))
{
- GMX_ASSERT(PMEStageId < pmeGpu->archSpecific->timingEvents.size(),
- "Wrong PME GPU timing event index");
- timingEvent = pmeGpu->archSpecific->timingEvents[PMEStageId].fetchNextEvent();
+ GMX_ASSERT(pmeStageId < PmeStage::Count, "Wrong PME GPU timing event index");
+ timingEvent = pmeGpu->archSpecific->timingEvents[pmeStageId].fetchNextEvent();
}
return timingEvent;
}
void pme_gpu_3dfft(const PmeGpu* pmeGpu, gmx_fft_direction dir, const int grid_index)
{
- int timerId = (dir == GMX_FFT_REAL_TO_COMPLEX) ? gtPME_FFT_R2C : gtPME_FFT_C2R;
+ PmeStage timerId = (dir == GMX_FFT_REAL_TO_COMPLEX) ? PmeStage::FftTransformR2C
+ : PmeStage::FftTransformC2R;
pme_gpu_start_timing(pmeGpu, timerId);
pmeGpu->archSpecific->fftSetup[grid_index]->perform3dFft(
config.gridSize[0] = dimGrid.first;
config.gridSize[1] = dimGrid.second;
- int timingId;
+ PmeStage timingId;
PmeGpuProgramImpl::PmeKernelHandle kernelPtr = nullptr;
if (computeSplines)
{
if (spreadCharges)
{
- timingId = gtPME_SPLINEANDSPREAD;
+ timingId = PmeStage::SplineAndSpread;
kernelPtr = selectSplineAndSpreadKernelPtr(pmeGpu,
pmeGpu->settings.threadsPerAtom,
writeGlobal || (!recalculateSplines),
}
else
{
- timingId = gtPME_SPLINE;
+ timingId = PmeStage::Spline;
kernelPtr = selectSplineKernelPtr(pmeGpu,
pmeGpu->settings.threadsPerAtom,
writeGlobal || (!recalculateSplines),
}
else
{
- timingId = gtPME_SPREAD;
+ timingId = PmeStage::Spread;
kernelPtr = selectSpreadKernelPtr(pmeGpu,
pmeGpu->settings.threadsPerAtom,
writeGlobal || (!recalculateSplines),
/ gridLinesPerBlock;
config.gridSize[2] = pmeGpu->kernelParams->grid.complexGridSize[majorDim];
- int timingId = gtPME_SOLVE;
+ PmeStage timingId = PmeStage::Solve;
PmeGpuProgramImpl::PmeKernelHandle kernelPtr = nullptr;
if (gridOrdering == GridOrdering::YZX)
{
// TODO test different cache configs
- int timingId = gtPME_GATHER;
+ PmeStage timingId = PmeStage::Gather;
PmeGpuProgramImpl::PmeKernelHandle kernelPtr =
selectGatherKernelPtr(pmeGpu,
pmeGpu->settings.threadsPerAtom,
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
return pmeGpu->archSpecific->useTiming;
}
-void pme_gpu_start_timing(const PmeGpu* pmeGpu, size_t PMEStageId)
+void pme_gpu_start_timing(const PmeGpu* pmeGpu, PmeStage pmeStageId)
{
if (pme_gpu_timings_enabled(pmeGpu))
{
- GMX_ASSERT(PMEStageId < pmeGpu->archSpecific->timingEvents.size(),
- "Wrong PME GPU timing event index");
- pmeGpu->archSpecific->timingEvents[PMEStageId].openTimingRegion(pmeGpu->archSpecific->pmeStream_);
+ GMX_ASSERT(pmeStageId < PmeStage::Count, "Wrong PME GPU timing event index");
+ pmeGpu->archSpecific->timingEvents[pmeStageId].openTimingRegion(pmeGpu->archSpecific->pmeStream_);
}
}
-void pme_gpu_stop_timing(const PmeGpu* pmeGpu, size_t PMEStageId)
+void pme_gpu_stop_timing(const PmeGpu* pmeGpu, PmeStage pmeStageId)
{
if (pme_gpu_timings_enabled(pmeGpu))
{
- GMX_ASSERT(PMEStageId < pmeGpu->archSpecific->timingEvents.size(),
- "Wrong PME GPU timing event index");
- pmeGpu->archSpecific->timingEvents[PMEStageId].closeTimingRegion(pmeGpu->archSpecific->pmeStream_);
+ GMX_ASSERT(pmeStageId < PmeStage::Count, "Wrong PME GPU timing event index");
+ pmeGpu->archSpecific->timingEvents[pmeStageId].closeTimingRegion(pmeGpu->archSpecific->pmeStream_);
}
}
if (pme_gpu_timings_enabled(pmeGpu))
{
GMX_RELEASE_ASSERT(timings, "Null GPU timing pointer");
- for (size_t i = 0; i < pmeGpu->archSpecific->timingEvents.size(); i++)
+ for (auto key : keysOf(timings->timing))
{
- timings->timing[i].t = pmeGpu->archSpecific->timingEvents[i].getTotalTime();
- timings->timing[i].c = pmeGpu->archSpecific->timingEvents[i].getCallCount();
+ timings->timing[key].t = pmeGpu->archSpecific->timingEvents[key].getTotalTime();
+ timings->timing[key].c = pmeGpu->archSpecific->timingEvents[key].getCallCount();
}
}
}
{
if (pme_gpu_timings_enabled(pmeGpu))
{
- for (const size_t& activeTimer : pmeGpu->archSpecific->activeTimers)
+ for (const auto& activeTimer : pmeGpu->archSpecific->activeTimers)
{
pmeGpu->archSpecific->timingEvents[activeTimer].getLastRangeTime();
}
if (pme_gpu_timings_enabled(pmeGpu))
{
pmeGpu->archSpecific->activeTimers.clear();
- pmeGpu->archSpecific->activeTimers.insert(gtPME_SPLINEANDSPREAD);
+ pmeGpu->archSpecific->activeTimers.insert(PmeStage::SplineAndSpread);
const auto& settings = pme_gpu_settings(pmeGpu);
- // TODO: no separate gtPME_SPLINE and gtPME_SPREAD as they are not used currently
+ // TODO: no separate PmeStage::Spline and PmeStage::Spread as they are not used currently
if (settings.performGPUFFT)
{
- pmeGpu->archSpecific->activeTimers.insert(gtPME_FFT_C2R);
- pmeGpu->archSpecific->activeTimers.insert(gtPME_FFT_R2C);
+ pmeGpu->archSpecific->activeTimers.insert(PmeStage::FftTransformC2R);
+ pmeGpu->archSpecific->activeTimers.insert(PmeStage::FftTransformR2C);
}
if (settings.performGPUSolve)
{
- pmeGpu->archSpecific->activeTimers.insert(gtPME_SOLVE);
+ pmeGpu->archSpecific->activeTimers.insert(PmeStage::Solve);
}
if (settings.performGPUGather)
{
- pmeGpu->archSpecific->activeTimers.insert(gtPME_GATHER);
+ pmeGpu->archSpecific->activeTimers.insert(PmeStage::Gather);
}
}
}
{
if (pme_gpu_timings_enabled(pmeGpu))
{
- for (size_t i = 0; i < pmeGpu->archSpecific->timingEvents.size(); i++)
+ for (auto key : keysOf(pmeGpu->archSpecific->timingEvents))
{
- pmeGpu->archSpecific->timingEvents[i].reset();
+ pmeGpu->archSpecific->timingEvents[key].reset();
}
}
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
struct gmx_wallclock_gpu_pme_t;
struct PmeGpu;
+enum class PmeStage : int;
+
/*! \libinternal \brief
* Starts timing the given PME GPU stage during a single computation (if timings are enabled).
*
* \param[in] pmeGpu The PME GPU data structure.
- * \param[in] PMEStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h
+ * \param[in] pmeStageId The PME GPU stage, from the PmeStage enum in src/gromacs/timing/gpu_timing.h
*/
-void pme_gpu_start_timing(const PmeGpu* pmeGpu, size_t PMEStageId);
+void pme_gpu_start_timing(const PmeGpu* pmeGpu, PmeStage pmeStageId);
/*! \libinternal \brief
* Stops timing the given PME GPU stage during a single computation (if timings are enabled).
*
* \param[in] pmeGpu The PME GPU data structure.
- * \param[in] PMEStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h
+ * \param[in] pmeStageId The PME GPU stage, from the PmeStage enum in src/gromacs/timing/gpu_timing.h
*/
-void pme_gpu_stop_timing(const PmeGpu* pmeGpu, size_t PMEStageId);
+void pme_gpu_stop_timing(const PmeGpu* pmeGpu, PmeStage pmeStageId);
/*! \brief
* Tells if CUDA-based performance tracking is enabled for PME.
#define PMEGPUTYPESHOSTIMPL_H
#include "config.h"
+#include "gromacs/utility/enumerationhelpers.h"
#include <array>
#include <set>
std::vector<std::unique_ptr<GpuParallel3dFft>> fftSetup;
//! All the timers one might use
- std::array<GpuRegionTimer, gtPME_EVENT_COUNT> timingEvents;
+ gmx::EnumerationArray<PmeStage, GpuRegionTimer> timingEvents;
//! Indices of timingEvents actually used
- std::set<size_t> activeTimers;
+ std::set<PmeStage> activeTimers;
/* GPU arrays element counts (not the arrays sizes in bytes!).
* They might be larger than the actual meaningful data sizes.
* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 2012,2014,2015,2016,2017 by the GROMACS development team.
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_TIMING_GPU_TIMING_H
#define GMX_TIMING_GPU_TIMING_H
+#include "gromacs/utility/enumerationhelpers.h"
+
/*! \internal \brief GPU kernel time and call count. */
struct gmx_kernel_timing_data_t
{
/*! \internal \brief
- * PME GPU stages timing events indices, corresponding to the string in PMEStageNames in wallcycle.cpp.
+ * PME GPU stage timing event indices, corresponding to the strings returned by enumValueToString() in wallcycle.cpp.
*/
-enum
+enum class PmeStage : int
{
- gtPME_SPLINE = 0,
- gtPME_SPREAD,
- gtPME_SPLINEANDSPREAD,
- gtPME_FFT_R2C,
- gtPME_SOLVE,
- gtPME_FFT_C2R,
- gtPME_GATHER,
- gtPME_EVENT_COUNT /* not a stage ID but a static array size */
+ Spline = 0,
+ Spread,
+ SplineAndSpread,
+ FftTransformR2C,
+ Solve,
+ FftTransformC2R,
+ Gather,
+ Count /* not a stage ID but a static array size */
};
/*! \internal \brief GPU timings for PME. */
* TODO: devise a better GPU timing data structuring.
*/
/*! \brief Array of PME GPU timing data. */
- gmx_kernel_timing_data_t timing[gtPME_EVENT_COUNT];
+ gmx::EnumerationArray<PmeStage, gmx_kernel_timing_data_t> timing;
};
/*! \internal \brief GPU NB timings for kernels and H2d/D2H transfers. */
};
/* PME GPU timing events' names - correspond to the enum in gpu_timing.h */
-static const char* PMEStageNames[] = {
-    "PME spline", "PME spread", "PME spline + spread", "PME 3D-FFT r2c",
-    "PME solve", "PME 3D-FFT c2r", "PME gather",
-};
+static const char* enumValueToString(PmeStage enumValue)
+{
+    constexpr gmx::EnumerationArray<PmeStage, const char*> pmeStageNames = {
+        "PME spline", "PME spread", "PME spline + spread", "PME 3D-FFT r2c",
+        "PME solve", "PME 3D-FFT c2r", "PME gather"
+    };
+    return pmeStageNames[enumValue];
+}
gmx_bool wallcycle_have_counter()
double tot_gpu = 0.0;
if (gpu_pme_t)
{
- for (size_t k = 0; k < gtPME_EVENT_COUNT; k++)
+ for (auto key : keysOf(gpu_pme_t->timing))
{
- tot_gpu += gpu_pme_t->timing[k].t;
+ tot_gpu += gpu_pme_t->timing[key].t;
}
}
if (gpu_nbnxn_t)
}
if (gpu_pme_t)
{
- for (size_t k = 0; k < gtPME_EVENT_COUNT; k++)
+ for (auto key : keysOf(gpu_pme_t->timing))
{
- if (gpu_pme_t->timing[k].c)
+ if (gpu_pme_t->timing[key].c)
{
- print_gputimes(
- fplog, PMEStageNames[k], gpu_pme_t->timing[k].c, gpu_pme_t->timing[k].t, tot_gpu);
+ print_gputimes(fplog,
enumValueToString(key),
+ gpu_pme_t->timing[key].c,
+ gpu_pme_t->timing[key].t,
+ tot_gpu);
}
}
}