From: Mark Abraham Date: Fri, 13 Apr 2018 10:09:09 +0000 (+0100) Subject: Merge branch release-2018 X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=4fa7c38d51c58534f4c26df7c975077328dfc251;p=alexxy%2Fgromacs.git Merge branch release-2018 Minor conflicts documenting gpuregiontimer_ocl.h, resolved with a suitable mix from both branches. Change-Id: I5bb417acaff941f18730f0417c5e46c8f4c2fc1e --- 4fa7c38d51c58534f4c26df7c975077328dfc251 diff --cc src/gromacs/gpu_utils/gpuregiontimer_ocl.h index 91e9e8de23,81444cafdc..00f28c633f --- a/src/gromacs/gpu_utils/gpuregiontimer_ocl.h +++ b/src/gromacs/gpu_utils/gpuregiontimer_ocl.h @@@ -51,14 -50,32 +51,18 @@@ #include "gpuregiontimer.h" -template <> struct GpuTraits -{ - using CommandStream = cl_command_queue; - using CommandEvent = cl_event; -}; - -//! Short-hand for external use -using GpuRegionTimer = GpuRegionTimerWrapper; - -// cppcheck-suppress noConstructor /*! \libinternal \brief - * This is a GPU region timing implementation for OpenCL. - * It provides methods for measuring the last timespan. - * Copying/assignment is disabled since the underlying timing events are owned by this. + * The OpenCL implementation of the GPU code region timing. + * With OpenCL, one has to use cl_event handle for each API call that has to be timed, and + * accumulate the timing afterwards. As we would like to avoid overhead on API calls, + * we only query and accumulate cl_event timing at the end of time steps, not after the API calls. + * Thus, this implementation does not reuse a single cl_event for multiple calls, but instead + * maintains an array of cl_events to be used within any single code region. + * The array size is fixed at a small but sufficiently large value for the number of cl_events + * that might contribute to a timer region, currently 10. */ - // cppcheck-suppress noConstructor -template <> class GpuRegionTimerImpl +class GpuRegionTimerImpl { - //! Short-hands - using CommandStream = typename GpuTraits::CommandStream; - using CommandEvent = typename GpuTraits::CommandEvent; - /*! \brief The underlying individual timing events array. * The maximum size is chosen arbitrarily to work with current code, and can be changed. * There is simply no need for run-time resizing, and it's unlikely we'll ever need more than 10. @@@ -69,22 -86,11 +73,20 @@@ public: + GpuRegionTimerImpl() = default; + ~GpuRegionTimerImpl() = default; + //! No copying + GpuRegionTimerImpl(const GpuRegionTimerImpl &) = delete; + //! No assignment + GpuRegionTimerImpl &operator=(GpuRegionTimerImpl &&) = delete; + //! Moving is disabled but can be considered in the future if needed + GpuRegionTimerImpl(GpuRegionTimerImpl &&) = delete; + - /*! \brief Will be called before the region start. */ + /*! \brief Should be called before the region start. */ inline void openTimingRegion(CommandStream){} - - /*! \brief Will be called after the region end. */ + /*! \brief Should be called after the region end. */ inline void closeTimingRegion(CommandStream){} - - /*! \brief Returns the last measured region timespan (in milliseconds) and calls reset() */ + /*! \brief Returns the last measured region timespan (in milliseconds) and calls reset(). */ inline double getLastRangeTime() { double milliseconds = 0.0; @@@ -122,11 -128,7 +124,10 @@@ // As long as we're doing nullptr checks, we might want to be extra cautious. events_.fill(nullptr); } - - /*! \brief Provides next unused cl_event for OpenCL API consumption. */ + /*! \brief Returns a new raw timing event + * for passing into individual GPU API calls + * within the region if the API requires it (e.g. on OpenCL). + */ inline CommandEvent *fetchNextEvent() { GMX_ASSERT(currentEvent_ < events_.size(), "Increase c_maxEventNumber_ if needed");