From 32d2afab126e046224befd74f5785b77ad63e3ef Mon Sep 17 00:00:00 2001 From: Aleksei Iupinov Date: Thu, 28 Sep 2017 15:08:50 +0200 Subject: [PATCH] Add timing accumulation capability into GpuRegionTimer Added a TODO to deprecate NB timing structures in favor of new functionality. Change-Id: Idb78e5a36a7f372f01378a580a05b928bd728c57 --- src/gromacs/gpu_utils/gpuregiontimer.h | 23 +++++++++++++++++-- .../mdlib/nbnxn_cuda/nbnxn_cuda_types.h | 2 +- src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h | 2 +- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/gromacs/gpu_utils/gpuregiontimer.h b/src/gromacs/gpu_utils/gpuregiontimer.h index 0207f96143..2cbbe575d2 100644 --- a/src/gromacs/gpu_utils/gpuregiontimer.h +++ b/src/gromacs/gpu_utils/gpuregiontimer.h @@ -104,7 +104,7 @@ template class GpuRegionTimerImpl /*! \libinternal \brief * This is a GPU region timing wrapper class. - * It allows for host-side tracking of the execution timespans in GPU code + * It allows for host-side tracking of the accumulated execution timespans in GPU code * (measuring kernel or transfers duration). * It also partially tracks the correctness of the timer state transitions, * as far as current implementation allows (see TODO in getLastRangeTime() for a disabled check). @@ -123,6 +123,10 @@ template class GpuRegionTimerWrapper Stopped } debugState_; + //! The number of times the timespan has been measured + unsigned int callCount_; + //! The accumulated duration of the timespans measured (milliseconds) + double totalMilliseconds_; //! The underlying region timer implementation GpuRegionTimerImpl impl_; @@ -160,10 +164,12 @@ template class GpuRegionTimerWrapper GMX_ASSERT(debugState_ == TimerState::Recording, error.c_str()); debugState_ = TimerState::Stopped; } + callCount_++; impl_.closeTimingRegion(s); } /*! \brief - * Returns the last timespan, and resets the internal timer state. 
+ * Accumulates the last timespan of all the events used into the total duration, + * and resets the internal timer state. * To be called after closeTimingRegion() and the command stream of the event having been synchronized. * \returns The last timespan (in milliseconds). */ @@ -186,6 +192,7 @@ template class GpuRegionTimerWrapper debugState_ = TimerState::Idle; } double milliseconds = impl_.getLastRangeTime(); + totalMilliseconds_ += milliseconds; return milliseconds; } /*! \brief Resets the implementation and total time/call count to zeroes. */ @@ -195,8 +202,20 @@ template class GpuRegionTimerWrapper { debugState_ = TimerState::Idle; } + totalMilliseconds_ = 0.0; + callCount_ = 0; impl_.reset(); } + /*! \brief Gets total time recorded (in milliseconds). */ + double getTotalTime() const + { + return totalMilliseconds_; + } + /*! \brief Gets total call count recorded. */ + unsigned int getCallCount() const + { + return callCount_; + } /*! \brief * Gets a pointer to a new timing event for passing into individual GPU API calls * within the region if they require it (e.g. on OpenCL). diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h index ceb3a3dafd..0e0be3c1e1 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h @@ -276,7 +276,7 @@ struct gmx_nbnxn_cuda_t * setting bDoTime needs to be change if this CUDA "feature" gets fixed. */ bool bDoTime; /**< True if event-based timing is enabled. */ cu_timers_t *timers; /**< CUDA event-based timers. */ - gmx_wallclock_gpu_t *timings; /**< Timing data. */ + gmx_wallclock_gpu_t *timings; /**< Timing data. 
TODO: deprecate this and query timers for accumulated data instead */ }; #ifdef __cplusplus diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h index 74e98db9e3..0e9d527cee 100644 --- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h +++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h @@ -368,7 +368,7 @@ struct gmx_nbnxn_ocl_t cl_bool bDoTime; /**< True if event-based timing is enabled. */ cl_timers_t *timers; /**< OpenCL event-based timers. */ - struct gmx_wallclock_gpu_t *timings; /**< Timing data. */ + struct gmx_wallclock_gpu_t *timings; /**< Timing data. TODO: deprecate this and query timers for accumulated data instead */ }; #ifdef __cplusplus -- 2.22.0