From 0eb1526f8f97d588381d09133652ef0bc1fe5568 Mon Sep 17 00:00:00 2001 From: Aleksei Iupinov Date: Tue, 13 Feb 2018 15:44:27 +0100 Subject: [PATCH] Move GPU traits into separate headers Avoid using templated traits struct in favor of simplicity, as CUDA/OpenCL code being isolated suffices for now. Change-Id: I2c43edb7cff2d3711141abf044e5fedd76e4444d --- src/gromacs/gpu_utils/gpuregiontimer.cuh | 35 ++++++---- src/gromacs/gpu_utils/gpuregiontimer.h | 69 ++----------------- src/gromacs/gpu_utils/gpuregiontimer_ocl.h | 44 ++++++++---- src/gromacs/gpu_utils/gputraits.cuh | 50 ++++++++++++++ src/gromacs/gpu_utils/gputraits_ocl.h | 52 ++++++++++++++ src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp | 1 + src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h | 3 +- 7 files changed, 160 insertions(+), 94 deletions(-) create mode 100644 src/gromacs/gpu_utils/gputraits.cuh create mode 100644 src/gromacs/gpu_utils/gputraits_ocl.h diff --git a/src/gromacs/gpu_utils/gpuregiontimer.cuh b/src/gromacs/gpu_utils/gpuregiontimer.cuh index b709c1206e..17321f7c62 100644 --- a/src/gromacs/gpu_utils/gpuregiontimer.cuh +++ b/src/gromacs/gpu_utils/gpuregiontimer.cuh @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2016,2017, by the GROMACS development team, led by + * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -45,22 +45,17 @@ #define GMX_GPU_UTILS_GPUREGIONTIMER_CUH #include "gromacs/gpu_utils/cudautils.cuh" +#include "gromacs/gpu_utils/gputraits.cuh" #include "gpuregiontimer.h" -template <> struct GpuTraits<GpuFramework::CUDA> -{ - using CommandStream = cudaStream_t; - using CommandEvent = void; -}; - -//! Short-hand for external use -using GpuRegionTimer = GpuRegionTimerWrapper<GpuFramework::CUDA>; - -template <> class GpuRegionTimerImpl<GpuFramework::CUDA> +/*! 
\libinternal \brief + * This is a GPU region timing implementation for CUDA. + * It provides methods for measuring the last timespan. + * Copying/assignment is disabled since the underlying timing events are owned by this. + */ +class GpuRegionTimerImpl { - //! Short-hand - using CommandStream = typename GpuTraits<GpuFramework::CUDA>::CommandStream; //! The underlying timing event pair - the beginning and the end of the timespan cudaEvent_t eventStart_, eventStop_; @@ -72,23 +67,31 @@ template <> class GpuRegionTimerImpl<GpuFramework::CUDA> CU_RET_ERR(cudaEventCreate(&eventStart_, eventFlags), "GPU timing creation failure"); CU_RET_ERR(cudaEventCreate(&eventStop_, eventFlags), "GPU timing creation failure"); } - ~GpuRegionTimerImpl() { CU_RET_ERR(cudaEventDestroy(eventStart_), "GPU timing destruction failure"); CU_RET_ERR(cudaEventDestroy(eventStop_), "GPU timing destruction failure"); } + //! No copying + GpuRegionTimerImpl(const GpuRegionTimerImpl &) = delete; + //! No assignment + GpuRegionTimerImpl &operator = (GpuRegionTimerImpl &&) = delete; + //! Moving is disabled but can be considered in the future if needed + GpuRegionTimerImpl(GpuRegionTimerImpl &&) = delete; + /*! \brief Will be called before the region start. */ inline void openTimingRegion(CommandStream s) { CU_RET_ERR(cudaEventRecord(eventStart_, s), "GPU timing recording failure"); } + /*! \brief Will be called after the region end. */ inline void closeTimingRegion(CommandStream s) { CU_RET_ERR(cudaEventRecord(eventStop_, s), "GPU timing recording failure"); } + /*! \brief Returns the last measured region timespan (in milliseconds) and calls reset() */ inline double getLastRangeTime() { float milliseconds = 0.0; @@ -97,7 +100,11 @@ template <> class GpuRegionTimerImpl<GpuFramework::CUDA> return milliseconds; } + /*! \brief Resets internal state */ inline void reset(){} }; +//! 
Short-hand for external use +using GpuRegionTimer = GpuRegionTimerWrapper<GpuRegionTimerImpl>; + #endif diff --git a/src/gromacs/gpu_utils/gpuregiontimer.h b/src/gromacs/gpu_utils/gpuregiontimer.h index e453a5e18b..64c2a7918b 100644 --- a/src/gromacs/gpu_utils/gpuregiontimer.h +++ b/src/gromacs/gpu_utils/gpuregiontimer.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2016,2017, by the GROMACS development team, led by + * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -54,62 +54,6 @@ static const bool c_debugTimerState = false; static const bool c_debugTimerState = true; #endif -/*! \libinternal \brief - * Enumeration of possible GPU build-paths. - * \todo Move somewhere general? - */ -enum class GpuFramework -{ - CUDA, - OpenCL -}; - -/*! \libinternal \brief - * GPU build-path traits such as types. - * \todo Move somewhere general? - */ -template <GpuFramework> struct GpuTraits -{ - using CommandStream = void; //!< GPU command stream - using CommandEvent = void; //!< Single command call timing event - used in OpenCL -}; - -/*! \libinternal \brief - * This is a GPU region timing implementation interface. - * It should provide methods for measuring the last timespan. - * Copying/assignment is disabled since the underlying timing events are owned by this. - */ -template <GpuFramework framework> class GpuRegionTimerImpl -{ - //! Short-hands - using CommandStream = typename GpuTraits<framework>::CommandStream; - using CommandEvent = typename GpuTraits<framework>::CommandEvent; - - public: - - GpuRegionTimerImpl() = default; - ~GpuRegionTimerImpl() = default; - //! No copying - GpuRegionTimerImpl(const GpuRegionTimerImpl &) = delete; - //! No assignment - GpuRegionTimerImpl &operator=(GpuRegionTimerImpl &&) = delete; - //! 
Moving is disabled but can be considered in the future if needed - GpuRegionTimerImpl(GpuRegionTimerImpl &&) = delete; - - /*! \brief Will be called before the region start. */ - inline void openTimingRegion(CommandStream) = 0; - /*! \brief Will be called after the region end. */ - inline void closeTimingRegion(CommandStream) = 0; - /*! \brief Resets any internal state if needed */ - inline void reset() = 0; - /*! \brief Returns the last measured region timespan (in milliseconds) and calls reset() */ - inline double getLastRangeTime() = 0; - /*! \brief Returns a new raw timing event - * for passing into individual GPU API calls - * within the region if the API requires it (e.g. on OpenCL). */ - inline CommandEvent *fetchNextEvent() = 0; -}; - /*! \libinternal \brief * This is a GPU region timing wrapper class. * It allows for host-side tracking of the accumulated execution timespans in GPU code @@ -118,11 +62,8 @@ template <GpuFramework framework> class GpuRegionTimerImpl * as far as current implementation allows (see TODO in getLastRangeTime() for a disabled check). * Internally it uses GpuRegionTimerImpl for measuring regions. */ -template <GpuFramework framework> class GpuRegionTimerWrapper +template <typename GpuRegionTimerImpl> class GpuRegionTimerWrapper { - //! Short-hands - using CommandStream = typename GpuTraits<framework>::CommandStream; - using CommandEvent = typename GpuTraits<framework>::CommandEvent; //! The timer state used for debug-only assertions enum class TimerState { @@ -132,11 +73,11 @@ template <GpuFramework framework> class GpuRegionTimerWrapper } debugState_ = TimerState::Idle; //! The number of times the timespan has been measured - unsigned int callCount_ = 0; + unsigned int callCount_ = 0; //! The accumulated duration of the timespans measured (milliseconds) - double totalMilliseconds_ = 0.0; + double totalMilliseconds_ = 0.0; //! 
The underlying region timer implementation - GpuRegionTimerImpl<framework> impl_; + GpuRegionTimerImpl impl_; public: diff --git a/src/gromacs/gpu_utils/gpuregiontimer_ocl.h b/src/gromacs/gpu_utils/gpuregiontimer_ocl.h index fc64d04e96..91e9e8de23 100644 --- a/src/gromacs/gpu_utils/gpuregiontimer_ocl.h +++ b/src/gromacs/gpu_utils/gpuregiontimer_ocl.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2016,2017, by the GROMACS development team, led by + * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -46,26 +46,19 @@ #include <array> +#include "gromacs/gpu_utils/gputraits_ocl.h" #include "gromacs/gpu_utils/oclutils.h" #include "gpuregiontimer.h" -template <> struct GpuTraits<GpuFramework::OpenCL> -{ - using CommandStream = cl_command_queue; - using CommandEvent = cl_event; -}; - -//! Short-hand for external use -using GpuRegionTimer = GpuRegionTimerWrapper<GpuFramework::OpenCL>; - +/*! \libinternal \brief + * This is a GPU region timing implementation for OpenCL. + * It provides methods for measuring the last timespan. + * Copying/assignment is disabled since the underlying timing events are owned by this. + */ // cppcheck-suppress noConstructor -template <> class GpuRegionTimerImpl<GpuFramework::OpenCL> +class GpuRegionTimerImpl { - //! Short-hands - using CommandStream = typename GpuTraits<GpuFramework::OpenCL>::CommandStream; - using CommandEvent = typename GpuTraits<GpuFramework::OpenCL>::CommandEvent; - /*! \brief The underlying individual timing events array. * The maximum size is chosen arbitrarily to work with current code, and can be changed. * There is simply no need for run-time resizing, and it's unlikely we'll ever need more than 10. @@ -76,9 +69,22 @@ template <> class GpuRegionTimerImpl<GpuFramework::OpenCL> public: + GpuRegionTimerImpl() = default; + ~GpuRegionTimerImpl() = default; + //! 
No copying + GpuRegionTimerImpl(const GpuRegionTimerImpl &) = delete; + //! No assignment + GpuRegionTimerImpl &operator=(GpuRegionTimerImpl &&) = delete; + //! Moving is disabled but can be considered in the future if needed + GpuRegionTimerImpl(GpuRegionTimerImpl &&) = delete; + + /*! \brief Will be called before the region start. */ inline void openTimingRegion(CommandStream){} + + /*! \brief Will be called after the region end. */ inline void closeTimingRegion(CommandStream){} + /*! \brief Returns the last measured region timespan (in milliseconds) and calls reset() */ inline double getLastRangeTime() { double milliseconds = 0.0; @@ -102,6 +108,7 @@ template <> class GpuRegionTimerImpl<GpuFramework::OpenCL> return milliseconds; } + /*! \brief Resets internal state */ inline void reset() { for (size_t i = 0; i < currentEvent_; i++) @@ -116,6 +123,10 @@ template <> class GpuRegionTimerImpl<GpuFramework::OpenCL> events_.fill(nullptr); } + /*! \brief Returns a new raw timing event + * for passing into individual GPU API calls + * within the region if the API requires it (e.g. on OpenCL). + */ inline CommandEvent *fetchNextEvent() { GMX_ASSERT(currentEvent_ < events_.size(), "Increase c_maxEventNumber_ if needed"); @@ -125,4 +136,7 @@ template <> class GpuRegionTimerImpl<GpuFramework::OpenCL> } }; +//! Short-hand for external use +using GpuRegionTimer = GpuRegionTimerWrapper<GpuRegionTimerImpl>; + #endif diff --git a/src/gromacs/gpu_utils/gputraits.cuh b/src/gromacs/gpu_utils/gputraits.cuh new file mode 100644 index 0000000000..8ec95a29b9 --- /dev/null +++ b/src/gromacs/gpu_utils/gputraits.cuh @@ -0,0 +1,50 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2018, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. 
+ * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifndef GMX_GPU_UTILS_GPUTRAITS_CUH +#define GMX_GPU_UTILS_GPUTRAITS_CUH + +/*! \libinternal \file + * \brief Declares the CUDA type traits. + * \author Aleksei Iupinov + * + * \inlibraryapi + */ + +//! \brief GPU command stream +using CommandStream = cudaStream_t; +//! 
\brief Single GPU call timing event - meaningless in CUDA +using CommandEvent = void; + +#endif diff --git a/src/gromacs/gpu_utils/gputraits_ocl.h b/src/gromacs/gpu_utils/gputraits_ocl.h new file mode 100644 index 0000000000..bc16239497 --- /dev/null +++ b/src/gromacs/gpu_utils/gputraits_ocl.h @@ -0,0 +1,52 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2018, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. 
Check out http://www.gromacs.org. + */ +#ifndef GMX_GPU_UTILS_GPUTRAITS_OCL_H +#define GMX_GPU_UTILS_GPUTRAITS_OCL_H + +/*! \libinternal \file + * \brief Declares the OpenCL type traits. + * \author Aleksei Iupinov + * + * \inlibraryapi + */ + +#include "gromacs/gpu_utils/gmxopencl.h" + +//! \brief GPU command stream +using CommandStream = cl_command_queue; +//! \brief Single GPU call timing event +using CommandEvent = cl_event; + +#endif diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp index b91074075c..02f89e06ea 100644 --- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp +++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp @@ -69,6 +69,7 @@ #include "thread_mpi/atomic.h" +#include "gromacs/gpu_utils/gputraits_ocl.h" #include "gromacs/gpu_utils/oclutils.h" #include "gromacs/hardware/hw_info.h" #include "gromacs/mdlib/force_flags.h" diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h index 4d428afa84..ac55b49ba9 100644 --- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h +++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -46,6 +46,7 @@ #define NBNXN_OPENCL_TYPES_H #include "gromacs/gpu_utils/gmxopencl.h" +#include "gromacs/gpu_utils/gputraits_ocl.h" #include "gromacs/gpu_utils/oclutils.h" #include "gromacs/mdlib/nbnxn_gpu_types_common.h" #include "gromacs/mdlib/nbnxn_pairlist.h" -- 2.22.0