From 975583fcdaf5d37a298ca5aee4a62921aafc662a Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Fri, 11 Sep 2020 09:06:16 +0000 Subject: [PATCH] Define stubs for GPU data structures in SYCL to make SYCL compilation possible --- src/gromacs/gpu_utils/devicebuffer.h | 2 + src/gromacs/gpu_utils/devicebuffer_datatype.h | 8 + src/gromacs/gpu_utils/devicebuffer_sycl.h | 216 ++++++++++++++++++ src/gromacs/gpu_utils/gpu_macros.h | 4 +- .../gpu_utils/gpueventsynchronizer_sycl.h | 100 ++++++++ .../mdtypes/state_propagator_data_gpu_impl.h | 2 + src/gromacs/taskassignment/decidegpuusage.cpp | 3 + 7 files changed, 333 insertions(+), 2 deletions(-) create mode 100644 src/gromacs/gpu_utils/devicebuffer_sycl.h create mode 100644 src/gromacs/gpu_utils/gpueventsynchronizer_sycl.h diff --git a/src/gromacs/gpu_utils/devicebuffer.h b/src/gromacs/gpu_utils/devicebuffer.h index 8aaea72263..a13f0c6262 100644 --- a/src/gromacs/gpu_utils/devicebuffer.h +++ b/src/gromacs/gpu_utils/devicebuffer.h @@ -54,6 +54,8 @@ # include "gromacs/gpu_utils/devicebuffer.cuh" #elif GMX_GPU_OPENCL # include "gromacs/gpu_utils/devicebuffer_ocl.h" +#elif GMX_GPU_SYCL +# include "gromacs/gpu_utils/devicebuffer_sycl.h" #else # error "devicebuffer.h included on non-GPU build!" #endif diff --git a/src/gromacs/gpu_utils/devicebuffer_datatype.h b/src/gromacs/gpu_utils/devicebuffer_datatype.h index c1aec1fa51..48235f4bad 100644 --- a/src/gromacs/gpu_utils/devicebuffer_datatype.h +++ b/src/gromacs/gpu_utils/devicebuffer_datatype.h @@ -85,6 +85,14 @@ public: template using DeviceBuffer = TypedClMemory; +#elif GMX_GPU_SYCL + +// SYCL-TODO: + +//! \brief A device-side buffer of ValueTypes +template +using DeviceBuffer = ValueType*; + #else //! \brief A device-side buffer of ValueTypes diff --git a/src/gromacs/gpu_utils/devicebuffer_sycl.h b/src/gromacs/gpu_utils/devicebuffer_sycl.h new file mode 100644 index 0000000000..a7a1b500ec --- /dev/null +++ b/src/gromacs/gpu_utils/devicebuffer_sycl.h @@ -0,0 +1,216 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2020, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifndef GMX_GPU_UTILS_DEVICEBUFFER_SYCL_H +#define GMX_GPU_UTILS_DEVICEBUFFER_SYCL_H + +/*! \libinternal \file + * \brief Implements the DeviceBuffer type and routines for SYCL. + * Should only be included directly by the main DeviceBuffer file devicebuffer.h. + * TODO: the intent is for DeviceBuffer to become a class. + * + * \author Artem Zhmurov + * + * \inlibraryapi + */ + +#include "gromacs/gpu_utils/device_context.h" +#include "gromacs/gpu_utils/device_stream.h" +#include "gromacs/gpu_utils/devicebuffer_datatype.h" +#include "gromacs/gpu_utils/gpu_utils.h" //only for GpuApiCallBehavior +#include "gromacs/gpu_utils/gputraits_ocl.h" +#include "gromacs/utility/gmxassert.h" +#include "gromacs/utility/stringutil.h" + +/*! \libinternal \brief + * Allocates a device-side buffer. + * It is currently a caller's responsibility to call it only on not-yet allocated buffers. + * + * \tparam ValueType Raw value type of the \p buffer. + * \param[in,out] buffer Pointer to the device-side buffer. + * \param[in] numValues Number of values to accomodate. + * \param[in] deviceContext The buffer's device context-to-be. + */ +template +void allocateDeviceBuffer(DeviceBuffer* /* buffer */, + size_t /* numValues */, + const DeviceContext& /* deviceContext */) +{ + // SYCL-TODO +} + +/*! \brief + * Frees a device-side buffer. + * This does not reset separately stored size/capacity integers, + * as this is planned to be a destructor of DeviceBuffer as a proper class, + * and no calls on \p buffer should be made afterwards. + * + * \param[in] buffer Pointer to the buffer to free. + */ +template +void freeDeviceBuffer(DeviceBuffer* /* buffer */) +{ + // SYCL-TODO +} + +/*! \brief + * Performs the host-to-device data copy, synchronous or asynchronously on request. + * + * Note that synchronous copy will not synchronize the stream in case of zero \p numValues + * because of the early return. + * + * \tparam ValueType Raw value type of the \p buffer. + * \param[in,out] buffer Pointer to the device-side buffer + * \param[in] hostBuffer Pointer to the raw host-side memory, also typed \p ValueType + * \param[in] startingOffset Offset (in values) at the device-side buffer to copy into. + * \param[in] numValues Number of values to copy. + * \param[in] deviceStream GPU stream to perform asynchronous copy in. + * \param[in] transferKind Copy type: synchronous or asynchronous. + * \param[out] timingEvent A pointer to the H2D copy timing event to be filled in. + * If the pointer is not null, the event can further be used + * to queue a wait for this operation or to query profiling information. + */ +template +void copyToDeviceBuffer(DeviceBuffer* /* buffer */, + const ValueType* /* hostBuffer */, + size_t /* startingOffset */, + size_t /* numValues */, + const DeviceStream& /* deviceStream */, + GpuApiCallBehavior /* transferKind */, + CommandEvent* /* timingEvent */) +{ + // SYCL-TODO +} + +/*! \brief + * Performs the device-to-host data copy, synchronous or asynchronously on request. + * + * Note that synchronous copy will not synchronize the stream in case of zero \p numValues + * because of the early return. + * + * \tparam ValueType Raw value type of the \p buffer. + * \param[in,out] hostBuffer Pointer to the raw host-side memory, also typed \p ValueType + * \param[in] buffer Pointer to the device-side buffer + * \param[in] startingOffset Offset (in values) at the device-side buffer to copy from. + * \param[in] numValues Number of values to copy. + * \param[in] deviceStream GPU stream to perform asynchronous copy in. + * \param[in] transferKind Copy type: synchronous or asynchronous. + * \param[out] timingEvent A pointer to the H2D copy timing event to be filled in. + * If the pointer is not null, the event can further be used + * to queue a wait for this operation or to query profiling information. + */ +template +void copyFromDeviceBuffer(ValueType* /* hostBuffer */, + DeviceBuffer* /* buffer */, + size_t /* startingOffset */, + size_t /* numValues */, + const DeviceStream& /* deviceStream */, + GpuApiCallBehavior /* transferKind */, + CommandEvent* /* timingEvent */) +{ + // SYCL-TODO +} + +/*! \brief + * Clears the device buffer asynchronously. + * + * \tparam ValueType Raw value type of the \p buffer. + * \param[in,out] buffer Pointer to the device-side buffer + * \param[in] startingOffset Offset (in values) at the device-side buffer to start clearing at. + * \param[in] numValues Number of values to clear. + * \param[in] deviceStream GPU stream. + */ +template +void clearDeviceBufferAsync(DeviceBuffer* /* buffer */, + size_t /* startingOffset */, + size_t /* numValues */, + const DeviceStream& /* deviceStream */) +{ + // SYCL-TODO +} + +/*! \brief Check the validity of the device buffer. + * + * Checks if the buffer is not nullptr and if its allocation is big enough. + * + * \param[in] buffer Device buffer to be checked. + * \param[in] requiredSize Number of elements that the buffer will have to accommodate. + * + * \returns Whether the device buffer can be set. + */ +template +static bool checkDeviceBuffer(DeviceBuffer /* buffer */, int /* requiredSize */) +{ + // SYCL-TODO +} + +//! Device texture wrapper. +using DeviceTexture = void*; + +/*! \brief Create a texture object for an array of type ValueType. + * + * Creates the device buffer and copies read-only data for an array of type ValueType. + * + * \todo Decide if using image2d is most efficient. + * + * \tparam ValueType Raw data type. + * + * \param[out] deviceBuffer Device buffer to store data in. + * \param[in] hostBuffer Host buffer to get date from. + * \param[in] numValues Number of elements in the buffer. + * \param[in] deviceContext GPU device context. + */ +template +void initParamLookupTable(DeviceBuffer* /* deviceBuffer */, + DeviceTexture* /* deviceTexture */, + const ValueType* /* hostBuffer */, + int /* numValues */, + const DeviceContext& /* deviceContext */) +{ + // SYCL-TODO +} + +/*! \brief Release the OpenCL device buffer. + * + * \tparam ValueType Raw data type. + * + * \param[in,out] deviceBuffer Device buffer to store data in. + */ +template +void destroyParamLookupTable(DeviceBuffer* /* deviceBuffer */, DeviceTexture& /* deviceTexture*/) +{ + // SYCL-TODO +} + +#endif // GMX_GPU_UTILS_DEVICEBUFFER_SYCL_H diff --git a/src/gromacs/gpu_utils/gpu_macros.h b/src/gromacs/gpu_utils/gpu_macros.h index 8eef2307d9..e7ecf83f57 100644 --- a/src/gromacs/gpu_utils/gpu_macros.h +++ b/src/gromacs/gpu_utils/gpu_macros.h @@ -74,7 +74,7 @@ # define OPENCL_FUNC_TERM REAL_FUNC_TERM # define OPENCL_FUNC_TERM_WITH_RETURN(arg) REAL_FUNC_TERM_WITH_RETURN(arg) -#elif GMX_GPU +#elif GMX_GPU_OPENCL || GMX_GPU_CUDA /* GPU support is enabled, so these functions will have real code * defined somewhere */ @@ -114,7 +114,7 @@ # endif -#elif !GMX_GPU +#elif !GMX_GPU || GMX_GPU_SYCL /* No GPU support is configured, so none of these functions will have * real definitions. */ diff --git a/src/gromacs/gpu_utils/gpueventsynchronizer_sycl.h b/src/gromacs/gpu_utils/gpueventsynchronizer_sycl.h new file mode 100644 index 0000000000..faf34dfda9 --- /dev/null +++ b/src/gromacs/gpu_utils/gpueventsynchronizer_sycl.h @@ -0,0 +1,100 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2020, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \libinternal \file + * \brief Implements a GpuEventSynchronizer class for OpenCL. + * + * \author Aleksei Iupinov + * \inlibraryapi + */ +#ifndef GMX_GPU_UTILS_GPUEVENTSYNCHRONIZER_OCL_H +#define GMX_GPU_UTILS_GPUEVENTSYNCHRONIZER_OCL_H + +#include "gromacs/utility/exceptions.h" +#include "gromacs/utility/gmxassert.h" + +/*! \libinternal \brief + * A class which allows for CPU thread to mark and wait for certain GPU stream execution point. + * The event can be put into the stream with markEvent() and then later waited on with waitForEvent(). + * This can be repeated as necessary, but the current implementation does not allow waiting on + * completed event more than once, expecting only exact pairs of markEvent(stream); waitForEvent(). + * The class generally attempts to track the correctness of its state transitions, but + * please note that calling waitForEvent() right after the construction will fail with OpenCL but succeed with CUDA. + * + * Another possible mode of operation can be implemented if needed: + * multiple calls to waitForEvent() after a single markEvent(). For this, clReleaseEvent() call + * from waitForEvent() should instead happen conditionally at the beginning of markEvent(), replacing + * the GMX_ASSERT(). This was tested to work both with CUDA and NVidia OpenCL, but not with AMD/Intel OpenCl. + */ +class GpuEventSynchronizer +{ +public: + //! A constructor + GpuEventSynchronizer() {} + //! A destructor + ~GpuEventSynchronizer() {} + //! No copying + GpuEventSynchronizer(const GpuEventSynchronizer&) = delete; + //! No assignment + GpuEventSynchronizer& operator=(GpuEventSynchronizer&&) = delete; + //! Moving is disabled but can be considered in the future if needed + GpuEventSynchronizer(GpuEventSynchronizer&&) = delete; + + /*! \brief Marks the synchronization point in the \p stream. + * Should be called first and then followed by waitForEvent(). + */ + inline void markEvent(const DeviceStream& /* deviceStream */) + { + // SYCL-TODO + } + /*! \brief Synchronizes the host thread on the marked event. */ + inline void waitForEvent() + { + // SYCL-TODO + } + /*! \brief Enqueues a wait for the recorded event in stream \p stream + * + * After enqueue, the associated event is released, so this method should + * be only called once per markEvent() call. + */ + inline void enqueueWaitEvent(const DeviceStream& /* deviceStream */) + { + // SYCL-TODO + } + +private: + // SYCL-TODO +}; + +#endif diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h index f4457311e1..17737b5477 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h +++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h @@ -52,6 +52,8 @@ # include "gromacs/gpu_utils/gpueventsynchronizer.cuh" #elif GMX_GPU_OPENCL # include "gromacs/gpu_utils/gpueventsynchronizer_ocl.h" +#elif GMX_GPU_SYCL +# include "gromacs/gpu_utils/gpueventsynchronizer_sycl.h" #endif #include "gromacs/math/vectypes.h" #include "gromacs/mdtypes/state_propagator_data_gpu.h" diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp index 38d61a6720..2d11adbfed 100644 --- a/src/gromacs/taskassignment/decidegpuusage.cpp +++ b/src/gromacs/taskassignment/decidegpuusage.cpp @@ -95,6 +95,9 @@ const char* g_specifyEverythingFormatString = // OpenCL standard, but the only current relevant case for GROMACS // is AMD OpenCL, which offers this variable. "GPU_DEVICE_ORDINAL" +# elif GMX_GPU_SYCL + // SYCL-TODO: + "How to restrict visible devices in SYCL?" # else # error "Unreachable branch" # endif -- 2.22.0