From 874f1bcda2449fe3e67508e869b0d1c9beffcd6d Mon Sep 17 00:00:00 2001 From: Andrey Alekseenko Date: Thu, 1 Oct 2020 10:04:11 +0000 Subject: [PATCH] Device management: Add SYCL DeviceBuffer --- src/gromacs/gpu_utils/CMakeLists.txt | 2 + src/gromacs/gpu_utils/devicebuffer_datatype.h | 33 ++- src/gromacs/gpu_utils/devicebuffer_sycl.cpp | 48 ++++ src/gromacs/gpu_utils/devicebuffer_sycl.h | 228 +++++++++++++----- src/gromacs/gpu_utils/gputraits.h | 4 + src/gromacs/gpu_utils/gputraits_sycl.h | 76 ++++++ src/gromacs/gpu_utils/tests/CMakeLists.txt | 6 +- src/gromacs/gpu_utils/tests/device_buffer.cpp | 4 +- .../gpu_utils/tests/devicetransfers_sycl.cpp | 89 +++++++ 9 files changed, 420 insertions(+), 70 deletions(-) create mode 100644 src/gromacs/gpu_utils/devicebuffer_sycl.cpp create mode 100644 src/gromacs/gpu_utils/gputraits_sycl.h create mode 100644 src/gromacs/gpu_utils/tests/devicetransfers_sycl.cpp diff --git a/src/gromacs/gpu_utils/CMakeLists.txt b/src/gromacs/gpu_utils/CMakeLists.txt index 22c0a4cb82..8fbf9c1686 100644 --- a/src/gromacs/gpu_utils/CMakeLists.txt +++ b/src/gromacs/gpu_utils/CMakeLists.txt @@ -60,10 +60,12 @@ elseif(GMX_GPU_CUDA) ) elseif(GMX_GPU_SYCL) gmx_add_libgromacs_sources( + devicebuffer_sycl.cpp device_context_sycl.cpp device_stream_sycl.cpp ) _gmx_add_files_to_property(SYCL_SOURCES + devicebuffer_sycl.cpp device_context_manager.cpp device_context_sycl.cpp device_stream_manager.cpp diff --git a/src/gromacs/gpu_utils/devicebuffer_datatype.h b/src/gromacs/gpu_utils/devicebuffer_datatype.h index 48235f4bad..e88782b1ac 100644 --- a/src/gromacs/gpu_utils/devicebuffer_datatype.h +++ b/src/gromacs/gpu_utils/devicebuffer_datatype.h @@ -46,6 +46,10 @@ #include "config.h" +#include + +#include "gromacs/math/vectypes.h" + #if GMX_GPU_CUDA //! \brief A device-side buffer of ValueTypes @@ -87,11 +91,31 @@ using DeviceBuffer = TypedClMemory; #elif GMX_GPU_SYCL -// SYCL-TODO: - -//! \brief A device-side buffer of ValueTypes +/*! \libinternal \brief + * A minimal wrapper around \c cl::sycl::buffer to hide it away and simplify compilation. + */ template -using DeviceBuffer = ValueType*; +struct DeviceBuffer +{ + class ClSyclBufferWrapper; + std::unique_ptr buffer_; + + DeviceBuffer(); + ~DeviceBuffer(); + DeviceBuffer(DeviceBuffer const& src); + DeviceBuffer(DeviceBuffer&& src) noexcept; + DeviceBuffer& operator=(DeviceBuffer const& src); + DeviceBuffer& operator=(DeviceBuffer&& src) noexcept; + + //! Helper function to get the size in bytes of a single element + static constexpr size_t elementSize() { return sizeof(ValueType); } + + // static_case is used in MPI+CUDA code, this stub is necessary for compilation. + explicit operator void*() const { throw; } +}; + +// Must explicitly instantiate for some types. +extern template struct DeviceBuffer; #else @@ -101,5 +125,4 @@ using DeviceBuffer = void*; #endif - #endif // GMX_GPU_UTILS_DEVICEBUFFER_DATATYPE_H diff --git a/src/gromacs/gpu_utils/devicebuffer_sycl.cpp b/src/gromacs/gpu_utils/devicebuffer_sycl.cpp new file mode 100644 index 0000000000..68aa4dbfb3 --- /dev/null +++ b/src/gromacs/gpu_utils/devicebuffer_sycl.cpp @@ -0,0 +1,48 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2020, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \libinternal \file + * \brief Implements the DeviceBuffer type and routines for SYCL. + * + * This CPP file is only used to explicitly instantiate some templates. + * + * \author Andrey Alekseenko + * + * \inlibraryapi + */ +#include "gmxpre.h" + +#include "devicebuffer_sycl.h" + +template struct DeviceBuffer; diff --git a/src/gromacs/gpu_utils/devicebuffer_sycl.h b/src/gromacs/gpu_utils/devicebuffer_sycl.h index e487160b23..63a3b16fb8 100644 --- a/src/gromacs/gpu_utils/devicebuffer_sycl.h +++ b/src/gromacs/gpu_utils/devicebuffer_sycl.h @@ -41,6 +41,8 @@ * TODO: the intent is for DeviceBuffer to become a class. * * \author Artem Zhmurov + * \author Erik Lindahl + * \author Andrey Alekseenko * * \inlibraryapi */ @@ -48,26 +50,75 @@ #include "gromacs/gpu_utils/device_context.h" #include "gromacs/gpu_utils/device_stream.h" #include "gromacs/gpu_utils/devicebuffer_datatype.h" +#include "gromacs/gpu_utils/gmxsycl.h" #include "gromacs/gpu_utils/gpu_utils.h" //only for GpuApiCallBehavior -#include "gromacs/gpu_utils/gputraits_ocl.h" +#include "gromacs/gpu_utils/gputraits_sycl.h" #include "gromacs/utility/gmxassert.h" #include "gromacs/utility/stringutil.h" +#ifndef DOXYGEN +template +class DeviceBuffer::ClSyclBufferWrapper : public cl::sycl::buffer +{ + using cl::sycl::buffer::buffer; // Get all the constructors +}; + +template +using ClSyclBufferWrapper = typename DeviceBuffer::ClSyclBufferWrapper; + +//! Constructor. +template +DeviceBuffer::DeviceBuffer() : buffer_(nullptr) +{ +} + +//! Destructor. +template +DeviceBuffer::~DeviceBuffer() = default; + +//! Copy constructor (references the same underlying SYCL buffer). +template +DeviceBuffer::DeviceBuffer(DeviceBuffer const& src) : + buffer_(new ClSyclBufferWrapper(*src.buffer_)) +{ +} + +//! Move constructor. +template +DeviceBuffer::DeviceBuffer(DeviceBuffer&& src) noexcept = default; + +//! Copy assignment (references the same underlying SYCL buffer). +template +DeviceBuffer& DeviceBuffer::operator=(DeviceBuffer const& src) +{ + buffer_.reset(new ClSyclBufferWrapper(*src.buffer_)); + return *this; +} + +//! Move assignment. +template +DeviceBuffer& DeviceBuffer::operator=(DeviceBuffer&& src) noexcept = default; + +#endif // #ifndef DOXYGEN + /*! \libinternal \brief * Allocates a device-side buffer. * It is currently a caller's responsibility to call it only on not-yet allocated buffers. * * \tparam ValueType Raw value type of the \p buffer. * \param[in,out] buffer Pointer to the device-side buffer. - * \param[in] numValues Number of values to accomodate. + * \param[in] numValues Number of values to accommodate. * \param[in] deviceContext The buffer's device context-to-be. */ template -void allocateDeviceBuffer(DeviceBuffer* gmx_unused buffer, - size_t gmx_unused numValues, - const DeviceContext& gmx_unused deviceContext) +void allocateDeviceBuffer(DeviceBuffer* buffer, size_t numValues, const DeviceContext& deviceContext) { - // SYCL-TODO + /* SYCL does not require binding buffer to a specific context or device. The ::context_bound + * property only enforces the use of only given context, and possibly offers some optimizations */ + const cl::sycl::property_list bufferProperties{ cl::sycl::property::buffer::context_bound( + deviceContext.context()) }; + buffer->buffer_.reset( + new ClSyclBufferWrapper(cl::sycl::range<1>(numValues), bufferProperties)); } /*! \brief @@ -78,128 +129,187 @@ void allocateDeviceBuffer(DeviceBuffer* gmx_unused buffer, * * \param[in] buffer Pointer to the buffer to free. */ -template -void freeDeviceBuffer(DeviceBuffer* gmx_unused buffer) +template +void freeDeviceBuffer(DeviceBuffer* buffer) { - // SYCL-TODO + buffer->buffer_.reset(nullptr); } /*! \brief * Performs the host-to-device data copy, synchronous or asynchronously on request. * - * Note that synchronous copy will not synchronize the stream in case of zero \p numValues - * because of the early return. + * Unlike in CUDA and OpenCL, synchronous call does not guarantee that all previously + * submitted operations are complete, only the ones that are required for \p buffer consistency. * * \tparam ValueType Raw value type of the \p buffer. - * \param[in,out] buffer Pointer to the device-side buffer - * \param[in] hostBuffer Pointer to the raw host-side memory, also typed \p ValueType + * \param[in,out] buffer Pointer to the device-side buffer. + * \param[in] hostBuffer Pointer to the raw host-side memory, also typed \p ValueType. * \param[in] startingOffset Offset (in values) at the device-side buffer to copy into. * \param[in] numValues Number of values to copy. * \param[in] deviceStream GPU stream to perform asynchronous copy in. * \param[in] transferKind Copy type: synchronous or asynchronous. * \param[out] timingEvent A pointer to the H2D copy timing event to be filled in. - * If the pointer is not null, the event can further be used - * to queue a wait for this operation or to query profiling information. + * Ignored in SYCL. */ template -void copyToDeviceBuffer(DeviceBuffer* gmx_unused buffer, - const ValueType* gmx_unused hostBuffer, - size_t gmx_unused startingOffset, - size_t gmx_unused numValues, - const DeviceStream& gmx_unused deviceStream, - GpuApiCallBehavior gmx_unused transferKind, +void copyToDeviceBuffer(DeviceBuffer* buffer, + const ValueType* hostBuffer, + size_t startingOffset, + size_t numValues, + const DeviceStream& deviceStream, + GpuApiCallBehavior transferKind, CommandEvent* gmx_unused timingEvent) { - // SYCL-TODO + if (numValues == 0) + { + return; // such calls are actually made with empty domains + } + GMX_ASSERT(buffer, "needs a buffer pointer"); + GMX_ASSERT(buffer->buffer_, "needs an initialized buffer pointer"); + GMX_ASSERT(hostBuffer, "needs a host buffer pointer"); + + cl::sycl::buffer& syclBuffer = *buffer->buffer_; + + cl::sycl::event ev = deviceStream.stream().submit([&](cl::sycl::handler& cgh) { + /* Here and elsewhere in this file, accessor constructor is user instead of a more common + * buffer::get_access, since the compiler (icpx 2021.1-beta09) occasionally gets confused + * by all the overloads */ + auto d_bufferAccessor = cl::sycl::accessor{ + syclBuffer, cgh, cl::sycl::range(numValues), cl::sycl::id(startingOffset) + }; + cgh.copy(hostBuffer, d_bufferAccessor); + }); + if (transferKind == GpuApiCallBehavior::Sync) + { + ev.wait_and_throw(); + } } /*! \brief * Performs the device-to-host data copy, synchronous or asynchronously on request. * - * Note that synchronous copy will not synchronize the stream in case of zero \p numValues - * because of the early return. + * Unlike in CUDA and OpenCL, synchronous call does not guarantee that all previously + * submitted operations are complete, only the ones that are required for \p buffer consistency. * * \tparam ValueType Raw value type of the \p buffer. * \param[in,out] hostBuffer Pointer to the raw host-side memory, also typed \p ValueType - * \param[in] buffer Pointer to the device-side buffer + * \param[in] buffer Pointer to the device-side buffer. * \param[in] startingOffset Offset (in values) at the device-side buffer to copy from. * \param[in] numValues Number of values to copy. * \param[in] deviceStream GPU stream to perform asynchronous copy in. * \param[in] transferKind Copy type: synchronous or asynchronous. * \param[out] timingEvent A pointer to the H2D copy timing event to be filled in. - * If the pointer is not null, the event can further be used - * to queue a wait for this operation or to query profiling information. + * Ignored in SYCL. */ template -void copyFromDeviceBuffer(ValueType* gmx_unused hostBuffer, - DeviceBuffer* gmx_unused buffer, - size_t gmx_unused startingOffset, - size_t gmx_unused numValues, - const DeviceStream& gmx_unused deviceStream, - GpuApiCallBehavior gmx_unused transferKind, +void copyFromDeviceBuffer(ValueType* hostBuffer, + DeviceBuffer* buffer, + size_t startingOffset, + size_t numValues, + const DeviceStream& deviceStream, + GpuApiCallBehavior transferKind, CommandEvent* gmx_unused timingEvent) { - // SYCL-TODO + if (numValues == 0) + { + return; // such calls are actually made with empty domains + } + GMX_ASSERT(buffer, "needs a buffer pointer"); + GMX_ASSERT(hostBuffer, "needs a host buffer pointer"); + + cl::sycl::buffer& syclBuffer = *buffer->buffer_; + + cl::sycl::event ev = deviceStream.stream().submit([&](cl::sycl::handler& cgh) { + const auto d_bufferAccessor = cl::sycl::accessor{ + syclBuffer, cgh, cl::sycl::range(numValues), cl::sycl::id(startingOffset) + }; + cgh.copy(d_bufferAccessor, hostBuffer); + }); + if (transferKind == GpuApiCallBehavior::Sync) + { + ev.wait_and_throw(); + } } /*! \brief * Clears the device buffer asynchronously. * * \tparam ValueType Raw value type of the \p buffer. - * \param[in,out] buffer Pointer to the device-side buffer + * \param[in,out] buffer Pointer to the device-side buffer. * \param[in] startingOffset Offset (in values) at the device-side buffer to start clearing at. * \param[in] numValues Number of values to clear. * \param[in] deviceStream GPU stream. */ template -void clearDeviceBufferAsync(DeviceBuffer* gmx_unused buffer, - size_t gmx_unused startingOffset, - size_t gmx_unused numValues, - const DeviceStream& gmx_unused deviceStream) +void clearDeviceBufferAsync(DeviceBuffer* buffer, + size_t startingOffset, + size_t numValues, + const DeviceStream& deviceStream) { - // SYCL-TODO + if (numValues == 0) + { + return; + } + GMX_ASSERT(buffer, "needs a buffer pointer"); + + const ValueType pattern{}; + cl::sycl::buffer& syclBuffer = *(buffer->buffer_); + + cl::sycl::event ev = deviceStream.stream().submit([&](cl::sycl::handler& cgh) { + auto d_bufferAccessor = cl::sycl::accessor{ + syclBuffer, cgh, cl::sycl::range(numValues), cl::sycl::id(startingOffset) + }; + cgh.fill(d_bufferAccessor, pattern); + }); } /*! \brief Check the validity of the device buffer. * - * Checks if the buffer is not nullptr and if its allocation is big enough. + * Checks if the buffer is valid and if its allocation is big enough. * * \param[in] buffer Device buffer to be checked. * \param[in] requiredSize Number of elements that the buffer will have to accommodate. * - * \returns Whether the device buffer can be set. + * \returns Whether the device buffer exists and has enough capacity. */ template -static bool gmx_unused checkDeviceBuffer(DeviceBuffer gmx_unused buffer, int gmx_unused requiredSize) +static gmx_unused bool checkDeviceBuffer(DeviceBuffer buffer, int requiredSize) { - // SYCL-TODO + return buffer.buffer_ && (static_cast(buffer.buffer_->get_count()) >= requiredSize); } -//! Device texture wrapper. -using DeviceTexture = void*; - /*! \brief Create a texture object for an array of type ValueType. * * Creates the device buffer and copies read-only data for an array of type ValueType. - * - * \todo Decide if using image2d is most efficient. + * Like OpenCL, does not really do anything with textures, simply creates a buffer + * and initializes it. * * \tparam ValueType Raw data type. * * \param[out] deviceBuffer Device buffer to store data in. - * \param[out] deviceTexture New texture object * \param[in] hostBuffer Host buffer to get date from. * \param[in] numValues Number of elements in the buffer. * \param[in] deviceContext GPU device context. */ template -void initParamLookupTable(DeviceBuffer* gmx_unused deviceBuffer, - DeviceTexture* gmx_unused deviceTexture, - const ValueType* gmx_unused hostBuffer, - int gmx_unused numValues, - const DeviceContext& gmx_unused deviceContext) +void initParamLookupTable(DeviceBuffer* deviceBuffer, + DeviceTexture* /* deviceTexture */, + const ValueType* hostBuffer, + int numValues, + const DeviceContext& deviceContext) { - // SYCL-TODO + GMX_ASSERT(hostBuffer, "Host buffer should be specified."); + GMX_ASSERT(deviceBuffer, "Device buffer should be specified."); + + /* Constructing buffer with cl::sycl::buffer(T* data, size_t size) will take ownership + * of this memory region making it unusable, which might lead to side-effects. + * On the other hand, cl::sycl::buffer(InputIterator begin, InputIterator end) will + * initialize the buffer without affecting ownership of the memory, although + * it will consume extra memory on host. */ + const cl::sycl::property_list bufferProperties{ cl::sycl::property::buffer::context_bound( + deviceContext.context()) }; + deviceBuffer->buffer_.reset(new ClSyclBufferWrapper( + hostBuffer, hostBuffer + numValues, bufferProperties)); } /*! \brief Release the OpenCL device buffer. @@ -207,13 +317,11 @@ void initParamLookupTable(DeviceBuffer* gmx_unused deviceBuffer, * \tparam ValueType Raw data type. * * \param[in,out] deviceBuffer Device buffer to store data in. - * \param[in] deviceTexture Reference to texture object */ template -void destroyParamLookupTable(DeviceBuffer* gmx_unused deviceBuffer, - DeviceTexture& gmx_unused deviceTexture) +void destroyParamLookupTable(DeviceBuffer* deviceBuffer, DeviceTexture& /* deviceTexture */) { - // SYCL-TODO + deviceBuffer->buffer_.reset(nullptr); } #endif // GMX_GPU_UTILS_DEVICEBUFFER_SYCL_H diff --git a/src/gromacs/gpu_utils/gputraits.h b/src/gromacs/gpu_utils/gputraits.h index 79ccaa64aa..38c5edf8a9 100644 --- a/src/gromacs/gpu_utils/gputraits.h +++ b/src/gromacs/gpu_utils/gputraits.h @@ -55,6 +55,10 @@ # include "gromacs/gpu_utils/gputraits_ocl.h" +#elif GMX_GPU_SYCL + +# include "gromacs/gpu_utils/gputraits_sycl.h" + #else using DeviceTexture = void*; diff --git a/src/gromacs/gpu_utils/gputraits_sycl.h b/src/gromacs/gpu_utils/gputraits_sycl.h new file mode 100644 index 0000000000..ab121c8858 --- /dev/null +++ b/src/gromacs/gpu_utils/gputraits_sycl.h @@ -0,0 +1,76 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifndef GMX_GPU_UTILS_GPUTRAITS_SYCL_H +#define GMX_GPU_UTILS_GPUTRAITS_SYCL_H + +/*! \libinternal \file + * \brief Declares the SYCL type traits. + * + * \author Andrey Alekseenko + * + * \inlibraryapi + * \ingroup module_gpu_utils + */ + +#include + +#include "gromacs/gpu_utils/gmxsycl.h" + +using DeviceTexture = void*; + +//! \brief Single GPU call timing event, not used with SYCL +using CommandEvent = void*; + +/*! \internal \brief + * GPU kernels scheduling description. This is same in OpenCL/CUDA. + * Provides reasonable defaults, one typically only needs to set the GPU stream + * and non-1 work sizes. + */ +struct KernelLaunchConfig +{ + //! Work groups (CUDA blocks) counts + size_t gridSize[3] = { 1, 1, 1 }; + //! Per work group (CUDA block) thread counts + size_t blockSize[3] = { 1, 1, 1 }; + //! Shared memory size in bytes + size_t sharedMemorySize = 0; +}; + +/*! \brief Sets whether device code can use arrays that are embedded in structs. + * \todo Probably can, must check + */ +#define c_canEmbedBuffers false + +#endif diff --git a/src/gromacs/gpu_utils/tests/CMakeLists.txt b/src/gromacs/gpu_utils/tests/CMakeLists.txt index 4c65ac4390..6e4a700930 100644 --- a/src/gromacs/gpu_utils/tests/CMakeLists.txt +++ b/src/gromacs/gpu_utils/tests/CMakeLists.txt @@ -58,9 +58,9 @@ gmx_add_unit_test(GpuUtilsUnitTests gpu_utils-test HARDWARE_DETECTION OPENCL_CPP_SOURCE_FILES devicetransfers_ocl.cpp - NON_GPU_CPP_SOURCE_FILES - devicetransfers.cpp + SYCL_CPP_SOURCE_FILES + devicetransfers_sycl.cpp - SYCL_CPP_SOURCE_FILES # SYCL-TODO: proper test + NON_GPU_CPP_SOURCE_FILES devicetransfers.cpp ) diff --git a/src/gromacs/gpu_utils/tests/device_buffer.cpp b/src/gromacs/gpu_utils/tests/device_buffer.cpp index 7ce4d681d1..3acfee0204 100644 --- a/src/gromacs/gpu_utils/tests/device_buffer.cpp +++ b/src/gromacs/gpu_utils/tests/device_buffer.cpp @@ -43,7 +43,7 @@ #include "config.h" -#if GMX_GPU && !GMX_GPU_SYCL +#if GMX_GPU # include # include @@ -213,4 +213,4 @@ TYPED_TEST(DeviceBufferTest, CanCopyToAndFromDeviceWithOffset) } // namespace test } // namespace gmx -#endif // GMX_GPU && !GMX_GPU_SYCL +#endif // GMX_GPU diff --git a/src/gromacs/gpu_utils/tests/devicetransfers_sycl.cpp b/src/gromacs/gpu_utils/tests/devicetransfers_sycl.cpp new file mode 100644 index 0000000000..bb6d001a51 --- /dev/null +++ b/src/gromacs/gpu_utils/tests/devicetransfers_sycl.cpp @@ -0,0 +1,89 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2020, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * \brief Defines helper functionality for device transfers for tests + * for GPU host allocator. + * + * \author Andrey Alekseenko + */ +#include "gmxpre.h" + +#include "gromacs/gpu_utils/gmxsycl.h" +#include "gromacs/hardware/device_information.h" +#include "gromacs/utility/arrayref.h" +#include "gromacs/utility/exceptions.h" +#include "gromacs/utility/gmxassert.h" +#include "gromacs/utility/stringutil.h" + +#include "devicetransfers.h" + +namespace gmx +{ + +void doDeviceTransfers(const DeviceInformation& deviceInfo, ArrayRef input, ArrayRef output) +{ + GMX_RELEASE_ASSERT(input.size() == output.size(), "Input and output must have matching size"); + + try + { + cl::sycl::queue syclQueue(deviceInfo.syclDevice); + + cl::sycl::property_list syclBufferProperties{ cl::sycl::property::buffer::context_bound( + syclQueue.get_context()) }; + + cl::sycl::buffer syclBuffer(::sycl::range<1>(input.size()), syclBufferProperties); + + syclQueue + .submit([&](cl::sycl::handler& cgh) { + auto accessor = syclBuffer.get_access(cgh); + cgh.copy(input.data(), accessor); + }) + .wait_and_throw(); + + syclQueue + .submit([&](cl::sycl::handler& cgh) { + auto accessor = syclBuffer.get_access(cgh); + cgh.copy(accessor, output.data()); + }) + .wait_and_throw(); + } + catch (cl::sycl::exception& e) + { + GMX_THROW(InternalError( + formatString("Failure while checking data transfer, error was %s", e.what()))); + } +} + +} // namespace gmx -- 2.22.0