From 92e1eccca1e1f6a687d11915cf391217fdeeb8ad Mon Sep 17 00:00:00 2001 From: Andrey Alekseenko Date: Mon, 9 Nov 2020 17:57:21 +0000 Subject: [PATCH] Add SYCL implementation of LeapFrogGpu An implementation of LeapFrogGpu integrator using SYCL. Also, some additions to SYCL's DeviceBuffer. Code pretty much follows CUDA version. For float3, I had to use gmx::RVec instead of SYCL's built-in cl::sycl::float3, since latter is 16 bytes. --- src/gromacs/gpu_utils/devicebuffer_datatype.h | 2 +- src/gromacs/gpu_utils/devicebuffer_sycl.h | 125 ++++++- src/gromacs/gpu_utils/gputraits_sycl.h | 9 + src/gromacs/mdlib/CMakeLists.txt | 11 +- src/gromacs/mdlib/leapfrog_gpu.cu | 2 +- src/gromacs/mdlib/leapfrog_gpu.h | 19 +- src/gromacs/mdlib/leapfrog_gpu_sycl.cpp | 307 ++++++++++++++++++ src/gromacs/mdlib/tests/leapfrogtestrunners.h | 2 +- .../mdlib/tests/leapfrogtestrunners_gpu.cpp | 8 + 9 files changed, 459 insertions(+), 26 deletions(-) create mode 100644 src/gromacs/mdlib/leapfrog_gpu_sycl.cpp diff --git a/src/gromacs/gpu_utils/devicebuffer_datatype.h b/src/gromacs/gpu_utils/devicebuffer_datatype.h index b4be8bd619..d1c27bde9c 100644 --- a/src/gromacs/gpu_utils/devicebuffer_datatype.h +++ b/src/gromacs/gpu_utils/devicebuffer_datatype.h @@ -116,7 +116,7 @@ struct DeviceBuffer //! Allow implicit conversion to bool to check buffer status for compatibility with other implementations. operator bool() const { return buffer_.get() != nullptr; } - //! An assignment operator to allow resetting buffer by assigning nullptr to it. Necessery for compilation. + //! An assignment operator to allow resetting buffer by assigning nullptr to it. Necessary for compilation. DeviceBuffer& operator=(std::nullptr_t nullPtr); }; diff --git a/src/gromacs/gpu_utils/devicebuffer_sycl.h b/src/gromacs/gpu_utils/devicebuffer_sycl.h index a75e238448..5efc4dd963 100644 --- a/src/gromacs/gpu_utils/devicebuffer_sycl.h +++ b/src/gromacs/gpu_utils/devicebuffer_sycl.h @@ -116,8 +116,108 @@ DeviceBuffer& DeviceBuffer::operator=(std::nullptr_t nullPtr) return *this; } + +namespace gmx::internal +{ +//! Shorthand alias to create a placeholder SYCL accessor with chosen data type and access mode. +template +using PlaceholderAccessor = + cl::sycl::accessor; +} // namespace gmx::internal + +/** \brief + * Thin wrapper around placeholder accessor that allows implicit construction from \c DeviceBuffer. + * + * "Placeholder accessor" is an indicator of the intent to create an accessor for certain buffer + * of a certain type, that is not yet bound to a specific command group handler (device). Such + * accessors can be created outside SYCL kernels, which is helpful if we want to pass them as + * function arguments. + * + * \tparam T Type of buffer content. + * \tparam mode Access mode. + */ +template +class DeviceAccessor : public gmx::internal::PlaceholderAccessor +{ +public: + // Inherit all the constructors + using gmx::internal::PlaceholderAccessor::PlaceholderAccessor; + //! Construct Accessor from DeviceBuffer (must be initialized) + DeviceAccessor(DeviceBuffer& buffer) : + gmx::internal::PlaceholderAccessor(getSyclBuffer(buffer)) + { + } + +private: + //! Helper function to get sycl:buffer object from DeviceBuffer wrapper, with a sanity check. + static inline cl::sycl::buffer& getSyclBuffer(DeviceBuffer& buffer) + { + GMX_ASSERT(bool(buffer), "Trying to construct accessor from an uninitialized buffer"); + return *buffer.buffer_; + } +}; + +namespace gmx::internal +{ +//! A "blackhole" class to be used when we want to ignore an argument to a function. +struct EmptyClassThatIgnoresConstructorArguments +{ + template + [[maybe_unused]] EmptyClassThatIgnoresConstructorArguments(Args&&... /*args*/) + { + } +}; +} // namespace gmx::internal + +/** \brief + * Helper class to be used as function argument. Will either correspond to a device accessor, or an empty class. + * + * Example usage: + * \code + template + void getBarKernel(handler& cgh, OptionalAccessor a_fooPrms) + { + if constexpr (doFoo) + cgh.require(a_fooPrms); + // Can only use a_fooPrms if doFoo == true + } + + template + void callBar(DeviceBuffer b_fooPrms) + { + // If doFoo is false, b_fooPrms will be ignored (can be not initialized). + // Otherwise, an accessor will be built (b_fooPrms must be a valid buffer). + auto kernel = getBarKernel(b_fooPrms); + // If the accessor in not enabled, anything can be passed as its ctor argument. + auto kernel2 = getBarKernel(nullptr_t); + } + * \endcode + * + * \tparam T Data type of the underlying buffer + * \tparam mode Access mode of the accessor + * \tparam enabled Compile-time flag indicating whether we want to actually create an accessor. + */ +template +using OptionalAccessor = + std::conditional_t, gmx::internal::EmptyClassThatIgnoresConstructorArguments>; + #endif // #ifndef DOXYGEN +/*! \brief Check the validity of the device buffer. + * + * Checks if the buffer is valid and if its allocation is big enough. + * + * \param[in] buffer Device buffer to be checked. + * \param[in] requiredSize Number of elements that the buffer will have to accommodate. + * + * \returns Whether the device buffer exists and has enough capacity. + */ +template +static gmx_unused bool checkDeviceBuffer(const DeviceBuffer& buffer, int requiredSize) +{ + return buffer.buffer_ && (static_cast(buffer.buffer_->get_count()) >= requiredSize); +} + /*! \libinternal \brief * Allocates a device-side buffer. * It is currently a caller's responsibility to call it only on not-yet allocated buffers. @@ -182,9 +282,11 @@ void copyToDeviceBuffer(DeviceBuffer* buffer, return; // such calls are actually made with empty domains } GMX_ASSERT(buffer, "needs a buffer pointer"); - GMX_ASSERT(buffer->buffer_, "needs an initialized buffer pointer"); GMX_ASSERT(hostBuffer, "needs a host buffer pointer"); + GMX_ASSERT(checkDeviceBuffer(*buffer, startingOffset + numValues), + "buffer too small or not initialized"); + cl::sycl::buffer& syclBuffer = *buffer->buffer_; cl::sycl::event ev = deviceStream.stream().submit([&](cl::sycl::handler& cgh) { @@ -234,6 +336,9 @@ void copyFromDeviceBuffer(ValueType* hostBuffer, GMX_ASSERT(buffer, "needs a buffer pointer"); GMX_ASSERT(hostBuffer, "needs a host buffer pointer"); + GMX_ASSERT(checkDeviceBuffer(*buffer, startingOffset + numValues), + "buffer too small or not initialized"); + cl::sycl::buffer& syclBuffer = *buffer->buffer_; cl::sycl::event ev = deviceStream.stream().submit([&](cl::sycl::handler& cgh) { @@ -269,6 +374,9 @@ void clearDeviceBufferAsync(DeviceBuffer* buffer, } GMX_ASSERT(buffer, "needs a buffer pointer"); + GMX_ASSERT(checkDeviceBuffer(*buffer, startingOffset + numValues), + "buffer too small or not initialized"); + const ValueType pattern{}; cl::sycl::buffer& syclBuffer = *(buffer->buffer_); @@ -280,21 +388,6 @@ void clearDeviceBufferAsync(DeviceBuffer* buffer, }); } -/*! \brief Check the validity of the device buffer. - * - * Checks if the buffer is valid and if its allocation is big enough. - * - * \param[in] buffer Device buffer to be checked. - * \param[in] requiredSize Number of elements that the buffer will have to accommodate. - * - * \returns Whether the device buffer exists and has enough capacity. - */ -template -static gmx_unused bool checkDeviceBuffer(DeviceBuffer buffer, int requiredSize) -{ - return buffer.buffer_ && (static_cast(buffer.buffer_->get_count()) >= requiredSize); -} - /*! \brief Create a texture object for an array of type ValueType. * * Creates the device buffer and copies read-only data for an array of type ValueType. diff --git a/src/gromacs/gpu_utils/gputraits_sycl.h b/src/gromacs/gpu_utils/gputraits_sycl.h index ab121c8858..52e40bb836 100644 --- a/src/gromacs/gpu_utils/gputraits_sycl.h +++ b/src/gromacs/gpu_utils/gputraits_sycl.h @@ -53,6 +53,15 @@ using DeviceTexture = void*; //! \brief Single GPU call timing event, not used with SYCL using CommandEvent = void*; +//! Convenience alias. +using float4 = cl::sycl::float4; + +//! Convenience alias. Not using cl::sycl::float3 due to alignment issues. +using float3 = gmx::RVec; + +//! Convenience alias for cl::sycl::float2 +using float2 = cl::sycl::float2; + /*! \internal \brief * GPU kernels scheduling description. This is same in OpenCL/CUDA. * Provides reasonable defaults, one typically only needs to set the GPU stream diff --git a/src/gromacs/mdlib/CMakeLists.txt b/src/gromacs/mdlib/CMakeLists.txt index 6cb02ee517..949a305797 100644 --- a/src/gromacs/mdlib/CMakeLists.txt +++ b/src/gromacs/mdlib/CMakeLists.txt @@ -34,6 +34,8 @@ # the research papers on the package. Check out http://www.gromacs.org. file(GLOB MDLIB_SOURCES *.cpp) +# To avoid listing all the necessary files manually, we will remove SYCL-specfific one here: +list(REMOVE_ITEM MDLIB_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/leapfrog_gpu_sycl.cpp) set(MDLIB_SOURCES ${MDLIB_SOURCES} PARENT_SCOPE) if (BUILD_TESTING) @@ -48,4 +50,11 @@ if(GMX_GPU_CUDA) gpuforcereduction_impl.cu ) endif() - +if(GMX_GPU_SYCL) + gmx_add_libgromacs_sources( + leapfrog_gpu_sycl.cpp + ) + _gmx_add_files_to_property(SYCL_SOURCES + leapfrog_gpu_sycl.cpp + ) +endif() diff --git a/src/gromacs/mdlib/leapfrog_gpu.cu b/src/gromacs/mdlib/leapfrog_gpu.cu index 1b5ec5f42b..1793a386b2 100644 --- a/src/gromacs/mdlib/leapfrog_gpu.cu +++ b/src/gromacs/mdlib/leapfrog_gpu.cu @@ -97,7 +97,6 @@ constexpr static int c_maxThreadsPerBlock = c_threadsPerBlock; * \param[in] dt Timestep. * \param[in] gm_lambdas Temperature scaling factors (one per group) * \param[in] gm_tempScaleGroups Mapping of atoms into groups. - * \param[in] dtPressureCouple Time step for pressure coupling * \param[in] prVelocityScalingMatrixDiagonal Diagonal elements of Parrinello-Rahman velocity scaling matrix */ template @@ -138,6 +137,7 @@ __launch_bounds__(c_maxThreadsPerBlock) __global__ // Swapping places for xp and x so that the x will contain the updated coordinates and xp - the // coordinates before update. This should be taken into account when (if) constraints are applied // after the update: x and xp have to be passed to constraints in the 'wrong' order. + // TODO: Issue #3727 gm_xp[threadIndex] = x; if (numTempScaleValues != NumTempScaleValues::None || velocityScaling != VelocityScalingType::None) diff --git a/src/gromacs/mdlib/leapfrog_gpu.h b/src/gromacs/mdlib/leapfrog_gpu.h index 8bcf5e37a5..7c76805d09 100644 --- a/src/gromacs/mdlib/leapfrog_gpu.h +++ b/src/gromacs/mdlib/leapfrog_gpu.h @@ -50,6 +50,10 @@ # include "gromacs/gpu_utils/devicebuffer.cuh" # include "gromacs/gpu_utils/gputraits.cuh" #endif +#if GMX_GPU_SYCL +# include "gromacs/gpu_utils/devicebuffer_sycl.h" +# include "gromacs/gpu_utils/gputraits_sycl.h" +#endif #include "gromacs/gpu_utils/hostallocator.h" #include "gromacs/pbcutil/pbc.h" @@ -72,9 +76,10 @@ namespace gmx */ enum class NumTempScaleValues { - None, //!< No temperature coupling - Single, //!< Single T-scaling value (one group) - Multiple //!< Multiple T-scaling values, need to use T-group indices + None = 0, //!< No temperature coupling + Single = 1, //!< Single T-scaling value (one group) + Multiple = 2, //!< Multiple T-scaling values, need to use T-group indices + Count = 3 //!< Number of valid values }; /*! \brief Different variants of the Parrinello-Rahman velocity scaling @@ -84,8 +89,9 @@ enum class NumTempScaleValues */ enum class VelocityScalingType { - None, //!< Do not apply velocity scaling (not a PR-coupling run or step) - Diagonal, //!< Apply velocity scaling using a diagonal matrix + None = 0, //!< Do not apply velocity scaling (not a PR-coupling run or step) + Diagonal = 1, //!< Apply velocity scaling using a diagonal matrix + Count = 2 //!< Number of valid values }; class LeapFrogGpu @@ -166,7 +172,8 @@ private: //! Number of temperature coupling groups (zero = no coupling) int numTempScaleValues_ = 0; /*! \brief Array with temperature scaling factors. - * This is temporary solution to remap data from t_grp_tcstat into plain array + * This is temporary solution to remap data from t_grp_tcstat into plain array. + * Not used in SYCL. * \todo Replace with better solution. */ gmx::HostVector h_lambdas_; diff --git a/src/gromacs/mdlib/leapfrog_gpu_sycl.cpp b/src/gromacs/mdlib/leapfrog_gpu_sycl.cpp new file mode 100644 index 0000000000..61484f8968 --- /dev/null +++ b/src/gromacs/mdlib/leapfrog_gpu_sycl.cpp @@ -0,0 +1,307 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2020, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief Implements Leap-Frog using SYCL + * + * This file contains implementation of basic Leap-Frog integrator + * using SYCL, including class initialization, data-structures management + * and GPU kernel. + * + * \author Artem Zhmurov + * \author Andrey Alekseenko + * + * \ingroup module_mdlib + */ +#include "gmxpre.h" + +#include "gromacs/gpu_utils/devicebuffer.h" +#include "gromacs/gpu_utils/gmxsycl.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/leapfrog_gpu.h" +#include "gromacs/mdtypes/group.h" +#include "gromacs/utility/arrayref.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/template_mp.h" + +namespace gmx +{ + +using cl::sycl::access::mode; + +/*! \brief Main kernel for the Leap-Frog integrator. + * + * The coordinates and velocities are updated on the GPU. Also saves the intermediate values of the coordinates for + * further use in constraints. + * + * Each GPU thread works with a single particle. + * + * \tparam numTempScaleValues The number of different T-couple values. + * \tparam velocityScaling Type of the Parrinello-Rahman velocity rescaling. + * \param cgh SYCL's command group handler. + * \param[in,out] a_x Coordinates to update upon integration. + * \param[out] a_xp A copy of the coordinates before the integration (for constraints). + * \param[in,out] a_v Velocities to update. + * \param[in] a_f Atomic forces. + * \param[in] a_inverseMasses Reciprocal masses. + * \param[in] dt Timestep. + * \param[in] a_lambdas Temperature scaling factors (one per group). + * \param[in] a_tempScaleGroups Mapping of atoms into groups. + * \param[in] prVelocityScalingMatrixDiagonal Diagonal elements of Parrinello-Rahman velocity scaling matrix + */ +template +auto leapFrogKernel( + cl::sycl::handler& cgh, + DeviceAccessor a_x, + DeviceAccessor a_xp, + DeviceAccessor a_v, + DeviceAccessor a_f, + DeviceAccessor a_inverseMasses, + float dt, + OptionalAccessor a_lambdas, + OptionalAccessor a_tempScaleGroups, + float3 prVelocityScalingMatrixDiagonal) +{ + cgh.require(a_x); + cgh.require(a_xp); + cgh.require(a_v); + cgh.require(a_f); + cgh.require(a_inverseMasses); + if constexpr (numTempScaleValues != NumTempScaleValues::None) + { + cgh.require(a_lambdas); + } + if constexpr (numTempScaleValues == NumTempScaleValues::Multiple) + { + cgh.require(a_tempScaleGroups); + } + + return [=](cl::sycl::id<1> itemIdx) { + const float3 x = a_x[itemIdx]; + const float3 v = a_v[itemIdx]; + const float3 f = a_f[itemIdx]; + const float im = a_inverseMasses[itemIdx]; + const float imdt = im * dt; + + // Swapping places for xp and x so that the x will contain the updated coordinates and xp - + // the coordinates before update. This should be taken into account when (if) constraints + // are applied after the update: x and xp have to be passed to constraints in the 'wrong' + // order. See Issue #3727 + a_xp[itemIdx] = x; + + const float lambda = [=]() { + if constexpr (numTempScaleValues == NumTempScaleValues::None) + { + return 1.0F; + } + else if constexpr (numTempScaleValues == NumTempScaleValues::Single) + { + return a_lambdas[0]; + } + else if constexpr (numTempScaleValues == NumTempScaleValues::Multiple) + { + const int tempScaleGroup = a_tempScaleGroups[itemIdx]; + return a_lambdas[tempScaleGroup]; + } + }(); + + const float3 prVelocityDelta = [=]() { + if constexpr (velocityScaling == VelocityScalingType::Diagonal) + { + return float3{ prVelocityScalingMatrixDiagonal[0] * v[0], + prVelocityScalingMatrixDiagonal[1] * v[1], + prVelocityScalingMatrixDiagonal[2] * v[2] }; + } + else if constexpr (velocityScaling == VelocityScalingType::None) + { + return float3{ 0, 0, 0 }; + } + }(); + + const float3 v_new = v * lambda - prVelocityDelta + f * imdt; + a_v[itemIdx] = v_new; + a_x[itemIdx] = x + v_new * dt; + }; +} + +// SYCL 1.2.1 requires providing a unique type for a kernel. Should not be needed for SYCL2020. +template +class LeapFrogKernelName; + +template +static cl::sycl::event launchLeapFrogKernel(const DeviceStream& deviceStream, int numAtoms, Args&&... args) +{ + // Should not be needed for SYCL2020. + using kernelNameType = LeapFrogKernelName; + + const cl::sycl::range<1> rangeAllAtoms(numAtoms); + cl::sycl::queue q = deviceStream.stream(); + + cl::sycl::event e = q.submit([&](cl::sycl::handler& cgh) { + auto kernel = + leapFrogKernel(cgh, std::forward(args)...); + cgh.parallel_for(rangeAllAtoms, kernel); + }); + + return e; +} + +static NumTempScaleValues getTempScalingType(bool doTemperatureScaling, int numTempScaleValues) +{ + if (!doTemperatureScaling) + { + return NumTempScaleValues::None; + } + else if (numTempScaleValues == 1) + { + return NumTempScaleValues::Single; + } + else if (numTempScaleValues > 1) + { + return NumTempScaleValues::Multiple; + } + else + { + gmx_incons("Temperature coupling was requested with no temperature coupling groups."); + } +} + +/*! \brief Select templated kernel and launch it. */ +template +static inline cl::sycl::event launchLeapFrogKernel(NumTempScaleValues tempScalingType, + VelocityScalingType prVelocityScalingType, + Args&&... args) +{ + GMX_ASSERT(prVelocityScalingType == VelocityScalingType::None + || prVelocityScalingType == VelocityScalingType::Diagonal, + "Only isotropic Parrinello-Rahman pressure coupling is supported."); + + return dispatchTemplatedFunction( + [&](auto tempScalingType_, auto prScalingType_) { + return launchLeapFrogKernel(std::forward(args)...); + }, + tempScalingType, prVelocityScalingType); +} + +void LeapFrogGpu::integrate(DeviceBuffer d_x, + DeviceBuffer d_xp, + DeviceBuffer d_v, + DeviceBuffer d_f, + const real dt, + const bool doTemperatureScaling, + gmx::ArrayRef tcstat, + const bool doParrinelloRahman, + const float dtPressureCouple, + const matrix prVelocityScalingMatrix) +{ + if (doTemperatureScaling) + { + GMX_ASSERT(checkDeviceBuffer(d_lambdas_, numTempScaleValues_), + "Number of temperature scaling factors changed since it was set for the " + "last time."); + { // Explicitly limiting the scope of host accessor. Not strictly necessary here. + auto ha_lambdas_ = d_lambdas_.buffer_->get_access(); + for (int i = 0; i < numTempScaleValues_; i++) + { + ha_lambdas_[i] = tcstat[i].lambda; + } + } + } + NumTempScaleValues tempVelocityScalingType = + getTempScalingType(doTemperatureScaling, numTempScaleValues_); + + VelocityScalingType prVelocityScalingType = VelocityScalingType::None; + if (doParrinelloRahman) + { + prVelocityScalingType = VelocityScalingType::Diagonal; + GMX_ASSERT(prVelocityScalingMatrix[YY][XX] == 0 && prVelocityScalingMatrix[ZZ][XX] == 0 + && prVelocityScalingMatrix[ZZ][YY] == 0 && prVelocityScalingMatrix[XX][YY] == 0 + && prVelocityScalingMatrix[XX][ZZ] == 0 && prVelocityScalingMatrix[YY][ZZ] == 0, + "Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported " + "in GPU version of Leap-Frog integrator."); + prVelocityScalingMatrixDiagonal_ = + dtPressureCouple + * float3{ prVelocityScalingMatrix[XX][XX], prVelocityScalingMatrix[YY][YY], + prVelocityScalingMatrix[ZZ][ZZ] }; + } + + launchLeapFrogKernel(tempVelocityScalingType, prVelocityScalingType, deviceStream_, numAtoms_, + d_x, d_xp, d_v, d_f, d_inverseMasses_, dt, d_lambdas_, d_tempScaleGroups_, + prVelocityScalingMatrixDiagonal_); +} + +LeapFrogGpu::LeapFrogGpu(const DeviceContext& deviceContext, const DeviceStream& deviceStream) : + deviceContext_(deviceContext), + deviceStream_(deviceStream), + numAtoms_(0) +{ +} + +LeapFrogGpu::~LeapFrogGpu() +{ + freeDeviceBuffer(&d_inverseMasses_); +} + +void LeapFrogGpu::set(const int numAtoms, + const real* inverseMasses, + const int numTempScaleValues, + const unsigned short* tempScaleGroups) +{ + numAtoms_ = numAtoms; + numTempScaleValues_ = numTempScaleValues; + + reallocateDeviceBuffer(&d_inverseMasses_, numAtoms_, &numInverseMasses_, + &numInverseMassesAlloc_, deviceContext_); + copyToDeviceBuffer(&d_inverseMasses_, inverseMasses, 0, numAtoms_, deviceStream_, + GpuApiCallBehavior::Sync, nullptr); + + // Temperature scale group map only used if there are more then one group + if (numTempScaleValues_ > 1) + { + reallocateDeviceBuffer(&d_tempScaleGroups_, numAtoms_, &numTempScaleGroups_, + &numTempScaleGroupsAlloc_, deviceContext_); + copyToDeviceBuffer(&d_tempScaleGroups_, tempScaleGroups, 0, numAtoms_, deviceStream_, + GpuApiCallBehavior::Sync, nullptr); + } + + // If the temperature coupling is enabled, we need to make space for scaling factors + if (numTempScaleValues_ > 0) + { + reallocateDeviceBuffer(&d_lambdas_, numTempScaleValues_, &numLambdas_, &numLambdasAlloc_, + deviceContext_); + } +} + +} // namespace gmx diff --git a/src/gromacs/mdlib/tests/leapfrogtestrunners.h b/src/gromacs/mdlib/tests/leapfrogtestrunners.h index 599cc63517..7590aa2dfa 100644 --- a/src/gromacs/mdlib/tests/leapfrogtestrunners.h +++ b/src/gromacs/mdlib/tests/leapfrogtestrunners.h @@ -54,7 +54,7 @@ #include "leapfrogtestdata.h" -#define HAVE_GPU_LEAPFROG (GMX_GPU_CUDA) +#define HAVE_GPU_LEAPFROG (GMX_GPU_CUDA || GMX_GPU_SYCL) namespace gmx { diff --git a/src/gromacs/mdlib/tests/leapfrogtestrunners_gpu.cpp b/src/gromacs/mdlib/tests/leapfrogtestrunners_gpu.cpp index 5783b66332..1e102035d6 100644 --- a/src/gromacs/mdlib/tests/leapfrogtestrunners_gpu.cpp +++ b/src/gromacs/mdlib/tests/leapfrogtestrunners_gpu.cpp @@ -50,6 +50,12 @@ #if GMX_GPU_CUDA # include "gromacs/gpu_utils/devicebuffer.cuh" +#endif +#if GMX_GPU_SYCL +# include "gromacs/gpu_utils/devicebuffer_sycl.h" +#endif + +#if HAVE_GPU_LEAPFROG # include "gromacs/mdlib/leapfrog_gpu.h" #endif @@ -70,6 +76,8 @@ void LeapFrogDeviceTestRunner::integrate(LeapFrogTestData* testData, int numStep int numAtoms = testData->numAtoms_; + static_assert(sizeof(float3) == sizeof(*testData->x_.data()), "Incompatible types"); + float3* h_x = reinterpret_cast(testData->x_.data()); float3* h_xp = reinterpret_cast(testData->xPrime_.data()); float3* h_v = reinterpret_cast(testData->v_.data()); -- 2.22.0