2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2020,2021, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Implements Leap-Frog using SYCL
39 * This file contains implementation of basic Leap-Frog integrator
40 * using SYCL, including class initialization, data-structures management
43 * \author Artem Zhmurov <zhmurov@gmail.com>
44 * \author Andrey Alekseenko <al42and@gmail.com>
46 * \ingroup module_mdlib
50 #include "gromacs/gpu_utils/devicebuffer.h"
51 #include "gromacs/gpu_utils/gmxsycl.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/mdlib/leapfrog_gpu.h"
54 #include "gromacs/mdtypes/group.h"
55 #include "gromacs/utility/arrayref.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/template_mp.h"
62 using cl::sycl::access::mode;
64 /*! \brief Main kernel for the Leap-Frog integrator.
66 * The coordinates and velocities are updated on the GPU. Also saves the intermediate values of the coordinates for
67 * further use in constraints.
69 * Each GPU thread works with a single particle.
71 * \tparam numTempScaleValues The number of different T-couple values.
72 * \tparam velocityScaling Type of the Parrinello-Rahman velocity rescaling.
73 * \param cgh SYCL's command group handler.
74 * \param[in,out] a_x Coordinates to update upon integration.
75 * \param[out] a_xp A copy of the coordinates before the integration (for constraints).
76 * \param[in,out] a_v Velocities to update.
77 * \param[in] a_f Atomic forces.
78 * \param[in] a_inverseMasses Reciprocal masses.
79 * \param[in] dt Timestep.
80 * \param[in] a_lambdas Temperature scaling factors (one per group).
81 * \param[in] a_tempScaleGroups Mapping of atoms into groups.
82 * \param[in] prVelocityScalingMatrixDiagonal Diagonal elements of Parrinello-Rahman velocity scaling matrix
84 template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
86 cl::sycl::handler& cgh,
87 DeviceAccessor<Float3, mode::read_write> a_x,
88 DeviceAccessor<Float3, mode::discard_write> a_xp,
89 DeviceAccessor<Float3, mode::read_write> a_v,
90 DeviceAccessor<Float3, mode::read> a_f,
91 DeviceAccessor<float, mode::read> a_inverseMasses,
93 OptionalAccessor<float, mode::read, numTempScaleValues != NumTempScaleValues::None> a_lambdas,
94 OptionalAccessor<unsigned short, mode::read, numTempScaleValues == NumTempScaleValues::Multiple> a_tempScaleGroups,
95 Float3 prVelocityScalingMatrixDiagonal)
101 cgh.require(a_inverseMasses);
102 if constexpr (numTempScaleValues != NumTempScaleValues::None)
104 cgh.require(a_lambdas);
106 if constexpr (numTempScaleValues == NumTempScaleValues::Multiple)
108 cgh.require(a_tempScaleGroups);
111 return [=](cl::sycl::id<1> itemIdx) {
112 const Float3 x = a_x[itemIdx];
113 const Float3 v = a_v[itemIdx];
114 const Float3 f = a_f[itemIdx];
115 const float im = a_inverseMasses[itemIdx];
116 const float imdt = im * dt;
118 // Swapping places for xp and x so that the x will contain the updated coordinates and xp -
119 // the coordinates before update. This should be taken into account when (if) constraints
120 // are applied after the update: x and xp have to be passed to constraints in the 'wrong'
121 // order. See Issue #3727
124 const float lambda = [=]() {
125 if constexpr (numTempScaleValues == NumTempScaleValues::None)
129 else if constexpr (numTempScaleValues == NumTempScaleValues::Single)
133 else if constexpr (numTempScaleValues == NumTempScaleValues::Multiple)
135 const int tempScaleGroup = a_tempScaleGroups[itemIdx];
136 return a_lambdas[tempScaleGroup];
140 const Float3 prVelocityDelta = [=]() {
141 if constexpr (velocityScaling == VelocityScalingType::Diagonal)
143 return Float3{ prVelocityScalingMatrixDiagonal[0] * v[0],
144 prVelocityScalingMatrixDiagonal[1] * v[1],
145 prVelocityScalingMatrixDiagonal[2] * v[2] };
147 else if constexpr (velocityScaling == VelocityScalingType::None)
149 return Float3{ 0, 0, 0 };
153 const Float3 v_new = v * lambda - prVelocityDelta + f * imdt;
154 a_v[itemIdx] = v_new;
155 a_x[itemIdx] = x + v_new * dt;
159 // SYCL 1.2.1 requires providing a unique type for a kernel. Should not be needed for SYCL2020.
160 template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
161 class LeapFrogKernelName;
163 template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling, class... Args>
164 static cl::sycl::event launchLeapFrogKernel(const DeviceStream& deviceStream, int numAtoms, Args&&... args)
166 // Should not be needed for SYCL2020.
167 using kernelNameType = LeapFrogKernelName<numTempScaleValues, velocityScaling>;
169 const cl::sycl::range<1> rangeAllAtoms(numAtoms);
170 cl::sycl::queue q = deviceStream.stream();
172 cl::sycl::event e = q.submit([&](cl::sycl::handler& cgh) {
174 leapFrogKernel<numTempScaleValues, velocityScaling>(cgh, std::forward<Args>(args)...);
175 cgh.parallel_for<kernelNameType>(rangeAllAtoms, kernel);
181 static NumTempScaleValues getTempScalingType(bool doTemperatureScaling, int numTempScaleValues)
183 if (!doTemperatureScaling)
185 return NumTempScaleValues::None;
187 else if (numTempScaleValues == 1)
189 return NumTempScaleValues::Single;
191 else if (numTempScaleValues > 1)
193 return NumTempScaleValues::Multiple;
197 gmx_incons("Temperature coupling was requested with no temperature coupling groups.");
201 /*! \brief Select templated kernel and launch it. */
202 template<class... Args>
203 static inline cl::sycl::event launchLeapFrogKernel(NumTempScaleValues tempScalingType,
204 VelocityScalingType prVelocityScalingType,
207 GMX_ASSERT(prVelocityScalingType == VelocityScalingType::None
208 || prVelocityScalingType == VelocityScalingType::Diagonal,
209 "Only isotropic Parrinello-Rahman pressure coupling is supported.");
211 return dispatchTemplatedFunction(
212 [&](auto tempScalingType_, auto prScalingType_) {
213 return launchLeapFrogKernel<tempScalingType_, prScalingType_>(std::forward<Args>(args)...);
216 prVelocityScalingType);
219 void LeapFrogGpu::integrate(DeviceBuffer<Float3> d_x,
220 DeviceBuffer<Float3> d_xp,
221 DeviceBuffer<Float3> d_v,
222 DeviceBuffer<Float3> d_f,
224 const bool doTemperatureScaling,
225 gmx::ArrayRef<const t_grp_tcstat> tcstat,
226 const bool doParrinelloRahman,
227 const float dtPressureCouple,
228 const matrix prVelocityScalingMatrix)
230 if (doTemperatureScaling)
232 GMX_ASSERT(checkDeviceBuffer(d_lambdas_, numTempScaleValues_),
233 "Number of temperature scaling factors changed since it was set for the "
235 GMX_RELEASE_ASSERT(gmx::ssize(h_lambdas_) == numTempScaleValues_,
236 "Number of temperature scaling factors changed since it was set for the "
238 /* We could use host accessors here, without h_lambdas_.
239 * According to a quick test, host accessor is slightly faster when using DPC++ and
240 * LevelZero compared to using h_lambdas_ + cgh.copy. But with DPC++ and OpenCL, the host
241 * accessor waits for fReadyOnDevice in UpdateConstrainGpu::Impl::integrate. See #4023. */
243 for (int i = 0; i < numTempScaleValues_; i++)
245 h_lambdas_[i] = tcstat[i].lambda;
247 copyToDeviceBuffer(&d_lambdas_,
252 GpuApiCallBehavior::Async,
255 NumTempScaleValues tempVelocityScalingType =
256 getTempScalingType(doTemperatureScaling, numTempScaleValues_);
258 VelocityScalingType prVelocityScalingType = VelocityScalingType::None;
259 if (doParrinelloRahman)
261 prVelocityScalingType = VelocityScalingType::Diagonal;
262 GMX_ASSERT(prVelocityScalingMatrix[YY][XX] == 0 && prVelocityScalingMatrix[ZZ][XX] == 0
263 && prVelocityScalingMatrix[ZZ][YY] == 0 && prVelocityScalingMatrix[XX][YY] == 0
264 && prVelocityScalingMatrix[XX][ZZ] == 0 && prVelocityScalingMatrix[YY][ZZ] == 0,
265 "Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported "
266 "in GPU version of Leap-Frog integrator.");
267 prVelocityScalingMatrixDiagonal_ = dtPressureCouple
268 * Float3{ prVelocityScalingMatrix[XX][XX],
269 prVelocityScalingMatrix[YY][YY],
270 prVelocityScalingMatrix[ZZ][ZZ] };
273 launchLeapFrogKernel(tempVelocityScalingType,
274 prVelocityScalingType,
285 prVelocityScalingMatrixDiagonal_);
288 LeapFrogGpu::LeapFrogGpu(const DeviceContext& deviceContext,
289 const DeviceStream& deviceStream,
290 const int numTempScaleValues) :
291 deviceContext_(deviceContext),
292 deviceStream_(deviceStream),
294 numTempScaleValues_(numTempScaleValues)
296 // If the temperature coupling is enabled, we need to make space for scaling factors
297 if (numTempScaleValues_ > 0)
299 h_lambdas_.resize(numTempScaleValues_);
300 reallocateDeviceBuffer(
301 &d_lambdas_, numTempScaleValues_, &numLambdas_, &numLambdasAlloc_, deviceContext_);
305 LeapFrogGpu::~LeapFrogGpu()
307 freeDeviceBuffer(&d_inverseMasses_);
310 void LeapFrogGpu::set(const int numAtoms, const real* inverseMasses, const unsigned short* tempScaleGroups)
312 numAtoms_ = numAtoms;
314 reallocateDeviceBuffer(
315 &d_inverseMasses_, numAtoms_, &numInverseMasses_, &numInverseMassesAlloc_, deviceContext_);
317 &d_inverseMasses_, inverseMasses, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
319 // Temperature scale group map only used if there are more then one group
320 if (numTempScaleValues_ > 1)
322 reallocateDeviceBuffer(
323 &d_tempScaleGroups_, numAtoms_, &numTempScaleGroups_, &numTempScaleGroupsAlloc_, deviceContext_);
325 &d_tempScaleGroups_, tempScaleGroups, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);