2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Implements Leap-Frog using CUDA
39 * This file contains CUDA implementation of back-end specific code for Leap-Frog.
41 * \author Artem Zhmurov <zhmurov@gmail.com>
43 * \ingroup module_mdlib
47 #include "leapfrog_gpu_internal.h"
49 #include "gromacs/gpu_utils/cudautils.cuh"
50 #include "gromacs/gpu_utils/devicebuffer.h"
51 #include "gromacs/gpu_utils/typecasts.cuh"
52 #include "gromacs/gpu_utils/vectype_ops.cuh"
53 #include "gromacs/math/vec.h"
54 #include "gromacs/mdtypes/group.h"
55 #include "gromacs/pbcutil/pbc.h"
56 #include "gromacs/pbcutil/pbc_aiuc_cuda.cuh"
57 #include "gromacs/utility/arrayref.h"
62 /*!\brief Number of CUDA threads in a block
64 * \todo Check if using smaller block size will lead to better performance.
66 constexpr static int c_threadsPerBlock = 256;
67 //! Maximum number of threads in a block (for __launch_bounds__)
68 constexpr static int c_maxThreadsPerBlock = c_threadsPerBlock;
70 /*! \brief Main kernel for Leap-Frog integrator.
72 * The coordinates and velocities are updated on the GPU. Also saves the intermediate values of the coordinates for
73 * further use in constraints.
75 * Each GPU thread works with a single particle. Empty declaration is needed to
76 * avoid "no previous prototype for function" clang warning.
78 * \todo Check if the force should be set to zero here.
79 * \todo This kernel can also accumulate incidental temperatures for each atom.
81 * \tparam numTempScaleValues The number of different T-couple values.
82 * \tparam velocityScaling Type of the Parrinello-Rahman velocity rescaling.
83 * \param[in] numAtoms Total number of atoms.
84 * \param[in,out] gm_x Coordinates to update upon integration.
85 * \param[out] gm_xp A copy of the coordinates before the integration (for constraints).
86 * \param[in,out] gm_v Velocities to update.
87 * \param[in] gm_f Atomic forces.
88 * \param[in] gm_inverseMasses Reciprocal masses.
89 * \param[in] dt Timestep.
90 * \param[in] gm_lambdas Temperature scaling factors (one per group)
91 * \param[in] gm_tempScaleGroups Mapping of atoms into groups.
92 * \param[in] prVelocityScalingMatrixDiagonal Diagonal elements of Parrinello-Rahman velocity scaling matrix
94 template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
95 __launch_bounds__(c_maxThreadsPerBlock) __global__
96 void leapfrog_kernel(const int numAtoms,
97 float3* __restrict__ gm_x,
98 float3* __restrict__ gm_xp,
99 float3* __restrict__ gm_v,
100 const float3* __restrict__ gm_f,
101 const float* __restrict__ gm_inverseMasses,
103 const float* __restrict__ gm_lambdas,
104 const unsigned short* __restrict__ gm_tempScaleGroups,
105 const float3 prVelocityScalingMatrixDiagonal)
107 int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
108 if (threadIndex < numAtoms)
110 float3 x = gm_x[threadIndex];
111 float3 v = gm_v[threadIndex];
112 float3 f = gm_f[threadIndex];
113 float im = gm_inverseMasses[threadIndex];
114 float imdt = im * dt;
116 // Swapping places for xp and x so that the x will contain the updated coordinates and xp - the
117 // coordinates before update. This should be taken into account when (if) constraints are applied
118 // after the update: x and xp have to be passed to constraints in the 'wrong' order.
120 gm_xp[threadIndex] = x;
122 if (numTempScaleValues != NumTempScaleValues::None || velocityScaling != VelocityScalingType::None)
126 if (numTempScaleValues != NumTempScaleValues::None)
129 if (numTempScaleValues == NumTempScaleValues::Single)
131 lambda = gm_lambdas[0];
133 else if (numTempScaleValues == NumTempScaleValues::Multiple)
135 int tempScaleGroup = gm_tempScaleGroups[threadIndex];
136 lambda = gm_lambdas[tempScaleGroup];
141 if (velocityScaling == VelocityScalingType::Diagonal)
143 vp.x -= prVelocityScalingMatrixDiagonal.x * v.x;
144 vp.y -= prVelocityScalingMatrixDiagonal.y * v.y;
145 vp.z -= prVelocityScalingMatrixDiagonal.z * v.z;
154 gm_v[threadIndex] = v;
155 gm_x[threadIndex] = x;
159 /*! \brief Select templated kernel.
161 * Returns pointer to a CUDA kernel based on the number of temperature coupling groups and
162 * whether or not the temperature and(or) pressure coupling is enabled.
164 * \param[in] doTemperatureScaling If the kernel with temperature coupling velocity scaling
165 * should be selected.
166 * \param[in] numTempScaleValues Number of temperature coupling groups in the system.
167 * \param[in] prVelocityScalingType Type of the Parrinello-Rahman velocity scaling.
169 * \retrun Pointer to CUDA kernel
171 inline auto selectLeapFrogKernelPtr(bool doTemperatureScaling,
172 int numTempScaleValues,
173 VelocityScalingType prVelocityScalingType)
175 // Check input for consistency: if there is temperature coupling, at least one coupling group should be defined.
176 GMX_ASSERT(!doTemperatureScaling || (numTempScaleValues > 0),
177 "Temperature coupling was requested with no temperature coupling groups.");
178 auto kernelPtr = leapfrog_kernel<NumTempScaleValues::None, VelocityScalingType::None>;
180 if (prVelocityScalingType == VelocityScalingType::None)
182 if (!doTemperatureScaling)
184 kernelPtr = leapfrog_kernel<NumTempScaleValues::None, VelocityScalingType::None>;
186 else if (numTempScaleValues == 1)
188 kernelPtr = leapfrog_kernel<NumTempScaleValues::Single, VelocityScalingType::None>;
190 else if (numTempScaleValues > 1)
192 kernelPtr = leapfrog_kernel<NumTempScaleValues::Multiple, VelocityScalingType::None>;
195 else if (prVelocityScalingType == VelocityScalingType::Diagonal)
197 if (!doTemperatureScaling)
199 kernelPtr = leapfrog_kernel<NumTempScaleValues::None, VelocityScalingType::Diagonal>;
201 else if (numTempScaleValues == 1)
203 kernelPtr = leapfrog_kernel<NumTempScaleValues::Single, VelocityScalingType::Diagonal>;
205 else if (numTempScaleValues > 1)
207 kernelPtr = leapfrog_kernel<NumTempScaleValues::Multiple, VelocityScalingType::Diagonal>;
212 GMX_RELEASE_ASSERT(false,
213 "Only isotropic Parrinello-Rahman pressure coupling is supported.");
219 void launchLeapFrogKernel(const int numAtoms,
220 DeviceBuffer<Float3> d_x,
221 DeviceBuffer<Float3> d_xp,
222 DeviceBuffer<Float3> d_v,
223 const DeviceBuffer<Float3> d_f,
224 const DeviceBuffer<float> d_inverseMasses,
226 const bool doTemperatureScaling,
227 const int numTempScaleValues,
228 const DeviceBuffer<unsigned short> d_tempScaleGroups,
229 const DeviceBuffer<float> d_lambdas,
230 const VelocityScalingType prVelocityScalingType,
231 const Float3 prVelocityScalingMatrixDiagonal,
232 const DeviceStream& deviceStream)
234 // Checking the buffer types against the kernel argument types
235 static_assert(sizeof(*d_inverseMasses) == sizeof(float), "Incompatible types");
237 KernelLaunchConfig kernelLaunchConfig;
239 kernelLaunchConfig.gridSize[0] = (numAtoms + c_threadsPerBlock - 1) / c_threadsPerBlock;
240 kernelLaunchConfig.blockSize[0] = c_threadsPerBlock;
241 kernelLaunchConfig.blockSize[1] = 1;
242 kernelLaunchConfig.blockSize[2] = 1;
243 kernelLaunchConfig.sharedMemorySize = 0;
246 selectLeapFrogKernelPtr(doTemperatureScaling, numTempScaleValues, prVelocityScalingType);
248 const auto kernelArgs = prepareGpuKernelArguments(kernelPtr,
251 asFloat3Pointer(&d_x),
252 asFloat3Pointer(&d_xp),
253 asFloat3Pointer(&d_v),
254 asFloat3Pointer(&d_f),
259 &prVelocityScalingMatrixDiagonal);
260 launchGpuKernel(kernelPtr, kernelLaunchConfig, deviceStream, nullptr, "leapfrog_kernel", kernelArgs);