src/gromacs/mdlib/leapfrog_gpu_internal.cu

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  *
  37  * \brief Implements Leap-Frog using CUDA
  38  *
  39  * This file contains CUDA implementation of back-end specific code for Leap-Frog.
  40  *
  41  * \author Artem Zhmurov <zhmurov@gmail.com>
  42  *
  43  * \ingroup module_mdlib
  44  */
  45 #include "gmxpre.h"
  46
  47 #include "leapfrog_gpu_internal.h"
  48
  49 #include "gromacs/gpu_utils/cudautils.cuh"
  50 #include "gromacs/gpu_utils/devicebuffer.h"
  51 #include "gromacs/gpu_utils/typecasts.cuh"
  52 #include "gromacs/gpu_utils/vectype_ops.cuh"
  53 #include "gromacs/math/vec.h"
  54 #include "gromacs/mdtypes/group.h"
  55 #include "gromacs/pbcutil/pbc.h"
  56 #include "gromacs/pbcutil/pbc_aiuc_cuda.cuh"
  57 #include "gromacs/utility/arrayref.h"
  58
  59 namespace gmx
  60 {
  61
  62 /*!\brief Number of CUDA threads in a block
  63  *
  64  * \todo Check if using smaller block size will lead to better performance.
  65  */
  66 constexpr static int c_threadsPerBlock = 256;
  67 //! Maximum number of threads in a block (for __launch_bounds__)
  68 constexpr static int c_maxThreadsPerBlock = c_threadsPerBlock;
  69
  70 /*! \brief Main kernel for Leap-Frog integrator.
  71  *
  72  *  The coordinates and velocities are updated on the GPU. Also saves the intermediate values of the coordinates for
  73  *   further use in constraints.
  74  *
  75  *  Each GPU thread works with a single particle. Empty declaration is needed to
  76  *  avoid "no previous prototype for function" clang warning.
  77  *
  78  *  \todo Check if the force should be set to zero here.
  79  *  \todo This kernel can also accumulate incidental temperatures for each atom.
  80  *
  81  * \tparam        numTempScaleValues               The number of different T-couple values.
  82  * \tparam        velocityScaling                  Type of the Parrinello-Rahman velocity rescaling.
  83  * \param[in]     numAtoms                         Total number of atoms.
  84  * \param[in,out] gm_x                             Coordinates to update upon integration.
  85  * \param[out]    gm_xp                            A copy of the coordinates before the integration (for constraints).
  86  * \param[in,out] gm_v                             Velocities to update.
  87  * \param[in]     gm_f                             Atomic forces.
  88  * \param[in]     gm_inverseMasses                 Reciprocal masses.
  89  * \param[in]     dt                               Timestep.
  90  * \param[in]     gm_lambdas                       Temperature scaling factors (one per group)
  91  * \param[in]     gm_tempScaleGroups               Mapping of atoms into groups.
  92  * \param[in]     prVelocityScalingMatrixDiagonal  Diagonal elements of Parrinello-Rahman velocity scaling matrix
  93  */
  94 template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
  95 __launch_bounds__(c_maxThreadsPerBlock) __global__
  96         void leapfrog_kernel(const int numAtoms,
  97                              float3* __restrict__ gm_x,
  98                              float3* __restrict__ gm_xp,
  99                              float3* __restrict__ gm_v,
 100                              const float3* __restrict__ gm_f,
 101                              const float* __restrict__ gm_inverseMasses,
 102                              const float dt,
 103                              const float* __restrict__ gm_lambdas,
 104                              const unsigned short* __restrict__ gm_tempScaleGroups,
 105                              const float3 prVelocityScalingMatrixDiagonal)
 106 {
 107     int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
 108     if (threadIndex < numAtoms)
 109     {
 110         float3 x    = gm_x[threadIndex];
 111         float3 v    = gm_v[threadIndex];
 112         float3 f    = gm_f[threadIndex];
 113         float  im   = gm_inverseMasses[threadIndex];
 114         float  imdt = im * dt;
 115
 116         // Swapping places for xp and x so that the x will contain the updated coordinates and xp - the
 117         // coordinates before update. This should be taken into account when (if) constraints are applied
 118         // after the update: x and xp have to be passed to constraints in the 'wrong' order.
 119         // TODO: Issue #3727
 120         gm_xp[threadIndex] = x;
 121
 122         if (numTempScaleValues != NumTempScaleValues::None || velocityScaling != VelocityScalingType::None)
 123         {
 124             float3 vp = v;
 125
 126             if (numTempScaleValues != NumTempScaleValues::None)
 127             {
 128                 float lambda = 1.0F;
 129                 if (numTempScaleValues == NumTempScaleValues::Single)
 130                 {
 131                     lambda = gm_lambdas[0];
 132                 }
 133                 else if (numTempScaleValues == NumTempScaleValues::Multiple)
 134                 {
 135                     int tempScaleGroup = gm_tempScaleGroups[threadIndex];
 136                     lambda             = gm_lambdas[tempScaleGroup];
 137                 }
 138                 vp *= lambda;
 139             }
 140
 141             if (velocityScaling == VelocityScalingType::Diagonal)
 142             {
 143                 vp.x -= prVelocityScalingMatrixDiagonal.x * v.x;
 144                 vp.y -= prVelocityScalingMatrixDiagonal.y * v.y;
 145                 vp.z -= prVelocityScalingMatrixDiagonal.z * v.z;
 146             }
 147
 148             v = vp;
 149         }
 150
 151         v += f * imdt;
 152
 153         x += v * dt;
 154         gm_v[threadIndex] = v;
 155         gm_x[threadIndex] = x;
 156     }
 157 }
 158
 159 /*! \brief Select templated kernel.
 160  *
 161  * Returns pointer to a CUDA kernel based on the number of temperature coupling groups and
 162  * whether or not the temperature and(or) pressure coupling is enabled.
 163  *
 164  * \param[in]  doTemperatureScaling   If the kernel with temperature coupling velocity scaling
 165  *                                    should be selected.
 166  * \param[in]  numTempScaleValues     Number of temperature coupling groups in the system.
 167  * \param[in]  prVelocityScalingType  Type of the Parrinello-Rahman velocity scaling.
 168  *
 169  * \retrun                         Pointer to CUDA kernel
 170  */
 171 inline auto selectLeapFrogKernelPtr(bool                doTemperatureScaling,
 172                                     int                 numTempScaleValues,
 173                                     VelocityScalingType prVelocityScalingType)
 174 {
 175     // Check input for consistency: if there is temperature coupling, at least one coupling group should be defined.
 176     GMX_ASSERT(!doTemperatureScaling || (numTempScaleValues > 0),
 177                "Temperature coupling was requested with no temperature coupling groups.");
 178     auto kernelPtr = leapfrog_kernel<NumTempScaleValues::None, VelocityScalingType::None>;
 179
 180     if (prVelocityScalingType == VelocityScalingType::None)
 181     {
 182         if (!doTemperatureScaling)
 183         {
 184             kernelPtr = leapfrog_kernel<NumTempScaleValues::None, VelocityScalingType::None>;
 185         }
 186         else if (numTempScaleValues == 1)
 187         {
 188             kernelPtr = leapfrog_kernel<NumTempScaleValues::Single, VelocityScalingType::None>;
 189         }
 190         else if (numTempScaleValues > 1)
 191         {
 192             kernelPtr = leapfrog_kernel<NumTempScaleValues::Multiple, VelocityScalingType::None>;
 193         }
 194     }
 195     else if (prVelocityScalingType == VelocityScalingType::Diagonal)
 196     {
 197         if (!doTemperatureScaling)
 198         {
 199             kernelPtr = leapfrog_kernel<NumTempScaleValues::None, VelocityScalingType::Diagonal>;
 200         }
 201         else if (numTempScaleValues == 1)
 202         {
 203             kernelPtr = leapfrog_kernel<NumTempScaleValues::Single, VelocityScalingType::Diagonal>;
 204         }
 205         else if (numTempScaleValues > 1)
 206         {
 207             kernelPtr = leapfrog_kernel<NumTempScaleValues::Multiple, VelocityScalingType::Diagonal>;
 208         }
 209     }
 210     else
 211     {
 212         GMX_RELEASE_ASSERT(false,
 213                            "Only isotropic Parrinello-Rahman pressure coupling is supported.");
 214     }
 215     return kernelPtr;
 216 }
 217
 218
 219 void launchLeapFrogKernel(const int                          numAtoms,
 220                           DeviceBuffer<Float3>               d_x,
 221                           DeviceBuffer<Float3>               d_xp,
 222                           DeviceBuffer<Float3>               d_v,
 223                           const DeviceBuffer<Float3>         d_f,
 224                           const DeviceBuffer<float>          d_inverseMasses,
 225                           const float                        dt,
 226                           const bool                         doTemperatureScaling,
 227                           const int                          numTempScaleValues,
 228                           const DeviceBuffer<unsigned short> d_tempScaleGroups,
 229                           const DeviceBuffer<float>          d_lambdas,
 230                           const VelocityScalingType          prVelocityScalingType,
 231                           const Float3                       prVelocityScalingMatrixDiagonal,
 232                           const DeviceStream&                deviceStream)
 233 {
 234     // Checking the buffer types against the kernel argument types
 235     static_assert(sizeof(*d_inverseMasses) == sizeof(float), "Incompatible types");
 236
 237     KernelLaunchConfig kernelLaunchConfig;
 238
 239     kernelLaunchConfig.gridSize[0]      = (numAtoms + c_threadsPerBlock - 1) / c_threadsPerBlock;
 240     kernelLaunchConfig.blockSize[0]     = c_threadsPerBlock;
 241     kernelLaunchConfig.blockSize[1]     = 1;
 242     kernelLaunchConfig.blockSize[2]     = 1;
 243     kernelLaunchConfig.sharedMemorySize = 0;
 244
 245     auto kernelPtr =
 246             selectLeapFrogKernelPtr(doTemperatureScaling, numTempScaleValues, prVelocityScalingType);
 247
 248     const auto kernelArgs = prepareGpuKernelArguments(kernelPtr,
 249                                                       kernelLaunchConfig,
 250                                                       &numAtoms,
 251                                                       asFloat3Pointer(&d_x),
 252                                                       asFloat3Pointer(&d_xp),
 253                                                       asFloat3Pointer(&d_v),
 254                                                       asFloat3Pointer(&d_f),
 255                                                       &d_inverseMasses,
 256                                                       &dt,
 257                                                       &d_lambdas,
 258                                                       &d_tempScaleGroups,
 259                                                       &prVelocityScalingMatrixDiagonal);
 260     launchGpuKernel(kernelPtr, kernelLaunchConfig, deviceStream, nullptr, "leapfrog_kernel", kernelArgs);
 261 }
 262
 263 } // namespace gmx