src/gromacs/mdlib/leapfrog_gpu_internal_sycl.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2020,2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  *
  37  * \brief Implements Leap-Frog using SYCL
  38  *
  39  * This file contains SYCL implementation of back-end specific code for Leap-Frog.
  40  *
  41  * \author Artem Zhmurov <zhmurov@gmail.com>
  42  * \author Andrey Alekseenko <al42and@gmail.com>
  43  *
  44  * \ingroup module_mdlib
  45  */
  46 #include "gmxpre.h"
  47
  48 #include "leapfrog_gpu_internal.h"
  49
  50 #include "gromacs/gpu_utils/devicebuffer.h"
  51 #include "gromacs/gpu_utils/gmxsycl.h"
  52 #include "gromacs/math/vec.h"
  53 #include "gromacs/mdlib/leapfrog_gpu.h"
  54 #include "gromacs/mdtypes/group.h"
  55 #include "gromacs/utility/arrayref.h"
  56 #include "gromacs/utility/fatalerror.h"
  57 #include "gromacs/utility/template_mp.h"
  58
  59 //! \brief Class name for leap-frog kernel
  60 template<gmx::NumTempScaleValues numTempScaleValues, gmx::VelocityScalingType velocityScaling>
  61 class LeapFrogKernel;
  62
  63 namespace gmx
  64 {
  65
  66 using cl::sycl::access::mode;
  67
  68 /*! \brief Main kernel for the Leap-Frog integrator.
  69  *
  70  *  The coordinates and velocities are updated on the GPU. Also saves the intermediate values of the coordinates for
  71  *   further use in constraints.
  72  *
  73  *  Each GPU thread works with a single particle.
  74  *
  75  * \tparam        numTempScaleValues               The number of different T-couple values.
  76  * \tparam        velocityScaling                  Type of the Parrinello-Rahman velocity rescaling.
  77  * \param         cgh                              SYCL's command group handler.
  78  * \param[in,out] a_x                              Coordinates to update upon integration.
  79  * \param[out]    a_xp                             A copy of the coordinates before the integration (for constraints).
  80  * \param[in,out] a_v                              Velocities to update.
  81  * \param[in]     a_f                              Atomic forces.
  82  * \param[in]     a_inverseMasses                  Reciprocal masses.
  83  * \param[in]     dt                               Timestep.
  84  * \param[in]     a_lambdas                        Temperature scaling factors (one per group).
  85  * \param[in]     a_tempScaleGroups                Mapping of atoms into groups.
  86  * \param[in]     prVelocityScalingMatrixDiagonal  Diagonal elements of Parrinello-Rahman velocity scaling matrix.
  87  */
  88 template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
  89 auto leapFrogKernel(
  90         cl::sycl::handler&                          cgh,
  91         DeviceAccessor<Float3, mode::read_write>    a_x,
  92         DeviceAccessor<Float3, mode::discard_write> a_xp,
  93         DeviceAccessor<Float3, mode::read_write>    a_v,
  94         DeviceAccessor<Float3, mode::read>          a_f,
  95         DeviceAccessor<float, mode::read>           a_inverseMasses,
  96         float                                       dt,
  97         OptionalAccessor<float, mode::read, numTempScaleValues != NumTempScaleValues::None> a_lambdas,
  98         OptionalAccessor<unsigned short, mode::read, numTempScaleValues == NumTempScaleValues::Multiple> a_tempScaleGroups,
  99         Float3 prVelocityScalingMatrixDiagonal)
 100 {
 101     a_x.bind(cgh);
 102     a_xp.bind(cgh);
 103     a_v.bind(cgh);
 104     a_f.bind(cgh);
 105     a_inverseMasses.bind(cgh);
 106     if constexpr (numTempScaleValues != NumTempScaleValues::None)
 107     {
 108         a_lambdas.bind(cgh);
 109     }
 110     if constexpr (numTempScaleValues == NumTempScaleValues::Multiple)
 111     {
 112         a_tempScaleGroups.bind(cgh);
 113     }
 114
 115     return [=](cl::sycl::id<1> itemIdx) {
 116         const Float3 x    = a_x[itemIdx];
 117         const Float3 v    = a_v[itemIdx];
 118         const Float3 f    = a_f[itemIdx];
 119         const float  im   = a_inverseMasses[itemIdx];
 120         const float  imdt = im * dt;
 121
 122         // Swapping places for xp and x so that the x will contain the updated coordinates and xp -
 123         // the coordinates before update. This should be taken into account when (if) constraints
 124         // are applied after the update: x and xp have to be passed to constraints in the 'wrong'
 125         // order. See Issue #3727
 126         a_xp[itemIdx] = x;
 127
 128         const float lambda = [=]() {
 129             if constexpr (numTempScaleValues == NumTempScaleValues::None)
 130             {
 131                 return 1.0F;
 132             }
 133             else if constexpr (numTempScaleValues == NumTempScaleValues::Single)
 134             {
 135                 return a_lambdas[0];
 136             }
 137             else if constexpr (numTempScaleValues == NumTempScaleValues::Multiple)
 138             {
 139                 const int tempScaleGroup = a_tempScaleGroups[itemIdx];
 140                 return a_lambdas[tempScaleGroup];
 141             }
 142         }();
 143
 144         const Float3 prVelocityDelta = [=]() {
 145             if constexpr (velocityScaling == VelocityScalingType::Diagonal)
 146             {
 147                 return Float3{ prVelocityScalingMatrixDiagonal[0] * v[0],
 148                                prVelocityScalingMatrixDiagonal[1] * v[1],
 149                                prVelocityScalingMatrixDiagonal[2] * v[2] };
 150             }
 151             else if constexpr (velocityScaling == VelocityScalingType::None)
 152             {
 153                 return Float3{ 0, 0, 0 };
 154             }
 155         }();
 156
 157         const Float3 v_new = v * lambda - prVelocityDelta + f * imdt;
 158         a_v[itemIdx]       = v_new;
 159         a_x[itemIdx]       = x + v_new * dt;
 160     };
 161 }
 162
 163 //! \brief Leap Frog SYCL kernel launch code.
 164 template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling, class... Args>
 165 static cl::sycl::event launchLeapFrogKernel(const DeviceStream& deviceStream, int numAtoms, Args&&... args)
 166 {
 167     // Should not be needed for SYCL2020.
 168     using kernelNameType = LeapFrogKernel<numTempScaleValues, velocityScaling>;
 169
 170     const cl::sycl::range<1> rangeAllAtoms(numAtoms);
 171     cl::sycl::queue          q = deviceStream.stream();
 172
 173     cl::sycl::event e = q.submit([&](cl::sycl::handler& cgh) {
 174         auto kernel =
 175                 leapFrogKernel<numTempScaleValues, velocityScaling>(cgh, std::forward<Args>(args)...);
 176         cgh.parallel_for<kernelNameType>(rangeAllAtoms, kernel);
 177     });
 178
 179     return e;
 180 }
 181
 182 //! Convert \p doTemperatureScaling and \p numTempScaleValues to \ref NumTempScaleValues.
 183 static NumTempScaleValues getTempScalingType(bool doTemperatureScaling, int numTempScaleValues)
 184 {
 185     if (!doTemperatureScaling)
 186     {
 187         return NumTempScaleValues::None;
 188     }
 189     else if (numTempScaleValues == 1)
 190     {
 191         return NumTempScaleValues::Single;
 192     }
 193     else if (numTempScaleValues > 1)
 194     {
 195         return NumTempScaleValues::Multiple;
 196     }
 197     else
 198     {
 199         gmx_incons("Temperature coupling was requested with no temperature coupling groups.");
 200     }
 201 }
 202
 203 /*! \brief Select templated kernel and launch it. */
 204 template<class... Args>
 205 static inline cl::sycl::event launchLeapFrogKernel(NumTempScaleValues  tempScalingType,
 206                                                    VelocityScalingType prVelocityScalingType,
 207                                                    Args&&... args)
 208 {
 209     GMX_ASSERT(prVelocityScalingType == VelocityScalingType::None
 210                        || prVelocityScalingType == VelocityScalingType::Diagonal,
 211                "Only isotropic Parrinello-Rahman pressure coupling is supported.");
 212
 213     return dispatchTemplatedFunction(
 214             [&](auto tempScalingType_, auto prScalingType_) {
 215                 return launchLeapFrogKernel<tempScalingType_, prScalingType_>(std::forward<Args>(args)...);
 216             },
 217             tempScalingType,
 218             prVelocityScalingType);
 219 }
 220
 221 void launchLeapFrogKernel(int                                numAtoms,
 222                           DeviceBuffer<Float3>               d_x,
 223                           DeviceBuffer<Float3>               d_xp,
 224                           DeviceBuffer<Float3>               d_v,
 225                           const DeviceBuffer<Float3>         d_f,
 226                           const DeviceBuffer<float>          d_inverseMasses,
 227                           const float                        dt,
 228                           const bool                         doTemperatureScaling,
 229                           const int                          numTempScaleValues,
 230                           const DeviceBuffer<unsigned short> d_tempScaleGroups,
 231                           const DeviceBuffer<float>          d_lambdas,
 232                           const VelocityScalingType          prVelocityScalingType,
 233                           const Float3                       prVelocityScalingMatrixDiagonal,
 234                           const DeviceStream&                deviceStream)
 235 {
 236     NumTempScaleValues tempVelocityScalingType =
 237             getTempScalingType(doTemperatureScaling, numTempScaleValues);
 238
 239
 240     launchLeapFrogKernel(tempVelocityScalingType,
 241                          prVelocityScalingType,
 242                          deviceStream,
 243                          numAtoms,
 244                          d_x,
 245                          d_xp,
 246                          d_v,
 247                          d_f,
 248                          d_inverseMasses,
 249                          dt,
 250                          d_lambdas,
 251                          d_tempScaleGroups,
 252                          prVelocityScalingMatrixDiagonal);
 253 }
 254
 255 } // namespace gmx