src/gromacs/mdlib/leapfrog_gpu.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  *
  37  * \brief Implements the backend-agnostic part of the Leap-Frog integrator on GPU
  38  *
  39  * This file contains backend-agnostic code for Leap-Frog integrator class on GPU,
  40  * including class initialization, and data-structures management.
  41  *
  42  * \author Artem Zhmurov <zhmurov@gmail.com>
  43  *
  44  * \ingroup module_mdlib
  45  */
  46 #include "gmxpre.h"
  47
  48 #include "leapfrog_gpu.h"
  49
  50 #include <assert.h>
  51 #include <stdio.h>
  52
  53 #include <algorithm>
  54 #include <cmath>
  55
  56 #include "gromacs/gpu_utils/devicebuffer.h"
  57 #include "gromacs/math/vec.h"
  58 #include "gromacs/mdlib/leapfrog_gpu_internal.h"
  59 #include "gromacs/mdtypes/group.h"
  60 #include "gromacs/pbcutil/pbc.h"
  61 #include "gromacs/utility/arrayref.h"
  62
  63 namespace gmx
  64 {
  65
  66 void LeapFrogGpu::integrate(DeviceBuffer<Float3>              d_x,
  67                             DeviceBuffer<Float3>              d_xp,
  68                             DeviceBuffer<Float3>              d_v,
  69                             const DeviceBuffer<Float3>        d_f,
  70                             const float                       dt,
  71                             const bool                        doTemperatureScaling,
  72                             gmx::ArrayRef<const t_grp_tcstat> tcstat,
  73                             const bool                        doParrinelloRahman,
  74                             const float                       dtPressureCouple,
  75                             const matrix                      prVelocityScalingMatrix)
  76 {
  77
  78     if (doTemperatureScaling)
  79     {
  80         GMX_ASSERT(checkDeviceBuffer(d_lambdas_, numTempScaleValues_),
  81                    "Number of temperature scaling factors changed since it was set for the "
  82                    "last time.");
  83         GMX_ASSERT(numTempScaleValues_ == ssize(h_lambdas_),
  84                    "Number of temperature scaling factors changed since it was set for the "
  85                    "last time.");
  86
  87         /* In SYCL, we could use host accessors here, without h_lambdas_.
  88          * According to a quick test, host accessor is slightly faster when using DPC++ and
  89          * LevelZero compared to using h_lambdas_ + cgh.copy. But with DPC++ and OpenCL, the host
  90          * accessor waits for fReadyOnDevice in UpdateConstrainGpu::Impl::integrate. See #4023. */
  91         for (int i = 0; i < numTempScaleValues_; i++)
  92         {
  93             h_lambdas_[i] = tcstat[i].lambda;
  94         }
  95         copyToDeviceBuffer(&d_lambdas_,
  96                            h_lambdas_.data(),
  97                            0,
  98                            numTempScaleValues_,
  99                            deviceStream_,
 100                            GpuApiCallBehavior::Async,
 101                            nullptr);
 102     }
 103     VelocityScalingType prVelocityScalingType = VelocityScalingType::None;
 104     if (doParrinelloRahman)
 105     {
 106         prVelocityScalingType = VelocityScalingType::Diagonal;
 107         GMX_ASSERT(prVelocityScalingMatrix[YY][XX] == 0 && prVelocityScalingMatrix[ZZ][XX] == 0
 108                            && prVelocityScalingMatrix[ZZ][YY] == 0 && prVelocityScalingMatrix[XX][YY] == 0
 109                            && prVelocityScalingMatrix[XX][ZZ] == 0 && prVelocityScalingMatrix[YY][ZZ] == 0,
 110                    "Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported "
 111                    "in GPU version of Leap-Frog integrator.");
 112         prVelocityScalingMatrixDiagonal_ = Float3{ dtPressureCouple * prVelocityScalingMatrix[XX][XX],
 113                                                    dtPressureCouple * prVelocityScalingMatrix[YY][YY],
 114                                                    dtPressureCouple * prVelocityScalingMatrix[ZZ][ZZ] };
 115     }
 116
 117     launchLeapFrogKernel(numAtoms_,
 118                          d_x,
 119                          d_xp,
 120                          d_v,
 121                          d_f,
 122                          d_inverseMasses_,
 123                          dt,
 124                          doTemperatureScaling,
 125                          numTempScaleValues_,
 126                          d_tempScaleGroups_,
 127                          d_lambdas_,
 128                          prVelocityScalingType,
 129                          prVelocityScalingMatrixDiagonal_,
 130                          deviceStream_);
 131 }
 132
 133 LeapFrogGpu::LeapFrogGpu(const DeviceContext& deviceContext,
 134                          const DeviceStream&  deviceStream,
 135                          const int            numTempScaleValues) :
 136     deviceContext_(deviceContext), deviceStream_(deviceStream), numTempScaleValues_(numTempScaleValues)
 137 {
 138     numAtoms_ = 0;
 139
 140     changePinningPolicy(&h_lambdas_, gmx::PinningPolicy::PinnedIfSupported);
 141
 142     // If the temperature coupling is enabled, we need to make space for scaling factors
 143     if (numTempScaleValues_ > 0)
 144     {
 145         h_lambdas_.resize(numTempScaleValues_);
 146         reallocateDeviceBuffer(
 147                 &d_lambdas_, numTempScaleValues_, &numLambdas_, &numLambdasAlloc_, deviceContext_);
 148     }
 149 }
 150
 151 LeapFrogGpu::~LeapFrogGpu()
 152 {
 153     freeDeviceBuffer(&d_inverseMasses_);
 154 }
 155
 156 void LeapFrogGpu::set(const int numAtoms, const real* inverseMasses, const unsigned short* tempScaleGroups)
 157 {
 158     numAtoms_ = numAtoms;
 159
 160     reallocateDeviceBuffer(
 161             &d_inverseMasses_, numAtoms_, &numInverseMasses_, &numInverseMassesAlloc_, deviceContext_);
 162     copyToDeviceBuffer(
 163             &d_inverseMasses_, inverseMasses, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
 164
 165     // Temperature scale group map only used if there are more than one group
 166     if (numTempScaleValues_ > 1)
 167     {
 168         reallocateDeviceBuffer(
 169                 &d_tempScaleGroups_, numAtoms_, &numTempScaleGroups_, &numTempScaleGroupsAlloc_, deviceContext_);
 170         copyToDeviceBuffer(
 171                 &d_tempScaleGroups_, tempScaleGroups, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
 172     }
 173 }
 174
 175 } // namespace gmx