src/gromacs/mdlib/lincs_gpu.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \libinternal \file
  36  *
  37  * \brief Declares the class for GPU implementation of LINCS.
  38  *
  39  * \author Artem Zhmurov <zhmurov@gmail.com>
  40  *
  41  * \ingroup module_mdlib
  42  * \inlibraryapi
  43  */
  44 #ifndef GMX_MDLIB_LINCS_GPU_CUH
  45 #define GMX_MDLIB_LINCS_GPU_CUH
  46
  47 #include <memory>
  48
  49 #include "gromacs/gpu_utils/devicebuffer_datatype.h"
  50 #include "gromacs/gpu_utils/device_context.h"
  51 #include "gromacs/gpu_utils/device_stream.h"
  52 #include "gromacs/gpu_utils/gputraits.h"
  53 #include "gromacs/mdlib/constr.h"
  54 #include "gromacs/pbcutil/pbc_aiuc.h"
  55
  56 class InteractionDefinitions;
  57
  58 namespace gmx
  59 {
  60
  61 //! A pair of atoms indexes
  62 struct AtomPair
  63 {
  64     //! First atom
  65     int i;
  66     //! Second atom
  67     int j;
  68 };
  69
  70 /* \brief LINCS parameters and GPU pointers
  71  *
  72  * This is used to accumulate all the parameters and pointers so they can be passed
  73  * to the GPU as a single structure.
  74  *
  75  */
  76 struct LincsGpuKernelParameters
  77 {
  78     //! Periodic boundary data
  79     PbcAiuc pbcAiuc;
  80     //! Order of expansion when inverting the matrix
  81     int expansionOrder;
  82     //! Number of iterations used to correct the projection
  83     int numIterations;
  84     //! 1/mass for all atoms (GPU)
  85     DeviceBuffer<float> d_inverseMasses;
  86     //! Scaled virial tensor (6 floats: [XX, XY, XZ, YY, YZ, ZZ], GPU)
  87     DeviceBuffer<float> d_virialScaled;
  88     /*! \brief Total number of threads.
  89      *
  90      *  This covers all constraints and gaps in the ends of the thread blocks
  91      *  that are necessary to avoid inter-block synchronizations.
  92      *  Should be a multiple of block size (the last block is filled with dummy to the end).
  93      */
  94     int numConstraintsThreads;
  95     //! List of constrained atoms (GPU memory)
  96     DeviceBuffer<AtomPair> d_constraints;
  97     //! Equilibrium distances for the constraints (GPU)
  98     DeviceBuffer<float> d_constraintsTargetLengths;
  99     /*! \brief Whether there are coupled constraints.
 100      *
 101      * In SYCL, the accessors can not be initialized with an empty buffer.
 102      * In case there are no coupled constraints, the respective buffers below are
 103      * empty. So we need to inform the kernel launcher that these are optional.
 104      */
 105     bool haveCoupledConstraints = false;
 106     //! Number of constraints, coupled with the current one (GPU)
 107     DeviceBuffer<int> d_coupledConstraintsCounts;
 108     //! List of coupled with the current one (GPU)
 109     DeviceBuffer<int> d_coupledConstraintsIndices;
 110     //! Elements of the coupling matrix.
 111     DeviceBuffer<float> d_matrixA;
 112     //! Mass factors (GPU)
 113     DeviceBuffer<float> d_massFactors;
 114 };
 115
 116 /*! \internal \brief Class with interfaces and data for GPU version of LINCS. */
 117 class LincsGpu
 118 {
 119
 120 public:
 121     /*! \brief Constructor.
 122      *
 123      * \param[in] numIterations    Number of iteration for the correction of the projection.
 124      * \param[in] expansionOrder   Order of the matrix inversion algorithm.
 125      * \param[in] deviceContext    Device context (dummy in CUDA).
 126      * \param[in] deviceStream     Device command stream.
 127      */
 128     LincsGpu(int                  numIterations,
 129              int                  expansionOrder,
 130              const DeviceContext& deviceContext,
 131              const DeviceStream&  deviceStream);
 132     /*! \brief Destructor.*/
 133     ~LincsGpu();
 134
 135     /*! \brief Apply LINCS.
 136      *
 137      * Applies LINCS to coordinates and velocities, stored on GPU.
 138      * The results are not automatically copied back to the CPU memory.
 139      * Method uses this class data structures which should be updated
 140      * when needed using set() method.
 141      *
 142      * \param[in]     d_x               Coordinates before timestep (in GPU memory)
 143      * \param[in,out] d_xp              Coordinates after timestep (in GPU memory). The
 144      *                                  resulting constrained coordinates will be saved here.
 145      * \param[in]     updateVelocities  If the velocities should be updated.
 146      * \param[in,out] d_v               Velocities to update (in GPU memory, can be nullptr
 147      *                                  if not updated)
 148      * \param[in]     invdt             Reciprocal timestep (to scale Lagrange
 149      *                                  multipliers when velocities are updated)
 150      * \param[in]     computeVirial     If virial should be updated.
 151      * \param[in,out] virialScaled      Scaled virial tensor to be updated.
 152      * \param[in]     pbcAiuc           PBC data.
 153      */
 154     void apply(const DeviceBuffer<Float3>& d_x,
 155                DeviceBuffer<Float3>        d_xp,
 156                bool                        updateVelocities,
 157                DeviceBuffer<Float3>        d_v,
 158                real                        invdt,
 159                bool                        computeVirial,
 160                tensor                      virialScaled,
 161                const PbcAiuc&              pbcAiuc);
 162
 163     /*! \brief
 164      * Update data-structures (e.g. after NB search step).
 165      *
 166      * Updates the constraints data and copies it to the GPU. Should be
 167      * called if the particles were sorted, redistributed between domains, etc.
 168      * This version uses common data formats so it can be called from anywhere
 169      * in the code. Does not recycle the data preparation routines from the CPU
 170      * version. Works only with simple case when all the constraints in idef are
 171      * are handled by a single GPU. Triangles are not handled as special case.
 172      *
 173      * Information about constraints is taken from:
 174      *     idef.il[F_CONSTR].iatoms  --- type (T) of constraint and two atom indexes (i1, i2)
 175      *     idef.iparams[T].constr.dA --- target length for constraint of type T
 176      *
 177      * \param[in] idef      Local topology data to get information on constraints from.
 178      * \param[in] numAtoms  Number of atoms.
 179      * \param[in] invmass   Inverse masses of atoms.
 180      */
 181     void set(const InteractionDefinitions& idef, int numAtoms, const real* invmass);
 182
 183     /*! \brief
 184      * Returns whether the maximum number of coupled constraints is supported
 185      * by the GPU LINCS code.
 186      *
 187      * \param[in] mtop The molecular topology
 188      */
 189     static bool isNumCoupledConstraintsSupported(const gmx_mtop_t& mtop);
 190
 191 private:
 192     //! GPU context object
 193     const DeviceContext& deviceContext_;
 194     //! GPU stream
 195     const DeviceStream& deviceStream_;
 196
 197     //! Parameters and pointers, passed to the GPU kernel
 198     LincsGpuKernelParameters kernelParams_;
 199
 200     //! Scaled virial tensor (6 floats: [XX, XY, XZ, YY, YZ, ZZ])
 201     std::vector<float> h_virialScaled_;
 202
 203     /*! \brief Maximum total number of constraints so far.
 204      *
 205      * If the new number of constraints is larger then previous maximum, the GPU data arrays are
 206      * reallocated.
 207      */
 208     int numConstraintsThreadsAlloc_;
 209
 210     /*! \brief Maximum total number of atoms so far.
 211      *
 212      * If the new number of atoms is larger then previous maximum, the GPU array with masses is
 213      * reallocated.
 214      */
 215     int numAtomsAlloc_;
 216 };
 217
 218 } // namespace gmx
 219
 220 #endif // GMX_MDLIB_LINCS_GPU_CUH