src/gromacs/mdlib/lincs_cuda.cuh

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \libinternal \file
  36  *
  37  * \brief Declares the class for CUDA implementation of LINCS.
  38  *
  39  * \author Artem Zhmurov <zhmurov@gmail.com>
  40  *
  41  * \ingroup module_mdlib
  42  * \inlibraryapi
  43  */
  44 #ifndef GMX_MDLIB_LINCS_CUDA_CUH
  45 #define GMX_MDLIB_LINCS_CUDA_CUH
  46
  47 #include "gromacs/gpu_utils/gputraits.cuh"
  48 #include "gromacs/mdlib/constr.h"
  49 #include "gromacs/mdtypes/mdatom.h"
  50 #include "gromacs/pbcutil/pbc_aiuc.h"
  51 #include "gromacs/topology/idef.h"
  52 #include "gromacs/utility/classhelpers.h"
  53
  54 namespace gmx
  55 {
  56
  57 /* \brief LINCS parameters and GPU pointers
  58  *
  59  * This is used to accumulate all the parameters and pointers so they can be passed
  60  * to the GPU as a single structure.
  61  *
  62  */
  63 struct LincsCudaKernelParameters
  64 {
  65     //! Periodic boundary data
  66     PbcAiuc pbcAiuc;
  67     //! Order of expansion when inverting the matrix
  68     int expansionOrder;
  69     //! Number of iterations used to correct the projection
  70     int numIterations;
  71     //! 1/mass for all atoms (GPU)
  72     float* d_inverseMasses;
  73     //! Scaled virial tensor (6 floats: [XX, XY, XZ, YY, YZ, ZZ], GPU)
  74     float* d_virialScaled;
  75     /*! \brief Total number of threads.
  76      *
  77      *  This covers all constraints and gaps in the ends of the thread blocks
  78      *  that are necessary to avoid inter-block synchronizations.
  79      *  Should be a multiple of block size (the last block is filled with dummy to the end).
  80      */
  81     int numConstraintsThreads;
  82     //! List of constrained atoms (GPU memory)
  83     int2* d_constraints;
  84     //! Equilibrium distances for the constraints (GPU)
  85     float* d_constraintsTargetLengths;
  86     //! Number of constraints, coupled with the current one (GPU)
  87     int* d_coupledConstraintsCounts;
  88     //! List of coupled with the current one (GPU)
  89     int* d_coupledConstraintsIndices;
  90     //! Elements of the coupling matrix.
  91     float* d_matrixA;
  92     //! Mass factors (GPU)
  93     float* d_massFactors;
  94 };
  95
  96 /*! \internal \brief Class with interfaces and data for CUDA version of LINCS. */
  97 class LincsCuda
  98 {
  99
 100 public:
 101     /*! \brief Constructor.
 102      *
 103      * \param[in] numIterations    Number of iteration for the correction of the projection.
 104      * \param[in] expansionOrder   Order of the matrix inversion algorithm.
 105      * \param[in] commandStream    Device command stream.
 106      */
 107     LincsCuda(int numIterations, int expansionOrder, CommandStream commandStream);
 108     /*! \brief Destructor.*/
 109     ~LincsCuda();
 110
 111     /*! \brief Apply LINCS.
 112      *
 113      * Applies LINCS to coordinates and velocities, stored on GPU.
 114      * The results are not automatically copied back to the CPU memory.
 115      * Method uses this class data structures which should be updated
 116      * when needed using set() and setPbc() method.
 117      *
 118      * \param[in]     d_x               Coordinates before timestep (in GPU memory)
 119      * \param[in,out] d_xp              Coordinates after timestep (in GPU memory). The
 120      *                                  resulting constrained coordinates will be saved here.
 121      * \param[in]     updateVelocities  If the velocities should be updated.
 122      * \param[in,out] d_v               Velocities to update (in GPU memory, can be nullptr
 123      *                                  if not updated)
 124      * \param[in]     invdt             Reciprocal timestep (to scale Lagrange
 125      *                                  multipliers when velocities are updated)
 126      * \param[in]     computeVirial     If virial should be updated.
 127      * \param[in,out] virialScaled      Scaled virial tensor to be updated.
 128      */
 129     void apply(const float3* d_x,
 130                float3*       d_xp,
 131                const bool    updateVelocities,
 132                float3*       d_v,
 133                const real    invdt,
 134                const bool    computeVirial,
 135                tensor        virialScaled);
 136
 137     /*! \brief
 138      * Update data-structures (e.g. after NB search step).
 139      *
 140      * Updates the constraints data and copies it to the GPU. Should be
 141      * called if the particles were sorted, redistributed between domains, etc.
 142      * This version uses common data formats so it can be called from anywhere
 143      * in the code. Does not recycle the data preparation routines from the CPU
 144      * version. Works only with simple case when all the constraints in idef are
 145      * are handled by a single GPU. Triangles are not handled as special case.
 146      *
 147      * Information about constraints is taken from:
 148      *     idef.il[F_CONSTR].iatoms  --- type (T) of constraint and two atom indexes (i1, i2)
 149      *     idef.iparams[T].constr.dA --- target length for constraint of type T
 150      * From t_mdatom, the code takes:
 151      *     md.invmass  --- array of inverse square root of masses for each atom in the system.
 152      *
 153      * \param[in] idef  Local topology data to get information on constraints from.
 154      * \param[in] md    Atoms data to get atom masses from.
 155      */
 156     void set(const t_idef& idef, const t_mdatoms& md);
 157
 158     /*! \brief
 159      * Update PBC data.
 160      *
 161      * Converts pbc data from t_pbc into the PbcAiuc format and stores the latter.
 162      *
 163      * \todo Remove this method. LINCS should not manage PBC.
 164      *
 165      * \param[in] pbc The PBC data in t_pbc format.
 166      */
 167     void setPbc(const t_pbc* pbc);
 168
 169
 170 private:
 171     //! CUDA stream
 172     CommandStream commandStream_;
 173
 174     //! Parameters and pointers, passed to the CUDA kernel
 175     LincsCudaKernelParameters kernelParams_;
 176
 177     //! Scaled virial tensor (6 floats: [XX, XY, XZ, YY, YZ, ZZ])
 178     std::vector<float> h_virialScaled_;
 179
 180     /*! \brief Maximum total number of constraints so far.
 181      *
 182      * If the new number of constraints is larger then previous maximum, the GPU data arrays are
 183      * reallocated.
 184      */
 185     int numConstraintsThreadsAlloc_;
 186
 187     /*! \brief Maximum total number of atoms so far.
 188      *
 189      * If the new number of atoms is larger then previous maximum, the GPU array with masses is
 190      * reallocated.
 191      */
 192     int numAtomsAlloc_;
 193 };
 194
 195 } // namespace gmx
 196
 197 #endif // GMX_MDLIB_LINCS_CUDA_CUH