src/gromacs/nbnxm/atomdata.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
   5  * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 /*! \libinternal \file
  37  *  \brief
  38  *  Functionality for per-atom data in the nbnxm module
  39  *
  40  *  \author Berk Hess <hess@kth.se>
  41  *  \ingroup module_nbnxm
  42  *  \inlibraryapi
  43  */
  44
  45
  46 #ifndef GMX_NBNXN_ATOMDATA_H
  47 #define GMX_NBNXN_ATOMDATA_H
  48
  49 #include <cstdio>
  50
  51 #include "gromacs/gpu_utils/devicebuffer_datatype.h"
  52 #include "gromacs/gpu_utils/hostallocator.h"
  53 #include "gromacs/math/vectypes.h"
  54 #include "gromacs/mdtypes/locality.h"
  55 #include "gromacs/utility/bitmask.h"
  56 #include "gromacs/utility/real.h"
  57
  58 namespace gmx
  59 {
  60 class MDLogger; // Only taken by const reference in this header, so a forward declaration suffices
  61 } // namespace gmx
  62
  63 struct NbnxmGpu; // Only passed by pointer in this header
  64 struct nbnxn_atomdata_t; // Defined further down in this header
  65 struct nonbonded_verlet_t; // NOTE(review): not referenced in the visible rest of this header - confirm it is still needed
  66
  67 class GpuEventSynchronizer; // Only passed by pointer in this header
  68
  69 namespace Nbnxm
  70 {
  71 class GridSet; // Only taken by const reference in this header
  72 enum class KernelType; // Opaque enum declaration; the enumerators are defined elsewhere
  73 } // namespace Nbnxm
  74
  75 //! Convenience alias for a std::vector of T whose storage is obtained through gmx::AlignedAllocator<T>
     // NOTE(review): <vector> is not included directly by this header; std::vector is presumably
     // made available by one of the includes above - confirm.
  76 template<typename T>
  77 using AlignedVector = std::vector<T, gmx::AlignedAllocator<T>>;
  78
  79 enum // Coordinate/force buffer layouts; presumably the values stored in nbnxn_atomdata_t::XFormat and FFormat - confirm
  80 {
     // NOTE(review): the per-value descriptions below are inferred from the names and from the
     // stride/pack constants declared after this enum; verify against the implementation.
  81     nbatXYZ, //!< presumably x,y,z per atom, cf. STRIDE_XYZ
  82     nbatXYZQ, //!< presumably x,y,z,q per atom, cf. STRIDE_XYZQ
  83     nbatX4, //!< presumably coordinates packed per 4 atoms, cf. c_packX4
  84     nbatX8 //!< presumably coordinates packed per 8 atoms, cf. c_packX8
  85 };
  86
  87 //! Stride, in reals per atom, for coordinate/force arrays with xyz coordinate storage
  88 static constexpr int STRIDE_XYZ = 3;
  89 //! Stride, in reals per atom, for coordinate/force arrays with xyzq coordinate storage
  90 static constexpr int STRIDE_XYZQ = 4;
  91 //! Size of packs of x, y or z with SIMD 4-grouped packed coordinates/forces
  92 static constexpr int c_packX4 = 4;
  93 //! Size of packs of x, y or z with SIMD 8-grouped packed coordinates/forces
  94 static constexpr int c_packX8 = 8;
  95 //! Stride for a pack of 4 coordinates/forces: DIM values for each of the c_packX4 atoms
  96 static constexpr int STRIDE_P4 = DIM * c_packX4;
  97 //! Stride for a pack of 8 coordinates/forces: DIM values for each of the c_packX8 atoms
  98 static constexpr int STRIDE_P8 = DIM * c_packX8;
  99
 100 /*! \brief Returns the index in a coordinate array corresponding to atom \p a
      *
      * Atoms are handled in packs of \p packSize. The result is DIM times the index of
      * the first atom of the pack that contains \p a, plus the position of \p a within
      * that pack. Both terms are obtained with bit masks on \p a, which is only correct
      * when \p packSize is a power of two; this is now enforced at compile time.
      *
      * \tparam packSize  Number of atoms per pack, must be a positive power of two
      *                   (this header provides c_packX4 and c_packX8).
      * \param[in] a      Atom index; presumably non-negative - TODO confirm with callers.
      * \returns          The index into the packed coordinate array for atom \p a.
      */
 101 template<int packSize>
 102 static inline int atom_to_x_index(int a)
 103 {
         // The masks below silently produce wrong indices for any other pack size
         static_assert(packSize > 0 && (packSize & (packSize - 1)) == 0,
                       "packSize must be a positive power of two");
 104     return DIM * (a & ~(packSize - 1)) + (a & (packSize - 1));
 105 }
 106
 107 /*! \internal
 108  * \brief Struct that holds force and energy output buffers; nbnxn_atomdata_t keeps one instance per thread */
 109 struct nbnxn_atomdata_output_t
 110 {
 111     /*! \brief Constructor
 112      *
 113      * \param[in] kernelType              Type of non-bonded kernel
 114      * \param[in] numEnergyGroups         The number of energy groups
 115      * \param[in] simdEnergyBufferStride  Stride for entries in the energy buffers for SIMD kernels
 116      * \param[in] pinningPolicy           Sets the pinning policy for all buffers used on the GPU
 117      */
 118     nbnxn_atomdata_output_t(Nbnxm::KernelType  kernelType,
 119                             int                numEnergyGroups,
 120                             int                simdEnergyBufferStride,
 121                             gmx::PinningPolicy pinningPolicy);
 122
 123     //! Force buffer f, size natoms*fstride
 124     gmx::HostVector<real> f;
 125     //! Shift force array, size c_numShiftVectors*DIM
 126     gmx::HostVector<real> fshift;
 127     //! Temporary Van der Waals group energy storage
 128     gmx::HostVector<real> Vvdw;
 129     //! Temporary Coulomb group energy storage
 130     gmx::HostVector<real> Vc;
 131     //! Temporary SIMD Van der Waals group energy storage, kept in an aligned vector
 132     AlignedVector<real> VSvdw;
 133     //! Temporary SIMD Coulomb group energy storage, kept in an aligned vector
 134     AlignedVector<real> VSc;
 135 };
 136
 137 /*! \brief Block size in atoms for the non-bonded thread force-buffer reduction.
 138  *
 139  * Should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
 140  * Should be small to reduce the reduction and zeroing cost,
 141  * but too small will result in overhead.
 142  * Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes
      * in both branches below, which holds if real is 8 bytes wide with GMX_DOUBLE
      * and 4 bytes wide otherwise (presumably double and float - confirm).
 143  */
 144 #if GMX_DOUBLE
 145 #    define NBNXN_BUFFERFLAG_SIZE 8
 146 #else
 147 #    define NBNXN_BUFFERFLAG_SIZE 16
 148 #endif
 149
 150 /*! \brief We store the reduction flags as gmx_bitmask_t (see nbnxn_atomdata_t::buffer_flags).
 151  * This limits the number of flags to BITMASK_SIZE.
      * The macro name suggests one flag per thread - TODO confirm in the reduction code.
 152  */
 153 #define NBNXN_BUFFERFLAG_MAX_THREADS (BITMASK_SIZE)
 154
 155
 156 //! Lennard-Jones (LJ) combination rules, stored in nbnxn_atomdata_t::Params::ljCombinationRule
 157 enum class LJCombinationRule : int
 158 {
 159     //! Geometric
 160     Geometric,
 161     //! Lorentz-Berthelot
 162     LorentzBerthelot,
 163     //! No rule
 164     None,
 165     //! Size of the enum, i.e. the number of values above; not a combination rule itself
 166     Count
 167 };
 168
 169 //! String corresponding to LJ combination rule \p enumValue
     // NOTE(review): the ownership/lifetime of the returned C string and the handling of
     // LJCombinationRule::Count are not visible from this declaration - see the definition.
 170 const char* enumValueToString(LJCombinationRule enumValue);
 171
 172 /*! \internal
 173  * \brief Struct that stores atom related data for the nbnxn module
 174  *
 175  * Note: performance would improve slightly when all std::vector containers
 176  *       in this struct would not initialize during resize().
 177  */
 178 struct nbnxn_atomdata_t
 179 { //NOLINT(clang-analyzer-optin.performance.Padding)
 180     /*! \internal
 181      * \brief The actual atom data parameter values */
 182     struct Params
 183     {
 184         /*! \brief Constructor
 185          *
 186          * \param[in] pinningPolicy  Sets the pinning policy for all data that might be transferred to a GPU
 187          */
 188         Params(gmx::PinningPolicy pinningPolicy);
 189
 190         //! The number of different atom types
 191         int numTypes;
 192         //! Lennard-Jones 6*C6 and 12*C12 parameters, size numTypes*2*2
             // NOTE(review): a table over type pairs would need numTypes*numTypes*2 entries - confirm the stated size
 193         gmx::HostVector<real> nbfp;
 194         //! Combination rule, see LJCombinationRule defined above
 195         LJCombinationRule ljCombinationRule;
 196         //! LJ parameters per atom type, size numTypes*2
 197         gmx::HostVector<real> nbfp_comb;
 198         //! As nbfp, but with a stride for the present SIMD architecture
 199         AlignedVector<real> nbfp_aligned;
 200         //! Atom types per atom
 201         gmx::HostVector<int> type;
 202         //! LJ parameters per atom for fast SIMD loading
 203         gmx::HostVector<real> lj_comb;
 204         //! Charges per atom, not set with format nbatXYZQ
 205         gmx::HostVector<real> q;
 206         //! The number of energy groups
 207         int nenergrp;
 208         //! 2log(nenergrp) (presumably the base-2 logarithm of nenergrp - confirm)
 209         int neg_2log;
 210         //! The energy groups, one int entry per cluster, only set when needed
 211         gmx::HostVector<int> energrp;
 212     };
 213
 214     /*! \internal
 215      * \brief Diagonal and topology exclusion helper data for all SIMD kernels. */
 216     struct SimdMasks
 217     {
 218         SimdMasks();
 219
 220         //! Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
 221         AlignedVector<real> diagonal_4xn_j_minus_i;
 222         //! Helper data for setting up diagonal exclusion masks in the SIMD 2xNN kernels
 223         AlignedVector<real> diagonal_2xnn_j_minus_i;
 224         //! Filters for topology exclusion masks for the SIMD kernels
 225         AlignedVector<uint32_t> exclusion_filter;
 226         //! Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support
 227         AlignedVector<uint64_t> exclusion_filter64;
 228         //! Array of masks needed for exclusions
 229         AlignedVector<real> interaction_array;
 230     };
 231
 232     /*! \brief Constructor
 233      *
 234      * \param[in] pinningPolicy      Sets the pinning policy for all data that might be transferred
 235      *                               to a GPU
 236      * \param[in] mdlog              The logger
 237      * \param[in] kernelType         Nonbonded NxN kernel type
 238      * \param[in] enbnxninitcombrule LJ combination rule; presumably one of the enbnxninitcombrule*
          *                               enum values declared later in this header - confirm
 239      * \param[in] ntype              Number of atom types
 240      * \param[in] nbfp               Non-bonded force parameters
 241      * \param[in] n_energygroups     Number of energy groups
 242      * \param[in] nout               Number of output data structures
 243      */
 244     nbnxn_atomdata_t(gmx::PinningPolicy        pinningPolicy,
 245                      const gmx::MDLogger&      mdlog,
 246                      Nbnxm::KernelType         kernelType,
 247                      int                       enbnxninitcombrule,
 248                      int                       ntype,
 249                      gmx::ArrayRef<const real> nbfp,
 250                      int                       n_energygroups,
 251                      int                       nout);
 252
 253     //! Returns a const reference to the parameters
 254     const Params& params() const { return params_; }
 255
 256     //! Returns a non-const reference to the parameters; the name suggests new code should prefer params()
 257     Params& paramsDeprecated() { return params_; }
 258
 259     //! Returns the current total number of atoms stored
 260     int numAtoms() const { return numAtoms_; }
 261
 262     //! Return a read-only view of the coordinate buffer, and q with xFormat==nbatXYZQ
 263     gmx::ArrayRef<const real> x() const { return x_; }
 264
 265     //! Return a writable view of the coordinate buffer, and q with xFormat==nbatXYZQ
 266     gmx::ArrayRef<real> x() { return x_; }
 267
 268     //! Resizes the coordinate buffer and sets the number of atoms
 269     void resizeCoordinateBuffer(int numAtoms);
 270
 271     //! Resizes the force buffers for the current number of atoms
 272     void resizeForceBuffers();
 273
 274 private:
 275     //! The LJ and charge parameters
 276     Params params_;
 277     //! The total number of atoms currently stored
 278     int numAtoms_;
 279
 280 public:
 281     //! Number of local atoms
 282     int natoms_local;
 283     //! The format of x (and q), enum (presumably one of the nbat* values declared above - confirm)
 284     int XFormat;
 285     //! The format of f, enum (presumably one of the nbat* values declared above - confirm)
 286     int FFormat;
 287     //! Do we need to update shift_vec every step?
 288     bool bDynamicBox;
 289     //! Shift vectors, copied from t_forcerec
 290     gmx::HostVector<gmx::RVec> shift_vec;
 291     //! stride for a coordinate in x (usually 3 or 4)
 292     int xstride;
 293     //! stride for a coordinate in f (usually 3 or 4)
 294     int fstride;
 295
 296 private:
 297     //! x and possibly q, size natoms*xstride; exposed through the x() accessors
 298     gmx::HostVector<real> x_;
 299
 300 public:
 301     //! Masks for handling exclusions in the SIMD kernels; const, so fixed once constructed
 302     const SimdMasks simdMasks;
 303
 304     //! Output data structures, 1 per thread
 305     std::vector<nbnxn_atomdata_output_t> out;
 306
 307     //! Reduction related data
 308     //! \{
 309     //! Use the flags or operate on all atoms
 310     bool bUseBufferFlags;
 311     //! Flags for buffer zeroing+reduc.
 312     std::vector<gmx_bitmask_t> buffer_flags;
 313     //! \}
 314 };
 315
 316 /*! \brief Copy \p na rvec elements from \p x to \p xnb using \p nbatFormat, start dest \p a0,
 317  * and fills up to \p na_round with coordinates that are far away.
      *
      * \param[in]  a           Presumably the indices into \p x of the elements to copy - TODO confirm
      * \param[in]  na          Number of elements to copy
      * \param[in]  na_round    Count up to which the destination is filled with far-away coordinates
      * \param[in]  x           Source coordinates in rvec format
      * \param[in]  nbatFormat  Destination layout; presumably one of the nbat* enum values - confirm
      * \param[out] xnb         Destination buffer
      * \param[in]  a0          Start position in the destination
 318  */
 319 void copy_rvec_to_nbat_real(const int* a, int na, int na_round, const rvec* x, int nbatFormat, real* xnb, int a0);
 320
 321 //! Describes the combination rule in use by this force field
     // NOTE(review): the per-value notes below are inferred from the enumerator names and
     // from LJCombinationRule; verify against the code that consumes these values.
 322 enum
 323 {
 324     enbnxninitcombruleDETECT, //!< presumably: detect the rule instead of prescribing one
 325     enbnxninitcombruleGEOM, //!< presumably geometric, cf. LJCombinationRule::Geometric
 326     enbnxninitcombruleLB, //!< presumably Lorentz-Berthelot, cf. LJCombinationRule::LorentzBerthelot
 327     enbnxninitcombruleNONE //!< presumably no combination rule, cf. LJCombinationRule::None
 328 };
 329
 330 //! Sets the atomdata after pair search
     //!
     //! \param[in,out] nbat         The atom data to set
     //! \param[in]     gridSet      The grids data
     //! \param[in]     atomTypes    Atom types; presumably one entry per atom - TODO confirm
     //! \param[in]     atomCharges  Atom charges; presumably one entry per atom - TODO confirm
     //! \param[in]     atomInfo     Atom information; the encoding of the 64-bit entries is not visible here
 331 void nbnxn_atomdata_set(nbnxn_atomdata_t*            nbat,
 332                         const Nbnxm::GridSet&        gridSet,
 333                         gmx::ArrayRef<const int>     atomTypes,
 334                         gmx::ArrayRef<const real>    atomCharges,
 335                         gmx::ArrayRef<const int64_t> atomInfo);
 336
 337 //! Copy the shift vectors in \p shift_vec to \p nbat
     //!
     //! \param[in]     dynamic_box  Whether the box is dynamic; presumably stored in nbat->bDynamicBox - confirm
     //! \param[in]     shift_vec    The shift vectors to copy (passed as a non-const view)
     //! \param[in,out] nbat         The atom data receiving the shift vectors
 338 void nbnxn_atomdata_copy_shiftvec(bool dynamic_box, gmx::ArrayRef<gmx::RVec> shift_vec, nbnxn_atomdata_t* nbat);
 339
 340 /*! \brief Transform coordinates to xbat layout
 341  *
 342  * Creates a copy of the coordinates buffer using short-range ordering.
 343  *
 344  * \param[in] gridSet      The grids data.
 345  * \param[in] locality     Whether the transformation should be applied to local or non-local coordinates.
 346  * \param[in] coordinates  Coordinates in plain rvec format (raw pointer, the element count is not passed).
 347  * \param[in,out] nbat     Data in NBNXM format, used for mapping formats and to locate the output buffer.
 348  */
 349 void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet,
 350                                      gmx::AtomLocality     locality,
 351                                      const rvec*           coordinates,
 352                                      nbnxn_atomdata_t*     nbat);
 353
 354 /*! \brief Transform coordinates to xbat layout on GPU
 355  *
 356  * Creates a GPU copy of the coordinates buffer using short-range ordering.
 357  * As input, uses coordinates in plain rvec format in GPU memory.
 358  *
 359  * \param[in]     gridSet    The grids data.
 360  * \param[in]     locality   Whether the transformation should be applied to local or non-local coordinates.
 361  * \param[in,out] gpu_nbv    The NBNXM GPU data structure.
 362  * \param[in]     d_x        Coordinates to be copied (in plain rvec format, in GPU memory).
 363  * \param[in]     xReadyOnDevice   Event synchronizer indicating that the coordinates are ready in the device memory.
 364  */
 365 void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet&   gridSet,
 366                                     gmx::AtomLocality       locality,
 367                                     NbnxmGpu*               gpu_nbv,
 368                                     DeviceBuffer<gmx::RVec> d_x,
 369                                     GpuEventSynchronizer*   xReadyOnDevice);
 370
 371 /*! \brief Add the computed forces to \p totalForce, an internal reduction might be performed as well
 372  *
 373  * \param[in,out] nbat        Atom data in NBNXM format (passed non-const, see the internal reduction above).
 374  * \param[in]     locality    Whether the reduction should be performed on local or non-local atoms.
 375  * \param[in]     gridSet     The grids data.
 376  * \param[in,out] totalForce  Buffer to accumulate resulting force; the forces are added to its contents
 377  */
 378 void reduceForces(nbnxn_atomdata_t* nbat, gmx::AtomLocality locality, const Nbnxm::GridSet& gridSet, rvec* totalForce);
 379
 380 //! Add the fshift force stored in \p nbat to \p fshift
     //!
     //! \param[in]     nbat    Atom data holding the shift forces; not modified (const reference)
     //! \param[in,out] fshift  Shift force buffer that the stored shift forces are added to
 381 void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t& nbat, gmx::ArrayRef<gmx::RVec> fshift);
 382
 383 //! Get the atom start index and number of atoms for a given locality
     //!
     //! \param[in]  atomLocality  The locality to get the atom range for
     //! \param[in]  gridSet       The grids data
     //! \param[out] atomStart     Presumably receives the atom start index - TODO confirm
     //! \param[out] nAtoms        Presumably receives the number of atoms - TODO confirm
 384 void nbnxn_get_atom_range(gmx::AtomLocality     atomLocality,
 385                           const Nbnxm::GridSet& gridSet,
 386                           int*                  atomStart,
 387                           int*                  nAtoms);
 388 #endif