src/gromacs/nbnxm/atomdata.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
   5  * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36
  37 #ifndef GMX_NBNXN_ATOMDATA_H
  38 #define GMX_NBNXN_ATOMDATA_H
  39
  40 #include <cstdio>
  41
  42 #include "gromacs/gpu_utils/devicebuffer_datatype.h"
  43 #include "gromacs/gpu_utils/hostallocator.h"
  44 #include "gromacs/math/vectypes.h"
  45 #include "gromacs/mdtypes/locality.h"
  46 #include "gromacs/utility/basedefinitions.h"
  47 #include "gromacs/utility/bitmask.h"
  48 #include "gromacs/utility/real.h"
  49
  50 #include "gpu_types.h"
  51
  52 namespace gmx
  53 {
  54 class MDLogger;
  55 }
  56
  57 struct nbnxn_atomdata_t;
  58 struct nonbonded_verlet_t;
  59 struct t_mdatoms;
  60 struct tMPI_Atomic;
  61
  62 class GpuEventSynchronizer;
  63
  64 namespace Nbnxm
  65 {
  66 class GridSet;
  67 enum class KernelType;
  68 } // namespace Nbnxm
  69
  70 /* Convenience type for vector with aligned memory */
  71 template<typename T>
  72 using AlignedVector = std::vector<T, gmx::AlignedAllocator<T>>;
  73
  74 enum
  75 {
  76     nbatXYZ,
  77     nbatXYZQ,
  78     nbatX4,
  79     nbatX8
  80 };
  81
  82 //! Stride for coordinate/force arrays with xyz coordinate storage
  83 static constexpr int STRIDE_XYZ = 3;
  84 //! Stride for coordinate/force arrays with xyzq coordinate storage
  85 static constexpr int STRIDE_XYZQ = 4;
  86 //! Size of packs of x, y or z with SIMD 4-grouped packed coordinates/forces
  87 static constexpr int c_packX4 = 4;
  88 //! Size of packs of x, y or z with SIMD 8-grouped packed coordinates/forces
  89 static constexpr int c_packX8 = 8;
  90 //! Stridefor a pack of 4 coordinates/forces
  91 static constexpr int STRIDE_P4 = DIM * c_packX4;
  92 //! Stridefor a pack of 8 coordinates/forces
  93 static constexpr int STRIDE_P8 = DIM * c_packX8;
  94
  95 //! Returns the index in a coordinate array corresponding to atom a
  96 template<int packSize>
  97 static inline int atom_to_x_index(int a)
  98 {
  99     return DIM * (a & ~(packSize - 1)) + (a & (packSize - 1));
 100 }
 101
 102 // Struct that holds force and energy output buffers
 103 struct nbnxn_atomdata_output_t
 104 {
 105     /* Constructor
 106      *
 107      * \param[in] kernelType              Type of non-bonded kernel
 108      * \param[in] numEnergyGroups         The number of energy groups
 109      * \param[in] simdEnergyBufferStride  Stride for entries in the energy buffers for SIMD kernels
 110      * \param[in] pinningPolicy           Sets the pinning policy for all buffers used on the GPU
 111      */
 112     nbnxn_atomdata_output_t(Nbnxm::KernelType  kernelType,
 113                             int                numEnergyGroups,
 114                             int                simdEnergyBUfferStride,
 115                             gmx::PinningPolicy pinningPolicy);
 116
 117     gmx::HostVector<real> f;      // f, size natoms*fstride
 118     gmx::HostVector<real> fshift; // Shift force array, size SHIFTS*DIM
 119     gmx::HostVector<real> Vvdw;   // Temporary Van der Waals group energy storage
 120     gmx::HostVector<real> Vc;     // Temporary Coulomb group energy storage
 121     AlignedVector<real>   VSvdw;  // Temporary SIMD Van der Waals group energy storage
 122     AlignedVector<real>   VSc;    // Temporary SIMD Coulomb group energy storage
 123 };
 124
 125 /* Block size in atoms for the non-bonded thread force-buffer reduction,
 126  * should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
 127  * Should be small to reduce the reduction and zeroing cost,
 128  * but too small will result in overhead.
 129  * Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes.
 130  */
 131 #if GMX_DOUBLE
 132 #    define NBNXN_BUFFERFLAG_SIZE 8
 133 #else
 134 #    define NBNXN_BUFFERFLAG_SIZE 16
 135 #endif
 136
 137 /* We store the reduction flags as gmx_bitmask_t.
 138  * This limits the number of flags to BITMASK_SIZE.
 139  */
 140 #define NBNXN_BUFFERFLAG_MAX_THREADS (BITMASK_SIZE)
 141
 142 /* Flags for telling if threads write to force output buffers */
 143 typedef struct
 144 {
 145     int            nflag;       /* The number of flag blocks                         */
 146     gmx_bitmask_t* flag;        /* Bit i is set when thread i writes to a cell-block */
 147     int            flag_nalloc; /* Allocation size of cxy_flag                       */
 148 } nbnxn_buffer_flags_t;
 149
 150 /* LJ combination rules: geometric, Lorentz-Berthelot, none */
 151 enum
 152 {
 153     ljcrGEOM,
 154     ljcrLB,
 155     ljcrNONE,
 156     ljcrNR
 157 };
 158
 159 /* Struct that stores atom related data for the nbnxn module
 160  *
 161  * Note: performance would improve slightly when all std::vector containers
 162  *       in this struct would not initialize during resize().
 163  */
 164 struct nbnxn_atomdata_t
 165 { //NOLINT(clang-analyzer-optin.performance.Padding)
 166     struct Params
 167     {
 168         /* Constructor
 169          *
 170          * \param[in] pinningPolicy  Sets the pinning policy for all data that might be transfered to a GPU
 171          */
 172         Params(gmx::PinningPolicy pinningPolicy);
 173
 174         // The number of different atom types
 175         int numTypes;
 176         // Lennard-Jone 6*C6 and 12*C12 parameters, size numTypes*2*2
 177         gmx::HostVector<real> nbfp;
 178         // Combination rule, see enum defined above
 179         int comb_rule;
 180         // LJ parameters per atom type, size numTypes*2
 181         gmx::HostVector<real> nbfp_comb;
 182         // As nbfp, but with a stride for the present SIMD architecture
 183         AlignedVector<real> nbfp_aligned;
 184         // Atom types per atom
 185         gmx::HostVector<int> type;
 186         // LJ parameters per atom for fast SIMD loading
 187         gmx::HostVector<real> lj_comb;
 188         // Charges per atom, not set with format nbatXYZQ
 189         gmx::HostVector<real> q;
 190         // The number of energy groups
 191         int nenergrp;
 192         // 2log(nenergrp)
 193         int neg_2log;
 194         // The energy groups, one int entry per cluster, only set when needed
 195         gmx::HostVector<int> energrp;
 196     };
 197
 198     // Diagonal and topology exclusion helper data for all SIMD kernels
 199     struct SimdMasks
 200     {
 201         SimdMasks();
 202
 203         // Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
 204         AlignedVector<real> diagonal_4xn_j_minus_i;
 205         // Helper data for setting up diaginal exclusion masks in the SIMD 2xNN kernels
 206         AlignedVector<real> diagonal_2xnn_j_minus_i;
 207         // Filters for topology exclusion masks for the SIMD kernels
 208         AlignedVector<uint32_t> exclusion_filter;
 209         // Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support
 210         AlignedVector<uint64_t> exclusion_filter64;
 211         // Array of masks needed for exclusions
 212         AlignedVector<real> interaction_array;
 213     };
 214
 215     /* Constructor
 216      *
 217      * \param[in] pinningPolicy  Sets the pinning policy for all data that might be transfered to a GPU
 218      */
 219     nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy);
 220
 221     /* Returns a const reference to the parameters */
 222     const Params& params() const { return params_; }
 223
 224     /* Returns a non-const reference to the parameters */
 225     Params& paramsDeprecated() { return params_; }
 226
 227     /* Returns the current total number of atoms stored */
 228     int numAtoms() const { return numAtoms_; }
 229
 230     /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
 231     gmx::ArrayRef<const real> x() const { return x_; }
 232
 233     /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
 234     gmx::ArrayRef<real> x() { return x_; }
 235
 236     /* Resizes the coordinate buffer and sets the number of atoms */
 237     void resizeCoordinateBuffer(int numAtoms);
 238
 239     /* Resizes the force buffers for the current number of atoms */
 240     void resizeForceBuffers();
 241
 242 private:
 243     // The LJ and charge parameters
 244     Params params_;
 245     // The total number of atoms currently stored
 246     int numAtoms_;
 247
 248 public:
 249     int                        natoms_local; /* Number of local atoms                           */
 250     int                        XFormat;     /* The format of x (and q), enum                      */
 251     int                        FFormat;     /* The format of f, enum                              */
 252     gmx_bool                   bDynamicBox; /* Do we need to update shift_vec every step?    */
 253     gmx::HostVector<gmx::RVec> shift_vec;   /* Shift vectors, copied from t_forcerec              */
 254     int                        xstride;     /* stride for a coordinate in x (usually 3 or 4)      */
 255     int                        fstride;     /* stride for a coordinate in f (usually 3 or 4)      */
 256 private:
 257     gmx::HostVector<real> x_; /* x and possibly q, size natoms*xstride              */
 258
 259 public:
 260     // Masks for handling exclusions in the SIMD kernels
 261     const SimdMasks simdMasks;
 262
 263     /* Output data */
 264     std::vector<nbnxn_atomdata_output_t> out; /* Output data structures, 1 per thread */
 265
 266     /* Reduction related data */
 267     gmx_bool             bUseBufferFlags; /* Use the flags or operate on all atoms     */
 268     nbnxn_buffer_flags_t buffer_flags;    /* Flags for buffer zeroing+reduc.  */
 269     gmx_bool             bUseTreeReduce;  /* Use tree for force reduction */
 270     tMPI_Atomic*         syncStep;        /* Synchronization step for tree reduce */
 271 };
 272
 273 /* Copy na rvec elements from x to xnb using nbatFormat, start dest a0,
 274  * and fills up to na_round with coordinates that are far away.
 275  */
 276 void copy_rvec_to_nbat_real(const int* a, int na, int na_round, const rvec* x, int nbatFormat, real* xnb, int a0);
 277
 278 enum
 279 {
 280     enbnxninitcombruleDETECT,
 281     enbnxninitcombruleGEOM,
 282     enbnxninitcombruleLB,
 283     enbnxninitcombruleNONE
 284 };
 285
 286 /* Initialize the non-bonded atom data structure.
 287  * The enum for nbatXFormat is in the file defining nbnxn_atomdata_t.
 288  * Copy the ntypes*ntypes*2 sized nbfp non-bonded parameter list
 289  * to the atom data structure.
 290  * enbnxninitcombrule sets what combination rule data gets stored in nbat.
 291  */
 292 void nbnxn_atomdata_init(const gmx::MDLogger&      mdlog,
 293                          nbnxn_atomdata_t*         nbat,
 294                          Nbnxm::KernelType         kernelType,
 295                          int                       enbnxninitcombrule,
 296                          int                       ntype,
 297                          gmx::ArrayRef<const real> nbfp,
 298                          int                       n_energygroups,
 299                          int                       nout);
 300
 301 void nbnxn_atomdata_set(nbnxn_atomdata_t*     nbat,
 302                         const Nbnxm::GridSet& gridSet,
 303                         const t_mdatoms*      mdatoms,
 304                         const int*            atinfo);
 305
 306 /* Copy the shift vectors to nbat */
 307 void nbnxn_atomdata_copy_shiftvec(gmx_bool dynamic_box, rvec* shift_vec, nbnxn_atomdata_t* nbat);
 308
 309 /*! \brief Transform coordinates to xbat layout
 310  *
 311  * Creates a copy of the coordinates buffer using short-range ordering.
 312  *
 313  * \param[in] gridSet      The grids data.
 314  * \param[in] locality     If the transformation should be applied to local or non local coordinates.
 315  * \param[in] fillLocal    Tells if the local filler particle coordinates should be zeroed.
 316  * \param[in] coordinates  Coordinates in plain rvec format.
 317  * \param[in,out] nbat     Data in NBNXM format, used for mapping formats and to locate the output buffer.
 318  */
 319 void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet,
 320                                      gmx::AtomLocality     locality,
 321                                      bool                  fillLocal,
 322                                      const rvec*           coordinates,
 323                                      nbnxn_atomdata_t*     nbat);
 324
 325 /*! \brief Transform coordinates to xbat layout on GPU
 326  *
 327  * Creates a GPU copy of the coordinates buffer using short-range ordering.
 328  * As input, uses coordinates in plain rvec format in GPU memory.
 329  *
 330  * \param[in]     gridSet    The grids data.
 331  * \param[in]     locality   If the transformation should be applied to local or non local coordinates.
 332  * \param[in]     fillLocal  Tells if the local filler particle coordinates should be zeroed.
 333  * \param[in,out] gpu_nbv    The NBNXM GPU data structure.
 334  * \param[in]     d_x        Coordinates to be copied (in plain rvec format).
 335  * \param[in]     xReadyOnDevice   Event synchronizer indicating that the coordinates are ready in the device memory.
 336  */
 337 void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet,
 338                                     gmx::AtomLocality     locality,
 339                                     bool                  fillLocal,
 340                                     gmx_nbnxn_gpu_t*      gpu_nbv,
 341                                     DeviceBuffer<float>   d_x,
 342                                     GpuEventSynchronizer* xReadyOnDevice);
 343
 344 /*! \brief Add the computed forces to \p f, an internal reduction might be performed as well
 345  *
 346  * \param[in]  nbat        Atom data in NBNXM format.
 347  * \param[in]  locality    If the reduction should be performed on local or non-local atoms.
 348  * \param[in]  gridSet     The grids data.
 349  * \param[out] totalForce  Buffer to accumulate resulting force
 350  */
 351 void reduceForces(nbnxn_atomdata_t* nbat, gmx::AtomLocality locality, const Nbnxm::GridSet& gridSet, rvec* totalForce);
 352
 353 /*! \brief Reduce forces on the GPU
 354  *
 355  * \param[in]  locality             If the reduction should be performed on local or non-local atoms.
 356  * \param[out] totalForcesDevice    Device buffer to accumulate resulting force.
 357  * \param[in]  gridSet              The grids data.
 358  * \param[in]  pmeForcesDevice      Device buffer with PME forces.
 359  * \param[in]  dependencyList       List of synchronizers that represent the dependencies the reduction task needs to sync on.
 360  * \param[in]  gpu_nbv              The NBNXM GPU data structure.
 361  * \param[in]  useGpuFPmeReduction  Whether PME forces should be added.
 362  * \param[in]  accumulateForce      Whether there are usefull data already in the total force buffer.
 363  */
 364 void reduceForcesGpu(gmx::AtomLocality                          locality,
 365                      DeviceBuffer<float>                        totalForcesDevice,
 366                      const Nbnxm::GridSet&                      gridSet,
 367                      void*                                      pmeForcesDevice,
 368                      gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
 369                      gmx_nbnxn_gpu_t*                           gpu_nbv,
 370                      bool                                       useGpuFPmeReduction,
 371                      bool                                       accumulateForce);
 372
 373 /* Add the fshift force stored in nbat to fshift */
 374 void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t& nbat, gmx::ArrayRef<gmx::RVec> fshift);
 375
 376 /* Get the atom start index and number of atoms for a given locality */
 377 void nbnxn_get_atom_range(gmx::AtomLocality     atomLocality,
 378                           const Nbnxm::GridSet& gridSet,
 379                           int*                  atomStart,
 380                           int*                  nAtoms);
 381 #endif