src/gromacs/nbnxm/atomdata.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_NBNXN_ATOMDATA_H
  37 #define GMX_NBNXN_ATOMDATA_H
  38
  39 #include <cstdio>
  40
  41 #include "gromacs/gpu_utils/hostallocator.h"
  42 #include "gromacs/math/vectypes.h"
  43 #include "gromacs/utility/basedefinitions.h"
  44 #include "gromacs/utility/bitmask.h"
  45 #include "gromacs/utility/real.h"
  46
  47 #include "gpu_types.h"
  48 #include "locality.h"
  49
  50 namespace gmx
  51 {
  52 class MDLogger;
  53 }
  54
  55 struct nbnxn_atomdata_t;
  56 struct nonbonded_verlet_t;
  57 struct t_mdatoms;
  58 struct tMPI_Atomic;
  59
  60 enum class BufferOpsUseGpu;
  61
  62 class GpuEventSynchronizer;
  63
  64 namespace Nbnxm
  65 {
  66 class GridSet;
  67 enum class KernelType;
  68 }
  69
  70 /* Convenience type for vector with aligned memory */
  71 template<typename T>
  72 using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>;
  73
  74 enum {
  75     nbatXYZ, nbatXYZQ, nbatX4, nbatX8
  76 };
  77
  78 //! Stride for coordinate/force arrays with xyz coordinate storage
  79 static constexpr int STRIDE_XYZ  = 3;
  80 //! Stride for coordinate/force arrays with xyzq coordinate storage
  81 static constexpr int STRIDE_XYZQ = 4;
  82 //! Size of packs of x, y or z with SIMD 4-grouped packed coordinates/forces
  83 static constexpr int c_packX4    = 4;
  84 //! Size of packs of x, y or z with SIMD 8-grouped packed coordinates/forces
  85 static constexpr int c_packX8    = 8;
  86 //! Stridefor a pack of 4 coordinates/forces
  87 static constexpr int STRIDE_P4   = DIM*c_packX4;
  88 //! Stridefor a pack of 8 coordinates/forces
  89 static constexpr int STRIDE_P8   = DIM*c_packX8;
  90
  91 //! Returns the index in a coordinate array corresponding to atom a
  92 template<int packSize> static inline int atom_to_x_index(int a)
  93 {
  94     return DIM*(a & ~(packSize - 1)) + (a & (packSize - 1));
  95 }
  96
  97 // Struct that holds force and energy output buffers
  98 struct nbnxn_atomdata_output_t
  99 {
 100     /* Constructor
 101      *
 102      * \param[in] kernelType              Type of non-bonded kernel
 103      * \param[in] numEnergyGroups         The number of energy groups
 104      * \param[in] simdEnergyBufferStride  Stride for entries in the energy buffers for SIMD kernels
 105      * \param[in] pinningPolicy           Sets the pinning policy for all buffers used on the GPU
 106      */
 107     nbnxn_atomdata_output_t(Nbnxm::KernelType  kernelType,
 108                             int                numEnergyGroups,
 109                             int                simdEnergyBUfferStride,
 110                             gmx::PinningPolicy pinningPolicy);
 111
 112     gmx::HostVector<real> f;      // f, size natoms*fstride
 113     gmx::HostVector<real> fshift; // Shift force array, size SHIFTS*DIM
 114     gmx::HostVector<real> Vvdw;   // Temporary Van der Waals group energy storage
 115     gmx::HostVector<real> Vc;     // Temporary Coulomb group energy storage
 116     AlignedVector<real>   VSvdw;  // Temporary SIMD Van der Waals group energy storage
 117     AlignedVector<real>   VSc;    // Temporary SIMD Coulomb group energy storage
 118 };
 119
 120 /* Block size in atoms for the non-bonded thread force-buffer reduction,
 121  * should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
 122  * Should be small to reduce the reduction and zeroing cost,
 123  * but too small will result in overhead.
 124  * Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes.
 125  */
 126 #if GMX_DOUBLE
 127 #define NBNXN_BUFFERFLAG_SIZE   8
 128 #else
 129 #define NBNXN_BUFFERFLAG_SIZE  16
 130 #endif
 131
 132 /* We store the reduction flags as gmx_bitmask_t.
 133  * This limits the number of flags to BITMASK_SIZE.
 134  */
 135 #define NBNXN_BUFFERFLAG_MAX_THREADS  (BITMASK_SIZE)
 136
 137 /* Flags for telling if threads write to force output buffers */
 138 typedef struct {
 139     int               nflag;       /* The number of flag blocks                         */
 140     gmx_bitmask_t    *flag;        /* Bit i is set when thread i writes to a cell-block */
 141     int               flag_nalloc; /* Allocation size of cxy_flag                       */
 142 } nbnxn_buffer_flags_t;
 143
 144 /* LJ combination rules: geometric, Lorentz-Berthelot, none */
 145 enum {
 146     ljcrGEOM, ljcrLB, ljcrNONE, ljcrNR
 147 };
 148
 149 /* Struct that stores atom related data for the nbnxn module
 150  *
 151  * Note: performance would improve slightly when all std::vector containers
 152  *       in this struct would not initialize during resize().
 153  */
 154 struct nbnxn_atomdata_t
 155 {   //NOLINT(clang-analyzer-optin.performance.Padding)
 156     struct Params
 157     {
 158         /* Constructor
 159          *
 160          * \param[in] pinningPolicy  Sets the pinning policy for all data that might be transfered to a GPU
 161          */
 162         Params(gmx::PinningPolicy pinningPolicy);
 163
 164         // The number of different atom types
 165         int                   numTypes;
 166         // Lennard-Jone 6*C6 and 12*C12 parameters, size numTypes*2*2
 167         gmx::HostVector<real> nbfp;
 168         // Combination rule, see enum defined above
 169         int                   comb_rule;
 170         // LJ parameters per atom type, size numTypes*2
 171         gmx::HostVector<real> nbfp_comb;
 172         // As nbfp, but with a stride for the present SIMD architecture
 173         AlignedVector<real>   nbfp_aligned;
 174         // Atom types per atom
 175         gmx::HostVector<int>  type;
 176         // LJ parameters per atom for fast SIMD loading
 177         gmx::HostVector<real> lj_comb;
 178         // Charges per atom, not set with format nbatXYZQ
 179         gmx::HostVector<real> q;
 180         // The number of energy groups
 181         int                   nenergrp;
 182         // 2log(nenergrp)
 183         int                   neg_2log;
 184         // The energy groups, one int entry per cluster, only set when needed
 185         gmx::HostVector<int>  energrp;
 186     };
 187
 188     // Diagonal and topology exclusion helper data for all SIMD kernels
 189     struct SimdMasks
 190     {
 191         SimdMasks();
 192
 193         // Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
 194         AlignedVector<real>     diagonal_4xn_j_minus_i;
 195         // Helper data for setting up diaginal exclusion masks in the SIMD 2xNN kernels
 196         AlignedVector<real>     diagonal_2xnn_j_minus_i;
 197         // Filters for topology exclusion masks for the SIMD kernels
 198         AlignedVector<uint32_t> exclusion_filter;
 199         // Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support
 200         AlignedVector<uint64_t> exclusion_filter64;
 201         // Array of masks needed for exclusions
 202         AlignedVector<real>     interaction_array;
 203     };
 204
 205     /* Constructor
 206      *
 207      * \param[in] pinningPolicy  Sets the pinning policy for all data that might be transfered to a GPU
 208      */
 209     nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy);
 210
 211     /* Returns a const reference to the parameters */
 212     const Params &params() const
 213     {
 214         return params_;
 215     }
 216
 217     /* Returns a non-const reference to the parameters */
 218     Params &paramsDeprecated()
 219     {
 220         return params_;
 221     }
 222
 223     /* Returns the current total number of atoms stored */
 224     int numAtoms() const
 225     {
 226         return numAtoms_;
 227     }
 228
 229     /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
 230     gmx::ArrayRef<const real> x() const
 231     {
 232         return x_;
 233     }
 234
 235     /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
 236     gmx::ArrayRef<real> x()
 237     {
 238         return x_;
 239     }
 240
 241     /* Resizes the coordinate buffer and sets the number of atoms */
 242     void resizeCoordinateBuffer(int numAtoms);
 243
 244     /* Resizes the force buffers for the current number of atoms */
 245     void resizeForceBuffers();
 246
 247     private:
 248         // The LJ and charge parameters
 249         Params                     params_;
 250         // The total number of atoms currently stored
 251         int                        numAtoms_;
 252     public:
 253         int                        natoms_local; /* Number of local atoms                           */
 254         int                        XFormat;      /* The format of x (and q), enum                      */
 255         int                        FFormat;      /* The format of f, enum                              */
 256         gmx_bool                   bDynamicBox;  /* Do we need to update shift_vec every step?    */
 257         gmx::HostVector<gmx::RVec> shift_vec;    /* Shift vectors, copied from t_forcerec              */
 258         int                        xstride;      /* stride for a coordinate in x (usually 3 or 4)      */
 259         int                        fstride;      /* stride for a coordinate in f (usually 3 or 4)      */
 260     private:
 261         gmx::HostVector<real>      x_;           /* x and possibly q, size natoms*xstride              */
 262
 263     public:
 264         // Masks for handling exclusions in the SIMD kernels
 265         const SimdMasks          simdMasks;
 266
 267         /* Output data */
 268         std::vector<nbnxn_atomdata_output_t> out; /* Output data structures, 1 per thread */
 269
 270         /* Reduction related data */
 271         gmx_bool                 bUseBufferFlags;     /* Use the flags or operate on all atoms     */
 272         nbnxn_buffer_flags_t     buffer_flags;        /* Flags for buffer zeroing+reduc.  */
 273         gmx_bool                 bUseTreeReduce;      /* Use tree for force reduction */
 274         tMPI_Atomic             *syncStep;            /* Synchronization step for tree reduce */
 275 };
 276
 277 /* Copy na rvec elements from x to xnb using nbatFormat, start dest a0,
 278  * and fills up to na_round with coordinates that are far away.
 279  */
 280 void copy_rvec_to_nbat_real(const int *a, int na, int na_round,
 281                             const rvec *x, int nbatFormat,
 282                             real *xnb, int a0);
 283
 284 enum {
 285     enbnxninitcombruleDETECT, enbnxninitcombruleGEOM, enbnxninitcombruleLB, enbnxninitcombruleNONE
 286 };
 287
 288 /* Initialize the non-bonded atom data structure.
 289  * The enum for nbatXFormat is in the file defining nbnxn_atomdata_t.
 290  * Copy the ntypes*ntypes*2 sized nbfp non-bonded parameter list
 291  * to the atom data structure.
 292  * enbnxninitcombrule sets what combination rule data gets stored in nbat.
 293  */
 294 void nbnxn_atomdata_init(const gmx::MDLogger &mdlog,
 295                          nbnxn_atomdata_t *nbat,
 296                          Nbnxm::KernelType kernelType,
 297                          int enbnxninitcombrule,
 298                          int ntype, const real *nbfp,
 299                          int n_energygroups,
 300                          int nout);
 301
 302 void nbnxn_atomdata_set(nbnxn_atomdata_t     *nbat,
 303                         const Nbnxm::GridSet &gridSet,
 304                         const t_mdatoms      *mdatoms,
 305                         const int            *atinfo);
 306
 307 /* Copy the shift vectors to nbat */
 308 void nbnxn_atomdata_copy_shiftvec(gmx_bool          dynamic_box,
 309                                   rvec             *shift_vec,
 310                                   nbnxn_atomdata_t *nbat);
 311
 312 /* Copy x to nbat->x.
 313  * FillLocal tells if the local filler particle coordinates should be zeroed.
 314  */
 315 template <bool useGpu>
 316 void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet       &gridSet,
 317                                      Nbnxm::AtomLocality         locality,
 318                                      gmx_bool                    FillLocal,
 319                                      const rvec                 *x,
 320                                      nbnxn_atomdata_t           *nbat,
 321                                      gmx_nbnxn_gpu_t            *gpu_nbv,
 322                                      void                       *xPmeDevicePtr);
 323
 324 extern template
 325 void nbnxn_atomdata_copy_x_to_nbat_x<true>(const Nbnxm::GridSet &,
 326                                            const Nbnxm::AtomLocality,
 327                                            gmx_bool,
 328                                            const rvec*,
 329                                            nbnxn_atomdata_t *,
 330                                            gmx_nbnxn_gpu_t*,
 331                                            void *);
 332 extern template
 333 void nbnxn_atomdata_copy_x_to_nbat_x<false>(const Nbnxm::GridSet &,
 334                                             const Nbnxm::AtomLocality,
 335                                             gmx_bool,
 336                                             const rvec*,
 337                                             nbnxn_atomdata_t *,
 338                                             gmx_nbnxn_gpu_t*,
 339                                             void *);
 340
 341 //! Add the computed forces to \p f, an internal reduction might be performed as well
 342 template <bool  useGpu>
 343 void reduceForces(nbnxn_atomdata_t                   *nbat,
 344                   Nbnxm::AtomLocality                 locality,
 345                   const Nbnxm::GridSet               &gridSet,
 346                   rvec                               *f,
 347                   void                               *pmeFDeviceBuffer,
 348                   GpuEventSynchronizer               *pmeForcesReady,
 349                   gmx_nbnxn_gpu_t                    *gpu_nbv,
 350                   bool                                useGpuFPmeReduction,
 351                   bool                                accumulateForce);
 352
 353
 354 extern template
 355 void reduceForces<true>(nbnxn_atomdata_t             *nbat,
 356                         const Nbnxm::AtomLocality     locality,
 357                         const Nbnxm::GridSet         &gridSet,
 358                         rvec                         *f,
 359                         void                         *pmeFDeviceBuffer,
 360                         GpuEventSynchronizer         *pmeForcesReady,
 361                         gmx_nbnxn_gpu_t              *gpu_nbv,
 362                         bool                          useGpuFPmeReduction,
 363                         bool                          accumulateForce);
 364
 365 extern template
 366 void reduceForces<false>(nbnxn_atomdata_t             *nbat,
 367                          const Nbnxm::AtomLocality     locality,
 368                          const Nbnxm::GridSet         &gridSet,
 369                          rvec                         *f,
 370                          void                         *pmeFDeviceBuffer,
 371                          GpuEventSynchronizer         *pmeForcesReady,
 372                          gmx_nbnxn_gpu_t              *gpu_nbv,
 373                          bool                          useGpuFPmeReduction,
 374                          bool                          accumulateForce);
 375
 376 /* Add the fshift force stored in nbat to fshift */
 377 void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat,
 378                                               rvec                   *fshift);
 379
 380 /* Get the atom start index and number of atoms for a given locality */
 381 void nbnxn_get_atom_range(Nbnxm::AtomLocality              atomLocality,
 382                           const Nbnxm::GridSet            &gridSet,
 383                           int                             *atomStart,
 384                           int                             *nAtoms);
 385 #endif