src/gromacs/nbnxm/atomdata.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_NBNXN_ATOMDATA_H
  37 #define GMX_NBNXN_ATOMDATA_H
  38
  39 #include <cstdio>
  40
  41 #include "gromacs/gpu_utils/devicebuffer_datatype.h"
  42 #include "gromacs/gpu_utils/hostallocator.h"
  43 #include "gromacs/math/vectypes.h"
  44 #include "gromacs/utility/basedefinitions.h"
  45 #include "gromacs/utility/bitmask.h"
  46 #include "gromacs/utility/real.h"
  47
  48 #include "gpu_types.h"
  49 #include "locality.h"
  50
  51 namespace gmx
  52 {
  53 class MDLogger;
  54 }
  55
  56 struct nbnxn_atomdata_t;
  57 struct nonbonded_verlet_t;
  58 struct t_mdatoms;
  59 struct tMPI_Atomic;
  60
  61 enum class BufferOpsUseGpu;
  62
  63 class GpuEventSynchronizer;
  64
  65 namespace Nbnxm
  66 {
  67 class GridSet;
  68 enum class KernelType;
  69 }
  70
  71 /* Convenience type for vector with aligned memory */
  72 template<typename T>
  73 using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>;
  74
  75 enum {
  76     nbatXYZ, nbatXYZQ, nbatX4, nbatX8
  77 };
  78
  79 //! Stride for coordinate/force arrays with xyz coordinate storage
  80 static constexpr int STRIDE_XYZ  = 3;
  81 //! Stride for coordinate/force arrays with xyzq coordinate storage
  82 static constexpr int STRIDE_XYZQ = 4;
  83 //! Size of packs of x, y or z with SIMD 4-grouped packed coordinates/forces
  84 static constexpr int c_packX4    = 4;
  85 //! Size of packs of x, y or z with SIMD 8-grouped packed coordinates/forces
  86 static constexpr int c_packX8    = 8;
  87 //! Stridefor a pack of 4 coordinates/forces
  88 static constexpr int STRIDE_P4   = DIM*c_packX4;
  89 //! Stridefor a pack of 8 coordinates/forces
  90 static constexpr int STRIDE_P8   = DIM*c_packX8;
  91
  92 //! Returns the index in a coordinate array corresponding to atom a
  93 template<int packSize> static inline int atom_to_x_index(int a)
  94 {
  95     return DIM*(a & ~(packSize - 1)) + (a & (packSize - 1));
  96 }
  97
  98 // Struct that holds force and energy output buffers
  99 struct nbnxn_atomdata_output_t
 100 {
 101     /* Constructor
 102      *
 103      * \param[in] kernelType              Type of non-bonded kernel
 104      * \param[in] numEnergyGroups         The number of energy groups
 105      * \param[in] simdEnergyBufferStride  Stride for entries in the energy buffers for SIMD kernels
 106      * \param[in] pinningPolicy           Sets the pinning policy for all buffers used on the GPU
 107      */
 108     nbnxn_atomdata_output_t(Nbnxm::KernelType  kernelType,
 109                             int                numEnergyGroups,
 110                             int                simdEnergyBUfferStride,
 111                             gmx::PinningPolicy pinningPolicy);
 112
 113     gmx::HostVector<real> f;      // f, size natoms*fstride
 114     gmx::HostVector<real> fshift; // Shift force array, size SHIFTS*DIM
 115     gmx::HostVector<real> Vvdw;   // Temporary Van der Waals group energy storage
 116     gmx::HostVector<real> Vc;     // Temporary Coulomb group energy storage
 117     AlignedVector<real>   VSvdw;  // Temporary SIMD Van der Waals group energy storage
 118     AlignedVector<real>   VSc;    // Temporary SIMD Coulomb group energy storage
 119 };
 120
 121 /* Block size in atoms for the non-bonded thread force-buffer reduction,
 122  * should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
 123  * Should be small to reduce the reduction and zeroing cost,
 124  * but too small will result in overhead.
 125  * Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes.
 126  */
 127 #if GMX_DOUBLE
 128 #define NBNXN_BUFFERFLAG_SIZE   8
 129 #else
 130 #define NBNXN_BUFFERFLAG_SIZE  16
 131 #endif
 132
 133 /* We store the reduction flags as gmx_bitmask_t.
 134  * This limits the number of flags to BITMASK_SIZE.
 135  */
 136 #define NBNXN_BUFFERFLAG_MAX_THREADS  (BITMASK_SIZE)
 137
 138 /* Flags for telling if threads write to force output buffers */
 139 typedef struct {
 140     int               nflag;       /* The number of flag blocks                         */
 141     gmx_bitmask_t    *flag;        /* Bit i is set when thread i writes to a cell-block */
 142     int               flag_nalloc; /* Allocation size of cxy_flag                       */
 143 } nbnxn_buffer_flags_t;
 144
 145 /* LJ combination rules: geometric, Lorentz-Berthelot, none */
 146 enum {
 147     ljcrGEOM, ljcrLB, ljcrNONE, ljcrNR
 148 };
 149
 150 /* Struct that stores atom related data for the nbnxn module
 151  *
 152  * Note: performance would improve slightly when all std::vector containers
 153  *       in this struct would not initialize during resize().
 154  */
 155 struct nbnxn_atomdata_t
 156 {   //NOLINT(clang-analyzer-optin.performance.Padding)
 157     struct Params
 158     {
 159         /* Constructor
 160          *
 161          * \param[in] pinningPolicy  Sets the pinning policy for all data that might be transfered to a GPU
 162          */
 163         Params(gmx::PinningPolicy pinningPolicy);
 164
 165         // The number of different atom types
 166         int                   numTypes;
 167         // Lennard-Jone 6*C6 and 12*C12 parameters, size numTypes*2*2
 168         gmx::HostVector<real> nbfp;
 169         // Combination rule, see enum defined above
 170         int                   comb_rule;
 171         // LJ parameters per atom type, size numTypes*2
 172         gmx::HostVector<real> nbfp_comb;
 173         // As nbfp, but with a stride for the present SIMD architecture
 174         AlignedVector<real>   nbfp_aligned;
 175         // Atom types per atom
 176         gmx::HostVector<int>  type;
 177         // LJ parameters per atom for fast SIMD loading
 178         gmx::HostVector<real> lj_comb;
 179         // Charges per atom, not set with format nbatXYZQ
 180         gmx::HostVector<real> q;
 181         // The number of energy groups
 182         int                   nenergrp;
 183         // 2log(nenergrp)
 184         int                   neg_2log;
 185         // The energy groups, one int entry per cluster, only set when needed
 186         gmx::HostVector<int>  energrp;
 187     };
 188
 189     // Diagonal and topology exclusion helper data for all SIMD kernels
 190     struct SimdMasks
 191     {
 192         SimdMasks();
 193
 194         // Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
 195         AlignedVector<real>     diagonal_4xn_j_minus_i;
 196         // Helper data for setting up diaginal exclusion masks in the SIMD 2xNN kernels
 197         AlignedVector<real>     diagonal_2xnn_j_minus_i;
 198         // Filters for topology exclusion masks for the SIMD kernels
 199         AlignedVector<uint32_t> exclusion_filter;
 200         // Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support
 201         AlignedVector<uint64_t> exclusion_filter64;
 202         // Array of masks needed for exclusions
 203         AlignedVector<real>     interaction_array;
 204     };
 205
 206     /* Constructor
 207      *
 208      * \param[in] pinningPolicy  Sets the pinning policy for all data that might be transfered to a GPU
 209      */
 210     nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy);
 211
 212     /* Returns a const reference to the parameters */
 213     const Params &params() const
 214     {
 215         return params_;
 216     }
 217
 218     /* Returns a non-const reference to the parameters */
 219     Params &paramsDeprecated()
 220     {
 221         return params_;
 222     }
 223
 224     /* Returns the current total number of atoms stored */
 225     int numAtoms() const
 226     {
 227         return numAtoms_;
 228     }
 229
 230     /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
 231     gmx::ArrayRef<const real> x() const
 232     {
 233         return x_;
 234     }
 235
 236     /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
 237     gmx::ArrayRef<real> x()
 238     {
 239         return x_;
 240     }
 241
 242     /* Resizes the coordinate buffer and sets the number of atoms */
 243     void resizeCoordinateBuffer(int numAtoms);
 244
 245     /* Resizes the force buffers for the current number of atoms */
 246     void resizeForceBuffers();
 247
 248     private:
 249         // The LJ and charge parameters
 250         Params                     params_;
 251         // The total number of atoms currently stored
 252         int                        numAtoms_;
 253     public:
 254         int                        natoms_local; /* Number of local atoms                           */
 255         int                        XFormat;      /* The format of x (and q), enum                      */
 256         int                        FFormat;      /* The format of f, enum                              */
 257         gmx_bool                   bDynamicBox;  /* Do we need to update shift_vec every step?    */
 258         gmx::HostVector<gmx::RVec> shift_vec;    /* Shift vectors, copied from t_forcerec              */
 259         int                        xstride;      /* stride for a coordinate in x (usually 3 or 4)      */
 260         int                        fstride;      /* stride for a coordinate in f (usually 3 or 4)      */
 261     private:
 262         gmx::HostVector<real>      x_;           /* x and possibly q, size natoms*xstride              */
 263
 264     public:
 265         // Masks for handling exclusions in the SIMD kernels
 266         const SimdMasks          simdMasks;
 267
 268         /* Output data */
 269         std::vector<nbnxn_atomdata_output_t> out; /* Output data structures, 1 per thread */
 270
 271         /* Reduction related data */
 272         gmx_bool                 bUseBufferFlags;     /* Use the flags or operate on all atoms     */
 273         nbnxn_buffer_flags_t     buffer_flags;        /* Flags for buffer zeroing+reduc.  */
 274         gmx_bool                 bUseTreeReduce;      /* Use tree for force reduction */
 275         tMPI_Atomic             *syncStep;            /* Synchronization step for tree reduce */
 276 };
 277
 278 /* Copy na rvec elements from x to xnb using nbatFormat, start dest a0,
 279  * and fills up to na_round with coordinates that are far away.
 280  */
 281 void copy_rvec_to_nbat_real(const int *a, int na, int na_round,
 282                             const rvec *x, int nbatFormat,
 283                             real *xnb, int a0);
 284
 285 enum {
 286     enbnxninitcombruleDETECT, enbnxninitcombruleGEOM, enbnxninitcombruleLB, enbnxninitcombruleNONE
 287 };
 288
 289 /* Initialize the non-bonded atom data structure.
 290  * The enum for nbatXFormat is in the file defining nbnxn_atomdata_t.
 291  * Copy the ntypes*ntypes*2 sized nbfp non-bonded parameter list
 292  * to the atom data structure.
 293  * enbnxninitcombrule sets what combination rule data gets stored in nbat.
 294  */
 295 void nbnxn_atomdata_init(const gmx::MDLogger &mdlog,
 296                          nbnxn_atomdata_t *nbat,
 297                          Nbnxm::KernelType kernelType,
 298                          int enbnxninitcombrule,
 299                          int ntype, const real *nbfp,
 300                          int n_energygroups,
 301                          int nout);
 302
 303 void nbnxn_atomdata_set(nbnxn_atomdata_t     *nbat,
 304                         const Nbnxm::GridSet &gridSet,
 305                         const t_mdatoms      *mdatoms,
 306                         const int            *atinfo);
 307
 308 /* Copy the shift vectors to nbat */
 309 void nbnxn_atomdata_copy_shiftvec(gmx_bool          dynamic_box,
 310                                   rvec             *shift_vec,
 311                                   nbnxn_atomdata_t *nbat);
 312
 313 /*! \brief Transform coordinates to xbat layout
 314  *
 315  * Creates a copy of the coordinates buffer using short-range ordering.
 316  *
 317  * \param[in] gridSet      The grids data.
 318  * \param[in] locality     If the transformation should be applied to local or non local coordinates.
 319  * \param[in] fillLocal    Tells if the local filler particle coordinates should be zeroed.
 320  * \param[in] coordinates  Coordinates in plain rvec format.
 321  * \param[in,out] nbat     Data in NBNXM format, used for mapping formats and to locate the output buffer.
 322  */
 323 void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet       &gridSet,
 324                                      Nbnxm::AtomLocality         locality,
 325                                      bool                        fillLocal,
 326                                      const rvec                 *coordinates,
 327                                      nbnxn_atomdata_t           *nbat);
 328
 329 /*! \brief Copies the coordinates to the GPU (in plain rvec format)
 330  *
 331  *  This function copied data to the gpu so that the transformation to the NBNXM format can be done on the GPU.
 332  *
 333  * \param[in] gridSet          The grids data.
 334  * \param[in] locality         If local or non local coordinates should be copied.
 335  * \param[in] fillLocal        If the local filler particle coordinates should be zeroed.
 336  * \param[in] nbat             Data in NBNXM format, used to zero coordinates of filler particles.
 337  * \param[in] gpu_nbv          The NBNXM GPU data structure.
 338  * \param[in] coordinatesHost  Coordinates to be copied (in plain rvec format).
 339  */
 340 void nbnxn_atomdata_copy_x_to_gpu(const Nbnxm::GridSet     &gridSet,
 341                                   Nbnxm::AtomLocality       locality,
 342                                   bool                      fillLocal,
 343                                   nbnxn_atomdata_t         *nbat,
 344                                   gmx_nbnxn_gpu_t          *gpu_nbv,
 345                                   const rvec               *coordinatesHost);
 346
 347 /*!\brief Getter for the GPU coordinates buffer
 348  *
 349  * \param[in] gpu_nbv  The NBNXM GPU data structure.
 350  */
 351 DeviceBuffer<float> nbnxn_atomdata_get_x_gpu(gmx_nbnxn_gpu_t *gpu_nbv);
 352
 353 /*! \brief Transform coordinates to xbat layout on GPU
 354  *
 355  * Creates a GPU copy of the coordinates buffer using short-range ordering.
 356  * As input, uses coordinates in plain rvec format in GPU memory.
 357  *
 358  * \param[in]     gridSet            The grids data.
 359  * \param[in]     locality           If the transformation should be applied to local or non local coordinates.
 360  * \param[in]     fillLocal          Tells if the local filler particle coordinates should be zeroed.
 361  * \param[in,out] gpu_nbv            The NBNXM GPU data structure.
 362  * \param[in]     coordinatesDevice  Coordinates to be copied (in plain rvec format).
 363  */
 364 void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet     &gridSet,
 365                                     Nbnxm::AtomLocality       locality,
 366                                     bool                      fillLocal,
 367                                     gmx_nbnxn_gpu_t          *gpu_nbv,
 368                                     DeviceBuffer<float>       coordinatesDevice);
 369
 370 /*! \brief Add the computed forces to \p f, an internal reduction might be performed as well
 371  *
 372  * \param[in]  nbat        Atom data in NBNXM format.
 373  * \param[in]  locality    If the reduction should be performed on local or non-local atoms.
 374  * \param[in]  gridSet     The grids data.
 375  * \param[out] totalForce  Buffer to accumulate resulting force
 376  */
 377 void reduceForces(nbnxn_atomdata_t                   *nbat,
 378                   Nbnxm::AtomLocality                 locality,
 379                   const Nbnxm::GridSet               &gridSet,
 380                   rvec                               *totalForce);
 381
 382 /*! \brief Reduce forces on the GPU
 383  *
 384  * \param[in]  locality             If the reduction should be performed on local or non-local atoms.
 385  * \param[out] totalForcesDevice    Device buffer to accumulate resulting force.
 386  * \param[in]  gridSet              The grids data.
 387  * \param[in]  pmeForcesDevice      Device buffer with PME forces.
 388  * \param[in]  pmeForcesReady       Event that signals when the PME forces are ready for the reduction.
 389  * \param[in]  gpu_nbv              The NBNXM GPU data structure.
 390  * \param[in]  useGpuFPmeReduction  Whether PME forces should be added.
 391  * \param[in]  accumulateForce      Whether there are usefull data already in the total force buffer.
 392  */
 393 void reduceForcesGpu(Nbnxm::AtomLocality                 locality,
 394                      DeviceBuffer<float>                 totalForcesDevice,
 395                      const Nbnxm::GridSet               &gridSet,
 396                      void                               *pmeForcesDevice,
 397                      GpuEventSynchronizer               *pmeForcesReady,
 398                      gmx_nbnxn_gpu_t                    *gpu_nbv,
 399                      bool                                useGpuFPmeReduction,
 400                      bool                                accumulateForce);
 401
 402 /*!\brief Getter for the GPU forces buffer
 403  *
 404  * \todo Will be removed when the buffer management is lifted out of the NBNXM
 405  *
 406  * \param[in] gpu_nbv  The NBNXM GPU data structure.
 407  *
 408  * \returns Device forces buffer
 409  */
 410 DeviceBuffer<float> nbnxn_atomdata_get_f_gpu(gmx_nbnxn_gpu_t *gpu_nbv);
 411
 412 /* Add the fshift force stored in nbat to fshift */
 413 void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t   &nbat,
 414                                               gmx::ArrayRef<gmx::RVec>  fshift);
 415
 416 /* Get the atom start index and number of atoms for a given locality */
 417 void nbnxn_get_atom_range(Nbnxm::AtomLocality              atomLocality,
 418                           const Nbnxm::GridSet            &gridSet,
 419                           int                             *atomStart,
 420                           int                             *nAtoms);
 421 #endif