src/gromacs/nbnxm/atomdata.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
   5  * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 /*! \libinternal \file
  37  *  \brief
  38  *  Functionality for per-atom data in the nbnxm module
  39  *
  40  *  \author Berk Hess <hess@kth.se>
  41  *  \ingroup module_nbnxm
  42  *  \inlibraryapi
  43  */
  44
  45
  46 #ifndef GMX_NBNXN_ATOMDATA_H
  47 #define GMX_NBNXN_ATOMDATA_H
  48
  49 #include <cstdio>
  50
  51 #include "gromacs/gpu_utils/devicebuffer_datatype.h"
  52 #include "gromacs/gpu_utils/hostallocator.h"
  53 #include "gromacs/math/vectypes.h"
  54 #include "gromacs/mdtypes/locality.h"
  55 #include "gromacs/utility/basedefinitions.h"
  56 #include "gromacs/utility/bitmask.h"
  57 #include "gromacs/utility/real.h"
  58
  59 namespace gmx
  60 {
  61 class MDLogger;
  62 }
  63
  64 struct NbnxmGpu;
  65 struct nbnxn_atomdata_t;
  66 struct nonbonded_verlet_t;
  67 struct t_mdatoms;
  68 struct tMPI_Atomic;
  69
  70 class GpuEventSynchronizer;
  71
  72 namespace Nbnxm
  73 {
  74 class GridSet;
  75 enum class KernelType;
  76 } // namespace Nbnxm
  77
  78 //! Convenience type for vector with aligned memory
  79 template<typename T>
  80 using AlignedVector = std::vector<T, gmx::AlignedAllocator<T>>;
  81
  82 enum
  83 {
  84     nbatXYZ,
  85     nbatXYZQ,
  86     nbatX4,
  87     nbatX8
  88 };
  89
  90 //! Stride for coordinate/force arrays with xyz coordinate storage
  91 static constexpr int STRIDE_XYZ = 3;
  92 //! Stride for coordinate/force arrays with xyzq coordinate storage
  93 static constexpr int STRIDE_XYZQ = 4;
  94 //! Size of packs of x, y or z with SIMD 4-grouped packed coordinates/forces
  95 static constexpr int c_packX4 = 4;
  96 //! Size of packs of x, y or z with SIMD 8-grouped packed coordinates/forces
  97 static constexpr int c_packX8 = 8;
  98 //! Stridefor a pack of 4 coordinates/forces
  99 static constexpr int STRIDE_P4 = DIM * c_packX4;
 100 //! Stridefor a pack of 8 coordinates/forces
 101 static constexpr int STRIDE_P8 = DIM * c_packX8;
 102
 103 //! Returns the index in a coordinate array corresponding to atom a
 104 template<int packSize>
 105 static inline int atom_to_x_index(int a)
 106 {
 107     return DIM * (a & ~(packSize - 1)) + (a & (packSize - 1));
 108 }
 109
 110 /*! \internal
 111  * \brief Struct that holds force and energy output buffers */
 112 struct nbnxn_atomdata_output_t
 113 {
 114     /*! \brief Constructor
 115      *
 116      * \param[in] kernelType              Type of non-bonded kernel
 117      * \param[in] numEnergyGroups         The number of energy groups
 118      * \param[in] simdEnergyBufferStride  Stride for entries in the energy buffers for SIMD kernels
 119      * \param[in] pinningPolicy           Sets the pinning policy for all buffers used on the GPU
 120      */
 121     nbnxn_atomdata_output_t(Nbnxm::KernelType  kernelType,
 122                             int                numEnergyGroups,
 123                             int                simdEnergyBufferStride,
 124                             gmx::PinningPolicy pinningPolicy);
 125
 126     //! f, size natoms*fstride
 127     gmx::HostVector<real> f;
 128     //! Shift force array, size SHIFTS*DIM
 129     gmx::HostVector<real> fshift;
 130     //! Temporary Van der Waals group energy storage
 131     gmx::HostVector<real> Vvdw;
 132     //! Temporary Coulomb group energy storage
 133     gmx::HostVector<real> Vc;
 134     //! Temporary SIMD Van der Waals group energy storage
 135     AlignedVector<real> VSvdw;
 136     //! Temporary SIMD Coulomb group energy storage
 137     AlignedVector<real> VSc;
 138 };
 139
 140 /*! \brief Block size in atoms for the non-bonded thread force-buffer reduction.
 141  *
 142  * Should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
 143  * Should be small to reduce the reduction and zeroing cost,
 144  * but too small will result in overhead.
 145  * Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes.
 146  */
 147 #if GMX_DOUBLE
 148 #    define NBNXN_BUFFERFLAG_SIZE 8
 149 #else
 150 #    define NBNXN_BUFFERFLAG_SIZE 16
 151 #endif
 152
 153 /*! \brief We store the reduction flags as gmx_bitmask_t.
 154  * This limits the number of flags to BITMASK_SIZE.
 155  */
 156 #define NBNXN_BUFFERFLAG_MAX_THREADS (BITMASK_SIZE)
 157
 158 /*! \internal
 159  * \brief Flags for telling if threads write to force output buffers */
 160 typedef struct
 161 {
 162     //! The number of flag blocks
 163     int nflag;
 164     //! Bit i is set when thread i writes to a cell-block
 165     gmx_bitmask_t* flag;
 166     //! Allocation size of cxy_flag
 167     int flag_nalloc;
 168 } nbnxn_buffer_flags_t;
 169
 170 /*! \brief LJ combination rules: geometric, Lorentz-Berthelot, none */
 171 enum
 172 {
 173     ljcrGEOM,
 174     ljcrLB,
 175     ljcrNONE,
 176     ljcrNR
 177 };
 178
 179 /*! \internal
 180  * \brief Struct that stores atom related data for the nbnxn module
 181  *
 182  * Note: performance would improve slightly when all std::vector containers
 183  *       in this struct would not initialize during resize().
 184  */
 185 struct nbnxn_atomdata_t
 186 { //NOLINT(clang-analyzer-optin.performance.Padding)
 187     /*! \internal
 188      * \brief The actual atom data parameter values */
 189     struct Params
 190     {
 191         /*! \brief Constructor
 192          *
 193          * \param[in] pinningPolicy  Sets the pinning policy for all data that might be transfered to a GPU
 194          */
 195         Params(gmx::PinningPolicy pinningPolicy);
 196
 197         //! The number of different atom types
 198         int numTypes;
 199         //! Lennard-Jone 6*C6 and 12*C12 parameters, size numTypes*2*2
 200         gmx::HostVector<real> nbfp;
 201         //! Combination rule, see enum defined above
 202         int comb_rule;
 203         //! LJ parameters per atom type, size numTypes*2
 204         gmx::HostVector<real> nbfp_comb;
 205         //! As nbfp, but with a stride for the present SIMD architecture
 206         AlignedVector<real> nbfp_aligned;
 207         //! Atom types per atom
 208         gmx::HostVector<int> type;
 209         //! LJ parameters per atom for fast SIMD loading
 210         gmx::HostVector<real> lj_comb;
 211         //! Charges per atom, not set with format nbatXYZQ
 212         gmx::HostVector<real> q;
 213         //! The number of energy groups
 214         int nenergrp;
 215         //! 2log(nenergrp)
 216         int neg_2log;
 217         //! The energy groups, one int entry per cluster, only set when needed
 218         gmx::HostVector<int> energrp;
 219     };
 220
 221     /*! \internal
 222      * \brief Diagonal and topology exclusion helper data for all SIMD kernels. */
 223     struct SimdMasks
 224     {
 225         SimdMasks();
 226
 227         //! Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
 228         AlignedVector<real> diagonal_4xn_j_minus_i;
 229         //! Helper data for setting up diaginal exclusion masks in the SIMD 2xNN kernels
 230         AlignedVector<real> diagonal_2xnn_j_minus_i;
 231         //! Filters for topology exclusion masks for the SIMD kernels
 232         AlignedVector<uint32_t> exclusion_filter;
 233         //! Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support
 234         AlignedVector<uint64_t> exclusion_filter64;
 235         //! Array of masks needed for exclusions
 236         AlignedVector<real> interaction_array;
 237     };
 238
 239     /*! \brief Constructor
 240      *
 241      * \param[in] pinningPolicy  Sets the pinning policy for all data that might be transfered to a GPU
 242      */
 243     nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy);
 244
 245     //! Returns a const reference to the parameters
 246     const Params& params() const { return params_; }
 247
 248     //! Returns a non-const reference to the parameters
 249     Params& paramsDeprecated() { return params_; }
 250
 251     //! Returns the current total number of atoms stored
 252     int numAtoms() const { return numAtoms_; }
 253
 254     //! Return the coordinate buffer, and q with xFormat==nbatXYZQ
 255     gmx::ArrayRef<const real> x() const { return x_; }
 256
 257     //! Return the coordinate buffer, and q with xFormat==nbatXYZQ
 258     gmx::ArrayRef<real> x() { return x_; }
 259
 260     //! Resizes the coordinate buffer and sets the number of atoms
 261     void resizeCoordinateBuffer(int numAtoms);
 262
 263     //! Resizes the force buffers for the current number of atoms
 264     void resizeForceBuffers();
 265
 266 private:
 267     //! The LJ and charge parameters
 268     Params params_;
 269     //! The total number of atoms currently stored
 270     int numAtoms_;
 271
 272 public:
 273     //! Number of local atoms
 274     int natoms_local;
 275     //! The format of x (and q), enum
 276     int XFormat;
 277     //! The format of f, enum
 278     int FFormat;
 279     //! Do we need to update shift_vec every step?
 280     gmx_bool bDynamicBox;
 281     //! Shift vectors, copied from t_forcerec
 282     gmx::HostVector<gmx::RVec> shift_vec;
 283     //! stride for a coordinate in x (usually 3 or 4)
 284     int xstride;
 285     //! stride for a coordinate in f (usually 3 or 4)
 286     int fstride;
 287
 288 private:
 289     //! x and possibly q, size natoms*xstride
 290     gmx::HostVector<real> x_;
 291
 292 public:
 293     //! Masks for handling exclusions in the SIMD kernels
 294     const SimdMasks simdMasks;
 295
 296     //! Output data structures, 1 per thread
 297     std::vector<nbnxn_atomdata_output_t> out;
 298
 299     //! Reduction related data
 300     //! \{
 301     //! Use the flags or operate on all atoms
 302     gmx_bool bUseBufferFlags;
 303     //! Flags for buffer zeroing+reduc.
 304     nbnxn_buffer_flags_t buffer_flags;
 305     //! Use tree for force reduction
 306     gmx_bool bUseTreeReduce;
 307     //! Synchronization step for tree reduce
 308     tMPI_Atomic* syncStep;
 309     //! \}
 310 };
 311
 312 /*! \brief Copy na rvec elements from x to xnb using nbatFormat, start dest a0,
 313  * and fills up to na_round with coordinates that are far away.
 314  */
 315 void copy_rvec_to_nbat_real(const int* a, int na, int na_round, const rvec* x, int nbatFormat, real* xnb, int a0);
 316
 317 //! Describes the combination rule in use by this force field
 318 enum
 319 {
 320     enbnxninitcombruleDETECT,
 321     enbnxninitcombruleGEOM,
 322     enbnxninitcombruleLB,
 323     enbnxninitcombruleNONE
 324 };
 325
 326 /*! \brief Initialize the non-bonded atom data structure.
 327  *
 328  * The enum for nbatXFormat is in the file defining nbnxn_atomdata_t.
 329  * Copy the ntypes*ntypes*2 sized nbfp non-bonded parameter list
 330  * to the atom data structure.
 331  * enbnxninitcombrule sets what combination rule data gets stored in nbat.
 332  */
 333 void nbnxn_atomdata_init(const gmx::MDLogger&      mdlog,
 334                          nbnxn_atomdata_t*         nbat,
 335                          Nbnxm::KernelType         kernelType,
 336                          int                       enbnxninitcombrule,
 337                          int                       ntype,
 338                          gmx::ArrayRef<const real> nbfp,
 339                          int                       n_energygroups,
 340                          int                       nout);
 341
 342 //! Sets the atomdata after pair search
 343 void nbnxn_atomdata_set(nbnxn_atomdata_t*     nbat,
 344                         const Nbnxm::GridSet& gridSet,
 345                         const t_mdatoms*      mdatoms,
 346                         const int*            atinfo);
 347
 348 //! Copy the shift vectors to nbat
 349 void nbnxn_atomdata_copy_shiftvec(gmx_bool dynamic_box, rvec* shift_vec, nbnxn_atomdata_t* nbat);
 350
 351 /*! \brief Transform coordinates to xbat layout
 352  *
 353  * Creates a copy of the coordinates buffer using short-range ordering.
 354  *
 355  * \param[in] gridSet      The grids data.
 356  * \param[in] locality     If the transformation should be applied to local or non local coordinates.
 357  * \param[in] fillLocal    Tells if the local filler particle coordinates should be zeroed.
 358  * \param[in] coordinates  Coordinates in plain rvec format.
 359  * \param[in,out] nbat     Data in NBNXM format, used for mapping formats and to locate the output buffer.
 360  */
 361 void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet,
 362                                      gmx::AtomLocality     locality,
 363                                      bool                  fillLocal,
 364                                      const rvec*           coordinates,
 365                                      nbnxn_atomdata_t*     nbat);
 366
 367 /*! \brief Transform coordinates to xbat layout on GPU
 368  *
 369  * Creates a GPU copy of the coordinates buffer using short-range ordering.
 370  * As input, uses coordinates in plain rvec format in GPU memory.
 371  *
 372  * \param[in]     gridSet    The grids data.
 373  * \param[in]     locality   If the transformation should be applied to local or non local coordinates.
 374  * \param[in]     fillLocal  Tells if the local filler particle coordinates should be zeroed.
 375  * \param[in,out] gpu_nbv    The NBNXM GPU data structure.
 376  * \param[in]     d_x        Coordinates to be copied (in plain rvec format).
 377  * \param[in]     xReadyOnDevice   Event synchronizer indicating that the coordinates are ready in the device memory.
 378  */
 379 void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet&   gridSet,
 380                                     gmx::AtomLocality       locality,
 381                                     bool                    fillLocal,
 382                                     NbnxmGpu*               gpu_nbv,
 383                                     DeviceBuffer<gmx::RVec> d_x,
 384                                     GpuEventSynchronizer*   xReadyOnDevice);
 385
 386 /*! \brief Add the computed forces to \p f, an internal reduction might be performed as well
 387  *
 388  * \param[in]  nbat        Atom data in NBNXM format.
 389  * \param[in]  locality    If the reduction should be performed on local or non-local atoms.
 390  * \param[in]  gridSet     The grids data.
 391  * \param[out] totalForce  Buffer to accumulate resulting force
 392  */
 393 void reduceForces(nbnxn_atomdata_t* nbat, gmx::AtomLocality locality, const Nbnxm::GridSet& gridSet, rvec* totalForce);
 394
 395 /*! \brief Reduce forces on the GPU
 396  *
 397  * \param[in]  locality             If the reduction should be performed on local or non-local atoms.
 398  * \param[out] totalForcesDevice    Device buffer to accumulate resulting force.
 399  * \param[in]  gridSet              The grids data.
 400  * \param[in]  pmeForcesDevice      Device buffer with PME forces.
 401  * \param[in]  dependencyList       List of synchronizers that represent the dependencies the reduction task needs to sync on.
 402  * \param[in]  gpu_nbv              The NBNXM GPU data structure.
 403  * \param[in]  useGpuFPmeReduction  Whether PME forces should be added.
 404  * \param[in]  accumulateForce      Whether there are usefull data already in the total force buffer.
 405  */
 406 void reduceForcesGpu(gmx::AtomLocality                          locality,
 407                      DeviceBuffer<gmx::RVec>                    totalForcesDevice,
 408                      const Nbnxm::GridSet&                      gridSet,
 409                      void*                                      pmeForcesDevice,
 410                      gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
 411                      NbnxmGpu*                                  gpu_nbv,
 412                      bool                                       useGpuFPmeReduction,
 413                      bool                                       accumulateForce);
 414
 415 //! Add the fshift force stored in nbat to fshift
 416 void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t& nbat, gmx::ArrayRef<gmx::RVec> fshift);
 417
 418 //! Get the atom start index and number of atoms for a given locality
 419 void nbnxn_get_atom_range(gmx::AtomLocality     atomLocality,
 420                           const Nbnxm::GridSet& gridSet,
 421                           int*                  atomStart,
 422                           int*                  nAtoms);
 423 #endif