2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_NBNXN_ATOMDATA_H
37 #define GMX_NBNXN_ATOMDATA_H
#include <cstdint>

#include <vector>

#include "gromacs/gpu_utils/hostallocator.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/bitmask.h"
#include "gromacs/utility/real.h"

#include "gpu_types.h"
struct nbnxn_atomdata_t;
struct nonbonded_verlet_t;

//! Tells whether coordinate/force buffer operations run on the GPU
enum class BufferOpsUseGpu;
// NOTE(review): referenced below as Nbnxm::KernelType — presumably declared
// inside namespace Nbnxm in the full header; confirm against the complete file.
enum class KernelType;
//! Tells whether GPU buffer-ops force kernels accumulate into the output buffer
enum class GpuBufferOpsAccumulateForce;
70 /* Convenience type for vector with aligned memory */
72 using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>;
/* Layout of the coordinate (x) and force buffers: plain xyz, xyz+charge,
 * or SIMD-packed by 4 or 8 atoms per dimension.
 */
enum
{
    nbatXYZ, nbatXYZQ, nbatX4, nbatX8
};
78 //! Stride for coordinate/force arrays with xyz coordinate storage
79 static constexpr int STRIDE_XYZ = 3;
80 //! Stride for coordinate/force arrays with xyzq coordinate storage
81 static constexpr int STRIDE_XYZQ = 4;
82 //! Size of packs of x, y or z with SIMD 4-grouped packed coordinates/forces
83 static constexpr int c_packX4 = 4;
84 //! Size of packs of x, y or z with SIMD 8-grouped packed coordinates/forces
85 static constexpr int c_packX8 = 8;
86 //! Stridefor a pack of 4 coordinates/forces
87 static constexpr int STRIDE_P4 = DIM*c_packX4;
88 //! Stridefor a pack of 8 coordinates/forces
89 static constexpr int STRIDE_P8 = DIM*c_packX8;
91 //! Returns the index in a coordinate array corresponding to atom a
92 template<int packSize> static inline int atom_to_x_index(int a)
94 return DIM*(a & ~(packSize - 1)) + (a & (packSize - 1));
97 // Struct that holds force and energy output buffers
98 struct nbnxn_atomdata_output_t
102 * \param[in] kernelType Type of non-bonded kernel
103 * \param[in] numEnergyGroups The number of energy groups
104 * \param[in] simdEnergyBufferStride Stride for entries in the energy buffers for SIMD kernels
105 * \param[in] pinningPolicy Sets the pinning policy for all buffers used on the GPU
107 nbnxn_atomdata_output_t(Nbnxm::KernelType kernelType,
109 int simdEnergyBUfferStride,
110 gmx::PinningPolicy pinningPolicy);
112 gmx::HostVector<real> f; // f, size natoms*fstride
113 gmx::HostVector<real> fshift; // Shift force array, size SHIFTS*DIM
114 gmx::HostVector<real> Vvdw; // Temporary Van der Waals group energy storage
115 gmx::HostVector<real> Vc; // Temporary Coulomb group energy storage
116 AlignedVector<real> VSvdw; // Temporary SIMD Van der Waals group energy storage
117 AlignedVector<real> VSc; // Temporary SIMD Coulomb group energy storage
/* Block size in atoms for the non-bonded thread force-buffer reduction,
 * should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
 * Should be small to reduce the reduction and zeroing cost,
 * but too small will result in overhead.
 * Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes.
 */
#if GMX_DOUBLE
#define NBNXN_BUFFERFLAG_SIZE   8
#else
#define NBNXN_BUFFERFLAG_SIZE  16
#endif
132 /* We store the reduction flags as gmx_bitmask_t.
133 * This limits the number of flags to BITMASK_SIZE.
135 #define NBNXN_BUFFERFLAG_MAX_THREADS (BITMASK_SIZE)
137 /* Flags for telling if threads write to force output buffers */
139 int nflag; /* The number of flag blocks */
140 gmx_bitmask_t *flag; /* Bit i is set when thread i writes to a cell-block */
141 int flag_nalloc; /* Allocation size of cxy_flag */
142 } nbnxn_buffer_flags_t;
/* LJ combination rules: geometric, Lorentz-Berthelot, none */
enum
{
    ljcrGEOM, ljcrLB, ljcrNONE, ljcrNR
};
149 /* Struct that stores atom related data for the nbnxn module
151 * Note: performance would improve slightly when all std::vector containers
152 * in this struct would not initialize during resize().
154 struct nbnxn_atomdata_t
155 { //NOLINT(clang-analyzer-optin.performance.Padding)
160 * \param[in] pinningPolicy Sets the pinning policy for all data that might be transfered to a GPU
162 Params(gmx::PinningPolicy pinningPolicy);
164 // The number of different atom types
166 // Lennard-Jone 6*C6 and 12*C12 parameters, size numTypes*2*2
167 gmx::HostVector<real> nbfp;
168 // Combination rule, see enum defined above
170 // LJ parameters per atom type, size numTypes*2
171 gmx::HostVector<real> nbfp_comb;
172 // As nbfp, but with a stride for the present SIMD architecture
173 AlignedVector<real> nbfp_aligned;
174 // Atom types per atom
175 gmx::HostVector<int> type;
176 // LJ parameters per atom for fast SIMD loading
177 gmx::HostVector<real> lj_comb;
178 // Charges per atom, not set with format nbatXYZQ
179 gmx::HostVector<real> q;
180 // The number of energy groups
184 // The energy groups, one int entry per cluster, only set when needed
185 gmx::HostVector<int> energrp;
188 // Diagonal and topology exclusion helper data for all SIMD kernels
193 // Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
194 AlignedVector<real> diagonal_4xn_j_minus_i;
195 // Helper data for setting up diaginal exclusion masks in the SIMD 2xNN kernels
196 AlignedVector<real> diagonal_2xnn_j_minus_i;
197 // Filters for topology exclusion masks for the SIMD kernels
198 AlignedVector<uint32_t> exclusion_filter;
199 // Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support
200 AlignedVector<uint64_t> exclusion_filter64;
201 // Array of masks needed for exclusions
202 AlignedVector<real> interaction_array;
207 * \param[in] pinningPolicy Sets the pinning policy for all data that might be transfered to a GPU
209 nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy);
211 /* Returns a const reference to the parameters */
212 const Params ¶ms() const
217 /* Returns a non-const reference to the parameters */
218 Params ¶msDeprecated()
223 /* Returns the current total number of atoms stored */
229 /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
230 gmx::ArrayRef<const real> x() const
235 /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
236 gmx::ArrayRef<real> x()
241 /* Resizes the coordinate buffer and sets the number of atoms */
242 void resizeCoordinateBuffer(int numAtoms);
244 /* Resizes the force buffers for the current number of atoms */
245 void resizeForceBuffers();
248 // The LJ and charge parameters
250 // The total number of atoms currently stored
253 int natoms_local; /* Number of local atoms */
254 int XFormat; /* The format of x (and q), enum */
255 int FFormat; /* The format of f, enum */
256 gmx_bool bDynamicBox; /* Do we need to update shift_vec every step? */
257 gmx::HostVector<gmx::RVec> shift_vec; /* Shift vectors, copied from t_forcerec */
258 int xstride; /* stride for a coordinate in x (usually 3 or 4) */
259 int fstride; /* stride for a coordinate in f (usually 3 or 4) */
261 gmx::HostVector<real> x_; /* x and possibly q, size natoms*xstride */
264 // Masks for handling exclusions in the SIMD kernels
265 const SimdMasks simdMasks;
268 std::vector<nbnxn_atomdata_output_t> out; /* Output data structures, 1 per thread */
270 /* Reduction related data */
271 gmx_bool bUseBufferFlags; /* Use the flags or operate on all atoms */
272 nbnxn_buffer_flags_t buffer_flags; /* Flags for buffer zeroing+reduc. */
273 gmx_bool bUseTreeReduce; /* Use tree for force reduction */
274 tMPI_Atomic *syncStep; /* Synchronization step for tree reduce */
277 /* Copy na rvec elements from x to xnb using nbatFormat, start dest a0,
278 * and fills up to na_round with coordinates that are far away.
280 void copy_rvec_to_nbat_real(const int *a, int na, int na_round,
281 const rvec *x, int nbatFormat,
/* Tells what LJ combination rule data to store in nbat during init:
 * auto-detect from the parameters, geometric, Lorentz-Berthelot, or none.
 */
enum
{
    enbnxninitcombruleDETECT, enbnxninitcombruleGEOM, enbnxninitcombruleLB, enbnxninitcombruleNONE
};
288 /* Initialize the non-bonded atom data structure.
289 * The enum for nbatXFormat is in the file defining nbnxn_atomdata_t.
290 * Copy the ntypes*ntypes*2 sized nbfp non-bonded parameter list
291 * to the atom data structure.
292 * enbnxninitcombrule sets what combination rule data gets stored in nbat.
294 void nbnxn_atomdata_init(const gmx::MDLogger &mdlog,
295 nbnxn_atomdata_t *nbat,
296 Nbnxm::KernelType kernelType,
297 int enbnxninitcombrule,
298 int ntype, const real *nbfp,
302 void nbnxn_atomdata_set(nbnxn_atomdata_t *nbat,
303 const Nbnxm::GridSet &gridSet,
304 const t_mdatoms *mdatoms,
307 /* Copy the shift vectors to nbat */
308 void nbnxn_atomdata_copy_shiftvec(gmx_bool dynamic_box,
310 nbnxn_atomdata_t *nbat);
312 /* Copy x to nbat->x.
313 * FillLocal tells if the local filler particle coordinates should be zeroed.
315 template <bool useGpu>
316 void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet &gridSet,
317 Nbnxm::AtomLocality locality,
320 nbnxn_atomdata_t *nbat,
321 gmx_nbnxn_gpu_t *gpu_nbv,
322 void *xPmeDevicePtr);
325 void nbnxn_atomdata_copy_x_to_nbat_x<true>(const Nbnxm::GridSet &,
326 const Nbnxm::AtomLocality,
333 void nbnxn_atomdata_copy_x_to_nbat_x<false>(const Nbnxm::GridSet &,
334 const Nbnxm::AtomLocality,
341 //! Add the computed forces to \p f, an internal reduction might be performed as well
342 template <bool useGpu>
343 void reduceForces(nbnxn_atomdata_t *nbat,
344 Nbnxm::AtomLocality locality,
345 const Nbnxm::GridSet &gridSet,
347 gmx_nbnxn_gpu_t *gpu_nbv,
348 GpuBufferOpsAccumulateForce accumulateForce);
352 void reduceForces<true>(nbnxn_atomdata_t *nbat,
353 const Nbnxm::AtomLocality locality,
354 const Nbnxm::GridSet &gridSet,
356 gmx_nbnxn_gpu_t *gpu_nbv,
357 GpuBufferOpsAccumulateForce accumulateForce);
360 void reduceForces<false>(nbnxn_atomdata_t *nbat,
361 const Nbnxm::AtomLocality locality,
362 const Nbnxm::GridSet &gridSet,
364 gmx_nbnxn_gpu_t *gpu_nbv,
365 GpuBufferOpsAccumulateForce accumulateForce);
367 /* Add the fshift force stored in nbat to fshift */
368 void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat,
371 /* Get the atom start index and number of atoms for a given locality */
372 void nbnxn_get_atom_range(Nbnxm::AtomLocality atomLocality,
373 const Nbnxm::GridSet &gridSet,