2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
5 * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 #ifndef GMX_NBNXN_ATOMDATA_H
38 #define GMX_NBNXN_ATOMDATA_H
42 #include "gromacs/gpu_utils/devicebuffer_datatype.h"
43 #include "gromacs/gpu_utils/hostallocator.h"
44 #include "gromacs/math/vectypes.h"
45 #include "gromacs/mdtypes/locality.h"
46 #include "gromacs/utility/basedefinitions.h"
47 #include "gromacs/utility/bitmask.h"
48 #include "gromacs/utility/real.h"
50 #include "gpu_types.h"
57 struct nbnxn_atomdata_t;
58 struct nonbonded_verlet_t;
62 class GpuEventSynchronizer;
67 enum class KernelType;
70 /* Convenience type for vector with aligned memory */
72 using AlignedVector = std::vector<T, gmx::AlignedAllocator<T>>;
82 //! Stride for coordinate/force arrays with xyz coordinate storage
83 static constexpr int STRIDE_XYZ = 3;
84 //! Stride for coordinate/force arrays with xyzq coordinate storage
85 static constexpr int STRIDE_XYZQ = 4;
86 //! Size of packs of x, y or z with SIMD 4-grouped packed coordinates/forces
87 static constexpr int c_packX4 = 4;
88 //! Size of packs of x, y or z with SIMD 8-grouped packed coordinates/forces
89 static constexpr int c_packX8 = 8;
90 //! Stridefor a pack of 4 coordinates/forces
91 static constexpr int STRIDE_P4 = DIM * c_packX4;
92 //! Stridefor a pack of 8 coordinates/forces
93 static constexpr int STRIDE_P8 = DIM * c_packX8;
95 //! Returns the index in a coordinate array corresponding to atom a
96 template<int packSize>
97 static inline int atom_to_x_index(int a)
99 return DIM * (a & ~(packSize - 1)) + (a & (packSize - 1));
102 // Struct that holds force and energy output buffers
103 struct nbnxn_atomdata_output_t
107 * \param[in] kernelType Type of non-bonded kernel
108 * \param[in] numEnergyGroups The number of energy groups
109 * \param[in] simdEnergyBufferStride Stride for entries in the energy buffers for SIMD kernels
110 * \param[in] pinningPolicy Sets the pinning policy for all buffers used on the GPU
112 nbnxn_atomdata_output_t(Nbnxm::KernelType kernelType,
114 int simdEnergyBUfferStride,
115 gmx::PinningPolicy pinningPolicy);
117 gmx::HostVector<real> f; // f, size natoms*fstride
118 gmx::HostVector<real> fshift; // Shift force array, size SHIFTS*DIM
119 gmx::HostVector<real> Vvdw; // Temporary Van der Waals group energy storage
120 gmx::HostVector<real> Vc; // Temporary Coulomb group energy storage
121 AlignedVector<real> VSvdw; // Temporary SIMD Van der Waals group energy storage
122 AlignedVector<real> VSc; // Temporary SIMD Coulomb group energy storage
125 /* Block size in atoms for the non-bonded thread force-buffer reduction,
126 * should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
127 * Should be small to reduce the reduction and zeroing cost,
128 * but too small will result in overhead.
129 * Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes.
132 # define NBNXN_BUFFERFLAG_SIZE 8
134 # define NBNXN_BUFFERFLAG_SIZE 16
137 /* We store the reduction flags as gmx_bitmask_t.
138 * This limits the number of flags to BITMASK_SIZE.
140 #define NBNXN_BUFFERFLAG_MAX_THREADS (BITMASK_SIZE)
142 /* Flags for telling if threads write to force output buffers */
145 int nflag; /* The number of flag blocks */
146 gmx_bitmask_t* flag; /* Bit i is set when thread i writes to a cell-block */
147 int flag_nalloc; /* Allocation size of cxy_flag */
148 } nbnxn_buffer_flags_t;
150 /* LJ combination rules: geometric, Lorentz-Berthelot, none */
159 /* Struct that stores atom related data for the nbnxn module
161 * Note: performance would improve slightly when all std::vector containers
162 * in this struct would not initialize during resize().
164 struct nbnxn_atomdata_t
165 { //NOLINT(clang-analyzer-optin.performance.Padding)
170 * \param[in] pinningPolicy Sets the pinning policy for all data that might be transfered to a GPU
172 Params(gmx::PinningPolicy pinningPolicy);
174 // The number of different atom types
176 // Lennard-Jone 6*C6 and 12*C12 parameters, size numTypes*2*2
177 gmx::HostVector<real> nbfp;
178 // Combination rule, see enum defined above
180 // LJ parameters per atom type, size numTypes*2
181 gmx::HostVector<real> nbfp_comb;
182 // As nbfp, but with a stride for the present SIMD architecture
183 AlignedVector<real> nbfp_aligned;
184 // Atom types per atom
185 gmx::HostVector<int> type;
186 // LJ parameters per atom for fast SIMD loading
187 gmx::HostVector<real> lj_comb;
188 // Charges per atom, not set with format nbatXYZQ
189 gmx::HostVector<real> q;
190 // The number of energy groups
194 // The energy groups, one int entry per cluster, only set when needed
195 gmx::HostVector<int> energrp;
198 // Diagonal and topology exclusion helper data for all SIMD kernels
203 // Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
204 AlignedVector<real> diagonal_4xn_j_minus_i;
205 // Helper data for setting up diaginal exclusion masks in the SIMD 2xNN kernels
206 AlignedVector<real> diagonal_2xnn_j_minus_i;
207 // Filters for topology exclusion masks for the SIMD kernels
208 AlignedVector<uint32_t> exclusion_filter;
209 // Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support
210 AlignedVector<uint64_t> exclusion_filter64;
211 // Array of masks needed for exclusions
212 AlignedVector<real> interaction_array;
217 * \param[in] pinningPolicy Sets the pinning policy for all data that might be transfered to a GPU
219 nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy);
221 /* Returns a const reference to the parameters */
222 const Params& params() const { return params_; }
224 /* Returns a non-const reference to the parameters */
225 Params& paramsDeprecated() { return params_; }
227 /* Returns the current total number of atoms stored */
228 int numAtoms() const { return numAtoms_; }
230 /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
231 gmx::ArrayRef<const real> x() const { return x_; }
233 /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
234 gmx::ArrayRef<real> x() { return x_; }
236 /* Resizes the coordinate buffer and sets the number of atoms */
237 void resizeCoordinateBuffer(int numAtoms);
239 /* Resizes the force buffers for the current number of atoms */
240 void resizeForceBuffers();
243 // The LJ and charge parameters
245 // The total number of atoms currently stored
249 int natoms_local; /* Number of local atoms */
250 int XFormat; /* The format of x (and q), enum */
251 int FFormat; /* The format of f, enum */
252 gmx_bool bDynamicBox; /* Do we need to update shift_vec every step? */
253 gmx::HostVector<gmx::RVec> shift_vec; /* Shift vectors, copied from t_forcerec */
254 int xstride; /* stride for a coordinate in x (usually 3 or 4) */
255 int fstride; /* stride for a coordinate in f (usually 3 or 4) */
257 gmx::HostVector<real> x_; /* x and possibly q, size natoms*xstride */
260 // Masks for handling exclusions in the SIMD kernels
261 const SimdMasks simdMasks;
264 std::vector<nbnxn_atomdata_output_t> out; /* Output data structures, 1 per thread */
266 /* Reduction related data */
267 gmx_bool bUseBufferFlags; /* Use the flags or operate on all atoms */
268 nbnxn_buffer_flags_t buffer_flags; /* Flags for buffer zeroing+reduc. */
269 gmx_bool bUseTreeReduce; /* Use tree for force reduction */
270 tMPI_Atomic* syncStep; /* Synchronization step for tree reduce */
273 /* Copy na rvec elements from x to xnb using nbatFormat, start dest a0,
274 * and fills up to na_round with coordinates that are far away.
276 void copy_rvec_to_nbat_real(const int* a, int na, int na_round, const rvec* x, int nbatFormat, real* xnb, int a0);
280 enbnxninitcombruleDETECT,
281 enbnxninitcombruleGEOM,
282 enbnxninitcombruleLB,
283 enbnxninitcombruleNONE
286 /* Initialize the non-bonded atom data structure.
287 * The enum for nbatXFormat is in the file defining nbnxn_atomdata_t.
288 * Copy the ntypes*ntypes*2 sized nbfp non-bonded parameter list
289 * to the atom data structure.
290 * enbnxninitcombrule sets what combination rule data gets stored in nbat.
292 void nbnxn_atomdata_init(const gmx::MDLogger& mdlog,
293 nbnxn_atomdata_t* nbat,
294 Nbnxm::KernelType kernelType,
295 int enbnxninitcombrule,
297 gmx::ArrayRef<const real> nbfp,
301 void nbnxn_atomdata_set(nbnxn_atomdata_t* nbat,
302 const Nbnxm::GridSet& gridSet,
303 const t_mdatoms* mdatoms,
306 /* Copy the shift vectors to nbat */
307 void nbnxn_atomdata_copy_shiftvec(gmx_bool dynamic_box, rvec* shift_vec, nbnxn_atomdata_t* nbat);
309 /*! \brief Transform coordinates to xbat layout
311 * Creates a copy of the coordinates buffer using short-range ordering.
313 * \param[in] gridSet The grids data.
314 * \param[in] locality If the transformation should be applied to local or non local coordinates.
315 * \param[in] fillLocal Tells if the local filler particle coordinates should be zeroed.
316 * \param[in] coordinates Coordinates in plain rvec format.
317 * \param[in,out] nbat Data in NBNXM format, used for mapping formats and to locate the output buffer.
319 void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet,
320 gmx::AtomLocality locality,
322 const rvec* coordinates,
323 nbnxn_atomdata_t* nbat);
325 /*! \brief Transform coordinates to xbat layout on GPU
327 * Creates a GPU copy of the coordinates buffer using short-range ordering.
328 * As input, uses coordinates in plain rvec format in GPU memory.
330 * \param[in] gridSet The grids data.
331 * \param[in] locality If the transformation should be applied to local or non local coordinates.
332 * \param[in] fillLocal Tells if the local filler particle coordinates should be zeroed.
333 * \param[in,out] gpu_nbv The NBNXM GPU data structure.
334 * \param[in] d_x Coordinates to be copied (in plain rvec format).
335 * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory.
337 void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet,
338 gmx::AtomLocality locality,
340 gmx_nbnxn_gpu_t* gpu_nbv,
341 DeviceBuffer<float> d_x,
342 GpuEventSynchronizer* xReadyOnDevice);
344 /*! \brief Add the computed forces to \p f, an internal reduction might be performed as well
346 * \param[in] nbat Atom data in NBNXM format.
347 * \param[in] locality If the reduction should be performed on local or non-local atoms.
348 * \param[in] gridSet The grids data.
349 * \param[out] totalForce Buffer to accumulate resulting force
351 void reduceForces(nbnxn_atomdata_t* nbat, gmx::AtomLocality locality, const Nbnxm::GridSet& gridSet, rvec* totalForce);
353 /*! \brief Reduce forces on the GPU
355 * \param[in] locality If the reduction should be performed on local or non-local atoms.
356 * \param[out] totalForcesDevice Device buffer to accumulate resulting force.
357 * \param[in] gridSet The grids data.
358 * \param[in] pmeForcesDevice Device buffer with PME forces.
359 * \param[in] dependencyList List of synchronizers that represent the dependencies the reduction task needs to sync on.
360 * \param[in] gpu_nbv The NBNXM GPU data structure.
361 * \param[in] useGpuFPmeReduction Whether PME forces should be added.
362 * \param[in] accumulateForce Whether there are usefull data already in the total force buffer.
364 void reduceForcesGpu(gmx::AtomLocality locality,
365 DeviceBuffer<float> totalForcesDevice,
366 const Nbnxm::GridSet& gridSet,
367 void* pmeForcesDevice,
368 gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
369 gmx_nbnxn_gpu_t* gpu_nbv,
370 bool useGpuFPmeReduction,
371 bool accumulateForce);
373 /* Add the fshift force stored in nbat to fshift */
374 void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t& nbat, gmx::ArrayRef<gmx::RVec> fshift);
376 /* Get the atom start index and number of atoms for a given locality */
377 void nbnxn_get_atom_range(gmx::AtomLocality atomLocality,
378 const Nbnxm::GridSet& gridSet,