2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
5 * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \libinternal \file
38 * Functionality for per-atom data in the nbnxm module
40 * \author Berk Hess <hess@kth.se>
41 * \ingroup module_nbnxm
46 #ifndef GMX_NBNXN_ATOMDATA_H
47 #define GMX_NBNXN_ATOMDATA_H
51 #include "gromacs/gpu_utils/devicebuffer_datatype.h"
52 #include "gromacs/gpu_utils/hostallocator.h"
53 #include "gromacs/math/vectypes.h"
54 #include "gromacs/mdtypes/locality.h"
55 #include "gromacs/utility/basedefinitions.h"
56 #include "gromacs/utility/bitmask.h"
57 #include "gromacs/utility/real.h"
65 struct nbnxn_atomdata_t;
66 struct nonbonded_verlet_t;
68 class GpuEventSynchronizer;
73 enum class KernelType;
76 //! Convenience type for vector with aligned memory
78 using AlignedVector = std::vector<T, gmx::AlignedAllocator<T>>;
88 //! Stride for coordinate/force arrays with xyz coordinate storage
89 static constexpr int STRIDE_XYZ = 3;
90 //! Stride for coordinate/force arrays with xyzq coordinate storage
91 static constexpr int STRIDE_XYZQ = 4;
92 //! Size of packs of x, y or z with SIMD 4-grouped packed coordinates/forces
93 static constexpr int c_packX4 = 4;
94 //! Size of packs of x, y or z with SIMD 8-grouped packed coordinates/forces
95 static constexpr int c_packX8 = 8;
96 //! Stridefor a pack of 4 coordinates/forces
97 static constexpr int STRIDE_P4 = DIM * c_packX4;
98 //! Stridefor a pack of 8 coordinates/forces
99 static constexpr int STRIDE_P8 = DIM * c_packX8;
101 //! Returns the index in a coordinate array corresponding to atom a
102 template<int packSize>
103 static inline int atom_to_x_index(int a)
105 return DIM * (a & ~(packSize - 1)) + (a & (packSize - 1));
109 * \brief Struct that holds force and energy output buffers */
110 struct nbnxn_atomdata_output_t
112 /*! \brief Constructor
114 * \param[in] kernelType Type of non-bonded kernel
115 * \param[in] numEnergyGroups The number of energy groups
116 * \param[in] simdEnergyBufferStride Stride for entries in the energy buffers for SIMD kernels
117 * \param[in] pinningPolicy Sets the pinning policy for all buffers used on the GPU
119 nbnxn_atomdata_output_t(Nbnxm::KernelType kernelType,
121 int simdEnergyBufferStride,
122 gmx::PinningPolicy pinningPolicy);
124 //! f, size natoms*fstride
125 gmx::HostVector<real> f;
126 //! Shift force array, size c_numShiftVectors*DIM
127 gmx::HostVector<real> fshift;
128 //! Temporary Van der Waals group energy storage
129 gmx::HostVector<real> Vvdw;
130 //! Temporary Coulomb group energy storage
131 gmx::HostVector<real> Vc;
132 //! Temporary SIMD Van der Waals group energy storage
133 AlignedVector<real> VSvdw;
134 //! Temporary SIMD Coulomb group energy storage
135 AlignedVector<real> VSc;
138 /*! \brief Block size in atoms for the non-bonded thread force-buffer reduction.
140 * Should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
141 * Should be small to reduce the reduction and zeroing cost,
142 * but too small will result in overhead.
143 * Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes.
146 # define NBNXN_BUFFERFLAG_SIZE 8
148 # define NBNXN_BUFFERFLAG_SIZE 16
151 /*! \brief We store the reduction flags as gmx_bitmask_t.
152 * This limits the number of flags to BITMASK_SIZE.
154 #define NBNXN_BUFFERFLAG_MAX_THREADS (BITMASK_SIZE)
157 //! LJ combination rules
158 enum class LJCombinationRule : int
162 //! Lorentz-Berthelot
170 //! String corresponding to LJ combination rule
171 const char* enumValueToString(LJCombinationRule enumValue);
174 * \brief Struct that stores atom related data for the nbnxn module
176 * Note: performance would improve slightly when all std::vector containers
177 * in this struct would not initialize during resize().
179 struct nbnxn_atomdata_t
180 { //NOLINT(clang-analyzer-optin.performance.Padding)
182 * \brief The actual atom data parameter values */
185 /*! \brief Constructor
187 * \param[in] pinningPolicy Sets the pinning policy for all data that might be transfered to a GPU
189 Params(gmx::PinningPolicy pinningPolicy);
191 //! The number of different atom types
193 //! Lennard-Jone 6*C6 and 12*C12 parameters, size numTypes*2*2
194 gmx::HostVector<real> nbfp;
195 //! Combination rule, see enum defined above
196 LJCombinationRule ljCombinationRule;
197 //! LJ parameters per atom type, size numTypes*2
198 gmx::HostVector<real> nbfp_comb;
199 //! As nbfp, but with a stride for the present SIMD architecture
200 AlignedVector<real> nbfp_aligned;
201 //! Atom types per atom
202 gmx::HostVector<int> type;
203 //! LJ parameters per atom for fast SIMD loading
204 gmx::HostVector<real> lj_comb;
205 //! Charges per atom, not set with format nbatXYZQ
206 gmx::HostVector<real> q;
207 //! The number of energy groups
211 //! The energy groups, one int entry per cluster, only set when needed
212 gmx::HostVector<int> energrp;
216 * \brief Diagonal and topology exclusion helper data for all SIMD kernels. */
221 //! Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
222 AlignedVector<real> diagonal_4xn_j_minus_i;
223 //! Helper data for setting up diaginal exclusion masks in the SIMD 2xNN kernels
224 AlignedVector<real> diagonal_2xnn_j_minus_i;
225 //! Filters for topology exclusion masks for the SIMD kernels
226 AlignedVector<uint32_t> exclusion_filter;
227 //! Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support
228 AlignedVector<uint64_t> exclusion_filter64;
229 //! Array of masks needed for exclusions
230 AlignedVector<real> interaction_array;
233 /*! \brief Constructor
235 * \param[in] pinningPolicy Sets the pinning policy for all data that might be transferred
237 * \param[in] mdlog The logger
238 * \param[in] kernelType Nonbonded NxN kernel type
239 * \param[in] enbnxninitcombrule LJ combination rule
240 * \param[in] ntype Number of atom types
241 * \param[in] nbfp Non-bonded force parameters
242 * \param[in] n_energygroups Number of energy groups
243 * \param[in] nout Number of output data structures
245 nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy,
246 const gmx::MDLogger& mdlog,
247 Nbnxm::KernelType kernelType,
248 int enbnxninitcombrule,
250 gmx::ArrayRef<const real> nbfp,
254 //! Returns a const reference to the parameters
255 const Params& params() const { return params_; }
257 //! Returns a non-const reference to the parameters
258 Params& paramsDeprecated() { return params_; }
260 //! Returns the current total number of atoms stored
261 int numAtoms() const { return numAtoms_; }
263 //! Return the coordinate buffer, and q with xFormat==nbatXYZQ
264 gmx::ArrayRef<const real> x() const { return x_; }
266 //! Return the coordinate buffer, and q with xFormat==nbatXYZQ
267 gmx::ArrayRef<real> x() { return x_; }
269 //! Resizes the coordinate buffer and sets the number of atoms
270 void resizeCoordinateBuffer(int numAtoms);
272 //! Resizes the force buffers for the current number of atoms
273 void resizeForceBuffers();
276 //! The LJ and charge parameters
278 //! The total number of atoms currently stored
282 //! Number of local atoms
284 //! The format of x (and q), enum
286 //! The format of f, enum
288 //! Do we need to update shift_vec every step?
289 gmx_bool bDynamicBox;
290 //! Shift vectors, copied from t_forcerec
291 gmx::HostVector<gmx::RVec> shift_vec;
292 //! stride for a coordinate in x (usually 3 or 4)
294 //! stride for a coordinate in f (usually 3 or 4)
298 //! x and possibly q, size natoms*xstride
299 gmx::HostVector<real> x_;
302 //! Masks for handling exclusions in the SIMD kernels
303 const SimdMasks simdMasks;
305 //! Output data structures, 1 per thread
306 std::vector<nbnxn_atomdata_output_t> out;
308 //! Reduction related data
310 //! Use the flags or operate on all atoms
311 gmx_bool bUseBufferFlags;
312 //! Flags for buffer zeroing+reduc.
313 std::vector<gmx_bitmask_t> buffer_flags;
317 /*! \brief Copy na rvec elements from x to xnb using nbatFormat, start dest a0,
318 * and fills up to na_round with coordinates that are far away.
320 void copy_rvec_to_nbat_real(const int* a, int na, int na_round, const rvec* x, int nbatFormat, real* xnb, int a0);
322 //! Describes the combination rule in use by this force field
325 enbnxninitcombruleDETECT,
326 enbnxninitcombruleGEOM,
327 enbnxninitcombruleLB,
328 enbnxninitcombruleNONE
331 /*! \brief Initialize the non-bonded atom data structure.
333 * The enum for nbatXFormat is in the file defining nbnxn_atomdata_t.
334 * Copy the ntypes*ntypes*2 sized nbfp non-bonded parameter list
335 * to the atom data structure.
336 * enbnxninitcombrule sets what combination rule data gets stored in nbat.
338 void nbnxn_atomdata_init(const gmx::MDLogger& mdlog,
339 nbnxn_atomdata_t* nbat,
340 Nbnxm::KernelType kernelType,
341 int enbnxninitcombrule,
343 gmx::ArrayRef<const real> nbfp,
347 //! Sets the atomdata after pair search
348 void nbnxn_atomdata_set(nbnxn_atomdata_t* nbat,
349 const Nbnxm::GridSet& gridSet,
350 gmx::ArrayRef<const int> atomTypes,
351 gmx::ArrayRef<const real> atomCharges,
352 gmx::ArrayRef<const int> atomInfo);
354 //! Copy the shift vectors to nbat
355 void nbnxn_atomdata_copy_shiftvec(gmx_bool dynamic_box,
356 gmx::ArrayRef<gmx::RVec> shift_vec,
357 nbnxn_atomdata_t* nbat);
359 /*! \brief Transform coordinates to xbat layout
361 * Creates a copy of the coordinates buffer using short-range ordering.
363 * \param[in] gridSet The grids data.
364 * \param[in] locality If the transformation should be applied to local or non local coordinates.
365 * \param[in] coordinates Coordinates in plain rvec format.
366 * \param[in,out] nbat Data in NBNXM format, used for mapping formats and to locate the output buffer.
368 void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet,
369 gmx::AtomLocality locality,
370 const rvec* coordinates,
371 nbnxn_atomdata_t* nbat);
373 /*! \brief Transform coordinates to xbat layout on GPU
375 * Creates a GPU copy of the coordinates buffer using short-range ordering.
376 * As input, uses coordinates in plain rvec format in GPU memory.
378 * \param[in] gridSet The grids data.
379 * \param[in] locality If the transformation should be applied to local or non local coordinates.
380 * \param[in,out] gpu_nbv The NBNXM GPU data structure.
381 * \param[in] d_x Coordinates to be copied (in plain rvec format).
382 * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory.
384 void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet,
385 gmx::AtomLocality locality,
387 DeviceBuffer<gmx::RVec> d_x,
388 GpuEventSynchronizer* xReadyOnDevice);
390 /*! \brief Add the computed forces to \p f, an internal reduction might be performed as well
392 * \param[in] nbat Atom data in NBNXM format.
393 * \param[in] locality If the reduction should be performed on local or non-local atoms.
394 * \param[in] gridSet The grids data.
395 * \param[out] totalForce Buffer to accumulate resulting force
397 void reduceForces(nbnxn_atomdata_t* nbat, gmx::AtomLocality locality, const Nbnxm::GridSet& gridSet, rvec* totalForce);
399 //! Add the fshift force stored in nbat to fshift
400 void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t& nbat, gmx::ArrayRef<gmx::RVec> fshift);
402 //! Get the atom start index and number of atoms for a given locality
403 void nbnxn_get_atom_range(gmx::AtomLocality atomLocality,
404 const Nbnxm::GridSet& gridSet,