This change is only code motion.
Change-Id: Ibefdd476c93ac2863dfb028fea37d4b00945055a
#include <cstdio>
+#include "gromacs/gpu_utils/hostallocator.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/utility/basedefinitions.h"
+#include "gromacs/utility/bitmask.h"
#include "gromacs/utility/real.h"
#include "locality.h"
class MDLogger;
}
+struct gmx_wallcycle;
struct nbnxn_atomdata_t;
struct nbnxn_search;
struct nonbonded_verlet_t;
struct t_mdatoms;
-struct gmx_wallcycle;
+struct tMPI_Atomic;
namespace Nbnxm
{
enum class KernelType;
}
-/* Reallocate the nbnxn_atomdata_t for a size of n atoms */
-void nbnxn_atomdata_realloc(nbnxn_atomdata_t *nbat, int n);
+/* Convenience type for vector with aligned memory */
+template<typename T>
+using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>;
+
+enum {
+ nbatXYZ, nbatXYZQ, nbatX4, nbatX8
+};
+
+// Struct that holds force and energy output buffers
+struct nbnxn_atomdata_output_t
+{
+ /* Constructor
+ *
+ * \param[in] kernelType Type of non-bonded kernel
+ * \param[in] numEnergyGroups The number of energy groups
+ * \param[in] simdEnergyBufferStride Stride for entries in the energy buffers for SIMD kernels
+ * \param[in] pinningPolicy Sets the pinning policy for all buffers used on the GPU
+ */
+ nbnxn_atomdata_output_t(Nbnxm::KernelType kernelType,
+ int numEnergyGroups,
+ int simdEnergyBUfferStride,
+ gmx::PinningPolicy pinningPolicy);
+
+ gmx::HostVector<real> f; // f, size natoms*fstride
+ gmx::HostVector<real> fshift; // Shift force array, size SHIFTS*DIM
+ gmx::HostVector<real> Vvdw; // Temporary Van der Waals group energy storage
+ gmx::HostVector<real> Vc; // Temporary Coulomb group energy storage
+ AlignedVector<real> VSvdw; // Temporary SIMD Van der Waals group energy storage
+ AlignedVector<real> VSc; // Temporary SIMD Coulomb group energy storage
+};
+
+/* Block size in atoms for the non-bonded thread force-buffer reduction,
+ * should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
+ * Should be small to reduce the reduction and zeroing cost,
+ * but too small will result in overhead.
+ * Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes.
+ */
+#if GMX_DOUBLE
+#define NBNXN_BUFFERFLAG_SIZE 8
+#else
+#define NBNXN_BUFFERFLAG_SIZE 16
+#endif
+
+/* We store the reduction flags as gmx_bitmask_t.
+ * This limits the number of flags to BITMASK_SIZE.
+ */
+#define NBNXN_BUFFERFLAG_MAX_THREADS (BITMASK_SIZE)
+
+/* Flags for telling if threads write to force output buffers */
+typedef struct {
+ int nflag; /* The number of flag blocks */
+ gmx_bitmask_t *flag; /* Bit i is set when thread i writes to a cell-block */
+ int flag_nalloc; /* Allocation size of cxy_flag */
+} nbnxn_buffer_flags_t;
+
+/* LJ combination rules: geometric, Lorentz-Berthelot, none */
+enum {
+ ljcrGEOM, ljcrLB, ljcrNONE, ljcrNR
+};
+
+/* Struct that stores atom related data for the nbnxn module
+ *
+ * Note: performance would improve slightly when all std::vector containers
+ * in this struct would not initialize during resize().
+ */
+struct nbnxn_atomdata_t
+{ //NOLINT(clang-analyzer-optin.performance.Padding)
+ struct Params
+ {
+ /* Constructor
+ *
+ * \param[in] pinningPolicy Sets the pinning policy for all data that might be transfered to a GPU
+ */
+ Params(gmx::PinningPolicy pinningPolicy);
+
+ // The number of different atom types
+ int numTypes;
+ // Lennard-Jone 6*C6 and 12*C12 parameters, size numTypes*2*2
+ gmx::HostVector<real> nbfp;
+ // Combination rule, see enum defined above
+ int comb_rule;
+ // LJ parameters per atom type, size numTypes*2
+ gmx::HostVector<real> nbfp_comb;
+ // As nbfp, but with a stride for the present SIMD architecture
+ AlignedVector<real> nbfp_aligned;
+ // Atom types per atom
+ gmx::HostVector<int> type;
+ // LJ parameters per atom for fast SIMD loading
+ gmx::HostVector<real> lj_comb;
+ // Charges per atom, not set with format nbatXYZQ
+ gmx::HostVector<real> q;
+ // The number of energy groups
+ int nenergrp;
+ // 2log(nenergrp)
+ int neg_2log;
+ // The energy groups, one int entry per cluster, only set when needed
+ gmx::HostVector<int> energrp;
+ };
+
+ // Diagonal and topology exclusion helper data for all SIMD kernels
+ struct SimdMasks
+ {
+ SimdMasks();
+
+ // Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
+ AlignedVector<real> diagonal_4xn_j_minus_i;
+ // Helper data for setting up diaginal exclusion masks in the SIMD 2xNN kernels
+ AlignedVector<real> diagonal_2xnn_j_minus_i;
+ // Filters for topology exclusion masks for the SIMD kernels
+ AlignedVector<uint32_t> exclusion_filter;
+ // Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support
+ AlignedVector<uint64_t> exclusion_filter64;
+ // Array of masks needed for exclusions
+ AlignedVector<real> interaction_array;
+ };
+
+ /* Constructor
+ *
+ * \param[in] pinningPolicy Sets the pinning policy for all data that might be transfered to a GPU
+ */
+ nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy);
+
+ /* Returns a const reference to the parameters */
+ const Params ¶ms() const
+ {
+ return params_;
+ }
+
+ /* Returns a non-const reference to the parameters */
+ Params ¶msDeprecated()
+ {
+ return params_;
+ }
+
+ /* Returns the current total number of atoms stored */
+ int numAtoms() const
+ {
+ return numAtoms_;
+ }
+
+ /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
+ gmx::ArrayRef<const real> x() const
+ {
+ return x_;
+ }
+
+ /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
+ gmx::ArrayRef<real> x()
+ {
+ return x_;
+ }
+
+ /* Resizes the coordinate buffer and sets the number of atoms */
+ void resizeCoordinateBuffer(int numAtoms);
+
+ /* Resizes the force buffers for the current number of atoms */
+ void resizeForceBuffers();
+
+ private:
+ // The LJ and charge parameters
+ Params params_;
+ // The total number of atoms currently stored
+ int numAtoms_;
+ public:
+ int natoms_local; /* Number of local atoms */
+ int XFormat; /* The format of x (and q), enum */
+ int FFormat; /* The format of f, enum */
+ gmx_bool bDynamicBox; /* Do we need to update shift_vec every step? */
+ gmx::HostVector<gmx::RVec> shift_vec; /* Shift vectors, copied from t_forcerec */
+ int xstride; /* stride for a coordinate in x (usually 3 or 4) */
+ int fstride; /* stride for a coordinate in f (usually 3 or 4) */
+ private:
+ gmx::HostVector<real> x_; /* x and possibly q, size natoms*xstride */
+
+ public:
+ // Masks for handling exclusions in the SIMD kernels
+ const SimdMasks simdMasks;
+
+ /* Output data */
+ std::vector<nbnxn_atomdata_output_t> out; /* Output data structures, 1 per thread */
+
+ /* Reduction related data */
+ gmx_bool bUseBufferFlags; /* Use the flags or operate on all atoms */
+ nbnxn_buffer_flags_t buffer_flags; /* Flags for buffer zeroing+reduc. */
+ gmx_bool bUseTreeReduce; /* Use tree for force reduction */
+ tMPI_Atomic *syncStep; /* Synchronization step for tree reduce */
+};
/* Copy na rvec elements from x to xnb using nbatFormat, start dest a0,
* and fills up to na_round with coordinates that are far away.
#include "gromacs/gpu_utils/cudautils.cuh"
#include "gromacs/mdlib/force_flags.h"
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/nbnxm/gpu_common.h"
#include "gromacs/nbnxm/gpu_common_utils.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/mdlib/force_flags.h"
#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/pbcutil/ishift.h"
#include "kernel_ref_prune.h"
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/nbnxm/pairlist.h"
#include "gromacs/utility/gmxassert.h"
#include "kernel_prune.h"
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/nbnxm/nbnxm_simd.h"
#include "gromacs/nbnxm/pairlist.h"
#include "gromacs/utility/gmxassert.h"
#include "kernel_prune.h"
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/nbnxm/nbnxm_simd.h"
#include "gromacs/nbnxm/pairlist.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/gpu_utils/oclutils.h"
#include "gromacs/hardware/hw_info.h"
#include "gromacs/mdlib/force_flags.h"
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/nbnxm/gpu_common.h"
#include "gromacs/nbnxm/gpu_common_utils.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/mdlib/force_flags.h"
#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/nbnxm/gpu_jit_support.h"
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/mdtypes/nblist.h"
#include "gromacs/utility/basedefinitions.h"
-#include "gromacs/utility/bitmask.h"
#include "gromacs/utility/defaultinitializationallocator.h"
#include "gromacs/utility/enumerationhelpers.h"
#include "gromacs/utility/real.h"
struct NbnxnListParameters;
struct NbnxnPairlistCpuWork;
struct NbnxnPairlistGpuWork;
-struct tMPI_Atomic;
namespace Nbnxm
{
std::vector<t_nblist *> nbl_fep; /* List of free-energy atom pair interactions */
};
-enum {
- nbatXYZ, nbatXYZQ, nbatX4, nbatX8
-};
-
-// Struct that holds force and energy output buffers
-struct nbnxn_atomdata_output_t
-{
- /* Constructor
- *
- * \param[in] kernelType Type of non-bonded kernel
- * \param[in] numEnergyGroups The number of energy groups
- * \param[in] simdEnergyBufferStride Stride for entries in the energy buffers for SIMD kernels
- * \param[in] pinningPolicy Sets the pinning policy for all buffers used on the GPU
- */
- nbnxn_atomdata_output_t(Nbnxm::KernelType kernelType,
- int numEnergyGroups,
- int simdEnergyBUfferStride,
- gmx::PinningPolicy pinningPolicy);
-
- gmx::HostVector<real> f; // f, size natoms*fstride
- gmx::HostVector<real> fshift; // Shift force array, size SHIFTS*DIM
- gmx::HostVector<real> Vvdw; // Temporary Van der Waals group energy storage
- gmx::HostVector<real> Vc; // Temporary Coulomb group energy storage
- AlignedVector<real> VSvdw; // Temporary SIMD Van der Waals group energy storage
- AlignedVector<real> VSc; // Temporary SIMD Coulomb group energy storage
-};
-
-/* Block size in atoms for the non-bonded thread force-buffer reduction,
- * should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
- * Should be small to reduce the reduction and zeroing cost,
- * but too small will result in overhead.
- * Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes.
- */
-#if GMX_DOUBLE
-#define NBNXN_BUFFERFLAG_SIZE 8
-#else
-#define NBNXN_BUFFERFLAG_SIZE 16
-#endif
-
-/* We store the reduction flags as gmx_bitmask_t.
- * This limits the number of flags to BITMASK_SIZE.
- */
-#define NBNXN_BUFFERFLAG_MAX_THREADS (BITMASK_SIZE)
-
-/* Flags for telling if threads write to force output buffers */
-typedef struct {
- int nflag; /* The number of flag blocks */
- gmx_bitmask_t *flag; /* Bit i is set when thread i writes to a cell-block */
- int flag_nalloc; /* Allocation size of cxy_flag */
-} nbnxn_buffer_flags_t;
-
-/* LJ combination rules: geometric, Lorentz-Berthelot, none */
-enum {
- ljcrGEOM, ljcrLB, ljcrNONE, ljcrNR
-};
-
-/* Struct that stores atom related data for the nbnxn module
- *
- * Note: performance would improve slightly when all std::vector containers
- * in this struct would not initialize during resize().
- */
-struct nbnxn_atomdata_t
-{ //NOLINT(clang-analyzer-optin.performance.Padding)
- struct Params
- {
- /* Constructor
- *
- * \param[in] pinningPolicy Sets the pinning policy for all data that might be transfered to a GPU
- */
- Params(gmx::PinningPolicy pinningPolicy);
-
- // The number of different atom types
- int numTypes;
- // Lennard-Jone 6*C6 and 12*C12 parameters, size numTypes*2*2
- gmx::HostVector<real> nbfp;
- // Combination rule, see enum defined above
- int comb_rule;
- // LJ parameters per atom type, size numTypes*2
- gmx::HostVector<real> nbfp_comb;
- // As nbfp, but with a stride for the present SIMD architecture
- AlignedVector<real> nbfp_aligned;
- // Atom types per atom
- gmx::HostVector<int> type;
- // LJ parameters per atom for fast SIMD loading
- gmx::HostVector<real> lj_comb;
- // Charges per atom, not set with format nbatXYZQ
- gmx::HostVector<real> q;
- // The number of energy groups
- int nenergrp;
- // 2log(nenergrp)
- int neg_2log;
- // The energy groups, one int entry per cluster, only set when needed
- gmx::HostVector<int> energrp;
- };
-
- // Diagonal and topology exclusion helper data for all SIMD kernels
- struct SimdMasks
- {
- SimdMasks();
-
- // Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
- AlignedVector<real> diagonal_4xn_j_minus_i;
- // Helper data for setting up diaginal exclusion masks in the SIMD 2xNN kernels
- AlignedVector<real> diagonal_2xnn_j_minus_i;
- // Filters for topology exclusion masks for the SIMD kernels
- AlignedVector<uint32_t> exclusion_filter;
- // Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support
- AlignedVector<uint64_t> exclusion_filter64;
- // Array of masks needed for exclusions
- AlignedVector<real> interaction_array;
- };
-
- /* Constructor
- *
- * \param[in] pinningPolicy Sets the pinning policy for all data that might be transfered to a GPU
- */
- nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy);
-
- /* Returns a const reference to the parameters */
- const Params ¶ms() const
- {
- return params_;
- }
-
- /* Returns a non-const reference to the parameters */
- Params ¶msDeprecated()
- {
- return params_;
- }
-
- /* Returns the current total number of atoms stored */
- int numAtoms() const
- {
- return numAtoms_;
- }
-
- /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
- gmx::ArrayRef<const real> x() const
- {
- return x_;
- }
-
- /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
- gmx::ArrayRef<real> x()
- {
- return x_;
- }
-
- /* Resizes the coordinate buffer and sets the number of atoms */
- void resizeCoordinateBuffer(int numAtoms);
-
- /* Resizes the force buffers for the current number of atoms */
- void resizeForceBuffers();
-
- private:
- // The LJ and charge parameters
- Params params_;
- // The total number of atoms currently stored
- int numAtoms_;
- public:
- int natoms_local; /* Number of local atoms */
- int XFormat; /* The format of x (and q), enum */
- int FFormat; /* The format of f, enum */
- gmx_bool bDynamicBox; /* Do we need to update shift_vec every step? */
- gmx::HostVector<gmx::RVec> shift_vec; /* Shift vectors, copied from t_forcerec */
- int xstride; /* stride for a coordinate in x (usually 3 or 4) */
- int fstride; /* stride for a coordinate in f (usually 3 or 4) */
- private:
- gmx::HostVector<real> x_; /* x and possibly q, size natoms*xstride */
-
- public:
- // Masks for handling exclusions in the SIMD kernels
- const SimdMasks simdMasks;
-
- /* Output data */
- std::vector<nbnxn_atomdata_output_t> out; /* Output data structures, 1 per thread */
-
- /* Reduction related data */
- gmx_bool bUseBufferFlags; /* Use the flags or operate on all atoms */
- nbnxn_buffer_flags_t buffer_flags; /* Flags for buffer zeroing+reduc. */
- gmx_bool bUseTreeReduce; /* Use tree for force reduction */
- tMPI_Atomic *syncStep; /* Synchronization step for tree reduce */
-};
-
#endif