From: Berk Hess Date: Thu, 28 Feb 2019 09:47:30 +0000 (+0100) Subject: Move nbnxm atomdata types to atomdata.h X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=df69cc7c9ec50c226a0a251e65a7dd98f37bb76e;p=alexxy%2Fgromacs.git Move nbnxm atomdata types to atomdata.h This change is only code motion. Change-Id: Ibefdd476c93ac2863dfb028fea37d4b00945055a --- diff --git a/src/gromacs/nbnxm/atomdata.h b/src/gromacs/nbnxm/atomdata.h index 7deeb2c181..45c6c6c963 100644 --- a/src/gromacs/nbnxm/atomdata.h +++ b/src/gromacs/nbnxm/atomdata.h @@ -38,8 +38,10 @@ #include +#include "gromacs/gpu_utils/hostallocator.h" #include "gromacs/math/vectypes.h" #include "gromacs/utility/basedefinitions.h" +#include "gromacs/utility/bitmask.h" #include "gromacs/utility/real.h" #include "locality.h" @@ -49,19 +51,205 @@ namespace gmx class MDLogger; } +struct gmx_wallcycle; struct nbnxn_atomdata_t; struct nbnxn_search; struct nonbonded_verlet_t; struct t_mdatoms; -struct gmx_wallcycle; +struct tMPI_Atomic; namespace Nbnxm { enum class KernelType; } -/* Reallocate the nbnxn_atomdata_t for a size of n atoms */ -void nbnxn_atomdata_realloc(nbnxn_atomdata_t *nbat, int n); +/* Convenience type for vector with aligned memory */ +template +using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>; + +enum { + nbatXYZ, nbatXYZQ, nbatX4, nbatX8 +}; + +// Struct that holds force and energy output buffers +struct nbnxn_atomdata_output_t +{ + /* Constructor + * + * \param[in] kernelType Type of non-bonded kernel + * \param[in] numEnergyGroups The number of energy groups + * \param[in] simdEnergyBufferStride Stride for entries in the energy buffers for SIMD kernels + * \param[in] pinningPolicy Sets the pinning policy for all buffers used on the GPU + */ + nbnxn_atomdata_output_t(Nbnxm::KernelType kernelType, + int numEnergyGroups, + int simdEnergyBUfferStride, + gmx::PinningPolicy pinningPolicy); + + gmx::HostVector f; // f, size natoms*fstride + gmx::HostVector fshift; // Shift force array, size SHIFTS*DIM + gmx::HostVector Vvdw; // Temporary Van der Waals group energy storage + gmx::HostVector Vc; // Temporary Coulomb group energy storage + AlignedVector VSvdw; // Temporary SIMD Van der Waals group energy storage + AlignedVector VSc; // Temporary SIMD Coulomb group energy storage +}; + +/* Block size in atoms for the non-bonded thread force-buffer reduction, + * should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8). + * Should be small to reduce the reduction and zeroing cost, + * but too small will result in overhead. + * Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes. + */ +#if GMX_DOUBLE +#define NBNXN_BUFFERFLAG_SIZE 8 +#else +#define NBNXN_BUFFERFLAG_SIZE 16 +#endif + +/* We store the reduction flags as gmx_bitmask_t. + * This limits the number of flags to BITMASK_SIZE. + */ +#define NBNXN_BUFFERFLAG_MAX_THREADS (BITMASK_SIZE) + +/* Flags for telling if threads write to force output buffers */ +typedef struct { + int nflag; /* The number of flag blocks */ + gmx_bitmask_t *flag; /* Bit i is set when thread i writes to a cell-block */ + int flag_nalloc; /* Allocation size of cxy_flag */ +} nbnxn_buffer_flags_t; + +/* LJ combination rules: geometric, Lorentz-Berthelot, none */ +enum { + ljcrGEOM, ljcrLB, ljcrNONE, ljcrNR +}; + +/* Struct that stores atom related data for the nbnxn module + * + * Note: performance would improve slightly when all std::vector containers + * in this struct would not initialize during resize(). + */ +struct nbnxn_atomdata_t +{ //NOLINT(clang-analyzer-optin.performance.Padding) + struct Params + { + /* Constructor + * + * \param[in] pinningPolicy Sets the pinning policy for all data that might be transfered to a GPU + */ + Params(gmx::PinningPolicy pinningPolicy); + + // The number of different atom types + int numTypes; + // Lennard-Jone 6*C6 and 12*C12 parameters, size numTypes*2*2 + gmx::HostVector nbfp; + // Combination rule, see enum defined above + int comb_rule; + // LJ parameters per atom type, size numTypes*2 + gmx::HostVector nbfp_comb; + // As nbfp, but with a stride for the present SIMD architecture + AlignedVector nbfp_aligned; + // Atom types per atom + gmx::HostVector type; + // LJ parameters per atom for fast SIMD loading + gmx::HostVector lj_comb; + // Charges per atom, not set with format nbatXYZQ + gmx::HostVector q; + // The number of energy groups + int nenergrp; + // 2log(nenergrp) + int neg_2log; + // The energy groups, one int entry per cluster, only set when needed + gmx::HostVector energrp; + }; + + // Diagonal and topology exclusion helper data for all SIMD kernels + struct SimdMasks + { + SimdMasks(); + + // Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels + AlignedVector diagonal_4xn_j_minus_i; + // Helper data for setting up diaginal exclusion masks in the SIMD 2xNN kernels + AlignedVector diagonal_2xnn_j_minus_i; + // Filters for topology exclusion masks for the SIMD kernels + AlignedVector exclusion_filter; + // Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support + AlignedVector exclusion_filter64; + // Array of masks needed for exclusions + AlignedVector interaction_array; + }; + + /* Constructor + * + * \param[in] pinningPolicy Sets the pinning policy for all data that might be transfered to a GPU + */ + nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy); + + /* Returns a const reference to the parameters */ + const Params ¶ms() const + { + return params_; + } + + /* Returns a non-const reference to the parameters */ + Params ¶msDeprecated() + { + return params_; + } + + /* Returns the current total number of atoms stored */ + int numAtoms() const + { + return numAtoms_; + } + + /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */ + gmx::ArrayRef x() const + { + return x_; + } + + /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */ + gmx::ArrayRef x() + { + return x_; + } + + /* Resizes the coordinate buffer and sets the number of atoms */ + void resizeCoordinateBuffer(int numAtoms); + + /* Resizes the force buffers for the current number of atoms */ + void resizeForceBuffers(); + + private: + // The LJ and charge parameters + Params params_; + // The total number of atoms currently stored + int numAtoms_; + public: + int natoms_local; /* Number of local atoms */ + int XFormat; /* The format of x (and q), enum */ + int FFormat; /* The format of f, enum */ + gmx_bool bDynamicBox; /* Do we need to update shift_vec every step? */ + gmx::HostVector shift_vec; /* Shift vectors, copied from t_forcerec */ + int xstride; /* stride for a coordinate in x (usually 3 or 4) */ + int fstride; /* stride for a coordinate in f (usually 3 or 4) */ + private: + gmx::HostVector x_; /* x and possibly q, size natoms*xstride */ + + public: + // Masks for handling exclusions in the SIMD kernels + const SimdMasks simdMasks; + + /* Output data */ + std::vector out; /* Output data structures, 1 per thread */ + + /* Reduction related data */ + gmx_bool bUseBufferFlags; /* Use the flags or operate on all atoms */ + nbnxn_buffer_flags_t buffer_flags; /* Flags for buffer zeroing+reduc. */ + gmx_bool bUseTreeReduce; /* Use tree for force reduction */ + tMPI_Atomic *syncStep; /* Synchronization step for tree reduce */ +}; /* Copy na rvec elements from x to xnb using nbatFormat, start dest a0, * and fills up to na_round with coordinates that are far away. diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu index 7ce9b9fa5e..1a638c4158 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu @@ -55,6 +55,7 @@ #include "gromacs/gpu_utils/cudautils.cuh" #include "gromacs/mdlib/force_flags.h" +#include "gromacs/nbnxm/atomdata.h" #include "gromacs/nbnxm/gpu_common.h" #include "gromacs/nbnxm/gpu_common_utils.h" #include "gromacs/nbnxm/gpu_data_mgmt.h" diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu index 61104dd57b..b19b7f6c4c 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu @@ -57,6 +57,7 @@ #include "gromacs/mdlib/force_flags.h" #include "gromacs/mdtypes/interaction_const.h" #include "gromacs/mdtypes/md_enums.h" +#include "gromacs/nbnxm/atomdata.h" #include "gromacs/nbnxm/gpu_data_mgmt.h" #include "gromacs/nbnxm/nbnxm.h" #include "gromacs/pbcutil/ishift.h" diff --git a/src/gromacs/nbnxm/kernels_reference/kernel_ref_prune.cpp b/src/gromacs/nbnxm/kernels_reference/kernel_ref_prune.cpp index 65a8a2d87b..77360e90fc 100644 --- a/src/gromacs/nbnxm/kernels_reference/kernel_ref_prune.cpp +++ b/src/gromacs/nbnxm/kernels_reference/kernel_ref_prune.cpp @@ -37,6 +37,7 @@ #include "kernel_ref_prune.h" +#include "gromacs/nbnxm/atomdata.h" #include "gromacs/nbnxm/pairlist.h" #include "gromacs/utility/gmxassert.h" diff --git a/src/gromacs/nbnxm/kernels_simd_2xmm/kernel_prune.cpp b/src/gromacs/nbnxm/kernels_simd_2xmm/kernel_prune.cpp index f8816a4fa4..30e67265af 100644 --- a/src/gromacs/nbnxm/kernels_simd_2xmm/kernel_prune.cpp +++ b/src/gromacs/nbnxm/kernels_simd_2xmm/kernel_prune.cpp @@ -37,6 +37,7 @@ #include "kernel_prune.h" +#include "gromacs/nbnxm/atomdata.h" #include "gromacs/nbnxm/nbnxm_simd.h" #include "gromacs/nbnxm/pairlist.h" #include "gromacs/utility/gmxassert.h" diff --git a/src/gromacs/nbnxm/kernels_simd_4xm/kernel_prune.cpp b/src/gromacs/nbnxm/kernels_simd_4xm/kernel_prune.cpp index f563056597..6d46acff1e 100644 --- a/src/gromacs/nbnxm/kernels_simd_4xm/kernel_prune.cpp +++ b/src/gromacs/nbnxm/kernels_simd_4xm/kernel_prune.cpp @@ -37,6 +37,7 @@ #include "kernel_prune.h" +#include "gromacs/nbnxm/atomdata.h" #include "gromacs/nbnxm/nbnxm_simd.h" #include "gromacs/nbnxm/pairlist.h" #include "gromacs/utility/gmxassert.h" diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp index 34132dbbd7..0eb6ff9d05 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp @@ -73,6 +73,7 @@ #include "gromacs/gpu_utils/oclutils.h" #include "gromacs/hardware/hw_info.h" #include "gromacs/mdlib/force_flags.h" +#include "gromacs/nbnxm/atomdata.h" #include "gromacs/nbnxm/gpu_common.h" #include "gromacs/nbnxm/gpu_common_utils.h" #include "gromacs/nbnxm/gpu_data_mgmt.h" diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp index 85de6a2746..f669d63a93 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp @@ -62,6 +62,7 @@ #include "gromacs/mdlib/force_flags.h" #include "gromacs/mdtypes/interaction_const.h" #include "gromacs/mdtypes/md_enums.h" +#include "gromacs/nbnxm/atomdata.h" #include "gromacs/nbnxm/gpu_data_mgmt.h" #include "gromacs/nbnxm/gpu_jit_support.h" #include "gromacs/nbnxm/nbnxm.h" diff --git a/src/gromacs/nbnxm/pairlist.h b/src/gromacs/nbnxm/pairlist.h index 72a028be78..98a138ec74 100644 --- a/src/gromacs/nbnxm/pairlist.h +++ b/src/gromacs/nbnxm/pairlist.h @@ -44,7 +44,6 @@ #include "gromacs/math/vectypes.h" #include "gromacs/mdtypes/nblist.h" #include "gromacs/utility/basedefinitions.h" -#include "gromacs/utility/bitmask.h" #include "gromacs/utility/defaultinitializationallocator.h" #include "gromacs/utility/enumerationhelpers.h" #include "gromacs/utility/real.h" @@ -56,7 +55,6 @@ struct NbnxnListParameters; struct NbnxnPairlistCpuWork; struct NbnxnPairlistGpuWork; -struct tMPI_Atomic; namespace Nbnxm { @@ -287,188 +285,4 @@ struct nbnxn_pairlist_set_t std::vector nbl_fep; /* List of free-energy atom pair interactions */ }; -enum { - nbatXYZ, nbatXYZQ, nbatX4, nbatX8 -}; - -// Struct that holds force and energy output buffers -struct nbnxn_atomdata_output_t -{ - /* Constructor - * - * \param[in] kernelType Type of non-bonded kernel - * \param[in] numEnergyGroups The number of energy groups - * \param[in] simdEnergyBufferStride Stride for entries in the energy buffers for SIMD kernels - * \param[in] pinningPolicy Sets the pinning policy for all buffers used on the GPU - */ - nbnxn_atomdata_output_t(Nbnxm::KernelType kernelType, - int numEnergyGroups, - int simdEnergyBUfferStride, - gmx::PinningPolicy pinningPolicy); - - gmx::HostVector f; // f, size natoms*fstride - gmx::HostVector fshift; // Shift force array, size SHIFTS*DIM - gmx::HostVector Vvdw; // Temporary Van der Waals group energy storage - gmx::HostVector Vc; // Temporary Coulomb group energy storage - AlignedVector VSvdw; // Temporary SIMD Van der Waals group energy storage - AlignedVector VSc; // Temporary SIMD Coulomb group energy storage -}; - -/* Block size in atoms for the non-bonded thread force-buffer reduction, - * should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8). - * Should be small to reduce the reduction and zeroing cost, - * but too small will result in overhead. - * Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes. - */ -#if GMX_DOUBLE -#define NBNXN_BUFFERFLAG_SIZE 8 -#else -#define NBNXN_BUFFERFLAG_SIZE 16 -#endif - -/* We store the reduction flags as gmx_bitmask_t. - * This limits the number of flags to BITMASK_SIZE. - */ -#define NBNXN_BUFFERFLAG_MAX_THREADS (BITMASK_SIZE) - -/* Flags for telling if threads write to force output buffers */ -typedef struct { - int nflag; /* The number of flag blocks */ - gmx_bitmask_t *flag; /* Bit i is set when thread i writes to a cell-block */ - int flag_nalloc; /* Allocation size of cxy_flag */ -} nbnxn_buffer_flags_t; - -/* LJ combination rules: geometric, Lorentz-Berthelot, none */ -enum { - ljcrGEOM, ljcrLB, ljcrNONE, ljcrNR -}; - -/* Struct that stores atom related data for the nbnxn module - * - * Note: performance would improve slightly when all std::vector containers - * in this struct would not initialize during resize(). - */ -struct nbnxn_atomdata_t -{ //NOLINT(clang-analyzer-optin.performance.Padding) - struct Params - { - /* Constructor - * - * \param[in] pinningPolicy Sets the pinning policy for all data that might be transfered to a GPU - */ - Params(gmx::PinningPolicy pinningPolicy); - - // The number of different atom types - int numTypes; - // Lennard-Jone 6*C6 and 12*C12 parameters, size numTypes*2*2 - gmx::HostVector nbfp; - // Combination rule, see enum defined above - int comb_rule; - // LJ parameters per atom type, size numTypes*2 - gmx::HostVector nbfp_comb; - // As nbfp, but with a stride for the present SIMD architecture - AlignedVector nbfp_aligned; - // Atom types per atom - gmx::HostVector type; - // LJ parameters per atom for fast SIMD loading - gmx::HostVector lj_comb; - // Charges per atom, not set with format nbatXYZQ - gmx::HostVector q; - // The number of energy groups - int nenergrp; - // 2log(nenergrp) - int neg_2log; - // The energy groups, one int entry per cluster, only set when needed - gmx::HostVector energrp; - }; - - // Diagonal and topology exclusion helper data for all SIMD kernels - struct SimdMasks - { - SimdMasks(); - - // Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels - AlignedVector diagonal_4xn_j_minus_i; - // Helper data for setting up diaginal exclusion masks in the SIMD 2xNN kernels - AlignedVector diagonal_2xnn_j_minus_i; - // Filters for topology exclusion masks for the SIMD kernels - AlignedVector exclusion_filter; - // Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support - AlignedVector exclusion_filter64; - // Array of masks needed for exclusions - AlignedVector interaction_array; - }; - - /* Constructor - * - * \param[in] pinningPolicy Sets the pinning policy for all data that might be transfered to a GPU - */ - nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy); - - /* Returns a const reference to the parameters */ - const Params ¶ms() const - { - return params_; - } - - /* Returns a non-const reference to the parameters */ - Params ¶msDeprecated() - { - return params_; - } - - /* Returns the current total number of atoms stored */ - int numAtoms() const - { - return numAtoms_; - } - - /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */ - gmx::ArrayRef x() const - { - return x_; - } - - /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */ - gmx::ArrayRef x() - { - return x_; - } - - /* Resizes the coordinate buffer and sets the number of atoms */ - void resizeCoordinateBuffer(int numAtoms); - - /* Resizes the force buffers for the current number of atoms */ - void resizeForceBuffers(); - - private: - // The LJ and charge parameters - Params params_; - // The total number of atoms currently stored - int numAtoms_; - public: - int natoms_local; /* Number of local atoms */ - int XFormat; /* The format of x (and q), enum */ - int FFormat; /* The format of f, enum */ - gmx_bool bDynamicBox; /* Do we need to update shift_vec every step? */ - gmx::HostVector shift_vec; /* Shift vectors, copied from t_forcerec */ - int xstride; /* stride for a coordinate in x (usually 3 or 4) */ - int fstride; /* stride for a coordinate in f (usually 3 or 4) */ - private: - gmx::HostVector x_; /* x and possibly q, size natoms*xstride */ - - public: - // Masks for handling exclusions in the SIMD kernels - const SimdMasks simdMasks; - - /* Output data */ - std::vector out; /* Output data structures, 1 per thread */ - - /* Reduction related data */ - gmx_bool bUseBufferFlags; /* Use the flags or operate on all atoms */ - nbnxn_buffer_flags_t buffer_flags; /* Flags for buffer zeroing+reduc. */ - gmx_bool bUseTreeReduce; /* Use tree for force reduction */ - tMPI_Atomic *syncStep; /* Synchronization step for tree reduce */ -}; - #endif