From: Berk Hess Date: Tue, 8 Jan 2019 15:09:10 +0000 (+0100) Subject: Extract nbnxm grid.h and pairlistwork.h X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=f021aff1972ed9b9340b47baed10fd76a1a7781c;p=alexxy%2Fgromacs.git Extract nbnxm grid.h and pairlistwork.h Change-Id: Ia0da75a21e56d7e473f868cc3758f415ac9e4eb9 --- diff --git a/docs/doxygen/suppressions.txt b/docs/doxygen/suppressions.txt index a8c95b0f7a..b055948a41 100644 --- a/docs/doxygen/suppressions.txt +++ b/docs/doxygen/suppressions.txt @@ -19,6 +19,7 @@ src/gromacs/ewald/pme_simd4.h: warning: should include "pme_simd.h" src/gromacs/ewald/pme_spline_work.cpp: warning: includes "simd.h" unnecessarily src/gromacs/ewald/pme_spline_work.h: warning: includes "simd.h" unnecessarily src/gromacs/ewald/pme_spread.cpp: warning: includes "simd.h" unnecessarily +src/gromacs/nbnxm/grid.h: warning: includes "simd.h" unnecessarily src/gromacs/nbnxm/kernels_simd_2xmm/kernel_inner.h: warning: should include "simd.h" src/gromacs/nbnxm/kernels_simd_2xmm/kernel_outer.h: warning: should include "simd.h" src/gromacs/nbnxm/kernels_simd_4xm/kernel_inner.h: warning: should include "simd.h" diff --git a/src/gromacs/nbnxm/atomdata.cpp b/src/gromacs/nbnxm/atomdata.cpp index 32bc76379b..db4ad05dc6 100644 --- a/src/gromacs/nbnxm/atomdata.cpp +++ b/src/gromacs/nbnxm/atomdata.cpp @@ -65,6 +65,7 @@ #include "gromacs/utility/strconvert.h" #include "gromacs/utility/stringutil.h" +#include "grid.h" #include "internal.h" using namespace gmx; // TODO: Remove when this file is moved into gmx namespace diff --git a/src/gromacs/nbnxm/grid.cpp b/src/gromacs/nbnxm/grid.cpp index 0bc41c0f24..7128d74bf9 100644 --- a/src/gromacs/nbnxm/grid.cpp +++ b/src/gromacs/nbnxm/grid.cpp @@ -35,6 +35,8 @@ #include "gmxpre.h" +#include "grid.h" + #include #include diff --git a/src/gromacs/nbnxm/grid.h b/src/gromacs/nbnxm/grid.h new file mode 100644 index 0000000000..adab79c8c5 --- /dev/null +++ b/src/gromacs/nbnxm/grid.h @@ -0,0 +1,184 @@ +/* + * 
This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2019, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +/*! 
\internal \file + * + * \brief Declares the grid and bounding box objects + * + * \author Berk Hess + * + * \ingroup module_nbnxm + */ + +#ifndef GMX_NBNXM_GRID_H +#define GMX_NBNXM_GRID_H + +#include +#include + +#include "gromacs/math/vectypes.h" +#include "gromacs/simd/simd.h" +#include "gromacs/utility/alignedallocator.h" + + +struct gmx_domdec_zones_t; + + +#ifndef DOXYGEN + +/* Pair search box lower and upper corner in x,y,z. + * Store this in 4 instead of 3 reals, which is useful with 4-wide SIMD. + * To avoid complicating the code we also use 4 without 4-wide SIMD. + */ +#define NNBSBB_C 4 +/* Pair search box lower and upper bound in z only. */ +#define NNBSBB_D 2 +/* Pair search box lower and upper corner x,y,z indices, entry 3 is unused */ +#define BB_X 0 +#define BB_Y 1 +#define BB_Z 2 + +#endif // !DOXYGEN + + +/* Bounding box for an nbnxn atom cluster: lower and upper corner, each stored as NNBSBB_C floats */ +typedef struct { + float lower[NNBSBB_C]; + float upper[NNBSBB_C]; +} nbnxn_bb_t; + + +#ifndef DOXYGEN + +/* Bounding box calculations are (currently) always in single precision, so + * we only need to check for single precision support here. + * This uses less (cache-)memory and SIMD is faster, at least on x86. + */ +#if GMX_SIMD4_HAVE_FLOAT +# define NBNXN_SEARCH_BB_SIMD4 1 +/* Memory alignment in bytes as required by SIMD aligned loads/stores */ +# define NBNXN_SEARCH_BB_MEM_ALIGN (GMX_SIMD4_WIDTH*sizeof(float)) +#else +# define NBNXN_SEARCH_BB_SIMD4 0 +/* No alignment required, but set it so we can call the same routines */ +# define NBNXN_SEARCH_BB_MEM_ALIGN 32 +#endif + + +#if NBNXN_SEARCH_BB_SIMD4 +/* Always use 4-wide SIMD for bounding box calculations */ + +# if !GMX_DOUBLE +/* Single precision BBs + coordinates, we can also load coordinates with SIMD */ +# define NBNXN_SEARCH_SIMD4_FLOAT_X_BB 1 +# else +# define NBNXN_SEARCH_SIMD4_FLOAT_X_BB 0 +# endif + +/* The packed bounding box coordinate stride is always set to 4. + * With AVX we could use 8, but that turns out not to be faster.
+ */ +# define STRIDE_PBB GMX_SIMD4_WIDTH +# define STRIDE_PBB_2LOG 2 + +/* Store bounding boxes corners as quadruplets: xxxxyyyyzzzz */ +# define NBNXN_BBXXXX 1 +/* Size of bounding box corners quadruplet */ +# define NNBSBB_XXXX (NNBSBB_D*DIM*STRIDE_PBB) + +#else /* NBNXN_SEARCH_BB_SIMD4 */ + +# define NBNXN_SEARCH_SIMD4_FLOAT_X_BB 0 +# define NBNXN_BBXXXX 0 + +#endif /* NBNXN_SEARCH_BB_SIMD4 */ + +#endif // !DOXYGEN + + +/* A pair-search grid struct for one domain decomposition zone + * + * Note that when atom groups, instead of individual atoms, are assigned + * to grid cells, individual atoms can be geometrically outside the cell + * and grid that they have been assigned to (as determined by the center + * or geometry of the atom group they belong to). + */ +struct nbnxn_grid_t +{ + rvec c0; /* The lower corner of the (local) grid */ + rvec c1; /* The upper corner of the (local) grid */ + rvec size; /* c1 - c0 */ + real atom_density; /* The atom number density for the local grid */ + real maxAtomGroupRadius; /* The maximum distance an atom can be outside + * of a cell and outside of the grid + */ + + gmx_bool bSimple; /* Is this grid simple or super/sub */ + int na_c; /* Number of atoms per cluster */ + int na_cj; /* Number of atoms for list j-clusters */ + int na_sc; /* Number of atoms per super-cluster */ + int na_c_2log; /* Base-2 logarithm of na_c */ + + int numCells[DIM - 1]; /* Number of cells along x/y */ + int nc; /* Total number of cells */ + + real cellSize[DIM - 1]; /* Size of a cell */ + real invCellSize[DIM - 1]; /* 1/cellSize */ + + int cell0; /* Index in nbs->cell corresponding to cell 0 */ + + /* Grid data */ + std::vector cxy_na; /* The number of atoms for each column in x,y */ + std::vector cxy_ind; /* Grid (super)cell index, offset from cell0 */ + + std::vector nsubc; /* The number of sub cells for each super cell */ + + /* Bounding boxes */ + std::vector bbcz; /* Bounding boxes in z for the cells */ + std::vector < nbnxn_bb_t, gmx::AlignedAllocator <
nbnxn_bb_t>> bb; /* 3D bounding boxes for the sub cells */ + std::vector < nbnxn_bb_t, gmx::AlignedAllocator < nbnxn_bb_t>> bbjStorage; /* 3D j-bounding boxes for the case where + * the i- and j-cluster sizes are different */ + gmx::ArrayRef bbj; /* 3D j-bounding boxes */ + std::vector < float, gmx::AlignedAllocator < float>> pbb; /* 3D bounding boxes in xxxx format per super cell */ + + /* Bit-flag information */ + std::vector flags; /* Flags for properties of clusters in each cell */ + std::vector fep; /* FEP signal bits for atoms in each cluster */ + + /* Statistics */ + int nsubc_tot; /* Total number of subcells, used for printing */ +}; + +#endif diff --git a/src/gromacs/nbnxm/internal.h b/src/gromacs/nbnxm/internal.h index e00e52eab8..55066e8059 100644 --- a/src/gromacs/nbnxm/internal.h +++ b/src/gromacs/nbnxm/internal.h @@ -58,6 +58,7 @@ #include "gromacs/utility/real.h" struct gmx_domdec_zones_t; +struct nbnxn_grid_t; // TODO Document after refactoring @@ -88,63 +89,6 @@ template static inline int atom_to_x_index(int a) #define NBNXN_MEM_ALIGN 32 #endif - -/* Bounding box calculations are (currently) always in single precision, so - * we only need to check for single precision support here. - * This uses less (cache-)memory and SIMD is faster, at least on x86. - */ -#if GMX_SIMD4_HAVE_FLOAT -# define NBNXN_SEARCH_BB_SIMD4 1 -/* Memory alignment in bytes as required by SIMD aligned loads/stores */ -# define NBNXN_SEARCH_BB_MEM_ALIGN (GMX_SIMD4_WIDTH*sizeof(float)) -#else -# define NBNXN_SEARCH_BB_SIMD4 0 -/* No alignment required, but set it so we can call the same routines */ -# define NBNXN_SEARCH_BB_MEM_ALIGN 32 -#endif - - -/* Pair search box lower and upper corner in x,y,z. - * Store this in 4 iso 3 reals, which is useful with 4-wide SIMD. - * To avoid complicating the code we also use 4 without 4-wide SIMD. - */ -#define NNBSBB_C 4 -/* Pair search box lower and upper bound in z only.
*/ -#define NNBSBB_D 2 -/* Pair search box lower and upper corner x,y,z indices, entry 3 is unused */ -#define BB_X 0 -#define BB_Y 1 -#define BB_Z 2 - - -#if NBNXN_SEARCH_BB_SIMD4 -/* Always use 4-wide SIMD for bounding box calculations */ - -# if !GMX_DOUBLE -/* Single precision BBs + coordinates, we can also load coordinates with SIMD */ -# define NBNXN_SEARCH_SIMD4_FLOAT_X_BB 1 -# else -# define NBNXN_SEARCH_SIMD4_FLOAT_X_BB 0 -# endif - -/* The packed bounding box coordinate stride is always set to 4. - * With AVX we could use 8, but that turns out not to be faster. - */ -# define STRIDE_PBB GMX_SIMD4_WIDTH -# define STRIDE_PBB_2LOG 2 - -/* Store bounding boxes corners as quadruplets: xxxxyyyyzzzz */ -# define NBNXN_BBXXXX 1 -/* Size of bounding box corners quadruplet */ -# define NNBSBB_XXXX (NNBSBB_D*DIM*STRIDE_PBB) - -#else /* NBNXN_SEARCH_BB_SIMD4 */ - -# define NBNXN_SEARCH_SIMD4_FLOAT_X_BB 0 -# define NBNXN_BBXXXX 0 - -#endif /* NBNXN_SEARCH_BB_SIMD4 */ - #endif // !DOXYGEN @@ -153,157 +97,6 @@ template using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>; -/* Bounding box for a nbnxn atom cluster */ -typedef struct { - float lower[NNBSBB_C]; - float upper[NNBSBB_C]; -} nbnxn_bb_t; - - -/* A pair-search grid struct for one domain decomposition zone - * - * Note that when atom groups, instead of individual atoms, are assigned - * to grid cells, individual atoms can be geometrically outside the cell - * and grid that they have been assigned to (as determined by the center - * or geometry of the atom group they belong to). 
- */ -struct nbnxn_grid_t -{ - rvec c0; /* The lower corner of the (local) grid */ - rvec c1; /* The upper corner of the (local) grid */ - rvec size; /* c1 - c0 */ - real atom_density; /* The atom number density for the local grid */ - real maxAtomGroupRadius; /* The maximum distance an atom can be outside - * of a cell and outside of the grid - */ - - gmx_bool bSimple; /* Is this grid simple or super/sub */ - int na_c; /* Number of atoms per cluster */ - int na_cj; /* Number of atoms for list j-clusters */ - int na_sc; /* Number of atoms per super-cluster */ - int na_c_2log; /* 2log of na_c */ - - int numCells[DIM - 1]; /* Number of cells along x/y */ - int nc; /* Total number of cells */ - - real cellSize[DIM - 1]; /* size of a cell */ - real invCellSize[DIM - 1]; /* 1/cellSize */ - - int cell0; /* Index in nbs->cell corresponding to cell 0 */ - - /* Grid data */ - std::vector cxy_na; /* The number of atoms for each column in x,y */ - std::vector cxy_ind; /* Grid (super)cell index, offset from cell0 */ - - std::vector nsubc; /* The number of sub cells for each super cell */ - - /* Bounding boxes */ - std::vector bbcz; /* Bounding boxes in z for the cells */ - std::vector < nbnxn_bb_t, gmx::AlignedAllocator < nbnxn_bb_t>> bb; /* 3D bounding boxes for the sub cells */ - std::vector < nbnxn_bb_t, gmx::AlignedAllocator < nbnxn_bb_t>> bbjStorage; /* 3D j-bounding boxes for the case where - * the i- and j-cluster sizes are different */ - gmx::ArrayRef bbj; /* 3D j-bounding boxes */ - std::vector < float, gmx::AlignedAllocator < float>> pbb; /* 3D b. 
boxes in xxxx format per super cell */ - - /* Bit-flag information */ - std::vector flags; /* Flags for properties of clusters in each cell */ - std::vector fep; /* FEP signal bits for atoms in each cluster */ - - /* Statistics */ - int nsubc_tot; /* Total number of subcell, used for printing */ -}; - -/* Working data for the actual i-supercell during pair search */ -struct NbnxnPairlistCpuWork -{ - // Struct for storing coordinats and bounding box for an i-entry during search - struct IClusterData - { - IClusterData() : - bb(1), - x(c_nbnxnCpuIClusterSize*DIM), - xSimd(c_nbnxnCpuIClusterSize*DIM*GMX_REAL_MAX_SIMD_WIDTH) - { - } - - // The bounding boxes, pbc shifted, for each cluster - AlignedVector bb; - // The coordinates, pbc shifted, for each atom - std::vector x; - // Aligned list for storing 4*DIM*GMX_SIMD_REAL_WIDTH reals - AlignedVector xSimd; - }; - - // Protect data from cache pollution between threads - gmx_cache_protect_t cp0; - - // Work data for generating an IEntry in the pairlist - IClusterData iClusterData; - // The current cj_ind index for the current list - int cj_ind; - // Temporary j-cluster list, used for sorting on exclusions - std::vector cj; - - // Nr. of cluster pairs without Coulomb for flop counting - int ncj_noq; - // Nr. 
of cluster pairs with 1/2 LJ for flop count - int ncj_hlj; - - // Protect data from cache pollution between threads - gmx_cache_protect_t cp1; -}; - -/* Working data for the actual i-supercell during pair search */ -struct NbnxnPairlistGpuWork -{ - struct ISuperClusterData - { - ISuperClusterData() : - bb(c_gpuNumClusterPerCell), -#if NBNXN_SEARCH_BB_SIMD4 - bbPacked(c_gpuNumClusterPerCell/STRIDE_PBB*NNBSBB_XXXX), -#endif - x(c_gpuNumClusterPerCell*c_nbnxnGpuClusterSize*DIM), - xSimd(c_gpuNumClusterPerCell*c_nbnxnGpuClusterSize*DIM) - { - } - - // The bounding boxes, pbc shifted, for each cluster - AlignedVector bb; - // As bb, but in packed xxxx format - AlignedVector bbPacked; - // The coordinates, pbc shifted, for each atom - AlignedVector x; - // Aligned coordinate list used for 4*DIM*GMX_SIMD_REAL_WIDTH floats - AlignedVector xSimd; - }; - - NbnxnPairlistGpuWork() : - distanceBuffer(c_gpuNumClusterPerCell), - sci_sort({}, {gmx::PinningPolicy::PinnedIfSupported}) - { - } - - // Protect data from cache pollution between threads - gmx_cache_protect_t cp0; - - // Work data for generating an i-entry in the pairlist - ISuperClusterData iSuperClusterData; - // The current j-cluster index for the current list - int cj_ind; - // Bounding box distance work array - AlignedVector distanceBuffer; - - // Buffer for sorting list entries - std::vector sortBuffer; - - // Second sci array, for sorting - gmx::HostVector sci_sort; - - // Protect data from cache pollution between threads - gmx_cache_protect_t cp1; -}; - /* Local cycle count struct for profiling */ typedef struct { int count; diff --git a/src/gromacs/nbnxm/nbnxm_setup.cpp b/src/gromacs/nbnxm/nbnxm_setup.cpp index e00d1a09d1..d4dafe11c7 100644 --- a/src/gromacs/nbnxm/nbnxm_setup.cpp +++ b/src/gromacs/nbnxm/nbnxm_setup.cpp @@ -59,6 +59,7 @@ #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/logger.h" +#include "grid.h" #include "internal.h" /*! 
\brief Returns whether CPU SIMD support exists for the given inputrec diff --git a/src/gromacs/nbnxm/pairlist.cpp b/src/gromacs/nbnxm/pairlist.cpp index 1877af16a8..17dd8e09fd 100644 --- a/src/gromacs/nbnxm/pairlist.cpp +++ b/src/gromacs/nbnxm/pairlist.cpp @@ -69,7 +69,9 @@ #include "gromacs/utility/gmxomp.h" #include "gromacs/utility/smalloc.h" +#include "grid.h" #include "internal.h" +#include "pairlistwork.h" using namespace gmx; // TODO: Remove when this file is moved into gmx namespace diff --git a/src/gromacs/nbnxm/pairlistwork.h b/src/gromacs/nbnxm/pairlistwork.h new file mode 100644 index 0000000000..5542cbecb9 --- /dev/null +++ b/src/gromacs/nbnxm/pairlistwork.h @@ -0,0 +1,147 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2019, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. 
We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +/*! \internal \file + * + * \brief Declares working data structures for the CPU and GPU pairlists + * + * \author Berk Hess + * + * \ingroup module_nbnxm + */ + +#ifndef GMX_NBNXM_PAIRLISTWORK_H +#define GMX_NBNXM_PAIRLISTWORK_H + +#include +#include + +#include "gromacs/nbnxm/pairlist.h" +#include "gromacs/simd/simd.h" + +#include "grid.h" + +/* Working data for the i-entry being processed during pair search (CPU pairlist) */ +struct NbnxnPairlistCpuWork +{ + // Struct for storing coordinates and bounding box for an i-entry during search + struct IClusterData + { + IClusterData() : + bb(1), + x(c_nbnxnCpuIClusterSize*DIM), + xSimd(c_nbnxnCpuIClusterSize*DIM*GMX_REAL_MAX_SIMD_WIDTH) + { + } + + // The bounding boxes, pbc shifted, for each cluster + AlignedVector bb; + // The coordinates, pbc shifted, for each atom + std::vector x; + // Aligned list for storing 4*DIM*GMX_SIMD_REAL_WIDTH reals + AlignedVector xSimd; + }; + + // Protect data from cache pollution between threads + gmx_cache_protect_t cp0; + + // Work data for generating an IEntry in the pairlist + IClusterData iClusterData; + // The current cj_ind index for the current list + int cj_ind; + // Temporary j-cluster list, used for sorting on exclusions + std::vector cj; + + // Nr. of cluster pairs without Coulomb for flop counting + int ncj_noq; + // Nr.
of cluster pairs with 1/2 LJ for flop count + int ncj_hlj; + + // Protect data from cache pollution between threads + gmx_cache_protect_t cp1; +}; + +/* Working data for the i-supercell being processed during pair search (GPU pairlist) */ +struct NbnxnPairlistGpuWork +{ + struct ISuperClusterData + { + ISuperClusterData() : + bb(c_gpuNumClusterPerCell), +#if NBNXN_SEARCH_BB_SIMD4 + bbPacked(c_gpuNumClusterPerCell/STRIDE_PBB*NNBSBB_XXXX), +#endif + x(c_gpuNumClusterPerCell*c_nbnxnGpuClusterSize*DIM), + xSimd(c_gpuNumClusterPerCell*c_nbnxnGpuClusterSize*DIM) + { + } + + // The bounding boxes, pbc shifted, for each cluster + AlignedVector bb; + // As bb, but in packed xxxx format; only sized by the constructor when NBNXN_SEARCH_BB_SIMD4 is set + AlignedVector bbPacked; + // The coordinates, pbc shifted, for each atom + AlignedVector x; + // Aligned coordinate list used for 4*DIM*GMX_SIMD_REAL_WIDTH floats + AlignedVector xSimd; + }; + + NbnxnPairlistGpuWork() : + distanceBuffer(c_gpuNumClusterPerCell), + sci_sort({}, {gmx::PinningPolicy::PinnedIfSupported}) + { + } + + // Protect data from cache pollution between threads + gmx_cache_protect_t cp0; + + // Work data for generating an i-entry in the pairlist + ISuperClusterData iSuperClusterData; + // The current j-cluster index for the current list + int cj_ind; + // Bounding box distance work array + AlignedVector distanceBuffer; + + // Buffer for sorting list entries + std::vector sortBuffer; + + // Second sci array, for sorting + gmx::HostVector sci_sort; + + // Protect data from cache pollution between threads + gmx_cache_protect_t cp1; +}; + +#endif