From a7e51062b4d29061a82b74de3935a627cdce35c9 Mon Sep 17 00:00:00 2001 From: Kevin Boyd Date: Mon, 8 Jun 2020 07:46:32 +0000 Subject: [PATCH] Replace nbnxn_buffer_flags_t with vector Plugs a memory leak --- src/gromacs/gmxpreprocess/grompp.cpp | 2 + src/gromacs/gmxpreprocess/readir.cpp | 1 + src/gromacs/nbnxm/atomdata.cpp | 34 +++++++-------- src/gromacs/nbnxm/atomdata.h | 13 +----- src/gromacs/nbnxm/kernel_common.cpp | 8 ++-- src/gromacs/nbnxm/pairlist.cpp | 62 ++++++++++++++-------------- src/gromacs/nbnxm/pairsearch.cpp | 9 +--- src/gromacs/nbnxm/pairsearch.h | 2 +- 8 files changed, 56 insertions(+), 75 deletions(-) diff --git a/src/gromacs/gmxpreprocess/grompp.cpp b/src/gromacs/gmxpreprocess/grompp.cpp index 84c5d99328..960e3e196b 100644 --- a/src/gromacs/gmxpreprocess/grompp.cpp +++ b/src/gromacs/gmxpreprocess/grompp.cpp @@ -2384,6 +2384,8 @@ int gmx_grompp(int argc, char* argv[]) gmx::write_IMDgroup_to_file(ir->bIMD, ir, &state, &sys, NFILE, fnm); sfree(opts->define); + sfree(opts->wall_atomtype[0]); + sfree(opts->wall_atomtype[1]); sfree(opts->include); sfree(opts); for (auto& mol : mi) diff --git a/src/gromacs/gmxpreprocess/readir.cpp b/src/gromacs/gmxpreprocess/readir.cpp index 489fd216ab..2e750d20b0 100644 --- a/src/gromacs/gmxpreprocess/readir.cpp +++ b/src/gromacs/gmxpreprocess/readir.cpp @@ -1675,6 +1675,7 @@ static void do_wall_params(t_inputrec* ir, char* wall_atomtype, char* wall_densi gmx_fatal(FARGS, "Expected %d elements for wall_atomtype, found %zu", ir->nwall, wallAtomTypes.size()); } + GMX_RELEASE_ASSERT(ir->nwall < 3, "Invalid number of walls"); for (int i = 0; i < ir->nwall; i++) { opts->wall_atomtype[i] = gmx_strdup(wallAtomTypes[i].c_str()); diff --git a/src/gromacs/nbnxm/atomdata.cpp b/src/gromacs/nbnxm/atomdata.cpp index 35901f40f1..12003eb236 100644 --- a/src/gromacs/nbnxm/atomdata.cpp +++ b/src/gromacs/nbnxm/atomdata.cpp @@ -675,8 +675,7 @@ void nbnxn_atomdata_init(const gmx::MDLogger& mdlog, pinningPolicy); } - nbat->buffer_flags.flag = nullptr; - nbat->buffer_flags.flag_nalloc = 0; + nbat->buffer_flags.clear(); const int nth = gmx_omp_nthreads_get(emntNonbonded); @@ -1236,7 +1235,7 @@ static inline unsigned char reverse_bits(unsigned char b) static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(nbnxn_atomdata_t* nbat, int nth) { - const nbnxn_buffer_flags_t* flags = &nbat->buffer_flags; + gmx::ArrayRef flags = nbat->buffer_flags; int next_pow2 = 1 << (gmx::log2I(nth - 1) + 1); @@ -1340,15 +1339,15 @@ static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(nbnxn_atomdata_t* nbat, in } /* Calculate the cell-block range for our thread */ - b0 = (flags->nflag * group_pos) / group_size; - b1 = (flags->nflag * (group_pos + 1)) / group_size; + b0 = (flags.size() * group_pos) / group_size; + b1 = (flags.size() * (group_pos + 1)) / group_size; for (b = b0; b < b1; b++) { i0 = b * NBNXN_BUFFERFLAG_SIZE * nbat->fstride; i1 = (b + 1) * NBNXN_BUFFERFLAG_SIZE * nbat->fstride; - if (bitmask_is_set(flags->flag[b], index[1]) || group_size > 2) + if (bitmask_is_set(flags[b], index[1]) || group_size > 2) { const real* fIndex1 = nbat->out[index[1]].f.data(); #if GMX_SIMD @@ -1357,10 +1356,10 @@ static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(nbnxn_atomdata_t* nbat, in nbnxn_atomdata_reduce_reals #endif (nbat->out[index[0]].f.data(), - bitmask_is_set(flags->flag[b], index[0]) || group_size > 2, - &fIndex1, 1, i0, i1); + bitmask_is_set(flags[b], index[0]) || group_size > 2, &fIndex1, + 1, i0, i1); } - else if (!bitmask_is_set(flags->flag[b], index[0])) + else if (!bitmask_is_set(flags[b], index[0])) { nbnxn_atomdata_clear_reals(nbat->out[index[0]].f, i0, i1); } @@ -1380,15 +1379,14 @@ static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbnxn_atomdata_t* nbat, int { try { - const nbnxn_buffer_flags_t* flags; - int nfptr; - const real* fptr[NBNXN_BUFFERFLAG_MAX_THREADS]; + int nfptr; + const real* fptr[NBNXN_BUFFERFLAG_MAX_THREADS]; - flags = &nbat->buffer_flags; + gmx::ArrayRef flags = nbat->buffer_flags; /* Calculate the cell-block range for our thread */ - int b0 = (flags->nflag * th) / nth; - int b1 = (flags->nflag * (th + 1)) / nth; + int b0 = (flags.size() * th) / nth; + int b1 = (flags.size() * (th + 1)) / nth; for (int b = b0; b < b1; b++) { @@ -1398,7 +1396,7 @@ static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbnxn_atomdata_t* nbat, int nfptr = 0; for (gmx::index out = 1; out < gmx::ssize(nbat->out); out++) { - if (bitmask_is_set(flags->flag[b], out)) + if (bitmask_is_set(flags[b], out)) { fptr[nfptr++] = nbat->out[out].f.data(); } @@ -1410,9 +1408,9 @@ static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbnxn_atomdata_t* nbat, int #else nbnxn_atomdata_reduce_reals #endif - (nbat->out[0].f.data(), bitmask_is_set(flags->flag[b], 0), fptr, nfptr, i0, i1); + (nbat->out[0].f.data(), bitmask_is_set(flags[b], 0), fptr, nfptr, i0, i1); } - else if (!bitmask_is_set(flags->flag[b], 0)) + else if (!bitmask_is_set(flags[b], 0)) { nbnxn_atomdata_clear_reals(nbat->out[0].f, i0, i1); } diff --git a/src/gromacs/nbnxm/atomdata.h b/src/gromacs/nbnxm/atomdata.h index 4b3a398562..007e2268ef 100644 --- a/src/gromacs/nbnxm/atomdata.h +++ b/src/gromacs/nbnxm/atomdata.h @@ -154,17 +154,6 @@ struct nbnxn_atomdata_output_t */ #define NBNXN_BUFFERFLAG_MAX_THREADS (BITMASK_SIZE) -/*! \internal - * \brief Flags for telling if threads write to force output buffers */ -typedef struct -{ - //! The number of flag blocks - int nflag; - //! Bit i is set when thread i writes to a cell-block - gmx_bitmask_t* flag; - //! Allocation size of cxy_flag - int flag_nalloc; -} nbnxn_buffer_flags_t; /*! \brief LJ combination rules: geometric, Lorentz-Berthelot, none */ enum @@ -300,7 +289,7 @@ public: //! Use the flags or operate on all atoms gmx_bool bUseBufferFlags; //! Flags for buffer zeroing+reduc. - nbnxn_buffer_flags_t buffer_flags; + std::vector buffer_flags; //! Use tree for force reduction gmx_bool bUseTreeReduce; //! Synchronization step for tree reduce diff --git a/src/gromacs/nbnxm/kernel_common.cpp b/src/gromacs/nbnxm/kernel_common.cpp index 9111a0b101..ac176fa6cd 100644 --- a/src/gromacs/nbnxm/kernel_common.cpp +++ b/src/gromacs/nbnxm/kernel_common.cpp @@ -67,15 +67,15 @@ static void clearBufferAll(gmx::ArrayRef buffer) template static void clearBufferFlagged(const nbnxn_atomdata_t& nbat, int outputIndex, gmx::ArrayRef buffer) { - const nbnxn_buffer_flags_t& flags = nbat.buffer_flags; - gmx_bitmask_t our_flag; + gmx::ArrayRef flags = nbat.buffer_flags; + gmx_bitmask_t our_flag; bitmask_init_bit(&our_flag, outputIndex); constexpr size_t numComponentsPerBlock = NBNXN_BUFFERFLAG_SIZE * numComponentsPerElement; - for (int b = 0; b < flags.nflag; b++) + for (size_t b = 0; b < flags.size(); b++) { - if (!bitmask_is_disjoint(flags.flag[b], our_flag)) + if (!bitmask_is_disjoint(flags[b], our_flag)) { clearBufferAll(buffer.subArray(b * numComponentsPerBlock, numComponentsPerBlock)); } diff --git a/src/gromacs/nbnxm/pairlist.cpp b/src/gromacs/nbnxm/pairlist.cpp index 66ec8689d9..4365b02a08 100644 --- a/src/gromacs/nbnxm/pairlist.cpp +++ b/src/gromacs/nbnxm/pairlist.cpp @@ -239,20 +239,19 @@ void nbnxn_init_pairlist_fep(t_nblist* nl) nl->excl_fep = nullptr; } -static void init_buffer_flags(nbnxn_buffer_flags_t* flags, int natoms) +static constexpr int sizeNeededForBufferFlags(const int numAtoms) { - flags->nflag = (natoms + NBNXN_BUFFERFLAG_SIZE - 1) / NBNXN_BUFFERFLAG_SIZE; - if (flags->nflag > flags->flag_nalloc) - { - flags->flag_nalloc = over_alloc_large(flags->nflag); - srenew(flags->flag, flags->flag_nalloc); - } - for (int b = 0; b < flags->nflag; b++) - { - bitmask_clear(&(flags->flag[b])); - } + return (numAtoms + NBNXN_BUFFERFLAG_SIZE - 1) / NBNXN_BUFFERFLAG_SIZE; } +// Resets current flags to 0 and adds more flags if needed. +static void resizeAndZeroBufferFlags(std::vector* flags, const int numAtoms) +{ + flags->clear(); + flags->resize(sizeNeededForBufferFlags(numAtoms), 0); +} + + /* Returns the pair-list cutoff between a bounding box and a grid cell given an atom-to-atom pair-list cutoff * * Given a cutoff distance between atoms, this functions returns the cutoff @@ -3126,7 +3125,7 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet& gridSet, gridi_flag_shift = getBufferFlagShift(nbl->na_ci); gridj_flag_shift = getBufferFlagShift(nbl->na_cj); - gridj_flag = work->buffer_flags.flag; + gridj_flag = work->buffer_flags.data(); } gridSet.getBox(box); @@ -3560,7 +3559,7 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet& gridSet, if (bFBufferFlag && getNumSimpleJClustersInList(*nbl) > ncj_old_i) { - bitmask_init_bit(&(work->buffer_flags.flag[(iGrid.cellOffset() + ci) >> gridi_flag_shift]), th); + bitmask_init_bit(&(work->buffer_flags[(iGrid.cellOffset() + ci) >> gridi_flag_shift]), th); } } @@ -3583,20 +3582,21 @@ static void nbnxn_make_pairlist_part(const Nbnxm::GridSet& gridSet, static void reduce_buffer_flags(gmx::ArrayRef searchWork, int nsrc, - const nbnxn_buffer_flags_t* dest) + gmx::ArrayRef dest) { for (int s = 0; s < nsrc; s++) { - gmx_bitmask_t* flag = searchWork[s].buffer_flags.flag; + gmx::ArrayRef flags(searchWork[s].buffer_flags); - for (int b = 0; b < dest->nflag; b++) + for (size_t b = 0; b < dest.size(); b++) { - bitmask_union(&(dest->flag[b]), flag[b]); + gmx_bitmask_t& flag = dest[b]; + bitmask_union(&flag, flags[b]); } } } -static void print_reduction_cost(const nbnxn_buffer_flags_t* flags, int nout) +static void print_reduction_cost(gmx::ArrayRef flags, int nout) { int nelem, nkeep, ncopy, nred, out; gmx_bitmask_t mask_0; @@ -3606,20 +3606,20 @@ static void print_reduction_cost(const nbnxn_buffer_flags_t* flags, int nout) ncopy = 0; nred = 0; bitmask_init_bit(&mask_0, 0); - for (int b = 0; b < flags->nflag; b++) + for (const gmx_bitmask_t& flag_mask : flags) { - if (bitmask_is_equal(flags->flag[b], mask_0)) + if (bitmask_is_equal(flag_mask, mask_0)) { /* Only flag 0 is set, no copy of reduction required */ nelem++; nkeep++; } - else if (!bitmask_is_zero(flags->flag[b])) + else if (!bitmask_is_zero(flag_mask)) { int c = 0; for (out = 0; out < nout; out++) { - if (bitmask_is_set(flags->flag[b], out)) + if (bitmask_is_set(flag_mask, out)) { c++; } @@ -3635,12 +3635,10 @@ static void print_reduction_cost(const nbnxn_buffer_flags_t* flags, int nout) } } } - + const auto numFlags = static_cast(flags.size()); fprintf(debug, - "nbnxn reduction: #flag %d #list %d elem %4.2f, keep %4.2f copy %4.2f red %4.2f\n", - flags->nflag, nout, nelem / static_cast(flags->nflag), - nkeep / static_cast(flags->nflag), ncopy / static_cast(flags->nflag), - nred / static_cast(flags->nflag)); + "nbnxn reduction: #flag %lu #list %d elem %4.2f, keep %4.2f copy %4.2f red %4.2f\n", + flags.size(), nout, nelem / numFlags, nkeep / numFlags, ncopy / numFlags, nred / numFlags); } /* Copies the list entries from src to dest when cjStart <= *cjGlobal < cjEnd. @@ -3740,7 +3738,7 @@ static void rebalanceSimpleLists(gmx::ArrayRef srcSet, /* Note that the flags in the work struct (still) contain flags * for all entries that are present in srcSet->nbl[t]. */ - gmx_bitmask_t* flag = searchWork[t].buffer_flags.flag; + gmx_bitmask_t* flag = &searchWork[t].buffer_flags[0]; int iFlagShift = getBufferFlagShift(dest.na_ci); int jFlagShift = getBufferFlagShift(dest.na_cj); @@ -3946,7 +3944,7 @@ void PairlistSet::constructPairlists(const Nbnxm::GridSet& gridSet, /* We should re-init the flags before making the first list */ if (nbat->bUseBufferFlags && locality_ == InteractionLocality::Local) { - init_buffer_flags(&nbat->buffer_flags, nbat->numAtoms()); + resizeAndZeroBufferFlags(&nbat->buffer_flags, nbat->numAtoms()); } if (!isCpuType_ && minimumIlistCountForGpuBalancing > 0) @@ -4016,7 +4014,7 @@ void PairlistSet::constructPairlists(const Nbnxm::GridSet& gridSet, */ if (nbat->bUseBufferFlags && (iZone == 0 && jZone == 0)) { - init_buffer_flags(&searchWork[th].buffer_flags, nbat->numAtoms()); + resizeAndZeroBufferFlags(&searchWork[th].buffer_flags, nbat->numAtoms()); } if (combineLists_ && th > 0) @@ -4133,7 +4131,7 @@ void PairlistSet::constructPairlists(const Nbnxm::GridSet& gridSet, if (nbat->bUseBufferFlags) { - reduce_buffer_flags(searchWork, numLists, &nbat->buffer_flags); + reduce_buffer_flags(searchWork, numLists, nbat->buffer_flags); } if (gridSet.haveFep()) @@ -4187,7 +4185,7 @@ void PairlistSet::constructPairlists(const Nbnxm::GridSet& gridSet, if (nbat->bUseBufferFlags) { - print_reduction_cost(&nbat->buffer_flags, numLists); + print_reduction_cost(nbat->buffer_flags, numLists); } } diff --git a/src/gromacs/nbnxm/pairsearch.cpp b/src/gromacs/nbnxm/pairsearch.cpp index 34f064b2cc..d8ffbcb5ae 100644 --- a/src/gromacs/nbnxm/pairsearch.cpp +++ b/src/gromacs/nbnxm/pairsearch.cpp @@ -84,12 +84,7 @@ static void free_nblist(t_nblist* nl) #ifndef DOXYGEN -PairsearchWork::PairsearchWork() : - cp0({ { 0 } }), - buffer_flags({ 0, nullptr, 0 }), - ndistc(0), - nbl_fep(new t_nblist), - cp1({ { 0 } }) +PairsearchWork::PairsearchWork() : cp0({ { 0 } }), ndistc(0), nbl_fep(new t_nblist), cp1({ { 0 } }) { nbnxn_init_pairlist_fep(nbl_fep.get()); } @@ -98,8 +93,6 @@ PairsearchWork::PairsearchWork() : PairsearchWork::~PairsearchWork() { - sfree(buffer_flags.flag); - free_nblist(nbl_fep.get()); } diff --git a/src/gromacs/nbnxm/pairsearch.h b/src/gromacs/nbnxm/pairsearch.h index 66d1ea39c9..bd5529dbd5 100644 --- a/src/gromacs/nbnxm/pairsearch.h +++ b/src/gromacs/nbnxm/pairsearch.h @@ -158,7 +158,7 @@ struct PairsearchWork std::vector sortBuffer; //! Flags for force buffer access - nbnxn_buffer_flags_t buffer_flags; + std::vector buffer_flags; //! Number of distance checks for flop counting int ndistc; -- 2.22.0