gmx::write_IMDgroup_to_file(ir->bIMD, ir, &state, &sys, NFILE, fnm);
sfree(opts->define);
+ sfree(opts->wall_atomtype[0]);
+ sfree(opts->wall_atomtype[1]);
sfree(opts->include);
sfree(opts);
for (auto& mol : mi)
gmx_fatal(FARGS, "Expected %d elements for wall_atomtype, found %zu", ir->nwall,
wallAtomTypes.size());
}
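+ /* wall_atomtype holds at most two entries (indices 0 and 1), so nwall may be at most 2 */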
+ GMX_RELEASE_ASSERT(ir->nwall < 3, "Invalid number of walls");
for (int i = 0; i < ir->nwall; i++)
{
opts->wall_atomtype[i] = gmx_strdup(wallAtomTypes[i].c_str());
pinningPolicy);
}
- nbat->buffer_flags.flag = nullptr;
- nbat->buffer_flags.flag_nalloc = 0;
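+ /* resizeAndZeroBufferFlags() sizes and zeroes the vector again before the flags are used */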
+ nbat->buffer_flags.clear();
const int nth = gmx_omp_nthreads_get(emntNonbonded);
static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(nbnxn_atomdata_t* nbat, int nth)
{
- const nbnxn_buffer_flags_t* flags = &nbat->buffer_flags;
+ gmx::ArrayRef<const gmx_bitmask_t> flags = nbat->buffer_flags;
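+ /* Round the thread count up to the smallest power of two >= nth for the tree reduction */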
int next_pow2 = 1 << (gmx::log2I(nth - 1) + 1);
}
/* Calculate the cell-block range for our thread */
- b0 = (flags->nflag * group_pos) / group_size;
- b1 = (flags->nflag * (group_pos + 1)) / group_size;
+ b0 = (flags.size() * group_pos) / group_size;
+ b1 = (flags.size() * (group_pos + 1)) / group_size;
for (b = b0; b < b1; b++)
{
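+ /* i0..i1 is this cell-block's index range in the flattened force buffer */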
i0 = b * NBNXN_BUFFERFLAG_SIZE * nbat->fstride;
i1 = (b + 1) * NBNXN_BUFFERFLAG_SIZE * nbat->fstride;
- if (bitmask_is_set(flags->flag[b], index[1]) || group_size > 2)
+ if (bitmask_is_set(flags[b], index[1]) || group_size > 2)
{
const real* fIndex1 = nbat->out[index[1]].f.data();
#if GMX_SIMD
nbnxn_atomdata_reduce_reals
#endif
(nbat->out[index[0]].f.data(),
- bitmask_is_set(flags->flag[b], index[0]) || group_size > 2,
- &fIndex1, 1, i0, i1);
+ bitmask_is_set(flags[b], index[0]) || group_size > 2, &fIndex1,
+ 1, i0, i1);
}
- else if (!bitmask_is_set(flags->flag[b], index[0]))
+ else if (!bitmask_is_set(flags[b], index[0]))
{
nbnxn_atomdata_clear_reals(nbat->out[index[0]].f, i0, i1);
}
{
try
{
- const nbnxn_buffer_flags_t* flags;
- int nfptr;
- const real* fptr[NBNXN_BUFFERFLAG_MAX_THREADS];
+ int nfptr;
+ const real* fptr[NBNXN_BUFFERFLAG_MAX_THREADS];
- flags = &nbat->buffer_flags;
+ gmx::ArrayRef<const gmx_bitmask_t> flags = nbat->buffer_flags;
/* Calculate the cell-block range for our thread */
- int b0 = (flags->nflag * th) / nth;
- int b1 = (flags->nflag * (th + 1)) / nth;
+ int b0 = (flags.size() * th) / nth;
+ int b1 = (flags.size() * (th + 1)) / nth;
for (int b = b0; b < b1; b++)
{
nfptr = 0;
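+ /* Gather pointers to all extra output buffers that wrote to this cell-block */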
for (gmx::index out = 1; out < gmx::ssize(nbat->out); out++)
{
- if (bitmask_is_set(flags->flag[b], out))
+ if (bitmask_is_set(flags[b], out))
{
fptr[nfptr++] = nbat->out[out].f.data();
}
#else
nbnxn_atomdata_reduce_reals
#endif
- (nbat->out[0].f.data(), bitmask_is_set(flags->flag[b], 0), fptr, nfptr, i0, i1);
+ (nbat->out[0].f.data(), bitmask_is_set(flags[b], 0), fptr, nfptr, i0, i1);
}
- else if (!bitmask_is_set(flags->flag[b], 0))
+ else if (!bitmask_is_set(flags[b], 0))
{
nbnxn_atomdata_clear_reals(nbat->out[0].f, i0, i1);
}
*/
#define NBNXN_BUFFERFLAG_MAX_THREADS (BITMASK_SIZE)
-/*! \internal
- * \brief Flags for telling if threads write to force output buffers */
-typedef struct
-{
- //! The number of flag blocks
- int nflag;
- //! Bit i is set when thread i writes to a cell-block
- gmx_bitmask_t* flag;
- //! Allocation size of cxy_flag
- int flag_nalloc;
-} nbnxn_buffer_flags_t;
/*! \brief LJ combination rules: geometric, Lorentz-Berthelot, none */
enum
//! Use the flags or operate on all atoms
gmx_bool bUseBufferFlags;
//! Flags for buffer zeroing+reduc.
- nbnxn_buffer_flags_t buffer_flags;
+ std::vector<gmx_bitmask_t> buffer_flags;
//! Use tree for force reduction
gmx_bool bUseTreeReduce;
//! Synchronization step for tree reduce
template<int numComponentsPerElement>
static void clearBufferFlagged(const nbnxn_atomdata_t& nbat, int outputIndex, gmx::ArrayRef<real> buffer)
{
- const nbnxn_buffer_flags_t& flags = nbat.buffer_flags;
- gmx_bitmask_t our_flag;
+ gmx::ArrayRef<const gmx_bitmask_t> flags = nbat.buffer_flags;
+ gmx_bitmask_t our_flag;
bitmask_init_bit(&our_flag, outputIndex);
constexpr size_t numComponentsPerBlock = NBNXN_BUFFERFLAG_SIZE * numComponentsPerElement;
- for (int b = 0; b < flags.nflag; b++)
+ for (size_t b = 0; b < flags.size(); b++)
{
- if (!bitmask_is_disjoint(flags.flag[b], our_flag))
+ if (!bitmask_is_disjoint(flags[b], our_flag))
{
clearBufferAll(buffer.subArray(b * numComponentsPerBlock, numComponentsPerBlock));
}
nl->excl_fep = nullptr;
}
-static void init_buffer_flags(nbnxn_buffer_flags_t* flags, int natoms)
+/*! \brief Returns the number of flag blocks needed to cover \p numAtoms atoms */
+static constexpr int sizeNeededForBufferFlags(const int numAtoms)
{
- flags->nflag = (natoms + NBNXN_BUFFERFLAG_SIZE - 1) / NBNXN_BUFFERFLAG_SIZE;
- if (flags->nflag > flags->flag_nalloc)
- {
- flags->flag_nalloc = over_alloc_large(flags->nflag);
- srenew(flags->flag, flags->flag_nalloc);
- }
- for (int b = 0; b < flags->nflag; b++)
- {
- bitmask_clear(&(flags->flag[b]));
- }
+ return (numAtoms + NBNXN_BUFFERFLAG_SIZE - 1) / NBNXN_BUFFERFLAG_SIZE;
}
+// Resets current flags to 0 and adds more flags if needed.
+static void resizeAndZeroBufferFlags(std::vector<gmx_bitmask_t>* flags, const int numAtoms)
+{
+ /* clear() first so resize() value-initializes every element, not only newly added ones */
+ flags->clear();
+ flags->resize(sizeNeededForBufferFlags(numAtoms), gmx_bitmask_t{ 0 });
+}
+
/* Returns the pair-list cutoff between a bounding box and a grid cell given an atom-to-atom pair-list cutoff
*
* Given a cutoff distance between atoms, this functions returns the cutoff
gridi_flag_shift = getBufferFlagShift(nbl->na_ci);
gridj_flag_shift = getBufferFlagShift(nbl->na_cj);
- gridj_flag = work->buffer_flags.flag;
+ gridj_flag = work->buffer_flags.data();
}
gridSet.getBox(box);
if (bFBufferFlag && getNumSimpleJClustersInList(*nbl) > ncj_old_i)
{
- bitmask_init_bit(&(work->buffer_flags.flag[(iGrid.cellOffset() + ci) >> gridi_flag_shift]), th);
+ bitmask_init_bit(&(work->buffer_flags[(iGrid.cellOffset() + ci) >> gridi_flag_shift]), th);
}
}
static void reduce_buffer_flags(gmx::ArrayRef<PairsearchWork> searchWork,
int nsrc,
- const nbnxn_buffer_flags_t* dest)
+ gmx::ArrayRef<gmx_bitmask_t> dest)
{
for (int s = 0; s < nsrc; s++)
{
- gmx_bitmask_t* flag = searchWork[s].buffer_flags.flag;
+ gmx::ArrayRef<gmx_bitmask_t> flags(searchWork[s].buffer_flags);
- for (int b = 0; b < dest->nflag; b++)
+ for (size_t b = 0; b < dest.size(); b++)
{
- bitmask_union(&(dest->flag[b]), flag[b]);
+ gmx_bitmask_t& flag = dest[b];
+ bitmask_union(&flag, flags[b]);
}
}
}
-static void print_reduction_cost(const nbnxn_buffer_flags_t* flags, int nout)
+static void print_reduction_cost(gmx::ArrayRef<const gmx_bitmask_t> flags, int nout)
{
int nelem, nkeep, ncopy, nred, out;
gmx_bitmask_t mask_0;
ncopy = 0;
nred = 0;
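+ /* mask_0 has only bit 0 set; a block equal to it was written only by output 0 */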
bitmask_init_bit(&mask_0, 0);
- for (int b = 0; b < flags->nflag; b++)
+ for (const gmx_bitmask_t& flag_mask : flags)
{
- if (bitmask_is_equal(flags->flag[b], mask_0))
+ if (bitmask_is_equal(flag_mask, mask_0))
{
/* Only flag 0 is set, no copy of reduction required */
nelem++;
nkeep++;
}
- else if (!bitmask_is_zero(flags->flag[b]))
+ else if (!bitmask_is_zero(flag_mask))
{
int c = 0;
for (out = 0; out < nout; out++)
{
- if (bitmask_is_set(flags->flag[b], out))
+ if (bitmask_is_set(flag_mask, out))
{
c++;
}
}
}
}
-
+ const auto numFlags = static_cast<double>(flags.size());
fprintf(debug,
- "nbnxn reduction: #flag %d #list %d elem %4.2f, keep %4.2f copy %4.2f red %4.2f\n",
- flags->nflag, nout, nelem / static_cast<double>(flags->nflag),
- nkeep / static_cast<double>(flags->nflag), ncopy / static_cast<double>(flags->nflag),
- nred / static_cast<double>(flags->nflag));
+ "nbnxn reduction: #flag %lu #list %d elem %4.2f, keep %4.2f copy %4.2f red %4.2f\n",
+ flags.size(), nout, nelem / numFlags, nkeep / numFlags, ncopy / numFlags, nred / numFlags);
}
/* Copies the list entries from src to dest when cjStart <= *cjGlobal < cjEnd.
/* Note that the flags in the work struct (still) contain flags
* for all entries that are present in srcSet->nbl[t].
*/
- gmx_bitmask_t* flag = searchWork[t].buffer_flags.flag;
+ gmx_bitmask_t* flag = searchWork[t].buffer_flags.data();
int iFlagShift = getBufferFlagShift(dest.na_ci);
int jFlagShift = getBufferFlagShift(dest.na_cj);
/* We should re-init the flags before making the first list */
if (nbat->bUseBufferFlags && locality_ == InteractionLocality::Local)
{
- init_buffer_flags(&nbat->buffer_flags, nbat->numAtoms());
+ resizeAndZeroBufferFlags(&nbat->buffer_flags, nbat->numAtoms());
}
if (!isCpuType_ && minimumIlistCountForGpuBalancing > 0)
*/
if (nbat->bUseBufferFlags && (iZone == 0 && jZone == 0))
{
- init_buffer_flags(&searchWork[th].buffer_flags, nbat->numAtoms());
+ resizeAndZeroBufferFlags(&searchWork[th].buffer_flags, nbat->numAtoms());
}
if (combineLists_ && th > 0)
if (nbat->bUseBufferFlags)
{
- reduce_buffer_flags(searchWork, numLists, &nbat->buffer_flags);
+ reduce_buffer_flags(searchWork, numLists, nbat->buffer_flags);
}
if (gridSet.haveFep())
if (nbat->bUseBufferFlags)
{
- print_reduction_cost(&nbat->buffer_flags, numLists);
+ print_reduction_cost(nbat->buffer_flags, numLists);
}
}
#ifndef DOXYGEN
-PairsearchWork::PairsearchWork() :
- cp0({ { 0 } }),
- buffer_flags({ 0, nullptr, 0 }),
- ndistc(0),
- nbl_fep(new t_nblist),
- cp1({ { 0 } })
+PairsearchWork::PairsearchWork() : cp0({ { 0 } }), ndistc(0), nbl_fep(new t_nblist), cp1({ { 0 } })
{
nbnxn_init_pairlist_fep(nbl_fep.get());
}
PairsearchWork::~PairsearchWork()
{
- sfree(buffer_flags.flag);
-
free_nblist(nbl_fep.get());
}
std::vector<int> sortBuffer;
//! Flags for force buffer access
- nbnxn_buffer_flags_t buffer_flags;
+ std::vector<gmx_bitmask_t> buffer_flags;
//! Number of distance checks for flop counting
int ndistc;