*ptr = ptr_new;
}
-/* Reallocate the nbnxn_atomdata_t for a size of n atoms */
-void nbnxn_atomdata_realloc(nbnxn_atomdata_t *nbat, int n)
+void nbnxn_atomdata_t::resizeCoordinateBuffer(int numAtoms)
{
- GMX_ASSERT(nbat->nalloc >= nbat->natoms, "We should have at least as many elelements allocated as there are set");
-
- int t;
-
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->type),
- nbat->natoms*sizeof(*nbat->type),
- n*sizeof(*nbat->type),
- nbat->alloc, nbat->free);
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->lj_comb),
- nbat->natoms*2*sizeof(*nbat->lj_comb),
- n*2*sizeof(*nbat->lj_comb),
- nbat->alloc, nbat->free);
- if (nbat->XFormat != nbatXYZQ)
- {
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->q),
- nbat->natoms*sizeof(*nbat->q),
- n*sizeof(*nbat->q),
- nbat->alloc, nbat->free);
- }
- if (nbat->nenergrp > 1)
- {
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->energrp),
- nbat->natoms/nbat->na_c*sizeof(*nbat->energrp),
- n/nbat->na_c*sizeof(*nbat->energrp),
- nbat->alloc, nbat->free);
- }
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->x),
- nbat->natoms*nbat->xstride*sizeof(*nbat->x),
- n*nbat->xstride*sizeof(*nbat->x),
- nbat->alloc, nbat->free);
- for (t = 0; t < nbat->nout; t++)
+ numAtoms_ = numAtoms;
+
+ x_.resize(numAtoms*xstride);
+}
+
+void nbnxn_atomdata_t::resizeForceBuffers()
+{
+ /* Force buffers need padding up to a multiple of the buffer flag size */
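+ /* Integer round-up: (n + s - 1)/s*s is the smallest multiple of s >= n,
+  * so paddedSize is numAtoms() rounded up to a multiple of NBNXN_BUFFERFLAG_SIZE.
+  */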
+ const int paddedSize = (numAtoms() + NBNXN_BUFFERFLAG_SIZE - 1)/NBNXN_BUFFERFLAG_SIZE*NBNXN_BUFFERFLAG_SIZE;
+
+ /* Should we let each thread allocate its own data instead? */
+ for (nbnxn_atomdata_output_t &outBuffer : out)
{
- /* Allocate one element extra for possible signaling with GPUs */
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->out[t].f),
- nbat->natoms*nbat->fstride*sizeof(*nbat->out[t].f),
- n*nbat->fstride*sizeof(*nbat->out[t].f),
- nbat->alloc, nbat->free);
+ outBuffer.f.resize(paddedSize*fstride);
}
- nbat->nalloc = n;
}
/* Initializes an nbnxn_atomdata_output_t data structure */
-static void nbnxn_atomdata_output_init(nbnxn_atomdata_output_t *out,
- int nb_kernel_type,
- int nenergrp, int stride,
- nbnxn_alloc_t *ma)
+nbnxn_atomdata_output_t::nbnxn_atomdata_output_t(int nb_kernel_type,
+ int numEnergyGroups,
+ int simdEnergyBufferStride,
+ gmx::PinningPolicy pinningPolicy) :
+ f({}, {pinningPolicy}),
+ fshift({}, {pinningPolicy}),
+ Vvdw({}, {pinningPolicy}),
+ Vc({}, {pinningPolicy})
{
- out->f = nullptr;
- ma(reinterpret_cast<void **>(&out->fshift), SHIFTS*DIM*sizeof(*out->fshift));
- out->nV = nenergrp*nenergrp;
- ma(reinterpret_cast<void **>(&out->Vvdw), out->nV*sizeof(*out->Vvdw));
- ma(reinterpret_cast<void **>(&out->Vc), out->nV*sizeof(*out->Vc ));
+ fshift.resize(SHIFTS*DIM);
+ Vvdw.resize(numEnergyGroups*numEnergyGroups);
+ Vc.resize(numEnergyGroups*numEnergyGroups);
if (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
nb_kernel_type == nbnxnk4xN_SIMD_2xNN)
{
- int cj_size = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
- out->nVS = nenergrp*nenergrp*stride*(cj_size>>1)*cj_size;
- ma(reinterpret_cast<void **>(&out->VSvdw), out->nVS*sizeof(*out->VSvdw));
- ma(reinterpret_cast<void **>(&out->VSc), out->nVS*sizeof(*out->VSc ));
- }
- else
- {
- out->nVS = 0;
+ int cj_size = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
+ int numElements = numEnergyGroups*numEnergyGroups*simdEnergyBufferStride*(cj_size/2)*cj_size;
+ VSvdw.resize(numElements);
+ VSc.resize(numElements);
}
}
}
/* Stores the LJ parameter data in a format convenient for different kernels */
-static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD)
+static void set_lj_parameter_data(nbnxn_atomdata_t::Params *params, gmx_bool bSIMD)
{
- real c6, c12;
-
- int nt = nbat->ntype;
+ int nt = params->numTypes;
if (bSIMD)
{
* when it might not be used, but introducing the conditional code is not
* really worth it.
*/
- nbat->alloc(reinterpret_cast<void **>(&nbat->nbfp_aligned),
- nt*nt*c_simdBestPairAlignment*sizeof(*nbat->nbfp_aligned));
+ params->nbfp_aligned.resize(nt*nt*c_simdBestPairAlignment);
+
for (int i = 0; i < nt; i++)
{
for (int j = 0; j < nt; j++)
{
- nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+0] = nbat->nbfp[(i*nt+j)*2+0];
- nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+1] = nbat->nbfp[(i*nt+j)*2+1];
- nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+2] = 0;
- nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+3] = 0;
+ params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+0] = params->nbfp[(i*nt+j)*2+0];
+ params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+1] = params->nbfp[(i*nt+j)*2+1];
+ params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+2] = 0;
+ params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+3] = 0;
}
}
#endif
* and with LJ-PME kernels. We then only need parameters per atom type,
* not per pair of atom types.
*/
- switch (nbat->comb_rule)
+ params->nbfp_comb.resize(nt*2);
+ switch (params->comb_rule)
{
case ljcrGEOM:
- nbat->comb_rule = ljcrGEOM;
+ params->comb_rule = ljcrGEOM;
for (int i = 0; i < nt; i++)
{
/* Store the sqrt of the diagonal from the nbfp matrix */
- nbat->nbfp_comb[i*2 ] = std::sqrt(nbat->nbfp[(i*nt+i)*2 ]);
- nbat->nbfp_comb[i*2+1] = std::sqrt(nbat->nbfp[(i*nt+i)*2+1]);
+ params->nbfp_comb[i*2 ] = std::sqrt(params->nbfp[(i*nt+i)*2 ]);
+ params->nbfp_comb[i*2+1] = std::sqrt(params->nbfp[(i*nt+i)*2+1]);
}
break;
case ljcrLB:
for (int i = 0; i < nt; i++)
{
/* Get 6*C6 and 12*C12 from the diagonal of the nbfp matrix */
- c6 = nbat->nbfp[(i*nt+i)*2 ];
- c12 = nbat->nbfp[(i*nt+i)*2+1];
+ const real c6 = params->nbfp[(i*nt+i)*2 ];
+ const real c12 = params->nbfp[(i*nt+i)*2+1];
if (c6 > 0 && c12 > 0)
{
/* We store 0.5*2^(1/6)*sigma and sqrt(4*3*eps),
* so we get 6*C6 and 12*C12 after combining.
*/
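+ /* Sketch of why this works, assuming the standard form C6 = 4*eps*sigma^6
+  * and C12 = 4*eps*sigma^12: with c6 = 6*C6 and c12 = 12*C12 we have
+  * (c12/c6)^(1/6) = 2^(1/6)*sigma and c6*c6/c12 = 12*eps, so adding the
+  * first entries of atoms i and j gives 2^(1/6)*sigma_ij (Lorentz) and
+  * multiplying the second entries gives 12*eps_ij (Berthelot).
+  */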
- nbat->nbfp_comb[i*2 ] = 0.5*gmx::sixthroot(c12/c6);
- nbat->nbfp_comb[i*2+1] = std::sqrt(c6*c6/c12);
+ params->nbfp_comb[i*2 ] = 0.5*gmx::sixthroot(c12/c6);
+ params->nbfp_comb[i*2+1] = std::sqrt(c6*c6/c12);
}
else
{
- nbat->nbfp_comb[i*2 ] = 0;
- nbat->nbfp_comb[i*2+1] = 0;
+ params->nbfp_comb[i*2 ] = 0;
+ params->nbfp_comb[i*2+1] = 0;
}
}
break;
}
}
-#if GMX_SIMD
-static void
-nbnxn_atomdata_init_simple_exclusion_masks(nbnxn_atomdata_t *nbat)
+nbnxn_atomdata_t::SimdMasks::SimdMasks()
{
- const int simd_width = GMX_SIMD_REAL_WIDTH;
- int simd_excl_size;
+#if GMX_SIMD
+ constexpr int simd_width = GMX_SIMD_REAL_WIDTH;
/* Set the diagonal cluster pair exclusion mask setup data.
* In the kernel we check 0 < j - i to generate the masks.
* Here we store j - i for generating the mask for the first i,
* we subtract 0.5 to avoid rounding issues.
* In the kernel we can subtract 1 to generate the subsequent mask.
*/
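+ /* Example: the stored values are -0.5, 0.5, 1.5, ...; comparing against 0
+  * yields the j > i mask for i = 0, and each subtraction of 1 shifts the
+  * threshold to yield the mask for the next i.
+  */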
- int simd_4xn_diag_size;
-
- simd_4xn_diag_size = std::max(c_nbnxnCpuIClusterSize, simd_width);
- snew_aligned(nbat->simd_4xn_diagonal_j_minus_i, simd_4xn_diag_size, NBNXN_MEM_ALIGN);
+ const int simd_4xn_diag_size = std::max(c_nbnxnCpuIClusterSize, simd_width);
+ diagonal_4xn_j_minus_i.resize(simd_4xn_diag_size);
for (int j = 0; j < simd_4xn_diag_size; j++)
{
- nbat->simd_4xn_diagonal_j_minus_i[j] = j - 0.5;
+ diagonal_4xn_j_minus_i[j] = j - 0.5;
}
- snew_aligned(nbat->simd_2xnn_diagonal_j_minus_i, simd_width, NBNXN_MEM_ALIGN);
+ diagonal_2xnn_j_minus_i.resize(simd_width);
for (int j = 0; j < simd_width/2; j++)
{
/* The j-cluster size is half the SIMD width */
- nbat->simd_2xnn_diagonal_j_minus_i[j] = j - 0.5;
+ diagonal_2xnn_j_minus_i[j] = j - 0.5;
/* The next half of the SIMD width is for i + 1 */
- nbat->simd_2xnn_diagonal_j_minus_i[simd_width/2+j] = j - 1 - 0.5;
+ diagonal_2xnn_j_minus_i[simd_width/2 + j] = j - 1 - 0.5;
}
/* We use up to 32 bits for exclusion masking.
* In single precision this means the real and integer SIMD registers
* are of equal size.
*/
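+ /* Each filter entry gets a single, distinct bit set; ANDing an entry with
+  * the per-pair interaction bits selects whether that i,j pair interacts
+  * (a sketch of the intent; the actual masking happens in the kernels).
+  */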
- simd_excl_size = c_nbnxnCpuIClusterSize*simd_width;
+ const int simd_excl_size = c_nbnxnCpuIClusterSize*simd_width;
#if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL
- snew_aligned(nbat->simd_exclusion_filter64, simd_excl_size, NBNXN_MEM_ALIGN);
+ exclusion_filter64.resize(simd_excl_size);
#else
- snew_aligned(nbat->simd_exclusion_filter, simd_excl_size, NBNXN_MEM_ALIGN);
+ exclusion_filter.resize(simd_excl_size);
#endif
for (int j = 0; j < simd_excl_size; j++)
{
/* Set the consecutive bits for masking pair exclusions */
#if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL
- nbat->simd_exclusion_filter64[j] = (1U << j);
+ exclusion_filter64[j] = (1U << j);
#else
- nbat->simd_exclusion_filter[j] = (1U << j);
+ exclusion_filter[j] = (1U << j);
#endif
}
-#if !GMX_SIMD_HAVE_LOGICAL && !GMX_SIMD_HAVE_INT32_LOGICAL
- // If the SIMD implementation has no bitwise logical operation support
- // whatsoever we cannot use the normal masking. Instead,
- // we generate a vector of all 2^4 possible ways an i atom
- // interacts with its 4 j atoms. Each array entry contains
- // GMX_SIMD_REAL_WIDTH values that are read with a single aligned SIMD load.
- // Since there is no logical value representation in this case, we use
- // any nonzero value to indicate 'true', while zero mean 'false'.
- // This can then be converted to a SIMD boolean internally in the SIMD
- // module by comparing to zero.
- // Each array entry encodes how this i atom will interact with the 4 j atoms.
- // Matching code exists in set_ci_top_excls() to generate indices into this array.
- // Those indices are used in the kernels.
-
- simd_excl_size = c_nbnxnCpuIClusterSize*c_nbnxnCpuIClusterSize;
- const real simdFalse = 0.0;
- const real simdTrue = 1.0;
- real *simd_interaction_array;
-
- snew_aligned(simd_interaction_array, simd_excl_size * GMX_SIMD_REAL_WIDTH, NBNXN_MEM_ALIGN);
- for (int j = 0; j < simd_excl_size; j++)
+ if (!GMX_SIMD_HAVE_LOGICAL && !GMX_SIMD_HAVE_INT32_LOGICAL)
{
- int index = j * GMX_SIMD_REAL_WIDTH;
- for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
+ // If the SIMD implementation has no bitwise logical operation support
+ // whatsoever, we cannot use the normal masking. Instead,
+ // we generate a vector of all 2^4 possible ways an i atom
+ // interacts with its 4 j atoms. Each array entry contains
+ // GMX_SIMD_REAL_WIDTH values that are read with a single aligned SIMD load.
+ // Since there is no logical value representation in this case, we use
+ // any nonzero value to indicate 'true', while zero means 'false'.
+ // This can then be converted to a SIMD boolean internally in the SIMD
+ // module by comparing to zero.
+ // Each array entry encodes how this i atom will interact with the 4 j atoms.
+ // Matching code exists in set_ci_top_excls() to generate indices into this array.
+ // Those indices are used in the kernels.
+
+ const int simd_excl_size = c_nbnxnCpuIClusterSize*c_nbnxnCpuIClusterSize;
+ const real simdFalse = 0.0;
+ const real simdTrue = 1.0;
+
+ interaction_array.resize(simd_excl_size * GMX_SIMD_REAL_WIDTH);
+ for (int j = 0; j < simd_excl_size; j++)
{
- simd_interaction_array[index + i] = (j & (1 << i)) ? simdTrue : simdFalse;
+ const int index = j * GMX_SIMD_REAL_WIDTH;
+ for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
+ {
+ interaction_array[index + i] = (j & (1 << i)) ? simdTrue : simdFalse;
+ }
}
}
- nbat->simd_interaction_array = simd_interaction_array;
#endif
}
-#endif
-/* Initializes the nbnxn_atomdata_t parameters data structure */
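+/* Note: the pinning policy is propagated to every host buffer below so that,
+ * when pinning is requested, GPU transfers can use page-locked memory.
+ */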
+nbnxn_atomdata_t::Params::Params(gmx::PinningPolicy pinningPolicy) :
+ numTypes(0),
+ nbfp({}, {pinningPolicy}),
+ nbfp_comb({}, {pinningPolicy}),
+ type({}, {pinningPolicy}),
+ lj_comb({}, {pinningPolicy}),
+ q({}, {pinningPolicy}),
+ nenergrp(0),
+ neg_2log(0),
+ energrp({}, {pinningPolicy})
+{
+}
+
+nbnxn_atomdata_t::nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy) :
+ params_(pinningPolicy),
+ numAtoms_(0),
+ natoms_local(0),
+ shift_vec({}, {pinningPolicy}),
+ x_({}, {pinningPolicy}),
+ simdMasks(),
+ bUseBufferFlags(FALSE),
+ bUseTreeReduce(FALSE)
+{
+}
+
+/* Initializes an nbnxn_atomdata_t::Params data structure */
static void nbnxn_atomdata_params_init(const gmx::MDLogger &mdlog,
- nbnxn_atomdata_t *nbat,
+ nbnxn_atomdata_t::Params *params,
int nb_kernel_type,
int enbnxninitcombrule,
int ntype, const real *nbfp,
- int n_energygroups,
- nbnxn_alloc_t *alloc,
- nbnxn_free_t *free)
+ int n_energygroups)
{
real c6, c12, tol;
char *ptr;
gmx_bool simple, bCombGeom, bCombLB, bSIMD;
- if (alloc == nullptr)
- {
- nbat->alloc = nbnxn_alloc_aligned;
- }
- else
- {
- nbat->alloc = alloc;
- }
- if (free == nullptr)
- {
- nbat->free = nbnxn_free_aligned;
- }
- else
- {
- nbat->free = free;
- }
-
if (debug)
{
fprintf(debug, "There are %d atom types in the system, adding one for nbnxn_atomdata_t\n", ntype);
}
- nbat->ntype = ntype + 1;
- nbat->alloc(reinterpret_cast<void **>(&nbat->nbfp),
- nbat->ntype*nbat->ntype*2*sizeof(*nbat->nbfp));
- nbat->alloc(reinterpret_cast<void **>(&nbat->nbfp_comb), nbat->ntype*2*sizeof(*nbat->nbfp_comb));
+ params->numTypes = ntype + 1;
+ params->nbfp.resize(params->numTypes*params->numTypes*2);
+ params->nbfp_comb.resize(params->numTypes*2);
/* A tolerance of 1e-5 seems reasonable for (possibly hand-typed)
* force-field floating point parameters.
bCombGeom = TRUE;
bCombLB = TRUE;
- /* Temporarily fill nbat->nbfp_comb with sigma and epsilon
+ /* Temporarily fill params->nbfp_comb with sigma and epsilon
* to check for the LB rule.
*/
for (int i = 0; i < ntype; i++)
{
- c6 = nbfp[(i*ntype+i)*2 ]/6.0;
- c12 = nbfp[(i*ntype+i)*2+1]/12.0;
+ c6 = nbfp[(i*ntype+i)*2 ]/6.0;
+ c12 = nbfp[(i*ntype+i)*2 + 1]/12.0;
if (c6 > 0 && c12 > 0)
{
- nbat->nbfp_comb[i*2 ] = gmx::sixthroot(c12/c6);
- nbat->nbfp_comb[i*2+1] = 0.25*c6*c6/c12;
+ params->nbfp_comb[i*2 ] = gmx::sixthroot(c12/c6);
+ params->nbfp_comb[i*2 + 1] = 0.25*c6*c6/c12;
}
else if (c6 == 0 && c12 == 0)
{
- nbat->nbfp_comb[i*2 ] = 0;
- nbat->nbfp_comb[i*2+1] = 0;
+ params->nbfp_comb[i*2 ] = 0;
+ params->nbfp_comb[i*2 + 1] = 0;
}
else
{
}
}
- for (int i = 0; i < nbat->ntype; i++)
+ for (int i = 0; i < params->numTypes; i++)
{
- for (int j = 0; j < nbat->ntype; j++)
+ for (int j = 0; j < params->numTypes; j++)
{
if (i < ntype && j < ntype)
{
/* fr->nbfp has been updated, so that array too now stores c6/c12 including
* the 6.0/12.0 prefactors to save 2 flops in the most common case (force-only).
*/
- c6 = nbfp[(i*ntype+j)*2 ];
- c12 = nbfp[(i*ntype+j)*2+1];
- nbat->nbfp[(i*nbat->ntype+j)*2 ] = c6;
- nbat->nbfp[(i*nbat->ntype+j)*2+1] = c12;
+ c6 = nbfp[(i*ntype+j)*2 ];
+ c12 = nbfp[(i*ntype+j)*2 + 1];
+ params->nbfp[(i*params->numTypes+j)*2 ] = c6;
+ params->nbfp[(i*params->numTypes+j)*2 + 1] = c12;
/* Compare 6*C6 and 12*C12 for geometric combination rule */
bCombGeom = bCombGeom &&
gmx_within_tol(c6*c6, nbfp[(i*ntype+i)*2 ]*nbfp[(j*ntype+j)*2 ], tol) &&
- gmx_within_tol(c12*c12, nbfp[(i*ntype+i)*2+1]*nbfp[(j*ntype+j)*2+1], tol);
+ gmx_within_tol(c12*c12, nbfp[(i*ntype+i)*2 + 1]*nbfp[(j*ntype+j)*2 + 1], tol);
/* Compare C6 and C12 for Lorentz-Berthelot combination rule */
c6 /= 6.0;
c12 /= 12.0;
bCombLB = bCombLB &&
((c6 == 0 && c12 == 0 &&
- (nbat->nbfp_comb[i*2+1] == 0 || nbat->nbfp_comb[j*2+1] == 0)) ||
+ (params->nbfp_comb[i*2 + 1] == 0 || params->nbfp_comb[j*2 + 1] == 0)) ||
(c6 > 0 && c12 > 0 &&
gmx_within_tol(gmx::sixthroot(c12/c6),
- 0.5*(nbat->nbfp_comb[i*2]+nbat->nbfp_comb[j*2]), tol) &&
- gmx_within_tol(0.25*c6*c6/c12, std::sqrt(nbat->nbfp_comb[i*2+1]*nbat->nbfp_comb[j*2+1]), tol)));
+ 0.5*(params->nbfp_comb[i*2]+params->nbfp_comb[j*2]), tol) &&
+ gmx_within_tol(0.25*c6*c6/c12, std::sqrt(params->nbfp_comb[i*2 + 1]*params->nbfp_comb[j*2 + 1]), tol)));
}
else
{
/* Add zero parameters for the additional dummy atom type */
- nbat->nbfp[(i*nbat->ntype+j)*2 ] = 0;
- nbat->nbfp[(i*nbat->ntype+j)*2+1] = 0;
+ params->nbfp[(i*params->numTypes + j)*2 ] = 0;
+ params->nbfp[(i*params->numTypes + j)*2+1] = 0;
}
}
}
*/
if (bCombGeom)
{
- nbat->comb_rule = ljcrGEOM;
+ params->comb_rule = ljcrGEOM;
}
else if (bCombLB)
{
- nbat->comb_rule = ljcrLB;
+ params->comb_rule = ljcrLB;
}
else
{
- nbat->comb_rule = ljcrNONE;
+ params->comb_rule = ljcrNONE;
- nbat->free(nbat->nbfp_comb);
+ params->nbfp_comb.clear();
}
{
std::string mesg;
- if (nbat->comb_rule == ljcrNONE)
+ if (params->comb_rule == ljcrNONE)
{
mesg = "Using full Lennard-Jones parameter combination matrix";
}
else
{
mesg = gmx::formatString("Using %s Lennard-Jones combination rule",
- nbat->comb_rule == ljcrGEOM ? "geometric" : "Lorentz-Berthelot");
+ params->comb_rule == ljcrGEOM ? "geometric" : "Lorentz-Berthelot");
}
GMX_LOG(mdlog.info).asParagraph().appendText(mesg);
}
break;
case enbnxninitcombruleGEOM:
- nbat->comb_rule = ljcrGEOM;
+ params->comb_rule = ljcrGEOM;
break;
case enbnxninitcombruleLB:
- nbat->comb_rule = ljcrLB;
+ params->comb_rule = ljcrLB;
break;
case enbnxninitcombruleNONE:
- nbat->comb_rule = ljcrNONE;
+ params->comb_rule = ljcrNONE;
- nbat->free(nbat->nbfp_comb);
+ params->nbfp_comb.clear();
break;
default:
gmx_incons("Unknown enbnxninitcombrule");
bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
nb_kernel_type == nbnxnk4xN_SIMD_2xNN);
- set_lj_parameter_data(nbat, bSIMD);
+ set_lj_parameter_data(params, bSIMD);
- nbat->nenergrp = n_energygroups;
+ params->nenergrp = n_energygroups;
if (!simple)
{
// We now check for energy groups already when starting mdrun
GMX_RELEASE_ASSERT(n_energygroups == 1, "GPU kernels do not support energy groups");
}
/* Temporary storage goes as #grp^3*simd_width^2/2, so limit to 64 */
- if (nbat->nenergrp > 64)
+ if (params->nenergrp > 64)
{
gmx_fatal(FARGS, "With NxN kernels not more than 64 energy groups are supported\n");
}
- nbat->neg_2log = 1;
- while (nbat->nenergrp > (1<<nbat->neg_2log))
+ params->neg_2log = 1;
+ while (params->nenergrp > (1<<params->neg_2log))
{
- nbat->neg_2log++;
+ params->neg_2log++;
}
}
int enbnxninitcombrule,
int ntype, const real *nbfp,
int n_energygroups,
- int nout,
- nbnxn_alloc_t *alloc,
- nbnxn_free_t *free)
+ int nout)
{
- nbnxn_atomdata_params_init(mdlog, nbat, nb_kernel_type,
- enbnxninitcombrule, ntype, nbfp, n_energygroups,
- alloc, free);
+ nbnxn_atomdata_params_init(mdlog, &nbat->paramsDeprecated(), nb_kernel_type,
+ enbnxninitcombrule, ntype, nbfp, n_energygroups);
const gmx_bool simple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
const gmx_bool bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
nb_kernel_type == nbnxnk4xN_SIMD_2xNN);
- set_lj_parameter_data(nbat, bSIMD);
-
if (simple)
{
int pack_x;
nbat->FFormat = nbatXYZ;
}
- nbat->alloc(reinterpret_cast<void **>(&nbat->shift_vec), SHIFTS*sizeof(*nbat->shift_vec));
+ nbat->shift_vec.resize(SHIFTS);
nbat->xstride = (nbat->XFormat == nbatXYZQ ? STRIDE_XYZQ : DIM);
nbat->fstride = (nbat->FFormat == nbatXYZQ ? STRIDE_XYZQ : DIM);
- nbat->x = nullptr;
-
-#if GMX_SIMD
- if (simple)
- {
- nbnxn_atomdata_init_simple_exclusion_masks(nbat);
- }
-#endif
/* Initialize the output data structures */
- nbat->nout = nout;
- snew(nbat->out, nbat->nout);
- nbat->nalloc = 0;
- for (int i = 0; i < nbat->nout; i++)
+ for (int i = 0; i < nout; i++)
{
- nbnxn_atomdata_output_init(&nbat->out[i],
- nb_kernel_type,
- nbat->nenergrp, 1<<nbat->neg_2log,
- nbat->alloc);
+ const auto &pinningPolicy = nbat->params().type.get_allocator().pinningPolicy();
+ nbat->out.emplace_back(nb_kernel_type, nbat->params().nenergrp, 1 << nbat->params().neg_2log,
+ pinningPolicy);
}
+
nbat->buffer_flags.flag = nullptr;
nbat->buffer_flags.flag_nalloc = 0;
}
template<int packSize>
-static void copy_lj_to_nbat_lj_comb(const real *ljparam_type,
+static void copy_lj_to_nbat_lj_comb(gmx::ArrayRef<const real> ljparam_type,
const int *type, int na,
real *ljparam_at)
{
}
}
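+/* Returns the total number of grid atoms: the cells of the last grid come
+ * after those of all preceding grids, so (cell0 + nc)*na_sc counts the atoms
+ * of all grids (this assumes na_sc is the same for every grid).
+ */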
+static int numAtomsFromGrids(const nbnxn_search &nbs)
+{
+ const nbnxn_grid_t &lastGrid = nbs.grid.back();
+
+ return (lastGrid.cell0 + lastGrid.nc)*lastGrid.na_sc;
+}
+
/* Sets the atom type in nbnxn_atomdata_t */
-static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t *nbat,
- const nbnxn_search *nbs,
- const int *type)
+static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t::Params *params,
+ const nbnxn_search *nbs,
+ const int *type)
{
+ params->type.resize(numAtomsFromGrids(*nbs));
+
for (const nbnxn_grid_t &grid : nbs->grid)
{
/* Loop over all columns and copy and fill */
int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc;
copy_int_to_nbat_int(nbs->a.data() + ash, grid.cxy_na[i], ncz*grid.na_sc,
- type, nbat->ntype-1, nbat->type+ash);
+ type, params->numTypes - 1, params->type.data() + ash);
}
}
}
/* Sets the LJ combination rule parameters in nbnxn_atomdata_t */
-static void nbnxn_atomdata_set_ljcombparams(nbnxn_atomdata_t *nbat,
- const nbnxn_search *nbs)
+static void nbnxn_atomdata_set_ljcombparams(nbnxn_atomdata_t::Params *params,
+ const int XFormat,
+ const nbnxn_search *nbs)
{
- if (nbat->comb_rule != ljcrNONE)
+ params->lj_comb.resize(numAtomsFromGrids(*nbs)*2);
+
+ if (params->comb_rule != ljcrNONE)
{
for (const nbnxn_grid_t &grid : nbs->grid)
{
int ncz = grid.cxy_ind[i+1] - grid.cxy_ind[i];
int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc;
- if (nbat->XFormat == nbatX4)
+ if (XFormat == nbatX4)
{
- copy_lj_to_nbat_lj_comb<c_packX4>(nbat->nbfp_comb,
- nbat->type + ash,
+ copy_lj_to_nbat_lj_comb<c_packX4>(params->nbfp_comb,
+ params->type.data() + ash,
ncz*grid.na_sc,
- nbat->lj_comb + ash*2);
+ params->lj_comb.data() + ash*2);
}
- else if (nbat->XFormat == nbatX8)
+ else if (XFormat == nbatX8)
{
- copy_lj_to_nbat_lj_comb<c_packX8>(nbat->nbfp_comb,
- nbat->type + ash,
+ copy_lj_to_nbat_lj_comb<c_packX8>(params->nbfp_comb,
+ params->type.data() + ash,
ncz*grid.na_sc,
- nbat->lj_comb + ash*2);
+ params->lj_comb.data() + ash*2);
}
- else if (nbat->XFormat == nbatXYZQ)
+ else if (XFormat == nbatXYZQ)
{
- copy_lj_to_nbat_lj_comb<1>(nbat->nbfp_comb,
- nbat->type + ash,
+ copy_lj_to_nbat_lj_comb<1>(params->nbfp_comb,
+ params->type.data() + ash,
ncz*grid.na_sc,
- nbat->lj_comb + ash*2);
+ params->lj_comb.data() + ash*2);
}
}
}
const nbnxn_search *nbs,
const real *charge)
{
+ if (nbat->XFormat != nbatXYZQ)
+ {
+ nbat->paramsDeprecated().q.resize(nbat->numAtoms());
+ }
+
for (const nbnxn_grid_t &grid : nbs->grid)
{
/* Loop over all columns and copy and fill */
if (nbat->XFormat == nbatXYZQ)
{
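+ /* In the XYZQ layout each atom stores x, y, z, q contiguously, so the
+  * charge sits at component ZZ + 1 and consecutive charges are
+  * STRIDE_XYZQ reals apart.
+  */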
- real *q = nbat->x + ash*STRIDE_XYZQ + ZZ + 1;
+ real *q = nbat->x().data() + ash*STRIDE_XYZQ + ZZ + 1;
int i;
for (i = 0; i < na; i++)
{
}
else
{
- real *q = nbat->q + ash;
+ real *q = nbat->paramsDeprecated().q.data() + ash;
int i;
for (i = 0; i < na; i++)
{
static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t *nbat,
const nbnxn_search *nbs)
{
- real *q;
- int stride_q, nsubc;
+ nbnxn_atomdata_t::Params ¶ms = nbat->paramsDeprecated();
+ real *q;
+ int stride_q;
if (nbat->XFormat == nbatXYZQ)
{
- q = nbat->x + ZZ + 1;
+ q = nbat->x().data() + ZZ + 1;
stride_q = STRIDE_XYZQ;
}
else
{
- q = nbat->q;
+ q = params.q.data();
stride_q = 1;
}
for (const nbnxn_grid_t &grid : nbs->grid)
{
+ int nsubc;
if (grid.bSimple)
{
nsubc = 1;
{
int ind = c_offset + c*grid.na_c + i;
/* Set atom type and charge to non-interacting */
- nbat->type[ind] = nbat->ntype - 1;
- q[ind*stride_q] = 0;
+ params.type[ind] = params.numTypes - 1;
+ q[ind*stride_q] = 0;
}
}
}
}
/* Set the energy group indices for atoms in nbnxn_atomdata_t */
-static void nbnxn_atomdata_set_energygroups(nbnxn_atomdata_t *nbat,
- const nbnxn_search *nbs,
- const int *atinfo)
+static void nbnxn_atomdata_set_energygroups(nbnxn_atomdata_t::Params *params,
+ const nbnxn_search *nbs,
+ const int *atinfo)
{
- if (nbat->nenergrp == 1)
+ if (params->nenergrp == 1)
{
return;
}
+ params->energrp.resize(numAtomsFromGrids(*nbs));
+
for (const nbnxn_grid_t &grid : nbs->grid)
{
/* Loop over all columns and copy and fill */
int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc;
copy_egp_to_nbat_egps(nbs->a.data() + ash, grid.cxy_na[i], ncz*grid.na_sc,
- nbat->na_c, nbat->neg_2log,
- atinfo, nbat->energrp+(ash>>grid.na_c_2log));
+ c_nbnxnCpuIClusterSize, params->neg_2log,
+ atinfo,
+ params->energrp.data() + (ash >> grid.na_c_2log));
}
}
}
const t_mdatoms *mdatoms,
const int *atinfo)
{
- nbnxn_atomdata_set_atomtypes(nbat, nbs, mdatoms->typeA);
+ nbnxn_atomdata_t::Params ¶ms = nbat->paramsDeprecated();
+
+ nbnxn_atomdata_set_atomtypes(¶ms, nbs, mdatoms->typeA);
nbnxn_atomdata_set_charges(nbat, nbs, mdatoms->chargeA);
}
/* This must be done after masking types for FEP */
- nbnxn_atomdata_set_ljcombparams(nbat, nbs);
+ nbnxn_atomdata_set_ljcombparams(¶ms, nbat->XFormat, nbs);
- nbnxn_atomdata_set_energygroups(nbat, nbs, atinfo);
+ nbnxn_atomdata_set_energygroups(¶ms, nbs, atinfo);
}
/* Copies the shift vector array to nbnxn_atomdata_t */
na_fill = na;
}
copy_rvec_to_nbat_real(nbs->a.data() + ash, na, na_fill, x,
- nbat->XFormat, nbat->x, ash);
+ nbat->XFormat, nbat->x().data(), ash);
}
}
}
}
static void
-nbnxn_atomdata_clear_reals(real * gmx_restrict dest,
+nbnxn_atomdata_clear_reals(gmx::ArrayRef<real> dest,
int i0, int i1)
{
for (int i = i0; i < i1; i++)
gmx_unused static void
nbnxn_atomdata_reduce_reals(real * gmx_restrict dest,
gmx_bool bDestSet,
- real ** gmx_restrict src,
+ const real ** gmx_restrict src,
int nsrc,
int i0, int i1)
{
gmx_unused static void
nbnxn_atomdata_reduce_reals_simd(real gmx_unused * gmx_restrict dest,
gmx_bool gmx_unused bDestSet,
- real gmx_unused ** gmx_restrict src,
+ const gmx_unused real ** gmx_restrict src,
int gmx_unused nsrc,
int gmx_unused i0, int gmx_unused i1)
{
static void
nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search *nbs,
const nbnxn_atomdata_t *nbat,
- nbnxn_atomdata_output_t *out,
+ gmx::ArrayRef<nbnxn_atomdata_output_t> out,
int nfa,
int a0, int a1,
rvec *f)
case nbatXYZQ:
if (nfa == 1)
{
- const real *fnb = out[0].f;
+ const real *fnb = out[0].f.data();
for (int a = a0; a < a1; a++)
{
case nbatX4:
if (nfa == 1)
{
- const real *fnb = out[0].f;
+ const real *fnb = out[0].f.data();
for (int a = a0; a < a1; a++)
{
case nbatX8:
if (nfa == 1)
{
- const real *fnb = out[0].f;
+ const real *fnb = out[0].f.data();
for (int a = a0; a < a1; a++)
{
return (b * 0x0202020202ULL & 0x010884422010ULL) % 1023;
}
-static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(const nbnxn_atomdata_t *nbat,
- int nth)
+static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(nbnxn_atomdata_t *nbat,
+ int nth)
{
const nbnxn_buffer_flags_t *flags = &nbat->buffer_flags;
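+ /* Round the thread count up to the next power of two, e.g. nth = 6 gives
+  * next_pow2 = 8, so that the pairwise reduction tree below is complete.
+  */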
- int next_pow2 = 1<<(gmx::log2I(nth-1)+1);
+ int next_pow2 = 1<<(gmx::log2I(nth-1)+1);
- assert(nbat->nout == nth); /* tree-reduce currently only works for nout==nth */
+ const int numOutputBuffers = nbat->out.size();
+ GMX_ASSERT(numOutputBuffers == nth,
+ "tree-reduce currently only works for numOutputBuffers==nth");
memset(nbat->syncStep, 0, sizeof(*(nbat->syncStep))*nth);
index[1] = index[0] + group_size/2;
/* If no second buffer, nothing to do */
- if (index[1] >= nbat->nout && group_size > 2)
+ if (index[1] >= numOutputBuffers && group_size > 2)
{
continue;
}
if (bitmask_is_set(flags->flag[b], index[1]) || group_size > 2)
{
+ const real *fIndex1 = nbat->out[index[1]].f.data();
#if GMX_SIMD
nbnxn_atomdata_reduce_reals_simd
#else
nbnxn_atomdata_reduce_reals
#endif
- (nbat->out[index[0]].f,
+ (nbat->out[index[0]].f.data(),
bitmask_is_set(flags->flag[b], index[0]) || group_size > 2,
- &(nbat->out[index[1]].f), 1, i0, i1);
+ &fIndex1, 1, i0, i1);
}
else if (!bitmask_is_set(flags->flag[b], index[0]))
}
-static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(const nbnxn_atomdata_t *nbat,
- int nth)
+static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbnxn_atomdata_t *nbat,
+ int nth)
{
#pragma omp parallel for num_threads(nth) schedule(static)
for (int th = 0; th < nth; th++)
try
{
const nbnxn_buffer_flags_t *flags;
- int nfptr;
- real *fptr[NBNXN_BUFFERFLAG_MAX_THREADS];
+ int nfptr;
+ const real *fptr[NBNXN_BUFFERFLAG_MAX_THREADS];
flags = &nbat->buffer_flags;
int i1 = (b+1)*NBNXN_BUFFERFLAG_SIZE*nbat->fstride;
nfptr = 0;
- for (int out = 1; out < nbat->nout; out++)
+ for (int out = 1; out < static_cast<gmx::index>(nbat->out.size()); out++)
{
if (bitmask_is_set(flags->flag[b], out))
{
- fptr[nfptr++] = nbat->out[out].f;
+ fptr[nfptr++] = nbat->out[out].f.data();
}
}
if (nfptr > 0)
#else
nbnxn_atomdata_reduce_reals
#endif
- (nbat->out[0].f,
+ (nbat->out[0].f.data(),
bitmask_is_set(flags->flag[b], 0),
fptr, nfptr,
i0, i1);
/* Add the force array(s) from nbnxn_atomdata_t to f */
void nbnxn_atomdata_add_nbat_f_to_f(nbnxn_search *nbs,
int locality,
- const nbnxn_atomdata_t *nbat,
+ nbnxn_atomdata_t *nbat,
rvec *f,
gmx_wallcycle *wcycle)
{
int nth = gmx_omp_nthreads_get(emntNonbonded);
- if (nbat->nout > 1)
+ if (nbat->out.size() > 1)
{
if (locality != eatAll)
{
void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat,
rvec *fshift)
{
- const nbnxn_atomdata_output_t * out = nbat->out;
+ gmx::ArrayRef<const nbnxn_atomdata_output_t> outputBuffers = nbat->out;
for (int s = 0; s < SHIFTS; s++)
{
rvec sum;
clear_rvec(sum);
- for (int th = 0; th < nbat->nout; th++)
+ for (const nbnxn_atomdata_output_t &out : outputBuffers)
{
- sum[XX] += out[th].fshift[s*DIM+XX];
- sum[YY] += out[th].fshift[s*DIM+YY];
- sum[ZZ] += out[th].fshift[s*DIM+ZZ];
+ sum[XX] += out.fshift[s*DIM+XX];
+ sum[YY] += out.fshift[s*DIM+YY];
+ sum[ZZ] += out.fshift[s*DIM+ZZ];
}
rvec_inc(fshift[s], sum);
}