Changed all manually managed pointers to std::vector.
Split off a Params and a SimdMasks struct.
Changed some data members to be private; more to be done.
This change is only refactoring, no functional changes.
Note: negligible performance impact on the nbnxn gridding
due to (unnecessary) initialization of std::vector elements during resize().
Change-Id: I9c70a1f8f272c80a7cf335fcbd867bd79c4102a2
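
As background for the resize() note: std::vector<T>::resize() value-initializes (zeroes) every newly created element, which is wasted work when the gridding code overwrites the buffer right away. A minimal sketch of the usual allocator-adaptor workaround, assuming plain std::allocator underneath (GROMACS' own allocators are not shown here):

    #include <memory>
    #include <utility>
    #include <vector>

    /* Allocator adaptor that default-initializes new elements (a no-op for
     * trivial types) instead of value-initializing them to zero. */
    template<typename T, typename A = std::allocator<T>>
    class DefaultInitAllocator : public A
    {
        public:
            using A::A;

            template<typename U>
            struct rebind
            {
                using other = DefaultInitAllocator<
                        U, typename std::allocator_traits<A>::template rebind_alloc<U>>;
            };

            template<typename U>
            void construct(U *p) { ::new (static_cast<void *>(p)) U; }

            template<typename U, typename... Args>
            void construct(U *p, Args &&... args)
            {
                std::allocator_traits<A>::construct(static_cast<A &>(*this), p,
                                                    std::forward<Args>(args)...);
            }
    };

    /* resize() on such a vector leaves newly added floats uninitialized: */
    std::vector<float, DefaultInitAllocator<float>> x;
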
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
}
// TODO: template on transferKind to avoid runtime conditionals
-int cu_copy_H2D(void *d_dest, void *h_src, size_t bytes,
+int cu_copy_H2D(void *d_dest, const void *h_src, size_t bytes,
GpuApiCallBehavior transferKind, cudaStream_t s = nullptr)
{
cudaError_t stat;
return 0;
}
-int cu_copy_H2D_sync(void * d_dest, void * h_src, size_t bytes)
+int cu_copy_H2D_sync(void * d_dest, const void * h_src, size_t bytes)
{
return cu_copy_H2D(d_dest, h_src, bytes, GpuApiCallBehavior::Sync);
}
/*!
 * The copy is launched in stream s, or in stream 0 if not specified.
*/
-int cu_copy_H2D_async(void * d_dest, void * h_src, size_t bytes, cudaStream_t s = nullptr)
+int cu_copy_H2D_async(void * d_dest, const void * h_src, size_t bytes, cudaStream_t s = nullptr)
{
return cu_copy_H2D(d_dest, h_src, bytes, GpuApiCallBehavior::Async, s);
}
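
The added const on h_src matches the underlying CUDA runtime call, which has always taken the source as const; a sketch of what the H2D wrappers ultimately execute (status checking elided):

    /* cudaMemcpyAsync takes the host source as const void *, so the
     * wrappers can accept const host buffers without casting: */
    cudaError_t stat = cudaMemcpyAsync(d_dest, h_src, bytes,
                                       cudaMemcpyHostToDevice, s);
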
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
*
 * The copy is launched in stream s, or in stream 0 if not specified.
*/
-int cu_copy_H2D(void *d_dest, void *h_src, size_t bytes, GpuApiCallBehavior transferKind, cudaStream_t /*s = nullptr*/);
+int cu_copy_H2D(void *d_dest, const void *h_src, size_t bytes, GpuApiCallBehavior transferKind, cudaStream_t /*s = nullptr*/);
/*! Launches synchronous host to device memory copy. */
-int cu_copy_H2D_sync(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/);
+int cu_copy_H2D_sync(void * /*d_dest*/, const void * /*h_src*/, size_t /*bytes*/);
/*! Launches asynchronous host to device memory copy in stream s. */
-int cu_copy_H2D_async(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/, cudaStream_t /*s = nullptr*/);
+int cu_copy_H2D_async(void * /*d_dest*/, const void * /*h_src*/, size_t /*bytes*/, cudaStream_t /*s = nullptr*/);
// TODO: the two functions below are pretty much a constructor/destructor of a simple
// GPU table object. The almost self-contained fetchFromParamLookupTable() belongs with them.
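
A hedged sketch of the table object the TODO suggests; the class name and layout are hypothetical, only the three CUDA runtime calls are real:

    /* Hypothetical RAII wrapper: owns a device copy of a read-only
     * parameter table for the lifetime of the object. */
    class GpuParamLookupTable
    {
        public:
            GpuParamLookupTable(const float *hostData, size_t numElements)
            {
                cudaMalloc(reinterpret_cast<void **>(&deviceData_),
                           numElements*sizeof(float));
                cudaMemcpy(deviceData_, hostData, numElements*sizeof(float),
                           cudaMemcpyHostToDevice);
            }
            ~GpuParamLookupTable() { cudaFree(deviceData_); }

        private:
            float *deviceData_ = nullptr;
    };
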
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"
-int ocl_copy_H2D(cl_mem d_dest, void* h_src,
+int ocl_copy_H2D(cl_mem d_dest, const void* h_src,
size_t offset, size_t bytes,
GpuApiCallBehavior transferKind,
cl_command_queue command_queue,
* identifying this particular host to device operation. The event can further
* be used to queue a wait for this operation or to query profiling information.
*/
-int ocl_copy_H2D_async(cl_mem d_dest, void * h_src,
+int ocl_copy_H2D_async(cl_mem d_dest, const void * h_src,
size_t offset, size_t bytes,
cl_command_queue command_queue,
cl_event *copy_event)
/*! \brief Launches synchronous host to device memory copy.
*/
-int ocl_copy_H2D_sync(cl_mem d_dest, void * h_src,
+int ocl_copy_H2D_sync(cl_mem d_dest, const void * h_src,
size_t offset, size_t bytes,
cl_command_queue command_queue)
{
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* identifying this particular host to device operation. The event can further
* be used to queue a wait for this operation or to query profiling information.
*/
-int ocl_copy_H2D(cl_mem d_dest, void* h_src,
+int ocl_copy_H2D(cl_mem d_dest, const void* h_src,
size_t offset, size_t bytes,
GpuApiCallBehavior transferKind,
cl_command_queue command_queue,
cl_event *copy_event);
/*! \brief Launches asynchronous host to device memory copy. */
-int ocl_copy_H2D_async(cl_mem d_dest, void * h_src,
+int ocl_copy_H2D_async(cl_mem d_dest, const void * h_src,
size_t offset, size_t bytes,
cl_command_queue command_queue,
cl_event *copy_event);
/*! \brief Launches synchronous host to device memory copy. */
-int ocl_copy_H2D_sync(cl_mem d_dest, void * h_src,
+int ocl_copy_H2D_sync(cl_mem d_dest, const void * h_src,
size_t offset, size_t bytes,
cl_command_queue command_queue);
enbnxninitcombrule = enbnxninitcombruleNONE;
}
- snew(nbv->nbat, 1);
+ nbv->nbat = new nbnxn_atomdata_t(nbv->bUseGPU ? gmx::PinningPolicy::PinnedIfSupported : gmx::PinningPolicy::CannotBePinned);
int mimimumNumEnergyGroupNonbonded = ir->opts.ngener;
if (ir->opts.ngener - ir->nwall == 1)
{
enbnxninitcombrule,
fr->ntype, fr->nbfp,
mimimumNumEnergyGroupNonbonded,
- bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
- nb_alloc, nb_free);
+ bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1);
if (nbv->bUseGPU)
{
*ptr = ptr_new;
}
-/* Reallocate the nbnxn_atomdata_t for a size of n atoms */
-void nbnxn_atomdata_realloc(nbnxn_atomdata_t *nbat, int n)
+void nbnxn_atomdata_t::resizeCoordinateBuffer(int numAtoms)
{
- GMX_ASSERT(nbat->nalloc >= nbat->natoms, "We should have at least as many elelements allocated as there are set");
-
- int t;
-
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->type),
- nbat->natoms*sizeof(*nbat->type),
- n*sizeof(*nbat->type),
- nbat->alloc, nbat->free);
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->lj_comb),
- nbat->natoms*2*sizeof(*nbat->lj_comb),
- n*2*sizeof(*nbat->lj_comb),
- nbat->alloc, nbat->free);
- if (nbat->XFormat != nbatXYZQ)
- {
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->q),
- nbat->natoms*sizeof(*nbat->q),
- n*sizeof(*nbat->q),
- nbat->alloc, nbat->free);
- }
- if (nbat->nenergrp > 1)
- {
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->energrp),
- nbat->natoms/nbat->na_c*sizeof(*nbat->energrp),
- n/nbat->na_c*sizeof(*nbat->energrp),
- nbat->alloc, nbat->free);
- }
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->x),
- nbat->natoms*nbat->xstride*sizeof(*nbat->x),
- n*nbat->xstride*sizeof(*nbat->x),
- nbat->alloc, nbat->free);
- for (t = 0; t < nbat->nout; t++)
+ numAtoms_ = numAtoms;
+
+ x_.resize(numAtoms*xstride);
+}
+
+void nbnxn_atomdata_t::resizeForceBuffers()
+{
+ /* Force buffers need padding up to a multiple of the buffer flag size */
+ const int paddedSize = (numAtoms() + NBNXN_BUFFERFLAG_SIZE - 1)/NBNXN_BUFFERFLAG_SIZE*NBNXN_BUFFERFLAG_SIZE;
+
+    /* Should we let each thread allocate its own data instead? */
+ for (nbnxn_atomdata_output_t &outBuffer : out)
{
- /* Allocate one element extra for possible signaling with GPUs */
- nbnxn_realloc_void(reinterpret_cast<void **>(&nbat->out[t].f),
- nbat->natoms*nbat->fstride*sizeof(*nbat->out[t].f),
- n*nbat->fstride*sizeof(*nbat->out[t].f),
- nbat->alloc, nbat->free);
+ outBuffer.f.resize(paddedSize*fstride);
}
- nbat->nalloc = n;
}
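
The paddedSize expression is the standard integer round-up-to-a-multiple idiom; illustrated below with 16 standing in for NBNXN_BUFFERFLAG_SIZE (the value is assumed only for the example):

    /* (n + m - 1)/m*m rounds n up to the next multiple of m: */
    constexpr int roundUpToMultiple(int n, int m)
    {
        return (n + m - 1)/m*m;
    }
    /* roundUpToMultiple(100, 16) == 112; roundUpToMultiple(112, 16) == 112 */
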
/* Initializes an nbnxn_atomdata_output_t data structure */
-static void nbnxn_atomdata_output_init(nbnxn_atomdata_output_t *out,
- int nb_kernel_type,
- int nenergrp, int stride,
- nbnxn_alloc_t *ma)
+nbnxn_atomdata_output_t::nbnxn_atomdata_output_t(int nb_kernel_type,
+ int numEnergyGroups,
+ int simdEnergyBufferStride,
+ gmx::PinningPolicy pinningPolicy) :
+ f({}, {pinningPolicy}),
+ fshift({}, {pinningPolicy}),
+ Vvdw({}, {pinningPolicy}),
+ Vc({}, {pinningPolicy})
{
- out->f = nullptr;
- ma(reinterpret_cast<void **>(&out->fshift), SHIFTS*DIM*sizeof(*out->fshift));
- out->nV = nenergrp*nenergrp;
- ma(reinterpret_cast<void **>(&out->Vvdw), out->nV*sizeof(*out->Vvdw));
- ma(reinterpret_cast<void **>(&out->Vc), out->nV*sizeof(*out->Vc ));
+ fshift.resize(SHIFTS*DIM);
+ Vvdw.resize(numEnergyGroups*numEnergyGroups);
+ Vc.resize(numEnergyGroups*numEnergyGroups);
if (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
nb_kernel_type == nbnxnk4xN_SIMD_2xNN)
{
- int cj_size = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
- out->nVS = nenergrp*nenergrp*stride*(cj_size>>1)*cj_size;
- ma(reinterpret_cast<void **>(&out->VSvdw), out->nVS*sizeof(*out->VSvdw));
- ma(reinterpret_cast<void **>(&out->VSc), out->nVS*sizeof(*out->VSc ));
- }
- else
- {
- out->nVS = 0;
+ int cj_size = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
+ int numElements = numEnergyGroups*numEnergyGroups*simdEnergyBufferStride*(cj_size/2)*cj_size;
+ VSvdw.resize(numElements);
+ VSc.resize(numElements);
}
}
}
/* Stores the LJ parameter data in a format convenient for different kernels */
-static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD)
+static void set_lj_parameter_data(nbnxn_atomdata_t::Params *params, gmx_bool bSIMD)
{
- real c6, c12;
-
- int nt = nbat->ntype;
+ int nt = params->numTypes;
if (bSIMD)
{
* when it might not be used, but introducing the conditional code is not
* really worth it.
*/
- nbat->alloc(reinterpret_cast<void **>(&nbat->nbfp_aligned),
- nt*nt*c_simdBestPairAlignment*sizeof(*nbat->nbfp_aligned));
+ params->nbfp_aligned.resize(nt*nt*c_simdBestPairAlignment);
+
for (int i = 0; i < nt; i++)
{
for (int j = 0; j < nt; j++)
{
- nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+0] = nbat->nbfp[(i*nt+j)*2+0];
- nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+1] = nbat->nbfp[(i*nt+j)*2+1];
- nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+2] = 0;
- nbat->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+3] = 0;
+ params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+0] = params->nbfp[(i*nt+j)*2+0];
+ params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+1] = params->nbfp[(i*nt+j)*2+1];
+ params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+2] = 0;
+ params->nbfp_aligned[(i*nt+j)*c_simdBestPairAlignment+3] = 0;
}
}
#endif
* and with LJ-PME kernels. We then only need parameters per atom type,
* not per pair of atom types.
*/
- switch (nbat->comb_rule)
+ params->nbfp_comb.resize(nt*2);
+ switch (params->comb_rule)
{
case ljcrGEOM:
- nbat->comb_rule = ljcrGEOM;
+ params->comb_rule = ljcrGEOM;
for (int i = 0; i < nt; i++)
{
/* Store the sqrt of the diagonal from the nbfp matrix */
- nbat->nbfp_comb[i*2 ] = std::sqrt(nbat->nbfp[(i*nt+i)*2 ]);
- nbat->nbfp_comb[i*2+1] = std::sqrt(nbat->nbfp[(i*nt+i)*2+1]);
+ params->nbfp_comb[i*2 ] = std::sqrt(params->nbfp[(i*nt+i)*2 ]);
+ params->nbfp_comb[i*2+1] = std::sqrt(params->nbfp[(i*nt+i)*2+1]);
}
break;
case ljcrLB:
for (int i = 0; i < nt; i++)
{
/* Get 6*C6 and 12*C12 from the diagonal of the nbfp matrix */
- c6 = nbat->nbfp[(i*nt+i)*2 ];
- c12 = nbat->nbfp[(i*nt+i)*2+1];
+ const real c6 = params->nbfp[(i*nt+i)*2 ];
+ const real c12 = params->nbfp[(i*nt+i)*2+1];
if (c6 > 0 && c12 > 0)
{
/* We store 0.5*2^(1/6)*sigma and sqrt(4*3*eps),
* so we get 6*C6 and 12*C12 after combining.
*/
- nbat->nbfp_comb[i*2 ] = 0.5*gmx::sixthroot(c12/c6);
- nbat->nbfp_comb[i*2+1] = std::sqrt(c6*c6/c12);
+ params->nbfp_comb[i*2 ] = 0.5*gmx::sixthroot(c12/c6);
+ params->nbfp_comb[i*2+1] = std::sqrt(c6*c6/c12);
}
else
{
- nbat->nbfp_comb[i*2 ] = 0;
- nbat->nbfp_comb[i*2+1] = 0;
+ params->nbfp_comb[i*2 ] = 0;
+ params->nbfp_comb[i*2+1] = 0;
}
}
break;
}
}
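
For reference, how the LB kernels recombine these stored per-type values into the prefactored pair parameters; the variable names here are illustrative:

    /* Stored above: s_i = 0.5*2^(1/6)*sigma_i and e_i = sqrt(12*eps_i),
     * with c6 = 6*C6 and c12 = 12*C12 on the nbfp diagonal. */
    const real sig  = s_i + s_j;      /* = 2^(1/6) * 0.5*(sigma_i + sigma_j)  */
    const real sig2 = sig*sig;
    const real sig6 = sig2*sig2*sig2; /* = 2 * sigma_ij^6                     */
    const real c6   = e_i*e_j*sig6;   /* = 12*eps_ij * 2*sigma_ij^6 = 6*C6_ij */
    const real c12  = c6*sig6;        /* = 12*C12_ij                          */
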
-#if GMX_SIMD
-static void
-nbnxn_atomdata_init_simple_exclusion_masks(nbnxn_atomdata_t *nbat)
+nbnxn_atomdata_t::SimdMasks::SimdMasks()
{
- const int simd_width = GMX_SIMD_REAL_WIDTH;
- int simd_excl_size;
+#if GMX_SIMD
+ constexpr int simd_width = GMX_SIMD_REAL_WIDTH;
/* Set the diagonal cluster pair exclusion mask setup data.
* In the kernel we check 0 < j - i to generate the masks.
* Here we store j - i for generating the mask for the first i;
* we subtract 0.5 to avoid rounding issues.
* In the kernel we can subtract 1 to generate the subsequent mask.
*/
- int simd_4xn_diag_size;
-
- simd_4xn_diag_size = std::max(c_nbnxnCpuIClusterSize, simd_width);
- snew_aligned(nbat->simd_4xn_diagonal_j_minus_i, simd_4xn_diag_size, NBNXN_MEM_ALIGN);
+ const int simd_4xn_diag_size = std::max(c_nbnxnCpuIClusterSize, simd_width);
+ diagonal_4xn_j_minus_i.resize(simd_4xn_diag_size);
for (int j = 0; j < simd_4xn_diag_size; j++)
{
- nbat->simd_4xn_diagonal_j_minus_i[j] = j - 0.5;
+ diagonal_4xn_j_minus_i[j] = j - 0.5;
}
- snew_aligned(nbat->simd_2xnn_diagonal_j_minus_i, simd_width, NBNXN_MEM_ALIGN);
+ diagonal_2xnn_j_minus_i.resize(simd_width);
for (int j = 0; j < simd_width/2; j++)
{
/* The j-cluster size is half the SIMD width */
- nbat->simd_2xnn_diagonal_j_minus_i[j] = j - 0.5;
+ diagonal_2xnn_j_minus_i[j] = j - 0.5;
/* The next half of the SIMD width is for i + 1 */
- nbat->simd_2xnn_diagonal_j_minus_i[simd_width/2+j] = j - 1 - 0.5;
+ diagonal_2xnn_j_minus_i[simd_width/2 + j] = j - 1 - 0.5;
}
/* We use up to 32 bits for exclusion masking.
* In single precision this means the real and integer SIMD registers
* are of equal size.
*/
- simd_excl_size = c_nbnxnCpuIClusterSize*simd_width;
+ const int simd_excl_size = c_nbnxnCpuIClusterSize*simd_width;
#if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL
- snew_aligned(nbat->simd_exclusion_filter64, simd_excl_size, NBNXN_MEM_ALIGN);
+ exclusion_filter64.resize(simd_excl_size);
#else
- snew_aligned(nbat->simd_exclusion_filter, simd_excl_size, NBNXN_MEM_ALIGN);
+ exclusion_filter.resize(simd_excl_size);
#endif
for (int j = 0; j < simd_excl_size; j++)
{
/* Set the consecutive bits for masking pair exclusions */
#if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL
- nbat->simd_exclusion_filter64[j] = (1U << j);
+ exclusion_filter64[j] = (1U << j);
#else
- nbat->simd_exclusion_filter[j] = (1U << j);
+ exclusion_filter[j] = (1U << j);
#endif
}
-#if !GMX_SIMD_HAVE_LOGICAL && !GMX_SIMD_HAVE_INT32_LOGICAL
- // If the SIMD implementation has no bitwise logical operation support
- // whatsoever we cannot use the normal masking. Instead,
- // we generate a vector of all 2^4 possible ways an i atom
- // interacts with its 4 j atoms. Each array entry contains
- // GMX_SIMD_REAL_WIDTH values that are read with a single aligned SIMD load.
- // Since there is no logical value representation in this case, we use
- // any nonzero value to indicate 'true', while zero mean 'false'.
- // This can then be converted to a SIMD boolean internally in the SIMD
- // module by comparing to zero.
- // Each array entry encodes how this i atom will interact with the 4 j atoms.
- // Matching code exists in set_ci_top_excls() to generate indices into this array.
- // Those indices are used in the kernels.
-
- simd_excl_size = c_nbnxnCpuIClusterSize*c_nbnxnCpuIClusterSize;
- const real simdFalse = 0.0;
- const real simdTrue = 1.0;
- real *simd_interaction_array;
-
- snew_aligned(simd_interaction_array, simd_excl_size * GMX_SIMD_REAL_WIDTH, NBNXN_MEM_ALIGN);
- for (int j = 0; j < simd_excl_size; j++)
+ if (!GMX_SIMD_HAVE_LOGICAL && !GMX_SIMD_HAVE_INT32_LOGICAL)
{
- int index = j * GMX_SIMD_REAL_WIDTH;
- for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
+ // If the SIMD implementation has no bitwise logical operation support
+ // whatsoever we cannot use the normal masking. Instead,
+ // we generate a vector of all 2^4 possible ways an i atom
+ // interacts with its 4 j atoms. Each array entry contains
+ // GMX_SIMD_REAL_WIDTH values that are read with a single aligned SIMD load.
+ // Since there is no logical value representation in this case, we use
+        // any nonzero value to indicate 'true', while zero means 'false'.
+ // This can then be converted to a SIMD boolean internally in the SIMD
+ // module by comparing to zero.
+ // Each array entry encodes how this i atom will interact with the 4 j atoms.
+ // Matching code exists in set_ci_top_excls() to generate indices into this array.
+ // Those indices are used in the kernels.
+
+ const int simd_excl_size = c_nbnxnCpuIClusterSize*c_nbnxnCpuIClusterSize;
+ const real simdFalse = 0.0;
+ const real simdTrue = 1.0;
+
+ interaction_array.resize(simd_excl_size * GMX_SIMD_REAL_WIDTH);
+ for (int j = 0; j < simd_excl_size; j++)
{
- simd_interaction_array[index + i] = (j & (1 << i)) ? simdTrue : simdFalse;
+ const int index = j * GMX_SIMD_REAL_WIDTH;
+ for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
+ {
+ interaction_array[index + i] = (j & (1 << i)) ? simdTrue : simdFalse;
+ }
}
}
- nbat->simd_interaction_array = simd_interaction_array;
#endif
}
-#endif
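
A scalar sketch of how the stored j - i - 0.5 values become diagonal masks (the SIMD form, which loads these values and compares against zero, appears in the 2xnn kernel below):

    /* For the first i the mask is (0 < j - 0.5), i.e. true for j > i;
     * subtracting 1 shifts the same test to the next i row. */
    real jmi = j - 0.5;                     /* stored value, valid for i = 0 */
    for (int i = 0; i < c_nbnxnCpuIClusterSize; i++)
    {
        const bool includePair = (0 < jmi); /* masks out j <= i    */
        jmi -= 1;                           /* now valid for i + 1 */
    }
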
-/* Initializes the nbnxn_atomdata_t parameters data structure */
+nbnxn_atomdata_t::Params::Params(gmx::PinningPolicy pinningPolicy) :
+ numTypes(0),
+ nbfp({}, {pinningPolicy}),
+ nbfp_comb({}, {pinningPolicy}),
+ type({}, {pinningPolicy}),
+ lj_comb({}, {pinningPolicy}),
+ q({}, {pinningPolicy}),
+ nenergrp(0),
+ neg_2log(0),
+ energrp({}, {pinningPolicy})
+{
+}
+
+nbnxn_atomdata_t::nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy) :
+ params_(pinningPolicy),
+ numAtoms_(0),
+ natoms_local(0),
+ shift_vec({}, {pinningPolicy}),
+ x_({}, {pinningPolicy}),
+ simdMasks(),
+ bUseBufferFlags(FALSE),
+ bUseTreeReduce(FALSE)
+{
+}
+
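
The ({}, {pinningPolicy}) member initializers use the vector(initializer_list, allocator) constructor: each vector starts empty, but its allocator, and thus whether later allocations are pinned, is fixed at construction. Reduced to its essentials:

    /* Empty contents, but the allocator carries the pinning policy: */
    gmx::HostVector<real> x({}, {gmx::PinningPolicy::PinnedIfSupported});
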
+/* Initializes an nbnxn_atomdata_t::Params data structure */
static void nbnxn_atomdata_params_init(const gmx::MDLogger &mdlog,
- nbnxn_atomdata_t *nbat,
+ nbnxn_atomdata_t::Params *params,
int nb_kernel_type,
int enbnxninitcombrule,
int ntype, const real *nbfp,
- int n_energygroups,
- nbnxn_alloc_t *alloc,
- nbnxn_free_t *free)
+ int n_energygroups)
{
real c6, c12, tol;
char *ptr;
gmx_bool simple, bCombGeom, bCombLB, bSIMD;
- if (alloc == nullptr)
- {
- nbat->alloc = nbnxn_alloc_aligned;
- }
- else
- {
- nbat->alloc = alloc;
- }
- if (free == nullptr)
- {
- nbat->free = nbnxn_free_aligned;
- }
- else
- {
- nbat->free = free;
- }
-
if (debug)
{
fprintf(debug, "There are %d atom types in the system, adding one for nbnxn_atomdata_t\n", ntype);
}
- nbat->ntype = ntype + 1;
- nbat->alloc(reinterpret_cast<void **>(&nbat->nbfp),
- nbat->ntype*nbat->ntype*2*sizeof(*nbat->nbfp));
- nbat->alloc(reinterpret_cast<void **>(&nbat->nbfp_comb), nbat->ntype*2*sizeof(*nbat->nbfp_comb));
+ params->numTypes = ntype + 1;
+ params->nbfp.resize(params->numTypes*params->numTypes*2);
+ params->nbfp_comb.resize(params->numTypes*2);
/* A tolerance of 1e-5 seems reasonable for (possibly hand-typed)
* force-field floating point parameters.
bCombGeom = TRUE;
bCombLB = TRUE;
- /* Temporarily fill nbat->nbfp_comb with sigma and epsilon
+ /* Temporarily fill params->nbfp_comb with sigma and epsilon
* to check for the LB rule.
*/
for (int i = 0; i < ntype; i++)
{
- c6 = nbfp[(i*ntype+i)*2 ]/6.0;
- c12 = nbfp[(i*ntype+i)*2+1]/12.0;
+ c6 = nbfp[(i*ntype+i)*2 ]/6.0;
+ c12 = nbfp[(i*ntype+i)*2 + 1]/12.0;
if (c6 > 0 && c12 > 0)
{
- nbat->nbfp_comb[i*2 ] = gmx::sixthroot(c12/c6);
- nbat->nbfp_comb[i*2+1] = 0.25*c6*c6/c12;
+ params->nbfp_comb[i*2 ] = gmx::sixthroot(c12/c6);
+ params->nbfp_comb[i*2 + 1] = 0.25*c6*c6/c12;
}
else if (c6 == 0 && c12 == 0)
{
- nbat->nbfp_comb[i*2 ] = 0;
- nbat->nbfp_comb[i*2+1] = 0;
+ params->nbfp_comb[i*2 ] = 0;
+ params->nbfp_comb[i*2 + 1] = 0;
}
else
{
}
}
- for (int i = 0; i < nbat->ntype; i++)
+ for (int i = 0; i < params->numTypes; i++)
{
- for (int j = 0; j < nbat->ntype; j++)
+ for (int j = 0; j < params->numTypes; j++)
{
if (i < ntype && j < ntype)
{
/* fr->nbfp has been updated, so that array too now stores c6/c12 including
* the 6.0/12.0 prefactors to save 2 flops in the most common case (force-only).
*/
- c6 = nbfp[(i*ntype+j)*2 ];
- c12 = nbfp[(i*ntype+j)*2+1];
- nbat->nbfp[(i*nbat->ntype+j)*2 ] = c6;
- nbat->nbfp[(i*nbat->ntype+j)*2+1] = c12;
+ c6 = nbfp[(i*ntype+j)*2 ];
+ c12 = nbfp[(i*ntype+j)*2 + 1];
+ params->nbfp[(i*params->numTypes+j)*2 ] = c6;
+ params->nbfp[(i*params->numTypes+j)*2 + 1] = c12;
/* Compare 6*C6 and 12*C12 for geometric combination rule */
bCombGeom = bCombGeom &&
gmx_within_tol(c6*c6, nbfp[(i*ntype+i)*2 ]*nbfp[(j*ntype+j)*2 ], tol) &&
- gmx_within_tol(c12*c12, nbfp[(i*ntype+i)*2+1]*nbfp[(j*ntype+j)*2+1], tol);
+ gmx_within_tol(c12*c12, nbfp[(i*ntype+i)*2 + 1]*nbfp[(j*ntype+j)*2 + 1], tol);
/* Compare C6 and C12 for Lorentz-Berthelot combination rule */
c6 /= 6.0;
c12 /= 12.0;
bCombLB = bCombLB &&
((c6 == 0 && c12 == 0 &&
- (nbat->nbfp_comb[i*2+1] == 0 || nbat->nbfp_comb[j*2+1] == 0)) ||
+ (params->nbfp_comb[i*2 + 1] == 0 || params->nbfp_comb[j*2 + 1] == 0)) ||
(c6 > 0 && c12 > 0 &&
gmx_within_tol(gmx::sixthroot(c12/c6),
- 0.5*(nbat->nbfp_comb[i*2]+nbat->nbfp_comb[j*2]), tol) &&
- gmx_within_tol(0.25*c6*c6/c12, std::sqrt(nbat->nbfp_comb[i*2+1]*nbat->nbfp_comb[j*2+1]), tol)));
+ 0.5*(params->nbfp_comb[i*2]+params->nbfp_comb[j*2]), tol) &&
+ gmx_within_tol(0.25*c6*c6/c12, std::sqrt(params->nbfp_comb[i*2 + 1]*params->nbfp_comb[j*2 + 1]), tol)));
}
else
{
/* Add zero parameters for the additional dummy atom type */
- nbat->nbfp[(i*nbat->ntype+j)*2 ] = 0;
- nbat->nbfp[(i*nbat->ntype+j)*2+1] = 0;
+ params->nbfp[(i*params->numTypes + j)*2 ] = 0;
+ params->nbfp[(i*params->numTypes + j)*2+1] = 0;
}
}
}
*/
if (bCombGeom)
{
- nbat->comb_rule = ljcrGEOM;
+ params->comb_rule = ljcrGEOM;
}
else if (bCombLB)
{
- nbat->comb_rule = ljcrLB;
+ params->comb_rule = ljcrLB;
}
else
{
- nbat->comb_rule = ljcrNONE;
+ params->comb_rule = ljcrNONE;
- nbat->free(nbat->nbfp_comb);
+ params->nbfp_comb.clear();
}
{
std::string mesg;
- if (nbat->comb_rule == ljcrNONE)
+ if (params->comb_rule == ljcrNONE)
{
mesg = "Using full Lennard-Jones parameter combination matrix";
}
else
{
mesg = gmx::formatString("Using %s Lennard-Jones combination rule",
- nbat->comb_rule == ljcrGEOM ? "geometric" : "Lorentz-Berthelot");
+ params->comb_rule == ljcrGEOM ? "geometric" : "Lorentz-Berthelot");
}
GMX_LOG(mdlog.info).asParagraph().appendText(mesg);
}
break;
case enbnxninitcombruleGEOM:
- nbat->comb_rule = ljcrGEOM;
+ params->comb_rule = ljcrGEOM;
break;
case enbnxninitcombruleLB:
- nbat->comb_rule = ljcrLB;
+ params->comb_rule = ljcrLB;
break;
case enbnxninitcombruleNONE:
- nbat->comb_rule = ljcrNONE;
+ params->comb_rule = ljcrNONE;
- nbat->free(nbat->nbfp_comb);
+ params->nbfp_comb.clear();
break;
default:
gmx_incons("Unknown enbnxninitcombrule");
bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
nb_kernel_type == nbnxnk4xN_SIMD_2xNN);
- set_lj_parameter_data(nbat, bSIMD);
+ set_lj_parameter_data(params, bSIMD);
- nbat->nenergrp = n_energygroups;
+ params->nenergrp = n_energygroups;
if (!simple)
{
// We now check for energy groups already when starting mdrun
GMX_RELEASE_ASSERT(n_energygroups == 1, "GPU kernels do not support energy groups");
}
/* Temporary storage goes as #grp^3*simd_width^2/2, so limit to 64 */
- if (nbat->nenergrp > 64)
+ if (params->nenergrp > 64)
{
gmx_fatal(FARGS, "With NxN kernels not more than 64 energy groups are supported\n");
}
- nbat->neg_2log = 1;
- while (nbat->nenergrp > (1<<nbat->neg_2log))
+ params->neg_2log = 1;
+ while (params->nenergrp > (1<<params->neg_2log))
{
- nbat->neg_2log++;
+ params->neg_2log++;
}
}
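
The loop computes the number of bits needed per energy-group index, i.e. max(1, ceil(log2(nenergrp))); traced for nenergrp = 5:

    int neg_2log = 1;            /* 5 > 1<<1 -> 2;  5 > 1<<2 -> 3;  5 <= 1<<3 */
    while (5 > (1 << neg_2log))
    {
        neg_2log++;
    }
    /* neg_2log == 3: each group index occupies 3 bits in the packed energrp words */
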
int enbnxninitcombrule,
int ntype, const real *nbfp,
int n_energygroups,
- int nout,
- nbnxn_alloc_t *alloc,
- nbnxn_free_t *free)
+ int nout)
{
- nbnxn_atomdata_params_init(mdlog, nbat, nb_kernel_type,
- enbnxninitcombrule, ntype, nbfp, n_energygroups,
- alloc, free);
+ nbnxn_atomdata_params_init(mdlog, &nbat->paramsDeprecated(), nb_kernel_type,
+ enbnxninitcombrule, ntype, nbfp, n_energygroups);
const gmx_bool simple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
const gmx_bool bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
nb_kernel_type == nbnxnk4xN_SIMD_2xNN);
- set_lj_parameter_data(nbat, bSIMD);
-
if (simple)
{
int pack_x;
nbat->FFormat = nbatXYZ;
}
- nbat->alloc(reinterpret_cast<void **>(&nbat->shift_vec), SHIFTS*sizeof(*nbat->shift_vec));
+ nbat->shift_vec.resize(SHIFTS);
nbat->xstride = (nbat->XFormat == nbatXYZQ ? STRIDE_XYZQ : DIM);
nbat->fstride = (nbat->FFormat == nbatXYZQ ? STRIDE_XYZQ : DIM);
- nbat->x = nullptr;
-
-#if GMX_SIMD
- if (simple)
- {
- nbnxn_atomdata_init_simple_exclusion_masks(nbat);
- }
-#endif
/* Initialize the output data structures */
- nbat->nout = nout;
- snew(nbat->out, nbat->nout);
- nbat->nalloc = 0;
- for (int i = 0; i < nbat->nout; i++)
+ for (int i = 0; i < nout; i++)
{
- nbnxn_atomdata_output_init(&nbat->out[i],
- nb_kernel_type,
- nbat->nenergrp, 1<<nbat->neg_2log,
- nbat->alloc);
+ const auto &pinningPolicy = nbat->params().type.get_allocator().pinningPolicy();
+ nbat->out.emplace_back(nb_kernel_type, nbat->params().nenergrp, 1 << nbat->params().neg_2log,
+ pinningPolicy);
}
+
nbat->buffer_flags.flag = nullptr;
nbat->buffer_flags.flag_nalloc = 0;
}
template<int packSize>
-static void copy_lj_to_nbat_lj_comb(const real *ljparam_type,
+static void copy_lj_to_nbat_lj_comb(gmx::ArrayRef<const real> ljparam_type,
const int *type, int na,
real *ljparam_at)
{
}
}
+static int numAtomsFromGrids(const nbnxn_search &nbs)
+{
+ const nbnxn_grid_t &lastGrid = nbs.grid.back();
+
+ return (lastGrid.cell0 + lastGrid.nc)*lastGrid.na_sc;
+}
+
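
The grids are laid out back to back in a single cell list, so the total gridded atom count follows from the last grid alone; with illustrative values:

    /* cell0 = 100 (first cell of the last grid), nc = 20 cells,
     * na_sc = 8 atoms per super-cell:  (100 + 20)*8 = 960 gridded atoms */
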
/* Sets the atom type in nbnxn_atomdata_t */
-static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t *nbat,
- const nbnxn_search *nbs,
- const int *type)
+static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t::Params *params,
+ const nbnxn_search *nbs,
+ const int *type)
{
+ params->type.resize(numAtomsFromGrids(*nbs));
+
for (const nbnxn_grid_t &grid : nbs->grid)
{
/* Loop over all columns and copy and fill */
int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc;
copy_int_to_nbat_int(nbs->a.data() + ash, grid.cxy_na[i], ncz*grid.na_sc,
- type, nbat->ntype-1, nbat->type+ash);
+ type, params->numTypes - 1, params->type.data() + ash);
}
}
}
/* Sets the LJ combination rule parameters in nbnxn_atomdata_t */
-static void nbnxn_atomdata_set_ljcombparams(nbnxn_atomdata_t *nbat,
- const nbnxn_search *nbs)
+static void nbnxn_atomdata_set_ljcombparams(nbnxn_atomdata_t::Params *params,
+ const int XFormat,
+ const nbnxn_search *nbs)
{
- if (nbat->comb_rule != ljcrNONE)
+ params->lj_comb.resize(numAtomsFromGrids(*nbs)*2);
+
+ if (params->comb_rule != ljcrNONE)
{
for (const nbnxn_grid_t &grid : nbs->grid)
{
int ncz = grid.cxy_ind[i+1] - grid.cxy_ind[i];
int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc;
- if (nbat->XFormat == nbatX4)
+ if (XFormat == nbatX4)
{
- copy_lj_to_nbat_lj_comb<c_packX4>(nbat->nbfp_comb,
- nbat->type + ash,
+ copy_lj_to_nbat_lj_comb<c_packX4>(params->nbfp_comb,
+ params->type.data() + ash,
ncz*grid.na_sc,
- nbat->lj_comb + ash*2);
+ params->lj_comb.data() + ash*2);
}
- else if (nbat->XFormat == nbatX8)
+ else if (XFormat == nbatX8)
{
- copy_lj_to_nbat_lj_comb<c_packX8>(nbat->nbfp_comb,
- nbat->type + ash,
+ copy_lj_to_nbat_lj_comb<c_packX8>(params->nbfp_comb,
+ params->type.data() + ash,
ncz*grid.na_sc,
- nbat->lj_comb + ash*2);
+ params->lj_comb.data() + ash*2);
}
- else if (nbat->XFormat == nbatXYZQ)
+ else if (XFormat == nbatXYZQ)
{
- copy_lj_to_nbat_lj_comb<1>(nbat->nbfp_comb,
- nbat->type + ash,
+ copy_lj_to_nbat_lj_comb<1>(params->nbfp_comb,
+ params->type.data() + ash,
ncz*grid.na_sc,
- nbat->lj_comb + ash*2);
+ params->lj_comb.data() + ash*2);
}
}
}
const nbnxn_search *nbs,
const real *charge)
{
+ if (nbat->XFormat != nbatXYZQ)
+ {
+ nbat->paramsDeprecated().q.resize(nbat->numAtoms());
+ }
+
for (const nbnxn_grid_t &grid : nbs->grid)
{
/* Loop over all columns and copy and fill */
if (nbat->XFormat == nbatXYZQ)
{
- real *q = nbat->x + ash*STRIDE_XYZQ + ZZ + 1;
+ real *q = nbat->x().data() + ash*STRIDE_XYZQ + ZZ + 1;
int i;
for (i = 0; i < na; i++)
{
}
else
{
- real *q = nbat->q + ash;
+ real *q = nbat->paramsDeprecated().q.data() + ash;
int i;
for (i = 0; i < na; i++)
{
static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t *nbat,
const nbnxn_search *nbs)
{
- real *q;
- int stride_q, nsubc;
+ nbnxn_atomdata_t::Params ¶ms = nbat->paramsDeprecated();
+ real *q;
+ int stride_q;
if (nbat->XFormat == nbatXYZQ)
{
- q = nbat->x + ZZ + 1;
+ q = nbat->x().data() + ZZ + 1;
stride_q = STRIDE_XYZQ;
}
else
{
- q = nbat->q;
+ q = params.q.data();
stride_q = 1;
}
for (const nbnxn_grid_t &grid : nbs->grid)
{
+ int nsubc;
if (grid.bSimple)
{
nsubc = 1;
{
int ind = c_offset + c*grid.na_c + i;
/* Set atom type and charge to non-interacting */
- nbat->type[ind] = nbat->ntype - 1;
- q[ind*stride_q] = 0;
+ params.type[ind] = params.numTypes - 1;
+ q[ind*stride_q] = 0;
}
}
}
}
/* Set the energy group indices for atoms in nbnxn_atomdata_t */
-static void nbnxn_atomdata_set_energygroups(nbnxn_atomdata_t *nbat,
- const nbnxn_search *nbs,
- const int *atinfo)
+static void nbnxn_atomdata_set_energygroups(nbnxn_atomdata_t::Params *params,
+ const nbnxn_search *nbs,
+ const int *atinfo)
{
- if (nbat->nenergrp == 1)
+ if (params->nenergrp == 1)
{
return;
}
+ params->energrp.resize(numAtomsFromGrids(*nbs));
+
for (const nbnxn_grid_t &grid : nbs->grid)
{
/* Loop over all columns and copy and fill */
int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc;
copy_egp_to_nbat_egps(nbs->a.data() + ash, grid.cxy_na[i], ncz*grid.na_sc,
- nbat->na_c, nbat->neg_2log,
- atinfo, nbat->energrp+(ash>>grid.na_c_2log));
+ c_nbnxnCpuIClusterSize, params->neg_2log,
+ atinfo,
+ params->energrp.data() + (ash >> grid.na_c_2log));
}
}
}
const t_mdatoms *mdatoms,
const int *atinfo)
{
- nbnxn_atomdata_set_atomtypes(nbat, nbs, mdatoms->typeA);
+ nbnxn_atomdata_t::Params ¶ms = nbat->paramsDeprecated();
+
+ nbnxn_atomdata_set_atomtypes(¶ms, nbs, mdatoms->typeA);
nbnxn_atomdata_set_charges(nbat, nbs, mdatoms->chargeA);
}
/* This must be done after masking types for FEP */
- nbnxn_atomdata_set_ljcombparams(nbat, nbs);
+ nbnxn_atomdata_set_ljcombparams(¶ms, nbat->XFormat, nbs);
- nbnxn_atomdata_set_energygroups(nbat, nbs, atinfo);
+ nbnxn_atomdata_set_energygroups(¶ms, nbs, atinfo);
}
/* Copies the shift vector array to nbnxn_atomdata_t */
na_fill = na;
}
copy_rvec_to_nbat_real(nbs->a.data() + ash, na, na_fill, x,
- nbat->XFormat, nbat->x, ash);
+ nbat->XFormat, nbat->x().data(), ash);
}
}
}
}
static void
-nbnxn_atomdata_clear_reals(real * gmx_restrict dest,
+nbnxn_atomdata_clear_reals(gmx::ArrayRef<real> dest,
int i0, int i1)
{
for (int i = i0; i < i1; i++)
gmx_unused static void
nbnxn_atomdata_reduce_reals(real * gmx_restrict dest,
gmx_bool bDestSet,
- real ** gmx_restrict src,
+ const real ** gmx_restrict src,
int nsrc,
int i0, int i1)
{
gmx_unused static void
nbnxn_atomdata_reduce_reals_simd(real gmx_unused * gmx_restrict dest,
gmx_bool gmx_unused bDestSet,
- real gmx_unused ** gmx_restrict src,
+ const gmx_unused real ** gmx_restrict src,
int gmx_unused nsrc,
int gmx_unused i0, int gmx_unused i1)
{
static void
nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search *nbs,
const nbnxn_atomdata_t *nbat,
- nbnxn_atomdata_output_t *out,
+ gmx::ArrayRef<nbnxn_atomdata_output_t> out,
int nfa,
int a0, int a1,
rvec *f)
case nbatXYZQ:
if (nfa == 1)
{
- const real *fnb = out[0].f;
+ const real *fnb = out[0].f.data();
for (int a = a0; a < a1; a++)
{
case nbatX4:
if (nfa == 1)
{
- const real *fnb = out[0].f;
+ const real *fnb = out[0].f.data();
for (int a = a0; a < a1; a++)
{
case nbatX8:
if (nfa == 1)
{
- const real *fnb = out[0].f;
+ const real *fnb = out[0].f.data();
for (int a = a0; a < a1; a++)
{
return (b * 0x0202020202ULL & 0x010884422010ULL) % 1023;
}
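
The multiply-and-mod expression above is the classic three-operation byte reversal from the Bit Twiddling Hacks collection, used here to order the tree-reduction pairing; a small standalone form for checking:

    /* Reverse the bits of an 8-bit value with one 64-bit multiply,
     * one mask and one modulus: */
    static unsigned int bitReverseByte(unsigned int b)
    {
        return (b * 0x0202020202ULL & 0x010884422010ULL) % 1023;
    }
    /* bitReverseByte(0x01) == 0x80;  bitReverseByte(0x0d) == 0xb0 */
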
-static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(const nbnxn_atomdata_t *nbat,
- int nth)
+static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(nbnxn_atomdata_t *nbat,
+ int nth)
{
const nbnxn_buffer_flags_t *flags = &nbat->buffer_flags;
- int next_pow2 = 1<<(gmx::log2I(nth-1)+1);
+ int next_pow2 = 1<<(gmx::log2I(nth-1)+1);
- assert(nbat->nout == nth); /* tree-reduce currently only works for nout==nth */
+ const int numOutputBuffers = nbat->out.size();
+ GMX_ASSERT(numOutputBuffers == nth,
+ "tree-reduce currently only works for numOutputBuffers==nth");
memset(nbat->syncStep, 0, sizeof(*(nbat->syncStep))*nth);
index[1] = index[0] + group_size/2;
/* If no second buffer, nothing to do */
- if (index[1] >= nbat->nout && group_size > 2)
+ if (index[1] >= numOutputBuffers && group_size > 2)
{
continue;
}
if (bitmask_is_set(flags->flag[b], index[1]) || group_size > 2)
{
+ const real *fIndex1 = nbat->out[index[1]].f.data();
#if GMX_SIMD
nbnxn_atomdata_reduce_reals_simd
#else
nbnxn_atomdata_reduce_reals
#endif
- (nbat->out[index[0]].f,
+ (nbat->out[index[0]].f.data(),
bitmask_is_set(flags->flag[b], index[0]) || group_size > 2,
- &(nbat->out[index[1]].f), 1, i0, i1);
+ &fIndex1, 1, i0, i1);
}
else if (!bitmask_is_set(flags->flag[b], index[0]))
}
-static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(const nbnxn_atomdata_t *nbat,
- int nth)
+static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbnxn_atomdata_t *nbat,
+ int nth)
{
#pragma omp parallel for num_threads(nth) schedule(static)
for (int th = 0; th < nth; th++)
try
{
const nbnxn_buffer_flags_t *flags;
- int nfptr;
- real *fptr[NBNXN_BUFFERFLAG_MAX_THREADS];
+ int nfptr;
+ const real *fptr[NBNXN_BUFFERFLAG_MAX_THREADS];
flags = &nbat->buffer_flags;
int i1 = (b+1)*NBNXN_BUFFERFLAG_SIZE*nbat->fstride;
nfptr = 0;
- for (int out = 1; out < nbat->nout; out++)
+ for (int out = 1; out < static_cast<gmx::index>(nbat->out.size()); out++)
{
if (bitmask_is_set(flags->flag[b], out))
{
- fptr[nfptr++] = nbat->out[out].f;
+ fptr[nfptr++] = nbat->out[out].f.data();
}
}
if (nfptr > 0)
#else
nbnxn_atomdata_reduce_reals
#endif
- (nbat->out[0].f,
+ (nbat->out[0].f.data(),
bitmask_is_set(flags->flag[b], 0),
fptr, nfptr,
i0, i1);
/* Add the force array(s) from nbnxn_atomdata_t to f */
void nbnxn_atomdata_add_nbat_f_to_f(nbnxn_search *nbs,
int locality,
- const nbnxn_atomdata_t *nbat,
+ nbnxn_atomdata_t *nbat,
rvec *f,
gmx_wallcycle *wcycle)
{
int nth = gmx_omp_nthreads_get(emntNonbonded);
- if (nbat->nout > 1)
+ if (nbat->out.size() > 1)
{
if (locality != eatAll)
{
void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat,
rvec *fshift)
{
- const nbnxn_atomdata_output_t * out = nbat->out;
+ gmx::ArrayRef<const nbnxn_atomdata_output_t> outputBuffers = nbat->out;
for (int s = 0; s < SHIFTS; s++)
{
rvec sum;
clear_rvec(sum);
- for (int th = 0; th < nbat->nout; th++)
+ for (const nbnxn_atomdata_output_t &out : outputBuffers)
{
- sum[XX] += out[th].fshift[s*DIM+XX];
- sum[YY] += out[th].fshift[s*DIM+YY];
- sum[ZZ] += out[th].fshift[s*DIM+ZZ];
+ sum[XX] += out.fshift[s*DIM+XX];
+ sum[YY] += out.fshift[s*DIM+YY];
+ sum[ZZ] += out.fshift[s*DIM+ZZ];
}
rvec_inc(fshift[s], sum);
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
int enbnxninitcombrule,
int ntype, const real *nbfp,
int n_energygroups,
- int nout,
- nbnxn_alloc_t *alloc,
- nbnxn_free_t *free);
+ int nout);
-/* Copy the atom data to the non-bonded atom data structure */
void nbnxn_atomdata_set(nbnxn_atomdata_t *nbat,
const nbnxn_search *nbs,
const t_mdatoms *mdatoms,
/* Add the forces stored in nbat to f, zeros the forces in nbat */
void nbnxn_atomdata_add_nbat_f_to_f(nbnxn_search *nbs,
int locality,
- const nbnxn_atomdata_t *nbat,
+ nbnxn_atomdata_t *nbat,
rvec *f,
gmx_wallcycle *wcycle);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
}
/* HtoD x, q */
- cu_copy_H2D_async(adat->xq + adat_begin, nbatom->x + adat_begin * 4,
+ cu_copy_H2D_async(adat->xq + adat_begin,
+ static_cast<const void *>(nbatom->x().data() + adat_begin * 4),
adat_len * sizeof(*adat->xq), stream);
if (bDoTime)
}
void nbnxn_gpu_launch_cpyback(gmx_nbnxn_cuda_t *nb,
- const nbnxn_atomdata_t *nbatom,
+ nbnxn_atomdata_t *nbatom,
int flags,
int aloc,
bool haveOtherWork)
}
/* DtoH f */
- cu_copy_D2H_async(nbatom->out[0].f + adat_begin * 3, adat->f + adat_begin,
+ cu_copy_D2H_async(nbatom->out[0].f.data() + adat_begin * 3, adat->f + adat_begin,
(adat_len)*sizeof(*adat->f), stream);
/* After the non-local D2H is launched the nonlocal_done event can be
}
/*! Initializes the nonbonded parameter data structure. */
-static void init_nbparam(cu_nbparam_t *nbp,
- const interaction_const_t *ic,
- const NbnxnListParameters *listParams,
- const nbnxn_atomdata_t *nbat)
+static void init_nbparam(cu_nbparam_t *nbp,
+ const interaction_const_t *ic,
+ const NbnxnListParameters *listParams,
+ const nbnxn_atomdata_t::Params &nbatParams)
{
int ntypes;
- ntypes = nbat->ntype;
+ ntypes = nbatParams.numTypes;
set_cutoff_parameters(nbp, ic, listParams);
{
case eintmodNONE:
case eintmodPOTSHIFT:
- switch (nbat->comb_rule)
+ switch (nbatParams.comb_rule)
{
case ljcrNONE:
nbp->vdwtype = evdwCuCUT;
{
if (ic->ljpme_comb_rule == ljcrGEOM)
{
- assert(nbat->comb_rule == ljcrGEOM);
+ assert(nbatParams.comb_rule == ljcrGEOM);
nbp->vdwtype = evdwCuEWALDGEOM;
}
else
{
- assert(nbat->comb_rule == ljcrLB);
+ assert(nbatParams.comb_rule == ljcrLB);
nbp->vdwtype = evdwCuEWALDLB;
}
}
if (!useLjCombRule(nbp))
{
initParamLookupTable(nbp->nbfp, nbp->nbfp_texobj,
- nbat->nbfp, 2*ntypes*ntypes);
+ nbatParams.nbfp.data(), 2*ntypes*ntypes);
}
/* set up LJ-PME parameter lookup table */
if (ic->vdwtype == evdwPME)
{
initParamLookupTable(nbp->nbfp_comb, nbp->nbfp_comb_texobj,
- nbat->nbfp_comb, 2*ntypes);
+ nbatParams.nbfp_comb.data(), 2*ntypes);
}
}
static void nbnxn_cuda_init_const(gmx_nbnxn_cuda_t *nb,
const interaction_const_t *ic,
const NbnxnListParameters *listParams,
- const nbnxn_atomdata_t *nbat)
+ const nbnxn_atomdata_t::Params &nbatParams)
{
- init_atomdata_first(nb->atdat, nbat->ntype);
- init_nbparam(nb->nbparam, ic, listParams, nbat);
+ init_atomdata_first(nb->atdat, nbatParams.numTypes);
+ init_nbparam(nb->nbparam, ic, listParams, nbatParams);
/* clear energy and shift force outputs */
nbnxn_cuda_clear_e_fshift(nb);
/* pick L1 cache configuration */
nbnxn_cuda_set_cacheconfig();
- nbnxn_cuda_init_const(nb, ic, listParams, nbat);
+ nbnxn_cuda_init_const(nb, ic, listParams, nbat->params());
*p_nb = nb;
/* only if we have a dynamic box */
if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
{
- cu_copy_H2D_async(adat->shift_vec, nbatom->shift_vec,
+ cu_copy_H2D_async(adat->shift_vec, nbatom->shift_vec.data(),
SHIFTS * sizeof(*adat->shift_vec), ls);
adat->bShiftVecUploaded = true;
}
cu_atomdata_t *d_atdat = nb->atdat;
cudaStream_t ls = nb->stream[eintLocal];
- natoms = nbat->natoms;
+ natoms = nbat->numAtoms();
realloced = false;
if (bDoTime)
if (useLjCombRule(nb->nbparam))
{
- cu_copy_H2D_async(d_atdat->lj_comb, nbat->lj_comb,
+ cu_copy_H2D_async(d_atdat->lj_comb, nbat->params().lj_comb.data(),
natoms*sizeof(*d_atdat->lj_comb), ls);
}
else
{
- cu_copy_H2D_async(d_atdat->atom_types, nbat->type,
+ cu_copy_H2D_async(d_atdat->atom_types, nbat->params().type.data(),
natoms*sizeof(*d_atdat->atom_types), ls);
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
*/
GPU_FUNC_QUALIFIER
void nbnxn_gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused *nb,
- const struct nbnxn_atomdata_t gmx_unused *nbatom,
+ struct nbnxn_atomdata_t gmx_unused *nbatom,
int gmx_unused flags,
int gmx_unused aloc,
bool gmx_unused haveOtherWork) GPU_FUNC_TERM
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
copy_rvec_to_nbat_real(nbs->a.data() + atomStart, numAtoms, grid->na_c,
as_rvec_array(x.data()),
- nbat->XFormat, nbat->x, atomStart);
+ nbat->XFormat, nbat->x().data(), atomStart);
if (nbat->XFormat == nbatX4)
{
#if GMX_SIMD && GMX_SIMD_REAL_WIDTH == 2
if (2*grid->na_cj == grid->na_c)
{
- calc_bounding_box_x_x4_halves(numAtoms, nbat->x + atom_to_x_index<c_packX4>(atomStart), bb_ptr,
+ calc_bounding_box_x_x4_halves(numAtoms, nbat->x().data() + atom_to_x_index<c_packX4>(atomStart), bb_ptr,
grid->bbj.data() + offset*2);
}
else
#endif
{
- calc_bounding_box_x_x4(numAtoms, nbat->x + atom_to_x_index<c_packX4>(atomStart), bb_ptr);
+ calc_bounding_box_x_x4(numAtoms, nbat->x().data() + atom_to_x_index<c_packX4>(atomStart), bb_ptr);
}
}
else if (nbat->XFormat == nbatX8)
size_t offset = (atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log;
nbnxn_bb_t *bb_ptr = grid->bb.data() + offset;
- calc_bounding_box_x_x8(numAtoms, nbat->x + atom_to_x_index<c_packX8>(atomStart), bb_ptr);
+ calc_bounding_box_x_x8(numAtoms, nbat->x().data() + atom_to_x_index<c_packX8>(atomStart), bb_ptr);
}
#if NBNXN_BBXXXX
else if (!grid->bSimple)
#if NBNXN_SEARCH_SIMD4_FLOAT_X_BB
if (nbat->XFormat == nbatXYZQ)
{
- calc_bounding_box_xxxx_simd4(numAtoms, nbat->x + atomStart*nbat->xstride,
+ calc_bounding_box_xxxx_simd4(numAtoms, nbat->x().data() + atomStart*nbat->xstride,
bb_work_aligned, pbb_ptr);
}
else
#endif
{
- calc_bounding_box_xxxx(numAtoms, nbat->xstride, nbat->x + atomStart*nbat->xstride,
+ calc_bounding_box_xxxx(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride,
pbb_ptr);
}
if (gmx_debug_at)
/* Store the bounding boxes as xyz.xyz. */
nbnxn_bb_t *bb_ptr = grid->bb.data() + ((atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log);
- calc_bounding_box(numAtoms, nbat->xstride, nbat->x + atomStart*nbat->xstride,
+ calc_bounding_box(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride,
bb_ptr);
if (gmx_debug_at)
*/
nbs->a.resize(numNbnxnAtoms + numAtomsMoved);
- /* We need padding up to a multiple of the buffer flag size: simply add */
- if (numNbnxnAtoms + NBNXN_BUFFERFLAG_SIZE > nbat->nalloc)
- {
- nbnxn_atomdata_realloc(nbat, numNbnxnAtoms + NBNXN_BUFFERFLAG_SIZE);
- }
-
- nbat->natoms = numNbnxnAtoms;
+ /* Make space in nbat for storing the atom coordinates */
+ nbat->resizeCoordinateBuffer(numNbnxnAtoms);
}
/* Determine in which grid cells the atoms should go */
grid->na_sc = (grid->bSimple ? 1 : c_gpuNumClusterPerCell)*grid->na_c;
grid->na_c_2log = get_2log(grid->na_c);
- nbat->na_c = grid->na_c;
-
if (ddZone == 0)
{
grid->cell0 = 0;
if (ddZone == 0)
{
- nbat->natoms_local = nbat->natoms;
+ nbat->natoms_local = nbat->numAtoms();
+ }
+ if (ddZone == static_cast<int>(nbs->grid.size()) - 1)
+ {
+ /* We are done setting up all grids, we can resize the force buffers */
+ nbat->resizeForceBuffers();
}
nbs_cycle_stop(&nbs->cc[enbsCCgrid]);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
static void
clear_f_all(const nbnxn_atomdata_t *nbat, real *f)
{
- int i;
-
- for (i = 0; i < nbat->natoms*nbat->fstride; i++)
+ for (int i = 0; i < nbat->numAtoms()*nbat->fstride; i++)
{
f[i] = 0;
}
real *Vvdw,
real *Vc)
{
- int nb;
- int i, j, ind, indr;
+ const int nenergrp = nbat->params().nenergrp;
- for (nb = 0; nb < nlist; nb++)
+ for (int nb = 0; nb < nlist; nb++)
{
- for (i = 0; i < nbat->nenergrp; i++)
+ for (int i = 0; i < nenergrp; i++)
{
/* Reduce the diagonal terms */
- ind = i*nbat->nenergrp + i;
+ int ind = i*nenergrp + i;
Vvdw[ind] += nbat->out[nb].Vvdw[ind];
Vc[ind] += nbat->out[nb].Vc[ind];
/* Reduce the off-diagonal terms */
- for (j = i+1; j < nbat->nenergrp; j++)
+ for (int j = i + 1; j < nenergrp; j++)
{
/* The output should contain only one off-diagonal part */
- ind = i*nbat->nenergrp + j;
- indr = j*nbat->nenergrp + i;
+ int ind = i*nenergrp + j;
+ int indr = j*nenergrp + i;
Vvdw[ind] += nbat->out[nb].Vvdw[ind] + nbat->out[nb].Vvdw[indr];
Vc[ind] += nbat->out[nb].Vc[ind] + nbat->out[nb].Vc[indr];
}
*/
static void clearGroupEnergies(nbnxn_atomdata_output_t *out)
{
- for (int i = 0; i < out->nV; i++)
- {
- out->Vvdw[i] = 0;
- out->Vc[i] = 0;
- }
-
- for (int i = 0; i < out->nVS; i++)
- {
- out->VSvdw[i] = 0;
- }
- for (int i = 0; i < out->nVS; i++)
- {
- out->VSc[i] = 0;
- }
+ std::fill(out->Vvdw.begin(), out->Vvdw.end(), 0.0_real);
+ std::fill(out->Vc.begin(), out->Vc.end(), 0.0_real);
+ std::fill(out->VSvdw.begin(), out->VSvdw.end(), 0.0_real);
+ std::fill(out->VSc.begin(), out->VSc.end(), 0.0_real);
}
/*! \brief Reduce the group-pair energy buffers produced by a SIMD kernel
* \tparam unrollj The unroll size for j-particles in the SIMD kernel
* \param[in] numGroups The number of energy groups
* \param[in] numGroups_2log Log2 of numGroups, rounded up
- * \param[in] vVdwSimd SIMD Van der Waals energy buffers
- * \param[in] vCoulombSimd SIMD Coulomb energy buffers
- * \param[in,out] vVdw Van der Waals energy output buffer
- * \param[in,out] vCoulomb Coulomb energy output buffer
+ * \param[in,out] out Struct with energy buffers
*/
template <int unrollj> static void
reduceGroupEnergySimdBuffers(int numGroups,
int numGroups_2log,
- const real * gmx_restrict vVdwSimd,
- const real * gmx_restrict vCoulombSimd,
- real * gmx_restrict vVdw,
- real * gmx_restrict vCoulomb)
+ nbnxn_atomdata_output_t *out)
{
- const int unrollj_half = unrollj/2;
+ const int unrollj_half = unrollj/2;
/* Energies are stored in SIMD registers with size 2^numGroups_2log */
- const int numGroupsStorage = (1 << numGroups_2log);
+ const int numGroupsStorage = (1 << numGroups_2log);
+
+ const real * gmx_restrict vVdwSimd = out->VSvdw.data();
+ const real * gmx_restrict vCoulombSimd = out->VSc.data();
+ real * gmx_restrict vVdw = out->Vvdw.data();
+ real * gmx_restrict vCoulomb = out->Vc.data();
/* The size of the SIMD energy group buffer array is:
* numGroups*numGroups*numGroupsStorage*unrollj_half*simd_width
void
nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg,
- const nbnxn_atomdata_t *nbat,
+ nbnxn_atomdata_t *nbat,
const interaction_const_t *ic,
rvec *shiftVectors,
int forceFlags,
}
}
+ const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
+
int vdwkt = 0;
if (ic->vdwtype == evdwCUT)
{
{
case eintmodNONE:
case eintmodPOTSHIFT:
- switch (nbat->comb_rule)
+ switch (nbatParams.comb_rule)
{
case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
case ljcrLB: vdwkt = vdwktLJCUT_COMBLB; break;
if (clearF == enbvClearFYes)
{
- clear_f(nbat, nb, out->f);
+ clear_f(nbat, nb, out->f.data());
}
real *fshift_p;
}
else
{
- fshift_p = out->fshift;
+ fshift_p = out->fshift.data();
if (clearF == enbvClearFYes)
{
nbnxn_kernel_noener_ref[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p);
break;
#ifdef GMX_NBNXN_SIMD_2XNN
nbnxn_kernel_noener_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p);
break;
#endif
nbnxn_kernel_noener_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p);
break;
#endif
GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
}
}
- else if (out->nV == 1)
+ else if (out->Vvdw.size() == 1)
{
/* A single energy group (pair) */
out->Vvdw[0] = 0;
nbnxn_kernel_ener_ref[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p,
- out->Vvdw,
- out->Vc);
+ out->Vvdw.data(),
+ out->Vc.data());
break;
#ifdef GMX_NBNXN_SIMD_2XNN
case nbnxnk4xN_SIMD_2xNN:
nbnxn_kernel_ener_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p,
- out->Vvdw,
- out->Vc);
+ out->Vvdw.data(),
+ out->Vc.data());
break;
#endif
#ifdef GMX_NBNXN_SIMD_4XN
nbnxn_kernel_ener_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p,
- out->Vvdw,
- out->Vc);
+ out->Vvdw.data(),
+ out->Vc.data());
break;
#endif
default:
nbnxn_kernel_energrp_ref[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p,
- out->Vvdw,
- out->Vc);
+ out->Vvdw.data(),
+ out->Vc.data());
break;
#ifdef GMX_NBNXN_SIMD_2XNN
case nbnxnk4xN_SIMD_2xNN:
nbnxn_kernel_energrp_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p,
- out->VSvdw,
- out->VSc);
+ out->VSvdw.data(),
+ out->VSc.data());
break;
#endif
#ifdef GMX_NBNXN_SIMD_4XN
nbnxn_kernel_energrp_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
ic,
shiftVectors,
- out->f,
+ out->f.data(),
fshift_p,
- out->VSvdw,
- out->VSc);
+ out->VSvdw.data(),
+ out->VSc.data());
break;
#endif
default:
switch (unrollj)
{
case 2:
- reduceGroupEnergySimdBuffers<2>(nbat->nenergrp,
- nbat->neg_2log,
- out->VSvdw, out->VSc,
- out->Vvdw, out->Vc);
+ reduceGroupEnergySimdBuffers<2>(nbatParams.nenergrp,
+ nbatParams.neg_2log,
+ out);
break;
case 4:
- reduceGroupEnergySimdBuffers<4>(nbat->nenergrp,
- nbat->neg_2log,
- out->VSvdw, out->VSc,
- out->Vvdw, out->Vc);
+ reduceGroupEnergySimdBuffers<4>(nbatParams.nenergrp,
+ nbatParams.neg_2log,
+ out);
break;
case 8:
- reduceGroupEnergySimdBuffers<8>(nbat->nenergrp,
- nbat->neg_2log,
- out->VSvdw, out->VSc,
- out->Vvdw, out->Vc);
+ reduceGroupEnergySimdBuffers<8>(nbatParams.nenergrp,
+ nbatParams.neg_2log,
+ out);
break;
default:
GMX_RELEASE_ASSERT(false, "Unsupported j-unroll size");
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* within this function.
*
* \param[in,out] nbvg The group (local/non-local) to compute interaction for
- * \param[in] nbat The atomdata for the interactions
+ * \param[in,out] nbat The atomdata for the interactions
* \param[in] ic Non-bonded interaction constants
* \param[in] shiftVectors The PBC shift vectors
* \param[in] forceFlags Flags that tell what to compute
*/
void
nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg,
- const nbnxn_atomdata_t *nbat,
+ nbnxn_atomdata_t *nbat,
const interaction_const_t *ic,
rvec *shiftVectors,
int forceFlags,
rvec *shift_vec,
int force_flags,
int clearF,
- real * f,
+ gmx::ArrayRef<real> f,
real * fshift,
real * Vc,
real * Vvdw)
{
const nbnxn_sci_t *nbln;
- const real *x;
gmx_bool bEner;
gmx_bool bEwald;
const real *Ftab = nullptr;
int int_bit;
real fexcl;
real c6, c12;
- const real * shiftvec;
- real * vdwparam;
- int * type;
const nbnxn_excl_t *excl[2];
int npair_tot, npair;
if (clearF == enbvClearFYes)
{
- clear_f(nbat, 0, f);
+ clear_f(nbat, 0, f.data());
}
bEner = ((force_flags & GMX_FORCE_ENERGY) != 0);
Ftab = iconst->tabq_coul_F;
}
- rcut2 = iconst->rcoulomb*iconst->rcoulomb;
- rvdw2 = iconst->rvdw*iconst->rvdw;
+ rcut2 = iconst->rcoulomb*iconst->rcoulomb;
+ rvdw2 = iconst->rvdw*iconst->rvdw;
- rlist2 = nbl->rlist*nbl->rlist;
+ rlist2 = nbl->rlist*nbl->rlist;
- type = nbat->type;
- facel = iconst->epsfac;
- shiftvec = shift_vec[0];
- vdwparam = nbat->nbfp;
- ntype = nbat->ntype;
+ const int *type = nbat->params().type.data();
+ facel = iconst->epsfac;
+ const real *shiftvec = shift_vec[0];
+ const real *vdwparam = nbat->params().nbfp.data();
+ ntype = nbat->params().numTypes;
- x = nbat->x;
+ const real *x = nbat->x().data();
npair_tot = 0;
nhwu = 0;
#include "gromacs/math/vectypes.h"
#include "gromacs/mdlib/nbnxn_pairlist.h"
#include "gromacs/mdtypes/forcerec.h"
+#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/real.h"
/* Reference (slow) kernel for nb n vs n GPU type pair lists */
rvec *shift_vec,
int force_flags,
int clearF,
- real * f,
+ gmx::ArrayRef<real> f,
real * fshift,
real * Vc,
real * Vvdw);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
cj = l_cj[cjind].cj;
#ifdef ENERGY_GROUPS
- egp_cj = nbat->energrp[cj];
+ egp_cj = nbatParams.energrp[cj];
#endif
for (i = 0; i < UNROLLI; i++)
{
#ifdef CALC_ENERGIES
#ifdef ENERGY_GROUPS
- Vvdw[egp_sh_i[i]+((egp_cj>>(nbat->neg_2log*j)) & egp_mask)] += VLJ;
+ Vvdw[egp_sh_i[i] + ((egp_cj >> (nbatParams.neg_2log*j)) & egp_mask)] += VLJ;
#else
Vvdw_ci += VLJ;
/* 1 flop for LJ energy addition */
#ifdef CALC_ENERGIES
#ifdef ENERGY_GROUPS
- Vc[egp_sh_i[i]+((egp_cj>>(nbat->neg_2log*j)) & egp_mask)] += vcoul;
+ Vc[egp_sh_i[i] + ((egp_cj >> (nbatParams.neg_2log*j)) & egp_mask)] += vcoul;
#else
Vc_ci += vcoul;
/* 1 flop for Coulomb energy addition */
)
{
const nbnxn_cj_t *l_cj;
- const int *type;
- const real *q;
- const real *shiftvec;
- const real *x;
- const real *nbfp;
real rcut2;
#ifdef VDW_CUTOFF_CHECK
real rvdw2;
#ifdef CALC_ENERGIES
real lje_vc;
#endif
- const real *ljc;
#endif
#ifdef CALC_COUL_RF
swF4 = 5*ic->vdw_switch.c5;
#endif
+ const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
+
#ifdef LJ_EWALD
lje_coeff2 = ic->ewaldcoeff_lj*ic->ewaldcoeff_lj;
lje_coeff6_6 = lje_coeff2*lje_coeff2*lje_coeff2/6.0;
lje_vc = ic->sh_lj_ewald;
#endif
- ljc = nbat->nbfp_comb;
+ const real *ljc = nbatParams.nbfp_comb.data();
#endif
#ifdef CALC_COUL_RF
#endif
#ifdef ENERGY_GROUPS
- egp_mask = (1<<nbat->neg_2log) - 1;
+ egp_mask = (1 << nbatParams.neg_2log) - 1;
#endif
- rcut2 = ic->rcoulomb*ic->rcoulomb;
+ rcut2 = ic->rcoulomb*ic->rcoulomb;
#ifdef VDW_CUTOFF_CHECK
- rvdw2 = ic->rvdw*ic->rvdw;
+ rvdw2 = ic->rvdw*ic->rvdw;
#endif
- ntype2 = nbat->ntype*2;
- nbfp = nbat->nbfp;
- q = nbat->q;
- type = nbat->type;
- facel = ic->epsfac;
- shiftvec = shift_vec[0];
- x = nbat->x;
+ ntype2 = nbatParams.numTypes*2;
+ const real *nbfp = nbatParams.nbfp.data();
+ const real *q = nbatParams.q.data();
+ const int *type = nbatParams.type.data();
+ facel = ic->epsfac;
+ const real *shiftvec = shift_vec[0];
+ const real *x = nbat->x().data();
l_cj = nbl->cj.data();
#else
for (i = 0; i < UNROLLI; i++)
{
- egp_sh_i[i] = ((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask)*nbat->nenergrp;
+ egp_sh_i[i] = ((nbatParams.energrp[ci] >> (i*nbatParams.neg_2log)) & egp_mask)*nbatParams.nenergrp;
}
#endif
#endif
{
int egp_ind;
#ifdef ENERGY_GROUPS
- egp_ind = egp_sh_i[i] + ((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask);
+ egp_ind = egp_sh_i[i] + ((nbatParams.energrp[ci] >> (i*nbatParams.neg_2log)) & egp_mask);
#else
egp_ind = 0;
#endif
#ifdef LJ_EWALD
/* LJ Ewald self interaction */
- Vvdw[egp_ind] += 0.5*nbat->nbfp[nbat->type[ci*UNROLLI+i]*(nbat->ntype + 1)*2]/6*lje_coeff6_6;
+ Vvdw[egp_ind] += 0.5*nbatParams.nbfp[nbatParams.type[ci*UNROLLI+i]*(nbatParams.numTypes + 1)*2]/6*lje_coeff6_6;
#endif
}
}
nbnxn_cj_t * gmx_restrict cjInner = nbl->cj.data();
const real * gmx_restrict shiftvec = shift_vec[0];
- const real * gmx_restrict x = nbat->x;
+ const real * gmx_restrict x = nbat->x().data();
const real rlist2 = rlistInner*rlistInner;
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
{
int egps_j;
#if UNROLLJ == 2
- egps_j = nbat->energrp[cj>>1];
+ egps_j = nbatParams.energrp[cj >> 1];
egp_jj[0] = ((egps_j >> ((cj & 1)*egps_jshift)) & egps_jmask)*egps_jstride;
#else
/* We assume UNROLLI <= UNROLLJ */
for (jdi = 0; jdi < UNROLLJ/UNROLLI; jdi++)
{
int jj;
- egps_j = nbat->energrp[cj*(UNROLLJ/UNROLLI)+jdi];
+ egps_j = nbatParams.energrp[cj*(UNROLLJ/UNROLLI) + jdi];
for (jj = 0; jj < (UNROLLI/2); jj++)
{
egp_jj[jdi*(UNROLLI/2)+jj] = ((egps_j >> (jj*egps_jshift)) & egps_jmask)*egps_jstride;
{
using namespace gmx;
const nbnxn_cj_t *l_cj;
- const real *q;
- const real *shiftvec;
- const real *x;
- real facel;
int ci, ci_sh;
int ish, ish3;
gmx_bool do_LJ, half_LJ, do_coul;
SimdBool diagonal_mask1_S0, diagonal_mask1_S2;
#endif
- unsigned *exclusion_filter;
SimdBitMask filter_S0, filter_S2;
SimdReal zero_S(0.0);
#endif
#ifdef LJ_COMB_LB
- const real *ljc;
-
SimdReal hsig_i_S0, seps_i_S0;
SimdReal hsig_i_S2, seps_i_S2;
#else
alignas(GMX_SIMD_ALIGNMENT) real pvdw_c6[2*UNROLLI*UNROLLJ];
real *pvdw_c12 = pvdw_c6 + UNROLLI*UNROLLJ;
#endif
-
-#if defined LJ_COMB_GEOM || defined LJ_EWALD_GEOM
- const real *ljc;
-#endif
#endif /* LJ_COMB_LB */
SimdReal minRsq_S;
int npair = 0;
#endif
+ const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
+
#if defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined LJ_EWALD_GEOM
- ljc = nbat->lj_comb;
+ const real * gmx_restrict ljc = nbatParams.lj_comb.data();
#endif
#if !(defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined FIX_LJ_C)
/* No combination rule used */
- real *nbfp_ptr = nbat->nbfp_aligned;
- const int *type = nbat->type;
+ const real * gmx_restrict nbfp_ptr = nbatParams.nbfp_aligned.data();
+ const int * gmx_restrict type = nbatParams.type.data();
#endif
/* Load j-i for the first i */
- diagonal_jmi_S = load<SimdReal>(nbat->simd_2xnn_diagonal_j_minus_i);
+ diagonal_jmi_S = load<SimdReal>(nbat->simdMasks.diagonal_2xnn_j_minus_i.data());
/* Generate all the diagonal masks as comparison results */
#if UNROLLI == UNROLLJ
diagonal_mask_S0 = (zero_S < diagonal_jmi_S);
/* Load masks for topology exclusion masking. filter_stride is
static const, so the conditional will be optimized away. */
#if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL
- exclusion_filter = nbat->simd_exclusion_filter64;
+ const std::uint64_t * gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter64.data();
#else
- exclusion_filter = nbat->simd_exclusion_filter;
+ const std::uint32_t * gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter.data();
#endif
/* Here we cast the exclusion filters from unsigned * to int * or real *.
rcvdw2_S = SimdReal(ic->rvdw*ic->rvdw);
#endif
- minRsq_S = SimdReal(NBNXN_MIN_RSQ);
+ minRsq_S = SimdReal(NBNXN_MIN_RSQ);
- q = nbat->q;
- facel = ic->epsfac;
- shiftvec = shift_vec[0];
- x = nbat->x;
+ const real * gmx_restrict q = nbatParams.q.data();
+ const real facel = ic->epsfac;
+ const real * gmx_restrict shiftvec = shift_vec[0];
+ const real * gmx_restrict x = nbat->x().data();
#ifdef FIX_LJ_C
#endif /* FIX_LJ_C */
#ifdef ENERGY_GROUPS
- egps_ishift = nbat->neg_2log;
+ egps_ishift = nbatParams.neg_2log;
egps_imask = (1<<egps_ishift) - 1;
- egps_jshift = 2*nbat->neg_2log;
+ egps_jshift = 2*nbatParams.neg_2log;
egps_jmask = (1<<egps_jshift) - 1;
egps_jstride = (UNROLLJ>>1)*UNROLLJ;
/* Major division is over i-particle energy groups, determine the stride */
- Vstride_i = nbat->nenergrp*(1<<nbat->neg_2log)*egps_jstride;
+ Vstride_i = nbatParams.nenergrp*(1 << nbatParams.neg_2log)*egps_jstride;
#endif
l_cj = nbl->cj.data();
half_LJ = (((ciEntry.shift & NBNXN_CI_HALF_LJ(0)) != 0) || !do_LJ) && do_coul;
#ifdef ENERGY_GROUPS
- egps_i = nbat->energrp[ci];
+ egps_i = nbatParams.energrp[ci];
{
int ia, egp_ia;
{
real c6_i;
- c6_i = nbat->nbfp[nbat->type[sci+ia]*(nbat->ntype + 1)*2]/6;
+ c6_i = nbatParams.nbfp[nbatParams.type[sci+ia]*(nbatParams.numTypes + 1)*2]/6;
#ifdef ENERGY_GROUPS
vvdwtp[ia][((egps_i>>(ia*egps_ishift)) & egps_imask)*egps_jstride]
#else
c12s_S2 = loadU1DualHsimd(ljc+sci2+STRIDE+2);
}
#elif !defined LJ_COMB_LB && !defined FIX_LJ_C
- const real *nbfp0 = nbfp_ptr + type[sci ]*nbat->ntype*c_simdBestPairAlignment;
- const real *nbfp1 = nbfp_ptr + type[sci+1]*nbat->ntype*c_simdBestPairAlignment;
+ const int numTypes = nbatParams.numTypes;
+ const real *nbfp0 = nbfp_ptr + type[sci ]*numTypes*c_simdBestPairAlignment;
+ const real *nbfp1 = nbfp_ptr + type[sci+1]*numTypes*c_simdBestPairAlignment;
const real *nbfp2 = nullptr, *nbfp3 = nullptr;
if (!half_LJ)
{
- nbfp2 = nbfp_ptr + type[sci+2]*nbat->ntype*c_simdBestPairAlignment;
- nbfp3 = nbfp_ptr + type[sci+3]*nbat->ntype*c_simdBestPairAlignment;
+ nbfp2 = nbfp_ptr + type[sci+2]*numTypes*c_simdBestPairAlignment;
+ nbfp3 = nbfp_ptr + type[sci+3]*numTypes*c_simdBestPairAlignment;
}
#endif
#endif
nbnxn_cj_t * gmx_restrict cjInner = nbl->cj.data();
const real * gmx_restrict shiftvec = shift_vec[0];
- const real * gmx_restrict x = nbat->x;
+ const real * gmx_restrict x = nbat->x().data();
const SimdReal rlist2_S(rlistInner*rlistInner);
SimdBitMask gmx_unused filter_S1,
SimdBitMask gmx_unused filter_S2,
SimdBitMask gmx_unused filter_S3,
- real gmx_unused *simd_interaction_array,
+ const real gmx_unused *simd_interaction_array,
gmx::SimdBool *interact_S0,
gmx::SimdBool *interact_S1,
gmx::SimdBool *interact_S2,
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
gmx_load_simd_4xn_interactions(l_cj[cjind].excl,
filter_S0, filter_S1,
filter_S2, filter_S3,
- nbat->simd_interaction_array,
+ nbat->simdMasks.interaction_array.data(),
&interact_S0, &interact_S1,
&interact_S2, &interact_S3);
#endif /* CHECK_EXCLS */
{
int egps_j;
#if UNROLLJ == 2
- egps_j = nbat->energrp[cj>>1];
+ egps_j = nbatParams.energrp[cj >> 1];
egp_jj[0] = ((egps_j >> ((cj & 1)*egps_jshift)) & egps_jmask)*egps_jstride;
#else
/* We assume UNROLLI <= UNROLLJ */
for (jdi = 0; jdi < UNROLLJ/UNROLLI; jdi++)
{
int jj;
- egps_j = nbat->energrp[cj*(UNROLLJ/UNROLLI)+jdi];
+ egps_j = nbatParams.energrp[cj*(UNROLLJ/UNROLLI) + jdi];
for (jj = 0; jj < (UNROLLI/2); jj++)
{
egp_jj[jdi*(UNROLLI/2)+jj] = ((egps_j >> (jj*egps_jshift)) & egps_jmask)*egps_jstride;
{
using namespace gmx;
const nbnxn_cj_t *l_cj;
- const real * q;
- const real *shiftvec;
- const real *x;
- real facel;
int ci, ci_sh;
int ish, ish3;
gmx_bool do_LJ, half_LJ, do_coul;
SimdBool diagonal_mask1_S0, diagonal_mask1_S1, diagonal_mask1_S2, diagonal_mask1_S3;
#endif
-#if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL
- std::uint64_t *exclusion_filter;
-#else
- std::uint32_t *exclusion_filter;
-#endif
SimdBitMask filter_S0, filter_S1, filter_S2, filter_S3;
SimdReal zero_S(0.0);
#endif
#ifdef LJ_COMB_LB
- const real *ljc;
-
SimdReal hsig_i_S0, seps_i_S0;
SimdReal hsig_i_S1, seps_i_S1;
SimdReal hsig_i_S2, seps_i_S2;
SimdReal hsig_i_S3, seps_i_S3;
-#else
-
-#if defined LJ_COMB_GEOM || defined LJ_EWALD_GEOM
- const real *ljc;
-#endif
#endif /* LJ_COMB_LB */
SimdReal minRsq_S;
int npair = 0;
#endif
+ const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
+
#if defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined LJ_EWALD_GEOM
- ljc = nbat->lj_comb;
+ const real * gmx_restrict ljc = nbatParams.lj_comb.data();
#endif
#if !(defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined FIX_LJ_C)
/* No combination rule used */
- real *nbfp_ptr = nbat->nbfp_aligned;
- const int *type = nbat->type;
+ const real * gmx_restrict nbfp_ptr = nbatParams.nbfp_aligned.data();
+ const int * gmx_restrict type = nbatParams.type.data();
#endif
/* Load j-i for the first i */
- diagonal_jmi_S = load<SimdReal>(nbat->simd_4xn_diagonal_j_minus_i);
+ diagonal_jmi_S = load<SimdReal>(nbat->simdMasks.diagonal_4xn_j_minus_i.data());
/* Generate all the diagonal masks as comparison results */
#if UNROLLI == UNROLLJ
diagonal_mask_S0 = (zero_S < diagonal_jmi_S);
#if UNROLLI == 2*UNROLLJ
/* Load j-i for the second half of the j-cluster */
- diagonal_jmi_S = load<SimdReal>(nbat->simd_4xn_diagonal_j_minus_i + UNROLLJ);
+ diagonal_jmi_S = load<SimdReal>(nbat->simdMasks.diagonal_4xn_j_minus_i.data() + UNROLLJ);
#endif
diagonal_mask1_S0 = (zero_S < diagonal_jmi_S);
#endif
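
The diagonal_4xn_j_minus_i and diagonal_2xnn_j_minus_i arrays hold
precomputed j-atom minus i-atom offsets, so the comparison
zero_S < diagonal_jmi_S above yields the whole per-lane diagonal mask in one
SIMD operation. A scalar analogue, illustrative only:

static bool diagonalPairInteracts(int i, int j)
{
    /* For a cluster paired with itself only j > i pairs interact,
     * which skips the self pair and avoids double counting;
     * this is the scalar form of (zero_S < diagonal_jmi_S). */
    return (j - i) > 0;
}
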
#if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL
- exclusion_filter = nbat->simd_exclusion_filter64;
+ const std::uint64_t * gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter64.data();
#else
- exclusion_filter = nbat->simd_exclusion_filter;
+ const std::uint32_t * gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter.data();
#endif
/* Here we cast the exclusion filters from unsigned * to int * or real *.
rcvdw2_S = SimdReal(ic->rvdw*ic->rvdw);
#endif
- minRsq_S = SimdReal(NBNXN_MIN_RSQ);
+ minRsq_S = SimdReal(NBNXN_MIN_RSQ);
- q = nbat->q;
- facel = ic->epsfac;
- shiftvec = shift_vec[0];
- x = nbat->x;
+ const real * gmx_restrict q = nbatParams.q.data();
+ const real facel = ic->epsfac;
+ const real * gmx_restrict shiftvec = shift_vec[0];
+ const real * gmx_restrict x = nbat->x().data();
#ifdef FIX_LJ_C
alignas(GMX_SIMD_ALIGNMENT) real pvdw_c6[2*UNROLLI*UNROLLJ];
#endif /* FIX_LJ_C */
#ifdef ENERGY_GROUPS
- egps_ishift = nbat->neg_2log;
+ egps_ishift = nbatParams.neg_2log;
egps_imask = (1<<egps_ishift) - 1;
- egps_jshift = 2*nbat->neg_2log;
+ egps_jshift = 2*nbatParams.neg_2log;
egps_jmask = (1<<egps_jshift) - 1;
egps_jstride = (UNROLLJ>>1)*UNROLLJ;
/* Major division is over i-particle energy groups, determine the stride */
- Vstride_i = nbat->nenergrp*(1<<nbat->neg_2log)*egps_jstride;
+ Vstride_i = nbatParams.nenergrp*(1 << nbatParams.neg_2log)*egps_jstride;
#endif
l_cj = nbl->cj.data();
half_LJ = (((ciEntry.shift & NBNXN_CI_HALF_LJ(0)) != 0) || !do_LJ) && do_coul;
#ifdef ENERGY_GROUPS
- egps_i = nbat->energrp[ci];
+ egps_i = nbatParams.energrp[ci];
{
int ia, egp_ia;
{
real c6_i;
- c6_i = nbat->nbfp[nbat->type[sci+ia]*(nbat->ntype + 1)*2]/6;
+ c6_i = nbatParams.nbfp[nbatParams.type[sci+ia]*(nbatParams.numTypes + 1)*2]/6;
#ifdef ENERGY_GROUPS
vvdwtp[ia][((egps_i>>(ia*egps_ishift)) & egps_imask)*egps_jstride]
#else
c12s_S3 = setZero();
}
#else
- const real *nbfp0 = nbfp_ptr + type[sci ]*nbat->ntype*c_simdBestPairAlignment;
- const real *nbfp1 = nbfp_ptr + type[sci+1]*nbat->ntype*c_simdBestPairAlignment;
+ const int numTypes = nbatParams.numTypes;
+ const real *nbfp0 = nbfp_ptr + type[sci ]*numTypes*c_simdBestPairAlignment;
+ const real *nbfp1 = nbfp_ptr + type[sci+1]*numTypes*c_simdBestPairAlignment;
const real *nbfp2 = nullptr, *nbfp3 = nullptr;
if (!half_LJ)
{
- nbfp2 = nbfp_ptr + type[sci+2]*nbat->ntype*c_simdBestPairAlignment;
- nbfp3 = nbfp_ptr + type[sci+3]*nbat->ntype*c_simdBestPairAlignment;
+ nbfp2 = nbfp_ptr + type[sci+2]*numTypes*c_simdBestPairAlignment;
+ nbfp3 = nbfp_ptr + type[sci+3]*numTypes*c_simdBestPairAlignment;
}
#endif
#endif
nbnxn_cj_t * gmx_restrict cjInner = nbl->cj.data();
const real * gmx_restrict shiftvec = shift_vec[0];
- const real * gmx_restrict x = nbat->x;
+ const real * gmx_restrict x = nbat->x().data();
const SimdReal rlist2_S(rlistInner*rlistInner);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
}
/* HtoD x, q */
- ocl_copy_H2D_async(adat->xq, nbatom->x + adat_begin * 4, adat_begin*sizeof(float)*4,
+ ocl_copy_H2D_async(adat->xq, nbatom->x().data() + adat_begin * 4, adat_begin*sizeof(float)*4,
adat_len * sizeof(float) * 4, stream, bDoTime ? t->nb_h2d[iloc].fetchNextEvent() : nullptr);
if (bDoTime)
* (and energies/shift forces if required).
*/
void nbnxn_gpu_launch_cpyback(gmx_nbnxn_ocl_t *nb,
- const struct nbnxn_atomdata_t *nbatom,
+ struct nbnxn_atomdata_t *nbatom,
int flags,
int aloc,
bool haveOtherWork)
}
/* DtoH f */
- ocl_copy_D2H_async(nbatom->out[0].f + adat_begin * 3, adat->f, adat_begin*3*sizeof(float),
+ ocl_copy_D2H_async(nbatom->out[0].f.data() + adat_begin * 3, adat->f, adat_begin*3*sizeof(float),
(adat_len)* adat->f_elem_size, stream, bDoTime ? t->nb_d2h[iloc].fetchNextEvent() : nullptr);
/* kick off work */
/* An element of the shift_vec device buffer has the same size as one element
of the host side shift_vec buffer. */
- ad->shift_vec_elem_size = sizeof(*nbnxn_atomdata_t::shift_vec);
+ ad->shift_vec_elem_size = sizeof(*nbnxn_atomdata_t::shift_vec.data());
ad->shift_vec = clCreateBuffer(runData->context, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
SHIFTS * ad->shift_vec_elem_size, nullptr, &cl_error);
static void init_nbparam(cl_nbparam_t *nbp,
const interaction_const_t *ic,
const NbnxnListParameters *listParams,
- const nbnxn_atomdata_t *nbat,
+ const nbnxn_atomdata_t::Params &nbatParams,
const gmx_device_runtime_data_t *runData)
{
cl_int cl_error;
set_cutoff_parameters(nbp, ic, listParams);
map_interaction_types_to_gpu_kernel_flavors(ic,
- nbat->comb_rule,
+ nbatParams.comb_rule,
&(nbp->eeltype),
&(nbp->vdwtype));
{
if (ic->ljpme_comb_rule == ljcrGEOM)
{
- GMX_ASSERT(nbat->comb_rule == ljcrGEOM, "Combination rule mismatch!");
+ GMX_ASSERT(nbatParams.comb_rule == ljcrGEOM, "Combination rule mismatch!");
}
else
{
- GMX_ASSERT(nbat->comb_rule == ljcrLB, "Combination rule mismatch!");
+ GMX_ASSERT(nbatParams.comb_rule == ljcrLB, "Combination rule mismatch!");
}
}
/* generate table for PME */
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
}
- int nnbfp = 2*nbat->ntype*nbat->ntype;
- int nnbfp_comb = 2*nbat->ntype;
+ const int nnbfp = 2*nbatParams.numTypes*nbatParams.numTypes;
+ const int nnbfp_comb = 2*nbatParams.numTypes;
{
/* Switched from using textures to using buffers */
&array_format, nnbfp, 1, 0, nbat->nbfp, &cl_error);
*/
- nbp->nbfp_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
- nnbfp*sizeof(cl_float), nbat->nbfp, &cl_error);
+ nbp->nbfp_climg2d =
+ clCreateBuffer(runData->context,
+ CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+ nnbfp*sizeof(cl_float),
+ const_cast<float *>(nbatParams.nbfp.data()),
+ &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
// TODO: decide which alternative is most efficient - textures or buffers.
/* nbp->nbfp_comb_climg2d = clCreateImage2D(runData->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
&array_format, nnbfp_comb, 1, 0, nbat->nbfp_comb, &cl_error);*/
- nbp->nbfp_comb_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
- nnbfp_comb*sizeof(cl_float), nbat->nbfp_comb, &cl_error);
+ nbp->nbfp_comb_climg2d =
+ clCreateBuffer(runData->context,
+ CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+ nnbfp_comb*sizeof(cl_float),
+ const_cast<float *>(nbatParams.nbfp_comb.data()),
+ &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
}
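
Note on the const_cast above: clCreateBuffer declares its host_ptr parameter
as a non-const void * even though CL_MEM_COPY_HOST_PTR only reads from it,
so passing the now const-qualified parameter tables requires casting away
const. A hypothetical wrapper that localizes the cast (not part of this
change):

#include <CL/cl.h>

static cl_mem createReadOnlyBufferFromHost(cl_context  context,
                                           size_t      bytes,
                                           const void *hostData,
                                           cl_int     *err)
{
    /* Safe: with CL_MEM_COPY_HOST_PTR the host memory is only copied from */
    return clCreateBuffer(context,
                          CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
                          bytes,
                          const_cast<void *>(hostData),
                          err);
}
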
static void nbnxn_ocl_init_const(gmx_nbnxn_ocl_t *nb,
const interaction_const_t *ic,
const NbnxnListParameters *listParams,
- const nbnxn_atomdata_t *nbat)
+ const nbnxn_atomdata_t::Params &nbatParams)
{
- init_atomdata_first(nb->atdat, nbat->ntype, nb->dev_rundata);
- init_nbparam(nb->nbparam, ic, listParams, nbat, nb->dev_rundata);
+ init_atomdata_first(nb->atdat, nbatParams.numTypes, nb->dev_rundata);
+ init_nbparam(nb->nbparam, ic, listParams, nbatParams, nb->dev_rundata);
}
init_timings(nb->timings);
}
- nbnxn_ocl_init_const(nb, ic, listParams, nbat);
+ nbnxn_ocl_init_const(nb, ic, listParams, nbat->params());
/* Enable LJ param manual prefetch for AMD or Intel or if we request through env. var.
* TODO: decide about NVIDIA
/* only if we have a dynamic box */
if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
{
- ocl_copy_H2D_async(adat->shift_vec, nbatom->shift_vec, 0,
+ ocl_copy_H2D_async(adat->shift_vec, nbatom->shift_vec.data(), 0,
SHIFTS * adat->shift_vec_elem_size, ls, nullptr);
adat->bShiftVecUploaded = CL_TRUE;
}
cl_atomdata_t *d_atdat = nb->atdat;
cl_command_queue ls = nb->stream[eintLocal];
- natoms = nbat->natoms;
+ natoms = nbat->numAtoms();
realloced = false;
if (bDoTime)
if (useLjCombRule(nb->nbparam->vdwtype))
{
- ocl_copy_H2D_async(d_atdat->lj_comb, nbat->lj_comb, 0,
+ ocl_copy_H2D_async(d_atdat->lj_comb, nbat->params().lj_comb.data(), 0,
natoms*sizeof(cl_float2), ls, bDoTime ? timers->atdat.fetchNextEvent() : nullptr);
}
else
{
- ocl_copy_H2D_async(d_atdat->atom_types, nbat->type, 0,
+ ocl_copy_H2D_async(d_atdat->atom_types, nbat->params().type.data(), 0,
natoms*sizeof(int), ls, bDoTime ? timers->atdat.fetchNextEvent() : nullptr);
}
#include <cstddef>
+#include "gromacs/gpu_utils/hostallocator.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/mdlib/nbnxn_consts.h"
#include "gromacs/mdtypes/nblist.h"
struct NbnxnPairlistGpuWork;
struct tMPI_Atomic;
+/* Convenience type for vector with aligned memory */
+template<typename T>
+using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>;
+
/* Convenience type for vector that avoids initialization at resize() */
template<typename T>
using FastVector = std::vector < T, gmx::DefaultInitializationAllocator < T>>;
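
FastVector gets this behavior from an allocator whose construct() performs
default-initialization instead of value-initialization, so resize() does not
zero new trivial elements. A minimal sketch of that idiom, assuming nothing
about the real gmx::DefaultInitializationAllocator beyond its purpose:

#include <memory>
#include <type_traits>
#include <utility>

template<typename T, typename A = std::allocator<T>>
class DefaultInitAllocatorSketch : public A
{
    public:
        template<typename U>
        struct rebind
        {
            using other = DefaultInitAllocatorSketch<U, typename std::allocator_traits<A>::template rebind_alloc<U>>;
        };

        /* Called by resize() for new elements: default-initialize only */
        template<typename U>
        void construct(U *p) noexcept(std::is_nothrow_default_constructible<U>::value)
        {
            ::new (static_cast<void *>(p)) U;
        }

        /* All other constructions go to the underlying allocator */
        template<typename U, typename... Args>
        void construct(U *p, Args &&... args)
        {
            std::allocator_traits<A>::construct(static_cast<A &>(*this), p, std::forward<Args>(args)...);
        }
};
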
nbatXYZ, nbatXYZQ, nbatX4, nbatX8
};
-typedef struct {
- real *f; /* f, size natoms*fstride */
- real *fshift; /* Shift force array, size SHIFTS*DIM */
- int nV; /* The size of *Vvdw and *Vc */
- real *Vvdw; /* Temporary Van der Waals group energy storage */
- real *Vc; /* Temporary Coulomb group energy storage */
- int nVS; /* The size of *VSvdw and *VSc */
- real *VSvdw; /* Temporary SIMD Van der Waals group energy storage */
- real *VSc; /* Temporary SIMD Coulomb group energy storage */
-} nbnxn_atomdata_output_t;
+// Struct that holds force and energy output buffers
+struct nbnxn_atomdata_output_t
+{
+ /* Constructor
+ *
+ * \param[in] nb_kernel_type Type of non-bonded kernel
+ * \param[in] numEnergyGroups The number of energy groups
+ * \param[in] simdEnergyBufferStride Stride for entries in the energy buffers for SIMD kernels
+ * \param[in] pinningPolicy Sets the pinning policy for all buffers used on the GPU
+ */
+ nbnxn_atomdata_output_t(int nb_kernel_type,
+ int numEnergyGroups,
+                            int simdEnergyBufferStride,
+ gmx::PinningPolicy pinningPolicy);
+
+ gmx::HostVector<real> f; // f, size natoms*fstride
+ gmx::HostVector<real> fshift; // Shift force array, size SHIFTS*DIM
+ gmx::HostVector<real> Vvdw; // Temporary Van der Waals group energy storage
+ gmx::HostVector<real> Vc; // Temporary Coulomb group energy storage
+ AlignedVector<real> VSvdw; // Temporary SIMD Van der Waals group energy storage
+ AlignedVector<real> VSc; // Temporary SIMD Coulomb group energy storage
+};
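
With the buffers owned by a struct that has a real constructor, the
per-thread output objects can be built in place. An illustrative fragment
(numThreads, kernelType, numEnergyGroups, simdStride and pinningPolicy are
assumed to come from the caller):

std::vector<nbnxn_atomdata_output_t> out;
out.reserve(numThreads);
for (int th = 0; th < numThreads; th++)
{
    /* One force/energy output buffer set per OpenMP thread */
    out.emplace_back(kernelType, numEnergyGroups, simdStride, pinningPolicy);
}
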
/* Block size in atoms for the non-bonded thread force-buffer reduction,
* should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
ljcrGEOM, ljcrLB, ljcrNONE, ljcrNR
};
-typedef struct nbnxn_atomdata_t { //NOLINT(clang-analyzer-optin.performance.Padding)
- nbnxn_alloc_t *alloc;
- nbnxn_free_t *free;
- int ntype; /* The number of different atom types */
- real *nbfp; /* Lennard-Jones 6*C6 and 12*C12 params, size ntype^2*2 */
- int comb_rule; /* Combination rule, see enum above */
- real *nbfp_comb; /* LJ parameter per atom type, size ntype*2 */
- real *nbfp_aligned; /* As nbfp, but with an alignment (stride) suitable
- * for the present SIMD architectures
- */
- int natoms; /* Number of atoms */
- int natoms_local; /* Number of local atoms */
- int *type; /* Atom types */
- real *lj_comb; /* LJ parameters per atom for combining for pairs */
- int XFormat; /* The format of x (and q), enum */
- int FFormat; /* The format of f, enum */
- real *q; /* Charges, can be NULL if incorporated in x */
- int na_c; /* The number of atoms per cluster */
- int nenergrp; /* The number of energy groups */
- int neg_2log; /* Log2 of nenergrp */
- int *energrp; /* The energy groups per cluster, can be NULL */
- gmx_bool bDynamicBox; /* Do we need to update shift_vec every step? */
- rvec *shift_vec; /* Shift vectors, copied from t_forcerec */
- int xstride; /* stride for a coordinate in x (usually 3 or 4) */
- int fstride; /* stride for a coordinate in f (usually 3 or 4) */
- real *x; /* x and possibly q, size natoms*xstride */
-
- /* j-atom minus i-atom index for generating self and Newton exclusions
- * cluster-cluster pairs of the diagonal, for 4xn and 2xnn kernels.
+/* Struct that stores atom-related data for the nbnxn module
+ *
+ * Note: performance would improve slightly if the std::vector containers
+ * in this struct did not initialize their elements during resize().
+ */
+struct nbnxn_atomdata_t
+{ //NOLINT(clang-analyzer-optin.performance.Padding)
+ struct Params
+ {
+ /* Constructor
+ *
+         * \param[in] pinningPolicy  Sets the pinning policy for all data that might be transferred to a GPU
+ */
+ Params(gmx::PinningPolicy pinningPolicy);
+
+ // The number of different atom types
+ int numTypes;
+        // Lennard-Jones 6*C6 and 12*C12 parameters, size numTypes^2*2
+ gmx::HostVector<real> nbfp;
+ // Combination rule, see enum defined above
+ int comb_rule;
+ // LJ parameters per atom type, size numTypes*2
+ gmx::HostVector<real> nbfp_comb;
+ // As nbfp, but with a stride for the present SIMD architecture
+ AlignedVector<real> nbfp_aligned;
+ // Atom types per atom
+ gmx::HostVector<int> type;
+ // LJ parameters per atom for fast SIMD loading
+ gmx::HostVector<real> lj_comb;
+ // Charges per atom, not set with format nbatXYZQ
+ gmx::HostVector<real> q;
+ // The number of energy groups
+ int nenergrp;
+        // log2(nenergrp)
+ int neg_2log;
+ // The energy groups, one int entry per cluster, only set when needed
+ gmx::HostVector<int> energrp;
+ };
+
+ // Diagonal and topology exclusion helper data for all SIMD kernels
+ struct SimdMasks
+ {
+ SimdMasks();
+
+ // Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
+ AlignedVector<real> diagonal_4xn_j_minus_i;
+        // Helper data for setting up diagonal exclusion masks in the SIMD 2xNN kernels
+ AlignedVector<real> diagonal_2xnn_j_minus_i;
+ // Filters for topology exclusion masks for the SIMD kernels
+ AlignedVector<uint32_t> exclusion_filter;
+        // Filters for topology exclusion masks for double-precision SIMD kernels without SIMD int32 logical support
+ AlignedVector<uint64_t> exclusion_filter64;
+ // Array of masks needed for exclusions
+ AlignedVector<real> interaction_array;
+ };
+
+ /* Constructor
+ *
+     * \param[in] pinningPolicy  Sets the pinning policy for all data that might be transferred to a GPU
*/
- real *simd_4xn_diagonal_j_minus_i;
- real *simd_2xnn_diagonal_j_minus_i;
- /* Filters for topology exclusion masks for the SIMD kernels. */
- uint32_t *simd_exclusion_filter;
- uint64_t *simd_exclusion_filter64; //!< Used for double w/o SIMD int32 logical support
- real *simd_interaction_array; /* Array of masks needed for exclusions */
- int nout; /* The number of force arrays */
- nbnxn_atomdata_output_t *out; /* Output data structures */
- int nalloc; /* Allocation size of all arrays (for x/f *x/fstride) */
- gmx_bool bUseBufferFlags; /* Use the flags or operate on all atoms */
- nbnxn_buffer_flags_t buffer_flags; /* Flags for buffer zeroing+reduc. */
- gmx_bool bUseTreeReduce; /* Use tree for force reduction */
- tMPI_Atomic *syncStep; /* Synchronization step for tree reduce */
-} nbnxn_atomdata_t;
+ nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy);
+
+ /* Returns a const reference to the parameters */
+ const Params ¶ms() const
+ {
+ return params_;
+ }
+
+    /* Returns a non-const reference to the parameters; deprecated, use params() for read access */
+ Params ¶msDeprecated()
+ {
+ return params_;
+ }
+
+ /* Returns the current total number of atoms stored */
+ int numAtoms() const
+ {
+ return numAtoms_;
+ }
+
+    /* Returns a read-only view of the coordinate buffer (and q with xFormat==nbatXYZQ) */
+ gmx::ArrayRef<const real> x() const
+ {
+ return x_;
+ }
+
+    /* Returns a modifiable view of the coordinate buffer (and q with xFormat==nbatXYZQ) */
+ gmx::ArrayRef<real> x()
+ {
+ return x_;
+ }
+
+ /* Resizes the coordinate buffer and sets the number of atoms */
+ void resizeCoordinateBuffer(int numAtoms);
+
+ /* Resizes the force buffers for the current number of atoms */
+ void resizeForceBuffers();
+
+ private:
+ // The LJ and charge parameters
+ Params params_;
+ // The total number of atoms currently stored
+ int numAtoms_;
+ public:
+ int natoms_local; /* Number of local atoms */
+ int XFormat; /* The format of x (and q), enum */
+ int FFormat; /* The format of f, enum */
+ gmx_bool bDynamicBox; /* Do we need to update shift_vec every step? */
+ gmx::HostVector<gmx::RVec> shift_vec; /* Shift vectors, copied from t_forcerec */
+ int xstride; /* stride for a coordinate in x (usually 3 or 4) */
+ int fstride; /* stride for a coordinate in f (usually 3 or 4) */
+ private:
+ gmx::HostVector<real> x_; /* x and possibly q, size natoms*xstride */
+
+ public:
+ // Masks for handling exclusions in the SIMD kernels
+ const SimdMasks simdMasks;
+
+ /* Output data */
+ std::vector<nbnxn_atomdata_output_t> out; /* Output data structures, 1 per thread */
+
+ /* Reduction related data */
+ gmx_bool bUseBufferFlags; /* Use the flags or operate on all atoms */
+ nbnxn_buffer_flags_t buffer_flags; /* Flags for buffer zeroing+reduc. */
+ gmx_bool bUseTreeReduce; /* Use tree for force reduction */
+ tMPI_Atomic *syncStep; /* Synchronization step for tree reduce */
+};
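
An illustrative use of the encapsulated interface declared above; numAtoms is
assumed, and gmx::PinningPolicy::PinnedIfSupported is one of the pinning
policy values:

nbnxn_atomdata_t nbat(gmx::PinningPolicy::PinnedIfSupported);
nbat.resizeCoordinateBuffer(numAtoms);                        /* sets numAtoms() */
gmx::ArrayRef<real>             xBuffer    = nbat.x();        /* mutable view */
const nbnxn_atomdata_t::Params &nbatParams = nbat.params();   /* read-only */
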
#endif
reallocate_nblist(nlist);
}
- ngid = nbat->nenergrp;
+ const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
+
+ ngid = nbatParams.nenergrp;
if (ngid*jGrid.na_cj > gmx::index(sizeof(gid_cj)*8))
{
iGrid.na_c, jGrid.na_cj, (sizeof(gid_cj)*8)/jGrid.na_cj);
}
- egp_shift = nbat->neg_2log;
- egp_mask = (1<<nbat->neg_2log) - 1;
+ egp_shift = nbatParams.neg_2log;
+ egp_mask = (1 << egp_shift) - 1;
/* Loop over the atoms in the i sub-cell */
bFEP_i_all = TRUE;
if (ngid > 1)
{
- gid_i = (nbat->energrp[ci] >> (egp_shift*i)) & egp_mask;
+ gid_i = (nbatParams.energrp[ci] >> (egp_shift*i)) & egp_mask;
}
for (int cj_ind = cj_ind_start; cj_ind < cj_ind_end; cj_ind++)
fep_cj = jGrid.fep[cjr];
if (ngid > 1)
{
- gid_cj = nbat->energrp[cja];
+ gid_cj = nbatParams.energrp[cja];
}
}
else if (2*jGrid.na_cj == jGrid.na_c)
fep_cj = (jGrid.fep[cjr>>1] >> ((cjr&1)*jGrid.na_cj)) & ((1<<jGrid.na_cj) - 1);
if (ngid > 1)
{
- gid_cj = nbat->energrp[cja>>1] >> ((cja&1)*jGrid.na_cj*egp_shift) & ((1<<(jGrid.na_cj*egp_shift)) - 1);
+ gid_cj = nbatParams.energrp[cja>>1] >> ((cja&1)*jGrid.na_cj*egp_shift) & ((1<<(jGrid.na_cj*egp_shift)) - 1);
}
}
else
fep_cj = jGrid.fep[cjr*2] + (jGrid.fep[cjr*2+1] << jGrid.na_c);
if (ngid > 1)
{
- gid_cj = nbat->energrp[cja*2] + (nbat->energrp[cja*2+1] << (jGrid.na_c*egp_shift));
+ gid_cj = nbatParams.energrp[cja*2] + (nbatParams.energrp[cja*2+1] << (jGrid.na_c*egp_shift));
}
}
bFEP_i = ((iGrid.fep[c_abs - iGrid.cell0*c_gpuNumClusterPerCell] & (1 << i)) != 0u);
- xi = nbat->x[ind_i*nbat->xstride+XX] + shx;
- yi = nbat->x[ind_i*nbat->xstride+YY] + shy;
- zi = nbat->x[ind_i*nbat->xstride+ZZ] + shz;
+ xi = nbat->x()[ind_i*nbat->xstride+XX] + shx;
+ yi = nbat->x()[ind_i*nbat->xstride+YY] + shy;
+ zi = nbat->x()[ind_i*nbat->xstride+ZZ] + shz;
if ((nlist->nrj + cj4_ind_end - cj4_ind_start)*c_nbnxnGpuJgroupSize*nbl->na_cj > nlist->maxnrj)
{
excl_pair = a_mod_wj(j)*nbl->na_ci + i;
excl_bit = (1U << (gcj*c_gpuNumClusterPerCell + c));
- dx = nbat->x[ind_j*nbat->xstride+XX] - xi;
- dy = nbat->x[ind_j*nbat->xstride+YY] - yi;
- dz = nbat->x[ind_j*nbat->xstride+ZZ] - zi;
+ dx = nbat->x()[ind_j*nbat->xstride+XX] - xi;
+ dy = nbat->x()[ind_j*nbat->xstride+YY] - yi;
+ dz = nbat->x()[ind_j*nbat->xstride+ZZ] - zi;
/* The unpruned GPU list has more than 2/3
* of the atom pairs beyond rlist. Using
makeClusterListSimple(jGrid,
nbl, ci, firstCell, lastCell,
excludeSubDiagonal,
- nbat->x,
+ nbat->x().data(),
rlist2, rbb2,
numDistanceChecks);
break;
makeClusterListSimd4xn(jGrid,
nbl, ci, firstCell, lastCell,
excludeSubDiagonal,
- nbat->x,
+ nbat->x().data(),
rlist2, rbb2,
numDistanceChecks);
break;
makeClusterListSimd2xnn(jGrid,
nbl, ci, firstCell, lastCell,
excludeSubDiagonal,
- nbat->x,
+ nbat->x().data(),
rlist2, rbb2,
numDistanceChecks);
break;
make_cluster_list_supersub(iGrid, jGrid,
nbl, ci, cj,
excludeSubDiagonal,
- nbat->xstride, nbat->x,
+ nbat->xstride, nbat->x().data(),
rlist2, rbb2,
numDistanceChecks);
}
nbl->work);
icell_set_x(cell0_i+ci, shx, shy, shz,
- nbat->xstride, nbat->x,
+ nbat->xstride, nbat->x().data(),
nb_kernel_type,
nbl->work);
fprintf(debug, "ns making %d nblists\n", nnbl);
}
- nbat->bUseBufferFlags = (nbat->nout > 1);
+ nbat->bUseBufferFlags = (nbat->out.size() > 1);
/* We should re-init the flags before making the first list */
if (nbat->bUseBufferFlags && LOCAL_I(iloc))
{
- init_buffer_flags(&nbat->buffer_flags, nbat->natoms);
+ init_buffer_flags(&nbat->buffer_flags, nbat->numAtoms());
}
int nzi;
*/
if (nbat->bUseBufferFlags && ((zi == 0 && zj == 0)))
{
- init_buffer_flags(&nbs->work[th].buffer_flags, nbat->natoms);
+ init_buffer_flags(&nbs->work[th].buffer_flags, nbat->numAtoms());
}
if (CombineNBLists && th > 0)