Several functions in nbnxm took a bool to determine whether to fill
local atoms with zeros, but the parameter was always set to false.
This is now removed from the function signatures, and some code is
slightly simplified.
/*
* This file is part of the GROMACS molecular simulation package.
*
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2020, by the GROMACS development team, led by
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
gmx::ArrayRef<gmx::RVec> forceOutput)
{
// update the coordinates in the backend
gmx::ArrayRef<gmx::RVec> forceOutput)
{
// update the coordinates in the backend
- nbv_->convertCoordinates(gmx::AtomLocality::Local, false, coordinateInput);
+ nbv_->convertCoordinates(gmx::AtomLocality::Local, coordinateInput);
nbv_->dispatchNonbondedKernel(gmx::InteractionLocality::Local,
*interactionConst_,
nbv_->dispatchNonbondedKernel(gmx::InteractionLocality::Local,
*interactionConst_,
if (stepWork.useGpuXBufferOps)
{
GMX_ASSERT(stateGpu, "stateGpu should be valid when buffer ops are offloaded");
if (stepWork.useGpuXBufferOps)
{
GMX_ASSERT(stateGpu, "stateGpu should be valid when buffer ops are offloaded");
- nbv->convertCoordinatesGpu(
- AtomLocality::Local, false, stateGpu->getCoordinates(), localXReadyOnDevice);
+ nbv->convertCoordinatesGpu(AtomLocality::Local, stateGpu->getCoordinates(), localXReadyOnDevice);
"a wait should only be triggered if copy has been scheduled");
stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
"a wait should only be triggered if copy has been scheduled");
stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
- nbv->convertCoordinates(AtomLocality::Local, false, x.unpaddedArrayRef());
+ nbv->convertCoordinates(AtomLocality::Local, x.unpaddedArrayRef());
stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), AtomLocality::NonLocal);
}
nbv->convertCoordinatesGpu(AtomLocality::NonLocal,
stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), AtomLocality::NonLocal);
}
nbv->convertCoordinatesGpu(AtomLocality::NonLocal,
stateGpu->getCoordinates(),
stateGpu->getCoordinatesReadyOnDeviceEvent(
AtomLocality::NonLocal, simulationWork, stepWork));
}
else
{
stateGpu->getCoordinates(),
stateGpu->getCoordinatesReadyOnDeviceEvent(
AtomLocality::NonLocal, simulationWork, stepWork));
}
else
{
- nbv->convertCoordinates(AtomLocality::NonLocal, false, x.unpaddedArrayRef());
+ nbv->convertCoordinates(AtomLocality::NonLocal, x.unpaddedArrayRef());
}
/* Note: NonLocal refers to the inserted molecule */
}
/* Note: NonLocal refers to the inserted molecule */
- fr->nbv->convertCoordinates(AtomLocality::NonLocal, false, x);
+ fr->nbv->convertCoordinates(AtomLocality::NonLocal, x);
/* Clear some matrix variables */
clear_mat(force_vir);
/* Clear some matrix variables */
clear_mat(force_vir);
/* Copies (and reorders) the coordinates to nbnxn_atomdata_t */
void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet,
const gmx::AtomLocality locality,
/* Copies (and reorders) the coordinates to nbnxn_atomdata_t */
void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet,
const gmx::AtomLocality locality,
const rvec* coordinates,
nbnxn_atomdata_t* nbat)
{
const rvec* coordinates,
nbnxn_atomdata_t* nbat)
{
int gridEnd = 0;
getAtomRanges(gridSet, locality, &gridBegin, &gridEnd);
int gridEnd = 0;
getAtomRanges(gridSet, locality, &gridBegin, &gridEnd);
- if (fillLocal)
- {
- nbat->natoms_local = gridSet.grids()[0].atomIndexEnd();
- }
-
const int nth = gmx_omp_nthreads_get(emntPairsearch);
#pragma omp parallel for num_threads(nth) schedule(static)
for (int th = 0; th < nth; th++)
const int nth = gmx_omp_nthreads_get(emntPairsearch);
#pragma omp parallel for num_threads(nth) schedule(static)
for (int th = 0; th < nth; th++)
const int na = grid.numAtomsInColumn(cxy);
const int ash = grid.firstAtomInColumn(cxy);
const int na = grid.numAtomsInColumn(cxy);
const int ash = grid.firstAtomInColumn(cxy);
- const bool mustFillPadding = (g == 0 && fillLocal);
- /* When false, we fill only the real particle locations.
- * We assume the filling entries at the end have been
- * properly set before during pair-list generation.
- */
- const int na_fill = mustFillPadding ? grid.paddedNumAtomsInColumn(cxy) : na;
-
copy_rvec_to_nbat_real(gridSet.atomIndices().data() + ash,
na,
copy_rvec_to_nbat_real(gridSet.atomIndices().data() + ash,
na,
coordinates,
nbat->XFormat,
nbat->x().data(),
coordinates,
nbat->XFormat,
nbat->x().data(),
/* Copies (and reorders) the coordinates to nbnxn_atomdata_t on the GPU*/
void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet,
const gmx::AtomLocality locality,
/* Copies (and reorders) the coordinates to nbnxn_atomdata_t on the GPU*/
void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet,
const gmx::AtomLocality locality,
NbnxmGpu* gpu_nbv,
DeviceBuffer<RVec> d_x,
GpuEventSynchronizer* xReadyOnDevice)
NbnxmGpu* gpu_nbv,
DeviceBuffer<RVec> d_x,
GpuEventSynchronizer* xReadyOnDevice)
for (int g = gridBegin; g < gridEnd; g++)
{
for (int g = gridBegin; g < gridEnd; g++)
{
- nbnxn_gpu_x_to_nbat_x(gridSet.grids()[g],
- fillLocal && g == 0,
- gpu_nbv,
- d_x,
- xReadyOnDevice,
- locality,
- g,
- gridSet.numColumnsMax());
+ nbnxn_gpu_x_to_nbat_x(
+ gridSet.grids()[g], gpu_nbv, d_x, xReadyOnDevice, locality, g, gridSet.numColumnsMax());
*
* \param[in] gridSet The grids data.
* \param[in] locality If the transformation should be applied to local or non local coordinates.
*
* \param[in] gridSet The grids data.
* \param[in] locality If the transformation should be applied to local or non local coordinates.
- * \param[in] fillLocal Tells if the local filler particle coordinates should be zeroed.
* \param[in] coordinates Coordinates in plain rvec format.
* \param[in,out] nbat Data in NBNXM format, used for mapping formats and to locate the output buffer.
*/
void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet,
gmx::AtomLocality locality,
* \param[in] coordinates Coordinates in plain rvec format.
* \param[in,out] nbat Data in NBNXM format, used for mapping formats and to locate the output buffer.
*/
void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet,
gmx::AtomLocality locality,
const rvec* coordinates,
nbnxn_atomdata_t* nbat);
const rvec* coordinates,
nbnxn_atomdata_t* nbat);
*
* \param[in] gridSet The grids data.
* \param[in] locality If the transformation should be applied to local or non local coordinates.
*
* \param[in] gridSet The grids data.
* \param[in] locality If the transformation should be applied to local or non local coordinates.
- * \param[in] fillLocal Tells if the local filler particle coordinates should be zeroed.
* \param[in,out] gpu_nbv The NBNXM GPU data structure.
* \param[in] d_x Coordinates to be copied (in plain rvec format).
* \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory.
*/
void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet,
gmx::AtomLocality locality,
* \param[in,out] gpu_nbv The NBNXM GPU data structure.
* \param[in] d_x Coordinates to be copied (in plain rvec format).
* \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory.
*/
void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet,
gmx::AtomLocality locality,
NbnxmGpu* gpu_nbv,
DeviceBuffer<gmx::RVec> d_x,
GpuEventSynchronizer* xReadyOnDevice);
NbnxmGpu* gpu_nbv,
DeviceBuffer<gmx::RVec> d_x,
GpuEventSynchronizer* xReadyOnDevice);
/*
* This file is part of the GROMACS molecular simulation package.
*
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* \param[in] numColumns Extent of cell-level parallelism.
* \param[out] gm_xq Coordinates buffer in nbnxm layout.
* \param[in] numColumns Extent of cell-level parallelism.
* \param[out] gm_xq Coordinates buffer in nbnxm layout.
- * \tparam setFillerCoords Whether to set the coordinates of the filler particles.
* \param[in] gm_x Coordinates buffer.
* \param[in] gm_atomIndex Atom index mapping.
* \param[in] gm_numAtoms Array of number of atoms.
* \param[in] gm_x Coordinates buffer.
* \param[in] gm_atomIndex Atom index mapping.
* \param[in] gm_numAtoms Array of number of atoms.
* \param[in] cellOffset First cell.
* \param[in] numAtomsPerCell Number of atoms per cell.
*/
* \param[in] cellOffset First cell.
* \param[in] numAtomsPerCell Number of atoms per cell.
*/
-template<bool setFillerCoords>
static __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int numColumns,
float4* __restrict__ gm_xq,
const float3* __restrict__ gm_x,
static __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int numColumns,
float4* __restrict__ gm_xq,
const float3* __restrict__ gm_x,
const int numAtoms = gm_numAtoms[cxy];
const int offset = (cellOffset + gm_cellIndex[cxy]) * numAtomsPerCell;
const int numAtoms = gm_numAtoms[cxy];
const int offset = (cellOffset + gm_cellIndex[cxy]) * numAtomsPerCell;
- int numAtomsRounded;
- if (setFillerCoords)
- {
- // TODO: This can be done more efficiently
- numAtomsRounded = (gm_cellIndex[cxy + 1] - gm_cellIndex[cxy]) * numAtomsPerCell;
- }
- else
- {
- // We fill only the real particle locations.
- // We assume the filling entries at the end have been
- // properly set before during pair-list generation.
- numAtomsRounded = numAtoms;
- }
const int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
const int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
float3* gm_xqDest = (float3*)&gm_xq[threadIndex + offset];
// Perform layout conversion of each element.
float3* gm_xqDest = (float3*)&gm_xq[threadIndex + offset];
// Perform layout conversion of each element.
- if (threadIndex < numAtomsRounded)
+ if (threadIndex < numAtoms)
{
if (threadIndex < numAtoms)
{
{
if (threadIndex < numAtoms)
{
/* X buffer operations on GPU: performs conversion from rvec to nb format. */
void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid& grid,
/* X buffer operations on GPU: performs conversion from rvec to nb format. */
void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid& grid,
NbnxmGpu* nb,
DeviceBuffer<gmx::RVec> d_x,
GpuEventSynchronizer* xReadyOnDevice,
NbnxmGpu* nb,
DeviceBuffer<gmx::RVec> d_x,
GpuEventSynchronizer* xReadyOnDevice,
"Can not have empty grid, early return above avoids this");
config.sharedMemorySize = 0;
"Can not have empty grid, early return above avoids this");
config.sharedMemorySize = 0;
- auto kernelFn = setFillerCoords ? nbnxn_gpu_x_to_nbat_x_kernel<true>
- : nbnxn_gpu_x_to_nbat_x_kernel<false>;
+ auto kernelFn = nbnxn_gpu_x_to_nbat_x_kernel;
float4* d_xq = adat->xq;
float3* d_xFloat3 = asFloat3(d_x);
const int* d_atomIndices = nb->atomIndices;
float4* d_xq = adat->xq;
float3* d_xFloat3 = asFloat3(d_x);
const int* d_atomIndices = nb->atomIndices;
}
void nonbonded_verlet_t::convertCoordinates(const gmx::AtomLocality locality,
}
void nonbonded_verlet_t::convertCoordinates(const gmx::AtomLocality locality,
gmx::ArrayRef<const gmx::RVec> coordinates)
{
wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
wallcycle_sub_start(wcycle_, ewcsNB_X_BUF_OPS);
nbnxn_atomdata_copy_x_to_nbat_x(
gmx::ArrayRef<const gmx::RVec> coordinates)
{
wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
wallcycle_sub_start(wcycle_, ewcsNB_X_BUF_OPS);
nbnxn_atomdata_copy_x_to_nbat_x(
- pairSearch_->gridSet(), locality, fillLocal, as_rvec_array(coordinates.data()), nbat.get());
+ pairSearch_->gridSet(), locality, as_rvec_array(coordinates.data()), nbat.get());
wallcycle_sub_stop(wcycle_, ewcsNB_X_BUF_OPS);
wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
}
void nonbonded_verlet_t::convertCoordinatesGpu(const gmx::AtomLocality locality,
wallcycle_sub_stop(wcycle_, ewcsNB_X_BUF_OPS);
wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
}
void nonbonded_verlet_t::convertCoordinatesGpu(const gmx::AtomLocality locality,
DeviceBuffer<gmx::RVec> d_x,
GpuEventSynchronizer* xReadyOnDevice)
{
wallcycle_start(wcycle_, ewcLAUNCH_GPU);
wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_NB_X_BUF_OPS);
DeviceBuffer<gmx::RVec> d_x,
GpuEventSynchronizer* xReadyOnDevice)
{
wallcycle_start(wcycle_, ewcLAUNCH_GPU);
wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_NB_X_BUF_OPS);
- nbnxn_atomdata_x_to_nbat_x_gpu(pairSearch_->gridSet(), locality, fillLocal, gpu_nbv, d_x, xReadyOnDevice);
+ nbnxn_atomdata_x_to_nbat_x_gpu(pairSearch_->gridSet(), locality, gpu_nbv, d_x, xReadyOnDevice);
wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_NB_X_BUF_OPS);
wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_NB_X_BUF_OPS);
wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
* The API function for the transformation of the coordinates from one layout to another.
*
* \param[in] locality Whether coordinates for local or non-local atoms should be
* The API function for the transformation of the coordinates from one layout to another.
*
* \param[in] locality Whether coordinates for local or non-local atoms should be
- * transformed. \param[in] fillLocal If the coordinates for filler particles should be
- * zeroed. \param[in] coordinates Coordinates in plain rvec format to be transformed.
+ * transformed. \param[in] coordinates Coordinates in plain rvec format to be transformed.
- void convertCoordinates(gmx::AtomLocality locality, bool fillLocal, gmx::ArrayRef<const gmx::RVec> coordinates);
+ void convertCoordinates(gmx::AtomLocality locality, gmx::ArrayRef<const gmx::RVec> coordinates);
/*!\brief Convert the coordinates to NBNXM format on the GPU for the given locality
*
* The API function for the transformation of the coordinates from one layout to another in the GPU memory.
*
* \param[in] locality Whether coordinates for local or non-local atoms should be transformed.
/*!\brief Convert the coordinates to NBNXM format on the GPU for the given locality
*
* The API function for the transformation of the coordinates from one layout to another in the GPU memory.
*
* \param[in] locality Whether coordinates for local or non-local atoms should be transformed.
- * \param[in] fillLocal If the coordinates for filler particles should be zeroed.
* \param[in] d_x GPU coordinates buffer in plain rvec format to be transformed.
* \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory.
*/
void convertCoordinatesGpu(gmx::AtomLocality locality,
* \param[in] d_x GPU coordinates buffer in plain rvec format to be transformed.
* \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory.
*/
void convertCoordinatesGpu(gmx::AtomLocality locality,
DeviceBuffer<gmx::RVec> d_x,
GpuEventSynchronizer* xReadyOnDevice);
DeviceBuffer<gmx::RVec> d_x,
GpuEventSynchronizer* xReadyOnDevice);
/*! \brief X buffer operations on GPU: performs conversion from rvec to nb format.
*
* \param[in] grid Grid to be converted.
/*! \brief X buffer operations on GPU: performs conversion from rvec to nb format.
*
* \param[in] grid Grid to be converted.
- * \param[in] setFillerCoords If the filler coordinates are used.
* \param[in,out] gpu_nbv The nonbonded data GPU structure.
* \param[in] d_x Device-side coordinates in plain rvec format.
* \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in
* \param[in,out] gpu_nbv The nonbonded data GPU structure.
* \param[in] d_x Device-side coordinates in plain rvec format.
* \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in
*/
CUDA_FUNC_QUALIFIER
void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused& grid,
*/
CUDA_FUNC_QUALIFIER
void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused& grid,
- bool gmx_unused setFillerCoords,
NbnxmGpu gmx_unused* gpu_nbv,
DeviceBuffer<gmx::RVec> gmx_unused d_x,
GpuEventSynchronizer gmx_unused* xReadyOnDevice,
NbnxmGpu gmx_unused* gpu_nbv,
DeviceBuffer<gmx::RVec> gmx_unused d_x,
GpuEventSynchronizer gmx_unused* xReadyOnDevice,