From 556f83fe2b7d246ab8dcce41a06323a7a203da14 Mon Sep 17 00:00:00 2001 From: Joe Jordan Date: Wed, 10 Mar 2021 12:37:49 +0000 Subject: [PATCH] Remove param fillLocal, which was always false, from nbnxm call stack Several functions in nbnxm took a bool to determine whether to fill local atoms with zeros, but the parameter was always set to false. This is now removed from the function signatures, and some code is slightly simplified. --- api/nblib/gmxcalculator.cpp | 4 +-- src/gromacs/mdlib/sim_util.cpp | 8 +++--- src/gromacs/mdrun/tpi.cpp | 2 +- src/gromacs/nbnxm/atomdata.cpp | 26 +++---------------- src/gromacs/nbnxm/atomdata.h | 4 --- .../nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh | 19 ++------------ src/gromacs/nbnxm/cuda/nbnxm_cuda.cu | 4 +-- src/gromacs/nbnxm/nbnxm.cpp | 6 ++--- src/gromacs/nbnxm/nbnxm.h | 7 ++--- src/gromacs/nbnxm/nbnxm_gpu.h | 2 -- 10 files changed, 16 insertions(+), 66 deletions(-) diff --git a/api/nblib/gmxcalculator.cpp b/api/nblib/gmxcalculator.cpp index d4c8a1f417..12ab681d5e 100644 --- a/api/nblib/gmxcalculator.cpp +++ b/api/nblib/gmxcalculator.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2020, by the GROMACS development team, led by + * Copyright (c) 2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -71,7 +71,7 @@ void GmxForceCalculator::compute(gmx::ArrayRef coordinateInput, gmx::ArrayRef forceOutput) { // update the coordinates in the backend - nbv_->convertCoordinates(gmx::AtomLocality::Local, false, coordinateInput); + nbv_->convertCoordinates(gmx::AtomLocality::Local, coordinateInput); nbv_->dispatchNonbondedKernel(gmx::InteractionLocality::Local, *interactionConst_, diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 0bcf0c5451..80190c6fa8 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -1465,8 +1465,7 @@ void do_force(FILE* fplog, if (stepWork.useGpuXBufferOps) { GMX_ASSERT(stateGpu, "stateGpu should be valid when buffer ops are offloaded"); - nbv->convertCoordinatesGpu( - AtomLocality::Local, false, stateGpu->getCoordinates(), localXReadyOnDevice); + nbv->convertCoordinatesGpu(AtomLocality::Local, stateGpu->getCoordinates(), localXReadyOnDevice); } else { @@ -1477,7 +1476,7 @@ void do_force(FILE* fplog, "a wait should only be triggered if copy has been scheduled"); stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local); } - nbv->convertCoordinates(AtomLocality::Local, false, x.unpaddedArrayRef()); + nbv->convertCoordinates(AtomLocality::Local, x.unpaddedArrayRef()); } } @@ -1579,14 +1578,13 @@ void do_force(FILE* fplog, stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), AtomLocality::NonLocal); } nbv->convertCoordinatesGpu(AtomLocality::NonLocal, - false, stateGpu->getCoordinates(), stateGpu->getCoordinatesReadyOnDeviceEvent( AtomLocality::NonLocal, simulationWork, stepWork)); } else { - nbv->convertCoordinates(AtomLocality::NonLocal, false, x.unpaddedArrayRef()); + nbv->convertCoordinates(AtomLocality::NonLocal, x.unpaddedArrayRef()); } } diff --git a/src/gromacs/mdrun/tpi.cpp b/src/gromacs/mdrun/tpi.cpp index debaef5c31..5c43d0bc9d 100644 --- a/src/gromacs/mdrun/tpi.cpp +++ b/src/gromacs/mdrun/tpi.cpp @@ -749,7 +749,7 @@ void LegacySimulator::do_tpi() } /* Note: NonLocal refers to the inserted molecule */ - fr->nbv->convertCoordinates(AtomLocality::NonLocal, false, x); + fr->nbv->convertCoordinates(AtomLocality::NonLocal, x); /* Clear some matrix variables */ clear_mat(force_vir); diff --git a/src/gromacs/nbnxm/atomdata.cpp b/src/gromacs/nbnxm/atomdata.cpp index b1559ef349..4048bea672 100644 --- a/src/gromacs/nbnxm/atomdata.cpp +++ b/src/gromacs/nbnxm/atomdata.cpp @@ -985,7 +985,6 @@ static void getAtomRanges(const Nbnxm::GridSet& gridSet, /* Copies (and reorders) the coordinates to nbnxn_atomdata_t */ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet, const gmx::AtomLocality locality, - bool fillLocal, const rvec* coordinates, nbnxn_atomdata_t* nbat) { @@ -994,11 +993,6 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet, int gridEnd = 0; getAtomRanges(gridSet, locality, &gridBegin, &gridEnd); - if (fillLocal) - { - nbat->natoms_local = gridSet.grids()[0].atomIndexEnd(); - } - const int nth = gmx_omp_nthreads_get(emntPairsearch); #pragma omp parallel for num_threads(nth) schedule(static) for (int th = 0; th < nth; th++) @@ -1018,16 +1012,9 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet, const int na = grid.numAtomsInColumn(cxy); const int ash = grid.firstAtomInColumn(cxy); - const bool mustFillPadding = (g == 0 && fillLocal); - /* When false, we fill only the real particle locations. - * We assume the filling entries at the end have been - * properly set before during pair-list generation. - */ - const int na_fill = mustFillPadding ? grid.paddedNumAtomsInColumn(cxy) : na; - copy_rvec_to_nbat_real(gridSet.atomIndices().data() + ash, na, - na_fill, + na, coordinates, nbat->XFormat, nbat->x().data(), @@ -1042,7 +1029,6 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet, /* Copies (and reorders) the coordinates to nbnxn_atomdata_t on the GPU*/ void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet, const gmx::AtomLocality locality, - bool fillLocal, NbnxmGpu* gpu_nbv, DeviceBuffer d_x, GpuEventSynchronizer* xReadyOnDevice) @@ -1054,14 +1040,8 @@ void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet, for (int g = gridBegin; g < gridEnd; g++) { - nbnxn_gpu_x_to_nbat_x(gridSet.grids()[g], - fillLocal && g == 0, - gpu_nbv, - d_x, - xReadyOnDevice, - locality, - g, - gridSet.numColumnsMax()); + nbnxn_gpu_x_to_nbat_x( + gridSet.grids()[g], gpu_nbv, d_x, xReadyOnDevice, locality, g, gridSet.numColumnsMax()); } } diff --git a/src/gromacs/nbnxm/atomdata.h b/src/gromacs/nbnxm/atomdata.h index 30bcec06a8..39a46794fa 100644 --- a/src/gromacs/nbnxm/atomdata.h +++ b/src/gromacs/nbnxm/atomdata.h @@ -345,13 +345,11 @@ void nbnxn_atomdata_copy_shiftvec(gmx_bool dynamic_box, rvec* shift_vec, nbnxn_a * * \param[in] gridSet The grids data. * \param[in] locality If the transformation should be applied to local or non local coordinates. - * \param[in] fillLocal Tells if the local filler particle coordinates should be zeroed. * \param[in] coordinates Coordinates in plain rvec format. * \param[in,out] nbat Data in NBNXM format, used for mapping formats and to locate the output buffer. */ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet, gmx::AtomLocality locality, - bool fillLocal, const rvec* coordinates, nbnxn_atomdata_t* nbat); @@ -362,14 +360,12 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet, * * \param[in] gridSet The grids data. * \param[in] locality If the transformation should be applied to local or non local coordinates. - * \param[in] fillLocal Tells if the local filler particle coordinates should be zeroed. * \param[in,out] gpu_nbv The NBNXM GPU data structure. * \param[in] d_x Coordinates to be copied (in plain rvec format). * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory. */ void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet, gmx::AtomLocality locality, - bool fillLocal, NbnxmGpu* gpu_nbv, DeviceBuffer d_x, GpuEventSynchronizer* xReadyOnDevice); diff --git a/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh b/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh index 9a9ffc6c1c..2df2a14ea2 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh +++ b/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -53,7 +53,6 @@ * \param[in] numColumns Extent of cell-level parallelism. * \param[out] gm_xq Coordinates buffer in nbnxm layout. - * \tparam setFillerCoords Whether to set the coordinates of the filler particles. * \param[in] gm_x Coordinates buffer. * \param[in] gm_atomIndex Atom index mapping. * \param[in] gm_numAtoms Array of number of atoms. @@ -61,7 +60,6 @@ * \param[in] cellOffset First cell. * \param[in] numAtomsPerCell Number of atoms per cell. */ -template static __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int numColumns, float4* __restrict__ gm_xq, const float3* __restrict__ gm_x, @@ -83,19 +81,6 @@ static __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int numColumns, const int numAtoms = gm_numAtoms[cxy]; const int offset = (cellOffset + gm_cellIndex[cxy]) * numAtomsPerCell; - int numAtomsRounded; - if (setFillerCoords) - { - // TODO: This can be done more efficiently - numAtomsRounded = (gm_cellIndex[cxy + 1] - gm_cellIndex[cxy]) * numAtomsPerCell; - } - else - { - // We fill only the real particle locations. - // We assume the filling entries at the end have been - // properly set before during pair-list generation. - numAtomsRounded = numAtoms; - } const int threadIndex = blockIdx.x * blockDim.x + threadIdx.x; @@ -104,7 +89,7 @@ static __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int numColumns, float3* gm_xqDest = (float3*)&gm_xq[threadIndex + offset]; // Perform layout conversion of each element. - if (threadIndex < numAtomsRounded) + if (threadIndex < numAtoms) { if (threadIndex < numAtoms) { diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu index 6ed6c1ff78..43bf518ad9 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu @@ -839,7 +839,6 @@ void cuda_set_cacheconfig() /* X buffer operations on GPU: performs conversion from rvec to nb format. */ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid& grid, - bool setFillerCoords, NbnxmGpu* nb, DeviceBuffer d_x, GpuEventSynchronizer* xReadyOnDevice, @@ -881,8 +880,7 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid& grid, "Can not have empty grid, early return above avoids this"); config.sharedMemorySize = 0; - auto kernelFn = setFillerCoords ? nbnxn_gpu_x_to_nbat_x_kernel - : nbnxn_gpu_x_to_nbat_x_kernel; + auto kernelFn = nbnxn_gpu_x_to_nbat_x_kernel; float4* d_xq = adat->xq; float3* d_xFloat3 = asFloat3(d_x); const int* d_atomIndices = nb->atomIndices; diff --git a/src/gromacs/nbnxm/nbnxm.cpp b/src/gromacs/nbnxm/nbnxm.cpp index 96714bb154..a8e919ea01 100644 --- a/src/gromacs/nbnxm/nbnxm.cpp +++ b/src/gromacs/nbnxm/nbnxm.cpp @@ -145,28 +145,26 @@ void nonbonded_verlet_t::setAtomProperties(gmx::ArrayRef atomTypes, } void nonbonded_verlet_t::convertCoordinates(const gmx::AtomLocality locality, - const bool fillLocal, gmx::ArrayRef coordinates) { wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS); wallcycle_sub_start(wcycle_, ewcsNB_X_BUF_OPS); nbnxn_atomdata_copy_x_to_nbat_x( - pairSearch_->gridSet(), locality, fillLocal, as_rvec_array(coordinates.data()), nbat.get()); + pairSearch_->gridSet(), locality, as_rvec_array(coordinates.data()), nbat.get()); wallcycle_sub_stop(wcycle_, ewcsNB_X_BUF_OPS); wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS); } void nonbonded_verlet_t::convertCoordinatesGpu(const gmx::AtomLocality locality, - const bool fillLocal, DeviceBuffer d_x, GpuEventSynchronizer* xReadyOnDevice) { wallcycle_start(wcycle_, ewcLAUNCH_GPU); wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_NB_X_BUF_OPS); - nbnxn_atomdata_x_to_nbat_x_gpu(pairSearch_->gridSet(), locality, fillLocal, gpu_nbv, d_x, xReadyOnDevice); + nbnxn_atomdata_x_to_nbat_x_gpu(pairSearch_->gridSet(), locality, gpu_nbv, d_x, xReadyOnDevice); wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_NB_X_BUF_OPS); wallcycle_stop(wcycle_, ewcLAUNCH_GPU); diff --git a/src/gromacs/nbnxm/nbnxm.h b/src/gromacs/nbnxm/nbnxm.h index 7ea59e4e5a..30380ebeb2 100644 --- a/src/gromacs/nbnxm/nbnxm.h +++ b/src/gromacs/nbnxm/nbnxm.h @@ -323,22 +323,19 @@ public: * The API function for the transformation of the coordinates from one layout to another. * * \param[in] locality Whether coordinates for local or non-local atoms should be - * transformed. \param[in] fillLocal If the coordinates for filler particles should be - * zeroed. \param[in] coordinates Coordinates in plain rvec format to be transformed. + * transformed. \param[in] coordinates Coordinates in plain rvec format to be transformed. */ - void convertCoordinates(gmx::AtomLocality locality, bool fillLocal, gmx::ArrayRef coordinates); + void convertCoordinates(gmx::AtomLocality locality, gmx::ArrayRef coordinates); /*!\brief Convert the coordinates to NBNXM format on the GPU for the given locality * * The API function for the transformation of the coordinates from one layout to another in the GPU memory. * * \param[in] locality Whether coordinates for local or non-local atoms should be transformed. - * \param[in] fillLocal If the coordinates for filler particles should be zeroed. * \param[in] d_x GPU coordinates buffer in plain rvec format to be transformed. * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory. */ void convertCoordinatesGpu(gmx::AtomLocality locality, - bool fillLocal, DeviceBuffer d_x, GpuEventSynchronizer* xReadyOnDevice); diff --git a/src/gromacs/nbnxm/nbnxm_gpu.h b/src/gromacs/nbnxm/nbnxm_gpu.h index fe83696934..fec64bbe68 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu.h +++ b/src/gromacs/nbnxm/nbnxm_gpu.h @@ -240,7 +240,6 @@ void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet gmx_unused& gridSet, /*! \brief X buffer operations on GPU: performs conversion from rvec to nb format. * * \param[in] grid Grid to be converted. - * \param[in] setFillerCoords If the filler coordinates are used. * \param[in,out] gpu_nbv The nonbonded data GPU structure. * \param[in] d_x Device-side coordinates in plain rvec format. * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in @@ -251,7 +250,6 @@ void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet gmx_unused& gridSet, */ CUDA_FUNC_QUALIFIER void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused& grid, - bool gmx_unused setFillerCoords, NbnxmGpu gmx_unused* gpu_nbv, DeviceBuffer gmx_unused d_x, GpuEventSynchronizer gmx_unused* xReadyOnDevice, -- 2.22.0