{
class GpuBonded;
class StepWorkload;
-}
+} // namespace gmx
namespace Nbnxm
{
* \param[in] aloc Atom locality flag.
*/
GPU_FUNC_QUALIFIER
-void gpu_copy_xq_to_gpu(gmx_nbnxn_gpu_t gmx_unused *nb,
- const struct nbnxn_atomdata_t gmx_unused *nbdata,
- gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
+void gpu_copy_xq_to_gpu(gmx_nbnxn_gpu_t gmx_unused* nb,
+ const struct nbnxn_atomdata_t gmx_unused* nbdata,
+ gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
/*! \brief
* Asynchronously launches the nonbonded force calculations.
*
*/
GPU_FUNC_QUALIFIER
-void gpu_launch_kernel(gmx_nbnxn_gpu_t gmx_unused *nb,
- const gmx::StepWorkload gmx_unused &stepWork,
+void gpu_launch_kernel(gmx_nbnxn_gpu_t gmx_unused* nb,
+ const gmx::StepWorkload gmx_unused& stepWork,
gmx::InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
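/* A minimal usage sketch for the two calls above (illustrative only; `nb`,
 * `nbdata`, and `stepWork` are assumed to be owned by the caller, e.g. the
 * MD step loop):
 *
 *   Nbnxm::gpu_copy_xq_to_gpu(nb, nbdata, gmx::AtomLocality::Local);
 *   Nbnxm::gpu_launch_kernel(nb, stepWork, gmx::InteractionLocality::Local);
 */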
/*! \brief
 * Asynchronously launches the nonbonded prune-only kernel.
 *
 * \param[in] numParts Number of parts the pair list is split into in the rolling kernel.
*/
GPU_FUNC_QUALIFIER
-void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t gmx_unused *nb,
- gmx::InteractionLocality gmx_unused iloc,
- int gmx_unused numParts) GPU_FUNC_TERM;
+void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t gmx_unused* nb,
+ gmx::InteractionLocality gmx_unused iloc,
+ int gmx_unused numParts) GPU_FUNC_TERM;
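/* Sketch of a rolling-prune launch, assuming the caller has computed the number
 * of rolling parts (the value 4 below is purely illustrative):
 *
 *   const int numParts = 4;
 *   Nbnxm::gpu_launch_kernel_pruneonly(nb, gmx::InteractionLocality::Local, numParts);
 */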
/*! \brief
* Asynchronously launches the download of short-range forces from the GPU
* (and energies/shift forces if required).
*/
GPU_FUNC_QUALIFIER
-void gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused *nb,
- nbnxn_atomdata_t gmx_unused *nbatom,
- const gmx::StepWorkload gmx_unused &stepWork,
- gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
+void gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused* nb,
+ nbnxn_atomdata_t gmx_unused* nbatom,
+ const gmx::StepWorkload gmx_unused& stepWork,
+ gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
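/* Sketch: queue the D2H transfers right after the corresponding kernel launch
 * (whether energies/shift forces are included follows from `stepWork`):
 *
 *   Nbnxm::gpu_launch_cpyback(nb, nbatom, stepWork, gmx::AtomLocality::Local);
 */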
/*! \brief Attempts to complete the nonbonded GPU task.
 *
 * \returns True if the nonbonded tasks associated with \p aloc locality have completed.
*/
GPU_FUNC_QUALIFIER
-bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb,
- const gmx::StepWorkload gmx_unused &stepWork,
- gmx::AtomLocality gmx_unused aloc,
- real gmx_unused *e_lj,
- real gmx_unused *e_el,
- gmx::ArrayRef<gmx::RVec> gmx_unused shiftForces,
- GpuTaskCompletion gmx_unused completionKind,
- gmx_wallcycle gmx_unused *wcycle) GPU_FUNC_TERM_WITH_RETURN(false);
+bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused* nb,
+ const gmx::StepWorkload gmx_unused& stepWork,
+ gmx::AtomLocality gmx_unused aloc,
+ real gmx_unused* e_lj,
+ real gmx_unused* e_el,
+ gmx::ArrayRef<gmx::RVec> gmx_unused shiftForces,
+ GpuTaskCompletion gmx_unused completionKind,
+ gmx_wallcycle gmx_unused* wcycle) GPU_FUNC_TERM_WITH_RETURN(false);
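/* Sketch of a non-blocking completion check, letting the CPU overlap other work
 * while the GPU task is in flight (`e_lj`, `e_el`, `shiftForces`, and `wcycle`
 * are assumed caller-owned; `doCpuWork()` is hypothetical):
 *
 *   while (!Nbnxm::gpu_try_finish_task(nb, stepWork, gmx::AtomLocality::Local,
 *                                      e_lj, e_el, shiftForces,
 *                                      GpuTaskCompletion::Check, wcycle))
 *   {
 *       doCpuWork();
 *   }
 */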
/*! \brief Completes the nonbonded GPU task, blocking until GPU tasks and data
 * transfers have finished.
* \param[out] shiftForces Shift forces buffer to accumulate into
* \param[out] wcycle Pointer to wallcycle data structure */
GPU_FUNC_QUALIFIER
-float gpu_wait_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb,
- const gmx::StepWorkload gmx_unused &stepWork,
- gmx::AtomLocality gmx_unused aloc,
- real gmx_unused *e_lj,
- real gmx_unused *e_el,
- gmx::ArrayRef<gmx::RVec> gmx_unused shiftForces,
- gmx_wallcycle gmx_unused *wcycle) GPU_FUNC_TERM_WITH_RETURN(0.0);
+float gpu_wait_finish_task(gmx_nbnxn_gpu_t gmx_unused* nb,
+ const gmx::StepWorkload gmx_unused& stepWork,
+ gmx::AtomLocality gmx_unused aloc,
+ real gmx_unused* e_lj,
+ real gmx_unused* e_el,
+ gmx::ArrayRef<gmx::RVec> gmx_unused shiftForces,
+ gmx_wallcycle gmx_unused* wcycle) GPU_FUNC_TERM_WITH_RETURN(0.0);
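/* Sketch of the blocking variant; we assume the returned float reports the time
 * spent waiting, useful for timing accounting (arguments as in the sketch above):
 *
 *   const float waitTime = Nbnxm::gpu_wait_finish_task(nb, stepWork, gmx::AtomLocality::Local,
 *                                                      e_lj, e_el, shiftForces, wcycle);
 */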
/*! \brief Selects the Ewald kernel type, analytical or tabulated, single or twin cut-off. */
GPU_FUNC_QUALIFIER
-int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused &ic) GPU_FUNC_TERM_WITH_RETURN(-1);
+int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused& ic)
+ GPU_FUNC_TERM_WITH_RETURN(-1);
/*! \brief Initialization for X buffer operations on GPU.
* Called on the NS step and performs (re-)allocations and memory copies. */
CUDA_FUNC_QUALIFIER
-void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet gmx_unused &gridSet,
- gmx_nbnxn_gpu_t gmx_unused *gpu_nbv) CUDA_FUNC_TERM;
+void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet gmx_unused& gridSet,
+ gmx_nbnxn_gpu_t gmx_unused* gpu_nbv) CUDA_FUNC_TERM;
/*! \brief X buffer operations on GPU: performs conversion from rvec to nb format.
*
* \param[in] setFillerCoords Whether the filler coordinates are used.
* \param[in,out] gpu_nbv The nonbonded data GPU structure.
* \param[in] d_x Device-side coordinates in plain rvec format.
- * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory.
- * \param[in] locality Copy coordinates for local or non-local atoms.
+ * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in
+ * the device memory.
+ * \param[in] locality Copy coordinates for local or non-local atoms.
* \param[in] gridId Index of the grid being converted.
* \param[in] numColumnsMax Maximum number of columns in the grid.
*/
CUDA_FUNC_QUALIFIER
-void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused &grid,
- bool gmx_unused setFillerCoords,
- gmx_nbnxn_gpu_t gmx_unused *gpu_nbv,
- DeviceBuffer<float> gmx_unused d_x,
- GpuEventSynchronizer gmx_unused *xReadyOnDevice,
- gmx::AtomLocality gmx_unused locality,
- int gmx_unused gridId,
- int gmx_unused numColumnsMax) CUDA_FUNC_TERM;
+void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused& grid,
+ bool gmx_unused setFillerCoords,
+ gmx_nbnxn_gpu_t gmx_unused* gpu_nbv,
+ DeviceBuffer<float> gmx_unused d_x,
+ GpuEventSynchronizer gmx_unused* xReadyOnDevice,
+ gmx::AtomLocality gmx_unused locality,
+ int gmx_unused gridId,
+ int gmx_unused numColumnsMax) CUDA_FUNC_TERM;
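/* Sketch of the X buffer-ops pairing: (re-)initialize on a search step, then
 * convert per grid every step (the loop bound, the `grids()` accessor, and
 * `numColumnsMax` are assumed to be available to the caller):
 *
 *   Nbnxm::nbnxn_gpu_init_x_to_nbat_x(gridSet, gpu_nbv); // NS step only
 *   for (int g = 0; g < numGrids; g++)
 *   {
 *       Nbnxm::nbnxn_gpu_x_to_nbat_x(gridSet.grids()[g], false, gpu_nbv, d_x,
 *                                    xReadyOnDevice, gmx::AtomLocality::Local,
 *                                    g, numColumnsMax);
 *   }
 */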
/*! \brief Sync the nonlocal stream with dependent tasks in the local queue.
* \param[in] nb The nonbonded data GPU structure
* \param[in] interactionLocality Local or NonLocal sync point
*/
CUDA_FUNC_QUALIFIER
-void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxn_gpu_t gmx_unused *nb,
- gmx::InteractionLocality gmx_unused interactionLocality) CUDA_FUNC_TERM;
+void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxn_gpu_t gmx_unused* nb,
+ gmx::InteractionLocality gmx_unused interactionLocality) CUDA_FUNC_TERM;
/*! \brief Set up internal flags that indicate what type of short-range work there is.
*
* \param[in] iLocality Interaction locality identifier
*/
GPU_FUNC_QUALIFIER
-void setupGpuShortRangeWork(gmx_nbnxn_gpu_t gmx_unused *nb,
- const gmx::GpuBonded gmx_unused *gpuBonded,
- gmx::InteractionLocality gmx_unused iLocality) GPU_FUNC_TERM;
+void setupGpuShortRangeWork(gmx_nbnxn_gpu_t gmx_unused* nb,
+ const gmx::GpuBonded gmx_unused* gpuBonded,
+ gmx::InteractionLocality gmx_unused iLocality) GPU_FUNC_TERM;
/*! \brief Returns true if there is GPU short-range work for the given atom locality.
*
* \param[in] aLocality Atom locality identifier
*/
GPU_FUNC_QUALIFIER
-bool haveGpuShortRangeWork(const gmx_nbnxn_gpu_t gmx_unused *nb,
- gmx::AtomLocality gmx_unused aLocality) GPU_FUNC_TERM_WITH_RETURN(false);
+bool haveGpuShortRangeWork(const gmx_nbnxn_gpu_t gmx_unused* nb, gmx::AtomLocality gmx_unused aLocality)
+ GPU_FUNC_TERM_WITH_RETURN(false);
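/* Sketch of the setup/query pairing: record per-locality work flags once per
 * search step, then query them when deciding whether a sync is needed:
 *
 *   Nbnxm::setupGpuShortRangeWork(nb, gpuBonded, gmx::InteractionLocality::Local);
 *   if (Nbnxm::haveGpuShortRangeWork(nb, gmx::AtomLocality::Local))
 *   {
 *       // e.g. wait for and reduce the short-range GPU results
 *   }
 */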
/*! \brief Initialization for F buffer operations on GPU */
CUDA_FUNC_QUALIFIER
-void nbnxn_gpu_init_add_nbat_f_to_f(const int gmx_unused *cell,
- gmx_nbnxn_gpu_t gmx_unused *gpu_nbv,
- int gmx_unused natoms_total,
- GpuEventSynchronizer gmx_unused *localReductionDone) CUDA_FUNC_TERM;
+void nbnxn_gpu_init_add_nbat_f_to_f(const int gmx_unused* cell,
+ gmx_nbnxn_gpu_t gmx_unused* gpu_nbv,
+ int gmx_unused natoms_total,
+ GpuEventSynchronizer gmx_unused* localReductionDone) CUDA_FUNC_TERM;
/*! \brief Force buffer operations on GPU.
 *
 * Transforms the nonbonded forces into the plain rvec format and reduces them,
 * optionally together with the PME forces, into the total force buffer.
 */
CUDA_FUNC_QUALIFIER
-void nbnxn_gpu_add_nbat_f_to_f(gmx::AtomLocality gmx_unused atomLocality,
- DeviceBuffer<float> gmx_unused totalForcesDevice,
- gmx_nbnxn_gpu_t gmx_unused *gpu_nbv,
- void gmx_unused *pmeForcesDevice,
- gmx::ArrayRef<GpuEventSynchronizer* const> gmx_unused dependencyList,
- int gmx_unused atomStart,
- int gmx_unused numAtoms,
- bool gmx_unused useGpuFPmeReduction,
- bool gmx_unused accumulateForce) CUDA_FUNC_TERM;
+void nbnxn_gpu_add_nbat_f_to_f(gmx::AtomLocality gmx_unused atomLocality,
+ DeviceBuffer<float> gmx_unused totalForcesDevice,
+ gmx_nbnxn_gpu_t gmx_unused* gpu_nbv,
+ void gmx_unused* pmeForcesDevice,
+ gmx::ArrayRef<GpuEventSynchronizer* const> gmx_unused dependencyList,
+ int gmx_unused atomStart,
+ int gmx_unused numAtoms,
+ bool gmx_unused useGpuFPmeReduction,
+ bool gmx_unused accumulateForce) CUDA_FUNC_TERM;
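/* Sketch of the F buffer-ops pairing (buffers, the cell mapping, and the
 * dependency list are assumed caller-owned; the boolean flags are illustrative):
 *
 *   Nbnxm::nbnxn_gpu_init_add_nbat_f_to_f(cell, gpu_nbv, natoms_total,
 *                                         localReductionDone); // NS step only
 *   Nbnxm::nbnxn_gpu_add_nbat_f_to_f(gmx::AtomLocality::Local, totalForcesDevice,
 *                                    gpu_nbv, pmeForcesDevice, dependencyList,
 *                                    atomStart, numAtoms,
 *                                    true,  // useGpuFPmeReduction
 *                                    true); // accumulateForce
 */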
/*! \brief Sync CPU thread on coordinate copy to device
* \param[in] nb The nonbonded data GPU structure
*/
CUDA_FUNC_QUALIFIER
-void nbnxn_wait_x_on_device(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM;
+void nbnxn_wait_x_on_device(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
/*! \brief Wait for non-local copy of coordinate buffer from device to host
* \param[in] nb The nonbonded data GPU structure
*/
CUDA_FUNC_QUALIFIER
-void nbnxn_wait_nonlocal_x_copy_D2H_done(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM;
+void nbnxn_wait_nonlocal_x_copy_D2H_done(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
/*! \brief Ensure local stream waits for non-local stream
* \param[in] nb The nonbonded data GPU structure
*/
CUDA_FUNC_QUALIFIER
-void nbnxn_stream_local_wait_for_nonlocal(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM;
+void nbnxn_stream_local_wait_for_nonlocal(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
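/* Sketch of where the three synchronization calls above typically sit when
 * coordinates move between host, device, and a non-local stream (placement is
 * illustrative):
 *
 *   Nbnxm::nbnxn_wait_x_on_device(nb);               // CPU waits for H2D x copy
 *   Nbnxm::nbnxn_wait_nonlocal_x_copy_D2H_done(nb);  // CPU waits for non-local D2H x copy
 *   Nbnxm::nbnxn_stream_local_wait_for_nonlocal(nb); // local stream waits for non-local stream
 */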
} // namespace Nbnxm
#endif