/*! \libinternal \brief
* A GPU spline computation and charge spreading function.
*
- * \param[in] pmeGpu The PME GPU structure.
- * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory;
- * can be nullptr when invoked on a separate PME rank or from PME tests.
- * \param[out] h_grids The host-side grid buffers (used only if the result of the spread is expected on the host,
- * e.g. testing or host-side FFT)
- * \param[in] computeSplines Should the computation of spline parameters and gridline indices be performed.
- * \param[in] spreadCharges Should the charges/coefficients be spread on the grid.
- * \param[in] lambda The lambda value of the current system state.
- */
-GPU_FUNC_QUALIFIER void pme_gpu_spread(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
- GpuEventSynchronizer* GPU_FUNC_ARGUMENT(xReadyOnDevice),
- float** GPU_FUNC_ARGUMENT(h_grids),
- bool GPU_FUNC_ARGUMENT(computeSplines),
- bool GPU_FUNC_ARGUMENT(spreadCharges),
- real GPU_FUNC_ARGUMENT(lambda)) GPU_FUNC_TERM;
+ * \param[in] pmeGpu The PME GPU structure.
+ * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are
+ * ready in the device memory; can be nullptr when invoked
+ * on a separate PME rank or from PME tests.
+ * \param[out] h_grids The host-side grid buffers (used only if the result
+ * of the spread is expected on the host, e.g. testing
+ * or host-side FFT).
+ * \param[in] computeSplines Should the computation of spline parameters and gridline
+ * indices be performed.
+ * \param[in] spreadCharges Should the charges/coefficients be spread on the grid.
+ * \param[in] lambda The lambda value of the current system state.
+ * \param[in] useGpuDirectComm Whether direct GPU PME-PP communication is active.
+ * \param[in] pmeCoordinateReceiverGpu Coordinate receiver object, which must be valid when
+ * direct GPU PME-PP communication is active.
+ */
+GPU_FUNC_QUALIFIER void
+pme_gpu_spread(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
+ GpuEventSynchronizer* GPU_FUNC_ARGUMENT(xReadyOnDevice),
+ float** GPU_FUNC_ARGUMENT(h_grids),
+ bool GPU_FUNC_ARGUMENT(computeSplines),
+ bool GPU_FUNC_ARGUMENT(spreadCharges),
+ real GPU_FUNC_ARGUMENT(lambda),
+ const bool GPU_FUNC_ARGUMENT(useGpuDirectComm),
+ gmx::PmeCoordinateReceiverGpu* GPU_FUNC_ARGUMENT(pmeCoordinateReceiverGpu)) GPU_FUNC_TERM;
/*! \libinternal \brief
* 3D FFT R2C/C2R routine.