Pipeline GPU PME Spline/Spread with PP Comms

[alexxy/gromacs.git] / src / gromacs / ewald / pme.h
diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h

index 1e32caf265bbd2feb25a20c31d93bcd884248d53..f8653e20dda06838fb2647cc432ca8f23eb8ae56 100644 (file)
--- a/src/gromacs/ewald/pme.h
+++ b/src/gromacs/ewald/pme.h
@@ -75,6 +75,19 @@ enum class GpuTaskCompletion;
  class PmeGpuProgram;
  class GpuEventSynchronizer;
  
+/*! \brief Hack to selectively enable some parts of PME during unit testing.
+ *
+ * Set to \c false by default. If any of the tests sets it to \c true, it will
+ * make the compatibility check consider PME to be supported in SYCL builds.
+ *
+ * Currently we don't have a proper PME implementation with SYCL, but we still
+ * want to run tests for some of the kernels.
+ *
+ * \todo Remove after #3927 is done and PME is fully enabled in SYCL builds.
+ */
+//NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
+extern bool g_allowPmeWithSyclForTesting;
+
  namespace gmx
  {
  template<typename>
@@ -106,6 +119,7 @@ private:
      std::vector<std::string> reasons_;
  };
  
+class PmeCoordinateReceiverGpu;
  } // namespace gmx
  
  enum
@@ -360,17 +374,24 @@ GPU_FUNC_QUALIFIER void pme_gpu_prepare_computation(gmx_pme_t*     GPU_FUNC_ARGU
  /*! \brief
   * Launches first stage of PME on GPU - spreading kernel.
   *
- * \param[in] pme                The PME data structure.
- * \param[in] xReadyOnDevice     Event synchronizer indicating that the coordinates
- * are ready in the device memory; nullptr allowed only on separate PME ranks.
- * \param[in] wcycle             The wallclock counter.
- * \param[in] lambdaQ            The Coulomb lambda of the current state of the
- * system. Only used if FEP of Coulomb is active.
+ * \param[in] pme                            The PME data structure.
+ * \param[in] xReadyOnDevice                 Event synchronizer indicating that the coordinates
+ *                                           are ready in the device memory; nullptr allowed only
+ *                                           on separate PME ranks.
+ * \param[in] wcycle                         The wallclock counter.
+ * \param[in] lambdaQ                        The Coulomb lambda of the current state of the
+ *                                           system. Only used if FEP of Coulomb is active.
+ * \param[in] useGpuDirectComm               Whether direct GPU PME-PP communication is active.
+ * \param[in] pmeCoordinateReceiverGpu       Coordinate receiver object, which must be valid when
+ *                                           direct GPU PME-PP communication is active.
   */
-GPU_FUNC_QUALIFIER void pme_gpu_launch_spread(gmx_pme_t*            GPU_FUNC_ARGUMENT(pme),
-                                              GpuEventSynchronizer* GPU_FUNC_ARGUMENT(xReadyOnDevice),
-                                              gmx_wallcycle*        GPU_FUNC_ARGUMENT(wcycle),
-                                              real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
+GPU_FUNC_QUALIFIER void pme_gpu_launch_spread(
+        gmx_pme_t*                     GPU_FUNC_ARGUMENT(pme),
+        GpuEventSynchronizer*          GPU_FUNC_ARGUMENT(xReadyOnDevice),
+        gmx_wallcycle*                 GPU_FUNC_ARGUMENT(wcycle),
+        real                           GPU_FUNC_ARGUMENT(lambdaQ),
+        const bool                     GPU_FUNC_ARGUMENT(useGpuDirectComm),
+        gmx::PmeCoordinateReceiverGpu* GPU_FUNC_ARGUMENT(pmeCoordinateReceiverGpu)) GPU_FUNC_TERM;
  
  /*! \brief
   * Launches middle stages of PME (FFT R2C, solving, FFT C2R) either on GPU or on CPU, depending on the run mode.