Apply clang-format to source tree
[alexxy/gromacs.git] / src / gromacs / ewald / pme.h
index 507af05bc20c658f584a519925bbf35d4cc91867..c7efd52f9911829d9485660c79a7661452de53c1 100644 (file)
@@ -77,7 +77,7 @@ enum class GpuTaskCompletion;
 class PmeGpuProgram;
 class GpuEventSynchronizer;
 //! Convenience name.
-using PmeGpuProgramHandle = const PmeGpuProgram *;
+using PmeGpuProgramHandle = const PmeGpuProgram*;
 
 namespace gmx
 {
@@ -85,10 +85,12 @@ class PmePpCommGpu;
 class ForceWithVirial;
 class MDLogger;
 enum class PinningPolicy : int;
-}
+} // namespace gmx
 
-enum {
-    GMX_SUM_GRID_FORWARD, GMX_SUM_GRID_BACKWARD
+enum
+{
+    GMX_SUM_GRID_FORWARD,
+    GMX_SUM_GRID_BACKWARD
 };
 
 /*! \brief Possible PME codepaths on a rank.
@@ -96,10 +98,10 @@ enum {
  */
 enum class PmeRunMode
 {
-    None,    //!< No PME task is done
-    CPU,     //!< Whole PME computation is done on CPU
-    GPU,     //!< Whole PME computation is done on GPU
-    Mixed,   //!< Mixed mode: only spread and gather run on GPU; FFT and solving are done on CPU.
+    None,  //!< No PME task is done
+    CPU,   //!< Whole PME computation is done on CPU
+    GPU,   //!< Whole PME computation is done on GPU
+    Mixed, //!< Mixed mode: only spread and gather run on GPU; FFT and solving are done on CPU.
 };
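
Since the run mode fixes where each PME stage executes, callers typically branch on it once per step. A minimal sketch in the header's own C++, assuming a valid pme handle; the helper name is illustrative, and pme_run_mode() is declared further down in this file:

// Illustrative helper (not part of this header): does the FFT stage run on the GPU?
inline bool fftRunsOnGpu(const gmx_pme_t* pme)
{
    switch (pme_run_mode(pme))
    {
        case PmeRunMode::GPU: return true; // whole computation on GPU
        case PmeRunMode::Mixed:            // FFT and solve stay on the CPU
        case PmeRunMode::CPU:
        case PmeRunMode::None: return false;
    }
    return false;
}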
 
 //! PME gathering output forces treatment
@@ -124,9 +126,11 @@ int minimalPmeGridSize(int pmeOrder);
  *
  * The PME GPU restrictions are checked separately during pme_gpu_init().
  */
-bool gmx_pme_check_restrictions(int pme_order,
-                                int nkx, int nky, int nkz,
-                                int numPmeDomainsAlongX,
+bool gmx_pme_check_restrictions(int  pme_order,
+                                int  nkx,
+                                int  nky,
+                                int  nkz,
+                                int  numPmeDomainsAlongX,
                                 bool useThreads,
                                 bool errorsAreFatal);
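
A hedged usage sketch: validating a candidate setup without aborting on failure. All values below are illustrative assumptions, not recommendations:

// Sketch: check a 4th-order PME setup on a 64^3 grid, one PME domain along x.
const bool gridOk = gmx_pme_check_restrictions(4,          // pme_order
                                               64, 64, 64, // nkx, nky, nkz
                                               1,          // numPmeDomainsAlongX
                                               true,       // useThreads
                                               false);     // errorsAreFatal: just report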
 
@@ -140,35 +144,37 @@ bool gmx_pme_check_restrictions(int pme_order,
  * related things whose lifetime can/should exceed that of a task (or
  * perhaps task manager). See Redmine #2522.
  */
-gmx_pme_t *gmx_pme_init(const t_commrec *cr,
-                        const NumPmeDomains &numPmeDomains,
-                        const t_inputrec *ir,
-                        gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
-                        gmx_bool bReproducible,
-                        real ewaldcoeff_q, real ewaldcoeff_lj,
-                        int nthread,
-                        PmeRunMode runMode,
-                        PmeGpu *pmeGpu,
-                        const gmx_device_info_t *gpuInfo,
-                        PmeGpuProgramHandle pmeGpuProgram,
-                        const gmx::MDLogger &mdlog);
+gmx_pme_t* gmx_pme_init(const t_commrec*         cr,
+                        const NumPmeDomains&     numPmeDomains,
+                        const t_inputrec*        ir,
+                        gmx_bool                 bFreeEnergy_q,
+                        gmx_bool                 bFreeEnergy_lj,
+                        gmx_bool                 bReproducible,
+                        real                     ewaldcoeff_q,
+                        real                     ewaldcoeff_lj,
+                        int                      nthread,
+                        PmeRunMode               runMode,
+                        PmeGpu*                  pmeGpu,
+                        const gmx_device_info_t* gpuInfo,
+                        PmeGpuProgramHandle      pmeGpuProgram,
+                        const gmx::MDLogger&     mdlog);
 
 /*! \brief Destroys the PME data structure.*/
-void gmx_pme_destroy(gmx_pme_t *pme);
+void gmx_pme_destroy(gmx_pme_t* pme);
 
 //@{
 /*! \brief Flag values that control what gmx_pme_do() will calculate
  *
  * These can be combined with bitwise-OR if more than one thing is required.
  */
-#define GMX_PME_SPREAD        (1<<0)
-#define GMX_PME_SOLVE         (1<<1)
-#define GMX_PME_CALC_F        (1<<2)
-#define GMX_PME_CALC_ENER_VIR (1<<3)
+#define GMX_PME_SPREAD (1 << 0)
+#define GMX_PME_SOLVE (1 << 1)
+#define GMX_PME_CALC_F (1 << 2)
+#define GMX_PME_CALC_ENER_VIR (1 << 3)
 /* This forces the grid to be backtransformed even without GMX_PME_CALC_F */
-#define GMX_PME_CALC_POT      (1<<4)
+#define GMX_PME_CALC_POT (1 << 4)
 
-#define GMX_PME_DO_ALL_F  (GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_F)
+#define GMX_PME_DO_ALL_F (GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_F)
 //@}
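
Since each flag occupies a distinct bit, requests compose with bitwise OR, and GMX_PME_DO_ALL_F is by definition the spread/solve/force combination. A small sketch:

// Sketch: request the full force path plus energy/virial output.
int flags = GMX_PME_DO_ALL_F | GMX_PME_CALC_ENER_VIR;
static_assert(GMX_PME_DO_ALL_F == (GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_F),
              "GMX_PME_DO_ALL_F covers exactly the spread/solve/force bits");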
 
 /*! \brief Do a PME calculation on a CPU for the long range electrostatics and/or LJ.
@@ -182,27 +188,39 @@ void gmx_pme_destroy(gmx_pme_t *pme);
  *
 * \return 0 indicates all well, non-zero is an error code.
  */
-int gmx_pme_do(struct gmx_pme_t *pme,
+int gmx_pme_do(struct gmx_pme_t*              pme,
                gmx::ArrayRef<const gmx::RVec> coordinates,
                gmx::ArrayRef<gmx::RVec>       forces,
-               real chargeA[],  real chargeB[],
-               real c6A[],      real c6B[],
-               real sigmaA[],   real sigmaB[],
-               const matrix box, const t_commrec *cr,
-               int  maxshift_x, int maxshift_y,
-               t_nrnb *nrnb,    gmx_wallcycle *wcycle,
-               matrix vir_q,    matrix vir_lj,
-               real *energy_q,  real *energy_lj,
-               real lambda_q,   real lambda_lj,
-               real *dvdlambda_q, real *dvdlambda_lj,
-               int flags);
+               real                           chargeA[],
+               real                           chargeB[],
+               real                           c6A[],
+               real                           c6B[],
+               real                           sigmaA[],
+               real                           sigmaB[],
+               const matrix                   box,
+               const t_commrec*               cr,
+               int                            maxshift_x,
+               int                            maxshift_y,
+               t_nrnb*                        nrnb,
+               gmx_wallcycle*                 wcycle,
+               matrix                         vir_q,
+               matrix                         vir_lj,
+               real*                          energy_q,
+               real*                          energy_lj,
+               real                           lambda_q,
+               real                           lambda_lj,
+               real*                          dvdlambda_q,
+               real*                          dvdlambda_lj,
+               int                            flags);
 
 /*! \brief Called on the nodes that do PME exclusively */
-int gmx_pmeonly(struct gmx_pme_t *pme,
-                const t_commrec *cr,     t_nrnb *mynrnb,
-                gmx_wallcycle  *wcycle,
+int gmx_pmeonly(struct gmx_pme_t*         pme,
+                const t_commrec*          cr,
+                t_nrnb*                   mynrnb,
+                gmx_wallcycle*            wcycle,
                 gmx_walltime_accounting_t walltime_accounting,
-                t_inputrec *ir, PmeRunMode runMode);
+                t_inputrec*               ir,
+                PmeRunMode                runMode);
 
 /*! \brief Calculate the PME grid energy V for n charges.
  *
@@ -212,43 +230,54 @@ int gmx_pmeonly(struct gmx_pme_t *pme,
  * pme struct. Currently does not work in parallel or with free
  * energy.
  */
-void gmx_pme_calc_energy(gmx_pme_t                      *pme,
-                         gmx::ArrayRef<const gmx::RVec>  x,
-                         gmx::ArrayRef<const real>       q,
-                         real                           *V);
+void gmx_pme_calc_energy(gmx_pme_t* pme, gmx::ArrayRef<const gmx::RVec> x, gmx::ArrayRef<const real> q, real* V);
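
A minimal call sketch, assuming a valid pme handle from gmx_pme_init(); the coordinates and charges are illustrative, and gmx::ArrayRef binds implicitly to std::vector:

// Sketch: probe the grid energy for two illustrative test charges.
std::vector<gmx::RVec> x = { gmx::RVec(0.0, 0.0, 0.0), gmx::RVec(0.5, 0.5, 0.5) };
std::vector<real>      q = { 1.0, -1.0 };
real                   V = 0;
gmx_pme_calc_energy(pme, x, q, &V);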
 
 /*! \brief Send the charges and maxshift to our PME-only node. */
-void gmx_pme_send_parameters(const t_commrec *cr,
-                             const interaction_const_t *ic,
-                             gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
-                             real *chargeA, real *chargeB,
-                             real *sqrt_c6A, real *sqrt_c6B,
-                             real *sigmaA, real *sigmaB,
-                             int maxshift_x, int maxshift_y);
+void gmx_pme_send_parameters(const t_commrec*           cr,
+                             const interaction_const_t* ic,
+                             gmx_bool                   bFreeEnergy_q,
+                             gmx_bool                   bFreeEnergy_lj,
+                             real*                      chargeA,
+                             real*                      chargeB,
+                             real*                      sqrt_c6A,
+                             real*                      sqrt_c6B,
+                             real*                      sigmaA,
+                             real*                      sigmaB,
+                             int                        maxshift_x,
+                             int                        maxshift_y);
 
 /*! \brief Send the coordinates to our PME-only node and request a PME calculation */
-void gmx_pme_send_coordinates(t_forcerec *fr, const t_commrec *cr, const matrix box, const rvec *x,
-                              real lambda_q, real lambda_lj,
-                              gmx_bool bEnerVir,
-                              int64_t step, bool useGpuPmePpComms,
-                              bool reinitGpuPmePpComms,
-                              bool sendCoordinatesFromGpu,
-                              GpuEventSynchronizer *coordinatesReadyOnDeviceEvent, gmx_wallcycle *wcycle);
+void gmx_pme_send_coordinates(t_forcerec*           fr,
+                              const t_commrec*      cr,
+                              const matrix          box,
+                              const rvec*           x,
+                              real                  lambda_q,
+                              real                  lambda_lj,
+                              gmx_bool              bEnerVir,
+                              int64_t               step,
+                              bool                  useGpuPmePpComms,
+                              bool                  reinitGpuPmePpComms,
+                              bool                  sendCoordinatesFromGpu,
+                              GpuEventSynchronizer* coordinatesReadyOnDeviceEvent,
+                              gmx_wallcycle*        wcycle);
 
 /*! \brief Tell our PME-only node to finish */
-void gmx_pme_send_finish(const t_commrec *cr);
+void gmx_pme_send_finish(const t_commrec* cr);
 
 /*! \brief Tell our PME-only node to reset all cycle and flop counters */
-void gmx_pme_send_resetcounters(const t_commrec *cr, int64_t step);
+void gmx_pme_send_resetcounters(const t_commrec* cr, int64_t step);
 
 /*! \brief PP nodes receive the long range forces from the PME nodes */
-void gmx_pme_receive_f(gmx::PmePpCommGpu *pmePpCommGpu,
-                       const t_commrec *cr,
-                       gmx::ForceWithVirial *forceWithVirial,
-                       real *energy_q, real *energy_lj,
-                       real *dvdlambda_q, real *dvdlambda_lj,
-                       bool useGpuPmePpComms, bool receivePmeForceToGpu,
-                       float *pme_cycles);
+void gmx_pme_receive_f(gmx::PmePpCommGpu*    pmePpCommGpu,
+                       const t_commrec*      cr,
+                       gmx::ForceWithVirial* forceWithVirial,
+                       real*                 energy_q,
+                       real*                 energy_lj,
+                       real*                 dvdlambda_q,
+                       real*                 dvdlambda_lj,
+                       bool                  useGpuPmePpComms,
+                       bool                  receivePmeForceToGpu,
+                       float*                pme_cycles);
 
 /*! \brief
  * This function updates the local atom data on GPU after DD (charges, coordinates, etc.).
@@ -259,9 +288,7 @@ void gmx_pme_receive_f(gmx::PmePpCommGpu *pmePpCommGpu,
  * \param[in]     numAtoms   The number of particles.
  * \param[in]     charges    The pointer to the array of particle charges.
  */
-void gmx_pme_reinit_atoms(gmx_pme_t  *pme,
-                          int         numAtoms,
-                          const real *charges);
+void gmx_pme_reinit_atoms(gmx_pme_t* pme, int numAtoms, const real* charges);
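
A hedged call sketch; the local atom count and charge array would come from the domain decomposition's post-repartitioning state (the names below are illustrative):

// Sketch: refresh per-atom PME GPU data after DD repartitioning.
gmx_pme_reinit_atoms(pme, numLocalAtoms, localChargesA);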
 
 /* A block of PME GPU functions */
 
@@ -274,7 +301,7 @@ void gmx_pme_reinit_atoms(gmx_pme_t  *pme,
  *
  * \returns true if PME can run on GPU on this build, false otherwise.
  */
-bool pme_gpu_supports_build(std::string *error);
+bool pme_gpu_supports_build(std::string* error);
 
 /*! \brief Checks whether the detected (GPU) hardware allows to run PME on GPU.
  *
@@ -283,8 +310,7 @@ bool pme_gpu_supports_build(std::string *error);
  *
  * \returns true if PME can run on GPU on this build, false otherwise.
  */
-bool pme_gpu_supports_hardware(const gmx_hw_info_t &hwinfo,
-                               std::string         *error);
+bool pme_gpu_supports_hardware(const gmx_hw_info_t& hwinfo, std::string* error);
 
 /*! \brief Checks whether the input system allows to run PME on GPU.
  * TODO: this partly duplicates an internal PME assert function
@@ -297,7 +323,7 @@ bool pme_gpu_supports_hardware(const gmx_hw_info_t &hwinfo,
  *
  * \returns true if PME can run on GPU with this input, false otherwise.
  */
-bool pme_gpu_supports_input(const t_inputrec &ir, const gmx_mtop_t &mtop, std::string *error);
+bool pme_gpu_supports_input(const t_inputrec& ir, const gmx_mtop_t& mtop, std::string* error);
 
 /*! \brief
  * Returns the active PME codepath (CPU, GPU, mixed).
@@ -306,7 +332,7 @@ bool pme_gpu_supports_input(const t_inputrec &ir, const gmx_mtop_t &mtop, std::s
  * \param[in]  pme            The PME data structure.
  * \returns active PME codepath.
  */
-PmeRunMode pme_run_mode(const gmx_pme_t *pme);
+PmeRunMode pme_run_mode(const gmx_pme_t* pme);
 
 /*! \libinternal \brief
  * Return the pinning policy appropriate for this build configuration
@@ -322,7 +348,7 @@ gmx::PinningPolicy pme_get_pinning_policy();
  * \param[in]  pme            The PME data structure.
  * \returns true if PME can run on GPU, false otherwise.
  */
-inline bool pme_gpu_task_enabled(const gmx_pme_t *pme)
+inline bool pme_gpu_task_enabled(const gmx_pme_t* pme)
 {
     return (pme != nullptr) && (pme_run_mode(pme) != PmeRunMode::CPU);
 }
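
Because the check is inline and null-safe, call sites can guard GPU-only work cheaply; for example:

// Sketch: skip GPU-side bookkeeping entirely on CPU-only runs.
if (pme_gpu_task_enabled(pme))
{
    pme_gpu_reset_timings(pme); // declared below in this header
}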
@@ -331,7 +357,8 @@ inline bool pme_gpu_task_enabled(const gmx_pme_t *pme)
  *
  * \param[in]  pme  The PME data structure.
  */
-GPU_FUNC_QUALIFIER int pme_gpu_get_padding_size(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(0);
+GPU_FUNC_QUALIFIER int pme_gpu_get_padding_size(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
+        GPU_FUNC_TERM_WITH_RETURN(0);
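
The GPU_FUNC_* macros let one declaration serve both GPU and CPU-only builds: with GPU support they expand to a normal prototype, without it to an inline stub that returns the given fallback. A sketch of the idea; the exact expansions live elsewhere in the tree, and the forms below are assumptions:

// Hypothetical CPU-only expansions (illustrative, not the project's literal macros):
// #define GPU_FUNC_QUALIFIER static inline
// #define GPU_FUNC_ARGUMENT(arg) /* unused: arg */
// #define GPU_FUNC_TERM_WITH_RETURN(arg) { return (arg); }
// With those, the declaration above collapses to:
// static inline int pme_gpu_get_padding_size(const gmx_pme_t* /* unused */) { return 0; }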
 
 // The following functions are all the PME GPU entry points,
 // currently inlining to nothing on non-CUDA builds.
@@ -341,7 +368,7 @@ GPU_FUNC_QUALIFIER int pme_gpu_get_padding_size(const gmx_pme_t *GPU_FUNC_ARGUME
  *
  * \param[in] pme            The PME structure.
  */
-GPU_FUNC_QUALIFIER void pme_gpu_reset_timings(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM;
+GPU_FUNC_QUALIFIER void pme_gpu_reset_timings(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM;
 
 /*! \brief
  * Copies the PME GPU timings to the gmx_wallclock_gpu_pme_t structure (for log output). To be called at the run end.
@@ -349,8 +376,8 @@ GPU_FUNC_QUALIFIER void pme_gpu_reset_timings(const gmx_pme_t *GPU_FUNC_ARGUMENT
  * \param[in] pme               The PME structure.
  * \param[in] timings           The gmx_wallclock_gpu_pme_t structure.
  */
-GPU_FUNC_QUALIFIER void pme_gpu_get_timings(const gmx_pme_t         *GPU_FUNC_ARGUMENT(pme),
-                                            gmx_wallclock_gpu_pme_t *GPU_FUNC_ARGUMENT(timings)) GPU_FUNC_TERM;
+GPU_FUNC_QUALIFIER void pme_gpu_get_timings(const gmx_pme_t*         GPU_FUNC_ARGUMENT(pme),
+                                            gmx_wallclock_gpu_pme_t* GPU_FUNC_ARGUMENT(timings)) GPU_FUNC_TERM;
 
 /* The main PME GPU functions */
 
@@ -364,12 +391,12 @@ GPU_FUNC_QUALIFIER void pme_gpu_get_timings(const gmx_pme_t         *GPU_FUNC_AR
  *                              The flags are the GMX_PME_ flags from pme.h.
  * \param[in]  useGpuForceReduction Whether PME forces are reduced on GPU this step or should be downloaded for CPU reduction
  */
-GPU_FUNC_QUALIFIER void pme_gpu_prepare_computation(gmx_pme_t      *GPU_FUNC_ARGUMENT(pme),
-                                                    bool            GPU_FUNC_ARGUMENT(needToUpdateBox),
-                                                    const matrix    GPU_FUNC_ARGUMENT(box),
-                                                    gmx_wallcycle  *GPU_FUNC_ARGUMENT(wcycle),
-                                                    int             GPU_FUNC_ARGUMENT(flags),
-                                                    bool            GPU_FUNC_ARGUMENT(useGpuForceReduction)) GPU_FUNC_TERM;
+GPU_FUNC_QUALIFIER void pme_gpu_prepare_computation(gmx_pme_t*     GPU_FUNC_ARGUMENT(pme),
+                                                    bool           GPU_FUNC_ARGUMENT(needToUpdateBox),
+                                                    const matrix   GPU_FUNC_ARGUMENT(box),
+                                                    gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle),
+                                                    int            GPU_FUNC_ARGUMENT(flags),
+                                                    bool GPU_FUNC_ARGUMENT(useGpuForceReduction)) GPU_FUNC_TERM;
 
 /*! \brief
  * Launches first stage of PME on GPU - spreading kernel.
@@ -378,9 +405,9 @@ GPU_FUNC_QUALIFIER void pme_gpu_prepare_computation(gmx_pme_t      *GPU_FUNC_ARG
  * \param[in] xReadyOnDevice     Event synchronizer indicating that the coordinates are ready in the device memory; nullptr allowed only on separate PME ranks.
  * \param[in] wcycle             The wallclock counter.
  */
-GPU_FUNC_QUALIFIER void pme_gpu_launch_spread(gmx_pme_t            *GPU_FUNC_ARGUMENT(pme),
-                                              GpuEventSynchronizer *GPU_FUNC_ARGUMENT(xReadyOnDevice),
-                                              gmx_wallcycle        *GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM;
+GPU_FUNC_QUALIFIER void pme_gpu_launch_spread(gmx_pme_t*            GPU_FUNC_ARGUMENT(pme),
+                                              GpuEventSynchronizer* GPU_FUNC_ARGUMENT(xReadyOnDevice),
+                                              gmx_wallcycle*        GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM;
 
 /*! \brief
  * Launches middle stages of PME (FFT R2C, solving, FFT C2R) either on GPU or on CPU, depending on the run mode.
@@ -388,21 +415,21 @@ GPU_FUNC_QUALIFIER void pme_gpu_launch_spread(gmx_pme_t            *GPU_FUNC_ARG
  * \param[in] pme               The PME data structure.
  * \param[in] wcycle            The wallclock counter.
  */
-GPU_FUNC_QUALIFIER void pme_gpu_launch_complex_transforms(gmx_pme_t       *GPU_FUNC_ARGUMENT(pme),
-                                                          gmx_wallcycle   *GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM;
+GPU_FUNC_QUALIFIER void pme_gpu_launch_complex_transforms(gmx_pme_t*     GPU_FUNC_ARGUMENT(pme),
+                                                          gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM;
 
 /*! \brief
  * Launches last stage of PME on GPU - force gathering and D2H force transfer.
  *
  * \param[in]  pme               The PME data structure.
  * \param[in]  wcycle            The wallclock counter.
- * \param[in]  forceTreatment    Tells how data should be treated. The gathering kernel either stores
- *                               the output reciprocal forces into the host array, or copies its contents to the GPU first
+ * \param[in]  forceTreatment    Tells how data should be treated. The gathering kernel either
+ *                               stores the output reciprocal forces into the host array, or
+ *                               copies its contents to the GPU first
  *                               and accumulates. The reduction is non-atomic.
  */
-GPU_FUNC_QUALIFIER void pme_gpu_launch_gather(const gmx_pme_t        *GPU_FUNC_ARGUMENT(pme),
-                                              gmx_wallcycle          *GPU_FUNC_ARGUMENT(wcycle),
-                                              PmeForceOutputHandling  GPU_FUNC_ARGUMENT(forceTreatment)) GPU_FUNC_TERM;
+GPU_FUNC_QUALIFIER void pme_gpu_launch_gather(const gmx_pme_t*       GPU_FUNC_ARGUMENT(pme),
+                                              gmx_wallcycle*         GPU_FUNC_ARGUMENT(wcycle),
+                                              PmeForceOutputHandling GPU_FUNC_ARGUMENT(forceTreatment)) GPU_FUNC_TERM;
 
 /*! \brief
  * Attempts to complete PME GPU tasks.
@@ -422,16 +449,16 @@ GPU_FUNC_QUALIFIER void pme_gpu_launch_gather(const gmx_pme_t        *GPU_FUNC_A
  * \param[out] enerd           The output energies
  * \param[in] flags            The combination of flags to affect this PME computation.
  *                             The flags are the GMX_PME_ flags from pme.h.
- * \param[in]  completionKind  Indicates whether PME task completion should only be checked rather than waited for
- * \returns                   True if the PME GPU tasks have completed
+ * \param[in]  completionKind  Indicates whether PME task completion should only be checked rather
+ *                             than waited for
+ * \returns                    True if the PME GPU tasks have completed
  */
-GPU_FUNC_QUALIFIER bool
-    pme_gpu_try_finish_task(gmx_pme_t            *GPU_FUNC_ARGUMENT(pme),
-                            int                   GPU_FUNC_ARGUMENT(flags),
-                            gmx_wallcycle        *GPU_FUNC_ARGUMENT(wcycle),
-                            gmx::ForceWithVirial *GPU_FUNC_ARGUMENT(forceWithVirial),
-                            gmx_enerdata_t       *GPU_FUNC_ARGUMENT(enerd),
-                            GpuTaskCompletion     GPU_FUNC_ARGUMENT(completionKind)) GPU_FUNC_TERM_WITH_RETURN(false);
+GPU_FUNC_QUALIFIER bool pme_gpu_try_finish_task(gmx_pme_t*            GPU_FUNC_ARGUMENT(pme),
+                                                int                   GPU_FUNC_ARGUMENT(flags),
+                                                gmx_wallcycle*        GPU_FUNC_ARGUMENT(wcycle),
+                                                gmx::ForceWithVirial* GPU_FUNC_ARGUMENT(forceWithVirial),
+                                                gmx_enerdata_t*       GPU_FUNC_ARGUMENT(enerd),
+                                                GpuTaskCompletion GPU_FUNC_ARGUMENT(completionKind))
+        GPU_FUNC_TERM_WITH_RETURN(false);
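
The completionKind argument enables overlap: a caller can poll with a non-blocking check and keep doing CPU work until the GPU tasks report done. A sketch, assuming GpuTaskCompletion exposes a Check enumerator (the enum is only forward-declared in this header, so that naming is an assumption):

// Sketch: overlap independent CPU work with PME GPU completion.
while (!pme_gpu_try_finish_task(pme, flags, wcycle, forceWithVirial, enerd, GpuTaskCompletion::Check))
{
    doOtherCpuWork(); // illustrative placeholder for independent work
}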
 
 /*! \brief
  * Blocks until PME GPU tasks are completed, and gets the output forces and virial/energy
@@ -444,12 +471,11 @@ GPU_FUNC_QUALIFIER bool
  * \param[out] forceWithVirial The output force and virial
  * \param[out] enerd           The output energies
  */
-GPU_FUNC_QUALIFIER void
-    pme_gpu_wait_and_reduce(gmx_pme_t            *GPU_FUNC_ARGUMENT(pme),
-                            int                   GPU_FUNC_ARGUMENT(flags),
-                            gmx_wallcycle        *GPU_FUNC_ARGUMENT(wcycle),
-                            gmx::ForceWithVirial *GPU_FUNC_ARGUMENT(forceWithVirial),
-                            gmx_enerdata_t       *GPU_FUNC_ARGUMENT(enerd)) GPU_FUNC_TERM;
+GPU_FUNC_QUALIFIER void pme_gpu_wait_and_reduce(gmx_pme_t*            GPU_FUNC_ARGUMENT(pme),
+                                                int                   GPU_FUNC_ARGUMENT(flags),
+                                                gmx_wallcycle*        GPU_FUNC_ARGUMENT(wcycle),
+                                                gmx::ForceWithVirial* GPU_FUNC_ARGUMENT(forceWithVirial),
+                                                gmx_enerdata_t* GPU_FUNC_ARGUMENT(enerd)) GPU_FUNC_TERM;
 
 /*! \brief
  * The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing.
@@ -464,45 +490,50 @@ GPU_FUNC_QUALIFIER void
  * \param[in] pme            The PME data structure.
  * \param[in] wcycle         The wallclock counter.
  */
-GPU_FUNC_QUALIFIER void pme_gpu_reinit_computation(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme),
-                                                   gmx_wallcycle   *GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM;
+GPU_FUNC_QUALIFIER void pme_gpu_reinit_computation(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
+                                                   gmx_wallcycle*   GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM;
 
 
 /*! \brief Get pointer to device copy of coordinate data.
  * \param[in] pme            The PME data structure.
  * \returns                  Pointer to coordinate data
  */
-GPU_FUNC_QUALIFIER DeviceBuffer<float> pme_gpu_get_device_x(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer<float> {});
+GPU_FUNC_QUALIFIER DeviceBuffer<float> pme_gpu_get_device_x(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
+        GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer<float>{});
 
 /*! \brief Set pointer to device copy of coordinate data.
  * \param[in] pme            The PME data structure.
  * \param[in] d_x            The pointer to the positions buffer to be set
  */
-GPU_FUNC_QUALIFIER void pme_gpu_set_device_x(const gmx_pme_t     *GPU_FUNC_ARGUMENT(pme),
-                                             DeviceBuffer<float>  GPU_FUNC_ARGUMENT(d_x)) GPU_FUNC_TERM;
+GPU_FUNC_QUALIFIER void pme_gpu_set_device_x(const gmx_pme_t*    GPU_FUNC_ARGUMENT(pme),
+                                             DeviceBuffer<float> GPU_FUNC_ARGUMENT(d_x)) GPU_FUNC_TERM;
 
 /*! \brief Get pointer to device copy of force data.
  * \param[in] pme            The PME data structure.
  * \returns                  Pointer to force data
  */
-GPU_FUNC_QUALIFIER void *pme_gpu_get_device_f(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
+GPU_FUNC_QUALIFIER void* pme_gpu_get_device_f(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
+        GPU_FUNC_TERM_WITH_RETURN(nullptr);
 
 /*! \brief Returns the pointer to the GPU stream.
  *  \param[in] pme            The PME data structure.
  *  \returns                  Pointer to GPU stream object.
  */
-GPU_FUNC_QUALIFIER void *pme_gpu_get_device_stream(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
+GPU_FUNC_QUALIFIER void* pme_gpu_get_device_stream(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
+        GPU_FUNC_TERM_WITH_RETURN(nullptr);
 
 /*! \brief Returns the pointer to the GPU context.
  *  \param[in] pme            The PME data structure.
  *  \returns                  Pointer to GPU context object.
  */
-GPU_FUNC_QUALIFIER void *pme_gpu_get_device_context(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
+GPU_FUNC_QUALIFIER void* pme_gpu_get_device_context(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
+        GPU_FUNC_TERM_WITH_RETURN(nullptr);
 
 /*! \brief Get pointer to the device synchronizer object that allows syncing on PME force calculation completion
  * \param[in] pme            The PME data structure.
 * \returns                  Pointer to synchronizer
  */
-GPU_FUNC_QUALIFIER GpuEventSynchronizer *pme_gpu_get_f_ready_synchronizer(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
+GPU_FUNC_QUALIFIER GpuEventSynchronizer* pme_gpu_get_f_ready_synchronizer(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
+        GPU_FUNC_TERM_WITH_RETURN(nullptr);
 
 #endif