PME GPU/CUDA data framework.
[alexxy/gromacs.git] / src / gromacs / ewald / pme-internal.h
index fbb0dfe37a828b175863244aded1553483a4b522..0feb97741995ce0ced86ed9c0d7a323d473f92fb 100644 (file)
 #include "gromacs/timing/walltime_accounting.h"
 #include "gromacs/utility/gmxmpi.h"
 
+#include "pme-gpu-types.h"
+
 //! A repeat of typedef from parallel_3dfft.h
 typedef struct gmx_parallel_3dfft *gmx_parallel_3dfft_t;
 
 struct t_commrec;
 struct t_inputrec;
+struct pme_gpu_t;
 
 //@{
 //! Grid indices for A state for charge and Lennard-Jones C6
@@ -167,7 +170,7 @@ typedef struct {
 } splinedata_t;
 
 /*! \brief Data structure for coordinating transfer between PP and PME ranks*/
-typedef struct {
+struct pme_atomcomm_t {
     int      dimind;        /* The index of the dimension, 0=x, 1=y */
     int      nslab;
     int      nodeid;
@@ -203,20 +206,20 @@ typedef struct {
     int            *thread_idx; /* Which thread should spread which coefficient */
     thread_plist_t *thread_plist;
     splinedata_t   *spline;
-} pme_atomcomm_t;
+};
 
 /*! \brief Data structure for a single PME grid */
-typedef struct {
+struct pmegrid_t {
     ivec  ci;     /* The spatial location of this grid         */
     ivec  n;      /* The used size of *grid, including order-1 */
     ivec  offset; /* The grid offset from the full node grid   */
     int   order;  /* PME spreading order                       */
     ivec  s;      /* The allocated size of *grid, s >= n       */
     real *grid;   /* The grid local thread, size n             */
-} pmegrid_t;
+};
 
 /*! \brief Data structures for PME grids */
-typedef struct {
+struct pmegrids_t {
     pmegrid_t  grid;         /* The full node grid (non thread-local)            */
     int        nthread;      /* The number of threads operating on this grid     */
     ivec       nc;           /* The local spatial decomposition over the threads */
@@ -224,7 +227,7 @@ typedef struct {
     real      *grid_all;     /* Allocated array for the grids in *grid_th        */
     int       *g2t[DIM];     /* The grid to thread index                         */
     ivec       nthread_comm; /* The number of threads to communicate with        */
-} pmegrids_t;
+};
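+
+/* Illustrative sketch (not actual PME code): indexing the used region of a
+ * pmegrid_t. It assumes the conventional x-major layout with z contiguous and
+ * the allocated sizes s acting as strides; treat that layout as an assumption.
+ *
+ *     static void zeroUsedRegion(pmegrid_t *g)
+ *     {
+ *         for (int ix = 0; ix < g->n[XX]; ix++)
+ *         {
+ *             for (int iy = 0; iy < g->n[YY]; iy++)
+ *             {
+ *                 for (int iz = 0; iz < g->n[ZZ]; iz++)
+ *                 {
+ *                     // s >= n, so the allocated size, not n, is the stride
+ *                     g->grid[(ix*g->s[YY] + iy)*g->s[ZZ] + iz] = 0;
+ *                 }
+ *             }
+ *         }
+ *     }
+ */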
 
 /*! \brief Data structure for spline-interpolation working buffers */
 struct pme_spline_work;
@@ -233,7 +236,7 @@ struct pme_spline_work;
 struct pme_solve_work_t;
 
 /*! \brief Master PME data structure */
-typedef struct gmx_pme_t {
+struct gmx_pme_t {
     int           ndecompdim; /* The number of decomposition dimensions */
     int           nodeid;     /* Our nodeid in mpi->mpi_comm */
     int           nodeid_major;
@@ -264,7 +267,26 @@ typedef struct gmx_pme_t {
     real       ewaldcoeff_lj; /* Ewald splitting coefficient for r^-6 */
     real       epsilon_r;
 
-    class EwaldBoxZScaler *boxScaler;   /*! The scaling data Ewald uses with walls (set at pme_init constant for the entire run) */
+    enum PmeRunMode runMode; /* Which code path the PME runner takes: CPU, GPU, or mixed.
+                              * TODO: this information should be owned by the task scheduler,
+                              * and ideally not be duplicated here.
+                              */
+
+    pme_gpu_t      *gpu;     /* A pointer to the GPU data.
+                              * TODO: this should be a unique or a shared pointer.
+                              * In practice there is currently a single gmx_pme_t instance, while the
+                              * code is partially set up for many of them. The PME tuning calls
+                              * gmx_pme_reinit(), which fully reinitializes the one and only PME
+                              * structure anew, possibly keeping the old grid buffers if they were
+                              * already large enough.
+                              * This design choice should be made explicit in a later refactoring:
+                              * do we store many PME objects for different grid sizes,
+                              * or a single PME object that handles different grid sizes gracefully?
+                              */
+
+    class EwaldBoxZScaler *boxScaler;   /**< The scaling data Ewald uses with walls (set at pme_init, constant for the entire run) */
 
     int        ljpme_combination_rule;  /* Type of combination rule in LJ-PME */
 
@@ -279,6 +301,7 @@ typedef struct gmx_pme_t {
                                          * This can probably be done in a better way
                                          * but this simple hack works for now
                                          */
+
     /* The PME coefficient spreading grid sizes/strides, includes pme_order-1 */
     int        pmegrid_nx, pmegrid_ny, pmegrid_nz;
     /* pmegrid_nz might be larger than strictly necessary to ensure
@@ -329,10 +352,40 @@ typedef struct gmx_pme_t {
     /* Work data for sum_qgrid */
     real *   sum_qgrid_tmp;
     real *   sum_qgrid_dd_tmp;
-} t_gmx_pme_t;
+};
 
 //! @endcond
 
+/*! \brief
+ * Tells whether PME is currently running on a GPU.
+ * TODO: should this be removed eventually?
+ *
+ * \param[in] pme  The PME structure.
+ * \returns        True if PME currently runs on a GPU, false otherwise.
+ */
+inline bool pme_gpu_active(const gmx_pme_t *pme)
+{
+    return (pme != nullptr) && (pme->runMode != PmeRunMode::CPU);
+}
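+
+/* Usage sketch (illustrative only): a caller that owns both code paths could
+ * branch on pme_gpu_active(). spread_on_gpu() and spread_on_cpu() are
+ * hypothetical names, not functions of this module.
+ *
+ *     if (pme_gpu_active(pme))
+ *     {
+ *         spread_on_gpu(pme->gpu);   // runMode is a GPU-using mode
+ *     }
+ *     else
+ *     {
+ *         spread_on_cpu(pme);        // runMode == PmeRunMode::CPU
+ *     }
+ */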
+
+/*! \brief Check restrictions on pme_order and the PME grid nkx, nky, nkz.
+ *
+ * With bFatal=TRUE, a fatal error is generated on violation and
+ * bValidSettings=NULL can be passed.
+ * With bFatal=FALSE, *bValidSettings reports the validity of the settings.
+ * bUseThreads tells whether any MPI rank doing PME uses more than 1 thread.
+ * If bUseThreads is unknown at the point of the call, pass TRUE for
+ * conservative checking.
+ *
+ * TODO: the GPU restrictions are checked separately during pme_gpu_init().
+ */
+void gmx_pme_check_restrictions(int pme_order,
+                                int nkx, int nky, int nkz,
+                                int nnodes_major,
+                                gmx_bool bUseThreads,
+                                gmx_bool bFatal,
+                                gmx_bool *bValidSettings);
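+
+/* Usage sketch (illustrative only): probing the validity of the settings
+ * without aborting, as described above. The order, grid sizes and rank count
+ * below are made-up example values.
+ *
+ *     gmx_bool settingsValid;
+ *     gmx_pme_check_restrictions(4,             // pme_order
+ *                                48, 48, 48,    // nkx, nky, nkz
+ *                                1,             // nnodes_major
+ *                                TRUE,          // bUseThreads: conservative when unknown
+ *                                FALSE,         // bFatal: only report
+ *                                &settingsValid);
+ *     if (!settingsValid)
+ *     {
+ *         // adjust pme_order or the grid and try again
+ *     }
+ */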
+
 /*! \brief Initialize the PME-only side of the PME <-> PP communication */
 gmx_pme_pp_t gmx_pme_pp_init(t_commrec *cr);
 
@@ -349,11 +402,33 @@ enum {
 
 /*! \brief Called by PME-only ranks to receive coefficients and coordinates
  *
- * The return value is used to control further processing, with meanings:
- * pmerecvqxX:             all parameters set, chargeA and chargeB can be NULL
- * pmerecvqxFINISH:        no parameters set
- * pmerecvqxSWITCHGRID:    only grid_size and *ewaldcoeff are set
- * pmerecvqxRESETCOUNTERS: *step is set
+ * \param[in,out] pme_pp         PME-PP communication structure.
+ * \param[out] natoms            Number of received atoms.
+ * \param[out] chargeA           State A charges, if received.
+ * \param[out] chargeB           State B charges, if received.
+ * \param[out] sqrt_c6A          State A LJ sqrt(C6) coefficients, if received.
+ * \param[out] sqrt_c6B          State B LJ sqrt(C6) coefficients, if received.
+ * \param[out] sigmaA            State A LJ sigma parameters, if received.
+ * \param[out] sigmaB            State B LJ sigma parameters, if received.
+ * \param[out] box               System box, if received.
+ * \param[out] x                 Atoms' coordinates, if received.
+ * \param[out] f                 Atoms' PME forces, if received.
+ * \param[out] maxshift_x        Maximum shift in X direction, if received.
+ * \param[out] maxshift_y        Maximum shift in Y direction, if received.
+ * \param[out] lambda_q          Free-energy lambda for electrostatics, if received.
+ * \param[out] lambda_lj         Free-energy lambda for Lennard-Jones, if received.
+ * \param[out] bEnerVir          Set to true if this is an energy/virial calculation step, otherwise set to false.
+ * \param[out] step              MD integration step number.
+ * \param[out] grid_size         PME grid size, if received.
+ * \param[out] ewaldcoeff_q      Ewald splitting coefficient for electrostatics, if received.
+ * \param[out] ewaldcoeff_lj     Ewald splitting coefficient for Lennard-Jones, if received.
+ * \param[out] atomSetChanged    Set to true only if the local domain atom data (charges/coefficients)
+ *                               has been received (after DD) and should be reinitialized. Otherwise not changed.
+ *
+ * \retval pmerecvqxX             All parameters were set, chargeA and chargeB can be NULL.
+ * \retval pmerecvqxFINISH        No parameters were set.
+ * \retval pmerecvqxSWITCHGRID    Only grid_size and *ewaldcoeff were set.
+ * \retval pmerecvqxRESETCOUNTERS *step was set.
  */
 int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp *pme_pp,
                                int *natoms,
@@ -365,7 +440,10 @@ int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp *pme_pp,
                                real *lambda_q, real *lambda_lj,
                                gmx_bool *bEnerVir,
                                gmx_int64_t *step,
-                               ivec grid_size, real *ewaldcoeff_q, real *ewaldcoeff_lj);
+                               ivec grid_size,
+                               real *ewaldcoeff_q,
+                               real *ewaldcoeff_lj,
+                               bool *atomSetChanged);
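+
+/* Dispatch sketch for a PME-only rank (illustrative only): the return value
+ * controls further processing. receive_from_pp() is a hypothetical wrapper
+ * that stands in for a full gmx_pme_recv_coeffs_coords() call with all of its
+ * output arguments.
+ *
+ *     int ret;
+ *     do
+ *     {
+ *         ret = receive_from_pp(pme_pp, ...);
+ *         switch (ret)
+ *         {
+ *             case pmerecvqxX:             // do the mesh work for this step
+ *                 break;
+ *             case pmerecvqxSWITCHGRID:    // reinitialize with grid_size/ewaldcoeff
+ *                 break;
+ *             case pmerecvqxRESETCOUNTERS: // reset cycle counters at *step
+ *                 break;
+ *         }
+ *     }
+ *     while (ret != pmerecvqxFINISH);
+ */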
 
 /*! \brief Send the PME mesh force, virial and energy to the PP-only nodes */
 void gmx_pme_send_force_vir_ener(struct gmx_pme_pp *pme_pp,