#include "gromacs/timing/walltime_accounting.h"
#include "gromacs/utility/gmxmpi.h"
+#include "pme-gpu-types.h"
+
//! A repeat of typedef from parallel_3dfft.h
typedef struct gmx_parallel_3dfft *gmx_parallel_3dfft_t;
struct t_commrec;
struct t_inputrec;
+struct pme_gpu_t;
//@{
//! Grid indices for A state for charge and Lennard-Jones C6
} splinedata_t;
/*! \brief Data structure for coordinating transfer between PP and PME ranks*/
-typedef struct {
+struct pme_atomcomm_t {
int dimind; /* The index of the dimension, 0=x, 1=y */
int nslab;
int nodeid;
int *thread_idx; /* Which thread should spread which coefficient */
thread_plist_t *thread_plist;
splinedata_t *spline;
-} pme_atomcomm_t;
+};
/*! \brief Data structure for a single PME grid */
-typedef struct {
+struct pmegrid_t {
ivec ci; /* The spatial location of this grid */
ivec n; /* The used size of *grid, including order-1 */
ivec offset; /* The grid offset from the full node grid */
int order; /* PME spreading order */
ivec s; /* The allocated size of *grid, s >= n */
real *grid; /* The grid local thread, size n */
-} pmegrid_t;
+};
/*! \brief Data structures for PME grids */
-typedef struct {
+struct pmegrids_t {
pmegrid_t grid; /* The full node grid (non thread-local) */
int nthread; /* The number of threads operating on this grid */
ivec nc; /* The local spatial decomposition over the threads */
real *grid_all; /* Allocated array for the grids in *grid_th */
int *g2t[DIM]; /* The grid to thread index */
ivec nthread_comm; /* The number of threads to communicate with */
-} pmegrids_t;
+};
/*! \brief Data structure for spline-interpolation working buffers */
struct pme_spline_work;
struct pme_solve_work_t;
/*! \brief Master PME data structure */
-typedef struct gmx_pme_t {
+struct gmx_pme_t {
int ndecompdim; /* The number of decomposition dimensions */
int nodeid; /* Our nodeid in mpi->mpi_comm */
int nodeid_major;
real ewaldcoeff_lj; /* Ewald splitting coefficient for r^-6 */
real epsilon_r;
- class EwaldBoxZScaler *boxScaler; /*! The scaling data Ewald uses with walls (set at pme_init constant for the entire run) */
+
+    enum PmeRunMode runMode;  /* Which codepath the PME runner takes: CPU, GPU or mixed;
+                               * TODO: this information should be owned by the task scheduler,
+                               * and ideally not be duplicated here.
+                               */
+
+    pme_gpu_t *gpu;           /* A pointer to the GPU data.
+                               * TODO: this should be unique or a shared pointer.
+                               * Currently, in practice there is a single gmx_pme_t instance, while the code
+                               * is partially set up for several of them. The PME tuning calls gmx_pme_reinit(),
+                               * which fully reinitializes the one and only PME structure, possibly
+                               * keeping the old grid buffers if they were already large enough.
+                               * This choice should be made explicit in the later refactoring:
+                               * do we store several PME objects for different grid sizes,
+                               * or a single PME object that handles different grid sizes gracefully?
+                               */
+
+
+    class EwaldBoxZScaler *boxScaler;  /**< The scaling data Ewald uses with walls (set at pme_init, constant for the entire run) */
+
int ljpme_combination_rule; /* Type of combination rule in LJ-PME */
* This can probably be done in a better way
* but this simple hack works for now
*/
+
/* The PME coefficient spreading grid sizes/strides, includes pme_order-1 */
int pmegrid_nx, pmegrid_ny, pmegrid_nz;
/* pmegrid_nz might be larger than strictly necessary to ensure
/* Work data for sum_qgrid */
real * sum_qgrid_tmp;
real * sum_qgrid_dd_tmp;
-} t_gmx_pme_t;
+};
//! @endcond
+/*! \brief
+ * Tells whether PME is currently running on a GPU.
+ * TODO: should this be removed eventually?
+ *
+ * \param[in] pme The PME structure.
+ * \returns True if PME runs on GPU currently, false otherwise.
+ */
+inline bool pme_gpu_active(const gmx_pme_t *pme)
+{
+ return (pme != nullptr) && (pme->runMode != PmeRunMode::CPU);
+}
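+
+/* Example usage (illustrative sketch only): a caller can branch on
+ * pme_gpu_active() to select the codepath, e.g.
+ *
+ *     if (pme_gpu_active(pme))
+ *     {
+ *         // take the GPU (or mixed) PME codepath
+ *     }
+ *     else
+ *     {
+ *         // take the CPU-only PME codepath
+ *     }
+ */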
+
+/*! \brief Check restrictions on pme_order and the PME grid sizes nkx, nky, nkz.
+ *
+ * With bFatal=TRUE, a fatal error is generated on violation;
+ * bValidSettings=NULL can then be passed.
+ * With bFatal=FALSE, *bValidSettings reports the validity of the settings.
+ * bUseThreads tells if any MPI rank doing PME uses more than 1 thread.
+ * If bUseThreads is unknown at the time of calling, pass TRUE for
+ * conservative checking.
+ *
+ * TODO: the GPU restrictions are checked separately during pme_gpu_init().
+ */
+void gmx_pme_check_restrictions(int pme_order,
+ int nkx, int nky, int nkz,
+ int nnodes_major,
+ gmx_bool bUseThreads,
+ gmx_bool bFatal,
+ gmx_bool *bValidSettings);
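+
+/* Example usage (illustrative sketch only; variable names are placeholders):
+ * query the validity of a candidate setup without aborting on violation.
+ *
+ *     gmx_bool settingsValid;
+ *     gmx_pme_check_restrictions(pme_order, nkx, nky, nkz, nnodes_major,
+ *                                TRUE,    // bUseThreads: conservative choice when unknown
+ *                                FALSE,   // bFatal: report validity instead of aborting
+ *                                &settingsValid);
+ *     // With bFatal=TRUE, NULL may be passed for the last argument instead.
+ */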
+
/*! \brief Initialize the PME-only side of the PME <-> PP communication */
gmx_pme_pp_t gmx_pme_pp_init(t_commrec *cr);
/*! \brief Called by PME-only ranks to receive coefficients and coordinates
*
- * The return value is used to control further processing, with meanings:
- * pmerecvqxX: all parameters set, chargeA and chargeB can be NULL
- * pmerecvqxFINISH: no parameters set
- * pmerecvqxSWITCHGRID: only grid_size and *ewaldcoeff are set
- * pmerecvqxRESETCOUNTERS: *step is set
+ * \param[in,out] pme_pp PME-PP communication structure.
+ * \param[out] natoms Number of received atoms.
+ * \param[out] chargeA State A charges, if received.
+ * \param[out] chargeB State B charges, if received.
+ * \param[out] sqrt_c6A           State A sqrt(C6) coefficients, if received.
+ * \param[out] sqrt_c6B           State B sqrt(C6) coefficients, if received.
+ * \param[out] sigmaA             State A sigma values, if received.
+ * \param[out] sigmaB             State B sigma values, if received.
+ * \param[out] box System box, if received.
+ * \param[out] x Atoms' coordinates, if received.
+ * \param[out] f Atoms' PME forces, if received.
+ * \param[out] maxshift_x Maximum shift in X direction, if received.
+ * \param[out] maxshift_y Maximum shift in Y direction, if received.
+ * \param[out] lambda_q Free-energy lambda for electrostatics, if received.
+ * \param[out] lambda_lj Free-energy lambda for Lennard-Jones, if received.
+ * \param[out] bEnerVir Set to true if this is an energy/virial calculation step, otherwise set to false.
+ * \param[out] step MD integration step number.
+ * \param[out] grid_size PME grid size, if received.
+ * \param[out] ewaldcoeff_q Ewald cut-off parameter for electrostatics, if received.
+ * \param[out] ewaldcoeff_lj Ewald cut-off parameter for Lennard-Jones, if received.
+ * \param[out] atomSetChanged Set to true only if the local domain atom data (charges/coefficients)
+ * has been received (after DD) and should be reinitialized. Otherwise not changed.
+ *
+ * \retval pmerecvqxX All parameters were set, chargeA and chargeB can be NULL.
+ * \retval pmerecvqxFINISH No parameters were set.
+ * \retval pmerecvqxSWITCHGRID Only grid_size and *ewaldcoeff were set.
+ * \retval pmerecvqxRESETCOUNTERS *step was set.
*/
int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp *pme_pp,
int *natoms,
real *lambda_q, real *lambda_lj,
gmx_bool *bEnerVir,
gmx_int64_t *step,
- ivec grid_size, real *ewaldcoeff_q, real *ewaldcoeff_lj);
+ ivec grid_size,
+ real *ewaldcoeff_q,
+ real *ewaldcoeff_lj,
+ bool *atomSetChanged);
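+
+/* Example usage (illustrative sketch only; the full argument list is omitted):
+ * a PME-only rank dispatches on the return value.
+ *
+ *     int ret = gmx_pme_recv_coeffs_coords(...);  // argument list omitted here
+ *     switch (ret)
+ *     {
+ *         case pmerecvqxX:             break;  // all parameters set: do a PME mesh step
+ *         case pmerecvqxSWITCHGRID:    break;  // switch to the received grid size / ewaldcoeff
+ *         case pmerecvqxRESETCOUNTERS: break;  // reset the cycle counters at *step
+ *         case pmerecvqxFINISH:        break;  // leave the PME-only receive loop
+ *     }
+ */
+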
/*! \brief Send the PME mesh force, virial and energy to the PP-only nodes */
void gmx_pme_send_force_vir_ener(struct gmx_pme_pp *pme_pp,