#include "gromacs/ewald/pme.h"
#include "gromacs/ewald/pme_gpu_program.h"
#include "gromacs/gpu_utils/clfftinitializer.h"
-#include "gromacs/gpu_utils/gpu_utils.h" // for GpuApiCallBehavior
+#include "gromacs/gpu_utils/gpu_utils.h" // for GpuApiCallBehavior
#include "gromacs/gpu_utils/hostallocator.h"
#include "gromacs/math/vectypes.h"
*/
bool useGpuForceReduction;
- /*! \brief A boolean which tells if any PME GPU stage should copy all of its outputs to the host.
- * Only intended to be used by the test framework.
+ /*! \brief A boolean which tells if any PME GPU stage should copy all of its outputs to the
+ * host. Only intended to be used by the test framework.
*/
- bool copyAllOutputs;
+ bool copyAllOutputs;
/*! \brief An enum which tells whether most PME GPU D2H/H2D data transfers should be synchronous. */
GpuApiCallBehavior transferKind;
/*! \brief Various flags for the current PME computation, corresponding to the GMX_PME_ flags in pme.h. */
- int currentFlags;
+ int currentFlags;
};
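As a point of reference, here is a minimal sketch of how a unit test might populate these settings; it assumes the enclosing struct is PmeGpuSettings from pme_gpu_types_host.h, and makeTestSettings() is a hypothetical helper, not part of the GROMACS API.

#include "gromacs/ewald/pme_gpu_types_host.h" // assumed location of PmeGpuSettings
#include "gromacs/gpu_utils/gpu_utils.h"      // for GpuApiCallBehavior

static PmeGpuSettings makeTestSettings()
{
    PmeGpuSettings settings;
    settings.useGpuForceReduction = false; // reduce forces on the CPU
    settings.copyAllOutputs       = true;  // stage every GPU output on the host for inspection
    settings.transferKind         = GpuApiCallBehavior::Sync; // make D2H/H2D transfers blocking
    settings.currentFlags         = 0; // set per computation from the GMX_PME_ flags in pme.h
    return settings;
}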
// TODO There's little value in computing the Coulomb and LJ virial
// possible. Use mdspan?
struct PmeOutput
{
- gmx::ArrayRef<gmx::RVec> forces_; //!< Host staging area for PME forces
- bool haveForceOutput_ = false; //!< True if forces have been staged other false (when forces are reduced on the GPU).
- real coulombEnergy_ = 0; //!< Host staging area for PME coulomb energy
- matrix coulombVirial_ = {{0}}; //!< Host staging area for PME coulomb virial contributions
- real lennardJonesEnergy_ = 0; //!< Host staging area for PME LJ energy
- matrix lennardJonesVirial_ = {{0}}; //!< Host staging area for PME LJ virial contributions
-
+ gmx::ArrayRef<gmx::RVec> forces_; //!< Host staging area for PME forces
+    bool haveForceOutput_ =
+            false; //!< True if forces have been staged, otherwise false (when forces are reduced on the GPU).
+ real coulombEnergy_ = 0; //!< Host staging area for PME coulomb energy
+ matrix coulombVirial_ = { { 0 } }; //!< Host staging area for PME coulomb virial contributions
+ real lennardJonesEnergy_ = 0; //!< Host staging area for PME LJ energy
+ matrix lennardJonesVirial_ = { { 0 } }; //!< Host staging area for PME LJ virial contributions
};
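To illustrate how haveForceOutput_ gates the force staging area, a hedged consumer sketch follows; accumulateOutput() is illustrative only, and only the PmeOutput fields come from the declaration above.

#include "gromacs/math/vectypes.h"
#include "gromacs/utility/real.h"

// Illustrative consumer of a PmeOutput; not a GROMACS function.
static void accumulateOutput(const PmeOutput& output, real* coulombEnergy, real* lennardJonesEnergy)
{
    *coulombEnergy += output.coulombEnergy_;
    *lennardJonesEnergy += output.lennardJonesEnergy_;
    if (output.haveForceOutput_) // false when forces were reduced on the GPU
    {
        for (const gmx::RVec& force : output.forces_)
        {
            (void)force; // placeholder for the actual per-atom force reduction
        }
    }
}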
/*! \internal \brief
gmx::PaddedHostVector<gmx::RVec> h_forces;
/*! \brief Virial and energy intermediate host-side buffer. Size is PME_GPU_VIRIAL_AND_ENERGY_COUNT. */
- float *h_virialAndEnergy;
+ float* h_virialAndEnergy;
/*! \brief B-spline values intermediate host-side buffer. */
- float *h_splineModuli;
+ float* h_splineModuli;
/*! \brief Pointer to the host memory with B-spline values. Only used for host-side gather, or unit tests */
- float *h_theta;
+ float* h_theta;
/*! \brief Pointer to the host memory with B-spline derivative values. Only used for host-side gather, or unit tests */
- float *h_dtheta;
+ float* h_dtheta;
/*! \brief Pointer to the host memory with ivec atom gridline indices. Only used for host-side gather, or unit tests */
- int *h_gridlineIndices;
+ int* h_gridlineIndices;
};
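The spline staging pointers above follow the usual PME layout of pme_order values per dimension per atom; the helpers below are a sketch of the implied host-side sizes under that assumption, not the allocation code GROMACS actually uses.

#include <cstddef>

#include "gromacs/math/vectypes.h" // for DIM

// Assumed sizing convention: DIM * pme_order floats per atom.
static std::size_t splineDataSize(int atomCount, int pmeOrder)
{
    return static_cast<std::size_t>(atomCount) * DIM * pmeOrder; // h_theta and h_dtheta
}

static std::size_t gridlineIndicesSize(int atomCount)
{
    return static_cast<std::size_t>(atomCount) * DIM; // h_gridlineIndices, one ivec per atom
}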
/*! \internal \brief
struct PmeShared
{
/*! \brief Grid count - currently always 1 on GPU */
- int ngrids;
+ int ngrids;
/*! \brief Grid dimensions - nkx, nky, nkz */
- int nk[DIM];
+ int nk[DIM];
/*! \brief PME interpolation order */
- int pme_order;
+ int pme_order;
/*! \brief Ewald splitting coefficient for Coulomb */
- real ewaldcoeff_q;
+ real ewaldcoeff_q;
/*! \brief Electrostatics parameter */
- real epsilon_r;
+ real epsilon_r;
/*! \brief Gridline indices - nnx, nny, nnz */
- std::vector<int> nn;
+ std::vector<int> nn;
/*! \brief Fractional shifts - fshx, fshy, fshz */
- std::vector<real> fsh;
+ std::vector<real> fsh;
/*! \brief Precomputed B-spline values */
- std::vector<real> bsp_mod[DIM];
+ std::vector<real> bsp_mod[DIM];
/*! \brief The PME codepath being taken */
- PmeRunMode runMode;
+ PmeRunMode runMode;
/*! \brief Whether PME execution is happening on a PME-only rank (from gmx_pme_t.bPPnode). */
- bool isRankPmeOnly;
+ bool isRankPmeOnly;
/*! \brief The box scaler based on inputrec - created in pme_init and managed by CPU structure */
- class EwaldBoxZScaler *boxScaler;
+ class EwaldBoxZScaler* boxScaler;
/*! \brief The previous computation box to know if we even need to update the current box params.
* \todo Manage this on higher level.
* \todo Alternatively, when this structure is used by CPU PME code, make use of this field there as well.
int nAtomsAlloc;
/*! \brief A pointer to the device used during the execution. */
- const gmx_device_info_t *deviceInfo;
+ const gmx_device_info_t* deviceInfo;
/*! \brief Kernel scheduling grid width limit in X - derived from deviceinfo compute capability in CUDA.
* Declared as very large int to make it useful in computations with type promotion, to avoid overflows.