/*
* This file is part of the GROMACS molecular simulation package.
*
 * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
# define HIDE_FROM_OPENCL_COMPILER(x) char8
#endif
#ifndef NUMFEPSTATES
//! Number of free-energy-perturbation (FEP) end states (A and B).
#    define NUMFEPSTATES 2
#endif
+
/* What follows is all the PME GPU function arguments,
* sorted into several device-side structures depending on the update rate.
* This is GPU agnostic (float3 replaced by float[3], etc.).
*/
struct PmeGpuConstParams
{
- /*! \brief Electrostatics coefficient = ONE_4PI_EPS0 / pme->epsilon_r */
+ /*! \brief Electrostatics coefficient = c_one4PiEps0 / pme->epsilon_r */
float elFactor;
/*! \brief Virial and energy GPU array. Size is c_virialAndEnergyCount (7) floats.
* The element order is virxx, viryy, virzz, virxy, virxz, viryz, energy. */
- HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_virialAndEnergy;
+ HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_virialAndEnergy[NUMFEPSTATES];
};
/*! \internal \brief
/* Grid arrays */
/*! \brief Real space grid. */
- HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_realGrid;
+ HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_realGrid[NUMFEPSTATES];
/*! \brief Complex grid - used in FFT/solve. If inplace cu/clFFT is used, then it is the same handle as realGrid. */
- HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_fourierGrid;
+ HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_fourierGrid[NUMFEPSTATES];
/*! \brief Grid spline values as in pme->bsp_mod
* (laid out sequentially (XXX....XYYY......YZZZ.....Z))
*/
- HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_splineModuli;
+ HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_splineModuli[NUMFEPSTATES];
/*! \brief Fractional shifts lookup table as in pme->fshx/fshy/fshz, laid out sequentially (XXX....XYYY......YZZZ.....Z) */
HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_fractShiftsTable;
/*! \brief Gridline indices lookup table
* but reallocation happens only at DD.
*/
HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<gmx::RVec>) d_coordinates;
    /*! \brief Global GPU memory array handle with input atom charges in states A and B.
* The charges only need to be reallocated and copied to the GPU at DD step.
*/
- HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_coefficients;
+ HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_coefficients[NUMFEPSTATES];
/*! \brief Global GPU memory array handle with input/output rvec atom forces.
* The forces change and need to be copied from (and possibly to) the GPU for every PME
* computation, but reallocation happens only at DD.
*/
- HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_forces;
+ HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<gmx::RVec>) d_forces;
/*! \brief Global GPU memory array handle with ivec atom gridline indices.
* Computed on GPU in the spline calculation part.
*/
float recipBox[DIM][DIM];
/*! \brief The unit cell volume for solving. */
float boxVolume;
+
+ /*! \brief The current coefficient scaling value. */
+ float scale;
};
/*! \internal \brief
* before launching spreading.
*/
struct PmeGpuDynamicParams current;
+
+ /*! \brief Whether pipelining with PP communications is active
+ * char rather than bool to avoid problem with OpenCL compiler */
+ char usePipeline;
+ /*! \brief Start atom for this stage of pipeline */
+ int pipelineAtomStart;
+ /*! \brief End atom for this stage of pipeline */
+ int pipelineAtomEnd;
+
/* These texture objects are only used in CUDA and are related to the grid size. */
/*! \brief Texture object for accessing grid.d_fractShiftsTable */
HIDE_FROM_OPENCL_COMPILER(DeviceTexture) fractShiftsTableTexture;