src/gromacs/ewald/pme-gpu-types.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \libinternal \file
  37  * \brief Defines the GPU-agnostic PME GPU data structures
  38  * (the host-side PME GPU data, and the GPU function parameters).
  39  * \todo Due to Gerrit workflow and time constraints, some renaming/refactoring
  40  * which does not impair the performance will be performed once
  41  * most of the initial PME CUDA implementation is merged
  42  * into the master branch (likely, after release 2017).
  43  * This should include:
  44  * -- bringing the function names up to guidelines
  45  * -- PmeGpuSettings -> PmeGpuTasks
  46  * -- refining GPU notation application (#2053)
  47  * -- renaming coefficients to charges (?)
  48  *
  49  * \author Aleksei Iupinov <a.yupinov@gmail.com>
  50  * \ingroup module_ewald
  51  */
  52
  53 #ifndef GMX_EWALD_PME_GPU_TYPES_H
  54 #define GMX_EWALD_PME_GPU_TYPES_H
  55
  56 #include "config.h"
  57
  58 #include <memory>
  59 #include <vector>
  60
  61 #include "gromacs/ewald/pme.h"
  62 #include "gromacs/math/vectypes.h"
  63 #include "gromacs/utility/basedefinitions.h"
  64
  65 struct gmx_hw_info;
  66 struct gmx_device_info_t;
  67
  68 #if GMX_GPU == GMX_GPU_CUDA
  69
  70 struct PmeGpuCuda;
  71 /*! \brief A typedef for including the GPU host data by pointer */
  72 typedef PmeGpuCuda PmeGpuSpecific;
  73
  74 struct PmeGpuCudaKernelParams;
  75 /*! \brief A typedef for including the GPU kernel arguments data by pointer */
  76 typedef PmeGpuCudaKernelParams PmeGpuKernelParams;
  77
  78 #else
  79
  80 /*! \brief A dummy typedef for the GPU host data placeholder on non-GPU builds */
  81 typedef int PmeGpuSpecific;
  82 /*! \brief A dummy typedef for the GPU kernel arguments data placeholder on non-GPU builds */
  83 typedef int PmeGpuKernelParams;
  84
  85 #endif
  86
/* What follows are all the PME GPU function arguments,
 * sorted into several device-side structures depending on the update rate.
 * This is GPU-agnostic (float3 is replaced by float[3], etc.).
 * The GPU-framework specifics (e.g. cudaTextureObject_t handles) are described
 * in the larger structure PmeGpuCudaKernelParams in pme.cuh.
 */
  93
  94 /*! \internal \brief
  95  * A GPU data structure for storing the constant PME data.
  96  * This only has to be initialized once.
  97  */
  98 struct PmeGpuConstParams
  99 {
 100     /*! \brief Electrostatics coefficient = ONE_4PI_EPS0 / pme->epsilon_r */
 101     float elFactor;
 102     /*! \brief Virial and energy GPU array. Size is PME_GPU_ENERGY_AND_VIRIAL_COUNT (7) floats.
 103      * The element order is virxx, viryy, virzz, virxy, virxz, viryz, energy. */
 104     float *d_virialAndEnergy;
 105 };
 106
 107 /*! \internal \brief
 108  * A GPU data structure for storing the PME data related to the grid sizes and cut-off.
 109  * This only has to be updated at every DD step.
 110  */
 111 struct PmeGpuGridParams
 112 {
 113     /* Grid sizes */
 114     /*! \brief Real-space grid data dimensions. */
 115     int   realGridSize[DIM];
 116     /*! \brief Real-space grid dimensions, only converted to floating point. */
 117     float realGridSizeFP[DIM];
 118     /*! \brief Real-space grid dimensions (padded). The padding as compared to realGridSize includes the (order - 1) overlap. */
 119     int   realGridSizePadded[DIM]; /* Is major dimension of this ever used in kernels? */
 120     /*! \brief Fourier grid dimensions. This counts the complex numbers! */
 121     int   complexGridSize[DIM];
 122     /*! \brief Fourier grid dimensions (padded). This counts the complex numbers! */
 123     int   complexGridSizePadded[DIM];
 124
 125     /* Grid pointers */
 126     /*! \brief Real space grid. */
 127     float *d_realGrid;
 128     /*! \brief Complex grid - used in FFT/solve. If inplace cuFFT is used, then it is the same pointer as realGrid. */
 129     float *d_fourierGrid;
 130
 131     /*! \brief Ewald solving factor = (M_PI / pme->ewaldcoeff_q)^2 */
 132     float ewaldFactor;
 133
 134     /*! \brief Grid spline values as in pme->bsp_mod
 135      * (laid out sequentially (XXX....XYYY......YZZZ.....Z))
 136      */
 137     float              *d_splineModuli;
 138     /*! \brief Offsets for X/Y/Z components of d_splineModuli */
 139     int                 splineValuesOffset[DIM];
 140
 141     /*! \brief Fractional shifts lookup table as in pme->fshx/fshy/fshz, laid out sequentially (XXX....XYYY......YZZZ.....Z) */
 142     float               *d_fractShiftsTable;
 143     /*! \brief Gridline indices lookup table
 144      * (modulo lookup table as in pme->nnx/nny/nnz, laid out sequentially (XXX....XYYY......YZZZ.....Z)) */
 145     int                *d_gridlineIndicesTable;
 146     /*! \brief Offsets for X/Y/Z components of d_fractShiftsTable and d_gridlineIndicesTable */
 147     int                 tablesOffsets[DIM];
 148 };
 149
 150 /*! \internal \brief
 151  * A GPU data structure for storing the PME data of the atoms, local to this process' domain partition.
 152  * This only has to be updated every DD step.
 153  */
 154 struct PmeGpuAtomParams
 155 {
 156     /*! \brief Number of local atoms */
 157     int    nAtoms;
 158     /*! \brief Pointer to the global GPU memory with input rvec atom coordinates.
 159      * The coordinates themselves change and need to be copied to the GPU for every PME computation,
 160      * but reallocation happens only at DD.
 161      */
 162     float *d_coordinates;
 163     /*! \brief Pointer to the global GPU memory with input atom charges.
 164      * The charges only need to be reallocated and copied to the GPU at DD step.
 165      */
 166     float  *d_coefficients;
 167     /*! \brief Pointer to the global GPU memory with input/output rvec atom forces.
 168      * The forces change and need to be copied from (and possibly to) the GPU for every PME computation,
 169      * but reallocation happens only at DD.
 170      */
 171     float  *d_forces;
 172     /*! \brief Pointer to the global GPU memory with ivec atom gridline indices.
 173      * Computed on GPU in the spline calculation part.
 174      */
 175     int *d_gridlineIndices;
 176
 177     /* B-spline parameters are computed entirely on GPU for every PME computation, not copied.
 178      * Unless we want to try something like GPU spread + CPU gather?
 179      */
 180     /*! \brief Pointer to the global GPU memory with B-spline values */
 181     float  *d_theta;
 182     /*! \brief Pointer to the global GPU memory with B-spline derivative values */
 183     float  *d_dtheta;
 184 };
 185
 186 /*! \internal \brief
 187  * A GPU data structure for storing the PME data which might change for each new PME computation.
 188  */
 189 struct PmeGpuDynamicParams
 190 {
 191     /* The box parameters. The box only changes size with pressure coupling enabled. */
 192     /*! \brief
 193      * Reciprocal (inverted unit cell) box.
 194      *
 195      * The box is transposed as compared to the CPU pme->recipbox.
 196      * Basically, spread uses matrix columns (while solve and gather use rows).
 197      * This storage format might be not the most optimal since the box is always triangular so there are zeroes.
 198      */
 199     float  recipBox[DIM][DIM];
 200     /*! \brief The unit cell volume for solving. */
 201     float  boxVolume;
 202 };
 203
 204 /*! \internal \brief
 205  * A single structure encompassing almost all the PME data used in GPU kernels on device.
 206  * This is inherited by the GPU framework-specific structure
 207  * (PmeGpuCudaKernelParams in pme.cuh).
 208  * This way, most code preparing the kernel parameters can be GPU-agnostic by casting
 209  * the kernel parameter data pointer to PmeGpuKernelParamsBase.
 210  */
 211 struct PmeGpuKernelParamsBase
 212 {
 213     /*! \brief Constant data that is set once. */
 214     PmeGpuConstParams   constants;
 215     /*! \brief Data dependent on the grid size/cutoff. */
 216     PmeGpuGridParams    grid;
 217     /*! \brief Data dependent on the DD and local atoms. */
 218     PmeGpuAtomParams    atoms;
 219     /*! \brief Data that possibly changes for every new PME computation.
 220      * This should be kept up-to-date by calling pme_gpu_prepare_computation(...)
 221      * before launching spreading.
 222      */
 223     PmeGpuDynamicParams current;
 224 };
 225
 226 /* Here are the host-side structures */
 227
 228 /*! \internal \brief
 229  * The PME GPU settings structure, included in the main PME GPU structure by value.
 230  */
 231 struct PmeGpuSettings
 232 {
 233     /* Permanent settings set on initialization */
 234     /*! \brief A boolean which tells if the solving is performed on GPU. Currently always true */
 235     bool performGPUSolve;
 236     /*! \brief A boolean which tells if the gathering is performed on GPU. Currently always true */
 237     bool performGPUGather;
 238     /*! \brief A boolean which tells if the FFT is performed on GPU. Currently true for a single MPI rank. */
 239     bool performGPUFFT;
 240     /*! \brief A convenience boolean which tells if PME decomposition is used. */
 241     bool useDecomposition;
 242     /*! \brief A boolean which tells if any PME GPU stage should copy all of its outputs to the host.
 243      * Only intended to be used by the test framework.
 244      */
 245     bool copyAllOutputs;
 246     /*! \brief Various flags for the current PME computation, corresponding to the GMX_PME_ flags in pme.h. */
 247     int  currentFlags;
 248 };
 249
 250 /*! \internal \brief
 251  * The PME GPU intermediate buffers structure, included in the main PME GPU structure by value.
 252  * Buffers are managed by the PME GPU module.
 253  */
 254 struct PmeGpuStaging
 255 {
 256     /*! \brief Virial and energy intermediate host-side buffer. Size is PME_GPU_VIRIAL_AND_ENERGY_COUNT. */
 257     float  *h_virialAndEnergy;
 258     /*! \brief B-spline values intermediate host-side buffer. */
 259     float  *h_splineModuli;
 260
 261     /*! \brief Pointer to the host memory with B-spline values. Only used for host-side gather, or unit tests */
 262     float  *h_theta;
 263     /*! \brief Pointer to the host memory with B-spline derivative values. Only used for host-side gather, or unit tests */
 264     float  *h_dtheta;
 265     /*! \brief Pointer to the host memory with ivec atom gridline indices. Only used for host-side gather, or unit tests */
 266     int    *h_gridlineIndices;
 267 };
 268
 269 /*! \internal \brief
 270  * The PME GPU structure for all the data copied directly from the CPU PME structure.
 271  * The copying is done when the CPU PME structure is already (re-)initialized
 272  * (pme_gpu_reinit is called at the end of gmx_pme_init).
 273  * All the variables here are named almost the same way as in gmx_pme_t.
 274  * The types are different: pointers are replaced by vectors.
 275  * TODO: use the shared data with the PME CPU.
 276  * Included in the main PME GPU structure by value.
 277  */
 278 struct PmeShared
 279 {
 280     /*! \brief Grid count - currently always 1 on GPU */
 281     int ngrids;
 282     /*! \brief Grid dimensions - nkx, nky, nkz */
 283     int nk[DIM];
 284     /*! \brief Padded grid dimensions - pmegrid_nx, pmegrid_ny, pmegrid_nz
 285      * TODO: find out if these are really needed for the CPU FFT compatibility.
 286      */
 287     int                    pmegrid_n[DIM];
 288     /*! \brief PME interpolation order */
 289     int                    pme_order;
 290     /*! \brief Ewald splitting coefficient for Coulomb */
 291     real                   ewaldcoeff_q;
 292     /*! \brief Electrostatics parameter */
 293     real                   epsilon_r;
 294     /*! \brief Gridline indices - nnx, nny, nnz */
 295     std::vector<int>       nn;
 296     /*! \brief Fractional shifts - fshx, fshy, fshz */
 297     std::vector<real>      fsh;
 298     /*! \brief Precomputed B-spline values */
 299     std::vector<real>      bsp_mod[DIM];
 300     /*! \brief The PME codepath being taken */
 301     PmeRunMode             runMode;
 302     /*! \brief The box scaler based on inputrec - created in pme_init and managed by CPU structure */
 303     class EwaldBoxZScaler *boxScaler;
 304     /*! \brief The previous computation box to know if we even need to update the current box params.
 305      * \todo Manage this on higher level.
 306      * \todo Alternatively, when this structure is used by CPU PME code, make use of this field there as well.
 307      */
 308     matrix previousBox;
 309 };
 310
 311 /*! \internal \brief
 312  * The main PME GPU host structure, included in the PME CPU structure by pointer.
 313  */
 314 struct PmeGpu
 315 {
 316     /*! \brief The information copied once per reinit from the CPU structure. */
 317     std::shared_ptr<PmeShared> common; // TODO: make the CPU structure use the same type
 318
 319     /*! \brief The settings. */
 320     PmeGpuSettings settings;
 321
 322     /*! \brief The host-side buffers.
 323      * The device-side buffers are buried in kernelParams, but that will have to change.
 324      */
 325     PmeGpuStaging staging;
 326
 327     /*! \brief Number of local atoms, padded to be divisible by PME_ATOM_DATA_ALIGNMENT.
 328      * Used for kernel scheduling.
 329      * kernelParams.atoms.nAtoms is the actual atom count to be used for data copying.
 330      * TODO: this and the next member represent a memory allocation/padding properties -
 331      * what a container type should do ideally.
 332      */
 333     int nAtomsPadded;
 334     /*! \brief Number of local atoms, padded to be divisible by PME_ATOM_DATA_ALIGNMENT
 335      * if c_usePadding is true.
 336      * Used only as a basic size for almost all the atom data allocations
 337      * (spline parameter data is also aligned by PME_SPREADGATHER_PARTICLES_PER_WARP).
 338      * This should be the same as (c_usePadding ? nAtomsPadded : kernelParams.atoms.nAtoms).
 339      * kernelParams.atoms.nAtoms is the actual atom count to be used for most data copying.
 340      */
 341     int nAtomsAlloc;
 342
 343     /*! \brief A pointer to the device used during the execution. */
 344     gmx_device_info_t *deviceInfo;
 345
 346     /*! \brief A single structure encompassing all the PME data used on GPU.
 347      * Its value is the only argument to all the PME GPU kernels.
 348      * \todo Test whether this should be copied to the constant GPU memory once for each computation
 349      * (or even less often with no box updates) instead of being an argument.
 350      */
 351     std::shared_ptr<PmeGpuKernelParams> kernelParams;
 352
 353     /*! \brief The pointer to GPU-framework specific host-side data, such as CUDA streams and events. */
 354     std::shared_ptr<PmeGpuSpecific> archSpecific; /* FIXME: make it an unique_ptr */
 355 };
 356
 357 #endif