src/gromacs/ewald/pme_gpu_internal.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  *
  38  * \brief This file contains internal function declarations for performing the PME calculations on GPU.
  39  * These are not meant to be exposed outside of the PME GPU code.
  40  * As of now, their bodies are still in the common pme_gpu.cpp files.
  41  *
  42  * \author Aleksei Iupinov <a.yupinov@gmail.com>
  43  * \ingroup module_ewald
  44  */
  45
  46 #ifndef GMX_EWALD_PME_GPU_INTERNAL_H
  47 #define GMX_EWALD_PME_GPU_INTERNAL_H
  48
  49 #include "gromacs/fft/fft.h" // for the gmx_fft_direction enum
  50 #include "gromacs/gpu_utils/devicebuffer_datatype.h"
  51 #include "gromacs/gpu_utils/gpu_macros.h" // for the GPU_FUNC_ macros
  52 #include "gromacs/utility/arrayref.h"
  53
  54 #include "pme_gpu_types_host.h"
  55 #include "pme_output.h"
  56
  57 class DeviceContext;
  58 struct DeviceInformation;
  59 class DeviceStream;
  60 class GpuEventSynchronizer;
  61 struct gmx_hw_info_t;
  62 struct gmx_gpu_opt_t;
  63 struct gmx_pme_t; // only used in pme_gpu_reinit
  64 struct gmx_wallcycle;
  65 class PmeAtomComm;
  66 enum class PmeForceOutputHandling;
  67 struct PmeGpu;
  68 class PmeGpuProgram;
  69 struct PmeGpuStaging;
  70 struct PmeGpuSettings;
  71 struct t_complex;
  72
  73 namespace gmx
  74 {
  75 class MDLogger;
  76 } // namespace gmx
  77
  78 //! Type of spline data: either the spline values or their derivatives
  79 enum class PmeSplineDataType
  80 {
  81     Values,      //!< Spline values (theta)
  82     Derivatives, //!< Spline derivatives (dtheta)
  83 };               // TODO move this into new and shiny pme.h (pme-types.h?)
  84
  85 //! PME grid dimension ordering; the letters list the dimensions from major to minor
  86 enum class GridOrdering
  87 {
  88     YZX, //!< Y is the major dimension, X is the minor one
  89     XYZ  //!< X is the major dimension, Z is the minor one
  90 };
  91
  92 /*! \libinternal \brief
  93  * Returns the atom data block size requirement.
  94  *
  95  * The GPU version of PME requires that the coordinates array have a
  96  * size divisible by the returned number.
  97  *
  98  * \returns Number of atoms in a single GPU atom data chunk, which
  99  * determines a minimum divisor of the size of the memory allocated.
 100  */
 101 int pme_gpu_get_atom_data_block_size();
 102
 103 /*! \libinternal \brief
 104  * Synchronizes the current computation, waiting for the GPU kernels/transfers to finish.
 105  *
 106  * \param[in] pmeGpu            The PME GPU structure.
 107  */
 108 GPU_FUNC_QUALIFIER void pme_gpu_synchronize(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM;
 109
 110 /*! \libinternal \brief
 111  * Allocates the fixed size energy and virial buffer both on GPU and CPU.
 112  *
 113  * \param[in,out] pmeGpu            The PME GPU structure.
 114  */
 115 void pme_gpu_alloc_energy_virial(PmeGpu* pmeGpu);
 116
 117 /*! \libinternal \brief
 118  * Frees the energy and virial memory both on GPU and CPU.
 119  *
 120  * \param[in] pmeGpu            The PME GPU structure.
 121  */
 122 void pme_gpu_free_energy_virial(PmeGpu* pmeGpu);
 123
 124 /*! \libinternal \brief
 125  * Clears the energy and virial memory on GPU with 0.
 126  * Should be called at the end of PME computation which returned energy/virial.
 127  *
 128  * \param[in] pmeGpu            The PME GPU structure.
 129  */
 130 void pme_gpu_clear_energy_virial(const PmeGpu* pmeGpu);
 131
 132 /*! \libinternal \brief
 133  * Reallocates and copies the pre-computed B-spline values to the GPU.
 134  *
 135  * \param[in,out] pmeGpu             The PME GPU structure.
 136  */
 137 void pme_gpu_realloc_and_copy_bspline_values(PmeGpu* pmeGpu);
 138
 139 /*! \libinternal \brief
 140  * Frees the pre-computed B-spline values on the GPU (and the transfer CPU buffers).
 141  *
 142  * \param[in] pmeGpu             The PME GPU structure.
 143  */
 144 void pme_gpu_free_bspline_values(const PmeGpu* pmeGpu);
 145
 146 /*! \libinternal \brief
 147  * Reallocates the GPU buffer for the PME forces.
 148  *
 149  * \param[in,out] pmeGpu         The PME GPU structure.
 150  */
 151 void pme_gpu_realloc_forces(PmeGpu* pmeGpu);
 152
 153 /*! \libinternal \brief
 154  * Frees the GPU buffer for the PME forces.
 155  *
 156  * \param[in] pmeGpu             The PME GPU structure.
 157  */
 158 void pme_gpu_free_forces(const PmeGpu* pmeGpu);
 159
 160 /*! \libinternal \brief
 161  * Copies the forces from the CPU buffer to the GPU (to reduce them with the PME GPU gathered
 162  * forces). To be called e.g. after the bonded calculations.
 163  *
 164  * \param[in] pmeGpu             The PME GPU structure.
 165  */
 166 void pme_gpu_copy_input_forces(PmeGpu* pmeGpu);
 167
 168 /*! \libinternal \brief
 169  * Copies the forces from the GPU to the CPU buffer. To be called after the gathering stage.
 170  *
 171  * \param[in] pmeGpu             The PME GPU structure.
 172  */
 173 void pme_gpu_copy_output_forces(PmeGpu* pmeGpu);
 174
 175 /*! \libinternal \brief
 176  * Checks whether work in the PME GPU stream has completed.
 177  *
 178  * \param[in] pmeGpu            The PME GPU structure.
 179  *
 180  * \returns                     True if work in the PME stream has completed.
 181  */
 182 bool pme_gpu_stream_query(const PmeGpu* pmeGpu);
 183
 184 /*! \libinternal \brief
 185  * Reallocates the buffer on the GPU and copies the charges/coefficients from the CPU buffer.
 186  * Clears the padded part if needed.
 187  *
 188  * \param[in,out] pmeGpu            The PME GPU structure.
 189  * \param[in]     h_coefficients    The input atom charges/coefficients.
 190  *
 191  * Does not need to be done for every PME computation, only whenever the local charges change.
 192  * (So, in the beginning of the run, or on DD step).
 193  */
 194 void pme_gpu_realloc_and_copy_input_coefficients(PmeGpu* pmeGpu, const float* h_coefficients);
 195
 196 /*! \libinternal \brief
 197  * Frees the charges/coefficients on the GPU.
 198  *
 199  * \param[in] pmeGpu             The PME GPU structure.
 200  */
 201 void pme_gpu_free_coefficients(const PmeGpu* pmeGpu);
 202
 203 /*! \libinternal \brief
 204  * Reallocates the buffers on the GPU and the host for the atoms spline data.
 205  *
 206  * \param[in,out] pmeGpu            The PME GPU structure.
 207  */
 208 void pme_gpu_realloc_spline_data(PmeGpu* pmeGpu);
 209
 210 /*! \libinternal \brief
 211  * Frees the buffers on the GPU for the atoms spline data.
 212  *
 213  * \param[in] pmeGpu            The PME GPU structure.
 214  */
 215 void pme_gpu_free_spline_data(const PmeGpu* pmeGpu);
 216
 217 /*! \libinternal \brief
 218  * Reallocates the buffers on the GPU and the host for the particle gridline indices.
 219  *
 220  * \param[in,out] pmeGpu            The PME GPU structure.
 221  */
 222 void pme_gpu_realloc_grid_indices(PmeGpu* pmeGpu);
 223
 224 /*! \libinternal \brief
 225  * Frees the buffer on the GPU for the particle gridline indices.
 226  *
 227  * \param[in] pmeGpu            The PME GPU structure.
 228  */
 229 void pme_gpu_free_grid_indices(const PmeGpu* pmeGpu);
 230
 231 /*! \libinternal \brief
 232  * Reallocates the real space grid and the complex reciprocal grid (if needed) on the GPU.
 233  *
 234  * \param[in,out] pmeGpu        The PME GPU structure.
 235  */
 236 void pme_gpu_realloc_grids(PmeGpu* pmeGpu);
 237
 238 /*! \libinternal \brief
 239  * Frees the real space grid and the complex reciprocal grid (if needed) on the GPU.
 240  *
 241  * \param[in] pmeGpu            The PME GPU structure.
 242  */
 243 void pme_gpu_free_grids(const PmeGpu* pmeGpu);
 244
 245 /*! \libinternal \brief
 246  * Clears the real space grid on the GPU.
 247  * Should be called at the end of each computation.
 248  *
 249  * \param[in] pmeGpu            The PME GPU structure.
 250  */
 251 void pme_gpu_clear_grids(const PmeGpu* pmeGpu);
 252
 253 /*! \libinternal \brief
 254  * Reallocates and copies the pre-computed fractional coordinates' shifts to the GPU.
 255  *
 256  * \param[in,out] pmeGpu        The PME GPU structure.
 257  */
 258 void pme_gpu_realloc_and_copy_fract_shifts(PmeGpu* pmeGpu);
 259
 260 /*! \libinternal \brief
 261  * Frees the pre-computed fractional coordinates' shifts on the GPU.
 262  *
 263  * \param[in] pmeGpu            The PME GPU structure.
 264  */
 265 void pme_gpu_free_fract_shifts(const PmeGpu* pmeGpu);
 266
 267 /*! \libinternal \brief
 268  * Copies the input real-space grid from the host to the GPU.
 269  *
 270  * \param[in] pmeGpu   The PME GPU structure.
 271  * \param[in] h_grid   The host-side grid buffer.
 272  */
 273 void pme_gpu_copy_input_gather_grid(const PmeGpu* pmeGpu, float* h_grid);
 274
 275 /*! \libinternal \brief
 276  * Copies the output real-space grid from the GPU to the host.
 277  *
 278  * \param[in] pmeGpu   The PME GPU structure.
 279  * \param[out] h_grid  The host-side grid buffer.
 280  */
 281 void pme_gpu_copy_output_spread_grid(const PmeGpu* pmeGpu, float* h_grid);
 282
 283 /*! \libinternal \brief
 284  * Copies the spread output spline data and gridline indices from the GPU to the host.
 285  *
 286  * \param[in] pmeGpu   The PME GPU structure.
 287  */
 288 void pme_gpu_copy_output_spread_atom_data(const PmeGpu* pmeGpu);
 289
 290 /*! \libinternal \brief
 291  * Copies the gather input spline data and gridline indices from the host to the GPU.
 292  *
 293  * \param[in] pmeGpu   The PME GPU structure.
 294  */
 295 void pme_gpu_copy_input_gather_atom_data(const PmeGpu* pmeGpu);
 296
 297 /*! \libinternal \brief
 298  * Waits for the grid copying to the host-side buffer after spreading to finish.
 299  *
 300  * \param[in] pmeGpu  The PME GPU structure.
 301  */
 302 void pme_gpu_sync_spread_grid(const PmeGpu* pmeGpu);
 303
 304 /*! \libinternal \brief
 305  * Initializes the CUDA FFT structures.
 306  *
 307  * \param[in] pmeGpu  The PME GPU structure.
 308  */
 309 void pme_gpu_reinit_3dfft(const PmeGpu* pmeGpu);
 310
 311 /*! \libinternal \brief
 312  * Destroys the CUDA FFT structures.
 313  *
 314  * \param[in] pmeGpu  The PME GPU structure.
 315  */
 316 void pme_gpu_destroy_3dfft(const PmeGpu* pmeGpu);
 317
 318 /* The PME stages themselves */
 319
 320 /*! \libinternal \brief
 321  * A GPU spline computation and charge spreading function.
 322  *
 323  * \param[in]  pmeGpu          The PME GPU structure.
 324  * \param[in]  xReadyOnDevice  Event synchronizer indicating that the coordinates are ready in the device memory;
 325  *                             can be nullptr when invoked on a separate PME rank or from PME tests.
 326  * \param[in]  gridIndex       Index of the PME grid - unused, assumed to be 0.
 327  * \param[out] h_grid          The host-side grid buffer (used only if the result of the spread is expected on the host,
 328  *                             e.g. testing or host-side FFT)
 329  * \param[in]  computeSplines  Should the computation of spline parameters and gridline indices be performed.
 330  * \param[in]  spreadCharges   Should the charges/coefficients be spread on the grid.
 331  */
 332 GPU_FUNC_QUALIFIER void pme_gpu_spread(const PmeGpu*         GPU_FUNC_ARGUMENT(pmeGpu),
 333                                        GpuEventSynchronizer* GPU_FUNC_ARGUMENT(xReadyOnDevice),
 334                                        int                   GPU_FUNC_ARGUMENT(gridIndex),
 335                                        real*                 GPU_FUNC_ARGUMENT(h_grid),
 336                                        bool                  GPU_FUNC_ARGUMENT(computeSplines),
 337                                        bool GPU_FUNC_ARGUMENT(spreadCharges)) GPU_FUNC_TERM;
 338
 339 /*! \libinternal \brief
 340  * 3D FFT R2C/C2R routine.
 341  *
 342  * \param[in]  pmeGpu          The PME GPU structure.
 343  * \param[in]  direction       Transform direction (real-to-complex or complex-to-real)
 344  * \param[in]  gridIndex       Index of the PME grid - unused, assumed to be 0.
 345  */
 346 void pme_gpu_3dfft(const PmeGpu* pmeGpu, enum gmx_fft_direction direction, int gridIndex);
 347
 348 /*! \libinternal \brief
 349  * A GPU Fourier space solving function.
 350  *
 351  * \param[in]     pmeGpu                  The PME GPU structure.
 352  * \param[in,out] h_grid                  The host-side input and output Fourier grid buffer (used only with testing or host-side FFT)
 353  * \param[in]     gridOrdering            Specifies the dimension ordering of the complex grid. TODO: store this information?
 354  * \param[in]     computeEnergyAndVirial  Tells if the energy and virial computation should be performed.
 355  */
 356 GPU_FUNC_QUALIFIER void pme_gpu_solve(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
 357                                       t_complex*    GPU_FUNC_ARGUMENT(h_grid),
 358                                       GridOrdering  GPU_FUNC_ARGUMENT(gridOrdering),
 359                                       bool GPU_FUNC_ARGUMENT(computeEnergyAndVirial)) GPU_FUNC_TERM;
 360
 361 /*! \libinternal \brief
 362  * A GPU force gathering function.
 363  *
 364  * \param[in]     pmeGpu           The PME GPU structure.
 365  * \param[in]     h_grid           The host-side grid buffer (used only in testing mode)
 366  */
 367 GPU_FUNC_QUALIFIER void pme_gpu_gather(PmeGpu*      GPU_FUNC_ARGUMENT(pmeGpu),
 368                                        const float* GPU_FUNC_ARGUMENT(h_grid)) GPU_FUNC_TERM;
 369
 370 /*! \brief Sets the device pointer to coordinate data
 371  * \param[in] pmeGpu         The PME GPU structure.
 372  * \param[in] d_x            Pointer to coordinate data
 373  */
 374 GPU_FUNC_QUALIFIER void pme_gpu_set_kernelparam_coordinates(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
 375                                                             DeviceBuffer<gmx::RVec> GPU_FUNC_ARGUMENT(d_x)) GPU_FUNC_TERM;
 376
 377 /*! \brief Return pointer to device copy of force data.
 378  * \param[in] pmeGpu         The PME GPU structure.
 379  * \returns                  Pointer to force data
 380  */
 381 GPU_FUNC_QUALIFIER void* pme_gpu_get_kernelparam_forces(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
 382         GPU_FUNC_TERM_WITH_RETURN(nullptr);
 383
 384 /*! \brief Return pointer to the sync object triggered after the PME force calculation completion
 385  * \param[in] pmeGpu         The PME GPU structure.
 386  * \returns                  Pointer to sync object
 387  */
 388 GPU_FUNC_QUALIFIER GpuEventSynchronizer* pme_gpu_get_forces_ready_synchronizer(
 389         const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
 390
 391 /*! \libinternal \brief
 392  * Returns a read-only reference to the settings stored in the PME GPU structure.
 393  *
 394  * \param[in] pmeGpu         The PME GPU structure; dereferenced without a null check, so it must be valid.
 395  * \returns                  The settings for PME on GPU (the \c settings member of \p pmeGpu).
 396  */
 397 inline const PmeGpuSettings& pme_gpu_settings(const PmeGpu* pmeGpu)
 398 {
 399     return pmeGpu->settings;
 400 }
 401
 402 /*! \libinternal \brief
 403  * Returns a read-only reference to the staging object stored in the PME GPU structure.
 404  *
 405  * \param[in] pmeGpu         The PME GPU structure; dereferenced without a null check, so it must be valid.
 406  * \returns                  The staging object for PME on GPU (the \c staging member of \p pmeGpu).
 407  */
 408 inline const PmeGpuStaging& pme_gpu_staging(const PmeGpu* pmeGpu)
 409 {
 410     return pmeGpu->staging;
 411 }
 412
 413 /*! \libinternal \brief
 414  * Switches the testing mode of the PME GPU module on or off; a null \p pmeGpu is silently ignored.
 415  *
 416  * \param[in,out] pmeGpu     The PME GPU structure whose settings are updated (may be nullptr).
 417  * \param[in]     testing    Whether testing mode is on: all outputs are copied and transfers are synchronous.
 418  */
 419 inline void pme_gpu_set_testing(PmeGpu* pmeGpu, bool testing)
 420 {
 421     if (pmeGpu != nullptr)
 422     {
 423         pmeGpu->settings.transferKind = testing ? GpuApiCallBehavior::Sync : GpuApiCallBehavior::Async;
 424         pmeGpu->settings.copyAllOutputs = testing;
 425     }
 426 }
 427
 428 /* A block of C++ functions that live in pme_gpu_internal.cpp */
 429
 430 /*! \libinternal \brief
 431  * Returns the energy and virial GPU outputs, useful for testing.
 432  *
 433  * It is the caller's responsibility to be aware of whether the GPU
 434  * handled the solve stage.
 435  *
 436  * \param[in] pme                The PME structure.
 437  * \param[out] output            Pointer to output where energy and virial should be stored.
 438  */
 439 GPU_FUNC_QUALIFIER void pme_gpu_getEnergyAndVirial(const gmx_pme_t& GPU_FUNC_ARGUMENT(pme),
 440                                                    PmeOutput* GPU_FUNC_ARGUMENT(output)) GPU_FUNC_TERM;
 441
 442 /*! \libinternal \brief
 443  * Returns the GPU outputs (forces, energy and virial)
 444  *
 445  * \param[in] pme                     The PME structure.
 446  * \param[in] computeEnergyAndVirial  Whether the energy and virial are being computed
 447  * \returns                           The output object.
 448  */
 449 GPU_FUNC_QUALIFIER PmeOutput pme_gpu_getOutput(const gmx_pme_t& GPU_FUNC_ARGUMENT(pme),
 450                                                bool GPU_FUNC_ARGUMENT(computeEnergyAndVirial))
 451         GPU_FUNC_TERM_WITH_RETURN(PmeOutput{});
 452
 453 /*! \libinternal \brief
 454  * Updates the unit cell parameters. Does not check if update is necessary - that is done in pme_gpu_prepare_computation().
 455  *
 456  * \param[in] pmeGpu         The PME GPU structure.
 457  * \param[in] box            The unit cell box.
 458  */
 459 GPU_FUNC_QUALIFIER void pme_gpu_update_input_box(PmeGpu*      GPU_FUNC_ARGUMENT(pmeGpu),
 460                                                  const matrix GPU_FUNC_ARGUMENT(box)) GPU_FUNC_TERM;
 461
 462 /*! \libinternal \brief
 463  * Finishes the PME GPU computation, waiting for the output forces and/or energy/virial to be copied to the host.
 464  * If forces were computed, they will have arrived at the external host buffer provided to gather.
 465  * If virial/energy were computed, they will have arrived into the internal staging buffer
 466  * (even though that should have already happened before even launching the gather).
 467  * Finally, cudaEvent_t based GPU timers get updated if enabled. They also need stream synchronization for correctness.
 468  * Additionally, device-side buffers are cleared asynchronously for the next computation.
 469  *
 470  * \param[in] pmeGpu         The PME GPU structure.
 471  */
 472 void pme_gpu_finish_computation(const PmeGpu* pmeGpu);
 473
 474 /*! \libinternal \brief
 475  * Get the normal/padded grid dimensions of the real-space PME grid on GPU. Only used in tests.
 476  *
 477  * \param[in] pmeGpu             The PME GPU structure.
 478  * \param[out] gridSize          Pointer to the grid dimensions to fill in.
 479  * \param[out] paddedGridSize    Pointer to the padded grid dimensions to fill in.
 480  */
 481 GPU_FUNC_QUALIFIER void pme_gpu_get_real_grid_sizes(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
 482                                                     gmx::IVec*    GPU_FUNC_ARGUMENT(gridSize),
 483                                                     gmx::IVec* GPU_FUNC_ARGUMENT(paddedGridSize)) GPU_FUNC_TERM;
 484
 485 /*! \libinternal \brief
 486  * (Re-)initializes the PME GPU data at the beginning of the run or on DLB.
 487  *
 488  * \param[in,out] pme            The PME structure.
 489  * \param[in]     deviceContext  The GPU context.
 490  * \param[in]     deviceStream   The GPU stream.
 491  * \param[in,out] pmeGpuProgram  The handle to the program/kernel data created outside (e.g. in unit tests/runner)
 492  *
 493  * \throws gmx::NotImplementedError if this generally valid PME structure is not valid for GPU runs.
 494  */
 495 GPU_FUNC_QUALIFIER void pme_gpu_reinit(gmx_pme_t*           GPU_FUNC_ARGUMENT(pme),
 496                                        const DeviceContext* GPU_FUNC_ARGUMENT(deviceContext),
 497                                        const DeviceStream*  GPU_FUNC_ARGUMENT(deviceStream),
 498                                        const PmeGpuProgram* GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM;
 499
 500 /*! \libinternal \brief
 501  * Destroys the PME GPU data at the end of the run.
 502  *
 503  * \param[in] pmeGpu     The PME GPU structure.
 504  */
 505 GPU_FUNC_QUALIFIER void pme_gpu_destroy(PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM;
 506
 507 /*! \libinternal \brief
 508  * Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU.
 509  *
 510  * \param[in] pmeGpu    The PME GPU structure.
 511  * \param[in] nAtoms    The number of particles.
 512  * \param[in] charges   The pointer to the host-side array of particle charges.
 513  *
 514  * This is a function that should only be called in the beginning of the run and on domain
 515  * decomposition. Should be called before the pme_gpu_set_io_ranges.
 516  */
 517 GPU_FUNC_QUALIFIER void pme_gpu_reinit_atoms(PmeGpu*     GPU_FUNC_ARGUMENT(pmeGpu),
 518                                              int         GPU_FUNC_ARGUMENT(nAtoms),
 519                                              const real* GPU_FUNC_ARGUMENT(charges)) GPU_FUNC_TERM;
 520
 521 /*! \libinternal \brief
 522  * The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing.
 523  *
 524  * This clears the device-side working buffers in preparation for new computation.
 525  *
 526  * \param[in] pmeGpu            The PME GPU structure.
 527  */
 528 void pme_gpu_reinit_computation(const PmeGpu* pmeGpu);
 529
 530 /*! \brief
 531  * Blocks until PME GPU tasks are completed, and gets the output forces and virial/energy
 532  * (if they were to be computed).
 533  *
 534  * \param[in]  pme                     The PME data structure.
 535  * \param[in]  computeEnergyAndVirial  Tells if the energy and virial computation should be performed.
 536  * \param[out] wcycle                  The wallclock counter.
 537  * \return                             The output forces, energy and virial
 538  */
 539 GPU_FUNC_QUALIFIER PmeOutput pme_gpu_wait_finish_task(gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
 540                                                       bool GPU_FUNC_ARGUMENT(computeEnergyAndVirial),
 541                                                       gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle))
 542         GPU_FUNC_TERM_WITH_RETURN(PmeOutput{});
 543
 544 #endif