src/gromacs/ewald/pme_gpu_internal.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  *
  38  * \brief This file contains internal function definitions for performing the PME calculations on GPU.
  39  * These are not meant to be exposed outside of the PME GPU code.
  40  * As of now, their bodies are still in the common pme_gpu.cpp files.
  41  *
  42  * \author Aleksei Iupinov <a.yupinov@gmail.com>
  43  * \ingroup module_ewald
  44  */
  45
  46 #ifndef GMX_EWALD_PME_GPU_INTERNAL_H
  47 #define GMX_EWALD_PME_GPU_INTERNAL_H
  48
  49 #include "gromacs/fft/fft.h" // for the gmx_fft_direction enum
  50 #include "gromacs/gpu_utils/devicebuffer_datatype.h"
  51 #include "gromacs/gpu_utils/gpu_macros.h" // for the GPU_FUNC_ macros
  52
  53 #include "pme_gpu_types_host.h"
  54 #include "pme_output.h"
  55
  56 class DeviceContext;
  57 struct DeviceInformation;
  58 class DeviceStream;
  59 class GpuEventSynchronizer;
  60 struct gmx_hw_info_t;
  61 struct gmx_gpu_opt_t;
  62 struct gmx_pme_t; // only used in pme_gpu_reinit
  63 struct gmx_wallcycle;
  64 class PmeAtomComm;
  65 enum class PmeForceOutputHandling;
  66 struct PmeGpu;
  67 class PmeGpuProgram;
  68 struct PmeGpuStaging;
  69 struct PmeGpuSettings;
  70 struct t_complex;
  71
  72 namespace gmx
  73 {
  74 template<typename>
  75 class ArrayRef;
  76 class MDLogger;
  77 } // namespace gmx
  78
  79 //! Type of spline data (selects between the spline values and their derivatives)
  80 enum class PmeSplineDataType
  81 {
  82     Values,      //!< Spline values (theta)
  83     Derivatives, //!< Spline derivatives (dtheta)
  84 };               // TODO move this into new and shiny pme.h (pme-types.h?)
  85
  86 //! PME grid dimension ordering (from major to minor)
  87 enum class GridOrdering
  88 {
  89     YZX, //!< Y is the major dimension, then Z, with X as the minor one
  90     XYZ  //!< X is the major dimension, then Y, with Z as the minor one
  91 };
  92
  93 /*! \libinternal \brief
  94  * Returns the atom data block size requirement
  95  *
  96  * The GPU version of PME requires that the coordinates array have a
  97  * size divisible by the returned number.
  98  *
  99  * \returns Number of atoms in a single GPU atom data chunk, which
 100  * determines a minimum divisor of the size of the memory allocated.
 101  */
 102 int pme_gpu_get_atom_data_block_size();
 103
 104 /*! \libinternal \brief
 105  * Synchronizes the current computation, waiting for the GPU kernels/transfers to finish.
 106  *
 107  * \param[in] pmeGpu            The PME GPU structure.
 108  */
 109 GPU_FUNC_QUALIFIER void pme_gpu_synchronize(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM;
 110
 111 /*! \libinternal \brief
 112  * Allocates the fixed size energy and virial buffer both on GPU and CPU.
 113  *
 114  * \param[in,out] pmeGpu            The PME GPU structure.
 115  */
 116 void pme_gpu_alloc_energy_virial(PmeGpu* pmeGpu);
 117
 118 /*! \libinternal \brief
 119  * Frees the energy and virial memory both on GPU and CPU.
 120  *
 121  * \param[in] pmeGpu            The PME GPU structure.
 122  */
 123 void pme_gpu_free_energy_virial(PmeGpu* pmeGpu);
 124
 125 /*! \libinternal \brief
 126  * Clears the energy and virial memory on GPU with 0.
 127  * Should be called at the end of PME computation which returned energy/virial.
 128  *
 129  * \param[in] pmeGpu            The PME GPU structure.
 130  */
 131 void pme_gpu_clear_energy_virial(const PmeGpu* pmeGpu);
 132
 133 /*! \libinternal \brief
 134  * Reallocates and copies the pre-computed B-spline values to the GPU.
 135  *
 136  * \param[in,out] pmeGpu             The PME GPU structure.
 137  */
 138 void pme_gpu_realloc_and_copy_bspline_values(PmeGpu* pmeGpu);
 139
 140 /*! \libinternal \brief
 141  * Frees the pre-computed B-spline values on the GPU (and the transfer CPU buffers).
 142  *
 143  * \param[in] pmeGpu             The PME GPU structure.
 144  */
 145 void pme_gpu_free_bspline_values(const PmeGpu* pmeGpu);
 146
 147 /*! \libinternal \brief
 148  * Reallocates the GPU buffer for the PME forces.
 149  *
 150  * \param[in] pmeGpu             The PME GPU structure.
 151  */
 152 void pme_gpu_realloc_forces(PmeGpu* pmeGpu);
 153
 154 /*! \libinternal \brief
 155  * Frees the GPU buffer for the PME forces.
 156  *
 157  * \param[in] pmeGpu             The PME GPU structure.
 158  */
 159 void pme_gpu_free_forces(const PmeGpu* pmeGpu);
 160
 161 /*! \libinternal \brief
 162  * Copies the forces from the CPU buffer to the GPU (to reduce them with the PME GPU gathered
 163  * forces). To be called e.g. after the bonded calculations.
 164  *
 165  * \param[in] pmeGpu             The PME GPU structure.
 166  */
 167 void pme_gpu_copy_input_forces(PmeGpu* pmeGpu);
 168
 169 /*! \libinternal \brief
 170  * Copies the forces from the GPU to the CPU buffer. To be called after the gathering stage.
 171  *
 172  * \param[in] pmeGpu             The PME GPU structure.
 173  */
 174 void pme_gpu_copy_output_forces(PmeGpu* pmeGpu);
 175
 176 /*! \libinternal \brief
 177  * Checks whether work in the PME GPU stream has completed.
 178  *
 179  * \param[in] pmeGpu            The PME GPU structure.
 180  *
 181  * \returns                     True if work in the PME stream has completed.
 182  */
 183 bool pme_gpu_stream_query(const PmeGpu* pmeGpu);
 184
 185 /*! \libinternal \brief
 186  * Reallocates the buffer on the GPU and copies the charges/coefficients from the CPU buffer.
 187  * Clears the padded part if needed.
 188  *
 189  * \param[in] pmeGpu            The PME GPU structure.
 190  * \param[in] h_coefficients    The input atom charges/coefficients.
 191  *
 192  * Does not need to be done for every PME computation, only whenever the local charges change.
 193  * (So, in the beginning of the run, or on DD step).
 194  */
 195 void pme_gpu_realloc_and_copy_input_coefficients(PmeGpu* pmeGpu, const float* h_coefficients);
 196
 197 /*! \libinternal \brief
 198  * Frees the charges/coefficients on the GPU.
 199  *
 200  * \param[in] pmeGpu             The PME GPU structure.
 201  */
 202 void pme_gpu_free_coefficients(const PmeGpu* pmeGpu);
 203
 204 /*! \libinternal \brief
 205  * Reallocates the buffers on the GPU and the host for the atoms spline data.
 206  *
 207  * \param[in,out] pmeGpu            The PME GPU structure.
 208  */
 209 void pme_gpu_realloc_spline_data(PmeGpu* pmeGpu);
 210
 211 /*! \libinternal \brief
 212  * Frees the buffers on the GPU for the atoms spline data.
 213  *
 214  * \param[in] pmeGpu            The PME GPU structure.
 215  */
 216 void pme_gpu_free_spline_data(const PmeGpu* pmeGpu);
 217
 218 /*! \libinternal \brief
 219  * Reallocates the buffers on the GPU and the host for the particle gridline indices.
 220  *
 221  * \param[in,out] pmeGpu            The PME GPU structure.
 222  */
 223 void pme_gpu_realloc_grid_indices(PmeGpu* pmeGpu);
 224
 225 /*! \libinternal \brief
 226  * Frees the buffer on the GPU for the particle gridline indices.
 227  *
 228  * \param[in] pmeGpu            The PME GPU structure.
 229  */
 230 void pme_gpu_free_grid_indices(const PmeGpu* pmeGpu);
 231
 232 /*! \libinternal \brief
 233  * Reallocates the real space grid and the complex reciprocal grid (if needed) on the GPU.
 234  *
 235  * \param[in] pmeGpu            The PME GPU structure.
 236  */
 237 void pme_gpu_realloc_grids(PmeGpu* pmeGpu);
 238
 239 /*! \libinternal \brief
 240  * Frees the real space grid and the complex reciprocal grid (if needed) on the GPU.
 241  *
 242  * \param[in] pmeGpu            The PME GPU structure.
 243  */
 244 void pme_gpu_free_grids(const PmeGpu* pmeGpu);
 245
 246 /*! \libinternal \brief
 247  * Clears the real space grid on the GPU.
 248  * Should be called at the end of each computation.
 249  *
 250  * \param[in] pmeGpu            The PME GPU structure.
 251  */
 252 void pme_gpu_clear_grids(const PmeGpu* pmeGpu);
 253
 254 /*! \libinternal \brief
 255  * Reallocates and copies the pre-computed fractional coordinates' shifts to the GPU.
 256  *
 257  * \param[in] pmeGpu            The PME GPU structure.
 258  */
 259 void pme_gpu_realloc_and_copy_fract_shifts(PmeGpu* pmeGpu);
 260
 261 /*! \libinternal \brief
 262  * Frees the pre-computed fractional coordinates' shifts on the GPU.
 263  *
 264  * \param[in] pmeGpu            The PME GPU structure.
 265  */
 266 void pme_gpu_free_fract_shifts(const PmeGpu* pmeGpu);
 267
 268 /*! \libinternal \brief
 269  * Copies the input real-space grid from the host to the GPU.
 270  *
 271  * \param[in] pmeGpu   The PME GPU structure.
 272  * \param[in] h_grid   The host-side grid buffer.
 273  */
 274 void pme_gpu_copy_input_gather_grid(const PmeGpu* pmeGpu, float* h_grid);
 275
 276 /*! \libinternal \brief
 277  * Copies the output real-space grid from the GPU to the host.
 278  *
 279  * \param[in] pmeGpu   The PME GPU structure.
 280  * \param[out] h_grid  The host-side grid buffer.
 281  */
 282 void pme_gpu_copy_output_spread_grid(const PmeGpu* pmeGpu, float* h_grid);
 283
 284 /*! \libinternal \brief
 285  * Copies the spread output spline data and gridline indices from the GPU to the host.
 286  *
 287  * \param[in] pmeGpu   The PME GPU structure.
 288  */
 289 void pme_gpu_copy_output_spread_atom_data(const PmeGpu* pmeGpu);
 290
 291 /*! \libinternal \brief
 292  * Copies the gather input spline data and gridline indices from the host to the GPU.
 293  *
 294  * \param[in] pmeGpu   The PME GPU structure.
 295  */
 296 void pme_gpu_copy_input_gather_atom_data(const PmeGpu* pmeGpu);
 297
 298 /*! \libinternal \brief
 299  * Waits for the grid copying to the host-side buffer after spreading to finish.
 300  *
 301  * \param[in] pmeGpu  The PME GPU structure.
 302  */
 303 void pme_gpu_sync_spread_grid(const PmeGpu* pmeGpu);
 304
 305 /*! \libinternal \brief
 306  * Initializes the CUDA FFT structures.
 307  *
 308  * \param[in] pmeGpu  The PME GPU structure.
 309  */
 310 void pme_gpu_reinit_3dfft(const PmeGpu* pmeGpu);
 311
 312 /*! \libinternal \brief
 313  * Destroys the CUDA FFT structures.
 314  *
 315  * \param[in] pmeGpu  The PME GPU structure.
 316  */
 317 void pme_gpu_destroy_3dfft(const PmeGpu* pmeGpu);
 318
 319 /* The PME stages themselves */
 320
 321 /*! \libinternal \brief
 322  * A GPU spline computation and charge spreading function.
 323  *
 324  * \param[in]  pmeGpu          The PME GPU structure.
 325  * \param[in]  xReadyOnDevice  Event synchronizer indicating that the coordinates are ready in the device memory;
 326  *                             can be nullptr when invoked on a separate PME rank or from PME tests.
 327  * \param[in]  gridIndex       Index of the PME grid - unused, assumed to be 0.
 328  * \param[out] h_grid          The host-side grid buffer (used only if the result of the spread is expected on the host,
 329  *                             e.g. testing or host-side FFT)
 330  * \param[in]  computeSplines  Should the computation of spline parameters and gridline indices be performed.
 331  * \param[in]  spreadCharges   Should the charges/coefficients be spread on the grid.
 332  */
 333 GPU_FUNC_QUALIFIER void pme_gpu_spread(const PmeGpu*         GPU_FUNC_ARGUMENT(pmeGpu),
 334                                        GpuEventSynchronizer* GPU_FUNC_ARGUMENT(xReadyOnDevice),
 335                                        int                   GPU_FUNC_ARGUMENT(gridIndex),
 336                                        real*                 GPU_FUNC_ARGUMENT(h_grid),
 337                                        bool                  GPU_FUNC_ARGUMENT(computeSplines),
 338                                        bool GPU_FUNC_ARGUMENT(spreadCharges)) GPU_FUNC_TERM;
 339
 340 /*! \libinternal \brief
 341  * 3D FFT R2C/C2R routine.
 342  *
 343  * \param[in]  pmeGpu          The PME GPU structure.
 344  * \param[in]  direction       Transform direction (real-to-complex or complex-to-real)
 345  * \param[in]  gridIndex       Index of the PME grid - unused, assumed to be 0.
 346  */
 347 void pme_gpu_3dfft(const PmeGpu* pmeGpu, enum gmx_fft_direction direction, int gridIndex);
 348
 349 /*! \libinternal \brief
 350  * A GPU Fourier space solving function.
 351  *
 352  * \param[in]     pmeGpu                  The PME GPU structure.
 353  * \param[in,out] h_grid                  The host-side input and output Fourier grid buffer (used only with testing or host-side FFT)
 354  * \param[in]     gridOrdering            Specifies the dimension ordering of the complex grid. TODO: store this information?
 355  * \param[in]     computeEnergyAndVirial  Tells if the energy and virial computation should be performed.
 356  */
 357 GPU_FUNC_QUALIFIER void pme_gpu_solve(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
 358                                       t_complex*    GPU_FUNC_ARGUMENT(h_grid),
 359                                       GridOrdering  GPU_FUNC_ARGUMENT(gridOrdering),
 360                                       bool GPU_FUNC_ARGUMENT(computeEnergyAndVirial)) GPU_FUNC_TERM;
 361
 362 /*! \libinternal \brief
 363  * A GPU force gathering function.
 364  *
 365  * \param[in]     pmeGpu           The PME GPU structure.
 366  * \param[in]     h_grid           The host-side grid buffer (used only in testing mode)
 367  */
 368 GPU_FUNC_QUALIFIER void pme_gpu_gather(PmeGpu*      GPU_FUNC_ARGUMENT(pmeGpu),
 369                                        const float* GPU_FUNC_ARGUMENT(h_grid)) GPU_FUNC_TERM;
 370
 371 /*! \brief Sets the device pointer to coordinate data
 372  * \param[in] pmeGpu         The PME GPU structure.
 373  * \param[in] d_x            Pointer to coordinate data
 374  */
 375 GPU_FUNC_QUALIFIER void pme_gpu_set_kernelparam_coordinates(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
 376                                                             DeviceBuffer<gmx::RVec> GPU_FUNC_ARGUMENT(d_x)) GPU_FUNC_TERM;
 377
 378 /*! \brief Return pointer to device copy of force data.
 379  * \param[in] pmeGpu         The PME GPU structure.
 380  * \returns                  Pointer to force data
 381  */
 382 GPU_FUNC_QUALIFIER void* pme_gpu_get_kernelparam_forces(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
 383         GPU_FUNC_TERM_WITH_RETURN(nullptr);
 384
 385 /*! \brief Return pointer to the sync object triggered after the PME force calculation completion
 386  * \param[in] pmeGpu         The PME GPU structure.
 387  * \returns                  Pointer to sync object
 388  */
 389 GPU_FUNC_QUALIFIER GpuEventSynchronizer* pme_gpu_get_forces_ready_synchronizer(
 390         const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
 391
 392 /*! \libinternal \brief
 393  * Gives read-only access to the settings stored in the PME GPU structure.
 394  * \param[in] pmeGpu         The PME GPU structure; it is dereferenced without a null check.
 395  * \returns                  Const reference to the settings member of \p pmeGpu.
 396  */
 397 inline const PmeGpuSettings& pme_gpu_settings(const PmeGpu* pmeGpu)
 398 {
 399     const PmeGpuSettings& gpuSettings = pmeGpu->settings;
 400     return gpuSettings;
 401 }
 402
 403 /*! \libinternal \brief
 404  * Gives read-only access to the staging object stored in the PME GPU structure.
 405  * \param[in] pmeGpu         The PME GPU structure; it is dereferenced without a null check.
 406  * \returns                  Const reference to the staging member of \p pmeGpu.
 407  */
 408 inline const PmeGpuStaging& pme_gpu_staging(const PmeGpu* pmeGpu)
 409 {
 410     const PmeGpu& gpuData = *pmeGpu;
 411     return gpuData.staging;
 412 }
 413
 414 /*! \libinternal \brief
 415  * Turns the testing mode of the PME GPU module on or off; a null \p pmeGpu is silently ignored.
 416  * \param[in,out] pmeGpu     The PME GPU structure; its settings.copyAllOutputs is set to \p testing.
 417  * \param[in]     testing    If true, settings.transferKind becomes GpuApiCallBehavior::Sync, otherwise Async.
 418  */
 419 inline void pme_gpu_set_testing(PmeGpu* pmeGpu, bool testing)
 420 {
 421     if (pmeGpu == nullptr)
 422     {
 423         return;
 424     }
 425     pmeGpu->settings.copyAllOutputs = testing;
 426     pmeGpu->settings.transferKind = testing ? GpuApiCallBehavior::Sync : GpuApiCallBehavior::Async;
 427 }
 428
 429 /* A block of C++ functions that live in pme_gpu_internal.cpp */
 430
 431 /*! \libinternal \brief
 432  * Returns the energy and virial GPU outputs, useful for testing.
 433  *
 434  * It is the caller's responsibility to be aware of whether the GPU
 435  * handled the solve stage.
 436  *
 437  * \param[in] pme                The PME structure.
 438  * \param[out] output            Pointer to output where energy and virial should be stored.
 439  */
 440 GPU_FUNC_QUALIFIER void pme_gpu_getEnergyAndVirial(const gmx_pme_t& GPU_FUNC_ARGUMENT(pme),
 441                                                    PmeOutput* GPU_FUNC_ARGUMENT(output)) GPU_FUNC_TERM;
 442
 443 /*! \libinternal \brief
 444  * Returns the GPU outputs (forces, energy and virial)
 445  *
 446  * \param[in] pme                     The PME structure.
 447  * \param[in] computeEnergyAndVirial  Whether the energy and virial are being computed
 448  * \returns                           The output object.
 449  */
 450 GPU_FUNC_QUALIFIER PmeOutput pme_gpu_getOutput(const gmx_pme_t& GPU_FUNC_ARGUMENT(pme),
 451                                                bool GPU_FUNC_ARGUMENT(computeEnergyAndVirial))
 452         GPU_FUNC_TERM_WITH_RETURN(PmeOutput{});
 453
 454 /*! \libinternal \brief
 455  * Updates the unit cell parameters. Does not check if update is necessary - that is done in pme_gpu_prepare_computation().
 456  *
 457  * \param[in] pmeGpu         The PME GPU structure.
 458  * \param[in] box            The unit cell box.
 459  */
 460 GPU_FUNC_QUALIFIER void pme_gpu_update_input_box(PmeGpu*      GPU_FUNC_ARGUMENT(pmeGpu),
 461                                                  const matrix GPU_FUNC_ARGUMENT(box)) GPU_FUNC_TERM;
 462
 463 /*! \libinternal \brief
 464  * Finishes the PME GPU computation, waiting for the output forces and/or energy/virial to be copied to the host.
 465  * If forces were computed, they will have arrived at the external host buffer provided to gather.
 466  * If virial/energy were computed, they will have arrived into the internal staging buffer
 467  * (even though that should have already happened before even launching the gather).
 468  * Finally, cudaEvent_t based GPU timers get updated if enabled. They also need stream synchronization for correctness.
 469  * Additionally, device-side buffers are cleared asynchronously for the next computation.
 470  *
 471  * \param[in] pmeGpu         The PME GPU structure.
 472  */
 473 void pme_gpu_finish_computation(const PmeGpu* pmeGpu);
 474
 475 /*! \libinternal \brief
 476  * Get the normal/padded grid dimensions of the real-space PME grid on GPU. Only used in tests.
 477  *
 478  * \param[in] pmeGpu             The PME GPU structure.
 479  * \param[out] gridSize          Pointer to the grid dimensions to fill in.
 480  * \param[out] paddedGridSize    Pointer to the padded grid dimensions to fill in.
 481  */
 482 GPU_FUNC_QUALIFIER void pme_gpu_get_real_grid_sizes(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
 483                                                     gmx::IVec*    GPU_FUNC_ARGUMENT(gridSize),
 484                                                     gmx::IVec* GPU_FUNC_ARGUMENT(paddedGridSize)) GPU_FUNC_TERM;
 485
 486 /*! \libinternal \brief
 487  * (Re-)initializes the PME GPU data at the beginning of the run or on DLB.
 488  *
 489  * \param[in,out] pme            The PME structure.
 490  * \param[in]     deviceContext  The GPU context.
 491  * \param[in]     deviceStream   The GPU stream.
 492  * \param[in]     pmeGpuProgram  The handle to the program/kernel data created outside (e.g. in unit tests/runner)
 493  *
 494  * \throws gmx::NotImplementedError if this generally valid PME structure is not valid for GPU runs.
 495  */
 496 GPU_FUNC_QUALIFIER void pme_gpu_reinit(gmx_pme_t*           GPU_FUNC_ARGUMENT(pme),
 497                                        const DeviceContext* GPU_FUNC_ARGUMENT(deviceContext),
 498                                        const DeviceStream*  GPU_FUNC_ARGUMENT(deviceStream),
 499                                        const PmeGpuProgram* GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM;
 500
 501 /*! \libinternal \brief
 502  * Destroys the PME GPU data at the end of the run.
 503  *
 504  * \param[in] pmeGpu     The PME GPU structure.
 505  */
 506 GPU_FUNC_QUALIFIER void pme_gpu_destroy(PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM;
 507
 508 /*! \libinternal \brief
 509  * Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU.
 510  *
 511  * \param[in] pmeGpu    The PME GPU structure.
 512  * \param[in] nAtoms    The number of particles.
 513  * \param[in] charges   The pointer to the host-side array of particle charges.
 514  *
 515  * This is a function that should only be called in the beginning of the run and on domain
 516  * decomposition. Should be called before the pme_gpu_set_io_ranges.
 517  */
 518 GPU_FUNC_QUALIFIER void pme_gpu_reinit_atoms(PmeGpu*     GPU_FUNC_ARGUMENT(pmeGpu),
 519                                              int         GPU_FUNC_ARGUMENT(nAtoms),
 520                                              const real* GPU_FUNC_ARGUMENT(charges)) GPU_FUNC_TERM;
 521
 522 /*! \libinternal \brief
 523  * The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing.
 524  *
 525  * This clears the device-side working buffers in preparation for new computation.
 526  *
 527  * \param[in] pmeGpu            The PME GPU structure.
 528  */
 529 void pme_gpu_reinit_computation(const PmeGpu* pmeGpu);
 530
 531 /*! \brief
 532  * Blocks until PME GPU tasks are completed, and gets the output forces and virial/energy
 533  * (if they were to be computed).
 534  *
 535  * \param[in]  pme                     The PME data structure.
 536  * \param[in]  computeEnergyAndVirial  Tells if the energy and virial computation should be performed.
 537  * \param[out] wcycle                  The wallclock counter.
 538  * \returns                            The output forces, energy and virial
 539  */
 540 GPU_FUNC_QUALIFIER PmeOutput pme_gpu_wait_finish_task(gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
 541                                                       bool GPU_FUNC_ARGUMENT(computeEnergyAndVirial),
 542                                                       gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle))
 543         GPU_FUNC_TERM_WITH_RETURN(PmeOutput{});
 544
 545 #endif