src/gromacs/ewald/pme_gpu_internal.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  *
  38  * \brief This file contains internal function declarations (and a few inline helpers) for performing the PME calculations on GPU.
  39  * These are not meant to be exposed outside of the PME GPU code.
  40  * As of now, their bodies are still in the common pme_gpu.cpp files.
  41  *
  42  * \author Aleksei Iupinov <a.yupinov@gmail.com>
  43  * \ingroup module_ewald
  44  */
  45
  46 #ifndef GMX_EWALD_PME_GPU_INTERNAL_H
  47 #define GMX_EWALD_PME_GPU_INTERNAL_H
  48
  49 #include "gromacs/fft/fft.h" // for the gmx_fft_direction enum
  50 #include "gromacs/gpu_utils/devicebuffer_datatype.h"
  51 #include "gromacs/gpu_utils/gpu_macros.h" // for the GPU_FUNC_ macros
  52 #include "gromacs/utility/arrayref.h"
  53
  54 #include "pme_gpu_types_host.h"
  55 #include "pme_output.h"
  56
  57 class GpuEventSynchronizer;
  58 struct DeviceInformation;
  59 struct gmx_hw_info_t;
  60 struct gmx_gpu_opt_t;
  61 struct gmx_pme_t; // used by pme_gpu_reinit, pme_gpu_getEnergyAndVirial, pme_gpu_getOutput and pme_gpu_wait_finish_task
  62 struct gmx_wallcycle;
  63 class PmeAtomComm;
  64 enum class PmeForceOutputHandling;
  65 struct PmeGpu;
  66 class PmeGpuProgram;
  67 struct PmeGpuStaging;
  68 struct PmeGpuSettings;
  69 struct t_complex;
  70
  71 namespace gmx
  72 {
  73 class MDLogger;
  74 }
  75
  76 //! Type of spline data: the spline values or their derivatives
  77 enum class PmeSplineDataType
  78 {
  79     Values,      // spline values (theta)
  80     Derivatives, // spline derivatives (dtheta)
  81 };               // TODO move this into new and shiny pme.h (pme-types.h?)
  82
  83 //! PME grid dimension ordering, listed from major to minor; passed to pme_gpu_solve() for the complex grid
  84 enum class GridOrdering
  85 {
  86     YZX,
  87     XYZ
  88 };
  89
  90 /*! \libinternal \brief
  91  * Returns the atom data block size required by the GPU version of PME.
  92  *
  93  * The GPU version of PME requires that the coordinates array have a
  94  * size divisible by the returned number.
  95  *
  96  * \returns Number of atoms in a single GPU atom data chunk, which
  97  * determines a minimum divisor of the size of the memory allocated.
  98  */
  99 int pme_gpu_get_atom_data_block_size();
 100
 101 /*! \libinternal \brief
 102  * Synchronizes with the current computation by waiting for its GPU kernels/transfers to finish.
 103  *
 104  * \param[in] pmeGpu            The PME GPU structure.
 105  */
 106 GPU_FUNC_QUALIFIER void pme_gpu_synchronize(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM;
 107
 108 /*! \libinternal \brief
 109  * Allocates the fixed-size energy and virial buffer on both the GPU and the CPU.
 110  *
 111  * \param[in,out] pmeGpu            The PME GPU structure.
 112  */
 113 void pme_gpu_alloc_energy_virial(PmeGpu* pmeGpu);
 114
 115 /*! \libinternal \brief
 116  * Frees the energy and virial memory on both the GPU and the CPU (see pme_gpu_alloc_energy_virial()).
 117  *
 118  * \param[in,out] pmeGpu        The PME GPU structure.
 119  */
 120 void pme_gpu_free_energy_virial(PmeGpu* pmeGpu);
 121
 122 /*! \libinternal \brief
 123  * Zeroes the energy and virial memory on the GPU.
 124  * Should be called at the end of a PME computation which returned energy/virial.
 125  *
 126  * \param[in] pmeGpu            The PME GPU structure.
 127  */
 128 void pme_gpu_clear_energy_virial(const PmeGpu* pmeGpu);
 129
 130 /*! \libinternal \brief
 131  * Reallocates the GPU storage for the pre-computed B-spline values and copies them to the GPU.
 132  *
 133  * \param[in,out] pmeGpu             The PME GPU structure.
 134  */
 135 void pme_gpu_realloc_and_copy_bspline_values(PmeGpu* pmeGpu);
 136
 137 /*! \libinternal \brief
 138  * Frees the pre-computed B-spline values on the GPU (and the CPU buffers used for their transfer).
 139  *
 140  * \param[in] pmeGpu             The PME GPU structure.
 141  */
 142 void pme_gpu_free_bspline_values(const PmeGpu* pmeGpu);
 143
 144 /*! \libinternal \brief
 145  * Reallocates the GPU buffer for the PME forces.
 146  *
 147  * \param[in,out] pmeGpu         The PME GPU structure.
 148  */
 149 void pme_gpu_realloc_forces(PmeGpu* pmeGpu);
 150
 151 /*! \libinternal \brief
 152  * Frees the GPU buffer for the PME forces (see pme_gpu_realloc_forces()).
 153  *
 154  * \param[in] pmeGpu             The PME GPU structure.
 155  */
 156 void pme_gpu_free_forces(const PmeGpu* pmeGpu);
 157
 158 /*! \libinternal \brief
 159  * Copies the forces from the CPU buffer to the GPU, so that they can be reduced with the forces
 160  * gathered by PME on the GPU. To be called e.g. after the bonded calculations.
 161  *
 162  * \param[in] pmeGpu             The PME GPU structure.
 163  */
 164 void pme_gpu_copy_input_forces(PmeGpu* pmeGpu);
 165
 166 /*! \libinternal \brief
 167  * Copies the forces from the GPU to the CPU buffer. To be called after the gathering stage (pme_gpu_gather()).
 168  *
 169  * \param[in] pmeGpu             The PME GPU structure.
 170  */
 171 void pme_gpu_copy_output_forces(PmeGpu* pmeGpu);
 172
 173 /*! \libinternal \brief
 174  * Queries whether the work in the PME GPU stream has completed.
 175  *
 176  * \param[in] pmeGpu            The PME GPU structure.
 177  *
 178  * \returns                     True if the work in the PME stream has completed.
 179  */
 180 bool pme_gpu_stream_query(const PmeGpu* pmeGpu);
 181
 182 /*! \libinternal \brief
 183  * Reallocates the buffer on the GPU and copies the charges/coefficients from the CPU buffer.
 184  * Clears the padded part if needed.
 185  *
 186  * \param[in,out] pmeGpu        The PME GPU structure.
 187  * \param[in] h_coefficients    The input atom charges/coefficients (host-side buffer).
 188  *
 189  * Does not need to be done for every PME computation, only whenever the local charges change
 190  * (i.e. at the beginning of the run, or on a domain decomposition step).
 191  */
 192 void pme_gpu_realloc_and_copy_input_coefficients(PmeGpu* pmeGpu, const float* h_coefficients);
 193
 194 /*! \libinternal \brief
 195  * Frees the charges/coefficients on the GPU (see pme_gpu_realloc_and_copy_input_coefficients()).
 196  *
 197  * \param[in] pmeGpu             The PME GPU structure.
 198  */
 199 void pme_gpu_free_coefficients(const PmeGpu* pmeGpu);
 200
 201 /*! \libinternal \brief
 202  * Reallocates the buffers on the GPU and the host for the atoms' spline data.
 203  *
 204  * \param[in,out] pmeGpu            The PME GPU structure.
 205  */
 206 void pme_gpu_realloc_spline_data(PmeGpu* pmeGpu);
 207
 208 /*! \libinternal \brief
 209  * Frees the buffers on the GPU for the atoms' spline data (see pme_gpu_realloc_spline_data()).
 210  *
 211  * \param[in] pmeGpu            The PME GPU structure.
 212  */
 213 void pme_gpu_free_spline_data(const PmeGpu* pmeGpu);
 214
 215 /*! \libinternal \brief
 216  * Reallocates the buffers on the GPU and the host for the particles' gridline indices.
 217  *
 218  * \param[in,out] pmeGpu            The PME GPU structure.
 219  */
 220 void pme_gpu_realloc_grid_indices(PmeGpu* pmeGpu);
 221
 222 /*! \libinternal \brief
 223  * Frees the buffer on the GPU for the particles' gridline indices (see pme_gpu_realloc_grid_indices()).
 224  *
 225  * \param[in] pmeGpu            The PME GPU structure.
 226  */
 227 void pme_gpu_free_grid_indices(const PmeGpu* pmeGpu);
 228
 229 /*! \libinternal \brief
 230  * Reallocates the real space grid and the complex reciprocal grid (if needed) on the GPU.
 231  *
 232  * \param[in,out] pmeGpu        The PME GPU structure.
 233  */
 234 void pme_gpu_realloc_grids(PmeGpu* pmeGpu);
 235
 236 /*! \libinternal \brief
 237  * Frees the real space grid and the complex reciprocal grid (if needed) on the GPU; see pme_gpu_realloc_grids().
 238  *
 239  * \param[in] pmeGpu            The PME GPU structure.
 240  */
 241 void pme_gpu_free_grids(const PmeGpu* pmeGpu);
 242
 243 /*! \libinternal \brief
 244  * Clears the real space grid on the GPU.
 245  * Should be called at the end of each PME computation.
 246  *
 247  * \param[in] pmeGpu            The PME GPU structure.
 248  */
 249 void pme_gpu_clear_grids(const PmeGpu* pmeGpu);
 250
 251 /*! \libinternal \brief
 252  * Reallocates and copies the pre-computed shifts of the fractional coordinates to the GPU.
 253  *
 254  * \param[in,out] pmeGpu        The PME GPU structure.
 255  */
 256 void pme_gpu_realloc_and_copy_fract_shifts(PmeGpu* pmeGpu);
 257
 258 /*! \libinternal \brief
 259  * Frees the pre-computed shifts of the fractional coordinates on the GPU (see pme_gpu_realloc_and_copy_fract_shifts()).
 260  *
 261  * \param[in] pmeGpu            The PME GPU structure.
 262  */
 263 void pme_gpu_free_fract_shifts(const PmeGpu* pmeGpu);
 264
 265 /*! \libinternal \brief
 266  * Copies the input real-space grid from the host to the GPU.
 267  *
 268  * \param[in] pmeGpu   The PME GPU structure.
 269  * \param[in] h_grid   The host-side grid buffer to copy from.
 270  */
 271 void pme_gpu_copy_input_gather_grid(const PmeGpu* pmeGpu, float* h_grid);
 272
 273 /*! \libinternal \brief
 274  * Copies the output real-space grid from the GPU to the host.
 275  *
 276  * \param[in] pmeGpu   The PME GPU structure.
 277  * \param[out] h_grid  The host-side grid buffer to copy into.
 278  */
 279 void pme_gpu_copy_output_spread_grid(const PmeGpu* pmeGpu, float* h_grid);
 280
 281 /*! \libinternal \brief
 282  * Copies the spline data and gridline indices output by the spread from the GPU to the host.
 283  *
 284  * \param[in] pmeGpu   The PME GPU structure.
 285  */
 286 void pme_gpu_copy_output_spread_atom_data(const PmeGpu* pmeGpu);
 287
 288 /*! \libinternal \brief
 289  * Copies the spline data and gridline indices that are input to the gather from the host to the GPU.
 290  *
 291  * \param[in] pmeGpu   The PME GPU structure.
 292  */
 293 void pme_gpu_copy_input_gather_atom_data(const PmeGpu* pmeGpu);
 294
 295 /*! \libinternal \brief
 296  * Waits for the copy of the grid to the host-side buffer, issued after spreading, to finish.
 297  *
 298  * \param[in] pmeGpu  The PME GPU structure.
 299  */
 300 void pme_gpu_sync_spread_grid(const PmeGpu* pmeGpu);
 301
 302 /*! \libinternal \brief
 303  * Does the one-time GPU-framework-specific PME initialization.
 304  * For CUDA, the PME stream is created with the highest priority.
 305  *
 306  * \param[in,out] pmeGpu  The PME GPU structure.
 307  */
 308 void pme_gpu_init_internal(PmeGpu* pmeGpu);
 309
 310 /*! \libinternal \brief
 311  * Initializes the CUDA FFT structures; the counterpart of pme_gpu_destroy_3dfft().
 312  *
 313  * \param[in] pmeGpu  The PME GPU structure.
 314  */
 315 void pme_gpu_reinit_3dfft(const PmeGpu* pmeGpu);
 316
 317 /*! \libinternal \brief
 318  * Destroys the CUDA FFT structures; the counterpart of pme_gpu_reinit_3dfft().
 319  *
 320  * \param[in] pmeGpu  The PME GPU structure.
 321  */
 322 void pme_gpu_destroy_3dfft(const PmeGpu* pmeGpu);
 323
 324 /* The PME stages themselves */
 325
 326 /*! \libinternal \brief
 327  * A GPU spline computation and charge spreading function.
 328  *
 329  * \param[in]  pmeGpu          The PME GPU structure.
 330  * \param[in]  xReadyOnDevice  Event synchronizer indicating that the coordinates are ready in the device memory;
 331  *                             can be nullptr when invoked on a separate PME rank or from PME tests.
 332  * \param[in]  gridIndex       Index of the PME grid - unused, assumed to be 0.
 333  * \param[out] h_grid          The host-side grid buffer (used only if the result of the spread is expected on the host,
 334  *                             e.g. in testing or with a host-side FFT).
 335  * \param[in]  computeSplines  Whether the spline parameters and gridline indices should be computed.
 336  * \param[in]  spreadCharges   Whether the charges/coefficients should be spread on the grid.
 337  */
 338 GPU_FUNC_QUALIFIER void pme_gpu_spread(const PmeGpu*         GPU_FUNC_ARGUMENT(pmeGpu),
 339                                        GpuEventSynchronizer* GPU_FUNC_ARGUMENT(xReadyOnDevice),
 340                                        int                   GPU_FUNC_ARGUMENT(gridIndex),
 341                                        real*                 GPU_FUNC_ARGUMENT(h_grid),
 342                                        bool                  GPU_FUNC_ARGUMENT(computeSplines),
 343                                        bool GPU_FUNC_ARGUMENT(spreadCharges)) GPU_FUNC_TERM;
 344
 345 /*! \libinternal \brief
 346  * Performs a 3D real-to-complex (R2C) or complex-to-real (C2R) FFT.
 347  *
 348  * \param[in]  pmeGpu          The PME GPU structure.
 349  * \param[in]  direction       Transform direction (real-to-complex or complex-to-real).
 350  * \param[in]  gridIndex       Index of the PME grid - unused, assumed to be 0.
 351  */
 352 void pme_gpu_3dfft(const PmeGpu* pmeGpu, enum gmx_fft_direction direction, int gridIndex);
 353
 354 /*! \libinternal \brief
 355  * A GPU Fourier space solving function.
 356  *
 357  * \param[in]     pmeGpu                  The PME GPU structure.
 358  * \param[in,out] h_grid                  The host-side input and output Fourier grid buffer (used only with testing or host-side FFT)
 359  * \param[in]     gridOrdering            Specifies the dimension ordering of the complex grid. TODO: store this information?
 360  * \param[in]     computeEnergyAndVirial  Tells if the energy and virial computation should be performed.
 361  */
 362 GPU_FUNC_QUALIFIER void pme_gpu_solve(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
 363                                       t_complex*    GPU_FUNC_ARGUMENT(h_grid),
 364                                       GridOrdering  GPU_FUNC_ARGUMENT(gridOrdering),
 365                                       bool GPU_FUNC_ARGUMENT(computeEnergyAndVirial)) GPU_FUNC_TERM;
 366
 367 /*! \libinternal \brief
 368  * A GPU force gathering function.
 369  *
 370  * \param[in]     pmeGpu           The PME GPU structure.
 371  * \param[in]     h_grid           The host-side grid buffer (used only in testing mode)
 372  */
 373 GPU_FUNC_QUALIFIER void pme_gpu_gather(PmeGpu*      GPU_FUNC_ARGUMENT(pmeGpu),
 374                                        const float* GPU_FUNC_ARGUMENT(h_grid)) GPU_FUNC_TERM;
 375
 376 /*! \brief Sets the device pointer to the coordinate data.
 377  * \param[in] pmeGpu         The PME GPU structure.
 378  * \param[in] d_x            Device buffer holding the coordinate data.
 379  */
 380 GPU_FUNC_QUALIFIER void pme_gpu_set_kernelparam_coordinates(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
 381                                                             DeviceBuffer<gmx::RVec> GPU_FUNC_ARGUMENT(d_x)) GPU_FUNC_TERM;
 382
 383 /*! \brief Returns a pointer to the device copy of the force data.
 384  * \param[in] pmeGpu         The PME GPU structure.
 385  * \returns                  Untyped pointer to the force data.
 386  */
 387 GPU_FUNC_QUALIFIER void* pme_gpu_get_kernelparam_forces(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
 388         GPU_FUNC_TERM_WITH_RETURN(nullptr);
 389
 390 /*! \brief Returns a pointer to the GPU stream.
 391  * \param[in] pmeGpu         The PME GPU structure.
 392  * \returns                  Pointer to the (const) stream object.
 393  */
 394 GPU_FUNC_QUALIFIER const DeviceStream* pme_gpu_get_stream(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
 395         GPU_FUNC_TERM_WITH_RETURN(nullptr);
 396
 397 /*! \brief Returns a pointer to the sync object that is triggered when the PME force calculation completes.
 398  * \param[in] pmeGpu         The PME GPU structure.
 399  * \returns                  Pointer to the sync object.
 400  */
 401 GPU_FUNC_QUALIFIER GpuEventSynchronizer* pme_gpu_get_forces_ready_synchronizer(
 402         const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
 403
 404 /*! \libinternal \brief
 405  * Returns the PME GPU settings.
 406  *
 407  * \param[in] pmeGpu         The PME GPU structure; must not be null, as it is dereferenced unconditionally.
 408  * \returns                  Const reference to the settings member of \p pmeGpu.
 409  */
 410 inline const PmeGpuSettings& pme_gpu_settings(const PmeGpu* pmeGpu)
 411 {
 412     return pmeGpu->settings;
 413 }
 414
 415 /*! \libinternal \brief
 416  * Returns the PME GPU staging object.
 417  *
 418  * \param[in] pmeGpu         The PME GPU structure; must not be null, as it is dereferenced unconditionally.
 419  * \returns                  Const reference to the staging member of \p pmeGpu.
 420  */
 421 inline const PmeGpuStaging& pme_gpu_staging(const PmeGpu* pmeGpu)
 422 {
 423     return pmeGpu->staging;
 424 }
 425
 426 /*! \libinternal \brief
 427  * Sets whether the PME module is running in testing mode.
 428  *
 429  * \param[in,out] pmeGpu     The PME GPU structure; if null, the call does nothing.
 430  * \param[in] testing        Whether testing mode is on: sets settings.copyAllOutputs and selects Sync (otherwise Async) for settings.transferKind.
 431  */
 432 inline void pme_gpu_set_testing(PmeGpu* pmeGpu, bool testing)
 433 {
 434     if (pmeGpu)
 435     {
 436         pmeGpu->settings.copyAllOutputs = testing;
 437         pmeGpu->settings.transferKind = testing ? GpuApiCallBehavior::Sync : GpuApiCallBehavior::Async;
 438     }
 439 }
 440
 441 /* A block of C++ functions that live in pme_gpu_internal.cpp */
 442
 443 /*! \libinternal \brief
 444  * Returns the energy and virial GPU outputs through \p output; useful for testing.
 445  *
 446  * It is the caller's responsibility to be aware of whether the GPU
 447  * handled the solve stage.
 448  *
 449  * \param[in] pme                The PME structure.
 450  * \param[out] output            Pointer to the output object where the energy and virial should be stored.
 451  */
 452 GPU_FUNC_QUALIFIER void pme_gpu_getEnergyAndVirial(const gmx_pme_t& GPU_FUNC_ARGUMENT(pme),
 453                                                    PmeOutput* GPU_FUNC_ARGUMENT(output)) GPU_FUNC_TERM;
 454
 455 /*! \libinternal \brief
 456  * Returns the GPU outputs (forces, energy and virial).
 457  *
 458  * \param[in] pme                     The PME structure.
 459  * \param[in] computeEnergyAndVirial  Whether the energy and virial are being computed.
 460  * \returns                           The output object, returned by value.
 461  */
 462 GPU_FUNC_QUALIFIER PmeOutput pme_gpu_getOutput(const gmx_pme_t& GPU_FUNC_ARGUMENT(pme),
 463                                                bool GPU_FUNC_ARGUMENT(computeEnergyAndVirial))
 464         GPU_FUNC_TERM_WITH_RETURN(PmeOutput{});
 465
 466 /*! \libinternal \brief
 467  * Updates the unit cell parameters. Does not check if the update is necessary - that is done in pme_gpu_prepare_computation().
 468  *
 469  * \param[in,out] pmeGpu     The PME GPU structure.
 470  * \param[in] box            The unit cell box.
 471  */
 472 GPU_FUNC_QUALIFIER void pme_gpu_update_input_box(PmeGpu*      GPU_FUNC_ARGUMENT(pmeGpu),
 473                                                  const matrix GPU_FUNC_ARGUMENT(box)) GPU_FUNC_TERM;
 474
 475 /*! \libinternal \brief
 476  * Finishes the PME GPU computation, waiting for the output forces and/or energy/virial to be copied to the host.
 477  * If forces were computed, they will have arrived at the external host buffer provided to the gather.
 478  * If virial/energy were computed, they will have arrived in the internal staging buffer
 479  * (even though that should have already happened before even launching the gather).
 480  * Finally, cudaEvent_t-based GPU timers get updated if enabled. They also need stream synchronization for correctness.
 481  * Additionally, device-side buffers are cleared asynchronously for the next computation.
 482  *
 483  * \param[in] pmeGpu         The PME GPU structure.
 484  */
 485 void pme_gpu_finish_computation(const PmeGpu* pmeGpu);
 486
 487 /*! \libinternal \brief
 488  * Gets the normal and padded grid dimensions of the real-space PME grid on the GPU. Only used in tests.
 489  *
 490  * \param[in] pmeGpu             The PME GPU structure.
 491  * \param[out] gridSize          Pointer to the grid dimensions to fill in.
 492  * \param[out] paddedGridSize    Pointer to the padded grid dimensions to fill in.
 493  */
 494 GPU_FUNC_QUALIFIER void pme_gpu_get_real_grid_sizes(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
 495                                                     gmx::IVec*    GPU_FUNC_ARGUMENT(gridSize),
 496                                                     gmx::IVec* GPU_FUNC_ARGUMENT(paddedGridSize)) GPU_FUNC_TERM;
 497
 498 /*! \libinternal \brief
 499  * (Re-)initializes the PME GPU data at the beginning of the run or on DLB.
 500  *
 501  * \param[in,out] pme             The PME structure.
 502  * \param[in]     deviceInfo      The GPU device information structure.
 503  * \param[in]     pmeGpuProgram   The PME GPU program data.
 504  * \throws gmx::NotImplementedError if this generally valid PME structure is not valid for GPU runs.
 505  */
 506 GPU_FUNC_QUALIFIER void pme_gpu_reinit(gmx_pme_t*               GPU_FUNC_ARGUMENT(pme),
 507                                        const DeviceInformation* GPU_FUNC_ARGUMENT(deviceInfo),
 508                                        const PmeGpuProgram* GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM;
 509
 510 /*! \libinternal \brief
 511  * Destroys the PME GPU data at the end of the run.
 512  *
 513  * \param[in,out] pmeGpu The PME GPU structure.
 514  */
 515 GPU_FUNC_QUALIFIER void pme_gpu_destroy(PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM;
 516
 517 /*! \libinternal \brief
 518  * Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU.
 519  *
 520  * \param[in,out] pmeGpu The PME GPU structure.
 521  * \param[in] nAtoms    The number of particles.
 522  * \param[in] charges   The pointer to the host-side array of particle charges.
 523  *
 524  * This function should only be called at the beginning of the run and on domain
 525  * decomposition. Should be called before the pme_gpu_set_io_ranges.
 526  */
 527 GPU_FUNC_QUALIFIER void pme_gpu_reinit_atoms(PmeGpu*     GPU_FUNC_ARGUMENT(pmeGpu),
 528                                              int         GPU_FUNC_ARGUMENT(nAtoms),
 529                                              const real* GPU_FUNC_ARGUMENT(charges)) GPU_FUNC_TERM;
 530
 531 /*! \libinternal \brief
 532  * The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing.
 533  *
 534  * This clears the device-side working buffers in preparation for a new computation.
 535  *
 536  * \param[in] pmeGpu            The PME GPU structure.
 537  */
 538 void pme_gpu_reinit_computation(const PmeGpu* pmeGpu);
 539
 540 /*! \brief
 541  * Blocks until PME GPU tasks are completed, and gets the output forces and virial/energy
 542  * (if they were to be computed).
 543  *
 544  * \param[in]  pme                     The PME data structure.
 545  * \param[in]  computeEnergyAndVirial  Tells if the energy and virial computation should be performed.
 546  * \param[out] wcycle                  The wallclock counter.
 547  * \returns                            The output forces, energy and virial.
 548  */
 549 GPU_FUNC_QUALIFIER PmeOutput pme_gpu_wait_finish_task(gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
 550                                                       bool GPU_FUNC_ARGUMENT(computeEnergyAndVirial),
 551                                                       gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle))
 552         GPU_FUNC_TERM_WITH_RETURN(PmeOutput{});
 553
 554 #endif