2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 * \brief This file contains internal function definitions for performing the PME calculations on GPU.
39 * These are not meant to be exposed outside of the PME GPU code.
40 * As of now, their bodies are still in the common pme_gpu.cpp files.
42 * \author Aleksei Iupinov <a.yupinov@gmail.com>
43 * \ingroup module_ewald
46 #ifndef GMX_EWALD_PME_GPU_INTERNAL_H
47 #define GMX_EWALD_PME_GPU_INTERNAL_H
49 #include "gromacs/fft/fft.h" // for the gmx_fft_direction enum
50 #include "gromacs/gpu_utils/devicebuffer_datatype.h"
51 #include "gromacs/gpu_utils/gpu_macros.h" // for the GPU_FUNC_ macros
52 #include "gromacs/utility/arrayref.h"
54 #include "pme_gpu_types_host.h"
55 #include "pme_output.h"
57 class GpuEventSynchronizer;
58 struct DeviceInformation;
61 struct gmx_pme_t; // only used in pme_gpu_reinit
64 enum class PmeForceOutputHandling;
68 struct PmeGpuSettings;
76 //! Type of spline data
77 enum class PmeSplineDataType
80 Derivatives, // dtheta
81 }; // TODO move this into new and shiny pme.h (pme-types.h?)
83 //! PME grid dimension ordering (from major to minor)
84 enum class GridOrdering
90 /*! \libinternal \brief
91 * Returns the number of atoms per chunk in the atom charges/coordinates data layout.
92 * Depends on CUDA-specific block sizes, needed for the atom data padding.
94 * \param[in] pmeGpu The PME GPU structure.
95 * \returns Number of atoms in a single GPU atom data chunk.
97 int pme_gpu_get_atom_data_alignment(const PmeGpu* pmeGpu);
99 /*! \libinternal \brief
100 * Returns the number of atoms per chunk in the atom spline theta/dtheta data layout.
102 * \param[in] pmeGpu The PME GPU structure.
103 * \returns Number of atoms in a single GPU atom spline data chunk.
105 int pme_gpu_get_atoms_per_warp(const PmeGpu* pmeGpu);
107 /*! \libinternal \brief
108 * Synchronizes the current computation, waiting for the GPU kernels/transfers to finish.
110 * \param[in] pmeGpu The PME GPU structure.
112 GPU_FUNC_QUALIFIER void pme_gpu_synchronize(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM;
114 /*! \libinternal \brief
115 * Allocates the fixed size energy and virial buffer both on GPU and CPU.
117 * \param[in,out] pmeGpu The PME GPU structure.
119 void pme_gpu_alloc_energy_virial(PmeGpu* pmeGpu);
121 /*! \libinternal \brief
122 * Frees the energy and virial memory both on GPU and CPU.
124 * \param[in] pmeGpu The PME GPU structure.
126 void pme_gpu_free_energy_virial(PmeGpu* pmeGpu);
128 /*! \libinternal \brief
129 * Clears the energy and virial memory on GPU with 0.
130 * Should be called at the end of PME computation which returned energy/virial.
132 * \param[in] pmeGpu The PME GPU structure.
134 void pme_gpu_clear_energy_virial(const PmeGpu* pmeGpu);
136 /*! \libinternal \brief
137 * Reallocates and copies the pre-computed B-spline values to the GPU.
139 * \param[in,out] pmeGpu The PME GPU structure.
141 void pme_gpu_realloc_and_copy_bspline_values(PmeGpu* pmeGpu);
143 /*! \libinternal \brief
144 * Frees the pre-computed B-spline values on the GPU (and the transfer CPU buffers).
146 * \param[in] pmeGpu The PME GPU structure.
148 void pme_gpu_free_bspline_values(const PmeGpu* pmeGpu);
150 /*! \libinternal \brief
151 * Reallocates the GPU buffer for the PME forces.
153 * \param[in] pmeGpu The PME GPU structure.
155 void pme_gpu_realloc_forces(PmeGpu* pmeGpu);
157 /*! \libinternal \brief
158 * Frees the GPU buffer for the PME forces.
160 * \param[in] pmeGpu The PME GPU structure.
162 void pme_gpu_free_forces(const PmeGpu* pmeGpu);
164 /*! \libinternal \brief
165 * Copies the forces from the CPU buffer to the GPU (to reduce them with the PME GPU gathered
166 * forces). To be called e.g. after the bonded calculations.
168 * \param[in] pmeGpu The PME GPU structure.
170 void pme_gpu_copy_input_forces(PmeGpu* pmeGpu);
172 /*! \libinternal \brief
173 * Copies the forces from the GPU to the CPU buffer. To be called after the gathering stage.
175 * \param[in] pmeGpu The PME GPU structure.
177 void pme_gpu_copy_output_forces(PmeGpu* pmeGpu);
179 /*! \libinternal \brief
180 * Checks whether work in the PME GPU stream has completed.
182 * \param[in] pmeGpu The PME GPU structure.
184 * \returns True if work in the PME stream has completed.
186 bool pme_gpu_stream_query(const PmeGpu* pmeGpu);
188 /*! \libinternal \brief
189 * Reallocates the buffer on the GPU and copies the charges/coefficients from the CPU buffer.
190 * Clears the padded part if needed.
192 * \param[in] pmeGpu The PME GPU structure.
193 * \param[in] h_coefficients The input atom charges/coefficients.
195 * Does not need to be done for every PME computation, only whenever the local charges change.
196 * (So, in the beginning of the run, or on DD step).
198 void pme_gpu_realloc_and_copy_input_coefficients(PmeGpu* pmeGpu, const float* h_coefficients);
200 /*! \libinternal \brief
201 * Frees the charges/coefficients on the GPU.
203 * \param[in] pmeGpu The PME GPU structure.
205 void pme_gpu_free_coefficients(const PmeGpu* pmeGpu);
207 /*! \libinternal \brief
208 * Reallocates the buffers on the GPU and the host for the atoms spline data.
210 * \param[in,out] pmeGpu The PME GPU structure.
212 void pme_gpu_realloc_spline_data(PmeGpu* pmeGpu);
214 /*! \libinternal \brief
215 * Frees the buffers on the GPU for the atoms spline data.
217 * \param[in] pmeGpu The PME GPU structure.
219 void pme_gpu_free_spline_data(const PmeGpu* pmeGpu);
221 /*! \libinternal \brief
222 * Reallocates the buffers on the GPU and the host for the particle gridline indices.
224 * \param[in,out] pmeGpu The PME GPU structure.
226 void pme_gpu_realloc_grid_indices(PmeGpu* pmeGpu);
228 /*! \libinternal \brief
229 * Frees the buffer on the GPU for the particle gridline indices.
231 * \param[in] pmeGpu The PME GPU structure.
233 void pme_gpu_free_grid_indices(const PmeGpu* pmeGpu);
235 /*! \libinternal \brief
236 * Reallocates the real space grid and the complex reciprocal grid (if needed) on the GPU.
238 * \param[in] pmeGpu The PME GPU structure.
240 void pme_gpu_realloc_grids(PmeGpu* pmeGpu);
242 /*! \libinternal \brief
243 * Frees the real space grid and the complex reciprocal grid (if needed) on the GPU.
245 * \param[in] pmeGpu The PME GPU structure.
247 void pme_gpu_free_grids(const PmeGpu* pmeGpu);
249 /*! \libinternal \brief
250 * Clears the real space grid on the GPU.
251 * Should be called at the end of each computation.
253 * \param[in] pmeGpu The PME GPU structure.
255 void pme_gpu_clear_grids(const PmeGpu* pmeGpu);
257 /*! \libinternal \brief
258 * Reallocates and copies the pre-computed fractional coordinates' shifts to the GPU.
260 * \param[in] pmeGpu The PME GPU structure.
262 void pme_gpu_realloc_and_copy_fract_shifts(PmeGpu* pmeGpu);
264 /*! \libinternal \brief
265 * Frees the pre-computed fractional coordinates' shifts on the GPU.
267 * \param[in] pmeGpu The PME GPU structure.
269 void pme_gpu_free_fract_shifts(const PmeGpu* pmeGpu);
271 /*! \libinternal \brief
272 * Copies the input real-space grid from the host to the GPU.
274 * \param[in] pmeGpu The PME GPU structure.
275 * \param[in] h_grid The host-side grid buffer.
277 void pme_gpu_copy_input_gather_grid(const PmeGpu* pmeGpu, float* h_grid);
279 /*! \libinternal \brief
280 * Copies the output real-space grid from the GPU to the host.
282 * \param[in] pmeGpu The PME GPU structure.
283 * \param[out] h_grid The host-side grid buffer.
285 void pme_gpu_copy_output_spread_grid(const PmeGpu* pmeGpu, float* h_grid);
287 /*! \libinternal \brief
288 * Copies the spread output spline data and gridline indices from the GPU to the host.
290 * \param[in] pmeGpu The PME GPU structure.
292 void pme_gpu_copy_output_spread_atom_data(const PmeGpu* pmeGpu);
294 /*! \libinternal \brief
295 * Copies the gather input spline data and gridline indices from the host to the GPU.
297 * \param[in] pmeGpu The PME GPU structure.
299 void pme_gpu_copy_input_gather_atom_data(const PmeGpu* pmeGpu);
301 /*! \libinternal \brief
302 * Waits for the grid copying to the host-side buffer after spreading to finish.
304 * \param[in] pmeGpu The PME GPU structure.
306 void pme_gpu_sync_spread_grid(const PmeGpu* pmeGpu);
308 /*! \libinternal \brief
309 * Does the one-time GPU-framework specific PME initialization.
310 * For CUDA, the PME stream is created with the highest priority.
312 * \param[in] pmeGpu The PME GPU structure.
314 void pme_gpu_init_internal(PmeGpu* pmeGpu);
316 /*! \libinternal \brief
317 * Destroys the PME GPU-framework specific data.
318 * Should be called last in the PME GPU destructor.
320 * \param[in] pmeGpu The PME GPU structure.
322 void pme_gpu_destroy_specific(const PmeGpu* pmeGpu);
324 /*! \libinternal \brief
325 * Initializes the CUDA FFT structures.
327 * \param[in] pmeGpu The PME GPU structure.
329 void pme_gpu_reinit_3dfft(const PmeGpu* pmeGpu);
331 /*! \libinternal \brief
332 * Destroys the CUDA FFT structures.
334 * \param[in] pmeGpu The PME GPU structure.
336 void pme_gpu_destroy_3dfft(const PmeGpu* pmeGpu);
338 /* The PME stages themselves */
340 /*! \libinternal \brief
341 * A GPU spline computation and charge spreading function.
343 * \param[in] pmeGpu The PME GPU structure.
344 * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory;
345 * can be nullptr when invoked on a separate PME rank or from PME tests.
346 * \param[in] gridIndex Index of the PME grid - unused, assumed to be 0.
347 * \param[out] h_grid The host-side grid buffer (used only if the result of the spread is expected on the host,
348 * e.g. testing or host-side FFT)
349 * \param[in] computeSplines Should the computation of spline parameters and gridline indices be performed.
350 * \param[in] spreadCharges Should the charges/coefficients be spread on the grid.
352 GPU_FUNC_QUALIFIER void pme_gpu_spread(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
353 GpuEventSynchronizer* GPU_FUNC_ARGUMENT(xReadyOnDevice),
354 int GPU_FUNC_ARGUMENT(gridIndex),
355 real* GPU_FUNC_ARGUMENT(h_grid),
356 bool GPU_FUNC_ARGUMENT(computeSplines),
357 bool GPU_FUNC_ARGUMENT(spreadCharges)) GPU_FUNC_TERM;
359 /*! \libinternal \brief
360 * 3D FFT R2C/C2R routine.
362 * \param[in] pmeGpu The PME GPU structure.
363 * \param[in] direction Transform direction (real-to-complex or complex-to-real)
364 * \param[in] gridIndex Index of the PME grid - unused, assumed to be 0.
366 void pme_gpu_3dfft(const PmeGpu* pmeGpu, enum gmx_fft_direction direction, int gridIndex);
368 /*! \libinternal \brief
369 * A GPU Fourier space solving function.
371 * \param[in] pmeGpu The PME GPU structure.
372 * \param[in,out] h_grid The host-side input and output Fourier grid buffer (used only with testing or host-side FFT)
373 * \param[in] gridOrdering Specifies the dimenion ordering of the complex grid. TODO: store this information?
374 * \param[in] computeEnergyAndVirial Tells if the energy and virial computation should also be performed.
376 GPU_FUNC_QUALIFIER void pme_gpu_solve(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
377 t_complex* GPU_FUNC_ARGUMENT(h_grid),
378 GridOrdering GPU_FUNC_ARGUMENT(gridOrdering),
379 bool GPU_FUNC_ARGUMENT(computeEnergyAndVirial)) GPU_FUNC_TERM;
381 /*! \libinternal \brief
382 * A GPU force gathering function.
384 * \param[in] pmeGpu The PME GPU structure.
385 * reductions. \param[in] h_grid The host-side grid buffer (used only in testing mode)
387 GPU_FUNC_QUALIFIER void pme_gpu_gather(PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
388 const float* GPU_FUNC_ARGUMENT(h_grid)) GPU_FUNC_TERM;
390 /*! \brief Sets the device pointer to coordinate data
391 * \param[in] pmeGpu The PME GPU structure.
392 * \param[in] d_x Pointer to coordinate data
394 GPU_FUNC_QUALIFIER void pme_gpu_set_kernelparam_coordinates(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
395 DeviceBuffer<gmx::RVec> GPU_FUNC_ARGUMENT(d_x)) GPU_FUNC_TERM;
397 /*! \brief Return pointer to device copy of force data.
398 * \param[in] pmeGpu The PME GPU structure.
399 * \returns Pointer to force data
401 GPU_FUNC_QUALIFIER void* pme_gpu_get_kernelparam_forces(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
402 GPU_FUNC_TERM_WITH_RETURN(nullptr);
404 /*! \brief Return pointer to GPU stream.
405 * \param[in] pmeGpu The PME GPU structure.
406 * \returns Pointer to stream object.
408 GPU_FUNC_QUALIFIER void* pme_gpu_get_stream(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
409 GPU_FUNC_TERM_WITH_RETURN(nullptr);
411 /*! \brief Return pointer to GPU context (for OpenCL builds).
412 * \param[in] pmeGpu The PME GPU structure.
413 * \returns Pointer to context object.
415 GPU_FUNC_QUALIFIER void* pme_gpu_get_context(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
416 GPU_FUNC_TERM_WITH_RETURN(nullptr);
418 /*! \brief Return pointer to the sync object triggered after the PME force calculation completion
419 * \param[in] pmeGpu The PME GPU structure.
420 * \returns Pointer to sync object
422 GPU_FUNC_QUALIFIER GpuEventSynchronizer* pme_gpu_get_forces_ready_synchronizer(
423 const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
425 /*! \libinternal \brief
426 * Returns the PME GPU settings
428 * \param[in] pmeGpu The PME GPU structure.
429 * \returns The settings for PME on GPU
431 inline const PmeGpuSettings& pme_gpu_settings(const PmeGpu* pmeGpu)
433 return pmeGpu->settings;
436 /*! \libinternal \brief
437 * Returns the PME GPU staging object
439 * \param[in] pmeGpu The PME GPU structure.
440 * \returns The staging object for PME on GPU
442 inline const PmeGpuStaging& pme_gpu_staging(const PmeGpu* pmeGpu)
444 return pmeGpu->staging;
447 /*! \libinternal \brief
448 * Sets whether the PME module is running in testing mode
450 * \param[in] pmeGpu The PME GPU structure.
451 * \param[in] testing Whether testing mode is on.
453 inline void pme_gpu_set_testing(PmeGpu* pmeGpu, bool testing)
457 pmeGpu->settings.copyAllOutputs = testing;
458 pmeGpu->settings.transferKind = testing ? GpuApiCallBehavior::Sync : GpuApiCallBehavior::Async;
462 /* A block of C++ functions that live in pme_gpu_internal.cpp */
464 /*! \libinternal \brief
465 * Returns the energy and virial GPU outputs, useful for testing.
467 * It is the caller's responsibility to be aware of whether the GPU
468 * handled the solve stage.
470 * \param[in] pme The PME structure.
471 * \param[out] output Pointer to output where energy and virial should be stored.
473 GPU_FUNC_QUALIFIER void pme_gpu_getEnergyAndVirial(const gmx_pme_t& GPU_FUNC_ARGUMENT(pme),
474 PmeOutput* GPU_FUNC_ARGUMENT(output)) GPU_FUNC_TERM;
476 /*! \libinternal \brief
477 * Returns the GPU outputs (forces, energy and virial)
479 * \param[in] pme The PME structure.
480 * \param[in] flags The combination of flags that affected this PME computation.
481 * The flags are the GMX_PME_ flags from pme.h.
482 * \returns The output object.
484 GPU_FUNC_QUALIFIER PmeOutput pme_gpu_getOutput(const gmx_pme_t& GPU_FUNC_ARGUMENT(pme),
485 int GPU_FUNC_ARGUMENT(flags))
486 GPU_FUNC_TERM_WITH_RETURN(PmeOutput{});
488 /*! \libinternal \brief
489 * Updates the unit cell parameters. Does not check if update is necessary - that is done in pme_gpu_prepare_computation().
491 * \param[in] pmeGpu The PME GPU structure.
492 * \param[in] box The unit cell box.
494 GPU_FUNC_QUALIFIER void pme_gpu_update_input_box(PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
495 const matrix GPU_FUNC_ARGUMENT(box)) GPU_FUNC_TERM;
497 /*! \libinternal \brief
498 * Finishes the PME GPU computation, waiting for the output forces and/or energy/virial to be copied to the host.
499 * If forces were computed, they will have arrived at the external host buffer provided to gather.
500 * If virial/energy were computed, they will have arrived into the internal staging buffer
501 * (even though that should have already happened before even launching the gather).
502 * Finally, cudaEvent_t based GPU timers get updated if enabled. They also need stream synchronization for correctness.
503 * Additionally, device-side buffers are cleared asynchronously for the next computation.
505 * \param[in] pmeGpu The PME GPU structure.
507 void pme_gpu_finish_computation(const PmeGpu* pmeGpu);
509 //! A binary enum for spline data layout transformation
510 enum class PmeLayoutTransform
516 /*! \libinternal \brief
517 * Rearranges the atom spline data between the GPU and host layouts.
518 * Only used for test purposes so far, likely to be horribly slow.
520 * \param[in] pmeGpu The PME GPU structure.
521 * \param[out] atc The PME CPU atom data structure (with a single-threaded layout).
522 * \param[in] type The spline data type (values or derivatives).
523 * \param[in] dimIndex Dimension index.
524 * \param[in] transform Layout transform type
526 GPU_FUNC_QUALIFIER void pme_gpu_transform_spline_atom_data(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
527 const PmeAtomComm* GPU_FUNC_ARGUMENT(atc),
528 PmeSplineDataType GPU_FUNC_ARGUMENT(type),
529 int GPU_FUNC_ARGUMENT(dimIndex),
530 PmeLayoutTransform GPU_FUNC_ARGUMENT(transform)) GPU_FUNC_TERM;
532 /*! \libinternal \brief
533 * Gets a unique index to an element in a spline parameter buffer (theta/dtheta),
534 * which is laid out for GPU spread/gather kernels. The index is wrt the execution block,
535 * in range(0, atomsPerBlock * order * DIM).
536 * This is a wrapper, only used in unit tests.
537 * \param[in] order PME order
538 * \param[in] splineIndex Spline contribution index (from 0 to \p order - 1)
539 * \param[in] dimIndex Dimension index (from 0 to 2)
540 * \param[in] atomIndex Atom index wrt the block.
541 * \param[in] atomsPerWarp Number of atoms processed by a warp.
543 * \returns Index into theta or dtheta array using GPU layout.
545 int getSplineParamFullIndex(int order, int splineIndex, int dimIndex, int atomIndex, int atomsPerWarp);
547 /*! \libinternal \brief
548 * Get the normal/padded grid dimensions of the real-space PME grid on GPU. Only used in tests.
550 * \param[in] pmeGpu The PME GPU structure.
551 * \param[out] gridSize Pointer to the grid dimensions to fill in.
552 * \param[out] paddedGridSize Pointer to the padded grid dimensions to fill in.
554 GPU_FUNC_QUALIFIER void pme_gpu_get_real_grid_sizes(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
555 gmx::IVec* GPU_FUNC_ARGUMENT(gridSize),
556 gmx::IVec* GPU_FUNC_ARGUMENT(paddedGridSize)) GPU_FUNC_TERM;
558 /*! \libinternal \brief
559 * (Re-)initializes the PME GPU data at the beginning of the run or on DLB.
561 * \param[in,out] pme The PME structure.
562 * \param[in] deviceInfo The GPU device information structure.
563 * \param[in] pmeGpuProgram The PME GPU program data
564 * \throws gmx::NotImplementedError if this generally valid PME structure is not valid for GPU runs.
566 GPU_FUNC_QUALIFIER void pme_gpu_reinit(gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
567 const DeviceInformation* GPU_FUNC_ARGUMENT(deviceInfo),
568 const PmeGpuProgram* GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM;
570 /*! \libinternal \brief
571 * Destroys the PME GPU data at the end of the run.
573 * \param[in] pmeGpu The PME GPU structure.
575 GPU_FUNC_QUALIFIER void pme_gpu_destroy(PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM;
577 /*! \libinternal \brief
578 * Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU.
580 * \param[in] pmeGpu The PME GPU structure.
581 * \param[in] nAtoms The number of particles.
582 * \param[in] charges The pointer to the host-side array of particle charges.
584 * This is a function that should only be called in the beginning of the run and on domain
585 * decomposition. Should be called before the pme_gpu_set_io_ranges.
587 GPU_FUNC_QUALIFIER void pme_gpu_reinit_atoms(PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
588 int GPU_FUNC_ARGUMENT(nAtoms),
589 const real* GPU_FUNC_ARGUMENT(charges)) GPU_FUNC_TERM;
591 /*! \brief \libinternal
592 * The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing.
594 * This clears the device-side working buffers in preparation for new computation.
596 * \param[in] pmeGpu The PME GPU structure.
598 void pme_gpu_reinit_computation(const PmeGpu* pmeGpu);
601 * Blocks until PME GPU tasks are completed, and gets the output forces and virial/energy
602 * (if they were to be computed).
604 * \param[in] pme The PME data structure.
605 * \param[in] flags The combination of flags to affect this PME computation.
606 * The flags are the GMX_PME_ flags from pme.h.
607 * \param[out] wcycle The wallclock counter.
608 * \return The output forces, energy and virial
610 GPU_FUNC_QUALIFIER PmeOutput pme_gpu_wait_finish_task(gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
611 int GPU_FUNC_ARGUMENT(flags),
612 gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle))
613 GPU_FUNC_TERM_WITH_RETURN(PmeOutput{});