/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2016,2017, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file contains internal function definitions for performing the PME calculations on GPU.
 * These are not meant to be exposed outside of the PME GPU code.
 * As of now, their bodies are still in the common pme-gpu.cpp files.
 *
 * \author Aleksei Iupinov <a.yupinov@gmail.com>
 * \ingroup module_ewald
 */
#ifndef GMX_EWALD_PME_GPU_INTERNAL_H
#define GMX_EWALD_PME_GPU_INTERNAL_H

#include "gromacs/gpu_utils/gpu_macros.h"  // for the CUDA_FUNC_ macros

#include "pme-gpu-types.h"                 // for the inline functions accessing pme_gpu_t members

struct gmx_pme_t;                          // only used in pme_gpu_reinit
struct gmx_wallclock_gpu_pme_t;
struct pme_atomcomm_t;
struct gmx_device_info_t;
struct t_commrec;

namespace gmx
{
class MDLogger;
}
/* Some general constants for PME GPU behaviour follow. */

/*! \brief \libinternal
 * false: The atom data GPU buffers are sized precisely according to the number of atoms.
 *        (Except for the GPU spline data layout, which is interleaved for 2 atoms per warp in any case.)
 *        The atom index checks in the spread/gather code can hinder performance.
 * true:  The atom data GPU buffers are padded with zeroes so that the possible number of atoms
 *        fitting in is divisible by PME_ATOM_DATA_ALIGNMENT.
 *        The atom index checks are not performed. There should be a performance win, but how big it is remains to be seen.
 *        Additional cudaMemsetAsync calls are done occasionally (only on charges/coordinates; spline data is always recalculated now).
 * \todo Estimate performance differences.
 */
const bool c_usePadding = true;
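
/* As an illustration of the padding described above, a buffer sized for nAtoms
 * gets rounded up to the next multiple of the atom data alignment. A minimal
 * sketch of that arithmetic, using a hypothetical helper name (not part of the
 * GROMACS API) and pme_gpu_get_atom_data_alignment() declared below:
 *
 * \code
 * int pme_gpu_padded_atom_count_sketch(const pme_gpu_t *pmeGPU, int nAtoms)
 * {
 *     const int alignment = pme_gpu_get_atom_data_alignment(pmeGPU);
 *     return ((nAtoms + alignment - 1) / alignment) * alignment; // round up
 * }
 * \endcode
 */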
/*! \brief \libinternal
 * false: Atoms with zero charges are processed by PME. Could introduce some overhead.
 * true:  Atoms with zero charges are not processed by PME. Adds branching to the spread/gather.
 *        Could be good for performance in specific systems with lots of neutral atoms.
 * \todo Estimate performance differences.
 */
const bool c_skipNeutralAtoms = false;
/*! \brief \libinternal
 * Number of PME solve output floating point numbers:
 * 6 for the symmetric virial matrix + 1 for the reciprocal energy.
 */
const int c_virialAndEnergyCount = 7;
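
/* For illustration, a consumer of the solve output could unpack those 7 floats
 * as sketched below. The exact component ordering is an assumption of this
 * sketch; pme_gpu_get_energy_virial() below is the authoritative unpacking:
 *
 * \code
 * // h_virialAndEnergy: c_virialAndEnergyCount floats copied back from the GPU
 * // [0..5] - the 6 independent components of the symmetric virial (assumed order)
 * // [6]    - the reciprocal energy
 * const float reciprocalEnergy = h_virialAndEnergy[c_virialAndEnergyCount - 1];
 * \endcode
 */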
/* A block of CUDA-only functions that live in pme.cu */

/*! \libinternal \brief
 * Returns the number of atoms per chunk in the atom charges/coordinates data layout.
 * Depends on CUDA-specific block sizes, needed for the atom data padding.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \returns   Number of atoms in a single GPU atom data chunk.
 */
CUDA_FUNC_QUALIFIER int pme_gpu_get_atom_data_alignment(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM_WITH_RETURN(1)

/*! \libinternal \brief
 * Returns the number of atoms per chunk in the atom spline theta/dtheta data layout.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \returns   Number of atoms in a single GPU atom spline data chunk.
 */
CUDA_FUNC_QUALIFIER int pme_gpu_get_atom_spline_data_alignment(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM_WITH_RETURN(1)

/*! \libinternal \brief
 * Synchronizes the current step, waiting for the GPU kernels/transfers to finish.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_synchronize(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM
/*! \libinternal \brief
 * Allocates the fixed size energy and virial buffer both on GPU and CPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_alloc_energy_virial(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Frees the energy and virial memory both on GPU and CPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_free_energy_virial(pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Clears the energy and virial memory on GPU with 0.
 * Should be called at the end of the energy/virial calculation step.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_clear_energy_virial(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM
/*! \libinternal \brief
 * Reallocates and copies the pre-computed B-spline values to the GPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_realloc_and_copy_bspline_values(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Frees the pre-computed B-spline values on the GPU (and the transfer CPU buffers).
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_free_bspline_values(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Reallocates the GPU buffer for the PME forces.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_realloc_forces(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Frees the GPU buffer for the PME forces.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_free_forces(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM
/*! \libinternal \brief
 * Copies the forces from the CPU buffer to the GPU (to reduce them with the PME GPU gathered forces).
 * To be called e.g. after the bonded calculations.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \param[in] h_forces          The input forces rvec buffer.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_copy_input_forces(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU),
                                                   const float     *CUDA_FUNC_ARGUMENT(h_forces)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Copies the forces from the GPU to the CPU buffer. To be called after the gathering stage.
 *
 * \param[in]  pmeGPU           The PME GPU structure.
 * \param[out] h_forces         The output forces rvec buffer.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_copy_output_forces(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU),
                                                    float           *CUDA_FUNC_ARGUMENT(h_forces)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Waits for the PME GPU output forces copying to the CPU buffer to finish.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_sync_output_forces(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM
/*! \libinternal \brief
 * Reallocates the input coordinates buffer on the GPU (and clears the padded part if needed).
 *
 * \param[in] pmeGPU            The PME GPU structure.
 *
 * Needs to be called at the beginning of the run and on every DD step.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_realloc_coordinates(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Copies the input coordinates from the CPU buffer onto the GPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \param[in] h_coordinates     Input coordinates (XYZ rvec array).
 *
 * Needs to be called every MD step. The coordinates are then used in the spline calculation.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_copy_input_coordinates(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU),
                                                        const rvec      *CUDA_FUNC_ARGUMENT(h_coordinates)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Frees the coordinates on the GPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_free_coordinates(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM
/*! \libinternal \brief
 * Reallocates the buffer on the GPU and copies the charges/coefficients from the CPU buffer.
 * Clears the padded part if needed.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \param[in] h_coefficients    The input atom charges/coefficients.
 *
 * Does not need to be called every MD step, only whenever the local charges change
 * (at the beginning of the run, or on a DD step).
 */
CUDA_FUNC_QUALIFIER void pme_gpu_realloc_and_copy_input_coefficients(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU),
                                                                     const float     *CUDA_FUNC_ARGUMENT(h_coefficients)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Frees the charges/coefficients on the GPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_free_coefficients(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM
/*! \libinternal \brief
 * Reallocates the buffers on the GPU and the host for the atom spline data.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_realloc_spline_data(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Frees the buffers on the GPU for the atom spline data.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_free_spline_data(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Reallocates the buffers on the GPU and the host for the particle gridline indices.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_realloc_grid_indices(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Frees the buffer on the GPU for the particle gridline indices.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_free_grid_indices(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM
/*! \libinternal \brief
 * Reallocates the real space grid and the complex reciprocal grid (if needed) on the GPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_realloc_grids(pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Frees the real space grid and the complex reciprocal grid (if needed) on the GPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_free_grids(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Clears the real space grid on the GPU.
 * Should be called at the end of each MD step.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_clear_grids(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM
/*! \libinternal \brief
 * Reallocates and copies the pre-computed fractional coordinates' shifts to the GPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_realloc_and_copy_fract_shifts(pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Frees the pre-computed fractional coordinates' shifts on the GPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_free_fract_shifts(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Waits for the output virial/energy copying to the intermediate CPU buffer to finish.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_sync_output_energy_virial(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM
/*! \libinternal \brief
 * Copies the input real-space grid from the host to the GPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \param[in] h_grid            The host-side grid buffer.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_copy_input_gather_grid(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU),
                                                        float           *CUDA_FUNC_ARGUMENT(h_grid)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Copies the output real-space grid from the GPU to the host.
 *
 * \param[in]  pmeGPU           The PME GPU structure.
 * \param[out] h_grid           The host-side grid buffer.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_copy_output_spread_grid(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU),
                                                         float           *CUDA_FUNC_ARGUMENT(h_grid)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Waits for the grid copying to the host-side buffer after spreading to finish.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_sync_spread_grid(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Waits for the atom data copying to the intermediate host-side buffer after spline computation to finish.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_sync_spline_atom_data(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Waits for the grid copying to the host-side buffer after solving to finish.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_sync_solve_grid(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM
/*! \libinternal \brief
 * Does the one-time GPU-framework specific PME initialization.
 * For CUDA, the PME stream is created with the highest priority.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_init_internal(pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Destroys the PME GPU-framework specific data.
 * Should be called last in the PME GPU destructor.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_destroy_specific(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Initializes the PME GPU synchronization events.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_init_sync_events(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Destroys the PME GPU synchronization events.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_destroy_sync_events(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Initializes the CUDA FFT structures.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_reinit_3dfft(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Destroys the CUDA FFT structures.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_destroy_3dfft(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM
/* Several CUDA event-based timing functions that live in pme-timings.cu */

/*! \libinternal \brief
 * Finalizes all the PME GPU stage timings for the current step. Should be called at the end of every step.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_update_timings(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Resets the PME GPU timings. To be called at the reset step.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_reset_timings(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM

/*! \libinternal \brief
 * Copies the PME GPU timings to the gmx_wallclock_gpu_pme_t structure (for log output). To be called at the run end.
 *
 * \param[in]  pmeGPU           The PME GPU structure.
 * \param[out] timings          The gmx_wallclock_gpu_pme_t structure.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_get_timings(const pme_gpu_t         *CUDA_FUNC_ARGUMENT(pmeGPU),
                                             gmx_wallclock_gpu_pme_t *CUDA_FUNC_ARGUMENT(timings)) CUDA_FUNC_TERM
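
/* A hedged sketch of how these timing functions are expected to be driven by
 * the host-side code (the loop and the nSteps/pmeGPU names are illustrative
 * only, not part of this header):
 *
 * \code
 * gmx_wallclock_gpu_pme_t timings;
 * for (int step = 0; step < nSteps; step++)
 * {
 *     // ... launch the PME GPU stages for this step ...
 *     pme_gpu_update_timings(pmeGPU);    // finalize the per-stage timings
 * }
 * pme_gpu_get_timings(pmeGPU, &timings); // collect accumulated timings for the log
 * \endcode
 */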
/* The inlined convenience PME GPU status getters */

/*! \libinternal \brief
 * Tells if PME runs on multiple GPUs with the decomposition.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \returns   True if PME runs on multiple GPUs, false otherwise.
 */
gmx_inline bool pme_gpu_uses_dd(const pme_gpu_t *pmeGPU)
{
    return !pmeGPU->settings.useDecomposition;
}
/*! \libinternal \brief
 * Tells if PME performs the gathering stage on GPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \returns   True if the gathering is performed on GPU, false otherwise.
 */
gmx_inline bool pme_gpu_performs_gather(const pme_gpu_t *pmeGPU)
{
    return pmeGPU->settings.performGPUGather;
}

/*! \libinternal \brief
 * Tells if PME performs the FFT stages on GPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \returns   True if FFT is performed on GPU, false otherwise.
 */
gmx_inline bool pme_gpu_performs_FFT(const pme_gpu_t *pmeGPU)
{
    return pmeGPU->settings.performGPUFFT;
}
/*! \libinternal \brief
 * Tells if PME performs the grid (un-)wrapping on GPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \returns   True if (un-)wrapping is performed on GPU, false otherwise.
 */
gmx_inline bool pme_gpu_performs_wrapping(const pme_gpu_t *pmeGPU)
{
    return pmeGPU->settings.useDecomposition;
}

/*! \libinternal \brief
 * Tells if PME performs the grid solving on GPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \returns   True if solving is performed on GPU, false otherwise.
 */
gmx_inline bool pme_gpu_performs_solve(const pme_gpu_t *pmeGPU)
{
    return pmeGPU->settings.performGPUSolve;
}
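
/* These getters let the host-side code choose between the GPU and CPU code
 * paths per stage. A minimal dispatch sketch, assuming hypothetical h_grid and
 * do_cpu_fft() names that are not part of this header:
 *
 * \code
 * if (!pme_gpu_performs_FFT(pmeGPU))
 * {
 *     pme_gpu_copy_output_spread_grid(pmeGPU, h_grid); // bring the spread grid to the host
 *     pme_gpu_sync_spread_grid(pmeGPU);                // wait for the copy to finish
 *     do_cpu_fft(h_grid);                              // hypothetical CPU FFT fallback
 * }
 * \endcode
 */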
/* A block of C++ functions that live in pme-gpu-internal.cpp */

/*! \libinternal \brief
 * Returns the output virial and energy of the PME solving.
 * Should be called after pme_gpu_finish_step.
 *
 * \param[in]  pmeGPU           The PME GPU structure.
 * \param[out] energy           The output energy.
 * \param[out] virial           The output virial matrix.
 */
void pme_gpu_get_energy_virial(const pme_gpu_t *pmeGPU, real *energy, matrix virial);

/*! \libinternal \brief
 * Updates the unit cell parameters. Does not check if the update is necessary - that is done in pme_gpu_start_step().
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \param[in] box               The unit cell box.
 */
void pme_gpu_update_input_box(pme_gpu_t *pmeGPU, const matrix box);
/*! \libinternal \brief
 * Starts the PME GPU step (copies coordinates onto GPU, possibly sets the unit cell parameters).
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \param[in] needToUpdateBox   Tells if the stored unit cell parameters should be updated from \p box.
 * \param[in] box               The unit cell box, which does not necessarily change every step (it only does with pressure coupling enabled).
 *                              Only used if \p needToUpdateBox is true.
 * \param[in] h_coordinates     Input coordinates (XYZ rvec array).
 */
void pme_gpu_start_step(pme_gpu_t *pmeGPU, bool needToUpdateBox, const matrix box, const rvec *h_coordinates);

/*! \libinternal \brief
 * Finishes the PME GPU step, waiting for the output forces and/or energy/virial to be copied to the host.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \param[in] bCalcForces       The left-over flag from the CPU code which tells the function to copy the forces to the CPU side. Should be passed to the launch call instead. FIXME
 * \param[in] bCalcEnerVir      The left-over flag from the CPU code which tells the function to copy the energy/virial to the CPU side. Should be passed to the launch call instead.
 */
void pme_gpu_finish_step(const pme_gpu_t *pmeGPU, const bool bCalcForces,
                         const bool bCalcEnerVir);
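
/* A hedged sketch of the expected per-step call sequence (the kernel launches
 * themselves live elsewhere in the PME GPU code; the energy/virial variables
 * are illustrative only):
 *
 * \code
 * pme_gpu_start_step(pmeGPU, needToUpdateBox, box, h_coordinates); // upload the step input
 * // ... launch spread, FFT, solve, inverse FFT, gather ...
 * pme_gpu_finish_step(pmeGPU, bCalcForces, bCalcEnerVir);          // wait for the step output
 * if (bCalcEnerVir)
 * {
 *     pme_gpu_get_energy_virial(pmeGPU, &energy, virial);
 * }
 * \endcode
 */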
/*! \libinternal \brief
 * (Re-)initializes the PME GPU data at the beginning of the run or on DLB.
 *
 * \param[in,out] pme           The PME structure.
 * \param[in,out] gpuInfo       The GPU information structure.
 * \param[in]     mdlog         The logger.
 * \param[in]     cr            The communication structure.
 * \throws gmx::NotImplementedError if this generally valid PME structure is not valid for GPU runs.
 */
void pme_gpu_reinit(gmx_pme_t *pme, gmx_device_info_t *gpuInfo, const gmx::MDLogger &mdlog, const t_commrec *cr);

/*! \libinternal \brief
 * Destroys the PME GPU data at the end of the run.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 */
void pme_gpu_destroy(pme_gpu_t *pmeGPU);
/*! \libinternal \brief
 * Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU.
 *
 * \param[in] pmeGPU            The PME GPU structure.
 * \param[in] nAtoms            The number of particles.
 * \param[in] charges           The pointer to the host-side array of particle charges.
 *
 * This is a function that should only be called at the beginning of the run and on domain decomposition.
 * Should be called before pme_gpu_set_io_ranges.
 */
void pme_gpu_reinit_atoms(pme_gpu_t  *pmeGPU,
                          const int   nAtoms,
                          const real *charges);

#endif // GMX_EWALD_PME_GPU_INTERNAL_H