2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
38 /*! \libinternal \file
40 * \brief This file contains function declarations necessary for
41 * computing energies and forces for the PME long-ranged part (Coulomb
44 * \author Berk Hess <hess@kth.se>
46 * \ingroup module_ewald
49 #ifndef GMX_EWALD_PME_H
50 #define GMX_EWALD_PME_H
55 #include "gromacs/gpu_utils/devicebuffer_datatype.h"
56 #include "gromacs/gpu_utils/gpu_macros.h"
57 #include "gromacs/math/vectypes.h"
58 #include "gromacs/utility/real.h"
65 struct gmx_wallclock_gpu_pme_t;
66 struct gmx_enerdata_t;
74 enum class GpuTaskCompletion;
76 class GpuEventSynchronizer;
82 class ForceWithVirial;
84 enum class PinningPolicy : int;
87 /*! \libinternal \brief Class for managing usage of separate PME-only ranks
89 * Used for checking if some parts of the code could not use PME-only ranks
92 class SeparatePmeRanksPermitted
95 //! Disables PME ranks permitted flag with a reason
96 void disablePmeRanks(const std::string& reason);
97 //! Return status of PME ranks usage
98 bool permitSeparatePmeRanks() const;
99 //! Returns all reasons for not using PME ranks
100 std::string reasonsWhyDisabled() const;
103 //! Flag that informs whether simulation could use dedicated PME ranks
104 bool permitSeparatePmeRanks_ = true;
105 //! Storage for all reasons, why PME ranks could not be used
106 std::vector<std::string> reasons_;
113 GMX_SUM_GRID_FORWARD,
114 GMX_SUM_GRID_BACKWARD
117 /*! \brief Possible PME codepaths on a rank.
118 * \todo: make this enum class with gmx_pme_t C++ refactoring
120 enum class PmeRunMode
122 None, //!< No PME task is done
123 CPU, //!< Whole PME computation is done on CPU
124 GPU, //!< Whole PME computation is done on GPU
125 Mixed, //!< Mixed mode: only spread and gather run on GPU; FFT and solving are done on CPU.
128 /*! \brief Return the smallest allowed PME grid size for \p pmeOrder */
129 int minimalPmeGridSize(int pmeOrder);
131 //! Return whether the grid of \c pme is identical to \c grid_size.
132 bool gmx_pme_grid_matches(const gmx_pme_t& pme, const ivec grid_size);
134 /*! \brief Check restrictions on pme_order and the PME grid nkx,nky,nkz.
136 * With errorsAreFatal=true, an exception or fatal error is generated
137 * on violation of restrictions.
138 * With errorsAreFatal=false, false is returned on violation of restrictions.
139 * When all restrictions are obeyed, true is returned.
140 * Argument useThreads tells if any MPI rank doing PME uses more than 1 thread.
141 * If useThreads is unknown at the time of calling, pass true for conservative checking.
143 * The PME GPU restrictions are checked separately during pme_gpu_init().
145 bool gmx_pme_check_restrictions(int pme_order,
149 int numPmeDomainsAlongX,
151 bool errorsAreFatal);
153 /*! \brief Construct PME data
155 * \throws gmx::InconsistentInputError if input grid sizes/PME order are inconsistent.
156 * \returns Pointer to newly allocated and initialized PME data.
158 * \todo We should evolve something like a \c GpuManager that holds \c
159 * DeviceInformation* and \c PmeGpuProgram* and perhaps other
160 * related things whose lifetime can/should exceed that of a task (or
161 * perhaps task manager). See Issue #2522.
163 gmx_pme_t* gmx_pme_init(const t_commrec* cr,
164 const NumPmeDomains& numPmeDomains,
165 const t_inputrec* ir,
166 gmx_bool bFreeEnergy_q,
167 gmx_bool bFreeEnergy_lj,
168 gmx_bool bReproducible,
174 const DeviceContext* deviceContext,
175 const DeviceStream* deviceStream,
176 const PmeGpuProgram* pmeGpuProgram,
177 const gmx::MDLogger& mdlog);
179 /*! \brief As gmx_pme_init, but takes most settings, except the grid/Ewald coefficients, from
180 * pme_src. This is only called when the PME cut-off/grid size changes.
182 void gmx_pme_reinit(gmx_pme_t** pmedata,
185 const t_inputrec* ir,
186 const ivec grid_size,
190 /*! \brief Destroys the PME data structure.*/
191 void gmx_pme_destroy(gmx_pme_t* pme);
193 /*! \brief Do a PME calculation on a CPU for the long range electrostatics and/or LJ.
195 * Computes the PME forces and the energy and virial, when requested,
196 * for all atoms in \p coordinates. Forces, when requested, are added
197 * to the buffer \p forces, which is allowed to contain more elements
198 * than the number of elements in \p coordinates.
199 * The \p stepWork argument determines which
200 * parts of the calculation are performed.
202 * \return 0 indicates all well, non zero is an error code.
204 int gmx_pme_do(struct gmx_pme_t* pme,
205 gmx::ArrayRef<const gmx::RVec> coordinates,
206 gmx::ArrayRef<gmx::RVec> forces,
207 gmx::ArrayRef<const real> chargeA,
208 gmx::ArrayRef<const real> chargeB,
209 gmx::ArrayRef<const real> c6A,
210 gmx::ArrayRef<const real> c6B,
211 gmx::ArrayRef<const real> sigmaA,
212 gmx::ArrayRef<const real> sigmaB,
218 gmx_wallcycle* wcycle,
227 const gmx::StepWorkload& stepWork);
229 /*! \brief Calculate the PME grid energy V for n charges.
231 * The potential (found in \p pme) must have been found already with a
232 * call to gmx_pme_do(). Note that the charges are not spread on the grid in the
233 * pme struct. Currently does not work in parallel or with free
236 real gmx_pme_calc_energy(gmx_pme_t* pme, gmx::ArrayRef<const gmx::RVec> x, gmx::ArrayRef<const real> q);
239 * This function updates the local atom data on GPU after DD (charges, coordinates, etc.).
240 * TODO: it should update the PME CPU atom data as well.
241 * (currently PME CPU call gmx_pme_do() gets passed the input pointers for each computation).
243 * \param[in,out] pme The PME structure.
244 * \param[in] numAtoms The number of particles.
245 * \param[in] chargesA The pointer to the array of particle charges in the normal state or FEP
246 * state A. Can be nullptr if PME is not performed on the GPU.
247 * \param[in] chargesB The pointer to the array of particle charges in state B. Only used if
248 * charges are perturbed and can otherwise be nullptr.
250 void gmx_pme_reinit_atoms(gmx_pme_t* pme,
252 gmx::ArrayRef<const real> chargesA,
253 gmx::ArrayRef<const real> chargesB);
255 /* A block of PME GPU functions */
257 /*! \brief Checks whether the GROMACS build allows to run PME on GPU.
258 * TODO: this partly duplicates an internal PME assert function
259 * pme_gpu_check_restrictions(), except that one works with a
260 * formed gmx_pme_t structure. Should that one go away/work with inputrec?
262 * \param[out] error If non-null, the error message when PME is not supported on GPU.
264 * \returns true if PME can run on GPU on this build, false otherwise.
266 bool pme_gpu_supports_build(std::string* error);
268 /*! \brief Checks whether the detected (GPU) hardware allows to run PME on GPU.
270 * \param[in] hwinfo Information about the detected hardware
271 * \param[out] error If non-null, the error message when PME is not supported on GPU.
273 * \returns true if PME can run on GPU with this hardware, false otherwise.
275 bool pme_gpu_supports_hardware(const gmx_hw_info_t& hwinfo, std::string* error);
277 /*! \brief Checks whether the input system allows to run PME on GPU.
278 * TODO: this partly duplicates an internal PME assert function
279 * pme_gpu_check_restrictions(), except that one works with a
280 * formed gmx_pme_t structure. Should that one go away/work with inputrec?
282 * \param[in] ir Input system.
283 * \param[out] error If non-null, the error message if the input is not supported on GPU.
285 * \returns true if PME can run on GPU with this input, false otherwise.
287 bool pme_gpu_supports_input(const t_inputrec& ir, std::string* error);
290 * Returns the active PME codepath (CPU, GPU, mixed).
291 * \todo This is a rather static data that should be managed by the higher level task scheduler.
293 * \param[in] pme The PME data structure.
294 * \returns active PME codepath.
296 PmeRunMode pme_run_mode(const gmx_pme_t* pme);
298 /*! \libinternal \brief
299 * Return the pinning policy appropriate for this build configuration
300 * for relevant buffers used for PME task on this rank (e.g. running
302 gmx::PinningPolicy pme_get_pinning_policy();
305 * Tells if PME is enabled to run on GPU (not necessarily active at the moment).
306 * \todo This is a rather static data that should be managed by the hardware assignment manager.
307 * For now, it is synonymous with the active PME codepath (in the absence of dynamic switching).
309 * \param[in] pme The PME data structure.
310 * \returns true if PME can run on GPU, false otherwise.
312 inline bool pme_gpu_task_enabled(const gmx_pme_t* pme)
314 return (pme != nullptr) && (pme_run_mode(pme) != PmeRunMode::CPU); // any non-CPU run mode (GPU or Mixed) implies a PME GPU task
317 /*! \brief Returns the block size requirement
319 * The GPU version of PME requires that the coordinates array have a
320 * size divisible by the returned number.
322 * \param[in] pme The PME data structure.
324 GPU_FUNC_QUALIFIER int pme_gpu_get_block_size(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
325 GPU_FUNC_TERM_WITH_RETURN(0);
327 // The following functions are all the PME GPU entry points,
328 // currently inlining to nothing on non-CUDA builds.
331 * Resets the PME GPU timings. To be called at the reset step.
333 * \param[in] pme The PME structure.
335 GPU_FUNC_QUALIFIER void pme_gpu_reset_timings(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM;
338 * Copies the PME GPU timings to the gmx_wallclock_gpu_pme_t structure (for log output). To be called at the run end.
340 * \param[in] pme The PME structure.
341 * \param[in] timings The gmx_wallclock_gpu_pme_t structure.
343 GPU_FUNC_QUALIFIER void pme_gpu_get_timings(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
344 gmx_wallclock_gpu_pme_t* GPU_FUNC_ARGUMENT(timings)) GPU_FUNC_TERM;
346 /* The main PME GPU functions */
349 * Prepares PME on GPU computation (updating the box if needed)
350 * \param[in] pme The PME data structure.
351 * \param[in] box The unit cell box.
352 * \param[in] wcycle The wallclock counter.
353 * \param[in] stepWork The required work for this simulation step
355 GPU_FUNC_QUALIFIER void pme_gpu_prepare_computation(gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
356 const matrix GPU_FUNC_ARGUMENT(box),
357 gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle),
358 const gmx::StepWorkload& GPU_FUNC_ARGUMENT(stepWork)) GPU_FUNC_TERM;
361 * Launches first stage of PME on GPU - spreading kernel.
363 * \param[in] pme The PME data structure.
364 * \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates
365 * are ready in the device memory; nullptr allowed only on separate PME ranks.
366 * \param[in] wcycle The wallclock counter.
367 * \param[in] lambdaQ The Coulomb lambda of the current state of the
368 * system. Only used if FEP of Coulomb is active.
370 GPU_FUNC_QUALIFIER void pme_gpu_launch_spread(gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
371 GpuEventSynchronizer* GPU_FUNC_ARGUMENT(xReadyOnDevice),
372 gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle),
373 real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
376 * Launches middle stages of PME (FFT R2C, solving, FFT C2R) either on GPU or on CPU, depending on the run mode.
378 * \param[in] pme The PME data structure.
379 * \param[in] wcycle The wallclock counter.
380 * \param[in] stepWork The required work for this simulation step
382 GPU_FUNC_QUALIFIER void
383 pme_gpu_launch_complex_transforms(gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
384 gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle),
385 const gmx::StepWorkload& GPU_FUNC_ARGUMENT(stepWork)) GPU_FUNC_TERM;
388 * Launches last stage of PME on GPU - force gathering and D2H force transfer.
390 * \param[in] pme The PME data structure.
391 * \param[in] wcycle The wallclock counter.
392 * \param[in] lambdaQ The Coulomb lambda to use when calculating the results.
394 GPU_FUNC_QUALIFIER void pme_gpu_launch_gather(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
395 gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle),
396 real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
399 * Attempts to complete PME GPU tasks.
401 * The \p completionKind argument controls whether the function blocks until all
402 * PME GPU tasks enqueued completed (as pme_gpu_wait_finish_task() does) or only
403 * checks and returns immediately if they did not.
404 * When blocking or the tasks have completed it also gets the output forces
405 * by assigning the ArrayRef to the \p forces pointer passed in.
406 * Virial/energy are also outputs if they were to be computed.
408 * \param[in] pme The PME data structure.
409 * \param[in] stepWork The required work for this simulation step
410 * \param[in] wcycle The wallclock counter.
411 * \param[out] forceWithVirial The output force and virial
412 * \param[out] enerd The output energies
413 * \param[in] lambdaQ The Coulomb lambda to use when calculating the results.
414 * \param[in] completionKind Indicates whether PME task completion should only be checked rather
416 * \returns True if the PME GPU tasks have completed
418 GPU_FUNC_QUALIFIER bool pme_gpu_try_finish_task(gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
419 const gmx::StepWorkload& GPU_FUNC_ARGUMENT(stepWork),
420 gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle),
421 gmx::ForceWithVirial* GPU_FUNC_ARGUMENT(forceWithVirial),
422 gmx_enerdata_t* GPU_FUNC_ARGUMENT(enerd),
423 real GPU_FUNC_ARGUMENT(lambdaQ),
424 GpuTaskCompletion GPU_FUNC_ARGUMENT(completionKind))
425 GPU_FUNC_TERM_WITH_RETURN(false);
428 * Blocks until PME GPU tasks are completed, and gets the output forces and virial/energy
429 * (if they were to be computed).
431 * \param[in] pme The PME data structure.
432 * \param[in] stepWork The required work for this simulation step
433 * \param[in] wcycle The wallclock counter.
434 * \param[out] forceWithVirial The output force and virial
435 * \param[out] enerd The output energies
436 * \param[in] lambdaQ The Coulomb lambda to use when calculating the results.
438 GPU_FUNC_QUALIFIER void pme_gpu_wait_and_reduce(gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
439 const gmx::StepWorkload& GPU_FUNC_ARGUMENT(stepWork),
440 gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle),
441 gmx::ForceWithVirial* GPU_FUNC_ARGUMENT(forceWithVirial),
442 gmx_enerdata_t* GPU_FUNC_ARGUMENT(enerd),
443 real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
446 * The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing.
448 * Clears the internal grid and energy/virial buffers; it is not safe to start
449 * the PME computation without calling this.
450 * Note that unlike in the nbnxn module, the force buffer does not need clearing.
452 * \todo Rename this function to *clear* -- it clearly only does output resetting
453 * and we should be clear about what the function does.
455 * \param[in] pme The PME data structure.
456 * \param[in] wcycle The wallclock counter.
458 GPU_FUNC_QUALIFIER void pme_gpu_reinit_computation(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
459 gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle)) GPU_FUNC_TERM;
461 /*! \brief Set pointer to device copy of coordinate data.
462 * \param[in] pme The PME data structure.
463 * \param[in] d_x The pointer to the positions buffer to be set
465 GPU_FUNC_QUALIFIER void pme_gpu_set_device_x(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
466 DeviceBuffer<gmx::RVec> GPU_FUNC_ARGUMENT(d_x)) GPU_FUNC_TERM;
468 /*! \brief Get pointer to device copy of force data.
469 * \param[in] pme The PME data structure.
470 * \returns Pointer to force data
472 GPU_FUNC_QUALIFIER DeviceBuffer<gmx::RVec> pme_gpu_get_device_f(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
473 GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer<gmx::RVec>{});
475 /*! \brief Get pointer to the device synchronizer object that allows syncing on PME force calculation completion
476 * \param[in] pme The PME data structure.
477 * \returns Pointer to synchronizer
479 GPU_FUNC_QUALIFIER GpuEventSynchronizer* pme_gpu_get_f_ready_synchronizer(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
480 GPU_FUNC_TERM_WITH_RETURN(nullptr);