2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Defines functionality for deciding whether tasks will run on GPUs.
38 * \author Mark Abraham <mark.j.abraham@gmail.com>
39 * \ingroup module_taskassignment
44 #include "decidegpuusage.h"
54 #include "gromacs/ewald/pme.h"
55 #include "gromacs/hardware/cpuinfo.h"
56 #include "gromacs/hardware/detecthardware.h"
57 #include "gromacs/hardware/hardwaretopology.h"
58 #include "gromacs/hardware/hw_info.h"
59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
60 #include "gromacs/mdtypes/commrec.h"
61 #include "gromacs/mdtypes/inputrec.h"
62 #include "gromacs/mdtypes/md_enums.h"
63 #include "gromacs/taskassignment/taskassignment.h"
64 #include "gromacs/topology/topology.h"
65 #include "gromacs/utility/baseversion.h"
66 #include "gromacs/utility/exceptions.h"
67 #include "gromacs/utility/fatalerror.h"
68 #include "gromacs/utility/gmxassert.h"
69 #include "gromacs/utility/logger.h"
70 #include "gromacs/utility/stringutil.h"
79 //! Helper variable to localise the text of an often repeated message.
80 const char * g_specifyEverythingFormatString =
81 "When you use mdrun -gputasks, %s must be set to non-default "
82 "values, so that the device IDs can be interpreted correctly."
83 #if GMX_GPU != GMX_GPU_NONE
84 " If you simply want to restrict which GPUs are used, then it is "
85 "better to use mdrun -gpu_id. Otherwise, setting the "
86 # if GMX_GPU == GMX_GPU_CUDA
87 "CUDA_VISIBLE_DEVICES"
88 # elif GMX_GPU == GMX_GPU_OPENCL
89 // Technically there is no portable way to do this offered by the
90 // OpenCL standard, but the only current relevant case for GROMACS
91 // is AMD OpenCL, which offers this variable.
94 # error "Unreachable branch"
96 " environment variable in your bash profile or job "
97 "script may be more convenient."
104 decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget,
105 const std::vector<int> &gpuIdsToUse,
106 const std::vector<int> &userGpuTaskAssignment,
107 const EmulateGpuNonbonded emulateGpuNonbonded,
108 const bool buildSupportsNonbondedOnGpu,
109 const bool usingVerletScheme,
110 const bool nonbondedOnGpuIsUseful,
111 const int numRanksPerSimulation)
113 // First, exclude all cases where we can't run NB on GPUs.
114 if (nonbondedTarget == TaskTarget::Cpu ||
115 emulateGpuNonbonded == EmulateGpuNonbonded::Yes ||
116 !usingVerletScheme ||
117 !nonbondedOnGpuIsUseful ||
118 !buildSupportsNonbondedOnGpu)
120 // If the user required NB on GPUs, we issue an error later.
124 // We now know that NB on GPUs makes sense, if we have any.
126 if (!userGpuTaskAssignment.empty())
128 // Specifying -gputasks requires specifying everything.
129 if (nonbondedTarget == TaskTarget::Auto ||
130 numRanksPerSimulation < 1)
132 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
137 if (nonbondedTarget == TaskTarget::Gpu)
142 // Because this is thread-MPI, we already know about the GPUs that
143 // all potential ranks can use, and can use that in a global
144 // decision that will later be consistent.
145 auto haveGpus = !gpuIdsToUse.empty();
147 // If we get here, then the user permitted or required GPUs.
152 decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded,
153 const TaskTarget pmeTarget,
154 const std::vector<int> &gpuIdsToUse,
155 const std::vector<int> &userGpuTaskAssignment,
156 const gmx_hw_info_t &hardwareInfo,
157 const t_inputrec &inputrec,
158 const gmx_mtop_t &mtop,
159 const int numRanksPerSimulation,
160 const int numPmeRanksPerSimulation)
162 // First, exclude all cases where we can't run PME on GPUs.
163 if ((pmeTarget == TaskTarget::Cpu) ||
164 !useGpuForNonbonded ||
165 !pme_gpu_supports_build(nullptr) ||
166 !pme_gpu_supports_hardware(hardwareInfo, nullptr) ||
167 !pme_gpu_supports_input(inputrec, mtop, nullptr))
169 // PME can't run on a GPU. If the user required that, we issue
174 // We now know that PME on GPUs might make sense, if we have any.
176 if (!userGpuTaskAssignment.empty())
178 // Follow the user's choice of GPU task assignment, if we
179 // can. Checking that their IDs are for compatible GPUs comes
182 // Specifying -gputasks requires specifying everything.
183 if (pmeTarget == TaskTarget::Auto ||
184 numRanksPerSimulation < 1)
186 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
189 // PME on GPUs is only supported in a single case
190 if (pmeTarget == TaskTarget::Gpu)
192 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
193 (numPmeRanksPerSimulation > 1))
195 GMX_THROW(InconsistentInputError
196 ("When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr file and use a single PME rank."));
201 // pmeTarget == TaskTarget::Auto
202 return numRanksPerSimulation == 1;
205 // Because this is thread-MPI, we already know about the GPUs that
206 // all potential ranks can use, and can use that in a global
207 // decision that will later be consistent.
209 if (pmeTarget == TaskTarget::Gpu)
211 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
212 (numPmeRanksPerSimulation > 1))
214 GMX_THROW(NotImplementedError
215 ("PME tasks were required to run on GPUs, but that is not implemented with "
216 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
217 "or permit PME tasks to be assigned to the CPU."));
222 if (numRanksPerSimulation == 1)
224 // PME can run well on a GPU shared with NB, and we permit
225 // mdrun to default to try that.
226 return !gpuIdsToUse.empty();
229 if (numRanksPerSimulation < 1)
231 // Full automated mode for thread-MPI (the default). PME can
232 // run well on a GPU shared with NB, and we permit mdrun to
233 // default to it if there is only one GPU available.
234 return (gpuIdsToUse.size() == 1);
237 // Not enough support for PME on GPUs for anything else
241 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget,
242 const std::vector<int> &userGpuTaskAssignment,
243 const EmulateGpuNonbonded emulateGpuNonbonded,
244 const bool buildSupportsNonbondedOnGpu,
245 const bool usingVerletScheme,
246 const bool nonbondedOnGpuIsUseful,
247 const bool gpusWereDetected)
249 if (nonbondedTarget == TaskTarget::Cpu)
251 if (!userGpuTaskAssignment.empty())
253 GMX_THROW(InconsistentInputError
254 ("A GPU task assignment was specified, but nonbonded interactions were "
255 "assigned to the CPU. Make no more than one of these choices."));
261 if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
263 GMX_THROW(InconsistentInputError
264 ("Nonbonded interactions on the GPU were requested with -nb gpu, "
265 "but the GROMACS binary has been built without GPU support. "
266 "Either run without selecting GPU options, or recompile GROMACS "
267 "with GPU support enabled"));
270 // TODO refactor all these TaskTarget::Gpu checks into one place?
271 // e.g. use a subfunction that handles only the cases where
272 // TaskTargets are not Cpu?
273 if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
275 if (nonbondedTarget == TaskTarget::Gpu)
277 GMX_THROW(InconsistentInputError
278 ("Nonbonded interactions on the GPU were required, which is inconsistent "
279 "with choosing emulation. Make no more than one of these choices."));
281 if (!userGpuTaskAssignment.empty())
283 GMX_THROW(InconsistentInputError
284 ("GPU ID usage was specified, as was GPU emulation. Make no more than one of these choices."));
290 if (!usingVerletScheme)
292 if (nonbondedTarget == TaskTarget::Gpu)
294 GMX_THROW(InconsistentInputError
295 ("Nonbonded interactions on the GPU were required, which requires using "
296 "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs."));
302 if (!nonbondedOnGpuIsUseful)
304 if (nonbondedTarget == TaskTarget::Gpu)
306 GMX_THROW(InconsistentInputError
307 ("Nonbonded interactions on the GPU were required, but not supported for these "
308 "simulation settings. Change your settings, or do not require using GPUs."));
314 if (!userGpuTaskAssignment.empty())
316 // Specifying -gputasks requires specifying everything.
317 if (nonbondedTarget == TaskTarget::Auto)
319 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
325 if (nonbondedTarget == TaskTarget::Gpu)
327 // We still don't know whether it is an error if no GPUs are found
328 // because we don't know the duty of this rank, yet. For example,
329 // a node with only PME ranks and -pme cpu is OK if there are not
334 // If we get here, then the user permitted GPUs, which we should
335 // use for nonbonded interactions.
336 return gpusWereDetected;
339 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded,
340 const TaskTarget pmeTarget,
341 const std::vector<int> &userGpuTaskAssignment,
342 const gmx_hw_info_t &hardwareInfo,
343 const t_inputrec &inputrec,
344 const gmx_mtop_t &mtop,
345 const int numRanksPerSimulation,
346 const int numPmeRanksPerSimulation,
347 const bool gpusWereDetected)
349 if (pmeTarget == TaskTarget::Cpu)
354 if (!useGpuForNonbonded)
356 if (pmeTarget == TaskTarget::Gpu)
358 GMX_THROW(NotImplementedError
359 ("PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
365 if (!pme_gpu_supports_build(&message))
367 if (pmeTarget == TaskTarget::Gpu)
369 GMX_THROW(NotImplementedError
370 ("Cannot compute PME interactions on a GPU, because " + message));
374 if (!pme_gpu_supports_hardware(hardwareInfo, &message))
376 if (pmeTarget == TaskTarget::Gpu)
378 GMX_THROW(NotImplementedError
379 ("Cannot compute PME interactions on a GPU, because " + message));
383 if (!pme_gpu_supports_input(inputrec, mtop, &message))
385 if (pmeTarget == TaskTarget::Gpu)
387 GMX_THROW(NotImplementedError
388 ("Cannot compute PME interactions on a GPU, because " + message));
393 if (pmeTarget == TaskTarget::Cpu)
395 if (!userGpuTaskAssignment.empty())
397 GMX_THROW(InconsistentInputError
398 ("A GPU task assignment was specified, but PME interactions were "
399 "assigned to the CPU. Make no more than one of these choices."));
405 if (!userGpuTaskAssignment.empty())
407 // Specifying -gputasks requires specifying everything.
408 if (pmeTarget == TaskTarget::Auto)
410 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
416 // We still don't know whether it is an error if no GPUs are found
417 // because we don't know the duty of this rank, yet. For example,
418 // a node with only PME ranks and -pme cpu is OK if there are not
421 if (pmeTarget == TaskTarget::Gpu)
423 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
424 (numPmeRanksPerSimulation > 1))
426 GMX_THROW(NotImplementedError
427 ("PME tasks were required to run on GPUs, but that is not implemented with "
428 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
429 "or permit PME tasks to be assigned to the CPU."));
434 // If we get here, then the user permitted GPUs.
435 if (numRanksPerSimulation == 1)
437 // PME can run well on a single GPU shared with NB when there
438 // is one rank, so we permit mdrun to try that if we have
440 return gpusWereDetected;
443 // Not enough support for PME on GPUs for anything else
447 bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded,
448 const bool useGpuForPme,
449 const bool usingVerletScheme,
450 const TaskTarget bondedTarget,
451 const bool canUseGpuForBonded,
452 const bool usingLJPme,
453 const bool usingElecPmeOrEwald,
454 const int numPmeRanksPerSimulation,
455 const bool gpusWereDetected)
457 if (bondedTarget == TaskTarget::Cpu)
462 if (!usingVerletScheme)
464 if (bondedTarget == TaskTarget::Gpu)
466 GMX_THROW(InconsistentInputError
467 ("Bonded interactions on the GPU were required, which requires using "
468 "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs."));
474 if (!canUseGpuForBonded)
476 if (bondedTarget == TaskTarget::Gpu)
478 GMX_THROW(InconsistentInputError
479 ("Bonded interactions on the GPU were required, but not supported for these "
480 "simulation settings. Change your settings, or do not require using GPUs."));
486 if (!useGpuForNonbonded)
488 if (bondedTarget == TaskTarget::Gpu)
490 GMX_THROW(InconsistentInputError
491 ("Bonded interactions on the GPU were required, but this requires that "
492 "short-ranged non-bonded interactions are also run on the GPU. Change "
493 "your settings, or do not require using GPUs."));
499 // TODO If the bonded kernels do not get fused, then performance
500 // overheads might suggest alternative choices here.
502 if (bondedTarget == TaskTarget::Gpu)
504 // We still don't know whether it is an error if no GPUs are
509 // If we get here, then the user permitted GPUs, which we should
510 // use for bonded interactions if any were detected and the CPU
511 // is busy, for which we currently only check PME or Ewald.
512 // (It would be better to dynamically assign bondeds based on timings)
513 // Note that here we assume that the auto setting of PME ranks will not
514 // choose seperate PME ranks when nonBonded are assigned to the GPU.
515 bool usingOurCpuForPmeOrEwald = (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
517 return gpusWereDetected && usingOurCpuForPmeOrEwald;