2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Defines functionality for deciding whether tasks will run on GPUs.
38 * \author Mark Abraham <mark.j.abraham@gmail.com>
39 * \ingroup module_taskassignment
44 #include "decidegpuusage.h"
54 #include "gromacs/ewald/pme.h"
55 #include "gromacs/hardware/cpuinfo.h"
56 #include "gromacs/hardware/detecthardware.h"
57 #include "gromacs/hardware/hardwaretopology.h"
58 #include "gromacs/hardware/hw_info.h"
59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
60 #include "gromacs/mdtypes/commrec.h"
61 #include "gromacs/mdtypes/inputrec.h"
62 #include "gromacs/mdtypes/md_enums.h"
63 #include "gromacs/nbnxm/nbnxm.h"
64 #include "gromacs/taskassignment/taskassignment.h"
65 #include "gromacs/topology/topology.h"
66 #include "gromacs/utility/baseversion.h"
67 #include "gromacs/utility/exceptions.h"
68 #include "gromacs/utility/fatalerror.h"
69 #include "gromacs/utility/gmxassert.h"
70 #include "gromacs/utility/logger.h"
71 #include "gromacs/utility/stringutil.h"
80 //! Helper variable to localise the text of an often repeated message.
// The text carries a %s placeholder; the callers in this file hand the string
// to formatString() together with the names of the mdrun options that must be
// set explicitly. When GMX_GPU is not GMX_GPU_NONE, adjacent string literals
// append advice about mdrun -gpu_id and about a build-specific environment
// variable (CUDA_VISIBLE_DEVICES for CUDA builds).
81 const char * g_specifyEverythingFormatString =
82 "When you use mdrun -gputasks, %s must be set to non-default "
83 "values, so that the device IDs can be interpreted correctly."
84 #if GMX_GPU != GMX_GPU_NONE
85 " If you simply want to restrict which GPUs are used, then it is "
86 "better to use mdrun -gpu_id. Otherwise, setting the "
87 # if GMX_GPU == GMX_GPU_CUDA
88 "CUDA_VISIBLE_DEVICES"
89 # elif GMX_GPU == GMX_GPU_OPENCL
90 // Technically there is no portable way to do this offered by the
91 // OpenCL standard, but the only current relevant case for GROMACS
92 // is AMD OpenCL, which offers this variable.
95 # error "Unreachable branch"
97 " environment variable in your bash profile or job "
98 "script may be more convenient."
// Decides, for a thread-MPI run, whether nonbonded (NB) work can be placed on
// GPUs.
//
// What the visible code does:
//  - The first test groups the cases in which NB cannot run on GPUs: a CPU
//    target, GPU emulation, not using the Verlet scheme, GPU nonbondeds not
//    being useful, or a build without GPU nonbonded support.
//  - If userGpuTaskAssignment (mdrun -gputasks) is non-empty, an
//    InconsistentInputError is thrown when nonbondedTarget is still Auto or
//    numRanksPerSimulation < 1, because -gputasks requires -nb and -ntmpi to
//    be set explicitly.
//  - Otherwise GPU availability is taken from gpuIdsToUse being non-empty.
//
// NOTE(review): the return statements of this function are not visible in
// this listing; confirm the value returned by each branch against the full
// source before relying on this description.
105 decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget,
106 const std::vector<int> &gpuIdsToUse,
107 const std::vector<int> &userGpuTaskAssignment,
108 const EmulateGpuNonbonded emulateGpuNonbonded,
109 const bool buildSupportsNonbondedOnGpu,
110 const bool usingVerletScheme,
111 const bool nonbondedOnGpuIsUseful,
112 const int numRanksPerSimulation)
114 // First, exclude all cases where we can't run NB on GPUs.
115 if (nonbondedTarget == TaskTarget::Cpu ||
116 emulateGpuNonbonded == EmulateGpuNonbonded::Yes ||
117 !usingVerletScheme ||
118 !nonbondedOnGpuIsUseful ||
119 !buildSupportsNonbondedOnGpu)
121 // If the user required NB on GPUs, we issue an error later.
125 // We now know that NB on GPUs makes sense, if we have any.
127 if (!userGpuTaskAssignment.empty())
129 // Specifying -gputasks requires specifying everything.
130 if (nonbondedTarget == TaskTarget::Auto ||
131 numRanksPerSimulation < 1)
133 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
138 if (nonbondedTarget == TaskTarget::Gpu)
143 // Because this is thread-MPI, we already know about the GPUs that
144 // all potential ranks can use, and can use that in a global
145 // decision that will later be consistent.
146 auto haveGpus = !gpuIdsToUse.empty();
148 // If we get here, then the user permitted or required GPUs.
// Decides, for a thread-MPI run, whether PME work can be placed on GPUs.
//
// What the visible code does:
//  - The first test groups the cases in which PME cannot run on GPUs: a CPU
//    target, nonbondeds not on the GPU, or any of pme_gpu_supports_build(),
//    pme_gpu_supports_hardware() and pme_gpu_supports_input() reporting false
//    (nullptr is passed for their last argument here).
//  - With a non-empty userGpuTaskAssignment (mdrun -gputasks):
//      * throws InconsistentInputError if pmeTarget is Auto or
//        numRanksPerSimulation < 1;
//      * with pmeTarget == Gpu, throws InconsistentInputError when there is
//        more than one rank but no PME rank, or more than one PME rank.
//  - Without a user assignment:
//      * with pmeTarget == Gpu, throws NotImplementedError under the same
//        rank condition;
//      * with exactly one rank, returns whether gpuIdsToUse is non-empty;
//      * with numRanksPerSimulation < 1 (automatic rank count), returns
//        whether exactly one GPU id is available.
//
// NOTE(review): several return statements are not visible in this listing;
// confirm the remaining return values against the full source.
153 decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded,
154 const TaskTarget pmeTarget,
155 const std::vector<int> &gpuIdsToUse,
156 const std::vector<int> &userGpuTaskAssignment,
157 const gmx_hw_info_t &hardwareInfo,
158 const t_inputrec &inputrec,
159 const gmx_mtop_t &mtop,
160 const int numRanksPerSimulation,
161 const int numPmeRanksPerSimulation)
163 // First, exclude all cases where we can't run PME on GPUs.
164 if ((pmeTarget == TaskTarget::Cpu) ||
165 !useGpuForNonbonded ||
166 !pme_gpu_supports_build(nullptr) ||
167 !pme_gpu_supports_hardware(hardwareInfo, nullptr) ||
168 !pme_gpu_supports_input(inputrec, mtop, nullptr))
170 // PME can't run on a GPU. If the user required that, we issue
175 // We now know that PME on GPUs might make sense, if we have any.
177 if (!userGpuTaskAssignment.empty())
179 // Follow the user's choice of GPU task assignment, if we
180 // can. Checking that their IDs are for compatible GPUs comes
183 // Specifying -gputasks requires specifying everything.
184 if (pmeTarget == TaskTarget::Auto ||
185 numRanksPerSimulation < 1)
187 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
190 // PME on GPUs is only supported in a single case
191 if (pmeTarget == TaskTarget::Gpu)
193 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
194 (numPmeRanksPerSimulation > 1))
196 GMX_THROW(InconsistentInputError
197 ("When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr file and use a single PME rank."));
// NOTE(review): pmeTarget == Auto already threw above, so if the Cpu and Gpu
// cases both return before this point (their return statements are not
// visible here) the statement below looks unreachable - confirm.
202 // pmeTarget == TaskTarget::Auto
203 return numRanksPerSimulation == 1;
206 // Because this is thread-MPI, we already know about the GPUs that
207 // all potential ranks can use, and can use that in a global
208 // decision that will later be consistent.
210 if (pmeTarget == TaskTarget::Gpu)
212 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
213 (numPmeRanksPerSimulation > 1))
215 GMX_THROW(NotImplementedError
216 ("PME tasks were required to run on GPUs, but that is not implemented with "
217 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
218 "or permit PME tasks to be assigned to the CPU."));
223 if (numRanksPerSimulation == 1)
225 // PME can run well on a GPU shared with NB, and we permit
226 // mdrun to default to try that.
227 return !gpuIdsToUse.empty();
230 if (numRanksPerSimulation < 1)
232 // Full automated mode for thread-MPI (the default). PME can
233 // run well on a GPU shared with NB, and we permit mdrun to
234 // default to it if there is only one GPU available.
235 return (gpuIdsToUse.size() == 1);
238 // Not enough support for PME on GPUs for anything else
// Decides whether nonbonded work should run on GPUs, validating the user's
// choices on the way.
//
// Throws InconsistentInputError when:
//  - nonbondedTarget is Cpu but a GPU task assignment was given;
//  - nonbondedTarget is Gpu but the build has no GPU nonbonded support;
//  - GPU emulation is on and either nonbondedTarget is Gpu or a GPU task
//    assignment was given;
//  - nonbondedTarget is Gpu without the Verlet scheme;
//  - nonbondedTarget is Gpu but nonbondedOnGpuIsUseful is false;
//  - a GPU task assignment was given while nonbondedTarget is still Auto.
//
// When none of the earlier branches decides the outcome, the result is
// gpusWereDetected.
//
// NOTE(review): the return statements of the earlier branches are not
// visible in this listing; confirm their values against the full source.
242 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget,
243 const std::vector<int> &userGpuTaskAssignment,
244 const EmulateGpuNonbonded emulateGpuNonbonded,
245 const bool buildSupportsNonbondedOnGpu,
246 const bool usingVerletScheme,
247 const bool nonbondedOnGpuIsUseful,
248 const bool gpusWereDetected)
250 if (nonbondedTarget == TaskTarget::Cpu)
252 if (!userGpuTaskAssignment.empty())
254 GMX_THROW(InconsistentInputError
255 ("A GPU task assignment was specified, but nonbonded interactions were "
256 "assigned to the CPU. Make no more than one of these choices."));
262 if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
264 GMX_THROW(InconsistentInputError
265 ("Nonbonded interactions on the GPU were requested with -nb gpu, "
266 "but the GROMACS binary has been built without GPU support. "
267 "Either run without selecting GPU options, or recompile GROMACS "
268 "with GPU support enabled"));
271 // TODO refactor all these TaskTarget::Gpu checks into one place?
272 // e.g. use a subfunction that handles only the cases where
273 // TaskTargets are not Cpu?
274 if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
276 if (nonbondedTarget == TaskTarget::Gpu)
278 GMX_THROW(InconsistentInputError
279 ("Nonbonded interactions on the GPU were required, which is inconsistent "
280 "with choosing emulation. Make no more than one of these choices."));
282 if (!userGpuTaskAssignment.empty())
284 GMX_THROW(InconsistentInputError
285 ("GPU ID usage was specified, as was GPU emulation. Make no more than one of these choices."));
291 if (!usingVerletScheme)
293 if (nonbondedTarget == TaskTarget::Gpu)
295 GMX_THROW(InconsistentInputError
296 ("Nonbonded interactions on the GPU were required, which requires using "
297 "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs."));
303 if (!nonbondedOnGpuIsUseful)
305 if (nonbondedTarget == TaskTarget::Gpu)
307 GMX_THROW(InconsistentInputError
308 ("Nonbonded interactions on the GPU were required, but not supported for these "
309 "simulation settings. Change your settings, or do not require using GPUs."));
315 if (!userGpuTaskAssignment.empty())
317 // Specifying -gputasks requires specifying everything.
// NOTE(review): the message below names -ntmpi, but this function takes no
// rank count and only tests nonbondedTarget - confirm the wording is intended.
318 if (nonbondedTarget == TaskTarget::Auto)
320 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
326 if (nonbondedTarget == TaskTarget::Gpu)
328 // We still don't know whether it is an error if no GPUs are found
329 // because we don't know the duty of this rank, yet. For example,
330 // a node with only PME ranks and -pme cpu is OK if there are not
335 // If we get here, then the user permitted GPUs, which we should
336 // use for nonbonded interactions.
337 return gpusWereDetected;
// Decides whether PME work should run on GPUs, validating the user's choices
// on the way.
//
// Throws NotImplementedError when pmeTarget is Gpu and:
//  - nonbondeds do not run on the GPU;
//  - pme_gpu_supports_build(), pme_gpu_supports_hardware() or
//    pme_gpu_supports_input() reports false (the text they store in
//    `message` is appended to the error);
//  - there is more than one rank but no PME rank, or more than one PME rank.
// Throws InconsistentInputError when a GPU task assignment was given while
// pmeTarget is still Auto.
//
// With exactly one rank and no earlier decision, the result is
// gpusWereDetected.
//
// NOTE(review): several return statements and the declaration of `message`
// are not visible in this listing; confirm them against the full source.
340 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded,
341 const TaskTarget pmeTarget,
342 const std::vector<int> &userGpuTaskAssignment,
343 const gmx_hw_info_t &hardwareInfo,
344 const t_inputrec &inputrec,
345 const gmx_mtop_t &mtop,
346 const int numRanksPerSimulation,
347 const int numPmeRanksPerSimulation,
348 const bool gpusWereDetected)
350 if (pmeTarget == TaskTarget::Cpu)
355 if (!useGpuForNonbonded)
357 if (pmeTarget == TaskTarget::Gpu)
359 GMX_THROW(NotImplementedError
360 ("PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
366 if (!pme_gpu_supports_build(&message))
368 if (pmeTarget == TaskTarget::Gpu)
370 GMX_THROW(NotImplementedError
371 ("Cannot compute PME interactions on a GPU, because " + message));
375 if (!pme_gpu_supports_hardware(hardwareInfo, &message))
377 if (pmeTarget == TaskTarget::Gpu)
379 GMX_THROW(NotImplementedError
380 ("Cannot compute PME interactions on a GPU, because " + message));
384 if (!pme_gpu_supports_input(inputrec, mtop, &message))
386 if (pmeTarget == TaskTarget::Gpu)
388 GMX_THROW(NotImplementedError
389 ("Cannot compute PME interactions on a GPU, because " + message));
// NOTE(review): pmeTarget == TaskTarget::Cpu is already tested at the top of
// this function. If that first test returns (its body is not visible here),
// the branch below - including its "-gputasks with -pme cpu" error - can
// never execute. Confirm, and consider moving the consistency check into the
// first test.
394 if (pmeTarget == TaskTarget::Cpu)
396 if (!userGpuTaskAssignment.empty())
398 GMX_THROW(InconsistentInputError
399 ("A GPU task assignment was specified, but PME interactions were "
400 "assigned to the CPU. Make no more than one of these choices."));
406 if (!userGpuTaskAssignment.empty())
408 // Specifying -gputasks requires specifying everything.
409 if (pmeTarget == TaskTarget::Auto)
411 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
417 // We still don't know whether it is an error if no GPUs are found
418 // because we don't know the duty of this rank, yet. For example,
419 // a node with only PME ranks and -pme cpu is OK if there are not
422 if (pmeTarget == TaskTarget::Gpu)
424 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
425 (numPmeRanksPerSimulation > 1))
427 GMX_THROW(NotImplementedError
428 ("PME tasks were required to run on GPUs, but that is not implemented with "
429 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
430 "or permit PME tasks to be assigned to the CPU."));
435 // If we get here, then the user permitted GPUs.
436 if (numRanksPerSimulation == 1)
438 // PME can run well on a single GPU shared with NB when there
439 // is one rank, so we permit mdrun to try that if we have
441 return gpusWereDetected;
444 // Not enough support for PME on GPUs for anything else
// Decides whether bonded work should run on GPUs, validating the user's
// choices on the way.
//
// Throws InconsistentInputError when bondedTarget is Gpu and:
//  - the Verlet scheme is not in use;
//  - canUseGpuForBonded is false;
//  - nonbonded interactions do not run on the GPU.
//
// When no earlier branch decides the outcome, GPUs are used for bondeds only
// if GPUs were detected and this rank's CPU is considered busy with long-range
// work: LJ-PME is in use, or PME/Ewald electrostatics is in use while PME is
// not on the GPU and numPmeRanksPerSimulation <= 0.
//
// NOTE(review): the return statements of the earlier branches are not
// visible in this listing; confirm their values against the full source.
448 bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded,
449 const bool useGpuForPme,
450 const bool usingVerletScheme,
451 const TaskTarget bondedTarget,
452 const bool canUseGpuForBonded,
453 const bool usingLJPme,
454 const bool usingElecPmeOrEwald,
455 const int numPmeRanksPerSimulation,
456 const bool gpusWereDetected)
458 if (bondedTarget == TaskTarget::Cpu)
463 if (!usingVerletScheme)
465 if (bondedTarget == TaskTarget::Gpu)
467 GMX_THROW(InconsistentInputError
468 ("Bonded interactions on the GPU were required, which requires using "
469 "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs."));
475 if (!canUseGpuForBonded)
477 if (bondedTarget == TaskTarget::Gpu)
479 GMX_THROW(InconsistentInputError
480 ("Bonded interactions on the GPU were required, but not supported for these "
481 "simulation settings. Change your settings, or do not require using GPUs."));
487 if (!useGpuForNonbonded)
489 if (bondedTarget == TaskTarget::Gpu)
491 GMX_THROW(InconsistentInputError
492 ("Bonded interactions on the GPU were required, but this requires that "
493 "short-ranged non-bonded interactions are also run on the GPU. Change "
494 "your settings, or do not require using GPUs."));
500 // TODO If the bonded kernels do not get fused, then performance
501 // overheads might suggest alternative choices here.
503 if (bondedTarget == TaskTarget::Gpu)
505 // We still don't know whether it is an error if no GPUs are
510 // If we get here, then the user permitted GPUs, which we should
511 // use for bonded interactions if any were detected and the CPU
512 // is busy, for which we currently only check PME or Ewald.
513 // (It would be better to dynamically assign bondeds based on timings)
514 // Note that here we assume that the auto setting of PME ranks will not
515 // choose separate PME ranks when nonBonded are assigned to the GPU.
516 bool usingOurCpuForPmeOrEwald = (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
518 return gpusWereDetected && usingOurCpuForPmeOrEwald;