2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Defines functionality for deciding whether tasks will run on GPUs.
38 * \author Mark Abraham <mark.j.abraham@gmail.com>
39 * \ingroup module_taskassignment
44 #include "decidegpuusage.h"
54 #include "gromacs/hardware/cpuinfo.h"
55 #include "gromacs/hardware/detecthardware.h"
56 #include "gromacs/hardware/hardwaretopology.h"
57 #include "gromacs/hardware/hw_info.h"
58 #include "gromacs/mdlib/gmx_omp_nthreads.h"
59 #include "gromacs/mdlib/nb_verlet.h"
60 #include "gromacs/mdtypes/commrec.h"
61 #include "gromacs/mdtypes/inputrec.h"
62 #include "gromacs/mdtypes/md_enums.h"
63 #include "gromacs/taskassignment/taskassignment.h"
64 #include "gromacs/topology/topology.h"
65 #include "gromacs/utility/baseversion.h"
66 #include "gromacs/utility/exceptions.h"
67 #include "gromacs/utility/fatalerror.h"
68 #include "gromacs/utility/gmxassert.h"
69 #include "gromacs/utility/logger.h"
70 #include "gromacs/utility/stringutil.h"
79 //! Helper variable to localise the text of an often repeated message.
80 const char * g_specifyEverythingFormatString =
81 "When you use mdrun -gputasks, %s must be set to non-default "
82 "values, so that the device IDs can be interpreted correctly."
83 #if GMX_GPU != GMX_GPU_NONE
84 " If you simply want to restrict which GPUs are used, then it is "
85 "better to use mdrun -gpu_id. Otherwise, setting the "
86 # if GMX_GPU == GMX_GPU_CUDA
87 "CUDA_VISIBLE_DEVICES"
88 # elif GMX_GPU == GMX_GPU_OPENCL
89 // Technically there is no portable way to do this offered by the
90 // OpenCL standard, but the only current relevant case for GROMACS
91 // is AMD OpenCL, which offers this variable.
94 # error "Unreachable branch"
96 " environment variable in your bash profile or job "
97 "script may be more convenient."
104 decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget,
105 const std::vector<int> &gpuIdsToUse,
106 const std::vector<int> &userGpuTaskAssignment,
107 const EmulateGpuNonbonded emulateGpuNonbonded,
108 const bool usingVerletScheme,
109 const bool nonbondedOnGpuIsUseful,
110 const int numRanksPerSimulation)
112 // First, exclude all cases where we can't run NB on GPUs.
113 if (nonbondedTarget == TaskTarget::Cpu ||
114 emulateGpuNonbonded == EmulateGpuNonbonded::Yes ||
115 !usingVerletScheme ||
116 !nonbondedOnGpuIsUseful)
118 // If the user required NB on GPUs, we issue an error later.
122 // We now know that NB on GPUs makes sense, if we have any.
124 if (!userGpuTaskAssignment.empty())
126 // Specifying -gputasks requires specifying everything.
127 if (nonbondedTarget == TaskTarget::Auto ||
128 numRanksPerSimulation < 1)
130 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
135 if (nonbondedTarget == TaskTarget::Gpu)
140 // Because this is thread-MPI, we already know about the GPUs that
141 // all potential ranks can use, and can use that in a global
142 // decision that will later be consistent.
143 auto haveGpus = !gpuIdsToUse.empty();
145 // If we get here, then the user permitted or required GPUs.
150 decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded,
151 const TaskTarget pmeTarget,
152 const std::vector<int> &gpuIdsToUse,
153 const std::vector<int> &userGpuTaskAssignment,
154 const bool canUseGpuForPme,
155 const int numRanksPerSimulation,
156 const int numPmeRanksPerSimulation)
158 // First, exclude all cases where we can't run PME on GPUs.
159 if ((pmeTarget == TaskTarget::Cpu) ||
160 !useGpuForNonbonded ||
163 // PME can't run on a GPU. If the user required that, we issue
168 // We now know that PME on GPUs might make sense, if we have any.
170 if (!userGpuTaskAssignment.empty())
172 // Follow the user's choice of GPU task assignment, if we
173 // can. Checking that their IDs are for compatible GPUs comes
176 // Specifying -gputasks requires specifying everything.
177 if (pmeTarget == TaskTarget::Auto ||
178 numRanksPerSimulation < 1)
180 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
183 // PME on GPUs is only supported in a single case
184 if (pmeTarget == TaskTarget::Gpu)
186 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
187 (numPmeRanksPerSimulation > 1))
189 GMX_THROW(InconsistentInputError
190 ("When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr file and use a single PME rank."));
195 // pmeTarget == TaskTarget::Auto
196 return numRanksPerSimulation == 1;
199 // Because this is thread-MPI, we already know about the GPUs that
200 // all potential ranks can use, and can use that in a global
201 // decision that will later be consistent.
203 if (pmeTarget == TaskTarget::Gpu)
205 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
206 (numPmeRanksPerSimulation > 1))
208 GMX_THROW(NotImplementedError
209 ("PME tasks were required to run on GPUs, but that is not implemented with "
210 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
211 "or permit PME tasks to be assigned to the CPU."));
216 if (numRanksPerSimulation == 1)
218 // PME can run well on a GPU shared with NB, and we permit
219 // mdrun to default to try that.
220 return !gpuIdsToUse.empty();
223 if (numRanksPerSimulation < 1)
225 // Full automated mode for thread-MPI (the default). PME can
226 // run well on a GPU shared with NB, and we permit mdrun to
227 // default to it if there is only one GPU available.
228 return (gpuIdsToUse.size() == 1);
231 // Not enough support for PME on GPUs for anything else
235 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget,
236 const std::vector<int> &userGpuTaskAssignment,
237 const EmulateGpuNonbonded emulateGpuNonbonded,
238 const bool usingVerletScheme,
239 const bool nonbondedOnGpuIsUseful,
240 const bool gpusWereDetected)
242 if (nonbondedTarget == TaskTarget::Cpu)
244 if (!userGpuTaskAssignment.empty())
246 GMX_THROW(InconsistentInputError
247 ("A GPU task assignment was specified, but nonbonded interactions were "
248 "assigned to the CPU. Make no more than one of these choices."));
254 // TODO refactor all these TaskTarget::Gpu checks into one place?
255 // e.g. use a subfunction that handles only the cases where
256 // TaskTargets are not Cpu?
257 if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
259 if (nonbondedTarget == TaskTarget::Gpu)
261 GMX_THROW(InconsistentInputError
262 ("Nonbonded interactions on the GPU were required, which is inconsistent "
263 "with choosing emulation. Make no more than one of these choices."));
265 if (!userGpuTaskAssignment.empty())
267 GMX_THROW(InconsistentInputError
268 ("GPU ID usage was specified, as was GPU emulation. Make no more than one of these choices."));
274 if (!usingVerletScheme)
276 if (nonbondedTarget == TaskTarget::Gpu)
278 GMX_THROW(InconsistentInputError
279 ("Nonbonded interactions on the GPU were required, which requires using "
280 "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs."));
286 if (!nonbondedOnGpuIsUseful)
288 if (nonbondedTarget == TaskTarget::Gpu)
290 GMX_THROW(InconsistentInputError
291 ("Nonbonded interactions on the GPU were required, but not supported for these "
292 "simulation settings. Change your settings, or do not require using GPUs."));
298 if (!userGpuTaskAssignment.empty())
300 // Specifying -gputasks requires specifying everything.
301 if (nonbondedTarget == TaskTarget::Auto)
303 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
309 if (nonbondedTarget == TaskTarget::Gpu)
311 // We still don't know whether it is an error if no GPUs are found
312 // because we don't know the duty of this rank, yet. For example,
313 // a node with only PME ranks and -pme cpu is OK if there are not
318 // If we get here, then the user permitted GPUs, which we should
319 // use for nonbonded interactions.
320 return gpusWereDetected;
323 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded,
324 const TaskTarget pmeTarget,
325 const std::vector<int> &userGpuTaskAssignment,
326 const bool canUseGpuForPme,
327 const int numRanksPerSimulation,
328 const int numPmeRanksPerSimulation,
329 const bool gpusWereDetected)
331 if (pmeTarget == TaskTarget::Cpu)
336 if (!useGpuForNonbonded)
338 if (pmeTarget == TaskTarget::Gpu)
340 GMX_THROW(NotImplementedError
341 ("The PME on the GPU is only supported when nonbonded interactions run on GPUs also."));
346 if (!canUseGpuForPme)
348 if (pmeTarget == TaskTarget::Gpu)
350 // TODO Pass in the inputrec so we can give more help here?
351 GMX_THROW(NotImplementedError
352 ("The input simulation did not use PME in a way that is supported on the GPU."));
357 if (pmeTarget == TaskTarget::Cpu)
359 if (!userGpuTaskAssignment.empty())
361 GMX_THROW(InconsistentInputError
362 ("A GPU task assignment was specified, but PME interactions were "
363 "assigned to the CPU. Make no more than one of these choices."));
369 if (!userGpuTaskAssignment.empty())
371 // Specifying -gputasks requires specifying everything.
372 if (pmeTarget == TaskTarget::Auto)
374 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
380 // We still don't know whether it is an error if no GPUs are found
381 // because we don't know the duty of this rank, yet. For example,
382 // a node with only PME ranks and -pme cpu is OK if there are not
385 if (pmeTarget == TaskTarget::Gpu)
387 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
388 (numPmeRanksPerSimulation > 1))
390 GMX_THROW(NotImplementedError
391 ("PME tasks were required to run on GPUs, but that is not implemented with "
392 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
393 "or permit PME tasks to be assigned to the CPU."));
398 // If we get here, then the user permitted GPUs.
399 if (numRanksPerSimulation == 1)
401 // PME can run well on a single GPU shared with NB when there
402 // is one rank, so we permit mdrun to try that if we have
404 return gpusWereDetected;
407 // Not enough support for PME on GPUs for anything else
411 bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded,
412 const bool useGpuForPme,
413 const bool usingVerletScheme,
414 const TaskTarget bondedTarget,
415 const bool canUseGpuForBonded,
416 const bool usingLJPme,
417 const bool usingElecPmeOrEwald,
418 const int numPmeRanksPerSimulation,
419 const bool gpusWereDetected)
421 if (bondedTarget == TaskTarget::Cpu)
426 if (!usingVerletScheme)
428 if (bondedTarget == TaskTarget::Gpu)
430 GMX_THROW(InconsistentInputError
431 ("Bonded interactions on the GPU were required, which requires using "
432 "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs."));
438 if (!canUseGpuForBonded)
440 if (bondedTarget == TaskTarget::Gpu)
442 GMX_THROW(InconsistentInputError
443 ("Bonded interactions on the GPU were required, but not supported for these "
444 "simulation settings. Change your settings, or do not require using GPUs."));
450 if (!useGpuForNonbonded)
452 if (bondedTarget == TaskTarget::Gpu)
454 GMX_THROW(InconsistentInputError
455 ("Bonded interactions on the GPU were required, but this requires that "
456 "short-ranged non-bonded interactions are also run on the GPU. Change "
457 "your settings, or do not require using GPUs."));
463 // TODO If the bonded kernels do not get fused, then performance
464 // overheads might suggest alternative choices here.
466 if (bondedTarget == TaskTarget::Gpu)
468 // We still don't know whether it is an error if no GPUs are
473 // If we get here, then the user permitted GPUs, which we should
474 // use for bonded interactions if any were detected and the CPU
475 // is busy, for which we currently only check PME or Ewald.
476 // (It would be better to dynamically assign bondeds based on timings)
477 // Note that here we assume that the auto setting of PME ranks will not
478 // choose seperate PME ranks when nonBonded are assigned to the GPU.
479 bool usingOurCpuForPmeOrEwald = (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
481 return gpusWereDetected && usingOurCpuForPmeOrEwald;