2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Defines functionality for deciding whether tasks will run on GPUs.
38 * \author Mark Abraham <mark.j.abraham@gmail.com>
39 * \ingroup module_taskassignment
44 #include "decidegpuusage.h"
54 #include "gromacs/ewald/pme.h"
55 #include "gromacs/hardware/cpuinfo.h"
56 #include "gromacs/hardware/detecthardware.h"
57 #include "gromacs/hardware/hardwaretopology.h"
58 #include "gromacs/hardware/hw_info.h"
59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
60 #include "gromacs/mdlib/update_constrain_cuda.h"
61 #include "gromacs/mdtypes/commrec.h"
62 #include "gromacs/mdtypes/inputrec.h"
63 #include "gromacs/mdtypes/md_enums.h"
64 #include "gromacs/mdtypes/mdrunoptions.h"
65 #include "gromacs/taskassignment/taskassignment.h"
66 #include "gromacs/topology/mtop_util.h"
67 #include "gromacs/topology/topology.h"
68 #include "gromacs/utility/baseversion.h"
69 #include "gromacs/utility/exceptions.h"
70 #include "gromacs/utility/fatalerror.h"
71 #include "gromacs/utility/gmxassert.h"
72 #include "gromacs/utility/logger.h"
73 #include "gromacs/utility/stringutil.h"
//! Helper variable to localise the text of an often repeated message.
//
// The text is a printf-style format with a single %s, which callers fill
// with the list of mdrun options that must be set explicitly. In GPU builds
// it is extended with advice about restricting the visible devices.
const char* g_specifyEverythingFormatString =
        "When you use mdrun -gputasks, %s must be set to non-default "
        "values, so that the device IDs can be interpreted correctly."
#if GMX_GPU != GMX_GPU_NONE
        " If you simply want to restrict which GPUs are used, then it is "
        "better to use mdrun -gpu_id. Otherwise, setting the "
#    if GMX_GPU == GMX_GPU_CUDA
        "CUDA_VISIBLE_DEVICES"
#    elif GMX_GPU == GMX_GPU_OPENCL
        // Technically there is no portable way to do this offered by the
        // OpenCL standard, but the only current relevant case for GROMACS
        // is AMD OpenCL, which offers this variable.
        // NOTE(review): the variable name below was not present in the text
        // this declaration was restored from - confirm against upstream.
        "GPU_DEVICE_ORDINAL"
#    else
#        error "Unreachable branch"
#    endif
        " environment variable in your bash profile or job "
        "script may be more convenient."
#endif
        ;
106 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget,
107 const std::vector<int>& gpuIdsToUse,
108 const std::vector<int>& userGpuTaskAssignment,
109 const EmulateGpuNonbonded emulateGpuNonbonded,
110 const bool buildSupportsNonbondedOnGpu,
111 const bool nonbondedOnGpuIsUseful,
112 const int numRanksPerSimulation)
114 // First, exclude all cases where we can't run NB on GPUs.
115 if (nonbondedTarget == TaskTarget::Cpu || emulateGpuNonbonded == EmulateGpuNonbonded::Yes
116 || !nonbondedOnGpuIsUseful || !buildSupportsNonbondedOnGpu)
118 // If the user required NB on GPUs, we issue an error later.
122 // We now know that NB on GPUs makes sense, if we have any.
124 if (!userGpuTaskAssignment.empty())
126 // Specifying -gputasks requires specifying everything.
127 if (nonbondedTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
129 GMX_THROW(InconsistentInputError(
130 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
135 if (nonbondedTarget == TaskTarget::Gpu)
140 // Because this is thread-MPI, we already know about the GPUs that
141 // all potential ranks can use, and can use that in a global
142 // decision that will later be consistent.
143 auto haveGpus = !gpuIdsToUse.empty();
145 // If we get here, then the user permitted or required GPUs.
149 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded,
150 const TaskTarget pmeTarget,
151 const std::vector<int>& gpuIdsToUse,
152 const std::vector<int>& userGpuTaskAssignment,
153 const gmx_hw_info_t& hardwareInfo,
154 const t_inputrec& inputrec,
155 const gmx_mtop_t& mtop,
156 const int numRanksPerSimulation,
157 const int numPmeRanksPerSimulation)
159 // First, exclude all cases where we can't run PME on GPUs.
160 if ((pmeTarget == TaskTarget::Cpu) || !useGpuForNonbonded || !pme_gpu_supports_build(nullptr)
161 || !pme_gpu_supports_hardware(hardwareInfo, nullptr)
162 || !pme_gpu_supports_input(inputrec, mtop, nullptr))
164 // PME can't run on a GPU. If the user required that, we issue
169 // We now know that PME on GPUs might make sense, if we have any.
171 if (!userGpuTaskAssignment.empty())
173 // Follow the user's choice of GPU task assignment, if we
174 // can. Checking that their IDs are for compatible GPUs comes
177 // Specifying -gputasks requires specifying everything.
178 if (pmeTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
180 GMX_THROW(InconsistentInputError(
181 formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
184 // PME on GPUs is only supported in a single case
185 if (pmeTarget == TaskTarget::Gpu)
187 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
188 || (numPmeRanksPerSimulation > 1))
190 GMX_THROW(InconsistentInputError(
191 "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
192 "file and use a single PME rank."));
197 // pmeTarget == TaskTarget::Auto
198 return numRanksPerSimulation == 1;
201 // Because this is thread-MPI, we already know about the GPUs that
202 // all potential ranks can use, and can use that in a global
203 // decision that will later be consistent.
205 if (pmeTarget == TaskTarget::Gpu)
207 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
208 || (numPmeRanksPerSimulation > 1))
210 GMX_THROW(NotImplementedError(
211 "PME tasks were required to run on GPUs, but that is not implemented with "
212 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
213 "or permit PME tasks to be assigned to the CPU."));
218 if (numRanksPerSimulation == 1)
220 // PME can run well on a GPU shared with NB, and we permit
221 // mdrun to default to try that.
222 return !gpuIdsToUse.empty();
225 if (numRanksPerSimulation < 1)
227 // Full automated mode for thread-MPI (the default). PME can
228 // run well on a GPU shared with NB, and we permit mdrun to
229 // default to it if there is only one GPU available.
230 return (gpuIdsToUse.size() == 1);
233 // Not enough support for PME on GPUs for anything else
237 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget,
238 const std::vector<int>& userGpuTaskAssignment,
239 const EmulateGpuNonbonded emulateGpuNonbonded,
240 const bool buildSupportsNonbondedOnGpu,
241 const bool nonbondedOnGpuIsUseful,
242 const bool gpusWereDetected)
244 if (nonbondedTarget == TaskTarget::Cpu)
246 if (!userGpuTaskAssignment.empty())
248 GMX_THROW(InconsistentInputError(
249 "A GPU task assignment was specified, but nonbonded interactions were "
250 "assigned to the CPU. Make no more than one of these choices."));
256 if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
258 GMX_THROW(InconsistentInputError(
259 "Nonbonded interactions on the GPU were requested with -nb gpu, "
260 "but the GROMACS binary has been built without GPU support. "
261 "Either run without selecting GPU options, or recompile GROMACS "
262 "with GPU support enabled"));
265 // TODO refactor all these TaskTarget::Gpu checks into one place?
266 // e.g. use a subfunction that handles only the cases where
267 // TaskTargets are not Cpu?
268 if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
270 if (nonbondedTarget == TaskTarget::Gpu)
272 GMX_THROW(InconsistentInputError(
273 "Nonbonded interactions on the GPU were required, which is inconsistent "
274 "with choosing emulation. Make no more than one of these choices."));
276 if (!userGpuTaskAssignment.empty())
279 InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
280 "no more than one of these choices."));
286 if (!nonbondedOnGpuIsUseful)
288 if (nonbondedTarget == TaskTarget::Gpu)
290 GMX_THROW(InconsistentInputError(
291 "Nonbonded interactions on the GPU were required, but not supported for these "
292 "simulation settings. Change your settings, or do not require using GPUs."));
298 if (!userGpuTaskAssignment.empty())
300 // Specifying -gputasks requires specifying everything.
301 if (nonbondedTarget == TaskTarget::Auto)
303 GMX_THROW(InconsistentInputError(
304 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
310 if (nonbondedTarget == TaskTarget::Gpu)
312 // We still don't know whether it is an error if no GPUs are found
313 // because we don't know the duty of this rank, yet. For example,
314 // a node with only PME ranks and -pme cpu is OK if there are not
319 // If we get here, then the user permitted GPUs, which we should
320 // use for nonbonded interactions.
321 return gpusWereDetected;
324 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded,
325 const TaskTarget pmeTarget,
326 const std::vector<int>& userGpuTaskAssignment,
327 const gmx_hw_info_t& hardwareInfo,
328 const t_inputrec& inputrec,
329 const gmx_mtop_t& mtop,
330 const int numRanksPerSimulation,
331 const int numPmeRanksPerSimulation,
332 const bool gpusWereDetected)
334 if (pmeTarget == TaskTarget::Cpu)
339 if (!useGpuForNonbonded)
341 if (pmeTarget == TaskTarget::Gpu)
343 GMX_THROW(NotImplementedError(
344 "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
350 if (!pme_gpu_supports_build(&message))
352 if (pmeTarget == TaskTarget::Gpu)
354 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
358 if (!pme_gpu_supports_hardware(hardwareInfo, &message))
360 if (pmeTarget == TaskTarget::Gpu)
362 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
366 if (!pme_gpu_supports_input(inputrec, mtop, &message))
368 if (pmeTarget == TaskTarget::Gpu)
370 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
375 if (pmeTarget == TaskTarget::Cpu)
377 if (!userGpuTaskAssignment.empty())
379 GMX_THROW(InconsistentInputError(
380 "A GPU task assignment was specified, but PME interactions were "
381 "assigned to the CPU. Make no more than one of these choices."));
387 if (!userGpuTaskAssignment.empty())
389 // Specifying -gputasks requires specifying everything.
390 if (pmeTarget == TaskTarget::Auto)
392 GMX_THROW(InconsistentInputError(formatString(
393 g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
399 // We still don't know whether it is an error if no GPUs are found
400 // because we don't know the duty of this rank, yet. For example,
401 // a node with only PME ranks and -pme cpu is OK if there are not
404 if (pmeTarget == TaskTarget::Gpu)
406 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
407 || (numPmeRanksPerSimulation > 1))
409 GMX_THROW(NotImplementedError(
410 "PME tasks were required to run on GPUs, but that is not implemented with "
411 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
412 "or permit PME tasks to be assigned to the CPU."));
417 // If we get here, then the user permitted GPUs.
418 if (numRanksPerSimulation == 1)
420 // PME can run well on a single GPU shared with NB when there
421 // is one rank, so we permit mdrun to try that if we have
423 return gpusWereDetected;
426 // Not enough support for PME on GPUs for anything else
430 bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded,
431 const bool useGpuForPme,
432 const TaskTarget bondedTarget,
433 const bool canUseGpuForBonded,
434 const bool usingLJPme,
435 const bool usingElecPmeOrEwald,
436 const int numPmeRanksPerSimulation,
437 const bool gpusWereDetected)
439 if (bondedTarget == TaskTarget::Cpu)
444 if (!canUseGpuForBonded)
446 if (bondedTarget == TaskTarget::Gpu)
448 GMX_THROW(InconsistentInputError(
449 "Bonded interactions on the GPU were required, but not supported for these "
450 "simulation settings. Change your settings, or do not require using GPUs."));
456 if (!useGpuForNonbonded)
458 if (bondedTarget == TaskTarget::Gpu)
460 GMX_THROW(InconsistentInputError(
461 "Bonded interactions on the GPU were required, but this requires that "
462 "short-ranged non-bonded interactions are also run on the GPU. Change "
463 "your settings, or do not require using GPUs."));
469 // TODO If the bonded kernels do not get fused, then performance
470 // overheads might suggest alternative choices here.
472 if (bondedTarget == TaskTarget::Gpu)
474 // We still don't know whether it is an error if no GPUs are
479 // If we get here, then the user permitted GPUs, which we should
480 // use for bonded interactions if any were detected and the CPU
481 // is busy, for which we currently only check PME or Ewald.
482 // (It would be better to dynamically assign bondeds based on timings)
483 // Note that here we assume that the auto setting of PME ranks will not
484 // choose seperate PME ranks when nonBonded are assigned to the GPU.
485 bool usingOurCpuForPmeOrEwald =
486 (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
488 return gpusWereDetected && usingOurCpuForPmeOrEwald;
491 bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,
492 const bool useGpuForPme,
493 const bool useGpuForNonbonded,
494 const bool gpuPmePpCommIsEnabled,
495 const bool gpuHaloExchangeIsEnabled,
496 const TaskTarget updateTarget,
497 const bool gpusWereDetected,
498 const t_inputrec& inputrec,
499 const gmx_mtop_t& mtop,
500 const bool useEssentialDynamics,
501 const bool doOrientationRestraints,
502 const bool useReplicaExchange)
505 if (updateTarget == TaskTarget::Cpu)
510 std::string errorMessage;
512 if (isDomainDecomposition && (!gpuPmePpCommIsEnabled || !gpuHaloExchangeIsEnabled))
515 "Domain decomposition is not supported without GPU halo exchange and GPU PME-PP "
518 // Using the GPU-version of update if:
519 // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread), or
520 // 2. Non-bonded interactions are on the GPU.
521 if (!(useGpuForPme || useGpuForNonbonded))
524 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
526 if (!gpusWereDetected)
528 errorMessage += "Compatible GPUs must have been found.\n";
530 if (GMX_GPU != GMX_GPU_CUDA)
532 errorMessage += "Only a CUDA build is supported.\n";
534 if (inputrec.eI != eiMD)
536 errorMessage += "Only the md integrator is supported.\n";
538 if (inputrec.etc == etcNOSEHOOVER)
540 errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
542 if (!(inputrec.epc == epcNO || inputrec.epc == epcPARRINELLORAHMAN || inputrec.epc == epcBERENDSEN))
544 errorMessage += "Only Parrinello-Rahman and Berendsen pressure coupling are supported.\n";
546 if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
548 // The graph is needed, but not supported
549 errorMessage += "Ewald surface correction is not supported.\n";
551 if (gmx_mtop_interaction_count(mtop, IF_VSITE) > 0)
553 errorMessage += "Virtual sites are not supported.\n";
555 if (useEssentialDynamics)
557 errorMessage += "Essential dynamics is not supported.\n";
559 if (inputrec.bPull || inputrec.pull)
561 // Pull potentials are actually supported, but constraint pulling is not
562 errorMessage += "Pulling is not supported.\n";
564 if (doOrientationRestraints)
566 // The graph is needed, but not supported
567 errorMessage += "Orientation restraints are not supported.\n";
569 if (inputrec.efep != efepNO)
571 // Actually all free-energy options except for mass and constraint perturbation are supported
572 errorMessage += "Free energy perturbations are not supported.\n";
574 if (useReplicaExchange)
576 errorMessage += "Replica exchange simulations are not supported.\n";
578 if (inputrec.eSwapCoords != eswapNO)
580 errorMessage += "Swapping the coordinates is not supported.\n";
583 // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
584 // does not support it, the actual CUDA LINCS code does support it
585 if (gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)
587 errorMessage += "Non-connecting constraints are not supported";
589 if (!UpdateConstrainCuda::isNumCoupledConstraintsSupported(mtop))
592 "The number of coupled constraints is higher than supported in the CUDA LINCS "
596 if (!errorMessage.empty())
598 if (updateTarget == TaskTarget::Gpu)
600 std::string prefix = gmx::formatString(
601 "Update task on the GPU was required,\n"
602 "but the following condition(s) were not satisfied:\n");
603 GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));