2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
5 * Copyright (c) 2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Defines functionality for deciding whether tasks will run on GPUs.
39 * \author Mark Abraham <mark.j.abraham@gmail.com>
40 * \ingroup module_taskassignment
45 #include "decidegpuusage.h"
55 #include "gromacs/ewald/pme.h"
56 #include "gromacs/hardware/cpuinfo.h"
57 #include "gromacs/hardware/detecthardware.h"
58 #include "gromacs/hardware/hardwaretopology.h"
59 #include "gromacs/hardware/hw_info.h"
60 #include "gromacs/listed_forces/gpubonded.h"
61 #include "gromacs/mdlib/gmx_omp_nthreads.h"
62 #include "gromacs/mdlib/update_constrain_gpu.h"
63 #include "gromacs/mdtypes/commrec.h"
64 #include "gromacs/mdtypes/inputrec.h"
65 #include "gromacs/mdtypes/md_enums.h"
66 #include "gromacs/mdtypes/mdrunoptions.h"
67 #include "gromacs/pulling/pull.h"
68 #include "gromacs/taskassignment/taskassignment.h"
69 #include "gromacs/topology/mtop_util.h"
70 #include "gromacs/topology/topology.h"
71 #include "gromacs/utility/baseversion.h"
72 #include "gromacs/utility/exceptions.h"
73 #include "gromacs/utility/fatalerror.h"
74 #include "gromacs/utility/gmxassert.h"
75 #include "gromacs/utility/logger.h"
76 #include "gromacs/utility/stringutil.h"
85 //! Helper variable to localise the text of an often repeated message.
// The text carries one %s placeholder, which callers in this file fill via
// formatString() with the mdrun options that must be given explicitly.
// NOTE(review): only the '# error' fallback of what looks like a preprocessor
// selection between environment-variable names is visible in this extract;
// the conditionals themselves and the end of this definition are not shown.
// Confirm against the full file before editing the literals.
86 const char* g_specifyEverythingFormatString =
87 "When you use mdrun -gputasks, %s must be set to non-default "
88 "values, so that the device IDs can be interpreted correctly."
90 " If you simply want to restrict which GPUs are used, then it is "
91 "better to use mdrun -gpu_id. Otherwise, setting the "
93 "CUDA_VISIBLE_DEVICES"
95 // Technically there is no portable way to do this offered by the
96 // OpenCL standard, but the only current relevant case for GROMACS
97 // is AMD OpenCL, which offers this variable.
100 // As with OpenCL, there is no portable way to do it.
101 // Intel reference: https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md
102 // While SYCL_DEVICE_FILTER is a better option, as of 2021.1-beta10 it is not yet supported.
103 "SYCL_DEVICE_ALLOWLIST"
105 # error "Unreachable branch"
107 " environment variable in your bash profile or job "
108 "script may be more convenient."
// Decides whether nonbonded (NB) tasks will run on GPUs, for the thread-MPI
// case where the number of usable devices is already known.
//
// The decision proceeds in this order:
//  1. NB on GPUs is ruled out when the target is Cpu, emulation is on, NB on
//     GPU is not useful, or the build lacks support.
//  2. With a non-empty user GPU task assignment, both the NB target and the
//     rank count must be non-default, otherwise InconsistentInputError is
//     thrown via GMX_THROW.
//  3. An explicit Gpu target is handled before the device count is consulted.
//  4. Otherwise the result is whether numDevicesToUse is positive.
//
// NOTE(review): the statements that end branches 1-3 are not visible in this
// extract; from the comments they presumably return false for 1 and true for
// 2 and 3 - confirm against the full file.
114 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget,
115 const int numDevicesToUse,
116 const std::vector<int>& userGpuTaskAssignment,
117 const EmulateGpuNonbonded emulateGpuNonbonded,
118 const bool buildSupportsNonbondedOnGpu,
119 const bool nonbondedOnGpuIsUseful,
120 const int numRanksPerSimulation)
122 // First, exclude all cases where we can't run NB on GPUs.
123 if (nonbondedTarget == TaskTarget::Cpu || emulateGpuNonbonded == EmulateGpuNonbonded::Yes
124 || !nonbondedOnGpuIsUseful || !buildSupportsNonbondedOnGpu)
126 // If the user required NB on GPUs, we issue an error later.
130 // We now know that NB on GPUs makes sense, if we have any.
132 if (!userGpuTaskAssignment.empty())
134 // Specifying -gputasks requires specifying everything.
// Per the error text, TaskTarget::Auto and a rank count below 1 are the
// "default" (unset) values of -nb and -ntmpi respectively.
135 if (nonbondedTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
137 GMX_THROW(InconsistentInputError(
138 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
143 if (nonbondedTarget == TaskTarget::Gpu)
148 // Because this is thread-MPI, we already know about the GPUs that
149 // all potential ranks can use, and can use that in a global
150 // decision that will later be consistent.
151 // If we get here, then the user permitted or required GPUs.
152 return (numDevicesToUse > 0);
// Decides whether PME tasks will run on GPUs, for the thread-MPI case where
// the number of usable devices is already known.
//
// PME on GPUs is ruled out up front when the PME target is Cpu, nonbondeds do
// not use a GPU, or any of the build/hardware/input support queries fails
// (they are called with a null message pointer here, so no reason text is
// collected). It is also ruled out when the FFT target is Cpu but the input
// does not support mixed mode.
//
// Throws InconsistentInputError (with a user GPU task assignment) or
// NotImplementedError (without one) via GMX_THROW when the PME target is Gpu
// and the rank layout is anything other than a single rank or a single
// separate PME rank. Also throws InconsistentInputError when a task
// assignment is given but -pme or -ntmpi was left at its default.
//
// NOTE(review): the statements ending the early-exit branches and the final
// fallback are not visible in this extract; presumably they return false or
// true as the adjacent comments suggest - confirm against the full file.
155 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded,
156 const TaskTarget pmeTarget,
157 const TaskTarget pmeFftTarget,
158 const int numDevicesToUse,
159 const std::vector<int>& userGpuTaskAssignment,
160 const gmx_hw_info_t& hardwareInfo,
161 const t_inputrec& inputrec,
162 const int numRanksPerSimulation,
163 const int numPmeRanksPerSimulation)
165 // First, exclude all cases where we can't run PME on GPUs.
166 if ((pmeTarget == TaskTarget::Cpu) || !useGpuForNonbonded || !pme_gpu_supports_build(nullptr)
167 || !pme_gpu_supports_hardware(hardwareInfo, nullptr) || !pme_gpu_supports_input(inputrec, nullptr))
169 // PME can't run on a GPU. If the user required that, we issue an error later.
173 if (pmeFftTarget == TaskTarget::Cpu && !pme_gpu_mixed_mode_supports_input(inputrec, nullptr))
175 /* User requested PME FFT on CPU, but the current system is not compatible with Mixed mode,
176 * so we don't use GPUs at all.
177 * If the user had -pme gpu, we issue an error later. */
181 // We now know that PME on GPUs might make sense, if we have any.
183 if (!userGpuTaskAssignment.empty())
185 // Follow the user's choice of GPU task assignment, if we
186 // can. Checking that their IDs are for compatible GPUs comes
189 // Specifying -gputasks requires specifying everything.
190 if (pmeTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
192 GMX_THROW(InconsistentInputError(
193 formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
196 // PME on GPUs is only supported in a single case
197 if (pmeTarget == TaskTarget::Gpu)
// Rejected layouts: several ranks with no separate PME rank, or more than
// one separate PME rank.
199 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
200 || (numPmeRanksPerSimulation > 1))
202 GMX_THROW(InconsistentInputError(
203 "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
204 "file and use a single PME rank."));
// NOTE(review): Cpu was excluded and Auto throws above, so the return below
// looks reachable only if the Gpu branch falls through - confirm against the
// full file whether this is dead code.
209 // pmeTarget == TaskTarget::Auto
210 return numRanksPerSimulation == 1;
213 // Because this is thread-MPI, we already know about the GPUs that
214 // all potential ranks can use, and can use that in a global
215 // decision that will later be consistent.
217 if (pmeTarget == TaskTarget::Gpu)
219 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
220 || (numPmeRanksPerSimulation > 1))
222 GMX_THROW(NotImplementedError(
223 "PME tasks were required to run on GPUs, but that is not implemented with "
224 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
225 "or permit PME tasks to be assigned to the CPU."));
230 if (numRanksPerSimulation == 1)
232 // PME can run well on a GPU shared with NB, and we permit
233 // mdrun to default to try that.
234 return numDevicesToUse > 0;
237 if (numRanksPerSimulation < 1)
239 // Full automated mode for thread-MPI (the default). PME can
240 // run well on a GPU shared with NB, and we permit mdrun to
241 // default to it if there is only one GPU available.
242 return (numDevicesToUse == 1);
245 // Not enough support for PME on GPUs for anything else
// Decides whether nonbonded tasks will run on GPUs, validating the user's
// choices against the build, emulation setting, and simulation settings.
//
// Throws InconsistentInputError via GMX_THROW when:
//  - a GPU task assignment is given together with a Cpu nonbonded target;
//  - the target is Gpu but the build lacks GPU nonbonded support;
//  - emulation is on together with a Gpu target or a GPU task assignment;
//  - the target is Gpu but NB on GPU is not useful for these settings;
//  - a GPU task assignment is given while the target is still Auto.
//
// When none of the earlier branches decides, the result is
// buildSupportsNonbondedOnGpu && gpusWereDetected.
//
// NOTE(review): the statements that end the non-throwing branches are not
// visible in this extract, nor is the opener of the throw whose argument
// starts with "GPU ID usage was specified" - confirm against the full file.
249 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget,
250 const std::vector<int>& userGpuTaskAssignment,
251 const EmulateGpuNonbonded emulateGpuNonbonded,
252 const bool buildSupportsNonbondedOnGpu,
253 const bool nonbondedOnGpuIsUseful,
254 const bool gpusWereDetected)
256 if (nonbondedTarget == TaskTarget::Cpu)
258 if (!userGpuTaskAssignment.empty())
260 GMX_THROW(InconsistentInputError(
261 "A GPU task assignment was specified, but nonbonded interactions were "
262 "assigned to the CPU. Make no more than one of these choices."));
268 if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
270 GMX_THROW(InconsistentInputError(
271 "Nonbonded interactions on the GPU were requested with -nb gpu, "
272 "but the GROMACS binary has been built without GPU support. "
273 "Either run without selecting GPU options, or recompile GROMACS "
274 "with GPU support enabled"));
277 // TODO refactor all these TaskTarget::Gpu checks into one place?
278 // e.g. use a subfunction that handles only the cases where
279 // TaskTargets are not Cpu?
280 if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
282 if (nonbondedTarget == TaskTarget::Gpu)
284 GMX_THROW(InconsistentInputError(
285 "Nonbonded interactions on the GPU were required, which is inconsistent "
286 "with choosing emulation. Make no more than one of these choices."));
288 if (!userGpuTaskAssignment.empty())
291 InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
292 "no more than one of these choices."));
298 if (!nonbondedOnGpuIsUseful)
300 if (nonbondedTarget == TaskTarget::Gpu)
302 GMX_THROW(InconsistentInputError(
303 "Nonbonded interactions on the GPU were required, but not supported for these "
304 "simulation settings. Change your settings, or do not require using GPUs."));
310 if (!userGpuTaskAssignment.empty())
312 // Specifying -gputasks requires specifying everything.
313 if (nonbondedTarget == TaskTarget::Auto)
315 GMX_THROW(InconsistentInputError(
316 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
322 if (nonbondedTarget == TaskTarget::Gpu)
324 // We still don't know whether it is an error if no GPUs are found
325 // because we don't know the duty of this rank, yet. For example,
326 // a node with only PME ranks and -pme cpu is OK if there are not
331 // If we get here, then the user permitted GPUs, which we should
332 // use for nonbonded interactions.
333 return buildSupportsNonbondedOnGpu && gpusWereDetected;
// Decides whether PME tasks will run on GPUs, validating the user's choices
// against nonbonded placement, build, hardware, and input support.
//
// For each support query that fails (build, hardware, input, and mixed mode
// when the FFT target is Cpu), a Gpu PME target causes NotImplementedError to
// be thrown with the reason text the query wrote into 'message'. The same
// exception type is thrown when the target is Gpu but nonbondeds are not on a
// GPU, or when the rank layout is not a single rank / single separate PME
// rank. InconsistentInputError is thrown when a GPU task assignment conflicts
// with a Cpu target or is given while the target is still Auto.
//
// With exactly one rank and no earlier decision, the result is
// gpusWereDetected.
//
// NOTE(review): the declaration of 'message' and the statements ending the
// non-throwing branches are not visible in this extract - confirm against the
// full file.
336 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded,
337 const TaskTarget pmeTarget,
338 const TaskTarget pmeFftTarget,
339 const std::vector<int>& userGpuTaskAssignment,
340 const gmx_hw_info_t& hardwareInfo,
341 const t_inputrec& inputrec,
342 const int numRanksPerSimulation,
343 const int numPmeRanksPerSimulation,
344 const bool gpusWereDetected)
346 if (pmeTarget == TaskTarget::Cpu)
351 if (!useGpuForNonbonded)
353 if (pmeTarget == TaskTarget::Gpu)
355 GMX_THROW(NotImplementedError(
356 "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
362 if (!pme_gpu_supports_build(&message))
364 if (pmeTarget == TaskTarget::Gpu)
366 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
370 if (!pme_gpu_supports_hardware(hardwareInfo, &message))
372 if (pmeTarget == TaskTarget::Gpu)
374 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
378 if (!pme_gpu_supports_input(inputrec, &message))
380 if (pmeTarget == TaskTarget::Gpu)
382 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
386 if (pmeFftTarget == TaskTarget::Cpu && !pme_gpu_mixed_mode_supports_input(inputrec, &message))
388 /* User requested PME FFT on CPU, but the current system is not compatible with Mixed mode,
389 * so we don't use GPUs at all. */
390 if (pmeTarget == TaskTarget::Gpu)
392 GMX_THROW(NotImplementedError("Cannot compute PME interactions in Mixed mode, because " + message));
// NOTE(review): pmeTarget == TaskTarget::Cpu is already tested at the top of
// this function; if that branch returns, the test below (and the throw inside
// it) cannot be reached - confirm against the full file.
397 if (pmeTarget == TaskTarget::Cpu)
399 if (!userGpuTaskAssignment.empty())
401 GMX_THROW(InconsistentInputError(
402 "A GPU task assignment was specified, but PME interactions were "
403 "assigned to the CPU. Make no more than one of these choices."));
409 if (!userGpuTaskAssignment.empty())
411 // Specifying -gputasks requires specifying everything.
412 if (pmeTarget == TaskTarget::Auto)
414 GMX_THROW(InconsistentInputError(formatString(
415 g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
421 // We still don't know whether it is an error if no GPUs are found
422 // because we don't know the duty of this rank, yet. For example,
423 // a node with only PME ranks and -pme cpu is OK if there are not
426 if (pmeTarget == TaskTarget::Gpu)
// Rejected layouts: several ranks with no separate PME rank, or more than
// one separate PME rank.
428 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
429 || (numPmeRanksPerSimulation > 1))
431 GMX_THROW(NotImplementedError(
432 "PME tasks were required to run on GPUs, but that is not implemented with "
433 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
434 "or permit PME tasks to be assigned to the CPU."));
439 // If we get here, then the user permitted GPUs.
440 if (numRanksPerSimulation == 1)
442 // PME can run well on a single GPU shared with NB when there
443 // is one rank, so we permit mdrun to try that if we have
445 return gpusWereDetected;
448 // Not enough support for PME on GPUs for anything else
// Maps the PME placement decision and the FFT target onto a PmeRunMode.
//
// Returns PmeRunMode::None when the electrostatics type is not a PME type
// according to EEL_PME(). The remaining visible outcomes are Mixed (FFT
// target is Cpu), GPU, and CPU; requesting FFTs on the GPU on the CPU path is
// reported with the message text below.
//
// NOTE(review): the condition that separates the Mixed/GPU path from the CPU
// path, and the call that receives the message text, are not visible in this
// extract; presumably the split is on useGpuForPme - confirm against the full
// file.
453 PmeRunMode determinePmeRunMode(const bool useGpuForPme, const TaskTarget& pmeFftTarget, const t_inputrec& inputrec)
455 if (!EEL_PME(inputrec.coulombtype))
457 return PmeRunMode::None;
462 if (pmeFftTarget == TaskTarget::Cpu)
464 return PmeRunMode::Mixed;
468 return PmeRunMode::GPU;
473 if (pmeFftTarget == TaskTarget::Gpu)
476 "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
477 "on CPU you should not be using -pmefft.");
479 return PmeRunMode::CPU;
// Decides whether bonded interactions will run on GPUs.
//
// When the bonded target is Gpu, InconsistentInputError is thrown via
// GMX_THROW if the build or the input does not support GPU bondeds (using the
// reason text written by the support query), or if nonbondeds are not on a
// GPU. When no earlier branch decides, the result is true only if GPUs were
// detected and this rank's CPU is considered busy with PME or Ewald work, as
// computed in usingOurCpuForPmeOrEwald below.
//
// NOTE(review): useGpuForPme is read below but its declaration is not visible
// in this extract, and neither are the statements ending the non-throwing
// branches - confirm against the full file.
483 bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded,
485 TaskTarget bondedTarget,
486 const t_inputrec& inputrec,
487 const gmx_mtop_t& mtop,
488 int numPmeRanksPerSimulation,
489 bool gpusWereDetected)
491 if (bondedTarget == TaskTarget::Cpu)
496 std::string errorMessage;
498 if (!buildSupportsGpuBondeds(&errorMessage))
500 if (bondedTarget == TaskTarget::Gpu)
502 GMX_THROW(InconsistentInputError(errorMessage.c_str()));
508 if (!inputSupportsGpuBondeds(inputrec, mtop, &errorMessage))
510 if (bondedTarget == TaskTarget::Gpu)
512 GMX_THROW(InconsistentInputError(errorMessage.c_str()));
518 if (!useGpuForNonbonded)
520 if (bondedTarget == TaskTarget::Gpu)
522 GMX_THROW(InconsistentInputError(
523 "Bonded interactions on the GPU were required, but this requires that "
524 "short-ranged non-bonded interactions are also run on the GPU. Change "
525 "your settings, or do not require using GPUs."));
531 // TODO If the bonded kernels do not get fused, then performance
532 // overheads might suggest alternative choices here.
534 if (bondedTarget == TaskTarget::Gpu)
536 // We still don't know whether it is an error if no GPUs are
541 // If we get here, then the user permitted GPUs, which we should
542 // use for bonded interactions if any were detected and the CPU
543 // is busy, for which we currently only check PME or Ewald.
544 // (It would be better to dynamically assign bondeds based on timings)
545 // Note that here we assume that the auto setting of PME ranks will not
546 // choose separate PME ranks when nonBonded are assigned to the GPU.
// The CPU counts as busy when VdW uses PME, or when electrostatics use
// PME/Ewald that is neither on the GPU nor on separate PME ranks.
547 bool usingOurCpuForPmeOrEwald =
548 (EVDW_PME(inputrec.vdwtype)
549 || (EEL_PME_EWALD(inputrec.coulombtype) && !useGpuForPme && numPmeRanksPerSimulation <= 0));
551 return gpusWereDetected && usingOurCpuForPmeOrEwald;
// Decides whether the update (integration and constraints) task will run on
// the GPU.
//
// The GPU path is only considered when updateTarget is Gpu, or when it is
// Auto and devFlags.forceGpuUpdateDefault is set. The function then appends
// one line to errorMessage for every unsupported condition it finds. If any
// were collected:
//  - with Auto plus the forced default, the conditions are logged as warnings
//    through mdlog, ending with "Will use CPU version of update.";
//  - with an explicit Gpu target, InconsistentInputError is thrown via
//    GMX_THROW with the collected text.
// The final return yields true for a Gpu target, or for Auto with the forced
// default.
//
// NOTE(review): many short lines are missing from this extract, including the
// guards for several of the messages (separate PME ranks, multiple time
// stepping, CUDA build, re-run, frozen atoms), the openers of some
// 'errorMessage +=' statements, at least one parameter declaration, and the
// statements ending the early-exit and warning branches. Confirm against the
// full file before relying on the exact control flow.
554 bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,
555 const bool useUpdateGroups,
556 const PmeRunMode pmeRunMode,
557 const bool havePmeOnlyRank,
558 const bool useGpuForNonbonded,
559 const TaskTarget updateTarget,
560 const bool gpusWereDetected,
561 const t_inputrec& inputrec,
562 const gmx_mtop_t& mtop,
563 const bool useEssentialDynamics,
564 const bool doOrientationRestraints,
565 const bool useReplicaExchange,
566 const bool haveFrozenAtoms,
568 const DevelopmentFeatureFlags& devFlags,
569 const gmx::MDLogger& mdlog)
572 // '-update cpu' overrides the environment variable, '-update auto' does not
573 if (updateTarget == TaskTarget::Cpu
574 || (updateTarget == TaskTarget::Auto && !devFlags.forceGpuUpdateDefault))
579 const bool hasAnyConstraints = gmx_mtop_interaction_count(mtop, IF_CONSTRAINT) > 0;
580 const bool pmeUsesCpu = (pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::Mixed);
// Accumulates one newline-terminated line per unsupported condition.
582 std::string errorMessage;
584 if (isDomainDecomposition)
586 if (hasAnyConstraints && !useUpdateGroups)
589 "Domain decomposition is only supported with constraints when update "
591 "are used. This means constraining all bonds is not supported, except for "
592 "small molecules, and box sizes close to half the pair-list cutoff are not "
601 errorMessage += "With separate PME rank(s), PME must run fully on the GPU.\n";
607 errorMessage += "Multiple time stepping is not supported.\n";
610 if (inputrec.eConstrAlg == econtSHAKE && hasAnyConstraints && gmx_mtop_ftype_count(mtop, F_CONSTR) > 0)
612 errorMessage += "SHAKE constraints are not supported.\n";
614 // Using the GPU-version of update if:
615 // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread) or inactive, or
616 // 2. Non-bonded interactions are on the GPU.
617 if ((pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::None) && !useGpuForNonbonded)
620 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
623 if (!gpusWereDetected)
625 errorMessage += "Compatible GPUs must have been found.\n";
629 errorMessage += "Only a CUDA build is supported.\n";
631 if (inputrec.eI != eiMD)
633 errorMessage += "Only the md integrator is supported.\n";
635 if (inputrec.etc == etcNOSEHOOVER)
637 errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
639 if (!(inputrec.epc == epcNO || inputrec.epc == epcPARRINELLORAHMAN
640 || inputrec.epc == epcBERENDSEN || inputrec.epc == epcCRESCALE))
643 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are "
646 if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
648 // The graph is needed, but not supported
649 errorMessage += "Ewald surface correction is not supported.\n";
651 if (gmx_mtop_interaction_count(mtop, IF_VSITE) > 0)
653 errorMessage += "Virtual sites are not supported.\n";
655 if (useEssentialDynamics)
657 errorMessage += "Essential dynamics is not supported.\n";
659 if (inputrec.bPull && pull_have_constraint(*inputrec.pull))
661 errorMessage += "Constraints pulling is not supported.\n";
663 if (doOrientationRestraints)
665 // The graph is needed, but not supported
666 errorMessage += "Orientation restraints are not supported.\n";
668 if (inputrec.efep != efepNO && (haveFepPerturbedMasses(mtop) || havePerturbedConstraints(mtop)))
670 errorMessage += "Free energy perturbation for mass and constraints are not supported.\n";
672 const auto particleTypes = gmx_mtop_particletype_count(mtop);
673 if (particleTypes[eptShell] > 0)
675 errorMessage += "Shells are not supported.\n";
677 if (useReplicaExchange)
679 errorMessage += "Replica exchange simulations are not supported.\n";
681 if (inputrec.eSwapCoords != eswapNO)
683 errorMessage += "Swapping the coordinates is not supported.\n";
687 errorMessage += "Re-run is not supported.\n";
690 // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
691 // does not support it, the actual CUDA LINCS code does support it
692 if (gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)
694 errorMessage += "Non-connecting constraints are not supported\n";
696 if (!UpdateConstrainGpu::isNumCoupledConstraintsSupported(mtop))
699 "The number of coupled constraints is higher than supported in the GPU LINCS "
704 // There is a known bug with frozen atoms and GPU update, see Issue #3920.
705 errorMessage += "Frozen atoms not supported.\n";
// Report the collected conditions: warn when the GPU default was only forced
// through the environment, throw when the user explicitly asked for the GPU.
708 if (!errorMessage.empty())
710 if (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault)
712 GMX_LOG(mdlog.warning)
715 "Update task on the GPU was required, by the "
716 "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable, but the following "
717 "condition(s) were not satisfied:");
718 GMX_LOG(mdlog.warning).asParagraph().appendText(errorMessage.c_str());
719 GMX_LOG(mdlog.warning).asParagraph().appendText("Will use CPU version of update.");
721 else if (updateTarget == TaskTarget::Gpu)
723 std::string prefix = gmx::formatString(
724 "Update task on the GPU was required,\n"
725 "but the following condition(s) were not satisfied:\n");
726 GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
731 return (updateTarget == TaskTarget::Gpu
732 || (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault));
// Decides whether halo exchange will run on the GPU.
//
// Returns true only when all of the following hold: PP domain decomposition
// is in use, devFlags.enableGpuHaloExchange is set, nonbondeds run on the
// GPU, and none of modular simulator, re-run, or energy minimization is
// active.
//
// NOTE(review): doRerun is read in the return expression but its declaration
// is not visible in this extract - confirm the parameter list against the
// full file.
735 bool decideWhetherToUseGpuForHalo(const DevelopmentFeatureFlags& devFlags,
736 bool havePPDomainDecomposition,
737 bool useGpuForNonbonded,
738 bool useModularSimulator,
740 bool haveEnergyMinimization)
742 return havePPDomainDecomposition && devFlags.enableGpuHaloExchange && useGpuForNonbonded
743 && !useModularSimulator && !doRerun && !haveEnergyMinimization;