2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
5 * Copyright (c) 2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Defines functionality for deciding whether tasks will run on GPUs.
39 * \author Mark Abraham <mark.j.abraham@gmail.com>
40 * \ingroup module_taskassignment
45 #include "decidegpuusage.h"
55 #include "gromacs/ewald/pme.h"
56 #include "gromacs/hardware/cpuinfo.h"
57 #include "gromacs/hardware/detecthardware.h"
58 #include "gromacs/hardware/hardwaretopology.h"
59 #include "gromacs/hardware/hw_info.h"
60 #include "gromacs/listed_forces/listed_forces_gpu.h"
61 #include "gromacs/mdlib/gmx_omp_nthreads.h"
62 #include "gromacs/mdlib/update_constrain_gpu.h"
63 #include "gromacs/mdtypes/commrec.h"
64 #include "gromacs/mdtypes/inputrec.h"
65 #include "gromacs/mdtypes/md_enums.h"
66 #include "gromacs/mdtypes/mdrunoptions.h"
67 #include "gromacs/pulling/pull.h"
68 #include "gromacs/taskassignment/taskassignment.h"
69 #include "gromacs/topology/mtop_util.h"
70 #include "gromacs/topology/topology.h"
71 #include "gromacs/utility/baseversion.h"
72 #include "gromacs/utility/exceptions.h"
73 #include "gromacs/utility/fatalerror.h"
74 #include "gromacs/utility/gmxassert.h"
75 #include "gromacs/utility/logger.h"
76 #include "gromacs/utility/stringutil.h"
85 //! Helper variable to localise the text of an often repeated message.
86 const char* const g_specifyEverythingFormatString =
87 "When you use mdrun -gputasks, %s must be set to non-default "
88 "values, so that the device IDs can be interpreted correctly."
90 " If you simply want to restrict which GPUs are used, then it is "
91 "better to use mdrun -gpu_id. Otherwise, setting the "
93 "CUDA_VISIBLE_DEVICES"
95 // Technically there is no portable way to do this offered by the
96 // OpenCL standard, but the only current relevant case for GROMACS
97 // is AMD OpenCL, which offers this variable.
99 # elif GMX_GPU_SYCL && GMX_SYCL_DPCPP
100 // https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md
102 # elif GMX_GPU_SYCL && GMX_SYCL_HIPSYCL
103 // Not true if we use hipSYCL over CUDA or IntelLLVM, but in that case the user probably
104 // knows what they are doing.
105 // https://rocmdocs.amd.com/en/latest/Other_Solutions/Other-Solutions.html#hip-environment-variables
106 "HIP_VISIBLE_DEVICES"
108 # error "Unreachable branch"
110 " environment variable in your bash profile or job "
111 "script may be more convenient."
117 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget,
118 const bool haveAvailableDevices,
119 const std::vector<int>& userGpuTaskAssignment,
120 const EmulateGpuNonbonded emulateGpuNonbonded,
121 const bool buildSupportsNonbondedOnGpu,
122 const bool nonbondedOnGpuIsUseful,
123 const int numRanksPerSimulation)
125 // First, exclude all cases where we can't run NB on GPUs.
126 if (nonbondedTarget == TaskTarget::Cpu || emulateGpuNonbonded == EmulateGpuNonbonded::Yes
127 || !nonbondedOnGpuIsUseful || !buildSupportsNonbondedOnGpu)
129 // If the user required NB on GPUs, we issue an error later.
133 // We now know that NB on GPUs makes sense, if we have any.
135 if (!userGpuTaskAssignment.empty())
137 // Specifying -gputasks requires specifying everything.
138 if (nonbondedTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
140 GMX_THROW(InconsistentInputError(
141 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
146 if (nonbondedTarget == TaskTarget::Gpu)
151 // Because this is thread-MPI, we already know about the GPUs that
152 // all potential ranks can use, and can use that in a global
153 // decision that will later be consistent.
154 // If we get here, then the user permitted or required GPUs.
155 return haveAvailableDevices;
158 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded,
159 const TaskTarget pmeTarget,
160 const TaskTarget pmeFftTarget,
161 const int numDevicesToUse,
162 const std::vector<int>& userGpuTaskAssignment,
163 const gmx_hw_info_t& hardwareInfo,
164 const t_inputrec& inputrec,
165 const int numRanksPerSimulation,
166 const int numPmeRanksPerSimulation)
168 // First, exclude all cases where we can't run PME on GPUs.
169 if ((pmeTarget == TaskTarget::Cpu) || !useGpuForNonbonded || !pme_gpu_supports_build(nullptr)
170 || !pme_gpu_supports_hardware(hardwareInfo, nullptr) || !pme_gpu_supports_input(inputrec, nullptr))
172 // PME can't run on a GPU. If the user required that, we issue an error later.
176 if (pmeFftTarget == TaskTarget::Cpu && !pme_gpu_mixed_mode_supports_input(inputrec, nullptr))
178 /* User requested PME FFT on CPU, but the current system is not compatible with Mixed mode,
179 * so we don't use GPUs at all.
180 * If the user had -pme gpu, we issue an error later. */
184 // We now know that PME on GPUs might make sense, if we have any.
186 if (!userGpuTaskAssignment.empty())
188 // Follow the user's choice of GPU task assignment, if we
189 // can. Checking that their IDs are for compatible GPUs comes
192 // Specifying -gputasks requires specifying everything.
193 if (pmeTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
195 GMX_THROW(InconsistentInputError(
196 formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
199 // PME on GPUs is only supported in a single case
200 if (pmeTarget == TaskTarget::Gpu)
202 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
203 || (numPmeRanksPerSimulation > 1))
205 GMX_THROW(InconsistentInputError(
206 "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
207 "file and use a single PME rank."));
212 // pmeTarget == TaskTarget::Auto
213 return numRanksPerSimulation == 1;
216 // Because this is thread-MPI, we already know about the GPUs that
217 // all potential ranks can use, and can use that in a global
218 // decision that will later be consistent.
220 if (pmeTarget == TaskTarget::Gpu)
222 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
223 || (numPmeRanksPerSimulation > 1))
225 GMX_THROW(NotImplementedError(
226 "PME tasks were required to run on GPUs, but that is not implemented with "
227 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
228 "or permit PME tasks to be assigned to the CPU."));
233 if (numRanksPerSimulation == 1)
235 // PME can run well on a GPU shared with NB, and we permit
236 // mdrun to default to try that.
237 return numDevicesToUse > 0;
240 if (numRanksPerSimulation < 1)
242 // Full automated mode for thread-MPI (the default). PME can
243 // run well on a GPU shared with NB, and we permit mdrun to
244 // default to it if there is only one GPU available.
245 return (numDevicesToUse == 1);
248 // Not enough support for PME on GPUs for anything else
252 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget,
253 const std::vector<int>& userGpuTaskAssignment,
254 const EmulateGpuNonbonded emulateGpuNonbonded,
255 const bool buildSupportsNonbondedOnGpu,
256 const bool nonbondedOnGpuIsUseful,
257 const bool gpusWereDetected)
259 if (nonbondedTarget == TaskTarget::Cpu)
261 if (!userGpuTaskAssignment.empty())
263 GMX_THROW(InconsistentInputError(
264 "A GPU task assignment was specified, but nonbonded interactions were "
265 "assigned to the CPU. Make no more than one of these choices."));
271 if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
273 GMX_THROW(InconsistentInputError(
274 "Nonbonded interactions on the GPU were requested with -nb gpu, "
275 "but the GROMACS binary has been built without GPU support. "
276 "Either run without selecting GPU options, or recompile GROMACS "
277 "with GPU support enabled"));
280 // TODO refactor all these TaskTarget::Gpu checks into one place?
281 // e.g. use a subfunction that handles only the cases where
282 // TaskTargets are not Cpu?
283 if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
285 if (nonbondedTarget == TaskTarget::Gpu)
287 GMX_THROW(InconsistentInputError(
288 "Nonbonded interactions on the GPU were required, which is inconsistent "
289 "with choosing emulation. Make no more than one of these choices."));
291 if (!userGpuTaskAssignment.empty())
294 InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
295 "no more than one of these choices."));
301 if (!nonbondedOnGpuIsUseful)
303 if (nonbondedTarget == TaskTarget::Gpu)
305 GMX_THROW(InconsistentInputError(
306 "Nonbonded interactions on the GPU were required, but not supported for these "
307 "simulation settings. Change your settings, or do not require using GPUs."));
313 if (!userGpuTaskAssignment.empty())
315 // Specifying -gputasks requires specifying everything.
316 if (nonbondedTarget == TaskTarget::Auto)
318 GMX_THROW(InconsistentInputError(
319 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
325 if (nonbondedTarget == TaskTarget::Gpu)
327 // We still don't know whether it is an error if no GPUs are found
328 // because we don't know the duty of this rank, yet. For example,
329 // a node with only PME ranks and -pme cpu is OK if there are not
334 // If we get here, then the user permitted GPUs, which we should
335 // use for nonbonded interactions.
336 return buildSupportsNonbondedOnGpu && gpusWereDetected;
339 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded,
340 const TaskTarget pmeTarget,
341 const TaskTarget pmeFftTarget,
342 const std::vector<int>& userGpuTaskAssignment,
343 const gmx_hw_info_t& hardwareInfo,
344 const t_inputrec& inputrec,
345 const int numRanksPerSimulation,
346 const int numPmeRanksPerSimulation,
347 const bool gpusWereDetected)
349 if (pmeTarget == TaskTarget::Cpu)
354 if (!useGpuForNonbonded)
356 if (pmeTarget == TaskTarget::Gpu)
358 GMX_THROW(NotImplementedError(
359 "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
365 if (!pme_gpu_supports_build(&message))
367 if (pmeTarget == TaskTarget::Gpu)
369 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
373 if (!pme_gpu_supports_hardware(hardwareInfo, &message))
375 if (pmeTarget == TaskTarget::Gpu)
377 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
381 if (!pme_gpu_supports_input(inputrec, &message))
383 if (pmeTarget == TaskTarget::Gpu)
385 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
389 if (pmeFftTarget == TaskTarget::Cpu && !pme_gpu_mixed_mode_supports_input(inputrec, &message))
391 /* User requested PME FFT on CPU, but the current system is not compatible with Mixed mode,
392 * so we don't use GPUs at all. */
393 if (pmeTarget == TaskTarget::Gpu)
395 GMX_THROW(NotImplementedError("Cannot compute PME interactions in Mixed mode, because " + message));
400 if (pmeTarget == TaskTarget::Cpu)
402 if (!userGpuTaskAssignment.empty())
404 GMX_THROW(InconsistentInputError(
405 "A GPU task assignment was specified, but PME interactions were "
406 "assigned to the CPU. Make no more than one of these choices."));
412 if (!userGpuTaskAssignment.empty())
414 // Specifying -gputasks requires specifying everything.
415 if (pmeTarget == TaskTarget::Auto)
417 GMX_THROW(InconsistentInputError(formatString(
418 g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
424 // We still don't know whether it is an error if no GPUs are found
425 // because we don't know the duty of this rank, yet. For example,
426 // a node with only PME ranks and -pme cpu is OK if there are not
429 if (pmeTarget == TaskTarget::Gpu)
431 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
432 || (numPmeRanksPerSimulation > 1))
434 GMX_THROW(NotImplementedError(
435 "PME tasks were required to run on GPUs, but that is not implemented with "
436 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
437 "or permit PME tasks to be assigned to the CPU."));
442 // If we get here, then the user permitted GPUs.
443 if (numRanksPerSimulation == 1)
445 // PME can run well on a single GPU shared with NB when there
446 // is one rank, so we permit mdrun to try that if we have
448 return gpusWereDetected;
451 // Not enough support for PME on GPUs for anything else
456 PmeRunMode determinePmeRunMode(const bool useGpuForPme, const TaskTarget& pmeFftTarget, const t_inputrec& inputrec)
458 if (!EEL_PME(inputrec.coulombtype))
460 return PmeRunMode::None;
465 if (pmeFftTarget == TaskTarget::Cpu)
467 return PmeRunMode::Mixed;
471 return PmeRunMode::GPU;
476 if (pmeFftTarget == TaskTarget::Gpu)
479 "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
480 "on CPU you should not be using -pmefft.");
482 return PmeRunMode::CPU;
486 bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded,
488 TaskTarget bondedTarget,
489 const t_inputrec& inputrec,
490 const gmx_mtop_t& mtop,
491 int numPmeRanksPerSimulation,
492 bool gpusWereDetected)
494 if (bondedTarget == TaskTarget::Cpu)
499 std::string errorMessage;
501 if (!buildSupportsListedForcesGpu(&errorMessage))
503 if (bondedTarget == TaskTarget::Gpu)
505 GMX_THROW(InconsistentInputError(errorMessage.c_str()));
511 if (!inputSupportsListedForcesGpu(inputrec, mtop, &errorMessage))
513 if (bondedTarget == TaskTarget::Gpu)
515 GMX_THROW(InconsistentInputError(errorMessage.c_str()));
521 if (!useGpuForNonbonded)
523 if (bondedTarget == TaskTarget::Gpu)
525 GMX_THROW(InconsistentInputError(
526 "Bonded interactions on the GPU were required, but this requires that "
527 "short-ranged non-bonded interactions are also run on the GPU. Change "
528 "your settings, or do not require using GPUs."));
534 // TODO If the bonded kernels do not get fused, then performance
535 // overheads might suggest alternative choices here.
537 if (bondedTarget == TaskTarget::Gpu)
539 // We still don't know whether it is an error if no GPUs are
544 // If we get here, then the user permitted GPUs, which we should
545 // use for bonded interactions if any were detected and the CPU
546 // is busy, for which we currently only check PME or Ewald.
547 // (It would be better to dynamically assign bondeds based on timings)
548 // Note that here we assume that the auto setting of PME ranks will not
549 // choose separate PME ranks when nonBonded are assigned to the GPU.
550 bool usingOurCpuForPmeOrEwald =
551 (EVDW_PME(inputrec.vdwtype)
552 || (EEL_PME_EWALD(inputrec.coulombtype) && !useGpuForPme && numPmeRanksPerSimulation <= 0));
554 return gpusWereDetected && usingOurCpuForPmeOrEwald;
557 bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,
558 const bool useUpdateGroups,
559 const PmeRunMode pmeRunMode,
560 const bool havePmeOnlyRank,
561 const bool useGpuForNonbonded,
562 const TaskTarget updateTarget,
563 const bool gpusWereDetected,
564 const t_inputrec& inputrec,
565 const gmx_mtop_t& mtop,
566 const bool useEssentialDynamics,
567 const bool doOrientationRestraints,
568 const bool haveFrozenAtoms,
570 const DevelopmentFeatureFlags& devFlags,
571 const gmx::MDLogger& mdlog)
574 // '-update cpu' overrides the environment variable, '-update auto' does not
575 if (updateTarget == TaskTarget::Cpu
576 || (updateTarget == TaskTarget::Auto && !devFlags.forceGpuUpdateDefault))
581 const bool hasAnyConstraints = gmx_mtop_interaction_count(mtop, IF_CONSTRAINT) > 0;
582 const bool pmeUsesCpu = (pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::Mixed);
584 std::string errorMessage;
586 if (isDomainDecomposition)
588 if (hasAnyConstraints && !useUpdateGroups)
591 "Domain decomposition is only supported with constraints when update "
593 "are used. This means constraining all bonds is not supported, except for "
594 "small molecules, and box sizes close to half the pair-list cutoff are not "
603 errorMessage += "With separate PME rank(s), PME must run fully on the GPU.\n";
609 errorMessage += "Multiple time stepping is not supported.\n";
612 if (inputrec.eConstrAlg == ConstraintAlgorithm::Shake && hasAnyConstraints
613 && gmx_mtop_ftype_count(mtop, F_CONSTR) > 0)
615 errorMessage += "SHAKE constraints are not supported.\n";
617 // Using the GPU-version of update if:
618 // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread) or inactive, or
619 // 2. Non-bonded interactions are on the GPU.
620 if ((pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::None) && !useGpuForNonbonded)
623 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
625 if (!gpusWereDetected)
627 errorMessage += "Compatible GPUs must have been found.\n";
631 errorMessage += "Only a CUDA build is supported.\n";
633 if (inputrec.eI != IntegrationAlgorithm::MD)
635 errorMessage += "Only the md integrator is supported.\n";
637 if (inputrec.etc == TemperatureCoupling::NoseHoover)
639 errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
641 if (!(inputrec.epc == PressureCoupling::No || inputrec.epc == PressureCoupling::ParrinelloRahman
642 || inputrec.epc == PressureCoupling::Berendsen || inputrec.epc == PressureCoupling::CRescale))
645 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are "
648 if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
650 // The graph is needed, but not supported
651 errorMessage += "Ewald surface correction is not supported.\n";
653 if (gmx_mtop_interaction_count(mtop, IF_VSITE) > 0)
655 errorMessage += "Virtual sites are not supported.\n";
657 if (useEssentialDynamics)
659 errorMessage += "Essential dynamics is not supported.\n";
661 if (inputrec.bPull && pull_have_constraint(*inputrec.pull))
663 errorMessage += "Constraints pulling is not supported.\n";
665 if (doOrientationRestraints)
667 // The graph is needed, but not supported
668 errorMessage += "Orientation restraints are not supported.\n";
670 if (inputrec.efep != FreeEnergyPerturbationType::No
671 && (haveFepPerturbedMasses(mtop) || havePerturbedConstraints(mtop)))
673 errorMessage += "Free energy perturbation for mass and constraints are not supported.\n";
675 const auto particleTypes = gmx_mtop_particletype_count(mtop);
676 if (particleTypes[ParticleType::Shell] > 0)
678 errorMessage += "Shells are not supported.\n";
680 if (inputrec.eSwapCoords != SwapType::No)
682 errorMessage += "Swapping the coordinates is not supported.\n";
686 errorMessage += "Re-run is not supported.\n";
689 // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
690 // does not support it, the actual CUDA LINCS code does support it
691 if (gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)
693 errorMessage += "Non-connecting constraints are not supported\n";
695 if (!UpdateConstrainGpu::isNumCoupledConstraintsSupported(mtop))
698 "The number of coupled constraints is higher than supported in the GPU LINCS "
701 if (hasAnyConstraints && !UpdateConstrainGpu::areConstraintsSupported())
703 errorMessage += "Chosen GPU implementation does not support constraints.\n";
707 // There is a known bug with frozen atoms and GPU update, see Issue #3920.
708 errorMessage += "Frozen atoms not supported.\n";
711 if (!errorMessage.empty())
713 if (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault)
715 GMX_LOG(mdlog.warning)
718 "Update task on the GPU was required, by the "
719 "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable, but the following "
720 "condition(s) were not satisfied:");
721 GMX_LOG(mdlog.warning).asParagraph().appendText(errorMessage.c_str());
722 GMX_LOG(mdlog.warning).asParagraph().appendText("Will use CPU version of update.");
724 else if (updateTarget == TaskTarget::Gpu)
726 std::string prefix = gmx::formatString(
727 "Update task on the GPU was required,\n"
728 "but the following condition(s) were not satisfied:\n");
729 GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
734 return (updateTarget == TaskTarget::Gpu
735 || (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault));
738 bool decideWhetherToUseGpuForHalo(const DevelopmentFeatureFlags& devFlags,
739 bool havePPDomainDecomposition,
740 bool useGpuForNonbonded,
741 bool useModularSimulator,
743 bool haveEnergyMinimization)
745 return havePPDomainDecomposition && devFlags.enableGpuHaloExchange && useGpuForNonbonded
746 && !useModularSimulator && !doRerun && !haveEnergyMinimization;