2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
5 * Copyright (c) 2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Defines functionality for deciding whether tasks will run on GPUs.
39 * \author Mark Abraham <mark.j.abraham@gmail.com>
40 * \ingroup module_taskassignment
45 #include "decidegpuusage.h"
55 #include "gromacs/ewald/pme.h"
56 #include "gromacs/hardware/cpuinfo.h"
57 #include "gromacs/hardware/detecthardware.h"
58 #include "gromacs/hardware/hardwaretopology.h"
59 #include "gromacs/hardware/hw_info.h"
60 #include "gromacs/listed_forces/listed_forces_gpu.h"
61 #include "gromacs/mdlib/gmx_omp_nthreads.h"
62 #include "gromacs/mdlib/update_constrain_gpu.h"
63 #include "gromacs/mdtypes/commrec.h"
64 #include "gromacs/mdtypes/inputrec.h"
65 #include "gromacs/mdtypes/md_enums.h"
66 #include "gromacs/mdtypes/mdrunoptions.h"
67 #include "gromacs/pulling/pull.h"
68 #include "gromacs/taskassignment/taskassignment.h"
69 #include "gromacs/topology/mtop_util.h"
70 #include "gromacs/topology/topology.h"
71 #include "gromacs/utility/baseversion.h"
72 #include "gromacs/utility/exceptions.h"
73 #include "gromacs/utility/fatalerror.h"
74 #include "gromacs/utility/gmxassert.h"
75 #include "gromacs/utility/logger.h"
76 #include "gromacs/utility/message_string_collector.h"
77 #include "gromacs/utility/stringutil.h"
86 //! Helper variable to localise the text of an often repeated message.
87 const char* const g_specifyEverythingFormatString =
88 "When you use mdrun -gputasks, %s must be set to non-default "
89 "values, so that the device IDs can be interpreted correctly."
91 " If you simply want to restrict which GPUs are used, then it is "
92 "better to use mdrun -gpu_id. Otherwise, setting the "
94 "CUDA_VISIBLE_DEVICES"
96 // Technically there is no portable way to do this offered by the
97 // OpenCL standard, but the only current relevant case for GROMACS
98 // is AMD OpenCL, which offers this variable.
100 # elif GMX_GPU_SYCL && GMX_SYCL_DPCPP
101 // https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md
103 # elif GMX_GPU_SYCL && GMX_SYCL_HIPSYCL
104 // Not true if we use hipSYCL over CUDA or IntelLLVM, but in that case the user probably
105 // knows what they are doing.
106 // https://rocmdocs.amd.com/en/latest/Other_Solutions/Other-Solutions.html#hip-environment-variables
107 "HIP_VISIBLE_DEVICES"
109 # error "Unreachable branch"
111 " environment variable in your bash profile or job "
112 "script may be more convenient."
118 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget,
119 const bool haveAvailableDevices,
120 const std::vector<int>& userGpuTaskAssignment,
121 const EmulateGpuNonbonded emulateGpuNonbonded,
122 const bool buildSupportsNonbondedOnGpu,
123 const bool nonbondedOnGpuIsUseful,
124 const int numRanksPerSimulation)
126 // First, exclude all cases where we can't run NB on GPUs.
127 if (nonbondedTarget == TaskTarget::Cpu || emulateGpuNonbonded == EmulateGpuNonbonded::Yes
128 || !nonbondedOnGpuIsUseful || !buildSupportsNonbondedOnGpu)
130 // If the user required NB on GPUs, we issue an error later.
134 // We now know that NB on GPUs makes sense, if we have any.
136 if (!userGpuTaskAssignment.empty())
138 // Specifying -gputasks requires specifying everything.
139 if (nonbondedTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
141 GMX_THROW(InconsistentInputError(
142 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
147 if (nonbondedTarget == TaskTarget::Gpu)
152 // Because this is thread-MPI, we already know about the GPUs that
153 // all potential ranks can use, and can use that in a global
154 // decision that will later be consistent.
155 // If we get here, then the user permitted or required GPUs.
156 return haveAvailableDevices;
159 static bool canUseGpusForPme(const bool useGpuForNonbonded,
160 const TaskTarget pmeTarget,
161 const TaskTarget pmeFftTarget,
162 const gmx_hw_info_t& hardwareInfo,
163 const t_inputrec& inputrec,
164 std::string* errorMessage)
166 if (pmeTarget == TaskTarget::Cpu)
171 std::string tempString;
172 gmx::MessageStringCollector errorReasons;
173 // Before changing the prefix string, make sure that it is not searched for in regression tests.
174 errorReasons.startContext("Cannot compute PME interactions on a GPU, because:");
175 errorReasons.appendIf(!useGpuForNonbonded, "Nonbonded interactions must also run on GPUs.");
176 errorReasons.appendIf(!pme_gpu_supports_build(&tempString), tempString);
177 errorReasons.appendIf(!pme_gpu_supports_hardware(hardwareInfo, &tempString), tempString);
178 errorReasons.appendIf(!pme_gpu_supports_input(inputrec, &tempString), tempString);
179 if (pmeFftTarget == TaskTarget::Cpu)
181 // User requested PME FFT on CPU, so we check whether we are able to use PME Mixed mode.
182 errorReasons.appendIf(!pme_gpu_mixed_mode_supports_input(inputrec, &tempString), tempString);
184 errorReasons.finishContext();
186 if (errorReasons.isEmpty())
192 if (pmeTarget == TaskTarget::Gpu && errorMessage != nullptr)
194 *errorMessage = errorReasons.toString();
200 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded,
201 const TaskTarget pmeTarget,
202 const TaskTarget pmeFftTarget,
203 const int numDevicesToUse,
204 const std::vector<int>& userGpuTaskAssignment,
205 const gmx_hw_info_t& hardwareInfo,
206 const t_inputrec& inputrec,
207 const int numRanksPerSimulation,
208 const int numPmeRanksPerSimulation)
210 // First, exclude all cases where we can't run PME on GPUs.
211 if (!canUseGpusForPme(useGpuForNonbonded, pmeTarget, pmeFftTarget, hardwareInfo, inputrec, nullptr))
213 // PME can't run on a GPU. If the user required that, we issue an error later.
217 // We now know that PME on GPUs might make sense, if we have any.
219 if (!userGpuTaskAssignment.empty())
221 // Follow the user's choice of GPU task assignment, if we
222 // can. Checking that their IDs are for compatible GPUs comes
225 // Specifying -gputasks requires specifying everything.
226 if (pmeTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
228 GMX_THROW(InconsistentInputError(
229 formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
232 // PME on GPUs is only supported in a single case
233 if (pmeTarget == TaskTarget::Gpu)
235 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
236 || (numPmeRanksPerSimulation > 1))
238 GMX_THROW(InconsistentInputError(
239 "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
240 "file and use a single PME rank."));
245 // pmeTarget == TaskTarget::Auto
246 return numRanksPerSimulation == 1;
249 // Because this is thread-MPI, we already know about the GPUs that
250 // all potential ranks can use, and can use that in a global
251 // decision that will later be consistent.
253 if (pmeTarget == TaskTarget::Gpu)
255 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
256 || (numPmeRanksPerSimulation > 1))
258 GMX_THROW(NotImplementedError(
259 "PME tasks were required to run on GPUs, but that is not implemented with "
260 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
261 "or permit PME tasks to be assigned to the CPU."));
266 if (numRanksPerSimulation == 1)
268 // PME can run well on a GPU shared with NB, and we permit
269 // mdrun to default to try that.
270 return numDevicesToUse > 0;
273 if (numRanksPerSimulation < 1)
275 // Full automated mode for thread-MPI (the default). PME can
276 // run well on a GPU shared with NB, and we permit mdrun to
277 // default to it if there is only one GPU available.
278 return (numDevicesToUse == 1);
281 // Not enough support for PME on GPUs for anything else
285 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget,
286 const std::vector<int>& userGpuTaskAssignment,
287 const EmulateGpuNonbonded emulateGpuNonbonded,
288 const bool buildSupportsNonbondedOnGpu,
289 const bool nonbondedOnGpuIsUseful,
290 const bool gpusWereDetected)
292 if (nonbondedTarget == TaskTarget::Cpu)
294 if (!userGpuTaskAssignment.empty())
296 GMX_THROW(InconsistentInputError(
297 "A GPU task assignment was specified, but nonbonded interactions were "
298 "assigned to the CPU. Make no more than one of these choices."));
304 if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
306 GMX_THROW(InconsistentInputError(
307 "Nonbonded interactions on the GPU were requested with -nb gpu, "
308 "but the GROMACS binary has been built without GPU support. "
309 "Either run without selecting GPU options, or recompile GROMACS "
310 "with GPU support enabled"));
313 // TODO refactor all these TaskTarget::Gpu checks into one place?
314 // e.g. use a subfunction that handles only the cases where
315 // TaskTargets are not Cpu?
316 if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
318 if (nonbondedTarget == TaskTarget::Gpu)
320 GMX_THROW(InconsistentInputError(
321 "Nonbonded interactions on the GPU were required, which is inconsistent "
322 "with choosing emulation. Make no more than one of these choices."));
324 if (!userGpuTaskAssignment.empty())
327 InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
328 "no more than one of these choices."));
334 if (!nonbondedOnGpuIsUseful)
336 if (nonbondedTarget == TaskTarget::Gpu)
338 GMX_THROW(InconsistentInputError(
339 "Nonbonded interactions on the GPU were required, but not supported for these "
340 "simulation settings. Change your settings, or do not require using GPUs."));
346 if (!userGpuTaskAssignment.empty())
348 // Specifying -gputasks requires specifying everything.
349 if (nonbondedTarget == TaskTarget::Auto)
351 GMX_THROW(InconsistentInputError(
352 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
358 if (nonbondedTarget == TaskTarget::Gpu)
360 // We still don't know whether it is an error if no GPUs are found
361 // because we don't know the duty of this rank, yet. For example,
362 // a node with only PME ranks and -pme cpu is OK if there are not
367 // If we get here, then the user permitted GPUs, which we should
368 // use for nonbonded interactions.
369 return buildSupportsNonbondedOnGpu && gpusWereDetected;
372 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded,
373 const TaskTarget pmeTarget,
374 const TaskTarget pmeFftTarget,
375 const std::vector<int>& userGpuTaskAssignment,
376 const gmx_hw_info_t& hardwareInfo,
377 const t_inputrec& inputrec,
378 const int numRanksPerSimulation,
379 const int numPmeRanksPerSimulation,
380 const bool gpusWereDetected)
383 if (!canUseGpusForPme(useGpuForNonbonded, pmeTarget, pmeFftTarget, hardwareInfo, inputrec, &message))
385 if (!message.empty())
387 GMX_THROW(InconsistentInputError(message));
392 if (pmeTarget == TaskTarget::Cpu)
394 if (!userGpuTaskAssignment.empty())
396 GMX_THROW(InconsistentInputError(
397 "A GPU task assignment was specified, but PME interactions were "
398 "assigned to the CPU. Make no more than one of these choices."));
404 if (!userGpuTaskAssignment.empty())
406 // Specifying -gputasks requires specifying everything.
407 if (pmeTarget == TaskTarget::Auto)
409 GMX_THROW(InconsistentInputError(formatString(
410 g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
416 // We still don't know whether it is an error if no GPUs are found
417 // because we don't know the duty of this rank, yet. For example,
418 // a node with only PME ranks and -pme cpu is OK if there are not
421 if (pmeTarget == TaskTarget::Gpu)
423 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
424 || (numPmeRanksPerSimulation > 1))
426 GMX_THROW(NotImplementedError(
427 "PME tasks were required to run on GPUs, but that is not implemented with "
428 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
429 "or permit PME tasks to be assigned to the CPU."));
434 // If we get here, then the user permitted GPUs.
435 if (numRanksPerSimulation == 1)
437 // PME can run well on a single GPU shared with NB when there
438 // is one rank, so we permit mdrun to try that if we have
440 return gpusWereDetected;
443 // Not enough support for PME on GPUs for anything else
448 PmeRunMode determinePmeRunMode(const bool useGpuForPme, const TaskTarget& pmeFftTarget, const t_inputrec& inputrec)
450 if (!EEL_PME(inputrec.coulombtype))
452 return PmeRunMode::None;
457 if (pmeFftTarget == TaskTarget::Cpu)
459 return PmeRunMode::Mixed;
463 return PmeRunMode::GPU;
468 if (pmeFftTarget == TaskTarget::Gpu)
471 "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
472 "on CPU you should not be using -pmefft.");
474 return PmeRunMode::CPU;
478 bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded,
480 TaskTarget bondedTarget,
481 const t_inputrec& inputrec,
482 const gmx_mtop_t& mtop,
483 int numPmeRanksPerSimulation,
484 bool gpusWereDetected)
486 if (bondedTarget == TaskTarget::Cpu)
491 std::string errorMessage;
493 if (!buildSupportsListedForcesGpu(&errorMessage))
495 if (bondedTarget == TaskTarget::Gpu)
497 GMX_THROW(InconsistentInputError(errorMessage.c_str()));
503 if (!inputSupportsListedForcesGpu(inputrec, mtop, &errorMessage))
505 if (bondedTarget == TaskTarget::Gpu)
507 GMX_THROW(InconsistentInputError(errorMessage.c_str()));
513 if (!useGpuForNonbonded)
515 if (bondedTarget == TaskTarget::Gpu)
517 GMX_THROW(InconsistentInputError(
518 "Bonded interactions on the GPU were required, but this requires that "
519 "short-ranged non-bonded interactions are also run on the GPU. Change "
520 "your settings, or do not require using GPUs."));
526 // TODO If the bonded kernels do not get fused, then performance
527 // overheads might suggest alternative choices here.
529 if (bondedTarget == TaskTarget::Gpu)
531 // We still don't know whether it is an error if no GPUs are
536 // If we get here, then the user permitted GPUs, which we should
537 // use for bonded interactions if any were detected and the CPU
538 // is busy, for which we currently only check PME or Ewald.
539 // (It would be better to dynamically assign bondeds based on timings)
540 // Note that here we assume that the auto setting of PME ranks will not
541 // choose separate PME ranks when nonBonded are assigned to the GPU.
542 bool usingOurCpuForPmeOrEwald =
543 (EVDW_PME(inputrec.vdwtype)
544 || (EEL_PME_EWALD(inputrec.coulombtype) && !useGpuForPme && numPmeRanksPerSimulation <= 0));
546 return gpusWereDetected && usingOurCpuForPmeOrEwald;
549 bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,
550 const bool useUpdateGroups,
551 const PmeRunMode pmeRunMode,
552 const bool havePmeOnlyRank,
553 const bool useGpuForNonbonded,
554 const TaskTarget updateTarget,
555 const bool gpusWereDetected,
556 const t_inputrec& inputrec,
557 const gmx_mtop_t& mtop,
558 const bool useEssentialDynamics,
559 const bool doOrientationRestraints,
560 const bool haveFrozenAtoms,
562 const DevelopmentFeatureFlags& devFlags,
563 const gmx::MDLogger& mdlog)
566 // '-update cpu' overrides the environment variable, '-update auto' does not
567 if (updateTarget == TaskTarget::Cpu
568 || (updateTarget == TaskTarget::Auto && !devFlags.forceGpuUpdateDefault))
573 const bool hasAnyConstraints = gmx_mtop_interaction_count(mtop, IF_CONSTRAINT) > 0;
574 const bool pmeUsesCpu = (pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::Mixed);
576 std::string errorMessage;
578 if (isDomainDecomposition)
580 if (hasAnyConstraints && !useUpdateGroups)
583 "Domain decomposition is only supported with constraints when update "
585 "are used. This means constraining all bonds is not supported, except for "
586 "small molecules, and box sizes close to half the pair-list cutoff are not "
595 errorMessage += "With separate PME rank(s), PME must run fully on the GPU.\n";
601 errorMessage += "Multiple time stepping is not supported.\n";
604 if (inputrec.eConstrAlg == ConstraintAlgorithm::Shake && hasAnyConstraints
605 && gmx_mtop_ftype_count(mtop, F_CONSTR) > 0)
607 errorMessage += "SHAKE constraints are not supported.\n";
609 // Using the GPU-version of update if:
610 // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread) or inactive, or
611 // 2. Non-bonded interactions are on the GPU.
612 if ((pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::None) && !useGpuForNonbonded)
615 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
617 if (!gpusWereDetected)
619 errorMessage += "Compatible GPUs must have been found.\n";
623 errorMessage += "Only a CUDA build is supported.\n";
625 if (inputrec.eI != IntegrationAlgorithm::MD)
627 errorMessage += "Only the md integrator is supported.\n";
629 if (inputrec.etc == TemperatureCoupling::NoseHoover)
631 errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
633 if (!(inputrec.epc == PressureCoupling::No || inputrec.epc == PressureCoupling::ParrinelloRahman
634 || inputrec.epc == PressureCoupling::Berendsen || inputrec.epc == PressureCoupling::CRescale))
637 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are "
640 if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
642 // The graph is needed, but not supported
643 errorMessage += "Ewald surface correction is not supported.\n";
645 if (gmx_mtop_interaction_count(mtop, IF_VSITE) > 0)
647 errorMessage += "Virtual sites are not supported.\n";
649 if (useEssentialDynamics)
651 errorMessage += "Essential dynamics is not supported.\n";
653 if (inputrec.bPull && pull_have_constraint(*inputrec.pull))
655 errorMessage += "Constraints pulling is not supported.\n";
657 if (doOrientationRestraints)
659 // The graph is needed, but not supported
660 errorMessage += "Orientation restraints are not supported.\n";
662 if (inputrec.efep != FreeEnergyPerturbationType::No
663 && (haveFepPerturbedMasses(mtop) || havePerturbedConstraints(mtop)))
665 errorMessage += "Free energy perturbation for mass and constraints are not supported.\n";
667 const auto particleTypes = gmx_mtop_particletype_count(mtop);
668 if (particleTypes[ParticleType::Shell] > 0)
670 errorMessage += "Shells are not supported.\n";
672 if (inputrec.eSwapCoords != SwapType::No)
674 errorMessage += "Swapping the coordinates is not supported.\n";
678 errorMessage += "Re-run is not supported.\n";
681 // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
682 // does not support it, the actual CUDA LINCS code does support it
683 if (gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)
685 errorMessage += "Non-connecting constraints are not supported\n";
687 if (!UpdateConstrainGpu::isNumCoupledConstraintsSupported(mtop))
690 "The number of coupled constraints is higher than supported in the GPU LINCS "
693 if (hasAnyConstraints && !UpdateConstrainGpu::areConstraintsSupported())
695 errorMessage += "Chosen GPU implementation does not support constraints.\n";
699 // There is a known bug with frozen atoms and GPU update, see Issue #3920.
700 errorMessage += "Frozen atoms not supported.\n";
703 if (!errorMessage.empty())
705 if (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault)
707 GMX_LOG(mdlog.warning)
710 "Update task on the GPU was required, by the "
711 "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable, but the following "
712 "condition(s) were not satisfied:");
713 GMX_LOG(mdlog.warning).asParagraph().appendText(errorMessage.c_str());
714 GMX_LOG(mdlog.warning).asParagraph().appendText("Will use CPU version of update.");
716 else if (updateTarget == TaskTarget::Gpu)
718 std::string prefix = gmx::formatString(
719 "Update task on the GPU was required,\n"
720 "but the following condition(s) were not satisfied:\n");
721 GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
726 return (updateTarget == TaskTarget::Gpu
727 || (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault));
730 bool decideWhetherToUseGpuForHalo(const DevelopmentFeatureFlags& devFlags,
731 bool havePPDomainDecomposition,
732 bool useGpuForNonbonded,
733 bool useModularSimulator,
735 bool haveEnergyMinimization)
737 return havePPDomainDecomposition && devFlags.enableGpuHaloExchange && useGpuForNonbonded
738 && !useModularSimulator && !doRerun && !haveEnergyMinimization;