2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
5 * Copyright (c) 2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Defines functionality for deciding whether tasks will run on GPUs.
39 * \author Mark Abraham <mark.j.abraham@gmail.com>
40 * \ingroup module_taskassignment
45 #include "decidegpuusage.h"
55 #include "gromacs/ewald/pme.h"
56 #include "gromacs/hardware/cpuinfo.h"
57 #include "gromacs/hardware/detecthardware.h"
58 #include "gromacs/hardware/hardwaretopology.h"
59 #include "gromacs/hardware/hw_info.h"
60 #include "gromacs/listed_forces/gpubonded.h"
61 #include "gromacs/mdlib/gmx_omp_nthreads.h"
62 #include "gromacs/mdlib/update_constrain_gpu.h"
63 #include "gromacs/mdtypes/commrec.h"
64 #include "gromacs/mdtypes/inputrec.h"
65 #include "gromacs/mdtypes/md_enums.h"
66 #include "gromacs/mdtypes/mdrunoptions.h"
67 #include "gromacs/pulling/pull.h"
68 #include "gromacs/taskassignment/taskassignment.h"
69 #include "gromacs/topology/mtop_util.h"
70 #include "gromacs/topology/topology.h"
71 #include "gromacs/utility/baseversion.h"
72 #include "gromacs/utility/exceptions.h"
73 #include "gromacs/utility/fatalerror.h"
74 #include "gromacs/utility/gmxassert.h"
75 #include "gromacs/utility/logger.h"
76 #include "gromacs/utility/stringutil.h"
85 //! Helper variable to localise the text of an often repeated message.
86 const char* g_specifyEverythingFormatString =
87 "When you use mdrun -gputasks, %s must be set to non-default "
88 "values, so that the device IDs can be interpreted correctly."
90 " If you simply want to restrict which GPUs are used, then it is "
91 "better to use mdrun -gpu_id. Otherwise, setting the "
93 "CUDA_VISIBLE_DEVICES"
95 // Technically there is no portable way to do this offered by the
96 // OpenCL standard, but the only current relevant case for GROMACS
97 // is AMD OpenCL, which offers this variable.
101 "How to restrict visible devices in SYCL?"
103 # error "Unreachable branch"
105 " environment variable in your bash profile or job "
106 "script may be more convenient."
112 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget,
113 const int numDevicesToUse,
114 const std::vector<int>& userGpuTaskAssignment,
115 const EmulateGpuNonbonded emulateGpuNonbonded,
116 const bool buildSupportsNonbondedOnGpu,
117 const bool nonbondedOnGpuIsUseful,
118 const int numRanksPerSimulation)
120 // First, exclude all cases where we can't run NB on GPUs.
121 if (nonbondedTarget == TaskTarget::Cpu || emulateGpuNonbonded == EmulateGpuNonbonded::Yes
122 || !nonbondedOnGpuIsUseful || !buildSupportsNonbondedOnGpu)
124 // If the user required NB on GPUs, we issue an error later.
128 // We now know that NB on GPUs makes sense, if we have any.
130 if (!userGpuTaskAssignment.empty())
132 // Specifying -gputasks requires specifying everything.
133 if (nonbondedTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
135 GMX_THROW(InconsistentInputError(
136 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
141 if (nonbondedTarget == TaskTarget::Gpu)
146 // Because this is thread-MPI, we already know about the GPUs that
147 // all potential ranks can use, and can use that in a global
148 // decision that will later be consistent.
149 // If we get here, then the user permitted or required GPUs.
150 return (numDevicesToUse > 0);
153 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded,
154 const TaskTarget pmeTarget,
155 const int numDevicesToUse,
156 const std::vector<int>& userGpuTaskAssignment,
157 const gmx_hw_info_t& hardwareInfo,
158 const t_inputrec& inputrec,
159 const int numRanksPerSimulation,
160 const int numPmeRanksPerSimulation)
162 // First, exclude all cases where we can't run PME on GPUs.
163 if ((pmeTarget == TaskTarget::Cpu) || !useGpuForNonbonded || !pme_gpu_supports_build(nullptr)
164 || !pme_gpu_supports_hardware(hardwareInfo, nullptr) || !pme_gpu_supports_input(inputrec, nullptr))
166 // PME can't run on a GPU. If the user required that, we issue
171 // We now know that PME on GPUs might make sense, if we have any.
173 if (!userGpuTaskAssignment.empty())
175 // Follow the user's choice of GPU task assignment, if we
176 // can. Checking that their IDs are for compatible GPUs comes
179 // Specifying -gputasks requires specifying everything.
180 if (pmeTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
182 GMX_THROW(InconsistentInputError(
183 formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
186 // PME on GPUs is only supported in a single case
187 if (pmeTarget == TaskTarget::Gpu)
189 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
190 || (numPmeRanksPerSimulation > 1))
192 GMX_THROW(InconsistentInputError(
193 "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
194 "file and use a single PME rank."));
199 // pmeTarget == TaskTarget::Auto
200 return numRanksPerSimulation == 1;
203 // Because this is thread-MPI, we already know about the GPUs that
204 // all potential ranks can use, and can use that in a global
205 // decision that will later be consistent.
207 if (pmeTarget == TaskTarget::Gpu)
209 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
210 || (numPmeRanksPerSimulation > 1))
212 GMX_THROW(NotImplementedError(
213 "PME tasks were required to run on GPUs, but that is not implemented with "
214 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
215 "or permit PME tasks to be assigned to the CPU."));
220 if (numRanksPerSimulation == 1)
222 // PME can run well on a GPU shared with NB, and we permit
223 // mdrun to default to try that.
224 return numDevicesToUse > 0;
227 if (numRanksPerSimulation < 1)
229 // Full automated mode for thread-MPI (the default). PME can
230 // run well on a GPU shared with NB, and we permit mdrun to
231 // default to it if there is only one GPU available.
232 return (numDevicesToUse == 1);
235 // Not enough support for PME on GPUs for anything else
239 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget,
240 const std::vector<int>& userGpuTaskAssignment,
241 const EmulateGpuNonbonded emulateGpuNonbonded,
242 const bool buildSupportsNonbondedOnGpu,
243 const bool nonbondedOnGpuIsUseful,
244 const bool gpusWereDetected)
246 if (nonbondedTarget == TaskTarget::Cpu)
248 if (!userGpuTaskAssignment.empty())
250 GMX_THROW(InconsistentInputError(
251 "A GPU task assignment was specified, but nonbonded interactions were "
252 "assigned to the CPU. Make no more than one of these choices."));
258 if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
260 GMX_THROW(InconsistentInputError(
261 "Nonbonded interactions on the GPU were requested with -nb gpu, "
262 "but the GROMACS binary has been built without GPU support. "
263 "Either run without selecting GPU options, or recompile GROMACS "
264 "with GPU support enabled"));
267 // TODO refactor all these TaskTarget::Gpu checks into one place?
268 // e.g. use a subfunction that handles only the cases where
269 // TaskTargets are not Cpu?
270 if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
272 if (nonbondedTarget == TaskTarget::Gpu)
274 GMX_THROW(InconsistentInputError(
275 "Nonbonded interactions on the GPU were required, which is inconsistent "
276 "with choosing emulation. Make no more than one of these choices."));
278 if (!userGpuTaskAssignment.empty())
281 InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
282 "no more than one of these choices."));
288 if (!nonbondedOnGpuIsUseful)
290 if (nonbondedTarget == TaskTarget::Gpu)
292 GMX_THROW(InconsistentInputError(
293 "Nonbonded interactions on the GPU were required, but not supported for these "
294 "simulation settings. Change your settings, or do not require using GPUs."));
300 if (!userGpuTaskAssignment.empty())
302 // Specifying -gputasks requires specifying everything.
303 if (nonbondedTarget == TaskTarget::Auto)
305 GMX_THROW(InconsistentInputError(
306 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
312 if (nonbondedTarget == TaskTarget::Gpu)
314 // We still don't know whether it is an error if no GPUs are found
315 // because we don't know the duty of this rank, yet. For example,
316 // a node with only PME ranks and -pme cpu is OK if there are not
321 // If we get here, then the user permitted GPUs, which we should
322 // use for nonbonded interactions.
323 return buildSupportsNonbondedOnGpu && gpusWereDetected;
326 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded,
327 const TaskTarget pmeTarget,
328 const std::vector<int>& userGpuTaskAssignment,
329 const gmx_hw_info_t& hardwareInfo,
330 const t_inputrec& inputrec,
331 const int numRanksPerSimulation,
332 const int numPmeRanksPerSimulation,
333 const bool gpusWereDetected)
335 if (pmeTarget == TaskTarget::Cpu)
340 if (!useGpuForNonbonded)
342 if (pmeTarget == TaskTarget::Gpu)
344 GMX_THROW(NotImplementedError(
345 "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
351 if (!pme_gpu_supports_build(&message))
353 if (pmeTarget == TaskTarget::Gpu)
355 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
359 if (!pme_gpu_supports_hardware(hardwareInfo, &message))
361 if (pmeTarget == TaskTarget::Gpu)
363 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
367 if (!pme_gpu_supports_input(inputrec, &message))
369 if (pmeTarget == TaskTarget::Gpu)
371 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
376 if (pmeTarget == TaskTarget::Cpu)
378 if (!userGpuTaskAssignment.empty())
380 GMX_THROW(InconsistentInputError(
381 "A GPU task assignment was specified, but PME interactions were "
382 "assigned to the CPU. Make no more than one of these choices."));
388 if (!userGpuTaskAssignment.empty())
390 // Specifying -gputasks requires specifying everything.
391 if (pmeTarget == TaskTarget::Auto)
393 GMX_THROW(InconsistentInputError(formatString(
394 g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
400 // We still don't know whether it is an error if no GPUs are found
401 // because we don't know the duty of this rank, yet. For example,
402 // a node with only PME ranks and -pme cpu is OK if there are not
405 if (pmeTarget == TaskTarget::Gpu)
407 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
408 || (numPmeRanksPerSimulation > 1))
410 GMX_THROW(NotImplementedError(
411 "PME tasks were required to run on GPUs, but that is not implemented with "
412 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
413 "or permit PME tasks to be assigned to the CPU."));
418 // If we get here, then the user permitted GPUs.
419 if (numRanksPerSimulation == 1)
421 // PME can run well on a single GPU shared with NB when there
422 // is one rank, so we permit mdrun to try that if we have
424 return gpusWereDetected;
427 // Not enough support for PME on GPUs for anything else
432 PmeRunMode determinePmeRunMode(const bool useGpuForPme, const TaskTarget& pmeFftTarget, const t_inputrec& inputrec)
434 if (!EEL_PME(inputrec.coulombtype))
436 return PmeRunMode::None;
441 if (pmeFftTarget == TaskTarget::Cpu)
443 return PmeRunMode::Mixed;
447 return PmeRunMode::GPU;
452 if (pmeFftTarget == TaskTarget::Gpu)
455 "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
456 "on CPU you should not be using -pmefft.");
458 return PmeRunMode::CPU;
462 bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded,
464 TaskTarget bondedTarget,
465 const t_inputrec& inputrec,
466 const gmx_mtop_t& mtop,
467 int numPmeRanksPerSimulation,
468 bool gpusWereDetected)
470 if (bondedTarget == TaskTarget::Cpu)
475 std::string errorMessage;
477 if (!buildSupportsGpuBondeds(&errorMessage))
479 if (bondedTarget == TaskTarget::Gpu)
481 GMX_THROW(InconsistentInputError(errorMessage.c_str()));
487 if (!inputSupportsGpuBondeds(inputrec, mtop, &errorMessage))
489 if (bondedTarget == TaskTarget::Gpu)
491 GMX_THROW(InconsistentInputError(errorMessage.c_str()));
497 if (!useGpuForNonbonded)
499 if (bondedTarget == TaskTarget::Gpu)
501 GMX_THROW(InconsistentInputError(
502 "Bonded interactions on the GPU were required, but this requires that "
503 "short-ranged non-bonded interactions are also run on the GPU. Change "
504 "your settings, or do not require using GPUs."));
510 // TODO If the bonded kernels do not get fused, then performance
511 // overheads might suggest alternative choices here.
513 if (bondedTarget == TaskTarget::Gpu)
515 // We still don't know whether it is an error if no GPUs are
520 // If we get here, then the user permitted GPUs, which we should
521 // use for bonded interactions if any were detected and the CPU
522 // is busy, for which we currently only check PME or Ewald.
523 // (It would be better to dynamically assign bondeds based on timings)
524 // Note that here we assume that the auto setting of PME ranks will not
525 // choose seperate PME ranks when nonBonded are assigned to the GPU.
526 bool usingOurCpuForPmeOrEwald =
527 (EVDW_PME(inputrec.vdwtype)
528 || (EEL_PME_EWALD(inputrec.coulombtype) && !useGpuForPme && numPmeRanksPerSimulation <= 0));
530 return gpusWereDetected && usingOurCpuForPmeOrEwald;
533 bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,
534 const bool useUpdateGroups,
535 const PmeRunMode pmeRunMode,
536 const bool havePmeOnlyRank,
537 const bool useGpuForNonbonded,
538 const TaskTarget updateTarget,
539 const bool gpusWereDetected,
540 const t_inputrec& inputrec,
541 const gmx_mtop_t& mtop,
542 const bool useEssentialDynamics,
543 const bool doOrientationRestraints,
544 const bool useReplicaExchange,
546 const DevelopmentFeatureFlags& devFlags,
547 const gmx::MDLogger& mdlog)
550 // '-update cpu' overrides the environment variable, '-update auto' does not
551 if (updateTarget == TaskTarget::Cpu
552 || (updateTarget == TaskTarget::Auto && !devFlags.forceGpuUpdateDefault))
557 const bool hasAnyConstraints = gmx_mtop_interaction_count(mtop, IF_CONSTRAINT) > 0;
558 const bool pmeUsesCpu = (pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::Mixed);
560 std::string errorMessage;
562 if (isDomainDecomposition)
564 if (!devFlags.enableGpuHaloExchange)
566 errorMessage += "Domain decomposition without GPU halo exchange is not supported.\n ";
570 if (hasAnyConstraints && !useUpdateGroups)
573 "Domain decomposition is only supported with constraints when update "
575 "are used. This means constraining all bonds is not supported, except for "
576 "small molecules, and box sizes close to half the pair-list cutoff are not "
582 errorMessage += "With domain decomposition, PME must run fully on the GPU.\n";
591 errorMessage += "With separate PME rank(s), PME must run fully on the GPU.\n";
594 if (!devFlags.enableGpuPmePPComm)
596 errorMessage += "With separate PME rank(s), PME must use direct communication.\n";
602 errorMessage += "Multiple time stepping is not supported.\n";
605 if (inputrec.eConstrAlg == econtSHAKE && hasAnyConstraints && gmx_mtop_ftype_count(mtop, F_CONSTR) > 0)
607 errorMessage += "SHAKE constraints are not supported.\n";
609 // Using the GPU-version of update if:
610 // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread) or inactive, or
611 // 2. Non-bonded interactions are on the GPU.
612 if ((pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::None) && !useGpuForNonbonded)
615 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
618 if (!gpusWereDetected)
620 errorMessage += "Compatible GPUs must have been found.\n";
624 errorMessage += "Only a CUDA build is supported.\n";
626 if (inputrec.eI != eiMD)
628 errorMessage += "Only the md integrator is supported.\n";
630 if (inputrec.etc == etcNOSEHOOVER)
632 errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
634 if (!(inputrec.epc == epcNO || inputrec.epc == epcPARRINELLORAHMAN
635 || inputrec.epc == epcBERENDSEN || inputrec.epc == epcCRESCALE))
638 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are "
641 if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
643 // The graph is needed, but not supported
644 errorMessage += "Ewald surface correction is not supported.\n";
646 if (gmx_mtop_interaction_count(mtop, IF_VSITE) > 0)
648 errorMessage += "Virtual sites are not supported.\n";
650 if (useEssentialDynamics)
652 errorMessage += "Essential dynamics is not supported.\n";
654 if (inputrec.bPull && pull_have_constraint(*inputrec.pull))
656 errorMessage += "Constraints pulling is not supported.\n";
658 if (doOrientationRestraints)
660 // The graph is needed, but not supported
661 errorMessage += "Orientation restraints are not supported.\n";
663 if (inputrec.efep != efepNO
664 && (haveFreeEnergyType(inputrec, efptBONDED) || haveFreeEnergyType(inputrec, efptMASS)))
666 errorMessage += "Free energy perturbation for mass and constraints are not supported.\n";
668 const auto particleTypes = gmx_mtop_particletype_count(mtop);
669 if (particleTypes[eptShell] > 0)
671 errorMessage += "Shells are not supported.\n";
673 if (useReplicaExchange)
675 errorMessage += "Replica exchange simulations are not supported.\n";
677 if (inputrec.eSwapCoords != eswapNO)
679 errorMessage += "Swapping the coordinates is not supported.\n";
683 errorMessage += "Re-run is not supported.\n";
686 // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
687 // does not support it, the actual CUDA LINCS code does support it
688 if (gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)
690 errorMessage += "Non-connecting constraints are not supported\n";
692 if (!UpdateConstrainGpu::isNumCoupledConstraintsSupported(mtop))
695 "The number of coupled constraints is higher than supported in the GPU LINCS "
699 if (!errorMessage.empty())
701 if (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault)
703 GMX_LOG(mdlog.warning)
706 "Update task on the GPU was required, by the "
707 "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable, but the following "
708 "condition(s) were not satisfied:");
709 GMX_LOG(mdlog.warning).asParagraph().appendText(errorMessage.c_str());
710 GMX_LOG(mdlog.warning).asParagraph().appendText("Will use CPU version of update.");
712 else if (updateTarget == TaskTarget::Gpu)
714 std::string prefix = gmx::formatString(
715 "Update task on the GPU was required,\n"
716 "but the following condition(s) were not satisfied:\n");
717 GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
722 return (updateTarget == TaskTarget::Gpu
723 || (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault));
726 bool decideWhetherToUseGpuForHalo(const DevelopmentFeatureFlags& devFlags,
727 bool havePPDomainDecomposition,
728 bool useGpuForNonbonded,
729 bool useModularSimulator,
731 bool haveEnergyMinimization)
733 return havePPDomainDecomposition && devFlags.enableGpuHaloExchange && useGpuForNonbonded
734 && !useModularSimulator && !doRerun && !haveEnergyMinimization;