2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
5 * Copyright (c) 2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Defines functionality for deciding whether tasks will run on GPUs.
39 * \author Mark Abraham <mark.j.abraham@gmail.com>
40 * \ingroup module_taskassignment
45 #include "decidegpuusage.h"
55 #include "gromacs/ewald/pme.h"
56 #include "gromacs/hardware/cpuinfo.h"
57 #include "gromacs/hardware/detecthardware.h"
58 #include "gromacs/hardware/hardwaretopology.h"
59 #include "gromacs/hardware/hw_info.h"
60 #include "gromacs/mdlib/gmx_omp_nthreads.h"
61 #include "gromacs/mdlib/update_constrain_gpu.h"
62 #include "gromacs/mdtypes/commrec.h"
63 #include "gromacs/mdtypes/inputrec.h"
64 #include "gromacs/mdtypes/md_enums.h"
65 #include "gromacs/mdtypes/mdrunoptions.h"
66 #include "gromacs/pulling/pull.h"
67 #include "gromacs/taskassignment/taskassignment.h"
68 #include "gromacs/topology/mtop_util.h"
69 #include "gromacs/topology/topology.h"
70 #include "gromacs/utility/baseversion.h"
71 #include "gromacs/utility/exceptions.h"
72 #include "gromacs/utility/fatalerror.h"
73 #include "gromacs/utility/gmxassert.h"
74 #include "gromacs/utility/logger.h"
75 #include "gromacs/utility/stringutil.h"
// Shared printf-style message used when mdrun -gputasks is given without the
// other options it depends on. It contains a single %s placeholder, which the
// callers in this file fill via formatString with the names of the options
// that must be set explicitly.
// In GPU builds (GMX_GPU != GMX_GPU_NONE) the text is extended with advice to
// prefer mdrun -gpu_id, or to set a backend-specific environment variable
// (CUDA_VISIBLE_DEVICES for CUDA builds).
// NOTE(review): the OpenCL variable name, the else/endif directives and the
// terminating semicolon are not visible in this listing - confirm against the
// complete file.
84 //! Helper variable to localise the text of an often repeated message.
85 const char* g_specifyEverythingFormatString =
86 "When you use mdrun -gputasks, %s must be set to non-default "
87 "values, so that the device IDs can be interpreted correctly."
88 #if GMX_GPU != GMX_GPU_NONE
89 " If you simply want to restrict which GPUs are used, then it is "
90 "better to use mdrun -gpu_id. Otherwise, setting the "
91 # if GMX_GPU == GMX_GPU_CUDA
92 "CUDA_VISIBLE_DEVICES"
93 # elif GMX_GPU == GMX_GPU_OPENCL
94 // Technically there is no portable way to do this offered by the
95 // OpenCL standard, but the only current relevant case for GROMACS
96 // is AMD OpenCL, which offers this variable.
// Any other GMX_GPU value is treated as a build configuration error.
99 # error "Unreachable branch"
101 " environment variable in your bash profile or job "
102 "script may be more convenient."
// Decides whether nonbonded (NB) tasks should run on GPUs in a thread-MPI run.
//
// Inputs, as used by the visible code:
//   nonbondedTarget             - requested target for NB work (Cpu, Gpu or Auto).
//   gpuIdsToUse                 - IDs of the GPUs that ranks can use; only emptiness is tested.
//   userGpuTaskAssignment       - user-supplied -gputasks assignment; only emptiness is tested.
//   emulateGpuNonbonded         - when Yes, running NB on GPUs is excluded.
//   buildSupportsNonbondedOnGpu - when false, running NB on GPUs is excluded.
//   nonbondedOnGpuIsUseful      - when false, running NB on GPUs is excluded.
//   numRanksPerSimulation       - values below 1 are rejected when -gputasks is given.
//
// Throws InconsistentInputError when a GPU task assignment was given while the
// NB target is Auto or numRanksPerSimulation is below 1.
//
// NOTE(review): braces and return statements are not visible in this listing,
// so the value returned by each branch must be confirmed against the complete
// file.
108 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget,
109 const std::vector<int>& gpuIdsToUse,
110 const std::vector<int>& userGpuTaskAssignment,
111 const EmulateGpuNonbonded emulateGpuNonbonded,
112 const bool buildSupportsNonbondedOnGpu,
113 const bool nonbondedOnGpuIsUseful,
114 const int numRanksPerSimulation)
116 // First, exclude all cases where we can't run NB on GPUs.
117 if (nonbondedTarget == TaskTarget::Cpu || emulateGpuNonbonded == EmulateGpuNonbonded::Yes
118 || !nonbondedOnGpuIsUseful || !buildSupportsNonbondedOnGpu)
120 // If the user required NB on GPUs, we issue an error later.
124 // We now know that NB on GPUs makes sense, if we have any.
// An explicit task assignment is only accepted together with an explicit NB
// target and an explicit rank count.
126 if (!userGpuTaskAssignment.empty())
128 // Specifying -gputasks requires specifying everything.
129 if (nonbondedTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
131 GMX_THROW(InconsistentInputError(
132 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
// Explicit request for NB on GPUs.
137 if (nonbondedTarget == TaskTarget::Gpu)
142 // Because this is thread-MPI, we already know about the GPUs that
143 // all potential ranks can use, and can use that in a global
144 // decision that will later be consistent.
145 auto haveGpus = !gpuIdsToUse.empty();
147 // If we get here, then the user permitted or required GPUs.
// Decides whether PME tasks should run on GPUs in a thread-MPI run.
//
// Inputs, as used by the visible code:
//   useGpuForNonbonded       - when false, PME on GPUs is excluded.
//   pmeTarget                - requested target for PME work (Cpu, Gpu or Auto).
//   gpuIdsToUse              - IDs of the GPUs that ranks can use; emptiness and size are tested.
//   userGpuTaskAssignment    - user-supplied -gputasks assignment; only emptiness is tested.
//   hardwareInfo             - passed to pme_gpu_supports_hardware.
//   inputrec                 - passed to pme_gpu_supports_input.
//   numRanksPerSimulation    - rank count; values below 1 are handled as the automated default.
//   numPmeRanksPerSimulation - number of separate PME ranks.
//
// Throws InconsistentInputError when -gputasks is combined with an Auto PME
// target or a rank count below 1, or with a Gpu PME target and a rank layout
// other than a single PME rank. Throws NotImplementedError when PME on GPUs
// is required without -gputasks and the same rank-layout test fails.
//
// The support checks are called with a null message pointer, so no reason
// text is collected here.
//
// NOTE(review): braces and several return statements are not visible in this
// listing; only the returns shown below can be confirmed from here.
151 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded,
152 const TaskTarget pmeTarget,
153 const std::vector<int>& gpuIdsToUse,
154 const std::vector<int>& userGpuTaskAssignment,
155 const gmx_hw_info_t& hardwareInfo,
156 const t_inputrec& inputrec,
157 const int numRanksPerSimulation,
158 const int numPmeRanksPerSimulation)
160 // First, exclude all cases where we can't run PME on GPUs.
161 if ((pmeTarget == TaskTarget::Cpu) || !useGpuForNonbonded || !pme_gpu_supports_build(nullptr)
162 || !pme_gpu_supports_hardware(hardwareInfo, nullptr) || !pme_gpu_supports_input(inputrec, nullptr))
164 // PME can't run on a GPU. If the user required that, we issue
169 // We now know that PME on GPUs might make sense, if we have any.
171 if (!userGpuTaskAssignment.empty())
173 // Follow the user's choice of GPU task assignment, if we
174 // can. Checking that their IDs are for compatible GPUs comes
177 // Specifying -gputasks requires specifying everything.
178 if (pmeTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
180 GMX_THROW(InconsistentInputError(
181 formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
184 // PME on GPUs is only supported in a single case
185 if (pmeTarget == TaskTarget::Gpu)
// Rejected layouts: several ranks with no separate PME rank, or more than
// one separate PME rank.
187 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
188 || (numPmeRanksPerSimulation > 1))
190 GMX_THROW(InconsistentInputError(
191 "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
192 "file and use a single PME rank."));
197 // pmeTarget == TaskTarget::Auto
198 return numRanksPerSimulation == 1;
201 // Because this is thread-MPI, we already know about the GPUs that
202 // all potential ranks can use, and can use that in a global
203 // decision that will later be consistent.
205 if (pmeTarget == TaskTarget::Gpu)
// Same rank-layout test as above, reported here as not implemented.
207 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
208 || (numPmeRanksPerSimulation > 1))
210 GMX_THROW(NotImplementedError(
211 "PME tasks were required to run on GPUs, but that is not implemented with "
212 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
213 "or permit PME tasks to be assigned to the CPU."));
218 if (numRanksPerSimulation == 1)
220 // PME can run well on a GPU shared with NB, and we permit
221 // mdrun to default to try that.
222 return !gpuIdsToUse.empty();
225 if (numRanksPerSimulation < 1)
227 // Full automated mode for thread-MPI (the default). PME can
228 // run well on a GPU shared with NB, and we permit mdrun to
229 // default to it if there is only one GPU available.
230 return (gpuIdsToUse.size() == 1);
233 // Not enough support for PME on GPUs for anything else
// Decides whether nonbonded (NB) tasks should run on GPUs, and validates the
// combination of user choices along the way.
//
// Inputs, as used by the visible code:
//   nonbondedTarget             - requested target for NB work (Cpu, Gpu or Auto).
//   userGpuTaskAssignment       - user-supplied -gputasks assignment; only emptiness is tested.
//   emulateGpuNonbonded         - whether GPU emulation was chosen.
//   buildSupportsNonbondedOnGpu - whether the binary was built with GPU support.
//   nonbondedOnGpuIsUseful      - whether NB on GPUs is supported for the simulation settings.
//   gpusWereDetected            - returned as the decision when the user merely permitted GPUs.
//
// Throws InconsistentInputError when:
//   - a GPU task assignment is combined with a Cpu NB target;
//   - a Gpu NB target is requested in a build without GPU support;
//   - GPU emulation is combined with a Gpu NB target or a GPU task assignment;
//   - a Gpu NB target is requested but is not useful for these settings;
//   - a GPU task assignment is combined with an Auto NB target.
//
// NOTE(review): braces and most return statements are not visible in this
// listing; only the final return can be confirmed from here.
237 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget,
238 const std::vector<int>& userGpuTaskAssignment,
239 const EmulateGpuNonbonded emulateGpuNonbonded,
240 const bool buildSupportsNonbondedOnGpu,
241 const bool nonbondedOnGpuIsUseful,
242 const bool gpusWereDetected)
244 if (nonbondedTarget == TaskTarget::Cpu)
246 if (!userGpuTaskAssignment.empty())
248 GMX_THROW(InconsistentInputError(
249 "A GPU task assignment was specified, but nonbonded interactions were "
250 "assigned to the CPU. Make no more than one of these choices."));
256 if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
258 GMX_THROW(InconsistentInputError(
259 "Nonbonded interactions on the GPU were requested with -nb gpu, "
260 "but the GROMACS binary has been built without GPU support. "
261 "Either run without selecting GPU options, or recompile GROMACS "
262 "with GPU support enabled"));
265 // TODO refactor all these TaskTarget::Gpu checks into one place?
266 // e.g. use a subfunction that handles only the cases where
267 // TaskTargets are not Cpu?
268 if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
270 if (nonbondedTarget == TaskTarget::Gpu)
272 GMX_THROW(InconsistentInputError(
273 "Nonbonded interactions on the GPU were required, which is inconsistent "
274 "with choosing emulation. Make no more than one of these choices."));
276 if (!userGpuTaskAssignment.empty())
// NOTE(review): the statement that raises this error is not visible in this
// listing; presumably a GMX_THROW as in the sibling branches - confirm.
279 InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
280 "no more than one of these choices."));
286 if (!nonbondedOnGpuIsUseful)
288 if (nonbondedTarget == TaskTarget::Gpu)
290 GMX_THROW(InconsistentInputError(
291 "Nonbonded interactions on the GPU were required, but not supported for these "
292 "simulation settings. Change your settings, or do not require using GPUs."));
298 if (!userGpuTaskAssignment.empty())
300 // Specifying -gputasks requires specifying everything.
301 if (nonbondedTarget == TaskTarget::Auto)
303 GMX_THROW(InconsistentInputError(
304 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
310 if (nonbondedTarget == TaskTarget::Gpu)
312 // We still don't know whether it is an error if no GPUs are found
313 // because we don't know the duty of this rank, yet. For example,
314 // a node with only PME ranks and -pme cpu is OK if there are not
319 // If we get here, then the user permitted GPUs, which we should
320 // use for nonbonded interactions.
321 return gpusWereDetected;
// Decides whether PME tasks should run on GPUs, and validates the combination
// of user choices along the way.
//
// Inputs, as used by the visible code:
//   useGpuForNonbonded       - PME on GPUs is rejected when NB does not run on GPUs.
//   pmeTarget                - requested target for PME work (Cpu, Gpu or Auto).
//   userGpuTaskAssignment    - user-supplied -gputasks assignment; only emptiness is tested.
//   hardwareInfo             - passed to pme_gpu_supports_hardware.
//   inputrec                 - passed to pme_gpu_supports_input.
//   numRanksPerSimulation    - rank count used in the rank-layout tests.
//   numPmeRanksPerSimulation - number of separate PME ranks.
//   gpusWereDetected         - returned as the decision for a single-rank run.
//
// Throws NotImplementedError when a Gpu PME target is requested but NB does
// not run on GPUs, when one of the pme_gpu_supports_* checks fails (the check
// fills the reason text appended to the error), or when the rank layout is
// unsupported. Throws InconsistentInputError when a GPU task assignment is
// combined with a Cpu or Auto PME target.
//
// NOTE(review): braces, most return statements and the declaration of the
// message variable are not visible in this listing; message is presumably a
// std::string, given that it is concatenated with a string literal - confirm.
324 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded,
325 const TaskTarget pmeTarget,
326 const std::vector<int>& userGpuTaskAssignment,
327 const gmx_hw_info_t& hardwareInfo,
328 const t_inputrec& inputrec,
329 const int numRanksPerSimulation,
330 const int numPmeRanksPerSimulation,
331 const bool gpusWereDetected)
333 if (pmeTarget == TaskTarget::Cpu)
338 if (!useGpuForNonbonded)
340 if (pmeTarget == TaskTarget::Gpu)
342 GMX_THROW(NotImplementedError(
343 "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
// Build, hardware and input support are checked in turn; each failure is an
// error only when the user explicitly required PME on GPUs.
349 if (!pme_gpu_supports_build(&message))
351 if (pmeTarget == TaskTarget::Gpu)
353 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
357 if (!pme_gpu_supports_hardware(hardwareInfo, &message))
359 if (pmeTarget == TaskTarget::Gpu)
361 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
365 if (!pme_gpu_supports_input(inputrec, &message))
367 if (pmeTarget == TaskTarget::Gpu)
369 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
// NOTE(review): the Cpu target is tested a second time here; whether this
// test is still reachable depends on what the first Cpu test above does,
// which is not visible in this listing.
374 if (pmeTarget == TaskTarget::Cpu)
376 if (!userGpuTaskAssignment.empty())
378 GMX_THROW(InconsistentInputError(
379 "A GPU task assignment was specified, but PME interactions were "
380 "assigned to the CPU. Make no more than one of these choices."));
386 if (!userGpuTaskAssignment.empty())
388 // Specifying -gputasks requires specifying everything.
389 if (pmeTarget == TaskTarget::Auto)
391 GMX_THROW(InconsistentInputError(formatString(
392 g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
398 // We still don't know whether it is an error if no GPUs are found
399 // because we don't know the duty of this rank, yet. For example,
400 // a node with only PME ranks and -pme cpu is OK if there are not
403 if (pmeTarget == TaskTarget::Gpu)
// Rejected layouts: several ranks with no separate PME rank, or more than
// one separate PME rank.
405 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
406 || (numPmeRanksPerSimulation > 1))
408 GMX_THROW(NotImplementedError(
409 "PME tasks were required to run on GPUs, but that is not implemented with "
410 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
411 "or permit PME tasks to be assigned to the CPU."));
416 // If we get here, then the user permitted GPUs.
417 if (numRanksPerSimulation == 1)
419 // PME can run well on a single GPU shared with NB when there
420 // is one rank, so we permit mdrun to try that if we have
422 return gpusWereDetected;
425 // Not enough support for PME on GPUs for anything else
// Maps the PME decisions onto a PmeRunMode value.
//
// Inputs, as used by the visible code:
//   useGpuForPme - whether PME was assigned to the GPU; its use is not visible
//                  in this listing.
//   pmeFftTarget - requested target for the PME FFT work.
//   inputrec     - its coulombtype decides whether PME is used at all.
//
// Visible results: None when the coulomb type is not a PME type, Mixed when
// the FFT target is Cpu, otherwise GPU or CPU.
//
// NOTE(review): the branch on useGpuForPme, the braces and the call that
// reports the error text below are not visible in this listing. Presumably
// Mixed and GPU belong to the GPU-PME branch, and the FFT-on-GPU error plus
// the CPU result belong to the CPU-PME branch - confirm against the complete
// file.
430 PmeRunMode determinePmeRunMode(const bool useGpuForPme, const TaskTarget& pmeFftTarget, const t_inputrec& inputrec)
432 if (!EEL_PME(inputrec.coulombtype))
434 return PmeRunMode::None;
439 if (pmeFftTarget == TaskTarget::Cpu)
441 return PmeRunMode::Mixed;
445 return PmeRunMode::GPU;
450 if (pmeFftTarget == TaskTarget::Gpu)
453 "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
454 "on CPU you should not be using -pmefft.");
456 return PmeRunMode::CPU;
// Decides whether bonded interactions should run on GPUs.
//
// Inputs, as used by the visible code:
//   useGpuForNonbonded       - bonded work on GPUs requires NB on GPUs.
//   useGpuForPme             - whether PME runs on the GPU.
//   bondedTarget             - requested target for bonded work (Cpu, Gpu or Auto).
//   canUseGpuForBonded       - whether the simulation settings support bondeds on GPUs.
//   usingLJPme               - whether LJ-PME is in use.
//   usingElecPmeOrEwald      - whether electrostatic PME or Ewald is in use.
//   numPmeRanksPerSimulation - number of separate PME ranks; values of 0 or below
//                              count as no separate PME ranks in the final test.
//   gpusWereDetected         - whether any GPU was detected.
//
// Throws InconsistentInputError when bondeds on GPUs were required but are
// not supported for these settings, or when NB does not run on GPUs.
//
// When the user merely permitted GPUs, the visible result is true only if
// GPUs were detected and this CPU is busy with PME or Ewald work.
//
// NOTE(review): braces and the early return statements are not visible in
// this listing; only the final return can be confirmed from here.
460 bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded,
461 const bool useGpuForPme,
462 const TaskTarget bondedTarget,
463 const bool canUseGpuForBonded,
464 const bool usingLJPme,
465 const bool usingElecPmeOrEwald,
466 const int numPmeRanksPerSimulation,
467 const bool gpusWereDetected)
469 if (bondedTarget == TaskTarget::Cpu)
474 if (!canUseGpuForBonded)
476 if (bondedTarget == TaskTarget::Gpu)
478 GMX_THROW(InconsistentInputError(
479 "Bonded interactions on the GPU were required, but not supported for these "
480 "simulation settings. Change your settings, or do not require using GPUs."));
486 if (!useGpuForNonbonded)
488 if (bondedTarget == TaskTarget::Gpu)
490 GMX_THROW(InconsistentInputError(
491 "Bonded interactions on the GPU were required, but this requires that "
492 "short-ranged non-bonded interactions are also run on the GPU. Change "
493 "your settings, or do not require using GPUs."));
499 // TODO If the bonded kernels do not get fused, then performance
500 // overheads might suggest alternative choices here.
502 if (bondedTarget == TaskTarget::Gpu)
504 // We still don't know whether it is an error if no GPUs are
509 // If we get here, then the user permitted GPUs, which we should
510 // use for bonded interactions if any were detected and the CPU
511 // is busy, for which we currently only check PME or Ewald.
512 // (It would be better to dynamically assign bondeds based on timings)
513 // Note that here we assume that the auto setting of PME ranks will not
514 // choose separate PME ranks when nonBonded are assigned to the GPU.
// The CPU counts as busy when LJ-PME is used, or when electrostatic PME or
// Ewald is used while PME is neither on the GPU nor on separate PME ranks.
515 bool usingOurCpuForPmeOrEwald =
516 (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
518 return gpusWereDetected && usingOurCpuForPmeOrEwald;
// Decides whether the update (integration) task should run on the GPU.
//
// Outline of the visible logic:
//   1. A Cpu target, or an Auto target without devFlags.forceGpuUpdateDefault,
//      is handled first (the statement taken in that case is not visible).
//   2. Every unmet requirement appends one line to errorMessage.
//   3. If errorMessage is not empty: with an Auto target and the force flag
//      set, warnings are logged through mdlog, ending with the text that the
//      CPU version of update will be used; with a Gpu target an
//      InconsistentInputError carrying the collected text is thrown.
//   4. The final return is true for a Gpu target, or for an Auto target with
//      the force flag set.
//
// Inputs, as used by the visible code:
//   isDomainDecomposition   - enables the domain-decomposition requirements.
//   useUpdateGroups         - needed with constraints under domain decomposition.
//   pmeRunMode              - PME placement (CPU, Mixed, GPU or None).
//   havePmeOnlyRank         - use not visible in this listing.
//   useGpuForNonbonded      - whether NB runs on the GPU.
//   updateTarget            - requested target for update (Cpu, Gpu or Auto).
//   gpusWereDetected        - whether compatible GPUs were found.
//   inputrec                - integrator, coupling, pull, free-energy and swap settings.
//   mtop                    - topology, queried for constraints, vsites and particle types.
//   useEssentialDynamics    - essential dynamics is not supported.
//   doOrientationRestraints - orientation restraints are not supported.
//   useReplicaExchange      - replica exchange is not supported.
//   devFlags                - development flags (halo exchange, PME-PP comm, forced default).
//   mdlog                   - logger used for the fallback warnings.
//
// NOTE(review): braces, several conditions and return statements, and at
// least one parameter line between useReplicaExchange and devFlags are not
// visible in this listing; confirm those parts against the complete file.
521 bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,
522 const bool useUpdateGroups,
523 const PmeRunMode pmeRunMode,
524 const bool havePmeOnlyRank,
525 const bool useGpuForNonbonded,
526 const TaskTarget updateTarget,
527 const bool gpusWereDetected,
528 const t_inputrec& inputrec,
529 const gmx_mtop_t& mtop,
530 const bool useEssentialDynamics,
531 const bool doOrientationRestraints,
532 const bool useReplicaExchange,
534 const DevelopmentFeatureFlags& devFlags,
535 const gmx::MDLogger& mdlog)
538 // '-update cpu' overrides the environment variable, '-update auto' does not
539 if (updateTarget == TaskTarget::Cpu
540 || (updateTarget == TaskTarget::Auto && !devFlags.forceGpuUpdateDefault))
545 const bool hasAnyConstraints = gmx_mtop_interaction_count(mtop, IF_CONSTRAINT) > 0;
// NOTE(review): the statements that read pmeUsesCpu are not visible in this
// listing; presumably it guards the two PME placement messages below.
546 const bool pmeUsesCpu = (pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::Mixed);
// Accumulates one line per unmet requirement; empty means GPU update is possible.
548 std::string errorMessage;
550 if (isDomainDecomposition)
552 if (!devFlags.enableGpuHaloExchange)
// NOTE(review): this literal has a space after its newline, unlike the other
// messages, so the next message would start indented - looks accidental; confirm.
554 errorMessage += "Domain decomposition without GPU halo exchange is not supported.\n ";
558 if (hasAnyConstraints && !useUpdateGroups)
561 "Domain decomposition is only supported with constraints when update "
563 "are used. This means constraining all bonds is not supported, except for "
564 "small molecules, and box sizes close to half the pair-list cutoff are not "
570 errorMessage += "With domain decomposition, PME must run fully on the GPU.\n";
579 errorMessage += "With separate PME rank(s), PME must run fully on the GPU.\n";
582 if (!devFlags.enableGpuPmePPComm)
584 errorMessage += "With separate PME rank(s), PME must use direct communication.\n";
588 if (inputrec.eConstrAlg == econtSHAKE && hasAnyConstraints && gmx_mtop_ftype_count(mtop, F_CONSTR) > 0)
590 errorMessage += "SHAKE constraints are not supported.\n";
592 // Using the GPU-version of update if:
593 // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread) or inactive, or
594 // 2. Non-bonded interactions are on the GPU.
595 if ((pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::None) && !useGpuForNonbonded)
598 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
601 if (!gpusWereDetected)
603 errorMessage += "Compatible GPUs must have been found.\n";
// Build-time constant comparison: only CUDA builds qualify.
605 if (GMX_GPU != GMX_GPU_CUDA)
607 errorMessage += "Only a CUDA build is supported.\n";
609 if (inputrec.eI != eiMD)
611 errorMessage += "Only the md integrator is supported.\n";
613 if (inputrec.etc == etcNOSEHOOVER)
615 errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
617 if (!(inputrec.epc == epcNO || inputrec.epc == epcPARRINELLORAHMAN || inputrec.epc == epcBERENDSEN))
619 errorMessage += "Only Parrinello-Rahman and Berendsen pressure coupling are supported.\n";
621 if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
623 // The graph is needed, but not supported
624 errorMessage += "Ewald surface correction is not supported.\n";
626 if (gmx_mtop_interaction_count(mtop, IF_VSITE) > 0)
628 errorMessage += "Virtual sites are not supported.\n";
630 if (useEssentialDynamics)
632 errorMessage += "Essential dynamics is not supported.\n";
634 if (inputrec.bPull && pull_have_constraint(inputrec.pull))
636 errorMessage += "Constraints pulling is not supported.\n";
638 if (doOrientationRestraints)
640 // The graph is needed, but not supported
641 errorMessage += "Orientation restraints are not supported.\n";
643 if (inputrec.efep != efepNO
644 && (haveFreeEnergyType(inputrec, efptBONDED) || haveFreeEnergyType(inputrec, efptMASS)))
646 errorMessage += "Free energy perturbation for mass and constraints are not supported.\n";
648 const auto particleTypes = gmx_mtop_particletype_count(mtop);
649 if (particleTypes[eptShell] > 0)
651 errorMessage += "Shells are not supported.\n";
653 if (useReplicaExchange)
655 errorMessage += "Replica exchange simulations are not supported.\n";
657 if (inputrec.eSwapCoords != eswapNO)
659 errorMessage += "Swapping the coordinates is not supported.\n";
// NOTE(review): the condition guarding the re-run message is not visible in
// this listing; presumably it tests the parameter that is also missing from
// the signature above - confirm.
663 errorMessage += "Re-run is not supported.\n";
666 // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
667 // does not support it, the actual CUDA LINCS code does support it
668 if (gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)
// NOTE(review): unlike its siblings this literal lacks the final period and
// newline, so any message appended after it would continue on the same line -
// likely a defect; confirm and fix in the complete file.
670 errorMessage += "Non-connecting constraints are not supported";
672 if (!UpdateConstrainGpu::isNumCoupledConstraintsSupported(mtop))
675 "The number of coupled constraints is higher than supported in the GPU LINCS "
// Report the collected problems: warn when the GPU default was only forced
// through the environment, throw when the user explicitly required GPU update.
679 if (!errorMessage.empty())
681 if (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault)
683 GMX_LOG(mdlog.warning)
686 "Update task on the GPU was required, by the "
687 "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable, but the following "
688 "condition(s) were not satisfied:");
689 GMX_LOG(mdlog.warning).asParagraph().appendText(errorMessage.c_str());
690 GMX_LOG(mdlog.warning).asParagraph().appendText("Will use CPU version of update.");
692 else if (updateTarget == TaskTarget::Gpu)
694 std::string prefix = gmx::formatString(
695 "Update task on the GPU was required,\n"
696 "but the following condition(s) were not satisfied:\n");
697 GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
702 return (updateTarget == TaskTarget::Gpu
703 || (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault));