2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Defines functionality for deciding whether tasks will run on GPUs.
38 * \author Mark Abraham <mark.j.abraham@gmail.com>
39 * \ingroup module_taskassignment
44 #include "decidegpuusage.h"
54 #include "gromacs/ewald/pme.h"
55 #include "gromacs/hardware/cpuinfo.h"
56 #include "gromacs/hardware/detecthardware.h"
57 #include "gromacs/hardware/hardwaretopology.h"
58 #include "gromacs/hardware/hw_info.h"
59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
60 #include "gromacs/mdlib/update_constrain_cuda.h"
61 #include "gromacs/mdtypes/commrec.h"
62 #include "gromacs/mdtypes/inputrec.h"
63 #include "gromacs/mdtypes/md_enums.h"
64 #include "gromacs/mdtypes/mdrunoptions.h"
65 #include "gromacs/pulling/pull.h"
66 #include "gromacs/taskassignment/taskassignment.h"
67 #include "gromacs/topology/mtop_util.h"
68 #include "gromacs/topology/topology.h"
69 #include "gromacs/utility/baseversion.h"
70 #include "gromacs/utility/exceptions.h"
71 #include "gromacs/utility/fatalerror.h"
72 #include "gromacs/utility/gmxassert.h"
73 #include "gromacs/utility/logger.h"
74 #include "gromacs/utility/stringutil.h"
// Format string shared by the -gputasks consistency errors in this file.
// It contains a single %s placeholder; the callers visible in this file pass
// it to formatString() together with the names of the mdrun options that the
// user has to set explicitly.
// When GMX_GPU != GMX_GPU_NONE the text is extended with advice to prefer
// mdrun -gpu_id or an environment variable. For CUDA builds the variable named
// is CUDA_VISIBLE_DEVICES; the literal used for OpenCL builds and the closing
// preprocessor directives are not visible in this listing.
83 //! Helper variable to localise the text of an often repeated message.
84 const char* g_specifyEverythingFormatString =
85 "When you use mdrun -gputasks, %s must be set to non-default "
86 "values, so that the device IDs can be interpreted correctly."
87 #if GMX_GPU != GMX_GPU_NONE
88 " If you simply want to restrict which GPUs are used, then it is "
89 "better to use mdrun -gpu_id. Otherwise, setting the "
90 # if GMX_GPU == GMX_GPU_CUDA
91 "CUDA_VISIBLE_DEVICES"
92 # elif GMX_GPU == GMX_GPU_OPENCL
93 // Technically there is no portable way to do this offered by the
94 // OpenCL standard, but the only current relevant case for GROMACS
95 // is AMD OpenCL, which offers this variable.
98 # error "Unreachable branch"
100 " environment variable in your bash profile or job "
101 "script may be more convenient."
// Decides, for a thread-MPI run, whether nonbonded tasks should run on GPUs.
//
// Parameters:
//   nonbondedTarget              user's target for nonbonded tasks (Cpu, Gpu or Auto)
//   gpuIdsToUse                  GPU IDs that may be used; only its emptiness is tested
//   userGpuTaskAssignment        user-supplied GPU task assignment (mdrun -gputasks)
//   emulateGpuNonbonded          whether GPU nonbonded emulation was chosen
//   buildSupportsNonbondedOnGpu  whether this build can run nonbondeds on a GPU
//   nonbondedOnGpuIsUseful       whether nonbondeds on a GPU make sense for this run
//   numRanksPerSimulation        rank count; a value < 1 is handled as "not set by the user"
//
// Throws InconsistentInputError when a GPU task assignment was given while
// nonbondedTarget is Auto or numRanksPerSimulation < 1.
//
// NOTE(review): the return statements of the individual branches are not
// visible in this listing; confirm the returned values against the full file.
107 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget,
108 const std::vector<int>& gpuIdsToUse,
109 const std::vector<int>& userGpuTaskAssignment,
110 const EmulateGpuNonbonded emulateGpuNonbonded,
111 const bool buildSupportsNonbondedOnGpu,
112 const bool nonbondedOnGpuIsUseful,
113 const int numRanksPerSimulation)
115 // First, exclude all cases where we can't run NB on GPUs.
116 if (nonbondedTarget == TaskTarget::Cpu || emulateGpuNonbonded == EmulateGpuNonbonded::Yes
117 || !nonbondedOnGpuIsUseful || !buildSupportsNonbondedOnGpu)
119 // If the user required NB on GPUs, we issue an error later.
123 // We now know that NB on GPUs makes sense, if we have any.
125 if (!userGpuTaskAssignment.empty())
127 // Specifying -gputasks requires specifying everything.
128 if (nonbondedTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
130 GMX_THROW(InconsistentInputError(
131 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
// Explicit user request for GPU nonbondeds (no -gputasks assignment given).
136 if (nonbondedTarget == TaskTarget::Gpu)
141 // Because this is thread-MPI, we already know about the GPUs that
142 // all potential ranks can use, and can use that in a global
143 // decision that will later be consistent.
// haveGpus is true when at least one GPU ID was supplied in gpuIdsToUse.
144 auto haveGpus = !gpuIdsToUse.empty();
146 // If we get here, then the user permitted or required GPUs.
// Decides, for a thread-MPI run, whether PME tasks should run on GPUs.
//
// The first check groups the cases where PME cannot run on a GPU: pmeTarget
// is Cpu, nonbondeds do not run on a GPU, or one of pme_gpu_supports_build(),
// pme_gpu_supports_hardware() and pme_gpu_supports_input() reports no support
// (called with nullptr, so no explanation text is collected here).
//
// The "rank layout check" used twice below fails when either
//   numRanksPerSimulation > 1 and numPmeRanksPerSimulation == 0, or
//   numPmeRanksPerSimulation > 1.
//
// Throws InconsistentInputError when a GPU task assignment was given and
//   - pmeTarget is Auto or numRanksPerSimulation < 1, or
//   - pmeTarget is Gpu and the rank layout check fails.
// Throws NotImplementedError when no GPU task assignment was given, pmeTarget
// is Gpu and the rank layout check fails.
//
// Visible return values: !gpuIdsToUse.empty() when numRanksPerSimulation == 1,
// and gpuIdsToUse.size() == 1 when numRanksPerSimulation < 1.
//
// NOTE(review): several return statements are not visible in this listing;
// confirm the remaining branch results against the full file.
150 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded,
151 const TaskTarget pmeTarget,
152 const std::vector<int>& gpuIdsToUse,
153 const std::vector<int>& userGpuTaskAssignment,
154 const gmx_hw_info_t& hardwareInfo,
155 const t_inputrec& inputrec,
156 const gmx_mtop_t& mtop,
157 const int numRanksPerSimulation,
158 const int numPmeRanksPerSimulation)
160 // First, exclude all cases where we can't run PME on GPUs.
161 if ((pmeTarget == TaskTarget::Cpu) || !useGpuForNonbonded || !pme_gpu_supports_build(nullptr)
162 || !pme_gpu_supports_hardware(hardwareInfo, nullptr)
163 || !pme_gpu_supports_input(inputrec, mtop, nullptr))
165 // PME can't run on a GPU. If the user required that, we issue
170 // We now know that PME on GPUs might make sense, if we have any.
172 if (!userGpuTaskAssignment.empty())
174 // Follow the user's choice of GPU task assignment, if we
175 // can. Checking that their IDs are for compatible GPUs comes
178 // Specifying -gputasks requires specifying everything.
179 if (pmeTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
181 GMX_THROW(InconsistentInputError(
182 formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
185 // PME on GPUs is only supported in a single case
186 if (pmeTarget == TaskTarget::Gpu)
188 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
189 || (numPmeRanksPerSimulation > 1))
191 GMX_THROW(InconsistentInputError(
192 "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
193 "file and use a single PME rank."));
// NOTE(review): TaskTarget::Auto throws at the top of this branch and
// TaskTarget::Cpu appears to be excluded by the first check, so if the Gpu
// branch above returns (its return is not visible in this listing) the
// statement below looks unreachable - confirm.
198 // pmeTarget == TaskTarget::Auto
199 return numRanksPerSimulation == 1;
202 // Because this is thread-MPI, we already know about the GPUs that
203 // all potential ranks can use, and can use that in a global
204 // decision that will later be consistent.
206 if (pmeTarget == TaskTarget::Gpu)
208 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
209 || (numPmeRanksPerSimulation > 1))
211 GMX_THROW(NotImplementedError(
212 "PME tasks were required to run on GPUs, but that is not implemented with "
213 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
214 "or permit PME tasks to be assigned to the CPU."));
// From here on the decision depends only on the rank count and on how many
// GPU IDs are available.
219 if (numRanksPerSimulation == 1)
221 // PME can run well on a GPU shared with NB, and we permit
222 // mdrun to default to try that.
223 return !gpuIdsToUse.empty();
226 if (numRanksPerSimulation < 1)
228 // Full automated mode for thread-MPI (the default). PME can
229 // run well on a GPU shared with NB, and we permit mdrun to
230 // default to it if there is only one GPU available.
231 return (gpuIdsToUse.size() == 1);
234 // Not enough support for PME on GPUs for anything else
// Decides whether nonbonded tasks should run on GPUs when the availability of
// GPUs is passed in as gpusWereDetected rather than as a list of GPU IDs.
//
// Parameters:
//   nonbondedTarget              user's target for nonbonded tasks (Cpu, Gpu or Auto)
//   userGpuTaskAssignment        user-supplied GPU task assignment (mdrun -gputasks)
//   emulateGpuNonbonded          whether GPU nonbonded emulation was chosen
//   buildSupportsNonbondedOnGpu  whether this build can run nonbondeds on a GPU
//   nonbondedOnGpuIsUseful       whether nonbondeds on a GPU make sense for this run
//   gpusWereDetected             whether GPUs were detected
//
// Throws InconsistentInputError when
//   - nonbondedTarget is Cpu but a GPU task assignment was given,
//   - nonbondedTarget is Gpu but the build has no GPU support,
//   - emulation was chosen together with nonbondedTarget Gpu or with a GPU
//     task assignment,
//   - nonbondedTarget is Gpu but GPU nonbondeds are not useful for this run,
//   - a GPU task assignment was given while nonbondedTarget is Auto.
//
// When none of the earlier branches decides, the result is gpusWereDetected.
//
// NOTE(review): the return statements of the earlier branches are not visible
// in this listing; confirm them against the full file.
238 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget,
239 const std::vector<int>& userGpuTaskAssignment,
240 const EmulateGpuNonbonded emulateGpuNonbonded,
241 const bool buildSupportsNonbondedOnGpu,
242 const bool nonbondedOnGpuIsUseful,
243 const bool gpusWereDetected)
245 if (nonbondedTarget == TaskTarget::Cpu)
247 if (!userGpuTaskAssignment.empty())
249 GMX_THROW(InconsistentInputError(
250 "A GPU task assignment was specified, but nonbonded interactions were "
251 "assigned to the CPU. Make no more than one of these choices."));
257 if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
259 GMX_THROW(InconsistentInputError(
260 "Nonbonded interactions on the GPU were requested with -nb gpu, "
261 "but the GROMACS binary has been built without GPU support. "
262 "Either run without selecting GPU options, or recompile GROMACS "
263 "with GPU support enabled"));
266 // TODO refactor all these TaskTarget::Gpu checks into one place?
267 // e.g. use a subfunction that handles only the cases where
268 // TaskTargets are not Cpu?
269 if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
271 if (nonbondedTarget == TaskTarget::Gpu)
273 GMX_THROW(InconsistentInputError(
274 "Nonbonded interactions on the GPU were required, which is inconsistent "
275 "with choosing emulation. Make no more than one of these choices."));
277 if (!userGpuTaskAssignment.empty())
280 InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
281 "no more than one of these choices."));
287 if (!nonbondedOnGpuIsUseful)
289 if (nonbondedTarget == TaskTarget::Gpu)
291 GMX_THROW(InconsistentInputError(
292 "Nonbonded interactions on the GPU were required, but not supported for these "
293 "simulation settings. Change your settings, or do not require using GPUs."));
299 if (!userGpuTaskAssignment.empty())
// NOTE(review): the message built below asks for -ntmpi to be set, although
// this function takes no rank count - confirm the wording is intended.
301 // Specifying -gputasks requires specifying everything.
302 if (nonbondedTarget == TaskTarget::Auto)
304 GMX_THROW(InconsistentInputError(
305 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
311 if (nonbondedTarget == TaskTarget::Gpu)
313 // We still don't know whether it is an error if no GPUs are found
314 // because we don't know the duty of this rank, yet. For example,
315 // a node with only PME ranks and -pme cpu is OK if there are not
320 // If we get here, then the user permitted GPUs, which we should
321 // use for nonbonded interactions.
322 return gpusWereDetected;
// Decides whether PME tasks should run on GPUs when the availability of GPUs
// is passed in as gpusWereDetected rather than as a list of GPU IDs.
//
// Unlike the thread-MPI variant in this file, the pme_gpu_supports_*() checks
// receive &message, and that text is appended to the NotImplementedError
// raised when the user required PME on a GPU.
//
// Throws NotImplementedError when pmeTarget is Gpu and
//   - nonbondeds do not run on a GPU,
//   - pme_gpu_supports_build(), pme_gpu_supports_hardware() or
//     pme_gpu_supports_input() reports no support,
//   - numRanksPerSimulation > 1 with numPmeRanksPerSimulation == 0, or
//     numPmeRanksPerSimulation > 1.
// Throws InconsistentInputError when a GPU task assignment was given while
// pmeTarget is Cpu or Auto.
//
// Visible return value: gpusWereDetected when numRanksPerSimulation == 1.
//
// NOTE(review): the declaration of `message` and most return statements are
// not visible in this listing; confirm them against the full file.
325 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded,
326 const TaskTarget pmeTarget,
327 const std::vector<int>& userGpuTaskAssignment,
328 const gmx_hw_info_t& hardwareInfo,
329 const t_inputrec& inputrec,
330 const gmx_mtop_t& mtop,
331 const int numRanksPerSimulation,
332 const int numPmeRanksPerSimulation,
333 const bool gpusWereDetected)
335 if (pmeTarget == TaskTarget::Cpu)
340 if (!useGpuForNonbonded)
342 if (pmeTarget == TaskTarget::Gpu)
344 GMX_THROW(NotImplementedError(
345 "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
// Each support check below fills `message` with the reason it failed; the
// reason is only reported when the user explicitly required PME on a GPU.
351 if (!pme_gpu_supports_build(&message))
353 if (pmeTarget == TaskTarget::Gpu)
355 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
359 if (!pme_gpu_supports_hardware(hardwareInfo, &message))
361 if (pmeTarget == TaskTarget::Gpu)
363 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
367 if (!pme_gpu_supports_input(inputrec, mtop, &message))
369 if (pmeTarget == TaskTarget::Gpu)
371 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
// NOTE(review): pmeTarget == TaskTarget::Cpu is already tested at the top of
// this function; if that earlier branch returns (not visible in this
// listing), this block, including its consistency error, cannot be reached -
// confirm.
376 if (pmeTarget == TaskTarget::Cpu)
378 if (!userGpuTaskAssignment.empty())
380 GMX_THROW(InconsistentInputError(
381 "A GPU task assignment was specified, but PME interactions were "
382 "assigned to the CPU. Make no more than one of these choices."));
388 if (!userGpuTaskAssignment.empty())
390 // Specifying -gputasks requires specifying everything.
391 if (pmeTarget == TaskTarget::Auto)
393 GMX_THROW(InconsistentInputError(formatString(
394 g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
400 // We still don't know whether it is an error if no GPUs are found
401 // because we don't know the duty of this rank, yet. For example,
402 // a node with only PME ranks and -pme cpu is OK if there are not
405 if (pmeTarget == TaskTarget::Gpu)
407 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
408 || (numPmeRanksPerSimulation > 1))
410 GMX_THROW(NotImplementedError(
411 "PME tasks were required to run on GPUs, but that is not implemented with "
412 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
413 "or permit PME tasks to be assigned to the CPU."));
418 // If we get here, then the user permitted GPUs.
419 if (numRanksPerSimulation == 1)
421 // PME can run well on a single GPU shared with NB when there
422 // is one rank, so we permit mdrun to try that if we have
424 return gpusWereDetected;
427 // Not enough support for PME on GPUs for anything else
// Decides whether bonded tasks should run on GPUs.
//
// Parameters:
//   useGpuForNonbonded        whether nonbonded tasks run on a GPU
//   useGpuForPme              whether PME tasks run on a GPU
//   bondedTarget              user's target for bonded tasks (Cpu, Gpu or Auto)
//   canUseGpuForBonded        whether bondeds on a GPU are supported for this run
//   usingLJPme                whether LJ-PME is in use
//   usingElecPmeOrEwald       whether electrostatics use PME or Ewald
//   numPmeRanksPerSimulation  PME rank count; values <= 0 count as "no separate
//                             PME ranks" in the final heuristic
//   gpusWereDetected          whether GPUs were detected
//
// Throws InconsistentInputError when bondedTarget is Gpu but either
// canUseGpuForBonded or useGpuForNonbonded is false.
//
// When no earlier branch decides, the result is true only if GPUs were
// detected and this rank's CPU is considered busy with PME or Ewald work.
//
// NOTE(review): the return statements of the earlier branches are not visible
// in this listing; confirm them against the full file.
431 bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded,
432 const bool useGpuForPme,
433 const TaskTarget bondedTarget,
434 const bool canUseGpuForBonded,
435 const bool usingLJPme,
436 const bool usingElecPmeOrEwald,
437 const int numPmeRanksPerSimulation,
438 const bool gpusWereDetected)
440 if (bondedTarget == TaskTarget::Cpu)
445 if (!canUseGpuForBonded)
447 if (bondedTarget == TaskTarget::Gpu)
449 GMX_THROW(InconsistentInputError(
450 "Bonded interactions on the GPU were required, but not supported for these "
451 "simulation settings. Change your settings, or do not require using GPUs."));
457 if (!useGpuForNonbonded)
459 if (bondedTarget == TaskTarget::Gpu)
461 GMX_THROW(InconsistentInputError(
462 "Bonded interactions on the GPU were required, but this requires that "
463 "short-ranged non-bonded interactions are also run on the GPU. Change "
464 "your settings, or do not require using GPUs."));
470 // TODO If the bonded kernels do not get fused, then performance
471 // overheads might suggest alternative choices here.
473 if (bondedTarget == TaskTarget::Gpu)
475 // We still don't know whether it is an error if no GPUs are
480 // If we get here, then the user permitted GPUs, which we should
481 // use for bonded interactions if any were detected and the CPU
482 // is busy, for which we currently only check PME or Ewald.
483 // (It would be better to dynamically assign bondeds based on timings)
484 // Note that here we assume that the auto setting of PME ranks will not
485 // choose separate PME ranks when nonBonded are assigned to the GPU.
// The CPU counts as busy when LJ-PME is used, or when PME/Ewald
// electrostatics are neither on a GPU nor on separate PME ranks.
486 bool usingOurCpuForPmeOrEwald =
487 (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
489 return gpusWereDetected && usingOurCpuForPmeOrEwald;
// Decides whether the update (and constraints) task should run on a GPU.
//
// The function first handles the cases where the GPU update is not wanted
// (updateTarget is Cpu, or Auto without forceGpuUpdateDefault). It then
// collects, in errorMessage, one line for every condition that prevents the
// GPU update. If any condition was collected and updateTarget is Gpu, an
// InconsistentInputError carrying the whole list is thrown.
//
// Visible return values once no condition was collected: with domain
// decomposition the result is forceGpuUpdateDefault; otherwise it is
// (updateTarget == TaskTarget::Gpu || forceGpuUpdateDefault).
//
// presumably forceGpuUpdateDefault reflects the environment variable mentioned
// in the first comment below - verify against the caller.
//
// NOTE(review): the end of the parameter list, the condition guarding the
// "Re-run is not supported" message and several return statements are not
// visible in this listing; confirm them against the full file.
492 bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefault,
493 const bool isDomainDecomposition,
494 const bool useUpdateGroups,
495 const PmeRunMode pmeRunMode,
496 const bool havePmeOnlyRank,
497 const bool useGpuForNonbonded,
498 const TaskTarget updateTarget,
499 const bool gpusWereDetected,
500 const t_inputrec& inputrec,
501 const gmx_mtop_t& mtop,
502 const bool useEssentialDynamics,
503 const bool doOrientationRestraints,
504 const bool useReplicaExchange,
508 // '-update cpu' overrides the environment variable, '-update auto' does not
509 if (updateTarget == TaskTarget::Cpu || (updateTarget == TaskTarget::Auto && !forceGpuUpdateDefault))
514 const bool hasAnyConstraints = gmx_mtop_interaction_count(mtop, IF_CONSTRAINT) > 0;
// Accumulates one human-readable line per unsupported condition.
516 std::string errorMessage;
518 if (isDomainDecomposition)
520 if (!forceGpuUpdateDefault)
// NOTE(review): this literal ends in "\n " (newline followed by a space),
// unlike the other messages - confirm whether the space is intended.
522 errorMessage += "Domain decomposition is not supported.\n ";
524 else if (hasAnyConstraints && !useUpdateGroups)
527 "Domain decomposition is only supported with constraints when update groups "
528 "are used. This means constraining all bonds is not supported, except for "
529 "small molecules, and box sizes close to half the pair-list cutoff are not "
533 if (inputrec.eConstrAlg == econtSHAKE && hasAnyConstraints && gmx_mtop_ftype_count(mtop, F_CONSTR) > 0)
535 errorMessage += "SHAKE constraints are not supported.\n";
537 // Using the GPU-version of update if:
538 // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread), or
539 // 2. Non-bonded interactions are on the GPU.
540 if (pmeRunMode == PmeRunMode::CPU && !useGpuForNonbonded)
543 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
545 // Since only direct GPU communications are supported with GPU update, PME should be fully offloaded in DD and PME only cases.
546 if (pmeRunMode != PmeRunMode::GPU && (isDomainDecomposition || havePmeOnlyRank))
548 errorMessage += "PME should run on GPU.\n";
550 if (!gpusWereDetected)
552 errorMessage += "Compatible GPUs must have been found.\n";
554 if (GMX_GPU != GMX_GPU_CUDA)
556 errorMessage += "Only a CUDA build is supported.\n";
558 if (inputrec.eI != eiMD)
560 errorMessage += "Only the md integrator is supported.\n";
562 if (inputrec.etc == etcNOSEHOOVER)
564 errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
566 if (!(inputrec.epc == epcNO || inputrec.epc == epcPARRINELLORAHMAN || inputrec.epc == epcBERENDSEN))
568 errorMessage += "Only Parrinello-Rahman and Berendsen pressure coupling are supported.\n";
570 if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
572 // The graph is needed, but not supported
573 errorMessage += "Ewald surface correction is not supported.\n";
575 if (gmx_mtop_interaction_count(mtop, IF_VSITE) > 0)
577 errorMessage += "Virtual sites are not supported.\n";
579 if (useEssentialDynamics)
581 errorMessage += "Essential dynamics is not supported.\n";
583 if (inputrec.bPull && pull_have_constraint(inputrec.pull))
585 errorMessage += "Constraints pulling is not supported.\n";
587 if (doOrientationRestraints)
589 // The graph is needed, but not supported
590 errorMessage += "Orientation restraints are not supported.\n";
592 if (inputrec.efep != efepNO)
594 // Actually all free-energy options except for mass and constraint perturbation are supported
595 errorMessage += "Free energy perturbations are not supported.\n";
597 if (useReplicaExchange)
599 errorMessage += "Replica exchange simulations are not supported.\n";
601 if (inputrec.eSwapCoords != eswapNO)
603 errorMessage += "Swapping the coordinates is not supported.\n";
607 errorMessage += "Re-run is not supported.\n";
610 // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
611 // does not support it, the actual CUDA LINCS code does support it
612 if (gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)
// NOTE(review): this message has no trailing "\n", unlike the others, so any
// text appended afterwards continues on the same line - likely an oversight.
614 errorMessage += "Non-connecting constraints are not supported";
616 if (!UpdateConstrainCuda::isNumCoupledConstraintsSupported(mtop))
619 "The number of coupled constraints is higher than supported in the CUDA LINCS "
// At least one unsupported condition was collected above.
623 if (!errorMessage.empty())
625 if (updateTarget == TaskTarget::Gpu)
627 std::string prefix = gmx::formatString(
628 "Update task on the GPU was required,\n"
629 "but the following condition(s) were not satisfied:\n");
630 GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
// All conditions are satisfied from here on.
635 if (isDomainDecomposition)
637 return forceGpuUpdateDefault;
641 return (updateTarget == TaskTarget::Gpu || forceGpuUpdateDefault);