/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 * \brief Defines functionality for deciding whether tasks will run on GPUs.
 *
 * \author Mark Abraham <mark.j.abraham@gmail.com>
 * \ingroup module_taskassignment
 */
44 #include "decidegpuusage.h"
54 #include "gromacs/ewald/pme.h"
55 #include "gromacs/hardware/cpuinfo.h"
56 #include "gromacs/hardware/detecthardware.h"
57 #include "gromacs/hardware/hardwaretopology.h"
58 #include "gromacs/hardware/hw_info.h"
59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
60 #include "gromacs/mdtypes/commrec.h"
61 #include "gromacs/mdtypes/inputrec.h"
62 #include "gromacs/mdtypes/md_enums.h"
63 #include "gromacs/mdtypes/mdrunoptions.h"
64 #include "gromacs/taskassignment/taskassignment.h"
65 #include "gromacs/topology/topology.h"
66 #include "gromacs/utility/baseversion.h"
67 #include "gromacs/utility/exceptions.h"
68 #include "gromacs/utility/fatalerror.h"
69 #include "gromacs/utility/gmxassert.h"
70 #include "gromacs/utility/logger.h"
71 #include "gromacs/utility/stringutil.h"
/*! \brief Helper variable to localise the text of an often repeated message.
 *
 * This is a printf-style format string: its single "%s" is filled in (via
 * formatString) with the names of the mdrun options that must be given
 * explicitly whenever -gputasks is used. In GPU builds the message is
 * extended with advice on restricting the visible devices instead.
 */
const char* g_specifyEverythingFormatString =
        "When you use mdrun -gputasks, %s must be set to non-default "
        "values, so that the device IDs can be interpreted correctly."
#if GMX_GPU != GMX_GPU_NONE
        " If you simply want to restrict which GPUs are used, then it is "
        "better to use mdrun -gpu_id. Otherwise, setting the "
#    if GMX_GPU == GMX_GPU_CUDA
        "CUDA_VISIBLE_DEVICES"
#    elif GMX_GPU == GMX_GPU_OPENCL
        // Technically there is no portable way to do this offered by the
        // OpenCL standard, but the only current relevant case for GROMACS
        // is AMD OpenCL, which offers this variable.
        // NOTE(review): variable name restored from the upstream GROMACS
        // source - confirm.
        "GPU_DEVICE_ORDINAL"
#    else
#        error "Unreachable branch"
#    endif
        " environment variable in your bash profile or job "
        "script may be more convenient."
#endif
        ;
104 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget,
105 const std::vector<int>& gpuIdsToUse,
106 const std::vector<int>& userGpuTaskAssignment,
107 const EmulateGpuNonbonded emulateGpuNonbonded,
108 const bool buildSupportsNonbondedOnGpu,
109 const bool nonbondedOnGpuIsUseful,
110 const int numRanksPerSimulation)
112 // First, exclude all cases where we can't run NB on GPUs.
113 if (nonbondedTarget == TaskTarget::Cpu || emulateGpuNonbonded == EmulateGpuNonbonded::Yes
114 || !nonbondedOnGpuIsUseful || !buildSupportsNonbondedOnGpu)
116 // If the user required NB on GPUs, we issue an error later.
120 // We now know that NB on GPUs makes sense, if we have any.
122 if (!userGpuTaskAssignment.empty())
124 // Specifying -gputasks requires specifying everything.
125 if (nonbondedTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
127 GMX_THROW(InconsistentInputError(
128 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
133 if (nonbondedTarget == TaskTarget::Gpu)
138 // Because this is thread-MPI, we already know about the GPUs that
139 // all potential ranks can use, and can use that in a global
140 // decision that will later be consistent.
141 auto haveGpus = !gpuIdsToUse.empty();
143 // If we get here, then the user permitted or required GPUs.
147 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded,
148 const TaskTarget pmeTarget,
149 const std::vector<int>& gpuIdsToUse,
150 const std::vector<int>& userGpuTaskAssignment,
151 const gmx_hw_info_t& hardwareInfo,
152 const t_inputrec& inputrec,
153 const gmx_mtop_t& mtop,
154 const int numRanksPerSimulation,
155 const int numPmeRanksPerSimulation)
157 // First, exclude all cases where we can't run PME on GPUs.
158 if ((pmeTarget == TaskTarget::Cpu) || !useGpuForNonbonded || !pme_gpu_supports_build(nullptr)
159 || !pme_gpu_supports_hardware(hardwareInfo, nullptr)
160 || !pme_gpu_supports_input(inputrec, mtop, nullptr))
162 // PME can't run on a GPU. If the user required that, we issue
167 // We now know that PME on GPUs might make sense, if we have any.
169 if (!userGpuTaskAssignment.empty())
171 // Follow the user's choice of GPU task assignment, if we
172 // can. Checking that their IDs are for compatible GPUs comes
175 // Specifying -gputasks requires specifying everything.
176 if (pmeTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
178 GMX_THROW(InconsistentInputError(
179 formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
182 // PME on GPUs is only supported in a single case
183 if (pmeTarget == TaskTarget::Gpu)
185 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
186 || (numPmeRanksPerSimulation > 1))
188 GMX_THROW(InconsistentInputError(
189 "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
190 "file and use a single PME rank."));
195 // pmeTarget == TaskTarget::Auto
196 return numRanksPerSimulation == 1;
199 // Because this is thread-MPI, we already know about the GPUs that
200 // all potential ranks can use, and can use that in a global
201 // decision that will later be consistent.
203 if (pmeTarget == TaskTarget::Gpu)
205 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
206 || (numPmeRanksPerSimulation > 1))
208 GMX_THROW(NotImplementedError(
209 "PME tasks were required to run on GPUs, but that is not implemented with "
210 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
211 "or permit PME tasks to be assigned to the CPU."));
216 if (numRanksPerSimulation == 1)
218 // PME can run well on a GPU shared with NB, and we permit
219 // mdrun to default to try that.
220 return !gpuIdsToUse.empty();
223 if (numRanksPerSimulation < 1)
225 // Full automated mode for thread-MPI (the default). PME can
226 // run well on a GPU shared with NB, and we permit mdrun to
227 // default to it if there is only one GPU available.
228 return (gpuIdsToUse.size() == 1);
231 // Not enough support for PME on GPUs for anything else
235 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget,
236 const std::vector<int>& userGpuTaskAssignment,
237 const EmulateGpuNonbonded emulateGpuNonbonded,
238 const bool buildSupportsNonbondedOnGpu,
239 const bool nonbondedOnGpuIsUseful,
240 const bool gpusWereDetected)
242 if (nonbondedTarget == TaskTarget::Cpu)
244 if (!userGpuTaskAssignment.empty())
246 GMX_THROW(InconsistentInputError(
247 "A GPU task assignment was specified, but nonbonded interactions were "
248 "assigned to the CPU. Make no more than one of these choices."));
254 if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
256 GMX_THROW(InconsistentInputError(
257 "Nonbonded interactions on the GPU were requested with -nb gpu, "
258 "but the GROMACS binary has been built without GPU support. "
259 "Either run without selecting GPU options, or recompile GROMACS "
260 "with GPU support enabled"));
263 // TODO refactor all these TaskTarget::Gpu checks into one place?
264 // e.g. use a subfunction that handles only the cases where
265 // TaskTargets are not Cpu?
266 if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
268 if (nonbondedTarget == TaskTarget::Gpu)
270 GMX_THROW(InconsistentInputError(
271 "Nonbonded interactions on the GPU were required, which is inconsistent "
272 "with choosing emulation. Make no more than one of these choices."));
274 if (!userGpuTaskAssignment.empty())
277 InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
278 "no more than one of these choices."));
284 if (!nonbondedOnGpuIsUseful)
286 if (nonbondedTarget == TaskTarget::Gpu)
288 GMX_THROW(InconsistentInputError(
289 "Nonbonded interactions on the GPU were required, but not supported for these "
290 "simulation settings. Change your settings, or do not require using GPUs."));
296 if (!userGpuTaskAssignment.empty())
298 // Specifying -gputasks requires specifying everything.
299 if (nonbondedTarget == TaskTarget::Auto)
301 GMX_THROW(InconsistentInputError(
302 formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
308 if (nonbondedTarget == TaskTarget::Gpu)
310 // We still don't know whether it is an error if no GPUs are found
311 // because we don't know the duty of this rank, yet. For example,
312 // a node with only PME ranks and -pme cpu is OK if there are not
317 // If we get here, then the user permitted GPUs, which we should
318 // use for nonbonded interactions.
319 return gpusWereDetected;
322 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded,
323 const TaskTarget pmeTarget,
324 const std::vector<int>& userGpuTaskAssignment,
325 const gmx_hw_info_t& hardwareInfo,
326 const t_inputrec& inputrec,
327 const gmx_mtop_t& mtop,
328 const int numRanksPerSimulation,
329 const int numPmeRanksPerSimulation,
330 const bool gpusWereDetected)
332 if (pmeTarget == TaskTarget::Cpu)
337 if (!useGpuForNonbonded)
339 if (pmeTarget == TaskTarget::Gpu)
341 GMX_THROW(NotImplementedError(
342 "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
348 if (!pme_gpu_supports_build(&message))
350 if (pmeTarget == TaskTarget::Gpu)
352 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
356 if (!pme_gpu_supports_hardware(hardwareInfo, &message))
358 if (pmeTarget == TaskTarget::Gpu)
360 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
364 if (!pme_gpu_supports_input(inputrec, mtop, &message))
366 if (pmeTarget == TaskTarget::Gpu)
368 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
373 if (pmeTarget == TaskTarget::Cpu)
375 if (!userGpuTaskAssignment.empty())
377 GMX_THROW(InconsistentInputError(
378 "A GPU task assignment was specified, but PME interactions were "
379 "assigned to the CPU. Make no more than one of these choices."));
385 if (!userGpuTaskAssignment.empty())
387 // Specifying -gputasks requires specifying everything.
388 if (pmeTarget == TaskTarget::Auto)
390 GMX_THROW(InconsistentInputError(formatString(
391 g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
397 // We still don't know whether it is an error if no GPUs are found
398 // because we don't know the duty of this rank, yet. For example,
399 // a node with only PME ranks and -pme cpu is OK if there are not
402 if (pmeTarget == TaskTarget::Gpu)
404 if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
405 || (numPmeRanksPerSimulation > 1))
407 GMX_THROW(NotImplementedError(
408 "PME tasks were required to run on GPUs, but that is not implemented with "
409 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
410 "or permit PME tasks to be assigned to the CPU."));
415 // If we get here, then the user permitted GPUs.
416 if (numRanksPerSimulation == 1)
418 // PME can run well on a single GPU shared with NB when there
419 // is one rank, so we permit mdrun to try that if we have
421 return gpusWereDetected;
424 // Not enough support for PME on GPUs for anything else
428 bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded,
429 const bool useGpuForPme,
430 const TaskTarget bondedTarget,
431 const bool canUseGpuForBonded,
432 const bool usingLJPme,
433 const bool usingElecPmeOrEwald,
434 const int numPmeRanksPerSimulation,
435 const bool gpusWereDetected)
437 if (bondedTarget == TaskTarget::Cpu)
442 if (!canUseGpuForBonded)
444 if (bondedTarget == TaskTarget::Gpu)
446 GMX_THROW(InconsistentInputError(
447 "Bonded interactions on the GPU were required, but not supported for these "
448 "simulation settings. Change your settings, or do not require using GPUs."));
454 if (!useGpuForNonbonded)
456 if (bondedTarget == TaskTarget::Gpu)
458 GMX_THROW(InconsistentInputError(
459 "Bonded interactions on the GPU were required, but this requires that "
460 "short-ranged non-bonded interactions are also run on the GPU. Change "
461 "your settings, or do not require using GPUs."));
467 // TODO If the bonded kernels do not get fused, then performance
468 // overheads might suggest alternative choices here.
470 if (bondedTarget == TaskTarget::Gpu)
472 // We still don't know whether it is an error if no GPUs are
477 // If we get here, then the user permitted GPUs, which we should
478 // use for bonded interactions if any were detected and the CPU
479 // is busy, for which we currently only check PME or Ewald.
480 // (It would be better to dynamically assign bondeds based on timings)
481 // Note that here we assume that the auto setting of PME ranks will not
482 // choose seperate PME ranks when nonBonded are assigned to the GPU.
483 bool usingOurCpuForPmeOrEwald =
484 (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
486 return gpusWereDetected && usingOurCpuForPmeOrEwald;
489 bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefaultOn,
490 const bool isDomainDecomposition,
491 const bool useGpuForPme,
492 const bool useGpuForNonbonded,
493 const TaskTarget updateTarget,
494 const bool gpusWereDetected,
495 const t_inputrec& inputrec,
496 const bool haveVSites,
497 const bool useEssentialDynamics,
498 const bool doOrientationRestraints,
499 const bool useReplicaExchange)
502 if (updateTarget == TaskTarget::Cpu)
507 std::string errorMessage;
509 if (isDomainDecomposition)
511 errorMessage += "Domain decomposition is not supported.\n";
513 // Using the GPU-version of update if:
514 // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread), or
515 // 2. Non-bonded interactions are on the GPU.
516 if (!(useGpuForPme || useGpuForNonbonded))
519 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
521 if (!gpusWereDetected)
523 errorMessage += "Compatible GPUs must have been found.\n";
525 if (GMX_GPU != GMX_GPU_CUDA)
527 errorMessage += "Only a CUDA build is supported.\n";
529 if (inputrec.eI != eiMD)
531 errorMessage += "Only the md integrator is supported.\n";
533 if (inputrec.etc == etcNOSEHOOVER)
535 errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
537 if (inputrec.epc != epcNO)
539 // Coordinate D2H and H2d are missing as well as PBC reinitialization
540 errorMessage += "Pressure coupling is not supported.\n";
542 if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
544 // The graph is needed, but not supported
545 errorMessage += "Ewald surface correction is not supported.\n";
549 errorMessage += "Virtual sites are not supported.\n";
551 if (useEssentialDynamics)
553 errorMessage += "Essential dynamics is not supported.\n";
555 if (inputrec.bPull || inputrec.pull)
557 // Pull potentials are actually supported, but constraint pulling is not
558 errorMessage += "Pulling is not supported.\n";
560 if (doOrientationRestraints)
562 // The graph is needed, but not supported
563 errorMessage += "Orientation restraints are not supported.\n";
565 if (inputrec.efep != efepNO)
567 // Actually all free-energy options except for mass and constraint perturbation are supported
568 errorMessage += "Free energy perturbations are not supported.\n";
570 if (useReplicaExchange)
572 errorMessage += "Replica exchange simulations are not supported.\n";
574 if (inputrec.eSwapCoords != eswapNO)
576 errorMessage += "Swapping the coordinates is not supported.\n";
579 // \todo Check for coupled constraint block size restriction needs to be added
580 // when update auto chooses GPU in some cases. Currently exceeding the restriction
581 // triggers a fatal error during LINCS setup.
583 if (!errorMessage.empty())
585 if (updateTarget == TaskTarget::Gpu)
587 std::string prefix = gmx::formatString(
588 "Update task on the GPU was required,\n"
589 "but the following condition(s) were not satisfied:\n");
590 GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
595 return ((forceGpuUpdateDefaultOn && updateTarget == TaskTarget::Auto)
596 || (updateTarget == TaskTarget::Gpu));