src/gromacs/taskassignment/decidegpuusage.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  * \brief Defines functionality for deciding whether tasks will run on GPUs.
  37  *
  38  * \author Mark Abraham <mark.j.abraham@gmail.com>
  39  * \ingroup module_taskassignment
  40  */
  41
  42 #include "gmxpre.h"
  43
  44 #include "decidegpuusage.h"
  45
  46 #include "config.h"
  47
  48 #include <cstdlib>
  49 #include <cstring>
  50
  51 #include <algorithm>
  52 #include <string>
  53
  54 #include "gromacs/ewald/pme.h"
  55 #include "gromacs/hardware/cpuinfo.h"
  56 #include "gromacs/hardware/detecthardware.h"
  57 #include "gromacs/hardware/hardwaretopology.h"
  58 #include "gromacs/hardware/hw_info.h"
  59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  60 #include "gromacs/mdlib/update_constrain_cuda.h"
  61 #include "gromacs/mdtypes/commrec.h"
  62 #include "gromacs/mdtypes/inputrec.h"
  63 #include "gromacs/mdtypes/md_enums.h"
  64 #include "gromacs/mdtypes/mdrunoptions.h"
  65 #include "gromacs/taskassignment/taskassignment.h"
  66 #include "gromacs/topology/mtop_util.h"
  67 #include "gromacs/topology/topology.h"
  68 #include "gromacs/utility/baseversion.h"
  69 #include "gromacs/utility/exceptions.h"
  70 #include "gromacs/utility/fatalerror.h"
  71 #include "gromacs/utility/gmxassert.h"
  72 #include "gromacs/utility/logger.h"
  73 #include "gromacs/utility/stringutil.h"
  74
  75
  76 namespace gmx
  77 {
  78
  79 namespace
  80 {
  81
  82 //! Helper variable to localise the text of an often repeated message.
  83 const char* g_specifyEverythingFormatString =
  84         "When you use mdrun -gputasks, %s must be set to non-default "
  85         "values, so that the device IDs can be interpreted correctly."
  86 #if GMX_GPU != GMX_GPU_NONE
  87         " If you simply want to restrict which GPUs are used, then it is "
  88         "better to use mdrun -gpu_id. Otherwise, setting the "
  89 #    if GMX_GPU == GMX_GPU_CUDA
  90         "CUDA_VISIBLE_DEVICES"
  91 #    elif GMX_GPU == GMX_GPU_OPENCL
  92         // Technically there is no portable way to do this offered by the
  93         // OpenCL standard, but the only current relevant case for GROMACS
  94         // is AMD OpenCL, which offers this variable.
  95         "GPU_DEVICE_ORDINAL"
  96 #    else
  97 #        error "Unreachable branch"
  98 #    endif
  99         " environment variable in your bash profile or job "
 100         "script may be more convenient."
 101 #endif
 102         ;
 103
 104 } // namespace
 105
 106 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget        nonbondedTarget,
 107                                                      const std::vector<int>& gpuIdsToUse,
 108                                                      const std::vector<int>& userGpuTaskAssignment,
 109                                                      const EmulateGpuNonbonded emulateGpuNonbonded,
 110                                                      const bool buildSupportsNonbondedOnGpu,
 111                                                      const bool nonbondedOnGpuIsUseful,
 112                                                      const int  numRanksPerSimulation)
 113 {
 114     // First, exclude all cases where we can't run NB on GPUs.
 115     if (nonbondedTarget == TaskTarget::Cpu || emulateGpuNonbonded == EmulateGpuNonbonded::Yes
 116         || !nonbondedOnGpuIsUseful || !buildSupportsNonbondedOnGpu)
 117     {
 118         // If the user required NB on GPUs, we issue an error later.
 119         return false;
 120     }
 121
 122     // We now know that NB on GPUs makes sense, if we have any.
 123
 124     if (!userGpuTaskAssignment.empty())
 125     {
 126         // Specifying -gputasks requires specifying everything.
 127         if (nonbondedTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
 128         {
 129             GMX_THROW(InconsistentInputError(
 130                     formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 131         }
 132         return true;
 133     }
 134
 135     if (nonbondedTarget == TaskTarget::Gpu)
 136     {
 137         return true;
 138     }
 139
 140     // Because this is thread-MPI, we already know about the GPUs that
 141     // all potential ranks can use, and can use that in a global
 142     // decision that will later be consistent.
 143     auto haveGpus = !gpuIdsToUse.empty();
 144
 145     // If we get here, then the user permitted or required GPUs.
 146     return haveGpus;
 147 }
 148
 149 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
 150                                                const TaskTarget        pmeTarget,
 151                                                const std::vector<int>& gpuIdsToUse,
 152                                                const std::vector<int>& userGpuTaskAssignment,
 153                                                const gmx_hw_info_t&    hardwareInfo,
 154                                                const t_inputrec&       inputrec,
 155                                                const gmx_mtop_t&       mtop,
 156                                                const int               numRanksPerSimulation,
 157                                                const int               numPmeRanksPerSimulation)
 158 {
 159     // First, exclude all cases where we can't run PME on GPUs.
 160     if ((pmeTarget == TaskTarget::Cpu) || !useGpuForNonbonded || !pme_gpu_supports_build(nullptr)
 161         || !pme_gpu_supports_hardware(hardwareInfo, nullptr)
 162         || !pme_gpu_supports_input(inputrec, mtop, nullptr))
 163     {
 164         // PME can't run on a GPU. If the user required that, we issue
 165         // an error later.
 166         return false;
 167     }
 168
 169     // We now know that PME on GPUs might make sense, if we have any.
 170
 171     if (!userGpuTaskAssignment.empty())
 172     {
 173         // Follow the user's choice of GPU task assignment, if we
 174         // can. Checking that their IDs are for compatible GPUs comes
 175         // later.
 176
 177         // Specifying -gputasks requires specifying everything.
 178         if (pmeTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
 179         {
 180             GMX_THROW(InconsistentInputError(
 181                     formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
 182         }
 183
 184         // PME on GPUs is only supported in a single case
 185         if (pmeTarget == TaskTarget::Gpu)
 186         {
 187             if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
 188                 || (numPmeRanksPerSimulation > 1))
 189             {
 190                 GMX_THROW(InconsistentInputError(
 191                         "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
 192                         "file and use a single PME rank."));
 193             }
 194             return true;
 195         }
 196
 197         // pmeTarget == TaskTarget::Auto
 198         return numRanksPerSimulation == 1;
 199     }
 200
 201     // Because this is thread-MPI, we already know about the GPUs that
 202     // all potential ranks can use, and can use that in a global
 203     // decision that will later be consistent.
 204
 205     if (pmeTarget == TaskTarget::Gpu)
 206     {
 207         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
 208             || (numPmeRanksPerSimulation > 1))
 209         {
 210             GMX_THROW(NotImplementedError(
 211                     "PME tasks were required to run on GPUs, but that is not implemented with "
 212                     "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 213                     "or permit PME tasks to be assigned to the CPU."));
 214         }
 215         return true;
 216     }
 217
 218     if (numRanksPerSimulation == 1)
 219     {
 220         // PME can run well on a GPU shared with NB, and we permit
 221         // mdrun to default to try that.
 222         return !gpuIdsToUse.empty();
 223     }
 224
 225     if (numRanksPerSimulation < 1)
 226     {
 227         // Full automated mode for thread-MPI (the default). PME can
 228         // run well on a GPU shared with NB, and we permit mdrun to
 229         // default to it if there is only one GPU available.
 230         return (gpuIdsToUse.size() == 1);
 231     }
 232
 233     // Not enough support for PME on GPUs for anything else
 234     return false;
 235 }
 236
 237 bool decideWhetherToUseGpusForNonbonded(const TaskTarget          nonbondedTarget,
 238                                         const std::vector<int>&   userGpuTaskAssignment,
 239                                         const EmulateGpuNonbonded emulateGpuNonbonded,
 240                                         const bool                buildSupportsNonbondedOnGpu,
 241                                         const bool                nonbondedOnGpuIsUseful,
 242                                         const bool                gpusWereDetected)
 243 {
 244     if (nonbondedTarget == TaskTarget::Cpu)
 245     {
 246         if (!userGpuTaskAssignment.empty())
 247         {
 248             GMX_THROW(InconsistentInputError(
 249                     "A GPU task assignment was specified, but nonbonded interactions were "
 250                     "assigned to the CPU. Make no more than one of these choices."));
 251         }
 252
 253         return false;
 254     }
 255
 256     if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
 257     {
 258         GMX_THROW(InconsistentInputError(
 259                 "Nonbonded interactions on the GPU were requested with -nb gpu, "
 260                 "but the GROMACS binary has been built without GPU support. "
 261                 "Either run without selecting GPU options, or recompile GROMACS "
 262                 "with GPU support enabled"));
 263     }
 264
 265     // TODO refactor all these TaskTarget::Gpu checks into one place?
 266     // e.g. use a subfunction that handles only the cases where
 267     // TaskTargets are not Cpu?
 268     if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
 269     {
 270         if (nonbondedTarget == TaskTarget::Gpu)
 271         {
 272             GMX_THROW(InconsistentInputError(
 273                     "Nonbonded interactions on the GPU were required, which is inconsistent "
 274                     "with choosing emulation. Make no more than one of these choices."));
 275         }
 276         if (!userGpuTaskAssignment.empty())
 277         {
 278             GMX_THROW(
 279                     InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
 280                                            "no more than one of these choices."));
 281         }
 282
 283         return false;
 284     }
 285
 286     if (!nonbondedOnGpuIsUseful)
 287     {
 288         if (nonbondedTarget == TaskTarget::Gpu)
 289         {
 290             GMX_THROW(InconsistentInputError(
 291                     "Nonbonded interactions on the GPU were required, but not supported for these "
 292                     "simulation settings. Change your settings, or do not require using GPUs."));
 293         }
 294
 295         return false;
 296     }
 297
 298     if (!userGpuTaskAssignment.empty())
 299     {
 300         // Specifying -gputasks requires specifying everything.
 301         if (nonbondedTarget == TaskTarget::Auto)
 302         {
 303             GMX_THROW(InconsistentInputError(
 304                     formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 305         }
 306
 307         return true;
 308     }
 309
 310     if (nonbondedTarget == TaskTarget::Gpu)
 311     {
 312         // We still don't know whether it is an error if no GPUs are found
 313         // because we don't know the duty of this rank, yet. For example,
 314         // a node with only PME ranks and -pme cpu is OK if there are not
 315         // GPUs.
 316         return true;
 317     }
 318
 319     // If we get here, then the user permitted GPUs, which we should
 320     // use for nonbonded interactions.
 321     return gpusWereDetected;
 322 }
 323
 324 bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
 325                                   const TaskTarget        pmeTarget,
 326                                   const std::vector<int>& userGpuTaskAssignment,
 327                                   const gmx_hw_info_t&    hardwareInfo,
 328                                   const t_inputrec&       inputrec,
 329                                   const gmx_mtop_t&       mtop,
 330                                   const int               numRanksPerSimulation,
 331                                   const int               numPmeRanksPerSimulation,
 332                                   const bool              gpusWereDetected)
 333 {
 334     if (pmeTarget == TaskTarget::Cpu)
 335     {
 336         return false;
 337     }
 338
 339     if (!useGpuForNonbonded)
 340     {
 341         if (pmeTarget == TaskTarget::Gpu)
 342         {
 343             GMX_THROW(NotImplementedError(
 344                     "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
 345         }
 346         return false;
 347     }
 348
 349     std::string message;
 350     if (!pme_gpu_supports_build(&message))
 351     {
 352         if (pmeTarget == TaskTarget::Gpu)
 353         {
 354             GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
 355         }
 356         return false;
 357     }
 358     if (!pme_gpu_supports_hardware(hardwareInfo, &message))
 359     {
 360         if (pmeTarget == TaskTarget::Gpu)
 361         {
 362             GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
 363         }
 364         return false;
 365     }
 366     if (!pme_gpu_supports_input(inputrec, mtop, &message))
 367     {
 368         if (pmeTarget == TaskTarget::Gpu)
 369         {
 370             GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
 371         }
 372         return false;
 373     }
 374
 375     if (pmeTarget == TaskTarget::Cpu)
 376     {
 377         if (!userGpuTaskAssignment.empty())
 378         {
 379             GMX_THROW(InconsistentInputError(
 380                     "A GPU task assignment was specified, but PME interactions were "
 381                     "assigned to the CPU. Make no more than one of these choices."));
 382         }
 383
 384         return false;
 385     }
 386
 387     if (!userGpuTaskAssignment.empty())
 388     {
 389         // Specifying -gputasks requires specifying everything.
 390         if (pmeTarget == TaskTarget::Auto)
 391         {
 392             GMX_THROW(InconsistentInputError(formatString(
 393                     g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
 394         }
 395
 396         return true;
 397     }
 398
 399     // We still don't know whether it is an error if no GPUs are found
 400     // because we don't know the duty of this rank, yet. For example,
 401     // a node with only PME ranks and -pme cpu is OK if there are not
 402     // GPUs.
 403
 404     if (pmeTarget == TaskTarget::Gpu)
 405     {
 406         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
 407             || (numPmeRanksPerSimulation > 1))
 408         {
 409             GMX_THROW(NotImplementedError(
 410                     "PME tasks were required to run on GPUs, but that is not implemented with "
 411                     "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 412                     "or permit PME tasks to be assigned to the CPU."));
 413         }
 414         return true;
 415     }
 416
 417     // If we get here, then the user permitted GPUs.
 418     if (numRanksPerSimulation == 1)
 419     {
 420         // PME can run well on a single GPU shared with NB when there
 421         // is one rank, so we permit mdrun to try that if we have
 422         // detected GPUs.
 423         return gpusWereDetected;
 424     }
 425
 426     // Not enough support for PME on GPUs for anything else
 427     return false;
 428 }
 429
 430 bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
 431                                      const bool       useGpuForPme,
 432                                      const TaskTarget bondedTarget,
 433                                      const bool       canUseGpuForBonded,
 434                                      const bool       usingLJPme,
 435                                      const bool       usingElecPmeOrEwald,
 436                                      const int        numPmeRanksPerSimulation,
 437                                      const bool       gpusWereDetected)
 438 {
 439     if (bondedTarget == TaskTarget::Cpu)
 440     {
 441         return false;
 442     }
 443
 444     if (!canUseGpuForBonded)
 445     {
 446         if (bondedTarget == TaskTarget::Gpu)
 447         {
 448             GMX_THROW(InconsistentInputError(
 449                     "Bonded interactions on the GPU were required, but not supported for these "
 450                     "simulation settings. Change your settings, or do not require using GPUs."));
 451         }
 452
 453         return false;
 454     }
 455
 456     if (!useGpuForNonbonded)
 457     {
 458         if (bondedTarget == TaskTarget::Gpu)
 459         {
 460             GMX_THROW(InconsistentInputError(
 461                     "Bonded interactions on the GPU were required, but this requires that "
 462                     "short-ranged non-bonded interactions are also run on the GPU. Change "
 463                     "your settings, or do not require using GPUs."));
 464         }
 465
 466         return false;
 467     }
 468
 469     // TODO If the bonded kernels do not get fused, then performance
 470     // overheads might suggest alternative choices here.
 471
 472     if (bondedTarget == TaskTarget::Gpu)
 473     {
 474         // We still don't know whether it is an error if no GPUs are
 475         // found.
 476         return true;
 477     }
 478
 479     // If we get here, then the user permitted GPUs, which we should
 480     // use for bonded interactions if any were detected and the CPU
 481     // is busy, for which we currently only check PME or Ewald.
 482     // (It would be better to dynamically assign bondeds based on timings)
 483     // Note that here we assume that the auto setting of PME ranks will not
 484     // choose seperate PME ranks when nonBonded are assigned to the GPU.
 485     bool usingOurCpuForPmeOrEwald =
 486             (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
 487
 488     return gpusWereDetected && usingOurCpuForPmeOrEwald;
 489 }
 490
 491 bool decideWhetherToUseGpuForUpdate(const bool        isDomainDecomposition,
 492                                     const bool        useGpuForPme,
 493                                     const bool        useGpuForNonbonded,
 494                                     const bool        gpuPmePpCommIsEnabled,
 495                                     const bool        gpuHaloExchangeIsEnabled,
 496                                     const TaskTarget  updateTarget,
 497                                     const bool        gpusWereDetected,
 498                                     const t_inputrec& inputrec,
 499                                     const gmx_mtop_t& mtop,
 500                                     const bool        useEssentialDynamics,
 501                                     const bool        doOrientationRestraints,
 502                                     const bool        useReplicaExchange)
 503 {
 504
 505     if (updateTarget == TaskTarget::Cpu)
 506     {
 507         return false;
 508     }
 509
 510     std::string errorMessage;
 511
 512     if (isDomainDecomposition && (!gpuPmePpCommIsEnabled || !gpuHaloExchangeIsEnabled))
 513     {
 514         errorMessage +=
 515                 "Domain decomposition is not supported without GPU halo exchange and GPU PME-PP "
 516                 "communication.\n";
 517     }
 518     // Using the GPU-version of update if:
 519     // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread), or
 520     // 2. Non-bonded interactions are on the GPU.
 521     if (!(useGpuForPme || useGpuForNonbonded))
 522     {
 523         errorMessage +=
 524                 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
 525     }
 526     if (!gpusWereDetected)
 527     {
 528         errorMessage += "Compatible GPUs must have been found.\n";
 529     }
 530     if (GMX_GPU != GMX_GPU_CUDA)
 531     {
 532         errorMessage += "Only a CUDA build is supported.\n";
 533     }
 534     if (inputrec.eI != eiMD)
 535     {
 536         errorMessage += "Only the md integrator is supported.\n";
 537     }
 538     if (inputrec.etc == etcNOSEHOOVER)
 539     {
 540         errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
 541     }
 542     if (!(inputrec.epc == epcNO || inputrec.epc == epcPARRINELLORAHMAN || inputrec.epc == epcBERENDSEN))
 543     {
 544         errorMessage += "Only Parrinello-Rahman and Berendsen pressure coupling are supported.\n";
 545     }
 546     if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
 547     {
 548         // The graph is needed, but not supported
 549         errorMessage += "Ewald surface correction is not supported.\n";
 550     }
 551     if (gmx_mtop_interaction_count(mtop, IF_VSITE) > 0)
 552     {
 553         errorMessage += "Virtual sites are not supported.\n";
 554     }
 555     if (useEssentialDynamics)
 556     {
 557         errorMessage += "Essential dynamics is not supported.\n";
 558     }
 559     if (inputrec.bPull || inputrec.pull)
 560     {
 561         // Pull potentials are actually supported, but constraint pulling is not
 562         errorMessage += "Pulling is not supported.\n";
 563     }
 564     if (doOrientationRestraints)
 565     {
 566         // The graph is needed, but not supported
 567         errorMessage += "Orientation restraints are not supported.\n";
 568     }
 569     if (inputrec.efep != efepNO)
 570     {
 571         // Actually all free-energy options except for mass and constraint perturbation are supported
 572         errorMessage += "Free energy perturbations are not supported.\n";
 573     }
 574     if (useReplicaExchange)
 575     {
 576         errorMessage += "Replica exchange simulations are not supported.\n";
 577     }
 578     if (inputrec.eSwapCoords != eswapNO)
 579     {
 580         errorMessage += "Swapping the coordinates is not supported.\n";
 581     }
 582
 583     // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
 584     // does not support it, the actual CUDA LINCS code does support it
 585     if (gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)
 586     {
 587         errorMessage += "Non-connecting constraints are not supported";
 588     }
 589     if (!UpdateConstrainCuda::isNumCoupledConstraintsSupported(mtop))
 590     {
 591         errorMessage +=
 592                 "The number of coupled constraints is higher than supported in the CUDA LINCS "
 593                 "code.\n";
 594     }
 595
 596     if (!errorMessage.empty())
 597     {
 598         if (updateTarget == TaskTarget::Gpu)
 599         {
 600             std::string prefix = gmx::formatString(
 601                     "Update task on the GPU was required,\n"
 602                     "but the following condition(s) were not satisfied:\n");
 603             GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
 604         }
 605         return false;
 606     }
 607
 608     return true;
 609 }
 610
 611 } // namespace gmx