src/gromacs/taskassignment/decidegpuusage.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  * \brief Defines functionality for deciding whether tasks will run on GPUs.
  37  *
  38  * \author Mark Abraham <mark.j.abraham@gmail.com>
  39  * \ingroup module_taskassignment
  40  */
  41
  42 #include "gmxpre.h"
  43
  44 #include "decidegpuusage.h"
  45
  46 #include "config.h"
  47
  48 #include <cstdlib>
  49 #include <cstring>
  50
  51 #include <algorithm>
  52 #include <string>
  53
  54 #include "gromacs/ewald/pme.h"
  55 #include "gromacs/hardware/cpuinfo.h"
  56 #include "gromacs/hardware/detecthardware.h"
  57 #include "gromacs/hardware/hardwaretopology.h"
  58 #include "gromacs/hardware/hw_info.h"
  59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  60 #include "gromacs/mdlib/update_constrain_cuda.h"
  61 #include "gromacs/mdtypes/commrec.h"
  62 #include "gromacs/mdtypes/inputrec.h"
  63 #include "gromacs/mdtypes/md_enums.h"
  64 #include "gromacs/mdtypes/mdrunoptions.h"
  65 #include "gromacs/pulling/pull.h"
  66 #include "gromacs/taskassignment/taskassignment.h"
  67 #include "gromacs/topology/mtop_util.h"
  68 #include "gromacs/topology/topology.h"
  69 #include "gromacs/utility/baseversion.h"
  70 #include "gromacs/utility/exceptions.h"
  71 #include "gromacs/utility/fatalerror.h"
  72 #include "gromacs/utility/gmxassert.h"
  73 #include "gromacs/utility/logger.h"
  74 #include "gromacs/utility/stringutil.h"
  75
  76
  77 namespace gmx
  78 {
  79
  80 namespace
  81 {
  82
  83 //! Helper variable to localise the text of an often repeated message.
  84 const char* g_specifyEverythingFormatString =
  85         "When you use mdrun -gputasks, %s must be set to non-default "
  86         "values, so that the device IDs can be interpreted correctly."
  87 #if GMX_GPU != GMX_GPU_NONE
  88         " If you simply want to restrict which GPUs are used, then it is "
  89         "better to use mdrun -gpu_id. Otherwise, setting the "
  90 #    if GMX_GPU == GMX_GPU_CUDA
  91         "CUDA_VISIBLE_DEVICES"
  92 #    elif GMX_GPU == GMX_GPU_OPENCL
  93         // Technically there is no portable way to do this offered by the
  94         // OpenCL standard, but the only current relevant case for GROMACS
  95         // is AMD OpenCL, which offers this variable.
  96         "GPU_DEVICE_ORDINAL"
  97 #    else
  98 #        error "Unreachable branch"
  99 #    endif
 100         " environment variable in your bash profile or job "
 101         "script may be more convenient."
 102 #endif
 103         ;
 104
 105 } // namespace
 106
 107 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget        nonbondedTarget,
 108                                                      const std::vector<int>& gpuIdsToUse,
 109                                                      const std::vector<int>& userGpuTaskAssignment,
 110                                                      const EmulateGpuNonbonded emulateGpuNonbonded,
 111                                                      const bool buildSupportsNonbondedOnGpu,
 112                                                      const bool nonbondedOnGpuIsUseful,
 113                                                      const int  numRanksPerSimulation)
 114 {
 115     // First, exclude all cases where we can't run NB on GPUs.
 116     if (nonbondedTarget == TaskTarget::Cpu || emulateGpuNonbonded == EmulateGpuNonbonded::Yes
 117         || !nonbondedOnGpuIsUseful || !buildSupportsNonbondedOnGpu)
 118     {
 119         // If the user required NB on GPUs, we issue an error later.
 120         return false;
 121     }
 122
 123     // We now know that NB on GPUs makes sense, if we have any.
 124
 125     if (!userGpuTaskAssignment.empty())
 126     {
 127         // Specifying -gputasks requires specifying everything.
 128         if (nonbondedTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
 129         {
 130             GMX_THROW(InconsistentInputError(
 131                     formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 132         }
 133         return true;
 134     }
 135
 136     if (nonbondedTarget == TaskTarget::Gpu)
 137     {
 138         return true;
 139     }
 140
 141     // Because this is thread-MPI, we already know about the GPUs that
 142     // all potential ranks can use, and can use that in a global
 143     // decision that will later be consistent.
 144     auto haveGpus = !gpuIdsToUse.empty();
 145
 146     // If we get here, then the user permitted or required GPUs.
 147     return haveGpus;
 148 }
 149
 150 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
 151                                                const TaskTarget        pmeTarget,
 152                                                const std::vector<int>& gpuIdsToUse,
 153                                                const std::vector<int>& userGpuTaskAssignment,
 154                                                const gmx_hw_info_t&    hardwareInfo,
 155                                                const t_inputrec&       inputrec,
 156                                                const gmx_mtop_t&       mtop,
 157                                                const int               numRanksPerSimulation,
 158                                                const int               numPmeRanksPerSimulation)
 159 {
 160     // First, exclude all cases where we can't run PME on GPUs.
 161     if ((pmeTarget == TaskTarget::Cpu) || !useGpuForNonbonded || !pme_gpu_supports_build(nullptr)
 162         || !pme_gpu_supports_hardware(hardwareInfo, nullptr)
 163         || !pme_gpu_supports_input(inputrec, mtop, nullptr))
 164     {
 165         // PME can't run on a GPU. If the user required that, we issue
 166         // an error later.
 167         return false;
 168     }
 169
 170     // We now know that PME on GPUs might make sense, if we have any.
 171
 172     if (!userGpuTaskAssignment.empty())
 173     {
 174         // Follow the user's choice of GPU task assignment, if we
 175         // can. Checking that their IDs are for compatible GPUs comes
 176         // later.
 177
 178         // Specifying -gputasks requires specifying everything.
 179         if (pmeTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
 180         {
 181             GMX_THROW(InconsistentInputError(
 182                     formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
 183         }
 184
 185         // PME on GPUs is only supported in a single case
 186         if (pmeTarget == TaskTarget::Gpu)
 187         {
 188             if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
 189                 || (numPmeRanksPerSimulation > 1))
 190             {
 191                 GMX_THROW(InconsistentInputError(
 192                         "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
 193                         "file and use a single PME rank."));
 194             }
 195             return true;
 196         }
 197
 198         // pmeTarget == TaskTarget::Auto
 199         return numRanksPerSimulation == 1;
 200     }
 201
 202     // Because this is thread-MPI, we already know about the GPUs that
 203     // all potential ranks can use, and can use that in a global
 204     // decision that will later be consistent.
 205
 206     if (pmeTarget == TaskTarget::Gpu)
 207     {
 208         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
 209             || (numPmeRanksPerSimulation > 1))
 210         {
 211             GMX_THROW(NotImplementedError(
 212                     "PME tasks were required to run on GPUs, but that is not implemented with "
 213                     "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 214                     "or permit PME tasks to be assigned to the CPU."));
 215         }
 216         return true;
 217     }
 218
 219     if (numRanksPerSimulation == 1)
 220     {
 221         // PME can run well on a GPU shared with NB, and we permit
 222         // mdrun to default to try that.
 223         return !gpuIdsToUse.empty();
 224     }
 225
 226     if (numRanksPerSimulation < 1)
 227     {
 228         // Full automated mode for thread-MPI (the default). PME can
 229         // run well on a GPU shared with NB, and we permit mdrun to
 230         // default to it if there is only one GPU available.
 231         return (gpuIdsToUse.size() == 1);
 232     }
 233
 234     // Not enough support for PME on GPUs for anything else
 235     return false;
 236 }
 237
 238 bool decideWhetherToUseGpusForNonbonded(const TaskTarget          nonbondedTarget,
 239                                         const std::vector<int>&   userGpuTaskAssignment,
 240                                         const EmulateGpuNonbonded emulateGpuNonbonded,
 241                                         const bool                buildSupportsNonbondedOnGpu,
 242                                         const bool                nonbondedOnGpuIsUseful,
 243                                         const bool                gpusWereDetected)
 244 {
 245     if (nonbondedTarget == TaskTarget::Cpu)
 246     {
 247         if (!userGpuTaskAssignment.empty())
 248         {
 249             GMX_THROW(InconsistentInputError(
 250                     "A GPU task assignment was specified, but nonbonded interactions were "
 251                     "assigned to the CPU. Make no more than one of these choices."));
 252         }
 253
 254         return false;
 255     }
 256
 257     if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
 258     {
 259         GMX_THROW(InconsistentInputError(
 260                 "Nonbonded interactions on the GPU were requested with -nb gpu, "
 261                 "but the GROMACS binary has been built without GPU support. "
 262                 "Either run without selecting GPU options, or recompile GROMACS "
 263                 "with GPU support enabled"));
 264     }
 265
 266     // TODO refactor all these TaskTarget::Gpu checks into one place?
 267     // e.g. use a subfunction that handles only the cases where
 268     // TaskTargets are not Cpu?
 269     if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
 270     {
 271         if (nonbondedTarget == TaskTarget::Gpu)
 272         {
 273             GMX_THROW(InconsistentInputError(
 274                     "Nonbonded interactions on the GPU were required, which is inconsistent "
 275                     "with choosing emulation. Make no more than one of these choices."));
 276         }
 277         if (!userGpuTaskAssignment.empty())
 278         {
 279             GMX_THROW(
 280                     InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
 281                                            "no more than one of these choices."));
 282         }
 283
 284         return false;
 285     }
 286
 287     if (!nonbondedOnGpuIsUseful)
 288     {
 289         if (nonbondedTarget == TaskTarget::Gpu)
 290         {
 291             GMX_THROW(InconsistentInputError(
 292                     "Nonbonded interactions on the GPU were required, but not supported for these "
 293                     "simulation settings. Change your settings, or do not require using GPUs."));
 294         }
 295
 296         return false;
 297     }
 298
 299     if (!userGpuTaskAssignment.empty())
 300     {
 301         // Specifying -gputasks requires specifying everything.
 302         if (nonbondedTarget == TaskTarget::Auto)
 303         {
 304             GMX_THROW(InconsistentInputError(
 305                     formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 306         }
 307
 308         return true;
 309     }
 310
 311     if (nonbondedTarget == TaskTarget::Gpu)
 312     {
 313         // We still don't know whether it is an error if no GPUs are found
 314         // because we don't know the duty of this rank, yet. For example,
 315         // a node with only PME ranks and -pme cpu is OK if there are not
 316         // GPUs.
 317         return true;
 318     }
 319
 320     // If we get here, then the user permitted GPUs, which we should
 321     // use for nonbonded interactions.
 322     return gpusWereDetected;
 323 }
 324
 325 bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
 326                                   const TaskTarget        pmeTarget,
 327                                   const std::vector<int>& userGpuTaskAssignment,
 328                                   const gmx_hw_info_t&    hardwareInfo,
 329                                   const t_inputrec&       inputrec,
 330                                   const gmx_mtop_t&       mtop,
 331                                   const int               numRanksPerSimulation,
 332                                   const int               numPmeRanksPerSimulation,
 333                                   const bool              gpusWereDetected)
 334 {
 335     if (pmeTarget == TaskTarget::Cpu)
 336     {
 337         return false;
 338     }
 339
 340     if (!useGpuForNonbonded)
 341     {
 342         if (pmeTarget == TaskTarget::Gpu)
 343         {
 344             GMX_THROW(NotImplementedError(
 345                     "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
 346         }
 347         return false;
 348     }
 349
 350     std::string message;
 351     if (!pme_gpu_supports_build(&message))
 352     {
 353         if (pmeTarget == TaskTarget::Gpu)
 354         {
 355             GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
 356         }
 357         return false;
 358     }
 359     if (!pme_gpu_supports_hardware(hardwareInfo, &message))
 360     {
 361         if (pmeTarget == TaskTarget::Gpu)
 362         {
 363             GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
 364         }
 365         return false;
 366     }
 367     if (!pme_gpu_supports_input(inputrec, mtop, &message))
 368     {
 369         if (pmeTarget == TaskTarget::Gpu)
 370         {
 371             GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
 372         }
 373         return false;
 374     }
 375
 376     if (pmeTarget == TaskTarget::Cpu)
 377     {
 378         if (!userGpuTaskAssignment.empty())
 379         {
 380             GMX_THROW(InconsistentInputError(
 381                     "A GPU task assignment was specified, but PME interactions were "
 382                     "assigned to the CPU. Make no more than one of these choices."));
 383         }
 384
 385         return false;
 386     }
 387
 388     if (!userGpuTaskAssignment.empty())
 389     {
 390         // Specifying -gputasks requires specifying everything.
 391         if (pmeTarget == TaskTarget::Auto)
 392         {
 393             GMX_THROW(InconsistentInputError(formatString(
 394                     g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
 395         }
 396
 397         return true;
 398     }
 399
 400     // We still don't know whether it is an error if no GPUs are found
 401     // because we don't know the duty of this rank, yet. For example,
 402     // a node with only PME ranks and -pme cpu is OK if there are not
 403     // GPUs.
 404
 405     if (pmeTarget == TaskTarget::Gpu)
 406     {
 407         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
 408             || (numPmeRanksPerSimulation > 1))
 409         {
 410             GMX_THROW(NotImplementedError(
 411                     "PME tasks were required to run on GPUs, but that is not implemented with "
 412                     "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 413                     "or permit PME tasks to be assigned to the CPU."));
 414         }
 415         return true;
 416     }
 417
 418     // If we get here, then the user permitted GPUs.
 419     if (numRanksPerSimulation == 1)
 420     {
 421         // PME can run well on a single GPU shared with NB when there
 422         // is one rank, so we permit mdrun to try that if we have
 423         // detected GPUs.
 424         return gpusWereDetected;
 425     }
 426
 427     // Not enough support for PME on GPUs for anything else
 428     return false;
 429 }
 430
 431 bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
 432                                      const bool       useGpuForPme,
 433                                      const TaskTarget bondedTarget,
 434                                      const bool       canUseGpuForBonded,
 435                                      const bool       usingLJPme,
 436                                      const bool       usingElecPmeOrEwald,
 437                                      const int        numPmeRanksPerSimulation,
 438                                      const bool       gpusWereDetected)
 439 {
 440     if (bondedTarget == TaskTarget::Cpu)
 441     {
 442         return false;
 443     }
 444
 445     if (!canUseGpuForBonded)
 446     {
 447         if (bondedTarget == TaskTarget::Gpu)
 448         {
 449             GMX_THROW(InconsistentInputError(
 450                     "Bonded interactions on the GPU were required, but not supported for these "
 451                     "simulation settings. Change your settings, or do not require using GPUs."));
 452         }
 453
 454         return false;
 455     }
 456
 457     if (!useGpuForNonbonded)
 458     {
 459         if (bondedTarget == TaskTarget::Gpu)
 460         {
 461             GMX_THROW(InconsistentInputError(
 462                     "Bonded interactions on the GPU were required, but this requires that "
 463                     "short-ranged non-bonded interactions are also run on the GPU. Change "
 464                     "your settings, or do not require using GPUs."));
 465         }
 466
 467         return false;
 468     }
 469
 470     // TODO If the bonded kernels do not get fused, then performance
 471     // overheads might suggest alternative choices here.
 472
 473     if (bondedTarget == TaskTarget::Gpu)
 474     {
 475         // We still don't know whether it is an error if no GPUs are
 476         // found.
 477         return true;
 478     }
 479
 480     // If we get here, then the user permitted GPUs, which we should
 481     // use for bonded interactions if any were detected and the CPU
 482     // is busy, for which we currently only check PME or Ewald.
 483     // (It would be better to dynamically assign bondeds based on timings)
 484     // Note that here we assume that the auto setting of PME ranks will not
 485     // choose seperate PME ranks when nonBonded are assigned to the GPU.
 486     bool usingOurCpuForPmeOrEwald =
 487             (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
 488
 489     return gpusWereDetected && usingOurCpuForPmeOrEwald;
 490 }
 491
 492 bool decideWhetherToUseGpuForUpdate(const bool        isDomainDecomposition,
 493                                     const bool        useGpuForPme,
 494                                     const bool        useGpuForNonbonded,
 495                                     const TaskTarget  updateTarget,
 496                                     const bool        gpusWereDetected,
 497                                     const t_inputrec& inputrec,
 498                                     const gmx_mtop_t& mtop,
 499                                     const bool        useEssentialDynamics,
 500                                     const bool        doOrientationRestraints,
 501                                     const bool        useReplicaExchange,
 502                                     const bool        doRerun)
 503 {
 504
 505     if (updateTarget == TaskTarget::Cpu)
 506     {
 507         return false;
 508     }
 509
 510     std::string errorMessage;
 511
 512     if (isDomainDecomposition)
 513     {
 514         errorMessage += "Domain decomposition is not supported.\n";
 515     }
 516     // Using the GPU-version of update if:
 517     // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread), or
 518     // 2. Non-bonded interactions are on the GPU.
 519     if (!(useGpuForPme || useGpuForNonbonded))
 520     {
 521         errorMessage +=
 522                 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
 523     }
 524     if (!gpusWereDetected)
 525     {
 526         errorMessage += "Compatible GPUs must have been found.\n";
 527     }
 528     if (GMX_GPU != GMX_GPU_CUDA)
 529     {
 530         errorMessage += "Only a CUDA build is supported.\n";
 531     }
 532     if (inputrec.eI != eiMD)
 533     {
 534         errorMessage += "Only the md integrator is supported.\n";
 535     }
 536     if (inputrec.etc == etcNOSEHOOVER)
 537     {
 538         errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
 539     }
 540     if (!(inputrec.epc == epcNO || inputrec.epc == epcPARRINELLORAHMAN || inputrec.epc == epcBERENDSEN))
 541     {
 542         errorMessage += "Only Parrinello-Rahman and Berendsen pressure coupling are supported.\n";
 543     }
 544     if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
 545     {
 546         // The graph is needed, but not supported
 547         errorMessage += "Ewald surface correction is not supported.\n";
 548     }
 549     if (gmx_mtop_interaction_count(mtop, IF_VSITE) > 0)
 550     {
 551         errorMessage += "Virtual sites are not supported.\n";
 552     }
 553     if (useEssentialDynamics)
 554     {
 555         errorMessage += "Essential dynamics is not supported.\n";
 556     }
 557     if (inputrec.bPull && pull_have_constraint(inputrec.pull))
 558     {
 559         errorMessage += "Constraints pulling is not supported.\n";
 560     }
 561     if (doOrientationRestraints)
 562     {
 563         // The graph is needed, but not supported
 564         errorMessage += "Orientation restraints are not supported.\n";
 565     }
 566     if (inputrec.efep != efepNO)
 567     {
 568         // Actually all free-energy options except for mass and constraint perturbation are supported
 569         errorMessage += "Free energy perturbations are not supported.\n";
 570     }
 571     if (useReplicaExchange)
 572     {
 573         errorMessage += "Replica exchange simulations are not supported.\n";
 574     }
 575     if (inputrec.eSwapCoords != eswapNO)
 576     {
 577         errorMessage += "Swapping the coordinates is not supported.\n";
 578     }
 579     if (doRerun)
 580     {
 581         errorMessage += "Re-run is not supported.\n";
 582     }
 583
 584     // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
 585     // does not support it, the actual CUDA LINCS code does support it
 586     if (gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)
 587     {
 588         errorMessage += "Non-connecting constraints are not supported";
 589     }
 590     if (!UpdateConstrainCuda::isNumCoupledConstraintsSupported(mtop))
 591     {
 592         errorMessage +=
 593                 "The number of coupled constraints is higher than supported in the CUDA LINCS "
 594                 "code.\n";
 595     }
 596
 597     if (!errorMessage.empty())
 598     {
 599         if (updateTarget == TaskTarget::Gpu)
 600         {
 601             std::string prefix = gmx::formatString(
 602                     "Update task on the GPU was required,\n"
 603                     "but the following condition(s) were not satisfied:\n");
 604             GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
 605         }
 606         return false;
 607     }
 608
 609     return true;
 610 }
 611
 612 } // namespace gmx