src/gromacs/taskassignment/decidegpuusage.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  * \brief Defines functionality for deciding whether tasks will run on GPUs.
  37  *
  38  * \author Mark Abraham <mark.j.abraham@gmail.com>
  39  * \ingroup module_taskassignment
  40  */
  41
  42 #include "gmxpre.h"
  43
  44 #include "decidegpuusage.h"
  45
  46 #include "config.h"
  47
  48 #include <cstdlib>
  49 #include <cstring>
  50
  51 #include <algorithm>
  52 #include <string>
  53
  54 #include "gromacs/ewald/pme.h"
  55 #include "gromacs/hardware/cpuinfo.h"
  56 #include "gromacs/hardware/detecthardware.h"
  57 #include "gromacs/hardware/hardwaretopology.h"
  58 #include "gromacs/hardware/hw_info.h"
  59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  60 #include "gromacs/mdtypes/commrec.h"
  61 #include "gromacs/mdtypes/inputrec.h"
  62 #include "gromacs/mdtypes/md_enums.h"
  63 #include "gromacs/mdtypes/mdrunoptions.h"
  64 #include "gromacs/taskassignment/taskassignment.h"
  65 #include "gromacs/topology/topology.h"
  66 #include "gromacs/utility/baseversion.h"
  67 #include "gromacs/utility/exceptions.h"
  68 #include "gromacs/utility/fatalerror.h"
  69 #include "gromacs/utility/gmxassert.h"
  70 #include "gromacs/utility/logger.h"
  71 #include "gromacs/utility/stringutil.h"
  72
  73
  74 namespace gmx
  75 {
  76
  77 namespace
  78 {
  79
  80 //! Helper variable to localise the text of an often repeated message.
  81 const char* g_specifyEverythingFormatString =
  82         "When you use mdrun -gputasks, %s must be set to non-default "
  83         "values, so that the device IDs can be interpreted correctly."
  84 #if GMX_GPU != GMX_GPU_NONE
  85         " If you simply want to restrict which GPUs are used, then it is "
  86         "better to use mdrun -gpu_id. Otherwise, setting the "
  87 #    if GMX_GPU == GMX_GPU_CUDA
  88         "CUDA_VISIBLE_DEVICES"
  89 #    elif GMX_GPU == GMX_GPU_OPENCL
  90         // Technically there is no portable way to do this offered by the
  91         // OpenCL standard, but the only current relevant case for GROMACS
  92         // is AMD OpenCL, which offers this variable.
  93         "GPU_DEVICE_ORDINAL"
  94 #    else
  95 #        error "Unreachable branch"
  96 #    endif
  97         " environment variable in your bash profile or job "
  98         "script may be more convenient."
  99 #endif
 100         ;
 101
 102 } // namespace
 103
 104 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget        nonbondedTarget,
 105                                                      const std::vector<int>& gpuIdsToUse,
 106                                                      const std::vector<int>& userGpuTaskAssignment,
 107                                                      const EmulateGpuNonbonded emulateGpuNonbonded,
 108                                                      const bool buildSupportsNonbondedOnGpu,
 109                                                      const bool nonbondedOnGpuIsUseful,
 110                                                      const int  numRanksPerSimulation)
 111 {
 112     // First, exclude all cases where we can't run NB on GPUs.
 113     if (nonbondedTarget == TaskTarget::Cpu || emulateGpuNonbonded == EmulateGpuNonbonded::Yes
 114         || !nonbondedOnGpuIsUseful || !buildSupportsNonbondedOnGpu)
 115     {
 116         // If the user required NB on GPUs, we issue an error later.
 117         return false;
 118     }
 119
 120     // We now know that NB on GPUs makes sense, if we have any.
 121
 122     if (!userGpuTaskAssignment.empty())
 123     {
 124         // Specifying -gputasks requires specifying everything.
 125         if (nonbondedTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
 126         {
 127             GMX_THROW(InconsistentInputError(
 128                     formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 129         }
 130         return true;
 131     }
 132
 133     if (nonbondedTarget == TaskTarget::Gpu)
 134     {
 135         return true;
 136     }
 137
 138     // Because this is thread-MPI, we already know about the GPUs that
 139     // all potential ranks can use, and can use that in a global
 140     // decision that will later be consistent.
 141     auto haveGpus = !gpuIdsToUse.empty();
 142
 143     // If we get here, then the user permitted or required GPUs.
 144     return haveGpus;
 145 }
 146
 147 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
 148                                                const TaskTarget        pmeTarget,
 149                                                const std::vector<int>& gpuIdsToUse,
 150                                                const std::vector<int>& userGpuTaskAssignment,
 151                                                const gmx_hw_info_t&    hardwareInfo,
 152                                                const t_inputrec&       inputrec,
 153                                                const gmx_mtop_t&       mtop,
 154                                                const int               numRanksPerSimulation,
 155                                                const int               numPmeRanksPerSimulation)
 156 {
 157     // First, exclude all cases where we can't run PME on GPUs.
 158     if ((pmeTarget == TaskTarget::Cpu) || !useGpuForNonbonded || !pme_gpu_supports_build(nullptr)
 159         || !pme_gpu_supports_hardware(hardwareInfo, nullptr)
 160         || !pme_gpu_supports_input(inputrec, mtop, nullptr))
 161     {
 162         // PME can't run on a GPU. If the user required that, we issue
 163         // an error later.
 164         return false;
 165     }
 166
 167     // We now know that PME on GPUs might make sense, if we have any.
 168
 169     if (!userGpuTaskAssignment.empty())
 170     {
 171         // Follow the user's choice of GPU task assignment, if we
 172         // can. Checking that their IDs are for compatible GPUs comes
 173         // later.
 174
 175         // Specifying -gputasks requires specifying everything.
 176         if (pmeTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
 177         {
 178             GMX_THROW(InconsistentInputError(
 179                     formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
 180         }
 181
 182         // PME on GPUs is only supported in a single case
 183         if (pmeTarget == TaskTarget::Gpu)
 184         {
 185             if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
 186                 || (numPmeRanksPerSimulation > 1))
 187             {
 188                 GMX_THROW(InconsistentInputError(
 189                         "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
 190                         "file and use a single PME rank."));
 191             }
 192             return true;
 193         }
 194
 195         // pmeTarget == TaskTarget::Auto
 196         return numRanksPerSimulation == 1;
 197     }
 198
 199     // Because this is thread-MPI, we already know about the GPUs that
 200     // all potential ranks can use, and can use that in a global
 201     // decision that will later be consistent.
 202
 203     if (pmeTarget == TaskTarget::Gpu)
 204     {
 205         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
 206             || (numPmeRanksPerSimulation > 1))
 207         {
 208             GMX_THROW(NotImplementedError(
 209                     "PME tasks were required to run on GPUs, but that is not implemented with "
 210                     "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 211                     "or permit PME tasks to be assigned to the CPU."));
 212         }
 213         return true;
 214     }
 215
 216     if (numRanksPerSimulation == 1)
 217     {
 218         // PME can run well on a GPU shared with NB, and we permit
 219         // mdrun to default to try that.
 220         return !gpuIdsToUse.empty();
 221     }
 222
 223     if (numRanksPerSimulation < 1)
 224     {
 225         // Full automated mode for thread-MPI (the default). PME can
 226         // run well on a GPU shared with NB, and we permit mdrun to
 227         // default to it if there is only one GPU available.
 228         return (gpuIdsToUse.size() == 1);
 229     }
 230
 231     // Not enough support for PME on GPUs for anything else
 232     return false;
 233 }
 234
 235 bool decideWhetherToUseGpusForNonbonded(const TaskTarget          nonbondedTarget,
 236                                         const std::vector<int>&   userGpuTaskAssignment,
 237                                         const EmulateGpuNonbonded emulateGpuNonbonded,
 238                                         const bool                buildSupportsNonbondedOnGpu,
 239                                         const bool                nonbondedOnGpuIsUseful,
 240                                         const bool                gpusWereDetected)
 241 {
 242     if (nonbondedTarget == TaskTarget::Cpu)
 243     {
 244         if (!userGpuTaskAssignment.empty())
 245         {
 246             GMX_THROW(InconsistentInputError(
 247                     "A GPU task assignment was specified, but nonbonded interactions were "
 248                     "assigned to the CPU. Make no more than one of these choices."));
 249         }
 250
 251         return false;
 252     }
 253
 254     if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
 255     {
 256         GMX_THROW(InconsistentInputError(
 257                 "Nonbonded interactions on the GPU were requested with -nb gpu, "
 258                 "but the GROMACS binary has been built without GPU support. "
 259                 "Either run without selecting GPU options, or recompile GROMACS "
 260                 "with GPU support enabled"));
 261     }
 262
 263     // TODO refactor all these TaskTarget::Gpu checks into one place?
 264     // e.g. use a subfunction that handles only the cases where
 265     // TaskTargets are not Cpu?
 266     if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
 267     {
 268         if (nonbondedTarget == TaskTarget::Gpu)
 269         {
 270             GMX_THROW(InconsistentInputError(
 271                     "Nonbonded interactions on the GPU were required, which is inconsistent "
 272                     "with choosing emulation. Make no more than one of these choices."));
 273         }
 274         if (!userGpuTaskAssignment.empty())
 275         {
 276             GMX_THROW(
 277                     InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
 278                                            "no more than one of these choices."));
 279         }
 280
 281         return false;
 282     }
 283
 284     if (!nonbondedOnGpuIsUseful)
 285     {
 286         if (nonbondedTarget == TaskTarget::Gpu)
 287         {
 288             GMX_THROW(InconsistentInputError(
 289                     "Nonbonded interactions on the GPU were required, but not supported for these "
 290                     "simulation settings. Change your settings, or do not require using GPUs."));
 291         }
 292
 293         return false;
 294     }
 295
 296     if (!userGpuTaskAssignment.empty())
 297     {
 298         // Specifying -gputasks requires specifying everything.
 299         if (nonbondedTarget == TaskTarget::Auto)
 300         {
 301             GMX_THROW(InconsistentInputError(
 302                     formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 303         }
 304
 305         return true;
 306     }
 307
 308     if (nonbondedTarget == TaskTarget::Gpu)
 309     {
 310         // We still don't know whether it is an error if no GPUs are found
 311         // because we don't know the duty of this rank, yet. For example,
 312         // a node with only PME ranks and -pme cpu is OK if there are not
 313         // GPUs.
 314         return true;
 315     }
 316
 317     // If we get here, then the user permitted GPUs, which we should
 318     // use for nonbonded interactions.
 319     return gpusWereDetected;
 320 }
 321
 322 bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
 323                                   const TaskTarget        pmeTarget,
 324                                   const std::vector<int>& userGpuTaskAssignment,
 325                                   const gmx_hw_info_t&    hardwareInfo,
 326                                   const t_inputrec&       inputrec,
 327                                   const gmx_mtop_t&       mtop,
 328                                   const int               numRanksPerSimulation,
 329                                   const int               numPmeRanksPerSimulation,
 330                                   const bool              gpusWereDetected)
 331 {
 332     if (pmeTarget == TaskTarget::Cpu)
 333     {
 334         return false;
 335     }
 336
 337     if (!useGpuForNonbonded)
 338     {
 339         if (pmeTarget == TaskTarget::Gpu)
 340         {
 341             GMX_THROW(NotImplementedError(
 342                     "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
 343         }
 344         return false;
 345     }
 346
 347     std::string message;
 348     if (!pme_gpu_supports_build(&message))
 349     {
 350         if (pmeTarget == TaskTarget::Gpu)
 351         {
 352             GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
 353         }
 354         return false;
 355     }
 356     if (!pme_gpu_supports_hardware(hardwareInfo, &message))
 357     {
 358         if (pmeTarget == TaskTarget::Gpu)
 359         {
 360             GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
 361         }
 362         return false;
 363     }
 364     if (!pme_gpu_supports_input(inputrec, mtop, &message))
 365     {
 366         if (pmeTarget == TaskTarget::Gpu)
 367         {
 368             GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message));
 369         }
 370         return false;
 371     }
 372
 373     if (pmeTarget == TaskTarget::Cpu)
 374     {
 375         if (!userGpuTaskAssignment.empty())
 376         {
 377             GMX_THROW(InconsistentInputError(
 378                     "A GPU task assignment was specified, but PME interactions were "
 379                     "assigned to the CPU. Make no more than one of these choices."));
 380         }
 381
 382         return false;
 383     }
 384
 385     if (!userGpuTaskAssignment.empty())
 386     {
 387         // Specifying -gputasks requires specifying everything.
 388         if (pmeTarget == TaskTarget::Auto)
 389         {
 390             GMX_THROW(InconsistentInputError(formatString(
 391                     g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
 392         }
 393
 394         return true;
 395     }
 396
 397     // We still don't know whether it is an error if no GPUs are found
 398     // because we don't know the duty of this rank, yet. For example,
 399     // a node with only PME ranks and -pme cpu is OK if there are not
 400     // GPUs.
 401
 402     if (pmeTarget == TaskTarget::Gpu)
 403     {
 404         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
 405             || (numPmeRanksPerSimulation > 1))
 406         {
 407             GMX_THROW(NotImplementedError(
 408                     "PME tasks were required to run on GPUs, but that is not implemented with "
 409                     "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 410                     "or permit PME tasks to be assigned to the CPU."));
 411         }
 412         return true;
 413     }
 414
 415     // If we get here, then the user permitted GPUs.
 416     if (numRanksPerSimulation == 1)
 417     {
 418         // PME can run well on a single GPU shared with NB when there
 419         // is one rank, so we permit mdrun to try that if we have
 420         // detected GPUs.
 421         return gpusWereDetected;
 422     }
 423
 424     // Not enough support for PME on GPUs for anything else
 425     return false;
 426 }
 427
 428 bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
 429                                      const bool       useGpuForPme,
 430                                      const TaskTarget bondedTarget,
 431                                      const bool       canUseGpuForBonded,
 432                                      const bool       usingLJPme,
 433                                      const bool       usingElecPmeOrEwald,
 434                                      const int        numPmeRanksPerSimulation,
 435                                      const bool       gpusWereDetected)
 436 {
 437     if (bondedTarget == TaskTarget::Cpu)
 438     {
 439         return false;
 440     }
 441
 442     if (!canUseGpuForBonded)
 443     {
 444         if (bondedTarget == TaskTarget::Gpu)
 445         {
 446             GMX_THROW(InconsistentInputError(
 447                     "Bonded interactions on the GPU were required, but not supported for these "
 448                     "simulation settings. Change your settings, or do not require using GPUs."));
 449         }
 450
 451         return false;
 452     }
 453
 454     if (!useGpuForNonbonded)
 455     {
 456         if (bondedTarget == TaskTarget::Gpu)
 457         {
 458             GMX_THROW(InconsistentInputError(
 459                     "Bonded interactions on the GPU were required, but this requires that "
 460                     "short-ranged non-bonded interactions are also run on the GPU. Change "
 461                     "your settings, or do not require using GPUs."));
 462         }
 463
 464         return false;
 465     }
 466
 467     // TODO If the bonded kernels do not get fused, then performance
 468     // overheads might suggest alternative choices here.
 469
 470     if (bondedTarget == TaskTarget::Gpu)
 471     {
 472         // We still don't know whether it is an error if no GPUs are
 473         // found.
 474         return true;
 475     }
 476
 477     // If we get here, then the user permitted GPUs, which we should
 478     // use for bonded interactions if any were detected and the CPU
 479     // is busy, for which we currently only check PME or Ewald.
 480     // (It would be better to dynamically assign bondeds based on timings)
 481     // Note that here we assume that the auto setting of PME ranks will not
 482     // choose seperate PME ranks when nonBonded are assigned to the GPU.
 483     bool usingOurCpuForPmeOrEwald =
 484             (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
 485
 486     return gpusWereDetected && usingOurCpuForPmeOrEwald;
 487 }
 488
 489 bool decideWhetherToUseGpuForUpdate(const bool        forceGpuUpdateDefaultOn,
 490                                     const bool        isDomainDecomposition,
 491                                     const bool        useGpuForPme,
 492                                     const bool        useGpuForNonbonded,
 493                                     const TaskTarget  updateTarget,
 494                                     const bool        gpusWereDetected,
 495                                     const t_inputrec& inputrec,
 496                                     const bool        haveVSites,
 497                                     const bool        useEssentialDynamics,
 498                                     const bool        doOrientationRestraints,
 499                                     const bool        useReplicaExchange)
 500 {
 501
 502     if (updateTarget == TaskTarget::Cpu)
 503     {
 504         return false;
 505     }
 506
 507     std::string errorMessage;
 508
 509     if (isDomainDecomposition)
 510     {
 511         errorMessage += "Domain decomposition is not supported.\n";
 512     }
 513     // Using the GPU-version of update if:
 514     // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread), or
 515     // 2. Non-bonded interactions are on the GPU.
 516     if (!(useGpuForPme || useGpuForNonbonded))
 517     {
 518         errorMessage +=
 519                 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
 520     }
 521     if (!gpusWereDetected)
 522     {
 523         errorMessage += "Compatible GPUs must have been found.\n";
 524     }
 525     if (GMX_GPU != GMX_GPU_CUDA)
 526     {
 527         errorMessage += "Only a CUDA build is supported.\n";
 528     }
 529     if (inputrec.eI != eiMD)
 530     {
 531         errorMessage += "Only the md integrator is supported.\n";
 532     }
 533     if (inputrec.etc == etcNOSEHOOVER)
 534     {
 535         errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
 536     }
 537     if (inputrec.epc != epcNO)
 538     {
 539         // Coordinate D2H and H2d are missing as well as PBC reinitialization
 540         errorMessage += "Pressure coupling is not supported.\n";
 541     }
 542     if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
 543     {
 544         // The graph is needed, but not supported
 545         errorMessage += "Ewald surface correction is not supported.\n";
 546     }
 547     if (haveVSites)
 548     {
 549         errorMessage += "Virtual sites are not supported.\n";
 550     }
 551     if (useEssentialDynamics)
 552     {
 553         errorMessage += "Essential dynamics is not supported.\n";
 554     }
 555     if (inputrec.bPull || inputrec.pull)
 556     {
 557         // Pull potentials are actually supported, but constraint pulling is not
 558         errorMessage += "Pulling is not supported.\n";
 559     }
 560     if (doOrientationRestraints)
 561     {
 562         // The graph is needed, but not supported
 563         errorMessage += "Orientation restraints are not supported.\n";
 564     }
 565     if (inputrec.efep != efepNO)
 566     {
 567         // Actually all free-energy options except for mass and constraint perturbation are supported
 568         errorMessage += "Free energy perturbations are not supported.\n";
 569     }
 570     if (useReplicaExchange)
 571     {
 572         errorMessage += "Replica exchange simulations are not supported.\n";
 573     }
 574     if (inputrec.eSwapCoords != eswapNO)
 575     {
 576         errorMessage += "Swapping the coordinates is not supported.\n";
 577     }
 578
 579     // \todo Check for coupled constraint block size restriction needs to be added
 580     //       when update auto chooses GPU in some cases. Currently exceeding the restriction
 581     //       triggers a fatal error during LINCS setup.
 582
 583     if (!errorMessage.empty())
 584     {
 585         if (updateTarget == TaskTarget::Gpu)
 586         {
 587             std::string prefix = gmx::formatString(
 588                     "Update task on the GPU was required,\n"
 589                     "but the following condition(s) were not satisfied:\n");
 590             GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
 591         }
 592         return false;
 593     }
 594
 595     return ((forceGpuUpdateDefaultOn && updateTarget == TaskTarget::Auto)
 596             || (updateTarget == TaskTarget::Gpu));
 597 }
 598
 599 } // namespace gmx