src/gromacs/taskassignment/decidegpuusage.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  * \brief Defines functionality for deciding whether tasks will run on GPUs.
  37  *
  38  * \author Mark Abraham <mark.j.abraham@gmail.com>
  39  * \ingroup module_taskassignment
  40  */
  41
  42 #include "gmxpre.h"
  43
  44 #include "decidegpuusage.h"
  45
  46 #include "config.h"
  47
  48 #include <cstdlib>
  49 #include <cstring>
  50
  51 #include <algorithm>
  52 #include <string>
  53
  54 #include "gromacs/ewald/pme.h"
  55 #include "gromacs/hardware/cpuinfo.h"
  56 #include "gromacs/hardware/detecthardware.h"
  57 #include "gromacs/hardware/hardwaretopology.h"
  58 #include "gromacs/hardware/hw_info.h"
  59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  60 #include "gromacs/mdlib/mdatoms.h"
  61 #include "gromacs/mdtypes/commrec.h"
  62 #include "gromacs/mdtypes/inputrec.h"
  63 #include "gromacs/mdtypes/md_enums.h"
  64 #include "gromacs/mdtypes/mdrunoptions.h"
  65 #include "gromacs/taskassignment/taskassignment.h"
  66 #include "gromacs/topology/topology.h"
  67 #include "gromacs/utility/baseversion.h"
  68 #include "gromacs/utility/exceptions.h"
  69 #include "gromacs/utility/fatalerror.h"
  70 #include "gromacs/utility/gmxassert.h"
  71 #include "gromacs/utility/logger.h"
  72 #include "gromacs/utility/stringutil.h"
  73
  74
  75 namespace gmx
  76 {
  77
  78 namespace
  79 {
  80
  81 //! Helper variable to localise the text of an often repeated message.
  82 const char * g_specifyEverythingFormatString =
  83     "When you use mdrun -gputasks, %s must be set to non-default "
  84     "values, so that the device IDs can be interpreted correctly."
  85 #if GMX_GPU != GMX_GPU_NONE
  86     " If you simply want to restrict which GPUs are used, then it is "
  87     "better to use mdrun -gpu_id. Otherwise, setting the "
  88 #  if GMX_GPU == GMX_GPU_CUDA
  89     "CUDA_VISIBLE_DEVICES"
  90 #  elif GMX_GPU == GMX_GPU_OPENCL
  91     // Technically there is no portable way to do this offered by the
  92     // OpenCL standard, but the only current relevant case for GROMACS
  93     // is AMD OpenCL, which offers this variable.
  94     "GPU_DEVICE_ORDINAL"
  95 #  else
  96 #  error "Unreachable branch"
  97 #  endif
  98     " environment variable in your bash profile or job "
  99     "script may be more convenient."
 100 #endif
 101 ;
 102
 103 }   // namespace
 104
 105 bool
 106 decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget          nonbondedTarget,
 107                                                 const std::vector<int>   &gpuIdsToUse,
 108                                                 const std::vector<int>   &userGpuTaskAssignment,
 109                                                 const EmulateGpuNonbonded emulateGpuNonbonded,
 110                                                 const bool                buildSupportsNonbondedOnGpu,
 111                                                 const bool                nonbondedOnGpuIsUseful,
 112                                                 const int                 numRanksPerSimulation)
 113 {
 114     // First, exclude all cases where we can't run NB on GPUs.
 115     if (nonbondedTarget == TaskTarget::Cpu ||
 116         emulateGpuNonbonded == EmulateGpuNonbonded::Yes ||
 117         !nonbondedOnGpuIsUseful ||
 118         !buildSupportsNonbondedOnGpu)
 119     {
 120         // If the user required NB on GPUs, we issue an error later.
 121         return false;
 122     }
 123
 124     // We now know that NB on GPUs makes sense, if we have any.
 125
 126     if (!userGpuTaskAssignment.empty())
 127     {
 128         // Specifying -gputasks requires specifying everything.
 129         if (nonbondedTarget == TaskTarget::Auto ||
 130             numRanksPerSimulation < 1)
 131         {
 132             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 133         }
 134         return true;
 135     }
 136
 137     if (nonbondedTarget == TaskTarget::Gpu)
 138     {
 139         return true;
 140     }
 141
 142     // Because this is thread-MPI, we already know about the GPUs that
 143     // all potential ranks can use, and can use that in a global
 144     // decision that will later be consistent.
 145     auto haveGpus = !gpuIdsToUse.empty();
 146
 147     // If we get here, then the user permitted or required GPUs.
 148     return haveGpus;
 149 }
 150
 151 bool
 152 decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
 153                                           const TaskTarget        pmeTarget,
 154                                           const std::vector<int> &gpuIdsToUse,
 155                                           const std::vector<int> &userGpuTaskAssignment,
 156                                           const gmx_hw_info_t    &hardwareInfo,
 157                                           const t_inputrec       &inputrec,
 158                                           const gmx_mtop_t       &mtop,
 159                                           const int               numRanksPerSimulation,
 160                                           const int               numPmeRanksPerSimulation)
 161 {
 162     // First, exclude all cases where we can't run PME on GPUs.
 163     if ((pmeTarget == TaskTarget::Cpu) ||
 164         !useGpuForNonbonded ||
 165         !pme_gpu_supports_build(nullptr) ||
 166         !pme_gpu_supports_hardware(hardwareInfo, nullptr) ||
 167         !pme_gpu_supports_input(inputrec, mtop, nullptr))
 168     {
 169         // PME can't run on a GPU. If the user required that, we issue
 170         // an error later.
 171         return false;
 172     }
 173
 174     // We now know that PME on GPUs might make sense, if we have any.
 175
 176     if (!userGpuTaskAssignment.empty())
 177     {
 178         // Follow the user's choice of GPU task assignment, if we
 179         // can. Checking that their IDs are for compatible GPUs comes
 180         // later.
 181
 182         // Specifying -gputasks requires specifying everything.
 183         if (pmeTarget == TaskTarget::Auto ||
 184             numRanksPerSimulation < 1)
 185         {
 186             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
 187         }
 188
 189         // PME on GPUs is only supported in a single case
 190         if (pmeTarget == TaskTarget::Gpu)
 191         {
 192             if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 193                 (numPmeRanksPerSimulation > 1))
 194             {
 195                 GMX_THROW(InconsistentInputError
 196                               ("When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr file and use a single PME rank."));
 197             }
 198             return true;
 199         }
 200
 201         // pmeTarget == TaskTarget::Auto
 202         return numRanksPerSimulation == 1;
 203     }
 204
 205     // Because this is thread-MPI, we already know about the GPUs that
 206     // all potential ranks can use, and can use that in a global
 207     // decision that will later be consistent.
 208
 209     if (pmeTarget == TaskTarget::Gpu)
 210     {
 211         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 212             (numPmeRanksPerSimulation > 1))
 213         {
 214             GMX_THROW(NotImplementedError
 215                           ("PME tasks were required to run on GPUs, but that is not implemented with "
 216                           "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 217                           "or permit PME tasks to be assigned to the CPU."));
 218         }
 219         return true;
 220     }
 221
 222     if (numRanksPerSimulation == 1)
 223     {
 224         // PME can run well on a GPU shared with NB, and we permit
 225         // mdrun to default to try that.
 226         return !gpuIdsToUse.empty();
 227     }
 228
 229     if (numRanksPerSimulation < 1)
 230     {
 231         // Full automated mode for thread-MPI (the default). PME can
 232         // run well on a GPU shared with NB, and we permit mdrun to
 233         // default to it if there is only one GPU available.
 234         return (gpuIdsToUse.size() == 1);
 235     }
 236
 237     // Not enough support for PME on GPUs for anything else
 238     return false;
 239 }
 240
 241 bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarget,
 242                                         const std::vector<int>    &userGpuTaskAssignment,
 243                                         const EmulateGpuNonbonded  emulateGpuNonbonded,
 244                                         const bool                 buildSupportsNonbondedOnGpu,
 245                                         const bool                 nonbondedOnGpuIsUseful,
 246                                         const bool                 gpusWereDetected)
 247 {
 248     if (nonbondedTarget == TaskTarget::Cpu)
 249     {
 250         if (!userGpuTaskAssignment.empty())
 251         {
 252             GMX_THROW(InconsistentInputError
 253                           ("A GPU task assignment was specified, but nonbonded interactions were "
 254                           "assigned to the CPU. Make no more than one of these choices."));
 255         }
 256
 257         return false;
 258     }
 259
 260     if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
 261     {
 262         GMX_THROW(InconsistentInputError
 263                       ("Nonbonded interactions on the GPU were requested with -nb gpu, "
 264                       "but the GROMACS binary has been built without GPU support. "
 265                       "Either run without selecting GPU options, or recompile GROMACS "
 266                       "with GPU support enabled"));
 267     }
 268
 269     // TODO refactor all these TaskTarget::Gpu checks into one place?
 270     // e.g. use a subfunction that handles only the cases where
 271     // TaskTargets are not Cpu?
 272     if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
 273     {
 274         if (nonbondedTarget == TaskTarget::Gpu)
 275         {
 276             GMX_THROW(InconsistentInputError
 277                           ("Nonbonded interactions on the GPU were required, which is inconsistent "
 278                           "with choosing emulation. Make no more than one of these choices."));
 279         }
 280         if (!userGpuTaskAssignment.empty())
 281         {
 282             GMX_THROW(InconsistentInputError
 283                           ("GPU ID usage was specified, as was GPU emulation. Make no more than one of these choices."));
 284         }
 285
 286         return false;
 287     }
 288
 289     if (!nonbondedOnGpuIsUseful)
 290     {
 291         if (nonbondedTarget == TaskTarget::Gpu)
 292         {
 293             GMX_THROW(InconsistentInputError
 294                           ("Nonbonded interactions on the GPU were required, but not supported for these "
 295                           "simulation settings. Change your settings, or do not require using GPUs."));
 296         }
 297
 298         return false;
 299     }
 300
 301     if (!userGpuTaskAssignment.empty())
 302     {
 303         // Specifying -gputasks requires specifying everything.
 304         if (nonbondedTarget == TaskTarget::Auto)
 305         {
 306             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 307         }
 308
 309         return true;
 310     }
 311
 312     if (nonbondedTarget == TaskTarget::Gpu)
 313     {
 314         // We still don't know whether it is an error if no GPUs are found
 315         // because we don't know the duty of this rank, yet. For example,
 316         // a node with only PME ranks and -pme cpu is OK if there are not
 317         // GPUs.
 318         return true;
 319     }
 320
 321     // If we get here, then the user permitted GPUs, which we should
 322     // use for nonbonded interactions.
 323     return gpusWereDetected;
 324 }
 325
 326 bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
 327                                   const TaskTarget        pmeTarget,
 328                                   const std::vector<int> &userGpuTaskAssignment,
 329                                   const gmx_hw_info_t    &hardwareInfo,
 330                                   const t_inputrec       &inputrec,
 331                                   const gmx_mtop_t       &mtop,
 332                                   const int               numRanksPerSimulation,
 333                                   const int               numPmeRanksPerSimulation,
 334                                   const bool              gpusWereDetected)
 335 {
 336     if (pmeTarget == TaskTarget::Cpu)
 337     {
 338         return false;
 339     }
 340
 341     if (!useGpuForNonbonded)
 342     {
 343         if (pmeTarget == TaskTarget::Gpu)
 344         {
 345             GMX_THROW(NotImplementedError
 346                           ("PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
 347         }
 348         return false;
 349     }
 350
 351     std::string message;
 352     if (!pme_gpu_supports_build(&message))
 353     {
 354         if (pmeTarget == TaskTarget::Gpu)
 355         {
 356             GMX_THROW(NotImplementedError
 357                           ("Cannot compute PME interactions on a GPU, because " + message));
 358         }
 359         return false;
 360     }
 361     if (!pme_gpu_supports_hardware(hardwareInfo, &message))
 362     {
 363         if (pmeTarget == TaskTarget::Gpu)
 364         {
 365             GMX_THROW(NotImplementedError
 366                           ("Cannot compute PME interactions on a GPU, because " + message));
 367         }
 368         return false;
 369     }
 370     if (!pme_gpu_supports_input(inputrec, mtop, &message))
 371     {
 372         if (pmeTarget == TaskTarget::Gpu)
 373         {
 374             GMX_THROW(NotImplementedError
 375                           ("Cannot compute PME interactions on a GPU, because " + message));
 376         }
 377         return false;
 378     }
 379
 380     if (pmeTarget == TaskTarget::Cpu)
 381     {
 382         if (!userGpuTaskAssignment.empty())
 383         {
 384             GMX_THROW(InconsistentInputError
 385                           ("A GPU task assignment was specified, but PME interactions were "
 386                           "assigned to the CPU. Make no more than one of these choices."));
 387         }
 388
 389         return false;
 390     }
 391
 392     if (!userGpuTaskAssignment.empty())
 393     {
 394         // Specifying -gputasks requires specifying everything.
 395         if (pmeTarget == TaskTarget::Auto)
 396         {
 397             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
 398         }
 399
 400         return true;
 401     }
 402
 403     // We still don't know whether it is an error if no GPUs are found
 404     // because we don't know the duty of this rank, yet. For example,
 405     // a node with only PME ranks and -pme cpu is OK if there are not
 406     // GPUs.
 407
 408     if (pmeTarget == TaskTarget::Gpu)
 409     {
 410         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 411             (numPmeRanksPerSimulation > 1))
 412         {
 413             GMX_THROW(NotImplementedError
 414                           ("PME tasks were required to run on GPUs, but that is not implemented with "
 415                           "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 416                           "or permit PME tasks to be assigned to the CPU."));
 417         }
 418         return true;
 419     }
 420
 421     // If we get here, then the user permitted GPUs.
 422     if (numRanksPerSimulation == 1)
 423     {
 424         // PME can run well on a single GPU shared with NB when there
 425         // is one rank, so we permit mdrun to try that if we have
 426         // detected GPUs.
 427         return gpusWereDetected;
 428     }
 429
 430     // Not enough support for PME on GPUs for anything else
 431     return false;
 432 }
 433
 434 bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
 435                                      const bool       useGpuForPme,
 436                                      const TaskTarget bondedTarget,
 437                                      const bool       canUseGpuForBonded,
 438                                      const bool       usingLJPme,
 439                                      const bool       usingElecPmeOrEwald,
 440                                      const int        numPmeRanksPerSimulation,
 441                                      const bool       gpusWereDetected)
 442 {
 443     if (bondedTarget == TaskTarget::Cpu)
 444     {
 445         return false;
 446     }
 447
 448     if (!canUseGpuForBonded)
 449     {
 450         if (bondedTarget == TaskTarget::Gpu)
 451         {
 452             GMX_THROW(InconsistentInputError
 453                           ("Bonded interactions on the GPU were required, but not supported for these "
 454                           "simulation settings. Change your settings, or do not require using GPUs."));
 455         }
 456
 457         return false;
 458     }
 459
 460     if (!useGpuForNonbonded)
 461     {
 462         if (bondedTarget == TaskTarget::Gpu)
 463         {
 464             GMX_THROW(InconsistentInputError
 465                           ("Bonded interactions on the GPU were required, but this requires that "
 466                           "short-ranged non-bonded interactions are also run on the GPU. Change "
 467                           "your settings, or do not require using GPUs."));
 468         }
 469
 470         return false;
 471     }
 472
 473     // TODO If the bonded kernels do not get fused, then performance
 474     // overheads might suggest alternative choices here.
 475
 476     if (bondedTarget == TaskTarget::Gpu)
 477     {
 478         // We still don't know whether it is an error if no GPUs are
 479         // found.
 480         return true;
 481     }
 482
 483     // If we get here, then the user permitted GPUs, which we should
 484     // use for bonded interactions if any were detected and the CPU
 485     // is busy, for which we currently only check PME or Ewald.
 486     // (It would be better to dynamically assign bondeds based on timings)
 487     // Note that here we assume that the auto setting of PME ranks will not
 488     // choose seperate PME ranks when nonBonded are assigned to the GPU.
 489     bool usingOurCpuForPmeOrEwald = (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
 490
 491     return gpusWereDetected && usingOurCpuForPmeOrEwald;
 492 }
 493
 494 bool decideWhetherToUseGpuForUpdate(bool                 forceGpuUpdateDefaultOn,
 495                                     bool                 isDomainDecomposition,
 496                                     bool                 useGpuForPme,
 497                                     bool                 useGpuForNonbonded,
 498                                     bool                 useGpuForBufferOps,
 499                                     TaskTarget           updateTarget,
 500                                     bool                 gpusWereDetected,
 501                                     const t_inputrec    &inputrec,
 502                                     const MDAtoms       &mdatoms,
 503                                     bool                 useEssentialDynamics,
 504                                     bool                 doOrientationRestraints,
 505                                     bool                 doDistanceRestraints,
 506                                     bool                 useReplicaExchange)
 507 {
 508
 509     if (updateTarget == TaskTarget::Cpu)
 510     {
 511         return false;
 512     }
 513
 514     std::string errorMessage;
 515
 516     if (isDomainDecomposition)
 517     {
 518         errorMessage += "Domain decomposition is not supported.\n";
 519     }
 520     // Using the GPU-version of update makes sense if forces are already on the GPU, i.e. if at least:
 521     // 1. PME is on the GPU (there should be a copy of coordinates on a GPU in rvec format for PME spread).
 522     // 2. Non-bonded interactions and buffer ops are on the GPU.
 523     if (!(useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps)))
 524     {
 525         errorMessage += "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
 526     }
 527     if (!gpusWereDetected)
 528     {
 529         errorMessage += "Compatible GPUs must have been found.\n";
 530     }
 531     if (GMX_GPU != GMX_GPU_CUDA)
 532     {
 533         errorMessage += "Only a CUDA build is supported.\n";
 534     }
 535     if (inputrec.eI != eiMD)
 536     {
 537         errorMessage += "Only the md integrator is supported.\n";
 538     }
 539     if (inputrec.etc == etcNOSEHOOVER)
 540     {
 541         errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
 542     }
 543     if (inputrec.epc != epcNO && inputrec.epc != epcPARRINELLORAHMAN)
 544     {
 545         errorMessage += "Only Parrinello-Rahman pressure control is supported.\n";
 546     }
 547     if (mdatoms.mdatoms()->haveVsites)
 548     {
 549         errorMessage += "Virtual sites are not supported.\n";
 550     }
 551     if (useEssentialDynamics)
 552     {
 553         errorMessage += "Essential dynamics is not supported.\n";
 554     }
 555     if (inputrec.bPull || inputrec.pull)
 556     {
 557         errorMessage += "Pulling is not supported.\n";
 558     }
 559     if (doOrientationRestraints)
 560     {
 561         errorMessage += "Orientation restraints are not supported.\n";
 562     }
 563     if (doDistanceRestraints)
 564     {
 565         errorMessage += "Distance restraints are not supported.\n";
 566     }
 567     if (inputrec.efep != efepNO)
 568     {
 569         errorMessage += "Free energy perturbations are not supported.\n";
 570     }
 571     if (useReplicaExchange)
 572     {
 573         errorMessage += "Replica exchange simulations are not supported.\n";
 574     }
 575     if (!errorMessage.empty())
 576     {
 577         if (updateTarget == TaskTarget::Gpu)
 578         {
 579             std::string prefix = gmx::formatString("Update task on the GPU was required,\n"
 580                                                    "but the following condition(s) were not satisfied:\n");
 581             GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
 582         }
 583         return false;
 584     }
 585
 586     return ((forceGpuUpdateDefaultOn && updateTarget == TaskTarget::Auto) || (updateTarget == TaskTarget::Gpu));
 587 }
 588
 589 }  // namespace gmx