src/gromacs/taskassignment/decidegpuusage.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  * \brief Defines functionality for deciding whether tasks will run on GPUs.
  37  *
  38  * \author Mark Abraham <mark.j.abraham@gmail.com>
  39  * \ingroup module_taskassignment
  40  */
  41
  42 #include "gmxpre.h"
  43
  44 #include "decidegpuusage.h"
  45
  46 #include "config.h"
  47
  48 #include <cstdlib>
  49 #include <cstring>
  50
  51 #include <algorithm>
  52 #include <string>
  53
  54 #include "gromacs/ewald/pme.h"
  55 #include "gromacs/hardware/cpuinfo.h"
  56 #include "gromacs/hardware/detecthardware.h"
  57 #include "gromacs/hardware/hardwaretopology.h"
  58 #include "gromacs/hardware/hw_info.h"
  59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  60 #include "gromacs/mdtypes/commrec.h"
  61 #include "gromacs/mdtypes/inputrec.h"
  62 #include "gromacs/mdtypes/md_enums.h"
  63 #include "gromacs/taskassignment/taskassignment.h"
  64 #include "gromacs/topology/topology.h"
  65 #include "gromacs/utility/baseversion.h"
  66 #include "gromacs/utility/exceptions.h"
  67 #include "gromacs/utility/fatalerror.h"
  68 #include "gromacs/utility/gmxassert.h"
  69 #include "gromacs/utility/logger.h"
  70 #include "gromacs/utility/stringutil.h"
  71
  72
  73 namespace gmx
  74 {
  75
  76 namespace
  77 {
  78
  79 //! Helper variable to localise the text of an often repeated message.
  80 const char * g_specifyEverythingFormatString =
  81     "When you use mdrun -gputasks, %s must be set to non-default "
  82     "values, so that the device IDs can be interpreted correctly."
  83 #if GMX_GPU != GMX_GPU_NONE
  84     " If you simply want to restrict which GPUs are used, then it is "
  85     "better to use mdrun -gpu_id. Otherwise, setting the "
  86 #  if GMX_GPU == GMX_GPU_CUDA
  87     "CUDA_VISIBLE_DEVICES"
  88 #  elif GMX_GPU == GMX_GPU_OPENCL
  89     // Technically there is no portable way to do this offered by the
  90     // OpenCL standard, but the only current relevant case for GROMACS
  91     // is AMD OpenCL, which offers this variable.
  92     "GPU_DEVICE_ORDINAL"
  93 #  else
  94 #  error "Unreachable branch"
  95 #  endif
  96     " environment variable in your bash profile or job "
  97     "script may be more convenient."
  98 #endif
  99 ;
 100
 101 }   // namespace
 102
 103 bool
 104 decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget          nonbondedTarget,
 105                                                 const std::vector<int>   &gpuIdsToUse,
 106                                                 const std::vector<int>   &userGpuTaskAssignment,
 107                                                 const EmulateGpuNonbonded emulateGpuNonbonded,
 108                                                 const bool                buildSupportsNonbondedOnGpu,
 109                                                 const bool                nonbondedOnGpuIsUseful,
 110                                                 const int                 numRanksPerSimulation)
 111 {
 112     // First, exclude all cases where we can't run NB on GPUs.
 113     if (nonbondedTarget == TaskTarget::Cpu ||
 114         emulateGpuNonbonded == EmulateGpuNonbonded::Yes ||
 115         !nonbondedOnGpuIsUseful ||
 116         !buildSupportsNonbondedOnGpu)
 117     {
 118         // If the user required NB on GPUs, we issue an error later.
 119         return false;
 120     }
 121
 122     // We now know that NB on GPUs makes sense, if we have any.
 123
 124     if (!userGpuTaskAssignment.empty())
 125     {
 126         // Specifying -gputasks requires specifying everything.
 127         if (nonbondedTarget == TaskTarget::Auto ||
 128             numRanksPerSimulation < 1)
 129         {
 130             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 131         }
 132         return true;
 133     }
 134
 135     if (nonbondedTarget == TaskTarget::Gpu)
 136     {
 137         return true;
 138     }
 139
 140     // Because this is thread-MPI, we already know about the GPUs that
 141     // all potential ranks can use, and can use that in a global
 142     // decision that will later be consistent.
 143     auto haveGpus = !gpuIdsToUse.empty();
 144
 145     // If we get here, then the user permitted or required GPUs.
 146     return haveGpus;
 147 }
 148
 149 bool
 150 decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
 151                                           const TaskTarget        pmeTarget,
 152                                           const std::vector<int> &gpuIdsToUse,
 153                                           const std::vector<int> &userGpuTaskAssignment,
 154                                           const gmx_hw_info_t    &hardwareInfo,
 155                                           const t_inputrec       &inputrec,
 156                                           const gmx_mtop_t       &mtop,
 157                                           const int               numRanksPerSimulation,
 158                                           const int               numPmeRanksPerSimulation)
 159 {
 160     // First, exclude all cases where we can't run PME on GPUs.
 161     if ((pmeTarget == TaskTarget::Cpu) ||
 162         !useGpuForNonbonded ||
 163         !pme_gpu_supports_build(nullptr) ||
 164         !pme_gpu_supports_hardware(hardwareInfo, nullptr) ||
 165         !pme_gpu_supports_input(inputrec, mtop, nullptr))
 166     {
 167         // PME can't run on a GPU. If the user required that, we issue
 168         // an error later.
 169         return false;
 170     }
 171
 172     // We now know that PME on GPUs might make sense, if we have any.
 173
 174     if (!userGpuTaskAssignment.empty())
 175     {
 176         // Follow the user's choice of GPU task assignment, if we
 177         // can. Checking that their IDs are for compatible GPUs comes
 178         // later.
 179
 180         // Specifying -gputasks requires specifying everything.
 181         if (pmeTarget == TaskTarget::Auto ||
 182             numRanksPerSimulation < 1)
 183         {
 184             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
 185         }
 186
 187         // PME on GPUs is only supported in a single case
 188         if (pmeTarget == TaskTarget::Gpu)
 189         {
 190             if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 191                 (numPmeRanksPerSimulation > 1))
 192             {
 193                 GMX_THROW(InconsistentInputError
 194                               ("When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr file and use a single PME rank."));
 195             }
 196             return true;
 197         }
 198
 199         // pmeTarget == TaskTarget::Auto
 200         return numRanksPerSimulation == 1;
 201     }
 202
 203     // Because this is thread-MPI, we already know about the GPUs that
 204     // all potential ranks can use, and can use that in a global
 205     // decision that will later be consistent.
 206
 207     if (pmeTarget == TaskTarget::Gpu)
 208     {
 209         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 210             (numPmeRanksPerSimulation > 1))
 211         {
 212             GMX_THROW(NotImplementedError
 213                           ("PME tasks were required to run on GPUs, but that is not implemented with "
 214                           "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 215                           "or permit PME tasks to be assigned to the CPU."));
 216         }
 217         return true;
 218     }
 219
 220     if (numRanksPerSimulation == 1)
 221     {
 222         // PME can run well on a GPU shared with NB, and we permit
 223         // mdrun to default to try that.
 224         return !gpuIdsToUse.empty();
 225     }
 226
 227     if (numRanksPerSimulation < 1)
 228     {
 229         // Full automated mode for thread-MPI (the default). PME can
 230         // run well on a GPU shared with NB, and we permit mdrun to
 231         // default to it if there is only one GPU available.
 232         return (gpuIdsToUse.size() == 1);
 233     }
 234
 235     // Not enough support for PME on GPUs for anything else
 236     return false;
 237 }
 238
 239 bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarget,
 240                                         const std::vector<int>    &userGpuTaskAssignment,
 241                                         const EmulateGpuNonbonded  emulateGpuNonbonded,
 242                                         const bool                 buildSupportsNonbondedOnGpu,
 243                                         const bool                 nonbondedOnGpuIsUseful,
 244                                         const bool                 gpusWereDetected)
 245 {
 246     if (nonbondedTarget == TaskTarget::Cpu)
 247     {
 248         if (!userGpuTaskAssignment.empty())
 249         {
 250             GMX_THROW(InconsistentInputError
 251                           ("A GPU task assignment was specified, but nonbonded interactions were "
 252                           "assigned to the CPU. Make no more than one of these choices."));
 253         }
 254
 255         return false;
 256     }
 257
 258     if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
 259     {
 260         GMX_THROW(InconsistentInputError
 261                       ("Nonbonded interactions on the GPU were requested with -nb gpu, "
 262                       "but the GROMACS binary has been built without GPU support. "
 263                       "Either run without selecting GPU options, or recompile GROMACS "
 264                       "with GPU support enabled"));
 265     }
 266
 267     // TODO refactor all these TaskTarget::Gpu checks into one place?
 268     // e.g. use a subfunction that handles only the cases where
 269     // TaskTargets are not Cpu?
 270     if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
 271     {
 272         if (nonbondedTarget == TaskTarget::Gpu)
 273         {
 274             GMX_THROW(InconsistentInputError
 275                           ("Nonbonded interactions on the GPU were required, which is inconsistent "
 276                           "with choosing emulation. Make no more than one of these choices."));
 277         }
 278         if (!userGpuTaskAssignment.empty())
 279         {
 280             GMX_THROW(InconsistentInputError
 281                           ("GPU ID usage was specified, as was GPU emulation. Make no more than one of these choices."));
 282         }
 283
 284         return false;
 285     }
 286
 287     if (!nonbondedOnGpuIsUseful)
 288     {
 289         if (nonbondedTarget == TaskTarget::Gpu)
 290         {
 291             GMX_THROW(InconsistentInputError
 292                           ("Nonbonded interactions on the GPU were required, but not supported for these "
 293                           "simulation settings. Change your settings, or do not require using GPUs."));
 294         }
 295
 296         return false;
 297     }
 298
 299     if (!userGpuTaskAssignment.empty())
 300     {
 301         // Specifying -gputasks requires specifying everything.
 302         if (nonbondedTarget == TaskTarget::Auto)
 303         {
 304             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 305         }
 306
 307         return true;
 308     }
 309
 310     if (nonbondedTarget == TaskTarget::Gpu)
 311     {
 312         // We still don't know whether it is an error if no GPUs are found
 313         // because we don't know the duty of this rank, yet. For example,
 314         // a node with only PME ranks and -pme cpu is OK if there are not
 315         // GPUs.
 316         return true;
 317     }
 318
 319     // If we get here, then the user permitted GPUs, which we should
 320     // use for nonbonded interactions.
 321     return gpusWereDetected;
 322 }
 323
 324 bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
 325                                   const TaskTarget        pmeTarget,
 326                                   const std::vector<int> &userGpuTaskAssignment,
 327                                   const gmx_hw_info_t    &hardwareInfo,
 328                                   const t_inputrec       &inputrec,
 329                                   const gmx_mtop_t       &mtop,
 330                                   const int               numRanksPerSimulation,
 331                                   const int               numPmeRanksPerSimulation,
 332                                   const bool              gpusWereDetected)
 333 {
 334     if (pmeTarget == TaskTarget::Cpu)
 335     {
 336         return false;
 337     }
 338
 339     if (!useGpuForNonbonded)
 340     {
 341         if (pmeTarget == TaskTarget::Gpu)
 342         {
 343             GMX_THROW(NotImplementedError
 344                           ("PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
 345         }
 346         return false;
 347     }
 348
 349     std::string message;
 350     if (!pme_gpu_supports_build(&message))
 351     {
 352         if (pmeTarget == TaskTarget::Gpu)
 353         {
 354             GMX_THROW(NotImplementedError
 355                           ("Cannot compute PME interactions on a GPU, because " + message));
 356         }
 357         return false;
 358     }
 359     if (!pme_gpu_supports_hardware(hardwareInfo, &message))
 360     {
 361         if (pmeTarget == TaskTarget::Gpu)
 362         {
 363             GMX_THROW(NotImplementedError
 364                           ("Cannot compute PME interactions on a GPU, because " + message));
 365         }
 366         return false;
 367     }
 368     if (!pme_gpu_supports_input(inputrec, mtop, &message))
 369     {
 370         if (pmeTarget == TaskTarget::Gpu)
 371         {
 372             GMX_THROW(NotImplementedError
 373                           ("Cannot compute PME interactions on a GPU, because " + message));
 374         }
 375         return false;
 376     }
 377
 378     if (pmeTarget == TaskTarget::Cpu)
 379     {
 380         if (!userGpuTaskAssignment.empty())
 381         {
 382             GMX_THROW(InconsistentInputError
 383                           ("A GPU task assignment was specified, but PME interactions were "
 384                           "assigned to the CPU. Make no more than one of these choices."));
 385         }
 386
 387         return false;
 388     }
 389
 390     if (!userGpuTaskAssignment.empty())
 391     {
 392         // Specifying -gputasks requires specifying everything.
 393         if (pmeTarget == TaskTarget::Auto)
 394         {
 395             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
 396         }
 397
 398         return true;
 399     }
 400
 401     // We still don't know whether it is an error if no GPUs are found
 402     // because we don't know the duty of this rank, yet. For example,
 403     // a node with only PME ranks and -pme cpu is OK if there are not
 404     // GPUs.
 405
 406     if (pmeTarget == TaskTarget::Gpu)
 407     {
 408         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 409             (numPmeRanksPerSimulation > 1))
 410         {
 411             GMX_THROW(NotImplementedError
 412                           ("PME tasks were required to run on GPUs, but that is not implemented with "
 413                           "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 414                           "or permit PME tasks to be assigned to the CPU."));
 415         }
 416         return true;
 417     }
 418
 419     // If we get here, then the user permitted GPUs.
 420     if (numRanksPerSimulation == 1)
 421     {
 422         // PME can run well on a single GPU shared with NB when there
 423         // is one rank, so we permit mdrun to try that if we have
 424         // detected GPUs.
 425         return gpusWereDetected;
 426     }
 427
 428     // Not enough support for PME on GPUs for anything else
 429     return false;
 430 }
 431
 432 bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
 433                                      const bool       useGpuForPme,
 434                                      const TaskTarget bondedTarget,
 435                                      const bool       canUseGpuForBonded,
 436                                      const bool       usingLJPme,
 437                                      const bool       usingElecPmeOrEwald,
 438                                      const int        numPmeRanksPerSimulation,
 439                                      const bool       gpusWereDetected)
 440 {
 441     if (bondedTarget == TaskTarget::Cpu)
 442     {
 443         return false;
 444     }
 445
 446     if (!canUseGpuForBonded)
 447     {
 448         if (bondedTarget == TaskTarget::Gpu)
 449         {
 450             GMX_THROW(InconsistentInputError
 451                           ("Bonded interactions on the GPU were required, but not supported for these "
 452                           "simulation settings. Change your settings, or do not require using GPUs."));
 453         }
 454
 455         return false;
 456     }
 457
 458     if (!useGpuForNonbonded)
 459     {
 460         if (bondedTarget == TaskTarget::Gpu)
 461         {
 462             GMX_THROW(InconsistentInputError
 463                           ("Bonded interactions on the GPU were required, but this requires that "
 464                           "short-ranged non-bonded interactions are also run on the GPU. Change "
 465                           "your settings, or do not require using GPUs."));
 466         }
 467
 468         return false;
 469     }
 470
 471     // TODO If the bonded kernels do not get fused, then performance
 472     // overheads might suggest alternative choices here.
 473
 474     if (bondedTarget == TaskTarget::Gpu)
 475     {
 476         // We still don't know whether it is an error if no GPUs are
 477         // found.
 478         return true;
 479     }
 480
 481     // If we get here, then the user permitted GPUs, which we should
 482     // use for bonded interactions if any were detected and the CPU
 483     // is busy, for which we currently only check PME or Ewald.
 484     // (It would be better to dynamically assign bondeds based on timings)
 485     // Note that here we assume that the auto setting of PME ranks will not
 486     // choose seperate PME ranks when nonBonded are assigned to the GPU.
 487     bool usingOurCpuForPmeOrEwald = (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
 488
 489     return gpusWereDetected && usingOurCpuForPmeOrEwald;
 490 }
 491
 492 }  // namespace gmx