src/gromacs/taskassignment/decidegpuusage.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2017,2018, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  * \brief Defines functionality for deciding whether tasks will run on GPUs.
  37  *
  38  * \author Mark Abraham <mark.j.abraham@gmail.com>
  39  * \ingroup module_taskassignment
  40  */
  41
  42 #include "gmxpre.h"
  43
  44 #include "decidegpuusage.h"
  45
  46 #include "config.h"
  47
  48 #include <cstdlib>
  49 #include <cstring>
  50
  51 #include <algorithm>
  52 #include <string>
  53
  54 #include "gromacs/hardware/cpuinfo.h"
  55 #include "gromacs/hardware/detecthardware.h"
  56 #include "gromacs/hardware/hardwaretopology.h"
  57 #include "gromacs/hardware/hw_info.h"
  58 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  59 #include "gromacs/mdlib/nb_verlet.h"
  60 #include "gromacs/mdtypes/commrec.h"
  61 #include "gromacs/mdtypes/inputrec.h"
  62 #include "gromacs/mdtypes/md_enums.h"
  63 #include "gromacs/taskassignment/taskassignment.h"
  64 #include "gromacs/topology/topology.h"
  65 #include "gromacs/utility/baseversion.h"
  66 #include "gromacs/utility/exceptions.h"
  67 #include "gromacs/utility/fatalerror.h"
  68 #include "gromacs/utility/gmxassert.h"
  69 #include "gromacs/utility/logger.h"
  70 #include "gromacs/utility/stringutil.h"
  71
  72
  73 namespace gmx
  74 {
  75
  76 namespace
  77 {
  78
  79 //! Helper variable to localise the text of an often repeated message.
  80 const char * g_specifyEverythingFormatString =
  81     "When you use mdrun -gputasks, %s must be set to non-default "
  82     "values, so that the device IDs can be interpreted correctly."
  83 #if GMX_GPU != GMX_GPU_NONE
  84     " If you simply want to restrict which GPUs are used, then it is "
  85     "better to use mdrun -gpu_id. Otherwise, setting the "
  86 #  if GMX_GPU == GMX_GPU_CUDA
  87     "CUDA_VISIBLE_DEVICES"
  88 #  elif GMX_GPU == GMX_GPU_OPENCL
  89     // Technically there is no portable way to do this offered by the
  90     // OpenCL standard, but the only current relevant case for GROMACS
  91     // is AMD OpenCL, which offers this variable.
  92     "GPU_DEVICE_ORDINAL"
  93 #  else
  94 #  error "Unreachable branch"
  95 #  endif
  96     " environment variable in your bash profile or job "
  97     "script may be more convenient."
  98 #endif
  99 ;
 100
 101 }   // namespace
 102
 103 bool
 104 decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget          nonbondedTarget,
 105                                                 const std::vector<int>   &gpuIdsToUse,
 106                                                 const std::vector<int>   &userGpuTaskAssignment,
 107                                                 const EmulateGpuNonbonded emulateGpuNonbonded,
 108                                                 const bool                buildSupportsNonbondedOnGpu,
 109                                                 const bool                usingVerletScheme,
 110                                                 const bool                nonbondedOnGpuIsUseful,
 111                                                 const int                 numRanksPerSimulation)
 112 {
 113     // First, exclude all cases where we can't run NB on GPUs.
 114     if (nonbondedTarget == TaskTarget::Cpu ||
 115         emulateGpuNonbonded == EmulateGpuNonbonded::Yes ||
 116         !usingVerletScheme ||
 117         !nonbondedOnGpuIsUseful ||
 118         !buildSupportsNonbondedOnGpu)
 119     {
 120         // If the user required NB on GPUs, we issue an error later.
 121         return false;
 122     }
 123
 124     // We now know that NB on GPUs makes sense, if we have any.
 125
 126     if (!userGpuTaskAssignment.empty())
 127     {
 128         // Specifying -gputasks requires specifying everything.
 129         if (nonbondedTarget == TaskTarget::Auto ||
 130             numRanksPerSimulation < 1)
 131         {
 132             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 133         }
 134         return true;
 135     }
 136
 137     if (nonbondedTarget == TaskTarget::Gpu)
 138     {
 139         return true;
 140     }
 141
 142     // Because this is thread-MPI, we already know about the GPUs that
 143     // all potential ranks can use, and can use that in a global
 144     // decision that will later be consistent.
 145     auto haveGpus = !gpuIdsToUse.empty();
 146
 147     // If we get here, then the user permitted or required GPUs.
 148     return haveGpus;
 149 }
 150
 151 bool
 152 decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
 153                                           const TaskTarget        pmeTarget,
 154                                           const std::vector<int> &gpuIdsToUse,
 155                                           const std::vector<int> &userGpuTaskAssignment,
 156                                           const bool              canUseGpuForPme,
 157                                           const int               numRanksPerSimulation,
 158                                           const int               numPmeRanksPerSimulation)
 159 {
 160     // First, exclude all cases where we can't run PME on GPUs.
 161     if ((pmeTarget == TaskTarget::Cpu) ||
 162         !useGpuForNonbonded ||
 163         !canUseGpuForPme)
 164     {
 165         // PME can't run on a GPU. If the user required that, we issue
 166         // an error later.
 167         return false;
 168     }
 169
 170     // We now know that PME on GPUs might make sense, if we have any.
 171
 172     if (!userGpuTaskAssignment.empty())
 173     {
 174         // Follow the user's choice of GPU task assignment, if we
 175         // can. Checking that their IDs are for compatible GPUs comes
 176         // later.
 177
 178         // Specifying -gputasks requires specifying everything.
 179         if (pmeTarget == TaskTarget::Auto ||
 180             numRanksPerSimulation < 1)
 181         {
 182             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
 183         }
 184
 185         // PME on GPUs is only supported in a single case
 186         if (pmeTarget == TaskTarget::Gpu)
 187         {
 188             if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 189                 (numPmeRanksPerSimulation > 1))
 190             {
 191                 GMX_THROW(InconsistentInputError
 192                               ("When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr file and use a single PME rank."));
 193             }
 194             return true;
 195         }
 196
 197         // pmeTarget == TaskTarget::Auto
 198         return numRanksPerSimulation == 1;
 199     }
 200
 201     // Because this is thread-MPI, we already know about the GPUs that
 202     // all potential ranks can use, and can use that in a global
 203     // decision that will later be consistent.
 204
 205     if (pmeTarget == TaskTarget::Gpu)
 206     {
 207         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 208             (numPmeRanksPerSimulation > 1))
 209         {
 210             GMX_THROW(NotImplementedError
 211                           ("PME tasks were required to run on GPUs, but that is not implemented with "
 212                           "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 213                           "or permit PME tasks to be assigned to the CPU."));
 214         }
 215         return true;
 216     }
 217
 218     if (numRanksPerSimulation == 1)
 219     {
 220         // PME can run well on a GPU shared with NB, and we permit
 221         // mdrun to default to try that.
 222         return !gpuIdsToUse.empty();
 223     }
 224
 225     if (numRanksPerSimulation < 1)
 226     {
 227         // Full automated mode for thread-MPI (the default). PME can
 228         // run well on a GPU shared with NB, and we permit mdrun to
 229         // default to it if there is only one GPU available.
 230         return (gpuIdsToUse.size() == 1);
 231     }
 232
 233     // Not enough support for PME on GPUs for anything else
 234     return false;
 235 }
 236
 237 bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarget,
 238                                         const std::vector<int>    &userGpuTaskAssignment,
 239                                         const EmulateGpuNonbonded  emulateGpuNonbonded,
 240                                         const bool                 buildSupportsNonbondedOnGpu,
 241                                         const bool                 usingVerletScheme,
 242                                         const bool                 nonbondedOnGpuIsUseful,
 243                                         const bool                 gpusWereDetected)
 244 {
 245     if (nonbondedTarget == TaskTarget::Cpu)
 246     {
 247         if (!userGpuTaskAssignment.empty())
 248         {
 249             GMX_THROW(InconsistentInputError
 250                           ("A GPU task assignment was specified, but nonbonded interactions were "
 251                           "assigned to the CPU. Make no more than one of these choices."));
 252         }
 253
 254         return false;
 255     }
 256
 257     if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
 258     {
 259         GMX_THROW(InconsistentInputError
 260                       ("Nonbonded interactions on the GPU were requested with -nb gpu, "
 261                       "but the GROMACS binary has been built without GPU support. "
 262                       "Either run without selecting GPU options, or recompile GROMACS "
 263                       "with GPU support enabled"));
 264     }
 265
 266     // TODO refactor all these TaskTarget::Gpu checks into one place?
 267     // e.g. use a subfunction that handles only the cases where
 268     // TaskTargets are not Cpu?
 269     if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
 270     {
 271         if (nonbondedTarget == TaskTarget::Gpu)
 272         {
 273             GMX_THROW(InconsistentInputError
 274                           ("Nonbonded interactions on the GPU were required, which is inconsistent "
 275                           "with choosing emulation. Make no more than one of these choices."));
 276         }
 277         if (!userGpuTaskAssignment.empty())
 278         {
 279             GMX_THROW(InconsistentInputError
 280                           ("GPU ID usage was specified, as was GPU emulation. Make no more than one of these choices."));
 281         }
 282
 283         return false;
 284     }
 285
 286     if (!usingVerletScheme)
 287     {
 288         if (nonbondedTarget == TaskTarget::Gpu)
 289         {
 290             GMX_THROW(InconsistentInputError
 291                           ("Nonbonded interactions on the GPU were required, which requires using "
 292                           "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs."));
 293         }
 294
 295         return false;
 296     }
 297
 298     if (!nonbondedOnGpuIsUseful)
 299     {
 300         if (nonbondedTarget == TaskTarget::Gpu)
 301         {
 302             GMX_THROW(InconsistentInputError
 303                           ("Nonbonded interactions on the GPU were required, but not supported for these "
 304                           "simulation settings. Change your settings, or do not require using GPUs."));
 305         }
 306
 307         return false;
 308     }
 309
 310     if (!userGpuTaskAssignment.empty())
 311     {
 312         // Specifying -gputasks requires specifying everything.
 313         if (nonbondedTarget == TaskTarget::Auto)
 314         {
 315             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 316         }
 317
 318         return true;
 319     }
 320
 321     if (nonbondedTarget == TaskTarget::Gpu)
 322     {
 323         // We still don't know whether it is an error if no GPUs are found
 324         // because we don't know the duty of this rank, yet. For example,
 325         // a node with only PME ranks and -pme cpu is OK if there are not
 326         // GPUs.
 327         return true;
 328     }
 329
 330     // If we get here, then the user permitted GPUs, which we should
 331     // use for nonbonded interactions.
 332     return gpusWereDetected;
 333 }
 334
 335 bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
 336                                   const TaskTarget        pmeTarget,
 337                                   const std::vector<int> &userGpuTaskAssignment,
 338                                   const bool              canUseGpuForPme,
 339                                   const int               numRanksPerSimulation,
 340                                   const int               numPmeRanksPerSimulation,
 341                                   const bool              gpusWereDetected)
 342 {
 343     if (pmeTarget == TaskTarget::Cpu)
 344     {
 345         return false;
 346     }
 347
 348     if (!useGpuForNonbonded)
 349     {
 350         if (pmeTarget == TaskTarget::Gpu)
 351         {
 352             GMX_THROW(NotImplementedError
 353                           ("The PME on the GPU is only supported when nonbonded interactions run on GPUs also."));
 354         }
 355         return false;
 356     }
 357
 358     if (!canUseGpuForPme)
 359     {
 360         if (pmeTarget == TaskTarget::Gpu)
 361         {
 362             // TODO Pass in the inputrec so we can give more help here?
 363             GMX_THROW(NotImplementedError
 364                           ("The input simulation did not use PME in a way that is supported on the GPU."));
 365         }
 366         return false;
 367     }
 368
 369     if (pmeTarget == TaskTarget::Cpu)
 370     {
 371         if (!userGpuTaskAssignment.empty())
 372         {
 373             GMX_THROW(InconsistentInputError
 374                           ("A GPU task assignment was specified, but PME interactions were "
 375                           "assigned to the CPU. Make no more than one of these choices."));
 376         }
 377
 378         return false;
 379     }
 380
 381     if (!userGpuTaskAssignment.empty())
 382     {
 383         // Specifying -gputasks requires specifying everything.
 384         if (pmeTarget == TaskTarget::Auto)
 385         {
 386             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
 387         }
 388
 389         return true;
 390     }
 391
 392     // We still don't know whether it is an error if no GPUs are found
 393     // because we don't know the duty of this rank, yet. For example,
 394     // a node with only PME ranks and -pme cpu is OK if there are not
 395     // GPUs.
 396
 397     if (pmeTarget == TaskTarget::Gpu)
 398     {
 399         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 400             (numPmeRanksPerSimulation > 1))
 401         {
 402             GMX_THROW(NotImplementedError
 403                           ("PME tasks were required to run on GPUs, but that is not implemented with "
 404                           "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 405                           "or permit PME tasks to be assigned to the CPU."));
 406         }
 407         return true;
 408     }
 409
 410     // If we get here, then the user permitted GPUs.
 411     if (numRanksPerSimulation == 1)
 412     {
 413         // PME can run well on a single GPU shared with NB when there
 414         // is one rank, so we permit mdrun to try that if we have
 415         // detected GPUs.
 416         return gpusWereDetected;
 417     }
 418
 419     // Not enough support for PME on GPUs for anything else
 420     return false;
 421 }
 422
 423 bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
 424                                      const bool       useGpuForPme,
 425                                      const bool       usingVerletScheme,
 426                                      const TaskTarget bondedTarget,
 427                                      const bool       canUseGpuForBonded,
 428                                      const bool       usingLJPme,
 429                                      const bool       usingElecPmeOrEwald,
 430                                      const int        numPmeRanksPerSimulation,
 431                                      const bool       gpusWereDetected)
 432 {
 433     if (bondedTarget == TaskTarget::Cpu)
 434     {
 435         return false;
 436     }
 437
 438     if (!usingVerletScheme)
 439     {
 440         if (bondedTarget == TaskTarget::Gpu)
 441         {
 442             GMX_THROW(InconsistentInputError
 443                           ("Bonded interactions on the GPU were required, which requires using "
 444                           "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs."));
 445         }
 446
 447         return false;
 448     }
 449
 450     if (!canUseGpuForBonded)
 451     {
 452         if (bondedTarget == TaskTarget::Gpu)
 453         {
 454             GMX_THROW(InconsistentInputError
 455                           ("Bonded interactions on the GPU were required, but not supported for these "
 456                           "simulation settings. Change your settings, or do not require using GPUs."));
 457         }
 458
 459         return false;
 460     }
 461
 462     if (!useGpuForNonbonded)
 463     {
 464         if (bondedTarget == TaskTarget::Gpu)
 465         {
 466             GMX_THROW(InconsistentInputError
 467                           ("Bonded interactions on the GPU were required, but this requires that "
 468                           "short-ranged non-bonded interactions are also run on the GPU. Change "
 469                           "your settings, or do not require using GPUs."));
 470         }
 471
 472         return false;
 473     }
 474
 475     // TODO If the bonded kernels do not get fused, then performance
 476     // overheads might suggest alternative choices here.
 477
 478     if (bondedTarget == TaskTarget::Gpu)
 479     {
 480         // We still don't know whether it is an error if no GPUs are
 481         // found.
 482         return true;
 483     }
 484
 485     // If we get here, then the user permitted GPUs, which we should
 486     // use for bonded interactions if any were detected and the CPU
 487     // is busy, for which we currently only check PME or Ewald.
 488     // (It would be better to dynamically assign bondeds based on timings)
 489     // Note that here we assume that the auto setting of PME ranks will not
 490     // choose seperate PME ranks when nonBonded are assigned to the GPU.
 491     bool usingOurCpuForPmeOrEwald = (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
 492
 493     return gpusWereDetected && usingOurCpuForPmeOrEwald;
 494 }
 495
 496 }  // namespace gmx