src/gromacs/taskassignment/decidegpuusage.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2017,2018, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  * \brief Defines functionality for deciding whether tasks will run on GPUs.
  37  *
  38  * \author Mark Abraham <mark.j.abraham@gmail.com>
  39  * \ingroup module_taskassignment
  40  */
  41
  42 #include "gmxpre.h"
  43
  44 #include "decidegpuusage.h"
  45
  46 #include "config.h"
  47
  48 #include <cstdlib>
  49 #include <cstring>
  50
  51 #include <algorithm>
  52 #include <string>
  53
  54 #include "gromacs/hardware/cpuinfo.h"
  55 #include "gromacs/hardware/detecthardware.h"
  56 #include "gromacs/hardware/hardwaretopology.h"
  57 #include "gromacs/hardware/hw_info.h"
  58 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  59 #include "gromacs/mdlib/nb_verlet.h"
  60 #include "gromacs/mdtypes/commrec.h"
  61 #include "gromacs/mdtypes/inputrec.h"
  62 #include "gromacs/mdtypes/md_enums.h"
  63 #include "gromacs/taskassignment/taskassignment.h"
  64 #include "gromacs/topology/topology.h"
  65 #include "gromacs/utility/baseversion.h"
  66 #include "gromacs/utility/exceptions.h"
  67 #include "gromacs/utility/fatalerror.h"
  68 #include "gromacs/utility/gmxassert.h"
  69 #include "gromacs/utility/logger.h"
  70 #include "gromacs/utility/stringutil.h"
  71
  72
  73 namespace gmx
  74 {
  75
  76 namespace
  77 {
  78
  79 //! Helper variable to localise the text of an often repeated message.
  80 const char * g_specifyEverythingFormatString =
  81     "When you use mdrun -gputasks, %s must be set to non-default "
  82     "values, so that the device IDs can be interpreted correctly."
  83 #if GMX_GPU != GMX_GPU_NONE
  84     " If you simply want to restrict which GPUs are used, then it is "
  85     "better to use mdrun -gpu_id. Otherwise, setting the "
  86 #  if GMX_GPU == GMX_GPU_CUDA
  87     "CUDA_VISIBLE_DEVICES"
  88 #  elif GMX_GPU == GMX_GPU_OPENCL
  89     // Technically there is no portable way to do this offered by the
  90     // OpenCL standard, but the only current relevant case for GROMACS
  91     // is AMD OpenCL, which offers this variable.
  92     "GPU_DEVICE_ORDINAL"
  93 #  else
  94 #  error "Unreachable branch"
  95 #  endif
  96     " environment variable in your bash profile or job "
  97     "script may be more convenient."
  98 #endif
  99 ;
 100
 101 }   // namespace
 102
 103 bool
 104 decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget          nonbondedTarget,
 105                                                 const std::vector<int>   &gpuIdsToUse,
 106                                                 const std::vector<int>   &userGpuTaskAssignment,
 107                                                 const EmulateGpuNonbonded emulateGpuNonbonded,
 108                                                 const bool                usingVerletScheme,
 109                                                 const bool                nonbondedOnGpuIsUseful,
 110                                                 const int                 numRanksPerSimulation)
 111 {
 112     // First, exclude all cases where we can't run NB on GPUs.
 113     if (nonbondedTarget == TaskTarget::Cpu ||
 114         emulateGpuNonbonded == EmulateGpuNonbonded::Yes ||
 115         !usingVerletScheme ||
 116         !nonbondedOnGpuIsUseful)
 117     {
 118         // If the user required NB on GPUs, we issue an error later.
 119         return false;
 120     }
 121
 122     // We now know that NB on GPUs makes sense, if we have any.
 123
 124     if (!userGpuTaskAssignment.empty())
 125     {
 126         // Specifying -gputasks requires specifying everything.
 127         if (nonbondedTarget == TaskTarget::Auto ||
 128             numRanksPerSimulation < 1)
 129         {
 130             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 131         }
 132         return true;
 133     }
 134
 135     if (nonbondedTarget == TaskTarget::Gpu)
 136     {
 137         return true;
 138     }
 139
 140     // Because this is thread-MPI, we already know about the GPUs that
 141     // all potential ranks can use, and can use that in a global
 142     // decision that will later be consistent.
 143     auto haveGpus = !gpuIdsToUse.empty();
 144
 145     // If we get here, then the user permitted or required GPUs.
 146     return haveGpus;
 147 }
 148
 149 bool
 150 decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
 151                                           const TaskTarget        pmeTarget,
 152                                           const std::vector<int> &gpuIdsToUse,
 153                                           const std::vector<int> &userGpuTaskAssignment,
 154                                           const bool              canUseGpuForPme,
 155                                           const int               numRanksPerSimulation,
 156                                           const int               numPmeRanksPerSimulation)
 157 {
 158     // First, exclude all cases where we can't run PME on GPUs.
 159     if ((pmeTarget == TaskTarget::Cpu) ||
 160         !useGpuForNonbonded ||
 161         !canUseGpuForPme)
 162     {
 163         // PME can't run on a GPU. If the user required that, we issue
 164         // an error later.
 165         return false;
 166     }
 167
 168     // We now know that PME on GPUs might make sense, if we have any.
 169
 170     if (!userGpuTaskAssignment.empty())
 171     {
 172         // Follow the user's choice of GPU task assignment, if we
 173         // can. Checking that their IDs are for compatible GPUs comes
 174         // later.
 175
 176         // Specifying -gputasks requires specifying everything.
 177         if (pmeTarget == TaskTarget::Auto ||
 178             numRanksPerSimulation < 1)
 179         {
 180             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
 181         }
 182
 183         // PME on GPUs is only supported in a single case
 184         if (pmeTarget == TaskTarget::Gpu)
 185         {
 186             if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 187                 (numPmeRanksPerSimulation > 1))
 188             {
 189                 GMX_THROW(InconsistentInputError
 190                               ("When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr file and use a single PME rank."));
 191             }
 192             return true;
 193         }
 194
 195         // pmeTarget == TaskTarget::Auto
 196         return numRanksPerSimulation == 1;
 197     }
 198
 199     // Because this is thread-MPI, we already know about the GPUs that
 200     // all potential ranks can use, and can use that in a global
 201     // decision that will later be consistent.
 202
 203     if (pmeTarget == TaskTarget::Gpu)
 204     {
 205         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 206             (numPmeRanksPerSimulation > 1))
 207         {
 208             GMX_THROW(NotImplementedError
 209                           ("PME tasks were required to run on GPUs, but that is not implemented with "
 210                           "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 211                           "or permit PME tasks to be assigned to the CPU."));
 212         }
 213         return true;
 214     }
 215
 216     if (numRanksPerSimulation == 1)
 217     {
 218         // PME can run well on a GPU shared with NB, and we permit
 219         // mdrun to default to try that.
 220         return !gpuIdsToUse.empty();
 221     }
 222
 223     if (numRanksPerSimulation < 1)
 224     {
 225         // Full automated mode for thread-MPI (the default). PME can
 226         // run well on a GPU shared with NB, and we permit mdrun to
 227         // default to it if there is only one GPU available.
 228         return (gpuIdsToUse.size() == 1);
 229     }
 230
 231     // Not enough support for PME on GPUs for anything else
 232     return false;
 233 }
 234
 235 bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarget,
 236                                         const std::vector<int>    &userGpuTaskAssignment,
 237                                         const EmulateGpuNonbonded  emulateGpuNonbonded,
 238                                         const bool                 usingVerletScheme,
 239                                         const bool                 nonbondedOnGpuIsUseful,
 240                                         const bool                 gpusWereDetected)
 241 {
 242     if (nonbondedTarget == TaskTarget::Cpu)
 243     {
 244         if (!userGpuTaskAssignment.empty())
 245         {
 246             GMX_THROW(InconsistentInputError
 247                           ("A GPU task assignment was specified, but nonbonded interactions were "
 248                           "assigned to the CPU. Make no more than one of these choices."));
 249         }
 250
 251         return false;
 252     }
 253
 254     // TODO refactor all these TaskTarget::Gpu checks into one place?
 255     // e.g. use a subfunction that handles only the cases where
 256     // TaskTargets are not Cpu?
 257     if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
 258     {
 259         if (nonbondedTarget == TaskTarget::Gpu)
 260         {
 261             GMX_THROW(InconsistentInputError
 262                           ("Nonbonded interactions on the GPU were required, which is inconsistent "
 263                           "with choosing emulation. Make no more than one of these choices."));
 264         }
 265         if (!userGpuTaskAssignment.empty())
 266         {
 267             GMX_THROW(InconsistentInputError
 268                           ("GPU ID usage was specified, as was GPU emulation. Make no more than one of these choices."));
 269         }
 270
 271         return false;
 272     }
 273
 274     if (!usingVerletScheme)
 275     {
 276         if (nonbondedTarget == TaskTarget::Gpu)
 277         {
 278             GMX_THROW(InconsistentInputError
 279                           ("Nonbonded interactions on the GPU were required, which requires using "
 280                           "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs."));
 281         }
 282
 283         return false;
 284     }
 285
 286     if (!nonbondedOnGpuIsUseful)
 287     {
 288         if (nonbondedTarget == TaskTarget::Gpu)
 289         {
 290             GMX_THROW(InconsistentInputError
 291                           ("Nonbonded interactions on the GPU were required, but not supported for these "
 292                           "simulation settings. Change your settings, or do not require using GPUs."));
 293         }
 294
 295         return false;
 296     }
 297
 298     if (!userGpuTaskAssignment.empty())
 299     {
 300         // Specifying -gputasks requires specifying everything.
 301         if (nonbondedTarget == TaskTarget::Auto)
 302         {
 303             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 304         }
 305
 306         return true;
 307     }
 308
 309     if (nonbondedTarget == TaskTarget::Gpu)
 310     {
 311         // We still don't know whether it is an error if no GPUs are found
 312         // because we don't know the duty of this rank, yet. For example,
 313         // a node with only PME ranks and -pme cpu is OK if there are not
 314         // GPUs.
 315         return true;
 316     }
 317
 318     // If we get here, then the user permitted GPUs, which we should
 319     // use for nonbonded interactions.
 320     return gpusWereDetected;
 321 }
 322
 323 bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
 324                                   const TaskTarget        pmeTarget,
 325                                   const std::vector<int> &userGpuTaskAssignment,
 326                                   const bool              canUseGpuForPme,
 327                                   const int               numRanksPerSimulation,
 328                                   const int               numPmeRanksPerSimulation,
 329                                   const bool              gpusWereDetected)
 330 {
 331     if (pmeTarget == TaskTarget::Cpu)
 332     {
 333         return false;
 334     }
 335
 336     if (!useGpuForNonbonded)
 337     {
 338         if (pmeTarget == TaskTarget::Gpu)
 339         {
 340             GMX_THROW(NotImplementedError
 341                           ("The PME on the GPU is only supported when nonbonded interactions run on GPUs also."));
 342         }
 343         return false;
 344     }
 345
 346     if (!canUseGpuForPme)
 347     {
 348         if (pmeTarget == TaskTarget::Gpu)
 349         {
 350             // TODO Pass in the inputrec so we can give more help here?
 351             GMX_THROW(NotImplementedError
 352                           ("The input simulation did not use PME in a way that is supported on the GPU."));
 353         }
 354         return false;
 355     }
 356
 357     if (pmeTarget == TaskTarget::Cpu)
 358     {
 359         if (!userGpuTaskAssignment.empty())
 360         {
 361             GMX_THROW(InconsistentInputError
 362                           ("A GPU task assignment was specified, but PME interactions were "
 363                           "assigned to the CPU. Make no more than one of these choices."));
 364         }
 365
 366         return false;
 367     }
 368
 369     if (!userGpuTaskAssignment.empty())
 370     {
 371         // Specifying -gputasks requires specifying everything.
 372         if (pmeTarget == TaskTarget::Auto)
 373         {
 374             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
 375         }
 376
 377         return true;
 378     }
 379
 380     // We still don't know whether it is an error if no GPUs are found
 381     // because we don't know the duty of this rank, yet. For example,
 382     // a node with only PME ranks and -pme cpu is OK if there are not
 383     // GPUs.
 384
 385     if (pmeTarget == TaskTarget::Gpu)
 386     {
 387         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 388             (numPmeRanksPerSimulation > 1))
 389         {
 390             GMX_THROW(NotImplementedError
 391                           ("PME tasks were required to run on GPUs, but that is not implemented with "
 392                           "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 393                           "or permit PME tasks to be assigned to the CPU."));
 394         }
 395         return true;
 396     }
 397
 398     // If we get here, then the user permitted GPUs.
 399     if (numRanksPerSimulation == 1)
 400     {
 401         // PME can run well on a single GPU shared with NB when there
 402         // is one rank, so we permit mdrun to try that if we have
 403         // detected GPUs.
 404         return gpusWereDetected;
 405     }
 406
 407     // Not enough support for PME on GPUs for anything else
 408     return false;
 409 }
 410
 411 bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
 412                                      const bool       useGpuForPme,
 413                                      const bool       usingVerletScheme,
 414                                      const TaskTarget bondedTarget,
 415                                      const bool       canUseGpuForBonded,
 416                                      const bool       usingLJPme,
 417                                      const bool       usingElecPmeOrEwald,
 418                                      const int        numPmeRanksPerSimulation,
 419                                      const bool       gpusWereDetected)
 420 {
 421     if (bondedTarget == TaskTarget::Cpu)
 422     {
 423         return false;
 424     }
 425
 426     if (!usingVerletScheme)
 427     {
 428         if (bondedTarget == TaskTarget::Gpu)
 429         {
 430             GMX_THROW(InconsistentInputError
 431                           ("Bonded interactions on the GPU were required, which requires using "
 432                           "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs."));
 433         }
 434
 435         return false;
 436     }
 437
 438     if (!canUseGpuForBonded)
 439     {
 440         if (bondedTarget == TaskTarget::Gpu)
 441         {
 442             GMX_THROW(InconsistentInputError
 443                           ("Bonded interactions on the GPU were required, but not supported for these "
 444                           "simulation settings. Change your settings, or do not require using GPUs."));
 445         }
 446
 447         return false;
 448     }
 449
 450     if (!useGpuForNonbonded)
 451     {
 452         if (bondedTarget == TaskTarget::Gpu)
 453         {
 454             GMX_THROW(InconsistentInputError
 455                           ("Bonded interactions on the GPU were required, but this requires that "
 456                           "short-ranged non-bonded interactions are also run on the GPU. Change "
 457                           "your settings, or do not require using GPUs."));
 458         }
 459
 460         return false;
 461     }
 462
 463     // TODO If the bonded kernels do not get fused, then performance
 464     // overheads might suggest alternative choices here.
 465
 466     if (bondedTarget == TaskTarget::Gpu)
 467     {
 468         // We still don't know whether it is an error if no GPUs are
 469         // found.
 470         return true;
 471     }
 472
 473     // If we get here, then the user permitted GPUs, which we should
 474     // use for bonded interactions if any were detected and the CPU
 475     // is busy, for which we currently only check PME or Ewald.
 476     // (It would be better to dynamically assign bondeds based on timings)
 477     // Note that here we assume that the auto setting of PME ranks will not
 478     // choose seperate PME ranks when nonBonded are assigned to the GPU.
 479     bool usingOurCpuForPmeOrEwald = (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
 480
 481     return gpusWereDetected && usingOurCpuForPmeOrEwald;
 482 }
 483
 484 }  // namespace gmx