src/gromacs/taskassignment/decidegpuusage.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  * \brief Defines functionality for deciding whether tasks will run on GPUs.
  37  *
  38  * \author Mark Abraham <mark.j.abraham@gmail.com>
  39  * \ingroup module_taskassignment
  40  */
  41
  42 #include "gmxpre.h"
  43
  44 #include "decidegpuusage.h"
  45
  46 #include "config.h"
  47
  48 #include <cstdlib>
  49 #include <cstring>
  50
  51 #include <algorithm>
  52 #include <string>
  53
  54 #include "gromacs/ewald/pme.h"
  55 #include "gromacs/hardware/cpuinfo.h"
  56 #include "gromacs/hardware/detecthardware.h"
  57 #include "gromacs/hardware/hardwaretopology.h"
  58 #include "gromacs/hardware/hw_info.h"
  59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  60 #include "gromacs/mdtypes/commrec.h"
  61 #include "gromacs/mdtypes/inputrec.h"
  62 #include "gromacs/mdtypes/md_enums.h"
  63 #include "gromacs/nbnxm/nbnxm.h"
  64 #include "gromacs/taskassignment/taskassignment.h"
  65 #include "gromacs/topology/topology.h"
  66 #include "gromacs/utility/baseversion.h"
  67 #include "gromacs/utility/exceptions.h"
  68 #include "gromacs/utility/fatalerror.h"
  69 #include "gromacs/utility/gmxassert.h"
  70 #include "gromacs/utility/logger.h"
  71 #include "gromacs/utility/stringutil.h"
  72
  73
  74 namespace gmx
  75 {
  76
  77 namespace
  78 {
  79
  80 //! Helper variable to localise the text of an often repeated message.
  81 const char * g_specifyEverythingFormatString =
  82     "When you use mdrun -gputasks, %s must be set to non-default "
  83     "values, so that the device IDs can be interpreted correctly."
  84 #if GMX_GPU != GMX_GPU_NONE
  85     " If you simply want to restrict which GPUs are used, then it is "
  86     "better to use mdrun -gpu_id. Otherwise, setting the "
  87 #  if GMX_GPU == GMX_GPU_CUDA
  88     "CUDA_VISIBLE_DEVICES"
  89 #  elif GMX_GPU == GMX_GPU_OPENCL
  90     // Technically there is no portable way to do this offered by the
  91     // OpenCL standard, but the only current relevant case for GROMACS
  92     // is AMD OpenCL, which offers this variable.
  93     "GPU_DEVICE_ORDINAL"
  94 #  else
  95 #  error "Unreachable branch"
  96 #  endif
  97     " environment variable in your bash profile or job "
  98     "script may be more convenient."
  99 #endif
 100 ;
 101
 102 }   // namespace
 103
 104 bool
 105 decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget          nonbondedTarget,
 106                                                 const std::vector<int>   &gpuIdsToUse,
 107                                                 const std::vector<int>   &userGpuTaskAssignment,
 108                                                 const EmulateGpuNonbonded emulateGpuNonbonded,
 109                                                 const bool                buildSupportsNonbondedOnGpu,
 110                                                 const bool                usingVerletScheme,
 111                                                 const bool                nonbondedOnGpuIsUseful,
 112                                                 const int                 numRanksPerSimulation)
 113 {
 114     // First, exclude all cases where we can't run NB on GPUs.
 115     if (nonbondedTarget == TaskTarget::Cpu ||
 116         emulateGpuNonbonded == EmulateGpuNonbonded::Yes ||
 117         !usingVerletScheme ||
 118         !nonbondedOnGpuIsUseful ||
 119         !buildSupportsNonbondedOnGpu)
 120     {
 121         // If the user required NB on GPUs, we issue an error later.
 122         return false;
 123     }
 124
 125     // We now know that NB on GPUs makes sense, if we have any.
 126
 127     if (!userGpuTaskAssignment.empty())
 128     {
 129         // Specifying -gputasks requires specifying everything.
 130         if (nonbondedTarget == TaskTarget::Auto ||
 131             numRanksPerSimulation < 1)
 132         {
 133             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 134         }
 135         return true;
 136     }
 137
 138     if (nonbondedTarget == TaskTarget::Gpu)
 139     {
 140         return true;
 141     }
 142
 143     // Because this is thread-MPI, we already know about the GPUs that
 144     // all potential ranks can use, and can use that in a global
 145     // decision that will later be consistent.
 146     auto haveGpus = !gpuIdsToUse.empty();
 147
 148     // If we get here, then the user permitted or required GPUs.
 149     return haveGpus;
 150 }
 151
 152 bool
 153 decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
 154                                           const TaskTarget        pmeTarget,
 155                                           const std::vector<int> &gpuIdsToUse,
 156                                           const std::vector<int> &userGpuTaskAssignment,
 157                                           const gmx_hw_info_t    &hardwareInfo,
 158                                           const t_inputrec       &inputrec,
 159                                           const gmx_mtop_t       &mtop,
 160                                           const int               numRanksPerSimulation,
 161                                           const int               numPmeRanksPerSimulation)
 162 {
 163     // First, exclude all cases where we can't run PME on GPUs.
 164     if ((pmeTarget == TaskTarget::Cpu) ||
 165         !useGpuForNonbonded ||
 166         !pme_gpu_supports_build(nullptr) ||
 167         !pme_gpu_supports_hardware(hardwareInfo, nullptr) ||
 168         !pme_gpu_supports_input(inputrec, mtop, nullptr))
 169     {
 170         // PME can't run on a GPU. If the user required that, we issue
 171         // an error later.
 172         return false;
 173     }
 174
 175     // We now know that PME on GPUs might make sense, if we have any.
 176
 177     if (!userGpuTaskAssignment.empty())
 178     {
 179         // Follow the user's choice of GPU task assignment, if we
 180         // can. Checking that their IDs are for compatible GPUs comes
 181         // later.
 182
 183         // Specifying -gputasks requires specifying everything.
 184         if (pmeTarget == TaskTarget::Auto ||
 185             numRanksPerSimulation < 1)
 186         {
 187             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
 188         }
 189
 190         // PME on GPUs is only supported in a single case
 191         if (pmeTarget == TaskTarget::Gpu)
 192         {
 193             if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 194                 (numPmeRanksPerSimulation > 1))
 195             {
 196                 GMX_THROW(InconsistentInputError
 197                               ("When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr file and use a single PME rank."));
 198             }
 199             return true;
 200         }
 201
 202         // pmeTarget == TaskTarget::Auto
 203         return numRanksPerSimulation == 1;
 204     }
 205
 206     // Because this is thread-MPI, we already know about the GPUs that
 207     // all potential ranks can use, and can use that in a global
 208     // decision that will later be consistent.
 209
 210     if (pmeTarget == TaskTarget::Gpu)
 211     {
 212         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 213             (numPmeRanksPerSimulation > 1))
 214         {
 215             GMX_THROW(NotImplementedError
 216                           ("PME tasks were required to run on GPUs, but that is not implemented with "
 217                           "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 218                           "or permit PME tasks to be assigned to the CPU."));
 219         }
 220         return true;
 221     }
 222
 223     if (numRanksPerSimulation == 1)
 224     {
 225         // PME can run well on a GPU shared with NB, and we permit
 226         // mdrun to default to try that.
 227         return !gpuIdsToUse.empty();
 228     }
 229
 230     if (numRanksPerSimulation < 1)
 231     {
 232         // Full automated mode for thread-MPI (the default). PME can
 233         // run well on a GPU shared with NB, and we permit mdrun to
 234         // default to it if there is only one GPU available.
 235         return (gpuIdsToUse.size() == 1);
 236     }
 237
 238     // Not enough support for PME on GPUs for anything else
 239     return false;
 240 }
 241
 242 bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarget,
 243                                         const std::vector<int>    &userGpuTaskAssignment,
 244                                         const EmulateGpuNonbonded  emulateGpuNonbonded,
 245                                         const bool                 buildSupportsNonbondedOnGpu,
 246                                         const bool                 usingVerletScheme,
 247                                         const bool                 nonbondedOnGpuIsUseful,
 248                                         const bool                 gpusWereDetected)
 249 {
 250     if (nonbondedTarget == TaskTarget::Cpu)
 251     {
 252         if (!userGpuTaskAssignment.empty())
 253         {
 254             GMX_THROW(InconsistentInputError
 255                           ("A GPU task assignment was specified, but nonbonded interactions were "
 256                           "assigned to the CPU. Make no more than one of these choices."));
 257         }
 258
 259         return false;
 260     }
 261
 262     if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
 263     {
 264         GMX_THROW(InconsistentInputError
 265                       ("Nonbonded interactions on the GPU were requested with -nb gpu, "
 266                       "but the GROMACS binary has been built without GPU support. "
 267                       "Either run without selecting GPU options, or recompile GROMACS "
 268                       "with GPU support enabled"));
 269     }
 270
 271     // TODO refactor all these TaskTarget::Gpu checks into one place?
 272     // e.g. use a subfunction that handles only the cases where
 273     // TaskTargets are not Cpu?
 274     if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
 275     {
 276         if (nonbondedTarget == TaskTarget::Gpu)
 277         {
 278             GMX_THROW(InconsistentInputError
 279                           ("Nonbonded interactions on the GPU were required, which is inconsistent "
 280                           "with choosing emulation. Make no more than one of these choices."));
 281         }
 282         if (!userGpuTaskAssignment.empty())
 283         {
 284             GMX_THROW(InconsistentInputError
 285                           ("GPU ID usage was specified, as was GPU emulation. Make no more than one of these choices."));
 286         }
 287
 288         return false;
 289     }
 290
 291     if (!usingVerletScheme)
 292     {
 293         if (nonbondedTarget == TaskTarget::Gpu)
 294         {
 295             GMX_THROW(InconsistentInputError
 296                           ("Nonbonded interactions on the GPU were required, which requires using "
 297                           "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs."));
 298         }
 299
 300         return false;
 301     }
 302
 303     if (!nonbondedOnGpuIsUseful)
 304     {
 305         if (nonbondedTarget == TaskTarget::Gpu)
 306         {
 307             GMX_THROW(InconsistentInputError
 308                           ("Nonbonded interactions on the GPU were required, but not supported for these "
 309                           "simulation settings. Change your settings, or do not require using GPUs."));
 310         }
 311
 312         return false;
 313     }
 314
 315     if (!userGpuTaskAssignment.empty())
 316     {
 317         // Specifying -gputasks requires specifying everything.
 318         if (nonbondedTarget == TaskTarget::Auto)
 319         {
 320             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
 321         }
 322
 323         return true;
 324     }
 325
 326     if (nonbondedTarget == TaskTarget::Gpu)
 327     {
 328         // We still don't know whether it is an error if no GPUs are found
 329         // because we don't know the duty of this rank, yet. For example,
 330         // a node with only PME ranks and -pme cpu is OK if there are not
 331         // GPUs.
 332         return true;
 333     }
 334
 335     // If we get here, then the user permitted GPUs, which we should
 336     // use for nonbonded interactions.
 337     return gpusWereDetected;
 338 }
 339
 340 bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
 341                                   const TaskTarget        pmeTarget,
 342                                   const std::vector<int> &userGpuTaskAssignment,
 343                                   const gmx_hw_info_t    &hardwareInfo,
 344                                   const t_inputrec       &inputrec,
 345                                   const gmx_mtop_t       &mtop,
 346                                   const int               numRanksPerSimulation,
 347                                   const int               numPmeRanksPerSimulation,
 348                                   const bool              gpusWereDetected)
 349 {
 350     if (pmeTarget == TaskTarget::Cpu)
 351     {
 352         return false;
 353     }
 354
 355     if (!useGpuForNonbonded)
 356     {
 357         if (pmeTarget == TaskTarget::Gpu)
 358         {
 359             GMX_THROW(NotImplementedError
 360                           ("PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
 361         }
 362         return false;
 363     }
 364
 365     std::string message;
 366     if (!pme_gpu_supports_build(&message))
 367     {
 368         if (pmeTarget == TaskTarget::Gpu)
 369         {
 370             GMX_THROW(NotImplementedError
 371                           ("Cannot compute PME interactions on a GPU, because " + message));
 372         }
 373         return false;
 374     }
 375     if (!pme_gpu_supports_hardware(hardwareInfo, &message))
 376     {
 377         if (pmeTarget == TaskTarget::Gpu)
 378         {
 379             GMX_THROW(NotImplementedError
 380                           ("Cannot compute PME interactions on a GPU, because " + message));
 381         }
 382         return false;
 383     }
 384     if (!pme_gpu_supports_input(inputrec, mtop, &message))
 385     {
 386         if (pmeTarget == TaskTarget::Gpu)
 387         {
 388             GMX_THROW(NotImplementedError
 389                           ("Cannot compute PME interactions on a GPU, because " + message));
 390         }
 391         return false;
 392     }
 393
 394     if (pmeTarget == TaskTarget::Cpu)
 395     {
 396         if (!userGpuTaskAssignment.empty())
 397         {
 398             GMX_THROW(InconsistentInputError
 399                           ("A GPU task assignment was specified, but PME interactions were "
 400                           "assigned to the CPU. Make no more than one of these choices."));
 401         }
 402
 403         return false;
 404     }
 405
 406     if (!userGpuTaskAssignment.empty())
 407     {
 408         // Specifying -gputasks requires specifying everything.
 409         if (pmeTarget == TaskTarget::Auto)
 410         {
 411             GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
 412         }
 413
 414         return true;
 415     }
 416
 417     // We still don't know whether it is an error if no GPUs are found
 418     // because we don't know the duty of this rank, yet. For example,
 419     // a node with only PME ranks and -pme cpu is OK if there are not
 420     // GPUs.
 421
 422     if (pmeTarget == TaskTarget::Gpu)
 423     {
 424         if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
 425             (numPmeRanksPerSimulation > 1))
 426         {
 427             GMX_THROW(NotImplementedError
 428                           ("PME tasks were required to run on GPUs, but that is not implemented with "
 429                           "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
 430                           "or permit PME tasks to be assigned to the CPU."));
 431         }
 432         return true;
 433     }
 434
 435     // If we get here, then the user permitted GPUs.
 436     if (numRanksPerSimulation == 1)
 437     {
 438         // PME can run well on a single GPU shared with NB when there
 439         // is one rank, so we permit mdrun to try that if we have
 440         // detected GPUs.
 441         return gpusWereDetected;
 442     }
 443
 444     // Not enough support for PME on GPUs for anything else
 445     return false;
 446 }
 447
 448 bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
 449                                      const bool       useGpuForPme,
 450                                      const bool       usingVerletScheme,
 451                                      const TaskTarget bondedTarget,
 452                                      const bool       canUseGpuForBonded,
 453                                      const bool       usingLJPme,
 454                                      const bool       usingElecPmeOrEwald,
 455                                      const int        numPmeRanksPerSimulation,
 456                                      const bool       gpusWereDetected)
 457 {
 458     if (bondedTarget == TaskTarget::Cpu)
 459     {
 460         return false;
 461     }
 462
 463     if (!usingVerletScheme)
 464     {
 465         if (bondedTarget == TaskTarget::Gpu)
 466         {
 467             GMX_THROW(InconsistentInputError
 468                           ("Bonded interactions on the GPU were required, which requires using "
 469                           "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs."));
 470         }
 471
 472         return false;
 473     }
 474
 475     if (!canUseGpuForBonded)
 476     {
 477         if (bondedTarget == TaskTarget::Gpu)
 478         {
 479             GMX_THROW(InconsistentInputError
 480                           ("Bonded interactions on the GPU were required, but not supported for these "
 481                           "simulation settings. Change your settings, or do not require using GPUs."));
 482         }
 483
 484         return false;
 485     }
 486
 487     if (!useGpuForNonbonded)
 488     {
 489         if (bondedTarget == TaskTarget::Gpu)
 490         {
 491             GMX_THROW(InconsistentInputError
 492                           ("Bonded interactions on the GPU were required, but this requires that "
 493                           "short-ranged non-bonded interactions are also run on the GPU. Change "
 494                           "your settings, or do not require using GPUs."));
 495         }
 496
 497         return false;
 498     }
 499
 500     // TODO If the bonded kernels do not get fused, then performance
 501     // overheads might suggest alternative choices here.
 502
 503     if (bondedTarget == TaskTarget::Gpu)
 504     {
 505         // We still don't know whether it is an error if no GPUs are
 506         // found.
 507         return true;
 508     }
 509
 510     // If we get here, then the user permitted GPUs, which we should
 511     // use for bonded interactions if any were detected and the CPU
 512     // is busy, for which we currently only check PME or Ewald.
 513     // (It would be better to dynamically assign bondeds based on timings)
 514     // Note that here we assume that the auto setting of PME ranks will not
 515     // choose seperate PME ranks when nonBonded are assigned to the GPU.
 516     bool usingOurCpuForPmeOrEwald = (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
 517
 518     return gpusWereDetected && usingOurCpuForPmeOrEwald;
 519 }
 520
 521 }  // namespace gmx