src/gromacs/gmxlib/gpu_utils/gpu_utils.cu

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #include "gmxpre.h"
  37
  38 #include "gromacs/legacyheaders/gpu_utils.h"
  39
  40 #include <assert.h>
  41 #include <stdio.h>
  42 #include <stdlib.h>
  43
  44 #include "gromacs/gmxlib/cuda_tools/cudautils.cuh"
  45 #include "gromacs/legacyheaders/types/hw_info.h"
  46 #include "gromacs/utility/cstringutil.h"
  47 #include "gromacs/utility/smalloc.h"
  48
  /*! \brief
   * Upper bound on the number of CUDA devices (for consistency checking).
   *
   * A device count reported by the runtime that is larger than this value is
   * treated as a sign that something is wrong with the driver/runtime setup.
   * In reality the limit is 16 with CUDA <=v5.0, but let's stay on the safe side.
   *
   * The value is only ever read, hence it is declared const.
   */
  static const int cuda_max_device_count = 32;
  55
  /*! \brief Empty kernel used for sanity checking.
   *
   * Takes no arguments and performs no work; launching it only verifies that
   * a kernel can be started on the current device.
   */
  __global__ void k_dummy_test(void)
  {
      /* intentionally empty */
  }
  60
  61
  /*!
   * \brief Runs GPU sanity checks.
   *
   * Runs a series of checks to determine that the given GPU and the underlying
   * CUDA driver/runtime function properly: the device count is queried and
   * validated, the device properties are fetched, and a dummy kernel is
   * launched and waited for.
   *
   * If \p dev_id is -1 the checks run on the device that is already current
   * and its context is left untouched. Otherwise device \p dev_id is made
   * current for the checks and its context is destroyed again before a
   * successful return. On any failure the function returns immediately.
   *
   * \param[in]  dev_id      the device ID of the GPU or -1 if the device has already been initialized
   * \param[out] dev_prop    pointer to the structure in which the device properties will be returned
   * \returns                0 if the device looks OK, -1 otherwise
   *
   * TODO: introduce errors codes and handle errors more smoothly.
   */
  static int do_sanity_checks(int dev_id, cudaDeviceProp *dev_prop)
  {
      cudaError_t cu_err;
      int         dev_count, id;
      /* Decide once, based on the *requested* ID, whether we own the context.
       * The resolved id below is never -1, so it must not be used for this. */
      const bool  bUseCurrentDevice = (dev_id == -1);

      assert(dev_prop);

      cu_err = cudaGetDeviceCount(&dev_count);
      if (cu_err != cudaSuccess)
      {
          fprintf(stderr, "Error %d while querying device count: %s\n", cu_err,
                  cudaGetErrorString(cu_err));
          return -1;
      }

      /* no CUDA compatible device at all */
      if (dev_count == 0)
      {
          return -1;
      }

      /* things might go horribly wrong if cudart is not compatible with the driver */
      if (dev_count < 0 || dev_count > cuda_max_device_count)
      {
          return -1;
      }

      if (bUseCurrentDevice) /* device already selected let's not destroy the context */
      {
          cu_err = cudaGetDevice(&id);
          if (cu_err != cudaSuccess)
          {
              fprintf(stderr, "Error %d while querying device id: %s\n", cu_err,
                      cudaGetErrorString(cu_err));
              return -1;
          }
      }
      else
      {
          id = dev_id;
          /* reject both too large and negative IDs: there's no such device */
          if (id < 0 || id >= dev_count)
          {
              fprintf(stderr, "The requested device with id %d does not seem to exist (device count=%d)\n",
                      dev_id, dev_count);
              return -1;
          }
      }

      memset(dev_prop, 0, sizeof(cudaDeviceProp));
      cu_err = cudaGetDeviceProperties(dev_prop, id);
      if (cu_err != cudaSuccess)
      {
          fprintf(stderr, "Error %d while querying device properties: %s\n", cu_err,
                  cudaGetErrorString(cu_err));
          return -1;
      }

      /* both major & minor is 9999 if no CUDA capable devices are present */
      if (dev_prop->major == 9999 && dev_prop->minor == 9999)
      {
          return -1;
      }
      /* we don't care about emulation mode */
      if (dev_prop->major == 0)
      {
          return -1;
      }

      /* only switch devices when a specific one was requested */
      if (!bUseCurrentDevice)
      {
          cu_err = cudaSetDevice(id);
          if (cu_err != cudaSuccess)
          {
              fprintf(stderr, "Error %d while switching to device #%d: %s\n",
                      cu_err, id, cudaGetErrorString(cu_err));
              return -1;
          }
      }

      /* Discard any stale error left behind by earlier API calls so that it
       * is not mistaken for a failure of the launch below. */
      (void) cudaGetLastError();

      /* try to execute a dummy kernel */
      k_dummy_test<<< 1, 512>>> ();

      /* launch failures are reported through the last-error state */
      cu_err = cudaGetLastError();
      if (cu_err != cudaSuccess)
      {
          return -1;
      }

      /* wait for the kernel; execution failures surface here */
  #if CUDA_VERSION < 4000
      cu_err = cudaThreadSynchronize();
  #else
      cu_err = cudaDeviceSynchronize();
  #endif
      if (cu_err != cudaSuccess)
      {
          return -1;
      }

      /* destroy context if we created one */
      if (!bUseCurrentDevice)
      {
  #if CUDA_VERSION < 4000
          cu_err = cudaThreadExit();
          CU_RET_ERR(cu_err, "cudaThreadExit failed");
  #else
          cu_err = cudaDeviceReset();
          CU_RET_ERR(cu_err, "cudaDeviceReset failed");
  #endif
      }

      return 0;
  }
 174
 /*! \brief Initializes the GPU with the given index.
  *
  * The variable \p mygpu is an index into gpu_opt->cuda_dev_use, whose entry in
  * turn is the index of the GPU to initialize in the gpu_info->cuda_dev array.
  * The device is made current with cudaSetDevice() and the CUDA status string
  * of that call is copied into \p result_str.
  *
  * \param[in]  mygpu        index of the GPU to initialize
  * \param[out] result_str   the message related to the error that occurred
  *                          during the initialization (if there was any);
  *                          up to STRLEN bytes are written.
  * \param[in] gpu_info      GPU info of all detected devices in the system.
  * \param[in] gpu_opt       options for using the GPUs in gpu_info
  * \returns                 true if no error occurs during initialization.
  */
 gmx_bool init_gpu(int mygpu, char *result_str,
                   const gmx_gpu_info_t *gpu_info,
                   const gmx_gpu_opt_t *gpu_opt)
 {
     cudaError_t stat;
     char        sbuf[STRLEN];
     int         dev_index, gpuid;

     assert(gpu_info);
     assert(gpu_opt);
     assert(result_str);

     if (mygpu < 0 || mygpu >= gpu_opt->ncuda_dev_use)
     {
         sprintf(sbuf, "Trying to initialize an inexistent GPU: "
                 "there are %d %s-selected GPU(s), but #%d was requested.",
                 gpu_opt->ncuda_dev_use, gpu_opt->bUserSet ? "user" : "auto", mygpu);
         gmx_incons(sbuf);
     }

     /* cuda_dev_use holds indices into cuda_dev; keep the index around so
      * that the same array entry is used for both the ID and the name. */
     dev_index = gpu_opt->cuda_dev_use[mygpu];
     gpuid     = gpu_info->cuda_dev[dev_index].id;

     stat = cudaSetDevice(gpuid);
     strncpy(result_str, cudaGetErrorString(stat), STRLEN);
     /* strncpy does not terminate the string if the source fills the buffer */
     result_str[STRLEN - 1] = '\0';

     if (debug)
     {
         fprintf(stderr, "Initialized GPU ID #%d: %s\n", gpuid, gpu_info->cuda_dev[dev_index].prop.name);
     }

     return (stat == cudaSuccess);
 }
 218
 /*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
  *
  * The context is explicitly destroyed and therefore all data uploaded to the GPU
  * is lost. This should only be called when none of this data is required anymore.
  *
  * \param[out] result_str   the message related to the error that occurred
  *                          during the freeing (if there was any); up to
  *                          STRLEN bytes are written.
  * \returns                 true if no error occurs during the freeing.
  */
 gmx_bool free_gpu(char *result_str)
 {
     cudaError_t stat;

     assert(result_str);

     if (debug)
     {
         int gpuid;
         stat = cudaGetDevice(&gpuid);
         CU_RET_ERR(stat, "cudaGetDevice failed");
         fprintf(stderr, "Cleaning up context on GPU ID #%d\n", gpuid);
     }

     /* cudaDeviceReset replaced cudaThreadExit starting with CUDA 4.0 */
 #if CUDA_VERSION < 4000
     stat = cudaThreadExit();
 #else
     stat = cudaDeviceReset();
 #endif
     strncpy(result_str, cudaGetErrorString(stat), STRLEN);
     /* strncpy does not terminate the string if the source fills the buffer */
     result_str[STRLEN - 1] = '\0';

     return (stat == cudaSuccess);
 }
 251
 /*! \brief Tells whether a device with the given properties can be used by
  *  the native gpu acceleration.
  *
  * Only the major compute capability is inspected: it has to be at least 2.
  *
  * \param[in] dev_prop  the CUDA device properties of the gpus to test.
  * \returns             true if the GPU properties passed indicate a compatible
  *                      GPU, otherwise false.
  */
 static bool is_gmx_supported_gpu(const cudaDeviceProp *dev_prop)
 {
     const int min_major_cc = 2;

     return !(dev_prop->major < min_major_cc);
 }
 263
 /*! \brief Predicate telling whether a GPU detection status means "compatible".
  *
  * \param[in] stat  GPU status.
  * \returns         true if the provided status is egpuCompatible, otherwise false.
  */
 static bool is_compatible_gpu(int stat)
 {
     const bool bCompatible = (egpuCompatible == stat);

     return bCompatible;
 }
 273
 /*! \brief Checks if a GPU with a given ID is supported by the native GROMACS acceleration.
  *
  *  Returns a status value which indicates compatibility or one of the following
  *  errors: incompatibility, non-existence, or insanity (=unexpected behavior).
  *  The device properties are returned in \p dev_prop when the sanity checks
  *  were run for the device.
  *
  *  \param[in]  dev_id   the ID of the GPU to check.
  *  \param[out] dev_prop the CUDA device properties of the device checked.
  *  \returns             the status of the requested device
  */
 static int is_gmx_supported_gpu_id(int dev_id, cudaDeviceProp *dev_prop)
 {
     int device_count;

     /* a failing device count query means the setup cannot be trusted */
     if (cudaGetDeviceCount(&device_count) != cudaSuccess)
     {
         return egpuInsane;
     }

     /* IDs beyond the last detected device do not exist */
     if (dev_id >= device_count)
     {
         return egpuNonexistent;
     }

     /* TODO: currently we do not make a distinction between the type of errors
      * that can appear during sanity checks. This needs to be improved, e.g if
      * the dummy test kernel fails to execute with a "device busy message" we
      * should appropriately report that the device is busy instead of insane.
      */
     if (do_sanity_checks(dev_id, dev_prop) != 0)
     {
         return egpuInsane;
     }

     return is_gmx_supported_gpu(dev_prop) ? egpuCompatible : egpuIncompatible;
 }
 321
 322
 /*! \brief Detect all NVIDIA GPUs in the system.
  *
  *  Will detect every NVIDIA GPU supported by the device driver in use. Also
  *  check for the compatibility of each and fill the gpu_info->cuda_dev array
  *  with the required information on each device: ID, device properties,
  *  status. gpu_info->ncuda_dev and gpu_info->ncuda_dev_compatible are set
  *  as well; on failure the device array is NULL and both counts are zero.
  *
  *  \param[out] gpu_info   pointer to structure holding GPU information.
  *  \param[out] err_str    The error message of any CUDA API error that caused
  *                         the detection to fail (if there was any). The memory
  *                         the pointer points to should be managed externally;
  *                         up to STRLEN bytes are written.
  *  \returns               non-zero if the detection encountered a failure, zero otherwise.
  */
 int detect_cuda_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
 {
     int              i, ndev, checkres, retval;
     cudaError_t      stat;
     cudaDeviceProp   prop;
     cuda_dev_info_t *devs;

     assert(gpu_info);
     assert(err_str);

     gpu_info->ncuda_dev_compatible = 0;

     ndev    = 0;
     devs    = NULL;

     stat = cudaGetDeviceCount(&ndev);
     if (stat != cudaSuccess)
     {
         /* cudaGetDeviceCount failed which means that there is something
          * wrong with the machine: driver-runtime mismatch, all GPUs being
          * busy in exclusive mode, or some other condition which should
          * result in us issuing a warning and falling back to CPUs. */
         retval = -1;
         strncpy(err_str, cudaGetErrorString(stat), STRLEN);
         /* strncpy does not terminate the string if the source fills the buffer */
         err_str[STRLEN - 1] = '\0';
         /* keep the count consistent with the NULL device array */
         ndev = 0;
     }
     else
     {
         snew(devs, ndev);
         for (i = 0; i < ndev; i++)
         {
             /* The check can bail out before the properties are filled in;
              * start from a zeroed struct so that no uninitialized data is
              * copied into the device list. */
             memset(&prop, 0, sizeof(prop));

             checkres = is_gmx_supported_gpu_id(i, &prop);

             devs[i].id   = i;
             devs[i].prop = prop;
             devs[i].stat = checkres;

             if (checkres == egpuCompatible)
             {
                 gpu_info->ncuda_dev_compatible++;
             }
         }
         retval = 0;
     }

     gpu_info->ncuda_dev = ndev;
     gpu_info->cuda_dev  = devs;

     return retval;
 }
 388
 /*! \brief Select the GPUs compatible with the native GROMACS acceleration.
  *
  * This function selects the compatible gpus and initializes
  * gpu_opt->cuda_dev_use and gpu_opt->ncuda_dev_use.
  *
  * Given the list of GPUs available in the system check each device in
  * gpu_info->cuda_dev and place the indices of the compatible GPUs into
  * cuda_dev_use with this marking the respective GPUs as "available for use."
  * Note that detect_cuda_gpus() must have been called before.
  *
  * \param[in]     gpu_info    pointer to structure holding GPU information
  * \param[in,out] gpu_opt     pointer to structure holding GPU options
  */
 void pick_compatible_gpus(const gmx_gpu_info_t *gpu_info,
                           gmx_gpu_opt_t        *gpu_opt)
 {
     int i, ncompat;

     assert(gpu_info);
     assert(gpu_opt);
     /* cuda_dev/ncuda_dev have to be either NULL/0 or not (NULL/0) */
     assert((gpu_info->ncuda_dev == 0) == (gpu_info->cuda_dev == NULL));

     /* First pass: count the compatible devices so that the output array
      * can be allocated with its final size. */
     ncompat = 0;
     for (i = 0; i < gpu_info->ncuda_dev; i++)
     {
         if (is_compatible_gpu(gpu_info->cuda_dev[i].stat))
         {
             ncompat++;
         }
     }

     gpu_opt->ncuda_dev_use = ncompat;
     snew(gpu_opt->cuda_dev_use, ncompat);

     /* Second pass: store the indices directly. This avoids the temporary
      * buffer and a memcpy on pointers that may be NULL when nothing is
      * compatible. */
     ncompat = 0;
     for (i = 0; i < gpu_info->ncuda_dev; i++)
     {
         if (is_compatible_gpu(gpu_info->cuda_dev[i].stat))
         {
             gpu_opt->cuda_dev_use[ncompat] = i;
             ncompat++;
         }
     }
 }
 428
 /*! \brief Check the existence/compatibility of a set of GPUs specified by their device IDs.
  *
  * Given the list of gpu_opt->ncuda_dev_use GPU device IDs stored in
  * gpu_opt->cuda_dev_use check the existence and compatibility
  * of the respective GPUs. Also provide the caller with an array containing
  * the result of checks in \p checkres.
  *
  * \param[out]  checkres    check result for each ID in gpu_opt->cuda_dev_use;
  *                          must have room for gpu_opt->ncuda_dev_use entries
  * \param[in]   gpu_info    pointer to structure holding GPU information
  * \param[in]   gpu_opt     pointer to structure holding GPU options
  * \returns                 TRUE if all the requested GPUs are compatible
  */
 gmx_bool check_selected_cuda_gpus(int                  *checkres,
                                   const gmx_gpu_info_t *gpu_info,
                                   gmx_gpu_opt_t        *gpu_opt)
 {
     int  i, id;
     bool bAllOk;

     assert(checkres);
     assert(gpu_info);
     assert(gpu_opt);
     assert(gpu_opt->ncuda_dev_use >= 0);

     if (gpu_opt->ncuda_dev_use == 0)
     {
         return TRUE;
     }

     assert(gpu_opt->cuda_dev_use);

     bAllOk = true;
     for (i = 0; i < gpu_opt->ncuda_dev_use; i++)
     {
         id = gpu_opt->cuda_dev_use[i];

         /* Devices are stored in increasing order of IDs in cuda_dev, so an
          * ID is used directly as index. IDs outside of [0, ncuda_dev) do
          * not refer to a detected device and must not be used to index
          * the array. */
         if (id < 0 || id >= gpu_info->ncuda_dev)
         {
             checkres[i] = egpuNonexistent;
         }
         else
         {
             checkres[i] = gpu_info->cuda_dev[id].stat;
         }

         bAllOk = bAllOk && is_compatible_gpu(checkres[i]);
     }

     return bAllOk;
 }
 478
 /*! \brief Frees the cuda_dev array field of \p gpu_info.
  *
  * Does nothing when \p gpu_info is NULL.
  *
  * \param[in]    gpu_info    pointer to structure holding GPU information
  */
 void free_gpu_info(const gmx_gpu_info_t *gpu_info)
 {
     if (gpu_info != NULL)
     {
         sfree(gpu_info->cuda_dev);
     }
 }
 492
 /*! \brief Formats and returns a device information string for a given GPU.
  *
  * Given an index *directly* into the array of available GPUs (cuda_dev)
  * returns a formatted info string for the respective GPU which includes
  * ID, name, compute capability, ECC state and detection status. For devices
  * whose status is neither compatible nor incompatible only the ID and the
  * status are printed.
  *
  * If \p index is out of range the function returns without touching \p s.
  *
  * \param[out]  s           pointer to output string (has to be allocated externally)
  * \param[in]   gpu_info    pointer to structure holding GPU information
  * \param[in]   index       an index *directly* into the array of available GPUs
  */
 void get_gpu_device_info_string(char *s, const gmx_gpu_info_t *gpu_info, int index)
 {
     assert(s);
     assert(gpu_info);

     /* Either condition alone makes the index invalid, hence "||"; with "&&"
      * the test could never be true and invalid indices would be used. */
     if (index < 0 || index >= gpu_info->ncuda_dev)
     {
         return;
     }

     const cuda_dev_info_t *dinfo = &gpu_info->cuda_dev[index];

     /* name and properties are only printed for these two states */
     bool                   bGpuExists =
         dinfo->stat == egpuCompatible ||
         dinfo->stat == egpuIncompatible;

     if (!bGpuExists)
     {
         sprintf(s, "#%d: %s, stat: %s",
                 dinfo->id, "N/A",
                 gpu_detect_res_str[dinfo->stat]);
     }
     else
     {
         sprintf(s, "#%d: NVIDIA %s, compute cap.: %d.%d, ECC: %3s, stat: %s",
                 dinfo->id, dinfo->prop.name,
                 dinfo->prop.major, dinfo->prop.minor,
                 dinfo->prop.ECCEnabled ? "yes" : " no",
                 gpu_detect_res_str[dinfo->stat]);
     }
 }
 534
 /*! \brief Returns the device ID of the GPU with a given index into the array of used GPUs.
  *
  * Getter which maps an index into the array of GPUs in use (cuda_dev_use)
  * -- typically a tMPI/MPI rank -- to the device ID of the respective CUDA
  * GPU: the cuda_dev_use entry selects an element of cuda_dev, whose id
  * field is returned.
  *
  * \param[in]    gpu_info   pointer to structure holding GPU information
  * \param[in]    gpu_opt    pointer to structure holding GPU options
  * \param[in]    idx        index into the array of used GPUs
  * \returns                 device ID of the requested GPU
  */
 int get_gpu_device_id(const gmx_gpu_info_t *gpu_info,
                       const gmx_gpu_opt_t  *gpu_opt,
                       int                   idx)
 {
     assert(gpu_info);
     assert(gpu_opt);
     assert(idx >= 0 && idx < gpu_opt->ncuda_dev_use);

     const int              dev_index = gpu_opt->cuda_dev_use[idx];
     const cuda_dev_info_t *selected  = gpu_info->cuda_dev + dev_index;

     return selected->id;
 }
 556
 /*! \brief Returns the device ID of the GPU currently in use.
  *
  * The GPU used is the one that is active at the time of the call in the active context.
  *
  * \returns                 device ID of the GPU in use at the time of the call
  */
 int get_current_gpu_device_id(void)
 {
     int         gpuid = -1;
     cudaError_t stat;

     /* Issue the query as a plain statement rather than as a macro argument:
      * depending on how CU_RET_ERR is defined, an argument expression could
      * be evaluated more than once or not at all. */
     stat = cudaGetDevice(&gpuid);
     CU_RET_ERR(stat, "cudaGetDevice failed");

     return gpuid;
 }
 571
 /*! \brief Reports how many bytes a cuda_dev_info struct occupies.
  *
  * The size of cuda_dev_info can be used for allocation and communication.
  *
  * \returns                 size in bytes of cuda_dev_info
  */
 size_t sizeof_cuda_dev_info(void)
 {
     const size_t nbytes = sizeof(cuda_dev_info);

     return nbytes;
 }