src/gromacs/gmxlib/gpu_utils/gpu_utils.cu

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #include "gmxpre.h"
  37
  38 #include <stdio.h>
  39 #include <stdlib.h>
  40 #include <assert.h>
  41
  42 #include "gromacs/legacyheaders/types/hw_info.h"
  43
  44 #include "gromacs/legacyheaders/gpu_utils.h"
  45 #include "../cuda_tools/cudautils.cuh"
  46
  47 #include "gromacs/utility/cstringutil.h"
  48 #include "gromacs/utility/smalloc.h"
  49
  50 /*! \brief
  51  * Max number of devices supported by CUDA (for consistency checking).
  52  *
  53  * In reality it is 16 with CUDA <=v5.0, but let's stay on the safe side.
  54  */
  55 static int cuda_max_device_count = 32;
  56
  57 /** Dummy kernel used for sanity checking. */
  58 __global__ void k_dummy_test()
  59 {
  60 }
  61
  62
  63 /*!
  64  * \brief Runs GPU sanity checks.
  65  *
  66  * Runs a series of checks to determine that the given GPU and underlying CUDA
  67  * driver/runtime functions properly.
  68  * Returns properties of a device with given ID or the one that has
  69  * already been initialized earlier in the case if of \dev_id == -1.
  70  *
  71  * \param[in]  dev_id      the device ID of the GPU or -1 if the device has already been initialized
  72  * \param[out] dev_prop    pointer to the structure in which the device properties will be returned
  73  * \returns                0 if the device looks OK
  74  *
  75  * TODO: introduce errors codes and handle errors more smoothly.
  76  */
  77 static int do_sanity_checks(int dev_id, cudaDeviceProp *dev_prop)
  78 {
  79     cudaError_t cu_err;
  80     int         dev_count, id;
  81
  82     cu_err = cudaGetDeviceCount(&dev_count);
  83     if (cu_err != cudaSuccess)
  84     {
  85         fprintf(stderr, "Error %d while querying device count: %s\n", cu_err,
  86                 cudaGetErrorString(cu_err));
  87         return -1;
  88     }
  89
  90     /* no CUDA compatible device at all */
  91     if (dev_count == 0)
  92     {
  93         return -1;
  94     }
  95
  96     /* things might go horribly wrong if cudart is not compatible with the driver */
  97     if (dev_count < 0 || dev_count > cuda_max_device_count)
  98     {
  99         return -1;
 100     }
 101
 102     if (dev_id == -1) /* device already selected let's not destroy the context */
 103     {
 104         cu_err = cudaGetDevice(&id);
 105         if (cu_err != cudaSuccess)
 106         {
 107             fprintf(stderr, "Error %d while querying device id: %s\n", cu_err,
 108                     cudaGetErrorString(cu_err));
 109             return -1;
 110         }
 111     }
 112     else
 113     {
 114         id = dev_id;
 115         if (id > dev_count - 1) /* pfff there's no such device */
 116         {
 117             fprintf(stderr, "The requested device with id %d does not seem to exist (device count=%d)\n",
 118                     dev_id, dev_count);
 119             return -1;
 120         }
 121     }
 122
 123     memset(dev_prop, 0, sizeof(cudaDeviceProp));
 124     cu_err = cudaGetDeviceProperties(dev_prop, id);
 125     if (cu_err != cudaSuccess)
 126     {
 127         fprintf(stderr, "Error %d while querying device properties: %s\n", cu_err,
 128                 cudaGetErrorString(cu_err));
 129         return -1;
 130     }
 131
 132     /* both major & minor is 9999 if no CUDA capable devices are present */
 133     if (dev_prop->major == 9999 && dev_prop->minor == 9999)
 134     {
 135         return -1;
 136     }
 137     /* we don't care about emulation mode */
 138     if (dev_prop->major == 0)
 139     {
 140         return -1;
 141     }
 142
 143     if (id != -1)
 144     {
 145         cu_err = cudaSetDevice(id);
 146         if (cu_err != cudaSuccess)
 147         {
 148             fprintf(stderr, "Error %d while switching to device #%d: %s\n",
 149                     cu_err, id, cudaGetErrorString(cu_err));
 150             return -1;
 151         }
 152     }
 153
 154     /* try to execute a dummy kernel */
 155     k_dummy_test<<< 1, 512>>> ();
 156     if (cudaThreadSynchronize() != cudaSuccess)
 157     {
 158         return -1;
 159     }
 160
 161     /* destroy context if we created one */
 162     if (id != -1)
 163     {
 164 #if CUDA_VERSION < 4000
 165         cu_err = cudaThreadExit();
 166         CU_RET_ERR(cu_err, "cudaThreadExit failed");
 167 #else
 168         cu_err = cudaDeviceReset();
 169         CU_RET_ERR(cu_err, "cudaDeviceReset failed");
 170 #endif
 171     }
 172
 173     return 0;
 174 }
 175
 176 /*! \brief Initializes the GPU with the given index.
 177  *
 178  * The varible \mygpu is the index of the GPU to initialize in the
 179  * gpu_info.cuda_dev array.
 180  *
 181  * \param[in]  mygpu        index of the GPU to initialize
 182  * \param[out] result_str   the message related to the error that occurred
 183  *                          during the initialization (if there was any).
 184  * \param[in] gpu_info      GPU info of all detected devices in the system.
 185  * \param[in] gpu_opt       options for using the GPUs in gpu_info
 186  * \returns                 true if no error occurs during initialization.
 187  */
 188 gmx_bool init_gpu(int mygpu, char *result_str,
 189                   const gmx_gpu_info_t *gpu_info,
 190                   const gmx_gpu_opt_t *gpu_opt)
 191 {
 192     cudaError_t stat;
 193     char        sbuf[STRLEN];
 194     int         gpuid;
 195
 196     assert(gpu_info);
 197     assert(result_str);
 198
 199     if (mygpu < 0 || mygpu >= gpu_opt->ncuda_dev_use)
 200     {
 201         sprintf(sbuf, "Trying to initialize an inexistent GPU: "
 202                 "there are %d %s-selected GPU(s), but #%d was requested.",
 203                 gpu_opt->ncuda_dev_use, gpu_opt->bUserSet ? "user" : "auto", mygpu);
 204         gmx_incons(sbuf);
 205     }
 206
 207     gpuid = gpu_info->cuda_dev[gpu_opt->cuda_dev_use[mygpu]].id;
 208
 209     stat = cudaSetDevice(gpuid);
 210     strncpy(result_str, cudaGetErrorString(stat), STRLEN);
 211
 212     if (debug)
 213     {
 214         fprintf(stderr, "Initialized GPU ID #%d: %s\n", gpuid, gpu_info->cuda_dev[gpuid].prop.name);
 215     }
 216
 217     return (stat == cudaSuccess);
 218 }
 219
 220 /*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
 221  *
 222  * The context is explicitly destroyed and therefore all data uploaded to the GPU
 223  * is lost. This should only be called when none of this data is required anymore.
 224  *
 225  * \param[out] result_str   the message related to the error that occurred
 226  *                          during the initialization (if there was any).
 227  * \returns                 true if no error occurs during the freeing.
 228  */
 229 gmx_bool free_gpu(char *result_str)
 230 {
 231     cudaError_t stat;
 232
 233     assert(result_str);
 234
 235     if (debug)
 236     {
 237         int gpuid;
 238         stat = cudaGetDevice(&gpuid);
 239         CU_RET_ERR(stat, "cudaGetDevice failed");
 240         fprintf(stderr, "Cleaning up context on GPU ID #%d\n", gpuid);
 241     }
 242
 243 #if CUDA_VERSION < 4000
 244     stat = cudaThreadExit();
 245 #else
 246     stat = cudaDeviceReset();
 247 #endif
 248     strncpy(result_str, cudaGetErrorString(stat), STRLEN);
 249
 250     return (stat == cudaSuccess);
 251 }
 252
 253 /*! \brief Returns true if the gpu characterized by the device properties is
 254  *  supported by the native gpu acceleration.
 255  *
 256  * \param[in] dev_prop  the CUDA device properties of the gpus to test.
 257  * \returns             true if the GPU properties passed indicate a compatible
 258  *                      GPU, otherwise false.
 259  */
 260 static bool is_gmx_supported_gpu(const cudaDeviceProp *dev_prop)
 261 {
 262     return (dev_prop->major >= 2);
 263 }
 264
 265 /*! \brief Helper function that checks whether a given GPU status indicates compatible GPU.
 266  *
 267  * \param[in] stat  GPU status.
 268  * \returns         true if the provided status is egpuCompatible, otherwise false.
 269  */
 270 static bool is_compatible_gpu(int stat)
 271 {
 272     return (stat == egpuCompatible);
 273 }
 274
 275 /*! \brief Checks if a GPU with a given ID is supported by the native GROMACS acceleration.
 276  *
 277  *  Returns a status value which indicates compatibility or one of the following
 278  *  errors: incompatibility, insistence, or insanity (=unexpected behavior).
 279  *  It also returns the respective device's properties in \dev_prop (if applicable).
 280  *
 281  *  \param[in]  dev_id   the ID of the GPU to check.
 282  *  \param[out] dev_prop the CUDA device properties of the device checked.
 283  *  \returns             the status of the requested device
 284  */
 285 static int is_gmx_supported_gpu_id(int dev_id, cudaDeviceProp *dev_prop)
 286 {
 287     cudaError_t stat;
 288     int         ndev;
 289
 290     stat = cudaGetDeviceCount(&ndev);
 291     if (stat != cudaSuccess)
 292     {
 293         return egpuInsane;
 294     }
 295
 296     if (dev_id > ndev - 1)
 297     {
 298         return egpuNonexistent;
 299     }
 300
 301     /* TODO: currently we do not make a distinction between the type of errors
 302      * that can appear during sanity checks. This needs to be improved, e.g if
 303      * the dummy test kernel fails to execute with a "device busy message" we
 304      * should appropriately report that the device is busy instead of insane.
 305      */
 306     if (do_sanity_checks(dev_id, dev_prop) == 0)
 307     {
 308         if (is_gmx_supported_gpu(dev_prop))
 309         {
 310             return egpuCompatible;
 311         }
 312         else
 313         {
 314             return egpuIncompatible;
 315         }
 316     }
 317     else
 318     {
 319         return egpuInsane;
 320     }
 321 }
 322
 323
 324 /*! \brief Detect all NVIDIA GPUs in the system.
 325  *
 326  *  Will detect every NVIDIA GPU supported by the device driver in use. Also
 327  *  check for the compatibility of each and fill the gpu_info->cuda_dev array
 328  *  with the required information on each the device: ID, device properties,
 329  *  status.
 330  *
 331  *  \param[in] gpu_info    pointer to structure holding GPU information.
 332  *  \param[out] err_str    The error message of any CUDA API error that caused
 333  *                         the detection to fail (if there was any). The memory
 334  *                         the pointer points to should be managed externally.
 335  *  \returns               non-zero if the detection encountered a failure, zero otherwise.
 336  */
 337 int detect_cuda_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
 338 {
 339     int              i, ndev, checkres, retval;
 340     cudaError_t      stat;
 341     cudaDeviceProp   prop;
 342     cuda_dev_info_t *devs;
 343
 344     assert(gpu_info);
 345     assert(err_str);
 346
 347     gpu_info->ncuda_dev_compatible = 0;
 348
 349     ndev    = 0;
 350     devs    = NULL;
 351
 352     stat = cudaGetDeviceCount(&ndev);
 353     if (stat != cudaSuccess)
 354     {
 355         const char *s;
 356
 357         /* cudaGetDeviceCount failed which means that there is something
 358          * wrong with the machine: driver-runtime mismatch, all GPUs being
 359          * busy in exclusive mode, or some other condition which should
 360          * result in us issuing a warning a falling back to CPUs. */
 361         retval = -1;
 362         s      = cudaGetErrorString(stat);
 363         strncpy(err_str, s, STRLEN*sizeof(err_str[0]));
 364     }
 365     else
 366     {
 367         snew(devs, ndev);
 368         for (i = 0; i < ndev; i++)
 369         {
 370             checkres = is_gmx_supported_gpu_id(i, &prop);
 371
 372             devs[i].id   = i;
 373             devs[i].prop = prop;
 374             devs[i].stat = checkres;
 375
 376             if (checkres == egpuCompatible)
 377             {
 378                 gpu_info->ncuda_dev_compatible++;
 379             }
 380         }
 381         retval = 0;
 382     }
 383
 384     gpu_info->ncuda_dev = ndev;
 385     gpu_info->cuda_dev  = devs;
 386
 387     return retval;
 388 }
 389
 390 /*! \brief Select the GPUs compatible with the native GROMACS acceleration.
 391  *
 392  * This function selects the compatible gpus and initializes
 393  * gpu_info->cuda_dev_use and gpu_info->ncuda_dev_use.
 394  *
 395  * Given the list of GPUs available in the system check each device in
 396  * gpu_info->cuda_dev and place the indices of the compatible GPUs into
 397  * cuda_dev_use with this marking the respective GPUs as "available for use."
 398  * Note that \detect_cuda_gpus must have been called before.
 399  *
 400  * \param[in]     gpu_info    pointer to structure holding GPU information
 401  * \param[in,out] gpu_opt     pointer to structure holding GPU options
 402  */
 403 void pick_compatible_gpus(const gmx_gpu_info_t *gpu_info,
 404                           gmx_gpu_opt_t        *gpu_opt)
 405 {
 406     int  i, ncompat;
 407     int *compat;
 408
 409     assert(gpu_info);
 410     /* cuda_dev/ncuda_dev have to be either NULL/0 or not (NULL/0) */
 411     assert((gpu_info->ncuda_dev != 0 ? 0 : 1) ^ (gpu_info->cuda_dev == NULL ? 0 : 1));
 412
 413     snew(compat, gpu_info->ncuda_dev);
 414     ncompat = 0;
 415     for (i = 0; i < gpu_info->ncuda_dev; i++)
 416     {
 417         if (is_compatible_gpu(gpu_info->cuda_dev[i].stat))
 418         {
 419             ncompat++;
 420             compat[ncompat - 1] = i;
 421         }
 422     }
 423
 424     gpu_opt->ncuda_dev_use = ncompat;
 425     snew(gpu_opt->cuda_dev_use, ncompat);
 426     memcpy(gpu_opt->cuda_dev_use, compat, ncompat*sizeof(*compat));
 427     sfree(compat);
 428 }
 429
 430 /*! \brief Check the existence/compatibility of a set of GPUs specified by their device IDs.
 431  *
 432  * Given the a list of gpu->ncuda_dev_use GPU device IDs stored in
 433  * gpu_opt->cuda_dev_use check the existence and compatibility
 434  * of the respective GPUs. Also provide the caller with an array containing
 435  * the result of checks in \checkres.
 436  *
 437  * \param[out]  checkres    check result for each ID passed in \requested_devs
 438  * \param[in]   gpu_info    pointer to structure holding GPU information
 439  * \param[out]  gpu_opt     pointer to structure holding GPU options
 440  * \returns                 TRUE if every the requested GPUs are compatible
 441  */
 442 gmx_bool check_selected_cuda_gpus(int                  *checkres,
 443                                   const gmx_gpu_info_t *gpu_info,
 444                                   gmx_gpu_opt_t        *gpu_opt)
 445 {
 446     int  i, id;
 447     bool bAllOk;
 448
 449     assert(checkres);
 450     assert(gpu_info);
 451     assert(gpu_opt->ncuda_dev_use >= 0);
 452
 453     if (gpu_opt->ncuda_dev_use == 0)
 454     {
 455         return TRUE;
 456     }
 457
 458     assert(gpu_opt->cuda_dev_use);
 459
 460     /* we will assume that all GPUs requested are valid IDs,
 461        otherwise we'll bail anyways */
 462
 463     bAllOk = true;
 464     for (i = 0; i < gpu_opt->ncuda_dev_use; i++)
 465     {
 466         id = gpu_opt->cuda_dev_use[i];
 467
 468         /* devices are stored in increasing order of IDs in cuda_dev */
 469         gpu_opt->cuda_dev_use[i] = id;
 470
 471         checkres[i] = (id >= gpu_info->ncuda_dev) ?
 472             egpuNonexistent : gpu_info->cuda_dev[id].stat;
 473
 474         bAllOk = bAllOk && is_compatible_gpu(checkres[i]);
 475     }
 476
 477     return bAllOk;
 478 }
 479
 480 /*! \brief Frees the cuda_dev and cuda_dev_use array fields of \gpu_info.
 481  *
 482  * \param[in]    gpu_info    pointer to structure holding GPU information
 483  */
 484 void free_gpu_info(const gmx_gpu_info_t *gpu_info)
 485 {
 486     if (gpu_info == NULL)
 487     {
 488         return;
 489     }
 490
 491     sfree(gpu_info->cuda_dev);
 492 }
 493
 494 /*! \brief Formats and returns a device information string for a given GPU.
 495  *
 496  * Given an index *directly* into the array of available GPUs (cuda_dev)
 497  * returns a formatted info string for the respective GPU which includes
 498  * ID, name, compute capability, and detection status.
 499  *
 500  * \param[out]  s           pointer to output string (has to be allocated externally)
 501  * \param[in]   gpu_info    pointer to structure holding GPU information
 502  * \param[in]   index       an index *directly* into the array of available GPUs
 503  */
 504 void get_gpu_device_info_string(char *s, const gmx_gpu_info_t *gpu_info, int index)
 505 {
 506     assert(s);
 507     assert(gpu_info);
 508
 509     if (index < 0 && index >= gpu_info->ncuda_dev)
 510     {
 511         return;
 512     }
 513
 514     cuda_dev_info_t *dinfo = &gpu_info->cuda_dev[index];
 515
 516     bool             bGpuExists =
 517         dinfo->stat == egpuCompatible ||
 518         dinfo->stat == egpuIncompatible;
 519
 520     if (!bGpuExists)
 521     {
 522         sprintf(s, "#%d: %s, stat: %s",
 523                 dinfo->id, "N/A",
 524                 gpu_detect_res_str[dinfo->stat]);
 525     }
 526     else
 527     {
 528         sprintf(s, "#%d: NVIDIA %s, compute cap.: %d.%d, ECC: %3s, stat: %s",
 529                 dinfo->id, dinfo->prop.name,
 530                 dinfo->prop.major, dinfo->prop.minor,
 531                 dinfo->prop.ECCEnabled ? "yes" : " no",
 532                 gpu_detect_res_str[dinfo->stat]);
 533     }
 534 }
 535
 536 /*! \brief Returns the device ID of the GPU with a given index into the array of used GPUs.
 537  *
 538  * Getter function which, given an index into the array of GPUs in use
 539  * (cuda_dev_use) -- typically a tMPI/MPI rank --, returns the device ID of the
 540  * respective CUDA GPU.
 541  *
 542  * \param[in]    gpu_info   pointer to structure holding GPU information
 543  * \param[in]    gpu_opt    pointer to structure holding GPU options
 544  * \param[in]    idx        index into the array of used GPUs
 545  * \returns                 device ID of the requested GPU
 546  */
 547 int get_gpu_device_id(const gmx_gpu_info_t *gpu_info,
 548                       const gmx_gpu_opt_t  *gpu_opt,
 549                       int                   idx)
 550 {
 551     assert(gpu_info);
 552     assert(gpu_opt);
 553     assert(idx >= 0 && idx < gpu_opt->ncuda_dev_use);
 554
 555     return gpu_info->cuda_dev[gpu_opt->cuda_dev_use[idx]].id;
 556 }
 557
 558 /*! \brief Returns the device ID of the GPU currently in use.
 559  *
 560  * The GPU used is the one that is active at the time of the call in the active context.
 561  *
 562  * \param[in]    gpu_info   pointer to structure holding GPU information
 563  * \returns                 device ID of the GPU in use at the time of the call
 564  */
 565 int get_current_gpu_device_id(void)
 566 {
 567     int gpuid;
 568     CU_RET_ERR(cudaGetDevice(&gpuid), "cudaGetDevice failed");
 569
 570     return gpuid;
 571 }
 572
 573 /*! \brief Returns the size of the cuda_dev_info struct.
 574  *
 575  * The size of cuda_dev_info can be used for allocation and communication.
 576  *
 577  * \returns                 size in bytes of cuda_dev_info
 578  */
 579 size_t sizeof_cuda_dev_info(void)
 580 {
 581     return sizeof(cuda_dev_info);
 582 }