src/gromacs/gmxlib/gpu_utils/gpu_utils.cu

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <assert.h>
  39
  40 #include "gromacs/legacyheaders/types/hw_info.h"
  41
  42 #include "gromacs/legacyheaders/gpu_utils.h"
  43 #include "../cuda_tools/cudautils.cuh"
  44
  45 #include "gromacs/utility/cstringutil.h"
  46 #include "gromacs/utility/smalloc.h"
  47
  48 /*! \brief
  49  * Max number of devices supported by CUDA (for consistency checking).
  50  *
  51  * In reality it is 16 with CUDA <=v5.0, but let's stay on the safe side.
  52  */
  53 static int cuda_max_device_count = 32;
  54
/** Dummy kernel used for sanity checking.
 *
 * The body is intentionally empty: the kernel is only launched (with a single
 * block of 512 threads by do_sanity_checks()) to verify that launching and
 * synchronizing on the selected device succeeds.
 */
__global__ void k_dummy_test()
{
}
  59
  60
  61 /*!
  62  * \brief Runs GPU sanity checks.
  63  *
  64  * Runs a series of checks to determine that the given GPU and underlying CUDA
  65  * driver/runtime functions properly.
  66  * Returns properties of a device with given ID or the one that has
  67  * already been initialized earlier in the case if of \dev_id == -1.
  68  *
  69  * \param[in]  dev_id      the device ID of the GPU or -1 if the device has already been initialized
  70  * \param[out] dev_prop    pointer to the structure in which the device properties will be returned
  71  * \returns                0 if the device looks OK
  72  *
  73  * TODO: introduce errors codes and handle errors more smoothly.
  74  */
  75 static int do_sanity_checks(int dev_id, cudaDeviceProp *dev_prop)
  76 {
  77     cudaError_t cu_err;
  78     int         dev_count, id;
  79
  80     cu_err = cudaGetDeviceCount(&dev_count);
  81     if (cu_err != cudaSuccess)
  82     {
  83         fprintf(stderr, "Error %d while querying device count: %s\n", cu_err,
  84                 cudaGetErrorString(cu_err));
  85         return -1;
  86     }
  87
  88     /* no CUDA compatible device at all */
  89     if (dev_count == 0)
  90     {
  91         return -1;
  92     }
  93
  94     /* things might go horribly wrong if cudart is not compatible with the driver */
  95     if (dev_count < 0 || dev_count > cuda_max_device_count)
  96     {
  97         return -1;
  98     }
  99
 100     if (dev_id == -1) /* device already selected let's not destroy the context */
 101     {
 102         cu_err = cudaGetDevice(&id);
 103         if (cu_err != cudaSuccess)
 104         {
 105             fprintf(stderr, "Error %d while querying device id: %s\n", cu_err,
 106                     cudaGetErrorString(cu_err));
 107             return -1;
 108         }
 109     }
 110     else
 111     {
 112         id = dev_id;
 113         if (id > dev_count - 1) /* pfff there's no such device */
 114         {
 115             fprintf(stderr, "The requested device with id %d does not seem to exist (device count=%d)\n",
 116                     dev_id, dev_count);
 117             return -1;
 118         }
 119     }
 120
 121     memset(dev_prop, 0, sizeof(cudaDeviceProp));
 122     cu_err = cudaGetDeviceProperties(dev_prop, id);
 123     if (cu_err != cudaSuccess)
 124     {
 125         fprintf(stderr, "Error %d while querying device properties: %s\n", cu_err,
 126                 cudaGetErrorString(cu_err));
 127         return -1;
 128     }
 129
 130     /* both major & minor is 9999 if no CUDA capable devices are present */
 131     if (dev_prop->major == 9999 && dev_prop->minor == 9999)
 132     {
 133         return -1;
 134     }
 135     /* we don't care about emulation mode */
 136     if (dev_prop->major == 0)
 137     {
 138         return -1;
 139     }
 140
 141     if (id != -1)
 142     {
 143         cu_err = cudaSetDevice(id);
 144         if (cu_err != cudaSuccess)
 145         {
 146             fprintf(stderr, "Error %d while switching to device #%d: %s\n",
 147                     cu_err, id, cudaGetErrorString(cu_err));
 148             return -1;
 149         }
 150     }
 151
 152     /* try to execute a dummy kernel */
 153     k_dummy_test<<< 1, 512>>> ();
 154     if (cudaThreadSynchronize() != cudaSuccess)
 155     {
 156         return -1;
 157     }
 158
 159     /* destroy context if we created one */
 160     if (id != -1)
 161     {
 162 #if CUDA_VERSION < 4000
 163         cu_err = cudaThreadExit();
 164         CU_RET_ERR(cu_err, "cudaThreadExit failed");
 165 #else
 166         cu_err = cudaDeviceReset();
 167         CU_RET_ERR(cu_err, "cudaDeviceReset failed");
 168 #endif
 169     }
 170
 171     return 0;
 172 }
 173
 174 /*! \brief Initializes the GPU with the given index.
 175  *
 176  * The varible \mygpu is the index of the GPU to initialize in the
 177  * gpu_info.cuda_dev array.
 178  *
 179  * \param[in]  mygpu        index of the GPU to initialize
 180  * \param[out] result_str   the message related to the error that occurred
 181  *                          during the initialization (if there was any).
 182  * \param[in] gpu_info      GPU info of all detected devices in the system.
 183  * \param[in] gpu_opt       options for using the GPUs in gpu_info
 184  * \returns                 true if no error occurs during initialization.
 185  */
 186 gmx_bool init_gpu(int mygpu, char *result_str,
 187                   const gmx_gpu_info_t *gpu_info,
 188                   const gmx_gpu_opt_t *gpu_opt)
 189 {
 190     cudaError_t stat;
 191     char        sbuf[STRLEN];
 192     int         gpuid;
 193
 194     assert(gpu_info);
 195     assert(result_str);
 196
 197     if (mygpu < 0 || mygpu >= gpu_opt->ncuda_dev_use)
 198     {
 199         sprintf(sbuf, "Trying to initialize an inexistent GPU: "
 200                 "there are %d %s-selected GPU(s), but #%d was requested.",
 201                 gpu_opt->ncuda_dev_use, gpu_opt->bUserSet ? "user" : "auto", mygpu);
 202         gmx_incons(sbuf);
 203     }
 204
 205     gpuid = gpu_info->cuda_dev[gpu_opt->cuda_dev_use[mygpu]].id;
 206
 207     stat = cudaSetDevice(gpuid);
 208     strncpy(result_str, cudaGetErrorString(stat), STRLEN);
 209
 210     if (debug)
 211     {
 212         fprintf(stderr, "Initialized GPU ID #%d: %s\n", gpuid, gpu_info->cuda_dev[gpuid].prop.name);
 213     }
 214
 215     return (stat == cudaSuccess);
 216 }
 217
 218 /*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
 219  *
 220  * The context is explicitly destroyed and therefore all data uploaded to the GPU
 221  * is lost. This should only be called when none of this data is required anymore.
 222  *
 223  * \param[out] result_str   the message related to the error that occurred
 224  *                          during the initialization (if there was any).
 225  * \returns                 true if no error occurs during the freeing.
 226  */
 227 gmx_bool free_gpu(char *result_str)
 228 {
 229     cudaError_t stat;
 230
 231     assert(result_str);
 232
 233     if (debug)
 234     {
 235         int gpuid;
 236         stat = cudaGetDevice(&gpuid);
 237         CU_RET_ERR(stat, "cudaGetDevice failed");
 238         fprintf(stderr, "Cleaning up context on GPU ID #%d\n", gpuid);
 239     }
 240
 241 #if CUDA_VERSION < 4000
 242     stat = cudaThreadExit();
 243 #else
 244     stat = cudaDeviceReset();
 245 #endif
 246     strncpy(result_str, cudaGetErrorString(stat), STRLEN);
 247
 248     return (stat == cudaSuccess);
 249 }
 250
 251 /*! \brief Returns true if the gpu characterized by the device properties is
 252  *  supported by the native gpu acceleration.
 253  *
 254  * \param[in] dev_prop  the CUDA device properties of the gpus to test.
 255  * \returns             true if the GPU properties passed indicate a compatible
 256  *                      GPU, otherwise false.
 257  */
 258 static bool is_gmx_supported_gpu(const cudaDeviceProp *dev_prop)
 259 {
 260     return (dev_prop->major >= 2);
 261 }
 262
 263 /*! \brief Helper function that checks whether a given GPU status indicates compatible GPU.
 264  *
 265  * \param[in] stat  GPU status.
 266  * \returns         true if the provided status is egpuCompatible, otherwise false.
 267  */
 268 static bool is_compatible_gpu(int stat)
 269 {
 270     return (stat == egpuCompatible);
 271 }
 272
 273 /*! \brief Checks if a GPU with a given ID is supported by the native GROMACS acceleration.
 274  *
 275  *  Returns a status value which indicates compatibility or one of the following
 276  *  errors: incompatibility, insistence, or insanity (=unexpected behavior).
 277  *  It also returns the respective device's properties in \dev_prop (if applicable).
 278  *
 279  *  \param[in]  dev_id   the ID of the GPU to check.
 280  *  \param[out] dev_prop the CUDA device properties of the device checked.
 281  *  \returns             the status of the requested device
 282  */
 283 static int is_gmx_supported_gpu_id(int dev_id, cudaDeviceProp *dev_prop)
 284 {
 285     cudaError_t stat;
 286     int         ndev;
 287
 288     stat = cudaGetDeviceCount(&ndev);
 289     if (stat != cudaSuccess)
 290     {
 291         return egpuInsane;
 292     }
 293
 294     if (dev_id > ndev - 1)
 295     {
 296         return egpuNonexistent;
 297     }
 298
 299     /* TODO: currently we do not make a distinction between the type of errors
 300      * that can appear during sanity checks. This needs to be improved, e.g if
 301      * the dummy test kernel fails to execute with a "device busy message" we
 302      * should appropriately report that the device is busy instead of insane.
 303      */
 304     if (do_sanity_checks(dev_id, dev_prop) == 0)
 305     {
 306         if (is_gmx_supported_gpu(dev_prop))
 307         {
 308             return egpuCompatible;
 309         }
 310         else
 311         {
 312             return egpuIncompatible;
 313         }
 314     }
 315     else
 316     {
 317         return egpuInsane;
 318     }
 319 }
 320
 321
 322 /*! \brief Detect all NVIDIA GPUs in the system.
 323  *
 324  *  Will detect every NVIDIA GPU supported by the device driver in use. Also
 325  *  check for the compatibility of each and fill the gpu_info->cuda_dev array
 326  *  with the required information on each the device: ID, device properties,
 327  *  status.
 328  *
 329  *  \param[in] gpu_info    pointer to structure holding GPU information.
 330  *  \param[out] err_str    The error message of any CUDA API error that caused
 331  *                         the detection to fail (if there was any). The memory
 332  *                         the pointer points to should be managed externally.
 333  *  \returns               non-zero if the detection encountered a failure, zero otherwise.
 334  */
 335 int detect_cuda_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
 336 {
 337     int              i, ndev, checkres, retval;
 338     cudaError_t      stat;
 339     cudaDeviceProp   prop;
 340     cuda_dev_info_t *devs;
 341
 342     assert(gpu_info);
 343     assert(err_str);
 344
 345     gpu_info->ncuda_dev_compatible = 0;
 346
 347     ndev    = 0;
 348     devs    = NULL;
 349
 350     stat = cudaGetDeviceCount(&ndev);
 351     if (stat != cudaSuccess)
 352     {
 353         const char *s;
 354
 355         /* cudaGetDeviceCount failed which means that there is something
 356          * wrong with the machine: driver-runtime mismatch, all GPUs being
 357          * busy in exclusive mode, or some other condition which should
 358          * result in us issuing a warning a falling back to CPUs. */
 359         retval = -1;
 360         s      = cudaGetErrorString(stat);
 361         strncpy(err_str, s, STRLEN*sizeof(err_str[0]));
 362     }
 363     else
 364     {
 365         snew(devs, ndev);
 366         for (i = 0; i < ndev; i++)
 367         {
 368             checkres = is_gmx_supported_gpu_id(i, &prop);
 369
 370             devs[i].id   = i;
 371             devs[i].prop = prop;
 372             devs[i].stat = checkres;
 373
 374             if (checkres == egpuCompatible)
 375             {
 376                 gpu_info->ncuda_dev_compatible++;
 377             }
 378         }
 379         retval = 0;
 380     }
 381
 382     gpu_info->ncuda_dev = ndev;
 383     gpu_info->cuda_dev  = devs;
 384
 385     return retval;
 386 }
 387
 388 /*! \brief Select the GPUs compatible with the native GROMACS acceleration.
 389  *
 390  * This function selects the compatible gpus and initializes
 391  * gpu_info->cuda_dev_use and gpu_info->ncuda_dev_use.
 392  *
 393  * Given the list of GPUs available in the system check each device in
 394  * gpu_info->cuda_dev and place the indices of the compatible GPUs into
 395  * cuda_dev_use with this marking the respective GPUs as "available for use."
 396  * Note that \detect_cuda_gpus must have been called before.
 397  *
 398  * \param[in]     gpu_info    pointer to structure holding GPU information
 399  * \param[in,out] gpu_opt     pointer to structure holding GPU options
 400  */
 401 void pick_compatible_gpus(const gmx_gpu_info_t *gpu_info,
 402                           gmx_gpu_opt_t        *gpu_opt)
 403 {
 404     int  i, ncompat;
 405     int *compat;
 406
 407     assert(gpu_info);
 408     /* cuda_dev/ncuda_dev have to be either NULL/0 or not (NULL/0) */
 409     assert((gpu_info->ncuda_dev != 0 ? 0 : 1) ^ (gpu_info->cuda_dev == NULL ? 0 : 1));
 410
 411     snew(compat, gpu_info->ncuda_dev);
 412     ncompat = 0;
 413     for (i = 0; i < gpu_info->ncuda_dev; i++)
 414     {
 415         if (is_compatible_gpu(gpu_info->cuda_dev[i].stat))
 416         {
 417             ncompat++;
 418             compat[ncompat - 1] = i;
 419         }
 420     }
 421
 422     gpu_opt->ncuda_dev_use = ncompat;
 423     snew(gpu_opt->cuda_dev_use, ncompat);
 424     memcpy(gpu_opt->cuda_dev_use, compat, ncompat*sizeof(*compat));
 425     sfree(compat);
 426 }
 427
 428 /*! \brief Check the existence/compatibility of a set of GPUs specified by their device IDs.
 429  *
 430  * Given the a list of gpu->ncuda_dev_use GPU device IDs stored in
 431  * gpu_opt->cuda_dev_use check the existence and compatibility
 432  * of the respective GPUs. Also provide the caller with an array containing
 433  * the result of checks in \checkres.
 434  *
 435  * \param[out]  checkres    check result for each ID passed in \requested_devs
 436  * \param[in]   gpu_info    pointer to structure holding GPU information
 437  * \param[out]  gpu_opt     pointer to structure holding GPU options
 438  * \returns                 TRUE if every the requested GPUs are compatible
 439  */
 440 gmx_bool check_selected_cuda_gpus(int                  *checkres,
 441                                   const gmx_gpu_info_t *gpu_info,
 442                                   gmx_gpu_opt_t        *gpu_opt)
 443 {
 444     int  i, id;
 445     bool bAllOk;
 446
 447     assert(checkres);
 448     assert(gpu_info);
 449     assert(gpu_opt->ncuda_dev_use >= 0);
 450
 451     if (gpu_opt->ncuda_dev_use == 0)
 452     {
 453         return TRUE;
 454     }
 455
 456     assert(gpu_opt->cuda_dev_use);
 457
 458     /* we will assume that all GPUs requested are valid IDs,
 459        otherwise we'll bail anyways */
 460
 461     bAllOk = true;
 462     for (i = 0; i < gpu_opt->ncuda_dev_use; i++)
 463     {
 464         id = gpu_opt->cuda_dev_use[i];
 465
 466         /* devices are stored in increasing order of IDs in cuda_dev */
 467         gpu_opt->cuda_dev_use[i] = id;
 468
 469         checkres[i] = (id >= gpu_info->ncuda_dev) ?
 470             egpuNonexistent : gpu_info->cuda_dev[id].stat;
 471
 472         bAllOk = bAllOk && is_compatible_gpu(checkres[i]);
 473     }
 474
 475     return bAllOk;
 476 }
 477
 478 /*! \brief Frees the cuda_dev and cuda_dev_use array fields of \gpu_info.
 479  *
 480  * \param[in]    gpu_info    pointer to structure holding GPU information
 481  */
 482 void free_gpu_info(const gmx_gpu_info_t *gpu_info)
 483 {
 484     if (gpu_info == NULL)
 485     {
 486         return;
 487     }
 488
 489     sfree(gpu_info->cuda_dev);
 490 }
 491
 492 /*! \brief Formats and returns a device information string for a given GPU.
 493  *
 494  * Given an index *directly* into the array of available GPUs (cuda_dev)
 495  * returns a formatted info string for the respective GPU which includes
 496  * ID, name, compute capability, and detection status.
 497  *
 498  * \param[out]  s           pointer to output string (has to be allocated externally)
 499  * \param[in]   gpu_info    pointer to structure holding GPU information
 500  * \param[in]   index       an index *directly* into the array of available GPUs
 501  */
 502 void get_gpu_device_info_string(char *s, const gmx_gpu_info_t *gpu_info, int index)
 503 {
 504     assert(s);
 505     assert(gpu_info);
 506
 507     if (index < 0 && index >= gpu_info->ncuda_dev)
 508     {
 509         return;
 510     }
 511
 512     cuda_dev_info_t *dinfo = &gpu_info->cuda_dev[index];
 513
 514     bool             bGpuExists =
 515         dinfo->stat == egpuCompatible ||
 516         dinfo->stat == egpuIncompatible;
 517
 518     if (!bGpuExists)
 519     {
 520         sprintf(s, "#%d: %s, stat: %s",
 521                 dinfo->id, "N/A",
 522                 gpu_detect_res_str[dinfo->stat]);
 523     }
 524     else
 525     {
 526         sprintf(s, "#%d: NVIDIA %s, compute cap.: %d.%d, ECC: %3s, stat: %s",
 527                 dinfo->id, dinfo->prop.name,
 528                 dinfo->prop.major, dinfo->prop.minor,
 529                 dinfo->prop.ECCEnabled ? "yes" : " no",
 530                 gpu_detect_res_str[dinfo->stat]);
 531     }
 532 }
 533
 534 /*! \brief Returns the device ID of the GPU with a given index into the array of used GPUs.
 535  *
 536  * Getter function which, given an index into the array of GPUs in use
 537  * (cuda_dev_use) -- typically a tMPI/MPI rank --, returns the device ID of the
 538  * respective CUDA GPU.
 539  *
 540  * \param[in]    gpu_info   pointer to structure holding GPU information
 541  * \param[in]    gpu_opt    pointer to structure holding GPU options
 542  * \param[in]    idx        index into the array of used GPUs
 543  * \returns                 device ID of the requested GPU
 544  */
 545 int get_gpu_device_id(const gmx_gpu_info_t *gpu_info,
 546                       const gmx_gpu_opt_t  *gpu_opt,
 547                       int                   idx)
 548 {
 549     assert(gpu_info);
 550     assert(gpu_opt);
 551     assert(idx >= 0 && idx < gpu_opt->ncuda_dev_use);
 552
 553     return gpu_info->cuda_dev[gpu_opt->cuda_dev_use[idx]].id;
 554 }
 555
 556 /*! \brief Returns the device ID of the GPU currently in use.
 557  *
 558  * The GPU used is the one that is active at the time of the call in the active context.
 559  *
 560  * \param[in]    gpu_info   pointer to structure holding GPU information
 561  * \returns                 device ID of the GPU in use at the time of the call
 562  */
 563 int get_current_gpu_device_id(void)
 564 {
 565     int gpuid;
 566     CU_RET_ERR(cudaGetDevice(&gpuid), "cudaGetDevice failed");
 567
 568     return gpuid;
 569 }
 570
 571 /*! \brief Returns the size of the cuda_dev_info struct.
 572  *
 573  * The size of cuda_dev_info can be used for allocation and communication.
 574  *
 575  * \returns                 size in bytes of cuda_dev_info
 576  */
 577 size_t sizeof_cuda_dev_info(void)
 578 {
 579     return sizeof(cuda_dev_info);
 580 }