2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 #include "gromacs/legacyheaders/gpu_utils.h"
44 #include "gromacs/gmxlib/cuda_tools/cudautils.cuh"
45 #include "gromacs/legacyheaders/types/hw_info.h"
46 #include "gromacs/utility/cstringutil.h"
47 #include "gromacs/utility/smalloc.h"
50 * Max number of devices supported by CUDA (for consistency checking).
52 * In reality it is 16 with CUDA <=v5.0, but let's stay on the safe side.
/* Upper bound used to sanity-check the device count reported by the CUDA runtime.
 * The value is only ever read (compared against the reported count), so it is const. */
static const int cuda_max_device_count = 32;
56 /** Dummy kernel used for sanity checking; it takes no arguments. */
// NOTE(review): only the signature is visible in this excerpt -- the kernel body sits on
// lines that are not shown. do_sanity_checks() launches it as <<< 1, 512>>> and then
// checks the result of a synchronize call.
57 __global__ void k_dummy_test()
63 * \brief Runs GPU sanity checks.
65 * Runs a series of checks to determine that the given GPU and underlying CUDA
66 * driver/runtime functions properly.
67 * Returns properties of a device with given ID or the one that has
68 * already been initialized earlier in the case of \dev_id == -1.
70 * \param[in] dev_id the device ID of the GPU or -1 if the device has already been initialized
71 * \param[out] dev_prop pointer to the structure in which the device properties will be returned
72 * \returns 0 if the device looks OK
74 * TODO: introduce errors codes and handle errors more smoothly.
// Runs a sequence of runtime checks for one device: query the device count, resolve the
// device id (the currently selected device when dev_id == -1), fetch the device
// properties into *dev_prop, select the device, launch k_dummy_test and synchronize,
// then tear the context down again.
//
// dev_id   - device ID to check, or -1 to check the device that is already selected.
// dev_prop - zeroed with memset and then filled by cudaGetDeviceProperties().
//
// NOTE(review): this excerpt omits a number of short lines (braces, local declarations,
// return statements and some conditions), so the individual return paths and the exact
// conditions guarding cudaSetDevice() and the context teardown are not visible here.
76 static int do_sanity_checks(int dev_id, cudaDeviceProp *dev_prop)
81 cu_err = cudaGetDeviceCount(&dev_count);
82 if (cu_err != cudaSuccess)
84 fprintf(stderr, "Error %d while querying device count: %s\n", cu_err,
85 cudaGetErrorString(cu_err));
89 /* no CUDA compatible device at all */
95 /* things might go horribly wrong if cudart is not compatible with the driver */
// A negative count or one above cuda_max_device_count is treated as implausible.
96 if (dev_count < 0 || dev_count > cuda_max_device_count)
101 if (dev_id == -1) /* device already selected let's not destroy the context */
// Ask the runtime which device is current instead of using the caller's id.
103 cu_err = cudaGetDevice(&id);
104 if (cu_err != cudaSuccess)
106 fprintf(stderr, "Error %d while querying device id: %s\n", cu_err,
107 cudaGetErrorString(cu_err));
// Only the upper bound is tested here; ids are expected to lie in [0, dev_count - 1].
114 if (id > dev_count - 1) /* pfff there's no such device */
116 fprintf(stderr, "The requested device with id %d does not seem to exist (device count=%d)\n",
// Start from an all-zero structure so no stale fields survive the properties query.
122 memset(dev_prop, 0, sizeof(cudaDeviceProp));
123 cu_err = cudaGetDeviceProperties(dev_prop, id);
124 if (cu_err != cudaSuccess)
126 fprintf(stderr, "Error %d while querying device properties: %s\n", cu_err,
127 cudaGetErrorString(cu_err));
131 /* both major & minor is 9999 if no CUDA capable devices are present */
132 if (dev_prop->major == 9999 && dev_prop->minor == 9999)
136 /* we don't care about emulation mode */
137 if (dev_prop->major == 0)
144 cu_err = cudaSetDevice(id);
145 if (cu_err != cudaSuccess)
147 fprintf(stderr, "Error %d while switching to device #%d: %s\n",
148 cu_err, id, cudaGetErrorString(cu_err));
153 /* try to execute a dummy kernel */
// NOTE(review): no cudaGetLastError() check follows the launch; only the result of the
// synchronize call below is tested.
154 k_dummy_test<<< 1, 512>>> ();
// NOTE(review): cudaThreadSynchronize() is deprecated in the CUDA runtime API in favor
// of cudaDeviceSynchronize(); the CUDA_VERSION < 4000 branch below suggests old
// toolkits are still supported, so any replacement needs the same version guard.
155 if (cudaThreadSynchronize() != cudaSuccess)
160 /* destroy context if we created one */
// Toolkits older than 4.0 use cudaThreadExit(); the cudaDeviceReset() call is
// presumably the #else branch (the #else/#endif lines are not visible) -- confirm.
163 #if CUDA_VERSION < 4000
164 cu_err = cudaThreadExit();
165 CU_RET_ERR(cu_err, "cudaThreadExit failed");
167 cu_err = cudaDeviceReset();
168 CU_RET_ERR(cu_err, "cudaDeviceReset failed");
175 /*! \brief Initializes the GPU with the given index.
177 * The variable \mygpu is the index of the GPU to initialize in the
178 * gpu_opt->cuda_dev_use array (the list of GPUs selected for use).
180 * \param[in] mygpu index of the GPU to initialize
181 * \param[out] result_str the message related to the error that occurred
182 * during the initialization (if there was any).
183 * \param[in] gpu_info GPU info of all detected devices in the system.
184 * \param[in] gpu_opt options for using the GPUs in gpu_info
185 * \returns true if no error occurs during initialization.
// Selects the GPU at position mygpu of gpu_opt->cuda_dev_use as the active CUDA device.
//
// mygpu      - index into gpu_opt->cuda_dev_use; must be in [0, gpu_opt->ncuda_dev_use).
// result_str - receives the CUDA status string of the cudaSetDevice() call.
// gpu_info   - detection results; cuda_dev[] supplies the device id and name.
// gpu_opt    - selection of GPUs to use.
// Returns true exactly when cudaSetDevice() reported cudaSuccess.
//
// NOTE(review): short lines (braces, local declarations, what is done with sbuf after
// formatting, and the condition guarding the fprintf) are not visible in this excerpt.
187 gmx_bool init_gpu(int mygpu, char *result_str,
188 const gmx_gpu_info_t *gpu_info,
189 const gmx_gpu_opt_t *gpu_opt)
198 if (mygpu < 0 || mygpu >= gpu_opt->ncuda_dev_use)
200 sprintf(sbuf, "Trying to initialize an inexistent GPU: "
201 "there are %d %s-selected GPU(s), but #%d was requested.",
202 gpu_opt->ncuda_dev_use, gpu_opt->bUserSet ? "user" : "auto", mygpu);
// Map the position in the in-use list to an entry of cuda_dev, then to its CUDA device id.
206 gpuid = gpu_info->cuda_dev[gpu_opt->cuda_dev_use[mygpu]].id;
208 stat = cudaSetDevice(gpuid);
// NOTE(review): strncpy() does not NUL-terminate result_str when the message is
// STRLEN characters or longer.
209 strncpy(result_str, cudaGetErrorString(stat), STRLEN);
// NOTE(review): the name is looked up with cuda_dev[gpuid] (indexed by device id),
// whereas the id above came from cuda_dev[cuda_dev_use[mygpu]]; the two agree only
// if cuda_dev[k].id == k -- confirm.
213 fprintf(stderr, "Initialized GPU ID #%d: %s\n", gpuid, gpu_info->cuda_dev[gpuid].prop.name);
216 return (stat == cudaSuccess);
219 /*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
221 * The context is explicitly destroyed and therefore all data uploaded to the GPU
222 * is lost. This should only be called when none of this data is required anymore.
224 * \param[out] result_str the message related to the error that occurred
225 * during the freeing (if there was any).
226 * \returns true if no error occurs during the freeing.
// Tears down the CUDA context of the device that is current at the time of the call.
//
// result_str - receives the CUDA status string of the teardown call.
// Returns true exactly when the teardown call reported cudaSuccess.
//
// NOTE(review): braces, local declarations, the condition guarding the fprintf and the
// #else/#endif lines are not visible in this excerpt.
228 gmx_bool free_gpu(char *result_str)
// The device id is queried only so that it can be reported in the message below.
237 stat = cudaGetDevice(&gpuid);
238 CU_RET_ERR(stat, "cudaGetDevice failed");
239 fprintf(stderr, "Cleaning up context on GPU ID #%d\n", gpuid);
// Toolkits older than 4.0 use cudaThreadExit(); cudaDeviceReset() is presumably the
// #else branch -- confirm against the full file.
242 #if CUDA_VERSION < 4000
243 stat = cudaThreadExit();
245 stat = cudaDeviceReset();
// NOTE(review): strncpy() does not NUL-terminate result_str when the message is
// STRLEN characters or longer.
247 strncpy(result_str, cudaGetErrorString(stat), STRLEN);
249 return (stat == cudaSuccess);
252 /*! \brief Returns true if the gpu characterized by the device properties is
253 * supported by the native gpu acceleration.
255 * \param[in] dev_prop the CUDA device properties of the gpus to test.
256 * \returns true if the GPU properties passed indicate a compatible
257 * GPU, otherwise false.
/* Decides from the device properties whether a GPU can be used by the native
 * GPU acceleration.
 *
 * dev_prop - CUDA properties of the device under test (must not be NULL).
 * Returns true when the major version stored in the properties is 2 or higher.
 */
static bool is_gmx_supported_gpu(const cudaDeviceProp *dev_prop)
{
    const int majorVersion = dev_prop->major;

    if (majorVersion < 2)
    {
        return false;
    }

    return true;
}
264 /*! \brief Helper function that checks whether a given GPU status indicates compatible GPU.
266 * \param[in] stat GPU status.
267 * \returns true if the provided status is egpuCompatible, otherwise false.
/* Tells whether a detection status value marks a GPU as compatible.
 *
 * stat - GPU status value as stored in the detection results.
 * Returns true only when stat equals egpuCompatible.
 */
static bool is_compatible_gpu(int stat)
{
    const bool bCompatible = (egpuCompatible == stat);

    return bCompatible;
}
274 /*! \brief Checks if a GPU with a given ID is supported by the native GROMACS acceleration.
276 * Returns a status value which indicates compatibility or one of the following
277 * errors: incompatibility, inexistence, or insanity (=unexpected behavior).
278 * It also returns the respective device's properties in \dev_prop (if applicable).
280 * \param[in] dev_id the ID of the GPU to check.
281 * \param[out] dev_prop the CUDA device properties of the device checked.
282 * \returns the status of the requested device
// Classifies the device with ID dev_id and returns a detection status value.
//
// dev_id   - CUDA device ID to check.
// dev_prop - filled with the device properties by do_sanity_checks().
//
// Visible outcomes: egpuNonexistent when dev_id is beyond the last valid ID, and after
// a successful do_sanity_checks() either egpuCompatible or egpuIncompatible depending
// on is_gmx_supported_gpu().
// NOTE(review): the values returned when cudaGetDeviceCount() or the sanity checks fail
// are on lines not visible in this excerpt, as are the braces and local declarations.
284 static int is_gmx_supported_gpu_id(int dev_id, cudaDeviceProp *dev_prop)
289 stat = cudaGetDeviceCount(&ndev);
290 if (stat != cudaSuccess)
// Only the upper bound is tested; do_sanity_checks() gives dev_id == -1 a special
// meaning (use the currently selected device), so -1 is deliberately let through here.
295 if (dev_id > ndev - 1)
297 return egpuNonexistent;
300 /* TODO: currently we do not make a distinction between the type of errors
301 * that can appear during sanity checks. This needs to be improved, e.g if
302 * the dummy test kernel fails to execute with a "device busy message" we
303 * should appropriately report that the device is busy instead of insane.
305 if (do_sanity_checks(dev_id, dev_prop) == 0)
307 if (is_gmx_supported_gpu(dev_prop))
309 return egpuCompatible;
313 return egpuIncompatible;
323 /*! \brief Detect all NVIDIA GPUs in the system.
325 * Will detect every NVIDIA GPU supported by the device driver in use. Also
326 * check for the compatibility of each and fill the gpu_info->cuda_dev array
327 * with the required information on each device: ID, device properties,
330 * \param[in] gpu_info pointer to structure holding GPU information.
331 * \param[out] err_str The error message of any CUDA API error that caused
332 * the detection to fail (if there was any). The memory
333 * the pointer points to should be managed externally.
334 * \returns non-zero if the detection encountered a failure, zero otherwise.
// Enumerates the CUDA devices, checks each one with is_gmx_supported_gpu_id() and
// records the outcome in gpu_info.
//
// gpu_info - receives ncuda_dev, cuda_dev and ncuda_dev_compatible.
// err_str  - caller-owned buffer; on a cudaGetDeviceCount() failure the CUDA error
//            string is copied into it.
//
// NOTE(review): braces, several local declarations, the allocation of devs, the other
// per-device assignments and every statement touching retval are on lines not visible
// in this excerpt.
336 int detect_cuda_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
338 int i, ndev, checkres, retval;
341 cuda_dev_info_t *devs;
// Reset the counter before the per-device loop increments it.
346 gpu_info->ncuda_dev_compatible = 0;
351 stat = cudaGetDeviceCount(&ndev);
352 if (stat != cudaSuccess)
356 /* cudaGetDeviceCount failed which means that there is something
357 * wrong with the machine: driver-runtime mismatch, all GPUs being
358 * busy in exclusive mode, or some other condition which should
359 * result in us issuing a warning and falling back to CPUs. */
361 s = cudaGetErrorString(stat);
// NOTE(review): strncpy() does not NUL-terminate err_str when the message is STRLEN
// characters or longer.
362 strncpy(err_str, s, STRLEN*sizeof(err_str[0]));
// Check every device ID in [0, ndev) and store its status.
367 for (i = 0; i < ndev; i++)
369 checkres = is_gmx_supported_gpu_id(i, &prop);
373 devs[i].stat = checkres;
375 if (checkres == egpuCompatible)
377 gpu_info->ncuda_dev_compatible++;
// Publish the results; gpu_info now refers to the devs array.
383 gpu_info->ncuda_dev = ndev;
384 gpu_info->cuda_dev = devs;
389 /*! \brief Select the GPUs compatible with the native GROMACS acceleration.
391 * This function selects the compatible gpus and initializes
392 * gpu_opt->cuda_dev_use and gpu_opt->ncuda_dev_use.
394 * Given the list of GPUs available in the system check each device in
395 * gpu_info->cuda_dev and place the indices of the compatible GPUs into
396 * cuda_dev_use with this marking the respective GPUs as "available for use."
397 * Note that \detect_cuda_gpus must have been called before.
399 * \param[in] gpu_info pointer to structure holding GPU information
400 * \param[in,out] gpu_opt pointer to structure holding GPU options
// Collects the indices of all compatible entries of gpu_info->cuda_dev and stores them
// as the selection in gpu_opt (cuda_dev_use / ncuda_dev_use).
//
// gpu_info - detection results to scan (read only).
// gpu_opt  - receives a newly allocated cuda_dev_use array and its length.
//
// NOTE(review): braces, local declarations, the initialization and increment of ncompat
// and any release of the temporary compat array are on lines not visible in this excerpt.
402 void pick_compatible_gpus(const gmx_gpu_info_t *gpu_info,
403 gmx_gpu_opt_t *gpu_opt)
409 /* cuda_dev/ncuda_dev have to be either NULL/0 or not (NULL/0) */
// The XOR is 1 only when "count is zero" and "array is NULL" hold together or fail together.
410 assert((gpu_info->ncuda_dev != 0 ? 0 : 1) ^ (gpu_info->cuda_dev == NULL ? 0 : 1));
// Scratch array sized for the worst case in which every device is compatible.
412 snew(compat, gpu_info->ncuda_dev);
414 for (i = 0; i < gpu_info->ncuda_dev; i++)
416 if (is_compatible_gpu(gpu_info->cuda_dev[i].stat))
// ncompat has presumably just been incremented on a line not shown, hence the "- 1".
419 compat[ncompat - 1] = i;
// Hand the caller an exactly sized copy of the collected indices.
423 gpu_opt->ncuda_dev_use = ncompat;
424 snew(gpu_opt->cuda_dev_use, ncompat);
425 memcpy(gpu_opt->cuda_dev_use, compat, ncompat*sizeof(*compat));
429 /*! \brief Check the existence/compatibility of a set of GPUs specified by their device IDs.
431 * Given a list of gpu_opt->ncuda_dev_use GPU device IDs stored in
432 * gpu_opt->cuda_dev_use check the existence and compatibility
433 * of the respective GPUs. Also provide the caller with an array containing
434 * the result of checks in \checkres.
436 * \param[out] checkres check result for each ID passed in \requested_devs
437 * \param[in] gpu_info pointer to structure holding GPU information
438 * \param[out] gpu_opt pointer to structure holding GPU options
439 * \returns TRUE if all of the requested GPUs are compatible
// Looks up the detection status of every GPU listed in gpu_opt->cuda_dev_use and
// writes it to checkres[i]; the values are treated as indices into gpu_info->cuda_dev.
//
// checkres - output array with at least gpu_opt->ncuda_dev_use elements.
// gpu_info - detection results (read only).
// gpu_opt  - selection to check.
//
// NOTE(review): braces, local declarations, the initialization of bAllOk and the return
// statements (including the one for an empty selection) are not visible in this excerpt.
441 gmx_bool check_selected_cuda_gpus(int *checkres,
442 const gmx_gpu_info_t *gpu_info,
443 gmx_gpu_opt_t *gpu_opt)
450 assert(gpu_opt->ncuda_dev_use >= 0);
452 if (gpu_opt->ncuda_dev_use == 0)
457 assert(gpu_opt->cuda_dev_use);
459 /* we will assume that all GPUs requested are valid IDs,
460 otherwise we'll bail anyways */
463 for (i = 0; i < gpu_opt->ncuda_dev_use; i++)
465 id = gpu_opt->cuda_dev_use[i];
467 /* devices are stored in increasing order of IDs in cuda_dev */
// NOTE(review): as visible here this stores back the value that was just read, i.e. a no-op.
468 gpu_opt->cuda_dev_use[i] = id;
// NOTE(review): only the upper bound is checked; a negative id would index
// cuda_dev[] out of bounds.
470 checkres[i] = (id >= gpu_info->ncuda_dev) ?
471 egpuNonexistent : gpu_info->cuda_dev[id].stat;
// Stays true only while every status seen so far is egpuCompatible.
473 bAllOk = bAllOk && is_compatible_gpu(checkres[i]);
479 /*! \brief Frees the cuda_dev array field of \gpu_info.
481 * \param[in] gpu_info pointer to structure holding GPU information
// Releases the cuda_dev array owned by gpu_info.
//
// gpu_info - structure whose cuda_dev member is freed; NULL is tested for first
//            (the statement guarded by that test is not visible in this excerpt).
//
// NOTE(review): the parameter is const-qualified although its member is freed, and the
// member is not visibly reset afterwards, so callers must not use cuda_dev after this call.
483 void free_gpu_info(const gmx_gpu_info_t *gpu_info)
485 if (gpu_info == NULL)
490 sfree(gpu_info->cuda_dev);
493 /*! \brief Formats and returns a device information string for a given GPU.
495 * Given an index *directly* into the array of available GPUs (cuda_dev)
496 * returns a formatted info string for the respective GPU which includes
497 * ID, name, compute capability, and detection status.
499 * \param[out] s pointer to output string (has to be allocated externally)
500 * \param[in] gpu_info pointer to structure holding GPU information
501 * \param[in] index an index *directly* into the array of available GPUs
/* Formats a one-line description of the GPU at position index of gpu_info->cuda_dev.
 *
 * s        - [out] caller-allocated buffer; it must be large enough for the formatted
 *            line (no size is passed in, so the write cannot be bounded here).
 * gpu_info - [in]  detection results holding the cuda_dev array and ncuda_dev.
 * index    - [in]  index directly into gpu_info->cuda_dev.
 *
 * For an out-of-range index an empty string is stored in s and nothing is read from
 * cuda_dev. The previous guard tested "index < 0 && index >= ncuda_dev", which can
 * never be true, so invalid indices fell through to an out-of-bounds read.
 *
 * NOTE(review): the excerpt this fix was written against omitted several short lines
 * (braces, the asserts, the early return, the declaration of the local flag, the branch
 * keywords and the first two arguments of the short-form sprintf); they are
 * reconstructed here and should be verified against the full file.
 */
void get_gpu_device_info_string(char *s, const gmx_gpu_info_t *gpu_info, int index)
{
    assert(s);
    assert(gpu_info);

    /* Reject anything outside [0, ncuda_dev) before touching the array. */
    if (index < 0 || index >= gpu_info->ncuda_dev)
    {
        s[0] = '\0';
        return;
    }

    cuda_dev_info_t *dinfo = &gpu_info->cuda_dev[index];

    /* Only for these two states are the device properties printed. */
    bool bGpuExists =
        dinfo->stat == egpuCompatible ||
        dinfo->stat == egpuIncompatible;

    if (!bGpuExists)
    {
        /* Short form: no name or capability information is printed. */
        sprintf(s, "#%d: %s, stat: %s",
                dinfo->id, "N/A",
                gpu_detect_res_str[dinfo->stat]);
    }
    else
    {
        sprintf(s, "#%d: NVIDIA %s, compute cap.: %d.%d, ECC: %3s, stat: %s",
                dinfo->id, dinfo->prop.name,
                dinfo->prop.major, dinfo->prop.minor,
                dinfo->prop.ECCEnabled ? "yes" : " no",
                gpu_detect_res_str[dinfo->stat]);
    }
}
535 /*! \brief Returns the device ID of the GPU with a given index into the array of used GPUs.
537 * Getter function which, given an index into the array of GPUs in use
538 * (cuda_dev_use) -- typically a tMPI/MPI rank --, returns the device ID of the
539 * respective CUDA GPU.
541 * \param[in] gpu_info pointer to structure holding GPU information
542 * \param[in] gpu_opt pointer to structure holding GPU options
543 * \param[in] idx index into the array of used GPUs
544 * \returns device ID of the requested GPU
// Translates a position in the list of GPUs in use into a CUDA device ID.
//
// gpu_info - detection results; cuda_dev[] holds the device IDs.
// gpu_opt  - selection; cuda_dev_use[] holds indices into gpu_info->cuda_dev.
// idx      - position in gpu_opt->cuda_dev_use; must be in [0, gpu_opt->ncuda_dev_use).
//            Its declaration line is not visible in this excerpt.
// Returns the id field of the selected cuda_dev entry.
546 int get_gpu_device_id(const gmx_gpu_info_t *gpu_info,
547 const gmx_gpu_opt_t *gpu_opt,
// The range is enforced with assert only, i.e. not in builds that define NDEBUG.
552 assert(idx >= 0 && idx < gpu_opt->ncuda_dev_use);
554 return gpu_info->cuda_dev[gpu_opt->cuda_dev_use[idx]].id;
557 /*! \brief Returns the device ID of the GPU currently in use.
559 * The GPU used is the one that is active at the time of the call in the active context.
561 * Takes no parameters; the device is queried with cudaGetDevice().
562 * \returns device ID of the GPU in use at the time of the call
// Asks the CUDA runtime which device is current and stores the answer in gpuid.
// Takes no arguments.
// NOTE(review): the declaration of gpuid and the return statement are on lines not
// visible in this excerpt; the function presumably returns gpuid -- confirm.
564 int get_current_gpu_device_id(void)
567 CU_RET_ERR(cudaGetDevice(&gpuid), "cudaGetDevice failed");
572 /*! \brief Returns the size of the cuda_dev_info struct.
574 * The size of cuda_dev_info can be used for allocation and communication.
576 * \returns size in bytes of cuda_dev_info
/* Reports the storage size of one cuda_dev_info record.
 *
 * Returns sizeof(cuda_dev_info) in bytes.
 */
size_t sizeof_cuda_dev_info(void)
{
    const size_t nbytes = sizeof(cuda_dev_info);

    return nbytes;
}