2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
42 #include "gromacs/legacyheaders/types/hw_info.h"
44 #include "gromacs/legacyheaders/gpu_utils.h"
45 #include "../cuda_tools/cudautils.cuh"
47 #include "gromacs/utility/cstringutil.h"
48 #include "gromacs/utility/smalloc.h"
51 * Max number of devices supported by CUDA (for consistency checking).
53 * In reality it is 16 with CUDA <=v5.0, but let's stay on the safe side.
55 static int cuda_max_device_count = 32;
57 /** Dummy kernel used for sanity checking. */
58 __global__ void k_dummy_test()
64 * \brief Runs GPU sanity checks.
66 * Runs a series of checks to determine that the given GPU and underlying CUDA
67 * driver/runtime functions properly.
68 * Returns properties of a device with given ID or the one that has
69 * already been initialized earlier in the case of \dev_id == -1.
71 * \param[in] dev_id the device ID of the GPU or -1 if the device has already been initialized
72 * \param[out] dev_prop pointer to the structure in which the device properties will be returned
73 * \returns 0 if the device looks OK
75 * TODO: introduce error codes and handle errors more smoothly.
77 static int do_sanity_checks(int dev_id, cudaDeviceProp *dev_prop)
82 cu_err = cudaGetDeviceCount(&dev_count);
83 if (cu_err != cudaSuccess)
85 fprintf(stderr, "Error %d while querying device count: %s\n", cu_err,
86 cudaGetErrorString(cu_err));
90 /* no CUDA compatible device at all */
96 /* things might go horribly wrong if cudart is not compatible with the driver */
97 if (dev_count < 0 || dev_count > cuda_max_device_count)
102 if (dev_id == -1) /* device already selected let's not destroy the context */
104 cu_err = cudaGetDevice(&id);
105 if (cu_err != cudaSuccess)
107 fprintf(stderr, "Error %d while querying device id: %s\n", cu_err,
108 cudaGetErrorString(cu_err));
115 if (id > dev_count - 1) /* pfff there's no such device */
117 fprintf(stderr, "The requested device with id %d does not seem to exist (device count=%d)\n",
123 memset(dev_prop, 0, sizeof(cudaDeviceProp));
124 cu_err = cudaGetDeviceProperties(dev_prop, id);
125 if (cu_err != cudaSuccess)
127 fprintf(stderr, "Error %d while querying device properties: %s\n", cu_err,
128 cudaGetErrorString(cu_err));
132 /* both major & minor are 9999 if no CUDA capable devices are present */
133 if (dev_prop->major == 9999 && dev_prop->minor == 9999)
137 /* we don't care about emulation mode */
138 if (dev_prop->major == 0)
145 cu_err = cudaSetDevice(id);
146 if (cu_err != cudaSuccess)
148 fprintf(stderr, "Error %d while switching to device #%d: %s\n",
149 cu_err, id, cudaGetErrorString(cu_err));
154 /* try to execute a dummy kernel */
155 k_dummy_test<<< 1, 512>>> ();
156 if (cudaThreadSynchronize() != cudaSuccess)
161 /* destroy context if we created one */
164 #if CUDA_VERSION < 4000
165 cu_err = cudaThreadExit();
166 CU_RET_ERR(cu_err, "cudaThreadExit failed");
168 cu_err = cudaDeviceReset();
169 CU_RET_ERR(cu_err, "cudaDeviceReset failed");
176 /*! \brief Initializes the GPU with the given index.
178 * The variable \mygpu is the index of the GPU to initialize in the
179 * gpu_info.cuda_dev array.
181 * \param[in] mygpu index of the GPU to initialize
182 * \param[out] result_str the message related to the error that occurred
183 * during the initialization (if there was any).
184 * \param[in] gpu_info GPU info of all detected devices in the system.
185 * \param[in] gpu_opt options for using the GPUs in gpu_info
186 * \returns true if no error occurs during initialization.
188 gmx_bool init_gpu(int mygpu, char *result_str,
189 const gmx_gpu_info_t *gpu_info,
190 const gmx_gpu_opt_t *gpu_opt)
199 if (mygpu < 0 || mygpu >= gpu_opt->ncuda_dev_use)
201 sprintf(sbuf, "Trying to initialize an inexistent GPU: "
202 "there are %d %s-selected GPU(s), but #%d was requested.",
203 gpu_opt->ncuda_dev_use, gpu_opt->bUserSet ? "user" : "auto", mygpu);
207 gpuid = gpu_info->cuda_dev[gpu_opt->cuda_dev_use[mygpu]].id;
209 stat = cudaSetDevice(gpuid);
210 strncpy(result_str, cudaGetErrorString(stat), STRLEN);
214 fprintf(stderr, "Initialized GPU ID #%d: %s\n", gpuid, gpu_info->cuda_dev[gpuid].prop.name);
217 return (stat == cudaSuccess);
220 /*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
222 * The context is explicitly destroyed and therefore all data uploaded to the GPU
223 * is lost. This should only be called when none of this data is required anymore.
225 * \param[out] result_str the message related to the error that occurred
226 * during the freeing (if there was any).
227 * \returns true if no error occurs during the freeing.
229 gmx_bool free_gpu(char *result_str)
238 stat = cudaGetDevice(&gpuid);
239 CU_RET_ERR(stat, "cudaGetDevice failed");
240 fprintf(stderr, "Cleaning up context on GPU ID #%d\n", gpuid);
243 #if CUDA_VERSION < 4000
244 stat = cudaThreadExit();
246 stat = cudaDeviceReset();
248 strncpy(result_str, cudaGetErrorString(stat), STRLEN);
250 return (stat == cudaSuccess);
253 /*! \brief Returns true if the gpu characterized by the device properties is
254 * supported by the native gpu acceleration.
256 * \param[in] dev_prop the CUDA device properties of the gpus to test.
257 * \returns true if the GPU properties passed indicate a compatible
258 * GPU, otherwise false.
260 static bool is_gmx_supported_gpu(const cudaDeviceProp *dev_prop)
262 return (dev_prop->major >= 2);
265 /*! \brief Helper function that checks whether a given GPU status indicates compatible GPU.
267 * \param[in] stat GPU status.
268 * \returns true if the provided status is egpuCompatible, otherwise false.
270 static bool is_compatible_gpu(int stat)
272 return (stat == egpuCompatible);
275 /*! \brief Checks if a GPU with a given ID is supported by the native GROMACS acceleration.
277 * Returns a status value which indicates compatibility or one of the following
278 * errors: incompatibility, nonexistence, or insanity (=unexpected behavior).
279 * It also returns the respective device's properties in \dev_prop (if applicable).
281 * \param[in] dev_id the ID of the GPU to check.
282 * \param[out] dev_prop the CUDA device properties of the device checked.
283 * \returns the status of the requested device
285 static int is_gmx_supported_gpu_id(int dev_id, cudaDeviceProp *dev_prop)
290 stat = cudaGetDeviceCount(&ndev);
291 if (stat != cudaSuccess)
296 if (dev_id > ndev - 1)
298 return egpuNonexistent;
301 /* TODO: currently we do not make a distinction between the type of errors
302 * that can appear during sanity checks. This needs to be improved, e.g. if
303 * the dummy test kernel fails to execute with a "device busy" message we
304 * should appropriately report that the device is busy instead of insane.
306 if (do_sanity_checks(dev_id, dev_prop) == 0)
308 if (is_gmx_supported_gpu(dev_prop))
310 return egpuCompatible;
314 return egpuIncompatible;
324 /*! \brief Detect all NVIDIA GPUs in the system.
326 * Will detect every NVIDIA GPU supported by the device driver in use. Also
327 * check for the compatibility of each and fill the gpu_info->cuda_dev array
328 * with the required information on each device: ID, device properties,
331 * \param[out] gpu_info pointer to structure holding GPU information.
332 * \param[out] err_str The error message of any CUDA API error that caused
333 * the detection to fail (if there was any). The memory
334 * the pointer points to should be managed externally.
335 * \returns non-zero if the detection encountered a failure, zero otherwise.
337 int detect_cuda_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
339 int i, ndev, checkres, retval;
342 cuda_dev_info_t *devs;
347 gpu_info->ncuda_dev_compatible = 0;
352 stat = cudaGetDeviceCount(&ndev);
353 if (stat != cudaSuccess)
357 /* cudaGetDeviceCount failed which means that there is something
358 * wrong with the machine: driver-runtime mismatch, all GPUs being
359 * busy in exclusive mode, or some other condition which should
360 * result in us issuing a warning and falling back to CPUs. */
362 s = cudaGetErrorString(stat);
363 strncpy(err_str, s, STRLEN*sizeof(err_str[0]));
368 for (i = 0; i < ndev; i++)
370 checkres = is_gmx_supported_gpu_id(i, &prop);
374 devs[i].stat = checkres;
376 if (checkres == egpuCompatible)
378 gpu_info->ncuda_dev_compatible++;
384 gpu_info->ncuda_dev = ndev;
385 gpu_info->cuda_dev = devs;
390 /*! \brief Select the GPUs compatible with the native GROMACS acceleration.
392 * This function selects the compatible gpus and initializes
393 * gpu_opt->cuda_dev_use and gpu_opt->ncuda_dev_use.
395 * Given the list of GPUs available in the system check each device in
396 * gpu_info->cuda_dev and place the indices of the compatible GPUs into
397 * cuda_dev_use, thereby marking the respective GPUs as "available for use."
398 * Note that \detect_cuda_gpus must have been called before.
400 * \param[in] gpu_info pointer to structure holding GPU information
401 * \param[in,out] gpu_opt pointer to structure holding GPU options
403 void pick_compatible_gpus(const gmx_gpu_info_t *gpu_info,
404 gmx_gpu_opt_t *gpu_opt)
410 /* cuda_dev/ncuda_dev have to be either NULL/0 or not (NULL/0) */
411 assert((gpu_info->ncuda_dev != 0 ? 0 : 1) ^ (gpu_info->cuda_dev == NULL ? 0 : 1));
413 snew(compat, gpu_info->ncuda_dev);
415 for (i = 0; i < gpu_info->ncuda_dev; i++)
417 if (is_compatible_gpu(gpu_info->cuda_dev[i].stat))
420 compat[ncompat - 1] = i;
424 gpu_opt->ncuda_dev_use = ncompat;
425 snew(gpu_opt->cuda_dev_use, ncompat);
426 memcpy(gpu_opt->cuda_dev_use, compat, ncompat*sizeof(*compat));
430 /*! \brief Check the existence/compatibility of a set of GPUs specified by their device IDs.
432 * Given a list of gpu_opt->ncuda_dev_use GPU device IDs stored in
433 * gpu_opt->cuda_dev_use check the existence and compatibility
434 * of the respective GPUs. Also provide the caller with an array containing
435 * the result of checks in \checkres.
437 * \param[out] checkres check result for each ID passed in gpu_opt->cuda_dev_use
438 * \param[in] gpu_info pointer to structure holding GPU information
439 * \param[out] gpu_opt pointer to structure holding GPU options
440 * \returns TRUE if all of the requested GPUs are compatible
442 gmx_bool check_selected_cuda_gpus(int *checkres,
443 const gmx_gpu_info_t *gpu_info,
444 gmx_gpu_opt_t *gpu_opt)
451 assert(gpu_opt->ncuda_dev_use >= 0);
453 if (gpu_opt->ncuda_dev_use == 0)
458 assert(gpu_opt->cuda_dev_use);
460 /* we will assume that all GPUs requested are valid IDs,
461 otherwise we'll bail anyways */
464 for (i = 0; i < gpu_opt->ncuda_dev_use; i++)
466 id = gpu_opt->cuda_dev_use[i];
468 /* devices are stored in increasing order of IDs in cuda_dev */
469 gpu_opt->cuda_dev_use[i] = id;
471 checkres[i] = (id >= gpu_info->ncuda_dev) ?
472 egpuNonexistent : gpu_info->cuda_dev[id].stat;
474 bAllOk = bAllOk && is_compatible_gpu(checkres[i]);
480 /*! \brief Frees the cuda_dev and cuda_dev_use array fields of \gpu_info.
482 * \param[in] gpu_info pointer to structure holding GPU information
484 void free_gpu_info(const gmx_gpu_info_t *gpu_info)
486 if (gpu_info == NULL)
491 sfree(gpu_info->cuda_dev);
494 /*! \brief Formats and returns a device information string for a given GPU.
496 * Given an index *directly* into the array of available GPUs (cuda_dev)
497 * returns a formatted info string for the respective GPU which includes
498 * ID, name, compute capability, and detection status.
500 * \param[out] s pointer to output string (has to be allocated externally)
501 * \param[in] gpu_info pointer to structure holding GPU information
502 * \param[in] index an index *directly* into the array of available GPUs
504 void get_gpu_device_info_string(char *s, const gmx_gpu_info_t *gpu_info, int index)
509 if (index < 0 && index >= gpu_info->ncuda_dev)
514 cuda_dev_info_t *dinfo = &gpu_info->cuda_dev[index];
517 dinfo->stat == egpuCompatible ||
518 dinfo->stat == egpuIncompatible;
522 sprintf(s, "#%d: %s, stat: %s",
524 gpu_detect_res_str[dinfo->stat]);
528 sprintf(s, "#%d: NVIDIA %s, compute cap.: %d.%d, ECC: %3s, stat: %s",
529 dinfo->id, dinfo->prop.name,
530 dinfo->prop.major, dinfo->prop.minor,
531 dinfo->prop.ECCEnabled ? "yes" : " no",
532 gpu_detect_res_str[dinfo->stat]);
536 /*! \brief Returns the device ID of the GPU with a given index into the array of used GPUs.
538 * Getter function which, given an index into the array of GPUs in use
539 * (cuda_dev_use) -- typically a tMPI/MPI rank --, returns the device ID of the
540 * respective CUDA GPU.
542 * \param[in] gpu_info pointer to structure holding GPU information
543 * \param[in] gpu_opt pointer to structure holding GPU options
544 * \param[in] idx index into the array of used GPUs
545 * \returns device ID of the requested GPU
547 int get_gpu_device_id(const gmx_gpu_info_t *gpu_info,
548 const gmx_gpu_opt_t *gpu_opt,
553 assert(idx >= 0 && idx < gpu_opt->ncuda_dev_use);
555 return gpu_info->cuda_dev[gpu_opt->cuda_dev_use[idx]].id;
558 /*! \brief Returns the device ID of the GPU currently in use.
560 * The GPU used is the one that is active at the time of the call in the active context.
562 * \param[in] gpu_info pointer to structure holding GPU information
563 * \returns device ID of the GPU in use at the time of the call
565 int get_current_gpu_device_id(void)
568 CU_RET_ERR(cudaGetDevice(&gpuid), "cudaGetDevice failed");
573 /*! \brief Returns the size of the cuda_dev_info struct.
575 * The size of cuda_dev_info can be used for allocation and communication.
577 * \returns size in bytes of cuda_dev_info
579 size_t sizeof_cuda_dev_info(void)
581 return sizeof(cuda_dev_info);