1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2010, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
42 #include "types/hw_info.h"
44 #include "gpu_utils.h"
45 #include "../cuda_tools/cudautils.cuh"
46 #include "memtestG80_core.h"
/*! Amount of memory (in MB) to be used in the quick memtest. */
#define QUICK_MEM       250
/*! Bit flags with the types of tests to run in the quick memtest.
 *  The expansion is parenthesized so that it stays a single operand when the
 *  macro is used inside a larger expression (e.g. together with & or ==). */
#define QUICK_TESTS     (MOD_20_32BIT | LOGIC_4_ITER_SHMEM | RANDOM_BLOCKS)
/*! Number of iterations in the quick memtest. */
#define QUICK_ITER      3

/*! Bit flags with all tests switched on, used by the full memtest. */
#define FULL_TESTS      0x3FFF
/*! Number of iterations in the full memtest. */
#define FULL_ITER       25

/*! Bit flags with the types of tests to run in the time-constrained memtest
 *  (parenthesized for the same reason as QUICK_TESTS). */
#define TIMED_TESTS     (MOD_20_32BIT | LOGIC_4_ITER_SHMEM | RANDOM_BLOCKS)

/*! Number of supported GPUs */
#define NB_GPUS (sizeof(SupportedGPUs)/sizeof(SupportedGPUs[0]))

/*! Max number of devices supported by CUDA (for consistency checking).
 *  In reality it is 16 with CUDA <= v5.0, but let's stay on the safe side. */
static int cuda_max_device_count = 32;
/*! Dummy kernel used for sanity checking: it takes no arguments and does no work,
 *  it only has to be launchable on the device. */
__global__ void k_dummy_test(void)
{
}
/*! Bit-flags which refer to memtestG80 test types and are used in do_memtest to specify which tests to run.
 *
 *  Every test owns exactly one bit, so flags can be combined with | and
 *  queried with &. All enumerators that do_memtest and the QUICK_TESTS /
 *  TIMED_TESTS macros refer to are declared here.
 *
 *  NOTE(review): WALKING_0_8BIT, RANDOM_BLOCKS, MOD_20_32BIT, LOGIC_1_ITER and
 *  LOGIC_4_ITER occupy the bit positions left unused by the other enumerators,
 *  assigned in the order in which do_memtest runs the tests - confirm against
 *  the memtestG80 numbering.
 */
enum memtest_G80_test_types {
    MOVING_INVERSIONS_10   = 0x1,
    MOVING_INVERSIONS_RAND = 0x2,
    WALKING_8BIT_M86       = 0x4,
    WALKING_0_8BIT         = 0x8,
    WALKING_1_8BIT         = 0x10,
    WALKING_0_32BIT        = 0x20,
    WALKING_1_32BIT        = 0x40,
    RANDOM_BLOCKS          = 0x80,
    MOD_20_32BIT           = 0x100,
    LOGIC_1_ITER           = 0x200,
    LOGIC_4_ITER           = 0x400,
    LOGIC_1_ITER_SHMEM     = 0x800,
    LOGIC_4_ITER_SHMEM     = 0x1000
};
87 // TODO put this list into an external file and include it so that the list is easily accessible
88 /*! List of supported GPUs. */
// Table of device names accepted by is_gmx_openmm_supported_gpu_name.
// A device name is compared against only the first strlen(entry) characters
// of each entry, so one entry can cover a whole family of names that share
// the same leading characters (see the GX2/GTX/GTX+/GT remark below).
89 static const char * const SupportedGPUs[] = {
125 "Quadro Plex 2200 D2",
126 "Quadro Plex 2200 S4",
129 "GeForce 9800 G", /* GX2, GTX, GTX+, GT */
133 "Quadro Plex 2100 D4"
138 * \brief Runs GPU sanity checks.
140 * Runs a series of checks to determine that the given GPU and underlying CUDA
141 * driver/runtime functions properly.
142 * Returns properties of a device with given ID or the one that has
143 * already been initialized earlier in the case of \dev_id == -1.
145 * \param[in] dev_id the device ID of the GPU or -1 if the device has already been initialized
146 * \param[out] dev_prop pointer to the structure in which the device properties will be returned
147 * \returns 0 if the device looks OK
149 * TODO: introduce errors codes and handle errors more smoothly.
// Sanity-checks a CUDA device: queries the device count, resolves the device
// id (the current device when dev_id == -1), reads the device properties into
// *dev_prop, switches to the device, launches an empty kernel and finally
// tears the context down again.
151 static int do_sanity_checks(int dev_id, cudaDeviceProp *dev_prop)
// Step 1: the runtime must be able to report a device count at all.
156 cu_err = cudaGetDeviceCount(&dev_count);
157 if (cu_err != cudaSuccess)
159 fprintf(stderr, "Error %d while querying device count: %s\n", cu_err,
160 cudaGetErrorString(cu_err));
164 /* no CUDA compatible device at all */
168 /* things might go horribly wrong if cudart is not compatible with the driver */
// A count outside [0, cuda_max_device_count] is treated as implausible.
169 if (dev_count < 0 || dev_count > cuda_max_device_count)
// Step 2: resolve which device id to inspect.
172 if (dev_id == -1) /* device already selected let's not destroy the context */
174 cu_err = cudaGetDevice(&id);
175 if (cu_err != cudaSuccess)
177 fprintf(stderr, "Error %d while querying device id: %s\n", cu_err,
178 cudaGetErrorString(cu_err));
// NOTE(review): only the upper bound of the id is tested on this line;
// negative ids other than -1 are not rejected here.
185 if (id > dev_count - 1) /* pfff there's no such device */
187 fprintf(stderr, "The requested device with id %d does not seem to exist (device count=%d)\n",
// Step 3: fetch the properties into a zeroed structure.
193 memset(dev_prop, 0, sizeof(cudaDeviceProp));
194 cu_err = cudaGetDeviceProperties(dev_prop, id);
195 if (cu_err != cudaSuccess)
197 fprintf(stderr, "Error %d while querying device properties: %s\n", cu_err,
198 cudaGetErrorString(cu_err));
202 /* both major & minor is 9999 if no CUDA capable devices are present */
203 if (dev_prop->major == 9999 && dev_prop->minor == 9999)
205 /* we don't care about emulation mode */
206 if (dev_prop->major == 0)
// Step 4: make the device current.
211 cu_err = cudaSetDevice(id);
212 if (cu_err != cudaSuccess)
214 fprintf(stderr, "Error %d while switching to device #%d: %s\n",
215 cu_err, id, cudaGetErrorString(cu_err));
220 /* try to execute a dummy kernel */
// One block of 512 threads; the launch itself is not followed by a
// cudaGetLastError() call, only the result of the synchronization is tested.
// NOTE(review): cudaThreadSynchronize is deprecated in newer CUDA runtimes in
// favour of cudaDeviceSynchronize.
221 k_dummy_test<<<1, 512>>>();
222 if (cudaThreadSynchronize() != cudaSuccess)
227 /* destroy context if we created one */
// The tear-down call depends on the CUDA version the file is compiled against.
230 #if CUDA_VERSION < 4000
231 cu_err = cudaThreadExit();
232 CU_RET_ERR(cu_err, "cudaThreadExit failed");
234 cu_err = cudaDeviceReset();
235 CU_RET_ERR(cu_err, "cudaDeviceReset failed");
244 * \brief Checks whether the GPU with the given name is supported in Gromacs-OpenMM.
246 * \param[in] gpu_name the name of the CUDA device
247 * \returns TRUE if the device is supported, otherwise FALSE
// Looks for an entry of SupportedGPUs that the given device name starts with.
// Only the first strlen(SupportedGPUs[i]) characters are compared, so any
// suffix of the device name is ignored; the comparison is presumably
// case-insensitive, judging by the helper's name.
// NOTE(review): the header comment documents the parameter as gpu_name while
// the code calls it gpuName.
249 static bool is_gmx_openmm_supported_gpu_name(char *gpuName)
// NB_GPUS is a sizeof-based element count of the SupportedGPUs table.
252 for (i = 0; i < NB_GPUS; i++)
255 if (gmx_strncasecmp(gpuName, SupportedGPUs[i], strlen(SupportedGPUs[i])) == 0)
/*! \brief Checks whether the GPU with the given device id is supported in Gromacs-OpenMM.
 *
 * The device is sanity-checked first; the device name reported by the CUDA
 * runtime is then matched against the list of supported GPUs.
 *
 * \param[in]  dev_id    the device id of the GPU or -1 if the device has already been selected
 * \param[out] gpu_name  set to contain the name of the CUDA device; if NULL is passed,
 *                       no device name is set. The copy is unbounded, so the buffer
 *                       has to be large enough to hold the device name.
 * \returns              TRUE if the device is supported, otherwise FALSE
 */
gmx_bool is_gmx_openmm_supported_gpu(int dev_id, char *gpu_name)
{
    cudaDeviceProp dev_prop;

    if (do_sanity_checks(dev_id, &dev_prop) != 0)
    {
        /* NOTE(review): the return statement of this branch was not visible in
         * the reviewed excerpt; "not supported" (0) is returned as documented
         * above - confirm against the callers. */
        return 0;
    }

    /* gpu_name is an output argument (possibly NULL and not yet written at
     * this point), so the debug message reports the name obtained from the
     * CUDA runtime instead of the caller's buffer. */
    if (debug)
    {
        fprintf(debug, "Checking compatibility with device #%d, %s\n", dev_id, dev_prop.name);
    }

    if (gpu_name != NULL)
    {
        strcpy(gpu_name, dev_prop.name);
    }

    return is_gmx_openmm_supported_gpu_name(dev_prop.name);
}
286 * \brief Runs a set of memory tests specified by the given bit-flags.
287 * Tries to allocate and do the test on \p megs Mb memory or
288 * the greatest amount that can be allocated (>10Mb).
289 * If an error is detected it stops without finishing the remaining
290 * steps/iterations and returns a value greater than zero.
291 * In case of other errors (e.g. kernel launch errors, device querying errors)
294 * \param[in] which_tests variable with bit-flags of the requested tests
295 * \param[in] megs amount of memory that will be tested in MB
296 * \param[in] iter number of iterations
297 * \returns 0 if no error was detected, otherwise >0
// Runs the memtestG80 tests selected by the bit-flags in which_tests, iter
// times, on a buffer of (at most) megs MB. Where a return statement is
// visible, the flag of the test involved is what gets returned.
299 static int do_memtest(unsigned int which_tests, int megs, int iter)
303 uint err_count; //, err_iter;
305 // no parameter check as this fn won't be called externally
307 // let's try to allocate the mem
// Retry with 10 MB less each time until the allocation succeeds or the
// request cannot be reduced any further.
308 while (!tester.allocate(megs) && (megs - 10 > 0))
309 { megs -= 10; tester.deallocate(); }
313 fprintf(stderr, "Unable to allocate GPU memory!\n");
317 // keep only the low 14 bits (0x3FFF, the same mask as FULL_TESTS); higher bits are ignored
318 which_tests &= 0x3FFF;
319 for (i = 0; i < iter; i++)
321 // Moving Inversions (ones and zeros)
322 if ((MOVING_INVERSIONS_10 & which_tests) == MOVING_INVERSIONS_10)
324 tester.gpuMovingInversionsOnesZeros(err_count);
326 return MOVING_INVERSIONS_10;
328 // Moving Inversions (random)
329 if ((MOVING_INVERSIONS_RAND & which_tests) == MOVING_INVERSIONS_RAND)
331 tester.gpuMovingInversionsRandom(err_count);
333 return MOVING_INVERSIONS_RAND;
335 // Memtest86 Walking 8-bit
336 if ((WALKING_8BIT_M86 & which_tests) == WALKING_8BIT_M86)
// One pass per bit position of the 8-bit pattern.
338 for (uint shift = 0; shift < 8; shift++)
340 tester.gpuWalking8BitM86(err_count, shift);
342 return WALKING_8BIT_M86;
345 // True Walking zeros (8-bit)
346 if ((WALKING_0_8BIT & which_tests) == WALKING_0_8BIT)
348 for (uint shift = 0; shift < 8; shift++)
350 tester.gpuWalking8Bit(err_count, false, shift);
352 return WALKING_0_8BIT;
355 // True Walking ones (8-bit)
356 if ((WALKING_1_8BIT & which_tests) == WALKING_1_8BIT)
358 for (uint shift = 0; shift < 8; shift++)
360 tester.gpuWalking8Bit(err_count, true, shift);
362 return WALKING_1_8BIT;
365 // Memtest86 Walking zeros (32-bit)
366 if ((WALKING_0_32BIT & which_tests) == WALKING_0_32BIT)
// One pass per bit position of the 32-bit pattern.
368 for (uint shift = 0; shift < 32; shift++)
370 tester.gpuWalking32Bit(err_count, false, shift);
372 return WALKING_0_32BIT;
375 // Memtest86 Walking ones (32-bit)
376 if ((WALKING_1_32BIT & which_tests) == WALKING_1_32BIT)
378 for (uint shift = 0; shift < 32; shift++)
380 tester.gpuWalking32Bit(err_count, true, shift);
382 return WALKING_1_32BIT;
// Random blocks; the second argument comes from the C library rand().
386 if ((RANDOM_BLOCKS & which_tests) == RANDOM_BLOCKS)
388 tester.gpuRandomBlocks(err_count,rand());
390 return RANDOM_BLOCKS;
394 // Memtest86 Modulo-20
395 if ((MOD_20_32BIT & which_tests) == MOD_20_32BIT)
// One pass for each of the 20 possible offsets.
397 for (uint shift = 0; shift < 20; shift++)
399 tester.gpuModuloX(err_count, shift, rand(), 20, 2);
404 // Logic (one iteration)
405 if ((LOGIC_1_ITER & which_tests) == LOGIC_1_ITER)
407 tester.gpuShortLCG0(err_count,1);
411 // Logic (4 iterations)
412 if ((LOGIC_4_ITER & which_tests) == LOGIC_4_ITER)
414 tester.gpuShortLCG0(err_count,4);
419 // Logic (shared memory, one iteration)
420 if ((LOGIC_1_ITER_SHMEM & which_tests) == LOGIC_1_ITER_SHMEM)
422 tester.gpuShortLCG0Shmem(err_count,1);
424 return LOGIC_1_ITER_SHMEM;
426 // Logic (shared-memory, 4 iterations)
427 if ((LOGIC_4_ITER_SHMEM & which_tests) == LOGIC_4_ITER_SHMEM)
429 tester.gpuShortLCG0Shmem(err_count,4);
431 return LOGIC_4_ITER_SHMEM;
439 /*! \brief Runs a quick memory test and returns 0 in case if no error is detected.
440 * If an error is detected it stops before completing the test and returns a
441 * value greater than 0. In case of other errors (e.g. kernel launch errors,
442 * device querying errors) -1 is returned.
444 * \param[in] dev_id the device id of the GPU or -1 if the device has already been selected
445 * \returns 0 if no error was detected, otherwise >0
// Quick memory test: sanity-checks the device, then runs the QUICK_TESTS set
// on QUICK_MEM MB for QUICK_ITER iterations. Timing and result are written to
// the debug stream.
447 int do_quick_memtest(int dev_id)
449 cudaDeviceProp dev_prop;
450 int devmem, res, time=0;
// The start time is only sampled when debug output is enabled.
452 if (debug) { time = getTimeMilliseconds(); }
454 if (do_sanity_checks(dev_id, &dev_prop) != 0)
456 // something went wrong
// devmem is used for the log message only; the test itself always requests
// QUICK_MEM.
462 devmem = dev_prop.totalGlobalMem/(1024*1024); // in MiB
463 fprintf(debug, ">> Running QUICK memtests on %d MiB (out of total %d MiB), %d iterations\n",
464 QUICK_MEM, devmem, QUICK_ITER);
467 res = do_memtest(QUICK_TESTS, QUICK_MEM, QUICK_ITER);
471 fprintf(debug, "Q-RES = %d\n", res);
472 fprintf(debug, "Q-runtime: %d ms\n", getTimeMilliseconds() - time);
475 /* destroy context only if we created it */
// NOTE(review): the return value of cudaThreadExit() is ignored here.
476 if (dev_id !=-1) cudaThreadExit();
480 /*! \brief Runs a full memory test and returns 0 in case if no error is detected.
481 * If an error is detected it stops before completing the test and returns a
482 * value greater than 0. In case of other errors (e.g. kernel launch errors,
483 * device querying errors) -1 is returned.
485 * \param[in] dev_id the device id of the GPU or -1 if the device has already been selected
486 * \returns 0 if no error was detected, otherwise >0
// Full memory test: sanity-checks the device, then runs the FULL_TESTS set for
// FULL_ITER iterations, requesting the total global memory of the device.
// Timing and result are written to the debug stream.
489 int do_full_memtest(int dev_id)
491 cudaDeviceProp dev_prop;
492 int devmem, res, time=0;
// The start time is only sampled when debug output is enabled.
494 if (debug) { time = getTimeMilliseconds(); }
496 if (do_sanity_checks(dev_id, &dev_prop) != 0)
498 // something went wrong
// Total device memory, used as the size of the test request below.
502 devmem = dev_prop.totalGlobalMem/(1024*1024); // in MiB
506 fprintf(debug, ">> Running FULL memtests on %d MiB (out of total %d MiB), %d iterations\n",
507 devmem, devmem, FULL_ITER);
510 /* do all test on the entire memory */
// do_memtest itself lowers the request in 10 MB steps if the allocation fails.
511 res = do_memtest(FULL_TESTS, devmem, FULL_ITER);
515 fprintf(debug, "F-RES = %d\n", res);
516 fprintf(debug, "F-runtime: %d ms\n", getTimeMilliseconds() - time);
519 /* destroy context only if we created it */
// NOTE(review): the return value of cudaThreadExit() is ignored here.
520 if (dev_id != -1) cudaThreadExit();
524 /*! \brief Runs a time constrained memory test and returns 0 in case if no error is detected.
525 * If an error is detected it stops before completing the test and returns a value greater
526 * than zero. In case of other errors (e.g. kernel launch errors, device querying errors) -1
527 * is returned. Note that test iterations are not interrupted, therefore the total runtime of
528 * the test will always be a multiple of one iteration's runtime.
530 * \param[in] dev_id the device id of the GPU or -1 if the device has already been selected
531 * \param[in] time_constr the time limit of the testing
532 * \returns 0 if no error was detected, otherwise >0
// Time-constrained memory test: sanity-checks the device, then keeps running
// single iterations of the TIMED_TESTS set until the elapsed time reaches the
// limit. time_constr is multiplied by 1000 to obtain milliseconds, i.e. the
// caller passes seconds.
534 int do_timed_memtest(int dev_id, int time_constr)
536 cudaDeviceProp dev_prop;
537 int devmem, res=0, time=0, startt;
// time is only used for the debug report; startt drives the time limit.
539 if (debug) { time = getTimeMilliseconds(); }
541 time_constr *= 1000; /* convert to ms for convenience */
542 startt = getTimeMilliseconds();
544 if (do_sanity_checks(dev_id, &dev_prop) != 0)
546 // something went wrong
550 devmem = dev_prop.totalGlobalMem/(1024*1024); // in MiB
// NOTE(review): time_constr has already been converted to milliseconds above,
// yet the message below labels the value as seconds.
554 fprintf(debug, ">> Running time constrained memtests on %d MiB (out of total %d MiB), time limit of %d s \n",
555 devmem, devmem, time_constr);
558 /* do the TIMED_TESTS set, one step at a time on the entire memory
559 that can be allocated, and stop when the given time is exceeded */
// The limit is only tested between iterations, so a running iteration is
// never interrupted.
560 while ( ((int)getTimeMilliseconds() - startt) < time_constr)
562 res = do_memtest(TIMED_TESTS, devmem, 1);
568 fprintf(debug, "T-RES = %d\n", res);
569 fprintf(debug, "T-runtime: %d ms\n", getTimeMilliseconds() - time);
572 /* destroy context only if we created it */
// NOTE(review): the return value of cudaThreadExit() is ignored here.
573 if (dev_id != -1) cudaThreadExit();
577 /*! \brief Initializes the GPU with the given index.
579 * The variable \mygpu is the index of the GPU to initialize in the
580 * gpu_info.cuda_dev array.
582 * \param[in] mygpu index of the GPU to initialize
583 * \param[out] result_str the message related to the error that occurred
584 * during the initialization (if there was any).
585 * \param[in] gpu_info GPU info of all detected devices in the system.
586 * \returns true if no error occurs during initialization.
// Makes the GPU selected by mygpu the current CUDA device. mygpu indexes
// gpu_info->cuda_dev_use, whose entries in turn index gpu_info->cuda_dev.
// The CUDA error string of the cudaSetDevice call is copied into result_str.
588 gmx_bool init_gpu(int mygpu, char *result_str, const gmx_gpu_info_t *gpu_info)
// mygpu has to lie within [0, ncuda_dev_use).
597 if (mygpu < 0 || mygpu >= gpu_info->ncuda_dev_use)
599 sprintf(sbuf, "Trying to initialize an inexistent GPU: "
600 "there are %d %s-selected GPU(s), but #%d was requested.",
601 gpu_info->ncuda_dev_use, gpu_info->bUserSet ? "user" : "auto", mygpu);
// Translate the index into the list of used GPUs into a CUDA device id.
605 gpuid = gpu_info->cuda_dev[gpu_info->cuda_dev_use[mygpu]].id;
607 stat = cudaSetDevice(gpuid);
// NOTE(review): strncpy does not NUL-terminate the destination when the
// source is STRLEN characters or longer - presumably result_str holds at
// least STRLEN bytes; confirm against the callers.
608 strncpy(result_str, cudaGetErrorString(stat), STRLEN);
// NOTE(review): cuda_dev is indexed with the device id here, whereas gpuid
// above was looked up through cuda_dev_use; both agree only if cuda_dev is
// ordered by device id.
612 fprintf(stderr, "Initialized GPU ID #%d: %s\n", gpuid, gpu_info->cuda_dev[gpuid].prop.name);
615 return (stat == cudaSuccess);
618 /*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
620 * The context is explicitly destroyed and therefore all data uploaded to the GPU
621 * is lost. This should only be called when none of this data is required anymore.
623 * \param[out] result_str the message related to the error that occurred
624 * during the initialization (if there was any).
625 * \returns true if no error occurs during the freeing.
// Tears down the CUDA context of the current device and copies the CUDA error
// string of the tear-down call into result_str.
627 gmx_bool free_gpu(char *result_str)
// The current device id is queried for the log message below.
636 stat = cudaGetDevice(&gpuid);
637 CU_RET_ERR(stat, "cudaGetDevice failed");
638 fprintf(stderr, "Cleaning up context on GPU ID #%d\n", gpuid);
// The tear-down call depends on the CUDA version the file is compiled against.
641 #if CUDA_VERSION < 4000
642 stat = cudaThreadExit();
644 stat = cudaDeviceReset();
// NOTE(review): strncpy does not NUL-terminate the destination when the
// source is STRLEN characters or longer - presumably result_str holds at
// least STRLEN bytes; confirm against the callers.
646 strncpy(result_str, cudaGetErrorString(stat), STRLEN);
648 return (stat == cudaSuccess);
/*! \brief Tells whether a GPU, described by its device properties, can be
 * used by the native GPU acceleration.
 *
 * \param[in] dev_prop  the CUDA device properties of the GPU to test.
 * \returns             true if the major compute capability of the device is
 *                      at least 2, otherwise false.
 */
static bool is_gmx_supported_gpu(const cudaDeviceProp *dev_prop)
{
    const int major_cc = dev_prop->major;

    return !(major_cc < 2);
}
/*! \brief Helper that tells whether a GPU detection status stands for a compatible GPU.
 *
 * \param[in] stat  GPU status.
 * \returns         true if the provided status equals egpuCompatible, otherwise false.
 */
static bool is_compatible_gpu(int stat)
{
    const bool bCompatible = (egpuCompatible == stat);

    return bCompatible;
}
673 /*! \brief Checks if a GPU with a given ID is supported by the native GROMACS acceleration.
675 * Returns a status value which indicates compatibility or one of the following
676 * errors: incompatibility, nonexistence, or insanity (=unexpected behavior).
677 * It also returns the respective device's properties in \dev_prop (if applicable).
679 * \param[in] dev_id the ID of the GPU to check.
680 * \param[out] dev_prop the CUDA device properties of the device checked.
681 * \returns the status of the requested device
// Classifies the device with the given id: egpuNonexistent when the id is
// beyond the device count, otherwise egpuCompatible or egpuIncompatible
// depending on is_gmx_supported_gpu once the sanity checks have passed.
683 static int is_gmx_supported_gpu_id(int dev_id, cudaDeviceProp *dev_prop)
688 stat = cudaGetDeviceCount(&ndev);
689 if (stat != cudaSuccess)
// NOTE(review): only the upper bound of dev_id is tested on this line.
694 if (dev_id > ndev - 1)
696 return egpuNonexistent;
699 /* TODO: currently we do not make a distinction between the type of errors
700 * that can appear during sanity checks. This needs to be improved, e.g if
701 * the dummy test kernel fails to execute with a "device busy message" we
702 * should appropriately report that the device is busy instead of insane.
704 if (do_sanity_checks(dev_id, dev_prop) == 0)
706 if (is_gmx_supported_gpu(dev_prop))
708 return egpuCompatible;
712 return egpuIncompatible;
722 /*! \brief Detect all NVIDIA GPUs in the system.
724 * Will detect every NVIDIA GPU supported by the device driver in use. Also
725 * check for the compatibility of each and fill the gpu_info->cuda_dev array
726 * with the required information on each the device: ID, device properties,
729 * \param[in] gpu_info pointer to structure holding GPU information.
730 * \param[out] err_str The error message of any CUDA API error that caused
731 * the detection to fail (if there was any). The memory
732 * the pointer points to should be managed externally.
733 * \returns non-zero if the detection encountered a failure, zero otherwise.
// Detects the CUDA devices of the system: queries the device count, checks
// every device id in [0, ndev) with is_gmx_supported_gpu_id and publishes the
// per-device array and the count through gpu_info.
735 int detect_cuda_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
737 int i, ndev, checkres, retval;
740 cuda_dev_info_t *devs;
748 stat = cudaGetDeviceCount(&ndev);
749 if (stat != cudaSuccess)
753 /* cudaGetDeviceCount failed which means that there is something
754 * wrong with the machine: driver-runtime mismatch, all GPUs being
755 * busy in exclusive mode, or some other condition which should
756 * result in us issuing a warning and falling back to CPUs. */
758 s = cudaGetErrorString(stat);
// NOTE(review): strncpy does not NUL-terminate err_str when the message is
// STRLEN characters or longer - presumably err_str holds at least STRLEN
// bytes; confirm against the callers.
759 strncpy(err_str, s, STRLEN*sizeof(err_str[0]));
// Devices are visited in increasing id order, so devs[i] describes device i.
764 for (i = 0; i < ndev; i++)
766 checkres = is_gmx_supported_gpu_id(i, &prop);
770 devs[i].stat = checkres;
775 gpu_info->ncuda_dev = ndev;
776 gpu_info->cuda_dev = devs;
781 /*! \brief Select the GPUs compatible with the native GROMACS acceleration.
783 * This function selects the compatible gpus and initializes
784 * gpu_info->cuda_dev_use and gpu_info->ncuda_dev_use.
786 * Given the list of GPUs available in the system, it checks each GPU in
787 * gpu_info->cuda_dev and puts the indices (into gpu_info->cuda_dev) of
788 * the compatible ones into cuda_dev_use with this marking the respective
789 * GPUs as "available for use."
790 * Note that \detect_cuda_gpus must have been called before.
792 * \param[in] gpu_info pointer to structure holding GPU information
// Collects the indices of all detected devices whose status is compatible and
// stores them, together with their count, in gpu_info->cuda_dev_use and
// gpu_info->ncuda_dev_use.
794 void pick_compatible_gpus(gmx_gpu_info_t *gpu_info)
800 /* cuda_dev/ncuda_dev have to be either NULL/0 or not (NULL/0) */
// The XOR is 1 exactly when "count is zero" and "array is NULL" agree.
801 assert((gpu_info->ncuda_dev != 0 ? 0 : 1) ^ (gpu_info->cuda_dev == NULL ? 0 : 1));
// Scratch list sized for the worst case in which every device is compatible.
803 snew(compat, gpu_info->ncuda_dev);
805 for (i = 0; i < gpu_info->ncuda_dev; i++)
807 if (is_compatible_gpu(gpu_info->cuda_dev[i].stat))
810 compat[ncompat - 1] = i;
// Publish a tightly sized copy of the collected indices.
814 gpu_info->ncuda_dev_use = ncompat;
815 snew(gpu_info->cuda_dev_use, ncompat);
816 memcpy(gpu_info->cuda_dev_use, compat, ncompat*sizeof(*compat));
820 /*! \brief Check the existence/compatibility of a set of GPUs specified by their device IDs.
822 * Given a list of GPU device IDs in \requested_devs, check for the
823 * existence and compatibility of the respective GPUs and fill in \gpu_info
824 * with the collected information. Also provide the caller with an array with
825 * the result of checks in \checkres.
827 * \param[out] checkres check result for each ID passed in \requested_devs
828 * \param[in] gpu_info pointer to structure holding GPU information
829 * \param[in] requested_devs array of requested device IDs
830 * \param[in] count number of IDs in \requested_devs
831 * \returns TRUE if every requested GPU is compatible
// Records the requested device ids as the GPUs to use and reports, per id,
// the detection status (or egpuNonexistent) through checkres; the overall
// result is the conjunction of the per-id compatibility checks.
833 gmx_bool check_select_cuda_gpus(int *checkres, gmx_gpu_info_t *gpu_info,
834 const int *requested_devs, int count)
841 assert(requested_devs);
849 /* we will assume that all GPUs requested are valid IDs,
850 otherwise we'll bail anyways */
851 gpu_info->ncuda_dev_use = count;
852 snew(gpu_info->cuda_dev_use, count);
855 for (i = 0; i < count; i++)
857 id = requested_devs[i];
859 /* devices are stored in increasing order of IDs in cuda_dev */
860 gpu_info->cuda_dev_use[i] = id;
// NOTE(review): only id >= ncuda_dev is treated as nonexistent; a negative id
// would index cuda_dev out of bounds in the expression below.
862 checkres[i] = (id >= gpu_info->ncuda_dev) ?
863 egpuNonexistent : gpu_info->cuda_dev[id].stat;
865 bAllOk = bAllOk && is_compatible_gpu(checkres[i]);
/*! \brief Releases the cuda_dev_use and cuda_dev arrays held by \gpu_info.
 *
 * Passing NULL is allowed and makes the call a no-op.
 *
 * \param[in] gpu_info  pointer to structure holding GPU information
 */
void free_gpu_info(const gmx_gpu_info_t *gpu_info)
{
    if (gpu_info != NULL)
    {
        sfree(gpu_info->cuda_dev_use);
        sfree(gpu_info->cuda_dev);
    }
}
/*! \brief Formats and returns a device information string for a given GPU.
 *
 * Given an index *directly* into the array of available GPUs (cuda_dev)
 * returns a formatted info string for the respective GPU which includes
 * ID, name, compute capability, and detection status.
 * If \p index is outside [0, ncuda_dev) the function returns without
 * touching \p s.
 *
 * \param[out]  s         pointer to output string (has to be allocated externally
 *                        and be large enough, the write is unbounded)
 * \param[in]   gpu_info  pointer to structure holding GPU information
 * \param[in]   index     an index *directly* into the array of available GPUs
 */
void get_gpu_device_info_string(char *s, const gmx_gpu_info_t *gpu_info, int index)
{
    assert(s);
    assert(gpu_info);

    /* The two range conditions have to be OR-ed: "index < 0 && index >= n"
     * can never be true and would let every invalid index through to the
     * array access below. */
    if (index < 0 || index >= gpu_info->ncuda_dev)
    {
        return;
    }

    const cuda_dev_info_t *dinfo = &gpu_info->cuda_dev[index];

    /* Device properties are only printed for devices with one of these two statuses. */
    bool bGpuExists =
        dinfo->stat == egpuCompatible ||
        dinfo->stat == egpuIncompatible;

    if (!bGpuExists)
    {
        /* NOTE(review): the argument line of this call was not visible in the
         * reviewed excerpt; the id plus an "N/A" name placeholder is assumed
         * to match the "#%d: %s" part of the format - confirm. */
        sprintf(s, "#%d: %s, stat: %s",
                dinfo->id, "N/A",
                gpu_detect_res_str[dinfo->stat]);
    }
    else
    {
        sprintf(s, "#%d: NVIDIA %s, compute cap.: %d.%d, ECC: %3s, stat: %s",
                dinfo->id, dinfo->prop.name,
                dinfo->prop.major, dinfo->prop.minor,
                dinfo->prop.ECCEnabled ? "yes" : " no",
                gpu_detect_res_str[dinfo->stat]);
    }
}
/*! \brief Returns the device ID of the GPU with a given index into the array of used GPUs.
 *
 * Getter function which, given an index into the array of GPUs in use
 * (cuda_dev_use) -- typically a tMPI/MPI rank --, returns the device ID of the
 * respective CUDA GPU.
 *
 * \param[in]    gpu_info   pointer to structure holding GPU information
 * \param[in]    idx        index into the array of used GPUs
 * \returns                 device ID of the requested GPU, or -1 if \p idx is
 *                          outside [0, ncuda_dev_use)
 */
int get_gpu_device_id(const gmx_gpu_info_t *gpu_info, int idx)
{
    assert(gpu_info);

    /* The two range conditions have to be OR-ed: "idx < 0 && idx >= n" can
     * never be true and would let every invalid index through to the array
     * accesses below.
     * NOTE(review): the statement of this branch was not visible in the
     * reviewed excerpt; -1 is returned as "no such GPU" - confirm against
     * the callers. */
    if (idx < 0 || idx >= gpu_info->ncuda_dev_use)
    {
        return -1;
    }

    return gpu_info->cuda_dev[gpu_info->cuda_dev_use[idx]].id;
}
949 /*! \brief Returns the device ID of the GPU currently in use.
951 * The GPU used is the one that is active at the time of the call in the active context.
953 * \param[in] gpu_info pointer to structure holding GPU information
954 * \returns device ID of the GPU in use at the time of the call
// Asks the CUDA runtime which device is current; a failing cudaGetDevice call
// is handed to CU_RET_ERR together with the message below.
// NOTE(review): the header comment above documents a gpu_info parameter, but
// the function takes no arguments.
956 int get_current_gpu_device_id(void)
959 CU_RET_ERR(cudaGetDevice(&gpuid), "cudaGetDevice failed");