2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
40 #include "types/hw_info.h"
42 #include "gpu_utils.h"
43 #include "../cuda_tools/cudautils.cuh"
44 #include "memtestG80_core.h"
46 #include "gromacs/utility/cstringutil.h"
47 #include "gromacs/utility/smalloc.h"
49 /** Amount of memory to be used in quick memtest. */
/** Bit flag with type of tests to run in quick memtest.
 *
 * The OR-ed flags are wrapped in parentheses so that the mask stays a single
 * operand wherever the macro is expanded (e.g. next to \c & or \c ==). */
#define QUICK_TESTS     (MOD_20_32BIT | LOGIC_4_ITER_SHMEM | RANDOM_BLOCKS)
53 /** Number of iterations in quick memtest. */
56 /** Bit mask with the 14 low bits set, i.e. all tests switched on, for the full memtest. */
57 #define FULL_TESTS 0x3FFF
58 /** Number of iterations in full memtest. */
/** Bit flag with type of tests to run in time constrained memtest.
 *
 * The OR-ed flags are wrapped in parentheses so that the mask stays a single
 * operand wherever the macro is expanded (e.g. next to \c & or \c ==). */
#define TIMED_TESTS     (MOD_20_32BIT | LOGIC_4_ITER_SHMEM | RANDOM_BLOCKS)
/*! \brief
 * Max number of devices supported by CUDA (for consistency checking).
 *
 * In reality it is 16 with CUDA <=v5.0, but let's stay on the safe side.
 * The value is only used as an upper bound when validating the device count
 * reported by the runtime, hence it is declared const.
 */
static const int cuda_max_device_count = 32;
71 /** Dummy kernel used for sanity checking. */
/* Launched by do_sanity_checks() as one block of 512 threads and followed by a
 * synchronization, to verify that a kernel can be executed on the device.
 * NOTE(review): the kernel body is not visible in this view - presumably it is
 * empty; confirm. */
72 __global__ void k_dummy_test()
77 /** Bit-flags which refer to memtestG80 test types and are used in do_memtest
78 * to specify which tests to run. */
/* Every visible enumerator is a distinct single bit, so several tests can be
 * OR-ed into one mask; do_memtest() checks each bit separately.
 * NOTE(review): the values 0x8, 0x80, 0x100, 0x200 and 0x400 are not listed in
 * the visible lines, while do_memtest() refers to WALKING_0_8BIT,
 * RANDOM_BLOCKS, MOD_20_32BIT, LOGIC_1_ITER and LOGIC_4_ITER - presumably those
 * enumerators fill the gaps; confirm. */
79 enum memtest_G80_test_types {
80 MOVING_INVERSIONS_10 = 0x1,
81 MOVING_INVERSIONS_RAND = 0x2,
82 WALKING_8BIT_M86 = 0x4,
84 WALKING_1_8BIT = 0x10,
85 WALKING_0_32BIT = 0x20,
86 WALKING_1_32BIT = 0x40,
91 LOGIC_1_ITER_SHMEM = 0x800,
92 LOGIC_4_ITER_SHMEM = 0x1000
97 * \brief Runs GPU sanity checks.
99 * Runs a series of checks to determine that the given GPU and underlying CUDA
100 * driver/runtime functions properly.
101 * Returns properties of a device with given ID or the one that has
102 * already been initialized earlier if \p dev_id == -1.
104 * \param[in] dev_id the device ID of the GPU or -1 if the device has already been initialized
105 * \param[out] dev_prop pointer to the structure in which the device properties will be returned
106 * \returns 0 if the device looks OK
108 * TODO: introduce errors codes and handle errors more smoothly.
110 static int do_sanity_checks(int dev_id, cudaDeviceProp *dev_prop)
/* NOTE(review): the local declarations (cu_err, dev_count, id), the braces and
 * the statements executed inside most error branches are not visible in this
 * view; the comments below only describe the visible lines. */
/* Step 1: ask the runtime for the number of devices; a failing query is
 * reported on stderr together with the CUDA error string. */
115 cu_err = cudaGetDeviceCount(&dev_count);
116 if (cu_err != cudaSuccess)
118 fprintf(stderr, "Error %d while querying device count: %s\n", cu_err,
119 cudaGetErrorString(cu_err));
123 /* no CUDA compatible device at all */
129 /* things might go horribly wrong if cudart is not compatible with the driver */
/* A negative count or one above cuda_max_device_count is treated as implausible. */
130 if (dev_count < 0 || dev_count > cuda_max_device_count)
/* Step 2: pick the device to inspect - the currently active one when
 * dev_id == -1, otherwise (presumably) dev_id itself. */
135 if (dev_id == -1) /* device already selected let's not destroy the context */
137 cu_err = cudaGetDevice(&id);
138 if (cu_err != cudaSuccess)
140 fprintf(stderr, "Error %d while querying device id: %s\n", cu_err,
141 cudaGetErrorString(cu_err));
/* Valid device indices run from 0 to dev_count - 1. */
148 if (id > dev_count - 1) /* pfff there's no such device */
150 fprintf(stderr, "The requested device with id %d does not seem to exist (device count=%d)\n",
/* Step 3: zero the output structure, then fill it with the device properties. */
156 memset(dev_prop, 0, sizeof(cudaDeviceProp));
157 cu_err = cudaGetDeviceProperties(dev_prop, id);
158 if (cu_err != cudaSuccess)
160 fprintf(stderr, "Error %d while querying device properties: %s\n", cu_err,
161 cudaGetErrorString(cu_err));
165 /* both major & minor is 9999 if no CUDA capable devices are present */
166 if (dev_prop->major == 9999 && dev_prop->minor == 9999)
170 /* we don't care about emulation mode */
171 if (dev_prop->major == 0)
/* Step 4: make the device current.
 * NOTE(review): the condition guarding this call is not visible. */
178 cu_err = cudaSetDevice(id);
179 if (cu_err != cudaSuccess)
181 fprintf(stderr, "Error %d while switching to device #%d: %s\n",
182 cu_err, id, cudaGetErrorString(cu_err));
187 /* try to execute a dummy kernel */
/* One block of 512 threads; the synchronization below waits for the kernel and
 * any status other than cudaSuccess takes the failure branch. */
188 k_dummy_test<<< 1, 512>>> ();
189 if (cudaThreadSynchronize() != cudaSuccess)
194 /* destroy context if we created one */
/* Toolkits older than CUDA 4.0 use cudaThreadExit(); the other call shown is
 * cudaDeviceReset() (the #else/#endif lines are not visible here). */
197 #if CUDA_VERSION < 4000
198 cu_err = cudaThreadExit();
199 CU_RET_ERR(cu_err, "cudaThreadExit failed");
201 cu_err = cudaDeviceReset();
202 CU_RET_ERR(cu_err, "cudaDeviceReset failed");
211 * \brief Runs a set of memory tests specified by the given bit-flags.
212 * Tries to allocate and do the test on \p megs Mb memory or
213 * the greatest amount that can be allocated (>10Mb).
214 * In case if an error is detected it stops without finishing the remaining
215 * steps/iterations and returns a value greater than zero.
216 * In case of other errors (e.g. kernel launch errors, device querying errors)
219 * \param[in] which_tests variable with bit-flags of the requested tests
220 * \param[in] megs amount of memory that will be tested in MB
221 * \param[in] iter number of iterations
222 * \returns 0 if no error was detected, otherwise >0
224 static int do_memtest(unsigned int which_tests, int megs, int iter)
/* NOTE(review): the declaration of `tester` and `i`, the braces, the checks on
 * err_count after each test and the final return are not visible in this view.
 * For the tests whose return statement is visible, the value returned is the
 * bit flag of that test. */
228 uint err_count; //, err_iter;
230 // no parameter check as this fn won't be called externally
232 // let's try to allocate the mem
/* Retry with 10 MB less each time until the allocation succeeds or the
 * requested size would drop to 10 MB or below. */
233 while (!tester.allocate(megs) && (megs - 10 > 0))
235 megs -= 10; tester.deallocate();
240 fprintf(stderr, "Unable to allocate GPU memory!\n");
244 // keep only the 14 low bits (the test flags), i.e. clear the 18 high bits
245 which_tests &= 0x3FFF;
/* Every iteration runs each test whose flag is set in which_tests, in the
 * fixed order below. */
246 for (i = 0; i < iter; i++)
248 // Moving Inversions (ones and zeros)
249 if ((MOVING_INVERSIONS_10 & which_tests) == MOVING_INVERSIONS_10)
251 tester.gpuMovingInversionsOnesZeros(err_count);
254 return MOVING_INVERSIONS_10;
257 // Moving Inversions (random)
258 if ((MOVING_INVERSIONS_RAND & which_tests) == MOVING_INVERSIONS_RAND)
260 tester.gpuMovingInversionsRandom(err_count);
263 return MOVING_INVERSIONS_RAND;
266 // Memtest86 Walking 8-bit
267 if ((WALKING_8BIT_M86 & which_tests) == WALKING_8BIT_M86)
/* the 8-bit walking tests are repeated for shift values 0..7 */
269 for (uint shift = 0; shift < 8; shift++)
271 tester.gpuWalking8BitM86(err_count, shift);
274 return WALKING_8BIT_M86;
278 // True Walking zeros (8-bit)
279 if ((WALKING_0_8BIT & which_tests) == WALKING_0_8BIT)
281 for (uint shift = 0; shift < 8; shift++)
283 tester.gpuWalking8Bit(err_count, false, shift);
286 return WALKING_0_8BIT;
290 // True Walking ones (8-bit)
291 if ((WALKING_1_8BIT & which_tests) == WALKING_1_8BIT)
293 for (uint shift = 0; shift < 8; shift++)
295 tester.gpuWalking8Bit(err_count, true, shift);
298 return WALKING_1_8BIT;
302 // Memtest86 Walking zeros (32-bit)
303 if ((WALKING_0_32BIT & which_tests) == WALKING_0_32BIT)
/* the 32-bit walking tests are repeated for shift values 0..31 */
305 for (uint shift = 0; shift < 32; shift++)
307 tester.gpuWalking32Bit(err_count, false, shift);
310 return WALKING_0_32BIT;
314 // Memtest86 Walking ones (32-bit)
315 if ((WALKING_1_32BIT & which_tests) == WALKING_1_32BIT)
317 for (uint shift = 0; shift < 32; shift++)
319 tester.gpuWalking32Bit(err_count, true, shift);
322 return WALKING_1_32BIT;
/* Random blocks: the second argument comes from rand() */
327 if ((RANDOM_BLOCKS & which_tests) == RANDOM_BLOCKS)
329 tester.gpuRandomBlocks(err_count, rand());
332 return RANDOM_BLOCKS;
337 // Memtest86 Modulo-20
338 if ((MOD_20_32BIT & which_tests) == MOD_20_32BIT)
/* repeated for shift values 0..19 */
340 for (uint shift = 0; shift < 20; shift++)
342 tester.gpuModuloX(err_count, shift, rand(), 20, 2);
349 // Logic (one iteration)
350 if ((LOGIC_1_ITER & which_tests) == LOGIC_1_ITER)
352 tester.gpuShortLCG0(err_count, 1);
358 // Logic (4 iterations)
359 if ((LOGIC_4_ITER & which_tests) == LOGIC_4_ITER)
361 tester.gpuShortLCG0(err_count, 4);
368 // Logic (shared memory, one iteration)
369 if ((LOGIC_1_ITER_SHMEM & which_tests) == LOGIC_1_ITER_SHMEM)
371 tester.gpuShortLCG0Shmem(err_count, 1);
374 return LOGIC_1_ITER_SHMEM;
377 // Logic (shared-memory, 4 iterations)
378 if ((LOGIC_4_ITER_SHMEM & which_tests) == LOGIC_4_ITER_SHMEM)
380 tester.gpuShortLCG0Shmem(err_count, 4);
383 return LOGIC_4_ITER_SHMEM;
392 /*! \brief Runs a quick memory test and returns 0 in case if no error is detected.
393 * If an error is detected it stops before completing the test and returns a
394 * value greater than 0. In case of other errors (e.g. kernel launch errors,
395 * device querying errors) -1 is returned.
397 * \param[in] dev_id the device id of the GPU or -1 if the device has already been selected
398 * \returns 0 if no error was detected, otherwise >0
400 int do_quick_memtest(int dev_id)
/* NOTE(review): braces, the guards around the debug output and the return
 * statements are not visible in this view. */
402 cudaDeviceProp dev_prop;
403 int devmem, res, time = 0;
/* start time, used for the runtime report at the end */
407 time = getTimeMilliseconds();
/* the sanity check also fills dev_prop, which supplies the total memory size */
410 if (do_sanity_checks(dev_id, &dev_prop) != 0)
412 // something went wrong
418 devmem = dev_prop.totalGlobalMem/(1024*1024); // in MiB
419 fprintf(debug, ">> Running QUICK memtests on %d MiB (out of total %d MiB), %d iterations\n",
420 QUICK_MEM, devmem, QUICK_ITER);
/* run the QUICK_TESTS set on QUICK_MEM megabytes for QUICK_ITER iterations */
423 res = do_memtest(QUICK_TESTS, QUICK_MEM, QUICK_ITER);
427 fprintf(debug, "Q-RES = %d\n", res);
428 fprintf(debug, "Q-runtime: %d ms\n", getTimeMilliseconds() - time);
431 /* destroy context only if we created it */
439 /*! \brief Runs a full memory test and returns 0 in case if no error is detected.
440 * If an error is detected it stops before completing the test and returns a
441 * value greater than 0. In case of other errors (e.g. kernel launch errors,
442 * device querying errors) -1 is returned.
444 * \param[in] dev_id the device id of the GPU or -1 if the device has already been selected
445 * \returns 0 if no error was detected, otherwise >0
448 int do_full_memtest(int dev_id)
/* NOTE(review): braces, the guards around the debug output and the return
 * statements are not visible in this view. */
450 cudaDeviceProp dev_prop;
451 int devmem, res, time = 0;
/* start time, used for the runtime report at the end */
455 time = getTimeMilliseconds();
/* the sanity check also fills dev_prop, which supplies the total memory size */
458 if (do_sanity_checks(dev_id, &dev_prop) != 0)
460 // something went wrong
464 devmem = dev_prop.totalGlobalMem/(1024*1024); // in MiB
468 fprintf(debug, ">> Running FULL memtests on %d MiB (out of total %d MiB), %d iterations\n",
469 devmem, devmem, FULL_ITER);
472 /* do all test on the entire memory */
/* do_memtest() itself lowers the amount if that much cannot be allocated */
473 res = do_memtest(FULL_TESTS, devmem, FULL_ITER);
477 fprintf(debug, "F-RES = %d\n", res);
478 fprintf(debug, "F-runtime: %d ms\n", getTimeMilliseconds() - time);
481 /* destroy context only if we created it */
/*! \brief Runs a time constrained memory test and returns 0 in case if no error is detected.
 *
 * If an error is detected it stops before completing the test and returns a value greater
 * than zero. In case of other errors (e.g. kernel launch errors, device querying errors) -1
 * is returned. Note that test iterations are not interrupted, therefore the total runtime of
 * the test will always be a multiple of one iteration's runtime.
 *
 * \param[in] dev_id        the device id of the GPU or -1 if the device has already been selected
 * \param[in] time_constr   the time limit of the testing, in seconds
 * \returns                 0 if no error was detected, otherwise >0
 */
int do_timed_memtest(int dev_id, int time_constr)
{
    cudaDeviceProp  dev_prop;
    int             devmem, res = 0, time = 0, startt;
    /* keep the limit in seconds for reporting; the loop below works in ms */
    const int       time_constr_sec = time_constr;

    if (debug)
    {
        time = getTimeMilliseconds();
    }

    time_constr *= 1000;  /* convert to ms for convenience */
    startt       = getTimeMilliseconds();

    if (do_sanity_checks(dev_id, &dev_prop) != 0)
    {
        // something went wrong
        return -1;
    }

    devmem = dev_prop.totalGlobalMem/(1024*1024); // in MiB

    if (debug)
    {
        /* the message is labelled in seconds, so print the unconverted value */
        fprintf(debug, ">> Running time constrained memtests on %d MiB (out of total %d MiB), time limit of %d s \n",
                devmem, devmem, time_constr_sec);
    }

    /* do the TIMED_TESTS set, one step at a time on the entire memory
       that can be allocated, and stop when the given time is exceeded */
    while ( ((int)getTimeMilliseconds() - startt) < time_constr)
    {
        res = do_memtest(TIMED_TESTS, devmem, 1);
        if (res != 0)
        {
            break;
        }
    }

    if (debug)
    {
        fprintf(debug, "T-RES = %d\n", res);
        fprintf(debug, "T-runtime: %d ms\n", getTimeMilliseconds() - time);
    }

    /* destroy context only if we created it */
    if (dev_id != -1)
    {
        /* same version-dependent teardown call as used elsewhere in this file */
#if CUDA_VERSION < 4000
        cudaThreadExit();
#else
        cudaDeviceReset();
#endif
    }
    return res;
}
/*! \brief Initializes the GPU with the given index.
 *
 * The variable \p mygpu is an index into the list of GPUs selected for use
 * (gpu_opt->cuda_dev_use), whose entries are in turn indices into the
 * gpu_info->cuda_dev array.
 *
 * \param[in]  mygpu        index of the GPU to initialize
 * \param[out] result_str   the message related to the error that occurred
 *                          during the initialization (if there was any);
 *                          must provide room for STRLEN characters.
 * \param[in] gpu_info      GPU info of all detected devices in the system.
 * \param[in] gpu_opt       options for using the GPUs in gpu_info
 * \returns                 true if no error occurs during initialization.
 */
gmx_bool init_gpu(int mygpu, char *result_str,
                  const gmx_gpu_info_t *gpu_info,
                  const gmx_gpu_opt_t *gpu_opt)
{
    cudaError_t stat;
    char        sbuf[STRLEN];
    int         devIndex, gpuid;

    assert(gpu_info);
    assert(result_str);

    if (mygpu < 0 || mygpu >= gpu_opt->ncuda_dev_use)
    {
        sprintf(sbuf, "Trying to initialize an inexistent GPU: "
                "there are %d %s-selected GPU(s), but #%d was requested.",
                gpu_opt->ncuda_dev_use, gpu_opt->bUserSet ? "user" : "auto", mygpu);
        gmx_incons(sbuf);
    }

    /* position of the selected device in the cuda_dev array */
    devIndex = gpu_opt->cuda_dev_use[mygpu];
    gpuid    = gpu_info->cuda_dev[devIndex].id;

    stat = cudaSetDevice(gpuid);
    strncpy(result_str, cudaGetErrorString(stat), STRLEN);
    /* strncpy leaves the destination unterminated when it has to truncate */
    result_str[STRLEN - 1] = '\0';

    if (debug)
    {
        /* look the name up through the array index, not through the device
         * ID, so that it always belongs to the entry gpuid was read from */
        fprintf(stderr, "Initialized GPU ID #%d: %s\n", gpuid, gpu_info->cuda_dev[devIndex].prop.name);
    }

    return (stat == cudaSuccess);
}
/*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
 *
 * The context is explicitly destroyed and therefore all data uploaded to the GPU
 * is lost. This should only be called when none of this data is required anymore.
 *
 * \param[out] result_str   the message related to the error that occurred
 *                          while freeing the GPU (if there was any);
 *                          must provide room for STRLEN characters.
 * \returns                 true if no error occurs during the freeing.
 */
gmx_bool free_gpu(char *result_str)
{
    cudaError_t stat;

    assert(result_str);

    if (debug)
    {
        int gpuid;
        stat = cudaGetDevice(&gpuid);
        CU_RET_ERR(stat, "cudaGetDevice failed");
        fprintf(stderr, "Cleaning up context on GPU ID #%d\n", gpuid);
    }

    /* cudaDeviceReset() is used from CUDA 4.0 on, cudaThreadExit() before */
#if CUDA_VERSION < 4000
    stat = cudaThreadExit();
#else
    stat = cudaDeviceReset();
#endif
    strncpy(result_str, cudaGetErrorString(stat), STRLEN);
    /* strncpy leaves the destination unterminated when it has to truncate */
    result_str[STRLEN - 1] = '\0';

    return (stat == cudaSuccess);
}
/*! \brief Tells whether a GPU with the given properties can be used by the
 * native gpu acceleration.
 *
 * The decision is based solely on the major compute capability, which has to
 * be at least 2.
 *
 * \param[in] dev_prop    the CUDA device properties of the gpus to test.
 * \returns               true if the GPU properties passed indicate a compatible
 *                        GPU, otherwise false.
 */
static bool is_gmx_supported_gpu(const cudaDeviceProp *dev_prop)
{
    const int minSupportedMajor = 2;

    if (dev_prop->major < minSupportedMajor)
    {
        return false;
    }
    return true;
}
/*! \brief Helper that tells whether a GPU status value denotes a compatible GPU.
 *
 * \param[in] stat  GPU status.
 * \returns         true if the provided status is egpuCompatible, otherwise false.
 */
static bool is_compatible_gpu(int stat)
{
    const bool bCompatible = (egpuCompatible == stat);

    return bCompatible;
}
/*! \brief Checks if a GPU with a given ID is supported by the native GROMACS acceleration.
 *
 * Returns a status value which indicates compatibility or one of the following
 * errors: incompatibility, inexistence, or insanity (=unexpected behavior).
 * It also returns the respective device's properties in \p dev_prop (if applicable).
 *
 * \param[in]  dev_id   the ID of the GPU to check.
 * \param[out] dev_prop the CUDA device properties of the device checked.
 * \returns             the status of the requested device
 */
static int is_gmx_supported_gpu_id(int dev_id, cudaDeviceProp *dev_prop)
{
    int         devCount;
    cudaError_t countStat = cudaGetDeviceCount(&devCount);

    /* without a usable device count nothing can be said about the device */
    if (countStat != cudaSuccess)
    {
        return egpuInsane;
    }

    /* valid IDs are 0 .. devCount-1 */
    if (dev_id >= devCount)
    {
        return egpuNonexistent;
    }

    /* TODO: currently we do not make a distinction between the type of errors
     * that can appear during sanity checks. This needs to be improved, e.g if
     * the dummy test kernel fails to execute with a "device busy message" we
     * should appropriately report that the device is busy instead of insane.
     */
    if (do_sanity_checks(dev_id, dev_prop) != 0)
    {
        return egpuInsane;
    }

    return is_gmx_supported_gpu(dev_prop) ? egpuCompatible : egpuIncompatible;
}
699 /*! \brief Detect all NVIDIA GPUs in the system.
701 * Will detect every NVIDIA GPU supported by the device driver in use. Also
702 * check for the compatibility of each and fill the gpu_info->cuda_dev array
703 * with the required information on each the device: ID, device properties,
706 * \param[in] gpu_info pointer to structure holding GPU information.
707 * \param[out] err_str The error message of any CUDA API error that caused
708 * the detection to fail (if there was any). The memory
709 * the pointer points to should be managed externally.
710 * \returns non-zero if the detection encountered a failure, zero otherwise.
712 int detect_cuda_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
/* NOTE(review): further declarations, the braces, the else branch and the
 * assignments to retval are not visible in this view. */
714 int i, ndev, checkres, retval;
717 cuda_dev_info_t *devs;
/* the number of compatible devices is counted up in the loop below */
722 gpu_info->ncuda_dev_compatible = 0;
727 stat = cudaGetDeviceCount(&ndev);
728 if (stat != cudaSuccess)
732 /* cudaGetDeviceCount failed which means that there is something
733 * wrong with the machine: driver-runtime mismatch, all GPUs being
734 * busy in exclusive mode, or some other condition which should
735 * result in us issuing a warning and falling back to CPUs. */
/* hand the CUDA error text back to the caller through err_str */
737 s = cudaGetErrorString(stat);
738 strncpy(err_str, s, STRLEN*sizeof(err_str[0]));
/* check every device reported by the runtime and record its status */
743 for (i = 0; i < ndev; i++)
745 checkres = is_gmx_supported_gpu_id(i, &prop);
749 devs[i].stat = checkres;
751 if (checkres == egpuCompatible)
753 gpu_info->ncuda_dev_compatible++;
/* publish the device count and the per-device array */
759 gpu_info->ncuda_dev = ndev;
760 gpu_info->cuda_dev = devs;
/*! \brief Select the GPUs compatible with the native GROMACS acceleration.
 *
 * This function selects the compatible gpus and initializes
 * gpu_opt->cuda_dev_use and gpu_opt->ncuda_dev_use.
 *
 * Every device in gpu_info->cuda_dev is inspected and the indices of those
 * with a compatible status are stored in cuda_dev_use, which marks the
 * respective GPUs as "available for use."
 * Note that \p detect_cuda_gpus must have been called before.
 *
 * \param[in]     gpu_info    pointer to structure holding GPU information
 * \param[in,out] gpu_opt     pointer to structure holding GPU options
 */
void pick_compatible_gpus(const gmx_gpu_info_t *gpu_info,
                          gmx_gpu_opt_t        *gpu_opt)
{
    int  dev, nfound;
    int *found;

    assert(gpu_info);
    /* cuda_dev/ncuda_dev have to be either NULL/0 or not (NULL/0) */
    assert((gpu_info->ncuda_dev != 0 ? 0 : 1) ^ (gpu_info->cuda_dev == NULL ? 0 : 1));

    /* scratch list sized for the case that every device is compatible */
    snew(found, gpu_info->ncuda_dev);
    nfound = 0;
    for (dev = 0; dev < gpu_info->ncuda_dev; dev++)
    {
        if (!is_compatible_gpu(gpu_info->cuda_dev[dev].stat))
        {
            continue;
        }
        found[nfound] = dev;
        nfound++;
    }

    /* hand over a copy that holds exactly the compatible indices */
    gpu_opt->ncuda_dev_use = nfound;
    snew(gpu_opt->cuda_dev_use, nfound);
    memcpy(gpu_opt->cuda_dev_use, found, nfound*sizeof(*found));
    sfree(found);
}
/*! \brief Check the existence/compatibility of a set of GPUs specified by their device IDs.
 *
 * Given a list of gpu_opt->ncuda_dev_use GPU device IDs stored in
 * gpu_opt->cuda_dev_use, check the existence and compatibility
 * of the respective GPUs. Also provide the caller with an array containing
 * the result of the checks in \p checkres.
 *
 * \param[out]  checkres    check result for each ID in gpu_opt->cuda_dev_use
 * \param[in]   gpu_info    pointer to structure holding GPU information
 * \param[in]   gpu_opt     pointer to structure holding GPU options
 * \returns                 TRUE if all of the requested GPUs are compatible
 */
gmx_bool check_selected_cuda_gpus(int                  *checkres,
                                  const gmx_gpu_info_t *gpu_info,
                                  gmx_gpu_opt_t        *gpu_opt)
{
    int  i, id;
    bool bAllOk;

    assert(checkres);
    assert(gpu_info);
    assert(gpu_opt->ncuda_dev_use >= 0);

    /* nothing requested, nothing can be wrong */
    if (gpu_opt->ncuda_dev_use == 0)
    {
        return TRUE;
    }

    assert(gpu_opt->cuda_dev_use);

    bAllOk = true;
    for (i = 0; i < gpu_opt->ncuda_dev_use; i++)
    {
        id = gpu_opt->cuda_dev_use[i];

        /* devices are stored in increasing order of IDs in cuda_dev, so the
         * ID doubles as array index; an ID outside [0, ncuda_dev) - negative
         * ones included - cannot refer to a detected device */
        checkres[i] = (id < 0 || id >= gpu_info->ncuda_dev) ?
            egpuNonexistent : gpu_info->cuda_dev[id].stat;

        bAllOk = bAllOk && is_compatible_gpu(checkres[i]);
    }

    return bAllOk;
}
/*! \brief Frees the cuda_dev array field of \p gpu_info.
 *
 * A NULL \p gpu_info is accepted and ignored.
 * NOTE(review): only cuda_dev is released here; no cuda_dev_use field is touched.
 *
 * \param[in]    gpu_info    pointer to structure holding GPU information
 */
void free_gpu_info(const gmx_gpu_info_t *gpu_info)
{
    if (gpu_info != NULL)
    {
        sfree(gpu_info->cuda_dev);
    }
}
/*! \brief Formats and returns a device information string for a given GPU.
 *
 * Given an index *directly* into the array of available GPUs (cuda_dev)
 * returns a formatted info string for the respective GPU which includes
 * ID, name, compute capability, and detection status.
 *
 * If \p index lies outside [0, gpu_info->ncuda_dev) the function returns
 * without writing to \p s.
 *
 * \param[out]  s           pointer to output string (has to be allocated externally)
 * \param[in]   gpu_info    pointer to structure holding GPU information
 * \param[in]   index       an index *directly* into the array of available GPUs
 */
void get_gpu_device_info_string(char *s, const gmx_gpu_info_t *gpu_info, int index)
{
    assert(s);
    assert(gpu_info);

    /* Reject out-of-range indices. The two bounds have to be OR-ed: combined
     * with "&&" the condition can never hold and cuda_dev would be indexed
     * out of bounds. */
    if (index < 0 || index >= gpu_info->ncuda_dev)
    {
        return;
    }

    cuda_dev_info_t *dinfo = &gpu_info->cuda_dev[index];

    /* name and properties are only printed for devices whose status is
     * compatible or incompatible; any other status gets a placeholder name */
    bool bGpuExists =
        dinfo->stat == egpuCompatible ||
        dinfo->stat == egpuIncompatible;

    if (!bGpuExists)
    {
        sprintf(s, "#%d: %s, stat: %s",
                dinfo->id, "N/A",
                gpu_detect_res_str[dinfo->stat]);
    }
    else
    {
        sprintf(s, "#%d: NVIDIA %s, compute cap.: %d.%d, ECC: %3s, stat: %s",
                dinfo->id, dinfo->prop.name,
                dinfo->prop.major, dinfo->prop.minor,
                dinfo->prop.ECCEnabled ? "yes" : " no",
                gpu_detect_res_str[dinfo->stat]);
    }
}
/*! \brief Returns the device ID of the GPU with a given index into the array of used GPUs.
 *
 * Getter which maps an index into the array of GPUs in use (cuda_dev_use)
 * -- typically a tMPI/MPI rank -- to the device ID of the respective CUDA GPU.
 *
 * \param[in]    gpu_info   pointer to structure holding GPU information
 * \param[in]    gpu_opt    pointer to structure holding GPU options
 * \param[in]    idx        index into the array of used GPUs
 * \returns                 device ID of the requested GPU
 */
int get_gpu_device_id(const gmx_gpu_info_t *gpu_info,
                      const gmx_gpu_opt_t  *gpu_opt,
                      int                   idx)
{
    assert(gpu_info);
    assert(gpu_opt);
    assert(idx >= 0 && idx < gpu_opt->ncuda_dev_use);

    /* cuda_dev_use holds indices into cuda_dev */
    const int devIndex = gpu_opt->cuda_dev_use[idx];

    return gpu_info->cuda_dev[devIndex].id;
}
/*! \brief Returns the device ID of the GPU currently in use.
 *
 * The GPU used is the one that is active at the time of the call in the active context.
 * A failing device query is passed to CU_RET_ERR.
 *
 * \returns                 device ID of the GPU in use at the time of the call
 */
int get_current_gpu_device_id(void)
{
    int         gpuid;
    cudaError_t stat;

    /* Store the status first so that the macro argument is a plain variable
     * without side effects, as done in free_gpu(). */
    stat = cudaGetDevice(&gpuid);
    CU_RET_ERR(stat, "cudaGetDevice failed");

    return gpuid;
}
/*! \brief Returns the size of the cuda_dev_info struct.
 *
 * The size of cuda_dev_info can be used for allocation and communication.
 *
 * \returns                 size in bytes of cuda_dev_info
 */
size_t sizeof_cuda_dev_info(void)
{
    const size_t nbytes = sizeof(cuda_dev_info);

    return nbytes;
}