2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Define functions for detection and initialization for OpenCL devices.
38 * \author Anca Hamuraru <anca@streamcomputing.eu>
39 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
40 * \author Teemu Virolainen <teemu@streamcomputing.eu>
52 #include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
53 #include "gromacs/gmxlib/gpu_utils/ocl_compiler.h"
54 #include "gromacs/gmxlib/ocl_tools/oclutils.h"
55 #include "gromacs/legacyheaders/types/enums.h"
56 #include "gromacs/legacyheaders/types/hw_info.h"
57 #include "gromacs/utility/cstringutil.h"
58 #include "gromacs/utility/fatalerror.h"
59 #include "gromacs/utility/smalloc.h"
61 /*! \brief Helper macro for error handling */
/* Evaluates the OpenCL call expression `func` once into a local cl_int and
 * compares the result with CL_SUCCESS. On a mismatch it formats
 * "OpenCL error <code>" into `err_str` with an unbounded sprintf, so the
 * caller has to supply a buffer that is large enough for that text.
 *
 * NOTE(review): the statements that use `retval` are not visible in this
 * listing (short lines appear to have been dropped) -- confirm which value is
 * stored there on success and on failure before relying on it.
 */
62 #define CALLOCLFUNC_LOGERROR(func, err_str, retval) { \
63 cl_int opencl_ret = func; \
64 if (CL_SUCCESS != opencl_ret) \
66 sprintf(err_str, "OpenCL error %d", opencl_ret); \
74 /*! \brief Helper function that checks whether a given GPU status indicates compatible GPU.
76 * \param[in] stat GPU status.
77 * \returns true if the provided status is egpuCompatible, otherwise false.
79 static bool is_compatible_gpu(int stat)
81 return (stat == egpuCompatible);
84 /*! \brief Returns true if the gpu characterized by the device properties is
85 * supported by the native gpu acceleration.
86 * \returns true if the GPU properties passed indicate a compatible
87 * GPU, otherwise false.
89 static int is_gmx_supported_gpu_id(struct gmx_device_info_t *ocl_gpu_device)
91 /* Only AMD and NVIDIA GPUs are supported for now */
92 if ((OCL_VENDOR_NVIDIA == ocl_gpu_device->vendor_e) ||
93 (OCL_VENDOR_AMD == ocl_gpu_device->vendor_e))
95 return egpuCompatible;
98 return egpuIncompatible;
101 /*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
103 * \param[in] vendor_name String with OpenCL vendor name.
104 * \returns ocl_vendor_id_t value for the input vendor_name
106 ocl_vendor_id_t get_vendor_id(char *vendor_name)
110 if (strstr(vendor_name, "NVIDIA"))
112 return OCL_VENDOR_NVIDIA;
115 if (strstr(vendor_name, "AMD") ||
116 strstr(vendor_name, "Advanced Micro Devices"))
118 return OCL_VENDOR_AMD;
121 if (strstr(vendor_name, "Intel"))
123 return OCL_VENDOR_INTEL;
126 return OCL_VENDOR_UNKNOWN;
130 //! This function is documented in the header file
/* Enumerates the OpenCL platforms and their devices and fills gpu_info with
 * one entry per device found.
 *
 * Visible steps:
 *  1. pick the device type: GPU, or CPU when the GMX_OCL_FORCE_CPU
 *     environment variable is set;
 *  2. query the number of platforms, then the platform ids;
 *  3. add the device count of every platform to gpu_info->n_dev;
 *  4. allocate gpu_info->gpu_dev and, per device, store its platform/device
 *     ids, name, version, vendor, compute units and address bits, derive
 *     vendor_e and stat, and count the compatible devices;
 *  5. reorder the entries by vendor;
 *  6. free the temporary id arrays.
 *
 * NOTE(review): this listing appears to have lost its short lines (braces,
 * declarations of retval/device_index/last, and the bodies of several
 * one-statement branches). The comments below describe only what the visible
 * lines do; confirm the control flow and the return value against the
 * complete file.
 */
131 int detect_gpus(gmx_gpu_info_t *gpu_info, char *err_str)
134 cl_uint ocl_platform_count;
135 cl_platform_id *ocl_platform_ids;
136 cl_device_type req_dev_type = CL_DEVICE_TYPE_GPU;
139 ocl_platform_ids = NULL;
/* Any value of GMX_OCL_FORCE_CPU switches detection from GPU to CPU devices */
141 if (getenv("GMX_OCL_FORCE_CPU") != NULL)
143 req_dev_type = CL_DEVICE_TYPE_CPU;
/* First query only asks how many platforms exist; on an OpenCL failure the macro writes the error text to err_str */
148 CALLOCLFUNC_LOGERROR(clGetPlatformIDs(0, NULL, &ocl_platform_count), err_str, retval)
/* Taken when no OpenCL platform was reported */
154 if (1 > ocl_platform_count)
159 snew(ocl_platform_ids, ocl_platform_count);
/* Second query retrieves the platform ids themselves */
161 CALLOCLFUNC_LOGERROR(clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, NULL), err_str, retval)
/* Pass 1: total the number of req_dev_type devices over all platforms */
167 for (unsigned int i = 0; i < ocl_platform_count; i++)
169 cl_uint ocl_device_count;
171 /* If requesting req_dev_type devices fails, just go to the next platform */
172 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, NULL, &ocl_device_count))
/* n_dev is only added to here -- presumably the caller passes it in as 0; verify against callers */
177 if (1 <= ocl_device_count)
179 gpu_info->n_dev += ocl_device_count;
/* Taken when no device of the requested type exists on any platform */
183 if (1 > gpu_info->n_dev)
188 snew(gpu_info->gpu_dev, gpu_info->n_dev);
192 cl_device_id *ocl_device_ids;
/* Scratch array sized for the total device count; each platform's ids are written from index 0 */
194 snew(ocl_device_ids, gpu_info->n_dev);
/* Pass 2: fetch the device ids of each platform and fill one gpu_dev entry per device */
197 for (unsigned int i = 0; i < ocl_platform_count; i++)
199 cl_uint ocl_device_count;
201 /* If requesting req_dev_type devices fails, just go to the next platform */
202 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, gpu_info->n_dev, ocl_device_ids, &ocl_device_count))
207 if (1 > ocl_device_count)
/* j walks this platform's devices; device_index is the position in gpu_dev across all platforms */
212 for (unsigned int j = 0; j < ocl_device_count; j++)
214 gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_platform_id = ocl_platform_ids[i];
215 gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id = ocl_device_ids[j];
/* Each field below is reset before it is queried; the clGetDeviceInfo return codes are not checked */
217 gpu_info->gpu_dev[device_index].device_name[0] = 0;
218 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, NULL);
220 gpu_info->gpu_dev[device_index].device_version[0] = 0;
221 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, NULL);
223 gpu_info->gpu_dev[device_index].device_vendor[0] = 0;
224 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, NULL);
226 gpu_info->gpu_dev[device_index].compute_units = 0;
227 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), NULL);
229 gpu_info->gpu_dev[device_index].adress_bits = 0;
230 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), NULL);
/* Vendor id is derived from the vendor string just read; stat is derived from that vendor id */
232 gpu_info->gpu_dev[device_index].vendor_e = get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor);
234 gpu_info->gpu_dev[device_index].stat = is_gmx_supported_gpu_id(gpu_info->gpu_dev + device_index);
236 if (egpuCompatible == gpu_info->gpu_dev[device_index].stat)
238 gpu_info->n_dev_compatible++;
/* n_dev is replaced by device_index (NOTE(review): its increment is not visible here; presumably it counts the entries filled above) */
245 gpu_info->n_dev = device_index;
247 /* Dummy sort of devices - AMD first, then NVIDIA, then Intel */
248 // TODO: Sort devices based on performance.
249 if (0 < gpu_info->n_dev)
/* AMD sweep: an AMD entry found at i is swapped with the entry at index `last` (NOTE(review): updates of `last` are not visible here) */
252 for (int i = 0; i < gpu_info->n_dev; i++)
254 if (OCL_VENDOR_AMD == gpu_info->gpu_dev[i].vendor_e)
260 gmx_device_info_t ocl_gpu_info;
261 ocl_gpu_info = gpu_info->gpu_dev[i];
262 gpu_info->gpu_dev[i] = gpu_info->gpu_dev[last];
263 gpu_info->gpu_dev[last] = ocl_gpu_info;
268 /* if more than 1 device left to be sorted */
269 if ((gpu_info->n_dev - 1 - last) > 1)
/* NVIDIA sweep: same swap scheme, continuing with the value of `last` left by the AMD sweep */
271 for (int i = 0; i < gpu_info->n_dev; i++)
273 if (OCL_VENDOR_NVIDIA == gpu_info->gpu_dev[i].vendor_e)
279 gmx_device_info_t ocl_gpu_info;
280 ocl_gpu_info = gpu_info->gpu_dev[i];
281 gpu_info->gpu_dev[i] = gpu_info->gpu_dev[last];
282 gpu_info->gpu_dev[last] = ocl_gpu_info;
/* The id arrays are scratch data; gpu_info->gpu_dev is not freed here */
289 sfree(ocl_device_ids);
295 sfree(ocl_platform_ids);
300 //! This function is documented in the header file
301 void free_gpu_info(const gmx_gpu_info_t gmx_unused *gpu_info)
305 for (int i = 0; i < gpu_info->n_dev; i++)
307 cl_int gmx_unused cl_error;
309 if (gpu_info->gpu_dev[i].context)
311 cl_error = clReleaseContext(gpu_info->gpu_dev[i].context);
312 gpu_info->gpu_dev[i].context = NULL;
313 assert(CL_SUCCESS == cl_error);
316 if (gpu_info->gpu_dev[i].program)
318 cl_error = clReleaseProgram(gpu_info->gpu_dev[i].program);
319 gpu_info->gpu_dev[i].program = NULL;
320 assert(CL_SUCCESS == cl_error);
324 sfree(gpu_info->gpu_dev);
328 //! This function is documented in the header file
329 void pick_compatible_gpus(const gmx_gpu_info_t *gpu_info,
330 gmx_gpu_opt_t *gpu_opt)
336 /* gpu_dev/n_dev have to be either NULL/0 or not (NULL/0) */
337 assert((gpu_info->n_dev != 0 ? 0 : 1) ^ (gpu_info->gpu_dev == NULL ? 0 : 1));
339 snew(compat, gpu_info->n_dev);
341 for (i = 0; i < gpu_info->n_dev; i++)
343 if (is_compatible_gpu(gpu_info->gpu_dev[i].stat))
346 compat[ncompat - 1] = i;
350 gpu_opt->n_dev_compatible = ncompat;
351 snew(gpu_opt->dev_compatible, ncompat);
352 memcpy(gpu_opt->dev_compatible, compat, ncompat*sizeof(*compat));
356 //! This function is documented in the header file
357 gmx_bool check_selected_gpus(int *checkres,
358 const gmx_gpu_info_t *gpu_info,
359 gmx_gpu_opt_t *gpu_opt)
366 assert(gpu_opt->n_dev_use >= 0);
368 if (gpu_opt->n_dev_use == 0)
373 assert(gpu_opt->dev_use);
375 /* we will assume that all GPUs requested are valid IDs,
376 otherwise we'll bail anyways */
379 for (i = 0; i < gpu_opt->n_dev_use; i++)
381 id = gpu_opt->dev_use[i];
383 /* devices are stored in increasing order of IDs in gpu_dev */
384 gpu_opt->dev_use[i] = id;
386 checkres[i] = (id >= gpu_info->n_dev) ?
387 egpuNonexistent : gpu_info->gpu_dev[id].stat;
389 bAllOk = bAllOk && is_compatible_gpu(checkres[i]);
395 //! This function is documented in the header file
396 void get_gpu_device_info_string(char gmx_unused *s, const gmx_gpu_info_t gmx_unused *gpu_info, int gmx_unused index)
401 if (index < 0 && index >= gpu_info->n_dev)
406 gmx_device_info_t *dinfo = &gpu_info->gpu_dev[index];
409 dinfo->stat == egpuCompatible ||
410 dinfo->stat == egpuIncompatible;
414 sprintf(s, "#%d: %s, stat: %s",
416 gpu_detect_res_str[dinfo->stat]);
420 sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s",
421 index, dinfo->device_name, dinfo->device_vendor,
422 dinfo->device_version,
423 gpu_detect_res_str[dinfo->stat]);
427 //! This function is documented in the header file
/* Visible behaviour: validates `mygpu` against the number of selected
 * devices and, when it is out of range, formats a diagnostic into `sbuf`.
 *
 * NOTE(review): several short lines of this function are missing from this
 * listing (two parameters, the declaration of sbuf, what is done with the
 * formatted message, and the return statement) -- confirm those against the
 * complete file.
 */
428 gmx_bool init_gpu(FILE gmx_unused *fplog,
431 const gmx_gpu_info_t gmx_unused *gpu_info,
432 const gmx_gpu_opt_t *gpu_opt
/* Valid values of mygpu are 0 .. gpu_opt->n_dev_use - 1 */
439 if (mygpu < 0 || mygpu >= gpu_opt->n_dev_use)
/* The message reports the selection count, whether the selection was made by the user (bUserSet) or automatically, and the rejected index */
442 sprintf(sbuf, "Trying to initialize an inexistent GPU: "
443 "there are %d %s-selected GPU(s), but #%d was requested.",
444 gpu_opt->n_dev_use, gpu_opt->bUserSet ? "user" : "auto", mygpu);
451 //! This function is documented in the header file
452 int get_gpu_device_id(const gmx_gpu_info_t *,
453 const gmx_gpu_opt_t *gpu_opt,
457 assert(idx >= 0 && idx < gpu_opt->n_dev_use);
459 return gpu_opt->dev_use[idx];
462 //! This function is documented in the header file
463 char* get_ocl_gpu_device_name(const gmx_gpu_info_t *gpu_info,
464 const gmx_gpu_opt_t *gpu_opt,
469 assert(idx >= 0 && idx < gpu_opt->n_dev_use);
471 return gpu_info->gpu_dev[gpu_opt->dev_use[idx]].device_name;
474 //! This function is documented in the header file
475 size_t sizeof_gpu_dev_info(void)
477 return sizeof(gmx_device_info_t);
480 /*! \brief Prints the name of a kernel function pointer.
482 * \param[in] kernel OpenCL kernel
483 * \returns CL_SUCCESS if the operation was successful, an OpenCL error otherwise.
485 cl_int dbg_ocl_kernel_name(const cl_kernel kernel)
488 char kernel_name[256];
489 cl_error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME,
490 sizeof(kernel_name), &kernel_name, NULL);
493 printf("No kernel found!\n");
497 printf("%s\n", kernel_name);
502 /*! \brief Prints the name of a kernel function pointer.
504 * \param[in] kernel OpenCL kernel
505 * \returns CL_SUCCESS if the operation was successful, an OpenCL error otherwise.
507 cl_int dbg_ocl_kernel_name_address(void* kernel)
510 char kernel_name[256];
511 cl_error = clGetKernelInfo((cl_kernel)kernel, CL_KERNEL_FUNCTION_NAME,
512 sizeof(kernel_name), &kernel_name, NULL);
515 printf("No kernel found!\n");
519 printf("%s\n", kernel_name);
524 void gpu_set_host_malloc_and_free(bool bUseGpuKernels,
525 gmx_host_alloc_t **nb_alloc,
526 gmx_host_free_t **nb_free)
530 *nb_alloc = &ocl_pmalloc;
531 *nb_free = &ocl_pfree;