2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Define functions for detection and initialization for OpenCL devices.
38 * \author Anca Hamuraru <anca@streamcomputing.eu>
39 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
40 * \author Teemu Virolainen <teemu@streamcomputing.eu>
52 # include <sys/sysctl.h>
57 #include "gromacs/gpu_utils/gpu_utils.h"
58 #include "gromacs/gpu_utils/ocl_compiler.h"
59 #include "gromacs/gpu_utils/oclutils.h"
60 #include "gromacs/hardware/hw_info.h"
61 #include "gromacs/utility/cstringutil.h"
62 #include "gromacs/utility/exceptions.h"
63 #include "gromacs/utility/fatalerror.h"
64 #include "gromacs/utility/smalloc.h"
65 #include "gromacs/utility/stringutil.h"
/*! \brief Return true if executing on compatible OS for AMD OpenCL.
 *
 * This is assumed to be true for OS X version of at least 10.10.4 and
 * all other OS flavors.
 *
 * Uses the BSD sysctl() interfaces to extract the kernel version.
 * If the kernel release string cannot be read, or does not start with
 * a number, the OS is conservatively reported as incompatible. A
 * release string without a minor component is treated as minor 0.
 *
 * \return true if version is 14.4 or later (= OS X version 10.10.4),
 *         or OS is not Darwin.
 */
static bool
runningOnCompatibleOSForAmd()
{
#ifdef __APPLE__
    int    mib[2] = { CTL_KERN, KERN_OSRELEASE };
    char   kernelVersion[256];
    size_t len = sizeof(kernelVersion);

    // Do not parse the buffer unless the query actually filled it.
    if (sysctl(mib, sizeof(mib)/sizeof(mib[0]), kernelVersion, &len, nullptr, 0) != 0)
    {
        return false;
    }
    // Make sure the parsing below can never run past the buffer.
    kernelVersion[sizeof(kernelVersion) - 1] = '\0';

    // The release string looks like "major.minor.patch"; parse the
    // integers directly instead of going through floating point.
    char      *afterMajor = nullptr;
    const long major      = strtol(kernelVersion, &afterMajor, 10);
    if (afterMajor == kernelVersion)
    {
        // No leading digits at all: unknown format.
        return false;
    }

    long minor = 0;
    if (*afterMajor == '.')
    {
        minor = strtol(afterMajor + 1, nullptr, 10);
    }

    // Kernel 14.4 corresponds to OS X 10.10.4
    return (major > 14 || (major == 14 && minor >= 4));
#else
    return true;
#endif
}
100 /*! \brief Returns true if the gpu characterized by the device properties is
101 * supported by the native gpu acceleration.
102 * \returns true if the GPU properties passed indicate a compatible
103 * GPU, otherwise false.
105 static int is_gmx_supported_gpu_id(gmx_device_info_t *ocl_gpu_device)
107 if ((getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK")) != nullptr)
109 return egpuCompatible;
112 /* Only AMD, Intel, and NVIDIA GPUs are supported for now */
113 switch (ocl_gpu_device->vendor_e)
115 case OCL_VENDOR_NVIDIA:
116 return egpuCompatible;
118 return runningOnCompatibleOSForAmd() ? egpuCompatible : egpuIncompatible;
119 case OCL_VENDOR_INTEL:
120 return GMX_OCL_NB_CLUSTER_SIZE == 4 ? egpuCompatible : egpuIncompatibleClusterSize;
122 return egpuIncompatible;
127 /*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
129 * \param[in] vendor_name String with OpenCL vendor name.
130 * \returns ocl_vendor_id_t value for the input vendor_name
132 static ocl_vendor_id_t get_vendor_id(char *vendor_name)
136 if (strstr(vendor_name, "NVIDIA"))
138 return OCL_VENDOR_NVIDIA;
141 if (strstr(vendor_name, "AMD") ||
142 strstr(vendor_name, "Advanced Micro Devices"))
144 return OCL_VENDOR_AMD;
147 if (strstr(vendor_name, "Intel"))
149 return OCL_VENDOR_INTEL;
152 return OCL_VENDOR_UNKNOWN;
156 //! This function is documented in the header file
157 bool canDetectGpus(std::string *errorMessage)
159 cl_uint numPlatforms;
160 cl_int status = clGetPlatformIDs(0, nullptr, &numPlatforms);
161 GMX_ASSERT(status != CL_INVALID_VALUE, "Incorrect call of clGetPlatformIDs detected");
163 if (status == CL_PLATFORM_NOT_FOUND_KHR)
165 // No valid ICDs found
166 if (errorMessage != nullptr)
168 errorMessage->assign("No valid OpenCL driver found");
173 GMX_RELEASE_ASSERT(status == CL_SUCCESS,
174 gmx::formatString("An unexpected value was returned from clGetPlatformIDs %d: %s",
175 status, ocl_get_error_string(status).c_str()).c_str());
176 bool foundPlatform = (numPlatforms > 0);
177 if (!foundPlatform && errorMessage != nullptr)
179 errorMessage->assign("No OpenCL platforms found even though the driver was valid");
181 return foundPlatform;
// Overview (from the statements visible below): queries the OpenCL platforms,
// sums the number of devices of the requested type over all platforms into
// gpu_info->n_dev, allocates gpu_info->gpu_dev, fills one entry per device
// (platform/device ids, name, version, vendor string, compute units, address
// bits, vendor enum, compatibility status) and finally reorders the entries so
// that AMD devices come first, followed by NVIDIA devices.
// NOTE(review): this listing appears to have lost lines (braces, the
// declarations of device_index and last, and the statements that leave a
// branch early). The comments below describe only what is visible - confirm
// against the complete source before relying on them.
184 //! This function is documented in the header file
185 void findGpus(gmx_gpu_info_t *gpu_info)
187 cl_uint ocl_platform_count;
188 cl_platform_id *ocl_platform_ids;
// GPU devices are requested by default.
189 cl_device_type req_dev_type = CL_DEVICE_TYPE_GPU;
191 ocl_platform_ids = nullptr;
// Setting GMX_OCL_FORCE_CPU (to any value) requests CPU devices instead.
193 if (getenv("GMX_OCL_FORCE_CPU") != nullptr)
195 req_dev_type = CL_DEVICE_TYPE_CPU;
// First query: only the number of available platforms is requested.
200 cl_int status = clGetPlatformIDs(0, nullptr, &ocl_platform_count);
201 if (CL_SUCCESS != status)
203 GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ",
204 status) + ocl_get_error_string(status)));
// No platform reported at all.
207 if (1 > ocl_platform_count)
209 // TODO this should have a descriptive error message that we only support one OpenCL platform
// Second query: allocate room for the ids and retrieve them.
213 snew(ocl_platform_ids, ocl_platform_count);
215 status = clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, nullptr);
216 if (CL_SUCCESS != status)
// NOTE(review): ocl_platform_ids is already allocated here; if GMX_THROW
// leaves the function, the sfree at the end is presumably never reached.
218 GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ",
219 status) + ocl_get_error_string(status)));
// Pass 1: count the devices of the requested type on every platform.
222 for (unsigned int i = 0; i < ocl_platform_count; i++)
224 cl_uint ocl_device_count;
226 /* If requesting req_dev_type devices fails, just go to the next platform */
227 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, nullptr, &ocl_device_count))
// NOTE(review): the count is accumulated with +=, which assumes the caller
// hands in gpu_info with n_dev == 0 - confirm.
232 if (1 <= ocl_device_count)
234 gpu_info->n_dev += ocl_device_count;
// Nothing was counted on any platform.
238 if (1 > gpu_info->n_dev)
// One gpu_dev entry per counted device.
243 snew(gpu_info->gpu_dev, gpu_info->n_dev);
// Scratch array for device ids, sized for the total device count.
247 cl_device_id *ocl_device_ids;
249 snew(ocl_device_ids, gpu_info->n_dev);
// Pass 2: fetch the device ids of each platform and fill gpu_info->gpu_dev.
252 for (unsigned int i = 0; i < ocl_platform_count; i++)
254 cl_uint ocl_device_count;
// ocl_device_ids is written from its start for every platform, so it only
// ever holds the ids of the platform currently being processed.
256 /* If requesting req_dev_type devices fails, just go to the next platform */
257 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, gpu_info->n_dev, ocl_device_ids, &ocl_device_count))
262 if (1 > ocl_device_count)
// device_index selects the gpu_dev entry being filled; its declaration and
// update are not visible here (presumably a running output position).
267 for (unsigned int j = 0; j < ocl_device_count; j++)
269 gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_platform_id = ocl_platform_ids[i];
270 gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id = ocl_device_ids[j];
// Each property is cleared before it is queried. The return value of
// clGetDeviceInfo is not checked, so a failed query presumably leaves the
// cleared value in place.
272 gpu_info->gpu_dev[device_index].device_name[0] = 0;
273 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, nullptr);
275 gpu_info->gpu_dev[device_index].device_version[0] = 0;
276 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, nullptr);
278 gpu_info->gpu_dev[device_index].device_vendor[0] = 0;
279 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, nullptr);
281 gpu_info->gpu_dev[device_index].compute_units = 0;
282 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), nullptr);
284 gpu_info->gpu_dev[device_index].adress_bits = 0;
285 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), nullptr);
// Map the vendor string to the vendor enum, then classify the device.
287 gpu_info->gpu_dev[device_index].vendor_e = get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor);
289 gpu_info->gpu_dev[device_index].stat = is_gmx_supported_gpu_id(gpu_info->gpu_dev + device_index);
// Keep a tally of the devices classified as compatible.
291 if (egpuCompatible == gpu_info->gpu_dev[device_index].stat)
293 gpu_info->n_dev_compatible++;
// n_dev is replaced by the value of device_index reached in pass 2.
300 gpu_info->n_dev = device_index;
302 /* Dummy sort of devices - AMD first, then NVIDIA, then Intel */
303 // TODO: Sort devices based on performance.
304 if (0 < gpu_info->n_dev)
// AMD pass: an AMD entry found at position i is exchanged with the entry at
// position last. The declaration and update of last, and any condition
// guarding the swap, are not visible in this listing.
307 for (int i = 0; i < gpu_info->n_dev; i++)
309 if (OCL_VENDOR_AMD == gpu_info->gpu_dev[i].vendor_e)
315 gmx_device_info_t ocl_gpu_info;
316 ocl_gpu_info = gpu_info->gpu_dev[i];
317 gpu_info->gpu_dev[i] = gpu_info->gpu_dev[last];
318 gpu_info->gpu_dev[last] = ocl_gpu_info;
// NVIDIA pass: the same exchange, run only when more than one entry remains
// after position last.
323 /* if more than 1 device left to be sorted */
324 if ((gpu_info->n_dev - 1 - last) > 1)
326 for (int i = 0; i < gpu_info->n_dev; i++)
328 if (OCL_VENDOR_NVIDIA == gpu_info->gpu_dev[i].vendor_e)
334 gmx_device_info_t ocl_gpu_info;
335 ocl_gpu_info = gpu_info->gpu_dev[i];
336 gpu_info->gpu_dev[i] = gpu_info->gpu_dev[last];
337 gpu_info->gpu_dev[last] = ocl_gpu_info;
// Release the scratch arrays; gpu_info->gpu_dev is not freed here.
344 sfree(ocl_device_ids);
350 sfree(ocl_platform_ids);
353 //! This function is documented in the header file
354 void get_gpu_device_info_string(char *s, const gmx_gpu_info_t &gpu_info, int index)
358 if (index < 0 && index >= gpu_info.n_dev)
363 gmx_device_info_t *dinfo = &gpu_info.gpu_dev[index];
365 bool bGpuExists = (dinfo->stat != egpuNonexistent &&
366 dinfo->stat != egpuInsane);
370 sprintf(s, "#%d: %s, stat: %s",
372 gpu_detect_res_str[dinfo->stat]);
376 sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s",
377 index, dinfo->device_name, dinfo->device_vendor,
378 dinfo->device_version,
379 gpu_detect_res_str[dinfo->stat]);
383 //! This function is documented in the header file
384 void init_gpu(const gmx_device_info_t *deviceInfo)
388 // If the device is NVIDIA, for safety reasons we disable the JIT
389 // caching as this is known to be broken at least until driver 364.19;
390 // the cache does not always get regenerated when the source code changes,
391 // e.g. if the path to the kernel sources remains the same
393 if (deviceInfo->vendor_e == OCL_VENDOR_NVIDIA)
395 // Ignore return values, failing to set the variable does not mean
396 // that something will go wrong later.
398 _putenv("CUDA_CACHE_DISABLE=1");
400 // Don't override, maybe a dev is testing.
401 setenv("CUDA_CACHE_DISABLE", "1", 0);
406 //! This function is documented in the header file
407 gmx_device_info_t *getDeviceInfo(const gmx_gpu_info_t &gpu_info,
410 if (deviceId < 0 || deviceId >= gpu_info.n_dev)
412 gmx_incons("Invalid GPU deviceId requested");
414 return &gpu_info.gpu_dev[deviceId];
417 //! This function is documented in the header file
418 size_t sizeof_gpu_dev_info()
420 return sizeof(gmx_device_info_t);
// Hands host allocation routines back to the caller through the nb_alloc and
// nb_free output parameters. The only statement visible in this listing
// stores the address of pmalloc in *nb_alloc.
// NOTE(review): the listing looks truncated - no use of bUseGpuKernels and no
// assignment to *nb_free is visible; presumably both exist in the complete
// source. Confirm before relying on this description.
423 void gpu_set_host_malloc_and_free(bool bUseGpuKernels,
424 gmx_host_alloc_t **nb_alloc,
425 gmx_host_free_t **nb_free)
429 *nb_alloc = &pmalloc;
439 int gpu_info_get_stat(const gmx_gpu_info_t &info, int index)
441 return info.gpu_dev[index].stat;