2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
5 * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Define functions for detection and initialization for OpenCL devices.
39 * \author Anca Hamuraru <anca@streamcomputing.eu>
40 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
41 * \author Teemu Virolainen <teemu@streamcomputing.eu>
42 * \author Mark Abraham <mark.j.abraham@gmail.com>
43 * \author Szilárd Páll <pall.szilard@gmail.com>
57 # include <sys/sysctl.h>
62 #include "gromacs/gpu_utils/ocl_compiler.h"
63 #include "gromacs/gpu_utils/oclraii.h"
64 #include "gromacs/gpu_utils/oclutils.h"
65 #include "gromacs/hardware/device_information.h"
66 #include "gromacs/hardware/device_management.h"
67 #include "gromacs/hardware/hw_info.h"
68 #include "gromacs/utility/cstringutil.h"
69 #include "gromacs/utility/exceptions.h"
70 #include "gromacs/utility/fatalerror.h"
71 #include "gromacs/utility/smalloc.h"
72 #include "gromacs/utility/stringutil.h"
74 /*! \brief Return true if executing on compatible OS for AMD OpenCL.
76 * This is assumed to be true for OS X version of at least 10.10.4 and
77 * all other OS flavors.
79 * Uses the BSD sysctl() interfaces to extract the kernel version.
81 * \return true if version is 14.4 or later (= OS X version 10.10.4),
82 * or OS is not Darwin.
84 static bool runningOnCompatibleOSForAmd()
88 char kernelVersion[256];
89 size_t len = sizeof(kernelVersion);
92 mib[1] = KERN_OSRELEASE;
94 sysctl(mib, sizeof(mib) / sizeof(mib[0]), kernelVersion, &len, NULL, 0);
96 int major = strtod(kernelVersion, NULL);
97 int minor = strtod(strchr(kernelVersion, '.') + 1, NULL);
99 // Kernel 14.4 corresponds to OS X 10.10.4
100 return (major > 14 || (major == 14 && minor >= 4));
109 /*! \brief Make an error string following an OpenCL API call.
111 * It is meant to be called with \p status != CL_SUCCESS, but it will
112 * work correctly even if it is called with no OpenCL failure.
114 * \param[in] message Supplies context, e.g. the name of the API call that returned the error.
115 * \param[in] status OpenCL API status code
116 * \returns A string describing the OpenCL error.
118 static std::string makeOpenClInternalErrorString(const char* message, cl_int status)
120 if (message != nullptr)
122 return formatString("%s did %ssucceed %d: %s", message, ((status != CL_SUCCESS) ? "not " : ""),
123 status, ocl_get_error_string(status).c_str());
127 return formatString("%sOpenCL error encountered %d: %s", ((status != CL_SUCCESS) ? "" : "No "),
128 status, ocl_get_error_string(status).c_str());
133 * \brief Checks that device \c deviceInfo is sane (ie can run a kernel).
135 * Compiles and runs a dummy kernel to determine whether the given
136 * OpenCL device functions properly.
139 * \param[in] deviceInfo The device info pointer.
140 * \param[out] errorMessage An error message related to a failing OpenCL API call.
141 * \throws std::bad_alloc When out of memory.
142 * \returns Whether the device passed sanity checks
144 static bool isDeviceFunctional(const DeviceInformation* deviceInfo, std::string* errorMessage)
146 cl_context_properties properties[] = {
147 CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(deviceInfo->oclPlatformId), 0
149 // uncrustify spacing
152 auto deviceId = deviceInfo->oclDeviceId;
153 ClContext context(clCreateContext(properties, 1, &deviceId, nullptr, nullptr, &status));
154 if (status != CL_SUCCESS)
156 errorMessage->assign(makeOpenClInternalErrorString("clCreateContext", status));
159 ClCommandQueue commandQueue(clCreateCommandQueue(context, deviceId, 0, &status));
160 if (status != CL_SUCCESS)
162 errorMessage->assign(makeOpenClInternalErrorString("clCreateCommandQueue", status));
166 // Some compilers such as Apple's require kernel functions to have at least one argument
167 const char* lines[] = { "__kernel void dummyKernel(__global void* input){}" };
168 ClProgram program(clCreateProgramWithSource(context, 1, lines, nullptr, &status));
169 if (status != CL_SUCCESS)
171 errorMessage->assign(makeOpenClInternalErrorString("clCreateProgramWithSource", status));
175 if ((status = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr)) != CL_SUCCESS)
177 errorMessage->assign(makeOpenClInternalErrorString("clBuildProgram", status));
181 ClKernel kernel(clCreateKernel(program, "dummyKernel", &status));
182 if (status != CL_SUCCESS)
184 errorMessage->assign(makeOpenClInternalErrorString("clCreateKernel", status));
188 clSetKernelArg(kernel, 0, sizeof(void*), nullptr);
190 const size_t localWorkSize = 1, globalWorkSize = 1;
191 if ((status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, nullptr, &globalWorkSize,
192 &localWorkSize, 0, nullptr, nullptr))
195 errorMessage->assign(makeOpenClInternalErrorString("clEnqueueNDRangeKernel", status));
202 * \brief Checks that device \c deviceInfo is compatible with GROMACS.
204 * Vendor and OpenCL version support checks are executed an the result
207 * \param[in] deviceInfo The device info pointer.
208 * \returns The result of the compatibility checks.
210 static DeviceStatus isDeviceSupported(const DeviceInformation* deviceInfo)
212 if (getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK") != nullptr)
214 // Assume the device is compatible because checking has been disabled.
215 return DeviceStatus::Compatible;
218 // OpenCL device version check, ensure >= REQUIRED_OPENCL_MIN_VERSION
219 constexpr unsigned int minVersionMajor = REQUIRED_OPENCL_MIN_VERSION_MAJOR;
220 constexpr unsigned int minVersionMinor = REQUIRED_OPENCL_MIN_VERSION_MINOR;
222 // Based on the OpenCL spec we're checking the version supported by
223 // the device which has the following format:
224 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
225 unsigned int deviceVersionMinor, deviceVersionMajor;
226 const int valuesScanned = std::sscanf(deviceInfo->device_version, "OpenCL %u.%u",
227 &deviceVersionMajor, &deviceVersionMinor);
228 const bool versionLargeEnough =
229 ((valuesScanned == 2)
230 && ((deviceVersionMajor > minVersionMajor)
231 || (deviceVersionMajor == minVersionMajor && deviceVersionMinor >= minVersionMinor)));
232 if (!versionLargeEnough)
234 return DeviceStatus::Incompatible;
237 /* Only AMD, Intel, and NVIDIA GPUs are supported for now */
238 switch (deviceInfo->deviceVendor)
240 case DeviceVendor::Nvidia: return DeviceStatus::Compatible;
241 case DeviceVendor::Amd:
242 return runningOnCompatibleOSForAmd() ? DeviceStatus::Compatible : DeviceStatus::Incompatible;
243 case DeviceVendor::Intel:
244 return GMX_OPENCL_NB_CLUSTER_SIZE == 4 ? DeviceStatus::Compatible
245 : DeviceStatus::IncompatibleClusterSize;
246 default: return DeviceStatus::Incompatible;
251 /*! \brief Check whether the \c ocl_gpu_device is suitable for use by mdrun
253 * Runs sanity checks: checking that the runtime can compile a dummy kernel
254 * and this can be executed;
255 * Runs compatibility checks verifying the device OpenCL version requirement
256 * and vendor/OS support.
258 * \param[in] deviceId The runtime-reported numeric ID of the device.
259 * \param[in] deviceInfo The device info pointer.
260 * \returns A DeviceStatus to indicate if the GPU device is supported and if it was able to run
261 * basic functionality checks.
263 static DeviceStatus checkGpu(size_t deviceId, const DeviceInformation* deviceInfo)
266 DeviceStatus supportStatus = isDeviceSupported(deviceInfo);
267 if (supportStatus != DeviceStatus::Compatible)
269 return supportStatus;
272 std::string errorMessage;
273 if (!isDeviceFunctional(deviceInfo, &errorMessage))
275 gmx_warning("While sanity checking device #%zu, %s", deviceId, errorMessage.c_str());
276 return DeviceStatus::NonFunctional;
279 return DeviceStatus::Compatible;
284 /*! \brief Returns an DeviceVendor value corresponding to the input OpenCL vendor name.
286 * \param[in] vendorName String with OpenCL vendor name.
287 * \returns DeviceVendor value for the input vendor name
289 static DeviceVendor getDeviceVendor(const char* vendorName)
293 if (strstr(vendorName, "NVIDIA"))
295 return DeviceVendor::Nvidia;
297 else if (strstr(vendorName, "AMD") || strstr(vendorName, "Advanced Micro Devices"))
299 return DeviceVendor::Amd;
301 else if (strstr(vendorName, "Intel"))
303 return DeviceVendor::Intel;
306 return DeviceVendor::Unknown;
309 bool isGpuDetectionFunctional(std::string* errorMessage)
311 cl_uint numPlatforms;
312 cl_int status = clGetPlatformIDs(0, nullptr, &numPlatforms);
313 GMX_ASSERT(status != CL_INVALID_VALUE, "Incorrect call of clGetPlatformIDs detected");
315 if (status == CL_PLATFORM_NOT_FOUND_KHR)
317 // No valid ICDs found
318 if (errorMessage != nullptr)
320 errorMessage->assign("No valid OpenCL driver found");
326 status == CL_SUCCESS,
327 gmx::formatString("An unexpected value was returned from clGetPlatformIDs %d: %s",
328 status, ocl_get_error_string(status).c_str())
330 bool foundPlatform = (numPlatforms > 0);
331 if (!foundPlatform && errorMessage != nullptr)
333 errorMessage->assign("No OpenCL platforms found even though the driver was valid");
335 return foundPlatform;
338 void findGpus(gmx_gpu_info_t* gpu_info)
340 cl_uint ocl_platform_count;
341 cl_platform_id* ocl_platform_ids;
342 cl_device_type req_dev_type = CL_DEVICE_TYPE_GPU;
344 ocl_platform_ids = nullptr;
346 if (getenv("GMX_OCL_FORCE_CPU") != nullptr)
348 req_dev_type = CL_DEVICE_TYPE_CPU;
353 cl_int status = clGetPlatformIDs(0, nullptr, &ocl_platform_count);
354 if (CL_SUCCESS != status)
356 GMX_THROW(gmx::InternalError(
357 gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ", status)
358 + ocl_get_error_string(status)));
361 if (1 > ocl_platform_count)
363 // TODO this should have a descriptive error message that we only support one OpenCL platform
367 snew(ocl_platform_ids, ocl_platform_count);
369 status = clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, nullptr);
370 if (CL_SUCCESS != status)
372 GMX_THROW(gmx::InternalError(
373 gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ", status)
374 + ocl_get_error_string(status)));
377 for (unsigned int i = 0; i < ocl_platform_count; i++)
379 cl_uint ocl_device_count;
381 /* If requesting req_dev_type devices fails, just go to the next platform */
382 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, nullptr, &ocl_device_count))
387 if (1 <= ocl_device_count)
389 gpu_info->n_dev += ocl_device_count;
393 if (1 > gpu_info->n_dev)
398 snew(gpu_info->deviceInfo, gpu_info->n_dev);
402 cl_device_id* ocl_device_ids;
404 snew(ocl_device_ids, gpu_info->n_dev);
407 for (unsigned int i = 0; i < ocl_platform_count; i++)
409 cl_uint ocl_device_count;
411 /* If requesting req_dev_type devices fails, just go to the next platform */
413 != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, gpu_info->n_dev,
414 ocl_device_ids, &ocl_device_count))
419 if (1 > ocl_device_count)
424 for (unsigned int j = 0; j < ocl_device_count; j++)
426 gpu_info->deviceInfo[device_index].oclPlatformId = ocl_platform_ids[i];
427 gpu_info->deviceInfo[device_index].oclDeviceId = ocl_device_ids[j];
429 gpu_info->deviceInfo[device_index].device_name[0] = 0;
430 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME,
431 sizeof(gpu_info->deviceInfo[device_index].device_name),
432 gpu_info->deviceInfo[device_index].device_name, nullptr);
434 gpu_info->deviceInfo[device_index].device_version[0] = 0;
435 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION,
436 sizeof(gpu_info->deviceInfo[device_index].device_version),
437 gpu_info->deviceInfo[device_index].device_version, nullptr);
439 gpu_info->deviceInfo[device_index].vendorName[0] = 0;
440 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR,
441 sizeof(gpu_info->deviceInfo[device_index].vendorName),
442 gpu_info->deviceInfo[device_index].vendorName, nullptr);
444 gpu_info->deviceInfo[device_index].compute_units = 0;
445 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS,
446 sizeof(gpu_info->deviceInfo[device_index].compute_units),
447 &(gpu_info->deviceInfo[device_index].compute_units), nullptr);
449 gpu_info->deviceInfo[device_index].adress_bits = 0;
450 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS,
451 sizeof(gpu_info->deviceInfo[device_index].adress_bits),
452 &(gpu_info->deviceInfo[device_index].adress_bits), nullptr);
454 gpu_info->deviceInfo[device_index].deviceVendor =
455 getDeviceVendor(gpu_info->deviceInfo[device_index].vendorName);
457 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, 3 * sizeof(size_t),
458 &gpu_info->deviceInfo[device_index].maxWorkItemSizes, nullptr);
460 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
461 &gpu_info->deviceInfo[device_index].maxWorkGroupSize, nullptr);
463 gpu_info->deviceInfo[device_index].stat =
464 gmx::checkGpu(device_index, gpu_info->deviceInfo + device_index);
466 if (DeviceStatus::Compatible == gpu_info->deviceInfo[device_index].stat)
468 gpu_info->n_dev_compatible++;
475 gpu_info->n_dev = device_index;
477 /* Dummy sort of devices - AMD first, then NVIDIA, then Intel */
478 // TODO: Sort devices based on performance.
479 if (0 < gpu_info->n_dev)
482 for (int i = 0; i < gpu_info->n_dev; i++)
484 if (gpu_info->deviceInfo[i].deviceVendor == DeviceVendor::Amd)
490 std::swap(gpu_info->deviceInfo[i], gpu_info->deviceInfo[last]);
495 /* if more than 1 device left to be sorted */
496 if ((gpu_info->n_dev - 1 - last) > 1)
498 for (int i = 0; i < gpu_info->n_dev; i++)
500 if (gpu_info->deviceInfo[i].deviceVendor == DeviceVendor::Nvidia)
506 std::swap(gpu_info->deviceInfo[i], gpu_info->deviceInfo[last]);
513 sfree(ocl_device_ids);
519 sfree(ocl_platform_ids);
522 void init_gpu(const DeviceInformation* deviceInfo)
526 // If the device is NVIDIA, for safety reasons we disable the JIT
527 // caching as this is known to be broken at least until driver 364.19;
528 // the cache does not always get regenerated when the source code changes,
529 // e.g. if the path to the kernel sources remains the same
531 if (deviceInfo->deviceVendor == DeviceVendor::Nvidia)
533 // Ignore return values, failing to set the variable does not mean
534 // that something will go wrong later.
536 _putenv("CUDA_CACHE_DISABLE=1");
538 // Don't override, maybe a dev is testing.
539 setenv("CUDA_CACHE_DISABLE", "1", 0);
544 void free_gpu(const DeviceInformation* /* deviceInfo */) {}
546 DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId)
548 if (deviceId < 0 || deviceId >= gpu_info.n_dev)
550 gmx_incons("Invalid GPU deviceId requested");
552 return &gpu_info.deviceInfo[deviceId];
555 void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int index)
559 if (index < 0 && index >= gpu_info.n_dev)
564 DeviceInformation* dinfo = &gpu_info.deviceInfo[index];
567 (dinfo->stat != DeviceStatus::Nonexistent && dinfo->stat != DeviceStatus::NonFunctional);
571 sprintf(s, "#%d: %s, stat: %s", index, "N/A", c_deviceStateString[dinfo->stat]);
575 sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s", index, dinfo->device_name,
576 dinfo->vendorName, dinfo->device_version, c_deviceStateString[dinfo->stat]);
580 size_t sizeof_gpu_dev_info()
582 return sizeof(DeviceInformation);
585 DeviceStatus gpu_info_get_stat(const gmx_gpu_info_t& info, int index)
587 return info.deviceInfo[index].stat;