2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team.
5 * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Defines the OpenCL implementations of the device management.
39 * \author Anca Hamuraru <anca@streamcomputing.eu>
40 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
41 * \author Teemu Virolainen <teemu@streamcomputing.eu>
42 * \author Mark Abraham <mark.j.abraham@gmail.com>
43 * \author Szilárd Páll <pall.szilard@gmail.com>
44 * \author Artem Zhmurov <zhmurov@gmail.com>
46 * \ingroup module_hardware
51 # include <sys/sysctl.h>
57 # include <sys/sysctl.h>
60 #include "gromacs/gpu_utils/oclraii.h"
61 #include "gromacs/gpu_utils/oclutils.h"
62 #include "gromacs/hardware/device_management.h"
63 #include "gromacs/utility/fatalerror.h"
64 #include "gromacs/utility/smalloc.h"
65 #include "gromacs/utility/stringutil.h"
67 #include "device_information.h"
72 /*! \brief Return true if executing on compatible OS for AMD OpenCL.
74 * This is assumed to be true for OS X version of at least 10.10.4 and
75 * all other OS flavors.
77 * Uses the BSD sysctl() interfaces to extract the kernel version.
79 * \return true if version is 14.4 or later (= OS X version 10.10.4),
80 * or OS is not Darwin.
82 static bool runningOnCompatibleOSForAmd()
86 char kernelVersion[256];
87 size_t len = sizeof(kernelVersion);
90 mib[1] = KERN_OSRELEASE;
92 sysctl(mib, sizeof(mib) / sizeof(mib[0]), kernelVersion, &len, NULL, 0);
94 int major = strtod(kernelVersion, NULL);
95 int minor = strtod(strchr(kernelVersion, '.') + 1, NULL);
97 // Kernel 14.4 corresponds to OS X 10.10.4
98 return (major > 14 || (major == 14 && minor >= 4));
104 /*! \brief Return true if executing on compatible GPU for NVIDIA OpenCL.
106 * There are known issues with OpenCL when running on NVIDIA Volta or newer (CC 7+).
107 * As a workaround, we recommend using CUDA on such hardware.
109 * This function relies on cl_nv_device_attribute_query. In case it's not functioning properly,
110 * we trust the user and mark the device as compatible.
112 * \return true if running on Pascal (CC 6.x) or older, or if we can not determine device generation.
114 static bool runningOnCompatibleHWForNvidia(const DeviceInformation& deviceInfo)
116 // The macro is defined in Intel's and AMD's headers, but it's not strictly required to be there.
117 #ifndef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
120 static const unsigned int ccMajorBad = 7; // Volta and Turing
121 unsigned int ccMajor;
122 cl_device_id devId = deviceInfo.oclDeviceId;
123 const cl_int err = clGetDeviceInfo(
124 devId, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(ccMajor), &ccMajor, nullptr);
125 if (err != CL_SUCCESS)
127 return true; // Err on a side of trusting the user to know what they are doing.
129 return ccMajor < ccMajorBad;
134 * \brief Checks that device \c deviceInfo is compatible with GROMACS.
136 * Vendor and OpenCL version support checks are executed an the result
139 * \param[in] deviceInfo The device info pointer.
140 * \returns The status enumeration value for the checked device:
142 static DeviceStatus isDeviceFunctional(const DeviceInformation& deviceInfo)
144 if (getenv("GMX_GPU_DISABLE_COMPATIBILITY_CHECK") != nullptr)
146 // Assume the device is compatible because checking has been disabled.
147 return DeviceStatus::Compatible;
149 if (getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK") != nullptr)
152 "Environment variable GMX_OCL_DISABLE_COMPATIBILITY_CHECK is deprecated and will "
153 "be removed in release 2022. Please use GMX_GPU_DISABLE_COMPATIBILITY_CHECK "
155 return DeviceStatus::Compatible;
158 // OpenCL device version check, ensure >= REQUIRED_OPENCL_MIN_VERSION
159 constexpr unsigned int minVersionMajor = REQUIRED_OPENCL_MIN_VERSION_MAJOR;
160 constexpr unsigned int minVersionMinor = REQUIRED_OPENCL_MIN_VERSION_MINOR;
162 // Based on the OpenCL spec we're checking the version supported by
163 // the device which has the following format:
164 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
165 unsigned int deviceVersionMinor, deviceVersionMajor;
166 const int valuesScanned = std::sscanf(
167 deviceInfo.device_version, "OpenCL %u.%u", &deviceVersionMajor, &deviceVersionMinor);
168 const bool versionLargeEnough =
169 ((valuesScanned == 2)
170 && ((deviceVersionMajor > minVersionMajor)
171 || (deviceVersionMajor == minVersionMajor && deviceVersionMinor >= minVersionMinor)));
172 if (!versionLargeEnough)
174 return DeviceStatus::Incompatible;
177 /* Only AMD, Intel, and NVIDIA GPUs are supported for now */
178 switch (deviceInfo.deviceVendor)
180 case DeviceVendor::Nvidia:
181 return runningOnCompatibleHWForNvidia(deviceInfo) ? DeviceStatus::Compatible
182 : DeviceStatus::IncompatibleNvidiaVolta;
183 case DeviceVendor::Amd:
184 return runningOnCompatibleOSForAmd() ? DeviceStatus::Compatible : DeviceStatus::Incompatible;
185 case DeviceVendor::Intel:
186 return GMX_GPU_NB_CLUSTER_SIZE == 4 ? DeviceStatus::Compatible
187 : DeviceStatus::IncompatibleClusterSize;
188 default: return DeviceStatus::Incompatible;
192 /*! \brief Make an error string following an OpenCL API call.
194 * It is meant to be called with \p status != CL_SUCCESS, but it will
195 * work correctly even if it is called with no OpenCL failure.
197 * \todo Make use of this function more.
199 * \param[in] message Supplies context, e.g. the name of the API call that returned the error.
200 * \param[in] status OpenCL API status code
201 * \returns A string describing the OpenCL error.
203 inline std::string makeOpenClInternalErrorString(const char* message, cl_int status)
205 if (message != nullptr)
207 return gmx::formatString("%s did %ssucceed %d: %s",
209 ((status != CL_SUCCESS) ? "not " : ""),
211 ocl_get_error_string(status).c_str());
215 return gmx::formatString("%sOpenCL error encountered %d: %s",
216 ((status != CL_SUCCESS) ? "" : "No "),
218 ocl_get_error_string(status).c_str());
223 * \brief Checks that device \c deviceInfo is sane (ie can run a kernel).
225 * Compiles and runs a dummy kernel to determine whether the given
226 * OpenCL device functions properly.
229 * \param[in] deviceInfo The device info pointer.
230 * \param[out] errorMessage An error message related to a failing OpenCL API call.
231 * \throws std::bad_alloc When out of memory.
232 * \returns Whether the device passed sanity checks
234 static bool isDeviceFunctional(const DeviceInformation& deviceInfo, std::string* errorMessage)
236 cl_context_properties properties[] = {
237 CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(deviceInfo.oclPlatformId), 0
239 // uncrustify spacing
242 auto deviceId = deviceInfo.oclDeviceId;
243 ClContext context(clCreateContext(properties, 1, &deviceId, nullptr, nullptr, &status));
244 if (status != CL_SUCCESS)
246 errorMessage->assign(makeOpenClInternalErrorString("clCreateContext", status));
249 ClCommandQueue commandQueue(clCreateCommandQueue(context, deviceId, 0, &status));
250 if (status != CL_SUCCESS)
252 errorMessage->assign(makeOpenClInternalErrorString("clCreateCommandQueue", status));
256 // Some compilers such as Apple's require kernel functions to have at least one argument
257 const char* lines[] = { "__kernel void dummyKernel(__global void* input){}" };
258 ClProgram program(clCreateProgramWithSource(context, 1, lines, nullptr, &status));
259 if (status != CL_SUCCESS)
261 errorMessage->assign(makeOpenClInternalErrorString("clCreateProgramWithSource", status));
265 if ((status = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr)) != CL_SUCCESS)
267 errorMessage->assign(makeOpenClInternalErrorString("clBuildProgram", status));
271 ClKernel kernel(clCreateKernel(program, "dummyKernel", &status));
272 if (status != CL_SUCCESS)
274 errorMessage->assign(makeOpenClInternalErrorString("clCreateKernel", status));
278 clSetKernelArg(kernel, 0, sizeof(void*), nullptr);
280 const size_t localWorkSize = 1, globalWorkSize = 1;
281 if ((status = clEnqueueNDRangeKernel(
282 commandQueue, kernel, 1, nullptr, &globalWorkSize, &localWorkSize, 0, nullptr, nullptr))
285 errorMessage->assign(makeOpenClInternalErrorString("clEnqueueNDRangeKernel", status));
291 /*! \brief Check whether the \c ocl_gpu_device is suitable for use by mdrun
293 * Runs sanity checks: checking that the runtime can compile a dummy kernel
294 * and this can be executed;
295 * Runs compatibility checks verifying the device OpenCL version requirement
296 * and vendor/OS support.
298 * \param[in] deviceId The runtime-reported numeric ID of the device.
299 * \param[in] deviceInfo The device info pointer.
300 * \returns A DeviceStatus to indicate if the GPU device is supported and if it was able to run
301 * basic functionality checks.
303 static DeviceStatus checkGpu(size_t deviceId, const DeviceInformation& deviceInfo)
306 DeviceStatus supportStatus = isDeviceFunctional(deviceInfo);
307 if (supportStatus != DeviceStatus::Compatible)
309 return supportStatus;
312 std::string errorMessage;
313 if (!isDeviceFunctional(deviceInfo, &errorMessage))
315 gmx_warning("While sanity checking device #%zu, %s", deviceId, errorMessage.c_str());
316 return DeviceStatus::NonFunctional;
319 return DeviceStatus::Compatible;
324 bool isDeviceDetectionFunctional(std::string* errorMessage)
326 cl_uint numPlatforms;
327 cl_int status = clGetPlatformIDs(0, nullptr, &numPlatforms);
328 GMX_ASSERT(status != CL_INVALID_VALUE, "Incorrect call of clGetPlatformIDs detected");
330 if (status == CL_PLATFORM_NOT_FOUND_KHR)
332 // No valid ICDs found
333 if (errorMessage != nullptr)
335 errorMessage->assign("No valid OpenCL driver found");
341 status == CL_SUCCESS,
342 gmx::formatString("An unexpected value was returned from clGetPlatformIDs %d: %s",
344 ocl_get_error_string(status).c_str())
346 bool foundPlatform = (numPlatforms > 0);
347 if (!foundPlatform && errorMessage != nullptr)
349 errorMessage->assign("No OpenCL platforms found even though the driver was valid");
351 return foundPlatform;
354 std::vector<std::unique_ptr<DeviceInformation>> findDevices()
356 cl_uint ocl_platform_count;
357 cl_platform_id* ocl_platform_ids;
358 cl_device_type req_dev_type = CL_DEVICE_TYPE_GPU;
360 ocl_platform_ids = nullptr;
362 if (getenv("GMX_OCL_FORCE_CPU") != nullptr)
364 req_dev_type = CL_DEVICE_TYPE_CPU;
368 std::vector<std::unique_ptr<DeviceInformation>> deviceInfoList(0);
372 cl_int status = clGetPlatformIDs(0, nullptr, &ocl_platform_count);
373 if (CL_SUCCESS != status)
375 GMX_THROW(gmx::InternalError(
376 gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ", status)
377 + ocl_get_error_string(status)));
380 if (1 > ocl_platform_count)
382 // TODO this should have a descriptive error message that we only support one OpenCL platform
386 snew(ocl_platform_ids, ocl_platform_count);
388 status = clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, nullptr);
389 if (CL_SUCCESS != status)
391 GMX_THROW(gmx::InternalError(
392 gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ", status)
393 + ocl_get_error_string(status)));
396 for (unsigned int i = 0; i < ocl_platform_count; i++)
398 cl_uint ocl_device_count;
400 /* If requesting req_dev_type devices fails, just go to the next platform */
401 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, nullptr, &ocl_device_count))
406 if (1 <= ocl_device_count)
408 numDevices += ocl_device_count;
417 deviceInfoList.resize(numDevices);
421 cl_device_id* ocl_device_ids;
423 snew(ocl_device_ids, numDevices);
426 for (unsigned int i = 0; i < ocl_platform_count; i++)
428 cl_uint ocl_device_count;
430 /* If requesting req_dev_type devices fails, just go to the next platform */
432 != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, numDevices, ocl_device_ids, &ocl_device_count))
437 if (1 > ocl_device_count)
442 for (unsigned int j = 0; j < ocl_device_count; j++)
444 deviceInfoList[device_index] = std::make_unique<DeviceInformation>();
446 deviceInfoList[device_index]->id = device_index;
448 deviceInfoList[device_index]->oclPlatformId = ocl_platform_ids[i];
449 deviceInfoList[device_index]->oclDeviceId = ocl_device_ids[j];
451 deviceInfoList[device_index]->device_name[0] = 0;
452 clGetDeviceInfo(ocl_device_ids[j],
454 sizeof(deviceInfoList[device_index]->device_name),
455 deviceInfoList[device_index]->device_name,
458 deviceInfoList[device_index]->device_version[0] = 0;
459 clGetDeviceInfo(ocl_device_ids[j],
461 sizeof(deviceInfoList[device_index]->device_version),
462 deviceInfoList[device_index]->device_version,
465 deviceInfoList[device_index]->vendorName[0] = 0;
466 clGetDeviceInfo(ocl_device_ids[j],
468 sizeof(deviceInfoList[device_index]->vendorName),
469 deviceInfoList[device_index]->vendorName,
472 deviceInfoList[device_index]->compute_units = 0;
473 clGetDeviceInfo(ocl_device_ids[j],
474 CL_DEVICE_MAX_COMPUTE_UNITS,
475 sizeof(deviceInfoList[device_index]->compute_units),
476 &(deviceInfoList[device_index]->compute_units),
479 deviceInfoList[device_index]->adress_bits = 0;
480 clGetDeviceInfo(ocl_device_ids[j],
481 CL_DEVICE_ADDRESS_BITS,
482 sizeof(deviceInfoList[device_index]->adress_bits),
483 &(deviceInfoList[device_index]->adress_bits),
486 deviceInfoList[device_index]->deviceVendor =
487 getDeviceVendor(deviceInfoList[device_index]->vendorName);
489 clGetDeviceInfo(ocl_device_ids[j],
490 CL_DEVICE_MAX_WORK_ITEM_SIZES,
492 &deviceInfoList[device_index]->maxWorkItemSizes,
495 clGetDeviceInfo(ocl_device_ids[j],
496 CL_DEVICE_MAX_WORK_GROUP_SIZE,
498 &deviceInfoList[device_index]->maxWorkGroupSize,
501 deviceInfoList[device_index]->status =
502 gmx::checkGpu(device_index, *deviceInfoList[device_index]);
508 numDevices = device_index;
510 /* Dummy sort of devices - AMD first, then NVIDIA, then Intel */
511 // TODO: Sort devices based on performance.
515 for (int i = 0; i < numDevices; i++)
517 if (deviceInfoList[i]->deviceVendor == DeviceVendor::Amd)
523 std::swap(deviceInfoList[i], deviceInfoList[last]);
528 /* if more than 1 device left to be sorted */
529 if ((numDevices - 1 - last) > 1)
531 for (int i = 0; i < numDevices; i++)
533 if (deviceInfoList[i]->deviceVendor == DeviceVendor::Nvidia)
539 std::swap(deviceInfoList[i], deviceInfoList[last]);
546 sfree(ocl_device_ids);
552 sfree(ocl_platform_ids);
553 return deviceInfoList;
556 void setActiveDevice(const DeviceInformation& deviceInfo)
558 // If the device is NVIDIA, for safety reasons we disable the JIT
559 // caching as this is known to be broken at least until driver 364.19;
560 // the cache does not always get regenerated when the source code changes,
561 // e.g. if the path to the kernel sources remains the same
563 if (deviceInfo.deviceVendor == DeviceVendor::Nvidia)
565 // Ignore return values, failing to set the variable does not mean
566 // that something will go wrong later.
568 _putenv("CUDA_CACHE_DISABLE=1");
570 // Don't override, maybe a dev is testing.
571 setenv("CUDA_CACHE_DISABLE", "1", 0);
576 void releaseDevice(DeviceInformation* /* deviceInfo */) {}
578 std::string getDeviceInformationString(const DeviceInformation& deviceInfo)
580 bool gpuExists = (deviceInfo.status != DeviceStatus::Nonexistent
581 && deviceInfo.status != DeviceStatus::NonFunctional);
585 return gmx::formatString(
586 "#%d: %s, status: %s", deviceInfo.id, "N/A", c_deviceStateString[deviceInfo.status]);
590 return gmx::formatString("#%d: name: %s, vendor: %s, device version: %s, status: %s",
592 deviceInfo.device_name,
593 deviceInfo.vendorName,
594 deviceInfo.device_version,
595 c_deviceStateString[deviceInfo.status]);