2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Define functions for detection and initialization for OpenCL devices.
38 * \author Anca Hamuraru <anca@streamcomputing.eu>
39 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
40 * \author Teemu Virolainen <teemu@streamcomputing.eu>
41 * \author Mark Abraham <mark.j.abraham@gmail.com>
42 * \author Szilárd Páll <pall.szilard@gmail.com>
56 # include <sys/sysctl.h>
61 #include "gromacs/gpu_utils/gpu_utils.h"
62 #include "gromacs/gpu_utils/ocl_compiler.h"
63 #include "gromacs/gpu_utils/oclraii.h"
64 #include "gromacs/gpu_utils/oclutils.h"
65 #include "gromacs/hardware/hw_info.h"
66 #include "gromacs/utility/cstringutil.h"
67 #include "gromacs/utility/exceptions.h"
68 #include "gromacs/utility/fatalerror.h"
69 #include "gromacs/utility/smalloc.h"
70 #include "gromacs/utility/stringutil.h"
72 /*! \brief Return true if executing on compatible OS for AMD OpenCL.
74 * This is assumed to be true for OS X version of at least 10.10.4 and
75 * all other OS flavors.
77 * Uses the BSD sysctl() interfaces to extract the kernel version.
79 * \return true if version is 14.4 or later (= OS X version 10.10.4),
80 * or OS is not Darwin.
83 runningOnCompatibleOSForAmd()
87 char kernelVersion[256];
88 size_t len = sizeof(kernelVersion);
91 mib[1] = KERN_OSRELEASE;
93 sysctl(mib, sizeof(mib)/sizeof(mib[0]), kernelVersion, &len, NULL, 0);
95 int major = strtod(kernelVersion, NULL);
96 int minor = strtod(strchr(kernelVersion, '.')+1, NULL);
98 // Kernel 14.4 corresponds to OS X 10.10.4
99 return (major > 14 || (major == 14 && minor >= 4));
108 /*! \brief Make an error string following an OpenCL API call.
110 * It is meant to be called with \p status != CL_SUCCESS, but it will
111 * work correctly even if it is called with no OpenCL failure.
113 * \param[in] message Supplies context, e.g. the name of the API call that returned the error.
114 * \param[in] status OpenCL API status code
115 * \returns A string describing the OpenCL error.
118 makeOpenClInternalErrorString(const char *message, cl_int status)
120 if (message != nullptr)
122 return formatString("%s did %ssucceed %d: %s",
124 ((status != CL_SUCCESS) ? "not " : ""),
125 status, ocl_get_error_string(status).c_str());
129 return formatString("%sOpenCL error encountered %d: %s",
130 ((status != CL_SUCCESS) ? "" : "No "),
131 status, ocl_get_error_string(status).c_str());
136 * \brief Checks that device \c devInfo is sane (ie can run a kernel).
138 * Compiles and runs a dummy kernel to determine whether the given
139 * OpenCL device functions properly.
142 * \param[in] devInfo The device info pointer.
143 * \param[out] errorMessage An error message related to a failing OpenCL API call.
144 * \throws std::bad_alloc When out of memory.
145 * \returns Whether the device passed sanity checks
147 static bool isDeviceSane(const gmx_device_info_t *devInfo,
148 std::string *errorMessage)
150 cl_context_properties properties[] = {
152 (cl_context_properties) devInfo->ocl_gpu_id.ocl_platform_id,
155 // uncrustify spacing
158 auto deviceId = devInfo->ocl_gpu_id.ocl_device_id;
159 ClContext context(clCreateContext(properties, 1, &deviceId, nullptr, nullptr, &status));
160 if (status != CL_SUCCESS)
162 errorMessage->assign(makeOpenClInternalErrorString("clCreateContext", status));
165 ClCommandQueue commandQueue(clCreateCommandQueue(context, deviceId, 0, &status));
166 if (status != CL_SUCCESS)
168 errorMessage->assign(makeOpenClInternalErrorString("clCreateCommandQueue", status));
172 // Some compilers such as Apple's require kernel functions to have at least one argument
173 const char *lines[] = { "__kernel void dummyKernel(__global void* input){}" };
174 ClProgram program(clCreateProgramWithSource(context, 1, lines, nullptr, &status));
175 if (status != CL_SUCCESS)
177 errorMessage->assign(makeOpenClInternalErrorString("clCreateProgramWithSource", status));
181 if ((status = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr)) != CL_SUCCESS)
183 errorMessage->assign(makeOpenClInternalErrorString("clBuildProgram", status));
187 ClKernel kernel(clCreateKernel(program, "dummyKernel", &status));
188 if (status != CL_SUCCESS)
190 errorMessage->assign(makeOpenClInternalErrorString("clCreateKernel", status));
194 clSetKernelArg(kernel, 0, sizeof(void*), nullptr);
196 const size_t localWorkSize = 1, globalWorkSize = 1;
198 clEnqueueNDRangeKernel(commandQueue, kernel, 1, nullptr,
199 &globalWorkSize, &localWorkSize, 0, nullptr, nullptr)) != CL_SUCCESS)
201 errorMessage->assign(makeOpenClInternalErrorString("clEnqueueNDRangeKernel", status));
208 * \brief Checks that device \c devInfo is compatible with GROMACS.
210 * Vendor and OpenCL version support checks are executed an the result
213 * \param[in] devInfo The device info pointer.
214 * \returns The result of the compatibility checks.
216 static int isDeviceSupported(const gmx_device_info_t *devInfo)
218 if (getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK") != nullptr)
220 // Assume the device is compatible because checking has been disabled.
221 return egpuCompatible;
224 // OpenCL device version check, ensure >= REQUIRED_OPENCL_MIN_VERSION
225 constexpr unsigned int minVersionMajor = REQUIRED_OPENCL_MIN_VERSION_MAJOR;
226 constexpr unsigned int minVersionMinor = REQUIRED_OPENCL_MIN_VERSION_MINOR;
228 // Based on the OpenCL spec we're checking the version supported by
229 // the device which has the following format:
230 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
231 unsigned int deviceVersionMinor, deviceVersionMajor;
232 const int valuesScanned = std::sscanf(devInfo->device_version, "OpenCL %u.%u", &deviceVersionMajor, &deviceVersionMinor);
233 const bool versionLargeEnough = ((valuesScanned == 2) &&
234 ((deviceVersionMajor > minVersionMajor) ||
235 (deviceVersionMajor == minVersionMajor && deviceVersionMinor >= minVersionMinor)));
236 if (!versionLargeEnough)
238 return egpuIncompatible;
241 /* Only AMD, Intel, and NVIDIA GPUs are supported for now */
242 switch (devInfo->vendor_e)
244 case OCL_VENDOR_NVIDIA:
245 return egpuCompatible;
247 return runningOnCompatibleOSForAmd() ? egpuCompatible : egpuIncompatible;
248 case OCL_VENDOR_INTEL:
249 return GMX_OPENCL_NB_CLUSTER_SIZE == 4 ? egpuCompatible : egpuIncompatibleClusterSize;
251 return egpuIncompatible;
257 /*! \brief Check whether the \c ocl_gpu_device is suitable for use by mdrun
259 * Runs sanity checks: checking that the runtime can compile a dummy kernel
260 * and this can be executed;
261 * Runs compatibility checks verifying the device OpenCL version requirement
262 * and vendor/OS support.
264 * \param[in] deviceId The runtime-reported numeric ID of the device.
265 * \param[in] deviceInfo The device info pointer.
266 * \returns An e_gpu_detect_res_t to indicate how the GPU coped with
267 * the sanity and compatibility check.
269 static int checkGpu(size_t deviceId,
270 const gmx_device_info_t *deviceInfo)
273 int supportStatus = isDeviceSupported(deviceInfo);
274 if (supportStatus != egpuCompatible)
276 return supportStatus;
279 std::string errorMessage;
280 if (!isDeviceSane(deviceInfo, &errorMessage))
282 gmx_warning("While sanity checking device #%zu, %s", deviceId, errorMessage.c_str());
286 return egpuCompatible;
291 /*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
293 * \param[in] vendor_name String with OpenCL vendor name.
294 * \returns ocl_vendor_id_t value for the input vendor_name
296 static ocl_vendor_id_t get_vendor_id(char *vendor_name)
300 if (strstr(vendor_name, "NVIDIA"))
302 return OCL_VENDOR_NVIDIA;
305 if (strstr(vendor_name, "AMD") ||
306 strstr(vendor_name, "Advanced Micro Devices"))
308 return OCL_VENDOR_AMD;
311 if (strstr(vendor_name, "Intel"))
313 return OCL_VENDOR_INTEL;
316 return OCL_VENDOR_UNKNOWN;
319 bool isGpuDetectionFunctional(std::string *errorMessage)
321 cl_uint numPlatforms;
322 cl_int status = clGetPlatformIDs(0, nullptr, &numPlatforms);
323 GMX_ASSERT(status != CL_INVALID_VALUE, "Incorrect call of clGetPlatformIDs detected");
325 if (status == CL_PLATFORM_NOT_FOUND_KHR)
327 // No valid ICDs found
328 if (errorMessage != nullptr)
330 errorMessage->assign("No valid OpenCL driver found");
335 GMX_RELEASE_ASSERT(status == CL_SUCCESS,
336 gmx::formatString("An unexpected value was returned from clGetPlatformIDs %d: %s",
337 status, ocl_get_error_string(status).c_str()).c_str());
338 bool foundPlatform = (numPlatforms > 0);
339 if (!foundPlatform && errorMessage != nullptr)
341 errorMessage->assign("No OpenCL platforms found even though the driver was valid");
343 return foundPlatform;
346 void findGpus(gmx_gpu_info_t *gpu_info)
348 cl_uint ocl_platform_count;
349 cl_platform_id *ocl_platform_ids;
350 cl_device_type req_dev_type = CL_DEVICE_TYPE_GPU;
352 ocl_platform_ids = nullptr;
354 if (getenv("GMX_OCL_FORCE_CPU") != nullptr)
356 req_dev_type = CL_DEVICE_TYPE_CPU;
361 cl_int status = clGetPlatformIDs(0, nullptr, &ocl_platform_count);
362 if (CL_SUCCESS != status)
364 GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ",
365 status) + ocl_get_error_string(status)));
368 if (1 > ocl_platform_count)
370 // TODO this should have a descriptive error message that we only support one OpenCL platform
374 snew(ocl_platform_ids, ocl_platform_count);
376 status = clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, nullptr);
377 if (CL_SUCCESS != status)
379 GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ",
380 status) + ocl_get_error_string(status)));
383 for (unsigned int i = 0; i < ocl_platform_count; i++)
385 cl_uint ocl_device_count;
387 /* If requesting req_dev_type devices fails, just go to the next platform */
388 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, nullptr, &ocl_device_count))
393 if (1 <= ocl_device_count)
395 gpu_info->n_dev += ocl_device_count;
399 if (1 > gpu_info->n_dev)
404 snew(gpu_info->gpu_dev, gpu_info->n_dev);
408 cl_device_id *ocl_device_ids;
410 snew(ocl_device_ids, gpu_info->n_dev);
413 for (unsigned int i = 0; i < ocl_platform_count; i++)
415 cl_uint ocl_device_count;
417 /* If requesting req_dev_type devices fails, just go to the next platform */
418 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, gpu_info->n_dev, ocl_device_ids, &ocl_device_count))
423 if (1 > ocl_device_count)
428 for (unsigned int j = 0; j < ocl_device_count; j++)
430 gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_platform_id = ocl_platform_ids[i];
431 gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id = ocl_device_ids[j];
433 gpu_info->gpu_dev[device_index].device_name[0] = 0;
434 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, nullptr);
436 gpu_info->gpu_dev[device_index].device_version[0] = 0;
437 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, nullptr);
439 gpu_info->gpu_dev[device_index].device_vendor[0] = 0;
440 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, nullptr);
442 gpu_info->gpu_dev[device_index].compute_units = 0;
443 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), nullptr);
445 gpu_info->gpu_dev[device_index].adress_bits = 0;
446 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), nullptr);
448 gpu_info->gpu_dev[device_index].vendor_e = get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor);
450 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, 3 * sizeof(size_t), &gpu_info->gpu_dev[device_index].maxWorkItemSizes, nullptr);
452 clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &gpu_info->gpu_dev[device_index].maxWorkGroupSize, nullptr);
454 gpu_info->gpu_dev[device_index].stat = gmx::checkGpu(device_index, gpu_info->gpu_dev + device_index);
456 if (egpuCompatible == gpu_info->gpu_dev[device_index].stat)
458 gpu_info->n_dev_compatible++;
465 gpu_info->n_dev = device_index;
467 /* Dummy sort of devices - AMD first, then NVIDIA, then Intel */
468 // TODO: Sort devices based on performance.
469 if (0 < gpu_info->n_dev)
472 for (int i = 0; i < gpu_info->n_dev; i++)
474 if (OCL_VENDOR_AMD == gpu_info->gpu_dev[i].vendor_e)
480 gmx_device_info_t ocl_gpu_info;
481 ocl_gpu_info = gpu_info->gpu_dev[i];
482 gpu_info->gpu_dev[i] = gpu_info->gpu_dev[last];
483 gpu_info->gpu_dev[last] = ocl_gpu_info;
488 /* if more than 1 device left to be sorted */
489 if ((gpu_info->n_dev - 1 - last) > 1)
491 for (int i = 0; i < gpu_info->n_dev; i++)
493 if (OCL_VENDOR_NVIDIA == gpu_info->gpu_dev[i].vendor_e)
499 gmx_device_info_t ocl_gpu_info;
500 ocl_gpu_info = gpu_info->gpu_dev[i];
501 gpu_info->gpu_dev[i] = gpu_info->gpu_dev[last];
502 gpu_info->gpu_dev[last] = ocl_gpu_info;
509 sfree(ocl_device_ids);
515 sfree(ocl_platform_ids);
518 void get_gpu_device_info_string(char *s, const gmx_gpu_info_t &gpu_info, int index)
522 if (index < 0 && index >= gpu_info.n_dev)
527 gmx_device_info_t *dinfo = &gpu_info.gpu_dev[index];
529 bool bGpuExists = (dinfo->stat != egpuNonexistent &&
530 dinfo->stat != egpuInsane);
534 sprintf(s, "#%d: %s, stat: %s",
536 gpu_detect_res_str[dinfo->stat]);
540 sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s",
541 index, dinfo->device_name, dinfo->device_vendor,
542 dinfo->device_version,
543 gpu_detect_res_str[dinfo->stat]);
547 bool areAllGpuDevicesFromAmd(const gmx_gpu_info_t &gpuInfo)
550 for (int i = 0; i < gpuInfo.n_dev; ++i)
552 if ((gpuInfo.gpu_dev[i].stat == egpuCompatible) &&
553 (gpuInfo.gpu_dev[i].vendor_e != OCL_VENDOR_AMD))
562 void init_gpu(const gmx_device_info_t *deviceInfo)
566 // If the device is NVIDIA, for safety reasons we disable the JIT
567 // caching as this is known to be broken at least until driver 364.19;
568 // the cache does not always get regenerated when the source code changes,
569 // e.g. if the path to the kernel sources remains the same
571 if (deviceInfo->vendor_e == OCL_VENDOR_NVIDIA)
573 // Ignore return values, failing to set the variable does not mean
574 // that something will go wrong later.
576 _putenv("CUDA_CACHE_DISABLE=1");
578 // Don't override, maybe a dev is testing.
579 setenv("CUDA_CACHE_DISABLE", "1", 0);
584 gmx_device_info_t *getDeviceInfo(const gmx_gpu_info_t &gpu_info,
587 if (deviceId < 0 || deviceId >= gpu_info.n_dev)
589 gmx_incons("Invalid GPU deviceId requested");
591 return &gpu_info.gpu_dev[deviceId];
594 size_t sizeof_gpu_dev_info()
596 return sizeof(gmx_device_info_t);
599 int gpu_info_get_stat(const gmx_gpu_info_t &info, int index)
601 return info.gpu_dev[index].stat;