src/gromacs/gpu_utils/gpu_utils_ocl.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  *  \brief Define functions for detection and initialization for OpenCL devices.
  37  *
  38  *  \author Anca Hamuraru <anca@streamcomputing.eu>
  39  *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
  40  *  \author Teemu Virolainen <teemu@streamcomputing.eu>
  41  *  \author Mark Abraham <mark.j.abraham@gmail.com>
  42  *  \author Szilárd Páll <pall.szilard@gmail.com>
  43  */
  44
  45 #include "gmxpre.h"
  46
  47 #include "config.h"
  48
  49 #include <assert.h>
  50 #include <stdio.h>
  51 #include <stdlib.h>
  52 #include <string.h>
  53
  54 #include <cstdio>
  55 #ifdef __APPLE__
  56 #    include <sys/sysctl.h>
  57 #endif
  58
  59 #include <memory.h>
  60
  61 #include "gromacs/gpu_utils/gpu_utils.h"
  62 #include "gromacs/gpu_utils/ocl_compiler.h"
  63 #include "gromacs/gpu_utils/oclraii.h"
  64 #include "gromacs/gpu_utils/oclutils.h"
  65 #include "gromacs/hardware/hw_info.h"
  66 #include "gromacs/utility/cstringutil.h"
  67 #include "gromacs/utility/exceptions.h"
  68 #include "gromacs/utility/fatalerror.h"
  69 #include "gromacs/utility/smalloc.h"
  70 #include "gromacs/utility/stringutil.h"
  71
  72 /*! \brief Return true if executing on compatible OS for AMD OpenCL.
  73  *
  74  * This is assumed to be true for OS X version of at least 10.10.4 and
  75  * all other OS flavors.
  76  *
  77  * Uses the BSD sysctl() interfaces to extract the kernel version.
  78  *
  79  * \return true if version is 14.4 or later (= OS X version 10.10.4),
  80  *         or OS is not Darwin.
  81  */
  82 static bool
  83 runningOnCompatibleOSForAmd()
  84 {
  85 #ifdef __APPLE__
  86     int    mib[2];
  87     char   kernelVersion[256];
  88     size_t len = sizeof(kernelVersion);
  89
  90     mib[0] = CTL_KERN;
  91     mib[1] = KERN_OSRELEASE;
  92
  93     sysctl(mib, sizeof(mib)/sizeof(mib[0]), kernelVersion, &len, NULL, 0);
  94
  95     int major = strtod(kernelVersion, NULL);
  96     int minor = strtod(strchr(kernelVersion, '.')+1, NULL);
  97
  98     // Kernel 14.4 corresponds to OS X 10.10.4
  99     return (major > 14 || (major == 14 && minor >= 4));
 100 #else
 101     return true;
 102 #endif
 103 }
 104
 105 namespace gmx
 106 {
 107
 108 /*! \brief Make an error string following an OpenCL API call.
 109  *
 110  *  It is meant to be called with \p status != CL_SUCCESS, but it will
 111  *  work correctly even if it is called with no OpenCL failure.
 112  *
 113  * \param[in]  message  Supplies context, e.g. the name of the API call that returned the error.
 114  * \param[in]  status   OpenCL API status code
 115  * \returns             A string describing the OpenCL error.
 116  */
 117 static std::string
 118 makeOpenClInternalErrorString(const char *message, cl_int status)
 119 {
 120     if (message != nullptr)
 121     {
 122         return formatString("%s did %ssucceed %d: %s",
 123                             message,
 124                             ((status != CL_SUCCESS) ? "not " : ""),
 125                             status, ocl_get_error_string(status).c_str());
 126     }
 127     else
 128     {
 129         return formatString("%sOpenCL error encountered %d: %s",
 130                             ((status != CL_SUCCESS) ? "" : "No "),
 131                             status, ocl_get_error_string(status).c_str());
 132     }
 133 }
 134
 135 /*!
 136  * \brief Checks that device \c devInfo is sane (ie can run a kernel).
 137  *
 138  * Compiles and runs a dummy kernel to determine whether the given
 139  * OpenCL device functions properly.
 140  *
 141  *
 142  * \param[in]  devInfo         The device info pointer.
 143  * \param[out] errorMessage    An error message related to a failing OpenCL API call.
 144  * \throws     std::bad_alloc  When out of memory.
 145  * \returns                    Whether the device passed sanity checks
 146  */
 147 static bool isDeviceSane(const gmx_device_info_t *devInfo,
 148                          std::string             *errorMessage)
 149 {
 150     cl_context_properties properties[] = {
 151         CL_CONTEXT_PLATFORM,
 152         (cl_context_properties) devInfo->ocl_gpu_id.ocl_platform_id,
 153         0
 154     };
 155     // uncrustify spacing
 156
 157     cl_int    status;
 158     auto      deviceId = devInfo->ocl_gpu_id.ocl_device_id;
 159     ClContext context(clCreateContext(properties, 1, &deviceId, nullptr, nullptr, &status));
 160     if (status != CL_SUCCESS)
 161     {
 162         errorMessage->assign(makeOpenClInternalErrorString("clCreateContext", status));
 163         return false;
 164     }
 165     ClCommandQueue commandQueue(clCreateCommandQueue(context, deviceId, 0, &status));
 166     if (status != CL_SUCCESS)
 167     {
 168         errorMessage->assign(makeOpenClInternalErrorString("clCreateCommandQueue", status));
 169         return false;
 170     }
 171
 172     // Some compilers such as Apple's require kernel functions to have at least one argument
 173     const char *lines[] = { "__kernel void dummyKernel(__global void* input){}" };
 174     ClProgram   program(clCreateProgramWithSource(context, 1, lines, nullptr, &status));
 175     if (status != CL_SUCCESS)
 176     {
 177         errorMessage->assign(makeOpenClInternalErrorString("clCreateProgramWithSource", status));
 178         return false;
 179     }
 180
 181     if ((status = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr)) != CL_SUCCESS)
 182     {
 183         errorMessage->assign(makeOpenClInternalErrorString("clBuildProgram", status));
 184         return false;
 185     }
 186
 187     ClKernel kernel(clCreateKernel(program, "dummyKernel", &status));
 188     if (status != CL_SUCCESS)
 189     {
 190         errorMessage->assign(makeOpenClInternalErrorString("clCreateKernel", status));
 191         return false;
 192     }
 193
 194     clSetKernelArg(kernel, 0, sizeof(void*), nullptr);
 195
 196     const size_t localWorkSize = 1, globalWorkSize = 1;
 197     if ((status =
 198              clEnqueueNDRangeKernel(commandQueue, kernel, 1, nullptr,
 199                                     &globalWorkSize, &localWorkSize, 0, nullptr, nullptr)) != CL_SUCCESS)
 200     {
 201         errorMessage->assign(makeOpenClInternalErrorString("clEnqueueNDRangeKernel", status));
 202         return false;
 203     }
 204     return true;
 205 }
 206
 207 /*!
 208  * \brief Checks that device \c devInfo is compatible with GROMACS.
 209  *
 210  *  Vendor and OpenCL version support checks are executed an the result
 211  *  of these returned.
 212  *
 213  * \param[in]  devInfo         The device info pointer.
 214  * \returns                    The result of the compatibility checks.
 215  */
 216 static int isDeviceSupported(const gmx_device_info_t *devInfo)
 217 {
 218     if (getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK") != nullptr)
 219     {
 220         // Assume the device is compatible because checking has been disabled.
 221         return egpuCompatible;
 222     }
 223
 224     // OpenCL device version check, ensure >= REQUIRED_OPENCL_MIN_VERSION
 225     constexpr unsigned int minVersionMajor = REQUIRED_OPENCL_MIN_VERSION_MAJOR;
 226     constexpr unsigned int minVersionMinor = REQUIRED_OPENCL_MIN_VERSION_MINOR;
 227
 228     // Based on the OpenCL spec we're checking the version supported by
 229     // the device which has the following format:
 230     //      OpenCL<space><major_version.minor_version><space><vendor-specific information>
 231     unsigned int deviceVersionMinor, deviceVersionMajor;
 232     const int    valuesScanned      = std::sscanf(devInfo->device_version, "OpenCL %u.%u", &deviceVersionMajor, &deviceVersionMinor);
 233     const bool   versionLargeEnough = ((valuesScanned == 2) &&
 234                                        ((deviceVersionMajor > minVersionMajor) ||
 235                                         (deviceVersionMajor == minVersionMajor && deviceVersionMinor >= minVersionMinor)));
 236     if (!versionLargeEnough)
 237     {
 238         return egpuIncompatible;
 239     }
 240
 241     /* Only AMD, Intel, and NVIDIA GPUs are supported for now */
 242     switch (devInfo->vendor_e)
 243     {
 244         case OCL_VENDOR_NVIDIA:
 245             return egpuCompatible;
 246         case OCL_VENDOR_AMD:
 247             return runningOnCompatibleOSForAmd() ? egpuCompatible : egpuIncompatible;
 248         case OCL_VENDOR_INTEL:
 249             return GMX_OPENCL_NB_CLUSTER_SIZE == 4 ? egpuCompatible : egpuIncompatibleClusterSize;
 250         default:
 251             return egpuIncompatible;
 252     }
 253 }
 254
 255
 256
 257 /*! \brief Check whether the \c ocl_gpu_device is suitable for use by mdrun
 258  *
 259  * Runs sanity checks: checking that the runtime can compile a dummy kernel
 260  * and this can be executed;
 261  * Runs compatibility checks verifying the device OpenCL version requirement
 262  * and vendor/OS support.
 263  *
 264  * \param[in]  deviceId      The runtime-reported numeric ID of the device.
 265  * \param[in]  deviceInfo    The device info pointer.
 266  * \returns  An e_gpu_detect_res_t to indicate how the GPU coped with
 267  *           the sanity and compatibility check.
 268  */
 269 static int checkGpu(size_t                   deviceId,
 270                     const gmx_device_info_t *deviceInfo)
 271 {
 272
 273     int supportStatus = isDeviceSupported(deviceInfo);
 274     if (supportStatus != egpuCompatible)
 275     {
 276         return supportStatus;
 277     }
 278
 279     std::string errorMessage;
 280     if (!isDeviceSane(deviceInfo, &errorMessage))
 281     {
 282         gmx_warning("While sanity checking device #%zu, %s", deviceId, errorMessage.c_str());
 283         return egpuInsane;
 284     }
 285
 286     return egpuCompatible;
 287 }
 288
 289 } // namespace
 290
 291 /*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
 292  *
 293  *  \param[in] vendor_name String with OpenCL vendor name.
 294  *  \returns               ocl_vendor_id_t value for the input vendor_name
 295  */
 296 static ocl_vendor_id_t get_vendor_id(char *vendor_name)
 297 {
 298     if (vendor_name)
 299     {
 300         if (strstr(vendor_name, "NVIDIA"))
 301         {
 302             return OCL_VENDOR_NVIDIA;
 303         }
 304         else
 305         if (strstr(vendor_name, "AMD") ||
 306             strstr(vendor_name, "Advanced Micro Devices"))
 307         {
 308             return OCL_VENDOR_AMD;
 309         }
 310         else
 311         if (strstr(vendor_name, "Intel"))
 312         {
 313             return OCL_VENDOR_INTEL;
 314         }
 315     }
 316     return OCL_VENDOR_UNKNOWN;
 317 }
 318
 319 bool isGpuDetectionFunctional(std::string *errorMessage)
 320 {
 321     cl_uint numPlatforms;
 322     cl_int  status       = clGetPlatformIDs(0, nullptr, &numPlatforms);
 323     GMX_ASSERT(status != CL_INVALID_VALUE, "Incorrect call of clGetPlatformIDs detected");
 324 #ifdef cl_khr_icd
 325     if (status == CL_PLATFORM_NOT_FOUND_KHR)
 326     {
 327         // No valid ICDs found
 328         if (errorMessage != nullptr)
 329         {
 330             errorMessage->assign("No valid OpenCL driver found");
 331         }
 332         return false;
 333     }
 334 #endif
 335     GMX_RELEASE_ASSERT(status == CL_SUCCESS,
 336                        gmx::formatString("An unexpected value was returned from clGetPlatformIDs %d: %s",
 337                                          status, ocl_get_error_string(status).c_str()).c_str());
 338     bool foundPlatform = (numPlatforms > 0);
 339     if (!foundPlatform && errorMessage != nullptr)
 340     {
 341         errorMessage->assign("No OpenCL platforms found even though the driver was valid");
 342     }
 343     return foundPlatform;
 344 }
 345
 346 void findGpus(gmx_gpu_info_t *gpu_info)
 347 {
 348     cl_uint         ocl_platform_count;
 349     cl_platform_id *ocl_platform_ids;
 350     cl_device_type  req_dev_type = CL_DEVICE_TYPE_GPU;
 351
 352     ocl_platform_ids = nullptr;
 353
 354     if (getenv("GMX_OCL_FORCE_CPU") != nullptr)
 355     {
 356         req_dev_type = CL_DEVICE_TYPE_CPU;
 357     }
 358
 359     while (true)
 360     {
 361         cl_int status = clGetPlatformIDs(0, nullptr, &ocl_platform_count);
 362         if (CL_SUCCESS != status)
 363         {
 364             GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ",
 365                                                            status) + ocl_get_error_string(status)));
 366         }
 367
 368         if (1 > ocl_platform_count)
 369         {
 370             // TODO this should have a descriptive error message that we only support one OpenCL platform
 371             break;
 372         }
 373
 374         snew(ocl_platform_ids, ocl_platform_count);
 375
 376         status = clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, nullptr);
 377         if (CL_SUCCESS != status)
 378         {
 379             GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ",
 380                                                            status) + ocl_get_error_string(status)));
 381         }
 382
 383         for (unsigned int i = 0; i < ocl_platform_count; i++)
 384         {
 385             cl_uint ocl_device_count;
 386
 387             /* If requesting req_dev_type devices fails, just go to the next platform */
 388             if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, nullptr, &ocl_device_count))
 389             {
 390                 continue;
 391             }
 392
 393             if (1 <= ocl_device_count)
 394             {
 395                 gpu_info->n_dev += ocl_device_count;
 396             }
 397         }
 398
 399         if (1 > gpu_info->n_dev)
 400         {
 401             break;
 402         }
 403
 404         snew(gpu_info->gpu_dev, gpu_info->n_dev);
 405
 406         {
 407             int           device_index;
 408             cl_device_id *ocl_device_ids;
 409
 410             snew(ocl_device_ids, gpu_info->n_dev);
 411             device_index = 0;
 412
 413             for (unsigned int i = 0; i < ocl_platform_count; i++)
 414             {
 415                 cl_uint ocl_device_count;
 416
 417                 /* If requesting req_dev_type devices fails, just go to the next platform */
 418                 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, gpu_info->n_dev, ocl_device_ids, &ocl_device_count))
 419                 {
 420                     continue;
 421                 }
 422
 423                 if (1 > ocl_device_count)
 424                 {
 425                     break;
 426                 }
 427
 428                 for (unsigned int j = 0; j < ocl_device_count; j++)
 429                 {
 430                     gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_platform_id = ocl_platform_ids[i];
 431                     gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id   = ocl_device_ids[j];
 432
 433                     gpu_info->gpu_dev[device_index].device_name[0] = 0;
 434                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, nullptr);
 435
 436                     gpu_info->gpu_dev[device_index].device_version[0] = 0;
 437                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, nullptr);
 438
 439                     gpu_info->gpu_dev[device_index].device_vendor[0] = 0;
 440                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, nullptr);
 441
 442                     gpu_info->gpu_dev[device_index].compute_units = 0;
 443                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), nullptr);
 444
 445                     gpu_info->gpu_dev[device_index].adress_bits = 0;
 446                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), nullptr);
 447
 448                     gpu_info->gpu_dev[device_index].vendor_e = get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor);
 449
 450                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, 3 * sizeof(size_t), &gpu_info->gpu_dev[device_index].maxWorkItemSizes, nullptr);
 451
 452                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &gpu_info->gpu_dev[device_index].maxWorkGroupSize, nullptr);
 453
 454                     gpu_info->gpu_dev[device_index].stat = gmx::checkGpu(device_index, gpu_info->gpu_dev + device_index);
 455
 456                     if (egpuCompatible == gpu_info->gpu_dev[device_index].stat)
 457                     {
 458                         gpu_info->n_dev_compatible++;
 459                     }
 460
 461                     device_index++;
 462                 }
 463             }
 464
 465             gpu_info->n_dev = device_index;
 466
 467             /* Dummy sort of devices -  AMD first, then NVIDIA, then Intel */
 468             // TODO: Sort devices based on performance.
 469             if (0 < gpu_info->n_dev)
 470             {
 471                 int last = -1;
 472                 for (int i = 0; i < gpu_info->n_dev; i++)
 473                 {
 474                     if (OCL_VENDOR_AMD == gpu_info->gpu_dev[i].vendor_e)
 475                     {
 476                         last++;
 477
 478                         if (last < i)
 479                         {
 480                             gmx_device_info_t ocl_gpu_info;
 481                             ocl_gpu_info            = gpu_info->gpu_dev[i];
 482                             gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
 483                             gpu_info->gpu_dev[last] = ocl_gpu_info;
 484                         }
 485                     }
 486                 }
 487
 488                 /* if more than 1 device left to be sorted */
 489                 if ((gpu_info->n_dev - 1 - last) > 1)
 490                 {
 491                     for (int i = 0; i < gpu_info->n_dev; i++)
 492                     {
 493                         if (OCL_VENDOR_NVIDIA == gpu_info->gpu_dev[i].vendor_e)
 494                         {
 495                             last++;
 496
 497                             if (last < i)
 498                             {
 499                                 gmx_device_info_t ocl_gpu_info;
 500                                 ocl_gpu_info            = gpu_info->gpu_dev[i];
 501                                 gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
 502                                 gpu_info->gpu_dev[last] = ocl_gpu_info;
 503                             }
 504                         }
 505                     }
 506                 }
 507             }
 508
 509             sfree(ocl_device_ids);
 510         }
 511
 512         break;
 513     }
 514
 515     sfree(ocl_platform_ids);
 516 }
 517
 518 void get_gpu_device_info_string(char *s, const gmx_gpu_info_t &gpu_info, int index)
 519 {
 520     assert(s);
 521
 522     if (index < 0 && index >= gpu_info.n_dev)
 523     {
 524         return;
 525     }
 526
 527     gmx_device_info_t *dinfo = &gpu_info.gpu_dev[index];
 528
 529     bool               bGpuExists = (dinfo->stat != egpuNonexistent &&
 530                                      dinfo->stat != egpuInsane);
 531
 532     if (!bGpuExists)
 533     {
 534         sprintf(s, "#%d: %s, stat: %s",
 535                 index, "N/A",
 536                 gpu_detect_res_str[dinfo->stat]);
 537     }
 538     else
 539     {
 540         sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s",
 541                 index, dinfo->device_name, dinfo->device_vendor,
 542                 dinfo->device_version,
 543                 gpu_detect_res_str[dinfo->stat]);
 544     }
 545 }
 546
 547
 548 void init_gpu(const gmx_device_info_t *deviceInfo)
 549 {
 550     assert(deviceInfo);
 551
 552     // If the device is NVIDIA, for safety reasons we disable the JIT
 553     // caching as this is known to be broken at least until driver 364.19;
 554     // the cache does not always get regenerated when the source code changes,
 555     // e.g. if the path to the kernel sources remains the same
 556
 557     if (deviceInfo->vendor_e == OCL_VENDOR_NVIDIA)
 558     {
 559         // Ignore return values, failing to set the variable does not mean
 560         // that something will go wrong later.
 561 #ifdef _MSC_VER
 562         _putenv("CUDA_CACHE_DISABLE=1");
 563 #else
 564         // Don't override, maybe a dev is testing.
 565         setenv("CUDA_CACHE_DISABLE", "1", 0);
 566 #endif
 567     }
 568 }
 569
 570 gmx_device_info_t *getDeviceInfo(const gmx_gpu_info_t &gpu_info,
 571                                  int                   deviceId)
 572 {
 573     if (deviceId < 0 || deviceId >= gpu_info.n_dev)
 574     {
 575         gmx_incons("Invalid GPU deviceId requested");
 576     }
 577     return &gpu_info.gpu_dev[deviceId];
 578 }
 579
 580 size_t sizeof_gpu_dev_info()
 581 {
 582     return sizeof(gmx_device_info_t);
 583 }
 584
 585 int gpu_info_get_stat(const gmx_gpu_info_t &info, int index)
 586 {
 587     return info.gpu_dev[index].stat;
 588 }