src/gromacs/gpu_utils/gpu_utils_ocl.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  *  \brief Define functions for detection and initialization for OpenCL devices.
  37  *
  38  *  \author Anca Hamuraru <anca@streamcomputing.eu>
  39  *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
  40  *  \author Teemu Virolainen <teemu@streamcomputing.eu>
  41  */
  42
  43 #include "gmxpre.h"
  44
  45 #include <assert.h>
  46 #include <stdio.h>
  47 #include <stdlib.h>
  48 #include <string.h>
  49 #ifdef __APPLE__
  50 #    include <sys/sysctl.h>
  51 #endif
  52
  53 #include <memory.h>
  54
  55 #include "gromacs/gpu_utils/gpu_utils.h"
  56 #include "gromacs/gpu_utils/ocl_compiler.h"
  57 #include "gromacs/gpu_utils/oclutils.h"
  58 #include "gromacs/hardware/hw_info.h"
  59 #include "gromacs/mdtypes/md_enums.h"
  60 #include "gromacs/utility/cstringutil.h"
  61 #include "gromacs/utility/exceptions.h"
  62 #include "gromacs/utility/fatalerror.h"
  63 #include "gromacs/utility/smalloc.h"
  64 #include "gromacs/utility/stringutil.h"
  65
  66 /*! \brief Return true if executing on compatible OS for AMD OpenCL.
  67  *
  68  * This is assumed to be true for OS X version of at least 10.10.4 and
  69  * all other OS flavors.
  70  *
  71  * Uses the BSD sysctl() interfaces to extract the kernel version.
  72  *
  73  * \return true if version is 14.4 or later (= OS X version 10.10.4),
  74  *         or OS is not Darwin.
  75  */
  76 static bool
  77 runningOnCompatibleOSForAmd()
  78 {
  79 #ifdef __APPLE__
  80     int    mib[2];
  81     char   kernelVersion[256];
  82     size_t len = sizeof(kernelVersion);
  83
  84     mib[0] = CTL_KERN;
  85     mib[1] = KERN_OSRELEASE;
  86
  87     sysctl(mib, sizeof(mib)/sizeof(mib[0]), kernelVersion, &len, NULL, 0);
  88
  89     int major = strtod(kernelVersion, NULL);
  90     int minor = strtod(strchr(kernelVersion, '.')+1, NULL);
  91
  92     // Kernel 14.4 corresponds to OS X 10.10.4
  93     return (major > 14 || (major == 14 && minor >= 4));
  94 #else
  95     return true;
  96 #endif
  97 }
  98
  99 /*! \brief Returns true if the gpu characterized by the device properties is
 100  *  supported by the native gpu acceleration.
 101  * \returns             true if the GPU properties passed indicate a compatible
 102  *                      GPU, otherwise false.
 103  */
 104 static int is_gmx_supported_gpu_id(struct gmx_device_info_t *ocl_gpu_device)
 105 {
 106     if ((getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK")) != NULL)
 107     {
 108         return egpuCompatible;
 109     }
 110
 111     /* Only AMD and NVIDIA GPUs are supported for now */
 112     switch (ocl_gpu_device->vendor_e)
 113     {
 114         case OCL_VENDOR_NVIDIA:
 115             return egpuCompatible;
 116         case OCL_VENDOR_AMD:
 117             return runningOnCompatibleOSForAmd() ? egpuCompatible : egpuIncompatible;
 118         default:
 119             return egpuIncompatible;
 120     }
 121 }
 122
 123
 124 /*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
 125  *
 126  *  \param[in] vendor_name String with OpenCL vendor name.
 127  *  \returns               ocl_vendor_id_t value for the input vendor_name
 128  */
 129 static ocl_vendor_id_t get_vendor_id(char *vendor_name)
 130 {
 131     if (vendor_name)
 132     {
 133         if (strstr(vendor_name, "NVIDIA"))
 134         {
 135             return OCL_VENDOR_NVIDIA;
 136         }
 137         else
 138         if (strstr(vendor_name, "AMD") ||
 139             strstr(vendor_name, "Advanced Micro Devices"))
 140         {
 141             return OCL_VENDOR_AMD;
 142         }
 143         else
 144         if (strstr(vendor_name, "Intel"))
 145         {
 146             return OCL_VENDOR_INTEL;
 147         }
 148     }
 149     return OCL_VENDOR_UNKNOWN;
 150 }
 151
 152
 153 //! This function is documented in the header file
 154 bool canDetectGpus(std::string *errorMessage)
 155 {
 156     cl_uint numPlatforms;
 157     cl_int  status       = clGetPlatformIDs(0, nullptr, &numPlatforms);
 158     GMX_ASSERT(status != CL_INVALID_VALUE, "Incorrect call of clGetPlatformIDs detected");
 159     if (status == CL_PLATFORM_NOT_FOUND_KHR)
 160     {
 161         // No valid ICDs found
 162         if (errorMessage != nullptr)
 163         {
 164             errorMessage->assign("No valid OpenCL driver found");
 165         }
 166         return false;
 167     }
 168     GMX_RELEASE_ASSERT(status == CL_SUCCESS,
 169                        gmx::formatString("An unexpected value was returned from clGetPlatformIDs %u: %s",
 170                                          status, ocl_get_error_string(status).c_str()).c_str());
 171     bool foundPlatform = (numPlatforms > 0);
 172     if (!foundPlatform && errorMessage != nullptr)
 173     {
 174         errorMessage->assign("No OpenCL platforms found even though the driver was valid");
 175     }
 176     return foundPlatform;
 177 }
 178
 179 //! This function is documented in the header file
 180 void findGpus(gmx_gpu_info_t *gpu_info)
 181 {
 182     cl_uint         ocl_platform_count;
 183     cl_platform_id *ocl_platform_ids;
 184     cl_device_type  req_dev_type = CL_DEVICE_TYPE_GPU;
 185
 186     ocl_platform_ids = NULL;
 187
 188     if (getenv("GMX_OCL_FORCE_CPU") != NULL)
 189     {
 190         req_dev_type = CL_DEVICE_TYPE_CPU;
 191     }
 192
 193     while (1)
 194     {
 195         cl_int status = clGetPlatformIDs(0, NULL, &ocl_platform_count);
 196         if (CL_SUCCESS != status)
 197         {
 198             GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %u was returned from clGetPlatformIDs: ",
 199                                                            status) + ocl_get_error_string(status)));
 200         }
 201
 202         if (1 > ocl_platform_count)
 203         {
 204             // TODO this should have a descriptive error message that we only support one OpenCL platform
 205             break;
 206         }
 207
 208         snew(ocl_platform_ids, ocl_platform_count);
 209
 210         status = clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, NULL);
 211         if (CL_SUCCESS != status)
 212         {
 213             GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %u was returned from clGetPlatformIDs: ",
 214                                                            status) + ocl_get_error_string(status)));
 215         }
 216
 217         for (unsigned int i = 0; i < ocl_platform_count; i++)
 218         {
 219             cl_uint ocl_device_count;
 220
 221             /* If requesting req_dev_type devices fails, just go to the next platform */
 222             if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, NULL, &ocl_device_count))
 223             {
 224                 continue;
 225             }
 226
 227             if (1 <= ocl_device_count)
 228             {
 229                 gpu_info->n_dev += ocl_device_count;
 230             }
 231         }
 232
 233         if (1 > gpu_info->n_dev)
 234         {
 235             break;
 236         }
 237
 238         snew(gpu_info->gpu_dev, gpu_info->n_dev);
 239
 240         {
 241             int           device_index;
 242             cl_device_id *ocl_device_ids;
 243
 244             snew(ocl_device_ids, gpu_info->n_dev);
 245             device_index = 0;
 246
 247             for (unsigned int i = 0; i < ocl_platform_count; i++)
 248             {
 249                 cl_uint ocl_device_count;
 250
 251                 /* If requesting req_dev_type devices fails, just go to the next platform */
 252                 if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, gpu_info->n_dev, ocl_device_ids, &ocl_device_count))
 253                 {
 254                     continue;
 255                 }
 256
 257                 if (1 > ocl_device_count)
 258                 {
 259                     break;
 260                 }
 261
 262                 for (unsigned int j = 0; j < ocl_device_count; j++)
 263                 {
 264                     gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_platform_id = ocl_platform_ids[i];
 265                     gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id   = ocl_device_ids[j];
 266
 267                     gpu_info->gpu_dev[device_index].device_name[0] = 0;
 268                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, NULL);
 269
 270                     gpu_info->gpu_dev[device_index].device_version[0] = 0;
 271                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, NULL);
 272
 273                     gpu_info->gpu_dev[device_index].device_vendor[0] = 0;
 274                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, NULL);
 275
 276                     gpu_info->gpu_dev[device_index].compute_units = 0;
 277                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), NULL);
 278
 279                     gpu_info->gpu_dev[device_index].adress_bits = 0;
 280                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), NULL);
 281
 282                     gpu_info->gpu_dev[device_index].vendor_e = get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor);
 283
 284                     gpu_info->gpu_dev[device_index].stat = is_gmx_supported_gpu_id(gpu_info->gpu_dev + device_index);
 285
 286                     if (egpuCompatible == gpu_info->gpu_dev[device_index].stat)
 287                     {
 288                         gpu_info->n_dev_compatible++;
 289                     }
 290
 291                     device_index++;
 292                 }
 293             }
 294
 295             gpu_info->n_dev = device_index;
 296
 297             /* Dummy sort of devices -  AMD first, then NVIDIA, then Intel */
 298             // TODO: Sort devices based on performance.
 299             if (0 < gpu_info->n_dev)
 300             {
 301                 int last = -1;
 302                 for (int i = 0; i < gpu_info->n_dev; i++)
 303                 {
 304                     if (OCL_VENDOR_AMD == gpu_info->gpu_dev[i].vendor_e)
 305                     {
 306                         last++;
 307
 308                         if (last < i)
 309                         {
 310                             gmx_device_info_t ocl_gpu_info;
 311                             ocl_gpu_info            = gpu_info->gpu_dev[i];
 312                             gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
 313                             gpu_info->gpu_dev[last] = ocl_gpu_info;
 314                         }
 315                     }
 316                 }
 317
 318                 /* if more than 1 device left to be sorted */
 319                 if ((gpu_info->n_dev - 1 - last) > 1)
 320                 {
 321                     for (int i = 0; i < gpu_info->n_dev; i++)
 322                     {
 323                         if (OCL_VENDOR_NVIDIA == gpu_info->gpu_dev[i].vendor_e)
 324                         {
 325                             last++;
 326
 327                             if (last < i)
 328                             {
 329                                 gmx_device_info_t ocl_gpu_info;
 330                                 ocl_gpu_info            = gpu_info->gpu_dev[i];
 331                                 gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
 332                                 gpu_info->gpu_dev[last] = ocl_gpu_info;
 333                             }
 334                         }
 335                     }
 336                 }
 337             }
 338
 339             sfree(ocl_device_ids);
 340         }
 341
 342         break;
 343     }
 344
 345     sfree(ocl_platform_ids);
 346 }
 347
 348 //! This function is documented in the header file
 349 void free_gpu_info(const gmx_gpu_info_t gmx_unused *gpu_info)
 350 {
 351     if (gpu_info == NULL)
 352     {
 353         return;
 354     }
 355
 356     sfree(gpu_info->gpu_dev);
 357 }
 358
 359 //! This function is documented in the header file
 360 std::vector<int> getCompatibleGpus(const gmx_gpu_info_t &gpu_info)
 361 {
 362     // Possible minor over-allocation here, but not important for anything
 363     std::vector<int> compatibleGpus;
 364     compatibleGpus.reserve(gpu_info.n_dev);
 365     for (int i = 0; i < gpu_info.n_dev; i++)
 366     {
 367         assert(gpu_info.gpu_dev);
 368         if (gpu_info.gpu_dev[i].stat == egpuCompatible)
 369         {
 370             compatibleGpus.push_back(i);
 371         }
 372     }
 373     return compatibleGpus;
 374 }
 375
 376 //! This function is documented in the header file
 377 const char *getGpuCompatibilityDescription(const gmx_gpu_info_t &gpu_info,
 378                                            int                   index)
 379 {
 380     return (index >= gpu_info.n_dev ?
 381             gpu_detect_res_str[egpuNonexistent] :
 382             gpu_detect_res_str[gpu_info.gpu_dev[index].stat]);
 383 }
 384
 385 //! This function is documented in the header file
 386 void get_gpu_device_info_string(char *s, const gmx_gpu_info_t &gpu_info, int index)
 387 {
 388     assert(s);
 389
 390     if (index < 0 && index >= gpu_info.n_dev)
 391     {
 392         return;
 393     }
 394
 395     gmx_device_info_t  *dinfo = &gpu_info.gpu_dev[index];
 396
 397     bool                bGpuExists =
 398         dinfo->stat == egpuCompatible ||
 399         dinfo->stat == egpuIncompatible;
 400
 401     if (!bGpuExists)
 402     {
 403         sprintf(s, "#%d: %s, stat: %s",
 404                 index, "N/A",
 405                 gpu_detect_res_str[dinfo->stat]);
 406     }
 407     else
 408     {
 409         sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s",
 410                 index, dinfo->device_name, dinfo->device_vendor,
 411                 dinfo->device_version,
 412                 gpu_detect_res_str[dinfo->stat]);
 413     }
 414 }
 415
 416 //! This function is documented in the header file
 417 void init_gpu(const gmx::MDLogger               & /*mdlog*/,
 418               gmx_device_info_t                *deviceInfo)
 419 {
 420     assert(deviceInfo);
 421
 422     // If the device is NVIDIA, for safety reasons we disable the JIT
 423     // caching as this is known to be broken at least until driver 364.19;
 424     // the cache does not always get regenerated when the source code changes,
 425     // e.g. if the path to the kernel sources remains the same
 426
 427     if (deviceInfo->vendor_e == OCL_VENDOR_NVIDIA)
 428     {
 429         // Ignore return values, failing to set the variable does not mean
 430         // that something will go wrong later.
 431 #ifdef _MSC_VER
 432         _putenv("CUDA_CACHE_DISABLE=1");
 433 #else
 434         // Don't override, maybe a dev is testing.
 435         setenv("CUDA_CACHE_DISABLE", "1", 0);
 436 #endif
 437     }
 438 }
 439
 440 //! This function is documented in the header file
 441 gmx_device_info_t *getDeviceInfo(const gmx_gpu_info_t &gpu_info,
 442                                  int                   deviceId)
 443 {
 444     if (deviceId < 0 || deviceId >= gpu_info.n_dev)
 445     {
 446         gmx_incons("Invalid GPU deviceId requested");
 447     }
 448     return &gpu_info.gpu_dev[deviceId];
 449 }
 450
 451 //! This function is documented in the header file
 452 size_t sizeof_gpu_dev_info(void)
 453 {
 454     return sizeof(gmx_device_info_t);
 455 }
 456
 457 void gpu_set_host_malloc_and_free(bool               bUseGpuKernels,
 458                                   gmx_host_alloc_t **nb_alloc,
 459                                   gmx_host_free_t  **nb_free)
 460 {
 461     if (bUseGpuKernels)
 462     {
 463         *nb_alloc = &ocl_pmalloc;
 464         *nb_free  = &ocl_pfree;
 465     }
 466     else
 467     {
 468         *nb_alloc = NULL;
 469         *nb_free  = NULL;
 470     }
 471 }