src/gromacs/gpu_utils/ocl_compiler.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  *  \brief Define infrastructure for OpenCL JIT compilation for Gromacs
  37  *
  38  *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
  39  *  \author Anca Hamuraru <anca@streamcomputing.eu>
  40  *  \author Teemu Virolainen <teemu@streamcomputing.eu>
  41  *  \author Mark Abraham <mark.j.abraham@gmail.com>
  42  */
  43
#include "gmxpre.h"

#include "ocl_compiler.h"

#include "config.h"

#include <cctype>
#include <cstdio>
#include <cstdlib>

#include <algorithm>
#include <string>
#include <vector>

#include "gromacs/gpu_utils/oclutils.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/path.h"
#include "gromacs/utility/programcontext.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/stringutil.h"
#include "gromacs/utility/textreader.h"
#include "gromacs/utility/unique_cptr.h"

#include "ocl_caching.h"
  68
  69 namespace gmx
  70 {
  71 namespace ocl
  72 {
  73
  74 /*! \brief True if OpenCL binary caching is enabled.
  75  *
  76  *  Currently caching is disabled by default unless the env var override
  77  *  is used until we resolve concurrency issues. */
  78 static bool useBuildCache = getenv("GMX_OCL_GENCACHE"); // (NULL == getenv("GMX_OCL_NOGENCACHE"));
  79
  80 /*! \brief Handles writing the OpenCL JIT compilation log to \c fplog.
  81  *
  82  * If \c fplog is non-null and either the GMX_OCL_DUMP_LOG environment
  83  * variable is set or the compilation failed, then the OpenCL
  84  * compilation log is written.
  85  *
  86  * \param fplog               Open file pointer to log file
  87  * \param program             OpenCL program that was compiled
  88  * \param deviceId            Id of the device for which compilation took place
  89  * \param kernelFilename      File name containing the kernel
  90  * \param preprocessorOptions String containing the preprocessor command-line options used for the build
  91  * \param buildFailed         Whether the OpenCL build succeeded
  92  *
  93  * \throws std::bad_alloc if out of memory */
  94 static void
  95 writeOclBuildLog(FILE              *fplog,
  96                  cl_program         program,
  97                  cl_device_id       deviceId,
  98                  const std::string &kernelFilename,
  99                  const std::string &preprocessorOptions,
 100                  bool               buildFailed)
 101 {
 102     bool writeOutput = ((fplog != nullptr) &&
 103                         (buildFailed || (getenv("GMX_OCL_DUMP_LOG") != nullptr)));
 104
 105     if (!writeOutput)
 106     {
 107         return;
 108     }
 109
 110     // Get build log string size
 111     size_t buildLogSize;
 112     cl_int cl_error = clGetProgramBuildInfo(program,
 113                                             deviceId,
 114                                             CL_PROGRAM_BUILD_LOG,
 115                                             0,
 116                                             nullptr,
 117                                             &buildLogSize);
 118     if (cl_error != CL_SUCCESS)
 119     {
 120         GMX_THROW(InternalError("Could not get OpenCL program build log size, error was " + ocl_get_error_string(cl_error)));
 121     }
 122
 123     char             *buildLog = nullptr;
 124     unique_cptr<char> buildLogGuard;
 125     if (buildLogSize != 0)
 126     {
 127         /* Allocate memory to fit the build log,
 128            it can be very large in case of errors */
 129         snew(buildLog, buildLogSize);
 130         buildLogGuard.reset(buildLog);
 131
 132         /* Get the actual compilation log */
 133         cl_error = clGetProgramBuildInfo(program,
 134                                          deviceId,
 135                                          CL_PROGRAM_BUILD_LOG,
 136                                          buildLogSize,
 137                                          buildLog,
 138                                          nullptr);
 139         if (cl_error != CL_SUCCESS)
 140         {
 141             GMX_THROW(InternalError("Could not get OpenCL program build log, error was " + ocl_get_error_string(cl_error)));
 142         }
 143     }
 144
 145     std::string message;
 146     if (buildFailed)
 147     {
 148         message += "Compilation of source file " + kernelFilename + " failed!\n";
 149     }
 150     else
 151     {
 152         message += "Compilation of source file " + kernelFilename + " was successful!\n";
 153     }
 154     message += "-- Used build options: " + preprocessorOptions + "\n";
 155     message += "--------------LOG START---------------\n";
 156     message += buildLog;
 157     message += "---------------LOG END----------------\n";;
 158
 159     fputs(message.c_str(), fplog);
 160 }
 161
 162 /*! \brief Construct compiler options string
 163  *
 164  * \param deviceVendorId  Device vendor id. Used to
 165  *          automatically enable some vendor-specific options
 166  * \return The string with the compiler options
 167  */
 168 static std::string
 169 selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
 170 {
 171     std::string compilerOptions;
 172
 173     if (getenv("GMX_OCL_NOOPT") )
 174     {
 175         compilerOptions += " -cl-opt-disable";
 176     }
 177
 178     /* Fastmath imprves performance on all supported arch */
 179     if (getenv("GMX_OCL_DISABLE_FASTMATH") == nullptr)
 180     {
 181         compilerOptions += " -cl-fast-relaxed-math";
 182     }
 183
 184     if ((deviceVendorId == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
 185     {
 186         compilerOptions += " -cl-nv-verbose";
 187     }
 188
 189     if ((deviceVendorId == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
 190     {
 191         /* To dump OpenCL build intermediate files, caching must be off */
 192         if (!useBuildCache)
 193         {
 194             compilerOptions += " -save-temps";
 195         }
 196     }
 197
 198     if (getenv("GMX_OCL_DEBUG"))
 199     {
 200         compilerOptions += " -g";
 201     }
 202
 203     return compilerOptions;
 204 }
 205
 206 /*! \brief Get the path to the folder storing an OpenCL source file.
 207  *
 208  * By default, this function constructs the full path to the OpenCL from
 209  * the known location of the binary that is running, so that we handle
 210  * both in-source and installed builds. The user can override this
 211  * behavior by defining GMX_OCL_FILE_PATH environment variable.
 212  *
 213  * \param[in] sourceRelativePath    Relative path to the kernel or other file in the source tree,
 214  *                                  e.g. "src/gromacs/mdlib/nbnxn_ocl" for NB kernels.
 215  * \return OS-normalized path string to the folder storing OpenCL source file
 216  *
 217  * \throws std::bad_alloc    if out of memory.
 218  *         FileIOError  if GMX_OCL_FILE_PATH does not specify a readable path
 219  */
 220 static std::string
 221 getSourceRootPath(const std::string &sourceRelativePath)
 222 {
 223     std::string sourceRootPath;
 224     /* Use GMX_OCL_FILE_PATH if the user has defined it */
 225     const char *gmxOclFilePath = getenv("GMX_OCL_FILE_PATH");
 226
 227     if (gmxOclFilePath == nullptr)
 228     {
 229         /* Normal way of getting ocl_root_dir. First get the right
 230            root path from the path to the binary that is running. */
 231         InstallationPrefixInfo      info           = getProgramContext().installationPrefix();
 232         std::string                 dataPathSuffix = (info.bSourceLayout ?
 233                                                       sourceRelativePath :
 234                                                       GMX_INSTALL_OCLDIR);
 235         sourceRootPath = Path::join(info.path, dataPathSuffix);
 236     }
 237     else
 238     {
 239         if (!Directory::exists(gmxOclFilePath))
 240         {
 241             GMX_THROW(FileIOError(formatString("GMX_OCL_FILE_PATH must point to the directory where OpenCL"
 242                                                "kernels are found, but '%s' does not exist", gmxOclFilePath)));
 243         }
 244         sourceRootPath = gmxOclFilePath;
 245     }
 246
 247     // Make sure we return an OS-correct path format
 248     return Path::normalize(sourceRootPath);
 249 }
 250
 251 /*!  \brief Get the warp size reported by device
 252  *
 253  *  This is platform implementation dependant and seems to only work on the Nvidia and AMD platforms!
 254  *  Nvidia reports 32, AMD for GPU 64. Ignore the rest
 255  *
 256  *  \param  context   Current OpenCL context
 257  *  \param  deviceId OpenCL device with the context
 258  *  \return cl_int value of the warp size
 259  *
 260  * \throws InternalError if an OpenCL error was encountered
 261  */
 262 static size_t
 263 getWarpSize(cl_context context, cl_device_id deviceId)
 264 {
 265     cl_int      cl_error;
 266     const char *warpSizeKernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
 267     cl_program  program        = clCreateProgramWithSource(context, 1, (const char**)&warpSizeKernel, nullptr, &cl_error);
 268     if (cl_error != CL_SUCCESS)
 269     {
 270         GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
 271     }
 272
 273     cl_error = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
 274     if (cl_error != CL_SUCCESS)
 275     {
 276         GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
 277     }
 278
 279     cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
 280     if (cl_error != CL_SUCCESS)
 281     {
 282         GMX_THROW(InternalError("Could not create OpenCL kernel to determine warp size, error was " + ocl_get_error_string(cl_error)));
 283     }
 284
 285     size_t warpSize = 0;
 286     cl_error = clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
 287                                         sizeof(warpSize), &warpSize, nullptr);
 288     if (cl_error != CL_SUCCESS)
 289     {
 290         GMX_THROW(InternalError("Could not measure OpenCL warp size, error was " + ocl_get_error_string(cl_error)));
 291     }
 292     if (warpSize == 0)
 293     {
 294         GMX_THROW(InternalError(formatString("Did not measure a valid OpenCL warp size")));
 295     }
 296
 297     cl_error = clReleaseKernel(kernel);
 298     if (cl_error != CL_SUCCESS)
 299     {
 300         GMX_THROW(InternalError("Could not release OpenCL warp-size kernel, error was " + ocl_get_error_string(cl_error)));
 301     }
 302     cl_error = clReleaseProgram(program);
 303     if (cl_error != CL_SUCCESS)
 304     {
 305         GMX_THROW(InternalError("Could not release OpenCL warp-size program, error was " + ocl_get_error_string(cl_error)));
 306     }
 307
 308     return warpSize;
 309 }
 310
 311 /*! \brief Select a compilation-line define for a vendor-specific kernel choice from vendor id
 312  *
 313  * \param[in] vendorId Vendor id enumerator
 314  *
 315  * \return The appropriate compilation-line define
 316  */
 317 static const char *
 318 makeVendorFlavorChoice(ocl_vendor_id_t vendorId)
 319 {
 320     const char *choice;
 321     switch (vendorId)
 322     {
 323         case OCL_VENDOR_AMD:
 324             choice = "-D_AMD_SOURCE_";
 325             break;
 326         case OCL_VENDOR_NVIDIA:
 327             choice = "-D_NVIDIA_SOURCE_";
 328             break;
 329         case OCL_VENDOR_INTEL:
 330             choice = "-D_INTEL_SOURCE_";
 331             break;
 332         default:
 333             choice = "";
 334             break;
 335     }
 336     return choice;
 337 }
 338
 339 /*! \brief Create include paths for kernel sources.
 340  *
 341  * All OpenCL kernel files are expected to be stored in one single folder.
 342  *
 343  * \throws std::bad_alloc  if out of memory.
 344  */
 345 static std::string makeKernelIncludePathOption(const std::string &unescapedKernelRootPath)
 346 {
 347     std::string includePathOption;
 348
 349     /* Apple does not seem to accept the quoted include paths other
 350      * OpenCL implementations are happy with. Since the standard still says
 351      * it should be quoted, we handle Apple as a special case.
 352      */
 353 #ifdef __APPLE__
 354     includePathOption += "-I";
 355
 356     // Prepend all the spaces with a backslash
 357     for (std::string::size_type i = 0; i < unescapedKernelRootPath.length(); i++)
 358     {
 359         if (unescapedKernelRootPath[i] == ' ')
 360         {
 361             includePathOption.push_back('\\');
 362         }
 363         includePathOption.push_back(unescapedKernelRootPath[i]);
 364     }
 365 #else
 366     includePathOption += "-I\"" + unescapedKernelRootPath + "\"";
 367 #endif
 368
 369     return includePathOption;
 370 }
 371
 372 /*! \brief Replace duplicated spaces with a single one in string
 373  *
 374  * Only the first character will be kept for multiple adjacent characters that
 375  * are both identical and where the first one returns true for isspace().
 376  *
 377  * \param str String that will be modified.
 378  */
 379 static void
 380 removeExtraSpaces(std::string *str)
 381 {
 382     GMX_RELEASE_ASSERT(str != nullptr, "A pointer to an actual string must be provided");
 383     std::string::iterator newEnd =
 384         std::unique( str->begin(), str->end(), [ = ](char a, char b){ return isspace(a) && (a == b); } );
 385     str->erase(newEnd, str->end());
 386 }
 387
 388 /*! \brief Builds a string with build options for the OpenCL kernels
 389  *
 390  * \throws std::bad_alloc  if out of memory. */
 391 static std::string
 392 makePreprocessorOptions(const std::string   &kernelRootPath,
 393                         const std::string   &includeRootPath,
 394                         size_t               warpSize,
 395                         ocl_vendor_id_t      deviceVendorId,
 396                         const std::string   &extraDefines)
 397 {
 398     std::string preprocessorOptions;
 399
 400     /* Compose the complete build options */
 401     preprocessorOptions  = formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize));
 402     preprocessorOptions += ' ';
 403     preprocessorOptions += makeVendorFlavorChoice(deviceVendorId);
 404     preprocessorOptions += ' ';
 405     preprocessorOptions += extraDefines;
 406     preprocessorOptions += ' ';
 407     preprocessorOptions += selectCompilerOptions(deviceVendorId);
 408     preprocessorOptions += ' ';
 409     preprocessorOptions += makeKernelIncludePathOption(kernelRootPath);
 410     preprocessorOptions += ' ';
 411     preprocessorOptions += makeKernelIncludePathOption(includeRootPath);
 412
 413     // Mac OS (and maybe some other implementations) does not accept double spaces in options
 414     removeExtraSpaces(&preprocessorOptions);
 415
 416     return preprocessorOptions;
 417 }
 418
 419 cl_program
 420 compileProgram(FILE              *fplog,
 421                const std::string &kernelRelativePath,
 422                const std::string &kernelBaseFilename,
 423                const std::string &extraDefines,
 424                cl_context         context,
 425                cl_device_id       deviceId,
 426                ocl_vendor_id_t    deviceVendorId)
 427 {
 428     cl_int      cl_error;
 429     std::string kernelRootPath  = getSourceRootPath(kernelRelativePath);
 430     std::string includeRootPath = getSourceRootPath("src/gromacs/gpu_utils");
 431
 432     GMX_RELEASE_ASSERT(fplog != nullptr, "Need a valid log file for building OpenCL programs");
 433
 434     /* Load OpenCL source files */
 435     std::string kernelFilename = Path::join(kernelRootPath,
 436                                             kernelBaseFilename);
 437
 438     /* Make the build options */
 439     std::string preprocessorOptions = makePreprocessorOptions(kernelRootPath,
 440                                                               includeRootPath,
 441                                                               getWarpSize(context, deviceId),
 442                                                               deviceVendorId,
 443                                                               extraDefines);
 444
 445     bool        buildCacheWasRead = false;
 446
 447     std::string cacheFilename;
 448     if (useBuildCache)
 449     {
 450         cacheFilename = makeBinaryCacheFilename(kernelBaseFilename, deviceId);
 451     }
 452
 453     /* Create OpenCL program */
 454     cl_program program = nullptr;
 455     if (useBuildCache)
 456     {
 457         if (File::exists(cacheFilename, File::returnFalseOnError))
 458         {
 459             /* Check if there's a valid cache available */
 460             try
 461             {
 462                 program           = makeProgramFromCache(cacheFilename, context, deviceId);
 463                 buildCacheWasRead = true;
 464             }
 465             catch (FileIOError &e)
 466             {
 467                 // Failing to read from the cache is not a critical error
 468                 formatExceptionMessageToFile(fplog, e);
 469             }
 470         }
 471         else
 472         {
 473             fprintf(fplog, "No OpenCL binary cache file was present, so will compile kernels normally.\n");
 474         }
 475     }
 476     if (program == nullptr)
 477     {
 478         // Compile OpenCL program from source
 479         std::string kernelSource = TextReader::readFileToString(kernelFilename);
 480         if (kernelSource.empty())
 481         {
 482             GMX_THROW(FileIOError("Error loading OpenCL code " + kernelFilename));
 483         }
 484         const char *kernelSourcePtr  = kernelSource.c_str();
 485         size_t      kernelSourceSize = kernelSource.size();
 486         /* Create program from source code */
 487         program = clCreateProgramWithSource(context,
 488                                             1,
 489                                             &kernelSourcePtr,
 490                                             &kernelSourceSize,
 491                                             &cl_error);
 492         if (cl_error != CL_SUCCESS)
 493         {
 494             GMX_THROW(InternalError("Could not create OpenCL program, error was " + ocl_get_error_string(cl_error)));
 495         }
 496     }
 497
 498     /* Build the OpenCL program, keeping the status to potentially
 499        write to the simulation log file. */
 500     cl_int buildStatus = clBuildProgram(program, 0, nullptr, preprocessorOptions.c_str(), nullptr, nullptr);
 501
 502     /* Write log first, and then throw exception that the user know what is
 503        the issue even if the build fails. */
 504     writeOclBuildLog(fplog,
 505                      program,
 506                      deviceId,
 507                      kernelFilename,
 508                      preprocessorOptions,
 509                      buildStatus != CL_SUCCESS);
 510
 511     if (buildStatus != CL_SUCCESS)
 512     {
 513         GMX_THROW(InternalError("Could not build OpenCL program, error was " + ocl_get_error_string(buildStatus)));
 514     }
 515
 516     if (useBuildCache)
 517     {
 518         if (!buildCacheWasRead)
 519         {
 520             /* If OpenCL caching is ON, but the current cache is not
 521                valid => update it */
 522             try
 523             {
 524                 writeBinaryToCache(program, cacheFilename);
 525             }
 526             catch (GromacsException &e)
 527             {
 528                 // Failing to write the cache is not a critical error
 529                 formatExceptionMessageToFile(fplog, e);
 530             }
 531         }
 532     }
 533     if ((OCL_VENDOR_NVIDIA == deviceVendorId) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
 534     {
 535         /* If dumping intermediate files has been requested and this is an NVIDIA card
 536            => write PTX to file */
 537         char buffer[STRLEN];
 538
 539         cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, nullptr);
 540         if (cl_error != CL_SUCCESS)
 541         {
 542             GMX_THROW(InternalError("Could not get OpenCL device info, error was " + ocl_get_error_string(cl_error)));
 543         }
 544         std::string ptxFilename = buffer;
 545         ptxFilename += ".ptx";
 546
 547         try
 548         {
 549             writeBinaryToCache(program, ptxFilename);
 550         }
 551         catch (GromacsException &e)
 552         {
 553             // Failing to write the cache is not a critical error
 554             formatExceptionMessageToFile(fplog, e);
 555         }
 556     }
 557
 558     return program;
 559 }
 560
 561 } // namespace
 562 } // namespace