src/gromacs/gpu_utils/ocl_compiler.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  *  \brief Define infrastructure for OpenCL JIT compilation for Gromacs
  37  *
  38  *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
  39  *  \author Anca Hamuraru <anca@streamcomputing.eu>
  40  *  \author Teemu Virolainen <teemu@streamcomputing.eu>
  41  *  \author Mark Abraham <mark.j.abraham@gmail.com>
  42  */
  43
  44 #include "gmxpre.h"
  45
  46 #include "ocl_compiler.h"
  47
  48 #include "config.h"
  49
  50 #include <cstdio>
  51
  52 #include <string>
  53 #include <vector>
  54
  55 #include "gromacs/gpu_utils/oclutils.h"
  56 #include "gromacs/utility/cstringutil.h"
  57 #include "gromacs/utility/exceptions.h"
  58 #include "gromacs/utility/gmxassert.h"
  59 #include "gromacs/utility/path.h"
  60 #include "gromacs/utility/programcontext.h"
  61 #include "gromacs/utility/smalloc.h"
  62 #include "gromacs/utility/stringutil.h"
  63 #include "gromacs/utility/textreader.h"
  64 #include "gromacs/utility/unique_cptr.h"
  65
  66 #include "ocl_caching.h"
  67
  68 namespace gmx
  69 {
  70 namespace ocl
  71 {
  72
  73 /*! \brief True if OpenCL binary caching is enabled.
  74  *
  75  *  Currently caching is disabled by default unless the env var override
  76  *  is used until we resolve concurrency issues. */
  77 static bool useBuildCache = getenv("GMX_OCL_GENCACHE"); // (NULL == getenv("GMX_OCL_NOGENCACHE"));
  78
  79 /*! \brief Handles writing the OpenCL JIT compilation log to \c fplog.
  80  *
  81  * If \c fplog is non-null and either the GMX_OCL_DUMP_LOG environment
  82  * variable is set or the compilation failed, then the OpenCL
  83  * compilation log is written.
  84  *
  85  * \param fplog               Open file pointer to log file
  86  * \param program             OpenCL program that was compiled
  87  * \param deviceId            Id of the device for which compilation took place
  88  * \param kernelFilename      File name containing the kernel
  89  * \param preprocessorOptions String containing the preprocessor command-line options used for the build
  90  * \param buildFailed         Whether the OpenCL build succeeded
  91  *
  92  * \throws std::bad_alloc if out of memory */
  93 static void
  94 writeOclBuildLog(FILE              *fplog,
  95                  cl_program         program,
  96                  cl_device_id       deviceId,
  97                  const std::string &kernelFilename,
  98                  const std::string &preprocessorOptions,
  99                  bool               buildFailed)
 100 {
 101     bool writeOutput = ((fplog != nullptr) &&
 102                         (buildFailed || (getenv("GMX_OCL_DUMP_LOG") != nullptr)));
 103
 104     if (!writeOutput)
 105     {
 106         return;
 107     }
 108
 109     // Get build log string size
 110     size_t buildLogSize;
 111     cl_int cl_error = clGetProgramBuildInfo(program,
 112                                             deviceId,
 113                                             CL_PROGRAM_BUILD_LOG,
 114                                             0,
 115                                             NULL,
 116                                             &buildLogSize);
 117     if (cl_error != CL_SUCCESS)
 118     {
 119         GMX_THROW(InternalError("Could not get OpenCL program build log size, error was " + ocl_get_error_string(cl_error)));
 120     }
 121
 122     char             *buildLog = nullptr;
 123     unique_cptr<char> buildLogGuard;
 124     if (buildLogSize != 0)
 125     {
 126         /* Allocate memory to fit the build log,
 127            it can be very large in case of errors */
 128         snew(buildLog, buildLogSize);
 129         buildLogGuard.reset(buildLog);
 130
 131         /* Get the actual compilation log */
 132         cl_error = clGetProgramBuildInfo(program,
 133                                          deviceId,
 134                                          CL_PROGRAM_BUILD_LOG,
 135                                          buildLogSize,
 136                                          buildLog,
 137                                          NULL);
 138         if (cl_error != CL_SUCCESS)
 139         {
 140             GMX_THROW(InternalError("Could not get OpenCL program build log, error was " + ocl_get_error_string(cl_error)));
 141         }
 142     }
 143
 144     std::string message;
 145     if (buildFailed)
 146     {
 147         message += "Compilation of source file " + kernelFilename + " failed!\n";
 148     }
 149     else
 150     {
 151         message += "Compilation of source file " + kernelFilename + " was successful!\n";
 152     }
 153     message += "-- Used build options: " + preprocessorOptions + "\n";
 154     message += "--------------LOG START---------------\n";
 155     message += buildLog;
 156     message += "---------------LOG END----------------\n";;
 157
 158     fputs(message.c_str(), fplog);
 159 }
 160
 161 /*! \brief Construct compiler options string
 162  *
 163  * \param deviceVendorId  Device vendor id. Used to
 164  *          automatically enable some vendor-specific options
 165  * \return The string with the compiler options
 166  */
 167 static std::string
 168 selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
 169 {
 170     std::string compilerOptions;
 171
 172     if (getenv("GMX_OCL_NOOPT") )
 173     {
 174         compilerOptions += " -cl-opt-disable";
 175     }
 176
 177     /* Fastmath imprves performance on all supported arch */
 178     if (getenv("GMX_OCL_DISABLE_FASTMATH") == NULL)
 179     {
 180         compilerOptions += " -cl-fast-relaxed-math";
 181     }
 182
 183     if ((deviceVendorId == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
 184     {
 185         compilerOptions += " -cl-nv-verbose";
 186     }
 187
 188     if ((deviceVendorId == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
 189     {
 190         /* To dump OpenCL build intermediate files, caching must be off */
 191         if (!useBuildCache)
 192         {
 193             compilerOptions += " -save-temps";
 194         }
 195     }
 196
 197     if ( ( deviceVendorId == OCL_VENDOR_AMD ) && getenv("GMX_OCL_DEBUG"))
 198     {
 199         compilerOptions += " -g";
 200     }
 201
 202     return compilerOptions;
 203 }
 204
 205 /*! \brief Get the path to the main folder storing OpenCL kernels.
 206  *
 207  * By default, this function constructs the full path to the OpenCL from
 208  * the known location of the binary that is running, so that we handle
 209  * both in-source and installed builds. The user can override this
 210  * behavior by defining GMX_OCL_FILE_PATH environment variable.
 211  *
 212  * \return OS-normalized path string to the main folder storing OpenCL kernels
 213  *
 214  * \throws std::bad_alloc    if out of memory.
 215  *         FileIOError  if GMX_OCL_FILE_PATH does not specify a readable path
 216  */
 217 static std::string
 218 getKernelRootPath()
 219 {
 220     std::string kernelRootPath;
 221     /* Use GMX_OCL_FILE_PATH if the user has defined it */
 222     const char *gmxOclFilePath = getenv("GMX_OCL_FILE_PATH");
 223
 224     if (gmxOclFilePath == nullptr)
 225     {
 226         /* Normal way of getting ocl_root_dir. First get the right
 227            root path from the path to the binary that is running. */
 228         InstallationPrefixInfo      info           = getProgramContext().installationPrefix();
 229         std::string                 dataPathSuffix = (info.bSourceLayout ?
 230                                                       "src/gromacs/mdlib/nbnxn_ocl" :
 231                                                       OCL_INSTALL_DIR);
 232         kernelRootPath = Path::join(info.path, dataPathSuffix);
 233     }
 234     else
 235     {
 236         if (!Directory::exists(gmxOclFilePath))
 237         {
 238             GMX_THROW(FileIOError(formatString("GMX_OCL_FILE_PATH must point to the directory where OpenCL"
 239                                                "kernels are found, but '%s' does not exist", gmxOclFilePath)));
 240         }
 241         kernelRootPath = gmxOclFilePath;
 242     }
 243
 244     // Make sure we return an OS-correct path format
 245     return Path::normalize(kernelRootPath);
 246 }
 247
 248 /*!  \brief Get the warp size reported by device
 249  *
 250  *  This is platform implementation dependant and seems to only work on the Nvidia and AMD platforms!
 251  *  Nvidia reports 32, AMD for GPU 64. Ignore the rest
 252  *
 253  *  \param  context   Current OpenCL context
 254  *  \param  deviceId OpenCL device with the context
 255  *  \return cl_int value of the warp size
 256  *
 257  * \throws InternalError if an OpenCL error was encountered
 258  */
 259 static size_t
 260 getWarpSize(cl_context context, cl_device_id deviceId)
 261 {
 262     cl_int      cl_error;
 263     const char *warpSizeKernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
 264     cl_program  program        = clCreateProgramWithSource(context, 1, (const char**)&warpSizeKernel, NULL, &cl_error);
 265     if (cl_error != CL_SUCCESS)
 266     {
 267         GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
 268     }
 269
 270     cl_error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
 271     if (cl_error != CL_SUCCESS)
 272     {
 273         GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
 274     }
 275
 276     cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
 277     if (cl_error != CL_SUCCESS)
 278     {
 279         GMX_THROW(InternalError("Could not create OpenCL kernel to determine warp size, error was " + ocl_get_error_string(cl_error)));
 280     }
 281
 282     size_t warpSize = 0;
 283     cl_error = clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
 284                                         sizeof(warpSize), &warpSize, NULL);
 285     if (cl_error != CL_SUCCESS)
 286     {
 287         GMX_THROW(InternalError("Could not measure OpenCL warp size, error was " + ocl_get_error_string(cl_error)));
 288     }
 289     if (warpSize == 0)
 290     {
 291         GMX_THROW(InternalError(formatString("Did not measure a valid OpenCL warp size")));
 292     }
 293
 294     cl_error = clReleaseKernel(kernel);
 295     if (cl_error != CL_SUCCESS)
 296     {
 297         GMX_THROW(InternalError("Could not release OpenCL warp-size kernel, error was " + ocl_get_error_string(cl_error)));
 298     }
 299     cl_error = clReleaseProgram(program);
 300     if (cl_error != CL_SUCCESS)
 301     {
 302         GMX_THROW(InternalError("Could not release OpenCL warp-size program, error was " + ocl_get_error_string(cl_error)));
 303     }
 304
 305     return warpSize;
 306 }
 307
 308 /*! \brief Select a compilation-line define for a vendor-specific kernel choice from vendor id
 309  *
 310  * \param[in] vendorId Vendor id enumerator
 311  *
 312  * \return The appropriate compilation-line define
 313  */
 314 static const char *
 315 makeVendorFlavorChoice(ocl_vendor_id_t vendorId)
 316 {
 317     const char *choice;
 318     switch (vendorId)
 319     {
 320         case OCL_VENDOR_AMD:
 321             choice = "-D_AMD_SOURCE_";
 322             break;
 323         case OCL_VENDOR_NVIDIA:
 324             choice = "-D_NVIDIA_SOURCE_";
 325             break;
 326         default:
 327             choice = "-D_WARPLESS_SOURCE_";
 328             break;
 329     }
 330     return choice;
 331 }
 332
 333 /*! \brief Create include paths for kernel sources.
 334  *
 335  * All OpenCL kernel files are expected to be stored in one single folder.
 336  *
 337  * \throws std::bad_alloc  if out of memory.
 338  */
 339 static std::string makeKernelIncludePathOption(const std::string &unescapedKernelRootPath)
 340 {
 341     std::string includePathOption;
 342
 343     /* Apple does not seem to accept the quoted include paths other
 344      * OpenCL implementations are happy with. Since the standard still says
 345      * it should be quoted, we handle Apple as a special case.
 346      */
 347 #ifdef __APPLE__
 348     includePathOption += "-I";
 349
 350     // Prepend all the spaces with a backslash
 351     for (std::string::size_type i = 0; i < unescapedKernelRootPath.length(); i++)
 352     {
 353         if (unescapedKernelRootPath[i] == ' ')
 354         {
 355             includePathOption.push_back('\\');
 356         }
 357         includePathOption.push_back(unescapedKernelRootPath[i]);
 358     }
 359 #else
 360     includePathOption += "-I\"" + unescapedKernelRootPath + "\"";
 361 #endif
 362
 363     return includePathOption;
 364 }
 365
 366 /*! \brief Builds a string with build options for the OpenCL kernels
 367  *
 368  * \throws std::bad_alloc  if out of memory. */
 369 std::string
 370 makePreprocessorOptions(const std::string   &kernelRootPath,
 371                         size_t               warpSize,
 372                         ocl_vendor_id_t      deviceVendorId,
 373                         const std::string   &extraDefines)
 374 {
 375     std::string preprocessorOptions;
 376
 377     /* Compose the complete build options */
 378     preprocessorOptions  = formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize));
 379     preprocessorOptions += ' ';
 380     preprocessorOptions += makeVendorFlavorChoice(deviceVendorId);
 381     preprocessorOptions += ' ';
 382     preprocessorOptions += extraDefines;
 383     preprocessorOptions += ' ';
 384     preprocessorOptions += selectCompilerOptions(deviceVendorId);
 385     preprocessorOptions += ' ';
 386     preprocessorOptions += makeKernelIncludePathOption(kernelRootPath);
 387
 388     return preprocessorOptions;
 389 }
 390
 391 cl_program
 392 compileProgram(FILE              *fplog,
 393                const std::string &kernelBaseFilename,
 394                const std::string &extraDefines,
 395                cl_context         context,
 396                cl_device_id       deviceId,
 397                ocl_vendor_id_t    deviceVendorId)
 398 {
 399     cl_int      cl_error;
 400     std::string kernelRootPath = getKernelRootPath();
 401
 402     GMX_RELEASE_ASSERT(fplog != nullptr, "Need a valid log file for building OpenCL programs");
 403
 404     /* Load OpenCL source files */
 405     std::string kernelFilename = Path::join(kernelRootPath,
 406                                             kernelBaseFilename);
 407
 408     /* Make the build options */
 409     std::string preprocessorOptions = makePreprocessorOptions(kernelRootPath,
 410                                                               getWarpSize(context, deviceId),
 411                                                               deviceVendorId,
 412                                                               extraDefines);
 413
 414     bool        buildCacheWasRead = false;
 415
 416     std::string cacheFilename;
 417     if (useBuildCache)
 418     {
 419         cacheFilename = makeBinaryCacheFilename(kernelBaseFilename, deviceId);
 420     }
 421
 422     /* Create OpenCL program */
 423     cl_program program = nullptr;
 424     if (useBuildCache)
 425     {
 426         if (File::exists(cacheFilename, File::returnFalseOnError))
 427         {
 428             /* Check if there's a valid cache available */
 429             try
 430             {
 431                 program           = makeProgramFromCache(cacheFilename, context, deviceId);
 432                 buildCacheWasRead = true;
 433             }
 434             catch (FileIOError &e)
 435             {
 436                 // Failing to read from the cache is not a critical error
 437                 formatExceptionMessageToFile(fplog, e);
 438             }
 439         }
 440         else
 441         {
 442             fprintf(fplog, "No OpenCL binary cache file was present, so will compile kernels normally.\n");
 443         }
 444     }
 445     if (program == nullptr)
 446     {
 447         // Compile OpenCL program from source
 448         std::string kernelSource = TextReader::readFileToString(kernelFilename);
 449         if (kernelSource.empty())
 450         {
 451             GMX_THROW(FileIOError("Error loading OpenCL code " + kernelFilename));
 452         }
 453         const char *kernelSourcePtr  = kernelSource.c_str();
 454         size_t      kernelSourceSize = kernelSource.size();
 455         /* Create program from source code */
 456         program = clCreateProgramWithSource(context,
 457                                             1,
 458                                             &kernelSourcePtr,
 459                                             &kernelSourceSize,
 460                                             &cl_error);
 461         if (cl_error != CL_SUCCESS)
 462         {
 463             GMX_THROW(InternalError("Could not create OpenCL program, error was " + ocl_get_error_string(cl_error)));
 464         }
 465     }
 466
 467     /* Build the OpenCL program, keeping the status to potentially
 468        write to the simulation log file. */
 469     cl_int buildStatus = clBuildProgram(program, 0, NULL, preprocessorOptions.c_str(), NULL, NULL);
 470
 471     /* Write log first, and then throw exception that the user know what is
 472        the issue even if the build fails. */
 473     writeOclBuildLog(fplog,
 474                      program,
 475                      deviceId,
 476                      kernelFilename,
 477                      preprocessorOptions,
 478                      buildStatus != CL_SUCCESS);
 479
 480     if (buildStatus != CL_SUCCESS)
 481     {
 482         GMX_THROW(InternalError("Could not build OpenCL program, error was " + ocl_get_error_string(buildStatus)));
 483     }
 484
 485     if (useBuildCache)
 486     {
 487         if (!buildCacheWasRead)
 488         {
 489             /* If OpenCL caching is ON, but the current cache is not
 490                valid => update it */
 491             try
 492             {
 493                 writeBinaryToCache(program, cacheFilename);
 494             }
 495             catch (GromacsException &e)
 496             {
 497                 // Failing to write the cache is not a critical error
 498                 formatExceptionMessageToFile(fplog, e);
 499             }
 500         }
 501     }
 502     if ((OCL_VENDOR_NVIDIA == deviceVendorId) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
 503     {
 504         /* If dumping intermediate files has been requested and this is an NVIDIA card
 505            => write PTX to file */
 506         char buffer[STRLEN];
 507
 508         cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, NULL);
 509         if (cl_error != CL_SUCCESS)
 510         {
 511             GMX_THROW(InternalError("Could not get OpenCL device info, error was " + ocl_get_error_string(cl_error)));
 512         }
 513         std::string ptxFilename = buffer;
 514         ptxFilename += ".ptx";
 515
 516         try
 517         {
 518             writeBinaryToCache(program, ptxFilename);
 519         }
 520         catch (GromacsException &e)
 521         {
 522             // Failing to write the cache is not a critical error
 523             formatExceptionMessageToFile(fplog, e);
 524         }
 525     }
 526
 527     return program;
 528 }
 529
 530 } // namespace
 531 } // namespace