2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Define infrastructure for OpenCL JIT compilation for Gromacs
38 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
39 * \author Anca Hamuraru <anca@streamcomputing.eu>
40 * \author Teemu Virolainen <teemu@streamcomputing.eu>
41 * \author Mark Abraham <mark.j.abraham@gmail.com>
46 #include "ocl_compiler.h"
56 #include "gromacs/gpu_utils/oclutils.h"
57 #include "gromacs/utility/cstringutil.h"
58 #include "gromacs/utility/exceptions.h"
59 #include "gromacs/utility/gmxassert.h"
60 #include "gromacs/utility/path.h"
61 #include "gromacs/utility/programcontext.h"
62 #include "gromacs/utility/smalloc.h"
63 #include "gromacs/utility/stringutil.h"
64 #include "gromacs/utility/textreader.h"
65 #include "gromacs/utility/unique_cptr.h"
67 #include "ocl_caching.h"
74 /*! \brief True if OpenCL binary caching is enabled.
76 * Currently caching is disabled by default unless the env var override
77 * is used until we resolve concurrency issues. */
78 static bool useBuildCache = getenv("GMX_OCL_GENCACHE"); // (NULL == getenv("GMX_OCL_NOGENCACHE"));
80 /*! \brief Handles writing the OpenCL JIT compilation log to \c fplog.
82 * If \c fplog is non-null and either the GMX_OCL_DUMP_LOG environment
83 * variable is set or the compilation failed, then the OpenCL
84 * compilation log is written.
86 * \param fplog Open file pointer to log file
87 * \param program OpenCL program that was compiled
88 * \param deviceId Id of the device for which compilation took place
89 * \param kernelFilename File name containing the kernel
90 * \param preprocessorOptions String containing the preprocessor command-line options used for the build
91 * \param buildFailed Whether the OpenCL build succeeded
93 * \throws std::bad_alloc if out of memory */
95 writeOclBuildLog(FILE *fplog,
97 cl_device_id deviceId,
98 const std::string &kernelFilename,
99 const std::string &preprocessorOptions,
102 bool writeOutput = ((fplog != nullptr) &&
103 (buildFailed || (getenv("GMX_OCL_DUMP_LOG") != nullptr)));
110 // Get build log string size
112 cl_int cl_error = clGetProgramBuildInfo(program,
114 CL_PROGRAM_BUILD_LOG,
118 if (cl_error != CL_SUCCESS)
120 GMX_THROW(InternalError("Could not get OpenCL program build log size, error was " + ocl_get_error_string(cl_error)));
123 char *buildLog = nullptr;
124 unique_cptr<char> buildLogGuard;
125 if (buildLogSize != 0)
127 /* Allocate memory to fit the build log,
128 it can be very large in case of errors */
129 snew(buildLog, buildLogSize);
130 buildLogGuard.reset(buildLog);
132 /* Get the actual compilation log */
133 cl_error = clGetProgramBuildInfo(program,
135 CL_PROGRAM_BUILD_LOG,
139 if (cl_error != CL_SUCCESS)
141 GMX_THROW(InternalError("Could not get OpenCL program build log, error was " + ocl_get_error_string(cl_error)));
148 message += "Compilation of source file " + kernelFilename + " failed!\n";
152 message += "Compilation of source file " + kernelFilename + " was successful!\n";
154 message += "-- Used build options: " + preprocessorOptions + "\n";
155 message += "--------------LOG START---------------\n";
157 message += "---------------LOG END----------------\n";;
159 fputs(message.c_str(), fplog);
162 /*! \brief Construct compiler options string
164 * \param deviceVendorId Device vendor id. Used to
165 * automatically enable some vendor-specific options
166 * \return The string with the compiler options
169 selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
171 std::string compilerOptions;
173 if (getenv("GMX_OCL_NOOPT") )
175 compilerOptions += " -cl-opt-disable";
178 /* Fastmath imprves performance on all supported arch */
179 if (getenv("GMX_OCL_DISABLE_FASTMATH") == nullptr)
181 compilerOptions += " -cl-fast-relaxed-math";
184 if ((deviceVendorId == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
186 compilerOptions += " -cl-nv-verbose";
189 if ((deviceVendorId == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
191 /* To dump OpenCL build intermediate files, caching must be off */
194 compilerOptions += " -save-temps";
198 if (getenv("GMX_OCL_DEBUG"))
200 compilerOptions += " -g";
203 return compilerOptions;
206 /*! \brief Get the path to the folder storing an OpenCL source file.
208 * By default, this function constructs the full path to the OpenCL from
209 * the known location of the binary that is running, so that we handle
210 * both in-source and installed builds. The user can override this
211 * behavior by defining GMX_OCL_FILE_PATH environment variable.
213 * \param[in] sourceRelativePath Relative path to the kernel or other file in the source tree,
214 * e.g. "src/gromacs/mdlib/nbnxn_ocl" for NB kernels.
215 * \return OS-normalized path string to the folder storing OpenCL source file
217 * \throws std::bad_alloc if out of memory.
218 * FileIOError if GMX_OCL_FILE_PATH does not specify a readable path
221 getSourceRootPath(const std::string &sourceRelativePath)
223 std::string sourceRootPath;
224 /* Use GMX_OCL_FILE_PATH if the user has defined it */
225 const char *gmxOclFilePath = getenv("GMX_OCL_FILE_PATH");
227 if (gmxOclFilePath == nullptr)
229 /* Normal way of getting ocl_root_dir. First get the right
230 root path from the path to the binary that is running. */
231 InstallationPrefixInfo info = getProgramContext().installationPrefix();
232 std::string dataPathSuffix = (info.bSourceLayout ?
235 sourceRootPath = Path::join(info.path, dataPathSuffix);
239 if (!Directory::exists(gmxOclFilePath))
241 GMX_THROW(FileIOError(formatString("GMX_OCL_FILE_PATH must point to the directory where OpenCL"
242 "kernels are found, but '%s' does not exist", gmxOclFilePath)));
244 sourceRootPath = gmxOclFilePath;
247 // Make sure we return an OS-correct path format
248 return Path::normalize(sourceRootPath);
251 /*! \brief Get the warp size reported by device
253 * This is platform implementation dependant and seems to only work on the Nvidia and AMD platforms!
254 * Nvidia reports 32, AMD for GPU 64. Ignore the rest
256 * \param context Current OpenCL context
257 * \param deviceId OpenCL device with the context
258 * \return cl_int value of the warp size
260 * \throws InternalError if an OpenCL error was encountered
263 getWarpSize(cl_context context, cl_device_id deviceId)
266 const char *warpSizeKernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
267 cl_program program = clCreateProgramWithSource(context, 1, (const char**)&warpSizeKernel, nullptr, &cl_error);
268 if (cl_error != CL_SUCCESS)
270 GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
273 cl_error = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
274 if (cl_error != CL_SUCCESS)
276 GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
279 cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
280 if (cl_error != CL_SUCCESS)
282 GMX_THROW(InternalError("Could not create OpenCL kernel to determine warp size, error was " + ocl_get_error_string(cl_error)));
286 cl_error = clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
287 sizeof(warpSize), &warpSize, nullptr);
288 if (cl_error != CL_SUCCESS)
290 GMX_THROW(InternalError("Could not measure OpenCL warp size, error was " + ocl_get_error_string(cl_error)));
294 GMX_THROW(InternalError(formatString("Did not measure a valid OpenCL warp size")));
297 cl_error = clReleaseKernel(kernel);
298 if (cl_error != CL_SUCCESS)
300 GMX_THROW(InternalError("Could not release OpenCL warp-size kernel, error was " + ocl_get_error_string(cl_error)));
302 cl_error = clReleaseProgram(program);
303 if (cl_error != CL_SUCCESS)
305 GMX_THROW(InternalError("Could not release OpenCL warp-size program, error was " + ocl_get_error_string(cl_error)));
311 /*! \brief Select a compilation-line define for a vendor-specific kernel choice from vendor id
313 * \param[in] vendorId Vendor id enumerator
315 * \return The appropriate compilation-line define
318 makeVendorFlavorChoice(ocl_vendor_id_t vendorId)
324 choice = "-D_AMD_SOURCE_";
326 case OCL_VENDOR_NVIDIA:
327 choice = "-D_NVIDIA_SOURCE_";
329 case OCL_VENDOR_INTEL:
330 choice = "-D_INTEL_SOURCE_";
339 /*! \brief Create include paths for kernel sources.
341 * All OpenCL kernel files are expected to be stored in one single folder.
343 * \throws std::bad_alloc if out of memory.
345 static std::string makeKernelIncludePathOption(const std::string &unescapedKernelRootPath)
347 std::string includePathOption;
349 /* Apple does not seem to accept the quoted include paths other
350 * OpenCL implementations are happy with. Since the standard still says
351 * it should be quoted, we handle Apple as a special case.
354 includePathOption += "-I";
356 // Prepend all the spaces with a backslash
357 for (std::string::size_type i = 0; i < unescapedKernelRootPath.length(); i++)
359 if (unescapedKernelRootPath[i] == ' ')
361 includePathOption.push_back('\\');
363 includePathOption.push_back(unescapedKernelRootPath[i]);
366 includePathOption += "-I\"" + unescapedKernelRootPath + "\"";
369 return includePathOption;
372 /*! \brief Replace duplicated spaces with a single one in string
374 * Only the first character will be kept for multiple adjacent characters that
375 * are both identical and where the first one returns true for isspace().
377 * \param str String that will be modified.
380 removeExtraSpaces(std::string *str)
382 GMX_RELEASE_ASSERT(str != nullptr, "A pointer to an actual string must be provided");
383 std::string::iterator newEnd =
384 std::unique( str->begin(), str->end(), [ = ](char a, char b){ return isspace(a) && (a == b); } );
385 str->erase(newEnd, str->end());
388 /*! \brief Builds a string with build options for the OpenCL kernels
390 * \throws std::bad_alloc if out of memory. */
392 makePreprocessorOptions(const std::string &kernelRootPath,
393 const std::string &includeRootPath,
395 ocl_vendor_id_t deviceVendorId,
396 const std::string &extraDefines)
398 std::string preprocessorOptions;
400 /* Compose the complete build options */
401 preprocessorOptions = formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize));
402 preprocessorOptions += ' ';
403 preprocessorOptions += makeVendorFlavorChoice(deviceVendorId);
404 preprocessorOptions += ' ';
405 preprocessorOptions += extraDefines;
406 preprocessorOptions += ' ';
407 preprocessorOptions += selectCompilerOptions(deviceVendorId);
408 preprocessorOptions += ' ';
409 preprocessorOptions += makeKernelIncludePathOption(kernelRootPath);
410 preprocessorOptions += ' ';
411 preprocessorOptions += makeKernelIncludePathOption(includeRootPath);
413 // Mac OS (and maybe some other implementations) does not accept double spaces in options
414 removeExtraSpaces(&preprocessorOptions);
416 return preprocessorOptions;
420 compileProgram(FILE *fplog,
421 const std::string &kernelRelativePath,
422 const std::string &kernelBaseFilename,
423 const std::string &extraDefines,
425 cl_device_id deviceId,
426 ocl_vendor_id_t deviceVendorId)
429 std::string kernelRootPath = getSourceRootPath(kernelRelativePath);
430 std::string includeRootPath = getSourceRootPath("src/gromacs/gpu_utils");
432 GMX_RELEASE_ASSERT(fplog != nullptr, "Need a valid log file for building OpenCL programs");
434 /* Load OpenCL source files */
435 std::string kernelFilename = Path::join(kernelRootPath,
438 /* Make the build options */
439 std::string preprocessorOptions = makePreprocessorOptions(kernelRootPath,
441 getWarpSize(context, deviceId),
445 bool buildCacheWasRead = false;
447 std::string cacheFilename;
450 cacheFilename = makeBinaryCacheFilename(kernelBaseFilename, deviceId);
453 /* Create OpenCL program */
454 cl_program program = nullptr;
457 if (File::exists(cacheFilename, File::returnFalseOnError))
459 /* Check if there's a valid cache available */
462 program = makeProgramFromCache(cacheFilename, context, deviceId);
463 buildCacheWasRead = true;
465 catch (FileIOError &e)
467 // Failing to read from the cache is not a critical error
468 formatExceptionMessageToFile(fplog, e);
473 fprintf(fplog, "No OpenCL binary cache file was present, so will compile kernels normally.\n");
476 if (program == nullptr)
478 // Compile OpenCL program from source
479 std::string kernelSource = TextReader::readFileToString(kernelFilename);
480 if (kernelSource.empty())
482 GMX_THROW(FileIOError("Error loading OpenCL code " + kernelFilename));
484 const char *kernelSourcePtr = kernelSource.c_str();
485 size_t kernelSourceSize = kernelSource.size();
486 /* Create program from source code */
487 program = clCreateProgramWithSource(context,
492 if (cl_error != CL_SUCCESS)
494 GMX_THROW(InternalError("Could not create OpenCL program, error was " + ocl_get_error_string(cl_error)));
498 /* Build the OpenCL program, keeping the status to potentially
499 write to the simulation log file. */
500 cl_int buildStatus = clBuildProgram(program, 0, nullptr, preprocessorOptions.c_str(), nullptr, nullptr);
502 /* Write log first, and then throw exception that the user know what is
503 the issue even if the build fails. */
504 writeOclBuildLog(fplog,
509 buildStatus != CL_SUCCESS);
511 if (buildStatus != CL_SUCCESS)
513 GMX_THROW(InternalError("Could not build OpenCL program, error was " + ocl_get_error_string(buildStatus)));
518 if (!buildCacheWasRead)
520 /* If OpenCL caching is ON, but the current cache is not
521 valid => update it */
524 writeBinaryToCache(program, cacheFilename);
526 catch (GromacsException &e)
528 // Failing to write the cache is not a critical error
529 formatExceptionMessageToFile(fplog, e);
533 if ((OCL_VENDOR_NVIDIA == deviceVendorId) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
535 /* If dumping intermediate files has been requested and this is an NVIDIA card
536 => write PTX to file */
539 cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, nullptr);
540 if (cl_error != CL_SUCCESS)
542 GMX_THROW(InternalError("Could not get OpenCL device info, error was " + ocl_get_error_string(cl_error)));
544 std::string ptxFilename = buffer;
545 ptxFilename += ".ptx";
549 writeBinaryToCache(program, ptxFilename);
551 catch (GromacsException &e)
553 // Failing to write the cache is not a critical error
554 formatExceptionMessageToFile(fplog, e);