/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
+ * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <cstdio>
+#include <algorithm>
#include <string>
#include <vector>
*
* Currently caching is disabled by default unless the env var override
* is used until we resolve concurrency issues. */
-static bool useBuildCache = getenv("GMX_OCL_GENCACHE"); // (NULL == getenv("GMX_OCL_NOGENCACHE"));
+static bool useBuildCache = getenv("GMX_OCL_GENCACHE") != nullptr;
/*! \brief Handles writing the OpenCL JIT compilation log to \c fplog.
*
- * If \c fplog is non-null and either the GMX_OCL_DUMP_LOG environment
+ * If \c fplog is non-null and either the \c GMX_OCL_DUMP_LOG environment
* variable is set or the compilation failed, then the OpenCL
* compilation log is written.
*
* \param program OpenCL program that was compiled
* \param deviceId Id of the device for which compilation took place
* \param kernelFilename File name containing the kernel
- * \param preprocessorOptions String containing the preprocessor command-line options used for the build
+ * \param preprocessorOptions String containing the preprocessor command-line options used for the
+ * build
* \param buildFailed Whether the OpenCL build succeeded
*
* \throws std::bad_alloc if out of memory */
-static void
-writeOclBuildLog(FILE *fplog,
- cl_program program,
- cl_device_id deviceId,
- const std::string &kernelFilename,
- const std::string &preprocessorOptions,
- bool buildFailed)
+static void writeOclBuildLog(FILE* fplog,
+ cl_program program,
+ cl_device_id deviceId,
+ const std::string& kernelFilename,
+ const std::string& preprocessorOptions,
+ bool buildFailed)
{
- bool writeOutput = ((fplog != nullptr) &&
- (buildFailed || (getenv("GMX_OCL_DUMP_LOG") != nullptr)));
+ bool writeOutput = ((fplog != nullptr) && (buildFailed || (getenv("GMX_OCL_DUMP_LOG") != nullptr)));
if (!writeOutput)
{
// Get build log string size
size_t buildLogSize;
- cl_int cl_error = clGetProgramBuildInfo(program,
- deviceId,
- CL_PROGRAM_BUILD_LOG,
- 0,
- NULL,
- &buildLogSize);
+ cl_int cl_error =
+ clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_LOG, 0, nullptr, &buildLogSize);
if (cl_error != CL_SUCCESS)
{
- GMX_THROW(InternalError("Could not get OpenCL program build log size, error was " + ocl_get_error_string(cl_error)));
+ GMX_THROW(InternalError("Could not get OpenCL program build log size, error was "
+ + ocl_get_error_string(cl_error)));
}
- char *buildLog = nullptr;
+ char* buildLog = nullptr;
unique_cptr<char> buildLogGuard;
if (buildLogSize != 0)
{
buildLogGuard.reset(buildLog);
/* Get the actual compilation log */
- cl_error = clGetProgramBuildInfo(program,
- deviceId,
- CL_PROGRAM_BUILD_LOG,
- buildLogSize,
- buildLog,
- NULL);
+ cl_error = clGetProgramBuildInfo(
+ program, deviceId, CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, nullptr);
if (cl_error != CL_SUCCESS)
{
- GMX_THROW(InternalError("Could not get OpenCL program build log, error was " + ocl_get_error_string(cl_error)));
+ GMX_THROW(InternalError("Could not get OpenCL program build log, error was "
+ + ocl_get_error_string(cl_error)));
}
}
message += "-- Used build options: " + preprocessorOptions + "\n";
message += "--------------LOG START---------------\n";
message += buildLog;
- message += "---------------LOG END----------------\n";;
+ message += "---------------LOG END----------------\n";
+ ;
fputs(message.c_str(), fplog);
}
/*! \brief Construct compiler options string
*
- * \param deviceVendorId Device vendor id. Used to
- * automatically enable some vendor-specific options
+ * \param deviceVendor Device vendor. Used to automatically enable some
+ * vendor-specific options.
* \return The string with the compiler options
*/
-static std::string
-selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
+static std::string selectCompilerOptions(DeviceVendor deviceVendor)
{
std::string compilerOptions;
- if (getenv("GMX_OCL_NOOPT") )
+ if (getenv("GMX_OCL_NOOPT"))
{
compilerOptions += " -cl-opt-disable";
}
- /* Fastmath imprves performance on all supported arch */
- if (getenv("GMX_OCL_DISABLE_FASTMATH") == NULL)
+ /* Fastmath improves performance on all supported arch,
+ * but is tends to cause problems on Intel (Issue #3898) */
+ if ((deviceVendor != DeviceVendor::Intel) && (getenv("GMX_OCL_DISABLE_FASTMATH") == nullptr))
{
compilerOptions += " -cl-fast-relaxed-math";
+
+ // Hint to the compiler that it can flush denorms to zero.
+ // In CUDA this is triggered by the -use_fast_math flag, equivalent with
+ // -cl-fast-relaxed-math, hence the inclusion on the conditional block.
+ compilerOptions += " -cl-denorms-are-zero";
}
- if ((deviceVendorId == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
+ if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_VERBOSE"))
{
compilerOptions += " -cl-nv-verbose";
}
- if ((deviceVendorId == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
+ if ((deviceVendor == DeviceVendor::Amd) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
{
/* To dump OpenCL build intermediate files, caching must be off */
if (!useBuildCache)
}
}
- if ( ( deviceVendorId == OCL_VENDOR_AMD ) && getenv("GMX_OCL_DEBUG"))
+ if (getenv("GMX_OCL_DEBUG"))
{
compilerOptions += " -g";
}
return compilerOptions;
}
-/*! \brief Get the path to the main folder storing OpenCL kernels.
+/*! \brief Get the path to the folder storing an OpenCL source file.
*
* By default, this function constructs the full path to the OpenCL from
* the known location of the binary that is running, so that we handle
* both in-source and installed builds. The user can override this
* behavior by defining GMX_OCL_FILE_PATH environment variable.
*
- * \return OS-normalized path string to the main folder storing OpenCL kernels
+ * \param[in] sourceRelativePath Relative path to the kernel or other file in the source tree,
+ * from src, e.g. "gromacs/mdlib/nbnxn_ocl" for NB kernels.
+ * \return OS-normalized path string to the folder storing OpenCL source file
*
* \throws std::bad_alloc if out of memory.
* FileIOError if GMX_OCL_FILE_PATH does not specify a readable path
*/
-static std::string
-getKernelRootPath()
+static std::string getSourceRootPath(const std::string& sourceRelativePath)
{
- std::string kernelRootPath;
+ std::string sourceRootPath;
/* Use GMX_OCL_FILE_PATH if the user has defined it */
- const char *gmxOclFilePath = getenv("GMX_OCL_FILE_PATH");
+ const char* gmxOclFilePath = getenv("GMX_OCL_FILE_PATH");
if (gmxOclFilePath == nullptr)
{
/* Normal way of getting ocl_root_dir. First get the right
root path from the path to the binary that is running. */
- InstallationPrefixInfo info = getProgramContext().installationPrefix();
- std::string dataPathSuffix = (info.bSourceLayout ?
- "src/gromacs/mdlib/nbnxn_ocl" :
- OCL_INSTALL_DIR);
- kernelRootPath = Path::join(info.path, dataPathSuffix);
+ InstallationPrefixInfo info = getProgramContext().installationPrefix();
+ std::string dataPathSuffix = (info.bSourceLayout ? "src" : GMX_INSTALL_OCLDIR);
+ sourceRootPath = Path::join(info.path, dataPathSuffix, sourceRelativePath);
}
else
{
if (!Directory::exists(gmxOclFilePath))
{
- GMX_THROW(FileIOError(formatString("GMX_OCL_FILE_PATH must point to the directory where OpenCL"
- "kernels are found, but '%s' does not exist", gmxOclFilePath)));
+ GMX_THROW(FileIOError(
+ formatString("GMX_OCL_FILE_PATH must point to the directory where OpenCL"
+ "kernels are found, but '%s' does not exist",
+ gmxOclFilePath)));
}
- kernelRootPath = gmxOclFilePath;
+ sourceRootPath = Path::join(gmxOclFilePath, sourceRelativePath);
}
// Make sure we return an OS-correct path format
- return Path::normalize(kernelRootPath);
+ return Path::normalize(sourceRootPath);
}
-/*! \brief Get the warp size reported by device
- *
- * This is platform implementation dependant and seems to only work on the Nvidia and AMD platforms!
- * Nvidia reports 32, AMD for GPU 64. Ignore the rest
- *
- * \param context Current OpenCL context
- * \param deviceId OpenCL device with the context
- * \return cl_int value of the warp size
- *
- * \throws InternalError if an OpenCL error was encountered
- */
-static size_t
-getWarpSize(cl_context context, cl_device_id deviceId)
+size_t getKernelWarpSize(cl_kernel kernel, cl_device_id deviceId)
{
- cl_int cl_error;
- const char *warpSizeKernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
- cl_program program = clCreateProgramWithSource(context, 1, (const char**)&warpSizeKernel, NULL, &cl_error);
+ size_t warpSize = 0;
+ cl_int cl_error = clGetKernelWorkGroupInfo(
+ kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(warpSize), &warpSize, nullptr);
if (cl_error != CL_SUCCESS)
{
- GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
+ GMX_THROW(InternalError("Could not query OpenCL preferred workgroup size, error was "
+ + ocl_get_error_string(cl_error)));
}
-
- cl_error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
- if (cl_error != CL_SUCCESS)
+ if (warpSize == 0)
{
- GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
+ GMX_THROW(InternalError(formatString("Invalid OpenCL warp size encountered")));
}
+ return warpSize;
+}
- cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
+size_t getDeviceWarpSize(cl_context context, cl_device_id deviceId)
+{
+ cl_int cl_error;
+ const char* warpSizeKernel =
+ "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
+ cl_program program = clCreateProgramWithSource(context, 1, &warpSizeKernel, nullptr, &cl_error);
if (cl_error != CL_SUCCESS)
{
- GMX_THROW(InternalError("Could not create OpenCL kernel to determine warp size, error was " + ocl_get_error_string(cl_error)));
+ GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was "
+ + ocl_get_error_string(cl_error)));
}
- size_t warpSize = 0;
- cl_error = clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
- sizeof(warpSize), &warpSize, NULL);
+ cl_error = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
if (cl_error != CL_SUCCESS)
{
- GMX_THROW(InternalError("Could not measure OpenCL warp size, error was " + ocl_get_error_string(cl_error)));
+ GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was "
+ + ocl_get_error_string(cl_error)));
}
- if (warpSize == 0)
+
+ cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
+ if (cl_error != CL_SUCCESS)
{
- GMX_THROW(InternalError(formatString("Did not measure a valid OpenCL warp size")));
+ GMX_THROW(InternalError("Could not create OpenCL kernel to determine warp size, error was "
+ + ocl_get_error_string(cl_error)));
}
+ size_t warpSize = getKernelWarpSize(kernel, deviceId);
+
cl_error = clReleaseKernel(kernel);
if (cl_error != CL_SUCCESS)
{
- GMX_THROW(InternalError("Could not release OpenCL warp-size kernel, error was " + ocl_get_error_string(cl_error)));
+ GMX_THROW(InternalError("Could not release OpenCL warp-size kernel, error was "
+ + ocl_get_error_string(cl_error)));
}
cl_error = clReleaseProgram(program);
if (cl_error != CL_SUCCESS)
{
- GMX_THROW(InternalError("Could not release OpenCL warp-size program, error was " + ocl_get_error_string(cl_error)));
+ GMX_THROW(InternalError("Could not release OpenCL warp-size program, error was "
+ + ocl_get_error_string(cl_error)));
}
return warpSize;
/*! \brief Select a compilation-line define for a vendor-specific kernel choice from vendor id
*
- * \param[in] vendorId Vendor id enumerator
+ * \param[in] deviceVendor Vendor id enumerator
*
* \return The appropriate compilation-line define
*/
-static const char *
-makeVendorFlavorChoice(ocl_vendor_id_t vendorId)
+static std::string makeVendorFlavorChoice(DeviceVendor deviceVendor)
{
- const char *choice;
- switch (vendorId)
+ switch (deviceVendor)
{
- case OCL_VENDOR_AMD:
- choice = "-D_AMD_SOURCE_";
- break;
- case OCL_VENDOR_NVIDIA:
- choice = "-D_NVIDIA_SOURCE_";
- break;
- default:
- choice = "-D_WARPLESS_SOURCE_";
- break;
+ case DeviceVendor::Amd: return "-D_AMD_SOURCE_";
+ case DeviceVendor::Nvidia: return "-D_NVIDIA_SOURCE_";
+ case DeviceVendor::Intel: return "-D_INTEL_SOURCE_";
+ default: return "";
}
- return choice;
}
/*! \brief Create include paths for kernel sources.
*
* \throws std::bad_alloc if out of memory.
*/
-static std::string makeKernelIncludePathOption(const std::string &unescapedKernelRootPath)
+static std::string makeKernelIncludePathOption(const std::string& unescapedKernelRootPath)
{
std::string includePathOption;
return includePathOption;
}
+/*! \brief Replace duplicated spaces with a single one in string
+ *
+ * Only the first character will be kept for multiple adjacent characters that
+ * are both identical and where the first one returns true for isspace().
+ *
+ * \param str String that will be modified.
+ */
+static void removeExtraSpaces(std::string* str)
+{
+ GMX_RELEASE_ASSERT(str != nullptr, "A pointer to an actual string must be provided");
+ std::string::iterator newEnd = std::unique(
+ str->begin(), str->end(), [=](char a, char b) { return isspace(a) != 0 && (a == b); });
+ str->erase(newEnd, str->end());
+}
+
/*! \brief Builds a string with build options for the OpenCL kernels
*
* \throws std::bad_alloc if out of memory. */
-std::string
-makePreprocessorOptions(const std::string &kernelRootPath,
- size_t warpSize,
- ocl_vendor_id_t deviceVendorId,
- const std::string &extraDefines)
+static std::string makePreprocessorOptions(const std::string& kernelRootPath,
+ const std::string& includeRootPath,
+ size_t warpSize,
+ DeviceVendor deviceVendor,
+ const std::string& extraDefines)
{
std::string preprocessorOptions;
/* Compose the complete build options */
- preprocessorOptions = formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize));
+ preprocessorOptions = formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize));
preprocessorOptions += ' ';
- preprocessorOptions += makeVendorFlavorChoice(deviceVendorId);
+ preprocessorOptions += makeVendorFlavorChoice(deviceVendor);
preprocessorOptions += ' ';
preprocessorOptions += extraDefines;
preprocessorOptions += ' ';
- preprocessorOptions += selectCompilerOptions(deviceVendorId);
+ preprocessorOptions += selectCompilerOptions(deviceVendor);
preprocessorOptions += ' ';
preprocessorOptions += makeKernelIncludePathOption(kernelRootPath);
+ preprocessorOptions += ' ';
+ preprocessorOptions += makeKernelIncludePathOption(includeRootPath);
+
+ // Mac OS (and maybe some other implementations) does not accept double spaces in options
+ removeExtraSpaces(&preprocessorOptions);
return preprocessorOptions;
}
-cl_program
-compileProgram(FILE *fplog,
- const std::string &kernelBaseFilename,
- const std::string &extraDefines,
- cl_context context,
- cl_device_id deviceId,
- ocl_vendor_id_t deviceVendorId)
+cl_program compileProgram(FILE* fplog,
+ const std::string& kernelRelativePath,
+ const std::string& kernelBaseFilename,
+ const std::string& extraDefines,
+ cl_context context,
+ cl_device_id deviceId,
+ DeviceVendor deviceVendor)
{
- cl_int cl_error;
- std::string kernelRootPath = getKernelRootPath();
+ cl_int cl_error;
+ // Let the kernel find include files from its module.
+ std::string kernelRootPath = getSourceRootPath(kernelRelativePath);
+ // Let the kernel find include files from other modules.
+ std::string rootPath = getSourceRootPath("");
GMX_RELEASE_ASSERT(fplog != nullptr, "Need a valid log file for building OpenCL programs");
/* Load OpenCL source files */
- std::string kernelFilename = Path::join(kernelRootPath,
- kernelBaseFilename);
+ std::string kernelFilename = Path::join(kernelRootPath, kernelBaseFilename);
/* Make the build options */
- std::string preprocessorOptions = makePreprocessorOptions(kernelRootPath,
- getWarpSize(context, deviceId),
- deviceVendorId,
- extraDefines);
+ std::string preprocessorOptions = makePreprocessorOptions(
+ kernelRootPath, rootPath, getDeviceWarpSize(context, deviceId), deviceVendor, extraDefines);
- bool buildCacheWasRead = false;
+ bool buildCacheWasRead = false;
std::string cacheFilename;
if (useBuildCache)
program = makeProgramFromCache(cacheFilename, context, deviceId);
buildCacheWasRead = true;
}
- catch (FileIOError &e)
+ catch (FileIOError& e)
{
// Failing to read from the cache is not a critical error
formatExceptionMessageToFile(fplog, e);
}
+ fprintf(fplog,
+ "OpenCL binary cache file %s is present, will load kernels.\n",
+ cacheFilename.c_str());
}
else
{
- fprintf(fplog, "No OpenCL binary cache file was present, so will compile kernels normally.\n");
+ fprintf(fplog,
+ "No OpenCL binary cache file was present for %s, so will compile kernels "
+ "normally.\n",
+ kernelBaseFilename.c_str());
}
}
if (program == nullptr)
{
GMX_THROW(FileIOError("Error loading OpenCL code " + kernelFilename));
}
- const char *kernelSourcePtr = kernelSource.c_str();
+ const char* kernelSourcePtr = kernelSource.c_str();
size_t kernelSourceSize = kernelSource.size();
/* Create program from source code */
- program = clCreateProgramWithSource(context,
- 1,
- &kernelSourcePtr,
- &kernelSourceSize,
- &cl_error);
+ program = clCreateProgramWithSource(context, 1, &kernelSourcePtr, &kernelSourceSize, &cl_error);
if (cl_error != CL_SUCCESS)
{
- GMX_THROW(InternalError("Could not create OpenCL program, error was " + ocl_get_error_string(cl_error)));
+ GMX_THROW(InternalError("Could not create OpenCL program, error was "
+ + ocl_get_error_string(cl_error)));
}
}
/* Build the OpenCL program, keeping the status to potentially
write to the simulation log file. */
- cl_int buildStatus = clBuildProgram(program, 0, NULL, preprocessorOptions.c_str(), NULL, NULL);
+ cl_int buildStatus =
+ clBuildProgram(program, 0, nullptr, preprocessorOptions.c_str(), nullptr, nullptr);
/* Write log first, and then throw exception that the user know what is
the issue even if the build fails. */
- writeOclBuildLog(fplog,
- program,
- deviceId,
- kernelFilename,
- preprocessorOptions,
- buildStatus != CL_SUCCESS);
+ writeOclBuildLog(fplog, program, deviceId, kernelFilename, preprocessorOptions, buildStatus != CL_SUCCESS);
if (buildStatus != CL_SUCCESS)
{
- GMX_THROW(InternalError("Could not build OpenCL program, error was " + ocl_get_error_string(buildStatus)));
+ GMX_THROW(InternalError("Could not build OpenCL program, error was "
+ + ocl_get_error_string(buildStatus)));
}
if (useBuildCache)
{
writeBinaryToCache(program, cacheFilename);
}
- catch (GromacsException &e)
+ catch (GromacsException& e)
{
// Failing to write the cache is not a critical error
formatExceptionMessageToFile(fplog, e);
}
}
}
- if ((OCL_VENDOR_NVIDIA == deviceVendorId) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
+ if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
{
/* If dumping intermediate files has been requested and this is an NVIDIA card
=> write PTX to file */
char buffer[STRLEN];
- cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, NULL);
+ cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, nullptr);
if (cl_error != CL_SUCCESS)
{
- GMX_THROW(InternalError("Could not get OpenCL device info, error was " + ocl_get_error_string(cl_error)));
+ GMX_THROW(InternalError("Could not get OpenCL device info, error was "
+ + ocl_get_error_string(cl_error)));
}
std::string ptxFilename = buffer;
ptxFilename += ".ptx";
{
writeBinaryToCache(program, ptxFilename);
}
- catch (GromacsException &e)
+ catch (GromacsException& e)
{
// Failing to write the cache is not a critical error
formatExceptionMessageToFile(fplog, e);
return program;
}
-} // namespace
-} // namespace
+} // namespace ocl
+} // namespace gmx