Fix random typos
[alexxy/gromacs.git] / src / gromacs / gpu_utils / ocl_compiler.cpp
index 190448ec4383272edc1c4191bb2ab719ed23ffe3..5f826e5e0ab334dffe87b54d5a26e7ddb4b2ca96 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
+ * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -75,11 +76,11 @@ namespace ocl
  *
  *  Currently caching is disabled by default unless the env var override
  *  is used until we resolve concurrency issues. */
-static bool useBuildCache = getenv("GMX_OCL_GENCACHE"); // (NULL == getenv("GMX_OCL_NOGENCACHE"));
+static bool useBuildCache = getenv("GMX_OCL_GENCACHE") != nullptr;
 
 /*! \brief Handles writing the OpenCL JIT compilation log to \c fplog.
  *
- * If \c fplog is non-null and either the GMX_OCL_DUMP_LOG environment
+ * If \c fplog is non-null and either the \c GMX_OCL_DUMP_LOG environment
  * variable is set or the compilation failed, then the OpenCL
  * compilation log is written.
  *
@@ -87,20 +88,19 @@ static bool useBuildCache = getenv("GMX_OCL_GENCACHE"); // (NULL == getenv("GMX_
  * \param program             OpenCL program that was compiled
  * \param deviceId            Id of the device for which compilation took place
  * \param kernelFilename      File name containing the kernel
- * \param preprocessorOptions String containing the preprocessor command-line options used for the build
+ * \param preprocessorOptions String containing the preprocessor command-line options used for the
+ *                            build
  * \param buildFailed         Whether the OpenCL build succeeded
  *
  * \throws std::bad_alloc if out of memory */
-static void
-writeOclBuildLog(FILE              *fplog,
-                 cl_program         program,
-                 cl_device_id       deviceId,
-                 const std::string &kernelFilename,
-                 const std::string &preprocessorOptions,
-                 bool               buildFailed)
+static void writeOclBuildLog(FILE*              fplog,
+                             cl_program         program,
+                             cl_device_id       deviceId,
+                             const std::string& kernelFilename,
+                             const std::string& preprocessorOptions,
+                             bool               buildFailed)
 {
-    bool writeOutput = ((fplog != nullptr) &&
-                        (buildFailed || (getenv("GMX_OCL_DUMP_LOG") != nullptr)));
+    bool writeOutput = ((fplog != nullptr) && (buildFailed || (getenv("GMX_OCL_DUMP_LOG") != nullptr)));
 
     if (!writeOutput)
     {
@@ -109,18 +109,15 @@ writeOclBuildLog(FILE              *fplog,
 
     // Get build log string size
     size_t buildLogSize;
-    cl_int cl_error = clGetProgramBuildInfo(program,
-                                            deviceId,
-                                            CL_PROGRAM_BUILD_LOG,
-                                            0,
-                                            nullptr,
-                                            &buildLogSize);
+    cl_int cl_error =
+            clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_LOG, 0, nullptr, &buildLogSize);
     if (cl_error != CL_SUCCESS)
     {
-        GMX_THROW(InternalError("Could not get OpenCL program build log size, error was " + ocl_get_error_string(cl_error)));
+        GMX_THROW(InternalError("Could not get OpenCL program build log size, error was "
+                                + ocl_get_error_string(cl_error)));
     }
 
-    char             *buildLog = nullptr;
+    char*             buildLog = nullptr;
     unique_cptr<char> buildLogGuard;
     if (buildLogSize != 0)
     {
@@ -130,15 +127,12 @@ writeOclBuildLog(FILE              *fplog,
         buildLogGuard.reset(buildLog);
 
         /* Get the actual compilation log */
-        cl_error = clGetProgramBuildInfo(program,
-                                         deviceId,
-                                         CL_PROGRAM_BUILD_LOG,
-                                         buildLogSize,
-                                         buildLog,
-                                         nullptr);
+        cl_error = clGetProgramBuildInfo(
+                program, deviceId, CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, nullptr);
         if (cl_error != CL_SUCCESS)
         {
-            GMX_THROW(InternalError("Could not get OpenCL program build log, error was " + ocl_get_error_string(cl_error)));
+            GMX_THROW(InternalError("Could not get OpenCL program build log, error was "
+                                    + ocl_get_error_string(cl_error)));
         }
     }
 
@@ -154,39 +148,44 @@ writeOclBuildLog(FILE              *fplog,
     message += "-- Used build options: " + preprocessorOptions + "\n";
     message += "--------------LOG START---------------\n";
     message += buildLog;
-    message += "---------------LOG END----------------\n";;
+    message += "---------------LOG END----------------\n";
+    ;
 
     fputs(message.c_str(), fplog);
 }
 
 /*! \brief Construct compiler options string
  *
- * \param deviceVendorId  Device vendor id. Used to
- *          automatically enable some vendor-specific options
+ * \param deviceVendor  Device vendor. Used to automatically enable some
+ *                      vendor-specific options.
  * \return The string with the compiler options
  */
-static std::string
-selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
+static std::string selectCompilerOptions(DeviceVendor deviceVendor)
 {
     std::string compilerOptions;
 
-    if (getenv("GMX_OCL_NOOPT") )
+    if (getenv("GMX_OCL_NOOPT"))
     {
         compilerOptions += " -cl-opt-disable";
     }
 
-    /* Fastmath imprves performance on all supported arch */
+    /* Fastmath improves performance on all supported arch */
     if (getenv("GMX_OCL_DISABLE_FASTMATH") == nullptr)
     {
         compilerOptions += " -cl-fast-relaxed-math";
+
+        // Hint to the compiler that it can flush denorms to zero.
+        // In CUDA this is triggered by the -use_fast_math flag, equivalent with
+        // -cl-fast-relaxed-math, hence the inclusion on the conditional block.
+        compilerOptions += " -cl-denorms-are-zero";
     }
 
-    if ((deviceVendorId == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
+    if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_VERBOSE"))
     {
         compilerOptions += " -cl-nv-verbose";
     }
 
-    if ((deviceVendorId == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
+    if ((deviceVendor == DeviceVendor::Amd) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
     {
         /* To dump OpenCL build intermediate files, caching must be off */
         if (!useBuildCache)
@@ -211,86 +210,98 @@ selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
  * behavior by defining GMX_OCL_FILE_PATH environment variable.
  *
  * \param[in] sourceRelativePath    Relative path to the kernel or other file in the source tree,
- *                                  e.g. "src/gromacs/mdlib/nbnxn_ocl" for NB kernels.
+ *                                  from src, e.g. "gromacs/mdlib/nbnxn_ocl" for NB kernels.
  * \return OS-normalized path string to the folder storing OpenCL source file
  *
  * \throws std::bad_alloc    if out of memory.
  *         FileIOError  if GMX_OCL_FILE_PATH does not specify a readable path
  */
-static std::string
-getSourceRootPath(const std::string &sourceRelativePath)
+static std::string getSourceRootPath(const std::string& sourceRelativePath)
 {
     std::string sourceRootPath;
     /* Use GMX_OCL_FILE_PATH if the user has defined it */
-    const char *gmxOclFilePath = getenv("GMX_OCL_FILE_PATH");
+    const chargmxOclFilePath = getenv("GMX_OCL_FILE_PATH");
 
     if (gmxOclFilePath == nullptr)
     {
         /* Normal way of getting ocl_root_dir. First get the right
            root path from the path to the binary that is running. */
-        InstallationPrefixInfo      info           = getProgramContext().installationPrefix();
-        std::string                 dataPathSuffix = (info.bSourceLayout ?
-                                                      sourceRelativePath :
-                                                      GMX_INSTALL_OCLDIR);
-        sourceRootPath = Path::join(info.path, dataPathSuffix);
+        InstallationPrefixInfo info           = getProgramContext().installationPrefix();
+        std::string            dataPathSuffix = (info.bSourceLayout ? "src" : GMX_INSTALL_OCLDIR);
+        sourceRootPath = Path::join(info.path, dataPathSuffix, sourceRelativePath);
     }
     else
     {
         if (!Directory::exists(gmxOclFilePath))
         {
-            GMX_THROW(FileIOError(formatString("GMX_OCL_FILE_PATH must point to the directory where OpenCL"
-                                               "kernels are found, but '%s' does not exist", gmxOclFilePath)));
+            GMX_THROW(FileIOError(
+                    formatString("GMX_OCL_FILE_PATH must point to the directory where OpenCL"
+                                 "kernels are found, but '%s' does not exist",
+                                 gmxOclFilePath)));
         }
-        sourceRootPath = gmxOclFilePath;
+        sourceRootPath = Path::join(gmxOclFilePath, sourceRelativePath);
     }
 
     // Make sure we return an OS-correct path format
     return Path::normalize(sourceRootPath);
 }
 
-size_t getWarpSize(cl_context context, cl_device_id deviceId)
+size_t getKernelWarpSize(cl_kernel kernel, cl_device_id deviceId)
 {
-    cl_int      cl_error;
-    const char *warpSizeKernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
-    cl_program  program        = clCreateProgramWithSource(context, 1, (const char**)&warpSizeKernel, nullptr, &cl_error);
+    size_t warpSize = 0;
+    cl_int cl_error = clGetKernelWorkGroupInfo(
+            kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(warpSize), &warpSize, nullptr);
     if (cl_error != CL_SUCCESS)
     {
-        GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
+        GMX_THROW(InternalError("Could not query OpenCL preferred workgroup size, error was "
+                                + ocl_get_error_string(cl_error)));
     }
-
-    cl_error = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
-    if (cl_error != CL_SUCCESS)
+    if (warpSize == 0)
     {
-        GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
+        GMX_THROW(InternalError(formatString("Invalid OpenCL warp size encountered")));
     }
+    return warpSize;
+}
 
-    cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
+size_t getDeviceWarpSize(cl_context context, cl_device_id deviceId)
+{
+    cl_int      cl_error;
+    const char* warpSizeKernel =
+            "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
+    cl_program program = clCreateProgramWithSource(context, 1, &warpSizeKernel, nullptr, &cl_error);
     if (cl_error != CL_SUCCESS)
     {
-        GMX_THROW(InternalError("Could not create OpenCL kernel to determine warp size, error was " + ocl_get_error_string(cl_error)));
+        GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was "
+                                + ocl_get_error_string(cl_error)));
     }
 
-    size_t warpSize = 0;
-    cl_error = clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
-                                        sizeof(warpSize), &warpSize, nullptr);
+    cl_error = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
     if (cl_error != CL_SUCCESS)
     {
-        GMX_THROW(InternalError("Could not measure OpenCL warp size, error was " + ocl_get_error_string(cl_error)));
+        GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was "
+                                + ocl_get_error_string(cl_error)));
     }
-    if (warpSize == 0)
+
+    cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
+    if (cl_error != CL_SUCCESS)
     {
-        GMX_THROW(InternalError(formatString("Did not measure a valid OpenCL warp size")));
+        GMX_THROW(InternalError("Could not create OpenCL kernel to determine warp size, error was "
+                                + ocl_get_error_string(cl_error)));
     }
 
+    size_t warpSize = getKernelWarpSize(kernel, deviceId);
+
     cl_error = clReleaseKernel(kernel);
     if (cl_error != CL_SUCCESS)
     {
-        GMX_THROW(InternalError("Could not release OpenCL warp-size kernel, error was " + ocl_get_error_string(cl_error)));
+        GMX_THROW(InternalError("Could not release OpenCL warp-size kernel, error was "
+                                + ocl_get_error_string(cl_error)));
     }
     cl_error = clReleaseProgram(program);
     if (cl_error != CL_SUCCESS)
     {
-        GMX_THROW(InternalError("Could not release OpenCL warp-size program, error was " + ocl_get_error_string(cl_error)));
+        GMX_THROW(InternalError("Could not release OpenCL warp-size program, error was "
+                                + ocl_get_error_string(cl_error)));
     }
 
     return warpSize;
@@ -298,30 +309,19 @@ size_t getWarpSize(cl_context context, cl_device_id deviceId)
 
 /*! \brief Select a compilation-line define for a vendor-specific kernel choice from vendor id
  *
- * \param[in] vendorId Vendor id enumerator
+ * \param[in] deviceVendor Vendor id enumerator
  *
  * \return The appropriate compilation-line define
  */
-static const char *
-makeVendorFlavorChoice(ocl_vendor_id_t vendorId)
+static std::string makeVendorFlavorChoice(DeviceVendor deviceVendor)
 {
-    const char *choice;
-    switch (vendorId)
+    switch (deviceVendor)
     {
-        case OCL_VENDOR_AMD:
-            choice = "-D_AMD_SOURCE_";
-            break;
-        case OCL_VENDOR_NVIDIA:
-            choice = "-D_NVIDIA_SOURCE_";
-            break;
-        case OCL_VENDOR_INTEL:
-            choice = "-D_INTEL_SOURCE_";
-            break;
-        default:
-            choice = "";
-            break;
+        case DeviceVendor::Amd: return "-D_AMD_SOURCE_";
+        case DeviceVendor::Nvidia: return "-D_NVIDIA_SOURCE_";
+        case DeviceVendor::Intel: return "-D_INTEL_SOURCE_";
+        default: return "";
     }
-    return choice;
 }
 
 /*! \brief Create include paths for kernel sources.
@@ -330,7 +330,7 @@ makeVendorFlavorChoice(ocl_vendor_id_t vendorId)
  *
  * \throws std::bad_alloc  if out of memory.
  */
-static std::string makeKernelIncludePathOption(const std::string &unescapedKernelRootPath)
+static std::string makeKernelIncludePathOption(const std::stringunescapedKernelRootPath)
 {
     std::string includePathOption;
 
@@ -364,35 +364,33 @@ static std::string makeKernelIncludePathOption(const std::string &unescapedKerne
  *
  * \param str String that will be modified.
  */
-static void
-removeExtraSpaces(std::string *str)
+static void removeExtraSpaces(std::string* str)
 {
     GMX_RELEASE_ASSERT(str != nullptr, "A pointer to an actual string must be provided");
-    std::string::iterator newEnd =
-        std::unique( str->begin(), str->end(), [ = ](char a, char b){ return isspace(a) && (a == b); } );
+    std::string::iterator newEnd = std::unique(
+            str->begin(), str->end(), [=](char a, char b) { return isspace(a) != 0 && (a == b); });
     str->erase(newEnd, str->end());
 }
 
 /*! \brief Builds a string with build options for the OpenCL kernels
  *
  * \throws std::bad_alloc  if out of memory. */
-static std::string
-makePreprocessorOptions(const std::string   &kernelRootPath,
-                        const std::string   &includeRootPath,
-                        size_t               warpSize,
-                        ocl_vendor_id_t      deviceVendorId,
-                        const std::string   &extraDefines)
+static std::string makePreprocessorOptions(const std::string& kernelRootPath,
+                                           const std::string& includeRootPath,
+                                           size_t             warpSize,
+                                           DeviceVendor       deviceVendor,
+                                           const std::string& extraDefines)
 {
     std::string preprocessorOptions;
 
     /* Compose the complete build options */
-    preprocessorOptions  = formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize));
+    preprocessorOptions = formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize));
     preprocessorOptions += ' ';
-    preprocessorOptions += makeVendorFlavorChoice(deviceVendorId);
+    preprocessorOptions += makeVendorFlavorChoice(deviceVendor);
     preprocessorOptions += ' ';
     preprocessorOptions += extraDefines;
     preprocessorOptions += ' ';
-    preprocessorOptions += selectCompilerOptions(deviceVendorId);
+    preprocessorOptions += selectCompilerOptions(deviceVendor);
     preprocessorOptions += ' ';
     preprocessorOptions += makeKernelIncludePathOption(kernelRootPath);
     preprocessorOptions += ' ';
@@ -404,33 +402,30 @@ makePreprocessorOptions(const std::string   &kernelRootPath,
     return preprocessorOptions;
 }
 
-cl_program
-compileProgram(FILE              *fplog,
-               const std::string &kernelRelativePath,
-               const std::string &kernelBaseFilename,
-               const std::string &extraDefines,
-               cl_context         context,
-               cl_device_id       deviceId,
-               ocl_vendor_id_t    deviceVendorId)
+cl_program compileProgram(FILE*              fplog,
+                          const std::string& kernelRelativePath,
+                          const std::string& kernelBaseFilename,
+                          const std::string& extraDefines,
+                          cl_context         context,
+                          cl_device_id       deviceId,
+                          DeviceVendor       deviceVendor)
 {
-    cl_int      cl_error;
-    std::string kernelRootPath  = getSourceRootPath(kernelRelativePath);
-    std::string includeRootPath = getSourceRootPath("src/gromacs/gpu_utils");
+    cl_int cl_error;
+    // Let the kernel find include files from its module.
+    std::string kernelRootPath = getSourceRootPath(kernelRelativePath);
+    // Let the kernel find include files from other modules.
+    std::string rootPath = getSourceRootPath("");
 
     GMX_RELEASE_ASSERT(fplog != nullptr, "Need a valid log file for building OpenCL programs");
 
     /* Load OpenCL source files */
-    std::string kernelFilename = Path::join(kernelRootPath,
-                                            kernelBaseFilename);
+    std::string kernelFilename = Path::join(kernelRootPath, kernelBaseFilename);
 
     /* Make the build options */
-    std::string preprocessorOptions = makePreprocessorOptions(kernelRootPath,
-                                                              includeRootPath,
-                                                              getWarpSize(context, deviceId),
-                                                              deviceVendorId,
-                                                              extraDefines);
+    std::string preprocessorOptions = makePreprocessorOptions(
+            kernelRootPath, rootPath, getDeviceWarpSize(context, deviceId), deviceVendor, extraDefines);
 
-    bool        buildCacheWasRead = false;
+    bool buildCacheWasRead = false;
 
     std::string cacheFilename;
     if (useBuildCache)
@@ -450,15 +445,21 @@ compileProgram(FILE              *fplog,
                 program           = makeProgramFromCache(cacheFilename, context, deviceId);
                 buildCacheWasRead = true;
             }
-            catch (FileIOError &e)
+            catch (FileIOErrore)
             {
                 // Failing to read from the cache is not a critical error
                 formatExceptionMessageToFile(fplog, e);
             }
+            fprintf(fplog,
+                    "OpenCL binary cache file %s is present, will load kernels.\n",
+                    cacheFilename.c_str());
         }
         else
         {
-            fprintf(fplog, "No OpenCL binary cache file was present, so will compile kernels normally.\n");
+            fprintf(fplog,
+                    "No OpenCL binary cache file was present for %s, so will compile kernels "
+                    "normally.\n",
+                    kernelBaseFilename.c_str());
         }
     }
     if (program == nullptr)
@@ -469,36 +470,30 @@ compileProgram(FILE              *fplog,
         {
             GMX_THROW(FileIOError("Error loading OpenCL code " + kernelFilename));
         }
-        const char *kernelSourcePtr  = kernelSource.c_str();
+        const charkernelSourcePtr  = kernelSource.c_str();
         size_t      kernelSourceSize = kernelSource.size();
         /* Create program from source code */
-        program = clCreateProgramWithSource(context,
-                                            1,
-                                            &kernelSourcePtr,
-                                            &kernelSourceSize,
-                                            &cl_error);
+        program = clCreateProgramWithSource(context, 1, &kernelSourcePtr, &kernelSourceSize, &cl_error);
         if (cl_error != CL_SUCCESS)
         {
-            GMX_THROW(InternalError("Could not create OpenCL program, error was " + ocl_get_error_string(cl_error)));
+            GMX_THROW(InternalError("Could not create OpenCL program, error was "
+                                    + ocl_get_error_string(cl_error)));
         }
     }
 
     /* Build the OpenCL program, keeping the status to potentially
        write to the simulation log file. */
-    cl_int buildStatus = clBuildProgram(program, 0, nullptr, preprocessorOptions.c_str(), nullptr, nullptr);
+    cl_int buildStatus =
+            clBuildProgram(program, 0, nullptr, preprocessorOptions.c_str(), nullptr, nullptr);
 
     /* Write log first, and then throw exception that the user know what is
        the issue even if the build fails. */
-    writeOclBuildLog(fplog,
-                     program,
-                     deviceId,
-                     kernelFilename,
-                     preprocessorOptions,
-                     buildStatus != CL_SUCCESS);
+    writeOclBuildLog(fplog, program, deviceId, kernelFilename, preprocessorOptions, buildStatus != CL_SUCCESS);
 
     if (buildStatus != CL_SUCCESS)
     {
-        GMX_THROW(InternalError("Could not build OpenCL program, error was " + ocl_get_error_string(buildStatus)));
+        GMX_THROW(InternalError("Could not build OpenCL program, error was "
+                                + ocl_get_error_string(buildStatus)));
     }
 
     if (useBuildCache)
@@ -511,14 +506,14 @@ compileProgram(FILE              *fplog,
             {
                 writeBinaryToCache(program, cacheFilename);
             }
-            catch (GromacsException &e)
+            catch (GromacsExceptione)
             {
                 // Failing to write the cache is not a critical error
                 formatExceptionMessageToFile(fplog, e);
             }
         }
     }
-    if ((OCL_VENDOR_NVIDIA == deviceVendorId) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
+    if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
     {
         /* If dumping intermediate files has been requested and this is an NVIDIA card
            => write PTX to file */
@@ -527,7 +522,8 @@ compileProgram(FILE              *fplog,
         cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, nullptr);
         if (cl_error != CL_SUCCESS)
         {
-            GMX_THROW(InternalError("Could not get OpenCL device info, error was " + ocl_get_error_string(cl_error)));
+            GMX_THROW(InternalError("Could not get OpenCL device info, error was "
+                                    + ocl_get_error_string(cl_error)));
         }
         std::string ptxFilename = buffer;
         ptxFilename += ".ptx";
@@ -536,7 +532,7 @@ compileProgram(FILE              *fplog,
         {
             writeBinaryToCache(program, ptxFilename);
         }
-        catch (GromacsException &e)
+        catch (GromacsExceptione)
         {
             // Failing to write the cache is not a critical error
             formatExceptionMessageToFile(fplog, e);
@@ -546,5 +542,5 @@ compileProgram(FILE              *fplog,
     return program;
 }
 
-} // namespace
-} // namespace
+} // namespace ocl
+} // namespace gmx