Disable fastmath with OpenCL on Intel devices

[alexxy/gromacs.git] / src / gromacs / gpu_utils / ocl_compiler.cpp
diff --git a/src/gromacs/gpu_utils/ocl_compiler.cpp b/src/gromacs/gpu_utils/ocl_compiler.cpp

index 07e71d2febc0ca1e90b91b89b5815de5a7162e4a..64e925b885036f74e01a9dd2049f2d790cbb5a9a 100644 (file)
--- a/src/gromacs/gpu_utils/ocl_compiler.cpp
+++ b/src/gromacs/gpu_utils/ocl_compiler.cpp
@@ -2,7 +2,7 @@
   * This file is part of the GROMACS molecular simulation package.
   *
   * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
- * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -80,7 +80,7 @@ static bool useBuildCache = getenv("GMX_OCL_GENCACHE") != nullptr;
  
  /*! \brief Handles writing the OpenCL JIT compilation log to \c fplog.
   *
- * If \c fplog is non-null and either the GMX_OCL_DUMP_LOG environment
+ * If \c fplog is non-null and either the \c GMX_OCL_DUMP_LOG environment
   * variable is set or the compilation failed, then the OpenCL
   * compilation log is written.
   *
@@ -89,7 +89,8 @@ static bool useBuildCache = getenv("GMX_OCL_GENCACHE") != nullptr;
   * \param deviceId            Id of the device for which compilation took place
   * \param kernelFilename      File name containing the kernel
   * \param preprocessorOptions String containing the preprocessor command-line options used for the
- * build \param buildFailed         Whether the OpenCL build succeeded
+ *                            build
+ * \param buildFailed         Whether the OpenCL build failed
   *
   * \throws std::bad_alloc if out of memory */
  static void writeOclBuildLog(FILE*              fplog,
@@ -126,8 +127,8 @@ static void writeOclBuildLog(FILE*              fplog,
          buildLogGuard.reset(buildLog);
  
          /* Get the actual compilation log */
-        cl_error = clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_LOG, buildLogSize,
-                                         buildLog, nullptr);
+        cl_error = clGetProgramBuildInfo(
+                program, deviceId, CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, nullptr);
          if (cl_error != CL_SUCCESS)
          {
              GMX_THROW(InternalError("Could not get OpenCL program build log, error was "
@@ -155,11 +156,11 @@ static void writeOclBuildLog(FILE*              fplog,
  
  /*! \brief Construct compiler options string
   *
- * \param deviceVendorId  Device vendor id. Used to
- *          automatically enable some vendor-specific options
+ * \param deviceVendor  Device vendor. Used to automatically enable some
+ *                      vendor-specific options.
   * \return The string with the compiler options
   */
-static std::string selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
+static std::string selectCompilerOptions(DeviceVendor deviceVendor)
  {
      std::string compilerOptions;
  
@@ -168,8 +169,9 @@ static std::string selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
          compilerOptions += " -cl-opt-disable";
      }
  
-    /* Fastmath imprves performance on all supported arch */
-    if (getenv("GMX_OCL_DISABLE_FASTMATH") == nullptr)
+    /* Fastmath improves performance on all supported arch,
+     * but it tends to cause problems on Intel (Issue #3898) */
+    if ((deviceVendor != DeviceVendor::Intel) && (getenv("GMX_OCL_DISABLE_FASTMATH") == nullptr))
      {
          compilerOptions += " -cl-fast-relaxed-math";
  
@@ -179,12 +181,12 @@ static std::string selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
          compilerOptions += " -cl-denorms-are-zero";
      }
  
-    if ((deviceVendorId == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
+    if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_VERBOSE"))
      {
          compilerOptions += " -cl-nv-verbose";
      }
  
-    if ((deviceVendorId == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
+    if ((deviceVendor == DeviceVendor::Amd) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
      {
          /* To dump OpenCL build intermediate files, caching must be off */
          if (!useBuildCache)
@@ -248,9 +250,8 @@ static std::string getSourceRootPath(const std::string& sourceRelativePath)
  size_t getKernelWarpSize(cl_kernel kernel, cl_device_id deviceId)
  {
      size_t warpSize = 0;
-    cl_int cl_error =
-            clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
-                                     sizeof(warpSize), &warpSize, nullptr);
+    cl_int cl_error = clGetKernelWorkGroupInfo(
+            kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(warpSize), &warpSize, nullptr);
      if (cl_error != CL_SUCCESS)
      {
          GMX_THROW(InternalError("Could not query OpenCL preferred workgroup size, error was "
@@ -309,21 +310,19 @@ size_t getDeviceWarpSize(cl_context context, cl_device_id deviceId)
  
  /*! \brief Select a compilation-line define for a vendor-specific kernel choice from vendor id
   *
- * \param[in] vendorId Vendor id enumerator
+ * \param[in] deviceVendor Device vendor enumerator
   *
   * \return The appropriate compilation-line define
   */
-static const char* makeVendorFlavorChoice(ocl_vendor_id_t vendorId)
+static std::string makeVendorFlavorChoice(DeviceVendor deviceVendor)
  {
-    const char* choice;
-    switch (vendorId)
+    switch (deviceVendor)
      {
-        case OCL_VENDOR_AMD: choice = "-D_AMD_SOURCE_"; break;
-        case OCL_VENDOR_NVIDIA: choice = "-D_NVIDIA_SOURCE_"; break;
-        case OCL_VENDOR_INTEL: choice = "-D_INTEL_SOURCE_"; break;
-        default: choice = ""; break;
+        case DeviceVendor::Amd: return "-D_AMD_SOURCE_";
+        case DeviceVendor::Nvidia: return "-D_NVIDIA_SOURCE_";
+        case DeviceVendor::Intel: return "-D_INTEL_SOURCE_";
+        default: return "";
      }
-    return choice;
  }
  
  /*! \brief Create include paths for kernel sources.
@@ -380,7 +379,7 @@ static void removeExtraSpaces(std::string* str)
  static std::string makePreprocessorOptions(const std::string& kernelRootPath,
                                             const std::string& includeRootPath,
                                             size_t             warpSize,
-                                           ocl_vendor_id_t    deviceVendorId,
+                                           DeviceVendor       deviceVendor,
                                             const std::string& extraDefines)
  {
      std::string preprocessorOptions;
@@ -388,11 +387,11 @@ static std::string makePreprocessorOptions(const std::string& kernelRootPath,
      /* Compose the complete build options */
      preprocessorOptions = formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize));
      preprocessorOptions += ' ';
-    preprocessorOptions += makeVendorFlavorChoice(deviceVendorId);
+    preprocessorOptions += makeVendorFlavorChoice(deviceVendor);
      preprocessorOptions += ' ';
      preprocessorOptions += extraDefines;
      preprocessorOptions += ' ';
-    preprocessorOptions += selectCompilerOptions(deviceVendorId);
+    preprocessorOptions += selectCompilerOptions(deviceVendor);
      preprocessorOptions += ' ';
      preprocessorOptions += makeKernelIncludePathOption(kernelRootPath);
      preprocessorOptions += ' ';
@@ -410,7 +409,7 @@ cl_program compileProgram(FILE*              fplog,
                            const std::string& extraDefines,
                            cl_context         context,
                            cl_device_id       deviceId,
-                          ocl_vendor_id_t    deviceVendorId)
+                          DeviceVendor       deviceVendor)
  {
      cl_int cl_error;
      // Let the kernel find include files from its module.
@@ -425,7 +424,7 @@ cl_program compileProgram(FILE*              fplog,
  
      /* Make the build options */
      std::string preprocessorOptions = makePreprocessorOptions(
-            kernelRootPath, rootPath, getDeviceWarpSize(context, deviceId), deviceVendorId, extraDefines);
+            kernelRootPath, rootPath, getDeviceWarpSize(context, deviceId), deviceVendor, extraDefines);
  
      bool buildCacheWasRead = false;
  
@@ -452,11 +451,16 @@ cl_program compileProgram(FILE*              fplog,
                  // Failing to read from the cache is not a critical error
                  formatExceptionMessageToFile(fplog, e);
              }
+            fprintf(fplog,
+                    "OpenCL binary cache file %s is present, will load kernels.\n",
+                    cacheFilename.c_str());
          }
          else
          {
              fprintf(fplog,
-                    "No OpenCL binary cache file was present, so will compile kernels normally.\n");
+                    "No OpenCL binary cache file was present for %s, so will compile kernels "
+                    "normally.\n",
+                    kernelBaseFilename.c_str());
          }
      }
      if (program == nullptr)
@@ -485,8 +489,7 @@ cl_program compileProgram(FILE*              fplog,
  
      /* Write log first, and then throw exception that the user know what is
         the issue even if the build fails. */
-    writeOclBuildLog(fplog, program, deviceId, kernelFilename, preprocessorOptions,
-                     buildStatus != CL_SUCCESS);
+    writeOclBuildLog(fplog, program, deviceId, kernelFilename, preprocessorOptions, buildStatus != CL_SUCCESS);
  
      if (buildStatus != CL_SUCCESS)
      {
@@ -511,7 +514,7 @@ cl_program compileProgram(FILE*              fplog,
              }
          }
      }
-    if ((OCL_VENDOR_NVIDIA == deviceVendorId) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
+    if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
      {
          /* If dumping intermediate files has been requested and this is an NVIDIA card
             => write PTX to file */