Simplified and updated OpenCL compilation

author Mark Abraham <mark.j.abraham@gmail.com>

Fri, 5 Feb 2016 23:01:18 +0000 (23:01 +0000)

committer Erik Lindahl <erik.lindahl@gmail.com>

Sun, 29 May 2016 11:31:43 +0000 (13:31 +0200)
author Mark Abraham <mark.j.abraham@gmail.com>
Fri, 5 Feb 2016 23:01:18 +0000 (23:01 +0000)
committer Erik Lindahl <erik.lindahl@gmail.com>
Sun, 29 May 2016 11:31:43 +0000 (13:31 +0200)
diff --git a/docs/user-guide/environment-variables.rst b/docs/user-guide/environment-variables.rst

index 3c95d2663a7f10d56ec40d5f9247b9cf56c935a9..b51e252b91c7412683196421acf895f0fbcc7da2 100644 (file)
--- a/docs/user-guide/environment-variables.rst
+++ b/docs/user-guide/environment-variables.rst
@@ -378,17 +378,9 @@ compilation of OpenCL kernels, but they are also used in device selection.
          the same will happen with the OpenCL version soon)
  
  ``GMX_OCL_DUMP_LOG``
-        If defined, the OpenCL build log is always written to file.
-        The file is saved in the current directory with the name
-        ``OpenCL_kernel_file_name.build_status`` where
-        ``OpenCL_kernel_file_name`` is the name of the file containing the
-        OpenCL source code (usually ``nbnxn_ocl_kernels.cl``) and
-        build_status can be either SUCCEEDED or FAILED. If this
-        environment variable is not defined, the default behavior is
-        the following:
-
-           - Debug build: build log is always written to file
-          - Release build: build log is written to file only in case of errors.
+        If defined, the OpenCL build log is always written to the
+        mdrun log file. Otherwise, the build log is written to the
+        log file only when an error occurs.
  
  ``GMX_OCL_VERBOSE``
          If defined, it enables verbose mode for OpenCL kernel build.
diff --git a/src/gromacs/gpu_utils/CMakeLists.txt b/src/gromacs/gpu_utils/CMakeLists.txt

index 090e227c834642a9a14096dc79e121fe20b759c6..00844e8996ec771bf9ccb7d59e2a8ebfa68d9c46 100644 (file)
--- a/src/gromacs/gpu_utils/CMakeLists.txt
+++ b/src/gromacs/gpu_utils/CMakeLists.txt
@@ -1,7 +1,7 @@
  #
  # This file is part of the GROMACS molecular simulation package.
  #
-# Copyright (c) 2015, by the GROMACS development team, led by
+# Copyright (c) 2015,2016, by the GROMACS development team, led by
  # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  # and including many others, as listed in the AUTHORS file in the
  # top-level source directory and at http://www.gromacs.org.
@@ -33,7 +33,7 @@
  # the research papers on the package. Check out http://www.gromacs.org.
  
  if (GMX_USE_OPENCL)
-    gmx_add_libgromacs_sources(gpu_utils_ocl.cpp ocl_compiler.cpp oclutils.cpp)
+    gmx_add_libgromacs_sources(gpu_utils_ocl.cpp ocl_compiler.cpp ocl_caching.cpp oclutils.cpp)
  endif()
  if (GMX_USE_CUDA)
      gmx_add_libgromacs_sources(cudautils.cu gpu_utils.cu pmalloc_cuda.cu)
diff --git a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp

index 3cd6cf0941e7a50819b59cb1dcddc66bc59622d9..0a672bc2a69a08be0f36d6e26eb4b219b6182152 100644 (file)
--- a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
+++ b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
@@ -142,7 +142,7 @@ static int is_gmx_supported_gpu_id(struct gmx_device_info_t *ocl_gpu_device)
   *  \param[in] vendor_name String with OpenCL vendor name.
   *  \returns               ocl_vendor_id_t value for the input vendor_name
   */
-ocl_vendor_id_t get_vendor_id(char *vendor_name)
+static ocl_vendor_id_t get_vendor_id(char *vendor_name)
  {
      if (vendor_name)
      {
diff --git a/src/gromacs/gpu_utils/ocl_caching.cpp b/src/gromacs/gpu_utils/ocl_caching.cpp

new file mode 100644 (file)

index 0000000..88cc863
--- /dev/null
+++ b/src/gromacs/gpu_utils/ocl_caching.cpp
@@ -0,0 +1,178 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *  \brief Define infrastructure for OpenCL JIT compilation for Gromacs
+ *
+ *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
+ *  \author Anca Hamuraru <anca@streamcomputing.eu>
+ *  \author Teemu Virolainen <teemu@streamcomputing.eu>
+ *  \author Mark Abraham <mark.j.abraham@gmail.com>
+ */
+
+#include "gmxpre.h"
+
+#include "ocl_caching.h"
+
+#include <cstdio>
+
+#include <string>
+#include <vector>
+
+#include "gromacs/utility/exceptions.h"
+#include "gromacs/utility/programcontext.h"
+#include "gromacs/utility/scoped_cptr.h"
+#include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textreader.h"
+
+namespace gmx
+{
+namespace ocl
+{
+
+/*! \brief RAII helper to use with scoped_cptr
+ *
+ * Can't use fclose because the template requires a function that
+ * returns void.
+ *
+ * \todo Either generalise scoped_cptr somehow, or (better) make
+ * general infrastructure for reading and writing binary lumps.
+ * Neither of these is a priority while JIT caching is inactive.
+ */
+static void fclose_wrapper(FILE *fp)
+{
+    fclose(fp);
+}
+
+std::string makeBinaryCacheFilename(const std::string &kernelFilename,
+                                    cl_device_id       deviceId)
+{
+    char   deviceName[1024];
+    cl_int cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
+    if (cl_error != CL_SUCCESS)
+    {
+        GMX_THROW(InternalError(formatString("Could not get OpenCL device name, error was %s", ocl_get_error_string(cl_error).c_str())));
+    }
+    std::string cacheFilename = "OpenCL_cache_" + kernelFilename + "_" + deviceName;
+    cacheFilename  = replaceAll(cacheFilename, ".", "_");
+    cacheFilename += ".bin";
+    return cacheFilename;
+}
+
+cl_program
+makeProgramFromCache(const std::string &filename,
+                     cl_context         context,
+                     cl_device_id       deviceId)
+{
+    // TODO all this file reading stuff should become gmx::BinaryReader
+    FILE *f = fopen(filename.c_str(), "rb");
+    scoped_cptr<FILE, fclose_wrapper> fileGuard(f);
+    if (!f)
+    {
+        GMX_THROW(FileIOError("Failed to open binary cache file " + filename));
+    }
+
+    // TODO more stdio error handling
+    fseek(f, 0, SEEK_END);
+    unsigned char             *binary;
+    scoped_cptr<unsigned char> binaryGuard;
+    size_t                     fileSize = ftell(f);
+    snew(binary, fileSize);
+    binaryGuard.reset(binary);
+    fseek(f, 0, SEEK_SET);
+    size_t readCount = fread(binary, 1, fileSize, f);
+
+    if (readCount != fileSize)
+    {
+        GMX_THROW(FileIOError("Failed to read binary cache file " + filename));
+    }
+
+    /* TODO If/when caching is re-enabled, compare current build
+     * options and code against the build options and the code
+     * corresponding to the cache. If any change is detected then the
+     * cache cannot be used.
+     *
+     * Also caching functionality will need full re-testing. */
+
+    /* Create program from pre-built binary */
+    cl_int     cl_error;
+    cl_program program = clCreateProgramWithBinary(context,
+                                                   1,
+                                                   &deviceId,
+                                                   &fileSize,
+                                                   const_cast<const unsigned char **>(&binary),
+                                                   NULL,
+                                                   &cl_error);
+    if (cl_error != CL_SUCCESS)
+    {
+        GMX_THROW(InternalError("Could not create OpenCL program, error was " + ocl_get_error_string(cl_error)));
+    }
+
+    return program;
+}
+
+void
+writeBinaryToCache(cl_program program, const std::string &filename)
+{
+    size_t fileSize;
+    cl_int cl_error = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(fileSize), &fileSize, NULL);
+    if (cl_error != CL_SUCCESS)
+    {
+        GMX_THROW(InternalError("Could not get OpenCL program binary size, error was " + ocl_get_error_string(cl_error)));
+    }
+
+    // TODO all this file writing stuff should become gmx::BinaryWriter
+    unsigned char             *binary;
+    snew(binary, fileSize);
+    scoped_cptr<unsigned char> binaryGuard(binary);
+
+    cl_error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(binary), &binary, NULL);
+    if (cl_error != CL_SUCCESS)
+    {
+        GMX_THROW(InternalError("Could not get OpenCL program binary, error was " + ocl_get_error_string(cl_error)));
+    }
+
+    FILE *f = fopen(filename.c_str(), "wb");
+    scoped_cptr<FILE, fclose_wrapper> fileGuard(f);
+    if (!f)
+    {
+        GMX_THROW(FileIOError("Failed to open binary cache file " + filename));
+    }
+
+    fwrite(binary, 1, fileSize, f);
+}
+
+} // namespace
+} // namespace
diff --git a/src/gromacs/gpu_utils/ocl_caching.h b/src/gromacs/gpu_utils/ocl_caching.h

new file mode 100644 (file)

index 0000000..47ee8f6
--- /dev/null
+++ b/src/gromacs/gpu_utils/ocl_caching.h
@@ -0,0 +1,105 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *  \brief Declare infrastructure for managing caching of OpenCL
+ *  JIT-ted binaries
+ *
+ *  This functionality is currently disabled in compileProgram()
+ *
+ *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
+ *  \author Anca Hamuraru <anca@streamcomputing.eu>
+ *  \author Teemu Virolainen <teemu@streamcomputing.eu>
+ *  \author Mark Abraham <mark.j.abraham@gmail.com>
+ */
+#ifndef GMX_GPU_UTILS_OCL_CACHING_H
+#define GMX_GPU_UTILS_OCL_CACHING_H
+
+#include <string>
+
+#include "gromacs/gpu_utils/oclutils.h"
+
+namespace gmx
+{
+namespace ocl
+{
+
+/*! \brief Construct the name for the binary cache file
+ *
+ * \param[in]  kernelFilename  Name of the kernel from which the binary will be compiled.
+ * \param[in]  deviceId        ID of the device upon which the binary is used.
+ *
+ * \todo The set of preprocessor options should also form part of the
+ * identification of the cached binary. Also perhaps compiler, runtime
+ * and device version info?
+ *
+ * \todo Mutual exclusion of ranks and nodes should also be implemented
+ * if/when caching is re-enabled.
+ *
+ * \returns The name of the cache file.
+ */
+std::string makeBinaryCacheFilename(const std::string &kernelFilename,
+                                    cl_device_id       deviceId);
+
+/*! \brief Check if there's a valid cache available, and return it if so
+ *
+ * \param[in]  filename   Name of valid file containing the binary cache
+ * \param[in]  context    The OpenCL context
+ * \param[in]  deviceId   The ID of the device on which to use the program
+ *
+ * \returns The OpenCL program read from the cache
+ *
+ * \throws InternalError  if an OpenCL error was encountered
+ *         FileIOError    if the file could not be opened
+ */
+cl_program
+makeProgramFromCache(const std::string &filename,
+                     cl_context         context,
+                     cl_device_id       deviceId);
+
+/*! \brief Implement caching of OpenCL binaries
+ *
+ * \param[in] program     Index of program to cache
+ * \param[in] filename    Name of file to use for the cache
+ *
+ * \throws InternalError  if an OpenCL error was encountered
+ *         FileIOError    if the file could not be opened
+ */
+void
+writeBinaryToCache(cl_program program, const std::string &filename);
+
+} // namespace
+} // namespace
+
+#endif
diff --git a/src/gromacs/gpu_utils/ocl_compiler.cpp b/src/gromacs/gpu_utils/ocl_compiler.cpp

index 6a4772aee3ab13f1e0380614d91836c4cb5f62b3..e87dd7034ed81b0833674b35f611d84b1afc97d0 100644 (file)
--- a/src/gromacs/gpu_utils/ocl_compiler.cpp
+++ b/src/gromacs/gpu_utils/ocl_compiler.cpp
@@ -38,10 +38,7 @@
   *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
   *  \author Anca Hamuraru <anca@streamcomputing.eu>
   *  \author Teemu Virolainen <teemu@streamcomputing.eu>
- *
- * TODO Currently this file handles compilation of NBNXN kernels,
- * but e.g. organizing the defines for various physics models
- * is leaking in here a bit.
+ *  \author Mark Abraham <mark.j.abraham@gmail.com>
   */
  
  #include "gmxpre.h"
@@ -50,275 +47,157 @@
  
  #include "config.h"
  
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include <cstdio>
  
  #include <string>
+#include <vector>
  
+#include "gromacs/gpu_utils/oclutils.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/exceptions.h"
+#include "gromacs/utility/gmxassert.h"
  #include "gromacs/utility/path.h"
  #include "gromacs/utility/programcontext.h"
+#include "gromacs/utility/scoped_cptr.h"
+#include "gromacs/utility/smalloc.h"
  #include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/textreader.h"
  
-/*! \brief Path separator
- */
-#define SEPARATOR '/'
-
-
-/*! \brief True if OpenCL binary caching is on.
- *  Currently caching is disabled until we resolve concurrency issues. */
-static bool bCacheOclBuild = false;
-/* bCacheOclBuild = (NULL == getenv("GMX_OCL_NOGENCACHE"));*/
-
-/*! \brief Compiler options index
- */
-typedef enum {
-    b_invalid_option          = 0,
-    b_amd_cpp,
-    b_nvidia_verbose,
-    b_generic_cl11,
-    b_generic_cl12,
-    b_generic_fast_relaxed_math,
-    b_generic_noopt_compilation,
-    b_generic_debug_symbols,
-    b_amd_dump_temp_files,
-    b_include_install_opencl_dir,
-    b_include_source_opencl_dirs,
-    b_num_build_options
-} build_options_index_t;
-
-/*! \brief List of available OpenCL compiler options
- */
-static const char* build_options_list[] = {
-    "",
-    "-x clc++",                         /**< AMD C++ extension */
-    "-cl-nv-verbose",                   /**< Nvidia verbose build log */
-    "-cl-std=CL1.1",                    /**< Force CL 1.1  */
-    "-cl-std=CL1.2",                    /**< Force CL 1.2  */
-    "-cl-fast-relaxed-math",            /**< Fast math */
-    "-cl-opt-disable",                  /**< Disable optimisations */
-    "-g",                               /**< Debug symbols */
-    "-save-temps"                       /**< AMD option to dump intermediate temporary
-                                             files such as IL or ISA code */
-};
+#include "ocl_caching.h"
  
-/*! \brief Available sources
- */
-static const char * kernel_filenames[] = {"nbnxn_ocl_kernels.cl"};
-
-/*! \brief Defines to enable specific kernels based on vendor
- */
-static const char * kernel_vendor_spec_definitions[] = {
-    "-D_WARPLESS_SOURCE_",     /**< nbnxn_ocl_kernel_nowarp.clh  */
-    "-D_NVIDIA_SOURCE_",       /**< nbnxn_ocl_kernel_nvidia.clh  */
-    "-D_AMD_SOURCE_"           /**< nbnxn_ocl_kernel_amd.clh     */
-};
-
-
-/*! \brief Get the string of a build option of the specific id
- * \param  build_option_id  The option id as defines in the header
- * \return String containing the actual build option string for the compiler
- */
-static const char* get_ocl_build_option(build_options_index_t build_option_id)
+namespace gmx
  {
-    if (build_option_id < b_num_build_options)
-    {
-        return build_options_list[build_option_id];
-    }
-    else
-    {
-        return build_options_list[b_invalid_option];
-    }
-}
-
-/*! \brief Get the size of the string (without null termination) required
- *  for the build option of the specific id
- * \param  build_option_id  The option id as defines in the header
- * \return size_t containing the size in bytes of the build option string
- */
-static size_t get_ocl_build_option_length(build_options_index_t build_option_id)
+namespace ocl
  {
  
-    if (build_option_id < b_num_build_options)
-    {
-        return strlen(build_options_list[build_option_id]);
-    }
-    else
-    {
-        return strlen(build_options_list[b_invalid_option]);
-    }
-}
+/*! \brief True if OpenCL binary caching is enabled.
+ *
+ *  Currently caching is disabled until we resolve concurrency issues. */
+static bool useBuildCache = false; // (NULL == getenv("GMX_OCL_NOGENCACHE"));
  
-/*! \brief Get the size of final composed build options literal
+/*! \brief Handles writing the OpenCL JIT compilation log to \c fplog.
   *
- * \param build_device_vendor_id  Device vendor id. Used to
- *          automatically enable some vendor specific options
- * \param custom_build_options_prepend Prepend options string
- * \param custom_build_options_append  Append  options string
- * \return size_t containing the size in bytes of the composed
- *             build options string including null termination
- */
-static size_t
-create_ocl_build_options_length(
-        ocl_vendor_id_t build_device_vendor_id,
-        const char *    custom_build_options_prepend,
-        const char *    custom_build_options_append)
+ * If \c fplog is non-null and either the GMX_OCL_DUMP_LOG environment
+ * variable is set or the compilation failed, then the OpenCL
+ * compilation log is written.
+ *
+ * \param fplog               Open file pointer to log file
+ * \param program             OpenCL program that was compiled
+ * \param deviceId            Id of the device for which compilation took place
+ * \param kernelFilename      File name containing the kernel
+ * \param preprocessorOptions String containing the preprocessor command-line options used for the build
+ * \param buildFailed         Whether the OpenCL build succeeded
+ *
+ * \throws std::bad_alloc if out of memory */
+static void
+writeOclBuildLog(FILE              *fplog,
+                 cl_program         program,
+                 cl_device_id       deviceId,
+                 const std::string &kernelFilename,
+                 const std::string &preprocessorOptions,
+                 bool               buildFailed)
  {
-    size_t build_options_length = 0;
-    size_t whitespace           = 1;
+    bool writeOutput = ((fplog != nullptr) &&
+                        (buildFailed || (getenv("GMX_OCL_DUMP_LOG") != nullptr)));
  
-    assert(build_device_vendor_id <= OCL_VENDOR_UNKNOWN);
-
-    if (custom_build_options_prepend)
+    if (!writeOutput)
      {
-        build_options_length +=
-            strlen(custom_build_options_prepend)+whitespace;
+        return;
      }
  
-    if ( (build_device_vendor_id == OCL_VENDOR_AMD) && getenv("GMX_OCL_DEBUG") && getenv("GMX_OCL_FORCE_CPU") )
+    // Get build log string size
+    size_t buildLogSize;
+    cl_int cl_error = clGetProgramBuildInfo(program,
+                                            deviceId,
+                                            CL_PROGRAM_BUILD_LOG,
+                                            0,
+                                            NULL,
+                                            &buildLogSize);
+    if (cl_error != CL_SUCCESS)
      {
-        build_options_length += get_ocl_build_option_length(b_generic_debug_symbols)+whitespace;
+        GMX_THROW(InternalError("Could not get OpenCL program build log size, error was " + ocl_get_error_string(cl_error)));
      }
  
-    if (getenv("GMX_OCL_NOOPT"))
+    char             *buildLog = nullptr;
+    scoped_cptr<char> buildLogGuard;
+    if (buildLogSize != 0)
      {
-        build_options_length +=
-            get_ocl_build_option_length(b_generic_noopt_compilation)+whitespace;
-    }
+        /* Allocate memory to fit the build log,
+           it can be very large in case of errors */
+        snew(buildLog, buildLogSize);
+        buildLogGuard.reset(buildLog);
  
-    if (getenv("GMX_OCL_FASTMATH"))
-    {
-        build_options_length +=
-            get_ocl_build_option_length(b_generic_fast_relaxed_math)+whitespace;
-    }
-
-    if ((build_device_vendor_id == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
-    {
-        build_options_length +=
-            get_ocl_build_option_length(b_nvidia_verbose) + whitespace;
-    }
-
-    if ((build_device_vendor_id == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
-    {
-        /* To dump OpenCL build intermediate files, caching must be off */
-        if (bCacheOclBuild)
+        /* Get the actual compilation log */
+        cl_error = clGetProgramBuildInfo(program,
+                                         deviceId,
+                                         CL_PROGRAM_BUILD_LOG,
+                                         buildLogSize,
+                                         buildLog,
+                                         NULL);
+        if (cl_error != CL_SUCCESS)
          {
-            build_options_length +=
-                get_ocl_build_option_length(b_amd_dump_temp_files) + whitespace;
+            GMX_THROW(InternalError("Could not get OpenCL program build log, error was " + ocl_get_error_string(cl_error)));
          }
      }
  
-    if (custom_build_options_append)
+    std::string message;
+    if (buildFailed)
      {
-        build_options_length +=
-            strlen(custom_build_options_append)+whitespace;
+        message += "Compilation of source file " + kernelFilename + " failed!\n";
      }
+    else
+    {
+        message += "Compilation of source file " + kernelFilename + " was successful!\n";
+    }
+    message += "-- Used build options: " + preprocessorOptions + "\n";
+    message += "--------------LOG START---------------\n";
+    message += buildLog;
+    message += "---------------LOG END----------------\n";;
  
-    return build_options_length+1;
+    fputs(message.c_str(), fplog);
  }
  
-/*! \brief Get the size of final composed build options literal
+/*! \brief Construct compiler options string
   *
- * \param build_options_string The string where to save the
- *                                  resulting build options in
- * \param build_options_length The size of the build options
- * \param build_device_vendor_id  Device vendor id. Used to
- *          automatically enable some vendor specific options
- * \param custom_build_options_prepend Prepend options string
- * \param custom_build_options_append  Append  options string
- * \return The string build_options_string with the build options
+ * \param deviceVendorId  Device vendor id. Used to
+ *          automatically enable some vendor-specific options
+ * \return The string with the compiler options
   */
-static char *
-create_ocl_build_options(
-        char *             build_options_string,
-        size_t gmx_unused  build_options_length,
-        ocl_vendor_id_t    build_device_vendor_id,
-        const char *       custom_build_options_prepend,
-        const char *       custom_build_options_append)
+static std::string
+selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
  {
-    size_t char_added = 0;
-
-    if (custom_build_options_prepend)
-    {
-        strncpy( build_options_string+char_added,
-                 custom_build_options_prepend,
-                 strlen(custom_build_options_prepend));
-
-        char_added += strlen(custom_build_options_prepend);
-        build_options_string[char_added++] = ' ';
-    }
+    std::string compilerOptions;
  
      if (getenv("GMX_OCL_NOOPT") )
      {
-        strncpy( build_options_string+char_added,
-                 get_ocl_build_option(b_generic_noopt_compilation),
-                 get_ocl_build_option_length(b_generic_noopt_compilation) );
-
-        char_added += get_ocl_build_option_length(b_generic_noopt_compilation);
-        build_options_string[char_added++] = ' ';
-
+        compilerOptions += " -cl-opt-disable";
      }
  
      if (getenv("GMX_OCL_FASTMATH") )
      {
-        strncpy( build_options_string+char_added,
-                 get_ocl_build_option(b_generic_fast_relaxed_math),
-                 get_ocl_build_option_length(b_generic_fast_relaxed_math) );
-
-        char_added += get_ocl_build_option_length(b_generic_fast_relaxed_math);
-        build_options_string[char_added++] = ' ';
+        compilerOptions += " -cl-fast-relaxed-math";
      }
  
-    if ((build_device_vendor_id == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
+    if ((deviceVendorId == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
      {
-        strncpy(build_options_string + char_added,
-                get_ocl_build_option(b_nvidia_verbose),
-                get_ocl_build_option_length(b_nvidia_verbose));
-
-        char_added += get_ocl_build_option_length(b_nvidia_verbose);
-        build_options_string[char_added++] = ' ';
+        compilerOptions += " -cl-nv-verbose";
      }
  
-    if ((build_device_vendor_id == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
+    if ((deviceVendorId == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
      {
          /* To dump OpenCL build intermediate files, caching must be off */
-        if (NULL != getenv("GMX_OCL_NOGENCACHE"))
+        if (!useBuildCache)
          {
-            strncpy(build_options_string + char_added,
-                    get_ocl_build_option(b_amd_dump_temp_files),
-                    get_ocl_build_option_length(b_amd_dump_temp_files));
-
-            char_added += get_ocl_build_option_length(b_amd_dump_temp_files);
-            build_options_string[char_added++] = ' ';
+            compilerOptions += " -save-temps";
          }
      }
  
-    if ( ( build_device_vendor_id == OCL_VENDOR_AMD ) && getenv("GMX_OCL_DEBUG") && getenv("GMX_OCL_FORCE_CPU"))
-    {
-        strncpy( build_options_string+char_added,
-                 get_ocl_build_option(b_generic_debug_symbols),
-                 get_ocl_build_option_length(b_generic_debug_symbols) );
-
-        char_added += get_ocl_build_option_length(b_generic_debug_symbols);
-        build_options_string[char_added++] = ' ';
-    }
-
-    if (custom_build_options_append)
+    if ( ( deviceVendorId == OCL_VENDOR_AMD ) && getenv("GMX_OCL_DEBUG"))
      {
-        strncpy( build_options_string+char_added,
-                 custom_build_options_append,
-                 strlen(custom_build_options_append) );
-
-        char_added += strlen(custom_build_options_append);
-        build_options_string[char_added++] = ' ';
+        compilerOptions += " -g";
      }
  
-    build_options_string[char_added++] = '\0';
-
-    assert(char_added == build_options_length);
-
-    return build_options_string;
+    return compilerOptions;
  }
  
  /*! \brief Get the path to the main folder storing OpenCL kernels.
@@ -330,753 +209,318 @@ create_ocl_build_options(
   *
   * \return OS-normalized path string to the main folder storing OpenCL kernels
   *
- * \throws std::bad_alloc if out of memory.
+ * \throws std::bad_alloc    if out of memory.
+ *         FileIOError  if GMX_OCL_FILE_PATH does not specify a readable path
   */
  static std::string
-get_ocl_root_path()
+getKernelRootPath()
  {
-    const char *gmx_ocl_file_path;
-    std::string ocl_root_path;
-
+    std::string kernelRootPath;
      /* Use GMX_OCL_FILE_PATH if the user has defined it */
-    gmx_ocl_file_path = getenv("GMX_OCL_FILE_PATH");
+    const char *gmxOclFilePath = getenv("GMX_OCL_FILE_PATH");
  
-    if (!gmx_ocl_file_path)
+    if (gmxOclFilePath == nullptr)
      {
          /* Normal way of getting ocl_root_dir. First get the right
             root path from the path to the binary that is running. */
-        gmx::InstallationPrefixInfo info           = gmx::getProgramContext().installationPrefix();
+        InstallationPrefixInfo      info           = getProgramContext().installationPrefix();
          std::string                 dataPathSuffix = (info.bSourceLayout ?
                                                        "src/gromacs/mdlib/nbnxn_ocl" :
                                                        OCL_INSTALL_DIR);
-        ocl_root_path = gmx::Path::join(info.path, dataPathSuffix);
+        kernelRootPath = Path::join(info.path, dataPathSuffix);
      }
      else
      {
-        ocl_root_path = gmx_ocl_file_path;
+        if (!Directory::exists(gmxOclFilePath))
+        {
+            GMX_THROW(FileIOError(formatString("GMX_OCL_FILE_PATH must point to the directory where OpenCL"
+                                               "kernels are found, but '%s' does not exist", gmxOclFilePath)));
+        }
+        kernelRootPath = gmxOclFilePath;
      }
  
      // Make sure we return an OS-correct path format
-    return gmx::Path::normalize(ocl_root_path);
+    return Path::normalize(kernelRootPath);
  }
  
-/*! \brief Get the size of the full kernel source file path and name
+/*!  \brief Get the warp size reported by device
   *
- * The following full path size is computed:
- * strlen(ocl_root_path) + strlen(kernel_id.cl) + separator + null term
+ *  This is platform implementation dependant and seems to only work on the Nvidia and AMD platforms!
+ *  Nvidia reports 32, AMD for GPU 64. Ignore the rest
   *
- * \param kernel_src_id Id of the kernel source (auto,nvidia,amd,nowarp)
- * \return Size in bytes of the full kernel source file path and name including
- *          separators and null termination
+ *  \param  context   Current OpenCL context
+ *  \param  deviceId OpenCL device with the context
+ *  \return cl_int value of the warp size
   *
- * \throws std::bad_alloc if out of memory */
+ * \throws InternalError if an OpenCL error was encountered
+ */
  static size_t
-get_ocl_kernel_source_file_info(kernel_source_index_t kernel_src_id)
+getWarpSize(cl_context context, cl_device_id deviceId)
  {
-    std::string ocl_root_path = get_ocl_root_path();
-
-    if (ocl_root_path.empty())
+    cl_int      cl_error;
+    const char *warpSizeKernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
+    cl_program  program        = clCreateProgramWithSource(context, 1, (const char**)&warpSizeKernel, NULL, &cl_error);
+    if (cl_error != CL_SUCCESS)
      {
-        return 0;
+        GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
      }
  
-    return (ocl_root_path.length() +                    /* Path to the main OpenCL folder*/
-            1 +                                         /* Separator */
-            strlen(kernel_filenames[kernel_src_id]) +   /* Kernel source file name */
-            1                                           /* null char */
-            );
-}
-
-/*! \brief Compose and the full path and name of the kernel src to be used
- *
- * \param ocl_kernel_filename   String where the full path and name will be saved
- * \param kernel_src_id         Id of the kernel source (default)
- * \param kernel_filename_len   Size of the full path and name string, as computed by get_ocl_kernel_source_file_info()
- * \return The ocl_kernel_filename complete with the full path and name; NULL if error.
- *
- * \throws std::bad_alloc if out of memory */
-static char *
-get_ocl_kernel_source_path(
-        char *                  ocl_kernel_filename,
-        kernel_source_index_t   kernel_src_id,
-        size_t gmx_unused       kernel_filename_len)
-{
-    std::string ocl_root_path;
-
-    assert(kernel_filename_len != 0);
-    assert(ocl_kernel_filename != NULL);
-
-    ocl_root_path = get_ocl_root_path();
-    if (ocl_root_path.empty())
-    {
-        return NULL;
-    }
-
-    size_t chars_copied = 0;
-    strncpy(ocl_kernel_filename, ocl_root_path.c_str(), ocl_root_path.length());
-    chars_copied += ocl_root_path.length();
-
-    ocl_kernel_filename[chars_copied++] = SEPARATOR;
-
-    strncpy(&ocl_kernel_filename[chars_copied],
-            kernel_filenames[kernel_src_id],
-            strlen(kernel_filenames[kernel_src_id]) );
-    chars_copied += strlen(kernel_filenames[kernel_src_id]);
-
-    ocl_kernel_filename[chars_copied++] = '\0';
-
-    assert(chars_copied == kernel_filename_len);
-
-    return ocl_kernel_filename;
-}
-
-/* Undefine the separators */
-#undef SEPARATOR
-
-/*! \brief Loads the src inside the file filename onto a string in memory
- *
- * \param filename The name of the file to be read
- * \param p_source_length Pointer to the size of the source in bytes
- *                          (without null termination)
- * \return A string with the contents of the file with name filename,
- *  or NULL if there was a problem opening/reading the file
- */
-static char*
-load_ocl_source(const char* filename, size_t* p_source_length)
-{
-    FILE * filestream = NULL;
-    char * ocl_source;
-    size_t source_length;
-
-    source_length = 0;
-
-    if (!filename)
+    cl_error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+    if (cl_error != CL_SUCCESS)
      {
-        return NULL;
+        GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
      }
  
-    filestream    = fopen(filename, "rb");
-    if (!filestream)
+    cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
+    if (cl_error != CL_SUCCESS)
      {
-        return NULL;
+        GMX_THROW(InternalError("Could not create OpenCL kernel to determine warp size, error was " + ocl_get_error_string(cl_error)));
      }
  
-    fseek(filestream, 0, SEEK_END);
-    source_length = ftell(filestream);
-    fseek(filestream, 0, SEEK_SET);
-
-    ocl_source = (char*)malloc(source_length + 1);
-    if (fread(ocl_source, source_length, 1, filestream) != 1)
+    size_t warpSize = 0;
+    cl_error = clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
+                                        sizeof(warpSize), &warpSize, NULL);
+    if (cl_error != CL_SUCCESS)
      {
-        fclose(filestream);
-        free(ocl_source);
-        return 0;
+        GMX_THROW(InternalError("Could not measure OpenCL warp size, error was " + ocl_get_error_string(cl_error)));
      }
-
-    fclose(filestream);
-    ocl_source[source_length] = '\0';
-
-    *p_source_length = source_length;
-    return ocl_source;
-}
-
-/*! \brief Handles the dumping of the OpenCL JIT compilation log
- *
- * In a debug build:
- *  -Success: Save to file kernel_id.SUCCEEDED in the run folder.
- *  -Fail   : Save to file kernel_id.FAILED in the run folder.
- *            Dump to stderr
- * In a release build:
- *  -Success: Nothing is logged.
- *  -Fail   : Save to a file kernel_id.FAILED in the run folder.
- * If GMX_OCL_DUMP_LOG is set, log is always dumped to file
- * If OCL_JIT_DUMP_STDERR is set, log is always dumped to stderr
- *
- * \param build_log String containing the OpenCL JIT compilation log
- * \param build_options_string String containing the options used for the build
- * \param build_status The OpenCL type status of the build (CL_SUCCESS etc)
- * \param kernel_src_id The id of the kernel src used for the build (default)
- *
- * \throws std::bad_alloc if out of memory */
-static void
-handle_ocl_build_log(
-        const char        *   build_log,
-        const char        *   build_options_string,
-        cl_int                build_status,
-        kernel_source_index_t kernel_src_id)
-{
-    bool dumpStdErr = false;
-    bool dumpFile;
-#ifdef NDEBUG
-    dumpFile   = (build_status != CL_SUCCESS);
-#else
-    dumpFile   = true;
-    if (build_status != CL_SUCCESS)
+    if (warpSize == 0)
      {
-        dumpStdErr = true;
+        GMX_THROW(InternalError(formatString("Did not measure a valid OpenCL warp size")));
      }
-#endif
  
-    /* Override default handling */
-    if (getenv("GMX_OCL_DUMP_LOG") != NULL)
+    cl_error = clReleaseKernel(kernel);
+    if (cl_error != CL_SUCCESS)
      {
-        dumpFile = true;
+        GMX_THROW(InternalError("Could not release OpenCL warp-size kernel, error was " + ocl_get_error_string(cl_error)));
      }
-    if (getenv("OCL_JIT_DUMP_STDERR") != NULL)
+    cl_error = clReleaseProgram(program);
+    if (cl_error != CL_SUCCESS)
      {
-        dumpStdErr = true;
+        GMX_THROW(InternalError("Could not release OpenCL warp-size program, error was " + ocl_get_error_string(cl_error)));
      }
  
-    if (dumpFile || dumpStdErr)
-    {
-        FILE       *build_log_file       = NULL;
-        const char *fail_header          = "Compilation of source file failed! \n";
-        const char *success_header       = "Compilation of source file was successful! \n";
-        const char *log_header           = "--------------LOG START---------------\n";
-        const char *log_footer           = "---------------LOG END----------------\n";
-        char       *build_info;
-        std::string log_fname;
-
-        build_info = (char*)malloc(32 + strlen(build_options_string) );
-        sprintf(build_info, "-- Used build options: %s\n", build_options_string);
-
-        if (dumpFile)
-        {
-            log_fname = gmx::formatString("%s.%s", kernel_filenames[kernel_src_id],
-                                          (build_status == CL_SUCCESS) ? "SUCCEEDED" : "FAILED");
-            build_log_file = fopen(log_fname.c_str(), "w");
-        }
-
-        size_t complete_message_size = 0;
-        char * complete_message;
-
-
-        complete_message_size  =  (build_status == CL_SUCCESS) ? strlen(success_header) : strlen(fail_header);
-        complete_message_size += strlen(build_info) + strlen(log_header) + strlen(log_footer);
-        complete_message_size += strlen(build_log);
-        complete_message_size += 1; //null termination
-        complete_message       = (char*)malloc(complete_message_size);
-
-        sprintf(complete_message, "%s%s%s%s%s",
-                (build_status == CL_SUCCESS) ? success_header : fail_header,
-                build_info,
-                log_header,
-                build_log,
-                log_footer);
-
-        if (dumpFile)
-        {
-            if (build_log_file)
-            {
-                fprintf(build_log_file, "%s", complete_message);
-            }
-
-            printf("The OpenCL compilation log has been saved in \"%s\"\n", log_fname.c_str());
-        }
-        if (dumpStdErr)
-        {
-            if (build_status != CL_SUCCESS)
-            {
-                fprintf(stderr, "%s", complete_message);
-            }
-        }
-        if (build_log_file)
-        {
-            fclose(build_log_file);
-        }
-
-        free(complete_message);
-        free(build_info);
-    }
+    return warpSize;
  }
  
-/*!  \brief Get the warp size reported by device
+/*! \brief Select a compilation-line define for a vendor-specific kernel choice from vendor id
   *
- *  This is platform implementation dependant and seems to only work on the Nvidia and Amd platforms!
- *  Nvidia reports 32, Amd for GPU 64. Ignore the rest
+ * \param[in] vendorId Vendor id enumerator
   *
- *  \param  context   Current OpenCL context
- *  \param  device_id OpenCL device with the context
- *  \return cl_int value of the warp size
+ * \return The appropriate compilation-line define
   */
-static cl_int
-ocl_get_warp_size(cl_context context, cl_device_id device_id)
-{
-    cl_int      cl_error     = CL_SUCCESS;
-    size_t      warp_size    = 0;
-    const char *dummy_kernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
-
-    cl_program  program =
-        clCreateProgramWithSource(context, 1, (const char**)&dummy_kernel, NULL, &cl_error);
-
-    cl_error =
-        clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
-
-    cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
-
-    cl_error = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
-                                        sizeof(size_t), &warp_size, NULL);
-
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-
-    assert(warp_size != 0);
-    assert(cl_error == CL_SUCCESS);
-    return warp_size;
-
-}
-
-/*! \brief Automatically select vendor-specific kernel from vendor id
- *
- * \param vendor_id Vendor id enumerator (amd,nvidia,intel,unknown)
- * \return Vendor-specific kernel version
- */
-static kernel_vendor_spec_t
-ocl_autoselect_kernel_from_vendor(ocl_vendor_id_t vendor_id)
+static const char *
+makeVendorFlavorChoice(ocl_vendor_id_t vendorId)
  {
-    kernel_vendor_spec_t kernel_vendor;
-#ifndef NDEBUG
-    printf("Selecting kernel source automatically\n");
-#endif
-    switch (vendor_id)
+    const char *choice;
+    switch (vendorId)
      {
          case OCL_VENDOR_AMD:
-            kernel_vendor = amd_vendor_kernels;
-            printf("Selecting kernel for AMD\n");
+            choice = "-D_AMD_SOURCE_";
              break;
          case OCL_VENDOR_NVIDIA:
-            kernel_vendor = nvidia_vendor_kernels;
-            printf("Selecting kernel for NVIDIA\n");
+            choice = "-D_NVIDIA_SOURCE_";
              break;
          default:
-            kernel_vendor = generic_vendor_kernels;
-            printf("Selecting generic kernel\n");
+            choice = "-D_WARPLESS_SOURCE_";
              break;
      }
-    return kernel_vendor;
-}
-
-/*! \brief Returns the compiler define string needed to activate vendor-specific kernels
- *
- * \param kernel_spec Kernel vendor specification
- * \return String with the define for the spec
- */
-static const char *
-ocl_get_vendor_specific_define(kernel_vendor_spec_t kernel_spec)
-{
-    assert(kernel_spec < auto_vendor_kernels );
-#ifndef NDEBUG
-    printf("Setting up kernel vendor spec definitions:  %s \n", kernel_vendor_spec_definitions[kernel_spec]);
-#endif
-    return kernel_vendor_spec_definitions[kernel_spec];
+    return choice;
  }
  
-/*! \brief Check if there's a valid cache available, and return it if so
+/*! \brief Create include paths for kernel sources.
   *
- * \param[in]  ocl_binary_filename   Name of file containing the binary cache
- * \param[in]  build_options_string  Compiler command-line options to use (currently unused)
- * \param[in]  ocl_source            NULL-terminated string of OpenCL source code (currently unused)
- * \param[out] ocl_binary_size       Size of the binary file once loaded in memory
- * \param[out] ocl_binary            Pointer to the binary file bytes (valid only if return is true)
- * \return                           Whether the file reading was successful
+ * All OpenCL kernel files are expected to be stored in one single folder.
   *
- * \todo Compare current build options and code against the build
- * options and the code corresponding to the cache. If any change is
- * detected this function must return false.
+ * \throws std::bad_alloc  if out of memory.
   */
-bool
-check_ocl_cache(char            *ocl_binary_filename,
-                char gmx_unused *build_options_string,
-                char gmx_unused *ocl_source,
-                size_t          *ocl_binary_size,
-                unsigned char  **ocl_binary)
-{
-    FILE  *f;
-    size_t read_count;
-
-    f = fopen(ocl_binary_filename, "rb");
-    if (!f)
-    {
-        return false;
-    }
-
-    fseek(f, 0, SEEK_END);
-    *ocl_binary_size = ftell(f);
-    *ocl_binary      = (unsigned char*)malloc(*ocl_binary_size);
-    fseek(f, 0, SEEK_SET);
-    read_count = fread(*ocl_binary, 1, *ocl_binary_size, f);
-    fclose(f);
-
-    if (read_count != (*ocl_binary_size))
-    {
-        return false;
-    }
-
-    return true;
-}
-
-/*! \brief Builds a string with build options for the OpenCL kernels
- *
- * \throws std::bad_alloc if out of memory */
-char*
-ocl_get_build_options_string(cl_context           context,
-                             cl_device_id         device_id,
-                             kernel_vendor_spec_t kernel_vendor_spec,
-                             ocl_vendor_id_t      ocl_device_vendor,
-                             const char *         defines_for_kernel_types,
-                             const char *         runtime_consts)
+static std::string makeKernelIncludePathOption(const std::string &unescapedKernelRootPath)
  {
-    char * build_options_string               = NULL;
-    char   custom_build_options_prepend[1024] = { 0 };
-    char  *custom_build_options_append        = NULL;
-    cl_int warp_size = 0;
-
-    /* Get the reported warp size. Compile a small dummy kernel to do so */
-    warp_size = ocl_get_warp_size(context, device_id);
-
-    /* Select vendor specific kernels automatically */
-    if (kernel_vendor_spec == auto_vendor_kernels)
-    {
-        kernel_vendor_spec = ocl_autoselect_kernel_from_vendor(ocl_device_vendor);
-    }
+    std::string includePathOption;
  
-    /* Create include paths for kernel sources.
-       All OpenCL kernel files are expected to be stored in one single folder. */
-    {
-        /* Apple does not seem to accept the quoted include paths other
-         * OpenCL implementations are happy with. Since the standard still says
-         * it should be quoted, we handle Apple as a special case.
-         */
+    /* Apple does not seem to accept the quoted include paths other
+     * OpenCL implementations are happy with. Since the standard still says
+     * it should be quoted, we handle Apple as a special case.
+     */
  #ifdef __APPLE__
-        std::string unescaped_ocl_root_path = get_ocl_root_path();
-        std::string ocl_root_path;
+    includePathOption += "-I";
  
-        char        incl_opt_start[] = "-I";
-        char        incl_opt_end[]   = "";
-
-        for (std::string::size_type i = 0; i < unescaped_ocl_root_path.length(); i++)
+    // Prepend all the spaces with a backslash
+    for (std::string::size_type i = 0; i < unescapedKernelRootPath.length(); i++)
+    {
+        if (unescapedKernelRootPath[i] == ' ')
          {
-            if (unescaped_ocl_root_path[i] == ' ')
-            {
-                ocl_root_path.push_back('\\');
-            }
-            ocl_root_path.push_back(unescaped_ocl_root_path[i]);
+            includePathOption.push_back('\\');
          }
-        // Here the Apple ocl_root_path has all spaces prepended with a backslash
+        includePathOption.push_back(unescapedKernelRootPath[i]);
+    }
  #else
-        std::string ocl_root_path = get_ocl_root_path();
-
-        char        incl_opt_start[] = "-I\"";
-        char        incl_opt_end[]   = "\"";
-
+    includePathOption += "-I\"" + unescapedKernelRootPath + "\"";
  #endif
-        size_t      chars            = 0;
-
-        custom_build_options_append =
-            (char*)calloc((ocl_root_path.length()   /* Path to the OpenCL folder */
-                           + strlen(incl_opt_start) /* -I" */
-                           + strlen(incl_opt_end)   /* " */
-                           + 1                      /* null char */
-                           ), 1);
-
-        strncpy(&custom_build_options_append[chars], incl_opt_start, strlen(incl_opt_start));
-        chars += strlen(incl_opt_start);
-
-        strncpy(&custom_build_options_append[chars], ocl_root_path.c_str(), ocl_root_path.length());
-        chars += ocl_root_path.length();
-
-        strncpy(&custom_build_options_append[chars], incl_opt_end, strlen(incl_opt_end));
-    }
-
-    /* Get vendor specific define (amd,nvidia,nowarp) */
-    const char * kernel_vendor_spec_define =
-        ocl_get_vendor_specific_define(kernel_vendor_spec);
-
-    /* Compose the build options to be prepended. */
-    sprintf(custom_build_options_prepend,
-            "-DWARP_SIZE_TEST=%d %s %s %s",
-            warp_size,
-            kernel_vendor_spec_define,
-            defines_for_kernel_types,
-            runtime_consts ? runtime_consts : ""
-            );
-
-    /* Get the size of the complete build options string */
-    size_t build_options_length =
-        create_ocl_build_options_length(
-                ocl_device_vendor,
-                custom_build_options_prepend,
-                custom_build_options_append
-                );
-
-    build_options_string = (char *)malloc(build_options_length);
-
-    /* Compose the complete build options */
-    create_ocl_build_options(
-            build_options_string,
-            build_options_length,
-            ocl_device_vendor,
-            custom_build_options_prepend,
-            custom_build_options_append
-            );
  
-    if (custom_build_options_append)
-    {
-        free(custom_build_options_append);
-    }
-
-    return build_options_string;
+    return includePathOption;
  }
  
-/*! \brief Implement caching of OpenCL binaries
+/*! \brief Builds a string with build options for the OpenCL kernels
   *
- * \param[in] program     Index of program to cache
- * \param[in] file_name  Name of file to use for the cache
- */
-void
-print_ocl_binaries_to_file(cl_program program, char* file_name)
+ * \throws std::bad_alloc  if out of memory. */
+std::string
+makePreprocessorOptions(const std::string   &kernelRootPath,
+                        size_t               warpSize,
+                        ocl_vendor_id_t      deviceVendorId,
+                        const std::string   &extraDefines)
  {
-    size_t         ocl_binary_size = 0;
-    unsigned char *ocl_binary      = NULL;
-
-    clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &ocl_binary_size, NULL);
-
-    ocl_binary = (unsigned char*)malloc(ocl_binary_size);
-
-    clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char *), &ocl_binary, NULL);
-
-    FILE *f = fopen(file_name, "wb");
-    fwrite(ocl_binary, 1, ocl_binary_size, f);
-    fclose(f);
+    std::string preprocessorOptions;
  
-    free(ocl_binary);
+    /* Compose the complete build options */
+    preprocessorOptions  = formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize));
+    preprocessorOptions += ' ';
+    preprocessorOptions += makeVendorFlavorChoice(deviceVendorId);
+    preprocessorOptions += ' ';
+    preprocessorOptions += extraDefines;
+    preprocessorOptions += ' ';
+    preprocessorOptions += selectCompilerOptions(deviceVendorId);
+    preprocessorOptions += ' ';
+    preprocessorOptions += makeKernelIncludePathOption(kernelRootPath);
+
+    return preprocessorOptions;
  }
  
-/*! \brief Compile the kernels as described by kernel src id and vendor spec
- *
- * \param[in]  kernel_source_file        Index of the kernel src to be used (default)
- * \param[in]  kernel_vendor_spec        Vendor-specific compilation (auto,nvidia,amd,nowarp)
- * \param[in]  defines_for_kernel_types  Preprocessor defines that trigger the compilation of the kernels
- * \param[out] result_str                Gromacs error string
- * \param[in]  context                   Current context on the device to compile for
- * \param[in]  device_id                 OpenCL device id of the device to compile for
- * \param[in]  ocl_device_vendor         Enumerator of the device vendor to compile for
- * \param[out] p_program                 Pointer to the cl_program where the compiled
- *                                       cl_program will be stored
- * \param[in]  runtime_consts            Optional string with runtime constants.
- *                                       Each constant is given according to the following
- *                                       format: "-Dname=value".
- *                                       Multiple defines are separated by blanks.
- *
- * \return cl_int with the build status AND any other OpenCL error appended to it
- *
- * \todo Consider whether we can parallelize the compilation of all
- * the kernels by compiling them in separate programs - but since the
- * resulting programs can't refer to each other, that might lead to
- * bloat of util code?
- *
- * \throws std::bad_alloc if out of memory
- */
-cl_int
-ocl_compile_program(
-        kernel_source_index_t kernel_source_file,
-        kernel_vendor_spec_t  kernel_vendor_spec,
-        const char *          defines_for_kernel_types,
-        char *                result_str,
-        cl_context            context,
-        cl_device_id          device_id,
-        ocl_vendor_id_t       ocl_device_vendor,
-        cl_program *          p_program,
-        const char *          runtime_consts
-        )
+cl_program
+compileProgram(FILE              *fplog,
+               const std::string &kernelBaseFilename,
+               const std::string &extraDefines,
+               cl_context         context,
+               cl_device_id       deviceId,
+               ocl_vendor_id_t    deviceVendorId)
  {
-    char         * build_options_string   = NULL;
-    cl_int         cl_error               = CL_SUCCESS;
-
-    char         * ocl_source              = NULL;
-    size_t         ocl_source_length       = 0;
-    size_t         kernel_filename_len     = 0;
-
-    bool           bOclCacheValid           = false;
+    cl_int      cl_error;
+    std::string kernelRootPath = getKernelRootPath();
  
-    char           ocl_binary_filename[256] = { 0 };
-    size_t         ocl_binary_size          = 0;
-    unsigned char *ocl_binary               = NULL;
+    GMX_RELEASE_ASSERT(fplog != nullptr, "Need a valid log file for building OpenCL programs");
  
      /* Load OpenCL source files */
-    {
-        char* kernel_filename = NULL;
-
-        /* Get the size of the kernel source filename */
-        kernel_filename_len = get_ocl_kernel_source_file_info(kernel_source_file);
-        if (kernel_filename_len)
-        {
-            kernel_filename = (char*)malloc(kernel_filename_len);
-        }
-
-        /* Get the actual full path and name of the source file with the kernels */
-        get_ocl_kernel_source_path(kernel_filename, kernel_source_file, kernel_filename_len);
-
-        /* Load the above source file and store its contents in ocl_source */
-        ocl_source = load_ocl_source(kernel_filename, &ocl_source_length);
-
-        if (!ocl_source)
-        {
-            sprintf(result_str, "Error loading OpenCL code %s", kernel_filename);
-            return CL_BUILD_PROGRAM_FAILURE;
-        }
+    std::string kernelFilename = Path::join(kernelRootPath,
+                                            kernelBaseFilename);
  
-        /* The sources are loaded so the filename is not needed anymore */
-        free(kernel_filename);
-    }
+    /* Make the build options */
+    std::string preprocessorOptions = makePreprocessorOptions(kernelRootPath,
+                                                              getWarpSize(context, deviceId),
+                                                              deviceVendorId,
+                                                              extraDefines);
  
-    /* Allocate and initialize the string with build options */
-    build_options_string =
-        ocl_get_build_options_string(context, device_id, kernel_vendor_spec,
-                                     ocl_device_vendor,
-                                     defines_for_kernel_types,
-                                     runtime_consts);
+    bool        buildCacheWasRead = false;
  
-    /* Check if OpenCL caching is ON */
-    if (bCacheOclBuild)
+    std::string cacheFilename;
+    if (useBuildCache)
      {
-        clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(ocl_binary_filename), ocl_binary_filename, NULL);
-        strcat(ocl_binary_filename, ".bin");
-
-        /* Check if there's a valid cache available */
-        bOclCacheValid = check_ocl_cache(ocl_binary_filename,
-                                         build_options_string,
-                                         ocl_source,
-                                         &ocl_binary_size, &ocl_binary);
+        cacheFilename = makeBinaryCacheFilename(kernelFilename, deviceId);
      }
  
      /* Create OpenCL program */
-    if (bCacheOclBuild && bOclCacheValid)
+    cl_program program = nullptr;
+    if (useBuildCache)
      {
-        /* Create program from pre-built binaries */
-        *p_program =
-            clCreateProgramWithBinary(
-                    context,
-                    1,
-                    &device_id,
-                    &ocl_binary_size,
-                    (const unsigned char**)&ocl_binary,
-                    NULL,
-                    &cl_error);
+        if (File::exists(cacheFilename, File::returnFalseOnError))
+        {
+            /* Check if there's a valid cache available */
+            try
+            {
+                program           = makeProgramFromCache(cacheFilename, context, deviceId);
+                buildCacheWasRead = true;
+            }
+            catch (FileIOError &e)
+            {
+                // Failing to read from the cache is not a critical error
+                formatExceptionMessageToFile(fplog, e);
+            }
+        }
+        else
+        {
+            fprintf(fplog, "No OpenCL binary cache file was present, so will compile kernels normally.\n");
+        }
      }
-    else
+    if (program == nullptr)
      {
+        // Compile OpenCL program from source
+        std::string kernelSource = TextReader::readFileToString(kernelFilename);
+        if (kernelSource.empty())
+        {
+            GMX_THROW(FileIOError("Error loading OpenCL code " + kernelFilename));
+        }
+        const char *kernelSourcePtr  = kernelSource.c_str();
+        size_t      kernelSourceSize = kernelSource.size();
          /* Create program from source code */
-        *p_program =
-            clCreateProgramWithSource(
-                    context,
-                    1,
-                    (const char**)(&ocl_source),
-                    &ocl_source_length,
-                    &cl_error
-                    );
+        program = clCreateProgramWithSource(context,
+                                            1,
+                                            &kernelSourcePtr,
+                                            &kernelSourceSize,
+                                            &cl_error);
+        if (cl_error != CL_SUCCESS)
+        {
+            GMX_THROW(InternalError("Could not create OpenCL program, error was " + ocl_get_error_string(cl_error)));
+        }
      }
  
-    /* Build program */
-    cl_int build_status         = CL_SUCCESS;
+    /* Build the OpenCL program, keeping the status to potentially
+       write to the simulation log file. */
+    cl_int buildStatus = clBuildProgram(program, 0, NULL, preprocessorOptions.c_str(), NULL, NULL);
+    if (buildStatus != CL_SUCCESS)
      {
-        /* Now we are ready to launch the build */
-        build_status =
-            clBuildProgram(*p_program, 0, NULL, build_options_string, NULL, NULL);
+        GMX_THROW(InternalError("Could not build OpenCL program, error was " + ocl_get_error_string(buildStatus)));
+    }
  
-        if (build_status == CL_SUCCESS)
+    if (useBuildCache)
+    {
+        if (!buildCacheWasRead)
          {
-            if (bCacheOclBuild)
+            /* If OpenCL caching is ON, but the current cache is not
+               valid => update it */
+            try
              {
-                /* If OpenCL caching is ON, but the current cache is not
-                   valid => update it */
-                if (!bOclCacheValid)
-                {
-                    print_ocl_binaries_to_file(*p_program, ocl_binary_filename);
-                }
+                writeBinaryToCache(program, cacheFilename);
              }
-            else
-            if ((OCL_VENDOR_NVIDIA == ocl_device_vendor) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
+            catch (GromacsException &e)
              {
-                /* If dumping intermediate files has been requested and this is an NVIDIA card
-                   => write PTX to file */
-                char ptx_filename[256];
-
-                clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(ptx_filename), ptx_filename, NULL);
-                strcat(ptx_filename, ".ptx");
-
-                print_ocl_binaries_to_file(*p_program, ptx_filename);
+                // Failing to write the cache is not a critical error
+                formatExceptionMessageToFile(fplog, e);
              }
          }
+    }
+    if ((OCL_VENDOR_NVIDIA == deviceVendorId) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
+    {
+        /* If dumping intermediate files has been requested and this is an NVIDIA card
+           => write PTX to file */
+        char buffer[STRLEN];
  
-        // Get log string size
-        size_t build_log_size       = 0;
-        cl_error =
-            clGetProgramBuildInfo(
-                    *p_program,
-                    device_id,
-                    CL_PROGRAM_BUILD_LOG,
-                    0,
-                    NULL,
-                    &build_log_size
-                    );
-
-        /* Regardless of success or failure, if there is something in the log
-         *  we might need to display it */
-        if (build_log_size && (cl_error == CL_SUCCESS) )
+        cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, NULL);
+        if (cl_error != CL_SUCCESS)
          {
-            char *build_log = NULL;
-
-            /* Allocate memory to fit the build log,
-                it can be very large in case of errors */
-            build_log = (char*)malloc(build_log_size);
-
-            if (build_log)
-            {
-                /* Get the actual compilation log */
-                cl_error =
-                    clGetProgramBuildInfo(
-                            *p_program,
-                            device_id,
-                            CL_PROGRAM_BUILD_LOG,
-                            build_log_size,
-                            build_log,
-                            NULL
-                            );
-
-                /* Save or display the log */
-                if (!cl_error)
-                {
-                    handle_ocl_build_log(
-                            build_log,
-                            build_options_string,
-                            build_status,
-                            kernel_source_file
-                            );
-                }
-
-                /* Build_log not needed anymore */
-                free(build_log);
-            }
+            GMX_THROW(InternalError("Could not get OpenCL device info, error was " + ocl_get_error_string(cl_error)));
          }
-    }
-
-    /*  Final clean up */
-    if (ocl_binary)
-    {
-        free(ocl_binary);
-    }
+        std::string ptxFilename = buffer;
+        ptxFilename += ".ptx";
  
-    if (build_options_string)
-    {
-        free(build_options_string);
+        try
+        {
+            writeBinaryToCache(program, ptxFilename);
+        }
+        catch (GromacsException &e)
+        {
+            // Failing to write the cache is not a critical error
+            formatExceptionMessageToFile(fplog, e);
+        }
      }
  
-    if (ocl_source)
-    {
-        free(ocl_source);
-    }
+    writeOclBuildLog(fplog,
+                     program,
+                     deviceId,
+                     kernelFilename,
+                     preprocessorOptions,
+                     buildStatus != CL_SUCCESS);
  
-    /* Append any other error to the build_status */
-    return build_status | cl_error;
+    return program;
  }
+
+} // namespace
+} // namespace
diff --git a/src/gromacs/gpu_utils/ocl_compiler.h b/src/gromacs/gpu_utils/ocl_compiler.h

index 6cebe15d15e9322c0e274dea1d7522b8eb0de3f2..b5db76eb4cca5222a440ed4a17a9344fca999333 100644 (file)
--- a/src/gromacs/gpu_utils/ocl_compiler.h
+++ b/src/gromacs/gpu_utils/ocl_compiler.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -33,54 +33,55 @@
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  /*! \libinternal \file
- *  \brief Declare infrastructure for OpenCL JIT compilation for Gromacs
+ *  \brief Declare infrastructure for OpenCL JIT compilation
   *
   *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
   *  \author Anca Hamuraru <anca@streamcomputing.eu>
   *  \author Teemu Virolainen <teemu@streamcomputing.eu>
+ *  \author Mark Abraham <mark.j.abraham@gmail.com>
   *  \inlibraryapi
- *
- * TODO Currently this file handles compilation of NBNXN kernels,
- * but e.g. organizing the defines for various physics models
- * is leaking in here a bit.
   */
  #ifndef GMX_GPU_UTILS_OCL_COMPILER_H
  #define GMX_GPU_UTILS_OCL_COMPILER_H
  
+#include <string>
+
  #include "gromacs/gpu_utils/oclutils.h"
  #include "gromacs/hardware/gpu_hw_info.h"
  
-/*! \brief Vendor specific kernel sources
- *
- * Only affects the bottom level kernel sources (nbnxn_ocl_kernel_[spec].cl)
- */
-typedef enum {
-    generic_vendor_kernels = 0, /**< Standard (warp-less) source file with generated methods/energy/prune */
-    nvidia_vendor_kernels,      /**< Nvidia source file with generated methods/energy/prune */
-    amd_vendor_kernels,         /**< AMD source file with generated methods/energy/prune */
-    auto_vendor_kernels         /**< Compiler will select source based on vendor id*/
-} kernel_vendor_spec_t;
+namespace gmx
+{
+namespace ocl
+{
  
-/*! \brief Kernel sources index
+/*! \brief Compile the specified kernel for the context and device.
   *
- * For now there is only default source. One may add here future kernel versions etc.
- * This affect the top level kernel sources (nbnxn_ocl_kernels.cl)
- */
-typedef enum {
-    default_source = 0  /* The default top-level source  */
-} kernel_source_index_t;
+ * \param[out] fplog                 Open file pointer for log output
+ * \param[in]  kernelBaseFilename    The name of the kernel source file to compile, e.g. "nbnxn_ocl_kernels.cl"
+ * \param[in]  extraDefines          Preprocessor defines required by the calling code, e.g. for configuring the kernels
+ * \param[in]  context               OpenCL context on the device to compile for
+ * \param[in]  deviceId              OpenCL device id of the device to compile for
+ * \param[in]  deviceVendorId        Enumerator of the device vendor to compile for
+ *
+ * \returns The compiled OpenCL program
+ *
+ * \todo Consider whether we can parallelize the compilation of all
+ * the kernels by compiling them in separate programs - but since the
+ * resulting programs can't refer to each other, that might lead to
+ * bloat of util code?
+ *
+ * \throws std::bad_alloc  if out of memory.
+ *         FileIOError     if a file I/O error prevents returning a valid compiled program.
+ *         InternalError   if an OpenCL API error prevents returning a valid compiled program. */
+cl_program
+compileProgram(FILE              *fplog,
+               const std::string &kernelBaseFilename,
+               const std::string &extraDefines,
+               cl_context         context,
+               cl_device_id       deviceId,
+               ocl_vendor_id_t    deviceVendorId);
  
-cl_int
-ocl_compile_program(
-        kernel_source_index_t kernel_source_file,
-        kernel_vendor_spec_t  kernel_vendor_spec,
-        const char *          defines_for_kernel_types,
-        char *                result_str,
-        cl_context            context,
-        cl_device_id          device_id,
-        ocl_vendor_id_t       ocl_device_vendor,
-        cl_program *          p_program,
-        const char *          custom_build_options
-        );
+} // namespace
+} // namespace
  
  #endif
diff --git a/src/gromacs/gpu_utils/oclutils.cpp b/src/gromacs/gpu_utils/oclutils.cpp

index 466a6bee375b5078b7999430dea2bde7e544a190..f733bdb65be547d983b9783d3815c2f370cd2c3c 100644 (file)
--- a/src/gromacs/gpu_utils/oclutils.cpp
+++ b/src/gromacs/gpu_utils/oclutils.cpp
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2016, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -46,6 +46,8 @@
  #include <cassert>
  #include <cstdio>
  
+#include <string>
+
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/smalloc.h"
  
@@ -195,7 +197,7 @@ void ocl_pfree(void *h_ptr)
  }
  
  /*! \brief Convert error code to diagnostic string */
-const char *ocl_get_error_string(cl_int error)
+std::string ocl_get_error_string(cl_int error)
  {
      switch (error)
      {
diff --git a/src/gromacs/gpu_utils/oclutils.h b/src/gromacs/gpu_utils/oclutils.h

index e4913b6181617ad692673eddbca912c47717bcc2..16ae06a177357d040ef90b060953a77a4e6b75c2 100644 (file)
--- a/src/gromacs/gpu_utils/oclutils.h
+++ b/src/gromacs/gpu_utils/oclutils.h
@@ -51,6 +51,8 @@
  #    include <CL/opencl.h>
  #endif
  
+#include <string>
+
  /*! \brief OpenCL vendor IDs */
  typedef enum {
      OCL_VENDOR_NVIDIA = 0,
@@ -137,6 +139,6 @@ void ocl_pmalloc(void **h_ptr, size_t nbytes);
  void ocl_pfree(void *h_ptr);
  
  /*! \brief Convert error code to diagnostic string */
-const char *ocl_get_error_string(cl_int error);
+std::string ocl_get_error_string(cl_int error);
  
  #endif
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp

index 854d9188bb82be37a9771b7f41f19f813aad1680..3d7bfdfa29ae81c37d9dce2ced3ba64e2e1e0ae0 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp
@@ -342,7 +342,7 @@ void sync_ocl_event(cl_command_queue stream, cl_event *ocl_event)
      cl_error = clEnqueueWaitForEvents(stream, 1, ocl_event);
  #endif
  
-    GMX_RELEASE_ASSERT(CL_SUCCESS == cl_error, ocl_get_error_string(cl_error));
+    GMX_RELEASE_ASSERT(CL_SUCCESS == cl_error, ocl_get_error_string(cl_error).c_str());
  
      /* Release event and reset it to 0. It is ok to release it as enqueuewaitforevents performs implicit retain for events. */
      cl_error = clReleaseEvent(*ocl_event);
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp

index fa76c745b2315f6231a7f88d15ae2340467eb132..f9080af4eb120a01e479d682a4847009e7b16e3d 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
@@ -595,7 +595,7 @@ nbnxn_gpu_create_context(gmx_device_runtime_data_t *runtimeData,
          gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s:\n OpenCL error %d: %s",
                    rank,
                    devInfo->device_name,
-                  cl_error, ocl_get_error_string(cl_error));
+                  cl_error, ocl_get_error_string(cl_error).c_str());
          return;
      }
  
@@ -648,10 +648,10 @@ nbnxn_ocl_clear_e_fshift(gmx_nbnxn_ocl_t *nb)
      cl_error |= clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_mem), &(adat->e_lj));
      cl_error |= clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_mem), &(adat->e_el));
      cl_error |= clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_uint), &shifts);
-    GMX_ASSERT(cl_error == CL_SUCCESS, ocl_get_error_string(cl_error));
+    GMX_ASSERT(cl_error == CL_SUCCESS, ocl_get_error_string(cl_error).c_str());
  
      cl_error = clEnqueueNDRangeKernel(ls, zero_e_fshift, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL);
-    GMX_ASSERT(cl_error == CL_SUCCESS, ocl_get_error_string(cl_error));
+    GMX_ASSERT(cl_error == CL_SUCCESS, ocl_get_error_string(cl_error).c_str());
  }
  
  /*! \brief Initializes the OpenCL kernel pointers of the nbnxn_ocl_ptr_t input data structure. */
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp

index 7809f4be889f22d85448734c3806287a8af6c0ec..f1a3b79b0e7d8d16039baca021b7e610db61de47 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp
@@ -58,6 +58,7 @@
  #include "gromacs/utility/cstringutil.h"
  #include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/stringutil.h"
  
  #include "nbnxn_ocl_types.h"
  
@@ -130,9 +131,9 @@ static const char * kernel_VdW_family_definitions[] =
   * \throws std::bad_alloc if out of memory
   */
  static std::string
-make_defines_for_kernel_types(bool bFastGen,
-                              int  eeltype,
-                              int  vdwtype)
+makeDefinesForKernelTypes(bool bFastGen,
+                          int  eeltype,
+                          int  vdwtype)
  {
      std::string defines_for_kernel_types;
  
@@ -153,10 +154,6 @@ make_defines_for_kernel_types(bool bFastGen,
          }
          defines_for_kernel_types += kernel_electrostatic_family_definitions[eeltype];
          defines_for_kernel_types += kernel_VdW_family_definitions[vdwtype];
-
-#ifndef NDEBUG
-        printf("Setting up defines for kernel types for FastGen %s \n", defines_for_kernel_types.c_str());
-#endif
      }
  
      return defines_for_kernel_types;
@@ -179,60 +176,55 @@ make_defines_for_kernel_types(bool bFastGen,
  void
  nbnxn_gpu_compile_kernels(gmx_nbnxn_ocl_t *nb)
  {
-    char                      gpu_err_str[STRLEN];
      gmx_bool                  bFastGen = TRUE;
-    cl_device_id              device_id;
-    cl_context                context;
-    cl_program                program;
-    char                      runtime_consts[256];
+    cl_program                program  = nullptr;
  
      if (getenv("GMX_OCL_NOFASTGEN") != NULL)
      {
          bFastGen = FALSE;
      }
  
-    device_id        = nb->dev_info->ocl_gpu_id.ocl_device_id;
-    context          = nb->dev_rundata->context;
-
-    /* Here we pass macros and static const int variables defined in include
-     * files outside the nbnxn_ocl as macros, to avoid including those files
-     * in the JIT compilation that happens at runtime.
-     */
-    sprintf(runtime_consts,
-            "-DCENTRAL=%d -DNBNXN_GPU_NCLUSTER_PER_SUPERCLUSTER=%d -DNBNXN_GPU_CLUSTER_SIZE=%d -DNBNXN_GPU_JGROUP_SIZE=%d -DNBNXN_AVOID_SING_R2_INC=%s %s",
-            CENTRAL,                                    /* Defined in ishift.h */
-            c_nbnxnGpuNumClusterPerSupercluster,        /* Defined in nbnxn_pairlist.h */
-            c_nbnxnGpuClusterSize,                      /* Defined in nbnxn_pairlist.h */
-            c_nbnxnGpuJgroupSize,                       /* Defined in nbnxn_pairlist.h */
-            STRINGIFY_MACRO(NBNXN_AVOID_SING_R2_INC)    /* Defined in nbnxn_consts.h */
-                                                        /* NBNXN_AVOID_SING_R2_INC passed as string to avoid
-                                                           floating point representation problems with sprintf */
-            , (nb->bPrefetchLjParam) ? "-DIATYPE_SHMEM" : ""
-            );
-
      /* Need to catch std::bad_alloc here and during compilation string
         handling. */
      try
      {
-        std::string defines_for_kernel_types =
-            make_defines_for_kernel_types(bFastGen,
-                                          nb->nbparam->eeltype,
-                                          nb->nbparam->vdwtype);
-
-        cl_int cl_error = ocl_compile_program(default_source,
-                                              auto_vendor_kernels,
-                                              defines_for_kernel_types.c_str(),
-                                              gpu_err_str,
-                                              context,
-                                              device_id,
-                                              nb->dev_info->vendor_e,
-                                              &program,
-                                              runtime_consts);
-        if (cl_error != CL_SUCCESS)
+        std::string extraDefines = makeDefinesForKernelTypes(bFastGen,
+                                                             nb->nbparam->eeltype,
+                                                             nb->nbparam->vdwtype);
+
+        /* Here we pass macros and static const int variables defined in include
+         * files outside the nbnxn_ocl as macros, to avoid including those files
+         * in the JIT compilation that happens at runtime.
+         */
+        extraDefines += gmx::formatString(
+                    " -DCENTRAL=%d -DNBNXN_GPU_NCLUSTER_PER_SUPERCLUSTER=%d -DNBNXN_GPU_CLUSTER_SIZE=%d -DNBNXN_GPU_JGROUP_SIZE=%d -DNBNXN_AVOID_SING_R2_INC=%s %s",
+                    CENTRAL,                                 /* Defined in ishift.h */
+                    c_nbnxnGpuNumClusterPerSupercluster,     /* Defined in nbnxn_pairlist.h */
+                    c_nbnxnGpuClusterSize,                   /* Defined in nbnxn_pairlist.h */
+                    c_nbnxnGpuJgroupSize,                    /* Defined in nbnxn_pairlist.h */
+                    STRINGIFY_MACRO(NBNXN_AVOID_SING_R2_INC) /* Defined in nbnxn_consts.h */
+                                                             /* NBNXN_AVOID_SING_R2_INC passed as string to avoid
+                                                                floating point representation problems with sprintf */
+                    , (nb->bPrefetchLjParam) ? "-DIATYPE_SHMEM" : ""
+                    );
+
+
+        try
+        {
+            /* TODO when we have a proper MPI-aware logging module,
+               the log output here should be written there */
+            program = gmx::ocl::compileProgram(stderr,
+                                               "nbnxn_ocl_kernels.cl",
+                                               extraDefines,
+                                               nb->dev_rundata->context,
+                                               nb->dev_info->ocl_gpu_id.ocl_device_id,
+                                               nb->dev_info->vendor_e);
+        }
+        catch (gmx::GromacsException &e)
          {
-            gmx_fatal(FARGS, "Failed to compile NBNXN kernels for GPU #%s: %s",
-                      nb->dev_info->device_name,
-                      gpu_err_str);
+            e.prependContext(gmx::formatString("Failed to compile NBNXN kernels for GPU #%s\n",
+                                               nb->dev_info->device_name));
+            throw;
          }
      }
      GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
author	Mark Abraham <mark.j.abraham@gmail.com>
	Fri, 5 Feb 2016 23:01:18 +0000 (23:01 +0000)
committer	Erik Lindahl <erik.lindahl@gmail.com>
	Sun, 29 May 2016 11:31:43 +0000 (13:31 +0200)
docs/user-guide/environment-variables.rst		patch \| blob \| history
src/gromacs/gpu_utils/CMakeLists.txt		patch \| blob \| history
src/gromacs/gpu_utils/gpu_utils_ocl.cpp		patch \| blob \| history
src/gromacs/gpu_utils/ocl_caching.cpp	[new file with mode: 0644]	patch \| blob
src/gromacs/gpu_utils/ocl_caching.h	[new file with mode: 0644]	patch \| blob
src/gromacs/gpu_utils/ocl_compiler.cpp		patch \| blob \| history
src/gromacs/gpu_utils/ocl_compiler.h		patch \| blob \| history
src/gromacs/gpu_utils/oclutils.cpp		patch \| blob \| history
src/gromacs/gpu_utils/oclutils.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp		patch \| blob \| history
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp		patch \| blob \| history
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_jit_support.cpp		patch \| blob \| history