2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Define infrastructure for OpenCL JIT compilation for Gromacs
38 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
39 * \author Anca Hamuraru <anca@streamcomputing.eu>
40 * \author Teemu Virolainen <teemu@streamcomputing.eu>
42 * TODO Currently this file handles compilation of NBNXN kernels,
43 * but e.g. organizing the defines for various physics models
44 * is leaking in here a bit.
49 #include "ocl_compiler.h"
60 #include "gromacs/utility/path.h"
61 #include "gromacs/utility/programcontext.h"
62 #include "gromacs/utility/stringutil.h"
64 /*! \brief Path separator
68 /*! \brief Compiler options index
76 b_generic_fast_relaxed_math,
77 b_generic_noopt_compilation,
78 b_generic_debug_symbols,
79 b_amd_dump_temp_files,
80 b_include_install_opencl_dir,
81 b_include_source_opencl_dirs,
83 } build_options_index_t;
85 /*! \brief List of available OpenCL compiler options
87 static const char* build_options_list[] = {
89 "-x clc++", /**< AMD C++ extension */
90 "-cl-nv-verbose", /**< Nvidia verbose build log */
91 "-cl-std=CL1.1", /**< Force CL 1.1 */
92 "-cl-std=CL1.2", /**< Force CL 1.2 */
93 "-cl-fast-relaxed-math", /**< Fast math */
94 "-cl-opt-disable", /**< Disable optimisations */
95 "-g", /**< Debug symbols */
96 "-save-temps" /**< AMD option to dump intermediate temporary
97 files such as IL or ISA code */
100 /*! \brief Available sources
102 static const char * kernel_filenames[] = {"nbnxn_ocl_kernels.cl"};
104 /*! \brief Defines to enable specific kernels based on vendor
106 static const char * kernel_vendor_spec_definitions[] = {
107 "-D_WARPLESS_SOURCE_", /**< nbnxn_ocl_kernel_nowarp.clh */
108 "-D_NVIDIA_SOURCE_", /**< nbnxn_ocl_kernel_nvidia.clh */
109 "-D_AMD_SOURCE_" /**< nbnxn_ocl_kernel_amd.clh */
113 /*! \brief Get the string of a build option of the specific id
114 * \param build_option_id The option id as defines in the header
115 * \return String containing the actual build option string for the compiler
117 static const char* get_ocl_build_option(build_options_index_t build_option_id)
119 if (build_option_id < b_num_build_options)
121 return build_options_list[build_option_id];
125 return build_options_list[b_invalid_option];
129 /*! \brief Get the size of the string (without null termination) required
130 * for the build option of the specific id
131 * \param build_option_id The option id as defines in the header
132 * \return size_t containing the size in bytes of the build option string
134 static size_t get_ocl_build_option_length(build_options_index_t build_option_id)
137 if (build_option_id < b_num_build_options)
139 return strlen(build_options_list[build_option_id]);
143 return strlen(build_options_list[b_invalid_option]);
147 /*! \brief Get the size of final composed build options literal
149 * \param build_device_vendor_id Device vendor id. Used to
150 * automatically enable some vendor specific options
151 * \param custom_build_options_prepend Prepend options string
152 * \param custom_build_options_append Append options string
153 * \return size_t containing the size in bytes of the composed
154 * build options string including null termination
157 create_ocl_build_options_length(
158 ocl_vendor_id_t build_device_vendor_id,
159 const char * custom_build_options_prepend,
160 const char * custom_build_options_append)
162 size_t build_options_length = 0;
163 size_t whitespace = 1;
165 assert(build_device_vendor_id <= OCL_VENDOR_UNKNOWN);
167 if (custom_build_options_prepend)
169 build_options_length +=
170 strlen(custom_build_options_prepend)+whitespace;
173 if ( (build_device_vendor_id == OCL_VENDOR_AMD) && getenv("GMX_OCL_DEBUG") && getenv("GMX_OCL_FORCE_CPU") )
175 build_options_length += get_ocl_build_option_length(b_generic_debug_symbols)+whitespace;
178 if (getenv("GMX_OCL_NOOPT"))
180 build_options_length +=
181 get_ocl_build_option_length(b_generic_noopt_compilation)+whitespace;
184 if (getenv("GMX_OCL_FASTMATH"))
186 build_options_length +=
187 get_ocl_build_option_length(b_generic_fast_relaxed_math)+whitespace;
190 if ((build_device_vendor_id == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
192 build_options_length +=
193 get_ocl_build_option_length(b_nvidia_verbose) + whitespace;
196 if ((build_device_vendor_id == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
198 /* To dump OpenCL build intermediate files, caching must be off */
199 if (NULL != getenv("GMX_OCL_NOGENCACHE"))
201 build_options_length +=
202 get_ocl_build_option_length(b_amd_dump_temp_files) + whitespace;
206 if (custom_build_options_append)
208 build_options_length +=
209 strlen(custom_build_options_append)+whitespace;
212 return build_options_length+1;
215 /*! \brief Get the size of final composed build options literal
217 * \param build_options_string The string where to save the
218 * resulting build options in
219 * \param build_options_length The size of the build options
220 * \param build_device_vendor_id Device vendor id. Used to
221 * automatically enable some vendor specific options
222 * \param custom_build_options_prepend Prepend options string
223 * \param custom_build_options_append Append options string
224 * \return The string build_options_string with the build options
227 create_ocl_build_options(
228 char * build_options_string,
229 size_t gmx_unused build_options_length,
230 ocl_vendor_id_t build_device_vendor_id,
231 const char * custom_build_options_prepend,
232 const char * custom_build_options_append)
234 size_t char_added = 0;
236 if (custom_build_options_prepend)
238 strncpy( build_options_string+char_added,
239 custom_build_options_prepend,
240 strlen(custom_build_options_prepend));
242 char_added += strlen(custom_build_options_prepend);
243 build_options_string[char_added++] = ' ';
246 if (getenv("GMX_OCL_NOOPT") )
248 strncpy( build_options_string+char_added,
249 get_ocl_build_option(b_generic_noopt_compilation),
250 get_ocl_build_option_length(b_generic_noopt_compilation) );
252 char_added += get_ocl_build_option_length(b_generic_noopt_compilation);
253 build_options_string[char_added++] = ' ';
257 if (getenv("GMX_OCL_FASTMATH") )
259 strncpy( build_options_string+char_added,
260 get_ocl_build_option(b_generic_fast_relaxed_math),
261 get_ocl_build_option_length(b_generic_fast_relaxed_math) );
263 char_added += get_ocl_build_option_length(b_generic_fast_relaxed_math);
264 build_options_string[char_added++] = ' ';
267 if ((build_device_vendor_id == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
269 strncpy(build_options_string + char_added,
270 get_ocl_build_option(b_nvidia_verbose),
271 get_ocl_build_option_length(b_nvidia_verbose));
273 char_added += get_ocl_build_option_length(b_nvidia_verbose);
274 build_options_string[char_added++] = ' ';
277 if ((build_device_vendor_id == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
279 /* To dump OpenCL build intermediate files, caching must be off */
280 if (NULL != getenv("GMX_OCL_NOGENCACHE"))
282 strncpy(build_options_string + char_added,
283 get_ocl_build_option(b_amd_dump_temp_files),
284 get_ocl_build_option_length(b_amd_dump_temp_files));
286 char_added += get_ocl_build_option_length(b_amd_dump_temp_files);
287 build_options_string[char_added++] = ' ';
291 if ( ( build_device_vendor_id == OCL_VENDOR_AMD ) && getenv("GMX_OCL_DEBUG") && getenv("GMX_OCL_FORCE_CPU"))
293 strncpy( build_options_string+char_added,
294 get_ocl_build_option(b_generic_debug_symbols),
295 get_ocl_build_option_length(b_generic_debug_symbols) );
297 char_added += get_ocl_build_option_length(b_generic_debug_symbols);
298 build_options_string[char_added++] = ' ';
301 if (custom_build_options_append)
303 strncpy( build_options_string+char_added,
304 custom_build_options_append,
305 strlen(custom_build_options_append) );
307 char_added += strlen(custom_build_options_append);
308 build_options_string[char_added++] = ' ';
311 build_options_string[char_added++] = '\0';
313 assert(char_added == build_options_length);
315 return build_options_string;
318 /*! \brief Get the path to the main folder storing OpenCL kernels.
320 * By default, this function constructs the full path to the OpenCL from
321 * the known location of the binary that is running, so that we handle
322 * both in-source and installed builds. The user can override this
323 * behavior by defining GMX_OCL_FILE_PATH environment variable.
325 * \return OS-normalized path string to the main folder storing OpenCL kernels
327 * \throws std::bad_alloc if out of memory.
332 const char *gmx_ocl_file_path;
333 std::string ocl_root_path;
335 /* Use GMX_OCL_FILE_PATH if the user has defined it */
336 gmx_ocl_file_path = getenv("GMX_OCL_FILE_PATH");
338 if (!gmx_ocl_file_path)
340 /* Normal way of getting ocl_root_dir. First get the right
341 root path from the path to the binary that is running. */
342 gmx::InstallationPrefixInfo info = gmx::getProgramContext().installationPrefix();
343 std::string dataPathSuffix = (info.bSourceLayout ?
344 "src/gromacs/mdlib/nbnxn_ocl" :
346 ocl_root_path = gmx::Path::join(info.path, dataPathSuffix);
350 ocl_root_path = gmx_ocl_file_path;
353 // Make sure we return an OS-correct path format
354 return gmx::Path::normalize(ocl_root_path);
357 /*! \brief Get the size of the full kernel source file path and name
359 * The following full path size is computed:
360 * strlen(ocl_root_path) + strlen(kernel_id.cl) + separator + null term
362 * \param kernel_src_id Id of the kernel source (auto,nvidia,amd,nowarp)
363 * \return Size in bytes of the full kernel source file path and name including
364 * separators and null termination
366 * \throws std::bad_alloc if out of memory */
368 get_ocl_kernel_source_file_info(kernel_source_index_t kernel_src_id)
370 std::string ocl_root_path = get_ocl_root_path();
372 if (ocl_root_path.empty())
377 return (ocl_root_path.length() + /* Path to the main OpenCL folder*/
379 strlen(kernel_filenames[kernel_src_id]) + /* Kernel source file name */
384 /*! \brief Compose and the full path and name of the kernel src to be used
386 * \param ocl_kernel_filename String where the full path and name will be saved
387 * \param kernel_src_id Id of the kernel source (default)
388 * \param kernel_filename_len Size of the full path and name string, as computed by get_ocl_kernel_source_file_info()
389 * \return The ocl_kernel_filename complete with the full path and name; NULL if error.
391 * \throws std::bad_alloc if out of memory */
393 get_ocl_kernel_source_path(
394 char * ocl_kernel_filename,
395 kernel_source_index_t kernel_src_id,
396 size_t gmx_unused kernel_filename_len)
398 std::string ocl_root_path;
400 assert(kernel_filename_len != 0);
401 assert(ocl_kernel_filename != NULL);
403 ocl_root_path = get_ocl_root_path();
404 if (ocl_root_path.empty())
409 size_t chars_copied = 0;
410 strncpy(ocl_kernel_filename, ocl_root_path.c_str(), ocl_root_path.length());
411 chars_copied += ocl_root_path.length();
413 ocl_kernel_filename[chars_copied++] = SEPARATOR;
415 strncpy(&ocl_kernel_filename[chars_copied],
416 kernel_filenames[kernel_src_id],
417 strlen(kernel_filenames[kernel_src_id]) );
418 chars_copied += strlen(kernel_filenames[kernel_src_id]);
420 ocl_kernel_filename[chars_copied++] = '\0';
422 assert(chars_copied == kernel_filename_len);
424 return ocl_kernel_filename;
427 /* Undefine the separators */
430 /*! \brief Loads the src inside the file filename onto a string in memory
432 * \param filename The name of the file to be read
433 * \param p_source_length Pointer to the size of the source in bytes
434 * (without null termination)
435 * \return A string with the contents of the file with name filename,
436 * or NULL if there was a problem opening/reading the file
439 load_ocl_source(const char* filename, size_t* p_source_length)
441 FILE * filestream = NULL;
443 size_t source_length;
452 filestream = fopen(filename, "rb");
458 fseek(filestream, 0, SEEK_END);
459 source_length = ftell(filestream);
460 fseek(filestream, 0, SEEK_SET);
462 ocl_source = (char*)malloc(source_length + 1);
463 if (fread(ocl_source, source_length, 1, filestream) != 1)
471 ocl_source[source_length] = '\0';
473 *p_source_length = source_length;
477 /*! \brief Handles the dumping of the OpenCL JIT compilation log
480 * -Success: Save to file kernel_id.SUCCEEDED in the run folder.
481 * -Fail : Save to file kernel_id.FAILED in the run folder.
483 * In a release build:
484 * -Success: Nothing is logged.
485 * -Fail : Save to a file kernel_id.FAILED in the run folder.
486 * If GMX_OCL_DUMP_LOG is set, log is always dumped to file
487 * If OCL_JIT_DUMP_STDERR is set, log is always dumped to stderr
489 * \param build_log String containing the OpenCL JIT compilation log
490 * \param build_options_string String containing the options used for the build
491 * \param build_status The OpenCL type status of the build (CL_SUCCESS etc)
492 * \param kernel_src_id The id of the kernel src used for the build (default)
494 * \throws std::bad_alloc if out of memory */
496 handle_ocl_build_log(
497 const char * build_log,
498 const char * build_options_string,
500 kernel_source_index_t kernel_src_id)
502 bool dumpStdErr = false;
505 dumpFile = (build_status != CL_SUCCESS);
508 if (build_status != CL_SUCCESS)
514 /* Override default handling */
515 if (getenv("GMX_OCL_DUMP_LOG") != NULL)
519 if (getenv("OCL_JIT_DUMP_STDERR") != NULL)
524 if (dumpFile || dumpStdErr)
526 FILE *build_log_file = NULL;
527 const char *fail_header = "Compilation of source file failed! \n";
528 const char *success_header = "Compilation of source file was successful! \n";
529 const char *log_header = "--------------LOG START---------------\n";
530 const char *log_footer = "---------------LOG END----------------\n";
532 std::string log_fname;
534 build_info = (char*)malloc(32 + strlen(build_options_string) );
535 sprintf(build_info, "-- Used build options: %s\n", build_options_string);
539 log_fname = gmx::formatString("%s.%s", kernel_filenames[kernel_src_id],
540 (build_status == CL_SUCCESS) ? "SUCCEEDED" : "FAILED");
541 build_log_file = fopen(log_fname.c_str(), "w");
544 size_t complete_message_size = 0;
545 char * complete_message;
548 complete_message_size = (build_status == CL_SUCCESS) ? strlen(success_header) : strlen(fail_header);
549 complete_message_size += strlen(build_info) + strlen(log_header) + strlen(log_footer);
550 complete_message_size += strlen(build_log);
551 complete_message_size += 1; //null termination
552 complete_message = (char*)malloc(complete_message_size);
554 sprintf(complete_message, "%s%s%s%s%s",
555 (build_status == CL_SUCCESS) ? success_header : fail_header,
565 fprintf(build_log_file, "%s", complete_message);
568 printf("The OpenCL compilation log has been saved in \"%s\"\n", log_fname.c_str());
572 if (build_status != CL_SUCCESS)
574 fprintf(stderr, "%s", complete_message);
579 fclose(build_log_file);
582 free(complete_message);
587 /*! \brief Get the warp size reported by device
589 * This is platform implementation dependant and seems to only work on the Nvidia and Amd platforms!
590 * Nvidia reports 32, Amd for GPU 64. Ignore the rest
592 * \param context Current OpenCL context
593 * \param device_id OpenCL device with the context
594 * \return cl_int value of the warp size
597 ocl_get_warp_size(cl_context context, cl_device_id device_id)
599 cl_int cl_error = CL_SUCCESS;
600 size_t warp_size = 0;
601 const char *dummy_kernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
604 clCreateProgramWithSource(context, 1, (const char**)&dummy_kernel, NULL, &cl_error);
607 clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
609 cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
611 cl_error = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
612 sizeof(size_t), &warp_size, NULL);
614 clReleaseKernel(kernel);
615 clReleaseProgram(program);
617 assert(warp_size != 0);
618 assert(cl_error == CL_SUCCESS);
623 /*! \brief Automatically select vendor-specific kernel from vendor id
625 * \param vendor_id Vendor id enumerator (amd,nvidia,intel,unknown)
626 * \return Vendor-specific kernel version
628 static kernel_vendor_spec_t
629 ocl_autoselect_kernel_from_vendor(ocl_vendor_id_t vendor_id)
631 kernel_vendor_spec_t kernel_vendor;
633 printf("Selecting kernel source automatically\n");
638 kernel_vendor = amd_vendor_kernels;
639 printf("Selecting kernel for AMD\n");
641 case OCL_VENDOR_NVIDIA:
642 kernel_vendor = nvidia_vendor_kernels;
643 printf("Selecting kernel for NVIDIA\n");
646 kernel_vendor = generic_vendor_kernels;
647 printf("Selecting generic kernel\n");
650 return kernel_vendor;
653 /*! \brief Returns the compiler define string needed to activate vendor-specific kernels
655 * \param kernel_spec Kernel vendor specification
656 * \return String with the define for the spec
659 ocl_get_vendor_specific_define(kernel_vendor_spec_t kernel_spec)
661 assert(kernel_spec < auto_vendor_kernels );
663 printf("Setting up kernel vendor spec definitions: %s \n", kernel_vendor_spec_definitions[kernel_spec]);
665 return kernel_vendor_spec_definitions[kernel_spec];
668 /*! \brief Check if there's a valid cache available, and return it if so
670 * \param[in] ocl_binary_filename Name of file containing the binary cache
671 * \param[in] build_options_string Compiler command-line options to use (currently unused)
672 * \param[in] ocl_source NULL-terminated string of OpenCL source code (currently unused)
673 * \param[out] ocl_binary_size Size of the binary file once loaded in memory
674 * \param[out] ocl_binary Pointer to the binary file bytes (valid only if return is true)
675 * \return Whether the file reading was successful
677 * \todo Compare current build options and code against the build
678 * options and the code corresponding to the cache. If any change is
679 * detected this function must return false.
682 check_ocl_cache(char *ocl_binary_filename,
683 char gmx_unused *build_options_string,
684 char gmx_unused *ocl_source,
685 size_t *ocl_binary_size,
686 unsigned char **ocl_binary)
691 f = fopen(ocl_binary_filename, "rb");
697 fseek(f, 0, SEEK_END);
698 *ocl_binary_size = ftell(f);
699 *ocl_binary = (unsigned char*)malloc(*ocl_binary_size);
700 fseek(f, 0, SEEK_SET);
701 read_count = fread(*ocl_binary, 1, *ocl_binary_size, f);
704 if (read_count != (*ocl_binary_size))
712 /*! \brief Builds a string with build options for the OpenCL kernels
714 * \throws std::bad_alloc if out of memory */
716 ocl_get_build_options_string(cl_context context,
717 cl_device_id device_id,
718 kernel_vendor_spec_t kernel_vendor_spec,
719 ocl_vendor_id_t ocl_device_vendor,
720 const char * defines_for_kernel_types,
721 const char * runtime_consts)
723 char * build_options_string = NULL;
724 char custom_build_options_prepend[1024] = { 0 };
725 char *custom_build_options_append = NULL;
726 cl_int warp_size = 0;
728 /* Get the reported warp size. Compile a small dummy kernel to do so */
729 warp_size = ocl_get_warp_size(context, device_id);
731 /* Select vendor specific kernels automatically */
732 if (kernel_vendor_spec == auto_vendor_kernels)
734 kernel_vendor_spec = ocl_autoselect_kernel_from_vendor(ocl_device_vendor);
737 /* Create include paths for kernel sources.
738 All OpenCL kernel files are expected to be stored in one single folder. */
740 std::string ocl_root_path = get_ocl_root_path();
742 char incl_opt_start[] = "-I\"";
743 char incl_opt_end[] = "\"";
746 custom_build_options_append =
747 (char*)calloc((ocl_root_path.length() /* Path to the OpenCL folder */
748 + strlen(incl_opt_start) /* -I" */
749 + strlen(incl_opt_end) /* " */
753 strncpy(&custom_build_options_append[chars], incl_opt_start, strlen(incl_opt_start));
754 chars += strlen(incl_opt_start);
756 strncpy(&custom_build_options_append[chars], ocl_root_path.c_str(), ocl_root_path.length());
757 chars += ocl_root_path.length();
759 strncpy(&custom_build_options_append[chars], incl_opt_end, strlen(incl_opt_end));
762 /* Get vendor specific define (amd,nvidia,nowarp) */
763 const char * kernel_vendor_spec_define =
764 ocl_get_vendor_specific_define(kernel_vendor_spec);
766 /* Compose the build options to be prepended. */
767 sprintf(custom_build_options_prepend,
768 "-DWARP_SIZE_TEST=%d %s %s %s",
770 kernel_vendor_spec_define,
771 defines_for_kernel_types,
772 runtime_consts ? runtime_consts : ""
775 /* Get the size of the complete build options string */
776 size_t build_options_length =
777 create_ocl_build_options_length(
779 custom_build_options_prepend,
780 custom_build_options_append
783 build_options_string = (char *)malloc(build_options_length);
785 /* Compose the complete build options */
786 create_ocl_build_options(
787 build_options_string,
788 build_options_length,
790 custom_build_options_prepend,
791 custom_build_options_append
794 if (custom_build_options_append)
796 free(custom_build_options_append);
799 return build_options_string;
802 /*! \brief Implement caching of OpenCL binaries
804 * \param[in] program Index of program to cache
805 * \param[in] file_name Name of file to use for the cache
808 print_ocl_binaries_to_file(cl_program program, char* file_name)
810 size_t ocl_binary_size = 0;
811 unsigned char *ocl_binary = NULL;
813 clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &ocl_binary_size, NULL);
815 ocl_binary = (unsigned char*)malloc(ocl_binary_size);
817 clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char *), &ocl_binary, NULL);
819 FILE *f = fopen(file_name, "wb");
820 fwrite(ocl_binary, 1, ocl_binary_size, f);
826 /*! \brief Compile the kernels as described by kernel src id and vendor spec
828 * \param[in] kernel_source_file Index of the kernel src to be used (default)
829 * \param[in] kernel_vendor_spec Vendor-specific compilation (auto,nvidia,amd,nowarp)
830 * \param[in] defines_for_kernel_types Preprocessor defines that trigger the compilation of the kernels
831 * \param[out] result_str Gromacs error string
832 * \param[in] context Current context on the device to compile for
833 * \param[in] device_id OpenCL device id of the device to compile for
834 * \param[in] ocl_device_vendor Enumerator of the device vendor to compile for
835 * \param[out] p_program Pointer to the cl_program where the compiled
836 * cl_program will be stored
837 * \param[in] runtime_consts Optional string with runtime constants.
838 * Each constant is given according to the following
839 * format: "-Dname=value".
840 * Multiple defines are separated by blanks.
842 * \return cl_int with the build status AND any other OpenCL error appended to it
844 * \todo Consider whether we can parallelize the compilation of all
845 * the kernels by compiling them in separate programs - but since the
846 * resulting programs can't refer to each other, that might lead to
847 * bloat of util code?
849 * \throws std::bad_alloc if out of memory
853 kernel_source_index_t kernel_source_file,
854 kernel_vendor_spec_t kernel_vendor_spec,
855 const char * defines_for_kernel_types,
858 cl_device_id device_id,
859 ocl_vendor_id_t ocl_device_vendor,
860 cl_program * p_program,
861 const char * runtime_consts
864 char * build_options_string = NULL;
865 cl_int cl_error = CL_SUCCESS;
867 char * ocl_source = NULL;
868 size_t ocl_source_length = 0;
869 size_t kernel_filename_len = 0;
871 bool bCacheOclBuild = false;
872 bool bOclCacheValid = false;
874 char ocl_binary_filename[256] = { 0 };
875 size_t ocl_binary_size = 0;
876 unsigned char *ocl_binary = NULL;
878 /* Load OpenCL source files */
880 char* kernel_filename = NULL;
882 /* Get the size of the kernel source filename */
883 kernel_filename_len = get_ocl_kernel_source_file_info(kernel_source_file);
884 if (kernel_filename_len)
886 kernel_filename = (char*)malloc(kernel_filename_len);
889 /* Get the actual full path and name of the source file with the kernels */
890 get_ocl_kernel_source_path(kernel_filename, kernel_source_file, kernel_filename_len);
892 /* Load the above source file and store its contents in ocl_source */
893 ocl_source = load_ocl_source(kernel_filename, &ocl_source_length);
897 sprintf(result_str, "Error loading OpenCL code %s", kernel_filename);
898 return CL_BUILD_PROGRAM_FAILURE;
901 /* The sources are loaded so the filename is not needed anymore */
902 free(kernel_filename);
905 /* Allocate and initialize the string with build options */
906 build_options_string =
907 ocl_get_build_options_string(context, device_id, kernel_vendor_spec,
909 defines_for_kernel_types,
912 /* Check if OpenCL caching is ON - currently caching is disabled
913 until we resolve concurrency issues. */
914 /* bCacheOclBuild = (NULL == getenv("GMX_OCL_NOGENCACHE"));*/
917 clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(ocl_binary_filename), ocl_binary_filename, NULL);
918 strcat(ocl_binary_filename, ".bin");
920 /* Check if there's a valid cache available */
921 bOclCacheValid = check_ocl_cache(ocl_binary_filename,
922 build_options_string,
924 &ocl_binary_size, &ocl_binary);
927 /* Create OpenCL program */
928 if (bCacheOclBuild && bOclCacheValid)
930 /* Create program from pre-built binaries */
932 clCreateProgramWithBinary(
937 (const unsigned char**)&ocl_binary,
943 /* Create program from source code */
945 clCreateProgramWithSource(
948 (const char**)(&ocl_source),
955 cl_int build_status = CL_SUCCESS;
957 /* Now we are ready to launch the build */
959 clBuildProgram(*p_program, 0, NULL, build_options_string, NULL, NULL);
961 if (build_status == CL_SUCCESS)
965 /* If OpenCL caching is ON, but the current cache is not
966 valid => update it */
969 print_ocl_binaries_to_file(*p_program, ocl_binary_filename);
973 if ((OCL_VENDOR_NVIDIA == ocl_device_vendor) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
975 /* If dumping intermediate files has been requested and this is an NVIDIA card
976 => write PTX to file */
977 char ptx_filename[256];
979 clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(ptx_filename), ptx_filename, NULL);
980 strcat(ptx_filename, ".ptx");
982 print_ocl_binaries_to_file(*p_program, ptx_filename);
986 // Get log string size
987 size_t build_log_size = 0;
989 clGetProgramBuildInfo(
992 CL_PROGRAM_BUILD_LOG,
998 /* Regardless of success or failure, if there is something in the log
999 * we might need to display it */
1000 if (build_log_size && (cl_error == CL_SUCCESS) )
1002 char *build_log = NULL;
1004 /* Allocate memory to fit the build log,
1005 it can be very large in case of errors */
1006 build_log = (char*)malloc(build_log_size);
1010 /* Get the actual compilation log */
1012 clGetProgramBuildInfo(
1015 CL_PROGRAM_BUILD_LOG,
1021 /* Save or display the log */
1024 handle_ocl_build_log(
1026 build_options_string,
1032 /* Build_log not needed anymore */
1038 /* Final clean up */
1044 if (build_options_string)
1046 free(build_options_string);
1054 /* Append any other error to the build_status */
1055 return build_status | cl_error;