From ca9c69429d8a1d4ea5cb59f0c962d373424ee20d Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Wed, 29 Jan 2020 14:31:43 +0100 Subject: [PATCH] Make OpenCL DeviceVendor into enum class and move to GPU traits The device context in OpenCL requires the information on vendor when constructed. To prepare for opaque DeviceContext, the vendor enum was moved into OpenCL traits. Refs. #3311, needed for #3315. Change-Id: Iec22ff17543b6a99407048de6e0cd82bb7218fb0 --- .../ewald/pme_gpu_program_impl_ocl.cpp | 4 +- src/gromacs/gpu_utils/gpu_utils_ocl.cpp | 50 +++++++++---------- src/gromacs/gpu_utils/gputraits_ocl.h | 12 ++++- src/gromacs/gpu_utils/ocl_compiler.cpp | 38 +++++++------- src/gromacs/gpu_utils/ocl_compiler.h | 12 +++-- src/gromacs/gpu_utils/oclutils.h | 29 ++++------- src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp | 3 +- .../nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp | 4 +- .../nbnxm/opencl/nbnxm_ocl_jit_support.cpp | 2 +- src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h | 7 ++- 10 files changed, 81 insertions(+), 80 deletions(-) diff --git a/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp b/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp index 79ef8b6d21..d17a76256e 100644 --- a/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp +++ b/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp @@ -112,7 +112,7 @@ PmeGpuProgramImpl::~PmeGpuProgramImpl() */ static void checkRequiredWarpSize(cl_kernel kernel, const char* kernelName, const gmx_device_info_t* deviceInfo) { - if (deviceInfo->vendor_e == OCL_VENDOR_INTEL) + if (deviceInfo->deviceVendor == DeviceVendor::Intel) { size_t kernelWarpSize = gmx::ocl::getKernelWarpSize(kernel, deviceInfo->ocl_gpu_id.ocl_device_id); @@ -167,7 +167,7 @@ void PmeGpuProgramImpl::compileKernels(const gmx_device_info_t* deviceInfo) the log output here should be written there */ program = gmx::ocl::compileProgram(stderr, "gromacs/ewald", "pme_program.cl", commonDefines, context, deviceInfo->ocl_gpu_id.ocl_device_id, - deviceInfo->vendor_e); + deviceInfo->deviceVendor); } 
catch (gmx::GromacsException& e) { diff --git a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp index fd97e17439..8770e6862d 100644 --- a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp +++ b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp @@ -234,12 +234,12 @@ static int isDeviceSupported(const gmx_device_info_t* devInfo) } /* Only AMD, Intel, and NVIDIA GPUs are supported for now */ - switch (devInfo->vendor_e) + switch (devInfo->deviceVendor) { - case OCL_VENDOR_NVIDIA: return egpuCompatible; - case OCL_VENDOR_AMD: + case DeviceVendor::Nvidia: return egpuCompatible; + case DeviceVendor::Amd: return runningOnCompatibleOSForAmd() ? egpuCompatible : egpuIncompatible; - case OCL_VENDOR_INTEL: + case DeviceVendor::Intel: return GMX_OPENCL_NB_CLUSTER_SIZE == 4 ? egpuCompatible : egpuIncompatibleClusterSize; default: return egpuIncompatible; } @@ -279,29 +279,29 @@ static int checkGpu(size_t deviceId, const gmx_device_info_t* deviceInfo) } // namespace gmx -/*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name. +/*! \brief Returns a DeviceVendor value corresponding to the input OpenCL vendor name. * - * \param[in] vendor_name String with OpenCL vendor name. - * \returns ocl_vendor_id_t value for the input vendor_name + * \param[in] vendorName String with OpenCL vendor name. 
+ * \returns DeviceVendor value for the input vendor name */ -static ocl_vendor_id_t get_vendor_id(char* vendor_name) +static DeviceVendor getDeviceVendor(const char* vendorName) { - if (vendor_name) + if (vendorName) { - if (strstr(vendor_name, "NVIDIA")) + if (strstr(vendorName, "NVIDIA")) { - return OCL_VENDOR_NVIDIA; + return DeviceVendor::Nvidia; } - else if (strstr(vendor_name, "AMD") || strstr(vendor_name, "Advanced Micro Devices")) + else if (strstr(vendorName, "AMD") || strstr(vendorName, "Advanced Micro Devices")) { - return OCL_VENDOR_AMD; + return DeviceVendor::Amd; } - else if (strstr(vendor_name, "Intel")) + else if (strstr(vendorName, "Intel")) { - return OCL_VENDOR_INTEL; + return DeviceVendor::Intel; } } - return OCL_VENDOR_UNKNOWN; + return DeviceVendor::Unknown; } bool isGpuDetectionFunctional(std::string* errorMessage) @@ -434,10 +434,10 @@ void findGpus(gmx_gpu_info_t* gpu_info) sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, nullptr); - gpu_info->gpu_dev[device_index].device_vendor[0] = 0; + gpu_info->gpu_dev[device_index].vendorName[0] = 0; clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, - sizeof(gpu_info->gpu_dev[device_index].device_vendor), - gpu_info->gpu_dev[device_index].device_vendor, nullptr); + sizeof(gpu_info->gpu_dev[device_index].vendorName), + gpu_info->gpu_dev[device_index].vendorName, nullptr); gpu_info->gpu_dev[device_index].compute_units = 0; clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, @@ -449,8 +449,8 @@ void findGpus(gmx_gpu_info_t* gpu_info) sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), nullptr); - gpu_info->gpu_dev[device_index].vendor_e = - get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor); + gpu_info->gpu_dev[device_index].deviceVendor = + getDeviceVendor(gpu_info->gpu_dev[device_index].vendorName); clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, 3 * 
sizeof(size_t), &gpu_info->gpu_dev[device_index].maxWorkItemSizes, nullptr); @@ -479,7 +479,7 @@ void findGpus(gmx_gpu_info_t* gpu_info) int last = -1; for (int i = 0; i < gpu_info->n_dev; i++) { - if (OCL_VENDOR_AMD == gpu_info->gpu_dev[i].vendor_e) + if (gpu_info->gpu_dev[i].deviceVendor == DeviceVendor::Amd) { last++; @@ -498,7 +498,7 @@ void findGpus(gmx_gpu_info_t* gpu_info) { for (int i = 0; i < gpu_info->n_dev; i++) { - if (OCL_VENDOR_NVIDIA == gpu_info->gpu_dev[i].vendor_e) + if (gpu_info->gpu_dev[i].deviceVendor == DeviceVendor::Nvidia) { last++; @@ -543,7 +543,7 @@ void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int ind else { sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s", index, dinfo->device_name, - dinfo->device_vendor, dinfo->device_version, gpu_detect_res_str[dinfo->stat]); + dinfo->vendorName, dinfo->device_version, gpu_detect_res_str[dinfo->stat]); } } @@ -557,7 +557,7 @@ void init_gpu(const gmx_device_info_t* deviceInfo) // the cache does not always get regenerated when the source code changes, // e.g. if the path to the kernel sources remains the same - if (deviceInfo->vendor_e == OCL_VENDOR_NVIDIA) + if (deviceInfo->deviceVendor == DeviceVendor::Nvidia) { // Ignore return values, failing to set the variable does not mean // that something will go wrong later. diff --git a/src/gromacs/gpu_utils/gputraits_ocl.h b/src/gromacs/gpu_utils/gputraits_ocl.h index 865c29967c..c4a421f3a9 100644 --- a/src/gromacs/gpu_utils/gputraits_ocl.h +++ b/src/gromacs/gpu_utils/gputraits_ocl.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2018,2019, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -45,6 +45,16 @@ #include "gromacs/gpu_utils/gmxopencl.h" +//! OpenCL device vendors +enum class DeviceVendor : int +{ + Unknown = 0, //!< No data + Nvidia = 1, //!< NVIDIA + Amd = 2, //!< Advanced Micro Devices + Intel = 3, //!< Intel + Count = 4 +}; + //! \brief GPU command stream using CommandStream = cl_command_queue; //! \brief Single GPU call timing event diff --git a/src/gromacs/gpu_utils/ocl_compiler.cpp b/src/gromacs/gpu_utils/ocl_compiler.cpp index 07e71d2feb..5ce28b8fd2 100644 --- a/src/gromacs/gpu_utils/ocl_compiler.cpp +++ b/src/gromacs/gpu_utils/ocl_compiler.cpp @@ -155,11 +155,11 @@ static void writeOclBuildLog(FILE* fplog, /*! \brief Construct compiler options string * - * \param deviceVendorId Device vendor id. Used to - * automatically enable some vendor-specific options + * \param deviceVendor Device vendor. Used to automatically enable some + * vendor-specific options. * \return The string with the compiler options */ -static std::string selectCompilerOptions(ocl_vendor_id_t deviceVendorId) +static std::string selectCompilerOptions(DeviceVendor deviceVendor) { std::string compilerOptions; @@ -179,12 +179,12 @@ static std::string selectCompilerOptions(ocl_vendor_id_t deviceVendorId) compilerOptions += " -cl-denorms-are-zero"; } - if ((deviceVendorId == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE")) + if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_VERBOSE")) { compilerOptions += " -cl-nv-verbose"; } - if ((deviceVendorId == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES")) + if ((deviceVendor == DeviceVendor::Amd) && getenv("GMX_OCL_DUMP_INTERM_FILES")) { /* To dump OpenCL build intermediate files, caching must be off */ if (!useBuildCache) @@ -309,21 +309,19 @@ size_t getDeviceWarpSize(cl_context context, cl_device_id deviceId) /*! 
\brief Select a compilation-line define for a vendor-specific kernel choice from vendor id * - * \param[in] vendorId Vendor id enumerator + * \param[in] deviceVendor Vendor id enumerator * * \return The appropriate compilation-line define */ -static const char* makeVendorFlavorChoice(ocl_vendor_id_t vendorId) +static const std::string makeVendorFlavorChoice(DeviceVendor deviceVendor) { - const char* choice; - switch (vendorId) + switch (deviceVendor) { - case OCL_VENDOR_AMD: choice = "-D_AMD_SOURCE_"; break; - case OCL_VENDOR_NVIDIA: choice = "-D_NVIDIA_SOURCE_"; break; - case OCL_VENDOR_INTEL: choice = "-D_INTEL_SOURCE_"; break; - default: choice = ""; break; + case DeviceVendor::Amd: return "-D_AMD_SOURCE_"; + case DeviceVendor::Nvidia: return "-D_NVIDIA_SOURCE_"; + case DeviceVendor::Intel: return "-D_INTEL_SOURCE_"; + default: return ""; } - return choice; } /*! \brief Create include paths for kernel sources. @@ -380,7 +378,7 @@ static void removeExtraSpaces(std::string* str) static std::string makePreprocessorOptions(const std::string& kernelRootPath, const std::string& includeRootPath, size_t warpSize, - ocl_vendor_id_t deviceVendorId, + DeviceVendor deviceVendor, const std::string& extraDefines) { std::string preprocessorOptions; @@ -388,11 +386,11 @@ static std::string makePreprocessorOptions(const std::string& kernelRootPath, /* Compose the complete build options */ preprocessorOptions = formatString("-DWARP_SIZE_TEST=%d", static_cast(warpSize)); preprocessorOptions += ' '; - preprocessorOptions += makeVendorFlavorChoice(deviceVendorId); + preprocessorOptions += makeVendorFlavorChoice(deviceVendor); preprocessorOptions += ' '; preprocessorOptions += extraDefines; preprocessorOptions += ' '; - preprocessorOptions += selectCompilerOptions(deviceVendorId); + preprocessorOptions += selectCompilerOptions(deviceVendor); preprocessorOptions += ' '; preprocessorOptions += makeKernelIncludePathOption(kernelRootPath); preprocessorOptions += ' '; @@ -410,7 +408,7 @@ 
cl_program compileProgram(FILE* fplog, const std::string& extraDefines, cl_context context, cl_device_id deviceId, - ocl_vendor_id_t deviceVendorId) + DeviceVendor deviceVendor) { cl_int cl_error; // Let the kernel find include files from its module. @@ -425,7 +423,7 @@ cl_program compileProgram(FILE* fplog, /* Make the build options */ std::string preprocessorOptions = makePreprocessorOptions( - kernelRootPath, rootPath, getDeviceWarpSize(context, deviceId), deviceVendorId, extraDefines); + kernelRootPath, rootPath, getDeviceWarpSize(context, deviceId), deviceVendor, extraDefines); bool buildCacheWasRead = false; @@ -511,7 +509,7 @@ cl_program compileProgram(FILE* fplog, } } } - if ((OCL_VENDOR_NVIDIA == deviceVendorId) && getenv("GMX_OCL_DUMP_INTERM_FILES")) + if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_DUMP_INTERM_FILES")) { /* If dumping intermediate files has been requested and this is an NVIDIA card => write PTX to file */ diff --git a/src/gromacs/gpu_utils/ocl_compiler.h b/src/gromacs/gpu_utils/ocl_compiler.h index 06a0354b1e..db6d0db8e3 100644 --- a/src/gromacs/gpu_utils/ocl_compiler.h +++ b/src/gromacs/gpu_utils/ocl_compiler.h @@ -86,10 +86,12 @@ size_t getKernelWarpSize(cl_kernel kernel, cl_device_id deviceId); * \param[in] kernelRelativePath Relative path to the kernel in the source tree, * e.g. "src/gromacs/mdlib/nbnxn_ocl" for NB kernels. * \param[in] kernelBaseFilename The name of the kernel source file to compile, e.g. - * "nbnxn_ocl_kernels.cl" \param[in] extraDefines Preprocessor defines required by the - * calling code, e.g. for configuring the kernels \param[in] context OpenCL context - * on the device to compile for \param[in] deviceId OpenCL device id of the device to - * compile for \param[in] deviceVendorId Enumerator of the device vendor to compile for + * "nbnxn_ocl_kernels.cl" + * \param[in] extraDefines Preprocessor defines required by the calling code, + * e.g. 
for configuring the kernels + * \param[in] context OpenCL context on the device to compile for + * \param[in] deviceId OpenCL device id of the device to compile for + * \param[in] deviceVendor Enumerator of the device vendor to compile for * * \returns The compiled OpenCL program * @@ -107,7 +109,7 @@ cl_program compileProgram(FILE* fplog, const std::string& extraDefines, cl_context context, cl_device_id deviceId, - ocl_vendor_id_t deviceVendorId); + DeviceVendor deviceVendor); } // namespace ocl } // namespace gmx diff --git a/src/gromacs/gpu_utils/oclutils.h b/src/gromacs/gpu_utils/oclutils.h index f5ec03353b..6ad4de9d48 100644 --- a/src/gromacs/gpu_utils/oclutils.h +++ b/src/gromacs/gpu_utils/oclutils.h @@ -51,15 +51,6 @@ enum class GpuApiCallBehavior; -/*! \brief OpenCL vendor IDs */ -typedef enum -{ - OCL_VENDOR_NVIDIA = 0, - OCL_VENDOR_AMD, - OCL_VENDOR_INTEL, - OCL_VENDOR_UNKNOWN -} ocl_vendor_id_t; - /*! \internal * \brief OpenCL GPU device identificator * @@ -81,16 +72,16 @@ typedef struct */ struct gmx_device_info_t { - ocl_gpu_id_t ocl_gpu_id; /**< device ID assigned at detection */ - char device_name[256]; /**< device name */ - char device_version[256]; /**< device version */ - char device_vendor[256]; /**< device vendor */ - int compute_units; /**< number of compute units */ - int adress_bits; /**< number of adress bits the device is capable of */ - int stat; /**< device status takes values of e_gpu_detect_res_t */ - ocl_vendor_id_t vendor_e; /**< device vendor as defined by ocl_vendor_id_t */ - size_t maxWorkItemSizes[3]; /**< workgroup size limits (CL_DEVICE_MAX_WORK_ITEM_SIZES) */ - size_t maxWorkGroupSize; /**< workgroup total size limit (CL_DEVICE_MAX_WORK_GROUP_SIZE) */ + ocl_gpu_id_t ocl_gpu_id; /**< device ID assigned at detection */ + char device_name[256]; /**< device name */ + char device_version[256]; /**< device version */ + char vendorName[256]; /**< device vendor */ + int compute_units; /**< number of compute units */ + int adress_bits; 
/**< number of adress bits the device is capable of */ + int stat; /**< device status takes values of e_gpu_detect_res_t */ + DeviceVendor deviceVendor; /**< device vendor */ + size_t maxWorkItemSizes[3]; /**< workgroup size limits (CL_DEVICE_MAX_WORK_ITEM_SIZES) */ + size_t maxWorkGroupSize; /**< workgroup total size limit (CL_DEVICE_MAX_WORK_GROUP_SIZE) */ }; /*! \internal diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp index 90dbf0a336..d7511d3bd1 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp @@ -788,7 +788,8 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c * and j-cluster concurrency, in x, y, and z, respectively. * - The 1D block-grid contains as many blocks as super-clusters. */ - int num_threads_z = getOclPruneKernelJ4Concurrency(nb->dev_info->vendor_e); + int num_threads_z = getOclPruneKernelJ4Concurrency(nb->dev_info->deviceVendor); + /* kernel launch config */ KernelLaunchConfig config; diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp index 2556ea6261..c4df4f1711 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp @@ -695,8 +695,8 @@ NbnxmGpu* gpu_init(const gmx_device_info_t* deviceInfo, * TODO: decide about NVIDIA */ nb->bPrefetchLjParam = (getenv("GMX_OCL_DISABLE_I_PREFETCH") == nullptr) - && ((nb->dev_info->vendor_e == OCL_VENDOR_AMD) - || (nb->dev_info->vendor_e == OCL_VENDOR_INTEL) + && ((nb->dev_info->deviceVendor == DeviceVendor::Amd) + || (nb->dev_info->deviceVendor == DeviceVendor::Intel) || (getenv("GMX_OCL_ENABLE_I_PREFETCH") != nullptr)); /* NOTE: in CUDA we pick L1 cache configuration for the nbnxn kernels here, diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp index 7374d433d3..285d91ef8f 100644 --- 
a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp @@ -203,7 +203,7 @@ void nbnxn_gpu_compile_kernels(NbnxmGpu* nb) program = gmx::ocl::compileProgram( stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines, nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id, - nb->dev_info->vendor_e); + nb->dev_info->deviceVendor); } catch (gmx::GromacsException& e) { diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h index 7ae0526851..d1ce7be20a 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h @@ -78,17 +78,16 @@ const int c_oclPruneKernelJ4ConcurrencyDEFAULT = GMX_NBNXN_PRUNE_KERNEL_J4_CONCU /*! @} */ -/*! \brief Returns the j4 processing concurrency parameter for the vendor \p vendorId - * \param vendorId takes values from #ocl_vendor_id_t. +/*! \brief Returns the j4 processing concurrency parameter for the vendor \p deviceVendor + * \param deviceVendor Vendor. */ -static inline int getOclPruneKernelJ4Concurrency(int vendorId) +static inline int getOclPruneKernelJ4Concurrency(DeviceVendor deviceVendor) { - switch (vendorId) + switch (deviceVendor) { default: return c_oclPruneKernelJ4ConcurrencyDEFAULT; } } - /*! \brief Electrostatic OpenCL kernel flavors. * * Types of electrostatics implementations available in the OpenCL non-bonded -- 2.22.0