*/
static void checkRequiredWarpSize(cl_kernel kernel, const char* kernelName, const gmx_device_info_t* deviceInfo)
{
- if (deviceInfo->vendor_e == OCL_VENDOR_INTEL)
+ if (deviceInfo->deviceVendor == DeviceVendor::Intel)
{
size_t kernelWarpSize = gmx::ocl::getKernelWarpSize(kernel, deviceInfo->ocl_gpu_id.ocl_device_id);
the log output here should be written there */
program = gmx::ocl::compileProgram(stderr, "gromacs/ewald", "pme_program.cl", commonDefines,
context, deviceInfo->ocl_gpu_id.ocl_device_id,
- deviceInfo->vendor_e);
+ deviceInfo->deviceVendor);
}
catch (gmx::GromacsException& e)
{
}
/* Only AMD, Intel, and NVIDIA GPUs are supported for now */
- switch (devInfo->vendor_e)
+ switch (devInfo->deviceVendor)
{
- case OCL_VENDOR_NVIDIA: return egpuCompatible;
- case OCL_VENDOR_AMD:
+ case DeviceVendor::Nvidia: return egpuCompatible;
+ case DeviceVendor::Amd:
return runningOnCompatibleOSForAmd() ? egpuCompatible : egpuIncompatible;
- case OCL_VENDOR_INTEL:
+ case DeviceVendor::Intel:
return GMX_OPENCL_NB_CLUSTER_SIZE == 4 ? egpuCompatible : egpuIncompatibleClusterSize;
default: return egpuIncompatible;
}
} // namespace gmx
-/*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
+/*! \brief Returns a DeviceVendor value corresponding to the input OpenCL vendor name.
*
+ * The name is searched with strstr() (case-sensitive substring match), testing
+ * "NVIDIA" first, then "AMD" or "Advanced Micro Devices", and finally "Intel".
+ *
- * \param[in] vendor_name String with OpenCL vendor name.
- * \returns ocl_vendor_id_t value for the input vendor_name
+ * \param[in] vendorName String with OpenCL vendor name; a null pointer is accepted.
+ * \returns DeviceVendor value for the input vendor name, or DeviceVendor::Unknown
+ * when \p vendorName is null or contains none of the substrings above.
*/
-static ocl_vendor_id_t get_vendor_id(char* vendor_name)
+static DeviceVendor getDeviceVendor(const char* vendorName)
{
- if (vendor_name)
+ if (vendorName)
{
- if (strstr(vendor_name, "NVIDIA"))
+ if (strstr(vendorName, "NVIDIA"))
{
- return OCL_VENDOR_NVIDIA;
+ return DeviceVendor::Nvidia;
}
- else if (strstr(vendor_name, "AMD") || strstr(vendor_name, "Advanced Micro Devices"))
+ else if (strstr(vendorName, "AMD") || strstr(vendorName, "Advanced Micro Devices"))
{
- return OCL_VENDOR_AMD;
+ return DeviceVendor::Amd;
}
- else if (strstr(vendor_name, "Intel"))
+ else if (strstr(vendorName, "Intel"))
{
- return OCL_VENDOR_INTEL;
+ return DeviceVendor::Intel;
}
}
- return OCL_VENDOR_UNKNOWN;
+ return DeviceVendor::Unknown;
}
bool isGpuDetectionFunctional(std::string* errorMessage)
sizeof(gpu_info->gpu_dev[device_index].device_version),
gpu_info->gpu_dev[device_index].device_version, nullptr);
- gpu_info->gpu_dev[device_index].device_vendor[0] = 0;
+ gpu_info->gpu_dev[device_index].vendorName[0] = 0;
clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR,
- sizeof(gpu_info->gpu_dev[device_index].device_vendor),
- gpu_info->gpu_dev[device_index].device_vendor, nullptr);
+ sizeof(gpu_info->gpu_dev[device_index].vendorName),
+ gpu_info->gpu_dev[device_index].vendorName, nullptr);
gpu_info->gpu_dev[device_index].compute_units = 0;
clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(gpu_info->gpu_dev[device_index].adress_bits),
&(gpu_info->gpu_dev[device_index].adress_bits), nullptr);
- gpu_info->gpu_dev[device_index].vendor_e =
- get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor);
+ gpu_info->gpu_dev[device_index].deviceVendor =
+ getDeviceVendor(gpu_info->gpu_dev[device_index].vendorName);
clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, 3 * sizeof(size_t),
&gpu_info->gpu_dev[device_index].maxWorkItemSizes, nullptr);
int last = -1;
for (int i = 0; i < gpu_info->n_dev; i++)
{
- if (OCL_VENDOR_AMD == gpu_info->gpu_dev[i].vendor_e)
+ if (gpu_info->gpu_dev[i].deviceVendor == DeviceVendor::Amd)
{
last++;
{
for (int i = 0; i < gpu_info->n_dev; i++)
{
- if (OCL_VENDOR_NVIDIA == gpu_info->gpu_dev[i].vendor_e)
+ if (gpu_info->gpu_dev[i].deviceVendor == DeviceVendor::Nvidia)
{
last++;
else
{
sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s", index, dinfo->device_name,
- dinfo->device_vendor, dinfo->device_version, gpu_detect_res_str[dinfo->stat]);
+ dinfo->vendorName, dinfo->device_version, gpu_detect_res_str[dinfo->stat]);
}
}
// the cache does not always get regenerated when the source code changes,
// e.g. if the path to the kernel sources remains the same
- if (deviceInfo->vendor_e == OCL_VENDOR_NVIDIA)
+ if (deviceInfo->deviceVendor == DeviceVendor::Nvidia)
{
// Ignore return values, failing to set the variable does not mean
// that something will go wrong later.
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/gpu_utils/gmxopencl.h"
+//! OpenCL device vendors (scoped enumeration with int as the underlying type)
+enum class DeviceVendor : int
+{
+ Unknown = 0, //!< No data (vendor not identified)
+ Nvidia = 1, //!< NVIDIA
+ Amd = 2, //!< Advanced Micro Devices
+ Intel = 3, //!< Intel
+ Count = 4 //!< Number of vendor values listed above (Unknown through Intel)
+};
+
//! \brief GPU command stream
using CommandStream = cl_command_queue;
//! \brief Single GPU call timing event
/*! \brief Construct compiler options string
*
- * \param deviceVendorId Device vendor id. Used to
- * automatically enable some vendor-specific options
+ * \param deviceVendor Device vendor. Used to automatically enable some
+ * vendor-specific options.
* \return The string with the compiler options
*/
-static std::string selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
+static std::string selectCompilerOptions(DeviceVendor deviceVendor)
{
std::string compilerOptions;
compilerOptions += " -cl-denorms-are-zero";
}
- if ((deviceVendorId == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
+ if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_VERBOSE"))
{
compilerOptions += " -cl-nv-verbose";
}
- if ((deviceVendorId == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
+ if ((deviceVendor == DeviceVendor::Amd) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
{
/* To dump OpenCL build intermediate files, caching must be off */
if (!useBuildCache)
/*! \brief Select a compilation-line define for a vendor-specific kernel choice from vendor id
*
- * \param[in] vendorId Vendor id enumerator
+ * \param[in] deviceVendor Device vendor enumerator
*
* \return The appropriate compilation-line define
+ *
+ * The define is "-D_AMD_SOURCE_", "-D_NVIDIA_SOURCE_" or "-D_INTEL_SOURCE_" for the
+ * respective vendor and an empty string for any other value. The string is returned
+ * by non-const value so that callers can move from the result.
*/
-static const char* makeVendorFlavorChoice(ocl_vendor_id_t vendorId)
+static std::string makeVendorFlavorChoice(DeviceVendor deviceVendor)
{
- const char* choice;
- switch (vendorId)
+ switch (deviceVendor)
{
- case OCL_VENDOR_AMD: choice = "-D_AMD_SOURCE_"; break;
- case OCL_VENDOR_NVIDIA: choice = "-D_NVIDIA_SOURCE_"; break;
- case OCL_VENDOR_INTEL: choice = "-D_INTEL_SOURCE_"; break;
- default: choice = ""; break;
+ case DeviceVendor::Amd: return "-D_AMD_SOURCE_";
+ case DeviceVendor::Nvidia: return "-D_NVIDIA_SOURCE_";
+ case DeviceVendor::Intel: return "-D_INTEL_SOURCE_";
+ default: return "";
}
- return choice;
}
/*! \brief Create include paths for kernel sources.
static std::string makePreprocessorOptions(const std::string& kernelRootPath,
const std::string& includeRootPath,
size_t warpSize,
- ocl_vendor_id_t deviceVendorId,
+ DeviceVendor deviceVendor,
const std::string& extraDefines)
{
std::string preprocessorOptions;
/* Compose the complete build options */
preprocessorOptions = formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize));
preprocessorOptions += ' ';
- preprocessorOptions += makeVendorFlavorChoice(deviceVendorId);
+ preprocessorOptions += makeVendorFlavorChoice(deviceVendor);
preprocessorOptions += ' ';
preprocessorOptions += extraDefines;
preprocessorOptions += ' ';
- preprocessorOptions += selectCompilerOptions(deviceVendorId);
+ preprocessorOptions += selectCompilerOptions(deviceVendor);
preprocessorOptions += ' ';
preprocessorOptions += makeKernelIncludePathOption(kernelRootPath);
preprocessorOptions += ' ';
const std::string& extraDefines,
cl_context context,
cl_device_id deviceId,
- ocl_vendor_id_t deviceVendorId)
+ DeviceVendor deviceVendor)
{
cl_int cl_error;
// Let the kernel find include files from its module.
/* Make the build options */
std::string preprocessorOptions = makePreprocessorOptions(
- kernelRootPath, rootPath, getDeviceWarpSize(context, deviceId), deviceVendorId, extraDefines);
+ kernelRootPath, rootPath, getDeviceWarpSize(context, deviceId), deviceVendor, extraDefines);
bool buildCacheWasRead = false;
}
}
}
- if ((OCL_VENDOR_NVIDIA == deviceVendorId) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
+ if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
{
/* If dumping intermediate files has been requested and this is an NVIDIA card
=> write PTX to file */
* \param[in] kernelRelativePath Relative path to the kernel in the source tree,
* e.g. "src/gromacs/mdlib/nbnxn_ocl" for NB kernels.
* \param[in] kernelBaseFilename The name of the kernel source file to compile, e.g.
- * "nbnxn_ocl_kernels.cl" \param[in] extraDefines Preprocessor defines required by the
- * calling code, e.g. for configuring the kernels \param[in] context OpenCL context
- * on the device to compile for \param[in] deviceId OpenCL device id of the device to
- * compile for \param[in] deviceVendorId Enumerator of the device vendor to compile for
+ * "nbnxn_ocl_kernels.cl"
+ * \param[in] extraDefines Preprocessor defines required by the calling code,
+ * e.g. for configuring the kernels
+ * \param[in] context OpenCL context on the device to compile for
+ * \param[in] deviceId OpenCL device id of the device to compile for
+ * \param[in] deviceVendor Enumerator of the device vendor to compile for
*
* \returns The compiled OpenCL program
*
const std::string& extraDefines,
cl_context context,
cl_device_id deviceId,
- ocl_vendor_id_t deviceVendorId);
+ DeviceVendor deviceVendor);
} // namespace ocl
} // namespace gmx
enum class GpuApiCallBehavior;
-/*! \brief OpenCL vendor IDs */
-typedef enum
-{
- OCL_VENDOR_NVIDIA = 0,
- OCL_VENDOR_AMD,
- OCL_VENDOR_INTEL,
- OCL_VENDOR_UNKNOWN
-} ocl_vendor_id_t;
-
/*! \internal
* \brief OpenCL GPU device identificator
*
*/
struct gmx_device_info_t
{
- ocl_gpu_id_t ocl_gpu_id; /**< device ID assigned at detection */
- char device_name[256]; /**< device name */
- char device_version[256]; /**< device version */
- char device_vendor[256]; /**< device vendor */
- int compute_units; /**< number of compute units */
- int adress_bits; /**< number of adress bits the device is capable of */
- int stat; /**< device status takes values of e_gpu_detect_res_t */
- ocl_vendor_id_t vendor_e; /**< device vendor as defined by ocl_vendor_id_t */
- size_t maxWorkItemSizes[3]; /**< workgroup size limits (CL_DEVICE_MAX_WORK_ITEM_SIZES) */
- size_t maxWorkGroupSize; /**< workgroup total size limit (CL_DEVICE_MAX_WORK_GROUP_SIZE) */
+ ocl_gpu_id_t ocl_gpu_id; /**< device ID assigned at detection */
+ char device_name[256]; /**< device name */
+ char device_version[256]; /**< device version */
+ char vendorName[256]; /**< device vendor name string (queried via CL_DEVICE_VENDOR) */
+ int compute_units; /**< number of compute units */
+ int adress_bits; /**< number of address bits the device is capable of */
+ int stat; /**< device status takes values of e_gpu_detect_res_t */
+ DeviceVendor deviceVendor; /**< device vendor enumerator derived from vendorName */
+ size_t maxWorkItemSizes[3]; /**< workgroup size limits (CL_DEVICE_MAX_WORK_ITEM_SIZES) */
+ size_t maxWorkGroupSize; /**< workgroup total size limit (CL_DEVICE_MAX_WORK_GROUP_SIZE) */
};
/*! \internal
* and j-cluster concurrency, in x, y, and z, respectively.
* - The 1D block-grid contains as many blocks as super-clusters.
*/
- int num_threads_z = getOclPruneKernelJ4Concurrency(nb->dev_info->vendor_e);
+ int num_threads_z = getOclPruneKernelJ4Concurrency(nb->dev_info->deviceVendor);
+
/* kernel launch config */
KernelLaunchConfig config;
* TODO: decide about NVIDIA
*/
nb->bPrefetchLjParam = (getenv("GMX_OCL_DISABLE_I_PREFETCH") == nullptr)
- && ((nb->dev_info->vendor_e == OCL_VENDOR_AMD)
- || (nb->dev_info->vendor_e == OCL_VENDOR_INTEL)
+ && ((nb->dev_info->deviceVendor == DeviceVendor::Amd)
+ || (nb->dev_info->deviceVendor == DeviceVendor::Intel)
|| (getenv("GMX_OCL_ENABLE_I_PREFETCH") != nullptr));
/* NOTE: in CUDA we pick L1 cache configuration for the nbnxn kernels here,
program = gmx::ocl::compileProgram(
stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines,
nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id,
- nb->dev_info->vendor_e);
+ nb->dev_info->deviceVendor);
}
catch (gmx::GromacsException& e)
{
/*! @} */
/*! \brief Returns the j4 processing concurrency parameter for the vendor \p vendorId
- * \param vendorId takes values from #ocl_vendor_id_t.
+ * \param deviceVendor Device vendor (the parameter the brief line calls \p vendorId).
+ * \returns c_oclPruneKernelJ4ConcurrencyDEFAULT for every vendor; the switch has only a default case.
*/
-static inline int getOclPruneKernelJ4Concurrency(int vendorId)
+static inline int getOclPruneKernelJ4Concurrency(DeviceVendor deviceVendor)
{
- switch (vendorId)
+ switch (deviceVendor)
{
default: return c_oclPruneKernelJ4ConcurrencyDEFAULT;
}
}
-
/*! \brief Electrostatic OpenCL kernel flavors.
*
* Types of electrostatics implementations available in the OpenCL non-bonded