Make OpenCL DeviceVendor into enum class and move to GPU traits
author Artem Zhmurov <zhmurov@gmail.com>
Wed, 29 Jan 2020 13:31:43 +0000 (14:31 +0100)
committer Artem Zhmurov <zhmurov@gmail.com>
Wed, 5 Feb 2020 23:24:11 +0000 (00:24 +0100)
The OpenCL device context requires information about the device vendor
when it is constructed. To prepare for an opaque DeviceContext, the vendor
enum was moved into the OpenCL GPU traits.

Refs. #3311, needed for #3315.

Change-Id: Iec22ff17543b6a99407048de6e0cd82bb7218fb0

src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp
src/gromacs/gpu_utils/gpu_utils_ocl.cpp
src/gromacs/gpu_utils/gputraits_ocl.h
src/gromacs/gpu_utils/ocl_compiler.cpp
src/gromacs/gpu_utils/ocl_compiler.h
src/gromacs/gpu_utils/oclutils.h
src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h

index 79ef8b6d2103fb7678fa753b86ec962a96ec943d..d17a76256eea5bb1071420e81c2e63f2fb44d236 100644 (file)
@@ -112,7 +112,7 @@ PmeGpuProgramImpl::~PmeGpuProgramImpl()
  */
 static void checkRequiredWarpSize(cl_kernel kernel, const char* kernelName, const gmx_device_info_t* deviceInfo)
 {
-    if (deviceInfo->vendor_e == OCL_VENDOR_INTEL)
+    if (deviceInfo->deviceVendor == DeviceVendor::Intel)
     {
         size_t kernelWarpSize = gmx::ocl::getKernelWarpSize(kernel, deviceInfo->ocl_gpu_id.ocl_device_id);
 
@@ -167,7 +167,7 @@ void PmeGpuProgramImpl::compileKernels(const gmx_device_info_t* deviceInfo)
                the log output here should be written there */
             program = gmx::ocl::compileProgram(stderr, "gromacs/ewald", "pme_program.cl", commonDefines,
                                                context, deviceInfo->ocl_gpu_id.ocl_device_id,
-                                               deviceInfo->vendor_e);
+                                               deviceInfo->deviceVendor);
         }
         catch (gmx::GromacsException& e)
         {
index fd97e174391f719fd64119d1f6295af8c2af1b8b..8770e6862d1637337af772388396e4c65b14d42e 100644 (file)
@@ -234,12 +234,12 @@ static int isDeviceSupported(const gmx_device_info_t* devInfo)
     }
 
     /* Only AMD, Intel, and NVIDIA GPUs are supported for now */
-    switch (devInfo->vendor_e)
+    switch (devInfo->deviceVendor)
     {
-        case OCL_VENDOR_NVIDIA: return egpuCompatible;
-        case OCL_VENDOR_AMD:
+        case DeviceVendor::Nvidia: return egpuCompatible;
+        case DeviceVendor::Amd:
             return runningOnCompatibleOSForAmd() ? egpuCompatible : egpuIncompatible;
-        case OCL_VENDOR_INTEL:
+        case DeviceVendor::Intel:
             return GMX_OPENCL_NB_CLUSTER_SIZE == 4 ? egpuCompatible : egpuIncompatibleClusterSize;
         default: return egpuIncompatible;
     }
@@ -279,29 +279,29 @@ static int checkGpu(size_t deviceId, const gmx_device_info_t* deviceInfo)
 
 } // namespace gmx
 
-/*! \brief Returns an ocl_vendor_id_t value corresponding to the input OpenCL vendor name.
+/*! \brief Returns a DeviceVendor value corresponding to the input OpenCL vendor name.
  *
- *  \param[in] vendor_name String with OpenCL vendor name.
- *  \returns               ocl_vendor_id_t value for the input vendor_name
+ *  \param[in] vendorName  String with OpenCL vendor name; may be null.
+ *  \returns               DeviceVendor value for the input vendor name, Unknown if null or not recognized
  */
-static ocl_vendor_id_t get_vendor_id(char* vendor_name)
+static DeviceVendor getDeviceVendor(const char* vendorName)
 {
-    if (vendor_name)
+    if (vendorName)
     {
-        if (strstr(vendor_name, "NVIDIA"))
+        if (strstr(vendorName, "NVIDIA"))
         {
-            return OCL_VENDOR_NVIDIA;
+            return DeviceVendor::Nvidia;
         }
-        else if (strstr(vendor_name, "AMD") || strstr(vendor_name, "Advanced Micro Devices"))
+        else if (strstr(vendorName, "AMD") || strstr(vendorName, "Advanced Micro Devices"))
         {
-            return OCL_VENDOR_AMD;
+            return DeviceVendor::Amd;
         }
-        else if (strstr(vendor_name, "Intel"))
+        else if (strstr(vendorName, "Intel"))
         {
-            return OCL_VENDOR_INTEL;
+            return DeviceVendor::Intel;
         }
     }
-    return OCL_VENDOR_UNKNOWN;
+    return DeviceVendor::Unknown;
 }
 
 bool isGpuDetectionFunctional(std::string* errorMessage)
@@ -434,10 +434,10 @@ void findGpus(gmx_gpu_info_t* gpu_info)
                                     sizeof(gpu_info->gpu_dev[device_index].device_version),
                                     gpu_info->gpu_dev[device_index].device_version, nullptr);
 
-                    gpu_info->gpu_dev[device_index].device_vendor[0] = 0;
+                    gpu_info->gpu_dev[device_index].vendorName[0] = 0;
                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR,
-                                    sizeof(gpu_info->gpu_dev[device_index].device_vendor),
-                                    gpu_info->gpu_dev[device_index].device_vendor, nullptr);
+                                    sizeof(gpu_info->gpu_dev[device_index].vendorName),
+                                    gpu_info->gpu_dev[device_index].vendorName, nullptr);
 
                     gpu_info->gpu_dev[device_index].compute_units = 0;
                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS,
@@ -449,8 +449,8 @@ void findGpus(gmx_gpu_info_t* gpu_info)
                                     sizeof(gpu_info->gpu_dev[device_index].adress_bits),
                                     &(gpu_info->gpu_dev[device_index].adress_bits), nullptr);
 
-                    gpu_info->gpu_dev[device_index].vendor_e =
-                            get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor);
+                    gpu_info->gpu_dev[device_index].deviceVendor =
+                            getDeviceVendor(gpu_info->gpu_dev[device_index].vendorName);
 
                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, 3 * sizeof(size_t),
                                     &gpu_info->gpu_dev[device_index].maxWorkItemSizes, nullptr);
@@ -479,7 +479,7 @@ void findGpus(gmx_gpu_info_t* gpu_info)
                 int last = -1;
                 for (int i = 0; i < gpu_info->n_dev; i++)
                 {
-                    if (OCL_VENDOR_AMD == gpu_info->gpu_dev[i].vendor_e)
+                    if (gpu_info->gpu_dev[i].deviceVendor == DeviceVendor::Amd)
                     {
                         last++;
 
@@ -498,7 +498,7 @@ void findGpus(gmx_gpu_info_t* gpu_info)
                 {
                     for (int i = 0; i < gpu_info->n_dev; i++)
                     {
-                        if (OCL_VENDOR_NVIDIA == gpu_info->gpu_dev[i].vendor_e)
+                        if (gpu_info->gpu_dev[i].deviceVendor == DeviceVendor::Nvidia)
                         {
                             last++;
 
@@ -543,7 +543,7 @@ void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int ind
     else
     {
         sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s", index, dinfo->device_name,
-                dinfo->device_vendor, dinfo->device_version, gpu_detect_res_str[dinfo->stat]);
+                dinfo->vendorName, dinfo->device_version, gpu_detect_res_str[dinfo->stat]);
     }
 }
 
@@ -557,7 +557,7 @@ void init_gpu(const gmx_device_info_t* deviceInfo)
     // the cache does not always get regenerated when the source code changes,
     // e.g. if the path to the kernel sources remains the same
 
-    if (deviceInfo->vendor_e == OCL_VENDOR_NVIDIA)
+    if (deviceInfo->deviceVendor == DeviceVendor::Nvidia)
     {
         // Ignore return values, failing to set the variable does not mean
         // that something will go wrong later.
index 865c29967c56c9a8911d86f4c80d73da5bf1a490..c4a421f3a9fa001e02944995cad377ab714493a2 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 
 #include "gromacs/gpu_utils/gmxopencl.h"
 
+//! OpenCL device vendors
+enum class DeviceVendor : int
+{
+    Unknown = 0, //!< No data
+    Nvidia  = 1, //!< NVIDIA
+    Amd     = 2, //!< Advanced Micro Devices
+    Intel   = 3, //!< Intel
+    Count   = 4  //!< Number of vendor values listed above
+};
+
 //! \brief GPU command stream
 using CommandStream = cl_command_queue;
 //! \brief Single GPU call timing event
index 07e71d2febc0ca1e90b91b89b5815de5a7162e4a..5ce28b8fd2bff832410e3dcaa3485a726f230cd5 100644 (file)
@@ -155,11 +155,11 @@ static void writeOclBuildLog(FILE*              fplog,
 
 /*! \brief Construct compiler options string
  *
- * \param deviceVendorId  Device vendor id. Used to
- *          automatically enable some vendor-specific options
+ * \param deviceVendor  Device vendor. Used to automatically enable some
+ *                      vendor-specific options.
  * \return The string with the compiler options
  */
-static std::string selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
+static std::string selectCompilerOptions(DeviceVendor deviceVendor)
 {
     std::string compilerOptions;
 
@@ -179,12 +179,12 @@ static std::string selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
         compilerOptions += " -cl-denorms-are-zero";
     }
 
-    if ((deviceVendorId == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
+    if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_VERBOSE"))
     {
         compilerOptions += " -cl-nv-verbose";
     }
 
-    if ((deviceVendorId == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
+    if ((deviceVendor == DeviceVendor::Amd) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
     {
         /* To dump OpenCL build intermediate files, caching must be off */
         if (!useBuildCache)
@@ -309,21 +309,19 @@ size_t getDeviceWarpSize(cl_context context, cl_device_id deviceId)
 
 /*! \brief Select a compilation-line define for a vendor-specific kernel choice from vendor id
  *
- * \param[in] vendorId Vendor id enumerator
+ * \param[in] deviceVendor Device vendor enumerator
  *
  * \return The appropriate compilation-line define
  */
-static const char* makeVendorFlavorChoice(ocl_vendor_id_t vendorId)
+static const std::string makeVendorFlavorChoice(DeviceVendor deviceVendor)
 {
-    const char* choice;
-    switch (vendorId)
+    switch (deviceVendor)
     {
-        case OCL_VENDOR_AMD: choice = "-D_AMD_SOURCE_"; break;
-        case OCL_VENDOR_NVIDIA: choice = "-D_NVIDIA_SOURCE_"; break;
-        case OCL_VENDOR_INTEL: choice = "-D_INTEL_SOURCE_"; break;
-        default: choice = ""; break;
+        case DeviceVendor::Amd: return "-D_AMD_SOURCE_";
+        case DeviceVendor::Nvidia: return "-D_NVIDIA_SOURCE_";
+        case DeviceVendor::Intel: return "-D_INTEL_SOURCE_";
+        default: return ""; // any other vendor value gets no flavor define
     }
-    return choice;
 }
 
 /*! \brief Create include paths for kernel sources.
@@ -380,7 +378,7 @@ static void removeExtraSpaces(std::string* str)
 static std::string makePreprocessorOptions(const std::string& kernelRootPath,
                                            const std::string& includeRootPath,
                                            size_t             warpSize,
-                                           ocl_vendor_id_t    deviceVendorId,
+                                           DeviceVendor       deviceVendor,
                                            const std::string& extraDefines)
 {
     std::string preprocessorOptions;
@@ -388,11 +386,11 @@ static std::string makePreprocessorOptions(const std::string& kernelRootPath,
     /* Compose the complete build options */
     preprocessorOptions = formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize));
     preprocessorOptions += ' ';
-    preprocessorOptions += makeVendorFlavorChoice(deviceVendorId);
+    preprocessorOptions += makeVendorFlavorChoice(deviceVendor);
     preprocessorOptions += ' ';
     preprocessorOptions += extraDefines;
     preprocessorOptions += ' ';
-    preprocessorOptions += selectCompilerOptions(deviceVendorId);
+    preprocessorOptions += selectCompilerOptions(deviceVendor);
     preprocessorOptions += ' ';
     preprocessorOptions += makeKernelIncludePathOption(kernelRootPath);
     preprocessorOptions += ' ';
@@ -410,7 +408,7 @@ cl_program compileProgram(FILE*              fplog,
                           const std::string& extraDefines,
                           cl_context         context,
                           cl_device_id       deviceId,
-                          ocl_vendor_id_t    deviceVendorId)
+                          DeviceVendor       deviceVendor)
 {
     cl_int cl_error;
     // Let the kernel find include files from its module.
@@ -425,7 +423,7 @@ cl_program compileProgram(FILE*              fplog,
 
     /* Make the build options */
     std::string preprocessorOptions = makePreprocessorOptions(
-            kernelRootPath, rootPath, getDeviceWarpSize(context, deviceId), deviceVendorId, extraDefines);
+            kernelRootPath, rootPath, getDeviceWarpSize(context, deviceId), deviceVendor, extraDefines);
 
     bool buildCacheWasRead = false;
 
@@ -511,7 +509,7 @@ cl_program compileProgram(FILE*              fplog,
             }
         }
     }
-    if ((OCL_VENDOR_NVIDIA == deviceVendorId) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
+    if ((deviceVendor == DeviceVendor::Nvidia) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
     {
         /* If dumping intermediate files has been requested and this is an NVIDIA card
            => write PTX to file */
index 06a0354b1ea0e7437aa9ce7a5e997d3470691ede..db6d0db8e3c7aa8d4f0d80c3d44183f6eff3ee4d 100644 (file)
@@ -86,10 +86,12 @@ size_t getKernelWarpSize(cl_kernel kernel, cl_device_id deviceId);
  * \param[in]  kernelRelativePath    Relative path to the kernel in the source tree,
  *                                   e.g. "src/gromacs/mdlib/nbnxn_ocl" for NB kernels.
  * \param[in]  kernelBaseFilename    The name of the kernel source file to compile, e.g.
- * "nbnxn_ocl_kernels.cl" \param[in]  extraDefines          Preprocessor defines required by the
- * calling code, e.g. for configuring the kernels \param[in]  context               OpenCL context
- * on the device to compile for \param[in]  deviceId              OpenCL device id of the device to
- * compile for \param[in]  deviceVendorId        Enumerator of the device vendor to compile for
+ *                                   "nbnxn_ocl_kernels.cl"
+ * \param[in]  extraDefines          Preprocessor defines required by the calling code,
+ *                                   e.g. for configuring the kernels
+ * \param[in]  context               OpenCL context on the device to compile for
+ * \param[in]  deviceId              OpenCL device id of the device to compile for
+ * \param[in]  deviceVendor          Enumerator of the device vendor to compile for
  *
  * \returns The compiled OpenCL program
  *
@@ -107,7 +109,7 @@ cl_program compileProgram(FILE*              fplog,
                           const std::string& extraDefines,
                           cl_context         context,
                           cl_device_id       deviceId,
-                          ocl_vendor_id_t    deviceVendorId);
+                          DeviceVendor       deviceVendor);
 
 } // namespace ocl
 } // namespace gmx
index f5ec03353b5e3d2bfd5e051946ba7a966789f965..6ad4de9d485af9e48deebf64b212cdcd0e217122 100644 (file)
 
 enum class GpuApiCallBehavior;
 
-/*! \brief OpenCL vendor IDs */
-typedef enum
-{
-    OCL_VENDOR_NVIDIA = 0,
-    OCL_VENDOR_AMD,
-    OCL_VENDOR_INTEL,
-    OCL_VENDOR_UNKNOWN
-} ocl_vendor_id_t;
-
 /*! \internal
  * \brief OpenCL GPU device identificator
  *
@@ -81,16 +72,16 @@ typedef struct
  */
 struct gmx_device_info_t
 {
-    ocl_gpu_id_t    ocl_gpu_id;          /**< device ID assigned at detection   */
-    char            device_name[256];    /**< device name */
-    char            device_version[256]; /**< device version */
-    char            device_vendor[256];  /**< device vendor */
-    int             compute_units;       /**< number of compute units */
-    int             adress_bits;         /**< number of adress bits the device is capable of */
-    int             stat;                /**< device status takes values of e_gpu_detect_res_t */
-    ocl_vendor_id_t vendor_e;            /**< device vendor as defined by ocl_vendor_id_t */
-    size_t maxWorkItemSizes[3]; /**< workgroup size limits (CL_DEVICE_MAX_WORK_ITEM_SIZES) */
-    size_t maxWorkGroupSize;    /**< workgroup total size limit (CL_DEVICE_MAX_WORK_GROUP_SIZE) */
+    ocl_gpu_id_t ocl_gpu_id;          /**< device ID assigned at detection   */
+    char         device_name[256];    /**< device name */
+    char         device_version[256]; /**< device version */
+    char         vendorName[256];     /**< device vendor name string (CL_DEVICE_VENDOR) */
+    int          compute_units;       /**< number of compute units */
+    int          adress_bits;         /**< number of address bits the device is capable of */
+    int          stat;                /**< device status takes values of e_gpu_detect_res_t */
+    DeviceVendor deviceVendor;        /**< device vendor enumerator, see DeviceVendor */
+    size_t       maxWorkItemSizes[3]; /**< workgroup size limits (CL_DEVICE_MAX_WORK_ITEM_SIZES) */
+    size_t maxWorkGroupSize; /**< workgroup total size limit (CL_DEVICE_MAX_WORK_GROUP_SIZE) */
 };
 
 /*! \internal
index 90dbf0a3362ae1fcf133b703b19f737cfa0e4a5a..d7511d3bd18462ed5750871afe546ec7fad5e181 100644 (file)
@@ -788,7 +788,8 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c
      *   and j-cluster concurrency, in x, y, and z, respectively.
      * - The 1D block-grid contains as many blocks as super-clusters.
      */
-    int num_threads_z = getOclPruneKernelJ4Concurrency(nb->dev_info->vendor_e);
+    int num_threads_z = getOclPruneKernelJ4Concurrency(nb->dev_info->deviceVendor);
+
 
     /* kernel launch config */
     KernelLaunchConfig config;
index 2556ea62618e7d3bc224062c4ad4d88ff1cb3d58..c4df4f17115cddfa9d8f727edb03e2680aa22ff1 100644 (file)
@@ -695,8 +695,8 @@ NbnxmGpu* gpu_init(const gmx_device_info_t*   deviceInfo,
      * TODO: decide about NVIDIA
      */
     nb->bPrefetchLjParam = (getenv("GMX_OCL_DISABLE_I_PREFETCH") == nullptr)
-                           && ((nb->dev_info->vendor_e == OCL_VENDOR_AMD)
-                               || (nb->dev_info->vendor_e == OCL_VENDOR_INTEL)
+                           && ((nb->dev_info->deviceVendor == DeviceVendor::Amd)
+                               || (nb->dev_info->deviceVendor == DeviceVendor::Intel)
                                || (getenv("GMX_OCL_ENABLE_I_PREFETCH") != nullptr));
 
     /* NOTE: in CUDA we pick L1 cache configuration for the nbnxn kernels here,
index 7374d433d37b7cc01941fc027726d5592c539c56..285d91ef8fd0817d3baf856fc190f25c5a342a3e 100644 (file)
@@ -203,7 +203,7 @@ void nbnxn_gpu_compile_kernels(NbnxmGpu* nb)
             program = gmx::ocl::compileProgram(
                     stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines,
                     nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id,
-                    nb->dev_info->vendor_e);
+                    nb->dev_info->deviceVendor);
         }
         catch (gmx::GromacsException& e)
         {
index 7ae05268514cfc733be7bda920b25959b8538214..d1ce7be20afdbd834dfc863b419035773af231cb 100644 (file)
@@ -78,17 +78,16 @@ const int c_oclPruneKernelJ4ConcurrencyDEFAULT = GMX_NBNXN_PRUNE_KERNEL_J4_CONCU
 /*! @} */
 
 /*! \brief Returns the j4 processing concurrency parameter for the vendor \p vendorId
- *  \param vendorId takes values from #ocl_vendor_id_t.
+ *  \param deviceVendor Device vendor; every vendor currently gets the default value.
  */
-static inline int getOclPruneKernelJ4Concurrency(int vendorId)
+static inline int getOclPruneKernelJ4Concurrency(DeviceVendor deviceVendor)
 {
-    switch (vendorId)
+    switch (deviceVendor)
     {
         default: return c_oclPruneKernelJ4ConcurrencyDEFAULT;
     }
 }
 
-
 /*! \brief Electrostatic OpenCL kernel flavors.
  *
  *  Types of electrostatics implementations available in the OpenCL non-bonded