Move DeviceInfo into GPU traits
authorArtem Zhmurov <zhmurov@gmail.com>
Wed, 29 Jan 2020 14:45:59 +0000 (15:45 +0100)
committerArtem Zhmurov <zhmurov@gmail.com>
Thu, 6 Feb 2020 02:49:40 +0000 (03:49 +0100)
The DeviceInfo is needed upon construction of DeviceContext. To
prepare for opaque DeviceContext type, it is moved to GPU traits
and renamed according to the common naming scheme.

Refs. #3311, needed for #3315.

Change-Id: I2a9f1d932f142d645df75901521a734d208de509

42 files changed:
docs/OpenCLTODOList.txt
src/gromacs/ewald/pme.cpp
src/gromacs/ewald/pme.h
src/gromacs/ewald/pme_gpu_internal.cpp
src/gromacs/ewald/pme_gpu_internal.h
src/gromacs/ewald/pme_gpu_program.cpp
src/gromacs/ewald/pme_gpu_program.h
src/gromacs/ewald/pme_gpu_program_impl.cpp
src/gromacs/ewald/pme_gpu_program_impl.cu
src/gromacs/ewald/pme_gpu_program_impl.h
src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp
src/gromacs/ewald/pme_gpu_types_host.h
src/gromacs/ewald/tests/pmetestcommon.cpp
src/gromacs/ewald/tests/pmetestcommon.h
src/gromacs/ewald/tests/testhardwarecontexts.cpp
src/gromacs/ewald/tests/testhardwarecontexts.h
src/gromacs/gpu_utils/cudautils.cuh
src/gromacs/gpu_utils/gpu_utils.cpp
src/gromacs/gpu_utils/gpu_utils.cu
src/gromacs/gpu_utils/gpu_utils.h
src/gromacs/gpu_utils/gpu_utils_ocl.cpp
src/gromacs/gpu_utils/gputraits.cuh
src/gromacs/gpu_utils/gputraits.h
src/gromacs/gpu_utils/gputraits_ocl.h
src/gromacs/gpu_utils/oclutils.h
src/gromacs/gpu_utils/tests/devicetransfers_ocl.cpp
src/gromacs/hardware/detecthardware.cpp
src/gromacs/hardware/gpu_hw_info.h
src/gromacs/mdlib/forcerec.h
src/gromacs/mdrun/runner.cpp
src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h
src/gromacs/nbnxm/gpu_data_mgmt.h
src/gromacs/nbnxm/nbnxm.h
src/gromacs/nbnxm/nbnxm_setup.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h
src/gromacs/taskassignment/taskassignment.cpp
src/gromacs/taskassignment/taskassignment.h

index 34b71b439b990ba6d9383ba9172ea7f5cd66b6da..2864e911a1ce1367b28b7f4334eee85131ae7943 100644 (file)
@@ -32,8 +32,6 @@ TABLE OF CONTENTS
 
 - Quite a few error conditions are unhandled, noted with TODOs in several files
 
-- gmx_device_info_t needs struct field documentation
-
 3. ENHANCEMENTS
    ============
 - Implement OpenCL kernels for Intel GPUs
index d6cd44b346b0708f15cdc3588aebf3fbf3d46d14..475453973ef52d58ce0ce2dc8d47749a15500b25 100644 (file)
@@ -570,7 +570,7 @@ gmx_pme_t* gmx_pme_init(const t_commrec*         cr,
                         int                      nthread,
                         PmeRunMode               runMode,
                         PmeGpu*                  pmeGpu,
-                        const gmx_device_info_t* gpuInfo,
+                        const DeviceInformation* deviceInfo,
                         const PmeGpuProgram*     pmeGpuProgram,
                         const gmx::MDLogger& /*mdlog*/)
 {
@@ -883,7 +883,7 @@ gmx_pme_t* gmx_pme_init(const t_commrec*         cr,
             GMX_THROW(gmx::NotImplementedError(errorString));
         }
     }
-    pme_gpu_reinit(pme.get(), gpuInfo, pmeGpuProgram);
+    pme_gpu_reinit(pme.get(), deviceInfo, pmeGpuProgram);
 
     pme_init_all_work(&pme->solve_work, pme->nthread, pme->nkx);
 
index a471e93e2e32126351a6b451afe0ad3f2bd5ad17..edbe2835230fb635368bfa04af33cce0a5215b48 100644 (file)
@@ -65,7 +65,7 @@ struct t_inputrec;
 struct t_nrnb;
 struct PmeGpu;
 struct gmx_wallclock_gpu_pme_t;
-struct gmx_device_info_t;
+struct DeviceInformation;
 struct gmx_enerdata_t;
 struct gmx_mtop_t;
 struct gmx_pme_t;
@@ -139,7 +139,7 @@ bool gmx_pme_check_restrictions(int  pme_order,
  * \returns  Pointer to newly allocated and initialized PME data.
  *
  * \todo We should evolve something like a \c GpuManager that holds \c
- * gmx_device_info_t * and \c PmeGpuProgram* and perhaps other
+ * DeviceInformation* and \c PmeGpuProgram* and perhaps other
  * related things whose lifetime can/should exceed that of a task (or
  * perhaps task manager). See Redmine #2522.
  */
@@ -154,7 +154,7 @@ gmx_pme_t* gmx_pme_init(const t_commrec*         cr,
                         int                      nthread,
                         PmeRunMode               runMode,
                         PmeGpu*                  pmeGpu,
-                        const gmx_device_info_t* gpuInfo,
+                        const DeviceInformation* deviceInfo,
                         const PmeGpuProgram*     pmeGpuProgram,
                         const gmx::MDLogger&     mdlog);
 
index e45f07f0fe3a862cbdcc7acb121d18baaffdfe8f..9c984025256fcfc96c78e9d8357c63d7d0ddeef1 100644 (file)
@@ -535,7 +535,7 @@ void pme_gpu_init_internal(PmeGpu* pmeGpu)
 #elif GMX_GPU == GMX_GPU_OPENCL
     cl_command_queue_properties queueProperties =
             pmeGpu->archSpecific->useTiming ? CL_QUEUE_PROFILING_ENABLE : 0;
-    cl_device_id device_id = pmeGpu->deviceInfo->ocl_gpu_id.ocl_device_id;
+    cl_device_id device_id = pmeGpu->deviceInfo->oclDeviceId;
     cl_int       clError;
     pmeGpu->archSpecific->pmeStream =
             clCreateCommandQueue(pmeGpu->archSpecific->context, device_id, queueProperties, &clError);
@@ -819,10 +819,10 @@ static void pme_gpu_select_best_performing_pme_spreadgather_kernels(PmeGpu* pmeG
  * TODO: this should become PmeGpu::PmeGpu()
  *
  * \param[in,out] pme            The PME structure.
- * \param[in,out] gpuInfo        The GPU information structure.
+ * \param[in,out] deviceInfo     The GPU device information structure.
  * \param[in]     pmeGpuProgram  The handle to the program/kernel data created outside (e.g. in unit tests/runner)
  */
-static void pme_gpu_init(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const PmeGpuProgram* pmeGpuProgram)
+static void pme_gpu_init(gmx_pme_t* pme, const DeviceInformation* deviceInfo, const PmeGpuProgram* pmeGpuProgram)
 {
     pme->gpu       = new PmeGpu();
     PmeGpu* pmeGpu = pme->gpu;
@@ -839,7 +839,7 @@ static void pme_gpu_init(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const
 
     pme_gpu_set_testing(pmeGpu, false);
 
-    pmeGpu->deviceInfo = gpuInfo;
+    pmeGpu->deviceInfo = deviceInfo;
     GMX_ASSERT(pmeGpuProgram != nullptr, "GPU kernels must be already compiled");
     pmeGpu->programHandle_ = pmeGpuProgram;
 
@@ -930,7 +930,7 @@ void pme_gpu_get_real_grid_sizes(const PmeGpu* pmeGpu, gmx::IVec* gridSize, gmx:
     }
 }
 
-void pme_gpu_reinit(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const PmeGpuProgram* pmeGpuProgram)
+void pme_gpu_reinit(gmx_pme_t* pme, const DeviceInformation* deviceInfo, const PmeGpuProgram* pmeGpuProgram)
 {
     GMX_ASSERT(pme != nullptr, "Need valid PME object");
     if (pme->runMode == PmeRunMode::CPU)
@@ -942,7 +942,7 @@ void pme_gpu_reinit(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const PmeG
     if (!pme->gpu)
     {
         /* First-time initialization */
-        pme_gpu_init(pme, gpuInfo, pmeGpuProgram);
+        pme_gpu_init(pme, deviceInfo, pmeGpuProgram);
     }
     else
     {
index 2cd1f862602d897b1ad8ce781c07957ef06d923c..cc7e9d1f342556add51c16ae31f880f1c82fd1e7 100644 (file)
@@ -55,7 +55,7 @@
 #include "pme_output.h"
 
 class GpuEventSynchronizer;
-struct gmx_device_info_t;
+struct DeviceInformation;
 struct gmx_hw_info_t;
 struct gmx_gpu_opt_t;
 struct gmx_pme_t; // only used in pme_gpu_reinit
@@ -562,12 +562,12 @@ GPU_FUNC_QUALIFIER void pme_gpu_get_real_grid_sizes(const PmeGpu* GPU_FUNC_ARGUM
  * (Re-)initializes the PME GPU data at the beginning of the run or on DLB.
  *
  * \param[in,out] pme             The PME structure.
- * \param[in]     gpuInfo         The GPU information structure.
+ * \param[in]     deviceInfo      The GPU device information structure.
  * \param[in]     pmeGpuProgram   The PME GPU program data
  * \throws gmx::NotImplementedError if this generally valid PME structure is not valid for GPU runs.
  */
 GPU_FUNC_QUALIFIER void pme_gpu_reinit(gmx_pme_t*               GPU_FUNC_ARGUMENT(pme),
-                                       const gmx_device_info_t* GPU_FUNC_ARGUMENT(gpuInfo),
+                                       const DeviceInformation* GPU_FUNC_ARGUMENT(deviceInfo),
                                        const PmeGpuProgram* GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM;
 
 /*! \libinternal \brief
index 5227eca063f712fce3c52087dd421e03452cce47..ccb54946182d1624e05e07c4d904872049e5f9ec 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 
 #include "pme_gpu_program_impl.h"
 
-PmeGpuProgram::PmeGpuProgram(const gmx_device_info_t* deviceInfo) :
+PmeGpuProgram::PmeGpuProgram(const DeviceInformation* deviceInfo) :
     impl_(std::make_unique<PmeGpuProgramImpl>(deviceInfo))
 {
 }
 
 PmeGpuProgram::~PmeGpuProgram() = default;
 
-PmeGpuProgramStorage buildPmeGpuProgram(const gmx_device_info_t* deviceInfo)
+PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation* deviceInfo)
 {
     if (!deviceInfo)
     {
index e9e084bf1e58fadb94ded5c5ac599ae3ac1780fd..610c46f4337f32370a84f5353c06e51a3bf5eeac 100644 (file)
 #include <memory>
 
 struct PmeGpuProgramImpl;
-struct gmx_device_info_t;
+struct DeviceInformation;
 
 class PmeGpuProgram
 {
 public:
-    explicit PmeGpuProgram(const gmx_device_info_t* deviceInfo);
+    explicit PmeGpuProgram(const DeviceInformation* deviceInfo);
     ~PmeGpuProgram();
 
     // TODO: design getters for information inside, if needed for PME, and make this private?
@@ -69,6 +69,6 @@ using PmeGpuProgramStorage = std::unique_ptr<PmeGpuProgram>;
 /*! \brief
  * Factory function used to build persistent PME GPU program for the device at once.
  */
-PmeGpuProgramStorage buildPmeGpuProgram(const gmx_device_info_t* /*deviceInfo*/);
+PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation* /*deviceInfo*/);
 
 #endif
index 078f97ee4fe258f0f4989885bdbaa1b381987c18..d50849913005ec64f10d8dacf0045e5894560254 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -45,7 +45,7 @@
 
 #include "pme_gpu_program_impl.h"
 
-PmeGpuProgramImpl::PmeGpuProgramImpl(const gmx_device_info_t* /*unused*/) :
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation* /* deviceInfo */) :
     warpSize(0),
     spreadWorkGroupSize(0),
     gatherWorkGroupSize(0),
index f34f7a2741b335568bc62ab8bd82319f9f92c056..019bc3f2aaa67a034a8c0059b683796dd133966c 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -102,7 +102,7 @@ extern template void pme_gather_kernel<c_pmeOrder, true, c_wrapX, c_wrapY, false
 extern template void pme_gather_kernel<c_pmeOrder, false, c_wrapX, c_wrapY, true, false>(const PmeGpuCudaKernelParams);
 extern template void pme_gather_kernel<c_pmeOrder, false, c_wrapX, c_wrapY, false, false>(const PmeGpuCudaKernelParams);
 
-PmeGpuProgramImpl::PmeGpuProgramImpl(const gmx_device_info_t*)
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation* /* deviceInfo */)
 {
     // kernel parameters
     warpSize              = warp_size;
index 8867ea0bdcacc8c123f5e9cfce79c7f071c735f0..f42179598ea0fbf082f7fde28589c7b7e32377d9 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -47,7 +47,7 @@
 #include "gromacs/gpu_utils/gputraits.h"
 #include "gromacs/utility/classhelpers.h"
 
-struct gmx_device_info_t;
+struct DeviceInformation;
 
 /*! \internal
  * \brief
@@ -150,13 +150,13 @@ struct PmeGpuProgramImpl
 
     PmeGpuProgramImpl() = delete;
     //! Constructor for the given device
-    explicit PmeGpuProgramImpl(const gmx_device_info_t* deviceInfo);
+    explicit PmeGpuProgramImpl(const DeviceInformation* deviceInfo);
     ~PmeGpuProgramImpl();
     GMX_DISALLOW_COPY_AND_ASSIGN(PmeGpuProgramImpl);
 
 private:
     // Compiles kernels, if supported. Called by the constructor.
-    void compileKernels(const gmx_device_info_t* deviceInfo);
+    void compileKernels(const DeviceInformation* deviceInfo);
 };
 
 #endif
index d17a76256eea5bb1071420e81c2e63f2fb44d236..ae319b2c283c07b3f00391c7066a5f7cd3fe32e2 100644 (file)
 #include "pme_gpu_types_host.h"
 #include "pme_grid.h"
 
-PmeGpuProgramImpl::PmeGpuProgramImpl(const gmx_device_info_t* deviceInfo)
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation* deviceInfo)
 {
     // Context creation (which should happen outside of this class: #2522)
-    cl_platform_id        platformId = deviceInfo->ocl_gpu_id.ocl_platform_id;
-    cl_device_id          deviceId   = deviceInfo->ocl_gpu_id.ocl_device_id;
+    cl_platform_id        platformId = deviceInfo->oclPlatformId;
+    cl_device_id          deviceId   = deviceInfo->oclDeviceId;
     cl_context_properties contextProperties[3];
     contextProperties[0] = CL_CONTEXT_PLATFORM;
     contextProperties[1] = reinterpret_cast<cl_context_properties>(platformId);
@@ -110,11 +110,11 @@ PmeGpuProgramImpl::~PmeGpuProgramImpl()
  * smaller than the minimum order^2 required in spread/gather ATM which
  * we need to check for.
  */
-static void checkRequiredWarpSize(cl_kernel kernel, const char* kernelName, const gmx_device_info_t* deviceInfo)
+static void checkRequiredWarpSize(cl_kernel kernel, const char* kernelName, const DeviceInformation* deviceInfo)
 {
     if (deviceInfo->deviceVendor == DeviceVendor::Intel)
     {
-        size_t kernelWarpSize = gmx::ocl::getKernelWarpSize(kernel, deviceInfo->ocl_gpu_id.ocl_device_id);
+        size_t kernelWarpSize = gmx::ocl::getKernelWarpSize(kernel, deviceInfo->oclDeviceId);
 
         if (kernelWarpSize < c_pmeSpreadGatherMinWarpSize)
         {
@@ -128,7 +128,7 @@ static void checkRequiredWarpSize(cl_kernel kernel, const char* kernelName, cons
     }
 }
 
-void PmeGpuProgramImpl::compileKernels(const gmx_device_info_t* deviceInfo)
+void PmeGpuProgramImpl::compileKernels(const DeviceInformation* deviceInfo)
 {
     // We might consider storing program as a member variable if it's needed later
     cl_program program = nullptr;
@@ -165,8 +165,8 @@ void PmeGpuProgramImpl::compileKernels(const gmx_device_info_t* deviceInfo)
         {
             /* TODO when we have a proper MPI-aware logging module,
                the log output here should be written there */
-            program = gmx::ocl::compileProgram(stderr, "gromacs/ewald", "pme_program.cl", commonDefines,
-                                               context, deviceInfo->ocl_gpu_id.ocl_device_id,
+            program = gmx::ocl::compileProgram(stderr, "gromacs/ewald", "pme_program.cl",
+                                               commonDefines, context, deviceInfo->oclDeviceId,
                                                deviceInfo->deviceVendor);
         }
         catch (gmx::GromacsException& e)
index 45745c9a19dee3e6f5366a05897f299f3dd1dced..acdf24bf6d622edbb82a21dd2f244da9fcb91b4a 100644 (file)
@@ -87,7 +87,7 @@ typedef PmeGpuKernelParamsBase PmeGpuKernelParams;
 typedef int PmeGpuKernelParams;
 #endif
 
-struct gmx_device_info_t;
+struct DeviceInformation;
 
 /*! \internal \brief
  * The PME GPU structure for all the data copied directly from the CPU PME structure.
@@ -168,7 +168,7 @@ struct PmeGpu
     int nAtomsAlloc;
 
     /*! \brief A pointer to the device used during the execution. */
-    const gmx_device_info_t* deviceInfo;
+    const DeviceInformation* deviceInfo;
 
     /*! \brief Kernel scheduling grid width limit in X - derived from deviceinfo compute capability in CUDA.
      * Declared as very large int to make it useful in computations with type promotion, to avoid overflows.
index 198c77cadd8eb9d1e01883f4f5f28f97b85b69a2..891e7bb048612a4005f7c8e2dc3b10ba15da9f72 100644 (file)
@@ -104,7 +104,7 @@ uint64_t getSplineModuliDoublePrecisionUlps(int splineOrder)
 //! PME initialization
 PmeSafePointer pmeInitWrapper(const t_inputrec*        inputRec,
                               const CodePath           mode,
-                              const gmx_device_info_t* gpuInfo,
+                              const DeviceInformation* deviceInfo,
                               const PmeGpuProgram*     pmeGpuProgram,
                               const Matrix3x3&         box,
                               const real               ewaldCoeff_q,
@@ -116,7 +116,7 @@ PmeSafePointer pmeInitWrapper(const t_inputrec*        inputRec,
     NumPmeDomains  numPmeDomains = { 1, 1 };
     gmx_pme_t*     pmeDataRaw =
             gmx_pme_init(&dummyCommrec, numPmeDomains, inputRec, false, false, true, ewaldCoeff_q,
-                         ewaldCoeff_lj, 1, runMode, nullptr, gpuInfo, pmeGpuProgram, dummyLogger);
+                         ewaldCoeff_lj, 1, runMode, nullptr, deviceInfo, pmeGpuProgram, dummyLogger);
     PmeSafePointer pme(pmeDataRaw); // taking ownership
 
     // TODO get rid of this with proper matrix type
@@ -149,13 +149,13 @@ PmeSafePointer pmeInitWrapper(const t_inputrec*        inputRec,
 //! Simple PME initialization based on input, no atom data
 PmeSafePointer pmeInitEmpty(const t_inputrec*        inputRec,
                             const CodePath           mode,
-                            const gmx_device_info_t* gpuInfo,
+                            const DeviceInformation* deviceInfo,
                             const PmeGpuProgram*     pmeGpuProgram,
                             const Matrix3x3&         box,
                             real                     ewaldCoeff_q,
                             real                     ewaldCoeff_lj)
 {
-    return pmeInitWrapper(inputRec, mode, gpuInfo, pmeGpuProgram, box, ewaldCoeff_q, ewaldCoeff_lj);
+    return pmeInitWrapper(inputRec, mode, deviceInfo, pmeGpuProgram, box, ewaldCoeff_q, ewaldCoeff_lj);
     // hiding the fact that PME actually needs to know the number of atoms in advance
 }
 
index 870b9f7aa67fe3d9ded76cca30203a1b4d628c62..d6377bd455bb161645d1da56aa7f752fd42aa7fa 100644 (file)
@@ -121,7 +121,7 @@ uint64_t getSplineModuliDoublePrecisionUlps(int splineOrder);
 //! PME initialization
 PmeSafePointer pmeInitWrapper(const t_inputrec*        inputRec,
                               CodePath                 mode,
-                              const gmx_device_info_t* gpuInfo,
+                              const DeviceInformation* deviceInfo,
                               const PmeGpuProgram*     pmeGpuProgram,
                               const Matrix3x3&         box,
                               real                     ewaldCoeff_q  = 1.0F,
@@ -129,7 +129,7 @@ PmeSafePointer pmeInitWrapper(const t_inputrec*        inputRec,
 //! Simple PME initialization (no atom data)
 PmeSafePointer pmeInitEmpty(const t_inputrec*        inputRec,
                             CodePath                 mode          = CodePath::CPU,
-                            const gmx_device_info_t* gpuInfo       = nullptr,
+                            const DeviceInformation* deviceInfo    = nullptr,
                             const PmeGpuProgram*     pmeGpuProgram = nullptr,
                             const Matrix3x3& box = { { 1.0F, 0.0F, 0.0F, 0.0F, 1.0F, 0.0F, 0.0F, 0.0F, 1.0F } },
                             real             ewaldCoeff_q  = 0.0F,
index 6e0a888402bd74c48183733319f5d904fea08146..aab3099aa137b228c99617230548c4247f893aac 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -119,7 +119,7 @@ void PmeTestEnvironment::SetUp()
     // Constructing contexts for all compatible GPUs - will be empty on non-GPU builds
     for (int gpuIndex : getCompatibleGpus(hardwareInfo_->gpu_info))
     {
-        const gmx_device_info_t* deviceInfo = getDeviceInfo(hardwareInfo_->gpu_info, gpuIndex);
+        const DeviceInformation* deviceInfo = getDeviceInfo(hardwareInfo_->gpu_info, gpuIndex);
         init_gpu(deviceInfo);
 
         char stmp[200] = {};
index e7d49c5a79553fe235b0758df94f72c8f6aff9ce..6ec22930d4f9638167a8e36fa206f48ed789ddce 100644 (file)
@@ -78,7 +78,7 @@ struct TestHardwareContext
     //! Readable description
     std::string description_;
     //! Device information pointer
-    const gmx_device_info_t* deviceInfo_;
+    const DeviceInformation* deviceInfo_;
     //! Persistent compiled GPU kernels for PME.
     PmeGpuProgramStorage program_;
 
@@ -88,11 +88,11 @@ public:
     //! Returns a human-readable context description line
     std::string getDescription() const { return description_; }
     //! Returns the device info pointer
-    const gmx_device_info_t* getDeviceInfo() const { return deviceInfo_; }
+    const DeviceInformation* getDeviceInfo() const { return deviceInfo_; }
     //! Returns the persistent PME GPU kernels
     const PmeGpuProgram* getPmeGpuProgram() const { return program_.get(); }
     //! Constructs the context
-    TestHardwareContext(CodePath codePath, const char* description, const gmx_device_info_t* deviceInfo) :
+    TestHardwareContext(CodePath codePath, const char* description, const DeviceInformation* deviceInfo) :
         codePath_(codePath),
         description_(description),
         deviceInfo_(deviceInfo),
index 618808d03bcbe7c853555716830c281d0c8d7624..71d9b7dac45dac38ac76b96357cdec6b91e0acde 100644 (file)
@@ -142,19 +142,6 @@ enum class GpuApiCallBehavior;
 
 #endif /* CHECK_CUDA_ERRORS */
 
-/*! \brief CUDA device information.
- *
- * The CUDA device information is queried and set at detection and contains
- * both information about the device/hardware returned by the runtime as well
- * as additional data like support status.
- */
-struct gmx_device_info_t
-{
-    int            id;   /* id of the CUDA device */
-    cudaDeviceProp prop; /* CUDA device properties */
-    int            stat; /* result of the device check */
-};
-
 /*! Launches synchronous or asynchronous device to host memory copy.
  *
  *  The copy is launched in stream s or if not specified, in stream 0.
index 51622063a7a967689ce8f4bf2d0e12d577fa45e1..98b701ac624f86b3c23be7e4f3ac58a0a24811bd 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2014,2015,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -78,7 +78,7 @@ int gpu_info_get_stat(const gmx_gpu_info_t& /*unused*/, int /*unused*/)
 
 void free_gpu_info(const gmx_gpu_info_t* gpu_info)
 {
-    sfree(static_cast<void*>(gpu_info->gpu_dev)); // circumvent is_pod check in sfree
+    sfree(static_cast<void*>(gpu_info->deviceInfo)); // circumvent is_pod check in sfree
 }
 
 std::vector<int> getCompatibleGpus(const gmx_gpu_info_t& gpu_info)
@@ -88,7 +88,7 @@ std::vector<int> getCompatibleGpus(const gmx_gpu_info_t& gpu_info)
     compatibleGpus.reserve(gpu_info.n_dev);
     for (int i = 0; i < gpu_info.n_dev; i++)
     {
-        assert(gpu_info.gpu_dev);
+        assert(gpu_info.deviceInfo);
         if (gpu_info_get_stat(gpu_info, i) == egpuCompatible)
         {
             compatibleGpus.push_back(i);
index b7e5e0f77ead42f116484c55e26a20a5fcce2cc8..16215c1fe657e08c4c8a55ee794d9cc63299be16 100644 (file)
@@ -2,7 +2,7 @@
  * This file is part of the GROMACS molecular simulation package.
  *
  * Copyright (c) 2010-2018, The GROMACS development team.
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -222,7 +222,7 @@ static int do_sanity_checks(int dev_id, const cudaDeviceProp& dev_prop)
     return 0;
 }
 
-void init_gpu(const gmx_device_info_t* deviceInfo)
+void init_gpu(const DeviceInformation* deviceInfo)
 {
     cudaError_t stat;
 
@@ -241,7 +241,7 @@ void init_gpu(const gmx_device_info_t* deviceInfo)
     }
 }
 
-void free_gpu(const gmx_device_info_t* deviceInfo)
+void free_gpu(const DeviceInformation* deviceInfo)
 {
     // One should only attempt to clear the device context when
     // it has been used, but currently the only way to know that a GPU
@@ -268,13 +268,13 @@ void free_gpu(const gmx_device_info_t* deviceInfo)
     }
 }
 
-gmx_device_info_t* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId)
+DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId)
 {
     if (deviceId < 0 || deviceId >= gpu_info.n_dev)
     {
         gmx_incons("Invalid GPU deviceId requested");
     }
-    return &gpu_info.gpu_dev[deviceId];
+    return &gpu_info.deviceInfo[deviceId];
 }
 
 /*! \brief Returns true if the gpu characterized by the device properties is
@@ -396,7 +396,7 @@ void findGpus(gmx_gpu_info_t* gpu_info)
     // We expect to start device support/sanity checks with a clean runtime error state
     gmx::ensureNoPendingCudaError("");
 
-    gmx_device_info_t* devs;
+    DeviceInformation* devs;
     snew(devs, ndev);
     for (int i = 0; i < ndev; i++)
     {
@@ -450,8 +450,8 @@ void findGpus(gmx_gpu_info_t* gpu_info)
                                          cudaGetErrorName(stat), cudaGetErrorString(stat))
                                .c_str());
 
-    gpu_info->n_dev   = ndev;
-    gpu_info->gpu_dev = devs;
+    gpu_info->n_dev      = ndev;
+    gpu_info->deviceInfo = devs;
 }
 
 void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int index)
@@ -463,7 +463,7 @@ void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int ind
         return;
     }
 
-    gmx_device_info_t* dinfo = &gpu_info.gpu_dev[index];
+    DeviceInformation* dinfo = &gpu_info.deviceInfo[index];
 
     bool bGpuExists = (dinfo->stat != egpuNonexistent && dinfo->stat != egpuInsane);
 
@@ -489,7 +489,7 @@ int get_current_cuda_gpu_device_id(void)
 
 size_t sizeof_gpu_dev_info(void)
 {
-    return sizeof(gmx_device_info_t);
+    return sizeof(DeviceInformation);
 }
 
 void startGpuProfiler(void)
@@ -537,7 +537,7 @@ void resetGpuProfiler(void)
 
 int gpu_info_get_stat(const gmx_gpu_info_t& info, int index)
 {
-    return info.gpu_dev[index].stat;
+    return info.deviceInfo[index].stat;
 }
 
 /*! \brief Check status returned from peer access CUDA call, and error out or warn appropriately
index 24590f589d1b81de88e34a512807126d0121b81b..f935b0e513dc574f1d2a72600a0a7ac51d986532 100644 (file)
@@ -54,7 +54,7 @@
 #include "gromacs/gpu_utils/gpu_macros.h"
 #include "gromacs/utility/basedefinitions.h"
 
-struct gmx_device_info_t;
+struct DeviceInformation;
 struct gmx_gpu_info_t;
 
 namespace gmx
@@ -108,7 +108,7 @@ bool isGpuDetectionFunctional(std::string* GPU_FUNC_ARGUMENT(errorMessage))
  *  Will detect every GPU supported by the device driver in use.
  *  Must only be called if canPerformGpuDetection() has returned true.
  *  This routine also checks for the compatibility of each and fill the
- *  gpu_info->gpu_dev array with the required information on each the
+ *  gpu_info->deviceInfo array with the required information on each the
  *  device: ID, device properties, status.
  *
  *  Note that this function leaves the GPU runtime API error state clean;
@@ -158,7 +158,7 @@ void free_gpu_info(const gmx_gpu_info_t* gpu_info);
  * initialization.
  */
 GPU_FUNC_QUALIFIER
-void init_gpu(const gmx_device_info_t* GPU_FUNC_ARGUMENT(deviceInfo)) GPU_FUNC_TERM;
+void init_gpu(const DeviceInformation* GPU_FUNC_ARGUMENT(deviceInfo)) GPU_FUNC_TERM;
 
 /*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
  *
@@ -176,7 +176,7 @@ void init_gpu(const gmx_device_info_t* GPU_FUNC_ARGUMENT(deviceInfo)) GPU_FUNC_T
  * \returns                 true if no error occurs during the freeing.
  */
 CUDA_FUNC_QUALIFIER
-void free_gpu(const gmx_device_info_t* CUDA_FUNC_ARGUMENT(deviceInfo)) CUDA_FUNC_TERM;
+void free_gpu(const DeviceInformation* CUDA_FUNC_ARGUMENT(deviceInfo)) CUDA_FUNC_TERM;
 
 /*! \brief Return a pointer to the device info for \c deviceId
  *
@@ -186,7 +186,7 @@ void free_gpu(const gmx_device_info_t* CUDA_FUNC_ARGUMENT(deviceInfo)) CUDA_FUNC
  * \returns                 Pointer to the device info for \c deviceId.
  */
 GPU_FUNC_QUALIFIER
-gmx_device_info_t* getDeviceInfo(const gmx_gpu_info_t& GPU_FUNC_ARGUMENT(gpu_info),
+DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& GPU_FUNC_ARGUMENT(gpu_info),
                                  int GPU_FUNC_ARGUMENT(deviceId)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
 
 /*! \brief Returns the device ID of the CUDA GPU currently in use.
index 8770e6862d1637337af772388396e4c65b14d42e..d4e9daddd099a0fc77768131f52c1a3dc74d0bd1 100644 (file)
@@ -129,26 +129,26 @@ static std::string makeOpenClInternalErrorString(const char* message, cl_int sta
 }
 
 /*!
- * \brief Checks that device \c devInfo is sane (ie can run a kernel).
+ * \brief Checks that device \c deviceInfo is sane (ie can run a kernel).
  *
  * Compiles and runs a dummy kernel to determine whether the given
  * OpenCL device functions properly.
  *
  *
- * \param[in]  devInfo         The device info pointer.
+ * \param[in]  deviceInfo      The device info pointer.
  * \param[out] errorMessage    An error message related to a failing OpenCL API call.
  * \throws     std::bad_alloc  When out of memory.
  * \returns                    Whether the device passed sanity checks
  */
-static bool isDeviceSane(const gmx_device_info_t* devInfo, std::string* errorMessage)
+static bool isDeviceSane(const DeviceInformation* deviceInfo, std::string* errorMessage)
 {
     cl_context_properties properties[] = {
-        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(devInfo->ocl_gpu_id.ocl_platform_id), 0
+        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(deviceInfo->oclPlatformId), 0
     };
     // uncrustify spacing
 
     cl_int    status;
-    auto      deviceId = devInfo->ocl_gpu_id.ocl_device_id;
+    auto      deviceId = deviceInfo->oclDeviceId;
     ClContext context(clCreateContext(properties, 1, &deviceId, nullptr, nullptr, &status));
     if (status != CL_SUCCESS)
     {
@@ -198,15 +198,15 @@ static bool isDeviceSane(const gmx_device_info_t* devInfo, std::string* errorMes
 }
 
 /*!
- * \brief Checks that device \c devInfo is compatible with GROMACS.
+ * \brief Checks that device \c deviceInfo is compatible with GROMACS.
  *
  *  Vendor and OpenCL version support checks are executed an the result
  *  of these returned.
  *
- * \param[in]  devInfo         The device info pointer.
- * \returns                    The result of the compatibility checks.
+ * \param[in]  deviceInfo  The device info pointer.
+ * \returns                The result of the compatibility checks.
  */
-static int isDeviceSupported(const gmx_device_info_t* devInfo)
+static int isDeviceSupported(const DeviceInformation* deviceInfo)
 {
     if (getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK") != nullptr)
     {
@@ -222,7 +222,7 @@ static int isDeviceSupported(const gmx_device_info_t* devInfo)
     // the device which has the following format:
     //      OpenCL<space><major_version.minor_version><space><vendor-specific information>
     unsigned int deviceVersionMinor, deviceVersionMajor;
-    const int    valuesScanned = std::sscanf(devInfo->device_version, "OpenCL %u.%u",
+    const int    valuesScanned = std::sscanf(deviceInfo->device_version, "OpenCL %u.%u",
                                           &deviceVersionMajor, &deviceVersionMinor);
     const bool   versionLargeEnough =
             ((valuesScanned == 2)
@@ -234,7 +234,7 @@ static int isDeviceSupported(const gmx_device_info_t* devInfo)
     }
 
     /* Only AMD, Intel, and NVIDIA GPUs are supported for now */
-    switch (devInfo->deviceVendor)
+    switch (deviceInfo->deviceVendor)
     {
         case DeviceVendor::Nvidia: return egpuCompatible;
         case DeviceVendor::Amd:
@@ -258,7 +258,7 @@ static int isDeviceSupported(const gmx_device_info_t* devInfo)
  * \returns  An e_gpu_detect_res_t to indicate how the GPU coped with
  *           the sanity and compatibility check.
  */
-static int checkGpu(size_t deviceId, const gmx_device_info_t* deviceInfo)
+static int checkGpu(size_t deviceId, const DeviceInformation* deviceInfo)
 {
 
     int supportStatus = isDeviceSupported(deviceInfo);
@@ -393,7 +393,7 @@ void findGpus(gmx_gpu_info_t* gpu_info)
             break;
         }
 
-        snew(gpu_info->gpu_dev, gpu_info->n_dev);
+        snew(gpu_info->deviceInfo, gpu_info->n_dev);
 
         {
             int           device_index;
@@ -421,47 +421,47 @@ void findGpus(gmx_gpu_info_t* gpu_info)
 
                 for (unsigned int j = 0; j < ocl_device_count; j++)
                 {
-                    gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_platform_id = ocl_platform_ids[i];
-                    gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id   = ocl_device_ids[j];
+                    gpu_info->deviceInfo[device_index].oclPlatformId = ocl_platform_ids[i];
+                    gpu_info->deviceInfo[device_index].oclDeviceId   = ocl_device_ids[j];
 
-                    gpu_info->gpu_dev[device_index].device_name[0] = 0;
+                    gpu_info->deviceInfo[device_index].device_name[0] = 0;
                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME,
-                                    sizeof(gpu_info->gpu_dev[device_index].device_name),
-                                    gpu_info->gpu_dev[device_index].device_name, nullptr);
+                                    sizeof(gpu_info->deviceInfo[device_index].device_name),
+                                    gpu_info->deviceInfo[device_index].device_name, nullptr);
 
-                    gpu_info->gpu_dev[device_index].device_version[0] = 0;
+                    gpu_info->deviceInfo[device_index].device_version[0] = 0;
                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION,
-                                    sizeof(gpu_info->gpu_dev[device_index].device_version),
-                                    gpu_info->gpu_dev[device_index].device_version, nullptr);
+                                    sizeof(gpu_info->deviceInfo[device_index].device_version),
+                                    gpu_info->deviceInfo[device_index].device_version, nullptr);
 
-                    gpu_info->gpu_dev[device_index].vendorName[0] = 0;
+                    gpu_info->deviceInfo[device_index].vendorName[0] = 0;
                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR,
-                                    sizeof(gpu_info->gpu_dev[device_index].vendorName),
-                                    gpu_info->gpu_dev[device_index].vendorName, nullptr);
+                                    sizeof(gpu_info->deviceInfo[device_index].vendorName),
+                                    gpu_info->deviceInfo[device_index].vendorName, nullptr);
 
-                    gpu_info->gpu_dev[device_index].compute_units = 0;
+                    gpu_info->deviceInfo[device_index].compute_units = 0;
                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS,
-                                    sizeof(gpu_info->gpu_dev[device_index].compute_units),
-                                    &(gpu_info->gpu_dev[device_index].compute_units), nullptr);
+                                    sizeof(gpu_info->deviceInfo[device_index].compute_units),
+                                    &(gpu_info->deviceInfo[device_index].compute_units), nullptr);
 
-                    gpu_info->gpu_dev[device_index].adress_bits = 0;
+                    gpu_info->deviceInfo[device_index].adress_bits = 0;
                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS,
-                                    sizeof(gpu_info->gpu_dev[device_index].adress_bits),
-                                    &(gpu_info->gpu_dev[device_index].adress_bits), nullptr);
+                                    sizeof(gpu_info->deviceInfo[device_index].adress_bits),
+                                    &(gpu_info->deviceInfo[device_index].adress_bits), nullptr);
 
-                    gpu_info->gpu_dev[device_index].deviceVendor =
-                            getDeviceVendor(gpu_info->gpu_dev[device_index].vendorName);
+                    gpu_info->deviceInfo[device_index].deviceVendor =
+                            getDeviceVendor(gpu_info->deviceInfo[device_index].vendorName);
 
                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, 3 * sizeof(size_t),
-                                    &gpu_info->gpu_dev[device_index].maxWorkItemSizes, nullptr);
+                                    &gpu_info->deviceInfo[device_index].maxWorkItemSizes, nullptr);
 
                     clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
-                                    &gpu_info->gpu_dev[device_index].maxWorkGroupSize, nullptr);
+                                    &gpu_info->deviceInfo[device_index].maxWorkGroupSize, nullptr);
 
-                    gpu_info->gpu_dev[device_index].stat =
-                            gmx::checkGpu(device_index, gpu_info->gpu_dev + device_index);
+                    gpu_info->deviceInfo[device_index].stat =
+                            gmx::checkGpu(device_index, gpu_info->deviceInfo + device_index);
 
-                    if (egpuCompatible == gpu_info->gpu_dev[device_index].stat)
+                    if (egpuCompatible == gpu_info->deviceInfo[device_index].stat)
                     {
                         gpu_info->n_dev_compatible++;
                     }
@@ -479,16 +479,13 @@ void findGpus(gmx_gpu_info_t* gpu_info)
                 int last = -1;
                 for (int i = 0; i < gpu_info->n_dev; i++)
                 {
-                    if (gpu_info->gpu_dev[i].deviceVendor == DeviceVendor::Amd)
+                    if (gpu_info->deviceInfo[i].deviceVendor == DeviceVendor::Amd)
                     {
                         last++;
 
                         if (last < i)
                         {
-                            gmx_device_info_t ocl_gpu_info;
-                            ocl_gpu_info            = gpu_info->gpu_dev[i];
-                            gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
-                            gpu_info->gpu_dev[last] = ocl_gpu_info;
+                            std::swap(gpu_info->deviceInfo[i], gpu_info->deviceInfo[last]);
                         }
                     }
                 }
@@ -498,16 +495,13 @@ void findGpus(gmx_gpu_info_t* gpu_info)
                 {
                     for (int i = 0; i < gpu_info->n_dev; i++)
                     {
-                        if (gpu_info->gpu_dev[i].deviceVendor == DeviceVendor::Nvidia)
+                        if (gpu_info->deviceInfo[i].deviceVendor == DeviceVendor::Nvidia)
                         {
                             last++;
 
                             if (last < i)
                             {
-                                gmx_device_info_t ocl_gpu_info;
-                                ocl_gpu_info            = gpu_info->gpu_dev[i];
-                                gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
-                                gpu_info->gpu_dev[last] = ocl_gpu_info;
+                                std::swap(gpu_info->deviceInfo[i], gpu_info->deviceInfo[last]);
                             }
                         }
                     }
@@ -532,7 +526,7 @@ void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int ind
         return;
     }
 
-    gmx_device_info_t* dinfo = &gpu_info.gpu_dev[index];
+    DeviceInformation* dinfo = &gpu_info.deviceInfo[index];
 
     bool bGpuExists = (dinfo->stat != egpuNonexistent && dinfo->stat != egpuInsane);
 
@@ -548,7 +542,7 @@ void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int ind
 }
 
 
-void init_gpu(const gmx_device_info_t* deviceInfo)
+void init_gpu(const DeviceInformation* deviceInfo)
 {
     assert(deviceInfo);
 
@@ -570,21 +564,21 @@ void init_gpu(const gmx_device_info_t* deviceInfo)
     }
 }
 
-gmx_device_info_t* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId)
+DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId)
 {
     if (deviceId < 0 || deviceId >= gpu_info.n_dev)
     {
         gmx_incons("Invalid GPU deviceId requested");
     }
-    return &gpu_info.gpu_dev[deviceId];
+    return &gpu_info.deviceInfo[deviceId];
 }
 
 size_t sizeof_gpu_dev_info()
 {
-    return sizeof(gmx_device_info_t);
+    return sizeof(DeviceInformation);
 }
 
 int gpu_info_get_stat(const gmx_gpu_info_t& info, int index)
 {
-    return info.gpu_dev[index].stat;
+    return info.deviceInfo[index].stat;
 }
index 7ac35d532968c201991c15631da9f4c0d049f002..8a4936dabcf801376e1df06b5e33d268fc136d97 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  * \ingroup module_gpu_utils
  */
 
+/*! \brief CUDA device information.
+ *
+ * The CUDA device information is queried and set at detection and contains
+ * both information about the device/hardware returned by the runtime as well
+ * as additional data like support status.
+ */
+struct DeviceInformation
+{
+    //! ID of the CUDA device.
+    int id;
+    //! CUDA device properties.
+    cudaDeviceProp prop;
+    //! Result of the device check.
+    int stat;
+};
+
 //! \brief GPU command stream
 using CommandStream = cudaStream_t;
 //! \brief Single GPU call timing event - meaningless in CUDA
index 9b91c0ec18e5145695afb1f71703d1fe7b2e5c6e..0229ea443c60c7454a99e50c1751637734dd1964 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 
 #else
 
+//! Stub for device information.
+struct DeviceInformation
+{
+    // No member needed
+};
+
 //! \brief GPU command stream
 using CommandStream = void*;
 //! \brief Single GPU call timing event
index c4a421f3a9fa001e02944995cad377ab714493a2..00d9cba90d95de2b2e52a8b5055aeea07ed2d371 100644 (file)
@@ -55,6 +55,28 @@ enum class DeviceVendor : int
     Count   = 4
 };
 
+/*! \internal
+ * \brief OpenCL device information.
+ *
+ * The OpenCL device information is queried and set at detection and contains
+ * both information about the device/hardware returned by the runtime as well
+ * as additional data like support status.
+ */
+struct DeviceInformation
+{
+    cl_platform_id oclPlatformId;       //!< OpenCL Platform ID.
+    cl_device_id   oclDeviceId;         //!< OpenCL Device ID.
+    char           device_name[256];    //!< Device name.
+    char           device_version[256]; //!< Device version.
+    char           vendorName[256];     //!< Device vendor name.
+    int            compute_units;       //!< Number of compute units.
+    int            adress_bits;         //!< Number of address bits the device is capable of.
+    int            stat;                //!< Device status takes values of e_gpu_detect_res_t.
+    DeviceVendor   deviceVendor;        //!< Device vendor.
+    size_t         maxWorkItemSizes[3]; //!< Workgroup size limits (CL_DEVICE_MAX_WORK_ITEM_SIZES).
+    size_t maxWorkGroupSize; //!< Workgroup total size limit (CL_DEVICE_MAX_WORK_GROUP_SIZE).
+};
+
 //! \brief GPU command stream
 using CommandStream = cl_command_queue;
 //! \brief Single GPU call timing event
index 6ad4de9d485af9e48deebf64b212cdcd0e217122..91b6059d27c5bf1838fdfec1e000015ac6b4a205 100644 (file)
 
 enum class GpuApiCallBehavior;
 
-/*! \internal
- * \brief OpenCL GPU device identificator
- *
- * An OpenCL device is identified by its ID.
- * The platform ID is also included for caching reasons.
- */
-typedef struct
-{
-    cl_platform_id ocl_platform_id; /**< Platform ID */
-    cl_device_id   ocl_device_id;   /**< Device ID */
-} ocl_gpu_id_t;
-
-/*! \internal
- * \brief OpenCL device information.
- *
- * The OpenCL device information is queried and set at detection and contains
- * both information about the device/hardware returned by the runtime as well
- * as additional data like support status.
- */
-struct gmx_device_info_t
-{
-    ocl_gpu_id_t ocl_gpu_id;          /**< device ID assigned at detection   */
-    char         device_name[256];    /**< device name */
-    char         device_version[256]; /**< device version */
-    char         vendorName[256];     /**< device vendor */
-    int          compute_units;       /**< number of compute units */
-    int          adress_bits;         /**< number of adress bits the device is capable of */
-    int          stat;                /**< device status takes values of e_gpu_detect_res_t */
-    DeviceVendor deviceVendor;        /**< device vendor */
-    size_t       maxWorkItemSizes[3]; /**< workgroup size limits (CL_DEVICE_MAX_WORK_ITEM_SIZES) */
-    size_t maxWorkGroupSize; /**< workgroup total size limit (CL_DEVICE_MAX_WORK_GROUP_SIZE) */
-};
-
 /*! \internal
  * \brief OpenCL GPU runtime data
  *
index ed3808595457786544f84f8d6fc0d2c1ad53352a..ffe60c00e9c0de6666a746c341aab6a333148bd6 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -84,11 +84,11 @@ void doDeviceTransfers(const gmx_gpu_info_t& gpuInfo, ArrayRef<const char> input
 
     const auto*           device       = getDeviceInfo(gpuInfo, compatibleGpus[0]);
     cl_context_properties properties[] = {
-        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(device->ocl_gpu_id.ocl_platform_id), 0
+        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(device->oclPlatformId), 0
     };
     // Give uncrustify more space
 
-    auto deviceId = device->ocl_gpu_id.ocl_device_id;
+    auto deviceId = device->oclDeviceId;
     auto context  = clCreateContext(properties, 1, &deviceId, nullptr, nullptr, &status);
     throwUponFailure(status, "creating context");
     auto commandQueue = clCreateCommandQueue(context, deviceId, 0, &status);
index 23887cad4eb2f7d7defc88b678476a8b7d0eece7..969399b3eccbd073d9361083781512fae30164d5 100644 (file)
@@ -171,9 +171,9 @@ static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
 
             if (!isMasterRankOfPhysicalNode)
             {
-                hardwareInfo->gpu_info.gpu_dev = (struct gmx_device_info_t*)malloc(dev_size);
+                hardwareInfo->gpu_info.deviceInfo = (struct DeviceInformation*)malloc(dev_size);
             }
-            MPI_Bcast(hardwareInfo->gpu_info.gpu_dev, dev_size, MPI_BYTE, 0, physicalNodeComm.comm_);
+            MPI_Bcast(hardwareInfo->gpu_info.deviceInfo, dev_size, MPI_BYTE, 0, physicalNodeComm.comm_);
             MPI_Bcast(&hardwareInfo->gpu_info.n_dev_compatible, 1, MPI_INT, 0, physicalNodeComm.comm_);
         }
     }
@@ -454,7 +454,7 @@ gmx_hw_info_t* gmx_detect_hardware(const gmx::MDLogger& mdlog, const PhysicalNod
     // Detect GPUs
     hardwareInfo->gpu_info.n_dev            = 0;
     hardwareInfo->gpu_info.n_dev_compatible = 0;
-    hardwareInfo->gpu_info.gpu_dev          = nullptr;
+    hardwareInfo->gpu_info.deviceInfo       = nullptr;
 
     gmx_detect_gpus(mdlog, physicalNodeComm, compat::make_not_null(hardwareInfo));
     gmx_collect_hardware_mpi(*hardwareInfo->cpuInfo, physicalNodeComm, compat::make_not_null(hardwareInfo));
index 7c57413dc1e16a32f3490e01492ba27db2d7dee1..98f87760cddef0df4aaeb8f48b078abe68671ffa 100644 (file)
@@ -38,7 +38,7 @@
 
 #include "gromacs/utility/basedefinitions.h"
 
-struct gmx_device_info_t;
+struct DeviceInformation;
 
 /*! \brief Possible results of the GPU detection/check.
  *
@@ -73,7 +73,7 @@ struct gmx_gpu_info_t
     //! Total number of GPU devices detected on this physical node
     int n_dev;
     //! Information about each GPU device detected on this physical node
-    gmx_device_info_t* gpu_dev;
+    DeviceInformation* deviceInfo;
     //! Number of GPU devices detected on this physical node that are compatible.
     int n_dev_compatible;
 };
index 53fc326e30ccfa737319aa8c3e3ca14d42655752..07d0b05d7c8a955f56b7d9fbb5239890328806c1 100644 (file)
@@ -45,7 +45,7 @@
 #include "gromacs/timing/wallcycle.h"
 #include "gromacs/utility/arrayref.h"
 
-struct gmx_device_info_t;
+struct DeviceInformation;
 struct gmx_hw_info_t;
 struct t_commrec;
 struct t_fcdata;
index 364d0bd9c29bc1f086b60d9307b274f6677b270b..3debb2046e5b12ad3e9f92874b44694033e75474 100644 (file)
@@ -1146,8 +1146,8 @@ int Mdrunner::mdrunner()
             EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
 
     // Get the device handles for the modules, nullptr when no task is assigned.
-    gmx_device_info_t* nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr);
-    gmx_device_info_t* pmeDeviceInfo       = gpuTaskAssignments.initPmeDevice();
+    DeviceInformation* nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr);
+    DeviceInformation* pmeDeviceInfo       = gpuTaskAssignments.initPmeDevice();
 
     // TODO Initialize GPU streams here.
 
index 359e6c590525b8b295515231025dbfa29a51fd06..94e99879db0e3c54976e19a24cf030885143ca7e 100644 (file)
@@ -125,16 +125,16 @@ typedef void (*nbnxn_cu_kfunc_ptr_t)(const cu_atomdata_t, const cu_nbparam_t, co
 /*********************************/
 
 /*! Returns the number of blocks to be used for the nonbonded GPU kernel. */
-static inline int calc_nb_kernel_nblock(int nwork_units, const gmx_device_info_t* dinfo)
+static inline int calc_nb_kernel_nblock(int nwork_units, const DeviceInformation* deviceInfo)
 {
     int max_grid_x_size;
 
-    assert(dinfo);
+    assert(deviceInfo);
     /* CUDA does not accept grid dimension of 0 (which can happen e.g. with an
        empty domain) and that case should be handled before this point. */
     assert(nwork_units > 0);
 
-    max_grid_x_size = dinfo->prop.maxGridSize[0];
+    max_grid_x_size = deviceInfo->prop.maxGridSize[0];
 
     /* do we exceed the grid x dimension limit? */
     if (nwork_units > max_grid_x_size)
@@ -284,7 +284,7 @@ static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int                     e
                                                        int                     evdwtype,
                                                        bool                    bDoEne,
                                                        bool                    bDoPrune,
-                                                       const gmx_device_info_t gmx_unused* devInfo)
+                                                       const DeviceInformation gmx_unused* deviceInfo)
 {
     nbnxn_cu_kfunc_ptr_t res;
 
@@ -295,7 +295,7 @@ static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int                     e
 
     /* assert assumptions made by the kernels */
     GMX_ASSERT(c_nbnxnGpuClusterSize * c_nbnxnGpuClusterSize / c_nbnxnGpuClusterpairSplit
-                       == devInfo->prop.warpSize,
+                       == deviceInfo->prop.warpSize,
                "The CUDA kernels require the "
                "cluster_size_i*cluster_size_j/nbnxn_gpu_clusterpair_split to match the warp size "
                "of the architecture targeted.");
@@ -328,12 +328,12 @@ static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int                     e
 
 /*! \brief Calculates the amount of shared memory required by the nonbonded kernel in use. */
 static inline int calc_shmem_required_nonbonded(const int               num_threads_z,
-                                                const gmx_device_info_t gmx_unused* dinfo,
+                                                const DeviceInformation gmx_unused* deviceInfo,
                                                 const cu_nbparam_t*                 nbp)
 {
     int shmem;
 
-    assert(dinfo);
+    assert(deviceInfo);
 
     /* size of shmem (force-buffers/xq/atom type preloading) */
     /* NOTE: with the default kernel on sm3.0 we need shmem only for pre-loading */
@@ -530,11 +530,11 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In
      * - The 1D block-grid contains as many blocks as super-clusters.
      */
     int num_threads_z = 1;
-    if (nb->dev_info->prop.major == 3 && nb->dev_info->prop.minor == 7)
+    if (nb->deviceInfo->prop.major == 3 && nb->deviceInfo->prop.minor == 7)
     {
         num_threads_z = 2;
     }
-    int nblock = calc_nb_kernel_nblock(plist->nsci, nb->dev_info);
+    int nblock = calc_nb_kernel_nblock(plist->nsci, nb->deviceInfo);
 
 
     KernelLaunchConfig config;
@@ -542,7 +542,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In
     config.blockSize[1]     = c_clSize;
     config.blockSize[2]     = num_threads_z;
     config.gridSize[0]      = nblock;
-    config.sharedMemorySize = calc_shmem_required_nonbonded(num_threads_z, nb->dev_info, nbp);
+    config.sharedMemorySize = calc_shmem_required_nonbonded(num_threads_z, nb->deviceInfo, nbp);
     config.stream           = stream;
 
     if (debug)
@@ -559,7 +559,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In
     auto*      timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
     const auto kernel      = select_nbnxn_kernel(
             nbp->eeltype, nbp->vdwtype, stepWork.computeEnergy,
-            (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune), nb->dev_info);
+            (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune), nb->deviceInfo);
     const auto kernelArgs =
             prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &stepWork.computeVirial);
     launchGpuKernel(kernel, config, timingEvent, "k_calc_nb", kernelArgs);
@@ -660,7 +660,7 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c
      * - The 1D block-grid contains as many blocks as super-clusters.
      */
     int                num_threads_z = c_cudaPruneKernelJ4Concurrency;
-    int                nblock        = calc_nb_kernel_nblock(numSciInPart, nb->dev_info);
+    int                nblock        = calc_nb_kernel_nblock(numSciInPart, nb->deviceInfo);
     KernelLaunchConfig config;
     config.blockSize[0]     = c_clSize;
     config.blockSize[1]     = c_clSize;
index b2fc758417a814784fcc3ee1b9abf8c7881aa5de..11e490551a85a6ead6d30a50b837fb17428ff1bf 100644 (file)
@@ -412,7 +412,7 @@ static void cuda_init_const(NbnxmGpu*                       nb,
     nbnxn_cuda_clear_e_fshift(nb);
 }
 
-NbnxmGpu* gpu_init(const gmx_device_info_t*   deviceInfo,
+NbnxmGpu* gpu_init(const DeviceInformation*   deviceInfo,
                    const interaction_const_t* ic,
                    const PairlistParams&      listParams,
                    const nbnxn_atomdata_t*    nbat,
@@ -443,7 +443,7 @@ NbnxmGpu* gpu_init(const gmx_device_info_t*   deviceInfo,
     init_plist(nb->plist[InteractionLocality::Local]);
 
     /* set device info, just point it to the right GPU among the detected ones */
-    nb->dev_info = deviceInfo;
+    nb->deviceInfo = deviceInfo;
 
     /* local/non-local GPU streams */
     stat = cudaStreamCreate(&nb->stream[InteractionLocality::Local]);
@@ -812,7 +812,7 @@ void gpu_reset_timings(nonbonded_verlet_t* nbv)
 
 int gpu_min_ci_balanced(NbnxmGpu* nb)
 {
-    return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->prop.multiProcessorCount : 0;
+    return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceInfo->prop.multiProcessorCount : 0;
 }
 
 gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
index 863e6a1efc09de5c6a81d7b5656ad01fe946cf86..da607e442974f3dc946db8f566cd6c2bd66fd222 100644 (file)
@@ -267,7 +267,7 @@ class GpuEventSynchronizer;
 struct NbnxmGpu
 {
     /*! \brief CUDA device information */
-    const gmx_device_info_t* dev_info = nullptr;
+    const DeviceInformation* deviceInfo = nullptr;
     /*! \brief true if doing both local/non-local NB work on GPU */
     bool bUseTwoStreams = false;
     /*! \brief atom data */
index 9d7502cdbd20518f73b0106b52ca7478b3ec2e57..aef5b44cfb3e64694fb5c7bc593bf5199da59bb2 100644 (file)
@@ -52,7 +52,7 @@
 
 struct NbnxmGpu;
 struct gmx_gpu_info_t;
-struct gmx_device_info_t;
+struct DeviceInformation;
 struct gmx_wallclock_gpu_nbnxn_t;
 struct nbnxn_atomdata_t;
 struct NbnxnPairlistGpu;
@@ -63,7 +63,7 @@ namespace Nbnxm
 
 /** Initializes the data structures related to GPU nonbonded calculations. */
 GPU_FUNC_QUALIFIER
-NbnxmGpu* gpu_init(const gmx_device_info_t gmx_unused* deviceInfo,
+NbnxmGpu* gpu_init(const DeviceInformation gmx_unused* deviceInfo,
                    const interaction_const_t gmx_unused* ic,
                    const PairlistParams gmx_unused& listParams,
                    const nbnxn_atomdata_t gmx_unused* nbat,
index 65b3c1a1ce0c312e410dc340e3add4664e41ea11..50fed0fe5bac947273c3369653604aba990c25b9 100644 (file)
 #include "gromacs/utility/enumerationhelpers.h"
 #include "gromacs/utility/real.h"
 
-struct gmx_device_info_t;
+struct DeviceInformation;
 struct gmx_domdec_zones_t;
 struct gmx_enerdata_t;
 struct gmx_hw_info_t;
@@ -408,7 +408,7 @@ std::unique_ptr<nonbonded_verlet_t> init_nb_verlet(const gmx::MDLogger&     mdlo
                                                    const t_forcerec*        fr,
                                                    const t_commrec*         cr,
                                                    const gmx_hw_info_t&     hardwareInfo,
-                                                   const gmx_device_info_t* deviceInfo,
+                                                   const DeviceInformation* deviceInfo,
                                                    const gmx_mtop_t*        mtop,
                                                    matrix                   box,
                                                    gmx_wallcycle*           wcycle);
index c45d54decf96dde6ba8640999fa7d41b4e8f627c..9cadba3743cb304a6c17d88c7f51fccf42593129 100644 (file)
@@ -362,7 +362,7 @@ std::unique_ptr<nonbonded_verlet_t> init_nb_verlet(const gmx::MDLogger&     mdlo
                                                    const t_forcerec*        fr,
                                                    const t_commrec*         cr,
                                                    const gmx_hw_info_t&     hardwareInfo,
-                                                   const gmx_device_info_t* deviceInfo,
+                                                   const DeviceInformation* deviceInfo,
                                                    const gmx_mtop_t*        mtop,
                                                    matrix                   box,
                                                    gmx_wallcycle*           wcycle)
index d7511d3bd18462ed5750871afe546ec7fad5e181..f0c88b10ba8b023897c1f0d74f95ff2063136496 100644 (file)
@@ -103,7 +103,7 @@ static constexpr int c_clSize = c_nbnxnGpuClusterSize;
  */
 static inline void validate_global_work_size(const KernelLaunchConfig& config,
                                              int                       work_dim,
-                                             const gmx_device_info_t*  dinfo)
+                                             const DeviceInformation*  dinfo)
 {
     cl_uint device_size_t_size_bits;
     cl_uint host_size_t_size_bits;
@@ -639,7 +639,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb
     config.blockSize[1]     = c_clSize;
     config.gridSize[0]      = plist->nsci;
 
-    validate_global_work_size(config, 3, nb->dev_info);
+    validate_global_work_size(config, 3, nb->deviceInfo);
 
     if (debug)
     {
@@ -788,7 +788,7 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c
      *   and j-cluster concurrency, in x, y, and z, respectively.
      * - The 1D block-grid contains as many blocks as super-clusters.
      */
-    int num_threads_z = getOclPruneKernelJ4Concurrency(nb->dev_info->deviceVendor);
+    int num_threads_z = getOclPruneKernelJ4Concurrency(nb->deviceInfo->deviceVendor);
 
 
     /* kernel launch config */
@@ -800,7 +800,7 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c
     config.blockSize[2]     = num_threads_z;
     config.gridSize[0]      = numSciInPart;
 
-    validate_global_work_size(config, 3, nb->dev_info);
+    validate_global_work_size(config, 3, nb->deviceInfo);
 
     if (debug)
     {
index c4df4f17115cddfa9d8f727edb03e2680aa22ff1..59ee706c4f99c39893db7d0fadba75f32f3e933d 100644 (file)
@@ -477,12 +477,12 @@ static void CL_CALLBACK ocl_notify_fn(const char* pErrInfo,
  *
  * A fatal error results if creation fails.
  *
- * \param[inout] runtimeData runtime data including program and context
- * \param[in]    devInfo     device info struct
+ * \param[inout] runtimeData Runtime data including program and context
+ * \param[in]    deviceInfo  Device info struct
  * \param[in]    rank        MPI rank (for error reporting)
  */
 static void nbnxn_gpu_create_context(gmx_device_runtime_data_t* runtimeData,
-                                     const gmx_device_info_t*   devInfo,
+                                     const DeviceInformation*   deviceInfo,
                                      int                        rank)
 {
     cl_context_properties context_properties[5];
@@ -492,10 +492,10 @@ static void nbnxn_gpu_create_context(gmx_device_runtime_data_t* runtimeData,
     cl_int                cl_error;
 
     GMX_ASSERT(runtimeData, "Need a valid runtimeData object");
-    GMX_ASSERT(devInfo, "Need a valid device info object");
+    GMX_ASSERT(deviceInfo, "Need a valid device info object");
 
-    platform_id = devInfo->ocl_gpu_id.ocl_platform_id;
-    device_id   = devInfo->ocl_gpu_id.ocl_device_id;
+    platform_id = deviceInfo->oclPlatformId;
+    device_id   = deviceInfo->oclDeviceId;
 
     int i                   = 0;
     context_properties[i++] = CL_CONTEXT_PLATFORM;
@@ -512,7 +512,7 @@ static void nbnxn_gpu_create_context(gmx_device_runtime_data_t* runtimeData,
     if (CL_SUCCESS != cl_error)
     {
         gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s:\n OpenCL error %d: %s",
-                  rank, devInfo->device_name, cl_error, ocl_get_error_string(cl_error).c_str());
+                  rank, deviceInfo->device_name, cl_error, ocl_get_error_string(cl_error).c_str());
     }
 
     runtimeData->context = context;
@@ -528,7 +528,7 @@ static cl_kernel nbnxn_gpu_create_kernel(NbnxmGpu* nb, const char* kernel_name)
     if (CL_SUCCESS != cl_error)
     {
         gmx_fatal(FARGS, "Failed to create kernel '%s' for GPU #%s: OpenCL error %d", kernel_name,
-                  nb->dev_info->device_name, cl_error);
+                  nb->deviceInfo->device_name, cl_error);
     }
 
     return kernel;
@@ -609,7 +609,7 @@ static void nbnxn_ocl_init_const(NbnxmGpu*                       nb,
 
 
 //! This function is documented in the header file
-NbnxmGpu* gpu_init(const gmx_device_info_t*   deviceInfo,
+NbnxmGpu* gpu_init(const DeviceInformation*   deviceInfo,
                    const interaction_const_t* ic,
                    const PairlistParams&      listParams,
                    const nbnxn_atomdata_t*    nbat,
@@ -636,7 +636,7 @@ NbnxmGpu* gpu_init(const gmx_device_info_t*   deviceInfo,
     snew(nb->timings, 1);
 
     /* set device info, just point it to the right GPU among the detected ones */
-    nb->dev_info = deviceInfo;
+    nb->deviceInfo = deviceInfo;
     snew(nb->dev_rundata, 1);
 
     /* init nbst */
@@ -659,28 +659,27 @@ NbnxmGpu* gpu_init(const gmx_device_info_t*   deviceInfo,
         queue_properties = 0;
     }
 
-    nbnxn_gpu_create_context(nb->dev_rundata, nb->dev_info, rank);
+    nbnxn_gpu_create_context(nb->dev_rundata, nb->deviceInfo, rank);
 
     /* local/non-local GPU streams */
     nb->stream[InteractionLocality::Local] = clCreateCommandQueue(
-            nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id, queue_properties, &cl_error);
+            nb->dev_rundata->context, nb->deviceInfo->oclDeviceId, queue_properties, &cl_error);
     if (CL_SUCCESS != cl_error)
     {
         gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s: OpenCL error %d", rank,
-                  nb->dev_info->device_name, cl_error);
+                  nb->deviceInfo->device_name, cl_error);
     }
 
     if (nb->bUseTwoStreams)
     {
         init_plist(nb->plist[InteractionLocality::NonLocal]);
 
-        nb->stream[InteractionLocality::NonLocal] =
-                clCreateCommandQueue(nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id,
-                                     queue_properties, &cl_error);
+        nb->stream[InteractionLocality::NonLocal] = clCreateCommandQueue(
+                nb->dev_rundata->context, nb->deviceInfo->oclDeviceId, queue_properties, &cl_error);
         if (CL_SUCCESS != cl_error)
         {
             gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s: OpenCL error %d",
-                      rank, nb->dev_info->device_name, cl_error);
+                      rank, nb->deviceInfo->device_name, cl_error);
         }
     }
 
@@ -695,8 +694,8 @@ NbnxmGpu* gpu_init(const gmx_device_info_t*   deviceInfo,
      * TODO: decide about NVIDIA
      */
     nb->bPrefetchLjParam = (getenv("GMX_OCL_DISABLE_I_PREFETCH") == nullptr)
-                           && ((nb->dev_info->deviceVendor == DeviceVendor::Amd)
-                               || (nb->dev_info->deviceVendor == DeviceVendor::Intel)
+                           && ((nb->deviceInfo->deviceVendor == DeviceVendor::Amd)
+                               || (nb->deviceInfo->deviceVendor == DeviceVendor::Intel)
                                || (getenv("GMX_OCL_ENABLE_I_PREFETCH") != nullptr));
 
     /* NOTE: in CUDA we pick L1 cache configuration for the nbnxn kernels here,
@@ -1108,7 +1107,7 @@ void gpu_reset_timings(nonbonded_verlet_t* nbv)
 //! This function is documented in the header file
 int gpu_min_ci_balanced(NbnxmGpu* nb)
 {
-    return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->compute_units : 0;
+    return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceInfo->compute_units : 0;
 }
 
 //! This function is documented in the header file
index 285d91ef8fd0817d3baf856fc190f25c5a342a3e..8a4e217d84d56139653477897e5013ad2befe800 100644 (file)
@@ -163,7 +163,7 @@ static std::string makeDefinesForKernelTypes(bool bFastGen, int eeltype, int vdw
  *
  * A fatal error results if compilation fails.
  *
- * \param[inout] nb  Manages OpenCL non-bonded calculations; compiled kernels returned in dev_info members
+ * \param[inout] nb  Manages OpenCL non-bonded calculations; compiled kernels returned in deviceInfo members
  *
  * Does not throw
  */
@@ -202,13 +202,12 @@ void nbnxn_gpu_compile_kernels(NbnxmGpu* nb)
                the log output here should be written there */
             program = gmx::ocl::compileProgram(
                     stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines,
-                    nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id,
-                    nb->dev_info->deviceVendor);
+                    nb->dev_rundata->context, nb->deviceInfo->oclDeviceId, nb->deviceInfo->deviceVendor);
         }
         catch (gmx::GromacsException& e)
         {
             e.prependContext(gmx::formatString("Failed to compile NBNXN kernels for GPU #%s\n",
-                                               nb->dev_info->device_name));
+                                               nb->deviceInfo->device_name));
             throw;
         }
     }
index d1ce7be20afdbd834dfc863b419035773af231cb..74f042939cc2a3243c11f0bd8e4a1b858601772e 100644 (file)
@@ -331,7 +331,7 @@ typedef struct Nbnxm::gpu_timers_t cl_timers_t;
 struct NbnxmGpu
 {
     //! OpenCL device information
-    const gmx_device_info_t* dev_info = nullptr;
+    const DeviceInformation* deviceInfo = nullptr;
     //! OpenCL runtime data (context, kernels)
     struct gmx_device_runtime_data_t* dev_rundata = nullptr;
 
index 6df5a4614e651a5e03df7fc159d140446078d222..5a62972d61001ad59172b8538fdab1f2c6c09860 100644 (file)
@@ -400,9 +400,9 @@ void GpuTaskAssignments::reportGpuUsage(const MDLogger& mdlog,
                         numRanksOnThisNode_, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate);
 }
 
-gmx_device_info_t* GpuTaskAssignments::initNonbondedDevice(const t_commrec* cr) const
+DeviceInformation* GpuTaskAssignments::initNonbondedDevice(const t_commrec* cr) const
 {
-    gmx_device_info_t*       deviceInfo        = nullptr;
+    DeviceInformation*       deviceInfo        = nullptr;
     const GpuTaskAssignment& gpuTaskAssignment = assignmentForAllRanksOnThisNode_[indexOfThisRank_];
 
     // This works because only one task of each type per rank is currently permitted.
@@ -425,9 +425,9 @@ gmx_device_info_t* GpuTaskAssignments::initNonbondedDevice(const t_commrec* cr)
     return deviceInfo;
 }
 
-gmx_device_info_t* GpuTaskAssignments::initPmeDevice() const
+DeviceInformation* GpuTaskAssignments::initPmeDevice() const
 {
-    gmx_device_info_t*       deviceInfo        = nullptr;
+    DeviceInformation*       deviceInfo        = nullptr;
     const GpuTaskAssignment& gpuTaskAssignment = assignmentForAllRanksOnThisNode_[indexOfThisRank_];
 
     // This works because only one task of each type is currently permitted.
index 6ef380385d3906d0b873da3b32454d7454c60106..c6ac9ac9de6b8b8730545d4197efc6572b8626ec 100644 (file)
@@ -55,7 +55,7 @@
 #include "gromacs/utility/basedefinitions.h"
 #include "gromacs/utility/gmxmpi.h"
 
-struct gmx_device_info_t;
+struct DeviceInformation;
 struct gmx_hw_info_t;
 struct t_commrec;
 
@@ -247,12 +247,12 @@ public:
      * \todo This also sets up DLB for device sharing, where
      * appropriate, but that responsbility should move
      * elsewhere. */
-    gmx_device_info_t* initNonbondedDevice(const t_commrec* cr) const;
+    DeviceInformation* initNonbondedDevice(const t_commrec* cr) const;
     /*! \brief Return handle to the initialized GPU to use for the
      * PME task on this rank, if any.
      *
      * Returns nullptr if no such task is assigned to this rank. */
-    gmx_device_info_t* initPmeDevice() const;
+    DeviceInformation* initPmeDevice() const;
     //! Return whether this rank has a PME task running on a GPU
     bool thisRankHasPmeGpuTask() const;
     //! Return whether this rank has any task running on a GPU