From e742ad1076578eedea2e41c3d4e5912339ac9a80 Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Wed, 29 Jan 2020 15:45:59 +0100 Subject: [PATCH] Move DeviceInfo into GPU traits The DeviceInfo is needed upon construction of DeviceContext. To prepare for opaque DeviceContext type, it is moved to GPU traits and renamed according to the common naming scheme. Refs. #3311, needed for #3315. Change-Id: I2a9f1d932f142d645df75901521a734d208de509 --- docs/OpenCLTODOList.txt | 2 - src/gromacs/ewald/pme.cpp | 4 +- src/gromacs/ewald/pme.h | 6 +- src/gromacs/ewald/pme_gpu_internal.cpp | 12 +-- src/gromacs/ewald/pme_gpu_internal.h | 6 +- src/gromacs/ewald/pme_gpu_program.cpp | 6 +- src/gromacs/ewald/pme_gpu_program.h | 6 +- src/gromacs/ewald/pme_gpu_program_impl.cpp | 4 +- src/gromacs/ewald/pme_gpu_program_impl.cu | 4 +- src/gromacs/ewald/pme_gpu_program_impl.h | 8 +- .../ewald/pme_gpu_program_impl_ocl.cpp | 16 +-- src/gromacs/ewald/pme_gpu_types_host.h | 4 +- src/gromacs/ewald/tests/pmetestcommon.cpp | 8 +- src/gromacs/ewald/tests/pmetestcommon.h | 4 +- .../ewald/tests/testhardwarecontexts.cpp | 4 +- .../ewald/tests/testhardwarecontexts.h | 6 +- src/gromacs/gpu_utils/cudautils.cuh | 13 --- src/gromacs/gpu_utils/gpu_utils.cpp | 6 +- src/gromacs/gpu_utils/gpu_utils.cu | 22 ++-- src/gromacs/gpu_utils/gpu_utils.h | 10 +- src/gromacs/gpu_utils/gpu_utils_ocl.cpp | 100 ++++++++---------- src/gromacs/gpu_utils/gputraits.cuh | 18 +++- src/gromacs/gpu_utils/gputraits.h | 8 +- src/gromacs/gpu_utils/gputraits_ocl.h | 22 ++++ src/gromacs/gpu_utils/oclutils.h | 33 ------ .../gpu_utils/tests/devicetransfers_ocl.cpp | 6 +- src/gromacs/hardware/detecthardware.cpp | 6 +- src/gromacs/hardware/gpu_hw_info.h | 4 +- src/gromacs/mdlib/forcerec.h | 2 +- src/gromacs/mdrun/runner.cpp | 4 +- src/gromacs/nbnxm/cuda/nbnxm_cuda.cu | 24 ++--- .../nbnxm/cuda/nbnxm_cuda_data_mgmt.cu | 6 +- src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h | 2 +- src/gromacs/nbnxm/gpu_data_mgmt.h | 4 +- src/gromacs/nbnxm/nbnxm.h | 4 +- src/gromacs/nbnxm/nbnxm_setup.cpp | 2 +- src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp | 8 +- .../nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp | 39 ++++--- .../nbnxm/opencl/nbnxm_ocl_jit_support.cpp | 7 +- src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h | 2 +- src/gromacs/taskassignment/taskassignment.cpp | 8 +- src/gromacs/taskassignment/taskassignment.h | 6 +- 42 files changed, 227 insertions(+), 239 deletions(-) diff --git a/docs/OpenCLTODOList.txt b/docs/OpenCLTODOList.txt index 34b71b439b..2864e911a1 100644 --- a/docs/OpenCLTODOList.txt +++ b/docs/OpenCLTODOList.txt @@ -32,8 +32,6 @@ TABLE OF CONTENTS - Quite a few error conditions are unhandled, noted with TODOs in several files -- gmx_device_info_t needs struct field documentation - 3. 
ENHANCEMENTS ============ - Implement OpenCL kernels for Intel GPUs diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp index d6cd44b346..475453973e 100644 --- a/src/gromacs/ewald/pme.cpp +++ b/src/gromacs/ewald/pme.cpp @@ -570,7 +570,7 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr, int nthread, PmeRunMode runMode, PmeGpu* pmeGpu, - const gmx_device_info_t* gpuInfo, + const DeviceInformation* deviceInfo, const PmeGpuProgram* pmeGpuProgram, const gmx::MDLogger& /*mdlog*/) { @@ -883,7 +883,7 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr, GMX_THROW(gmx::NotImplementedError(errorString)); } } - pme_gpu_reinit(pme.get(), gpuInfo, pmeGpuProgram); + pme_gpu_reinit(pme.get(), deviceInfo, pmeGpuProgram); pme_init_all_work(&pme->solve_work, pme->nthread, pme->nkx); diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h index a471e93e2e..edbe283523 100644 --- a/src/gromacs/ewald/pme.h +++ b/src/gromacs/ewald/pme.h @@ -65,7 +65,7 @@ struct t_inputrec; struct t_nrnb; struct PmeGpu; struct gmx_wallclock_gpu_pme_t; -struct gmx_device_info_t; +struct DeviceInformation; struct gmx_enerdata_t; struct gmx_mtop_t; struct gmx_pme_t; @@ -139,7 +139,7 @@ bool gmx_pme_check_restrictions(int pme_order, * \returns Pointer to newly allocated and initialized PME data. * * \todo We should evolve something like a \c GpuManager that holds \c - * gmx_device_info_t * and \c PmeGpuProgram* and perhaps other + * DeviceInformation* and \c PmeGpuProgram* and perhaps other * related things whose lifetime can/should exceed that of a task (or * perhaps task manager). See Redmine #2522. */ @@ -154,7 +154,7 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr, int nthread, PmeRunMode runMode, PmeGpu* pmeGpu, - const gmx_device_info_t* gpuInfo, + const DeviceInformation* deviceInfo, const PmeGpuProgram* pmeGpuProgram, const gmx::MDLogger& mdlog); diff --git a/src/gromacs/ewald/pme_gpu_internal.cpp b/src/gromacs/ewald/pme_gpu_internal.cpp index e45f07f0fe..9c98402525 100644 --- a/src/gromacs/ewald/pme_gpu_internal.cpp +++ b/src/gromacs/ewald/pme_gpu_internal.cpp @@ -535,7 +535,7 @@ void pme_gpu_init_internal(PmeGpu* pmeGpu) #elif GMX_GPU == GMX_GPU_OPENCL cl_command_queue_properties queueProperties = pmeGpu->archSpecific->useTiming ? CL_QUEUE_PROFILING_ENABLE : 0; - cl_device_id device_id = pmeGpu->deviceInfo->ocl_gpu_id.ocl_device_id; + cl_device_id device_id = pmeGpu->deviceInfo->oclDeviceId; cl_int clError; pmeGpu->archSpecific->pmeStream = clCreateCommandQueue(pmeGpu->archSpecific->context, device_id, queueProperties, &clError); @@ -819,10 +819,10 @@ static void pme_gpu_select_best_performing_pme_spreadgather_kernels(PmeGpu* pmeG * TODO: this should become PmeGpu::PmeGpu() * * \param[in,out] pme The PME structure. - * \param[in,out] gpuInfo The GPU information structure. + * \param[in,out] deviceInfo The GPU device information structure. * \param[in] pmeGpuProgram The handle to the program/kernel data created outside (e.g. 
in unit tests/runner) */ -static void pme_gpu_init(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const PmeGpuProgram* pmeGpuProgram) +static void pme_gpu_init(gmx_pme_t* pme, const DeviceInformation* deviceInfo, const PmeGpuProgram* pmeGpuProgram) { pme->gpu = new PmeGpu(); PmeGpu* pmeGpu = pme->gpu; @@ -839,7 +839,7 @@ static void pme_gpu_init(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const pme_gpu_set_testing(pmeGpu, false); - pmeGpu->deviceInfo = gpuInfo; + pmeGpu->deviceInfo = deviceInfo; GMX_ASSERT(pmeGpuProgram != nullptr, "GPU kernels must be already compiled"); pmeGpu->programHandle_ = pmeGpuProgram; @@ -930,7 +930,7 @@ void pme_gpu_get_real_grid_sizes(const PmeGpu* pmeGpu, gmx::IVec* gridSize, gmx: } } -void pme_gpu_reinit(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const PmeGpuProgram* pmeGpuProgram) +void pme_gpu_reinit(gmx_pme_t* pme, const DeviceInformation* deviceInfo, const PmeGpuProgram* pmeGpuProgram) { GMX_ASSERT(pme != nullptr, "Need valid PME object"); if (pme->runMode == PmeRunMode::CPU) @@ -942,7 +942,7 @@ void pme_gpu_reinit(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const PmeG if (!pme->gpu) { /* First-time initialization */ - pme_gpu_init(pme, gpuInfo, pmeGpuProgram); + pme_gpu_init(pme, deviceInfo, pmeGpuProgram); } else { diff --git a/src/gromacs/ewald/pme_gpu_internal.h b/src/gromacs/ewald/pme_gpu_internal.h index 2cd1f86260..cc7e9d1f34 100644 --- a/src/gromacs/ewald/pme_gpu_internal.h +++ b/src/gromacs/ewald/pme_gpu_internal.h @@ -55,7 +55,7 @@ #include "pme_output.h" class GpuEventSynchronizer; -struct gmx_device_info_t; +struct DeviceInformation; struct gmx_hw_info_t; struct gmx_gpu_opt_t; struct gmx_pme_t; // only used in pme_gpu_reinit @@ -562,12 +562,12 @@ GPU_FUNC_QUALIFIER void pme_gpu_get_real_grid_sizes(const PmeGpu* GPU_FUNC_ARGUM * (Re-)initializes the PME GPU data at the beginning of the run or on DLB. * * \param[in,out] pme The PME structure. - * \param[in] gpuInfo The GPU information structure. + * \param[in] deviceInfo The GPU device information structure. * \param[in] pmeGpuProgram The PME GPU program data * \throws gmx::NotImplementedError if this generally valid PME structure is not valid for GPU runs. */ GPU_FUNC_QUALIFIER void pme_gpu_reinit(gmx_pme_t* GPU_FUNC_ARGUMENT(pme), - const gmx_device_info_t* GPU_FUNC_ARGUMENT(gpuInfo), + const DeviceInformation* GPU_FUNC_ARGUMENT(deviceInfo), const PmeGpuProgram* GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM; /*! \libinternal \brief diff --git a/src/gromacs/ewald/pme_gpu_program.cpp b/src/gromacs/ewald/pme_gpu_program.cpp index 5227eca063..ccb5494618 100644 --- a/src/gromacs/ewald/pme_gpu_program.cpp +++ b/src/gromacs/ewald/pme_gpu_program.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2018,2019, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -51,14 +51,14 @@ #include "pme_gpu_program_impl.h" -PmeGpuProgram::PmeGpuProgram(const gmx_device_info_t* deviceInfo) : +PmeGpuProgram::PmeGpuProgram(const DeviceInformation* deviceInfo) : impl_(std::make_unique(deviceInfo)) { } PmeGpuProgram::~PmeGpuProgram() = default; -PmeGpuProgramStorage buildPmeGpuProgram(const gmx_device_info_t* deviceInfo) +PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation* deviceInfo) { if (!deviceInfo) { diff --git a/src/gromacs/ewald/pme_gpu_program.h b/src/gromacs/ewald/pme_gpu_program.h index e9e084bf1e..610c46f433 100644 --- a/src/gromacs/ewald/pme_gpu_program.h +++ b/src/gromacs/ewald/pme_gpu_program.h @@ -50,12 +50,12 @@ #include struct PmeGpuProgramImpl; -struct gmx_device_info_t; +struct DeviceInformation; class PmeGpuProgram { public: - explicit PmeGpuProgram(const gmx_device_info_t* deviceInfo); + explicit PmeGpuProgram(const DeviceInformation* deviceInfo); ~PmeGpuProgram(); // TODO: design getters for information inside, if needed for PME, and make this private? @@ -69,6 +69,6 @@ using PmeGpuProgramStorage = std::unique_ptr; /*! \brief * Factory function used to build persistent PME GPU program for the device at once. */ -PmeGpuProgramStorage buildPmeGpuProgram(const gmx_device_info_t* /*deviceInfo*/); +PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation* /*deviceInfo*/); #endif diff --git a/src/gromacs/ewald/pme_gpu_program_impl.cpp b/src/gromacs/ewald/pme_gpu_program_impl.cpp index 078f97ee4f..d508499130 100644 --- a/src/gromacs/ewald/pme_gpu_program_impl.cpp +++ b/src/gromacs/ewald/pme_gpu_program_impl.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2018,2019, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -45,7 +45,7 @@ #include "pme_gpu_program_impl.h" -PmeGpuProgramImpl::PmeGpuProgramImpl(const gmx_device_info_t* /*unused*/) : +PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation* /* deviceInfo */) : warpSize(0), spreadWorkGroupSize(0), gatherWorkGroupSize(0), diff --git a/src/gromacs/ewald/pme_gpu_program_impl.cu b/src/gromacs/ewald/pme_gpu_program_impl.cu index f34f7a2741..019bc3f2aa 100644 --- a/src/gromacs/ewald/pme_gpu_program_impl.cu +++ b/src/gromacs/ewald/pme_gpu_program_impl.cu @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2018,2019, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
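For reference, a minimal usage sketch of the buildPmeGpuProgram() factory changed above, assuming a caller that already holds the detected-hardware structure; the wrapper function name is hypothetical and error handling is omitted.

    // Build the persistent PME GPU kernels for one detected device (sketch).
    // getDeviceInfo() and buildPmeGpuProgram() are the interfaces renamed by
    // this patch; as shown above, buildPmeGpuProgram() itself checks for a
    // null deviceInfo.
    PmeGpuProgramStorage makePmeProgramForDevice(const gmx_gpu_info_t& gpuInfo, int deviceId)
    {
        const DeviceInformation* deviceInfo = getDeviceInfo(gpuInfo, deviceId);
        return buildPmeGpuProgram(deviceInfo);
    }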
@@ -102,7 +102,7 @@ extern template void pme_gather_kernel(const PmeGpuCudaKernelParams); extern template void pme_gather_kernel(const PmeGpuCudaKernelParams); -PmeGpuProgramImpl::PmeGpuProgramImpl(const gmx_device_info_t*) +PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation* /* deviceInfo */) { // kernel parameters warpSize = warp_size; diff --git a/src/gromacs/ewald/pme_gpu_program_impl.h b/src/gromacs/ewald/pme_gpu_program_impl.h index 8867ea0bdc..f42179598e 100644 --- a/src/gromacs/ewald/pme_gpu_program_impl.h +++ b/src/gromacs/ewald/pme_gpu_program_impl.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2018,2019, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -47,7 +47,7 @@ #include "gromacs/gpu_utils/gputraits.h" #include "gromacs/utility/classhelpers.h" -struct gmx_device_info_t; +struct DeviceInformation; /*! \internal * \brief @@ -150,13 +150,13 @@ struct PmeGpuProgramImpl PmeGpuProgramImpl() = delete; //! Constructor for the given device - explicit PmeGpuProgramImpl(const gmx_device_info_t* deviceInfo); + explicit PmeGpuProgramImpl(const DeviceInformation* deviceInfo); ~PmeGpuProgramImpl(); GMX_DISALLOW_COPY_AND_ASSIGN(PmeGpuProgramImpl); private: // Compiles kernels, if supported. Called by the constructor. - void compileKernels(const gmx_device_info_t* deviceInfo); + void compileKernels(const DeviceInformation* deviceInfo); }; #endif diff --git a/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp b/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp index d17a76256e..ae319b2c28 100644 --- a/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp +++ b/src/gromacs/ewald/pme_gpu_program_impl_ocl.cpp @@ -53,11 +53,11 @@ #include "pme_gpu_types_host.h" #include "pme_grid.h" -PmeGpuProgramImpl::PmeGpuProgramImpl(const gmx_device_info_t* deviceInfo) +PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation* deviceInfo) { // Context creation (which should happen outside of this class: #2522) - cl_platform_id platformId = deviceInfo->ocl_gpu_id.ocl_platform_id; - cl_device_id deviceId = deviceInfo->ocl_gpu_id.ocl_device_id; + cl_platform_id platformId = deviceInfo->oclPlatformId; + cl_device_id deviceId = deviceInfo->oclDeviceId; cl_context_properties contextProperties[3]; contextProperties[0] = CL_CONTEXT_PLATFORM; contextProperties[1] = reinterpret_cast(platformId); @@ -110,11 +110,11 @@ PmeGpuProgramImpl::~PmeGpuProgramImpl() * smaller than the minimum order^2 required in spread/gather ATM which * we need to check for. 
*/ -static void checkRequiredWarpSize(cl_kernel kernel, const char* kernelName, const gmx_device_info_t* deviceInfo) +static void checkRequiredWarpSize(cl_kernel kernel, const char* kernelName, const DeviceInformation* deviceInfo) { if (deviceInfo->deviceVendor == DeviceVendor::Intel) { - size_t kernelWarpSize = gmx::ocl::getKernelWarpSize(kernel, deviceInfo->ocl_gpu_id.ocl_device_id); + size_t kernelWarpSize = gmx::ocl::getKernelWarpSize(kernel, deviceInfo->oclDeviceId); if (kernelWarpSize < c_pmeSpreadGatherMinWarpSize) { @@ -128,7 +128,7 @@ static void checkRequiredWarpSize(cl_kernel kernel, const char* kernelName, cons } } -void PmeGpuProgramImpl::compileKernels(const gmx_device_info_t* deviceInfo) +void PmeGpuProgramImpl::compileKernels(const DeviceInformation* deviceInfo) { // We might consider storing program as a member variable if it's needed later cl_program program = nullptr; @@ -165,8 +165,8 @@ void PmeGpuProgramImpl::compileKernels(const gmx_device_info_t* deviceInfo) { /* TODO when we have a proper MPI-aware logging module, the log output here should be written there */ - program = gmx::ocl::compileProgram(stderr, "gromacs/ewald", "pme_program.cl", commonDefines, - context, deviceInfo->ocl_gpu_id.ocl_device_id, + program = gmx::ocl::compileProgram(stderr, "gromacs/ewald", "pme_program.cl", + commonDefines, context, deviceInfo->oclDeviceId, deviceInfo->deviceVendor); } catch (gmx::GromacsException& e) diff --git a/src/gromacs/ewald/pme_gpu_types_host.h b/src/gromacs/ewald/pme_gpu_types_host.h index 45745c9a19..acdf24bf6d 100644 --- a/src/gromacs/ewald/pme_gpu_types_host.h +++ b/src/gromacs/ewald/pme_gpu_types_host.h @@ -87,7 +87,7 @@ typedef PmeGpuKernelParamsBase PmeGpuKernelParams; typedef int PmeGpuKernelParams; #endif -struct gmx_device_info_t; +struct DeviceInformation; /*! \internal \brief * The PME GPU structure for all the data copied directly from the CPU PME structure. @@ -168,7 +168,7 @@ struct PmeGpu int nAtomsAlloc; /*! \brief A pointer to the device used during the execution. */ - const gmx_device_info_t* deviceInfo; + const DeviceInformation* deviceInfo; /*! \brief Kernel scheduling grid width limit in X - derived from deviceinfo compute capability in CUDA. * Declared as very large int to make it useful in computations with type promotion, to avoid overflows. diff --git a/src/gromacs/ewald/tests/pmetestcommon.cpp b/src/gromacs/ewald/tests/pmetestcommon.cpp index 198c77cadd..891e7bb048 100644 --- a/src/gromacs/ewald/tests/pmetestcommon.cpp +++ b/src/gromacs/ewald/tests/pmetestcommon.cpp @@ -104,7 +104,7 @@ uint64_t getSplineModuliDoublePrecisionUlps(int splineOrder) //! PME initialization PmeSafePointer pmeInitWrapper(const t_inputrec* inputRec, const CodePath mode, - const gmx_device_info_t* gpuInfo, + const DeviceInformation* deviceInfo, const PmeGpuProgram* pmeGpuProgram, const Matrix3x3& box, const real ewaldCoeff_q, @@ -116,7 +116,7 @@ PmeSafePointer pmeInitWrapper(const t_inputrec* inputRec, NumPmeDomains numPmeDomains = { 1, 1 }; gmx_pme_t* pmeDataRaw = gmx_pme_init(&dummyCommrec, numPmeDomains, inputRec, false, false, true, ewaldCoeff_q, - ewaldCoeff_lj, 1, runMode, nullptr, gpuInfo, pmeGpuProgram, dummyLogger); + ewaldCoeff_lj, 1, runMode, nullptr, deviceInfo, pmeGpuProgram, dummyLogger); PmeSafePointer pme(pmeDataRaw); // taking ownership // TODO get rid of this with proper matrix type @@ -149,13 +149,13 @@ PmeSafePointer pmeInitWrapper(const t_inputrec* inputRec, //! 
Simple PME initialization based on input, no atom data PmeSafePointer pmeInitEmpty(const t_inputrec* inputRec, const CodePath mode, - const gmx_device_info_t* gpuInfo, + const DeviceInformation* deviceInfo, const PmeGpuProgram* pmeGpuProgram, const Matrix3x3& box, real ewaldCoeff_q, real ewaldCoeff_lj) { - return pmeInitWrapper(inputRec, mode, gpuInfo, pmeGpuProgram, box, ewaldCoeff_q, ewaldCoeff_lj); + return pmeInitWrapper(inputRec, mode, deviceInfo, pmeGpuProgram, box, ewaldCoeff_q, ewaldCoeff_lj); // hiding the fact that PME actually needs to know the number of atoms in advance } diff --git a/src/gromacs/ewald/tests/pmetestcommon.h b/src/gromacs/ewald/tests/pmetestcommon.h index 870b9f7aa6..d6377bd455 100644 --- a/src/gromacs/ewald/tests/pmetestcommon.h +++ b/src/gromacs/ewald/tests/pmetestcommon.h @@ -121,7 +121,7 @@ uint64_t getSplineModuliDoublePrecisionUlps(int splineOrder); //! PME initialization PmeSafePointer pmeInitWrapper(const t_inputrec* inputRec, CodePath mode, - const gmx_device_info_t* gpuInfo, + const DeviceInformation* deviceInfo, const PmeGpuProgram* pmeGpuProgram, const Matrix3x3& box, real ewaldCoeff_q = 1.0F, @@ -129,7 +129,7 @@ PmeSafePointer pmeInitWrapper(const t_inputrec* inputRec, //! Simple PME initialization (no atom data) PmeSafePointer pmeInitEmpty(const t_inputrec* inputRec, CodePath mode = CodePath::CPU, - const gmx_device_info_t* gpuInfo = nullptr, + const DeviceInformation* deviceInfo = nullptr, const PmeGpuProgram* pmeGpuProgram = nullptr, const Matrix3x3& box = { { 1.0F, 0.0F, 0.0F, 0.0F, 1.0F, 0.0F, 0.0F, 0.0F, 1.0F } }, real ewaldCoeff_q = 0.0F, diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.cpp b/src/gromacs/ewald/tests/testhardwarecontexts.cpp index 6e0a888402..aab3099aa1 100644 --- a/src/gromacs/ewald/tests/testhardwarecontexts.cpp +++ b/src/gromacs/ewald/tests/testhardwarecontexts.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by + * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -119,7 +119,7 @@ void PmeTestEnvironment::SetUp() // Constructing contexts for all compatible GPUs - will be empty on non-GPU builds for (int gpuIndex : getCompatibleGpus(hardwareInfo_->gpu_info)) { - const gmx_device_info_t* deviceInfo = getDeviceInfo(hardwareInfo_->gpu_info, gpuIndex); + const DeviceInformation* deviceInfo = getDeviceInfo(hardwareInfo_->gpu_info, gpuIndex); init_gpu(deviceInfo); char stmp[200] = {}; diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.h b/src/gromacs/ewald/tests/testhardwarecontexts.h index e7d49c5a79..6ec22930d4 100644 --- a/src/gromacs/ewald/tests/testhardwarecontexts.h +++ b/src/gromacs/ewald/tests/testhardwarecontexts.h @@ -78,7 +78,7 @@ struct TestHardwareContext //! Readable description std::string description_; //! Device information pointer - const gmx_device_info_t* deviceInfo_; + const DeviceInformation* deviceInfo_; //! Persistent compiled GPU kernels for PME. PmeGpuProgramStorage program_; @@ -88,11 +88,11 @@ public: //! Returns a human-readable context description line std::string getDescription() const { return description_; } //! 
Returns the device info pointer - const gmx_device_info_t* getDeviceInfo() const { return deviceInfo_; } + const DeviceInformation* getDeviceInfo() const { return deviceInfo_; } //! Returns the persistent PME GPU kernels const PmeGpuProgram* getPmeGpuProgram() const { return program_.get(); } //! Constructs the context - TestHardwareContext(CodePath codePath, const char* description, const gmx_device_info_t* deviceInfo) : + TestHardwareContext(CodePath codePath, const char* description, const DeviceInformation* deviceInfo) : codePath_(codePath), description_(description), deviceInfo_(deviceInfo), diff --git a/src/gromacs/gpu_utils/cudautils.cuh b/src/gromacs/gpu_utils/cudautils.cuh index 618808d03b..71d9b7dac4 100644 --- a/src/gromacs/gpu_utils/cudautils.cuh +++ b/src/gromacs/gpu_utils/cudautils.cuh @@ -142,19 +142,6 @@ enum class GpuApiCallBehavior; #endif /* CHECK_CUDA_ERRORS */ -/*! \brief CUDA device information. - * - * The CUDA device information is queried and set at detection and contains - * both information about the device/hardware returned by the runtime as well - * as additional data like support status. - */ -struct gmx_device_info_t -{ - int id; /* id of the CUDA device */ - cudaDeviceProp prop; /* CUDA device properties */ - int stat; /* result of the device check */ -}; - /*! Launches synchronous or asynchronous device to host memory copy. * * The copy is launched in stream s or if not specified, in stream 0. diff --git a/src/gromacs/gpu_utils/gpu_utils.cpp b/src/gromacs/gpu_utils/gpu_utils.cpp index 51622063a7..98b701ac62 100644 --- a/src/gromacs/gpu_utils/gpu_utils.cpp +++ b/src/gromacs/gpu_utils/gpu_utils.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2014,2015,2017,2018,2019, by the GROMACS development team, led by + * Copyright (c) 2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -78,7 +78,7 @@ int gpu_info_get_stat(const gmx_gpu_info_t& /*unused*/, int /*unused*/) void free_gpu_info(const gmx_gpu_info_t* gpu_info) { - sfree(static_cast(gpu_info->gpu_dev)); // circumvent is_pod check in sfree + sfree(static_cast(gpu_info->deviceInfo)); // circumvent is_pod check in sfree } std::vector getCompatibleGpus(const gmx_gpu_info_t& gpu_info) @@ -88,7 +88,7 @@ std::vector getCompatibleGpus(const gmx_gpu_info_t& gpu_info) compatibleGpus.reserve(gpu_info.n_dev); for (int i = 0; i < gpu_info.n_dev; i++) { - assert(gpu_info.gpu_dev); + assert(gpu_info.deviceInfo); if (gpu_info_get_stat(gpu_info, i) == egpuCompatible) { compatibleGpus.push_back(i); diff --git a/src/gromacs/gpu_utils/gpu_utils.cu b/src/gromacs/gpu_utils/gpu_utils.cu index b7e5e0f77e..16215c1fe6 100644 --- a/src/gromacs/gpu_utils/gpu_utils.cu +++ b/src/gromacs/gpu_utils/gpu_utils.cu @@ -2,7 +2,7 @@ * This file is part of the GROMACS molecular simulation package. * * Copyright (c) 2010-2018, The GROMACS development team. - * Copyright (c) 2019, by the GROMACS development team, led by + * Copyright (c) 2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
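The gpu_dev member of gmx_gpu_info_t is renamed to deviceInfo together with its element type; a brief before/after sketch of the access pattern, assuming a GPU build and a valid device index:

    // Before this patch:
    //   gmx_device_info_t* info   = &gpu_info.gpu_dev[deviceId];
    //   int                status = gpu_info.gpu_dev[deviceId].stat;
    // After this patch:
    DeviceInformation* info   = &gpu_info.deviceInfo[deviceId];
    int                status = gpu_info.deviceInfo[deviceId].stat;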
@@ -222,7 +222,7 @@ static int do_sanity_checks(int dev_id, const cudaDeviceProp& dev_prop) return 0; } -void init_gpu(const gmx_device_info_t* deviceInfo) +void init_gpu(const DeviceInformation* deviceInfo) { cudaError_t stat; @@ -241,7 +241,7 @@ void init_gpu(const gmx_device_info_t* deviceInfo) } } -void free_gpu(const gmx_device_info_t* deviceInfo) +void free_gpu(const DeviceInformation* deviceInfo) { // One should only attempt to clear the device context when // it has been used, but currently the only way to know that a GPU @@ -268,13 +268,13 @@ void free_gpu(const gmx_device_info_t* deviceInfo) } } -gmx_device_info_t* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId) +DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId) { if (deviceId < 0 || deviceId >= gpu_info.n_dev) { gmx_incons("Invalid GPU deviceId requested"); } - return &gpu_info.gpu_dev[deviceId]; + return &gpu_info.deviceInfo[deviceId]; } /*! \brief Returns true if the gpu characterized by the device properties is @@ -396,7 +396,7 @@ void findGpus(gmx_gpu_info_t* gpu_info) // We expect to start device support/sanity checks with a clean runtime error state gmx::ensureNoPendingCudaError(""); - gmx_device_info_t* devs; + DeviceInformation* devs; snew(devs, ndev); for (int i = 0; i < ndev; i++) { @@ -450,8 +450,8 @@ void findGpus(gmx_gpu_info_t* gpu_info) cudaGetErrorName(stat), cudaGetErrorString(stat)) .c_str()); - gpu_info->n_dev = ndev; - gpu_info->gpu_dev = devs; + gpu_info->n_dev = ndev; + gpu_info->deviceInfo = devs; } void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int index) @@ -463,7 +463,7 @@ void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int ind return; } - gmx_device_info_t* dinfo = &gpu_info.gpu_dev[index]; + DeviceInformation* dinfo = &gpu_info.deviceInfo[index]; bool bGpuExists = (dinfo->stat != egpuNonexistent && dinfo->stat != egpuInsane); @@ -489,7 +489,7 @@ int get_current_cuda_gpu_device_id(void) size_t sizeof_gpu_dev_info(void) { - return sizeof(gmx_device_info_t); + return sizeof(DeviceInformation); } void startGpuProfiler(void) @@ -537,7 +537,7 @@ void resetGpuProfiler(void) int gpu_info_get_stat(const gmx_gpu_info_t& info, int index) { - return info.gpu_dev[index].stat; + return info.deviceInfo[index].stat; } /*! \brief Check status returned from peer access CUDA call, and error out or warn appropriately diff --git a/src/gromacs/gpu_utils/gpu_utils.h b/src/gromacs/gpu_utils/gpu_utils.h index 24590f589d..f935b0e513 100644 --- a/src/gromacs/gpu_utils/gpu_utils.h +++ b/src/gromacs/gpu_utils/gpu_utils.h @@ -54,7 +54,7 @@ #include "gromacs/gpu_utils/gpu_macros.h" #include "gromacs/utility/basedefinitions.h" -struct gmx_device_info_t; +struct DeviceInformation; struct gmx_gpu_info_t; namespace gmx @@ -108,7 +108,7 @@ bool isGpuDetectionFunctional(std::string* GPU_FUNC_ARGUMENT(errorMessage)) * Will detect every GPU supported by the device driver in use. * Must only be called if canPerformGpuDetection() has returned true. * This routine also checks for the compatibility of each and fill the - * gpu_info->gpu_dev array with the required information on each the + * gpu_info->deviceInfo array with the required information on each the * device: ID, device properties, status. * * Note that this function leaves the GPU runtime API error state clean; @@ -158,7 +158,7 @@ void free_gpu_info(const gmx_gpu_info_t* gpu_info); * initialization. 
*/ GPU_FUNC_QUALIFIER -void init_gpu(const gmx_device_info_t* GPU_FUNC_ARGUMENT(deviceInfo)) GPU_FUNC_TERM; +void init_gpu(const DeviceInformation* GPU_FUNC_ARGUMENT(deviceInfo)) GPU_FUNC_TERM; /*! \brief Frees up the CUDA GPU used by the active context at the time of calling. * @@ -176,7 +176,7 @@ void init_gpu(const gmx_device_info_t* GPU_FUNC_ARGUMENT(deviceInfo)) GPU_FUNC_T * \returns true if no error occurs during the freeing. */ CUDA_FUNC_QUALIFIER -void free_gpu(const gmx_device_info_t* CUDA_FUNC_ARGUMENT(deviceInfo)) CUDA_FUNC_TERM; +void free_gpu(const DeviceInformation* CUDA_FUNC_ARGUMENT(deviceInfo)) CUDA_FUNC_TERM; /*! \brief Return a pointer to the device info for \c deviceId * @@ -186,7 +186,7 @@ void free_gpu(const gmx_device_info_t* CUDA_FUNC_ARGUMENT(deviceInfo)) CUDA_FUNC * \returns Pointer to the device info for \c deviceId. */ GPU_FUNC_QUALIFIER -gmx_device_info_t* getDeviceInfo(const gmx_gpu_info_t& GPU_FUNC_ARGUMENT(gpu_info), +DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& GPU_FUNC_ARGUMENT(gpu_info), int GPU_FUNC_ARGUMENT(deviceId)) GPU_FUNC_TERM_WITH_RETURN(nullptr); /*! \brief Returns the device ID of the CUDA GPU currently in use. diff --git a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp index 8770e6862d..d4e9daddd0 100644 --- a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp +++ b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp @@ -129,26 +129,26 @@ static std::string makeOpenClInternalErrorString(const char* message, cl_int sta } /*! - * \brief Checks that device \c devInfo is sane (ie can run a kernel). + * \brief Checks that device \c deviceInfo is sane (ie can run a kernel). * * Compiles and runs a dummy kernel to determine whether the given * OpenCL device functions properly. * * - * \param[in] devInfo The device info pointer. + * \param[in] deviceInfo The device info pointer. * \param[out] errorMessage An error message related to a failing OpenCL API call. * \throws std::bad_alloc When out of memory. * \returns Whether the device passed sanity checks */ -static bool isDeviceSane(const gmx_device_info_t* devInfo, std::string* errorMessage) +static bool isDeviceSane(const DeviceInformation* deviceInfo, std::string* errorMessage) { cl_context_properties properties[] = { - CL_CONTEXT_PLATFORM, reinterpret_cast(devInfo->ocl_gpu_id.ocl_platform_id), 0 + CL_CONTEXT_PLATFORM, reinterpret_cast(deviceInfo->oclPlatformId), 0 }; // uncrustify spacing cl_int status; - auto deviceId = devInfo->ocl_gpu_id.ocl_device_id; + auto deviceId = deviceInfo->oclDeviceId; ClContext context(clCreateContext(properties, 1, &deviceId, nullptr, nullptr, &status)); if (status != CL_SUCCESS) { @@ -198,15 +198,15 @@ static bool isDeviceSane(const gmx_device_info_t* devInfo, std::string* errorMes } /*! - * \brief Checks that device \c devInfo is compatible with GROMACS. + * \brief Checks that device \c deviceInfo is compatible with GROMACS. * * Vendor and OpenCL version support checks are executed an the result * of these returned. * - * \param[in] devInfo The device info pointer. - * \returns The result of the compatibility checks. + * \param[in] deviceInfo The device info pointer. + * \returns The result of the compatibility checks. 
*/ -static int isDeviceSupported(const gmx_device_info_t* devInfo) +static int isDeviceSupported(const DeviceInformation* deviceInfo) { if (getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK") != nullptr) { @@ -222,7 +222,7 @@ static int isDeviceSupported(const gmx_device_info_t* devInfo) // the device which has the following format: // OpenCL unsigned int deviceVersionMinor, deviceVersionMajor; - const int valuesScanned = std::sscanf(devInfo->device_version, "OpenCL %u.%u", + const int valuesScanned = std::sscanf(deviceInfo->device_version, "OpenCL %u.%u", &deviceVersionMajor, &deviceVersionMinor); const bool versionLargeEnough = ((valuesScanned == 2) @@ -234,7 +234,7 @@ static int isDeviceSupported(const gmx_device_info_t* devInfo) } /* Only AMD, Intel, and NVIDIA GPUs are supported for now */ - switch (devInfo->deviceVendor) + switch (deviceInfo->deviceVendor) { case DeviceVendor::Nvidia: return egpuCompatible; case DeviceVendor::Amd: @@ -258,7 +258,7 @@ static int isDeviceSupported(const gmx_device_info_t* devInfo) * \returns An e_gpu_detect_res_t to indicate how the GPU coped with * the sanity and compatibility check. */ -static int checkGpu(size_t deviceId, const gmx_device_info_t* deviceInfo) +static int checkGpu(size_t deviceId, const DeviceInformation* deviceInfo) { int supportStatus = isDeviceSupported(deviceInfo); @@ -393,7 +393,7 @@ void findGpus(gmx_gpu_info_t* gpu_info) break; } - snew(gpu_info->gpu_dev, gpu_info->n_dev); + snew(gpu_info->deviceInfo, gpu_info->n_dev); { int device_index; @@ -421,47 +421,47 @@ void findGpus(gmx_gpu_info_t* gpu_info) for (unsigned int j = 0; j < ocl_device_count; j++) { - gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_platform_id = ocl_platform_ids[i]; - gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id = ocl_device_ids[j]; + gpu_info->deviceInfo[device_index].oclPlatformId = ocl_platform_ids[i]; + gpu_info->deviceInfo[device_index].oclDeviceId = ocl_device_ids[j]; - gpu_info->gpu_dev[device_index].device_name[0] = 0; + gpu_info->deviceInfo[device_index].device_name[0] = 0; clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, - sizeof(gpu_info->gpu_dev[device_index].device_name), - gpu_info->gpu_dev[device_index].device_name, nullptr); + sizeof(gpu_info->deviceInfo[device_index].device_name), + gpu_info->deviceInfo[device_index].device_name, nullptr); - gpu_info->gpu_dev[device_index].device_version[0] = 0; + gpu_info->deviceInfo[device_index].device_version[0] = 0; clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, - sizeof(gpu_info->gpu_dev[device_index].device_version), - gpu_info->gpu_dev[device_index].device_version, nullptr); + sizeof(gpu_info->deviceInfo[device_index].device_version), + gpu_info->deviceInfo[device_index].device_version, nullptr); - gpu_info->gpu_dev[device_index].vendorName[0] = 0; + gpu_info->deviceInfo[device_index].vendorName[0] = 0; clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, - sizeof(gpu_info->gpu_dev[device_index].vendorName), - gpu_info->gpu_dev[device_index].vendorName, nullptr); + sizeof(gpu_info->deviceInfo[device_index].vendorName), + gpu_info->deviceInfo[device_index].vendorName, nullptr); - gpu_info->gpu_dev[device_index].compute_units = 0; + gpu_info->deviceInfo[device_index].compute_units = 0; clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, - sizeof(gpu_info->gpu_dev[device_index].compute_units), - &(gpu_info->gpu_dev[device_index].compute_units), nullptr); + sizeof(gpu_info->deviceInfo[device_index].compute_units), + &(gpu_info->deviceInfo[device_index].compute_units), nullptr); 
- gpu_info->gpu_dev[device_index].adress_bits = 0; + gpu_info->deviceInfo[device_index].adress_bits = 0; clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, - sizeof(gpu_info->gpu_dev[device_index].adress_bits), - &(gpu_info->gpu_dev[device_index].adress_bits), nullptr); + sizeof(gpu_info->deviceInfo[device_index].adress_bits), + &(gpu_info->deviceInfo[device_index].adress_bits), nullptr); - gpu_info->gpu_dev[device_index].deviceVendor = - getDeviceVendor(gpu_info->gpu_dev[device_index].vendorName); + gpu_info->deviceInfo[device_index].deviceVendor = + getDeviceVendor(gpu_info->deviceInfo[device_index].vendorName); clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, 3 * sizeof(size_t), - &gpu_info->gpu_dev[device_index].maxWorkItemSizes, nullptr); + &gpu_info->deviceInfo[device_index].maxWorkItemSizes, nullptr); clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), - &gpu_info->gpu_dev[device_index].maxWorkGroupSize, nullptr); + &gpu_info->deviceInfo[device_index].maxWorkGroupSize, nullptr); - gpu_info->gpu_dev[device_index].stat = - gmx::checkGpu(device_index, gpu_info->gpu_dev + device_index); + gpu_info->deviceInfo[device_index].stat = + gmx::checkGpu(device_index, gpu_info->deviceInfo + device_index); - if (egpuCompatible == gpu_info->gpu_dev[device_index].stat) + if (egpuCompatible == gpu_info->deviceInfo[device_index].stat) { gpu_info->n_dev_compatible++; } @@ -479,16 +479,13 @@ void findGpus(gmx_gpu_info_t* gpu_info) int last = -1; for (int i = 0; i < gpu_info->n_dev; i++) { - if (gpu_info->gpu_dev[i].deviceVendor == DeviceVendor::Amd) + if (gpu_info->deviceInfo[i].deviceVendor == DeviceVendor::Amd) { last++; if (last < i) { - gmx_device_info_t ocl_gpu_info; - ocl_gpu_info = gpu_info->gpu_dev[i]; - gpu_info->gpu_dev[i] = gpu_info->gpu_dev[last]; - gpu_info->gpu_dev[last] = ocl_gpu_info; + std::swap(gpu_info->deviceInfo[i], gpu_info->deviceInfo[last]); } } } @@ -498,16 +495,13 @@ void findGpus(gmx_gpu_info_t* gpu_info) { for (int i = 0; i < gpu_info->n_dev; i++) { - if (gpu_info->gpu_dev[i].deviceVendor == DeviceVendor::Nvidia) + if (gpu_info->deviceInfo[i].deviceVendor == DeviceVendor::Nvidia) { last++; if (last < i) { - gmx_device_info_t ocl_gpu_info; - ocl_gpu_info = gpu_info->gpu_dev[i]; - gpu_info->gpu_dev[i] = gpu_info->gpu_dev[last]; - gpu_info->gpu_dev[last] = ocl_gpu_info; + std::swap(gpu_info->deviceInfo[i], gpu_info->deviceInfo[last]); } } } @@ -532,7 +526,7 @@ void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int ind return; } - gmx_device_info_t* dinfo = &gpu_info.gpu_dev[index]; + DeviceInformation* dinfo = &gpu_info.deviceInfo[index]; bool bGpuExists = (dinfo->stat != egpuNonexistent && dinfo->stat != egpuInsane); @@ -548,7 +542,7 @@ void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int ind } -void init_gpu(const gmx_device_info_t* deviceInfo) +void init_gpu(const DeviceInformation* deviceInfo) { assert(deviceInfo); @@ -570,21 +564,21 @@ void init_gpu(const gmx_device_info_t* deviceInfo) } } -gmx_device_info_t* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId) +DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId) { if (deviceId < 0 || deviceId >= gpu_info.n_dev) { gmx_incons("Invalid GPU deviceId requested"); } - return &gpu_info.gpu_dev[deviceId]; + return &gpu_info.deviceInfo[deviceId]; } size_t sizeof_gpu_dev_info() { - return sizeof(gmx_device_info_t); + return sizeof(DeviceInformation); } int gpu_info_get_stat(const gmx_gpu_info_t& 
info, int index) { - return info.gpu_dev[index].stat; + return info.deviceInfo[index].stat; } diff --git a/src/gromacs/gpu_utils/gputraits.cuh b/src/gromacs/gpu_utils/gputraits.cuh index 7ac35d5329..8a4936dabc 100644 --- a/src/gromacs/gpu_utils/gputraits.cuh +++ b/src/gromacs/gpu_utils/gputraits.cuh @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2018,2019, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -43,6 +43,22 @@ * \ingroup module_gpu_utils */ +/*! \brief CUDA device information. + * + * The CUDA device information is queried and set at detection and contains + * both information about the device/hardware returned by the runtime as well + * as additional data like support status. + */ +struct DeviceInformation +{ + //! ID of the CUDA device. + int id; + //! CUDA device properties. + cudaDeviceProp prop; + //! Result of the device check. + int stat; +}; + //! \brief GPU command stream using CommandStream = cudaStream_t; //! \brief Single GPU call timing event - meaningless in CUDA diff --git a/src/gromacs/gpu_utils/gputraits.h b/src/gromacs/gpu_utils/gputraits.h index 9b91c0ec18..0229ea443c 100644 --- a/src/gromacs/gpu_utils/gputraits.h +++ b/src/gromacs/gpu_utils/gputraits.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2018,2019, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -55,6 +55,12 @@ #else +//! Stub for device information. +struct DeviceInformation +{ + // No member needed +}; + //! \brief GPU command stream using CommandStream = void*; //! \brief Single GPU call timing event diff --git a/src/gromacs/gpu_utils/gputraits_ocl.h b/src/gromacs/gpu_utils/gputraits_ocl.h index c4a421f3a9..00d9cba90d 100644 --- a/src/gromacs/gpu_utils/gputraits_ocl.h +++ b/src/gromacs/gpu_utils/gputraits_ocl.h @@ -55,6 +55,28 @@ enum class DeviceVendor : int Count = 4 }; +/*! \internal + * \brief OpenCL device information. + * + * The OpenCL device information is queried and set at detection and contains + * both information about the device/hardware returned by the runtime as well + * as additional data like support status. + */ +struct DeviceInformation +{ + cl_platform_id oclPlatformId; //!< OpenCL Platform ID. + cl_device_id oclDeviceId; //!< OpenCL Device ID. + char device_name[256]; //!< Device name. + char device_version[256]; //!< Device version. + char vendorName[256]; //!< Device vendor name. + int compute_units; //!< Number of compute units. + int adress_bits; //!< Number of address bits the device is capable of. + int stat; //!< Device status takes values of e_gpu_detect_res_t. + DeviceVendor deviceVendor; //!< Device vendor. + size_t maxWorkItemSizes[3]; //!< Workgroup size limits (CL_DEVICE_MAX_WORK_ITEM_SIZES). + size_t maxWorkGroupSize; //!< Workgroup total size limit (CL_DEVICE_MAX_WORK_GROUP_SIZE). +}; + //! \brief GPU command stream using CommandStream = cl_command_queue; //! 
\brief Single GPU call timing event diff --git a/src/gromacs/gpu_utils/oclutils.h b/src/gromacs/gpu_utils/oclutils.h index 6ad4de9d48..91b6059d27 100644 --- a/src/gromacs/gpu_utils/oclutils.h +++ b/src/gromacs/gpu_utils/oclutils.h @@ -51,39 +51,6 @@ enum class GpuApiCallBehavior; -/*! \internal - * \brief OpenCL GPU device identificator - * - * An OpenCL device is identified by its ID. - * The platform ID is also included for caching reasons. - */ -typedef struct -{ - cl_platform_id ocl_platform_id; /**< Platform ID */ - cl_device_id ocl_device_id; /**< Device ID */ -} ocl_gpu_id_t; - -/*! \internal - * \brief OpenCL device information. - * - * The OpenCL device information is queried and set at detection and contains - * both information about the device/hardware returned by the runtime as well - * as additional data like support status. - */ -struct gmx_device_info_t -{ - ocl_gpu_id_t ocl_gpu_id; /**< device ID assigned at detection */ - char device_name[256]; /**< device name */ - char device_version[256]; /**< device version */ - char vendorName[256]; /**< device vendor */ - int compute_units; /**< number of compute units */ - int adress_bits; /**< number of adress bits the device is capable of */ - int stat; /**< device status takes values of e_gpu_detect_res_t */ - DeviceVendor deviceVendor; /**< device vendor */ - size_t maxWorkItemSizes[3]; /**< workgroup size limits (CL_DEVICE_MAX_WORK_ITEM_SIZES) */ - size_t maxWorkGroupSize; /**< workgroup total size limit (CL_DEVICE_MAX_WORK_GROUP_SIZE) */ -}; - /*! \internal * \brief OpenCL GPU runtime data * diff --git a/src/gromacs/gpu_utils/tests/devicetransfers_ocl.cpp b/src/gromacs/gpu_utils/tests/devicetransfers_ocl.cpp index ed38085954..ffe60c00e9 100644 --- a/src/gromacs/gpu_utils/tests/devicetransfers_ocl.cpp +++ b/src/gromacs/gpu_utils/tests/devicetransfers_ocl.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by + * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
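With the nested ocl_gpu_id_t removed above, the OpenCL platform and device handles become direct members of DeviceInformation; a brief before/after sketch of the corresponding client-code change:

    // Before this patch:
    //   cl_platform_id platformId = deviceInfo->ocl_gpu_id.ocl_platform_id;
    //   cl_device_id   deviceId   = deviceInfo->ocl_gpu_id.ocl_device_id;
    // After this patch:
    cl_platform_id platformId = deviceInfo->oclPlatformId;
    cl_device_id   deviceId   = deviceInfo->oclDeviceId;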
@@ -84,11 +84,11 @@ void doDeviceTransfers(const gmx_gpu_info_t& gpuInfo, ArrayRef input const auto* device = getDeviceInfo(gpuInfo, compatibleGpus[0]); cl_context_properties properties[] = { - CL_CONTEXT_PLATFORM, reinterpret_cast(device->ocl_gpu_id.ocl_platform_id), 0 + CL_CONTEXT_PLATFORM, reinterpret_cast(device->oclPlatformId), 0 }; // Give uncrustify more space - auto deviceId = device->ocl_gpu_id.ocl_device_id; + auto deviceId = device->oclDeviceId; auto context = clCreateContext(properties, 1, &deviceId, nullptr, nullptr, &status); throwUponFailure(status, "creating context"); auto commandQueue = clCreateCommandQueue(context, deviceId, 0, &status); diff --git a/src/gromacs/hardware/detecthardware.cpp b/src/gromacs/hardware/detecthardware.cpp index 23887cad4e..969399b3ec 100644 --- a/src/gromacs/hardware/detecthardware.cpp +++ b/src/gromacs/hardware/detecthardware.cpp @@ -171,9 +171,9 @@ static void gmx_detect_gpus(const gmx::MDLogger& mdlog, if (!isMasterRankOfPhysicalNode) { - hardwareInfo->gpu_info.gpu_dev = (struct gmx_device_info_t*)malloc(dev_size); + hardwareInfo->gpu_info.deviceInfo = (struct DeviceInformation*)malloc(dev_size); } - MPI_Bcast(hardwareInfo->gpu_info.gpu_dev, dev_size, MPI_BYTE, 0, physicalNodeComm.comm_); + MPI_Bcast(hardwareInfo->gpu_info.deviceInfo, dev_size, MPI_BYTE, 0, physicalNodeComm.comm_); MPI_Bcast(&hardwareInfo->gpu_info.n_dev_compatible, 1, MPI_INT, 0, physicalNodeComm.comm_); } } @@ -454,7 +454,7 @@ gmx_hw_info_t* gmx_detect_hardware(const gmx::MDLogger& mdlog, const PhysicalNod // Detect GPUs hardwareInfo->gpu_info.n_dev = 0; hardwareInfo->gpu_info.n_dev_compatible = 0; - hardwareInfo->gpu_info.gpu_dev = nullptr; + hardwareInfo->gpu_info.deviceInfo = nullptr; gmx_detect_gpus(mdlog, physicalNodeComm, compat::make_not_null(hardwareInfo)); gmx_collect_hardware_mpi(*hardwareInfo->cpuInfo, physicalNodeComm, compat::make_not_null(hardwareInfo)); diff --git a/src/gromacs/hardware/gpu_hw_info.h b/src/gromacs/hardware/gpu_hw_info.h index 7c57413dc1..98f87760cd 100644 --- a/src/gromacs/hardware/gpu_hw_info.h +++ b/src/gromacs/hardware/gpu_hw_info.h @@ -38,7 +38,7 @@ #include "gromacs/utility/basedefinitions.h" -struct gmx_device_info_t; +struct DeviceInformation; /*! \brief Possible results of the GPU detection/check. * @@ -73,7 +73,7 @@ struct gmx_gpu_info_t //! Total number of GPU devices detected on this physical node int n_dev; //! Information about each GPU device detected on this physical node - gmx_device_info_t* gpu_dev; + DeviceInformation* deviceInfo; //! Number of GPU devices detected on this physical node that are compatible. int n_dev_compatible; }; diff --git a/src/gromacs/mdlib/forcerec.h b/src/gromacs/mdlib/forcerec.h index 53fc326e30..07d0b05d7c 100644 --- a/src/gromacs/mdlib/forcerec.h +++ b/src/gromacs/mdlib/forcerec.h @@ -45,7 +45,7 @@ #include "gromacs/timing/wallcycle.h" #include "gromacs/utility/arrayref.h" -struct gmx_device_info_t; +struct DeviceInformation; struct gmx_hw_info_t; struct t_commrec; struct t_fcdata; diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 364d0bd9c2..3debb2046e 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -1146,8 +1146,8 @@ int Mdrunner::mdrunner() EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME)); // Get the device handles for the modules, nullptr when no task is assigned. 
- gmx_device_info_t* nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr); - gmx_device_info_t* pmeDeviceInfo = gpuTaskAssignments.initPmeDevice(); + DeviceInformation* nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr); + DeviceInformation* pmeDeviceInfo = gpuTaskAssignments.initPmeDevice(); // TODO Initialize GPU streams here. diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu index 359e6c5905..94e99879db 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu @@ -125,16 +125,16 @@ typedef void (*nbnxn_cu_kfunc_ptr_t)(const cu_atomdata_t, const cu_nbparam_t, co /*********************************/ /*! Returns the number of blocks to be used for the nonbonded GPU kernel. */ -static inline int calc_nb_kernel_nblock(int nwork_units, const gmx_device_info_t* dinfo) +static inline int calc_nb_kernel_nblock(int nwork_units, const DeviceInformation* deviceInfo) { int max_grid_x_size; - assert(dinfo); + assert(deviceInfo); /* CUDA does not accept grid dimension of 0 (which can happen e.g. with an empty domain) and that case should be handled before this point. */ assert(nwork_units > 0); - max_grid_x_size = dinfo->prop.maxGridSize[0]; + max_grid_x_size = deviceInfo->prop.maxGridSize[0]; /* do we exceed the grid x dimension limit? */ if (nwork_units > max_grid_x_size) @@ -284,7 +284,7 @@ static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int e int evdwtype, bool bDoEne, bool bDoPrune, - const gmx_device_info_t gmx_unused* devInfo) + const DeviceInformation gmx_unused* deviceInfo) { nbnxn_cu_kfunc_ptr_t res; @@ -295,7 +295,7 @@ static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int e /* assert assumptions made by the kernels */ GMX_ASSERT(c_nbnxnGpuClusterSize * c_nbnxnGpuClusterSize / c_nbnxnGpuClusterpairSplit - == devInfo->prop.warpSize, + == deviceInfo->prop.warpSize, "The CUDA kernels require the " "cluster_size_i*cluster_size_j/nbnxn_gpu_clusterpair_split to match the warp size " "of the architecture targeted."); @@ -328,12 +328,12 @@ static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int e /*! \brief Calculates the amount of shared memory required by the nonbonded kernel in use. */ static inline int calc_shmem_required_nonbonded(const int num_threads_z, - const gmx_device_info_t gmx_unused* dinfo, + const DeviceInformation gmx_unused* deviceInfo, const cu_nbparam_t* nbp) { int shmem; - assert(dinfo); + assert(deviceInfo); /* size of shmem (force-buffers/xq/atom type preloading) */ /* NOTE: with the default kernel on sm3.0 we need shmem only for pre-loading */ @@ -530,11 +530,11 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In * - The 1D block-grid contains as many blocks as super-clusters. 
*/ int num_threads_z = 1; - if (nb->dev_info->prop.major == 3 && nb->dev_info->prop.minor == 7) + if (nb->deviceInfo->prop.major == 3 && nb->deviceInfo->prop.minor == 7) { num_threads_z = 2; } - int nblock = calc_nb_kernel_nblock(plist->nsci, nb->dev_info); + int nblock = calc_nb_kernel_nblock(plist->nsci, nb->deviceInfo); KernelLaunchConfig config; @@ -542,7 +542,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In config.blockSize[1] = c_clSize; config.blockSize[2] = num_threads_z; config.gridSize[0] = nblock; - config.sharedMemorySize = calc_shmem_required_nonbonded(num_threads_z, nb->dev_info, nbp); + config.sharedMemorySize = calc_shmem_required_nonbonded(num_threads_z, nb->deviceInfo, nbp); config.stream = stream; if (debug) @@ -559,7 +559,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In auto* timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr; const auto kernel = select_nbnxn_kernel( nbp->eeltype, nbp->vdwtype, stepWork.computeEnergy, - (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune), nb->dev_info); + (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune), nb->deviceInfo); const auto kernelArgs = prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &stepWork.computeVirial); launchGpuKernel(kernel, config, timingEvent, "k_calc_nb", kernelArgs); @@ -660,7 +660,7 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c * - The 1D block-grid contains as many blocks as super-clusters. */ int num_threads_z = c_cudaPruneKernelJ4Concurrency; - int nblock = calc_nb_kernel_nblock(numSciInPart, nb->dev_info); + int nblock = calc_nb_kernel_nblock(numSciInPart, nb->deviceInfo); KernelLaunchConfig config; config.blockSize[0] = c_clSize; config.blockSize[1] = c_clSize; diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu index b2fc758417..11e490551a 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu @@ -412,7 +412,7 @@ static void cuda_init_const(NbnxmGpu* nb, nbnxn_cuda_clear_e_fshift(nb); } -NbnxmGpu* gpu_init(const gmx_device_info_t* deviceInfo, +NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo, const interaction_const_t* ic, const PairlistParams& listParams, const nbnxn_atomdata_t* nbat, @@ -443,7 +443,7 @@ NbnxmGpu* gpu_init(const gmx_device_info_t* deviceInfo, init_plist(nb->plist[InteractionLocality::Local]); /* set device info, just point it to the right GPU among the detected ones */ - nb->dev_info = deviceInfo; + nb->deviceInfo = deviceInfo; /* local/non-local GPU streams */ stat = cudaStreamCreate(&nb->stream[InteractionLocality::Local]); @@ -812,7 +812,7 @@ void gpu_reset_timings(nonbonded_verlet_t* nbv) int gpu_min_ci_balanced(NbnxmGpu* nb) { - return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->prop.multiProcessorCount : 0; + return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceInfo->prop.multiProcessorCount : 0; } gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb) diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h b/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h index 863e6a1efc..da607e4429 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h @@ -267,7 +267,7 @@ class GpuEventSynchronizer; struct NbnxmGpu { /*! 
\brief CUDA device information */ - const gmx_device_info_t* dev_info = nullptr; + const DeviceInformation* deviceInfo = nullptr; /*! \brief true if doing both local/non-local NB work on GPU */ bool bUseTwoStreams = false; /*! \brief atom data */ diff --git a/src/gromacs/nbnxm/gpu_data_mgmt.h b/src/gromacs/nbnxm/gpu_data_mgmt.h index 9d7502cdbd..aef5b44cfb 100644 --- a/src/gromacs/nbnxm/gpu_data_mgmt.h +++ b/src/gromacs/nbnxm/gpu_data_mgmt.h @@ -52,7 +52,7 @@ struct NbnxmGpu; struct gmx_gpu_info_t; -struct gmx_device_info_t; +struct DeviceInformation; struct gmx_wallclock_gpu_nbnxn_t; struct nbnxn_atomdata_t; struct NbnxnPairlistGpu; @@ -63,7 +63,7 @@ namespace Nbnxm /** Initializes the data structures related to GPU nonbonded calculations. */ GPU_FUNC_QUALIFIER -NbnxmGpu* gpu_init(const gmx_device_info_t gmx_unused* deviceInfo, +NbnxmGpu* gpu_init(const DeviceInformation gmx_unused* deviceInfo, const interaction_const_t gmx_unused* ic, const PairlistParams gmx_unused& listParams, const nbnxn_atomdata_t gmx_unused* nbat, diff --git a/src/gromacs/nbnxm/nbnxm.h b/src/gromacs/nbnxm/nbnxm.h index 65b3c1a1ce..50fed0fe5b 100644 --- a/src/gromacs/nbnxm/nbnxm.h +++ b/src/gromacs/nbnxm/nbnxm.h @@ -120,7 +120,7 @@ #include "gromacs/utility/enumerationhelpers.h" #include "gromacs/utility/real.h" -struct gmx_device_info_t; +struct DeviceInformation; struct gmx_domdec_zones_t; struct gmx_enerdata_t; struct gmx_hw_info_t; @@ -408,7 +408,7 @@ std::unique_ptr init_nb_verlet(const gmx::MDLogger& mdlo const t_forcerec* fr, const t_commrec* cr, const gmx_hw_info_t& hardwareInfo, - const gmx_device_info_t* deviceInfo, + const DeviceInformation* deviceInfo, const gmx_mtop_t* mtop, matrix box, gmx_wallcycle* wcycle); diff --git a/src/gromacs/nbnxm/nbnxm_setup.cpp b/src/gromacs/nbnxm/nbnxm_setup.cpp index c45d54decf..9cadba3743 100644 --- a/src/gromacs/nbnxm/nbnxm_setup.cpp +++ b/src/gromacs/nbnxm/nbnxm_setup.cpp @@ -362,7 +362,7 @@ std::unique_ptr init_nb_verlet(const gmx::MDLogger& mdlo const t_forcerec* fr, const t_commrec* cr, const gmx_hw_info_t& hardwareInfo, - const gmx_device_info_t* deviceInfo, + const DeviceInformation* deviceInfo, const gmx_mtop_t* mtop, matrix box, gmx_wallcycle* wcycle) diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp index d7511d3bd1..f0c88b10ba 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp @@ -103,7 +103,7 @@ static constexpr int c_clSize = c_nbnxnGpuClusterSize; */ static inline void validate_global_work_size(const KernelLaunchConfig& config, int work_dim, - const gmx_device_info_t* dinfo) + const DeviceInformation* dinfo) { cl_uint device_size_t_size_bits; cl_uint host_size_t_size_bits; @@ -639,7 +639,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb config.blockSize[1] = c_clSize; config.gridSize[0] = plist->nsci; - validate_global_work_size(config, 3, nb->dev_info); + validate_global_work_size(config, 3, nb->deviceInfo); if (debug) { @@ -788,7 +788,7 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c * and j-cluster concurrency, in x, y, and z, respectively. * - The 1D block-grid contains as many blocks as super-clusters. 
*/ - int num_threads_z = getOclPruneKernelJ4Concurrency(nb->dev_info->deviceVendor); + int num_threads_z = getOclPruneKernelJ4Concurrency(nb->deviceInfo->deviceVendor); /* kernel launch config */ @@ -800,7 +800,7 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c config.blockSize[2] = num_threads_z; config.gridSize[0] = numSciInPart; - validate_global_work_size(config, 3, nb->dev_info); + validate_global_work_size(config, 3, nb->deviceInfo); if (debug) { diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp index c4df4f1711..59ee706c4f 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp @@ -477,12 +477,12 @@ static void CL_CALLBACK ocl_notify_fn(const char* pErrInfo, * * A fatal error results if creation fails. * - * \param[inout] runtimeData runtime data including program and context - * \param[in] devInfo device info struct + * \param[inout] runtimeData Runtime data including program and context + * \param[in] deviceInfo Device info struct * \param[in] rank MPI rank (for error reporting) */ static void nbnxn_gpu_create_context(gmx_device_runtime_data_t* runtimeData, - const gmx_device_info_t* devInfo, + const DeviceInformation* deviceInfo, int rank) { cl_context_properties context_properties[5]; @@ -492,10 +492,10 @@ static void nbnxn_gpu_create_context(gmx_device_runtime_data_t* runtimeData, cl_int cl_error; GMX_ASSERT(runtimeData, "Need a valid runtimeData object"); - GMX_ASSERT(devInfo, "Need a valid device info object"); + GMX_ASSERT(deviceInfo, "Need a valid device info object"); - platform_id = devInfo->ocl_gpu_id.ocl_platform_id; - device_id = devInfo->ocl_gpu_id.ocl_device_id; + platform_id = deviceInfo->oclPlatformId; + device_id = deviceInfo->oclDeviceId; int i = 0; context_properties[i++] = CL_CONTEXT_PLATFORM; @@ -512,7 +512,7 @@ static void nbnxn_gpu_create_context(gmx_device_runtime_data_t* runtimeData, if (CL_SUCCESS != cl_error) { gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s:\n OpenCL error %d: %s", - rank, devInfo->device_name, cl_error, ocl_get_error_string(cl_error).c_str()); + rank, deviceInfo->device_name, cl_error, ocl_get_error_string(cl_error).c_str()); } runtimeData->context = context; @@ -528,7 +528,7 @@ static cl_kernel nbnxn_gpu_create_kernel(NbnxmGpu* nb, const char* kernel_name) if (CL_SUCCESS != cl_error) { gmx_fatal(FARGS, "Failed to create kernel '%s' for GPU #%s: OpenCL error %d", kernel_name, - nb->dev_info->device_name, cl_error); + nb->deviceInfo->device_name, cl_error); } return kernel; @@ -609,7 +609,7 @@ static void nbnxn_ocl_init_const(NbnxmGpu* nb, //! 
This function is documented in the header file -NbnxmGpu* gpu_init(const gmx_device_info_t* deviceInfo, +NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo, const interaction_const_t* ic, const PairlistParams& listParams, const nbnxn_atomdata_t* nbat, @@ -636,7 +636,7 @@ NbnxmGpu* gpu_init(const gmx_device_info_t* deviceInfo, snew(nb->timings, 1); /* set device info, just point it to the right GPU among the detected ones */ - nb->dev_info = deviceInfo; + nb->deviceInfo = deviceInfo; snew(nb->dev_rundata, 1); /* init nbst */ @@ -659,28 +659,27 @@ NbnxmGpu* gpu_init(const gmx_device_info_t* deviceInfo, queue_properties = 0; } - nbnxn_gpu_create_context(nb->dev_rundata, nb->dev_info, rank); + nbnxn_gpu_create_context(nb->dev_rundata, nb->deviceInfo, rank); /* local/non-local GPU streams */ nb->stream[InteractionLocality::Local] = clCreateCommandQueue( - nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id, queue_properties, &cl_error); + nb->dev_rundata->context, nb->deviceInfo->oclDeviceId, queue_properties, &cl_error); if (CL_SUCCESS != cl_error) { gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s: OpenCL error %d", rank, - nb->dev_info->device_name, cl_error); + nb->deviceInfo->device_name, cl_error); } if (nb->bUseTwoStreams) { init_plist(nb->plist[InteractionLocality::NonLocal]); - nb->stream[InteractionLocality::NonLocal] = - clCreateCommandQueue(nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id, - queue_properties, &cl_error); + nb->stream[InteractionLocality::NonLocal] = clCreateCommandQueue( + nb->dev_rundata->context, nb->deviceInfo->oclDeviceId, queue_properties, &cl_error); if (CL_SUCCESS != cl_error) { gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s: OpenCL error %d", - rank, nb->dev_info->device_name, cl_error); + rank, nb->deviceInfo->device_name, cl_error); } } @@ -695,8 +694,8 @@ NbnxmGpu* gpu_init(const gmx_device_info_t* deviceInfo, * TODO: decide about NVIDIA */ nb->bPrefetchLjParam = (getenv("GMX_OCL_DISABLE_I_PREFETCH") == nullptr) - && ((nb->dev_info->deviceVendor == DeviceVendor::Amd) - || (nb->dev_info->deviceVendor == DeviceVendor::Intel) + && ((nb->deviceInfo->deviceVendor == DeviceVendor::Amd) + || (nb->deviceInfo->deviceVendor == DeviceVendor::Intel) || (getenv("GMX_OCL_ENABLE_I_PREFETCH") != nullptr)); /* NOTE: in CUDA we pick L1 cache configuration for the nbnxn kernels here, @@ -1108,7 +1107,7 @@ void gpu_reset_timings(nonbonded_verlet_t* nbv) //! This function is documented in the header file int gpu_min_ci_balanced(NbnxmGpu* nb) { - return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->compute_units : 0; + return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceInfo->compute_units : 0; } //! This function is documented in the header file diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp index 285d91ef8f..8a4e217d84 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp @@ -163,7 +163,7 @@ static std::string makeDefinesForKernelTypes(bool bFastGen, int eeltype, int vdw * * A fatal error results if compilation fails. 
* - * \param[inout] nb Manages OpenCL non-bonded calculations; compiled kernels returned in dev_info members + * \param[inout] nb Manages OpenCL non-bonded calculations; compiled kernels returned in deviceInfo members * * Does not throw */ @@ -202,13 +202,12 @@ void nbnxn_gpu_compile_kernels(NbnxmGpu* nb) the log output here should be written there */ program = gmx::ocl::compileProgram( stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines, - nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id, - nb->dev_info->deviceVendor); + nb->dev_rundata->context, nb->deviceInfo->oclDeviceId, nb->deviceInfo->deviceVendor); } catch (gmx::GromacsException& e) { e.prependContext(gmx::formatString("Failed to compile NBNXN kernels for GPU #%s\n", - nb->dev_info->device_name)); + nb->deviceInfo->device_name)); throw; } } diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h index d1ce7be20a..74f042939c 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h @@ -331,7 +331,7 @@ typedef struct Nbnxm::gpu_timers_t cl_timers_t; struct NbnxmGpu { //! OpenCL device information - const gmx_device_info_t* dev_info = nullptr; + const DeviceInformation* deviceInfo = nullptr; //! OpenCL runtime data (context, kernels) struct gmx_device_runtime_data_t* dev_rundata = nullptr; diff --git a/src/gromacs/taskassignment/taskassignment.cpp b/src/gromacs/taskassignment/taskassignment.cpp index 6df5a4614e..5a62972d61 100644 --- a/src/gromacs/taskassignment/taskassignment.cpp +++ b/src/gromacs/taskassignment/taskassignment.cpp @@ -400,9 +400,9 @@ void GpuTaskAssignments::reportGpuUsage(const MDLogger& mdlog, numRanksOnThisNode_, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate); } -gmx_device_info_t* GpuTaskAssignments::initNonbondedDevice(const t_commrec* cr) const +DeviceInformation* GpuTaskAssignments::initNonbondedDevice(const t_commrec* cr) const { - gmx_device_info_t* deviceInfo = nullptr; + DeviceInformation* deviceInfo = nullptr; const GpuTaskAssignment& gpuTaskAssignment = assignmentForAllRanksOnThisNode_[indexOfThisRank_]; // This works because only one task of each type per rank is currently permitted. @@ -425,9 +425,9 @@ gmx_device_info_t* GpuTaskAssignments::initNonbondedDevice(const t_commrec* cr) return deviceInfo; } -gmx_device_info_t* GpuTaskAssignments::initPmeDevice() const +DeviceInformation* GpuTaskAssignments::initPmeDevice() const { - gmx_device_info_t* deviceInfo = nullptr; + DeviceInformation* deviceInfo = nullptr; const GpuTaskAssignment& gpuTaskAssignment = assignmentForAllRanksOnThisNode_[indexOfThisRank_]; // This works because only one task of each type is currently permitted. diff --git a/src/gromacs/taskassignment/taskassignment.h b/src/gromacs/taskassignment/taskassignment.h index 6ef380385d..c6ac9ac9de 100644 --- a/src/gromacs/taskassignment/taskassignment.h +++ b/src/gromacs/taskassignment/taskassignment.h @@ -55,7 +55,7 @@ #include "gromacs/utility/basedefinitions.h" #include "gromacs/utility/gmxmpi.h" -struct gmx_device_info_t; +struct DeviceInformation; struct gmx_hw_info_t; struct t_commrec; @@ -247,12 +247,12 @@ public: * \todo This also sets up DLB for device sharing, where * appropriate, but that responsbility should move * elsewhere. */ - gmx_device_info_t* initNonbondedDevice(const t_commrec* cr) const; + DeviceInformation* initNonbondedDevice(const t_commrec* cr) const; /*! 
\brief Return handle to the initialized GPU to use for the * PME task on this rank, if any. * * Returns nullptr if no such task is assigned to this rank. */ - gmx_device_info_t* initPmeDevice() const; + DeviceInformation* initPmeDevice() const; //! Return whether this rank has a PME task running on a GPU bool thisRankHasPmeGpuTask() const; //! Return whether this rank has any task running on a GPU -- 2.22.0
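
Reviewer note on the renamed struct: the hunks above exercise only a handful of DeviceInformation members (oclPlatformId, oclDeviceId, device_name, deviceVendor and compute_units on the OpenCL side; prop.major, prop.minor and prop.multiProcessorCount on the CUDA side). The sketch below is reconstructed from those accesses purely as a reading aid, assuming the usual GMX_GPU build guards; it is not the literal content of gputraits.cuh / gputraits_ocl.h (which are outside this excerpt), and the 'id' member, the device_name length and the extra DeviceVendor enumerators are assumptions.

    // Sketch only: inferred from the member accesses in this patch, not copied
    // from src/gromacs/gpu_utils/gputraits_ocl.h or gputraits.cuh.
    enum class DeviceVendor : int
    {
        Unknown, // assumed
        Nvidia,  // assumed
        Amd,     // confirmed by the i-prefetch heuristic above
        Intel    // confirmed by the i-prefetch heuristic above
    };

    #if GMX_GPU == GMX_GPU_OPENCL
    #    include <CL/cl.h>
    struct DeviceInformation
    {
        cl_platform_id oclPlatformId;    // replaces ocl_gpu_id.ocl_platform_id
        cl_device_id   oclDeviceId;      // replaces ocl_gpu_id.ocl_device_id
        char           device_name[256]; // used in fatal-error messages (length assumed)
        DeviceVendor   deviceVendor;     // selects prune-kernel concurrency and i-prefetch
        int            compute_units;    // read by gpu_min_ci_balanced()
    };
    #elif GMX_GPU == GMX_GPU_CUDA
    #    include <cuda_runtime.h>
    struct DeviceInformation
    {
        int            id;   // CUDA device id (assumed; not exercised by these hunks)
        cudaDeviceProp prop; // prop.major/minor and multiProcessorCount are read above
    };
    #endif

Flattening the old ocl_gpu_id pair into oclPlatformId/oclDeviceId is what lets the OpenCL call sites shrink from nb->dev_info->ocl_gpu_id.ocl_device_id to nb->deviceInfo->oclDeviceId throughout the hunks above, while the CUDA code keeps reading the cached cudaDeviceProp through ->prop unchanged apart from the rename.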