From e519efa8063c6edae55123e6abf01218c6116e68 Mon Sep 17 00:00:00 2001
From: Artem Zhmurov <zhmurov@gmail.com>
Date: Mon, 24 Aug 2020 08:39:46 +0000
Subject: [PATCH] Move GPU devices management into hardware subfolder

This consolidate the GPU management in CUDA and OpenCL into one
place.
---
 src/gromacs/ewald/pme_gpu_internal.cpp        |   1 +
 .../ewald/tests/testhardwarecontexts.cpp      |   2 +-
 src/gromacs/gpu_utils/CMakeLists.txt          |   1 -
 src/gromacs/gpu_utils/device_context_ocl.cpp  |   2 +-
 src/gromacs/gpu_utils/device_stream_ocl.cpp   |   1 +
 src/gromacs/gpu_utils/gpu_testutils.cpp       |   4 +-
 src/gromacs/gpu_utils/gpu_utils.cpp           |  52 +-
 src/gromacs/gpu_utils/gpu_utils.cu            | 421 ----------------
 src/gromacs/gpu_utils/gpu_utils.h             | 154 ------
 src/gromacs/gpu_utils/gputraits.cuh           |  16 -
 src/gromacs/gpu_utils/gputraits.h             |   6 -
 src/gromacs/gpu_utils/gputraits_ocl.h         |  32 --
 src/gromacs/gpu_utils/ocl_compiler.h          |   1 +
 .../gpu_utils/tests/devicetransfers.cu        |   4 +-
 .../gpu_utils/tests/devicetransfers_ocl.cpp   |   2 +-
 src/gromacs/gpu_utils/tests/gputest.cpp       |   2 +-
 .../gpu_utils/tests/typecasts_runner.cu       |   1 +
 src/gromacs/hardware/CMakeLists.txt           |  15 +
 src/gromacs/hardware/detecthardware.cpp       |   2 +-
 src/gromacs/hardware/device_information.h     | 148 ++++++
 src/gromacs/hardware/device_management.cpp    |  90 ++++
 src/gromacs/hardware/device_management.cu     | 466 ++++++++++++++++++
 src/gromacs/hardware/device_management.h      | 188 +++++++
 .../hardware/device_management_common.cpp     |  89 ++++
 .../device_management_ocl.cpp}                |  56 ++-
 src/gromacs/hardware/gpu_hw_info.h            |  41 --
 src/gromacs/hardware/printhardware.cpp        |   2 +-
 src/gromacs/mdlib/tests/constrtestrunners.cu  |   2 +-
 .../mdlib/tests/leapfrogtestrunners.cu        |   2 +-
 src/gromacs/mdlib/tests/settletestrunners.cu  |   2 +-
 src/gromacs/mdrun/runner.cpp                  |   2 +-
 src/gromacs/nbnxm/cuda/nbnxm_cuda.cu          |   1 +
 .../nbnxm/cuda/nbnxm_cuda_data_mgmt.cu        |   2 +-
 src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp        |   1 +
 .../nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp      |   2 +-
 src/gromacs/taskassignment/taskassignment.cpp |   2 +-
 src/gromacs/taskassignment/usergpuids.cpp     |   4 +-
 37 files changed, 1051 insertions(+), 768 deletions(-)
 create mode 100644 src/gromacs/hardware/device_information.h
 create mode 100644 src/gromacs/hardware/device_management.cpp
 create mode 100644 src/gromacs/hardware/device_management.cu
 create mode 100644 src/gromacs/hardware/device_management.h
 create mode 100644 src/gromacs/hardware/device_management_common.cpp
 rename src/gromacs/{gpu_utils/gpu_utils_ocl.cpp => hardware/device_management_ocl.cpp} (99%)
diff --git a/src/gromacs/ewald/pme_gpu_internal.cpp b/src/gromacs/ewald/pme_gpu_internal.cpp
index 24e9a4ba4b..88e39976c7 100644
--- a/src/gromacs/ewald/pme_gpu_internal.cpp
+++ b/src/gromacs/ewald/pme_gpu_internal.cpp
@@ -59,6 +59,7 @@
 #include "gromacs/gpu_utils/device_context.h"
 #include "gromacs/gpu_utils/device_stream.h"
 #include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/device_information.h"
 #include "gromacs/math/invertmatrix.h"
 #include "gromacs/math/units.h"
 #include "gromacs/timing/gpu_timing.h"
diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.cpp b/src/gromacs/ewald/tests/testhardwarecontexts.cpp
index 96f36f9810..6ae36951d5 100644
--- a/src/gromacs/ewald/tests/testhardwarecontexts.cpp
+++ b/src/gromacs/ewald/tests/testhardwarecontexts.cpp
@@ -47,8 +47,8 @@
 #include <memory>
 
 #include "gromacs/ewald/pme.h"
-#include "gromacs/gpu_utils/gpu_utils.h"
 #include "gromacs/hardware/detecthardware.h"
+#include "gromacs/hardware/device_management.h"
 #include "gromacs/hardware/hw_info.h"
 #include "gromacs/utility/basenetwork.h"
 #include "gromacs/utility/exceptions.h"
diff --git a/src/gromacs/gpu_utils/CMakeLists.txt b/src/gromacs/gpu_utils/CMakeLists.txt
index 4db569f5fa..a85efdeda1 100644
--- a/src/gromacs/gpu_utils/CMakeLists.txt
+++ b/src/gromacs/gpu_utils/CMakeLists.txt
@@ -47,7 +47,6 @@ if(GMX_GPU_OPENCL)
     gmx_add_libgromacs_sources(
         device_context_ocl.cpp
         device_stream_ocl.cpp
-        gpu_utils_ocl.cpp
         ocl_compiler.cpp
         ocl_caching.cpp
         oclutils.cpp
diff --git a/src/gromacs/gpu_utils/device_context_ocl.cpp b/src/gromacs/gpu_utils/device_context_ocl.cpp
index cfbd60c1a3..e8deb7fab2 100644
--- a/src/gromacs/gpu_utils/device_context_ocl.cpp
+++ b/src/gromacs/gpu_utils/device_context_ocl.cpp
@@ -45,7 +45,7 @@
 
 #include "device_context_ocl.h"
 
-#include "gromacs/gpu_utils/gputraits.h"
+#include "gromacs/hardware/device_information.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/gmxassert.h"
diff --git a/src/gromacs/gpu_utils/device_stream_ocl.cpp b/src/gromacs/gpu_utils/device_stream_ocl.cpp
index 39c58ff5da..84407b1674 100644
--- a/src/gromacs/gpu_utils/device_stream_ocl.cpp
+++ b/src/gromacs/gpu_utils/device_stream_ocl.cpp
@@ -45,6 +45,7 @@
 #include "gromacs/gpu_utils/device_context_ocl.h"
 #include "gromacs/gpu_utils/device_stream.h"
 #include "gromacs/gpu_utils/gputraits_ocl.h"
+#include "gromacs/hardware/device_information.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/stringutil.h"
diff --git a/src/gromacs/gpu_utils/gpu_testutils.cpp b/src/gromacs/gpu_utils/gpu_testutils.cpp
index 63a8756afc..99b173c4a6 100644
--- a/src/gromacs/gpu_utils/gpu_testutils.cpp
+++ b/src/gromacs/gpu_utils/gpu_testutils.cpp
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -41,7 +41,7 @@
 
 #include "gpu_testutils.h"
 
-#include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/device_management.h"
 #include "gromacs/hardware/gpu_hw_info.h"
 
 bool canComputeOnGpu()
diff --git a/src/gromacs/gpu_utils/gpu_utils.cpp b/src/gromacs/gpu_utils/gpu_utils.cpp
index 004ad30c86..a8eb03b23a 100644
--- a/src/gromacs/gpu_utils/gpu_utils.cpp
+++ b/src/gromacs/gpu_utils/gpu_utils.cpp
@@ -41,11 +41,9 @@
 
 #include "gpu_utils.h"
 
-#include "config.h"
-
 #include <cassert>
 
-#include "gromacs/hardware/gpu_hw_info.h"
+#include "gromacs/hardware/device_information.h"
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/smalloc.h"
 #include "gromacs/utility/stringutil.h"
@@ -54,54 +52,6 @@
 #    pragma warning(disable : 6237)
 #endif
 
-//! Constant used to help minimize preprocessed code
-static constexpr bool c_binarySupportsGpus = (GMX_GPU != 0);
-
-bool canPerformGpuDetection()
-{
-    if (c_binarySupportsGpus && getenv("GMX_DISABLE_GPU_DETECTION") == nullptr)
-    {
-        return isGpuDetectionFunctional(nullptr);
-    }
-    else
-    {
-        return false;
-    }
-}
-
-#if !GMX_GPU
-DeviceStatus gpu_info_get_stat(const gmx_gpu_info_t& /*unused*/, int /*unused*/)
-{
-    return DeviceStatus::Nonexistent;
-}
-#endif
-
-void free_gpu_info(const gmx_gpu_info_t* gpu_info)
-{
-    sfree(static_cast<void*>(gpu_info->deviceInfo)); // circumvent is_pod check in sfree
-}
-
-std::vector<int> getCompatibleGpus(const gmx_gpu_info_t& gpu_info)
-{
-    // Possible minor over-allocation here, but not important for anything
-    std::vector<int> compatibleGpus;
-    compatibleGpus.reserve(gpu_info.n_dev);
-    for (int i = 0; i < gpu_info.n_dev; i++)
-    {
-        assert(gpu_info.deviceInfo);
-        if (gpu_info_get_stat(gpu_info, i) == DeviceStatus::Compatible)
-        {
-            compatibleGpus.push_back(i);
-        }
-    }
-    return compatibleGpus;
-}
-
-const char* getGpuCompatibilityDescription(const gmx_gpu_info_t& gpu_info, int index)
-{
-    return (index >= gpu_info.n_dev ? c_deviceStateString[DeviceStatus::Nonexistent]
-                                    : c_deviceStateString[gpu_info_get_stat(gpu_info, index)]);
-}
 /*! \brief Help build a descriptive message in \c error if there are
  * \c errorReasons why nonbondeds on a GPU are not supported.
  *
diff --git a/src/gromacs/gpu_utils/gpu_utils.cu b/src/gromacs/gpu_utils/gpu_utils.cu
index 7d282e5a1a..e0ae3bed30 100644
--- a/src/gromacs/gpu_utils/gpu_utils.cu
+++ b/src/gromacs/gpu_utils/gpu_utils.cu
@@ -65,39 +65,8 @@
 #include "gromacs/utility/snprintf.h"
 #include "gromacs/utility/stringutil.h"
 
-/*! \internal \brief
- * Max number of devices supported by CUDA (for consistency checking).
- *
- * In reality it is 16 with CUDA <=v5.0, but let's stay on the safe side.
- */
-static int cuda_max_device_count = 32;
-
 static bool cudaProfilerRun = ((getenv("NVPROF_ID") != nullptr));
 
-/** Dummy kernel used for sanity checking. */
-static __global__ void k_dummy_test(void) {}
-
-static cudaError_t checkCompiledTargetCompatibility(int deviceId, const cudaDeviceProp& deviceProp)
-{
-    cudaFuncAttributes attributes;
-    cudaError_t        stat = cudaFuncGetAttributes(&attributes, k_dummy_test);
-
-    if (cudaErrorInvalidDeviceFunction == stat)
-    {
-        fprintf(stderr,
-                "\nWARNING: The %s binary does not include support for the CUDA architecture of "
-                "the GPU ID #%d (compute capability %d.%d) detected during detection. "
-                "By default, GROMACS supports all architectures of compute "
-                "capability >= 3.0, so your GPU "
-                "might be rare, or some architectures were disabled in the build. \n"
-                "Consult the install guide for how to use the GMX_CUDA_TARGET_SM and "
-                "GMX_CUDA_TARGET_COMPUTE CMake variables to add this architecture. \n",
-                gmx::getProgramContext().displayName(), deviceId, deviceProp.major, deviceProp.minor);
-    }
-
-    return stat;
-}
-
 bool isHostMemoryPinned(const void* h_ptr)
 {
     cudaPointerAttributes memoryAttributes;
@@ -133,391 +102,6 @@ bool isHostMemoryPinned(const void* h_ptr)
     return isPinned;
 }
 
-/*!
- * \brief Runs GPU sanity checks.
- *
- * Runs a series of checks to determine that the given GPU and underlying CUDA
- * driver/runtime functions properly.
- *
- * \todo Currently we do not make a distinction between the type of errors
- *       that can appear during functionality checks. This needs to be improved,
- *       e.g if the dummy test kernel fails to execute with a "device busy message"
- *       we should appropriately report that the device is busy instead of NonFunctional.
- *
- * \todo Introduce errors codes and handle errors more smoothly.
- *
- *
- * \param[in]  dev_id      the device ID of the GPU or -1 if the device has already been initialized
- * \param[in]  dev_prop    The device properties structure
- * \returns                0 if the device looks OK, -1 if it sanity checks failed, and -2 if the device is busy
- */
-static DeviceStatus isDeviceFunctional(int dev_id, const cudaDeviceProp& dev_prop)
-{
-    cudaError_t cu_err;
-    int         dev_count, id;
-
-    cu_err = cudaGetDeviceCount(&dev_count);
-    if (cu_err != cudaSuccess)
-    {
-        fprintf(stderr, "Error %d while querying device count: %s\n", cu_err, cudaGetErrorString(cu_err));
-        return DeviceStatus::NonFunctional;
-    }
-
-    /* no CUDA compatible device at all */
-    if (dev_count == 0)
-    {
-        return DeviceStatus::NonFunctional;
-    }
-
-    /* things might go horribly wrong if cudart is not compatible with the driver */
-    if (dev_count < 0 || dev_count > cuda_max_device_count)
-    {
-        return DeviceStatus::NonFunctional;
-    }
-
-    if (dev_id == -1) /* device already selected let's not destroy the context */
-    {
-        cu_err = cudaGetDevice(&id);
-        if (cu_err != cudaSuccess)
-        {
-            fprintf(stderr, "Error %d while querying device id: %s\n", cu_err, cudaGetErrorString(cu_err));
-            return DeviceStatus::NonFunctional;
-        }
-    }
-    else
-    {
-        id = dev_id;
-        if (id > dev_count - 1) /* pfff there's no such device */
-        {
-            fprintf(stderr,
-                    "The requested device with id %d does not seem to exist (device count=%d)\n",
-                    dev_id, dev_count);
-            return DeviceStatus::NonFunctional;
-        }
-    }
-
-    /* both major & minor is 9999 if no CUDA capable devices are present */
-    if (dev_prop.major == 9999 && dev_prop.minor == 9999)
-    {
-        return DeviceStatus::NonFunctional;
-    }
-    /* we don't care about emulation mode */
-    if (dev_prop.major == 0)
-    {
-        return DeviceStatus::NonFunctional;
-    }
-
-    if (id != -1)
-    {
-        cu_err = cudaSetDevice(id);
-        if (cu_err != cudaSuccess)
-        {
-            fprintf(stderr, "Error %d while switching to device #%d: %s\n", cu_err, id,
-                    cudaGetErrorString(cu_err));
-            return DeviceStatus::NonFunctional;
-        }
-    }
-
-    cu_err = checkCompiledTargetCompatibility(dev_id, dev_prop);
-    // Avoid triggering an error if GPU devices are in exclusive or prohibited mode;
-    // it is enough to check for cudaErrorDevicesUnavailable only here because
-    // if we encounter it that will happen in cudaFuncGetAttributes in the above function.
-    if (cu_err == cudaErrorDevicesUnavailable)
-    {
-        return DeviceStatus::Unavailable;
-    }
-    else if (cu_err != cudaSuccess)
-    {
-        return DeviceStatus::NonFunctional;
-    }
-
-    /* try to execute a dummy kernel */
-    try
-    {
-        KernelLaunchConfig config;
-        config.blockSize[0]                = 512;
-        const auto          dummyArguments = prepareGpuKernelArguments(k_dummy_test, config);
-        DeviceInformation   deviceInfo;
-        const DeviceContext deviceContext(deviceInfo);
-        const DeviceStream  deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
-        launchGpuKernel(k_dummy_test, config, deviceStream, nullptr, "Dummy kernel", dummyArguments);
-    }
-    catch (gmx::GromacsException& ex)
-    {
-        // launchGpuKernel error is not fatal and should continue with marking the device bad
-        fprintf(stderr,
-                "Error occurred while running dummy kernel sanity check on device #%d:\n %s\n", id,
-                formatExceptionMessageToString(ex).c_str());
-        return DeviceStatus::NonFunctional;
-    }
-
-    if (cudaDeviceSynchronize() != cudaSuccess)
-    {
-        return DeviceStatus::NonFunctional;
-    }
-
-    /* destroy context if we created one */
-    if (id != -1)
-    {
-        cu_err = cudaDeviceReset();
-        CU_RET_ERR(cu_err, "cudaDeviceReset failed");
-    }
-
-    return DeviceStatus::Compatible;
-}
-
-void init_gpu(const DeviceInformation* deviceInfo)
-{
-    cudaError_t stat;
-
-    assert(deviceInfo);
-
-    stat = cudaSetDevice(deviceInfo->id);
-    if (stat != cudaSuccess)
-    {
-        auto message = gmx::formatString("Failed to initialize GPU #%d", deviceInfo->id);
-        CU_RET_ERR(stat, message.c_str());
-    }
-
-    if (debug)
-    {
-        fprintf(stderr, "Initialized GPU ID #%d: %s\n", deviceInfo->id, deviceInfo->prop.name);
-    }
-}
-
-void free_gpu(const DeviceInformation* deviceInfo)
-{
-    // One should only attempt to clear the device context when
-    // it has been used, but currently the only way to know that a GPU
-    // device was used is that deviceInfo will be non-null.
-    if (deviceInfo == nullptr)
-    {
-        return;
-    }
-
-    cudaError_t stat;
-
-    if (debug)
-    {
-        int gpuid;
-        stat = cudaGetDevice(&gpuid);
-        CU_RET_ERR(stat, "cudaGetDevice failed");
-        fprintf(stderr, "Cleaning up context on GPU ID #%d\n", gpuid);
-    }
-
-    stat = cudaDeviceReset();
-    if (stat != cudaSuccess)
-    {
-        gmx_warning("Failed to free GPU #%d: %s", deviceInfo->id, cudaGetErrorString(stat));
-    }
-}
-
-DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId)
-{
-    if (deviceId < 0 || deviceId >= gpu_info.n_dev)
-    {
-        gmx_incons("Invalid GPU deviceId requested");
-    }
-    return &gpu_info.deviceInfo[deviceId];
-}
-
-/*! \brief Returns true if the gpu characterized by the device properties is
- *  supported by the native gpu acceleration.
- *
- * \param[in] dev_prop  the CUDA device properties of the gpus to test.
- * \returns             true if the GPU properties passed indicate a compatible
- *                      GPU, otherwise false.
- */
-static bool is_gmx_supported_gpu(const cudaDeviceProp& dev_prop)
-{
-    return (dev_prop.major >= 3);
-}
-
-/*! \brief Checks if a GPU with a given ID is supported by the native GROMACS acceleration.
- *
- *  Returns a status value which indicates compatibility or one of the following
- *  errors: incompatibility or insanity (=unexpected behavior).
- *
- *  As the error handling only permits returning the state of the GPU, this function
- *  does not clear the CUDA runtime API status allowing the caller to inspect the error
- *  upon return. Note that this also means it is the caller's responsibility to
- *  reset the CUDA runtime state.
- *
- *  \param[in]  deviceId   the ID of the GPU to check.
- *  \param[in]  deviceProp the CUDA device properties of the device checked.
- *  \returns               the status of the requested device
- */
-static DeviceStatus checkDeviceStatus(int deviceId, const cudaDeviceProp& deviceProp)
-{
-    if (!is_gmx_supported_gpu(deviceProp))
-    {
-        return DeviceStatus::Incompatible;
-    }
-    return isDeviceFunctional(deviceId, deviceProp);
-}
-
-bool isGpuDetectionFunctional(std::string* errorMessage)
-{
-    cudaError_t stat;
-    int         driverVersion = -1;
-    stat                      = cudaDriverGetVersion(&driverVersion);
-    GMX_ASSERT(stat != cudaErrorInvalidValue,
-               "An impossible null pointer was passed to cudaDriverGetVersion");
-    GMX_RELEASE_ASSERT(
-            stat == cudaSuccess,
-            gmx::formatString("An unexpected value was returned from cudaDriverGetVersion %s: %s",
-                              cudaGetErrorName(stat), cudaGetErrorString(stat))
-                    .c_str());
-    bool foundDriver = (driverVersion > 0);
-    if (!foundDriver)
-    {
-        // Can't detect GPUs if there is no driver
-        if (errorMessage != nullptr)
-        {
-            errorMessage->assign("No valid CUDA driver found");
-        }
-        return false;
-    }
-
-    int numDevices;
-    stat = cudaGetDeviceCount(&numDevices);
-    if (stat != cudaSuccess)
-    {
-        if (errorMessage != nullptr)
-        {
-            /* cudaGetDeviceCount failed which means that there is
-             * something wrong with the machine: driver-runtime
-             * mismatch, all GPUs being busy in exclusive mode,
-             * invalid CUDA_VISIBLE_DEVICES, or some other condition
-             * which should result in GROMACS issuing at least a
-             * warning. */
-            errorMessage->assign(cudaGetErrorString(stat));
-        }
-
-        // Consume the error now that we have prepared to handle
-        // it. This stops it reappearing next time we check for
-        // errors. Note that if CUDA_VISIBLE_DEVICES does not contain
-        // valid devices, then cudaGetLastError returns the
-        // (undocumented) cudaErrorNoDevice, but this should not be a
-        // problem as there should be no future CUDA API calls.
-        // NVIDIA bug report #2038718 has been filed.
-        cudaGetLastError();
-        // Can't detect GPUs
-        return false;
-    }
-
-    // We don't actually use numDevices here, that's not the job of
-    // this function.
-    return true;
-}
-
-void findGpus(gmx_gpu_info_t* gpu_info)
-{
-    assert(gpu_info);
-
-    gpu_info->n_dev_compatible = 0;
-
-    int         ndev;
-    cudaError_t stat = cudaGetDeviceCount(&ndev);
-    if (stat != cudaSuccess)
-    {
-        GMX_THROW(gmx::InternalError(
-                "Invalid call of findGpus() when CUDA API returned an error, perhaps "
-                "canDetectGpus() was not called appropriately beforehand."));
-    }
-
-    // We expect to start device support/sanity checks with a clean runtime error state
-    gmx::ensureNoPendingCudaError("");
-
-    DeviceInformation* devs;
-    snew(devs, ndev);
-    for (int i = 0; i < ndev; i++)
-    {
-        cudaDeviceProp prop;
-        memset(&prop, 0, sizeof(cudaDeviceProp));
-        stat = cudaGetDeviceProperties(&prop, i);
-        const DeviceStatus checkResult =
-                (stat != cudaSuccess) ? DeviceStatus::NonFunctional : checkDeviceStatus(i, prop);
-
-        devs[i].id   = i;
-        devs[i].prop = prop;
-        devs[i].stat = checkResult;
-
-        if (checkResult == DeviceStatus::Compatible)
-        {
-            gpu_info->n_dev_compatible++;
-        }
-        else
-        {
-            // TODO:
-            //  - we inspect the CUDA API state to retrieve and record any
-            //    errors that occurred during is_gmx_supported_gpu_id() here,
-            //    but this would be more elegant done within is_gmx_supported_gpu_id()
-            //    and only return a string with the error if one was encountered.
-            //  - we'll be reporting without rank information which is not ideal.
-            //  - we'll end up warning also in cases where users would already
-            //    get an error before mdrun aborts.
-            //
-            // Here we also clear the CUDA API error state so potential
-            // errors during sanity checks don't propagate.
-            if ((stat = cudaGetLastError()) != cudaSuccess)
-            {
-                gmx_warning("An error occurred while sanity checking device #%d; %s: %s",
-                            devs[i].id, cudaGetErrorName(stat), cudaGetErrorString(stat));
-            }
-        }
-    }
-
-    stat = cudaPeekAtLastError();
-    GMX_RELEASE_ASSERT(stat == cudaSuccess,
-                       gmx::formatString("We promise to return with clean CUDA state, but "
-                                         "non-success state encountered: %s: %s",
-                                         cudaGetErrorName(stat), cudaGetErrorString(stat))
-                               .c_str());
-
-    gpu_info->n_dev      = ndev;
-    gpu_info->deviceInfo = devs;
-}
-
-void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int index)
-{
-    assert(s);
-
-    if (index < 0 && index >= gpu_info.n_dev)
-    {
-        return;
-    }
-
-    DeviceInformation* dinfo = &gpu_info.deviceInfo[index];
-
-    bool bGpuExists =
-            (dinfo->stat != DeviceStatus::Nonexistent && dinfo->stat != DeviceStatus::NonFunctional);
-
-    if (!bGpuExists)
-    {
-        sprintf(s, "#%d: %s, stat: %s", dinfo->id, "N/A", c_deviceStateString[dinfo->stat]);
-    }
-    else
-    {
-        sprintf(s, "#%d: NVIDIA %s, compute cap.: %d.%d, ECC: %3s, stat: %s", dinfo->id,
-                dinfo->prop.name, dinfo->prop.major, dinfo->prop.minor,
-                dinfo->prop.ECCEnabled ? "yes" : " no", c_deviceStateString[dinfo->stat]);
-    }
-}
-
-int get_current_cuda_gpu_device_id(void)
-{
-    int gpuid;
-    CU_RET_ERR(cudaGetDevice(&gpuid), "cudaGetDevice failed");
-
-    return gpuid;
-}
-
-size_t sizeof_gpu_dev_info(void)
-{
-    return sizeof(DeviceInformation);
-}
-
 void startGpuProfiler(void)
 {
     /* The NVPROF_ID environment variable is set by nvprof and indicates that
@@ -561,11 +145,6 @@ void resetGpuProfiler(void)
     }
 }
 
-DeviceStatus gpu_info_get_stat(const gmx_gpu_info_t& info, int index)
-{
-    return info.deviceInfo[index].stat;
-}
-
 /*! \brief Check status returned from peer access CUDA call, and error out or warn appropriately
  * \param[in] stat           CUDA call return status
  * \param[in] gpuA           ID for GPU initiating peer access call
diff --git a/src/gromacs/gpu_utils/gpu_utils.h b/src/gromacs/gpu_utils/gpu_utils.h
index 0e27565c51..fce1e99580 100644
--- a/src/gromacs/gpu_utils/gpu_utils.h
+++ b/src/gromacs/gpu_utils/gpu_utils.h
@@ -54,10 +54,6 @@
 #include "gromacs/gpu_utils/gpu_macros.h"
 #include "gromacs/utility/basedefinitions.h"
 
-struct DeviceInformation;
-enum class DeviceStatus : int;
-struct gmx_gpu_info_t;
-
 namespace gmx
 {
 class MDLogger;
@@ -77,156 +73,6 @@ enum class GpuTaskCompletion
     Check /*<< Only check whether the task has completed */
 };
 
-/*! \brief Return whether GPUs can be detected
- *
- * Returns true when this is a build of \Gromacs configured to support
- * GPU usage, GPU detection is not disabled by an environment variable
- * and a valid device driver, ICD, and/or runtime was detected.
- * Does not throw. */
-bool canPerformGpuDetection();
-
-/*! \brief Return whether GPU detection is functioning correctly
- *
- * Returns true when this is a build of \Gromacs configured to support
- * GPU usage, and a valid device driver, ICD, and/or runtime was detected.
- *
- * This function is not intended to be called from build
- * configurations that do not support GPUs, and there will be no
- * descriptive message in that case.
- *
- * \param[out] errorMessage  When returning false on a build configured with
- *                           GPU support and non-nullptr was passed,
- *                           the string contains a descriptive message about
- *                           why GPUs cannot be detected.
- *
- * Does not throw. */
-GPU_FUNC_QUALIFIER
-bool isGpuDetectionFunctional(std::string* GPU_FUNC_ARGUMENT(errorMessage))
-        GPU_FUNC_TERM_WITH_RETURN(false);
-
-/*! \brief Find all GPUs in the system.
- *
- *  Will detect every GPU supported by the device driver in use.
- *  Must only be called if canPerformGpuDetection() has returned true.
- *  This routine also checks for the compatibility of each and fill the
- *  gpu_info->deviceInfo array with the required information on each the
- *  device: ID, device properties, status.
- *
- *  Note that this function leaves the GPU runtime API error state clean;
- *  this is implemented ATM in the CUDA flavor.
- *  TODO: check if errors do propagate in OpenCL as they do in CUDA and
- *  whether there is a mechanism to "clear" them.
- *
- *  \param[in] gpu_info    pointer to structure holding GPU information.
- *
- *  \throws                InternalError if a GPU API returns an unexpected failure (because
- *                         the call to canDetectGpus() should always prevent this occuring)
- */
-GPU_FUNC_QUALIFIER
-void findGpus(gmx_gpu_info_t* GPU_FUNC_ARGUMENT(gpu_info)) GPU_FUNC_TERM;
-
-/*! \brief Return a container of the detected GPUs that are compatible.
- *
- * This function filters the result of the detection for compatible
- * GPUs, based on the previously run compatibility tests.
- *
- * \param[in]     gpu_info    Information detected about GPUs, including compatibility.
- * \return                    vector of IDs of GPUs already recorded as compatible */
-std::vector<int> getCompatibleGpus(const gmx_gpu_info_t& gpu_info);
-
-/*! \brief Return a string describing how compatible the GPU with given \c index is.
- *
- * \param[in]   gpu_info    Information about detected GPUs
- * \param[in]   index       index of GPU to ask about
- * \returns                 A null-terminated C string describing the compatibility status, useful for error messages.
- */
-const char* getGpuCompatibilityDescription(const gmx_gpu_info_t& gpu_info, int index);
-
-/*! \brief Frees the gpu_dev and dev_use array fields of \p gpu_info.
- *
- * \param[in]    gpu_info    pointer to structure holding GPU information
- */
-void free_gpu_info(const gmx_gpu_info_t* gpu_info);
-
-/*! \brief Initializes the GPU described by \c deviceInfo.
- *
- * TODO Doxygen complains about these - probably a Doxygen bug, since
- * the patterns here are the same as elsewhere in this header.
- *
- * \param[in]    deviceInfo   device info of the GPU to initialize
- *
- * Issues a fatal error for any critical errors that occur during
- * initialization.
- */
-GPU_FUNC_QUALIFIER
-void init_gpu(const DeviceInformation* GPU_FUNC_ARGUMENT(deviceInfo)) GPU_FUNC_TERM;
-
-/*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
- *
- * If \c deviceInfo is nullptr, then it is understood that no device
- * was selected so no context is active to be freed. Otherwise, the
- * context is explicitly destroyed and therefore all data uploaded to
- * the GPU is lost. This must only be called when none of this data is
- * required anymore, because subsequent attempts to free memory
- * associated with the context will otherwise fail.
- *
- * Calls gmx_warning upon errors.
- *
- * \param[in]  deviceInfo   device info of the GPU to clean up for
- *
- * \returns                 true if no error occurs during the freeing.
- */
-CUDA_FUNC_QUALIFIER
-void free_gpu(const DeviceInformation* CUDA_FUNC_ARGUMENT(deviceInfo)) CUDA_FUNC_TERM;
-
-/*! \brief Return a pointer to the device info for \c deviceId
- *
- * \param[in] gpu_info      GPU info of all detected devices in the system.
- * \param[in] deviceId      ID for the GPU device requested.
- *
- * \returns                 Pointer to the device info for \c deviceId.
- */
-GPU_FUNC_QUALIFIER
-DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& GPU_FUNC_ARGUMENT(gpu_info),
-                                 int GPU_FUNC_ARGUMENT(deviceId)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
-
-/*! \brief Returns the device ID of the CUDA GPU currently in use.
- *
- * The GPU used is the one that is active at the time of the call in the active context.
- *
- * \returns                 device ID of the GPU in use at the time of the call
- */
-CUDA_FUNC_QUALIFIER
-int get_current_cuda_gpu_device_id() CUDA_FUNC_TERM_WITH_RETURN(-1);
-
-/*! \brief Formats and returns a device information string for a given GPU.
- *
- * Given an index *directly* into the array of available GPUs (gpu_dev)
- * returns a formatted info string for the respective GPU which includes
- * ID, name, compute capability, and detection status.
- *
- * \param[out]  s           pointer to output string (has to be allocated externally)
- * \param[in]   gpu_info    Information about detected GPUs
- * \param[in]   index       an index *directly* into the array of available GPUs
- */
-GPU_FUNC_QUALIFIER
-void get_gpu_device_info_string(char*                 GPU_FUNC_ARGUMENT(s),
-                                const gmx_gpu_info_t& GPU_FUNC_ARGUMENT(gpu_info),
-                                int                   GPU_FUNC_ARGUMENT(index)) GPU_FUNC_TERM;
-
-
-/*! \brief Returns the size of the gpu_dev_info struct.
- *
- * The size of gpu_dev_info can be used for allocation and communication.
- *
- * \returns                 size in bytes of gpu_dev_info
- */
-GPU_FUNC_QUALIFIER
-size_t sizeof_gpu_dev_info() GPU_FUNC_TERM_WITH_RETURN(0);
-
-//! Get status of device with specified index
-DeviceStatus gpu_info_get_stat(const gmx_gpu_info_t& info, int index);
-
 /*! \brief Check if GROMACS has been built with GPU support.
  *
  * \param[in] error Pointer to error string or nullptr.
diff --git a/src/gromacs/gpu_utils/gputraits.cuh b/src/gromacs/gpu_utils/gputraits.cuh
index 98fd8d04ef..a165df595d 100644
--- a/src/gromacs/gpu_utils/gputraits.cuh
+++ b/src/gromacs/gpu_utils/gputraits.cuh
@@ -51,22 +51,6 @@
 //! Device texture for fast read-only data fetching
 using DeviceTexture = cudaTextureObject_t;
 
-/*! \brief CUDA device information.
- *
- * The CUDA device information is queried and set at detection and contains
- * both information about the device/hardware returned by the runtime as well
- * as additional data like support status.
- */
-struct DeviceInformation
-{
-    //! ID of the CUDA device.
-    int id;
-    //! CUDA device properties.
-    cudaDeviceProp prop;
-    //! Device status.
-    DeviceStatus stat;
-};
-
 //! \brief Single GPU call timing event - meaningless in CUDA
 using CommandEvent = void;
 
diff --git a/src/gromacs/gpu_utils/gputraits.h b/src/gromacs/gpu_utils/gputraits.h
index e3a3a9275e..79ccaa64aa 100644
--- a/src/gromacs/gpu_utils/gputraits.h
+++ b/src/gromacs/gpu_utils/gputraits.h
@@ -59,12 +59,6 @@
 
 using DeviceTexture = void*;
 
-//! \internal Stub for device information.
-struct DeviceInformation
-{
-    // No member needed
-};
-
 //! \brief Single GPU call timing event
 using CommandEvent = void*;
 
diff --git a/src/gromacs/gpu_utils/gputraits_ocl.h b/src/gromacs/gpu_utils/gputraits_ocl.h
index a3eb510c95..b3c6c8340e 100644
--- a/src/gromacs/gpu_utils/gputraits_ocl.h
+++ b/src/gromacs/gpu_utils/gputraits_ocl.h
@@ -50,38 +50,6 @@
 
 using DeviceTexture = void*;
 
-//! OpenCL device vendors
-enum class DeviceVendor : int
-{
-    Unknown = 0, //!< No data
-    Nvidia  = 1, //!< NVIDIA
-    Amd     = 2, //!< Advanced Micro Devices
-    Intel   = 3, //!< Intel
-    Count   = 4
-};
-
-/*! \internal
- * \brief OpenCL device information.
- *
- * The OpenCL device information is queried and set at detection and contains
- * both information about the device/hardware returned by the runtime as well
- * as additional data like support status.
- */
-struct DeviceInformation
-{
-    cl_platform_id oclPlatformId;       //!< OpenCL Platform ID.
-    cl_device_id   oclDeviceId;         //!< OpenCL Device ID.
-    char           device_name[256];    //!< Device name.
-    char           device_version[256]; //!< Device version.
-    char           vendorName[256];     //!< Device vendor name.
-    int            compute_units;       //!< Number of compute units.
-    int            adress_bits;         //!< Number of address bits the device is capable of.
-    DeviceStatus   stat;                //!< Device status.
-    DeviceVendor   deviceVendor;        //!< Device vendor.
-    size_t         maxWorkItemSizes[3]; //!< Workgroup size limits (CL_DEVICE_MAX_WORK_ITEM_SIZES).
-    size_t maxWorkGroupSize; //!< Workgroup total size limit (CL_DEVICE_MAX_WORK_GROUP_SIZE).
-};
-
 //! \brief Single GPU call timing event
 using CommandEvent = cl_event;
 
diff --git a/src/gromacs/gpu_utils/ocl_compiler.h b/src/gromacs/gpu_utils/ocl_compiler.h
index 4cd381865e..00baec0f06 100644
--- a/src/gromacs/gpu_utils/ocl_compiler.h
+++ b/src/gromacs/gpu_utils/ocl_compiler.h
@@ -48,6 +48,7 @@
 #include <string>
 
 #include "gromacs/gpu_utils/oclutils.h"
+#include "gromacs/hardware/device_information.h"
 
 namespace gmx
 {
diff --git a/src/gromacs/gpu_utils/tests/devicetransfers.cu b/src/gromacs/gpu_utils/tests/devicetransfers.cu
index e3a56be9c3..0636285a1e 100644
--- a/src/gromacs/gpu_utils/tests/devicetransfers.cu
+++ b/src/gromacs/gpu_utils/tests/devicetransfers.cu
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -49,7 +49,7 @@
 #include "devicetransfers.h"
 
 #include "gromacs/gpu_utils/cudautils.cuh"
-#include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/device_management.h"
 #include "gromacs/hardware/gpu_hw_info.h"
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/exceptions.h"
diff --git a/src/gromacs/gpu_utils/tests/devicetransfers_ocl.cpp b/src/gromacs/gpu_utils/tests/devicetransfers_ocl.cpp
index ffe60c00e9..8338e58fa8 100644
--- a/src/gromacs/gpu_utils/tests/devicetransfers_ocl.cpp
+++ b/src/gromacs/gpu_utils/tests/devicetransfers_ocl.cpp
@@ -41,8 +41,8 @@
 #include "gmxpre.h"
 
 #include "gromacs/gpu_utils/gmxopencl.h"
-#include "gromacs/gpu_utils/gpu_utils.h"
 #include "gromacs/gpu_utils/oclutils.h"
+#include "gromacs/hardware/device_management.h"
 #include "gromacs/hardware/gpu_hw_info.h"
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/exceptions.h"
diff --git a/src/gromacs/gpu_utils/tests/gputest.cpp b/src/gromacs/gpu_utils/tests/gputest.cpp
index a862361586..4caabc374f 100644
--- a/src/gromacs/gpu_utils/tests/gputest.cpp
+++ b/src/gromacs/gpu_utils/tests/gputest.cpp
@@ -44,7 +44,7 @@
 
 #include <gtest/gtest.h>
 
-#include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/device_management.h"
 #include "gromacs/hardware/gpu_hw_info.h"
 #include "gromacs/utility/smalloc.h"
 
diff --git a/src/gromacs/gpu_utils/tests/typecasts_runner.cu b/src/gromacs/gpu_utils/tests/typecasts_runner.cu
index 1aedf1a166..0a4134bd9c 100644
--- a/src/gromacs/gpu_utils/tests/typecasts_runner.cu
+++ b/src/gromacs/gpu_utils/tests/typecasts_runner.cu
@@ -47,6 +47,7 @@
 #include "gromacs/gpu_utils/cudautils.cuh"
 #include "gromacs/gpu_utils/devicebuffer.h"
 #include "gromacs/gpu_utils/typecasts.cuh"
+#include "gromacs/hardware/device_information.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/stringutil.h"
 
diff --git a/src/gromacs/hardware/CMakeLists.txt b/src/gromacs/hardware/CMakeLists.txt
index efc684c31f..c93767f790 100644
--- a/src/gromacs/hardware/CMakeLists.txt
+++ b/src/gromacs/hardware/CMakeLists.txt
@@ -35,11 +35,26 @@
 gmx_add_libgromacs_sources(
     cpuinfo.cpp
     detecthardware.cpp
+    device_management_common.cpp
     hardwaretopology.cpp
     printhardware.cpp
     identifyavx512fmaunits.cpp
     )
 
+if(GMX_GPU_OPENCL)
+    gmx_add_libgromacs_sources(
+        device_management_ocl.cpp
+        )
+elseif(GMX_GPU_CUDA)
+    gmx_add_libgromacs_sources(
+        device_management.cu
+        )
+else()
+    gmx_add_libgromacs_sources(
+        device_management.cpp
+    )
+endif()
+
 if (BUILD_TESTING)
     add_subdirectory(tests)
 endif()
diff --git a/src/gromacs/hardware/detecthardware.cpp b/src/gromacs/hardware/detecthardware.cpp
index e635ca0b17..7e8ac92c24 100644
--- a/src/gromacs/hardware/detecthardware.cpp
+++ b/src/gromacs/hardware/detecthardware.cpp
@@ -48,8 +48,8 @@
 #include <vector>
 
 #include "gromacs/compat/pointers.h"
-#include "gromacs/gpu_utils/gpu_utils.h"
 #include "gromacs/hardware/cpuinfo.h"
+#include "gromacs/hardware/device_management.h"
 #include "gromacs/hardware/hardwaretopology.h"
 #include "gromacs/hardware/hw_info.h"
 #include "gromacs/simd/support.h"
diff --git a/src/gromacs/hardware/device_information.h b/src/gromacs/hardware/device_information.h
new file mode 100644
index 0000000000..d9116a3a72
--- /dev/null
+++ b/src/gromacs/hardware/device_information.h
@@ -0,0 +1,148 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ *  \brief Declares the GPU type traits for non-GPU builds.
+ *
+ *  \author Mark Abraham <mark.j.abraham@gmail.com>
+ *  \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \inlibraryapi
+ * \ingroup module_hardware
+ */
+#ifndef GMX_HARDWARE_DEVICE_INFORMATION_H
+#define GMX_HARDWARE_DEVICE_INFORMATION_H
+
+#include "config.h"
+
+#if GMX_GPU_CUDA
+#    include <cuda_runtime.h>
+#endif
+
+#if GMX_GPU_OPENCL
+#    include "gromacs/gpu_utils/gmxopencl.h"
+#endif
+#include "gromacs/utility/enumerationhelpers.h"
+
+//! Constant used to help minimize preprocessed code
+static constexpr bool c_binarySupportsGpus = (GMX_GPU != 0);
+
+//! Possible results of the GPU detection/check.
+enum class DeviceStatus : int
+{
+    //! The device is compatible
+    Compatible = 0,
+    //! Device does not exist
+    Nonexistent = 1,
+    //! Device is not compatible
+    Incompatible = 2,
+    //! OpenCL device has incompatible cluster size for non-bonded kernels.
+    IncompatibleClusterSize = 3,
+    /*! \brief An error occurred he functionality checks.
+     * That indicates malfunctioning of the device, driver, or incompatible driver/runtime.
+     */
+    NonFunctional = 4,
+    /*! \brief CUDA devices are busy or unavailable.
+     * typically due to use of \p cudaComputeModeExclusive, \p cudaComputeModeProhibited modes.
+     */
+    Unavailable = 5,
+    //! Enumeration size
+    Count = 6
+};
+
+/*! \brief Names of the GPU detection/check results
+ *
+ * Check-source wants to warn about the use of a symbol name that would
+ * require an inclusion of config.h. However the use is in a comment, so that
+ * is a false warning. So C-style string concatenation is used to fool the
+ * naive parser in check-source. That needs a clang-format suppression
+ * in order to look reasonable. Also clang-tidy wants to suggest that a comma is
+ * missing, so that is suppressed.
+ */
+static const gmx::EnumerationArray<DeviceStatus, const char*> c_deviceStateString = {
+    "compatible", "nonexistent", "incompatible",
+    // clang-format off
+    // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
+    "incompatible (please recompile with correct GMX" "_OPENCL_NB_CLUSTER_SIZE of 4)",
+    // clang-format on
+    "non-functional", "unavailable"
+};
+
+//! Device vendors
+enum class DeviceVendor : int
+{
+    //! No data
+    Unknown = 0,
+    //! NVIDIA
+    Nvidia = 1,
+    //! Advanced Micro Devices
+    Amd = 2,
+    //! Intel
+    Intel = 3,
+    //! Enumeration size
+    Count = 4
+};
+
+
+/*! \libinternal \brief Platform-dependent device information.
+ *
+ * The device information is queried and set at detection and contains
+ * both information about the device/hardware returned by the runtime as well
+ * as additional data like support status.
+ */
+struct DeviceInformation
+{
+    //! Device status.
+    DeviceStatus stat;
+    //! ID of the device.
+    int id;
+
+#if GMX_GPU_CUDA
+    //! CUDA device properties.
+    cudaDeviceProp prop;
+#elif GMX_GPU_OPENCL
+    cl_platform_id oclPlatformId;       //!< OpenCL Platform ID.
+    cl_device_id   oclDeviceId;         //!< OpenCL Device ID.
+    char           device_name[256];    //!< Device name.
+    char           device_version[256]; //!< Device version.
+    char           vendorName[256];     //!< Device vendor name.
+    int            compute_units;       //!< Number of compute units.
+    int            adress_bits;         //!< Number of address bits the device is capable of.
+    DeviceVendor   deviceVendor;        //!< Device vendor.
+    size_t         maxWorkItemSizes[3]; //!< Workgroup size limits (CL_DEVICE_MAX_WORK_ITEM_SIZES).
+    size_t         maxWorkGroupSize;    //!< Workgroup total size limit (CL_DEVICE_MAX_WORK_GROUP_SIZE).
+#endif
+};
+
+#endif // GMX_HARDWARE_DEVICE_INFORMATION_H
diff --git a/src/gromacs/hardware/device_management.cpp b/src/gromacs/hardware/device_management.cpp
new file mode 100644
index 0000000000..1d03f1b0be
--- /dev/null
+++ b/src/gromacs/hardware/device_management.cpp
@@ -0,0 +1,90 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015,2017 The GROMACS development team.
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *  \brief Defines the CPU stubs for the device management.
+ *
+ *  \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_hardware
+ */
+#include "gmxpre.h"
+
+#include "device_management.h"
+
+bool isGpuDetectionFunctional(std::string* errorMessage)
+{
+    if (errorMessage != nullptr)
+    {
+        errorMessage->assign("GROMACS has been built without GPU support.");
+    }
+    return false;
+}
+
+void findGpus(gmx_gpu_info_t* /* gpu_info */)
+{
+    GMX_RELEASE_ASSERT(false, "Trying to initialize GPUs in the build that does not support them.");
+}
+
+void init_gpu(const DeviceInformation* /* deviceInfo */)
+{
+    GMX_RELEASE_ASSERT(false, "Trying to initialize GPU in the build that does not support GPUs.");
+}
+
+void free_gpu(const DeviceInformation* /* deviceInfo */) {}
+
+DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& /* gpu_info */, int /* deviceId */)
+{
+    GMX_RELEASE_ASSERT(
+            false, "Trying to get GPU device information in the build that does not support GPUs.");
+    return nullptr;
+}
+
+void get_gpu_device_info_string(char* /* s */, const gmx_gpu_info_t& /* gpu_info */, int /* index */)
+{
+    GMX_RELEASE_ASSERT(
+            false,
+            "Trying to get the GPU device description in the build that does not support GPUs.");
+}
+
+size_t sizeof_gpu_dev_info()
+{
+    return 0;
+}
+
+DeviceStatus gpu_info_get_stat(const gmx_gpu_info_t& /* gpu_info */, int /* index */)
+{
+    return DeviceStatus::Nonexistent;
+}
diff --git a/src/gromacs/hardware/device_management.cu b/src/gromacs/hardware/device_management.cu
new file mode 100644
index 0000000000..fba12ace11
--- /dev/null
+++ b/src/gromacs/hardware/device_management.cu
@@ -0,0 +1,466 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015,2017 The GROMACS development team.
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *  \brief Defines the CUDA implementations of the device management.
+ *
+ *  \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_hardware
+ */
+#include "gmxpre.h"
+
+#include "device_management.h"
+
+#include <assert.h>
+
+#include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/device_context.h"
+#include "gromacs/gpu_utils/device_stream.h"
+#include "gromacs/utility/programcontext.h"
+#include "gromacs/utility/smalloc.h"
+
+/*! \internal \brief
+ * Max number of devices supported by CUDA (for consistency checking).
+ *
+ * In reality it is 16 with CUDA <=v5.0, but let's stay on the safe side.
+ */
+static int cuda_max_device_count = 32;
+
+/** Dummy kernel used for sanity checking. */
+static __global__ void k_dummy_test(void) {}
+
+static cudaError_t checkCompiledTargetCompatibility(int deviceId, const cudaDeviceProp& deviceProp)
+{
+    cudaFuncAttributes attributes;
+    cudaError_t        stat = cudaFuncGetAttributes(&attributes, k_dummy_test);
+
+    if (cudaErrorInvalidDeviceFunction == stat)
+    {
+        fprintf(stderr,
+                "\nWARNING: The %s binary does not include support for the CUDA architecture of "
+                "the GPU ID #%d (compute capability %d.%d) detected during detection. "
+                "By default, GROMACS supports all architectures of compute "
+                "capability >= 3.0, so your GPU "
+                "might be rare, or some architectures were disabled in the build. \n"
+                "Consult the install guide for how to use the GMX_CUDA_TARGET_SM and "
+                "GMX_CUDA_TARGET_COMPUTE CMake variables to add this architecture. \n",
+                gmx::getProgramContext().displayName(), deviceId, deviceProp.major, deviceProp.minor);
+    }
+
+    return stat;
+}
+
+/*!
+ * \brief Runs GPU sanity checks.
+ *
+ * Runs a series of checks to determine that the given GPU and underlying CUDA
+ * driver/runtime functions properly.
+ *
+ * \todo Currently we do not make a distinction between the type of errors
+ *       that can appear during functionality checks. This needs to be improved,
+ *       e.g if the dummy test kernel fails to execute with a "device busy message"
+ *       we should appropriately report that the device is busy instead of NonFunctional.
+ *
+ * \todo Introduce errors codes and handle errors more smoothly.
+ *
+ *
+ * \param[in]  dev_id      the device ID of the GPU or -1 if the device has already been initialized
+ * \param[in]  dev_prop    The device properties structure
+ * \returns                0 if the device looks OK, -1 if it sanity checks failed, and -2 if the device is busy
+ */
+static DeviceStatus isDeviceFunctional(int dev_id, const cudaDeviceProp& dev_prop)
+{
+    cudaError_t cu_err;
+    int         dev_count, id;
+
+    cu_err = cudaGetDeviceCount(&dev_count);
+    if (cu_err != cudaSuccess)
+    {
+        fprintf(stderr, "Error %d while querying device count: %s\n", cu_err, cudaGetErrorString(cu_err));
+        return DeviceStatus::NonFunctional;
+    }
+
+    /* no CUDA compatible device at all */
+    if (dev_count == 0)
+    {
+        return DeviceStatus::NonFunctional;
+    }
+
+    /* things might go horribly wrong if cudart is not compatible with the driver */
+    if (dev_count < 0 || dev_count > cuda_max_device_count)
+    {
+        return DeviceStatus::NonFunctional;
+    }
+
+    if (dev_id == -1) /* device already selected let's not destroy the context */
+    {
+        cu_err = cudaGetDevice(&id);
+        if (cu_err != cudaSuccess)
+        {
+            fprintf(stderr, "Error %d while querying device id: %s\n", cu_err, cudaGetErrorString(cu_err));
+            return DeviceStatus::NonFunctional;
+        }
+    }
+    else
+    {
+        id = dev_id;
+        if (id > dev_count - 1) /* pfff there's no such device */
+        {
+            fprintf(stderr,
+                    "The requested device with id %d does not seem to exist (device count=%d)\n",
+                    dev_id, dev_count);
+            return DeviceStatus::NonFunctional;
+        }
+    }
+
+    /* both major & minor is 9999 if no CUDA capable devices are present */
+    if (dev_prop.major == 9999 && dev_prop.minor == 9999)
+    {
+        return DeviceStatus::NonFunctional;
+    }
+    /* we don't care about emulation mode */
+    if (dev_prop.major == 0)
+    {
+        return DeviceStatus::NonFunctional;
+    }
+
+    if (id != -1)
+    {
+        cu_err = cudaSetDevice(id);
+        if (cu_err != cudaSuccess)
+        {
+            fprintf(stderr, "Error %d while switching to device #%d: %s\n", cu_err, id,
+                    cudaGetErrorString(cu_err));
+            return DeviceStatus::NonFunctional;
+        }
+    }
+
+    cu_err = checkCompiledTargetCompatibility(dev_id, dev_prop);
+    // Avoid triggering an error if GPU devices are in exclusive or prohibited mode;
+    // it is enough to check for cudaErrorDevicesUnavailable only here because
+    // if we encounter it that will happen in cudaFuncGetAttributes in the above function.
+    if (cu_err == cudaErrorDevicesUnavailable)
+    {
+        return DeviceStatus::Unavailable;
+    }
+    else if (cu_err != cudaSuccess)
+    {
+        return DeviceStatus::NonFunctional;
+    }
+
+    /* try to execute a dummy kernel */
+    try
+    {
+        KernelLaunchConfig config;
+        config.blockSize[0]                = 512;
+        const auto          dummyArguments = prepareGpuKernelArguments(k_dummy_test, config);
+        DeviceInformation   deviceInfo;
+        const DeviceContext deviceContext(deviceInfo);
+        const DeviceStream  deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
+        launchGpuKernel(k_dummy_test, config, deviceStream, nullptr, "Dummy kernel", dummyArguments);
+    }
+    catch (gmx::GromacsException& ex)
+    {
+        // launchGpuKernel error is not fatal and should continue with marking the device bad
+        fprintf(stderr,
+                "Error occurred while running dummy kernel sanity check on device #%d:\n %s\n", id,
+                formatExceptionMessageToString(ex).c_str());
+        return DeviceStatus::NonFunctional;
+    }
+
+    if (cudaDeviceSynchronize() != cudaSuccess)
+    {
+        return DeviceStatus::NonFunctional;
+    }
+
+    /* destroy context if we created one */
+    if (id != -1)
+    {
+        cu_err = cudaDeviceReset();
+        CU_RET_ERR(cu_err, "cudaDeviceReset failed");
+    }
+
+    return DeviceStatus::Compatible;
+}
+
+/*! \brief Returns true if the gpu characterized by the device properties is
+ *  supported by the native gpu acceleration.
+ *
+ * \param[in] dev_prop  the CUDA device properties of the gpus to test.
+ * \returns             true if the GPU properties passed indicate a compatible
+ *                      GPU, otherwise false.
+ */
+static bool is_gmx_supported_gpu(const cudaDeviceProp& dev_prop)
+{
+    return (dev_prop.major >= 3);
+}
+
+/*! \brief Checks if a GPU with a given ID is supported by the native GROMACS acceleration.
+ *
+ *  Returns a status value which indicates compatibility or one of the following
+ *  errors: incompatibility or insanity (=unexpected behavior).
+ *
+ *  As the error handling only permits returning the state of the GPU, this function
+ *  does not clear the CUDA runtime API status allowing the caller to inspect the error
+ *  upon return. Note that this also means it is the caller's responsibility to
+ *  reset the CUDA runtime state.
+ *
+ *  \param[in]  deviceId   the ID of the GPU to check.
+ *  \param[in]  deviceProp the CUDA device properties of the device checked.
+ *  \returns               the status of the requested device
+ */
+static DeviceStatus checkDeviceStatus(int deviceId, const cudaDeviceProp& deviceProp)
+{
+    if (!is_gmx_supported_gpu(deviceProp))
+    {
+        return DeviceStatus::Incompatible;
+    }
+    return isDeviceFunctional(deviceId, deviceProp);
+}
+
+bool isGpuDetectionFunctional(std::string* errorMessage)
+{
+    cudaError_t stat;
+    int         driverVersion = -1;
+    stat                      = cudaDriverGetVersion(&driverVersion);
+    GMX_ASSERT(stat != cudaErrorInvalidValue,
+               "An impossible null pointer was passed to cudaDriverGetVersion");
+    GMX_RELEASE_ASSERT(
+            stat == cudaSuccess,
+            gmx::formatString("An unexpected value was returned from cudaDriverGetVersion %s: %s",
+                              cudaGetErrorName(stat), cudaGetErrorString(stat))
+                    .c_str());
+    bool foundDriver = (driverVersion > 0);
+    if (!foundDriver)
+    {
+        // Can't detect GPUs if there is no driver
+        if (errorMessage != nullptr)
+        {
+            errorMessage->assign("No valid CUDA driver found");
+        }
+        return false;
+    }
+
+    int numDevices;
+    stat = cudaGetDeviceCount(&numDevices);
+    if (stat != cudaSuccess)
+    {
+        if (errorMessage != nullptr)
+        {
+            /* cudaGetDeviceCount failed which means that there is
+             * something wrong with the machine: driver-runtime
+             * mismatch, all GPUs being busy in exclusive mode,
+             * invalid CUDA_VISIBLE_DEVICES, or some other condition
+             * which should result in GROMACS issuing at least a
+             * warning. */
+            errorMessage->assign(cudaGetErrorString(stat));
+        }
+
+        // Consume the error now that we have prepared to handle
+        // it. This stops it reappearing next time we check for
+        // errors. Note that if CUDA_VISIBLE_DEVICES does not contain
+        // valid devices, then cudaGetLastError returns the
+        // (undocumented) cudaErrorNoDevice, but this should not be a
+        // problem as there should be no future CUDA API calls.
+        // NVIDIA bug report #2038718 has been filed.
+        cudaGetLastError();
+        // Can't detect GPUs
+        return false;
+    }
+
+    // We don't actually use numDevices here, that's not the job of
+    // this function.
+    return true;
+}
+
+void findGpus(gmx_gpu_info_t* gpu_info)
+{
+    assert(gpu_info);
+
+    gpu_info->n_dev_compatible = 0;
+
+    int         ndev;
+    cudaError_t stat = cudaGetDeviceCount(&ndev);
+    if (stat != cudaSuccess)
+    {
+        GMX_THROW(gmx::InternalError(
+                "Invalid call of findGpus() when CUDA API returned an error, perhaps "
+                "canDetectGpus() was not called appropriately beforehand."));
+    }
+
+    // We expect to start device support/sanity checks with a clean runtime error state
+    gmx::ensureNoPendingCudaError("");
+
+    DeviceInformation* devs;
+    snew(devs, ndev);
+    for (int i = 0; i < ndev; i++)
+    {
+        cudaDeviceProp prop;
+        memset(&prop, 0, sizeof(cudaDeviceProp));
+        stat = cudaGetDeviceProperties(&prop, i);
+        const DeviceStatus checkResult =
+                (stat != cudaSuccess) ? DeviceStatus::NonFunctional : checkDeviceStatus(i, prop);
+
+        devs[i].id   = i;
+        devs[i].prop = prop;
+        devs[i].stat = checkResult;
+
+        if (checkResult == DeviceStatus::Compatible)
+        {
+            gpu_info->n_dev_compatible++;
+        }
+        else
+        {
+            // TODO:
+            //  - we inspect the CUDA API state to retrieve and record any
+            //    errors that occurred during is_gmx_supported_gpu_id() here,
+            //    but this would be more elegant done within is_gmx_supported_gpu_id()
+            //    and only return a string with the error if one was encountered.
+            //  - we'll be reporting without rank information which is not ideal.
+            //  - we'll end up warning also in cases where users would already
+            //    get an error before mdrun aborts.
+            //
+            // Here we also clear the CUDA API error state so potential
+            // errors during sanity checks don't propagate.
+            if ((stat = cudaGetLastError()) != cudaSuccess)
+            {
+                gmx_warning("An error occurred while sanity checking device #%d; %s: %s",
+                            devs[i].id, cudaGetErrorName(stat), cudaGetErrorString(stat));
+            }
+        }
+    }
+
+    stat = cudaPeekAtLastError();
+    GMX_RELEASE_ASSERT(stat == cudaSuccess,
+                       gmx::formatString("We promise to return with clean CUDA state, but "
+                                         "non-success state encountered: %s: %s",
+                                         cudaGetErrorName(stat), cudaGetErrorString(stat))
+                               .c_str());
+
+    gpu_info->n_dev      = ndev;
+    gpu_info->deviceInfo = devs;
+}
+
+void init_gpu(const DeviceInformation* deviceInfo)
+{
+    cudaError_t stat;
+
+    assert(deviceInfo);
+
+    stat = cudaSetDevice(deviceInfo->id);
+    if (stat != cudaSuccess)
+    {
+        auto message = gmx::formatString("Failed to initialize GPU #%d", deviceInfo->id);
+        CU_RET_ERR(stat, message.c_str());
+    }
+
+    if (debug)
+    {
+        fprintf(stderr, "Initialized GPU ID #%d: %s\n", deviceInfo->id, deviceInfo->prop.name);
+    }
+}
+
+void free_gpu(const DeviceInformation* deviceInfo)
+{
+    // One should only attempt to clear the device context when
+    // it has been used, but currently the only way to know that a GPU
+    // device was used is that deviceInfo will be non-null.
+    if (deviceInfo == nullptr)
+    {
+        return;
+    }
+
+    cudaError_t stat;
+
+    if (debug)
+    {
+        int gpuid;
+        stat = cudaGetDevice(&gpuid);
+        CU_RET_ERR(stat, "cudaGetDevice failed");
+        fprintf(stderr, "Cleaning up context on GPU ID #%d\n", gpuid);
+    }
+
+    stat = cudaDeviceReset();
+    if (stat != cudaSuccess)
+    {
+        gmx_warning("Failed to free GPU #%d: %s", deviceInfo->id, cudaGetErrorString(stat));
+    }
+}
+
+DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId)
+{
+    if (deviceId < 0 || deviceId >= gpu_info.n_dev)
+    {
+        gmx_incons("Invalid GPU deviceId requested");
+    }
+    return &gpu_info.deviceInfo[deviceId];
+}
+
+void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int index)
+{
+    assert(s);
+
+    if (index < 0 && index >= gpu_info.n_dev)
+    {
+        return;
+    }
+
+    DeviceInformation* dinfo = &gpu_info.deviceInfo[index];
+
+    bool bGpuExists =
+            (dinfo->stat != DeviceStatus::Nonexistent && dinfo->stat != DeviceStatus::NonFunctional);
+
+    if (!bGpuExists)
+    {
+        sprintf(s, "#%d: %s, stat: %s", dinfo->id, "N/A", c_deviceStateString[dinfo->stat]);
+    }
+    else
+    {
+        sprintf(s, "#%d: NVIDIA %s, compute cap.: %d.%d, ECC: %3s, stat: %s", dinfo->id,
+                dinfo->prop.name, dinfo->prop.major, dinfo->prop.minor,
+                dinfo->prop.ECCEnabled ? "yes" : " no", c_deviceStateString[dinfo->stat]);
+    }
+}
+
+size_t sizeof_gpu_dev_info(void)
+{
+    return sizeof(DeviceInformation);
+}
+
+DeviceStatus gpu_info_get_stat(const gmx_gpu_info_t& info, int index)
+{
+    return info.deviceInfo[index].stat;
+}
diff --git a/src/gromacs/hardware/device_management.h b/src/gromacs/hardware/device_management.h
new file mode 100644
index 0000000000..ed86982c67
--- /dev/null
+++ b/src/gromacs/hardware/device_management.h
@@ -0,0 +1,188 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ *
+ * \brief Implements the device management for OpenCL.
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \inlibraryapi
+ * \ingroup module_hardware
+ */
+#ifndef GMX_HARDWARE_DEVICE_MANAGEMENT_H
+#define GMX_HARDWARE_DEVICE_MANAGEMENT_H
+
+#include "gmxpre.h"
+
+#include <string>
+#include <vector>
+
+#include "gromacs/hardware/device_information.h"
+
+struct DeviceInformation;
+enum class DeviceStatus : int;
+struct gmx_gpu_info_t;
+
+/*! \brief Return whether GPUs can be detected
+ *
+ * Returns true when this is a build of \Gromacs configured to support
+ * GPU usage, GPU detection is not disabled by an environment variable
+ * and a valid device driver, ICD, and/or runtime was detected.
+ * Does not throw. */
+bool canPerformGpuDetection();
+
+/*! \brief Return whether GPU detection is functioning correctly
+ *
+ * Returns true when this is a build of \Gromacs configured to support
+ * GPU usage, and a valid device driver, ICD, and/or runtime was detected.
+ *
+ * This function is not intended to be called from build
+ * configurations that do not support GPUs, and there will be no
+ * descriptive message in that case.
+ *
+ * \param[out] errorMessage  When returning false on a build configured with
+ *                           GPU support and non-nullptr was passed,
+ *                           the string contains a descriptive message about
+ *                           why GPUs cannot be detected.
+ *
+ * Does not throw. */
+bool isGpuDetectionFunctional(std::string* errorMessage);
+
+/*! \brief Find all GPUs in the system.
+ *
+ *  Will detect every GPU supported by the device driver in use.
+ *  Must only be called if canPerformGpuDetection() has returned true.
+ *  This routine also checks for the compatibility of each and fill the
+ *  gpu_info->deviceInfo array with the required information on each the
+ *  device: ID, device properties, status.
+ *
+ *  Note that this function leaves the GPU runtime API error state clean;
+ *  this is implemented ATM in the CUDA flavor.
+ *  TODO: check if errors do propagate in OpenCL as they do in CUDA and
+ *  whether there is a mechanism to "clear" them.
+ *
+ *  \param[in] gpu_info    pointer to structure holding GPU information.
+ *
+ *  \throws                InternalError if a GPU API returns an unexpected failure (because
+ *                         the call to canDetectGpus() should always prevent this occuring)
+ */
+void findGpus(gmx_gpu_info_t* gpu_info);
+
+/*! \brief Return a container of the detected GPUs that are compatible.
+ *
+ * This function filters the result of the detection for compatible
+ * GPUs, based on the previously run compatibility tests.
+ *
+ * \param[in]     gpu_info    Information detected about GPUs, including compatibility.
+ * \return                    vector of IDs of GPUs already recorded as compatible */
+std::vector<int> getCompatibleGpus(const gmx_gpu_info_t& gpu_info);
+
+/*! \brief Return a string describing how compatible the GPU with given \c index is.
+ *
+ * \param[in]   gpu_info    Information about detected GPUs
+ * \param[in]   index       index of GPU to ask about
+ * \returns                 A null-terminated C string describing the compatibility status, useful for error messages.
+ */
+const char* getGpuCompatibilityDescription(const gmx_gpu_info_t& gpu_info, int index);
+
+/*! \brief Frees the gpu_dev and dev_use array fields of \p gpu_info.
+ *
+ * \param[in]    gpu_info    pointer to structure holding GPU information
+ */
+void free_gpu_info(const gmx_gpu_info_t* gpu_info);
+
+/*! \brief Initializes the GPU described by \c deviceInfo.
+ *
+ * TODO Doxygen complains about these - probably a Doxygen bug, since
+ * the patterns here are the same as elsewhere in this header.
+ *
+ * \param[in]    deviceInfo   device info of the GPU to initialize
+ *
+ * Issues a fatal error for any critical errors that occur during
+ * initialization.
+ */
+void init_gpu(const DeviceInformation* deviceInfo);
+
+/*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
+ *
+ * If \c deviceInfo is nullptr, then it is understood that no device
+ * was selected so no context is active to be freed. Otherwise, the
+ * context is explicitly destroyed and therefore all data uploaded to
+ * the GPU is lost. This must only be called when none of this data is
+ * required anymore, because subsequent attempts to free memory
+ * associated with the context will otherwise fail.
+ *
+ * Calls gmx_warning upon errors.
+ *
+ * \param[in]  deviceInfo   device info of the GPU to clean up for
+ *
+ * \returns                 true if no error occurs during the freeing.
+ */
+void free_gpu(const DeviceInformation* deviceInfo);
+
+/*! \brief Return a pointer to the device info for \c deviceId
+ *
+ * \param[in] gpu_info      GPU info of all detected devices in the system.
+ * \param[in] deviceId      ID for the GPU device requested.
+ *
+ * \returns                 Pointer to the device info for \c deviceId.
+ */
+DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId);
+
+/*! \brief Formats and returns a device information string for a given GPU.
+ *
+ * Given an index *directly* into the array of available GPUs (gpu_dev)
+ * returns a formatted info string for the respective GPU which includes
+ * ID, name, compute capability, and detection status.
+ *
+ * \param[out]  s           pointer to output string (has to be allocated externally)
+ * \param[in]   gpu_info    Information about detected GPUs
+ * \param[in]   index       an index *directly* into the array of available GPUs
+ */
+void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int index);
+
+
+/*! \brief Returns the size of the gpu_dev_info struct.
+ *
+ * The size of gpu_dev_info can be used for allocation and communication.
+ *
+ * \returns                 size in bytes of gpu_dev_info
+ */
+size_t sizeof_gpu_dev_info();
+
+//! Get status of device with specified index
+DeviceStatus gpu_info_get_stat(const gmx_gpu_info_t& info, int index);
+
+#endif // GMX_HARDWARE_DEVICE_MANAGEMENT_H
diff --git a/src/gromacs/hardware/device_management_common.cpp b/src/gromacs/hardware/device_management_common.cpp
new file mode 100644
index 0000000000..d5325b77e0
--- /dev/null
+++ b/src/gromacs/hardware/device_management_common.cpp
@@ -0,0 +1,89 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015,2017 The GROMACS development team.
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *  \brief Defines the implementations of the device management that are common for CPU, CUDA and OpenCL.
+ *
+ *  \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_hardware
+ */
+#include "gmxpre.h"
+
+#include <assert.h>
+
+#include "gromacs/hardware/device_information.h"
+#include "gromacs/hardware/device_management.h"
+#include "gromacs/hardware/gpu_hw_info.h"
+#include "gromacs/utility/smalloc.h"
+
+bool canPerformGpuDetection()
+{
+    if (c_binarySupportsGpus && getenv("GMX_DISABLE_GPU_DETECTION") == nullptr)
+    {
+        return isGpuDetectionFunctional(nullptr);
+    }
+    else
+    {
+        return false;
+    }
+}
+
+std::vector<int> getCompatibleGpus(const gmx_gpu_info_t& gpu_info)
+{
+    // Possible minor over-allocation here, but not important for anything
+    std::vector<int> compatibleGpus;
+    compatibleGpus.reserve(gpu_info.n_dev);
+    for (int i = 0; i < gpu_info.n_dev; i++)
+    {
+        assert(gpu_info.deviceInfo);
+        if (gpu_info_get_stat(gpu_info, i) == DeviceStatus::Compatible)
+        {
+            compatibleGpus.push_back(i);
+        }
+    }
+    return compatibleGpus;
+}
+
+const char* getGpuCompatibilityDescription(const gmx_gpu_info_t& gpu_info, int index)
+{
+    return (index >= gpu_info.n_dev ? c_deviceStateString[DeviceStatus::Nonexistent]
+                                    : c_deviceStateString[gpu_info_get_stat(gpu_info, index)]);
+}
+
+void free_gpu_info(const gmx_gpu_info_t* gpu_info)
+{
+    sfree(static_cast<void*>(gpu_info->deviceInfo)); // circumvent is_pod check in sfree
+}
diff --git a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp b/src/gromacs/hardware/device_management_ocl.cpp
similarity index 99%
rename from src/gromacs/gpu_utils/gpu_utils_ocl.cpp
rename to src/gromacs/hardware/device_management_ocl.cpp
index 9f78bbda6c..3cf2eec706 100644
--- a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
+++ b/src/gromacs/hardware/device_management_ocl.cpp
@@ -59,10 +59,11 @@
 
 #include <memory.h>
 
-#include "gromacs/gpu_utils/gpu_utils.h"
 #include "gromacs/gpu_utils/ocl_compiler.h"
 #include "gromacs/gpu_utils/oclraii.h"
 #include "gromacs/gpu_utils/oclutils.h"
+#include "gromacs/hardware/device_information.h"
+#include "gromacs/hardware/device_management.h"
 #include "gromacs/hardware/hw_info.h"
 #include "gromacs/utility/cstringutil.h"
 #include "gromacs/utility/exceptions.h"
@@ -518,32 +519,6 @@ void findGpus(gmx_gpu_info_t* gpu_info)
     sfree(ocl_platform_ids);
 }
 
-void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int index)
-{
-    assert(s);
-
-    if (index < 0 && index >= gpu_info.n_dev)
-    {
-        return;
-    }
-
-    DeviceInformation* dinfo = &gpu_info.deviceInfo[index];
-
-    bool bGpuExists =
-            (dinfo->stat != DeviceStatus::Nonexistent && dinfo->stat != DeviceStatus::NonFunctional);
-
-    if (!bGpuExists)
-    {
-        sprintf(s, "#%d: %s, stat: %s", index, "N/A", c_deviceStateString[dinfo->stat]);
-    }
-    else
-    {
-        sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s", index, dinfo->device_name,
-                dinfo->vendorName, dinfo->device_version, c_deviceStateString[dinfo->stat]);
-    }
-}
-
-
 void init_gpu(const DeviceInformation* deviceInfo)
 {
     assert(deviceInfo);
@@ -566,6 +541,8 @@ void init_gpu(const DeviceInformation* deviceInfo)
     }
 }
 
+void free_gpu(const DeviceInformation* /* deviceInfo */) {}
+
 DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId)
 {
     if (deviceId < 0 || deviceId >= gpu_info.n_dev)
@@ -575,6 +552,31 @@ DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId)
     return &gpu_info.deviceInfo[deviceId];
 }
 
+void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int index)
+{
+    assert(s);
+
+    if (index < 0 && index >= gpu_info.n_dev)
+    {
+        return;
+    }
+
+    DeviceInformation* dinfo = &gpu_info.deviceInfo[index];
+
+    bool bGpuExists =
+            (dinfo->stat != DeviceStatus::Nonexistent && dinfo->stat != DeviceStatus::NonFunctional);
+
+    if (!bGpuExists)
+    {
+        sprintf(s, "#%d: %s, stat: %s", index, "N/A", c_deviceStateString[dinfo->stat]);
+    }
+    else
+    {
+        sprintf(s, "#%d: name: %s, vendor: %s, device version: %s, stat: %s", index, dinfo->device_name,
+                dinfo->vendorName, dinfo->device_version, c_deviceStateString[dinfo->stat]);
+    }
+}
+
 size_t sizeof_gpu_dev_info()
 {
     return sizeof(DeviceInformation);
diff --git a/src/gromacs/hardware/gpu_hw_info.h b/src/gromacs/hardware/gpu_hw_info.h
index 125a3a9eed..ff114d1248 100644
--- a/src/gromacs/hardware/gpu_hw_info.h
+++ b/src/gromacs/hardware/gpu_hw_info.h
@@ -41,47 +41,6 @@
 
 struct DeviceInformation;
 
-//! Possible results of the GPU detection/check.
-enum class DeviceStatus : int
-{
-    //! The device is compatible
-    Compatible = 0,
-    //! Device does not exist
-    Nonexistent = 1,
-    //! Device is not compatible
-    Incompatible = 2,
-    //! OpenCL device has incompatible cluster size for non-bonded kernels.
-    IncompatibleClusterSize = 3,
-    /*! \brief An error occurred he functionality checks.
-     * That indicates malfunctioning of the device, driver, or incompatible driver/runtime.
-     */
-    NonFunctional = 4,
-    /*! \brief CUDA devices are busy or unavailable.
-     * typically due to use of \p cudaComputeModeExclusive, \p cudaComputeModeProhibited modes.
-     */
-    Unavailable = 5,
-    //! Enumeration size
-    Count = 6
-};
-
-/*! \brief Names of the GPU detection/check results
- *
- * Check-source wants to warn about the use of a symbol name that would
- * require an inclusion of config.h. However the use is in a comment, so that
- * is a false warning. So C-style string concatenation is used to fool the
- * naive parser in check-source. That needs a clang-format suppression
- * in order to look reasonable. Also clang-tidy wants to suggest that a comma is
- * missing, so that is suppressed.
- */
-static const gmx::EnumerationArray<DeviceStatus, const char*> c_deviceStateString = {
-    "compatible", "nonexistent", "incompatible",
-    // clang-format off
-    // NOLINTNEXTLINE(bugprone-suspicious-missing-comma)
-    "incompatible (please recompile with correct GMX" "_OPENCL_NB_CLUSTER_SIZE of 4)",
-    // clang-format on
-    "non-functional", "unavailable"
-};
-
 /*! \brief Information about GPU devices on this physical node.
  *
  * Includes either CUDA or OpenCL devices.  The gmx_hardware_detect
diff --git a/src/gromacs/hardware/printhardware.cpp b/src/gromacs/hardware/printhardware.cpp
index 4283441c0e..b7af58092b 100644
--- a/src/gromacs/hardware/printhardware.cpp
+++ b/src/gromacs/hardware/printhardware.cpp
@@ -44,8 +44,8 @@
 #include <string>
 #include <vector>
 
-#include "gromacs/gpu_utils/gpu_utils.h"
 #include "gromacs/hardware/cpuinfo.h"
+#include "gromacs/hardware/device_management.h"
 #include "gromacs/hardware/hardwaretopology.h"
 #include "gromacs/hardware/hw_info.h"
 #include "gromacs/hardware/identifyavx512fmaunits.h"
diff --git a/src/gromacs/mdlib/tests/constrtestrunners.cu b/src/gromacs/mdlib/tests/constrtestrunners.cu
index 161c63b5d5..6b97a80649 100644
--- a/src/gromacs/mdlib/tests/constrtestrunners.cu
+++ b/src/gromacs/mdlib/tests/constrtestrunners.cu
@@ -52,7 +52,7 @@
 #include <vector>
 
 #include "gromacs/gpu_utils/devicebuffer.cuh"
-#include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/device_management.h"
 #include "gromacs/mdlib/lincs_gpu.cuh"
 #include "gromacs/pbcutil/pbc.h"
 #include "gromacs/utility/unique_cptr.h"
diff --git a/src/gromacs/mdlib/tests/leapfrogtestrunners.cu b/src/gromacs/mdlib/tests/leapfrogtestrunners.cu
index f895816ad2..7f9a5766a3 100644
--- a/src/gromacs/mdlib/tests/leapfrogtestrunners.cu
+++ b/src/gromacs/mdlib/tests/leapfrogtestrunners.cu
@@ -53,7 +53,7 @@
 #include <vector>
 
 #include "gromacs/gpu_utils/devicebuffer.cuh"
-#include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/device_information.h"
 #include "gromacs/math/vec.h"
 #include "gromacs/mdlib/leapfrog_gpu.cuh"
 #include "gromacs/mdlib/stat.h"
diff --git a/src/gromacs/mdlib/tests/settletestrunners.cu b/src/gromacs/mdlib/tests/settletestrunners.cu
index f9cf9867f3..6bbf8eb5e3 100644
--- a/src/gromacs/mdlib/tests/settletestrunners.cu
+++ b/src/gromacs/mdlib/tests/settletestrunners.cu
@@ -52,7 +52,7 @@
 #include <vector>
 
 #include "gromacs/gpu_utils/devicebuffer.cuh"
-#include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/device_management.h"
 #include "gromacs/mdlib/settle_gpu.cuh"
 #include "gromacs/utility/unique_cptr.h"
 
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp
index c2664db220..37388e1e6e 100644
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -75,9 +75,9 @@
 #include "gromacs/gmxlib/nrnb.h"
 #include "gromacs/gpu_utils/device_context.h"
 #include "gromacs/gpu_utils/device_stream_manager.h"
-#include "gromacs/gpu_utils/gpu_utils.h"
 #include "gromacs/hardware/cpuinfo.h"
 #include "gromacs/hardware/detecthardware.h"
+#include "gromacs/hardware/device_management.h"
 #include "gromacs/hardware/printhardware.h"
 #include "gromacs/imd/imd.h"
 #include "gromacs/listed_forces/disre.h"
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
index 71e598fdf1..5f41fda5ef 100644
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
@@ -58,6 +58,7 @@
 #include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
 #include "gromacs/gpu_utils/typecasts.cuh"
 #include "gromacs/gpu_utils/vectype_ops.cuh"
+#include "gromacs/hardware/device_information.h"
 #include "gromacs/mdtypes/simulation_workload.h"
 #include "gromacs/nbnxm/atomdata.h"
 #include "gromacs/nbnxm/gpu_common.h"
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
index f5d64d7d83..cc1b6f37ea 100644
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
@@ -55,7 +55,7 @@
 #include "gromacs/gpu_utils/gpu_utils.h"
 #include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
 #include "gromacs/gpu_utils/pmalloc_cuda.h"
-#include "gromacs/hardware/gpu_hw_info.h"
+#include "gromacs/hardware/device_information.h"
 #include "gromacs/math/vectypes.h"
 #include "gromacs/mdlib/force_flags.h"
 #include "gromacs/mdtypes/interaction_const.h"
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
index eaa7bfec4b..1744e9890c 100644
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
@@ -73,6 +73,7 @@
 #include "gromacs/gpu_utils/device_context.h"
 #include "gromacs/gpu_utils/gputraits_ocl.h"
 #include "gromacs/gpu_utils/oclutils.h"
+#include "gromacs/hardware/device_information.h"
 #include "gromacs/hardware/hw_info.h"
 #include "gromacs/mdtypes/simulation_workload.h"
 #include "gromacs/nbnxm/atomdata.h"
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
index 7d74ebac4c..58d9624e17 100644
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
@@ -53,8 +53,8 @@
 #include <cmath>
 
 #include "gromacs/gpu_utils/device_stream_manager.h"
-#include "gromacs/gpu_utils/gpu_utils.h"
 #include "gromacs/gpu_utils/oclutils.h"
+#include "gromacs/hardware/device_information.h"
 #include "gromacs/hardware/gpu_hw_info.h"
 #include "gromacs/math/vectypes.h"
 #include "gromacs/mdlib/force_flags.h"
diff --git a/src/gromacs/taskassignment/taskassignment.cpp b/src/gromacs/taskassignment/taskassignment.cpp
index 8e0ace91cf..1688e69293 100644
--- a/src/gromacs/taskassignment/taskassignment.cpp
+++ b/src/gromacs/taskassignment/taskassignment.cpp
@@ -60,7 +60,7 @@
 
 #include "gromacs/domdec/domdec.h"
 #include "gromacs/gmxlib/network.h"
-#include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/device_management.h"
 #include "gromacs/hardware/hw_info.h"
 #include "gromacs/mdrunutility/multisim.h"
 #include "gromacs/mdtypes/commrec.h"
diff --git a/src/gromacs/taskassignment/usergpuids.cpp b/src/gromacs/taskassignment/usergpuids.cpp
index a370e506f7..275dbfd43b 100644
--- a/src/gromacs/taskassignment/usergpuids.cpp
+++ b/src/gromacs/taskassignment/usergpuids.cpp
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -49,7 +49,7 @@
 #include <string>
 #include <vector>
 
-#include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/device_management.h"
 #include "gromacs/hardware/hw_info.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/stringutil.h"
-- 
2.22.0