- Quite a few error conditions are unhandled, noted with TODOs in several files
-- gmx_device_info_t needs struct field documentation
-
3. ENHANCEMENTS
============
- Implement OpenCL kernels for Intel GPUs
int nthread,
PmeRunMode runMode,
PmeGpu* pmeGpu,
- const gmx_device_info_t* gpuInfo,
+ const DeviceInformation* deviceInfo,
const PmeGpuProgram* pmeGpuProgram,
const gmx::MDLogger& /*mdlog*/)
{
GMX_THROW(gmx::NotImplementedError(errorString));
}
}
- pme_gpu_reinit(pme.get(), gpuInfo, pmeGpuProgram);
+ pme_gpu_reinit(pme.get(), deviceInfo, pmeGpuProgram);
pme_init_all_work(&pme->solve_work, pme->nthread, pme->nkx);
struct t_nrnb;
struct PmeGpu;
struct gmx_wallclock_gpu_pme_t;
-struct gmx_device_info_t;
+struct DeviceInformation;
struct gmx_enerdata_t;
struct gmx_mtop_t;
struct gmx_pme_t;
* \returns Pointer to newly allocated and initialized PME data.
*
* \todo We should evolve something like a \c GpuManager that holds \c
- * gmx_device_info_t * and \c PmeGpuProgram* and perhaps other
+ * DeviceInformation* and \c PmeGpuProgram* and perhaps other
* related things whose lifetime can/should exceed that of a task (or
* perhaps task manager). See Redmine #2522.
*/
int nthread,
PmeRunMode runMode,
PmeGpu* pmeGpu,
- const gmx_device_info_t* gpuInfo,
+ const DeviceInformation* deviceInfo,
const PmeGpuProgram* pmeGpuProgram,
const gmx::MDLogger& mdlog);
#elif GMX_GPU == GMX_GPU_OPENCL
cl_command_queue_properties queueProperties =
pmeGpu->archSpecific->useTiming ? CL_QUEUE_PROFILING_ENABLE : 0;
- cl_device_id device_id = pmeGpu->deviceInfo->ocl_gpu_id.ocl_device_id;
+ cl_device_id device_id = pmeGpu->deviceInfo->oclDeviceId;
cl_int clError;
pmeGpu->archSpecific->pmeStream =
clCreateCommandQueue(pmeGpu->archSpecific->context, device_id, queueProperties, &clError);
* TODO: this should become PmeGpu::PmeGpu()
*
* \param[in,out] pme The PME structure.
- * \param[in,out] gpuInfo The GPU information structure.
+ * \param[in,out] deviceInfo The GPU device information structure.
* \param[in] pmeGpuProgram The handle to the program/kernel data created outside (e.g. in unit tests/runner)
*/
-static void pme_gpu_init(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const PmeGpuProgram* pmeGpuProgram)
+static void pme_gpu_init(gmx_pme_t* pme, const DeviceInformation* deviceInfo, const PmeGpuProgram* pmeGpuProgram)
{
pme->gpu = new PmeGpu();
PmeGpu* pmeGpu = pme->gpu;
pme_gpu_set_testing(pmeGpu, false);
- pmeGpu->deviceInfo = gpuInfo;
+ pmeGpu->deviceInfo = deviceInfo;
GMX_ASSERT(pmeGpuProgram != nullptr, "GPU kernels must be already compiled");
pmeGpu->programHandle_ = pmeGpuProgram;
}
}
-void pme_gpu_reinit(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const PmeGpuProgram* pmeGpuProgram)
+void pme_gpu_reinit(gmx_pme_t* pme, const DeviceInformation* deviceInfo, const PmeGpuProgram* pmeGpuProgram)
{
GMX_ASSERT(pme != nullptr, "Need valid PME object");
if (pme->runMode == PmeRunMode::CPU)
if (!pme->gpu)
{
/* First-time initialization */
- pme_gpu_init(pme, gpuInfo, pmeGpuProgram);
+ pme_gpu_init(pme, deviceInfo, pmeGpuProgram);
}
else
{
#include "pme_output.h"
class GpuEventSynchronizer;
-struct gmx_device_info_t;
+struct DeviceInformation;
struct gmx_hw_info_t;
struct gmx_gpu_opt_t;
struct gmx_pme_t; // only used in pme_gpu_reinit
* (Re-)initializes the PME GPU data at the beginning of the run or on DLB.
*
* \param[in,out] pme The PME structure.
- * \param[in] gpuInfo The GPU information structure.
+ * \param[in] deviceInfo The GPU device information structure.
* \param[in] pmeGpuProgram The PME GPU program data
* \throws gmx::NotImplementedError if this generally valid PME structure is not valid for GPU runs.
*/
GPU_FUNC_QUALIFIER void pme_gpu_reinit(gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
- const gmx_device_info_t* GPU_FUNC_ARGUMENT(gpuInfo),
+ const DeviceInformation* GPU_FUNC_ARGUMENT(deviceInfo),
const PmeGpuProgram* GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM;
/*! \libinternal \brief
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "pme_gpu_program_impl.h"
-PmeGpuProgram::PmeGpuProgram(const gmx_device_info_t* deviceInfo) :
+PmeGpuProgram::PmeGpuProgram(const DeviceInformation* deviceInfo) :
impl_(std::make_unique<PmeGpuProgramImpl>(deviceInfo))
{
}
PmeGpuProgram::~PmeGpuProgram() = default;
-PmeGpuProgramStorage buildPmeGpuProgram(const gmx_device_info_t* deviceInfo)
+PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation* deviceInfo)
{
if (!deviceInfo)
{
#include <memory>
struct PmeGpuProgramImpl;
-struct gmx_device_info_t;
+struct DeviceInformation;
class PmeGpuProgram
{
public:
- explicit PmeGpuProgram(const gmx_device_info_t* deviceInfo);
+ explicit PmeGpuProgram(const DeviceInformation* deviceInfo);
~PmeGpuProgram();
// TODO: design getters for information inside, if needed for PME, and make this private?
/*! \brief
* Factory function used to build persistent PME GPU program for the device at once.
*/
-PmeGpuProgramStorage buildPmeGpuProgram(const gmx_device_info_t* /*deviceInfo*/);
+PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation* /*deviceInfo*/);
#endif
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "pme_gpu_program_impl.h"
-PmeGpuProgramImpl::PmeGpuProgramImpl(const gmx_device_info_t* /*unused*/) :
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation* /* deviceInfo */) :
warpSize(0),
spreadWorkGroupSize(0),
gatherWorkGroupSize(0),
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
extern template void pme_gather_kernel<c_pmeOrder, false, c_wrapX, c_wrapY, true, false>(const PmeGpuCudaKernelParams);
extern template void pme_gather_kernel<c_pmeOrder, false, c_wrapX, c_wrapY, false, false>(const PmeGpuCudaKernelParams);
-PmeGpuProgramImpl::PmeGpuProgramImpl(const gmx_device_info_t*)
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation* /* deviceInfo */)
{
// kernel parameters
warpSize = warp_size;
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/gpu_utils/gputraits.h"
#include "gromacs/utility/classhelpers.h"
-struct gmx_device_info_t;
+struct DeviceInformation;
/*! \internal
* \brief
PmeGpuProgramImpl() = delete;
//! Constructor for the given device
- explicit PmeGpuProgramImpl(const gmx_device_info_t* deviceInfo);
+ explicit PmeGpuProgramImpl(const DeviceInformation* deviceInfo);
~PmeGpuProgramImpl();
GMX_DISALLOW_COPY_AND_ASSIGN(PmeGpuProgramImpl);
private:
// Compiles kernels, if supported. Called by the constructor.
- void compileKernels(const gmx_device_info_t* deviceInfo);
+ void compileKernels(const DeviceInformation* deviceInfo);
};
#endif
#include "pme_gpu_types_host.h"
#include "pme_grid.h"
-PmeGpuProgramImpl::PmeGpuProgramImpl(const gmx_device_info_t* deviceInfo)
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation* deviceInfo)
{
// Context creation (which should happen outside of this class: #2522)
- cl_platform_id platformId = deviceInfo->ocl_gpu_id.ocl_platform_id;
- cl_device_id deviceId = deviceInfo->ocl_gpu_id.ocl_device_id;
+ cl_platform_id platformId = deviceInfo->oclPlatformId;
+ cl_device_id deviceId = deviceInfo->oclDeviceId;
cl_context_properties contextProperties[3];
contextProperties[0] = CL_CONTEXT_PLATFORM;
contextProperties[1] = reinterpret_cast<cl_context_properties>(platformId);
* smaller than the minimum order^2 required in spread/gather ATM which
* we need to check for.
*/
-static void checkRequiredWarpSize(cl_kernel kernel, const char* kernelName, const gmx_device_info_t* deviceInfo)
+static void checkRequiredWarpSize(cl_kernel kernel, const char* kernelName, const DeviceInformation* deviceInfo)
{
if (deviceInfo->deviceVendor == DeviceVendor::Intel)
{
- size_t kernelWarpSize = gmx::ocl::getKernelWarpSize(kernel, deviceInfo->ocl_gpu_id.ocl_device_id);
+ size_t kernelWarpSize = gmx::ocl::getKernelWarpSize(kernel, deviceInfo->oclDeviceId);
if (kernelWarpSize < c_pmeSpreadGatherMinWarpSize)
{
}
}
-void PmeGpuProgramImpl::compileKernels(const gmx_device_info_t* deviceInfo)
+void PmeGpuProgramImpl::compileKernels(const DeviceInformation* deviceInfo)
{
// We might consider storing program as a member variable if it's needed later
cl_program program = nullptr;
{
/* TODO when we have a proper MPI-aware logging module,
the log output here should be written there */
- program = gmx::ocl::compileProgram(stderr, "gromacs/ewald", "pme_program.cl", commonDefines,
- context, deviceInfo->ocl_gpu_id.ocl_device_id,
+ program = gmx::ocl::compileProgram(stderr, "gromacs/ewald", "pme_program.cl",
+ commonDefines, context, deviceInfo->oclDeviceId,
deviceInfo->deviceVendor);
}
catch (gmx::GromacsException& e)
typedef int PmeGpuKernelParams;
#endif
-struct gmx_device_info_t;
+struct DeviceInformation;
/*! \internal \brief
* The PME GPU structure for all the data copied directly from the CPU PME structure.
int nAtomsAlloc;
/*! \brief A pointer to the device used during the execution. */
- const gmx_device_info_t* deviceInfo;
+ const DeviceInformation* deviceInfo;
/*! \brief Kernel scheduling grid width limit in X - derived from deviceinfo compute capability in CUDA.
* Declared as very large int to make it useful in computations with type promotion, to avoid overflows.
//! PME initialization
PmeSafePointer pmeInitWrapper(const t_inputrec* inputRec,
const CodePath mode,
- const gmx_device_info_t* gpuInfo,
+ const DeviceInformation* deviceInfo,
const PmeGpuProgram* pmeGpuProgram,
const Matrix3x3& box,
const real ewaldCoeff_q,
NumPmeDomains numPmeDomains = { 1, 1 };
gmx_pme_t* pmeDataRaw =
gmx_pme_init(&dummyCommrec, numPmeDomains, inputRec, false, false, true, ewaldCoeff_q,
- ewaldCoeff_lj, 1, runMode, nullptr, gpuInfo, pmeGpuProgram, dummyLogger);
+ ewaldCoeff_lj, 1, runMode, nullptr, deviceInfo, pmeGpuProgram, dummyLogger);
PmeSafePointer pme(pmeDataRaw); // taking ownership
// TODO get rid of this with proper matrix type
//! Simple PME initialization based on input, no atom data
PmeSafePointer pmeInitEmpty(const t_inputrec* inputRec,
const CodePath mode,
- const gmx_device_info_t* gpuInfo,
+ const DeviceInformation* deviceInfo,
const PmeGpuProgram* pmeGpuProgram,
const Matrix3x3& box,
real ewaldCoeff_q,
real ewaldCoeff_lj)
{
- return pmeInitWrapper(inputRec, mode, gpuInfo, pmeGpuProgram, box, ewaldCoeff_q, ewaldCoeff_lj);
+ return pmeInitWrapper(inputRec, mode, deviceInfo, pmeGpuProgram, box, ewaldCoeff_q, ewaldCoeff_lj);
// hiding the fact that PME actually needs to know the number of atoms in advance
}
//! PME initialization
PmeSafePointer pmeInitWrapper(const t_inputrec* inputRec,
CodePath mode,
- const gmx_device_info_t* gpuInfo,
+ const DeviceInformation* deviceInfo,
const PmeGpuProgram* pmeGpuProgram,
const Matrix3x3& box,
real ewaldCoeff_q = 1.0F,
//! Simple PME initialization (no atom data)
PmeSafePointer pmeInitEmpty(const t_inputrec* inputRec,
CodePath mode = CodePath::CPU,
- const gmx_device_info_t* gpuInfo = nullptr,
+ const DeviceInformation* deviceInfo = nullptr,
const PmeGpuProgram* pmeGpuProgram = nullptr,
const Matrix3x3& box = { { 1.0F, 0.0F, 0.0F, 0.0F, 1.0F, 0.0F, 0.0F, 0.0F, 1.0F } },
real ewaldCoeff_q = 0.0F,
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
// Constructing contexts for all compatible GPUs - will be empty on non-GPU builds
for (int gpuIndex : getCompatibleGpus(hardwareInfo_->gpu_info))
{
- const gmx_device_info_t* deviceInfo = getDeviceInfo(hardwareInfo_->gpu_info, gpuIndex);
+ const DeviceInformation* deviceInfo = getDeviceInfo(hardwareInfo_->gpu_info, gpuIndex);
init_gpu(deviceInfo);
char stmp[200] = {};
//! Readable description
std::string description_;
//! Device information pointer
- const gmx_device_info_t* deviceInfo_;
+ const DeviceInformation* deviceInfo_;
//! Persistent compiled GPU kernels for PME.
PmeGpuProgramStorage program_;
//! Returns a human-readable context description line
std::string getDescription() const { return description_; }
//! Returns the device info pointer
- const gmx_device_info_t* getDeviceInfo() const { return deviceInfo_; }
+ const DeviceInformation* getDeviceInfo() const { return deviceInfo_; }
//! Returns the persistent PME GPU kernels
const PmeGpuProgram* getPmeGpuProgram() const { return program_.get(); }
//! Constructs the context
- TestHardwareContext(CodePath codePath, const char* description, const gmx_device_info_t* deviceInfo) :
+ TestHardwareContext(CodePath codePath, const char* description, const DeviceInformation* deviceInfo) :
codePath_(codePath),
description_(description),
deviceInfo_(deviceInfo),
#endif /* CHECK_CUDA_ERRORS */
-/*! \brief CUDA device information.
- *
- * The CUDA device information is queried and set at detection and contains
- * both information about the device/hardware returned by the runtime as well
- * as additional data like support status.
- */
-struct gmx_device_info_t
-{
- int id; /* id of the CUDA device */
- cudaDeviceProp prop; /* CUDA device properties */
- int stat; /* result of the device check */
-};
-
/*! Launches synchronous or asynchronous device to host memory copy.
*
* The copy is launched in stream s or if not specified, in stream 0.
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
void free_gpu_info(const gmx_gpu_info_t* gpu_info)
{
- sfree(static_cast<void*>(gpu_info->gpu_dev)); // circumvent is_pod check in sfree
+ sfree(static_cast<void*>(gpu_info->deviceInfo)); // circumvent is_pod check in sfree
}
std::vector<int> getCompatibleGpus(const gmx_gpu_info_t& gpu_info)
compatibleGpus.reserve(gpu_info.n_dev);
for (int i = 0; i < gpu_info.n_dev; i++)
{
- assert(gpu_info.gpu_dev);
+ assert(gpu_info.deviceInfo);
if (gpu_info_get_stat(gpu_info, i) == egpuCompatible)
{
compatibleGpus.push_back(i);
* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 2010-2018, The GROMACS development team.
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
return 0;
}
-void init_gpu(const gmx_device_info_t* deviceInfo)
+void init_gpu(const DeviceInformation* deviceInfo)
{
cudaError_t stat;
}
}
-void free_gpu(const gmx_device_info_t* deviceInfo)
+void free_gpu(const DeviceInformation* deviceInfo)
{
// One should only attempt to clear the device context when
// it has been used, but currently the only way to know that a GPU
}
}
-gmx_device_info_t* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId)
+DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId)
{
if (deviceId < 0 || deviceId >= gpu_info.n_dev)
{
gmx_incons("Invalid GPU deviceId requested");
}
- return &gpu_info.gpu_dev[deviceId];
+ return &gpu_info.deviceInfo[deviceId];
}
/*! \brief Returns true if the gpu characterized by the device properties is
// We expect to start device support/sanity checks with a clean runtime error state
gmx::ensureNoPendingCudaError("");
- gmx_device_info_t* devs;
+ DeviceInformation* devs;
snew(devs, ndev);
for (int i = 0; i < ndev; i++)
{
cudaGetErrorName(stat), cudaGetErrorString(stat))
.c_str());
- gpu_info->n_dev = ndev;
- gpu_info->gpu_dev = devs;
+ gpu_info->n_dev = ndev;
+ gpu_info->deviceInfo = devs;
}
void get_gpu_device_info_string(char* s, const gmx_gpu_info_t& gpu_info, int index)
return;
}
- gmx_device_info_t* dinfo = &gpu_info.gpu_dev[index];
+ DeviceInformation* dinfo = &gpu_info.deviceInfo[index];
bool bGpuExists = (dinfo->stat != egpuNonexistent && dinfo->stat != egpuInsane);
size_t sizeof_gpu_dev_info(void)
{
- return sizeof(gmx_device_info_t);
+ return sizeof(DeviceInformation);
}
void startGpuProfiler(void)
int gpu_info_get_stat(const gmx_gpu_info_t& info, int index)
{
- return info.gpu_dev[index].stat;
+ return info.deviceInfo[index].stat;
}
/*! \brief Check status returned from peer access CUDA call, and error out or warn appropriately
#include "gromacs/gpu_utils/gpu_macros.h"
#include "gromacs/utility/basedefinitions.h"
-struct gmx_device_info_t;
+struct DeviceInformation;
struct gmx_gpu_info_t;
namespace gmx
* Will detect every GPU supported by the device driver in use.
* Must only be called if canPerformGpuDetection() has returned true.
* This routine also checks for the compatibility of each and fill the
- * gpu_info->gpu_dev array with the required information on each the
+ * gpu_info->deviceInfo array with the required information on each the
* device: ID, device properties, status.
*
* Note that this function leaves the GPU runtime API error state clean;
* initialization.
*/
GPU_FUNC_QUALIFIER
-void init_gpu(const gmx_device_info_t* GPU_FUNC_ARGUMENT(deviceInfo)) GPU_FUNC_TERM;
+void init_gpu(const DeviceInformation* GPU_FUNC_ARGUMENT(deviceInfo)) GPU_FUNC_TERM;
/*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
*
* \returns true if no error occurs during the freeing.
*/
CUDA_FUNC_QUALIFIER
-void free_gpu(const gmx_device_info_t* CUDA_FUNC_ARGUMENT(deviceInfo)) CUDA_FUNC_TERM;
+void free_gpu(const DeviceInformation* CUDA_FUNC_ARGUMENT(deviceInfo)) CUDA_FUNC_TERM;
/*! \brief Return a pointer to the device info for \c deviceId
*
* \returns Pointer to the device info for \c deviceId.
*/
GPU_FUNC_QUALIFIER
-gmx_device_info_t* getDeviceInfo(const gmx_gpu_info_t& GPU_FUNC_ARGUMENT(gpu_info),
+DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& GPU_FUNC_ARGUMENT(gpu_info),
int GPU_FUNC_ARGUMENT(deviceId)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
/*! \brief Returns the device ID of the CUDA GPU currently in use.
}
/*!
- * \brief Checks that device \c devInfo is sane (ie can run a kernel).
+ * \brief Checks that device \c deviceInfo is sane (ie can run a kernel).
*
* Compiles and runs a dummy kernel to determine whether the given
* OpenCL device functions properly.
*
*
- * \param[in] devInfo The device info pointer.
+ * \param[in] deviceInfo The device info pointer.
* \param[out] errorMessage An error message related to a failing OpenCL API call.
* \throws std::bad_alloc When out of memory.
* \returns Whether the device passed sanity checks
*/
-static bool isDeviceSane(const gmx_device_info_t* devInfo, std::string* errorMessage)
+static bool isDeviceSane(const DeviceInformation* deviceInfo, std::string* errorMessage)
{
cl_context_properties properties[] = {
- CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(devInfo->ocl_gpu_id.ocl_platform_id), 0
+ CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(deviceInfo->oclPlatformId), 0
};
// uncrustify spacing
cl_int status;
- auto deviceId = devInfo->ocl_gpu_id.ocl_device_id;
+ auto deviceId = deviceInfo->oclDeviceId;
ClContext context(clCreateContext(properties, 1, &deviceId, nullptr, nullptr, &status));
if (status != CL_SUCCESS)
{
}
/*!
- * \brief Checks that device \c devInfo is compatible with GROMACS.
+ * \brief Checks that device \c deviceInfo is compatible with GROMACS.
*
* Vendor and OpenCL version support checks are executed an the result
* of these returned.
*
- * \param[in] devInfo The device info pointer.
- * \returns The result of the compatibility checks.
+ * \param[in] deviceInfo The device info pointer.
+ * \returns The result of the compatibility checks.
*/
-static int isDeviceSupported(const gmx_device_info_t* devInfo)
+static int isDeviceSupported(const DeviceInformation* deviceInfo)
{
if (getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK") != nullptr)
{
// the device which has the following format:
// OpenCL<space><major_version.minor_version><space><vendor-specific information>
unsigned int deviceVersionMinor, deviceVersionMajor;
- const int valuesScanned = std::sscanf(devInfo->device_version, "OpenCL %u.%u",
+ const int valuesScanned = std::sscanf(deviceInfo->device_version, "OpenCL %u.%u",
&deviceVersionMajor, &deviceVersionMinor);
const bool versionLargeEnough =
((valuesScanned == 2)
}
/* Only AMD, Intel, and NVIDIA GPUs are supported for now */
- switch (devInfo->deviceVendor)
+ switch (deviceInfo->deviceVendor)
{
case DeviceVendor::Nvidia: return egpuCompatible;
case DeviceVendor::Amd:
* \returns An e_gpu_detect_res_t to indicate how the GPU coped with
* the sanity and compatibility check.
*/
-static int checkGpu(size_t deviceId, const gmx_device_info_t* deviceInfo)
+static int checkGpu(size_t deviceId, const DeviceInformation* deviceInfo)
{
int supportStatus = isDeviceSupported(deviceInfo);
break;
}
- snew(gpu_info->gpu_dev, gpu_info->n_dev);
+ snew(gpu_info->deviceInfo, gpu_info->n_dev);
{
int device_index;
for (unsigned int j = 0; j < ocl_device_count; j++)
{
- gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_platform_id = ocl_platform_ids[i];
- gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id = ocl_device_ids[j];
+ gpu_info->deviceInfo[device_index].oclPlatformId = ocl_platform_ids[i];
+ gpu_info->deviceInfo[device_index].oclDeviceId = ocl_device_ids[j];
- gpu_info->gpu_dev[device_index].device_name[0] = 0;
+ gpu_info->deviceInfo[device_index].device_name[0] = 0;
clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME,
- sizeof(gpu_info->gpu_dev[device_index].device_name),
- gpu_info->gpu_dev[device_index].device_name, nullptr);
+ sizeof(gpu_info->deviceInfo[device_index].device_name),
+ gpu_info->deviceInfo[device_index].device_name, nullptr);
- gpu_info->gpu_dev[device_index].device_version[0] = 0;
+ gpu_info->deviceInfo[device_index].device_version[0] = 0;
clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION,
- sizeof(gpu_info->gpu_dev[device_index].device_version),
- gpu_info->gpu_dev[device_index].device_version, nullptr);
+ sizeof(gpu_info->deviceInfo[device_index].device_version),
+ gpu_info->deviceInfo[device_index].device_version, nullptr);
- gpu_info->gpu_dev[device_index].vendorName[0] = 0;
+ gpu_info->deviceInfo[device_index].vendorName[0] = 0;
clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR,
- sizeof(gpu_info->gpu_dev[device_index].vendorName),
- gpu_info->gpu_dev[device_index].vendorName, nullptr);
+ sizeof(gpu_info->deviceInfo[device_index].vendorName),
+ gpu_info->deviceInfo[device_index].vendorName, nullptr);
- gpu_info->gpu_dev[device_index].compute_units = 0;
+ gpu_info->deviceInfo[device_index].compute_units = 0;
clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS,
- sizeof(gpu_info->gpu_dev[device_index].compute_units),
- &(gpu_info->gpu_dev[device_index].compute_units), nullptr);
+ sizeof(gpu_info->deviceInfo[device_index].compute_units),
+ &(gpu_info->deviceInfo[device_index].compute_units), nullptr);
- gpu_info->gpu_dev[device_index].adress_bits = 0;
+ gpu_info->deviceInfo[device_index].adress_bits = 0;
clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS,
- sizeof(gpu_info->gpu_dev[device_index].adress_bits),
- &(gpu_info->gpu_dev[device_index].adress_bits), nullptr);
+ sizeof(gpu_info->deviceInfo[device_index].adress_bits),
+ &(gpu_info->deviceInfo[device_index].adress_bits), nullptr);
- gpu_info->gpu_dev[device_index].deviceVendor =
- getDeviceVendor(gpu_info->gpu_dev[device_index].vendorName);
+ gpu_info->deviceInfo[device_index].deviceVendor =
+ getDeviceVendor(gpu_info->deviceInfo[device_index].vendorName);
clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, 3 * sizeof(size_t),
- &gpu_info->gpu_dev[device_index].maxWorkItemSizes, nullptr);
+ &gpu_info->deviceInfo[device_index].maxWorkItemSizes, nullptr);
clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
- &gpu_info->gpu_dev[device_index].maxWorkGroupSize, nullptr);
+ &gpu_info->deviceInfo[device_index].maxWorkGroupSize, nullptr);
- gpu_info->gpu_dev[device_index].stat =
- gmx::checkGpu(device_index, gpu_info->gpu_dev + device_index);
+ gpu_info->deviceInfo[device_index].stat =
+ gmx::checkGpu(device_index, gpu_info->deviceInfo + device_index);
- if (egpuCompatible == gpu_info->gpu_dev[device_index].stat)
+ if (egpuCompatible == gpu_info->deviceInfo[device_index].stat)
{
gpu_info->n_dev_compatible++;
}
int last = -1;
for (int i = 0; i < gpu_info->n_dev; i++)
{
- if (gpu_info->gpu_dev[i].deviceVendor == DeviceVendor::Amd)
+ if (gpu_info->deviceInfo[i].deviceVendor == DeviceVendor::Amd)
{
last++;
if (last < i)
{
- gmx_device_info_t ocl_gpu_info;
- ocl_gpu_info = gpu_info->gpu_dev[i];
- gpu_info->gpu_dev[i] = gpu_info->gpu_dev[last];
- gpu_info->gpu_dev[last] = ocl_gpu_info;
+ std::swap(gpu_info->deviceInfo[i], gpu_info->deviceInfo[last]);
}
}
}
{
for (int i = 0; i < gpu_info->n_dev; i++)
{
- if (gpu_info->gpu_dev[i].deviceVendor == DeviceVendor::Nvidia)
+ if (gpu_info->deviceInfo[i].deviceVendor == DeviceVendor::Nvidia)
{
last++;
if (last < i)
{
- gmx_device_info_t ocl_gpu_info;
- ocl_gpu_info = gpu_info->gpu_dev[i];
- gpu_info->gpu_dev[i] = gpu_info->gpu_dev[last];
- gpu_info->gpu_dev[last] = ocl_gpu_info;
+ std::swap(gpu_info->deviceInfo[i], gpu_info->deviceInfo[last]);
}
}
}
return;
}
- gmx_device_info_t* dinfo = &gpu_info.gpu_dev[index];
+ DeviceInformation* dinfo = &gpu_info.deviceInfo[index];
bool bGpuExists = (dinfo->stat != egpuNonexistent && dinfo->stat != egpuInsane);
}
-void init_gpu(const gmx_device_info_t* deviceInfo)
+void init_gpu(const DeviceInformation* deviceInfo)
{
assert(deviceInfo);
}
}
-gmx_device_info_t* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId)
+DeviceInformation* getDeviceInfo(const gmx_gpu_info_t& gpu_info, int deviceId)
{
if (deviceId < 0 || deviceId >= gpu_info.n_dev)
{
gmx_incons("Invalid GPU deviceId requested");
}
- return &gpu_info.gpu_dev[deviceId];
+ return &gpu_info.deviceInfo[deviceId];
}
size_t sizeof_gpu_dev_info()
{
- return sizeof(gmx_device_info_t);
+ return sizeof(DeviceInformation);
}
int gpu_info_get_stat(const gmx_gpu_info_t& info, int index)
{
- return info.gpu_dev[index].stat;
+ return info.deviceInfo[index].stat;
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* \ingroup module_gpu_utils
*/
+/*! \brief CUDA device information.
+ *
+ * The CUDA device information is queried and set at detection and contains
+ * both information about the device/hardware returned by the runtime as well
+ * as additional data like support status.
+ */
+struct DeviceInformation
+{
+ //! ID of the CUDA device.
+ int id;
+ //! CUDA device properties.
+ cudaDeviceProp prop;
+ //! Result of the device check.
+ int stat;
+};
+
//! \brief GPU command stream
using CommandStream = cudaStream_t;
//! \brief Single GPU call timing event - meaningless in CUDA
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#else
+//! Stub for device information.
+struct DeviceInformation
+{
+ // No member needed
+};
+
//! \brief GPU command stream
using CommandStream = void*;
//! \brief Single GPU call timing event
Count = 4
};
+/*! \internal
+ * \brief OpenCL device information.
+ *
+ * The OpenCL device information is queried and set at detection and contains
+ * both information about the device/hardware returned by the runtime as well
+ * as additional data like support status.
+ */
+struct DeviceInformation
+{
+ cl_platform_id oclPlatformId; //!< OpenCL Platform ID.
+ cl_device_id oclDeviceId; //!< OpenCL Device ID.
+ char device_name[256]; //!< Device name.
+ char device_version[256]; //!< Device version.
+ char vendorName[256]; //!< Device vendor name.
+ int compute_units; //!< Number of compute units.
+ int adress_bits; //!< Number of address bits the device is capable of.
+ int stat; //!< Device status takes values of e_gpu_detect_res_t.
+ DeviceVendor deviceVendor; //!< Device vendor.
+ size_t maxWorkItemSizes[3]; //!< Workgroup size limits (CL_DEVICE_MAX_WORK_ITEM_SIZES).
+ size_t maxWorkGroupSize; //!< Workgroup total size limit (CL_DEVICE_MAX_WORK_GROUP_SIZE).
+};
+
//! \brief GPU command stream
using CommandStream = cl_command_queue;
//! \brief Single GPU call timing event
enum class GpuApiCallBehavior;
-/*! \internal
- * \brief OpenCL GPU device identificator
- *
- * An OpenCL device is identified by its ID.
- * The platform ID is also included for caching reasons.
- */
-typedef struct
-{
- cl_platform_id ocl_platform_id; /**< Platform ID */
- cl_device_id ocl_device_id; /**< Device ID */
-} ocl_gpu_id_t;
-
-/*! \internal
- * \brief OpenCL device information.
- *
- * The OpenCL device information is queried and set at detection and contains
- * both information about the device/hardware returned by the runtime as well
- * as additional data like support status.
- */
-struct gmx_device_info_t
-{
- ocl_gpu_id_t ocl_gpu_id; /**< device ID assigned at detection */
- char device_name[256]; /**< device name */
- char device_version[256]; /**< device version */
- char vendorName[256]; /**< device vendor */
- int compute_units; /**< number of compute units */
- int adress_bits; /**< number of adress bits the device is capable of */
- int stat; /**< device status takes values of e_gpu_detect_res_t */
- DeviceVendor deviceVendor; /**< device vendor */
- size_t maxWorkItemSizes[3]; /**< workgroup size limits (CL_DEVICE_MAX_WORK_ITEM_SIZES) */
- size_t maxWorkGroupSize; /**< workgroup total size limit (CL_DEVICE_MAX_WORK_GROUP_SIZE) */
-};
-
/*! \internal
* \brief OpenCL GPU runtime data
*
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
const auto* device = getDeviceInfo(gpuInfo, compatibleGpus[0]);
cl_context_properties properties[] = {
- CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(device->ocl_gpu_id.ocl_platform_id), 0
+ CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(device->oclPlatformId), 0
};
// Give uncrustify more space
- auto deviceId = device->ocl_gpu_id.ocl_device_id;
+ auto deviceId = device->oclDeviceId;
auto context = clCreateContext(properties, 1, &deviceId, nullptr, nullptr, &status);
throwUponFailure(status, "creating context");
auto commandQueue = clCreateCommandQueue(context, deviceId, 0, &status);
if (!isMasterRankOfPhysicalNode)
{
- hardwareInfo->gpu_info.gpu_dev = (struct gmx_device_info_t*)malloc(dev_size);
+ hardwareInfo->gpu_info.deviceInfo = (struct DeviceInformation*)malloc(dev_size);
}
- MPI_Bcast(hardwareInfo->gpu_info.gpu_dev, dev_size, MPI_BYTE, 0, physicalNodeComm.comm_);
+ MPI_Bcast(hardwareInfo->gpu_info.deviceInfo, dev_size, MPI_BYTE, 0, physicalNodeComm.comm_);
MPI_Bcast(&hardwareInfo->gpu_info.n_dev_compatible, 1, MPI_INT, 0, physicalNodeComm.comm_);
}
}
// Detect GPUs
hardwareInfo->gpu_info.n_dev = 0;
hardwareInfo->gpu_info.n_dev_compatible = 0;
- hardwareInfo->gpu_info.gpu_dev = nullptr;
+ hardwareInfo->gpu_info.deviceInfo = nullptr;
gmx_detect_gpus(mdlog, physicalNodeComm, compat::make_not_null(hardwareInfo));
gmx_collect_hardware_mpi(*hardwareInfo->cpuInfo, physicalNodeComm, compat::make_not_null(hardwareInfo));
#include "gromacs/utility/basedefinitions.h"
-struct gmx_device_info_t;
+struct DeviceInformation;
/*! \brief Possible results of the GPU detection/check.
*
//! Total number of GPU devices detected on this physical node
int n_dev;
//! Information about each GPU device detected on this physical node
- gmx_device_info_t* gpu_dev;
+ DeviceInformation* deviceInfo;
//! Number of GPU devices detected on this physical node that are compatible.
int n_dev_compatible;
};
#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/arrayref.h"
-struct gmx_device_info_t;
+struct DeviceInformation;
struct gmx_hw_info_t;
struct t_commrec;
struct t_fcdata;
EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
// Get the device handles for the modules, nullptr when no task is assigned.
- gmx_device_info_t* nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr);
- gmx_device_info_t* pmeDeviceInfo = gpuTaskAssignments.initPmeDevice();
+ DeviceInformation* nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr);
+ DeviceInformation* pmeDeviceInfo = gpuTaskAssignments.initPmeDevice();
// TODO Initialize GPU streams here.
/*********************************/
/*! Returns the number of blocks to be used for the nonbonded GPU kernel. */
-static inline int calc_nb_kernel_nblock(int nwork_units, const gmx_device_info_t* dinfo)
+static inline int calc_nb_kernel_nblock(int nwork_units, const DeviceInformation* deviceInfo)
{
int max_grid_x_size;
- assert(dinfo);
+ assert(deviceInfo);
/* CUDA does not accept grid dimension of 0 (which can happen e.g. with an
empty domain) and that case should be handled before this point. */
assert(nwork_units > 0);
- max_grid_x_size = dinfo->prop.maxGridSize[0];
+ max_grid_x_size = deviceInfo->prop.maxGridSize[0];
/* do we exceed the grid x dimension limit? */
if (nwork_units > max_grid_x_size)
int evdwtype,
bool bDoEne,
bool bDoPrune,
- const gmx_device_info_t gmx_unused* devInfo)
+ const DeviceInformation gmx_unused* deviceInfo)
{
nbnxn_cu_kfunc_ptr_t res;
/* assert assumptions made by the kernels */
GMX_ASSERT(c_nbnxnGpuClusterSize * c_nbnxnGpuClusterSize / c_nbnxnGpuClusterpairSplit
- == devInfo->prop.warpSize,
+ == deviceInfo->prop.warpSize,
"The CUDA kernels require the "
"cluster_size_i*cluster_size_j/nbnxn_gpu_clusterpair_split to match the warp size "
"of the architecture targeted.");
/*! \brief Calculates the amount of shared memory required by the nonbonded kernel in use. */
static inline int calc_shmem_required_nonbonded(const int num_threads_z,
- const gmx_device_info_t gmx_unused* dinfo,
+ const DeviceInformation gmx_unused* deviceInfo,
const cu_nbparam_t* nbp)
{
int shmem;
- assert(dinfo);
+ assert(deviceInfo);
/* size of shmem (force-buffers/xq/atom type preloading) */
/* NOTE: with the default kernel on sm3.0 we need shmem only for pre-loading */
* - The 1D block-grid contains as many blocks as super-clusters.
*/
int num_threads_z = 1;
- if (nb->dev_info->prop.major == 3 && nb->dev_info->prop.minor == 7)
+ if (nb->deviceInfo->prop.major == 3 && nb->deviceInfo->prop.minor == 7)
{
num_threads_z = 2;
}
- int nblock = calc_nb_kernel_nblock(plist->nsci, nb->dev_info);
+ int nblock = calc_nb_kernel_nblock(plist->nsci, nb->deviceInfo);
KernelLaunchConfig config;
config.blockSize[1] = c_clSize;
config.blockSize[2] = num_threads_z;
config.gridSize[0] = nblock;
- config.sharedMemorySize = calc_shmem_required_nonbonded(num_threads_z, nb->dev_info, nbp);
+ config.sharedMemorySize = calc_shmem_required_nonbonded(num_threads_z, nb->deviceInfo, nbp);
config.stream = stream;
if (debug)
auto* timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
const auto kernel = select_nbnxn_kernel(
nbp->eeltype, nbp->vdwtype, stepWork.computeEnergy,
- (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune), nb->dev_info);
+ (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune), nb->deviceInfo);
const auto kernelArgs =
prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &stepWork.computeVirial);
launchGpuKernel(kernel, config, timingEvent, "k_calc_nb", kernelArgs);
* - The 1D block-grid contains as many blocks as super-clusters.
*/
int num_threads_z = c_cudaPruneKernelJ4Concurrency;
- int nblock = calc_nb_kernel_nblock(numSciInPart, nb->dev_info);
+ int nblock = calc_nb_kernel_nblock(numSciInPart, nb->deviceInfo);
KernelLaunchConfig config;
config.blockSize[0] = c_clSize;
config.blockSize[1] = c_clSize;
nbnxn_cuda_clear_e_fshift(nb);
}
-NbnxmGpu* gpu_init(const gmx_device_info_t* deviceInfo,
+NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
init_plist(nb->plist[InteractionLocality::Local]);
/* set device info, just point it to the right GPU among the detected ones */
- nb->dev_info = deviceInfo;
+ nb->deviceInfo = deviceInfo;
/* local/non-local GPU streams */
stat = cudaStreamCreate(&nb->stream[InteractionLocality::Local]);
int gpu_min_ci_balanced(NbnxmGpu* nb)
{
- return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->prop.multiProcessorCount : 0;
+ return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceInfo->prop.multiProcessorCount : 0;
}
gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
struct NbnxmGpu
{
/*! \brief CUDA device information */
- const gmx_device_info_t* dev_info = nullptr;
+ const DeviceInformation* deviceInfo = nullptr;
/*! \brief true if doing both local/non-local NB work on GPU */
bool bUseTwoStreams = false;
/*! \brief atom data */
struct NbnxmGpu;
struct gmx_gpu_info_t;
-struct gmx_device_info_t;
+struct DeviceInformation;
struct gmx_wallclock_gpu_nbnxn_t;
struct nbnxn_atomdata_t;
struct NbnxnPairlistGpu;
/** Initializes the data structures related to GPU nonbonded calculations. */
GPU_FUNC_QUALIFIER
-NbnxmGpu* gpu_init(const gmx_device_info_t gmx_unused* deviceInfo,
+NbnxmGpu* gpu_init(const DeviceInformation gmx_unused* deviceInfo,
const interaction_const_t gmx_unused* ic,
const PairlistParams gmx_unused& listParams,
const nbnxn_atomdata_t gmx_unused* nbat,
#include "gromacs/utility/enumerationhelpers.h"
#include "gromacs/utility/real.h"
-struct gmx_device_info_t;
+struct DeviceInformation;
struct gmx_domdec_zones_t;
struct gmx_enerdata_t;
struct gmx_hw_info_t;
const t_forcerec* fr,
const t_commrec* cr,
const gmx_hw_info_t& hardwareInfo,
- const gmx_device_info_t* deviceInfo,
+ const DeviceInformation* deviceInfo,
const gmx_mtop_t* mtop,
matrix box,
gmx_wallcycle* wcycle);
const t_forcerec* fr,
const t_commrec* cr,
const gmx_hw_info_t& hardwareInfo,
- const gmx_device_info_t* deviceInfo,
+ const DeviceInformation* deviceInfo,
const gmx_mtop_t* mtop,
matrix box,
gmx_wallcycle* wcycle)
*/
static inline void validate_global_work_size(const KernelLaunchConfig& config,
int work_dim,
- const gmx_device_info_t* dinfo)
+ const DeviceInformation* dinfo)
{
cl_uint device_size_t_size_bits;
cl_uint host_size_t_size_bits;
config.blockSize[1] = c_clSize;
config.gridSize[0] = plist->nsci;
- validate_global_work_size(config, 3, nb->dev_info);
+ validate_global_work_size(config, 3, nb->deviceInfo);
if (debug)
{
* and j-cluster concurrency, in x, y, and z, respectively.
* - The 1D block-grid contains as many blocks as super-clusters.
*/
- int num_threads_z = getOclPruneKernelJ4Concurrency(nb->dev_info->deviceVendor);
+ int num_threads_z = getOclPruneKernelJ4Concurrency(nb->deviceInfo->deviceVendor);
/* kernel launch config */
config.blockSize[2] = num_threads_z;
config.gridSize[0] = numSciInPart;
- validate_global_work_size(config, 3, nb->dev_info);
+ validate_global_work_size(config, 3, nb->deviceInfo);
if (debug)
{
*
* A fatal error results if creation fails.
*
- * \param[inout] runtimeData runtime data including program and context
- * \param[in] devInfo device info struct
+ * \param[inout] runtimeData Runtime data including program and context
+ * \param[in] deviceInfo Device info struct
* \param[in] rank MPI rank (for error reporting)
*/
static void nbnxn_gpu_create_context(gmx_device_runtime_data_t* runtimeData,
- const gmx_device_info_t* devInfo,
+ const DeviceInformation* deviceInfo,
int rank)
{
cl_context_properties context_properties[5];
cl_int cl_error;
GMX_ASSERT(runtimeData, "Need a valid runtimeData object");
- GMX_ASSERT(devInfo, "Need a valid device info object");
+ GMX_ASSERT(deviceInfo, "Need a valid device info object");
- platform_id = devInfo->ocl_gpu_id.ocl_platform_id;
- device_id = devInfo->ocl_gpu_id.ocl_device_id;
+ platform_id = deviceInfo->oclPlatformId;
+ device_id = deviceInfo->oclDeviceId;
int i = 0;
context_properties[i++] = CL_CONTEXT_PLATFORM;
if (CL_SUCCESS != cl_error)
{
gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s:\n OpenCL error %d: %s",
- rank, devInfo->device_name, cl_error, ocl_get_error_string(cl_error).c_str());
+ rank, deviceInfo->device_name, cl_error, ocl_get_error_string(cl_error).c_str());
}
runtimeData->context = context;
if (CL_SUCCESS != cl_error)
{
gmx_fatal(FARGS, "Failed to create kernel '%s' for GPU #%s: OpenCL error %d", kernel_name,
- nb->dev_info->device_name, cl_error);
+ nb->deviceInfo->device_name, cl_error);
}
return kernel;
//! This function is documented in the header file
-NbnxmGpu* gpu_init(const gmx_device_info_t* deviceInfo,
+NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
snew(nb->timings, 1);
/* set device info, just point it to the right GPU among the detected ones */
- nb->dev_info = deviceInfo;
+ nb->deviceInfo = deviceInfo;
snew(nb->dev_rundata, 1);
/* init nbst */
queue_properties = 0;
}
- nbnxn_gpu_create_context(nb->dev_rundata, nb->dev_info, rank);
+ nbnxn_gpu_create_context(nb->dev_rundata, nb->deviceInfo, rank);
/* local/non-local GPU streams */
nb->stream[InteractionLocality::Local] = clCreateCommandQueue(
- nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id, queue_properties, &cl_error);
+ nb->dev_rundata->context, nb->deviceInfo->oclDeviceId, queue_properties, &cl_error);
if (CL_SUCCESS != cl_error)
{
gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s: OpenCL error %d", rank,
- nb->dev_info->device_name, cl_error);
+ nb->deviceInfo->device_name, cl_error);
}
if (nb->bUseTwoStreams)
{
init_plist(nb->plist[InteractionLocality::NonLocal]);
- nb->stream[InteractionLocality::NonLocal] =
- clCreateCommandQueue(nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id,
- queue_properties, &cl_error);
+ nb->stream[InteractionLocality::NonLocal] = clCreateCommandQueue(
+ nb->dev_rundata->context, nb->deviceInfo->oclDeviceId, queue_properties, &cl_error);
if (CL_SUCCESS != cl_error)
{
gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s: OpenCL error %d",
- rank, nb->dev_info->device_name, cl_error);
+ rank, nb->deviceInfo->device_name, cl_error);
}
}
* TODO: decide about NVIDIA
*/
nb->bPrefetchLjParam = (getenv("GMX_OCL_DISABLE_I_PREFETCH") == nullptr)
- && ((nb->dev_info->deviceVendor == DeviceVendor::Amd)
- || (nb->dev_info->deviceVendor == DeviceVendor::Intel)
+ && ((nb->deviceInfo->deviceVendor == DeviceVendor::Amd)
+ || (nb->deviceInfo->deviceVendor == DeviceVendor::Intel)
|| (getenv("GMX_OCL_ENABLE_I_PREFETCH") != nullptr));
/* NOTE: in CUDA we pick L1 cache configuration for the nbnxn kernels here,
//! This function is documented in the header file
int gpu_min_ci_balanced(NbnxmGpu* nb)
{
- return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->compute_units : 0;
+ return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceInfo->compute_units : 0;
}
//! This function is documented in the header file
*
* A fatal error results if compilation fails.
*
- * \param[inout] nb Manages OpenCL non-bonded calculations; compiled kernels returned in dev_info members
+ * \param[inout] nb Manages OpenCL non-bonded calculations; compiled kernels returned in deviceInfo members
*
* Does not throw
*/
the log output here should be written there */
program = gmx::ocl::compileProgram(
stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines,
- nb->dev_rundata->context, nb->dev_info->ocl_gpu_id.ocl_device_id,
- nb->dev_info->deviceVendor);
+ nb->dev_rundata->context, nb->deviceInfo->oclDeviceId, nb->deviceInfo->deviceVendor);
}
catch (gmx::GromacsException& e)
{
e.prependContext(gmx::formatString("Failed to compile NBNXN kernels for GPU #%s\n",
- nb->dev_info->device_name));
+ nb->deviceInfo->device_name));
throw;
}
}
struct NbnxmGpu
{
//! OpenCL device information
- const gmx_device_info_t* dev_info = nullptr;
+ const DeviceInformation* deviceInfo = nullptr;
//! OpenCL runtime data (context, kernels)
struct gmx_device_runtime_data_t* dev_rundata = nullptr;
numRanksOnThisNode_, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate);
}
-gmx_device_info_t* GpuTaskAssignments::initNonbondedDevice(const t_commrec* cr) const
+DeviceInformation* GpuTaskAssignments::initNonbondedDevice(const t_commrec* cr) const
{
- gmx_device_info_t* deviceInfo = nullptr;
+ DeviceInformation* deviceInfo = nullptr;
const GpuTaskAssignment& gpuTaskAssignment = assignmentForAllRanksOnThisNode_[indexOfThisRank_];
// This works because only one task of each type per rank is currently permitted.
return deviceInfo;
}
-gmx_device_info_t* GpuTaskAssignments::initPmeDevice() const
+DeviceInformation* GpuTaskAssignments::initPmeDevice() const
{
- gmx_device_info_t* deviceInfo = nullptr;
+ DeviceInformation* deviceInfo = nullptr;
const GpuTaskAssignment& gpuTaskAssignment = assignmentForAllRanksOnThisNode_[indexOfThisRank_];
// This works because only one task of each type is currently permitted.
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/gmxmpi.h"
-struct gmx_device_info_t;
+struct DeviceInformation;
struct gmx_hw_info_t;
struct t_commrec;
* \todo This also sets up DLB for device sharing, where
* appropriate, but that responsbility should move
* elsewhere. */
- gmx_device_info_t* initNonbondedDevice(const t_commrec* cr) const;
+ DeviceInformation* initNonbondedDevice(const t_commrec* cr) const;
/*! \brief Return handle to the initialized GPU to use for the
* PME task on this rank, if any.
*
* Returns nullptr if no such task is assigned to this rank. */
- gmx_device_info_t* initPmeDevice() const;
+ DeviceInformation* initPmeDevice() const;
//! Return whether this rank has a PME task running on a GPU
bool thisRankHasPmeGpuTask() const;
//! Return whether this rank has any task running on a GPU