return bCutoffAllowed;
}
-void constructGpuHaloExchange(const gmx::MDLogger& mdlog, const t_commrec& cr, void* streamLocal, void* streamNonLocal)
+void constructGpuHaloExchange(const gmx::MDLogger& mdlog,
+ const t_commrec& cr,
+ const DeviceContext& deviceContext,
+ void* streamLocal,
+ void* streamNonLocal)
{
int gpuHaloExchangeSize = 0;
for (int pulse = pulseStart; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
{
cr.dd->gpuHaloExchange.push_back(std::make_unique<gmx::GpuHaloExchange>(
- cr.dd, cr.mpi_comm_mysim, streamLocal, streamNonLocal, pulse));
+ cr.dd, cr.mpi_comm_mysim, deviceContext, streamLocal, streamNonLocal, pulse));
}
}
}
struct gmx_wallcycle;
enum class PbcType : int;
class t_state;
+class DeviceContext;
class GpuEventSynchronizer;
namespace gmx
/*! \brief Construct the GPU halo exchange object(s)
* \param[in] mdlog The logger object
* \param[in] cr The commrec object
+ * \param[in] deviceContext GPU device context
* \param[in] streamLocal The local GPU stream
* \param[in] streamNonLocal The non-local GPU stream
*/
-void constructGpuHaloExchange(const gmx::MDLogger& mdlog, const t_commrec& cr, void* streamLocal, void* streamNonLocal);
+void constructGpuHaloExchange(const gmx::MDLogger& mdlog,
+ const t_commrec& cr,
+ const DeviceContext& deviceContext,
+ void* streamLocal,
+ void* streamNonLocal);
/*! \brief
* (Re-) Initialization for GPU halo exchange
#include "gromacs/utility/gmxmpi.h"
struct gmx_domdec_t;
+class DeviceContext;
class GpuEventSynchronizer;
namespace gmx
*
* \param [inout] dd domdec structure
* \param [in] mpi_comm_mysim communicator used for simulation
+ * \param [in] deviceContext GPU device context
* \param [in] streamLocal local NB CUDA stream.
* \param [in] streamNonLocal non-local NB CUDA stream.
* \param [in] pulse the communication pulse for this instance
*/
- GpuHaloExchange(gmx_domdec_t* dd, MPI_Comm mpi_comm_mysim, void* streamLocal, void* streamNonLocal, int pulse);
+ GpuHaloExchange(gmx_domdec_t* dd,
+ MPI_Comm mpi_comm_mysim,
+ const DeviceContext& deviceContext,
+ void* streamLocal,
+ void* streamNonLocal,
+ int pulse);
~GpuHaloExchange();
/*! \brief
/*!\brief Constructor stub. */
GpuHaloExchange::GpuHaloExchange(gmx_domdec_t* /* dd */,
MPI_Comm /* mpi_comm_mysim */,
+ const DeviceContext& /* deviceContext */,
void* /*streamLocal */,
void* /*streamNonLocal */,
int /*pulse */) :
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/domdec/gpuhaloexchange.h"
#include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/device_context.h"
#include "gromacs/gpu_utils/devicebuffer.h"
#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
#include "gromacs/gpu_utils/typecasts.cuh"
}
/*! \brief Create Domdec GPU object */
-GpuHaloExchange::Impl::Impl(gmx_domdec_t* dd,
- MPI_Comm mpi_comm_mysim,
- void* localStream,
- void* nonLocalStream,
- int pulse) :
+GpuHaloExchange::Impl::Impl(gmx_domdec_t* dd,
+ MPI_Comm mpi_comm_mysim,
+ const DeviceContext& deviceContext,
+ void* localStream,
+ void* nonLocalStream,
+ int pulse) :
dd_(dd),
sendRankX_(dd->neighbor[0][1]),
recvRankX_(dd->neighbor[0][0]),
usePBC_(dd->ci[dd->dim[0]] == 0),
haloDataTransferLaunched_(new GpuEventSynchronizer()),
mpi_comm_mysim_(mpi_comm_mysim),
+ deviceContext_(deviceContext),
localStream_(*static_cast<cudaStream_t*>(localStream)),
nonLocalStream_(*static_cast<cudaStream_t*>(nonLocalStream)),
pulse_(pulse)
delete haloDataTransferLaunched_;
}
-GpuHaloExchange::GpuHaloExchange(gmx_domdec_t* dd,
- MPI_Comm mpi_comm_mysim,
- void* localStream,
- void* nonLocalStream,
- int pulse) :
- impl_(new Impl(dd, mpi_comm_mysim, localStream, nonLocalStream, pulse))
+GpuHaloExchange::GpuHaloExchange(gmx_domdec_t* dd,
+ MPI_Comm mpi_comm_mysim,
+ const DeviceContext& deviceContext,
+ void* localStream,
+ void* nonLocalStream,
+ int pulse) :
+ impl_(new Impl(dd, mpi_comm_mysim, deviceContext, localStream, nonLocalStream, pulse))
{
}
*
* \param [inout] dd domdec structure
* \param [in] mpi_comm_mysim communicator used for simulation
+ * \param [in] deviceContext GPU device context
* \param [in] localStream local NB CUDA stream
* \param [in] nonLocalStream non-local NB CUDA stream
* \param [in] pulse the communication pulse for this instance
*/
- Impl(gmx_domdec_t* dd, MPI_Comm mpi_comm_mysim, void* localStream, void* nonLocalStream, int pulse);
+ Impl(gmx_domdec_t* dd,
+ MPI_Comm mpi_comm_mysim,
+ const DeviceContext& deviceContext,
+ void* localStream,
+ void* nonLocalStream,
+ int pulse);
~Impl();
/*! \brief
GpuEventSynchronizer* haloDataTransferLaunched_ = nullptr;
//! MPI communicator used for simulation
MPI_Comm mpi_comm_mysim_;
- //! Dummy GPU context object
- const DeviceContext deviceContext_;
+ //! GPU context object
+ const DeviceContext& deviceContext_;
//! CUDA stream for local non-bonded calculations
cudaStream_t localStream_ = nullptr;
//! CUDA stream for non-local non-bonded calculations
GPU_FUNC_QUALIFIER void* pme_gpu_get_device_stream(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
GPU_FUNC_TERM_WITH_RETURN(nullptr);
-/*! \brief Returns the pointer to the GPU context.
- * \param[in] pme The PME data structure.
- * \returns Pointer to GPU context object.
- */
-GPU_FUNC_QUALIFIER const DeviceContext* pme_gpu_get_device_context(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
- GPU_FUNC_TERM_WITH_RETURN(nullptr);
-
/*! \brief Get pointer to the device synchronizer object that allows syncing on PME force calculation completion
* \param[in] pme The PME data structure.
* \returns Pointer to sychronizer
return pme_gpu_get_stream(pme->gpu);
}
-const DeviceContext* pme_gpu_get_device_context(const gmx_pme_t* pme)
-{
- GMX_RELEASE_ASSERT(pme, "GPU context requested from PME before PME was constructed.");
- GMX_RELEASE_ASSERT(pme_gpu_active(pme),
- "GPU context requested from PME, but PME is running on the CPU.");
- return pme_gpu_get_context(pme->gpu);
-}
-
GpuEventSynchronizer* pme_gpu_get_f_ready_synchronizer(const gmx_pme_t* pme)
{
if (!pme || !pme_gpu_active(pme))
}
}
-const DeviceContext* pme_gpu_get_context(const PmeGpu* pmeGpu)
-{
- GMX_RELEASE_ASSERT(
- pmeGpu,
- "GPU context object was requested, but PME GPU object was not (yet) initialized.");
- return &pmeGpu->archSpecific->deviceContext_;
-}
-
GpuEventSynchronizer* pme_gpu_get_forces_ready_synchronizer(const PmeGpu* pmeGpu)
{
if (pmeGpu && pmeGpu->kernelParams)
GPU_FUNC_QUALIFIER void* pme_gpu_get_stream(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
GPU_FUNC_TERM_WITH_RETURN(nullptr);
-/*! \brief Return pointer to GPU context (for OpenCL builds).
- * \param[in] pmeGpu The PME GPU structure.
- * \returns Pointer to context object.
- */
-GPU_FUNC_QUALIFIER const DeviceContext* pme_gpu_get_context(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
- GPU_FUNC_TERM_WITH_RETURN(nullptr);
-
/*! \brief Return pointer to the sync object triggered after the PME force calculation completion
* \param[in] pmeGpu The PME GPU structure.
* \returns Pointer to sync object
#include "pme_gpu_program_impl.h"
-PmeGpuProgram::PmeGpuProgram(const DeviceInformation& deviceInfo) :
- impl_(std::make_unique<PmeGpuProgramImpl>(deviceInfo))
+PmeGpuProgram::PmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext) :
+ impl_(std::make_unique<PmeGpuProgramImpl>(deviceInfo, deviceContext))
{
}
PmeGpuProgram::~PmeGpuProgram() = default;
-PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation* deviceInfo)
+PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext)
{
- GMX_RELEASE_ASSERT(
- deviceInfo != nullptr,
- "Device information can not be nullptr when building PME GPU program object.");
- return std::make_unique<PmeGpuProgram>(*deviceInfo);
+ return std::make_unique<PmeGpuProgram>(deviceInfo, deviceContext);
}
#include <memory>
+class DeviceContext;
+
struct PmeGpuProgramImpl;
struct DeviceInformation;
class PmeGpuProgram
{
public:
- explicit PmeGpuProgram(const DeviceInformation& deviceInfo);
+ explicit PmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext);
~PmeGpuProgram();
// TODO: design getters for information inside, if needed for PME, and make this private?
/*! \brief
* Factory function used to build persistent PME GPU program for the device at once.
*/
-PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation* /*deviceInfo*/);
+PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation& /*deviceInfo*/,
+ const DeviceContext& /* deviceContext */);
#endif
#include "pme_gpu_program_impl.h"
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */) :
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */,
+ const DeviceContext& deviceContext) :
+ deviceContext_(deviceContext),
warpSize(0),
spreadWorkGroupSize(0),
gatherWorkGroupSize(0),
extern template void pme_gather_kernel<c_pmeOrder, c_wrapX, c_wrapY, true, false>(const PmeGpuCudaKernelParams);
extern template void pme_gather_kernel<c_pmeOrder, c_wrapX, c_wrapY, false, false>(const PmeGpuCudaKernelParams);
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& deviceInfo) :
- deviceContext_(deviceInfo)
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */,
+ const DeviceContext& deviceContext) :
+ deviceContext_(deviceContext)
{
// kernel parameters
warpSize = warp_size;
#include "gromacs/gpu_utils/gputraits.h"
#include "gromacs/utility/classhelpers.h"
+class DeviceContext;
struct DeviceInformation;
/*! \internal
/*! \brief
* This is a handle to the GPU context, which is just a dummy in CUDA,
* but is created/destroyed by this class in OpenCL.
- * TODO: Later we want to be able to own the context at a higher level and not here,
- * but this class would still need the non-owning context handle to build the kernels.
*/
- DeviceContext deviceContext_;
+ const DeviceContext& deviceContext_;
//! Conveniently all the PME kernels use the same single argument type
#if GMX_GPU == GMX_GPU_CUDA
PmeGpuProgramImpl() = delete;
//! Constructor for the given device
- explicit PmeGpuProgramImpl(const DeviceInformation& deviceInfo);
+ explicit PmeGpuProgramImpl(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext);
~PmeGpuProgramImpl();
GMX_DISALLOW_COPY_AND_ASSIGN(PmeGpuProgramImpl);
#include "pme_gpu_types_host.h"
#include "pme_grid.h"
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& deviceInfo) :
- deviceContext_(deviceInfo)
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext) :
+ deviceContext_(deviceContext)
{
// kernel parameters
warpSize = gmx::ocl::getDeviceWarpSize(deviceContext_.context(), deviceInfo.oclDeviceId);
gmx_wallcycle* wcycle,
gmx_walltime_accounting_t walltime_accounting,
t_inputrec* ir,
- PmeRunMode runMode)
+ PmeRunMode runMode,
+ const DeviceContext* deviceContext)
{
int ret;
int natoms = 0;
const bool useGpuForPme = (runMode == PmeRunMode::GPU) || (runMode == PmeRunMode::Mixed);
if (useGpuForPme)
{
- const void* commandStream = pme_gpu_get_device_stream(pme);
- const DeviceContext& deviceContext = *pme_gpu_get_device_context(pme);
+ const void* commandStream = pme_gpu_get_device_stream(pme);
changePinningPolicy(&pme_pp->chargeA, pme_get_pinning_policy());
changePinningPolicy(&pme_pp->x, pme_get_pinning_policy());
pme_pp->pmeForceSenderGpu = std::make_unique<gmx::PmeForceSenderGpu>(
commandStream, pme_pp->mpi_comm_mysim, pme_pp->ppRanks);
}
+ GMX_RELEASE_ASSERT(
+ deviceContext != nullptr,
+ "Device context can not be nullptr when building GPU propagator data object.");
// TODO: Special PME-only constructor is used here. There is no mechanism to prevent from using the other constructor here.
// This should be made safer.
stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
- commandStream, deviceContext, GpuApiCallBehavior::Async,
+ commandStream, *deviceContext, GpuApiCallBehavior::Async,
pme_gpu_get_padding_size(pme), wcycle);
}
struct gmx_pme_t;
struct gmx_wallcycle;
+class DeviceContext;
enum class PmeRunMode;
/*! \brief Called on the nodes that do PME exclusively */
gmx_wallcycle* wcycle,
gmx_walltime_accounting_t walltime_accounting,
t_inputrec* ir,
- PmeRunMode runMode);
+ PmeRunMode runMode,
+ const DeviceContext* deviceContext);
#endif
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/utility/classhelpers.h"
#include "gromacs/utility/gmxmpi.h"
+class DeviceContext;
class GpuEventSynchronizer;
namespace gmx
/*! \brief Creates PME-PP GPU communication object
* \param[in] comm Communicator used for simulation
* \param[in] pmeRank Rank of PME task
+ * \param[in] deviceContext GPU context.
*/
- PmePpCommGpu(MPI_Comm comm, int pmeRank);
+ PmePpCommGpu(MPI_Comm comm, int pmeRank, const DeviceContext& deviceContext);
~PmePpCommGpu();
/*! \brief Perform steps required when buffer size changes
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
};
/*!\brief Constructor stub. */
-PmePpCommGpu::PmePpCommGpu(MPI_Comm gmx_unused comm, int gmx_unused pmeRank) : impl_(nullptr)
+PmePpCommGpu::PmePpCommGpu(MPI_Comm /* comm */, int /* pmeRank */, const DeviceContext& /* deviceContext */) :
+ impl_(nullptr)
{
GMX_ASSERT(false,
"A CPU stub for PME-PP GPU communication was called instead of the correct "
PmePpCommGpu::~PmePpCommGpu() = default;
/*!\brief init PME-PP GPU communication stub */
-void PmePpCommGpu::reinit(int gmx_unused size)
+void PmePpCommGpu::reinit(int /* size */)
{
GMX_ASSERT(false,
"A CPU stub for PME-PP GPU communication initialization was called instead of the "
"correct implementation.");
}
-void PmePpCommGpu::receiveForceFromPmeCudaDirect(void gmx_unused* recvPtr,
- int gmx_unused recvSize,
- bool gmx_unused receivePmeForceToGpu)
+void PmePpCommGpu::receiveForceFromPmeCudaDirect(void* /* recvPtr */,
+ int /* recvSize */,
+ bool /* receivePmeForceToGpu */)
{
GMX_ASSERT(false,
"A CPU stub for PME-PP GPU communication was called instead of the correct "
"implementation.");
}
-void PmePpCommGpu::sendCoordinatesToPmeCudaDirect(void gmx_unused* sendPtr,
- int gmx_unused sendSize,
- bool gmx_unused sendPmeCoordinatesFromGpu,
- GpuEventSynchronizer gmx_unused* coordinatesOnDeviceEvent)
+void PmePpCommGpu::sendCoordinatesToPmeCudaDirect(void* /* sendPtr */,
+ int /* sendSize */,
+ bool /* sendPmeCoordinatesFromGpu */,
+ GpuEventSynchronizer* /* coordinatesOnDeviceEvent */)
{
GMX_ASSERT(false,
"A CPU stub for PME-PP GPU communication was called instead of the correct "
#include "config.h"
#include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/device_context.h"
#include "gromacs/gpu_utils/devicebuffer.h"
#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
#include "gromacs/utility/gmxmpi.h"
namespace gmx
{
-PmePpCommGpu::Impl::Impl(MPI_Comm comm, int pmeRank) : comm_(comm), pmeRank_(pmeRank)
+PmePpCommGpu::Impl::Impl(MPI_Comm comm, int pmeRank, const DeviceContext& deviceContext) :
+ comm_(comm),
+ pmeRank_(pmeRank),
+ deviceContext_(deviceContext)
{
GMX_RELEASE_ASSERT(
GMX_THREAD_MPI,
return static_cast<void*>(&forcesReadySynchronizer_);
}
-PmePpCommGpu::PmePpCommGpu(MPI_Comm comm, int pmeRank) : impl_(new Impl(comm, pmeRank)) {}
+PmePpCommGpu::PmePpCommGpu(MPI_Comm comm, int pmeRank, const DeviceContext& deviceContext) :
+ impl_(new Impl(comm, pmeRank, deviceContext))
+{
+}
PmePpCommGpu::~PmePpCommGpu() = default;
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
/*! \brief Creates PME-PP GPU communication object.
* \param[in] comm Communicator used for simulation
* \param[in] pmeRank Rank of PME task
+ * \param[in] deviceContext GPU context.
*/
- Impl(MPI_Comm comm, int pmeRank);
+ Impl(MPI_Comm comm, int pmeRank, const DeviceContext& deviceContext);
~Impl();
/*! \brief Perform steps required when buffer size changes
void* getForcesReadySynchronizer();
private:
+ //! Device context object
+ const DeviceContext& deviceContext_;
//! CUDA stream used for the communication operations in this class
cudaStream_t pmePpCommStream_ = nullptr;
//! Remote location of PME coordinate data buffer
PmeSafePointer pmeSafe = pmeInitWrapper(&inputRec, codePath, context->getDeviceInfo(),
context->getPmeGpuProgram(), box);
std::unique_ptr<StatePropagatorDataGpu> stateGpu =
- (codePath == CodePath::GPU) ? makeStatePropagatorDataGpu(*pmeSafe.get()) : nullptr;
+ (codePath == CodePath::GPU)
+ ? makeStatePropagatorDataGpu(*pmeSafe.get(), context->deviceContext())
+ : nullptr;
pmeInitAtoms(pmeSafe.get(), stateGpu.get(), codePath, inputAtomData.coordinates,
inputAtomData.charges);
PmeSafePointer pmeSafe = pmeInitWrapper(&inputRec, codePath, context->getDeviceInfo(),
context->getPmeGpuProgram(), box);
std::unique_ptr<StatePropagatorDataGpu> stateGpu =
- (codePath == CodePath::GPU) ? makeStatePropagatorDataGpu(*pmeSafe.get()) : nullptr;
+ (codePath == CodePath::GPU)
+ ? makeStatePropagatorDataGpu(*pmeSafe.get(), context->deviceContext())
+ : nullptr;
pmeInitAtoms(pmeSafe.get(), stateGpu.get(), codePath, coordinates, charges);
}
//! Make a GPU state-propagator manager
-std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme_t& pme)
+std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme_t& pme,
+ const DeviceContext& deviceContext)
{
// TODO: Pin the host buffer and use async memory copies
// TODO: Special constructor for PME-only rank / PME-tests is used here. There should be a mechanism to
// restrict one from using other constructor here.
- return std::make_unique<StatePropagatorDataGpu>(
- pme_gpu_get_device_stream(&pme), *pme_gpu_get_device_context(&pme),
- GpuApiCallBehavior::Sync, pme_gpu_get_padding_size(&pme), nullptr);
+ return std::make_unique<StatePropagatorDataGpu>(pme_gpu_get_device_stream(&pme), deviceContext,
+ GpuApiCallBehavior::Sync,
+ pme_gpu_get_padding_size(&pme), nullptr);
}
//! PME initialization with atom data
real ewaldCoeff_q = 0.0F,
real ewaldCoeff_lj = 0.0F);
//! Make a GPU state-propagator manager
-std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme_t& pme);
+std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme_t& pme,
+ const DeviceContext& deviceContext);
//! PME initialization with atom data and system box
void pmeInitAtoms(gmx_pme_t* pme,
StatePropagatorDataGpu* stateGpu,
void PmeTestEnvironment::SetUp()
{
- hardwareContexts_.emplace_back(std::make_unique<TestHardwareContext>(CodePath::CPU, "(CPU) ", nullptr));
+ hardwareContexts_.emplace_back(std::make_unique<TestHardwareContext>(CodePath::CPU, "(CPU) "));
hardwareInfo_ = hardwareInit();
if (!pme_gpu_supports_build(nullptr) || !pme_gpu_supports_hardware(*hardwareInfo_, nullptr))
for (int gpuIndex : getCompatibleGpus(hardwareInfo_->gpu_info))
{
const DeviceInformation* deviceInfo = getDeviceInfo(hardwareInfo_->gpu_info, gpuIndex);
+ GMX_RELEASE_ASSERT(deviceInfo != nullptr,
+ "Device information should be provided for the GPU builds.");
init_gpu(deviceInfo);
char stmp[200] = {};
get_gpu_device_info_string(stmp, hardwareInfo_->gpu_info, gpuIndex);
std::string description = "(GPU " + std::string(stmp) + ") ";
hardwareContexts_.emplace_back(std::make_unique<TestHardwareContext>(
- CodePath::GPU, description.c_str(), deviceInfo));
+ CodePath::GPU, description.c_str(), *deviceInfo));
}
}
#include <gtest/gtest.h>
#include "gromacs/ewald/pme_gpu_program.h"
+#include "gromacs/gpu_utils/device_context.h"
#include "gromacs/hardware/gpu_hw_info.h"
#include "gromacs/utility/gmxassert.h"
std::string description_;
//! Device information pointer
const DeviceInformation* deviceInfo_;
+ //! Device context object owned by this test hardware context
+ DeviceContext deviceContext_;
//! Persistent compiled GPU kernels for PME.
PmeGpuProgramStorage program_;
CodePath getCodePath() const { return codePath_; }
//! Returns a human-readable context description line
std::string getDescription() const { return description_; }
+ //! Getter for the DeviceContext
+ const DeviceContext& deviceContext() const { return deviceContext_; }
//! Returns the device info pointer
const DeviceInformation* getDeviceInfo() const { return deviceInfo_; }
//! Returns the persistent PME GPU kernels
const PmeGpuProgram* getPmeGpuProgram() const { return program_.get(); }
- //! Constructs the context
- TestHardwareContext(CodePath codePath, const char* description, const DeviceInformation* deviceInfo) :
+ //! Constructs the context for CPU builds
+ TestHardwareContext(CodePath codePath, const char* description) :
+ codePath_(codePath),
+ description_(description)
+ {
+ GMX_RELEASE_ASSERT(codePath == CodePath::CPU,
+ "A GPU code path should provide DeviceInformation to the "
+ "TestHardwareContext constructor.");
+ }
+ //! Constructs the context for GPU builds
+ TestHardwareContext(CodePath codePath, const char* description, const DeviceInformation& deviceInfo) :
codePath_(codePath),
description_(description),
- deviceInfo_(deviceInfo)
+ deviceInfo_(&deviceInfo),
+ deviceContext_(deviceInfo),
+ program_(buildPmeGpuProgram(deviceInfo, deviceContext_))
{
- if (codePath == CodePath::GPU)
- {
- program_ = buildPmeGpuProgram(deviceInfo_);
- }
+ GMX_RELEASE_ASSERT(codePath == CodePath::GPU,
+ "TestHardwareContext tries to construct DeviceContext and PmeGpuProgram "
+ "in CPU build.");
}
~TestHardwareContext();
};
class DeviceContext
{
public:
- //! Default constructor. In OpenCL leaves context \c nullptr.
+ //! Default constructor.
DeviceContext() {}
- /*! \brief Second stage of construction. Creates the \c cl_context in OpenCL, does nothing in CUDA.
- *
- * \param[in] deviceInfo Platform-specific device information.
- */
- void init(const DeviceInformation& /*deviceInfo*/) {}
- /*! \brief Construct the object and call \c init(...) .
- *
- * \param[in] deviceInfo Platform-specific device information.
- */
- DeviceContext(const DeviceInformation& deviceInfo) { init(deviceInfo); }
+ //! Constructor.
+ DeviceContext(const DeviceInformation& /* deviceInfo */) {}
//! Destructor
~DeviceContext() = default;
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL 0x4
/**@}*/
-DeviceContext::DeviceContext()
-{
- context_ = nullptr;
-}
-
-void DeviceContext::init(const DeviceInformation& deviceInfo)
+DeviceContext::DeviceContext(const DeviceInformation& deviceInfo)
{
cl_platform_id platformId = deviceInfo.oclPlatformId;
cl_device_id deviceId = deviceInfo.oclDeviceId;
}
}
-DeviceContext::DeviceContext(const DeviceInformation& deviceInfo)
-{
- init(deviceInfo);
-}
-
DeviceContext::~DeviceContext()
{
cl_int clError;
class DeviceContext
{
public:
- //! Default constructor. Sets \c context_ to \c nullptr.
- DeviceContext();
- /*! \brief Second stage of construction. Creates the \c cl_context.
- *
- * \param[in] deviceInfo Platform-specific device information.
- *
- * \throws InternalError if context creation failed.
- */
- void init(const DeviceInformation& deviceInfo);
- /*! \brief Construct the object and call \c init(...) .
+ //! Default constructor.
+ DeviceContext() {}
+ /*! \brief Constructor that creates the \c cl_context
*
* \param[in] deviceInfo Platform-specific device information.
*
*/
struct gmx_device_runtime_data_t
{
+ //! Constructor
+ gmx_device_runtime_data_t(const DeviceContext& deviceContext) : deviceContext_(deviceContext) {}
+
//! OpenCL context
- DeviceContext deviceContext;
+ const DeviceContext& deviceContext_;
//! OpenCL program
cl_program program;
};
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/classhelpers.h"
+class DeviceContext;
struct gmx_enerdata_t;
struct gmx_ffparams_t;
struct gmx_mtop_t;
{
public:
//! Construct the manager with constant data and the stream to use.
- GpuBonded(const gmx_ffparams_t& ffparams, void* streamPtr, gmx_wallcycle* wcycle);
+ GpuBonded(const gmx_ffparams_t& ffparams,
+ const DeviceContext& deviceContext,
+ void* streamPtr,
+ gmx_wallcycle* wcycle);
//! Destructor
~GpuBonded();
{
};
-GpuBonded::GpuBonded(const gmx_ffparams_t& /* ffparams */, void* /*streamPtr */, gmx_wallcycle* /* wcycle */) :
+GpuBonded::GpuBonded(const gmx_ffparams_t& /* ffparams */,
+ const DeviceContext& /* deviceContext */,
+ void* /*streamPtr */,
+ gmx_wallcycle* /* wcycle */) :
impl_(nullptr)
{
}
#include "gromacs/gpu_utils/cuda_arch_utils.cuh"
#include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/device_context.h"
#include "gromacs/gpu_utils/devicebuffer.h"
#include "gromacs/gpu_utils/typecasts.cuh"
#include "gromacs/mdtypes/enerdata.h"
// ---- GpuBonded::Impl
-GpuBonded::Impl::Impl(const gmx_ffparams_t& ffparams, void* streamPtr, gmx_wallcycle* wcycle)
+GpuBonded::Impl::Impl(const gmx_ffparams_t& ffparams,
+ const DeviceContext& deviceContext,
+ void* streamPtr,
+ gmx_wallcycle* wcycle) :
+ deviceContext_(deviceContext)
{
stream_ = *static_cast<CommandStream*>(streamPtr);
wcycle_ = wcycle;
// ---- GpuBonded
-GpuBonded::GpuBonded(const gmx_ffparams_t& ffparams, void* streamPtr, gmx_wallcycle* wcycle) :
- impl_(new Impl(ffparams, streamPtr, wcycle))
+GpuBonded::GpuBonded(const gmx_ffparams_t& ffparams,
+ const DeviceContext& deviceContext,
+ void* streamPtr,
+ gmx_wallcycle* wcycle) :
+ impl_(new Impl(ffparams, deviceContext, streamPtr, wcycle))
{
}
{
public:
//! Constructor
- Impl(const gmx_ffparams_t& ffparams, void* streamPtr, gmx_wallcycle* wcycle);
+ Impl(const gmx_ffparams_t& ffparams, const DeviceContext& deviceContext, void* streamPtr, gmx_wallcycle* wcycle);
/*! \brief Destructor, non-default needed for freeing
* device-side buffers */
~Impl();
//! \brief Device-side total virial
float* d_vTot_ = nullptr;
- //! Dummy GPU context object
- const DeviceContext deviceContext_;
+ //! GPU context object
+ const DeviceContext& deviceContext_;
//! \brief Bonded GPU stream, not owned by this module
CommandStream stream_;
#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/arrayref.h"
-struct DeviceInformation;
struct gmx_hw_info_t;
struct t_commrec;
struct t_fcdata;
class Impl;
private:
- //! Dummy GPU context object
+ //! GPU context object
const DeviceContext& deviceContext_;
//! GPU stream
CommandStream commandStream_;
static bool isNumCoupledConstraintsSupported(const gmx_mtop_t& mtop);
private:
- //! Dummy GPU context object
+ //! GPU context object
const DeviceContext& deviceContext_;
//! GPU stream
CommandStream commandStream_;
void set(const InteractionDefinitions& idef, const t_mdatoms& md);
private:
- //! Dummy GPU context object
+ //! GPU context object
const DeviceContext& deviceContext_;
//! GPU stream
CommandStream commandStream_;
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/classhelpers.h"
+class DeviceContext;
class GpuEventSynchronizer;
struct gmx_mtop_t;
* projection from it.
* \param[in] mtop Topology of the system: SETTLE gets the masses for O and H atoms
* and target O-H and H-H distances from this object.
+ * \param[in] deviceContext GPU device context.
* \param[in] commandStream GPU stream to use. Can be nullptr.
* \param[in] xUpdatedOnDevice The event synchronizer to use to mark that update is done on the GPU.
*/
UpdateConstrainGpu(const t_inputrec& ir,
const gmx_mtop_t& mtop,
+ const DeviceContext& deviceContext,
const void* commandStream,
GpuEventSynchronizer* xUpdatedOnDevice);
UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec& /* ir */,
const gmx_mtop_t& /* mtop */,
+ const DeviceContext& /* deviceContext */,
const void* /* commandStream */,
GpuEventSynchronizer* /* xUpdatedOnDevice */) :
impl_(nullptr)
#include <algorithm>
#include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/device_context.h"
#include "gromacs/gpu_utils/devicebuffer.h"
#include "gromacs/gpu_utils/gputraits.cuh"
#include "gromacs/gpu_utils/vectype_ops.cuh"
UpdateConstrainGpu::Impl::Impl(const t_inputrec& ir,
const gmx_mtop_t& mtop,
+ const DeviceContext& deviceContext,
const void* commandStream,
GpuEventSynchronizer* xUpdatedOnDevice) :
+ deviceContext_(deviceContext),
coordinatesReady_(xUpdatedOnDevice)
{
GMX_ASSERT(xUpdatedOnDevice != nullptr, "The event synchronizer can not be nullptr.");
UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec& ir,
const gmx_mtop_t& mtop,
+ const DeviceContext& deviceContext,
const void* commandStream,
GpuEventSynchronizer* xUpdatedOnDevice) :
- impl_(new Impl(ir, mtop, commandStream, xUpdatedOnDevice))
+ impl_(new Impl(ir, mtop, deviceContext, commandStream, xUpdatedOnDevice))
{
}
* projection from it.
* \param[in] mtop Topology of the system: SETTLE gets the masses for O and H atoms
* and target O-H and H-H distances from this object.
+ * \param[in] deviceContext GPU device context.
* \param[in] commandStream GPU stream to use. Can be nullptr.
* \param[in] xUpdatedOnDevice The event synchronizer to use to mark that update is done on the GPU.
*/
- Impl(const t_inputrec& ir, const gmx_mtop_t& mtop, const void* commandStream, GpuEventSynchronizer* xUpdatedOnDevice);
+ Impl(const t_inputrec& ir,
+ const gmx_mtop_t& mtop,
+ const DeviceContext& deviceContext,
+ const void* commandStream,
+ GpuEventSynchronizer* xUpdatedOnDevice);
~Impl();
static bool isNumCoupledConstraintsSupported(const gmx_mtop_t& mtop);
private:
- //! Dummy GPU context object
- const DeviceContext deviceContext_;
+ //! GPU context object
+ const DeviceContext& deviceContext_;
//! GPU stream
CommandStream commandStream_ = nullptr;
//! GPU kernel launch config
{
GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
}
- integrator = std::make_unique<UpdateConstrainGpu>(
- *ir, *top_global, stateGpu->getUpdateStream(), stateGpu->xUpdatedOnDevice());
+
+ GMX_RELEASE_ASSERT(fr->deviceContext != nullptr,
+ "GPU device context should be initialized to use GPU update.");
+
+ integrator = std::make_unique<UpdateConstrainGpu>(*ir, *top_global, *fr->deviceContext,
+ stateGpu->getUpdateStream(),
+ stateGpu->xUpdatedOnDevice());
integrator->setPbc(PbcType::Xyz, state->box);
}
Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local);
void* streamNonLocal = Nbnxm::gpu_get_command_stream(
fr->nbv->gpu_nbv, InteractionLocality::NonLocal);
- constructGpuHaloExchange(mdlog, *cr, streamLocal, streamNonLocal);
+ GMX_RELEASE_ASSERT(
+ fr->deviceContext != nullptr,
+ "GPU device context should be initialized to use GPU halo exchange.");
+ constructGpuHaloExchange(mdlog, *cr, *fr->deviceContext, streamLocal, streamNonLocal);
}
}
}
#include "gromacs/fileio/tpxio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
+#include "gromacs/gpu_utils/device_context.h"
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/hardware/cpuinfo.h"
#include "gromacs/hardware/detecthardware.h"
EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
// Get the device handles for the modules, nullptr when no task is assigned.
+ // TODO: There should be only one DeviceInformation.
DeviceInformation* nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr);
DeviceInformation* pmeDeviceInfo = gpuTaskAssignments.initPmeDevice();
+ std::unique_ptr<DeviceContext> deviceContext = nullptr;
+ if (pmeDeviceInfo)
+ {
+ deviceContext = std::make_unique<DeviceContext>(*pmeDeviceInfo);
+ }
+ else if (nonbondedDeviceInfo)
+ {
+ deviceContext = std::make_unique<DeviceContext>(*nonbondedDeviceInfo);
+ }
+
// TODO Initialize GPU streams here.
// TODO Currently this is always built, yet DD partition code
opt2fn("-tablep", filenames.size(), filenames.data()),
opt2fns("-tableb", filenames.size(), filenames.data()), pforce);
+ fr->deviceContext = deviceContext.get();
+
if (devFlags.enableGpuPmePPComm && !thisRankHasDuty(cr, DUTY_PME))
{
- fr->pmePpCommGpu = std::make_unique<gmx::PmePpCommGpu>(cr->mpi_comm_mysim, cr->dd->pme_nodeid);
+ GMX_RELEASE_ASSERT(
+ deviceContext != nullptr,
+ "Device context can not be nullptr when using PME-PP direct communications object.");
+ fr->pmePpCommGpu = std::make_unique<gmx::PmePpCommGpu>(
+ cr->mpi_comm_mysim, cr->dd->pme_nodeid, *deviceContext);
}
fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec, fr, cr, *hwinfo, nonbondedDeviceInfo,
- &mtop, box, wcycle);
+ fr->deviceContext, &mtop, box, wcycle);
if (useGpuForBonded)
{
auto stream = havePPDomainDecomposition(cr)
fr->nbv->gpu_nbv, gmx::InteractionLocality::NonLocal)
: Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv,
gmx::InteractionLocality::Local);
- gpuBonded = std::make_unique<GpuBonded>(mtop.ffparams, stream, wcycle);
+ GMX_RELEASE_ASSERT(
+ fr->deviceContext != nullptr,
+ "Device context can not be nullptr when computing bonded interactions on GPU.");
+ gpuBonded = std::make_unique<GpuBonded>(mtop.ffparams, *fr->deviceContext, stream, wcycle);
fr->gpuBonded = gpuBonded.get();
}
PmeGpuProgramStorage pmeGpuProgram;
if (thisRankHasPmeGpuTask)
{
- pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
+ GMX_RELEASE_ASSERT(
+ pmeDeviceInfo != nullptr,
+ "Device information can not be nullptr when building PME GPU program object.");
+ GMX_RELEASE_ASSERT(
+ deviceContext != nullptr,
+ "Device context can not be nullptr when building PME GPU program object.");
+ pmeGpuProgram = buildPmeGpuProgram(*pmeDeviceInfo, *deviceContext);
}
/* Initiate PME if necessary,
fr->nbv->gpu_nbv != nullptr
? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal)
: nullptr;
- const DeviceContext& deviceContext = *pme_gpu_get_device_context(fr->pmedata);
- const int paddingSize = pme_gpu_get_padding_size(fr->pmedata);
+ const int paddingSize = pme_gpu_get_padding_size(fr->pmedata);
GpuApiCallBehavior transferKind = (inputrec->eI == eiMD && !doRerun && !useModularSimulator)
? GpuApiCallBehavior::Async
: GpuApiCallBehavior::Sync;
-
+ GMX_RELEASE_ASSERT(
+ deviceContext != nullptr,
+ "Device context can not be nullptr when building GPU propagator data object.");
stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
- pmeStream, localStream, nonLocalStream, deviceContext, transferKind, paddingSize, wcycle);
+ pmeStream, localStream, nonLocalStream, *deviceContext, transferKind,
+ paddingSize, wcycle);
fr->stateGpu = stateGpu.get();
}
GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
/* do PME only */
walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
- gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
+ gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode,
+ deviceContext.get());
}
wallcycle_stop(wcycle, ewcRUN);
free_gpu(nonbondedDeviceInfo);
free_gpu(pmeDeviceInfo);
+ deviceContext.reset(nullptr);
sfree(fcd);
if (doMembed)
struct gmx_pme_t;
struct nonbonded_verlet_t;
struct bonded_threading_t;
+class DeviceContext;
class DispersionCorrection;
struct t_forcetable;
struct t_QMMMrec;
// general StatePropagatorData object that is passed around
gmx::StatePropagatorDataGpu* stateGpu = nullptr;
+ //! GPU device context
+ DeviceContext* deviceContext = nullptr;
+
/* For PME-PP GPU communication */
std::unique_ptr<gmx::PmePpCommGpu> pmePpCommGpu;
};
nbnxn_cuda_clear_e_fshift(nb);
}
-NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo,
+NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo,
+ const DeviceContext& /* deviceContext */,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
#include "gromacs/gpu_utils/gpu_macros.h"
#include "gromacs/mdtypes/locality.h"
+class DeviceContext;
+
struct NbnxmGpu;
struct gmx_gpu_info_t;
struct DeviceInformation;
/** Initializes the data structures related to GPU nonbonded calculations. */
GPU_FUNC_QUALIFIER
NbnxmGpu* gpu_init(const DeviceInformation gmx_unused* deviceInfo,
+ const DeviceContext gmx_unused& deviceContext,
const interaction_const_t gmx_unused* ic,
const PairlistParams gmx_unused& listParams,
const nbnxn_atomdata_t gmx_unused* nbat,
#include "gromacs/utility/enumerationhelpers.h"
#include "gromacs/utility/real.h"
+class DeviceContext;
struct DeviceInformation;
struct gmx_domdec_zones_t;
struct gmx_enerdata_t;
const t_commrec* cr,
const gmx_hw_info_t& hardwareInfo,
const DeviceInformation* deviceInfo,
+ const DeviceContext* deviceContext,
const gmx_mtop_t* mtop,
matrix box,
gmx_wallcycle* wcycle);
const t_commrec* cr,
const gmx_hw_info_t& hardwareInfo,
const DeviceInformation* deviceInfo,
+ const DeviceContext* deviceContext,
const gmx_mtop_t* mtop,
matrix box,
gmx_wallcycle* wcycle)
int minimumIlistCountForGpuBalancing = 0;
if (useGpu)
{
+ GMX_RELEASE_ASSERT(
+ deviceContext != nullptr,
+ "Device context can not be nullptr when using GPU for non-bonded forces.");
/* init the NxN GPU data; the last argument tells whether we'll have
* both local and non-local NB calculation on GPU */
- gpu_nbv = gpu_init(deviceInfo, fr->ic, pairlistParams, nbat.get(), cr->nodeid, haveMultipleDomains);
+ gpu_nbv = gpu_init(deviceInfo, *deviceContext, fr->ic, pairlistParams, nbat.get(),
+ cr->nodeid, haveMultipleDomains);
minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(gpu_nbv);
}
CL_MEM_COPY_HOST_PTR, &array_format, tabsize, 1, 0, ftmp, &cl_error);
*/
- coul_tab = clCreateBuffer(runData->deviceContext.context(),
+ coul_tab = clCreateBuffer(runData->deviceContext_.context(),
CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
tables.tableF.size() * sizeof(cl_float),
const_cast<real*>(tables.tableF.data()), &cl_error);
ad->ntypes = ntypes;
ad->shift_vec =
- clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
+ clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
SHIFTS * sizeof(nbnxn_atomdata_t::shift_vec[0]), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
ad->bShiftVecUploaded = CL_FALSE;
- ad->fshift = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
+ ad->fshift = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
SHIFTS * sizeof(nb_staging_t::fshift[0]), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
- ad->e_lj = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
+ ad->e_lj = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
sizeof(float), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
- ad->e_el = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
+ ad->e_el = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
sizeof(float), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
CL_MEM_READ_WRITE, &array_format, 1, 1, 0, nullptr, &cl_error);
*/
- nbp->coulomb_tab_climg2d = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_ONLY,
+ nbp->coulomb_tab_climg2d = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_ONLY,
sizeof(cl_float), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
array_format.image_channel_data_type = CL_FLOAT;
array_format.image_channel_order = CL_R;
- nbp->nbfp_climg2d = clCreateImage2D(runData->deviceContext.context(), CL_MEM_READ_ONLY |
+ nbp->nbfp_climg2d = clCreateImage2D(runData->deviceContext_.context(), CL_MEM_READ_ONLY |
CL_MEM_COPY_HOST_PTR, &array_format, nnbfp, 1, 0, nbat->nbfp, &cl_error);
*/
nbp->nbfp_climg2d = clCreateBuffer(
- runData->deviceContext.context(),
+ runData->deviceContext_.context(),
CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
nnbfp * sizeof(cl_float), const_cast<float*>(nbatParams.nbfp.data()), &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
/* nbp->nbfp_comb_climg2d = clCreateImage2D(runData->deviceContext.context(), CL_MEM_READ_WRITE |
CL_MEM_COPY_HOST_PTR, &array_format, nnbfp_comb, 1, 0, nbat->nbfp_comb, &cl_error);*/
nbp->nbfp_comb_climg2d =
- clCreateBuffer(runData->deviceContext.context(),
+ clCreateBuffer(runData->deviceContext_.context(),
CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
nnbfp_comb * sizeof(cl_float),
const_cast<float*>(nbatParams.nbfp_comb.data()), &cl_error);
// TODO: decide which alternative is most efficient - textures or buffers.
/* nbp->nbfp_comb_climg2d = clCreateImage2D(runData->deviceContext.context(),
CL_MEM_READ_WRITE, &array_format, 1, 1, 0, nullptr, &cl_error);*/
- nbp->nbfp_comb_climg2d = clCreateBuffer(runData->deviceContext.context(), CL_MEM_READ_ONLY,
+ nbp->nbfp_comb_climg2d = clCreateBuffer(runData->deviceContext_.context(), CL_MEM_READ_ONLY,
sizeof(cl_float), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
//! This function is documented in the header file
NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo,
+ const DeviceContext& deviceContext,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
/* set device info, just point it to the right GPU among the detected ones */
nb->deviceInfo = deviceInfo;
- nb->dev_rundata = new gmx_device_runtime_data_t();
+ nb->dev_rundata = new gmx_device_runtime_data_t(deviceContext);
/* init nbst */
pmalloc(reinterpret_cast<void**>(&nb->nbst.e_lj), sizeof(*nb->nbst.e_lj));
queue_properties = 0;
}
- nb->dev_rundata->deviceContext.init(*deviceInfo);
-
/* local/non-local GPU streams */
nb->stream[InteractionLocality::Local] =
- clCreateCommandQueue(nb->dev_rundata->deviceContext.context(),
+ clCreateCommandQueue(nb->dev_rundata->deviceContext_.context(),
nb->deviceInfo->oclDeviceId, queue_properties, &cl_error);
if (CL_SUCCESS != cl_error)
{
init_plist(nb->plist[InteractionLocality::NonLocal]);
nb->stream[InteractionLocality::NonLocal] =
- clCreateCommandQueue(nb->dev_rundata->deviceContext.context(),
+ clCreateCommandQueue(nb->dev_rundata->deviceContext_.context(),
nb->deviceInfo->oclDeviceId, queue_properties, &cl_error);
if (CL_SUCCESS != cl_error)
{
}
// TODO most of this function is same in CUDA and OpenCL, move into the header
- const DeviceContext& deviceContext = nb->dev_rundata->deviceContext;
+ const DeviceContext& deviceContext = nb->dev_rundata->deviceContext_;
reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
deviceContext);
freeDeviceBuffer(&d_atdat->atom_types);
}
- d_atdat->f = clCreateBuffer(nb->dev_rundata->deviceContext.context(),
+ d_atdat->f = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
nalloc * DIM * sizeof(nbat->out[0].f[0]), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
- d_atdat->xq = clCreateBuffer(nb->dev_rundata->deviceContext.context(),
+ d_atdat->xq = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
nalloc * sizeof(cl_float4), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
if (useLjCombRule(nb->nbparam->vdwtype))
{
- d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->deviceContext.context(),
+ d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
nalloc * sizeof(cl_float2), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
}
else
{
- d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->deviceContext.context(),
+ d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
nalloc * sizeof(int), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
{
/* TODO when we have a proper MPI-aware logging module,
the log output here should be written there */
- program =
- gmx::ocl::compileProgram(stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl",
- extraDefines, nb->dev_rundata->deviceContext.context(),
- nb->deviceInfo->oclDeviceId, nb->deviceInfo->deviceVendor);
+ program = gmx::ocl::compileProgram(
+ stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines,
+ nb->dev_rundata->deviceContext_.context(), nb->deviceInfo->oclDeviceId,
+ nb->deviceInfo->deviceVendor);
}
catch (gmx::GromacsException& e)
{