The device information with which the context was created is
now stored inside the DeviceContext object. This allows passing
fewer arguments when information from DeviceInformation is needed
(e.g. for OpenCL optimization). The empty constructor for the
DeviceContext was also removed to make having an invalid context
less probable.
Change-Id: Ie9600a89c21327246251c891807c37084f626f76
33 files changed:
* - default high priority with CUDA
* - no priorities implemented yet with OpenCL; see #2532
*/
* - default high priority with CUDA
* - no priorities implemented yet with OpenCL; see #2532
*/
- pmeGpu->archSpecific->pmeStream_.init(*pmeGpu->deviceInfo, pmeGpu->archSpecific->deviceContext_,
+ pmeGpu->archSpecific->pmeStream_.init(pmeGpu->archSpecific->deviceContext_,
DeviceStreamPriority::High, pmeGpu->archSpecific->useTiming);
}
DeviceStreamPriority::High, pmeGpu->archSpecific->useTiming);
}
#include "pme_gpu_program_impl.h"
#include "pme_gpu_program_impl.h"
-PmeGpuProgram::PmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext) :
- impl_(std::make_unique<PmeGpuProgramImpl>(deviceInfo, deviceContext))
+PmeGpuProgram::PmeGpuProgram(const DeviceContext& deviceContext) :
+ impl_(std::make_unique<PmeGpuProgramImpl>(deviceContext))
return impl_->warpSize();
}
return impl_->warpSize();
}
-PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext)
+PmeGpuProgramStorage buildPmeGpuProgram(const DeviceContext& deviceContext)
- return std::make_unique<PmeGpuProgram>(deviceInfo, deviceContext);
+ return std::make_unique<PmeGpuProgram>(deviceContext);
class PmeGpuProgram
{
public:
class PmeGpuProgram
{
public:
- //! Constructor
- explicit PmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext);
+ /*! \brief Construct a PME GPU program.
+ *
+ * \param[in] deviceContext GPU context.
+ */
+ explicit PmeGpuProgram(const DeviceContext& deviceContext);
+ //! Destructor
~PmeGpuProgram();
//! Return the warp size for which the kernels were compiled
~PmeGpuProgram();
//! Return the warp size for which the kernels were compiled
/*! \brief
* Factory function used to build persistent PME GPU program for the device at once.
*/
/*! \brief
* Factory function used to build persistent PME GPU program for the device at once.
*/
-PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation& /*deviceInfo*/,
- const DeviceContext& /* deviceContext */);
+PmeGpuProgramStorage buildPmeGpuProgram(const DeviceContext& /* deviceContext */);
#include "pme_gpu_program_impl.h"
#include "pme_gpu_program_impl.h"
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */,
- const DeviceContext& deviceContext) :
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceContext& deviceContext) :
deviceContext_(deviceContext),
warpSize_(0),
spreadWorkGroupSize(0),
deviceContext_(deviceContext),
warpSize_(0),
spreadWorkGroupSize(0),
extern template void pme_gather_kernel<c_pmeOrder, c_wrapX, c_wrapY, false, ThreadsPerAtom::OrderSquared>(const PmeGpuCudaKernelParams);
// clang-format on
extern template void pme_gather_kernel<c_pmeOrder, c_wrapX, c_wrapY, false, ThreadsPerAtom::OrderSquared>(const PmeGpuCudaKernelParams);
// clang-format on
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */,
- const DeviceContext& deviceContext) :
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceContext& deviceContext) :
deviceContext_(deviceContext)
{
// kernel parameters
deviceContext_(deviceContext)
{
// kernel parameters
PmeGpuProgramImpl() = delete;
//! Constructor for the given device
PmeGpuProgramImpl() = delete;
//! Constructor for the given device
- explicit PmeGpuProgramImpl(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext);
+ explicit PmeGpuProgramImpl(const DeviceContext& deviceContext);
~PmeGpuProgramImpl();
GMX_DISALLOW_COPY_AND_ASSIGN(PmeGpuProgramImpl);
~PmeGpuProgramImpl();
GMX_DISALLOW_COPY_AND_ASSIGN(PmeGpuProgramImpl);
#include "pme_gpu_types_host.h"
#include "pme_grid.h"
#include "pme_gpu_types_host.h"
#include "pme_grid.h"
-PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext) :
+PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceContext& deviceContext) :
deviceContext_(deviceContext)
{
deviceContext_(deviceContext)
{
+ const DeviceInformation& deviceInfo = deviceContext.deviceInfo();
// kernel parameters
warpSize_ = gmx::ocl::getDeviceWarpSize(deviceContext_.context(), deviceInfo.oclDeviceId);
// TODO: for Intel ideally we'd want to set these based on the compiler warp size
// kernel parameters
warpSize_ = gmx::ocl::getDeviceWarpSize(deviceContext_.context(), deviceInfo.oclDeviceId);
// TODO: for Intel ideally we'd want to set these based on the compiler warp size
// In CUDA we only need priority to create stream.
// (note that this will be moved from here in the follow-up patch)
// In CUDA we only need priority to create stream.
// (note that this will be moved from here in the follow-up patch)
- pmePpCommStream_.init(DeviceInformation(), DeviceContext(), DeviceStreamPriority::Normal, false);
+ pmePpCommStream_.init(deviceContext, DeviceStreamPriority::Normal, false);
}
PmePpCommGpu::Impl::~Impl() = default;
}
PmePpCommGpu::Impl::~Impl() = default;
//! Device information pointer
const DeviceInformation* deviceInfo_;
//! Local copy of the device context pointer
//! Device information pointer
const DeviceInformation* deviceInfo_;
//! Local copy of the device context pointer
- DeviceContext deviceContext_;
+ std::unique_ptr<DeviceContext> deviceContext_;
//! Persistent compiled GPU kernels for PME.
PmeGpuProgramStorage program_;
//! Persistent compiled GPU kernels for PME.
PmeGpuProgramStorage program_;
//! Returns a human-readable context description line
std::string getDescription() const { return description_; }
//! Getter for the DeviceContext
//! Returns a human-readable context description line
std::string getDescription() const { return description_; }
//! Getter for the DeviceContext
- const DeviceContext& deviceContext() const { return deviceContext_; }
+ const DeviceContext& deviceContext() const
+ {
+ GMX_RELEASE_ASSERT(deviceContext_ != nullptr,
+ "Trying to get device context before it was initialized or in builds "
+ "without GPU support.");
+ return *deviceContext_;
+ }
//! Returns the device info pointer
const DeviceInformation* getDeviceInfo() const { return deviceInfo_; }
//! Returns the persistent PME GPU kernels
//! Returns the device info pointer
const DeviceInformation* getDeviceInfo() const { return deviceInfo_; }
//! Returns the persistent PME GPU kernels
{
GMX_RELEASE_ASSERT(codePath == CodePath::CPU,
"A GPU code path should provide DeviceInformation to the "
{
GMX_RELEASE_ASSERT(codePath == CodePath::CPU,
"A GPU code path should provide DeviceInformation to the "
- "TestHerdwareContext constructor.");
+ "TestHardwareContext constructor.");
}
//! Constructs the context for GPU builds
TestHardwareContext(CodePath codePath, const char* description, const DeviceInformation& deviceInfo) :
codePath_(codePath),
description_(description),
}
//! Constructs the context for GPU builds
TestHardwareContext(CodePath codePath, const char* description, const DeviceInformation& deviceInfo) :
codePath_(codePath),
description_(description),
- deviceInfo_(&deviceInfo),
- deviceContext_(deviceInfo),
- program_(buildPmeGpuProgram(deviceInfo, deviceContext_))
+ deviceInfo_(&deviceInfo)
{
GMX_RELEASE_ASSERT(codePath == CodePath::GPU,
{
GMX_RELEASE_ASSERT(codePath == CodePath::GPU,
- "TestHerdwareContext tries to construct DeviceContext and PmeGpuProgram "
+ "TestHardwareContext tries to construct DeviceContext and PmeGpuProgram "
+ deviceContext_ = std::make_unique<DeviceContext>(deviceInfo);
+ program_ = buildPmeGpuProgram(*deviceContext_);
}
~TestHardwareContext();
};
}
~TestHardwareContext();
};
class DeviceContext
{
public:
class DeviceContext
{
public:
- //! Default constructor.
- DeviceContext() {}
- DeviceContext(const DeviceInformation& /* deviceInfo */) {}
+ DeviceContext(const DeviceInformation& deviceInfo) : deviceInfo_(deviceInfo) {}
//! Destructor
~DeviceContext() = default;
//! Destructor
~DeviceContext() = default;
+ //! Get the associated device information
+ const DeviceInformation& deviceInfo() const { return deviceInfo_; }
+
+private:
+ //! A reference to the device information used upon context creation
+ const DeviceInformation& deviceInfo_;
+
GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext);
};
#endif // GMX_GPU != GMX_GPU_OPENCL
GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext);
};
#endif // GMX_GPU != GMX_GPU_OPENCL
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL 0x4
/**@}*/
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL 0x4
/**@}*/
-DeviceContext::DeviceContext(const DeviceInformation& deviceInfo)
+DeviceContext::DeviceContext(const DeviceInformation& deviceInfo) : deviceInfo_(deviceInfo)
{
cl_platform_id platformId = deviceInfo.oclPlatformId;
cl_device_id deviceId = deviceInfo.oclDeviceId;
{
cl_platform_id platformId = deviceInfo.oclPlatformId;
cl_device_id deviceId = deviceInfo.oclDeviceId;
class DeviceContext
{
public:
class DeviceContext
{
public:
- //! Default constructor.
- DeviceContext() {}
/*! \brief Constructor that creates the \c cl_context
*
* \param[in] deviceInfo Platform-specific device information.
/*! \brief Constructor that creates the \c cl_context
*
* \param[in] deviceInfo Platform-specific device information.
//! Destructor
~DeviceContext();
//! Destructor
~DeviceContext();
+ //! Get the associated device information
+ const DeviceInformation& deviceInfo() const { return deviceInfo_; }
//! Getter
cl_context context() const;
//! Getter
cl_context context() const;
- GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext);
-
+ //! A reference to the device information used upon context creation
+ const DeviceInformation& deviceInfo_;
//! OpenCL context object
cl_context context_ = nullptr;
//! OpenCL context object
cl_context context_ = nullptr;
+
+ GMX_DISALLOW_COPY_MOVE_AND_ASSIGN(DeviceContext);
};
#endif // GMX_GPU_UTILS_DEVICE_CONTEXT_OCL_H
};
#endif // GMX_GPU_UTILS_DEVICE_CONTEXT_OCL_H
DeviceStream::DeviceStream() = default;
DeviceStream::DeviceStream() = default;
-void DeviceStream::init(const DeviceInformation& /* deviceInfo */,
- const DeviceContext& /* deviceContext */,
+void DeviceStream::init(const DeviceContext& /* deviceContext */,
DeviceStreamPriority /* priority */,
const bool /* useTiming */)
{
DeviceStreamPriority /* priority */,
const bool /* useTiming */)
{
-void DeviceStream::init(const DeviceInformation& /* deviceInfo */,
- const DeviceContext& /* deviceContext */,
+void DeviceStream::init(const DeviceContext& /* deviceContext */,
DeviceStreamPriority priority,
const bool /* useTiming */)
{
DeviceStreamPriority priority,
const bool /* useTiming */)
{
- * \param[in] deviceInfo Platform-specific device information (only used in OpenCL).
* \param[in] deviceContext Device context (not used in CUDA).
* \param[in] priority Stream priority: high or normal.
* \param[in] useTiming If the timing should be enabled (not used in CUDA).
*/
* \param[in] deviceContext Device context (not used in CUDA).
* \param[in] priority Stream priority: high or normal.
* \param[in] useTiming If the timing should be enabled (not used in CUDA).
*/
- void init(const DeviceInformation& deviceInfo,
- const DeviceContext& deviceContext,
- DeviceStreamPriority priority,
- const bool useTiming);
+ void init(const DeviceContext& deviceContext, DeviceStreamPriority priority, const bool useTiming);
/*! \brief Construct and init.
*
/*! \brief Construct and init.
*
- * \param[in] deviceInfo Platform-specific device information (only used in OpenCL).
* \param[in] deviceContext Device context (only used in OpenCL).
* \param[in] priority Stream priority: high or normal (only used in CUDA).
* \param[in] useTiming If the timing should be enabled (only used in OpenCL).
*/
* \param[in] deviceContext Device context (only used in OpenCL).
* \param[in] priority Stream priority: high or normal (only used in CUDA).
* \param[in] useTiming If the timing should be enabled (only used in OpenCL).
*/
- DeviceStream(const DeviceInformation& deviceInfo,
- const DeviceContext& deviceContext,
- DeviceStreamPriority priority,
- const bool useTiming)
+ DeviceStream(const DeviceContext& deviceContext, DeviceStreamPriority priority, const bool useTiming)
- init(deviceInfo, deviceContext, priority, useTiming);
+ init(deviceContext, priority, useTiming);
}
//! Synchronize the stream
}
//! Synchronize the stream
-void DeviceStream::init(const DeviceInformation& deviceInfo,
- const DeviceContext& deviceContext,
- DeviceStreamPriority /* priority */,
- const bool useTiming)
+void DeviceStream::init(const DeviceContext& deviceContext, DeviceStreamPriority /* priority */, const bool useTiming)
+ const DeviceInformation& deviceInfo = deviceContext.deviceInfo();
cl_command_queue_properties queueProperties = useTiming ? CL_QUEUE_PROFILING_ENABLE : 0;
cl_device_id deviceId = deviceInfo.oclDeviceId;
cl_int clError;
cl_command_queue_properties queueProperties = useTiming ? CL_QUEUE_PROFILING_ENABLE : 0;
cl_device_id deviceId = deviceInfo.oclDeviceId;
cl_int clError;
const auto dummyArguments = prepareGpuKernelArguments(k_dummy_test, config);
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
const auto dummyArguments = prepareGpuKernelArguments(k_dummy_test, config);
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
- const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false);
+ const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
launchGpuKernel(k_dummy_test, config, deviceStream, nullptr, "Dummy kernel", dummyArguments);
}
catch (gmx::GromacsException& ex)
launchGpuKernel(k_dummy_test, config, deviceStream, nullptr, "Dummy kernel", dummyArguments);
}
catch (gmx::GromacsException& ex)
*/
struct gmx_device_runtime_data_t
{
*/
struct gmx_device_runtime_data_t
{
- //! Constructor
- gmx_device_runtime_data_t(const DeviceContext& deviceContext) : deviceContext_(deviceContext) {}
-
- //! OpenCL context
- const DeviceContext& deviceContext_;
//! OpenCL program
cl_program program;
};
//! OpenCL program
cl_program program;
};
{
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
{
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
- const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false);
+ const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
const int numElements = h_rVecInput.size();
const int numElements = h_rVecInput.size();
{
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
{
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
- const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false);
+ const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
auto lincsGpu = std::make_unique<LincsGpu>(testData->ir_.nLincsIter, testData->ir_.nProjOrder,
deviceContext, deviceStream);
auto lincsGpu = std::make_unique<LincsGpu>(testData->ir_.nLincsIter, testData->ir_.nProjOrder,
deviceContext, deviceStream);
{
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
{
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
- const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false);
+ const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
int numAtoms = testData->numAtoms_;
int numAtoms = testData->numAtoms_;
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
DeviceInformation deviceInfo;
const DeviceContext deviceContext(deviceInfo);
- const DeviceStream deviceStream(deviceInfo, deviceContext, DeviceStreamPriority::Normal, false);
+ const DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
auto settleGpu = std::make_unique<SettleGpu>(testData->mtop_, deviceContext, deviceStream);
auto settleGpu = std::make_unique<SettleGpu>(testData->mtop_, deviceContext, deviceStream);
PmeGpuProgramStorage pmeGpuProgram;
if (thisRankHasPmeGpuTask)
{
PmeGpuProgramStorage pmeGpuProgram;
if (thisRankHasPmeGpuTask)
{
- GMX_RELEASE_ASSERT(
- deviceInfo != nullptr,
- "Device information can not be nullptr when building PME GPU program object.");
GMX_RELEASE_ASSERT(
deviceContext != nullptr,
"Device context can not be nullptr when building PME GPU program object.");
GMX_RELEASE_ASSERT(
deviceContext != nullptr,
"Device context can not be nullptr when building PME GPU program object.");
- pmeGpuProgram = buildPmeGpuProgram(*deviceInfo, *deviceContext);
+ pmeGpuProgram = buildPmeGpuProgram(*deviceContext);
}
/* Initiate PME if necessary,
}
/* Initiate PME if necessary,
# if (GMX_GPU == GMX_GPU_CUDA)
// In CUDA we only need priority to create stream.
// (note that this will be moved from here in the follow-up patch)
# if (GMX_GPU == GMX_GPU_CUDA)
// In CUDA we only need priority to create stream.
// (note that this will be moved from here in the follow-up patch)
- updateStreamOwn_.init(DeviceInformation(), DeviceContext(), DeviceStreamPriority::Normal, false);
+ updateStreamOwn_.init(deviceContext, DeviceStreamPriority::Normal, false);
updateStream_ = &updateStreamOwn_;
# endif
}
updateStream_ = &updateStreamOwn_;
# endif
}
* - The 1D block-grid contains as many blocks as super-clusters.
*/
int num_threads_z = 1;
* - The 1D block-grid contains as many blocks as super-clusters.
*/
int num_threads_z = 1;
- if (nb->deviceInfo->prop.major == 3 && nb->deviceInfo->prop.minor == 7)
+ if (nb->deviceContext_->deviceInfo().prop.major == 3 && nb->deviceContext_->deviceInfo().prop.minor == 7)
- int nblock = calc_nb_kernel_nblock(plist->nsci, nb->deviceInfo);
+ int nblock = calc_nb_kernel_nblock(plist->nsci, &nb->deviceContext_->deviceInfo());
KernelLaunchConfig config;
KernelLaunchConfig config;
- config.blockSize[0] = c_clSize;
- config.blockSize[1] = c_clSize;
- config.blockSize[2] = num_threads_z;
- config.gridSize[0] = nblock;
- config.sharedMemorySize = calc_shmem_required_nonbonded(num_threads_z, nb->deviceInfo, nbp);
+ config.blockSize[0] = c_clSize;
+ config.blockSize[1] = c_clSize;
+ config.blockSize[2] = num_threads_z;
+ config.gridSize[0] = nblock;
+ config.sharedMemorySize =
+ calc_shmem_required_nonbonded(num_threads_z, &nb->deviceContext_->deviceInfo(), nbp);
}
auto* timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
}
auto* timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
- const auto kernel = select_nbnxn_kernel(
- nbp->eeltype, nbp->vdwtype, stepWork.computeEnergy,
- (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune), nb->deviceInfo);
+ const auto kernel =
+ select_nbnxn_kernel(nbp->eeltype, nbp->vdwtype, stepWork.computeEnergy,
+ (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune),
+ &nb->deviceContext_->deviceInfo());
const auto kernelArgs =
prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &stepWork.computeVirial);
launchGpuKernel(kernel, config, deviceStream, timingEvent, "k_calc_nb", kernelArgs);
const auto kernelArgs =
prepareGpuKernelArguments(kernel, config, adat, nbp, plist, &stepWork.computeVirial);
launchGpuKernel(kernel, config, deviceStream, timingEvent, "k_calc_nb", kernelArgs);
* and j-cluster concurrency, in x, y, and z, respectively.
* - The 1D block-grid contains as many blocks as super-clusters.
*/
* and j-cluster concurrency, in x, y, and z, respectively.
* - The 1D block-grid contains as many blocks as super-clusters.
*/
- int num_threads_z = c_cudaPruneKernelJ4Concurrency;
- int nblock = calc_nb_kernel_nblock(numSciInPart, nb->deviceInfo);
+ int num_threads_z = c_cudaPruneKernelJ4Concurrency;
+ int nblock = calc_nb_kernel_nblock(numSciInPart, &nb->deviceContext_->deviceInfo());
KernelLaunchConfig config;
config.blockSize[0] = c_clSize;
config.blockSize[1] = c_clSize;
KernelLaunchConfig config;
config.blockSize[0] = c_clSize;
config.blockSize[1] = c_clSize;
nbnxn_cuda_clear_e_fshift(nb);
}
nbnxn_cuda_clear_e_fshift(nb);
}
-NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo,
- const DeviceContext& /* deviceContext */,
+NbnxmGpu* gpu_init(const DeviceContext& deviceContext,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
- auto nb = new NbnxmGpu;
+ auto nb = new NbnxmGpu();
+ nb->deviceContext_ = &deviceContext;
snew(nb->atdat, 1);
snew(nb->nbparam, 1);
snew(nb->plist[InteractionLocality::Local], 1);
snew(nb->atdat, 1);
snew(nb->nbparam, 1);
snew(nb->plist[InteractionLocality::Local], 1);
init_plist(nb->plist[InteractionLocality::Local]);
init_plist(nb->plist[InteractionLocality::Local]);
- /* set device info, just point it to the right GPU among the detected ones */
- nb->deviceInfo = deviceInfo;
-
/* local/non-local GPU streams */
/* local/non-local GPU streams */
- nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceInfo, DeviceContext(),
+ nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceContext_,
DeviceStreamPriority::Normal, nb->bDoTime);
if (nb->bUseTwoStreams)
{
DeviceStreamPriority::Normal, nb->bDoTime);
if (nb->bUseTwoStreams)
{
* case will be a single value.
*/
nb->deviceStreams[InteractionLocality::NonLocal].init(
* case will be a single value.
*/
nb->deviceStreams[InteractionLocality::NonLocal].init(
- *nb->deviceInfo, DeviceContext(), DeviceStreamPriority::High, nb->bDoTime);
+ *nb->deviceContext_, DeviceStreamPriority::High, nb->bDoTime);
}
/* init events for sychronization (timing disabled for performance reasons!) */
}
/* init events for sychronization (timing disabled for performance reasons!) */
iTimers.didPairlistH2D = true;
}
iTimers.didPairlistH2D = true;
}
+ const DeviceContext& deviceContext = *nb->deviceContext_;
+
reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
copyToDeviceBuffer(&d_plist->sci, h_plist->sci.data(), 0, h_plist->sci.size(), deviceStream,
GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
reallocateDeviceBuffer(&d_plist->cj4, h_plist->cj4.size(), &d_plist->ncj4, &d_plist->cj4_nalloc,
copyToDeviceBuffer(&d_plist->sci, h_plist->sci.data(), 0, h_plist->sci.size(), deviceStream,
GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
reallocateDeviceBuffer(&d_plist->cj4, h_plist->cj4.size(), &d_plist->ncj4, &d_plist->cj4_nalloc,
copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4.data(), 0, h_plist->cj4.size(), deviceStream,
GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
reallocateDeviceBuffer(&d_plist->imask, h_plist->cj4.size() * c_nbnxnGpuClusterpairSplit,
copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4.data(), 0, h_plist->cj4.size(), deviceStream,
GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
reallocateDeviceBuffer(&d_plist->imask, h_plist->cj4.size() * c_nbnxnGpuClusterpairSplit,
- &d_plist->nimask, &d_plist->imask_nalloc, DeviceContext());
+ &d_plist->nimask, &d_plist->imask_nalloc, deviceContext);
reallocateDeviceBuffer(&d_plist->excl, h_plist->excl.size(), &d_plist->nexcl,
reallocateDeviceBuffer(&d_plist->excl, h_plist->excl.size(), &d_plist->nexcl,
- &d_plist->excl_nalloc, DeviceContext());
+ &d_plist->excl_nalloc, deviceContext);
copyToDeviceBuffer(&d_plist->excl, h_plist->excl.data(), 0, h_plist->excl.size(), deviceStream,
GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
copyToDeviceBuffer(&d_plist->excl, h_plist->excl.data(), 0, h_plist->excl.size(), deviceStream,
GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);
int gpu_min_ci_balanced(NbnxmGpu* nb)
{
int gpu_min_ci_balanced(NbnxmGpu* nb)
{
- return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceInfo->prop.multiProcessorCount : 0;
+ return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceContext_->deviceInfo().prop.multiProcessorCount
+ : 0;
}
gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
}
gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
const int maxNumColumns = gridSet.numColumnsMax();
reallocateDeviceBuffer(&gpu_nbv->cxy_na, maxNumColumns * gridSet.grids().size(),
const int maxNumColumns = gridSet.numColumnsMax();
reallocateDeviceBuffer(&gpu_nbv->cxy_na, maxNumColumns * gridSet.grids().size(),
- &gpu_nbv->ncxy_na, &gpu_nbv->ncxy_na_alloc, DeviceContext());
+ &gpu_nbv->ncxy_na, &gpu_nbv->ncxy_na_alloc, *gpu_nbv->deviceContext_);
reallocateDeviceBuffer(&gpu_nbv->cxy_ind, maxNumColumns * gridSet.grids().size(),
reallocateDeviceBuffer(&gpu_nbv->cxy_ind, maxNumColumns * gridSet.grids().size(),
- &gpu_nbv->ncxy_ind, &gpu_nbv->ncxy_ind_alloc, DeviceContext());
+ &gpu_nbv->ncxy_ind, &gpu_nbv->ncxy_ind_alloc, *gpu_nbv->deviceContext_);
for (unsigned int g = 0; g < gridSet.grids().size(); g++)
{
for (unsigned int g = 0; g < gridSet.grids().size(); g++)
{
const int* cxy_ind = grid.cxy_ind().data();
reallocateDeviceBuffer(&gpu_nbv->atomIndices, atomIndicesSize, &gpu_nbv->atomIndicesSize,
const int* cxy_ind = grid.cxy_ind().data();
reallocateDeviceBuffer(&gpu_nbv->atomIndices, atomIndicesSize, &gpu_nbv->atomIndicesSize,
- &gpu_nbv->atomIndicesSize_alloc, DeviceContext());
+ &gpu_nbv->atomIndicesSize_alloc, *gpu_nbv->deviceContext_);
if (atomIndicesSize > 0)
{
if (atomIndicesSize > 0)
{
if (natoms_total > 0)
{
reallocateDeviceBuffer(&gpu_nbv->cell, natoms_total, &gpu_nbv->ncell, &gpu_nbv->ncell_alloc,
if (natoms_total > 0)
{
reallocateDeviceBuffer(&gpu_nbv->cell, natoms_total, &gpu_nbv->ncell, &gpu_nbv->ncell_alloc,
+ *gpu_nbv->deviceContext_);
copyToDeviceBuffer(&gpu_nbv->cell, cell, 0, natoms_total, deviceStream,
GpuApiCallBehavior::Async, nullptr);
}
copyToDeviceBuffer(&gpu_nbv->cell, cell, 0, natoms_total, deviceStream,
GpuApiCallBehavior::Async, nullptr);
}
- /*! \brief CUDA device information */
- const DeviceInformation* deviceInfo = nullptr;
+ /*! \brief GPU device context.
+ *
+ * \todo Make it constant reference, once NbnxmGpu is a proper class.
+ */
+ const DeviceContext* deviceContext_;
/*! \brief true if doing both local/non-local NB work on GPU */
bool bUseTwoStreams = false;
/*! \brief atom data */
/*! \brief true if doing both local/non-local NB work on GPU */
bool bUseTwoStreams = false;
/*! \brief atom data */
/** Initializes the data structures related to GPU nonbonded calculations. */
GPU_FUNC_QUALIFIER
/** Initializes the data structures related to GPU nonbonded calculations. */
GPU_FUNC_QUALIFIER
-NbnxmGpu* gpu_init(const DeviceInformation gmx_unused* deviceInfo,
- const DeviceContext gmx_unused& deviceContext,
+NbnxmGpu* gpu_init(const DeviceContext gmx_unused& deviceContext,
const interaction_const_t gmx_unused* ic,
const PairlistParams gmx_unused& listParams,
const nbnxn_atomdata_t gmx_unused* nbat,
const interaction_const_t gmx_unused* ic,
const PairlistParams gmx_unused& listParams,
const nbnxn_atomdata_t gmx_unused* nbat,
"Device context can not be nullptr when to use GPU for non-bonded forces.");
/* init the NxN GPU data; the last argument tells whether we'll have
* both local and non-local NB calculation on GPU */
"Device context can not be nullptr when to use GPU for non-bonded forces.");
/* init the NxN GPU data; the last argument tells whether we'll have
* both local and non-local NB calculation on GPU */
- gpu_nbv = gpu_init(deviceInfo, *deviceContext, fr->ic, pairlistParams, nbat.get(),
- haveMultipleDomains);
+ gpu_nbv = gpu_init(*deviceContext, fr->ic, pairlistParams, nbat.get(), haveMultipleDomains);
minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(gpu_nbv);
}
minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(gpu_nbv);
}
config.blockSize[1] = c_clSize;
config.gridSize[0] = plist->nsci;
config.blockSize[1] = c_clSize;
config.gridSize[0] = plist->nsci;
- validate_global_work_size(config, 3, nb->deviceInfo);
+ validate_global_work_size(config, 3, &nb->deviceContext_->deviceInfo());
config.blockSize[2] = num_threads_z;
config.gridSize[0] = numSciInPart;
config.blockSize[2] = num_threads_z;
config.gridSize[0] = numSciInPart;
- validate_global_work_size(config, 3, nb->deviceInfo);
+ validate_global_work_size(config, 3, &nb->deviceContext_->deviceInfo());
nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
- init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, nb->dev_rundata->deviceContext_);
+ init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_);
}
/*! \brief Initializes the pair list data structure.
}
/*! \brief Initializes the pair list data structure.
if (CL_SUCCESS != cl_error)
{
gmx_fatal(FARGS, "Failed to create kernel '%s' for GPU #%s: OpenCL error %d", kernel_name,
if (CL_SUCCESS != cl_error)
{
gmx_fatal(FARGS, "Failed to create kernel '%s' for GPU #%s: OpenCL error %d", kernel_name,
- nb->deviceInfo->device_name, cl_error);
+ nb->deviceContext_->deviceInfo().device_name, cl_error);
//! This function is documented in the header file
//! This function is documented in the header file
-NbnxmGpu* gpu_init(const DeviceInformation* deviceInfo,
- const DeviceContext& deviceContext,
+NbnxmGpu* gpu_init(const DeviceContext& deviceContext,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
{
GMX_ASSERT(ic, "Need a valid interaction constants object");
{
GMX_ASSERT(ic, "Need a valid interaction constants object");
- auto nb = new NbnxmGpu;
+ auto nb = new NbnxmGpu();
+ nb->deviceContext_ = &deviceContext;
snew(nb->atdat, 1);
snew(nb->nbparam, 1);
snew(nb->plist[InteractionLocality::Local], 1);
snew(nb->atdat, 1);
snew(nb->nbparam, 1);
snew(nb->plist[InteractionLocality::Local], 1);
nb->timers = new cl_timers_t();
snew(nb->timings, 1);
nb->timers = new cl_timers_t();
snew(nb->timings, 1);
- /* set device info, just point it to the right GPU among the detected ones */
- nb->deviceInfo = deviceInfo;
- nb->dev_rundata = new gmx_device_runtime_data_t(deviceContext);
+ nb->dev_rundata = new gmx_device_runtime_data_t();
/* init nbst */
pmalloc(reinterpret_cast<void**>(&nb->nbst.e_lj), sizeof(*nb->nbst.e_lj));
/* init nbst */
pmalloc(reinterpret_cast<void**>(&nb->nbst.e_lj), sizeof(*nb->nbst.e_lj));
nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
/* local/non-local GPU streams */
nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
/* local/non-local GPU streams */
- nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceInfo, nb->dev_rundata->deviceContext_,
+ nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceContext_,
DeviceStreamPriority::Normal, nb->bDoTime);
if (nb->bUseTwoStreams)
DeviceStreamPriority::Normal, nb->bDoTime);
if (nb->bUseTwoStreams)
init_plist(nb->plist[InteractionLocality::NonLocal]);
nb->deviceStreams[InteractionLocality::NonLocal].init(
init_plist(nb->plist[InteractionLocality::NonLocal]);
nb->deviceStreams[InteractionLocality::NonLocal].init(
- *nb->deviceInfo, nb->dev_rundata->deviceContext_, DeviceStreamPriority::High, nb->bDoTime);
+ *nb->deviceContext_, DeviceStreamPriority::High, nb->bDoTime);
init_timings(nb->timings);
}
init_timings(nb->timings);
}
- nbnxn_ocl_init_const(nb->atdat, nb->nbparam, ic, listParams, nbat->params(),
- nb->dev_rundata->deviceContext_);
+ nbnxn_ocl_init_const(nb->atdat, nb->nbparam, ic, listParams, nbat->params(), *nb->deviceContext_);
/* Enable LJ param manual prefetch for AMD or Intel or if we request through env. var.
* TODO: decide about NVIDIA
*/
nb->bPrefetchLjParam = (getenv("GMX_OCL_DISABLE_I_PREFETCH") == nullptr)
/* Enable LJ param manual prefetch for AMD or Intel or if we request through env. var.
* TODO: decide about NVIDIA
*/
nb->bPrefetchLjParam = (getenv("GMX_OCL_DISABLE_I_PREFETCH") == nullptr)
- && ((nb->deviceInfo->deviceVendor == DeviceVendor::Amd)
- || (nb->deviceInfo->deviceVendor == DeviceVendor::Intel)
+ && ((nb->deviceContext_->deviceInfo().deviceVendor == DeviceVendor::Amd)
+ || (nb->deviceContext_->deviceInfo().deviceVendor == DeviceVendor::Intel)
|| (getenv("GMX_OCL_ENABLE_I_PREFETCH") != nullptr));
/* NOTE: in CUDA we pick L1 cache configuration for the nbnxn kernels here,
|| (getenv("GMX_OCL_ENABLE_I_PREFETCH") != nullptr));
/* NOTE: in CUDA we pick L1 cache configuration for the nbnxn kernels here,
}
// TODO most of this function is same in CUDA and OpenCL, move into the header
}
// TODO most of this function is same in CUDA and OpenCL, move into the header
- const DeviceContext& deviceContext = nb->dev_rundata->deviceContext_;
+ const DeviceContext& deviceContext = *nb->deviceContext_;
reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
deviceContext);
reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
deviceContext);
freeDeviceBuffer(&d_atdat->atom_types);
}
freeDeviceBuffer(&d_atdat->atom_types);
}
- d_atdat->f = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
- CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
+ d_atdat->f = clCreateBuffer(nb->deviceContext_->context(), CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
nalloc * DIM * sizeof(nbat->out[0].f[0]), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
nalloc * DIM * sizeof(nbat->out[0].f[0]), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
- d_atdat->xq = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
- CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
+ d_atdat->xq = clCreateBuffer(nb->deviceContext_->context(), CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
nalloc * sizeof(cl_float4), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
if (useLjCombRule(nb->nbparam->vdwtype))
{
nalloc * sizeof(cl_float4), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
if (useLjCombRule(nb->nbparam->vdwtype))
{
- d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
+ d_atdat->lj_comb = clCreateBuffer(nb->deviceContext_->context(),
CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
nalloc * sizeof(cl_float2), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
nalloc * sizeof(cl_float2), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
- d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->deviceContext_.context(),
+ d_atdat->atom_types = clCreateBuffer(nb->deviceContext_->context(),
CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
nalloc * sizeof(int), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
nalloc * sizeof(int), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
//! This function is documented in the header file
int gpu_min_ci_balanced(NbnxmGpu* nb)
{
//! This function is documented in the header file
int gpu_min_ci_balanced(NbnxmGpu* nb)
{
- return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceInfo->compute_units : 0;
+ return nb != nullptr ? gpu_min_ci_balanced_factor * nb->deviceContext_->deviceInfo().compute_units : 0;
}
//! This function is documented in the header file
}
//! This function is documented in the header file
the log output here should be written there */
program = gmx::ocl::compileProgram(
stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines,
the log output here should be written there */
program = gmx::ocl::compileProgram(
stderr, "gromacs/nbnxm/opencl", "nbnxm_ocl_kernels.cl", extraDefines,
- nb->dev_rundata->deviceContext_.context(), nb->deviceInfo->oclDeviceId,
- nb->deviceInfo->deviceVendor);
+ nb->deviceContext_->context(), nb->deviceContext_->deviceInfo().oclDeviceId,
+ nb->deviceContext_->deviceInfo().deviceVendor);
}
catch (gmx::GromacsException& e)
{
e.prependContext(gmx::formatString("Failed to compile NBNXN kernels for GPU #%s\n",
}
catch (gmx::GromacsException& e)
{
e.prependContext(gmx::formatString("Failed to compile NBNXN kernels for GPU #%s\n",
- nb->deviceInfo->device_name));
+ nb->deviceContext_->deviceInfo().device_name));
- //! OpenCL device information
- const DeviceInformation* deviceInfo = nullptr;
+ /*! \brief OpenCL device context
+ *
+ * \todo Make it constant reference, once NbnxmGpu is a proper class.
+ */
+ const DeviceContext* deviceContext_;
//! OpenCL runtime data (context, kernels)
struct gmx_device_runtime_data_t* dev_rundata = nullptr;
//! OpenCL runtime data (context, kernels)
struct gmx_device_runtime_data_t* dev_rundata = nullptr;