* - default high priority with CUDA
* - no priorities implemented yet with OpenCL; see #2532
*/
-#if GMX_GPU == GMX_GPU_CUDA
- cudaError_t stat;
- int highest_priority, lowest_priority;
- stat = cudaDeviceGetStreamPriorityRange(&lowest_priority, &highest_priority);
- CU_RET_ERR(stat, "PME cudaDeviceGetStreamPriorityRange failed");
- cudaStream_t stream;
- stat = cudaStreamCreateWithPriority(&stream,
- cudaStreamDefault, // cudaStreamNonBlocking,
- highest_priority);
- pmeGpu->archSpecific->pmeStream_.setStream(stream);
- CU_RET_ERR(stat, "cudaStreamCreateWithPriority on the PME stream failed");
-#elif GMX_GPU == GMX_GPU_OPENCL
- cl_command_queue_properties queueProperties =
- pmeGpu->archSpecific->useTiming ? CL_QUEUE_PROFILING_ENABLE : 0;
- cl_device_id device_id = pmeGpu->deviceInfo->oclDeviceId;
- cl_int clError;
- pmeGpu->archSpecific->pmeStream_.setStream(clCreateCommandQueue(
- pmeGpu->archSpecific->deviceContext_.context(), device_id, queueProperties, &clError));
-
-
- if (clError != CL_SUCCESS)
- {
- GMX_THROW(gmx::InternalError(
- gmx::formatString("Failed to create PME command queue (OpenCL error %d)", clError).c_str()));
- }
-#endif
+ pmeGpu->archSpecific->pmeStream_.init(*pmeGpu->deviceInfo, pmeGpu->archSpecific->deviceContext_,
+ DeviceStreamPriority::High, pmeGpu->archSpecific->useTiming);
}
void pme_gpu_reinit_3dfft(const PmeGpu* pmeGpu)
*/
static void pme_gpu_init(gmx_pme_t* pme, const DeviceInformation* deviceInfo, const PmeGpuProgram* pmeGpuProgram)
{
+ GMX_ASSERT(deviceInfo != nullptr,
+ "Device information can not be nullptr when GPU is used for PME.");
pme->gpu = new PmeGpu();
PmeGpu* pmeGpu = pme->gpu;
changePinningPolicy(&pmeGpu->staging.h_forces, pme_get_pinning_policy());
GMX_RELEASE_ASSERT(
GMX_THREAD_MPI,
"PME-PP GPU Communication is currently only supported with thread-MPI enabled");
- cudaStream_t stream;
- cudaStreamCreate(&stream);
- pmePpCommStream_.setStream(stream);
+
+ // In CUDA we only need priority to create stream.
+ // (note that this will be moved from here in the follow-up patch)
+ pmePpCommStream_.init(DeviceInformation(), DeviceContext(), DeviceStreamPriority::Normal, false);
}
PmePpCommGpu::Impl::~Impl() = default;
// TODO: The update stream should be created only when it is needed.
# if (GMX_GPU == GMX_GPU_CUDA)
- cudaError_t stat;
- cudaStream_t stream;
- stat = cudaStreamCreate(&stream);
- updateStreamOwn_.setStream(stream);
+ // In CUDA we only need priority to create stream.
+ // (note that this will be moved from here in the follow-up patch)
+ updateStreamOwn_.init(DeviceInformation(), DeviceContext(), DeviceStreamPriority::Normal, false);
updateStream_ = &updateStreamOwn_;
- CU_RET_ERR(stat, "CUDA stream creation failed in StatePropagatorDataGpu");
# endif
}
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
- int /*rank*/,
- bool bLocalAndNonlocal)
+ bool bLocalAndNonlocal)
{
cudaError_t stat;
nb->deviceInfo = deviceInfo;
/* local/non-local GPU streams */
- cudaStream_t localStream;
- stat = cudaStreamCreate(&localStream);
- nb->deviceStreams[InteractionLocality::Local].setStream(localStream);
- CU_RET_ERR(stat, "cudaStreamCreate on stream[InterationLocality::Local] failed");
+ nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceInfo, DeviceContext(),
+ DeviceStreamPriority::Normal, nb->bDoTime);
if (nb->bUseTwoStreams)
{
init_plist(nb->plist[InteractionLocality::NonLocal]);
* priorities, because we are querying the priority range which in this
* case will be a single value.
*/
- int highest_priority;
- stat = cudaDeviceGetStreamPriorityRange(nullptr, &highest_priority);
- CU_RET_ERR(stat, "cudaDeviceGetStreamPriorityRange failed");
-
- cudaStream_t nonLocalStream;
- stat = cudaStreamCreateWithPriority(&nonLocalStream, cudaStreamDefault, highest_priority);
- nb->deviceStreams[InteractionLocality::NonLocal].setStream(nonLocalStream);
- CU_RET_ERR(stat,
- "cudaStreamCreateWithPriority on stream[InteractionLocality::NonLocal] failed");
+ nb->deviceStreams[InteractionLocality::NonLocal].init(
+ *nb->deviceInfo, DeviceContext(), DeviceStreamPriority::High, nb->bDoTime);
}
/* init events for sychronization (timing disabled for performance reasons!) */
const interaction_const_t gmx_unused* ic,
const PairlistParams gmx_unused& listParams,
const nbnxn_atomdata_t gmx_unused* nbat,
- int gmx_unused rank,
/* true if both local and non-local are done on GPU */
bool gmx_unused bLocalAndNonlocal) GPU_FUNC_TERM_WITH_RETURN(nullptr);
/* init the NxN GPU data; the last argument tells whether we'll have
* both local and non-local NB calculation on GPU */
gpu_nbv = gpu_init(deviceInfo, *deviceContext, fr->ic, pairlistParams, nbat.get(),
- cr->nodeid, haveMultipleDomains);
+ haveMultipleDomains);
minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(gpu_nbv);
}
const interaction_const_t* ic,
const PairlistParams& listParams,
const nbnxn_atomdata_t* nbat,
- const int rank,
const bool bLocalAndNonlocal)
{
- cl_int cl_error;
- cl_command_queue_properties queue_properties;
-
GMX_ASSERT(ic, "Need a valid interaction constants object");
auto nb = new NbnxmGpu;
/* OpenCL timing disabled if GMX_DISABLE_GPU_TIMING is defined. */
nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
- /* Create queues only after bDoTime has been initialized */
- if (nb->bDoTime)
- {
- queue_properties = CL_QUEUE_PROFILING_ENABLE;
- }
- else
- {
- queue_properties = 0;
- }
-
- cl_command_queue localStream =
- clCreateCommandQueue(nb->dev_rundata->deviceContext_.context(),
- nb->deviceInfo->oclDeviceId, queue_properties, &cl_error);
/* local/non-local GPU streams */
- nb->deviceStreams[InteractionLocality::Local].setStream(localStream);
-
- if (CL_SUCCESS != cl_error)
- {
- gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s: OpenCL error %d", rank,
- nb->deviceInfo->device_name, cl_error);
- }
+ nb->deviceStreams[InteractionLocality::Local].init(*nb->deviceInfo, nb->dev_rundata->deviceContext_,
+ DeviceStreamPriority::Normal, nb->bDoTime);
if (nb->bUseTwoStreams)
{
init_plist(nb->plist[InteractionLocality::NonLocal]);
- cl_command_queue nonLocalStream =
- clCreateCommandQueue(nb->dev_rundata->deviceContext_.context(),
- nb->deviceInfo->oclDeviceId, queue_properties, &cl_error);
- nb->deviceStreams[InteractionLocality::NonLocal].setStream(nonLocalStream);
-
- if (CL_SUCCESS != cl_error)
- {
- gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s: OpenCL error %d",
- rank, nb->deviceInfo->device_name, cl_error);
- }
+ nb->deviceStreams[InteractionLocality::NonLocal].init(
+ *nb->deviceInfo, nb->dev_rundata->deviceContext_, DeviceStreamPriority::High, nb->bDoTime);
}
if (nb->bDoTime)