*/
#if GMX_GPU_CUDA
- pmeGpu->kernelParams->usePipeline = false;
+ pmeGpu->kernelParams->usePipeline = char(false);
pmeGpu->kernelParams->pipelineAtomStart = 0;
pmeGpu->kernelParams->pipelineAtomEnd = 0;
pmeGpu->maxGridWidthX = deviceContext.deviceInfo().prop.maxGridSize[0];
"inconsistent atom data padding vs. spreading block size");
// Ensure that coordinates are ready on the device before launching spread;
- // only needed with CUDA on PP+PME ranks, not on separate PME ranks, in unit tests
- // nor in OpenCL as these cases use a single stream (hence xReadyOnDevice == nullptr).
- GMX_ASSERT(!GMX_GPU_CUDA || xReadyOnDevice != nullptr || pmeGpu->common->isRankPmeOnly
+    // only needed on PP+PME ranks, not on separate PME ranks nor in unit tests,
+    // as these cases use a single stream (hence xReadyOnDevice == nullptr).
+ GMX_ASSERT(xReadyOnDevice != nullptr || pmeGpu->common->isRankPmeOnly
|| pme_gpu_settings(pmeGpu).copyAllOutputs,
"Need a valid coordinate synchronizer on PP+PME ranks with CUDA.");
pme_gpu_start_timing(pmeGpu, timingId);
auto* timingEvent = pme_gpu_fetch_timing_event(pmeGpu, timingId);
- kernelParamsPtr->usePipeline = computeSplines && spreadCharges && useGpuDirectComm
- && (pmeCoordinateReceiverGpu->ppCommNumSenderRanks() > 1)
- && !writeGlobalOrSaveSplines;
- if (kernelParamsPtr->usePipeline)
+ kernelParamsPtr->usePipeline = char(computeSplines && spreadCharges && useGpuDirectComm
+ && (pmeCoordinateReceiverGpu->ppCommNumSenderRanks() > 1)
+ && !writeGlobalOrSaveSplines);
+ if (kernelParamsPtr->usePipeline != 0)
{
int numStagesInPipeline = pmeCoordinateReceiverGpu->ppCommNumSenderRanks();