config.blockSize[2] = atomsPerBlock;
config.gridSize[0] = dimGrid.first;
config.gridSize[1] = dimGrid.second;
- config.stream = pmeGpu->archSpecific->pmeStream_.stream();
int timingId;
PmeGpuProgramImpl::PmeKernelHandle kernelPtr = nullptr;
&kernelParamsPtr->atoms.d_coordinates);
#endif
- launchGpuKernel(kernelPtr, config, timingEvent, "PME spline/spread", kernelArgs);
+ launchGpuKernel(kernelPtr, config, pmeGpu->archSpecific->pmeStream_, timingEvent,
+ "PME spline/spread", kernelArgs);
pme_gpu_stop_timing(pmeGpu, timingId);
const auto& settings = pmeGpu->settings;
config.gridSize[1] = (pmeGpu->kernelParams->grid.complexGridSize[middleDim] + gridLinesPerBlock - 1)
/ gridLinesPerBlock;
config.gridSize[2] = pmeGpu->kernelParams->grid.complexGridSize[majorDim];
- config.stream = pmeGpu->archSpecific->pmeStream_.stream();
int timingId = gtPME_SOLVE;
PmeGpuProgramImpl::PmeKernelHandle kernelPtr = nullptr;
kernelPtr, config, kernelParamsPtr, &kernelParamsPtr->grid.d_splineModuli,
&kernelParamsPtr->constants.d_virialAndEnergy, &kernelParamsPtr->grid.d_fourierGrid);
#endif
- launchGpuKernel(kernelPtr, config, timingEvent, "PME solve", kernelArgs);
+ launchGpuKernel(kernelPtr, config, pmeGpu->archSpecific->pmeStream_, timingEvent, "PME solve",
+ kernelArgs);
pme_gpu_stop_timing(pmeGpu, timingId);
if (computeEnergyAndVirial)
config.blockSize[2] = atomsPerBlock;
config.gridSize[0] = dimGrid.first;
config.gridSize[1] = dimGrid.second;
- config.stream = pmeGpu->archSpecific->pmeStream_.stream();
// TODO test different cache configs
&kernelParamsPtr->atoms.d_dtheta, &kernelParamsPtr->atoms.d_gridlineIndices,
&kernelParamsPtr->atoms.d_forces);
#endif
- launchGpuKernel(kernelPtr, config, timingEvent, "PME gather", kernelArgs);
+ launchGpuKernel(kernelPtr, config, pmeGpu->archSpecific->pmeStream_, timingEvent, "PME gather",
+ kernelArgs);
pme_gpu_stop_timing(pmeGpu, timingId);
if (pmeGpu->settings.useGpuForceReduction)