// Pull force data from remote GPU
void* pmeForcePtr = receivePmeForceToGpu ? static_cast<void*>(d_pmeForces_) : recvPtr;
- cudaError_t stat = cudaMemcpyAsync(pmeForcePtr, remotePmeFBuffer_, recvSize * DIM * sizeof(float),
- cudaMemcpyDefault, pmePpCommStream_.stream());
+ cudaError_t stat = cudaMemcpyAsync(pmeForcePtr,
+ remotePmeFBuffer_,
+ recvSize * DIM * sizeof(float),
+ cudaMemcpyDefault,
+ pmePpCommStream_.stream());
CU_RET_ERR(stat, "cudaMemcpyAsync on Recv from PME CUDA direct data transfer failed");
if (receivePmeForceToGpu)
// ensure stream waits until coordinate data is available on device
coordinatesReadyOnDeviceEvent->enqueueWaitEvent(pmePpCommStream_);
- cudaError_t stat = cudaMemcpyAsync(remotePmeXBuffer_, sendPtr, sendSize * DIM * sizeof(float),
- cudaMemcpyDefault, pmePpCommStream_.stream());
+ cudaError_t stat = cudaMemcpyAsync(remotePmeXBuffer_,
+ sendPtr,
+ sendSize * DIM * sizeof(float),
+ cudaMemcpyDefault,
+ pmePpCommStream_.stream());
CU_RET_ERR(stat, "cudaMemcpyAsync on Send to PME CUDA direct data transfer failed");
// Record and send event to allow PME task to sync to above transfer before commencing force calculations
bool sendPmeCoordinatesFromGpu,
GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
{
- impl_->sendCoordinatesToPmeCudaDirect(sendPtr, sendSize, sendPmeCoordinatesFromGpu,
- coordinatesReadyOnDeviceEvent);
+ impl_->sendCoordinatesToPmeCudaDirect(
+ sendPtr, sendSize, sendPmeCoordinatesFromGpu, coordinatesReadyOnDeviceEvent);
}
void* PmePpCommGpu::getGpuForceStagingPtr()