*/
void communicateHaloForces(bool accumulateForces);
+ /*! \brief Get the event synchronizer for the forces ready on device.
+ *
+ * The returned pointer is non-owning: callers use it to make a consuming
+ * stream wait for the forces, and must not delete it.
+ *
+ * \returns The event to synchronize the stream that consumes forces on device.
+ */
+ GpuEventSynchronizer* getForcesReadyOnDeviceEvent();
private:
class Impl;
"A CPU stub for GPU Halo Exchange was called insted of the correct implementation.");
}
+/*!\brief Stub returning the forces-ready-on-device event, built when GMX_GPU != GMX_GPU_CUDA.
+ *
+ * Must never be called: it trips an assertion and otherwise yields no event.
+ *
+ * \returns nullptr
+ */
+GpuEventSynchronizer* GpuHaloExchange::getForcesReadyOnDeviceEvent()
+{
+ GMX_ASSERT(false,
+ "A CPU stub for GPU Halo Exchange was called insted of the correct implementation.");
+ return nullptr;
+}
+
} // namespace gmx
#endif /* GMX_GPU != GMX_GPU_CUDA */
launchGpuKernel(kernelFn, config, nullptr, "Domdec GPU Apply F Halo Exchange", kernelArgs);
}
+ fReadyOnDevice_.markEvent(nonLocalStream_);
}
#endif
}
+GpuEventSynchronizer* GpuHaloExchange::Impl::getForcesReadyOnDeviceEvent()
+{
+ return &fReadyOnDevice_; // non-owning: points at the member event that is marked in nonLocalStream_
+}
+
/*! \brief Create Domdec GPU object */
GpuHaloExchange::Impl::Impl(gmx_domdec_t* dd, MPI_Comm mpi_comm_mysim, void* localStream, void* nonLocalStream) :
dd_(dd),
impl_->communicateHaloForces(accumulateForces);
}
+GpuEventSynchronizer* GpuHaloExchange::getForcesReadyOnDeviceEvent()
+{
+ return impl_->getForcesReadyOnDeviceEvent(); // forwarded to Impl, which owns the event
+}
} // namespace gmx
*/
void communicateHaloForces(bool accumulateForces);
+ /*! \brief Get the event synchronizer for the forces ready on device.
+ *
+ * The event is a member of this object, so the returned pointer stays valid
+ * only as long as this object exists.
+ *
+ * \returns The event to synchronize the stream that consumes forces on device.
+ */
+ GpuEventSynchronizer* getForcesReadyOnDeviceEvent();
+
private:
/*! \brief Data transfer wrapper for GPU halo exchange
* \param [inout] d_ptr pointer to coordinates or force buffer in GPU memory
float3* d_x_ = nullptr;
//! full forces buffer in GPU memory
float3* d_f_ = nullptr;
+ //! An event recorded once the exchanged forces are ready on the GPU
+ GpuEventSynchronizer fReadyOnDevice_;
};
} // namespace gmx
fr->pmePpCommGpu->getForcesReadySynchronizer())) // buffer received from other GPU
: nullptr; // PME reduction not active on GPU
- gmx::FixedCapacityVector<GpuEventSynchronizer*, 2> dependencyList;
+ gmx::FixedCapacityVector<GpuEventSynchronizer*, 3> dependencyList;
if (stepWork.useGpuPmeFReduction)
{
}
if (useGpuForcesHaloExchange)
{
- // Add a stream synchronization to satisfy a dependency
- // for the local buffer ops on the result of GPU halo
- // exchange, which operates in the non-local stream and
- // writes to to local parf og the force buffer.
- //
- // TODO improve this through use of an event - see Redmine #3093
- // push the event into the dependencyList
- nbv->stream_local_wait_for_nonlocal();
+ dependencyList.push_back(gpuHaloExchange->getForcesReadyOnDeviceEvent());
}
nbv->atomdata_add_nbat_f_to_f_gpu(AtomLocality::Local, stateGpu->getForces(), pmeForcePtr,
dependencyList, stepWork.useGpuPmeFReduction,
}
}
-void nbnxn_stream_local_wait_for_nonlocal(gmx_nbnxn_cuda_t* nb)
-{
- cudaStream_t localStream = nb->stream[InteractionLocality::Local];
- cudaStream_t nonLocalStream = nb->stream[InteractionLocality::NonLocal];
-
- GpuEventSynchronizer event;
- event.markEvent(nonLocalStream);
- event.enqueueWaitEvent(localStream);
-}
-
} // namespace Nbnxm
Nbnxm::nbnxnInsertNonlocalGpuDependency(gpu_nbv, interactionLocality);
}
-void nonbonded_verlet_t::stream_local_wait_for_nonlocal()
-{
- Nbnxm::nbnxn_stream_local_wait_for_nonlocal(gpu_nbv);
-}
-
/*! \endcond */
/*! \brief return GPU pointer to f in rvec format */
void* get_gpu_frvec();
- /*! \brief Ensure local stream waits for non-local stream */
- void stream_local_wait_for_nonlocal();
-
//! Return the kernel setup
const Nbnxm::KernelSetup& kernelSetup() const { return kernelSetup_; }
CUDA_FUNC_QUALIFIER
void nbnxn_wait_x_on_device(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
-/*! \brief Ensure local stream waits for non-local stream
- * \param[in] nb The nonbonded data GPU structure
- */
-CUDA_FUNC_QUALIFIER
-void nbnxn_stream_local_wait_for_nonlocal(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
-
} // namespace Nbnxm
#endif