{
if (useGpuFBufOps == BufferOpsUseGpu::True)
{
- nbv->wait_for_gpu_force_reduction(Nbnxm::AtomLocality::NonLocal);
+ stateGpu->waitForcesReadyOnHost(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
}
dd_move_f(cr->dd, &forceOut.forceWithShiftForces(), wcycle);
}
pme_gpu_get_device_f(fr->pmedata),
dependencyList,
stepWork.useGpuPmeFReduction, haveLocalForceContribInCpuBuffer);
- // This function call synchronizes the local stream
- nbv->wait_for_gpu_force_reduction(Nbnxm::AtomLocality::Local);
stateGpu->copyForcesFromGpu(forceWithShift, gmx::StatePropagatorDataGpu::AtomLocality::Local);
+ stateGpu->waitForcesReadyOnHost(gmx::StatePropagatorDataGpu::AtomLocality::Local);
}
else
{
GMX_ASSERT(commandStream != nullptr, "No stream is valid for copying forces with given atom locality.");
copyToDevice(d_f_, h_f, d_fSize_, atomLocality, commandStream);
- // TODO: Remove When event-based synchronization is introduced
- gpuStreamSynchronize(commandStream);
fReadyOnDevice_[atomLocality].markEvent(commandStream);
}
GMX_ASSERT(commandStream != nullptr, "No stream is valid for copying forces with given atom locality.");
copyFromDevice(h_f, d_f_, d_fSize_, atomLocality, commandStream);
- // TODO: Remove When event-based synchronization is introduced
- gpuStreamSynchronize(commandStream);
fReadyOnHost_[atomLocality].markEvent(commandStream);
}
}
-void nbnxn_wait_for_gpu_force_reduction(const AtomLocality gmx_unused atomLocality,
- gmx_nbnxn_gpu_t *nb)
-{
- GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
-
- const InteractionLocality iLocality = gpuAtomToInteractionLocality(atomLocality);
-
- cudaStream_t stream = nb->stream[iLocality];
-
- cudaStreamSynchronize(stream);
-
-}
-
void* nbnxn_get_x_on_device_event(const gmx_nbnxn_cuda_t *nb)
{
return static_cast<void*> (nb->xAvailableOnDevice);
Nbnxm::nbnxnInsertNonlocalGpuDependency(gpu_nbv, interactionLocality);
}
-void nonbonded_verlet_t::wait_for_gpu_force_reduction(const Nbnxm::AtomLocality locality)
-{
- nbnxn_wait_for_gpu_force_reduction(locality, gpu_nbv);
-}
-
void* nonbonded_verlet_t::get_x_on_device_event()
{
return Nbnxm::nbnxn_get_x_on_device_event(gpu_nbv);
/*! \brief Outer body of function to perform initialization for F buffer operations on GPU. */
void atomdata_init_add_nbat_f_to_f_gpu();
- /*! \brief Wait for GPU force reduction task and D2H transfer of its results to complete
- *
- * FIXME: need more details: when should be called / after which operation, etc.
- */
- void wait_for_gpu_force_reduction(Nbnxm::AtomLocality locality);
-
/*! \brief return pointer to GPU event recorded when coordinates have been copied to device */
void* get_x_on_device_event();
bool gmx_unused useGpuFPmeReduction,
bool gmx_unused accumulateForce) CUDA_FUNC_TERM;
-/*! \brief Wait for GPU stream to complete */
-CUDA_FUNC_QUALIFIER
-void nbnxn_wait_for_gpu_force_reduction(AtomLocality gmx_unused atomLocality,
- gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM;
-
/*! \brief sync CPU thread on coordinate copy to device
* \param[in] nb The nonbonded data GPU structure
*/