Fixes a bug introduced in I2e2ba1b6436f087d1f2fef4ff876445814a724e7.
That change replaced the NB-module D2H non-local coordinate copy launch
with the new State Propagator module equivalent, but did not update the
corresponding event wait call, so the dependency on the copy was not
satisfied. This change replaces the NB event wait with the correct
State Propagator event wait and removes the now-unused NB-module wait
function (nbnxn_wait_nonlocal_x_copy_D2H_done and its
nonbonded_verlet_t::wait_nonlocal_x_copy_D2H_done wrapper).
Change-Id: I7c812974c145d315fa6516b2bbea39164439728e
if (ddUsesGpuDirectCommunication && (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork))
{
/* Wait for non-local coordinate data to be copied from device */
if (ddUsesGpuDirectCommunication && (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork))
{
/* Wait for non-local coordinate data to be copied from device */
- nbv->wait_nonlocal_x_copy_D2H_done();
+ stateGpu->waitCoordinatesReadyOnHost(AtomLocality::NonLocal);
}
/* Compute the bonded and non-bonded energies and optionally forces */
do_force_lowlevel(fr, inputrec, &(top->idef), cr, ms, nrnb, wcycle, mdatoms, x, hist, &forceOut, enerd,
}
/* Compute the bonded and non-bonded energies and optionally forces */
do_force_lowlevel(fr, inputrec, &(top->idef), cr, ms, nrnb, wcycle, mdatoms, x, hist, &forceOut, enerd,
-void nbnxn_wait_nonlocal_x_copy_D2H_done(gmx_nbnxn_cuda_t* nb)
-{
- nb->xNonLocalCopyD2HDone->waitForEvent();
-}
-
void nbnxn_stream_local_wait_for_nonlocal(gmx_nbnxn_cuda_t* nb)
{
cudaStream_t localStream = nb->stream[InteractionLocality::Local];
void nbnxn_stream_local_wait_for_nonlocal(gmx_nbnxn_cuda_t* nb)
{
cudaStream_t localStream = nb->stream[InteractionLocality::Local];
Nbnxm::nbnxnInsertNonlocalGpuDependency(gpu_nbv, interactionLocality);
}
Nbnxm::nbnxnInsertNonlocalGpuDependency(gpu_nbv, interactionLocality);
}
-void nonbonded_verlet_t::wait_nonlocal_x_copy_D2H_done()
-{
- Nbnxm::nbnxn_wait_nonlocal_x_copy_D2H_done(gpu_nbv);
-}
-
void nonbonded_verlet_t::stream_local_wait_for_nonlocal()
{
Nbnxm::nbnxn_stream_local_wait_for_nonlocal(gpu_nbv);
void nonbonded_verlet_t::stream_local_wait_for_nonlocal()
{
Nbnxm::nbnxn_stream_local_wait_for_nonlocal(gpu_nbv);
*/
void atomdata_init_add_nbat_f_to_f_gpu(GpuEventSynchronizer* localReductionDone);
*/
void atomdata_init_add_nbat_f_to_f_gpu(GpuEventSynchronizer* localReductionDone);
- /*! \brief Wait for non-local copy of coordinate buffer from device to host */
- void wait_nonlocal_x_copy_D2H_done();
-
/*! \brief return GPU pointer to f in rvec format */
void* get_gpu_frvec();
/*! \brief return GPU pointer to f in rvec format */
void* get_gpu_frvec();
CUDA_FUNC_QUALIFIER
void nbnxn_wait_x_on_device(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
CUDA_FUNC_QUALIFIER
void nbnxn_wait_x_on_device(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
-/*! \brief Wait for non-local copy of coordinate buffer from device to host
- * \param[in] nb The nonbonded data GPU structure
- */
-CUDA_FUNC_QUALIFIER
-void nbnxn_wait_nonlocal_x_copy_D2H_done(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
-
/*! \brief Ensure local stream waits for non-local stream
* \param[in] nb The nonbonded data GPU structure
*/
/*! \brief Ensure local stream waits for non-local stream
* \param[in] nb The nonbonded data GPU structure
*/