From 422429f5cdd3ea0fa14d7e5466ba10a16d8e9653 Mon Sep 17 00:00:00 2001 From: Alan Gray Date: Tue, 26 Nov 2019 02:12:02 -0800 Subject: [PATCH] Bug fix for nonlocal D2H coordinate transfer synchronization Fixes a bug introduced in I2e2ba1b6436f087d1f2fef4ff876445814a724e7, which replaced the NB-module D2H nonlocal coordinate copy launch with a new State Propagator module equivalent, but did not change the corresponding event wait call, so the dependency was not satisfied. This change replaces the NB event wait with the correct State Propagator event wait and removes the now-unused NB wait function (nbnxn_wait_nonlocal_x_copy_D2H_done and its nonbonded_verlet_t wrapper). Change-Id: I7c812974c145d315fa6516b2bbea39164439728e --- src/gromacs/mdlib/sim_util.cpp | 2 +- src/gromacs/nbnxm/cuda/nbnxm_cuda.cu | 5 ----- src/gromacs/nbnxm/nbnxm.cpp | 5 ----- src/gromacs/nbnxm/nbnxm.h | 3 --- src/gromacs/nbnxm/nbnxm_gpu.h | 6 ------ 5 files changed, 1 insertion(+), 20 deletions(-) diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 0183e47271..6009b1a5a6 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -1474,7 +1474,7 @@ void do_force(FILE* fplog, if (ddUsesGpuDirectCommunication && (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork)) { /* Wait for non-local coordinate data to be copied from device */ - nbv->wait_nonlocal_x_copy_D2H_done(); + stateGpu->waitCoordinatesReadyOnHost(AtomLocality::NonLocal); } /* Compute the bonded and non-bonded energies and optionally forces */ do_force_lowlevel(fr, inputrec, &(top->idef), cr, ms, nrnb, wcycle, mdatoms, x, hist, &forceOut, enerd, diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu index c59a4a8285..4f2c54132f 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu @@ -945,11 +945,6 @@ void nbnxn_gpu_add_nbat_f_to_f(const AtomLocality atomLo } } -void nbnxn_wait_nonlocal_x_copy_D2H_done(gmx_nbnxn_cuda_t* nb) -{ - nb->xNonLocalCopyD2HDone->waitForEvent(); -} - void 
nbnxn_stream_local_wait_for_nonlocal(gmx_nbnxn_cuda_t* nb) { cudaStream_t localStream = nb->stream[InteractionLocality::Local]; diff --git a/src/gromacs/nbnxm/nbnxm.cpp b/src/gromacs/nbnxm/nbnxm.cpp index 8cf4523d08..089c7f277f 100644 --- a/src/gromacs/nbnxm/nbnxm.cpp +++ b/src/gromacs/nbnxm/nbnxm.cpp @@ -245,11 +245,6 @@ void nonbonded_verlet_t::insertNonlocalGpuDependency(const gmx::InteractionLocal Nbnxm::nbnxnInsertNonlocalGpuDependency(gpu_nbv, interactionLocality); } -void nonbonded_verlet_t::wait_nonlocal_x_copy_D2H_done() -{ - Nbnxm::nbnxn_wait_nonlocal_x_copy_D2H_done(gpu_nbv); -} - void nonbonded_verlet_t::stream_local_wait_for_nonlocal() { Nbnxm::nbnxn_stream_local_wait_for_nonlocal(gpu_nbv); diff --git a/src/gromacs/nbnxm/nbnxm.h b/src/gromacs/nbnxm/nbnxm.h index 4574167c18..fd11ec9de5 100644 --- a/src/gromacs/nbnxm/nbnxm.h +++ b/src/gromacs/nbnxm/nbnxm.h @@ -350,9 +350,6 @@ public: */ void atomdata_init_add_nbat_f_to_f_gpu(GpuEventSynchronizer* localReductionDone); - /*! \brief Wait for non-local copy of coordinate buffer from device to host */ - void wait_nonlocal_x_copy_D2H_done(); - /*! \brief return GPU pointer to f in rvec format */ void* get_gpu_frvec(); diff --git a/src/gromacs/nbnxm/nbnxm_gpu.h b/src/gromacs/nbnxm/nbnxm_gpu.h index ecae0d39d2..423ec5b925 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu.h +++ b/src/gromacs/nbnxm/nbnxm_gpu.h @@ -328,12 +328,6 @@ void nbnxn_gpu_add_nbat_f_to_f(gmx::AtomLocality gmx_unused atomLocality, CUDA_FUNC_QUALIFIER void nbnxn_wait_x_on_device(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM; -/*! \brief Wait for non-local copy of coordinate buffer from device to host - * \param[in] nb The nonbonded data GPU structure - */ -CUDA_FUNC_QUALIFIER -void nbnxn_wait_nonlocal_x_copy_D2H_done(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM; - /*! \brief Ensure local stream waits for non-local stream * \param[in] nb The nonbonded data GPU structure */ -- 2.22.0