Bug fix for nonlocal D2H coordinate transfer synchronization
author: Alan Gray <alangraygerrit@gmail.com>
Tue, 26 Nov 2019 10:12:02 +0000 (02:12 -0800)
committer: Artem Zhmurov <zhmurov@gmail.com>
Tue, 26 Nov 2019 12:46:51 +0000 (13:46 +0100)
Fixes a bug introduced in change I2e2ba1b6436f087d1f2fef4ff876445814a724e7,
which replaced the NB-module launch of the nonlocal coordinate D2H copy
with the new State Propagator module equivalent but did not update the
corresponding event wait call, so the dependency was not satisfied.
This change replaces the NB event wait with the correct State Propagator
event wait.

Change-Id: I7c812974c145d315fa6516b2bbea39164439728e

src/gromacs/mdlib/sim_util.cpp
src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
src/gromacs/nbnxm/nbnxm.cpp
src/gromacs/nbnxm/nbnxm.h
src/gromacs/nbnxm/nbnxm_gpu.h

index 0183e47271daf1ac825ab903215242bf3b021a6d..6009b1a5a653526d1d19d316e258b5073aa27a54 100644 (file)
@@ -1474,7 +1474,7 @@ void do_force(FILE*                               fplog,
     if (ddUsesGpuDirectCommunication && (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork))
     {
         /* Wait for non-local coordinate data to be copied from device */
-        nbv->wait_nonlocal_x_copy_D2H_done();
+        stateGpu->waitCoordinatesReadyOnHost(AtomLocality::NonLocal);
     }
     /* Compute the bonded and non-bonded energies and optionally forces */
     do_force_lowlevel(fr, inputrec, &(top->idef), cr, ms, nrnb, wcycle, mdatoms, x, hist, &forceOut, enerd,
index c59a4a828573c8ce324d265bbf7e1654817cd63e..4f2c54132fdac61d9858a77be88a07329acbe56f 100644 (file)
@@ -945,11 +945,6 @@ void nbnxn_gpu_add_nbat_f_to_f(const AtomLocality                         atomLo
     }
 }
 
-void nbnxn_wait_nonlocal_x_copy_D2H_done(gmx_nbnxn_cuda_t* nb)
-{
-    nb->xNonLocalCopyD2HDone->waitForEvent();
-}
-
 void nbnxn_stream_local_wait_for_nonlocal(gmx_nbnxn_cuda_t* nb)
 {
     cudaStream_t localStream    = nb->stream[InteractionLocality::Local];
index 8cf4523d08bddf8726839b18715c08342c9d8c4c..089c7f277f1f861003f349d4a291aed9c7b01513 100644 (file)
@@ -245,11 +245,6 @@ void nonbonded_verlet_t::insertNonlocalGpuDependency(const gmx::InteractionLocal
     Nbnxm::nbnxnInsertNonlocalGpuDependency(gpu_nbv, interactionLocality);
 }
 
-void nonbonded_verlet_t::wait_nonlocal_x_copy_D2H_done()
-{
-    Nbnxm::nbnxn_wait_nonlocal_x_copy_D2H_done(gpu_nbv);
-}
-
 void nonbonded_verlet_t::stream_local_wait_for_nonlocal()
 {
     Nbnxm::nbnxn_stream_local_wait_for_nonlocal(gpu_nbv);
index 4574167c187a4d3fa234defd94933ba679aa3a9b..fd11ec9de527f203e975d4c99c0b1e77f8306712 100644 (file)
@@ -350,9 +350,6 @@ public:
      */
     void atomdata_init_add_nbat_f_to_f_gpu(GpuEventSynchronizer* localReductionDone);
 
-    /*! \brief Wait for non-local copy of coordinate buffer from device to host */
-    void wait_nonlocal_x_copy_D2H_done();
-
     /*! \brief return GPU pointer to f in rvec format */
     void* get_gpu_frvec();
 
index ecae0d39d21c21dee5c7f18e717bed0d86aac512..423ec5b9254cdc9a7f9b7a3cc86bb739e2960aa9 100644 (file)
@@ -328,12 +328,6 @@ void nbnxn_gpu_add_nbat_f_to_f(gmx::AtomLocality gmx_unused atomLocality,
 CUDA_FUNC_QUALIFIER
 void nbnxn_wait_x_on_device(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
 
-/*! \brief Wait for non-local copy of coordinate buffer from device to host
- * \param[in] nb                   The nonbonded data GPU structure
- */
-CUDA_FUNC_QUALIFIER
-void nbnxn_wait_nonlocal_x_copy_D2H_done(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
-
 /*! \brief Ensure local stream waits for non-local stream
  * \param[in] nb                   The nonbonded data GPU structure
  */