Unify gpu_copy_xq_to_gpu(...) function

[alexxy/gromacs.git] / src / gromacs / nbnxm / sycl / nbnxm_sycl.cpp
diff --git a/src/gromacs/nbnxm/sycl/nbnxm_sycl.cpp b/src/gromacs/nbnxm/sycl/nbnxm_sycl.cpp

index 7428869888637b3391eac639b1586711b81dcc34..1f57c36bf66880bb114729207524d45eaed180dc 100644 (file)
--- a/src/gromacs/nbnxm/sycl/nbnxm_sycl.cpp
+++ b/src/gromacs/nbnxm/sycl/nbnxm_sycl.cpp
@@ -160,63 +160,6 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
      }
  }
  
-/*! \brief Launch asynchronously the xq buffer host to device copy. */
-void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
-{
-    GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
-    validateGpuAtomLocality(atomLocality);
-
-    const InteractionLocality iloc = gpuAtomToInteractionLocality(atomLocality);
-
-    NBAtomData*         adat         = nb->atdat;
-    gpu_plist*          plist        = nb->plist[iloc];
-    const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
-
-    /* Don't launch the non-local H2D copy if there is no dependent
-       work to do: neither non-local nor other (e.g. bonded) work
-       to do that has as input the nbnxn coordinates.
-       Doing the same for the local kernel is more complicated, since the
-       local part of the force array also depends on the non-local kernel.
-       So to avoid complicating the code and to reduce the risk of bugs,
-       we always call the local local x+q copy (and the rest of the local
-       work in nbnxn_gpu_launch_kernel().
-     */
-    if ((iloc == InteractionLocality::NonLocal) && !haveGpuShortRangeWork(*nb, iloc))
-    {
-        plist->haveFreshList = false;
-
-        // The event is marked for Local interactions unconditionally,
-        // so it has to be released here because of the early return
-        // for NonLocal interactions.
-        nb->misc_ops_and_local_H2D_done.reset();
-
-        return;
-    }
-
-    /* local/nonlocal offset and length used for xq and f */
-    auto atomsRange = getGpuAtomRange(adat, atomLocality);
-
-    /* HtoD x, q */
-    GMX_ASSERT(adat->xq.elementSize() == sizeof(Float4),
-               "The size of the xyzq buffer element should be equal to the size of float4.");
-    copyToDeviceBuffer(&adat->xq,
-                       reinterpret_cast<const Float4*>(nbatom->x().data()) + atomsRange.begin(),
-                       atomsRange.begin(),
-                       atomsRange.size(),
-                       deviceStream,
-                       GpuApiCallBehavior::Async,
-                       nullptr);
-
-    /* No need to enforce stream synchronization with events like we do in CUDA/OpenCL.
-     * Runtime should do the scheduling correctly based on data dependencies.
-     * But for consistency's sake, we do it anyway. */
-    /* When we get here all misc operations issued in the local stream as well as
-     * the local xq H2D are done, so we record that in the local stream and wait for it in the
-     * nonlocal one. This wait needs to precede any PP tasks, bonded or nonbonded, that may
-     * compute on interactions between local and nonlocal atoms. */
-    nbnxnInsertNonlocalGpuDependency(nb, iloc);
-}
-
  void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, const int numParts)
  {
      gpu_plist* plist = nb->plist[iloc];