Refactor tracking of GPU short-range work/skipping

[alexxy/gromacs.git] / src / gromacs / nbnxm / opencl / nbnxm_ocl.cpp
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp

index d3b860e6e6f30389d059de0fc892a565fb3d4456..659c4842874f53f48bad6b0e702c0f13c40f18fc 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
@@ -365,9 +365,10 @@ static void sync_ocl_event(cl_command_queue stream, cl_event *ocl_event)
  /*! \brief Launch asynchronously the xq buffer host to device copy. */
  void gpu_copy_xq_to_gpu(gmx_nbnxn_ocl_t        *nb,
                          const nbnxn_atomdata_t *nbatom,
-                        const AtomLocality      atomLocality,
-                        const bool              haveOtherWork)
+                        const AtomLocality      atomLocality)
  {
+    GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
+
      const InteractionLocality iloc = gpuAtomToInteractionLocality(atomLocality);
  
      /* local/nonlocal offset and length used for xq and f */
@@ -389,7 +390,7 @@ void gpu_copy_xq_to_gpu(gmx_nbnxn_ocl_t        *nb,
         we always call the local local x+q copy (and the rest of the local
         work in nbnxn_gpu_launch_kernel().
       */
-    if (!haveOtherWork && canSkipWork(*nb, iloc))
+    if ((iloc == InteractionLocality::NonLocal) && !haveGpuShortRangeWork(*nb, iloc))
      {
          plist->haveFreshList = false;
  
@@ -491,7 +492,7 @@ void gpu_launch_kernel(gmx_nbnxn_ocl_t                  *nb,
         clearing. All these operations, except for the local interaction kernel,
         are needed for the non-local interactions. The skip of the local kernel
         call is taken care of later in this function. */
-    if (canSkipWork(*nb, iloc))
+    if (canSkipNonbondedWork(*nb, iloc))
      {
          plist->haveFreshList = false;
  
@@ -733,9 +734,10 @@ void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t           *nb,
  void gpu_launch_cpyback(gmx_nbnxn_ocl_t               *nb,
                          struct nbnxn_atomdata_t       *nbatom,
                          const int                      flags,
-                        const AtomLocality             aloc,
-                        const bool                     haveOtherWork)
+                        const AtomLocality             aloc)
  {
+    GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
+
      cl_int gmx_unused cl_error;
      int               adat_begin, adat_len; /* local/nonlocal offset and length used for xq and f */
  
@@ -752,7 +754,7 @@ void gpu_launch_cpyback(gmx_nbnxn_ocl_t               *nb,
  
  
      /* don't launch non-local copy-back if there was no non-local work to do */
-    if (!haveOtherWork && canSkipWork(*nb, iloc))
+    if ((iloc == InteractionLocality::NonLocal) && !haveGpuShortRangeWork(*nb, iloc))
      {
          /* TODO An alternative way to signal that non-local work is
             complete is to use a clEnqueueMarker+clEnqueueBarrier