GpuEventSynchronizer: extract backend-specific functionality

[alexxy/gromacs.git] / src / gromacs / gpu_utils / device_event_ocl.h
diff --git a/src/gromacs/gpu_utils/gpueventsynchronizer_ocl.h b/src/gromacs/gpu_utils/device_event_ocl.h

similarity index 55%

rename from src/gromacs/gpu_utils/gpueventsynchronizer_ocl.h

rename to src/gromacs/gpu_utils/device_event_ocl.h

index 0b9905450fd4014e1d70827621c96e63395d55ce..99bd5a43d7f78b99d6d392161b6fa2a49adcea53 100644 (file)
--- a/src/gromacs/gpu_utils/gpueventsynchronizer_ocl.h
+++ b/src/gromacs/gpu_utils/device_event_ocl.h
@@ -33,62 +33,49 @@
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  /*! \libinternal \file
- *  \brief Implements a GpuEventSynchronizer class for OpenCL.
+ *  \brief Implements a DeviceEvent class for OpenCL.
   *
   *  \author Aleksei Iupinov <a.yupinov@gmail.com>
+ *  \author Andrey Alekseenko <al42and@gmail.com>
   * \inlibraryapi
   */
-#ifndef GMX_GPU_UTILS_GPUEVENTSYNCHRONIZER_OCL_H
-#define GMX_GPU_UTILS_GPUEVENTSYNCHRONIZER_OCL_H
+#ifndef GMX_GPU_UTILS_DEVICE_EVENT_OCL_H
+#define GMX_GPU_UTILS_DEVICE_EVENT_OCL_H
  
-#ifndef DOXYGEN
+#include "gromacs/gpu_utils/gputraits_ocl.h"
+#include "gromacs/gpu_utils/oclutils.h"
+#include "gromacs/utility/exceptions.h"
+#include "gromacs/utility/gmxassert.h"
  
-#    include "gromacs/gpu_utils/gputraits_ocl.h"
-#    include "gromacs/gpu_utils/oclutils.h"
-#    include "gromacs/utility/exceptions.h"
-#    include "gromacs/utility/gmxassert.h"
+#ifndef DOXYGEN
  
-/*! \libinternal \brief
- * A class which allows for CPU thread to mark and wait for certain GPU stream execution point.
- * The event can be put into the stream with markEvent() and then later waited on with waitForEvent().
- * This can be repeated as necessary, but the current implementation does not allow waiting on
- * completed event more than once, expecting only exact pairs of markEvent(stream); waitForEvent().
- * The class generally attempts to track the correctness of its state transitions, but
- * please note that calling waitForEvent() right after the construction will fail with OpenCL but succeed with CUDA.
- *
- * Another possible mode of operation can be implemented if needed:
- * multiple calls to waitForEvent() after a single markEvent(). For this, clReleaseEvent() call
- * from waitForEvent() should instead happen conditionally at the beginning of markEvent(), replacing
- * the GMX_ASSERT(). This was tested to work both with CUDA and NVidia OpenCL, but not with AMD/Intel OpenCl.
- */
-class GpuEventSynchronizer
+class DeviceEvent
  {
  public:
      //! A constructor
-    GpuEventSynchronizer() : event_(nullptr) {}
+    DeviceEvent() : event_(sc_nullEvent) {}
+    DeviceEvent(cl_event event) : event_(event) {}
      //! A destructor
-    ~GpuEventSynchronizer()
+    ~DeviceEvent()
      {
-        // This additional code only prevents cl_event leak in an unlikely situation of destructor
-        // being called after markEvent() but before waitForEvent() / enqueueWaitEvent().
-        if (event_)
+        if (isMarked())
          {
+            // Cannot throw in a destructor, so the clReleaseEvent() result is deliberately not checked
              clReleaseEvent(event_);
          }
      }
-    //! No copying
-    GpuEventSynchronizer(const GpuEventSynchronizer&) = delete;
-    //! No assignment
-    GpuEventSynchronizer& operator=(GpuEventSynchronizer&&) = delete;
-    //! Moving is disabled but can be considered in the future if needed
-    GpuEventSynchronizer(GpuEventSynchronizer&&) = delete;
+    // Disable copy, move, and assignment. Move could be allowed, but is not needed yet.
+    DeviceEvent& operator=(const DeviceEvent&) = delete;
+    DeviceEvent(const DeviceEvent&)            = delete;
+    DeviceEvent& operator=(DeviceEvent&&) = delete;
+    DeviceEvent(DeviceEvent&&)            = delete;
  
      /*! \brief Marks the synchronization point in the \p stream.
-     * Should be called first and then followed by waitForEvent().
+     * Should be called first and then followed by wait().
       */
-    inline void markEvent(const DeviceStream& deviceStream)
+    inline void mark(const DeviceStream& deviceStream)
      {
-        GMX_ASSERT(nullptr == event_, "Do not call markEvent more than once!");
+        reset();
          cl_int clError = clEnqueueMarkerWithWaitList(deviceStream.stream(), 0, nullptr, &event_);
          if (CL_SUCCESS != clError)
          {
@@ -96,21 +83,35 @@ public:
                                           + ocl_get_error_string(clError)));
          }
      }
+
      /*! \brief Synchronizes the host thread on the marked event. */
-    inline void waitForEvent()
+    inline void wait()
      {
+        GMX_RELEASE_ASSERT(isMarked(), "Can not wait for an unmarked event");
          cl_int clError = clWaitForEvents(1, &event_);
          if (CL_SUCCESS != clError)
          {
              GMX_THROW(gmx::InternalError("Failed to synchronize on the GPU event: "
                                           + ocl_get_error_string(clError)));
          }
+    }
  
-        reset();
+    /*! \brief Enqueues a wait for the recorded event in stream \p deviceStream. */
+    inline void enqueueWait(const DeviceStream& deviceStream)
+    {
+        GMX_RELEASE_ASSERT(isMarked(), "Can not enqueue an unmarked event");
+        cl_int clError = clEnqueueBarrierWithWaitList(deviceStream.stream(), 1, &event_, nullptr);
+        if (CL_SUCCESS != clError)
+        {
+            GMX_THROW(gmx::InternalError("Failed to enqueue device barrier for the GPU event: "
+                                         + ocl_get_error_string(clError)));
+        }
      }
-    /*! \brief Checks the completion of the underlying event and resets the object if it was. */
+
+    //! Checks the completion of the underlying event.
      inline bool isReady()
      {
+        GMX_RELEASE_ASSERT(isMarked(), "Can not check the status of unmarked event");
          cl_int result;
          cl_int clError = clGetEventInfo(
                  event_, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &result, nullptr);
@@ -118,44 +119,32 @@ public:
          {
              GMX_THROW(gmx::InternalError("Failed to retrieve event info: " + ocl_get_error_string(clError)));
          }
-        bool hasTriggered = (result == CL_COMPLETE);
-        if (hasTriggered)
-        {
-            reset();
-        }
-        return hasTriggered;
+        return (result == CL_COMPLETE);
      }
-    /*! \brief Enqueues a wait for the recorded event in stream \p stream
-     *
-     *  After enqueue, the associated event is released, so this method should
-     *  be only called once per markEvent() call.
-     */
-    inline void enqueueWaitEvent(const DeviceStream& deviceStream)
-    {
-        cl_int clError = clEnqueueBarrierWithWaitList(deviceStream.stream(), 1, &event_, nullptr);
-        if (CL_SUCCESS != clError)
-        {
-            GMX_THROW(gmx::InternalError("Failed to enqueue device barrier for the GPU event: "
-                                         + ocl_get_error_string(clError)));
-        }
  
-        reset();
-    }
+    //! Checks whether this object encapsulates an underlying event.
+    inline bool isMarked() { return event_ != sc_nullEvent; }
  
      //! Reset (release) the event to unmarked state.
      inline void reset()
      {
-        cl_int clError = clReleaseEvent(event_);
-        if (CL_SUCCESS != clError)
+        if (isMarked())
          {
-            GMX_THROW(gmx::InternalError("Failed to release the GPU event: "
-                                         + ocl_get_error_string(clError)));
+            cl_int clError = clReleaseEvent(event_);
+            if (CL_SUCCESS != clError)
+            {
+                GMX_THROW(gmx::InternalError("Failed to release the GPU event: "
+                                             + ocl_get_error_string(clError)));
+            }
          }
-        event_ = nullptr;
+        event_ = sc_nullEvent;
      }
  
  private:
      cl_event event_;
+
+    //! Magic value to indicate the unmarked state (no underlying event).
+    static constexpr cl_event sc_nullEvent = nullptr;
  };
  
  #endif