src/gromacs/gpu_utils/gpueventsynchronizer.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \libinternal \file
  36  *  \brief Implements a GpuEventSynchronizer class.
  37  *
  38  *  \author Andrey Alekseenko <al42and@gmail.com>
  39  *  \author Artem Zhmurov <zhmurov@gmail.com>
  40  *  \author Aleksei Iupinov <a.yupinov@gmail.com>
  41  * \inlibraryapi
  42  */
  43 #ifndef GMX_GPU_UTILS_GPUEVENTSYNCHRONIZER_H
  44 #define GMX_GPU_UTILS_GPUEVENTSYNCHRONIZER_H
  45
  46 #include "config.h"
  47
  48 #include "gromacs/utility/classhelpers.h"
  49 #include "gromacs/utility/exceptions.h"
  50 #include "gromacs/utility/gmxassert.h"
  51
  52 #include "device_event.h"
  53
  54 /*! \libinternal \brief
   55  * A class which allows a CPU thread to mark and wait for a certain GPU stream execution point.
  56  *
  57  * The event can be put into the stream with \ref markEvent and then later waited on with \ref
  58  * waitForEvent or \ref enqueueWaitEvent.
  59  *
  60  * Additionally, this class offers facilities for runtime checking of correctness by counting
  61  * how many times each marked event is used as a synchronization point.
  62  *
  63  * - When the class is constructed, a required minimal (\c minConsumptionCount) and maximal (\c maxConsumptionCount) number of
  64  * consumptions can be specified. By default, both are set to 1.
  65  * - The event is considered <em>fully consumed</em> if its current number of consumptions \c c equals
  66  * \c maxConsumptionCount.
  67  * - The event is considered <em>sufficiently consumed</em> if <tt>minConsumptionCount <= c <= maxConsumptionCount</tt>.
  68  * - The class is initialized in the <em>fully consumed</em> state, so it can not be consumed right away.
  69  * - Consuming the event is only possible if it is not <em>fully consumed</em> (<tt>c < maxConsumptionCount</tt>).
  70  * Consuming the event increments \c c by 1. Trying to consume <em>fully consumed</em> event
  71  * throws \ref gmx::InternalError.
  72  * - \ref reset returns object into the initial <em>fully consumed</em> state.
  73  * This function is intended to manually override the consumption limits.
  74  * - \ref consume \em consumes the event, without doing anything else.
  75  * This function is intended to manually override the consumption limits.
   76  * - \ref markEvent enqueues new event into the provided stream, and sets \c c to 0. Marking is only
  77  * possible if the event is <em>sufficiently consumed</em>, otherwise \ref gmx::InternalError
  78  * is thrown.
  79  * - \ref waitForEvent \em consumes the event and blocks the host thread until the event
  80  * is ready (complete).
   81  * - \ref enqueueWaitEvent \em consumes the event and inserts a blocking barrier
  82  * into the provided stream which blocks the execution of all tasks later submitted to this
  83  * stream until the event is ready (completes).
  84  *
  85  * Default <tt>minConsumptionCount=maxConsumptionCount=1</tt> limits mean that each call to \ref markEvent must be followed
   86  * by exactly one \ref waitForEvent or \ref enqueueWaitEvent. This is the recommended pattern
  87  * for most use cases. By providing other constructor arguments, this requirement can be relaxed
  88  * as needed.
  89  */
  90 class GpuEventSynchronizer
  91 {
  92 public:
  93     //! A constructor
  94     GpuEventSynchronizer(int minConsumptionCount, int maxConsumptionCount) :
  95         minConsumptionCount_(minConsumptionCount), maxConsumptionCount_(maxConsumptionCount)
  96     {
  97         reset();
  98     }
  99     GpuEventSynchronizer() : GpuEventSynchronizer(1, 1) {}
 100     //! A destructor
 101     ~GpuEventSynchronizer() = default;
 102     //! Remove copy assignment, because we can not copy the underlying event object.
 103     GpuEventSynchronizer& operator=(const GpuEventSynchronizer&) = delete;
 104     //! Remove copy constructor, because we can not copy the underlying event object.
 105     GpuEventSynchronizer(const GpuEventSynchronizer&) = delete;
 106     //! Remove move assignment, because we don't allow moving the underlying event object.
 107     GpuEventSynchronizer& operator=(GpuEventSynchronizer&&) = delete;
 108     //! Remove move constructor, because we don't allow moving the underlying event object.
 109     GpuEventSynchronizer(GpuEventSynchronizer&&) = delete;
 110
 111     /*! \brief Marks the synchronization point in the \p stream and reset the consumption counter.
 112      *
 113      * Should be called before implicitly consuming actions (\ref waitForEvent() or \ref enqueueWaitEvent()) are executed or explicit \ref consume() calls are made.
 114      *
 115      * If the event has been marked before and not fully consumed, throws \ref gmx::InternalError.
 116      */
 117     inline void markEvent(const DeviceStream& deviceStream)
 118     {
 119 #if !GMX_GPU_CUDA // For now, we have relaxed conditions for CUDA
 120         if (consumptionCount_ < minConsumptionCount_)
 121         {
 122             GMX_THROW(gmx::InternalError("Trying to mark event before fully consuming it"));
 123         }
 124 #endif
 125         event_.mark(deviceStream);
 126         consumptionCount_ = 0;
 127     }
 128     /*! \brief Synchronizes the host thread on the marked event.
 129      *
 130      * Consumes the event if able, otherwise throws \ref gmx::InternalError.
 131      */
 132     inline void waitForEvent()
 133     {
 134         consume();
 135         event_.wait();
 136         resetIfFullyConsumed();
 137     }
 138     //! Checks the completion of the underlying event and consumes the event if it is ready.
 139     inline bool isReady()
 140     {
 141         bool isReady = event_.isReady();
 142         if (isReady)
 143         {
 144             consume();
 145             resetIfFullyConsumed();
 146         }
 147         return isReady;
 148     }
 149     //! Checks whether the event was marked (and was not reset since then).
 150     inline bool isMarked() const { return event_.isMarked(); }
 151     /*! \brief Manually consume the event without waiting for it.
 152      *
 153      * If the event is already fully consumed, throws \ref gmx::InternalError.
 154      */
 155     inline void consume()
 156     {
 157 #if !GMX_GPU_CUDA // For now, we have relaxed conditions for CUDA
 158         if (consumptionCount_ >= maxConsumptionCount_)
 159         {
 160             GMX_THROW(gmx::InternalError(
 161                     "Trying to consume an event before marking it or after fully consuming it"));
 162         }
 163 #endif
 164         consumptionCount_++;
 165     }
 166     //! Helper function to reset the event when it is fully consumed.
 167     inline void resetIfFullyConsumed()
 168     {
 169         if (consumptionCount_ == maxConsumptionCount_)
 170         {
 171             event_.reset();
 172         }
 173     }
 174     /*! \brief Enqueues a wait for the recorded event in stream \p deviceStream.
 175      *
 176      * Consumes the event if able, otherwise throws \ref gmx::InternalError.
 177      */
 178     inline void enqueueWaitEvent(const DeviceStream& deviceStream)
 179     {
 180         consume();
 181         event_.enqueueWait(deviceStream);
 182         resetIfFullyConsumed();
 183     }
 184
 185     //! Resets the event to unmarked state, releasing the underlying event object if needed.
 186     inline void reset()
 187     {
 188         // Set such that we can mark new event without triggering an exception, but can not consume.
 189         consumptionCount_ = maxConsumptionCount_;
 190         event_.reset();
 191     }
 192
 193 private:
 194     DeviceEvent event_;
 195     int         consumptionCount_;
 196 #if defined(__clang__) && GMX_GPU_CUDA
 197     [[maybe_unused]]
 198 #endif
 199     int minConsumptionCount_; // Unused in CUDA builds, yet
 200     int maxConsumptionCount_;
 201 };
 202
 203 #endif