2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2021, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \libinternal \file
36 * \brief Implements a GpuEventSynchronizer class.
38 * \author Andrey Alekseenko <al42and@gmail.com>
39 * \author Artem Zhmurov <zhmurov@gmail.com>
40 * \author Aleksei Iupinov <a.yupinov@gmail.com>
43 #ifndef GMX_GPU_UTILS_GPUEVENTSYNCHRONIZER_H
44 #define GMX_GPU_UTILS_GPUEVENTSYNCHRONIZER_H
48 #include "gromacs/utility/classhelpers.h"
49 #include "gromacs/utility/exceptions.h"
50 #include "gromacs/utility/gmxassert.h"
52 #include "device_event.h"
54 /*! \libinternal \brief
55 * A class which allows the CPU thread to mark and wait for a certain GPU stream execution point.
57 * The event can be put into the stream with \ref markEvent and then later waited on with \ref
58 * waitForEvent or \ref enqueueWaitEvent.
60 * Additionally, this class offers facilities for runtime checking of correctness by counting
61 * how many times each marked event is used as a synchronization point.
63 * - When the class is constructed, a required minimal (\c minConsumptionCount) and maximal (\c maxConsumptionCount) number of
64 * consumptions can be specified. By default, both are set to 1.
65 * - The event is considered <em>fully consumed</em> if its current number of consumptions \c c equals
66 * \c maxConsumptionCount.
67 * - The event is considered <em>sufficiently consumed</em> if <tt>minConsumptionCount <= c <= maxConsumptionCount</tt>.
68 * - The class is initialized in the <em>fully consumed</em> state, so it can not be consumed right away.
69 * - Consuming the event is only possible if it is not <em>fully consumed</em> (<tt>c < maxConsumptionCount</tt>).
70 * Consuming the event increments \c c by 1. Trying to consume <em>fully consumed</em> event
71 * throws \ref gmx::InternalError.
72 * - \ref reset returns object into the initial <em>fully consumed</em> state.
73 * This function is intended to manually override the consumption limits.
74 * - \ref consume \em consumes the event, without doing anything else.
75 * This function is intended to manually override the consumption limits.
76 * - \ref markEvent enqueues a new event into the provided stream, and sets \c c to 0. Marking is only
77 * possible if the event is <em>sufficiently consumed</em>, otherwise \ref gmx::InternalError
79 * - \ref waitForEvent \em consumes the event and blocks the host thread until the event
80 * is ready (complete).
81 * - \ref enqueueWaitEvent \em consumes the event and inserts a blocking barrier
82 * into the provided stream which blocks the execution of all tasks later submitted to this
83 * stream until the event is ready (completes).
85 * Default <tt>minConsumptionCount=maxConsumptionCount=1</tt> limits mean that each call to \ref markEvent must be followed
86 * by exactly one \ref waitForEvent or \ref enqueueWaitEvent. This is the recommended pattern
87 * for most use cases. By providing other constructor arguments, this requirement can be relaxed
90 class GpuEventSynchronizer
94 GpuEventSynchronizer(int minConsumptionCount, int maxConsumptionCount) : // Lower and upper bound on the number of consumptions per marked event.
95 minConsumptionCount_(minConsumptionCount), maxConsumptionCount_(maxConsumptionCount)
99 GpuEventSynchronizer() : GpuEventSynchronizer(1, 1) {} // Default limits: exactly one consumption per mark.
101 ~GpuEventSynchronizer() = default;
102 //! Copy assignment is deleted because the underlying event object cannot be copied.
103 GpuEventSynchronizer& operator=(const GpuEventSynchronizer&) = delete;
104 //! Copy construction is deleted because the underlying event object cannot be copied.
105 GpuEventSynchronizer(const GpuEventSynchronizer&) = delete;
106 //! Move assignment is deleted because moving the underlying event object is not allowed.
107 GpuEventSynchronizer& operator=(GpuEventSynchronizer&&) = delete;
108 //! Move construction is deleted because moving the underlying event object is not allowed.
109 GpuEventSynchronizer(GpuEventSynchronizer&&) = delete;
111 /*! \brief Marks the synchronization point in the \p deviceStream and resets the consumption counter.
113 * Should be called before implicitly consuming actions (\ref waitForEvent() or \ref enqueueWaitEvent()) are executed or explicit \ref consume() calls are made.
115 * If the event has been marked before and not sufficiently consumed, throws \ref gmx::InternalError.
117 inline void markEvent(const DeviceStream& deviceStream)
119 #if !GMX_GPU_CUDA // For now, we have relaxed conditions for CUDA
120 if (consumptionCount_ < minConsumptionCount_) // The previous mark has not been consumed the minimal number of times yet.
122 GMX_THROW(gmx::InternalError("Trying to mark event before fully consuming it")); // NOTE(review): wording says "fully", but the condition tests the minimal count.
125 event_.mark(deviceStream);
126 consumptionCount_ = 0; // A freshly marked event has no consumptions yet.
128 /*! \brief Synchronizes the host thread on the marked event.
130 * Consumes the event if able, otherwise throws \ref gmx::InternalError.
132 inline void waitForEvent()
136 resetIfFullyConsumed(); // Reset the event once it has become fully consumed.
138 //! Checks the completion of the underlying event and consumes the event if it is ready.
139 inline bool isReady()
141 bool isReady = event_.isReady(); // Query the underlying event for completion.
145 resetIfFullyConsumed(); // Reset the event once it has become fully consumed.
149 //! Checks whether the event was marked (and was not reset since then).
150 inline bool isMarked() const { return event_.isMarked(); } // Delegates to the underlying event; the consumption counter is not touched.
151 /*! \brief Manually consume the event without waiting for it.
153 * If the event is already fully consumed, throws \ref gmx::InternalError.
155 inline void consume()
157 #if !GMX_GPU_CUDA // For now, we have relaxed conditions for CUDA
158 if (consumptionCount_ >= maxConsumptionCount_) // Holds after the maximal number of consumptions and also after a reset (counter set to the maximum).
160 GMX_THROW(gmx::InternalError(
161 "Trying to consume an event before marking it or after fully consuming it"));
166 //! Helper function to reset the event when it is fully consumed.
167 inline void resetIfFullyConsumed()
169 if (consumptionCount_ == maxConsumptionCount_) // The event is fully consumed when the counter has reached the maximum.
174 /*! \brief Enqueues a wait for the recorded event in stream \p deviceStream.
176 * Consumes the event if able, otherwise throws \ref gmx::InternalError.
178 inline void enqueueWaitEvent(const DeviceStream& deviceStream)
181 event_.enqueueWait(deviceStream); // Make deviceStream wait for the recorded event.
182 resetIfFullyConsumed(); // Reset the event once it has become fully consumed.
185 //! Resets the event to the unmarked state, releasing the underlying event object if needed.
188 // Set the counter to the maximum, so that a new event can be marked without triggering an exception, but the event cannot be consumed.
189 consumptionCount_ = maxConsumptionCount_;
195 int consumptionCount_; //!< Current number of consumptions: 0 after marking, maxConsumptionCount_ after reset.
196 #if defined(__clang__) && GMX_GPU_CUDA
199 int minConsumptionCount_; //!< Minimal number of consumptions required before the event can be marked again. Unused in CUDA builds, yet
200 int maxConsumptionCount_; //!< Maximal number of consumptions allowed for one marked event.