2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team.
5 * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \libinternal \file
37 * \brief Declares functions to manage GPU resources.
39 * This has several implementations: one for each supported GPU platform,
40 * and a stub implementation if the build does not support GPUs.
42 * \author Anca Hamuraru <anca@streamcomputing.eu>
43 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
44 * \author Teemu Virolainen <teemu@streamcomputing.eu>
45 * \author Mark Abraham <mark.j.abraham@gmail.com>
46 * \author Szilárd Páll <pall.szilard@gmail.com>
47 * \author Artem Zhmurov <zhmurov@gmail.com>
50 * \ingroup module_hardware
52 #ifndef GMX_HARDWARE_DEVICE_MANAGEMENT_H
53 #define GMX_HARDWARE_DEVICE_MANAGEMENT_H
59 #include "gromacs/utility/arrayref.h"
60 #include "gromacs/utility/basedefinitions.h"
61 #include "gromacs/utility/iserializer.h"
63 struct DeviceInformation;
64 enum class DeviceVendor : int;
66 /*! \brief Return whether GPUs can be detected.
68 * Returns true when this is a build of GROMACS configured to support
69 * GPU usage, GPU detection is not disabled by the \c GMX_DISABLE_GPU_DETECTION
70 * environment variable, and a valid device driver, ICD, and/or runtime was
71 * detected. Does not throw.
73 * \param[out] errorMessage When returning false on a build configured with
74 * GPU support and a non-null pointer was passed,
75 * the string contains a descriptive message about
76 * why GPUs cannot be detected.
78 bool canPerformDeviceDetection(std::string* errorMessage);
80 /*! \brief Return whether GPU detection is enabled.
82 * Returns true when this is a build of GROMACS configured to support
83 * GPU usage and GPU detection is not disabled by the \c GMX_DISABLE_GPU_DETECTION
84 * environment variable.
88 bool isDeviceDetectionEnabled();
90 /*! \brief Return whether GPU detection is functioning correctly.
92 * Returns true when this is a build of GROMACS configured to support
93 * GPU usage, and a valid device driver, ICD, and/or runtime was detected.
95 * This function is not intended to be called from build
96 * configurations that do not support GPUs, and there will be no
97 * descriptive message in that case.
99 * \param[out] errorMessage When returning false on a build configured with
100 * GPU support and a non-null pointer was passed,
101 * the string contains a descriptive message about
102 * why GPUs cannot be detected.
106 bool isDeviceDetectionFunctional(std::string* errorMessage);
108 /*! \brief Returns a DeviceVendor value corresponding to the input OpenCL vendor name.
110 * \returns DeviceVendor value for the vendor name passed in \p vendorName
112 DeviceVendor getDeviceVendor(const char* vendorName);
114 /*! \brief Find all GPUs in the system.
116 * Will detect every GPU supported by the device driver in use.
117 * Must only be called if \c canPerformDeviceDetection() has returned true.
118 * This routine also checks the compatibility of each device and fills the
119 * returned list with the required information on each device: ID, device
120 * properties, status.
122 * Note that this function leaves the GPU runtime API error state clean;
123 * this is implemented ATM in the CUDA flavor. This invalidates any existing
124 * CUDA streams, allocated memory on GPU, etc.
126 * \todo: Check if errors do propagate in OpenCL as they do in CUDA and
127 * whether there is a mechanism to "clear" them.
129 * \return Standard vector with the list of devices found
131 * \throws InternalError if a GPU API returns an unexpected failure (because
132 * the call to canPerformDeviceDetection() should always prevent this occurring)
134 std::vector<std::unique_ptr<DeviceInformation>> findDevices();
136 /*! \brief Return a container of device-information handles that are compatible.
138 * This function filters the result of the detection for compatible
139 * GPUs, based on the previously run compatibility tests.
141 * \param[in] deviceInfoList Information on the available devices.
143 * \return Vector of references to the DeviceInformation of each GPU recorded as compatible
145 std::vector<std::reference_wrapper<DeviceInformation>>
146 getCompatibleDevices(const std::vector<std::unique_ptr<DeviceInformation>>& deviceInfoList);
148 /*! \brief Return a container of the IDs of the compatible GPUs.
150 * This function filters the result of the detection for compatible
151 * GPUs, based on the previously run compatibility tests.
153 * \param[in] deviceInfoList Information on the available devices.
155 * \return Vector of compatible GPU IDs.
157 std::vector<int> getCompatibleDeviceIds(gmx::ArrayRef<const std::unique_ptr<DeviceInformation>> deviceInfoList);
159 /*! \brief Return whether \p deviceId is found in \p deviceInfoList and is compatible
161 * This function filters the result of the detection for compatible
162 * GPUs, based on the previously run compatibility tests.
164 * \param[in] deviceInfoList An information on available devices.
165 * \param[in] deviceId The device ID to find in the list.
167 * \throws RangeError If \p deviceId does not match the id of any device in \c deviceInfoList
169 * \return Whether \c deviceId is compatible.
171 bool deviceIdIsCompatible(gmx::ArrayRef<const std::unique_ptr<DeviceInformation>> deviceInfoList,
174 /*! \brief Set the active GPU.
176 * Makes the device described by \p deviceInfo the active one. Essential in CUDA, where
177 * the device buffers and kernel launches are not connected to the device context. In OpenCL, checks
178 * the device vendor and makes vendor-specific performance adjustments.
180 * \param[in] deviceInfo Information on the device to be set.
182 * Issues a fatal error for any critical errors that occur.
185 void setActiveDevice(const DeviceInformation& deviceInfo);
187 /*! \brief Releases the GPU device used by the active context at the time of calling (CUDA only).
189 * If \c deviceInfo is nullptr, then it is understood that no device
190 * was selected so no context is active to be freed. Otherwise, the
191 * context is explicitly destroyed and therefore all data uploaded to
192 * the GPU is lost. This must only be called when none of this data is
193 * required anymore, because subsequent attempts to free memory
194 * associated with the context will otherwise fail.
196 * Calls \c gmx_warning upon errors.
198 * \todo This should go through all the devices, not only the one currently active.
199 * Resetting only one device will not work, e.g. in CUDA tests.
201 * \param[in] deviceInfo Information on the device to be released, or nullptr if none was selected.
203 void releaseDevice(DeviceInformation* deviceInfo);
205 /*! \brief Formats and returns a device information string for a given GPU.
207 * Given the information on a GPU, returns
208 * a formatted info string for that GPU which includes ID, name,
209 * compute capability, and detection status.
211 * \param[in] deviceInfo Information on the device to be described.
213 * \returns A string describing the device.
215 std::string getDeviceInformationString(const DeviceInformation& deviceInfo);
217 /*! \brief Return a string describing how compatible the GPU with given \c deviceId is.
219 * \param[in] deviceInfoList An information on available devices.
220 * \param[in] deviceId An index of the device to check
221 * \returns A string describing the compatibility status, useful for error messages.
223 std::string getDeviceCompatibilityDescription(gmx::ArrayRef<const std::unique_ptr<DeviceInformation>> deviceInfoList,
226 /*! \brief Serializes the information on devices for MPI broadcasting.
228 * \param[in] deviceInfoList The vector with the device information to serialize.
229 * \param[in] serializer Serializing object.
231 void serializeDeviceInformations(const std::vector<std::unique_ptr<DeviceInformation>>& deviceInfoList,
232 gmx::ISerializer* serializer);
234 /*! \brief Deserializes the information on devices after MPI broadcasting.
236 * \param[in] serializer Serializing object.
238 * \return Deserialized vector with the device information.
240 std::vector<std::unique_ptr<DeviceInformation>> deserializeDeviceInformations(gmx::ISerializer* serializer);
242 #endif // GMX_HARDWARE_DEVICE_MANAGEMENT_H