2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2020,2021, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Defines the SYCL implementations of the device management.
38 * \author Paul Bauer <paul.bauer.q@gmail.com>
39 * \author Erik Lindahl <erik.lindahl@gmail.com>
40 * \author Artem Zhmurov <zhmurov@gmail.com>
41 * \author Andrey Alekseenko <al42and@gmail.com>
43 * \ingroup module_hardware
49 #include "gromacs/gpu_utils/gmxsycl.h"
50 #include "gromacs/hardware/device_management.h"
51 #include "gromacs/utility/fatalerror.h"
52 #include "gromacs/utility/stringutil.h"
54 #include "device_information.h"
57 bool isDeviceDetectionFunctional(std::string* errorMessage)
61 const std::vector<cl::sycl::platform> platforms = cl::sycl::platform::get_platforms();
62 // SYCL should always have the "host" platform, but just in case:
63 if (platforms.empty() && errorMessage != nullptr)
65 errorMessage->assign("No SYCL platforms found.");
67 return !platforms.empty();
69 catch (const std::exception& e)
71 if (errorMessage != nullptr)
74 gmx::formatString("Unable to get the list of SYCL platforms: %s", e.what()));
82 * \brief Checks that device \c deviceInfo is compatible with GROMACS.
84 * For now, only checks that the vendor is Intel and it is a GPU.
86 * \param[in] syclDevice The SYCL device pointer.
87 * \returns The status enumeration value for the checked device:
89 static DeviceStatus isDeviceCompatible(const cl::sycl::device& syclDevice)
91 if (getenv("GMX_GPU_DISABLE_COMPATIBILITY_CHECK") != nullptr)
93 // Assume the device is compatible because checking has been disabled.
94 return DeviceStatus::Compatible;
97 if (syclDevice.get_info<cl::sycl::info::device::local_mem_type>() == cl::sycl::info::local_mem_type::none)
99 // While some kernels (leapfrog) can run without shared/local memory, this is a bad sign
100 return DeviceStatus::Incompatible;
103 if (syclDevice.is_host())
105 // Host device does not support subgroups or even querying for sub_group_sizes
106 return DeviceStatus::Incompatible;
110 /* At the time of writing:
111 * 1. SYCL NB kernels currently don't support sub_group size of 32 or 64, which are the only
112 * ones available on NVIDIA and AMD hardware, respectively. That's not a fundamental limitation,
113 * but requires porting more OpenCL code, see #3934.
114 * 2. hipSYCL does not support cl::sycl::info::device::sub_group_sizes,
115 * see https://github.com/illuhad/hipSYCL/pull/449
117 const std::vector<size_t> supportedSubGroupSizes{ warpSize };
119 const std::vector<size_t> supportedSubGroupSizes =
120 syclDevice.get_info<cl::sycl::info::device::sub_group_sizes>();
122 const size_t requiredSubGroupSizeForNBNXM = 8;
123 if (std::find(supportedSubGroupSizes.begin(), supportedSubGroupSizes.end(), requiredSubGroupSizeForNBNXM)
124 == supportedSubGroupSizes.end())
126 return DeviceStatus::IncompatibleClusterSize;
129 /* Host device can not be used, because NBNXM requires sub-groups, which are not supported.
130 * Accelerators (FPGAs and their emulators) are not supported.
131 * So, the only viable options are CPUs and GPUs. */
132 const bool forceCpu = (getenv("GMX_SYCL_FORCE_CPU") != nullptr);
134 if (forceCpu && syclDevice.is_cpu())
136 return DeviceStatus::Compatible;
138 else if (!forceCpu && syclDevice.is_gpu())
140 return DeviceStatus::Compatible;
144 return DeviceStatus::Incompatible;
150 * \brief Checks that device \c deviceInfo is sane (ie can run a kernel).
152 * Compiles and runs a dummy kernel to determine whether the given
153 * SYCL device functions properly.
156 * \param[in] syclDevice The device info pointer.
157 * \param[out] errorMessage An error message related to a SYCL error.
158 * \throws std::bad_alloc When out of memory.
159 * \returns Whether the device passed sanity checks
161 static bool isDeviceFunctional(const cl::sycl::device& syclDevice, std::string* errorMessage)
163 static const int numThreads = 8;
166 cl::sycl::queue queue(syclDevice);
167 cl::sycl::buffer<int, 1> buffer(numThreads);
168 queue.submit([&](cl::sycl::handler& cgh) {
169 auto d_buffer = buffer.get_access<cl::sycl::access::mode::discard_write>(cgh);
170 cl::sycl::range<1> range{ numThreads };
171 cgh.parallel_for<class DummyKernel>(range, [=](cl::sycl::id<1> threadId) {
172 d_buffer[threadId] = threadId.get(0);
176 const auto h_Buffer = buffer.get_access<cl::sycl::access::mode::read>();
177 for (int i = 0; i < numThreads; i++)
179 if (h_Buffer[i] != i)
181 if (errorMessage != nullptr)
183 errorMessage->assign("Dummy kernel produced invalid values");
189 catch (const std::exception& e)
191 if (errorMessage != nullptr)
193 errorMessage->assign(
194 gmx::formatString("Unable to run dummy kernel on device %s: %s",
195 syclDevice.get_info<cl::sycl::info::device::name>().c_str(),
205 * \brief Checks that device \c deviceInfo is compatible and functioning.
207 * Checks the given SYCL device for compatibility and runs a dummy kernel on it to determine
208 * whether the device functions properly.
211 * \param[in] deviceId Device number (internal to GROMACS).
212 * \param[in] deviceInfo The device info pointer.
213 * \returns The status of device.
215 static DeviceStatus checkDevice(size_t deviceId, const DeviceInformation& deviceInfo)
218 DeviceStatus supportStatus = isDeviceCompatible(deviceInfo.syclDevice);
219 if (supportStatus != DeviceStatus::Compatible)
221 return supportStatus;
224 std::string errorMessage;
225 if (!isDeviceFunctional(deviceInfo.syclDevice, &errorMessage))
227 gmx_warning("While sanity checking device #%zu, %s", deviceId, errorMessage.c_str());
228 return DeviceStatus::NonFunctional;
231 return DeviceStatus::Compatible;
234 /* In DPCPP, the same physical device can appear as different virtual devices provided
235 * by different backends (e.g., the same GPU can be accessible via both OpenCL and L0).
236 * Thus, using devices from two backends is more likely to be a user error than the
237 * desired behavior. In this function, we choose the backend with the most compatible
238 * devices. In case of a tie, we choose OpenCL (if present), or some arbitrary backend
239 * among those with the most devices.
241 * In hipSYCL, this problem is unlikely to manifest. It has (as of 2021-03-03) another
242 * issues: D2D copy between different backends is not allowed. We don't use D2D in
243 * SYCL yet. Additionally, hipSYCL does not implement the `sycl::platform::get_backend()`
245 * Thus, we only do the backend filtering with DPCPP.
248 static std::optional<cl::sycl::backend>
249 chooseBestBackend(const std::vector<std::unique_ptr<DeviceInformation>>& deviceInfos)
251 // Count the number of compatible devices per backend
252 std::map<cl::sycl::backend, int> countDevicesByBackend; // Default initialized with zeros
253 for (const auto& deviceInfo : deviceInfos)
255 if (deviceInfo->status == DeviceStatus::Compatible)
257 const cl::sycl::backend backend = deviceInfo->syclDevice.get_platform().get_backend();
258 ++countDevicesByBackend[backend];
261 // If we have devices from more than one backend...
262 if (countDevicesByBackend.size() > 1)
264 // Find backend with most devices
265 const auto backendWithMostDevices = std::max_element(
266 countDevicesByBackend.cbegin(),
267 countDevicesByBackend.cend(),
268 [](const auto& kv1, const auto& kv2) { return kv1.second < kv2.second; });
269 // Count devices provided by OpenCL. Will be zero if no OpenCL devices found.
270 const int devicesInOpenCL = countDevicesByBackend[cl::sycl::backend::opencl];
271 if (devicesInOpenCL == backendWithMostDevices->second)
273 // Prefer OpenCL backend as more stable, if it has as many devices as others
274 return cl::sycl::backend::opencl;
278 // Otherwise, just return max
279 return backendWithMostDevices->first;
282 else if (countDevicesByBackend.size() == 1)
284 return countDevicesByBackend.cbegin()->first;
286 else // No devices found
293 std::vector<std::unique_ptr<DeviceInformation>> findDevices()
295 std::vector<std::unique_ptr<DeviceInformation>> deviceInfos(0);
296 const std::vector<cl::sycl::device> devices = cl::sycl::device::get_devices();
297 deviceInfos.reserve(devices.size());
298 for (const auto& syclDevice : devices)
300 deviceInfos.emplace_back(std::make_unique<DeviceInformation>());
302 size_t i = deviceInfos.size() - 1;
304 deviceInfos[i]->id = i;
305 deviceInfos[i]->syclDevice = syclDevice;
306 deviceInfos[i]->status = checkDevice(i, *deviceInfos[i]);
307 deviceInfos[i]->deviceVendor =
308 getDeviceVendor(syclDevice.get_info<cl::sycl::info::device::vendor>().c_str());
311 // Now, filter by the backend if we did not disable compatibility check
312 if (getenv("GMX_GPU_DISABLE_COMPATIBILITY_CHECK") == nullptr)
314 std::optional<cl::sycl::backend> preferredBackend = chooseBestBackend(deviceInfos);
315 if (preferredBackend.has_value())
317 for (auto& deviceInfo : deviceInfos)
319 if (deviceInfo->syclDevice.get_platform().get_backend() != *preferredBackend)
321 deviceInfo->status = DeviceStatus::NotPreferredBackend;
330 void setActiveDevice(const DeviceInformation& /*deviceInfo*/) {}
332 void releaseDevice(DeviceInformation* /* deviceInfo */) {}
334 std::string getDeviceInformationString(const DeviceInformation& deviceInfo)
336 bool deviceExists = (deviceInfo.status != DeviceStatus::Nonexistent
337 && deviceInfo.status != DeviceStatus::NonFunctional);
341 return gmx::formatString(
342 "#%d: %s, status: %s", deviceInfo.id, "N/A", c_deviceStateString[deviceInfo.status]);
346 return gmx::formatString(
347 "#%d: name: %s, vendor: %s, device version: %s, status: %s",
349 deviceInfo.syclDevice.get_info<cl::sycl::info::device::name>().c_str(),
350 deviceInfo.syclDevice.get_info<cl::sycl::info::device::vendor>().c_str(),
351 deviceInfo.syclDevice.get_info<cl::sycl::info::device::version>().c_str(),
352 c_deviceStateString[deviceInfo.status]);