2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
5 * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
38 #include "printhardware.h"
47 #include "gromacs/hardware/cpuinfo.h"
48 #include "gromacs/hardware/device_management.h"
49 #include "gromacs/hardware/hardwaretopology.h"
50 #include "gromacs/hardware/hw_info.h"
51 #include "gromacs/hardware/identifyavx512fmaunits.h"
52 #include "gromacs/simd/support.h"
53 #include "gromacs/utility/basedefinitions.h"
54 #include "gromacs/utility/basenetwork.h"
55 #include "gromacs/utility/cstringutil.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/gmxmpi.h"
58 #include "gromacs/utility/logger.h"
59 #include "gromacs/utility/programcontext.h"
60 #include "gromacs/utility/stringutil.h"
61 #include "gromacs/utility/sysinfo.h"
// Compile-time flag that is true when the GMX_GPU configuration macro is non-zero.
// The GPU info section of detected_hardware_string() tests it in an ordinary if
// statement rather than in a preprocessor conditional.
63 //! Constant used to help minimize preprocessed code
64 static constexpr bool bGPUBinary = (GMX_GPU != 0);
// Builds the text block that lists the detected devices.
//
// deviceInfoList: owning pointers to the DeviceInformation records to describe.
//                 Every pointer is dereferenced, so none of them may be null.
// Returns the per-device strings combined by gmx::joinStrings with a newline
// as the separator argument.
// NOTE(review): the result for an empty list depends on gmx::joinStrings,
// presumably an empty string - confirm.
67 * Returns the GPU information text, one GPU per line.
69 static std::string sprint_gpus(const std::vector<std::unique_ptr<DeviceInformation>>& deviceInfoList)
71 std::vector<std::string> gpuStrings(0);
// One entry per device: an indentation prefix followed by the text returned by
// getDeviceInformationString() for that device.
72 for (const auto& deviceInfo : deviceInfoList)
74 gpuStrings.emplace_back(" " + getDeviceInformationString(*deviceInfo));
76 return gmx::joinStrings(gpuStrings, "\n");
// Compares the build-time rdtscp setting (GMX_USE_RDTSCP) with what the
// running CPU reports and tells the user about any mismatch.
//
// mdlog:   logger whose warning channel receives the non-fatal messages.
// cpuInfo: CPU detection result; queried for its support level and for the
//          X86_Rdtscp feature flag.
//
// Three situations are reported by the messages below:
//  - CPU features could not be detected: warn that the run proceeds anyway.
//  - binary uses rdtscp but the CPU lacks it: message says continuing is not
//    possible and asks for a rebuild with GMX_USE_RDTSCP=OFF.
//  - CPU has rdtscp but the binary does not use it: warn about less accurate
//    timings and suggest a rebuild with GMX_USE_RDTSCP=ON.
79 /* Give a suitable fatal error or warning if the build configuration
80 and runtime CPU do not match. */
81 static void check_use_of_rdtscp_on_this_cpu(const gmx::MDLogger& mdlog, const gmx::CpuInfo& cpuInfo)
// Build-time choice, captured in a runtime variable so it can be used in
// ordinary conditions.
83 bool binaryUsesRdtscp = GMX_USE_RDTSCP;
// Name of the running executable, substituted into every message.
85 const char* programName = gmx::getProgramContext().displayName();
// Feature detection did not work on this CPU, so the feature flag cannot be
// checked; only a warning is possible.
// NOTE(review): the message only makes sense when binaryUsesRdtscp is true;
// confirm that the warning is guarded by that flag.
87 if (cpuInfo.supportLevel() < gmx::CpuInfo::SupportLevel::Features)
91 GMX_LOG(mdlog.warning)
94 "The %s executable was compiled to use the rdtscp CPU instruction. "
95 "We cannot detect the features of your current CPU, but will proceed "
97 "If you get a crash, rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake "
// Feature detection worked, so the flag reported by the CPU can be trusted.
// NOTE(review): presumably this is the else branch of the support-level test
// above - confirm.
104 bool cpuHasRdtscp = cpuInfo.feature(gmx::CpuInfo::Feature::X86_Rdtscp);
// Binary needs an instruction the CPU does not provide.
// NOTE(review): the call that emits this message is not visible here;
// presumably a fatal error as the header comment says - confirm.
106 if (!cpuHasRdtscp && binaryUsesRdtscp)
109 "The %s executable was compiled to use the rdtscp CPU instruction. "
110 "However, this is not supported by the current hardware and continuing would "
112 "Please rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.",
// CPU offers rdtscp but the build does not use it: non-fatal, only a hint.
116 if (cpuHasRdtscp && !binaryUsesRdtscp)
118 GMX_LOG(mdlog.warning)
120 .appendTextFormatted(
121 "The current CPU can measure timings more accurately than the code in\n"
122 "%s was configured to use. This might affect your simulation\n"
123 "speed as accurate timings are needed for load-balancing.\n"
124 "Please consider rebuilding %s with the GMX_USE_RDTSCP=ON CMake "
// The format string has two %s placeholders, both filled with the program name.
126 programName, programName);
// Assembles the multi-line, human-readable description of the detected
// hardware by appending formatted pieces to the string s.
//
// hwinfo:       detection results. The cpuInfo and hardwareTopology members
//               are dereferenced immediately, so both must be non-null.
// bFullCpuInfo: NOTE(review): not referenced in the visible lines; presumably
//               it gates the Family, Features and AVX-512 details - confirm.
// Returns a std::string; presumably the accumulated s - the return statement
// is not visible here, TODO confirm.
//
// Sections appended, in order: totals summary line, per-node ranges (only for
// more than one physical node), header line, CPU info, hardware topology
// (detail depends on the topology support level) and GPU info.
131 static std::string detected_hardware_string(const gmx_hw_info_t* hwinfo, bool bFullCpuInfo)
135 const gmx::CpuInfo& cpuInfo = *hwinfo->cpuInfo;
136 const gmx::HardwareTopology& hwTop = *hwinfo->hardwareTopology;
// Summary line: node count (plural s unless exactly one node), then the
// totals over all nodes.
138 s = gmx::formatString("\n");
139 s += gmx::formatString("Running on %d node%s with total", hwinfo->nphysicalnode,
140 hwinfo->nphysicalnode == 1 ? "" : "s");
// The core total is printed only when it is positive.
141 if (hwinfo->ncore_tot > 0)
143 s += gmx::formatString(" %d cores,", hwinfo->ncore_tot);
145 s += gmx::formatString(" %d logical cores", hwinfo->nhwthread_tot);
// Compatible GPU count is reported only when device detection can be performed.
146 if (canPerformDeviceDetection(nullptr))
148 s += gmx::formatString(", %d compatible GPU%s", hwinfo->ngpu_compatible_tot,
149 hwinfo->ngpu_compatible_tot == 1 ? "" : "s");
// Otherwise a note distinguishes failed detection from deactivated detection.
// NOTE(review): presumably the two notes below are the two branches of this
// test, and the whole test sits in an else branch of the check above - confirm.
153 if (isDeviceDetectionEnabled())
155 s += gmx::formatString(" (GPU detection failed)");
159 s += gmx::formatString(" (GPU detection deactivated)");
162 s += gmx::formatString("\n");
// With several physical nodes, print the per-node minimum of each count and
// append the maximum as a range only when it differs from the minimum.
164 if (hwinfo->nphysicalnode > 1)
166 /* Print per node hardware feature counts */
167 if (hwinfo->ncore_max > 0)
169 s += gmx::formatString(" Cores per node: %2d", hwinfo->ncore_min);
170 if (hwinfo->ncore_max > hwinfo->ncore_min)
172 s += gmx::formatString(" - %2d", hwinfo->ncore_max);
174 s += gmx::formatString("\n");
176 s += gmx::formatString(" Logical cores per node: %2d", hwinfo->nhwthread_min);
177 if (hwinfo->nhwthread_max > hwinfo->nhwthread_min)
179 s += gmx::formatString(" - %2d", hwinfo->nhwthread_max);
181 s += gmx::formatString("\n");
// NOTE(review): the GPU per-node lines are presumably limited to GPU builds;
// the guard is not visible here - confirm.
184 s += gmx::formatString(" Compatible GPUs per node: %2d", hwinfo->ngpu_compatible_min);
185 if (hwinfo->ngpu_compatible_max > hwinfo->ngpu_compatible_min)
187 s += gmx::formatString(" - %2d", hwinfo->ngpu_compatible_max);
189 s += gmx::formatString("\n");
// When any compatible GPU exists, state whether all nodes report the same
// GPU types, based on the bIdenticalGPUs flag.
190 if (hwinfo->ngpu_compatible_tot > 0)
192 if (hwinfo->bIdenticalGPUs)
194 s += gmx::formatString(" All nodes have identical type(s) of GPUs\n");
198 /* This message will also appear with identical GPU types
199 * when at least one node has no GPU.
201 s += gmx::formatString(
202 " Different nodes have different type(s) and/or order of GPUs\n");
// Header line. One variant names the host and the rank of this process in
// MPI_COMM_WORLD, the other is generic.
// NOTE(review): presumably a build-time MPI conditional selects between the
// two; the declarations of host and rank are not visible here - confirm.
212 gmx_gethostname(host, STRLEN);
214 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
216 // TODO Use a wrapper around MPI_Get_processor_name instead.
217 s += gmx::formatString("Hardware detected on host %s (the node of MPI rank %d):\n", host, rank);
219 s += gmx::formatString("Hardware detected:\n");
// CPU section: vendor and brand strings, then family, model and stepping,
// then one token per entry of the detected feature set.
221 s += gmx::formatString(" CPU info:\n");
223 s += gmx::formatString(" Vendor: %s\n", cpuInfo.vendorString().c_str());
225 s += gmx::formatString(" Brand: %s\n", cpuInfo.brandString().c_str());
229 s += gmx::formatString(" Family: %d Model: %d Stepping: %d\n", cpuInfo.family(),
230 cpuInfo.model(), cpuInfo.stepping());
232 s += gmx::formatString(" Features:");
233 for (auto& f : cpuInfo.featureSet())
235 s += gmx::formatString(" %s", gmx::CpuInfo::featureString(f).c_str());
237 s += gmx::formatString("\n");
// Only for CPUs reporting AVX-512F: print the FMA unit count returned by
// gmx::identifyAvx512FmaUnits(). A count of exactly one adds a hint about AVX2.
// NOTE(review): the detection-failed text is presumably the branch taken when
// the count is not positive - confirm.
240 if (cpuInfo.feature(gmx::CpuInfo::Feature::X86_Avx512F))
242 int avx512fmaunits = gmx::identifyAvx512FmaUnits();
243 s += gmx::formatString(" Number of AVX-512 FMA units:");
244 if (avx512fmaunits > 0)
246 s += gmx::formatString(" %d", avx512fmaunits);
247 if (avx512fmaunits == 1)
249 s += gmx::formatString(" (AVX2 is faster w/o 2 AVX-512 FMA units)");
254 s += gmx::formatString(" Cannot run AVX-512 detection - assuming 2");
256 s += gmx::formatString("\n");
// Topology section: first the name of the support level that was reached.
259 s += gmx::formatString(" Hardware topology: ");
260 switch (hwTop.supportLevel())
262 case gmx::HardwareTopology::SupportLevel::None: s += gmx::formatString("None\n"); break;
263 case gmx::HardwareTopology::SupportLevel::LogicalProcessorCount:
264 s += gmx::formatString("Only logical processor count\n");
266 case gmx::HardwareTopology::SupportLevel::Basic: s += gmx::formatString("Basic\n"); break;
267 case gmx::HardwareTopology::SupportLevel::Full: s += gmx::formatString("Full\n"); break;
268 case gmx::HardwareTopology::SupportLevel::FullWithDevices:
269 s += gmx::formatString("Full, with devices\n");
// Flag a topology that does not describe the running machine and, when the
// HWLOC_XMLFILE environment variable is set, print its value.
273 if (!hwTop.isThisSystem())
275 s += gmx::formatString(" NOTE: Hardware topology cached or synthetic, not detected.\n");
276 if (char* p = std::getenv("HWLOC_XMLFILE"))
278 s += gmx::formatString(" HWLOC_XMLFILE=%s\n", p);
// Basic level or better: one line per socket; each core is a bracketed list
// of the logical processor ids of its hardware threads.
284 if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::Basic)
286 s += gmx::formatString(" Sockets, cores, and logical processors:\n");
288 for (auto& socket : hwTop.machine().sockets)
290 s += gmx::formatString(" Socket %2d:", socket.id);
291 for (auto& c : socket.cores)
293 s += gmx::formatString(" [");
294 for (auto& t : c.hwThreads)
296 s += gmx::formatString(" %3d", t.logicalProcessorId);
298 s += gmx::formatString("]");
300 s += gmx::formatString("\n");
// Full level or better: NUMA nodes with memory size and logical processor ids.
303 if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::Full)
305 s += gmx::formatString(" Numa nodes:\n");
306 for (auto& n : hwTop.machine().numa.nodes)
308 s += gmx::formatString(" Node %2d (%zu bytes mem):", n.id, n.memory);
309 for (auto& l : n.logicalProcessorId)
311 s += gmx::formatString(" %3d", l);
313 s += gmx::formatString("\n");
// Latency table: a header row of column indices, then one row per node i
// holding relativeLatency[i][j] for every node j.
315 s += gmx::formatString(" Latency:\n ");
316 for (std::size_t j = 0; j < hwTop.machine().numa.nodes.size(); j++)
318 s += gmx::formatString(" %5zu", j);
320 s += gmx::formatString("\n");
321 for (std::size_t i = 0; i < hwTop.machine().numa.nodes.size(); i++)
323 s += gmx::formatString(" %5zu", i);
324 for (std::size_t j = 0; j < hwTop.machine().numa.nodes.size(); j++)
326 s += gmx::formatString(" %5.2f", hwTop.machine().numa.relativeLatency[i][j]);
328 s += gmx::formatString("\n");
// One line per cache entry: level, size, line size, associativity, sharing.
332 s += gmx::formatString(" Caches:\n");
333 for (auto& c : hwTop.machine().caches)
335 s += gmx::formatString(
336 " L%d: %zu bytes, linesize %d bytes, assoc. %d, shared %d ways\n",
337 c.level, c.size, c.linesize, c.associativity, c.shared);
// FullWithDevices level: one line per PCI device with its address fields,
// vendor and device ids, class id and NUMA node id.
340 if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::FullWithDevices)
342 s += gmx::formatString(" PCI devices:\n");
343 for (auto& d : hwTop.machine().devices)
345 s += gmx::formatString(
346 " %04x:%02x:%02x.%1x Id: %04x:%04x Class: 0x%04x Numa: %d\n", d.domain,
347 d.bus, d.dev, d.func, d.vendorId, d.deviceId, d.classId, d.numaNodeId);
// GPU section: only in GPU builds and only when at least one device was found.
// The device count is cast to int to match the %d conversion.
352 if (bGPUBinary && !hwinfo->deviceInfoList.empty())
354 s += gmx::formatString(" GPU info:\n");
355 s += gmx::formatString(" Number of GPUs detected: %d\n",
356 static_cast<int>(hwinfo->deviceInfoList.size()));
357 s += sprint_gpus(hwinfo->deviceInfoList) + "\n";
// Writes the detected-hardware report to the log file and runs the
// build-versus-CPU consistency checks.
//
// fplog:        log file; the report is written only when this is non-null.
//               It is also handed to gmx::simdCheck.
// warnToStdErr: forwarded unchanged to gmx::simdCheck.
// mdlog:        logger passed on to the rdtscp check.
// hwinfo:       detection results; its cpuInfo member is dereferenced, so it
//               must be non-null.
362 void gmx_print_detected_hardware(FILE* fplog,
363 const bool warnToStdErr,
364 const gmx::MDLogger& mdlog,
365 const gmx_hw_info_t* hwinfo)
367 const gmx::CpuInfo& cpuInfo = *hwinfo->cpuInfo;
// The full report goes to the log file only, followed by one extra newline.
369 if (fplog != nullptr)
371 std::string detected;
373 detected = detected_hardware_string(hwinfo, TRUE);
375 fprintf(fplog, "%s\n", detected.c_str());
378 // Do not spam stderr with all our internal information unless
379 // there was something that actually went wrong; general information
380 // belongs in the logfile.
// The SIMD check runs only when CPU feature detection reached the Features
// level; it receives the simd_suggest_min value converted to gmx::SimdType.
382 /* Check the compiled SIMD instruction set against that of the node
383 * with the lowest SIMD level support (skip if SIMD detection did not work)
385 if (cpuInfo.supportLevel() >= gmx::CpuInfo::SupportLevel::Features)
387 gmx::simdCheck(static_cast<gmx::SimdType>(hwinfo->simd_suggest_min), fplog, warnToStdErr);
// The rdtscp check is always called, using the CPU information of this process.
390 /* For RDTSCP we only check on our local node and skip the MPI reduction */
391 check_use_of_rdtscp_on_this_cpu(mdlog, cpuInfo);