GCC 7 needs a work-around just like older CUDA does.
Removed unnecessary use of C++17 features in compat/pointers.h, as we
do in other parts of the code, because CUDA compilation must stay
within C++14.
Refs #3608
Fixes #3783, #3780
# version.
list(APPEND GMX_CUDA_NVCC_FLAGS "${CMAKE_CXX14_STANDARD_COMPILE_OPTION}")
else()
- list(APPEND GMX_CUDA_NVCC_FLAGS "${CMAKE_CXX17_STANDARD_COMPILE_OPTION}")
+ # gcc-7 pre-dated C++17, so it uses the -std=c++1z compiler flag for it,
+ # which modern nvcc does not recognize. So we work around that by
+ # compiling in C++14 mode. Clang doesn't have this problem because nvcc
+ # only supports versions of clang that already understood -std=c++17.
+ if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8)
+ list(APPEND GMX_CUDA_NVCC_FLAGS "${CMAKE_CXX14_STANDARD_COMPILE_OPTION}")
+ else()
+ list(APPEND GMX_CUDA_NVCC_FLAGS "${CMAKE_CXX17_STANDARD_COMPILE_OPTION}")
+ endif()
endif()
# assemble the CUDA flags
class not_null
{
public:
- static_assert(std::is_assignable_v<T&, std::nullptr_t>, "T cannot be assigned nullptr.");
+ static_assert(std::is_assignable<T&, std::nullptr_t>::value, "T cannot be assigned nullptr.");
//! Move constructor. Asserts in debug mode if \c u is nullptr.
- template<typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
+ template<typename U, typename = std::enable_if_t<std::is_convertible<U, T>::value>>
constexpr explicit not_null(U&& u) : ptr_(std::forward<U>(u))
{
Expects(ptr_ != nullptr);
}
//! Simple constructor. Asserts in debug mode if \c u is nullptr.
- template<typename = std::enable_if_t<!std::is_same_v<std::nullptr_t, T>>>
+ template<typename = std::enable_if_t<!std::is_same<std::nullptr_t, T>::value>>
constexpr explicit not_null(T u) : ptr_(u)
{
Expects(ptr_ != nullptr);
}
//! Copy constructor.
- template<typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
+ template<typename U, typename = std::enable_if_t<std::is_convertible<U, T>::value>>
constexpr not_null(const not_null<U>& other) : not_null(other.get())
{
}
detecthardware.cpp
device_management_common.cpp
hardwaretopology.cpp
+ prepare_detection.cpp
printhardware.cpp
identifyavx512fmaunits.cpp
)
#include <algorithm>
#include <array>
-#include <chrono>
#include <memory>
#include <string>
-#include <thread>
#include <vector>
-#include "gromacs/compat/pointers.h"
#include "gromacs/hardware/cpuinfo.h"
#include "gromacs/hardware/device_management.h"
#include "gromacs/hardware/hardwaretopology.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/inmemoryserializer.h"
#include "gromacs/utility/logger.h"
-#include "gromacs/utility/mutex.h"
#include "gromacs/utility/physicalnodecommunicator.h"
#include "architecture.h"
#include "device_information.h"
+#include "prepare_detection.h"
#ifdef HAVE_UNISTD_H
# include <unistd.h> // sysconf()
// Read-only access is enforced with providing those ranks with a
// handle to a const object, so usage is also free of races.
GMX_UNUSED_VALUE(physicalNodeComm);
- isMasterRankOfPhysicalNode = true;
+ isMasterRankOfPhysicalNode = true;
#endif
/* The SYCL and OpenCL support requires us to run detection on all
}
//! Reduce the locally collected \p hardwareInfo over MPI ranks
-static void gmx_collect_hardware_mpi(const gmx::CpuInfo& cpuInfo,
- const PhysicalNodeCommunicator& physicalNodeComm,
- compat::not_null<gmx_hw_info_t*> hardwareInfo)
+static void gmx_collect_hardware_mpi(const gmx::CpuInfo& cpuInfo,
+ const PhysicalNodeCommunicator& physicalNodeComm,
+ gmx_hw_info_t* hardwareInfo)
{
const int ncore = hardwareInfo->hardwareTopology->numberOfCores();
/* Zen1 is assumed for:
hardwareInfo->bIdenticalGPUs = (maxMinReduced[4] == -maxMinReduced[9]);
hardwareInfo->haveAmdZen1Cpu = (maxMinReduced[10] > 0);
#else
- /* All ranks use the same pointer, protected by a mutex in the caller */
hardwareInfo->nphysicalnode = 1;
hardwareInfo->ncore_tot = ncore;
hardwareInfo->ncore_min = ncore;
#endif
}
-/*! \brief Utility that does dummy computing for max 2 seconds to spin up cores
- *
- * This routine will check the number of cores configured and online
- * (using sysconf), and the spins doing dummy compute operations for up to
- * 2 seconds, or until all cores have come online. This can be used prior to
- * hardware detection for platforms that take unused processors offline.
- *
- * This routine will not throw exceptions. In principle it should be
- * declared noexcept, but at least icc 19.1 and 21-beta08 with the
- * libstdc++-7.5 has difficulty implementing a std::vector of
- * std::thread started with this function when declared noexcept. It
- * is not clear whether the problem is the compiler or the standard
- * library. Fortunately, this function is not performance sensitive,
- * and only runs on platforms other than x86 and POWER (ie ARM),
- * so the possible overhead introduced by omitting noexcept is not
- * important.
- */
-static void spinUpCore()
-{
-#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) && defined(_SC_NPROCESSORS_ONLN)
- float dummy = 0.1;
- int countConfigured = sysconf(_SC_NPROCESSORS_CONF); // noexcept
- auto start = std::chrono::steady_clock::now(); // noexcept
-
- while (sysconf(_SC_NPROCESSORS_ONLN) < countConfigured
- && std::chrono::steady_clock::now() - start < std::chrono::seconds(2))
- {
- for (int i = 1; i < 10000; i++)
- {
- dummy /= i;
- }
- }
-
- if (dummy < 0)
- {
- printf("This cannot happen, but prevents loop from being optimized away.");
- }
-#endif
-}
-
-/*! \brief Prepare the system before hardware topology detection
- *
- * This routine should perform any actions we want to put the system in a state
- * where we want it to be before detecting the hardware topology. For most
- * processors there is nothing to do, but some architectures (in particular ARM)
- * have support for taking configured cores offline, which will make them disappear
- * from the online processor count.
- *
- * This routine checks if there is a mismatch between the number of cores
- * configured and online, and in that case we issue a small workload that
- * attempts to wake sleeping cores before doing the actual detection.
- *
- * This type of mismatch can also occur for x86 or PowerPC on Linux, if SMT has only
- * been disabled in the kernel (rather than bios). Since those cores will never
- * come online automatically, we currently skip this test for x86 & PowerPC to
- * avoid wasting 2 seconds. We also skip the test if there is no thread support.
- *
- * \note Cores will sleep relatively quickly again, so it's important to issue
- * the real detection code directly after this routine.
- */
-static void hardwareTopologyPrepareDetection()
-{
-#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) \
- && (defined(THREAD_PTHREADS) || defined(THREAD_WINDOWS))
-
- // Modify this conditional when/if x86 or PowerPC starts to sleep some cores
- if (c_architecture != Architecture::X86 && c_architecture != Architecture::PowerPC)
- {
- int countConfigured = sysconf(_SC_NPROCESSORS_CONF);
- std::vector<std::thread> workThreads(countConfigured);
-
- for (auto& t : workThreads)
- {
- t = std::thread(spinUpCore);
- }
-
- for (auto& t : workThreads)
- {
- t.join();
- }
- }
-#endif
-}
-
void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused& mdlog,
const gmx::HardwareTopology gmx_unused& hardwareTopology)
{
std::unique_ptr<gmx_hw_info_t> gmx_detect_hardware(const PhysicalNodeCommunicator& physicalNodeComm)
{
- // Make the new hardwareInfo in a temporary.
+ // Ensure all cores have spun up, where applicable.
hardwareTopologyPrepareDetection();
// TODO: We should also do CPU hardware detection only once on each
std::swap(hardwareInfo->hardwareDetectionWarnings_, deviceDetectionResult.deviceDetectionWarnings_);
}
- gmx_collect_hardware_mpi(*hardwareInfo->cpuInfo, physicalNodeComm, compat::make_not_null(hardwareInfo));
+ gmx_collect_hardware_mpi(*hardwareInfo->cpuInfo, physicalNodeComm, hardwareInfo.get());
return hardwareInfo;
}
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief Defines routine for activating potentially deactivated cores
+ * so they can be detected.
+ *
+ * The use of std::thread makes for brittle interaction with std
+ * library headers. Its caller also handles GPU detection and
+ * allocation of device-specific data structures. This is more
+ * manageable when separated into two distinct translation units.
+ *
+ * \author Erik Lindahl <erik.lindahl@scilifelab.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_hardware
+ */
+#include "gmxpre.h"
+
+#include "prepare_detection.h"
+
+#include "config.h"
+
+#include <cstdio>
+
+#include <chrono>
+#include <thread>
+#include <vector>
+
+#include "architecture.h"
+
+#ifdef HAVE_UNISTD_H
+# include <unistd.h> // sysconf()
+#endif
+
+namespace gmx
+{
+
+/*! \brief Utility that does dummy computing for max 2 seconds to spin up cores
+ *
+ * This routine will check the number of cores configured and online
+ * (using sysconf), and then spins doing dummy compute operations for up to
+ * 2 seconds, or until all cores have come online. This can be used prior to
+ * hardware detection for platforms that take unused processors offline.
+ *
+ * When sysconf or the required _SC_NPROCESSORS_* names are not available
+ * at compile time, the body is compiled out and the routine does nothing.
+ *
+ * This routine will not throw exceptions. In principle it should be
+ * declared noexcept, but at least icc 19.1 and 21-beta08 with
+ * libstdc++-7.5 have difficulty implementing a std::vector of
+ * std::thread started with this function when declared noexcept. It
+ * is not clear whether the problem is the compiler or the standard
+ * library. Fortunately, this function is not performance sensitive,
+ * and only runs on platforms other than x86 and POWER (e.g. ARM),
+ * so the possible overhead introduced by omitting noexcept is not
+ * important.
+ */
+static void spinUpCore()
+{
+#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) && defined(_SC_NPROCESSORS_ONLN)
+ float dummy = 0.1; // accumulator for the busy-work; its value is never meaningful
+ int countConfigured = sysconf(_SC_NPROCESSORS_CONF); // noexcept
+ auto start = std::chrono::steady_clock::now(); // noexcept
+
+ while (sysconf(_SC_NPROCESSORS_ONLN) < countConfigured
+ && std::chrono::steady_clock::now() - start < std::chrono::seconds(2))
+ {
+ for (int i = 1; i < 10000; i++) // short burst of arithmetic between re-checks of the loop condition
+ {
+ dummy /= i;
+ }
+ }
+
+ if (dummy < 0) // never true: dummy starts positive and is only divided by positive i
+ {
+ printf("This cannot happen, but prevents loop from being optimized away.");
+ }
+#endif
+}
+
+void hardwareTopologyPrepareDetection() // starts one spinUpCore() thread per configured core and joins them all
+{
+#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) \
+ && (defined(THREAD_PTHREADS) || defined(THREAD_WINDOWS))
+
+ // Modify this conditional when/if x86 or PowerPC starts to sleep some cores
+ if (c_architecture != Architecture::X86 && c_architecture != Architecture::PowerPC)
+ {
+ int countConfigured = sysconf(_SC_NPROCESSORS_CONF); // NOTE(review): sysconf() can return -1 on failure; confirm that cannot happen here
+ std::vector<std::thread> workThreads(countConfigured); // default-constructed (not yet running) threads; a negative count would convert to a huge size
+
+ for (auto& t : workThreads)
+ {
+ t = std::thread(spinUpCore); // launch the busy-work on a new thread
+ }
+
+ for (auto& t : workThreads)
+ {
+ t.join(); // block until every worker has finished (each spins for at most about 2 seconds)
+ }
+ }
+#endif
+}
+
+} // namespace gmx
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal
+ * \file
+ * \brief Declares routine for activating potentially deactivated
+ * cores so they can be detected.
+ *
+ * \author Erik Lindahl <erik.lindahl@scilifelab.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_hardware
+ */
+#ifndef GMX_HARDWARE_PREPAREDETECTION_H
+#define GMX_HARDWARE_PREPAREDETECTION_H
+
+namespace gmx
+{
+
+/*! \brief Prepare the system before hardware topology detection
+ *
+ * This routine performs any actions needed to put the system in the state
+ * we want it to be in before detecting the hardware topology. For most
+ * processors there is nothing to do, but some architectures (in particular ARM)
+ * have support for taking configured cores offline, which will make them disappear
+ * from the online processor count.
+ *
+ * This routine checks if there is a mismatch between the number of cores
+ * configured and online, and in that case we issue a small workload that
+ * attempts to wake sleeping cores before doing the actual detection.
+ *
+ * This type of mismatch can also occur for x86 or PowerPC on Linux, if SMT has only
+ * been disabled in the kernel (rather than bios). Since those cores will never
+ * come online automatically, we currently skip this test for x86 & PowerPC to
+ * avoid wasting 2 seconds. We also skip the test if there is no thread support.
+ *
+ * \note Cores will sleep relatively quickly again, so it's important to issue
+ * the real detection code directly after this routine.
+ */
+void hardwareTopologyPrepareDetection();
+
+} // namespace gmx
+
+#endif