'stddef.h', 'stdint.h', 'stdio.h', 'stdlib.h', 'string.h',
'time.h']
_std_c_cpp_headers = ['c' + x[:-2] for x in _std_c_headers]
- _std_cpp_headers = ['algorithm', 'array', 'deque', 'exception', 'fstream',
+ _std_cpp_headers = ['algorithm', 'array', 'chrono', 'deque', 'exception', 'fstream',
- 'functional', 'iomanip', 'ios', 'iosfwd', 'iostream', 'istream', 'iterator',
+ 'functional', 'initializer_list', 'iomanip', 'ios', 'iosfwd',
+ 'iostream', 'istream', 'iterator',
'limits', 'list', 'map', 'memory', 'new', 'numeric', 'ostream', 'random',
'regex', 'set', 'sstream', 'stdexcept', 'streambuf', 'string', 'strstream',
- 'tuple', 'type_traits', 'typeindex', 'typeinfo', 'vector', 'utility']
+ 'thread', 'tuple', 'type_traits', 'typeindex', 'typeinfo', 'vector', 'utility']
def __init__(self, style='pub-priv', absolute=False):
"""Initialize a sorter with the given style."""
/* Note: Distinguishing between different types of GPUs here might be necessary in the future,
e.g. if max application clocks should not be used for certain GPUs. */
- if (nvml_stat == NVML_SUCCESS && app_sm_clock < max_sm_clock && gpu_info->gpu_dev[gpuid].nvml_is_restricted == NVML_FEATURE_DISABLED)
+ if (nvml_stat == NVML_SUCCESS && cuda_dev->nvml_orig_app_sm_clock < max_sm_clock && cuda_dev->nvml_is_restricted == NVML_FEATURE_DISABLED)
{
- GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
- md_print_info(fplog, "Changing GPU application clocks for %s to (%d,%d)\n", cuda_dev->prop.name, max_mem_clock, max_sm_clock);
++ GMX_LOG(mdlog.warning).appendTextFormatted(
+ "Changing GPU application clocks for %s to (%d,%d)",
- gpu_info->gpu_dev[gpuid].prop.name, max_mem_clock, max_sm_clock);
- nvml_stat = nvmlDeviceSetApplicationsClocks ( gpu_info->gpu_dev[gpuid].nvml_device_id, max_mem_clock, max_sm_clock );
++ cuda_dev->prop.name, max_mem_clock, max_sm_clock);
+ nvml_stat = nvmlDeviceSetApplicationsClocks(cuda_dev->nvml_device_id, max_mem_clock, max_sm_clock);
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
- gpu_info->gpu_dev[gpuid].nvml_ap_clocks_changed = true;
+ cuda_dev->nvml_app_clocks_changed = true;
+ cuda_dev->nvml_set_app_sm_clock = max_sm_clock;
+ cuda_dev->nvml_set_app_mem_clock = max_mem_clock;
}
- else if (nvml_stat == NVML_SUCCESS && app_sm_clock < max_sm_clock)
+ else if (nvml_stat == NVML_SUCCESS && cuda_dev->nvml_orig_app_sm_clock < max_sm_clock)
{
- md_print_warn(fplog, "Can not change application clocks for %s to optimal values due to insufficient permissions. Current values are (%d,%d), max values are (%d,%d).\nUse sudo nvidia-smi -acp UNRESTRICTED or contact your admin to change application clocks.\n", cuda_dev->prop.name, cuda_dev->nvml_orig_app_mem_clock, cuda_dev->nvml_orig_app_sm_clock, max_mem_clock, max_sm_clock);
+ GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
+ "Can not change application clocks for %s to optimal values due to insufficient permissions. Current values are (%d,%d), max values are (%d,%d).\nUse sudo nvidia-smi -acp UNRESTRICTED or contact your admin to change application clocks.",
- gpu_info->gpu_dev[gpuid].prop.name, app_mem_clock, app_sm_clock, max_mem_clock, max_sm_clock);
++ cuda_dev->prop.name, cuda_dev->nvml_orig_app_mem_clock, cuda_dev->nvml_orig_app_sm_clock, max_mem_clock, max_sm_clock);
}
- else if (nvml_stat == NVML_SUCCESS && app_sm_clock == max_sm_clock)
+ else if (nvml_stat == NVML_SUCCESS && cuda_dev->nvml_orig_app_sm_clock == max_sm_clock)
{
- md_print_info(fplog, "Application clocks (GPU clocks) for %s are (%d,%d)\n", cuda_dev->prop.name, cuda_dev->nvml_orig_app_mem_clock, cuda_dev->nvml_orig_app_sm_clock);
+ //TODO: This should probably be integrated into the GPU Properties table.
+ GMX_LOG(mdlog.warning).appendTextFormatted(
+ "Application clocks (GPU clocks) for %s are (%d,%d)",
- gpu_info->gpu_dev[gpuid].prop.name, app_mem_clock, app_sm_clock);
++ cuda_dev->prop.name, cuda_dev->nvml_orig_app_mem_clock, cuda_dev->nvml_orig_app_sm_clock);
}
else
{
#include <cstring>
#include <algorithm>
+ #include <chrono>
#include <string>
+ #include <thread>
#include <vector>
- #ifdef HAVE_UNISTD_H
- /* For sysconf */
- #include <unistd.h>
- #endif
- #if GMX_NATIVE_WINDOWS
- #include <windows.h>
- #endif
-
#include "thread_mpi/threads.h"
-#include "gromacs/gmxlib/md_logging.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/hardware/cpuinfo.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
- #include "gromacs/utility/gmxomp.h"
+#include "gromacs/utility/logger.h"
#include "gromacs/utility/programcontext.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/stringutil.h"
return uniq_count;
}
- /* On e.g. Arm, the Linux kernel can use advanced power saving features where
- * processors are brought online/offline dynamically. This will cause
- * _SC_NPROCESSORS_ONLN to report 1 at the beginning of the run. For this
- * reason we now warn if this mismatches with the detected core count.
- */
- static void check_nthreads_hw_avail(const gmx::MDLogger gmx_unused &mdlog, int nthreads)
- {
- // Now check if we have the argument to use before executing the call
- #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
- if (nthreads != sysconf(_SC_NPROCESSORS_ONLN))
- {
- GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
- "%d CPUs configured, but only %d of them are online.\n"
- "This can happen on embedded platforms (e.g. ARM) where the OS shuts some cores\n"
- "off to save power, and will turn them back on later when the load increases.\n"
- "However, this will likely mean GROMACS cannot pin threads to those cores. You\n"
- "will likely see much better performance by forcing all cores to be online, and\n"
- "making sure they run at their full clock frequency.",
- nthreads, sysconf(_SC_NPROCESSORS_ONLN));
- }
- #endif
-
- if (debug)
- {
- fprintf(debug, "Detected %d hardware threads to use.\n", nthreads);
- }
-
- #if GMX_OPENMP
- if (nthreads != gmx_omp_get_num_procs())
- {
- GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
- "Number of logical cores detected (%d) does not match the number reported by OpenMP (%d).\n"
- "Consider setting the launch configuration manually!",
- nthreads, gmx_omp_get_num_procs());
- }
- #endif
- }
-
-static void gmx_detect_gpus(FILE *fplog, const t_commrec *cr)
+static void gmx_detect_gpus(const gmx::MDLogger &mdlog, const t_commrec *cr)
{
#if GMX_LIB_MPI
int rank_world;
#endif
}
-/*! \brief Sanity check hardware topology and optionally print some notes to log
+ /*! \brief Keep a core busy with throw-away arithmetic for at most 2 seconds
+  *
+  * The number of configured processors is read once through sysconf. While
+  * sysconf reports fewer processors online than configured, and less than
+  * 2 seconds have passed since entry, the routine keeps performing dummy
+  * floating-point divisions. This can be used prior to hardware detection
+  * on platforms that take unused processors offline.
+  *
+  * The body is compiled only when HAVE_SYSCONF, _SC_NPROCESSORS_CONF and
+  * _SC_NPROCESSORS_ONLN are all defined; otherwise the routine does nothing.
+  *
+  * This routine will not throw exceptions.
+  */
+ static void
+ spinUpCore() noexcept
+ {
+ #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) && defined(_SC_NPROCESSORS_ONLN)
+     // steady_clock is better than system_clock, but unsupported in gcc-4.6.4.
+     // For release-2017 we can retire gcc-4.6 support and move to steady_clock.
+     float      scratch         = 0.1;
+     const int  configuredCount = sysconf(_SC_NPROCESSORS_CONF);    // noexcept
+     const auto startTime       = std::chrono::system_clock::now(); // noexcept
+     const auto timeLimit       = std::chrono::seconds(2);
+
+     for (;; )
+     {
+         // Done as soon as every configured processor is reported online ...
+         if (sysconf(_SC_NPROCESSORS_ONLN) >= configuredCount)
+         {
+             break;
+         }
+         // ... or once the time budget has been used up.
+         if (std::chrono::system_clock::now() - startTime >= timeLimit)
+         {
+             break;
+         }
+         // One batch of meaningless divisions between the two checks above.
+         for (int divisor = 1; divisor < 10000; ++divisor)
+         {
+             scratch /= divisor;
+         }
+     }
+
+     // Consume the result so the compiler cannot discard the work above.
+     if (scratch < 0)
+     {
+         printf("This cannot happen, but prevents loop from being optimized away.");
+     }
+ #endif
+ }
+
+ /*! \brief Prepare the system before hardware topology detection
+  *
+  * This routine should perform any actions we want to put the system in a state
+  * where we want it to be before detecting the hardware topology. For most
+  * processors there is nothing to do, but some architectures (in particular ARM)
+  * have support for taking configured cores offline, which will make them disappear
+  * from the online processor count.
+  *
+  * To wake such sleeping cores, this routine launches one thread per configured
+  * processor, each running spinUpCore(), and joins them all before returning
+  * so that the actual detection can follow immediately.
+  *
+  * This type of mismatch can also occur for x86 or PowerPC on Linux, if SMT has only
+  * been disabled in the kernel (rather than bios). Since those cores will never
+  * come online automatically, we currently skip this test for x86 & PowerPC to
+  * avoid wasting 2 seconds. We also skip the test if there is no thread support,
+  * or if the configured processor count cannot be obtained from sysconf.
+  *
+  * \note Cores will sleep relatively quickly again, so it's important to issue
+  *       the real detection code directly after this routine.
+  */
+ static void
+ hardwareTopologyPrepareDetection()
+ {
+ #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) && \
+     (defined(THREAD_PTHREADS) || defined(THREAD_WINDOWS))
+
+     // Modify this conditional when/if x86 or PowerPC starts to sleep some cores
+     if (!isX86 && !isPowerPC)
+     {
+         int countConfigured = sysconf(_SC_NPROCESSORS_CONF);
+
+         // sysconf reports failure as -1. Passing a negative count on to the
+         // vector constructor would convert it to a huge unsigned size and
+         // throw, so treat "unknown" like "nothing to wake up".
+         if (countConfigured <= 0)
+         {
+             return;
+         }
+
+         std::vector<std::thread> workThreads(countConfigured);
+
+         // One spinning thread per configured processor
+         for (auto &t : workThreads)
+         {
+             t = std::thread(spinUpCore);
+         }
+
+         // Wait for all of them; each stops by itself after at most 2 seconds
+         for (auto &t : workThreads)
+         {
+             t.join();
+         }
+     }
+ #endif
+ }
+
- * \param fplog Log file pointer. This can be NULL, but the then routine
- * will not do anything.
++/*! \brief Sanity check hardware topology and print some notes to log
+ *
-hardwareTopologyDoubleCheckDetection(FILE gmx_unused *fplog,
- const gmx::HardwareTopology gmx_unused &hardwareTopology)
++ * \param mdlog Logger.
+ * \param hardwareTopology Reference to hardwareTopology object.
+ */
+ static void
- if (fplog == NULL ||
- hardwareTopology.supportLevel() < gmx::HardwareTopology::SupportLevel::LogicalProcessorCount)
++hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused &mdlog,
++ const gmx::HardwareTopology gmx_unused &hardwareTopology)
+ {
+ #if defined HAVE_SYSCONF && defined(_SC_NPROCESSORS_CONF)
- fprintf(fplog, "Note: %d CPUs configured, but only %d were detected to be online.\n", countConfigured, countFromDetection);
++ if (hardwareTopology.supportLevel() < gmx::HardwareTopology::SupportLevel::LogicalProcessorCount)
+ {
+ return;
+ }
+
+ int countFromDetection = hardwareTopology.machine().logicalProcessorCount;
+ int countConfigured = sysconf(_SC_NPROCESSORS_CONF);
+
+ /* BIOS, kernel or user actions can take physical processors
+ * offline. We already cater for some of these cases in the hardware topology
+ * detection by trying to spin up cores just before we detect, but there could be other
+ * cases where it is worthwhile to hint that there might be more resources available.
+ */
+ if (countConfigured >= 0 && countConfigured != countFromDetection)
+ {
- fprintf(fplog, " X86 Hyperthreading is likely disabled; enable it for better performance.\n");
++ GMX_LOG(mdlog.info).
++ appendTextFormatted("Note: %d CPUs configured, but only %d were detected to be online.\n", countConfigured, countFromDetection);
+
+ if (isX86 && countConfigured == 2*countFromDetection)
+ {
- fprintf(fplog, " PowerPC SMT is likely disabled; enable SMT2/SMT4 for better performance.\n");
++ GMX_LOG(mdlog.info).
++ appendText(" X86 Hyperthreading is likely disabled; enable it for better performance.");
+ }
+ // For PowerPC (likely Power8) it is possible to set SMT to either 2,4, or 8-way hardware threads.
+ // We only warn if it is completely disabled since default performance drops with SMT8.
+ if (isPowerPC && countConfigured == 8*countFromDetection)
+ {
-gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
++ GMX_LOG(mdlog.info).
++ appendText(" PowerPC SMT is likely disabled; enable SMT2/SMT4 for better performance.");
+ }
+ }
+ #endif
+ }
+
+
+gmx_hw_info_t *gmx_detect_hardware(const gmx::MDLogger &mdlog, const t_commrec *cr,
gmx_bool bDetectGPUs)
{
int ret;
// If we detected the topology on this system, double-check that it makes sense
if (hwinfo_g->hardwareTopology->isThisSystem())
{
- check_nthreads_hw_avail(mdlog, hwinfo_g->nthreads_hw_avail);
- hardwareTopologyDoubleCheckDetection(fplog, *(hwinfo_g->hardwareTopology));
++ hardwareTopologyDoubleCheckDetection(mdlog, *(hwinfo_g->hardwareTopology));
}
+ // TODO: Get rid of this altogether.
+ hwinfo_g->nthreads_hw_avail = hwinfo_g->hardwareTopology->machine().logicalProcessorCount;
+
/* detect GPUs */
hwinfo_g->gpu_info.n_dev = 0;
hwinfo_g->gpu_info.n_dev_compatible = 0;
namespace gmx
{
-
+ class HardwareTopology;
-
-} // namespace
+class MDLogger;
+}
/*! \brief Return whether mdrun can use more than one GPU per node
*
* example. */
gmx_bool gmx_gpu_sharing_supported();
- /* Construct the global hwinfo structure and return a pointer to
- it. Caller is responsible for freeing this pointer. */
+ /*! \brief Run detection, consistency checks, and make available on all ranks.
+ *
+ * This routine constructs the global hwinfo structure and returns a pointer to
+ * it. It will run a preamble before executing cpu and hardware checks, and
+ * then run consistency checks afterwards. The results will also be made
+ * available on all nodes.
+ * Caller is responsible for freeing this pointer.
+ */
-gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
- gmx_bool bDetectGPUs);
+gmx_hw_info_t *gmx_detect_hardware(const gmx::MDLogger &mdlog,
+ const t_commrec *cr, gmx_bool bDetectGPUs);
/* Print information about the detected hardware to fplog (if != NULL)
* and to stderr on the master rank.
public:
- /*! \brief Detects the hardware topology.
- */
+ /*! \brief Detects the hardware topology. */
static HardwareTopology detect();
+ /*! \brief Creates a topology with given number of logical cores.
+ *
+ * The support level will be either None or LogicalProcessorCount.
+ *
+ * Intended for testing of code that uses the hardware topology.
+ */
+ explicit HardwareTopology(int logicalProcessorCount);
+
/*! \brief Check what topology information is available and valid
*
* The amount of hardware topology information that can be detected depends
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"
+ #include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/logger.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/snprintf.h"
ir->nstcomm = nstglobalcomm;
}
- if (fplog)
- {
- fprintf(fplog, "Intra-simulation communication will occur every %d steps.\n", nstglobalcomm);
- }
++ GMX_LOG(mdlog.info).appendTextFormatted(
++ "Intra-simulation communication will occur every %d steps.\n", nstglobalcomm);
return nstglobalcomm;
}
namespace gmx
{
-
+class MDLogger;
+ class SimulationSignaller;
-
}
/* Define a number of flags to better control the information
#define CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS (1<<12)
- /* return the number of steps between global communcations */
- int check_nstglobalcomm(const gmx::MDLogger &mdlog, int nstglobalcomm, t_inputrec *ir);
+ /*! \brief Return the number of steps that will take place between
+ * intra-simulation communications, given the constraints of the
+ * inputrec and the value of mdrun -gcom. */
-int check_nstglobalcomm(FILE *fplog,
- t_commrec *cr,
- int nstglobalcomm,
- t_inputrec *ir);
-
-/* check whether an 'nst'-style parameter p is a multiple of nst, and
- set it to be one if not, with a warning. */
-void check_nst_param(FILE *fplog, t_commrec *cr,
- const char *desc_nst, int nst,
- const char *desc_p, int *p);
++int check_nstglobalcomm(const gmx::MDLogger &mdlog,
++ int nstglobalcomm,
++ t_inputrec *ir);
- /* check which of the multisim simulations has the shortest number of
- steps and return that number of nsteps */
- gmx_int64_t get_multisim_nsteps(const t_commrec *cr,
- gmx_int64_t nsteps);
+ /*! \brief Return true if the \p value is equal across the set of multi-simulations
+ *
+ * \todo This duplicates some of check_multi_int. Consolidate. */
+ bool multisim_int_all_are_equal(const gmx_multisim_t *ms,
+ gmx_int64_t value);
void rerun_parallel_comm(t_commrec *cr, t_trxframe *fr,
gmx_bool *bNotLastFrame);
{
/*! \brief Do conjugate gradients minimization
- \copydoc integrator_t (FILE *fplog, t_commrec *cr,
+ \copydoc integrator_t(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
- int nfile, const t_filenm fnm[],
- const gmx_output_env_t *oenv, gmx_bool bVerbose,
- int nstglobalcomm,
- gmx_vsite_t *vsite, gmx_constr_t constr,
- int stepout,
- t_inputrec *inputrec,
- gmx_mtop_t *top_global, t_fcdata *fcd,
- t_state *state_global,
- t_mdatoms *mdatoms,
- t_nrnb *nrnb, gmx_wallcycle_t wcycle,
- gmx_edsam_t ed,
- t_forcerec *fr,
- int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
- real cpt_period, real max_hours,
- int imdport,
- unsigned long Flags,
- gmx_walltime_accounting_t walltime_accounting)
+ int nfile, const t_filenm fnm[],
+ const gmx_output_env_t *oenv, gmx_bool bVerbose,
+ int nstglobalcomm,
+ gmx_vsite_t *vsite, gmx_constr_t constr,
+ int stepout,
+ t_inputrec *inputrec,
+ gmx_mtop_t *top_global, t_fcdata *fcd,
+ t_state *state_global,
+ t_mdatoms *mdatoms,
+ t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+ gmx_edsam_t ed,
+ t_forcerec *fr,
+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+ gmx_membed_t gmx_unused *membed,
+ real cpt_period, real max_hours,
+ int imdport,
+ unsigned long Flags,
+ gmx_walltime_accounting_t walltime_accounting)
*/
-double do_cg(FILE *fplog, t_commrec *cr,
+double do_cg(FILE *fplog, t_commrec *cr, const gmx::MDLogger gmx_unused &mdlog,
int nfile, const t_filenm fnm[],
const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
int gmx_unused nstglobalcomm,
step = ir->init_step;
step_rel = 0;
- if (MULTISIM(cr) && (repl_ex_nst <= 0 ))
+ // TODO extract this to new multi-simulation module
+ if (MASTER(cr) && MULTISIM(cr) && (repl_ex_nst <= 0 ))
{
- /* check how many steps are left in other sims */
- multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps);
+ if (!multisim_int_all_are_equal(cr->ms, ir->nsteps))
+ {
- md_print_info(cr, fplog,
- "Note: The number of steps is not consistent across multi simulations,\n"
- "but we are proceeding anyway!\n");
++ GMX_LOG(mdlog.warning).appendText(
++ "Note: The number of steps is not consistent across multi simulations,\n"
++ "but we are proceeding anyway!");
+ }
+ if (!multisim_int_all_are_equal(cr->ms, ir->init_step))
+ {
- md_print_info(cr, fplog,
- "Note: The initial step is not consistent across multi simulations,\n"
- "but we are proceeding anyway!\n");
++ GMX_LOG(mdlog.warning).appendText(
++ "Note: The initial step is not consistent across multi simulations,\n"
++ "but we are proceeding anyway!");
+ }
}
-
/* and stop now if we should */
- bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
- ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
+ bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
while (!bLastStep)
{