+/* Checks whether the chosen division of hardware resources over MPI ranks
+ * and OpenMP threads per rank is likely to be efficient, and if not either
+ * prints a note or stops with a fatal error (which the user can override
+ * with -ntomp).
+ *
+ * Must be called after thread-MPI (when configured) and OpenMP have been
+ * initialized, since it reads the final thread counts.
+ *
+ * hwinfo    detected hardware information (core/hw-thread counts, CPUID)
+ * hw_opt    user-provided hardware options (thread counts, GPUs in use)
+ * bNTOptSet TRUE when the user explicitly set the OpenMP thread count;
+ *           this downgrades the fatal errors below to notes
+ * cr        communication record, used to reduce counts over all ranks
+ * fplog     log file for the printed notes (in addition to stderr)
+ */
+void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
+ const gmx_hw_opt_t *hw_opt,
+ gmx_bool bNTOptSet,
+ t_commrec *cr,
+ FILE *fplog)
+{
+#if defined GMX_OPENMP && defined GMX_MPI
+ int nth_omp_min, nth_omp_max, ngpu;
+ /* NOTE(review): messages below are built with sprintf into this fixed
+ * 1000-char buffer; the strings are bounded in practice, but snprintf
+ * would be safer.
+ */
+ char buf[1000];
+#ifdef GMX_THREAD_MPI
+ const char *mpi_option = " (option -ntmpi)";
+#else
+ /* With real MPI the rank count is chosen by the MPI launcher, so there
+ * is no mdrun option to suggest.
+ */
+ const char *mpi_option = "";
+#endif
+
+ /* This function should be called after thread-MPI (when configured) and
+ * OpenMP have been initialized. Check that here.
+ */
+#ifdef GMX_THREAD_MPI
+ assert(hw_opt->nthreads_tmpi >= 1);
+#endif
+ assert(gmx_omp_nthreads_get(emntDefault) >= 1);
+
+ /* On a single rank min and max are intentionally identical; they only
+ * start to differ after the reduction below when the run setup is
+ * inhomogeneous across ranks.
+ */
+ nth_omp_min = gmx_omp_nthreads_get(emntDefault);
+ nth_omp_max = gmx_omp_nthreads_get(emntDefault);
+ ngpu = hw_opt->gpu_opt.n_dev_use;
+
+ /* Thread-MPI seems to have a bug with reduce on 1 node, so use a cond. */
+ if (cr->nnodes + cr->npmenodes > 1)
+ {
+ int count[3], count_max[3];
+
+ /* Negating the min lets a single MPI_MAX reduction deliver both the
+ * global minimum (as minus the max of the negated values) and the
+ * global maximum in one call.
+ */
+ count[0] = -nth_omp_min;
+ count[1] = nth_omp_max;
+ count[2] = ngpu;
+
+ MPI_Allreduce(count, count_max, 3, MPI_INT, MPI_MAX, cr->mpi_comm_mysim);
+
+ /* In case of an inhomogeneous run setup we use the maximum counts */
+ nth_omp_min = -count_max[0];
+ nth_omp_max = count_max[1];
+ ngpu = count_max[2];
+ }
+
+ int nthreads_omp_mpi_ok_min;
+
+ if (ngpu == 0)
+ {
+ /* NOTE(review): nthreads_omp_mpi_ok_min_cpu/_gpu/_max and
+ * nthreads_omp_mpi_target_max are tuning constants defined elsewhere
+ * in this file.
+ */
+ nthreads_omp_mpi_ok_min = nthreads_omp_mpi_ok_min_cpu;
+ }
+ else
+ {
+ /* With GPUs we set the minimum number of OpenMP threads to 2 to catch
+ * cases where the user specifies #ranks == #cores.
+ */
+ nthreads_omp_mpi_ok_min = nthreads_omp_mpi_ok_min_gpu;
+ }
+
+ if (DOMAINDECOMP(cr) && cr->nnodes > 1)
+ {
+ /* Multiple domains: check that the OpenMP thread count per rank lies
+ * in the efficient range. When GPUs can not be shared between ranks,
+ * the thread count is dictated by the rank/GPU mapping, so only the
+ * lower bound is checked in that case.
+ */
+ if (nth_omp_max < nthreads_omp_mpi_ok_min ||
+ (!(ngpu > 0 && !bGpuSharingSupported) &&
+ nth_omp_max > nthreads_omp_mpi_ok_max))
+ {
+ /* Note that we print target_max here, not ok_max */
+ sprintf(buf, "Your choice of number of MPI ranks and amount of resources results in using %d OpenMP threads per rank, which is most likely inefficient. The optimum is usually between %d and %d threads per rank.",
+ nth_omp_max,
+ nthreads_omp_mpi_ok_min,
+ nthreads_omp_mpi_target_max);
+
+ if (bNTOptSet)
+ {
+ /* The user asked for this thread count explicitly: only warn */
+ md_print_warn(cr, fplog, "NOTE: %s\n", buf);
+ }
+ else
+ {
+ /* This fatal error, and the one below, is nasty, but it's
+ * probably the only way to ensure that all users don't waste
+ * a lot of resources, since many users don't read logs/stderr.
+ */
+ gmx_fatal(FARGS, "%s If you want to run with this setup, specify the -ntomp option. But we suggest to change the number of MPI ranks%s.", buf, mpi_option);
+ }
+ }
+ }
+ else
+ {
+ /* No domain decomposition (or only one domain) */
+ if (!(ngpu > 0 && !bGpuSharingSupported) &&
+ nth_omp_max > nthreads_omp_always_faster(hwinfo->cpuid_info, ngpu > 0))
+ {
+ /* To arrive here, the user/system set #ranks and/or #OMPthreads */
+ gmx_bool bEnvSet;
+ char buf2[256];
+
+ /* OMP_NUM_THREADS may have been set by the user or the queue
+ * system; either way the thread count was an explicit request.
+ */
+ bEnvSet = (getenv("OMP_NUM_THREADS") != NULL);
+
+ if (bNTOptSet || bEnvSet)
+ {
+ sprintf(buf2, "You requested %d OpenMP threads", nth_omp_max);
+ }
+ else
+ {
+ /* The thread count fell out of the rank/total-thread choice;
+ * spell out that derivation in the message.
+ */
+ sprintf(buf2, "Your choice of %d MPI rank%s and the use of %d total threads %sleads to the use of %d OpenMP threads",
+ cr->nnodes + cr->npmenodes,
+ cr->nnodes + cr->npmenodes == 1 ? "" : "s",
+ hw_opt->nthreads_tot > 0 ? hw_opt->nthreads_tot : hwinfo->nthreads_hw_avail,
+ hwinfo->nphysicalnode > 1 ? "on a node " : "",
+ nth_omp_max);
+ }
+ sprintf(buf, "%s, whereas we expect the optimum to be with more MPI ranks with %d to %d OpenMP threads.",
+ buf2, nthreads_omp_mpi_ok_min, nthreads_omp_mpi_target_max);
+
+ /* We can not quit with a fatal error when OMP_NUM_THREADS is set
+ * with different values per rank or node, since in that case
+ * the user can not set -ntomp to override the error.
+ */
+ if (bNTOptSet || (bEnvSet && nth_omp_min != nth_omp_max))
+ {
+ md_print_warn(cr, fplog, "NOTE: %s\n", buf);
+ }
+ else
+ {
+ gmx_fatal(FARGS, "%s If you want to run with this many OpenMP threads, specify the -ntomp option. But we suggest to increase the number of MPI ranks%s.", buf, mpi_option);
+ }
+ }
+ }
+#else /* GMX_OPENMP && GMX_MPI */
+ /* No OpenMP and/or MPI: it doesn't make much sense to check */
+ /* hwinfo, cr and fplog are still used below, so only these two
+ * parameters need the unused-value markers in this branch.
+ */
+ GMX_UNUSED_VALUE(hw_opt);
+ GMX_UNUSED_VALUE(bNTOptSet);
+ /* Check if we have more than 1 physical core, if detected,
+ * or more than 1 hardware thread if physical cores were not detected.
+ */
+ if ((hwinfo->ncore > 1) ||
+ (hwinfo->ncore == 0 && hwinfo->nthreads_hw_avail > 1))
+ {
+ md_print_warn(cr, fplog, "NOTE: GROMACS was compiled without OpenMP and (thread-)MPI support, can only use a single CPU core\n");
+ }
+#endif /* GMX_OPENMP && GMX_MPI */
+}
+
+