From 4ba9b1e42a72e1ad81d41e089eaa8ce843b6c86c Mon Sep 17 00:00:00 2001
From: Kevin Boyd
Date: Mon, 3 Sep 2018 14:55:47 -0400
Subject: [PATCH] Reduce preprocessor dependency in resourcedivision.cpp

Also added some constexpr
Also broke up some long text blocks

Change-Id: I28a8207008d65fe39c10429542f44373ba0bbafd
---
 .../taskassignment/resourcedivision.cpp | 165 ++++++++++--------
 1 file changed, 91 insertions(+), 74 deletions(-)

diff --git a/src/gromacs/taskassignment/resourcedivision.cpp b/src/gromacs/taskassignment/resourcedivision.cpp
index 79fb5c0945..ae9c22026d 100644
--- a/src/gromacs/taskassignment/resourcedivision.cpp
+++ b/src/gromacs/taskassignment/resourcedivision.cpp
@@ -81,19 +81,16 @@
  * and after a switch point doesn't change too much.
  */

-//! Constant used to help minimize preprocessed code
-static const bool bHasOmpSupport = GMX_OPENMP;
-
 /*! \brief The minimum number of atoms per thread-MPI thread when GPUs
  * are present. With fewer atoms than this, the number of thread-MPI
  * ranks will get lowered.
  */
-static const int min_atoms_per_mpi_thread = 90;
+static constexpr int min_atoms_per_mpi_thread = 90;

 /*! \brief The minimum number of atoms per GPU with thread-MPI
  * active. With fewer atoms than this, the number of thread-MPI ranks
  * will get lowered.
  */
-static const int min_atoms_per_gpu = 900;
+static constexpr int min_atoms_per_gpu = 900;

 /**@{*/
 /*! \brief Constants for implementing default divisions of threads */
@@ -114,17 +111,17 @@ static const int min_atoms_per_gpu = 900;
  * Sandy/Ivy Bridge, Has/Broadwell. By checking for AVX instead of
  * model numbers we ensure also future Intel CPUs are covered.
  */
-const int nthreads_omp_faster_default = 8;
-const int nthreads_omp_faster_Nehalem = 12;
-const int nthreads_omp_faster_Intel_AVX = 16;
-const int nthreads_omp_faster_AMD_Ryzen = 16;
+constexpr int nthreads_omp_faster_default = 8;
+constexpr int nthreads_omp_faster_Nehalem = 12;
+constexpr int nthreads_omp_faster_Intel_AVX = 16;
+constexpr int nthreads_omp_faster_AMD_Ryzen = 16;

 /* For CPU only runs the fastest options are usually MPI or OpenMP only.
  * With one GPU, using MPI only is almost never optimal, so we need to
  * compare running pure OpenMP with combined MPI+OpenMP. This means higher
  * OpenMP threads counts can still be ok. Multiplying the numbers above
  * by a factor of 2 seems to be a good estimate.
 */
-const int nthreads_omp_faster_gpu_fac = 2;
+constexpr int nthreads_omp_faster_gpu_fac = 2;

 /* This is the case with MPI (2 or more MPI PP ranks).
  * By default we will terminate with a fatal error when more than 8
@@ -134,12 +131,10 @@ const int nthreads_omp_faster_gpu_fac = 2;
  * OpenMP thread are (indirectly) requested, since using less threads
  * nearly always results in better performance.
  * With thread-MPI and multiple GPUs or one GPU and too many threads
  * we first try 6 OpenMP threads and then less until the number of MPI ranks
  * is divisible by the number of GPUs.
 */
-#if GMX_OPENMP && GMX_MPI
-const int nthreads_omp_mpi_ok_max = 8;
-const int nthreads_omp_mpi_ok_min_cpu = 1;
-#endif
-const int nthreads_omp_mpi_ok_min_gpu = 2;
-const int nthreads_omp_mpi_target_max = 6;
+constexpr int nthreads_omp_mpi_ok_max = 8;
+constexpr int nthreads_omp_mpi_ok_min_cpu = 1;
+constexpr int nthreads_omp_mpi_ok_min_gpu = 2;
+constexpr int nthreads_omp_mpi_target_max = 6;

 /**@}*/
@@ -185,13 +180,11 @@ gmx_unused static int nthreads_omp_efficient_max(int gmx_unused nrank,
                                                  const gmx::CpuInfo &cpuInfo,
                                                  gmx_bool bUseGPU)
 {
-#if GMX_OPENMP && GMX_MPI
-    if (nrank > 1)
+    if (GMX_OPENMP && GMX_MPI && (nrank > 1))
     {
         return nthreads_omp_mpi_ok_max;
     }
     else
-#endif
     {
         return nthreads_omp_faster(cpuInfo, bUseGPU);
     }
@@ -222,7 +215,9 @@ gmx_unused static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
             /* In this case it is unclear if we should use 1 rank per GPU
              * or more or less, so we require also setting the number of ranks.
              */
-            gmx_fatal(FARGS, "When using GPUs, setting the number of OpenMP threads without specifying the number of ranks can lead to conflicting demands. Please specify the number of thread-MPI ranks as well (option -ntmpi).");
+            gmx_fatal(FARGS, "When using GPUs, setting the number of OpenMP threads without specifying the number "
+                      "of ranks can lead to conflicting demands. Please specify the number of thread-MPI ranks "
+                      "as well (option -ntmpi).");
         }
         nrank = ngpu;
@@ -233,7 +228,8 @@ gmx_unused static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
      * If the user does not set the number of OpenMP threads, nthreads_omp==0 and
      * this code has no effect.
      */
-    GMX_RELEASE_ASSERT(hw_opt.nthreads_omp >= 0, "nthreads_omp is negative, but previous checks should have prevented this");
+    GMX_RELEASE_ASSERT(hw_opt.nthreads_omp >= 0, "nthreads_omp is negative, but previous checks should "
+                       "have prevented this");
     while (nrank*hw_opt.nthreads_omp > hwinfo->nthreads_hw_avail && nrank > 1)
     {
         nrank--;
@@ -387,7 +383,8 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
         std::string message = checker.getMessage();
         if (hw_opt->nthreads_tmpi > 1)
         {
-            gmx_fatal(FARGS, "%s However, you asked for more than 1 thread-MPI rank, so mdrun cannot continue. Choose a single rank, or a different algorithm.", message.c_str());
+            gmx_fatal(FARGS, "%s However, you asked for more than 1 thread-MPI rank, so mdrun cannot continue. "
+                      "Choose a single rank, or a different algorithm.", message.c_str());
         }
         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted("%s Choosing to use only a single thread-MPI rank.", message.c_str());
         return 1;
@@ -406,7 +403,9 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
     if (nthreads_hw <= 0)
     {
         /* This should normally not happen, but if it does, we handle it */
-        gmx_fatal(FARGS, "The number of available hardware threads can not be detected, please specify the number of MPI ranks and the number of OpenMP threads (if supported) manually with options -ntmpi and -ntomp, respectively");
+        gmx_fatal(FARGS, "The number of available hardware threads can not be detected, please specify the number of "
+                  "MPI ranks and the number of OpenMP threads (if supported) manually with options "
+                  "-ntmpi and -ntomp, respectively");
     }

     /* How many total (#tMPI*#OpenMP) threads can we start? */
@@ -511,7 +510,7 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
         /* We reduced the number of tMPI ranks, which means we might violate
          * our own efficiency checks if we simply use all hardware threads.
          */
-        if (bHasOmpSupport && hw_opt->nthreads_omp <= 0 && hw_opt->nthreads_tot <= 0)
+        if (GMX_OPENMP && hw_opt->nthreads_omp <= 0 && hw_opt->nthreads_tot <= 0)
         {
             /* The user set neither the total nor the OpenMP thread count,
              * we should use all hardware threads, unless we will violate
@@ -544,22 +543,21 @@ void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
                                         t_commrec *cr,
                                         const gmx::MDLogger &mdlog)
 {
-    GMX_UNUSED_VALUE(hwinfo);
 #if GMX_OPENMP && GMX_MPI
+    GMX_UNUSED_VALUE(hwinfo);
+
     int nth_omp_min, nth_omp_max;
     char buf[1000];
-#if GMX_THREAD_MPI
-    const char *mpi_option = " (option -ntmpi)";
-#else
-    const char *mpi_option = "";
-#endif
+    const char *mpi_option = GMX_THREAD_MPI ? " (option -ntmpi)" : "";

     /* This function should be called after thread-MPI (when configured) and
      * OpenMP have been initialized. Check that here.
      */
-#if GMX_THREAD_MPI
-    GMX_RELEASE_ASSERT(nthreads_omp_faster_default >= nthreads_omp_mpi_ok_max, "Inconsistent OpenMP thread count default values");
-#endif
+    if (GMX_THREAD_MPI)
+    {
+        GMX_RELEASE_ASSERT(nthreads_omp_faster_default >= nthreads_omp_mpi_ok_max,
+                           "Inconsistent OpenMP thread count default values");
+    }
     GMX_RELEASE_ASSERT(gmx_omp_nthreads_get(emntDefault) >= 1, "Must have at least one OpenMP thread");

     nth_omp_min = gmx_omp_nthreads_get(emntDefault);
@@ -572,14 +570,14 @@ void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
         int count[3], count_max[3];

         count[0] = -nth_omp_min;
-        count[1] = nth_omp_max;
-        count[2] = int(willUsePhysicalGpu);
+        count[1] = nth_omp_max;
+        count[2] = int(willUsePhysicalGpu);

         MPI_Allreduce(count, count_max, 3, MPI_INT, MPI_MAX, cr->mpi_comm_mysim);

         /* In case of an inhomogeneous run setup we use the maximum counts */
         nth_omp_min = -count_max[0];
-        nth_omp_max = count_max[1];
+        nth_omp_max = count_max[1];
         anyRankIsUsingGpus = count_max[2] > 0;
     }
@@ -603,7 +601,9 @@ void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
             nth_omp_max > nthreads_omp_mpi_ok_max)
         {
             /* Note that we print target_max here, not ok_max */
-            sprintf(buf, "Your choice of number of MPI ranks and amount of resources results in using %d OpenMP threads per rank, which is most likely inefficient. The optimum is usually between %d and %d threads per rank.",
+            sprintf(buf, "Your choice of number of MPI ranks and amount of resources results in using %d OpenMP "
+                    "threads per rank, which is most likely inefficient. The optimum is usually between %d and"
+                    " %d threads per rank.",
                     nth_omp_max, nthreads_omp_mpi_ok_min,
                     nthreads_omp_mpi_target_max);
@@ -618,23 +618,27 @@ void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
                  * probably the only way to ensure that all users don't waste
                  * a lot of resources, since many users don't read logs/stderr.
                  */
-                gmx_fatal(FARGS, "%s If you want to run with this setup, specify the -ntomp option. But we suggest to change the number of MPI ranks%s.", buf, mpi_option);
+                gmx_fatal(FARGS, "%s If you want to run with this setup, specify the -ntomp option. But we suggest to "
+                          "change the number of MPI ranks%s.",
+                          buf, mpi_option);
             }
         }
     }
-#else /* GMX_OPENMP && GMX_MPI */
-    /* No OpenMP and/or MPI: it doesn't make much sense to check */
+#else // !GMX_OPENMP || !GMX_MPI
     GMX_UNUSED_VALUE(bNtOmpOptionSet);
     GMX_UNUSED_VALUE(willUsePhysicalGpu);
    GMX_UNUSED_VALUE(cr);
+    GMX_UNUSED_VALUE(nthreads_omp_mpi_ok_max);
+    GMX_UNUSED_VALUE(nthreads_omp_mpi_ok_min_cpu);

     /* Check if we have more than 1 physical core, if detected,
      * or more than 1 hardware thread if physical cores were not detected.
      */
     if (!GMX_OPENMP && !GMX_MPI && hwinfo->hardwareTopology->numberOfCores() > 1)
     {
-        GMX_LOG(mdlog.warning).asParagraph().appendText("NOTE: GROMACS was compiled without OpenMP and (thread-)MPI support, can only use a single CPU core");
+        GMX_LOG(mdlog.warning).asParagraph().appendText(
+                "NOTE: GROMACS was compiled without OpenMP and (thread-)MPI support, can only use a single CPU core");
     }
-#endif /* GMX_OPENMP && GMX_MPI */
+#endif // end GMX_OPENMP && GMX_MPI
 }
@@ -660,7 +664,8 @@ void check_and_update_hw_opt_1(const gmx::MDLogger &mdlog,
      */
     if (hw_opt->nthreads_omp < 0)
     {
-        gmx_fatal(FARGS, "The number of OpenMP threads supplied on the command line is %d, which is negative and not allowed", hw_opt->nthreads_omp);
+        gmx_fatal(FARGS, "The number of OpenMP threads supplied on the command line is %d, which is negative "
+                  "and not allowed", hw_opt->nthreads_omp);
     }

     /* Check for OpenMP settings stored in environment variables, which can
@@ -671,17 +676,20 @@ void check_and_update_hw_opt_1(const gmx::MDLogger &mdlog,
     /* Check restrictions on the user supplied options before modifying them.
      * TODO: Put the user values in a const struct and preserve them.
      */
-#if !GMX_THREAD_MPI
-    if (hw_opt->nthreads_tot > 0)
-    {
-        gmx_fatal(FARGS, "Setting the total number of threads is only supported with thread-MPI and GROMACS was compiled without thread-MPI");
-    }
-    if (hw_opt->nthreads_tmpi > 0)
+    if (!GMX_THREAD_MPI)
     {
-        gmx_fatal(FARGS, "Setting the number of thread-MPI ranks is only supported with thread-MPI and GROMACS was compiled without thread-MPI");
-    }
-#endif
+        if (hw_opt->nthreads_tot > 0)
+        {
+            gmx_fatal(FARGS, "Setting the total number of threads is only supported with thread-MPI and GROMACS was "
+                      "compiled without thread-MPI");
+        }
+        if (hw_opt->nthreads_tmpi > 0)
+        {
+            gmx_fatal(FARGS, "Setting the number of thread-MPI ranks is only supported with thread-MPI and GROMACS was "
+                      "compiled without thread-MPI");
+        }
+    }

     /* With thread-MPI the master thread sets hw_opt->totNumThreadsIsAuto.
      * The other threads receive a partially processed hw_opt from the master
      * thread and should not set hw_opt->totNumThreadsIsAuto again.
@@ -692,7 +700,7 @@ void check_and_update_hw_opt_1(const gmx::MDLogger &mdlog,
         hw_opt->totNumThreadsIsAuto = (hw_opt->nthreads_omp == 0 &&
                                        hw_opt->nthreads_omp_pme == 0 &&
                                        hw_opt->nthreads_tot == 0);
     }
-    if (bHasOmpSupport)
+    if (GMX_OPENMP)
     {
         /* Check restrictions on PME thread related options set by the user */
@@ -709,7 +717,8 @@ void check_and_update_hw_opt_1(const gmx::MDLogger &mdlog,
              * but since the thread count can differ per rank,
             * we can't easily avoid this.
             */
-            gmx_fatal(FARGS, "You need to explicitly specify the number of PME ranks (-npme) when using different number of OpenMP threads for PP and PME ranks");
+            gmx_fatal(FARGS, "You need to explicitly specify the number of PME ranks (-npme) when using "
+                      "different numbers of OpenMP threads for PP and PME ranks");
         }
     }
     else
@@ -733,21 +742,24 @@ void check_and_update_hw_opt_1(const gmx::MDLogger &mdlog,
             hw_opt->nthreads_omp > 0 &&
             hw_opt->nthreads_tot != hw_opt->nthreads_tmpi*hw_opt->nthreads_omp)
         {
-            gmx_fatal(FARGS, "The total number of threads requested (%d) does not match the thread-MPI ranks (%d) times the OpenMP threads (%d) requested",
+            gmx_fatal(FARGS, "The total number of threads requested (%d) does not match the thread-MPI ranks (%d) "
+                      "times the OpenMP threads (%d) requested",
                      hw_opt->nthreads_tot, hw_opt->nthreads_tmpi, hw_opt->nthreads_omp);
         }

         if (hw_opt->nthreads_tmpi > 0 &&
             hw_opt->nthreads_tot % hw_opt->nthreads_tmpi != 0)
         {
-            gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of thread-MPI ranks requested (%d)",
+            gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of thread-MPI "
+                      "ranks requested (%d)",
                      hw_opt->nthreads_tot, hw_opt->nthreads_tmpi);
         }

         if (hw_opt->nthreads_omp > 0 &&
             hw_opt->nthreads_tot % hw_opt->nthreads_omp != 0)
         {
-            gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of OpenMP threads requested (%d)",
+            gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of OpenMP "
+                      "threads requested (%d)",
                      hw_opt->nthreads_tot, hw_opt->nthreads_omp);
         }
     }
@@ -756,13 +768,15 @@ void check_and_update_hw_opt_1(const gmx::MDLogger &mdlog,
     {
         if (hw_opt->nthreads_omp > hw_opt->nthreads_tot)
         {
-            gmx_fatal(FARGS, "You requested %d OpenMP threads with %d total threads. Choose a total number of threads that is a multiple of the number of OpenMP threads.",
+            gmx_fatal(FARGS, "You requested %d OpenMP threads with %d total threads. Choose a total number of threads "
+                      "that is a multiple of the number of OpenMP threads.",
                      hw_opt->nthreads_omp, hw_opt->nthreads_tot);
         }

         if (hw_opt->nthreads_tmpi > hw_opt->nthreads_tot)
         {
-            gmx_fatal(FARGS, "You requested %d thread-MPI ranks with %d total threads. Choose a total number of threads that is a multiple of the number of thread-MPI ranks.",
+            gmx_fatal(FARGS, "You requested %d thread-MPI ranks with %d total threads. Choose a total number of "
+                      "threads that is a multiple of the number of thread-MPI ranks.",
                      hw_opt->nthreads_tmpi, hw_opt->nthreads_tot);
         }
     }
@@ -786,7 +800,8 @@ void check_and_update_hw_opt_2(gmx_hw_opt_t *hw_opt,
         /* We only have OpenMP support for PME only nodes */
         if (hw_opt->nthreads_omp > 1)
         {
-            gmx_fatal(FARGS, "OpenMP threads have been requested with cut-off scheme %s, but these are only supported with cut-off scheme %s",
+            gmx_fatal(FARGS, "OpenMP threads have been requested with cut-off scheme %s, but these are only supported "
+                      "with cut-off scheme %s",
                      ecutscheme_names[cutoff_scheme],
                      ecutscheme_names[ecutsVERLET]);
@@ -802,23 +817,25 @@ void checkAndUpdateRequestedNumOpenmpThreads(gmx_hw_opt_t *hw_opt,
                                             PmeRunMode pmeRunMode,
                                             const gmx_mtop_t &mtop)
 {
-#if GMX_THREAD_MPI
-    GMX_RELEASE_ASSERT(hw_opt->nthreads_tmpi >= 1, "Must have at least one thread-MPI rank");
-
-    /* If the user set the total number of threads on the command line
-     * and did not specify the number of OpenMP threads, set the latter here.
-     */
-    if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp <= 0)
+    if (GMX_THREAD_MPI)
     {
-        hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
-        if (!bHasOmpSupport && hw_opt->nthreads_omp > 1)
+        GMX_RELEASE_ASSERT(hw_opt->nthreads_tmpi >= 1, "Must have at least one thread-MPI rank");
+
+        /* If the user set the total number of threads on the command line
+         * and did not specify the number of OpenMP threads, set the latter here.
+         */
+        if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp <= 0)
         {
-            gmx_fatal(FARGS, "You (indirectly) asked for OpenMP threads by setting -nt > -ntmpi, but GROMACS was compiled without OpenMP support");
+            hw_opt->nthreads_omp = hw_opt->nthreads_tot / hw_opt->nthreads_tmpi;
+
+            if (!GMX_OPENMP && hw_opt->nthreads_omp > 1)
+            {
+                gmx_fatal(FARGS, "You (indirectly) asked for OpenMP threads by setting -nt > -ntmpi, but GROMACS was "
+                          "compiled without OpenMP support");
+            }
         }
     }
-#endif
-
     /* With both non-bonded and PME on GPU, the work left on the CPU is often
      * (much) slower with SMT than without SMT. This is mostly the case with
      * few atoms per core. Thus, if the number of threads is set to auto,
@@ -841,7 +858,7 @@ void checkAndUpdateRequestedNumOpenmpThreads(gmx_hw_opt_t *hw_opt,
      * We currently only limit SMT for simulations using a single rank.
      * TODO: Consider limiting also for multi-rank simulations.
      */
-    bool canChooseNumOpenmpThreads = (bHasOmpSupport && hw_opt->nthreads_omp <= 0);
+    bool canChooseNumOpenmpThreads = (GMX_OPENMP && hw_opt->nthreads_omp <= 0);
     bool haveSmtSupport = (hwinfo.hardwareTopology->supportLevel() >= gmx::HardwareTopology::SupportLevel::Basic &&
                            hwinfo.hardwareTopology->machine().logicalProcessorCount > hwinfo.hardwareTopology->numberOfCores());
     bool simRunsSingleRankNBAndPmeOnGpu = (cr->nnodes == 1 && pmeRunMode == PmeRunMode::GPU);
@@ -863,7 +880,7 @@ void checkAndUpdateRequestedNumOpenmpThreads(gmx_hw_opt_t *hw_opt,
         }
     }

-    GMX_RELEASE_ASSERT(bHasOmpSupport || hw_opt->nthreads_omp == 1, "Without OpenMP support, only one thread per rank can be used");
+    GMX_RELEASE_ASSERT(GMX_OPENMP || hw_opt->nthreads_omp == 1, "Without OpenMP support, only one thread per rank can be used");

     /* We are done with updating nthreads_omp, we can set nthreads_omp_pme */
     if (hw_opt->nthreads_omp_pme <= 0 && hw_opt->nthreads_omp > 0)
-- 
2.22.0
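
Editor's note, not part of the patch: a minimal standalone sketch of the idiom this change relies on, for readers unfamiliar with it. GMX_OPENMP is assumed to be defined to 0 or 1 by the build system (as it is in GROMACS); the helper function and its name below are hypothetical, chosen only for illustration.

// Sketch: test a 0/1 configuration macro with an ordinary if instead of #if.
// Both branches are always parsed and type-checked, so constants such as
// nthreads_omp_mpi_ok_max stay visible in every configuration, while the
// inactive branch is removed by constant folding.

#include <cstdio>

#ifndef GMX_OPENMP
#    define GMX_OPENMP 0 // stand-in for the build-system definition
#endif

static constexpr int nthreads_omp_mpi_ok_max = 8; // same constant the patch makes constexpr

// Hypothetical helper, not part of GROMACS.
static int limitOpenmpThreads(int requested)
{
    // Preprocessor-free guard: compiles in all configurations.
    if (GMX_OPENMP && requested > nthreads_omp_mpi_ok_max)
    {
        return nthreads_omp_mpi_ok_max;
    }
    return GMX_OPENMP ? requested : 1;
}

int main()
{
    std::printf("OpenMP threads to use: %d\n", limitOpenmpThreads(12));
}

The trade-off, visible in the #else branch of check_resource_division_efficiency above, is that identifiers referenced only in the dead branch must still exist, which is why the patch adds GMX_UNUSED_VALUE calls instead of hiding the constants behind #if.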