Merge remote-tracking branch 'origin/release-5-1' into master

author Szilárd Páll <pall.szilard@gmail.com>

Wed, 20 Jan 2016 20:35:28 +0000 (21:35 +0100)

committer Szilárd Páll <pall.szilard@gmail.com>

Wed, 20 Jan 2016 20:35:28 +0000 (21:35 +0100)
author Szilárd Páll <pall.szilard@gmail.com>
Wed, 20 Jan 2016 20:35:28 +0000 (21:35 +0100)
committer Szilárd Páll <pall.szilard@gmail.com>
Wed, 20 Jan 2016 20:35:28 +0000 (21:35 +0100)
diff --git a/cmake/gmxManageGPU.cmake b/cmake/gmxManageGPU.cmake

index 0e749f9911f9642154616200d9bbaacb52507959..eb889d9e471870185c8c2412c231cd08de596366 100644 (file)
--- a/cmake/gmxManageGPU.cmake
+++ b/cmake/gmxManageGPU.cmake
@@ -158,11 +158,17 @@ if (GMX_GPU)
          set(NVML_FIND_QUIETLY TRUE)
      endif()
      find_package(NVML)
-    if(NVML_FOUND)
-        include_directories(SYSTEM ${NVML_INCLUDE_DIR})
-        set(HAVE_NVML 1)
-        list(APPEND GMX_EXTRA_LIBRARIES ${NVML_LIBRARY})
-    endif(NVML_FOUND)
+    option(GMX_USE_NVML "Use NVML support for better CUDA performance" ${HAVE_NVML})
+    mark_as_advanced(GMX_USE_NVML)
+    if(GMX_USE_NVML)
+        if(NVML_FOUND)
+            include_directories(SYSTEM ${NVML_INCLUDE_DIR})
+            set(HAVE_NVML 1)
+            list(APPEND GMX_EXTRA_LIBRARIES ${NVML_LIBRARY})
+        else()
+            message(FATAL_ERROR "NVML support was required, but was not detected. Please consult the install guide.")
+        endif()
+    endif()
  endif()
  
  # Annoyingly enough, FindCUDA leaves a few variables behind as non-advanced.
diff --git a/docs/install-guide/index.rst b/docs/install-guide/index.rst

index 5a814fd1813744c8eaa863712dbf5ebefd050225..2b30d2613e656cf8a70f61c3184c7b3f8e2c90b1 100644 (file)
--- a/docs/install-guide/index.rst
+++ b/docs/install-guide/index.rst
@@ -579,7 +579,9 @@ If you have the CUDA_ Toolkit installed, you can use ``cmake`` with:
  
  (or whichever path has your installation). In some cases, you might
  need to specify manually which of your C++ compilers should be used,
-e.g. with the advanced option ``CUDA_HOST_COMPILER``. To make it
+e.g. with the advanced option ``CUDA_HOST_COMPILER``.
+
+To make it
  possible to get best performance from NVIDIA Tesla and Quadro GPUs,
  you should install the `GPU Deployment Kit
  <https://developer.nvidia.com/gpu-deployment-kit>`_ and configure
@@ -595,6 +597,8 @@ permissions, clocks cannot be changed, and in that case informative
  log file messages will be produced. Background details can be found at
  this `NVIDIA blog post
  <http://devblogs.nvidia.com/parallelforall/increase-performance-gpu-boost-k80-autoboost/>`_.
+NVML support is only available if detected, and may be disabled by
+turning off the ``GMX_USE_NVML`` CMake advanced option.
  
  By default, optimized code will be generated for CUDA architectures
  supported by the nvcc compiler (and the |Gromacs| build system). 
diff --git a/src/gromacs/ewald/pme-load-balancing.cpp b/src/gromacs/ewald/pme-load-balancing.cpp

index 7bad8e244ec3a04b0233d766efa3961bb13c86b4..14be36b73a570e4c57bec4f1948b00824af45eb5 100644 (file)
--- a/src/gromacs/ewald/pme-load-balancing.cpp
+++ b/src/gromacs/ewald/pme-load-balancing.cpp
@@ -459,6 +459,9 @@ static void print_loadbal_limited(FILE *fp_err, FILE *fp_log,
   * In this stage, only reasonably fast setups are run again. */
  static void switch_to_stage1(pme_load_balancing_t *pme_lb)
  {
+    /* Increase start until we find a setup that is not slower than
+     * maxRelativeSlowdownAccepted times the fastest setup.
+     */
      pme_lb->start = pme_lb->lower_limit;
      while (pme_lb->start + 1 < pme_lb->n &&
             (pme_lb->setup[pme_lb->start].count == 0 ||
@@ -467,11 +470,18 @@ static void switch_to_stage1(pme_load_balancing_t *pme_lb)
      {
          pme_lb->start++;
      }
-    while (pme_lb->start > 0 && pme_lb->setup[pme_lb->start - 1].cycles == 0)
+    /* While increasing start, we might have skipped setups that we did not
+     * time during stage 0. We want to extend the range for stage 1 to include
+     * any skipped setups that lie between setups that were measured to be
+     * acceptably fast and too slow.
+     */
+    while (pme_lb->start > pme_lb->lower_limit &&
+           pme_lb->setup[pme_lb->start - 1].count == 0)
      {
          pme_lb->start--;
      }
  
+    /* Decrease end only with setups that we timed and that are slow. */
      pme_lb->end = pme_lb->n;
      if (pme_lb->setup[pme_lb->end - 1].count > 0 &&
          pme_lb->setup[pme_lb->end - 1].cycles >
@@ -483,7 +493,7 @@ static void switch_to_stage1(pme_load_balancing_t *pme_lb)
      pme_lb->stage = 1;
  
      /* Next we want to choose setup pme_lb->end-1, but as we will decrease
-     * pme_ln->cur by one right after returning, we set cur to end.
+     * pme_lb->cur by one right after returning, we set cur to end.
       */
      pme_lb->cur = pme_lb->end;
  }
@@ -691,8 +701,11 @@ pme_load_balance(pme_load_balancing_t      *pme_lb,
           */
          do
          {
-            pme_lb->cur--;
-            if (pme_lb->cur == pme_lb->start)
+            if (pme_lb->cur > pme_lb->start)
+            {
+                pme_lb->cur--;
+            }
+            else
              {
                  pme_lb->stage++;
  
diff --git a/src/gromacs/gmxana/gmx_tune_pme.cpp b/src/gromacs/gmxana/gmx_tune_pme.cpp

index c8143b159feb2d745414c4dfbd2d6e6705d9cad9..eaf69856ba4e3ba12cbd088073ed81f8b2556ba0 100644 (file)
--- a/src/gromacs/gmxana/gmx_tune_pme.cpp
+++ b/src/gromacs/gmxana/gmx_tune_pme.cpp
@@ -1415,6 +1415,8 @@ static void make_sure_it_runs(char *mdrun_cmd_line, int length, FILE *fp,
      remove_if_exists(opt2fn("-be", nfile, fnm));
      remove_if_exists(opt2fn("-bcpo", nfile, fnm));
      remove_if_exists(opt2fn("-bg", nfile, fnm));
+    remove_if_exists(opt2fn("-bo", nfile, fnm));
+    remove_if_exists(opt2fn("-bx", nfile, fnm));
  
      sfree(command);
      sfree(msg    );
@@ -1940,7 +1942,8 @@ static void create_command_line_snippets(
          t_filenm  fnm[],
          char     *cmd_args_bench[],  /* command line arguments for benchmark runs */
          char     *cmd_args_launch[], /* command line arguments for simulation run */
-        char      extra_args[])      /* Add this to the end of the command line */
+        char      extra_args[],      /* Add this to the end of the command line */
+        char     *deffnm)            /* Default file names, or NULL if not set */
  {
      int         i;
      char       *opt;
@@ -1965,6 +1968,11 @@ static void create_command_line_snippets(
          add_to_string(cmd_args_bench, strbuf);
      }
      /* These switches take effect only at launch time */
+    if (deffnm)
+    {
+        sprintf(strbuf, "-deffnm %s ", deffnm);
+        add_to_string(cmd_args_launch, strbuf);
+    }
      if (FALSE == bAppendFiles)
      {
          add_to_string(cmd_args_launch, "-noappend ");
@@ -2201,6 +2209,7 @@ int gmx_tune_pme(int argc, char *argv[])
      char           *ExtraArgs      = NULL;
      char          **tpr_names      = NULL;
      const char     *simulation_tpr = NULL;
+    char           *deffnm         = NULL;
      int             best_npme, best_tpr;
      int             sim_part = 1; /* For benchmarks with checkpoint files */
      char            bbuf[STRLEN];
@@ -2354,6 +2363,8 @@ int gmx_tune_pme(int argc, char *argv[])
            "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)" },
          { "-cpnum",    FALSE, etBOOL, {&bKeepAndNumCPT},
            "Keep and number checkpoint files (launch only)" },
+        { "-deffnm",   FALSE, etSTR,  {&deffnm},
+          "Set the default filenames (launch only)" },
          { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
            "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt] (launch only)" }
      };
@@ -2426,7 +2437,7 @@ int gmx_tune_pme(int argc, char *argv[])
      cmd_np = bbuf;
  
      create_command_line_snippets(bAppendFiles, bKeepAndNumCPT, bResetCountersHalfWay, presteps,
-                                 NFILE, fnm, &cmd_args_bench, &cmd_args_launch, ExtraArgs);
+                                 NFILE, fnm, &cmd_args_bench, &cmd_args_launch, ExtraArgs, deffnm);
  
      /* Prepare to use checkpoint file if requested */
      sim_part = 1;
diff --git a/src/programs/mdrun/md.cpp b/src/programs/mdrun/md.cpp

index a9312ff7cf6730ac288fc6154b200242a3fc197f..5944f106c27d0e7bebe234966c03b6b01d83bc3e 100644 (file)
--- a/src/programs/mdrun/md.cpp
+++ b/src/programs/mdrun/md.cpp
@@ -1321,7 +1321,7 @@ double gmx::do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
  
          /* #########   START SECOND UPDATE STEP ################# */
  
-        /* at the start of step, randomize the velocities (if vv. Restriction of Andersen controlled
+        /* at the start of step, randomize or scale the velocities (if vv). Restriction of Andersen controlled
             in preprocessing */
  
          if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
author	Szilárd Páll <pall.szilard@gmail.com>
	Wed, 20 Jan 2016 20:35:28 +0000 (21:35 +0100)
committer	Szilárd Páll <pall.szilard@gmail.com>
	Wed, 20 Jan 2016 20:35:28 +0000 (21:35 +0100)
cmake/gmxManageGPU.cmake		patch \| blob \| history
docs/install-guide/index.rst		patch \| blob \| history
src/gromacs/ewald/pme-load-balancing.cpp		patch \| blob \| history
src/gromacs/gmxana/gmx_tune_pme.cpp		patch \| blob \| history
src/programs/mdrun/md.cpp		patch \| blob \| history