Fix mdrun -nb auto -rerun with GPU and energy groups

author Mark Abraham <mark.j.abraham@gmail.com>

Tue, 15 Sep 2015 10:01:41 +0000 (12:01 +0200)

committer Gerrit Code Review <gerrit@gerrit.gromacs.org>

Sat, 3 Oct 2015 08:32:39 +0000 (10:32 +0200)
author Mark Abraham <mark.j.abraham@gmail.com>
Tue, 15 Sep 2015 10:01:41 +0000 (12:01 +0200)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Sat, 3 Oct 2015 08:32:39 +0000 (10:32 +0200)
diff --git a/src/gromacs/legacyheaders/force.h b/src/gromacs/legacyheaders/force.h

index fa5c22cff41ea6a0a92d868b5983e5db28a368cf..260cdc92999532942cfe310a0548a777266b4285 100644 (file)
--- a/src/gromacs/legacyheaders/force.h
+++ b/src/gromacs/legacyheaders/force.h
@@ -136,12 +136,20 @@ gmx_bool can_use_allvsall(const t_inputrec *ir,
   */
  
  
-gmx_bool nbnxn_acceleration_supported(FILE             *fplog,
-                                      const t_commrec  *cr,
-                                      const t_inputrec *ir,
-                                      gmx_bool          bGPU);
-/* Return if GPU/CPU-SIMD acceleration is supported with the given inputrec
- * with bGPU TRUE/FALSE.
+gmx_bool nbnxn_gpu_acceleration_supported(FILE             *fplog,
+                                          const t_commrec  *cr,
+                                          const t_inputrec *ir,
+                                          gmx_bool          bRerunMD);
+/* Return if GPU acceleration is supported with the given settings.
+ *
+ * If the return value is FALSE and fplog/cr != NULL, prints a fallback
+ * message to fplog/stderr.
+ */
+
+gmx_bool nbnxn_simd_supported(FILE             *fplog,
+                              const t_commrec  *cr,
+                              const t_inputrec *ir);
+/* Return if CPU SIMD support exists for the given inputrec
   * If the return value is FALSE and fplog/cr != NULL, prints a fallback
   * message to fplog/stderr.
   */
diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp

index 91f5c5f0893e2bc917be75426565b569b69a5ee0..79884576ff5f6c4a50851359e4427fdc2ff67298 100644 (file)
--- a/src/gromacs/mdlib/forcerec.cpp
+++ b/src/gromacs/mdlib/forcerec.cpp
@@ -1540,16 +1540,47 @@ gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *
  }
  
  
-gmx_bool nbnxn_acceleration_supported(FILE             *fplog,
-                                      const t_commrec  *cr,
-                                      const t_inputrec *ir,
-                                      gmx_bool          bGPU)
+gmx_bool nbnxn_gpu_acceleration_supported(FILE             *fplog,
+                                          const t_commrec  *cr,
+                                          const t_inputrec *ir,
+                                          gmx_bool          bRerunMD)
  {
-    if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
+    if (bRerunMD && ir->opts.ngener > 1)
+    {
+        /* Rerun execution time is dominated by I/O and pair search,
+         * so GPUs are not very useful, plus they do not support more
+         * than one energy group. If the user requested GPUs
+         * explicitly, a fatal error is given later.  With non-reruns,
+         * we fall back to a single whole-of system energy group
+         * (which runs much faster than a multiple-energy-groups
+         * implementation would), and issue a note in the .log
+         * file. Users can re-run if they want the information. */
+        md_print_warn(cr, fplog, "Rerun with energy groups is not implemented for GPUs, falling back to the CPU\n");
+        return FALSE;
+    }
+
+    if (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB)
+    {
+        /* LJ PME with LB combination rule does 7 mesh operations.
+         * This so slow that we don't compile GPU non-bonded kernels for that.
+         */
+        md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with GPUs, falling back to CPU only\n");
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+gmx_bool nbnxn_simd_supported(FILE             *fplog,
+                              const t_commrec  *cr,
+                              const t_inputrec *ir)
+{
+    if (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB)
      {
-        md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
-                      bGPU ? "GPUs" : "SIMD kernels",
-                      bGPU ? "CPU only" : "plain-C kernels");
+        /* LJ PME with LB combination rule does 7 mesh operations.
+         * This so slow that we don't compile SIMD non-bonded kernels
+         * for that. */
+        md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with SIMD kernels, falling back to plain C kernels\n");
          return FALSE;
      }
  
@@ -1723,11 +1754,8 @@ static void pick_nbnxn_kernel(FILE                *fp,
  
      if (*kernel_type == nbnxnkNotSet)
      {
-        /* LJ PME with LB combination rule does 7 mesh operations.
-         * This so slow that we don't compile SIMD non-bonded kernels for that.
-         */
          if (use_simd_kernels &&
-            nbnxn_acceleration_supported(fp, cr, ir, FALSE))
+            nbnxn_simd_supported(fp, cr, ir))
          {
              pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
          }
diff --git a/src/programs/mdrun/md.cpp b/src/programs/mdrun/md.cpp

index 4f33d897e950a2793d76d963e8451ed0d6c8dd96..6c0dc10aa41f26dad177d6fb502860240ef8436d 100644 (file)
--- a/src/programs/mdrun/md.cpp
+++ b/src/programs/mdrun/md.cpp
@@ -338,11 +338,6 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
          gmx_fatal(FARGS, "Normal Mode analysis is not supported with virtual sites.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n");
      }
  
-    if (bRerunMD && fr->cutoff_scheme == ecutsVERLET && ir->opts.ngener > 1 && usingGpu(fr->nbv))
-    {
-        gmx_fatal(FARGS, "The Verlet scheme on GPUs does not support energy groups, so your rerun should probably use a .tpr file without energy groups, or mdrun -nb auto");
-    }
-
      if (DEFORM(*ir))
      {
          tMPI_Thread_mutex_lock(&deform_init_box_mutex);
diff --git a/src/programs/mdrun/resource-division.cpp b/src/programs/mdrun/resource-division.cpp

index 0ef7b583f4903c68bb4a67c83ff20048a99b804c..309817e1ef16f175b14f1b55469507780a6ab565 100644 (file)
--- a/src/programs/mdrun/resource-division.cpp
+++ b/src/programs/mdrun/resource-division.cpp
@@ -241,12 +241,14 @@ static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
  }
  
  
-static int getMaxGpuUsable(FILE *fplog, const t_commrec *cr, const gmx_hw_info_t *hwinfo, int cutoff_scheme)
+static int getMaxGpuUsable(FILE *fplog, const t_commrec *cr, const gmx_hw_info_t *hwinfo,
+                           int cutoff_scheme, gmx_bool bUseGpu)
  {
      /* This code relies on the fact that GPU are not detected when GPU
       * acceleration was disabled at run time by the user.
       */
      if (cutoff_scheme == ecutsVERLET &&
+        bUseGpu &&
          hwinfo->gpu_info.n_dev_compatible > 0)
      {
          if (gmx_multiple_gpu_per_node_supported())
@@ -282,7 +284,8 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
                       const t_inputrec    *inputrec,
                       const gmx_mtop_t    *mtop,
                       const t_commrec     *cr,
-                     FILE                *fplog)
+                     FILE                *fplog,
+                     gmx_bool             bUseGpu)
  {
      int      nthreads_hw, nthreads_tot_max, nrank, ngpu;
      int      min_atoms_per_mpi_rank;
@@ -324,7 +327,7 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
          nthreads_tot_max = nthreads_hw;
      }
  
-    ngpu = getMaxGpuUsable(fplog, cr, hwinfo, inputrec->cutoff_scheme);
+    ngpu = getMaxGpuUsable(fplog, cr, hwinfo, inputrec->cutoff_scheme, bUseGpu);
  
      if (inputrec->cutoff_scheme == ecutsGROUP)
      {
diff --git a/src/programs/mdrun/resource-division.h b/src/programs/mdrun/resource-division.h

index cbe9966249aedd9d2293224984ff908a4e6cf40a..cd1c7a97e50b62af8a2720a06b1d63282e974b3b 100644 (file)
--- a/src/programs/mdrun/resource-division.h
+++ b/src/programs/mdrun/resource-division.h
@@ -52,7 +52,8 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
                       const t_inputrec    *inputrec,
                       const gmx_mtop_t    *mtop,
                       const t_commrec     *cr,
-                     FILE                *fplog);
+                     FILE                *fplog,
+                     gmx_bool             bUseGpu);
  
  /* Check if the number of OpenMP threads is within reasonable range
   * considering the hardware used. This is a crude check, but mainly
diff --git a/src/programs/mdrun/runner.cpp b/src/programs/mdrun/runner.cpp

index f77fcddceb69d45738a1833eda4837c439224f82..cd696963004a82943f1b1863832aa2882847d73d 100644 (file)
--- a/src/programs/mdrun/runner.cpp
+++ b/src/programs/mdrun/runner.cpp
@@ -603,7 +603,7 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
               int repl_ex_seed, real pforce, real cpt_period, real max_hours,
               int imdport, unsigned long Flags)
  {
-    gmx_bool                  bForceUseGPU, bTryUseGPU, bRerunMD, bCantUseGPU;
+    gmx_bool                  bForceUseGPU, bTryUseGPU, bRerunMD;
      t_inputrec               *inputrec;
      t_state                  *state = NULL;
      matrix                    box;
@@ -646,17 +646,6 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
      bRerunMD     = (Flags & MD_RERUN);
      bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
      bTryUseGPU   = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;
-    /* Rerun execution time is dominated by I/O and pair search, so
-     * GPUs are not very useful, plus they do not support more than
-     * one energy group. Don't select them when they can't be used,
-     * unless the user requested it, then fatal_error is called later.
-     *
-     * TODO it would be nice to notify the user that if this check
-     * causes GPUs not to be used that this is what is happening, and
-     * why, but that will be easier to do after some future
-     * cleanup. */
-    bCantUseGPU = bRerunMD && (inputrec->opts.ngener > 1);
-    bTryUseGPU  = bTryUseGPU && !(bCantUseGPU && !bForceUseGPU);
  
      /* Detect hardware, gather information. This is an operation that is
       * global for this process (MPI rank). */
@@ -693,7 +682,7 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
               * update the message text and the content of nbnxn_acceleration_supported.
               */
              if (bUseGPU &&
-                !nbnxn_acceleration_supported(fplog, cr, inputrec, bUseGPU))
+                !nbnxn_gpu_acceleration_supported(fplog, cr, inputrec, bRerunMD))
              {
                  /* Fallback message printed by nbnxn_acceleration_supported */
                  if (bForceUseGPU)
@@ -768,7 +757,7 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
          hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
                                                   hw_opt,
                                                   inputrec, mtop,
-                                                 cr, fplog);
+                                                 cr, fplog, bUseGPU);
  
          if (hw_opt->nthreads_tmpi > 1)
          {
author	Mark Abraham <mark.j.abraham@gmail.com>
	Tue, 15 Sep 2015 10:01:41 +0000 (12:01 +0200)
committer	Gerrit Code Review <gerrit@gerrit.gromacs.org>
	Sat, 3 Oct 2015 08:32:39 +0000 (10:32 +0200)
src/gromacs/legacyheaders/force.h		patch \| blob \| history
src/gromacs/mdlib/forcerec.cpp		patch \| blob \| history
src/programs/mdrun/md.cpp		patch \| blob \| history
src/programs/mdrun/resource-division.cpp		patch \| blob \| history
src/programs/mdrun/resource-division.h		patch \| blob \| history
src/programs/mdrun/runner.cpp		patch \| blob \| history