*/
-gmx_bool nbnxn_acceleration_supported(FILE *fplog,
- const t_commrec *cr,
- const t_inputrec *ir,
- gmx_bool bGPU);
-/* Return if GPU/CPU-SIMD acceleration is supported with the given inputrec
- * with bGPU TRUE/FALSE.
+gmx_bool nbnxn_gpu_acceleration_supported(FILE *fplog,
+ const t_commrec *cr,
+ const t_inputrec *ir,
+ gmx_bool bRerunMD);
+/* Return if GPU acceleration is supported with the given settings.
+ *
+ * If the return value is FALSE and fplog/cr != NULL, prints a fallback
+ * message to fplog/stderr.
+ */
+
+gmx_bool nbnxn_simd_supported(FILE *fplog,
+ const t_commrec *cr,
+ const t_inputrec *ir);
+/* Return if CPU SIMD support exists for the given inputrec.
* If the return value is FALSE and fplog/cr != NULL, prints a fallback
* message to fplog/stderr.
*/
}
-gmx_bool nbnxn_acceleration_supported(FILE *fplog,
- const t_commrec *cr,
- const t_inputrec *ir,
- gmx_bool bGPU)
+gmx_bool nbnxn_gpu_acceleration_supported(FILE *fplog,
+ const t_commrec *cr,
+ const t_inputrec *ir,
+ gmx_bool bRerunMD)
 {
- if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
+ if (bRerunMD && ir->opts.ngener > 1)
+ {
+ /* Rerun execution time is dominated by I/O and pair search,
+ * so GPUs are not very useful, plus they do not support more
+ * than one energy group. If the user requested GPUs
+ * explicitly, a fatal error is given later. With non-reruns,
+ * we fall back to a single whole-of-system energy group
+ * (which runs much faster than a multiple-energy-groups
+ * implementation would), and issue a note in the .log
+ * file. Users can re-run if they want the information. */
+ md_print_warn(cr, fplog, "Rerun with energy groups is not implemented for GPUs, falling back to the CPU\n");
+ return FALSE;
+ }
+
+ if (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB)
+ {
+ /* LJ PME with LB combination rule does 7 mesh operations.
+ * This is so slow that we don't compile GPU non-bonded kernels for that.
+ */
+ md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with GPUs, falling back to CPU only\n");
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+gmx_bool nbnxn_simd_supported(FILE *fplog,
+ const t_commrec *cr,
+ const t_inputrec *ir)
+{
+ if (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB)
{
- md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
- bGPU ? "GPUs" : "SIMD kernels",
- bGPU ? "CPU only" : "plain-C kernels");
+ /* LJ PME with LB combination rule does 7 mesh operations.
+ * This is so slow that we don't compile SIMD non-bonded kernels
+ * for that. */
+ md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with SIMD kernels, falling back to plain C kernels\n");
return FALSE;
}
if (*kernel_type == nbnxnkNotSet)
{
- /* LJ PME with LB combination rule does 7 mesh operations.
- * This so slow that we don't compile SIMD non-bonded kernels for that.
- */
if (use_simd_kernels &&
- nbnxn_acceleration_supported(fp, cr, ir, FALSE))
+ nbnxn_simd_supported(fp, cr, ir))
{
pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
}
gmx_fatal(FARGS, "Normal Mode analysis is not supported with virtual sites.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n");
}
- if (bRerunMD && fr->cutoff_scheme == ecutsVERLET && ir->opts.ngener > 1 && usingGpu(fr->nbv))
- {
- gmx_fatal(FARGS, "The Verlet scheme on GPUs does not support energy groups, so your rerun should probably use a .tpr file without energy groups, or mdrun -nb auto");
- }
-
if (DEFORM(*ir))
{
tMPI_Thread_mutex_lock(&deform_init_box_mutex);
}
-static int getMaxGpuUsable(FILE *fplog, const t_commrec *cr, const gmx_hw_info_t *hwinfo, int cutoff_scheme)
+static int getMaxGpuUsable(FILE *fplog, const t_commrec *cr, const gmx_hw_info_t *hwinfo,
+ int cutoff_scheme, gmx_bool bUseGpu)
{
/* This code relies on the fact that GPU are not detected when GPU
* acceleration was disabled at run time by the user.
*/
if (cutoff_scheme == ecutsVERLET &&
+ bUseGpu &&
hwinfo->gpu_info.n_dev_compatible > 0)
{
if (gmx_multiple_gpu_per_node_supported())
const t_inputrec *inputrec,
const gmx_mtop_t *mtop,
const t_commrec *cr,
- FILE *fplog)
+ FILE *fplog,
+ gmx_bool bUseGpu)
{
int nthreads_hw, nthreads_tot_max, nrank, ngpu;
int min_atoms_per_mpi_rank;
nthreads_tot_max = nthreads_hw;
}
- ngpu = getMaxGpuUsable(fplog, cr, hwinfo, inputrec->cutoff_scheme);
+ ngpu = getMaxGpuUsable(fplog, cr, hwinfo, inputrec->cutoff_scheme, bUseGpu);
if (inputrec->cutoff_scheme == ecutsGROUP)
{
const t_inputrec *inputrec,
const gmx_mtop_t *mtop,
const t_commrec *cr,
- FILE *fplog);
+ FILE *fplog,
+ gmx_bool bUseGpu);
/* Check if the number of OpenMP threads is within reasonable range
* considering the hardware used. This is a crude check, but mainly
int repl_ex_seed, real pforce, real cpt_period, real max_hours,
int imdport, unsigned long Flags)
{
- gmx_bool bForceUseGPU, bTryUseGPU, bRerunMD, bCantUseGPU;
+ gmx_bool bForceUseGPU, bTryUseGPU, bRerunMD;
t_inputrec *inputrec;
t_state *state = NULL;
matrix box;
bRerunMD = (Flags & MD_RERUN);
bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
bTryUseGPU = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;
- /* Rerun execution time is dominated by I/O and pair search, so
- * GPUs are not very useful, plus they do not support more than
- * one energy group. Don't select them when they can't be used,
- * unless the user requested it, then fatal_error is called later.
- *
- * TODO it would be nice to notify the user that if this check
- * causes GPUs not to be used that this is what is happening, and
- * why, but that will be easier to do after some future
- * cleanup. */
- bCantUseGPU = bRerunMD && (inputrec->opts.ngener > 1);
- bTryUseGPU = bTryUseGPU && !(bCantUseGPU && !bForceUseGPU);
/* Detect hardware, gather information. This is an operation that is
* global for this process (MPI rank). */
* update the message text and the content of nbnxn_acceleration_supported.
*/
if (bUseGPU &&
- !nbnxn_acceleration_supported(fplog, cr, inputrec, bUseGPU))
+ !nbnxn_gpu_acceleration_supported(fplog, cr, inputrec, bRerunMD))
{
/* Fallback message printed by nbnxn_acceleration_supported */
if (bForceUseGPU)
hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
hw_opt,
inputrec, mtop,
- cr, fplog);
+ cr, fplog, bUseGPU);
if (hw_opt->nthreads_tmpi > 1)
{