set(NVML_FIND_QUIETLY TRUE)
endif()
find_package(NVML)
- if(NVML_FOUND)
- include_directories(SYSTEM ${NVML_INCLUDE_DIR})
- set(HAVE_NVML 1)
- list(APPEND GMX_EXTRA_LIBRARIES ${NVML_LIBRARY})
- endif(NVML_FOUND)
+ # Offer NVML support, enabled by default whenever find_package(NVML) above
+ # located the library. The default is taken from NVML_FOUND rather than
+ # HAVE_NVML: as far as this hunk shows, HAVE_NVML is first assigned below,
+ # inside the if(GMX_USE_NVML) branch, so it would expand to nothing here and
+ # leave the option OFF even when NVML is available.
+ option(GMX_USE_NVML "Use NVML support for better CUDA performance" ${NVML_FOUND})
+ mark_as_advanced(GMX_USE_NVML)
+ if(GMX_USE_NVML)
+ if(NVML_FOUND)
+ # NVML is both requested and present: expose its headers, flag it for
+ # the configured sources, and add the library to the link line.
+ include_directories(SYSTEM ${NVML_INCLUDE_DIR})
+ set(HAVE_NVML 1)
+ list(APPEND GMX_EXTRA_LIBRARIES ${NVML_LIBRARY})
+ else()
+ # The user explicitly turned the option on but NVML was not found:
+ # abort configuration instead of silently building without it.
+ message(FATAL_ERROR "NVML support was required, but was not detected. Please consult the install guide.")
+ endif()
+ endif()
endif()
# Annoyingly enough, FindCUDA leaves a few variables behind as non-advanced.
(or whichever path has your installation). In some cases, you might
need to specify manually which of your C++ compilers should be used,
-e.g. with the advanced option ``CUDA_HOST_COMPILER``. To make it
+e.g. with the advanced option ``CUDA_HOST_COMPILER``.
+
+To make it
possible to get best performance from NVIDIA Tesla and Quadro GPUs,
you should install the `GPU Deployment Kit
<https://developer.nvidia.com/gpu-deployment-kit>`_ and configure
log file messages will be produced. Background details can be found at
this `NVIDIA blog post
<http://devblogs.nvidia.com/parallelforall/increase-performance-gpu-boost-k80-autoboost/>`_.
+NVML support is only available if detected, and may be disabled by
+turning off the ``GMX_USE_NVML`` CMake advanced option.
By default, optimized code will be generated for CUDA architectures
supported by the nvcc compiler (and the |Gromacs| build system).
* In this stage, only reasonably fast setups are run again. */
static void switch_to_stage1(pme_load_balancing_t *pme_lb)
{
+ /* Increase start until we find a setup that is not slower than
+ * maxRelativeSlowdownAccepted times the fastest setup.
+ */
pme_lb->start = pme_lb->lower_limit;
while (pme_lb->start + 1 < pme_lb->n &&
(pme_lb->setup[pme_lb->start].count == 0 ||
{
pme_lb->start++;
}
- while (pme_lb->start > 0 && pme_lb->setup[pme_lb->start - 1].cycles == 0)
+ /* While increasing start, we might have skipped setups that we did not
+ * time during stage 0. We want to extend the range for stage 1 to include
+ * any skipped setups that lie between setups that were measured to be
+ * acceptably fast and too slow.
+ */
+ while (pme_lb->start > pme_lb->lower_limit &&
+ pme_lb->setup[pme_lb->start - 1].count == 0)
{
pme_lb->start--;
}
+ /* Decrease end only with setups that we timed and that are slow. */
pme_lb->end = pme_lb->n;
if (pme_lb->setup[pme_lb->end - 1].count > 0 &&
pme_lb->setup[pme_lb->end - 1].cycles >
pme_lb->stage = 1;
/* Next we want to choose setup pme_lb->end-1, but as we will decrease
- * pme_ln->cur by one right after returning, we set cur to end.
+ * pme_lb->cur by one right after returning, we set cur to end.
*/
pme_lb->cur = pme_lb->end;
}
*/
do
{
- pme_lb->cur--;
- if (pme_lb->cur == pme_lb->start)
+ if (pme_lb->cur > pme_lb->start)
+ {
+ pme_lb->cur--;
+ }
+ else
{
pme_lb->stage++;
remove_if_exists(opt2fn("-be", nfile, fnm));
remove_if_exists(opt2fn("-bcpo", nfile, fnm));
remove_if_exists(opt2fn("-bg", nfile, fnm));
+ remove_if_exists(opt2fn("-bo", nfile, fnm));
+ remove_if_exists(opt2fn("-bx", nfile, fnm));
sfree(command);
sfree(msg );
t_filenm fnm[],
char *cmd_args_bench[], /* command line arguments for benchmark runs */
char *cmd_args_launch[], /* command line arguments for simulation run */
- char extra_args[]) /* Add this to the end of the command line */
+ char extra_args[], /* Add this to the end of the command line */
+ char *deffnm) /* Default file names, or NULL if not set */
{
int i;
char *opt;
add_to_string(cmd_args_bench, strbuf);
}
/* These switches take effect only at launch time */
+ if (deffnm)
+ {
+ sprintf(strbuf, "-deffnm %s ", deffnm);
+ add_to_string(cmd_args_launch, strbuf);
+ }
if (FALSE == bAppendFiles)
{
add_to_string(cmd_args_launch, "-noappend ");
char *ExtraArgs = NULL;
char **tpr_names = NULL;
const char *simulation_tpr = NULL;
+ char *deffnm = NULL;
int best_npme, best_tpr;
int sim_part = 1; /* For benchmarks with checkpoint files */
char bbuf[STRLEN];
"Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)" },
{ "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT},
"Keep and number checkpoint files (launch only)" },
+ { "-deffnm", FALSE, etSTR, {&deffnm},
+ "Set the default filenames (launch only)" },
{ "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
"HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt] (launch only)" }
};
cmd_np = bbuf;
create_command_line_snippets(bAppendFiles, bKeepAndNumCPT, bResetCountersHalfWay, presteps,
- NFILE, fnm, &cmd_args_bench, &cmd_args_launch, ExtraArgs);
+ NFILE, fnm, &cmd_args_bench, &cmd_args_launch, ExtraArgs, deffnm);
/* Prepare to use checkpoint file if requested */
sim_part = 1;
/* ######### START SECOND UPDATE STEP ################# */
- /* at the start of step, randomize the velocities (if vv. Restriction of Andersen controlled
+ /* at the start of step, randomize or scale the velocities (if vv). Restriction of Andersen controlled
in preprocessing */
if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */