#include <config.h>
#endif
-
#include <time.h>
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
-
-
#include "gromacs/commandline/pargs.h"
#include "typedefs.h"
#include "types/commrec.h"
#include "gromacs/timing/walltime_accounting.h"
#include "gromacs/math/utilities.h"
+#include "gmx_fatal.h"
/* Enum for situations that can occur during log file parsing, the
* corresponding string entries can be found in do_the_tests() in
/* Look for domain decomp grid and separate PME nodes: */
if (str_starts(line, matchstrdd))
{
- sscanf(line, "Domain decomposition grid %d x %d x %d, separate PME nodes %d",
+ sscanf(line, "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
&(perfdata->nx), &(perfdata->ny), &(perfdata->nz), &npme);
if (perfdata->nPMEnodes == -1)
{
}
else if (perfdata->nPMEnodes != npme)
{
- gmx_fatal(FARGS, "PME nodes from command line and output file are not identical");
+ gmx_fatal(FARGS, "PME ranks from command line and output file are not identical");
}
iFound = eFoundDDStr;
}
fclose(fp);
return eParselogNoDDGrid;
}
- else if (str_starts(line, "The number of nodes you selected"))
+ else if (str_starts(line, "The number of ranks you selected"))
{
fclose(fp);
return eParselogLargePrimeFactor;
/* Already found matchstring - look for cycle data */
if (str_starts(line, "Total "))
{
- sscanf(line, "Total %lf", &(perfdata->Gcycles[test_nr]));
+ sscanf(line, "Total %*f %lf", &(perfdata->Gcycles[test_nr]));
iFound = eFoundCycleStr;
}
break;
{
sep_line(fp);
fprintf(fp, "Summary of successful runs:\n");
- fprintf(fp, "Line tpr PME nodes Gcycles Av. Std.dev. ns/day PME/f");
+ fprintf(fp, "Line tpr PME ranks Gcycles Av. Std.dev. ns/day PME/f");
if (nnodes > 1)
{
fprintf(fp, " DD grid");
/* We have optimized the number of PME-only nodes */
if (winPME == -1)
{
- sprintf(strbuf, "%s", "the automatic number of PME nodes");
+ sprintf(strbuf, "%s", "the automatic number of PME ranks");
}
else
{
- sprintf(strbuf, "%d PME nodes", winPME);
+ sprintf(strbuf, "%d PME ranks", winPME);
}
}
fprintf(fp, "Best performance was achieved with %s", strbuf);
gmx_fatal(FARGS, "Need a threaded version of mdrun. This one\n"
"(%s)\n"
"seems to have been compiled with MPI instead.",
- *cmd_mdrun);
+ cmd_mdrun);
}
}
else
gmx_fatal(FARGS, "Need an MPI-enabled version of mdrun. This one\n"
"(%s)\n"
"seems to have been compiled without MPI support.",
- *cmd_mdrun);
+ cmd_mdrun);
}
}
sfree(ir);
}
+/* Return TRUE if the van der Waals radius (rvdw) may be scaled during
+ * the tuning runs for this vdwtype.
+ *
+ * Scaling is only meaningful for plain cut-off LJ (evdwCUT) and for
+ * LJ-PME (evdwPME); see the matching checks at the call sites where
+ * rvdw is printed and adjusted. */
+static gmx_bool can_scale_rvdw(int vdwtype)
+{
+    return (evdwCUT == vdwtype ||
+            evdwPME == vdwtype);
+}
#define EPME_SWITCHED(e) ((e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH)
fprintf(fp, " No. scaling rcoulomb");
fprintf(fp, " nkx nky nkz");
fprintf(fp, " spacing");
- if (evdwCUT == ir->vdwtype)
+ if (can_scale_rvdw(ir->vdwtype))
{
fprintf(fp, " rvdw");
}
ir->rlist = ir->rcoulomb + nlist_buffer;
}
- if (bScaleRvdw && evdwCUT == ir->vdwtype)
+ if (bScaleRvdw && can_scale_rvdw(ir->vdwtype))
{
- if (ecutsVERLET == ir->cutoff_scheme)
+ if (ecutsVERLET == ir->cutoff_scheme ||
+ evdwPME == ir->vdwtype)
{
- /* With Verlet, the van der Waals radius must always equal the Coulomb radius */
+ /* With either the Verlet cutoff-scheme or LJ-PME,
+ the van der Waals radius must always equal the
+ Coulomb radius */
ir->rvdw = ir->rcoulomb;
}
else
fprintf(fp, "%4d%10f%10f", j, fac, ir->rcoulomb);
fprintf(fp, "%5d%5d%5d", ir->nkx, ir->nky, ir->nkz);
fprintf(fp, " %9f ", info->fsx[j]);
- if (evdwCUT == ir->vdwtype)
+ if (can_scale_rvdw(ir->vdwtype))
{
fprintf(fp, "%10f", ir->rvdw);
}
{
/* To prevent confusion, do not again issue a gmx_fatal here since we already
* get the error message from mdrun itself */
- sprintf(msg, "Cannot run the benchmark simulations! Please check the error message of\n"
+ sprintf(msg,
+ "Cannot run the first benchmark simulation! Please check the error message of\n"
"mdrun for the source of the problem. Did you provide a command line\n"
- "argument that neither g_tune_pme nor mdrun understands? Offending command:\n"
+ "argument that neither gmx tune_pme nor mdrun understands? If you're\n"
+ "sure your command line should work, you can bypass this check with \n"
+ "gmx tune_pme -nocheck. The failing command was:\n"
"\n%s\n\n", command);
fprintf(stderr, "%s", msg);
int npme_fixed, /* If >= -1, test fixed number of PME
* nodes only */
const char *npmevalues_opt, /* Which -npme values should be tested */
- t_perf **perfdata, /* Here the performace data is stored */
+ t_perf **perfdata, /* Here the performance data is stored */
int *pmeentries, /* Entries in the nPMEnodes list */
int repeats, /* Repeat each test this often */
int nnodes, /* Total number of nodes = nPP + nPME */
const t_filenm *fnm, /* List of filenames from command line */
int nfile, /* Number of files specified on the cmdl. */
int presteps, /* DLB equilibration steps, is checked */
- gmx_int64_t cpt_steps) /* Time step counter in the checkpoint */
+ gmx_int64_t cpt_steps, /* Time step counter in the checkpoint */
+ gmx_bool bCheck) /* Check whether benchmark mdrun works */
{
int i, nr, k, ret, count = 0, totaltests;
int *nPMEnodes = NULL;
"No DD grid found for these settings.",
"TPX version conflict!",
"mdrun was not started in parallel!",
- "Number of PP nodes has a prime factor that is too large.",
+ "Number of PP ranks has a prime factor that is too large.",
"An error occured."
};
char str_PME_f_load[13];
*pmeentries = 1;
snew(nPMEnodes, 1);
nPMEnodes[0] = npme_fixed;
- fprintf(stderr, "Will use a fixed number of %d PME-only nodes.\n", nPMEnodes[0]);
+ fprintf(stderr, "Will use a fixed number of %d PME-only ranks.\n", nPMEnodes[0]);
}
if (0 == repeats)
for (k = 0; k < nr_tprs; k++)
{
fprintf(fp, "\nIndividual timings for input file %d (%s):\n", k, tpr_names[k]);
- fprintf(fp, "PME nodes Gcycles ns/day PME/f Remark\n");
+ fprintf(fp, "PME ranks Gcycles ns/day PME/f Remark\n");
/* Loop over various numbers of PME nodes: */
for (i = 0; i < *pmeentries; i++)
{
cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench);
/* To prevent that all benchmarks fail due to a show-stopper argument
- * on the mdrun command line, we make a quick check first */
- if (bFirst)
+ * on the mdrun command line, we make a quick check first.
+ * This check can be turned off in cases where the automatically chosen
+ * number of PME-only ranks leads to a number of PP ranks for which no
+ * decomposition can be found (e.g. for large prime numbers) */
+ if (bFirst && bCheck)
{
make_sure_it_runs(pd->mdrun_cmd_line, cmdline_length, fp, fnm, nfile);
}
/* Check number of nodes */
if (nnodes < 1)
{
- gmx_fatal(FARGS, "Number of nodes/threads must be a positive integer.");
+ gmx_fatal(FARGS, "Number of ranks/threads must be a positive integer.");
}
/* Automatically choose -ntpr if not set */
/* No more than 50% of all nodes can be assigned as PME-only nodes. */
if (2*npme_fixed > nnodes)
{
- gmx_fatal(FARGS, "Cannot have more than %d PME-only nodes for a total of %d nodes (you chose %d).\n",
+ gmx_fatal(FARGS, "Cannot have more than %d PME-only ranks for a total of %d ranks (you chose %d).\n",
nnodes/2, nnodes, npme_fixed);
}
if ((npme_fixed > 0) && (5*npme_fixed < nnodes))
{
- fprintf(stderr, "WARNING: Only %g percent of the nodes are assigned as PME-only nodes.\n",
+ fprintf(stderr, "WARNING: Only %g percent of the ranks are assigned as PME-only ranks.\n",
100.0*((real)npme_fixed / (real)nnodes));
}
if (opt2parg_bSet("-min", npargs, pa) || opt2parg_bSet("-max", npargs, pa))
{
fprintf(stderr, "NOTE: The -min, -max, and -npme options have no effect when a\n"
- " fixed number of PME-only nodes is requested with -fix.\n");
+ " fixed number of PME-only ranks is requested with -fix.\n");
}
}
}
int gmx_tune_pme(int argc, char *argv[])
{
const char *desc[] = {
- "For a given number [TT]-np[tt] or [TT]-ntmpi[tt] of processors/threads, [THISMODULE] systematically",
- "times [gmx-mdrun] with various numbers of PME-only nodes and determines",
+ "For a given number [TT]-np[tt] or [TT]-ntmpi[tt] of ranks, [THISMODULE] systematically",
+ "times [gmx-mdrun] with various numbers of PME-only ranks and determines",
"which setting is fastest. It will also test whether performance can",
"be enhanced by shifting load from the reciprocal to the real space",
"part of the Ewald sum. ",
"need to provide a machine- or hostfile. This can also be passed",
"via the MPIRUN variable, e.g.[PAR]",
"[TT]export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"[tt][PAR]",
+ "Before doing the actual benchmark runs, [THISMODULE] will do a quick",
+ "check whether mdrun works as expected with the provided parallel settings",
+ "if the [TT]-check[tt] option is activated (the default).",
"Please call [THISMODULE] with the normal options you would pass to",
- "[gmx-mdrun] and add [TT]-np[tt] for the number of processors to perform the",
+ "[gmx-mdrun] and add [TT]-np[tt] for the number of ranks to perform the",
"tests on, or [TT]-ntmpi[tt] for the number of threads. You can also add [TT]-r[tt]",
"to repeat each test several times to get better statistics. [PAR]",
"[THISMODULE] can test various real space / reciprocal space workloads",
"written with enlarged cutoffs and smaller Fourier grids respectively.",
"Typically, the first test (number 0) will be with the settings from the input",
"[TT].tpr[tt] file; the last test (number [TT]ntpr[tt]) will have the Coulomb cutoff",
- "specified by [TT]-rmax[tt] with a somwhat smaller PME grid at the same time. ",
+ "specified by [TT]-rmax[tt] with a somewhat smaller PME grid at the same time. ",
"In this last test, the Fourier spacing is multiplied with [TT]rmax[tt]/rcoulomb. ",
"The remaining [TT].tpr[tt] files will have equally-spaced Coulomb radii (and Fourier "
"spacings) between these extremes. [BB]Note[bb] that you can set [TT]-ntpr[tt] to 1",
- "if you just seek the optimal number of PME-only nodes; in that case",
+ "if you just seek the optimal number of PME-only ranks; in that case",
"your input [TT].tpr[tt] file will remain unchanged.[PAR]",
"For the benchmark runs, the default of 1000 time steps should suffice for most",
"MD systems. The dynamic load balancing needs about 100 time steps",
"to adapt to local load imbalances, therefore the time step counters",
"are by default reset after 100 steps. For large systems (>1M atoms), as well as ",
- "for a higher accuarcy of the measurements, you should set [TT]-resetstep[tt] to a higher value.",
+ "for a higher accuracy of the measurements, you should set [TT]-resetstep[tt] to a higher value.",
"From the 'DD' load imbalance entries in the md.log output file you",
"can tell after how many steps the load is sufficiently balanced. Example call:[PAR]"
"[TT]gmx tune_pme -np 64 -s protein.tpr -launch[tt][PAR]",
gmx_bool bKeepAndNumCPT = FALSE;
gmx_bool bResetCountersHalfWay = FALSE;
gmx_bool bBenchmark = TRUE;
+ gmx_bool bCheck = TRUE;
output_env_t oenv = NULL;
/* g_tune_pme options: */
/***********************/
{ "-np", FALSE, etINT, {&nnodes},
- "Number of nodes to run the tests on (must be > 2 for separate PME nodes)" },
+ "Number of ranks to run the tests on (must be > 2 for separate PME ranks)" },
{ "-npstring", FALSE, etENUM, {procstring},
- "Specify the number of processors to [TT]$MPIRUN[tt] using this string"},
+ "Specify the number of ranks to [TT]$MPIRUN[tt] using this string"},
{ "-ntmpi", FALSE, etINT, {&nthreads},
"Number of MPI-threads to run the tests on (turns MPI & mpirun off)"},
{ "-r", FALSE, etINT, {&repeats},
"Repeat each test this often" },
{ "-max", FALSE, etREAL, {&maxPMEfraction},
- "Max fraction of PME nodes to test with" },
+ "Max fraction of PME ranks to test with" },
{ "-min", FALSE, etREAL, {&minPMEfraction},
- "Min fraction of PME nodes to test with" },
+ "Min fraction of PME ranks to test with" },
{ "-npme", FALSE, etENUM, {npmevalues_opt},
"Within -min and -max, benchmark all possible values for [TT]-npme[tt], or just a reasonable subset. "
"Auto neglects -min and -max and chooses reasonable values around a guess for npme derived from the .tpr"},
{ "-fix", FALSE, etINT, {&npme_fixed},
- "If >= -1, do not vary the number of PME-only nodes, instead use this fixed value and only vary rcoulomb and the PME grid spacing."},
+ "If >= -1, do not vary the number of PME-only ranks, instead use this fixed value and only vary rcoulomb and the PME grid spacing."},
{ "-rmax", FALSE, etREAL, {&rmax},
"If >0, maximal rcoulomb for -ntpr>1 (rcoulomb upscaling results in fourier grid downscaling)" },
{ "-rmin", FALSE, etREAL, {&rmin},
"Launch the real simulation after optimization" },
{ "-bench", FALSE, etBOOL, {&bBenchmark},
"Run the benchmarks or just create the input [TT].tpr[tt] files?" },
+ { "-check", FALSE, etBOOL, {&bCheck},
+ "Before the benchmark runs, check whether mdrun works in parallel" },
/******************/
/* mdrun options: */
/******************/
{
fprintf(stdout, "- %d ", maxPMEnodes);
}
- fprintf(stdout, "PME-only nodes.\n Note that the automatic number of PME-only nodes and no separate PME nodes are always tested.\n");
+ fprintf(stdout, "PME-only ranks.\n Note that the automatic number of PME-only ranks and no separate PME ranks are always tested.\n");
}
}
else
fprintf(fp, "%s for Gromacs %s\n", ShortProgram(), GromacsVersion());
if (!bThreads)
{
- fprintf(fp, "Number of nodes : %d\n", nnodes);
+ fprintf(fp, "Number of ranks : %d\n", nnodes);
fprintf(fp, "The mpirun command is : %s\n", cmd_mpirun);
if (strcmp(procstring[0], "none") != 0)
{
- fprintf(fp, "Passing # of nodes via : %s\n", procstring[0]);
+ fprintf(fp, "Passing # of ranks via : %s\n", procstring[0]);
}
else
{
- fprintf(fp, "Not setting number of nodes in system call\n");
+ fprintf(fp, "Not setting number of ranks in system call\n");
}
}
else
{
do_the_tests(fp, tpr_names, maxPMEnodes, minPMEnodes, npme_fixed, npmevalues_opt[0], perfdata, &pmeentries,
repeats, nnodes, ntprs, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
- cmd_args_bench, fnm, NFILE, presteps, cpt_steps);
+ cmd_args_bench, fnm, NFILE, presteps, cpt_steps, bCheck);
fprintf(fp, "\nTuning took%8.1f minutes.\n", (gmx_gettime()-seconds)/60.0);