.TH g_tune_pme 1 "Fri 18 Jan 2013" "" "GROMACS suite, VERSION 4.5.6"
.SH NAME
g_tune_pme \- time mdrun as a function of PME nodes to optimize settings
.SH SYNOPSIS
\f3g_tune_pme\fP
.BI "\-err" " errors.log "
.BI "\-so" " tuned.tpr "
.BI "\-s" " topol.tpr "
.BI "\-o" " traj.trr "
.BI "\-x" " traj.xtc "
.BI "\-cpi" " state.cpt "
.BI "\-cpo" " state.cpt "
.BI "\-c" " confout.gro "
.BI "\-e" " ener.edr "
.BI "\-dhdl" " dhdl.xvg "
.BI "\-field" " field.xvg "
.BI "\-table" " table.xvg "
.BI "\-tablep" " tablep.xvg "
.BI "\-tableb" " table.xvg "
.BI "\-rerun" " rerun.xtc "
.BI "\-tpi" " tpi.xvg "
.BI "\-tpid" " tpidist.xvg "
.BI "\-ei" " sam.edi "
.BI "\-eo" " sam.edo "
.BI "\-j" " wham.gct "
.BI "\-jo" " bam.gct "
.BI "\-ffout" " gct.xvg "
.BI "\-devout" " deviatie.xvg "
.BI "\-runav" " runaver.xvg "
.BI "\-px" " pullx.xvg "
.BI "\-pf" " pullf.xvg "
.BI "\-mtx" " nm.mtx "
.BI "\-dn" " dipole.ndx "
.BI "\-bo" " bench.trr "
.BI "\-bx" " bench.xtc "
.BI "\-bcpo" " bench.cpt "
.BI "\-bc" " bench.gro "
.BI "\-be" " bench.edr "
.BI "\-bg" " bench.log "
.BI "\-beo" " bench.edo "
.BI "\-bdhdl" " benchdhdl.xvg "
.BI "\-bfield" " benchfld.xvg "
.BI "\-btpi" " benchtpi.xvg "
.BI "\-btpid" " benchtpid.xvg "
.BI "\-bjo" " bench.gct "
.BI "\-bffout" " benchgct.xvg "
.BI "\-bdevout" " benchdev.xvg "
.BI "\-brunav" " benchrnav.xvg "
.BI "\-bpx" " benchpx.xvg "
.BI "\-bpf" " benchpf.xvg "
.BI "\-bmtx" " benchn.mtx "
.BI "\-bdn" " bench.ndx "
.BI "\-[no]version" ""
.BI "\-npstring" " enum "
.BI "\-upfac" " real "
.BI "\-downfac" " real "
.BI "\-steps" " step "
.BI "\-resetstep" " int "
.BI "\-simsteps" " step "
.BI "\-deffnm" " string "
.BI "\-ddorder" " enum "
.BI "\-[no]ddcheck" ""
.BI "\-[no]compact" ""
.BI "\-pforce" " real "
.BI "\-replex" " int "
.BI "\-reseed" " int "
.SH DESCRIPTION
\&For a given number \fB \-np\fR or \fB \-nt\fR of processors/threads, this program systematically
\&calls \fB mdrun\fR with various numbers of PME\-only nodes and determines
\&which setting is fastest. It will also test whether performance can
\&be enhanced by shifting load from the reciprocal to the real space
\&part of the Ewald sum.
\&Simply pass your \fB .tpr\fR file to \fB g_tune_pme\fR together with other options
\&for \fB mdrun\fR as needed.
\&Which executables are used can be set in the environment variables
\&MPIRUN and MDRUN. If these are not present, 'mpirun' and 'mdrun'
\&will be used as defaults. Note that for certain MPI frameworks you
\&need to provide a machine\- or hostfile. This can also be passed
\&via the MPIRUN variable, e.g.

\&\fB export MPIRUN="/usr/local/mpirun \-machinefile hosts"\fR
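As a related sketch, both variables mentioned above can be set together so that g_tune_pme uses a specific launcher and a specific mdrun binary. The paths and the mdrun_mpi name below are illustrative assumptions, not defaults:

```shell
# Select a specific MPI launcher (with hostfile) and a specific mdrun
# binary for g_tune_pme; paths and names are illustrative assumptions.
export MPIRUN="/usr/local/mpirun -machinefile hosts"
export MDRUN="/usr/local/bin/mdrun_mpi"
# Confirm what g_tune_pme will pick up from the environment:
echo "$MPIRUN"
echo "$MDRUN"
```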
\&Please call \fB g_tune_pme\fR with the normal options you would pass to
\&\fB mdrun\fR and add \fB \-np\fR for the number of processors to perform the
\&tests on, or \fB \-nt\fR for the number of threads. You can also add \fB \-r\fR
\&to repeat each test several times to get better statistics.
\&\fB g_tune_pme\fR can test various real space / reciprocal space workloads
\&for you. With \fB \-ntpr\fR you control how many extra \fB .tpr\fR files will be
\&written with enlarged cutoffs and smaller fourier grids respectively.
\&Typically, the first test (number 0) will be with the settings from the input
\&\fB .tpr\fR file; the last test (number \fB ntpr\fR) will have cutoffs multiplied
\&by (and at the same time fourier grid dimensions divided by) the scaling
\&factor \fB \-fac\fR (default 1.2). The remaining \fB .tpr\fR files will have about
\&equally\-spaced values in between these extremes. \fB Note\fR that you can set \fB \-ntpr\fR to 1
\&if you just want to find the optimal number of PME\-only nodes; in that case
\&your input \fB .tpr\fR file will remain unchanged.
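The equally-spaced scaling described above can be sketched as follows. This is only an illustration of the arithmetic (assuming tests numbered 0 to ntpr, with ntpr set to 4 and the default factor 1.2), not actual g_tune_pme output:

```shell
# Sketch: equally spaced rcoulomb scaling factors between 1.0 and fac,
# with the fourier grid shrunk by the inverse factor. Illustration only;
# the real factors are chosen internally by g_tune_pme.
ntpr=4   # number of the last test; tests are numbered 0..ntpr
fac=1.2  # upper scaling factor
for i in $(seq 0 "$ntpr"); do
    awk -v i="$i" -v n="$ntpr" -v f="$fac" 'BEGIN {
        s = 1.0 + i * (f - 1.0) / n
        printf "test %d: rcoulomb scaled by %.2f, grid scaled by %.2f\n", i, s, 1 / s
    }'
done
```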
\&For the benchmark runs, the default of 1000 time steps should suffice for most
\&MD systems. The dynamic load balancing needs about 100 time steps
\&to adapt to local load imbalances; therefore, the time step counters
\&are by default reset after 100 steps. For large systems
\&(>1M atoms) you may have to set \fB \-resetstep\fR to a higher value.
\&From the 'DD' load imbalance entries in the md.log output file you
\&can tell after how many steps the load is sufficiently balanced. Example call:

\fB g_tune_pme \-np 64 \-s protein.tpr \-launch\fR
\&After calling \fB mdrun\fR several times, detailed performance information
\&is available in the output file \fB perf.out\fR.
\&\fB Note\fR that during the benchmarks, a couple of temporary files are written
\&(options \fB \-b\fR*); these are automatically deleted after each test.

\&If you want the simulation to be started automatically with the
\&optimized parameters, use the command line option \fB \-launch\fR.
.SH FILES
.BI "\-p" " perf.out"
.BI "\-err" " errors.log"
.BI "\-so" " tuned.tpr"
Run input file: tpr tpb tpa
.BI "\-s" " topol.tpr"
Run input file: tpr tpb tpa
.BI "\-o" " traj.trr"
Full precision trajectory: trr trj cpt
.BI "\-x" " traj.xtc"
Compressed trajectory (portable xdr format)
.BI "\-cpi" " state.cpt"
.BI "\-cpo" " state.cpt"
.BI "\-c" " confout.gro"
Structure file: gro g96 pdb etc.
.BI "\-e" " ener.edr"
.BI "\-dhdl" " dhdl.xvg"
.BI "\-field" " field.xvg"
.BI "\-table" " table.xvg"
.BI "\-tablep" " tablep.xvg"
.BI "\-tableb" " table.xvg"
.BI "\-rerun" " rerun.xtc"
Trajectory: xtc trr trj gro g96 pdb cpt
.BI "\-tpi" " tpi.xvg"
.BI "\-tpid" " tpidist.xvg"
.BI "\-ei" " sam.edi"
.BI "\-eo" " sam.edo"
.BI "\-j" " wham.gct"
General coupling stuff
.BI "\-jo" " bam.gct"
General coupling stuff
.BI "\-ffout" " gct.xvg"
.BI "\-devout" " deviatie.xvg"
.BI "\-runav" " runaver.xvg"
.BI "\-px" " pullx.xvg"
.BI "\-pf" " pullf.xvg"
.BI "\-mtx" " nm.mtx"
.BI "\-dn" " dipole.ndx"
.BI "\-bo" " bench.trr"
Full precision trajectory: trr trj cpt
.BI "\-bx" " bench.xtc"
Compressed trajectory (portable xdr format)
.BI "\-bcpo" " bench.cpt"
.BI "\-bc" " bench.gro"
Structure file: gro g96 pdb etc.
.BI "\-be" " bench.edr"
.BI "\-bg" " bench.log"
.BI "\-beo" " bench.edo"
.BI "\-bdhdl" " benchdhdl.xvg"
.BI "\-bfield" " benchfld.xvg"
.BI "\-btpi" " benchtpi.xvg"
.BI "\-btpid" " benchtpid.xvg"
.BI "\-bjo" " bench.gct"
General coupling stuff
.BI "\-bffout" " benchgct.xvg"
.BI "\-bdevout" " benchdev.xvg"
.BI "\-brunav" " benchrnav.xvg"
.BI "\-bpx" " benchpx.xvg"
.BI "\-bpf" " benchpf.xvg"
.BI "\-bmtx" " benchn.mtx"
.BI "\-bdn" " bench.ndx"
.SH OTHER OPTIONS
.BI "\-[no]h" "no "
Print help info and quit
.BI "\-[no]version" "no "
Print version info and quit
.BI "\-nice" " int" " 0"
Set the nicelevel
.BI "\-xvg" " enum" " xmgrace"
xvg plot formatting: \fB xmgrace\fR, \fB xmgr\fR or \fB none\fR
.BI "\-np" " int" " 1"
Number of nodes to run the tests on (must be > 2 for separate PME nodes)
.BI "\-npstring" " enum" " \-np"
Specify the number of processors to \fB $MPIRUN\fR using this string: \fB \-np\fR, \fB \-n\fR or \fB none\fR
.BI "\-nt" " int" " 1"
Number of threads to run the tests on (turns MPI & mpirun off)
.BI "\-r" " int" " 2"
Repeat each test this often
.BI "\-max" " real" " 0.5 "
Max fraction of PME nodes to test with
.BI "\-min" " real" " 0.25 "
Min fraction of PME nodes to test with
.BI "\-npme" " enum" " auto"
Benchmark all possible values for \fB \-npme\fR or just the subset that is expected to perform well: \fB auto\fR, \fB all\fR or \fB subset\fR
.BI "\-fix" " int" " \-2"
If >= \-1, do not vary the number of PME\-only nodes; instead use this fixed value and only vary rcoulomb and the PME grid spacing.
.BI "\-upfac" " real" " 1.2 "
Upper limit for rcoulomb scaling factor (Note that rcoulomb upscaling results in fourier grid downscaling)
.BI "\-downfac" " real" " 1 "
Lower limit for rcoulomb scaling factor
.BI "\-ntpr" " int" " 0"
Number of \fB .tpr\fR files to benchmark. Create this many files with scaling factors ranging from 1.0 to fac. If < 1, automatically choose the number of \fB .tpr\fR files to test
.BI "\-four" " real" " 0 "
Use this fourierspacing value instead of the grid found in the \fB .tpr\fR input file. (Spacing applies to a scaling factor of 1.0 if multiple \fB .tpr\fR files are written)
.BI "\-steps" " step" " 1000"
Take timings for this many steps in the benchmark runs
.BI "\-resetstep" " int" " 100"
Let dlb equilibrate this many steps before timings are taken (reset cycle counters after this many steps)
.BI "\-simsteps" " step" " \-1"
If non\-negative, perform this many steps in the real run (overrides nsteps from the \fB .tpr\fR file; \fB .cpt\fR steps are added)
.BI "\-[no]launch" "no "
Launch the real simulation after optimization
.BI "\-deffnm" " string" " "
Set the default filename for all file options at launch time
.BI "\-ddorder" " enum" " interleave"
DD node order: \fB interleave\fR, \fB pp_pme\fR or \fB cartesian\fR
.BI "\-[no]ddcheck" "yes "
Check for all bonded interactions with DD
.BI "\-rdd" " real" " 0 "
The maximum distance for bonded interactions with DD (nm); 0 means determine it from the initial coordinates
.BI "\-rcon" " real" " 0 "
Maximum distance for P\-LINCS (nm); 0 means estimate it
.BI "\-dlb" " enum" " auto"
Dynamic load balancing (with DD): \fB auto\fR, \fB no\fR or \fB yes\fR
.BI "\-dds" " real" " 0.8 "
Minimum allowed dlb scaling of the DD cell size
.BI "\-gcom" " int" " \-1"
Global communication frequency
.BI "\-[no]compact" "yes "
Write a compact log file
.BI "\-[no]seppot" "no "
Write separate V and dVdl terms for each interaction type and node to the log file(s)
.BI "\-pforce" " real" " \-1 "
Print all forces larger than this (kJ/mol nm)
.BI "\-[no]reprod" "no "
Try to avoid optimizations that affect binary reproducibility
.BI "\-cpt" " real" " 15 "
Checkpoint interval (minutes)
.BI "\-[no]cpnum" "no "
Keep and number checkpoint files
.BI "\-[no]append" "yes "
Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)
.BI "\-maxh" " real" " \-1 "
Terminate after 0.99 times this time (hours)
.BI "\-multi" " int" " 0"
Do multiple simulations in parallel
.BI "\-replex" " int" " 0"
Attempt replica exchange periodically with this period (steps)
.BI "\-reseed" " int" " \-1"
Seed for replica exchange; \-1 means generate a seed
.BI "\-[no]ionize" "no "
Do a simulation including the effect of an X\-ray bombardment on your system
.SH SEE ALSO
More information about \fBGROMACS\fR is available at <\fIhttp://www.gromacs.org/\fR>.