2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012,2013, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
53 #include "checkpoint.h"
55 #include "thread_mpi.h"
61 int cmain(int argc,char *argv[])
63 const char *desc[] = {
64 "This is an experimental release of GROMACS for accelerated",
65 "Molecular Dynamics simulations on GPU processors. Support is provided",
66 "by the OpenMM library (https://simtk.org/home/openmm).[PAR]",
68 "This release is targeted at developers and advanced users and",
69 "care should be taken before production use. The following should be",
70 "noted before using the program:[PAR]",
71 " * The current release runs only on modern nVidia GPU hardware with CUDA support.",
72 "Make sure that the necessary CUDA drivers and libraries for your operating system",
73 "are already installed. The CUDA SDK also should be installed in order to compile",
74 "the program from source (http://www.nvidia.com/object/cuda_home.html).[PAR]",
75 " * Multiple GPU cards are not supported.[PAR]",
76 " * Only a small subset of the GROMACS features and options are supported on the GPUs.",
77 "See below for a detailed list.[PAR]",
78 " * Consumer level GPU cards are known to often have problems with faulty memory.",
79 "It is recommended that a full memory check of the cards is done at least once",
80 "(for example, using the memtest=full option).",
81 "A partial memory check (for example, memtest=15) before and",
82 "after the simulation run would help spot",
83 "problems resulting from processor overheating.[PAR]",
84 " * The maximum size of the simulated systems depends on the available",
85 "GPU memory,for example, a GTX280 with 1GB memory has been tested with systems",
86 "of up to about 100,000 atoms.[PAR]",
87 " * In order to take a full advantage of the GPU platform features, many algorithms",
88 "have been implemented in a very different way than they are on the CPUs.",
89 "Therefore numercal correspondence between properties of the state of",
90 "simulated systems should not be expected. Moreover, the values will likely vary",
91 "when simulations are done on different GPU hardware.[PAR]",
92 " * Frequent retrieval of system state information such as",
93 "trajectory coordinates and energies can greatly influence the performance",
94 "of the program due to slow CPU<->GPU memory transfer speed.[PAR]",
95 " * MD algorithms are complex, and although the Gromacs code is highly tuned for them,",
96 "they often do not translate very well onto the streaming architetures.",
97 "Realistic expectations about the achievable speed-up from test with GTX280:",
98 "For small protein systems in implicit solvent using all-vs-all kernels the acceleration",
99 "can be as high as 20 times, but in most other setups involving cutoffs and PME the",
100 "acceleration is usually only ~4 times relative to a 3GHz CPU.[PAR]",
101 "Supported features:[PAR]",
102 " * Integrators: md/md-vv/md-vv-avek, sd/sd1 and bd.\n",
103 " * Long-range interactions (option coulombtype): Reaction-Field, Ewald, PME, and cut-off (for Implicit Solvent only)\n",
104 " * Temperature control: Supported only with the md/md-vv/md-vv-avek, sd/sd1 and bd integrators.\n",
105 " * Pressure control: Supported.\n",
106 " * Implicit solvent: Supported.\n",
107 "A detailed description can be found on the GROMACS website:\n",
108 "http://www.gromacs.org/gpu[PAR]",
109 /* From the original mdrun documentation */
110 "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
111 "and distributes the topology over nodes if needed.",
112 "[TT]mdrun[tt] produces at least four output files.",
113 "A single log file ([TT]-g[tt]) is written, unless the option",
114 "[TT]-seppot[tt] is used, in which case each node writes a log file.",
115 "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and",
116 "optionally forces.",
117 "The structure file ([TT]-c[tt]) contains the coordinates and",
118 "velocities of the last step.",
119 "The energy file ([TT]-e[tt]) contains energies, the temperature,",
120 "pressure, etc, a lot of these things are also printed in the log file.",
121 "Optionally coordinates can be written to a compressed trajectory file",
122 "([TT]-x[tt]).[PAR]",
123 /* OpenMM-specific information */
124 "Usage with OpenMM:[BR]",
125 "[TT]mdrun -device \"OpenMM:platform=Cuda,memtest=15,deviceid=0,force-device=no\"[tt][PAR]",
127 " [TT]platform[tt] = Cuda\t\t:\tThe only available value. OpenCL support will be available in future.\n",
128 " [TT]memtest[tt] = 15\t\t:\tRun a partial, random GPU memory test for the given amount of seconds. A full test",
129 "(recommended!) can be run with \"memtest=full\". Memory testing can be disabled with \"memtest=off\".\n",
130 " [TT]deviceid[tt] = 0\t\t:\tSpecify the target device when multiple cards are present.",
131 "Only one card can be used at any given time though.\n",
132 " [TT]force-device[tt] = no\t\t:\tIf set to \"yes\" [TT]mdrun[tt] will be forced to execute on",
133 "hardware that is not officially supported. GPU acceleration can also be achieved on older",
134 "but Cuda capable cards, although the simulation might be too slow, and the memory limits too strict.",
138 { efTPX, NULL, NULL, ffREAD },
139 { efTRN, "-o", NULL, ffWRITE },
140 { efXTC, "-x", NULL, ffOPTWR },
141 { efCPT, "-cpi", NULL, ffOPTRD },
142 { efCPT, "-cpo", NULL, ffOPTWR },
143 { efSTO, "-c", "confout", ffWRITE },
144 { efEDR, "-e", "ener", ffWRITE },
145 { efLOG, "-g", "md", ffWRITE },
146 { efXVG, "-dhdl", "dhdl", ffOPTWR },
147 { efXVG, "-field", "field", ffOPTWR },
148 { efXVG, "-table", "table", ffOPTRD },
149 { efXVG, "-tabletf", "tabletf", ffOPTRD },
150 { efXVG, "-tablep", "tablep", ffOPTRD },
151 { efXVG, "-tableb", "table", ffOPTRD },
152 { efTRX, "-rerun", "rerun", ffOPTRD },
153 { efXVG, "-tpi", "tpi", ffOPTWR },
154 { efXVG, "-tpid", "tpidist", ffOPTWR },
155 { efEDI, "-ei", "sam", ffOPTRD },
156 { efEDO, "-eo", "sam", ffOPTWR },
157 { efGCT, "-j", "wham", ffOPTRD },
158 { efGCT, "-jo", "bam", ffOPTWR },
159 { efXVG, "-ffout", "gct", ffOPTWR },
160 { efXVG, "-devout", "deviatie", ffOPTWR },
161 { efXVG, "-runav", "runaver", ffOPTWR },
162 { efXVG, "-px", "pullx", ffOPTWR },
163 { efXVG, "-pf", "pullf", ffOPTWR },
164 { efXVG, "-ro", "rotation", ffOPTWR },
165 { efLOG, "-ra", "rotangles",ffOPTWR },
166 { efLOG, "-rs", "rotslabs", ffOPTWR },
167 { efLOG, "-rt", "rottorque",ffOPTWR },
168 { efMTX, "-mtx", "nm", ffOPTWR },
169 { efNDX, "-dn", "dipole", ffOPTWR },
170 { efRND, "-multidir",NULL, ffOPTRDMULT},
171 { efDAT, "-membed", "membed", ffOPTRD },
172 { efTOP, "-mp", "membed", ffOPTRD },
173 { efNDX, "-mn", "membed", ffOPTRD }
175 #define NFILE asize(fnm)
177 /* Command line options ! */
178 gmx_bool bCart = FALSE;
179 gmx_bool bPPPME = FALSE;
180 gmx_bool bPartDec = FALSE;
181 gmx_bool bDDBondCheck = TRUE;
182 gmx_bool bDDBondComm = TRUE;
183 gmx_bool bTunePME = TRUE;
184 gmx_bool bTestVerlet = FALSE;
185 gmx_bool bVerbose = FALSE;
186 gmx_bool bCompact = TRUE;
187 gmx_bool bSepPot = FALSE;
188 gmx_bool bRerunVSite = FALSE;
189 gmx_bool bIonize = FALSE;
190 gmx_bool bConfout = TRUE;
191 gmx_bool bReproducible = FALSE;
195 int nstglobalcomm=-1;
201 int nsteps=-2; /* the value -2 means that the mdp option will be used */
203 rvec realddxyz={0,0,0};
204 const char *ddno_opt[ddnoNR+1] =
205 { NULL, "interleave", "pp_pme", "cartesian", NULL };
206 const char *dddlb_opt[] =
207 { NULL, "auto", "no", "yes", NULL };
208 const char *nbpu_opt[] =
209 { NULL, "auto", "cpu", "gpu", "gpu_cpu", NULL };
210 real rdd=0.0,rconstr=0.0,dlb_scale=0.8,pforce=-1;
211 char *ddcsx=NULL,*ddcsy=NULL,*ddcsz=NULL;
212 real cpt_period=15.0,max_hours=-1;
213 gmx_bool bAppendFiles=TRUE;
214 gmx_bool bKeepAndNumCPT=FALSE;
215 gmx_bool bResetCountersHalfWay=FALSE;
216 output_env_t oenv=NULL;
217 const char *deviceOptions = "";
219 gmx_hw_opt_t hw_opt={0,0,0,0,TRUE,FALSE,0,NULL};
223 { "-pd", FALSE, etBOOL,{&bPartDec},
224 "Use particle decompostion" },
225 { "-dd", FALSE, etRVEC,{&realddxyz},
226 "Domain decomposition grid, 0 is optimize" },
227 { "-ddorder", FALSE, etENUM, {ddno_opt},
229 { "-npme", FALSE, etINT, {&npme},
230 "Number of separate nodes to be used for PME, -1 is guess" },
231 { "-nt", FALSE, etINT, {&hw_opt.nthreads_tot},
232 "Total number of threads to start (0 is guess)" },
233 { "-ntmpi", FALSE, etINT, {&hw_opt.nthreads_tmpi},
234 "Number of thread-MPI threads to start (0 is guess)" },
235 { "-ntomp", FALSE, etINT, {&hw_opt.nthreads_omp},
236 "Number of OpenMP threads per MPI process/thread to start (0 is guess)" },
237 { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme},
238 "Number of OpenMP threads per MPI process/thread to start (0 is -ntomp)" },
239 { "-pin", FALSE, etBOOL, {&hw_opt.bThreadPinning},
240 "Pin OpenMP threads to cores" },
241 { "-pinht", FALSE, etBOOL, {&hw_opt.bPinHyperthreading},
242 "Always pin threads to Hyper-Threading cores" },
243 { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset},
244 "Core offset for pinning (for running multiple mdrun processes on a single physical node)" },
245 { "-gpu_id", FALSE, etSTR, {&hw_opt.gpu_id},
246 "List of GPU id's to use" },
247 { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
248 "Check for all bonded interactions with DD" },
249 { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm},
250 "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
251 { "-rdd", FALSE, etREAL, {&rdd},
252 "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
253 { "-rcon", FALSE, etREAL, {&rconstr},
254 "Maximum distance for P-LINCS (nm), 0 is estimate" },
255 { "-dlb", FALSE, etENUM, {dddlb_opt},
256 "Dynamic load balancing (with DD)" },
257 { "-dds", FALSE, etREAL, {&dlb_scale},
258 "Minimum allowed dlb scaling of the DD cell size" },
259 { "-ddcsx", FALSE, etSTR, {&ddcsx},
260 "HIDDENThe DD cell sizes in x" },
261 { "-ddcsy", FALSE, etSTR, {&ddcsy},
262 "HIDDENThe DD cell sizes in y" },
263 { "-ddcsz", FALSE, etSTR, {&ddcsz},
264 "HIDDENThe DD cell sizes in z" },
265 { "-gcom", FALSE, etINT,{&nstglobalcomm},
266 "Global communication frequency" },
267 { "-nb", FALSE, etENUM, {&nbpu_opt},
268 "Calculate non-bonded interactions on" },
269 { "-tunepme", FALSE, etBOOL, {&bTunePME},
270 "Optimize PME load between PP/PME nodes or GPU/CPU" },
271 { "-testverlet", FALSE, etBOOL, {&bTestVerlet},
272 "Test the Verlet non-bonded scheme" },
273 { "-v", FALSE, etBOOL,{&bVerbose},
274 "Be loud and noisy" },
275 { "-compact", FALSE, etBOOL,{&bCompact},
276 "Write a compact log file" },
277 { "-seppot", FALSE, etBOOL, {&bSepPot},
278 "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
279 { "-pforce", FALSE, etREAL, {&pforce},
280 "Print all forces larger than this (kJ/mol nm)" },
281 { "-reprod", FALSE, etBOOL,{&bReproducible},
282 "Try to avoid optimizations that affect binary reproducibility" },
283 { "-cpt", FALSE, etREAL, {&cpt_period},
284 "Checkpoint interval (minutes)" },
285 { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT},
286 "Keep and number checkpoint files" },
287 { "-append", FALSE, etBOOL, {&bAppendFiles},
288 "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" },
289 { "-nsteps", FALSE, etINT, {&nsteps},
290 "Run this number of steps, overrides .mdp file option" },
291 { "-maxh", FALSE, etREAL, {&max_hours},
292 "Terminate after 0.99 times this time (hours)" },
293 { "-multi", FALSE, etINT,{&nmultisim},
294 "Do multiple simulations in parallel" },
295 { "-replex", FALSE, etINT, {&repl_ex_nst},
296 "Attempt replica exchange periodically with this period (steps)" },
297 { "-nex", FALSE, etINT, {&repl_ex_nex},
298 "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). -nex zero or not specified gives neighbor replica exchange." },
299 { "-reseed", FALSE, etINT, {&repl_ex_seed},
300 "Seed for replica exchange, -1 is generate a seed" },
301 { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
302 "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
303 { "-ionize", FALSE, etBOOL,{&bIonize},
304 "Do a simulation including the effect of an X-Ray bombardment on your system" },
305 { "-confout", FALSE, etBOOL, {&bConfout},
306 "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" },
307 { "-stepout", FALSE, etINT, {&nstepout},
308 "HIDDENFrequency of writing the remaining runtime" },
309 { "-resetstep", FALSE, etINT, {&resetstep},
310 "HIDDENReset cycle counters after these many time steps" },
311 { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
312 "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" },
313 { "-device", FALSE, etSTR, {&deviceOptions},
314 "Device option string" }
317 unsigned long Flags, PCA_Flags;
322 int sim_part,sim_part_fn;
323 const char *part_suffix=".part";
326 char **multidir=NULL;
329 cr = init_par(&argc,&argv);
332 CopyRight(stderr, argv[0]);
334 PCA_Flags = (PCA_CAN_SET_DEFFNM | (MASTER(cr) ? 0 : PCA_QUIET));
336 /* Comment this in to do fexist calls only on master
337 * does not work with rerun or tables at the moment
338 * also comment out the version of init_forcerec in md.c
339 * with NULL instead of opt2fn
344 PCA_Flags |= PCA_NOT_READ_NODE;
348 parse_common_args(&argc,argv,PCA_Flags, NFILE,fnm,asize(pa),pa,
349 asize(desc),desc,0,NULL, &oenv);
353 /* we set these early because they might be used in init_multisystem()
354 Note that there is the potential for npme>nnodes until the number of
355 threads is set later on, if there's thread parallelization. That shouldn't
357 dd_node_order = nenum(ddno_opt);
358 cr->npmenodes = npme;
360 /* now check the -multi and -multidir option */
361 if (opt2bSet("-multidir", NFILE, fnm))
366 gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive.");
368 nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm);
372 if (repl_ex_nst != 0 && nmultisim < 2)
373 gmx_fatal(FARGS,"Need at least two replicas for replica exchange (option -multi)");
376 gmx_fatal(FARGS,"Replica exchange number of exchanges needs to be positive");
379 #ifndef GMX_THREAD_MPI
380 gmx_bool bParFn = (multidir == NULL);
381 init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn);
383 gmx_fatal(FARGS,"mdrun -multi is not supported with the thread library.Please compile GROMACS with MPI support");
387 bAddPart = !bAppendFiles;
389 /* Check if there is ANY checkpoint file available */
391 sim_part_fn = sim_part;
392 if (opt2bSet("-cpi",NFILE,fnm))
394 if (bSepPot && bAppendFiles)
396 gmx_fatal(FARGS,"Output file appending is not supported with -seppot");
400 read_checkpoint_simulation_part(opt2fn_master("-cpi", NFILE,
402 &sim_part_fn,NULL,cr,
403 bAppendFiles,NFILE,fnm,
404 part_suffix,&bAddPart);
405 if (sim_part_fn==0 && MASTER(cr))
407 fprintf(stdout,"No previous checkpoint file present, assuming this is a new run.\n");
411 sim_part = sim_part_fn + 1;
414 if (MULTISIM(cr) && MASTER(cr))
416 check_multi_int(stdout,cr->ms,sim_part,"simulation part");
421 bAppendFiles = FALSE;
426 sim_part_fn = sim_part;
431 /* Rename all output files (except checkpoint files) */
432 /* create new part name first (zero-filled) */
433 sprintf(suffix,"%s%04d",part_suffix,sim_part_fn);
435 add_suffix_to_output_names(fnm,NFILE,suffix);
438 fprintf(stdout,"Checkpoint file is from part %d, new output files will be suffixed '%s'.\n",sim_part-1,suffix);
442 Flags = opt2bSet("-rerun",NFILE,fnm) ? MD_RERUN : 0;
443 Flags = Flags | (bSepPot ? MD_SEPPOT : 0);
444 Flags = Flags | (bIonize ? MD_IONIZE : 0);
445 Flags = Flags | (bPartDec ? MD_PARTDEC : 0);
446 Flags = Flags | (bDDBondCheck ? MD_DDBONDCHECK : 0);
447 Flags = Flags | (bDDBondComm ? MD_DDBONDCOMM : 0);
448 Flags = Flags | (bTunePME ? MD_TUNEPME : 0);
449 Flags = Flags | (bTestVerlet ? MD_TESTVERLET : 0);
450 Flags = Flags | (bConfout ? MD_CONFOUT : 0);
451 Flags = Flags | (bRerunVSite ? MD_RERUN_VSITE : 0);
452 Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0);
453 Flags = Flags | (bAppendFiles ? MD_APPENDFILES : 0);
454 Flags = Flags | (opt2parg_bSet("-append", asize(pa),pa) ? MD_APPENDFILESSET : 0);
455 Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0);
456 Flags = Flags | (sim_part>1 ? MD_STARTFROMCPT : 0);
457 Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0);
460 /* We postpone opening the log file if we are appending, so we can
461 first truncate the old log file and append to the correct position
463 if ((MASTER(cr) || bSepPot) && !bAppendFiles)
465 gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,
466 !bSepPot,Flags & MD_APPENDFILES,&fplog);
467 CopyRight(fplog,argv[0]);
468 please_cite(fplog,"Hess2008b");
469 please_cite(fplog,"Spoel2005a");
470 please_cite(fplog,"Lindahl2001a");
471 please_cite(fplog,"Berendsen95a");
473 else if (!MASTER(cr) && bSepPot)
475 gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,!bSepPot,Flags,&fplog);
482 ddxyz[XX] = (int)(realddxyz[XX] + 0.5);
483 ddxyz[YY] = (int)(realddxyz[YY] + 0.5);
484 ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5);
486 rc = mdrunner(&hw_opt, fplog,cr,NFILE,fnm,oenv,bVerbose,bCompact,
487 nstglobalcomm, ddxyz,dd_node_order,rdd,rconstr,
488 dddlb_opt[0],dlb_scale,ddcsx,ddcsy,ddcsz,
490 nsteps,nstepout,resetstep,
491 nmultisim,repl_ex_nst,repl_ex_nex,repl_ex_seed,
492 pforce, cpt_period,max_hours,deviceOptions,Flags);
496 if (MULTIMASTER(cr)) {
500 /* Log file has to be closed in mdrunner if we are appending to it
501 (fplog not set here) */
502 if (MASTER(cr) && !bAppendFiles)
504 gmx_log_close(fplog);