2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
53 #include "checkpoint.h"
55 #include "thread_mpi.h"
61 int cmain(int argc,char *argv[])
63 const char *desc[] = {
64 "This is an experimental release of GROMACS for accelerated",
65 "Molecular Dynamics simulations on GPU processors. Support is provided",
66 "by the OpenMM library (https://simtk.org/home/openmm).[PAR]",
68 "This release is targeted at developers and advanced users and",
69 "care should be taken before production use. The following should be",
70 "noted before using the program:[PAR]",
71 " * The current release runs only on modern nVidia GPU hardware with CUDA support.",
72 "Make sure that the necessary CUDA drivers and libraries for your operating system",
73 "are already installed. The CUDA SDK also should be installed in order to compile",
74 "the program from source (http://www.nvidia.com/object/cuda_home.html).[PAR]",
75 " * Multiple GPU cards are not supported.[PAR]",
76 " * Only a small subset of the GROMACS features and options are supported on the GPUs.",
77 "See below for a detailed list.[PAR]",
78 " * Consumer level GPU cards are known to often have problems with faulty memory.",
79 "It is recommended that a full memory check of the cards is done at least once",
80 "(for example, using the memtest=full option).",
81 "A partial memory check (for example, memtest=15) before and",
82 "after the simulation run would help spot",
83 "problems resulting from processor overheating.[PAR]",
84 " * The maximum size of the simulated systems depends on the available",
85 "GPU memory,for example, a GTX280 with 1GB memory has been tested with systems",
86 "of up to about 100,000 atoms.[PAR]",
87 " * In order to take a full advantage of the GPU platform features, many algorithms",
88 "have been implemented in a very different way than they are on the CPUs.",
89 "Therefore numercal correspondence between properties of the state of",
90 "simulated systems should not be expected. Moreover, the values will likely vary",
91 "when simulations are done on different GPU hardware.[PAR]",
92 " * Frequent retrieval of system state information such as",
93 "trajectory coordinates and energies can greatly influence the performance",
94 "of the program due to slow CPU<->GPU memory transfer speed.[PAR]",
95 " * MD algorithms are complex, and although the Gromacs code is highly tuned for them,",
96 "they often do not translate very well onto the streaming architetures.",
97 "Realistic expectations about the achievable speed-up from test with GTX280:",
98 "For small protein systems in implicit solvent using all-vs-all kernels the acceleration",
99 "can be as high as 20 times, but in most other setups involving cutoffs and PME the",
100 "acceleration is usually only ~4 times relative to a 3GHz CPU.[PAR]",
101 "Supported features:[PAR]",
102 " * Integrators: md/md-vv/md-vv-avek, sd/sd1 and bd.\n",
103 " * Long-range interactions (option coulombtype): Reaction-Field, Ewald, PME, and cut-off (for Implicit Solvent only)\n",
104 " * Temperature control: Supported only with the md/md-vv/md-vv-avek, sd/sd1 and bd integrators.\n",
105 " * Pressure control: Supported.\n",
106 " * Implicit solvent: Supported.\n",
107 "A detailed description can be found on the GROMACS website:\n",
108 "http://www.gromacs.org/gpu[PAR]",
109 /* From the original mdrun documentation */
110 "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
111 "and distributes the topology over nodes if needed.",
112 "[TT]mdrun[tt] produces at least four output files.",
113 "A single log file ([TT]-g[tt]) is written, unless the option",
114 "[TT]-seppot[tt] is used, in which case each node writes a log file.",
115 "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and",
116 "optionally forces.",
117 "The structure file ([TT]-c[tt]) contains the coordinates and",
118 "velocities of the last step.",
119 "The energy file ([TT]-e[tt]) contains energies, the temperature,",
120 "pressure, etc, a lot of these things are also printed in the log file.",
121 "Optionally coordinates can be written to a compressed trajectory file",
122 "([TT]-x[tt]).[PAR]",
123 /* OpenMM-specific information */
124 "Usage with OpenMM:[BR]",
125 "[TT]mdrun -device \"OpenMM:platform=Cuda,memtest=15,deviceid=0,force-device=no\"[tt][PAR]",
127 " [TT]platform[tt] = Cuda\t\t:\tThe only available value. OpenCL support will be available in future.\n",
128 " [TT]memtest[tt] = 15\t\t:\tRun a partial, random GPU memory test for the given amount of seconds. A full test",
129 "(recommended!) can be run with \"memtest=full\". Memory testing can be disabled with \"memtest=off\".\n",
130 " [TT]deviceid[tt] = 0\t\t:\tSpecify the target device when multiple cards are present.",
131 "Only one card can be used at any given time though.\n",
132 " [TT]force-device[tt] = no\t\t:\tIf set to \"yes\" [TT]mdrun[tt] will be forced to execute on",
133 "hardware that is not officially supported. GPU acceleration can also be achieved on older",
134 "but Cuda capable cards, although the simulation might be too slow, and the memory limits too strict.",
138 { efTPX, NULL, NULL, ffREAD },
139 { efTRN, "-o", NULL, ffWRITE },
140 { efXTC, "-x", NULL, ffOPTWR },
141 { efCPT, "-cpi", NULL, ffOPTRD },
142 { efCPT, "-cpo", NULL, ffOPTWR },
143 { efSTO, "-c", "confout", ffWRITE },
144 { efEDR, "-e", "ener", ffWRITE },
145 { efLOG, "-g", "md", ffWRITE },
146 { efXVG, "-dhdl", "dhdl", ffOPTWR },
147 { efXVG, "-field", "field", ffOPTWR },
148 { efXVG, "-table", "table", ffOPTRD },
149 { efXVG, "-tabletf", "tabletf", ffOPTRD },
150 { efXVG, "-tablep", "tablep", ffOPTRD },
151 { efXVG, "-tableb", "table", ffOPTRD },
152 { efTRX, "-rerun", "rerun", ffOPTRD },
153 { efXVG, "-tpi", "tpi", ffOPTWR },
154 { efXVG, "-tpid", "tpidist", ffOPTWR },
155 { efEDI, "-ei", "sam", ffOPTRD },
156 { efXVG, "-eo", "sam", ffOPTWR },
157 { efGCT, "-j", "wham", ffOPTRD },
158 { efGCT, "-jo", "bam", ffOPTWR },
159 { efXVG, "-ffout", "gct", ffOPTWR },
160 { efXVG, "-devout", "deviatie", ffOPTWR },
161 { efXVG, "-runav", "runaver", ffOPTWR },
162 { efXVG, "-px", "pullx", ffOPTWR },
163 { efXVG, "-pf", "pullf", ffOPTWR },
164 { efXVG, "-ro", "rotation", ffOPTWR },
165 { efLOG, "-ra", "rotangles",ffOPTWR },
166 { efLOG, "-rs", "rotslabs", ffOPTWR },
167 { efLOG, "-rt", "rottorque",ffOPTWR },
168 { efMTX, "-mtx", "nm", ffOPTWR },
169 { efNDX, "-dn", "dipole", ffOPTWR },
170 { efRND, "-multidir",NULL, ffOPTRDMULT},
171 { efDAT, "-membed", "membed", ffOPTRD },
172 { efTOP, "-mp", "membed", ffOPTRD },
173 { efNDX, "-mn", "membed", ffOPTRD }
175 #define NFILE asize(fnm)
177 /* Command line options ! */
178 gmx_bool bCart = FALSE;
179 gmx_bool bPPPME = FALSE;
180 gmx_bool bPartDec = FALSE;
181 gmx_bool bDDBondCheck = TRUE;
182 gmx_bool bDDBondComm = TRUE;
183 gmx_bool bTunePME = TRUE;
184 gmx_bool bTestVerlet = FALSE;
185 gmx_bool bVerbose = FALSE;
186 gmx_bool bCompact = TRUE;
187 gmx_bool bSepPot = FALSE;
188 gmx_bool bRerunVSite = FALSE;
189 gmx_bool bIonize = FALSE;
190 gmx_bool bConfout = TRUE;
191 gmx_bool bReproducible = FALSE;
195 int nstglobalcomm=-1;
201 int nsteps=-2; /* the value -2 means that the mdp option will be used */
203 rvec realddxyz={0,0,0};
204 const char *ddno_opt[ddnoNR+1] =
205 { NULL, "interleave", "pp_pme", "cartesian", NULL };
206 const char *dddlb_opt[] =
207 { NULL, "auto", "no", "yes", NULL };
208 const char *thread_aff_opt[threadaffNR+1] =
209 { NULL, "auto", "no", "yes", NULL };
210 const char *nbpu_opt[] =
211 { NULL, "auto", "cpu", "gpu", "gpu_cpu", NULL };
212 real rdd=0.0,rconstr=0.0,dlb_scale=0.8,pforce=-1;
213 char *ddcsx=NULL,*ddcsy=NULL,*ddcsz=NULL;
214 real cpt_period=15.0,max_hours=-1;
215 gmx_bool bAppendFiles=TRUE;
216 gmx_bool bKeepAndNumCPT=FALSE;
217 gmx_bool bResetCountersHalfWay=FALSE;
218 output_env_t oenv=NULL;
219 const char *deviceOptions = "";
221 gmx_hw_opt_t hw_opt={0,0,0,0,TRUE,FALSE,0,NULL};
225 { "-pd", FALSE, etBOOL,{&bPartDec},
226 "Use particle decompostion" },
227 { "-dd", FALSE, etRVEC,{&realddxyz},
228 "Domain decomposition grid, 0 is optimize" },
229 { "-ddorder", FALSE, etENUM, {ddno_opt},
231 { "-npme", FALSE, etINT, {&npme},
232 "Number of separate nodes to be used for PME, -1 is guess" },
233 { "-nt", FALSE, etINT, {&hw_opt.nthreads_tot},
234 "Total number of threads to start (0 is guess)" },
235 { "-ntmpi", FALSE, etINT, {&hw_opt.nthreads_tmpi},
236 "Number of thread-MPI threads to start (0 is guess)" },
237 { "-ntomp", FALSE, etINT, {&hw_opt.nthreads_omp},
238 "Number of OpenMP threads per MPI process/thread to start (0 is guess)" },
239 { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme},
240 "Number of OpenMP threads per MPI process/thread to start (0 is -ntomp)" },
241 { "-pin", FALSE, etBOOL, {thread_aff_opt},
242 "Pin OpenMP threads to cores" },
243 { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset},
244 "Core offset for pinning (for running multiple mdrun processes on a single physical node)" },
245 { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride},
246 "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" },
247 { "-gpu_id", FALSE, etSTR, {&hw_opt.gpu_id},
248 "List of GPU id's to use" },
249 { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
250 "Check for all bonded interactions with DD" },
251 { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm},
252 "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
253 { "-rdd", FALSE, etREAL, {&rdd},
254 "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
255 { "-rcon", FALSE, etREAL, {&rconstr},
256 "Maximum distance for P-LINCS (nm), 0 is estimate" },
257 { "-dlb", FALSE, etENUM, {dddlb_opt},
258 "Dynamic load balancing (with DD)" },
259 { "-dds", FALSE, etREAL, {&dlb_scale},
260 "Minimum allowed dlb scaling of the DD cell size" },
261 { "-ddcsx", FALSE, etSTR, {&ddcsx},
262 "HIDDENThe DD cell sizes in x" },
263 { "-ddcsy", FALSE, etSTR, {&ddcsy},
264 "HIDDENThe DD cell sizes in y" },
265 { "-ddcsz", FALSE, etSTR, {&ddcsz},
266 "HIDDENThe DD cell sizes in z" },
267 { "-gcom", FALSE, etINT,{&nstglobalcomm},
268 "Global communication frequency" },
269 { "-nb", FALSE, etENUM, {&nbpu_opt},
270 "Calculate non-bonded interactions on" },
271 { "-tunepme", FALSE, etBOOL, {&bTunePME},
272 "Optimize PME load between PP/PME nodes or GPU/CPU" },
273 { "-testverlet", FALSE, etBOOL, {&bTestVerlet},
274 "Test the Verlet non-bonded scheme" },
275 { "-v", FALSE, etBOOL,{&bVerbose},
276 "Be loud and noisy" },
277 { "-compact", FALSE, etBOOL,{&bCompact},
278 "Write a compact log file" },
279 { "-seppot", FALSE, etBOOL, {&bSepPot},
280 "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
281 { "-pforce", FALSE, etREAL, {&pforce},
282 "Print all forces larger than this (kJ/mol nm)" },
283 { "-reprod", FALSE, etBOOL,{&bReproducible},
284 "Try to avoid optimizations that affect binary reproducibility" },
285 { "-cpt", FALSE, etREAL, {&cpt_period},
286 "Checkpoint interval (minutes)" },
287 { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT},
288 "Keep and number checkpoint files" },
289 { "-append", FALSE, etBOOL, {&bAppendFiles},
290 "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" },
291 { "-nsteps", FALSE, etINT, {&nsteps},
292 "Run this number of steps, overrides .mdp file option" },
293 { "-maxh", FALSE, etREAL, {&max_hours},
294 "Terminate after 0.99 times this time (hours)" },
295 { "-multi", FALSE, etINT,{&nmultisim},
296 "Do multiple simulations in parallel" },
297 { "-replex", FALSE, etINT, {&repl_ex_nst},
298 "Attempt replica exchange periodically with this period (steps)" },
299 { "-nex", FALSE, etINT, {&repl_ex_nex},
300 "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). -nex zero or not specified gives neighbor replica exchange." },
301 { "-reseed", FALSE, etINT, {&repl_ex_seed},
302 "Seed for replica exchange, -1 is generate a seed" },
303 { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
304 "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
305 { "-ionize", FALSE, etBOOL,{&bIonize},
306 "Do a simulation including the effect of an X-Ray bombardment on your system" },
307 { "-confout", FALSE, etBOOL, {&bConfout},
308 "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" },
309 { "-stepout", FALSE, etINT, {&nstepout},
310 "HIDDENFrequency of writing the remaining runtime" },
311 { "-resetstep", FALSE, etINT, {&resetstep},
312 "HIDDENReset cycle counters after these many time steps" },
313 { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
314 "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" },
315 { "-device", FALSE, etSTR, {&deviceOptions},
316 "Device option string" }
319 unsigned long Flags, PCA_Flags;
324 int sim_part,sim_part_fn;
325 const char *part_suffix=".part";
328 char **multidir=NULL;
331 cr = init_par(&argc,&argv);
334 CopyRight(stderr, argv[0]);
336 PCA_Flags = (PCA_CAN_SET_DEFFNM | (MASTER(cr) ? 0 : PCA_QUIET));
338 /* Uncomment this to do fexist calls only on the master.
339 * This does not work with rerun or tables at the moment;
340 * also comment out the version of init_forcerec in md.c
341 * with NULL instead of opt2fn
346 PCA_Flags |= PCA_NOT_READ_NODE;
350 parse_common_args(&argc,argv,PCA_Flags, NFILE,fnm,asize(pa),pa,
351 asize(desc),desc,0,NULL, &oenv);
355 /* We set these early because they might be used in init_multisystem().
356 Note that there is the potential for npme>nnodes until the number of
357 threads is set later on, if there's thread parallelization. That shouldn't
359 dd_node_order = nenum(ddno_opt);
360 cr->npmenodes = npme;
362 /* now check the -multi and -multidir option */
363 if (opt2bSet("-multidir", NFILE, fnm))
368 gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive.");
370 nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm);
374 if (repl_ex_nst != 0 && nmultisim < 2)
375 gmx_fatal(FARGS,"Need at least two replicas for replica exchange (option -multi)");
378 gmx_fatal(FARGS,"Replica exchange number of exchanges needs to be positive");
381 #ifndef GMX_THREAD_MPI
382 gmx_bool bParFn = (multidir == NULL);
383 init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn);
385 gmx_fatal(FARGS,"mdrun -multi is not supported with the thread library.Please compile GROMACS with MPI support");
389 bAddPart = !bAppendFiles;
391 /* Check if there is ANY checkpoint file available */
393 sim_part_fn = sim_part;
394 if (opt2bSet("-cpi",NFILE,fnm))
396 if (bSepPot && bAppendFiles)
398 gmx_fatal(FARGS,"Output file appending is not supported with -seppot");
402 read_checkpoint_simulation_part(opt2fn_master("-cpi", NFILE,
404 &sim_part_fn,NULL,cr,
405 bAppendFiles,NFILE,fnm,
406 part_suffix,&bAddPart);
407 if (sim_part_fn==0 && MASTER(cr))
409 fprintf(stdout,"No previous checkpoint file present, assuming this is a new run.\n");
413 sim_part = sim_part_fn + 1;
416 if (MULTISIM(cr) && MASTER(cr))
418 check_multi_int(stdout,cr->ms,sim_part,"simulation part", TRUE);
423 bAppendFiles = FALSE;
428 sim_part_fn = sim_part;
433 /* Rename all output files (except checkpoint files) */
434 /* create new part name first (zero-filled) */
435 sprintf(suffix,"%s%04d",part_suffix,sim_part_fn);
437 add_suffix_to_output_names(fnm,NFILE,suffix);
440 fprintf(stdout,"Checkpoint file is from part %d, new output files will be suffixed '%s'.\n",sim_part-1,suffix);
444 Flags = opt2bSet("-rerun",NFILE,fnm) ? MD_RERUN : 0;
445 Flags = Flags | (bSepPot ? MD_SEPPOT : 0);
446 Flags = Flags | (bIonize ? MD_IONIZE : 0);
447 Flags = Flags | (bPartDec ? MD_PARTDEC : 0);
448 Flags = Flags | (bDDBondCheck ? MD_DDBONDCHECK : 0);
449 Flags = Flags | (bDDBondComm ? MD_DDBONDCOMM : 0);
450 Flags = Flags | (bTunePME ? MD_TUNEPME : 0);
451 Flags = Flags | (bTestVerlet ? MD_TESTVERLET : 0);
452 Flags = Flags | (bConfout ? MD_CONFOUT : 0);
453 Flags = Flags | (bRerunVSite ? MD_RERUN_VSITE : 0);
454 Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0);
455 Flags = Flags | (bAppendFiles ? MD_APPENDFILES : 0);
456 Flags = Flags | (opt2parg_bSet("-append", asize(pa),pa) ? MD_APPENDFILESSET : 0);
457 Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0);
458 Flags = Flags | (sim_part>1 ? MD_STARTFROMCPT : 0);
459 Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0);
462 /* We postpone opening the log file if we are appending, so we can
463 first truncate the old log file and append to the correct position
465 if ((MASTER(cr) || bSepPot) && !bAppendFiles)
467 gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,
468 !bSepPot,Flags & MD_APPENDFILES,&fplog);
469 CopyRight(fplog,argv[0]);
470 please_cite(fplog,"Hess2008b");
471 please_cite(fplog,"Spoel2005a");
472 please_cite(fplog,"Lindahl2001a");
473 please_cite(fplog,"Berendsen95a");
475 else if (!MASTER(cr) && bSepPot)
477 gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,!bSepPot,Flags,&fplog);
484 ddxyz[XX] = (int)(realddxyz[XX] + 0.5);
485 ddxyz[YY] = (int)(realddxyz[YY] + 0.5);
486 ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5);
488 rc = mdrunner(&hw_opt, fplog,cr,NFILE,fnm,oenv,bVerbose,bCompact,
489 nstglobalcomm, ddxyz,dd_node_order,rdd,rconstr,
490 dddlb_opt[0],dlb_scale,ddcsx,ddcsy,ddcsz,
492 nsteps,nstepout,resetstep,
493 nmultisim,repl_ex_nst,repl_ex_nex,repl_ex_seed,
494 pforce, cpt_period,max_hours,deviceOptions,Flags);
498 if (MULTIMASTER(cr)) {
502 /* Log file has to be closed in mdrunner if we are appending to it
503 (fplog not set here) */
504 if (MASTER(cr) && !bAppendFiles)
506 gmx_log_close(fplog);