1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROningen Mixture of Alchemy and Childrens' Stories
39 #include "gromacs/utility/gmx_header_config.h"
47 #ifdef HAVE_SYS_TIME_H
52 #include "gmx_fatal.h"
63 #include "thread_mpi.h"
66 /* The source code in this file should be thread-safe.
67 Please keep it that way. */
74 #ifdef GMX_NATIVE_WINDOWS
79 /* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
81 gmx_ctime_r(const time_t *clock, char *buf, int n);
87 static void par_fn(char *base, int ftp, const t_commrec *cr,
88 gmx_bool bAppendSimId, gmx_bool bAppendNodeId,
89 char buf[], int bufsize)
93 if ((size_t)bufsize < (strlen(base)+10))
95 gmx_mem("Character buffer too small!");
98 /* Copy to buf, and strip extension */
100 buf[strlen(base) - strlen(ftp2ext(fn2ftp(base))) - 1] = '\0';
104 sprintf(buf+strlen(buf), "%d", cr->ms->sim);
108 strcat(buf, "_node");
109 sprintf(buf+strlen(buf), "%d", cr->nodeid);
113 /* Add extension again */
114 strcat(buf, (ftp == efTPX) ? "tpr" : (ftp == efEDR) ? "edr" : ftp2ext(ftp));
117 fprintf(debug, "node %d par_fn '%s'\n", cr->nodeid, buf);
118 if (fn2ftp(buf) == efLOG)
120 fprintf(debug, "log\n");
125 void check_multi_int(FILE *log, const gmx_multisim_t *ms, int val,
130 gmx_bool bCompatible;
132 if (NULL != log && !bQuiet)
134 fprintf(log, "Multi-checking %s ... ", name);
140 "check_multi_int called with a NULL communication pointer");
143 snew(ibuf, ms->nsim);
145 gmx_sumi_sim(ms->nsim, ibuf, ms);
148 for (p = 1; p < ms->nsim; p++)
150 bCompatible = bCompatible && (ibuf[p-1] == ibuf[p]);
155 if (NULL != log && !bQuiet)
157 fprintf(log, "OK\n");
164 fprintf(log, "\n%s is not equal for all subsystems\n", name);
165 for (p = 0; p < ms->nsim; p++)
167 fprintf(log, " subsystem %d: %d\n", p, ibuf[p]);
170 gmx_fatal(FARGS, "The %d subsystems are not compatible\n", ms->nsim);
176 void check_multi_large_int(FILE *log, const gmx_multisim_t *ms,
177 gmx_large_int_t val, const char *name,
180 gmx_large_int_t *ibuf;
182 gmx_bool bCompatible;
184 if (NULL != log && !bQuiet)
186 fprintf(log, "Multi-checking %s ... ", name);
192 "check_multi_int called with a NULL communication pointer");
195 snew(ibuf, ms->nsim);
197 gmx_sumli_sim(ms->nsim, ibuf, ms);
200 for (p = 1; p < ms->nsim; p++)
202 bCompatible = bCompatible && (ibuf[p-1] == ibuf[p]);
207 if (NULL != log && !bQuiet)
209 fprintf(log, "OK\n");
216 fprintf(log, "\n%s is not equal for all subsystems\n", name);
217 for (p = 0; p < ms->nsim; p++)
220 /* first make the format string */
221 snprintf(strbuf, 255, " subsystem %%d: %s\n",
223 fprintf(log, strbuf, p, ibuf[p]);
226 gmx_fatal(FARGS, "The %d subsystems are not compatible\n", ms->nsim);
/* Store the host name in name and return name.
 *
 * name - output buffer
 * len  - size of name in bytes; must be at least 8 so that the fallback
 *        string "unknown" (7 characters plus terminator) always fits,
 *        otherwise gmx_incons() is called
 *
 * When gethostname() is not available or fails, "unknown" is stored
 * instead. The result is always NUL-terminated: gethostname() is only
 * given len-1 bytes and the last byte is set explicitly, because a
 * truncated host name is not guaranteed to be terminated.
 */
char *gmx_gethostname(char *name, size_t len)
{
    if (len < 8)
    {
        gmx_incons("gmx_gethostname called with len<8");
    }
#ifdef HAVE_UNISTD_H
    if (gethostname(name, len-1) != 0)
    {
        strncpy(name, "unknown", 8);
    }
    else
    {
        name[len-1] = '\0';
    }
#else
    strncpy(name, "unknown", 8);
#endif

    return name;
}
/* Open the log file for this node and write the log header.
 *
 * lognm        - log file name
 * cr           - communication record (nnodes and nodeid are read here)
 * bMasterOnly  - part of the conditions that decide whether the name is
 *                broadcast and whether a node-specific file is opened
 * bAppendFiles - selects "a+" instead of "w+" as open mode and adds the
 *                "Restarting from checkpoint" banner text
 * fplog        - FILE pointer of the log, passed by address
 *
 * Statements visible in this block: the file name is communicated with
 * gmx_bcast() (its length first, then the characters); nodes for which
 * (!bMasterOnly && !MASTER(cr)) holds derive their own file name with
 * par_fn() and open it with gmx_fio_fopen(); the stream is registered with
 * gmx_fatal_set_log_file(); the header printed to the log contains the time
 * stamp from gmx_ctime_r(), the host name from gmx_gethostname(), the pid,
 * cr->nodeid and cr->nnodes, followed by gmx_print_version_info().
 *
 * NOTE(review): par_fn() is passed 255 and gmx_gethostname() 256 as literal
 * sizes for buf[256] and host[256]; sizeof() would keep them in sync.
 * NOTE(review): the gmx_fio_fopen() call under "else if (!bAppendFiles)"
 * still chooses the mode with "bAppendFiles ? ..."; on that path the mode
 * is always "w+".
 * NOTE(review): testlen is declared but no use of it is visible - confirm
 * and remove.
 */
252 void gmx_log_open(const char *lognm, const t_commrec *cr, gmx_bool bMasterOnly,
253 gmx_bool bAppendFiles, FILE** fplog)
255 int len, testlen, pid;
256 char buf[256], host[256];
258 char timebuf[STRLEN];
264 /* Communicate the filename for logfile */
265 if (cr->nnodes > 1 && !bMasterOnly
266 #ifdef GMX_THREAD_MPI
267 /* With thread MPI the non-master log files are opened later
268 * when the file names are already known on all nodes.
276 len = strlen(lognm) + 1;
278 gmx_bcast(sizeof(len), &len, cr);
285 tmpnm = gmx_strdup(lognm);
287 gmx_bcast(len*sizeof(*tmpnm), tmpnm, cr);
291 tmpnm = gmx_strdup(lognm);
296 if (!bMasterOnly && !MASTER(cr))
298 /* Since log always ends with '.log' let's use this info */
299 par_fn(tmpnm, efLOG, cr, FALSE, !bMasterOnly, buf, 255);
300 fp = gmx_fio_fopen(buf, bAppendFiles ? "a+" : "w+" );
302 else if (!bAppendFiles)
304 fp = gmx_fio_fopen(tmpnm, bAppendFiles ? "a+" : "w+" );
309 gmx_fatal_set_log_file(fp);
311 /* Get some machine parameters */
312 gmx_gethostname(host, 256);
317 # ifdef GMX_NATIVE_WINDOWS
331 "-----------------------------------------------------------\n"
332 "Restarting from checkpoint, appending to previous log file.\n"
337 gmx_ctime_r(&t, timebuf, STRLEN);
340 "Log file opened on %s"
341 "Host: %s pid: %d nodeid: %d nnodes: %d\n",
342 timebuf, host, pid, cr->nodeid, cr->nnodes);
343 gmx_print_version_info(fp);
/* Counterpart of gmx_log_open() for the log stream fp.
 *
 * The statement visible here unregisters the log from the fatal-error
 * handling by passing NULL to gmx_fatal_set_log_file().
 * NOTE(review): presumably fp is also closed and a NULL fp is tolerated -
 * confirm against the complete function body.
 */
352 void gmx_log_close(FILE *fp)
356 gmx_fatal_set_log_file(NULL);
/* Communicate the command-line arguments between the nodes with gmx_bcast().
 *
 * cr   - communication record used for the broadcasts
 * argc - address of the argument count; broadcast first
 * argv - address of the argument vector
 *
 * Visible protocol: *argc is broadcast; an argument vector of *argc+1
 * entries can be allocated with snew(); then, for every argument, its
 * length including the terminator (strlen+1) is broadcast, storage of that
 * length can be allocated with snew(), and the characters are broadcast.
 * NOTE(review): presumably the strlen() runs on the master and the snew()
 * calls on the other nodes - the guarding conditions need confirming.
 * NOTE(review): the commented-out gmx_bcast() line duplicates the live call
 * below it and can be deleted.
 */
361 static void comm_args(const t_commrec *cr, int *argc, char ***argv)
367 gmx_bcast(sizeof(*argc), argc, cr);
372 snew(*argv, *argc+1);
376 fprintf(debug, "NODEID=%d argc=%d\n", cr->nodeid, *argc);
378 for (i = 0; (i < *argc); i++)
382 len = strlen((*argv)[i])+1;
384 gmx_bcast(sizeof(len), &len, cr);
387 snew((*argv)[i], len);
389 /*gmx_bcast(len*sizeof((*argv)[i][0]),(*argv)[i],cr);*/
390 gmx_bcast(len*sizeof(char), (*argv)[i], cr);
/* Divide the available nodes over nsim simulations and set up the
 * communication for a multi-simulation run.
 *
 * cr        - communication record; nnodes, nodeid, sim_nodeid and the
 *             communicators are updated here
 * nsim      - number of simulations; the number of nodes must be a multiple
 *             of it, otherwise gmx_fatal() is called
 * multidirs - per-simulation directory names; the entry of the local
 *             simulation is passed to gmx_chdir()
 * nfile/fnm - file name table whose names can be patched with par_fn()
 * bParFn    - NOTE(review): presumably enables the file name patching when
 *             multidirs is not used - confirm
 *
 * Visible steps:
 *  - nnodpersim = nnodes/nsim, and the local simulation index is
 *    cr->nodeid/nnodpersim;
 *  - an MPI group and communicator are created from the ranks
 *    i*nnodpersim (the first node of every simulation);
 *  - without thread-MPI and without MPI_IN_PLACE, the replacement buffers
 *    in ms->mpb are reset to NULL/0;
 *  - MPI_COMM_WORLD is split per simulation with MPI_Comm_split(), after
 *    which cr->nnodes and cr->nodeid refer to the local simulation;
 *  - for output files, efTPX, efCPT and the "-rerun" option, the first file
 *    name is replaced by the simulation-specific name built by par_fn().
 *
 * A binary without MPI support cannot run multiple simulations and reports
 * that through gmx_fatal().
 * NOTE(review): par_fn() is passed the literal 255 as buffer size.
 */
395 void init_multisystem(t_commrec *cr, int nsim, char **multidirs,
396 int nfile, const t_filenm fnm[], gmx_bool bParFn)
399 int nnodes, nnodpersim, sim, i, ftp;
402 MPI_Group mpi_group_world;
409 gmx_fatal(FARGS, "This binary is compiled without MPI support, can not do multiple simulations.");
414 if (nnodes % nsim != 0)
416 gmx_fatal(FARGS, "The number of nodes (%d) is not a multiple of the number of simulations (%d)", nnodes, nsim);
419 nnodpersim = nnodes/nsim;
420 sim = cr->nodeid/nnodpersim;
424 fprintf(debug, "We have %d simulations, %d nodes per simulation, local simulation is %d\n", nsim, nnodpersim, sim);
432 /* Create a communicator for the master nodes */
433 snew(rank, ms->nsim);
434 for (i = 0; i < ms->nsim; i++)
436 rank[i] = i*nnodpersim;
438 MPI_Comm_group(MPI_COMM_WORLD, &mpi_group_world);
439 MPI_Group_incl(mpi_group_world, nsim, rank, &ms->mpi_group_masters);
441 MPI_Comm_create(MPI_COMM_WORLD, ms->mpi_group_masters,
442 &ms->mpi_comm_masters);
444 #if !defined(GMX_THREAD_MPI) && !defined(MPI_IN_PLACE_EXISTS)
445 /* initialize the MPI_IN_PLACE replacement buffers */
447 ms->mpb->ibuf = NULL;
448 ms->mpb->libuf = NULL;
449 ms->mpb->fbuf = NULL;
450 ms->mpb->dbuf = NULL;
451 ms->mpb->ibuf_alloc = 0;
452 ms->mpb->libuf_alloc = 0;
453 ms->mpb->fbuf_alloc = 0;
454 ms->mpb->dbuf_alloc = 0;
459 /* Reduce the intra-simulation communication */
460 cr->sim_nodeid = cr->nodeid % nnodpersim;
461 cr->nnodes = nnodpersim;
463 MPI_Comm_split(MPI_COMM_WORLD, sim, cr->sim_nodeid, &cr->mpi_comm_mysim);
464 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
465 cr->nodeid = cr->sim_nodeid;
470 fprintf(debug, "This is simulation %d", cr->ms->sim);
473 fprintf(debug, ", local number of nodes %d, local nodeid %d",
474 cr->nnodes, cr->sim_nodeid);
476 fprintf(debug, "\n\n");
484 fprintf(debug, "Changing to directory %s\n", multidirs[cr->ms->sim]);
486 gmx_chdir(multidirs[cr->ms->sim]);
490 /* Patch output and tpx, cpt and rerun input file names */
491 for (i = 0; (i < nfile); i++)
493 /* Because of possible multiple extensions per type we must look
494 * at the actual file name
496 if (is_output(&fnm[i]) ||
497 fnm[i].ftp == efTPX || fnm[i].ftp == efCPT ||
498 strcmp(fnm[i].opt, "-rerun") == 0)
500 ftp = fn2ftp(fnm[i].fns[0]);
501 par_fn(fnm[i].fns[0], ftp, cr, TRUE, FALSE, buf, 255);
502 sfree(fnm[i].fns[0]);
503 fnm[i].fns[0] = gmx_strdup(buf);
/* Set up the communication record at program start.
 *
 * argc     - address of the argument count (marked gmx_unused because some
 *            build configurations do not read it)
 * argv_ptr - address of the argument vector, may be NULL
 *
 * Visible steps:
 *  - with GMX_MPI and without GMX_THREAD_MPI, gmx_setup() supplies
 *    cr->sim_nodeid and cr->nnodes, and both communicators are set to
 *    MPI_COMM_WORLD; a non-zero node id in a non-parallel run is reported
 *    through gmx_comm();
 *  - the alternative assignments set both communicators to NULL
 *    (NOTE(review): presumably the branch without real MPI - confirm);
 *  - cr->nodeid is set to cr->sim_nodeid and cr->duty to DUTY_PP | DUTY_PME;
 *  - without GMX_THREAD_MPI the arguments are communicated with comm_args();
 *  - without thread-MPI and without MPI_IN_PLACE, the replacement buffers
 *    in cr->mpb are reset to NULL/0.
 */
509 t_commrec *init_par(int gmx_unused *argc, char ***argv_ptr)
518 argv = argv_ptr ? *argv_ptr : NULL;
520 #if defined GMX_MPI && !defined GMX_THREAD_MPI
521 cr->sim_nodeid = gmx_setup(argc, argv, &cr->nnodes);
523 if (!PAR(cr) && (cr->sim_nodeid != 0))
525 gmx_comm("(!PAR(cr) && (cr->sim_nodeid != 0))");
528 cr->mpi_comm_mysim = MPI_COMM_WORLD;
529 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
531 /* These should never be accessed */
532 cr->mpi_comm_mysim = NULL;
533 cr->mpi_comm_mygroup = NULL;
538 cr->nodeid = cr->sim_nodeid;
540 cr->duty = (DUTY_PP | DUTY_PME);
542 /* Communicate arguments if parallel */
543 #ifndef GMX_THREAD_MPI
546 comm_args(cr, argc, argv_ptr);
548 #endif /* GMX_THREAD_MPI */
551 #if !defined(GMX_THREAD_MPI) && !defined(MPI_IN_PLACE_EXISTS)
552 /* initialize the MPI_IN_PLACE replacement buffers */
554 cr->mpb->ibuf = NULL;
555 cr->mpb->libuf = NULL;
556 cr->mpb->fbuf = NULL;
557 cr->mpb->dbuf = NULL;
558 cr->mpb->ibuf_alloc = 0;
559 cr->mpb->libuf_alloc = 0;
560 cr->mpb->fbuf_alloc = 0;
561 cr->mpb->dbuf_alloc = 0;
568 t_commrec *init_par_threads(const t_commrec *cro)
570 #ifdef GMX_THREAD_MPI
574 /* make a thread-specific commrec */
576 /* now copy the whole thing, so settings like the number of PME nodes
580 /* and we start setting our own thread-specific values for things */
581 MPI_Initialized(&initialized);
584 gmx_comm("Initializing threads without comm");
586 /* once threads will be used together with MPI, we'll
587 fill the cr structure with distinct data here. This might even work: */
588 cr->sim_nodeid = gmx_setup(0, NULL, &cr->nnodes);
590 cr->mpi_comm_mysim = MPI_COMM_WORLD;
591 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
592 cr->nodeid = cr->sim_nodeid;
593 cr->duty = (DUTY_PP | DUTY_PME);