/*
 *                This source code is part of
 *
 *                 G   R   O   M   A   C   S
 *
 *          GROningen MAchine for Chemical Simulations
 *
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * If you want to redistribute modifications, please consider that
 * scientific software is very special. Version control is crucial -
 * bugs must be traceable. We will be happy to consider code for
 * inclusion in the official distribution, but derived work must not
 * be called official GROMACS. Details are found in the README & COPYING
 * files - if they are missing, get the official version at www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the papers on the package - you can find them in the top README file.
 *
 * For more info, check our website at http://www.gromacs.org
 *
 * And Hey:
 * GROningen Mixture of Alchemy and Childrens' Stories
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include "smalloc.h"
#include "gmx_fatal.h"
#include "network.h"
#include "main.h"
#include "filenm.h"
#include "gmxfio.h"
#include "mdrun.h"

#include "thread_mpi.h"
/* The source code in this file should be thread-safe.
   Please keep it that way. */
#if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
/* on native Windows, _getpid() is declared in <process.h> */
#include <process.h>
#endif
/* This is not strictly thread-safe, but it is only written at the
   beginning of the simulation, once by each thread, and with the same
   value. We assume that writing to a bool is atomic. */
static bool parallel_env_val;

tMPI_Thread_mutex_t parallel_env_mutex=TMPI_THREAD_MUTEX_INITIALIZER;
bool gmx_parallel_env(void)
{
  bool ret;

  tMPI_Thread_mutex_lock(&parallel_env_mutex);
  ret = parallel_env_val;
  tMPI_Thread_mutex_unlock(&parallel_env_mutex);

  return ret;
}
static void set_parallel_env(bool val)
{
  tMPI_Thread_mutex_lock(&parallel_env_mutex);
  if (!parallel_env_val) {
    /* we only allow the flag to be set, not unset */
    parallel_env_val = val;
  }
  tMPI_Thread_mutex_unlock(&parallel_env_mutex);
}
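/*
 * Example (illustrative sketch, not part of the original file): together
 * these two functions implement a set-once flag.  Typical use:
 *
 *     set_parallel_env(TRUE);      <- record that MPI was detected
 *     if (gmx_parallel_env())
 *         take_parallel_code_path();    <- hypothetical caller
 *
 * Since set_parallel_env() never clears the flag, threads that race to
 * set the same value cannot flip it back to FALSE.
 */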
static void par_fn(char *base,int ftp,const t_commrec *cr,
                   bool bUnderScore,
                   char buf[],int bufsize)
{
  if ((size_t)bufsize < (strlen(base)+4))
    gmx_mem("Character buffer too small!");

  /* Copy to buf, and strip the extension */
  strcpy(buf,base);
  buf[strlen(base) - strlen(ftp2ext(fn2ftp(base))) - 1] = '\0';

  /* Append the simulation or node id before restoring the extension */
  if (MULTISIM(cr) && !bUnderScore) {
    sprintf(buf+strlen(buf),"%d",cr->ms->sim);
  } else if (PAR(cr)) {
    sprintf(buf+strlen(buf),"%d",cr->nodeid);
  }
  strcat(buf,".");

  /* Add the extension again */
  strcat(buf,(ftp == efTPX) ? "tpr" : (ftp == efEDR) ? "edr" : ftp2ext(ftp));
}
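/*
 * Example (illustrative): in a 4-node parallel run with cr->nodeid == 2,
 *
 *     char buf[256];
 *     par_fn("ener.edr", efEDR, cr, FALSE, buf, 255);
 *
 * leaves "ener2.edr" in buf; in a multi-simulation run the simulation
 * index cr->ms->sim is inserted instead of the node id.
 */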
void check_multi_int(FILE *log,const gmx_multisim_t *ms,int val,
                     const char *name)
{
  int  *ibuf,p;
  bool bCompatible;

  fprintf(log,"Multi-checking %s ... ",name);

  if (ms == NULL)
    gmx_fatal(FARGS,
              "check_multi_int called with a NULL communication pointer");

  snew(ibuf,ms->nsim);
  ibuf[ms->sim] = val;
  gmx_sumi_sim(ms->nsim,ibuf,ms);

  bCompatible = TRUE;
  for(p=1; p<ms->nsim; p++)
    bCompatible = bCompatible && (ibuf[p-1] == ibuf[p]);

  if (bCompatible)
    fprintf(log,"OK\n");
  else {
    fprintf(log,"\n%s is not equal for all subsystems\n",name);
    for(p=0; p<ms->nsim; p++)
      fprintf(log,"  subsystem %d: %d\n",p,ibuf[p]);
    gmx_fatal(FARGS,"The %d subsystems are not compatible\n",ms->nsim);
  }

  sfree(ibuf);
}
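/*
 * Example (illustrative): callers use this to verify that an input
 * parameter is identical across all simulations of a multi-run, e.g.
 *
 *     check_multi_int(log, cr->ms, ir->eI, "the integrator");
 *
 * On a mismatch the per-subsystem values are listed in the log file and
 * the run is aborted with gmx_fatal().
 */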
FILE *gmx_log_open(const char *lognm,const t_commrec *cr,bool bMasterOnly,
                   unsigned long Flags)
{
  int    len,pid;
  char   buf[256],host[256];
  time_t t;
  char   *tmpnm;
  FILE   *fp = NULL;

  bool bAppend = Flags & MD_APPENDFILES;
  /* Communicate the filename for the log file */
  if (cr->nnodes > 1 && !bMasterOnly) {
    if (MASTER(cr))
      len = strlen(lognm)+1;
    gmx_bcast(sizeof(len),&len,cr);
    if (!MASTER(cr))
      snew(tmpnm,len+8);
    else
      tmpnm = strdup(lognm);
    gmx_bcast(len*sizeof(*tmpnm),tmpnm,cr);
  } else {
    tmpnm = strdup(lognm);
  }
  if (PAR(cr) && !bMasterOnly) {
    /* Since the log file always ends with '.log' we can use that info */
    par_fn(tmpnm,efLOG,cr,cr->ms!=NULL,buf,255);
    fp = gmx_fio_fopen(buf, bAppend ? "a" : "w");
  } else if (MASTER(cr)) {
    fp = gmx_fio_fopen(tmpnm, bAppend ? "a" : "w");
  }
  sfree(tmpnm);

  gmx_fatal_set_log_file(fp);
  /* Get some machine parameters */
#ifdef HAVE_UNISTD_H
  if (gethostname(host,255) != 0)
    sprintf(host,"unknown");
#else
  sprintf(host,"unknown");
#endif
  time(&t);

#ifndef NO_GETPID
# if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
  pid = _getpid();
# else
  pid = getpid();
# endif
#else
  pid = 0;
#endif
237 "-----------------------------------------------------------\n"
238 "Restarting from checkpoint, appending to previous log file.\n\n"
243 "Log file opened on %s"
244 "Host: %s pid: %d nodeid: %d nnodes: %d\n",
245 ctime(&t),host,pid,cr->nodeid,cr->nnodes);
#if (defined BUILD_MACHINE && defined BUILD_TIME && defined BUILD_USER)
  fprintf(fp,
          "The Gromacs distribution was built %s by\n"
          "%s (%s)\n\n\n",BUILD_TIME,BUILD_USER,BUILD_MACHINE);
#endif

  fflush(fp);

  return fp;
}
void gmx_log_close(FILE *fp)
{
  if (fp) {
    gmx_fatal_set_log_file(NULL);
    gmx_fio_fclose(fp);
  }
}
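/*
 * Example (illustrative): gmx_log_open() and gmx_log_close() are used as
 * a pair around a run, e.g.
 *
 *     FILE *fplog = gmx_log_open(ftp2fn(efLOG,nfile,fnm), cr, FALSE, Flags);
 *     ...
 *     gmx_log_close(fplog);
 *
 * In between, the file is also registered with the gmx_fatal machinery,
 * so fatal error messages end up in the log as well.
 */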
static void comm_args(const t_commrec *cr,int *argc,char ***argv)
{
  int i,len;

  if (PAR(cr))
    gmx_bcast(sizeof(*argc),argc,cr);

  if (!MASTER(cr))
    snew(*argv,*argc+1);
  fprintf(stderr,"NODEID=%d argc=%d\n",cr->nodeid,*argc);
  for(i=0; (i<*argc); i++) {
    if (MASTER(cr))
      len = strlen((*argv)[i])+1;
    gmx_bcast(sizeof(len),&len,cr);
    if (!MASTER(cr))
      snew((*argv)[i],len);
    gmx_bcast(len*sizeof(char),(*argv)[i],cr);
  }
}
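/*
 * Example (illustrative): init_par() calls this on all nodes of a
 * parallel run,
 *
 *     comm_args(cr, &argc, &argv);
 *
 * after which the non-master nodes hold a freshly allocated copy of the
 * master's argv, so every node can parse identical command line options.
 */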
void init_multisystem(t_commrec *cr,int nsim, int nfile,
                      const t_filenm fnm[],bool bParFn)
{
  gmx_multisim_t *ms;
  int  nnodes,nnodpersim,sim,i,ftp;
  char buf[256];
#ifdef GMX_MPI
  MPI_Group mpi_group_world;
#endif
  int *rank;

  nnodes = cr->nnodes;
  if (nnodes % nsim != 0)
    gmx_fatal(FARGS,"The number of nodes (%d) is not a multiple of the number of simulations (%d)",nnodes,nsim);

  nnodpersim = nnodes/nsim;
  sim = cr->nodeid/nnodpersim;

  if (debug)
    fprintf(debug,"We have %d simulations, %d nodes per simulation, local simulation is %d\n",nsim,nnodpersim,sim);
  snew(ms,1);
  cr->ms   = ms;
  ms->nsim = nsim;
  ms->sim  = sim;
#ifdef GMX_MPI
  /* Create a communicator for the master nodes */
  snew(rank,ms->nsim);
  for(i=0; i<ms->nsim; i++)
    rank[i] = i*nnodpersim;
  MPI_Comm_group(MPI_COMM_WORLD,&mpi_group_world);
  MPI_Group_incl(mpi_group_world,nsim,rank,&ms->mpi_group_masters);
  sfree(rank);
  MPI_Comm_create(MPI_COMM_WORLD,ms->mpi_group_masters,
                  &ms->mpi_comm_masters);
  /* Reduce the intra-simulation communication */
  cr->sim_nodeid = cr->nodeid % nnodpersim;
  cr->nnodes = nnodpersim;
  MPI_Comm_split(MPI_COMM_WORLD,sim,cr->sim_nodeid,&cr->mpi_comm_mysim);
  cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
  cr->nodeid = cr->sim_nodeid;
#endif /* GMX_MPI */

  if (debug) {
    fprintf(debug,"This is simulation %d",cr->ms->sim);
    if (PAR(cr))
      fprintf(debug,", local number of nodes %d, local nodeid %d",
              cr->nnodes,cr->sim_nodeid);
    fprintf(debug,"\n\n");
  }
  if (bParFn) {
    /* Patch output and tpx file names
       (except log, which has been done already) */
    for(i=0; (i<nfile); i++) {
      /* Because of possible multiple extensions per type we must look
         at the actual file name */
      if (is_output(&fnm[i]) ||
          fnm[i].ftp == efTPX || fnm[i].ftp == efCPT ||
          strcmp(fnm[i].opt,"-rerun") == 0) {
        ftp = fn2ftp(fnm[i].fns[0]);
        par_fn(fnm[i].fns[0],ftp,cr,FALSE,buf,255);
        sfree(fnm[i].fns[0]);
        fnm[i].fns[0] = strdup(buf);
      }
    }
  }
}
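/*
 * Example (illustrative): with nsim == 2 and bParFn == TRUE, an output
 * option such as "-o traj.trr" is patched per simulation by par_fn(), so
 * simulation 0 writes "traj0.trr" and simulation 1 writes "traj1.trr",
 * while MPI_Comm_split() above keeps their communication separate.
 */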
t_commrec *init_par(int *argc,char ***argv_ptr)
{
  t_commrec *cr;
  char      **argv;
  bool      pe = FALSE;

  snew(cr,1);
  argv = *argv_ptr;

#ifdef GMX_THREADS
  /* With thread-MPI the number of MPI threads is set on the command line */
  if (tMPI_Get_N(argc, argv_ptr) > 1)
    pe = TRUE;
#endif /* GMX_THREADS */
#ifdef GMX_LIB_MPI
  pe = TRUE;
#ifdef GMX_CHECK_MPI_ENV
  /* Do not use MPI calls when the env. var. GMX_CHECK_MPI_ENV is not set */
  if (getenv(GMX_CHECK_MPI_ENV) == NULL)
    pe = FALSE;
#endif /* GMX_CHECK_MPI_ENV */
#endif /* GMX_LIB_MPI */
  set_parallel_env(pe);
392 cr->sim_nodeid = gmx_setup(argc,argv,&cr->nnodes);
399 set_parallel_env(pe);
  if (!PAR(cr) && (cr->sim_nodeid != 0))
    gmx_comm("(!PAR(cr) && (cr->sim_nodeid != 0))");

  cr->mpi_comm_mysim   = MPI_COMM_WORLD;
  cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
  cr->nodeid = cr->sim_nodeid;
  cr->duty   = (DUTY_PP | DUTY_PME);

  /* Communicate arguments if parallel */
#ifndef GMX_THREADS
  if (PAR(cr))
    comm_args(cr,argc,argv_ptr);
#endif /* GMX_THREADS */

  return cr;
}
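/*
 * Example (illustrative): a typical mdrun-style entry point would start
 * with
 *
 *     t_commrec *cr = init_par(&argc, &argv);
 *     if (MASTER(cr))
 *         fprintf(stderr, "Running on %d nodes\n", cr->nnodes);
 *
 * after which PAR(cr) tells the caller whether a parallel environment
 * was actually set up.
 */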
t_commrec *init_par_threads(t_commrec *cro)
{
  int initialized;
  t_commrec *cr;

  /* Make a thread-specific commrec, starting from a copy of the one
     passed in so existing settings are propagated */
  snew(cr,1);
  *cr = *cro;

  MPI_Initialized(&initialized);
  if (!initialized)
    gmx_comm("Initializing threads without comm");
  set_parallel_env(TRUE);
  /* Once threads can interoperate with MPI, we will fill the cr
     structure with more sensible data here */
  cr->sim_nodeid       = gmx_setup(0,NULL,&cr->nnodes);
  cr->mpi_comm_mysim   = MPI_COMM_WORLD;
  cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
  cr->nodeid           = cr->sim_nodeid;
  cr->duty             = (DUTY_PP | DUTY_PME);

  return cr;
}
t_commrec *init_cr_nopar(void)
{
  t_commrec *cr;

  snew(cr,1);
  cr->nnodes     = 1;
  cr->sim_nodeid = 0;
  cr->nodeid     = 0;
  cr->ms         = NULL;
  cr->duty       = (DUTY_PP | DUTY_PME);

  return cr;
}