/*! Read the OMP_NUM_THREADS env. var. and check against the value set on the command line. */
GMX_LIBGMX_EXPORT
-void gmx_omp_nthreads_read_env(int *nthreads_omp);
+void gmx_omp_nthreads_read_env(int *nthreads_omp,
+ gmx_bool bIsSimMaster);
#endif /* GMX_OMP_NTHREADS */
GMX_LIBGMX_EXPORT
void check_multi_int(FILE *log,const gmx_multisim_t *ms,
- int val,const char *name);
+ int val,const char *name,
+ gmx_bool bQuiet);
GMX_LIBGMX_EXPORT
void check_multi_large_int(FILE *log,const gmx_multisim_t *ms,
- gmx_large_int_t val,const char *name);
+ gmx_large_int_t val,const char *name,
+ gmx_bool bQuiet);
/* Check if val is the same on all processors for a mdrun -multi run
* The string name is used to print to the log file and in a fatal error
- * if the val's don't match.
+ * if the val's don't match. If bQuiet is true and the check passes,
+ * no output is written.
*/
+
GMX_LIBGMX_EXPORT
void init_multisystem(t_commrec *cr, int nsim, char **multidirs,
int nfile, const t_filenm fnm[], gmx_bool bParFn);
fprintf(fplog,"Found GMX_DISRE_ENSEMBLE_SIZE set to %d systems per ensemble\n",dd->nsystems);
}
check_multi_int(fplog,cr->ms,dd->nsystems,
- "the number of systems per ensemble");
+ "the number of systems per ensemble",
+ FALSE);
/* We used to allow any value of nsystems which was a divisor
* of ms->nsim. But this required an extra communicator which
* was stored in t_fcdata. This pulled in mpi.h in nearly all C files.
if (cr && cr->ms)
{
check_multi_int(fplog,cr->ms,fcd->disres.nres,
- "the number of distance restraints");
+ "the number of distance restraints",
+ FALSE);
}
please_cite(fplog,"Tropp80a");
please_cite(fplog,"Torda89a");
return modth.nth[m] = nth;
}
-void gmx_omp_nthreads_read_env(int *nthreads_omp)
+void gmx_omp_nthreads_read_env(int *nthreads_omp,
+ gmx_bool bIsSimMaster)
{
char *env;
+ gmx_bool bCommandLineSetNthreadsOMP = *nthreads_omp > 0;
+ char buffer[STRLEN];
assert(nthreads_omp);
gmx_fatal(FARGS,"OMP_NUM_THREADS is invalid: '%s'",env);
}
- if (*nthreads_omp > 0 && nt_omp != *nthreads_omp)
+ if (bCommandLineSetNthreadsOMP && nt_omp != *nthreads_omp)
{
- gmx_fatal(FARGS,"OMP_NUM_THREADS (%d) and the number of threads requested on the command line (%d) have different values",nt_omp,*nthreads_omp);
+ gmx_fatal(FARGS,"Environment variable OMP_NUM_THREADS (%d) and the number of threads requested on the command line (%d) have different values. Either omit one, or set them both to the same value.",nt_omp,*nthreads_omp);
}
- /* Setting the number of OpenMP threads.
- * NOTE: with tMPI this function is only called on the master node,
- * but with MPI on all nodes which means lots of messages on stderr.
- */
- fprintf(stderr,"Getting the number of OpenMP threads from OMP_NUM_THREADS: %d\n",nt_omp);
+ /* Setting the number of OpenMP threads. */
*nthreads_omp = nt_omp;
+
+ /* Output the results */
+ sprintf(buffer,
+ "The number of OpenMP threads was set by environment variable OMP_NUM_THREADS to %d%s\n",
+ nt_omp,
+ bCommandLineSetNthreadsOMP ? " (and the command-line setting agreed with that)" : "");
+ if (bIsSimMaster)
+ {
+ /* This prints once per simulation for multi-simulations,
+ * which might help diagnose issues with inhomogeneous
+ * cluster setups. */
+ fputs(buffer, stderr);
+ }
+ if (debug)
+ {
+ /* This prints once per process for real MPI (i.e. once
+ * per debug file), and once per simulation for thread MPI
+ * (because of logic in the calling function). */
+ fputs(buffer, debug);
+ }
}
}
}
void check_multi_int(FILE *log,const gmx_multisim_t *ms,int val,
- const char *name)
+ const char *name,
+ gmx_bool bQuiet)
{
int *ibuf,p;
gmx_bool bCompatible;
- if (NULL != log)
+ if (NULL != log && !bQuiet)
fprintf(log,"Multi-checking %s ... ",name);
if (ms == NULL)
if (bCompatible)
{
- if (NULL != log)
+ if (NULL != log && !bQuiet)
fprintf(log,"OK\n");
}
else
}
void check_multi_large_int(FILE *log,const gmx_multisim_t *ms,
- gmx_large_int_t val, const char *name)
+ gmx_large_int_t val, const char *name,
+ gmx_bool bQuiet)
{
gmx_large_int_t *ibuf;
int p;
gmx_bool bCompatible;
- if (NULL != log)
+ if (NULL != log && !bQuiet)
fprintf(log,"Multi-checking %s ... ",name);
if (ms == NULL)
if (bCompatible)
{
- if (NULL != log)
+ if (NULL != log && !bQuiet)
fprintf(log,"OK\n");
}
else
if (!MASTER(cr))
snew(*argv,*argc+1);
- fprintf(stderr,"NODEID=%d argc=%d\n",cr->nodeid,*argc);
+ if (debug)
+ {
+ fprintf(debug,"NODEID=%d argc=%d\n",cr->nodeid,*argc);
+ }
for(i=0; (i<*argc); i++) {
if (MASTER(cr))
len = strlen((*argv)[i])+1;
#endif
#ifdef GMX_LIB_MPI
- fprintf(stderr,"NNODES=%d, MYRANK=%d, HOSTNAME=%s\n",
- mpi_num_nodes,mpi_my_rank,mpi_hostname);
+ if (debug)
+ {
+ fprintf(debug,"NNODES=%d, MYRANK=%d, HOSTNAME=%s\n",
+ mpi_num_nodes,mpi_my_rank,mpi_hostname);
+ }
#endif
*nnodes=mpi_num_nodes;
fprintf(fplog," the orientation restraints are ensemble averaged over %d systems\n",ms->nsim);
check_multi_int(fplog,ms,od->nr,
- "the number of orientation restraints");
+ "the number of orientation restraints",
+ FALSE);
check_multi_int(fplog,ms,od->nref,
- "the number of fit atoms for orientation restraining");
- check_multi_int(fplog,ms,ir->nsteps,"nsteps");
+ "the number of fit atoms for orientation restraining",
+ FALSE);
+ check_multi_int(fplog,ms,ir->nsteps,"nsteps",FALSE);
/* Copy the reference coordinates from the master to the other nodes */
gmx_sum_sim(DIM*od->nref,od->xref[0],ms);
}
ivec ddxyz;
int dd_node_order;
gmx_bool bAddPart;
- FILE *fplog,*fptest;
+ FILE *fplog,*fpmulti;
int sim_part,sim_part_fn;
const char *part_suffix=".part";
char suffix[STRLEN];
&sim_part_fn,NULL,cr,
bAppendFiles,NFILE,fnm,
part_suffix,&bAddPart);
- if (sim_part_fn==0 && MASTER(cr))
+ if (sim_part_fn==0 && MULTIMASTER(cr))
{
fprintf(stdout,"No previous checkpoint file present, assuming this is a new run.\n");
}
if (MULTISIM(cr) && MASTER(cr))
{
- check_multi_int(stdout,cr->ms,sim_part,"simulation part");
+ if (MULTIMASTER(cr))
+ {
+ /* Log file is not yet available, so if there's a
+ * problem we can only write to stderr. */
+ fpmulti = stderr;
+ }
+ else
+ {
+ fpmulti = NULL;
+ }
+ check_multi_int(fpmulti,cr->ms,sim_part,"simulation part",TRUE);
}
}
else
sprintf(suffix,"%s%04d",part_suffix,sim_part_fn);
add_suffix_to_output_names(fnm,NFILE,suffix);
- if (MASTER(cr))
+ if (MULTIMASTER(cr))
{
fprintf(stdout,"Checkpoint file is from part %d, new output files will be suffixed '%s'.\n",sim_part-1,suffix);
}
fprintf(fplog,"Repl There are %d replicas:\n",re->nrepl);
- check_multi_int(fplog,ms,state->natoms,"the number of atoms");
- check_multi_int(fplog,ms,ir->eI,"the integrator");
- check_multi_large_int(fplog,ms,ir->init_step+ir->nsteps,"init_step+nsteps");
+ check_multi_int(fplog,ms,state->natoms,"the number of atoms",FALSE);
+ check_multi_int(fplog,ms,ir->eI,"the integrator",FALSE);
+ check_multi_large_int(fplog,ms,ir->init_step+ir->nsteps,"init_step+nsteps",FALSE);
check_multi_large_int(fplog,ms,(ir->init_step+nst-1)/nst,
- "first exchange step: init_step/-replex");
- check_multi_int(fplog,ms,ir->etc,"the temperature coupling");
+ "first exchange step: init_step/-replex",FALSE);
+ check_multi_int(fplog,ms,ir->etc,"the temperature coupling",FALSE);
check_multi_int(fplog,ms,ir->opts.ngtc,
- "the number of temperature coupling groups");
- check_multi_int(fplog,ms,ir->epc,"the pressure coupling");
- check_multi_int(fplog,ms,ir->efep,"free energy");
- check_multi_int(fplog,ms,ir->fepvals->n_lambda,"number of lambda states");
+ "the number of temperature coupling groups",FALSE);
+ check_multi_int(fplog,ms,ir->epc,"the pressure coupling",FALSE);
+ check_multi_int(fplog,ms,ir->efep,"free energy",FALSE);
+ check_multi_int(fplog,ms,ir->fepvals->n_lambda,"number of lambda states",FALSE);
re->temp = ir->opts.ref_t[0];
for(i=1; (i<ir->opts.ngtc); i++)
static void check_and_update_hw_opt(gmx_hw_opt_t *hw_opt,
- int cutoff_scheme)
+ int cutoff_scheme,
+ gmx_bool bIsSimMaster)
{
- gmx_omp_nthreads_read_env(&hw_opt->nthreads_omp);
+ gmx_omp_nthreads_read_env(&hw_opt->nthreads_omp, bIsSimMaster);
#ifndef GMX_THREAD_MPI
if (hw_opt->nthreads_tot > 0)
if (SIMMASTER(cr))
#endif
{
- check_and_update_hw_opt(hw_opt,minf.cutoff_scheme);
+ check_and_update_hw_opt(hw_opt,minf.cutoff_scheme,SIMMASTER(cr));
#ifdef GMX_THREAD_MPI
/* Early check for externally set process affinity. Can't do over all
}
}
- if (DDMASTER(dd))
- {
- fprintf(stderr,"Making %dD domain decomposition %d x %d x %d\n",
- dd->ndim,dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
- }
if (fplog)
{
fprintf(fplog,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",