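When coordinating step numbers between simulations, a multi-simulation run now exchanges them as large ints (gmx_large_int_t) instead of clamping them to the int range.
A matching MPI datatype, GMX_MPI_LARGE_INT, is introduced for these transfers.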
void check_multi_int(FILE *log,const gmx_multisim_t *ms,
int val,const char *name);
+void check_multi_large_int(FILE *log,const gmx_multisim_t *ms,
+ gmx_large_int_t val,const char *name);
/* Check if val is the same on all processors for a mdrun -multi run
* The string name is used to print to the log file and in a fatal error
* if the val's don't match.
void gmx_sumi(int nr,int r[],const t_commrec *cr);
/* Calculate the global sum of an array of ints */
+void gmx_sumli(int nr,gmx_large_int_t r[],const t_commrec *cr);
+/* Calculate the global sum of an array of large ints */
+
void gmx_sumf(int nr,float r[],const t_commrec *cr);
/* Calculate the global sum of an array of floats */
void gmx_sumi_sim(int nr,int r[],const gmx_multisim_t *ms);
/* Calculate the sum over the simulations of an array of ints */
+void gmx_sumli_sim(int nr,gmx_large_int_t r[],const gmx_multisim_t *ms);
+/* Calculate the sum over the simulations of an array of large ints */
+
void gmx_sumf_sim(int nr,float r[],const gmx_multisim_t *ms);
/* Calculate the sum over the simulations of an array of floats */
#define gmx_large_int_pfmt "%lld"
#define SIZEOF_GMX_LARGE_INT 8
#define GMX_LARGE_INT_MAX 9223372036854775807LL
+#define GMX_MPI_LARGE_INT MPI_LONG_LONG_INT
#elif ( (defined LONG_MAX && LONG_MAX==9223372036854775807L) || (defined SIZEOF_LONG_INT && SIZEOF_LONG_INT==8) )
#define gmx_large_int_pfmt "%ld"
#define SIZEOF_GMX_LARGE_INT 8
#define GMX_LARGE_INT_MAX 9223372036854775807LL
+/* A plain long is MPI_LONG. MPI_LONG_INT is the (long,int) pair type that
+ * MPI provides for MPI_MAXLOC/MPI_MINLOC and must not be used for scalars. */
+#define GMX_MPI_LARGE_INT MPI_LONG
#elif ( (defined INT_MAX && INT_MAX==9223372036854775807L) || (defined SIZEOF_INT && SIZEOF_INT==8) )
#define gmx_large_int_pfmt "%d"
#define SIZEOF_GMX_LARGE_INT 8
#define GMX_LARGE_INT_MAX 9223372036854775807LL
+#define GMX_MPI_LARGE_INT MPI_INT
#elif ( (defined INT_MAX && INT_MAX==2147483647) || (defined SIZEOF_INT && SIZEOF_INT==4) )
#define gmx_large_int_pfmt "%d"
#define SIZEOF_GMX_LARGE_INT 4
#define GMX_LARGE_INT_MAX 2147483647
+#define GMX_MPI_LARGE_INT MPI_INT
#else
sfree(ibuf);
}
+/* Check if the large-int val is the same on all simulations of a
+ * mdrun -multi run. The string name is used to print to the log file
+ * (when log is not NULL) and to report which quantity differs.
+ * Calls gmx_fatal when ms is NULL or when the values do not match.
+ */
+void check_multi_large_int(FILE *log,const gmx_multisim_t *ms,
+                           gmx_large_int_t val, const char *name)
+{
+    gmx_large_int_t *ibuf;
+    int              p;
+    gmx_bool         bCompatible;
+
+    if (NULL != log)
+    {
+        fprintf(log,"Multi-checking %s ... ",name);
+    }
+
+    if (ms == NULL)
+    {
+        gmx_fatal(FARGS,
+                  "check_multi_large_int called with a NULL communication pointer");
+    }
+
+    /* Each simulation fills in only its own slot; summing over the
+     * simulations then gives every simulation the full list of values
+     * (this relies on the other slots being zero after snew).
+     */
+    snew(ibuf,ms->nsim);
+    ibuf[ms->sim] = val;
+    gmx_sumli_sim(ms->nsim,ibuf,ms);
+
+    /* All values are equal exactly when every neighbouring pair is equal */
+    bCompatible = TRUE;
+    for(p=1; p<ms->nsim; p++)
+    {
+        bCompatible = bCompatible && (ibuf[p-1] == ibuf[p]);
+    }
+
+    if (bCompatible)
+    {
+        if (NULL != log)
+        {
+            fprintf(log,"OK\n");
+        }
+    }
+    else
+    {
+        if (NULL != log)
+        {
+            fprintf(log,"\n%s is not equal for all subsystems\n",name);
+            for(p=0; p<ms->nsim; p++)
+            {
+                /* gmx_large_int_pfmt is a string literal, so it is
+                 * spliced into the format at compile time */
+                fprintf(log," subsystem %d: " gmx_large_int_pfmt "\n",
+                        p,ibuf[p]);
+            }
+        }
+        gmx_fatal(FARGS,"The %d subsystems are not compatible\n",ms->nsim);
+    }
+
+    sfree(ibuf);
+}
+
+
void gmx_log_open(const char *lognm,const t_commrec *cr,gmx_bool bMasterOnly,
unsigned long Flags, FILE** fplog)
{
#endif
}
+/* Calculate the global sum of an array of nr large ints.
+ * With MPI, r is used as input and is overwritten with the summed values.
+ * Without MPI this only calls gmx_call.
+ */
+void gmx_sumli(int nr,gmx_large_int_t r[],const t_commrec *cr)
+{
+#ifndef GMX_MPI
+    gmx_call("gmx_sumli");
+#else
+#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
+    if (cr->nc.bUse)
+    {
+        /* Use two step summing */
+        if (cr->nc.rank_intra == 0)
+        {
+            MPI_Reduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,0,
+                       cr->nc.comm_intra);
+            /* Sum with the buffers reversed */
+            MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+                          cr->nc.comm_inter);
+        }
+        else
+        {
+            /* MPI_IN_PLACE goes in sendbuf (on the root only), not in
+             * recvbuf, so the other ranks pass r as a normal send buffer */
+            MPI_Reduce(r,NULL,nr,GMX_MPI_LARGE_INT,MPI_SUM,0,cr->nc.comm_intra);
+        }
+        MPI_Bcast(r,nr,GMX_MPI_LARGE_INT,0,cr->nc.comm_intra);
+    }
+    else
+    {
+        MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,cr->mpi_comm_mygroup);
+    }
+#else
+    int              i;
+    gmx_large_int_t *buf;
+
+    /* Use a scratch buffer with large-int elements. cr->mpb->ibuf is
+     * deliberately not used: its element type is not visible here and,
+     * if it is int, it would be too small to receive nr large ints.
+     */
+    snew(buf,nr);
+    if (cr->nc.bUse)
+    {
+        /* Use two step summing */
+        MPI_Allreduce(r,buf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+                      cr->nc.comm_intra);
+        if (cr->nc.rank_intra == 0)
+        {
+            /* Sum with the buffers reversed */
+            MPI_Allreduce(buf,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+                          cr->nc.comm_inter);
+        }
+        MPI_Bcast(r,nr,GMX_MPI_LARGE_INT,0,cr->nc.comm_intra);
+    }
+    else
+    {
+        MPI_Allreduce(r,buf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+                      cr->mpi_comm_mygroup);
+        for(i=0; i<nr; i++)
+        {
+            r[i] = buf[i];
+        }
+    }
+    sfree(buf);
+#endif
+#endif
+}
+
+
+
#ifdef GMX_MPI
void gmx_sumd_comm(int nr,double r[],MPI_Comm mpi_comm)
{
void gmx_sumd_sim(int nr,double r[],const gmx_multisim_t *ms)
{
#ifndef GMX_MPI
- gmx_call("gmx_sumd");
+ gmx_call("gmx_sumd_sim");
#else
gmx_sumd_comm(nr,r,ms->mpi_comm_masters);
#endif
void gmx_sumf_sim(int nr,float r[],const gmx_multisim_t *ms)
{
#ifndef GMX_MPI
- gmx_call("gmx_sumf");
+ gmx_call("gmx_sumf_sim");
#else
gmx_sumf_comm(nr,r,ms->mpi_comm_masters);
#endif
void gmx_sumi_sim(int nr,int r[], const gmx_multisim_t *ms)
{
#ifndef GMX_MPI
- gmx_call("gmx_sumd");
+ gmx_call("gmx_sumi_sim");
#else
#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,ms->mpi_comm_masters);
#endif
}
+/* Calculate the sum over the simulations of an array of nr large ints.
+ * With MPI, r is used as input and is overwritten with the summed values.
+ * Without MPI this only calls gmx_call.
+ */
+void gmx_sumli_sim(int nr,gmx_large_int_t r[], const gmx_multisim_t *ms)
+{
+#ifndef GMX_MPI
+    gmx_call("gmx_sumli_sim");
+#else
+#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
+    MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+                  ms->mpi_comm_masters);
+#else
+    int              i;
+    gmx_large_int_t *buf;
+
+    /* Use a scratch buffer with large-int elements. ms->mpb->ibuf is
+     * deliberately not used: its element type is not visible here and,
+     * if it is int, it would be too small to receive nr large ints.
+     * A local buffer also avoids sharing scratch space between callers.
+     */
+    snew(buf,nr);
+    MPI_Allreduce(r,buf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+                  ms->mpi_comm_masters);
+    for(i=0; i<nr; i++)
+    {
+        r[i] = buf[i];
+    }
+    sfree(buf);
+#endif
+#endif
+}
+
+
void gmx_finalize(void)
{
#ifndef GMX_MPI
static gmx_large_int_t get_multisim_nsteps(const t_commrec *cr,
gmx_large_int_t nsteps)
{
- int steps_out;
+ gmx_large_int_t steps_out;
if MASTER(cr)
{
- int *buf;
+ gmx_large_int_t *buf;
int s;
- int nsteps_int;
- gmx_bool set=FALSE;
int s_smallest;
snew(buf,cr->ms->nsim);
- nsteps_int=(int)nsteps;
- if (nsteps > INT_MAX)
- {
- nsteps_int=INT_MAX-1;
- }
- else if(nsteps<INT_MIN)
- {
- nsteps_int=-1;
- }
- buf[cr->ms->sim] = nsteps_int;
- /* TODO: fix this to be gmx_large_int_t */
- gmx_sumi_sim(cr->ms->nsim, buf, cr->ms);
+ buf[cr->ms->sim] = nsteps;
+ gmx_sumli_sim(cr->ms->nsim, buf, cr->ms);
- steps_out=INT_MAX-1;
+ steps_out=-1;
for(s=0; s<cr->ms->nsim; s++)
{
/* find the smallest positive number */
- if (buf[s]>= 0 && buf[s]<steps_out)
+ if (buf[s]>= 0 && ((steps_out < 0) || (buf[s]<steps_out)) )
{
steps_out=buf[s];
s_smallest=s;
- set=TRUE;
}
}
sfree(buf);
/* if we're the limiting simulation, don't do anything */
- if (!set ) /*|| (s_smallest == cr->ms->sim) )*/
- {
- steps_out=-1;
- }
- else if ((steps_out>=0 && steps_out<nsteps) && (s_smallest != cr->ms->sim) )
+ if ((steps_out>=0 && steps_out<nsteps) && (s_smallest != cr->ms->sim) )
{
- fprintf(stderr,
- "Will stop simulation %d after %d steps (because another simulation will end then).\n",
- cr->ms->sim, steps_out);
+ char strbuf[255];
+ snprintf(strbuf, 255, "Will stop simulation %%d after %s steps (another simulation will end then).\n", gmx_large_int_pfmt);
+ fprintf(stderr, strbuf, cr->ms->sim, steps_out);
}
}
/* broadcast to non-masters */
- gmx_bcast(sizeof(int), &steps_out, cr);
+ gmx_bcast(sizeof(gmx_large_int_t), &steps_out, cr);
return steps_out;
}
check_multi_int(fplog,ms,state->natoms,"the number of atoms");
check_multi_int(fplog,ms,ir->eI,"the integrator");
- check_multi_int(fplog,ms,ir->init_step+ir->nsteps,"init_step+nsteps");
- check_multi_int(fplog,ms,(ir->init_step+nst-1)/nst,
- "first exchange step: init_step/-replex");
+ check_multi_large_int(fplog,ms,ir->init_step+ir->nsteps,"init_step+nsteps");
+ check_multi_large_int(fplog,ms,(ir->init_step+nst-1)/nst,
+ "first exchange step: init_step/-replex");
check_multi_int(fplog,ms,ir->etc,"the temperature coupling");
check_multi_int(fplog,ms,ir->opts.ngtc,
"the number of temperature coupling groups");