Fix bug with large ints without MPI_IN_PLACE
author     Mark Abraham <mark.j.abraham@gmail.com>
           Tue, 6 Sep 2011 01:44:43 +0000 (11:44 +1000)
committer  Mark Abraham <mark.j.abraham@gmail.com>
           Tue, 6 Sep 2011 01:51:21 +0000 (11:51 +1000)
Communication of gmx_large_int_t arrays when MPI_IN_PLACE was disabled
has never worked since it was introduced just before 4.5.4, because it
used an int-sized buffer. This affected the replica-exchange sanity
checks on the number of steps.

IssueID #801 #803

Change-Id: I48f04b1c4dd80646711db99c5dab89cb910e79b4

include/types/commrec.h
src/gmxlib/main.c
src/gmxlib/network.c
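
For context (not part of the commit), a minimal sketch of the failure mode described above, assuming a 64-bit gmx_large_int_t. Here int64_t and MPI_INT64_T stand in for gmx_large_int_t and GMX_MPI_LARGE_INT, and the function name is illustrative. Before this fix, gmx_sumli() reduced nr large integers into the int-sized ibuf, so MPI_Allreduce wrote more bytes than the buffer holds wherever sizeof(gmx_large_int_t) > sizeof(int), and the copy-back read garbled values:

#include <stdint.h>
#include <stdlib.h>
#include <mpi.h>

/* Deliberately WRONG: mirrors the pre-fix logic without MPI_IN_PLACE. */
void broken_sumli(int nr, int64_t r[], MPI_Comm comm)
{
    int  i;
    int *ibuf = malloc(nr * sizeof(*ibuf)); /* holds nr ints, not nr int64_t */

    /* writes nr * sizeof(int64_t) bytes into ibuf, overrunning it */
    MPI_Allreduce(r, ibuf, nr, MPI_INT64_T, MPI_SUM, comm);
    for (i = 0; i < nr; i++)
    {
        r[i] = ibuf[i];                     /* reads back garbled sums */
    }
    free(ibuf);
}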

diff --git a/include/types/commrec.h b/include/types/commrec.h
index 6a07aeffa43f2ba0ec2bf57b9be5b7abf82d7528..2baa5844fe448f8a1d3af3457cbd08113b64a89d 100644
--- a/include/types/commrec.h
+++ b/include/types/commrec.h
@@ -115,6 +115,9 @@ typedef struct {
   int *ibuf; /* for ints */
   int ibuf_alloc;
 
+  gmx_large_int_t *libuf; /* for large ints */
+  int libuf_alloc;
+
   float *fbuf; /* for floats */
   int fbuf_alloc;
 
diff --git a/src/gmxlib/main.c b/src/gmxlib/main.c
index 21ca907fcc32c63e32df58a35b4dfdc4b5d7ddcd..4b335c2a1eb34305a9c974ec8de3f58b6af720b2 100644
--- a/src/gmxlib/main.c
+++ b/src/gmxlib/main.c
@@ -449,9 +449,11 @@ void init_multisystem(t_commrec *cr,int nsim, char **multidirs,
     /* initialize the MPI_IN_PLACE replacement buffers */
     snew(ms->mpb, 1);
     ms->mpb->ibuf=NULL;
+    ms->mpb->libuf=NULL;
     ms->mpb->fbuf=NULL;
     ms->mpb->dbuf=NULL;
     ms->mpb->ibuf_alloc=0;
+    ms->mpb->libuf_alloc=0;
     ms->mpb->fbuf_alloc=0;
     ms->mpb->dbuf_alloc=0;
 #endif
@@ -572,9 +574,11 @@ t_commrec *init_par(int *argc,char ***argv_ptr)
   /* initialize the MPI_IN_PLACE replacement buffers */
   snew(cr->mpb, 1);
   cr->mpb->ibuf=NULL;
+  cr->mpb->libuf=NULL;
   cr->mpb->fbuf=NULL;
   cr->mpb->dbuf=NULL;
   cr->mpb->ibuf_alloc=0;
+  cr->mpb->libuf_alloc=0;
   cr->mpb->fbuf_alloc=0;
   cr->mpb->dbuf_alloc=0;
 #endif
diff --git a/src/gmxlib/network.c b/src/gmxlib/network.c
index 06a56a2ca25574859276863aff009dad28253900..693302103cfd062e697bb4047c9aefd8fe057bf0 100644
--- a/src/gmxlib/network.c
+++ b/src/gmxlib/network.c
@@ -589,25 +589,25 @@ void gmx_sumli(int nr,gmx_large_int_t r[],const t_commrec *cr)
 #else
     int i;
 
-    if (nr > cr->mpb->ibuf_alloc) {
-        cr->mpb->ibuf_alloc = nr;
-        srenew(cr->mpb->ibuf,cr->mpb->ibuf_alloc);
+    if (nr > cr->mpb->libuf_alloc) {
+        cr->mpb->libuf_alloc = nr;
+        srenew(cr->mpb->libuf,cr->mpb->libuf_alloc);
     }
     if (cr->nc.bUse) {
         /* Use two step summing */
-        MPI_Allreduce(r,cr->mpb->ibuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+        MPI_Allreduce(r,cr->mpb->libuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
                       cr->nc.comm_intra);
         if (cr->nc.rank_intra == 0) {
             /* Sum with the buffers reversed */
-            MPI_Allreduce(cr->mpb->ibuf,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+            MPI_Allreduce(cr->mpb->libuf,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
                           cr->nc.comm_inter);
         }
         MPI_Bcast(r,nr,GMX_MPI_LARGE_INT,0,cr->nc.comm_intra);
     } else {
-        MPI_Allreduce(r,cr->mpb->ibuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+        MPI_Allreduce(r,cr->mpb->libuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
                       cr->mpi_comm_mygroup);
         for(i=0; i<nr; i++)
-            r[i] = cr->mpb->ibuf[i];
+            r[i] = cr->mpb->libuf[i];
     }
 #endif
 #endif
@@ -709,14 +709,14 @@ void gmx_sumli_sim(int nr,gmx_large_int_t r[], const gmx_multisim_t *ms)
     /* this is thread-unsafe, but it will do for now: */
     int i;
 
-    if (nr > ms->mpb->ibuf_alloc) {
-        ms->mpb->ibuf_alloc = nr;
-        srenew(ms->mpb->ibuf,ms->mpb->ibuf_alloc);
+    if (nr > ms->mpb->libuf_alloc) {
+        ms->mpb->libuf_alloc = nr;
+        srenew(ms->mpb->libuf,ms->mpb->libuf_alloc);
     }
-    MPI_Allreduce(r,ms->mpb->ibuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+    MPI_Allreduce(r,ms->mpb->libuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
                   ms->mpi_comm_masters);
     for(i=0; i<nr; i++)
-        r[i] = ms->mpb->ibuf[i];
+        r[i] = ms->mpb->libuf[i];
 #endif
 #endif
 }
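
For reference (not part of the commit), a minimal self-contained sketch of the corrected pattern, assuming a 64-bit gmx_large_int_t: int64_t and MPI_INT64_T stand in for gmx_large_int_t and GMX_MPI_LARGE_INT, realloc() for srenew(), and the static buffer for the libuf field that this commit adds to the MPI_IN_PLACE replacement struct. The scratch buffer is now typed and sized for the large integers, so the reduction no longer overruns it and the copy-back no longer truncates:

#include <stdint.h>
#include <stdlib.h>
#include <mpi.h>

/* Illustrative names only; the real code keeps these in cr->mpb / ms->mpb. */
static int64_t *libuf       = NULL;
static int      libuf_alloc = 0;

void sumli_without_in_place(int nr, int64_t r[], MPI_Comm comm)
{
    int i;

    if (nr > libuf_alloc)
    {
        /* grow the 64-bit scratch buffer, as srenew() does for libuf */
        libuf_alloc = nr;
        libuf       = realloc(libuf, libuf_alloc * sizeof(*libuf));
    }
    /* reduce into the correctly typed buffer ... */
    MPI_Allreduce(r, libuf, nr, MPI_INT64_T, MPI_SUM, comm);
    /* ... and copy the sums back, since MPI_IN_PLACE cannot be used */
    for (i = 0; i < nr; i++)
    {
        r[i] = libuf[i];
    }
}

The two-step variant in gmx_sumli() follows the same idea, reducing first within each node (comm_intra) and then across nodes (comm_inter) with the buffers reversed.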