Merge "Merge branch release-4-6 into release-5-0" into release-5-0
author    Mark Abraham <mark.j.abraham@gmail.com>
Mon, 1 Sep 2014 16:20:15 +0000 (18:20 +0200)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Mon, 1 Sep 2014 16:20:15 +0000 (18:20 +0200)
src/gromacs/gmxana/gmx_covar.c
src/gromacs/legacyheaders/vsite.h
src/gromacs/mdlib/domdec.c
src/gromacs/mdlib/pme.c
src/gromacs/mdlib/vsite.c

diff --git a/src/gromacs/gmxana/gmx_covar.c b/src/gromacs/gmxana/gmx_covar.c
index c9faa784de876a09870107c1decc63a88341790f..f3e575f2288a5cbb9555b707ea9ac1fb8887ddb4 100644
@@ -121,7 +121,7 @@ int gmx_covar(int argc, char *argv[])
         { "-pbc",  FALSE,  etBOOL, {&bPBC},
           "Apply corrections for periodic boundary conditions" }
     };
-    FILE           *out;
+    FILE           *out = NULL; /* initialization makes all compilers happy */
     t_trxstatus    *status;
     t_trxstatus    *trjout;
     t_topology      top;
@@ -550,18 +550,7 @@ int gmx_covar(int argc, char *argv[])
         fprintf(stderr, "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
     }
 
-    fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
-
-    sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
-    out = xvgropen(eigvalfile,
-                   "Eigenvalues of the covariance matrix",
-                   "Eigenvector index", str, oenv);
-    for (i = 0; (i < end); i++)
-    {
-        fprintf (out, "%10d %g\n", (int)i+1, eigenvalues[ndim-1-i]);
-    }
-    gmx_ffclose(out);
-
+    /* Set 'end', the maximum eigenvector and -value index used for output */
     if (end == -1)
     {
         if (nframes-1 < ndim)
@@ -576,6 +565,19 @@ int gmx_covar(int argc, char *argv[])
             end = ndim;
         }
     }
+
+    fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
+
+    sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
+    out = xvgropen(eigvalfile,
+                   "Eigenvalues of the covariance matrix",
+                   "Eigenvector index", str, oenv);
+    for (i = 0; (i < end); i++)
+    {
+        fprintf (out, "%10d %g\n", (int)i+1, eigenvalues[ndim-1-i]);
+    }
+    gmx_ffclose(out);
+
     if (bFit)
     {
         /* misuse lambda: 0/1 mass weighted analysis no/yes */
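
The relocation above fixes an ordering bug: 'end' still holds its -1
sentinel when the eigenvalues are written, so the loop condition
'i < end' is false from the start and the .xvg file gets no data rows.
A minimal standalone sketch of the corrected ordering (all values are
invented; this is not the GROMACS code):

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    int    end     = -1;                  /* -1 means "use the default range" */
    int    nframes = 5, ndim = 9;
    double eigenvalues[9] = { 9, 8, 7, 6, 5, 4, 3, 2, 1 };

    /* Resolve the sentinel BEFORE any loop that uses 'end' as a bound;
     * writing the output first, as the old code did, silently wrote
     * nothing because 'i < -1' never holds for i >= 0. */
    if (end == -1)
    {
        end = MIN(nframes - 1, ndim);
    }

    for (int i = 0; i < end; i++)
    {
        printf("%10d %g\n", i + 1, eigenvalues[ndim - 1 - i]);
    }
    return 0;
}
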
diff --git a/src/gromacs/legacyheaders/vsite.h b/src/gromacs/legacyheaders/vsite.h
index 2ccc6ed1f9988681711f79259165249ad60ca049..d6b453e3b236cb1a5106118511a463331c7d4e52 100644
@@ -111,6 +111,7 @@ gmx_vsite_t *init_vsite(gmx_mtop_t *mtop, t_commrec *cr,
  */
 
 void split_vsites_over_threads(const t_ilist   *ilist,
+                               const t_iparams *ip,
                                const t_mdatoms *mdatoms,
                                gmx_bool         bLimitRange,
                                gmx_vsite_t     *vsite);
diff --git a/src/gromacs/mdlib/domdec.c b/src/gromacs/mdlib/domdec.c
index ae2a4f8098b696a91f846ef2e483abcfbc01b82f..401b6effdbb07ac88670d72211724ca19a907e1a 100644
@@ -9750,7 +9750,8 @@ void dd_partition_system(FILE                *fplog,
     if (vsite != NULL)
     {
         /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
-        split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
+        split_vsites_over_threads(top_local->idef.il, top_local->idef.iparams,
+                                  mdatoms, FALSE, vsite);
     }
 
     if (shellfc)
diff --git a/src/gromacs/mdlib/pme.c b/src/gromacs/mdlib/pme.c
index 532fa3a170e91d0fbca6bfd5b7d3179e16ca2edc..d070abdb597bccfcfa30ad19309601697d7766d1 100644
@@ -3130,8 +3130,9 @@ init_overlap_comm(pme_overlap_t *  ol,
     {
         /* s2g0 the local interpolation grid start.
          * s2g1 the local interpolation grid end.
-         * Because grid overlap communication only goes forward,
-         * the grid the slabs for fft's should be rounded down.
+         * Since calc_pidx divides particles, not grid lines,
+         * uniformly along dimension x or y, we need to round
+         * s2g0 down and s2g1 up.
          */
         ol->s2g0[i] = ( i   *ndata + 0       )/nnodes;
         ol->s2g1[i] = ((i+1)*ndata + nnodes-1)/nnodes + norder - 1;
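
The two expressions are integer floor and ceiling division: i*ndata/nnodes
truncates, so s2g0 is rounded down, while adding nnodes-1 before dividing
rounds s2g1 up, after which the order-1 spreading overlap is appended. A
small standalone illustration (grid and node counts are invented):

#include <stdio.h>

int main(void)
{
    int ndata = 10, nnodes = 3, norder = 4;   /* invented sizes */

    for (int i = 0; i < nnodes; i++)
    {
        int s2g0 = ( i      * ndata             ) / nnodes;              /* floor */
        int s2g1 = ((i + 1) * ndata + nnodes - 1) / nnodes + norder - 1; /* ceil + overlap */
        printf("slab %d: grid lines [%2d, %2d)\n", i, s2g0, s2g1);
    }
    return 0;   /* prints [ 0,  7), [ 3, 10), [ 6, 13) */
}
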
@@ -3833,7 +3834,7 @@ reduce_threadgrid_overlap(gmx_pme_t pme,
     int  fft_my, fft_mz;
     int  buf_my = -1;
     int  nsx, nsy, nsz;
-    ivec ne;
+    ivec localcopy_end;
     int  offx, offy, offz, x, y, z, i0, i0t;
     int  sx, sy, sz, fx, fy, fz, tx1, ty1, tz1, ox, oy, oz;
     gmx_bool bClearBufX, bClearBufY, bClearBufXY, bClearBuf;
@@ -3869,8 +3870,14 @@ reduce_threadgrid_overlap(gmx_pme_t pme,
 
     for (d = 0; d < DIM; d++)
     {
-        ne[d] = min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
-                    local_fft_ndata[d]);
+        /* Determine up to where our thread needs to copy from the
+         * thread-local charge spreading grid to the rank-local FFT grid.
+         * This is up to our spreading grid end minus order-1 and
+         * not beyond the local FFT grid.
+         */
+        localcopy_end[d] =
+            min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
+                local_fft_ndata[d]);
     }
 
     offx = pmegrid->offset[XX];
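
The new name documents what the value is: the exclusive end of the region
this thread copies from its private spreading grid into the rank-local FFT
grid. The last order-1 lines of the spreading grid overlap a neighbouring
grid and are handled by the overlap reduction instead. A small sketch with
invented per-dimension numbers:

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    /* One dimension of one thread's spreading grid (invented numbers) */
    int offset = 6, n = 8, order = 4;   /* thread-local grid covers [6, 14) */
    int local_fft_ndata = 12;           /* rank-local FFT grid size         */

    /* Copy up to the spreading-grid end minus order-1, and never
     * beyond the local FFT grid. */
    int localcopy_end = MIN(offset + n - (order - 1), local_fft_ndata);

    printf("copy grid lines [%d, %d)\n", offset, localcopy_end);  /* [6, 11) */
    return 0;
}
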
@@ -3902,7 +3909,16 @@ reduce_threadgrid_overlap(gmx_pme_t pme,
         pmegrid_g = &pmegrids->grid_th[fx*pmegrids->nc[YY]*pmegrids->nc[ZZ]];
         ox       += pmegrid_g->offset[XX];
         /* Determine the end of our part of the source grid */
-        tx1 = min(ox + pmegrid_g->n[XX], ne[XX]);
+        if (!bCommX)
+        {
+            /* Use our thread local source grid and target grid part */
+            tx1 = min(ox + pmegrid_g->n[XX], localcopy_end[XX]);
+        }
+        else
+        {
+            /* Use our thread local source grid and the spreading range */
+            tx1 = min(ox + pmegrid_g->n[XX], pme->pme_order);
+        }
 
         for (sy = 0; sy >= -pmegrids->nthread_comm[YY]; sy--)
         {
@@ -3918,7 +3934,16 @@ reduce_threadgrid_overlap(gmx_pme_t pme,
             pmegrid_g = &pmegrids->grid_th[fy*pmegrids->nc[ZZ]];
             oy       += pmegrid_g->offset[YY];
             /* Determine the end of our part of the source grid */
-            ty1 = min(oy + pmegrid_g->n[YY], ne[YY]);
+            if (!bCommY)
+            {
+                /* Use our thread local source grid and target grid part */
+                ty1 = min(oy + pmegrid_g->n[YY], localcopy_end[YY]);
+            }
+            else
+            {
+                /* Use our thread local source grid and the spreading range */
+                ty1 = min(oy + pmegrid_g->n[YY], pme->pme_order);
+            }
 
             for (sz = 0; sz >= -pmegrids->nthread_comm[ZZ]; sz--)
             {
@@ -3931,7 +3956,7 @@ reduce_threadgrid_overlap(gmx_pme_t pme,
                 }
                 pmegrid_g = &pmegrids->grid_th[fz];
                 oz       += pmegrid_g->offset[ZZ];
-                tz1       = min(oz + pmegrid_g->n[ZZ], ne[ZZ]);
+                tz1       = min(oz + pmegrid_g->n[ZZ], localcopy_end[ZZ]);
 
                 if (sx == 0 && sy == 0 && sz == 0)
                 {
@@ -3987,8 +4012,13 @@ reduce_threadgrid_overlap(gmx_pme_t pme,
                     if (bCommY)
                     {
                         commbuf = commbuf_y;
-                        /* The y-size of the communication buffer is order-1 */
-                        buf_my  = pmegrid->order - 1;
+                        /* The y-size of the communication buffer is set by
+                         * the overlap of the grid part of our local slab
+                         * with the part starting at the next slab.
+                         */
+                        buf_my  =
+                            pme->overlap[1].s2g1[pme->nodeid_minor] -
+                            pme->overlap[1].s2g0[pme->nodeid_minor+1];
                         if (bCommX)
                         {
                             /* We index commbuf modulo the local grid size */
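
Because s2g1 is rounded up, a slab overlaps the next one by either order-1
or order grid lines, so the old fixed size of order-1 could be one line
short. Continuing the invented numbers from the sketch above
(ndata = 10, nnodes = 3, norder = 4):

#include <stdio.h>

int main(void)
{
    /* Slab bounds as produced by the s2g0/s2g1 formulas above */
    int s2g0[4] = { 0, 3, 6, 10 };
    int s2g1[3] = { 7, 10, 13 };
    int norder  = 4;

    for (int id = 0; id + 1 < 3; id++)
    {
        /* Overlap of our slab with the part starting at the next slab */
        int buf_my = s2g1[id] - s2g0[id + 1];
        printf("rank %d: buf_my = %d (a fixed order-1 would give %d)\n",
               id, buf_my, norder - 1);
    }
    return 0;
}
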
@@ -4101,6 +4131,12 @@ static void sum_fftgrid_dd(gmx_pme_t pme, real *fftgrid, int grid_index)
             sendptr = overlap->sendbuf + send_index0*local_fft_ndata[ZZ];
             recvptr = overlap->recvbuf;
 
+            if (debug != NULL)
+            {
+                fprintf(debug, "PME fftgrid comm y %2d x %2d x %2d\n",
+                        local_fft_ndata[XX], send_nindex, local_fft_ndata[ZZ]);
+            }
+
 #ifdef GMX_MPI
             MPI_Sendrecv(sendptr, send_size_y*datasize, GMX_MPI_REAL,
                          send_id, ipulse,
@@ -4167,7 +4203,7 @@ static void sum_fftgrid_dd(gmx_pme_t pme, real *fftgrid, int grid_index)
 
         if (debug != NULL)
         {
-            fprintf(debug, "PME fftgrid comm %2d x %2d x %2d\n",
+            fprintf(debug, "PME fftgrid comm x %2d x %2d x %2d\n",
                     send_nindex, local_fft_ndata[YY], local_fft_ndata[ZZ]);
         }
 
diff --git a/src/gromacs/mdlib/vsite.c b/src/gromacs/mdlib/vsite.c
index 7ad217f4a5cae44f14ecd28358ec664185f51b3e..c63d8aa756367a5ec7b87c15b879b5a5ca570bfc 100644
@@ -1840,6 +1840,7 @@ static void prepare_vsite_thread(const t_ilist      *ilist,
 }
 
 void split_vsites_over_threads(const t_ilist   *ilist,
+                               const t_iparams *ip,
                                const t_mdatoms *mdatoms,
                                gmx_bool         bLimitRange,
                                gmx_vsite_t     *vsite)
@@ -1878,16 +1879,38 @@ void split_vsites_over_threads(const t_ilist   *ilist,
         vsite_atom_range = -1;
         for (ftype = 0; ftype < F_NRE; ftype++)
         {
-            if ((interaction_function[ftype].flags & IF_VSITE) &&
-                ftype != F_VSITEN)
+            if (interaction_function[ftype].flags & IF_VSITE)
             {
-                nral1 = 1 + NRAL(ftype);
-                iat   = ilist[ftype].iatoms;
-                for (i = 0; i < ilist[ftype].nr; i += nral1)
+                if (ftype != F_VSITEN)
                 {
-                    for (j = i+1; j < i+nral1; j++)
+                    nral1 = 1 + NRAL(ftype);
+                    iat   = ilist[ftype].iatoms;
+                    for (i = 0; i < ilist[ftype].nr; i += nral1)
                     {
-                        vsite_atom_range = max(vsite_atom_range, iat[j]);
+                        for (j = i + 1; j < i + nral1; j++)
+                        {
+                            vsite_atom_range = max(vsite_atom_range, iat[j]);
+                        }
+                    }
+                }
+                else
+                {
+                    int vs_ind_end;
+
+                    iat = ilist[ftype].iatoms;
+
+                    i = 0;
+                    while (i < ilist[ftype].nr)
+                    {
+                        /* The 3 below is from 1+NRAL(ftype)=3 */
+                        vs_ind_end = i + ip[iat[i]].vsiten.n*3;
+
+                        vsite_atom_range = max(vsite_atom_range, iat[i+1]);
+                        while (i < vs_ind_end)
+                        {
+                            vsite_atom_range = max(vsite_atom_range, iat[i+2]);
+                            i               += 3;
+                        }
                     }
                 }
             }
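
The F_VSITEN case needs the parameter table because, unlike the other
vsite types, an N-body virtual site has no fixed atom count: it is stored
as vsiten.n consecutive iatoms triplets of {parameter index, constructed
site, constructing atom}, so the stride is ip[iat[i]].vsiten.n*3. (The old
code's 'inc = iat[i]' read the parameter index where a length was needed.)
A schematic walk over such a list, with invented atom numbers and a
stand-in type instead of the real t_iparams:

#include <stdio.h>

/* Minimal stand-in for the parameter entry used above (illustrative) */
typedef struct { int n; } t_vsiten_stub;

int main(void)
{
    /* One hypothetical N-body vsite: site 10 built from atoms 4, 5, 6,
     * stored as triplets of {parameter index, site, constructing atom} */
    t_vsiten_stub ip[1]     = { { 3 } };       /* vsiten.n = 3 */
    int           iatoms[9] = { 0, 10, 4,
                                0, 10, 5,
                                0, 10, 6 };
    int           nr        = 9;

    int i = 0;
    while (i < nr)
    {
        /* Stride over one whole vsite: vsiten.n triplets of width 3 */
        int vs_ind_end = i + ip[iatoms[i]].n * 3;

        printf("site %d constructed from:", iatoms[i + 1]);
        while (i < vs_ind_end)
        {
            printf(" %d", iatoms[i + 2]);
            i += 3;
        }
        printf("\n");                          /* site 10 constructed from: 4 5 6 */
    }
    return 0;
}
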
@@ -1935,8 +1958,7 @@ void split_vsites_over_threads(const t_ilist   *ilist,
 
     for (ftype = 0; ftype < F_NRE; ftype++)
     {
-        if ((interaction_function[ftype].flags & IF_VSITE) &&
-            ftype != F_VSITEN)
+        if (interaction_function[ftype].flags & IF_VSITE)
         {
             nral1 = 1 + NRAL(ftype);
             inc   = nral1;
@@ -1950,7 +1972,7 @@ void split_vsites_over_threads(const t_ilist   *ilist,
                  */
                 if (ftype != F_VSITEN)
                 {
-                    for (j = i+2; j < i+nral1; j++)
+                    for (j = i + 2; j < i + nral1; j++)
                     {
                         if (th_ind[iat[j]] != th)
                         {
@@ -1963,8 +1985,9 @@ void split_vsites_over_threads(const t_ilist   *ilist,
                 }
                 else
                 {
-                    inc = iat[i];
-                    for (j = i+2; j < i+inc; j += 3)
+                    /* The 3 below is from 1+NRAL(ftype)=3 */
+                    inc = ip[iat[i]].vsiten.n*3;
+                    for (j = i + 2; j < i + inc; j += 3)
                     {
                         if (th_ind[iat[j]] != th)
                         {
@@ -1974,7 +1997,7 @@ void split_vsites_over_threads(const t_ilist   *ilist,
                 }
                 /* Copy this vsite to the thread data struct of thread th */
                 il_th = &vsite->tdata[th].ilist[ftype];
-                for (j = i; j < i+inc; j++)
+                for (j = i; j < i + inc; j++)
                 {
                     il_th->iatoms[il_th->nr++] = iat[j];
                 }
@@ -2031,6 +2054,7 @@ void set_vsite_top(gmx_vsite_t *vsite, gmx_localtop_t *top, t_mdatoms *md,
             gmx_fatal(FARGS, "The combination of threading, virtual sites and charge groups is not implemented");
         }
 
-        split_vsites_over_threads(top->idef.il, md, !DOMAINDECOMP(cr), vsite);
+        split_vsites_over_threads(top->idef.il, top->idef.iparams,
+                                  md, !DOMAINDECOMP(cr), vsite);
     }
 }