{ "-pbc", FALSE, etBOOL, {&bPBC},
"Apply corrections for periodic boundary conditions" }
};
- FILE *out;
+ FILE *out = NULL; /* initialization makes all compilers happy */
t_trxstatus *status;
t_trxstatus *trjout;
t_topology top;
fprintf(stderr, "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
}
- fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
-
- sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
- out = xvgropen(eigvalfile,
- "Eigenvalues of the covariance matrix",
- "Eigenvector index", str, oenv);
- for (i = 0; (i < end); i++)
- {
- fprintf (out, "%10d %g\n", (int)i+1, eigenvalues[ndim-1-i]);
- }
- gmx_ffclose(out);
-
+ /* Set 'end', the maximum eigenvector and -value index used for output */
if (end == -1)
{
if (nframes-1 < ndim)
end = ndim;
}
}
+
+ fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
+
+ sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
+ out = xvgropen(eigvalfile,
+ "Eigenvalues of the covariance matrix",
+ "Eigenvector index", str, oenv);
+ for (i = 0; (i < end); i++)
+ {
+ fprintf (out, "%10d %g\n", (int)i+1, eigenvalues[ndim-1-i]);
+ }
+ gmx_ffclose(out);
+
if (bFit)
{
/* misuse lambda: 0/1 mass weighted analysis no/yes */
*/
void split_vsites_over_threads(const t_ilist *ilist,
+ const t_iparams *ip,
const t_mdatoms *mdatoms,
gmx_bool bLimitRange,
gmx_vsite_t *vsite);
if (vsite != NULL)
{
/* Now we have updated mdatoms, we can do the last vsite bookkeeping */
- split_vsites_over_threads(top_local->idef.il, mdatoms, FALSE, vsite);
+ split_vsites_over_threads(top_local->idef.il, top_local->idef.iparams,
+ mdatoms, FALSE, vsite);
}
if (shellfc)
{
/* s2g0 the local interpolation grid start.
* s2g1 the local interpolation grid end.
- * Because grid overlap communication only goes forward,
- * the grid the slabs for fft's should be rounded down.
+ * Since in calc_pidx we divide particles, and not grid lines,
+ * spatially uniform along dimension x or y, we need to round
+ * s2g0 down and s2g1 up.
*/
ol->s2g0[i] = ( i *ndata + 0 )/nnodes;
ol->s2g1[i] = ((i+1)*ndata + nnodes-1)/nnodes + norder - 1;
int fft_my, fft_mz;
int buf_my = -1;
int nsx, nsy, nsz;
- ivec ne;
+ ivec localcopy_end;
int offx, offy, offz, x, y, z, i0, i0t;
int sx, sy, sz, fx, fy, fz, tx1, ty1, tz1, ox, oy, oz;
gmx_bool bClearBufX, bClearBufY, bClearBufXY, bClearBuf;
for (d = 0; d < DIM; d++)
{
- ne[d] = min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
- local_fft_ndata[d]);
+ /* Determine up to where our thread needs to copy from the
+ * thread-local charge spreading grid to the rank-local FFT grid.
+ * This is up to our spreading grid end minus order-1 and
+ * not beyond the local FFT grid.
+ */
+ localcopy_end[d] =
+ min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
+ local_fft_ndata[d]);
}
offx = pmegrid->offset[XX];
pmegrid_g = &pmegrids->grid_th[fx*pmegrids->nc[YY]*pmegrids->nc[ZZ]];
ox += pmegrid_g->offset[XX];
/* Determine the end of our part of the source grid */
- tx1 = min(ox + pmegrid_g->n[XX], ne[XX]);
+ if (!bCommX)
+ {
+ /* Use our thread local source grid and target grid part */
+ tx1 = min(ox + pmegrid_g->n[XX], localcopy_end[XX]);
+ }
+ else
+ {
+ /* Use our thread local source grid and the spreading range */
+ tx1 = min(ox + pmegrid_g->n[XX], pme->pme_order);
+ }
for (sy = 0; sy >= -pmegrids->nthread_comm[YY]; sy--)
{
pmegrid_g = &pmegrids->grid_th[fy*pmegrids->nc[ZZ]];
oy += pmegrid_g->offset[YY];
/* Determine the end of our part of the source grid */
- ty1 = min(oy + pmegrid_g->n[YY], ne[YY]);
+ if (!bCommY)
+ {
+ /* Use our thread local source grid and target grid part */
+ ty1 = min(oy + pmegrid_g->n[YY], localcopy_end[YY]);
+ }
+ else
+ {
+ /* Use our thread local source grid and the spreading range */
+ ty1 = min(oy + pmegrid_g->n[YY], pme->pme_order);
+ }
for (sz = 0; sz >= -pmegrids->nthread_comm[ZZ]; sz--)
{
}
pmegrid_g = &pmegrids->grid_th[fz];
oz += pmegrid_g->offset[ZZ];
- tz1 = min(oz + pmegrid_g->n[ZZ], ne[ZZ]);
+ tz1 = min(oz + pmegrid_g->n[ZZ], localcopy_end[ZZ]);
if (sx == 0 && sy == 0 && sz == 0)
{
if (bCommY)
{
commbuf = commbuf_y;
- /* The y-size of the communication buffer is order-1 */
- buf_my = pmegrid->order - 1;
+ /* The y-size of the communication buffer is set by
+ * the overlap of the grid part of our local slab
+ * with the part starting at the next slab.
+ */
+ buf_my =
+ pme->overlap[1].s2g1[pme->nodeid_minor] -
+ pme->overlap[1].s2g0[pme->nodeid_minor+1];
if (bCommX)
{
/* We index commbuf modulo the local grid size */
sendptr = overlap->sendbuf + send_index0*local_fft_ndata[ZZ];
recvptr = overlap->recvbuf;
+ if (debug != NULL)
+ {
+ fprintf(debug, "PME fftgrid comm y %2d x %2d x %2d\n",
+ local_fft_ndata[XX], send_nindex, local_fft_ndata[ZZ]);
+ }
+
#ifdef GMX_MPI
MPI_Sendrecv(sendptr, send_size_y*datasize, GMX_MPI_REAL,
send_id, ipulse,
if (debug != NULL)
{
- fprintf(debug, "PME fftgrid comm %2d x %2d x %2d\n",
+ fprintf(debug, "PME fftgrid comm x %2d x %2d x %2d\n",
send_nindex, local_fft_ndata[YY], local_fft_ndata[ZZ]);
}
}
void split_vsites_over_threads(const t_ilist *ilist,
+ const t_iparams *ip,
const t_mdatoms *mdatoms,
gmx_bool bLimitRange,
gmx_vsite_t *vsite)
vsite_atom_range = -1;
for (ftype = 0; ftype < F_NRE; ftype++)
{
- if ((interaction_function[ftype].flags & IF_VSITE) &&
- ftype != F_VSITEN)
+ if (interaction_function[ftype].flags & IF_VSITE)
{
- nral1 = 1 + NRAL(ftype);
- iat = ilist[ftype].iatoms;
- for (i = 0; i < ilist[ftype].nr; i += nral1)
+ if (ftype != F_VSITEN)
{
- for (j = i+1; j < i+nral1; j++)
+ nral1 = 1 + NRAL(ftype);
+ iat = ilist[ftype].iatoms;
+ for (i = 0; i < ilist[ftype].nr; i += nral1)
{
- vsite_atom_range = max(vsite_atom_range, iat[j]);
+ for (j = i + 1; j < i + nral1; j++)
+ {
+ vsite_atom_range = max(vsite_atom_range, iat[j]);
+ }
+ }
+ }
+ else
+ {
+ int vs_ind_end;
+
+ iat = ilist[ftype].iatoms;
+
+ i = 0;
+ while (i < ilist[ftype].nr)
+ {
+ /* The 3 below is from 1+NRAL(ftype)=3 */
+ vs_ind_end = i + ip[iat[i]].vsiten.n*3;
+
+ vsite_atom_range = max(vsite_atom_range, iat[i+1]);
+ while (i < vs_ind_end)
+ {
+ vsite_atom_range = max(vsite_atom_range, iat[i+2]);
+ i += 3;
+ }
}
}
}
for (ftype = 0; ftype < F_NRE; ftype++)
{
- if ((interaction_function[ftype].flags & IF_VSITE) &&
- ftype != F_VSITEN)
+ if (interaction_function[ftype].flags & IF_VSITE)
{
nral1 = 1 + NRAL(ftype);
inc = nral1;
*/
if (ftype != F_VSITEN)
{
- for (j = i+2; j < i+nral1; j++)
+ for (j = i + 2; j < i + nral1; j++)
{
if (th_ind[iat[j]] != th)
{
}
else
{
- inc = iat[i];
- for (j = i+2; j < i+inc; j += 3)
+ /* The 3 below is from 1+NRAL(ftype)=3 */
+ inc = ip[iat[i]].vsiten.n*3;
+ for (j = i + 2; j < i + inc; j += 3)
{
if (th_ind[iat[j]] != th)
{
}
/* Copy this vsite to the thread data struct of thread th */
il_th = &vsite->tdata[th].ilist[ftype];
- for (j = i; j < i+inc; j++)
+ for (j = i; j < i + inc; j++)
{
il_th->iatoms[il_th->nr++] = iat[j];
}
gmx_fatal(FARGS, "The combination of threading, virtual sites and charge groups is not implemented");
}
- split_vsites_over_threads(top->idef.il, md, !DOMAINDECOMP(cr), vsite);
+ split_vsites_over_threads(top->idef.il, top->idef.iparams,
+ md, !DOMAINDECOMP(cr), vsite);
}
}