* /Erik 001109
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "gromacs/legacyheaders/pme.h"
+
+#include "config.h"
+#include <assert.h>
+#include <math.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
-#include <math.h>
-#include <assert.h>
-#include "typedefs.h"
-#include "txtdump.h"
-#include "vec.h"
-#include "smalloc.h"
-#include "coulomb.h"
-#include "gmx_fatal.h"
-#include "pme.h"
-#include "network.h"
-#include "physics.h"
-#include "nrnb.h"
-#include "macros.h"
#include "gromacs/fft/parallel_3dfft.h"
-#include "gromacs/fileio/futil.h"
#include "gromacs/fileio/pdbio.h"
+#include "gromacs/legacyheaders/coulomb.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/nrnb.h"
+#include "gromacs/legacyheaders/txtdump.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/math/gmxcomplex.h"
+#include "gromacs/math/units.h"
+#include "gromacs/math/vec.h"
#include "gromacs/timing/cyclecounter.h"
#include "gromacs/timing/wallcycle.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/futil.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/gmxomp.h"
+#include "gromacs/utility/smalloc.h"
/* Include the SIMD macro file and then check for support */
#include "gromacs/simd/simd.h"
int n;
int nalloc;
rvec *x;
- real *q;
+ real *coefficient;
rvec *f;
gmx_bool bSpread; /* These coordinates are used for spreading */
int pme_order;
* the lower cell boundary
*/
int nthread;
- int *thread_idx; /* Which thread should spread which charge */
+ int *thread_idx; /* Which thread should spread which coefficient */
thread_plist_t *thread_plist;
splinedata_t *spline;
} pme_atomcomm_t;
int nthread; /* The number of threads doing PME on our rank */
gmx_bool bPPnode; /* Node also does particle-particle forces */
-
gmx_bool bFEP; /* Compute Free energy contribution */
gmx_bool bFEP_q;
gmx_bool bFEP_lj;
-
int nkx, nky, nkz; /* Grid dimensions */
-
gmx_bool bP3M; /* Do P3M: optimize the influence function */
int pme_order;
real epsilon_r;
* This can probably be done in a better way
* but this simple hack works for now
*/
- /* The PME charge spreading grid sizes/strides, includes pme_order-1 */
+ /* The PME coefficient spreading grid sizes/strides, includes pme_order-1 */
int pmegrid_nx, pmegrid_ny, pmegrid_nz;
/* pmegrid_nz might be larger than strictly necessary to ensure
* memory alignment, pmegrid_nz_base gives the real base size.
splinevec bsp_mod;
/* Buffers to store data for local atoms for L-B combination rule
* calculations in LJ-PME. lb_buf1 stores either the coefficients
- * for spreading/gathering (in serial), or the C6 parameters for
+ * for spreading/gathering (in serial), or the C6 coefficients for
* local atoms (in parallel). lb_buf2 is only used in parallel,
* and stores the sigma values for local atoms. */
real *lb_buf1, *lb_buf2;
if (atc->nslab > 1)
{
srenew(atc->x, atc->nalloc);
- srenew(atc->q, atc->nalloc);
+ srenew(atc->coefficient, atc->nalloc);
srenew(atc->f, atc->nalloc);
for (i = nalloc_old; i < atc->nalloc; i++)
{
#endif
}
-static void dd_pmeredist_x_q(gmx_pme_t pme,
- int n, gmx_bool bX, rvec *x, real *charge,
- pme_atomcomm_t *atc)
+static void dd_pmeredist_pos_coeffs(gmx_pme_t pme,
+ int n, gmx_bool bX, rvec *x, real *data,
+ pme_atomcomm_t *atc)
{
int *commnode, *buf_index;
int nnodes_comm, i, nsend, local_pos, buf_pos, node, scount, rcount;
{
if (atc->count[atc->nodeid] + nsend != n)
{
- gmx_fatal(FARGS, "%d particles communicated to PME node %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
+ gmx_fatal(FARGS, "%d particles communicated to PME rank %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
"This usually means that your system is not well equilibrated.",
n - (atc->count[atc->nodeid] + nsend),
pme->nodeid, 'x'+atc->dimind);
/* Communicate the count */
if (debug)
{
- fprintf(debug, "dimind %d PME node %d send to node %d: %d\n",
+ fprintf(debug, "dimind %d PME rank %d send to rank %d: %d\n",
atc->dimind, atc->nodeid, commnode[i], scount);
}
pme_dd_sendrecv(atc, FALSE, i,
{
copy_rvec(x[i], atc->x[local_pos]);
}
- atc->q[local_pos] = charge[i];
+ atc->coefficient[local_pos] = data[i];
local_pos++;
}
else
{
copy_rvec(x[i], pme->bufv[buf_index[node]]);
}
- pme->bufr[buf_index[node]] = charge[i];
+ pme->bufr[buf_index[node]] = data[i];
buf_index[node]++;
}
}
pme->bufv[buf_pos], scount*sizeof(rvec),
atc->x[local_pos], rcount*sizeof(rvec));
}
- /* Communicate the charges */
+ /* Communicate the coefficients */
pme_dd_sendrecv(atc, FALSE, i,
pme->bufr+buf_pos, scount*sizeof(real),
- atc->q+local_pos, rcount*sizeof(real));
+ atc->coefficient+local_pos, rcount*sizeof(real));
buf_pos += scount;
local_pos += atc->rcount[i];
}
/* Since we have already (un)wrapped the overlap in the z-dimension,
* we only have to communicate 0 to nkz (not pmegrid_nz).
*/
- if (direction == GMX_SUM_QGRID_FORWARD)
+ if (direction == GMX_SUM_GRID_FORWARD)
{
send_id = overlap->send_id[ipulse];
recv_id = overlap->recv_id[ipulse];
/* Copy data to contiguous send buffer */
if (debug)
{
- fprintf(debug, "PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
+ fprintf(debug, "PME send rank %d %d -> %d grid start %d Communicating %d to %d\n",
pme->nodeid, overlap->nodeid, send_id,
pme->pmegrid_start_iy,
send_index0-pme->pmegrid_start_iy,
/* Get data from contiguous recv buffer */
if (debug)
{
- fprintf(debug, "PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
+ fprintf(debug, "PME recv rank %d %d <- %d grid start %d Communicating %d to %d\n",
pme->nodeid, overlap->nodeid, recv_id,
pme->pmegrid_start_iy,
recv_index0-pme->pmegrid_start_iy,
for (k = 0; k < pme->nkz; k++)
{
iz = k;
- if (direction == GMX_SUM_QGRID_FORWARD)
+ if (direction == GMX_SUM_GRID_FORWARD)
{
grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz] += overlap->recvbuf[icnt++];
}
for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
{
- if (direction == GMX_SUM_QGRID_FORWARD)
+ if (direction == GMX_SUM_GRID_FORWARD)
{
send_id = overlap->send_id[ipulse];
recv_id = overlap->recv_id[ipulse];
if (debug)
{
- fprintf(debug, "PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
+ fprintf(debug, "PME send rank %d %d -> %d grid start %d Communicating %d to %d\n",
pme->nodeid, overlap->nodeid, send_id,
pme->pmegrid_start_ix,
send_index0-pme->pmegrid_start_ix,
send_index0-pme->pmegrid_start_ix+send_nindex);
- fprintf(debug, "PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
+ fprintf(debug, "PME recv rank %d %d <- %d grid start %d Communicating %d to %d\n",
pme->nodeid, overlap->nodeid, recv_id,
pme->pmegrid_start_ix,
recv_index0-pme->pmegrid_start_ix,
overlap->mpi_comm, &stat);
/* ADD data from contiguous recv buffer */
- if (direction == GMX_SUM_QGRID_FORWARD)
+ if (direction == GMX_SUM_GRID_FORWARD)
{
p = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
for (i = 0; i < recv_nindex*datasize; i++)
{
#ifdef DEBUG_PME
FILE *fp, *fp2;
- char fn[STRLEN], format[STRLEN];
+ char fn[STRLEN];
real val;
sprintf(fn, "pmegrid%d.pdb", pme->nodeid);
fp = gmx_ffopen(fn, "w");
sprintf(fn, "pmegrid%d.txt", pme->nodeid);
fp2 = gmx_ffopen(fn, "w");
- sprintf(format, "%s%s\n", pdbformat, "%6.2f%6.2f");
#endif
for (ix = 0; ix < local_fft_ndata[XX]; ix++)
val = 100*pmegrid[pmeidx];
if (pmegrid[pmeidx] != 0)
{
- fprintf(fp, format, "ATOM", pmeidx, "CA", "GLY", ' ', pmeidx, ' ',
- 5.0*ix, 5.0*iy, 5.0*iz, 1.0, val);
+ gmx_fprintf_pdb_atomline(fp, epdbATOM, pmeidx, "CA", ' ', "GLY", ' ', pmeidx, ' ',
+ 5.0*ix, 5.0*iy, 5.0*iz, 1.0, val, "");
}
if (pmegrid[pmeidx] != 0)
{
for (ithx = 0; (ithx < order); ithx++) \
{ \
index_x = (i0+ithx)*pny*pnz; \
- valx = qn*thx[ithx]; \
+ valx = coefficient*thx[ithx]; \
\
for (ithy = 0; (ithy < order); ithy++) \
{ \
}
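/* A sketch of the per-atom update performed by DO_BSPLINE above (assuming
 * pny and pnz are the padded grid strides): the separable B-spline weights
 * are combined and accumulated as
 *   grid[(i0+ithx)*pny*pnz + (j0+ithy)*pnz + (k0+ithz)] +=
 *       coefficient*thx[ithx]*thy[ithy]*thz[ithz];
 */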
-static void spread_q_bsplines_thread(pmegrid_t *pmegrid,
- pme_atomcomm_t *atc,
- splinedata_t *spline,
- pme_spline_work_t gmx_unused *work)
+static void spread_coefficients_bsplines_thread(pmegrid_t *pmegrid,
+ pme_atomcomm_t *atc,
+ splinedata_t *spline,
+ pme_spline_work_t gmx_unused *work)
{
- /* spread charges from home atoms to local grid */
+ /* spread coefficients from home atoms to local grid */
real *grid;
pme_overlap_t *ol;
int b, i, nn, n, ithx, ithy, ithz, i0, j0, k0;
int * idxptr;
int order, norder, index_x, index_xy, index_xyz;
- real valx, valxy, qn;
+ real valx, valxy, coefficient;
real *thx, *thy, *thz;
int localsize, bndsize;
int pnx, pny, pnz, ndatatot;
for (nn = 0; nn < spline->n; nn++)
{
- n = spline->ind[nn];
- qn = atc->q[n];
+ n = spline->ind[nn];
+ coefficient = atc->coefficient[n];
- if (qn != 0)
+ if (coefficient != 0)
{
idxptr = atc->idx[n];
norder = nn*order;
#define PME_SPREAD_SIMD4_ALIGNED
#define PME_ORDER 4
#endif
-#include "pme_simd4.h"
+#include "gromacs/mdlib/pme_simd4.h"
#else
DO_BSPLINE(4);
#endif
#ifdef PME_SIMD4_SPREAD_GATHER
#define PME_SPREAD_SIMD4_ALIGNED
#define PME_ORDER 5
-#include "pme_simd4.h"
+#include "gromacs/mdlib/pme_simd4.h"
#else
DO_BSPLINE(5);
#endif
work->vir_lj[XX][YY] = work->vir_lj[YY][XX] = 0.25*virxy;
work->vir_lj[XX][ZZ] = work->vir_lj[ZZ][XX] = 0.25*virxz;
work->vir_lj[YY][ZZ] = work->vir_lj[ZZ][YY] = 0.25*viryz;
+
/* This energy should be corrected for a charged system */
work->energy_lj = 0.5*energy;
}
int index_x, index_xy;
int nx, ny, nz, pnx, pny, pnz;
int * idxptr;
- real tx, ty, dx, dy, qn;
+ real tx, ty, dx, dy, coefficient;
real fx, fy, fz, gval;
real fxy1, fz1;
real *thx, *thy, *thz, *dthx, *dthy, *dthz;
for (nn = 0; nn < spline->n; nn++)
{
- n = spline->ind[nn];
- qn = scale*atc->q[n];
+ n = spline->ind[nn];
+ coefficient = scale*atc->coefficient[n];
if (bClearF)
{
atc->f[n][YY] = 0;
atc->f[n][ZZ] = 0;
}
- if (qn != 0)
+ if (coefficient != 0)
{
fx = 0;
fy = 0;
#define PME_GATHER_F_SIMD4_ALIGNED
#define PME_ORDER 4
#endif
-#include "pme_simd4.h"
+#include "gromacs/mdlib/pme_simd4.h"
#else
DO_FSPLINE(4);
#endif
#ifdef PME_SIMD4_SPREAD_GATHER
#define PME_GATHER_F_SIMD4_ALIGNED
#define PME_ORDER 5
-#include "pme_simd4.h"
+#include "gromacs/mdlib/pme_simd4.h"
#else
DO_FSPLINE(5);
#endif
break;
}
- atc->f[n][XX] += -qn*( fx*nx*rxx );
- atc->f[n][YY] += -qn*( fx*nx*ryx + fy*ny*ryy );
- atc->f[n][ZZ] += -qn*( fx*nx*rzx + fy*ny*rzy + fz*nz*rzz );
+ atc->f[n][XX] += -coefficient*( fx*nx*rxx );
+ atc->f[n][YY] += -coefficient*( fx*nx*ryx + fy*ny*ryy );
+ atc->f[n][ZZ] += -coefficient*( fx*nx*rzx + fy*ny*rzy + fz*nz*rzz );
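+ /* I.e. the gathered force on atom n is minus its coefficient times the
+ * grid-space gradient (fx, fy, fz), converted to Cartesian components by
+ * the recipbox elements rxx, ryx, ryy, rzx, rzy and rzz above.
+ */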
}
}
/* Since the energy and not forces are interpolated
int n, ithx, ithy, ithz, i0, j0, k0;
int index_x, index_xy;
int * idxptr;
- real energy, pot, tx, ty, qn, gval;
+ real energy, pot, tx, ty, coefficient, gval;
real *thx, *thy, *thz;
int norder;
int order;
energy = 0;
for (n = 0; (n < atc->n); n++)
{
- qn = atc->q[n];
+ coefficient = atc->coefficient[n];
- if (qn != 0)
+ if (coefficient != 0)
{
idxptr = atc->idx[n];
norder = n*order;
}
}
- energy += pot*qn;
+ energy += pot*coefficient;
}
}
}
void make_bsplines(splinevec theta, splinevec dtheta, int order,
- rvec fractx[], int nr, int ind[], real charge[],
+ rvec fractx[], int nr, int ind[], real coefficient[],
gmx_bool bDoSplines)
{
/* construct splines for local atoms */
for (i = 0; i < nr; i++)
{
- /* With free energy we do not use the charge check.
+ /* With free energy we do not use the coefficient check.
* In most cases this will be more efficient than calling make_bsplines
- * twice, since usually more than half the particles have charges.
+ * twice, since usually more than half the particles have non-zero coefficients.
*/
ii = ind[i];
- if (bDoSplines || charge[ii] != 0.0)
+ if (bDoSplines || coefficient[ii] != 0.0)
{
xptr = fractx[ii];
switch (order)
{
/* s2g0 the local interpolation grid start.
* s2g1 the local interpolation grid end.
- * Because grid overlap communication only goes forward,
- * the grid the slabs for fft's should be rounded down.
+ * Since in calc_pidx we divide particles, and not grid lines,
+ * spatially uniformly along dimension x or y, we need to round
+ * s2g0 down and s2g1 up.
*/
ol->s2g0[i] = ( i *ndata + 0 )/nnodes;
ol->s2g1[i] = ((i+1)*ndata + nnodes-1)/nnodes + norder - 1;
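/* Worked example (illustrative numbers, with norder equal to pme_order):
 * ndata = 32, nnodes = 3 and norder = 4 give s2g0 = {0, 10, 21} and
 * s2g1 = {14, 25, 35}, so neighboring slabs overlap by at least
 * norder-1 grid lines.
 */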
*bValidSettings = FALSE;
return;
}
- gmx_fatal(FARGS, "The number of PME grid lines per node along x is %g. But when using OpenMP threads, the number of grid lines per node along x should be >= pme_order (%d) or = pmeorder-1. To resolve this issue, use less nodes along x (and possibly more along y and/or z) by specifying -dd manually.",
+ gmx_fatal(FARGS, "The number of PME grid lines per rank along x is %g. But when using OpenMP threads, the number of grid lines per rank along x should be >= pme_order (%d) or = pmeorder-1. To resolve this issue, use fewer ranks along x (and possibly more along y and/or z) by specifying -dd manually.",
nkx/(double)nnodes_major, pme_order);
}
MPI_Comm_size(pme->mpi_comm, &pme->nnodes);
if (pme->nnodes != nnodes_major*nnodes_minor)
{
- gmx_incons("PME node count mismatch");
+ gmx_incons("PME rank count mismatch");
}
}
else
{
if (pme->nnodes % nnodes_major != 0)
{
- gmx_incons("For 2D PME decomposition, #PME nodes must be divisible by the number of nodes in the major dimension");
+ gmx_incons("For 2D PME decomposition, #PME ranks must be divisible by the number of ranks in the major dimension");
}
pme->ndecompdim = 2;
pme->bP3M = (ir->coulombtype == eelP3M_AD || getenv("GMX_PME_P3M") != NULL);
pme->pme_order = ir->pme_order;
- /* Always constant electrostatics parameters */
+ /* Always constant electrostatics coefficients */
pme->epsilon_r = ir->epsilon_r;
- /* Always constant LJ parameters */
+ /* Always constant LJ coefficients */
pme->ljpme_combination_rule = ir->ljpme_combination_rule;
/* If we violate restrictions, generate a fatal error here */
MPI_Type_commit(&(pme->rvec_mpi));
#endif
- /* Note that the charge spreading and force gathering, which usually
+ /* Note that the coefficient spreading and force gathering, which usually
* takes about the same amount of time as FFT+solve_pme,
* is always fully load balanced
- * (unless the charge distribution is inhomogeneous).
+ * (unless the coefficient distribution is inhomogeneous).
*/
imbal = pme_load_imbalance(pme);
"\n"
"NOTE: The load imbalance in PME FFT and solve is %d%%.\n"
" For optimal PME load balancing\n"
- " PME grid_x (%d) and grid_y (%d) should be divisible by #PME_nodes_x (%d)\n"
- " and PME grid_y (%d) and grid_z (%d) should be divisible by #PME_nodes_y (%d)\n"
+ " PME grid_x (%d) and grid_y (%d) should be divisible by #PME_ranks_x (%d)\n"
+ " and PME grid_y (%d) and grid_z (%d) should be divisible by #PME_ranks_y (%d)\n"
"\n",
(int)((imbal-1)*100 + 0.5),
pme->nkx, pme->nky, pme->nnodes_major,
int fft_my, fft_mz;
int buf_my = -1;
int nsx, nsy, nsz;
- ivec ne;
+ ivec localcopy_end;
int offx, offy, offz, x, y, z, i0, i0t;
int sx, sy, sz, fx, fy, fz, tx1, ty1, tz1, ox, oy, oz;
gmx_bool bClearBufX, bClearBufY, bClearBufXY, bClearBuf;
for (d = 0; d < DIM; d++)
{
- ne[d] = min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
- local_fft_ndata[d]);
+ /* Determine up to where our thread needs to copy from the
+ * thread-local charge spreading grid to the rank-local FFT grid.
+ * This is up to our spreading grid end minus order-1 and
+ * not beyond the local FFT grid.
+ */
+ localcopy_end[d] =
+ min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
+ local_fft_ndata[d]);
}
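/* For example (illustrative values): offset[d] = 10, n[d] = 16, order = 4
 * and local_fft_ndata[d] = 60 give localcopy_end[d] = min(23, 60) = 23.
 */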
offx = pmegrid->offset[XX];
/* Now loop over all the thread data blocks that contribute
* to the grid region we (our thread) are operating on.
*/
- /* Note that ffy_nx/y is equal to the number of grid points
+ /* Note that fft_nx/y is equal to the number of grid points
* between the first point of our node grid and the one of the next node.
*/
for (sx = 0; sx >= -pmegrids->nthread_comm[XX]; sx--)
}
pmegrid_g = &pmegrids->grid_th[fx*pmegrids->nc[YY]*pmegrids->nc[ZZ]];
ox += pmegrid_g->offset[XX];
+ /* Determine the end of our part of the source grid */
if (!bCommX)
{
- tx1 = min(ox + pmegrid_g->n[XX], ne[XX]);
+ /* Use our thread local source grid and target grid part */
+ tx1 = min(ox + pmegrid_g->n[XX], localcopy_end[XX]);
}
else
{
+ /* Use our thread local source grid and the spreading range */
tx1 = min(ox + pmegrid_g->n[XX], pme->pme_order);
}
}
pmegrid_g = &pmegrids->grid_th[fy*pmegrids->nc[ZZ]];
oy += pmegrid_g->offset[YY];
+ /* Determine the end of our part of the source grid */
if (!bCommY)
{
- ty1 = min(oy + pmegrid_g->n[YY], ne[YY]);
+ /* Use our thread local source grid and target grid part */
+ ty1 = min(oy + pmegrid_g->n[YY], localcopy_end[YY]);
}
else
{
+ /* Use our thread local source grid and the spreading range */
ty1 = min(oy + pmegrid_g->n[YY], pme->pme_order);
}
}
pmegrid_g = &pmegrids->grid_th[fz];
oz += pmegrid_g->offset[ZZ];
- tz1 = min(oz + pmegrid_g->n[ZZ], ne[ZZ]);
+ tz1 = min(oz + pmegrid_g->n[ZZ], localcopy_end[ZZ]);
if (sx == 0 && sy == 0 && sz == 0)
{
if (bCommY)
{
commbuf = commbuf_y;
- buf_my = ty1 - offy;
+ /* The y-size of the communication buffer is set by
+ * the overlap of the grid part of our local slab
+ * with the part starting at the next slab.
+ */
+ buf_my =
+ pme->overlap[1].s2g1[pme->nodeid_minor] -
+ pme->overlap[1].s2g0[pme->nodeid_minor+1];
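+ /* Illustrative example: with s2g1[nodeid_minor] = 25 and
+ * s2g0[nodeid_minor+1] = 21, the buffer holds buf_my = 4 rows along y.
+ */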
if (bCommX)
{
/* We index commbuf modulo the local grid size */
int x, y, z, indg, indb;
/* Note that this routine is only used for forward communication.
- * Since the force gathering, unlike the charge spreading,
+ * Since the force gathering, unlike the coefficient spreading,
* can be trivially parallelized over the particles,
* the backwards process is much simpler and can use the "old"
* communication setup.
sendptr = overlap->sendbuf + send_index0*local_fft_ndata[ZZ];
recvptr = overlap->recvbuf;
+ if (debug != NULL)
+ {
+ fprintf(debug, "PME fftgrid comm y %2d x %2d x %2d\n",
+ local_fft_ndata[XX], send_nindex, local_fft_ndata[ZZ]);
+ }
+
#ifdef GMX_MPI
MPI_Sendrecv(sendptr, send_size_y*datasize, GMX_MPI_REAL,
send_id, ipulse,
if (debug != NULL)
{
- fprintf(debug, "PME fftgrid comm %2d x %2d x %2d\n",
+ fprintf(debug, "PME fftgrid comm x %2d x %2d x %2d\n",
send_nindex, local_fft_ndata[YY], local_fft_ndata[ZZ]);
}
if (grids->nthread == 1)
{
- /* One thread, we operate on all charges */
+ /* One thread, we operate on all coefficients */
spline->n = atc->n;
}
else
if (bCalcSplines)
{
make_bsplines(spline->theta, spline->dtheta, pme->pme_order,
- atc->fractx, spline->n, spline->ind, atc->q, bDoSplines);
+ atc->fractx, spline->n, spline->ind, atc->coefficient, bDoSplines);
}
if (bSpread)
#ifdef PME_TIME_SPREAD
ct1a = omp_cyc_start();
#endif
- spread_q_bsplines_thread(grid, atc, spline, pme->spline_work);
+ spread_coefficients_bsplines_thread(grid, atc, spline, pme->spline_work);
if (pme->bUseThreads)
{
atc->pme_order = pme->pme_order;
atc->n = n;
pme_realloc_atomcomm_things(atc);
- atc->x = x;
- atc->q = q;
+ atc->x = x;
+ atc->coefficient = q;
/* We only use the A-charges grid */
grid = &pme->pmegrid[PME_GRID_QA];
do
{
/* Domain decomposition */
- ret = gmx_pme_recv_params_coords(pme_pp,
+ ret = gmx_pme_recv_coeffs_coords(pme_pp,
&natoms,
&chargeA, &chargeB,
&c6A, &c6B,
{
real sigma4;
- sigma4 = local_sigma[i];
- sigma4 = sigma4*sigma4;
- sigma4 = sigma4*sigma4;
- pme->atc[0].q[i] = local_c6[i] / sigma4;
+ sigma4 = local_sigma[i];
+ sigma4 = sigma4*sigma4;
+ sigma4 = sigma4*sigma4;
+ pme->atc[0].coefficient[i] = local_c6[i] / sigma4;
}
}
for (i = 0; i < pme->atc[0].n; ++i)
{
- pme->atc[0].q[i] *= local_sigma[i];
+ pme->atc[0].coefficient[i] *= local_sigma[i];
}
}
static void
-do_redist_x_q(gmx_pme_t pme, t_commrec *cr, int start, int homenr,
- gmx_bool bFirst, rvec x[], real *data)
+do_redist_pos_coeffs(gmx_pme_t pme, t_commrec *cr, int start, int homenr,
+ gmx_bool bFirst, rvec x[], real *data)
{
int d;
pme_atomcomm_t *atc;
{
int n_d;
rvec *x_d;
- real *q_d;
+ real *param_d;
if (d == pme->ndecompdim - 1)
{
- n_d = homenr;
- x_d = x + start;
- q_d = data;
+ n_d = homenr;
+ x_d = x + start;
+ param_d = data;
}
else
{
- n_d = pme->atc[d + 1].n;
- x_d = atc->x;
- q_d = atc->q;
+ n_d = pme->atc[d + 1].n;
+ x_d = atc->x;
+ param_d = atc->coefficient;
}
atc = &pme->atc[d];
atc->npd = n_d;
/* Redistribute x (only once) and qA/c6A or qB/c6B */
if (DOMAINDECOMP(cr))
{
- dd_pmeredist_x_q(pme, n_d, bFirst, x_d, q_d, atc);
+ dd_pmeredist_pos_coeffs(pme, n_d, bFirst, x_d, param_d, atc);
}
}
}
-/* TODO: when adding free-energy support, sigmaB will no longer be
- * unused */
int gmx_pme_do(gmx_pme_t pme,
int start, int homenr,
rvec x[], rvec f[],
real *chargeA, real *chargeB,
real *c6A, real *c6B,
- real *sigmaA, real gmx_unused *sigmaB,
+ real *sigmaA, real *sigmaB,
matrix box, t_commrec *cr,
int maxshift_x, int maxshift_y,
t_nrnb *nrnb, gmx_wallcycle_t wcycle,
real *dvdlambda_q, real *dvdlambda_lj,
int flags)
{
- int q, d, i, j, ntot, npme, qmax;
+ int d, i, j, k, ntot, npme, grid_index, max_grid_index;
int nx, ny, nz;
int n_d, local_ny;
pme_atomcomm_t *atc = NULL;
real *grid = NULL;
real *ptr;
rvec *x_d, *f_d;
- real *charge = NULL, *q_d;
+ real *coefficient = NULL;
real energy_AB[4];
matrix vir_AB[4];
real scale, lambda;
t_complex * cfftgrid;
int thread;
gmx_bool bFirst, bDoSplines;
+ int fep_state;
+ int fep_states_lj = pme->bFEP_lj ? 2 : 1;
const gmx_bool bCalcEnerVir = flags & GMX_PME_CALC_ENER_VIR;
const gmx_bool bCalcF = flags & GMX_PME_CALC_F;
bDoSplines = pme->bFEP || ((flags & GMX_PME_DO_COULOMB) && (flags & GMX_PME_DO_LJ));
/* We need a maximum of four separate PME calculations:
- * q=0: Coulomb PME with charges from state A
- * q=1: Coulomb PME with charges from state B
- * q=2: LJ PME with C6 from state A
- * q=3: LJ PME with C6 from state B
+ * grid_index=0: Coulomb PME with charges from state A
+ * grid_index=1: Coulomb PME with charges from state B
+ * grid_index=2: LJ PME with C6 from state A
+ * grid_index=3: LJ PME with C6 from state B
* For Lorentz-Berthelot combination rules, a separate loop is used to
* calculate all the terms
*/
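/* For example, a plain Coulomb run without free energy uses only
 * grid_index 0; charge FEP adds 1; LJ-PME adds 2 and, with FEP on the
 * LJ coefficients, 3.
 */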
/* If we are doing LJ-PME with LB, we only do Q here */
- qmax = (pme->ljpme_combination_rule == eljpmeLB) ? DO_Q : DO_Q_AND_LJ;
+ max_grid_index = (pme->ljpme_combination_rule == eljpmeLB) ? DO_Q : DO_Q_AND_LJ;
- for (q = 0; q < qmax; ++q)
+ for (grid_index = 0; grid_index < max_grid_index; ++grid_index)
{
- /* Check if we should do calculations at this q
- * If q is odd we should be doing FEP
- * If q < 2 we should be doing electrostatic PME
- * If q >= 2 we should be doing LJ-PME
+ /* Check if we should do calculations at this grid_index
+ * If grid_index is odd we should be doing FEP
+ * If grid_index < 2 we should be doing electrostatic PME
+ * If grid_index >= 2 we should be doing LJ-PME
*/
- if ((q < DO_Q && (!(flags & GMX_PME_DO_COULOMB) ||
- (q == 1 && !pme->bFEP_q))) ||
- (q >= DO_Q && (!(flags & GMX_PME_DO_LJ) ||
- (q == 3 && !pme->bFEP_lj))))
+ if ((grid_index < DO_Q && (!(flags & GMX_PME_DO_COULOMB) ||
+ (grid_index == 1 && !pme->bFEP_q))) ||
+ (grid_index >= DO_Q && (!(flags & GMX_PME_DO_LJ) ||
+ (grid_index == 3 && !pme->bFEP_lj))))
{
continue;
}
/* Unpack structure */
- pmegrid = &pme->pmegrid[q];
- fftgrid = pme->fftgrid[q];
- cfftgrid = pme->cfftgrid[q];
- pfft_setup = pme->pfft_setup[q];
- switch (q)
+ pmegrid = &pme->pmegrid[grid_index];
+ fftgrid = pme->fftgrid[grid_index];
+ cfftgrid = pme->cfftgrid[grid_index];
+ pfft_setup = pme->pfft_setup[grid_index];
+ switch (grid_index)
{
- case 0: charge = chargeA + start; break;
- case 1: charge = chargeB + start; break;
- case 2: charge = c6A + start; break;
- case 3: charge = c6B + start; break;
+ case 0: coefficient = chargeA + start; break;
+ case 1: coefficient = chargeB + start; break;
+ case 2: coefficient = c6A + start; break;
+ case 3: coefficient = c6B + start; break;
}
grid = pmegrid->grid.grid;
if (debug)
{
- fprintf(debug, "PME: nnodes = %d, nodeid = %d\n",
+ fprintf(debug, "PME: number of ranks = %d, rank = %d\n",
cr->nnodes, cr->nodeid);
fprintf(debug, "Grid = %p\n", (void*)grid);
if (grid == NULL)
if (pme->nnodes == 1)
{
- atc->q = charge;
+ atc->coefficient = coefficient;
}
else
{
wallcycle_start(wcycle, ewcPME_REDISTXF);
- do_redist_x_q(pme, cr, start, homenr, bFirst, x, charge);
+ do_redist_pos_coeffs(pme, cr, start, homenr, bFirst, x, coefficient);
where();
wallcycle_stop(wcycle, ewcPME_REDISTXF);
if (debug)
{
- fprintf(debug, "Node= %6d, pme local particles=%6d\n",
+ fprintf(debug, "Rank= %6d, pme local particles=%6d\n",
cr->nodeid, atc->n);
}
- if (flags & GMX_PME_SPREAD_Q)
+ if (flags & GMX_PME_SPREAD)
{
wallcycle_start(wcycle, ewcPME_SPREADGATHER);
- /* Spread the charges on a grid */
- spread_on_grid(pme, &pme->atc[0], pmegrid, bFirst, TRUE, fftgrid, bDoSplines, q);
+ /* Spread the coefficients on a grid */
+ spread_on_grid(pme, &pme->atc[0], pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
if (bFirst)
{
inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc->n);
}
- inc_nrnb(nrnb, eNR_SPREADQBSP,
+ inc_nrnb(nrnb, eNR_SPREADBSP,
pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
if (!pme->bUseThreads)
#ifdef GMX_MPI
if (pme->nnodes > 1)
{
- gmx_sum_qgrid_dd(pme, grid, GMX_SUM_QGRID_FORWARD);
+ gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_FORWARD);
where();
}
#endif
- copy_pmegrid_to_fftgrid(pme, grid, fftgrid, q);
+ copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index);
}
wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
/* solve in k-space for our local cells */
if (thread == 0)
{
- wallcycle_start(wcycle, (q < DO_Q ? ewcPME_SOLVE : ewcLJPME));
+ wallcycle_start(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME));
}
- if (q < DO_Q)
+ if (grid_index < DO_Q)
{
loop_count =
solve_pme_yzx(pme, cfftgrid, ewaldcoeff_q,
if (thread == 0)
{
- wallcycle_stop(wcycle, (q < DO_Q ? ewcPME_SOLVE : ewcLJPME));
+ wallcycle_stop(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME));
where();
inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
}
inc_nrnb(nrnb, eNR_FFT, 2*npme);
}
+ /* Note: this wallcycle region is closed below
+ outside an OpenMP region, so take care if
+ refactoring code here. */
wallcycle_start(wcycle, ewcPME_SPREADGATHER);
}
- copy_fftgrid_to_pmegrid(pme, fftgrid, grid, q, pme->nthread, thread);
+ copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread);
}
}
/* End of thread parallel section.
#ifdef GMX_MPI
if (pme->nnodes > 1)
{
- gmx_sum_qgrid_dd(pme, grid, GMX_SUM_QGRID_BACKWARD);
+ gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_BACKWARD);
}
#endif
where();
* atc->f is the actual force array, not a buffer,
* therefore we should not clear it.
*/
- lambda = q < DO_Q ? lambda_q : lambda_lj;
+ lambda = grid_index < DO_Q ? lambda_q : lambda_lj;
bClearF = (bFirst && PAR(cr));
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
for (thread = 0; thread < pme->nthread; thread++)
{
gather_f_bsplines(pme, grid, bClearF, atc,
&atc->spline[thread],
- pme->bFEP ? (q % 2 == 0 ? 1.0-lambda : lambda) : 1.0);
+ pme->bFEP ? (grid_index % 2 == 0 ? 1.0-lambda : lambda) : 1.0);
}
where();
inc_nrnb(nrnb, eNR_GATHERFBSP,
pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
+ /* Note: this wallcycle region is opened above inside an OpenMP
+ region, so take care if refactoring code here. */
wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
}
/* This should only be called on the master thread
* and after the threads have synchronized.
*/
- if (q < 2)
+ if (grid_index < 2)
{
- get_pme_ener_vir_q(pme, pme->nthread, &energy_AB[q], vir_AB[q]);
+ get_pme_ener_vir_q(pme, pme->nthread, &energy_AB[grid_index], vir_AB[grid_index]);
}
else
{
- get_pme_ener_vir_lj(pme, pme->nthread, &energy_AB[q], vir_AB[q]);
+ get_pme_ener_vir_lj(pme, pme->nthread, &energy_AB[grid_index], vir_AB[grid_index]);
}
}
bFirst = FALSE;
- } /* of q-loop */
+ } /* of grid_index-loop */
/* For Lorentz-Berthelot combination rules in LJ-PME, we need to calculate
* seven terms. */
if ((flags & GMX_PME_DO_LJ) && pme->ljpme_combination_rule == eljpmeLB)
{
- real *local_c6 = NULL, *local_sigma = NULL;
- if (pme->nnodes == 1)
+ /* Loop over A- and B-state if we are doing FEP */
+ for (fep_state = 0; fep_state < fep_states_lj; ++fep_state)
{
- if (pme->lb_buf1 == NULL)
+ real *local_c6 = NULL, *local_sigma = NULL, *RedistC6 = NULL, *RedistSigma = NULL;
+ if (pme->nnodes == 1)
{
- pme->lb_buf_nalloc = pme->atc[0].n;
- snew(pme->lb_buf1, pme->lb_buf_nalloc);
- }
- pme->atc[0].q = pme->lb_buf1;
- local_c6 = c6A;
- local_sigma = sigmaA;
- }
- else
- {
- atc = &pme->atc[0];
-
- wallcycle_start(wcycle, ewcPME_REDISTXF);
-
- do_redist_x_q(pme, cr, start, homenr, bFirst, x, c6A);
- if (pme->lb_buf_nalloc < atc->n)
- {
- pme->lb_buf_nalloc = atc->nalloc;
- srenew(pme->lb_buf1, pme->lb_buf_nalloc);
- srenew(pme->lb_buf2, pme->lb_buf_nalloc);
- }
- local_c6 = pme->lb_buf1;
- for (i = 0; i < atc->n; ++i)
- {
- local_c6[i] = atc->q[i];
- }
- where();
-
- do_redist_x_q(pme, cr, start, homenr, FALSE, x, sigmaA);
- local_sigma = pme->lb_buf2;
- for (i = 0; i < atc->n; ++i)
- {
- local_sigma[i] = atc->q[i];
+ if (pme->lb_buf1 == NULL)
+ {
+ pme->lb_buf_nalloc = pme->atc[0].n;
+ snew(pme->lb_buf1, pme->lb_buf_nalloc);
+ }
+ pme->atc[0].coefficient = pme->lb_buf1;
+ switch (fep_state)
+ {
+ case 0:
+ local_c6 = c6A;
+ local_sigma = sigmaA;
+ break;
+ case 1:
+ local_c6 = c6B;
+ local_sigma = sigmaB;
+ break;
+ default:
+ gmx_incons("Trying to access wrong FEP-state in LJ-PME routine");
+ }
}
- where();
-
- wallcycle_stop(wcycle, ewcPME_REDISTXF);
- }
- calc_initial_lb_coeffs(pme, local_c6, local_sigma);
-
- /*Seven terms in LJ-PME with LB, q < 2 reserved for electrostatics*/
- for (q = 2; q < 9; ++q)
- {
- /* Unpack structure */
- pmegrid = &pme->pmegrid[q];
- fftgrid = pme->fftgrid[q];
- cfftgrid = pme->cfftgrid[q];
- pfft_setup = pme->pfft_setup[q];
- calc_next_lb_coeffs(pme, local_sigma);
- grid = pmegrid->grid.grid;
- where();
-
- if (flags & GMX_PME_SPREAD_Q)
+ else
{
- wallcycle_start(wcycle, ewcPME_SPREADGATHER);
- /* Spread the charges on a grid */
- spread_on_grid(pme, &pme->atc[0], pmegrid, bFirst, TRUE, fftgrid, bDoSplines, q);
+ atc = &pme->atc[0];
+ switch (fep_state)
+ {
+ case 0:
+ RedistC6 = c6A;
+ RedistSigma = sigmaA;
+ break;
+ case 1:
+ RedistC6 = c6B;
+ RedistSigma = sigmaB;
+ break;
+ default:
+ gmx_incons("Trying to access wrong FEP-state in LJ-PME routine");
+ }
+ wallcycle_start(wcycle, ewcPME_REDISTXF);
- if (bFirst)
+ do_redist_pos_coeffs(pme, cr, start, homenr, bFirst, x, RedistC6);
+ if (pme->lb_buf_nalloc < atc->n)
{
- inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc->n);
+ pme->lb_buf_nalloc = atc->nalloc;
+ srenew(pme->lb_buf1, pme->lb_buf_nalloc);
+ srenew(pme->lb_buf2, pme->lb_buf_nalloc);
}
- inc_nrnb(nrnb, eNR_SPREADQBSP,
- pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
- if (pme->nthread == 1)
+ local_c6 = pme->lb_buf1;
+ for (i = 0; i < atc->n; ++i)
{
- wrap_periodic_pmegrid(pme, grid);
+ local_c6[i] = atc->coefficient[i];
+ }
+ where();
- /* sum contributions to local grid from other nodes */
-#ifdef GMX_MPI
- if (pme->nnodes > 1)
- {
- gmx_sum_qgrid_dd(pme, grid, GMX_SUM_QGRID_FORWARD);
- where();
- }
-#endif
- copy_pmegrid_to_fftgrid(pme, grid, fftgrid, q);
+ do_redist_pos_coeffs(pme, cr, start, homenr, FALSE, x, RedistSigma);
+ local_sigma = pme->lb_buf2;
+ for (i = 0; i < atc->n; ++i)
+ {
+ local_sigma[i] = atc->coefficient[i];
}
+ where();
- wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
+ wallcycle_stop(wcycle, ewcPME_REDISTXF);
}
+ calc_initial_lb_coeffs(pme, local_c6, local_sigma);
- /*Here we start a large thread parallel region*/
-#pragma omp parallel num_threads(pme->nthread) private(thread)
+ /*Seven terms in LJ-PME with LB, grid_index < 2 reserved for electrostatics*/
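+ /* These presumably correspond to the terms of the binomial expansion of
+ * (sigma_i+sigma_j)^6 in the Lorentz-Berthelot combination rule;
+ * calc_next_lb_coeffs() multiplies each atom's coefficient by one extra
+ * power of sigma for every successive grid_index.
+ */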
+ for (grid_index = 2; grid_index < 9; ++grid_index)
{
- thread = gmx_omp_get_thread_num();
- if (flags & GMX_PME_SOLVE)
+ /* Unpack structure */
+ pmegrid = &pme->pmegrid[grid_index];
+ fftgrid = pme->fftgrid[grid_index];
+ cfftgrid = pme->cfftgrid[grid_index];
+ pfft_setup = pme->pfft_setup[grid_index];
+ calc_next_lb_coeffs(pme, local_sigma);
+ grid = pmegrid->grid.grid;
+ where();
+
+ if (flags & GMX_PME_SPREAD)
{
- /* do 3d-fft */
- if (thread == 0)
+ wallcycle_start(wcycle, ewcPME_SPREADGATHER);
+ /* Spread the c6 on a grid */
+ spread_on_grid(pme, &pme->atc[0], pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
+
+ if (bFirst)
{
- wallcycle_start(wcycle, ewcPME_FFT);
+ inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc->n);
}
- gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX,
- thread, wcycle);
- if (thread == 0)
+ inc_nrnb(nrnb, eNR_SPREADBSP,
+ pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
+ if (pme->nthread == 1)
{
- wallcycle_stop(wcycle, ewcPME_FFT);
+ wrap_periodic_pmegrid(pme, grid);
+ /* sum contributions to local grid from other nodes */
+#ifdef GMX_MPI
+ if (pme->nnodes > 1)
+ {
+ gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_FORWARD);
+ where();
+ }
+#endif
+ copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index);
}
- where();
+ wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
}
- }
- bFirst = FALSE;
- }
- if (flags & GMX_PME_SOLVE)
- {
- int loop_count;
- /* solve in k-space for our local cells */
+ /*Here we start a large thread parallel region*/
#pragma omp parallel num_threads(pme->nthread) private(thread)
- {
- thread = gmx_omp_get_thread_num();
- if (thread == 0)
{
- wallcycle_start(wcycle, ewcLJPME);
- }
+ thread = gmx_omp_get_thread_num();
+ if (flags & GMX_PME_SOLVE)
+ {
+ /* do 3d-fft */
+ if (thread == 0)
+ {
+ wallcycle_start(wcycle, ewcPME_FFT);
+ }
- loop_count =
- solve_pme_lj_yzx(pme, &pme->cfftgrid[2], TRUE, ewaldcoeff_lj,
- box[XX][XX]*box[YY][YY]*box[ZZ][ZZ],
- bCalcEnerVir,
- pme->nthread, thread);
- if (thread == 0)
- {
- wallcycle_stop(wcycle, ewcLJPME);
- where();
- inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
+ gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX,
+ thread, wcycle);
+ if (thread == 0)
+ {
+ wallcycle_stop(wcycle, ewcPME_FFT);
+ }
+ where();
+ }
}
+ bFirst = FALSE;
}
- }
-
- if (bCalcEnerVir)
- {
- /* This should only be called on the master thread and
- * after the threads have synchronized.
- */
- get_pme_ener_vir_lj(pme, pme->nthread, &energy_AB[2], vir_AB[2]);
- }
-
- if (bCalcF)
- {
- bFirst = !(flags & GMX_PME_DO_COULOMB);
- calc_initial_lb_coeffs(pme, local_c6, local_sigma);
- for (q = 8; q >= 2; --q)
+ if (flags & GMX_PME_SOLVE)
{
- /* Unpack structure */
- pmegrid = &pme->pmegrid[q];
- fftgrid = pme->fftgrid[q];
- cfftgrid = pme->cfftgrid[q];
- pfft_setup = pme->pfft_setup[q];
- grid = pmegrid->grid.grid;
- calc_next_lb_coeffs(pme, local_sigma);
- where();
+ /* solve in k-space for our local cells */
#pragma omp parallel num_threads(pme->nthread) private(thread)
{
+ int loop_count;
thread = gmx_omp_get_thread_num();
- /* do 3d-invfft */
if (thread == 0)
{
- where();
- wallcycle_start(wcycle, ewcPME_FFT);
+ wallcycle_start(wcycle, ewcLJPME);
}
- gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL,
- thread, wcycle);
+ loop_count =
+ solve_pme_lj_yzx(pme, &pme->cfftgrid[2], TRUE, ewaldcoeff_lj,
+ box[XX][XX]*box[YY][YY]*box[ZZ][ZZ],
+ bCalcEnerVir,
+ pme->nthread, thread);
if (thread == 0)
{
- wallcycle_stop(wcycle, ewcPME_FFT);
-
+ wallcycle_stop(wcycle, ewcLJPME);
where();
+ inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
+ }
+ }
+ }
+
+ if (bCalcEnerVir)
+ {
+ /* This should only be called on the master thread and
+ * after the threads have synchronized.
+ */
+ get_pme_ener_vir_lj(pme, pme->nthread, &energy_AB[2+fep_state], vir_AB[2+fep_state]);
+ }
- if (pme->nodeid == 0)
+ if (bCalcF)
+ {
+ bFirst = !(flags & GMX_PME_DO_COULOMB);
+ calc_initial_lb_coeffs(pme, local_c6, local_sigma);
+ for (grid_index = 8; grid_index >= 2; --grid_index)
+ {
+ /* Unpack structure */
+ pmegrid = &pme->pmegrid[grid_index];
+ fftgrid = pme->fftgrid[grid_index];
+ cfftgrid = pme->cfftgrid[grid_index];
+ pfft_setup = pme->pfft_setup[grid_index];
+ grid = pmegrid->grid.grid;
+ calc_next_lb_coeffs(pme, local_sigma);
+ where();
+#pragma omp parallel num_threads(pme->nthread) private(thread)
+ {
+ thread = gmx_omp_get_thread_num();
+ /* do 3d-invfft */
+ if (thread == 0)
{
- ntot = pme->nkx*pme->nky*pme->nkz;
- npme = ntot*log((real)ntot)/log(2.0);
- inc_nrnb(nrnb, eNR_FFT, 2*npme);
+ where();
+ wallcycle_start(wcycle, ewcPME_FFT);
+ }
+
+ gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL,
+ thread, wcycle);
+ if (thread == 0)
+ {
+ wallcycle_stop(wcycle, ewcPME_FFT);
+
+ where();
+
+ if (pme->nodeid == 0)
+ {
+ ntot = pme->nkx*pme->nky*pme->nkz;
+ npme = ntot*log((real)ntot)/log(2.0);
+ inc_nrnb(nrnb, eNR_FFT, 2*npme);
+ }
+ wallcycle_start(wcycle, ewcPME_SPREADGATHER);
}
- wallcycle_start(wcycle, ewcPME_SPREADGATHER);
- }
- copy_fftgrid_to_pmegrid(pme, fftgrid, grid, q, pme->nthread, thread);
+ copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread);
- } /*#pragma omp parallel*/
+ } /*#pragma omp parallel*/
- /* distribute local grid to all nodes */
+ /* distribute local grid to all nodes */
#ifdef GMX_MPI
- if (pme->nnodes > 1)
- {
- gmx_sum_qgrid_dd(pme, grid, GMX_SUM_QGRID_BACKWARD);
- }
+ if (pme->nnodes > 1)
+ {
+ gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_BACKWARD);
+ }
#endif
- where();
+ where();
- unwrap_periodic_pmegrid(pme, grid);
+ unwrap_periodic_pmegrid(pme, grid);
- /* interpolate forces for our local atoms */
- where();
- bClearF = (bFirst && PAR(cr));
- scale = pme->bFEP ? (q < 9 ? 1.0-lambda_lj : lambda_lj) : 1.0;
- scale *= lb_scale_factor[q-2];
+ /* interpolate forces for our local atoms */
+ where();
+ bClearF = (bFirst && PAR(cr));
+ scale = pme->bFEP ? (fep_state < 1 ? 1.0-lambda_lj : lambda_lj) : 1.0;
+ scale *= lb_scale_factor[grid_index-2];
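+ /* lb_scale_factor presumably supplies the binomial weights of the
+ * (sigma_i+sigma_j)^6 expansion that match the seven grids above. */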
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
- for (thread = 0; thread < pme->nthread; thread++)
- {
- gather_f_bsplines(pme, grid, bClearF, &pme->atc[0],
- &pme->atc[0].spline[thread],
- scale);
- }
- where();
+ for (thread = 0; thread < pme->nthread; thread++)
+ {
+ gather_f_bsplines(pme, grid, bClearF, &pme->atc[0],
+ &pme->atc[0].spline[thread],
+ scale);
+ }
+ where();
- inc_nrnb(nrnb, eNR_GATHERFBSP,
- pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
- wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
+ inc_nrnb(nrnb, eNR_GATHERFBSP,
+ pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
+ wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
- bFirst = FALSE;
- } /*for (q = 8; q >= 2; --q)*/
- } /*if (bCalcF)*/
- } /*if ((flags & GMX_PME_DO_LJ) && pme->ljpme_combination_rule == eljpmeLB) */
+ bFirst = FALSE;
+ } /* for (grid_index = 8; grid_index >= 2; --grid_index) */
+ } /* if (bCalcF) */
+ } /* for (fep_state = 0; fep_state < fep_states_lj; ++fep_state) */
+ } /* if ((flags & GMX_PME_DO_LJ) && pme->ljpme_combination_rule == eljpmeLB) */
if (bCalcF && pme->nnodes > 1)
{