* /Erik 001109
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "gromacs/legacyheaders/pme.h"
+
+#include "config.h"
+#include <assert.h>
+#include <math.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
-#include <math.h>
-#include <assert.h>
-#include "typedefs.h"
-#include "txtdump.h"
-#include "vec.h"
-#include "smalloc.h"
-#include "coulomb.h"
-#include "gmx_fatal.h"
-#include "pme.h"
-#include "network.h"
-#include "physics.h"
-#include "nrnb.h"
-#include "macros.h"
#include "gromacs/fft/parallel_3dfft.h"
-#include "gromacs/fileio/futil.h"
#include "gromacs/fileio/pdbio.h"
+#include "gromacs/legacyheaders/coulomb.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/nrnb.h"
+#include "gromacs/legacyheaders/txtdump.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/math/gmxcomplex.h"
+#include "gromacs/math/units.h"
+#include "gromacs/math/vec.h"
#include "gromacs/timing/cyclecounter.h"
#include "gromacs/timing/wallcycle.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/futil.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/gmxomp.h"
+#include "gromacs/utility/smalloc.h"
/* Include the SIMD macro file and then check for support */
#include "gromacs/simd/simd.h"
int nthread; /* The number of threads doing PME on our rank */
gmx_bool bPPnode; /* Node also does particle-particle forces */
-
gmx_bool bFEP; /* Compute Free energy contribution */
gmx_bool bFEP_q;
gmx_bool bFEP_lj;
-
int nkx, nky, nkz; /* Grid dimensions */
-
gmx_bool bP3M; /* Do P3M: optimize the influence function */
int pme_order;
real epsilon_r;
{
if (atc->count[atc->nodeid] + nsend != n)
{
- gmx_fatal(FARGS, "%d particles communicated to PME node %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
+ gmx_fatal(FARGS, "%d particles communicated to PME rank %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
"This usually means that your system is not well equilibrated.",
n - (atc->count[atc->nodeid] + nsend),
pme->nodeid, 'x'+atc->dimind);
/* Communicate the count */
if (debug)
{
- fprintf(debug, "dimind %d PME node %d send to node %d: %d\n",
+ fprintf(debug, "dimind %d PME rank %d send to rank %d: %d\n",
atc->dimind, atc->nodeid, commnode[i], scount);
}
pme_dd_sendrecv(atc, FALSE, i,
/* Copy data to contiguous send buffer */
if (debug)
{
- fprintf(debug, "PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
+ fprintf(debug, "PME send rank %d %d -> %d grid start %d Communicating %d to %d\n",
pme->nodeid, overlap->nodeid, send_id,
pme->pmegrid_start_iy,
send_index0-pme->pmegrid_start_iy,
/* Get data from contiguous recv buffer */
if (debug)
{
- fprintf(debug, "PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
+ fprintf(debug, "PME recv rank %d %d <- %d grid start %d Communicating %d to %d\n",
pme->nodeid, overlap->nodeid, recv_id,
pme->pmegrid_start_iy,
recv_index0-pme->pmegrid_start_iy,
if (debug)
{
- fprintf(debug, "PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
+ fprintf(debug, "PME send rank %d %d -> %d grid start %d Communicating %d to %d\n",
pme->nodeid, overlap->nodeid, send_id,
pme->pmegrid_start_ix,
send_index0-pme->pmegrid_start_ix,
send_index0-pme->pmegrid_start_ix+send_nindex);
- fprintf(debug, "PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
+ fprintf(debug, "PME recv rank %d %d <- %d grid start %d Communicating %d to %d\n",
pme->nodeid, overlap->nodeid, recv_id,
pme->pmegrid_start_ix,
recv_index0-pme->pmegrid_start_ix,
{
#ifdef DEBUG_PME
FILE *fp, *fp2;
- char fn[STRLEN], format[STRLEN];
+ char fn[STRLEN];
real val;
sprintf(fn, "pmegrid%d.pdb", pme->nodeid);
fp = gmx_ffopen(fn, "w");
sprintf(fn, "pmegrid%d.txt", pme->nodeid);
fp2 = gmx_ffopen(fn, "w");
- sprintf(format, "%s%s\n", pdbformat, "%6.2f%6.2f");
#endif
for (ix = 0; ix < local_fft_ndata[XX]; ix++)
val = 100*pmegrid[pmeidx];
if (pmegrid[pmeidx] != 0)
{
- fprintf(fp, format, "ATOM", pmeidx, "CA", "GLY", ' ', pmeidx, ' ',
- 5.0*ix, 5.0*iy, 5.0*iz, 1.0, val);
+ gmx_fprintf_pdb_atomline(fp, epdbATOM, pmeidx, "CA", ' ', "GLY", ' ', pmeidx, ' ',
+ 5.0*ix, 5.0*iy, 5.0*iz, 1.0, val, "");
}
if (pmegrid[pmeidx] != 0)
{
#define PME_SPREAD_SIMD4_ALIGNED
#define PME_ORDER 4
#endif
-#include "pme_simd4.h"
+#include "gromacs/mdlib/pme_simd4.h"
#else
DO_BSPLINE(4);
#endif
#ifdef PME_SIMD4_SPREAD_GATHER
#define PME_SPREAD_SIMD4_ALIGNED
#define PME_ORDER 5
-#include "pme_simd4.h"
+#include "gromacs/mdlib/pme_simd4.h"
#else
DO_BSPLINE(5);
#endif
#define PME_GATHER_F_SIMD4_ALIGNED
#define PME_ORDER 4
#endif
-#include "pme_simd4.h"
+#include "gromacs/mdlib/pme_simd4.h"
#else
DO_FSPLINE(4);
#endif
#ifdef PME_SIMD4_SPREAD_GATHER
#define PME_GATHER_F_SIMD4_ALIGNED
#define PME_ORDER 5
-#include "pme_simd4.h"
+#include "gromacs/mdlib/pme_simd4.h"
#else
DO_FSPLINE(5);
#endif
{
/* s2g0 the local interpolation grid start.
* s2g1 the local interpolation grid end.
- * Because grid overlap communication only goes forward,
- * the grid the slabs for fft's should be rounded down.
+ * Since in calc_pidx we divide particles, and not grid lines,
+ * spatially uniform along dimension x or y, we need to round
+ * s2g0 down and s2g1 up.
*/
ol->s2g0[i] = ( i *ndata + 0 )/nnodes;
ol->s2g1[i] = ((i+1)*ndata + nnodes-1)/nnodes + norder - 1;
*bValidSettings = FALSE;
return;
}
- gmx_fatal(FARGS, "The number of PME grid lines per node along x is %g. But when using OpenMP threads, the number of grid lines per node along x should be >= pme_order (%d) or = pmeorder-1. To resolve this issue, use less nodes along x (and possibly more along y and/or z) by specifying -dd manually.",
+ gmx_fatal(FARGS, "The number of PME grid lines per rank along x is %g. But when using OpenMP threads, the number of grid lines per rank along x should be >= pme_order (%d) or = pmeorder-1. To resolve this issue, use fewer ranks along x (and possibly more along y and/or z) by specifying -dd manually.",
nkx/(double)nnodes_major, pme_order);
}
MPI_Comm_size(pme->mpi_comm, &pme->nnodes);
if (pme->nnodes != nnodes_major*nnodes_minor)
{
- gmx_incons("PME node count mismatch");
+ gmx_incons("PME rank count mismatch");
}
}
else
{
if (pme->nnodes % nnodes_major != 0)
{
- gmx_incons("For 2D PME decomposition, #PME nodes must be divisible by the number of nodes in the major dimension");
+ gmx_incons("For 2D PME decomposition, #PME ranks must be divisible by the number of ranks in the major dimension");
}
pme->ndecompdim = 2;
"\n"
"NOTE: The load imbalance in PME FFT and solve is %d%%.\n"
" For optimal PME load balancing\n"
- " PME grid_x (%d) and grid_y (%d) should be divisible by #PME_nodes_x (%d)\n"
- " and PME grid_y (%d) and grid_z (%d) should be divisible by #PME_nodes_y (%d)\n"
+ " PME grid_x (%d) and grid_y (%d) should be divisible by #PME_ranks_x (%d)\n"
+ " and PME grid_y (%d) and grid_z (%d) should be divisible by #PME_ranks_y (%d)\n"
"\n",
(int)((imbal-1)*100 + 0.5),
pme->nkx, pme->nky, pme->nnodes_major,
int fft_my, fft_mz;
int buf_my = -1;
int nsx, nsy, nsz;
- ivec ne;
+ ivec localcopy_end;
int offx, offy, offz, x, y, z, i0, i0t;
int sx, sy, sz, fx, fy, fz, tx1, ty1, tz1, ox, oy, oz;
gmx_bool bClearBufX, bClearBufY, bClearBufXY, bClearBuf;
for (d = 0; d < DIM; d++)
{
- ne[d] = min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
- local_fft_ndata[d]);
+ /* Determine up to where our thread needs to copy from the
+ * thread-local charge spreading grid to the rank-local FFT grid.
+ * This is up to our spreading grid end minus order-1 and
+ * not beyond the local FFT grid.
+ */
+ localcopy_end[d] =
+ min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
+ local_fft_ndata[d]);
}
offx = pmegrid->offset[XX];
/* Now loop over all the thread data blocks that contribute
* to the grid region we (our thread) are operating on.
*/
- /* Note that ffy_nx/y is equal to the number of grid points
+ /* Note that fft_nx/y is equal to the number of grid points
* between the first point of our node grid and the one of the next node.
*/
for (sx = 0; sx >= -pmegrids->nthread_comm[XX]; sx--)
}
pmegrid_g = &pmegrids->grid_th[fx*pmegrids->nc[YY]*pmegrids->nc[ZZ]];
ox += pmegrid_g->offset[XX];
+ /* Determine the end of our part of the source grid */
if (!bCommX)
{
- tx1 = min(ox + pmegrid_g->n[XX], ne[XX]);
+ /* Use our thread local source grid and target grid part */
+ tx1 = min(ox + pmegrid_g->n[XX], localcopy_end[XX]);
}
else
{
+ /* Use our thread local source grid and the spreading range */
tx1 = min(ox + pmegrid_g->n[XX], pme->pme_order);
}
}
pmegrid_g = &pmegrids->grid_th[fy*pmegrids->nc[ZZ]];
oy += pmegrid_g->offset[YY];
+ /* Determine the end of our part of the source grid */
if (!bCommY)
{
- ty1 = min(oy + pmegrid_g->n[YY], ne[YY]);
+ /* Use our thread local source grid and target grid part */
+ ty1 = min(oy + pmegrid_g->n[YY], localcopy_end[YY]);
}
else
{
+ /* Use our thread local source grid and the spreading range */
ty1 = min(oy + pmegrid_g->n[YY], pme->pme_order);
}
}
pmegrid_g = &pmegrids->grid_th[fz];
oz += pmegrid_g->offset[ZZ];
- tz1 = min(oz + pmegrid_g->n[ZZ], ne[ZZ]);
+ tz1 = min(oz + pmegrid_g->n[ZZ], localcopy_end[ZZ]);
if (sx == 0 && sy == 0 && sz == 0)
{
if (bCommY)
{
commbuf = commbuf_y;
- buf_my = ty1 - offy;
+ /* The y-size of the communication buffer is set by
+ * the overlap of the grid part of our local slab
+ * with the part starting at the next slab.
+ */
+ buf_my =
+ pme->overlap[1].s2g1[pme->nodeid_minor] -
+ pme->overlap[1].s2g0[pme->nodeid_minor+1];
if (bCommX)
{
/* We index commbuf modulo the local grid size */
sendptr = overlap->sendbuf + send_index0*local_fft_ndata[ZZ];
recvptr = overlap->recvbuf;
+ if (debug != NULL)
+ {
+ fprintf(debug, "PME fftgrid comm y %2d x %2d x %2d\n",
+ local_fft_ndata[XX], send_nindex, local_fft_ndata[ZZ]);
+ }
+
#ifdef GMX_MPI
MPI_Sendrecv(sendptr, send_size_y*datasize, GMX_MPI_REAL,
send_id, ipulse,
if (debug != NULL)
{
- fprintf(debug, "PME fftgrid comm %2d x %2d x %2d\n",
+ fprintf(debug, "PME fftgrid comm x %2d x %2d x %2d\n",
send_nindex, local_fft_ndata[YY], local_fft_ndata[ZZ]);
}
}
}
-/* TODO: when adding free-energy support, sigmaB will no longer be
- * unused */
int gmx_pme_do(gmx_pme_t pme,
int start, int homenr,
rvec x[], rvec f[],
real *chargeA, real *chargeB,
real *c6A, real *c6B,
- real *sigmaA, real gmx_unused *sigmaB,
+ real *sigmaA, real *sigmaB,
matrix box, t_commrec *cr,
int maxshift_x, int maxshift_y,
t_nrnb *nrnb, gmx_wallcycle_t wcycle,
real *dvdlambda_q, real *dvdlambda_lj,
int flags)
{
- int d, i, j, ntot, npme, grid_index, max_grid_index;
+ int d, i, j, k, ntot, npme, grid_index, max_grid_index;
int nx, ny, nz;
int n_d, local_ny;
pme_atomcomm_t *atc = NULL;
t_complex * cfftgrid;
int thread;
gmx_bool bFirst, bDoSplines;
+ int fep_state;
+ int fep_states_lj = pme->bFEP_lj ? 2 : 1;
const gmx_bool bCalcEnerVir = flags & GMX_PME_CALC_ENER_VIR;
const gmx_bool bCalcF = flags & GMX_PME_CALC_F;
if (debug)
{
- fprintf(debug, "PME: nnodes = %d, nodeid = %d\n",
+ fprintf(debug, "PME: number of ranks = %d, rank = %d\n",
cr->nnodes, cr->nodeid);
fprintf(debug, "Grid = %p\n", (void*)grid);
if (grid == NULL)
if (debug)
{
- fprintf(debug, "Node= %6d, pme local particles=%6d\n",
+ fprintf(debug, "Rank= %6d, pme local particles=%6d\n",
cr->nodeid, atc->n);
}
inc_nrnb(nrnb, eNR_FFT, 2*npme);
}
+ /* Note: this wallcycle region is closed below
+ outside an OpenMP region, so take care if
+ refactoring code here. */
wallcycle_start(wcycle, ewcPME_SPREADGATHER);
}
inc_nrnb(nrnb, eNR_GATHERFBSP,
pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
+ /* Note: this wallcycle region is opened above inside an OpenMP
+ region, so take care if refactoring code here. */
wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
}
if ((flags & GMX_PME_DO_LJ) && pme->ljpme_combination_rule == eljpmeLB)
{
- real *local_c6 = NULL, *local_sigma = NULL;
- if (pme->nnodes == 1)
+ /* Loop over A- and B-state if we are doing FEP */
+ for (fep_state = 0; fep_state < fep_states_lj; ++fep_state)
{
- if (pme->lb_buf1 == NULL)
+ real *local_c6 = NULL, *local_sigma = NULL, *RedistC6 = NULL, *RedistSigma = NULL;
+ if (pme->nnodes == 1)
{
- pme->lb_buf_nalloc = pme->atc[0].n;
- snew(pme->lb_buf1, pme->lb_buf_nalloc);
- }
- pme->atc[0].coefficient = pme->lb_buf1;
- local_c6 = c6A;
- local_sigma = sigmaA;
- }
- else
- {
- atc = &pme->atc[0];
-
- wallcycle_start(wcycle, ewcPME_REDISTXF);
-
- do_redist_pos_coeffs(pme, cr, start, homenr, bFirst, x, c6A);
- if (pme->lb_buf_nalloc < atc->n)
- {
- pme->lb_buf_nalloc = atc->nalloc;
- srenew(pme->lb_buf1, pme->lb_buf_nalloc);
- srenew(pme->lb_buf2, pme->lb_buf_nalloc);
- }
- local_c6 = pme->lb_buf1;
- for (i = 0; i < atc->n; ++i)
- {
- local_c6[i] = atc->coefficient[i];
- }
- where();
-
- do_redist_pos_coeffs(pme, cr, start, homenr, FALSE, x, sigmaA);
- local_sigma = pme->lb_buf2;
- for (i = 0; i < atc->n; ++i)
- {
- local_sigma[i] = atc->coefficient[i];
+ if (pme->lb_buf1 == NULL)
+ {
+ pme->lb_buf_nalloc = pme->atc[0].n;
+ snew(pme->lb_buf1, pme->lb_buf_nalloc);
+ }
+ pme->atc[0].coefficient = pme->lb_buf1;
+ switch (fep_state)
+ {
+ case 0:
+ local_c6 = c6A;
+ local_sigma = sigmaA;
+ break;
+ case 1:
+ local_c6 = c6B;
+ local_sigma = sigmaB;
+ break;
+ default:
+ gmx_incons("Trying to access wrong FEP-state in LJ-PME routine");
+ }
}
- where();
-
- wallcycle_stop(wcycle, ewcPME_REDISTXF);
- }
- calc_initial_lb_coeffs(pme, local_c6, local_sigma);
-
- /*Seven terms in LJ-PME with LB, grid_index < 2 reserved for electrostatics*/
- for (grid_index = 2; grid_index < 9; ++grid_index)
- {
- /* Unpack structure */
- pmegrid = &pme->pmegrid[grid_index];
- fftgrid = pme->fftgrid[grid_index];
- cfftgrid = pme->cfftgrid[grid_index];
- pfft_setup = pme->pfft_setup[grid_index];
- calc_next_lb_coeffs(pme, local_sigma);
- grid = pmegrid->grid.grid;
- where();
-
- if (flags & GMX_PME_SPREAD)
+ else
{
- wallcycle_start(wcycle, ewcPME_SPREADGATHER);
- /* Spread the c6 on a grid */
- spread_on_grid(pme, &pme->atc[0], pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
+ atc = &pme->atc[0];
+ switch (fep_state)
+ {
+ case 0:
+ RedistC6 = c6A;
+ RedistSigma = sigmaA;
+ break;
+ case 1:
+ RedistC6 = c6B;
+ RedistSigma = sigmaB;
+ break;
+ default:
+ gmx_incons("Trying to access wrong FEP-state in LJ-PME routine");
+ }
+ wallcycle_start(wcycle, ewcPME_REDISTXF);
- if (bFirst)
+ do_redist_pos_coeffs(pme, cr, start, homenr, bFirst, x, RedistC6);
+ if (pme->lb_buf_nalloc < atc->n)
{
- inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc->n);
+ pme->lb_buf_nalloc = atc->nalloc;
+ srenew(pme->lb_buf1, pme->lb_buf_nalloc);
+ srenew(pme->lb_buf2, pme->lb_buf_nalloc);
}
- inc_nrnb(nrnb, eNR_SPREADBSP,
- pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
- if (pme->nthread == 1)
+ local_c6 = pme->lb_buf1;
+ for (i = 0; i < atc->n; ++i)
{
- wrap_periodic_pmegrid(pme, grid);
+ local_c6[i] = atc->coefficient[i];
+ }
+ where();
- /* sum contributions to local grid from other nodes */
-#ifdef GMX_MPI
- if (pme->nnodes > 1)
- {
- gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_FORWARD);
- where();
- }
-#endif
- copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index);
+ do_redist_pos_coeffs(pme, cr, start, homenr, FALSE, x, RedistSigma);
+ local_sigma = pme->lb_buf2;
+ for (i = 0; i < atc->n; ++i)
+ {
+ local_sigma[i] = atc->coefficient[i];
}
+ where();
- wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
+ wallcycle_stop(wcycle, ewcPME_REDISTXF);
}
+ calc_initial_lb_coeffs(pme, local_c6, local_sigma);
- /*Here we start a large thread parallel region*/
-#pragma omp parallel num_threads(pme->nthread) private(thread)
+ /*Seven terms in LJ-PME with LB, grid_index < 2 reserved for electrostatics*/
+ for (grid_index = 2; grid_index < 9; ++grid_index)
{
- thread = gmx_omp_get_thread_num();
- if (flags & GMX_PME_SOLVE)
+ /* Unpack structure */
+ pmegrid = &pme->pmegrid[grid_index];
+ fftgrid = pme->fftgrid[grid_index];
+ cfftgrid = pme->cfftgrid[grid_index];
+ pfft_setup = pme->pfft_setup[grid_index];
+ calc_next_lb_coeffs(pme, local_sigma);
+ grid = pmegrid->grid.grid;
+ where();
+
+ if (flags & GMX_PME_SPREAD)
{
- /* do 3d-fft */
- if (thread == 0)
+ wallcycle_start(wcycle, ewcPME_SPREADGATHER);
+ /* Spread the c6 on a grid */
+ spread_on_grid(pme, &pme->atc[0], pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
+
+ if (bFirst)
{
- wallcycle_start(wcycle, ewcPME_FFT);
+ inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc->n);
}
- gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX,
- thread, wcycle);
- if (thread == 0)
+ inc_nrnb(nrnb, eNR_SPREADBSP,
+ pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
+ if (pme->nthread == 1)
{
- wallcycle_stop(wcycle, ewcPME_FFT);
+ wrap_periodic_pmegrid(pme, grid);
+ /* sum contributions to local grid from other nodes */
+#ifdef GMX_MPI
+ if (pme->nnodes > 1)
+ {
+ gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_FORWARD);
+ where();
+ }
+#endif
+ copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index);
}
- where();
+ wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
}
- }
- bFirst = FALSE;
- }
- if (flags & GMX_PME_SOLVE)
- {
- int loop_count;
- /* solve in k-space for our local cells */
+ /*Here we start a large thread parallel region*/
#pragma omp parallel num_threads(pme->nthread) private(thread)
- {
- thread = gmx_omp_get_thread_num();
- if (thread == 0)
{
- wallcycle_start(wcycle, ewcLJPME);
- }
+ thread = gmx_omp_get_thread_num();
+ if (flags & GMX_PME_SOLVE)
+ {
+ /* do 3d-fft */
+ if (thread == 0)
+ {
+ wallcycle_start(wcycle, ewcPME_FFT);
+ }
- loop_count =
- solve_pme_lj_yzx(pme, &pme->cfftgrid[2], TRUE, ewaldcoeff_lj,
- box[XX][XX]*box[YY][YY]*box[ZZ][ZZ],
- bCalcEnerVir,
- pme->nthread, thread);
- if (thread == 0)
- {
- wallcycle_stop(wcycle, ewcLJPME);
- where();
- inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
+ gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX,
+ thread, wcycle);
+ if (thread == 0)
+ {
+ wallcycle_stop(wcycle, ewcPME_FFT);
+ }
+ where();
+ }
}
+ bFirst = FALSE;
}
- }
-
- if (bCalcEnerVir)
- {
- /* This should only be called on the master thread and
- * after the threads have synchronized.
- */
- get_pme_ener_vir_lj(pme, pme->nthread, &energy_AB[2], vir_AB[2]);
- }
-
- if (bCalcF)
- {
- bFirst = !(flags & GMX_PME_DO_COULOMB);
- calc_initial_lb_coeffs(pme, local_c6, local_sigma);
- for (grid_index = 8; grid_index >= 2; --grid_index)
+ if (flags & GMX_PME_SOLVE)
{
- /* Unpack structure */
- pmegrid = &pme->pmegrid[grid_index];
- fftgrid = pme->fftgrid[grid_index];
- cfftgrid = pme->cfftgrid[grid_index];
- pfft_setup = pme->pfft_setup[grid_index];
- grid = pmegrid->grid.grid;
- calc_next_lb_coeffs(pme, local_sigma);
- where();
+ /* solve in k-space for our local cells */
#pragma omp parallel num_threads(pme->nthread) private(thread)
{
+ int loop_count;
thread = gmx_omp_get_thread_num();
- /* do 3d-invfft */
if (thread == 0)
{
- where();
- wallcycle_start(wcycle, ewcPME_FFT);
+ wallcycle_start(wcycle, ewcLJPME);
}
- gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL,
- thread, wcycle);
+ loop_count =
+ solve_pme_lj_yzx(pme, &pme->cfftgrid[2], TRUE, ewaldcoeff_lj,
+ box[XX][XX]*box[YY][YY]*box[ZZ][ZZ],
+ bCalcEnerVir,
+ pme->nthread, thread);
if (thread == 0)
{
- wallcycle_stop(wcycle, ewcPME_FFT);
-
+ wallcycle_stop(wcycle, ewcLJPME);
where();
+ inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
+ }
+ }
+ }
+
+ if (bCalcEnerVir)
+ {
+ /* This should only be called on the master thread and
+ * after the threads have synchronized.
+ */
+ get_pme_ener_vir_lj(pme, pme->nthread, &energy_AB[2+fep_state], vir_AB[2+fep_state]);
+ }
- if (pme->nodeid == 0)
+ if (bCalcF)
+ {
+ bFirst = !(flags & GMX_PME_DO_COULOMB);
+ calc_initial_lb_coeffs(pme, local_c6, local_sigma);
+ for (grid_index = 8; grid_index >= 2; --grid_index)
+ {
+ /* Unpack structure */
+ pmegrid = &pme->pmegrid[grid_index];
+ fftgrid = pme->fftgrid[grid_index];
+ cfftgrid = pme->cfftgrid[grid_index];
+ pfft_setup = pme->pfft_setup[grid_index];
+ grid = pmegrid->grid.grid;
+ calc_next_lb_coeffs(pme, local_sigma);
+ where();
+#pragma omp parallel num_threads(pme->nthread) private(thread)
+ {
+ thread = gmx_omp_get_thread_num();
+ /* do 3d-invfft */
+ if (thread == 0)
{
- ntot = pme->nkx*pme->nky*pme->nkz;
- npme = ntot*log((real)ntot)/log(2.0);
- inc_nrnb(nrnb, eNR_FFT, 2*npme);
+ where();
+ wallcycle_start(wcycle, ewcPME_FFT);
+ }
+
+ gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL,
+ thread, wcycle);
+ if (thread == 0)
+ {
+ wallcycle_stop(wcycle, ewcPME_FFT);
+
+ where();
+
+ if (pme->nodeid == 0)
+ {
+ ntot = pme->nkx*pme->nky*pme->nkz;
+ npme = ntot*log((real)ntot)/log(2.0);
+ inc_nrnb(nrnb, eNR_FFT, 2*npme);
+ }
+ wallcycle_start(wcycle, ewcPME_SPREADGATHER);
}
- wallcycle_start(wcycle, ewcPME_SPREADGATHER);
- }
- copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread);
+ copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread);
- } /*#pragma omp parallel*/
+ } /*#pragma omp parallel*/
- /* distribute local grid to all nodes */
+ /* distribute local grid to all nodes */
#ifdef GMX_MPI
- if (pme->nnodes > 1)
- {
- gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_BACKWARD);
- }
+ if (pme->nnodes > 1)
+ {
+ gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_BACKWARD);
+ }
#endif
- where();
+ where();
- unwrap_periodic_pmegrid(pme, grid);
+ unwrap_periodic_pmegrid(pme, grid);
- /* interpolate forces for our local atoms */
- where();
- bClearF = (bFirst && PAR(cr));
- scale = pme->bFEP ? (grid_index < 9 ? 1.0-lambda_lj : lambda_lj) : 1.0;
- scale *= lb_scale_factor[grid_index-2];
+ /* interpolate forces for our local atoms */
+ where();
+ bClearF = (bFirst && PAR(cr));
+ scale = pme->bFEP ? (fep_state < 1 ? 1.0-lambda_lj : lambda_lj) : 1.0;
+ scale *= lb_scale_factor[grid_index-2];
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
- for (thread = 0; thread < pme->nthread; thread++)
- {
- gather_f_bsplines(pme, grid, bClearF, &pme->atc[0],
- &pme->atc[0].spline[thread],
- scale);
- }
- where();
+ for (thread = 0; thread < pme->nthread; thread++)
+ {
+ gather_f_bsplines(pme, grid, bClearF, &pme->atc[0],
+ &pme->atc[0].spline[thread],
+ scale);
+ }
+ where();
- inc_nrnb(nrnb, eNR_GATHERFBSP,
- pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
- wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
+ inc_nrnb(nrnb, eNR_GATHERFBSP,
+ pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
+ wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
- bFirst = FALSE;
- } /*for (grid_index = 8; grid_index >= 2; --grid_index)*/
- } /*if (bCalcF)*/
- } /*if ((flags & GMX_PME_DO_LJ) && pme->ljpme_combination_rule == eljpmeLB) */
+ bFirst = FALSE;
+ } /* for (grid_index = 8; grid_index >= 2; --grid_index) */
+ } /* if (bCalcF) */
+ } /* for (fep_state = 0; fep_state < fep_states_lj; ++fep_state) */
+ } /* if ((flags & GMX_PME_DO_LJ) && pme->ljpme_combination_rule == eljpmeLB) */
if (bCalcF && pme->nnodes > 1)
{