#include "pme_internal.h"
#ifdef DEBUG_PME
-#include "gromacs/fileio/pdbio.h"
-#include "gromacs/utility/cstringutil.h"
-#include "gromacs/utility/futil.h"
+# include "gromacs/fileio/pdbio.h"
+# include "gromacs/utility/cstringutil.h"
+# include "gromacs/utility/futil.h"
#endif
#include "pme_simd.h"
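/* Padding, in real grid elements, placed between (and around) the
 * per-thread PME grids allocated in pmegrids_init() below, so that grids
 * written concurrently by different threads end up on separate cache
 * lines (presumably to avoid false sharing).
 */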
#define GMX_CACHE_SEP 64
-void gmx_sum_qgrid_dd(gmx_pme_t *pme,
- real *grid,
- const int direction)
+void gmx_sum_qgrid_dd(gmx_pme_t* pme, real* grid, const int direction)
{
#if GMX_MPI
- pme_overlap_t *overlap;
+ pme_overlap_t* overlap;
int send_index0, send_nindex;
int recv_index0, recv_nindex;
MPI_Status stat;
int i, j, k, ix, iy, iz, icnt;
int send_id, recv_id, datasize;
- real *p;
- real *sendptr, *recvptr;
+ real* p;
+ real * sendptr, *recvptr;
/* Start with minor-rank communication. This is a bit of a pain since it is not contiguous */
overlap = &pme->overlap[1];
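/* GMX_SUM_GRID_FORWARD sends our part of the overlap region to the rank
 * that owns it; the backward direction returns the summed grid, so the
 * send/receive ids and index ranges are simply swapped.
 */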
if (direction == GMX_SUM_GRID_FORWARD)
{
- send_id = overlap->comm_data[ipulse].send_id;
- recv_id = overlap->comm_data[ipulse].recv_id;
- send_index0 = overlap->comm_data[ipulse].send_index0;
- send_nindex = overlap->comm_data[ipulse].send_nindex;
- recv_index0 = overlap->comm_data[ipulse].recv_index0;
- recv_nindex = overlap->comm_data[ipulse].recv_nindex;
+ send_id = overlap->comm_data[ipulse].send_id;
+ recv_id = overlap->comm_data[ipulse].recv_id;
+ send_index0 = overlap->comm_data[ipulse].send_index0;
+ send_nindex = overlap->comm_data[ipulse].send_nindex;
+ recv_index0 = overlap->comm_data[ipulse].recv_index0;
+ recv_nindex = overlap->comm_data[ipulse].recv_nindex;
}
else
{
- send_id = overlap->comm_data[ipulse].recv_id;
- recv_id = overlap->comm_data[ipulse].send_id;
- send_index0 = overlap->comm_data[ipulse].recv_index0;
- send_nindex = overlap->comm_data[ipulse].recv_nindex;
- recv_index0 = overlap->comm_data[ipulse].send_index0;
- recv_nindex = overlap->comm_data[ipulse].send_nindex;
+ send_id = overlap->comm_data[ipulse].recv_id;
+ recv_id = overlap->comm_data[ipulse].send_id;
+ send_index0 = overlap->comm_data[ipulse].recv_index0;
+ send_nindex = overlap->comm_data[ipulse].recv_nindex;
+ recv_index0 = overlap->comm_data[ipulse].send_index0;
+ recv_nindex = overlap->comm_data[ipulse].send_nindex;
}
/* Copy data to contiguous send buffer */
if (debug)
{
fprintf(debug, "PME send rank %d %d -> %d grid start %d Communicating %d to %d\n",
- pme->nodeid, overlap->nodeid, send_id,
- pme->pmegrid_start_iy,
- send_index0-pme->pmegrid_start_iy,
- send_index0-pme->pmegrid_start_iy+send_nindex);
+ pme->nodeid, overlap->nodeid, send_id, pme->pmegrid_start_iy,
+ send_index0 - pme->pmegrid_start_iy,
+ send_index0 - pme->pmegrid_start_iy + send_nindex);
}
icnt = 0;
for (i = 0; i < pme->pmegrid_nx; i++)
for (k = 0; k < pme->nkz; k++)
{
iz = k;
- overlap->sendbuf[icnt++] = grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz];
+ overlap->sendbuf[icnt++] =
+ grid[ix * (pme->pmegrid_ny * pme->pmegrid_nz) + iy * (pme->pmegrid_nz) + iz];
}
}
}
- datasize = pme->pmegrid_nx * pme->nkz;
+ datasize = pme->pmegrid_nx * pme->nkz;
- MPI_Sendrecv(overlap->sendbuf.data(), send_nindex*datasize, GMX_MPI_REAL,
- send_id, ipulse,
- overlap->recvbuf.data(), recv_nindex*datasize, GMX_MPI_REAL,
- recv_id, ipulse,
+ MPI_Sendrecv(overlap->sendbuf.data(), send_nindex * datasize, GMX_MPI_REAL, send_id, ipulse,
+ overlap->recvbuf.data(), recv_nindex * datasize, GMX_MPI_REAL, recv_id, ipulse,
overlap->mpi_comm, &stat);
/* Get data from contiguous recv buffer */
if (debug)
{
fprintf(debug, "PME recv rank %d %d <- %d grid start %d Communicating %d to %d\n",
- pme->nodeid, overlap->nodeid, recv_id,
- pme->pmegrid_start_iy,
- recv_index0-pme->pmegrid_start_iy,
- recv_index0-pme->pmegrid_start_iy+recv_nindex);
+ pme->nodeid, overlap->nodeid, recv_id, pme->pmegrid_start_iy,
+ recv_index0 - pme->pmegrid_start_iy,
+ recv_index0 - pme->pmegrid_start_iy + recv_nindex);
}
icnt = 0;
for (i = 0; i < pme->pmegrid_nx; i++)
iz = k;
if (direction == GMX_SUM_GRID_FORWARD)
{
- grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz] += overlap->recvbuf[icnt++];
+ grid[ix * (pme->pmegrid_ny * pme->pmegrid_nz) + iy * (pme->pmegrid_nz) + iz] +=
+ overlap->recvbuf[icnt++];
}
else
{
- grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz] = overlap->recvbuf[icnt++];
+ grid[ix * (pme->pmegrid_ny * pme->pmegrid_nz) + iy * (pme->pmegrid_nz) + iz] =
+ overlap->recvbuf[icnt++];
}
}
}
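/* Major (x) dimension: whole pmegrid_ny*pmegrid_nz planes are contiguous
 * in memory, so we can send directly from the grid and, in the backward
 * direction, also receive directly into it.
 */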
{
if (direction == GMX_SUM_GRID_FORWARD)
{
- send_id = overlap->comm_data[ipulse].send_id;
- recv_id = overlap->comm_data[ipulse].recv_id;
- send_index0 = overlap->comm_data[ipulse].send_index0;
- send_nindex = overlap->comm_data[ipulse].send_nindex;
- recv_index0 = overlap->comm_data[ipulse].recv_index0;
- recv_nindex = overlap->comm_data[ipulse].recv_nindex;
- recvptr = overlap->recvbuf.data();
+ send_id = overlap->comm_data[ipulse].send_id;
+ recv_id = overlap->comm_data[ipulse].recv_id;
+ send_index0 = overlap->comm_data[ipulse].send_index0;
+ send_nindex = overlap->comm_data[ipulse].send_nindex;
+ recv_index0 = overlap->comm_data[ipulse].recv_index0;
+ recv_nindex = overlap->comm_data[ipulse].recv_nindex;
+ recvptr = overlap->recvbuf.data();
}
else
{
- send_id = overlap->comm_data[ipulse].recv_id;
- recv_id = overlap->comm_data[ipulse].send_id;
- send_index0 = overlap->comm_data[ipulse].recv_index0;
- send_nindex = overlap->comm_data[ipulse].recv_nindex;
- recv_index0 = overlap->comm_data[ipulse].send_index0;
- recv_nindex = overlap->comm_data[ipulse].send_nindex;
- recvptr = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
+ send_id = overlap->comm_data[ipulse].recv_id;
+ recv_id = overlap->comm_data[ipulse].send_id;
+ send_index0 = overlap->comm_data[ipulse].recv_index0;
+ send_nindex = overlap->comm_data[ipulse].recv_nindex;
+ recv_index0 = overlap->comm_data[ipulse].send_index0;
+ recv_nindex = overlap->comm_data[ipulse].send_nindex;
+ recvptr = grid + (recv_index0 - pme->pmegrid_start_ix) * (pme->pmegrid_ny * pme->pmegrid_nz);
}
- sendptr = grid + (send_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
- datasize = pme->pmegrid_ny * pme->pmegrid_nz;
+ sendptr = grid + (send_index0 - pme->pmegrid_start_ix) * (pme->pmegrid_ny * pme->pmegrid_nz);
+ datasize = pme->pmegrid_ny * pme->pmegrid_nz;
if (debug)
{
fprintf(debug, "PME send rank %d %d -> %d grid start %d Communicating %d to %d\n",
- pme->nodeid, overlap->nodeid, send_id,
- pme->pmegrid_start_ix,
- send_index0-pme->pmegrid_start_ix,
- send_index0-pme->pmegrid_start_ix+send_nindex);
+ pme->nodeid, overlap->nodeid, send_id, pme->pmegrid_start_ix,
+ send_index0 - pme->pmegrid_start_ix,
+ send_index0 - pme->pmegrid_start_ix + send_nindex);
fprintf(debug, "PME recv rank %d %d <- %d grid start %d Communicating %d to %d\n",
- pme->nodeid, overlap->nodeid, recv_id,
- pme->pmegrid_start_ix,
- recv_index0-pme->pmegrid_start_ix,
- recv_index0-pme->pmegrid_start_ix+recv_nindex);
+ pme->nodeid, overlap->nodeid, recv_id, pme->pmegrid_start_ix,
+ recv_index0 - pme->pmegrid_start_ix,
+ recv_index0 - pme->pmegrid_start_ix + recv_nindex);
}
- MPI_Sendrecv(sendptr, send_nindex*datasize, GMX_MPI_REAL,
- send_id, ipulse,
- recvptr, recv_nindex*datasize, GMX_MPI_REAL,
- recv_id, ipulse,
- overlap->mpi_comm, &stat);
+ MPI_Sendrecv(sendptr, send_nindex * datasize, GMX_MPI_REAL, send_id, ipulse, recvptr,
+ recv_nindex * datasize, GMX_MPI_REAL, recv_id, ipulse, overlap->mpi_comm, &stat);
/* ADD data from contiguous recv buffer */
if (direction == GMX_SUM_GRID_FORWARD)
{
- p = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
- for (i = 0; i < recv_nindex*datasize; i++)
+ p = grid + (recv_index0 - pme->pmegrid_start_ix) * (pme->pmegrid_ny * pme->pmegrid_nz);
+ for (i = 0; i < recv_nindex * datasize; i++)
{
p[i] += overlap->recvbuf[i];
}
}
-int copy_pmegrid_to_fftgrid(const gmx_pme_t *pme, const real *pmegrid, real *fftgrid, int grid_index)
+int copy_pmegrid_to_fftgrid(const gmx_pme_t* pme, const real* pmegrid, real* fftgrid, int grid_index)
{
- ivec local_fft_ndata, local_fft_offset, local_fft_size;
- ivec local_pme_size;
- int ix, iy, iz;
- int pmeidx, fftidx;
+ ivec local_fft_ndata, local_fft_offset, local_fft_size;
+ ivec local_pme_size;
+ int ix, iy, iz;
+ int pmeidx, fftidx;
/* Dimensions should be identical for the A and B grids, so we just use A here */
- gmx_parallel_3dfft_real_limits(pme->pfft_setup[grid_index],
- local_fft_ndata,
- local_fft_offset,
+ gmx_parallel_3dfft_real_limits(pme->pfft_setup[grid_index], local_fft_ndata, local_fft_offset,
local_fft_size);
local_pme_size[0] = pme->pmegrid_nx;
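/* The PME grid is padded (and possibly SIMD-aligned) beyond the local
 * FFT region, so the two arrays need separate index strides.
 */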
{
for (iz = 0; iz < local_fft_ndata[ZZ]; iz++)
{
- pmeidx = ix*(local_pme_size[YY]*local_pme_size[ZZ])+iy*(local_pme_size[ZZ])+iz;
- fftidx = ix*(local_fft_size[YY]*local_fft_size[ZZ])+iy*(local_fft_size[ZZ])+iz;
+ pmeidx = ix * (local_pme_size[YY] * local_pme_size[ZZ])
+ + iy * (local_pme_size[ZZ]) + iz;
+ fftidx = ix * (local_fft_size[YY] * local_fft_size[ZZ])
+ + iy * (local_fft_size[ZZ]) + iz;
fftgrid[fftidx] = pmegrid[pmeidx];
#ifdef DEBUG_PME
- val = 100*pmegrid[pmeidx];
+ val = 100 * pmegrid[pmeidx];
if (pmegrid[pmeidx] != 0)
{
- gmx_fprintf_pdb_atomline(fp, epdbATOM, pmeidx, "CA", ' ', "GLY", ' ', pmeidx, ' ',
- 5.0*ix, 5.0*iy, 5.0*iz, 1.0, val, "");
+ gmx_fprintf_pdb_atomline(fp, epdbATOM, pmeidx, "CA", ' ', "GLY", ' ', pmeidx,
+ ' ', 5.0 * ix, 5.0 * iy, 5.0 * iz, 1.0, val, "");
}
if (pmegrid[pmeidx] != 0)
{
- fprintf(fp2, "%-12s %5d %5d %5d %12.5e\n",
- "qgrid",
- pme->pmegrid_start_ix + ix,
- pme->pmegrid_start_iy + iy,
- pme->pmegrid_start_iz + iz,
- pmegrid[pmeidx]);
+ fprintf(fp2, "%-12s %5d %5d %5d %12.5e\n", "qgrid",
+ pme->pmegrid_start_ix + ix, pme->pmegrid_start_iy + iy,
+ pme->pmegrid_start_iz + iz, pmegrid[pmeidx]);
}
#endif
}
#endif
-int copy_fftgrid_to_pmegrid(struct gmx_pme_t *pme, const real *fftgrid, real *pmegrid, int grid_index,
- int nthread, int thread)
+int copy_fftgrid_to_pmegrid(struct gmx_pme_t* pme, const real* fftgrid, real* pmegrid, int grid_index, int nthread, int thread)
{
- ivec local_fft_ndata, local_fft_offset, local_fft_size;
- ivec local_pme_size;
- int ixy0, ixy1, ixy, ix, iy, iz;
- int pmeidx, fftidx;
+ ivec local_fft_ndata, local_fft_offset, local_fft_size;
+ ivec local_pme_size;
+ int ixy0, ixy1, ixy, ix, iy, iz;
+ int pmeidx, fftidx;
#ifdef PME_TIME_THREADS
gmx_cycles_t c1;
static double cs1 = 0;
c1 = omp_cyc_start();
#endif
/* Dimensions should be identical for the A and B grids, so we just use A here */
- gmx_parallel_3dfft_real_limits(pme->pfft_setup[grid_index],
- local_fft_ndata,
- local_fft_offset,
+ gmx_parallel_3dfft_real_limits(pme->pfft_setup[grid_index], local_fft_ndata, local_fft_offset,
local_fft_size);
local_pme_size[0] = pme->pmegrid_nx;
/* The fftgrid is always 'justified' to the lower-left corner of the
 * PME grid; the offset is identical, and the PME grid always has more
 * data (due to overlap).
 */
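/* Divide the nx*ny range of z-lines evenly over the threads; each thread
 * copies whole z-lines, so no two threads write to the same line.
 */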
- ixy0 = ((thread )*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
- ixy1 = ((thread+1)*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
+ ixy0 = ((thread)*local_fft_ndata[XX] * local_fft_ndata[YY]) / nthread;
+ ixy1 = ((thread + 1) * local_fft_ndata[XX] * local_fft_ndata[YY]) / nthread;
for (ixy = ixy0; ixy < ixy1; ixy++)
{
- ix = ixy/local_fft_ndata[YY];
- iy = ixy - ix*local_fft_ndata[YY];
+ ix = ixy / local_fft_ndata[YY];
+ iy = ixy - ix * local_fft_ndata[YY];
- pmeidx = (ix*local_pme_size[YY] + iy)*local_pme_size[ZZ];
- fftidx = (ix*local_fft_size[YY] + iy)*local_fft_size[ZZ];
+ pmeidx = (ix * local_pme_size[YY] + iy) * local_pme_size[ZZ];
+ fftidx = (ix * local_fft_size[YY] + iy) * local_fft_size[ZZ];
for (iz = 0; iz < local_fft_ndata[ZZ]; iz++)
{
- pmegrid[pmeidx+iz] = fftgrid[fftidx+iz];
+ pmegrid[pmeidx + iz] = fftgrid[fftidx + iz];
}
}
#ifdef PME_TIME_THREADS
- c1 = omp_cyc_end(c1);
+ c1 = omp_cyc_end(c1);
cs1 += (double)c1;
cnt++;
if (cnt % 20 == 0)
{
- printf("copy %.2f\n", cs1*1e-9);
+ printf("copy %.2f\n", cs1 * 1e-9);
}
#endif
}
-void wrap_periodic_pmegrid(const gmx_pme_t *pme, real *pmegrid)
+void wrap_periodic_pmegrid(const gmx_pme_t* pme, real* pmegrid)
{
- int nx, ny, nz, pny, pnz, ny_x, overlap, ix, iy, iz;
+ int nx, ny, nz, pny, pnz, ny_x, overlap, ix, iy, iz;
nx = pme->nkx;
ny = pme->nky;
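/* Fold the overlap regions that spreading wrote beyond the periodic
 * boundaries back into the primary cell, first in z, then y, then x.
 */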
{
for (iz = 0; iz < overlap; iz++)
{
- pmegrid[(ix*pny+iy)*pnz+iz] +=
- pmegrid[(ix*pny+iy)*pnz+nz+iz];
+ pmegrid[(ix * pny + iy) * pnz + iz] += pmegrid[(ix * pny + iy) * pnz + nz + iz];
}
}
}
{
for (iz = 0; iz < nz; iz++)
{
- pmegrid[(ix*pny+iy)*pnz+iz] +=
- pmegrid[(ix*pny+ny+iy)*pnz+iz];
+ pmegrid[(ix * pny + iy) * pnz + iz] += pmegrid[(ix * pny + ny + iy) * pnz + iz];
}
}
}
{
for (iz = 0; iz < nz; iz++)
{
- pmegrid[(ix*pny+iy)*pnz+iz] +=
- pmegrid[((nx+ix)*pny+iy)*pnz+iz];
+ pmegrid[(ix * pny + iy) * pnz + iz] += pmegrid[((nx + ix) * pny + iy) * pnz + iz];
}
}
}
}
-void unwrap_periodic_pmegrid(struct gmx_pme_t *pme, real *pmegrid)
+void unwrap_periodic_pmegrid(struct gmx_pme_t* pme, real* pmegrid)
{
- int nx, ny, nz, pny, pnz, ny_x, overlap, ix;
+ int nx, ny, nz, pny, pnz, ny_x, overlap, ix;
nx = pme->nkx;
ny = pme->nky;
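/* Inverse of wrap_periodic_pmegrid: copy interior data outward into the
 * overlap regions, in x, then y, then z, so the gather can read periodic
 * images without wrapping indices.
 */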
{
for (iz = 0; iz < nz; iz++)
{
- pmegrid[((nx+ix)*pny+iy)*pnz+iz] =
- pmegrid[(ix*pny+iy)*pnz+iz];
+ pmegrid[((nx + ix) * pny + iy) * pnz + iz] = pmegrid[(ix * pny + iy) * pnz + iz];
}
}
}
{
for (iz = 0; iz < nz; iz++)
{
- pmegrid[(ix*pny+ny+iy)*pnz+iz] =
- pmegrid[(ix*pny+iy)*pnz+iz];
+ pmegrid[(ix * pny + ny + iy) * pnz + iz] = pmegrid[(ix * pny + iy) * pnz + iz];
}
}
}
{
for (iz = 0; iz < overlap; iz++)
{
- pmegrid[(ix*pny+iy)*pnz+nz+iz] =
- pmegrid[(ix*pny+iy)*pnz+iz];
+ pmegrid[(ix * pny + iy) * pnz + nz + iz] = pmegrid[(ix * pny + iy) * pnz + iz];
}
}
}
}
-void set_grid_alignment(int gmx_unused *pmegrid_nz, int gmx_unused pme_order)
+void set_grid_alignment(int gmx_unused* pmegrid_nz, int gmx_unused pme_order)
{
#ifdef PME_SIMD4_SPREAD_GATHER
if (pme_order == 5
-#if !PME_4NSIMD_GATHER
+# if !PME_4NSIMD_GATHER
|| pme_order == 4
-#endif
- )
+# endif
+ )
{
/* Round nz up to a multiple of 4 to ensure alignment */
*pmegrid_nz = ((*pmegrid_nz + 3) & ~3);
#endif
}
-static void set_gridsize_alignment(int gmx_unused *gridsize, int gmx_unused pme_order)
+static void set_gridsize_alignment(int gmx_unused* gridsize, int gmx_unused pme_order)
{
#ifdef PME_SIMD4_SPREAD_GATHER
-#if !PME_4NSIMD_GATHER
+# if !PME_4NSIMD_GATHER
if (pme_order == 4)
{
/* Add extra elements to ensure aligned operations do not go
*/
*gridsize += 4;
}
-#endif
+# endif
#endif
}
-void pmegrid_init(pmegrid_t *grid,
- int cx, int cy, int cz,
- int x0, int y0, int z0,
- int x1, int y1, int z1,
- gmx_bool set_alignment,
- int pme_order,
- real *ptr)
+void pmegrid_init(pmegrid_t* grid,
+ int cx,
+ int cy,
+ int cz,
+ int x0,
+ int y0,
+ int z0,
+ int x1,
+ int y1,
+ int z1,
+ gmx_bool set_alignment,
+ int pme_order,
+ real* ptr)
{
int nz, gridsize;
grid->order = pme_order;
if (ptr == nullptr)
{
- gridsize = grid->s[XX]*grid->s[YY]*grid->s[ZZ];
+ gridsize = grid->s[XX] * grid->s[YY] * grid->s[ZZ];
set_gridsize_alignment(&gridsize, pme_order);
snew_aligned(grid->grid, gridsize, SIMD4_ALIGNMENT);
}
static int div_round_up(int enumerator, int denominator)
{
- return (enumerator + denominator - 1)/denominator;
+ return (enumerator + denominator - 1) / denominator;
}
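/* Choose a 3D thread decomposition with nsub[XX]*nsub[YY]*nsub[ZZ] ==
 * nthread that minimizes the per-thread grid volume, including the ovl
 * overlap lines each thread needs in every dimension.
 */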
-static void make_subgrid_division(const ivec n, int ovl, int nthread,
- ivec nsub)
+static void make_subgrid_division(const ivec n, int ovl, int nthread, ivec nsub)
{
int gsize_opt, gsize;
int nsx, nsy, nsz;
- char *env;
+ char* env;
gsize_opt = -1;
for (nsx = 1; nsx <= nthread; nsx++)
{
for (nsy = 1; nsy <= nthread; nsy++)
{
- if (nsx*nsy <= nthread && nthread % (nsx*nsy) == 0)
+ if (nsx * nsy <= nthread && nthread % (nsx * nsy) == 0)
{
- nsz = nthread/(nsx*nsy);
+ nsz = nthread / (nsx * nsy);
/* Determine the number of grid points per thread */
- gsize =
- (div_round_up(n[XX], nsx) + ovl)*
- (div_round_up(n[YY], nsy) + ovl)*
- (div_round_up(n[ZZ], nsz) + ovl);
+ gsize = (div_round_up(n[XX], nsx) + ovl) * (div_round_up(n[YY], nsy) + ovl)
+ * (div_round_up(n[ZZ], nsz) + ovl);
/* Minimize the number of grid points per thread
* and, secondarily, the number of cuts in minor dimensions.
*/
- if (gsize_opt == -1 ||
- gsize < gsize_opt ||
- (gsize == gsize_opt &&
- (nsz < nsub[ZZ] || (nsz == nsub[ZZ] && nsy < nsub[YY]))))
+ if (gsize_opt == -1 || gsize < gsize_opt
+ || (gsize == gsize_opt && (nsz < nsub[ZZ] || (nsz == nsub[ZZ] && nsy < nsub[YY]))))
{
nsub[XX] = nsx;
nsub[YY] = nsy;
sscanf(env, "%20d %20d %20d", &nsub[XX], &nsub[YY], &nsub[ZZ]);
}
- if (nsub[XX]*nsub[YY]*nsub[ZZ] != nthread)
+ if (nsub[XX] * nsub[YY] * nsub[ZZ] != nthread)
{
- gmx_fatal(FARGS, "PME grid thread division (%d x %d x %d) does not match the total number of threads (%d)", nsub[XX], nsub[YY], nsub[ZZ], nthread);
+ gmx_fatal(FARGS,
+ "PME grid thread division (%d x %d x %d) does not match the total number of "
+ "threads (%d)",
+ nsub[XX], nsub[YY], nsub[ZZ], nthread);
}
}
-void pmegrids_init(pmegrids_t *grids,
- int nx, int ny, int nz, int nz_base,
- int pme_order,
- gmx_bool bUseThreads,
- int nthread,
- int overlap_x,
- int overlap_y)
+void pmegrids_init(pmegrids_t* grids,
+ int nx,
+ int ny,
+ int nz,
+ int nz_base,
+ int pme_order,
+ gmx_bool bUseThreads,
+ int nthread,
+ int overlap_x,
+ int overlap_y)
{
ivec n, n_base;
int t, x, y, z, d, i, tfac;
copy_ivec(n, n_base);
n_base[ZZ] = nz_base;
- pmegrid_init(&grids->grid, 0, 0, 0, 0, 0, 0, n[XX], n[YY], n[ZZ], FALSE, pme_order,
- nullptr);
+ pmegrid_init(&grids->grid, 0, 0, 0, 0, 0, 0, n[XX], n[YY], n[ZZ], FALSE, pme_order, nullptr);
grids->nthread = nthread;
- make_subgrid_division(n_base, pme_order-1, grids->nthread, grids->nc);
+ make_subgrid_division(n_base, pme_order - 1, grids->nthread, grids->nc);
if (bUseThreads)
{
if (debug)
{
- fprintf(debug, "pmegrid thread local division: %d x %d x %d\n",
- grids->nc[XX], grids->nc[YY], grids->nc[ZZ]);
- fprintf(debug, "pmegrid %d %d %d max thread pmegrid %d %d %d\n",
- nx, ny, nz,
- nst[XX], nst[YY], nst[ZZ]);
+ fprintf(debug, "pmegrid thread local division: %d x %d x %d\n", grids->nc[XX],
+ grids->nc[YY], grids->nc[ZZ]);
+ fprintf(debug, "pmegrid %d %d %d max thread pmegrid %d %d %d\n", nx, ny, nz, nst[XX],
+ nst[YY], nst[ZZ]);
}
snew(grids->grid_th, grids->nthread);
t = 0;
- gridsize = nst[XX]*nst[YY]*nst[ZZ];
+ gridsize = nst[XX] * nst[YY] * nst[ZZ];
set_gridsize_alignment(&gridsize, pme_order);
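/* A single allocation holds all thread-local grids, separated (and
 * bracketed) by GMX_CACHE_SEP elements so that different threads do not
 * write to the same cache line.
 */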
- snew_aligned(grids->grid_all,
- grids->nthread*gridsize+(grids->nthread+1)*GMX_CACHE_SEP,
+ snew_aligned(grids->grid_all, grids->nthread * gridsize + (grids->nthread + 1) * GMX_CACHE_SEP,
SIMD4_ALIGNMENT);
for (x = 0; x < grids->nc[XX]; x++)
{
for (z = 0; z < grids->nc[ZZ]; z++)
{
- pmegrid_init(&grids->grid_th[t],
- x, y, z,
- (n[XX]*(x ))/grids->nc[XX],
- (n[YY]*(y ))/grids->nc[YY],
- (n[ZZ]*(z ))/grids->nc[ZZ],
- (n[XX]*(x+1))/grids->nc[XX],
- (n[YY]*(y+1))/grids->nc[YY],
- (n[ZZ]*(z+1))/grids->nc[ZZ],
- TRUE,
- pme_order,
- grids->grid_all+GMX_CACHE_SEP+t*(gridsize+GMX_CACHE_SEP));
+ pmegrid_init(&grids->grid_th[t], x, y, z, (n[XX] * (x)) / grids->nc[XX],
+ (n[YY] * (y)) / grids->nc[YY], (n[ZZ] * (z)) / grids->nc[ZZ],
+ (n[XX] * (x + 1)) / grids->nc[XX], (n[YY] * (y + 1)) / grids->nc[YY],
+ (n[ZZ] * (z + 1)) / grids->nc[ZZ], TRUE, pme_order,
+ grids->grid_all + GMX_CACHE_SEP + t * (gridsize + GMX_CACHE_SEP));
t++;
}
}
}
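/* Build the per-dimension grid-line-to-thread lookup g2t; tfac
 * accumulates the stride of each dimension, so that adding the three
 * per-dimension lookups yields the flat thread index.
 */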
tfac = 1;
- for (d = DIM-1; d >= 0; d--)
+ for (d = DIM - 1; d >= 0; d--)
{
snew(grids->g2t[d], n[d]);
t = 0;
/* The second check should match the parameters
* of the pmegrid_init call above.
*/
- while (t + 1 < grids->nc[d] && i >= (n[d]*(t+1))/grids->nc[d])
+ while (t + 1 < grids->nc[d] && i >= (n[d] * (t + 1)) / grids->nc[d])
{
t++;
}
- grids->g2t[d][i] = t*tfac;
+ grids->g2t[d][i] = t * tfac;
}
tfac *= grids->nc[d];
switch (d)
{
- case XX: max_comm_lines = overlap_x; break;
- case YY: max_comm_lines = overlap_y; break;
+ case XX: max_comm_lines = overlap_x; break;
+ case YY: max_comm_lines = overlap_y; break;
case ZZ: max_comm_lines = pme_order - 1; break;
}
grids->nthread_comm[d] = 0;
- while ((n[d]*grids->nthread_comm[d])/grids->nc[d] < max_comm_lines &&
- grids->nthread_comm[d] < grids->nc[d])
+ while ((n[d] * grids->nthread_comm[d]) / grids->nc[d] < max_comm_lines
+ && grids->nthread_comm[d] < grids->nc[d])
{
grids->nthread_comm[d]++;
}
if (debug != nullptr)
{
- fprintf(debug, "pmegrid thread grid communication range in %c: %d\n",
- 'x'+d, grids->nthread_comm[d]);
+ fprintf(debug, "pmegrid thread grid communication range in %c: %d\n", 'x' + d,
+ grids->nthread_comm[d]);
}
/* It should be possible to make grids->nthread_comm[d]==grids->nc[d]
* work, but this is not a problematic restriction.
*/
if (grids->nc[d] > 1 && grids->nthread_comm[d] > grids->nc[d])
{
- gmx_fatal(FARGS, "Too many threads for PME (%d) compared to the number of grid lines, reduce the number of threads doing PME", grids->nthread);
+ gmx_fatal(FARGS,
+ "Too many threads for PME (%d) compared to the number of grid lines, reduce "
+ "the number of threads doing PME",
+ grids->nthread);
}
}
}
-void pmegrids_destroy(pmegrids_t *grids)
+void pmegrids_destroy(pmegrids_t* grids)
{
if (grids->grid.grid != nullptr)
{
}
}
-void
-make_gridindex_to_localindex(int n, int local_start, int local_range,
- int **global_to_local,
- real **fraction_shift)
+void make_gridindex_to_localindex(int n, int local_start, int local_range, int** global_to_local, real** fraction_shift)
{
/* Here we construct an array for looking up the grid line index and
 * fraction for particles. This is done because it is slightly
* to allow for particles to be out of the triclinic unit-cell.
*/
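/* The table spans c_pmeNeighborUnitcellCount copies of the grid, covering
 * particle indices shifted by up to the allowed number of box lengths
 * either way.
 */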
const int arraySize = c_pmeNeighborUnitcellCount * n;
- int * gtl;
- real * fsh;
+ int* gtl;
+ real* fsh;
snew(gtl, arraySize);
snew(fsh, arraySize);
*fraction_shift = fsh;
}
-void reuse_pmegrids(const pmegrids_t *oldgrid, pmegrids_t *newgrid)
+void reuse_pmegrids(const pmegrids_t* oldgrid, pmegrids_t* newgrid)
{
int d, t;