typedef struct {
#ifdef GMX_MPI
- MPI_Comm mpi_comm;
+ MPI_Comm mpi_comm;
#endif
- int nnodes,nodeid;
- int *s2g0;
- int *s2g1;
- int noverlap_nodes;
- int *send_id,*recv_id;
- int send_size; /* Send buffer width, used with OpenMP */
+ int nnodes, nodeid;
+ int *s2g0;
+ int *s2g1;
+ int noverlap_nodes;
+ int *send_id, *recv_id;
+ int send_size; /* Send buffer width, used with OpenMP */
pme_grid_comm_t *comm_data;
- real *sendbuf;
- real *recvbuf;
+ real *sendbuf;
+ real *recvbuf;
} pme_overlap_t;
typedef struct {
- int *n; /* Cumulative counts of the number of particles per thread */
- int nalloc; /* Allocation size of i */
- int *i; /* Particle indices ordered on thread index (n) */
+ int *n; /* Cumulative counts of the number of particles per thread */
+ int nalloc; /* Allocation size of i */
+ int *i; /* Particle indices ordered on thread index (n) */
} thread_plist_t;
typedef struct {
- int *thread_one;
- int n;
- int *ind;
+ int *thread_one;
+ int n;
+ int *ind;
splinevec theta;
- real *ptr_theta_z;
+ real *ptr_theta_z;
splinevec dtheta;
- real *ptr_dtheta_z;
+ real *ptr_dtheta_z;
} splinedata_t;
typedef struct {
- int dimind; /* The index of the dimension, 0=x, 1=y */
- int nslab;
- int nodeid;
+ int dimind; /* The index of the dimension, 0=x, 1=y */
+ int nslab;
+ int nodeid;
#ifdef GMX_MPI
MPI_Comm mpi_comm;
#endif
- int *node_dest; /* The nodes to send x and q to with DD */
- int *node_src; /* The nodes to receive x and q from with DD */
- int *buf_index; /* Index for commnode into the buffers */
+ int *node_dest; /* The nodes to send x and q to with DD */
+ int *node_src; /* The nodes to receive x and q from with DD */
+ int *buf_index; /* Index for commnode into the buffers */
- int maxshift;
+ int maxshift;
- int npd;
- int pd_nalloc;
- int *pd;
- int *count; /* The number of atoms to send to each node */
- int **count_thread;
- int *rcount; /* The number of atoms to receive */
-
- int n;
- int nalloc;
- rvec *x;
- real *q;
- rvec *f;
+ int npd;
+ int pd_nalloc;
+ int *pd;
+ int *count; /* The number of atoms to send to each node */
+ int **count_thread;
+ int *rcount; /* The number of atoms to receive */
+
+ int n;
+ int nalloc;
+ rvec *x;
+ real *q;
+ rvec *f;
gmx_bool bSpread; /* These coordinates are used for spreading */
- int pme_order;
- ivec *idx;
- rvec *fractx; /* Fractional coordinate relative to the
- * lower cell boundary
- */
- int nthread;
- int *thread_idx; /* Which thread should spread which charge */
+ int pme_order;
+ ivec *idx;
+ rvec *fractx; /* Fractional coordinate relative to the
+ * lower cell boundary
+ */
+ int nthread;
+ int *thread_idx; /* Which thread should spread which charge */
thread_plist_t *thread_plist;
- splinedata_t *spline;
+ splinedata_t *spline;
} pme_atomcomm_t;
#define FLBS 3
#define FLBSZ 4
typedef struct {
- ivec ci; /* The spatial location of this grid */
- ivec n; /* The used size of *grid, including order-1 */
- ivec offset; /* The grid offset from the full node grid */
- int order; /* PME spreading order */
- ivec s; /* The allocated size of *grid, s >= n */
- real *grid; /* The grid local thread, size n */
+ ivec ci; /* The spatial location of this grid */
+ ivec n; /* The used size of *grid, including order-1 */
+ ivec offset; /* The grid offset from the full node grid */
+ int order; /* PME spreading order */
+ ivec s; /* The allocated size of *grid, s >= n */
+ real *grid; /* The grid local thread, size n */
} pmegrid_t;
typedef struct {
- pmegrid_t grid; /* The full node grid (non thread-local) */
- int nthread; /* The number of threads operating on this grid */
- ivec nc; /* The local spatial decomposition over the threads */
- pmegrid_t *grid_th; /* Array of grids for each thread */
- real *grid_all; /* Allocated array for the grids in *grid_th */
- int **g2t; /* The grid to thread index */
- ivec nthread_comm; /* The number of threads to communicate with */
+ pmegrid_t grid; /* The full node grid (non thread-local) */
+ int nthread; /* The number of threads operating on this grid */
+ ivec nc; /* The local spatial decomposition over the threads */
+ pmegrid_t *grid_th; /* Array of grids for each thread */
+ real *grid_all; /* Allocated array for the grids in *grid_th */
+ int **g2t; /* The grid to thread index */
+ ivec nthread_comm; /* The number of threads to communicate with */
} pmegrids_t;
typedef struct {
#ifdef PME_SSE
/* Masks for SSE aligned spreading and gathering */
- __m128 mask_SSE0[6],mask_SSE1[6];
+ __m128 mask_SSE0[6], mask_SSE1[6];
#else
- int dummy; /* C89 requires that struct has at least one member */
+ int dummy; /* C89 requires that struct has at least one member */
#endif
} pme_spline_work_t;
} pme_work_t;
typedef struct gmx_pme {
- int ndecompdim; /* The number of decomposition dimensions */
- int nodeid; /* Our nodeid in mpi->mpi_comm */
- int nodeid_major;
- int nodeid_minor;
- int nnodes; /* The number of nodes doing PME */
- int nnodes_major;
- int nnodes_minor;
-
- MPI_Comm mpi_comm;
- MPI_Comm mpi_comm_d[2]; /* Indexed on dimension, 0=x, 1=y */
+ int ndecompdim; /* The number of decomposition dimensions */
+ int nodeid; /* Our nodeid in mpi->mpi_comm */
+ int nodeid_major;
+ int nodeid_minor;
+ int nnodes; /* The number of nodes doing PME */
+ int nnodes_major;
+ int nnodes_minor;
+
+ MPI_Comm mpi_comm;
+ MPI_Comm mpi_comm_d[2]; /* Indexed on dimension, 0=x, 1=y */
#ifdef GMX_MPI
- MPI_Datatype rvec_mpi; /* the pme vector's MPI type */
+ MPI_Datatype rvec_mpi; /* the pme vector's MPI type */
#endif
- int nthread; /* The number of threads doing PME */
+ int nthread; /* The number of threads doing PME */
- gmx_bool bPPnode; /* Node also does particle-particle forces */
- gmx_bool bFEP; /* Compute Free energy contribution */
- int nkx,nky,nkz; /* Grid dimensions */
- gmx_bool bP3M; /* Do P3M: optimize the influence function */
- int pme_order;
- real epsilon_r;
+ gmx_bool bPPnode; /* Node also does particle-particle forces */
+ gmx_bool bFEP; /* Compute Free energy contribution */
+ int nkx, nky, nkz; /* Grid dimensions */
+ gmx_bool bP3M; /* Do P3M: optimize the influence function */
+ int pme_order;
+ real epsilon_r;
pmegrids_t pmegridA; /* Grids on which we do spreading/interpolation, includes overlap */
pmegrids_t pmegridB;
/* The PME charge spreading grid sizes/strides, includes pme_order-1 */
- int pmegrid_nx,pmegrid_ny,pmegrid_nz;
+ int pmegrid_nx, pmegrid_ny, pmegrid_nz;
/* pmegrid_nz might be larger than strictly necessary to ensure
* memory alignment, pmegrid_nz_base gives the real base size.
*/
int pmegrid_nz_base;
/* The local PME grid starting indices */
- int pmegrid_start_ix,pmegrid_start_iy,pmegrid_start_iz;
+ int pmegrid_start_ix, pmegrid_start_iy, pmegrid_start_iz;
/* Work data for spreading and gathering */
- pme_spline_work_t *spline_work;
+ pme_spline_work_t *spline_work;
- real *fftgridA; /* Grids for FFT. With 1D FFT decomposition this can be a pointer */
- real *fftgridB; /* inside the interpolation grid, but separate for 2D PME decomp. */
- int fftgrid_nx,fftgrid_ny,fftgrid_nz;
+ real *fftgridA; /* Grids for FFT. With 1D FFT decomposition this can be a pointer */
+ real *fftgridB; /* inside the interpolation grid, but separate for 2D PME decomp. */
+ int fftgrid_nx, fftgrid_ny, fftgrid_nz;
- t_complex *cfftgridA; /* Grids for complex FFT data */
- t_complex *cfftgridB;
- int cfftgrid_nx,cfftgrid_ny,cfftgrid_nz;
+ t_complex *cfftgridA; /* Grids for complex FFT data */
+ t_complex *cfftgridB;
+ int cfftgrid_nx, cfftgrid_ny, cfftgrid_nz;
gmx_parallel_3dfft_t pfft_setupA;
gmx_parallel_3dfft_t pfft_setupB;
- int *nnx,*nny,*nnz;
- real *fshx,*fshy,*fshz;
+ int *nnx, *nny, *nnz;
+ real *fshx, *fshy, *fshz;
- pme_atomcomm_t atc[2]; /* Indexed on decomposition index */
- matrix recipbox;
- splinevec bsp_mod;
+ pme_atomcomm_t atc[2]; /* Indexed on decomposition index */
+ matrix recipbox;
+ splinevec bsp_mod;
- pme_overlap_t overlap[2]; /* Indexed on dimension, 0=x, 1=y */
+ pme_overlap_t overlap[2]; /* Indexed on dimension, 0=x, 1=y */
- pme_atomcomm_t atc_energy; /* Only for gmx_pme_calc_energy */
+ pme_atomcomm_t atc_energy; /* Only for gmx_pme_calc_energy */
- rvec *bufv; /* Communication buffer */
- real *bufr; /* Communication buffer */
- int buf_nalloc; /* The communication buffer size */
+ rvec *bufv; /* Communication buffer */
+ real *bufr; /* Communication buffer */
+ int buf_nalloc; /* The communication buffer size */
/* thread local work data for solve_pme */
pme_work_t *work;
} t_gmx_pme;
-static void calc_interpolation_idx(gmx_pme_t pme,pme_atomcomm_t *atc,
- int start,int end,int thread)
+static void calc_interpolation_idx(gmx_pme_t pme, pme_atomcomm_t *atc,
+ int start, int end, int thread)
{
- int i;
- int *idxptr,tix,tiy,tiz;
- real *xptr,*fptr,tx,ty,tz;
- real rxx,ryx,ryy,rzx,rzy,rzz;
- int nx,ny,nz;
- int start_ix,start_iy,start_iz;
- int *g2tx,*g2ty,*g2tz;
- gmx_bool bThreads;
- int *thread_idx=NULL;
- thread_plist_t *tpl=NULL;
- int *tpl_n=NULL;
- int thread_i;
+ int i;
+ int *idxptr, tix, tiy, tiz;
+ real *xptr, *fptr, tx, ty, tz;
+ real rxx, ryx, ryy, rzx, rzy, rzz;
+ int nx, ny, nz;
+ int start_ix, start_iy, start_iz;
+ int *g2tx, *g2ty, *g2tz;
+ gmx_bool bThreads;
+ int *thread_idx = NULL;
+ thread_plist_t *tpl = NULL;
+ int *tpl_n = NULL;
+ int thread_i;
nx = pme->nkx;
ny = pme->nky;
tpl = &atc->thread_plist[thread];
tpl_n = tpl->n;
- for(i=0; i<atc->nthread; i++)
+ for (i = 0; i < atc->nthread; i++)
{
tpl_n[i] = 0;
}
}
- for(i=start; i<end; i++) {
+ for (i = start; i < end; i++)
+ {
xptr = atc->x[i];
idxptr = atc->idx[i];
fptr = atc->fractx[i];
idxptr[ZZ] = pme->nnz[tiz];
#ifdef DEBUG
- range_check(idxptr[XX],0,pme->pmegrid_nx);
- range_check(idxptr[YY],0,pme->pmegrid_ny);
- range_check(idxptr[ZZ],0,pme->pmegrid_nz);
+ range_check(idxptr[XX], 0, pme->pmegrid_nx);
+ range_check(idxptr[YY], 0, pme->pmegrid_ny);
+ range_check(idxptr[ZZ], 0, pme->pmegrid_nz);
#endif
if (bThreads)
{
- thread_i = g2tx[idxptr[XX]] + g2ty[idxptr[YY]] + g2tz[idxptr[ZZ]];
+ thread_i = g2tx[idxptr[XX]] + g2ty[idxptr[YY]] + g2tz[idxptr[ZZ]];
thread_idx[i] = thread_i;
tpl_n[thread_i]++;
}
/* Make a list of particle indices sorted on thread */
/* Get the cumulative count */
- for(i=1; i<atc->nthread; i++)
+ for (i = 1; i < atc->nthread; i++)
{
tpl_n[i] += tpl_n[i-1];
}
if (tpl_n[atc->nthread-1] > tpl->nalloc)
{
tpl->nalloc = over_alloc_large(tpl_n[atc->nthread-1]);
- srenew(tpl->i,tpl->nalloc);
+ srenew(tpl->i, tpl->nalloc);
}
/* Set tpl_n to the cumulative start */
- for(i=atc->nthread-1; i>=1; i--)
+ for (i = atc->nthread-1; i >= 1; i--)
{
tpl_n[i] = tpl_n[i-1];
}
tpl_n[0] = 0;
/* Fill our thread local array with indices sorted on thread */
- for(i=start; i<end; i++)
+ for (i = start; i < end; i++)
{
tpl->i[tpl_n[atc->thread_idx[i]]++] = i;
}
}
static void make_thread_local_ind(pme_atomcomm_t *atc,
- int thread,splinedata_t *spline)
+ int thread, splinedata_t *spline)
{
- int n,t,i,start,end;
+ int n, t, i, start, end;
thread_plist_t *tpl;
/* Combine the indices made by each thread into one index */
- n = 0;
+ n = 0;
start = 0;
- for(t=0; t<atc->nthread; t++)
+ for (t = 0; t < atc->nthread; t++)
{
tpl = &atc->thread_plist[t];
/* Copy our part (start - end) from the list of thread t */
start = tpl->n[thread-1];
}
end = tpl->n[thread];
- for(i=start; i<end; i++)
+ for (i = start; i < end; i++)
{
spline->ind[n++] = tpl->i[i];
}
matrix recipbox, rvec x[],
pme_atomcomm_t *atc, int *count)
{
- int nslab,i;
- int si;
- real *xptr,s;
- real rxx,ryx,rzx,ryy,rzy;
- int *pd;
+ int nslab, i;
+ int si;
+ real *xptr, s;
+ real rxx, ryx, rzx, ryy, rzy;
+ int *pd;
/* Calculate PME task index (pidx) for each grid index.
* Here we always assign equally sized slabs to each node
pd = atc->pd;
/* Reset the count */
- for(i=0; i<nslab; i++)
+ for (i = 0; i < nslab; i++)
{
count[i] = 0;
}
ryx = recipbox[YY][XX];
rzx = recipbox[ZZ][XX];
/* Calculate the node index in x-dimension */
- for(i=start; i<end; i++)
+ for (i = start; i < end; i++)
{
xptr = x[i];
/* Fractional coordinates along box vectors */
- s = nslab*(xptr[XX]*rxx + xptr[YY]*ryx + xptr[ZZ]*rzx);
- si = (int)(s + 2*nslab) % nslab;
+ s = nslab*(xptr[XX]*rxx + xptr[YY]*ryx + xptr[ZZ]*rzx);
+ si = (int)(s + 2*nslab) % nslab;
pd[i] = si;
count[si]++;
}
ryy = recipbox[YY][YY];
rzy = recipbox[ZZ][YY];
/* Calculate the node index in y-dimension */
- for(i=start; i<end; i++)
+ for (i = start; i < end; i++)
{
xptr = x[i];
/* Fractional coordinates along box vectors */
- s = nslab*(xptr[YY]*ryy + xptr[ZZ]*rzy);
- si = (int)(s + 2*nslab) % nslab;
+ s = nslab*(xptr[YY]*ryy + xptr[ZZ]*rzy);
+ si = (int)(s + 2*nslab) % nslab;
pd[i] = si;
count[si]++;
}
static void pme_calc_pidx_wrapper(int natoms, matrix recipbox, rvec x[],
pme_atomcomm_t *atc)
{
- int nthread,thread,slab;
+ int nthread, thread, slab;
nthread = atc->nthread;
#pragma omp parallel for num_threads(nthread) schedule(static)
- for(thread=0; thread<nthread; thread++)
+ for (thread = 0; thread < nthread; thread++)
{
pme_calc_pidx(natoms* thread /nthread,
natoms*(thread+1)/nthread,
- recipbox,x,atc,atc->count_thread[thread]);
+ recipbox, x, atc, atc->count_thread[thread]);
}
/* Non-parallel reduction, since nslab is small */
- for(thread=1; thread<nthread; thread++)
+ for (thread = 1; thread < nthread; thread++)
{
- for(slab=0; slab<atc->nslab; slab++)
+ for (slab = 0; slab < atc->nslab; slab++)
{
atc->count_thread[0][slab] += atc->count_thread[thread][slab];
}
}
}
-static void realloc_splinevec(splinevec th,real **ptr_z,int nalloc)
+static void realloc_splinevec(splinevec th, real **ptr_z, int nalloc)
{
- const int padding=4;
- int i;
+ const int padding = 4;
+ int i;
- srenew(th[XX],nalloc);
- srenew(th[YY],nalloc);
+ srenew(th[XX], nalloc);
+ srenew(th[YY], nalloc);
/* In z we add padding, this is only required for the aligned SSE code */
- srenew(*ptr_z,nalloc+2*padding);
+ srenew(*ptr_z, nalloc+2*padding);
th[ZZ] = *ptr_z + padding;
- for(i=0; i<padding; i++)
+ for (i = 0; i < padding; i++)
{
(*ptr_z)[ i] = 0;
(*ptr_z)[padding+nalloc+i] = 0;
static void pme_realloc_splinedata(splinedata_t *spline, pme_atomcomm_t *atc)
{
- int i,d;
+ int i, d;
- srenew(spline->ind,atc->nalloc);
+ srenew(spline->ind, atc->nalloc);
/* Initialize the index to identity so it works without threads */
- for(i=0; i<atc->nalloc; i++)
+ for (i = 0; i < atc->nalloc; i++)
{
spline->ind[i] = i;
}
- realloc_splinevec(spline->theta,&spline->ptr_theta_z,
+ realloc_splinevec(spline->theta, &spline->ptr_theta_z,
atc->pme_order*atc->nalloc);
- realloc_splinevec(spline->dtheta,&spline->ptr_dtheta_z,
+ realloc_splinevec(spline->dtheta, &spline->ptr_dtheta_z,
atc->pme_order*atc->nalloc);
}
static void pme_realloc_atomcomm_things(pme_atomcomm_t *atc)
{
- int nalloc_old,i,j,nalloc_tpl;
+ int nalloc_old, i, j, nalloc_tpl;
/* We have to avoid a NULL pointer for atc->x to avoid
* possible fatal errors in MPI routines.
*/
if (atc->n > atc->nalloc || atc->nalloc == 0)
{
- nalloc_old = atc->nalloc;
- atc->nalloc = over_alloc_dd(max(atc->n,1));
+ nalloc_old = atc->nalloc;
+ atc->nalloc = over_alloc_dd(max(atc->n, 1));
- if (atc->nslab > 1) {
- srenew(atc->x,atc->nalloc);
- srenew(atc->q,atc->nalloc);
- srenew(atc->f,atc->nalloc);
- for(i=nalloc_old; i<atc->nalloc; i++)
+ if (atc->nslab > 1)
+ {
+ srenew(atc->x, atc->nalloc);
+ srenew(atc->q, atc->nalloc);
+ srenew(atc->f, atc->nalloc);
+ for (i = nalloc_old; i < atc->nalloc; i++)
{
clear_rvec(atc->f[i]);
}
}
- if (atc->bSpread) {
- srenew(atc->fractx,atc->nalloc);
- srenew(atc->idx ,atc->nalloc);
+ if (atc->bSpread)
+ {
+ srenew(atc->fractx, atc->nalloc);
+ srenew(atc->idx, atc->nalloc);
if (atc->nthread > 1)
{
- srenew(atc->thread_idx,atc->nalloc);
+ srenew(atc->thread_idx, atc->nalloc);
}
- for(i=0; i<atc->nthread; i++)
+ for (i = 0; i < atc->nthread; i++)
{
- pme_realloc_splinedata(&atc->spline[i],atc);
+ pme_realloc_splinedata(&atc->spline[i], atc);
}
}
}
/* domain decomposition by x coordinate */
{
int *idxa;
- int i, ii;
-
- if(FALSE == pme->redist_init) {
- snew(pme->scounts,atc->nslab);
- snew(pme->rcounts,atc->nslab);
- snew(pme->sdispls,atc->nslab);
- snew(pme->rdispls,atc->nslab);
- snew(pme->sidx,atc->nslab);
+ int i, ii;
+
+ if (FALSE == pme->redist_init)
+ {
+ snew(pme->scounts, atc->nslab);
+ snew(pme->rcounts, atc->nslab);
+ snew(pme->sdispls, atc->nslab);
+ snew(pme->rdispls, atc->nslab);
+ snew(pme->sidx, atc->nslab);
pme->redist_init = TRUE;
}
- if (n > pme->redist_buf_nalloc) {
+ if (n > pme->redist_buf_nalloc)
+ {
pme->redist_buf_nalloc = over_alloc_dd(n);
- srenew(pme->redist_buf,pme->redist_buf_nalloc*DIM);
+ srenew(pme->redist_buf, pme->redist_buf_nalloc*DIM);
}
pme->idxa = atc->pd;
#ifdef GMX_MPI
- if (forw && bXF) {
+ if (forw && bXF)
+ {
/* forward, redistribution from pp to pme */
/* Calculate send counts and exchange them with other nodes */
- for(i=0; (i<atc->nslab); i++) pme->scounts[i]=0;
- for(i=0; (i<n); i++) pme->scounts[pme->idxa[i]]++;
+ for (i = 0; (i < atc->nslab); i++)
+ {
+ pme->scounts[i] = 0;
+ }
+ for (i = 0; (i < n); i++)
+ {
+ pme->scounts[pme->idxa[i]]++;
+ }
MPI_Alltoall( pme->scounts, 1, MPI_INT, pme->rcounts, 1, MPI_INT, atc->mpi_comm);
/* Calculate send and receive displacements and index into send
buffer */
- pme->sdispls[0]=0;
- pme->rdispls[0]=0;
- pme->sidx[0]=0;
- for(i=1; i<atc->nslab; i++) {
- pme->sdispls[i]=pme->sdispls[i-1]+pme->scounts[i-1];
- pme->rdispls[i]=pme->rdispls[i-1]+pme->rcounts[i-1];
- pme->sidx[i]=pme->sdispls[i];
+ pme->sdispls[0] = 0;
+ pme->rdispls[0] = 0;
+ pme->sidx[0] = 0;
+ for (i = 1; i < atc->nslab; i++)
+ {
+ pme->sdispls[i] = pme->sdispls[i-1]+pme->scounts[i-1];
+ pme->rdispls[i] = pme->rdispls[i-1]+pme->rcounts[i-1];
+ pme->sidx[i] = pme->sdispls[i];
}
/* Total # of particles to be received */
atc->n = pme->rdispls[atc->nslab-1] + pme->rcounts[atc->nslab-1];
pme_realloc_atomcomm_things(atc);
/* Copy particle coordinates into send buffer and exchange*/
- for(i=0; (i<n); i++) {
- ii=DIM*pme->sidx[pme->idxa[i]];
+ for (i = 0; (i < n); i++)
+ {
+ ii = DIM*pme->sidx[pme->idxa[i]];
pme->sidx[pme->idxa[i]]++;
- pme->redist_buf[ii+XX]=x_f[i][XX];
- pme->redist_buf[ii+YY]=x_f[i][YY];
- pme->redist_buf[ii+ZZ]=x_f[i][ZZ];
+ pme->redist_buf[ii+XX] = x_f[i][XX];
+ pme->redist_buf[ii+YY] = x_f[i][YY];
+ pme->redist_buf[ii+ZZ] = x_f[i][ZZ];
}
MPI_Alltoallv(pme->redist_buf, pme->scounts, pme->sdispls,
pme->rvec_mpi, atc->x, pme->rcounts, pme->rdispls,
pme->rvec_mpi, atc->mpi_comm);
}
- if (forw) {
+ if (forw)
+ {
/* Copy charge into send buffer and exchange*/
- for(i=0; i<atc->nslab; i++) pme->sidx[i]=pme->sdispls[i];
- for(i=0; (i<n); i++) {
- ii=pme->sidx[pme->idxa[i]];
+ for (i = 0; i < atc->nslab; i++)
+ {
+ pme->sidx[i] = pme->sdispls[i];
+ }
+ for (i = 0; (i < n); i++)
+ {
+ ii = pme->sidx[pme->idxa[i]];
pme->sidx[pme->idxa[i]]++;
- pme->redist_buf[ii]=charge[i];
+ pme->redist_buf[ii] = charge[i];
}
MPI_Alltoallv(pme->redist_buf, pme->scounts, pme->sdispls, mpi_type,
atc->q, pme->rcounts, pme->rdispls, mpi_type,
atc->mpi_comm);
}
- else { /* backward, redistribution from pme to pp */
+ else /* backward, redistribution from pme to pp */
+ {
MPI_Alltoallv(atc->f, pme->rcounts, pme->rdispls, pme->rvec_mpi,
pme->redist_buf, pme->scounts, pme->sdispls,
pme->rvec_mpi, atc->mpi_comm);
/* Copy data from receive buffer */
- for(i=0; i<atc->nslab; i++)
+ for (i = 0; i < atc->nslab; i++)
+ {
pme->sidx[i] = pme->sdispls[i];
- for(i=0; (i<n); i++) {
- ii = DIM*pme->sidx[pme->idxa[i]];
+ }
+ for (i = 0; (i < n); i++)
+ {
+ ii = DIM*pme->sidx[pme->idxa[i]];
x_f[i][XX] += pme->redist_buf[ii+XX];
x_f[i][YY] += pme->redist_buf[ii+YY];
x_f[i][ZZ] += pme->redist_buf[ii+ZZ];
}
static void pme_dd_sendrecv(pme_atomcomm_t *atc,
- gmx_bool bBackward,int shift,
- void *buf_s,int nbyte_s,
- void *buf_r,int nbyte_r)
+ gmx_bool bBackward, int shift,
+ void *buf_s, int nbyte_s,
+ void *buf_r, int nbyte_r)
{
#ifdef GMX_MPI
- int dest,src;
+ int dest, src;
MPI_Status stat;
- if (bBackward == FALSE) {
+ if (bBackward == FALSE)
+ {
dest = atc->node_dest[shift];
src = atc->node_src[shift];
- } else {
+ }
+ else
+ {
dest = atc->node_src[shift];
src = atc->node_dest[shift];
}
- if (nbyte_s > 0 && nbyte_r > 0) {
- MPI_Sendrecv(buf_s,nbyte_s,MPI_BYTE,
- dest,shift,
- buf_r,nbyte_r,MPI_BYTE,
- src,shift,
- atc->mpi_comm,&stat);
- } else if (nbyte_s > 0) {
- MPI_Send(buf_s,nbyte_s,MPI_BYTE,
- dest,shift,
+ if (nbyte_s > 0 && nbyte_r > 0)
+ {
+ MPI_Sendrecv(buf_s, nbyte_s, MPI_BYTE,
+ dest, shift,
+ buf_r, nbyte_r, MPI_BYTE,
+ src, shift,
+ atc->mpi_comm, &stat);
+ }
+ else if (nbyte_s > 0)
+ {
+ MPI_Send(buf_s, nbyte_s, MPI_BYTE,
+ dest, shift,
atc->mpi_comm);
- } else if (nbyte_r > 0) {
- MPI_Recv(buf_r,nbyte_r,MPI_BYTE,
- src,shift,
- atc->mpi_comm,&stat);
+ }
+ else if (nbyte_r > 0)
+ {
+ MPI_Recv(buf_r, nbyte_r, MPI_BYTE,
+ src, shift,
+ atc->mpi_comm, &stat);
}
#endif
}
int n, gmx_bool bX, rvec *x, real *charge,
pme_atomcomm_t *atc)
{
- int *commnode,*buf_index;
- int nnodes_comm,i,nsend,local_pos,buf_pos,node,scount,rcount;
+ int *commnode, *buf_index;
+ int nnodes_comm, i, nsend, local_pos, buf_pos, node, scount, rcount;
commnode = atc->node_dest;
buf_index = atc->buf_index;
- nnodes_comm = min(2*atc->maxshift,atc->nslab-1);
+ nnodes_comm = min(2*atc->maxshift, atc->nslab-1);
nsend = 0;
- for(i=0; i<nnodes_comm; i++) {
+ for (i = 0; i < nnodes_comm; i++)
+ {
buf_index[commnode[i]] = nsend;
- nsend += atc->count[commnode[i]];
+ nsend += atc->count[commnode[i]];
}
- if (bX) {
+ if (bX)
+ {
if (atc->count[atc->nodeid] + nsend != n)
- gmx_fatal(FARGS,"%d particles communicated to PME node %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
+ {
+ gmx_fatal(FARGS, "%d particles communicated to PME node %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
"This usually means that your system is not well equilibrated.",
n - (atc->count[atc->nodeid] + nsend),
- pme->nodeid,'x'+atc->dimind);
+ pme->nodeid, 'x'+atc->dimind);
+ }
- if (nsend > pme->buf_nalloc) {
+ if (nsend > pme->buf_nalloc)
+ {
pme->buf_nalloc = over_alloc_dd(nsend);
- srenew(pme->bufv,pme->buf_nalloc);
- srenew(pme->bufr,pme->buf_nalloc);
+ srenew(pme->bufv, pme->buf_nalloc);
+ srenew(pme->bufr, pme->buf_nalloc);
}
atc->n = atc->count[atc->nodeid];
- for(i=0; i<nnodes_comm; i++) {
+ for (i = 0; i < nnodes_comm; i++)
+ {
scount = atc->count[commnode[i]];
/* Communicate the count */
if (debug)
- fprintf(debug,"dimind %d PME node %d send to node %d: %d\n",
- atc->dimind,atc->nodeid,commnode[i],scount);
- pme_dd_sendrecv(atc,FALSE,i,
- &scount,sizeof(int),
- &atc->rcount[i],sizeof(int));
+ {
+ fprintf(debug, "dimind %d PME node %d send to node %d: %d\n",
+ atc->dimind, atc->nodeid, commnode[i], scount);
+ }
+ pme_dd_sendrecv(atc, FALSE, i,
+ &scount, sizeof(int),
+ &atc->rcount[i], sizeof(int));
atc->n += atc->rcount[i];
}
}
local_pos = 0;
- for(i=0; i<n; i++) {
+ for (i = 0; i < n; i++)
+ {
node = atc->pd[i];
- if (node == atc->nodeid) {
+ if (node == atc->nodeid)
+ {
/* Copy direct to the receive buffer */
- if (bX) {
- copy_rvec(x[i],atc->x[local_pos]);
+ if (bX)
+ {
+ copy_rvec(x[i], atc->x[local_pos]);
}
atc->q[local_pos] = charge[i];
local_pos++;
- } else {
+ }
+ else
+ {
/* Copy to the send buffer */
- if (bX) {
- copy_rvec(x[i],pme->bufv[buf_index[node]]);
+ if (bX)
+ {
+ copy_rvec(x[i], pme->bufv[buf_index[node]]);
}
pme->bufr[buf_index[node]] = charge[i];
buf_index[node]++;
}
buf_pos = 0;
- for(i=0; i<nnodes_comm; i++) {
+ for (i = 0; i < nnodes_comm; i++)
+ {
scount = atc->count[commnode[i]];
rcount = atc->rcount[i];
- if (scount > 0 || rcount > 0) {
- if (bX) {
+ if (scount > 0 || rcount > 0)
+ {
+ if (bX)
+ {
/* Communicate the coordinates */
- pme_dd_sendrecv(atc,FALSE,i,
- pme->bufv[buf_pos],scount*sizeof(rvec),
- atc->x[local_pos],rcount*sizeof(rvec));
+ pme_dd_sendrecv(atc, FALSE, i,
+ pme->bufv[buf_pos], scount*sizeof(rvec),
+ atc->x[local_pos], rcount*sizeof(rvec));
}
/* Communicate the charges */
- pme_dd_sendrecv(atc,FALSE,i,
- pme->bufr+buf_pos,scount*sizeof(real),
- atc->q+local_pos,rcount*sizeof(real));
+ pme_dd_sendrecv(atc, FALSE, i,
+ pme->bufr+buf_pos, scount*sizeof(real),
+ atc->q+local_pos, rcount*sizeof(real));
buf_pos += scount;
local_pos += atc->rcount[i];
}
int n, rvec *f,
gmx_bool bAddF)
{
- int *commnode,*buf_index;
- int nnodes_comm,local_pos,buf_pos,i,scount,rcount,node;
-
- commnode = atc->node_dest;
- buf_index = atc->buf_index;
-
- nnodes_comm = min(2*atc->maxshift,atc->nslab-1);
-
- local_pos = atc->count[atc->nodeid];
- buf_pos = 0;
- for(i=0; i<nnodes_comm; i++) {
- scount = atc->rcount[i];
- rcount = atc->count[commnode[i]];
- if (scount > 0 || rcount > 0) {
- /* Communicate the forces */
- pme_dd_sendrecv(atc,TRUE,i,
- atc->f[local_pos],scount*sizeof(rvec),
- pme->bufv[buf_pos],rcount*sizeof(rvec));
- local_pos += scount;
- }
- buf_index[commnode[i]] = buf_pos;
- buf_pos += rcount;
- }
+ int *commnode, *buf_index;
+ int nnodes_comm, local_pos, buf_pos, i, scount, rcount, node;
+
+ commnode = atc->node_dest;
+ buf_index = atc->buf_index;
+
+ nnodes_comm = min(2*atc->maxshift, atc->nslab-1);
+
+ local_pos = atc->count[atc->nodeid];
+ buf_pos = 0;
+ for (i = 0; i < nnodes_comm; i++)
+ {
+ scount = atc->rcount[i];
+ rcount = atc->count[commnode[i]];
+ if (scount > 0 || rcount > 0)
+ {
+ /* Communicate the forces */
+ pme_dd_sendrecv(atc, TRUE, i,
+ atc->f[local_pos], scount*sizeof(rvec),
+ pme->bufv[buf_pos], rcount*sizeof(rvec));
+ local_pos += scount;
+ }
+ buf_index[commnode[i]] = buf_pos;
+ buf_pos += rcount;
+ }
local_pos = 0;
if (bAddF)
{
- for(i=0; i<n; i++)
+ for (i = 0; i < n; i++)
{
node = atc->pd[i];
if (node == atc->nodeid)
{
/* Add from the local force array */
- rvec_inc(f[i],atc->f[local_pos]);
+ rvec_inc(f[i], atc->f[local_pos]);
local_pos++;
}
else
{
/* Add from the receive buffer */
- rvec_inc(f[i],pme->bufv[buf_index[node]]);
+ rvec_inc(f[i], pme->bufv[buf_index[node]]);
buf_index[node]++;
}
}
}
else
{
- for(i=0; i<n; i++)
+ for (i = 0; i < n; i++)
{
node = atc->pd[i];
if (node == atc->nodeid)
{
/* Copy from the local force array */
- copy_rvec(atc->f[local_pos],f[i]);
+ copy_rvec(atc->f[local_pos], f[i]);
local_pos++;
}
else
{
/* Copy from the receive buffer */
- copy_rvec(pme->bufv[buf_index[node]],f[i]);
+ copy_rvec(pme->bufv[buf_index[node]], f[i]);
buf_index[node]++;
}
}
gmx_sum_qgrid_dd(gmx_pme_t pme, real *grid, int direction)
{
pme_overlap_t *overlap;
- int send_index0,send_nindex;
- int recv_index0,recv_nindex;
- MPI_Status stat;
- int i,j,k,ix,iy,iz,icnt;
- int ipulse,send_id,recv_id,datasize;
- real *p;
- real *sendptr,*recvptr;
+ int send_index0, send_nindex;
+ int recv_index0, recv_nindex;
+ MPI_Status stat;
+ int i, j, k, ix, iy, iz, icnt;
+ int ipulse, send_id, recv_id, datasize;
+ real *p;
+ real *sendptr, *recvptr;
/* Start with minor-rank communication. This is a bit of a pain since it is not contiguous */
overlap = &pme->overlap[1];
- for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++)
+ for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
{
/* Since we have already (un)wrapped the overlap in the z-dimension,
* we only have to communicate 0 to nkz (not pmegrid_nz).
*/
- if (direction==GMX_SUM_QGRID_FORWARD)
+ if (direction == GMX_SUM_QGRID_FORWARD)
{
- send_id = overlap->send_id[ipulse];
- recv_id = overlap->recv_id[ipulse];
+ send_id = overlap->send_id[ipulse];
+ recv_id = overlap->recv_id[ipulse];
send_index0 = overlap->comm_data[ipulse].send_index0;
send_nindex = overlap->comm_data[ipulse].send_nindex;
recv_index0 = overlap->comm_data[ipulse].recv_index0;
}
else
{
- send_id = overlap->recv_id[ipulse];
- recv_id = overlap->send_id[ipulse];
+ send_id = overlap->recv_id[ipulse];
+ recv_id = overlap->send_id[ipulse];
send_index0 = overlap->comm_data[ipulse].recv_index0;
send_nindex = overlap->comm_data[ipulse].recv_nindex;
recv_index0 = overlap->comm_data[ipulse].send_index0;
/* Copy data to contiguous send buffer */
if (debug)
{
- fprintf(debug,"PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
- pme->nodeid,overlap->nodeid,send_id,
+ fprintf(debug, "PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
+ pme->nodeid, overlap->nodeid, send_id,
pme->pmegrid_start_iy,
send_index0-pme->pmegrid_start_iy,
send_index0-pme->pmegrid_start_iy+send_nindex);
}
icnt = 0;
- for(i=0;i<pme->pmegrid_nx;i++)
+ for (i = 0; i < pme->pmegrid_nx; i++)
{
ix = i;
- for(j=0;j<send_nindex;j++)
+ for (j = 0; j < send_nindex; j++)
{
iy = j + send_index0 - pme->pmegrid_start_iy;
- for(k=0;k<pme->nkz;k++)
+ for (k = 0; k < pme->nkz; k++)
{
iz = k;
overlap->sendbuf[icnt++] = grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz];
datasize = pme->pmegrid_nx * pme->nkz;
- MPI_Sendrecv(overlap->sendbuf,send_nindex*datasize,GMX_MPI_REAL,
- send_id,ipulse,
- overlap->recvbuf,recv_nindex*datasize,GMX_MPI_REAL,
- recv_id,ipulse,
- overlap->mpi_comm,&stat);
+ MPI_Sendrecv(overlap->sendbuf, send_nindex*datasize, GMX_MPI_REAL,
+ send_id, ipulse,
+ overlap->recvbuf, recv_nindex*datasize, GMX_MPI_REAL,
+ recv_id, ipulse,
+ overlap->mpi_comm, &stat);
/* Get data from contiguous recv buffer */
if (debug)
{
- fprintf(debug,"PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
- pme->nodeid,overlap->nodeid,recv_id,
+ fprintf(debug, "PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
+ pme->nodeid, overlap->nodeid, recv_id,
pme->pmegrid_start_iy,
recv_index0-pme->pmegrid_start_iy,
recv_index0-pme->pmegrid_start_iy+recv_nindex);
}
icnt = 0;
- for(i=0;i<pme->pmegrid_nx;i++)
+ for (i = 0; i < pme->pmegrid_nx; i++)
{
ix = i;
- for(j=0;j<recv_nindex;j++)
+ for (j = 0; j < recv_nindex; j++)
{
iy = j + recv_index0 - pme->pmegrid_start_iy;
- for(k=0;k<pme->nkz;k++)
+ for (k = 0; k < pme->nkz; k++)
{
iz = k;
- if(direction==GMX_SUM_QGRID_FORWARD)
+ if (direction == GMX_SUM_QGRID_FORWARD)
{
grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz] += overlap->recvbuf[icnt++];
}
*/
overlap = &pme->overlap[0];
- for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++)
+ for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
{
- if(direction==GMX_SUM_QGRID_FORWARD)
+ if (direction == GMX_SUM_QGRID_FORWARD)
{
- send_id = overlap->send_id[ipulse];
- recv_id = overlap->recv_id[ipulse];
+ send_id = overlap->send_id[ipulse];
+ recv_id = overlap->recv_id[ipulse];
send_index0 = overlap->comm_data[ipulse].send_index0;
send_nindex = overlap->comm_data[ipulse].send_nindex;
recv_index0 = overlap->comm_data[ipulse].recv_index0;
recv_nindex = overlap->comm_data[ipulse].recv_nindex;
- recvptr = overlap->recvbuf;
+ recvptr = overlap->recvbuf;
}
else
{
- send_id = overlap->recv_id[ipulse];
- recv_id = overlap->send_id[ipulse];
+ send_id = overlap->recv_id[ipulse];
+ recv_id = overlap->send_id[ipulse];
send_index0 = overlap->comm_data[ipulse].recv_index0;
send_nindex = overlap->comm_data[ipulse].recv_nindex;
recv_index0 = overlap->comm_data[ipulse].send_index0;
recv_nindex = overlap->comm_data[ipulse].send_nindex;
- recvptr = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
+ recvptr = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
}
sendptr = grid + (send_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
if (debug)
{
- fprintf(debug,"PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
- pme->nodeid,overlap->nodeid,send_id,
+ fprintf(debug, "PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
+ pme->nodeid, overlap->nodeid, send_id,
pme->pmegrid_start_ix,
send_index0-pme->pmegrid_start_ix,
send_index0-pme->pmegrid_start_ix+send_nindex);
- fprintf(debug,"PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
- pme->nodeid,overlap->nodeid,recv_id,
+ fprintf(debug, "PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
+ pme->nodeid, overlap->nodeid, recv_id,
pme->pmegrid_start_ix,
recv_index0-pme->pmegrid_start_ix,
recv_index0-pme->pmegrid_start_ix+recv_nindex);
}
- MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
- send_id,ipulse,
- recvptr,recv_nindex*datasize,GMX_MPI_REAL,
- recv_id,ipulse,
- overlap->mpi_comm,&stat);
+ MPI_Sendrecv(sendptr, send_nindex*datasize, GMX_MPI_REAL,
+ send_id, ipulse,
+ recvptr, recv_nindex*datasize, GMX_MPI_REAL,
+ recv_id, ipulse,
+ overlap->mpi_comm, &stat);
/* ADD data from contiguous recv buffer */
- if(direction==GMX_SUM_QGRID_FORWARD)
+ if (direction == GMX_SUM_QGRID_FORWARD)
{
p = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
- for(i=0;i<recv_nindex*datasize;i++)
+ for (i = 0; i < recv_nindex*datasize; i++)
{
p[i] += overlap->recvbuf[i];
}
static int
copy_pmegrid_to_fftgrid(gmx_pme_t pme, real *pmegrid, real *fftgrid)
{
- ivec local_fft_ndata,local_fft_offset,local_fft_size;
+ ivec local_fft_ndata, local_fft_offset, local_fft_size;
ivec local_pme_size;
- int i,ix,iy,iz;
- int pmeidx,fftidx;
+ int i, ix, iy, iz;
+ int pmeidx, fftidx;
/* Dimensions should be identical for A/B grid, so we just use A here */
gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
local_pme_size[2] = pme->pmegrid_nz;
/* The fftgrid is always 'justified' to the lower-left corner of the PME grid,
- the offset is identical, and the PME grid always has more data (due to overlap)
+ the offset is identical, and the PME grid always has more data (due to overlap)
*/
{
#ifdef DEBUG_PME
- FILE *fp,*fp2;
- char fn[STRLEN],format[STRLEN];
- real val;
- sprintf(fn,"pmegrid%d.pdb",pme->nodeid);
- fp = ffopen(fn,"w");
- sprintf(fn,"pmegrid%d.txt",pme->nodeid);
- fp2 = ffopen(fn,"w");
- sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
+ FILE *fp, *fp2;
+ char fn[STRLEN], format[STRLEN];
+ real val;
+ sprintf(fn, "pmegrid%d.pdb", pme->nodeid);
+ fp = ffopen(fn, "w");
+ sprintf(fn, "pmegrid%d.txt", pme->nodeid);
+ fp2 = ffopen(fn, "w");
+ sprintf(format, "%s%s\n", pdbformat, "%6.2f%6.2f");
#endif
- for(ix=0;ix<local_fft_ndata[XX];ix++)
- {
- for(iy=0;iy<local_fft_ndata[YY];iy++)
+ for (ix = 0; ix < local_fft_ndata[XX]; ix++)
{
- for(iz=0;iz<local_fft_ndata[ZZ];iz++)
+ for (iy = 0; iy < local_fft_ndata[YY]; iy++)
{
- pmeidx = ix*(local_pme_size[YY]*local_pme_size[ZZ])+iy*(local_pme_size[ZZ])+iz;
- fftidx = ix*(local_fft_size[YY]*local_fft_size[ZZ])+iy*(local_fft_size[ZZ])+iz;
- fftgrid[fftidx] = pmegrid[pmeidx];
+ for (iz = 0; iz < local_fft_ndata[ZZ]; iz++)
+ {
+ pmeidx = ix*(local_pme_size[YY]*local_pme_size[ZZ])+iy*(local_pme_size[ZZ])+iz;
+ fftidx = ix*(local_fft_size[YY]*local_fft_size[ZZ])+iy*(local_fft_size[ZZ])+iz;
+ fftgrid[fftidx] = pmegrid[pmeidx];
#ifdef DEBUG_PME
- val = 100*pmegrid[pmeidx];
- if (pmegrid[pmeidx] != 0)
- fprintf(fp,format,"ATOM",pmeidx,"CA","GLY",' ',pmeidx,' ',
- 5.0*ix,5.0*iy,5.0*iz,1.0,val);
- if (pmegrid[pmeidx] != 0)
- fprintf(fp2,"%-12s %5d %5d %5d %12.5e\n",
- "qgrid",
- pme->pmegrid_start_ix + ix,
- pme->pmegrid_start_iy + iy,
- pme->pmegrid_start_iz + iz,
- pmegrid[pmeidx]);
+ val = 100*pmegrid[pmeidx];
+ if (pmegrid[pmeidx] != 0)
+ {
+ fprintf(fp, format, "ATOM", pmeidx, "CA", "GLY", ' ', pmeidx, ' ',
+ 5.0*ix, 5.0*iy, 5.0*iz, 1.0, val);
+ }
+ if (pmegrid[pmeidx] != 0)
+ {
+ fprintf(fp2, "%-12s %5d %5d %5d %12.5e\n",
+ "qgrid",
+ pme->pmegrid_start_ix + ix,
+ pme->pmegrid_start_iy + iy,
+ pme->pmegrid_start_iz + iz,
+ pmegrid[pmeidx]);
+ }
#endif
+ }
}
}
- }
#ifdef DEBUG_PME
- ffclose(fp);
- ffclose(fp2);
+ ffclose(fp);
+ ffclose(fp2);
#endif
}
return 0;
static int
copy_fftgrid_to_pmegrid(gmx_pme_t pme, const real *fftgrid, real *pmegrid,
- int nthread,int thread)
+ int nthread, int thread)
{
- ivec local_fft_ndata,local_fft_offset,local_fft_size;
- ivec local_pme_size;
- int ixy0,ixy1,ixy,ix,iy,iz;
- int pmeidx,fftidx;
+ ivec local_fft_ndata, local_fft_offset, local_fft_size;
+ ivec local_pme_size;
+ int ixy0, ixy1, ixy, ix, iy, iz;
+ int pmeidx, fftidx;
#ifdef PME_TIME_THREADS
- gmx_cycles_t c1;
- static double cs1=0;
- static int cnt=0;
+ gmx_cycles_t c1;
+ static double cs1 = 0;
+ static int cnt = 0;
#endif
#ifdef PME_TIME_THREADS
local_pme_size[2] = pme->pmegrid_nz;
/* The fftgrid is always 'justified' to the lower-left corner of the PME grid,
- the offset is identical, and the PME grid always has more data (due to overlap)
+ the offset is identical, and the PME grid always has more data (due to overlap)
*/
ixy0 = ((thread )*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
ixy1 = ((thread+1)*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
- for(ixy=ixy0;ixy<ixy1;ixy++)
+ for (ixy = ixy0; ixy < ixy1; ixy++)
{
ix = ixy/local_fft_ndata[YY];
iy = ixy - ix*local_fft_ndata[YY];
pmeidx = (ix*local_pme_size[YY] + iy)*local_pme_size[ZZ];
fftidx = (ix*local_fft_size[YY] + iy)*local_fft_size[ZZ];
- for(iz=0;iz<local_fft_ndata[ZZ];iz++)
+ for (iz = 0; iz < local_fft_ndata[ZZ]; iz++)
{
pmegrid[pmeidx+iz] = fftgrid[fftidx+iz];
}
}
#ifdef PME_TIME_THREADS
- c1 = omp_cyc_end(c1);
+ c1 = omp_cyc_end(c1);
cs1 += (double)c1;
cnt++;
if (cnt % 20 == 0)
{
- printf("copy %.2f\n",cs1*1e-9);
+ printf("copy %.2f\n", cs1*1e-9);
}
#endif
static void
wrap_periodic_pmegrid(gmx_pme_t pme, real *pmegrid)
{
- int nx,ny,nz,pnx,pny,pnz,ny_x,overlap,ix,iy,iz;
+ int nx, ny, nz, pnx, pny, pnz, ny_x, overlap, ix, iy, iz;
nx = pme->nkx;
ny = pme->nky;
overlap = pme->pme_order - 1;
/* Add periodic overlap in z */
- for(ix=0; ix<pme->pmegrid_nx; ix++)
+ for (ix = 0; ix < pme->pmegrid_nx; ix++)
{
- for(iy=0; iy<pme->pmegrid_ny; iy++)
+ for (iy = 0; iy < pme->pmegrid_ny; iy++)
{
- for(iz=0; iz<overlap; iz++)
+ for (iz = 0; iz < overlap; iz++)
{
pmegrid[(ix*pny+iy)*pnz+iz] +=
pmegrid[(ix*pny+iy)*pnz+nz+iz];
if (pme->nnodes_minor == 1)
{
- for(ix=0; ix<pme->pmegrid_nx; ix++)
- {
- for(iy=0; iy<overlap; iy++)
- {
- for(iz=0; iz<nz; iz++)
- {
- pmegrid[(ix*pny+iy)*pnz+iz] +=
- pmegrid[(ix*pny+ny+iy)*pnz+iz];
- }
- }
- }
+ for (ix = 0; ix < pme->pmegrid_nx; ix++)
+ {
+ for (iy = 0; iy < overlap; iy++)
+ {
+ for (iz = 0; iz < nz; iz++)
+ {
+ pmegrid[(ix*pny+iy)*pnz+iz] +=
+ pmegrid[(ix*pny+ny+iy)*pnz+iz];
+ }
+ }
+ }
}
if (pme->nnodes_major == 1)
{
ny_x = (pme->nnodes_minor == 1 ? ny : pme->pmegrid_ny);
- for(ix=0; ix<overlap; ix++)
+ for (ix = 0; ix < overlap; ix++)
{
- for(iy=0; iy<ny_x; iy++)
+ for (iy = 0; iy < ny_x; iy++)
{
- for(iz=0; iz<nz; iz++)
+ for (iz = 0; iz < nz; iz++)
{
pmegrid[(ix*pny+iy)*pnz+iz] +=
pmegrid[((nx+ix)*pny+iy)*pnz+iz];
static void
unwrap_periodic_pmegrid(gmx_pme_t pme, real *pmegrid)
{
- int nx,ny,nz,pnx,pny,pnz,ny_x,overlap,ix;
+ int nx, ny, nz, pnx, pny, pnz, ny_x, overlap, ix;
nx = pme->nkx;
ny = pme->nky;
{
ny_x = (pme->nnodes_minor == 1 ? ny : pme->pmegrid_ny);
- for(ix=0; ix<overlap; ix++)
+ for (ix = 0; ix < overlap; ix++)
{
- int iy,iz;
+ int iy, iz;
- for(iy=0; iy<ny_x; iy++)
+ for (iy = 0; iy < ny_x; iy++)
{
- for(iz=0; iz<nz; iz++)
+ for (iz = 0; iz < nz; iz++)
{
pmegrid[((nx+ix)*pny+iy)*pnz+iz] =
pmegrid[(ix*pny+iy)*pnz+iz];
if (pme->nnodes_minor == 1)
{
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
- for(ix=0; ix<pme->pmegrid_nx; ix++)
- {
- int iy,iz;
+ for (ix = 0; ix < pme->pmegrid_nx; ix++)
+ {
+ int iy, iz;
- for(iy=0; iy<overlap; iy++)
- {
- for(iz=0; iz<nz; iz++)
- {
- pmegrid[(ix*pny+ny+iy)*pnz+iz] =
- pmegrid[(ix*pny+iy)*pnz+iz];
- }
- }
- }
+ for (iy = 0; iy < overlap; iy++)
+ {
+ for (iz = 0; iz < nz; iz++)
+ {
+ pmegrid[(ix*pny+ny+iy)*pnz+iz] =
+ pmegrid[(ix*pny+iy)*pnz+iz];
+ }
+ }
+ }
}
/* Copy periodic overlap in z */
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
- for(ix=0; ix<pme->pmegrid_nx; ix++)
+ for (ix = 0; ix < pme->pmegrid_nx; ix++)
{
- int iy,iz;
+ int iy, iz;
- for(iy=0; iy<pme->pmegrid_ny; iy++)
+ for (iy = 0; iy < pme->pmegrid_ny; iy++)
{
- for(iz=0; iz<overlap; iz++)
+ for (iz = 0; iz < overlap; iz++)
{
pmegrid[(ix*pny+iy)*pnz+nz+iz] =
pmegrid[(ix*pny+iy)*pnz+iz];
}
}
-static void clear_grid(int nx,int ny,int nz,real *grid,
- ivec fs,int *flag,
- int fx,int fy,int fz,
+static void clear_grid(int nx, int ny, int nz, real *grid,
+ ivec fs, int *flag,
+ int fx, int fy, int fz,
int order)
{
- int nc,ncz;
- int fsx,fsy,fsz,gx,gy,gz,g0x,g0y,x,y,z;
+ int nc, ncz;
+ int fsx, fsy, fsz, gx, gy, gz, g0x, g0y, x, y, z;
int flind;
nc = 2 + (order - 2)/FLBS;
ncz = 2 + (order - 2)/FLBSZ;
- for(fsx=fx; fsx<fx+nc; fsx++)
+ for (fsx = fx; fsx < fx+nc; fsx++)
{
- for(fsy=fy; fsy<fy+nc; fsy++)
+ for (fsy = fy; fsy < fy+nc; fsy++)
{
- for(fsz=fz; fsz<fz+ncz; fsz++)
+ for (fsz = fz; fsz < fz+ncz; fsz++)
{
flind = (fsx*fs[YY] + fsy)*fs[ZZ] + fsz;
if (flag[flind] == 0)
{
- gx = fsx*FLBS;
- gy = fsy*FLBS;
- gz = fsz*FLBSZ;
+ gx = fsx*FLBS;
+ gy = fsy*FLBS;
+ gz = fsz*FLBSZ;
g0x = (gx*ny + gy)*nz + gz;
- for(x=0; x<FLBS; x++)
+ for (x = 0; x < FLBS; x++)
{
g0y = g0x;
- for(y=0; y<FLBS; y++)
+ for (y = 0; y < FLBS; y++)
{
- for(z=0; z<FLBSZ; z++)
+ for (z = 0; z < FLBSZ; z++)
{
grid[g0y+z] = 0;
}
/* This has to be a macro to enable full compiler optimization with xlC (and probably others too) */
#define DO_BSPLINE(order) \
-for(ithx=0; (ithx<order); ithx++) \
-{ \
- index_x = (i0+ithx)*pny*pnz; \
- valx = qn*thx[ithx]; \
+ for (ithx = 0; (ithx < order); ithx++) \
+ { \
+ index_x = (i0+ithx)*pny*pnz; \
+ valx = qn*thx[ithx]; \
\
- for(ithy=0; (ithy<order); ithy++) \
- { \
- valxy = valx*thy[ithy]; \
- index_xy = index_x+(j0+ithy)*pnz; \
+ for (ithy = 0; (ithy < order); ithy++) \
+ { \
+ valxy = valx*thy[ithy]; \
+ index_xy = index_x+(j0+ithy)*pnz; \
\
- for(ithz=0; (ithz<order); ithz++) \
- { \
- index_xyz = index_xy+(k0+ithz); \
- grid[index_xyz] += valxy*thz[ithz]; \
- } \
- } \
-}
+ for (ithz = 0; (ithz < order); ithz++) \
+ { \
+ index_xyz = index_xy+(k0+ithz); \
+ grid[index_xyz] += valxy*thz[ithz]; \
+ } \
+ } \
+ }
static void spread_q_bsplines_thread(pmegrid_t *pmegrid,
{
/* spread charges from home atoms to local grid */
- real *grid;
+ real *grid;
pme_overlap_t *ol;
- int b,i,nn,n,ithx,ithy,ithz,i0,j0,k0;
- int * idxptr;
- int order,norder,index_x,index_xy,index_xyz;
- real valx,valxy,qn;
- real *thx,*thy,*thz;
- int localsize, bndsize;
- int pnx,pny,pnz,ndatatot;
- int offx,offy,offz;
+ int b, i, nn, n, ithx, ithy, ithz, i0, j0, k0;
+ int * idxptr;
+ int order, norder, index_x, index_xy, index_xyz;
+ real valx, valxy, qn;
+ real *thx, *thy, *thz;
+ int localsize, bndsize;
+ int pnx, pny, pnz, ndatatot;
+ int offx, offy, offz;
pnx = pmegrid->s[XX];
pny = pmegrid->s[YY];
offz = pmegrid->offset[ZZ];
ndatatot = pnx*pny*pnz;
- grid = pmegrid->grid;
- for(i=0;i<ndatatot;i++)
+ grid = pmegrid->grid;
+ for (i = 0; i < ndatatot; i++)
{
grid[i] = 0;
}
-
+
order = pmegrid->order;
- for(nn=0; nn<spline->n; nn++)
+ for (nn = 0; nn < spline->n; nn++)
{
n = spline->ind[nn];
qn = atc->q[n];
thy = spline->theta[YY] + norder;
thz = spline->theta[ZZ] + norder;
- switch (order) {
- case 4:
+ switch (order)
+ {
+ case 4:
#ifdef PME_SSE
#ifdef PME_SSE_UNALIGNED
#define PME_SPREAD_SSE_ORDER4
#endif
#include "pme_sse_single.h"
#else
- DO_BSPLINE(4);
+ DO_BSPLINE(4);
#endif
- break;
- case 5:
+ break;
+ case 5:
#ifdef PME_SSE
#define PME_SPREAD_SSE_ALIGNED
#define PME_ORDER 5
#include "pme_sse_single.h"
#else
- DO_BSPLINE(5);
+ DO_BSPLINE(5);
#endif
- break;
- default:
- DO_BSPLINE(order);
- break;
+ break;
+ default:
+ DO_BSPLINE(order);
+ break;
}
}
}
}
-static void set_grid_alignment(int *pmegrid_nz,int pme_order)
+static void set_grid_alignment(int *pmegrid_nz, int pme_order)
{
#ifdef PME_SSE
if (pme_order == 5
#endif
}
-static void set_gridsize_alignment(int *gridsize,int pme_order)
+static void set_gridsize_alignment(int *gridsize, int pme_order)
{
#ifdef PME_SSE
#ifndef PME_SSE_UNALIGNED
* Note that for pme_order=5, the pme grid z-size alignment
* ensures that we will not go beyond the grid size.
*/
- *gridsize += 4;
+ *gridsize += 4;
}
#endif
#endif
int pme_order,
real *ptr)
{
- int nz,gridsize;
+ int nz, gridsize;
- grid->ci[XX] = cx;
- grid->ci[YY] = cy;
- grid->ci[ZZ] = cz;
+ grid->ci[XX] = cx;
+ grid->ci[YY] = cy;
+ grid->ci[ZZ] = cz;
grid->offset[XX] = x0;
grid->offset[YY] = y0;
grid->offset[ZZ] = z0;
grid->n[XX] = x1 - x0 + pme_order - 1;
grid->n[YY] = y1 - y0 + pme_order - 1;
grid->n[ZZ] = z1 - z0 + pme_order - 1;
- copy_ivec(grid->n,grid->s);
+ copy_ivec(grid->n, grid->s);
nz = grid->s[ZZ];
- set_grid_alignment(&nz,pme_order);
+ set_grid_alignment(&nz, pme_order);
if (set_alignment)
{
grid->s[ZZ] = nz;
if (ptr == NULL)
{
gridsize = grid->s[XX]*grid->s[YY]*grid->s[ZZ];
- set_gridsize_alignment(&gridsize,pme_order);
- snew_aligned(grid->grid,gridsize,16);
+ set_gridsize_alignment(&gridsize, pme_order);
+ snew_aligned(grid->grid, gridsize, 16);
}
else
{
}
}
-static int div_round_up(int enumerator,int denominator)
+static int div_round_up(int enumerator, int denominator)
{
return (enumerator + denominator - 1)/denominator;
}
-static void make_subgrid_division(const ivec n,int ovl,int nthread,
+static void make_subgrid_division(const ivec n, int ovl, int nthread,
ivec nsub)
{
- int gsize_opt,gsize;
- int nsx,nsy,nsz;
+ int gsize_opt, gsize;
+ int nsx, nsy, nsz;
char *env;
gsize_opt = -1;
- for(nsx=1; nsx<=nthread; nsx++)
+ for (nsx = 1; nsx <= nthread; nsx++)
{
if (nthread % nsx == 0)
{
- for(nsy=1; nsy<=nthread; nsy++)
+ for (nsy = 1; nsy <= nthread; nsy++)
{
if (nsx*nsy <= nthread && nthread % (nsx*nsy) == 0)
{
/* Determine the number of grid points per thread */
gsize =
- (div_round_up(n[XX],nsx) + ovl)*
- (div_round_up(n[YY],nsy) + ovl)*
- (div_round_up(n[ZZ],nsz) + ovl);
+ (div_round_up(n[XX], nsx) + ovl)*
+ (div_round_up(n[YY], nsy) + ovl)*
+ (div_round_up(n[ZZ], nsz) + ovl);
/* Minimize the number of grids points per thread
* and, secondarily, the number of cuts in minor dimensions.
(gsize == gsize_opt &&
(nsz < nsub[ZZ] || (nsz == nsub[ZZ] && nsy < nsub[YY]))))
{
- nsub[XX] = nsx;
- nsub[YY] = nsy;
- nsub[ZZ] = nsz;
+ nsub[XX] = nsx;
+ nsub[YY] = nsy;
+ nsub[ZZ] = nsz;
gsize_opt = gsize;
}
}
env = getenv("GMX_PME_THREAD_DIVISION");
if (env != NULL)
{
- sscanf(env,"%d %d %d",&nsub[XX],&nsub[YY],&nsub[ZZ]);
+ sscanf(env, "%d %d %d", &nsub[XX], &nsub[YY], &nsub[ZZ]);
}
if (nsub[XX]*nsub[YY]*nsub[ZZ] != nthread)
{
- gmx_fatal(FARGS,"PME grid thread division (%d x %d x %d) does not match the total number of threads (%d)",nsub[XX],nsub[YY],nsub[ZZ],nthread);
+ gmx_fatal(FARGS, "PME grid thread division (%d x %d x %d) does not match the total number of threads (%d)", nsub[XX], nsub[YY], nsub[ZZ], nthread);
}
}
static void pmegrids_init(pmegrids_t *grids,
- int nx,int ny,int nz,int nz_base,
+ int nx, int ny, int nz, int nz_base,
int pme_order,
int nthread,
int overlap_x,
int overlap_y)
{
- ivec n,n_base,g0,g1;
- int t,x,y,z,d,i,tfac;
- int max_comm_lines=-1;
+ ivec n, n_base, g0, g1;
+ int t, x, y, z, d, i, tfac;
+ int max_comm_lines = -1;
n[XX] = nx - (pme_order - 1);
n[YY] = ny - (pme_order - 1);
n[ZZ] = nz - (pme_order - 1);
- copy_ivec(n,n_base);
+ copy_ivec(n, n_base);
n_base[ZZ] = nz_base;
- pmegrid_init(&grids->grid,0,0,0,0,0,0,n[XX],n[YY],n[ZZ],FALSE,pme_order,
+ pmegrid_init(&grids->grid, 0, 0, 0, 0, 0, 0, n[XX], n[YY], n[ZZ], FALSE, pme_order,
NULL);
grids->nthread = nthread;
- make_subgrid_division(n_base,pme_order-1,grids->nthread,grids->nc);
+ make_subgrid_division(n_base, pme_order-1, grids->nthread, grids->nc);
if (grids->nthread > 1)
{
ivec nst;
int gridsize;
- for(d=0; d<DIM; d++)
+ for (d = 0; d < DIM; d++)
{
- nst[d] = div_round_up(n[d],grids->nc[d]) + pme_order - 1;
+ nst[d] = div_round_up(n[d], grids->nc[d]) + pme_order - 1;
}
- set_grid_alignment(&nst[ZZ],pme_order);
+ set_grid_alignment(&nst[ZZ], pme_order);
if (debug)
{
- fprintf(debug,"pmegrid thread local division: %d x %d x %d\n",
- grids->nc[XX],grids->nc[YY],grids->nc[ZZ]);
- fprintf(debug,"pmegrid %d %d %d max thread pmegrid %d %d %d\n",
- nx,ny,nz,
- nst[XX],nst[YY],nst[ZZ]);
+ fprintf(debug, "pmegrid thread local division: %d x %d x %d\n",
+ grids->nc[XX], grids->nc[YY], grids->nc[ZZ]);
+ fprintf(debug, "pmegrid %d %d %d max thread pmegrid %d %d %d\n",
+ nx, ny, nz,
+ nst[XX], nst[YY], nst[ZZ]);
}
- snew(grids->grid_th,grids->nthread);
- t = 0;
+ snew(grids->grid_th, grids->nthread);
+ t = 0;
gridsize = nst[XX]*nst[YY]*nst[ZZ];
- set_gridsize_alignment(&gridsize,pme_order);
+ set_gridsize_alignment(&gridsize, pme_order);
snew_aligned(grids->grid_all,
grids->nthread*gridsize+(grids->nthread+1)*GMX_CACHE_SEP,
16);
- for(x=0; x<grids->nc[XX]; x++)
+ for (x = 0; x < grids->nc[XX]; x++)
{
- for(y=0; y<grids->nc[YY]; y++)
+ for (y = 0; y < grids->nc[YY]; y++)
{
- for(z=0; z<grids->nc[ZZ]; z++)
+ for (z = 0; z < grids->nc[ZZ]; z++)
{
pmegrid_init(&grids->grid_th[t],
- x,y,z,
+ x, y, z,
(n[XX]*(x ))/grids->nc[XX],
(n[YY]*(y ))/grids->nc[YY],
(n[ZZ]*(z ))/grids->nc[ZZ],
}
}
- snew(grids->g2t,DIM);
+ snew(grids->g2t, DIM);
tfac = 1;
- for(d=DIM-1; d>=0; d--)
+ for (d = DIM-1; d >= 0; d--)
{
- snew(grids->g2t[d],n[d]);
+ snew(grids->g2t[d], n[d]);
t = 0;
- for(i=0; i<n[d]; i++)
+ for (i = 0; i < n[d]; i++)
{
/* The second check should match the parameters
* of the pmegrid_init call above.
switch (d)
{
- case XX: max_comm_lines = overlap_x; break;
- case YY: max_comm_lines = overlap_y; break;
- case ZZ: max_comm_lines = pme_order - 1; break;
+ case XX: max_comm_lines = overlap_x; break;
+ case YY: max_comm_lines = overlap_y; break;
+ case ZZ: max_comm_lines = pme_order - 1; break;
}
grids->nthread_comm[d] = 0;
while ((n[d]*grids->nthread_comm[d])/grids->nc[d] < max_comm_lines &&
}
if (debug != NULL)
{
- fprintf(debug,"pmegrid thread grid communication range in %c: %d\n",
- 'x'+d,grids->nthread_comm[d]);
+ fprintf(debug, "pmegrid thread grid communication range in %c: %d\n",
+ 'x'+d, grids->nthread_comm[d]);
}
/* It should be possible to make grids->nthread_comm[d]==grids->nc[d]
* work, but this is not a problematic restriction.
*/
if (grids->nc[d] > 1 && grids->nthread_comm[d] > grids->nc[d])
{
- gmx_fatal(FARGS,"Too many threads for PME (%d) compared to the number of grid lines, reduce the number of threads doing PME",grids->nthread);
+ gmx_fatal(FARGS, "Too many threads for PME (%d) compared to the number of grid lines, reduce the number of threads doing PME", grids->nthread);
}
}
}
if (grids->nthread > 0)
{
- for(t=0; t<grids->nthread; t++)
+ for (t = 0; t < grids->nthread; t++)
{
sfree(grids->grid_th[t].grid);
}
}
-static void realloc_work(pme_work_t *work,int nkx)
+static void realloc_work(pme_work_t *work, int nkx)
{
if (nkx > work->nalloc)
{
work->nalloc = nkx;
- srenew(work->mhx ,work->nalloc);
- srenew(work->mhy ,work->nalloc);
- srenew(work->mhz ,work->nalloc);
- srenew(work->m2 ,work->nalloc);
+ srenew(work->mhx, work->nalloc);
+ srenew(work->mhy, work->nalloc);
+ srenew(work->mhz, work->nalloc);
+ srenew(work->m2, work->nalloc);
/* Allocate an aligned pointer for SSE operations, including 3 extra
* elements at the end since SSE operates on 4 elements at a time.
*/
sfree_aligned(work->denom);
sfree_aligned(work->tmp1);
sfree_aligned(work->eterm);
- snew_aligned(work->denom,work->nalloc+3,16);
- snew_aligned(work->tmp1 ,work->nalloc+3,16);
- snew_aligned(work->eterm,work->nalloc+3,16);
- srenew(work->m2inv,work->nalloc);
+ snew_aligned(work->denom, work->nalloc+3, 16);
+ snew_aligned(work->tmp1, work->nalloc+3, 16);
+ snew_aligned(work->eterm, work->nalloc+3, 16);
+ srenew(work->m2inv, work->nalloc);
}
}
#ifdef PME_SSE
- /* Calculate exponentials through SSE in float precision */
+/* Calculate exponentials through SSE in float precision */
inline static void calc_exponentials(int start, int end, real f, real *d_aligned, real *r_aligned, real *e_aligned)
{
{
- const __m128 two = _mm_set_ps(2.0f,2.0f,2.0f,2.0f);
+ const __m128 two = _mm_set_ps(2.0f, 2.0f, 2.0f, 2.0f);
__m128 f_sse;
__m128 lu;
- __m128 tmp_d1,d_inv,tmp_r,tmp_e;
+ __m128 tmp_d1, d_inv, tmp_r, tmp_e;
int kx;
f_sse = _mm_load1_ps(&f);
- for(kx=0; kx<end; kx+=4)
+ for (kx = 0; kx < end; kx += 4)
{
tmp_d1 = _mm_load_ps(d_aligned+kx);
lu = _mm_rcp_ps(tmp_d1);
- d_inv = _mm_mul_ps(lu,_mm_sub_ps(two,_mm_mul_ps(lu,tmp_d1)));
+ d_inv = _mm_mul_ps(lu, _mm_sub_ps(two, _mm_mul_ps(lu, tmp_d1)));
tmp_r = _mm_load_ps(r_aligned+kx);
tmp_r = gmx_mm_exp_ps(tmp_r);
- tmp_e = _mm_mul_ps(f_sse,d_inv);
- tmp_e = _mm_mul_ps(tmp_e,tmp_r);
- _mm_store_ps(e_aligned+kx,tmp_e);
+ tmp_e = _mm_mul_ps(f_sse, d_inv);
+ tmp_e = _mm_mul_ps(tmp_e, tmp_r);
+ _mm_store_ps(e_aligned+kx, tmp_e);
}
}
}
inline static void calc_exponentials(int start, int end, real f, real *d, real *r, real *e)
{
int kx;
- for(kx=start; kx<end; kx++)
+ for (kx = start; kx < end; kx++)
{
d[kx] = 1.0/d[kx];
}
- for(kx=start; kx<end; kx++)
+ for (kx = start; kx < end; kx++)
{
r[kx] = exp(r[kx]);
}
- for(kx=start; kx<end; kx++)
+ for (kx = start; kx < end; kx++)
{
e[kx] = f*r[kx]*d[kx];
}
#endif
-static int solve_pme_yzx(gmx_pme_t pme,t_complex *grid,
- real ewaldcoeff,real vol,
+static int solve_pme_yzx(gmx_pme_t pme, t_complex *grid,
+ real ewaldcoeff, real vol,
gmx_bool bEnerVir,
- int nthread,int thread)
+ int nthread, int thread)
{
/* do recip sum over local cells in grid */
/* y major, z middle, x minor or continuous */
t_complex *p0;
- int kx,ky,kz,maxkx,maxky,maxkz;
- int nx,ny,nz,iyz0,iyz1,iyz,iy,iz,kxstart,kxend;
- real mx,my,mz;
- real factor=M_PI*M_PI/(ewaldcoeff*ewaldcoeff);
- real ets2,struct2,vfactor,ets2vf;
- real d1,d2,energy=0;
- real by,bz;
- real virxx=0,virxy=0,virxz=0,viryy=0,viryz=0,virzz=0;
- real rxx,ryx,ryy,rzx,rzy,rzz;
+ int kx, ky, kz, maxkx, maxky, maxkz;
+ int nx, ny, nz, iyz0, iyz1, iyz, iy, iz, kxstart, kxend;
+ real mx, my, mz;
+ real factor = M_PI*M_PI/(ewaldcoeff*ewaldcoeff);
+ real ets2, struct2, vfactor, ets2vf;
+ real d1, d2, energy = 0;
+ real by, bz;
+ real virxx = 0, virxy = 0, virxz = 0, viryy = 0, viryz = 0, virzz = 0;
+ real rxx, ryx, ryy, rzx, rzy, rzz;
pme_work_t *work;
- real *mhx,*mhy,*mhz,*m2,*denom,*tmp1,*eterm,*m2inv;
- real mhxk,mhyk,mhzk,m2k;
+ real *mhx, *mhy, *mhz, *m2, *denom, *tmp1, *eterm, *m2inv;
+ real mhxk, mhyk, mhzk, m2k;
real corner_fac;
ivec complex_order;
- ivec local_ndata,local_offset,local_size;
+ ivec local_ndata, local_offset, local_size;
real elfac;
elfac = ONE_4PI_EPS0/pme->epsilon_r;
maxky = (ny+1)/2;
maxkz = nz/2+1;
- work = &pme->work[thread];
+ work = &pme->work[thread];
mhx = work->mhx;
mhy = work->mhy;
mhz = work->mhz;
iyz0 = local_ndata[YY]*local_ndata[ZZ]* thread /nthread;
iyz1 = local_ndata[YY]*local_ndata[ZZ]*(thread+1)/nthread;
- for(iyz=iyz0; iyz<iyz1; iyz++)
+ for (iyz = iyz0; iyz < iyz1; iyz++)
{
iy = iyz/local_ndata[ZZ];
iz = iyz - iy*local_ndata[ZZ];
* depend on kx for triclinic unit cells.
*/
- /* Two explicit loops to avoid a conditional inside the loop */
- for(kx=kxstart; kx<maxkx; kx++)
+ /* Two explicit loops to avoid a conditional inside the loop */
+ for (kx = kxstart; kx < maxkx; kx++)
{
mx = kx;
tmp1[kx] = -factor*m2k;
}
- for(kx=maxkx; kx<kxend; kx++)
+ for (kx = maxkx; kx < kxend; kx++)
{
mx = (kx - nx);
tmp1[kx] = -factor*m2k;
}
- for(kx=kxstart; kx<kxend; kx++)
+ for (kx = kxstart; kx < kxend; kx++)
{
m2inv[kx] = 1.0/m2[kx];
}
- calc_exponentials(kxstart,kxend,elfac,denom,tmp1,eterm);
+ calc_exponentials(kxstart, kxend, elfac, denom, tmp1, eterm);
- for(kx=kxstart; kx<kxend; kx++,p0++)
+ for (kx = kxstart; kx < kxend; kx++, p0++)
{
d1 = p0->re;
d2 = p0->im;
tmp1[kx] = eterm[kx]*struct2;
}
- for(kx=kxstart; kx<kxend; kx++)
+ for (kx = kxstart; kx < kxend; kx++)
{
ets2 = corner_fac*tmp1[kx];
vfactor = (factor*m2[kx] + 1.0)*2.0*m2inv[kx];
/* Two explicit loops to avoid a conditional inside the loop */
- for(kx=kxstart; kx<maxkx; kx++)
+ for (kx = kxstart; kx < maxkx; kx++)
{
mx = kx;
tmp1[kx] = -factor*m2k;
}
- for(kx=maxkx; kx<kxend; kx++)
+ for (kx = maxkx; kx < kxend; kx++)
{
mx = (kx - nx);
tmp1[kx] = -factor*m2k;
}
- calc_exponentials(kxstart,kxend,elfac,denom,tmp1,eterm);
+ calc_exponentials(kxstart, kxend, elfac, denom, tmp1, eterm);
- for(kx=kxstart; kx<kxend; kx++,p0++)
+ for (kx = kxstart; kx < kxend; kx++, p0++)
{
d1 = p0->re;
d2 = p0->im;
return local_ndata[YY]*local_ndata[XX];
}
-static void get_pme_ener_vir(const gmx_pme_t pme,int nthread,
- real *mesh_energy,matrix vir)
+static void get_pme_ener_vir(const gmx_pme_t pme, int nthread,
+ real *mesh_energy, matrix vir)
{
/* This function sums output over threads
* and should therefore only be called after thread synchronization.
int thread;
*mesh_energy = pme->work[0].energy;
- copy_mat(pme->work[0].vir,vir);
+ copy_mat(pme->work[0].vir, vir);
- for(thread=1; thread<nthread; thread++)
+ for (thread = 1; thread < nthread; thread++)
{
*mesh_energy += pme->work[thread].energy;
- m_add(vir,pme->work[thread].vir,vir);
+ m_add(vir, pme->work[thread].vir, vir);
}
}
#define DO_FSPLINE(order) \
-for(ithx=0; (ithx<order); ithx++) \
-{ \
- index_x = (i0+ithx)*pny*pnz; \
- tx = thx[ithx]; \
- dx = dthx[ithx]; \
+ for (ithx = 0; (ithx < order); ithx++) \
+ { \
+ index_x = (i0+ithx)*pny*pnz; \
+ tx = thx[ithx]; \
+ dx = dthx[ithx]; \
\
- for(ithy=0; (ithy<order); ithy++) \
- { \
- index_xy = index_x+(j0+ithy)*pnz; \
- ty = thy[ithy]; \
- dy = dthy[ithy]; \
- fxy1 = fz1 = 0; \
+ for (ithy = 0; (ithy < order); ithy++) \
+ { \
+ index_xy = index_x+(j0+ithy)*pnz; \
+ ty = thy[ithy]; \
+ dy = dthy[ithy]; \
+ fxy1 = fz1 = 0; \
\
- for(ithz=0; (ithz<order); ithz++) \
- { \
- gval = grid[index_xy+(k0+ithz)]; \
- fxy1 += thz[ithz]*gval; \
- fz1 += dthz[ithz]*gval; \
- } \
- fx += dx*ty*fxy1; \
- fy += tx*dy*fxy1; \
- fz += tx*ty*fz1; \
- } \
-}
+ for (ithz = 0; (ithz < order); ithz++) \
+ { \
+ gval = grid[index_xy+(k0+ithz)]; \
+ fxy1 += thz[ithz]*gval; \
+ fz1 += dthz[ithz]*gval; \
+ } \
+ fx += dx*ty*fxy1; \
+ fy += tx*dy*fxy1; \
+ fz += tx*ty*fz1; \
+ } \
+ }
-static void gather_f_bsplines(gmx_pme_t pme,real *grid,
- gmx_bool bClearF,pme_atomcomm_t *atc,
+static void gather_f_bsplines(gmx_pme_t pme, real *grid,
+ gmx_bool bClearF, pme_atomcomm_t *atc,
splinedata_t *spline,
real scale)
{
/* sum forces for local particles */
- int nn,n,ithx,ithy,ithz,i0,j0,k0;
- int index_x,index_xy;
- int nx,ny,nz,pnx,pny,pnz;
+ int nn, n, ithx, ithy, ithz, i0, j0, k0;
+ int index_x, index_xy;
+ int nx, ny, nz, pnx, pny, pnz;
int * idxptr;
- real tx,ty,dx,dy,qn;
- real fx,fy,fz,gval;
- real fxy1,fz1;
- real *thx,*thy,*thz,*dthx,*dthy,*dthz;
+ real tx, ty, dx, dy, qn;
+ real fx, fy, fz, gval;
+ real fxy1, fz1;
+ real *thx, *thy, *thz, *dthx, *dthy, *dthz;
int norder;
- real rxx,ryx,ryy,rzx,rzy,rzz;
+ real rxx, ryx, ryy, rzx, rzy, rzz;
int order;
pme_spline_work_t *work;
rzy = pme->recipbox[ZZ][YY];
rzz = pme->recipbox[ZZ][ZZ];
- for(nn=0; nn<spline->n; nn++)
+ for (nn = 0; nn < spline->n; nn++)
{
n = spline->ind[nn];
qn = scale*atc->q[n];
dthy = spline->dtheta[YY] + norder;
dthz = spline->dtheta[ZZ] + norder;
- switch (order) {
- case 4:
+ switch (order)
+ {
+ case 4:
#ifdef PME_SSE
#ifdef PME_SSE_UNALIGNED
#define PME_GATHER_F_SSE_ORDER4
#endif
#include "pme_sse_single.h"
#else
- DO_FSPLINE(4);
+ DO_FSPLINE(4);
#endif
- break;
- case 5:
+ break;
+ case 5:
#ifdef PME_SSE
#define PME_GATHER_F_SSE_ALIGNED
#define PME_ORDER 5
#include "pme_sse_single.h"
#else
- DO_FSPLINE(5);
+ DO_FSPLINE(5);
#endif
- break;
- default:
- DO_FSPLINE(order);
- break;
+ break;
+ default:
+ DO_FSPLINE(order);
+ break;
}
atc->f[n][XX] += -qn*( fx*nx*rxx );
}
-static real gather_energy_bsplines(gmx_pme_t pme,real *grid,
+static real gather_energy_bsplines(gmx_pme_t pme, real *grid,
pme_atomcomm_t *atc)
{
splinedata_t *spline;
- int n,ithx,ithy,ithz,i0,j0,k0;
- int index_x,index_xy;
+ int n, ithx, ithy, ithz, i0, j0, k0;
+ int index_x, index_xy;
int * idxptr;
- real energy,pot,tx,ty,qn,gval;
- real *thx,*thy,*thz;
+ real energy, pot, tx, ty, qn, gval;
+ real *thx, *thy, *thz;
int norder;
int order;
order = pme->pme_order;
energy = 0;
- for(n=0; (n<atc->n); n++) {
+ for (n = 0; (n < atc->n); n++)
+ {
qn = atc->q[n];
- if (qn != 0) {
+ if (qn != 0)
+ {
idxptr = atc->idx[n];
norder = n*order;
thz = spline->theta[ZZ] + norder;
pot = 0;
- for(ithx=0; (ithx<order); ithx++)
+ for (ithx = 0; (ithx < order); ithx++)
{
index_x = (i0+ithx)*pme->pmegrid_ny*pme->pmegrid_nz;
tx = thx[ithx];
- for(ithy=0; (ithy<order); ithy++)
+ for (ithy = 0; (ithy < order); ithy++)
{
index_xy = index_x+(j0+ithy)*pme->pmegrid_nz;
ty = thy[ithy];
- for(ithz=0; (ithz<order); ithz++)
+ for (ithz = 0; (ithz < order); ithz++)
{
gval = grid[index_xy+(k0+ithz)];
pot += tx*ty*thz[ithz]*gval;
* This gives a significant performance gain.
*/
#define CALC_SPLINE(order) \
-{ \
- int j,k,l; \
- real dr,div; \
- real data[PME_ORDER_MAX]; \
- real ddata[PME_ORDER_MAX]; \
+ { \
+ int j, k, l; \
+ real dr, div; \
+ real data[PME_ORDER_MAX]; \
+ real ddata[PME_ORDER_MAX]; \
\
- for(j=0; (j<DIM); j++) \
- { \
- dr = xptr[j]; \
+ for (j = 0; (j < DIM); j++) \
+ { \
+ dr = xptr[j]; \
\
- /* dr is relative offset from lower cell limit */ \
- data[order-1] = 0; \
- data[1] = dr; \
- data[0] = 1 - dr; \
+ /* dr is relative offset from lower cell limit */ \
+ data[order-1] = 0; \
+ data[1] = dr; \
+ data[0] = 1 - dr; \
\
- for(k=3; (k<order); k++) \
- { \
- div = 1.0/(k - 1.0); \
- data[k-1] = div*dr*data[k-2]; \
- for(l=1; (l<(k-1)); l++) \
- { \
- data[k-l-1] = div*((dr+l)*data[k-l-2]+(k-l-dr)* \
- data[k-l-1]); \
- } \
- data[0] = div*(1-dr)*data[0]; \
- } \
- /* differentiate */ \
- ddata[0] = -data[0]; \
- for(k=1; (k<order); k++) \
- { \
- ddata[k] = data[k-1] - data[k]; \
- } \
+ for (k = 3; (k < order); k++) \
+ { \
+ div = 1.0/(k - 1.0); \
+ data[k-1] = div*dr*data[k-2]; \
+ for (l = 1; (l < (k-1)); l++) \
+ { \
+ data[k-l-1] = div*((dr+l)*data[k-l-2]+(k-l-dr)* \
+ data[k-l-1]); \
+ } \
+ data[0] = div*(1-dr)*data[0]; \
+ } \
+ /* differentiate */ \
+ ddata[0] = -data[0]; \
+ for (k = 1; (k < order); k++) \
+ { \
+ ddata[k] = data[k-1] - data[k]; \
+ } \
\
- div = 1.0/(order - 1); \
- data[order-1] = div*dr*data[order-2]; \
- for(l=1; (l<(order-1)); l++) \
- { \
- data[order-l-1] = div*((dr+l)*data[order-l-2]+ \
- (order-l-dr)*data[order-l-1]); \
- } \
- data[0] = div*(1 - dr)*data[0]; \
+ div = 1.0/(order - 1); \
+ data[order-1] = div*dr*data[order-2]; \
+ for (l = 1; (l < (order-1)); l++) \
+ { \
+ data[order-l-1] = div*((dr+l)*data[order-l-2]+ \
+ (order-l-dr)*data[order-l-1]); \
+ } \
+ data[0] = div*(1 - dr)*data[0]; \
\
- for(k=0; k<order; k++) \
- { \
- theta[j][i*order+k] = data[k]; \
- dtheta[j][i*order+k] = ddata[k]; \
- } \
- } \
-}
+ for (k = 0; k < order; k++) \
+ { \
+ theta[j][i*order+k] = data[k]; \
+ dtheta[j][i*order+k] = ddata[k]; \
+ } \
+ } \
+ }
-void make_bsplines(splinevec theta,splinevec dtheta,int order,
- rvec fractx[],int nr,int ind[],real charge[],
+void make_bsplines(splinevec theta, splinevec dtheta, int order,
+ rvec fractx[], int nr, int ind[], real charge[],
gmx_bool bFreeEnergy)
{
/* construct splines for local atoms */
- int i,ii;
+ int i, ii;
real *xptr;
- for(i=0; i<nr; i++)
+ for (i = 0; i < nr; i++)
{
/* With free energy we do not use the charge check.
* In most cases this will be more efficient than calling make_bsplines
* twice, since usually more than half the particles have charges.
*/
ii = ind[i];
- if (bFreeEnergy || charge[ii] != 0.0) {
+ if (bFreeEnergy || charge[ii] != 0.0)
+ {
xptr = fractx[ii];
- switch(order) {
- case 4: CALC_SPLINE(4); break;
- case 5: CALC_SPLINE(5); break;
- default: CALC_SPLINE(order); break;
+ switch (order)
+ {
+ case 4: CALC_SPLINE(4); break;
+ case 5: CALC_SPLINE(5); break;
+ default: CALC_SPLINE(order); break;
}
}
}
}
-void make_dft_mod(real *mod,real *data,int ndata)
+void make_dft_mod(real *mod, real *data, int ndata)
{
- int i,j;
- real sc,ss,arg;
-
- for(i=0;i<ndata;i++) {
- sc=ss=0;
- for(j=0;j<ndata;j++) {
- arg=(2.0*M_PI*i*j)/ndata;
- sc+=data[j]*cos(arg);
- ss+=data[j]*sin(arg);
- }
- mod[i]=sc*sc+ss*ss;
- }
- for(i=0;i<ndata;i++)
- if(mod[i]<1e-7)
- mod[i]=(mod[i-1]+mod[i+1])*0.5;
+ int i, j;
+ real sc, ss, arg;
+
+ for (i = 0; i < ndata; i++)
+ {
+ sc = ss = 0;
+ for (j = 0; j < ndata; j++)
+ {
+ arg = (2.0*M_PI*i*j)/ndata;
+ sc += data[j]*cos(arg);
+ ss += data[j]*sin(arg);
+ }
+ mod[i] = sc*sc+ss*ss;
+ }
+ for (i = 0; i < ndata; i++)
+ {
+ if (mod[i] < 1e-7)
+ {
+ mod[i] = (mod[i-1]+mod[i+1])*0.5;
+ }
+ }
}
static void make_bspline_moduli(splinevec bsp_mod,
- int nx,int ny,int nz,int order)
+ int nx, int ny, int nz, int order)
{
- int nmax=max(nx,max(ny,nz));
- real *data,*ddata,*bsp_data;
- int i,k,l;
- real div;
-
- snew(data,order);
- snew(ddata,order);
- snew(bsp_data,nmax);
-
- data[order-1]=0;
- data[1]=0;
- data[0]=1;
-
- for(k=3;k<order;k++) {
- div=1.0/(k-1.0);
- data[k-1]=0;
- for(l=1;l<(k-1);l++)
- data[k-l-1]=div*(l*data[k-l-2]+(k-l)*data[k-l-1]);
- data[0]=div*data[0];
- }
- /* differentiate */
- ddata[0]=-data[0];
- for(k=1;k<order;k++)
- ddata[k]=data[k-1]-data[k];
- div=1.0/(order-1);
- data[order-1]=0;
- for(l=1;l<(order-1);l++)
- data[order-l-1]=div*(l*data[order-l-2]+(order-l)*data[order-l-1]);
- data[0]=div*data[0];
-
- for(i=0;i<nmax;i++)
- bsp_data[i]=0;
- for(i=1;i<=order;i++)
- bsp_data[i]=data[i-1];
-
- make_dft_mod(bsp_mod[XX],bsp_data,nx);
- make_dft_mod(bsp_mod[YY],bsp_data,ny);
- make_dft_mod(bsp_mod[ZZ],bsp_data,nz);
-
- sfree(data);
- sfree(ddata);
- sfree(bsp_data);
+ int nmax = max(nx, max(ny, nz));
+ real *data, *ddata, *bsp_data;
+ int i, k, l;
+ real div;
+
+ snew(data, order);
+ snew(ddata, order);
+ snew(bsp_data, nmax);
+
+ data[order-1] = 0;
+ data[1] = 0;
+ data[0] = 1;
+
+ for (k = 3; k < order; k++)
+ {
+ div = 1.0/(k-1.0);
+ data[k-1] = 0;
+ for (l = 1; l < (k-1); l++)
+ {
+ data[k-l-1] = div*(l*data[k-l-2]+(k-l)*data[k-l-1]);
+ }
+ data[0] = div*data[0];
+ }
+ /* differentiate */
+ ddata[0] = -data[0];
+ for (k = 1; k < order; k++)
+ {
+ ddata[k] = data[k-1]-data[k];
+ }
+ div = 1.0/(order-1);
+ data[order-1] = 0;
+ for (l = 1; l < (order-1); l++)
+ {
+ data[order-l-1] = div*(l*data[order-l-2]+(order-l)*data[order-l-1]);
+ }
+ data[0] = div*data[0];
+
+ for (i = 0; i < nmax; i++)
+ {
+ bsp_data[i] = 0;
+ }
+ for (i = 1; i <= order; i++)
+ {
+ bsp_data[i] = data[i-1];
+ }
+
+ make_dft_mod(bsp_mod[XX], bsp_data, nx);
+ make_dft_mod(bsp_mod[YY], bsp_data, ny);
+ make_dft_mod(bsp_mod[ZZ], bsp_data, nz);
+
+ sfree(data);
+ sfree(ddata);
+ sfree(bsp_data);
}
/* Return the P3M optimal influence function */
static double do_p3m_influence(double z, int order)
{
- double z2,z4;
+ double z2, z4;
z2 = z*z;
z4 = z2*z2;
/* The formula and most constants can be found in:
* Ballenegger et al., JCTC 8, 936 (2012)
*/
- switch(order)
- {
- case 2:
- return 1.0 - 2.0*z2/3.0;
- break;
- case 3:
- return 1.0 - z2 + 2.0*z4/15.0;
- break;
- case 4:
- return 1.0 - 4.0*z2/3.0 + 2.0*z4/5.0 + 4.0*z2*z4/315.0;
- break;
- case 5:
- return 1.0 - 5.0*z2/3.0 + 7.0*z4/9.0 - 17.0*z2*z4/189.0 + 2.0*z4*z4/2835.0;
- break;
- case 6:
- return 1.0 - 2.0*z2 + 19.0*z4/15.0 - 256.0*z2*z4/945.0 + 62.0*z4*z4/4725.0 + 4.0*z2*z4*z4/155925.0;
- break;
- case 7:
- return 1.0 - 7.0*z2/3.0 + 28.0*z4/15.0 - 16.0*z2*z4/27.0 + 26.0*z4*z4/405.0 - 2.0*z2*z4*z4/1485.0 + 4.0*z4*z4*z4/6081075.0;
- case 8:
- return 1.0 - 8.0*z2/3.0 + 116.0*z4/45.0 - 344.0*z2*z4/315.0 + 914.0*z4*z4/4725.0 - 248.0*z4*z4*z2/22275.0 + 21844.0*z4*z4*z4/212837625.0 - 8.0*z4*z4*z4*z2/638512875.0;
- break;
+ switch (order)
+ {
+ case 2:
+ return 1.0 - 2.0*z2/3.0;
+ break;
+ case 3:
+ return 1.0 - z2 + 2.0*z4/15.0;
+ break;
+ case 4:
+ return 1.0 - 4.0*z2/3.0 + 2.0*z4/5.0 + 4.0*z2*z4/315.0;
+ break;
+ case 5:
+ return 1.0 - 5.0*z2/3.0 + 7.0*z4/9.0 - 17.0*z2*z4/189.0 + 2.0*z4*z4/2835.0;
+ break;
+ case 6:
+ return 1.0 - 2.0*z2 + 19.0*z4/15.0 - 256.0*z2*z4/945.0 + 62.0*z4*z4/4725.0 + 4.0*z2*z4*z4/155925.0;
+ break;
+ case 7:
+ return 1.0 - 7.0*z2/3.0 + 28.0*z4/15.0 - 16.0*z2*z4/27.0 + 26.0*z4*z4/405.0 - 2.0*z2*z4*z4/1485.0 + 4.0*z4*z4*z4/6081075.0;
+ case 8:
+ return 1.0 - 8.0*z2/3.0 + 116.0*z4/45.0 - 344.0*z2*z4/315.0 + 914.0*z4*z4/4725.0 - 248.0*z4*z4*z2/22275.0 + 21844.0*z4*z4*z4/212837625.0 - 8.0*z4*z4*z4*z2/638512875.0;
+ break;
}
return 0.0;
}
/* Calculate the P3M B-spline moduli for one dimension */
-static void make_p3m_bspline_moduli_dim(real *bsp_mod,int n,int order)
+static void make_p3m_bspline_moduli_dim(real *bsp_mod, int n, int order)
{
- double zarg,zai,sinzai,infl;
- int maxk,i;
+ double zarg, zai, sinzai, infl;
+ int maxk, i;
if (order > 8)
{
- gmx_fatal(FARGS,"The current P3M code only supports orders up to 8");
+ gmx_fatal(FARGS, "The current P3M code only supports orders up to 8");
}
zarg = M_PI/n;
maxk = (n + 1)/2;
- for(i=-maxk; i<0; i++)
+ for (i = -maxk; i < 0; i++)
{
- zai = zarg*i;
- sinzai = sin(zai);
- infl = do_p3m_influence(sinzai,order);
- bsp_mod[n+i] = infl*infl*pow(sinzai/zai,-2.0*order);
+ zai = zarg*i;
+ sinzai = sin(zai);
+ infl = do_p3m_influence(sinzai, order);
+ bsp_mod[n+i] = infl*infl*pow(sinzai/zai, -2.0*order);
}
bsp_mod[0] = 1.0;
- for(i=1; i<maxk; i++)
+ for (i = 1; i < maxk; i++)
{
- zai = zarg*i;
- sinzai = sin(zai);
- infl = do_p3m_influence(sinzai,order);
- bsp_mod[i] = infl*infl*pow(sinzai/zai,-2.0*order);
+ zai = zarg*i;
+ sinzai = sin(zai);
+ infl = do_p3m_influence(sinzai, order);
+ bsp_mod[i] = infl*infl*pow(sinzai/zai, -2.0*order);
}
}
/* Calculate the P3M B-spline moduli */
static void make_p3m_bspline_moduli(splinevec bsp_mod,
- int nx,int ny,int nz,int order)
+ int nx, int ny, int nz, int order)
{
- make_p3m_bspline_moduli_dim(bsp_mod[XX],nx,order);
- make_p3m_bspline_moduli_dim(bsp_mod[YY],ny,order);
- make_p3m_bspline_moduli_dim(bsp_mod[ZZ],nz,order);
+ make_p3m_bspline_moduli_dim(bsp_mod[XX], nx, order);
+ make_p3m_bspline_moduli_dim(bsp_mod[YY], ny, order);
+ make_p3m_bspline_moduli_dim(bsp_mod[ZZ], nz, order);
}
static void setup_coordinate_communication(pme_atomcomm_t *atc)
{
- int nslab,n,i;
- int fw,bw;
-
- nslab = atc->nslab;
-
- n = 0;
- for(i=1; i<=nslab/2; i++) {
- fw = (atc->nodeid + i) % nslab;
- bw = (atc->nodeid - i + nslab) % nslab;
- if (n < nslab - 1) {
- atc->node_dest[n] = fw;
- atc->node_src[n] = bw;
- n++;
- }
- if (n < nslab - 1) {
- atc->node_dest[n] = bw;
- atc->node_src[n] = fw;
- n++;
- }
- }
+ int nslab, n, i;
+ int fw, bw;
+
+ nslab = atc->nslab;
+
+ n = 0;
+ for (i = 1; i <= nslab/2; i++)
+ {
+ fw = (atc->nodeid + i) % nslab;
+ bw = (atc->nodeid - i + nslab) % nslab;
+ if (n < nslab - 1)
+ {
+ atc->node_dest[n] = fw;
+ atc->node_src[n] = bw;
+ n++;
+ }
+ if (n < nslab - 1)
+ {
+ atc->node_dest[n] = bw;
+ atc->node_src[n] = fw;
+ n++;
+ }
+ }
}
-int gmx_pme_destroy(FILE *log,gmx_pme_t *pmedata)
+int gmx_pme_destroy(FILE *log, gmx_pme_t *pmedata)
{
int thread;
- if(NULL != log)
+ if (NULL != log)
{
- fprintf(log,"Destroying PME data structures.\n");
+ fprintf(log, "Destroying PME data structures.\n");
}
sfree((*pmedata)->nnx);
sfree((*pmedata)->cfftgridB);
gmx_parallel_3dfft_destroy((*pmedata)->pfft_setupB);
}
- for(thread=0; thread<(*pmedata)->nthread; thread++)
+ for (thread = 0; thread < (*pmedata)->nthread; thread++)
{
free_work(&(*pmedata)->work[thread]);
}
sfree(*pmedata);
*pmedata = NULL;
- return 0;
+ return 0;
}
-static int mult_up(int n,int f)
+static int mult_up(int n, int f)
{
return ((n + f - 1)/f)*f;
}
static double pme_load_imbalance(gmx_pme_t pme)
{
- int nma,nmi;
- double n1,n2,n3;
+ int nma, nmi;
+ double n1, n2, n3;
nma = pme->nnodes_major;
nmi = pme->nnodes_minor;
- n1 = mult_up(pme->nkx,nma)*mult_up(pme->nky,nmi)*pme->nkz;
- n2 = mult_up(pme->nkx,nma)*mult_up(pme->nkz,nmi)*pme->nky;
- n3 = mult_up(pme->nky,nma)*mult_up(pme->nkz,nmi)*pme->nkx;
+ n1 = mult_up(pme->nkx, nma)*mult_up(pme->nky, nmi)*pme->nkz;
+ n2 = mult_up(pme->nkx, nma)*mult_up(pme->nkz, nmi)*pme->nky;
+ n3 = mult_up(pme->nky, nma)*mult_up(pme->nkz, nmi)*pme->nkx;
/* pme_solve is roughly double the cost of an fft */
return (n1 + n2 + 3*n3)/(double)(6*pme->nkx*pme->nky*pme->nkz);
}
-static void init_atomcomm(gmx_pme_t pme,pme_atomcomm_t *atc, t_commrec *cr,
- int dimind,gmx_bool bSpread)
+static void init_atomcomm(gmx_pme_t pme, pme_atomcomm_t *atc, t_commrec *cr,
+ int dimind, gmx_bool bSpread)
{
- int nk,k,s,thread;
+ int nk, k, s, thread;
- atc->dimind = dimind;
- atc->nslab = 1;
- atc->nodeid = 0;
+ atc->dimind = dimind;
+ atc->nslab = 1;
+ atc->nodeid = 0;
atc->pd_nalloc = 0;
#ifdef GMX_MPI
if (pme->nnodes > 1)
{
atc->mpi_comm = pme->mpi_comm_d[dimind];
- MPI_Comm_size(atc->mpi_comm,&atc->nslab);
- MPI_Comm_rank(atc->mpi_comm,&atc->nodeid);
+ MPI_Comm_size(atc->mpi_comm, &atc->nslab);
+ MPI_Comm_rank(atc->mpi_comm, &atc->nodeid);
}
if (debug)
{
- fprintf(debug,"For PME atom communication in dimind %d: nslab %d rank %d\n",atc->dimind,atc->nslab,atc->nodeid);
+ fprintf(debug, "For PME atom communication in dimind %d: nslab %d rank %d\n", atc->dimind, atc->nslab, atc->nodeid);
}
#endif
if (atc->nslab > 1)
{
/* These three allocations are not required for particle decomp. */
- snew(atc->node_dest,atc->nslab);
- snew(atc->node_src,atc->nslab);
+ snew(atc->node_dest, atc->nslab);
+ snew(atc->node_src, atc->nslab);
setup_coordinate_communication(atc);
- snew(atc->count_thread,pme->nthread);
- for(thread=0; thread<pme->nthread; thread++)
+ snew(atc->count_thread, pme->nthread);
+ for (thread = 0; thread < pme->nthread; thread++)
{
- snew(atc->count_thread[thread],atc->nslab);
+ snew(atc->count_thread[thread], atc->nslab);
}
atc->count = atc->count_thread[0];
- snew(atc->rcount,atc->nslab);
- snew(atc->buf_index,atc->nslab);
+ snew(atc->rcount, atc->nslab);
+ snew(atc->buf_index, atc->nslab);
}
atc->nthread = pme->nthread;
if (atc->nthread > 1)
{
- snew(atc->thread_plist,atc->nthread);
+ snew(atc->thread_plist, atc->nthread);
}
- snew(atc->spline,atc->nthread);
- for(thread=0; thread<atc->nthread; thread++)
+ snew(atc->spline, atc->nthread);
+ for (thread = 0; thread < atc->nthread; thread++)
{
if (atc->nthread > 1)
{
- snew(atc->thread_plist[thread].n,atc->nthread+2*GMX_CACHE_SEP);
+ snew(atc->thread_plist[thread].n, atc->nthread+2*GMX_CACHE_SEP);
atc->thread_plist[thread].n += GMX_CACHE_SEP;
}
- snew(atc->spline[thread].thread_one,pme->nthread);
+ snew(atc->spline[thread].thread_one, pme->nthread);
atc->spline[thread].thread_one[thread] = 1;
}
}
int ndata,
int commplainsize)
{
- int lbnd,rbnd,maxlr,b,i;
+ int lbnd, rbnd, maxlr, b, i;
int exten;
- int nn,nk;
+ int nn, nk;
pme_grid_comm_t *pgc;
gmx_bool bCont;
- int fft_start,fft_end,send_index1,recv_index1;
+ int fft_start, fft_end, send_index1, recv_index1;
#ifdef GMX_MPI
MPI_Status stat;
* that belong to higher nodes (modulo nnodes)
*/
- snew(ol->s2g0,ol->nnodes+1);
- snew(ol->s2g1,ol->nnodes);
- if (debug) { fprintf(debug,"PME slab boundaries:"); }
- for(i=0; i<nnodes; i++)
+ snew(ol->s2g0, ol->nnodes+1);
+ snew(ol->s2g1, ol->nnodes);
+ if (debug)
+ {
+ fprintf(debug, "PME slab boundaries:");
+ }
+ for (i = 0; i < nnodes; i++)
{
/* s2g0 the local interpolation grid start.
* s2g1 the local interpolation grid end.
if (debug)
{
- fprintf(debug," %3d %3d",ol->s2g0[i],ol->s2g1[i]);
+ fprintf(debug, " %3d %3d", ol->s2g0[i], ol->s2g1[i]);
}
}
ol->s2g0[nnodes] = ndata;
- if (debug) { fprintf(debug,"\n"); }
+ if (debug)
+ {
+ fprintf(debug, "\n");
+ }
/* Determine with how many nodes we need to communicate the grid overlap */
b = 0;
{
b++;
bCont = FALSE;
- for(i=0; i<nnodes; i++)
+ for (i = 0; i < nnodes; i++)
{
if ((i+b < nnodes && ol->s2g1[i] > ol->s2g0[i+b]) ||
(i+b >= nnodes && ol->s2g1[i] > ol->s2g0[i+b-nnodes] + ndata))
while (bCont && b < nnodes);
ol->noverlap_nodes = b - 1;
- snew(ol->send_id,ol->noverlap_nodes);
- snew(ol->recv_id,ol->noverlap_nodes);
- for(b=0; b<ol->noverlap_nodes; b++)
+ snew(ol->send_id, ol->noverlap_nodes);
+ snew(ol->recv_id, ol->noverlap_nodes);
+ for (b = 0; b < ol->noverlap_nodes; b++)
{
ol->send_id[b] = (ol->nodeid + (b + 1)) % ol->nnodes;
ol->recv_id[b] = (ol->nodeid - (b + 1) + ol->nnodes) % ol->nnodes;
snew(ol->comm_data, ol->noverlap_nodes);
ol->send_size = 0;
- for(b=0; b<ol->noverlap_nodes; b++)
+ for (b = 0; b < ol->noverlap_nodes; b++)
{
pgc = &ol->comm_data[b];
/* Send */
fft_start += ndata;
fft_end += ndata;
}
- send_index1 = ol->s2g1[nodeid];
- send_index1 = min(send_index1,fft_end);
- pgc->send_index0 = fft_start;
- pgc->send_nindex = max(0,send_index1 - pgc->send_index0);
+ send_index1 = ol->s2g1[nodeid];
+ send_index1 = min(send_index1, fft_end);
+ pgc->send_index0 = fft_start;
+ pgc->send_nindex = max(0, send_index1 - pgc->send_index0);
ol->send_size += pgc->send_nindex;
/* We always start receiving to the first index of our slab */
{
recv_index1 -= ndata;
}
- recv_index1 = min(recv_index1,fft_end);
+ recv_index1 = min(recv_index1, fft_end);
pgc->recv_index0 = fft_start;
- pgc->recv_nindex = max(0,recv_index1 - pgc->recv_index0);
+ pgc->recv_nindex = max(0, recv_index1 - pgc->recv_index0);
}
#ifdef GMX_MPI
/* Communicate the buffer sizes to receive */
- for(b=0; b<ol->noverlap_nodes; b++)
+ for (b = 0; b < ol->noverlap_nodes; b++)
{
- MPI_Sendrecv(&ol->send_size ,1,MPI_INT,ol->send_id[b],b,
- &ol->comm_data[b].recv_size,1,MPI_INT,ol->recv_id[b],b,
- ol->mpi_comm,&stat);
+ MPI_Sendrecv(&ol->send_size, 1, MPI_INT, ol->send_id[b], b,
+ &ol->comm_data[b].recv_size, 1, MPI_INT, ol->recv_id[b], b,
+ ol->mpi_comm, &stat);
}
#endif
/* For non-divisible grid we need pme_order iso pme_order-1 */
- snew(ol->sendbuf,norder*commplainsize);
- snew(ol->recvbuf,norder*commplainsize);
+ snew(ol->sendbuf, norder*commplainsize);
+ snew(ol->recvbuf, norder*commplainsize);
}
static void
-make_gridindex5_to_localindex(int n,int local_start,int local_range,
+make_gridindex5_to_localindex(int n, int local_start, int local_range,
int **global_to_local,
real **fraction_shift)
{
int * gtl;
real * fsh;
- snew(gtl,5*n);
- snew(fsh,5*n);
- for(i=0; (i<5*n); i++)
+ snew(gtl, 5*n);
+ snew(fsh, 5*n);
+ for (i = 0; (i < 5*n); i++)
{
/* Determine the global to local grid index */
gtl[i] = (i - local_start + n) % n;
#ifdef PME_SSE
float tmp[8];
__m128 zero_SSE;
- int of,i;
+ int of, i;
- snew_aligned(work,1,16);
+ snew_aligned(work, 1, 16);
zero_SSE = _mm_setzero_ps();
* as we only operate on order of the 8 grid entries that are
* load into 2 SSE float registers.
*/
- for(of=0; of<8-(order-1); of++)
+ for (of = 0; of < 8-(order-1); of++)
{
- for(i=0; i<8; i++)
+ for (i = 0; i < 8; i++)
{
tmp[i] = (i >= of && i < of+order ? 1 : 0);
}
work->mask_SSE0[of] = _mm_loadu_ps(tmp);
work->mask_SSE1[of] = _mm_loadu_ps(tmp+4);
- work->mask_SSE0[of] = _mm_cmpgt_ps(work->mask_SSE0[of],zero_SSE);
- work->mask_SSE1[of] = _mm_cmpgt_ps(work->mask_SSE1[of],zero_SSE);
+ work->mask_SSE0[of] = _mm_cmpgt_ps(work->mask_SSE0[of], zero_SSE);
+ work->mask_SSE1[of] = _mm_cmpgt_ps(work->mask_SSE1[of], zero_SSE);
}
#else
work = NULL;
}
static void
-gmx_pme_check_grid_restrictions(FILE *fplog,char dim,int nnodes,int *nk)
+gmx_pme_check_grid_restrictions(FILE *fplog, char dim, int nnodes, int *nk)
{
int nk_new;
if (2*nk_new >= 3*(*nk))
{
- gmx_fatal(FARGS,"The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). The grid size would have to be increased by more than 50%% to make the grid divisible. Change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).",
- dim,*nk,dim,nnodes,dim);
+ gmx_fatal(FARGS, "The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). The grid size would have to be increased by more than 50%% to make the grid divisible. Change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).",
+ dim, *nk, dim, nnodes, dim);
}
if (fplog != NULL)
{
- fprintf(fplog,"\nNOTE: The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). Increasing the PME grid size in dim %c to %d. This will increase the accuracy and will not decrease the performance significantly on this number of nodes. For optimal performance change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).\n\n",
- dim,*nk,dim,nnodes,dim,nk_new,dim);
+ fprintf(fplog, "\nNOTE: The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). Increasing the PME grid size in dim %c to %d. This will increase the accuracy and will not decrease the performance significantly on this number of nodes. For optimal performance change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).\n\n",
+ dim, *nk, dim, nnodes, dim, nk_new, dim);
}
*nk = nk_new;
gmx_bool bReproducible,
int nthread)
{
- gmx_pme_t pme=NULL;
+ gmx_pme_t pme = NULL;
pme_atomcomm_t *atc;
ivec ndata;
if (debug)
- fprintf(debug,"Creating PME data structures.\n");
- snew(pme,1);
+ {
+ fprintf(debug, "Creating PME data structures.\n");
+ }
+ snew(pme, 1);
pme->redist_init = FALSE;
pme->sum_qgrid_tmp = NULL;
{
pme->mpi_comm = cr->mpi_comm_mygroup;
- MPI_Comm_rank(pme->mpi_comm,&pme->nodeid);
- MPI_Comm_size(pme->mpi_comm,&pme->nnodes);
+ MPI_Comm_rank(pme->mpi_comm, &pme->nodeid);
+ MPI_Comm_size(pme->mpi_comm, &pme->nnodes);
if (pme->nnodes != nnodes_major*nnodes_minor)
{
gmx_incons("PME node count mismatch");
pme->mpi_comm_d[0] = MPI_COMM_NULL;
pme->mpi_comm_d[1] = MPI_COMM_NULL;
#endif
- pme->ndecompdim = 0;
+ pme->ndecompdim = 0;
pme->nodeid_major = 0;
pme->nodeid_minor = 0;
#ifdef GMX_MPI
pme->mpi_comm_d[0] = pme->mpi_comm;
pme->mpi_comm_d[1] = MPI_COMM_NULL;
#endif
- pme->ndecompdim = 1;
+ pme->ndecompdim = 1;
pme->nodeid_major = pme->nodeid;
pme->nodeid_minor = 0;
pme->mpi_comm_d[0] = MPI_COMM_NULL;
pme->mpi_comm_d[1] = pme->mpi_comm;
#endif
- pme->ndecompdim = 1;
+ pme->ndecompdim = 1;
pme->nodeid_major = 0;
pme->nodeid_minor = pme->nodeid;
}
pme->ndecompdim = 2;
#ifdef GMX_MPI
- MPI_Comm_split(pme->mpi_comm,pme->nodeid % nnodes_minor,
- pme->nodeid,&pme->mpi_comm_d[0]); /* My communicator along major dimension */
- MPI_Comm_split(pme->mpi_comm,pme->nodeid/nnodes_minor,
- pme->nodeid,&pme->mpi_comm_d[1]); /* My communicator along minor dimension */
-
- MPI_Comm_rank(pme->mpi_comm_d[0],&pme->nodeid_major);
- MPI_Comm_size(pme->mpi_comm_d[0],&pme->nnodes_major);
- MPI_Comm_rank(pme->mpi_comm_d[1],&pme->nodeid_minor);
- MPI_Comm_size(pme->mpi_comm_d[1],&pme->nnodes_minor);
+ MPI_Comm_split(pme->mpi_comm, pme->nodeid % nnodes_minor,
+ pme->nodeid, &pme->mpi_comm_d[0]); /* My communicator along major dimension */
+ MPI_Comm_split(pme->mpi_comm, pme->nodeid/nnodes_minor,
+ pme->nodeid, &pme->mpi_comm_d[1]); /* My communicator along minor dimension */
+
+ MPI_Comm_rank(pme->mpi_comm_d[0], &pme->nodeid_major);
+ MPI_Comm_size(pme->mpi_comm_d[0], &pme->nnodes_major);
+ MPI_Comm_rank(pme->mpi_comm_d[1], &pme->nodeid_minor);
+ MPI_Comm_size(pme->mpi_comm_d[1], &pme->nnodes_minor);
#endif
}
pme->bPPnode = (cr->duty & DUTY_PP);
if (ir->ePBC == epbcSCREW)
{
- gmx_fatal(FARGS,"pme does not (yet) work with pbc = screw");
+ gmx_fatal(FARGS, "pme does not (yet) work with pbc = screw");
}
pme->bFEP = ((ir->efep != efepNO) && bFreeEnergy);
if (pme->pme_order > PME_ORDER_MAX)
{
- gmx_fatal(FARGS,"pme_order (%d) is larger than the maximum allowed value (%d). Modify and recompile the code if you really need such a high order.",
- pme->pme_order,PME_ORDER_MAX);
+ gmx_fatal(FARGS, "pme_order (%d) is larger than the maximum allowed value (%d). Modify and recompile the code if you really need such a high order.",
+ pme->pme_order, PME_ORDER_MAX);
}
/* Currently pme.c supports only the fft5d FFT code.
* This check should be done before calling gmx_pme_init
* and fplog should be passed iso stderr.
*
- if (pme->ndecompdim >= 2)
- */
+ if (pme->ndecompdim >= 2)
+ */
if (pme->ndecompdim >= 1)
{
/*
- gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
+ gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
'x',nnodes_major,&pme->nkx);
- gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
+ gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
'y',nnodes_minor,&pme->nky);
- */
+ */
}
if (pme->nkx <= pme->pme_order*(pme->nnodes_major > 1 ? 2 : 1) ||
pme->nky <= pme->pme_order*(pme->nnodes_minor > 1 ? 2 : 1) ||
pme->nkz <= pme->pme_order)
{
- gmx_fatal(FARGS,"The PME grid sizes need to be larger than pme_order (%d) and for dimensions with domain decomposition larger than 2*pme_order",pme->pme_order);
+ gmx_fatal(FARGS, "The PME grid sizes need to be larger than pme_order (%d) and for dimensions with domain decomposition larger than 2*pme_order", pme->pme_order);
}
- if (pme->nnodes > 1) {
+ if (pme->nnodes > 1)
+ {
double imbal;
#ifdef GMX_MPI
" and PME grid_y (%d) and grid_z (%d) should be divisible by #PME_nodes_y (%d)\n"
"\n",
(int)((imbal-1)*100 + 0.5),
- pme->nkx,pme->nky,pme->nnodes_major,
- pme->nky,pme->nkz,pme->nnodes_minor);
+ pme->nkx, pme->nky, pme->nnodes_major,
+ pme->nky, pme->nkz, pme->nnodes_minor);
}
}
* y is always copied through a buffer: we don't need padding in z,
* but we do need the overlap in x because of the communication order.
*/
- init_overlap_comm(&pme->overlap[0],pme->pme_order,
+ init_overlap_comm(&pme->overlap[0], pme->pme_order,
#ifdef GMX_MPI
pme->mpi_comm_d[0],
#endif
- pme->nnodes_major,pme->nodeid_major,
+ pme->nnodes_major, pme->nodeid_major,
pme->nkx,
- (div_round_up(pme->nky,pme->nnodes_minor)+pme->pme_order)*(pme->nkz+pme->pme_order-1));
+ (div_round_up(pme->nky, pme->nnodes_minor)+pme->pme_order)*(pme->nkz+pme->pme_order-1));
/* Along overlap dim 1 we can send in multiple pulses in sum_fftgrid_dd.
* We do this with an offset buffer of equal size, so we need to allocate
* extra for the offset. That's what the (+1)*pme->nkz is for.
*/
- init_overlap_comm(&pme->overlap[1],pme->pme_order,
+ init_overlap_comm(&pme->overlap[1], pme->pme_order,
#ifdef GMX_MPI
pme->mpi_comm_d[1],
#endif
- pme->nnodes_minor,pme->nodeid_minor,
+ pme->nnodes_minor, pme->nodeid_minor,
pme->nky,
- (div_round_up(pme->nkx,pme->nnodes_major)+pme->pme_order+1)*pme->nkz);
+ (div_round_up(pme->nkx, pme->nnodes_major)+pme->pme_order+1)*pme->nkz);
/* Check for a limitation of the (current) sum_fftgrid_dd code.
* We only allow multiple communication pulses in dim 1, not in dim 0.
if (pme->nthread > 1 && (pme->overlap[0].noverlap_nodes > 1 ||
pme->nkx < pme->nnodes_major*pme->pme_order))
{
- gmx_fatal(FARGS,"The number of PME grid lines per node along x is %g. But when using OpenMP threads, the number of grid lines per node along x and should be >= pme_order (%d). To resolve this issue, use less nodes along x (and possibly more along y and/or z) by specifying -dd manually.",
- pme->nkx/(double)pme->nnodes_major,pme->pme_order);
+ gmx_fatal(FARGS, "The number of PME grid lines per node along x is %g. But when using OpenMP threads, the number of grid lines per node along x and should be >= pme_order (%d). To resolve this issue, use less nodes along x (and possibly more along y and/or z) by specifying -dd manually.",
+ pme->nkx/(double)pme->nnodes_major, pme->pme_order);
}
- snew(pme->bsp_mod[XX],pme->nkx);
- snew(pme->bsp_mod[YY],pme->nky);
- snew(pme->bsp_mod[ZZ],pme->nkz);
+ snew(pme->bsp_mod[XX], pme->nkx);
+ snew(pme->bsp_mod[YY], pme->nky);
+ snew(pme->bsp_mod[ZZ], pme->nkz);
/* The required size of the interpolation grid, including overlap.
* The allocated size (pmegrid_n?) might be slightly larger.
*/
pme->pmegrid_nx = pme->overlap[0].s2g1[pme->nodeid_major] -
- pme->overlap[0].s2g0[pme->nodeid_major];
+ pme->overlap[0].s2g0[pme->nodeid_major];
pme->pmegrid_ny = pme->overlap[1].s2g1[pme->nodeid_minor] -
- pme->overlap[1].s2g0[pme->nodeid_minor];
+ pme->overlap[1].s2g0[pme->nodeid_minor];
pme->pmegrid_nz_base = pme->nkz;
- pme->pmegrid_nz = pme->pmegrid_nz_base + pme->pme_order - 1;
- set_grid_alignment(&pme->pmegrid_nz,pme->pme_order);
+ pme->pmegrid_nz = pme->pmegrid_nz_base + pme->pme_order - 1;
+ set_grid_alignment(&pme->pmegrid_nz, pme->pme_order);
pme->pmegrid_start_ix = pme->overlap[0].s2g0[pme->nodeid_major];
pme->pmegrid_start_iy = pme->overlap[1].s2g0[pme->nodeid_minor];
make_gridindex5_to_localindex(pme->nkx,
pme->pmegrid_start_ix,
pme->pmegrid_nx - (pme->pme_order-1),
- &pme->nnx,&pme->fshx);
+ &pme->nnx, &pme->fshx);
make_gridindex5_to_localindex(pme->nky,
pme->pmegrid_start_iy,
pme->pmegrid_ny - (pme->pme_order-1),
- &pme->nny,&pme->fshy);
+ &pme->nny, &pme->fshy);
make_gridindex5_to_localindex(pme->nkz,
pme->pmegrid_start_iz,
pme->pmegrid_nz_base,
- &pme->nnz,&pme->fshz);
+ &pme->nnz, &pme->fshz);
pmegrids_init(&pme->pmegridA,
- pme->pmegrid_nx,pme->pmegrid_ny,pme->pmegrid_nz,
+ pme->pmegrid_nx, pme->pmegrid_ny, pme->pmegrid_nz,
pme->pmegrid_nz_base,
pme->pme_order,
pme->nthread,
ndata[2] = pme->nkz;
/* This routine will allocate the grid data to fit the FFTs */
- gmx_parallel_3dfft_init(&pme->pfft_setupA,ndata,
- &pme->fftgridA,&pme->cfftgridA,
+ gmx_parallel_3dfft_init(&pme->pfft_setupA, ndata,
+ &pme->fftgridA, &pme->cfftgridA,
pme->mpi_comm_d,
- pme->overlap[0].s2g0,pme->overlap[1].s2g0,
- bReproducible,pme->nthread);
+ pme->overlap[0].s2g0, pme->overlap[1].s2g0,
+ bReproducible, pme->nthread);
if (bFreeEnergy)
{
pmegrids_init(&pme->pmegridB,
- pme->pmegrid_nx,pme->pmegrid_ny,pme->pmegrid_nz,
+ pme->pmegrid_nx, pme->pmegrid_ny, pme->pmegrid_nz,
pme->pmegrid_nz_base,
pme->pme_order,
pme->nthread,
pme->nkx % pme->nnodes_major != 0,
pme->nky % pme->nnodes_minor != 0);
- gmx_parallel_3dfft_init(&pme->pfft_setupB,ndata,
- &pme->fftgridB,&pme->cfftgridB,
+ gmx_parallel_3dfft_init(&pme->pfft_setupB, ndata,
+ &pme->fftgridB, &pme->cfftgridB,
pme->mpi_comm_d,
- pme->overlap[0].s2g0,pme->overlap[1].s2g0,
- bReproducible,pme->nthread);
+ pme->overlap[0].s2g0, pme->overlap[1].s2g0,
+ bReproducible, pme->nthread);
}
else
{
if (!pme->bP3M)
{
/* Use plain SPME B-spline interpolation */
- make_bspline_moduli(pme->bsp_mod,pme->nkx,pme->nky,pme->nkz,pme->pme_order);
+ make_bspline_moduli(pme->bsp_mod, pme->nkx, pme->nky, pme->nkz, pme->pme_order);
}
else
{
/* Use the P3M grid-optimized influence function */
- make_p3m_bspline_moduli(pme->bsp_mod,pme->nkx,pme->nky,pme->nkz,pme->pme_order);
+ make_p3m_bspline_moduli(pme->bsp_mod, pme->nkx, pme->nky, pme->nkz, pme->pme_order);
}
/* Use atc[0] for spreading */
- init_atomcomm(pme,&pme->atc[0],cr,nnodes_major > 1 ? 0 : 1,TRUE);
+ init_atomcomm(pme, &pme->atc[0], cr, nnodes_major > 1 ? 0 : 1, TRUE);
if (pme->ndecompdim >= 2)
{
- init_atomcomm(pme,&pme->atc[1],cr,1,FALSE);
+ init_atomcomm(pme, &pme->atc[1], cr, 1, FALSE);
}
- if (pme->nnodes == 1) {
+ if (pme->nnodes == 1)
+ {
pme->atc[0].n = homenr;
pme_realloc_atomcomm_things(&pme->atc[0]);
}
/* Use fft5d, order after FFT is y major, z, x minor */
- snew(pme->work,pme->nthread);
- for(thread=0; thread<pme->nthread; thread++)
+ snew(pme->work, pme->nthread);
+ for (thread = 0; thread < pme->nthread; thread++)
{
- realloc_work(&pme->work[thread],pme->nkx);
+ realloc_work(&pme->work[thread], pme->nkx);
}
}
*pmedata = pme;
-
+
return 0;
}
-static void reuse_pmegrids(const pmegrids_t *old,pmegrids_t *new)
+static void reuse_pmegrids(const pmegrids_t *old, pmegrids_t *new)
{
- int d,t;
+ int d, t;
- for(d=0; d<DIM; d++)
+ for (d = 0; d < DIM; d++)
{
if (new->grid.n[d] > old->grid.n[d])
{
if (new->nthread > 1 && new->nthread == old->nthread)
{
sfree_aligned(new->grid_all);
- for(t=0; t<new->nthread; t++)
+ for (t = 0; t < new->nthread; t++)
{
new->grid_th[t].grid = old->grid_th[t].grid;
}
int homenr;
int ret;
- irc = *ir;
+ irc = *ir;
irc.nkx = grid_size[XX];
irc.nky = grid_size[YY];
irc.nkz = grid_size[ZZ];
homenr = -1;
}
- ret = gmx_pme_init(pmedata,cr,pme_src->nnodes_major,pme_src->nnodes_minor,
- &irc,homenr,pme_src->bFEP,FALSE,pme_src->nthread);
+ ret = gmx_pme_init(pmedata, cr, pme_src->nnodes_major, pme_src->nnodes_minor,
+ &irc, homenr, pme_src->bFEP, FALSE, pme_src->nthread);
if (ret == 0)
{
/* We can easily reuse the allocated pme grids in pme_src */
- reuse_pmegrids(&pme_src->pmegridA,&(*pmedata)->pmegridA);
+ reuse_pmegrids(&pme_src->pmegridA, &(*pmedata)->pmegridA);
/* We would like to reuse the fft grids, but that's harder */
}
static void copy_local_grid(gmx_pme_t pme,
- pmegrids_t *pmegrids,int thread,real *fftgrid)
+ pmegrids_t *pmegrids, int thread, real *fftgrid)
{
- ivec local_fft_ndata,local_fft_offset,local_fft_size;
- int fft_my,fft_mz;
- int nsx,nsy,nsz;
+ ivec local_fft_ndata, local_fft_offset, local_fft_size;
+ int fft_my, fft_mz;
+ int nsx, nsy, nsz;
ivec nf;
- int offx,offy,offz,x,y,z,i0,i0t;
+ int offx, offy, offz, x, y, z, i0, i0t;
int d;
pmegrid_t *pmegrid;
real *grid_th;
nsy = pmegrid->s[YY];
nsz = pmegrid->s[ZZ];
- for(d=0; d<DIM; d++)
+ for (d = 0; d < DIM; d++)
{
nf[d] = min(pmegrid->n[d] - (pmegrid->order - 1),
local_fft_ndata[d] - pmegrid->offset[d]);
* This also initializes the full grid.
*/
grid_th = pmegrid->grid;
- for(x=0; x<nf[XX]; x++)
+ for (x = 0; x < nf[XX]; x++)
{
- for(y=0; y<nf[YY]; y++)
+ for (y = 0; y < nf[YY]; y++)
{
i0 = ((offx + x)*fft_my + (offy + y))*fft_mz + offz;
i0t = (x*nsy + y)*nsz;
- for(z=0; z<nf[ZZ]; z++)
+ for (z = 0; z < nf[ZZ]; z++)
{
fftgrid[i0+z] = grid_th[i0t+z];
}
static void
reduce_threadgrid_overlap(gmx_pme_t pme,
- const pmegrids_t *pmegrids,int thread,
- real *fftgrid,real *commbuf_x,real *commbuf_y)
+ const pmegrids_t *pmegrids, int thread,
+ real *fftgrid, real *commbuf_x, real *commbuf_y)
{
- ivec local_fft_ndata,local_fft_offset,local_fft_size;
- int fft_nx,fft_ny,fft_nz;
- int fft_my,fft_mz;
- int buf_my=-1;
- int nsx,nsy,nsz;
+ ivec local_fft_ndata, local_fft_offset, local_fft_size;
+ int fft_nx, fft_ny, fft_nz;
+ int fft_my, fft_mz;
+ int buf_my = -1;
+ int nsx, nsy, nsz;
ivec ne;
- int offx,offy,offz,x,y,z,i0,i0t;
- int sx,sy,sz,fx,fy,fz,tx1,ty1,tz1,ox,oy,oz;
- gmx_bool bClearBufX,bClearBufY,bClearBufXY,bClearBuf;
- gmx_bool bCommX,bCommY;
+ int offx, offy, offz, x, y, z, i0, i0t;
+ int sx, sy, sz, fx, fy, fz, tx1, ty1, tz1, ox, oy, oz;
+ gmx_bool bClearBufX, bClearBufY, bClearBufXY, bClearBuf;
+ gmx_bool bCommX, bCommY;
int d;
int thread_f;
- const pmegrid_t *pmegrid,*pmegrid_g,*pmegrid_f;
+ const pmegrid_t *pmegrid, *pmegrid_g, *pmegrid_f;
const real *grid_th;
- real *commbuf=NULL;
+ real *commbuf = NULL;
gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
local_fft_ndata,
*/
pmegrid = &pmegrids->grid_th[thread];
- for(d=0; d<DIM; d++)
+ for (d = 0; d < DIM; d++)
{
ne[d] = min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
local_fft_ndata[d]);
/* Note that ffy_nx/y is equal to the number of grid points
* between the first point of our node grid and the one of the next node.
*/
- for(sx=0; sx>=-pmegrids->nthread_comm[XX]; sx--)
+ for (sx = 0; sx >= -pmegrids->nthread_comm[XX]; sx--)
{
- fx = pmegrid->ci[XX] + sx;
- ox = 0;
+ fx = pmegrid->ci[XX] + sx;
+ ox = 0;
bCommX = FALSE;
- if (fx < 0) {
- fx += pmegrids->nc[XX];
- ox -= fft_nx;
+ if (fx < 0)
+ {
+ fx += pmegrids->nc[XX];
+ ox -= fft_nx;
bCommX = (pme->nnodes_major > 1);
}
pmegrid_g = &pmegrids->grid_th[fx*pmegrids->nc[YY]*pmegrids->nc[ZZ]];
- ox += pmegrid_g->offset[XX];
+ ox += pmegrid_g->offset[XX];
if (!bCommX)
{
- tx1 = min(ox + pmegrid_g->n[XX],ne[XX]);
+ tx1 = min(ox + pmegrid_g->n[XX], ne[XX]);
}
else
{
- tx1 = min(ox + pmegrid_g->n[XX],pme->pme_order);
+ tx1 = min(ox + pmegrid_g->n[XX], pme->pme_order);
}
- for(sy=0; sy>=-pmegrids->nthread_comm[YY]; sy--)
+ for (sy = 0; sy >= -pmegrids->nthread_comm[YY]; sy--)
{
- fy = pmegrid->ci[YY] + sy;
- oy = 0;
+ fy = pmegrid->ci[YY] + sy;
+ oy = 0;
bCommY = FALSE;
- if (fy < 0) {
- fy += pmegrids->nc[YY];
- oy -= fft_ny;
+ if (fy < 0)
+ {
+ fy += pmegrids->nc[YY];
+ oy -= fft_ny;
bCommY = (pme->nnodes_minor > 1);
}
pmegrid_g = &pmegrids->grid_th[fy*pmegrids->nc[ZZ]];
- oy += pmegrid_g->offset[YY];
+ oy += pmegrid_g->offset[YY];
if (!bCommY)
{
- ty1 = min(oy + pmegrid_g->n[YY],ne[YY]);
+ ty1 = min(oy + pmegrid_g->n[YY], ne[YY]);
}
else
{
- ty1 = min(oy + pmegrid_g->n[YY],pme->pme_order);
+ ty1 = min(oy + pmegrid_g->n[YY], pme->pme_order);
}
- for(sz=0; sz>=-pmegrids->nthread_comm[ZZ]; sz--)
+ for (sz = 0; sz >= -pmegrids->nthread_comm[ZZ]; sz--)
{
fz = pmegrid->ci[ZZ] + sz;
oz = 0;
oz -= fft_nz;
}
pmegrid_g = &pmegrids->grid_th[fz];
- oz += pmegrid_g->offset[ZZ];
- tz1 = min(oz + pmegrid_g->n[ZZ],ne[ZZ]);
+ oz += pmegrid_g->offset[ZZ];
+ tz1 = min(oz + pmegrid_g->n[ZZ], ne[ZZ]);
if (sx == 0 && sy == 0 && sz == 0)
{
#ifdef DEBUG_PME_REDUCE
printf("n%d t%d add %d %2d %2d %2d %2d %2d %2d %2d-%2d %2d-%2d, %2d-%2d %2d-%2d, %2d-%2d %2d-%2d\n",
- pme->nodeid,thread,thread_f,
+ pme->nodeid, thread, thread_f,
pme->pmegrid_start_ix,
pme->pmegrid_start_iy,
pme->pmegrid_start_iz,
- sx,sy,sz,
- offx-ox,tx1-ox,offx,tx1,
- offy-oy,ty1-oy,offy,ty1,
- offz-oz,tz1-oz,offz,tz1);
+ sx, sy, sz,
+ offx-ox, tx1-ox, offx, tx1,
+ offy-oy, ty1-oy, offy, ty1,
+ offz-oz, tz1-oz, offz, tz1);
#endif
if (!(bCommX || bCommY))
{
/* Copy from the thread local grid to the node grid */
- for(x=offx; x<tx1; x++)
+ for (x = offx; x < tx1; x++)
{
- for(y=offy; y<ty1; y++)
+ for (y = offy; y < ty1; y++)
{
i0 = (x*fft_my + y)*fft_mz;
i0t = ((x - ox)*nsy + (y - oy))*nsz - oz;
- for(z=offz; z<tz1; z++)
+ for (z = offz; z < tz1; z++)
{
fftgrid[i0+z] += grid_th[i0t+z];
}
/* We index commbuf modulo the local grid size */
commbuf += buf_my*fft_nx*fft_nz;
- bClearBuf = bClearBufXY;
+ bClearBuf = bClearBufXY;
bClearBufXY = FALSE;
}
else
}
else
{
- commbuf = commbuf_x;
- buf_my = fft_ny;
+ commbuf = commbuf_x;
+ buf_my = fft_ny;
bClearBuf = bClearBufX;
bClearBufX = FALSE;
}
/* Copy to the communication buffer */
- for(x=offx; x<tx1; x++)
+ for (x = offx; x < tx1; x++)
{
- for(y=offy; y<ty1; y++)
+ for (y = offy; y < ty1; y++)
{
i0 = (x*buf_my + y)*fft_nz;
i0t = ((x - ox)*nsy + (y - oy))*nsz - oz;
if (bClearBuf)
{
/* First access of commbuf, initialize it */
- for(z=offz; z<tz1; z++)
+ for (z = offz; z < tz1; z++)
{
commbuf[i0+z] = grid_th[i0t+z];
}
}
else
{
- for(z=offz; z<tz1; z++)
+ for (z = offz; z < tz1; z++)
{
commbuf[i0+z] += grid_th[i0t+z];
}
}
-static void sum_fftgrid_dd(gmx_pme_t pme,real *fftgrid)
+static void sum_fftgrid_dd(gmx_pme_t pme, real *fftgrid)
{
- ivec local_fft_ndata,local_fft_offset,local_fft_size;
+ ivec local_fft_ndata, local_fft_offset, local_fft_size;
pme_overlap_t *overlap;
- int send_index0,send_nindex;
+ int send_index0, send_nindex;
int recv_nindex;
#ifdef GMX_MPI
MPI_Status stat;
#endif
- int send_size_y,recv_size_y;
- int ipulse,send_id,recv_id,datasize,gridsize,size_yx;
- real *sendptr,*recvptr;
- int x,y,z,indg,indb;
+ int send_size_y, recv_size_y;
+ int ipulse, send_id, recv_id, datasize, gridsize, size_yx;
+ real *sendptr, *recvptr;
+ int x, y, z, indg, indb;
/* Note that this routine is only used for forward communication.
* Since the force gathering, unlike the charge spreading,
if (pme->nnodes_major > 1)
{
- size_yx = pme->overlap[0].comm_data[0].send_nindex;
+ size_yx = pme->overlap[0].comm_data[0].send_nindex;
}
else
{
send_size_y = overlap->send_size;
- for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++)
+ for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
{
- send_id = overlap->send_id[ipulse];
- recv_id = overlap->recv_id[ipulse];
+ send_id = overlap->send_id[ipulse];
+ recv_id = overlap->recv_id[ipulse];
send_index0 =
overlap->comm_data[ipulse].send_index0 -
overlap->comm_data[0].send_index0;
recvptr = overlap->recvbuf;
#ifdef GMX_MPI
- MPI_Sendrecv(sendptr,send_size_y*datasize,GMX_MPI_REAL,
- send_id,ipulse,
- recvptr,recv_size_y*datasize,GMX_MPI_REAL,
- recv_id,ipulse,
- overlap->mpi_comm,&stat);
+ MPI_Sendrecv(sendptr, send_size_y*datasize, GMX_MPI_REAL,
+ send_id, ipulse,
+ recvptr, recv_size_y*datasize, GMX_MPI_REAL,
+ recv_id, ipulse,
+ overlap->mpi_comm, &stat);
#endif
- for(x=0; x<local_fft_ndata[XX]; x++)
+ for (x = 0; x < local_fft_ndata[XX]; x++)
{
- for(y=0; y<recv_nindex; y++)
+ for (y = 0; y < recv_nindex; y++)
{
indg = (x*local_fft_size[YY] + y)*local_fft_size[ZZ];
indb = (x*recv_size_y + y)*local_fft_ndata[ZZ];
- for(z=0; z<local_fft_ndata[ZZ]; z++)
+ for (z = 0; z < local_fft_ndata[ZZ]; z++)
{
fftgrid[indg+z] += recvptr[indb+z];
}
{
/* Copy from the received buffer to the send buffer for dim 0 */
sendptr = pme->overlap[0].sendbuf;
- for(x=0; x<size_yx; x++)
+ for (x = 0; x < size_yx; x++)
{
- for(y=0; y<recv_nindex; y++)
+ for (y = 0; y < recv_nindex; y++)
{
indg = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
indb = ((local_fft_ndata[XX] + x)*recv_size_y + y)*local_fft_ndata[ZZ];
- for(z=0; z<local_fft_ndata[ZZ]; z++)
+ for (z = 0; z < local_fft_ndata[ZZ]; z++)
{
sendptr[indg+z] += recvptr[indb+z];
}
ipulse = 0;
- send_id = overlap->send_id[ipulse];
- recv_id = overlap->recv_id[ipulse];
+ send_id = overlap->send_id[ipulse];
+ recv_id = overlap->recv_id[ipulse];
send_nindex = overlap->comm_data[ipulse].send_nindex;
/* We don't use recv_index0, as we always receive starting at 0 */
recv_nindex = overlap->comm_data[ipulse].recv_nindex;
if (debug != NULL)
{
- fprintf(debug,"PME fftgrid comm %2d x %2d x %2d\n",
- send_nindex,local_fft_ndata[YY],local_fft_ndata[ZZ]);
+ fprintf(debug, "PME fftgrid comm %2d x %2d x %2d\n",
+ send_nindex, local_fft_ndata[YY], local_fft_ndata[ZZ]);
}
#ifdef GMX_MPI
- MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
- send_id,ipulse,
- recvptr,recv_nindex*datasize,GMX_MPI_REAL,
- recv_id,ipulse,
- overlap->mpi_comm,&stat);
+ MPI_Sendrecv(sendptr, send_nindex*datasize, GMX_MPI_REAL,
+ send_id, ipulse,
+ recvptr, recv_nindex*datasize, GMX_MPI_REAL,
+ recv_id, ipulse,
+ overlap->mpi_comm, &stat);
#endif
- for(x=0; x<recv_nindex; x++)
+ for (x = 0; x < recv_nindex; x++)
{
- for(y=0; y<local_fft_ndata[YY]; y++)
+ for (y = 0; y < local_fft_ndata[YY]; y++)
{
indg = (x*local_fft_size[YY] + y)*local_fft_size[ZZ];
indb = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
- for(z=0; z<local_fft_ndata[ZZ]; z++)
+ for (z = 0; z < local_fft_ndata[ZZ]; z++)
{
fftgrid[indg+z] += recvptr[indb+z];
}
static void spread_on_grid(gmx_pme_t pme,
- pme_atomcomm_t *atc,pmegrids_t *grids,
- gmx_bool bCalcSplines,gmx_bool bSpread,
+ pme_atomcomm_t *atc, pmegrids_t *grids,
+ gmx_bool bCalcSplines, gmx_bool bSpread,
real *fftgrid)
{
- int nthread,thread;
+ int nthread, thread;
#ifdef PME_TIME_THREADS
- gmx_cycles_t c1,c2,c3,ct1a,ct1b,ct1c;
- static double cs1=0,cs2=0,cs3=0;
- static double cs1a[6]={0,0,0,0,0,0};
- static int cnt=0;
+ gmx_cycles_t c1, c2, c3, ct1a, ct1b, ct1c;
+ static double cs1 = 0, cs2 = 0, cs3 = 0;
+ static double cs1a[6] = {0, 0, 0, 0, 0, 0};
+ static int cnt = 0;
#endif
nthread = pme->nthread;
- assert(nthread>0);
+ assert(nthread > 0);
#ifdef PME_TIME_THREADS
c1 = omp_cyc_start();
if (bCalcSplines)
{
#pragma omp parallel for num_threads(nthread) schedule(static)
- for(thread=0; thread<nthread; thread++)
+ for (thread = 0; thread < nthread; thread++)
{
- int start,end;
+ int start, end;
start = atc->n* thread /nthread;
end = atc->n*(thread+1)/nthread;
/* Compute fftgrid index for all atoms,
* with help of some extra variables.
*/
- calc_interpolation_idx(pme,atc,start,end,thread);
+ calc_interpolation_idx(pme, atc, start, end, thread);
}
}
#ifdef PME_TIME_THREADS
- c1 = omp_cyc_end(c1);
+ c1 = omp_cyc_end(c1);
cs1 += (double)c1;
#endif
c2 = omp_cyc_start();
#endif
#pragma omp parallel for num_threads(nthread) schedule(static)
- for(thread=0; thread<nthread; thread++)
+ for (thread = 0; thread < nthread; thread++)
{
splinedata_t *spline;
pmegrid_t *grid;
{
spline = &atc->spline[thread];
- make_thread_local_ind(atc,thread,spline);
+ make_thread_local_ind(atc, thread, spline);
grid = &grids->grid_th[thread];
}
if (bCalcSplines)
{
- make_bsplines(spline->theta,spline->dtheta,pme->pme_order,
- atc->fractx,spline->n,spline->ind,atc->q,pme->bFEP);
+ make_bsplines(spline->theta, spline->dtheta, pme->pme_order,
+ atc->fractx, spline->n, spline->ind, atc->q, pme->bFEP);
}
if (bSpread)
#ifdef PME_TIME_SPREAD
ct1a = omp_cyc_start();
#endif
- spread_q_bsplines_thread(grid,atc,spline,pme->spline_work);
+ spread_q_bsplines_thread(grid, atc, spline, pme->spline_work);
if (grids->nthread > 1)
{
- copy_local_grid(pme,grids,thread,fftgrid);
+ copy_local_grid(pme, grids, thread, fftgrid);
}
#ifdef PME_TIME_SPREAD
- ct1a = omp_cyc_end(ct1a);
+ ct1a = omp_cyc_end(ct1a);
cs1a[thread] += (double)ct1a;
#endif
}
}
#ifdef PME_TIME_THREADS
- c2 = omp_cyc_end(c2);
+ c2 = omp_cyc_end(c2);
cs2 += (double)c2;
#endif
c3 = omp_cyc_start();
#endif
#pragma omp parallel for num_threads(grids->nthread) schedule(static)
- for(thread=0; thread<grids->nthread; thread++)
+ for (thread = 0; thread < grids->nthread; thread++)
{
- reduce_threadgrid_overlap(pme,grids,thread,
+ reduce_threadgrid_overlap(pme, grids, thread,
fftgrid,
pme->overlap[0].sendbuf,
pme->overlap[1].sendbuf);
}
#ifdef PME_TIME_THREADS
- c3 = omp_cyc_end(c3);
+ c3 = omp_cyc_end(c3);
cs3 += (double)c3;
#endif
if (pme->nnodes > 1)
{
/* Communicate the overlapping part of the fftgrid */
- sum_fftgrid_dd(pme,fftgrid);
+ sum_fftgrid_dd(pme, fftgrid);
}
}
if (cnt % 20 == 0)
{
printf("idx %.2f spread %.2f red %.2f",
- cs1*1e-9,cs2*1e-9,cs3*1e-9);
+ cs1*1e-9, cs2*1e-9, cs3*1e-9);
#ifdef PME_TIME_SPREAD
- for(thread=0; thread<nthread; thread++)
- printf(" %.2f",cs1a[thread]*1e-9);
+ for (thread = 0; thread < nthread; thread++)
+ {
+ printf(" %.2f", cs1a[thread]*1e-9);
+ }
#endif
printf("\n");
}
static void dump_grid(FILE *fp,
- int sx,int sy,int sz,int nx,int ny,int nz,
- int my,int mz,const real *g)
+ int sx, int sy, int sz, int nx, int ny, int nz,
+ int my, int mz, const real *g)
{
- int x,y,z;
+ int x, y, z;
- for(x=0; x<nx; x++)
+ for (x = 0; x < nx; x++)
{
- for(y=0; y<ny; y++)
+ for (y = 0; y < ny; y++)
{
- for(z=0; z<nz; z++)
+ for (z = 0; z < nz; z++)
{
- fprintf(fp,"%2d %2d %2d %6.3f\n",
- sx+x,sy+y,sz+z,g[(x*my + y)*mz + z]);
+ fprintf(fp, "%2d %2d %2d %6.3f\n",
+ sx+x, sy+y, sz+z, g[(x*my + y)*mz + z]);
}
}
}
}
-static void dump_local_fftgrid(gmx_pme_t pme,const real *fftgrid)
+static void dump_local_fftgrid(gmx_pme_t pme, const real *fftgrid)
{
- ivec local_fft_ndata,local_fft_offset,local_fft_size;
+ ivec local_fft_ndata, local_fft_offset, local_fft_size;
gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
local_fft_ndata,
}
-void gmx_pme_calc_energy(gmx_pme_t pme,int n,rvec *x,real *q,real *V)
+void gmx_pme_calc_energy(gmx_pme_t pme, int n, rvec *x, real *q, real *V)
{
pme_atomcomm_t *atc;
pmegrids_t *grid;
gmx_incons("gmx_pme_calc_energy with free energy");
}
- atc = &pme->atc_energy;
+ atc = &pme->atc_energy;
atc->nthread = 1;
if (atc->spline == NULL)
{
- snew(atc->spline,atc->nthread);
+ snew(atc->spline, atc->nthread);
}
atc->nslab = 1;
atc->bSpread = TRUE;
/* We only use the A-charges grid */
grid = &pme->pmegridA;
- spread_on_grid(pme,atc,NULL,TRUE,FALSE,pme->fftgridA);
+ spread_on_grid(pme, atc, NULL, TRUE, FALSE, pme->fftgridA);
- *V = gather_energy_bsplines(pme,grid->grid.grid,atc);
+ *V = gather_energy_bsplines(pme, grid->grid.grid, atc);
}
-static void reset_pmeonly_counters(t_commrec *cr,gmx_wallcycle_t wcycle,
- t_nrnb *nrnb,t_inputrec *ir, gmx_large_int_t step_rel)
+static void reset_pmeonly_counters(t_commrec *cr, gmx_wallcycle_t wcycle,
+ t_nrnb *nrnb, t_inputrec *ir, gmx_large_int_t step_rel)
{
/* Reset all the counters related to performance over the run */
- wallcycle_stop(wcycle,ewcRUN);
+ wallcycle_stop(wcycle, ewcRUN);
wallcycle_reset_all(wcycle);
init_nrnb(nrnb);
ir->init_step += step_rel;
ir->nsteps -= step_rel;
- wallcycle_start(wcycle,ewcRUN);
+ wallcycle_start(wcycle, ewcRUN);
}
}
(*npmedata)++;
- srenew(*pmedata,*npmedata);
+ srenew(*pmedata, *npmedata);
/* Generate a new PME data structure, copying part of the old pointers */
- gmx_pme_reinit(&((*pmedata)[ind]),cr,pme,ir,grid_size);
+ gmx_pme_reinit(&((*pmedata)[ind]), cr, pme, ir, grid_size);
*pme_ret = (*pmedata)[ind];
}
gmx_pme_pp_t pme_pp;
int natoms;
matrix box;
- rvec *x_pp=NULL,*f_pp=NULL;
- real *chargeA=NULL,*chargeB=NULL;
- real lambda=0;
- int maxshift_x=0,maxshift_y=0;
- real energy,dvdlambda;
+ rvec *x_pp = NULL, *f_pp = NULL;
+ real *chargeA = NULL, *chargeB = NULL;
+ real lambda = 0;
+ int maxshift_x = 0, maxshift_y = 0;
+ real energy, dvdlambda;
matrix vir;
float cycles;
int count;
gmx_bool bEnerVir;
- gmx_large_int_t step,step_rel;
+ gmx_large_int_t step, step_rel;
ivec grid_switch;
/* This data will only use with PME tuning, i.e. switching PME grids */
npmedata = 1;
- snew(pmedata,npmedata);
+ snew(pmedata, npmedata);
pmedata[0] = pme;
pme_pp = gmx_pme_pp_init(cr);
{
/* Domain decomposition */
natoms = gmx_pme_recv_q_x(pme_pp,
- &chargeA,&chargeB,box,&x_pp,&f_pp,
- &maxshift_x,&maxshift_y,
- &pme->bFEP,&lambda,
+ &chargeA, &chargeB, box, &x_pp, &f_pp,
+ &maxshift_x, &maxshift_y,
+ &pme->bFEP, &lambda,
&bEnerVir,
&step,
- grid_switch,&ewaldcoeff);
+ grid_switch, &ewaldcoeff);
if (natoms == -2)
{
/* Switch the PME grid to grid_switch */
- gmx_pmeonly_switch(&npmedata,&pmedata,grid_switch,cr,ir,&pme);
+ gmx_pmeonly_switch(&npmedata, &pmedata, grid_switch, cr, ir, &pme);
}
}
while (natoms == -2);
step_rel = step - ir->init_step;
if (count == 0)
- wallcycle_start(wcycle,ewcRUN);
+ {
+ wallcycle_start(wcycle, ewcRUN);
+ }
- wallcycle_start(wcycle,ewcPMEMESH);
+ wallcycle_start(wcycle, ewcPMEMESH);
dvdlambda = 0;
clear_mat(vir);
- gmx_pme_do(pme,0,natoms,x_pp,f_pp,chargeA,chargeB,box,
- cr,maxshift_x,maxshift_y,nrnb,wcycle,vir,ewaldcoeff,
- &energy,lambda,&dvdlambda,
+ gmx_pme_do(pme, 0, natoms, x_pp, f_pp, chargeA, chargeB, box,
+ cr, maxshift_x, maxshift_y, nrnb, wcycle, vir, ewaldcoeff,
+ &energy, lambda, &dvdlambda,
GMX_PME_DO_ALL_F | (bEnerVir ? GMX_PME_CALC_ENER_VIR : 0));
- cycles = wallcycle_stop(wcycle,ewcPMEMESH);
+ cycles = wallcycle_stop(wcycle, ewcPMEMESH);
gmx_pme_send_force_vir_ener(pme_pp,
- f_pp,vir,energy,dvdlambda,
+ f_pp, vir, energy, dvdlambda,
cycles);
count++;
if (step_rel == wcycle_get_reset_counters(wcycle))
{
/* Reset all the counters related to performance over the run */
- reset_pmeonly_counters(cr,wcycle,nrnb,ir,step_rel);
+ reset_pmeonly_counters(cr, wcycle, nrnb, ir, step_rel);
wcycle_set_reset_counters(wcycle, 0);
}
real *energy, real lambda,
real *dvdlambda, int flags)
{
- int q,d,i,j,ntot,npme;
- int nx,ny,nz;
- int n_d,local_ny;
- pme_atomcomm_t *atc=NULL;
- pmegrids_t *pmegrid=NULL;
- real *grid=NULL;
+ int q, d, i, j, ntot, npme;
+ int nx, ny, nz;
+ int n_d, local_ny;
+ pme_atomcomm_t *atc = NULL;
+ pmegrids_t *pmegrid = NULL;
+ real *grid = NULL;
real *ptr;
- rvec *x_d,*f_d;
- real *charge=NULL,*q_d;
+ rvec *x_d, *f_d;
+ real *charge = NULL, *q_d;
real energy_AB[2];
matrix vir_AB[2];
gmx_bool bClearF;
t_complex * cfftgrid;
int thread;
const gmx_bool bCalcEnerVir = flags & GMX_PME_CALC_ENER_VIR;
- const gmx_bool bCalcF = flags & GMX_PME_CALC_F;
+ const gmx_bool bCalcF = flags & GMX_PME_CALC_F;
assert(pme->nnodes > 0);
assert(pme->nnodes == 1 || pme->ndecompdim > 0);
- if (pme->nnodes > 1) {
- atc = &pme->atc[0];
+ if (pme->nnodes > 1)
+ {
+ atc = &pme->atc[0];
atc->npd = homenr;
- if (atc->npd > atc->pd_nalloc) {
+ if (atc->npd > atc->pd_nalloc)
+ {
atc->pd_nalloc = over_alloc_dd(atc->npd);
- srenew(atc->pd,atc->pd_nalloc);
+ srenew(atc->pd, atc->pd_nalloc);
}
- atc->maxshift = (atc->dimind==0 ? maxshift_x : maxshift_y);
+ atc->maxshift = (atc->dimind == 0 ? maxshift_x : maxshift_y);
}
else
{
pme->atc[0].n = homenr;
}
- for(q=0; q<(pme->bFEP ? 2 : 1); q++) {
- if (q == 0) {
- pmegrid = &pme->pmegridA;
- fftgrid = pme->fftgridA;
- cfftgrid = pme->cfftgridA;
+ for (q = 0; q < (pme->bFEP ? 2 : 1); q++)
+ {
+ if (q == 0)
+ {
+ pmegrid = &pme->pmegridA;
+ fftgrid = pme->fftgridA;
+ cfftgrid = pme->cfftgridA;
pfft_setup = pme->pfft_setupA;
- charge = chargeA+start;
- } else {
- pmegrid = &pme->pmegridB;
- fftgrid = pme->fftgridB;
- cfftgrid = pme->cfftgridB;
+ charge = chargeA+start;
+ }
+ else
+ {
+ pmegrid = &pme->pmegridB;
+ fftgrid = pme->fftgridB;
+ cfftgrid = pme->cfftgridB;
pfft_setup = pme->pfft_setupB;
- charge = chargeB+start;
+ charge = chargeB+start;
}
grid = pmegrid->grid.grid;
/* Unpack structure */
- if (debug) {
- fprintf(debug,"PME: nnodes = %d, nodeid = %d\n",
- cr->nnodes,cr->nodeid);
- fprintf(debug,"Grid = %p\n",(void*)grid);
+ if (debug)
+ {
+ fprintf(debug, "PME: nnodes = %d, nodeid = %d\n",
+ cr->nnodes, cr->nodeid);
+ fprintf(debug, "Grid = %p\n", (void*)grid);
if (grid == NULL)
- gmx_fatal(FARGS,"No grid!");
+ {
+ gmx_fatal(FARGS, "No grid!");
+ }
}
where();
- m_inv_ur0(box,pme->recipbox);
+ m_inv_ur0(box, pme->recipbox);
- if (pme->nnodes == 1) {
+ if (pme->nnodes == 1)
+ {
atc = &pme->atc[0];
- if (DOMAINDECOMP(cr)) {
+ if (DOMAINDECOMP(cr))
+ {
atc->n = homenr;
pme_realloc_atomcomm_things(atc);
}
atc->x = x;
atc->q = charge;
atc->f = f;
- } else {
- wallcycle_start(wcycle,ewcPME_REDISTXF);
- for(d=pme->ndecompdim-1; d>=0; d--)
+ }
+ else
+ {
+ wallcycle_start(wcycle, ewcPME_REDISTXF);
+ for (d = pme->ndecompdim-1; d >= 0; d--)
{
if (d == pme->ndecompdim-1)
{
x_d = atc->x;
q_d = atc->q;
}
- atc = &pme->atc[d];
+ atc = &pme->atc[d];
atc->npd = n_d;
- if (atc->npd > atc->pd_nalloc) {
+ if (atc->npd > atc->pd_nalloc)
+ {
atc->pd_nalloc = over_alloc_dd(atc->npd);
- srenew(atc->pd,atc->pd_nalloc);
+ srenew(atc->pd, atc->pd_nalloc);
}
- atc->maxshift = (atc->dimind==0 ? maxshift_x : maxshift_y);
- pme_calc_pidx_wrapper(n_d,pme->recipbox,x_d,atc);
+ atc->maxshift = (atc->dimind == 0 ? maxshift_x : maxshift_y);
+ pme_calc_pidx_wrapper(n_d, pme->recipbox, x_d, atc);
where();
/* Redistribute x (only once) and qA or qB */
- if (DOMAINDECOMP(cr)) {
- dd_pmeredist_x_q(pme, n_d, q==0, x_d, q_d, atc);
- } else {
- pmeredist_pd(pme, TRUE, n_d, q==0, x_d, q_d, atc);
+ if (DOMAINDECOMP(cr))
+ {
+ dd_pmeredist_x_q(pme, n_d, q == 0, x_d, q_d, atc);
+ }
+ else
+ {
+ pmeredist_pd(pme, TRUE, n_d, q == 0, x_d, q_d, atc);
}
}
where();
- wallcycle_stop(wcycle,ewcPME_REDISTXF);
+ wallcycle_stop(wcycle, ewcPME_REDISTXF);
}
if (debug)
- fprintf(debug,"Node= %6d, pme local particles=%6d\n",
- cr->nodeid,atc->n);
+ {
+ fprintf(debug, "Node= %6d, pme local particles=%6d\n",
+ cr->nodeid, atc->n);
+ }
if (flags & GMX_PME_SPREAD_Q)
{
- wallcycle_start(wcycle,ewcPME_SPREADGATHER);
+ wallcycle_start(wcycle, ewcPME_SPREADGATHER);
/* Spread the charges on a grid */
- spread_on_grid(pme,&pme->atc[0],pmegrid,q==0,TRUE,fftgrid);
+ spread_on_grid(pme, &pme->atc[0], pmegrid, q == 0, TRUE, fftgrid);
if (q == 0)
{
- inc_nrnb(nrnb,eNR_WEIGHTS,DIM*atc->n);
+ inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc->n);
}
- inc_nrnb(nrnb,eNR_SPREADQBSP,
+ inc_nrnb(nrnb, eNR_SPREADQBSP,
pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
if (pme->nthread == 1)
{
- wrap_periodic_pmegrid(pme,grid);
+ wrap_periodic_pmegrid(pme, grid);
/* sum contributions to local grid from other nodes */
#ifdef GMX_MPI
if (pme->nnodes > 1)
{
- gmx_sum_qgrid_dd(pme,grid,GMX_SUM_QGRID_FORWARD);
+ gmx_sum_qgrid_dd(pme, grid, GMX_SUM_QGRID_FORWARD);
where();
}
#endif
- copy_pmegrid_to_fftgrid(pme,grid,fftgrid);
+ copy_pmegrid_to_fftgrid(pme, grid, fftgrid);
}
- wallcycle_stop(wcycle,ewcPME_SPREADGATHER);
+ wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
/*
- dump_local_fftgrid(pme,fftgrid);
- exit(0);
- */
+ dump_local_fftgrid(pme,fftgrid);
+ exit(0);
+ */
}
/* Here we start a large thread parallel region */
#pragma omp parallel num_threads(pme->nthread) private(thread)
{
- thread=gmx_omp_get_thread_num();
+ thread = gmx_omp_get_thread_num();
if (flags & GMX_PME_SOLVE)
{
int loop_count;
/* do 3d-fft */
if (thread == 0)
{
- wallcycle_start(wcycle,ewcPME_FFT);
+ wallcycle_start(wcycle, ewcPME_FFT);
}
- gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_REAL_TO_COMPLEX,
- fftgrid,cfftgrid,thread,wcycle);
+ gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX,
+ fftgrid, cfftgrid, thread, wcycle);
if (thread == 0)
{
- wallcycle_stop(wcycle,ewcPME_FFT);
+ wallcycle_stop(wcycle, ewcPME_FFT);
}
where();
/* solve in k-space for our local cells */
if (thread == 0)
{
- wallcycle_start(wcycle,ewcPME_SOLVE);
+ wallcycle_start(wcycle, ewcPME_SOLVE);
}
loop_count =
- solve_pme_yzx(pme,cfftgrid,ewaldcoeff,
+ solve_pme_yzx(pme, cfftgrid, ewaldcoeff,
box[XX][XX]*box[YY][YY]*box[ZZ][ZZ],
bCalcEnerVir,
- pme->nthread,thread);
+ pme->nthread, thread);
if (thread == 0)
{
- wallcycle_stop(wcycle,ewcPME_SOLVE);
+ wallcycle_stop(wcycle, ewcPME_SOLVE);
where();
- inc_nrnb(nrnb,eNR_SOLVEPME,loop_count);
+ inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
}
}
if (thread == 0)
{
where();
- wallcycle_start(wcycle,ewcPME_FFT);
+ wallcycle_start(wcycle, ewcPME_FFT);
}
- gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_COMPLEX_TO_REAL,
- cfftgrid,fftgrid,thread,wcycle);
+ gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL,
+ cfftgrid, fftgrid, thread, wcycle);
if (thread == 0)
{
- wallcycle_stop(wcycle,ewcPME_FFT);
-
+ wallcycle_stop(wcycle, ewcPME_FFT);
+
where();
if (pme->nodeid == 0)
{
- ntot = pme->nkx*pme->nky*pme->nkz;
+ ntot = pme->nkx*pme->nky*pme->nkz;
npme = ntot*log((real)ntot)/log(2.0);
- inc_nrnb(nrnb,eNR_FFT,2*npme);
+ inc_nrnb(nrnb, eNR_FFT, 2*npme);
}
- wallcycle_start(wcycle,ewcPME_SPREADGATHER);
+ wallcycle_start(wcycle, ewcPME_SPREADGATHER);
}
- copy_fftgrid_to_pmegrid(pme,fftgrid,grid,pme->nthread,thread);
+ copy_fftgrid_to_pmegrid(pme, fftgrid, grid, pme->nthread, thread);
}
}
/* End of thread parallel section.
{
/* distribute local grid to all nodes */
#ifdef GMX_MPI
- if (pme->nnodes > 1) {
- gmx_sum_qgrid_dd(pme,grid,GMX_SUM_QGRID_BACKWARD);
+ if (pme->nnodes > 1)
+ {
+ gmx_sum_qgrid_dd(pme, grid, GMX_SUM_QGRID_BACKWARD);
}
#endif
where();
- unwrap_periodic_pmegrid(pme,grid);
+ unwrap_periodic_pmegrid(pme, grid);
/* interpolate forces for our local atoms */
*/
bClearF = (q == 0 && PAR(cr));
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
- for(thread=0; thread<pme->nthread; thread++)
+ for (thread = 0; thread < pme->nthread; thread++)
{
- gather_f_bsplines(pme,grid,bClearF,atc,
+ gather_f_bsplines(pme, grid, bClearF, atc,
&atc->spline[thread],
- pme->bFEP ? (q==0 ? 1.0-lambda : lambda) : 1.0);
+ pme->bFEP ? (q == 0 ? 1.0-lambda : lambda) : 1.0);
}
where();
- inc_nrnb(nrnb,eNR_GATHERFBSP,
+ inc_nrnb(nrnb, eNR_GATHERFBSP,
pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
- wallcycle_stop(wcycle,ewcPME_SPREADGATHER);
+ wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
}
if (bCalcEnerVir)
/* This should only be called on the master thread
* and after the threads have synchronized.
*/
- get_pme_ener_vir(pme,pme->nthread,&energy_AB[q],vir_AB[q]);
+ get_pme_ener_vir(pme, pme->nthread, &energy_AB[q], vir_AB[q]);
}
} /* of q-loop */
- if (bCalcF && pme->nnodes > 1) {
- wallcycle_start(wcycle,ewcPME_REDISTXF);
- for(d=0; d<pme->ndecompdim; d++)
+ if (bCalcF && pme->nnodes > 1)
+ {
+ wallcycle_start(wcycle, ewcPME_REDISTXF);
+ for (d = 0; d < pme->ndecompdim; d++)
{
atc = &pme->atc[d];
if (d == pme->ndecompdim - 1)
n_d = pme->atc[d+1].n;
f_d = pme->atc[d+1].f;
}
- if (DOMAINDECOMP(cr)) {
- dd_pmeredist_f(pme,atc,n_d,f_d,
- d==pme->ndecompdim-1 && pme->bPPnode);
- } else {
+ if (DOMAINDECOMP(cr))
+ {
+ dd_pmeredist_f(pme, atc, n_d, f_d,
+ d == pme->ndecompdim-1 && pme->bPPnode);
+ }
+ else
+ {
pmeredist_pd(pme, FALSE, n_d, TRUE, f_d, NULL, atc);
}
}
- wallcycle_stop(wcycle,ewcPME_REDISTXF);
+ wallcycle_stop(wcycle, ewcPME_REDISTXF);
}
where();
if (bCalcEnerVir)
{
- if (!pme->bFEP) {
+ if (!pme->bFEP)
+ {
*energy = energy_AB[0];
- m_add(vir,vir_AB[0],vir);
- } else {
- *energy = (1.0-lambda)*energy_AB[0] + lambda*energy_AB[1];
+ m_add(vir, vir_AB[0], vir);
+ }
+ else
+ {
+ *energy = (1.0-lambda)*energy_AB[0] + lambda*energy_AB[1];
*dvdlambda += energy_AB[1] - energy_AB[0];
- for(i=0; i<DIM; i++)
+ for (i = 0; i < DIM; i++)
{
- for(j=0; j<DIM; j++)
+ for (j = 0; j < DIM; j++)
{
- vir[i][j] += (1.0-lambda)*vir_AB[0][i][j] +
+ vir[i][j] += (1.0-lambda)*vir_AB[0][i][j] +
lambda*vir_AB[1][i][j];
}
}
if (debug)
{
- fprintf(debug,"PME mesh energy: %g\n",*energy);
+ fprintf(debug, "PME mesh energy: %g\n", *energy);
}
return 0;