From: Roland Schulz
Date: Mon, 27 Oct 2014 17:35:46 +0000 (-0400)
Subject: C++ conversion for PME spread/gather
X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=a75c69822315db615fa7949263b9ba84bc545ab0;p=alexxy%2Fgromacs.git

C++ conversion for PME spread/gather

Noted TODO for future refactoring

Change-Id: Ib130a59a26f124645a3da0fe168d2125e05ac824
---

diff --git a/src/gromacs/ewald/CMakeLists.txt b/src/gromacs/ewald/CMakeLists.txt
index b6c515ef88..5378331def 100644
--- a/src/gromacs/ewald/CMakeLists.txt
+++ b/src/gromacs/ewald/CMakeLists.txt
@@ -1,7 +1,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2014, by the GROMACS development team, led by
+# Copyright (c) 2014,2015, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -32,7 +32,7 @@
 # To help us fund GROMACS development, we humbly ask that you cite
 # the research papers on the package. Check out http://www.gromacs.org.
 
-file(GLOB EWALD_SOURCES *.c)
+file(GLOB EWALD_SOURCES *.c *.cpp)
 
 set(LIBGROMACS_SOURCES ${LIBGROMACS_SOURCES} ${EWALD_SOURCES} PARENT_SCOPE)
 
 if (BUILD_TESTING)
diff --git a/src/gromacs/ewald/pme-gather.c b/src/gromacs/ewald/pme-gather.cpp
similarity index 93%
rename from src/gromacs/ewald/pme-gather.c
rename to src/gromacs/ewald/pme-gather.cpp
index c28b8b50ff..2e2a12ff67 100644
--- a/src/gromacs/ewald/pme-gather.c
+++ b/src/gromacs/ewald/pme-gather.cpp
@@ -80,7 +80,7 @@ void gather_f_bsplines(struct gmx_pme_t *pme, real *grid,
     /* sum forces for local particles */
     int   nn, n, ithx, ithy, ithz, i0, j0, k0;
     int   index_x, index_xy;
-    int   nx, ny, nz, pnx, pny, pnz;
+    int   nx, ny, nz, pny, pnz;
     int  *idxptr;
     real  tx, ty, dx, dy, coefficient;
     real  fx, fy, fz, gval;
@@ -90,29 +90,22 @@ void gather_f_bsplines(struct gmx_pme_t *pme, real *grid,
     real  rxx, ryx, ryy, rzx, rzy, rzz;
     int   order;
 
-    struct pme_spline_work *work;
-
-#if defined PME_SIMD4_SPREAD_GATHER && !defined PME_SIMD4_UNALIGNED
-    real  thz_buffer[GMX_SIMD4_WIDTH*3],  *thz_aligned;
-    real  dthz_buffer[GMX_SIMD4_WIDTH*3], *dthz_aligned;
+#ifdef PME_SIMD4_SPREAD_GATHER
+    // cppcheck-suppress unreadVariable cppcheck seems not to analyze code from pme-simd4.h
+    struct pme_spline_work *work = pme->spline_work;
+#ifndef PME_SIMD4_UNALIGNED
+    real  thz_buffer[GMX_SIMD4_WIDTH*3],  *thz_aligned;
+    real  dthz_buffer[GMX_SIMD4_WIDTH*3], *dthz_aligned;
 
     thz_aligned  = gmx_simd4_align_r(thz_buffer);
     dthz_aligned = gmx_simd4_align_r(dthz_buffer);
 #endif
-
-    work = pme->spline_work;
+#endif
 
     order = pme->pme_order;
-    thx   = spline->theta[XX];
-    thy   = spline->theta[YY];
-    thz   = spline->theta[ZZ];
-    dthx  = spline->dtheta[XX];
-    dthy  = spline->dtheta[YY];
-    dthz  = spline->dtheta[ZZ];
     nx    = pme->nkx;
     ny    = pme->nky;
     nz    = pme->nkz;
-    pnx   = pme->pmegrid_nx;
     pny   = pme->pmegrid_ny;
     pnz   = pme->pmegrid_nz;
 
diff --git a/src/gromacs/ewald/pme-spread.c b/src/gromacs/ewald/pme-spread.cpp
similarity index 95%
rename from src/gromacs/ewald/pme-spread.c
rename to src/gromacs/ewald/pme-spread.cpp
index 416174d247..da75efb427 100644
--- a/src/gromacs/ewald/pme-spread.c
+++ b/src/gromacs/ewald/pme-spread.cpp
@@ -42,12 +42,16 @@
 
 #include <assert.h>
 
+#include <algorithm>
+
 #include "gromacs/ewald/pme-internal.h"
 #include "gromacs/ewald/pme-simd.h"
 #include "gromacs/ewald/pme-spline-work.h"
 #include "gromacs/legacyheaders/macros.h"
 #include "gromacs/utility/smalloc.h"
 
+/* TODO consider split of pme-spline from this file */
+
 static void calc_interpolation_idx(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
                                    int start, int grid_index, int end, int thread)
 {
@@ -56,7 +60,6 @@ static void calc_interpolation_idx(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
     real     *xptr, *fptr, tx, ty, tz;
     real      rxx, ryx, ryy, rzx, rzy, rzz;
     int       nx, ny, nz;
-    int       start_ix, start_iy, start_iz;
     int      *g2tx, *g2ty, *g2tz;
     gmx_bool  bThreads;
     int      *thread_idx = NULL;
@@ -68,10 +71,6 @@ static void calc_interpolation_idx(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
     ny  = pme->nky;
     nz  = pme->nkz;
 
-    start_ix = pme->pmegrid_start_ix;
-    start_iy = pme->pmegrid_start_iy;
-    start_iz = pme->pmegrid_start_iz;
-
     rxx = pme->recipbox[XX][XX];
     ryx = pme->recipbox[YY][XX];
     ryy = pme->recipbox[YY][YY];
@@ -309,13 +308,11 @@ static void spread_coefficients_bsplines_thread(pmegrid_t
 
     /* spread coefficients from home atoms to local grid */
     real          *grid;
-    pme_overlap_t *ol;
-    int            b, i, nn, n, ithx, ithy, ithz, i0, j0, k0;
+    int            i, nn, n, ithx, ithy, ithz, i0, j0, k0;
     int           *idxptr;
     int            order, norder, index_x, index_xy, index_xyz;
     real           valx, valxy, coefficient;
     real          *thx, *thy, *thz;
-    int            localsize, bndsize;
     int            pnx, pny, pnz, ndatatot;
     int            offx, offy, offz;
 
@@ -397,7 +394,7 @@ static void copy_local_grid(struct gmx_pme_t *pme, pmegrids_t *pmegrids,
 {
     ivec local_fft_ndata, local_fft_offset, local_fft_size;
     int  fft_my, fft_mz;
-    int  nsx, nsy, nsz;
+    int  nsy, nsz;
     ivec nf;
     int  offx, offy, offz, x, y, z, i0, i0t;
     int  d;
@@ -413,14 +410,13 @@ static void copy_local_grid(struct gmx_pme_t *pme, pmegrids_t *pmegrids,
 
     pmegrid = &pmegrids->grid_th[thread];
 
-    nsx = pmegrid->s[XX];
     nsy = pmegrid->s[YY];
     nsz = pmegrid->s[ZZ];
 
     for (d = 0; d < DIM; d++)
     {
-        nf[d] = min(pmegrid->n[d] - (pmegrid->order - 1),
-                    local_fft_ndata[d] - pmegrid->offset[d]);
+        nf[d] = std::min(pmegrid->n[d] - (pmegrid->order - 1),
+                         local_fft_ndata[d] - pmegrid->offset[d]);
     }
 
     offx = pmegrid->offset[XX];
@@ -455,7 +451,7 @@ reduce_threadgrid_overlap(struct gmx_pme_t *pme,
     int  fft_nx, fft_ny, fft_nz;
     int  fft_my, fft_mz;
     int  buf_my = -1;
-    int  nsx, nsy, nsz;
+    int  nsy, nsz;
     ivec localcopy_end, commcopy_end;
     int  offx, offy, offz, x, y, z, i0, i0t;
     int  sx, sy, sz, fx, fy, fz, tx1, ty1, tz1, ox, oy, oz;
@@ -498,8 +494,8 @@ reduce_threadgrid_overlap(struct gmx_pme_t *pme,
          * not beyond the local FFT grid.
          */
        localcopy_end[d] =
-            min(pmegrid->offset[d] + pmegrid->n[d] - (pmegrid->order - 1),
-                local_fft_ndata[d]);
+            std::min(pmegrid->offset[d] + pmegrid->n[d] - (pmegrid->order - 1),
+                     local_fft_ndata[d]);
 
         /* Determine up to where our thread needs to copy from the
          * thread-local charge spreading grid to the communication buffer.
@@ -512,7 +508,7 @@ reduce_threadgrid_overlap(struct gmx_pme_t *pme,
         * When the rank-local FFT grid is narrower than pme-order,
         * we need the max below to ensure copying of all data.
         */
-        commcopy_end[d] = max(commcopy_end[d], pme->pme_order);
+        commcopy_end[d] = std::max(commcopy_end[d], pme->pme_order);
     }
 }
 
@@ -547,8 +543,8 @@ reduce_threadgrid_overlap(struct gmx_pme_t *pme,
         /* Determine the end of our part of the source grid.
         * Use our thread local source grid and target grid part
         */
-        tx1 = min(ox + pmegrid_g->n[XX],
-                  !bCommX ? localcopy_end[XX] : commcopy_end[XX]);
+        tx1 = std::min(ox + pmegrid_g->n[XX],
+                       !bCommX ? localcopy_end[XX] : commcopy_end[XX]);
 
         for (sy = 0; sy >= -pmegrids->nthread_comm[YY]; sy--)
         {
@@ -566,8 +562,8 @@ reduce_threadgrid_overlap(struct gmx_pme_t *pme,
             /* Determine the end of our part of the source grid.
             * Use our thread local source grid and target grid part
             */
-            ty1 = min(oy + pmegrid_g->n[YY],
-                      !bCommY ? localcopy_end[YY] : commcopy_end[YY]);
+            ty1 = std::min(oy + pmegrid_g->n[YY],
+                           !bCommY ? localcopy_end[YY] : commcopy_end[YY]);
 
             for (sz = 0; sz >= -pmegrids->nthread_comm[ZZ]; sz--)
             {
@@ -580,7 +576,7 @@ reduce_threadgrid_overlap(struct gmx_pme_t *pme,
                 }
                 pmegrid_g = &pmegrids->grid_th[fz];
                 oz       += pmegrid_g->offset[ZZ];
-                tz1       = min(oz + pmegrid_g->n[ZZ], localcopy_end[ZZ]);
+                tz1       = std::min(oz + pmegrid_g->n[ZZ], localcopy_end[ZZ]);
 
                 if (sx == 0 && sy == 0 && sz == 0)
                 {
@@ -596,7 +592,6 @@ reduce_threadgrid_overlap(struct gmx_pme_t *pme,
 
                 grid_th = pmegrid_f->grid;
 
-                nsx = pmegrid_f->s[XX];
                 nsy = pmegrid_f->s[YY];
                 nsz = pmegrid_f->s[ZZ];
 
@@ -706,8 +701,8 @@ static void sum_fftgrid_dd(struct gmx_pme_t *pme, real *fftgrid, int grid_index)
 #ifdef GMX_MPI
     MPI_Status stat;
 #endif
-    int  send_size_y, recv_size_y;
-    int  ipulse, send_id, recv_id, datasize, gridsize, size_yx;
+    int  recv_size_y;
+    int  ipulse, size_yx;
     real *sendptr, *recvptr;
     int  x, y, z, indg, indb;
@@ -736,14 +731,14 @@ static void sum_fftgrid_dd(struct gmx_pme_t *pme, real *fftgrid, int grid_index)
     {
         size_yx = 0;
     }
-    datasize = (local_fft_ndata[XX] + size_yx)*local_fft_ndata[ZZ];
+#ifdef GMX_MPI
+    int datasize = (local_fft_ndata[XX] + size_yx)*local_fft_ndata[ZZ];
 
-    send_size_y = overlap->send_size;
+    int send_size_y = overlap->send_size;
+#endif
 
     for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
     {
-        send_id     = overlap->send_id[ipulse];
-        recv_id     = overlap->recv_id[ipulse];
         send_index0 =
             overlap->comm_data[ipulse].send_index0 -
             overlap->comm_data[0].send_index0;
@@ -762,6 +757,8 @@ static void sum_fftgrid_dd(struct gmx_pme_t *pme, real *fftgrid, int grid_index)
         }
 
 #ifdef GMX_MPI
+        int send_id = overlap->send_id[ipulse];
+        int recv_id = overlap->recv_id[ipulse];
         MPI_Sendrecv(sendptr, send_size_y*datasize, GMX_MPI_REAL,
                      send_id, ipulse,
                      recvptr, recv_size_y*datasize, GMX_MPI_REAL,
@@ -811,18 +808,12 @@ static void sum_fftgrid_dd(struct gmx_pme_t *pme, real *fftgrid, int grid_index)
         /* Major dimension */
         overlap = &pme->overlap[0];
 
-        datasize = local_fft_ndata[YY]*local_fft_ndata[ZZ];
-        gridsize = local_fft_size[YY] *local_fft_size[ZZ];
-
         ipulse = 0;
 
-        send_id     = overlap->send_id[ipulse];
-        recv_id     = overlap->recv_id[ipulse];
         send_nindex = overlap->comm_data[ipulse].send_nindex;
         /* We don't use recv_index0, as we always receive starting at 0 */
         recv_nindex = overlap->comm_data[ipulse].recv_nindex;
 
-        sendptr = overlap->sendbuf;
         recvptr = overlap->recvbuf;
 
         if (debug != NULL)
@@ -832,6 +823,10 @@ static void sum_fftgrid_dd(struct gmx_pme_t *pme, real *fftgrid, int grid_index)
         }
 
 #ifdef GMX_MPI
+        int datasize = local_fft_ndata[YY]*local_fft_ndata[ZZ];
+        int send_id  = overlap->send_id[ipulse];
+        int recv_id  = overlap->recv_id[ipulse];
+        sendptr      = overlap->sendbuf;
         MPI_Sendrecv(sendptr, send_nindex*datasize, GMX_MPI_REAL,
                      send_id, ipulse,
                      recvptr, recv_nindex*datasize, GMX_MPI_REAL,
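
Two C++ idioms recur throughout this conversion: the min()/max() macros from
gromacs/legacyheaders/macros.h are replaced by std::min/std::max, which come
from <algorithm> and require both arguments to have the same type; and
variables used only inside #ifdef GMX_MPI regions are declared there, which
C++ (unlike C89) allows after earlier statements. The following minimal
sketch illustrates both idioms; EXAMPLE_MPI, gridExtent, order, copyEnd and
datasize are made-up stand-ins for illustration, not GROMACS identifiers.

    #include <algorithm>
    #include <cstdio>

    #define EXAMPLE_MPI 1   /* stand-in for GMX_MPI, illustration only */

    int main()
    {
        int gridExtent = 12, order = 4;

        /* std::min is a function template: unlike the old min() macro it
         * evaluates each argument exactly once and requires both to have
         * the same type, so mixed int/long arguments need a cast. */
        int copyEnd = std::min(gridExtent - (order - 1), gridExtent);

    #ifdef EXAMPLE_MPI
        /* C++ permits declarations after statements, so quantities used
         * only on the MPI path can be declared inside the #ifdef block;
         * builds without it then see neither the variables nor any
         * set-but-unused warnings. */
        int datasize = copyEnd * gridExtent;
        std::printf("datasize = %d\n", datasize);
    #endif
        return 0;
    }

Scoping the declarations inside the #ifdef is what lets the commit drop
variables such as send_size_y, datasize, send_id and recv_id from the
function-wide declaration blocks of sum_fftgrid_dd without introducing
unused-variable warnings in non-MPI builds.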