From: Roland Schulz
Date: Mon, 27 Oct 2014 17:35:46 +0000 (-0400)
Subject: C++ conversion for PME spread/gather
X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=a75c69822315db615fa7949263b9ba84bc545ab0;p=alexxy%2Fgromacs.git

C++ conversion for PME spread/gather

Noted TODO for future refactoring

Change-Id: Ib130a59a26f124645a3da0fe168d2125e05ac824
---

diff --git a/src/gromacs/ewald/CMakeLists.txt b/src/gromacs/ewald/CMakeLists.txt
index b6c515ef88..5378331def 100644
--- a/src/gromacs/ewald/CMakeLists.txt
+++ b/src/gromacs/ewald/CMakeLists.txt
@@ -1,7 +1,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2014, by the GROMACS development team, led by
+# Copyright (c) 2014,2015, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -32,7 +32,7 @@
 # To help us fund GROMACS development, we humbly ask that you cite
 # the research papers on the package. Check out http://www.gromacs.org.
 
-file(GLOB EWALD_SOURCES *.c)
+file(GLOB EWALD_SOURCES *.c *.cpp)
 
 set(LIBGROMACS_SOURCES ${LIBGROMACS_SOURCES} ${EWALD_SOURCES} PARENT_SCOPE)
 
 if (BUILD_TESTING)
diff --git a/src/gromacs/ewald/pme-gather.c b/src/gromacs/ewald/pme-gather.cpp
similarity index 93%
rename from src/gromacs/ewald/pme-gather.c
rename to src/gromacs/ewald/pme-gather.cpp
index c28b8b50ff..2e2a12ff67 100644
--- a/src/gromacs/ewald/pme-gather.c
+++ b/src/gromacs/ewald/pme-gather.cpp
@@ -80,7 +80,7 @@ void gather_f_bsplines(struct gmx_pme_t *pme, real *grid,
     /* sum forces for local particles */
     int   nn, n, ithx, ithy, ithz, i0, j0, k0;
     int   index_x, index_xy;
-    int   nx, ny, nz, pnx, pny, pnz;
+    int   nx, ny, nz, pny, pnz;
     int  *idxptr;
     real  tx, ty, dx, dy, coefficient;
     real  fx, fy, fz, gval;
@@ -90,29 +90,22 @@ void gather_f_bsplines(struct gmx_pme_t *pme, real *grid,
     real  rxx, ryx, ryy, rzx, rzy, rzz;
     int   order;
 
-    struct pme_spline_work *work;
-
-#if defined PME_SIMD4_SPREAD_GATHER && !defined PME_SIMD4_UNALIGNED
-    real  thz_buffer[GMX_SIMD4_WIDTH*3],  *thz_aligned;
-    real  dthz_buffer[GMX_SIMD4_WIDTH*3], *dthz_aligned;
+#ifdef PME_SIMD4_SPREAD_GATHER
+    // cppcheck-suppress unreadVariable cppcheck seems not to analyze code from pme-simd4.h
+    struct pme_spline_work *work = pme->spline_work;
+#ifndef PME_SIMD4_UNALIGNED
+    real  thz_buffer[GMX_SIMD4_WIDTH*3],  *thz_aligned;
+    real  dthz_buffer[GMX_SIMD4_WIDTH*3], *dthz_aligned;
 
     thz_aligned  = gmx_simd4_align_r(thz_buffer);
     dthz_aligned = gmx_simd4_align_r(dthz_buffer);
 #endif
-
-    work = pme->spline_work;
+#endif
 
     order = pme->pme_order;
-    thx   = spline->theta[XX];
-    thy   = spline->theta[YY];
-    thz   = spline->theta[ZZ];
-    dthx  = spline->dtheta[XX];
-    dthy  = spline->dtheta[YY];
-    dthz  = spline->dtheta[ZZ];
     nx    = pme->nkx;
     ny    = pme->nky;
     nz    = pme->nkz;
-    pnx   = pme->pmegrid_nx;
     pny   = pme->pmegrid_ny;
     pnz   = pme->pmegrid_nz;
 
diff --git a/src/gromacs/ewald/pme-spread.c b/src/gromacs/ewald/pme-spread.cpp
similarity index 95%
rename from src/gromacs/ewald/pme-spread.c
rename to src/gromacs/ewald/pme-spread.cpp
index 416174d247..da75efb427 100644
--- a/src/gromacs/ewald/pme-spread.c
+++ b/src/gromacs/ewald/pme-spread.cpp
@@ -42,12 +42,16 @@
 
 #include <assert.h>
 
+#include <algorithm>
+
 #include "gromacs/ewald/pme-internal.h"
 #include "gromacs/ewald/pme-simd.h"
 #include "gromacs/ewald/pme-spline-work.h"
 #include "gromacs/legacyheaders/macros.h"
 #include "gromacs/utility/smalloc.h"
 
+/* TODO consider split of pme-spline from this file */
+
 static void calc_interpolation_idx(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
                                    int start, int grid_index, int end, int thread)
 {
@@ -56,7 +60,6 @@ static void calc_interpolation_idx(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
     real     *xptr, *fptr, tx, ty, tz;
     real      rxx, ryx, ryy, rzx, rzy, rzz;
     int       nx, ny, nz;
-    int       start_ix, start_iy, start_iz;
     int      *g2tx, *g2ty, *g2tz;
     gmx_bool  bThreads;
     int      *thread_idx = NULL;
@@ -68,10 +71,6 @@ static void calc_interpolation_idx(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
     ny  = pme->nky;
     nz  = pme->nkz;
 
-    start_ix = pme->pmegrid_start_ix;
-    start_iy = pme->pmegrid_start_iy;
-    start_iz = pme->pmegrid_start_iz;
-
     rxx = pme->recipbox[XX][XX];
     ryx = pme->recipbox[YY][XX];
     ryy = pme->recipbox[YY][YY];
@@ -309,13 +308,11 @@ static void spread_coefficients_bsplines_thread(pmegrid_t
 
     /* spread coefficients from home atoms to local grid */
     real          *grid;
-    pme_overlap_t *ol;
-    int            b, i, nn, n, ithx, ithy, ithz, i0, j0, k0;
+    int            i, nn, n, ithx, ithy, ithz, i0, j0, k0;
     int           *idxptr;
     int            order, norder, index_x, index_xy, index_xyz;
     real           valx, valxy, coefficient;
     real          *thx, *thy, *thz;
-    int            localsize, bndsize;
     int            pnx, pny, pnz, ndatatot;
     int            offx, offy, offz;
 
@@ -397,7 +394,7 @@ static void copy_local_grid(struct gmx_pme_t *pme, pmegrids_t *pmegrids,
 {
     ivec local_fft_ndata, local_fft_offset, local_fft_size;
     int  fft_my, fft_mz;
-    int  nsx, nsy, nsz;
+    int  nsy, nsz;
     ivec nf;
     int  offx, offy, offz, x, y, z, i0, i0t;
     int  d;
@@ -413,14 +410,13 @@ static void copy_local_grid(struct gmx_pme_t *pme, pmegrids_t *pmegrids,
 
     pmegrid = &pmegrids->grid_th[thread];
 
-    nsx = pmegrid->s[XX];
     nsy = pmegrid->s[YY];
     nsz = pmegrid->s[ZZ];
 
     for (d = 0; d < DIM; d++)
     {
-        nf[d] = min(pmegrid->n[d] - (pmegrid->order - 1),
-                    local_fft_ndata[d] - pmegrid->offset[d]);
+        nf[d] = std::min(pmegrid->n[d] - (pmegrid->order - 1),
+                         local_fft_ndata[d] - pmegrid->offset[d]);
     }
 
     offx = pmegrid->offset[XX];
@@ -455,7 +451,7 @@ reduce_threadgrid_overlap(struct gmx_pme_t *pme,
     int  fft_nx, fft_ny, fft_nz;
     int  fft_my, fft_mz;
     int  buf_my = -1;
-    int  nsx, nsy, nsz;
+    int  nsy, nsz;
     ivec localcopy_end, commcopy_end;
     int  offx, offy, offz, x, y, z, i0, i0t;
     int  sx, sy, sz, fx, fy, fz, tx1, ty1, tz1, ox, oy, oz;
@@ -498,8 +494,8 @@ reduce_threadgrid_overlap(struct gmx_pme_t *pme,
          * not beyond the local FFT grid.
          */
        localcopy_end[d] =
-            min(pmegrid->offset[d] + pmegrid->n[d] - (pmegrid->order - 1),
-                local_fft_ndata[d]);
+            std::min(pmegrid->offset[d] + pmegrid->n[d] - (pmegrid->order - 1),
+                     local_fft_ndata[d]);
 
         /* Determine up to where our thread needs to copy from the
          * thread-local charge spreading grid to the communication buffer.
@@ -512,7 +508,7 @@ reduce_threadgrid_overlap(struct gmx_pme_t *pme,
         * When the rank-local FFT grid is narrower than pme-order,
         * we need the max below to ensure copying of all data.
         */
-        commcopy_end[d] = max(commcopy_end[d], pme->pme_order);
+        commcopy_end[d] = std::max(commcopy_end[d], pme->pme_order);
     }
 }
 
@@ -547,8 +543,8 @@ reduce_threadgrid_overlap(struct gmx_pme_t *pme,
         /* Determine the end of our part of the source grid.
         * Use our thread local source grid and target grid part
         */
-        tx1 = min(ox + pmegrid_g->n[XX],
-                  !bCommX ? localcopy_end[XX] : commcopy_end[XX]);
+        tx1 = std::min(ox + pmegrid_g->n[XX],
+                       !bCommX ? localcopy_end[XX] : commcopy_end[XX]);
 
         for (sy = 0; sy >= -pmegrids->nthread_comm[YY]; sy--)
         {
@@ -566,8 +562,8 @@ reduce_threadgrid_overlap(struct gmx_pme_t *pme,
             /* Determine the end of our part of the source grid.
             * Use our thread local source grid and target grid part
             */
-            ty1 = min(oy + pmegrid_g->n[YY],
-                      !bCommY ? localcopy_end[YY] : commcopy_end[YY]);
+            ty1 = std::min(oy + pmegrid_g->n[YY],
+                           !bCommY ? localcopy_end[YY] : commcopy_end[YY]);
 
             for (sz = 0; sz >= -pmegrids->nthread_comm[ZZ]; sz--)
             {
@@ -580,7 +576,7 @@ reduce_threadgrid_overlap(struct gmx_pme_t *pme,
                 }
                 pmegrid_g = &pmegrids->grid_th[fz];
                 oz       += pmegrid_g->offset[ZZ];
-                tz1       = min(oz + pmegrid_g->n[ZZ], localcopy_end[ZZ]);
+                tz1       = std::min(oz + pmegrid_g->n[ZZ], localcopy_end[ZZ]);
 
                 if (sx == 0 && sy == 0 && sz == 0)
                 {
@@ -596,7 +592,6 @@ reduce_threadgrid_overlap(struct gmx_pme_t *pme,
 
                 grid_th = pmegrid_f->grid;
 
-                nsx = pmegrid_f->s[XX];
                 nsy = pmegrid_f->s[YY];
                 nsz = pmegrid_f->s[ZZ];
 
@@ -706,8 +701,8 @@ static void sum_fftgrid_dd(struct gmx_pme_t *pme, real *fftgrid, int grid_index)
 #ifdef GMX_MPI
     MPI_Status stat;
 #endif
-    int  send_size_y, recv_size_y;
-    int  ipulse, send_id, recv_id, datasize, gridsize, size_yx;
+    int  recv_size_y;
+    int  ipulse, size_yx;
     real *sendptr, *recvptr;
     int  x, y, z, indg, indb;
@@ -736,14 +731,14 @@ static void sum_fftgrid_dd(struct gmx_pme_t *pme, real *fftgrid, int grid_index)
     {
         size_yx = 0;
     }
-    datasize = (local_fft_ndata[XX] + size_yx)*local_fft_ndata[ZZ];
+#ifdef GMX_MPI
+    int datasize = (local_fft_ndata[XX] + size_yx)*local_fft_ndata[ZZ];
 
-    send_size_y = overlap->send_size;
+    int send_size_y = overlap->send_size;
+#endif
 
     for (ipulse = 0; ipulse < overlap->noverlap_nodes; ipulse++)
     {
-        send_id     = overlap->send_id[ipulse];
-        recv_id     = overlap->recv_id[ipulse];
         send_index0 =
             overlap->comm_data[ipulse].send_index0 -
             overlap->comm_data[0].send_index0;
@@ -762,6 +757,8 @@ static void sum_fftgrid_dd(struct gmx_pme_t *pme, real *fftgrid, int grid_index)
         }
 
 #ifdef GMX_MPI
+        int send_id = overlap->send_id[ipulse];
+        int recv_id = overlap->recv_id[ipulse];
         MPI_Sendrecv(sendptr, send_size_y*datasize, GMX_MPI_REAL,
                      send_id, ipulse,
                      recvptr, recv_size_y*datasize, GMX_MPI_REAL,
@@ -811,18 +808,12 @@ static void sum_fftgrid_dd(struct gmx_pme_t *pme, real *fftgrid, int grid_index)
         /* Major dimension */
         overlap = &pme->overlap[0];
 
-        datasize = local_fft_ndata[YY]*local_fft_ndata[ZZ];
-        gridsize = local_fft_size[YY] *local_fft_size[ZZ];
-
         ipulse = 0;
 
-        send_id     = overlap->send_id[ipulse];
-        recv_id     = overlap->recv_id[ipulse];
         send_nindex = overlap->comm_data[ipulse].send_nindex;
         /* We don't use recv_index0, as we always receive starting at 0 */
         recv_nindex = overlap->comm_data[ipulse].recv_nindex;
 
-        sendptr = overlap->sendbuf;
         recvptr = overlap->recvbuf;
 
         if (debug != NULL)
@@ -832,6 +823,10 @@ static void sum_fftgrid_dd(struct gmx_pme_t *pme, real *fftgrid, int grid_index)
         }
 
 #ifdef GMX_MPI
+        int datasize = local_fft_ndata[YY]*local_fft_ndata[ZZ];
+        int send_id  = overlap->send_id[ipulse];
+        int recv_id  = overlap->recv_id[ipulse];
+        sendptr      = overlap->sendbuf;
         MPI_Sendrecv(sendptr, send_nindex*datasize, GMX_MPI_REAL,
                      send_id, ipulse,
                      recvptr, recv_nindex*datasize, GMX_MPI_REAL,
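
Two C++ idioms recur throughout this conversion: the min()/max() macros from
gromacs/legacyheaders/macros.h are replaced by std::min/std::max, which come
from <algorithm> and require both arguments to have the same type; and
variables used only inside #ifdef GMX_MPI regions are declared there, which
C++ (unlike C89) allows after earlier statements. The following minimal
sketch illustrates both idioms; EXAMPLE_MPI, gridExtent, order, copyEnd and
datasize are made-up stand-ins for illustration, not GROMACS identifiers.

    #include <algorithm>
    #include <cstdio>

    #define EXAMPLE_MPI 1   /* stand-in for GMX_MPI, illustration only */

    int main()
    {
        int gridExtent = 12, order = 4;

        /* std::min is a function template: unlike the old min() macro it
         * evaluates each argument exactly once and requires both to have
         * the same type, so mixed int/long arguments need a cast. */
        int copyEnd = std::min(gridExtent - (order - 1), gridExtent);

    #ifdef EXAMPLE_MPI
        /* C++ permits declarations after statements, so quantities used
         * only on the MPI path can be declared inside the #ifdef block;
         * builds without it then see neither the variables nor any
         * set-but-unused warnings. */
        int datasize = copyEnd * gridExtent;
        std::printf("datasize = %d\n", datasize);
    #endif
        return 0;
    }

Scoping the declarations inside the #ifdef is what lets the commit drop
variables such as send_size_y, datasize, send_id and recv_id from the
function-wide declaration blocks of sum_fftgrid_dd without introducing
unused-variable warnings in non-MPI builds.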