#include "pme_internal.h"
#ifdef DEBUG_PME
-#include "gromacs/fileio/pdbio.h"
-#include "gromacs/utility/cstringutil.h"
-#include "gromacs/utility/futil.h"
+# include "gromacs/fileio/pdbio.h"
+# include "gromacs/utility/cstringutil.h"
+# include "gromacs/utility/futil.h"
#endif
#include "pme_simd.h"
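/* Padding, in real grid elements, placed between (and around) the
 * per-thread PME grids allocated in pmegrids_init() below, so that grids
 * written concurrently by different threads end up on separate cache
 * lines (presumably to avoid false sharing).
 */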
#define GMX_CACHE_SEP 64
-void gmx_sum_qgrid_dd(gmx_pme_t *pme,
- real *grid,
- const int direction)
+void gmx_sum_qgrid_dd(gmx_pme_t* pme, real* grid, const int direction)
{
#if GMX_MPI
- pme_overlap_t *overlap;
+ pme_overlap_t* overlap;
int send_index0, send_nindex;
int recv_index0, recv_nindex;
MPI_Status stat;
int i, j, k, ix, iy, iz, icnt;
int send_id, recv_id, datasize;
- real *p;
- real *sendptr, *recvptr;
+ real* p;
+ real * sendptr, *recvptr;
/* Start with minor-rank communication. This is a bit of a pain since it is not contiguous */
overlap = &pme->overlap[1];
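/* GMX_SUM_GRID_FORWARD sends our part of the overlap region to the rank
 * that owns it; the backward direction returns the summed grid, so the
 * send/receive ids and index ranges are simply swapped.
 */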
if (direction == GMX_SUM_GRID_FORWARD)
{
- send_id = overlap->comm_data[ipulse].send_id;
- recv_id = overlap->comm_data[ipulse].recv_id;
- send_index0 = overlap->comm_data[ipulse].send_index0;
- send_nindex = overlap->comm_data[ipulse].send_nindex;
- recv_index0 = overlap->comm_data[ipulse].recv_index0;
- recv_nindex = overlap->comm_data[ipulse].recv_nindex;
+ send_id = overlap->comm_data[ipulse].send_id;
+ recv_id = overlap->comm_data[ipulse].recv_id;
+ send_index0 = overlap->comm_data[ipulse].send_index0;
+ send_nindex = overlap->comm_data[ipulse].send_nindex;
+ recv_index0 = overlap->comm_data[ipulse].recv_index0;
+ recv_nindex = overlap->comm_data[ipulse].recv_nindex;
}
else
{
- send_id = overlap->comm_data[ipulse].recv_id;
- recv_id = overlap->comm_data[ipulse].send_id;
- send_index0 = overlap->comm_data[ipulse].recv_index0;
- send_nindex = overlap->comm_data[ipulse].recv_nindex;
- recv_index0 = overlap->comm_data[ipulse].send_index0;
- recv_nindex = overlap->comm_data[ipulse].send_nindex;
+ send_id = overlap->comm_data[ipulse].recv_id;
+ recv_id = overlap->comm_data[ipulse].send_id;
+ send_index0 = overlap->comm_data[ipulse].recv_index0;
+ send_nindex = overlap->comm_data[ipulse].recv_nindex;
+ recv_index0 = overlap->comm_data[ipulse].send_index0;
+ recv_nindex = overlap->comm_data[ipulse].send_nindex;
}
/* Copy data to contiguous send buffer */
if (debug)
{
fprintf(debug, "PME send rank %d %d -> %d grid start %d Communicating %d to %d\n",
- pme->nodeid, overlap->nodeid, send_id,
- pme->pmegrid_start_iy,
- send_index0-pme->pmegrid_start_iy,
- send_index0-pme->pmegrid_start_iy+send_nindex);
+ pme->nodeid, overlap->nodeid, send_id, pme->pmegrid_start_iy,
+ send_index0 - pme->pmegrid_start_iy,
+ send_index0 - pme->pmegrid_start_iy + send_nindex);
}
icnt = 0;
for (i = 0; i < pme->pmegrid_nx; i++)
for (k = 0; k < pme->nkz; k++)
{
iz = k;
- overlap->sendbuf[icnt++] = grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz];
+ overlap->sendbuf[icnt++] =
+ grid[ix * (pme->pmegrid_ny * pme->pmegrid_nz) + iy * (pme->pmegrid_nz) + iz];
}
}
}
- datasize = pme->pmegrid_nx * pme->nkz;
+ datasize = pme->pmegrid_nx * pme->nkz;
- MPI_Sendrecv(overlap->sendbuf.data(), send_nindex*datasize, GMX_MPI_REAL,
- send_id, ipulse,
- overlap->recvbuf.data(), recv_nindex*datasize, GMX_MPI_REAL,
- recv_id, ipulse,
+ MPI_Sendrecv(overlap->sendbuf.data(), send_nindex * datasize, GMX_MPI_REAL, send_id, ipulse,
+ overlap->recvbuf.data(), recv_nindex * datasize, GMX_MPI_REAL, recv_id, ipulse,
overlap->mpi_comm, &stat);
/* Get data from contiguous recv buffer */
if (debug)
{
fprintf(debug, "PME recv rank %d %d <- %d grid start %d Communicating %d to %d\n",
- pme->nodeid, overlap->nodeid, recv_id,
- pme->pmegrid_start_iy,
- recv_index0-pme->pmegrid_start_iy,
- recv_index0-pme->pmegrid_start_iy+recv_nindex);
+ pme->nodeid, overlap->nodeid, recv_id, pme->pmegrid_start_iy,
+ recv_index0 - pme->pmegrid_start_iy,
+ recv_index0 - pme->pmegrid_start_iy + recv_nindex);
}
icnt = 0;
for (i = 0; i < pme->pmegrid_nx; i++)
iz = k;
if (direction == GMX_SUM_GRID_FORWARD)
{
- grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz] += overlap->recvbuf[icnt++];
+ grid[ix * (pme->pmegrid_ny * pme->pmegrid_nz) + iy * (pme->pmegrid_nz) + iz] +=
+ overlap->recvbuf[icnt++];
}
else
{
- grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz] = overlap->recvbuf[icnt++];
+ grid[ix * (pme->pmegrid_ny * pme->pmegrid_nz) + iy * (pme->pmegrid_nz) + iz] =
+ overlap->recvbuf[icnt++];
}
}
}
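/* Major (x) dimension: whole pmegrid_ny*pmegrid_nz planes are contiguous
 * in memory, so we can send directly from the grid and, in the backward
 * direction, also receive directly into it.
 */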
{
if (direction == GMX_SUM_GRID_FORWARD)
{
- send_id = overlap->comm_data[ipulse].send_id;
- recv_id = overlap->comm_data[ipulse].recv_id;
- send_index0 = overlap->comm_data[ipulse].send_index0;
- send_nindex = overlap->comm_data[ipulse].send_nindex;
- recv_index0 = overlap->comm_data[ipulse].recv_index0;
- recv_nindex = overlap->comm_data[ipulse].recv_nindex;
- recvptr = overlap->recvbuf.data();
+ send_id = overlap->comm_data[ipulse].send_id;
+ recv_id = overlap->comm_data[ipulse].recv_id;
+ send_index0 = overlap->comm_data[ipulse].send_index0;
+ send_nindex = overlap->comm_data[ipulse].send_nindex;
+ recv_index0 = overlap->comm_data[ipulse].recv_index0;
+ recv_nindex = overlap->comm_data[ipulse].recv_nindex;
+ recvptr = overlap->recvbuf.data();
}
else
{
- send_id = overlap->comm_data[ipulse].recv_id;
- recv_id = overlap->comm_data[ipulse].send_id;
- send_index0 = overlap->comm_data[ipulse].recv_index0;
- send_nindex = overlap->comm_data[ipulse].recv_nindex;
- recv_index0 = overlap->comm_data[ipulse].send_index0;
- recv_nindex = overlap->comm_data[ipulse].send_nindex;
- recvptr = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
+ send_id = overlap->comm_data[ipulse].recv_id;
+ recv_id = overlap->comm_data[ipulse].send_id;
+ send_index0 = overlap->comm_data[ipulse].recv_index0;
+ send_nindex = overlap->comm_data[ipulse].recv_nindex;
+ recv_index0 = overlap->comm_data[ipulse].send_index0;
+ recv_nindex = overlap->comm_data[ipulse].send_nindex;
+ recvptr = grid + (recv_index0 - pme->pmegrid_start_ix) * (pme->pmegrid_ny * pme->pmegrid_nz);
}
- sendptr = grid + (send_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
- datasize = pme->pmegrid_ny * pme->pmegrid_nz;
+ sendptr = grid + (send_index0 - pme->pmegrid_start_ix) * (pme->pmegrid_ny * pme->pmegrid_nz);
+ datasize = pme->pmegrid_ny * pme->pmegrid_nz;
if (debug)
{
fprintf(debug, "PME send rank %d %d -> %d grid start %d Communicating %d to %d\n",
- pme->nodeid, overlap->nodeid, send_id,
- pme->pmegrid_start_ix,
- send_index0-pme->pmegrid_start_ix,
- send_index0-pme->pmegrid_start_ix+send_nindex);
+ pme->nodeid, overlap->nodeid, send_id, pme->pmegrid_start_ix,
+ send_index0 - pme->pmegrid_start_ix,
+ send_index0 - pme->pmegrid_start_ix + send_nindex);
fprintf(debug, "PME recv rank %d %d <- %d grid start %d Communicating %d to %d\n",
- pme->nodeid, overlap->nodeid, recv_id,
- pme->pmegrid_start_ix,
- recv_index0-pme->pmegrid_start_ix,
- recv_index0-pme->pmegrid_start_ix+recv_nindex);
+ pme->nodeid, overlap->nodeid, recv_id, pme->pmegrid_start_ix,
+ recv_index0 - pme->pmegrid_start_ix,
+ recv_index0 - pme->pmegrid_start_ix + recv_nindex);
}
- MPI_Sendrecv(sendptr, send_nindex*datasize, GMX_MPI_REAL,
- send_id, ipulse,
- recvptr, recv_nindex*datasize, GMX_MPI_REAL,
- recv_id, ipulse,
- overlap->mpi_comm, &stat);
+ MPI_Sendrecv(sendptr, send_nindex * datasize, GMX_MPI_REAL, send_id, ipulse, recvptr,
+ recv_nindex * datasize, GMX_MPI_REAL, recv_id, ipulse, overlap->mpi_comm, &stat);
/* ADD data from contiguous recv buffer */
if (direction == GMX_SUM_GRID_FORWARD)
{
- p = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
- for (i = 0; i < recv_nindex*datasize; i++)
+ p = grid + (recv_index0 - pme->pmegrid_start_ix) * (pme->pmegrid_ny * pme->pmegrid_nz);
+ for (i = 0; i < recv_nindex * datasize; i++)
{
p[i] += overlap->recvbuf[i];
}
}
-int copy_pmegrid_to_fftgrid(const gmx_pme_t *pme, const real *pmegrid, real *fftgrid, int grid_index)
+int copy_pmegrid_to_fftgrid(const gmx_pme_t* pme, const real* pmegrid, real* fftgrid, int grid_index)
{
- ivec local_fft_ndata, local_fft_offset, local_fft_size;
- ivec local_pme_size;
- int ix, iy, iz;
- int pmeidx, fftidx;
+ ivec local_fft_ndata, local_fft_offset, local_fft_size;
+ ivec local_pme_size;
+ int ix, iy, iz;
+ int pmeidx, fftidx;
/* Dimensions should be identical for the A and B grids, so we just use A here */
- gmx_parallel_3dfft_real_limits(pme->pfft_setup[grid_index],
- local_fft_ndata,
- local_fft_offset,
+ gmx_parallel_3dfft_real_limits(pme->pfft_setup[grid_index], local_fft_ndata, local_fft_offset,
local_fft_size);
local_pme_size[0] = pme->pmegrid_nx;
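/* The PME grid is padded (and possibly SIMD-aligned) beyond the local
 * FFT region, so the two arrays need separate index strides.
 */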
{
for (iz = 0; iz < local_fft_ndata[ZZ]; iz++)
{
- pmeidx = ix*(local_pme_size[YY]*local_pme_size[ZZ])+iy*(local_pme_size[ZZ])+iz;
- fftidx = ix*(local_fft_size[YY]*local_fft_size[ZZ])+iy*(local_fft_size[ZZ])+iz;
+ pmeidx = ix * (local_pme_size[YY] * local_pme_size[ZZ])
+ + iy * (local_pme_size[ZZ]) + iz;
+ fftidx = ix * (local_fft_size[YY] * local_fft_size[ZZ])
+ + iy * (local_fft_size[ZZ]) + iz;
fftgrid[fftidx] = pmegrid[pmeidx];
#ifdef DEBUG_PME
- val = 100*pmegrid[pmeidx];
+ val = 100 * pmegrid[pmeidx];
if (pmegrid[pmeidx] != 0)
{
- gmx_fprintf_pdb_atomline(fp, epdbATOM, pmeidx, "CA", ' ', "GLY", ' ', pmeidx, ' ',
- 5.0*ix, 5.0*iy, 5.0*iz, 1.0, val, "");
+ gmx_fprintf_pdb_atomline(fp, epdbATOM, pmeidx, "CA", ' ', "GLY", ' ', pmeidx,
+ ' ', 5.0 * ix, 5.0 * iy, 5.0 * iz, 1.0, val, "");
}
if (pmegrid[pmeidx] != 0)
{
- fprintf(fp2, "%-12s %5d %5d %5d %12.5e\n",
- "qgrid",
- pme->pmegrid_start_ix + ix,
- pme->pmegrid_start_iy + iy,
- pme->pmegrid_start_iz + iz,
- pmegrid[pmeidx]);
+ fprintf(fp2, "%-12s %5d %5d %5d %12.5e\n", "qgrid",
+ pme->pmegrid_start_ix + ix, pme->pmegrid_start_iy + iy,
+ pme->pmegrid_start_iz + iz, pmegrid[pmeidx]);
}
#endif
}
#endif
-int copy_fftgrid_to_pmegrid(struct gmx_pme_t *pme, const real *fftgrid, real *pmegrid, int grid_index,
- int nthread, int thread)
+int copy_fftgrid_to_pmegrid(struct gmx_pme_t* pme, const real* fftgrid, real* pmegrid, int grid_index, int nthread, int thread)
{
- ivec local_fft_ndata, local_fft_offset, local_fft_size;
- ivec local_pme_size;
- int ixy0, ixy1, ixy, ix, iy, iz;
- int pmeidx, fftidx;
+ ivec local_fft_ndata, local_fft_offset, local_fft_size;
+ ivec local_pme_size;
+ int ixy0, ixy1, ixy, ix, iy, iz;
+ int pmeidx, fftidx;
#ifdef PME_TIME_THREADS
gmx_cycles_t c1;
static double cs1 = 0;
c1 = omp_cyc_start();
#endif
/* Dimensions should be identical for the A and B grids, so we just use A here */
- gmx_parallel_3dfft_real_limits(pme->pfft_setup[grid_index],
- local_fft_ndata,
- local_fft_offset,
+ gmx_parallel_3dfft_real_limits(pme->pfft_setup[grid_index], local_fft_ndata, local_fft_offset,
local_fft_size);
local_pme_size[0] = pme->pmegrid_nx;
/* The fftgrid is always 'justified' to the lower-left corner of the
 * PME grid; the offset is identical, and the PME grid always has more
 * data (due to overlap).
 */
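/* Divide the nx*ny range of z-lines evenly over the threads; each thread
 * copies whole z-lines, so no two threads write to the same line.
 */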
- ixy0 = ((thread )*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
- ixy1 = ((thread+1)*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
+ ixy0 = ((thread)*local_fft_ndata[XX] * local_fft_ndata[YY]) / nthread;
+ ixy1 = ((thread + 1) * local_fft_ndata[XX] * local_fft_ndata[YY]) / nthread;
for (ixy = ixy0; ixy < ixy1; ixy++)
{
- ix = ixy/local_fft_ndata[YY];
- iy = ixy - ix*local_fft_ndata[YY];
+ ix = ixy / local_fft_ndata[YY];
+ iy = ixy - ix * local_fft_ndata[YY];
- pmeidx = (ix*local_pme_size[YY] + iy)*local_pme_size[ZZ];
- fftidx = (ix*local_fft_size[YY] + iy)*local_fft_size[ZZ];
+ pmeidx = (ix * local_pme_size[YY] + iy) * local_pme_size[ZZ];
+ fftidx = (ix * local_fft_size[YY] + iy) * local_fft_size[ZZ];
for (iz = 0; iz < local_fft_ndata[ZZ]; iz++)
{
- pmegrid[pmeidx+iz] = fftgrid[fftidx+iz];
+ pmegrid[pmeidx + iz] = fftgrid[fftidx + iz];
}
}
#ifdef PME_TIME_THREADS
- c1 = omp_cyc_end(c1);
+ c1 = omp_cyc_end(c1);
cs1 += (double)c1;
cnt++;
if (cnt % 20 == 0)
{
- printf("copy %.2f\n", cs1*1e-9);
+ printf("copy %.2f\n", cs1 * 1e-9);
}
#endif
}
-void wrap_periodic_pmegrid(const gmx_pme_t *pme, real *pmegrid)
+void wrap_periodic_pmegrid(const gmx_pme_t* pme, real* pmegrid)
{
- int nx, ny, nz, pny, pnz, ny_x, overlap, ix, iy, iz;
+ int nx, ny, nz, pny, pnz, ny_x, overlap, ix, iy, iz;
nx = pme->nkx;
ny = pme->nky;
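/* Fold the overlap regions that spreading wrote beyond the periodic
 * boundaries back into the primary cell, first in z, then y, then x.
 */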
{
for (iz = 0; iz < overlap; iz++)
{
- pmegrid[(ix*pny+iy)*pnz+iz] +=
- pmegrid[(ix*pny+iy)*pnz+nz+iz];
+ pmegrid[(ix * pny + iy) * pnz + iz] += pmegrid[(ix * pny + iy) * pnz + nz + iz];
}
}
}
{
for (iz = 0; iz < nz; iz++)
{
- pmegrid[(ix*pny+iy)*pnz+iz] +=
- pmegrid[(ix*pny+ny+iy)*pnz+iz];
+ pmegrid[(ix * pny + iy) * pnz + iz] += pmegrid[(ix * pny + ny + iy) * pnz + iz];
}
}
}
{
for (iz = 0; iz < nz; iz++)
{
- pmegrid[(ix*pny+iy)*pnz+iz] +=
- pmegrid[((nx+ix)*pny+iy)*pnz+iz];
+ pmegrid[(ix * pny + iy) * pnz + iz] += pmegrid[((nx + ix) * pny + iy) * pnz + iz];
}
}
}
}
-void unwrap_periodic_pmegrid(struct gmx_pme_t *pme, real *pmegrid)
+void unwrap_periodic_pmegrid(struct gmx_pme_t* pme, real* pmegrid)
{
- int nx, ny, nz, pny, pnz, ny_x, overlap, ix;
+ int nx, ny, nz, pny, pnz, ny_x, overlap, ix;
nx = pme->nkx;
ny = pme->nky;
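/* Inverse of wrap_periodic_pmegrid: copy interior data outward into the
 * overlap regions, in x, then y, then z, so the gather can read periodic
 * images without wrapping indices.
 */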
{
for (iz = 0; iz < nz; iz++)
{
- pmegrid[((nx+ix)*pny+iy)*pnz+iz] =
- pmegrid[(ix*pny+iy)*pnz+iz];
+ pmegrid[((nx + ix) * pny + iy) * pnz + iz] = pmegrid[(ix * pny + iy) * pnz + iz];
}
}
}
{
for (iz = 0; iz < nz; iz++)
{
- pmegrid[(ix*pny+ny+iy)*pnz+iz] =
- pmegrid[(ix*pny+iy)*pnz+iz];
+ pmegrid[(ix * pny + ny + iy) * pnz + iz] = pmegrid[(ix * pny + iy) * pnz + iz];
}
}
}
{
for (iz = 0; iz < overlap; iz++)
{
- pmegrid[(ix*pny+iy)*pnz+nz+iz] =
- pmegrid[(ix*pny+iy)*pnz+iz];
+ pmegrid[(ix * pny + iy) * pnz + nz + iz] = pmegrid[(ix * pny + iy) * pnz + iz];
}
}
}
}
-void set_grid_alignment(int gmx_unused *pmegrid_nz, int gmx_unused pme_order)
+void set_grid_alignment(int gmx_unused* pmegrid_nz, int gmx_unused pme_order)
{
#ifdef PME_SIMD4_SPREAD_GATHER
if (pme_order == 5
-#if !PME_4NSIMD_GATHER
+# if !PME_4NSIMD_GATHER
|| pme_order == 4
-#endif
- )
+# endif
+ )
{
/* Round nz up to a multiple of 4 to ensure alignment */
*pmegrid_nz = ((*pmegrid_nz + 3) & ~3);
#endif
}
-static void set_gridsize_alignment(int gmx_unused *gridsize, int gmx_unused pme_order)
+static void set_gridsize_alignment(int gmx_unused* gridsize, int gmx_unused pme_order)
{
#ifdef PME_SIMD4_SPREAD_GATHER
-#if !PME_4NSIMD_GATHER
+# if !PME_4NSIMD_GATHER
if (pme_order == 4)
{
/* Add extra elements to ensure aligned operations do not go
*/
*gridsize += 4;
}
-#endif
+# endif
#endif
}
-void pmegrid_init(pmegrid_t *grid,
- int cx, int cy, int cz,
- int x0, int y0, int z0,
- int x1, int y1, int z1,
- gmx_bool set_alignment,
- int pme_order,
- real *ptr)
+void pmegrid_init(pmegrid_t* grid,
+ int cx,
+ int cy,
+ int cz,
+ int x0,
+ int y0,
+ int z0,
+ int x1,
+ int y1,
+ int z1,
+ gmx_bool set_alignment,
+ int pme_order,
+ real* ptr)
{
int nz, gridsize;
grid->order = pme_order;
if (ptr == nullptr)
{
- gridsize = grid->s[XX]*grid->s[YY]*grid->s[ZZ];
+ gridsize = grid->s[XX] * grid->s[YY] * grid->s[ZZ];
set_gridsize_alignment(&gridsize, pme_order);
snew_aligned(grid->grid, gridsize, SIMD4_ALIGNMENT);
}
static int div_round_up(int enumerator, int denominator)
{
- return (enumerator + denominator - 1)/denominator;
+ return (enumerator + denominator - 1) / denominator;
}
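/* Choose a 3D thread decomposition with nsub[XX]*nsub[YY]*nsub[ZZ] ==
 * nthread that minimizes the per-thread grid volume, including the ovl
 * overlap lines each thread needs in every dimension.
 */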
-static void make_subgrid_division(const ivec n, int ovl, int nthread,
- ivec nsub)
+static void make_subgrid_division(const ivec n, int ovl, int nthread, ivec nsub)
{
int gsize_opt, gsize;
int nsx, nsy, nsz;
- char *env;
+ char* env;
gsize_opt = -1;
for (nsx = 1; nsx <= nthread; nsx++)
{
for (nsy = 1; nsy <= nthread; nsy++)
{
- if (nsx*nsy <= nthread && nthread % (nsx*nsy) == 0)
+ if (nsx * nsy <= nthread && nthread % (nsx * nsy) == 0)
{
- nsz = nthread/(nsx*nsy);
+ nsz = nthread / (nsx * nsy);
/* Determine the number of grid points per thread */
- gsize =
- (div_round_up(n[XX], nsx) + ovl)*
- (div_round_up(n[YY], nsy) + ovl)*
- (div_round_up(n[ZZ], nsz) + ovl);
+ gsize = (div_round_up(n[XX], nsx) + ovl) * (div_round_up(n[YY], nsy) + ovl)
+ * (div_round_up(n[ZZ], nsz) + ovl);
/* Minimize the number of grid points per thread
* and, secondarily, the number of cuts in minor dimensions.
*/
- if (gsize_opt == -1 ||
- gsize < gsize_opt ||
- (gsize == gsize_opt &&
- (nsz < nsub[ZZ] || (nsz == nsub[ZZ] && nsy < nsub[YY]))))
+ if (gsize_opt == -1 || gsize < gsize_opt
+ || (gsize == gsize_opt && (nsz < nsub[ZZ] || (nsz == nsub[ZZ] && nsy < nsub[YY]))))
{
nsub[XX] = nsx;
nsub[YY] = nsy;
sscanf(env, "%20d %20d %20d", &nsub[XX], &nsub[YY], &nsub[ZZ]);
}
- if (nsub[XX]*nsub[YY]*nsub[ZZ] != nthread)
+ if (nsub[XX] * nsub[YY] * nsub[ZZ] != nthread)
{
- gmx_fatal(FARGS, "PME grid thread division (%d x %d x %d) does not match the total number of threads (%d)", nsub[XX], nsub[YY], nsub[ZZ], nthread);
+ gmx_fatal(FARGS,
+ "PME grid thread division (%d x %d x %d) does not match the total number of "
+ "threads (%d)",
+ nsub[XX], nsub[YY], nsub[ZZ], nthread);
}
}
-void pmegrids_init(pmegrids_t *grids,
- int nx, int ny, int nz, int nz_base,
- int pme_order,
- gmx_bool bUseThreads,
- int nthread,
- int overlap_x,
- int overlap_y)
+void pmegrids_init(pmegrids_t* grids,
+ int nx,
+ int ny,
+ int nz,
+ int nz_base,
+ int pme_order,
+ gmx_bool bUseThreads,
+ int nthread,
+ int overlap_x,
+ int overlap_y)
{
ivec n, n_base;
int t, x, y, z, d, i, tfac;
copy_ivec(n, n_base);
n_base[ZZ] = nz_base;
- pmegrid_init(&grids->grid, 0, 0, 0, 0, 0, 0, n[XX], n[YY], n[ZZ], FALSE, pme_order,
- nullptr);
+ pmegrid_init(&grids->grid, 0, 0, 0, 0, 0, 0, n[XX], n[YY], n[ZZ], FALSE, pme_order, nullptr);
grids->nthread = nthread;
- make_subgrid_division(n_base, pme_order-1, grids->nthread, grids->nc);
+ make_subgrid_division(n_base, pme_order - 1, grids->nthread, grids->nc);
if (bUseThreads)
{
if (debug)
{
- fprintf(debug, "pmegrid thread local division: %d x %d x %d\n",
- grids->nc[XX], grids->nc[YY], grids->nc[ZZ]);
- fprintf(debug, "pmegrid %d %d %d max thread pmegrid %d %d %d\n",
- nx, ny, nz,
- nst[XX], nst[YY], nst[ZZ]);
+ fprintf(debug, "pmegrid thread local division: %d x %d x %d\n", grids->nc[XX],
+ grids->nc[YY], grids->nc[ZZ]);
+ fprintf(debug, "pmegrid %d %d %d max thread pmegrid %d %d %d\n", nx, ny, nz, nst[XX],
+ nst[YY], nst[ZZ]);
}
snew(grids->grid_th, grids->nthread);
t = 0;
- gridsize = nst[XX]*nst[YY]*nst[ZZ];
+ gridsize = nst[XX] * nst[YY] * nst[ZZ];
set_gridsize_alignment(&gridsize, pme_order);
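/* A single allocation holds all thread-local grids, separated (and
 * bracketed) by GMX_CACHE_SEP elements so that different threads do not
 * write to the same cache line.
 */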
- snew_aligned(grids->grid_all,
- grids->nthread*gridsize+(grids->nthread+1)*GMX_CACHE_SEP,
+ snew_aligned(grids->grid_all, grids->nthread * gridsize + (grids->nthread + 1) * GMX_CACHE_SEP,
SIMD4_ALIGNMENT);
for (x = 0; x < grids->nc[XX]; x++)
{
for (z = 0; z < grids->nc[ZZ]; z++)
{
- pmegrid_init(&grids->grid_th[t],
- x, y, z,
- (n[XX]*(x ))/grids->nc[XX],
- (n[YY]*(y ))/grids->nc[YY],
- (n[ZZ]*(z ))/grids->nc[ZZ],
- (n[XX]*(x+1))/grids->nc[XX],
- (n[YY]*(y+1))/grids->nc[YY],
- (n[ZZ]*(z+1))/grids->nc[ZZ],
- TRUE,
- pme_order,
- grids->grid_all+GMX_CACHE_SEP+t*(gridsize+GMX_CACHE_SEP));
+ pmegrid_init(&grids->grid_th[t], x, y, z, (n[XX] * (x)) / grids->nc[XX],
+ (n[YY] * (y)) / grids->nc[YY], (n[ZZ] * (z)) / grids->nc[ZZ],
+ (n[XX] * (x + 1)) / grids->nc[XX], (n[YY] * (y + 1)) / grids->nc[YY],
+ (n[ZZ] * (z + 1)) / grids->nc[ZZ], TRUE, pme_order,
+ grids->grid_all + GMX_CACHE_SEP + t * (gridsize + GMX_CACHE_SEP));
t++;
}
}
}
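/* Build the per-dimension grid-line-to-thread lookup g2t; tfac
 * accumulates the stride of each dimension, so that adding the three
 * per-dimension lookups yields the flat thread index.
 */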
tfac = 1;
- for (d = DIM-1; d >= 0; d--)
+ for (d = DIM - 1; d >= 0; d--)
{
snew(grids->g2t[d], n[d]);
t = 0;
/* The second check should match the parameters
* of the pmegrid_init call above.
*/
- while (t + 1 < grids->nc[d] && i >= (n[d]*(t+1))/grids->nc[d])
+ while (t + 1 < grids->nc[d] && i >= (n[d] * (t + 1)) / grids->nc[d])
{
t++;
}
- grids->g2t[d][i] = t*tfac;
+ grids->g2t[d][i] = t * tfac;
}
tfac *= grids->nc[d];
switch (d)
{
- case XX: max_comm_lines = overlap_x; break;
- case YY: max_comm_lines = overlap_y; break;
+ case XX: max_comm_lines = overlap_x; break;
+ case YY: max_comm_lines = overlap_y; break;
case ZZ: max_comm_lines = pme_order - 1; break;
}
grids->nthread_comm[d] = 0;
- while ((n[d]*grids->nthread_comm[d])/grids->nc[d] < max_comm_lines &&
- grids->nthread_comm[d] < grids->nc[d])
+ while ((n[d] * grids->nthread_comm[d]) / grids->nc[d] < max_comm_lines
+ && grids->nthread_comm[d] < grids->nc[d])
{
grids->nthread_comm[d]++;
}
if (debug != nullptr)
{
- fprintf(debug, "pmegrid thread grid communication range in %c: %d\n",
- 'x'+d, grids->nthread_comm[d]);
+ fprintf(debug, "pmegrid thread grid communication range in %c: %d\n", 'x' + d,
+ grids->nthread_comm[d]);
}
/* It should be possible to make grids->nthread_comm[d]==grids->nc[d]
* work, but this is not a problematic restriction.
*/
if (grids->nc[d] > 1 && grids->nthread_comm[d] > grids->nc[d])
{
- gmx_fatal(FARGS, "Too many threads for PME (%d) compared to the number of grid lines, reduce the number of threads doing PME", grids->nthread);
+ gmx_fatal(FARGS,
+ "Too many threads for PME (%d) compared to the number of grid lines, reduce "
+ "the number of threads doing PME",
+ grids->nthread);
}
}
}
-void pmegrids_destroy(pmegrids_t *grids)
+void pmegrids_destroy(pmegrids_t* grids)
{
if (grids->grid.grid != nullptr)
{
}
}
-void
-make_gridindex_to_localindex(int n, int local_start, int local_range,
- int **global_to_local,
- real **fraction_shift)
+void make_gridindex_to_localindex(int n, int local_start, int local_range, int** global_to_local, real** fraction_shift)
{
/* Here we construct an array for looking up the grid line index and
 * fraction for particles. This is done because it is slightly
* to allow for particles to be out of the triclinic unit-cell.
*/
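/* The table spans c_pmeNeighborUnitcellCount copies of the grid, covering
 * particle indices shifted by up to the allowed number of box lengths
 * either way.
 */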
const int arraySize = c_pmeNeighborUnitcellCount * n;
- int * gtl;
- real * fsh;
+ int* gtl;
+ real* fsh;
snew(gtl, arraySize);
snew(fsh, arraySize);
*fraction_shift = fsh;
}
-void reuse_pmegrids(const pmegrids_t *oldgrid, pmegrids_t *newgrid)
+void reuse_pmegrids(const pmegrids_t* oldgrid, pmegrids_t* newgrid)
{
int d, t;