Apply clang-format to source tree
[alexxy/gromacs.git] / src / gromacs / ewald / pme_gather.cpp
index d4cc6772ef36fe4e79f60c4c823f3f1b8a112a87..315cb3aefb12f3ac292dcc2535980df67c38d532 100644 (file)
@@ -76,81 +76,85 @@ using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  */
 struct do_fspline
 {
-    do_fspline (
-            const gmx_pme_t *                   pme,
-            const real * gmx_restrict           grid,
-            const PmeAtomComm * gmx_restrict    atc,
-            const splinedata_t * gmx_restrict   spline,
-            int                                 nn)
-        : pme(pme), grid(grid), atc(atc), spline(spline), nn(nn) {}
-
-    template <typename Int>
+    do_fspline(const gmx_pme_t* pme,
+               const real* gmx_restrict grid,
+               const PmeAtomComm* gmx_restrict atc,
+               const splinedata_t* gmx_restrict spline,
+               int                              nn) :
+        pme(pme),
+        grid(grid),
+        atc(atc),
+        spline(spline),
+        nn(nn)
+    {
+    }
+
+    template<typename Int>
     RVec operator()(Int order) const
     {
         static_assert(isIntegralConstant<Int, int>::value || std::is_same<Int, int>::value,
                       "'order' needs to be either of type integral_constant<int,N> or int.");
 
-        const int  norder = nn*order;
+        const int norder = nn * order;
 
         /* Pointer arithmetic alert, next six statements */
-        const real *const gmx_restrict thx  = spline->theta.coefficients[XX] + norder;
-        const real *const gmx_restrict thy  = spline->theta.coefficients[YY] + norder;
-        const real *const gmx_restrict thz  = spline->theta.coefficients[ZZ] + norder;
-        const real *const gmx_restrict dthx = spline->dtheta.coefficients[XX] + norder;
-        const real *const gmx_restrict dthy = spline->dtheta.coefficients[YY] + norder;
-        const real *const gmx_restrict dthz = spline->dtheta.coefficients[ZZ] + norder;
+        const realconst gmx_restrict thx  = spline->theta.coefficients[XX] + norder;
+        const realconst gmx_restrict thy  = spline->theta.coefficients[YY] + norder;
+        const realconst gmx_restrict thz  = spline->theta.coefficients[ZZ] + norder;
+        const realconst gmx_restrict dthx = spline->dtheta.coefficients[XX] + norder;
+        const realconst gmx_restrict dthy = spline->dtheta.coefficients[YY] + norder;
+        const realconst gmx_restrict dthz = spline->dtheta.coefficients[ZZ] + norder;
 
-        RVec                           f(0, 0, 0);
+        RVec f(0, 0, 0);
 
         for (int ithx = 0; (ithx < order); ithx++)
         {
-            const int  index_x = (idxX + ithx)*gridNY*gridNZ;
+            const int  index_x = (idxX + ithx) * gridNY * gridNZ;
             const real tx      = thx[ithx];
             const real dx      = dthx[ithx];
 
             for (int ithy = 0; (ithy < order); ithy++)
             {
-                const int  index_xy = index_x + (idxY + ithy)*gridNZ;
+                const int  index_xy = index_x + (idxY + ithy) * gridNZ;
                 const real ty       = thy[ithy];
                 const real dy       = dthy[ithy];
-                real       fxy1     = 0, fz1 = 0;
+                real       fxy1 = 0, fz1 = 0;
 
                 for (int ithz = 0; (ithz < order); ithz++)
                 {
                     const real gval = grid[index_xy + (idxZ + ithz)];
-                    fxy1 += thz[ithz]*gval;
-                    fz1  += dthz[ithz]*gval;
+                    fxy1 += thz[ithz] * gval;
+                    fz1 += dthz[ithz] * gval;
                 }
-                f[XX] += dx*ty*fxy1;
-                f[YY] += tx*dy*fxy1;
-                f[ZZ] += tx*ty*fz1;
+                f[XX] += dx * ty * fxy1;
+                f[YY] += tx * dy * fxy1;
+                f[ZZ] += tx * ty * fz1;
             }
         }
 
         return f;
     }
 
-//TODO: Consider always have at least a dummy implementation of Simd (enough for first phase of two-phase lookup) and then use enable_if instead of #ifdef
+// TODO: Consider always have at least a dummy implementation of Simd (enough for first phase of two-phase lookup) and then use enable_if instead of #ifdef
 #if PME_4NSIMD_GATHER
-/* Gather for one charge with pme_order=4 with unaligned SIMD4 load+store.
- * Uses 4N SIMD where N is SIMD_WIDTH/4 to operate on all of z and N of y.
- * This code does not assume any memory alignment for the grid.
- */
-    RVec
-    operator()(std::integral_constant<int, 4> /*unused*/) const
+    /* Gather for one charge with pme_order=4 with unaligned SIMD4 load+store.
+     * Uses 4N SIMD where N is SIMD_WIDTH/4 to operate on all of z and N of y.
+     * This code does not assume any memory alignment for the grid.
+     */
+    RVec operator()(std::integral_constant<int, 4> /*unused*/) const
     {
-        const int                      norder = nn*4;
+        const int norder = nn * 4;
         /* Pointer arithmetic alert, next six statements */
-        const real *const gmx_restrict thx  = spline->theta.coefficients[XX] + norder;
-        const real *const gmx_restrict thy  = spline->theta.coefficients[YY] + norder;
-        const real *const gmx_restrict thz  = spline->theta.coefficients[ZZ] + norder;
-        const real *const gmx_restrict dthx = spline->dtheta.coefficients[XX] + norder;
-        const real *const gmx_restrict dthy = spline->dtheta.coefficients[YY] + norder;
-        const real *const gmx_restrict dthz = spline->dtheta.coefficients[ZZ] + norder;
+        const realconst gmx_restrict thx  = spline->theta.coefficients[XX] + norder;
+        const realconst gmx_restrict thy  = spline->theta.coefficients[YY] + norder;
+        const realconst gmx_restrict thz  = spline->theta.coefficients[ZZ] + norder;
+        const realconst gmx_restrict dthx = spline->dtheta.coefficients[XX] + norder;
+        const realconst gmx_restrict dthy = spline->dtheta.coefficients[YY] + norder;
+        const realconst gmx_restrict dthz = spline->dtheta.coefficients[ZZ] + norder;
 
-        Simd4NReal                     fx_S = setZero();
-        Simd4NReal                     fy_S = setZero();
-        Simd4NReal                     fz_S = setZero();
+        Simd4NReal fx_S = setZero();
+        Simd4NReal fy_S = setZero();
+        Simd4NReal fz_S = setZero();
 
         /* With order 4 the z-spline is actually aligned */
         const Simd4NReal tz_S = load4DuplicateN(thz);
@@ -158,18 +162,18 @@ struct do_fspline
 
         for (int ithx = 0; ithx < 4; ithx++)
         {
-            const int        index_x = (idxX + ithx)*gridNY*gridNZ;
+            const int        index_x = (idxX + ithx) * gridNY * gridNZ;
             const Simd4NReal tx_S    = Simd4NReal(thx[ithx]);
             const Simd4NReal dx_S    = Simd4NReal(dthx[ithx]);
 
-            for (int ithy = 0; ithy < 4; ithy += GMX_SIMD4N_REAL_WIDTH/4)
+            for (int ithy = 0; ithy < 4; ithy += GMX_SIMD4N_REAL_WIDTH / 4)
             {
-                const int        index_xy = index_x + (idxY+ithy)*gridNZ;
+                const int index_xy = index_x + (idxY + ithy) * gridNZ;
 
-                const Simd4NReal ty_S = loadUNDuplicate4(thy +ithy);
-                const Simd4NReal dy_S = loadUNDuplicate4(dthy+ithy);
+                const Simd4NReal ty_S = loadUNDuplicate4(thy + ithy);
+                const Simd4NReal dy_S = loadUNDuplicate4(dthy + ithy);
 
-                const Simd4NReal gval_S = loadU4NOffset(grid+index_xy+idxZ, gridNZ);
+                const Simd4NReal gval_S = loadU4NOffset(grid + index_xy + idxZ, gridNZ);
 
 
                 const Simd4NReal fxy1_S = tz_S * gval_S;
@@ -181,63 +185,65 @@ struct do_fspline
             }
         }
 
-        return {
-                   reduce(fx_S), reduce(fy_S), reduce(fz_S)
-        };
+        return { reduce(fx_S), reduce(fy_S), reduce(fz_S) };
     }
 #endif
 
 #ifdef PME_SIMD4_SPREAD_GATHER
-/* Load order elements from unaligned memory into two 4-wide SIMD */
+    /* Load order elements from unaligned memory into two 4-wide SIMD */
     template<int order>
-    static inline void loadOrderU(const real* data, std::integral_constant<int, order> /*unused*/,
-                                  int offset, Simd4Real* S0, Simd4Real* S1)
+    static inline void loadOrderU(const real* data,
+                                  std::integral_constant<int, order> /*unused*/,
+                                  int        offset,
+                                  Simd4Real* S0,
+                                  Simd4Real* S1)
     {
-#ifdef PME_SIMD4_UNALIGNED
-        *S0 = load4U(data-offset);
-        *S1 = load4U(data-offset+4);
-#else
-        alignas(GMX_SIMD_ALIGNMENT) real  buf_aligned[GMX_SIMD4_WIDTH*2];
+#    ifdef PME_SIMD4_UNALIGNED
+        *S0 = load4U(data - offset);
+        *S1 = load4U(data - offset + 4);
+#    else
+        alignas(GMX_SIMD_ALIGNMENT) real buf_aligned[GMX_SIMD4_WIDTH * 2];
         /* Copy data to an aligned buffer */
         for (int i = 0; i < order; i++)
         {
-            buf_aligned[offset+i]  = data[i];
+            buf_aligned[offset + i] = data[i];
         }
         *S0 = load4(buf_aligned);
-        *S1 = load4(buf_aligned+4);
-#endif
+        *S1 = load4(buf_aligned + 4);
+#    endif
     }
 #endif
 
 #ifdef PME_SIMD4_SPREAD_GATHER
-/* This code assumes that the grid is allocated 4-real aligned
- * and that pme->pmegrid_nz is a multiple of 4.
- * This code supports pme_order <= 5.
- */
-    template <int Order>
-    std::enable_if_t<Order == 4 || Order == 5, RVec>
-    operator()(std::integral_constant<int, Order>  order) const
+    /* This code assumes that the grid is allocated 4-real aligned
+     * and that pme->pmegrid_nz is a multiple of 4.
+     * This code supports pme_order <= 5.
+     */
+    template<int Order>
+    std::enable_if_t<Order == 4 || Order == 5, RVec> operator()(std::integral_constant<int, Order> order) const
     {
-        const int                     norder = nn*order;
-        GMX_ASSERT(gridNZ % 4 == 0, "For aligned SIMD4 operations the grid size has to be padded up to a multiple of 4");
+        const int norder = nn * order;
+        GMX_ASSERT(gridNZ % 4 == 0,
+                   "For aligned SIMD4 operations the grid size has to be padded up to a multiple "
+                   "of 4");
         /* Pointer arithmetic alert, next six statements */
-        const real *const gmx_restrict thx  = spline->theta.coefficients[XX] + norder;
-        const real *const gmx_restrict thy  = spline->theta.coefficients[YY] + norder;
-        const real *const gmx_restrict thz  = spline->theta.coefficients[ZZ] + norder;
-        const real *const gmx_restrict dthx = spline->dtheta.coefficients[XX] + norder;
-        const real *const gmx_restrict dthy = spline->dtheta.coefficients[YY] + norder;
-        const real *const gmx_restrict dthz = spline->dtheta.coefficients[ZZ] + norder;
+        const realconst gmx_restrict thx  = spline->theta.coefficients[XX] + norder;
+        const realconst gmx_restrict thy  = spline->theta.coefficients[YY] + norder;
+        const realconst gmx_restrict thz  = spline->theta.coefficients[ZZ] + norder;
+        const realconst gmx_restrict dthx = spline->dtheta.coefficients[XX] + norder;
+        const realconst gmx_restrict dthy = spline->dtheta.coefficients[YY] + norder;
+        const realconst gmx_restrict dthz = spline->dtheta.coefficients[ZZ] + norder;
 
-        struct pme_spline_work *const  work = pme->spline_work;
+        struct pme_spline_work* const work = pme->spline_work;
 
-        const int                      offset = idxZ & 3;
+        const int offset = idxZ & 3;
 
-        Simd4Real                      fx_S = setZero();
-        Simd4Real                      fy_S = setZero();
-        Simd4Real                      fz_S = setZero();
+        Simd4Real fx_S = setZero();
+        Simd4Real fy_S = setZero();
+        Simd4Real fz_S = setZero();
 
-        Simd4Real                      tz_S0, tz_S1, dz_S0, dz_S1;
-        loadOrderU(thz,  order, offset, &tz_S0, &tz_S1);
+        Simd4Real tz_S0, tz_S1, dz_S0, dz_S1;
+        loadOrderU(thz, order, offset, &tz_S0, &tz_S1);
         loadOrderU(dthz, order, offset, &dz_S0, &dz_S1);
 
         tz_S0 = selectByMask(tz_S0, work->mask_S0[offset]);
@@ -247,13 +253,13 @@ struct do_fspline
 
         for (int ithx = 0; (ithx < order); ithx++)
         {
-            const int       index_x  = (idxX + ithx)*gridNY*gridNZ;
-            const Simd4Real tx_S     = Simd4Real(thx[ithx]);
-            const Simd4Real dx_S     = Simd4Real(dthx[ithx]);
+            const int       index_x = (idxX + ithx) * gridNY * gridNZ;
+            const Simd4Real tx_S    = Simd4Real(thx[ithx]);
+            const Simd4Real dx_S    = Simd4Real(dthx[ithx]);
 
             for (int ithy = 0; (ithy < order); ithy++)
             {
-                const int       index_xy = index_x + (idxY + ithy)*gridNZ;
+                const int       index_xy = index_x + (idxY + ithy) * gridNZ;
                 const Simd4Real ty_S     = Simd4Real(thy[ithy]);
                 const Simd4Real dy_S     = Simd4Real(dthy[ithy]);
 
@@ -274,49 +280,49 @@ struct do_fspline
             }
         }
 
-        return {
-                   reduce(fx_S), reduce(fy_S), reduce(fz_S)
-        };
+        return { reduce(fx_S), reduce(fy_S), reduce(fz_S) };
     }
 #endif
-    private:
-        const gmx_pme_t *const                   pme;
-        const real *const gmx_restrict           grid;
-        const PmeAtomComm *const gmx_restrict    atc;
-        const splinedata_t *const gmx_restrict   spline;
-        const int                                nn;
-
-        const int                                gridNY = pme->pmegrid_ny;
-        const int                                gridNZ = pme->pmegrid_nz;
-
-        const int *const                         idxptr = atc->idx[spline->ind[nn]];
-        const int                                idxX   = idxptr[XX];
-        const int                                idxY   = idxptr[YY];
-        const int                                idxZ   = idxptr[ZZ];
+private:
+    const gmx_pme_t* const pme;
+    const real* const gmx_restrict grid;
+    const PmeAtomComm* const gmx_restrict atc;
+    const splinedata_t* const gmx_restrict spline;
+    const int                              nn;
+
+    const int gridNY = pme->pmegrid_ny;
+    const int gridNZ = pme->pmegrid_nz;
+
+    const int* const idxptr = atc->idx[spline->ind[nn]];
+    const int        idxX   = idxptr[XX];
+    const int        idxY   = idxptr[YY];
+    const int        idxZ   = idxptr[ZZ];
 };
 
 
-void gather_f_bsplines(const gmx_pme_t *pme, const real *grid,
-                       gmx_bool bClearF, const PmeAtomComm *atc,
-                       const splinedata_t *spline,
-                       real scale)
+void gather_f_bsplines(const gmx_pme_t*    pme,
+                       const real*         grid,
+                       gmx_bool            bClearF,
+                       const PmeAtomComm*  atc,
+                       const splinedata_t* spline,
+                       real                scale)
 {
     /* sum forces for local particles */
 
-    const int  order = pme->pme_order;
-    const int  nx    = pme->nkx;
-    const int  ny    = pme->nky;
-    const int  nz    = pme->nkz;
+    const int order = pme->pme_order;
+    const int nx    = pme->nkx;
+    const int ny    = pme->nky;
+    const int nz    = pme->nkz;
 
-    const real rxx   = pme->recipbox[XX][XX];
-    const real ryx   = pme->recipbox[YY][XX];
-    const real ryy   = pme->recipbox[YY][YY];
-    const real rzx   = pme->recipbox[ZZ][XX];
-    const real rzy   = pme->recipbox[ZZ][YY];
-    const real rzz   = pme->recipbox[ZZ][ZZ];
+    const real rxx = pme->recipbox[XX][XX];
+    const real ryx = pme->recipbox[YY][XX];
+    const real ryy = pme->recipbox[YY][YY];
+    const real rzx = pme->recipbox[ZZ][XX];
+    const real rzy = pme->recipbox[ZZ][YY];
+    const real rzz = pme->recipbox[ZZ][ZZ];
 
     /* Extract the buffer for force output */
-    rvec * gmx_restrict force = as_rvec_array(atc->f.data());
+    rvec* gmx_restrict force = as_rvec_array(atc->f.data());
 
     /* Note that unrolling this loop by templating this function on order
      * deteriorates performance significantly with gcc5/6/7.
@@ -324,7 +330,7 @@ void gather_f_bsplines(const gmx_pme_t *pme, const real *grid,
     for (int nn = 0; nn < spline->n; nn++)
     {
         const int  n           = spline->ind[nn];
-        const real coefficient = scale*atc->coefficient[n];
+        const real coefficient = scale * atc->coefficient[n];
 
         if (bClearF)
         {
@@ -339,20 +345,14 @@ void gather_f_bsplines(const gmx_pme_t *pme, const real *grid,
 
             switch (order)
             {
-                case 4:
-                    f = spline_func(std::integral_constant<int, 4>());
-                    break;
-                case 5:
-                    f = spline_func(std::integral_constant<int, 5>());
-                    break;
-                default:
-                    f = spline_func(order);
-                    break;
+                case 4: f = spline_func(std::integral_constant<int, 4>()); break;
+                case 5: f = spline_func(std::integral_constant<int, 5>()); break;
+                default: f = spline_func(order); break;
             }
 
-            force[n][XX] += -coefficient*( f[XX]*nx*rxx );
-            force[n][YY] += -coefficient*( f[XX]*nx*ryx + f[YY]*ny*ryy );
-            force[n][ZZ] += -coefficient*( f[XX]*nx*rzx + f[YY]*ny*rzy + f[ZZ]*nz*rzz );
+            force[n][XX] += -coefficient * (f[XX] * nx * rxx);
+            force[n][YY] += -coefficient * (f[XX] * nx * ryx + f[YY] * ny * ryy);
+            force[n][ZZ] += -coefficient * (f[XX] * nx * rzx + f[YY] * ny * rzy + f[ZZ] * nz * rzz);
         }
     }
     /* Since the energy and not forces are interpolated
@@ -367,15 +367,14 @@ void gather_f_bsplines(const gmx_pme_t *pme, const real *grid,
 }
 
 
-real gather_energy_bsplines(gmx_pme_t *pme, const real *grid,
-                            PmeAtomComm *atc)
+real gather_energy_bsplines(gmx_pme_t* pme, const real* grid, PmeAtomComm* atc)
 {
-    splinedata_t *spline;
+    splinedata_tspline;
     int           ithx, ithy, ithz, i0, j0, k0;
     int           index_x, index_xy;
-    int          *idxptr;
+    int*          idxptr;
     real          energy, pot, tx, ty, coefficient, gval;
-    real         *thx, *thy, *thz;
+    real *        thx, *thy, *thz;
     int           norder;
     int           order;
 
@@ -386,43 +385,42 @@ real gather_energy_bsplines(gmx_pme_t *pme, const real *grid,
     energy = 0;
     for (int n = 0; n < atc->numAtoms(); n++)
     {
-        coefficient      = atc->coefficient[n];
+        coefficient = atc->coefficient[n];
 
         if (coefficient != 0)
         {
             idxptr = atc->idx[n];
-            norder = n*order;
+            norder = n * order;
 
-            i0   = idxptr[XX];
-            j0   = idxptr[YY];
-            k0   = idxptr[ZZ];
+            i0 = idxptr[XX];
+            j0 = idxptr[YY];
+            k0 = idxptr[ZZ];
 
             /* Pointer arithmetic alert, next three statements */
-            thx  = spline->theta.coefficients[XX] + norder;
-            thy  = spline->theta.coefficients[YY] + norder;
-            thz  = spline->theta.coefficients[ZZ] + norder;
+            thx = spline->theta.coefficients[XX] + norder;
+            thy = spline->theta.coefficients[YY] + norder;
+            thz = spline->theta.coefficients[ZZ] + norder;
 
             pot = 0;
             for (ithx = 0; (ithx < order); ithx++)
             {
-                index_x = (i0+ithx)*pme->pmegrid_ny*pme->pmegrid_nz;
+                index_x = (i0 + ithx) * pme->pmegrid_ny * pme->pmegrid_nz;
                 tx      = thx[ithx];
 
                 for (ithy = 0; (ithy < order); ithy++)
                 {
-                    index_xy = index_x+(j0+ithy)*pme->pmegrid_nz;
+                    index_xy = index_x + (j0 + ithy) * pme->pmegrid_nz;
                     ty       = thy[ithy];
 
                     for (ithz = 0; (ithz < order); ithz++)
                     {
-                        gval  = grid[index_xy+(k0+ithz)];
-                        pot  += tx*ty*thz[ithz]*gval;
+                        gval = grid[index_xy + (k0 + ithz)];
+                        pot += tx * ty * thz[ithz] * gval;
                     }
-
                 }
             }
 
-            energy += pot*coefficient;
+            energy += pot * coefficient;
         }
     }