Fix exception in SIMD LJ PME solve
authorBerk Hess <hess@kth.se>
Fri, 8 Sep 2017 13:35:23 +0000 (15:35 +0200)
committerAleksei Iupinov <a.yupinov@gmail.com>
Tue, 12 Sep 2017 09:59:35 +0000 (11:59 +0200)
Clear SIMD padding elements in solve helper arrays to avoid,
otherwise harmles, fp overflow exceptions.

Fixes #2242

Change-Id: I97e67c4fcc2ef361f54d1627fd0dab4621f4bd33

src/gromacs/ewald/pme-solve.cpp

index a7599b029e36be538079db4bfe1c43e6b94d670f..f35730d075cefeda83b4e4ed3f3b65d353b41590 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -80,40 +80,54 @@ struct pme_solve_work_t
     matrix   vir_lj;
 };
 
+#ifdef PME_SIMD_SOLVE
+constexpr int c_simdWidth = GMX_SIMD_REAL_WIDTH;
+#else
+/* We can use any alignment > 0, so we use 4 */
+constexpr int c_simdWidth = 4;
+#endif
+
+/* Returns the smallest number >= \p that is a multiple of \p factor, \p factor must be a power of 2 */
+template <unsigned int factor>
+static size_t roundUpToMultipleOfFactor(size_t number)
+{
+    static_assert(factor > 0 && (factor & (factor - 1)) == 0, "factor should be >0 and a power of 2");
+
+    /* We need to add a most factor-1 and because factor is a power of 2,
+     * we get the result by masking out the bits corresponding to factor-1.
+     */
+    return (number + factor - 1) & ~(factor - 1);
+}
+
+/* Allocate an aligned pointer for SIMD operations, including extra elements
+ * at the end for padding.
+ */
+/* TODO: Replace this SIMD reallocator with a general, C++ solution */
+static void reallocSimdAlignedAndPadded(real **ptr, int unpaddedNumElements)
+{
+    sfree_aligned(*ptr);
+    snew_aligned(*ptr, roundUpToMultipleOfFactor<c_simdWidth>(unpaddedNumElements), c_simdWidth*sizeof(real));
+}
+
 static void realloc_work(struct pme_solve_work_t *work, int nkx)
 {
     if (nkx > work->nalloc)
     {
-        int simd_width, i;
-
         work->nalloc = nkx;
         srenew(work->mhx, work->nalloc);
         srenew(work->mhy, work->nalloc);
         srenew(work->mhz, work->nalloc);
         srenew(work->m2, work->nalloc);
-        /* Allocate an aligned pointer for SIMD operations, including extra
-         * elements at the end for padding.
-         */
-#ifdef PME_SIMD_SOLVE
-        simd_width = GMX_SIMD_REAL_WIDTH;
-#else
-        /* We can use any alignment, apart from 0, so we use 4 */
-        simd_width = 4;
-#endif
-        sfree_aligned(work->denom);
-        sfree_aligned(work->tmp1);
-        sfree_aligned(work->tmp2);
-        sfree_aligned(work->eterm);
-        snew_aligned(work->denom, work->nalloc+simd_width, simd_width*sizeof(real));
-        snew_aligned(work->tmp1,  work->nalloc+simd_width, simd_width*sizeof(real));
-        snew_aligned(work->tmp2,  work->nalloc+simd_width, simd_width*sizeof(real));
-        snew_aligned(work->eterm, work->nalloc+simd_width, simd_width*sizeof(real));
+        reallocSimdAlignedAndPadded(&work->denom, work->nalloc);
+        reallocSimdAlignedAndPadded(&work->tmp1, work->nalloc);
+        reallocSimdAlignedAndPadded(&work->tmp2, work->nalloc);
+        reallocSimdAlignedAndPadded(&work->eterm, work->nalloc);
         srenew(work->m2inv, work->nalloc);
 
         /* Init all allocated elements of denom to 1 to avoid 1/0 exceptions
          * of simd padded elements.
          */
-        for (i = 0; i < work->nalloc+simd_width; i++)
+        for (size_t i = 0; i < roundUpToMultipleOfFactor<c_simdWidth>(work->nalloc ); i++)
         {
             work->denom[i] = 1;
         }
@@ -668,6 +682,13 @@ int solve_pme_lj_yzx(struct gmx_pme_t *pme, t_complex **grid, gmx_bool bLB,
                 tmp1[kx]  = -factor*m2k;
                 tmp2[kx]  = sqrt(factor*m2k);
             }
+            /* Clear padding elements to avoid (harmless) fp exceptions */
+            const int kxendSimd = roundUpToMultipleOfFactor<c_simdWidth>(kxend);
+            for (; kx < kxendSimd; kx++)
+            {
+                tmp1[kx] = 0;
+                tmp2[kx] = 0;
+            }
 
             calc_exponentials_lj(kxstart, kxend, tmp1, tmp2, denom);
 
@@ -801,6 +822,13 @@ int solve_pme_lj_yzx(struct gmx_pme_t *pme, t_complex **grid, gmx_bool bLB,
                 tmp1[kx]  = -factor*m2k;
                 tmp2[kx]  = sqrt(factor*m2k);
             }
+            /* Clear padding elements to avoid (harmless) fp exceptions */
+            const int kxendSimd = roundUpToMultipleOfFactor<c_simdWidth>(kxend);
+            for (; kx < kxendSimd; kx++)
+            {
+                tmp1[kx] = 0;
+                tmp2[kx] = 0;
+            }
 
             calc_exponentials_lj(kxstart, kxend, tmp1, tmp2, denom);