From: Berk Hess <hess@kth.se>
Date: Mon, 11 Aug 2014 12:26:22 +0000 (+0200)
Subject: Corrected SIMD math overflow documentation
X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=546f7c57431c6c5d0c88017243ad3240fbd65092;p=alexxy%2Fgromacs.git

Corrected SIMD math overflow documentation

Added more details to the SIMD pmecorr and exp documentation.
Corrected the source of the PME-LJ kernel overflow.

Change-Id: If3f5a27a3bb49ebb67fc24d43ed849e75175cf3c
---

diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h
index 99385ff3ca..c051d7e763 100644
--- a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h
+++ b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h
@@ -780,7 +780,7 @@
 #endif
 #endif
 
-        /* Mask for the cut-off to avoid overflow in gmx_simd_exp_r */
+        /* Mask for the cut-off to avoid overflow of cr2^2 */
         cr2_S0        = gmx_simd_mul_r(lje_c2_S, gmx_simd_blendzero_r(rsq_S0, wco_vdw_S0));
 #ifndef HALF_LJ
         cr2_S2        = gmx_simd_mul_r(lje_c2_S, gmx_simd_blendzero_r(rsq_S2, wco_vdw_S2));
diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h
index c567589ed5..64467a121c 100644
--- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h
+++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h
@@ -1014,7 +1014,7 @@
 #endif
 #endif
 
-        /* Mask for the cut-off to avoid overflow in gmx_simd_exp_r */
+        /* Mask for the cut-off to avoid overflow of cr2^2 */
         cr2_S0        = gmx_simd_mul_r(lje_c2_S, gmx_simd_blendzero_r(rsq_S0, wco_vdw_S0));
         cr2_S1        = gmx_simd_mul_r(lje_c2_S, gmx_simd_blendzero_r(rsq_S1, wco_vdw_S1));
 #ifndef HALF_LJ
diff --git a/src/gromacs/simd/simd_math.h b/src/gromacs/simd/simd_math.h
index 5013ab595d..4c03a250bf 100644
--- a/src/gromacs/simd/simd_math.h
+++ b/src/gromacs/simd/simd_math.h
@@ -345,7 +345,8 @@ gmx_simd_exp2_f(gmx_simd_float_t x)
  * extended precision arithmetics to improve accuracy.
  *
  * \param x Argument.
- * \result exp(x). Undefined if input argument caused overflow.
+ * \result exp(x). Undefined if input argument caused overflow,
+ * which can happen if abs(x) \> 7e13.
  */
 static gmx_inline gmx_simd_float_t
 gmx_simd_exp_f(gmx_simd_float_t x)
@@ -1125,9 +1126,9 @@ gmx_simd_atan2_f(gmx_simd_float_t y, gmx_simd_float_t x)
  * that we can leave out of this routine.
  *
  * For pme tolerances of 1e-3 to 1e-8 and cutoffs of 0.5nm to 1.8nm,
- * the argument \f$beta r\f$ will be in the range 0.15 to ~4. Use your
- * favorite plotting program to realize how well-behaved \f$\frac{\mbox{erf}(z)}{z}\f$ is
- * in this range!
+ * the argument \f$\beta r\f$ will be in the range 0.15 to ~4, which is
+ * the range used for the minimax fit. Use your favorite plotting program
+ * to realize how well-behaved \f$\frac{\mbox{erf}(z)}{z}\f$ is in this range!
  *
  * We approximate \f$f(z)=\mbox{erf}(z)/z\f$ with a rational minimax polynomial.
  * However, it turns out it is more efficient to approximate \f$f(z)/z\f$ and
@@ -1171,8 +1172,11 @@ gmx_simd_atan2_f(gmx_simd_float_t y, gmx_simd_float_t x)
  *    with the vector connecting the two particles and you have your
  *    vectorial force to add to the particles.
  *
- * This approximation achieves an accuracy slightly lower than 1e-6; when
- * added to \f$1/r\f$ the error will be insignificant.
+ * This approximation achieves an error slightly lower than 1e-6
+ * in single precision and 1e-11 in double precision
+ * for arguments smaller than 16 (\f$\beta r \leq 4 \f$);
+ * when added to \f$1/r\f$ the error will be insignificant.
+ * For \f$\beta r \geq 7206\f$ the return value can be inf or NaN.
  *
  */
 static gmx_simd_float_t
@@ -1248,8 +1252,12 @@ gmx_simd_pmecorrF_f(gmx_simd_float_t z2)
  * 6. Subtract the result from \f$1/r\f$, multiply by the product of the charges,
  *    and you have your potential.
  *
- * This approximation achieves an accuracy slightly lower than 1e-6; when
- * added to \f$1/r\f$ the error will be insignificant.
+ * This approximation achieves an error slightly lower than 1e-6
+ * in single precision and 4e-11 in double precision
+ * for arguments smaller than 16 (\f$ 0.15 \leq \beta r \leq 4 \f$);
+ * for \f$ \beta r \leq 0.15\f$ the error can be twice as high;
+ * when added to \f$1/r\f$ the error will be insignificant.
+ * For \f$\beta r \geq 7142\f$ the return value can be inf or NaN.
  */
 static gmx_simd_float_t
 gmx_simd_pmecorrV_f(gmx_simd_float_t z2)