From: Berk Hess Date: Mon, 11 Aug 2014 12:26:22 +0000 (+0200) Subject: Corrected SIMD math overflow documentation X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=546f7c57431c6c5d0c88017243ad3240fbd65092;p=alexxy%2Fgromacs.git Corrected SIMD math overflow documentation Added more details to the SIMD pmecorr and exp documentation. Corrected the source of the PME-LJ kernel overflow. Change-Id: If3f5a27a3bb49ebb67fc24d43ed849e75175cf3c --- diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h index 99385ff3ca..c051d7e763 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h @@ -780,7 +780,7 @@ #endif #endif - /* Mask for the cut-off to avoid overflow in gmx_simd_exp_r */ + /* Mask for the cut-off to avoid overflow of cr2^2 */ cr2_S0 = gmx_simd_mul_r(lje_c2_S, gmx_simd_blendzero_r(rsq_S0, wco_vdw_S0)); #ifndef HALF_LJ cr2_S2 = gmx_simd_mul_r(lje_c2_S, gmx_simd_blendzero_r(rsq_S2, wco_vdw_S2)); diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h index c567589ed5..64467a121c 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h @@ -1014,7 +1014,7 @@ #endif #endif - /* Mask for the cut-off to avoid overflow in gmx_simd_exp_r */ + /* Mask for the cut-off to avoid overflow of cr2^2 */ cr2_S0 = gmx_simd_mul_r(lje_c2_S, gmx_simd_blendzero_r(rsq_S0, wco_vdw_S0)); cr2_S1 = gmx_simd_mul_r(lje_c2_S, gmx_simd_blendzero_r(rsq_S1, wco_vdw_S1)); #ifndef HALF_LJ diff --git a/src/gromacs/simd/simd_math.h b/src/gromacs/simd/simd_math.h index 5013ab595d..4c03a250bf 100644 --- a/src/gromacs/simd/simd_math.h +++ b/src/gromacs/simd/simd_math.h @@ -345,7 
+345,8 @@ gmx_simd_exp2_f(gmx_simd_float_t x) * extended precision arithmetics to improve accuracy. * * \param x Argument. - * \result exp(x). Undefined if input argument caused overflow. + * \result exp(x). Undefined if input argument caused overflow, + * which can happen if abs(x) \> 7e13. */ static gmx_inline gmx_simd_float_t gmx_simd_exp_f(gmx_simd_float_t x) @@ -1125,9 +1126,9 @@ gmx_simd_atan2_f(gmx_simd_float_t y, gmx_simd_float_t x) * that we can leave out of this routine. * * For pme tolerances of 1e-3 to 1e-8 and cutoffs of 0.5nm to 1.8nm, - * the argument \f$beta r\f$ will be in the range 0.15 to ~4. Use your - * favorite plotting program to realize how well-behaved \f$\frac{\mbox{erf}(z)}{z}\f$ is - * in this range! + * the argument \f$\beta r\f$ will be in the range 0.15 to ~4, which is + * the range used for the minimax fit. Use your favorite plotting program + * to realize how well-behaved \f$\frac{\mbox{erf}(z)}{z}\f$ is in this range! * * We approximate \f$f(z)=\mbox{erf}(z)/z\f$ with a rational minimax polynomial. * However, it turns out it is more efficient to approximate \f$f(z)/z\f$ and @@ -1171,8 +1172,11 @@ gmx_simd_atan2_f(gmx_simd_float_t y, gmx_simd_float_t x) * with the vector connecting the two particles and you have your * vectorial force to add to the particles. * - * This approximation achieves an accuracy slightly lower than 1e-6; when - * added to \f$1/r\f$ the error will be insignificant. + * This approximation achieves an error slightly lower than 1e-6 + * in single precision and 1e-11 in double precision + * for arguments smaller than 16 (\f$\beta r \leq 4 \f$); + * when added to \f$1/r\f$ the error will be insignificant. + * For \f$\beta r \geq 7206\f$ the return value can be inf or NaN. * */ static gmx_simd_float_t @@ -1248,8 +1252,12 @@ gmx_simd_pmecorrF_f(gmx_simd_float_t z2) * 6. Subtract the result from \f$1/r\f$, multiply by the product of the charges, * and you have your potential. 
* - * This approximation achieves an accuracy slightly lower than 1e-6; when - * added to \f$1/r\f$ the error will be insignificant. + * This approximation achieves an error slightly lower than 1e-6 + * in single precision and 4e-11 in double precision + * for arguments smaller than 16 (\f$ 0.15 \leq \beta r \leq 4 \f$); + * for \f$ \beta r \leq 0.15\f$ the error can be twice as high; + * when added to \f$1/r\f$ the error will be insignificant. + * For \f$\beta r \geq 7142\f$ the return value can be inf or NaN. */ static gmx_simd_float_t gmx_simd_pmecorrV_f(gmx_simd_float_t z2)