Fix frexp() for ARM SVE SIMD
author Gilles Gouaillardet <gilles@rist.or.jp>
Wed, 25 Nov 2020 07:06:46 +0000 (16:06 +0900)
committer Gilles Gouaillardet <gilles@rist.or.jp>
Wed, 25 Nov 2020 07:17:20 +0000 (16:17 +0900)
src/gromacs/simd/impl_arm_sve/impl_arm_sve_simd_double.h
src/gromacs/simd/impl_arm_sve/impl_arm_sve_simd_float.h

index 04cf849e46c882504e3cd7107be4e846963d31a2..c650ad276e34aa9b16ddb8779c009894cd7630b7 100644 (file)
@@ -387,15 +387,23 @@ static inline SimdDouble gmx_simdcall frexp(SimdDouble value, SimdDInt32* expone
     svint64_t         iExponent;
 
     iExponent = svand_s64_x(pg, svreinterpret_s64_f64(value.simdInternal_), exponentMask);
-    // iExponent               = svsub_s64_x(pg, svlsr_n_s64_x(pg, iExponent, 52), exponentBias);
     iExponent = svsub_s64_x(
             pg, svreinterpret_s64_u64(svlsr_n_u64_x(pg, svreinterpret_u64_s64(iExponent), 52)), exponentBias);
 
-    exponent->simdInternal_ = iExponent;
 
-    return { svreinterpret_f64_s64(svorr_s64_x(
+    svfloat64_t result = svreinterpret_f64_s64(svorr_s64_x(
             pg, svand_s64_x(pg, svreinterpret_s64_f64(value.simdInternal_), mantissaMask),
-            svreinterpret_s64_f64(half))) };
+            svreinterpret_s64_f64(half)));
+
+    if (opt == MathOptimization::Safe)
+    {
+        svbool_t valueIsZero = svcmpeq_n_f64(pg, value.simdInternal_, 0.0);
+        iExponent            = svsel_s64(valueIsZero, svdup_s64(0), iExponent);
+        result               = svsel_f64(valueIsZero, value.simdInternal_, result);
+    }
+
+    exponent->simdInternal_ = iExponent;
+    return { result };
 }
 
 template<MathOptimization opt = MathOptimization::Safe>
index ffa53071d2336c8ac5407717be77ef734f4e4ccd..1171124f1924536ee7887042bc84d506f5dac37e 100644 (file)
@@ -392,11 +392,21 @@ static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent
     iExponent = svand_s32_x(pg, svreinterpret_s32_f32(value.simdInternal_), exponentMask);
     iExponent = svsub_s32_x(
             pg, svreinterpret_s32_u32(svlsr_n_u32_x(pg, svreinterpret_u32_s32(iExponent), 23)), exponentBias);
-    exponent->simdInternal_ = iExponent;
 
-    return { svreinterpret_f32_s32(svorr_s32_x(
+
+    svfloat32_t result = svreinterpret_f32_s32(svorr_s32_x(
             pg, svand_s32_x(pg, svreinterpret_s32_f32(value.simdInternal_), mantissaMask),
-            svreinterpret_s32_f32(half))) };
+            svreinterpret_s32_f32(half)));
+
+    if (opt == MathOptimization::Safe)
+    {
+        svbool_t valueIsZero = svcmpeq_n_f32(pg, value.simdInternal_, 0.0F);
+        iExponent            = svsel_s32(valueIsZero, svdup_s32(0), iExponent);
+        result               = svsel_f32(valueIsZero, value.simdInternal_, result);
+    }
+
+    exponent->simdInternal_ = iExponent;
+    return { result };
 }
 
 template<MathOptimization opt = MathOptimization::Safe>