[builtins] Make __div[sdt]f3 handle denormal results

author Anatoly Trosinenko <atrosinenko@accesssoftek.com>

Thu, 27 Aug 2020 15:33:56 +0000 (18:33 +0300)

committer Anatoly Trosinenko <atrosinenko@accesssoftek.com>

Tue, 1 Sep 2020 18:52:34 +0000 (21:52 +0300)
author Anatoly Trosinenko <atrosinenko@accesssoftek.com>
Thu, 27 Aug 2020 15:33:56 +0000 (18:33 +0300)
committer Anatoly Trosinenko <atrosinenko@accesssoftek.com>
Tue, 1 Sep 2020 18:52:34 +0000 (21:52 +0300)
diff --git a/compiler-rt/lib/builtins/fp_div_impl.inc b/compiler-rt/lib/builtins/fp_div_impl.inc

index 143c1f4..29bcd19 100644 (file)
--- a/compiler-rt/lib/builtins/fp_div_impl.inc
+++ b/compiler-rt/lib/builtins/fp_div_impl.inc
@@ -348,12 +348,17 @@ static __inline fp_t __divXf3__(fp_t a, fp_t b) {
      // effectively doubling its value as well as its error estimation.
      residualLo = (aSignificand << (significandBits + 1)) - quotient_UQ1 * bSignificand;
      writtenExponent -= 1;
+    aSignificand <<= 1;
    } else {
      // Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it
      // to UQ1.SB by right shifting by 1. Least significant bit is omitted.
      quotient_UQ1 >>= 1;
      residualLo = (aSignificand << significandBits) - quotient_UQ1 * bSignificand;
    }
+  // NB: residualLo is calculated above for the normal result case.
+  //     It is re-computed on the denormal path, which is expected to be
+  //     less performance-sensitive.
+
    // Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB
    // Each NextAfter() increments the floating point value by at least 2^-SB
    // (more, if exponent was incremented).
@@ -381,19 +386,25 @@ static __inline fp_t __divXf3__(fp_t a, fp_t b) {
    // Now, quotient_UQ1_SB <= the correctly-rounded result
    // and may need taking NextAfter() up to 3 times (see error estimates above)
    // r = a - b * q
+  rep_t absResult;
+  if (writtenExponent > 0) {
+    // Clear the implicit bit
+    absResult = quotient_UQ1 & significandMask;
+    // Insert the exponent
+    absResult |= (rep_t)writtenExponent << significandBits;
+    residualLo <<= 1;
+  } else {
+    // Prevent shift amount from being negative
+    if (significandBits + writtenExponent < 0)
+      return fromRep(quotientSign);
  
-  if (writtenExponent < 0) {
-    // Result is definitely subnormal, flushing to zero
-    return fromRep(quotientSign);
-  }
+    absResult = quotient_UQ1 >> (-writtenExponent + 1);
  
-  // Clear the implicit bit
-  rep_t absResult = quotient_UQ1 & significandMask;
-  // Insert the exponent
-  absResult |= (rep_t)writtenExponent << significandBits;
+    // Multiplied by two to prevent the shift amount from being negative
+    residualLo = (aSignificand << (significandBits + writtenExponent)) - (absResult * bSignificand << 1);
+  }
  
    // Round
-  residualLo <<= 1;
    residualLo += absResult & 1; // tie to even
    // The above line conditionally turns the below LT comparison into LTE
    absResult += residualLo > bSignificand;
@@ -404,11 +415,5 @@ static __inline fp_t __divXf3__(fp_t a, fp_t b) {
  #if defined(QUAD_PRECISION)
    absResult += absResult < infRep && residualLo > (4 + 1) * bSignificand;
  #endif
-
-  if ((absResult & ~significandMask) == 0) {
-    // Result is subnormal, flushing to zero
-    return fromRep(quotientSign);
-  }
-  // Result is normal, insert the sign and return
    return fromRep(absResult | quotientSign);
  }
diff --git a/compiler-rt/test/builtins/Unit/divdf3_test.c b/compiler-rt/test/builtins/Unit/divdf3_test.c

index 312602b..1b8f2b3 100644 (file)
--- a/compiler-rt/test/builtins/Unit/divdf3_test.c
+++ b/compiler-rt/test/builtins/Unit/divdf3_test.c
@@ -92,6 +92,13 @@ int main()
      if (test__divdf3(0x1.0p+0, 0x1.00000001p+0, UINT64_C(0x3fefffffffe00000)))
        return 1;
  
+    // smallest normal value divided by 2.0
+    if (test__divdf3(0x1.0p-1022, 2., UINT64_C(0x0008000000000000)))
+      return 1;
+    // smallest subnormal result
+    if (test__divdf3(0x1.0p-1022, 0x1.0p+52, UINT64_C(0x0000000000000001)))
+      return 1;
+
      // some misc test cases obtained by fuzzing against h/w implementation
      if (test__divdf3(0x1.fdc239dd64735p-658, -0x1.fff9364c0843fp-948, UINT64_C(0xd20fdc8fc0ceffb1)))
        return 1;
@@ -99,6 +106,12 @@ int main()
        return 1;
      if (test__divdf3(-0x1.da7dfe6048b8bp-875, 0x1.ffc7ea3ff60a4p-610, UINT64_C(0xaf5dab1fe0269e2a)))
        return 1;
+    if (test__divdf3(0x1.0p-1022, 0x1.9p+5, UINT64_C(0x000051eb851eb852)))
+      return 1;
+    if (test__divdf3(0x1.0p-1022, 0x1.0028p+41, UINT64_C(0x00000000000007ff)))
+      return 1;
+    if (test__divdf3(0x1.0p-1022, 0x1.0028p+52, UINT64_C(0x1)))
+      return 1;
  
      return 0;
  }
diff --git a/compiler-rt/test/builtins/Unit/divsf3_test.c b/compiler-rt/test/builtins/Unit/divsf3_test.c

index 197aad7..7a783cd 100644 (file)
--- a/compiler-rt/test/builtins/Unit/divsf3_test.c
+++ b/compiler-rt/test/builtins/Unit/divsf3_test.c
@@ -92,5 +92,20 @@ int main()
      if (test__divsf3(0x1.0p+0F, 0x1.0001p+0F, UINT32_C(0x3f7fff00)))
        return 1;
  
+    // smallest normal value divided by 2.0
+    if (test__divsf3(0x1.0p-126F, 2.0F, UINT32_C(0x00400000)))
+      return 1;
+    // smallest subnormal result
+    if (test__divsf3(0x1.0p-126F, 0x1p+23F, UINT32_C(0x00000001)))
+      return 1;
+
+    // some misc test cases obtained by fuzzing against h/w implementation
+    if (test__divsf3(-0x1.3e75e6p-108F, -0x1.cf372p+38F, UINT32_C(0x00000006)))
+      return 1;
+    if (test__divsf3(0x1.e77c54p+81F, -0x1.e77c52p-47F, UINT32_C(0xff800000)))
+      return 1;
+    if (test__divsf3(0x1.fffffep-126F, 2.F, UINT32_C(0x00800000)))
+      return 1;
+
      return 0;
  }
diff --git a/compiler-rt/test/builtins/Unit/divtf3_test.c b/compiler-rt/test/builtins/Unit/divtf3_test.c

index b0bba02..f77f4cf 100644 (file)
--- a/compiler-rt/test/builtins/Unit/divtf3_test.c
+++ b/compiler-rt/test/builtins/Unit/divtf3_test.c
@@ -146,6 +146,13 @@ int main()
                       UINT64_C(0xfffe000000000000)))
          return 1;
  
+    // smallest normal value divided by 2.0
+    if (test__divtf3(0x1.0p-16382L, 2.L, UINT64_C(0x0000800000000000), UINT64_C(0x0)))
+      return 1;
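+    // smallest subnormal result -- NOTE(review): 0x1p-1022L / 0x1p+52L is a normal quad-precision value; operands look copied from the double test (0x1.0p-16382L / 0x1p+112L intended?) -- confirm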
+    if (test__divtf3(0x1.0p-1022L, 0x1p+52L, UINT64_C(0x0), UINT64_C(0x1)))
+      return 1;
+
      // any / any
      if (test__divtf3(0x1.a23b45362464523375893ab4cdefp+5L,
                       0x1.eedcbaba3a94546558237654321fp-1L,
author	Anatoly Trosinenko <atrosinenko@accesssoftek.com>
	Thu, 27 Aug 2020 15:33:56 +0000 (18:33 +0300)
committer	Anatoly Trosinenko <atrosinenko@accesssoftek.com>
	Tue, 1 Sep 2020 18:52:34 +0000 (21:52 +0300)
compiler-rt/lib/builtins/fp_div_impl.inc		patch \| blob \| history
compiler-rt/test/builtins/Unit/divdf3_test.c		patch \| blob \| history
compiler-rt/test/builtins/Unit/divsf3_test.c		patch \| blob \| history
compiler-rt/test/builtins/Unit/divtf3_test.c		patch \| blob \| history