Added implementation of softdouble rounding to int64_t
author Vitaly Tuzov <terfendail@mediana.jetos.com>
Fri, 8 Dec 2017 12:38:22 +0000 (15:38 +0300)
committer Vitaly Tuzov <terfendail@mediana.jetos.com>
Mon, 11 Dec 2017 11:29:32 +0000 (14:29 +0300)
modules/core/include/opencv2/core/softfloat.hpp
modules/core/src/softfloat.cpp
modules/core/test/test_math.cpp

index d5c77f9..c26fc47 100644 (file)
@@ -389,6 +389,9 @@ CV_EXPORTS int cvTrunc(const cv::softdouble& a);
 CV_EXPORTS int cvRound(const cv::softfloat&  a);
 CV_EXPORTS int cvRound(const cv::softdouble& a);
 
+/** @brief Rounds a number to the nearest 64-bit integer, with ties rounded to even */
+CV_EXPORTS int64_t cvRound64(const cv::softdouble& a);
+
 /** @brief Rounds a number down to integer */
 CV_EXPORTS int cvFloor(const cv::softfloat&  a);
 CV_EXPORTS int cvFloor(const cv::softdouble& a);
@@ -418,12 +421,18 @@ template<> inline short saturate_cast<short>(softdouble a) { return (short)std::
 template<> inline int saturate_cast<int>(softfloat  a) { return cvRound(a); }
 template<> inline int saturate_cast<int>(softdouble a) { return cvRound(a); }
 
-/** @brief Saturate cast to unsigned integer
+template<> inline int64_t saturate_cast<int64_t>(softfloat  a) { return cvRound(a); }
+template<> inline int64_t saturate_cast<int64_t>(softdouble a) { return cvRound64(a); }
+
+/** @brief Saturate cast to unsigned integer and unsigned long long integer
 We intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
 */
 template<> inline unsigned saturate_cast<unsigned>(softfloat  a) { return cvRound(a); }
 template<> inline unsigned saturate_cast<unsigned>(softdouble a) { return cvRound(a); }
 
+template<> inline uint64_t saturate_cast<uint64_t>(softfloat  a) { return cvRound(a); }
+template<> inline uint64_t saturate_cast<uint64_t>(softdouble a) { return cvRound64(a); }
+
 /** @brief Min and Max functions */
 inline softfloat  min(const softfloat&  a, const softfloat&  b) { return (a > b) ? b : a; }
 inline softdouble min(const softdouble& a, const softdouble& b) { return (a > b) ? b : a; }
index 4532a9f..421af31 100644 (file)
@@ -183,6 +183,7 @@ static bool f32_lt( float32_t, float32_t );
 | 64-bit (double-precision) floating-point operations.
 *----------------------------------------------------------------------------*/
 static int_fast32_t f64_to_i32( float64_t, uint_fast8_t, bool );
+static int_fast64_t f64_to_i64( float64_t, uint_fast8_t, bool );
 static int_fast32_t f64_to_i32_r_minMag( float64_t, bool );
 static float32_t f64_to_f32( float64_t );
 static float64_t f64_roundToInt( float64_t, uint_fast8_t, bool );
@@ -258,6 +259,8 @@ int cvRound(const cv::softdouble& a) { return cv::f64_to_i32(a, cv::round_near_e
 int cvFloor(const cv::softdouble& a) { return cv::f64_to_i32(a, cv::round_min, false); }
 int cvCeil (const cv::softdouble& a) { return cv::f64_to_i32(a, cv::round_max, false); }
 
+int64_t cvRound64(const cv::softdouble& a) { return cv::f64_to_i64(a, cv::round_near_even, false); }
+
 namespace cv
 {
 softdouble::operator softfloat() const { return f64_to_f32(*this); }
@@ -468,6 +471,7 @@ static float32_t softfloat_mulAddF32(uint_fast32_t, uint_fast32_t, uint_fast32_t
 
 /*----------------------------------------------------------------------------
 *----------------------------------------------------------------------------*/
+static int_fast64_t softfloat_roundToI64( bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool);
 
 struct exp16_sig64 { int_fast16_t exp; uint_fast64_t sig; };
 static struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t );
@@ -2026,6 +2030,59 @@ static int_fast32_t f64_to_i32( float64_t a, uint_fast8_t roundingMode, bool exa
     return softfloat_roundToI32( sign, sig, roundingMode, exact );
 }
 
+static int_fast64_t f64_to_i64(float64_t a, uint_fast8_t roundingMode, bool exact )
+{
+    uint_fast64_t uiA;
+    bool sign;
+    int_fast16_t exp;
+    uint_fast64_t sig;
+    int_fast16_t shiftDist;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA = a.v;
+    sign = signF64UI(uiA);
+    exp = expF64UI(uiA);
+    sig = fracF64UI(uiA);
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+#if (i64_fromNaN != i64_fromPosOverflow) || (i64_fromNaN != i64_fromNegOverflow)
+    if ((exp == 0x7FF) && sig) {
+#if (i64_fromNaN == i64_fromPosOverflow)
+        sign = 0;
+#elif (i64_fromNaN == i64_fromNegOverflow)
+        sign = 1;
+#else
+        raiseFlags(flag_invalid);
+        return i64_fromNaN;
+#endif
+    }
+#endif
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if (exp) sig |= UINT64_C(0x0010000000000000);
+    shiftDist = 0x433 - exp;
+    if (shiftDist <= 0) {
+        uint_fast64_t z = sig << -shiftDist;
+        if ((shiftDist < -11) || (z & UINT64_C(0x8000000000000000)))
+        {
+            raiseFlags(flag_invalid);
+            return sign ? i64_fromNegOverflow : i64_fromPosOverflow;
+        }
+        return sign ? -(int_fast64_t)z : (int_fast64_t)z;
+    }
+    else {
+        if (shiftDist < 64)
+            return
+                softfloat_roundToI64(
+                    sign, sig >> shiftDist, sig << (-shiftDist & 63), roundingMode, exact);
+        else
+            return
+                softfloat_roundToI64(
+                    sign, 0, (shiftDist == 64) ? sig : (sig != 0), roundingMode, exact);
+    }
+}
+
 static int_fast32_t f64_to_i32_r_minMag( float64_t a, bool exact )
 {
     uint_fast64_t uiA;
@@ -3076,6 +3133,46 @@ static int_fast32_t
     return sign ? i32_fromNegOverflow : i32_fromPosOverflow;
 }
 
+static int_fast64_t
+ softfloat_roundToI64(
+    bool sign, uint_fast64_t sig, uint_fast64_t sigExtra, uint_fast8_t roundingMode, bool exact )
+{
+    bool roundNearEven, doIncrement;
+    union { uint64_t ui; int64_t i; } uZ;
+    int_fast64_t z;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    roundNearEven = (roundingMode == round_near_even);
+    doIncrement = (UINT64_C(0x8000000000000000) <= sigExtra);
+    if (!roundNearEven && (roundingMode != round_near_maxMag)) {
+        doIncrement =
+            (roundingMode
+                == (sign ? round_min : round_max))
+            && sigExtra;
+    }
+    if (doIncrement) {
+        ++sig;
+        if (!sig) goto invalid;
+        sig &=
+            ~(uint_fast64_t)
+            (!(sigExtra & UINT64_C(0x7FFFFFFFFFFFFFFF))
+                & roundNearEven);
+    }
+    uZ.ui = sign ? (~sig + 1) : sig;
+    z = uZ.i;
+    if (z && ((z < 0) ^ sign)) goto invalid;
+    if (exact && sigExtra) {
+        raiseFlags(flag_inexact);
+    }
+    return z;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+invalid:
+    raiseFlags(flag_invalid);
+    return sign ? i64_fromNegOverflow : i64_fromPosOverflow;
+}
+
 static struct uint128
  softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist )
 {
index 411f8b6..0facbc3 100644 (file)
@@ -3736,4 +3736,130 @@ TEST(Core_SoftFloat, sincos64)
     }
 }
 
+TEST(Core_SoftFloat, CvRound)
+{
+    struct
+    {
+        uint64_t inVal;
+        int64_t out64;
+        int32_t out32;
+    } _values[] =
+    {
+        { 0x0123456789abcdefU,                     0,             0 }, // 3.51270056408850369812238561681E-303
+        { 0x0000000000000000U,                     0,             0 }, // 0
+        { 0x8000000000000000U,                     0,             0 }, // -0
+        { 0x000123456789abcdU,                     0,             0 }, // 1.5822747438273385725152200433E-309
+        { 0x800123456789abcdU,                     0,             0 }, // -1.5822747438273385725152200433E-309
+        { 0x7ff0000000000000U,             INT64_MAX,     INT32_MAX }, // +inf
+        { 0xfff0000000000000U,             INT64_MIN,     INT32_MIN }, // -inf
+        { 0x7ff0000000000001U,             INT64_MAX,     INT32_MAX }, // nan(casts to maximum value)
+        { 0xfff0000000000001U,             INT64_MAX,     INT32_MAX }, // nan(casts to maximum value)
+        { 0x7ffa5a5a5a5a5a5aU,             INT64_MAX,     INT32_MAX }, // nan(casts to maximum value)
+        { 0xfffa5a5a5a5a5a5aU,             INT64_MAX,     INT32_MAX }, // nan(casts to maximum value)
+        { 0x7fe123456789abcdU,             INT64_MAX,     INT32_MAX }, // 9.627645455595956656406699747E307
+        { 0xffe123456789abcdU,             INT64_MIN,     INT32_MIN }, // -9.627645455595956656406699747E307
+        { 0x43ffffffffffffffU,             INT64_MAX,     INT32_MAX }, // (2^53-1)*2^12
+        { 0xc3ffffffffffffffU,             INT64_MIN,     INT32_MIN }, // -(2^53-1)*2^12
+        { 0x43f0000000000000U,             INT64_MAX,     INT32_MAX }, // 2^64
+        { 0xc3f0000000000000U,             INT64_MIN,     INT32_MIN }, // -2^64
+        { 0x43efffffffffffffU,             INT64_MAX,     INT32_MAX }, // (2^53-1)*2^11
+        { 0xc3efffffffffffffU,             INT64_MIN,     INT32_MIN }, // -(2^53-1)*2^11
+        { 0x43e0000000000000U,             INT64_MAX,     INT32_MAX }, // 2^63
+        { 0xc3e0000000000000U, -0x7fffffffffffffff-1,     INT32_MIN }, // -2^63
+        { 0x43dfffffffffffffU,    0x7ffffffffffffc00,     INT32_MAX }, // (2^53-1)*2^10
+        { 0xc3dfffffffffffffU,   -0x7ffffffffffffc00,     INT32_MIN }, // -(2^53-1)*2^10
+        { 0x433fffffffffffffU,      0x1fffffffffffff,     INT32_MAX }, // (2^53-1)
+        { 0xc33fffffffffffffU,     -0x1fffffffffffff,     INT32_MIN }, // -(2^53-1)
+        { 0x432fffffffffffffU,      0x10000000000000,     INT32_MAX }, // (2^52-1) + 0.5
+        { 0xc32fffffffffffffU,     -0x10000000000000,     INT32_MIN }, // -(2^52-1) - 0.5
+        { 0x431fffffffffffffU,       0x8000000000000,     INT32_MAX }, // (2^51-1) + 0.75
+        { 0xc31fffffffffffffU,      -0x8000000000000,     INT32_MIN }, // -(2^51-1) - 0.75
+        { 0x431ffffffffffffeU,       0x8000000000000,     INT32_MAX }, // (2^51-1) + 0.5
+        { 0xc31ffffffffffffeU,      -0x8000000000000,     INT32_MIN }, // -(2^51-1) - 0.5
+        { 0x431ffffffffffffdU,       0x7ffffffffffff,     INT32_MAX }, // (2^51-1) + 0.25
+        { 0xc31ffffffffffffdU,      -0x7ffffffffffff,     INT32_MIN }, // -(2^51-1) - 0.25
+
+        { 0x41f0000000000000U,           0x100000000,     INT32_MAX }, // 2^32 = 4294967296
+        { 0xc1f0000000000000U,          -0x100000000,     INT32_MIN }, // -2^32 = -4294967296
+        { 0x41efffffffffffffU,           0x100000000,     INT32_MAX }, // 4294967295.99999952316284179688
+        { 0xc1efffffffffffffU,          -0x100000000,     INT32_MIN }, // -4294967295.99999952316284179688
+        { 0x41effffffff00000U,           0x100000000,     INT32_MAX }, // (2^32-1) + 0.5 = 4294967295.5
+        { 0xc1effffffff00000U,          -0x100000000,     INT32_MIN }, // -(2^32-1) - 0.5 = -4294967295.5
+        { 0x41efffffffe00000U,          0xffffffffll,     INT32_MAX }, // (2^32-1)
+        { 0xc1efffffffe00000U,         -0xffffffffll,     INT32_MIN }, // -(2^32-1)
+        { 0x41e0000000000000U,          0x80000000ll,     INT32_MAX }, // 2^31 = 2147483648
+        { 0xc1e0000000000000U,         -0x80000000ll, -0x7fffffff-1 }, // -2^31 = -2147483648
+        { 0x41dfffffffffffffU,          0x80000000ll,     INT32_MAX }, // 2147483647.99999976158142089844
+        { 0xc1dfffffffffffffU,         -0x80000000ll, -0x7fffffff-1 }, // -2147483647.99999976158142089844
+
+        { 0x41dffffffff00000U,          0x80000000ll,     INT32_MAX }, // (2^31-1) + 0.75
+        { 0xc1dffffffff00000U,         -0x80000000ll, -0x7fffffff-1 }, // -(2^31-1) - 0.75
+        { 0x41dfffffffe00001U,          0x80000000ll,     INT32_MAX }, // (2^31-1) + 0.5 + 2^-22
+        { 0xc1dfffffffe00001U,         -0x80000000ll, -0x7fffffff-1 }, // -(2^31-1) - 0.5 - 2^-22
+        { 0x41dfffffffe00000U,          0x80000000ll,     INT32_MAX }, // (2^31-1) + 0.5
+        { 0xc1dfffffffe00000U,         -0x80000000ll, -0x7fffffff-1 }, // -(2^31-1) - 0.5
+        { 0x41dfffffffdfffffU,            0x7fffffff,    0x7fffffff }, // (2^31-1) + 0.5 - 2^-22
+        { 0xc1dfffffffdfffffU,           -0x7fffffff,   -0x7fffffff }, // -(2^31-1) - 0.5 + 2^-22
+        { 0x41dfffffffd00000U,            0x7fffffff,    0x7fffffff }, // (2^31-1) + 0.25
+        { 0xc1dfffffffd00000U,           -0x7fffffff,   -0x7fffffff }, // -(2^31-1) - 0.25
+        { 0x41dfffffffc00000U,            0x7fffffff,    0x7fffffff }, // (2^31-1)
+        { 0xc1dfffffffc00000U,           -0x7fffffff,   -0x7fffffff }, // -(2^31-1)
+        { 0x41d0000000000000U,            0x40000000,    0x40000000 }, // 2^30 = 2147483648
+        { 0xc1d0000000000000U,           -0x40000000,   -0x40000000 }, // -2^30 = -2147483648
+
+        { 0x4006000000000000U,                     3,             3 }, // 2.75
+        { 0xc006000000000000U,                    -3,            -3 }, // -2.75
+        { 0x4004000000000001U,                     3,             3 }, // 2.5 + 2^-51
+        { 0xc004000000000001U,                    -3,            -3 }, // -2.5 - 2^-51
+        { 0x4004000000000000U,                     2,             2 }, // 2.5
+        { 0xc004000000000000U,                    -2,            -2 }, // -2.5
+        { 0x4003ffffffffffffU,                     2,             2 }, // 2.5 - 2^-51
+        { 0xc003ffffffffffffU,                    -2,            -2 }, // -2.5 + 2^-51
+        { 0x4002000000000000U,                     2,             2 }, // 2.25
+        { 0xc002000000000000U,                    -2,            -2 }, // -2.25
+
+        { 0x3ffc000000000000U,                     2,             2 }, // 1.75
+        { 0xbffc000000000000U,                    -2,            -2 }, // -1.75
+        { 0x3ff8000000000001U,                     2,             2 }, // 1.5 + 2^-52
+        { 0xbff8000000000001U,                    -2,            -2 }, // -1.5 - 2^-52
+        { 0x3ff8000000000000U,                     2,             2 }, // 1.5
+        { 0xbff8000000000000U,                    -2,            -2 }, // -1.5
+        { 0x3ff7ffffffffffffU,                     1,             1 }, // 1.5 - 2^-52
+        { 0xbff7ffffffffffffU,                    -1,            -1 }, // -1.5 + 2^-52
+        { 0x3ff4000000000000U,                     1,             1 }, // 1.25
+        { 0xbff4000000000000U,                    -1,            -1 }, // -1.25
+
+        { 0x3fe8000000000000U,                     1,             1 }, // 0.75
+        { 0xbfe8000000000000U,                    -1,            -1 }, // -0.75
+        { 0x3fe0000000000001U,                     1,             1 }, // 0.5 + 2^-53
+        { 0xbfe0000000000001U,                    -1,            -1 }, // -0.5 - 2^-53
+        { 0x3fe0000000000000U,                     0,             0 }, // 0.5
+        { 0xbfe0000000000000U,                     0,             0 }, // -0.5
+
+        { 0x3fd8000000000000U,                     0,             0 }, // 0.375
+        { 0xbfd8000000000000U,                     0,             0 }, // -0.375
+        { 0x3fd0000000000000U,                     0,             0 }, // 0.25
+        { 0xbfd0000000000000U,                     0,             0 }, // -0.25
+
+        { 0x0ff123456789abcdU,                     0,             0 }, // 6.89918601543515033558134828315E-232
+        { 0x8ff123456789abcdU,                     0,             0 }  // -6.89918601543515033558134828315E-232
+    };
+    struct testvalues
+    {
+        softdouble inVal;
+        int64_t out64;
+        int32_t out32;
+    } *values = (testvalues*)_values;
+
+    for (int i = 0, maxi = sizeof(_values) / sizeof(_values[0]); i < maxi; i++)
+    {
+        EXPECT_EQ(values[i].out64, cvRound64(values[i].inVal));
+        EXPECT_EQ(values[i].out64, saturate_cast<int64_t>(values[i].inVal));
+        EXPECT_EQ((uint64_t)(values[i].out64), saturate_cast<uint64_t>(values[i].inVal));
+        EXPECT_EQ(values[i].out32, cvRound(values[i].inVal));
+        EXPECT_EQ(values[i].out32, saturate_cast<int32_t>(values[i].inVal));
+        EXPECT_EQ((uint32_t)(values[i].out32), saturate_cast<uint32_t>(values[i].inVal));
+    }
+}
+
 /* End of file. */