From: Tomoaki Teshima
Date: Sat, 22 Oct 2016 23:51:25 +0000 (+0900)
Subject: brush up divSaturate of carotene
X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~1476^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2a91453ef10ca22486f9d3a05e1fa20f1874a791;p=platform%2Fupstream%2Fopencv.git

brush up divSaturate of carotene

  * use rounding to nearest, same as divSaturateQ
---

diff --git a/3rdparty/carotene/src/div.cpp b/3rdparty/carotene/src/div.cpp
index dbd60e7..cb5f1e7 100644
--- a/3rdparty/carotene/src/div.cpp
+++ b/3rdparty/carotene/src/div.cpp
@@ -74,6 +74,13 @@ template <>
 inline uint32x4_t divSaturateQ(const uint32x4_t &v1, const uint32x4_t &v2, const float scale)
 { return vcvtq_u32_f32(vroundq(vmulq_f32(vmulq_n_f32(vcvtq_f32_u32(v1), scale), internal::vrecpq_f32(vcvtq_f32_u32(v2))))); }
 
+inline float32x2_t vround(const float32x2_t& v)
+{
+    const int32x2_t signMask = vdup_n_s32(1 << 31), half = vreinterpret_s32_f32(vdup_n_f32(0.5f));
+    float32x2_t v_addition = vreinterpret_f32_s32(vorr_s32(half, vand_s32(signMask, vreinterpret_s32_f32(v))));
+    return vadd_f32(v, v_addition);
+}
+
 template <typename T>
 inline T divSaturate(const T &v1, const T &v2, const float scale)
 {
@@ -81,10 +88,10 @@ inline T divSaturate(const T &v1, const T &v2, const float scale)
 }
 template <>
 inline int32x2_t divSaturate(const int32x2_t &v1, const int32x2_t &v2, const float scale)
-{ return vcvt_s32_f32(vmul_f32(vmul_n_f32(vcvt_f32_s32(v1), scale), internal::vrecp_f32(vcvt_f32_s32(v2)))); }
+{ return vcvt_s32_f32(vround(vmul_f32(vmul_n_f32(vcvt_f32_s32(v1), scale), internal::vrecp_f32(vcvt_f32_s32(v2))))); }
 template <>
 inline uint32x2_t divSaturate(const uint32x2_t &v1, const uint32x2_t &v2, const float scale)
-{ return vcvt_u32_f32(vmul_f32(vmul_n_f32(vcvt_f32_u32(v1), scale), internal::vrecp_f32(vcvt_f32_u32(v2)))); }
+{ return vcvt_u32_f32(vround(vmul_f32(vmul_n_f32(vcvt_f32_u32(v1), scale), internal::vrecp_f32(vcvt_f32_u32(v2))))); }
 
 
 template