From: Ilya Lavrenov
Date: Wed, 2 Jul 2014 21:28:31 +0000 (+0400)
Subject: sse2 optimization of inv_sqrt_64f
X-Git-Tag: submit/tizen_ivi/20141117.190038~2^2~296^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e5b63d43f2422037c7660d8712d76b62ced0a2df;p=profile%2Fivi%2Fopencv.git

sse2 optimization of inv_sqrt_64f
---

diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp
index 7a02bd6..46a80cd 100644
--- a/modules/core/src/mathfuncs.cpp
+++ b/modules/core/src/mathfuncs.cpp
@@ -348,7 +348,18 @@ static void InvSqrt_32f(const float* src, float* dst, int len)
 
 static void InvSqrt_64f(const double* src, double* dst, int len)
 {
-    for( int i = 0; i < len; i++ )
+    int i = 0;
+
+#if CV_SSE2
+    if (USE_SSE2)
+    {
+        __m128d v_1 = _mm_set1_pd(1.0);
+        for ( ; i <= len - 2; i += 2)
+            _mm_storeu_pd(dst + i, _mm_div_pd(v_1, _mm_sqrt_pd(_mm_loadu_pd(src + i))));
+    }
+#endif
+
+    for( ; i < len; i++ )
         dst[i] = 1/std::sqrt(src[i]);
 }
 