From 72c23e7bbb0550466ec329d620d355c53b906bc8 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Mon, 5 Dec 2011 22:58:27 +0000 Subject: [PATCH] fixed most of test_core failures (except for SVD) --- modules/core/src/arithm.cpp | 8 ++++---- modules/core/src/lapack.cpp | 6 +++--- modules/core/src/matmul.cpp | 7 ++++--- modules/core/src/stat.cpp | 4 ++-- modules/core/test/test_math.cpp | 2 +- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 6c480b6..8b1ef60 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -140,7 +140,7 @@ void vBinOp16(const T* src1, size_t step1, const T* src2, size_t step2, r0 = op16(r0,_mm_loadu_si128((const __m128i*)(src2 + x))); r1 = op16(r1,_mm_loadu_si128((const __m128i*)(src2 + x + 8))); _mm_storeu_si128((__m128i*)(dst + x), r0); - _mm_storeu_si128((__m128i*)(dst + x + 16), r1); + _mm_storeu_si128((__m128i*)(dst + x + 8), r1); } for( ; x <= sz.width - 4; x += 4 ) { @@ -194,7 +194,7 @@ void vBinOp32s(const int* src1, size_t step1, const int* src2, size_t step2, r0 = op32(r0,_mm_load_si128((const __m128i*)(src2 + x))); r1 = op32(r1,_mm_load_si128((const __m128i*)(src2 + x + 4))); _mm_store_si128((__m128i*)(dst + x), r0); - _mm_store_si128((__m128i*)(dst + x + 16), r1); + _mm_store_si128((__m128i*)(dst + x + 4), r1); } else for( ; x <= sz.width - 8; x += 8 ) @@ -204,7 +204,7 @@ void vBinOp32s(const int* src1, size_t step1, const int* src2, size_t step2, r0 = op32(r0,_mm_loadu_si128((const __m128i*)(src2 + x))); r1 = op32(r1,_mm_loadu_si128((const __m128i*)(src2 + x + 4))); _mm_storeu_si128((__m128i*)(dst + x), r0); - _mm_storeu_si128((__m128i*)(dst + x + 16), r1); + _mm_storeu_si128((__m128i*)(dst + x + 4), r1); } } #endif @@ -452,7 +452,7 @@ struct _VAbsDiff64f struct _VAnd8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_and_si128(a,b); }}; struct _VOr8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_or_si128(a,b); }}; struct _VXor8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_xor_si128(a,b); }}; -struct _VNot8u { __m128i operator()(const __m128i& a, const __m128i&) const { return _mm_andnot_si128(_mm_setzero_si128(),a); }}; +struct _VNot8u { __m128i operator()(const __m128i& a, const __m128i&) const { return _mm_xor_si128(_mm_set1_epi32(-1),a); }}; #endif diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp index 198759d..cf26917 100644 --- a/modules/core/src/lapack.cpp +++ b/modules/core/src/lapack.cpp @@ -547,12 +547,12 @@ JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep, int m, int for( i = 0; i < n; i++ ) { - for( k = 0, s = 0; k < m; k++ ) + for( k = 0, sd = 0; k < m; k++ ) { _Tp t = At[i*astep + k]; - s += (double)t*t; + sd += (double)t*t; } - W[i] = s; + W[i] = sd; if( Vt ) { diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index 45cb158..0b967bd 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -2598,13 +2598,14 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len) #if CV_SSE2 if( USE_SSE2 ) { - int j, len0 = len & -4, blockSize0 = (1 << 15), blockSize; + int j, len0 = len & -4, blockSize0 = (1 << 13), blockSize; __m128i z = _mm_setzero_si128(); while( i < len0 ) { blockSize = std::min(len0 - i, blockSize0); __m128i s = _mm_setzero_si128(); - for( j = 0; j <= blockSize - 16; j += 16 ) + j = 0; + for( ; j <= blockSize - 16; j += 16 ) { __m128i b0 = _mm_loadu_si128((const __m128i*)(src1 + j)); __m128i b1 = _mm_loadu_si128((const __m128i*)(src2 + j)); @@ -2614,7 +2615,7 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len) s1 = _mm_unpacklo_epi8(b1, z); s3 = _mm_unpackhi_epi8(b1, z); s0 = _mm_madd_epi16(s0, s1); - s1 = _mm_madd_epi16(s2, s3); + s2 = _mm_madd_epi16(s2, s3); s = _mm_add_epi32(s, s0); s = _mm_add_epi32(s, s2); } diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index c9fcd83..b0bf4da 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -850,9 +850,9 @@ float normL1_(const float* a, const float* b, int n) if( USE_SSE2 ) { float CV_DECL_ALIGNED(16) buf[4]; - static const float CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; + static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps(); - __m128 absmask = _mm_load_ps(absbuf); + __m128 absmask = _mm_load_ps((const float*)absbuf); for( ; j <= n - 8; j += 8 ) { diff --git a/modules/core/test/test_math.cpp b/modules/core/test/test_math.cpp index 0b9dbd5..a653743 100644 --- a/modules/core/test/test_math.cpp +++ b/modules/core/test/test_math.cpp @@ -2153,7 +2153,7 @@ void Core_SVBkSbTest::prepare_to_validation( int ) CvMat _w = w, _wdb = wdb; // use exactly the same threshold as in icvSVD... , // so the changes in the library and here should be synchronized. - double threshold = cv::sum(w)[0]*(is_float ? FLT_EPSILON*10 : DBL_EPSILON*2); + double threshold = cv::sum(w)[0]*(DBL_EPSILON*2);//(is_float ? FLT_EPSILON*10 : DBL_EPSILON*2); wdb = Scalar::all(0); for( i = 0; i < min_size; i++ ) -- 2.7.4