r0 = op16(r0,_mm_loadu_si128((const __m128i*)(src2 + x)));
r1 = op16(r1,_mm_loadu_si128((const __m128i*)(src2 + x + 8)));
_mm_storeu_si128((__m128i*)(dst + x), r0);
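+ // r1 was computed from src2 + x + 8 (eight 16-bit lanes per vector), so it stores to dst + x + 8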
- _mm_storeu_si128((__m128i*)(dst + x + 16), r1);
+ _mm_storeu_si128((__m128i*)(dst + x + 8), r1);
}
for( ; x <= sz.width - 8; x += 8 )
{
r0 = op32(r0,_mm_load_si128((const __m128i*)(src2 + x)));
r1 = op32(r1,_mm_load_si128((const __m128i*)(src2 + x + 4)));
_mm_store_si128((__m128i*)(dst + x), r0);
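+ // four 32-bit lanes per vector: the second store belongs at dst + x + 4, not x + 16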
- _mm_store_si128((__m128i*)(dst + x + 16), r1);
+ _mm_store_si128((__m128i*)(dst + x + 4), r1);
}
else
for( ; x <= sz.width - 8; x += 8 )
{
r0 = op32(r0,_mm_loadu_si128((const __m128i*)(src2 + x)));
r1 = op32(r1,_mm_loadu_si128((const __m128i*)(src2 + x + 4)));
_mm_storeu_si128((__m128i*)(dst + x), r0);
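+ // same second-store offset fix for the unaligned path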
- _mm_storeu_si128((__m128i*)(dst + x + 16), r1);
+ _mm_storeu_si128((__m128i*)(dst + x + 4), r1);
}
}
#endif
struct _VAnd8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_and_si128(a,b); }};
struct _VOr8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_or_si128(a,b); }};
struct _VXor8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_xor_si128(a,b); }};
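+// _mm_andnot_si128(zero, a) evaluates (~zero & a) == a, i.e. a no-op; bitwise NOT is a XOR all-ones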
-struct _VNot8u { __m128i operator()(const __m128i& a, const __m128i&) const { return _mm_andnot_si128(_mm_setzero_si128(),a); }};
+struct _VNot8u { __m128i operator()(const __m128i& a, const __m128i&) const { return _mm_xor_si128(_mm_set1_epi32(-1),a); }};
#endif
for( i = 0; i < n; i++ )
{
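+ // sd is the double accumulator (renamed from s, presumably to avoid a clash with another local named s)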
- for( k = 0, s = 0; k < m; k++ )
+ for( k = 0, sd = 0; k < m; k++ )
{
_Tp t = At[i*astep + k];
- s += (double)t*t;
+ sd += (double)t*t;
}
- W[i] = s;
+ W[i] = sd;
if( Vt )
{
#if CV_SSE2
if( USE_SSE2 )
{
- int j, len0 = len & -4, blockSize0 = (1 << 15), blockSize;
+ int j, len0 = len & -4, blockSize0 = (1 << 13), blockSize;
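+ // presumably lowered from 2^15 so the packed 32-bit accumulators stay well clear of overflow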
__m128i z = _mm_setzero_si128();
while( i < len0 )
{
blockSize = std::min(len0 - i, blockSize0);
__m128i s = _mm_setzero_si128();
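+ // j's initialization is hoisted out of the header so any scalar tail loop after this one can resume at j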
- for( j = 0; j <= blockSize - 16; j += 16 )
+ j = 0;
+ for( ; j <= blockSize - 16; j += 16 )
{
__m128i b0 = _mm_loadu_si128((const __m128i*)(src1 + j));
__m128i b1 = _mm_loadu_si128((const __m128i*)(src2 + j));
__m128i s0, s1, s2, s3;
s0 = _mm_unpacklo_epi8(b0, z);
s2 = _mm_unpackhi_epi8(b0, z);
s1 = _mm_unpacklo_epi8(b1, z);
s3 = _mm_unpackhi_epi8(b1, z);
s0 = _mm_madd_epi16(s0, s1);
- s1 = _mm_madd_epi16(s2, s3);
+ s2 = _mm_madd_epi16(s2, s3);
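+ // the high-half product must land in s2 (accumulated below); the old code clobbered s1 and summed the raw s2 instead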
s = _mm_add_epi32(s, s0);
s = _mm_add_epi32(s, s2);
}
if( USE_SSE2 )
{
float CV_DECL_ALIGNED(16) buf[4];
- static const float CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
+ static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
__m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
- __m128 absmask = _mm_load_ps(absbuf);
+ __m128 absmask = _mm_load_ps((const float*)absbuf);
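+ // 0x7fffffff as a float initializer stores (float)INT_MAX, whose bit pattern is not the
+ // sign-clearing mask; the int array plus pointer cast keeps the exact 0x7fffffff bits in each lane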
for( ; j <= n - 8; j += 8 )
{
CvMat _w = w, _wdb = wdb;
// use exactly the same threshold as in icvSVD...,
// so the changes in the library and here should be synchronized.
- double threshold = cv::sum(w)[0]*(is_float ? FLT_EPSILON*10 : DBL_EPSILON*2);
+ double threshold = cv::sum(w)[0]*(DBL_EPSILON*2);//(is_float ? FLT_EPSILON*10 : DBL_EPSILON*2);
wdb = Scalar::all(0);
for( i = 0; i < min_size; i++ )