CV_USE_UNROLLED for imgproc
author Victoria Zhislina <no@email>
Tue, 21 Feb 2012 11:31:23 +0000 (11:31 +0000)
committer Victoria Zhislina <no@email>
Tue, 21 Feb 2012 11:31:23 +0000 (11:31 +0000)
modules/imgproc/perf/perf_filter2d.cpp
modules/imgproc/src/_list.h
modules/imgproc/src/accum.cpp
modules/imgproc/src/filter.cpp
modules/imgproc/src/gabor.cpp
modules/imgproc/src/geometry.cpp
modules/imgproc/src/imgwarp.cpp
modules/imgproc/src/morph.cpp
modules/imgproc/src/segmentation.cpp
modules/imgproc/src/thresh.cpp

index 5aa000c..c87f31c 100644 (file)
@@ -32,7 +32,7 @@ PERF_TEST_P( TestFilter2d, Filter2d,
 \r
     Mat kernel(kSize, kSize, CV_32FC1);\r
     randu(kernel, -3, 10);\r
-    float s = (float)fabs( sum(kernel)[0] );\r
+    float s = fabs( sum(kernel)[0] );\r
     if(s > 1e-3) kernel /= s;\r
 \r
     declare.in(src, WARMUP_RNG).out(dst).time(20);\r
index a19f7e2..b2b63e9 100644 (file)
@@ -345,7 +345,7 @@ void prefix##remove_at_##type(_CVLIST* l, CVPOS pos)\
 void prefix##set_##type(CVPOS pos, type* data)\
 {\
     ELEMENT_##type* element = ((ELEMENT_##type*)(pos.m_pos));\
-    memcpy(&(element->m_data), data, sizeof(*data));\
+    memcpy(&(element->m_data), data, sizeof(data));\
 }\
 type* prefix##get_##type(CVPOS pos)\
 {\
index 4a588f8..3c2f0e5 100644 (file)
@@ -53,6 +53,7 @@ acc_( const T* src, AT* dst, const uchar* mask, int len, int cn )
     if( !mask )
     {
         len *= cn;
+               #if CV_ENABLE_UNROLLED
         for( ; i <= len - 4; i += 4 )
         {
             AT t0, t1;
@@ -64,7 +65,7 @@ acc_( const T* src, AT* dst, const uchar* mask, int len, int cn )
             t1 = src[i+3] + dst[i+3];
             dst[i+2] = t0; dst[i+3] = t1;
         }
-        
+        #endif
         for( ; i < len; i++ )
             dst[i] += src[i];
     }
@@ -110,6 +111,7 @@ accSqr_( const T* src, AT* dst, const uchar* mask, int len, int cn )
     if( !mask )
     {
         len *= cn;
+                #if CV_ENABLE_UNROLLED
         for( ; i <= len - 4; i += 4 )
         {
             AT t0, t1;
@@ -121,7 +123,7 @@ accSqr_( const T* src, AT* dst, const uchar* mask, int len, int cn )
             t1 = (AT)src[i+3]*src[i+3] + dst[i+3];
             dst[i+2] = t0; dst[i+3] = t1;
         }
-        
+        #endif
         for( ; i < len; i++ )
             dst[i] += (AT)src[i]*src[i];
     }
@@ -167,6 +169,7 @@ accProd_( const T* src1, const T* src2, AT* dst, const uchar* mask, int len, int
     if( !mask )
     {
         len *= cn;
+               #if CV_ENABLE_UNROLLED
         for( ; i <= len - 4; i += 4 )
         {
             AT t0, t1;
@@ -178,7 +181,7 @@ accProd_( const T* src1, const T* src2, AT* dst, const uchar* mask, int len, int
             t1 = (AT)src1[i+3]*src2[i+3] + dst[i+3];
             dst[i+2] = t0; dst[i+3] = t1;
         }
-        
+        #endif
         for( ; i < len; i++ )
             dst[i] += (AT)src1[i]*src2[i];
     }
@@ -225,6 +228,7 @@ accW_( const T* src, AT* dst, const uchar* mask, int len, int cn, double alpha )
     if( !mask )
     {
         len *= cn;
+               #if CV_ENABLE_UNROLLED
         for( ; i <= len - 4; i += 4 )
         {
             AT t0, t1;
@@ -236,7 +240,7 @@ accW_( const T* src, AT* dst, const uchar* mask, int len, int cn, double alpha )
             t1 = src[i+3]*a + dst[i+3]*b;
             dst[i+2] = t0; dst[i+3] = t1;
         }
-        
+        #endif
         for( ; i < len; i++ )
             dst[i] = src[i]*a + dst[i]*b;
     }
index 01a5457..efe552a 100644 (file)
@@ -2227,7 +2227,7 @@ template<typename ST, typename DT, class VecOp> struct RowFilter : public BaseRo
 
         i = vecOp(src, dst, width, cn);
         width *= cn;
-
+        #if CV_ENABLE_UNROLLED
         for( ; i <= width - 4; i += 4 )
         {
             S = (const ST*)src + i;
@@ -2245,7 +2245,7 @@ template<typename ST, typename DT, class VecOp> struct RowFilter : public BaseRo
             D[i] = s0; D[i+1] = s1;
             D[i+2] = s2; D[i+3] = s3;
         }
-
+        #endif
         for( ; i < width; i++ )
         {
             S = (const ST*)src + i;
@@ -2426,6 +2426,7 @@ template<class CastOp, class VecOp> struct ColumnFilter : public BaseColumnFilte
         {
             DT* D = (DT*)dst;
             i = vecOp(src, dst, width);
+                       #if CV_ENABLE_UNROLLED
             for( ; i <= width - 4; i += 4 )
             {
                 ST f = ky[0];
@@ -2443,7 +2444,7 @@ template<class CastOp, class VecOp> struct ColumnFilter : public BaseColumnFilte
                 D[i] = castOp(s0); D[i+1] = castOp(s1);
                 D[i+2] = castOp(s2); D[i+3] = castOp(s3);
             }
-
+            #endif
             for( ; i < width; i++ )
             {
                 ST s0 = ky[0]*((const ST*)src[0])[i] + _delta;
@@ -2492,7 +2493,7 @@ template<class CastOp, class VecOp> struct SymmColumnFilter : public ColumnFilte
             {
                 DT* D = (DT*)dst;
                 i = (this->vecOp)(src, dst, width);
-
+                #if CV_ENABLE_UNROLLED
                 for( ; i <= width - 4; i += 4 )
                 {
                     ST f = ky[0];
@@ -2514,7 +2515,7 @@ template<class CastOp, class VecOp> struct SymmColumnFilter : public ColumnFilte
                     D[i] = castOp(s0); D[i+1] = castOp(s1);
                     D[i+2] = castOp(s2); D[i+3] = castOp(s3);
                 }
-
+                #endif
                 for( ; i < width; i++ )
                 {
                     ST s0 = ky[0]*((const ST*)src[0])[i] + _delta;
@@ -2530,7 +2531,7 @@ template<class CastOp, class VecOp> struct SymmColumnFilter : public ColumnFilte
             {
                 DT* D = (DT*)dst;
                 i = this->vecOp(src, dst, width);
-
+                #if CV_ENABLE_UNROLLED
                 for( ; i <= width - 4; i += 4 )
                 {
                     ST f = ky[0];
@@ -2551,7 +2552,7 @@ template<class CastOp, class VecOp> struct SymmColumnFilter : public ColumnFilte
                     D[i] = castOp(s0); D[i+1] = castOp(s1);
                     D[i+2] = castOp(s2); D[i+3] = castOp(s3);
                 }
-
+                #endif
                 for( ; i < width; i++ )
                 {
                     ST s0 = _delta;
@@ -2608,6 +2609,7 @@ struct SymmColumnSmallFilter : public SymmColumnFilter<CastOp, VecOp>
             {
                 if( is_1_2_1 )
                 {
+                                       #if CV_ENABLE_UNROLLED
                     for( ; i <= width - 4; i += 4 )
                     {
                         ST s0 = S0[i] + S1[i]*2 + S2[i] + _delta;
@@ -2620,9 +2622,17 @@ struct SymmColumnSmallFilter : public SymmColumnFilter<CastOp, VecOp>
                         D[i+2] = castOp(s0);
                         D[i+3] = castOp(s1);
                     }
+                    #else
+                           for( ; i < width; i ++ )
+                    {
+                        ST s0 = S0[i] + S1[i]*2 + S2[i] + _delta;
+                        D[i] = castOp(s0);
+                    }
+                    #endif
                 }
                 else if( is_1_m2_1 )
                 {
+                                       #if CV_ENABLE_UNROLLED
                     for( ; i <= width - 4; i += 4 )
                     {
                         ST s0 = S0[i] - S1[i]*2 + S2[i] + _delta;
@@ -2635,9 +2645,17 @@ struct SymmColumnSmallFilter : public SymmColumnFilter<CastOp, VecOp>
                         D[i+2] = castOp(s0);
                         D[i+3] = castOp(s1);
                     }
+                    #else
+                           for( ; i < width; i ++ )
+                    {
+                        ST s0 = S0[i] - S1[i]*2 + S2[i] + _delta;
+                        D[i] = castOp(s0);
+                    }
+                    #endif
                 }
                 else
                 {
+                   #if CV_ENABLE_UNROLLED
                     for( ; i <= width - 4; i += 4 )
                     {
                         ST s0 = (S0[i] + S2[i])*f1 + S1[i]*f0 + _delta;
@@ -2650,8 +2668,14 @@ struct SymmColumnSmallFilter : public SymmColumnFilter<CastOp, VecOp>
                         D[i+2] = castOp(s0);
                         D[i+3] = castOp(s1);
                     }
+                    #else
+                    for( ; i < width; i ++ )
+                    {
+                        ST s0 = (S0[i] + S2[i])*f1 + S1[i]*f0 + _delta;
+                        D[i] = castOp(s0);
+                    }
+                    #endif
                 }
-
                 for( ; i < width; i++ )
                     D[i] = castOp((S0[i] + S2[i])*f1 + S1[i]*f0 + _delta);
             }
@@ -2661,7 +2685,7 @@ struct SymmColumnSmallFilter : public SymmColumnFilter<CastOp, VecOp>
                 {
                     if( f1 < 0 )
                         std::swap(S0, S2);
-
+                   #if CV_ENABLE_UNROLLED
                     for( ; i <= width - 4; i += 4 )
                     {
                         ST s0 = S2[i] - S0[i] + _delta;
@@ -2674,12 +2698,19 @@ struct SymmColumnSmallFilter : public SymmColumnFilter<CastOp, VecOp>
                         D[i+2] = castOp(s0);
                         D[i+3] = castOp(s1);
                     }
-
+                    #else
+                       for( ; i < width; i ++ )
+                    {
+                        ST s0 = S2[i] - S0[i] + _delta;
+                        D[i] = castOp(s0);
+                    }
+                    #endif
                     if( f1 < 0 )
                         std::swap(S0, S2);
                 }
                 else
                 {
+                   #if CV_ENABLE_UNROLLED
                     for( ; i <= width - 4; i += 4 )
                     {
                         ST s0 = (S2[i] - S0[i])*f1 + _delta;
@@ -2692,6 +2723,7 @@ struct SymmColumnSmallFilter : public SymmColumnFilter<CastOp, VecOp>
                         D[i+2] = castOp(s0);
                         D[i+3] = castOp(s1);
                     }
+                    #endif
                 }
 
                 for( ; i < width; i++ )
@@ -3043,7 +3075,7 @@ template<typename ST, class CastOp, class VecOp> struct Filter2D : public BaseFi
                 kp[k] = (const ST*)src[pt[k].y] + pt[k].x*cn;
 
             i = vecOp((const uchar**)kp, dst, width);
-
+            #if CV_ENABLE_UNROLLED
             for( ; i <= width - 4; i += 4 )
             {
                 KT s0 = _delta, s1 = _delta, s2 = _delta, s3 = _delta;
@@ -3061,7 +3093,7 @@ template<typename ST, class CastOp, class VecOp> struct Filter2D : public BaseFi
                 D[i] = castOp(s0); D[i+1] = castOp(s1);
                 D[i+2] = castOp(s2); D[i+3] = castOp(s3);
             }
-
+            #endif
             for( ; i < width; i++ )
             {
                 KT s0 = _delta;
index 867fa1a..5a81312 100644 (file)
@@ -60,12 +60,12 @@ cv::Mat cv::getGaborKernel( Size ksize, double sigma, double theta,
     if( ksize.width > 0 )
         xmax = ksize.width/2;
     else
-        xmax = (int)std::max(fabs(nstds*sigma_x*c), fabs(nstds*sigma_y*s));
+        xmax = std::max(fabs(nstds*sigma_x*c), fabs(nstds*sigma_y*s));
     
     if( ksize.height > 0 )
         ymax = ksize.height/2;
     else
-        ymax = (int)std::max(fabs(nstds*sigma_x*s), fabs(nstds*sigma_y*c));
+        ymax = std::max(fabs(nstds*sigma_x*s), fabs(nstds*sigma_y*c));
         
     xmin = -xmax;
     ymin = -ymax;
index 63bab60..66d0291 100644 (file)
@@ -439,8 +439,8 @@ static char segSegInt( Point2f a, Point2f b, Point2f c, Point2f d, Point2f& p, P
              (0.0 > t) || (t > 1.0) )
         code = '0';
     
-    p.x = (float)(a.x + s * ( b.x - a.x ));
-    p.y = (float)(a.y + s * ( b.y - a.y ));
+    p.x = a.x + s * ( b.x - a.x );
+    p.y = a.y + s * ( b.y - a.y );
     
     return code;
 }
@@ -652,7 +652,7 @@ float cv::intersectConvexConvex( InputArray _p1, InputArray _p2, OutputArray _p1
             _p12.release();
             return 0.f;
         }
-        area = (float)contourArea(_InputArray(result, nr), false);
+        area = contourArea(_InputArray(result, nr), false);
     }
     
     if( _p12.needed() )
index 62be35a..7f84a6a 100644 (file)
@@ -877,6 +877,7 @@ struct VResizeLinear
         VecOp vecOp;
 
         int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
+           #if CV_ENABLE_UNROLLED
                for( ; x <= width - 4; x += 4 )
         {
             WT t0, t1;
@@ -887,7 +888,7 @@ struct VResizeLinear
             t1 = S0[x+3]*b0 + S1[x+3]*b1;
             dst[x+2] = castOp(t0); dst[x+3] = castOp(t1);
         }
-
+        #endif
         for( ; x < width; x++ )
             dst[x] = castOp(S0[x]*b0 + S1[x]*b1);
     }
@@ -1033,7 +1034,7 @@ struct VResizeLanczos4
         CastOp castOp;
         VecOp vecOp;
         int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
-
+               #if CV_ENABLE_UNROLLED       
         for( ; x <= width - 4; x += 4 )
         {
             WT b = beta[0];
@@ -1050,7 +1051,7 @@ struct VResizeLanczos4
             dst[x] = castOp(s0); dst[x+1] = castOp(s1);
             dst[x+2] = castOp(s2); dst[x+3] = castOp(s3);
         }
-
+        #endif
         for( ; x < width; x++ )
         {
             dst[x] = castOp(src[0][x]*beta[0] + src[1][x]*beta[1] +
@@ -1161,8 +1162,11 @@ static void resizeAreaFast_( const Mat& src, Mat& dst, const int* ofs, const int
         {
             const T* S = (const T*)(src.data + src.step*sy0) + xofs[dx];
             WT sum = 0;
-            for( k = 0; k <= area - 4; k += 4 )
+                       k=0;
+                       #if CV_ENABLE_UNROLLED
+            for( ; k <= area - 4; k += 4 )
                 sum += S[ofs[k]] + S[ofs[k+1]] + S[ofs[k+2]] + S[ofs[k+3]];
+            #endif
             for( ; k < area; k++ )
                 sum += S[ofs[k]];
 
index 432d026..98052f7 100644 (file)
@@ -700,7 +700,9 @@ template<class Op, class VecOp> struct MorphColumnFilter : public BaseColumnFilt
 
         for( ; _ksize > 1 && count > 1; count -= 2, D += dststep*2, src += 2 )
         {
-            for( i = i0; i <= width - 4; i += 4 )
+                       i = i0;
+                       #if CV_ENABLE_UNROLLED
+            for( ; i <= width - 4; i += 4 )
             {
                 const T* sptr = src[1] + i;
                 T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3];
@@ -724,7 +726,7 @@ template<class Op, class VecOp> struct MorphColumnFilter : public BaseColumnFilt
                 D[i+dststep+2] = op(s2, sptr[2]);
                 D[i+dststep+3] = op(s3, sptr[3]);
             }
-
+            #endif
             for( ; i < width; i++ )
             {
                 T s0 = src[1][i];
@@ -739,7 +741,9 @@ template<class Op, class VecOp> struct MorphColumnFilter : public BaseColumnFilt
 
         for( ; count > 0; count--, D += dststep, src++ )
         {
-            for( i = i0; i <= width - 4; i += 4 )
+                       i = i0;
+                       #if CV_ENABLE_UNROLLED
+            for( ; i <= width - 4; i += 4 )
             {
                 const T* sptr = src[0] + i;
                 T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3];
@@ -754,7 +758,7 @@ template<class Op, class VecOp> struct MorphColumnFilter : public BaseColumnFilt
                 D[i] = s0; D[i+1] = s1;
                 D[i+2] = s2; D[i+3] = s3;
             }
-
+            #endif
             for( ; i < width; i++ )
             {
                 T s0 = src[0][i];
@@ -801,7 +805,7 @@ template<class Op, class VecOp> struct MorphFilter : BaseFilter
                 kp[k] = (const T*)src[pt[k].y] + pt[k].x*cn;
 
             i = vecOp(&ptrs[0], nz, dst, width);
-
+            #if CV_ENABLE_UNROLLED
             for( ; i <= width - 4; i += 4 )
             {
                 const T* sptr = kp[0] + i;
@@ -817,7 +821,7 @@ template<class Op, class VecOp> struct MorphFilter : BaseFilter
                 D[i] = s0; D[i+1] = s1;
                 D[i+2] = s2; D[i+3] = s3;
             }
-
+            #endif
             for( ; i < width; i++ )
             {
                 T s0 = kp[0][i];
@@ -1074,8 +1078,10 @@ public:
     {
         int row0 = min(cvRound(range.begin() * src.rows / nStripes), src.rows);
         int row1 = min(cvRound(range.end() * src.rows / nStripes), src.rows);
-        
-        //printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n", src.rows, src.cols, range.begin(), range.end(), row0, row1);
+
+        if(0)
+            printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n",
+                   src.rows, src.cols, range.begin(), range.end(), row0, row1);
 
         Mat srcStripe = src.rowRange(row0, row1);
         Mat dstStripe = dst.rowRange(row0, row1);
@@ -1099,7 +1105,7 @@ private:
     Point anchor;
     int rowBorderType;
     int columnBorderType;
-    Scalar borderValue;
+    const Scalar& borderValue;
 };
 
 static void morphOp( int op, InputArray _src, OutputArray _dst,
index cb335e2..2a0a101 100644 (file)
@@ -454,6 +454,7 @@ cvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr,
                     {
                         int row_count = 0;
                         x = minx;
+                                               #if CV_ENABLE_UNROLLED
                         for( ; x + 3 <= maxx; x += 4, ptr += 12 )
                         {
                             int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
@@ -481,7 +482,7 @@ cvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr,
                                 sx += x+3; row_count++;
                             }
                         }
-                        
+                        #endif
                         for( ; x <= maxx; x++, ptr += 3 )
                         {      
                             int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
index f200890..0e36117 100644 (file)
@@ -248,8 +248,9 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
         {
             const uchar* src = (const uchar*)(_src.data + _src.step*i);
             uchar* dst = (uchar*)(_dst.data + _dst.step*i);
-            
-            for( j = j_scalar; j <= roi.width - 4; j += 4 )
+                       j = j_scalar;
+            #if CV_ENABLE_UNROLLED
+            for( ; j <= roi.width - 4; j += 4 )
             {
                 uchar t0 = tab[src[j]];
                 uchar t1 = tab[src[j+1]];
@@ -263,7 +264,7 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
                 dst[j+2] = t0;
                 dst[j+3] = t1;
             }
-
+            #endif
             for( ; j < roi.width; j++ )
                 dst[j] = tab[src[j]];
         }
@@ -619,13 +620,16 @@ getThreshVal_Otsu_8u( const Mat& _src )
     for( i = 0; i < size.height; i++ )
     {
         const uchar* src = _src.data + _src.step*i;
-        for( j = 0; j <= size.width - 4; j += 4 )
+               j = 0;
+               #if CV_ENABLE_UNROLLED
+        for( ; j <= size.width - 4; j += 4 )
         {
             int v0 = src[j], v1 = src[j+1];
             h[v0]++; h[v1]++;
             v0 = src[j+2]; v1 = src[j+3];
             h[v0]++; h[v1]++;
         }
+        #endif
         for( ; j < size.width; j++ )
             h[src[j]]++;
     }
@@ -682,8 +686,10 @@ public:
     {
         int row0 = std::min(cvRound(range.begin() * src.rows / nStripes), src.rows);
         int row1 = std::min(cvRound(range.end() * src.rows / nStripes), src.rows);
-        
-        //printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n", src.rows, src.cols, range.begin(), range.end(), row0, row1);
+
+        if(0)
+            printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n",
+                   src.rows, src.cols, range.begin(), range.end(), row0, row1);
 
         Mat srcStripe = src.rowRange(row0, row1);
         Mat dstStripe = dst.rowRange(row0, row1);