disabled 64f IPP DFT; added IPP Gaussian filter; added IPP GetRectSubPix; added IPP...
author Vadim Pisarevsky <vadim.pisarevsky@gmail.com>
Tue, 6 Aug 2013 14:31:06 +0000 (18:31 +0400)
committer Vadim Pisarevsky <vadim.pisarevsky@gmail.com>
Tue, 6 Aug 2013 14:31:06 +0000 (18:31 +0400)
cmake/OpenCVFindIPP.cmake
modules/core/src/dxt.cpp
modules/imgproc/src/samplers.cpp
modules/imgproc/src/smooth.cpp

index d1af605..9921d25 100644 (file)
@@ -138,9 +138,14 @@ endfunction()
 # This is auxiliary function called from set_ipp_variables()
 # to set IPP_LIBRARIES variable in IPP 7.x style
 # ------------------------------------------------------------------------
-function(set_ipp_new_libraries)
+function(set_ipp_new_libraries _LATEST_VERSION)
     set(IPP_PREFIX "ipp")
-    set(IPP_SUFFIX "_l")       # static not threaded libs suffix
+    
+    if(${_LATEST_VERSION} VERSION_LESS "8.0")
+        set(IPP_SUFFIX "_l")       # static not threaded libs suffix
+    else()
+        set(IPP_SUFFIX "")       # static not threaded libs suffix
+    endif()
     set(IPP_THRD   "_t")       # static threaded libs suffix
     set(IPPCORE    "core")     # core functionality
     set(IPPSP      "s")        # signal processing
@@ -199,7 +204,9 @@ function(set_ipp_variables _LATEST_VERSION)
         # set INCLUDE and LIB folders
         set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/include PARENT_SCOPE)
 
-        if (IPP_X64)
+        if (APPLE)
+            set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib PARENT_SCOPE)
+        elseif (IPP_X64)
             if(NOT EXISTS ${IPP_ROOT_DIR}/lib/intel64)
                 message(SEND_ERROR "IPP EM64T libraries not found")
             endif()
@@ -212,7 +219,7 @@ function(set_ipp_variables _LATEST_VERSION)
         endif()
 
         # set IPP_LIBRARIES variable (7.x lib names)
-        set_ipp_new_libraries()
+        set_ipp_new_libraries(${_LATEST_VERSION})
         set(IPP_LIBRARIES ${IPP_LIBRARIES} PARENT_SCOPE)
         message(STATUS "IPP libs: ${IPP_LIBRARIES}")
 
index b3c2b83..9766752 100644 (file)
@@ -1458,6 +1458,10 @@ static void CCSIDFT_64f( const double* src, double* dst, int n, int nf, int* fac
 
 }
 
+#ifdef HAVE_IPP // common pointer types that cv::dft casts the chosen ippsDFTGetSize_* / ippsDFTInit_* variant to
+typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*); // called as (len, norm flag, hint, &specsize, &initsize, &worksize)
+typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*); // called as (len, norm flag, hint, spec, init buffer)
+#endif
 
 void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
 {
@@ -1483,7 +1487,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
     int factors[34];
     bool inplace_transform = false;
 #ifdef HAVE_IPP
-    void *spec_r = 0, *spec_c = 0;
+    AutoBuffer<uchar> ippbuf;
     int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1;
 #endif
 
@@ -1543,52 +1547,39 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
 
         spec = 0;
 #ifdef HAVE_IPP
-        if( len*count >= 64 ) // use IPP DFT if available
+        if( depth == CV_32F && len*count >= 64 ) // use IPP DFT if available
         {
-            int ipp_sz = 0;
+            int specsize=0, initsize=0, worksize=0;
+            IppDFTGetSizeFunc getSizeFunc = 0;
+            IppDFTInitFunc initFunc = 0;
 
             if( real_transform && stage == 0 )
             {
                 if( depth == CV_32F )
-                {
-                    if( spec_r )
-                        IPPI_CALL( ippsDFTFree_R_32f( (IppsDFTSpec_R_32f*)spec_r ));
-                    IPPI_CALL( ippsDFTInitAlloc_R_32f(
-                        (IppsDFTSpec_R_32f**)&spec_r, len, ipp_norm_flag, ippAlgHintNone ));
-                    IPPI_CALL( ippsDFTGetBufSize_R_32f( (IppsDFTSpec_R_32f*)spec_r, &ipp_sz ));
-                }
+                    getSizeFunc = (IppDFTGetSizeFunc)ippsDFTGetSize_R_32f,
+                    initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
                 else
-                {
-                    if( spec_r )
-                        IPPI_CALL( ippsDFTFree_R_64f( (IppsDFTSpec_R_64f*)spec_r ));
-                    IPPI_CALL( ippsDFTInitAlloc_R_64f(
-                        (IppsDFTSpec_R_64f**)&spec_r, len, ipp_norm_flag, ippAlgHintNone ));
-                    IPPI_CALL( ippsDFTGetBufSize_R_64f( (IppsDFTSpec_R_64f*)spec_r, &ipp_sz ));
-                }
-                spec = spec_r;
+                    getSizeFunc = (IppDFTGetSizeFunc)ippsDFTGetSize_R_64f,
+                    initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
             }
             else
             {
                 if( depth == CV_32F )
-                {
-                    if( spec_c )
-                        IPPI_CALL( ippsDFTFree_C_32fc( (IppsDFTSpec_C_32fc*)spec_c ));
-                    IPPI_CALL( ippsDFTInitAlloc_C_32fc(
-                        (IppsDFTSpec_C_32fc**)&spec_c, len, ipp_norm_flag, ippAlgHintNone ));
-                    IPPI_CALL( ippsDFTGetBufSize_C_32fc( (IppsDFTSpec_C_32fc*)spec_c, &ipp_sz ));
-                }
+                    getSizeFunc = (IppDFTGetSizeFunc)ippsDFTGetSize_C_32fc,
+                    initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
                 else
-                {
-                    if( spec_c )
-                        IPPI_CALL( ippsDFTFree_C_64fc( (IppsDFTSpec_C_64fc*)spec_c ));
-                    IPPI_CALL( ippsDFTInitAlloc_C_64fc(
-                        (IppsDFTSpec_C_64fc**)&spec_c, len, ipp_norm_flag, ippAlgHintNone ));
-                    IPPI_CALL( ippsDFTGetBufSize_C_64fc( (IppsDFTSpec_C_64fc*)spec_c, &ipp_sz ));
-                }
-                spec = spec_c;
+                    getSizeFunc = (IppDFTGetSizeFunc)ippsDFTGetSize_C_64fc,
+                    initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
+            }
+            if( getSizeFunc(len, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
+            {
+                ippbuf.allocate(specsize + initsize + 64);
+                spec = alignPtr(&ippbuf[0], 32);
+                uchar* initbuf = alignPtr((uchar*)spec + specsize, 32);
+                if( initFunc(len, ipp_norm_flag, ippAlgHintNone, spec, initbuf) < 0 )
+                    spec = 0;
+                sz += worksize;
             }
-
-            sz += ipp_sz;
         }
         else
 #endif
@@ -1862,24 +1853,6 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
             src = dst;
         }
     }
-
-#ifdef HAVE_IPP
-    if( spec_c )
-    {
-        if( depth == CV_32F )
-            ippsDFTFree_C_32fc( (IppsDFTSpec_C_32fc*)spec_c );
-        else
-            ippsDFTFree_C_64fc( (IppsDFTSpec_C_64fc*)spec_c );
-    }
-
-    if( spec_r )
-    {
-        if( depth == CV_32F )
-            ippsDFTFree_R_32f( (IppsDFTSpec_R_32f*)spec_r );
-        else
-            ippsDFTFree_R_64f( (IppsDFTSpec_R_64f*)spec_r );
-    }
-#endif
 }
 
 
index e6d2d12..eb2f617 100644 (file)
@@ -519,6 +519,12 @@ typedef CvStatus (CV_STDCALL *CvGetRectSubPixFunc)( const void* src, int src_ste
                                                     int dst_step, CvSize win_size,
                                                     CvPoint2D32f center );
 
+typedef CvStatus (CV_STDCALL *CvIPPGetRectSubPixFunc)( const void* src, int src_step, // cast target for the ippiCopySubpixIntersect_* functions used by cvGetRectSubPix
+                                                       CvSize src_size, void* dst,
+                                                       int dst_step, CvSize win_size,
+                                                       CvPoint2D32f center,
+                                                       CvPoint* minpt, CvPoint* maxpt ); // two extra CvPoint pointers after the center argument
+
 CV_IMPL void
 cvGetRectSubPix( const void* srcarr, void* dstarr, CvPoint2D32f center )
 {
@@ -556,6 +562,18 @@ cvGetRectSubPix( const void* srcarr, void* dstarr, CvPoint2D32f center )
 
     //if( dst_size.width > src_size.width || dst_size.height > src_size.height )
     //    CV_ERROR( CV_StsBadSize, "destination ROI must be smaller than source ROI" );
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) // IPP shortcut, tried before the code that follows this block
+    CvPoint minpt, maxpt; // passed to the IPP call by address; not read again in this block
+    int srctype = CV_MAT_TYPE(src->type), dsttype = CV_MAT_TYPE(dst->type);
+    CvIPPGetRectSubPixFunc ippfunc = // pick the IPP variant by (source type, destination type)
+        srctype == CV_8UC1 && dsttype == CV_8UC1 ? (CvIPPGetRectSubPixFunc)ippiCopySubpixIntersect_8u_C1R :
+        srctype == CV_8UC1 && dsttype == CV_32FC1 ? (CvIPPGetRectSubPixFunc)ippiCopySubpixIntersect_8u32f_C1R :
+        srctype == CV_32FC1 && dsttype == CV_32FC1 ? (CvIPPGetRectSubPixFunc)ippiCopySubpixIntersect_32f_C1R : 0; // 0: no IPP variant for this type pair
+
+    if( ippfunc && ippfunc(src->data.ptr, src->step, src_size, dst->data.ptr,
+                           dst->step, dst_size, center, &minpt, &maxpt) >= 0 )
+        return; // non-negative IPP status: done; otherwise continue below
+#endif
 
     if( CV_ARE_DEPTHS_EQ( src, dst ))
     {
index c84abe5..00be086 100644 (file)
@@ -856,6 +856,22 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
         return;
 #endif
 
+#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7)
+    if(src.type() == CV_32FC1 && sigma1 == sigma2 && ksize.width == ksize.height && sigma1 != 0.0 )
+    {
+        IppiSize roi = {src.cols, src.rows};
+        int bufSize = 0;
+        ippiFilterGaussGetBufferSize_32f_C1R(roi, ksize.width, &bufSize);
+        AutoBuffer<uchar> buf(bufSize+128);
+        if( ippiFilterGaussBorder_32f_C1R((const Ipp32f *)src.data, (int)src.step,
+                                          (Ipp32f *)dst.data, (int)dst.step,
+                                          roi, ksize.width, (Ipp32f)sigma1,
+                                          (IppiBorderType)borderType, 0.0,
+                                          alignPtr(&buf[0],32)) >= 0 )
+            return;
+    }
+#endif
+
     Ptr<FilterEngine> f = createGaussianFilter( src.type(), ksize, sigma1, sigma2, borderType );
     f->apply( src, dst );
 }
@@ -1888,6 +1904,29 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
     radius = MAX(radius, 1);
     d = radius*2 + 1;
 
+#if 0 && defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7) // compiled out by the leading "0 &&": IPP bilateral path kept but disabled
+    if(cn == 1) // single-channel input only
+    {
+        IppiSize kernel = {d, d};
+        IppiSize roi={src.cols, src.rows};
+        int bufsize=0;
+        ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize); // return status is not checked
+        AutoBuffer<uchar> buf(bufsize+128); // 128 spare bytes leave room for the 32-byte alignment below
+        IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32);
+        ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, sigma_color*sigma_color, sigma_space*sigma_space, 1, pSpec ); // passes the squared sigmas; status is not checked
+        Mat tsrc;
+        const Mat* psrc = &src;
+        if( src.data == dst.data ) // in-place call: filter from a copy of the source instead
+        {
+            src.copyTo(tsrc);
+            psrc = &tsrc;
+        }
+        if( ippiFilterBilateral_8u_C1R(psrc->data, (int)psrc->step[0],
+                                       dst.data, (int)dst.step[0],
+                                       roi, kernel, pSpec) >= 0 )
+            return; // non-negative IPP status: done; otherwise continue with the code below
+    }
+#endif
     Mat temp;
     copyMakeBorder( src, temp, radius, radius, radius, radius, borderType );