From 10fde1ca5c855d9eec42de8546d0549592fff0e4 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Tue, 6 Aug 2013 18:31:06 +0400 Subject: [PATCH] disabled 64f IPP DFT; added IPP Gaussian filter; added IPP GetRectSubPix; added IPP BilateralFilter (temporarily disabled) --- cmake/OpenCVFindIPP.cmake | 15 ++++++-- modules/core/src/dxt.cpp | 79 +++++++++++++--------------------------- modules/imgproc/src/samplers.cpp | 18 +++++++++ modules/imgproc/src/smooth.cpp | 39 ++++++++++++++++++++ 4 files changed, 94 insertions(+), 57 deletions(-) diff --git a/cmake/OpenCVFindIPP.cmake b/cmake/OpenCVFindIPP.cmake index d1af605..9921d25 100644 --- a/cmake/OpenCVFindIPP.cmake +++ b/cmake/OpenCVFindIPP.cmake @@ -138,9 +138,14 @@ endfunction() # This is auxiliary function called from set_ipp_variables() # to set IPP_LIBRARIES variable in IPP 7.x style # ------------------------------------------------------------------------ -function(set_ipp_new_libraries) +function(set_ipp_new_libraries _LATEST_VERSION) set(IPP_PREFIX "ipp") - set(IPP_SUFFIX "_l") # static not threaded libs suffix + + if(${_LATEST_VERSION} VERSION_LESS "8.0") + set(IPP_SUFFIX "_l") # static not threaded libs suffix + else() + set(IPP_SUFFIX "") # static not threaded libs suffix + endif() set(IPP_THRD "_t") # static threaded libs suffix set(IPPCORE "core") # core functionality set(IPPSP "s") # signal processing @@ -199,7 +204,9 @@ function(set_ipp_variables _LATEST_VERSION) # set INCLUDE and LIB folders set(IPP_INCLUDE_DIRS ${IPP_ROOT_DIR}/include PARENT_SCOPE) - if (IPP_X64) + if (APPLE) + set(IPP_LIBRARY_DIRS ${IPP_ROOT_DIR}/lib PARENT_SCOPE) + elseif (IPP_X64) if(NOT EXISTS ${IPP_ROOT_DIR}/lib/intel64) message(SEND_ERROR "IPP EM64T libraries not found") endif() @@ -212,7 +219,7 @@ function(set_ipp_variables _LATEST_VERSION) endif() # set IPP_LIBRARIES variable (7.x lib names) - set_ipp_new_libraries() + set_ipp_new_libraries(${_LATEST_VERSION}) set(IPP_LIBRARIES ${IPP_LIBRARIES} PARENT_SCOPE) message(STATUS "IPP libs: ${IPP_LIBRARIES}") diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index b3c2b83..9766752 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -1458,6 +1458,10 @@ static void CCSIDFT_64f( const double* src, double* dst, int n, int nf, int* fac } +#ifdef HAVE_IPP +typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*); +typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*); +#endif void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) { @@ -1483,7 +1487,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) int factors[34]; bool inplace_transform = false; #ifdef HAVE_IPP - void *spec_r = 0, *spec_c = 0; + AutoBuffer ippbuf; int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1; #endif @@ -1543,52 +1547,39 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) spec = 0; #ifdef HAVE_IPP - if( len*count >= 64 ) // use IPP DFT if available + if( depth == CV_32F && len*count >= 64 ) // use IPP DFT if available { - int ipp_sz = 0; + int specsize=0, initsize=0, worksize=0; + IppDFTGetSizeFunc getSizeFunc = 0; + IppDFTInitFunc initFunc = 0; if( real_transform && stage == 0 ) { if( depth == CV_32F ) - { - if( spec_r ) - IPPI_CALL( ippsDFTFree_R_32f( (IppsDFTSpec_R_32f*)spec_r )); - IPPI_CALL( ippsDFTInitAlloc_R_32f( - (IppsDFTSpec_R_32f**)&spec_r, len, ipp_norm_flag, ippAlgHintNone )); - IPPI_CALL( ippsDFTGetBufSize_R_32f( (IppsDFTSpec_R_32f*)spec_r, &ipp_sz )); - } + getSizeFunc = (IppDFTGetSizeFunc)ippsDFTGetSize_R_32f, + initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f; else - { - if( spec_r ) - IPPI_CALL( ippsDFTFree_R_64f( (IppsDFTSpec_R_64f*)spec_r )); - IPPI_CALL( ippsDFTInitAlloc_R_64f( - (IppsDFTSpec_R_64f**)&spec_r, len, ipp_norm_flag, ippAlgHintNone )); - IPPI_CALL( ippsDFTGetBufSize_R_64f( (IppsDFTSpec_R_64f*)spec_r, &ipp_sz )); - } - spec = spec_r; + getSizeFunc = (IppDFTGetSizeFunc)ippsDFTGetSize_R_64f, + initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f; } else { if( depth == CV_32F ) - { - if( spec_c ) - IPPI_CALL( ippsDFTFree_C_32fc( (IppsDFTSpec_C_32fc*)spec_c )); - IPPI_CALL( ippsDFTInitAlloc_C_32fc( - (IppsDFTSpec_C_32fc**)&spec_c, len, ipp_norm_flag, ippAlgHintNone )); - IPPI_CALL( ippsDFTGetBufSize_C_32fc( (IppsDFTSpec_C_32fc*)spec_c, &ipp_sz )); - } + getSizeFunc = (IppDFTGetSizeFunc)ippsDFTGetSize_C_32fc, + initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc; else - { - if( spec_c ) - IPPI_CALL( ippsDFTFree_C_64fc( (IppsDFTSpec_C_64fc*)spec_c )); - IPPI_CALL( ippsDFTInitAlloc_C_64fc( - (IppsDFTSpec_C_64fc**)&spec_c, len, ipp_norm_flag, ippAlgHintNone )); - IPPI_CALL( ippsDFTGetBufSize_C_64fc( (IppsDFTSpec_C_64fc*)spec_c, &ipp_sz )); - } - spec = spec_c; + getSizeFunc = (IppDFTGetSizeFunc)ippsDFTGetSize_C_64fc, + initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc; + } + if( getSizeFunc(len, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 ) + { + ippbuf.allocate(specsize + initsize + 64); + spec = alignPtr(&ippbuf[0], 32); + uchar* initbuf = alignPtr((uchar*)spec + specsize, 32); + if( initFunc(len, ipp_norm_flag, ippAlgHintNone, spec, initbuf) < 0 ) + spec = 0; + sz += worksize; } - - sz += ipp_sz; } else #endif @@ -1862,24 +1853,6 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) src = dst; } } - -#ifdef HAVE_IPP - if( spec_c ) - { - if( depth == CV_32F ) - ippsDFTFree_C_32fc( (IppsDFTSpec_C_32fc*)spec_c ); - else - ippsDFTFree_C_64fc( (IppsDFTSpec_C_64fc*)spec_c ); - } - - if( spec_r ) - { - if( depth == CV_32F ) - ippsDFTFree_R_32f( (IppsDFTSpec_R_32f*)spec_r ); - else - ippsDFTFree_R_64f( (IppsDFTSpec_R_64f*)spec_r ); - } -#endif } diff --git a/modules/imgproc/src/samplers.cpp b/modules/imgproc/src/samplers.cpp index e6d2d12..eb2f617 100644 --- a/modules/imgproc/src/samplers.cpp +++ b/modules/imgproc/src/samplers.cpp @@ -519,6 +519,12 @@ typedef CvStatus (CV_STDCALL *CvGetRectSubPixFunc)( const void* src, int src_ste int dst_step, CvSize win_size, CvPoint2D32f center ); +typedef CvStatus (CV_STDCALL *CvIPPGetRectSubPixFunc)( const void* src, int src_step, + CvSize src_size, void* dst, + int dst_step, CvSize win_size, + CvPoint2D32f center, + CvPoint* minpt, CvPoint* maxpt ); + CV_IMPL void cvGetRectSubPix( const void* srcarr, void* dstarr, CvPoint2D32f center ) { @@ -556,6 +562,18 @@ cvGetRectSubPix( const void* srcarr, void* dstarr, CvPoint2D32f center ) //if( dst_size.width > src_size.width || dst_size.height > src_size.height ) // CV_ERROR( CV_StsBadSize, "destination ROI must be smaller than source ROI" ); +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + CvPoint minpt, maxpt; + int srctype = CV_MAT_TYPE(src->type), dsttype = CV_MAT_TYPE(dst->type); + CvIPPGetRectSubPixFunc ippfunc = + srctype == CV_8UC1 && dsttype == CV_8UC1 ? (CvIPPGetRectSubPixFunc)ippiCopySubpixIntersect_8u_C1R : + srctype == CV_8UC1 && dsttype == CV_32FC1 ? (CvIPPGetRectSubPixFunc)ippiCopySubpixIntersect_8u32f_C1R : + srctype == CV_32FC1 && dsttype == CV_32FC1 ? (CvIPPGetRectSubPixFunc)ippiCopySubpixIntersect_32f_C1R : 0; + + if( ippfunc && ippfunc(src->data.ptr, src->step, src_size, dst->data.ptr, + dst->step, dst_size, center, &minpt, &maxpt) >= 0 ) + return; +#endif if( CV_ARE_DEPTHS_EQ( src, dst )) { diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index c84abe5..00be086 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -856,6 +856,22 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize, return; #endif +#if defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7) + if(src.type() == CV_32FC1 && sigma1 == sigma2 && ksize.width == ksize.height && sigma1 != 0.0 ) + { + IppiSize roi = {src.cols, src.rows}; + int bufSize = 0; + ippiFilterGaussGetBufferSize_32f_C1R(roi, ksize.width, &bufSize); + AutoBuffer buf(bufSize+128); + if( ippiFilterGaussBorder_32f_C1R((const Ipp32f *)src.data, (int)src.step, + (Ipp32f *)dst.data, (int)dst.step, + roi, ksize.width, (Ipp32f)sigma1, + (IppiBorderType)borderType, 0.0, + alignPtr(&buf[0],32)) >= 0 ) + return; + } +#endif + Ptr f = createGaussianFilter( src.type(), ksize, sigma1, sigma2, borderType ); f->apply( src, dst ); } @@ -1888,6 +1904,29 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d, radius = MAX(radius, 1); d = radius*2 + 1; +#if 0 && defined HAVE_IPP && (IPP_VERSION_MAJOR >= 7) + if(cn == 1) + { + IppiSize kernel = {d, d}; + IppiSize roi={src.cols, src.rows}; + int bufsize=0; + ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize); + AutoBuffer buf(bufsize+128); + IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32); + ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, sigma_color*sigma_color, sigma_space*sigma_space, 1, pSpec ); + Mat tsrc; + const Mat* psrc = &src; + if( src.data == dst.data ) + { + src.copyTo(tsrc); + psrc = &tsrc; + } + if( ippiFilterBilateral_8u_C1R(psrc->data, (int)psrc->step[0], + dst.data, (int)dst.step[0], + roi, kernel, pSpec) >= 0 ) + return; + } +#endif Mat temp; copyMakeBorder( src, temp, radius, radius, radius, radius, borderType ); -- 2.7.4