From 16629bff4cff3db73100306dff45d79c29f17a1e Mon Sep 17 00:00:00 2001 From: Elena Gvozdeva Date: Mon, 21 Apr 2014 12:43:26 +0400 Subject: [PATCH] Added parallel version for DCT_ROWS --- modules/core/src/dxt.cpp | 212 ++++++++++++++++++++--------------- modules/imgproc/src/distransform.cpp | 2 +- 2 files changed, 122 insertions(+), 92 deletions(-) diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index f7ee8dc..a5976a3 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -2882,125 +2882,153 @@ static void IDCT_64f(const double* src, int src_step, double* dft_src, double* d namespace cv { -#if defined HAVE_IPP && IPP_VERSION_MAJOR >= 7 +#if defined HAVE_IPP && IPP_VERSION_MAJOR >= 7 && !defined HAVE_IPP_ICV_ONLY -typedef IppStatus (CV_STDCALL * ippiDCTFwdFunc)(const Ipp32f*, int, Ipp32f*, int, const IppiDCTFwdSpec_32f*, Ipp8u*); -typedef IppStatus (CV_STDCALL * ippiDCTInvFunc)(const Ipp32f*, int, Ipp32f*, int, const IppiDCTInvSpec_32f*, Ipp8u*); +typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f*, int, Ipp32f*, int, const void*, Ipp8u*); +typedef IppStatus (CV_STDCALL * ippiDCTInitAlloc)(void**, IppiSize, IppHintAlgorithm); +typedef IppStatus (CV_STDCALL * ippiDCTFree)(void* pDCTSpec); +typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*); -static bool ippi_DCT_Fwd(const Mat& src, Mat& dst, bool row) +template +class DctIPPLoop_Invoker : public ParallelLoopBody { - if (src.type() != CV_32F) - return false; - - IppStatus status; - IppiDCTFwdSpec_32f* pDCTSpec; - Ipp8u *pBuffer; - int bufSize=0; +public: - ippiDCTFwdFunc ippFunc = (ippiDCTFwdFunc)ippiDCTFwd_32f_C1R; + DctIPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dct& _ippidct, bool _inv, bool *_ok) : + ParallelLoopBody(), src(_src), dst(_dst), ippidct(_ippidct), inv(_inv), ok(_ok) + { + *ok = true; + } - if (ippFunc==0) - return false; + virtual void operator()(const Range& range) const + { + IppStatus status; + void* pDCTSpec; + AutoBuffer buf; + uchar* pBuffer = 0; + int bufSize=0; - IppiSize srcRoiSize = {src.cols, row ? 1 : src.rows}; + IppiSize srcRoiSize = {src.cols, 1}; - CV_SUPPRESS_DEPRECATED_START - status = ippiDCTFwdInitAlloc_32f (&pDCTSpec, srcRoiSize, ippAlgHintNone); + CV_SUPPRESS_DEPRECATED_START - if ( status < 0 ) - { - ippiDCTFwdFree_32f(pDCTSpec); - return false; - } + ippiDCTInitAlloc ippInitAlloc = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f; + ippiDCTFree ippFree = inv ? (ippiDCTFree)ippiDCTInvFree_32f : (ippiDCTFree)ippiDCTFwdFree_32f; + ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f; - status = ippiDCTFwdGetBufSize_32f (pDCTSpec, &bufSize); - if ( status < 0 ) - { - ippiDCTFwdFree_32f(pDCTSpec); - return false; - } + status = ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone); - pBuffer = ippsMalloc_8u( bufSize ); + if ( status < 0 ) + { + ippFree(pDCTSpec); + *ok = false; + return; + } - if (row) - { - for (int i=0; i= 0; + const DctIPPLoop_Invoker& operator= (const DctIPPLoop_Invoker&); +}; + +template +bool DctIPPLoop(const Mat& src, Mat& dst, const Dct& ippidct, bool inv) +{ + bool ok; + parallel_for_(Range(0, src.rows), DctIPPLoop_Invoker(src, dst, ippidct, inv, &ok), src.total()/(double)(1<<16) ); + return ok; } -static bool ippi_DCT_Inv(const Mat& src, Mat& dst, bool row) +struct IPPDCTFunctor +{ + IPPDCTFunctor(ippiDCTFunc _func) : func(_func){} + + bool operator()(const Ipp32f* src, int srcStep, Ipp32f* dst, int dstStep, const void* pDCTSpec, Ipp8u* pBuffer) const + { + return func ? func(src, srcStep, dst, dstStep, pDCTSpec, pBuffer) >= 0 : false; + } +private: + ippiDCTFunc func; +}; + +static bool ippi_DCT(const Mat& src, Mat& dst, bool inv, bool row) { if (src.type() != CV_32F) return false; - IppStatus status; - IppiDCTInvSpec_32f* pDCTSpec; - Ipp8u *pBuffer; - int bufSize=0; - - ippiDCTInvFunc ippFunc = (ippiDCTInvFunc)ippiDCTInv_32f_C1R; + ippiDCTFunc ippFunc = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R ; - if (ippFunc==0) - return false; + if (row) + if(DctIPPLoop(src,dst,IPPDCTFunctor(ippFunc),inv)) + return true; + else + return false; + else + { + IppStatus status; + void* pDCTSpec; + AutoBuffer buf; + uchar* pBuffer = 0; + int bufSize=0; - IppiSize srcRoiSize = {src.cols, row ? 1 : src.rows}; + IppiSize srcRoiSize = {src.cols, src.rows}; - CV_SUPPRESS_DEPRECATED_START - status = ippiDCTInvInitAlloc_32f (&pDCTSpec, srcRoiSize, ippAlgHintNone); + CV_SUPPRESS_DEPRECATED_START - if ( status < 0 ) - { - ippiDCTInvFree_32f(pDCTSpec); - return false; - } + ippiDCTInitAlloc ippInitAlloc = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f; + ippiDCTFree ippFree = inv ? (ippiDCTFree)ippiDCTInvFree_32f : (ippiDCTFree)ippiDCTFwdFree_32f; + ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f; - status = ippiDCTInvGetBufSize_32f (pDCTSpec, &bufSize); - if ( status < 0 ) - { - ippiDCTInvFree_32f(pDCTSpec); - return false; - } + status = ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone); - pBuffer = ippsMalloc_8u( bufSize ); + if ( status < 0 ) + { + ippFree(pDCTSpec); + return false; + } - if (row) - { - for (int i=0; i= 0; + status = ippFunc((float*)src.data, (int)src.step, (float*)dst.data, (int)dst.step, pDCTSpec, (Ipp8u*)pBuffer); + + ippFree(pDCTSpec); + CV_SUPPRESS_DEPRECATED_END + + return status >= 0; + } } #endif @@ -3019,7 +3047,7 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags ) bool inv = (flags & DCT_INVERSE) != 0; Mat src0 = _src0.getMat(), src = src0; int type = src.type(), depth = src.depth(); - void /* *spec_dft = 0, */ *spec = 0; + void *spec = 0; double scale = 1.; int prev_len = 0, nf = 0, stage, end_stage; @@ -3036,12 +3064,14 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags ) _dst.create( src.rows, src.cols, type ); Mat dst = _dst.getMat(); -#if defined HAVE_IPP && IPP_VERSION_MAJOR >= 7 +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) && !defined HAVE_IPP_ICV_ONLY bool row = (flags & DCT_ROWS) != 0; - if (inv && ippi_DCT_Inv(src,dst,row)) - return; - if(ippi_DCT_Fwd(src,dst,row)) - return; + if(!row || src.rows>(int)(1<<8)) + { + if(ippi_DCT(src,dst,inv, row)) + return; + setIppErrorStatus(); + } #endif DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2]; diff --git a/modules/imgproc/src/distransform.cpp b/modules/imgproc/src/distransform.cpp index c03d4b9..55a58c7 100644 --- a/modules/imgproc/src/distransform.cpp +++ b/modules/imgproc/src/distransform.cpp @@ -577,7 +577,7 @@ trueDistTrans( const Mat& src, Mat& dst ) for( ; i <= m*3; i++ ) sat_tab[i] = i - shift; - cv::parallel_for_(cv::Range(0, n), cv::DTColumnInvoker(&src, &dst, sat_tab, sqr_tab)); + cv::parallel_for_(cv::Range(0, n), cv::DTColumnInvoker(&src, &dst, sat_tab, sqr_tab), src.total()/(double)(1<<16)); // stage 2: compute modified distance transform for each row float* inv_tab = sqr_tab + n; -- 2.7.4