}
static void
-DFTInit( int n0, int nf, int* factors, int* itab, int elem_size, void* _wave, int inv_itab )
+DFTInit( int n0, int nf, const int* factors, int* itab, int elem_size, void* _wave, int inv_itab )
{
int digits[34], radix[34];
int n = factors[0], m = 0;
}
#endif
-enum { DFT_NO_PERMUTE=256, DFT_COMPLEX_INPUT_OR_OUTPUT=512 };
+struct OcvDftOptions; // forward declaration: DFTFunc below names the struct before it is defined
+// Common signature of the DFT kernels: options by const reference, then untyped source and destination buffers.
+typedef void (*DFTFunc)(const OcvDftOptions & c, const void* src, void* dst);
+// Parameter bundle passed to the DFT kernels in place of the former long positional argument list.
+struct OcvDftOptions {
+ int nf; // number of entries of `factors` that the kernels iterate over
+ int *factors; // factors of the transform length; NOTE(review): RealDFT/CCSIDFT temporarily halve factors[0] through this pointer
+ double scale; // multiplier the kernels apply to their results
+
+ int* itab; // index table the kernels use to reorder elements
+ void* wave; // kernels cast this to Complex<T>*; presumably the precomputed complex exponentials — confirm against DFTInit
+ int tab_size; // table size; DFT derives its table step from tab_size relative to n
+ int n; // transform length
+
+ bool isInverse; // DFT reads this as its `inv` switch
+ bool noPermute; // when set, DFT skips its initial data shuffle (and asserts it is unset when dst != src)
+ bool isComplex; // read by RealDFT as "complex output" and by CCSIDFT as "complex input"
+
+ bool haveSSE3; // cached result of checkHardwareSupport(CV_CPU_SSE3), filled in by the constructor
+
+ DFTFunc dft_func; // kernel pointer; null after construction
+ bool useIpp; // when set, the kernels try the IPP call first
-// mixed-radix complex discrete Fourier transform: double-precision version
-template<typename T> static void
-DFT( const Complex<T>* src, Complex<T>* dst, int n,
- int nf, const int* factors, const int* itab,
- const Complex<T>* wave, int tab_size,
- const void*
#ifdef USE_IPP_DFT
- spec
+ uchar* ipp_spec; // passed to the ippsDFT* calls as the spec argument
+ uchar* ipp_work; // passed to the ippsDFT* calls as the work buffer
#endif
- , Complex<T>* buf,
- int flags, double _scale )
+
+ OcvDftOptions() // zero/false-initialises every field; only haveSSE3 is probed from the CPU
+ {
+ nf = 0;
+ factors = 0;
+ scale = 0;
+ itab = 0;
+ wave = 0;
+ tab_size = 0;
+ n = 0;
+ isInverse = false;
+ noPermute = false;
+ isComplex = false;
+ useIpp = false;
+#ifdef USE_IPP_DFT
+ ipp_spec = 0;
+ ipp_work = 0;
+#endif
+ dft_func = 0;
+ haveSSE3 = checkHardwareSupport(CV_CPU_SSE3); // queried once here instead of on every DFT call
+ }
+};
+
+// mixed-radix complex discrete Fourier transform: double-precision version
+template<typename T> static void
+DFT(const OcvDftOptions & c, const Complex<T>* src, Complex<T>* dst)
{
static const T sin_120 = (T)0.86602540378443864676372317075294;
static const T fft5_2 = (T)0.559016994374947424102293417182819;
static const T fft5_4 = (T)-1.538841768587626701285145288018455;
static const T fft5_5 = (T)0.363271264002680442947733378740309;
- int n0 = n, f_idx, nx;
- int inv = flags & DFT_INVERSE;
- int dw0 = tab_size, dw;
+ const Complex<T>* wave = (Complex<T>*)c.wave;
+ const int * itab = c.itab;
+
+ int n = c.n;
+ int f_idx, nx;
+ int inv = c.isInverse;
+ int dw0 = c.tab_size, dw;
int i, j, k;
Complex<T> t;
- T scale = (T)_scale;
- int tab_step;
+ T scale = (T)c.scale;
-#ifdef USE_IPP_DFT
- if( spec )
+ if( c.useIpp )
{
+#ifdef USE_IPP_DFT
if( !inv )
{
- if (ippsDFTFwd_CToC( src, dst, spec, (uchar*)buf ) >= 0)
+ if (ippsDFTFwd_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= 0)
{
CV_IMPL_ADD(CV_IMPL_IPP);
return;
}
else
{
- if (ippsDFTInv_CToC( src, dst, spec, (uchar*)buf ) >= 0)
+ if (ippsDFTInv_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= 0)
{
CV_IMPL_ADD(CV_IMPL_IPP);
return;
}
}
setIppErrorStatus();
- }
#endif
+ }
- tab_step = tab_size == n ? 1 : tab_size == n*2 ? 2 : tab_size/n;
+ int tab_step = c.tab_size == n ? 1 : c.tab_size == n*2 ? 2 : c.tab_size/n;
// 0. shuffle data
if( dst != src )
{
- assert( (flags & DFT_NO_PERMUTE) == 0 );
+ assert( !c.noPermute );
if( !inv )
{
for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
}
else
{
- if( (flags & DFT_NO_PERMUTE) == 0 )
+ if( !c.noPermute )
{
- CV_Assert( factors[0] == factors[nf-1] );
- if( nf == 1 )
+ CV_Assert( c.factors[0] == c.factors[c.nf-1] );
+ if( c.nf == 1 )
{
if( (n & 3) == 0 )
{
n = 1;
// 1. power-2 transforms
- if( (factors[0] & 1) == 0 )
+ if( (c.factors[0] & 1) == 0 )
{
- if( factors[0] >= 4 && checkHardwareSupport(CV_CPU_SSE3))
+ if( c.factors[0] >= 4 && c.haveSSE3)
{
DFT_VecR4<T> vr4;
- n = vr4(dst, factors[0], n0, dw0, wave);
+ n = vr4(dst, c.factors[0], c.n, dw0, wave);
}
// radix-4 transform
- for( ; n*4 <= factors[0]; )
+ for( ; n*4 <= c.factors[0]; )
{
nx = n;
n *= 4;
dw0 /= 4;
- for( i = 0; i < n0; i += n )
+ for( i = 0; i < c.n; i += n )
{
Complex<T> *v0, *v1;
T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4;
}
}
- for( ; n < factors[0]; )
+ for( ; n < c.factors[0]; )
{
// do the remaining radix-2 transform
nx = n;
n *= 2;
dw0 /= 2;
- for( i = 0; i < n0; i += n )
+ for( i = 0; i < c.n; i += n )
{
Complex<T>* v = dst + i;
T r0 = v[0].re + v[nx].re;
}
// 2. all the other transforms
- for( f_idx = (factors[0]&1) ? 0 : 1; f_idx < nf; f_idx++ )
+ for( f_idx = (c.factors[0]&1) ? 0 : 1; f_idx < c.nf; f_idx++ )
{
- int factor = factors[f_idx];
+ int factor = c.factors[f_idx];
nx = n;
n *= factor;
dw0 /= factor;
if( factor == 3 )
{
// radix-3
- for( i = 0; i < n0; i += n )
+ for( i = 0; i < c.n; i += n )
{
Complex<T>* v = dst + i;
else if( factor == 5 )
{
// radix-5
- for( i = 0; i < n0; i += n )
+ for( i = 0; i < c.n; i += n )
{
for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
{
{
// radix-"factor" - an odd number
int p, q, factor2 = (factor - 1)/2;
- int d, dd, dw_f = tab_size/factor;
+ int d, dd, dw_f = c.tab_size/factor;
+ AutoBuffer<Complex<T> > buf(factor2 * 2);
Complex<T>* a = buf;
- Complex<T>* b = buf + factor2;
+ Complex<T>* b = a + factor2;
- for( i = 0; i < n0; i += n )
+ for( i = 0; i < c.n; i += n )
{
for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
{
s1.im += r1 - i1; s0.im += r1 + i1;
d += dd;
- d -= -(d >= tab_size) & tab_size;
+ d -= -(d >= c.tab_size) & c.tab_size;
}
v[k] = s0;
if( inv )
im_scale = -im_scale;
- for( i = 0; i < n0; i++ )
+ for( i = 0; i < c.n; i++ )
{
T t0 = dst[i].re*re_scale;
T t1 = dst[i].im*im_scale;
}
else if( inv )
{
- for( i = 0; i <= n0 - 2; i += 2 )
+ for( i = 0; i <= c.n - 2; i += 2 )
{
T t0 = -dst[i].im;
T t1 = -dst[i+1].im;
dst[i+1].im = t1;
}
- if( i < n0 )
- dst[n0-1].im = -dst[n0-1].im;
+ if( i < c.n )
+ dst[c.n-1].im = -dst[c.n-1].im;
}
}
re(0), re(1), im(1), ... , re(n/2-1), im((n+1)/2-1) [, re((n+1)/2)] OR ...
re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
template<typename T> static void
-RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
- const Complex<T>* wave, int tab_size, const void*
-#ifdef USE_IPP_DFT
- spec
-#endif
- ,
- Complex<T>* buf, int flags, double _scale )
+RealDFT(const OcvDftOptions & c, const T* src, T* dst)
{
- int complex_output = (flags & DFT_COMPLEX_INPUT_OR_OUTPUT) != 0;
- T scale = (T)_scale;
- int j, n2 = n >> 1;
+ int n = c.n;
+ int complex_output = c.isComplex;
+ T scale = (T)c.scale;
+ int j;
dst += complex_output;
-#ifdef USE_IPP_DFT
- if( spec )
+ if( c.useIpp )
{
- if (ippsDFTFwd_RToPack( src, dst, spec, (uchar*)buf ) >=0)
+#ifdef USE_IPP_DFT
+ if (ippsDFTFwd_RToPack( src, dst, c.ipp_spec, c.ipp_work ) >=0)
{
if( complex_output )
{
return;
}
setIppErrorStatus();
- }
#endif
- assert( tab_size == n );
+ }
+ assert( c.tab_size == n );
if( n == 1 )
{
_dst[0].im = 0;
for( j = 1; j < n; j += 2 )
{
- T t0 = src[itab[j]]*scale;
- T t1 = src[itab[j+1]]*scale;
+ T t0 = src[c.itab[j]]*scale;
+ T t1 = src[c.itab[j+1]]*scale;
_dst[j].re = t0;
_dst[j].im = 0;
_dst[j+1].re = t1;
_dst[j+1].im = 0;
}
- DFT( _dst, _dst, n, nf, factors, itab, wave,
- tab_size, 0, buf, DFT_NO_PERMUTE, 1 );
+ OcvDftOptions sub_c = c;
+ sub_c.isComplex = false;
+ sub_c.isInverse = false;
+ sub_c.noPermute = true;
+ sub_c.scale = 1.;
+ DFT(sub_c, _dst, _dst);
if( !complex_output )
dst[1] = dst[0];
}
T t0, t;
T h1_re, h1_im, h2_re, h2_im;
T scale2 = scale*(T)0.5;
- factors[0] >>= 1;
+ int n2 = n >> 1;
+
+ c.factors[0] >>= 1;
+
+ OcvDftOptions sub_c = c;
+ sub_c.factors += (c.factors[0] == 1);
+ sub_c.nf -= (c.factors[0] == 1);
+ sub_c.isComplex = false;
+ sub_c.isInverse = false;
+ sub_c.noPermute = false;
+ sub_c.scale = 1.;
+ sub_c.n = n2;
- DFT( (Complex<T>*)src, (Complex<T>*)dst, n2, nf - (factors[0] == 1),
- factors + (factors[0] == 1),
- itab, wave, tab_size, 0, buf, 0, 1 );
- factors[0] <<= 1;
+ DFT(sub_c, (Complex<T>*)src, (Complex<T>*)dst);
+
+ c.factors[0] <<= 1;
t = dst[0] - dst[1];
dst[0] = (dst[0] + dst[1])*scale;
t = dst[n-1];
dst[n-1] = dst[1];
+ const Complex<T> *wave = (const Complex<T>*)c.wave;
+
for( j = 2, wave++; j < n2; j += 2, wave++ )
{
/* calc odd */
re[0], re[1], im[1], ... , re[n/2-1], im[n/2-1], re[n/2] OR
re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
template<typename T> static void
-CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
- const Complex<T>* wave, int tab_size,
- const void*
-#ifdef USE_IPP_DFT
- spec
-#endif
- , Complex<T>* buf,
- int flags, double _scale )
+CCSIDFT(const OcvDftOptions & c, const T* src, T* dst)
{
- int complex_input = (flags & DFT_COMPLEX_INPUT_OR_OUTPUT) != 0;
- int j, k, n2 = (n+1) >> 1;
- T scale = (T)_scale;
+ int n = c.n;
+ int complex_input = c.isComplex;
+ int j, k;
+ T scale = (T)c.scale;
T save_s1 = 0.;
T t0, t1, t2, t3, t;
- assert( tab_size == n );
+ assert( c.tab_size == n );
if( complex_input )
{
((T*)src)[1] = src[0];
src++;
}
-#ifdef USE_IPP_DFT
- if( spec )
+ if( c.useIpp )
{
- if (ippsDFTInv_PackToR( src, dst, spec, (uchar*)buf ) >=0)
+#ifdef USE_IPP_DFT
+ if (ippsDFTInv_PackToR( src, dst, c.ipp_spec, c.ipp_work ) >=0)
{
if( complex_input )
((T*)src)[0] = (T)save_s1;
}
setIppErrorStatus();
- }
#endif
+ }
if( n == 1 )
{
dst[0] = (T)(src[0]*scale);
_dst[0].re = src[0];
_dst[0].im = 0;
+
+ int n2 = (n+1) >> 1;
+
for( j = 1; j < n2; j++ )
{
- int k0 = itab[j], k1 = itab[n-j];
+ int k0 = c.itab[j], k1 = c.itab[n-j];
t0 = _src[j].re; t1 = _src[j].im;
_dst[k0].re = t0; _dst[k0].im = -t1;
_dst[k1].re = t0; _dst[k1].im = t1;
}
- DFT( _dst, _dst, n, nf, factors, itab, wave,
- tab_size, 0, buf, DFT_NO_PERMUTE, 1. );
+ OcvDftOptions sub_c = c;
+ sub_c.isComplex = false;
+ sub_c.isInverse = false;
+ sub_c.noPermute = true;
+ sub_c.scale = 1.;
+ sub_c.n = n;
+
+ DFT(sub_c, _dst, _dst);
dst[0] *= scale;
for( j = 1; j < n; j += 2 )
{
else
{
int inplace = src == dst;
- const Complex<T>* w = wave;
+ const Complex<T>* w = (const Complex<T>*)c.wave;
t = src[1];
t0 = (src[0] + src[n-1]);
dst[0] = t0;
dst[1] = t1;
+ int n2 = (n+1) >> 1;
+
for( j = 2, w++; j < n2; j += 2, w++ )
{
T h1_re, h1_im, h2_re, h2_im;
else
{
int j2 = j >> 1;
- k = itab[j2];
+ k = c.itab[j2];
dst[k] = t0;
dst[k+1] = t1;
- k = itab[n2-j2];
+ k = c.itab[n2-j2];
dst[k] = t2;
dst[k+1]= t3;
}
}
else
{
- k = itab[n2];
+ k = c.itab[n2];
dst[k*2] = t0;
dst[k*2+1] = t1;
}
}
- factors[0] >>= 1;
- DFT( (Complex<T>*)dst, (Complex<T>*)dst, n2,
- nf - (factors[0] == 1),
- factors + (factors[0] == 1), itab,
- wave, tab_size, 0, buf,
- inplace ? 0 : DFT_NO_PERMUTE, 1. );
- factors[0] <<= 1;
+ c.factors[0] >>= 1;
+
+ OcvDftOptions sub_c = c;
+ sub_c.factors += (c.factors[0] == 1);
+ sub_c.nf -= (c.factors[0] == 1);
+ sub_c.isComplex = false;
+ sub_c.isInverse = false;
+ sub_c.noPermute = !inplace;
+ sub_c.scale = 1.;
+ sub_c.n = n2;
+
+ DFT(sub_c, (Complex<T>*)dst, (Complex<T>*)dst);
+
+ c.factors[0] <<= 1;
for( j = 0; j < n; j += 2 )
{
}
}
-
-typedef void (*DFTFunc)(
- const void* src, void* dst, int n, int nf, int* factors,
- const int* itab, const void* wave, int tab_size,
- const void* spec, void* buf, int inv, double scale );
-
-static void DFT_32f( const Complexf* src, Complexf* dst, int n,
- int nf, const int* factors, const int* itab,
- const Complexf* wave, int tab_size,
- const void* spec, Complexf* buf,
- int flags, double scale )
+static void DFT_32f(const OcvDftOptions & c, const Complexf* src, Complexf* dst) // non-template wrapper: forwards to the DFT template with Complexf buffers
{
- DFT(src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
+ DFT(c, src, dst);
}
-static void DFT_64f( const Complexd* src, Complexd* dst, int n,
- int nf, const int* factors, const int* itab,
- const Complexd* wave, int tab_size,
- const void* spec, Complexd* buf,
- int flags, double scale )
+static void DFT_64f(const OcvDftOptions & c, const Complexd* src, Complexd* dst) // non-template wrapper: forwards to the DFT template with Complexd buffers
{
- DFT(src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
+ DFT(c, src, dst);
}
-static void RealDFT_32f( const float* src, float* dst, int n, int nf, int* factors,
- const int* itab, const Complexf* wave, int tab_size, const void* spec,
- Complexf* buf, int flags, double scale )
+static void RealDFT_32f(const OcvDftOptions & c, const float* src, float* dst) // non-template wrapper: forwards to the RealDFT template with float buffers
{
- RealDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
+ RealDFT(c, src, dst);
}
-static void RealDFT_64f( const double* src, double* dst, int n, int nf, int* factors,
- const int* itab, const Complexd* wave, int tab_size, const void* spec,
- Complexd* buf, int flags, double scale )
+static void RealDFT_64f(const OcvDftOptions & c, const double* src, double* dst) // non-template wrapper: forwards to the RealDFT template with double buffers
{
- RealDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
+ RealDFT(c, src, dst);
}
-static void CCSIDFT_32f( const float* src, float* dst, int n, int nf, int* factors,
- const int* itab, const Complexf* wave, int tab_size, const void* spec,
- Complexf* buf, int flags, double scale )
+static void CCSIDFT_32f(const OcvDftOptions & c, const float* src, float* dst) // non-template wrapper: forwards to the CCSIDFT template with float buffers
{
- CCSIDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
+ CCSIDFT(c, src, dst);
}
-static void CCSIDFT_64f( const double* src, double* dst, int n, int nf, int* factors,
- const int* itab, const Complexd* wave, int tab_size, const void* spec,
- Complexd* buf, int flags, double scale )
+static void CCSIDFT_64f(const OcvDftOptions & c, const double* src, double* dst) // non-template wrapper: forwards to the CCSIDFT template with double buffers
{
- CCSIDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
+ CCSIDFT(c, src, dst);
}
}
{
public:
- Dft_C_IPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dft& _ippidft, int _norm_flag, bool *_ok) :
- ParallelLoopBody(), src(_src), dst(_dst), ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
+ Dft_C_IPPLoop_Invoker(uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width,
+ const Dft& _ippidft, int _norm_flag, bool *_ok) :
+ ParallelLoopBody(),
+ src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
+ ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
{
*ok = true;
}
int sizeSpec=0;
int sizeInit=0;
- IppiSize srcRoiSize = {src.cols, 1};
+ IppiSize srcRoiSize = {width, 1};
status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
if ( status < 0 )
}
for( int i = range.start; i < range.end; ++i)
- if(!ippidft(src.ptr<Ipp32fc>(i), (int)src.step,dst.ptr<Ipp32fc>(i), (int)dst.step, pDFTSpec, (Ipp8u*)pBuffer))
+ if(!ippidft((Ipp32fc*)(src + src_step * i), src_step, (Ipp32fc*)(dst + dst_step * i), dst_step,
+ pDFTSpec, (Ipp8u*)pBuffer))
{
*ok = false;
}
}
private:
- const Mat& src;
- Mat& dst;
+ uchar * src;
+ int src_step;
+ uchar * dst;
+ int dst_step;
+ int width;
const Dft& ippidft;
int norm_flag;
bool *ok;
{
public:
- Dft_R_IPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dft& _ippidft, int _norm_flag, bool *_ok) :
- ParallelLoopBody(), src(_src), dst(_dst), ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
+ Dft_R_IPPLoop_Invoker(uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width,
+ const Dft& _ippidft, int _norm_flag, bool *_ok) :
+ ParallelLoopBody(),
+ src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
+ ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
{
*ok = true;
}
int sizeSpec=0;
int sizeInit=0;
- IppiSize srcRoiSize = {src.cols, 1};
+ IppiSize srcRoiSize = {width, 1};
status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
if ( status < 0 )
}
for( int i = range.start; i < range.end; ++i)
- if(!ippidft(src.ptr<float>(i), (int)src.step,dst.ptr<float>(i), (int)dst.step, pDFTSpec, (Ipp8u*)pBuffer))
+ if(!ippidft((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step,
+ pDFTSpec, (Ipp8u*)pBuffer))
{
*ok = false;
}
}
private:
- const Mat& src;
- Mat& dst;
+ uchar * src;
+ int src_step;
+ uchar * dst;
+ int dst_step;
+ int width;
const Dft& ippidft;
int norm_flag;
bool *ok;
};
template <typename Dft>
-bool Dft_C_IPPLoop(const Mat& src, Mat& dst, const Dft& ippidft, int norm_flag)
+bool Dft_C_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag)
{
bool ok;
- parallel_for_(Range(0, src.rows), Dft_C_IPPLoop_Invoker<Dft>(src, dst, ippidft, norm_flag, &ok), src.total()/(double)(1<<16) );
+ parallel_for_(Range(0, height), Dft_C_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) );
return ok;
}
template <typename Dft>
-bool Dft_R_IPPLoop(const Mat& src, Mat& dst, const Dft& ippidft, int norm_flag)
+bool Dft_R_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag)
{
bool ok;
- parallel_for_(Range(0, src.rows), Dft_R_IPPLoop_Invoker<Dft>(src, dst, ippidft, norm_flag, &ok), src.total()/(double)(1<<16) );
+ parallel_for_(Range(0, height), Dft_R_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) );
return ok;
}
ippiDFT_R_Func func;
};
-static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
+static bool ippi_DFT_C_32F(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag)
{
IppStatus status;
Ipp8u* pBuffer = 0;
int sizeSpec=0;
int sizeInit=0;
- IppiSize srcRoiSize = {src.cols, src.rows};
+ IppiSize srcRoiSize = {width, height};
status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
if ( status < 0 )
}
if (!inv)
- status = ippiDFTFwd_CToC_32fc_C1R( src.ptr<Ipp32fc>(), (int)src.step, dst.ptr<Ipp32fc>(), (int)dst.step, pDFTSpec, pBuffer );
+ status = ippiDFTFwd_CToC_32fc_C1R( (Ipp32fc*)src, src_step, (Ipp32fc*)dst, dst_step, pDFTSpec, pBuffer );
else
- status = ippiDFTInv_CToC_32fc_C1R( src.ptr<Ipp32fc>(), (int)src.step, dst.ptr<Ipp32fc>(), (int)dst.step, pDFTSpec, pBuffer );
+ status = ippiDFTInv_CToC_32fc_C1R( (Ipp32fc*)src, src_step, (Ipp32fc*)dst, dst_step, pDFTSpec, pBuffer );
if ( sizeBuffer > 0 )
ippFree( pBuffer );
return false;
}
-static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
+static bool ippi_DFT_R_32F(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag)
{
IppStatus status;
Ipp8u* pBuffer = 0;
int sizeSpec=0;
int sizeInit=0;
- IppiSize srcRoiSize = {src.cols, src.rows};
+ IppiSize srcRoiSize = {width, height};
status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
if ( status < 0 )
}
if (!inv)
- status = ippiDFTFwd_RToPack_32f_C1R( src.ptr<float>(), (int)(src.step), dst.ptr<float>(), (int)dst.step, pDFTSpec, pBuffer );
+ status = ippiDFTFwd_RToPack_32f_C1R( (float*)src, src_step, (float*)dst, dst_step, pDFTSpec, pBuffer );
else
- status = ippiDFTInv_PackToR_32f_C1R( src.ptr<float>(), (int)src.step, dst.ptr<float>(), (int)dst.step, pDFTSpec, pBuffer );
+ status = ippiDFTInv_PackToR_32f_C1R( (float*)src, src_step, (float*)dst, dst_step, pDFTSpec, pBuffer );
if ( sizeBuffer > 0 )
ippFree( pBuffer );
namespace cv
{
-static void complementComplexOutput(Mat& dst, int len, int dft_dims)
+// For each of `len` rows of interleaved (re, im) pairs, writes complex element n-j as the conjugate of element j, for 1 <= j < (n+1)/2.
+template <typename T>
+static void complementComplex(T * ptr, int step, int n, int len, int dft_dims)
{
- int i, n = dst.cols;
- size_t elem_size = dst.elemSize1();
- if( elem_size == sizeof(float) )
+ T* p0 = (T*)ptr;
+ size_t dstep = step/sizeof(p0[0]); // row stride converted from bytes to T elements
+ for(int i = 0; i < len; i++ )
{
- float* p0 = dst.ptr<float>();
- size_t dstep = dst.step/sizeof(p0[0]);
- for( i = 0; i < len; i++ )
- {
- float* p = p0 + dstep*i;
- float* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
+ T* p = p0 + dstep*i; // row being completed
+ T* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i); // source row: the same row for 1D, row 0 and the middle row; otherwise row len-i
- for( int j = 1; j < (n+1)/2; j++ )
- {
- p[(n-j)*2] = q[j*2];
- p[(n-j)*2+1] = -q[j*2+1];
- }
+ for( int j = 1; j < (n+1)/2; j++ )
+ {
+ p[(n-j)*2] = q[j*2]; // real part copied as is
+ p[(n-j)*2+1] = -q[j*2+1]; // imaginary part negated
}
}
+}
+// Dispatches complementComplex on element depth: float for CV_32F, double for every other depth value.
+static void complementComplexOutput(int depth, uchar * ptr, int step, int count, int len, int dft_dims)
+{
+ if( depth == CV_32F )
+ complementComplex((float*)ptr, step, count, len, dft_dims); // `count` is forwarded as complementComplex's `n`
else
+ complementComplex((double*)ptr, step, count, len, dft_dims);
+}
+// Transform kinds, as selected by determineMode from the direction and the source/destination channel counts.
+enum DftMode {
+ InvalidDft = 0, // no supported combination matched
+ FwdRealToCCS, // forward, 1 channel in, 1 channel out
+ FwdRealToComplex, // forward, 1 channel in, 2 channels out
+ FwdComplexToComplex, // forward, 2 channels in, 2 channels out
+ InvCCSToReal, // inverse, 1 channel in, 1 channel out
+ InvComplexToReal, // inverse, 2 channels in, 1 channel out
+ InvComplexToComplex, // inverse, 2 channels in, 2 channels out
+};
+// Shape classes produced by determineDims.
+enum DftDims {
+ InvalidDim = 0, // no rule in determineDims matched
+ OneDim, // row-wise request, a single row, or a continuous single column
+ OneDimColWise, // single column with more than one row that is not continuous
+ TwoDims // more than one row and more than one column
+};
+// Returns the enumerator's name as a string literal, or a null pointer when `m` matches no DftMode enumerator.
+inline const char * modeName(DftMode m)
+{
+ switch (m)
{
- double* p0 = dst.ptr<double>();
- size_t dstep = dst.step/sizeof(p0[0]);
- for( i = 0; i < len; i++ )
- {
- double* p = p0 + dstep*i;
- double* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
+ case InvalidDft: return "InvalidDft";
+ case FwdRealToCCS: return "FwdRealToCCS";
+ case FwdRealToComplex: return "FwdRealToComplex";
+ case FwdComplexToComplex: return "FwdComplexToComplex";
+ case InvCCSToReal: return "InvCCSToReal";
+ case InvComplexToReal: return "InvComplexToReal";
+ case InvComplexToComplex: return "InvComplexToComplex";
+ }
+ return 0; // reached only when no case matched
+}
- for( int j = 1; j < (n+1)/2; j++ )
- {
- p[(n-j)*2] = q[j*2];
- p[(n-j)*2+1] = -q[j*2+1];
- }
- }
+// Returns the enumerator's name as a string literal, or a null pointer when `d` matches no DftDims enumerator.
+inline const char * dimsName(DftDims d)
+{
+ switch (d)
+ {
+ case InvalidDim: return "InvalidDim";
+ case OneDim: return "OneDim";
+ case OneDimColWise: return "OneDimColWise";
+ case TwoDims: return "TwoDims";
+ }
+ return 0; // reached only when no case matched
+}
+// True when `mode`, converted to DftMode, is one of the three inverse transforms; false for every other value.
+template <typename T>
+inline bool isInv(T mode)
+{
+ switch ((DftMode)mode)
+ {
+ case InvCCSToReal:
+ case InvComplexToReal:
+ case InvComplexToComplex: return true;
+ default: return false;
}
}
+// Maps direction plus channel counts (cn1 = source, cn2 = destination, as passed by OcvDftImpl::init) to a DftMode; unsupported combinations give InvalidDft.
+inline DftMode determineMode(bool inv, int cn1, int cn2)
+{
+ if (!inv)
+ {
+ if (cn1 == 1 && cn2 == 1)
+ return FwdRealToCCS;
+ else if (cn1 == 1 && cn2 == 2)
+ return FwdRealToComplex;
+ else if (cn1 == 2 && cn2 == 2)
+ return FwdComplexToComplex;
+ } // forward with 2 -> 1 channels falls through to InvalidDft
+ else
+ {
+ if (cn1 == 1 && cn2 == 1)
+ return InvCCSToReal;
+ else if (cn1 == 2 && cn2 == 1)
+ return InvComplexToReal;
+ else if (cn1 == 2 && cn2 == 2)
+ return InvComplexToComplex;
+ } // inverse with 1 -> 2 channels falls through to InvalidDft
+ return InvalidDft;
}
-void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
+// Classifies the transform shape from the row/column counts, the row-wise request and buffer continuity.
+inline DftDims determineDims(int rows, int cols, bool isRowWise, bool isContinuous)
{
-#ifdef HAVE_CLAMDFFT
- CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
- _dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0,
- ocl_dft_amdfft(_src0, _dst, flags))
-#endif
+ // The checks run in priority order; the first one that matches decides the result.
+ if (isRowWise)
+ return OneDim;
+ if (cols == 1 && rows > 1) // one-column-shaped input
+ {
+ if (isContinuous)
+ return OneDim; // a continuous single column is handled as one-dimensional
+ else
+ return OneDimColWise;
+ }
+ if (rows == 1)
+ return OneDim;
+ if (cols > 1 && rows > 1)
+ return TwoDims;
+ return InvalidDim; // none of the shapes above matched
+}
-#ifdef HAVE_OPENCL
- CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2,
- ocl_dft(_src0, _dst, flags, nonzero_rows))
-#endif
+class OcvDftImpl
+{
+protected:
+ hal::DftContext contextA;
+ hal::DftContext contextB;
+ bool needBufferA;
+ bool needBufferB;
+ bool inv;
+ int width;
+ int height;
+ DftMode mode;
+ int elem_size;
+ int complex_elem_size;
+ int depth;
+ bool real_transform;
+ int nonzero_rows;
+ bool isRowTransform;
+ bool isScaled;
+ std::vector<int> stages;
+ bool useIpp;
+ int src_channels;
+ int dst_channels;
+
+ AutoBuffer<uchar> tmp_bufA;
+ AutoBuffer<uchar> tmp_bufB;
+ AutoBuffer<uchar> buf0;
+ AutoBuffer<uchar> buf1;
- static DFTFunc dft_tbl[6] =
+public:
+ OcvDftImpl() // puts every scalar member into a neutral state (false / 0 / InvalidDft); init() supplies the real configuration
{
- (DFTFunc)DFT_32f,
- (DFTFunc)RealDFT_32f,
- (DFTFunc)CCSIDFT_32f,
- (DFTFunc)DFT_64f,
- (DFTFunc)RealDFT_64f,
- (DFTFunc)CCSIDFT_64f
- };
- AutoBuffer<uchar> buf;
- Mat src0 = _src0.getMat(), src = src0;
- int prev_len = 0, stage = 0;
- bool inv = (flags & DFT_INVERSE) != 0;
- int nf = 0, real_transform = src.channels() == 1 || (inv && (flags & DFT_REAL_OUTPUT)!=0);
- int type = src.type(), depth = src.depth();
- int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2;
- int factors[34];
- bool inplace_transform = false;
-#ifdef USE_IPP_DFT
- AutoBuffer<uchar> ippbuf;
- int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1;
+ needBufferA = false;
+ needBufferB = false;
+ inv = false;
+ width = 0;
+ height = 0;
+ mode = InvalidDft;
+ elem_size = 0;
+ complex_elem_size = 0;
+ depth = 0;
+ real_transform = false;
+ nonzero_rows = 0;
+ isRowTransform = false;
+ isScaled = false;
+ useIpp = false; // the only place useIpp is cleared; init() only ever sets it
+ src_channels = 0;
+ dst_channels = 0;
+ }
+ // Stores the transform geometry and flags, then either selects the IPP path or prepares one hal DFT context per stage.
+ void init(int _width, int _height, int _depth, int _src_channels, int _dst_channels, int flags, int _nonzero_rows)
+ {
+ bool isComplex = _src_channels != _dst_channels; // true when the channel count changes between source and destination
+ nonzero_rows = _nonzero_rows;
+ width = _width;
+ height = _height;
+ depth = _depth;
+ src_channels = _src_channels;
+ dst_channels = _dst_channels;
+ bool isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
+ bool isInplace = (flags & CV_HAL_DFT_IS_INPLACE) != 0;
+ bool isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != 0;
+ mode = determineMode(isInverse, _src_channels, _dst_channels);
+ inv = isInverse;
+ isRowTransform = (flags & CV_HAL_DFT_ROWS) != 0;
+ isScaled = (flags & CV_HAL_DFT_SCALE) != 0;
+ needBufferA = false;
+ needBufferB = false;
+ real_transform = (mode != FwdComplexToComplex && mode != InvComplexToComplex); // every mode except complex-to-complex counts as real
+ // elem_size is the scalar size for real transforms and the complex pair size otherwise.
+ elem_size = (depth == CV_32F) ? sizeof(float) : sizeof(double);
+ complex_elem_size = elem_size * 2;
+ if( !real_transform )
+ elem_size = complex_elem_size;
+ // IPP path: chosen only for CV_32F, nonzero_rows == 0, more than 64 elements, and the four modes listed below.
+#if defined USE_IPP_DFT
+ CV_IPP_CHECK()
+ {
+ if (nonzero_rows == 0 && depth == CV_32F && ((width * height)>(int)(1<<6))) // NOTE(review): width * height is an int product and could overflow for very large sizes
+ {
+ if (mode == FwdComplexToComplex || mode == InvComplexToComplex || mode == FwdRealToCCS || mode == InvCCSToReal)
+ {
+ useIpp = true; // NOTE(review): never reset to false here, so a second init() call on the same object keeps an earlier true value
+ return; // no stages or hal contexts are prepared on the IPP path
+ }
+ }
+ }
#endif
- CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
+ DftDims dims = determineDims(height, width, isRowTransform, isContinuous);
+ if (dims == TwoDims)
+ {
+ stages.resize(2);
+ if (mode == InvCCSToReal || mode == InvComplexToReal)
+ {
+ stages[0] = 1; // inverse transforms producing real output run the column stage first
+ stages[1] = 0;
+ }
+ else
+ {
+ stages[0] = 0; // all other 2D transforms run the row stage first
+ stages[1] = 1;
+ }
+ }
+ else
+ {
+ stages.resize(1);
+ if (dims == OneDimColWise)
+ stages[0] = 1;
+ else
+ stages[0] = 0;
+ }
- if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
- _dst.create( src.size(), CV_MAKETYPE(depth, 2) );
- else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
- _dst.create( src.size(), depth );
- else
- _dst.create( src.size(), type );
+ for(uint stageIndex = 0; stageIndex < stages.size(); ++stageIndex)
+ {
+ if (stageIndex == 1)
+ {
+ isInplace = true; // the second stage is always set up as in-place with no channel change
+ isComplex = false;
+ }
- Mat dst = _dst.getMat();
+ int stage = stages[stageIndex];
+ bool isLastStage = (stageIndex + 1 == stages.size());
+
+ int len, count;
+ // Rebuild the hal flag word for this stage from the stored state.
+ int f = 0;
+ if (inv)
+ f |= CV_HAL_DFT_INVERSE;
+ if (isScaled)
+ f |= CV_HAL_DFT_SCALE;
+ if (isRowTransform)
+ f |= CV_HAL_DFT_ROWS;
+ if (isComplex)
+ f |= CV_HAL_DFT_COMPLEX_OUTPUT;
+ if (real_transform)
+ f |= CV_HAL_DFT_REAL_OUTPUT;
+ if (!isLastStage)
+ f |= CV_HAL_DFT_TWO_STAGE;
+
+ if( stage == 0 ) // row-wise transform
+ {
+ if (width == 1 && !isRowTransform )
+ {
+ len = height; // a single column without the row-wise flag is transformed along its height
+ count = width;
+ }
+ else
+ {
+ len = width;
+ count = height;
+ }
+ needBufferA = isInplace; // passed by address to hal::dftInit, which may overwrite it
+ hal::dftInit(contextA, len, count, depth, f, &needBufferA);
+ if (needBufferA)
+ tmp_bufA.allocate(len * complex_elem_size);
+ }
+ else
+ {
+ len = height;
+ count = width;
+ f |= CV_HAL_DFT_STAGE_COLS;
+ needBufferB = isInplace; // passed by address to hal::dftInit, which may overwrite it
+ hal::dftInit(contextB, len, count, depth, f, &needBufferB);
+ if (needBufferB)
+ tmp_bufB.allocate(len * complex_elem_size);
+
+ buf0.allocate(len * complex_elem_size); // the column stage always gets both of these buffers
+ buf1.allocate(len * complex_elem_size);
+ }
+ }
+ }
-#if defined USE_IPP_DFT
- CV_IPP_CHECK()
+ // Executes the transform configured by init(): reads from src, writes to dst.
+ // src_step and dst_step are forwarded to the IPP helpers or to rowDft()/colDft();
+ // rowDft() uses them as byte offsets between consecutive rows.
+ void run(uchar * src, int src_step, uchar * dst, int dst_step)
{
- if ((src.depth() == CV_32F) && (src.total()>(int)(1<<6)) && nonzero_rows == 0)
+#if defined USE_IPP_DFT
+ // IPP path: taken only when useIpp is set.
+ if (useIpp)
{
- if ((flags & DFT_ROWS) == 0)
+ // NOTE(review): 8 / 2 / 1 are presumably IPP's "no division", "divide inverse by N"
+ // and "divide forward by N" normalization flags - confirm against the IPP headers.
+ int ipp_norm_flag = !isScaled ? 8 : inv ? 2 : 1;
+ // Whole-image transform unless a row-by-row transform was requested.
+ if (!isRowTransform)
{
- if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT)))
+ if (mode == FwdComplexToComplex || mode == InvComplexToComplex)
{
- if (ippi_DFT_C_32F(src, dst, inv, ipp_norm_flag))
+ if (ippi_DFT_C_32F(src, src_step, dst, dst_step, width, height, inv, ipp_norm_flag))
{
CV_IMPL_ADD(CV_IMPL_IPP);
return;
}
setIppErrorStatus();
}
- if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT)))
+ else if (mode == FwdRealToCCS || mode == InvCCSToReal)
{
- if (ippi_DFT_R_32F(src, dst, inv, ipp_norm_flag))
+ if (ippi_DFT_R_32F(src, src_step, dst, dst_step, width, height, inv, ipp_norm_flag))
{
CV_IMPL_ADD(CV_IMPL_IPP);
return;
}
else
{
- if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT)))
+ // Row-by-row variant: same dispatch on mode, but through the *_IPPLoop helpers.
+ if (mode == FwdComplexToComplex || mode == InvComplexToComplex)
{
ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R;
- if (Dft_C_IPPLoop(src, dst, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag))
+ if (Dft_C_IPPLoop(src, src_step, dst, dst_step, width, height, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag))
{
CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
return;
}
setIppErrorStatus();
}
- if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT)))
+ else if (mode == FwdRealToCCS || mode == InvCCSToReal)
{
ippiDFT_R_Func ippiFunc = inv ? (ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R;
- if (Dft_R_IPPLoop(src, dst, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag))
+ if (Dft_R_IPPLoop(src, src_step, dst, dst_step, width, height, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag))
{
CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
return;
setIppErrorStatus();
}
}
+ // NOTE(review): reached when none of the IPP calls above returned; the generic
+ // stages below are then skipped - confirm this is intended when IPP reports failure.
+ return;
}
- }
#endif
- if( !real_transform )
- elem_size = complex_elem_size;
+ // Generic path: run the stages listed in `stages`, in order.
+ for(uint stageIndex = 0; stageIndex < stages.size(); ++stageIndex)
+ {
+ int stage_src_channels = src_channels;
+ int stage_dst_channels = dst_channels;
- if( src.cols == 1 && nonzero_rows > 0 )
- CV_Error( CV_StsNotImplemented,
- "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
- "For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
+ // The second stage consumes the output of the first one, so it reads from dst
+ // and its source channel count equals the destination channel count.
+ if (stageIndex == 1)
+ {
+ src = dst;
+ src_step = dst_step;
+ stage_src_channels = stage_dst_channels;
+ }
+
+ int stage = stages[stageIndex];
+ bool isLastStage = (stageIndex + 1 == stages.size());
+ bool isComplex = stage_src_channels != stage_dst_channels;
+
+ // Stage value 0 selects the row-wise pass, any other value the column-wise pass.
+ if( stage == 0 )
+ rowDft(src, src_step, dst, dst_step, isComplex, isLastStage);
+ else
+ colDft(src, src_step, dst, dst_step, stage_src_channels, stage_dst_channels, isLastStage);
+ }
+ }
+
+ // Releases the two 1D contexts (contextA, contextB) through hal::dftFree().
+ // Nothing is released when useIpp is set.
+ // NOTE(review): presumably the contexts are never initialized in the IPP case - confirm in init().
+ void free()
+ {
+ if (useIpp)
+ return;
+ hal::dftFree(contextA);
+ hal::dftFree(contextB);
+ }
- // determine, which transform to do first - row-wise
- // (stage 0) or column-wise (stage 1) transform
- if( !(flags & DFT_ROWS) && src.rows > 1 &&
- ((src.cols == 1 && (!src.isContinuous() || !dst.isContinuous())) ||
- (src.cols > 1 && inv && real_transform)) )
- stage = 1;
+protected:
- for(;;)
+ // Row-wise pass: runs the 1D transform held in contextA on each of the first nz
+ // rows and zero-fills the remaining output rows.
+ // isComplex - the stage's source and destination channel counts differ.
+ // isLastStage - no further stage follows; a FwdRealToComplex result is completed here.
+ void rowDft(uchar* src_data, int src_step, uchar* dst_data, int dst_step, bool isComplex, bool isLastStage)
{
- double scale = 1;
- uchar* wave = 0;
- int* itab = 0;
- uchar* ptr;
- int i, len, count, sz = 0;
- int use_buf = 0, odd_real = 0;
- DFTFunc dft_func;
+ int len, count;
+ // A single-column matrix (outside row mode) is handled as one vector of `height` elements.
+ if (width == 1 && !isRowTransform )
+ {
+ len = height;
+ count = width;
+ }
+ else
+ {
+ len = width;
+ count = height;
+ }
+ int dptr_offset = 0;
+ int dst_full_len = len*elem_size;
+
+ // With the temporary buffer in use, an odd-length (>1) forward real-to-CCS result
+ // is copied back starting one element into the buffer.
+ if( needBufferA )
+ {
+ if (mode == FwdRealToCCS && (len & 1) && len > 1)
+ dptr_offset = elem_size;
+ }
+
+ // A forward transform whose channel count changes writes more bytes per row.
+ if( !inv && isComplex )
+ dst_full_len += (len & 1) ? elem_size : complex_elem_size;
+
+ // nz = number of rows actually transformed; a non-positive or too large
+ // nonzero_rows means "all rows".
+ int nz = nonzero_rows;
+ if( nz <= 0 || nz > count )
+ nz = count;
- if( stage == 0 ) // row-wise transform
+ int i;
+ for( i = 0; i < nz; i++ )
{
- len = !inv ? src.cols : dst.cols;
- count = src.rows;
- if( len == 1 && !(flags & DFT_ROWS) )
+ const uchar* sptr = src_data + src_step * i;
+ uchar* dptr0 = dst_data + dst_step * i;
+ uchar* dptr = dptr0;
+
+ // Transform into the temporary buffer when one is required, then copy back.
+ if( needBufferA )
+ dptr = tmp_bufA;
+
+ hal::dftRun(contextA, sptr, dptr);
+
+ if( needBufferA )
+ memcpy( dptr0, dptr + dptr_offset, dst_full_len );
+ }
+
+ // Rows beyond nz are not transformed; their output is set to zero.
+ for( ; i < count; i++ )
+ {
+ uchar* dptr0 = dst_data + dst_step * i;
+ memset( dptr0, 0, dst_full_len );
+ }
+ // On the last stage of a real-to-complex forward transform, let
+ // complementComplexOutput() post-process the destination.
+ if(isLastStage && mode == FwdRealToComplex)
+ complementComplexOutput(depth, dst_data, dst_step, len, nz, 1);
+ }
+
+ // Column-wise pass: columns are gathered into the contiguous buffers buf0/buf1,
+ // transformed with the 1D context contextB and scattered back into dst.
+ // stage_src_channels / stage_dst_channels select how columns are read and written
+ // in the real_transform case; isLastStage triggers the final post-processing of a
+ // FwdRealToComplex result.
+ void colDft(uchar* src_data, int src_step, uchar* dst_data, int dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage)
+ {
+ int len = height;
+ int count = width;
+ // [a, b) is the range of column indices handled by the main loop below.
+ int a = 0, b = count;
+ uchar *dbuf0, *dbuf1;
+ const uchar* sptr0 = src_data;
+ uchar* dptr0 = dst_data;
+
+ dbuf0 = buf0, dbuf1 = buf1;
+
+ // With needBufferB set, the transform outputs go to separate buffers
+ // instead of overwriting the input buffers.
+ if( needBufferB )
+ {
+ dbuf1 = tmp_bufB;
+ dbuf0 = buf1;
+ }
+
+ // For real transforms the first column (and, when count is even, one more
+ // column - see the offsets below) is processed here, outside the main loop.
+ if( real_transform )
+ {
+ int even;
+ a = 1;
+ even = (count & 1) == 0;
+ b = (count+1)/2;
+ if( !inv )
+ {
+ memset( buf0, 0, len*complex_elem_size );
+ CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, elem_size );
+ sptr0 += stage_dst_channels*elem_size;
+ if( even )
+ {
+ memset( buf1, 0, len*complex_elem_size );
+ CopyColumn( sptr0 + (count-2)*elem_size, src_step,
+ buf1, complex_elem_size, len, elem_size );
+ }
+ }
+ else if( stage_src_channels == 1 )
+ {
+ CopyColumn( sptr0, src_step, buf0, elem_size, len, elem_size );
+ ExpandCCS( buf0, len, elem_size );
+ if( even )
+ {
+ CopyColumn( sptr0 + (count-1)*elem_size, src_step,
+ buf1, elem_size, len, elem_size );
+ ExpandCCS( buf1, len, elem_size );
+ }
+ sptr0 += elem_size;
+ }
+ else
{
- len = !inv ? src.rows : dst.rows;
- count = 1;
+ CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, complex_elem_size );
+ if( even )
+ {
+ CopyColumn( sptr0 + b*complex_elem_size, src_step,
+ buf1, complex_elem_size, len, complex_elem_size );
+ }
+ sptr0 += complex_elem_size;
+ }
+
+ if( even )
+ hal::dftRun(contextB, buf1, dbuf1);
+ hal::dftRun(contextB, buf0, dbuf0);
+
+ if( stage_dst_channels == 1 )
+ {
+ if( !inv )
+ {
+ // copy the half of output vector to the first/last column.
+ // before doing that, defragment the vector
+ memcpy( dbuf0 + elem_size, dbuf0, elem_size );
+ CopyColumn( dbuf0 + elem_size, elem_size, dptr0,
+ dst_step, len, elem_size );
+ if( even )
+ {
+ memcpy( dbuf1 + elem_size, dbuf1, elem_size );
+ CopyColumn( dbuf1 + elem_size, elem_size,
+ dptr0 + (count-1)*elem_size,
+ dst_step, len, elem_size );
+ }
+ dptr0 += elem_size;
+ }
+ else
+ {
+ // copy the real part of the complex vector to the first/last column
+ CopyColumn( dbuf0, complex_elem_size, dptr0, dst_step, len, elem_size );
+ if( even )
+ CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size,
+ dst_step, len, elem_size );
+ dptr0 += elem_size;
+ }
+ }
+ else
+ {
+ assert( !inv );
+ CopyColumn( dbuf0, complex_elem_size, dptr0,
+ dst_step, len, complex_elem_size );
+ if( even )
+ CopyColumn( dbuf1, complex_elem_size,
+ dptr0 + b*complex_elem_size,
+ dst_step, len, complex_elem_size );
+ dptr0 += complex_elem_size;
}
- odd_real = real_transform && (len & 1);
}
- else
+
+ // Main loop: two complex columns per iteration while a pair is available,
+ // a single column for the last odd one.
+ for(int i = a; i < b; i += 2 )
{
- len = dst.rows;
- count = !inv ? src0.cols : dst.cols;
- sz = 2*len*complex_elem_size;
+ if( i+1 < b )
+ {
+ CopyFrom2Columns( sptr0, src_step, buf0, buf1, len, complex_elem_size );
+ hal::dftRun(contextB, buf1, dbuf1);
+ }
+ else
+ CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, complex_elem_size );
+
+ hal::dftRun(contextB, buf0, dbuf0);
+
+ if( i+1 < b )
+ CopyTo2Columns( dbuf0, dbuf1, dptr0, dst_step, len, complex_elem_size );
+ else
+ CopyColumn( dbuf0, complex_elem_size, dptr0, dst_step, len, complex_elem_size );
+ sptr0 += 2*complex_elem_size;
+ dptr0 += 2*complex_elem_size;
}
+ // On the last stage of a real-to-complex forward transform, let
+ // complementComplexOutput() post-process the destination.
+ if(isLastStage && mode == FwdRealToComplex)
+ complementComplexOutput(depth, dst_data, dst_step, count, len, 2);
+ }
+};
- void *spec = 0;
+// Built-in implementation of a 1D DFT context: owns the factor list and the
+// wave / permutation tables referenced from `opt`, and dispatches run() to the
+// function selected by init().
+class OcvDftBasicImpl
+{
+public:
+ OcvDftOptions opt;
+ // Storage that opt.factors points to (set in the constructor).
+ int _factors[34];
+ // Storage for opt.wave and opt.itab, allocated in init().
+ AutoBuffer<uchar> wave_buf;
+ AutoBuffer<int> itab_buf;
#ifdef USE_IPP_DFT
- if( CV_IPP_CHECK_COND && (len*count >= 64) ) // use IPP DFT if available
+ // Storage for the IPP spec/init area and the IPP work buffer.
+ AutoBuffer<uchar> ippbuf;
+ AutoBuffer<uchar> ippworkbuf;
+#endif
+
+public:
+ OcvDftBasicImpl()
+ {
+ opt.factors = _factors;
+ }
+ // Copies only `opt`.
+ // NOTE(review): the copied opt.factors / opt.wave / opt.itab pointers keep referring
+ // to storage owned by `other`, not to this object's buffers - confirm this is intended.
+ OcvDftBasicImpl & operator=(const OcvDftBasicImpl & other)
+ {
+ this->opt = other.opt;
+ return *this;
+ }
+ // Prepares a 1D transform of length `len`.
+ // count - number of vectors; used for the IPP size threshold and for the scale factor.
+ // depth - CV_32F selects the float functions and element size, otherwise double.
+ // flags - combination of CV_HAL_DFT_* bits.
+ // needBuffer - optional in/out flag: its incoming value is read for stage 0, and on
+ // return it tells whether a temporary buffer is required.
+ // NOTE(review): the incoming value presumably means "in-place operation" (the
+ // removed code tested src.data == dst.data) - confirm against callers.
+ void init(int len, int count, int depth, int flags, bool *needBuffer)
+ {
+ // Length from the previous init() on this object (0 for a fresh object);
+ // used below to decide whether factors and tables can be reused.
+ int prev_len = opt.n;
+
+ int stage = (flags & CV_HAL_DFT_STAGE_COLS) != 0 ? 1 : 0;
+ int complex_elem_size = depth == CV_32F ? sizeof(Complex<float>) : sizeof(Complex<double>);
+ opt.isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
+ bool real_transform = (flags & CV_HAL_DFT_REAL_OUTPUT) != 0;
+ // isComplex is only ever set for the row stage (stage 0).
+ opt.isComplex = (stage == 0) && (flags & CV_HAL_DFT_COMPLEX_OUTPUT) != 0;
+ bool needAnotherStage = (flags & CV_HAL_DFT_TWO_STAGE) != 0;
+
+ opt.scale = 1;
+ opt.tab_size = len;
+ opt.n = len;
+
+ opt.useIpp = false;
+ #ifdef USE_IPP_DFT
+ opt.ipp_spec = 0;
+ opt.ipp_work = 0;
+
+ if( CV_IPP_CHECK_COND && (opt.n*count >= 64) ) // use IPP DFT if available
{
+ int ipp_norm_flag = (flags & CV_HAL_DFT_SCALE) == 0 ? 8 : opt.isInverse ? 2 : 1;
int specsize=0, initsize=0, worksize=0;
IppDFTGetSizeFunc getSizeFunc = 0;
IppDFTInitFunc initFunc = 0;
initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
}
}
- if( getSizeFunc(len, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
+ if( getSizeFunc(opt.n, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
{
ippbuf.allocate(specsize + initsize + 64);
- spec = alignPtr(&ippbuf[0], 32);
- uchar* initbuf = alignPtr((uchar*)spec + specsize, 32);
- if( initFunc(len, ipp_norm_flag, ippAlgHintNone, spec, initbuf) < 0 )
- spec = 0;
- sz += worksize;
+ // Spec, init area and work buffer are aligned to 32 bytes; useIpp is
+ // set only when the IPP init function reports success.
+ opt.ipp_spec = alignPtr(&ippbuf[0], 32);
+ ippworkbuf.allocate(worksize + 32);
+ opt.ipp_work = alignPtr(&ippworkbuf[0], 32);
+ uchar* initbuf = alignPtr((uchar*)opt.ipp_spec + specsize, 32);
+ if( initFunc(opt.n, ipp_norm_flag, ippAlgHintNone, opt.ipp_spec, initbuf) >= 0 )
+ opt.useIpp = true;
}
else
setIppErrorStatus();
}
+ #endif
+
+ // Built-in path: factorize the length and build the tables unless they can
+ // be reused from the previous init() with the same length.
+ if (!opt.useIpp)
+ {
+ if (len != prev_len)
+ {
+ opt.nf = DFTFactorize( opt.n, opt.factors );
+ }
+ bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1];
+ if (len != prev_len || (!inplace_transform && opt.isInverse && real_transform))
+ {
+ wave_buf.allocate(opt.n*complex_elem_size);
+ opt.wave = wave_buf;
+ itab_buf.allocate(opt.n);
+ opt.itab = itab_buf;
+ DFTInit( opt.n, opt.nf, opt.factors, opt.itab, complex_elem_size,
+ opt.wave, stage == 0 && opt.isInverse && real_transform );
+ }
+ // otherwise reuse the tables calculated on the previous stage
+ if (needBuffer)
+ {
+ if( (stage == 0 && ((*needBuffer && !inplace_transform) || (real_transform && (len & 1)))) ||
+ (stage == 1 && !inplace_transform) )
+ {
+ *needBuffer = true;
+ }
+ }
+ }
else
-#endif
{
- if( len != prev_len )
- nf = DFTFactorize( len, factors );
-
- inplace_transform = factors[0] == factors[nf-1];
- sz += len*(complex_elem_size + sizeof(int));
- i = nf > 1 && (factors[0] & 1) == 0;
- if( (factors[i] & 1) != 0 && factors[i] > 5 )
- sz += (factors[i]+1)*complex_elem_size;
-
- if( (stage == 0 && ((src.data == dst.data && !inplace_transform) || odd_real)) ||
- (stage == 1 && !inplace_transform) )
+ // IPP path: no temporary buffer is requested from the caller.
+ if (needBuffer)
{
- use_buf = 1;
- sz += len*complex_elem_size;
+ *needBuffer = false;
}
}
- ptr = (uchar*)buf;
- buf.allocate( sz + 32 );
- if( ptr != (uchar*)buf )
- prev_len = 0; // because we release the buffer,
- // force recalculation of
- // twiddle factors and permutation table
- ptr = (uchar*)buf;
- if( !spec )
{
- wave = ptr;
- ptr += len*complex_elem_size;
- itab = (int*)ptr;
- ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 );
+ // Dispatch table: entries 0..2 are the float functions (complex, forward
+ // real, inverse CCS), entries 3..5 the double counterparts.
+ static DFTFunc dft_tbl[6] =
+ {
+ (DFTFunc)DFT_32f,
+ (DFTFunc)RealDFT_32f,
+ (DFTFunc)CCSIDFT_32f,
+ (DFTFunc)DFT_64f,
+ (DFTFunc)RealDFT_64f,
+ (DFTFunc)CCSIDFT_64f
+ };
+ // The real-input / CCS variants are only selected for the row stage;
+ // the column stage always uses the complex function.
+ int idx = 0;
+ if (stage == 0)
+ {
+ if (real_transform)
+ {
+ if (!opt.isInverse)
+ idx = 1;
+ else
+ idx = 2;
+ }
+ }
+ if (depth == CV_64F)
+ idx += 3;
- if( len != prev_len || (!inplace_transform && inv && real_transform))
- DFTInit( len, nf, factors, itab, complex_elem_size,
- wave, stage == 0 && inv && real_transform );
- // otherwise reuse the tables calculated on the previous stage
+ opt.dft_func = dft_tbl[idx];
}
- if( stage == 0 )
+ // Scaling is applied only when no further stage follows and scaling was requested:
+ // 1/(len*count), or 1/len for a row-stage transform with CV_HAL_DFT_ROWS.
+ if(!needAnotherStage && (flags & CV_HAL_DFT_SCALE) != 0)
{
- uchar* tmp_buf = 0;
- int dptr_offset = 0;
- int dst_full_len = len*elem_size;
- int _flags = (int)inv + (src.channels() != dst.channels() ?
- DFT_COMPLEX_INPUT_OR_OUTPUT : 0);
- if( use_buf )
- {
- tmp_buf = ptr;
- ptr += len*complex_elem_size;
- if( odd_real && !inv && len > 1 &&
- !(_flags & DFT_COMPLEX_INPUT_OR_OUTPUT))
- dptr_offset = elem_size;
- }
+ int rowCount = count;
+ if (stage == 0 && (flags & CV_HAL_DFT_ROWS) != 0)
+ rowCount = 1;
+ opt.scale = 1./(len * rowCount);
+ }
+ }
- if( !inv && (_flags & DFT_COMPLEX_INPUT_OR_OUTPUT) )
- dst_full_len += (len & 1) ? elem_size : complex_elem_size;
+ // Runs the function selected by init() on one vector.
+ void run(const void * src, void * dst)
+ {
+ opt.dft_func(opt, src, dst);
+ }
- dft_func = dft_tbl[(!real_transform ? 0 : !inv ? 1 : 2) + (depth == CV_64F)*3];
+ // Nothing to release explicitly; present for interface symmetry with init()/run().
+ void free() {}
+};
- if( count > 1 && !(flags & DFT_ROWS) && (!inv || !real_transform) )
- stage = 1;
- else if( flags & CV_DXT_SCALE )
- scale = 1./(len * (flags & DFT_ROWS ? 1 : count));
+namespace hal {
- if( nonzero_rows <= 0 || nonzero_rows > count )
- nonzero_rows = count;
+//================== 1D ======================
- for( i = 0; i < nonzero_rows; i++ )
- {
- const uchar* sptr = src.ptr(i);
- uchar* dptr0 = dst.ptr(i);
- uchar* dptr = dptr0;
+// Initializes a 1D DFT context.
+// The custom HAL (cv_hal_dftInit) is tried first; if it returns CV_HAL_ERROR_OK the
+// context is marked as using the replacement. Otherwise the built-in OcvDftBasicImpl
+// is created on first use and (re)initialized; an existing object is reused.
+// NOTE(review): context.impl is cast to OcvDftBasicImpl* without checking what a
+// previous call stored there - assumes a context is not switched between the HAL and
+// the built-in implementation; confirm.
+void dftInit(DftContext & context, int len, int count, int depth, int flags, bool *needBuffer)
+{
+ int res = cv_hal_dftInit(&context.impl, len, count, depth, flags, needBuffer);
+ if (res == CV_HAL_ERROR_OK)
+ {
+ context.useReplacement = true;
+ return;
+ }
- if( tmp_buf )
- dptr = tmp_buf;
+ context.useReplacement = false;
+ OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl;
+ if (!c)
+ {
+ c = new OcvDftBasicImpl();
+ context.impl = (void*)c;
+ }
+ c->init(len, count, depth, flags, needBuffer);
+}
- dft_func( sptr, dptr, len, nf, factors, itab, wave, len, spec, ptr, _flags, scale );
- if( dptr != dptr0 )
- memcpy( dptr0, dptr + dptr_offset, dst_full_len );
- }
+// Runs the 1D transform prepared by dftInit() on one vector.
+// With a HAL replacement, a result other than CV_HAL_ERROR_OK raises CV_StsNotImplemented;
+// otherwise the call is forwarded to the built-in OcvDftBasicImpl stored in context.impl.
+void dftRun(const DftContext & context, const void * src, void * dst)
+{
+ if (context.useReplacement)
+ {
+ int res = cv_hal_dftRun(context.impl, src, dst);
+ if (res != CV_HAL_ERROR_OK)
+ {
+ CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftRun");
+ }
+ return;
+ }
+ OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl;
+ c->run(src, dst);
+}
- for( ; i < count; i++ )
- {
- uchar* dptr0 = dst.ptr(i);
- memset( dptr0, 0, dst_full_len );
- }
+// Releases a 1D DFT context.
+// With a HAL replacement the HAL's free function is called and a failure raises
+// CV_StsNotImplemented. Otherwise the built-in implementation object, if any, is
+// deleted and context.impl is reset to 0.
+void dftFree(DftContext & context)
+{
+ if (context.useReplacement)
+ {
+ int res = cv_hal_dftFree(context.impl);
+ if (res != CV_HAL_ERROR_OK)
+ {
+ CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftFree");
+ }
+ return;
+ }
- if( stage != 1 )
- {
- if( !inv && real_transform && dst.channels() == 2 )
- complementComplexOutput(dst, nonzero_rows, 1);
- break;
- }
- src = dst;
+ OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl;
+ // impl may be null; only a non-null object is freed.
+ if (c)
+ {
+ c->free();
+ delete c;
+ context.impl = 0;
+ }
+}
+
+
+
+//================== 2D ======================
+
+// Initializes a 2D DFT context.
+// The custom HAL (cv_hal_dftInit2D) is tried first. If it does not return
+// CV_HAL_ERROR_OK, a new built-in OcvDftImpl is created, initialized with the same
+// parameters and stored in c.impl.
+// Raises CV_StsNotImplemented for a single-column input combined with
+// _nonzero_rows > 0 (built-in path only; the check runs after the HAL attempt).
+void dftInit2D(DftContext & c,
+ int _width, int _height, int _depth, int _src_channels, int _dst_channels,
+ int flags,
+ int _nonzero_rows)
+{
+ int res = cv_hal_dftInit2D(&c.impl, _width, _height, _depth, _src_channels, _dst_channels, flags, _nonzero_rows);
+ if (res == CV_HAL_ERROR_OK)
+ {
+ c.useReplacement = true;
+ return;
+ }
+ c.useReplacement = false;
+
+ if( _width == 1 && _nonzero_rows > 0 )
+ CV_Error( CV_StsNotImplemented,
+ "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
+ "For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
+
+ OcvDftImpl * d = new OcvDftImpl();
+ d->init(_width, _height, _depth, _src_channels, _dst_channels, flags, _nonzero_rows);
+ c.impl = (void*)d;
+}
+
+// Runs the 2D transform prepared by dftInit2D().
+// With a HAL replacement, a result other than CV_HAL_ERROR_OK raises CV_StsNotImplemented;
+// otherwise the call is forwarded to OcvDftImpl::run(). Both paths receive src and dst
+// as uchar pointers together with their steps.
+void dftRun2D(const DftContext & c,
+ const void * src, int src_step, void * dst, int dst_step)
+{
+ if (c.useReplacement)
+ {
+ int res = cv_hal_dftRun2D(c.impl, (uchar*)src, src_step, (uchar*)dst, dst_step);
+ if (res != CV_HAL_ERROR_OK)
+ {
+ CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftRun2D");
}
- else
+ return;
+ }
+ OcvDftImpl * d = (OcvDftImpl*)c.impl;
+ d->run((uchar*)src, src_step, (uchar*)dst, dst_step);
+}
+
+// Releases a 2D DFT context created by dftInit2D().
+// With a HAL replacement the HAL's free function is called and a failure raises
+// CV_StsNotImplemented. Otherwise the built-in OcvDftImpl, if any, is freed and
+// deleted, and c.impl is reset to 0.
+void dftFree2D(DftContext & c)
+{
+ if (c.useReplacement)
+ {
+ int res = cv_hal_dftFree2D(c.impl);
+ if (res != CV_HAL_ERROR_OK)
{
- int a = 0, b = count;
- uchar *buf0, *buf1, *dbuf0, *dbuf1;
- const uchar* sptr0 = src.ptr();
- uchar* dptr0 = dst.ptr();
- buf0 = ptr;
- ptr += len*complex_elem_size;
- buf1 = ptr;
- ptr += len*complex_elem_size;
- dbuf0 = buf0, dbuf1 = buf1;
+ CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftFree2D");
+ }
+ return;
+ }
+ OcvDftImpl * d = (OcvDftImpl*)c.impl;
+ // Guard against a null impl (for example a context that was already freed),
+ // mirroring the check in dftFree(); calling d->free() on null would be undefined.
+ if (d)
+ {
+ d->free();
+ delete d;
+ c.impl = 0;
+ }
+}
- if( use_buf )
- {
- dbuf1 = ptr;
- dbuf0 = buf1;
- ptr += len*complex_elem_size;
- }
+} // cv::hal::
- dft_func = dft_tbl[(depth == CV_64F)*3];
+} // cv::
- if( real_transform && inv && src.cols > 1 )
- stage = 0;
- else if( flags & CV_DXT_SCALE )
- scale = 1./(len * count);
- if( real_transform )
- {
- int even;
- a = 1;
- even = (count & 1) == 0;
- b = (count+1)/2;
- if( !inv )
- {
- memset( buf0, 0, len*complex_elem_size );
- CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, elem_size );
- sptr0 += dst.channels()*elem_size;
- if( even )
- {
- memset( buf1, 0, len*complex_elem_size );
- CopyColumn( sptr0 + (count-2)*elem_size, src.step,
- buf1, complex_elem_size, len, elem_size );
- }
- }
- else if( src.channels() == 1 )
- {
- CopyColumn( sptr0, src.step, buf0, elem_size, len, elem_size );
- ExpandCCS( buf0, len, elem_size );
- if( even )
- {
- CopyColumn( sptr0 + (count-1)*elem_size, src.step,
- buf1, elem_size, len, elem_size );
- ExpandCCS( buf1, len, elem_size );
- }
- sptr0 += elem_size;
- }
- else
- {
- CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size );
- if( even )
- {
- CopyColumn( sptr0 + b*complex_elem_size, src.step,
- buf1, complex_elem_size, len, complex_elem_size );
- }
- sptr0 += complex_elem_size;
- }
+// Public DFT entry point.
+// Tries the OpenCL back-ends first (when compiled in), then validates the input type,
+// creates the destination with the channel count implied by the flags, translates the
+// public DFT_* flags into CV_HAL_DFT_* flags and runs the transform through the
+// hal::dftInit2D / dftRun2D / dftFree2D sequence.
+void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
+{
+#ifdef HAVE_CLAMDFFT
+ CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
+ _dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0,
+ ocl_dft_amdfft(_src0, _dst, flags))
+#endif
- if( even )
- dft_func( buf1, dbuf1, len, nf, factors, itab,
- wave, len, spec, ptr, inv, scale );
- dft_func( buf0, dbuf0, len, nf, factors, itab,
- wave, len, spec, ptr, inv, scale );
+#ifdef HAVE_OPENCL
+ CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2,
+ ocl_dft(_src0, _dst, flags, nonzero_rows))
+#endif
- if( dst.channels() == 1 )
- {
- if( !inv )
- {
- // copy the half of output vector to the first/last column.
- // before doing that, defgragment the vector
- memcpy( dbuf0 + elem_size, dbuf0, elem_size );
- CopyColumn( dbuf0 + elem_size, elem_size, dptr0,
- dst.step, len, elem_size );
- if( even )
- {
- memcpy( dbuf1 + elem_size, dbuf1, elem_size );
- CopyColumn( dbuf1 + elem_size, elem_size,
- dptr0 + (count-1)*elem_size,
- dst.step, len, elem_size );
- }
- dptr0 += elem_size;
- }
- else
- {
- // copy the real part of the complex vector to the first/last column
- CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, elem_size );
- if( even )
- CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size,
- dst.step, len, elem_size );
- dptr0 += elem_size;
- }
- }
- else
- {
- assert( !inv );
- CopyColumn( dbuf0, complex_elem_size, dptr0,
- dst.step, len, complex_elem_size );
- if( even )
- CopyColumn( dbuf1, complex_elem_size,
- dptr0 + b*complex_elem_size,
- dst.step, len, complex_elem_size );
- dptr0 += complex_elem_size;
- }
- }
+ Mat src0 = _src0.getMat(), src = src0;
+ bool inv = (flags & DFT_INVERSE) != 0;
+ int type = src.type();
+ int depth = src.depth();
- for( i = a; i < b; i += 2 )
- {
- if( i+1 < b )
- {
- CopyFrom2Columns( sptr0, src.step, buf0, buf1, len, complex_elem_size );
- dft_func( buf1, dbuf1, len, nf, factors, itab,
- wave, len, spec, ptr, inv, scale );
- }
- else
- CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size );
+ // Only 1- or 2-channel float/double matrices are accepted.
+ CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
- dft_func( buf0, dbuf0, len, nf, factors, itab,
- wave, len, spec, ptr, inv, scale );
+ // Destination type: 2 channels for a forward real transform with DFT_COMPLEX_OUTPUT,
+ // 1 channel for an inverse complex transform with DFT_REAL_OUTPUT, otherwise the
+ // same type as the source.
+ if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
+ _dst.create( src.size(), CV_MAKETYPE(depth, 2) );
+ else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
+ _dst.create( src.size(), depth );
+ else
+ _dst.create( src.size(), type );
- if( i+1 < b )
- CopyTo2Columns( dbuf0, dbuf1, dptr0, dst.step, len, complex_elem_size );
- else
- CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, complex_elem_size );
- sptr0 += 2*complex_elem_size;
- dptr0 += 2*complex_elem_size;
- }
+ Mat dst = _dst.getMat();
- if( stage != 0 )
- {
- if( !inv && real_transform && dst.channels() == 2 && len > 1 )
- complementComplexOutput(dst, len, 2);
- break;
- }
- src = dst;
- }
- }
+ // Translate the public flags and the matrix properties into HAL flags.
+ int f = 0;
+ if (src.isContinuous() && dst.isContinuous())
+ f |= CV_HAL_DFT_IS_CONTINUOUS;
+ if (inv)
+ f |= CV_HAL_DFT_INVERSE;
+ if (flags & DFT_ROWS)
+ f |= CV_HAL_DFT_ROWS;
+ if (flags & DFT_SCALE)
+ f |= CV_HAL_DFT_SCALE;
+ if (src.data == dst.data)
+ f |= CV_HAL_DFT_IS_INPLACE;
+ // NOTE(review): if dftRun2D raises an error, dftFree2D below is not reached;
+ // whether DftContext cleans up on its own is not visible here - confirm.
+ hal::DftContext c;
+ hal::dftInit2D(c, src.cols, src.rows, depth, src.channels(), dst.channels(), f, nonzero_rows);
+ hal::dftRun2D(c, src.data, (int)src.step, dst.data, (int)dst.step);
+ hal::dftFree2D(c);
}
http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/:
*/
template<typename T> static void
-DCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
- int n, int nf, int* factors, const int* itab, const Complex<T>* dft_wave,
- const Complex<T>* dct_wave, const void* spec, Complex<T>* buf )
+DCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
+ const Complex<T>* dct_wave )
{
static const T sin_45 = (T)0.70710678118654752440084436210485;
+
+ int n = c.n;
int j, n2 = n >> 1;
src_step /= sizeof(src[0]);
dft_src[n-j-1] = src[src_step];
}
- RealDFT( dft_src, dft_dst, n, nf, factors,
- itab, dft_wave, n, spec, buf, 0, 1.0 );
+ RealDFT(c, dft_src, dft_dst);
src = dft_dst;
dst[0] = (T)(src[0]*dct_wave->re*sin_45);
template<typename T> static void
-IDCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
- int n, int nf, int* factors, const int* itab, const Complex<T>* dft_wave,
- const Complex<T>* dct_wave, const void* spec, Complex<T>* buf )
+IDCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
+ const Complex<T>* dct_wave)
{
static const T sin_45 = (T)0.70710678118654752440084436210485;
+ int n = c.n;
int j, n2 = n >> 1;
src_step /= sizeof(src[0]);
}
dft_src[n-1] = (T)(src[0]*2*dct_wave->re);
- CCSIDFT( dft_src, dft_dst, n, nf, factors, itab,
- dft_wave, n, spec, buf, 0, 1.0 );
+ CCSIDFT(c, dft_src, dft_dst);
for( j = 0; j < n2; j++, dst += dst_step*2 )
{
}
-typedef void (*DCTFunc)(const void* src, int src_step, void* dft_src,
- void* dft_dst, void* dst, int dst_step, int n,
- int nf, int* factors, const int* itab, const void* dft_wave,
- const void* dct_wave, const void* spec, void* buf );
+// Type-erased signature shared by the DCT_32f / IDCT_32f / DCT_64f / IDCT_64f wrappers.
+// The DFT parameters formerly passed one by one now travel in the OcvDftOptions argument.
+typedef void (*DCTFunc)(const OcvDftOptions & c, const void* src, int src_step, void* dft_src,
+ void* dft_dst, void* dst, int dst_step, const void* dct_wave);
-static void DCT_32f(const float* src, int src_step, float* dft_src, float* dft_dst,
- float* dst, int dst_step, int n, int nf, int* factors, const int* itab,
- const Complexf* dft_wave, const Complexf* dct_wave, const void* spec, Complexf* buf )
+// Single-precision forward DCT: forwards all arguments to the DCT template.
+static void DCT_32f(const OcvDftOptions & c, const float* src, int src_step, float* dft_src, float* dft_dst,
+ float* dst, int dst_step, const Complexf* dct_wave)
{
- DCT(src, src_step, dft_src, dft_dst, dst, dst_step,
- n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
+ DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
}
-static void IDCT_32f(const float* src, int src_step, float* dft_src, float* dft_dst,
- float* dst, int dst_step, int n, int nf, int* factors, const int* itab,
- const Complexf* dft_wave, const Complexf* dct_wave, const void* spec, Complexf* buf )
+// Single-precision inverse DCT: forwards all arguments to the IDCT template.
+static void IDCT_32f(const OcvDftOptions & c, const float* src, int src_step, float* dft_src, float* dft_dst,
+ float* dst, int dst_step, const Complexf* dct_wave)
{
- IDCT(src, src_step, dft_src, dft_dst, dst, dst_step,
- n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
+ IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
}
-static void DCT_64f(const double* src, int src_step, double* dft_src, double* dft_dst,
- double* dst, int dst_step, int n, int nf, int* factors, const int* itab,
- const Complexd* dft_wave, const Complexd* dct_wave, const void* spec, Complexd* buf )
+// Double-precision forward DCT: forwards all arguments to the DCT template.
+static void DCT_64f(const OcvDftOptions & c, const double* src, int src_step, double* dft_src, double* dft_dst,
+ double* dst, int dst_step, const Complexd* dct_wave)
{
- DCT(src, src_step, dft_src, dft_dst, dst, dst_step,
- n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
+ DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
}
-static void IDCT_64f(const double* src, int src_step, double* dft_src, double* dft_dst,
- double* dst, int dst_step, int n, int nf, int* factors, const int* itab,
- const Complexd* dft_wave, const Complexd* dct_wave, const void* spec, Complexd* buf )
+// Double-precision inverse DCT: forwards all arguments to the IDCT template.
+static void IDCT_64f(const OcvDftOptions & c, const double* src, int src_step, double* dft_src, double* dft_dst,
+ double* dst, int dst_step, const Complexd* dct_wave)
{
- IDCT(src, src_step, dft_src, dft_dst, dst, dst_step,
- n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
+ IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
}
}
class DctIPPLoop_Invoker : public ParallelLoopBody
{
public:
- DctIPPLoop_Invoker(const Mat& _src, Mat& _dst, bool _inv, bool *_ok) :
- ParallelLoopBody(), src(&_src), dst(&_dst), inv(_inv), ok(_ok)
+ DctIPPLoop_Invoker(const uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, bool _inv, bool *_ok) :
+ ParallelLoopBody(), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), inv(_inv), ok(_ok)
{
*ok = true;
}
return;
#if IPP_VERSION_X100 >= 900
- IppiSize srcRoiSize = {src->cols, 1};
+ IppiSize srcRoiSize = {width, 1};
int specSize = 0;
int initSize = 0;
for(int i = range.start; i < range.end; ++i)
{
- if(ippDctFun(src->ptr<float>(i), (int)src->step,dst->ptr<float>(i), (int)dst->step, pDCTSpec, pBuffer) < 0)
+ if(ippDctFun((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step, pDCTSpec, pBuffer) < 0)
{
*ok = false;
IPP_RETURN
uchar* pBuffer = 0;
int bufSize=0;
- IppiSize srcRoiSize = {src->cols, 1};
+ IppiSize srcRoiSize = {width, 1};
CV_SUPPRESS_DEPRECATED_START
for( int i = range.start; i < range.end; ++i)
{
- if(ippDctFun(src->ptr<float>(i), (int)src->step,dst->ptr<float>(i), (int)dst->step, pDCTSpec, (Ipp8u*)pBuffer) < 0)
+ if(ippDctFun((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step, pDCTSpec, (Ipp8u*)pBuffer) < 0)
{
*ok = false;
break;
}
private:
- const Mat* src;
- Mat* dst;
+ const uchar * src;
+ int src_step;
+ uchar * dst;
+ int dst_step;
+ int width;
bool inv;
bool *ok;
};
-static bool DctIPPLoop(const Mat& src, Mat& dst, bool inv)
+// Runs DctIPPLoop_Invoker over the row range [0, height) via parallel_for_ and
+// returns the invoker's success flag. The invoker sets the flag to true on
+// construction and clears it when an IPP call fails.
+static bool DctIPPLoop(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv)
{
bool ok;
- parallel_for_(Range(0, src.rows), DctIPPLoop_Invoker(src, dst, inv, &ok), src.rows/(double)(1<<4) );
+ parallel_for_(Range(0, height), DctIPPLoop_Invoker(src, src_step, dst, dst_step, width, inv, &ok), height/(double)(1<<4) );
return ok;
}
-static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row)
+static bool ippi_DCT_32f(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, bool row)
{
if(row)
- return DctIPPLoop(src, dst, inv);
+ return DctIPPLoop(src, src_step, dst, dst_step, width, height, inv);
else
{
#if IPP_VERSION_X100 >= 900
- IppiSize srcRoiSize = {src.cols, src.rows};
+ IppiSize srcRoiSize = {width, height};
int specSize = 0;
int initSize = 0;
return false;
}
- if(ippDctFun(src.ptr<float>(), (int)src.step,dst.ptr<float>(), (int)dst.step, pDCTSpec, pBuffer) < 0)
+ if(ippDctFun((float*)src, src_step, (float*)dst, dst_step, pDCTSpec, pBuffer) < 0)
{
IPP_RELEASE
return false;
uchar* pBuffer = 0;
int bufSize=0;
- IppiSize srcRoiSize = {src.cols, src.rows};
+ IppiSize srcRoiSize = {width, height};
CV_SUPPRESS_DEPRECATED_START
buf.allocate( bufSize );
pBuffer = (uchar*)buf;
- status = ippDctFun(src.ptr<float>(), (int)src.step, dst.ptr<float>(), (int)dst.step, pDCTSpec, (Ipp8u*)pBuffer);
+ status = ippDctFun((float*)src, src_step, (float*)dst, dst_step, pDCTSpec, (Ipp8u*)pBuffer);
}
if (pDCTSpec)
}
#endif
-void cv::dct( InputArray _src0, OutputArray _dst, int flags )
-{
- static DCTFunc dct_tbl[4] =
- {
- (DCTFunc)DCT_32f,
- (DCTFunc)IDCT_32f,
- (DCTFunc)DCT_64f,
- (DCTFunc)IDCT_64f
- };
-
- bool inv = (flags & DCT_INVERSE) != 0;
- Mat src0 = _src0.getMat(), src = src0;
- int type = src.type(), depth = src.depth();
- void *spec = 0;
-
- double scale = 1.;
- int prev_len = 0, nf = 0, stage, end_stage;
- uchar *src_dft_buf = 0, *dst_dft_buf = 0;
- uchar *dft_wave = 0, *dct_wave = 0;
- int* itab = 0;
- uchar* ptr = 0;
- int elem_size = (int)src.elemSize(), complex_elem_size = elem_size*2;
- int factors[34], inplace_transform;
- int i, len, count;
- AutoBuffer<uchar> buf;
-
- CV_Assert( type == CV_32FC1 || type == CV_64FC1 );
- _dst.create( src.rows, src.cols, type );
- Mat dst = _dst.getMat();
-
- CV_IPP_RUN(IPP_VERSION_X100 >= 700 && src.type() == CV_32F, ippi_DCT_32f(src, dst, inv, ((flags & DCT_ROWS) != 0)))
-
- DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2];
-
- if( (flags & DCT_ROWS) || src.rows == 1 ||
- (src.cols == 1 && (src.isContinuous() && dst.isContinuous())))
- {
- stage = end_stage = 0;
- }
- else
- {
- stage = src.cols == 1;
- end_stage = 1;
- }
+namespace cv {
- for( ; stage <= end_stage; stage++ )
+class OcvDctImpl // Built-in DCT backend: init() records geometry/flags and picks the kernel, run() performs the 1-D transform passes.
+{
+public:
+ OcvDftOptions opt; // DFT options handed to dct_func; run() refreshes n, tab_size, nf, wave and itab
+
+ int _factors[34]; // storage behind opt.factors (filled by DFTFactorize in run())
+ AutoBuffer<uint> wave_buf; // backs opt.wave
+ AutoBuffer<int> itab_buf; // backs opt.itab
+
+ DCTFunc dct_func; // kernel chosen in init() from direction and depth
+ bool isRowTransform; // CV_HAL_DFT_ROWS was set
+ bool isInverse; // CV_HAL_DFT_INVERSE was set
+ bool isContinuous; // CV_HAL_DFT_IS_CONTINUOUS was set
+ int start_stage; // first pass executed by run()
+ int end_stage; // last pass executed by run() (inclusive)
+ int width;
+ int height;
+ int depth;
+ // init: stores the geometry and flag bits, selects dct_func, resets opt and decides which passes run() will execute.
+ void init(int _width, int _height, int _depth, int flags)
{
- const uchar* sptr = src.ptr();
- uchar* dptr = dst.ptr();
- size_t sstep0, sstep1, dstep0, dstep1;
-
- if( stage == 0 )
+ width = _width;
+ height = _height;
+ depth = _depth;
+ isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
+ isRowTransform = (flags & CV_HAL_DFT_ROWS) != 0;
+ isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != 0;
+ static DCTFunc dct_tbl[4] =
{
- len = src.cols;
- count = src.rows;
- if( len == 1 && !(flags & DCT_ROWS) )
- {
- len = src.rows;
- count = 1;
- }
- sstep0 = src.step;
- dstep0 = dst.step;
- sstep1 = dstep1 = elem_size;
+ (DCTFunc)DCT_32f,
+ (DCTFunc)IDCT_32f,
+ (DCTFunc)DCT_64f,
+ (DCTFunc)IDCT_64f
+ };
+ dct_func = dct_tbl[(int)isInverse + (depth == CV_64F)*2]; // index: +1 for inverse, +2 for CV_64F
+ opt.nf = 0;
+ opt.isComplex = false;
+ opt.isInverse = false; // stays false even for an inverse DCT; the direction is carried by dct_func and by the isInverse argument given to DFTInit/DCTInit
+ opt.noPermute = false;
+ opt.scale = 1.;
+ opt.factors = _factors; // the factor list lives inside this object
+
+ if (isRowTransform || height == 1 || (width == 1 && isContinuous)) // these cases need only stage 0
+ {
+ start_stage = end_stage = 0;
}
else
{
- len = dst.rows;
- count = dst.cols;
- sstep1 = src.step;
- dstep1 = dst.step;
- sstep0 = dstep0 = elem_size;
+ start_stage = (width == 1); // a single column skips stage 0 and runs only stage 1
+ end_stage = 1;
}
+ }
+ void run(uchar * src, int src_step, uchar * dst, int dst_step) // Executes stages start_stage..end_stage; steps are used as byte offsets between rows.
+ {
+ CV_IPP_RUN(IPP_VERSION_X100 >= 700 && depth == CV_32F, ippi_DCT_32f(src, src_step, dst, dst_step, width, height, isInverse, isRowTransform)) // IPP path for CV_32F; the macro is defined elsewhere - presumably returns early on success, confirm
- if( len != prev_len )
- {
- int sz;
-
- if( len > 1 && (len & 1) )
- CV_Error( CV_StsNotImplemented, "Odd-size DCT\'s are not implemented" );
+ AutoBuffer<uchar> dct_wave; // table filled by DCTInit
+ AutoBuffer<uchar> src_buf, dst_buf; // scratch vectors handed to dct_func
+ uchar *src_dft_buf = 0, *dst_dft_buf = 0;
+ int prev_len = 0; // length the current tables were built for (0 = none yet)
+ int elem_size = (depth == CV_32F) ? sizeof(float) : sizeof(double); // bytes per real sample
+ int complex_elem_size = elem_size*2;
- sz = len*elem_size;
- sz += (len/2 + 1)*complex_elem_size;
+ for(int stage = start_stage ; stage <= end_stage; stage++ ) // stage 0 transforms along rows, stage 1 along columns
+ {
+ const uchar* sptr = src;
+ uchar* dptr = dst;
+ size_t sstep0, sstep1, dstep0, dstep1; // *step0: offset between successive vectors, *step1: offset between samples of one vector
+ int len, count; // len: samples per vector, count: number of vectors
- spec = 0;
- inplace_transform = 1;
+ if( stage == 0 )
{
- sz += len*(complex_elem_size + sizeof(int)) + complex_elem_size;
-
- nf = DFTFactorize( len, factors );
- inplace_transform = factors[0] == factors[nf-1];
-
- i = nf > 1 && (factors[0] & 1) == 0;
- if( (factors[i] & 1) != 0 && factors[i] > 5 )
- sz += (factors[i]+1)*complex_elem_size;
-
- if( !inplace_transform )
- sz += len*elem_size;
+ len = width;
+ count = height;
+ if( len == 1 && !isRowTransform ) // single-column input: handle it as one vector of `height` samples (samples are then addressed elem_size apart)
+ {
+ len = height;
+ count = 1;
+ }
+ sstep0 = src_step;
+ dstep0 = dst_step;
+ sstep1 = dstep1 = elem_size;
+ }
+ else
+ {
+ len = height;
+ count = width;
+ sstep1 = src_step;
+ dstep1 = dst_step;
+ sstep0 = dstep0 = elem_size;
}
- buf.allocate( sz + 32 );
- ptr = (uchar*)buf;
+ opt.n = len;
+ opt.tab_size = len;
- if( !spec )
+ if( len != prev_len ) // (re)build tables and scratch buffers only when the transform length changes
{
- dft_wave = ptr;
- ptr += len*complex_elem_size;
- itab = (int*)ptr;
- ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 );
- DFTInit( len, nf, factors, itab, complex_elem_size, dft_wave, inv );
+ if( len > 1 && (len & 1) ) // odd lengths greater than 1 are rejected
+ CV_Error( CV_StsNotImplemented, "Odd-size DCT\'s are not implemented" );
+
+ opt.nf = DFTFactorize( len, opt.factors );
+ bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1]; // first and last factor equal
+
+ wave_buf.allocate(len*complex_elem_size); // NOTE(review): wave_buf holds uint elements while this count looks like a byte size - possibly over-sized, confirm
+ opt.wave = wave_buf;
+ itab_buf.allocate(len);
+ opt.itab = itab_buf;
+ DFTInit( len, opt.nf, opt.factors, opt.itab, complex_elem_size, opt.wave, isInverse );
+
+ dct_wave.allocate((len/2 + 1)*complex_elem_size);
+ src_buf.allocate(len*elem_size);
+ src_dft_buf = src_buf;
+ if(!inplace_transform)
+ {
+ dst_buf.allocate(len*elem_size);
+ dst_dft_buf = dst_buf;
+ }
+ else
+ {
+ dst_dft_buf = src_buf; // both scratch pointers share src_buf in this case
+ }
+ DCTInit( len, complex_elem_size, dct_wave, isInverse);
+ prev_len = len;
}
-
- dct_wave = ptr;
- ptr += (len/2 + 1)*complex_elem_size;
- src_dft_buf = dst_dft_buf = ptr;
- ptr += len*elem_size;
- if( !inplace_transform )
+ // otherwise reuse the tables calculated on the previous stage
+ for(int i = 0; i < count; i++ ) // one 1-D transform per vector
{
- dst_dft_buf = ptr;
- ptr += len*elem_size;
+ dct_func( opt, sptr + i*sstep0, (int)sstep1, src_dft_buf, dst_dft_buf,
+ dptr + i*dstep0, (int)dstep1, dct_wave);
}
- DCTInit( len, complex_elem_size, dct_wave, inv );
- if( !inv )
- scale += scale;
- prev_len = len;
+ src = dst; // the next stage reads the output of this stage
+ src_step = dst_step;
}
- // otherwise reuse the tables calculated on the previous stage
- for( i = 0; i < count; i++ )
+
+ }
+ void free() {} // no explicit cleanup is performed here
+};
+
+namespace hal {
+
+// Prepares a DCT context for a width x height image of the given depth.
+// The custom HAL hook is tried first; when it does not report CV_HAL_ERROR_OK,
+// a built-in OcvDctImpl is allocated, initialised and stored in c.impl.
+// c.useReplacement records which of the two backends owns c.impl.
+void dctInit(DftContext & c, int width, int height, int depth, int flags)
+{
+    const bool acceptedByHal =
+        cv_hal_dctInit(&c.impl, width, height, depth, flags) == CV_HAL_ERROR_OK;
+    c.useReplacement = acceptedByHal;
+    if (!acceptedByHal)
+    {
+        // Fall back to the built-in implementation.
+        OcvDctImpl * builtin = new OcvDctImpl();
+        builtin->init(width, height, depth, flags);
+        c.impl = builtin;
+    }
+}
+
+void dctRun(const DftContext & c, const void * src, int src_step, void * dst, int dst_step) // Runs the DCT prepared by dctInit: through the custom HAL when c.useReplacement is set, otherwise through the built-in OcvDctImpl.
+{
+ if (c.useReplacement)
+ {
+ int res = cv_hal_dctRun(c.impl, src, src_step, dst, dst_step);
+ if (res != CV_HAL_ERROR_OK) // the HAL accepted the context in dctInit, so a failure here is reported as an error instead of falling back
{
- dct_func( sptr + i*sstep0, (int)sstep1, src_dft_buf, dst_dft_buf,
- dptr + i*dstep0, (int)dstep1, len, nf, factors,
- itab, dft_wave, dct_wave, spec, ptr );
+ CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dctRun");
}
- src = dst;
+ return;
}
+ OcvDctImpl * impl = (OcvDctImpl*)c.impl; // built-in path: c.impl was set to an OcvDctImpl by dctInit
+ impl->run((uchar*)src, src_step, (uchar*)dst, dst_step); // the cast drops const from src; run() takes a non-const pointer because it re-points src at dst between stages
+}
+
+// Releases whatever dctInit stored in the context.
+// Built-in backend: the OcvDctImpl is freed, deleted and c.impl is cleared.
+// Custom HAL backend: the HAL hook is asked to release c.impl and a failure
+// is reported through CV_Error.
+void dctFree(DftContext & c)
+{
+    if (!c.useReplacement)
+    {
+        OcvDctImpl * builtin = (OcvDctImpl*)c.impl;
+        builtin->free();
+        delete builtin;
+        c.impl = 0;
+        return;
+    }
+
+    const int status = cv_hal_dctFree(c.impl);
+    if (status != CV_HAL_ERROR_OK)
+    {
+        CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dctFree");
+    }
+}
+
+} // cv::hal::
+
+} // cv::
+
+// Forward or inverse discrete cosine transform of a single-channel
+// CV_32F / CV_64F array.
+//
+// _src0  input array (CV_32FC1 or CV_64FC1, checked with CV_Assert)
+// _dst   output array, created with the same size and type as the input
+// flags  DCT_INVERSE selects the inverse transform, DCT_ROWS transforms
+//        every row independently
+//
+// The public flags are translated to CV_HAL_DFT_* bits, then the work is
+// delegated to hal::dctInit / hal::dctRun / hal::dctFree.
+void cv::dct( InputArray _src0, OutputArray _dst, int flags )
+{
+    Mat src0 = _src0.getMat(), src = src0;
+    int type = src.type(), depth = src.depth();
+
+    CV_Assert( type == CV_32FC1 || type == CV_64FC1 );
+    _dst.create( src.rows, src.cols, type );
+    Mat dst = _dst.getMat();
+
+    int f = 0;
+    // DCT_ROWS (not DFT_ROWS) to stay consistent with DCT_INVERSE below.
+    if ((flags & DCT_ROWS) != 0)
+        f |= CV_HAL_DFT_ROWS;
+    if ((flags & DCT_INVERSE) != 0)
+        f |= CV_HAL_DFT_INVERSE;
+    if (src.isContinuous() && dst.isContinuous())
+        f |= CV_HAL_DFT_IS_CONTINUOUS;
+
+    hal::DftContext c;
+    hal::dctInit(c, src.cols, src.rows, depth, f);
+    try
+    {
+        hal::dctRun(c, (void*)src.data, (int)src.step, (void*)dst.data, (int)dst.step);
+    }
+    catch (...)
+    {
+        // dctRun reports failures (e.g. odd-size input, HAL failure) through
+        // CV_Error; release the context first so the implementation created
+        // by dctInit is not leaked, then let the original error propagate.
+        hal::dctFree(c);
+        throw;
+    }
+    hal::dctFree(c);
}