modules/core/src/dxt.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                        Intel License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
  14 // Third party copyrights are property of their respective owners.
  15 //
  16 // Redistribution and use in source and binary forms, with or without modification,
  17 // are permitted provided that the following conditions are met:
  18 //
  19 //   * Redistribution's of source code must retain the above copyright notice,
  20 //     this list of conditions and the following disclaimer.
  21 //
  22 //   * Redistribution's in binary form must reproduce the above copyright notice,
  23 //     this list of conditions and the following disclaimer in the documentation
  24 //     and/or other materials provided with the distribution.
  25 //
  26 //   * The name of Intel Corporation may not be used to endorse or promote products
  27 //     derived from this software without specific prior written permission.
  28 //
  29 // This software is provided by the copyright holders and contributors "as is" and
  30 // any express or implied warranties, including, but not limited to, the implied
  31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32 // In no event shall the Intel Corporation or contributors be liable for any direct,
  33 // indirect, incidental, special, exemplary, or consequential damages
  34 // (including, but not limited to, procurement of substitute goods or services;
  35 // loss of use, data, or profits; or business interruption) however caused
  36 // and on any theory of liability, whether in contract, strict liability,
  37 // or tort (including negligence or otherwise) arising in any way out of
  38 // the use of this software, even if advised of the possibility of such damage.
  39 //
  40 //M*/
  41
  42 #include "precomp.hpp"
  43 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
  44 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
  45 #include "opencl_kernels_core.hpp"
  46 #include <map>
  47
  48 namespace cv
  49 {
  50
  51 // On Win64 optimized versions of DFT and DCT fail the tests (fixed in VS2010)
  52 #if defined _MSC_VER && !defined CV_ICC && defined _M_X64 && _MSC_VER < 1600
  53 # pragma optimize("", off)
  54 # pragma warning(disable: 4748)
  55 #endif
  56
  57 #if IPP_VERSION_X100 >= 701
  58 #define USE_IPP_DFT 1
  59 #else
  60 #undef USE_IPP_DFT
  61 #endif
  62
  63
  64 /****************************************************************************************\
  65                                Discrete Fourier Transform
  66 \****************************************************************************************/
  67
  68 #define CV_MAX_LOCAL_DFT_SIZE  (1 << 15)
  69
  70 static unsigned char bitrevTab[] =
  71 {
  72   0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0,0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0,
  73   0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8,0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8,
  74   0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4,0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4,
  75   0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec,0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc,
  76   0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2,0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2,
  77   0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea,0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa,
  78   0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6,0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6,
  79   0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee,0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe,
  80   0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1,0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1,
  81   0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9,0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9,
  82   0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5,0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5,
  83   0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed,0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd,
  84   0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3,0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3,
  85   0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb,0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb,
  86   0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7,0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7,
  87   0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef,0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
  88 };
  89
  90 static const double DFTTab[][2] =
  91 {
  92 { 1.00000000000000000, 0.00000000000000000 },
  93 {-1.00000000000000000, 0.00000000000000000 },
  94 { 0.00000000000000000, 1.00000000000000000 },
  95 { 0.70710678118654757, 0.70710678118654746 },
  96 { 0.92387953251128674, 0.38268343236508978 },
  97 { 0.98078528040323043, 0.19509032201612825 },
  98 { 0.99518472667219693, 0.09801714032956060 },
  99 { 0.99879545620517241, 0.04906767432741802 },
 100 { 0.99969881869620425, 0.02454122852291229 },
 101 { 0.99992470183914450, 0.01227153828571993 },
 102 { 0.99998117528260111, 0.00613588464915448 },
 103 { 0.99999529380957619, 0.00306795676296598 },
 104 { 0.99999882345170188, 0.00153398018628477 },
 105 { 0.99999970586288223, 0.00076699031874270 },
 106 { 0.99999992646571789, 0.00038349518757140 },
 107 { 0.99999998161642933, 0.00019174759731070 },
 108 { 0.99999999540410733, 0.00009587379909598 },
 109 { 0.99999999885102686, 0.00004793689960307 },
 110 { 0.99999999971275666, 0.00002396844980842 },
 111 { 0.99999999992818922, 0.00001198422490507 },
 112 { 0.99999999998204725, 0.00000599211245264 },
 113 { 0.99999999999551181, 0.00000299605622633 },
 114 { 0.99999999999887801, 0.00000149802811317 },
 115 { 0.99999999999971945, 0.00000074901405658 },
 116 { 0.99999999999992983, 0.00000037450702829 },
 117 { 0.99999999999998246, 0.00000018725351415 },
 118 { 0.99999999999999567, 0.00000009362675707 },
 119 { 0.99999999999999889, 0.00000004681337854 },
 120 { 0.99999999999999978, 0.00000002340668927 },
 121 { 0.99999999999999989, 0.00000001170334463 },
 122 { 1.00000000000000000, 0.00000000585167232 },
 123 { 1.00000000000000000, 0.00000000292583616 }
 124 };
 125
 126 #define BitRev(i,shift) \
 127    ((int)((((unsigned)bitrevTab[(i)&255] << 24)+ \
 128            ((unsigned)bitrevTab[((i)>> 8)&255] << 16)+ \
 129            ((unsigned)bitrevTab[((i)>>16)&255] <<  8)+ \
 130            ((unsigned)bitrevTab[((i)>>24)])) >> (shift)))
 131
 132 static int
 133 DFTFactorize( int n, int* factors )
 134 {
 135     int nf = 0, f, i, j;
 136
 137     if( n <= 5 )
 138     {
 139         factors[0] = n;
 140         return 1;
 141     }
 142
 143     f = (((n - 1)^n)+1) >> 1;
 144     if( f > 1 )
 145     {
 146         factors[nf++] = f;
 147         n = f == n ? 1 : n/f;
 148     }
 149
 150     for( f = 3; n > 1; )
 151     {
 152         int d = n/f;
 153         if( d*f == n )
 154         {
 155             factors[nf++] = f;
 156             n = d;
 157         }
 158         else
 159         {
 160             f += 2;
 161             if( f*f > n )
 162                 break;
 163         }
 164     }
 165
 166     if( n > 1 )
 167         factors[nf++] = n;
 168
 169     f = (factors[0] & 1) == 0;
 170     for( i = f; i < (nf+f)/2; i++ )
 171         CV_SWAP( factors[i], factors[nf-i-1+f], j );
 172
 173     return nf;
 174 }
 175
 176 static void
 177 DFTInit( int n0, int nf, int* factors, int* itab, int elem_size, void* _wave, int inv_itab )
 178 {
 179     int digits[34], radix[34];
 180     int n = factors[0], m = 0;
 181     int* itab0 = itab;
 182     int i, j, k;
 183     Complex<double> w, w1;
 184     double t;
 185
 186     if( n0 <= 5 )
 187     {
 188         itab[0] = 0;
 189         itab[n0-1] = n0-1;
 190
 191         if( n0 != 4 )
 192         {
 193             for( i = 1; i < n0-1; i++ )
 194                 itab[i] = i;
 195         }
 196         else
 197         {
 198             itab[1] = 2;
 199             itab[2] = 1;
 200         }
 201         if( n0 == 5 )
 202         {
 203             if( elem_size == sizeof(Complex<double>) )
 204                 ((Complex<double>*)_wave)[0] = Complex<double>(1.,0.);
 205             else
 206                 ((Complex<float>*)_wave)[0] = Complex<float>(1.f,0.f);
 207         }
 208         if( n0 != 4 )
 209             return;
 210         m = 2;
 211     }
 212     else
 213     {
 214         // radix[] is initialized from index 'nf' down to zero
 215         assert (nf < 34);
 216         radix[nf] = 1;
 217         digits[nf] = 0;
 218         for( i = 0; i < nf; i++ )
 219         {
 220             digits[i] = 0;
 221             radix[nf-i-1] = radix[nf-i]*factors[nf-i-1];
 222         }
 223
 224         if( inv_itab && factors[0] != factors[nf-1] )
 225             itab = (int*)_wave;
 226
 227         if( (n & 1) == 0 )
 228         {
 229             int a = radix[1], na2 = n*a>>1, na4 = na2 >> 1;
 230             for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ )
 231                 ;
 232             if( n <= 2  )
 233             {
 234                 itab[0] = 0;
 235                 itab[1] = na2;
 236             }
 237             else if( n <= 256 )
 238             {
 239                 int shift = 10 - m;
 240                 for( i = 0; i <= n - 4; i += 4 )
 241                 {
 242                     j = (bitrevTab[i>>2]>>shift)*a;
 243                     itab[i] = j;
 244                     itab[i+1] = j + na2;
 245                     itab[i+2] = j + na4;
 246                     itab[i+3] = j + na2 + na4;
 247                 }
 248             }
 249             else
 250             {
 251                 int shift = 34 - m;
 252                 for( i = 0; i < n; i += 4 )
 253                 {
 254                     int i4 = i >> 2;
 255                     j = BitRev(i4,shift)*a;
 256                     itab[i] = j;
 257                     itab[i+1] = j + na2;
 258                     itab[i+2] = j + na4;
 259                     itab[i+3] = j + na2 + na4;
 260                 }
 261             }
 262
 263             digits[1]++;
 264
 265             if( nf >= 2 )
 266             {
 267                 for( i = n, j = radix[2]; i < n0; )
 268                 {
 269                     for( k = 0; k < n; k++ )
 270                         itab[i+k] = itab[k] + j;
 271                     if( (i += n) >= n0 )
 272                         break;
 273                     j += radix[2];
 274                     for( k = 1; ++digits[k] >= factors[k]; k++ )
 275                     {
 276                         digits[k] = 0;
 277                         j += radix[k+2] - radix[k];
 278                     }
 279                 }
 280             }
 281         }
 282         else
 283         {
 284             for( i = 0, j = 0;; )
 285             {
 286                 itab[i] = j;
 287                 if( ++i >= n0 )
 288                     break;
 289                 j += radix[1];
 290                 for( k = 0; ++digits[k] >= factors[k]; k++ )
 291                 {
 292                     digits[k] = 0;
 293                     j += radix[k+2] - radix[k];
 294                 }
 295             }
 296         }
 297
 298         if( itab != itab0 )
 299         {
 300             itab0[0] = 0;
 301             for( i = n0 & 1; i < n0; i += 2 )
 302             {
 303                 int k0 = itab[i];
 304                 int k1 = itab[i+1];
 305                 itab0[k0] = i;
 306                 itab0[k1] = i+1;
 307             }
 308         }
 309     }
 310
 311     if( (n0 & (n0-1)) == 0 )
 312     {
 313         w.re = w1.re = DFTTab[m][0];
 314         w.im = w1.im = -DFTTab[m][1];
 315     }
 316     else
 317     {
 318         t = -CV_PI*2/n0;
 319         w.im = w1.im = sin(t);
 320         w.re = w1.re = std::sqrt(1. - w1.im*w1.im);
 321     }
 322     n = (n0+1)/2;
 323
 324     if( elem_size == sizeof(Complex<double>) )
 325     {
 326         Complex<double>* wave = (Complex<double>*)_wave;
 327
 328         wave[0].re = 1.;
 329         wave[0].im = 0.;
 330
 331         if( (n0 & 1) == 0 )
 332         {
 333             wave[n].re = -1.;
 334             wave[n].im = 0;
 335         }
 336
 337         for( i = 1; i < n; i++ )
 338         {
 339             wave[i] = w;
 340             wave[n0-i].re = w.re;
 341             wave[n0-i].im = -w.im;
 342
 343             t = w.re*w1.re - w.im*w1.im;
 344             w.im = w.re*w1.im + w.im*w1.re;
 345             w.re = t;
 346         }
 347     }
 348     else
 349     {
 350         Complex<float>* wave = (Complex<float>*)_wave;
 351         assert( elem_size == sizeof(Complex<float>) );
 352
 353         wave[0].re = 1.f;
 354         wave[0].im = 0.f;
 355
 356         if( (n0 & 1) == 0 )
 357         {
 358             wave[n].re = -1.f;
 359             wave[n].im = 0.f;
 360         }
 361
 362         for( i = 1; i < n; i++ )
 363         {
 364             wave[i].re = (float)w.re;
 365             wave[i].im = (float)w.im;
 366             wave[n0-i].re = (float)w.re;
 367             wave[n0-i].im = (float)-w.im;
 368
 369             t = w.re*w1.re - w.im*w1.im;
 370             w.im = w.re*w1.im + w.im*w1.re;
 371             w.re = t;
 372         }
 373     }
 374 }
 375
 376 template<typename T> struct DFT_VecR4
 377 {
 378     int operator()(Complex<T>*, int, int, int&, const Complex<T>*) const { return 1; }
 379 };
 380
 381 #if CV_SSE3
 382
 383 // optimized radix-4 transform
 384 template<> struct DFT_VecR4<float>
 385 {
 386     int operator()(Complex<float>* dst, int N, int n0, int& _dw0, const Complex<float>* wave) const
 387     {
 388         int n = 1, i, j, nx, dw, dw0 = _dw0;
 389         __m128 z = _mm_setzero_ps(), x02=z, x13=z, w01=z, w23=z, y01, y23, t0, t1;
 390         Cv32suf t; t.i = 0x80000000;
 391         __m128 neg0_mask = _mm_load_ss(&t.f);
 392         __m128 neg3_mask = _mm_shuffle_ps(neg0_mask, neg0_mask, _MM_SHUFFLE(0,1,2,3));
 393
 394         for( ; n*4 <= N; )
 395         {
 396             nx = n;
 397             n *= 4;
 398             dw0 /= 4;
 399
 400             for( i = 0; i < n0; i += n )
 401             {
 402                 Complexf *v0, *v1;
 403
 404                 v0 = dst + i;
 405                 v1 = v0 + nx*2;
 406
 407                 x02 = _mm_loadl_pi(x02, (const __m64*)&v0[0]);
 408                 x13 = _mm_loadl_pi(x13, (const __m64*)&v0[nx]);
 409                 x02 = _mm_loadh_pi(x02, (const __m64*)&v1[0]);
 410                 x13 = _mm_loadh_pi(x13, (const __m64*)&v1[nx]);
 411
 412                 y01 = _mm_add_ps(x02, x13);
 413                 y23 = _mm_sub_ps(x02, x13);
 414                 t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), neg3_mask);
 415                 t0 = _mm_movelh_ps(y01, y23);
 416                 y01 = _mm_add_ps(t0, t1);
 417                 y23 = _mm_sub_ps(t0, t1);
 418
 419                 _mm_storel_pi((__m64*)&v0[0], y01);
 420                 _mm_storeh_pi((__m64*)&v0[nx], y01);
 421                 _mm_storel_pi((__m64*)&v1[0], y23);
 422                 _mm_storeh_pi((__m64*)&v1[nx], y23);
 423
 424                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
 425                 {
 426                     v0 = dst + i + j;
 427                     v1 = v0 + nx*2;
 428
 429                     x13 = _mm_loadl_pi(x13, (const __m64*)&v0[nx]);
 430                     w23 = _mm_loadl_pi(w23, (const __m64*)&wave[dw*2]);
 431                     x13 = _mm_loadh_pi(x13, (const __m64*)&v1[nx]); // x1, x3 = r1 i1 r3 i3
 432                     w23 = _mm_loadh_pi(w23, (const __m64*)&wave[dw*3]); // w2, w3 = wr2 wi2 wr3 wi3
 433
 434                     t0 = _mm_mul_ps(_mm_moveldup_ps(x13), w23);
 435                     t1 = _mm_mul_ps(_mm_movehdup_ps(x13), _mm_shuffle_ps(w23, w23, _MM_SHUFFLE(2,3,0,1)));
 436                     x13 = _mm_addsub_ps(t0, t1);
 437                     // re(x1*w2), im(x1*w2), re(x3*w3), im(x3*w3)
 438                     x02 = _mm_loadl_pi(x02, (const __m64*)&v1[0]); // x2 = r2 i2
 439                     w01 = _mm_loadl_pi(w01, (const __m64*)&wave[dw]); // w1 = wr1 wi1
 440                     x02 = _mm_shuffle_ps(x02, x02, _MM_SHUFFLE(0,0,1,1));
 441                     w01 = _mm_shuffle_ps(w01, w01, _MM_SHUFFLE(1,0,0,1));
 442                     x02 = _mm_mul_ps(x02, w01);
 443                     x02 = _mm_addsub_ps(x02, _mm_movelh_ps(x02, x02));
 444                     // re(x0) im(x0) re(x2*w1), im(x2*w1)
 445                     x02 = _mm_loadl_pi(x02, (const __m64*)&v0[0]);
 446
 447                     y01 = _mm_add_ps(x02, x13);
 448                     y23 = _mm_sub_ps(x02, x13);
 449                     t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), neg3_mask);
 450                     t0 = _mm_movelh_ps(y01, y23);
 451                     y01 = _mm_add_ps(t0, t1);
 452                     y23 = _mm_sub_ps(t0, t1);
 453
 454                     _mm_storel_pi((__m64*)&v0[0], y01);
 455                     _mm_storeh_pi((__m64*)&v0[nx], y01);
 456                     _mm_storel_pi((__m64*)&v1[0], y23);
 457                     _mm_storeh_pi((__m64*)&v1[nx], y23);
 458                 }
 459             }
 460         }
 461
 462         _dw0 = dw0;
 463         return n;
 464     }
 465 };
 466
 467 #endif
 468
 469 #ifdef USE_IPP_DFT
 470 static IppStatus ippsDFTFwd_CToC( const Complex<float>* src, Complex<float>* dst,
 471                              const void* spec, uchar* buf)
 472 {
 473     return ippsDFTFwd_CToC_32fc( (const Ipp32fc*)src, (Ipp32fc*)dst,
 474                                  (const IppsDFTSpec_C_32fc*)spec, buf);
 475 }
 476
 477 static IppStatus ippsDFTFwd_CToC( const Complex<double>* src, Complex<double>* dst,
 478                              const void* spec, uchar* buf)
 479 {
 480     return ippsDFTFwd_CToC_64fc( (const Ipp64fc*)src, (Ipp64fc*)dst,
 481                                  (const IppsDFTSpec_C_64fc*)spec, buf);
 482 }
 483
 484 static IppStatus ippsDFTInv_CToC( const Complex<float>* src, Complex<float>* dst,
 485                              const void* spec, uchar* buf)
 486 {
 487     return ippsDFTInv_CToC_32fc( (const Ipp32fc*)src, (Ipp32fc*)dst,
 488                                  (const IppsDFTSpec_C_32fc*)spec, buf);
 489 }
 490
 491 static IppStatus ippsDFTInv_CToC( const Complex<double>* src, Complex<double>* dst,
 492                                   const void* spec, uchar* buf)
 493 {
 494     return ippsDFTInv_CToC_64fc( (const Ipp64fc*)src, (Ipp64fc*)dst,
 495                                  (const IppsDFTSpec_C_64fc*)spec, buf);
 496 }
 497
 498 static IppStatus ippsDFTFwd_RToPack( const float* src, float* dst,
 499                                      const void* spec, uchar* buf)
 500 {
 501     return ippsDFTFwd_RToPack_32f( src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
 502 }
 503
 504 static IppStatus ippsDFTFwd_RToPack( const double* src, double* dst,
 505                                      const void* spec, uchar* buf)
 506 {
 507     return ippsDFTFwd_RToPack_64f( src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
 508 }
 509
 510 static IppStatus ippsDFTInv_PackToR( const float* src, float* dst,
 511                                      const void* spec, uchar* buf)
 512 {
 513     return ippsDFTInv_PackToR_32f( src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
 514 }
 515
 516 static IppStatus ippsDFTInv_PackToR( const double* src, double* dst,
 517                                      const void* spec, uchar* buf)
 518 {
 519     return ippsDFTInv_PackToR_64f( src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
 520 }
 521 #endif
 522
 523 enum { DFT_NO_PERMUTE=256, DFT_COMPLEX_INPUT_OR_OUTPUT=512 };
 524
 525 // mixed-radix complex discrete Fourier transform: double-precision version
 526 template<typename T> static void
 527 DFT( const Complex<T>* src, Complex<T>* dst, int n,
 528      int nf, const int* factors, const int* itab,
 529      const Complex<T>* wave, int tab_size,
 530      const void*
 531 #ifdef USE_IPP_DFT
 532      spec
 533 #endif
 534      , Complex<T>* buf,
 535      int flags, double _scale )
 536 {
 537     static const T sin_120 = (T)0.86602540378443864676372317075294;
 538     static const T fft5_2 = (T)0.559016994374947424102293417182819;
 539     static const T fft5_3 = (T)-0.951056516295153572116439333379382;
 540     static const T fft5_4 = (T)-1.538841768587626701285145288018455;
 541     static const T fft5_5 = (T)0.363271264002680442947733378740309;
 542
 543     int n0 = n, f_idx, nx;
 544     int inv = flags & DFT_INVERSE;
 545     int dw0 = tab_size, dw;
 546     int i, j, k;
 547     Complex<T> t;
 548     T scale = (T)_scale;
 549     int tab_step;
 550
 551 #ifdef USE_IPP_DFT
 552     if( spec )
 553     {
 554         if( !inv )
 555         {
 556             if (ippsDFTFwd_CToC( src, dst, spec, (uchar*)buf ) >= 0)
 557             {
 558                 CV_IMPL_ADD(CV_IMPL_IPP);
 559                 return;
 560             }
 561         }
 562         else
 563         {
 564             if (ippsDFTInv_CToC( src, dst, spec, (uchar*)buf ) >= 0)
 565             {
 566                 CV_IMPL_ADD(CV_IMPL_IPP);
 567                 return;
 568             }
 569         }
 570         setIppErrorStatus();
 571     }
 572 #endif
 573
 574     tab_step = tab_size == n ? 1 : tab_size == n*2 ? 2 : tab_size/n;
 575
 576     // 0. shuffle data
 577     if( dst != src )
 578     {
 579         assert( (flags & DFT_NO_PERMUTE) == 0 );
 580         if( !inv )
 581         {
 582             for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
 583             {
 584                 int k0 = itab[0], k1 = itab[tab_step];
 585                 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
 586                 dst[i] = src[k0]; dst[i+1] = src[k1];
 587             }
 588
 589             if( i < n )
 590                 dst[n-1] = src[n-1];
 591         }
 592         else
 593         {
 594             for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
 595             {
 596                 int k0 = itab[0], k1 = itab[tab_step];
 597                 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
 598                 t.re = src[k0].re; t.im = -src[k0].im;
 599                 dst[i] = t;
 600                 t.re = src[k1].re; t.im = -src[k1].im;
 601                 dst[i+1] = t;
 602             }
 603
 604             if( i < n )
 605             {
 606                 t.re = src[n-1].re; t.im = -src[n-1].im;
 607                 dst[i] = t;
 608             }
 609         }
 610     }
 611     else
 612     {
 613         if( (flags & DFT_NO_PERMUTE) == 0 )
 614         {
 615             CV_Assert( factors[0] == factors[nf-1] );
 616             if( nf == 1 )
 617             {
 618                 if( (n & 3) == 0 )
 619                 {
 620                     int n2 = n/2;
 621                     Complex<T>* dsth = dst + n2;
 622
 623                     for( i = 0; i < n2; i += 2, itab += tab_step*2 )
 624                     {
 625                         j = itab[0];
 626                         assert( (unsigned)j < (unsigned)n2 );
 627
 628                         CV_SWAP(dst[i+1], dsth[j], t);
 629                         if( j > i )
 630                         {
 631                             CV_SWAP(dst[i], dst[j], t);
 632                             CV_SWAP(dsth[i+1], dsth[j+1], t);
 633                         }
 634                     }
 635                 }
 636                 // else do nothing
 637             }
 638             else
 639             {
 640                 for( i = 0; i < n; i++, itab += tab_step )
 641                 {
 642                     j = itab[0];
 643                     assert( (unsigned)j < (unsigned)n );
 644                     if( j > i )
 645                         CV_SWAP(dst[i], dst[j], t);
 646                 }
 647             }
 648         }
 649
 650         if( inv )
 651         {
 652             for( i = 0; i <= n - 2; i += 2 )
 653             {
 654                 T t0 = -dst[i].im;
 655                 T t1 = -dst[i+1].im;
 656                 dst[i].im = t0; dst[i+1].im = t1;
 657             }
 658
 659             if( i < n )
 660                 dst[n-1].im = -dst[n-1].im;
 661         }
 662     }
 663
 664     n = 1;
 665     // 1. power-2 transforms
 666     if( (factors[0] & 1) == 0 )
 667     {
 668         if( factors[0] >= 4 && checkHardwareSupport(CV_CPU_SSE3))
 669         {
 670             DFT_VecR4<T> vr4;
 671             n = vr4(dst, factors[0], n0, dw0, wave);
 672         }
 673
 674         // radix-4 transform
 675         for( ; n*4 <= factors[0]; )
 676         {
 677             nx = n;
 678             n *= 4;
 679             dw0 /= 4;
 680
 681             for( i = 0; i < n0; i += n )
 682             {
 683                 Complex<T> *v0, *v1;
 684                 T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4;
 685
 686                 v0 = dst + i;
 687                 v1 = v0 + nx*2;
 688
 689                 r0 = v1[0].re; i0 = v1[0].im;
 690                 r4 = v1[nx].re; i4 = v1[nx].im;
 691
 692                 r1 = r0 + r4; i1 = i0 + i4;
 693                 r3 = i0 - i4; i3 = r4 - r0;
 694
 695                 r2 = v0[0].re; i2 = v0[0].im;
 696                 r4 = v0[nx].re; i4 = v0[nx].im;
 697
 698                 r0 = r2 + r4; i0 = i2 + i4;
 699                 r2 -= r4; i2 -= i4;
 700
 701                 v0[0].re = r0 + r1; v0[0].im = i0 + i1;
 702                 v1[0].re = r0 - r1; v1[0].im = i0 - i1;
 703                 v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
 704                 v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
 705
 706                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
 707                 {
 708                     v0 = dst + i + j;
 709                     v1 = v0 + nx*2;
 710
 711                     r2 = v0[nx].re*wave[dw*2].re - v0[nx].im*wave[dw*2].im;
 712                     i2 = v0[nx].re*wave[dw*2].im + v0[nx].im*wave[dw*2].re;
 713                     r0 = v1[0].re*wave[dw].im + v1[0].im*wave[dw].re;
 714                     i0 = v1[0].re*wave[dw].re - v1[0].im*wave[dw].im;
 715                     r3 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
 716                     i3 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
 717
 718                     r1 = i0 + i3; i1 = r0 + r3;
 719                     r3 = r0 - r3; i3 = i3 - i0;
 720                     r4 = v0[0].re; i4 = v0[0].im;
 721
 722                     r0 = r4 + r2; i0 = i4 + i2;
 723                     r2 = r4 - r2; i2 = i4 - i2;
 724
 725                     v0[0].re = r0 + r1; v0[0].im = i0 + i1;
 726                     v1[0].re = r0 - r1; v1[0].im = i0 - i1;
 727                     v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
 728                     v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
 729                 }
 730             }
 731         }
 732
 733         for( ; n < factors[0]; )
 734         {
 735             // do the remaining radix-2 transform
 736             nx = n;
 737             n *= 2;
 738             dw0 /= 2;
 739
 740             for( i = 0; i < n0; i += n )
 741             {
 742                 Complex<T>* v = dst + i;
 743                 T r0 = v[0].re + v[nx].re;
 744                 T i0 = v[0].im + v[nx].im;
 745                 T r1 = v[0].re - v[nx].re;
 746                 T i1 = v[0].im - v[nx].im;
 747                 v[0].re = r0; v[0].im = i0;
 748                 v[nx].re = r1; v[nx].im = i1;
 749
 750                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
 751                 {
 752                     v = dst + i + j;
 753                     r1 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
 754                     i1 = v[nx].im*wave[dw].re + v[nx].re*wave[dw].im;
 755                     r0 = v[0].re; i0 = v[0].im;
 756
 757                     v[0].re = r0 + r1; v[0].im = i0 + i1;
 758                     v[nx].re = r0 - r1; v[nx].im = i0 - i1;
 759                 }
 760             }
 761         }
 762     }
 763
 764     // 2. all the other transforms
 765     for( f_idx = (factors[0]&1) ? 0 : 1; f_idx < nf; f_idx++ )
 766     {
 767         int factor = factors[f_idx];
 768         nx = n;
 769         n *= factor;
 770         dw0 /= factor;
 771
 772         if( factor == 3 )
 773         {
 774             // radix-3
 775             for( i = 0; i < n0; i += n )
 776             {
 777                 Complex<T>* v = dst + i;
 778
 779                 T r1 = v[nx].re + v[nx*2].re;
 780                 T i1 = v[nx].im + v[nx*2].im;
 781                 T r0 = v[0].re;
 782                 T i0 = v[0].im;
 783                 T r2 = sin_120*(v[nx].im - v[nx*2].im);
 784                 T i2 = sin_120*(v[nx*2].re - v[nx].re);
 785                 v[0].re = r0 + r1; v[0].im = i0 + i1;
 786                 r0 -= (T)0.5*r1; i0 -= (T)0.5*i1;
 787                 v[nx].re = r0 + r2; v[nx].im = i0 + i2;
 788                 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
 789
 790                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
 791                 {
 792                     v = dst + i + j;
 793                     r0 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
 794                     i0 = v[nx].re*wave[dw].im + v[nx].im*wave[dw].re;
 795                     i2 = v[nx*2].re*wave[dw*2].re - v[nx*2].im*wave[dw*2].im;
 796                     r2 = v[nx*2].re*wave[dw*2].im + v[nx*2].im*wave[dw*2].re;
 797                     r1 = r0 + i2; i1 = i0 + r2;
 798
 799                     r2 = sin_120*(i0 - r2); i2 = sin_120*(i2 - r0);
 800                     r0 = v[0].re; i0 = v[0].im;
 801                     v[0].re = r0 + r1; v[0].im = i0 + i1;
 802                     r0 -= (T)0.5*r1; i0 -= (T)0.5*i1;
 803                     v[nx].re = r0 + r2; v[nx].im = i0 + i2;
 804                     v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
 805                 }
 806             }
 807         }
 808         else if( factor == 5 )
 809         {
 810             // radix-5
 811             for( i = 0; i < n0; i += n )
 812             {
 813                 for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
 814                 {
 815                     Complex<T>* v0 = dst + i + j;
 816                     Complex<T>* v1 = v0 + nx*2;
 817                     Complex<T>* v2 = v1 + nx*2;
 818
 819                     T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5;
 820
 821                     r3 = v0[nx].re*wave[dw].re - v0[nx].im*wave[dw].im;
 822                     i3 = v0[nx].re*wave[dw].im + v0[nx].im*wave[dw].re;
 823                     r2 = v2[0].re*wave[dw*4].re - v2[0].im*wave[dw*4].im;
 824                     i2 = v2[0].re*wave[dw*4].im + v2[0].im*wave[dw*4].re;
 825
 826                     r1 = r3 + r2; i1 = i3 + i2;
 827                     r3 -= r2; i3 -= i2;
 828
 829                     r4 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
 830                     i4 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
 831                     r0 = v1[0].re*wave[dw*2].re - v1[0].im*wave[dw*2].im;
 832                     i0 = v1[0].re*wave[dw*2].im + v1[0].im*wave[dw*2].re;
 833
 834                     r2 = r4 + r0; i2 = i4 + i0;
 835                     r4 -= r0; i4 -= i0;
 836
 837                     r0 = v0[0].re; i0 = v0[0].im;
 838                     r5 = r1 + r2; i5 = i1 + i2;
 839
 840                     v0[0].re = r0 + r5; v0[0].im = i0 + i5;
 841
 842                     r0 -= (T)0.25*r5; i0 -= (T)0.25*i5;
 843                     r1 = fft5_2*(r1 - r2); i1 = fft5_2*(i1 - i2);
 844                     r2 = -fft5_3*(i3 + i4); i2 = fft5_3*(r3 + r4);
 845
 846                     i3 *= -fft5_5; r3 *= fft5_5;
 847                     i4 *= -fft5_4; r4 *= fft5_4;
 848
 849                     r5 = r2 + i3; i5 = i2 + r3;
 850                     r2 -= i4; i2 -= r4;
 851
 852                     r3 = r0 + r1; i3 = i0 + i1;
 853                     r0 -= r1; i0 -= i1;
 854
 855                     v0[nx].re = r3 + r2; v0[nx].im = i3 + i2;
 856                     v2[0].re = r3 - r2; v2[0].im = i3 - i2;
 857
 858                     v1[0].re = r0 + r5; v1[0].im = i0 + i5;
 859                     v1[nx].re = r0 - r5; v1[nx].im = i0 - i5;
 860                 }
 861             }
 862         }
 863         else
 864         {
 865             // radix-"factor" - an odd number
 866             int p, q, factor2 = (factor - 1)/2;
 867             int d, dd, dw_f = tab_size/factor;
 868             Complex<T>* a = buf;
 869             Complex<T>* b = buf + factor2;
 870
 871             for( i = 0; i < n0; i += n )
 872             {
 873                 for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
 874                 {
 875                     Complex<T>* v = dst + i + j;
 876                     Complex<T> v_0 = v[0];
 877                     Complex<T> vn_0 = v_0;
 878
 879                     if( j == 0 )
 880                     {
 881                         for( p = 1, k = nx; p <= factor2; p++, k += nx )
 882                         {
 883                             T r0 = v[k].re + v[n-k].re;
 884                             T i0 = v[k].im - v[n-k].im;
 885                             T r1 = v[k].re - v[n-k].re;
 886                             T i1 = v[k].im + v[n-k].im;
 887
 888                             vn_0.re += r0; vn_0.im += i1;
 889                             a[p-1].re = r0; a[p-1].im = i0;
 890                             b[p-1].re = r1; b[p-1].im = i1;
 891                         }
 892                     }
 893                     else
 894                     {
 895                         const Complex<T>* wave_ = wave + dw*factor;
 896                         d = dw;
 897
 898                         for( p = 1, k = nx; p <= factor2; p++, k += nx, d += dw )
 899                         {
 900                             T r2 = v[k].re*wave[d].re - v[k].im*wave[d].im;
 901                             T i2 = v[k].re*wave[d].im + v[k].im*wave[d].re;
 902
 903                             T r1 = v[n-k].re*wave_[-d].re - v[n-k].im*wave_[-d].im;
 904                             T i1 = v[n-k].re*wave_[-d].im + v[n-k].im*wave_[-d].re;
 905
 906                             T r0 = r2 + r1;
 907                             T i0 = i2 - i1;
 908                             r1 = r2 - r1;
 909                             i1 = i2 + i1;
 910
 911                             vn_0.re += r0; vn_0.im += i1;
 912                             a[p-1].re = r0; a[p-1].im = i0;
 913                             b[p-1].re = r1; b[p-1].im = i1;
 914                         }
 915                     }
 916
 917                     v[0] = vn_0;
 918
 919                     for( p = 1, k = nx; p <= factor2; p++, k += nx )
 920                     {
 921                         Complex<T> s0 = v_0, s1 = v_0;
 922                         d = dd = dw_f*p;
 923
 924                         for( q = 0; q < factor2; q++ )
 925                         {
 926                             T r0 = wave[d].re * a[q].re;
 927                             T i0 = wave[d].im * a[q].im;
 928                             T r1 = wave[d].re * b[q].im;
 929                             T i1 = wave[d].im * b[q].re;
 930
 931                             s1.re += r0 + i0; s0.re += r0 - i0;
 932                             s1.im += r1 - i1; s0.im += r1 + i1;
 933
 934                             d += dd;
 935                             d -= -(d >= tab_size) & tab_size;
 936                         }
 937
 938                         v[k] = s0;
 939                         v[n-k] = s1;
 940                     }
 941                 }
 942             }
 943         }
 944     }
 945
 946     if( scale != 1 )
 947     {
 948         T re_scale = scale, im_scale = scale;
 949         if( inv )
 950             im_scale = -im_scale;
 951
 952         for( i = 0; i < n0; i++ )
 953         {
 954             T t0 = dst[i].re*re_scale;
 955             T t1 = dst[i].im*im_scale;
 956             dst[i].re = t0;
 957             dst[i].im = t1;
 958         }
 959     }
 960     else if( inv )
 961     {
 962         for( i = 0; i <= n0 - 2; i += 2 )
 963         {
 964             T t0 = -dst[i].im;
 965             T t1 = -dst[i+1].im;
 966             dst[i].im = t0;
 967             dst[i+1].im = t1;
 968         }
 969
 970         if( i < n0 )
 971             dst[n0-1].im = -dst[n0-1].im;
 972     }
 973 }
 974
 975
 976 /* FFT of real vector
 977    output vector format:
 978      re(0), re(1), im(1), ... , re(n/2-1), im((n+1)/2-1) [, re((n+1)/2)] OR ...
 979      re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
 980 template<typename T> static void
 981 RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
 982          const Complex<T>* wave, int tab_size, const void*
 983 #ifdef USE_IPP_DFT
 984          spec
 985 #endif
 986          ,
 987          Complex<T>* buf, int flags, double _scale )
 988 {
 989     int complex_output = (flags & DFT_COMPLEX_INPUT_OR_OUTPUT) != 0;
 990     T scale = (T)_scale;
 991     int j, n2 = n >> 1;
 992     dst += complex_output;
 993
 994 #ifdef USE_IPP_DFT
 995     if( spec )
 996     {
 997         if (ippsDFTFwd_RToPack( src, dst, spec, (uchar*)buf ) >=0)
 998         {
 999             if( complex_output )
1000             {
1001                 dst[-1] = dst[0];
1002                 dst[0] = 0;
1003                 if( (n & 1) == 0 )
1004                     dst[n] = 0;
1005             }
1006             CV_IMPL_ADD(CV_IMPL_IPP);
1007             return;
1008         }
1009         setIppErrorStatus();
1010     }
1011 #endif
1012     assert( tab_size == n );
1013
1014     if( n == 1 )
1015     {
1016         dst[0] = src[0]*scale;
1017     }
1018     else if( n == 2 )
1019     {
1020         T t = (src[0] + src[1])*scale;
1021         dst[1] = (src[0] - src[1])*scale;
1022         dst[0] = t;
1023     }
1024     else if( n & 1 )
1025     {
1026         dst -= complex_output;
1027         Complex<T>* _dst = (Complex<T>*)dst;
1028         _dst[0].re = src[0]*scale;
1029         _dst[0].im = 0;
1030         for( j = 1; j < n; j += 2 )
1031         {
1032             T t0 = src[itab[j]]*scale;
1033             T t1 = src[itab[j+1]]*scale;
1034             _dst[j].re = t0;
1035             _dst[j].im = 0;
1036             _dst[j+1].re = t1;
1037             _dst[j+1].im = 0;
1038         }
1039         DFT( _dst, _dst, n, nf, factors, itab, wave,
1040              tab_size, 0, buf, DFT_NO_PERMUTE, 1 );
1041         if( !complex_output )
1042             dst[1] = dst[0];
1043     }
1044     else
1045     {
1046         T t0, t;
1047         T h1_re, h1_im, h2_re, h2_im;
1048         T scale2 = scale*(T)0.5;
1049         factors[0] >>= 1;
1050
1051         DFT( (Complex<T>*)src, (Complex<T>*)dst, n2, nf - (factors[0] == 1),
1052              factors + (factors[0] == 1),
1053              itab, wave, tab_size, 0, buf, 0, 1 );
1054         factors[0] <<= 1;
1055
1056         t = dst[0] - dst[1];
1057         dst[0] = (dst[0] + dst[1])*scale;
1058         dst[1] = t*scale;
1059
1060         t0 = dst[n2];
1061         t = dst[n-1];
1062         dst[n-1] = dst[1];
1063
1064         for( j = 2, wave++; j < n2; j += 2, wave++ )
1065         {
1066             /* calc odd */
1067             h2_re = scale2*(dst[j+1] + t);
1068             h2_im = scale2*(dst[n-j] - dst[j]);
1069
1070             /* calc even */
1071             h1_re = scale2*(dst[j] + dst[n-j]);
1072             h1_im = scale2*(dst[j+1] - t);
1073
1074             /* rotate */
1075             t = h2_re*wave->re - h2_im*wave->im;
1076             h2_im = h2_re*wave->im + h2_im*wave->re;
1077             h2_re = t;
1078             t = dst[n-j-1];
1079
1080             dst[j-1] = h1_re + h2_re;
1081             dst[n-j-1] = h1_re - h2_re;
1082             dst[j] = h1_im + h2_im;
1083             dst[n-j] = h2_im - h1_im;
1084         }
1085
1086         if( j <= n2 )
1087         {
1088             dst[n2-1] = t0*scale;
1089             dst[n2] = -t*scale;
1090         }
1091     }
1092
1093     if( complex_output && (n & 1) == 0 )
1094     {
1095         dst[-1] = dst[0];
1096         dst[0] = 0;
1097         dst[n] = 0;
1098     }
1099 }
1100
1101 /* Inverse FFT of complex conjugate-symmetric vector
1102    input vector format:
1103       re[0], re[1], im[1], ... , re[n/2-1], im[n/2-1], re[n/2] OR
1104       re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
1105 template<typename T> static void
1106 CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
1107          const Complex<T>* wave, int tab_size,
1108          const void*
1109 #ifdef USE_IPP_DFT
1110          spec
1111 #endif
1112          , Complex<T>* buf,
1113          int flags, double _scale )
1114 {
1115     int complex_input = (flags & DFT_COMPLEX_INPUT_OR_OUTPUT) != 0;
1116     int j, k, n2 = (n+1) >> 1;
1117     T scale = (T)_scale;
1118     T save_s1 = 0.;
1119     T t0, t1, t2, t3, t;
1120
1121     assert( tab_size == n );
1122
1123     if( complex_input )
1124     {
1125         assert( src != dst );
1126         save_s1 = src[1];
1127         ((T*)src)[1] = src[0];
1128         src++;
1129     }
1130 #ifdef USE_IPP_DFT
1131     if( spec )
1132     {
1133         if (ippsDFTInv_PackToR( src, dst, spec, (uchar*)buf ) >=0)
1134         {
1135             if( complex_input )
1136                 ((T*)src)[0] = (T)save_s1;
1137             CV_IMPL_ADD(CV_IMPL_IPP);
1138             return;
1139         }
1140
1141         setIppErrorStatus();
1142     }
1143 #endif
1144     if( n == 1 )
1145     {
1146         dst[0] = (T)(src[0]*scale);
1147     }
1148     else if( n == 2 )
1149     {
1150         t = (src[0] + src[1])*scale;
1151         dst[1] = (src[0] - src[1])*scale;
1152         dst[0] = t;
1153     }
1154     else if( n & 1 )
1155     {
1156         Complex<T>* _src = (Complex<T>*)(src-1);
1157         Complex<T>* _dst = (Complex<T>*)dst;
1158
1159         _dst[0].re = src[0];
1160         _dst[0].im = 0;
1161         for( j = 1; j < n2; j++ )
1162         {
1163             int k0 = itab[j], k1 = itab[n-j];
1164             t0 = _src[j].re; t1 = _src[j].im;
1165             _dst[k0].re = t0; _dst[k0].im = -t1;
1166             _dst[k1].re = t0; _dst[k1].im = t1;
1167         }
1168
1169         DFT( _dst, _dst, n, nf, factors, itab, wave,
1170              tab_size, 0, buf, DFT_NO_PERMUTE, 1. );
1171         dst[0] *= scale;
1172         for( j = 1; j < n; j += 2 )
1173         {
1174             t0 = dst[j*2]*scale;
1175             t1 = dst[j*2+2]*scale;
1176             dst[j] = t0;
1177             dst[j+1] = t1;
1178         }
1179     }
1180     else
1181     {
1182         int inplace = src == dst;
1183         const Complex<T>* w = wave;
1184
1185         t = src[1];
1186         t0 = (src[0] + src[n-1]);
1187         t1 = (src[n-1] - src[0]);
1188         dst[0] = t0;
1189         dst[1] = t1;
1190
1191         for( j = 2, w++; j < n2; j += 2, w++ )
1192         {
1193             T h1_re, h1_im, h2_re, h2_im;
1194
1195             h1_re = (t + src[n-j-1]);
1196             h1_im = (src[j] - src[n-j]);
1197
1198             h2_re = (t - src[n-j-1]);
1199             h2_im = (src[j] + src[n-j]);
1200
1201             t = h2_re*w->re + h2_im*w->im;
1202             h2_im = h2_im*w->re - h2_re*w->im;
1203             h2_re = t;
1204
1205             t = src[j+1];
1206             t0 = h1_re - h2_im;
1207             t1 = -h1_im - h2_re;
1208             t2 = h1_re + h2_im;
1209             t3 = h1_im - h2_re;
1210
1211             if( inplace )
1212             {
1213                 dst[j] = t0;
1214                 dst[j+1] = t1;
1215                 dst[n-j] = t2;
1216                 dst[n-j+1]= t3;
1217             }
1218             else
1219             {
1220                 int j2 = j >> 1;
1221                 k = itab[j2];
1222                 dst[k] = t0;
1223                 dst[k+1] = t1;
1224                 k = itab[n2-j2];
1225                 dst[k] = t2;
1226                 dst[k+1]= t3;
1227             }
1228         }
1229
1230         if( j <= n2 )
1231         {
1232             t0 = t*2;
1233             t1 = src[n2]*2;
1234
1235             if( inplace )
1236             {
1237                 dst[n2] = t0;
1238                 dst[n2+1] = t1;
1239             }
1240             else
1241             {
1242                 k = itab[n2];
1243                 dst[k*2] = t0;
1244                 dst[k*2+1] = t1;
1245             }
1246         }
1247
1248         factors[0] >>= 1;
1249         DFT( (Complex<T>*)dst, (Complex<T>*)dst, n2,
1250              nf - (factors[0] == 1),
1251              factors + (factors[0] == 1), itab,
1252              wave, tab_size, 0, buf,
1253              inplace ? 0 : DFT_NO_PERMUTE, 1. );
1254         factors[0] <<= 1;
1255
1256         for( j = 0; j < n; j += 2 )
1257         {
1258             t0 = dst[j]*scale;
1259             t1 = dst[j+1]*(-scale);
1260             dst[j] = t0;
1261             dst[j+1] = t1;
1262         }
1263     }
1264     if( complex_input )
1265         ((T*)src)[0] = (T)save_s1;
1266 }
1267
1268 static void
1269 CopyColumn( const uchar* _src, size_t src_step,
1270             uchar* _dst, size_t dst_step,
1271             int len, size_t elem_size )
1272 {
1273     int i, t0, t1;
1274     const int* src = (const int*)_src;
1275     int* dst = (int*)_dst;
1276     src_step /= sizeof(src[0]);
1277     dst_step /= sizeof(dst[0]);
1278
1279     if( elem_size == sizeof(int) )
1280     {
1281         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1282             dst[0] = src[0];
1283     }
1284     else if( elem_size == sizeof(int)*2 )
1285     {
1286         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1287         {
1288             t0 = src[0]; t1 = src[1];
1289             dst[0] = t0; dst[1] = t1;
1290         }
1291     }
1292     else if( elem_size == sizeof(int)*4 )
1293     {
1294         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1295         {
1296             t0 = src[0]; t1 = src[1];
1297             dst[0] = t0; dst[1] = t1;
1298             t0 = src[2]; t1 = src[3];
1299             dst[2] = t0; dst[3] = t1;
1300         }
1301     }
1302 }
1303
1304
1305 static void
1306 CopyFrom2Columns( const uchar* _src, size_t src_step,
1307                   uchar* _dst0, uchar* _dst1,
1308                   int len, size_t elem_size )
1309 {
1310     int i, t0, t1;
1311     const int* src = (const int*)_src;
1312     int* dst0 = (int*)_dst0;
1313     int* dst1 = (int*)_dst1;
1314     src_step /= sizeof(src[0]);
1315
1316     if( elem_size == sizeof(int) )
1317     {
1318         for( i = 0; i < len; i++, src += src_step )
1319         {
1320             t0 = src[0]; t1 = src[1];
1321             dst0[i] = t0; dst1[i] = t1;
1322         }
1323     }
1324     else if( elem_size == sizeof(int)*2 )
1325     {
1326         for( i = 0; i < len*2; i += 2, src += src_step )
1327         {
1328             t0 = src[0]; t1 = src[1];
1329             dst0[i] = t0; dst0[i+1] = t1;
1330             t0 = src[2]; t1 = src[3];
1331             dst1[i] = t0; dst1[i+1] = t1;
1332         }
1333     }
1334     else if( elem_size == sizeof(int)*4 )
1335     {
1336         for( i = 0; i < len*4; i += 4, src += src_step )
1337         {
1338             t0 = src[0]; t1 = src[1];
1339             dst0[i] = t0; dst0[i+1] = t1;
1340             t0 = src[2]; t1 = src[3];
1341             dst0[i+2] = t0; dst0[i+3] = t1;
1342             t0 = src[4]; t1 = src[5];
1343             dst1[i] = t0; dst1[i+1] = t1;
1344             t0 = src[6]; t1 = src[7];
1345             dst1[i+2] = t0; dst1[i+3] = t1;
1346         }
1347     }
1348 }
1349
1350
1351 static void
1352 CopyTo2Columns( const uchar* _src0, const uchar* _src1,
1353                 uchar* _dst, size_t dst_step,
1354                 int len, size_t elem_size )
1355 {
1356     int i, t0, t1;
1357     const int* src0 = (const int*)_src0;
1358     const int* src1 = (const int*)_src1;
1359     int* dst = (int*)_dst;
1360     dst_step /= sizeof(dst[0]);
1361
1362     if( elem_size == sizeof(int) )
1363     {
1364         for( i = 0; i < len; i++, dst += dst_step )
1365         {
1366             t0 = src0[i]; t1 = src1[i];
1367             dst[0] = t0; dst[1] = t1;
1368         }
1369     }
1370     else if( elem_size == sizeof(int)*2 )
1371     {
1372         for( i = 0; i < len*2; i += 2, dst += dst_step )
1373         {
1374             t0 = src0[i]; t1 = src0[i+1];
1375             dst[0] = t0; dst[1] = t1;
1376             t0 = src1[i]; t1 = src1[i+1];
1377             dst[2] = t0; dst[3] = t1;
1378         }
1379     }
1380     else if( elem_size == sizeof(int)*4 )
1381     {
1382         for( i = 0; i < len*4; i += 4, dst += dst_step )
1383         {
1384             t0 = src0[i]; t1 = src0[i+1];
1385             dst[0] = t0; dst[1] = t1;
1386             t0 = src0[i+2]; t1 = src0[i+3];
1387             dst[2] = t0; dst[3] = t1;
1388             t0 = src1[i]; t1 = src1[i+1];
1389             dst[4] = t0; dst[5] = t1;
1390             t0 = src1[i+2]; t1 = src1[i+3];
1391             dst[6] = t0; dst[7] = t1;
1392         }
1393     }
1394 }
1395
1396
1397 static void
1398 ExpandCCS( uchar* _ptr, int n, int elem_size )
1399 {
1400     int i;
1401     if( elem_size == (int)sizeof(float) )
1402     {
1403         float* p = (float*)_ptr;
1404         for( i = 1; i < (n+1)/2; i++ )
1405         {
1406             p[(n-i)*2] = p[i*2-1];
1407             p[(n-i)*2+1] = -p[i*2];
1408         }
1409         if( (n & 1) == 0 )
1410         {
1411             p[n] = p[n-1];
1412             p[n+1] = 0.f;
1413             n--;
1414         }
1415         for( i = n-1; i > 0; i-- )
1416             p[i+1] = p[i];
1417         p[1] = 0.f;
1418     }
1419     else
1420     {
1421         double* p = (double*)_ptr;
1422         for( i = 1; i < (n+1)/2; i++ )
1423         {
1424             p[(n-i)*2] = p[i*2-1];
1425             p[(n-i)*2+1] = -p[i*2];
1426         }
1427         if( (n & 1) == 0 )
1428         {
1429             p[n] = p[n-1];
1430             p[n+1] = 0.f;
1431             n--;
1432         }
1433         for( i = n-1; i > 0; i-- )
1434             p[i+1] = p[i];
1435         p[1] = 0.f;
1436     }
1437 }
1438
1439
1440 typedef void (*DFTFunc)(
1441      const void* src, void* dst, int n, int nf, int* factors,
1442      const int* itab, const void* wave, int tab_size,
1443      const void* spec, void* buf, int inv, double scale );
1444
1445 static void DFT_32f( const Complexf* src, Complexf* dst, int n,
1446     int nf, const int* factors, const int* itab,
1447     const Complexf* wave, int tab_size,
1448     const void* spec, Complexf* buf,
1449     int flags, double scale )
1450 {
1451     DFT(src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
1452 }
1453
1454 static void DFT_64f( const Complexd* src, Complexd* dst, int n,
1455     int nf, const int* factors, const int* itab,
1456     const Complexd* wave, int tab_size,
1457     const void* spec, Complexd* buf,
1458     int flags, double scale )
1459 {
1460     DFT(src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
1461 }
1462
1463
1464 static void RealDFT_32f( const float* src, float* dst, int n, int nf, int* factors,
1465         const int* itab,  const Complexf* wave, int tab_size, const void* spec,
1466         Complexf* buf, int flags, double scale )
1467 {
1468     RealDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
1469 }
1470
1471 static void RealDFT_64f( const double* src, double* dst, int n, int nf, int* factors,
1472         const int* itab,  const Complexd* wave, int tab_size, const void* spec,
1473         Complexd* buf, int flags, double scale )
1474 {
1475     RealDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
1476 }
1477
1478 static void CCSIDFT_32f( const float* src, float* dst, int n, int nf, int* factors,
1479                          const int* itab,  const Complexf* wave, int tab_size, const void* spec,
1480                          Complexf* buf, int flags, double scale )
1481 {
1482     CCSIDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
1483 }
1484
1485 static void CCSIDFT_64f( const double* src, double* dst, int n, int nf, int* factors,
1486                          const int* itab,  const Complexd* wave, int tab_size, const void* spec,
1487                          Complexd* buf, int flags, double scale )
1488 {
1489     CCSIDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
1490 }
1491
1492 }
1493
1494 #ifdef USE_IPP_DFT
1495 typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*);
1496 typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*);
1497 #endif
1498
1499 namespace cv
1500 {
1501 #if defined USE_IPP_DFT
1502
1503 typedef IppStatus (CV_STDCALL* ippiDFT_C_Func)(const Ipp32fc*, int, Ipp32fc*, int, const IppiDFTSpec_C_32fc*, Ipp8u*);
1504 typedef IppStatus (CV_STDCALL* ippiDFT_R_Func)(const Ipp32f* , int, Ipp32f* , int, const IppiDFTSpec_R_32f* , Ipp8u*);
1505
1506 template <typename Dft>
1507 class Dft_C_IPPLoop_Invoker : public ParallelLoopBody
1508 {
1509 public:
1510
1511     Dft_C_IPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dft& _ippidft, int _norm_flag, bool *_ok) :
1512         ParallelLoopBody(), src(_src), dst(_dst), ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
1513     {
1514         *ok = true;
1515     }
1516
1517     virtual void operator()(const Range& range) const
1518     {
1519         IppStatus status;
1520         Ipp8u* pBuffer = 0;
1521         Ipp8u* pMemInit= 0;
1522         int sizeBuffer=0;
1523         int sizeSpec=0;
1524         int sizeInit=0;
1525
1526         IppiSize srcRoiSize = {src.cols, 1};
1527
1528         status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1529         if ( status < 0 )
1530         {
1531             *ok = false;
1532             return;
1533         }
1534
1535         IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)ippMalloc( sizeSpec );
1536
1537         if ( sizeInit > 0 )
1538             pMemInit = (Ipp8u*)ippMalloc( sizeInit );
1539
1540         if ( sizeBuffer > 0 )
1541             pBuffer = (Ipp8u*)ippMalloc( sizeBuffer );
1542
1543         status = ippiDFTInit_C_32fc( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
1544
1545         if ( sizeInit > 0 )
1546             ippFree( pMemInit );
1547
1548         if ( status < 0 )
1549         {
1550             ippFree( pDFTSpec );
1551             if ( sizeBuffer > 0 )
1552                 ippFree( pBuffer );
1553             *ok = false;
1554             return;
1555         }
1556
1557         for( int i = range.start; i < range.end; ++i)
1558             if(!ippidft(src.ptr<Ipp32fc>(i), (int)src.step,dst.ptr<Ipp32fc>(i), (int)dst.step, pDFTSpec, (Ipp8u*)pBuffer))
1559             {
1560                 *ok = false;
1561             }
1562
1563         if ( sizeBuffer > 0 )
1564             ippFree( pBuffer );
1565
1566         ippFree( pDFTSpec );
1567         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
1568     }
1569
1570 private:
1571     const Mat& src;
1572     Mat& dst;
1573     const Dft& ippidft;
1574     int norm_flag;
1575     bool *ok;
1576
1577     const Dft_C_IPPLoop_Invoker& operator= (const Dft_C_IPPLoop_Invoker&);
1578 };
1579
1580 template <typename Dft>
1581 class Dft_R_IPPLoop_Invoker : public ParallelLoopBody
1582 {
1583 public:
1584
1585     Dft_R_IPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dft& _ippidft, int _norm_flag, bool *_ok) :
1586         ParallelLoopBody(), src(_src), dst(_dst), ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
1587     {
1588         *ok = true;
1589     }
1590
1591     virtual void operator()(const Range& range) const
1592     {
1593         IppStatus status;
1594         Ipp8u* pBuffer = 0;
1595         Ipp8u* pMemInit= 0;
1596         int sizeBuffer=0;
1597         int sizeSpec=0;
1598         int sizeInit=0;
1599
1600         IppiSize srcRoiSize = {src.cols, 1};
1601
1602         status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1603         if ( status < 0 )
1604         {
1605             *ok = false;
1606             return;
1607         }
1608
1609         IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)ippMalloc( sizeSpec );
1610
1611         if ( sizeInit > 0 )
1612             pMemInit = (Ipp8u*)ippMalloc( sizeInit );
1613
1614         if ( sizeBuffer > 0 )
1615             pBuffer = (Ipp8u*)ippMalloc( sizeBuffer );
1616
1617         status = ippiDFTInit_R_32f( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
1618
1619         if ( sizeInit > 0 )
1620             ippFree( pMemInit );
1621
1622         if ( status < 0 )
1623         {
1624             ippFree( pDFTSpec );
1625             if ( sizeBuffer > 0 )
1626                 ippFree( pBuffer );
1627             *ok = false;
1628             return;
1629         }
1630
1631         for( int i = range.start; i < range.end; ++i)
1632             if(!ippidft(src.ptr<float>(i), (int)src.step,dst.ptr<float>(i), (int)dst.step, pDFTSpec, (Ipp8u*)pBuffer))
1633             {
1634                 *ok = false;
1635             }
1636
1637         if ( sizeBuffer > 0 )
1638             ippFree( pBuffer );
1639
1640         ippFree( pDFTSpec );
1641         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
1642     }
1643
1644 private:
1645     const Mat& src;
1646     Mat& dst;
1647     const Dft& ippidft;
1648     int norm_flag;
1649     bool *ok;
1650
1651     const Dft_R_IPPLoop_Invoker& operator= (const Dft_R_IPPLoop_Invoker&);
1652 };
1653
1654 template <typename Dft>
1655 bool Dft_C_IPPLoop(const Mat& src, Mat& dst, const Dft& ippidft, int norm_flag)
1656 {
1657     bool ok;
1658     parallel_for_(Range(0, src.rows), Dft_C_IPPLoop_Invoker<Dft>(src, dst, ippidft, norm_flag, &ok), src.total()/(double)(1<<16) );
1659     return ok;
1660 }
1661
1662 template <typename Dft>
1663 bool Dft_R_IPPLoop(const Mat& src, Mat& dst, const Dft& ippidft, int norm_flag)
1664 {
1665     bool ok;
1666     parallel_for_(Range(0, src.rows), Dft_R_IPPLoop_Invoker<Dft>(src, dst, ippidft, norm_flag, &ok), src.total()/(double)(1<<16) );
1667     return ok;
1668 }
1669
1670 struct IPPDFT_C_Functor
1671 {
1672     IPPDFT_C_Functor(ippiDFT_C_Func _func) : func(_func){}
1673
1674     bool operator()(const Ipp32fc* src, int srcStep, Ipp32fc* dst, int dstStep, const IppiDFTSpec_C_32fc* pDFTSpec, Ipp8u* pBuffer) const
1675     {
1676         return func ? func(src, srcStep, dst, dstStep, pDFTSpec, pBuffer) >= 0 : false;
1677     }
1678 private:
1679     ippiDFT_C_Func func;
1680 };
1681
1682 struct IPPDFT_R_Functor
1683 {
1684     IPPDFT_R_Functor(ippiDFT_R_Func _func) : func(_func){}
1685
1686     bool operator()(const Ipp32f* src, int srcStep, Ipp32f* dst, int dstStep, const IppiDFTSpec_R_32f* pDFTSpec, Ipp8u* pBuffer) const
1687     {
1688         return func ? func(src, srcStep, dst, dstStep, pDFTSpec, pBuffer) >= 0 : false;
1689     }
1690 private:
1691     ippiDFT_R_Func func;
1692 };
1693
1694 static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
1695 {
1696     IppStatus status;
1697     Ipp8u* pBuffer = 0;
1698     Ipp8u* pMemInit= 0;
1699     int sizeBuffer=0;
1700     int sizeSpec=0;
1701     int sizeInit=0;
1702
1703     IppiSize srcRoiSize = {src.cols, src.rows};
1704
1705     status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1706     if ( status < 0 )
1707         return false;
1708
1709     IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)ippMalloc( sizeSpec );
1710
1711     if ( sizeInit > 0 )
1712         pMemInit = (Ipp8u*)ippMalloc( sizeInit );
1713
1714     if ( sizeBuffer > 0 )
1715         pBuffer = (Ipp8u*)ippMalloc( sizeBuffer );
1716
1717     status = ippiDFTInit_C_32fc( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
1718
1719     if ( sizeInit > 0 )
1720         ippFree( pMemInit );
1721
1722     if ( status < 0 )
1723     {
1724         ippFree( pDFTSpec );
1725         if ( sizeBuffer > 0 )
1726             ippFree( pBuffer );
1727         return false;
1728     }
1729
1730     if (!inv)
1731         status = ippiDFTFwd_CToC_32fc_C1R( src.ptr<Ipp32fc>(), (int)src.step, dst.ptr<Ipp32fc>(), (int)dst.step, pDFTSpec, pBuffer );
1732     else
1733         status = ippiDFTInv_CToC_32fc_C1R( src.ptr<Ipp32fc>(), (int)src.step, dst.ptr<Ipp32fc>(), (int)dst.step, pDFTSpec, pBuffer );
1734
1735     if ( sizeBuffer > 0 )
1736         ippFree( pBuffer );
1737
1738     ippFree( pDFTSpec );
1739
1740     if(status >= 0)
1741     {
1742         CV_IMPL_ADD(CV_IMPL_IPP);
1743         return true;
1744     }
1745     return false;
1746 }
1747
1748 static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
1749 {
1750     IppStatus status;
1751     Ipp8u* pBuffer = 0;
1752     Ipp8u* pMemInit= 0;
1753     int sizeBuffer=0;
1754     int sizeSpec=0;
1755     int sizeInit=0;
1756
1757     IppiSize srcRoiSize = {src.cols, src.rows};
1758
1759     status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1760     if ( status < 0 )
1761         return false;
1762
1763     IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)ippMalloc( sizeSpec );
1764
1765     if ( sizeInit > 0 )
1766         pMemInit = (Ipp8u*)ippMalloc( sizeInit );
1767
1768     if ( sizeBuffer > 0 )
1769         pBuffer = (Ipp8u*)ippMalloc( sizeBuffer );
1770
1771     status = ippiDFTInit_R_32f( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
1772
1773     if ( sizeInit > 0 )
1774         ippFree( pMemInit );
1775
1776     if ( status < 0 )
1777     {
1778         ippFree( pDFTSpec );
1779         if ( sizeBuffer > 0 )
1780             ippFree( pBuffer );
1781         return false;
1782     }
1783
1784     if (!inv)
1785         status = ippiDFTFwd_RToPack_32f_C1R( src.ptr<float>(), (int)(src.step), dst.ptr<float>(), (int)dst.step, pDFTSpec, pBuffer );
1786     else
1787         status = ippiDFTInv_PackToR_32f_C1R( src.ptr<float>(), (int)src.step, dst.ptr<float>(), (int)dst.step, pDFTSpec, pBuffer );
1788
1789     if ( sizeBuffer > 0 )
1790         ippFree( pBuffer );
1791
1792     ippFree( pDFTSpec );
1793
1794     if(status >= 0)
1795     {
1796         CV_IMPL_ADD(CV_IMPL_IPP);
1797         return true;
1798     }
1799     return false;
1800 }
1801
1802 #endif
1803 }
1804
1805 #ifdef HAVE_OPENCL
1806
1807 namespace cv
1808 {
1809
1810 enum FftType
1811 {
1812     R2R = 0, // real to CCS in case forward transform, CCS to real otherwise
1813     C2R = 1, // complex to real in case inverse transform
1814     R2C = 2, // real to complex in case forward transform
1815     C2C = 3  // complex to complex
1816 };
1817
1818 struct OCL_FftPlan
1819 {
1820 private:
1821     UMat twiddles;
1822     String buildOptions;
1823     int thread_count;
1824     int dft_size;
1825     int dft_depth;
1826     bool status;
1827
1828 public:
1829     OCL_FftPlan(int _size, int _depth) : dft_size(_size), dft_depth(_depth), status(true)
1830     {
1831         CV_Assert( dft_depth == CV_32F || dft_depth == CV_64F );
1832
1833         int min_radix;
1834         std::vector<int> radixes, blocks;
1835         ocl_getRadixes(dft_size, radixes, blocks, min_radix);
1836         thread_count = dft_size / min_radix;
1837
1838         if (thread_count > (int) ocl::Device::getDefault().maxWorkGroupSize())
1839         {
1840             status = false;
1841             return;
1842         }
1843
1844         // generate string with radix calls
1845         String radix_processing;
1846         int n = 1, twiddle_size = 0;
1847         for (size_t i=0; i<radixes.size(); i++)
1848         {
1849             int radix = radixes[i], block = blocks[i];
1850             if (block > 1)
1851                 radix_processing += format("fft_radix%d_B%d(smem,twiddles+%d,ind,%d,%d);", radix, block, twiddle_size, n, dft_size/radix);
1852             else
1853                 radix_processing += format("fft_radix%d(smem,twiddles+%d,ind,%d,%d);", radix, twiddle_size, n, dft_size/radix);
1854             twiddle_size += (radix-1)*n;
1855             n *= radix;
1856         }
1857
1858         twiddles.create(1, twiddle_size, CV_MAKE_TYPE(dft_depth, 2));
1859         if (dft_depth == CV_32F)
1860             fillRadixTable<float>(twiddles, radixes);
1861         else
1862             fillRadixTable<double>(twiddles, radixes);
1863
1864         buildOptions = format("-D LOCAL_SIZE=%d -D kercn=%d -D FT=%s -D CT=%s%s -D RADIX_PROCESS=%s",
1865                               dft_size, min_radix, ocl::typeToStr(dft_depth), ocl::typeToStr(CV_MAKE_TYPE(dft_depth, 2)),
1866                               dft_depth == CV_64F ? " -D DOUBLE_SUPPORT" : "", radix_processing.c_str());
1867     }
1868
1869     bool enqueueTransform(InputArray _src, OutputArray _dst, int num_dfts, int flags, int fftType, bool rows = true) const
1870     {
1871         if (!status)
1872             return false;
1873
1874         UMat src = _src.getUMat();
1875         UMat dst = _dst.getUMat();
1876
1877         size_t globalsize[2];
1878         size_t localsize[2];
1879         String kernel_name;
1880
1881         bool is1d = (flags & DFT_ROWS) != 0 || num_dfts == 1;
1882         bool inv = (flags & DFT_INVERSE) != 0;
1883         String options = buildOptions;
1884
1885         if (rows)
1886         {
1887             globalsize[0] = thread_count; globalsize[1] = src.rows;
1888             localsize[0] = thread_count; localsize[1] = 1;
1889             kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows";
1890             if ((is1d || inv) && (flags & DFT_SCALE))
1891                 options += " -D DFT_SCALE";
1892         }
1893         else
1894         {
1895             globalsize[0] = num_dfts; globalsize[1] = thread_count;
1896             localsize[0] = 1; localsize[1] = thread_count;
1897             kernel_name = !inv ? "fft_multi_radix_cols" : "ifft_multi_radix_cols";
1898             if (flags & DFT_SCALE)
1899                 options += " -D DFT_SCALE";
1900         }
1901
1902         options += src.channels() == 1 ? " -D REAL_INPUT" : " -D COMPLEX_INPUT";
1903         options += dst.channels() == 1 ? " -D REAL_OUTPUT" : " -D COMPLEX_OUTPUT";
1904         options += is1d ? " -D IS_1D" : "";
1905
1906         if (!inv)
1907         {
1908             if ((is1d && src.channels() == 1) || (rows && (fftType == R2R)))
1909                 options += " -D NO_CONJUGATE";
1910         }
1911         else
1912         {
1913             if (rows && (fftType == C2R || fftType == R2R))
1914                 options += " -D NO_CONJUGATE";
1915             if (dst.cols % 2 == 0)
1916                 options += " -D EVEN";
1917         }
1918
1919         ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options);
1920         if (k.empty())
1921             return false;
1922
1923         k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::ReadOnlyNoSize(twiddles), thread_count, num_dfts);
1924         return k.run(2, globalsize, localsize, false);
1925     }
1926
1927 private:
1928     static void ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>& blocks, int& min_radix)
1929     {
1930         int factors[34];
1931         int nf = DFTFactorize(cols, factors);
1932
1933         int n = 1;
1934         int factor_index = 0;
1935         min_radix = INT_MAX;
1936
1937         // 2^n transforms
1938         if ((factors[factor_index] & 1) == 0)
1939         {
1940             for( ; n < factors[factor_index];)
1941             {
1942                 int radix = 2, block = 1;
1943                 if (8*n <= factors[0])
1944                     radix = 8;
1945                 else if (4*n <= factors[0])
1946                 {
1947                     radix = 4;
1948                     if (cols % 12 == 0)
1949                         block = 3;
1950                     else if (cols % 8 == 0)
1951                         block = 2;
1952                 }
1953                 else
1954                 {
1955                     if (cols % 10 == 0)
1956                         block = 5;
1957                     else if (cols % 8 == 0)
1958                         block = 4;
1959                     else if (cols % 6 == 0)
1960                         block = 3;
1961                     else if (cols % 4 == 0)
1962                         block = 2;
1963                 }
1964
1965                 radixes.push_back(radix);
1966                 blocks.push_back(block);
1967                 min_radix = min(min_radix, block*radix);
1968                 n *= radix;
1969             }
1970             factor_index++;
1971         }
1972
1973         // all the other transforms
1974         for( ; factor_index < nf; factor_index++)
1975         {
1976             int radix = factors[factor_index], block = 1;
1977             if (radix == 3)
1978             {
1979                 if (cols % 12 == 0)
1980                     block = 4;
1981                 else if (cols % 9 == 0)
1982                     block = 3;
1983                 else if (cols % 6 == 0)
1984                     block = 2;
1985             }
1986             else if (radix == 5)
1987             {
1988                 if (cols % 10 == 0)
1989                     block = 2;
1990             }
1991             radixes.push_back(radix);
1992             blocks.push_back(block);
1993             min_radix = min(min_radix, block*radix);
1994         }
1995     }
1996
1997     template <typename T>
1998     static void fillRadixTable(UMat twiddles, const std::vector<int>& radixes)
1999     {
2000         Mat tw = twiddles.getMat(ACCESS_WRITE);
2001         T* ptr = tw.ptr<T>();
2002         int ptr_index = 0;
2003
2004         int n = 1;
2005         for (size_t i=0; i<radixes.size(); i++)
2006         {
2007             int radix = radixes[i];
2008             n *= radix;
2009
2010             for (int j=1; j<radix; j++)
2011             {
2012                 double theta = -CV_2PI*j/n;
2013
2014                 for (int k=0; k<(n/radix); k++)
2015                 {
2016                     ptr[ptr_index++] = (T) cos(k*theta);
2017                     ptr[ptr_index++] = (T) sin(k*theta);
2018                 }
2019             }
2020         }
2021     }
2022 };
2023
2024 class OCL_FftPlanCache
2025 {
2026 public:
2027     static OCL_FftPlanCache & getInstance()
2028     {
2029         static OCL_FftPlanCache planCache;
2030         return planCache;
2031     }
2032
2033     Ptr<OCL_FftPlan> getFftPlan(int dft_size, int depth)
2034     {
2035         int key = (dft_size << 16) | (depth & 0xFFFF);
2036         std::map<int, Ptr<OCL_FftPlan> >::iterator f = planStorage.find(key);
2037         if (f != planStorage.end())
2038         {
2039             return f->second;
2040         }
2041         else
2042         {
2043             Ptr<OCL_FftPlan> newPlan = Ptr<OCL_FftPlan>(new OCL_FftPlan(dft_size, depth));
2044             planStorage[key] = newPlan;
2045             return newPlan;
2046         }
2047     }
2048
2049     ~OCL_FftPlanCache()
2050     {
2051         planStorage.clear();
2052     }
2053
2054 protected:
2055     OCL_FftPlanCache() :
2056         planStorage()
2057     {
2058     }
2059     std::map<int, Ptr<OCL_FftPlan> > planStorage;
2060 };
2061
2062 static bool ocl_dft_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType)
2063 {
2064     int type = _src.type(), depth = CV_MAT_DEPTH(type);
2065     Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), depth);
2066     return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true);
2067 }
2068
2069 static bool ocl_dft_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType)
2070 {
2071     int type = _src.type(), depth = CV_MAT_DEPTH(type);
2072     Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), depth);
2073     return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false);
2074 }
2075
2076 static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows)
2077 {
2078     int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
2079     Size ssize = _src.size();
2080     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
2081
2082     if ( !((cn == 1 || cn == 2) && (depth == CV_32F || (depth == CV_64F && doubleSupport))) )
2083         return false;
2084
2085     // if is not a multiplication of prime numbers { 2, 3, 5 }
2086     if (ssize.area() != getOptimalDFTSize(ssize.area()))
2087         return false;
2088
2089     UMat src = _src.getUMat();
2090     int complex_input = cn == 2 ? 1 : 0;
2091     int complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
2092     int real_input = cn == 1 ? 1 : 0;
2093     int real_output = (flags & DFT_REAL_OUTPUT) != 0;
2094     bool inv = (flags & DFT_INVERSE) != 0 ? 1 : 0;
2095
2096     if( nonzero_rows <= 0 || nonzero_rows > _src.rows() )
2097         nonzero_rows = _src.rows();
2098     bool is1d = (flags & DFT_ROWS) != 0 || nonzero_rows == 1;
2099
2100     // if output format is not specified
2101     if (complex_output + real_output == 0)
2102     {
2103         if (real_input)
2104             real_output = 1;
2105         else
2106             complex_output = 1;
2107     }
2108
2109     FftType fftType = (FftType)(complex_input << 0 | complex_output << 1);
2110
2111     // Forward Complex to CCS not supported
2112     if (fftType == C2R && !inv)
2113         fftType = C2C;
2114
2115     // Inverse CCS to Complex not supported
2116     if (fftType == R2C && inv)
2117         fftType = R2R;
2118
2119     UMat output;
2120     if (fftType == C2C || fftType == R2C)
2121     {
2122         // complex output
2123         _dst.create(src.size(), CV_MAKETYPE(depth, 2));
2124         output = _dst.getUMat();
2125     }
2126     else
2127     {
2128         // real output
2129         if (is1d)
2130         {
2131             _dst.create(src.size(), CV_MAKETYPE(depth, 1));
2132             output = _dst.getUMat();
2133         }
2134         else
2135         {
2136             _dst.create(src.size(), CV_MAKETYPE(depth, 1));
2137             output.create(src.size(), CV_MAKETYPE(depth, 2));
2138         }
2139     }
2140
2141     if (!inv)
2142     {
2143         if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType))
2144             return false;
2145
2146         if (!is1d)
2147         {
2148             int nonzero_cols = fftType == R2R ? output.cols/2 + 1 : output.cols;
2149             if (!ocl_dft_cols(output, _dst, nonzero_cols, flags, fftType))
2150                 return false;
2151         }
2152     }
2153     else
2154     {
2155         if (fftType == C2C)
2156         {
2157             // complex output
2158             if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType))
2159                 return false;
2160
2161             if (!is1d)
2162             {
2163                 if (!ocl_dft_cols(output, output, output.cols, flags, fftType))
2164                     return false;
2165             }
2166         }
2167         else
2168         {
2169             if (is1d)
2170             {
2171                 if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType))
2172                     return false;
2173             }
2174             else
2175             {
2176                 int nonzero_cols = src.cols/2 + 1;
2177                 if (!ocl_dft_cols(src, output, nonzero_cols, flags, fftType))
2178                     return false;
2179
2180                 if (!ocl_dft_rows(output, _dst, nonzero_rows, flags, fftType))
2181                     return false;
2182             }
2183         }
2184     }
2185     return true;
2186 }
2187
2188 } // namespace cv;
2189
2190 #endif
2191
2192 #ifdef HAVE_CLAMDFFT
2193
2194 namespace cv {
2195
2196 #define CLAMDDFT_Assert(func) \
2197     { \
2198         clAmdFftStatus s = (func); \
2199         CV_Assert(s == CLFFT_SUCCESS); \
2200     }
2201
2202 class PlanCache
2203 {
2204     struct FftPlan
2205     {
2206         FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) :
2207             dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step),
2208             doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType),
2209             context((cl_context)ocl::Context::getDefault().ptr()), plHandle(0)
2210         {
2211             bool dft_inverse = (flags & DFT_INVERSE) != 0;
2212             bool dft_scale = (flags & DFT_SCALE) != 0;
2213             bool dft_rows = (flags & DFT_ROWS) != 0;
2214
2215             clAmdFftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL;
2216             clAmdFftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D;
2217
2218             size_t batchSize = dft_rows ? dft_size.height : 1;
2219             size_t clLengthsIn[3] = { dft_size.width, dft_rows ? 1 : dft_size.height, 1 };
2220             size_t clStridesIn[3] = { 1, 1, 1 };
2221             size_t clStridesOut[3]  = { 1, 1, 1 };
2222             int elemSize = doubleFP ? sizeof(double) : sizeof(float);
2223
2224             switch (fftType)
2225             {
2226             case C2C:
2227                 inLayout = CLFFT_COMPLEX_INTERLEAVED;
2228                 outLayout = CLFFT_COMPLEX_INTERLEAVED;
2229                 clStridesIn[1] = src_step / (elemSize << 1);
2230                 clStridesOut[1] = dst_step / (elemSize << 1);
2231                 break;
2232             case R2C:
2233                 inLayout = CLFFT_REAL;
2234                 outLayout = CLFFT_HERMITIAN_INTERLEAVED;
2235                 clStridesIn[1] = src_step / elemSize;
2236                 clStridesOut[1] = dst_step / (elemSize << 1);
2237                 break;
2238             case C2R:
2239                 inLayout = CLFFT_HERMITIAN_INTERLEAVED;
2240                 outLayout = CLFFT_REAL;
2241                 clStridesIn[1] = src_step / (elemSize << 1);
2242                 clStridesOut[1] = dst_step / elemSize;
2243                 break;
2244             case R2R:
2245             default:
2246                 CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type");
2247                 break;
2248             }
2249
2250             clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1];
2251             clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1];
2252
2253             CLAMDDFT_Assert(clAmdFftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context::getDefault().ptr(), dim, clLengthsIn))
2254
2255             // setting plan properties
2256             CLAMDDFT_Assert(clAmdFftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE));
2257             CLAMDDFT_Assert(clAmdFftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE))
2258             CLAMDDFT_Assert(clAmdFftSetLayout(plHandle, inLayout, outLayout))
2259             CLAMDDFT_Assert(clAmdFftSetPlanBatchSize(plHandle, batchSize))
2260             CLAMDDFT_Assert(clAmdFftSetPlanInStride(plHandle, dim, clStridesIn))
2261             CLAMDDFT_Assert(clAmdFftSetPlanOutStride(plHandle, dim, clStridesOut))
2262             CLAMDDFT_Assert(clAmdFftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim]))
2263
2264             float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f;
2265             CLAMDDFT_Assert(clAmdFftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale))
2266
2267             // ready to bake
2268             cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
2269             CLAMDDFT_Assert(clAmdFftBakePlan(plHandle, 1, &queue, NULL, NULL))
2270         }
2271
2272         ~FftPlan()
2273         {
2274 //            clAmdFftDestroyPlan(&plHandle);
2275         }
2276
2277         friend class PlanCache;
2278
2279     private:
2280         Size dft_size;
2281         int src_step, dst_step;
2282         bool doubleFP;
2283         bool inplace;
2284         int flags;
2285         FftType fftType;
2286
2287         cl_context context;
2288         clAmdFftPlanHandle plHandle;
2289     };
2290
2291 public:
2292     static PlanCache & getInstance()
2293     {
2294         static PlanCache planCache;
2295         return planCache;
2296     }
2297
2298     clAmdFftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP,
2299                                      bool inplace, int flags, FftType fftType)
2300     {
2301         cl_context currentContext = (cl_context)ocl::Context::getDefault().ptr();
2302
2303         for (size_t i = 0, size = planStorage.size(); i < size; ++i)
2304         {
2305             const FftPlan * const plan = planStorage[i];
2306
2307             if (plan->dft_size == dft_size &&
2308                 plan->flags == flags &&
2309                 plan->src_step == src_step &&
2310                 plan->dst_step == dst_step &&
2311                 plan->doubleFP == doubleFP &&
2312                 plan->fftType == fftType &&
2313                 plan->inplace == inplace)
2314             {
2315                 if (plan->context != currentContext)
2316                 {
2317                     planStorage.erase(planStorage.begin() + i);
2318                     break;
2319                 }
2320
2321                 return plan->plHandle;
2322             }
2323         }
2324
2325         // no baked plan is found, so let's create a new one
2326         Ptr<FftPlan> newPlan = Ptr<FftPlan>(new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType));
2327         planStorage.push_back(newPlan);
2328
2329         return newPlan->plHandle;
2330     }
2331
2332     ~PlanCache()
2333     {
2334         planStorage.clear();
2335     }
2336
2337 protected:
2338     PlanCache() :
2339         planStorage()
2340     {
2341     }
2342
2343     std::vector<Ptr<FftPlan> > planStorage;
2344 };
2345
2346 extern "C" {
2347
2348 static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
2349 {
2350     UMatData * u = (UMatData *)p;
2351
2352     if( u && CV_XADD(&u->urefcount, -1) == 1 )
2353         u->currAllocator->deallocate(u);
2354     u = 0;
2355
2356     clReleaseEvent(e), e = 0;
2357 }
2358
2359 }
2360
2361 static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags)
2362 {
2363     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2364     Size ssize = _src.size();
2365
2366     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
2367     if ( (!doubleSupport && depth == CV_64F) ||
2368          !(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2) ||
2369          _src.offset() != 0)
2370         return false;
2371
2372     // if is not a multiplication of prime numbers { 2, 3, 5 }
2373     if (ssize.area() != getOptimalDFTSize(ssize.area()))
2374         return false;
2375
2376     int dst_complex_input = cn == 2 ? 1 : 0;
2377     bool dft_inverse = (flags & DFT_INVERSE) != 0 ? 1 : 0;
2378     int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
2379     bool dft_real_output = (flags & DFT_REAL_OUTPUT) != 0;
2380
2381     CV_Assert(dft_complex_output + dft_real_output < 2);
2382     FftType fftType = (FftType)(dst_complex_input << 0 | dft_complex_output << 1);
2383
2384     switch (fftType)
2385     {
2386     case C2C:
2387         _dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, 2));
2388         break;
2389     case R2C: // TODO implement it if possible
2390     case C2R: // TODO implement it if possible
2391     case R2R: // AMD Fft does not support this type
2392     default:
2393         return false;
2394     }
2395
2396     UMat src = _src.getUMat(), dst = _dst.getUMat();
2397     bool inplace = src.u == dst.u;
2398
2399     clAmdFftPlanHandle plHandle = PlanCache::getInstance().
2400             getPlanHandle(ssize, (int)src.step, (int)dst.step,
2401                           depth == CV_64F, inplace, flags, fftType);
2402
2403     // get the bufferSize
2404     size_t bufferSize = 0;
2405     CLAMDDFT_Assert(clAmdFftGetTmpBufSize(plHandle, &bufferSize))
2406     UMat tmpBuffer(1, (int)bufferSize, CV_8UC1);
2407
2408     cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ);
2409     cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW);
2410
2411     cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
2412     cl_event e = 0;
2413
2414     CLAMDDFT_Assert(clAmdFftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
2415                                        1, &queue, 0, NULL, &e,
2416                                        &srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))
2417
2418     tmpBuffer.addref();
2419     clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u);
2420     return true;
2421 }
2422
2423 #undef DFT_ASSERT
2424
2425 }
2426
2427 #endif // HAVE_CLAMDFFT
2428
2429 void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
2430 {
2431 #ifdef HAVE_CLAMDFFT
2432     CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
2433             _dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0,
2434                ocl_dft_amdfft(_src0, _dst, flags))
2435 #endif
2436
2437 #ifdef HAVE_OPENCL
2438     CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2,
2439                ocl_dft(_src0, _dst, flags, nonzero_rows))
2440 #endif
2441
2442     static DFTFunc dft_tbl[6] =
2443     {
2444         (DFTFunc)DFT_32f,
2445         (DFTFunc)RealDFT_32f,
2446         (DFTFunc)CCSIDFT_32f,
2447         (DFTFunc)DFT_64f,
2448         (DFTFunc)RealDFT_64f,
2449         (DFTFunc)CCSIDFT_64f
2450     };
2451     AutoBuffer<uchar> buf;
2452     void *spec = 0;
2453     Mat src0 = _src0.getMat(), src = src0;
2454     int prev_len = 0, stage = 0;
2455     bool inv = (flags & DFT_INVERSE) != 0;
2456     int nf = 0, real_transform = src.channels() == 1 || (inv && (flags & DFT_REAL_OUTPUT)!=0);
2457     int type = src.type(), depth = src.depth();
2458     int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2;
2459     int factors[34];
2460     bool inplace_transform = false;
2461 #ifdef USE_IPP_DFT
2462     AutoBuffer<uchar> ippbuf;
2463     int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1;
2464 #endif
2465
2466     CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
2467
2468     if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
2469         _dst.create( src.size(), CV_MAKETYPE(depth, 2) );
2470     else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
2471         _dst.create( src.size(), depth );
2472     else
2473         _dst.create( src.size(), type );
2474
2475     Mat dst = _dst.getMat();
2476
2477 #if defined USE_IPP_DFT
2478     CV_IPP_CHECK()
2479     {
2480         if ((src.depth() == CV_32F) && (src.total()>(int)(1<<6)) && nonzero_rows == 0)
2481         {
2482             if ((flags & DFT_ROWS) == 0)
2483             {
2484                 if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT)))
2485                 {
2486                     if (ippi_DFT_C_32F(src, dst, inv, ipp_norm_flag))
2487                     {
2488                         CV_IMPL_ADD(CV_IMPL_IPP);
2489                         return;
2490                     }
2491                     setIppErrorStatus();
2492                 }
2493                 if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT)))
2494                 {
2495                     if (ippi_DFT_R_32F(src, dst, inv, ipp_norm_flag))
2496                     {
2497                         CV_IMPL_ADD(CV_IMPL_IPP);
2498                         return;
2499                     }
2500                     setIppErrorStatus();
2501                 }
2502             }
2503             else
2504             {
2505                 if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT)))
2506                 {
2507                     ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R;
2508                     if (Dft_C_IPPLoop(src, dst, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag))
2509                     {
2510                         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
2511                         return;
2512                     }
2513                     setIppErrorStatus();
2514                 }
2515                 if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT)))
2516                 {
2517                     ippiDFT_R_Func ippiFunc = inv ? (ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R;
2518                     if (Dft_R_IPPLoop(src, dst, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag))
2519                     {
2520                         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
2521                         return;
2522                     }
2523                     setIppErrorStatus();
2524                 }
2525             }
2526         }
2527     }
2528 #endif
2529
2530     if( !real_transform )
2531         elem_size = complex_elem_size;
2532
2533     if( src.cols == 1 && nonzero_rows > 0 )
2534         CV_Error( CV_StsNotImplemented,
2535         "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
2536         "For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
2537
2538     // determine, which transform to do first - row-wise
2539     // (stage 0) or column-wise (stage 1) transform
2540     if( !(flags & DFT_ROWS) && src.rows > 1 &&
2541         ((src.cols == 1 && (!src.isContinuous() || !dst.isContinuous())) ||
2542          (src.cols > 1 && inv && real_transform)) )
2543         stage = 1;
2544
2545     for(;;)
2546     {
2547         double scale = 1;
2548         uchar* wave = 0;
2549         int* itab = 0;
2550         uchar* ptr;
2551         int i, len, count, sz = 0;
2552         int use_buf = 0, odd_real = 0;
2553         DFTFunc dft_func;
2554
2555         if( stage == 0 ) // row-wise transform
2556         {
2557             len = !inv ? src.cols : dst.cols;
2558             count = src.rows;
2559             if( len == 1 && !(flags & DFT_ROWS) )
2560             {
2561                 len = !inv ? src.rows : dst.rows;
2562                 count = 1;
2563             }
2564             odd_real = real_transform && (len & 1);
2565         }
2566         else
2567         {
2568             len = dst.rows;
2569             count = !inv ? src0.cols : dst.cols;
2570             sz = 2*len*complex_elem_size;
2571         }
2572
2573         spec = 0;
2574 #ifdef USE_IPP_DFT
2575         if( CV_IPP_CHECK_COND && (len*count >= 64) ) // use IPP DFT if available
2576         {
2577             int specsize=0, initsize=0, worksize=0;
2578             IppDFTGetSizeFunc getSizeFunc = 0;
2579             IppDFTInitFunc initFunc = 0;
2580
2581             if( real_transform && stage == 0 )
2582             {
2583                 if( depth == CV_32F )
2584                 {
2585                     getSizeFunc = ippsDFTGetSize_R_32f;
2586                     initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
2587                 }
2588                 else
2589                 {
2590                     getSizeFunc = ippsDFTGetSize_R_64f;
2591                     initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
2592                 }
2593             }
2594             else
2595             {
2596                 if( depth == CV_32F )
2597                 {
2598                     getSizeFunc = ippsDFTGetSize_C_32fc;
2599                     initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
2600                 }
2601                 else
2602                 {
2603                     getSizeFunc = ippsDFTGetSize_C_64fc;
2604                     initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
2605                 }
2606             }
2607             if( getSizeFunc(len, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
2608             {
2609                 ippbuf.allocate(specsize + initsize + 64);
2610                 spec = alignPtr(&ippbuf[0], 32);
2611                 uchar* initbuf = alignPtr((uchar*)spec + specsize, 32);
2612                 if( initFunc(len, ipp_norm_flag, ippAlgHintNone, spec, initbuf) < 0 )
2613                     spec = 0;
2614                 sz += worksize;
2615             }
2616             else
2617                 setIppErrorStatus();
2618         }
2619         else
2620 #endif
2621         {
2622             if( len != prev_len )
2623                 nf = DFTFactorize( len, factors );
2624
2625             inplace_transform = factors[0] == factors[nf-1];
2626             sz += len*(complex_elem_size + sizeof(int));
2627             i = nf > 1 && (factors[0] & 1) == 0;
2628             if( (factors[i] & 1) != 0 && factors[i] > 5 )
2629                 sz += (factors[i]+1)*complex_elem_size;
2630
2631             if( (stage == 0 && ((src.data == dst.data && !inplace_transform) || odd_real)) ||
2632                 (stage == 1 && !inplace_transform) )
2633             {
2634                 use_buf = 1;
2635                 sz += len*complex_elem_size;
2636             }
2637         }
2638
2639         ptr = (uchar*)buf;
2640         buf.allocate( sz + 32 );
2641         if( ptr != (uchar*)buf )
2642             prev_len = 0; // because we release the buffer,
2643                           // force recalculation of
2644                           // twiddle factors and permutation table
2645         ptr = (uchar*)buf;
2646         if( !spec )
2647         {
2648             wave = ptr;
2649             ptr += len*complex_elem_size;
2650             itab = (int*)ptr;
2651             ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 );
2652
2653             if( len != prev_len || (!inplace_transform && inv && real_transform))
2654                 DFTInit( len, nf, factors, itab, complex_elem_size,
2655                             wave, stage == 0 && inv && real_transform );
2656             // otherwise reuse the tables calculated on the previous stage
2657         }
2658
2659         if( stage == 0 )
2660         {
2661             uchar* tmp_buf = 0;
2662             int dptr_offset = 0;
2663             int dst_full_len = len*elem_size;
2664             int _flags = (int)inv + (src.channels() != dst.channels() ?
2665                          DFT_COMPLEX_INPUT_OR_OUTPUT : 0);
2666             if( use_buf )
2667             {
2668                 tmp_buf = ptr;
2669                 ptr += len*complex_elem_size;
2670                 if( odd_real && !inv && len > 1 &&
2671                     !(_flags & DFT_COMPLEX_INPUT_OR_OUTPUT))
2672                     dptr_offset = elem_size;
2673             }
2674
2675             if( !inv && (_flags & DFT_COMPLEX_INPUT_OR_OUTPUT) )
2676                 dst_full_len += (len & 1) ? elem_size : complex_elem_size;
2677
2678             dft_func = dft_tbl[(!real_transform ? 0 : !inv ? 1 : 2) + (depth == CV_64F)*3];
2679
2680             if( count > 1 && !(flags & DFT_ROWS) && (!inv || !real_transform) )
2681                 stage = 1;
2682             else if( flags & CV_DXT_SCALE )
2683                 scale = 1./(len * (flags & DFT_ROWS ? 1 : count));
2684
2685             if( nonzero_rows <= 0 || nonzero_rows > count )
2686                 nonzero_rows = count;
2687
2688             for( i = 0; i < nonzero_rows; i++ )
2689             {
2690                 const uchar* sptr = src.ptr(i);
2691                 uchar* dptr0 = dst.ptr(i);
2692                 uchar* dptr = dptr0;
2693
2694                 if( tmp_buf )
2695                     dptr = tmp_buf;
2696
2697                 dft_func( sptr, dptr, len, nf, factors, itab, wave, len, spec, ptr, _flags, scale );
2698                 if( dptr != dptr0 )
2699                     memcpy( dptr0, dptr + dptr_offset, dst_full_len );
2700             }
2701
2702             for( ; i < count; i++ )
2703             {
2704                 uchar* dptr0 = dst.ptr(i);
2705                 memset( dptr0, 0, dst_full_len );
2706             }
2707
2708             if( stage != 1 )
2709                 break;
2710             src = dst;
2711         }
2712         else
2713         {
2714             int a = 0, b = count;
2715             uchar *buf0, *buf1, *dbuf0, *dbuf1;
2716             const uchar* sptr0 = src.ptr();
2717             uchar* dptr0 = dst.ptr();
2718             buf0 = ptr;
2719             ptr += len*complex_elem_size;
2720             buf1 = ptr;
2721             ptr += len*complex_elem_size;
2722             dbuf0 = buf0, dbuf1 = buf1;
2723
2724             if( use_buf )
2725             {
2726                 dbuf1 = ptr;
2727                 dbuf0 = buf1;
2728                 ptr += len*complex_elem_size;
2729             }
2730
2731             dft_func = dft_tbl[(depth == CV_64F)*3];
2732
2733             if( real_transform && inv && src.cols > 1 )
2734                 stage = 0;
2735             else if( flags & CV_DXT_SCALE )
2736                 scale = 1./(len * count);
2737
2738             if( real_transform )
2739             {
2740                 int even;
2741                 a = 1;
2742                 even = (count & 1) == 0;
2743                 b = (count+1)/2;
2744                 if( !inv )
2745                 {
2746                     memset( buf0, 0, len*complex_elem_size );
2747                     CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, elem_size );
2748                     sptr0 += dst.channels()*elem_size;
2749                     if( even )
2750                     {
2751                         memset( buf1, 0, len*complex_elem_size );
2752                         CopyColumn( sptr0 + (count-2)*elem_size, src.step,
2753                                     buf1, complex_elem_size, len, elem_size );
2754                     }
2755                 }
2756                 else if( src.channels() == 1 )
2757                 {
2758                     CopyColumn( sptr0, src.step, buf0, elem_size, len, elem_size );
2759                     ExpandCCS( buf0, len, elem_size );
2760                     if( even )
2761                     {
2762                         CopyColumn( sptr0 + (count-1)*elem_size, src.step,
2763                                     buf1, elem_size, len, elem_size );
2764                         ExpandCCS( buf1, len, elem_size );
2765                     }
2766                     sptr0 += elem_size;
2767                 }
2768                 else
2769                 {
2770                     CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size );
2771                     if( even )
2772                     {
2773                         CopyColumn( sptr0 + b*complex_elem_size, src.step,
2774                                        buf1, complex_elem_size, len, complex_elem_size );
2775                     }
2776                     sptr0 += complex_elem_size;
2777                 }
2778
2779                 if( even )
2780                     dft_func( buf1, dbuf1, len, nf, factors, itab,
2781                               wave, len, spec, ptr, inv, scale );
2782                 dft_func( buf0, dbuf0, len, nf, factors, itab,
2783                           wave, len, spec, ptr, inv, scale );
2784
2785                 if( dst.channels() == 1 )
2786                 {
2787                     if( !inv )
2788                     {
2789                         // copy the half of output vector to the first/last column.
2790                         // before doing that, defgragment the vector
2791                         memcpy( dbuf0 + elem_size, dbuf0, elem_size );
2792                         CopyColumn( dbuf0 + elem_size, elem_size, dptr0,
2793                                        dst.step, len, elem_size );
2794                         if( even )
2795                         {
2796                             memcpy( dbuf1 + elem_size, dbuf1, elem_size );
2797                             CopyColumn( dbuf1 + elem_size, elem_size,
2798                                            dptr0 + (count-1)*elem_size,
2799                                            dst.step, len, elem_size );
2800                         }
2801                         dptr0 += elem_size;
2802                     }
2803                     else
2804                     {
2805                         // copy the real part of the complex vector to the first/last column
2806                         CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, elem_size );
2807                         if( even )
2808                             CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size,
2809                                            dst.step, len, elem_size );
2810                         dptr0 += elem_size;
2811                     }
2812                 }
2813                 else
2814                 {
2815                     assert( !inv );
2816                     CopyColumn( dbuf0, complex_elem_size, dptr0,
2817                                    dst.step, len, complex_elem_size );
2818                     if( even )
2819                         CopyColumn( dbuf1, complex_elem_size,
2820                                        dptr0 + b*complex_elem_size,
2821                                        dst.step, len, complex_elem_size );
2822                     dptr0 += complex_elem_size;
2823                 }
2824             }
2825
2826             for( i = a; i < b; i += 2 )
2827             {
2828                 if( i+1 < b )
2829                 {
2830                     CopyFrom2Columns( sptr0, src.step, buf0, buf1, len, complex_elem_size );
2831                     dft_func( buf1, dbuf1, len, nf, factors, itab,
2832                               wave, len, spec, ptr, inv, scale );
2833                 }
2834                 else
2835                     CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size );
2836
2837                 dft_func( buf0, dbuf0, len, nf, factors, itab,
2838                           wave, len, spec, ptr, inv, scale );
2839
2840                 if( i+1 < b )
2841                     CopyTo2Columns( dbuf0, dbuf1, dptr0, dst.step, len, complex_elem_size );
2842                 else
2843                     CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, complex_elem_size );
2844                 sptr0 += 2*complex_elem_size;
2845                 dptr0 += 2*complex_elem_size;
2846             }
2847
2848             if( stage != 0 )
2849             {
2850                 if( !inv && real_transform && dst.channels() == 2 && len > 1 )
2851                 {
2852                     int n = dst.cols;
2853                     if( elem_size == (int)sizeof(float) )
2854                     {
2855                         float* p0 = dst.ptr<float>();
2856                         size_t dstep = dst.step/sizeof(p0[0]);
2857                         for( i = 0; i < len; i++ )
2858                         {
2859                             float* p = p0 + dstep*i;
2860                             float* q = i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
2861
2862                             for( int j = 1; j < (n+1)/2; j++ )
2863                             {
2864                                 p[(n-j)*2] = q[j*2];
2865                                 p[(n-j)*2+1] = -q[j*2+1];
2866                             }
2867                         }
2868                     }
2869                     else
2870                     {
2871                         double* p0 = dst.ptr<double>();
2872                         size_t dstep = dst.step/sizeof(p0[0]);
2873                         for( i = 0; i < len; i++ )
2874                         {
2875                             double* p = p0 + dstep*i;
2876                             double* q = i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
2877
2878                             for( int j = 1; j < (n+1)/2; j++ )
2879                             {
2880                                 p[(n-j)*2] = q[j*2];
2881                                 p[(n-j)*2+1] = -q[j*2+1];
2882                             }
2883                         }
2884                     }
2885                 }
2886                 break;
2887             }
2888             src = dst;
2889         }
2890     }
2891 }
2892
2893
2894 void cv::idft( InputArray src, OutputArray dst, int flags, int nonzero_rows )
2895 {
2896     dft( src, dst, flags | DFT_INVERSE, nonzero_rows );
2897 }
2898
2899 #ifdef HAVE_OPENCL
2900
2901 namespace cv {
2902
2903 static bool ocl_mulSpectrums( InputArray _srcA, InputArray _srcB,
2904                               OutputArray _dst, int flags, bool conjB )
2905 {
2906     int atype = _srcA.type(), btype = _srcB.type(),
2907             rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
2908     Size asize = _srcA.size(), bsize = _srcB.size();
2909     CV_Assert(asize == bsize);
2910
2911     if ( !(atype == CV_32FC2 && btype == CV_32FC2) || flags != 0 )
2912         return false;
2913
2914     UMat A = _srcA.getUMat(), B = _srcB.getUMat();
2915     CV_Assert(A.size() == B.size());
2916
2917     _dst.create(A.size(), atype);
2918     UMat dst = _dst.getUMat();
2919
2920     ocl::Kernel k("mulAndScaleSpectrums",
2921                   ocl::core::mulspectrums_oclsrc,
2922                   format("%s", conjB ? "-D CONJ" : ""));
2923     if (k.empty())
2924         return false;
2925
2926     k.args(ocl::KernelArg::ReadOnlyNoSize(A), ocl::KernelArg::ReadOnlyNoSize(B),
2927            ocl::KernelArg::WriteOnly(dst), rowsPerWI);
2928
2929     size_t globalsize[2] = { asize.width, (asize.height + rowsPerWI - 1) / rowsPerWI };
2930     return k.run(2, globalsize, NULL, false);
2931 }
2932
2933 }
2934
2935 #endif
2936
2937 void cv::mulSpectrums( InputArray _srcA, InputArray _srcB,
2938                        OutputArray _dst, int flags, bool conjB )
2939 {
2940     CV_OCL_RUN(_dst.isUMat() && _srcA.dims() <= 2 && _srcB.dims() <= 2,
2941             ocl_mulSpectrums(_srcA, _srcB, _dst, flags, conjB))
2942
2943     Mat srcA = _srcA.getMat(), srcB = _srcB.getMat();
2944     int depth = srcA.depth(), cn = srcA.channels(), type = srcA.type();
2945     int rows = srcA.rows, cols = srcA.cols;
2946     int j, k;
2947
2948     CV_Assert( type == srcB.type() && srcA.size() == srcB.size() );
2949     CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
2950
2951     _dst.create( srcA.rows, srcA.cols, type );
2952     Mat dst = _dst.getMat();
2953
2954     bool is_1d = (flags & DFT_ROWS) || (rows == 1 || (cols == 1 &&
2955              srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous()));
2956
2957     if( is_1d && !(flags & DFT_ROWS) )
2958         cols = cols + rows - 1, rows = 1;
2959
2960     int ncols = cols*cn;
2961     int j0 = cn == 1;
2962     int j1 = ncols - (cols % 2 == 0 && cn == 1);
2963
2964     if( depth == CV_32F )
2965     {
2966         const float* dataA = srcA.ptr<float>();
2967         const float* dataB = srcB.ptr<float>();
2968         float* dataC = dst.ptr<float>();
2969
2970         size_t stepA = srcA.step/sizeof(dataA[0]);
2971         size_t stepB = srcB.step/sizeof(dataB[0]);
2972         size_t stepC = dst.step/sizeof(dataC[0]);
2973
2974         if( !is_1d && cn == 1 )
2975         {
2976             for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
2977             {
2978                 if( k == 1 )
2979                     dataA += cols - 1, dataB += cols - 1, dataC += cols - 1;
2980                 dataC[0] = dataA[0]*dataB[0];
2981                 if( rows % 2 == 0 )
2982                     dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB];
2983                 if( !conjB )
2984                     for( j = 1; j <= rows - 2; j += 2 )
2985                     {
2986                         double re = (double)dataA[j*stepA]*dataB[j*stepB] -
2987                                     (double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
2988                         double im = (double)dataA[j*stepA]*dataB[(j+1)*stepB] +
2989                                     (double)dataA[(j+1)*stepA]*dataB[j*stepB];
2990                         dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im;
2991                     }
2992                 else
2993                     for( j = 1; j <= rows - 2; j += 2 )
2994                     {
2995                         double re = (double)dataA[j*stepA]*dataB[j*stepB] +
2996                                     (double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
2997                         double im = (double)dataA[(j+1)*stepA]*dataB[j*stepB] -
2998                                     (double)dataA[j*stepA]*dataB[(j+1)*stepB];
2999                         dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im;
3000                     }
3001                 if( k == 1 )
3002                     dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1;
3003             }
3004         }
3005
3006         for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
3007         {
3008             if( is_1d && cn == 1 )
3009             {
3010                 dataC[0] = dataA[0]*dataB[0];
3011                 if( cols % 2 == 0 )
3012                     dataC[j1] = dataA[j1]*dataB[j1];
3013             }
3014
3015             if( !conjB )
3016                 for( j = j0; j < j1; j += 2 )
3017                 {
3018                     double re = (double)dataA[j]*dataB[j] - (double)dataA[j+1]*dataB[j+1];
3019                     double im = (double)dataA[j+1]*dataB[j] + (double)dataA[j]*dataB[j+1];
3020                     dataC[j] = (float)re; dataC[j+1] = (float)im;
3021                 }
3022             else
3023                 for( j = j0; j < j1; j += 2 )
3024                 {
3025                     double re = (double)dataA[j]*dataB[j] + (double)dataA[j+1]*dataB[j+1];
3026                     double im = (double)dataA[j+1]*dataB[j] - (double)dataA[j]*dataB[j+1];
3027                     dataC[j] = (float)re; dataC[j+1] = (float)im;
3028                 }
3029         }
3030     }
3031     else
3032     {
3033         const double* dataA = srcA.ptr<double>();
3034         const double* dataB = srcB.ptr<double>();
3035         double* dataC = dst.ptr<double>();
3036
3037         size_t stepA = srcA.step/sizeof(dataA[0]);
3038         size_t stepB = srcB.step/sizeof(dataB[0]);
3039         size_t stepC = dst.step/sizeof(dataC[0]);
3040
3041         if( !is_1d && cn == 1 )
3042         {
3043             for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
3044             {
3045                 if( k == 1 )
3046                     dataA += cols - 1, dataB += cols - 1, dataC += cols - 1;
3047                 dataC[0] = dataA[0]*dataB[0];
3048                 if( rows % 2 == 0 )
3049                     dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB];
3050                 if( !conjB )
3051                     for( j = 1; j <= rows - 2; j += 2 )
3052                     {
3053                         double re = dataA[j*stepA]*dataB[j*stepB] -
3054                                     dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
3055                         double im = dataA[j*stepA]*dataB[(j+1)*stepB] +
3056                                     dataA[(j+1)*stepA]*dataB[j*stepB];
3057                         dataC[j*stepC] = re; dataC[(j+1)*stepC] = im;
3058                     }
3059                 else
3060                     for( j = 1; j <= rows - 2; j += 2 )
3061                     {
3062                         double re = dataA[j*stepA]*dataB[j*stepB] +
3063                                     dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
3064                         double im = dataA[(j+1)*stepA]*dataB[j*stepB] -
3065                                     dataA[j*stepA]*dataB[(j+1)*stepB];
3066                         dataC[j*stepC] = re; dataC[(j+1)*stepC] = im;
3067                     }
3068                 if( k == 1 )
3069                     dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1;
3070             }
3071         }
3072
3073         for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
3074         {
3075             if( is_1d && cn == 1 )
3076             {
3077                 dataC[0] = dataA[0]*dataB[0];
3078                 if( cols % 2 == 0 )
3079                     dataC[j1] = dataA[j1]*dataB[j1];
3080             }
3081
3082             if( !conjB )
3083                 for( j = j0; j < j1; j += 2 )
3084                 {
3085                     double re = dataA[j]*dataB[j] - dataA[j+1]*dataB[j+1];
3086                     double im = dataA[j+1]*dataB[j] + dataA[j]*dataB[j+1];
3087                     dataC[j] = re; dataC[j+1] = im;
3088                 }
3089             else
3090                 for( j = j0; j < j1; j += 2 )
3091                 {
3092                     double re = dataA[j]*dataB[j] + dataA[j+1]*dataB[j+1];
3093                     double im = dataA[j+1]*dataB[j] - dataA[j]*dataB[j+1];
3094                     dataC[j] = re; dataC[j+1] = im;
3095                 }
3096         }
3097     }
3098 }
3099
3100
3101 /****************************************************************************************\
3102                                Discrete Cosine Transform
3103 \****************************************************************************************/
3104
3105 namespace cv
3106 {
3107
3108 /* DCT is calculated using DFT, as described here:
3109    http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/:
3110 */
3111 template<typename T> static void
3112 DCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
3113      int n, int nf, int* factors, const int* itab, const Complex<T>* dft_wave,
3114      const Complex<T>* dct_wave, const void* spec, Complex<T>* buf )
3115 {
3116     static const T sin_45 = (T)0.70710678118654752440084436210485;
3117     int j, n2 = n >> 1;
3118
3119     src_step /= sizeof(src[0]);
3120     dst_step /= sizeof(dst[0]);
3121     T* dst1 = dst + (n-1)*dst_step;
3122
3123     if( n == 1 )
3124     {
3125         dst[0] = src[0];
3126         return;
3127     }
3128
3129     for( j = 0; j < n2; j++, src += src_step*2 )
3130     {
3131         dft_src[j] = src[0];
3132         dft_src[n-j-1] = src[src_step];
3133     }
3134
3135     RealDFT( dft_src, dft_dst, n, nf, factors,
3136              itab, dft_wave, n, spec, buf, 0, 1.0 );
3137     src = dft_dst;
3138
3139     dst[0] = (T)(src[0]*dct_wave->re*sin_45);
3140     dst += dst_step;
3141     for( j = 1, dct_wave++; j < n2; j++, dct_wave++,
3142                                     dst += dst_step, dst1 -= dst_step )
3143     {
3144         T t0 = dct_wave->re*src[j*2-1] - dct_wave->im*src[j*2];
3145         T t1 = -dct_wave->im*src[j*2-1] - dct_wave->re*src[j*2];
3146         dst[0] = t0;
3147         dst1[0] = t1;
3148     }
3149
3150     dst[0] = src[n-1]*dct_wave->re;
3151 }
3152
3153
3154 template<typename T> static void
3155 IDCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
3156       int n, int nf, int* factors, const int* itab, const Complex<T>* dft_wave,
3157       const Complex<T>* dct_wave, const void* spec, Complex<T>* buf )
3158 {
3159     static const T sin_45 = (T)0.70710678118654752440084436210485;
3160     int j, n2 = n >> 1;
3161
3162     src_step /= sizeof(src[0]);
3163     dst_step /= sizeof(dst[0]);
3164     const T* src1 = src + (n-1)*src_step;
3165
3166     if( n == 1 )
3167     {
3168         dst[0] = src[0];
3169         return;
3170     }
3171
3172     dft_src[0] = (T)(src[0]*2*dct_wave->re*sin_45);
3173     src += src_step;
3174     for( j = 1, dct_wave++; j < n2; j++, dct_wave++,
3175                                     src += src_step, src1 -= src_step )
3176     {
3177         T t0 = dct_wave->re*src[0] - dct_wave->im*src1[0];
3178         T t1 = -dct_wave->im*src[0] - dct_wave->re*src1[0];
3179         dft_src[j*2-1] = t0;
3180         dft_src[j*2] = t1;
3181     }
3182
3183     dft_src[n-1] = (T)(src[0]*2*dct_wave->re);
3184     CCSIDFT( dft_src, dft_dst, n, nf, factors, itab,
3185              dft_wave, n, spec, buf, 0, 1.0 );
3186
3187     for( j = 0; j < n2; j++, dst += dst_step*2 )
3188     {
3189         dst[0] = dft_dst[j];
3190         dst[dst_step] = dft_dst[n-j-1];
3191     }
3192 }
3193
3194
3195 static void
3196 DCTInit( int n, int elem_size, void* _wave, int inv )
3197 {
3198     static const double DctScale[] =
3199     {
3200     0.707106781186547570, 0.500000000000000000, 0.353553390593273790,
3201     0.250000000000000000, 0.176776695296636890, 0.125000000000000000,
3202     0.088388347648318447, 0.062500000000000000, 0.044194173824159223,
3203     0.031250000000000000, 0.022097086912079612, 0.015625000000000000,
3204     0.011048543456039806, 0.007812500000000000, 0.005524271728019903,
3205     0.003906250000000000, 0.002762135864009952, 0.001953125000000000,
3206     0.001381067932004976, 0.000976562500000000, 0.000690533966002488,
3207     0.000488281250000000, 0.000345266983001244, 0.000244140625000000,
3208     0.000172633491500622, 0.000122070312500000, 0.000086316745750311,
3209     0.000061035156250000, 0.000043158372875155, 0.000030517578125000
3210     };
3211
3212     int i;
3213     Complex<double> w, w1;
3214     double t, scale;
3215
3216     if( n == 1 )
3217         return;
3218
3219     assert( (n&1) == 0 );
3220
3221     if( (n & (n - 1)) == 0 )
3222     {
3223         int m;
3224         for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ )
3225             ;
3226         scale = (!inv ? 2 : 1)*DctScale[m];
3227         w1.re = DFTTab[m+2][0];
3228         w1.im = -DFTTab[m+2][1];
3229     }
3230     else
3231     {
3232         t = 1./(2*n);
3233         scale = (!inv ? 2 : 1)*std::sqrt(t);
3234         w1.im = sin(-CV_PI*t);
3235         w1.re = std::sqrt(1. - w1.im*w1.im);
3236     }
3237     n >>= 1;
3238
3239     if( elem_size == sizeof(Complex<double>) )
3240     {
3241         Complex<double>* wave = (Complex<double>*)_wave;
3242
3243         w.re = scale;
3244         w.im = 0.;
3245
3246         for( i = 0; i <= n; i++ )
3247         {
3248             wave[i] = w;
3249             t = w.re*w1.re - w.im*w1.im;
3250             w.im = w.re*w1.im + w.im*w1.re;
3251             w.re = t;
3252         }
3253     }
3254     else
3255     {
3256         Complex<float>* wave = (Complex<float>*)_wave;
3257         assert( elem_size == sizeof(Complex<float>) );
3258
3259         w.re = (float)scale;
3260         w.im = 0.f;
3261
3262         for( i = 0; i <= n; i++ )
3263         {
3264             wave[i].re = (float)w.re;
3265             wave[i].im = (float)w.im;
3266             t = w.re*w1.re - w.im*w1.im;
3267             w.im = w.re*w1.im + w.im*w1.re;
3268             w.re = t;
3269         }
3270     }
3271 }
3272
3273
3274 typedef void (*DCTFunc)(const void* src, int src_step, void* dft_src,
3275                         void* dft_dst, void* dst, int dst_step, int n,
3276                         int nf, int* factors, const int* itab, const void* dft_wave,
3277                         const void* dct_wave, const void* spec, void* buf );
3278
3279 static void DCT_32f(const float* src, int src_step, float* dft_src, float* dft_dst,
3280                     float* dst, int dst_step, int n, int nf, int* factors, const int* itab,
3281                     const Complexf* dft_wave, const Complexf* dct_wave, const void* spec, Complexf* buf )
3282 {
3283     DCT(src, src_step, dft_src, dft_dst, dst, dst_step,
3284         n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
3285 }
3286
3287 static void IDCT_32f(const float* src, int src_step, float* dft_src, float* dft_dst,
3288                     float* dst, int dst_step, int n, int nf, int* factors, const int* itab,
3289                     const Complexf* dft_wave, const Complexf* dct_wave, const void* spec, Complexf* buf )
3290 {
3291     IDCT(src, src_step, dft_src, dft_dst, dst, dst_step,
3292          n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
3293 }
3294
3295 static void DCT_64f(const double* src, int src_step, double* dft_src, double* dft_dst,
3296                     double* dst, int dst_step, int n, int nf, int* factors, const int* itab,
3297                     const Complexd* dft_wave, const Complexd* dct_wave, const void* spec, Complexd* buf )
3298 {
3299     DCT(src, src_step, dft_src, dft_dst, dst, dst_step,
3300         n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
3301 }
3302
3303 static void IDCT_64f(const double* src, int src_step, double* dft_src, double* dft_dst,
3304                      double* dst, int dst_step, int n, int nf, int* factors, const int* itab,
3305                      const Complexd* dft_wave, const Complexd* dct_wave, const void* spec, Complexd* buf )
3306 {
3307     IDCT(src, src_step, dft_src, dft_dst, dst, dst_step,
3308          n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
3309 }
3310
3311 }
3312
3313 namespace cv
3314 {
3315 #if defined HAVE_IPP && IPP_VERSION_MAJOR >= 7
3316
3317 typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f*, int, Ipp32f*, int, const void*, Ipp8u*);
3318 typedef IppStatus (CV_STDCALL * ippiDCTInitAlloc)(void**, IppiSize, IppHintAlgorithm);
3319 typedef IppStatus (CV_STDCALL * ippiDCTFree)(void* pDCTSpec);
3320 typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*);
3321
3322 template <typename Dct>
3323 class DctIPPLoop_Invoker : public ParallelLoopBody
3324 {
3325 public:
3326
3327     DctIPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dct* _ippidct, bool _inv, bool *_ok) :
3328         ParallelLoopBody(), src(&_src), dst(&_dst), ippidct(_ippidct), inv(_inv), ok(_ok)
3329     {
3330         *ok = true;
3331     }
3332
3333     virtual void operator()(const Range& range) const
3334     {
3335         void* pDCTSpec;
3336         AutoBuffer<uchar> buf;
3337         uchar* pBuffer = 0;
3338         int bufSize=0;
3339
3340         IppiSize srcRoiSize = {src->cols, 1};
3341
3342         CV_SUPPRESS_DEPRECATED_START
3343
3344         ippiDCTInitAlloc ippInitAlloc   = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f   : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
3345         ippiDCTFree ippFree             = inv ? (ippiDCTFree)ippiDCTInvFree_32f             : (ippiDCTFree)ippiDCTFwdFree_32f;
3346         ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
3347
3348         if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
3349         {
3350             buf.allocate( bufSize );
3351             pBuffer = (uchar*)buf;
3352
3353             for( int i = range.start; i < range.end; ++i)
3354                 if(!(*ippidct)(src->ptr<float>(i), (int)src->step,dst->ptr<float>(i), (int)dst->step, pDCTSpec, (Ipp8u*)pBuffer))
3355                     *ok = false;
3356         }
3357         else
3358             *ok = false;
3359
3360         if (pDCTSpec)
3361             ippFree(pDCTSpec);
3362
3363         CV_SUPPRESS_DEPRECATED_END
3364     }
3365
3366 private:
3367     const Mat* src;
3368     Mat* dst;
3369     const Dct* ippidct;
3370     bool inv;
3371     bool *ok;
3372 };
3373
3374 template <typename Dct>
3375 bool DctIPPLoop(const Mat& src, Mat& dst, const Dct& ippidct, bool inv)
3376 {
3377     bool ok;
3378     parallel_for_(Range(0, src.rows), DctIPPLoop_Invoker<Dct>(src, dst, &ippidct, inv, &ok), src.rows/(double)(1<<4) );
3379     return ok;
3380 }
3381
3382 struct IPPDCTFunctor
3383 {
3384     IPPDCTFunctor(ippiDCTFunc _func) : func(_func){}
3385
3386     bool operator()(const Ipp32f* src, int srcStep, Ipp32f* dst, int dstStep, const void* pDCTSpec, Ipp8u* pBuffer) const
3387     {
3388         return func ? func(src, srcStep, dst, dstStep, pDCTSpec, pBuffer) >= 0 : false;
3389     }
3390 private:
3391     ippiDCTFunc func;
3392 };
3393
3394 static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row)
3395 {
3396     ippiDCTFunc ippFunc = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R ;
3397
3398     if (row)
3399         return(DctIPPLoop(src,dst,IPPDCTFunctor(ippFunc),inv));
3400     else
3401     {
3402         IppStatus status;
3403         void* pDCTSpec;
3404         AutoBuffer<uchar> buf;
3405         uchar* pBuffer = 0;
3406         int bufSize=0;
3407
3408         IppiSize srcRoiSize = {src.cols, src.rows};
3409
3410         CV_SUPPRESS_DEPRECATED_START
3411
3412         ippiDCTInitAlloc ippInitAlloc   = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f   : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
3413         ippiDCTFree ippFree             = inv ? (ippiDCTFree)ippiDCTInvFree_32f             : (ippiDCTFree)ippiDCTFwdFree_32f;
3414         ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
3415
3416         status = ippStsErr;
3417
3418         if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
3419         {
3420             buf.allocate( bufSize );
3421             pBuffer = (uchar*)buf;
3422
3423             status = ippFunc(src.ptr<float>(), (int)src.step, dst.ptr<float>(), (int)dst.step, pDCTSpec, (Ipp8u*)pBuffer);
3424         }
3425
3426         if (pDCTSpec)
3427             ippFree(pDCTSpec);
3428
3429         CV_SUPPRESS_DEPRECATED_END
3430
3431         return status >= 0;
3432     }
3433 }
3434
3435 #endif
3436 }
3437
3438 void cv::dct( InputArray _src0, OutputArray _dst, int flags )
3439 {
3440     static DCTFunc dct_tbl[4] =
3441     {
3442         (DCTFunc)DCT_32f,
3443         (DCTFunc)IDCT_32f,
3444         (DCTFunc)DCT_64f,
3445         (DCTFunc)IDCT_64f
3446     };
3447
3448     bool inv = (flags & DCT_INVERSE) != 0;
3449     Mat src0 = _src0.getMat(), src = src0;
3450     int type = src.type(), depth = src.depth();
3451     void *spec = 0;
3452
3453     double scale = 1.;
3454     int prev_len = 0, nf = 0, stage, end_stage;
3455     uchar *src_dft_buf = 0, *dst_dft_buf = 0;
3456     uchar *dft_wave = 0, *dct_wave = 0;
3457     int* itab = 0;
3458     uchar* ptr = 0;
3459     int elem_size = (int)src.elemSize(), complex_elem_size = elem_size*2;
3460     int factors[34], inplace_transform;
3461     int i, len, count;
3462     AutoBuffer<uchar> buf;
3463
3464     CV_Assert( type == CV_32FC1 || type == CV_64FC1 );
3465     _dst.create( src.rows, src.cols, type );
3466     Mat dst = _dst.getMat();
3467
3468 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
3469     CV_IPP_CHECK()
3470     {
3471         bool row = (flags & DCT_ROWS) != 0;
3472         if (src.type() == CV_32F)
3473         {
3474             if(ippi_DCT_32f(src,dst,inv, row))
3475             {
3476                 CV_IMPL_ADD(CV_IMPL_IPP);
3477                 return;
3478             }
3479             setIppErrorStatus();
3480         }
3481     }
3482 #endif
3483
3484     DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2];
3485
3486     if( (flags & DCT_ROWS) || src.rows == 1 ||
3487         (src.cols == 1 && (src.isContinuous() && dst.isContinuous())))
3488     {
3489         stage = end_stage = 0;
3490     }
3491     else
3492     {
3493         stage = src.cols == 1;
3494         end_stage = 1;
3495     }
3496
3497     for( ; stage <= end_stage; stage++ )
3498     {
3499         const uchar* sptr = src.ptr();
3500         uchar* dptr = dst.ptr();
3501         size_t sstep0, sstep1, dstep0, dstep1;
3502
3503         if( stage == 0 )
3504         {
3505             len = src.cols;
3506             count = src.rows;
3507             if( len == 1 && !(flags & DCT_ROWS) )
3508             {
3509                 len = src.rows;
3510                 count = 1;
3511             }
3512             sstep0 = src.step;
3513             dstep0 = dst.step;
3514             sstep1 = dstep1 = elem_size;
3515         }
3516         else
3517         {
3518             len = dst.rows;
3519             count = dst.cols;
3520             sstep1 = src.step;
3521             dstep1 = dst.step;
3522             sstep0 = dstep0 = elem_size;
3523         }
3524
3525         if( len != prev_len )
3526         {
3527             int sz;
3528
3529             if( len > 1 && (len & 1) )
3530                 CV_Error( CV_StsNotImplemented, "Odd-size DCT\'s are not implemented" );
3531
3532             sz = len*elem_size;
3533             sz += (len/2 + 1)*complex_elem_size;
3534
3535             spec = 0;
3536             inplace_transform = 1;
3537             {
3538                 sz += len*(complex_elem_size + sizeof(int)) + complex_elem_size;
3539
3540                 nf = DFTFactorize( len, factors );
3541                 inplace_transform = factors[0] == factors[nf-1];
3542
3543                 i = nf > 1 && (factors[0] & 1) == 0;
3544                 if( (factors[i] & 1) != 0 && factors[i] > 5 )
3545                     sz += (factors[i]+1)*complex_elem_size;
3546
3547                 if( !inplace_transform )
3548                     sz += len*elem_size;
3549             }
3550
3551             buf.allocate( sz + 32 );
3552             ptr = (uchar*)buf;
3553
3554             if( !spec )
3555             {
3556                 dft_wave = ptr;
3557                 ptr += len*complex_elem_size;
3558                 itab = (int*)ptr;
3559                 ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 );
3560                 DFTInit( len, nf, factors, itab, complex_elem_size, dft_wave, inv );
3561             }
3562
3563             dct_wave = ptr;
3564             ptr += (len/2 + 1)*complex_elem_size;
3565             src_dft_buf = dst_dft_buf = ptr;
3566             ptr += len*elem_size;
3567             if( !inplace_transform )
3568             {
3569                 dst_dft_buf = ptr;
3570                 ptr += len*elem_size;
3571             }
3572             DCTInit( len, complex_elem_size, dct_wave, inv );
3573             if( !inv )
3574                 scale += scale;
3575             prev_len = len;
3576         }
3577         // otherwise reuse the tables calculated on the previous stage
3578         for( i = 0; i < count; i++ )
3579         {
3580             dct_func( sptr + i*sstep0, (int)sstep1, src_dft_buf, dst_dft_buf,
3581                       dptr + i*dstep0, (int)dstep1, len, nf, factors,
3582                       itab, dft_wave, dct_wave, spec, ptr );
3583         }
3584         src = dst;
3585     }
3586 }
3587
3588
3589 void cv::idct( InputArray src, OutputArray dst, int flags )
3590 {
3591     dct( src, dst, flags | DCT_INVERSE );
3592 }
3593
3594 namespace cv
3595 {
3596
3597 static const int optimalDFTSizeTab[] = {
3598 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24, 25, 27, 30, 32, 36, 40, 45, 48,
3599 50, 54, 60, 64, 72, 75, 80, 81, 90, 96, 100, 108, 120, 125, 128, 135, 144, 150, 160,
3600 162, 180, 192, 200, 216, 225, 240, 243, 250, 256, 270, 288, 300, 320, 324, 360, 375,
3601 384, 400, 405, 432, 450, 480, 486, 500, 512, 540, 576, 600, 625, 640, 648, 675, 720,
3602 729, 750, 768, 800, 810, 864, 900, 960, 972, 1000, 1024, 1080, 1125, 1152, 1200,
3603 1215, 1250, 1280, 1296, 1350, 1440, 1458, 1500, 1536, 1600, 1620, 1728, 1800, 1875,
3604 1920, 1944, 2000, 2025, 2048, 2160, 2187, 2250, 2304, 2400, 2430, 2500, 2560, 2592,
3605 2700, 2880, 2916, 3000, 3072, 3125, 3200, 3240, 3375, 3456, 3600, 3645, 3750, 3840,
3606 3888, 4000, 4050, 4096, 4320, 4374, 4500, 4608, 4800, 4860, 5000, 5120, 5184, 5400,
3607 5625, 5760, 5832, 6000, 6075, 6144, 6250, 6400, 6480, 6561, 6750, 6912, 7200, 7290,
3608 7500, 7680, 7776, 8000, 8100, 8192, 8640, 8748, 9000, 9216, 9375, 9600, 9720, 10000,
3609 10125, 10240, 10368, 10800, 10935, 11250, 11520, 11664, 12000, 12150, 12288, 12500,
3610 12800, 12960, 13122, 13500, 13824, 14400, 14580, 15000, 15360, 15552, 15625, 16000,
3611 16200, 16384, 16875, 17280, 17496, 18000, 18225, 18432, 18750, 19200, 19440, 19683,
3612 20000, 20250, 20480, 20736, 21600, 21870, 22500, 23040, 23328, 24000, 24300, 24576,
3613 25000, 25600, 25920, 26244, 27000, 27648, 28125, 28800, 29160, 30000, 30375, 30720,
3614 31104, 31250, 32000, 32400, 32768, 32805, 33750, 34560, 34992, 36000, 36450, 36864,
3615 37500, 38400, 38880, 39366, 40000, 40500, 40960, 41472, 43200, 43740, 45000, 46080,
3616 46656, 46875, 48000, 48600, 49152, 50000, 50625, 51200, 51840, 52488, 54000, 54675,
3617 55296, 56250, 57600, 58320, 59049, 60000, 60750, 61440, 62208, 62500, 64000, 64800,
3618 65536, 65610, 67500, 69120, 69984, 72000, 72900, 73728, 75000, 76800, 77760, 78125,
3619 78732, 80000, 81000, 81920, 82944, 84375, 86400, 87480, 90000, 91125, 92160, 93312,
3620 93750, 96000, 97200, 98304, 98415, 100000, 101250, 102400, 103680, 104976, 108000,
3621 109350, 110592, 112500, 115200, 116640, 118098, 120000, 121500, 122880, 124416, 125000,
3622 128000, 129600, 131072, 131220, 135000, 138240, 139968, 140625, 144000, 145800, 147456,
3623 150000, 151875, 153600, 155520, 156250, 157464, 160000, 162000, 163840, 164025, 165888,
3624 168750, 172800, 174960, 177147, 180000, 182250, 184320, 186624, 187500, 192000, 194400,
3625 196608, 196830, 200000, 202500, 204800, 207360, 209952, 216000, 218700, 221184, 225000,
3626 230400, 233280, 234375, 236196, 240000, 243000, 245760, 248832, 250000, 253125, 256000,
3627 259200, 262144, 262440, 270000, 273375, 276480, 279936, 281250, 288000, 291600, 294912,
3628 295245, 300000, 303750, 307200, 311040, 312500, 314928, 320000, 324000, 327680, 328050,
3629 331776, 337500, 345600, 349920, 354294, 360000, 364500, 368640, 373248, 375000, 384000,
3630 388800, 390625, 393216, 393660, 400000, 405000, 409600, 414720, 419904, 421875, 432000,
3631 437400, 442368, 450000, 455625, 460800, 466560, 468750, 472392, 480000, 486000, 491520,
3632 492075, 497664, 500000, 506250, 512000, 518400, 524288, 524880, 531441, 540000, 546750,
3633 552960, 559872, 562500, 576000, 583200, 589824, 590490, 600000, 607500, 614400, 622080,
3634 625000, 629856, 640000, 648000, 655360, 656100, 663552, 675000, 691200, 699840, 703125,
3635 708588, 720000, 729000, 737280, 746496, 750000, 759375, 768000, 777600, 781250, 786432,
3636 787320, 800000, 810000, 819200, 820125, 829440, 839808, 843750, 864000, 874800, 884736,
3637 885735, 900000, 911250, 921600, 933120, 937500, 944784, 960000, 972000, 983040, 984150,
3638 995328, 1000000, 1012500, 1024000, 1036800, 1048576, 1049760, 1062882, 1080000, 1093500,
3639 1105920, 1119744, 1125000, 1152000, 1166400, 1171875, 1179648, 1180980, 1200000,
3640 1215000, 1228800, 1244160, 1250000, 1259712, 1265625, 1280000, 1296000, 1310720,
3641 1312200, 1327104, 1350000, 1366875, 1382400, 1399680, 1406250, 1417176, 1440000,
3642 1458000, 1474560, 1476225, 1492992, 1500000, 1518750, 1536000, 1555200, 1562500,
3643 1572864, 1574640, 1594323, 1600000, 1620000, 1638400, 1640250, 1658880, 1679616,
3644 1687500, 1728000, 1749600, 1769472, 1771470, 1800000, 1822500, 1843200, 1866240,
3645 1875000, 1889568, 1920000, 1944000, 1953125, 1966080, 1968300, 1990656, 2000000,
3646 2025000, 2048000, 2073600, 2097152, 2099520, 2109375, 2125764, 2160000, 2187000,
3647 2211840, 2239488, 2250000, 2278125, 2304000, 2332800, 2343750, 2359296, 2361960,
3648 2400000, 2430000, 2457600, 2460375, 2488320, 2500000, 2519424, 2531250, 2560000,
3649 2592000, 2621440, 2624400, 2654208, 2657205, 2700000, 2733750, 2764800, 2799360,
3650 2812500, 2834352, 2880000, 2916000, 2949120, 2952450, 2985984, 3000000, 3037500,
3651 3072000, 3110400, 3125000, 3145728, 3149280, 3188646, 3200000, 3240000, 3276800,
3652 3280500, 3317760, 3359232, 3375000, 3456000, 3499200, 3515625, 3538944, 3542940,
3653 3600000, 3645000, 3686400, 3732480, 3750000, 3779136, 3796875, 3840000, 3888000,
3654 3906250, 3932160, 3936600, 3981312, 4000000, 4050000, 4096000, 4100625, 4147200,
3655 4194304, 4199040, 4218750, 4251528, 4320000, 4374000, 4423680, 4428675, 4478976,
3656 4500000, 4556250, 4608000, 4665600, 4687500, 4718592, 4723920, 4782969, 4800000,
3657 4860000, 4915200, 4920750, 4976640, 5000000, 5038848, 5062500, 5120000, 5184000,
3658 5242880, 5248800, 5308416, 5314410, 5400000, 5467500, 5529600, 5598720, 5625000,
3659 5668704, 5760000, 5832000, 5859375, 5898240, 5904900, 5971968, 6000000, 6075000,
3660 6144000, 6220800, 6250000, 6291456, 6298560, 6328125, 6377292, 6400000, 6480000,
3661 6553600, 6561000, 6635520, 6718464, 6750000, 6834375, 6912000, 6998400, 7031250,
3662 7077888, 7085880, 7200000, 7290000, 7372800, 7381125, 7464960, 7500000, 7558272,
3663 7593750, 7680000, 7776000, 7812500, 7864320, 7873200, 7962624, 7971615, 8000000,
3664 8100000, 8192000, 8201250, 8294400, 8388608, 8398080, 8437500, 8503056, 8640000,
3665 8748000, 8847360, 8857350, 8957952, 9000000, 9112500, 9216000, 9331200, 9375000,
3666 9437184, 9447840, 9565938, 9600000, 9720000, 9765625, 9830400, 9841500, 9953280,
3667 10000000, 10077696, 10125000, 10240000, 10368000, 10485760, 10497600, 10546875, 10616832,
3668 10628820, 10800000, 10935000, 11059200, 11197440, 11250000, 11337408, 11390625, 11520000,
3669 11664000, 11718750, 11796480, 11809800, 11943936, 12000000, 12150000, 12288000, 12301875,
3670 12441600, 12500000, 12582912, 12597120, 12656250, 12754584, 12800000, 12960000, 13107200,
3671 13122000, 13271040, 13286025, 13436928, 13500000, 13668750, 13824000, 13996800, 14062500,
3672 14155776, 14171760, 14400000, 14580000, 14745600, 14762250, 14929920, 15000000, 15116544,
3673 15187500, 15360000, 15552000, 15625000, 15728640, 15746400, 15925248, 15943230, 16000000,
3674 16200000, 16384000, 16402500, 16588800, 16777216, 16796160, 16875000, 17006112, 17280000,
3675 17496000, 17578125, 17694720, 17714700, 17915904, 18000000, 18225000, 18432000, 18662400,
3676 18750000, 18874368, 18895680, 18984375, 19131876, 19200000, 19440000, 19531250, 19660800,
3677 19683000, 19906560, 20000000, 20155392, 20250000, 20480000, 20503125, 20736000, 20971520,
3678 20995200, 21093750, 21233664, 21257640, 21600000, 21870000, 22118400, 22143375, 22394880,
3679 22500000, 22674816, 22781250, 23040000, 23328000, 23437500, 23592960, 23619600, 23887872,
3680 23914845, 24000000, 24300000, 24576000, 24603750, 24883200, 25000000, 25165824, 25194240,
3681 25312500, 25509168, 25600000, 25920000, 26214400, 26244000, 26542080, 26572050, 26873856,
3682 27000000, 27337500, 27648000, 27993600, 28125000, 28311552, 28343520, 28800000, 29160000,
3683 29296875, 29491200, 29524500, 29859840, 30000000, 30233088, 30375000, 30720000, 31104000,
3684 31250000, 31457280, 31492800, 31640625, 31850496, 31886460, 32000000, 32400000, 32768000,
3685 32805000, 33177600, 33554432, 33592320, 33750000, 34012224, 34171875, 34560000, 34992000,
3686 35156250, 35389440, 35429400, 35831808, 36000000, 36450000, 36864000, 36905625, 37324800,
3687 37500000, 37748736, 37791360, 37968750, 38263752, 38400000, 38880000, 39062500, 39321600,
3688 39366000, 39813120, 39858075, 40000000, 40310784, 40500000, 40960000, 41006250, 41472000,
3689 41943040, 41990400, 42187500, 42467328, 42515280, 43200000, 43740000, 44236800, 44286750,
3690 44789760, 45000000, 45349632, 45562500, 46080000, 46656000, 46875000, 47185920, 47239200,
3691 47775744, 47829690, 48000000, 48600000, 48828125, 49152000, 49207500, 49766400, 50000000,
3692 50331648, 50388480, 50625000, 51018336, 51200000, 51840000, 52428800, 52488000, 52734375,
3693 53084160, 53144100, 53747712, 54000000, 54675000, 55296000, 55987200, 56250000, 56623104,
3694 56687040, 56953125, 57600000, 58320000, 58593750, 58982400, 59049000, 59719680, 60000000,
3695 60466176, 60750000, 61440000, 61509375, 62208000, 62500000, 62914560, 62985600, 63281250,
3696 63700992, 63772920, 64000000, 64800000, 65536000, 65610000, 66355200, 66430125, 67108864,
3697 67184640, 67500000, 68024448, 68343750, 69120000, 69984000, 70312500, 70778880, 70858800,
3698 71663616, 72000000, 72900000, 73728000, 73811250, 74649600, 75000000, 75497472, 75582720,
3699 75937500, 76527504, 76800000, 77760000, 78125000, 78643200, 78732000, 79626240, 79716150,
3700 80000000, 80621568, 81000000, 81920000, 82012500, 82944000, 83886080, 83980800, 84375000,
3701 84934656, 85030560, 86400000, 87480000, 87890625, 88473600, 88573500, 89579520, 90000000,
3702 90699264, 91125000, 92160000, 93312000, 93750000, 94371840, 94478400, 94921875, 95551488,
3703 95659380, 96000000, 97200000, 97656250, 98304000, 98415000, 99532800, 100000000,
3704 100663296, 100776960, 101250000, 102036672, 102400000, 102515625, 103680000, 104857600,
3705 104976000, 105468750, 106168320, 106288200, 107495424, 108000000, 109350000, 110592000,
3706 110716875, 111974400, 112500000, 113246208, 113374080, 113906250, 115200000, 116640000,
3707 117187500, 117964800, 118098000, 119439360, 119574225, 120000000, 120932352, 121500000,
3708 122880000, 123018750, 124416000, 125000000, 125829120, 125971200, 126562500, 127401984,
3709 127545840, 128000000, 129600000, 131072000, 131220000, 132710400, 132860250, 134217728,
3710 134369280, 135000000, 136048896, 136687500, 138240000, 139968000, 140625000, 141557760,
3711 141717600, 143327232, 144000000, 145800000, 146484375, 147456000, 147622500, 149299200,
3712 150000000, 150994944, 151165440, 151875000, 153055008, 153600000, 155520000, 156250000,
3713 157286400, 157464000, 158203125, 159252480, 159432300, 160000000, 161243136, 162000000,
3714 163840000, 164025000, 165888000, 167772160, 167961600, 168750000, 169869312, 170061120,
3715 170859375, 172800000, 174960000, 175781250, 176947200, 177147000, 179159040, 180000000,
3716 181398528, 182250000, 184320000, 184528125, 186624000, 187500000, 188743680, 188956800,
3717 189843750, 191102976, 191318760, 192000000, 194400000, 195312500, 196608000, 196830000,
3718 199065600, 199290375, 200000000, 201326592, 201553920, 202500000, 204073344, 204800000,
3719 205031250, 207360000, 209715200, 209952000, 210937500, 212336640, 212576400, 214990848,
3720 216000000, 218700000, 221184000, 221433750, 223948800, 225000000, 226492416, 226748160,
3721 227812500, 230400000, 233280000, 234375000, 235929600, 236196000, 238878720, 239148450,
3722 240000000, 241864704, 243000000, 244140625, 245760000, 246037500, 248832000, 250000000,
3723 251658240, 251942400, 253125000, 254803968, 255091680, 256000000, 259200000, 262144000,
3724 262440000, 263671875, 265420800, 265720500, 268435456, 268738560, 270000000, 272097792,
3725 273375000, 276480000, 279936000, 281250000, 283115520, 283435200, 284765625, 286654464,
3726 288000000, 291600000, 292968750, 294912000, 295245000, 298598400, 300000000, 301989888,
3727 302330880, 303750000, 306110016, 307200000, 307546875, 311040000, 312500000, 314572800,
3728 314928000, 316406250, 318504960, 318864600, 320000000, 322486272, 324000000, 327680000,
3729 328050000, 331776000, 332150625, 335544320, 335923200, 337500000, 339738624, 340122240,
3730 341718750, 345600000, 349920000, 351562500, 353894400, 354294000, 358318080, 360000000,
3731 362797056, 364500000, 368640000, 369056250, 373248000, 375000000, 377487360, 377913600,
3732 379687500, 382205952, 382637520, 384000000, 388800000, 390625000, 393216000, 393660000,
3733 398131200, 398580750, 400000000, 402653184, 403107840, 405000000, 408146688, 409600000,
3734 410062500, 414720000, 419430400, 419904000, 421875000, 424673280, 425152800, 429981696,
3735 432000000, 437400000, 439453125, 442368000, 442867500, 447897600, 450000000, 452984832,
3736 453496320, 455625000, 460800000, 466560000, 468750000, 471859200, 472392000, 474609375,
3737 477757440, 478296900, 480000000, 483729408, 486000000, 488281250, 491520000, 492075000,
3738 497664000, 500000000, 503316480, 503884800, 506250000, 509607936, 510183360, 512000000,
3739 512578125, 518400000, 524288000, 524880000, 527343750, 530841600, 531441000, 536870912,
3740 537477120, 540000000, 544195584, 546750000, 552960000, 553584375, 559872000, 562500000,
3741 566231040, 566870400, 569531250, 573308928, 576000000, 583200000, 585937500, 589824000,
3742 590490000, 597196800, 597871125, 600000000, 603979776, 604661760, 607500000, 612220032,
3743 614400000, 615093750, 622080000, 625000000, 629145600, 629856000, 632812500, 637009920,
3744 637729200, 640000000, 644972544, 648000000, 655360000, 656100000, 663552000, 664301250,
3745 671088640, 671846400, 675000000, 679477248, 680244480, 683437500, 691200000, 699840000,
3746 703125000, 707788800, 708588000, 716636160, 720000000, 725594112, 729000000, 732421875,
3747 737280000, 738112500, 746496000, 750000000, 754974720, 755827200, 759375000, 764411904,
3748 765275040, 768000000, 777600000, 781250000, 786432000, 787320000, 791015625, 796262400,
3749 797161500, 800000000, 805306368, 806215680, 810000000, 816293376, 819200000, 820125000,
3750 829440000, 838860800, 839808000, 843750000, 849346560, 850305600, 854296875, 859963392,
3751 864000000, 874800000, 878906250, 884736000, 885735000, 895795200, 900000000, 905969664,
3752 906992640, 911250000, 921600000, 922640625, 933120000, 937500000, 943718400, 944784000,
3753 949218750, 955514880, 956593800, 960000000, 967458816, 972000000, 976562500, 983040000,
3754 984150000, 995328000, 996451875, 1000000000, 1006632960, 1007769600, 1012500000,
3755 1019215872, 1020366720, 1024000000, 1025156250, 1036800000, 1048576000, 1049760000,
3756 1054687500, 1061683200, 1062882000, 1073741824, 1074954240, 1080000000, 1088391168,
3757 1093500000, 1105920000, 1107168750, 1119744000, 1125000000, 1132462080, 1133740800,
3758 1139062500, 1146617856, 1152000000, 1166400000, 1171875000, 1179648000, 1180980000,
3759 1194393600, 1195742250, 1200000000, 1207959552, 1209323520, 1215000000, 1220703125,
3760 1224440064, 1228800000, 1230187500, 1244160000, 1250000000, 1258291200, 1259712000,
3761 1265625000, 1274019840, 1275458400, 1280000000, 1289945088, 1296000000, 1310720000,
3762 1312200000, 1318359375, 1327104000, 1328602500, 1342177280, 1343692800, 1350000000,
3763 1358954496, 1360488960, 1366875000, 1382400000, 1399680000, 1406250000, 1415577600,
3764 1417176000, 1423828125, 1433272320, 1440000000, 1451188224, 1458000000, 1464843750,
3765 1474560000, 1476225000, 1492992000, 1500000000, 1509949440, 1511654400, 1518750000,
3766 1528823808, 1530550080, 1536000000, 1537734375, 1555200000, 1562500000, 1572864000,
3767 1574640000, 1582031250, 1592524800, 1594323000, 1600000000, 1610612736, 1612431360,
3768 1620000000, 1632586752, 1638400000, 1640250000, 1658880000, 1660753125, 1677721600,
3769 1679616000, 1687500000, 1698693120, 1700611200, 1708593750, 1719926784, 1728000000,
3770 1749600000, 1757812500, 1769472000, 1771470000, 1791590400, 1800000000, 1811939328,
3771 1813985280, 1822500000, 1843200000, 1845281250, 1866240000, 1875000000, 1887436800,
3772 1889568000, 1898437500, 1911029760, 1913187600, 1920000000, 1934917632, 1944000000,
3773 1953125000, 1966080000, 1968300000, 1990656000, 1992903750, 2000000000, 2013265920,
3774 2015539200, 2025000000, 2038431744, 2040733440, 2048000000, 2050312500, 2073600000,
3775 2097152000, 2099520000, 2109375000, 2123366400, 2125764000
3776 };
3777
3778 }
3779
3780 int cv::getOptimalDFTSize( int size0 )
3781 {
3782     int a = 0, b = sizeof(optimalDFTSizeTab)/sizeof(optimalDFTSizeTab[0]) - 1;
3783     if( (unsigned)size0 >= (unsigned)optimalDFTSizeTab[b] )
3784         return -1;
3785
3786     while( a < b )
3787     {
3788         int c = (a + b) >> 1;
3789         if( size0 <= optimalDFTSizeTab[c] )
3790             b = c;
3791         else
3792             a = c+1;
3793     }
3794
3795     return optimalDFTSizeTab[b];
3796 }
3797
3798 CV_IMPL void
3799 cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows )
3800 {
3801     cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0;
3802     int _flags = ((flags & CV_DXT_INVERSE) ? cv::DFT_INVERSE : 0) |
3803         ((flags & CV_DXT_SCALE) ? cv::DFT_SCALE : 0) |
3804         ((flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0);
3805
3806     CV_Assert( src.size == dst.size );
3807
3808     if( src.type() != dst.type() )
3809     {
3810         if( dst.channels() == 2 )
3811             _flags |= cv::DFT_COMPLEX_OUTPUT;
3812         else
3813             _flags |= cv::DFT_REAL_OUTPUT;
3814     }
3815
3816     cv::dft( src, dst, _flags, nonzero_rows );
3817     CV_Assert( dst.data == dst0.data ); // otherwise it means that the destination size or type was incorrect
3818 }
3819
3820
3821 CV_IMPL void
3822 cvMulSpectrums( const CvArr* srcAarr, const CvArr* srcBarr,
3823                 CvArr* dstarr, int flags )
3824 {
3825     cv::Mat srcA = cv::cvarrToMat(srcAarr),
3826         srcB = cv::cvarrToMat(srcBarr),
3827         dst = cv::cvarrToMat(dstarr);
3828     CV_Assert( srcA.size == dst.size && srcA.type() == dst.type() );
3829
3830     cv::mulSpectrums(srcA, srcB, dst,
3831         (flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0,
3832         (flags & CV_DXT_MUL_CONJ) != 0 );
3833 }
3834
3835
3836 CV_IMPL void
3837 cvDCT( const CvArr* srcarr, CvArr* dstarr, int flags )
3838 {
3839     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3840     CV_Assert( src.size == dst.size && src.type() == dst.type() );
3841     int _flags = ((flags & CV_DXT_INVERSE) ? cv::DCT_INVERSE : 0) |
3842             ((flags & CV_DXT_ROWS) ? cv::DCT_ROWS : 0);
3843     cv::dct( src, dst, _flags );
3844 }
3845
3846
3847 CV_IMPL int
3848 cvGetOptimalDFTSize( int size0 )
3849 {
3850     return cv::getOptimalDFTSize(size0);
3851 }
3852
3853 /* End of file. */