modules/imgproc/src/pyramids.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #include "precomp.hpp"
  44 #include "opencl_kernels.hpp"
  45
  46 namespace cv
  47 {
  48
  49 template<typename T, int shift> struct FixPtCast
  50 {
  51     typedef int type1;
  52     typedef T rtype;
  53     rtype operator ()(type1 arg) const { return (T)((arg + (1 << (shift-1))) >> shift); }
  54 };
  55
  56 template<typename T, int shift> struct FltCast
  57 {
  58     typedef T type1;
  59     typedef T rtype;
  60     rtype operator ()(type1 arg) const { return arg*(T)(1./(1 << shift)); }
  61 };
  62
  63 template<typename T1, typename T2> struct NoVec
  64 {
  65     int operator()(T1**, T2*, int, int) const { return 0; }
  66 };
  67
  68 #if CV_SSE2
  69
  70 struct PyrDownVec_32s8u
  71 {
  72     int operator()(int** src, uchar* dst, int, int width) const
  73     {
  74         if( !checkHardwareSupport(CV_CPU_SSE2) )
  75             return 0;
  76
  77         int x = 0;
  78         const int *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
  79         __m128i delta = _mm_set1_epi16(128);
  80
  81         for( ; x <= width - 16; x += 16 )
  82         {
  83             __m128i r0, r1, r2, r3, r4, t0, t1;
  84             r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x)),
  85                                  _mm_load_si128((const __m128i*)(row0 + x + 4)));
  86             r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x)),
  87                                  _mm_load_si128((const __m128i*)(row1 + x + 4)));
  88             r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x)),
  89                                  _mm_load_si128((const __m128i*)(row2 + x + 4)));
  90             r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x)),
  91                                  _mm_load_si128((const __m128i*)(row3 + x + 4)));
  92             r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x)),
  93                                  _mm_load_si128((const __m128i*)(row4 + x + 4)));
  94             r0 = _mm_add_epi16(r0, r4);
  95             r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2);
  96             r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2));
  97             t0 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2));
  98             r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x + 8)),
  99                                  _mm_load_si128((const __m128i*)(row0 + x + 12)));
 100             r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x + 8)),
 101                                  _mm_load_si128((const __m128i*)(row1 + x + 12)));
 102             r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x + 8)),
 103                                  _mm_load_si128((const __m128i*)(row2 + x + 12)));
 104             r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x + 8)),
 105                                  _mm_load_si128((const __m128i*)(row3 + x + 12)));
 106             r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x + 8)),
 107                                  _mm_load_si128((const __m128i*)(row4 + x + 12)));
 108             r0 = _mm_add_epi16(r0, r4);
 109             r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2);
 110             r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2));
 111             t1 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2));
 112             t0 = _mm_srli_epi16(_mm_add_epi16(t0, delta), 8);
 113             t1 = _mm_srli_epi16(_mm_add_epi16(t1, delta), 8);
 114             _mm_storeu_si128((__m128i*)(dst + x), _mm_packus_epi16(t0, t1));
 115         }
 116
 117         for( ; x <= width - 4; x += 4 )
 118         {
 119             __m128i r0, r1, r2, r3, r4, z = _mm_setzero_si128();
 120             r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x)), z);
 121             r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x)), z);
 122             r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x)), z);
 123             r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x)), z);
 124             r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x)), z);
 125             r0 = _mm_add_epi16(r0, r4);
 126             r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2);
 127             r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2));
 128             r0 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2));
 129             r0 = _mm_srli_epi16(_mm_add_epi16(r0, delta), 8);
 130             *(int*)(dst + x) = _mm_cvtsi128_si32(_mm_packus_epi16(r0, r0));
 131         }
 132
 133         return x;
 134     }
 135 };
 136
 137 struct PyrDownVec_32f
 138 {
 139     int operator()(float** src, float* dst, int, int width) const
 140     {
 141         if( !checkHardwareSupport(CV_CPU_SSE) )
 142             return 0;
 143
 144         int x = 0;
 145         const float *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
 146         __m128 _4 = _mm_set1_ps(4.f), _scale = _mm_set1_ps(1.f/256);
 147         for( ; x <= width - 8; x += 8 )
 148         {
 149             __m128 r0, r1, r2, r3, r4, t0, t1;
 150             r0 = _mm_load_ps(row0 + x);
 151             r1 = _mm_load_ps(row1 + x);
 152             r2 = _mm_load_ps(row2 + x);
 153             r3 = _mm_load_ps(row3 + x);
 154             r4 = _mm_load_ps(row4 + x);
 155             r0 = _mm_add_ps(r0, r4);
 156             r1 = _mm_add_ps(_mm_add_ps(r1, r3), r2);
 157             r0 = _mm_add_ps(r0, _mm_add_ps(r2, r2));
 158             t0 = _mm_add_ps(r0, _mm_mul_ps(r1, _4));
 159
 160             r0 = _mm_load_ps(row0 + x + 4);
 161             r1 = _mm_load_ps(row1 + x + 4);
 162             r2 = _mm_load_ps(row2 + x + 4);
 163             r3 = _mm_load_ps(row3 + x + 4);
 164             r4 = _mm_load_ps(row4 + x + 4);
 165             r0 = _mm_add_ps(r0, r4);
 166             r1 = _mm_add_ps(_mm_add_ps(r1, r3), r2);
 167             r0 = _mm_add_ps(r0, _mm_add_ps(r2, r2));
 168             t1 = _mm_add_ps(r0, _mm_mul_ps(r1, _4));
 169
 170             t0 = _mm_mul_ps(t0, _scale);
 171             t1 = _mm_mul_ps(t1, _scale);
 172
 173             _mm_storeu_ps(dst + x, t0);
 174             _mm_storeu_ps(dst + x + 4, t1);
 175         }
 176
 177         return x;
 178     }
 179 };
 180
 181 #else
 182
 183 typedef NoVec<int, uchar> PyrDownVec_32s8u;
 184 typedef NoVec<float, float> PyrDownVec_32f;
 185
 186 #endif
 187
 188 template<class CastOp, class VecOp> void
 189 pyrDown_( const Mat& _src, Mat& _dst, int borderType )
 190 {
 191     const int PD_SZ = 5;
 192     typedef typename CastOp::type1 WT;
 193     typedef typename CastOp::rtype T;
 194
 195     CV_Assert( !_src.empty() );
 196     Size ssize = _src.size(), dsize = _dst.size();
 197     int cn = _src.channels();
 198     int bufstep = (int)alignSize(dsize.width*cn, 16);
 199     AutoBuffer<WT> _buf(bufstep*PD_SZ + 16);
 200     WT* buf = alignPtr((WT*)_buf, 16);
 201     int tabL[CV_CN_MAX*(PD_SZ+2)], tabR[CV_CN_MAX*(PD_SZ+2)];
 202     AutoBuffer<int> _tabM(dsize.width*cn);
 203     int* tabM = _tabM;
 204     WT* rows[PD_SZ];
 205     CastOp castOp;
 206     VecOp vecOp;
 207
 208     CV_Assert( ssize.width > 0 && ssize.height > 0 &&
 209                std::abs(dsize.width*2 - ssize.width) <= 2 &&
 210                std::abs(dsize.height*2 - ssize.height) <= 2 );
 211     int k, x, sy0 = -PD_SZ/2, sy = sy0, width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width);
 212
 213     for( x = 0; x <= PD_SZ+1; x++ )
 214     {
 215         int sx0 = borderInterpolate(x - PD_SZ/2, ssize.width, borderType)*cn;
 216         int sx1 = borderInterpolate(x + width0*2 - PD_SZ/2, ssize.width, borderType)*cn;
 217         for( k = 0; k < cn; k++ )
 218         {
 219             tabL[x*cn + k] = sx0 + k;
 220             tabR[x*cn + k] = sx1 + k;
 221         }
 222     }
 223
 224     ssize.width *= cn;
 225     dsize.width *= cn;
 226     width0 *= cn;
 227
 228     for( x = 0; x < dsize.width; x++ )
 229         tabM[x] = (x/cn)*2*cn + x % cn;
 230
 231     for( int y = 0; y < dsize.height; y++ )
 232     {
 233         T* dst = (T*)(_dst.data + _dst.step*y);
 234         WT *row0, *row1, *row2, *row3, *row4;
 235
 236         // fill the ring buffer (horizontal convolution and decimation)
 237         for( ; sy <= y*2 + 2; sy++ )
 238         {
 239             WT* row = buf + ((sy - sy0) % PD_SZ)*bufstep;
 240             int _sy = borderInterpolate(sy, ssize.height, borderType);
 241             const T* src = (const T*)(_src.data + _src.step*_sy);
 242             int limit = cn;
 243             const int* tab = tabL;
 244
 245             for( x = 0;;)
 246             {
 247                 for( ; x < limit; x++ )
 248                 {
 249                     row[x] = src[tab[x+cn*2]]*6 + (src[tab[x+cn]] + src[tab[x+cn*3]])*4 +
 250                         src[tab[x]] + src[tab[x+cn*4]];
 251                 }
 252
 253                 if( x == dsize.width )
 254                     break;
 255
 256                 if( cn == 1 )
 257                 {
 258                     for( ; x < width0; x++ )
 259                         row[x] = src[x*2]*6 + (src[x*2 - 1] + src[x*2 + 1])*4 +
 260                             src[x*2 - 2] + src[x*2 + 2];
 261                 }
 262                 else if( cn == 3 )
 263                 {
 264                     for( ; x < width0; x += 3 )
 265                     {
 266                         const T* s = src + x*2;
 267                         WT t0 = s[0]*6 + (s[-3] + s[3])*4 + s[-6] + s[6];
 268                         WT t1 = s[1]*6 + (s[-2] + s[4])*4 + s[-5] + s[7];
 269                         WT t2 = s[2]*6 + (s[-1] + s[5])*4 + s[-4] + s[8];
 270                         row[x] = t0; row[x+1] = t1; row[x+2] = t2;
 271                     }
 272                 }
 273                 else if( cn == 4 )
 274                 {
 275                     for( ; x < width0; x += 4 )
 276                     {
 277                         const T* s = src + x*2;
 278                         WT t0 = s[0]*6 + (s[-4] + s[4])*4 + s[-8] + s[8];
 279                         WT t1 = s[1]*6 + (s[-3] + s[5])*4 + s[-7] + s[9];
 280                         row[x] = t0; row[x+1] = t1;
 281                         t0 = s[2]*6 + (s[-2] + s[6])*4 + s[-6] + s[10];
 282                         t1 = s[3]*6 + (s[-1] + s[7])*4 + s[-5] + s[11];
 283                         row[x+2] = t0; row[x+3] = t1;
 284                     }
 285                 }
 286                 else
 287                 {
 288                     for( ; x < width0; x++ )
 289                     {
 290                         int sx = tabM[x];
 291                         row[x] = src[sx]*6 + (src[sx - cn] + src[sx + cn])*4 +
 292                             src[sx - cn*2] + src[sx + cn*2];
 293                     }
 294                 }
 295
 296                 limit = dsize.width;
 297                 tab = tabR - x;
 298             }
 299         }
 300
 301         // do vertical convolution and decimation and write the result to the destination image
 302         for( k = 0; k < PD_SZ; k++ )
 303             rows[k] = buf + ((y*2 - PD_SZ/2 + k - sy0) % PD_SZ)*bufstep;
 304         row0 = rows[0]; row1 = rows[1]; row2 = rows[2]; row3 = rows[3]; row4 = rows[4];
 305
 306         x = vecOp(rows, dst, (int)_dst.step, dsize.width);
 307         for( ; x < dsize.width; x++ )
 308             dst[x] = castOp(row2[x]*6 + (row1[x] + row3[x])*4 + row0[x] + row4[x]);
 309     }
 310 }
 311
 312
 313 template<class CastOp, class VecOp> void
 314 pyrUp_( const Mat& _src, Mat& _dst, int)
 315 {
 316     const int PU_SZ = 3;
 317     typedef typename CastOp::type1 WT;
 318     typedef typename CastOp::rtype T;
 319
 320     Size ssize = _src.size(), dsize = _dst.size();
 321     int cn = _src.channels();
 322     int bufstep = (int)alignSize((dsize.width+1)*cn, 16);
 323     AutoBuffer<WT> _buf(bufstep*PU_SZ + 16);
 324     WT* buf = alignPtr((WT*)_buf, 16);
 325     AutoBuffer<int> _dtab(ssize.width*cn);
 326     int* dtab = _dtab;
 327     WT* rows[PU_SZ];
 328     CastOp castOp;
 329     VecOp vecOp;
 330
 331     CV_Assert( std::abs(dsize.width - ssize.width*2) == dsize.width % 2 &&
 332                std::abs(dsize.height - ssize.height*2) == dsize.height % 2);
 333     int k, x, sy0 = -PU_SZ/2, sy = sy0;
 334
 335     ssize.width *= cn;
 336     dsize.width *= cn;
 337
 338     for( x = 0; x < ssize.width; x++ )
 339         dtab[x] = (x/cn)*2*cn + x % cn;
 340
 341     for( int y = 0; y < ssize.height; y++ )
 342     {
 343         T* dst0 = (T*)(_dst.data + _dst.step*y*2);
 344         T* dst1 = (T*)(_dst.data + _dst.step*(y*2+1));
 345         WT *row0, *row1, *row2;
 346
 347         if( y*2+1 >= dsize.height )
 348             dst1 = dst0;
 349
 350         // fill the ring buffer (horizontal convolution and decimation)
 351         for( ; sy <= y + 1; sy++ )
 352         {
 353             WT* row = buf + ((sy - sy0) % PU_SZ)*bufstep;
 354             int _sy = borderInterpolate(sy*2, dsize.height, BORDER_REFLECT_101)/2;
 355             const T* src = (const T*)(_src.data + _src.step*_sy);
 356
 357             if( ssize.width == cn )
 358             {
 359                 for( x = 0; x < cn; x++ )
 360                     row[x] = row[x + cn] = src[x]*8;
 361                 continue;
 362             }
 363
 364             for( x = 0; x < cn; x++ )
 365             {
 366                 int dx = dtab[x];
 367                 WT t0 = src[x]*6 + src[x + cn]*2;
 368                 WT t1 = (src[x] + src[x + cn])*4;
 369                 row[dx] = t0; row[dx + cn] = t1;
 370                 dx = dtab[ssize.width - cn + x];
 371                 int sx = ssize.width - cn + x;
 372                 t0 = src[sx - cn] + src[sx]*7;
 373                 t1 = src[sx]*8;
 374                 row[dx] = t0; row[dx + cn] = t1;
 375             }
 376
 377             for( x = cn; x < ssize.width - cn; x++ )
 378             {
 379                 int dx = dtab[x];
 380                 WT t0 = src[x-cn] + src[x]*6 + src[x+cn];
 381                 WT t1 = (src[x] + src[x+cn])*4;
 382                 row[dx] = t0;
 383                 row[dx+cn] = t1;
 384             }
 385         }
 386
 387         // do vertical convolution and decimation and write the result to the destination image
 388         for( k = 0; k < PU_SZ; k++ )
 389             rows[k] = buf + ((y - PU_SZ/2 + k - sy0) % PU_SZ)*bufstep;
 390         row0 = rows[0]; row1 = rows[1]; row2 = rows[2];
 391
 392         x = vecOp(rows, dst0, (int)_dst.step, dsize.width);
 393         for( ; x < dsize.width; x++ )
 394         {
 395             T t1 = castOp((row1[x] + row2[x])*4);
 396             T t0 = castOp(row0[x] + row1[x]*6 + row2[x]);
 397             dst1[x] = t1; dst0[x] = t0;
 398         }
 399     }
 400 }
 401
 402 typedef void (*PyrFunc)(const Mat&, Mat&, int);
 403
 404 #ifdef HAVE_OPENCL
 405
 406 static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
 407 {
 408     int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
 409
 410     if (channels > 4 || borderType != BORDER_DEFAULT)
 411         return false;
 412
 413     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
 414     if ((depth == CV_64F) && !(doubleSupport))
 415         return false;
 416
 417     Size ssize = _src.size();
 418     Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz;
 419     CV_Assert( ssize.width > 0 && ssize.height > 0 &&
 420             std::abs(dsize.width*2 - ssize.width) <= 2 &&
 421             std::abs(dsize.height*2 - ssize.height) <= 2 );
 422
 423     UMat src = _src.getUMat();
 424     _dst.create( dsize, src.type() );
 425     UMat dst = _dst.getUMat();
 426
 427     int float_depth = depth == CV_64F ? CV_64F : CV_32F;
 428     char cvt[2][50];
 429     String buildOptions = format(
 430             "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
 431             "-D T1=%s -D cn=%d",
 432             ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
 433             ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
 434             ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
 435             doubleSupport ? " -D DOUBLE_SUPPORT" : "",
 436             ocl::typeToStr(depth), channels
 437     );
 438     ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions);
 439     if (k.empty())
 440         return false;
 441
 442     k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
 443
 444     size_t localThreads[2]  = { 256, 1 };
 445     size_t globalThreads[2] = { src.cols, dst.rows };
 446     return k.run(2, globalThreads, localThreads, false);
 447 }
 448
 449 static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
 450 {
 451     int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
 452
 453     if (channels > 4 || borderType != BORDER_DEFAULT)
 454         return false;
 455
 456     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
 457     if (depth == CV_64F && !doubleSupport)
 458         return false;
 459
 460     Size ssize = _src.size();
 461     if ((_dsz.area() != 0) && (_dsz != Size(ssize.width * 2, ssize.height * 2)))
 462         return false;
 463
 464     UMat src = _src.getUMat();
 465     Size dsize = Size(ssize.width * 2, ssize.height * 2);
 466     _dst.create( dsize, src.type() );
 467     UMat dst = _dst.getUMat();
 468
 469     int float_depth = depth == CV_64F ? CV_64F : CV_32F;
 470     char cvt[2][50];
 471     String buildOptions = format(
 472             "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
 473             "-D T1=%s -D cn=%d",
 474             ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
 475             ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
 476             ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
 477             doubleSupport ? " -D DOUBLE_SUPPORT" : "",
 478             ocl::typeToStr(depth), channels
 479     );
 480     ocl::Kernel k("pyrUp", ocl::imgproc::pyr_up_oclsrc, buildOptions);
 481     if (k.empty())
 482         return false;
 483
 484     k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
 485     size_t globalThreads[2] = {dst.cols, dst.rows};
 486     size_t localThreads[2]  = {16, 16};
 487
 488     return k.run(2, globalThreads, localThreads, false);
 489 }
 490
 491 #endif
 492
 493 }
 494
 495 void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
 496 {
 497     CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
 498                ocl_pyrDown(_src, _dst, _dsz, borderType))
 499
 500     Mat src = _src.getMat();
 501     Size dsz = _dsz.area() == 0 ? Size((src.cols + 1)/2, (src.rows + 1)/2) : _dsz;
 502     _dst.create( dsz, src.type() );
 503     Mat dst = _dst.getMat();
 504     int depth = src.depth();
 505
 506 #ifdef HAVE_TEGRA_OPTIMIZATION
 507     if(borderType == BORDER_DEFAULT && tegra::pyrDown(src, dst))
 508         return;
 509 #endif
 510
 511 #if (defined(HAVE_IPP) && !defined(HAVE_IPP_ICV_ONLY) && IPP_VERSION_X100 >= 801)
 512     typedef IppStatus (CV_STDCALL * ippiPyrDown)(const void* pSrc, int srcStep, void* pDst, int dstStep, IppiSize srcRoi, Ipp8u* buffer);
 513     int type = src.type();
 514     CV_SUPPRESS_DEPRECATED_START
 515     ippiPyrDown pyrDownFunc = type == CV_8UC1 ? (ippiPyrDown) ippiPyrDown_Gauss5x5_8u_C1R :
 516                               type == CV_8UC3 ? (ippiPyrDown) ippiPyrDown_Gauss5x5_8u_C3R :
 517                               type == CV_32FC1 ? (ippiPyrDown) ippiPyrDown_Gauss5x5_32f_C1R :
 518                               type == CV_32FC3 ? (ippiPyrDown) ippiPyrDown_Gauss5x5_32f_C3R : 0;
 519     CV_SUPPRESS_DEPRECATED_END
 520
 521     if (pyrDownFunc)
 522     {
 523         int bufferSize;
 524         IppiSize srcRoi = { src.cols, src.rows };
 525         IppDataType dataType = depth == CV_8U ? ipp8u : ipp32f;
 526         CV_SUPPRESS_DEPRECATED_START
 527         IppStatus ok = ippiPyrDownGetBufSize_Gauss5x5(srcRoi.width, dataType, src.channels(), &bufferSize);
 528         CV_SUPPRESS_DEPRECATED_END
 529         if (ok >= 0)
 530         {
 531             Ipp8u* buffer = ippsMalloc_8u(bufferSize);
 532             ok = pyrDownFunc(src.data, (int) src.step, dst.data, (int) dst.step, srcRoi, buffer);
 533             ippsFree(buffer);
 534
 535             if (ok >= 0)
 536                 return;
 537         }
 538     }
 539 #endif
 540
 541     PyrFunc func = 0;
 542     if( depth == CV_8U )
 543         func = pyrDown_<FixPtCast<uchar, 8>, PyrDownVec_32s8u>;
 544     else if( depth == CV_16S )
 545         func = pyrDown_<FixPtCast<short, 8>, NoVec<int, short> >;
 546     else if( depth == CV_16U )
 547         func = pyrDown_<FixPtCast<ushort, 8>, NoVec<int, ushort> >;
 548     else if( depth == CV_32F )
 549         func = pyrDown_<FltCast<float, 8>, PyrDownVec_32f>;
 550     else if( depth == CV_64F )
 551         func = pyrDown_<FltCast<double, 8>, NoVec<double, double> >;
 552     else
 553         CV_Error( CV_StsUnsupportedFormat, "" );
 554
 555     func( src, dst, borderType );
 556 }
 557
 558 void cv::pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
 559 {
 560     CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
 561                ocl_pyrUp(_src, _dst, _dsz, borderType))
 562
 563     Mat src = _src.getMat();
 564     Size dsz = _dsz.area() == 0 ? Size(src.cols*2, src.rows*2) : _dsz;
 565     _dst.create( dsz, src.type() );
 566     Mat dst = _dst.getMat();
 567
 568 #ifdef HAVE_TEGRA_OPTIMIZATION
 569     if(borderType == BORDER_DEFAULT && tegra::pyrUp(src, dst))
 570         return;
 571 #endif
 572
 573     int depth = src.depth();
 574     PyrFunc func = 0;
 575     if( depth == CV_8U )
 576         func = pyrUp_<FixPtCast<uchar, 6>, NoVec<int, uchar> >;
 577     else if( depth == CV_16S )
 578         func = pyrUp_<FixPtCast<short, 6>, NoVec<int, short> >;
 579     else if( depth == CV_16U )
 580         func = pyrUp_<FixPtCast<ushort, 6>, NoVec<int, ushort> >;
 581     else if( depth == CV_32F )
 582         func = pyrUp_<FltCast<float, 6>, NoVec<float, float> >;
 583     else if( depth == CV_64F )
 584         func = pyrUp_<FltCast<double, 6>, NoVec<double, double> >;
 585     else
 586         CV_Error( CV_StsUnsupportedFormat, "" );
 587
 588     func( src, dst, borderType );
 589 }
 590
 591 void cv::buildPyramid( InputArray _src, OutputArrayOfArrays _dst, int maxlevel, int borderType )
 592 {
 593     if (_src.dims() <= 2 && _dst.isUMatVector())
 594     {
 595         UMat src = _src.getUMat();
 596         _dst.create( maxlevel + 1, 1, 0 );
 597         _dst.getUMatRef(0) = src;
 598         for( int i = 1; i <= maxlevel; i++ )
 599             pyrDown( _dst.getUMatRef(i-1), _dst.getUMatRef(i), Size(), borderType );
 600         return;
 601     }
 602
 603     Mat src = _src.getMat();
 604     _dst.create( maxlevel + 1, 1, 0 );
 605     _dst.getMatRef(0) = src;
 606     for( int i = 1; i <= maxlevel; i++ )
 607         pyrDown( _dst.getMatRef(i-1), _dst.getMatRef(i), Size(), borderType );
 608 }
 609
 610 CV_IMPL void cvPyrDown( const void* srcarr, void* dstarr, int _filter )
 611 {
 612     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
 613
 614     CV_Assert( _filter == CV_GAUSSIAN_5x5 && src.type() == dst.type());
 615     cv::pyrDown( src, dst, dst.size() );
 616 }
 617
 618 CV_IMPL void cvPyrUp( const void* srcarr, void* dstarr, int _filter )
 619 {
 620     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
 621
 622     CV_Assert( _filter == CV_GAUSSIAN_5x5 && src.type() == dst.type());
 623     cv::pyrUp( src, dst, dst.size() );
 624 }
 625
 626
 627 CV_IMPL void
 628 cvReleasePyramid( CvMat*** _pyramid, int extra_layers )
 629 {
 630     if( !_pyramid )
 631         CV_Error( CV_StsNullPtr, "" );
 632
 633     if( *_pyramid )
 634         for( int i = 0; i <= extra_layers; i++ )
 635             cvReleaseMat( &(*_pyramid)[i] );
 636
 637     cvFree( _pyramid );
 638 }
 639
 640
 641 CV_IMPL CvMat**
 642 cvCreatePyramid( const CvArr* srcarr, int extra_layers, double rate,
 643                  const CvSize* layer_sizes, CvArr* bufarr,
 644                  int calc, int filter )
 645 {
 646     const float eps = 0.1f;
 647     uchar* ptr = 0;
 648
 649     CvMat stub, *src = cvGetMat( srcarr, &stub );
 650
 651     if( extra_layers < 0 )
 652         CV_Error( CV_StsOutOfRange, "The number of extra layers must be non negative" );
 653
 654     int i, layer_step, elem_size = CV_ELEM_SIZE(src->type);
 655     CvSize layer_size, size = cvGetMatSize(src);
 656
 657     if( bufarr )
 658     {
 659         CvMat bstub, *buf;
 660         int bufsize = 0;
 661
 662         buf = cvGetMat( bufarr, &bstub );
 663         bufsize = buf->rows*buf->cols*CV_ELEM_SIZE(buf->type);
 664         layer_size = size;
 665         for( i = 1; i <= extra_layers; i++ )
 666         {
 667             if( !layer_sizes )
 668             {
 669                 layer_size.width = cvRound(layer_size.width*rate+eps);
 670                 layer_size.height = cvRound(layer_size.height*rate+eps);
 671             }
 672             else
 673                 layer_size = layer_sizes[i-1];
 674             layer_step = layer_size.width*elem_size;
 675             bufsize -= layer_step*layer_size.height;
 676         }
 677
 678         if( bufsize < 0 )
 679             CV_Error( CV_StsOutOfRange, "The buffer is too small to fit the pyramid" );
 680         ptr = buf->data.ptr;
 681     }
 682
 683     CvMat** pyramid = (CvMat**)cvAlloc( (extra_layers+1)*sizeof(pyramid[0]) );
 684     memset( pyramid, 0, (extra_layers+1)*sizeof(pyramid[0]) );
 685
 686     pyramid[0] = cvCreateMatHeader( size.height, size.width, src->type );
 687     cvSetData( pyramid[0], src->data.ptr, src->step );
 688     layer_size = size;
 689
 690     for( i = 1; i <= extra_layers; i++ )
 691     {
 692         if( !layer_sizes )
 693         {
 694             layer_size.width = cvRound(layer_size.width*rate + eps);
 695             layer_size.height = cvRound(layer_size.height*rate + eps);
 696         }
 697         else
 698             layer_size = layer_sizes[i];
 699
 700         if( bufarr )
 701         {
 702             pyramid[i] = cvCreateMatHeader( layer_size.height, layer_size.width, src->type );
 703             layer_step = layer_size.width*elem_size;
 704             cvSetData( pyramid[i], ptr, layer_step );
 705             ptr += layer_step*layer_size.height;
 706         }
 707         else
 708             pyramid[i] = cvCreateMat( layer_size.height, layer_size.width, src->type );
 709
 710         if( calc )
 711             cvPyrDown( pyramid[i-1], pyramid[i], filter );
 712             //cvResize( pyramid[i-1], pyramid[i], CV_INTER_LINEAR );
 713     }
 714
 715     return pyramid;
 716 }
 717
 718 /* End of file. */