/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"

namespace cv
{
template<typename T, int shift> struct FixPtCast
{
    typedef int type1;
    typedef T rtype;

    rtype operator ()(type1 arg) const { return (T)((arg + (1 << (shift-1))) >> shift); }
};

template<typename T, int shift> struct FltCast
{
    typedef T type1;
    typedef T rtype;

    rtype operator ()(type1 arg) const { return arg*(T)(1./(1 << shift)); }
};

template<typename T1, typename T2> struct NoVec
{
    int operator()(T1**, T2*, int, int) const { return 0; }
};
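
// The Vec* functors below share one contract with NoVec: operator() processes
// as many leading elements of the current row as the SIMD width allows and
// returns the number of elements handled; the scalar tail loops in
// pyrDown_/pyrUp_ then resume from that index.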

#if CV_SSE2

struct PyrDownVec_32s8u
{
    int operator()(int** src, uchar* dst, int, int width) const
    {
        if( !checkHardwareSupport(CV_CPU_SSE2) )
            return 0;

        int x = 0;
        const int *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
        __m128i delta = _mm_set1_epi16(128);

        for( ; x <= width - 16; x += 16 )
        {
            __m128i r0, r1, r2, r3, r4, t0, t1;
            r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x)),
                                 _mm_load_si128((const __m128i*)(row0 + x + 4)));
            r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x)),
                                 _mm_load_si128((const __m128i*)(row1 + x + 4)));
            r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x)),
                                 _mm_load_si128((const __m128i*)(row2 + x + 4)));
            r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x)),
                                 _mm_load_si128((const __m128i*)(row3 + x + 4)));
            r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x)),
                                 _mm_load_si128((const __m128i*)(row4 + x + 4)));
            r0 = _mm_add_epi16(r0, r4);
            r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2);
            r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2));
            t0 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2));
            r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row0 + x + 12)));
            r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row1 + x + 12)));
            r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row2 + x + 12)));
            r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row3 + x + 12)));
            r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row4 + x + 12)));
            r0 = _mm_add_epi16(r0, r4);
            r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2);
            r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2));
            t1 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2));
            t0 = _mm_srli_epi16(_mm_add_epi16(t0, delta), 8);
            t1 = _mm_srli_epi16(_mm_add_epi16(t1, delta), 8);
            _mm_storeu_si128((__m128i*)(dst + x), _mm_packus_epi16(t0, t1));
        }

        for( ; x <= width - 4; x += 4 )
        {
            __m128i r0, r1, r2, r3, r4, z = _mm_setzero_si128();
            r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x)), z);
            r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x)), z);
            r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x)), z);
            r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x)), z);
            r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x)), z);
            r0 = _mm_add_epi16(r0, r4);
            r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2);
            r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2));
            r0 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2));
            r0 = _mm_srli_epi16(_mm_add_epi16(r0, delta), 8);
            *(int*)(dst + x) = _mm_cvtsi128_si32(_mm_packus_epi16(r0, r0));
        }

        return x;
    }
};

struct PyrDownVec_32f
{
    int operator()(float** src, float* dst, int, int width) const
    {
        if( !checkHardwareSupport(CV_CPU_SSE) )
            return 0;

        int x = 0;
        const float *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
        __m128 _4 = _mm_set1_ps(4.f), _scale = _mm_set1_ps(1.f/256);
        for( ; x <= width - 8; x += 8 )
        {
            __m128 r0, r1, r2, r3, r4, t0, t1;
            r0 = _mm_load_ps(row0 + x);
            r1 = _mm_load_ps(row1 + x);
            r2 = _mm_load_ps(row2 + x);
            r3 = _mm_load_ps(row3 + x);
            r4 = _mm_load_ps(row4 + x);
            r0 = _mm_add_ps(r0, r4);
            r1 = _mm_add_ps(_mm_add_ps(r1, r3), r2);
            r0 = _mm_add_ps(r0, _mm_add_ps(r2, r2));
            t0 = _mm_add_ps(r0, _mm_mul_ps(r1, _4));

            r0 = _mm_load_ps(row0 + x + 4);
            r1 = _mm_load_ps(row1 + x + 4);
            r2 = _mm_load_ps(row2 + x + 4);
            r3 = _mm_load_ps(row3 + x + 4);
            r4 = _mm_load_ps(row4 + x + 4);
            r0 = _mm_add_ps(r0, r4);
            r1 = _mm_add_ps(_mm_add_ps(r1, r3), r2);
            r0 = _mm_add_ps(r0, _mm_add_ps(r2, r2));
            t1 = _mm_add_ps(r0, _mm_mul_ps(r1, _4));

            t0 = _mm_mul_ps(t0, _scale);
            t1 = _mm_mul_ps(t1, _scale);

            _mm_storeu_ps(dst + x, t0);
            _mm_storeu_ps(dst + x + 4, t1);
        }

        return x;
    }
};

typedef NoVec<int, ushort> PyrDownVec_32s16u;
typedef NoVec<int, short> PyrDownVec_32s16s;

typedef NoVec<float, float> PyrUpVec_32f;

#elif CV_NEON

struct PyrDownVec_32s8u
{
    int operator()(int** src, uchar* dst, int, int width) const
    {
        int x = 0;
        const unsigned int *row0 = (unsigned int*)src[0], *row1 = (unsigned int*)src[1],
                           *row2 = (unsigned int*)src[2], *row3 = (unsigned int*)src[3],
                           *row4 = (unsigned int*)src[4];
        uint16x8_t v_delta = vdupq_n_u16(128);

        for( ; x <= width - 16; x += 16 )
        {
            uint16x8_t v_r0 = vcombine_u16(vqmovn_u32(vld1q_u32(row0 + x)), vqmovn_u32(vld1q_u32(row0 + x + 4)));
            uint16x8_t v_r1 = vcombine_u16(vqmovn_u32(vld1q_u32(row1 + x)), vqmovn_u32(vld1q_u32(row1 + x + 4)));
            uint16x8_t v_r2 = vcombine_u16(vqmovn_u32(vld1q_u32(row2 + x)), vqmovn_u32(vld1q_u32(row2 + x + 4)));
            uint16x8_t v_r3 = vcombine_u16(vqmovn_u32(vld1q_u32(row3 + x)), vqmovn_u32(vld1q_u32(row3 + x + 4)));
            uint16x8_t v_r4 = vcombine_u16(vqmovn_u32(vld1q_u32(row4 + x)), vqmovn_u32(vld1q_u32(row4 + x + 4)));

            v_r0 = vqaddq_u16(vqaddq_u16(v_r0, v_r4), vqaddq_u16(v_r2, v_r2));
            v_r1 = vqaddq_u16(vqaddq_u16(v_r1, v_r2), v_r3);
            uint16x8_t v_dst0 = vqaddq_u16(v_r0, vshlq_n_u16(v_r1, 2));

            v_r0 = vcombine_u16(vqmovn_u32(vld1q_u32(row0 + x + 8)), vqmovn_u32(vld1q_u32(row0 + x + 12)));
            v_r1 = vcombine_u16(vqmovn_u32(vld1q_u32(row1 + x + 8)), vqmovn_u32(vld1q_u32(row1 + x + 12)));
            v_r2 = vcombine_u16(vqmovn_u32(vld1q_u32(row2 + x + 8)), vqmovn_u32(vld1q_u32(row2 + x + 12)));
            v_r3 = vcombine_u16(vqmovn_u32(vld1q_u32(row3 + x + 8)), vqmovn_u32(vld1q_u32(row3 + x + 12)));
            v_r4 = vcombine_u16(vqmovn_u32(vld1q_u32(row4 + x + 8)), vqmovn_u32(vld1q_u32(row4 + x + 12)));

            v_r0 = vqaddq_u16(vqaddq_u16(v_r0, v_r4), vqaddq_u16(v_r2, v_r2));
            v_r1 = vqaddq_u16(vqaddq_u16(v_r1, v_r2), v_r3);
            uint16x8_t v_dst1 = vqaddq_u16(v_r0, vshlq_n_u16(v_r1, 2));

            vst1q_u8(dst + x, vcombine_u8(vqmovn_u16(vshrq_n_u16(vaddq_u16(v_dst0, v_delta), 8)),
                                          vqmovn_u16(vshrq_n_u16(vaddq_u16(v_dst1, v_delta), 8))));
        }

        return x;
    }
};

struct PyrDownVec_32s16u
{
    int operator()(int** src, ushort* dst, int, int width) const
    {
        int x = 0;
        const int *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
        int32x4_t v_delta = vdupq_n_s32(128);

        for( ; x <= width - 8; x += 8 )
        {
            int32x4_t v_r00 = vld1q_s32(row0 + x), v_r01 = vld1q_s32(row0 + x + 4);
            int32x4_t v_r10 = vld1q_s32(row1 + x), v_r11 = vld1q_s32(row1 + x + 4);
            int32x4_t v_r20 = vld1q_s32(row2 + x), v_r21 = vld1q_s32(row2 + x + 4);
            int32x4_t v_r30 = vld1q_s32(row3 + x), v_r31 = vld1q_s32(row3 + x + 4);
            int32x4_t v_r40 = vld1q_s32(row4 + x), v_r41 = vld1q_s32(row4 + x + 4);

            v_r00 = vaddq_s32(vqaddq_s32(v_r00, v_r40), vqaddq_s32(v_r20, v_r20));
            v_r10 = vaddq_s32(vqaddq_s32(v_r10, v_r20), v_r30);
            int32x4_t v_dst0 = vshrq_n_s32(vaddq_s32(vqaddq_s32(v_r00, vshlq_n_s32(v_r10, 2)), v_delta), 8);

            v_r01 = vaddq_s32(vqaddq_s32(v_r01, v_r41), vqaddq_s32(v_r21, v_r21));
            v_r11 = vaddq_s32(vqaddq_s32(v_r11, v_r21), v_r31);
            int32x4_t v_dst1 = vshrq_n_s32(vaddq_s32(vqaddq_s32(v_r01, vshlq_n_s32(v_r11, 2)), v_delta), 8);

            vst1q_u16(dst + x, vcombine_u16(vqmovun_s32(v_dst0), vqmovun_s32(v_dst1)));
        }

        return x;
    }
};

struct PyrDownVec_32s16s
{
    int operator()(int** src, short* dst, int, int width) const
    {
        int x = 0;
        const int *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
        int32x4_t v_delta = vdupq_n_s32(128);

        for( ; x <= width - 8; x += 8 )
        {
            int32x4_t v_r00 = vld1q_s32(row0 + x), v_r01 = vld1q_s32(row0 + x + 4);
            int32x4_t v_r10 = vld1q_s32(row1 + x), v_r11 = vld1q_s32(row1 + x + 4);
            int32x4_t v_r20 = vld1q_s32(row2 + x), v_r21 = vld1q_s32(row2 + x + 4);
            int32x4_t v_r30 = vld1q_s32(row3 + x), v_r31 = vld1q_s32(row3 + x + 4);
            int32x4_t v_r40 = vld1q_s32(row4 + x), v_r41 = vld1q_s32(row4 + x + 4);

            v_r00 = vaddq_s32(vqaddq_s32(v_r00, v_r40), vqaddq_s32(v_r20, v_r20));
            v_r10 = vaddq_s32(vqaddq_s32(v_r10, v_r20), v_r30);
            int32x4_t v_dst0 = vshrq_n_s32(vaddq_s32(vqaddq_s32(v_r00, vshlq_n_s32(v_r10, 2)), v_delta), 8);

            v_r01 = vaddq_s32(vqaddq_s32(v_r01, v_r41), vqaddq_s32(v_r21, v_r21));
            v_r11 = vaddq_s32(vqaddq_s32(v_r11, v_r21), v_r31);
            int32x4_t v_dst1 = vshrq_n_s32(vaddq_s32(vqaddq_s32(v_r01, vshlq_n_s32(v_r11, 2)), v_delta), 8);

            vst1q_s16(dst + x, vcombine_s16(vqmovn_s32(v_dst0), vqmovn_s32(v_dst1)));
        }

        return x;
    }
};

struct PyrDownVec_32f
{
    int operator()(float** src, float* dst, int, int width) const
    {
        int x = 0;
        const float *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
        float32x4_t v_4 = vdupq_n_f32(4.0f), v_scale = vdupq_n_f32(1.f/256.0f);

        for( ; x <= width - 8; x += 8 )
        {
            float32x4_t v_r0 = vld1q_f32(row0 + x);
            float32x4_t v_r1 = vld1q_f32(row1 + x);
            float32x4_t v_r2 = vld1q_f32(row2 + x);
            float32x4_t v_r3 = vld1q_f32(row3 + x);
            float32x4_t v_r4 = vld1q_f32(row4 + x);

            v_r0 = vaddq_f32(vaddq_f32(v_r0, v_r4), vaddq_f32(v_r2, v_r2));
            v_r1 = vaddq_f32(vaddq_f32(v_r1, v_r2), v_r3);
            vst1q_f32(dst + x, vmulq_f32(vmlaq_f32(v_r0, v_4, v_r1), v_scale));

            v_r0 = vld1q_f32(row0 + x + 4);
            v_r1 = vld1q_f32(row1 + x + 4);
            v_r2 = vld1q_f32(row2 + x + 4);
            v_r3 = vld1q_f32(row3 + x + 4);
            v_r4 = vld1q_f32(row4 + x + 4);

            v_r0 = vaddq_f32(vaddq_f32(v_r0, v_r4), vaddq_f32(v_r2, v_r2));
            v_r1 = vaddq_f32(vaddq_f32(v_r1, v_r2), v_r3);
            vst1q_f32(dst + x + 4, vmulq_f32(vmlaq_f32(v_r0, v_4, v_r1), v_scale));
        }

        return x;
    }
};

struct PyrUpVec_32f
{
    int operator()(float** src, float* dst, int, int width) const
    {
        int x = 0;
        float ** dsts = (float **)dst;
        const float *row0 = src[0], *row1 = src[1], *row2 = src[2];
        float *dst0 = dsts[0], *dst1 = dsts[1];
        float32x4_t v_6 = vdupq_n_f32(6.0f), v_scale = vdupq_n_f32(1.f/64.0f), v_scale4 = vmulq_n_f32(v_scale, 4.0f);

        for( ; x <= width - 8; x += 8 )
        {
            float32x4_t v_r0 = vld1q_f32(row0 + x);
            float32x4_t v_r1 = vld1q_f32(row1 + x);
            float32x4_t v_r2 = vld1q_f32(row2 + x);

            vst1q_f32(dst1 + x, vmulq_f32(v_scale4, vaddq_f32(v_r1, v_r2)));
            vst1q_f32(dst0 + x, vmulq_f32(v_scale, vaddq_f32(vmlaq_f32(v_r0, v_6, v_r1), v_r2)));

            v_r0 = vld1q_f32(row0 + x + 4);
            v_r1 = vld1q_f32(row1 + x + 4);
            v_r2 = vld1q_f32(row2 + x + 4);

            vst1q_f32(dst1 + x + 4, vmulq_f32(v_scale4, vaddq_f32(v_r1, v_r2)));
            vst1q_f32(dst0 + x + 4, vmulq_f32(v_scale, vaddq_f32(vmlaq_f32(v_r0, v_6, v_r1), v_r2)));
        }

        return x;
    }
};

#else

typedef NoVec<int, uchar> PyrDownVec_32s8u;
typedef NoVec<int, ushort> PyrDownVec_32s16u;
typedef NoVec<int, short> PyrDownVec_32s16s;
typedef NoVec<float, float> PyrDownVec_32f;

typedef NoVec<float, float> PyrUpVec_32f;

#endif
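
// Both scalar implementations below are separable: each source row is first
// convolved horizontally with [1 4 6 4 1] (pyrDown_) or with the interleaved
// [1 6 1] / [4 4] taps (pyrUp_) into a small ring buffer, and the buffered
// rows are then combined vertically with the same weights. The Vec* functor
// selected above only accelerates that vertical pass.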

template<class CastOp, class VecOp> void
pyrDown_( const Mat& _src, Mat& _dst, int borderType )
{
    const int PD_SZ = 5;
    typedef typename CastOp::type1 WT;
    typedef typename CastOp::rtype T;

    CV_Assert( !_src.empty() );
    Size ssize = _src.size(), dsize = _dst.size();
    int cn = _src.channels();
    int bufstep = (int)alignSize(dsize.width*cn, 16);
    AutoBuffer<WT> _buf(bufstep*PD_SZ + 16);
    WT* buf = alignPtr((WT*)_buf, 16);
    int tabL[CV_CN_MAX*(PD_SZ+2)], tabR[CV_CN_MAX*(PD_SZ+2)];
    AutoBuffer<int> _tabM(dsize.width*cn);
    int* tabM = _tabM;
    WT* rows[PD_SZ];
    CastOp castOp;
    VecOp vecOp;

    CV_Assert( ssize.width > 0 && ssize.height > 0 &&
               std::abs(dsize.width*2 - ssize.width) <= 2 &&
               std::abs(dsize.height*2 - ssize.height) <= 2 );
    int k, x, sy0 = -PD_SZ/2, sy = sy0, width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width);

    for( x = 0; x <= PD_SZ+1; x++ )
    {
        int sx0 = borderInterpolate(x - PD_SZ/2, ssize.width, borderType)*cn;
        int sx1 = borderInterpolate(x + width0*2 - PD_SZ/2, ssize.width, borderType)*cn;
        for( k = 0; k < cn; k++ )
        {
            tabL[x*cn + k] = sx0 + k;
            tabR[x*cn + k] = sx1 + k;
        }
    }

    ssize.width *= cn;
    dsize.width *= cn;
    width0 *= cn;

    for( x = 0; x < dsize.width; x++ )
        tabM[x] = (x/cn)*2*cn + x % cn;

    for( int y = 0; y < dsize.height; y++ )
    {
        T* dst = _dst.ptr<T>(y);
        WT *row0, *row1, *row2, *row3, *row4;

        // fill the ring buffer (horizontal convolution and decimation)
        for( ; sy <= y*2 + 2; sy++ )
        {
            WT* row = buf + ((sy - sy0) % PD_SZ)*bufstep;
            int _sy = borderInterpolate(sy, ssize.height, borderType);
            const T* src = _src.ptr<T>(_sy);
            int limit = cn;
            const int* tab = tabL;

            for( x = 0;; )
            {
                for( ; x < limit; x++ )
                {
                    row[x] = src[tab[x+cn*2]]*6 + (src[tab[x+cn]] + src[tab[x+cn*3]])*4 +
                        src[tab[x]] + src[tab[x+cn*4]];
                }

                if( x == dsize.width )
                    break;

                if( cn == 1 )
                {
                    for( ; x < width0; x++ )
                        row[x] = src[x*2]*6 + (src[x*2 - 1] + src[x*2 + 1])*4 +
                            src[x*2 - 2] + src[x*2 + 2];
                }
                else if( cn == 3 )
                {
                    for( ; x < width0; x += 3 )
                    {
                        const T* s = src + x*2;
                        WT t0 = s[0]*6 + (s[-3] + s[3])*4 + s[-6] + s[6];
                        WT t1 = s[1]*6 + (s[-2] + s[4])*4 + s[-5] + s[7];
                        WT t2 = s[2]*6 + (s[-1] + s[5])*4 + s[-4] + s[8];
                        row[x] = t0; row[x+1] = t1; row[x+2] = t2;
                    }
                }
                else if( cn == 4 )
                {
                    for( ; x < width0; x += 4 )
                    {
                        const T* s = src + x*2;
                        WT t0 = s[0]*6 + (s[-4] + s[4])*4 + s[-8] + s[8];
                        WT t1 = s[1]*6 + (s[-3] + s[5])*4 + s[-7] + s[9];
                        row[x] = t0; row[x+1] = t1;
                        t0 = s[2]*6 + (s[-2] + s[6])*4 + s[-6] + s[10];
                        t1 = s[3]*6 + (s[-1] + s[7])*4 + s[-5] + s[11];
                        row[x+2] = t0; row[x+3] = t1;
                    }
                }
                else
                {
                    for( ; x < width0; x++ )
                    {
                        int sx = tabM[x];
                        row[x] = src[sx]*6 + (src[sx - cn] + src[sx + cn])*4 +
                            src[sx - cn*2] + src[sx + cn*2];
                    }
                }

                limit = dsize.width;
                tab = tabR - x;
            }
        }

        // do vertical convolution and decimation and write the result to the destination image
        for( k = 0; k < PD_SZ; k++ )
            rows[k] = buf + ((y*2 - PD_SZ/2 + k - sy0) % PD_SZ)*bufstep;
        row0 = rows[0]; row1 = rows[1]; row2 = rows[2]; row3 = rows[3]; row4 = rows[4];

        x = vecOp(rows, dst, (int)_dst.step, dsize.width);
        for( ; x < dsize.width; x++ )
            dst[x] = castOp(row2[x]*6 + (row1[x] + row3[x])*4 + row0[x] + row4[x]);
    }
}
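
// Note on the normalization used by pyrDown_ above: the 5x5 weights sum to
// (1+4+6+4+1)^2 = 16*16 = 256, so the accumulated value is divided by 256 at
// the end. FixPtCast<..., 8> adds 128 and shifts right by 8 on the integer
// paths; FltCast<..., 8> multiplies by 1/256 on the floating-point paths.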

template<class CastOp, class VecOp> void
pyrUp_( const Mat& _src, Mat& _dst, int)
{
    const int PU_SZ = 3;
    typedef typename CastOp::type1 WT;
    typedef typename CastOp::rtype T;

    Size ssize = _src.size(), dsize = _dst.size();
    int cn = _src.channels();
    int bufstep = (int)alignSize((dsize.width+1)*cn, 16);
    AutoBuffer<WT> _buf(bufstep*PU_SZ + 16);
    WT* buf = alignPtr((WT*)_buf, 16);
    AutoBuffer<int> _dtab(ssize.width*cn);
    int* dtab = _dtab;
    WT* rows[PU_SZ];
    T* dsts[2];
    CastOp castOp;
    VecOp vecOp;

    CV_Assert( std::abs(dsize.width - ssize.width*2) == dsize.width % 2 &&
               std::abs(dsize.height - ssize.height*2) == dsize.height % 2);
    int k, x, sy0 = -PU_SZ/2, sy = sy0;

    ssize.width *= cn;
    dsize.width *= cn;

    for( x = 0; x < ssize.width; x++ )
        dtab[x] = (x/cn)*2*cn + x % cn;

    for( int y = 0; y < ssize.height; y++ )
    {
        T* dst0 = _dst.ptr<T>(y*2);
        T* dst1 = _dst.ptr<T>(std::min(y*2+1, dsize.height-1));
        WT *row0, *row1, *row2;

        // fill the ring buffer (horizontal convolution and interpolation)
        for( ; sy <= y + 1; sy++ )
        {
            WT* row = buf + ((sy - sy0) % PU_SZ)*bufstep;
            int _sy = borderInterpolate(sy*2, dsize.height, BORDER_REFLECT_101)/2;
            const T* src = _src.ptr<T>(_sy);

            if( ssize.width == cn )
            {
                for( x = 0; x < cn; x++ )
                    row[x] = row[x + cn] = src[x]*8;
                continue;
            }

            for( x = 0; x < cn; x++ )
            {
                int dx = dtab[x];
                WT t0 = src[x]*6 + src[x + cn]*2;
                WT t1 = (src[x] + src[x + cn])*4;
                row[dx] = t0; row[dx + cn] = t1;
                dx = dtab[ssize.width - cn + x];
                int sx = ssize.width - cn + x;
                t0 = src[sx - cn] + src[sx]*7;
                t1 = src[sx]*8;
                row[dx] = t0; row[dx + cn] = t1;
            }

            for( x = cn; x < ssize.width - cn; x++ )
            {
                int dx = dtab[x];
                WT t0 = src[x-cn] + src[x]*6 + src[x+cn];
                WT t1 = (src[x] + src[x+cn])*4;
                row[dx] = t0; row[dx + cn] = t1;
            }
        }

        // do vertical convolution and interpolation and write the result to the destination image
        for( k = 0; k < PU_SZ; k++ )
            rows[k] = buf + ((y - PU_SZ/2 + k - sy0) % PU_SZ)*bufstep;
        row0 = rows[0]; row1 = rows[1]; row2 = rows[2];
        dsts[0] = dst0; dsts[1] = dst1;

        x = vecOp(rows, (T*)dsts, (int)_dst.step, dsize.width);
        for( ; x < dsize.width; x++ )
        {
            T t1 = castOp((row1[x] + row2[x])*4);
            T t0 = castOp(row0[x] + row1[x]*6 + row2[x]);
            dst1[x] = t1; dst0[x] = t0;
        }
    }
}
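
// Note on pyrUp_'s normalization: each 1-D pass uses taps summing to 8 for
// even output positions (1+6+1) and 8 for odd ones (4+4), so every output
// pixel accumulates a total weight of 8*8 = 64. FixPtCast<..., 6> and
// FltCast<..., 6> therefore divide by 64 (2^6).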

typedef void (*PyrFunc)(const Mat&, Mat&, int);

#ifdef HAVE_OPENCL

static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if (cn > 4 || (depth == CV_64F && !doubleSupport))
        return false;

    Size ssize = _src.size();
    Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz;
    if (dsize.height < 2 || dsize.width < 2)
        return false;

    CV_Assert( ssize.width > 0 && ssize.height > 0 &&
               std::abs(dsize.width*2 - ssize.width) <= 2 &&
               std::abs(dsize.height*2 - ssize.height) <= 2 );

    UMat src = _src.getUMat();
    _dst.create( dsize, src.type() );
    UMat dst = _dst.getUMat();

    int float_depth = depth == CV_64F ? CV_64F : CV_32F;
    const int local_size = 256;
    int kercn = 1;
    if (depth == CV_8U && float_depth == CV_32F && cn == 1 && ocl::Device::getDefault().isIntel())
        kercn = 4;
    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
                                       "BORDER_REFLECT_101" };
    char cvt[2][50];
    String buildOptions = format(
            "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
            "-D T1=%s -D cn=%d -D kercn=%d -D fdepth=%d -D %s -D LOCAL_SIZE=%d",
            ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, cn)),
            ocl::convertTypeStr(float_depth, depth, cn, cvt[0]),
            ocl::convertTypeStr(depth, float_depth, cn, cvt[1]),
            doubleSupport ? " -D DOUBLE_SUPPORT" : "", ocl::typeToStr(depth),
            cn, kercn, float_depth, borderMap[borderType], local_size
            );
    ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions);
    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));

    size_t localThreads[2] = { local_size/kercn, 1 };
    size_t globalThreads[2] = { (src.cols + (kercn-1))/kercn, (dst.rows + 1) / 2 };
    return k.run(2, globalThreads, localThreads, false);
}

static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);

    if (channels > 4 || borderType != BORDER_DEFAULT)
        return false;

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if (depth == CV_64F && !doubleSupport)
        return false;

    Size ssize = _src.size();
    if ((_dsz.area() != 0) && (_dsz != Size(ssize.width * 2, ssize.height * 2)))
        return false;

    UMat src = _src.getUMat();
    Size dsize = Size(ssize.width * 2, ssize.height * 2);
    _dst.create( dsize, src.type() );
    UMat dst = _dst.getUMat();

    int float_depth = depth == CV_64F ? CV_64F : CV_32F;
    const int local_size = 16;
    char cvt[2][50];
    String buildOptions = format(
            "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
            "-D T1=%s -D cn=%d -D LOCAL_SIZE=%d",
            ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
            ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
            ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
            doubleSupport ? " -D DOUBLE_SUPPORT" : "",
            ocl::typeToStr(depth), channels, local_size
            );
    size_t globalThreads[2] = { dst.cols, dst.rows };
    size_t localThreads[2] = { local_size, local_size };
    ocl::Kernel k;
    if (ocl::Device::getDefault().isIntel() && channels == 1)
    {
        k.create("pyrUp_unrolled", ocl::imgproc::pyr_up_oclsrc, buildOptions);
        globalThreads[0] = dst.cols/2; globalThreads[1] = dst.rows/2;
    }
    else
        k.create("pyrUp", ocl::imgproc::pyr_up_oclsrc, buildOptions);

    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
    return k.run(2, globalThreads, localThreads, false);
}

#endif

}

void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
{
    CV_Assert(borderType != BORDER_CONSTANT);

    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_pyrDown(_src, _dst, _dsz, borderType))

    Mat src = _src.getMat();
    Size dsz = _dsz.area() == 0 ? Size((src.cols + 1)/2, (src.rows + 1)/2) : _dsz;
    _dst.create( dsz, src.type() );
    Mat dst = _dst.getMat();
    int depth = src.depth();

#ifdef HAVE_TEGRA_OPTIMIZATION
    if(borderType == BORDER_DEFAULT && tegra::pyrDown(src, dst))
        return;
#endif

#if IPP_VERSION_X100 >= 801 && 0
    bool isolated = (borderType & BORDER_ISOLATED) != 0;
    int borderTypeNI = borderType & ~BORDER_ISOLATED;
    if (borderTypeNI == BORDER_DEFAULT && (!src.isSubmatrix() || isolated) && dsz == Size((src.cols + 1)/2, (src.rows + 1)/2))
    {
        typedef IppStatus (CV_STDCALL * ippiPyrDown)(const void* pSrc, int srcStep, void* pDst, int dstStep, IppiSize srcRoi, Ipp8u* buffer);
        int type = src.type();
        CV_SUPPRESS_DEPRECATED_START
        ippiPyrDown pyrDownFunc = type == CV_8UC1 ? (ippiPyrDown) ippiPyrDown_Gauss5x5_8u_C1R :
                                  type == CV_8UC3 ? (ippiPyrDown) ippiPyrDown_Gauss5x5_8u_C3R :
                                  type == CV_32FC1 ? (ippiPyrDown) ippiPyrDown_Gauss5x5_32f_C1R :
                                  type == CV_32FC3 ? (ippiPyrDown) ippiPyrDown_Gauss5x5_32f_C3R : 0;
        CV_SUPPRESS_DEPRECATED_END

        if (pyrDownFunc)
        {
            int bufferSize;
            IppiSize srcRoi = { src.cols, src.rows };
            IppDataType dataType = depth == CV_8U ? ipp8u : ipp32f;
            CV_SUPPRESS_DEPRECATED_START
            IppStatus ok = ippiPyrDownGetBufSize_Gauss5x5(srcRoi.width, dataType, src.channels(), &bufferSize);
            CV_SUPPRESS_DEPRECATED_END
            if (ok >= 0)
            {
                Ipp8u* buffer = ippsMalloc_8u(bufferSize);
                ok = pyrDownFunc(src.data, (int) src.step, dst.data, (int) dst.step, srcRoi, buffer);
                ippsFree(buffer);

                if (ok >= 0)
                    return;
            }
        }
    }
#endif

    PyrFunc func = 0;
    if( depth == CV_8U )
        func = pyrDown_<FixPtCast<uchar, 8>, PyrDownVec_32s8u>;
    else if( depth == CV_16S )
        func = pyrDown_<FixPtCast<short, 8>, PyrDownVec_32s16s >;
    else if( depth == CV_16U )
        func = pyrDown_<FixPtCast<ushort, 8>, PyrDownVec_32s16u >;
    else if( depth == CV_32F )
        func = pyrDown_<FltCast<float, 8>, PyrDownVec_32f>;
    else if( depth == CV_64F )
        func = pyrDown_<FltCast<double, 8>, NoVec<double, double> >;
    else
        CV_Error( CV_StsUnsupportedFormat, "" );

    func( src, dst, borderType );
}
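
// Illustrative usage sketch (hypothetical image name, not part of this file):
//     cv::Mat img = cv::imread("input.png"), half;
//     cv::pyrDown(img, half);   // default dsz is ((img.cols+1)/2, (img.rows+1)/2)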

void cv::pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
{
    CV_Assert(borderType == BORDER_DEFAULT);

    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_pyrUp(_src, _dst, _dsz, borderType))

    Mat src = _src.getMat();
    Size dsz = _dsz.area() == 0 ? Size(src.cols*2, src.rows*2) : _dsz;
    _dst.create( dsz, src.type() );
    Mat dst = _dst.getMat();
    int depth = src.depth();

#ifdef HAVE_TEGRA_OPTIMIZATION
    if(borderType == BORDER_DEFAULT && tegra::pyrUp(src, dst))
        return;
#endif

#if IPP_VERSION_X100 >= 801 && 0
    bool isolated = (borderType & BORDER_ISOLATED) != 0;
    int borderTypeNI = borderType & ~BORDER_ISOLATED;
    if (borderTypeNI == BORDER_DEFAULT && (!src.isSubmatrix() || isolated) && dsz == Size(src.cols*2, src.rows*2))
    {
        typedef IppStatus (CV_STDCALL * ippiPyrUp)(const void* pSrc, int srcStep, void* pDst, int dstStep, IppiSize srcRoi, Ipp8u* buffer);
        int type = src.type();
        CV_SUPPRESS_DEPRECATED_START
        ippiPyrUp pyrUpFunc = type == CV_8UC1 ? (ippiPyrUp) ippiPyrUp_Gauss5x5_8u_C1R :
                              type == CV_8UC3 ? (ippiPyrUp) ippiPyrUp_Gauss5x5_8u_C3R :
                              type == CV_32FC1 ? (ippiPyrUp) ippiPyrUp_Gauss5x5_32f_C1R :
                              type == CV_32FC3 ? (ippiPyrUp) ippiPyrUp_Gauss5x5_32f_C3R : 0;
        CV_SUPPRESS_DEPRECATED_END

        if (pyrUpFunc)
        {
            int bufferSize;
            IppiSize srcRoi = { src.cols, src.rows };
            IppDataType dataType = depth == CV_8U ? ipp8u : ipp32f;
            CV_SUPPRESS_DEPRECATED_START
            IppStatus ok = ippiPyrUpGetBufSize_Gauss5x5(srcRoi.width, dataType, src.channels(), &bufferSize);
            CV_SUPPRESS_DEPRECATED_END
            if (ok >= 0)
            {
                Ipp8u* buffer = ippsMalloc_8u(bufferSize);
                ok = pyrUpFunc(src.data, (int) src.step, dst.data, (int) dst.step, srcRoi, buffer);
                ippsFree(buffer);

                if (ok >= 0)
                    return;
            }
        }
    }
#endif

    PyrFunc func = 0;
    if( depth == CV_8U )
        func = pyrUp_<FixPtCast<uchar, 6>, NoVec<int, uchar> >;
    else if( depth == CV_16S )
        func = pyrUp_<FixPtCast<short, 6>, NoVec<int, short> >;
    else if( depth == CV_16U )
        func = pyrUp_<FixPtCast<ushort, 6>, NoVec<int, ushort> >;
    else if( depth == CV_32F )
        func = pyrUp_<FltCast<float, 6>, PyrUpVec_32f >;
    else if( depth == CV_64F )
        func = pyrUp_<FltCast<double, 6>, NoVec<double, double> >;
    else
        CV_Error( CV_StsUnsupportedFormat, "" );

    func( src, dst, borderType );
}
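
// pyrUp is not the exact inverse of pyrDown: the downsampling step discards
// high frequencies, so pyrDown followed by pyrUp yields a blurred image.
// Illustrative Laplacian-level sketch (hypothetical names):
//     cv::Mat down, up, lap;
//     cv::pyrDown(img, down);
//     cv::pyrUp(down, up, img.size());
//     cv::subtract(img, up, lap, cv::noArray(), CV_16S);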

void cv::buildPyramid( InputArray _src, OutputArrayOfArrays _dst, int maxlevel, int borderType )
{
    CV_Assert(borderType != BORDER_CONSTANT);

    if (_src.dims() <= 2 && _dst.isUMatVector())
    {
        UMat src = _src.getUMat();
        _dst.create( maxlevel + 1, 1, 0 );
        _dst.getUMatRef(0) = src;
        for( int i = 1; i <= maxlevel; i++ )
            pyrDown( _dst.getUMatRef(i-1), _dst.getUMatRef(i), Size(), borderType );
        return;
    }

    Mat src = _src.getMat();
    _dst.create( maxlevel + 1, 1, 0 );
    _dst.getMatRef(0) = src;

    int i = 1;

#if IPP_VERSION_X100 >= 801 && 0
    bool isolated = (borderType & BORDER_ISOLATED) != 0;
    int borderTypeNI = borderType & ~BORDER_ISOLATED;
    if (borderTypeNI == BORDER_DEFAULT && (!src.isSubmatrix() || isolated))
    {
        typedef IppStatus (CV_STDCALL * ippiPyramidLayerDownInitAlloc)(void** ppState, IppiSize srcRoi, Ipp32f rate, void* pKernel, int kerSize, int mode);
        typedef IppStatus (CV_STDCALL * ippiPyramidLayerDown)(void* pSrc, int srcStep, IppiSize srcRoiSize, void* pDst, int dstStep, IppiSize dstRoiSize, void* pState);
        typedef IppStatus (CV_STDCALL * ippiPyramidLayerDownFree)(void* pState);

        int type = src.type();
        int depth = src.depth();
        ippiPyramidLayerDownInitAlloc pyrInitAllocFunc = 0;
        ippiPyramidLayerDown pyrDownFunc = 0;
        ippiPyramidLayerDownFree pyrFreeFunc = 0;

        if (type == CV_8UC1)
        {
            pyrInitAllocFunc = (ippiPyramidLayerDownInitAlloc) ippiPyramidLayerDownInitAlloc_8u_C1R;
            pyrDownFunc = (ippiPyramidLayerDown) ippiPyramidLayerDown_8u_C1R;
            pyrFreeFunc = (ippiPyramidLayerDownFree) ippiPyramidLayerDownFree_8u_C1R;
        }
        else if (type == CV_8UC3)
        {
            pyrInitAllocFunc = (ippiPyramidLayerDownInitAlloc) ippiPyramidLayerDownInitAlloc_8u_C3R;
            pyrDownFunc = (ippiPyramidLayerDown) ippiPyramidLayerDown_8u_C3R;
            pyrFreeFunc = (ippiPyramidLayerDownFree) ippiPyramidLayerDownFree_8u_C3R;
        }
        else if (type == CV_32FC1)
        {
            pyrInitAllocFunc = (ippiPyramidLayerDownInitAlloc) ippiPyramidLayerDownInitAlloc_32f_C1R;
            pyrDownFunc = (ippiPyramidLayerDown) ippiPyramidLayerDown_32f_C1R;
            pyrFreeFunc = (ippiPyramidLayerDownFree) ippiPyramidLayerDownFree_32f_C1R;
        }
        else if (type == CV_32FC3)
        {
            pyrInitAllocFunc = (ippiPyramidLayerDownInitAlloc) ippiPyramidLayerDownInitAlloc_32f_C3R;
            pyrDownFunc = (ippiPyramidLayerDown) ippiPyramidLayerDown_32f_C3R;
            pyrFreeFunc = (ippiPyramidLayerDownFree) ippiPyramidLayerDownFree_32f_C3R;
        }

        if (pyrInitAllocFunc && pyrDownFunc && pyrFreeFunc)
        {
            float rate = 2.f;
            IppiSize srcRoi = { src.cols, src.rows };
            IppiPyramid *gPyr;
            IppStatus ok = ippiPyramidInitAlloc(&gPyr, maxlevel + 1, srcRoi, rate);

            Ipp16s iKernel[5] = { 1, 4, 6, 4, 1 };
            Ipp32f fKernel[5] = { 1.f, 4.f, 6.f, 4.f, 1.f };
            void* kernel = depth >= CV_32F ? (void*) fKernel : (void*) iKernel;

            if (ok >= 0) ok = pyrInitAllocFunc((void**) &(gPyr->pState), srcRoi, rate, kernel, 5, IPPI_INTER_LINEAR);
            if (ok >= 0)
            {
                gPyr->pImage[0] = src.data;
                gPyr->pStep[0] = (int) src.step;
                gPyr->pRoi[0] = srcRoi;
                for( ; i <= maxlevel; i++ )
                {
                    IppiSize dstRoi;
                    ok = ippiGetPyramidDownROI(gPyr->pRoi[i-1], &dstRoi, rate);
                    Mat& dst = _dst.getMatRef(i);
                    dst.create(Size(dstRoi.width, dstRoi.height), type);
                    gPyr->pImage[i] = dst.data;
                    gPyr->pStep[i] = (int) dst.step;
                    gPyr->pRoi[i] = dstRoi;

                    if (ok >= 0) ok = pyrDownFunc(gPyr->pImage[i-1], gPyr->pStep[i-1], gPyr->pRoi[i-1],
                                                  gPyr->pImage[i], gPyr->pStep[i], gPyr->pRoi[i], gPyr->pState);
                    if (ok < 0)
                        break;
                }

                pyrFreeFunc(gPyr->pState);
            }

            ippiPyramidFree(gPyr);

            if (ok >= 0)
                return;
        }
    }
#endif

    for( ; i <= maxlevel; i++ )
        pyrDown( _dst.getMatRef(i-1), _dst.getMatRef(i), Size(), borderType );
}
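
// Illustrative usage sketch (hypothetical names): pyr[0] is the source image
// itself and each further level is the pyrDown of the previous one.
//     std::vector<cv::Mat> pyr;
//     cv::buildPyramid(img, pyr, 3);   // produces 4 levels: pyr[0] .. pyr[3]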

CV_IMPL void cvPyrDown( const void* srcarr, void* dstarr, int _filter )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);

    CV_Assert( _filter == CV_GAUSSIAN_5x5 && src.type() == dst.type());
    cv::pyrDown( src, dst, dst.size() );
}

CV_IMPL void cvPyrUp( const void* srcarr, void* dstarr, int _filter )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);

    CV_Assert( _filter == CV_GAUSSIAN_5x5 && src.type() == dst.type());
    cv::pyrUp( src, dst, dst.size() );
}

CV_IMPL void
cvReleasePyramid( CvMat*** _pyramid, int extra_layers )
{
    if( !_pyramid )
        CV_Error( CV_StsNullPtr, "" );

    if( *_pyramid )
        for( int i = 0; i <= extra_layers; i++ )
            cvReleaseMat( &(*_pyramid)[i] );

    cvFree( _pyramid );
}

CV_IMPL CvMat**
cvCreatePyramid( const CvArr* srcarr, int extra_layers, double rate,
                 const CvSize* layer_sizes, CvArr* bufarr,
                 int calc, int filter )
{
    const float eps = 0.1f;
    uchar* ptr = 0;

    CvMat stub, *src = cvGetMat( srcarr, &stub );

    if( extra_layers < 0 )
        CV_Error( CV_StsOutOfRange, "The number of extra layers must be non negative" );

    int i, layer_step, elem_size = CV_ELEM_SIZE(src->type);
    CvSize layer_size, size = cvGetMatSize(src);

    if( bufarr )
    {
        CvMat bstub, *buf;
        int bufsize = 0;

        buf = cvGetMat( bufarr, &bstub );
        bufsize = buf->rows*buf->cols*CV_ELEM_SIZE(buf->type);
        layer_size = size;
        for( i = 1; i <= extra_layers; i++ )
        {
            if( !layer_sizes )
            {
                layer_size.width = cvRound(layer_size.width*rate+eps);
                layer_size.height = cvRound(layer_size.height*rate+eps);
            }
            else
                layer_size = layer_sizes[i-1];
            layer_step = layer_size.width*elem_size;
            bufsize -= layer_step*layer_size.height;
        }

        if( bufsize < 0 )
            CV_Error( CV_StsOutOfRange, "The buffer is too small to fit the pyramid" );
        ptr = buf->data.ptr;
    }

    CvMat** pyramid = (CvMat**)cvAlloc( (extra_layers+1)*sizeof(pyramid[0]) );
    memset( pyramid, 0, (extra_layers+1)*sizeof(pyramid[0]) );

    pyramid[0] = cvCreateMatHeader( size.height, size.width, src->type );
    cvSetData( pyramid[0], src->data.ptr, src->step );

    layer_size = size;
    for( i = 1; i <= extra_layers; i++ )
    {
        if( !layer_sizes )
        {
            layer_size.width = cvRound(layer_size.width*rate + eps);
            layer_size.height = cvRound(layer_size.height*rate + eps);
        }
        else
            layer_size = layer_sizes[i];

        if( bufarr )
        {
            pyramid[i] = cvCreateMatHeader( layer_size.height, layer_size.width, src->type );
            layer_step = layer_size.width*elem_size;
            cvSetData( pyramid[i], ptr, layer_step );
            ptr += layer_step*layer_size.height;
        }
        else
            pyramid[i] = cvCreateMat( layer_size.height, layer_size.width, src->type );

        if( calc )
            cvPyrDown( pyramid[i-1], pyramid[i], filter );
            //cvResize( pyramid[i-1], pyramid[i], CV_INTER_LINEAR );
    }

    return pyramid;
}