1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 /* ////////////////////////////////////////////////////////////////////
45 // Geometrical transforms on images and matrices: rotation, zoom etc.
49 #include "precomp.hpp"
50 #include "opencl_kernels.hpp"
52 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
53 static IppStatus sts = ippInit();
58 #if defined (HAVE_IPP) && ((IPP_VERSION_MAJOR == 7 && IPP_VERSION_MINOR >= 1) || IPP_VERSION_MAJOR > 7)
59 typedef IppStatus (CV_STDCALL* ippiResizeFunc)(const void*, int, const void*, int, IppiPoint, IppiSize, IppiBorderType, void*, void*, Ipp8u*);
62 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
63 typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize);
64 typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);
65 typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int);
67 template <int channels, typename Type>
68 bool IPPSetSimple(cv::Scalar value, void *dataPointer, int step, IppiSize &size, ippiSetFunc func)
70 Type values[channels];
71 for( int i = 0; i < channels; i++ )
72 values[i] = (Type)value[i];
73 return func(values, dataPointer, step, size) >= 0;
76 bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
83 return ippiSet_8u_C1R((Ipp8u)value[0], (Ipp8u *)dataPointer, step, size) >= 0;
85 return ippiSet_16u_C1R((Ipp16u)value[0], (Ipp16u *)dataPointer, step, size) >= 0;
87 return ippiSet_32f_C1R((Ipp32f)value[0], (Ipp32f *)dataPointer, step, size) >= 0;
97 return IPPSetSimple<3, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C3R);
99 return IPPSetSimple<3, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C3R);
101 return IPPSetSimple<3, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C3R);
104 else if( channels == 4 )
109 return IPPSetSimple<4, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C4R);
111 return IPPSetSimple<4, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C4R);
113 return IPPSetSimple<4, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C4R);
121 /************** interpolation formulas and tables ***************/
123 const int INTER_RESIZE_COEF_BITS=11;
124 const int INTER_RESIZE_COEF_SCALE=1 << INTER_RESIZE_COEF_BITS;
126 const int INTER_REMAP_COEF_BITS=15;
127 const int INTER_REMAP_COEF_SCALE=1 << INTER_REMAP_COEF_BITS;
129 static uchar NNDeltaTab_i[INTER_TAB_SIZE2][2];
131 static float BilinearTab_f[INTER_TAB_SIZE2][2][2];
132 static short BilinearTab_i[INTER_TAB_SIZE2][2][2];
135 static short BilinearTab_iC4_buf[INTER_TAB_SIZE2+2][2][8];
136 static short (*BilinearTab_iC4)[2][8] = (short (*)[2][8])alignPtr(BilinearTab_iC4_buf, 16);
139 static float BicubicTab_f[INTER_TAB_SIZE2][4][4];
140 static short BicubicTab_i[INTER_TAB_SIZE2][4][4];
142 static float Lanczos4Tab_f[INTER_TAB_SIZE2][8][8];
143 static short Lanczos4Tab_i[INTER_TAB_SIZE2][8][8];
145 static inline void interpolateLinear( float x, float* coeffs )
151 static inline void interpolateCubic( float x, float* coeffs )
153 const float A = -0.75f;
155 coeffs[0] = ((A*(x + 1) - 5*A)*(x + 1) + 8*A)*(x + 1) - 4*A;
156 coeffs[1] = ((A + 2)*x - (A + 3))*x*x + 1;
157 coeffs[2] = ((A + 2)*(1 - x) - (A + 3))*(1 - x)*(1 - x) + 1;
158 coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2];
161 static inline void interpolateLanczos4( float x, float* coeffs )
163 static const double s45 = 0.70710678118654752440084436210485;
164 static const double cs[][2]=
165 {{1, 0}, {-s45, -s45}, {0, 1}, {s45, -s45}, {-1, 0}, {s45, s45}, {0, -1}, {-s45, s45}};
167 if( x < FLT_EPSILON )
169 for( int i = 0; i < 8; i++ )
176 double y0=-(x+3)*CV_PI*0.25, s0 = sin(y0), c0=cos(y0);
177 for(int i = 0; i < 8; i++ )
179 double y = -(x+3-i)*CV_PI*0.25;
180 coeffs[i] = (float)((cs[i][0]*s0 + cs[i][1]*c0)/(y*y));
185 for(int i = 0; i < 8; i++ )
189 static void initInterTab1D(int method, float* tab, int tabsz)
191 float scale = 1.f/tabsz;
192 if( method == INTER_LINEAR )
194 for( int i = 0; i < tabsz; i++, tab += 2 )
195 interpolateLinear( i*scale, tab );
197 else if( method == INTER_CUBIC )
199 for( int i = 0; i < tabsz; i++, tab += 4 )
200 interpolateCubic( i*scale, tab );
202 else if( method == INTER_LANCZOS4 )
204 for( int i = 0; i < tabsz; i++, tab += 8 )
205 interpolateLanczos4( i*scale, tab );
208 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
212 static const void* initInterTab2D( int method, bool fixpt )
214 static bool inittab[INTER_MAX+1] = {false};
218 if( method == INTER_LINEAR )
219 tab = BilinearTab_f[0][0], itab = BilinearTab_i[0][0], ksize=2;
220 else if( method == INTER_CUBIC )
221 tab = BicubicTab_f[0][0], itab = BicubicTab_i[0][0], ksize=4;
222 else if( method == INTER_LANCZOS4 )
223 tab = Lanczos4Tab_f[0][0], itab = Lanczos4Tab_i[0][0], ksize=8;
225 CV_Error( CV_StsBadArg, "Unknown/unsupported interpolation type" );
227 if( !inittab[method] )
229 AutoBuffer<float> _tab(8*INTER_TAB_SIZE);
231 initInterTab1D(method, _tab, INTER_TAB_SIZE);
232 for( i = 0; i < INTER_TAB_SIZE; i++ )
233 for( j = 0; j < INTER_TAB_SIZE; j++, tab += ksize*ksize, itab += ksize*ksize )
236 NNDeltaTab_i[i*INTER_TAB_SIZE+j][0] = j < INTER_TAB_SIZE/2;
237 NNDeltaTab_i[i*INTER_TAB_SIZE+j][1] = i < INTER_TAB_SIZE/2;
239 for( k1 = 0; k1 < ksize; k1++ )
241 float vy = _tab[i*ksize + k1];
242 for( k2 = 0; k2 < ksize; k2++ )
244 float v = vy*_tab[j*ksize + k2];
245 tab[k1*ksize + k2] = v;
246 isum += itab[k1*ksize + k2] = saturate_cast<short>(v*INTER_REMAP_COEF_SCALE);
250 if( isum != INTER_REMAP_COEF_SCALE )
252 int diff = isum - INTER_REMAP_COEF_SCALE;
253 int ksize2 = ksize/2, Mk1=ksize2, Mk2=ksize2, mk1=ksize2, mk2=ksize2;
254 for( k1 = ksize2; k1 < ksize2+2; k1++ )
255 for( k2 = ksize2; k2 < ksize2+2; k2++ )
257 if( itab[k1*ksize+k2] < itab[mk1*ksize+mk2] )
259 else if( itab[k1*ksize+k2] > itab[Mk1*ksize+Mk2] )
263 itab[Mk1*ksize + Mk2] = (short)(itab[Mk1*ksize + Mk2] - diff);
265 itab[mk1*ksize + mk2] = (short)(itab[mk1*ksize + mk2] - diff);
268 tab -= INTER_TAB_SIZE2*ksize*ksize;
269 itab -= INTER_TAB_SIZE2*ksize*ksize;
271 if( method == INTER_LINEAR )
273 for( i = 0; i < INTER_TAB_SIZE2; i++ )
274 for( j = 0; j < 4; j++ )
276 BilinearTab_iC4[i][0][j*2] = BilinearTab_i[i][0][0];
277 BilinearTab_iC4[i][0][j*2+1] = BilinearTab_i[i][0][1];
278 BilinearTab_iC4[i][1][j*2] = BilinearTab_i[i][1][0];
279 BilinearTab_iC4[i][1][j*2+1] = BilinearTab_i[i][1][1];
283 inittab[method] = true;
285 return fixpt ? (const void*)itab : (const void*)tab;
289 static bool initAllInterTab2D()
291 return initInterTab2D( INTER_LINEAR, false ) &&
292 initInterTab2D( INTER_LINEAR, true ) &&
293 initInterTab2D( INTER_CUBIC, false ) &&
294 initInterTab2D( INTER_CUBIC, true ) &&
295 initInterTab2D( INTER_LANCZOS4, false ) &&
296 initInterTab2D( INTER_LANCZOS4, true );
299 static volatile bool doInitAllInterTab2D = initAllInterTab2D();
302 template<typename ST, typename DT> struct Cast
307 DT operator()(ST val) const { return saturate_cast<DT>(val); }
310 template<typename ST, typename DT, int bits> struct FixedPtCast
314 enum { SHIFT = bits, DELTA = 1 << (bits-1) };
316 DT operator()(ST val) const { return saturate_cast<DT>((val + DELTA)>>SHIFT); }
319 /****************************************************************************************\
321 \****************************************************************************************/
323 class resizeNNInvoker :
324 public ParallelLoopBody
327 resizeNNInvoker(const Mat& _src, Mat &_dst, int *_x_ofs, int _pix_size4, double _ify) :
328 ParallelLoopBody(), src(_src), dst(_dst), x_ofs(_x_ofs), pix_size4(_pix_size4),
333 virtual void operator() (const Range& range) const
335 Size ssize = src.size(), dsize = dst.size();
336 int y, x, pix_size = (int)src.elemSize();
338 for( y = range.start; y < range.end; y++ )
340 uchar* D = dst.data + dst.step*y;
341 int sy = std::min(cvFloor(y*ify), ssize.height-1);
342 const uchar* S = src.data + src.step*sy;
347 for( x = 0; x <= dsize.width - 2; x += 2 )
349 uchar t0 = S[x_ofs[x]];
350 uchar t1 = S[x_ofs[x+1]];
355 for( ; x < dsize.width; x++ )
359 for( x = 0; x < dsize.width; x++ )
360 *(ushort*)(D + x*2) = *(ushort*)(S + x_ofs[x]);
363 for( x = 0; x < dsize.width; x++, D += 3 )
365 const uchar* _tS = S + x_ofs[x];
366 D[0] = _tS[0]; D[1] = _tS[1]; D[2] = _tS[2];
370 for( x = 0; x < dsize.width; x++ )
371 *(int*)(D + x*4) = *(int*)(S + x_ofs[x]);
374 for( x = 0; x < dsize.width; x++, D += 6 )
376 const ushort* _tS = (const ushort*)(S + x_ofs[x]);
377 ushort* _tD = (ushort*)D;
378 _tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2];
382 for( x = 0; x < dsize.width; x++, D += 8 )
384 const int* _tS = (const int*)(S + x_ofs[x]);
386 _tD[0] = _tS[0]; _tD[1] = _tS[1];
390 for( x = 0; x < dsize.width; x++, D += 12 )
392 const int* _tS = (const int*)(S + x_ofs[x]);
394 _tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2];
398 for( x = 0; x < dsize.width; x++, D += pix_size )
400 const int* _tS = (const int*)(S + x_ofs[x]);
402 for( int k = 0; k < pix_size4; k++ )
412 int* x_ofs, pix_size4;
415 resizeNNInvoker(const resizeNNInvoker&);
416 resizeNNInvoker& operator=(const resizeNNInvoker&);
420 resizeNN( const Mat& src, Mat& dst, double fx, double fy )
422 Size ssize = src.size(), dsize = dst.size();
423 AutoBuffer<int> _x_ofs(dsize.width);
425 int pix_size = (int)src.elemSize();
426 int pix_size4 = (int)(pix_size / sizeof(int));
427 double ifx = 1./fx, ify = 1./fy;
430 for( x = 0; x < dsize.width; x++ )
432 int sx = cvFloor(x*ifx);
433 x_ofs[x] = std::min(sx, ssize.width-1)*pix_size;
436 Range range(0, dsize.height);
437 resizeNNInvoker invoker(src, dst, x_ofs, pix_size4, ify);
438 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
444 int operator()(const uchar**, uchar*, const uchar*, int ) const { return 0; }
449 int operator()(const uchar**, uchar**, int, const int*,
450 const uchar*, int, int, int, int, int) const { return 0; }
455 struct VResizeLinearVec_32s8u
457 int operator()(const uchar** _src, uchar* dst, const uchar* _beta, int width ) const
459 if( !checkHardwareSupport(CV_CPU_SSE2) )
462 const int** src = (const int**)_src;
463 const short* beta = (const short*)_beta;
464 const int *S0 = src[0], *S1 = src[1];
466 __m128i b0 = _mm_set1_epi16(beta[0]), b1 = _mm_set1_epi16(beta[1]);
467 __m128i delta = _mm_set1_epi16(2);
469 if( (((size_t)S0|(size_t)S1)&15) == 0 )
470 for( ; x <= width - 16; x += 16 )
472 __m128i x0, x1, x2, y0, y1, y2;
473 x0 = _mm_load_si128((const __m128i*)(S0 + x));
474 x1 = _mm_load_si128((const __m128i*)(S0 + x + 4));
475 y0 = _mm_load_si128((const __m128i*)(S1 + x));
476 y1 = _mm_load_si128((const __m128i*)(S1 + x + 4));
477 x0 = _mm_packs_epi32(_mm_srai_epi32(x0, 4), _mm_srai_epi32(x1, 4));
478 y0 = _mm_packs_epi32(_mm_srai_epi32(y0, 4), _mm_srai_epi32(y1, 4));
480 x1 = _mm_load_si128((const __m128i*)(S0 + x + 8));
481 x2 = _mm_load_si128((const __m128i*)(S0 + x + 12));
482 y1 = _mm_load_si128((const __m128i*)(S1 + x + 8));
483 y2 = _mm_load_si128((const __m128i*)(S1 + x + 12));
484 x1 = _mm_packs_epi32(_mm_srai_epi32(x1, 4), _mm_srai_epi32(x2, 4));
485 y1 = _mm_packs_epi32(_mm_srai_epi32(y1, 4), _mm_srai_epi32(y2, 4));
487 x0 = _mm_adds_epi16(_mm_mulhi_epi16( x0, b0 ), _mm_mulhi_epi16( y0, b1 ));
488 x1 = _mm_adds_epi16(_mm_mulhi_epi16( x1, b0 ), _mm_mulhi_epi16( y1, b1 ));
490 x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
491 x1 = _mm_srai_epi16(_mm_adds_epi16(x1, delta), 2);
492 _mm_storeu_si128( (__m128i*)(dst + x), _mm_packus_epi16(x0, x1));
495 for( ; x <= width - 16; x += 16 )
497 __m128i x0, x1, x2, y0, y1, y2;
498 x0 = _mm_loadu_si128((const __m128i*)(S0 + x));
499 x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 4));
500 y0 = _mm_loadu_si128((const __m128i*)(S1 + x));
501 y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 4));
502 x0 = _mm_packs_epi32(_mm_srai_epi32(x0, 4), _mm_srai_epi32(x1, 4));
503 y0 = _mm_packs_epi32(_mm_srai_epi32(y0, 4), _mm_srai_epi32(y1, 4));
505 x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 8));
506 x2 = _mm_loadu_si128((const __m128i*)(S0 + x + 12));
507 y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 8));
508 y2 = _mm_loadu_si128((const __m128i*)(S1 + x + 12));
509 x1 = _mm_packs_epi32(_mm_srai_epi32(x1, 4), _mm_srai_epi32(x2, 4));
510 y1 = _mm_packs_epi32(_mm_srai_epi32(y1, 4), _mm_srai_epi32(y2, 4));
512 x0 = _mm_adds_epi16(_mm_mulhi_epi16( x0, b0 ), _mm_mulhi_epi16( y0, b1 ));
513 x1 = _mm_adds_epi16(_mm_mulhi_epi16( x1, b0 ), _mm_mulhi_epi16( y1, b1 ));
515 x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
516 x1 = _mm_srai_epi16(_mm_adds_epi16(x1, delta), 2);
517 _mm_storeu_si128( (__m128i*)(dst + x), _mm_packus_epi16(x0, x1));
520 for( ; x < width - 4; x += 4 )
523 x0 = _mm_srai_epi32(_mm_loadu_si128((const __m128i*)(S0 + x)), 4);
524 y0 = _mm_srai_epi32(_mm_loadu_si128((const __m128i*)(S1 + x)), 4);
525 x0 = _mm_packs_epi32(x0, x0);
526 y0 = _mm_packs_epi32(y0, y0);
527 x0 = _mm_adds_epi16(_mm_mulhi_epi16(x0, b0), _mm_mulhi_epi16(y0, b1));
528 x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
529 x0 = _mm_packus_epi16(x0, x0);
530 *(int*)(dst + x) = _mm_cvtsi128_si32(x0);
538 template<int shiftval> struct VResizeLinearVec_32f16
540 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
542 if( !checkHardwareSupport(CV_CPU_SSE2) )
545 const float** src = (const float**)_src;
546 const float* beta = (const float*)_beta;
547 const float *S0 = src[0], *S1 = src[1];
548 ushort* dst = (ushort*)_dst;
551 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]);
552 __m128i preshift = _mm_set1_epi32(shiftval);
553 __m128i postshift = _mm_set1_epi16((short)shiftval);
555 if( (((size_t)S0|(size_t)S1)&15) == 0 )
556 for( ; x <= width - 16; x += 16 )
558 __m128 x0, x1, y0, y1;
560 x0 = _mm_load_ps(S0 + x);
561 x1 = _mm_load_ps(S0 + x + 4);
562 y0 = _mm_load_ps(S1 + x);
563 y1 = _mm_load_ps(S1 + x + 4);
565 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
566 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
567 t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
568 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
569 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t2), postshift);
571 x0 = _mm_load_ps(S0 + x + 8);
572 x1 = _mm_load_ps(S0 + x + 12);
573 y0 = _mm_load_ps(S1 + x + 8);
574 y1 = _mm_load_ps(S1 + x + 12);
576 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
577 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
578 t1 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
579 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
580 t1 = _mm_add_epi16(_mm_packs_epi32(t1, t2), postshift);
582 _mm_storeu_si128( (__m128i*)(dst + x), t0);
583 _mm_storeu_si128( (__m128i*)(dst + x + 8), t1);
586 for( ; x <= width - 16; x += 16 )
588 __m128 x0, x1, y0, y1;
590 x0 = _mm_loadu_ps(S0 + x);
591 x1 = _mm_loadu_ps(S0 + x + 4);
592 y0 = _mm_loadu_ps(S1 + x);
593 y1 = _mm_loadu_ps(S1 + x + 4);
595 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
596 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
597 t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
598 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
599 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t2), postshift);
601 x0 = _mm_loadu_ps(S0 + x + 8);
602 x1 = _mm_loadu_ps(S0 + x + 12);
603 y0 = _mm_loadu_ps(S1 + x + 8);
604 y1 = _mm_loadu_ps(S1 + x + 12);
606 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
607 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
608 t1 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
609 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
610 t1 = _mm_add_epi16(_mm_packs_epi32(t1, t2), postshift);
612 _mm_storeu_si128( (__m128i*)(dst + x), t0);
613 _mm_storeu_si128( (__m128i*)(dst + x + 8), t1);
616 for( ; x < width - 4; x += 4 )
620 x0 = _mm_loadu_ps(S0 + x);
621 y0 = _mm_loadu_ps(S1 + x);
623 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
624 t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
625 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t0), postshift);
626 _mm_storel_epi64( (__m128i*)(dst + x), t0);
633 typedef VResizeLinearVec_32f16<SHRT_MIN> VResizeLinearVec_32f16u;
634 typedef VResizeLinearVec_32f16<0> VResizeLinearVec_32f16s;
636 struct VResizeLinearVec_32f
638 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
640 if( !checkHardwareSupport(CV_CPU_SSE) )
643 const float** src = (const float**)_src;
644 const float* beta = (const float*)_beta;
645 const float *S0 = src[0], *S1 = src[1];
646 float* dst = (float*)_dst;
649 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]);
651 if( (((size_t)S0|(size_t)S1)&15) == 0 )
652 for( ; x <= width - 8; x += 8 )
654 __m128 x0, x1, y0, y1;
655 x0 = _mm_load_ps(S0 + x);
656 x1 = _mm_load_ps(S0 + x + 4);
657 y0 = _mm_load_ps(S1 + x);
658 y1 = _mm_load_ps(S1 + x + 4);
660 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
661 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
663 _mm_storeu_ps( dst + x, x0);
664 _mm_storeu_ps( dst + x + 4, x1);
667 for( ; x <= width - 8; x += 8 )
669 __m128 x0, x1, y0, y1;
670 x0 = _mm_loadu_ps(S0 + x);
671 x1 = _mm_loadu_ps(S0 + x + 4);
672 y0 = _mm_loadu_ps(S1 + x);
673 y1 = _mm_loadu_ps(S1 + x + 4);
675 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
676 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
678 _mm_storeu_ps( dst + x, x0);
679 _mm_storeu_ps( dst + x + 4, x1);
687 struct VResizeCubicVec_32s8u
689 int operator()(const uchar** _src, uchar* dst, const uchar* _beta, int width ) const
691 if( !checkHardwareSupport(CV_CPU_SSE2) )
694 const int** src = (const int**)_src;
695 const short* beta = (const short*)_beta;
696 const int *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
698 float scale = 1.f/(INTER_RESIZE_COEF_SCALE*INTER_RESIZE_COEF_SCALE);
699 __m128 b0 = _mm_set1_ps(beta[0]*scale), b1 = _mm_set1_ps(beta[1]*scale),
700 b2 = _mm_set1_ps(beta[2]*scale), b3 = _mm_set1_ps(beta[3]*scale);
702 if( (((size_t)S0|(size_t)S1|(size_t)S2|(size_t)S3)&15) == 0 )
703 for( ; x <= width - 8; x += 8 )
705 __m128i x0, x1, y0, y1;
706 __m128 s0, s1, f0, f1;
707 x0 = _mm_load_si128((const __m128i*)(S0 + x));
708 x1 = _mm_load_si128((const __m128i*)(S0 + x + 4));
709 y0 = _mm_load_si128((const __m128i*)(S1 + x));
710 y1 = _mm_load_si128((const __m128i*)(S1 + x + 4));
712 s0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b0);
713 s1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b0);
714 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b1);
715 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b1);
716 s0 = _mm_add_ps(s0, f0);
717 s1 = _mm_add_ps(s1, f1);
719 x0 = _mm_load_si128((const __m128i*)(S2 + x));
720 x1 = _mm_load_si128((const __m128i*)(S2 + x + 4));
721 y0 = _mm_load_si128((const __m128i*)(S3 + x));
722 y1 = _mm_load_si128((const __m128i*)(S3 + x + 4));
724 f0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b2);
725 f1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b2);
726 s0 = _mm_add_ps(s0, f0);
727 s1 = _mm_add_ps(s1, f1);
728 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b3);
729 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b3);
730 s0 = _mm_add_ps(s0, f0);
731 s1 = _mm_add_ps(s1, f1);
733 x0 = _mm_cvtps_epi32(s0);
734 x1 = _mm_cvtps_epi32(s1);
736 x0 = _mm_packs_epi32(x0, x1);
737 _mm_storel_epi64( (__m128i*)(dst + x), _mm_packus_epi16(x0, x0));
740 for( ; x <= width - 8; x += 8 )
742 __m128i x0, x1, y0, y1;
743 __m128 s0, s1, f0, f1;
744 x0 = _mm_loadu_si128((const __m128i*)(S0 + x));
745 x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 4));
746 y0 = _mm_loadu_si128((const __m128i*)(S1 + x));
747 y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 4));
749 s0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b0);
750 s1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b0);
751 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b1);
752 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b1);
753 s0 = _mm_add_ps(s0, f0);
754 s1 = _mm_add_ps(s1, f1);
756 x0 = _mm_loadu_si128((const __m128i*)(S2 + x));
757 x1 = _mm_loadu_si128((const __m128i*)(S2 + x + 4));
758 y0 = _mm_loadu_si128((const __m128i*)(S3 + x));
759 y1 = _mm_loadu_si128((const __m128i*)(S3 + x + 4));
761 f0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b2);
762 f1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b2);
763 s0 = _mm_add_ps(s0, f0);
764 s1 = _mm_add_ps(s1, f1);
765 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b3);
766 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b3);
767 s0 = _mm_add_ps(s0, f0);
768 s1 = _mm_add_ps(s1, f1);
770 x0 = _mm_cvtps_epi32(s0);
771 x1 = _mm_cvtps_epi32(s1);
773 x0 = _mm_packs_epi32(x0, x1);
774 _mm_storel_epi64( (__m128i*)(dst + x), _mm_packus_epi16(x0, x0));
782 template<int shiftval> struct VResizeCubicVec_32f16
784 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
786 if( !checkHardwareSupport(CV_CPU_SSE2) )
789 const float** src = (const float**)_src;
790 const float* beta = (const float*)_beta;
791 const float *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
792 ushort* dst = (ushort*)_dst;
794 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]),
795 b2 = _mm_set1_ps(beta[2]), b3 = _mm_set1_ps(beta[3]);
796 __m128i preshift = _mm_set1_epi32(shiftval);
797 __m128i postshift = _mm_set1_epi16((short)shiftval);
799 for( ; x <= width - 8; x += 8 )
801 __m128 x0, x1, y0, y1, s0, s1;
803 x0 = _mm_loadu_ps(S0 + x);
804 x1 = _mm_loadu_ps(S0 + x + 4);
805 y0 = _mm_loadu_ps(S1 + x);
806 y1 = _mm_loadu_ps(S1 + x + 4);
808 s0 = _mm_mul_ps(x0, b0);
809 s1 = _mm_mul_ps(x1, b0);
810 y0 = _mm_mul_ps(y0, b1);
811 y1 = _mm_mul_ps(y1, b1);
812 s0 = _mm_add_ps(s0, y0);
813 s1 = _mm_add_ps(s1, y1);
815 x0 = _mm_loadu_ps(S2 + x);
816 x1 = _mm_loadu_ps(S2 + x + 4);
817 y0 = _mm_loadu_ps(S3 + x);
818 y1 = _mm_loadu_ps(S3 + x + 4);
820 x0 = _mm_mul_ps(x0, b2);
821 x1 = _mm_mul_ps(x1, b2);
822 y0 = _mm_mul_ps(y0, b3);
823 y1 = _mm_mul_ps(y1, b3);
824 s0 = _mm_add_ps(s0, x0);
825 s1 = _mm_add_ps(s1, x1);
826 s0 = _mm_add_ps(s0, y0);
827 s1 = _mm_add_ps(s1, y1);
829 t0 = _mm_add_epi32(_mm_cvtps_epi32(s0), preshift);
830 t1 = _mm_add_epi32(_mm_cvtps_epi32(s1), preshift);
832 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t1), postshift);
833 _mm_storeu_si128( (__m128i*)(dst + x), t0);
840 typedef VResizeCubicVec_32f16<SHRT_MIN> VResizeCubicVec_32f16u;
841 typedef VResizeCubicVec_32f16<0> VResizeCubicVec_32f16s;
843 struct VResizeCubicVec_32f
845 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
847 if( !checkHardwareSupport(CV_CPU_SSE) )
850 const float** src = (const float**)_src;
851 const float* beta = (const float*)_beta;
852 const float *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
853 float* dst = (float*)_dst;
855 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]),
856 b2 = _mm_set1_ps(beta[2]), b3 = _mm_set1_ps(beta[3]);
858 for( ; x <= width - 8; x += 8 )
860 __m128 x0, x1, y0, y1, s0, s1;
861 x0 = _mm_loadu_ps(S0 + x);
862 x1 = _mm_loadu_ps(S0 + x + 4);
863 y0 = _mm_loadu_ps(S1 + x);
864 y1 = _mm_loadu_ps(S1 + x + 4);
866 s0 = _mm_mul_ps(x0, b0);
867 s1 = _mm_mul_ps(x1, b0);
868 y0 = _mm_mul_ps(y0, b1);
869 y1 = _mm_mul_ps(y1, b1);
870 s0 = _mm_add_ps(s0, y0);
871 s1 = _mm_add_ps(s1, y1);
873 x0 = _mm_loadu_ps(S2 + x);
874 x1 = _mm_loadu_ps(S2 + x + 4);
875 y0 = _mm_loadu_ps(S3 + x);
876 y1 = _mm_loadu_ps(S3 + x + 4);
878 x0 = _mm_mul_ps(x0, b2);
879 x1 = _mm_mul_ps(x1, b2);
880 y0 = _mm_mul_ps(y0, b3);
881 y1 = _mm_mul_ps(y1, b3);
882 s0 = _mm_add_ps(s0, x0);
883 s1 = _mm_add_ps(s1, x1);
884 s0 = _mm_add_ps(s0, y0);
885 s1 = _mm_add_ps(s1, y1);
887 _mm_storeu_ps( dst + x, s0);
888 _mm_storeu_ps( dst + x + 4, s1);
897 typedef VResizeNoVec VResizeLinearVec_32s8u;
898 typedef VResizeNoVec VResizeLinearVec_32f16u;
899 typedef VResizeNoVec VResizeLinearVec_32f16s;
900 typedef VResizeNoVec VResizeLinearVec_32f;
902 typedef VResizeNoVec VResizeCubicVec_32s8u;
903 typedef VResizeNoVec VResizeCubicVec_32f16u;
904 typedef VResizeNoVec VResizeCubicVec_32f16s;
905 typedef VResizeNoVec VResizeCubicVec_32f;
909 typedef HResizeNoVec HResizeLinearVec_8u32s;
910 typedef HResizeNoVec HResizeLinearVec_16u32f;
911 typedef HResizeNoVec HResizeLinearVec_16s32f;
912 typedef HResizeNoVec HResizeLinearVec_32f;
913 typedef HResizeNoVec HResizeLinearVec_64f;
916 template<typename T, typename WT, typename AT, int ONE, class VecOp>
919 typedef T value_type;
921 typedef AT alpha_type;
923 void operator()(const T** src, WT** dst, int count,
924 const int* xofs, const AT* alpha,
925 int swidth, int dwidth, int cn, int xmin, int xmax ) const
930 int dx0 = vecOp((const uchar**)src, (uchar**)dst, count,
931 xofs, (const uchar*)alpha, swidth, dwidth, cn, xmin, xmax );
933 for( k = 0; k <= count - 2; k++ )
935 const T *S0 = src[k], *S1 = src[k+1];
936 WT *D0 = dst[k], *D1 = dst[k+1];
937 for( dx = dx0; dx < xmax; dx++ )
940 WT a0 = alpha[dx*2], a1 = alpha[dx*2+1];
941 WT t0 = S0[sx]*a0 + S0[sx + cn]*a1;
942 WT t1 = S1[sx]*a0 + S1[sx + cn]*a1;
943 D0[dx] = t0; D1[dx] = t1;
946 for( ; dx < dwidth; dx++ )
949 D0[dx] = WT(S0[sx]*ONE); D1[dx] = WT(S1[sx]*ONE);
953 for( ; k < count; k++ )
957 for( dx = 0; dx < xmax; dx++ )
960 D[dx] = S[sx]*alpha[dx*2] + S[sx+cn]*alpha[dx*2+1];
963 for( ; dx < dwidth; dx++ )
964 D[dx] = WT(S[xofs[dx]]*ONE);
970 template<typename T, typename WT, typename AT, class CastOp, class VecOp>
973 typedef T value_type;
975 typedef AT alpha_type;
977 void operator()(const WT** src, T* dst, const AT* beta, int width ) const
979 WT b0 = beta[0], b1 = beta[1];
980 const WT *S0 = src[0], *S1 = src[1];
984 int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
985 #if CV_ENABLE_UNROLLED
986 for( ; x <= width - 4; x += 4 )
989 t0 = S0[x]*b0 + S1[x]*b1;
990 t1 = S0[x+1]*b0 + S1[x+1]*b1;
991 dst[x] = castOp(t0); dst[x+1] = castOp(t1);
992 t0 = S0[x+2]*b0 + S1[x+2]*b1;
993 t1 = S0[x+3]*b0 + S1[x+3]*b1;
994 dst[x+2] = castOp(t0); dst[x+3] = castOp(t1);
997 for( ; x < width; x++ )
998 dst[x] = castOp(S0[x]*b0 + S1[x]*b1);
1003 struct VResizeLinear<uchar, int, short, FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>, VResizeLinearVec_32s8u>
1005 typedef uchar value_type;
1006 typedef int buf_type;
1007 typedef short alpha_type;
1009 void operator()(const buf_type** src, value_type* dst, const alpha_type* beta, int width ) const
1011 alpha_type b0 = beta[0], b1 = beta[1];
1012 const buf_type *S0 = src[0], *S1 = src[1];
1013 VResizeLinearVec_32s8u vecOp;
1015 int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
1016 #if CV_ENABLE_UNROLLED
1017 for( ; x <= width - 4; x += 4 )
1019 dst[x+0] = uchar(( ((b0 * (S0[x+0] >> 4)) >> 16) + ((b1 * (S1[x+0] >> 4)) >> 16) + 2)>>2);
1020 dst[x+1] = uchar(( ((b0 * (S0[x+1] >> 4)) >> 16) + ((b1 * (S1[x+1] >> 4)) >> 16) + 2)>>2);
1021 dst[x+2] = uchar(( ((b0 * (S0[x+2] >> 4)) >> 16) + ((b1 * (S1[x+2] >> 4)) >> 16) + 2)>>2);
1022 dst[x+3] = uchar(( ((b0 * (S0[x+3] >> 4)) >> 16) + ((b1 * (S1[x+3] >> 4)) >> 16) + 2)>>2);
1025 for( ; x < width; x++ )
1026 dst[x] = uchar(( ((b0 * (S0[x] >> 4)) >> 16) + ((b1 * (S1[x] >> 4)) >> 16) + 2)>>2);
1031 template<typename T, typename WT, typename AT>
1034 typedef T value_type;
1035 typedef WT buf_type;
1036 typedef AT alpha_type;
1038 void operator()(const T** src, WT** dst, int count,
1039 const int* xofs, const AT* alpha,
1040 int swidth, int dwidth, int cn, int xmin, int xmax ) const
1042 for( int k = 0; k < count; k++ )
1044 const T *S = src[k];
1046 int dx = 0, limit = xmin;
1049 for( ; dx < limit; dx++, alpha += 4 )
1051 int j, sx = xofs[dx] - cn;
1053 for( j = 0; j < 4; j++ )
1055 int sxj = sx + j*cn;
1056 if( (unsigned)sxj >= (unsigned)swidth )
1060 while( sxj >= swidth )
1063 v += S[sxj]*alpha[j];
1067 if( limit == dwidth )
1069 for( ; dx < xmax; dx++, alpha += 4 )
1072 D[dx] = S[sx-cn]*alpha[0] + S[sx]*alpha[1] +
1073 S[sx+cn]*alpha[2] + S[sx+cn*2]*alpha[3];
1083 template<typename T, typename WT, typename AT, class CastOp, class VecOp>
1086 typedef T value_type;
1087 typedef WT buf_type;
1088 typedef AT alpha_type;
1090 void operator()(const WT** src, T* dst, const AT* beta, int width ) const
1092 WT b0 = beta[0], b1 = beta[1], b2 = beta[2], b3 = beta[3];
1093 const WT *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
1097 int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
1098 for( ; x < width; x++ )
1099 dst[x] = castOp(S0[x]*b0 + S1[x]*b1 + S2[x]*b2 + S3[x]*b3);
1104 template<typename T, typename WT, typename AT>
1105 struct HResizeLanczos4
1107 typedef T value_type;
1108 typedef WT buf_type;
1109 typedef AT alpha_type;
1111 void operator()(const T** src, WT** dst, int count,
1112 const int* xofs, const AT* alpha,
1113 int swidth, int dwidth, int cn, int xmin, int xmax ) const
1115 for( int k = 0; k < count; k++ )
1117 const T *S = src[k];
1119 int dx = 0, limit = xmin;
1122 for( ; dx < limit; dx++, alpha += 8 )
1124 int j, sx = xofs[dx] - cn*3;
1126 for( j = 0; j < 8; j++ )
1128 int sxj = sx + j*cn;
1129 if( (unsigned)sxj >= (unsigned)swidth )
1133 while( sxj >= swidth )
1136 v += S[sxj]*alpha[j];
1140 if( limit == dwidth )
1142 for( ; dx < xmax; dx++, alpha += 8 )
1145 D[dx] = S[sx-cn*3]*alpha[0] + S[sx-cn*2]*alpha[1] +
1146 S[sx-cn]*alpha[2] + S[sx]*alpha[3] +
1147 S[sx+cn]*alpha[4] + S[sx+cn*2]*alpha[5] +
1148 S[sx+cn*3]*alpha[6] + S[sx+cn*4]*alpha[7];
1158 template<typename T, typename WT, typename AT, class CastOp, class VecOp>
1159 struct VResizeLanczos4
1161 typedef T value_type;
1162 typedef WT buf_type;
1163 typedef AT alpha_type;
1165 void operator()(const WT** src, T* dst, const AT* beta, int width ) const
1169 int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
1170 #if CV_ENABLE_UNROLLED
1171 for( ; x <= width - 4; x += 4 )
1174 const WT* S = src[0];
1175 WT s0 = S[x]*b, s1 = S[x+1]*b, s2 = S[x+2]*b, s3 = S[x+3]*b;
1177 for( k = 1; k < 8; k++ )
1179 b = beta[k]; S = src[k];
1180 s0 += S[x]*b; s1 += S[x+1]*b;
1181 s2 += S[x+2]*b; s3 += S[x+3]*b;
1184 dst[x] = castOp(s0); dst[x+1] = castOp(s1);
1185 dst[x+2] = castOp(s2); dst[x+3] = castOp(s3);
1188 for( ; x < width; x++ )
1190 dst[x] = castOp(src[0][x]*beta[0] + src[1][x]*beta[1] +
1191 src[2][x]*beta[2] + src[3][x]*beta[3] + src[4][x]*beta[4] +
1192 src[5][x]*beta[5] + src[6][x]*beta[6] + src[7][x]*beta[7]);
1198 static inline int clip(int x, int a, int b)
1200 return x >= a ? (x < b ? x : b-1) : a;
1203 static const int MAX_ESIZE=16;
1205 template <typename HResize, typename VResize>
1206 class resizeGeneric_Invoker :
1207 public ParallelLoopBody
1210 typedef typename HResize::value_type T;
1211 typedef typename HResize::buf_type WT;
1212 typedef typename HResize::alpha_type AT;
1214 resizeGeneric_Invoker(const Mat& _src, Mat &_dst, const int *_xofs, const int *_yofs,
1215 const AT* _alpha, const AT* __beta, const Size& _ssize, const Size &_dsize,
1216 int _ksize, int _xmin, int _xmax) :
1217 ParallelLoopBody(), src(_src), dst(_dst), xofs(_xofs), yofs(_yofs),
1218 alpha(_alpha), _beta(__beta), ssize(_ssize), dsize(_dsize),
1219 ksize(_ksize), xmin(_xmin), xmax(_xmax)
1223 virtual void operator() (const Range& range) const
1225 int dy, cn = src.channels();
1229 int bufstep = (int)alignSize(dsize.width, 16);
1230 AutoBuffer<WT> _buffer(bufstep*ksize);
1231 const T* srows[MAX_ESIZE]={0};
1232 WT* rows[MAX_ESIZE]={0};
1233 int prev_sy[MAX_ESIZE];
1235 for(int k = 0; k < ksize; k++ )
1238 rows[k] = (WT*)_buffer + bufstep*k;
1241 const AT* beta = _beta + ksize * range.start;
1243 for( dy = range.start; dy < range.end; dy++, beta += ksize )
1245 int sy0 = yofs[dy], k0=ksize, k1=0, ksize2 = ksize/2;
1247 for(int k = 0; k < ksize; k++ )
1249 int sy = clip(sy0 - ksize2 + 1 + k, 0, ssize.height);
1250 for( k1 = std::max(k1, k); k1 < ksize; k1++ )
1252 if( sy == prev_sy[k1] ) // if the sy-th row has been computed already, reuse it.
1255 memcpy( rows[k], rows[k1], bufstep*sizeof(rows[0][0]) );
1260 k0 = std::min(k0, k); // remember the first row that needs to be computed
1261 srows[k] = (T*)(src.data + src.step*sy);
1266 hresize( (const T**)(srows + k0), (WT**)(rows + k0), ksize - k0, xofs, (const AT*)(alpha),
1267 ssize.width, dsize.width, cn, xmin, xmax );
1268 vresize( (const WT**)rows, (T*)(dst.data + dst.step*dy), beta, dsize.width );
1275 const int* xofs, *yofs;
1276 const AT* alpha, *_beta;
1278 int ksize, xmin, xmax;
1281 template<class HResize, class VResize>
1282 static void resizeGeneric_( const Mat& src, Mat& dst,
1283 const int* xofs, const void* _alpha,
1284 const int* yofs, const void* _beta,
1285 int xmin, int xmax, int ksize )
1287 typedef typename HResize::alpha_type AT;
1289 const AT* beta = (const AT*)_beta;
1290 Size ssize = src.size(), dsize = dst.size();
1291 int cn = src.channels();
1296 // image resize is a separable operation. In case of not too strong
1298 Range range(0, dsize.height);
1299 resizeGeneric_Invoker<HResize, VResize> invoker(src, dst, xofs, yofs, (const AT*)_alpha, beta,
1300 ssize, dsize, ksize, xmin, xmax);
1301 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
1304 template <typename T, typename WT>
1305 struct ResizeAreaFastNoVec
1307 ResizeAreaFastNoVec(int, int) { }
1308 ResizeAreaFastNoVec(int, int, int, int) { }
1309 int operator() (const T*, T*, int) const
1314 class ResizeAreaFastVec_SIMD_8u
1317 ResizeAreaFastVec_SIMD_8u(int _cn, int _step) :
1318 cn(_cn), step(_step)
1320 use_simd = checkHardwareSupport(CV_CPU_SSE2);
1323 int operator() (const uchar* S, uchar* D, int w) const
1329 const uchar* S0 = S;
1330 const uchar* S1 = S0 + step;
1331 __m128i zero = _mm_setzero_si128();
1332 __m128i delta2 = _mm_set1_epi16(2);
1336 __m128i masklow = _mm_set1_epi16(0x00ff);
1337 for ( ; dx <= w - 8; dx += 8, S0 += 16, S1 += 16, D += 8)
1339 __m128i r0 = _mm_loadu_si128((const __m128i*)S0);
1340 __m128i r1 = _mm_loadu_si128((const __m128i*)S1);
1342 __m128i s0 = _mm_add_epi16(_mm_srli_epi16(r0, 8), _mm_and_si128(r0, masklow));
1343 __m128i s1 = _mm_add_epi16(_mm_srli_epi16(r1, 8), _mm_and_si128(r1, masklow));
1344 s0 = _mm_add_epi16(_mm_add_epi16(s0, s1), delta2);
1345 s0 = _mm_packus_epi16(_mm_srli_epi16(s0, 2), zero);
1347 _mm_storel_epi64((__m128i*)D, s0);
1351 for ( ; dx <= w - 11; dx += 6, S0 += 12, S1 += 12, D += 6)
1353 __m128i r0 = _mm_loadu_si128((const __m128i*)S0);
1354 __m128i r1 = _mm_loadu_si128((const __m128i*)S1);
1356 __m128i r0_16l = _mm_unpacklo_epi8(r0, zero);
1357 __m128i r0_16h = _mm_unpacklo_epi8(_mm_srli_si128(r0, 6), zero);
1358 __m128i r1_16l = _mm_unpacklo_epi8(r1, zero);
1359 __m128i r1_16h = _mm_unpacklo_epi8(_mm_srli_si128(r1, 6), zero);
1361 __m128i s0 = _mm_add_epi16(r0_16l, _mm_srli_si128(r0_16l, 6));
1362 __m128i s1 = _mm_add_epi16(r1_16l, _mm_srli_si128(r1_16l, 6));
1363 s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
1364 s0 = _mm_packus_epi16(_mm_srli_epi16(s0, 2), zero);
1365 _mm_storel_epi64((__m128i*)D, s0);
1367 s0 = _mm_add_epi16(r0_16h, _mm_srli_si128(r0_16h, 6));
1368 s1 = _mm_add_epi16(r1_16h, _mm_srli_si128(r1_16h, 6));
1369 s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
1370 s0 = _mm_packus_epi16(_mm_srli_epi16(s0, 2), zero);
1371 _mm_storel_epi64((__m128i*)(D+3), s0);
1376 int v[] = { 0, 0, -1, -1 };
1377 __m128i mask = _mm_loadu_si128((const __m128i*)v);
1379 for ( ; dx <= w - 8; dx += 8, S0 += 16, S1 += 16, D += 8)
1381 __m128i r0 = _mm_loadu_si128((const __m128i*)S0);
1382 __m128i r1 = _mm_loadu_si128((const __m128i*)S1);
1384 __m128i r0_16l = _mm_unpacklo_epi8(r0, zero);
1385 __m128i r0_16h = _mm_unpackhi_epi8(r0, zero);
1386 __m128i r1_16l = _mm_unpacklo_epi8(r1, zero);
1387 __m128i r1_16h = _mm_unpackhi_epi8(r1, zero);
1389 __m128i s0 = _mm_add_epi16(r0_16l, _mm_srli_si128(r0_16l, 8));
1390 __m128i s1 = _mm_add_epi16(r1_16l, _mm_srli_si128(r1_16l, 8));
1391 s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
1392 __m128i res0 = _mm_srli_epi16(s0, 2);
1394 s0 = _mm_add_epi16(r0_16h, _mm_srli_si128(r0_16h, 8));
1395 s1 = _mm_add_epi16(r1_16h, _mm_srli_si128(r1_16h, 8));
1396 s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
1397 __m128i res1 = _mm_srli_epi16(s0, 2);
1398 s0 = _mm_packus_epi16(_mm_or_si128(_mm_andnot_si128(mask, res0),
1399 _mm_and_si128(mask, _mm_slli_si128(res1, 8))), zero);
1400 _mm_storel_epi64((__m128i*)(D), s0);
1413 class ResizeAreaFastVec_SIMD_16u
1416 ResizeAreaFastVec_SIMD_16u(int _cn, int _step) :
1417 cn(_cn), step(_step)
1419 use_simd = checkHardwareSupport(CV_CPU_SSE2);
1422 int operator() (const ushort* S, ushort* D, int w) const
1428 const ushort* S0 = (const ushort*)S;
1429 const ushort* S1 = (const ushort*)((const uchar*)(S) + step);
1430 __m128i masklow = _mm_set1_epi32(0x0000ffff);
1431 __m128i zero = _mm_setzero_si128();
1432 __m128i delta2 = _mm_set1_epi32(2);
1434 #define _mm_packus_epi32(a, zero) _mm_packs_epi32(_mm_srai_epi32(_mm_slli_epi32(a, 16), 16), zero)
1438 for ( ; dx <= w - 4; dx += 4, S0 += 8, S1 += 8, D += 4)
1440 __m128i r0 = _mm_loadu_si128((const __m128i*)S0);
1441 __m128i r1 = _mm_loadu_si128((const __m128i*)S1);
1443 __m128i s0 = _mm_add_epi32(_mm_srli_epi32(r0, 16), _mm_and_si128(r0, masklow));
1444 __m128i s1 = _mm_add_epi32(_mm_srli_epi32(r1, 16), _mm_and_si128(r1, masklow));
1445 s0 = _mm_add_epi32(_mm_add_epi32(s0, s1), delta2);
1446 s0 = _mm_srli_epi32(s0, 2);
1447 s0 = _mm_packus_epi32(s0, zero);
1449 _mm_storel_epi64((__m128i*)D, s0);
1453 for ( ; dx <= w - 4; dx += 3, S0 += 6, S1 += 6, D += 3)
1455 __m128i r0 = _mm_loadu_si128((const __m128i*)S0);
1456 __m128i r1 = _mm_loadu_si128((const __m128i*)S1);
1458 __m128i r0_16l = _mm_unpacklo_epi16(r0, zero);
1459 __m128i r0_16h = _mm_unpacklo_epi16(_mm_srli_si128(r0, 6), zero);
1460 __m128i r1_16l = _mm_unpacklo_epi16(r1, zero);
1461 __m128i r1_16h = _mm_unpacklo_epi16(_mm_srli_si128(r1, 6), zero);
1463 __m128i s0 = _mm_add_epi32(r0_16l, r0_16h);
1464 __m128i s1 = _mm_add_epi32(r1_16l, r1_16h);
1465 s0 = _mm_add_epi32(delta2, _mm_add_epi32(s0, s1));
1466 s0 = _mm_packus_epi32(_mm_srli_epi32(s0, 2), zero);
1467 _mm_storel_epi64((__m128i*)D, s0);
1472 for ( ; dx <= w - 4; dx += 4, S0 += 8, S1 += 8, D += 4)
1474 __m128i r0 = _mm_loadu_si128((const __m128i*)S0);
1475 __m128i r1 = _mm_loadu_si128((const __m128i*)S1);
1477 __m128i r0_32l = _mm_unpacklo_epi16(r0, zero);
1478 __m128i r0_32h = _mm_unpackhi_epi16(r0, zero);
1479 __m128i r1_32l = _mm_unpacklo_epi16(r1, zero);
1480 __m128i r1_32h = _mm_unpackhi_epi16(r1, zero);
1482 __m128i s0 = _mm_add_epi32(r0_32l, r0_32h);
1483 __m128i s1 = _mm_add_epi32(r1_32l, r1_32h);
1484 s0 = _mm_add_epi32(s1, _mm_add_epi32(s0, delta2));
1485 s0 = _mm_packus_epi32(_mm_srli_epi32(s0, 2), zero);
1486 _mm_storel_epi64((__m128i*)D, s0);
1490 #undef _mm_packus_epi32
1502 typedef ResizeAreaFastNoVec<uchar, uchar> ResizeAreaFastVec_SIMD_8u;
1503 typedef ResizeAreaFastNoVec<ushort, ushort> ResizeAreaFastVec_SIMD_16u;
1506 template<typename T, typename SIMDVecOp>
1507 struct ResizeAreaFastVec
1509 ResizeAreaFastVec(int _scale_x, int _scale_y, int _cn, int _step) :
1510 scale_x(_scale_x), scale_y(_scale_y), cn(_cn), step(_step), vecOp(_cn, _step)
1512 fast_mode = scale_x == 2 && scale_y == 2 && (cn == 1 || cn == 3 || cn == 4);
1515 int operator() (const T* S, T* D, int w) const
1520 const T* nextS = (const T*)((const uchar*)S + step);
1521 int dx = vecOp(S, D, w);
1524 for( ; dx < w; ++dx )
1527 D[dx] = (T)((S[index] + S[index+1] + nextS[index] + nextS[index+1] + 2) >> 2);
1530 for( ; dx < w; dx += 3 )
1533 D[dx] = (T)((S[index] + S[index+3] + nextS[index] + nextS[index+3] + 2) >> 2);
1534 D[dx+1] = (T)((S[index+1] + S[index+4] + nextS[index+1] + nextS[index+4] + 2) >> 2);
1535 D[dx+2] = (T)((S[index+2] + S[index+5] + nextS[index+2] + nextS[index+5] + 2) >> 2);
1540 for( ; dx < w; dx += 4 )
1543 D[dx] = (T)((S[index] + S[index+4] + nextS[index] + nextS[index+4] + 2) >> 2);
1544 D[dx+1] = (T)((S[index+1] + S[index+5] + nextS[index+1] + nextS[index+5] + 2) >> 2);
1545 D[dx+2] = (T)((S[index+2] + S[index+6] + nextS[index+2] + nextS[index+6] + 2) >> 2);
1546 D[dx+3] = (T)((S[index+3] + S[index+7] + nextS[index+3] + nextS[index+7] + 2) >> 2);
1554 int scale_x, scale_y;
1561 template <typename T, typename WT, typename VecOp>
1562 class resizeAreaFast_Invoker :
1563 public ParallelLoopBody
1566 resizeAreaFast_Invoker(const Mat &_src, Mat &_dst,
1567 int _scale_x, int _scale_y, const int* _ofs, const int* _xofs) :
1568 ParallelLoopBody(), src(_src), dst(_dst), scale_x(_scale_x),
1569 scale_y(_scale_y), ofs(_ofs), xofs(_xofs)
1573 virtual void operator() (const Range& range) const
1575 Size ssize = src.size(), dsize = dst.size();
1576 int cn = src.channels();
1577 int area = scale_x*scale_y;
1578 float scale = 1.f/(area);
1579 int dwidth1 = (ssize.width/scale_x)*cn;
1584 VecOp vop(scale_x, scale_y, src.channels(), (int)src.step/*, area_ofs*/);
1586 for( dy = range.start; dy < range.end; dy++ )
1588 T* D = (T*)(dst.data + dst.step*dy);
1589 int sy0 = dy*scale_y;
1590 int w = sy0 + scale_y <= ssize.height ? dwidth1 : 0;
1592 if( sy0 >= ssize.height )
1594 for( dx = 0; dx < dsize.width; dx++ )
1599 dx = vop((const T*)(src.data + src.step * sy0), D, w);
1600 for( ; dx < w; dx++ )
1602 const T* S = (const T*)(src.data + src.step * sy0) + xofs[dx];
1605 #if CV_ENABLE_UNROLLED
1606 for( ; k <= area - 4; k += 4 )
1607 sum += S[ofs[k]] + S[ofs[k+1]] + S[ofs[k+2]] + S[ofs[k+3]];
1609 for( ; k < area; k++ )
1612 D[dx] = saturate_cast<T>(sum * scale);
1615 for( ; dx < dsize.width; dx++ )
1618 int count = 0, sx0 = xofs[dx];
1619 if( sx0 >= ssize.width )
1622 for( int sy = 0; sy < scale_y; sy++ )
1624 if( sy0 + sy >= ssize.height )
1626 const T* S = (const T*)(src.data + src.step*(sy0 + sy)) + sx0;
1627 for( int sx = 0; sx < scale_x*cn; sx += cn )
1629 if( sx0 + sx >= ssize.width )
1636 D[dx] = saturate_cast<T>((float)sum/count);
1644 int scale_x, scale_y;
1645 const int *ofs, *xofs;
1648 template<typename T, typename WT, typename VecOp>
1649 static void resizeAreaFast_( const Mat& src, Mat& dst, const int* ofs, const int* xofs,
1650 int scale_x, int scale_y )
1652 Range range(0, dst.rows);
1653 resizeAreaFast_Invoker<T, WT, VecOp> invoker(src, dst, scale_x,
1654 scale_y, ofs, xofs);
1655 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
1658 struct DecimateAlpha
1665 template<typename T, typename WT> class ResizeArea_Invoker :
1666 public ParallelLoopBody
1669 ResizeArea_Invoker( const Mat& _src, Mat& _dst,
1670 const DecimateAlpha* _xtab, int _xtab_size,
1671 const DecimateAlpha* _ytab, int _ytab_size,
1672 const int* _tabofs )
1677 xtab_size0 = _xtab_size;
1679 ytab_size = _ytab_size;
1683 virtual void operator() (const Range& range) const
1685 Size dsize = dst->size();
1686 int cn = dst->channels();
1688 AutoBuffer<WT> _buffer(dsize.width*2);
1689 const DecimateAlpha* xtab = xtab0;
1690 int xtab_size = xtab_size0;
1691 WT *buf = _buffer, *sum = buf + dsize.width;
1692 int j_start = tabofs[range.start], j_end = tabofs[range.end], j, k, dx, prev_dy = ytab[j_start].di;
1694 for( dx = 0; dx < dsize.width; dx++ )
1697 for( j = j_start; j < j_end; j++ )
1699 WT beta = ytab[j].alpha;
1700 int dy = ytab[j].di;
1701 int sy = ytab[j].si;
1704 const T* S = (const T*)(src->data + src->step*sy);
1705 for( dx = 0; dx < dsize.width; dx++ )
1709 for( k = 0; k < xtab_size; k++ )
1711 int dxn = xtab[k].di;
1712 WT alpha = xtab[k].alpha;
1713 buf[dxn] += S[xtab[k].si]*alpha;
1716 for( k = 0; k < xtab_size; k++ )
1718 int sxn = xtab[k].si;
1719 int dxn = xtab[k].di;
1720 WT alpha = xtab[k].alpha;
1721 WT t0 = buf[dxn] + S[sxn]*alpha;
1722 WT t1 = buf[dxn+1] + S[sxn+1]*alpha;
1723 buf[dxn] = t0; buf[dxn+1] = t1;
1726 for( k = 0; k < xtab_size; k++ )
1728 int sxn = xtab[k].si;
1729 int dxn = xtab[k].di;
1730 WT alpha = xtab[k].alpha;
1731 WT t0 = buf[dxn] + S[sxn]*alpha;
1732 WT t1 = buf[dxn+1] + S[sxn+1]*alpha;
1733 WT t2 = buf[dxn+2] + S[sxn+2]*alpha;
1734 buf[dxn] = t0; buf[dxn+1] = t1; buf[dxn+2] = t2;
1738 for( k = 0; k < xtab_size; k++ )
1740 int sxn = xtab[k].si;
1741 int dxn = xtab[k].di;
1742 WT alpha = xtab[k].alpha;
1743 WT t0 = buf[dxn] + S[sxn]*alpha;
1744 WT t1 = buf[dxn+1] + S[sxn+1]*alpha;
1745 buf[dxn] = t0; buf[dxn+1] = t1;
1746 t0 = buf[dxn+2] + S[sxn+2]*alpha;
1747 t1 = buf[dxn+3] + S[sxn+3]*alpha;
1748 buf[dxn+2] = t0; buf[dxn+3] = t1;
1753 for( k = 0; k < xtab_size; k++ )
1755 int sxn = xtab[k].si;
1756 int dxn = xtab[k].di;
1757 WT alpha = xtab[k].alpha;
1758 for( int c = 0; c < cn; c++ )
1759 buf[dxn + c] += S[sxn + c]*alpha;
1766 T* D = (T*)(dst->data + dst->step*prev_dy);
1768 for( dx = 0; dx < dsize.width; dx++ )
1770 D[dx] = saturate_cast<T>(sum[dx]);
1771 sum[dx] = beta*buf[dx];
1777 for( dx = 0; dx < dsize.width; dx++ )
1778 sum[dx] += beta*buf[dx];
1783 T* D = (T*)(dst->data + dst->step*prev_dy);
1784 for( dx = 0; dx < dsize.width; dx++ )
1785 D[dx] = saturate_cast<T>(sum[dx]);
1792 const DecimateAlpha* xtab0;
1793 const DecimateAlpha* ytab;
1794 int xtab_size0, ytab_size;
1799 template <typename T, typename WT>
1800 static void resizeArea_( const Mat& src, Mat& dst,
1801 const DecimateAlpha* xtab, int xtab_size,
1802 const DecimateAlpha* ytab, int ytab_size,
1805 parallel_for_(Range(0, dst.rows),
1806 ResizeArea_Invoker<T, WT>(src, dst, xtab, xtab_size, ytab, ytab_size, tabofs),
1807 dst.total()/((double)(1 << 16)));
1811 typedef void (*ResizeFunc)( const Mat& src, Mat& dst,
1812 const int* xofs, const void* alpha,
1813 const int* yofs, const void* beta,
1814 int xmin, int xmax, int ksize );
1816 typedef void (*ResizeAreaFastFunc)( const Mat& src, Mat& dst,
1817 const int* ofs, const int *xofs,
1818 int scale_x, int scale_y );
1820 typedef void (*ResizeAreaFunc)( const Mat& src, Mat& dst,
1821 const DecimateAlpha* xtab, int xtab_size,
1822 const DecimateAlpha* ytab, int ytab_size,
1826 static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, DecimateAlpha* tab )
1829 for(int dx = 0; dx < dsize; dx++ )
1831 double fsx1 = dx * scale;
1832 double fsx2 = fsx1 + scale;
1833 double cellWidth = std::min(scale, ssize - fsx1);
1835 int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2);
1837 sx2 = std::min(sx2, ssize - 1);
1838 sx1 = std::min(sx1, sx2);
1840 if( sx1 - fsx1 > 1e-3 )
1842 assert( k < ssize*2 );
1843 tab[k].di = dx * cn;
1844 tab[k].si = (sx1 - 1) * cn;
1845 tab[k++].alpha = (float)((sx1 - fsx1) / cellWidth);
1848 for(int sx = sx1; sx < sx2; sx++ )
1850 assert( k < ssize*2 );
1851 tab[k].di = dx * cn;
1852 tab[k].si = sx * cn;
1853 tab[k++].alpha = float(1.0 / cellWidth);
1856 if( fsx2 - sx2 > 1e-3 )
1858 assert( k < ssize*2 );
1859 tab[k].di = dx * cn;
1860 tab[k].si = sx2 * cn;
1861 tab[k++].alpha = (float)(std::min(std::min(fsx2 - sx2, 1.), cellWidth) / cellWidth);
1867 #if defined (HAVE_IPP) && ((IPP_VERSION_MAJOR == 7 && IPP_VERSION_MINOR >= 1) || IPP_VERSION_MAJOR > 7)
1868 class IPPresizeInvoker :
1869 public ParallelLoopBody
1872 IPPresizeInvoker(Mat &_src, Mat &_dst, double _inv_scale_x, double _inv_scale_y, int _mode, bool *_ok) :
1873 ParallelLoopBody(), src(_src), dst(_dst), inv_scale_x(_inv_scale_x), inv_scale_y(_inv_scale_y), mode(_mode), ok(_ok)
1875 IppStatus status = ippStsNotSupportedModeErr;
1877 IppiSize srcSize, dstSize;
1878 int type = src.type();
1879 int specSize = 0, initSize = 0;
1880 srcSize.width = src.cols;
1881 srcSize.height = src.rows;
1882 dstSize.width = dst.cols;
1883 dstSize.height = dst.rows;
1885 if (mode == (int)ippLinear)
1888 type == CV_8UC1 ? (ippiResizeFunc)ippiResizeLinear_8u_C1R :
1889 type == CV_8UC3 ? (ippiResizeFunc)ippiResizeLinear_8u_C3R :
1890 type == CV_8UC4 ? (ippiResizeFunc)ippiResizeLinear_8u_C4R :
1891 type == CV_16UC1 ? (ippiResizeFunc)ippiResizeLinear_16u_C1R :
1892 type == CV_16UC3 ? (ippiResizeFunc)ippiResizeLinear_16u_C3R :
1893 type == CV_16UC4 ? (ippiResizeFunc)ippiResizeLinear_16u_C4R :
1894 type == CV_16SC1 ? (ippiResizeFunc)ippiResizeLinear_16s_C1R :
1895 type == CV_16SC3 ? (ippiResizeFunc)ippiResizeLinear_16s_C3R :
1896 type == CV_16SC4 ? (ippiResizeFunc)ippiResizeLinear_16s_C4R :
1897 type == CV_32FC1 ? (ippiResizeFunc)ippiResizeLinear_32f_C1R :
1898 type == CV_32FC3 ? (ippiResizeFunc)ippiResizeLinear_32f_C3R :
1899 type == CV_32FC4 ? (ippiResizeFunc)ippiResizeLinear_32f_C4R :
1900 type == CV_64FC1 ? (ippiResizeFunc)ippiResizeLinear_64f_C1R :
1901 type == CV_64FC3 ? (ippiResizeFunc)ippiResizeLinear_64f_C3R :
1902 type == CV_64FC4 ? (ippiResizeFunc)ippiResizeLinear_64f_C4R :
1905 else if (mode == (int)ippCubic)
1908 type == CV_8UC1 ? (ippiResizeFunc)ippiResizeCubic_8u_C1R :
1909 type == CV_8UC3 ? (ippiResizeFunc)ippiResizeCubic_8u_C3R :
1910 type == CV_8UC4 ? (ippiResizeFunc)ippiResizeCubic_8u_C4R :
1911 type == CV_16UC1 ? (ippiResizeFunc)ippiResizeCubic_16u_C1R :
1912 type == CV_16UC3 ? (ippiResizeFunc)ippiResizeCubic_16u_C3R :
1913 type == CV_16UC4 ? (ippiResizeFunc)ippiResizeCubic_16u_C4R :
1914 type == CV_16SC1 ? (ippiResizeFunc)ippiResizeCubic_16s_C1R :
1915 type == CV_16SC3 ? (ippiResizeFunc)ippiResizeCubic_16s_C3R :
1916 type == CV_16SC4 ? (ippiResizeFunc)ippiResizeCubic_16s_C4R :
1917 type == CV_32FC1 ? (ippiResizeFunc)ippiResizeCubic_32f_C1R :
1918 type == CV_32FC3 ? (ippiResizeFunc)ippiResizeCubic_32f_C3R :
1919 type == CV_32FC4 ? (ippiResizeFunc)ippiResizeCubic_32f_C4R :
1923 switch (src.depth())
1926 status = ippiResizeGetSize_8u(srcSize, dstSize, (IppiInterpolationType)mode, 0, &specSize, &initSize);
1929 status = ippiResizeGetSize_16u(srcSize, dstSize, (IppiInterpolationType)mode, 0, &specSize, &initSize);
1932 status = ippiResizeGetSize_16s(srcSize, dstSize, (IppiInterpolationType)mode, 0, &specSize, &initSize);
1935 status = ippiResizeGetSize_32f(srcSize, dstSize, (IppiInterpolationType)mode, 0, &specSize, &initSize);
1938 status = ippiResizeGetSize_64f(srcSize, dstSize, (IppiInterpolationType)mode, 0, &specSize, &initSize);
1941 if (status != ippStsNoErr)
1947 specBuf.allocate(specSize);
1948 pSpec = (uchar*)specBuf;
1950 status = ippStsNotSupportedModeErr;
1951 if (mode == (int)ippLinear)
1953 switch (src.depth())
1956 status = ippiResizeLinearInit_8u(srcSize, dstSize, (IppiResizeSpec_32f*)pSpec);
1959 status = ippiResizeLinearInit_16u(srcSize, dstSize, (IppiResizeSpec_32f*)pSpec);
1962 status = ippiResizeLinearInit_16s(srcSize, dstSize, (IppiResizeSpec_32f*)pSpec);
1965 status = ippiResizeLinearInit_32f(srcSize, dstSize, (IppiResizeSpec_32f*)pSpec);
1968 status = ippiResizeLinearInit_64f(srcSize, dstSize, (IppiResizeSpec_64f*)pSpec);
1971 if (status != ippStsNoErr)
1977 else if (mode == (int)ippCubic)
1979 AutoBuffer<uchar> buf(initSize);
1980 uchar* pInit = (uchar*)buf;
1982 switch (src.depth())
1985 status = ippiResizeCubicInit_8u(srcSize, dstSize, 0.f, 0.75f, (IppiResizeSpec_32f*)pSpec, pInit);
1988 status = ippiResizeCubicInit_16u(srcSize, dstSize, 0.f, 0.75f, (IppiResizeSpec_32f*)pSpec, pInit);
1991 status = ippiResizeCubicInit_16s(srcSize, dstSize, 0.f, 0.75f, (IppiResizeSpec_32f*)pSpec, pInit);
1994 status = ippiResizeCubicInit_32f(srcSize, dstSize, 0.f, 0.75f, (IppiResizeSpec_32f*)pSpec, pInit);
1997 if (status != ippStsNoErr) *ok = false;
2005 virtual void operator() (const Range& range) const
2007 if (*ok == false) return;
2009 int cn = src.channels();
2010 int dsty = CV_IMIN(cvRound(range.start * inv_scale_y), dst.rows);
2011 int dstwidth = CV_IMIN(cvRound(src.cols * inv_scale_x), dst.cols);
2012 int dstheight = CV_IMIN(cvRound(range.end * inv_scale_y), dst.rows);
2014 IppiPoint dstOffset = { 0, dsty }, srcOffset = {0, 0};
2015 IppiSize dstSize = { dstwidth, dstheight - dsty };
2016 int bufsize = 0, itemSize = 0;
2018 IppStatus status = ippStsNotSupportedModeErr;
2020 switch (src.depth())
2024 status = ippiResizeGetBufferSize_8u((IppiResizeSpec_32f*)pSpec, dstSize, cn, &bufsize);
2025 if (status == ippStsNoErr)
2026 status = ippiResizeGetSrcOffset_8u((IppiResizeSpec_32f*)pSpec, dstOffset, &srcOffset);
2030 status = ippiResizeGetBufferSize_16u((IppiResizeSpec_32f*)pSpec, dstSize, cn, &bufsize);
2031 if (status == ippStsNoErr)
2032 status = ippiResizeGetSrcOffset_16u((IppiResizeSpec_32f*)pSpec, dstOffset, &srcOffset);
2036 status = ippiResizeGetBufferSize_16s((IppiResizeSpec_32f*)pSpec, dstSize, cn, &bufsize);
2037 if (status == ippStsNoErr)
2038 status = ippiResizeGetSrcOffset_16s((IppiResizeSpec_32f*)pSpec, dstOffset, &srcOffset);
2042 status = ippiResizeGetBufferSize_32f((IppiResizeSpec_32f*)pSpec, dstSize, cn, &bufsize);
2043 if (status == ippStsNoErr)
2044 status = ippiResizeGetSrcOffset_32f((IppiResizeSpec_32f*)pSpec, dstOffset, &srcOffset);
2048 status = ippiResizeGetBufferSize_64f((IppiResizeSpec_64f*)pSpec, dstSize, cn, &bufsize);
2049 if (status == ippStsNoErr)
2050 status = ippiResizeGetSrcOffset_64f((IppiResizeSpec_64f*)pSpec, dstOffset, &srcOffset);
2053 if (status != ippStsNoErr)
2059 Ipp8u* pSrc = (Ipp8u*)src.data + (int)src.step[0] * srcOffset.y + srcOffset.x * cn * itemSize;
2060 Ipp8u* pDst = (Ipp8u*)dst.data + (int)dst.step[0] * dstOffset.y + dstOffset.x * cn * itemSize;
2062 AutoBuffer<uchar> buf(bufsize + 64);
2063 uchar* bufptr = alignPtr((uchar*)buf, 32);
2064 if( func( pSrc, (int)src.step[0], pDst, (int)dst.step[0], dstOffset, dstSize, ippBorderRepl, 0, pSpec, bufptr ) < 0 )
2073 AutoBuffer<uchar> specBuf;
2075 ippiResizeFunc func;
2077 const IPPresizeInvoker& operator= (const IPPresizeInvoker&);
2083 static void ocl_computeResizeAreaTabs(int ssize, int dsize, double scale, int * const map_tab,
2084 float * const alpha_tab, int * const ofs_tab)
2087 for ( ; dx < dsize; dx++)
2091 double fsx1 = dx * scale;
2092 double fsx2 = fsx1 + scale;
2093 double cellWidth = std::min(scale, ssize - fsx1);
2095 int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2);
2097 sx2 = std::min(sx2, ssize - 1);
2098 sx1 = std::min(sx1, sx2);
2100 if (sx1 - fsx1 > 1e-3)
2102 map_tab[k] = sx1 - 1;
2103 alpha_tab[k++] = (float)((sx1 - fsx1) / cellWidth);
2106 for (int sx = sx1; sx < sx2; sx++)
2109 alpha_tab[k++] = float(1.0 / cellWidth);
2112 if (fsx2 - sx2 > 1e-3)
2115 alpha_tab[k++] = (float)(std::min(std::min(fsx2 - sx2, 1.), cellWidth) / cellWidth);
2121 static void ocl_computeResizeAreaFastTabs(int * dmap_tab, int * smap_tab, int scale, int dcols, int scol)
2123 for (int i = 0; i < dcols; ++i)
2124 dmap_tab[i] = scale * i;
2126 for (int i = 0, size = dcols * scale; i < size; ++i)
2127 smap_tab[i] = std::min(scol - 1, i);
2130 static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
2131 double fx, double fy, int interpolation)
2133 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2135 double inv_fx = 1. / fx, inv_fy = 1. / fy;
2136 float inv_fxf = (float)inv_fx, inv_fyf = (float)inv_fy;
2139 (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR ||
2140 (interpolation == INTER_AREA && inv_fx >= 1 && inv_fy >= 1) )) )
2143 UMat src = _src.getUMat();
2144 _dst.create(dsize, type);
2145 UMat dst = _dst.getUMat();
2148 size_t globalsize[] = { dst.cols, dst.rows };
2150 if (interpolation == INTER_LINEAR)
2152 int wdepth = std::max(depth, CV_32S);
2153 int wtype = CV_MAKETYPE(wdepth, cn);
2155 k.create("resizeLN", ocl::imgproc::resize_oclsrc,
2156 format("-D INTER_LINEAR -D depth=%d -D PIXTYPE=%s -D PIXTYPE1=%s "
2157 "-D WORKTYPE=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d",
2158 depth, ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype),
2159 ocl::convertTypeStr(depth, wdepth, cn, buf[0]),
2160 ocl::convertTypeStr(wdepth, depth, cn, buf[1]),
2163 else if (interpolation == INTER_NEAREST)
2165 k.create("resizeNN", ocl::imgproc::resize_oclsrc,
2166 format("-D INTER_NEAREST -D PIXTYPE=%s -D PIXTYPE1=%s -D cn=%d",
2167 ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth), cn));
2169 else if (interpolation == INTER_AREA)
2171 int iscale_x = saturate_cast<int>(inv_fx);
2172 int iscale_y = saturate_cast<int>(inv_fy);
2173 bool is_area_fast = std::abs(inv_fx - iscale_x) < DBL_EPSILON &&
2174 std::abs(inv_fy - iscale_y) < DBL_EPSILON;
2175 int wdepth = std::max(depth, is_area_fast ? CV_32S : CV_32F);
2176 int wtype = CV_MAKE_TYPE(wdepth, cn);
2179 String buildOption = format("-D INTER_AREA -D PIXTYPE=%s -D PIXTYPE1=%s -D WTV=%s -D convertToWTV=%s -D cn=%d",
2180 ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype),
2181 ocl::convertTypeStr(depth, wdepth, cn, cvt[0]), cn);
2183 UMat alphaOcl, tabofsOcl, mapOcl;
2188 int wdepth2 = std::max(CV_32F, depth), wtype2 = CV_MAKE_TYPE(wdepth2, cn);
2189 buildOption = buildOption + format(" -D convertToPIXTYPE=%s -D WT2V=%s -D convertToWT2V=%s -D INTER_AREA_FAST"
2190 " -D XSCALE=%d -D YSCALE=%d -D SCALE=%ff",
2191 ocl::convertTypeStr(wdepth2, depth, cn, cvt[0]),
2192 ocl::typeToStr(wtype2), ocl::convertTypeStr(wdepth, wdepth2, cn, cvt[1]),
2193 iscale_x, iscale_y, 1.0f / (iscale_x * iscale_y));
2195 k.create("resizeAREA_FAST", ocl::imgproc::resize_oclsrc, buildOption);
2199 int smap_tab_size = dst.cols * iscale_x + dst.rows * iscale_y;
2200 AutoBuffer<int> dmap_tab(dst.cols + dst.rows), smap_tab(smap_tab_size);
2201 int * dxmap_tab = dmap_tab, * dymap_tab = dxmap_tab + dst.cols;
2202 int * sxmap_tab = smap_tab, * symap_tab = smap_tab + dst.cols * iscale_y;
2204 ocl_computeResizeAreaFastTabs(dxmap_tab, sxmap_tab, iscale_x, dst.cols, src.cols);
2205 ocl_computeResizeAreaFastTabs(dymap_tab, symap_tab, iscale_y, dst.rows, src.rows);
2207 Mat(1, dst.cols + dst.rows, CV_32SC1, (void *)dmap_tab).copyTo(dmap);
2208 Mat(1, smap_tab_size, CV_32SC1, (void *)smap_tab).copyTo(smap);
2212 buildOption = buildOption + format(" -D convertToPIXTYPE=%s", ocl::convertTypeStr(wdepth, depth, cn, cvt[0]));
2213 k.create("resizeAREA", ocl::imgproc::resize_oclsrc, buildOption);
2217 Size ssize = src.size();
2218 int xytab_size = (ssize.width + ssize.height) << 1;
2219 int tabofs_size = dsize.height + dsize.width + 2;
2221 AutoBuffer<int> _xymap_tab(xytab_size), _xyofs_tab(tabofs_size);
2222 AutoBuffer<float> _xyalpha_tab(xytab_size);
2223 int * xmap_tab = _xymap_tab, * ymap_tab = _xymap_tab + (ssize.width << 1);
2224 float * xalpha_tab = _xyalpha_tab, * yalpha_tab = _xyalpha_tab + (ssize.width << 1);
2225 int * xofs_tab = _xyofs_tab, * yofs_tab = _xyofs_tab + dsize.width + 1;
2227 ocl_computeResizeAreaTabs(ssize.width, dsize.width, inv_fx, xmap_tab, xalpha_tab, xofs_tab);
2228 ocl_computeResizeAreaTabs(ssize.height, dsize.height, inv_fy, ymap_tab, yalpha_tab, yofs_tab);
2230 // loading precomputed arrays to GPU
2231 Mat(1, xytab_size, CV_32FC1, (void *)_xyalpha_tab).copyTo(alphaOcl);
2232 Mat(1, xytab_size, CV_32SC1, (void *)_xymap_tab).copyTo(mapOcl);
2233 Mat(1, tabofs_size, CV_32SC1, (void *)_xyofs_tab).copyTo(tabofsOcl);
2236 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src), dstarg = ocl::KernelArg::WriteOnly(dst);
2239 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(dmap), ocl::KernelArg::PtrReadOnly(smap));
2241 k.args(srcarg, dstarg, inv_fxf, inv_fyf, ocl::KernelArg::PtrReadOnly(tabofsOcl),
2242 ocl::KernelArg::PtrReadOnly(mapOcl), ocl::KernelArg::PtrReadOnly(alphaOcl));
2244 return k.run(2, globalsize, NULL, false);
2249 k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
2250 (float)inv_fx, (float)inv_fy);
2252 return k.run(2, globalsize, 0, false);
2259 //////////////////////////////////////////////////////////////////////////////////////////
2261 void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
2262 double inv_scale_x, double inv_scale_y, int interpolation )
2264 static ResizeFunc linear_tab[] =
2267 HResizeLinear<uchar, int, short,
2268 INTER_RESIZE_COEF_SCALE,
2269 HResizeLinearVec_8u32s>,
2270 VResizeLinear<uchar, int, short,
2271 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
2272 VResizeLinearVec_32s8u> >,
2275 HResizeLinear<ushort, float, float, 1,
2276 HResizeLinearVec_16u32f>,
2277 VResizeLinear<ushort, float, float, Cast<float, ushort>,
2278 VResizeLinearVec_32f16u> >,
2280 HResizeLinear<short, float, float, 1,
2281 HResizeLinearVec_16s32f>,
2282 VResizeLinear<short, float, float, Cast<float, short>,
2283 VResizeLinearVec_32f16s> >,
2286 HResizeLinear<float, float, float, 1,
2287 HResizeLinearVec_32f>,
2288 VResizeLinear<float, float, float, Cast<float, float>,
2289 VResizeLinearVec_32f> >,
2291 HResizeLinear<double, double, float, 1,
2293 VResizeLinear<double, double, float, Cast<double, double>,
2298 static ResizeFunc cubic_tab[] =
2301 HResizeCubic<uchar, int, short>,
2302 VResizeCubic<uchar, int, short,
2303 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
2304 VResizeCubicVec_32s8u> >,
2307 HResizeCubic<ushort, float, float>,
2308 VResizeCubic<ushort, float, float, Cast<float, ushort>,
2309 VResizeCubicVec_32f16u> >,
2311 HResizeCubic<short, float, float>,
2312 VResizeCubic<short, float, float, Cast<float, short>,
2313 VResizeCubicVec_32f16s> >,
2316 HResizeCubic<float, float, float>,
2317 VResizeCubic<float, float, float, Cast<float, float>,
2318 VResizeCubicVec_32f> >,
2320 HResizeCubic<double, double, float>,
2321 VResizeCubic<double, double, float, Cast<double, double>,
2326 static ResizeFunc lanczos4_tab[] =
2328 resizeGeneric_<HResizeLanczos4<uchar, int, short>,
2329 VResizeLanczos4<uchar, int, short,
2330 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
2333 resizeGeneric_<HResizeLanczos4<ushort, float, float>,
2334 VResizeLanczos4<ushort, float, float, Cast<float, ushort>,
2336 resizeGeneric_<HResizeLanczos4<short, float, float>,
2337 VResizeLanczos4<short, float, float, Cast<float, short>,
2340 resizeGeneric_<HResizeLanczos4<float, float, float>,
2341 VResizeLanczos4<float, float, float, Cast<float, float>,
2343 resizeGeneric_<HResizeLanczos4<double, double, float>,
2344 VResizeLanczos4<double, double, float, Cast<double, double>,
2349 static ResizeAreaFastFunc areafast_tab[] =
2351 resizeAreaFast_<uchar, int, ResizeAreaFastVec<uchar, ResizeAreaFastVec_SIMD_8u> >,
2353 resizeAreaFast_<ushort, float, ResizeAreaFastVec<ushort, ResizeAreaFastVec_SIMD_16u> >,
2354 resizeAreaFast_<short, float, ResizeAreaFastVec<short, ResizeAreaFastNoVec<short, float> > >,
2356 resizeAreaFast_<float, float, ResizeAreaFastNoVec<float, float> >,
2357 resizeAreaFast_<double, double, ResizeAreaFastNoVec<double, double> >,
2361 static ResizeAreaFunc area_tab[] =
2363 resizeArea_<uchar, float>, 0, resizeArea_<ushort, float>,
2364 resizeArea_<short, float>, 0, resizeArea_<float, float>,
2365 resizeArea_<double, double>, 0
2368 Size ssize = _src.size();
2370 CV_Assert( ssize.area() > 0 );
2371 CV_Assert( dsize.area() > 0 || (inv_scale_x > 0 && inv_scale_y > 0) );
2372 if( dsize.area() == 0 )
2374 dsize = Size(saturate_cast<int>(ssize.width*inv_scale_x),
2375 saturate_cast<int>(ssize.height*inv_scale_y));
2376 CV_Assert( dsize.area() > 0 );
2380 inv_scale_x = (double)dsize.width/ssize.width;
2381 inv_scale_y = (double)dsize.height/ssize.height;
2384 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
2385 ocl_resize(_src, _dst, dsize, inv_scale_x, inv_scale_y, interpolation))
2387 Mat src = _src.getMat();
2388 _dst.create(dsize, src.type());
2389 Mat dst = _dst.getMat();
2391 #ifdef HAVE_TEGRA_OPTIMIZATION
2392 if (tegra::resize(src, dst, (float)inv_scale_x, (float)inv_scale_y, interpolation))
2396 int depth = src.depth(), cn = src.channels();
2397 double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y;
2398 int k, sx, sy, dx, dy;
2400 #if defined (HAVE_IPP) && ((IPP_VERSION_MAJOR == 7 && IPP_VERSION_MINOR >= 1) || IPP_VERSION_MAJOR > 7)
2401 #define IPP_RESIZE_EPS 1.e-10
2403 double ex = fabs((double)dsize.width/src.cols - inv_scale_x)/inv_scale_x;
2404 double ey = fabs((double)dsize.height/src.rows - inv_scale_y)/inv_scale_y;
2406 if ((ex < IPP_RESIZE_EPS && ey < IPP_RESIZE_EPS && depth != CV_64F) ||
2407 (ex == 0 && ey == 0 && depth == CV_64F))
2410 if (interpolation == INTER_LINEAR && src.rows >= 2 && src.cols >= 2)
2414 else if (interpolation == INTER_CUBIC && src.rows >= 4 && src.cols >= 4)
2418 if( mode != 0 && (cn == 1 || cn ==3 || cn == 4) &&
2419 (depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F ||
2420 (depth == CV_64F && mode == ippLinear)))
2423 Range range(0, src.rows);
2424 IPPresizeInvoker invoker(src, dst, inv_scale_x, inv_scale_y, mode, &ok);
2425 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
2430 #undef IPP_RESIZE_EPS
2433 if( interpolation == INTER_NEAREST )
2435 resizeNN( src, dst, inv_scale_x, inv_scale_y );
2440 int iscale_x = saturate_cast<int>(scale_x);
2441 int iscale_y = saturate_cast<int>(scale_y);
2443 bool is_area_fast = std::abs(scale_x - iscale_x) < DBL_EPSILON &&
2444 std::abs(scale_y - iscale_y) < DBL_EPSILON;
2446 // in case of scale_x && scale_y is equal to 2
2447 // INTER_AREA (fast) also is equal to INTER_LINEAR
2448 if( interpolation == INTER_LINEAR && is_area_fast && iscale_x == 2 && iscale_y == 2 )
2449 interpolation = INTER_AREA;
2451 // true "area" interpolation is only implemented for the case (scale_x <= 1 && scale_y <= 1).
2452 // In other cases it is emulated using some variant of bilinear interpolation
2453 if( interpolation == INTER_AREA && scale_x >= 1 && scale_y >= 1 )
2457 int area = iscale_x*iscale_y;
2458 size_t srcstep = src.step / src.elemSize1();
2459 AutoBuffer<int> _ofs(area + dsize.width*cn);
2461 int* xofs = ofs + area;
2462 ResizeAreaFastFunc func = areafast_tab[depth];
2463 CV_Assert( func != 0 );
2465 for( sy = 0, k = 0; sy < iscale_y; sy++ )
2466 for( sx = 0; sx < iscale_x; sx++ )
2467 ofs[k++] = (int)(sy*srcstep + sx*cn);
2469 for( dx = 0; dx < dsize.width; dx++ )
2473 for( k = 0; k < cn; k++ )
2474 xofs[j + k] = sx + k;
2477 func( src, dst, ofs, xofs, iscale_x, iscale_y );
2481 ResizeAreaFunc func = area_tab[depth];
2482 CV_Assert( func != 0 && cn <= 4 );
2484 AutoBuffer<DecimateAlpha> _xytab((ssize.width + ssize.height)*2);
2485 DecimateAlpha* xtab = _xytab, *ytab = xtab + ssize.width*2;
2487 int xtab_size = computeResizeAreaTab(ssize.width, dsize.width, cn, scale_x, xtab);
2488 int ytab_size = computeResizeAreaTab(ssize.height, dsize.height, 1, scale_y, ytab);
2490 AutoBuffer<int> _tabofs(dsize.height + 1);
2491 int* tabofs = _tabofs;
2492 for( k = 0, dy = 0; k < ytab_size; k++ )
2494 if( k == 0 || ytab[k].di != ytab[k-1].di )
2496 assert( ytab[k].di == dy );
2500 tabofs[dy] = ytab_size;
2502 func( src, dst, xtab, xtab_size, ytab, ytab_size, tabofs );
2507 int xmin = 0, xmax = dsize.width, width = dsize.width*cn;
2508 bool area_mode = interpolation == INTER_AREA;
2509 bool fixpt = depth == CV_8U;
2512 int ksize=0, ksize2;
2513 if( interpolation == INTER_CUBIC )
2514 ksize = 4, func = cubic_tab[depth];
2515 else if( interpolation == INTER_LANCZOS4 )
2516 ksize = 8, func = lanczos4_tab[depth];
2517 else if( interpolation == INTER_LINEAR || interpolation == INTER_AREA )
2518 ksize = 2, func = linear_tab[depth];
2520 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
2523 CV_Assert( func != 0 );
2525 AutoBuffer<uchar> _buffer((width + dsize.height)*(sizeof(int) + sizeof(float)*ksize));
2526 int* xofs = (int*)(uchar*)_buffer;
2527 int* yofs = xofs + width;
2528 float* alpha = (float*)(yofs + dsize.height);
2529 short* ialpha = (short*)alpha;
2530 float* beta = alpha + width*ksize;
2531 short* ibeta = ialpha + width*ksize;
2532 float cbuf[MAX_ESIZE];
2534 for( dx = 0; dx < dsize.width; dx++ )
2538 fx = (float)((dx+0.5)*scale_x - 0.5);
2544 sx = cvFloor(dx*scale_x);
2545 fx = (float)((dx+1) - (sx+1)*inv_scale_x);
2546 fx = fx <= 0 ? 0.f : fx - cvFloor(fx);
2552 if( sx < 0 && (interpolation != INTER_CUBIC && interpolation != INTER_LANCZOS4))
2556 if( sx + ksize2 >= ssize.width )
2558 xmax = std::min( xmax, dx );
2559 if( sx >= ssize.width-1 && (interpolation != INTER_CUBIC && interpolation != INTER_LANCZOS4))
2560 fx = 0, sx = ssize.width-1;
2563 for( k = 0, sx *= cn; k < cn; k++ )
2564 xofs[dx*cn + k] = sx + k;
2566 if( interpolation == INTER_CUBIC )
2567 interpolateCubic( fx, cbuf );
2568 else if( interpolation == INTER_LANCZOS4 )
2569 interpolateLanczos4( fx, cbuf );
2577 for( k = 0; k < ksize; k++ )
2578 ialpha[dx*cn*ksize + k] = saturate_cast<short>(cbuf[k]*INTER_RESIZE_COEF_SCALE);
2579 for( ; k < cn*ksize; k++ )
2580 ialpha[dx*cn*ksize + k] = ialpha[dx*cn*ksize + k - ksize];
2584 for( k = 0; k < ksize; k++ )
2585 alpha[dx*cn*ksize + k] = cbuf[k];
2586 for( ; k < cn*ksize; k++ )
2587 alpha[dx*cn*ksize + k] = alpha[dx*cn*ksize + k - ksize];
2591 for( dy = 0; dy < dsize.height; dy++ )
2595 fy = (float)((dy+0.5)*scale_y - 0.5);
2601 sy = cvFloor(dy*scale_y);
2602 fy = (float)((dy+1) - (sy+1)*inv_scale_y);
2603 fy = fy <= 0 ? 0.f : fy - cvFloor(fy);
2607 if( interpolation == INTER_CUBIC )
2608 interpolateCubic( fy, cbuf );
2609 else if( interpolation == INTER_LANCZOS4 )
2610 interpolateLanczos4( fy, cbuf );
2619 for( k = 0; k < ksize; k++ )
2620 ibeta[dy*ksize + k] = saturate_cast<short>(cbuf[k]*INTER_RESIZE_COEF_SCALE);
2624 for( k = 0; k < ksize; k++ )
2625 beta[dy*ksize + k] = cbuf[k];
2629 func( src, dst, xofs, fixpt ? (void*)ialpha : (void*)alpha, yofs,
2630 fixpt ? (void*)ibeta : (void*)beta, xmin, xmax, ksize );
2634 /****************************************************************************************\
2635 * General warping (affine, perspective, remap) *
2636 \****************************************************************************************/
2641 template<typename T>
2642 static void remapNearest( const Mat& _src, Mat& _dst, const Mat& _xy,
2643 int borderType, const Scalar& _borderValue )
2645 Size ssize = _src.size(), dsize = _dst.size();
2646 int cn = _src.channels();
2647 const T* S0 = (const T*)_src.data;
2648 size_t sstep = _src.step/sizeof(S0[0]);
2649 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
2650 saturate_cast<T>(_borderValue[1]),
2651 saturate_cast<T>(_borderValue[2]),
2652 saturate_cast<T>(_borderValue[3]));
2655 unsigned width1 = ssize.width, height1 = ssize.height;
2657 if( _dst.isContinuous() && _xy.isContinuous() )
2659 dsize.width *= dsize.height;
2663 for( dy = 0; dy < dsize.height; dy++ )
2665 T* D = (T*)(_dst.data + _dst.step*dy);
2666 const short* XY = (const short*)(_xy.data + _xy.step*dy);
2670 for( dx = 0; dx < dsize.width; dx++ )
2672 int sx = XY[dx*2], sy = XY[dx*2+1];
2673 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
2674 D[dx] = S0[sy*sstep + sx];
2677 if( borderType == BORDER_REPLICATE )
2679 sx = clip(sx, 0, ssize.width);
2680 sy = clip(sy, 0, ssize.height);
2681 D[dx] = S0[sy*sstep + sx];
2683 else if( borderType == BORDER_CONSTANT )
2685 else if( borderType != BORDER_TRANSPARENT )
2687 sx = borderInterpolate(sx, ssize.width, borderType);
2688 sy = borderInterpolate(sy, ssize.height, borderType);
2689 D[dx] = S0[sy*sstep + sx];
2696 for( dx = 0; dx < dsize.width; dx++, D += cn )
2698 int sx = XY[dx*2], sy = XY[dx*2+1], k;
2700 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
2704 S = S0 + sy*sstep + sx*3;
2705 D[0] = S[0], D[1] = S[1], D[2] = S[2];
2709 S = S0 + sy*sstep + sx*4;
2710 D[0] = S[0], D[1] = S[1], D[2] = S[2], D[3] = S[3];
2714 S = S0 + sy*sstep + sx*cn;
2715 for( k = 0; k < cn; k++ )
2719 else if( borderType != BORDER_TRANSPARENT )
2721 if( borderType == BORDER_REPLICATE )
2723 sx = clip(sx, 0, ssize.width);
2724 sy = clip(sy, 0, ssize.height);
2725 S = S0 + sy*sstep + sx*cn;
2727 else if( borderType == BORDER_CONSTANT )
2731 sx = borderInterpolate(sx, ssize.width, borderType);
2732 sy = borderInterpolate(sy, ssize.height, borderType);
2733 S = S0 + sy*sstep + sx*cn;
2735 for( k = 0; k < cn; k++ )
2746 int operator()( const Mat&, void*, const short*, const ushort*,
2747 const void*, int ) const { return 0; }
2754 int operator()( const Mat& _src, void* _dst, const short* XY,
2755 const ushort* FXY, const void* _wtab, int width ) const
2757 int cn = _src.channels();
2759 if( (cn != 1 && cn != 3 && cn != 4) || !checkHardwareSupport(CV_CPU_SSE2))
2762 const uchar *S0 = _src.data, *S1 = _src.data + _src.step;
2763 const short* wtab = cn == 1 ? (const short*)_wtab : &BilinearTab_iC4[0][0][0];
2764 uchar* D = (uchar*)_dst;
2766 int x = 0, sstep = (int)_src.step;
2768 __m128i delta = _mm_set1_epi32(INTER_REMAP_COEF_SCALE/2);
2769 __m128i xy2ofs = _mm_set1_epi32(cn + (sstep << 16));
2770 __m128i z = _mm_setzero_si128();
2771 int CV_DECL_ALIGNED(16) iofs0[4], iofs1[4];
2775 for( ; x <= width - 8; x += 8 )
2777 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
2778 __m128i xy1 = _mm_loadu_si128( (const __m128i*)(XY + x*2 + 8));
2779 __m128i v0, v1, v2, v3, a0, a1, b0, b1;
2782 xy0 = _mm_madd_epi16( xy0, xy2ofs );
2783 xy1 = _mm_madd_epi16( xy1, xy2ofs );
2784 _mm_store_si128( (__m128i*)iofs0, xy0 );
2785 _mm_store_si128( (__m128i*)iofs1, xy1 );
2787 i0 = *(ushort*)(S0 + iofs0[0]) + (*(ushort*)(S0 + iofs0[1]) << 16);
2788 i1 = *(ushort*)(S0 + iofs0[2]) + (*(ushort*)(S0 + iofs0[3]) << 16);
2789 v0 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
2790 i0 = *(ushort*)(S1 + iofs0[0]) + (*(ushort*)(S1 + iofs0[1]) << 16);
2791 i1 = *(ushort*)(S1 + iofs0[2]) + (*(ushort*)(S1 + iofs0[3]) << 16);
2792 v1 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
2793 v0 = _mm_unpacklo_epi8(v0, z);
2794 v1 = _mm_unpacklo_epi8(v1, z);
2796 a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x]*4)),
2797 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+1]*4)));
2798 a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+2]*4)),
2799 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+3]*4)));
2800 b0 = _mm_unpacklo_epi64(a0, a1);
2801 b1 = _mm_unpackhi_epi64(a0, a1);
2802 v0 = _mm_madd_epi16(v0, b0);
2803 v1 = _mm_madd_epi16(v1, b1);
2804 v0 = _mm_add_epi32(_mm_add_epi32(v0, v1), delta);
2806 i0 = *(ushort*)(S0 + iofs1[0]) + (*(ushort*)(S0 + iofs1[1]) << 16);
2807 i1 = *(ushort*)(S0 + iofs1[2]) + (*(ushort*)(S0 + iofs1[3]) << 16);
2808 v2 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
2809 i0 = *(ushort*)(S1 + iofs1[0]) + (*(ushort*)(S1 + iofs1[1]) << 16);
2810 i1 = *(ushort*)(S1 + iofs1[2]) + (*(ushort*)(S1 + iofs1[3]) << 16);
2811 v3 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
2812 v2 = _mm_unpacklo_epi8(v2, z);
2813 v3 = _mm_unpacklo_epi8(v3, z);
2815 a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+4]*4)),
2816 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+5]*4)));
2817 a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+6]*4)),
2818 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+7]*4)));
2819 b0 = _mm_unpacklo_epi64(a0, a1);
2820 b1 = _mm_unpackhi_epi64(a0, a1);
2821 v2 = _mm_madd_epi16(v2, b0);
2822 v3 = _mm_madd_epi16(v3, b1);
2823 v2 = _mm_add_epi32(_mm_add_epi32(v2, v3), delta);
2825 v0 = _mm_srai_epi32(v0, INTER_REMAP_COEF_BITS);
2826 v2 = _mm_srai_epi32(v2, INTER_REMAP_COEF_BITS);
2827 v0 = _mm_packus_epi16(_mm_packs_epi32(v0, v2), z);
2828 _mm_storel_epi64( (__m128i*)(D + x), v0 );
2833 for( ; x <= width - 5; x += 4, D += 12 )
2835 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
2836 __m128i u0, v0, u1, v1;
2838 xy0 = _mm_madd_epi16( xy0, xy2ofs );
2839 _mm_store_si128( (__m128i*)iofs0, xy0 );
2840 const __m128i *w0, *w1;
2841 w0 = (const __m128i*)(wtab + FXY[x]*16);
2842 w1 = (const __m128i*)(wtab + FXY[x+1]*16);
2844 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[0])),
2845 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[0] + 3)));
2846 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[0])),
2847 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[0] + 3)));
2848 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[1])),
2849 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[1] + 3)));
2850 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[1])),
2851 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[1] + 3)));
2852 u0 = _mm_unpacklo_epi8(u0, z);
2853 v0 = _mm_unpacklo_epi8(v0, z);
2854 u1 = _mm_unpacklo_epi8(u1, z);
2855 v1 = _mm_unpacklo_epi8(v1, z);
2856 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
2857 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
2858 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
2859 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
2860 u0 = _mm_slli_si128(u0, 4);
2861 u0 = _mm_packs_epi32(u0, u1);
2862 u0 = _mm_packus_epi16(u0, u0);
2863 _mm_storel_epi64((__m128i*)D, _mm_srli_si128(u0,1));
2865 w0 = (const __m128i*)(wtab + FXY[x+2]*16);
2866 w1 = (const __m128i*)(wtab + FXY[x+3]*16);
2868 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[2])),
2869 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[2] + 3)));
2870 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[2])),
2871 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[2] + 3)));
2872 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[3])),
2873 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[3] + 3)));
2874 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[3])),
2875 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[3] + 3)));
2876 u0 = _mm_unpacklo_epi8(u0, z);
2877 v0 = _mm_unpacklo_epi8(v0, z);
2878 u1 = _mm_unpacklo_epi8(u1, z);
2879 v1 = _mm_unpacklo_epi8(v1, z);
2880 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
2881 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
2882 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
2883 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
2884 u0 = _mm_slli_si128(u0, 4);
2885 u0 = _mm_packs_epi32(u0, u1);
2886 u0 = _mm_packus_epi16(u0, u0);
2887 _mm_storel_epi64((__m128i*)(D + 6), _mm_srli_si128(u0,1));
2892 for( ; x <= width - 4; x += 4, D += 16 )
2894 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
2895 __m128i u0, v0, u1, v1;
2897 xy0 = _mm_madd_epi16( xy0, xy2ofs );
2898 _mm_store_si128( (__m128i*)iofs0, xy0 );
2899 const __m128i *w0, *w1;
2900 w0 = (const __m128i*)(wtab + FXY[x]*16);
2901 w1 = (const __m128i*)(wtab + FXY[x+1]*16);
2903 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[0])),
2904 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[0] + 4)));
2905 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[0])),
2906 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[0] + 4)));
2907 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[1])),
2908 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[1] + 4)));
2909 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[1])),
2910 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[1] + 4)));
2911 u0 = _mm_unpacklo_epi8(u0, z);
2912 v0 = _mm_unpacklo_epi8(v0, z);
2913 u1 = _mm_unpacklo_epi8(u1, z);
2914 v1 = _mm_unpacklo_epi8(v1, z);
2915 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
2916 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
2917 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
2918 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
2919 u0 = _mm_packs_epi32(u0, u1);
2920 u0 = _mm_packus_epi16(u0, u0);
2921 _mm_storel_epi64((__m128i*)D, u0);
2923 w0 = (const __m128i*)(wtab + FXY[x+2]*16);
2924 w1 = (const __m128i*)(wtab + FXY[x+3]*16);
2926 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[2])),
2927 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[2] + 4)));
2928 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[2])),
2929 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[2] + 4)));
2930 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[3])),
2931 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[3] + 4)));
2932 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[3])),
2933 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[3] + 4)));
2934 u0 = _mm_unpacklo_epi8(u0, z);
2935 v0 = _mm_unpacklo_epi8(v0, z);
2936 u1 = _mm_unpacklo_epi8(u1, z);
2937 v1 = _mm_unpacklo_epi8(v1, z);
2938 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
2939 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
2940 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
2941 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
2942 u0 = _mm_packs_epi32(u0, u1);
2943 u0 = _mm_packus_epi16(u0, u0);
2944 _mm_storel_epi64((__m128i*)(D + 8), u0);
2954 typedef RemapNoVec RemapVec_8u;
2959 template<class CastOp, class VecOp, typename AT>
2960 static void remapBilinear( const Mat& _src, Mat& _dst, const Mat& _xy,
2961 const Mat& _fxy, const void* _wtab,
2962 int borderType, const Scalar& _borderValue )
2964 typedef typename CastOp::rtype T;
2965 typedef typename CastOp::type1 WT;
2966 Size ssize = _src.size(), dsize = _dst.size();
2967 int cn = _src.channels();
2968 const AT* wtab = (const AT*)_wtab;
2969 const T* S0 = (const T*)_src.data;
2970 size_t sstep = _src.step/sizeof(S0[0]);
2971 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
2972 saturate_cast<T>(_borderValue[1]),
2973 saturate_cast<T>(_borderValue[2]),
2974 saturate_cast<T>(_borderValue[3]));
2979 unsigned width1 = std::max(ssize.width-1, 0), height1 = std::max(ssize.height-1, 0);
2980 CV_Assert( cn <= 4 && ssize.area() > 0 );
2982 if( _src.type() == CV_8UC3 )
2983 width1 = std::max(ssize.width-2, 0);
2986 for( dy = 0; dy < dsize.height; dy++ )
2988 T* D = (T*)(_dst.data + _dst.step*dy);
2989 const short* XY = (const short*)(_xy.data + _xy.step*dy);
2990 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
2992 bool prevInlier = false;
2994 for( dx = 0; dx <= dsize.width; dx++ )
2996 bool curInlier = dx < dsize.width ?
2997 (unsigned)XY[dx*2] < width1 &&
2998 (unsigned)XY[dx*2+1] < height1 : !prevInlier;
2999 if( curInlier == prevInlier )
3005 prevInlier = curInlier;
3009 int len = vecOp( _src, D, XY + dx*2, FXY + dx, wtab, X1 - dx );
3015 for( ; dx < X1; dx++, D++ )
3017 int sx = XY[dx*2], sy = XY[dx*2+1];
3018 const AT* w = wtab + FXY[dx]*4;
3019 const T* S = S0 + sy*sstep + sx;
3020 *D = castOp(WT(S[0]*w[0] + S[1]*w[1] + S[sstep]*w[2] + S[sstep+1]*w[3]));
3024 for( ; dx < X1; dx++, D += 2 )
3026 int sx = XY[dx*2], sy = XY[dx*2+1];
3027 const AT* w = wtab + FXY[dx]*4;
3028 const T* S = S0 + sy*sstep + sx*2;
3029 WT t0 = S[0]*w[0] + S[2]*w[1] + S[sstep]*w[2] + S[sstep+2]*w[3];
3030 WT t1 = S[1]*w[0] + S[3]*w[1] + S[sstep+1]*w[2] + S[sstep+3]*w[3];
3031 D[0] = castOp(t0); D[1] = castOp(t1);
3034 for( ; dx < X1; dx++, D += 3 )
3036 int sx = XY[dx*2], sy = XY[dx*2+1];
3037 const AT* w = wtab + FXY[dx]*4;
3038 const T* S = S0 + sy*sstep + sx*3;
3039 WT t0 = S[0]*w[0] + S[3]*w[1] + S[sstep]*w[2] + S[sstep+3]*w[3];
3040 WT t1 = S[1]*w[0] + S[4]*w[1] + S[sstep+1]*w[2] + S[sstep+4]*w[3];
3041 WT t2 = S[2]*w[0] + S[5]*w[1] + S[sstep+2]*w[2] + S[sstep+5]*w[3];
3042 D[0] = castOp(t0); D[1] = castOp(t1); D[2] = castOp(t2);
3045 for( ; dx < X1; dx++, D += 4 )
3047 int sx = XY[dx*2], sy = XY[dx*2+1];
3048 const AT* w = wtab + FXY[dx]*4;
3049 const T* S = S0 + sy*sstep + sx*4;
3050 WT t0 = S[0]*w[0] + S[4]*w[1] + S[sstep]*w[2] + S[sstep+4]*w[3];
3051 WT t1 = S[1]*w[0] + S[5]*w[1] + S[sstep+1]*w[2] + S[sstep+5]*w[3];
3052 D[0] = castOp(t0); D[1] = castOp(t1);
3053 t0 = S[2]*w[0] + S[6]*w[1] + S[sstep+2]*w[2] + S[sstep+6]*w[3];
3054 t1 = S[3]*w[0] + S[7]*w[1] + S[sstep+3]*w[2] + S[sstep+7]*w[3];
3055 D[2] = castOp(t0); D[3] = castOp(t1);
3060 if( borderType == BORDER_TRANSPARENT && cn != 3 )
3068 for( ; dx < X1; dx++, D++ )
3070 int sx = XY[dx*2], sy = XY[dx*2+1];
3071 if( borderType == BORDER_CONSTANT &&
3072 (sx >= ssize.width || sx+1 < 0 ||
3073 sy >= ssize.height || sy+1 < 0) )
3079 int sx0, sx1, sy0, sy1;
3081 const AT* w = wtab + FXY[dx]*4;
3082 if( borderType == BORDER_REPLICATE )
3084 sx0 = clip(sx, 0, ssize.width);
3085 sx1 = clip(sx+1, 0, ssize.width);
3086 sy0 = clip(sy, 0, ssize.height);
3087 sy1 = clip(sy+1, 0, ssize.height);
3088 v0 = S0[sy0*sstep + sx0];
3089 v1 = S0[sy0*sstep + sx1];
3090 v2 = S0[sy1*sstep + sx0];
3091 v3 = S0[sy1*sstep + sx1];
3095 sx0 = borderInterpolate(sx, ssize.width, borderType);
3096 sx1 = borderInterpolate(sx+1, ssize.width, borderType);
3097 sy0 = borderInterpolate(sy, ssize.height, borderType);
3098 sy1 = borderInterpolate(sy+1, ssize.height, borderType);
3099 v0 = sx0 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx0] : cval[0];
3100 v1 = sx1 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx1] : cval[0];
3101 v2 = sx0 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx0] : cval[0];
3102 v3 = sx1 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx1] : cval[0];
3104 D[0] = castOp(WT(v0*w[0] + v1*w[1] + v2*w[2] + v3*w[3]));
3108 for( ; dx < X1; dx++, D += cn )
3110 int sx = XY[dx*2], sy = XY[dx*2+1], k;
3111 if( borderType == BORDER_CONSTANT &&
3112 (sx >= ssize.width || sx+1 < 0 ||
3113 sy >= ssize.height || sy+1 < 0) )
3115 for( k = 0; k < cn; k++ )
3120 int sx0, sx1, sy0, sy1;
3121 const T *v0, *v1, *v2, *v3;
3122 const AT* w = wtab + FXY[dx]*4;
3123 if( borderType == BORDER_REPLICATE )
3125 sx0 = clip(sx, 0, ssize.width);
3126 sx1 = clip(sx+1, 0, ssize.width);
3127 sy0 = clip(sy, 0, ssize.height);
3128 sy1 = clip(sy+1, 0, ssize.height);
3129 v0 = S0 + sy0*sstep + sx0*cn;
3130 v1 = S0 + sy0*sstep + sx1*cn;
3131 v2 = S0 + sy1*sstep + sx0*cn;
3132 v3 = S0 + sy1*sstep + sx1*cn;
3134 else if( borderType == BORDER_TRANSPARENT &&
3135 ((unsigned)sx >= (unsigned)(ssize.width-1) ||
3136 (unsigned)sy >= (unsigned)(ssize.height-1)))
3140 sx0 = borderInterpolate(sx, ssize.width, borderType);
3141 sx1 = borderInterpolate(sx+1, ssize.width, borderType);
3142 sy0 = borderInterpolate(sy, ssize.height, borderType);
3143 sy1 = borderInterpolate(sy+1, ssize.height, borderType);
3144 v0 = sx0 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx0*cn : &cval[0];
3145 v1 = sx1 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx1*cn : &cval[0];
3146 v2 = sx0 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx0*cn : &cval[0];
3147 v3 = sx1 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx1*cn : &cval[0];
3149 for( k = 0; k < cn; k++ )
3150 D[k] = castOp(WT(v0[k]*w[0] + v1[k]*w[1] + v2[k]*w[2] + v3[k]*w[3]));
3159 template<class CastOp, typename AT, int ONE>
3160 static void remapBicubic( const Mat& _src, Mat& _dst, const Mat& _xy,
3161 const Mat& _fxy, const void* _wtab,
3162 int borderType, const Scalar& _borderValue )
3164 typedef typename CastOp::rtype T;
3165 typedef typename CastOp::type1 WT;
3166 Size ssize = _src.size(), dsize = _dst.size();
3167 int cn = _src.channels();
3168 const AT* wtab = (const AT*)_wtab;
3169 const T* S0 = (const T*)_src.data;
3170 size_t sstep = _src.step/sizeof(S0[0]);
3171 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
3172 saturate_cast<T>(_borderValue[1]),
3173 saturate_cast<T>(_borderValue[2]),
3174 saturate_cast<T>(_borderValue[3]));
3177 int borderType1 = borderType != BORDER_TRANSPARENT ? borderType : BORDER_REFLECT_101;
3179 unsigned width1 = std::max(ssize.width-3, 0), height1 = std::max(ssize.height-3, 0);
3181 if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() )
3183 dsize.width *= dsize.height;
3187 for( dy = 0; dy < dsize.height; dy++ )
3189 T* D = (T*)(_dst.data + _dst.step*dy);
3190 const short* XY = (const short*)(_xy.data + _xy.step*dy);
3191 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
3193 for( dx = 0; dx < dsize.width; dx++, D += cn )
3195 int sx = XY[dx*2]-1, sy = XY[dx*2+1]-1;
3196 const AT* w = wtab + FXY[dx]*16;
3198 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
3200 const T* S = S0 + sy*sstep + sx*cn;
3201 for( k = 0; k < cn; k++ )
3203 WT sum = S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3];
3205 sum += S[0]*w[4] + S[cn]*w[5] + S[cn*2]*w[6] + S[cn*3]*w[7];
3207 sum += S[0]*w[8] + S[cn]*w[9] + S[cn*2]*w[10] + S[cn*3]*w[11];
3209 sum += S[0]*w[12] + S[cn]*w[13] + S[cn*2]*w[14] + S[cn*3]*w[15];
3217 if( borderType == BORDER_TRANSPARENT &&
3218 ((unsigned)(sx+1) >= (unsigned)ssize.width ||
3219 (unsigned)(sy+1) >= (unsigned)ssize.height) )
3222 if( borderType1 == BORDER_CONSTANT &&
3223 (sx >= ssize.width || sx+4 <= 0 ||
3224 sy >= ssize.height || sy+4 <= 0))
3226 for( k = 0; k < cn; k++ )
3231 for( i = 0; i < 4; i++ )
3233 x[i] = borderInterpolate(sx + i, ssize.width, borderType1)*cn;
3234 y[i] = borderInterpolate(sy + i, ssize.height, borderType1);
3237 for( k = 0; k < cn; k++, S0++, w -= 16 )
3239 WT cv = cval[k], sum = cv*ONE;
3240 for( i = 0; i < 4; i++, w += 4 )
3243 const T* S = S0 + yi*sstep;
3247 sum += (S[x[0]] - cv)*w[0];
3249 sum += (S[x[1]] - cv)*w[1];
3251 sum += (S[x[2]] - cv)*w[2];
3253 sum += (S[x[3]] - cv)*w[3];
3264 template<class CastOp, typename AT, int ONE>
3265 static void remapLanczos4( const Mat& _src, Mat& _dst, const Mat& _xy,
3266 const Mat& _fxy, const void* _wtab,
3267 int borderType, const Scalar& _borderValue )
3269 typedef typename CastOp::rtype T;
3270 typedef typename CastOp::type1 WT;
3271 Size ssize = _src.size(), dsize = _dst.size();
3272 int cn = _src.channels();
3273 const AT* wtab = (const AT*)_wtab;
3274 const T* S0 = (const T*)_src.data;
3275 size_t sstep = _src.step/sizeof(S0[0]);
3276 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
3277 saturate_cast<T>(_borderValue[1]),
3278 saturate_cast<T>(_borderValue[2]),
3279 saturate_cast<T>(_borderValue[3]));
3282 int borderType1 = borderType != BORDER_TRANSPARENT ? borderType : BORDER_REFLECT_101;
3284 unsigned width1 = std::max(ssize.width-7, 0), height1 = std::max(ssize.height-7, 0);
3286 if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() )
3288 dsize.width *= dsize.height;
3292 for( dy = 0; dy < dsize.height; dy++ )
3294 T* D = (T*)(_dst.data + _dst.step*dy);
3295 const short* XY = (const short*)(_xy.data + _xy.step*dy);
3296 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
3298 for( dx = 0; dx < dsize.width; dx++, D += cn )
3300 int sx = XY[dx*2]-3, sy = XY[dx*2+1]-3;
3301 const AT* w = wtab + FXY[dx]*64;
3302 const T* S = S0 + sy*sstep + sx*cn;
3304 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
3306 for( k = 0; k < cn; k++ )
3309 for( int r = 0; r < 8; r++, S += sstep, w += 8 )
3310 sum += S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3] +
3311 S[cn*4]*w[4] + S[cn*5]*w[5] + S[cn*6]*w[6] + S[cn*7]*w[7];
3320 if( borderType == BORDER_TRANSPARENT &&
3321 ((unsigned)(sx+3) >= (unsigned)ssize.width ||
3322 (unsigned)(sy+3) >= (unsigned)ssize.height) )
3325 if( borderType1 == BORDER_CONSTANT &&
3326 (sx >= ssize.width || sx+8 <= 0 ||
3327 sy >= ssize.height || sy+8 <= 0))
3329 for( k = 0; k < cn; k++ )
3334 for( i = 0; i < 8; i++ )
3336 x[i] = borderInterpolate(sx + i, ssize.width, borderType1)*cn;
3337 y[i] = borderInterpolate(sy + i, ssize.height, borderType1);
3340 for( k = 0; k < cn; k++, S0++, w -= 64 )
3342 WT cv = cval[k], sum = cv*ONE;
3343 for( i = 0; i < 8; i++, w += 8 )
3346 const T* S1 = S0 + yi*sstep;
3350 sum += (S1[x[0]] - cv)*w[0];
3352 sum += (S1[x[1]] - cv)*w[1];
3354 sum += (S1[x[2]] - cv)*w[2];
3356 sum += (S1[x[3]] - cv)*w[3];
3358 sum += (S1[x[4]] - cv)*w[4];
3360 sum += (S1[x[5]] - cv)*w[5];
3362 sum += (S1[x[6]] - cv)*w[6];
3364 sum += (S1[x[7]] - cv)*w[7];
3375 typedef void (*RemapNNFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
3376 int borderType, const Scalar& _borderValue );
3378 typedef void (*RemapFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
3379 const Mat& _fxy, const void* _wtab,
3380 int borderType, const Scalar& _borderValue);
3382 class RemapInvoker :
3383 public ParallelLoopBody
3386 RemapInvoker(const Mat& _src, Mat& _dst, const Mat *_m1,
3387 const Mat *_m2, int _borderType, const Scalar &_borderValue,
3388 int _planar_input, RemapNNFunc _nnfunc, RemapFunc _ifunc, const void *_ctab) :
3389 ParallelLoopBody(), src(&_src), dst(&_dst), m1(_m1), m2(_m2),
3390 borderType(_borderType), borderValue(_borderValue),
3391 planar_input(_planar_input), nnfunc(_nnfunc), ifunc(_ifunc), ctab(_ctab)
3395 virtual void operator() (const Range& range) const
3398 const int buf_size = 1 << 14;
3399 int brows0 = std::min(128, dst->rows), map_depth = m1->depth();
3400 int bcols0 = std::min(buf_size/brows0, dst->cols);
3401 brows0 = std::min(buf_size/bcols0, dst->rows);
3403 bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
3406 Mat _bufxy(brows0, bcols0, CV_16SC2), _bufa;
3408 _bufa.create(brows0, bcols0, CV_16UC1);
3410 for( y = range.start; y < range.end; y += brows0 )
3412 for( x = 0; x < dst->cols; x += bcols0 )
3414 int brows = std::min(brows0, range.end - y);
3415 int bcols = std::min(bcols0, dst->cols - x);
3416 Mat dpart(*dst, Rect(x, y, bcols, brows));
3417 Mat bufxy(_bufxy, Rect(0, 0, bcols, brows));
3421 if( m1->type() == CV_16SC2 && !m2->data ) // the data is already in the right format
3422 bufxy = (*m1)(Rect(x, y, bcols, brows));
3423 else if( map_depth != CV_32F )
3425 for( y1 = 0; y1 < brows; y1++ )
3427 short* XY = (short*)(bufxy.data + bufxy.step*y1);
3428 const short* sXY = (const short*)(m1->data + m1->step*(y+y1)) + x*2;
3429 const ushort* sA = (const ushort*)(m2->data + m2->step*(y+y1)) + x;
3431 for( x1 = 0; x1 < bcols; x1++ )
3433 int a = sA[x1] & (INTER_TAB_SIZE2-1);
3434 XY[x1*2] = sXY[x1*2] + NNDeltaTab_i[a][0];
3435 XY[x1*2+1] = sXY[x1*2+1] + NNDeltaTab_i[a][1];
3439 else if( !planar_input )
3440 (*m1)(Rect(x, y, bcols, brows)).convertTo(bufxy, bufxy.depth());
3443 for( y1 = 0; y1 < brows; y1++ )
3445 short* XY = (short*)(bufxy.data + bufxy.step*y1);
3446 const float* sX = (const float*)(m1->data + m1->step*(y+y1)) + x;
3447 const float* sY = (const float*)(m2->data + m2->step*(y+y1)) + x;
3453 for( ; x1 <= bcols - 8; x1 += 8 )
3455 __m128 fx0 = _mm_loadu_ps(sX + x1);
3456 __m128 fx1 = _mm_loadu_ps(sX + x1 + 4);
3457 __m128 fy0 = _mm_loadu_ps(sY + x1);
3458 __m128 fy1 = _mm_loadu_ps(sY + x1 + 4);
3459 __m128i ix0 = _mm_cvtps_epi32(fx0);
3460 __m128i ix1 = _mm_cvtps_epi32(fx1);
3461 __m128i iy0 = _mm_cvtps_epi32(fy0);
3462 __m128i iy1 = _mm_cvtps_epi32(fy1);
3463 ix0 = _mm_packs_epi32(ix0, ix1);
3464 iy0 = _mm_packs_epi32(iy0, iy1);
3465 ix1 = _mm_unpacklo_epi16(ix0, iy0);
3466 iy1 = _mm_unpackhi_epi16(ix0, iy0);
3467 _mm_storeu_si128((__m128i*)(XY + x1*2), ix1);
3468 _mm_storeu_si128((__m128i*)(XY + x1*2 + 8), iy1);
3473 for( ; x1 < bcols; x1++ )
3475 XY[x1*2] = saturate_cast<short>(sX[x1]);
3476 XY[x1*2+1] = saturate_cast<short>(sY[x1]);
3480 nnfunc( *src, dpart, bufxy, borderType, borderValue );
3484 Mat bufa(_bufa, Rect(0, 0, bcols, brows));
3485 for( y1 = 0; y1 < brows; y1++ )
3487 short* XY = (short*)(bufxy.data + bufxy.step*y1);
3488 ushort* A = (ushort*)(bufa.data + bufa.step*y1);
3490 if( m1->type() == CV_16SC2 && (m2->type() == CV_16UC1 || m2->type() == CV_16SC1) )
3492 bufxy = (*m1)(Rect(x, y, bcols, brows));
3494 const ushort* sA = (const ushort*)(m2->data + m2->step*(y+y1)) + x;
3495 for( x1 = 0; x1 < bcols; x1++ )
3496 A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2-1));
3498 else if( planar_input )
3500 const float* sX = (const float*)(m1->data + m1->step*(y+y1)) + x;
3501 const float* sY = (const float*)(m2->data + m2->step*(y+y1)) + x;
3507 __m128 scale = _mm_set1_ps((float)INTER_TAB_SIZE);
3508 __m128i mask = _mm_set1_epi32(INTER_TAB_SIZE-1);
3509 for( ; x1 <= bcols - 8; x1 += 8 )
3511 __m128 fx0 = _mm_loadu_ps(sX + x1);
3512 __m128 fx1 = _mm_loadu_ps(sX + x1 + 4);
3513 __m128 fy0 = _mm_loadu_ps(sY + x1);
3514 __m128 fy1 = _mm_loadu_ps(sY + x1 + 4);
3515 __m128i ix0 = _mm_cvtps_epi32(_mm_mul_ps(fx0, scale));
3516 __m128i ix1 = _mm_cvtps_epi32(_mm_mul_ps(fx1, scale));
3517 __m128i iy0 = _mm_cvtps_epi32(_mm_mul_ps(fy0, scale));
3518 __m128i iy1 = _mm_cvtps_epi32(_mm_mul_ps(fy1, scale));
3519 __m128i mx0 = _mm_and_si128(ix0, mask);
3520 __m128i mx1 = _mm_and_si128(ix1, mask);
3521 __m128i my0 = _mm_and_si128(iy0, mask);
3522 __m128i my1 = _mm_and_si128(iy1, mask);
3523 mx0 = _mm_packs_epi32(mx0, mx1);
3524 my0 = _mm_packs_epi32(my0, my1);
3525 my0 = _mm_slli_epi16(my0, INTER_BITS);
3526 mx0 = _mm_or_si128(mx0, my0);
3527 _mm_storeu_si128((__m128i*)(A + x1), mx0);
3528 ix0 = _mm_srai_epi32(ix0, INTER_BITS);
3529 ix1 = _mm_srai_epi32(ix1, INTER_BITS);
3530 iy0 = _mm_srai_epi32(iy0, INTER_BITS);
3531 iy1 = _mm_srai_epi32(iy1, INTER_BITS);
3532 ix0 = _mm_packs_epi32(ix0, ix1);
3533 iy0 = _mm_packs_epi32(iy0, iy1);
3534 ix1 = _mm_unpacklo_epi16(ix0, iy0);
3535 iy1 = _mm_unpackhi_epi16(ix0, iy0);
3536 _mm_storeu_si128((__m128i*)(XY + x1*2), ix1);
3537 _mm_storeu_si128((__m128i*)(XY + x1*2 + 8), iy1);
3542 for( ; x1 < bcols; x1++ )
3544 int sx = cvRound(sX[x1]*INTER_TAB_SIZE);
3545 int sy = cvRound(sY[x1]*INTER_TAB_SIZE);
3546 int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
3547 XY[x1*2] = saturate_cast<short>(sx >> INTER_BITS);
3548 XY[x1*2+1] = saturate_cast<short>(sy >> INTER_BITS);
3554 const float* sXY = (const float*)(m1->data + m1->step*(y+y1)) + x*2;
3556 for( x1 = 0; x1 < bcols; x1++ )
3558 int sx = cvRound(sXY[x1*2]*INTER_TAB_SIZE);
3559 int sy = cvRound(sXY[x1*2+1]*INTER_TAB_SIZE);
3560 int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
3561 XY[x1*2] = saturate_cast<short>(sx >> INTER_BITS);
3562 XY[x1*2+1] = saturate_cast<short>(sy >> INTER_BITS);
3567 ifunc(*src, dpart, bufxy, bufa, ctab, borderType, borderValue);
3586 static bool ocl_remap(InputArray _src, OutputArray _dst, InputArray _map1, InputArray _map2,
3587 int interpolation, int borderType, const Scalar& borderValue)
3589 int cn = _src.channels(), type = _src.type(), depth = _src.depth();
3591 if (borderType == BORDER_TRANSPARENT || cn == 3 || !(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST)
3592 || _map1.type() == CV_16SC1 || _map2.type() == CV_16SC1)
3595 UMat src = _src.getUMat(), map1 = _map1.getUMat(), map2 = _map2.getUMat();
3597 if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.empty())) ||
3598 (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.empty())) )
3600 if (map1.type() != CV_16SC2)
3601 std::swap(map1, map2);
3604 CV_Assert( map1.type() == CV_32FC2 || (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) );
3606 _dst.create(map1.size(), type);
3607 UMat dst = _dst.getUMat();
3609 String kernelName = "remap";
3610 if (map1.type() == CV_32FC2 && map2.empty())
3611 kernelName += "_32FC2";
3612 else if (map1.type() == CV_16SC2)
3614 kernelName += "_16SC2";
3616 kernelName += "_16UC1";
3618 else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
3619 kernelName += "_2_32FC1";
3621 CV_Error(Error::StsBadArg, "Unsupported map types");
3623 static const char * const interMap[] = { "INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_LINEAR", "INTER_LANCZOS" };
3624 static const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
3625 "BORDER_REFLECT_101", "BORDER_TRANSPARENT" };
3626 String buildOptions = format("-D %s -D %s -D T=%s", interMap[interpolation], borderMap[borderType], ocl::typeToStr(type));
3628 if (interpolation != INTER_NEAREST)
3631 int wdepth = std::max(CV_32F, dst.depth());
3632 buildOptions = buildOptions
3633 + format(" -D WT=%s -D convertToT=%s -D convertToWT=%s"
3634 " -D convertToWT2=%s -D WT2=%s",
3635 ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
3636 ocl::convertTypeStr(wdepth, depth, cn, cvt[0]),
3637 ocl::convertTypeStr(depth, wdepth, cn, cvt[1]),
3638 ocl::convertTypeStr(CV_32S, wdepth, 2, cvt[2]),
3639 ocl::typeToStr(CV_MAKE_TYPE(wdepth, 2)));
3642 ocl::Kernel k(kernelName.c_str(), ocl::imgproc::remap_oclsrc, buildOptions);
3644 Mat scalar(1, 1, type, borderValue);
3645 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src), dstarg = ocl::KernelArg::WriteOnly(dst),
3646 map1arg = ocl::KernelArg::ReadOnlyNoSize(map1),
3647 scalararg = ocl::KernelArg::Constant((void*)scalar.data, scalar.elemSize());
3650 k.args(srcarg, dstarg, map1arg, scalararg);
3652 k.args(srcarg, dstarg, map1arg, ocl::KernelArg::ReadOnlyNoSize(map2), scalararg);
3654 size_t globalThreads[2] = { dst.cols, dst.rows };
3655 return k.run(2, globalThreads, NULL, false);
3662 void cv::remap( InputArray _src, OutputArray _dst,
3663 InputArray _map1, InputArray _map2,
3664 int interpolation, int borderType, const Scalar& borderValue )
3666 static RemapNNFunc nn_tab[] =
3668 remapNearest<uchar>, remapNearest<schar>, remapNearest<ushort>, remapNearest<short>,
3669 remapNearest<int>, remapNearest<float>, remapNearest<double>, 0
3672 static RemapFunc linear_tab[] =
3674 remapBilinear<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, RemapVec_8u, short>, 0,
3675 remapBilinear<Cast<float, ushort>, RemapNoVec, float>,
3676 remapBilinear<Cast<float, short>, RemapNoVec, float>, 0,
3677 remapBilinear<Cast<float, float>, RemapNoVec, float>,
3678 remapBilinear<Cast<double, double>, RemapNoVec, float>, 0
3681 static RemapFunc cubic_tab[] =
3683 remapBicubic<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE>, 0,
3684 remapBicubic<Cast<float, ushort>, float, 1>,
3685 remapBicubic<Cast<float, short>, float, 1>, 0,
3686 remapBicubic<Cast<float, float>, float, 1>,
3687 remapBicubic<Cast<double, double>, float, 1>, 0
3690 static RemapFunc lanczos4_tab[] =
3692 remapLanczos4<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE>, 0,
3693 remapLanczos4<Cast<float, ushort>, float, 1>,
3694 remapLanczos4<Cast<float, short>, float, 1>, 0,
3695 remapLanczos4<Cast<float, float>, float, 1>,
3696 remapLanczos4<Cast<double, double>, float, 1>, 0
3699 CV_Assert( _map1.size().area() > 0 );
3700 CV_Assert( _map2.empty() || (_map2.size() == _map1.size()));
3702 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
3703 ocl_remap(_src, _dst, _map1, _map2, interpolation, borderType, borderValue))
3705 Mat src = _src.getMat(), map1 = _map1.getMat(), map2 = _map2.getMat();
3706 _dst.create( map1.size(), src.type() );
3707 Mat dst = _dst.getMat();
3708 if( dst.data == src.data )
3711 int depth = src.depth();
3712 RemapNNFunc nnfunc = 0;
3713 RemapFunc ifunc = 0;
3714 const void* ctab = 0;
3715 bool fixpt = depth == CV_8U;
3716 bool planar_input = false;
3718 if( interpolation == INTER_NEAREST )
3720 nnfunc = nn_tab[depth];
3721 CV_Assert( nnfunc != 0 );
3725 if( interpolation == INTER_AREA )
3726 interpolation = INTER_LINEAR;
3728 if( interpolation == INTER_LINEAR )
3729 ifunc = linear_tab[depth];
3730 else if( interpolation == INTER_CUBIC )
3731 ifunc = cubic_tab[depth];
3732 else if( interpolation == INTER_LANCZOS4 )
3733 ifunc = lanczos4_tab[depth];
3735 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
3736 CV_Assert( ifunc != 0 );
3737 ctab = initInterTab2D( interpolation, fixpt );
3740 const Mat *m1 = &map1, *m2 = &map2;
3742 if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.type() == CV_16SC1 || !map2.data)) ||
3743 (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.type() == CV_16SC1 || !map1.data)) )
3745 if( map1.type() != CV_16SC2 )
3750 CV_Assert( ((map1.type() == CV_32FC2 || map1.type() == CV_16SC2) && !map2.data) ||
3751 (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) );
3752 planar_input = map1.channels() == 1;
3755 RemapInvoker invoker(src, dst, m1, m2,
3756 borderType, borderValue, planar_input, nnfunc, ifunc,
3758 parallel_for_(Range(0, dst.rows), invoker, dst.total()/(double)(1<<16));
3762 void cv::convertMaps( InputArray _map1, InputArray _map2,
3763 OutputArray _dstmap1, OutputArray _dstmap2,
3764 int dstm1type, bool nninterpolate )
3766 Mat map1 = _map1.getMat(), map2 = _map2.getMat(), dstmap1, dstmap2;
3767 Size size = map1.size();
3768 const Mat *m1 = &map1, *m2 = &map2;
3769 int m1type = m1->type(), m2type = m2->type();
3771 CV_Assert( (m1type == CV_16SC2 && (nninterpolate || m2type == CV_16UC1 || m2type == CV_16SC1)) ||
3772 (m2type == CV_16SC2 && (nninterpolate || m1type == CV_16UC1 || m1type == CV_16SC1)) ||
3773 (m1type == CV_32FC1 && m2type == CV_32FC1) ||
3774 (m1type == CV_32FC2 && !m2->data) );
3776 if( m2type == CV_16SC2 )
3778 std::swap( m1, m2 );
3779 std::swap( m1type, m2type );
3782 if( dstm1type <= 0 )
3783 dstm1type = m1type == CV_16SC2 ? CV_32FC2 : CV_16SC2;
3784 CV_Assert( dstm1type == CV_16SC2 || dstm1type == CV_32FC1 || dstm1type == CV_32FC2 );
3785 _dstmap1.create( size, dstm1type );
3786 dstmap1 = _dstmap1.getMat();
3788 if( !nninterpolate && dstm1type != CV_32FC2 )
3790 _dstmap2.create( size, dstm1type == CV_16SC2 ? CV_16UC1 : CV_32FC1 );
3791 dstmap2 = _dstmap2.getMat();
3796 if( m1type == dstm1type || (nninterpolate &&
3797 ((m1type == CV_16SC2 && dstm1type == CV_32FC2) ||
3798 (m1type == CV_32FC2 && dstm1type == CV_16SC2))) )
3800 m1->convertTo( dstmap1, dstmap1.type() );
3801 if( dstmap2.data && dstmap2.type() == m2->type() )
3802 m2->copyTo( dstmap2 );
3806 if( m1type == CV_32FC1 && dstm1type == CV_32FC2 )
3808 Mat vdata[] = { *m1, *m2 };
3809 merge( vdata, 2, dstmap1 );
3813 if( m1type == CV_32FC2 && dstm1type == CV_32FC1 )
3815 Mat mv[] = { dstmap1, dstmap2 };
3820 if( m1->isContinuous() && (!m2->data || m2->isContinuous()) &&
3821 dstmap1.isContinuous() && (!dstmap2.data || dstmap2.isContinuous()) )
3823 size.width *= size.height;
3827 const float scale = 1.f/INTER_TAB_SIZE;
3829 for( y = 0; y < size.height; y++ )
3831 const float* src1f = (const float*)(m1->data + m1->step*y);
3832 const float* src2f = (const float*)(m2->data + m2->step*y);
3833 const short* src1 = (const short*)src1f;
3834 const ushort* src2 = (const ushort*)src2f;
3836 float* dst1f = (float*)(dstmap1.data + dstmap1.step*y);
3837 float* dst2f = (float*)(dstmap2.data + dstmap2.step*y);
3838 short* dst1 = (short*)dst1f;
3839 ushort* dst2 = (ushort*)dst2f;
3841 if( m1type == CV_32FC1 && dstm1type == CV_16SC2 )
3844 for( x = 0; x < size.width; x++ )
3846 dst1[x*2] = saturate_cast<short>(src1f[x]);
3847 dst1[x*2+1] = saturate_cast<short>(src2f[x]);
3850 for( x = 0; x < size.width; x++ )
3852 int ix = saturate_cast<int>(src1f[x]*INTER_TAB_SIZE);
3853 int iy = saturate_cast<int>(src2f[x]*INTER_TAB_SIZE);
3854 dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
3855 dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
3856 dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
3859 else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )
3862 for( x = 0; x < size.width; x++ )
3864 dst1[x*2] = saturate_cast<short>(src1f[x*2]);
3865 dst1[x*2+1] = saturate_cast<short>(src1f[x*2+1]);
3868 for( x = 0; x < size.width; x++ )
3870 int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
3871 int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
3872 dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
3873 dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
3874 dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
3877 else if( m1type == CV_16SC2 && dstm1type == CV_32FC1 )
3879 for( x = 0; x < size.width; x++ )
3881 int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1) : 0;
3882 dst1f[x] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
3883 dst2f[x] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
3886 else if( m1type == CV_16SC2 && dstm1type == CV_32FC2 )
3888 for( x = 0; x < size.width; x++ )
3890 int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1): 0;
3891 dst1f[x*2] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
3892 dst1f[x*2+1] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
3896 CV_Error( CV_StsNotImplemented, "Unsupported combination of input/output matrices" );
3904 class warpAffineInvoker :
3905 public ParallelLoopBody
3908 warpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType,
3909 const Scalar &_borderValue, int *_adelta, int *_bdelta, double *_M) :
3910 ParallelLoopBody(), src(_src), dst(_dst), interpolation(_interpolation),
3911 borderType(_borderType), borderValue(_borderValue), adelta(_adelta), bdelta(_bdelta),
3916 virtual void operator() (const Range& range) const
3918 const int BLOCK_SZ = 64;
3919 short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
3920 const int AB_BITS = MAX(10, (int)INTER_BITS);
3921 const int AB_SCALE = 1 << AB_BITS;
3922 int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, x1, y1;
3924 bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
3927 int bh0 = std::min(BLOCK_SZ/2, dst.rows);
3928 int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, dst.cols);
3929 bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, dst.rows);
3931 for( y = range.start; y < range.end; y += bh0 )
3933 for( x = 0; x < dst.cols; x += bw0 )
3935 int bw = std::min( bw0, dst.cols - x);
3936 int bh = std::min( bh0, range.end - y);
3938 Mat _XY(bh, bw, CV_16SC2, XY), matA;
3939 Mat dpart(dst, Rect(x, y, bw, bh));
3941 for( y1 = 0; y1 < bh; y1++ )
3943 short* xy = XY + y1*bw*2;
3944 int X0 = saturate_cast<int>((M[1]*(y + y1) + M[2])*AB_SCALE) + round_delta;
3945 int Y0 = saturate_cast<int>((M[4]*(y + y1) + M[5])*AB_SCALE) + round_delta;
3947 if( interpolation == INTER_NEAREST )
3948 for( x1 = 0; x1 < bw; x1++ )
3950 int X = (X0 + adelta[x+x1]) >> AB_BITS;
3951 int Y = (Y0 + bdelta[x+x1]) >> AB_BITS;
3952 xy[x1*2] = saturate_cast<short>(X);
3953 xy[x1*2+1] = saturate_cast<short>(Y);
3957 short* alpha = A + y1*bw;
3962 __m128i fxy_mask = _mm_set1_epi32(INTER_TAB_SIZE - 1);
3963 __m128i XX = _mm_set1_epi32(X0), YY = _mm_set1_epi32(Y0);
3964 for( ; x1 <= bw - 8; x1 += 8 )
3966 __m128i tx0, tx1, ty0, ty1;
3967 tx0 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(adelta + x + x1)), XX);
3968 ty0 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(bdelta + x + x1)), YY);
3969 tx1 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(adelta + x + x1 + 4)), XX);
3970 ty1 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(bdelta + x + x1 + 4)), YY);
3972 tx0 = _mm_srai_epi32(tx0, AB_BITS - INTER_BITS);
3973 ty0 = _mm_srai_epi32(ty0, AB_BITS - INTER_BITS);
3974 tx1 = _mm_srai_epi32(tx1, AB_BITS - INTER_BITS);
3975 ty1 = _mm_srai_epi32(ty1, AB_BITS - INTER_BITS);
3977 __m128i fx_ = _mm_packs_epi32(_mm_and_si128(tx0, fxy_mask),
3978 _mm_and_si128(tx1, fxy_mask));
3979 __m128i fy_ = _mm_packs_epi32(_mm_and_si128(ty0, fxy_mask),
3980 _mm_and_si128(ty1, fxy_mask));
3981 tx0 = _mm_packs_epi32(_mm_srai_epi32(tx0, INTER_BITS),
3982 _mm_srai_epi32(tx1, INTER_BITS));
3983 ty0 = _mm_packs_epi32(_mm_srai_epi32(ty0, INTER_BITS),
3984 _mm_srai_epi32(ty1, INTER_BITS));
3985 fx_ = _mm_adds_epi16(fx_, _mm_slli_epi16(fy_, INTER_BITS));
3987 _mm_storeu_si128((__m128i*)(xy + x1*2), _mm_unpacklo_epi16(tx0, ty0));
3988 _mm_storeu_si128((__m128i*)(xy + x1*2 + 8), _mm_unpackhi_epi16(tx0, ty0));
3989 _mm_storeu_si128((__m128i*)(alpha + x1), fx_);
3993 for( ; x1 < bw; x1++ )
3995 int X = (X0 + adelta[x+x1]) >> (AB_BITS - INTER_BITS);
3996 int Y = (Y0 + bdelta[x+x1]) >> (AB_BITS - INTER_BITS);
3997 xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
3998 xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
3999 alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
4000 (X & (INTER_TAB_SIZE-1)));
4005 if( interpolation == INTER_NEAREST )
4006 remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
4009 Mat _matA(bh, bw, CV_16U, A);
4010 remap( src, dpart, _XY, _matA, interpolation, borderType, borderValue );
4019 int interpolation, borderType;
4021 int *adelta, *bdelta;
4025 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
4026 class IPPwarpAffineInvoker :
4027 public ParallelLoopBody
4030 IPPwarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
4031 ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
4036 virtual void operator() (const Range& range) const
4038 IppiSize srcsize = { src.cols, src.rows };
4039 IppiRect srcroi = { 0, 0, src.cols, src.rows };
4040 IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start };
4041 int cnn = src.channels();
4042 if( borderType == BORDER_CONSTANT )
4044 IppiSize setSize = { dst.cols, range.end - range.start };
4045 void *dataPointer = dst.data + dst.step[0] * range.start;
4046 if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
4052 if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0) ////Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
4058 double (&coeffs)[2][3];
4062 ippiWarpAffineBackFunc func;
4064 const IPPwarpAffineInvoker& operator= (const IPPwarpAffineInvoker&);
4070 enum { OCL_OP_PERSPECTIVE = 1, OCL_OP_AFFINE = 0 };
4072 static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0,
4073 Size dsize, int flags, int borderType, const Scalar& borderValue,
4076 CV_Assert(op_type == OCL_OP_AFFINE || op_type == OCL_OP_PERSPECTIVE);
4078 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
4079 double doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
4081 int interpolation = flags & INTER_MAX;
4082 if( interpolation == INTER_AREA )
4083 interpolation = INTER_LINEAR;
4085 if ( !(borderType == cv::BORDER_CONSTANT &&
4086 (interpolation == cv::INTER_NEAREST || interpolation == cv::INTER_LINEAR || interpolation == cv::INTER_CUBIC)) ||
4087 (!doubleSupport && depth == CV_64F) || cn > 4)
4090 const char * const interpolationMap[3] = { "NEAREST", "LINEAR", "CUBIC" };
4091 ocl::ProgramSource program = op_type == OCL_OP_AFFINE ?
4092 ocl::imgproc::warp_affine_oclsrc : ocl::imgproc::warp_perspective_oclsrc;
4093 const char * const kernelName = op_type == OCL_OP_AFFINE ? "warpAffine" : "warpPerspective";
4095 int scalarcn = cn == 3 ? 4 : cn;
4096 int wdepth = interpolation == INTER_NEAREST ? depth : std::max(CV_32S, depth);
4097 int sctype = CV_MAKETYPE(wdepth, scalarcn);
4101 if (interpolation == INTER_NEAREST)
4103 opts = format("-D INTER_NEAREST -D T=%s%s -D T1=%s -D ST=%s -D cn=%d", ocl::typeToStr(type),
4104 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
4105 ocl::typeToStr(CV_MAT_DEPTH(type)),
4106 ocl::typeToStr(sctype),
4112 opts = format("-D INTER_%s -D T=%s -D T1=%s -D ST=%s -D WT=%s -D depth=%d -D convertToWT=%s -D convertToT=%s%s -D cn=%d",
4113 interpolationMap[interpolation], ocl::typeToStr(type),
4114 ocl::typeToStr(CV_MAT_DEPTH(type)),
4115 ocl::typeToStr(sctype),
4116 ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), depth,
4117 ocl::convertTypeStr(depth, wdepth, cn, cvt[0]),
4118 ocl::convertTypeStr(wdepth, depth, cn, cvt[1]),
4119 doubleSupport ? " -D DOUBLE_SUPPORT" : "", cn);
4122 k.create(kernelName, program, opts);
4126 double borderBuf[] = {0, 0, 0, 0};
4127 scalarToRawData(borderValue, borderBuf, sctype);
4129 UMat src = _src.getUMat(), M0;
4130 _dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
4131 UMat dst = _dst.getUMat();
4134 int matRows = (op_type == OCL_OP_AFFINE ? 2 : 3);
4135 Mat matM(matRows, 3, CV_64F, M), M1 = _M0.getMat();
4136 CV_Assert( (M1.type() == CV_32F || M1.type() == CV_64F) &&
4137 M1.rows == matRows && M1.cols == 3 );
4138 M1.convertTo(matM, matM.type());
4140 if( !(flags & WARP_INVERSE_MAP) )
4142 if (op_type == OCL_OP_PERSPECTIVE)
4146 double D = M[0]*M[4] - M[1]*M[3];
4147 D = D != 0 ? 1./D : 0;
4148 double A11 = M[4]*D, A22=M[0]*D;
4149 M[0] = A11; M[1] *= -D;
4150 M[3] *= -D; M[4] = A22;
4151 double b1 = -M[0]*M[2] - M[1]*M[5];
4152 double b2 = -M[3]*M[2] - M[4]*M[5];
4153 M[2] = b1; M[5] = b2;
4156 matM.convertTo(M0, doubleSupport ? CV_64F : CV_32F);
4158 k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(M0),
4159 ocl::KernelArg(0, 0, 0, borderBuf, CV_ELEM_SIZE(sctype)));
4161 size_t globalThreads[2] = { dst.cols, dst.rows };
4162 return k.run(2, globalThreads, NULL, false);
4170 void cv::warpAffine( InputArray _src, OutputArray _dst,
4171 InputArray _M0, Size dsize,
4172 int flags, int borderType, const Scalar& borderValue )
4174 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
4175 ocl_warpTransform(_src, _dst, _M0, dsize, flags, borderType,
4176 borderValue, OCL_OP_AFFINE))
4178 Mat src = _src.getMat(), M0 = _M0.getMat();
4179 _dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
4180 Mat dst = _dst.getMat();
4181 CV_Assert( src.cols > 0 && src.rows > 0 );
4182 if( dst.data == src.data )
4186 Mat matM(2, 3, CV_64F, M);
4187 int interpolation = flags & INTER_MAX;
4188 if( interpolation == INTER_AREA )
4189 interpolation = INTER_LINEAR;
4191 CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 2 && M0.cols == 3 );
4192 M0.convertTo(matM, matM.type());
4194 #ifdef HAVE_TEGRA_OPTIMIZATION
4195 if( tegra::warpAffine(src, dst, M, flags, borderType, borderValue) )
4199 if( !(flags & WARP_INVERSE_MAP) )
4201 double D = M[0]*M[4] - M[1]*M[3];
4202 D = D != 0 ? 1./D : 0;
4203 double A11 = M[4]*D, A22=M[0]*D;
4204 M[0] = A11; M[1] *= -D;
4205 M[3] *= -D; M[4] = A22;
4206 double b1 = -M[0]*M[2] - M[1]*M[5];
4207 double b2 = -M[3]*M[2] - M[4]*M[5];
4208 M[2] = b1; M[5] = b2;
4212 AutoBuffer<int> _abdelta(dst.cols*2);
4213 int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
4214 const int AB_BITS = MAX(10, (int)INTER_BITS);
4215 const int AB_SCALE = 1 << AB_BITS;
4217 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
4218 int depth = src.depth();
4219 int channels = src.channels();
4220 if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
4221 ( channels == 1 || channels == 3 || channels == 4 ) &&
4222 ( borderType == cv::BORDER_TRANSPARENT || ( borderType == cv::BORDER_CONSTANT ) ) )
4224 int type = src.type();
4225 ippiWarpAffineBackFunc ippFunc =
4226 type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R :
4227 type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R :
4228 type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R :
4229 type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C1R :
4230 type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C3R :
4231 type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C4R :
4232 type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C1R :
4233 type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R :
4234 type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R :
4237 flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
4238 flags == INTER_NEAREST ? IPPI_INTER_NN :
4239 flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
4241 if( mode && ippFunc )
4243 double coeffs[2][3];
4244 for( int i = 0; i < 2; i++ )
4246 for( int j = 0; j < 3; j++ )
4248 coeffs[i][j] = matM.at<double>(i, j);
4252 Range range(0, dst.rows);
4253 IPPwarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
4254 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
4261 for( x = 0; x < dst.cols; x++ )
4263 adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
4264 bdelta[x] = saturate_cast<int>(M[3]*x*AB_SCALE);
4267 Range range(0, dst.rows);
4268 warpAffineInvoker invoker(src, dst, interpolation, borderType,
4269 borderValue, adelta, bdelta, M);
4270 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
4277 class warpPerspectiveInvoker :
4278 public ParallelLoopBody
4282 warpPerspectiveInvoker(const Mat &_src, Mat &_dst, double *_M, int _interpolation,
4283 int _borderType, const Scalar &_borderValue) :
4284 ParallelLoopBody(), src(_src), dst(_dst), M(_M), interpolation(_interpolation),
4285 borderType(_borderType), borderValue(_borderValue)
4289 virtual void operator() (const Range& range) const
4291 const int BLOCK_SZ = 32;
4292 short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
4293 int x, y, x1, y1, width = dst.cols, height = dst.rows;
4295 int bh0 = std::min(BLOCK_SZ/2, height);
4296 int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
4297 bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);
4299 for( y = range.start; y < range.end; y += bh0 )
4301 for( x = 0; x < width; x += bw0 )
4303 int bw = std::min( bw0, width - x);
4304 int bh = std::min( bh0, range.end - y); // height
4306 Mat _XY(bh, bw, CV_16SC2, XY), matA;
4307 Mat dpart(dst, Rect(x, y, bw, bh));
4309 for( y1 = 0; y1 < bh; y1++ )
4311 short* xy = XY + y1*bw*2;
4312 double X0 = M[0]*x + M[1]*(y + y1) + M[2];
4313 double Y0 = M[3]*x + M[4]*(y + y1) + M[5];
4314 double W0 = M[6]*x + M[7]*(y + y1) + M[8];
4316 if( interpolation == INTER_NEAREST )
4317 for( x1 = 0; x1 < bw; x1++ )
4319 double W = W0 + M[6]*x1;
4321 double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
4322 double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
4323 int X = saturate_cast<int>(fX);
4324 int Y = saturate_cast<int>(fY);
4326 xy[x1*2] = saturate_cast<short>(X);
4327 xy[x1*2+1] = saturate_cast<short>(Y);
4331 short* alpha = A + y1*bw;
4332 for( x1 = 0; x1 < bw; x1++ )
4334 double W = W0 + M[6]*x1;
4335 W = W ? INTER_TAB_SIZE/W : 0;
4336 double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
4337 double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
4338 int X = saturate_cast<int>(fX);
4339 int Y = saturate_cast<int>(fY);
4341 xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
4342 xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
4343 alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
4344 (X & (INTER_TAB_SIZE-1)));
4349 if( interpolation == INTER_NEAREST )
4350 remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
4353 Mat _matA(bh, bw, CV_16U, A);
4354 remap( src, dpart, _XY, _matA, interpolation, borderType, borderValue );
4364 int interpolation, borderType;
4368 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
4369 class IPPwarpPerspectiveInvoker :
4370 public ParallelLoopBody
4373 IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) :
4374 ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
4379 virtual void operator() (const Range& range) const
4381 IppiSize srcsize = {src.cols, src.rows};
4382 IppiRect srcroi = {0, 0, src.cols, src.rows};
4383 IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start};
4384 int cnn = src.channels();
4386 if( borderType == BORDER_CONSTANT )
4388 IppiSize setSize = {dst.cols, range.end - range.start};
4389 void *dataPointer = dst.data + dst.step[0] * range.start;
4390 if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
4396 if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0)
4402 double (&coeffs)[3][3];
4405 const Scalar borderValue;
4406 ippiWarpPerspectiveBackFunc func;
4408 const IPPwarpPerspectiveInvoker& operator= (const IPPwarpPerspectiveInvoker&);
4414 void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
4415 Size dsize, int flags, int borderType, const Scalar& borderValue )
4417 CV_Assert( _src.total() > 0 );
4419 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
4420 ocl_warpTransform(_src, _dst, _M0, dsize, flags, borderType, borderValue,
4421 OCL_OP_PERSPECTIVE))
4423 Mat src = _src.getMat(), M0 = _M0.getMat();
4424 _dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
4425 Mat dst = _dst.getMat();
4427 if( dst.data == src.data )
4431 Mat matM(3, 3, CV_64F, M);
4432 int interpolation = flags & INTER_MAX;
4433 if( interpolation == INTER_AREA )
4434 interpolation = INTER_LINEAR;
4436 CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3 );
4437 M0.convertTo(matM, matM.type());
4439 #ifdef HAVE_TEGRA_OPTIMIZATION
4440 if( tegra::warpPerspective(src, dst, M, flags, borderType, borderValue) )
4444 if( !(flags & WARP_INVERSE_MAP) )
4447 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
4448 int depth = src.depth();
4449 int channels = src.channels();
4450 if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
4451 ( channels == 1 || channels == 3 || channels == 4 ) &&
4452 ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) )
4454 int type = src.type();
4455 ippiWarpPerspectiveBackFunc ippFunc =
4456 type == CV_8UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C1R :
4457 type == CV_8UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C3R :
4458 type == CV_8UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C4R :
4459 type == CV_16UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C1R :
4460 type == CV_16UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C3R :
4461 type == CV_16UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C4R :
4462 type == CV_32FC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C1R :
4463 type == CV_32FC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C3R :
4464 type == CV_32FC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C4R :
4467 flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
4468 flags == INTER_NEAREST ? IPPI_INTER_NN :
4469 flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
4471 if( mode && ippFunc )
4473 double coeffs[3][3];
4474 for( int i = 0; i < 3; i++ )
4476 for( int j = 0; j < 3; j++ )
4478 coeffs[i][j] = matM.at<double>(i, j);
4482 Range range(0, dst.rows);
4483 IPPwarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
4484 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
4491 Range range(0, dst.rows);
4492 warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
4493 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
4497 cv::Mat cv::getRotationMatrix2D( Point2f center, double angle, double scale )
4500 double alpha = cos(angle)*scale;
4501 double beta = sin(angle)*scale;
4503 Mat M(2, 3, CV_64F);
4504 double* m = (double*)M.data;
4508 m[2] = (1-alpha)*center.x - beta*center.y;
4511 m[5] = beta*center.x + (1-alpha)*center.y;
4516 /* Calculates coefficients of perspective transformation
4517 * which maps (xi,yi) to (ui,vi), (i=1,2,3,4):
4519 * c00*xi + c01*yi + c02
4520 * ui = ---------------------
4521 * c20*xi + c21*yi + c22
4523 * c10*xi + c11*yi + c12
4524 * vi = ---------------------
4525 * c20*xi + c21*yi + c22
4527 * Coefficients are calculated by solving linear system:
4528 * / x0 y0 1 0 0 0 -x0*u0 -y0*u0 \ /c00\ /u0\
4529 * | x1 y1 1 0 0 0 -x1*u1 -y1*u1 | |c01| |u1|
4530 * | x2 y2 1 0 0 0 -x2*u2 -y2*u2 | |c02| |u2|
4531 * | x3 y3 1 0 0 0 -x3*u3 -y3*u3 |.|c10|=|u3|,
4532 * | 0 0 0 x0 y0 1 -x0*v0 -y0*v0 | |c11| |v0|
4533 * | 0 0 0 x1 y1 1 -x1*v1 -y1*v1 | |c12| |v1|
4534 * | 0 0 0 x2 y2 1 -x2*v2 -y2*v2 | |c20| |v2|
4535 * \ 0 0 0 x3 y3 1 -x3*v3 -y3*v3 / \c21/ \v3/
4538 * cij - matrix coefficients, c22 = 1
4540 cv::Mat cv::getPerspectiveTransform( const Point2f src[], const Point2f dst[] )
4542 Mat M(3, 3, CV_64F), X(8, 1, CV_64F, M.data);
4543 double a[8][8], b[8];
4544 Mat A(8, 8, CV_64F, a), B(8, 1, CV_64F, b);
4546 for( int i = 0; i < 4; ++i )
4548 a[i][0] = a[i+4][3] = src[i].x;
4549 a[i][1] = a[i+4][4] = src[i].y;
4550 a[i][2] = a[i+4][5] = 1;
4551 a[i][3] = a[i][4] = a[i][5] =
4552 a[i+4][0] = a[i+4][1] = a[i+4][2] = 0;
4553 a[i][6] = -src[i].x*dst[i].x;
4554 a[i][7] = -src[i].y*dst[i].x;
4555 a[i+4][6] = -src[i].x*dst[i].y;
4556 a[i+4][7] = -src[i].y*dst[i].y;
4561 solve( A, B, X, DECOMP_SVD );
4562 ((double*)M.data)[8] = 1.;
4567 /* Calculates coefficients of affine transformation
4568 * which maps (xi,yi) to (ui,vi), (i=1,2,3):
4570 * ui = c00*xi + c01*yi + c02
4572 * vi = c10*xi + c11*yi + c12
4574 * Coefficients are calculated by solving linear system:
4575 * / x0 y0 1 0 0 0 \ /c00\ /u0\
4576 * | x1 y1 1 0 0 0 | |c01| |u1|
4577 * | x2 y2 1 0 0 0 | |c02| |u2|
4578 * | 0 0 0 x0 y0 1 | |c10| |v0|
4579 * | 0 0 0 x1 y1 1 | |c11| |v1|
4580 * \ 0 0 0 x2 y2 1 / |c12| |v2|
4583 * cij - matrix coefficients
4586 cv::Mat cv::getAffineTransform( const Point2f src[], const Point2f dst[] )
4588 Mat M(2, 3, CV_64F), X(6, 1, CV_64F, M.data);
4589 double a[6*6], b[6];
4590 Mat A(6, 6, CV_64F, a), B(6, 1, CV_64F, b);
4592 for( int i = 0; i < 3; i++ )
4596 a[j] = a[k+3] = src[i].x;
4597 a[j+1] = a[k+4] = src[i].y;
4598 a[j+2] = a[k+5] = 1;
4599 a[j+3] = a[j+4] = a[j+5] = 0;
4600 a[k] = a[k+1] = a[k+2] = 0;
4602 b[i*2+1] = dst[i].y;
4609 void cv::invertAffineTransform(InputArray _matM, OutputArray __iM)
4611 Mat matM = _matM.getMat();
4612 CV_Assert(matM.rows == 2 && matM.cols == 3);
4613 __iM.create(2, 3, matM.type());
4614 Mat _iM = __iM.getMat();
4616 if( matM.type() == CV_32F )
4618 const float* M = (const float*)matM.data;
4619 float* iM = (float*)_iM.data;
4620 int step = (int)(matM.step/sizeof(M[0])), istep = (int)(_iM.step/sizeof(iM[0]));
4622 double D = M[0]*M[step+1] - M[1]*M[step];
4623 D = D != 0 ? 1./D : 0;
4624 double A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
4625 double b1 = -A11*M[2] - A12*M[step+2];
4626 double b2 = -A21*M[2] - A22*M[step+2];
4628 iM[0] = (float)A11; iM[1] = (float)A12; iM[2] = (float)b1;
4629 iM[istep] = (float)A21; iM[istep+1] = (float)A22; iM[istep+2] = (float)b2;
4631 else if( matM.type() == CV_64F )
4633 const double* M = (const double*)matM.data;
4634 double* iM = (double*)_iM.data;
4635 int step = (int)(matM.step/sizeof(M[0])), istep = (int)(_iM.step/sizeof(iM[0]));
4637 double D = M[0]*M[step+1] - M[1]*M[step];
4638 D = D != 0 ? 1./D : 0;
4639 double A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
4640 double b1 = -A11*M[2] - A12*M[step+2];
4641 double b2 = -A21*M[2] - A22*M[step+2];
4643 iM[0] = A11; iM[1] = A12; iM[2] = b1;
4644 iM[istep] = A21; iM[istep+1] = A22; iM[istep+2] = b2;
4647 CV_Error( CV_StsUnsupportedFormat, "" );
4650 cv::Mat cv::getPerspectiveTransform(InputArray _src, InputArray _dst)
4652 Mat src = _src.getMat(), dst = _dst.getMat();
4653 CV_Assert(src.checkVector(2, CV_32F) == 4 && dst.checkVector(2, CV_32F) == 4);
4654 return getPerspectiveTransform((const Point2f*)src.data, (const Point2f*)dst.data);
4657 cv::Mat cv::getAffineTransform(InputArray _src, InputArray _dst)
4659 Mat src = _src.getMat(), dst = _dst.getMat();
4660 CV_Assert(src.checkVector(2, CV_32F) == 3 && dst.checkVector(2, CV_32F) == 3);
4661 return getAffineTransform((const Point2f*)src.data, (const Point2f*)dst.data);
4665 cvResize( const CvArr* srcarr, CvArr* dstarr, int method )
4667 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
4668 CV_Assert( src.type() == dst.type() );
4669 cv::resize( src, dst, dst.size(), (double)dst.cols/src.cols,
4670 (double)dst.rows/src.rows, method );
4675 cvWarpAffine( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
4676 int flags, CvScalar fillval )
4678 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
4679 cv::Mat matrix = cv::cvarrToMat(marr);
4680 CV_Assert( src.type() == dst.type() );
4681 cv::warpAffine( src, dst, matrix, dst.size(), flags,
4682 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
4687 cvWarpPerspective( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
4688 int flags, CvScalar fillval )
4690 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
4691 cv::Mat matrix = cv::cvarrToMat(marr);
4692 CV_Assert( src.type() == dst.type() );
4693 cv::warpPerspective( src, dst, matrix, dst.size(), flags,
4694 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
4699 cvRemap( const CvArr* srcarr, CvArr* dstarr,
4700 const CvArr* _mapx, const CvArr* _mapy,
4701 int flags, CvScalar fillval )
4703 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), dst0 = dst;
4704 cv::Mat mapx = cv::cvarrToMat(_mapx), mapy = cv::cvarrToMat(_mapy);
4705 CV_Assert( src.type() == dst.type() && dst.size() == mapx.size() );
4706 cv::remap( src, dst, mapx, mapy, flags & cv::INTER_MAX,
4707 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
4709 CV_Assert( dst0.data == dst.data );
4714 cv2DRotationMatrix( CvPoint2D32f center, double angle,
4715 double scale, CvMat* matrix )
4717 cv::Mat M0 = cv::cvarrToMat(matrix), M = cv::getRotationMatrix2D(center, angle, scale);
4718 CV_Assert( M.size() == M0.size() );
4719 M.convertTo(M0, M0.type());
4725 cvGetPerspectiveTransform( const CvPoint2D32f* src,
4726 const CvPoint2D32f* dst,
4729 cv::Mat M0 = cv::cvarrToMat(matrix),
4730 M = cv::getPerspectiveTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
4731 CV_Assert( M.size() == M0.size() );
4732 M.convertTo(M0, M0.type());
4738 cvGetAffineTransform( const CvPoint2D32f* src,
4739 const CvPoint2D32f* dst,
4742 cv::Mat M0 = cv::cvarrToMat(matrix),
4743 M = cv::getAffineTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
4744 CV_Assert( M.size() == M0.size() );
4745 M.convertTo(M0, M0.type());
4751 cvConvertMaps( const CvArr* arr1, const CvArr* arr2, CvArr* dstarr1, CvArr* dstarr2 )
4753 cv::Mat map1 = cv::cvarrToMat(arr1), map2;
4754 cv::Mat dstmap1 = cv::cvarrToMat(dstarr1), dstmap2;
4757 map2 = cv::cvarrToMat(arr2);
4760 dstmap2 = cv::cvarrToMat(dstarr2);
4761 if( dstmap2.type() == CV_16SC1 )
4762 dstmap2 = cv::Mat(dstmap2.size(), CV_16UC1, dstmap2.data, dstmap2.step);
4765 cv::convertMaps( map1, map2, dstmap1, dstmap2, dstmap1.type(), false );
4768 /****************************************************************************************\
4769 * Log-Polar Transform *
4770 \****************************************************************************************/
4772 /* now it is done via Remap; more correct implementation should use
4773 some super-sampling technique outside of the "fovea" circle */
4775 cvLogPolar( const CvArr* srcarr, CvArr* dstarr,
4776 CvPoint2D32f center, double M, int flags )
4778 cv::Ptr<CvMat> mapx, mapy;
4780 CvMat srcstub, *src = cvGetMat(srcarr, &srcstub);
4781 CvMat dststub, *dst = cvGetMat(dstarr, &dststub);
4782 CvSize ssize, dsize;
4784 if( !CV_ARE_TYPES_EQ( src, dst ))
4785 CV_Error( CV_StsUnmatchedFormats, "" );
4788 CV_Error( CV_StsOutOfRange, "M should be >0" );
4790 ssize = cvGetMatSize(src);
4791 dsize = cvGetMatSize(dst);
4793 mapx.reset(cvCreateMat( dsize.height, dsize.width, CV_32F ));
4794 mapy.reset(cvCreateMat( dsize.height, dsize.width, CV_32F ));
4796 if( !(flags & CV_WARP_INVERSE_MAP) )
4799 cv::AutoBuffer<double> _exp_tab(dsize.width);
4800 double* exp_tab = _exp_tab;
4802 for( rho = 0; rho < dst->width; rho++ )
4803 exp_tab[rho] = std::exp(rho/M);
4805 for( phi = 0; phi < dsize.height; phi++ )
4807 double cp = cos(phi*2*CV_PI/dsize.height);
4808 double sp = sin(phi*2*CV_PI/dsize.height);
4809 float* mx = (float*)(mapx->data.ptr + phi*mapx->step);
4810 float* my = (float*)(mapy->data.ptr + phi*mapy->step);
4812 for( rho = 0; rho < dsize.width; rho++ )
4814 double r = exp_tab[rho];
4815 double x = r*cp + center.x;
4816 double y = r*sp + center.y;
4826 CvMat bufx, bufy, bufp, bufa;
4827 double ascale = ssize.height/(2*CV_PI);
4828 cv::AutoBuffer<float> _buf(4*dsize.width);
4831 bufx = cvMat( 1, dsize.width, CV_32F, buf );
4832 bufy = cvMat( 1, dsize.width, CV_32F, buf + dsize.width );
4833 bufp = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*2 );
4834 bufa = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*3 );
4836 for( x = 0; x < dsize.width; x++ )
4837 bufx.data.fl[x] = (float)x - center.x;
4839 for( y = 0; y < dsize.height; y++ )
4841 float* mx = (float*)(mapx->data.ptr + y*mapx->step);
4842 float* my = (float*)(mapy->data.ptr + y*mapy->step);
4844 for( x = 0; x < dsize.width; x++ )
4845 bufy.data.fl[x] = (float)y - center.y;
4848 cvCartToPolar( &bufx, &bufy, &bufp, &bufa );
4850 for( x = 0; x < dsize.width; x++ )
4851 bufp.data.fl[x] += 1.f;
4853 cvLog( &bufp, &bufp );
4855 for( x = 0; x < dsize.width; x++ )
4857 double rho = bufp.data.fl[x]*M;
4858 double phi = bufa.data.fl[x]*ascale;
4864 for( x = 0; x < dsize.width; x++ )
4866 double xx = bufx.data.fl[x];
4867 double yy = bufy.data.fl[x];
4869 double p = log(std::sqrt(xx*xx + yy*yy) + 1.)*M;
4870 double a = atan2(yy,xx);
4882 cvRemap( src, dst, mapx, mapy, flags, cvScalarAll(0) );
4885 void cv::logPolar( InputArray _src, OutputArray _dst,
4886 Point2f center, double M, int flags )
4888 Mat src = _src.getMat();
4889 _dst.create( src.size(), src.type() );
4890 CvMat c_src = src, c_dst = _dst.getMat();
4891 cvLogPolar( &c_src, &c_dst, center, M, flags );
4894 /****************************************************************************************
4895 Linear-Polar Transform
4896 J.L. Blanco, Apr 2009
4897 ****************************************************************************************/
4899 void cvLinearPolar( const CvArr* srcarr, CvArr* dstarr,
4900 CvPoint2D32f center, double maxRadius, int flags )
4902 cv::Ptr<CvMat> mapx, mapy;
4904 CvMat srcstub, *src = (CvMat*)srcarr;
4905 CvMat dststub, *dst = (CvMat*)dstarr;
4906 CvSize ssize, dsize;
4908 src = cvGetMat( srcarr, &srcstub,0,0 );
4909 dst = cvGetMat( dstarr, &dststub,0,0 );
4911 if( !CV_ARE_TYPES_EQ( src, dst ))
4912 CV_Error( CV_StsUnmatchedFormats, "" );
4914 ssize.width = src->cols;
4915 ssize.height = src->rows;
4916 dsize.width = dst->cols;
4917 dsize.height = dst->rows;
4919 mapx.reset(cvCreateMat( dsize.height, dsize.width, CV_32F ));
4920 mapy.reset(cvCreateMat( dsize.height, dsize.width, CV_32F ));
4922 if( !(flags & CV_WARP_INVERSE_MAP) )
4926 for( phi = 0; phi < dsize.height; phi++ )
4928 double cp = cos(phi*2*CV_PI/dsize.height);
4929 double sp = sin(phi*2*CV_PI/dsize.height);
4930 float* mx = (float*)(mapx->data.ptr + phi*mapx->step);
4931 float* my = (float*)(mapy->data.ptr + phi*mapy->step);
4933 for( rho = 0; rho < dsize.width; rho++ )
4935 double r = maxRadius*(rho+1)/dsize.width;
4936 double x = r*cp + center.x;
4937 double y = r*sp + center.y;
4947 CvMat bufx, bufy, bufp, bufa;
4948 const double ascale = ssize.height/(2*CV_PI);
4949 const double pscale = ssize.width/maxRadius;
4951 cv::AutoBuffer<float> _buf(4*dsize.width);
4954 bufx = cvMat( 1, dsize.width, CV_32F, buf );
4955 bufy = cvMat( 1, dsize.width, CV_32F, buf + dsize.width );
4956 bufp = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*2 );
4957 bufa = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*3 );
4959 for( x = 0; x < dsize.width; x++ )
4960 bufx.data.fl[x] = (float)x - center.x;
4962 for( y = 0; y < dsize.height; y++ )
4964 float* mx = (float*)(mapx->data.ptr + y*mapx->step);
4965 float* my = (float*)(mapy->data.ptr + y*mapy->step);
4967 for( x = 0; x < dsize.width; x++ )
4968 bufy.data.fl[x] = (float)y - center.y;
4970 cvCartToPolar( &bufx, &bufy, &bufp, &bufa, 0 );
4972 for( x = 0; x < dsize.width; x++ )
4973 bufp.data.fl[x] += 1.f;
4975 for( x = 0; x < dsize.width; x++ )
4977 double rho = bufp.data.fl[x]*pscale;
4978 double phi = bufa.data.fl[x]*ascale;
4985 cvRemap( src, dst, mapx, mapy, flags, cvScalarAll(0) );
4988 void cv::linearPolar( InputArray _src, OutputArray _dst,
4989 Point2f center, double maxRadius, int flags )
4991 Mat src = _src.getMat();
4992 _dst.create( src.size(), src.type() );
4993 CvMat c_src = src, c_dst = _dst.getMat();
4994 cvLinearPolar( &c_src, &c_dst, center, maxRadius, flags );