1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 /* ////////////////////////////////////////////////////////////////////
45 // Geometrical transforms on images and matrices: rotation, zoom etc.
49 #include "precomp.hpp"
54 /************** interpolation formulas and tables ***************/
56 const int INTER_RESIZE_COEF_BITS=11;
57 const int INTER_RESIZE_COEF_SCALE=1 << INTER_RESIZE_COEF_BITS;
59 const int INTER_REMAP_COEF_BITS=15;
60 const int INTER_REMAP_COEF_SCALE=1 << INTER_REMAP_COEF_BITS;
62 static uchar NNDeltaTab_i[INTER_TAB_SIZE2][2];
64 static float BilinearTab_f[INTER_TAB_SIZE2][2][2];
65 static short BilinearTab_i[INTER_TAB_SIZE2][2][2];
68 static short CV_DECL_ALIGNED(16) BilinearTab_iC4[INTER_TAB_SIZE2][2][8];
71 static float BicubicTab_f[INTER_TAB_SIZE2][4][4];
72 static short BicubicTab_i[INTER_TAB_SIZE2][4][4];
74 static float Lanczos4Tab_f[INTER_TAB_SIZE2][8][8];
75 static short Lanczos4Tab_i[INTER_TAB_SIZE2][8][8];
77 static inline void interpolateLinear( float x, float* coeffs )
83 static inline void interpolateCubic( float x, float* coeffs )
85 const float A = -0.75f;
87 coeffs[0] = ((A*(x + 1) - 5*A)*(x + 1) + 8*A)*(x + 1) - 4*A;
88 coeffs[1] = ((A + 2)*x - (A + 3))*x*x + 1;
89 coeffs[2] = ((A + 2)*(1 - x) - (A + 3))*(1 - x)*(1 - x) + 1;
90 coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2];
93 static inline void interpolateLanczos4( float x, float* coeffs )
95 static const double s45 = 0.70710678118654752440084436210485;
96 static const double cs[][2]=
97 {{1, 0}, {-s45, -s45}, {0, 1}, {s45, -s45}, {-1, 0}, {s45, s45}, {0, -1}, {-s45, s45}};
100 if( x < FLT_EPSILON )
102 for( int i = 0; i < 8; i++ )
109 double y0=-(x+3)*CV_PI*0.25, s0 = sin(y0), c0=cos(y0);
110 for( i = 0; i < 8; i++ )
112 double y = -(x+3-i)*CV_PI*0.25;
113 coeffs[i] = (float)((cs[i][0]*s0 + cs[i][1]*c0)/(y*y));
118 for( i = 0; i < 8; i++ )
122 static void initInterTab1D(int method, float* tab, int tabsz)
124 float scale = 1.f/tabsz;
125 if( method == INTER_LINEAR )
127 for( int i = 0; i < tabsz; i++, tab += 2 )
128 interpolateLinear( i*scale, tab );
130 else if( method == INTER_CUBIC )
132 for( int i = 0; i < tabsz; i++, tab += 4 )
133 interpolateCubic( i*scale, tab );
135 else if( method == INTER_LANCZOS4 )
137 for( int i = 0; i < tabsz; i++, tab += 8 )
138 interpolateLanczos4( i*scale, tab );
141 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
145 static const void* initInterTab2D( int method, bool fixpt )
147 static bool inittab[INTER_MAX+1] = {false};
151 if( method == INTER_LINEAR )
152 tab = BilinearTab_f[0][0], itab = BilinearTab_i[0][0], ksize=2;
153 else if( method == INTER_CUBIC )
154 tab = BicubicTab_f[0][0], itab = BicubicTab_i[0][0], ksize=4;
155 else if( method == INTER_LANCZOS4 )
156 tab = Lanczos4Tab_f[0][0], itab = Lanczos4Tab_i[0][0], ksize=8;
158 CV_Error( CV_StsBadArg, "Unknown/unsupported interpolation type" );
160 if( !inittab[method] )
162 AutoBuffer<float> _tab(8*INTER_TAB_SIZE);
164 initInterTab1D(method, _tab, INTER_TAB_SIZE);
165 for( i = 0; i < INTER_TAB_SIZE; i++ )
166 for( j = 0; j < INTER_TAB_SIZE; j++, tab += ksize*ksize, itab += ksize*ksize )
169 NNDeltaTab_i[i*INTER_TAB_SIZE+j][0] = j < INTER_TAB_SIZE/2;
170 NNDeltaTab_i[i*INTER_TAB_SIZE+j][1] = i < INTER_TAB_SIZE/2;
172 for( k1 = 0; k1 < ksize; k1++ )
174 float vy = _tab[i*ksize + k1];
175 for( k2 = 0; k2 < ksize; k2++ )
177 float v = vy*_tab[j*ksize + k2];
178 tab[k1*ksize + k2] = v;
179 isum += itab[k1*ksize + k2] = saturate_cast<short>(v*INTER_REMAP_COEF_SCALE);
183 if( isum != INTER_REMAP_COEF_SCALE )
185 int diff = isum - INTER_REMAP_COEF_SCALE;
186 int ksize2 = ksize/2, Mk1=ksize2, Mk2=ksize2, mk1=ksize2, mk2=ksize2;
187 for( k1 = ksize2; k1 < ksize2+2; k1++ )
188 for( k2 = ksize2; k2 < ksize2+2; k2++ )
190 if( itab[k1*ksize+k2] < itab[mk1*ksize+mk2] )
192 else if( itab[k1*ksize+k2] > itab[Mk1*ksize+Mk2] )
196 itab[Mk1*ksize + Mk2] = (short)(itab[Mk1*ksize + Mk2] - diff);
198 itab[mk1*ksize + mk2] = (short)(itab[mk1*ksize + mk2] - diff);
201 tab -= INTER_TAB_SIZE2*ksize*ksize;
202 itab -= INTER_TAB_SIZE2*ksize*ksize;
204 if( method == INTER_LINEAR )
206 for( i = 0; i < INTER_TAB_SIZE2; i++ )
207 for( j = 0; j < 4; j++ )
209 BilinearTab_iC4[i][0][j*2] = BilinearTab_i[i][0][0];
210 BilinearTab_iC4[i][0][j*2+1] = BilinearTab_i[i][0][1];
211 BilinearTab_iC4[i][1][j*2] = BilinearTab_i[i][1][0];
212 BilinearTab_iC4[i][1][j*2+1] = BilinearTab_i[i][1][1];
216 inittab[method] = true;
218 return fixpt ? (const void*)itab : (const void*)tab;
222 template<typename ST, typename DT> struct Cast
227 DT operator()(ST val) const { return saturate_cast<DT>(val); }
230 template<typename ST, typename DT, int bits> struct FixedPtCast
234 enum { SHIFT = bits, DELTA = 1 << (bits-1) };
236 DT operator()(ST val) const { return saturate_cast<DT>((val + DELTA)>>SHIFT); }
239 /****************************************************************************************\
241 \****************************************************************************************/
244 resizeNN( const Mat& src, Mat& dst, double fx, double fy )
246 Size ssize = src.size(), dsize = dst.size();
247 AutoBuffer<int> _x_ofs(dsize.width);
249 int pix_size = (int)src.elemSize();
250 int pix_size4 = (int)(pix_size / sizeof(int));
251 double ifx = 1./fx, ify = 1./fy;
254 for( x = 0; x < dsize.width; x++ )
256 int sx = cvFloor(x*ifx);
257 x_ofs[x] = std::min(sx, ssize.width-1)*pix_size;
260 for( y = 0; y < dsize.height; y++ )
262 uchar* D = dst.data + dst.step*y;
263 int sy = std::min(cvFloor(y*ify), ssize.height-1);
264 const uchar* S = src.data + src.step*sy;
269 for( x = 0; x <= dsize.width - 2; x += 2 )
271 uchar t0 = S[x_ofs[x]];
272 uchar t1 = S[x_ofs[x+1]];
277 for( ; x < dsize.width; x++ )
281 for( x = 0; x < dsize.width; x++ )
282 *(ushort*)(D + x*2) = *(ushort*)(S + x_ofs[x]);
285 for( x = 0; x < dsize.width; x++, D += 3 )
287 const uchar* _tS = S + x_ofs[x];
288 D[0] = _tS[0]; D[1] = _tS[1]; D[2] = _tS[2];
292 for( x = 0; x < dsize.width; x++ )
293 *(int*)(D + x*4) = *(int*)(S + x_ofs[x]);
296 for( x = 0; x < dsize.width; x++, D += 6 )
298 const ushort* _tS = (const ushort*)(S + x_ofs[x]);
299 ushort* _tD = (ushort*)D;
300 _tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2];
304 for( x = 0; x < dsize.width; x++, D += 8 )
306 const int* _tS = (const int*)(S + x_ofs[x]);
308 _tD[0] = _tS[0]; _tD[1] = _tS[1];
312 for( x = 0; x < dsize.width; x++, D += 12 )
314 const int* _tS = (const int*)(S + x_ofs[x]);
316 _tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2];
320 for( x = 0; x < dsize.width; x++, D += pix_size )
322 const int* _tS = (const int*)(S + x_ofs[x]);
324 for( int k = 0; k < pix_size4; k++ )
334 int operator()(const uchar**, uchar*, const uchar*, int ) const { return 0; }
339 int operator()(const uchar**, uchar**, int, const int*,
340 const uchar*, int, int, int, int, int) const { return 0; }
345 struct VResizeLinearVec_32s8u
347 int operator()(const uchar** _src, uchar* dst, const uchar* _beta, int width ) const
349 if( !checkHardwareSupport(CV_CPU_SSE2) )
352 const int** src = (const int**)_src;
353 const short* beta = (const short*)_beta;
354 const int *S0 = src[0], *S1 = src[1];
356 __m128i b0 = _mm_set1_epi16(beta[0]), b1 = _mm_set1_epi16(beta[1]);
357 __m128i delta = _mm_set1_epi16(2);
359 if( (((size_t)S0|(size_t)S1)&15) == 0 )
360 for( ; x <= width - 16; x += 16 )
362 __m128i x0, x1, x2, y0, y1, y2;
363 x0 = _mm_load_si128((const __m128i*)(S0 + x));
364 x1 = _mm_load_si128((const __m128i*)(S0 + x + 4));
365 y0 = _mm_load_si128((const __m128i*)(S1 + x));
366 y1 = _mm_load_si128((const __m128i*)(S1 + x + 4));
367 x0 = _mm_packs_epi32(_mm_srai_epi32(x0, 4), _mm_srai_epi32(x1, 4));
368 y0 = _mm_packs_epi32(_mm_srai_epi32(y0, 4), _mm_srai_epi32(y1, 4));
370 x1 = _mm_load_si128((const __m128i*)(S0 + x + 8));
371 x2 = _mm_load_si128((const __m128i*)(S0 + x + 12));
372 y1 = _mm_load_si128((const __m128i*)(S1 + x + 8));
373 y2 = _mm_load_si128((const __m128i*)(S1 + x + 12));
374 x1 = _mm_packs_epi32(_mm_srai_epi32(x1, 4), _mm_srai_epi32(x2, 4));
375 y1 = _mm_packs_epi32(_mm_srai_epi32(y1, 4), _mm_srai_epi32(y2, 4));
377 x0 = _mm_adds_epi16(_mm_mulhi_epi16( x0, b0 ), _mm_mulhi_epi16( y0, b1 ));
378 x1 = _mm_adds_epi16(_mm_mulhi_epi16( x1, b0 ), _mm_mulhi_epi16( y1, b1 ));
380 x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
381 x1 = _mm_srai_epi16(_mm_adds_epi16(x1, delta), 2);
382 _mm_storeu_si128( (__m128i*)(dst + x), _mm_packus_epi16(x0, x1));
385 for( ; x <= width - 16; x += 16 )
387 __m128i x0, x1, x2, y0, y1, y2;
388 x0 = _mm_loadu_si128((const __m128i*)(S0 + x));
389 x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 4));
390 y0 = _mm_loadu_si128((const __m128i*)(S1 + x));
391 y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 4));
392 x0 = _mm_packs_epi32(_mm_srai_epi32(x0, 4), _mm_srai_epi32(x1, 4));
393 y0 = _mm_packs_epi32(_mm_srai_epi32(y0, 4), _mm_srai_epi32(y1, 4));
395 x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 8));
396 x2 = _mm_loadu_si128((const __m128i*)(S0 + x + 12));
397 y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 8));
398 y2 = _mm_loadu_si128((const __m128i*)(S1 + x + 12));
399 x1 = _mm_packs_epi32(_mm_srai_epi32(x1, 4), _mm_srai_epi32(x2, 4));
400 y1 = _mm_packs_epi32(_mm_srai_epi32(y1, 4), _mm_srai_epi32(y2, 4));
402 x0 = _mm_adds_epi16(_mm_mulhi_epi16( x0, b0 ), _mm_mulhi_epi16( y0, b1 ));
403 x1 = _mm_adds_epi16(_mm_mulhi_epi16( x1, b0 ), _mm_mulhi_epi16( y1, b1 ));
405 x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
406 x1 = _mm_srai_epi16(_mm_adds_epi16(x1, delta), 2);
407 _mm_storeu_si128( (__m128i*)(dst + x), _mm_packus_epi16(x0, x1));
410 for( ; x < width - 4; x += 4 )
413 x0 = _mm_srai_epi32(_mm_loadu_si128((const __m128i*)(S0 + x)), 4);
414 y0 = _mm_srai_epi32(_mm_loadu_si128((const __m128i*)(S1 + x)), 4);
415 x0 = _mm_packs_epi32(x0, x0);
416 y0 = _mm_packs_epi32(y0, y0);
417 x0 = _mm_adds_epi16(_mm_mulhi_epi16(x0, b0), _mm_mulhi_epi16(y0, b1));
418 x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
419 x0 = _mm_packus_epi16(x0, x0);
420 *(int*)(dst + x) = _mm_cvtsi128_si32(x0);
428 template<int shiftval> struct VResizeLinearVec_32f16
430 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
432 if( !checkHardwareSupport(CV_CPU_SSE2) )
435 const float** src = (const float**)_src;
436 const float* beta = (const float*)_beta;
437 const float *S0 = src[0], *S1 = src[1];
438 ushort* dst = (ushort*)_dst;
441 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]);
442 __m128i preshift = _mm_set1_epi32(shiftval);
443 __m128i postshift = _mm_set1_epi16((short)shiftval);
445 if( (((size_t)S0|(size_t)S1)&15) == 0 )
446 for( ; x <= width - 16; x += 16 )
448 __m128 x0, x1, y0, y1;
450 x0 = _mm_load_ps(S0 + x);
451 x1 = _mm_load_ps(S0 + x + 4);
452 y0 = _mm_load_ps(S1 + x);
453 y1 = _mm_load_ps(S1 + x + 4);
455 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
456 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
457 t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
458 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
459 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t2), postshift);
461 x0 = _mm_load_ps(S0 + x + 8);
462 x1 = _mm_load_ps(S0 + x + 12);
463 y0 = _mm_load_ps(S1 + x + 8);
464 y1 = _mm_load_ps(S1 + x + 12);
466 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
467 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
468 t1 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
469 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
470 t1 = _mm_add_epi16(_mm_packs_epi32(t1, t2), postshift);
472 _mm_storeu_si128( (__m128i*)(dst + x), t0);
473 _mm_storeu_si128( (__m128i*)(dst + x + 8), t1);
476 for( ; x <= width - 16; x += 16 )
478 __m128 x0, x1, y0, y1;
480 x0 = _mm_loadu_ps(S0 + x);
481 x1 = _mm_loadu_ps(S0 + x + 4);
482 y0 = _mm_loadu_ps(S1 + x);
483 y1 = _mm_loadu_ps(S1 + x + 4);
485 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
486 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
487 t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
488 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
489 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t2), postshift);
491 x0 = _mm_loadu_ps(S0 + x + 8);
492 x1 = _mm_loadu_ps(S0 + x + 12);
493 y0 = _mm_loadu_ps(S1 + x + 8);
494 y1 = _mm_loadu_ps(S1 + x + 12);
496 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
497 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
498 t1 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
499 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
500 t1 = _mm_add_epi16(_mm_packs_epi32(t1, t2), postshift);
502 _mm_storeu_si128( (__m128i*)(dst + x), t0);
503 _mm_storeu_si128( (__m128i*)(dst + x + 8), t1);
506 for( ; x < width - 4; x += 4 )
510 x0 = _mm_loadu_ps(S0 + x);
511 y0 = _mm_loadu_ps(S1 + x);
513 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
514 t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
515 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t0), postshift);
516 _mm_storel_epi64( (__m128i*)(dst + x), t0);
523 typedef VResizeLinearVec_32f16<SHRT_MIN> VResizeLinearVec_32f16u;
524 typedef VResizeLinearVec_32f16<0> VResizeLinearVec_32f16s;
526 struct VResizeLinearVec_32f
528 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
530 if( !checkHardwareSupport(CV_CPU_SSE) )
533 const float** src = (const float**)_src;
534 const float* beta = (const float*)_beta;
535 const float *S0 = src[0], *S1 = src[1];
536 float* dst = (float*)_dst;
539 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]);
541 if( (((size_t)S0|(size_t)S1)&15) == 0 )
542 for( ; x <= width - 8; x += 8 )
544 __m128 x0, x1, y0, y1;
545 x0 = _mm_load_ps(S0 + x);
546 x1 = _mm_load_ps(S0 + x + 4);
547 y0 = _mm_load_ps(S1 + x);
548 y1 = _mm_load_ps(S1 + x + 4);
550 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
551 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
553 _mm_storeu_ps( dst + x, x0);
554 _mm_storeu_ps( dst + x + 4, x1);
557 for( ; x <= width - 8; x += 8 )
559 __m128 x0, x1, y0, y1;
560 x0 = _mm_loadu_ps(S0 + x);
561 x1 = _mm_loadu_ps(S0 + x + 4);
562 y0 = _mm_loadu_ps(S1 + x);
563 y1 = _mm_loadu_ps(S1 + x + 4);
565 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
566 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
568 _mm_storeu_ps( dst + x, x0);
569 _mm_storeu_ps( dst + x + 4, x1);
577 struct VResizeCubicVec_32s8u
579 int operator()(const uchar** _src, uchar* dst, const uchar* _beta, int width ) const
581 if( !checkHardwareSupport(CV_CPU_SSE2) )
584 const int** src = (const int**)_src;
585 const short* beta = (const short*)_beta;
586 const int *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
588 float scale = 1.f/(INTER_RESIZE_COEF_SCALE*INTER_RESIZE_COEF_SCALE);
589 __m128 b0 = _mm_set1_ps(beta[0]*scale), b1 = _mm_set1_ps(beta[1]*scale),
590 b2 = _mm_set1_ps(beta[2]*scale), b3 = _mm_set1_ps(beta[3]*scale);
592 if( (((size_t)S0|(size_t)S1|(size_t)S2|(size_t)S3)&15) == 0 )
593 for( ; x <= width - 8; x += 8 )
595 __m128i x0, x1, y0, y1;
596 __m128 s0, s1, f0, f1;
597 x0 = _mm_load_si128((const __m128i*)(S0 + x));
598 x1 = _mm_load_si128((const __m128i*)(S0 + x + 4));
599 y0 = _mm_load_si128((const __m128i*)(S1 + x));
600 y1 = _mm_load_si128((const __m128i*)(S1 + x + 4));
602 s0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b0);
603 s1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b0);
604 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b1);
605 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b1);
606 s0 = _mm_add_ps(s0, f0);
607 s1 = _mm_add_ps(s1, f1);
609 x0 = _mm_load_si128((const __m128i*)(S2 + x));
610 x1 = _mm_load_si128((const __m128i*)(S2 + x + 4));
611 y0 = _mm_load_si128((const __m128i*)(S3 + x));
612 y1 = _mm_load_si128((const __m128i*)(S3 + x + 4));
614 f0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b2);
615 f1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b2);
616 s0 = _mm_add_ps(s0, f0);
617 s1 = _mm_add_ps(s1, f1);
618 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b3);
619 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b3);
620 s0 = _mm_add_ps(s0, f0);
621 s1 = _mm_add_ps(s1, f1);
623 x0 = _mm_cvtps_epi32(s0);
624 x1 = _mm_cvtps_epi32(s1);
626 x0 = _mm_packs_epi32(x0, x1);
627 _mm_storel_epi64( (__m128i*)(dst + x), _mm_packus_epi16(x0, x0));
630 for( ; x <= width - 8; x += 8 )
632 __m128i x0, x1, y0, y1;
633 __m128 s0, s1, f0, f1;
634 x0 = _mm_loadu_si128((const __m128i*)(S0 + x));
635 x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 4));
636 y0 = _mm_loadu_si128((const __m128i*)(S1 + x));
637 y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 4));
639 s0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b0);
640 s1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b0);
641 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b1);
642 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b1);
643 s0 = _mm_add_ps(s0, f0);
644 s1 = _mm_add_ps(s1, f1);
646 x0 = _mm_loadu_si128((const __m128i*)(S2 + x));
647 x1 = _mm_loadu_si128((const __m128i*)(S2 + x + 4));
648 y0 = _mm_loadu_si128((const __m128i*)(S3 + x));
649 y1 = _mm_loadu_si128((const __m128i*)(S3 + x + 4));
651 f0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b2);
652 f1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b2);
653 s0 = _mm_add_ps(s0, f0);
654 s1 = _mm_add_ps(s1, f1);
655 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b3);
656 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b3);
657 s0 = _mm_add_ps(s0, f0);
658 s1 = _mm_add_ps(s1, f1);
660 x0 = _mm_cvtps_epi32(s0);
661 x1 = _mm_cvtps_epi32(s1);
663 x0 = _mm_packs_epi32(x0, x1);
664 _mm_storel_epi64( (__m128i*)(dst + x), _mm_packus_epi16(x0, x0));
672 template<int shiftval> struct VResizeCubicVec_32f16
674 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
676 if( !checkHardwareSupport(CV_CPU_SSE2) )
679 const float** src = (const float**)_src;
680 const float* beta = (const float*)_beta;
681 const float *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
682 ushort* dst = (ushort*)_dst;
684 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]),
685 b2 = _mm_set1_ps(beta[2]), b3 = _mm_set1_ps(beta[3]);
686 __m128i preshift = _mm_set1_epi32(shiftval);
687 __m128i postshift = _mm_set1_epi16((short)shiftval);
689 for( ; x <= width - 8; x += 8 )
691 __m128 x0, x1, y0, y1, s0, s1;
693 x0 = _mm_loadu_ps(S0 + x);
694 x1 = _mm_loadu_ps(S0 + x + 4);
695 y0 = _mm_loadu_ps(S1 + x);
696 y1 = _mm_loadu_ps(S1 + x + 4);
698 s0 = _mm_mul_ps(x0, b0);
699 s1 = _mm_mul_ps(x1, b0);
700 y0 = _mm_mul_ps(y0, b1);
701 y1 = _mm_mul_ps(y1, b1);
702 s0 = _mm_add_ps(s0, y0);
703 s1 = _mm_add_ps(s1, y1);
705 x0 = _mm_loadu_ps(S2 + x);
706 x1 = _mm_loadu_ps(S2 + x + 4);
707 y0 = _mm_loadu_ps(S3 + x);
708 y1 = _mm_loadu_ps(S3 + x + 4);
710 x0 = _mm_mul_ps(x0, b2);
711 x1 = _mm_mul_ps(x1, b2);
712 y0 = _mm_mul_ps(y0, b3);
713 y1 = _mm_mul_ps(y1, b3);
714 s0 = _mm_add_ps(s0, x0);
715 s1 = _mm_add_ps(s1, x1);
716 s0 = _mm_add_ps(s0, y0);
717 s1 = _mm_add_ps(s1, y1);
719 t0 = _mm_add_epi32(_mm_cvtps_epi32(s0), preshift);
720 t1 = _mm_add_epi32(_mm_cvtps_epi32(s1), preshift);
722 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t1), postshift);
723 _mm_storeu_si128( (__m128i*)(dst + x), t0);
730 typedef VResizeCubicVec_32f16<SHRT_MIN> VResizeCubicVec_32f16u;
731 typedef VResizeCubicVec_32f16<0> VResizeCubicVec_32f16s;
733 struct VResizeCubicVec_32f
735 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
737 if( !checkHardwareSupport(CV_CPU_SSE) )
740 const float** src = (const float**)_src;
741 const float* beta = (const float*)_beta;
742 const float *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
743 float* dst = (float*)_dst;
745 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]),
746 b2 = _mm_set1_ps(beta[2]), b3 = _mm_set1_ps(beta[3]);
748 for( ; x <= width - 8; x += 8 )
750 __m128 x0, x1, y0, y1, s0, s1;
751 x0 = _mm_loadu_ps(S0 + x);
752 x1 = _mm_loadu_ps(S0 + x + 4);
753 y0 = _mm_loadu_ps(S1 + x);
754 y1 = _mm_loadu_ps(S1 + x + 4);
756 s0 = _mm_mul_ps(x0, b0);
757 s1 = _mm_mul_ps(x1, b0);
758 y0 = _mm_mul_ps(y0, b1);
759 y1 = _mm_mul_ps(y1, b1);
760 s0 = _mm_add_ps(s0, y0);
761 s1 = _mm_add_ps(s1, y1);
763 x0 = _mm_loadu_ps(S2 + x);
764 x1 = _mm_loadu_ps(S2 + x + 4);
765 y0 = _mm_loadu_ps(S3 + x);
766 y1 = _mm_loadu_ps(S3 + x + 4);
768 x0 = _mm_mul_ps(x0, b2);
769 x1 = _mm_mul_ps(x1, b2);
770 y0 = _mm_mul_ps(y0, b3);
771 y1 = _mm_mul_ps(y1, b3);
772 s0 = _mm_add_ps(s0, x0);
773 s1 = _mm_add_ps(s1, x1);
774 s0 = _mm_add_ps(s0, y0);
775 s1 = _mm_add_ps(s1, y1);
777 _mm_storeu_ps( dst + x, s0);
778 _mm_storeu_ps( dst + x + 4, s1);
785 typedef HResizeNoVec HResizeLinearVec_8u32s;
786 typedef HResizeNoVec HResizeLinearVec_16u32f;
787 typedef HResizeNoVec HResizeLinearVec_16s32f;
788 typedef HResizeNoVec HResizeLinearVec_32f;
789 typedef HResizeNoVec HResizeLinearVec_64f;
793 typedef HResizeNoVec HResizeLinearVec_8u32s;
794 typedef HResizeNoVec HResizeLinearVec_16u32f;
795 typedef HResizeNoVec HResizeLinearVec_16s32f;
796 typedef HResizeNoVec HResizeLinearVec_32f;
798 typedef VResizeNoVec VResizeLinearVec_32s8u;
799 typedef VResizeNoVec VResizeLinearVec_32f16u;
800 typedef VResizeNoVec VResizeLinearVec_32f16s;
801 typedef VResizeNoVec VResizeLinearVec_32f;
803 typedef VResizeNoVec VResizeCubicVec_32s8u;
804 typedef VResizeNoVec VResizeCubicVec_32f16u;
805 typedef VResizeNoVec VResizeCubicVec_32f16s;
806 typedef VResizeNoVec VResizeCubicVec_32f;
811 template<typename T, typename WT, typename AT, int ONE, class VecOp>
814 typedef T value_type;
816 typedef AT alpha_type;
818 void operator()(const T** src, WT** dst, int count,
819 const int* xofs, const AT* alpha,
820 int swidth, int dwidth, int cn, int xmin, int xmax ) const
825 int dx0 = vecOp((const uchar**)src, (uchar**)dst, count,
826 xofs, (const uchar*)alpha, swidth, dwidth, cn, xmin, xmax );
828 for( k = 0; k <= count - 2; k++ )
830 const T *S0 = src[k], *S1 = src[k+1];
831 WT *D0 = dst[k], *D1 = dst[k+1];
832 for( dx = dx0; dx < xmax; dx++ )
835 WT a0 = alpha[dx*2], a1 = alpha[dx*2+1];
836 WT t0 = S0[sx]*a0 + S0[sx + cn]*a1;
837 WT t1 = S1[sx]*a0 + S1[sx + cn]*a1;
838 D0[dx] = t0; D1[dx] = t1;
841 for( ; dx < dwidth; dx++ )
844 D0[dx] = WT(S0[sx]*ONE); D1[dx] = WT(S1[sx]*ONE);
848 for( ; k < count; k++ )
852 for( dx = 0; dx < xmax; dx++ )
855 D[dx] = S[sx]*alpha[dx*2] + S[sx+cn]*alpha[dx*2+1];
858 for( ; dx < dwidth; dx++ )
859 D[dx] = WT(S[xofs[dx]]*ONE);
865 template<typename T, typename WT, typename AT, class CastOp, class VecOp>
868 typedef T value_type;
870 typedef AT alpha_type;
872 void operator()(const WT** src, T* dst, const AT* beta, int width ) const
874 WT b0 = beta[0], b1 = beta[1];
875 const WT *S0 = src[0], *S1 = src[1];
879 int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
880 #if CV_ENABLE_UNROLLED
881 for( ; x <= width - 4; x += 4 )
884 t0 = S0[x]*b0 + S1[x]*b1;
885 t1 = S0[x+1]*b0 + S1[x+1]*b1;
886 dst[x] = castOp(t0); dst[x+1] = castOp(t1);
887 t0 = S0[x+2]*b0 + S1[x+2]*b1;
888 t1 = S0[x+3]*b0 + S1[x+3]*b1;
889 dst[x+2] = castOp(t0); dst[x+3] = castOp(t1);
892 for( ; x < width; x++ )
893 dst[x] = castOp(S0[x]*b0 + S1[x]*b1);
898 template<typename T, typename WT, typename AT>
901 typedef T value_type;
903 typedef AT alpha_type;
905 void operator()(const T** src, WT** dst, int count,
906 const int* xofs, const AT* alpha,
907 int swidth, int dwidth, int cn, int xmin, int xmax ) const
909 for( int k = 0; k < count; k++ )
913 int dx = 0, limit = xmin;
916 for( ; dx < limit; dx++, alpha += 4 )
918 int j, sx = xofs[dx] - cn;
920 for( j = 0; j < 4; j++ )
923 if( (unsigned)sxj >= (unsigned)swidth )
927 while( sxj >= swidth )
930 v += S[sxj]*alpha[j];
934 if( limit == dwidth )
936 for( ; dx < xmax; dx++, alpha += 4 )
939 D[dx] = S[sx-cn]*alpha[0] + S[sx]*alpha[1] +
940 S[sx+cn]*alpha[2] + S[sx+cn*2]*alpha[3];
950 template<typename T, typename WT, typename AT, class CastOp, class VecOp>
953 typedef T value_type;
955 typedef AT alpha_type;
957 void operator()(const WT** src, T* dst, const AT* beta, int width ) const
959 WT b0 = beta[0], b1 = beta[1], b2 = beta[2], b3 = beta[3];
960 const WT *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
964 int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
965 for( ; x < width; x++ )
966 dst[x] = castOp(S0[x]*b0 + S1[x]*b1 + S2[x]*b2 + S3[x]*b3);
971 template<typename T, typename WT, typename AT>
972 struct HResizeLanczos4
974 typedef T value_type;
976 typedef AT alpha_type;
978 void operator()(const T** src, WT** dst, int count,
979 const int* xofs, const AT* alpha,
980 int swidth, int dwidth, int cn, int xmin, int xmax ) const
982 for( int k = 0; k < count; k++ )
986 int dx = 0, limit = xmin;
989 for( ; dx < limit; dx++, alpha += 8 )
991 int j, sx = xofs[dx] - cn*3;
993 for( j = 0; j < 8; j++ )
996 if( (unsigned)sxj >= (unsigned)swidth )
1000 while( sxj >= swidth )
1003 v += S[sxj]*alpha[j];
1007 if( limit == dwidth )
1009 for( ; dx < xmax; dx++, alpha += 8 )
1012 D[dx] = S[sx-cn*3]*alpha[0] + S[sx-cn*2]*alpha[1] +
1013 S[sx-cn]*alpha[2] + S[sx]*alpha[3] +
1014 S[sx+cn]*alpha[4] + S[sx+cn*2]*alpha[5] +
1015 S[sx+cn*3]*alpha[6] + S[sx+cn*4]*alpha[7];
1025 template<typename T, typename WT, typename AT, class CastOp, class VecOp>
1026 struct VResizeLanczos4
1028 typedef T value_type;
1029 typedef WT buf_type;
1030 typedef AT alpha_type;
1032 void operator()(const WT** src, T* dst, const AT* beta, int width ) const
1036 int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
1037 #if CV_ENABLE_UNROLLED
1038 for( ; x <= width - 4; x += 4 )
1041 const WT* S = src[0];
1042 WT s0 = S[x]*b, s1 = S[x+1]*b, s2 = S[x+2]*b, s3 = S[x+3]*b;
1044 for( k = 1; k < 8; k++ )
1046 b = beta[k]; S = src[k];
1047 s0 += S[x]*b; s1 += S[x+1]*b;
1048 s2 += S[x+2]*b; s3 += S[x+3]*b;
1051 dst[x] = castOp(s0); dst[x+1] = castOp(s1);
1052 dst[x+2] = castOp(s2); dst[x+3] = castOp(s3);
1055 for( ; x < width; x++ )
1057 dst[x] = castOp(src[0][x]*beta[0] + src[1][x]*beta[1] +
1058 src[2][x]*beta[2] + src[3][x]*beta[3] + src[4][x]*beta[4] +
1059 src[5][x]*beta[5] + src[6][x]*beta[6] + src[7][x]*beta[7]);
1065 static inline int clip(int x, int a, int b)
1067 return x >= a ? (x < b ? x : b-1) : a;
1070 static const int MAX_ESIZE=16;
1072 template<class HResize, class VResize>
1073 static void resizeGeneric_( const Mat& src, Mat& dst,
1074 const int* xofs, const void* _alpha,
1075 const int* yofs, const void* _beta,
1076 int xmin, int xmax, int ksize )
1078 typedef typename HResize::value_type T;
1079 typedef typename HResize::buf_type WT;
1080 typedef typename HResize::alpha_type AT;
1082 const AT* alpha = (const AT*)_alpha;
1083 const AT* beta = (const AT*)_beta;
1084 Size ssize = src.size(), dsize = dst.size();
1085 int cn = src.channels();
1088 int bufstep = (int)alignSize(dsize.width, 16);
1089 AutoBuffer<WT> _buffer(bufstep*ksize);
1090 const T* srows[MAX_ESIZE]={0};
1091 WT* rows[MAX_ESIZE]={0};
1092 int prev_sy[MAX_ESIZE];
1100 for( k = 0; k < ksize; k++ )
1103 rows[k] = (WT*)_buffer + bufstep*k;
1106 // image resize is a separable operation. In case of not too strong
1107 for( dy = 0; dy < dsize.height; dy++, beta += ksize )
1109 int sy0 = yofs[dy], k, k0=ksize, k1=0, ksize2 = ksize/2;
1111 for( k = 0; k < ksize; k++ )
1113 int sy = clip(sy0 - ksize2 + 1 + k, 0, ssize.height);
1114 for( k1 = std::max(k1, k); k1 < ksize; k1++ )
1116 if( sy == prev_sy[k1] ) // if the sy-th row has been computed already, reuse it.
1119 memcpy( rows[k], rows[k1], bufstep*sizeof(rows[0][0]) );
1124 k0 = std::min(k0, k); // remember the first row that needs to be computed
1125 srows[k] = (const T*)(src.data + src.step*sy);
1130 hresize( srows + k0, rows + k0, ksize - k0, xofs, alpha,
1131 ssize.width, dsize.width, cn, xmin, xmax );
1132 vresize( (const WT**)rows, (T*)(dst.data + dst.step*dy), beta, dsize.width );
1137 template<typename T, typename WT>
1138 static void resizeAreaFast_( const Mat& src, Mat& dst, const int* ofs, const int* xofs,
1139 int scale_x, int scale_y )
1141 Size ssize = src.size(), dsize = dst.size();
1142 int cn = src.channels();
1144 int area = scale_x*scale_y;
1145 float scale = 1.f/(scale_x*scale_y);
1146 int dwidth1 = (ssize.width/scale_x)*cn;
1150 for( dy = 0; dy < dsize.height; dy++ )
1152 T* D = (T*)(dst.data + dst.step*dy);
1153 int sy0 = dy*scale_y, w = sy0 + scale_y <= ssize.height ? dwidth1 : 0;
1154 if( sy0 >= ssize.height )
1156 for( dx = 0; dx < dsize.width; dx++ )
1161 for( dx = 0; dx < w; dx++ )
1163 const T* S = (const T*)(src.data + src.step*sy0) + xofs[dx];
1166 #if CV_ENABLE_UNROLLED
1167 for( ; k <= area - 4; k += 4 )
1168 sum += S[ofs[k]] + S[ofs[k+1]] + S[ofs[k+2]] + S[ofs[k+3]];
1170 for( ; k < area; k++ )
1173 D[dx] = saturate_cast<T>(sum*scale);
1176 for( ; dx < dsize.width; dx++ )
1179 int count = 0, sx0 = xofs[dx];
1180 if( sx0 >= ssize.width )
1183 for( int sy = 0; sy < scale_y; sy++ )
1185 if( sy0 + sy >= ssize.height )
1187 const T* S = (const T*)(src.data + src.step*(sy0 + sy)) + sx0;
1188 for( int sx = 0; sx < scale_x*cn; sx += cn )
1190 if( sx0 + sx >= ssize.width )
1197 D[dx] = saturate_cast<T>((float)sum/count);
1202 struct DecimateAlpha
1208 template<typename T, typename WT>
1209 static void resizeArea_( const Mat& src, Mat& dst, const DecimateAlpha* xofs, int xofs_count )
1211 Size ssize = src.size(), dsize = dst.size();
1212 int cn = src.channels();
1214 AutoBuffer<WT> _buffer(dsize.width*2);
1215 WT *buf = _buffer, *sum = buf + dsize.width;
1216 int k, sy, dx, cur_dy = 0;
1217 WT scale_y = (WT)ssize.height/dsize.height;
1219 CV_Assert( cn <= 4 );
1220 for( dx = 0; dx < dsize.width; dx++ )
1221 buf[dx] = sum[dx] = 0;
1223 for( sy = 0; sy < ssize.height; sy++ )
1225 const T* S = (const T*)(src.data + src.step*sy);
1227 for( k = 0; k < xofs_count; k++ )
1229 int dxn = xofs[k].di;
1230 WT alpha = xofs[k].alpha;
1231 buf[dxn] += S[xofs[k].si]*alpha;
1234 for( k = 0; k < xofs_count; k++ )
1236 int sxn = xofs[k].si;
1237 int dxn = xofs[k].di;
1238 WT alpha = xofs[k].alpha;
1239 WT t0 = buf[dxn] + S[sxn]*alpha;
1240 WT t1 = buf[dxn+1] + S[sxn+1]*alpha;
1241 buf[dxn] = t0; buf[dxn+1] = t1;
1244 for( k = 0; k < xofs_count; k++ )
1246 int sxn = xofs[k].si;
1247 int dxn = xofs[k].di;
1248 WT alpha = xofs[k].alpha;
1249 WT t0 = buf[dxn] + S[sxn]*alpha;
1250 WT t1 = buf[dxn+1] + S[sxn+1]*alpha;
1251 WT t2 = buf[dxn+2] + S[sxn+2]*alpha;
1252 buf[dxn] = t0; buf[dxn+1] = t1; buf[dxn+2] = t2;
1255 for( k = 0; k < xofs_count; k++ )
1257 int sxn = xofs[k].si;
1258 int dxn = xofs[k].di;
1259 WT alpha = xofs[k].alpha;
1260 WT t0 = buf[dxn] + S[sxn]*alpha;
1261 WT t1 = buf[dxn+1] + S[sxn+1]*alpha;
1262 buf[dxn] = t0; buf[dxn+1] = t1;
1263 t0 = buf[dxn+2] + S[sxn+2]*alpha;
1264 t1 = buf[dxn+3] + S[sxn+3]*alpha;
1265 buf[dxn+2] = t0; buf[dxn+3] = t1;
1268 if( (cur_dy + 1)*scale_y <= sy + 1 || sy == ssize.height - 1 )
1270 WT beta = std::max(sy + 1 - (cur_dy+1)*scale_y, (WT)0);
1271 WT beta1 = 1 - beta;
1272 T* D = (T*)(dst.data + dst.step*cur_dy);
1273 if( fabs(beta) < 1e-3 )
1274 for( dx = 0; dx < dsize.width; dx++ )
1276 D[dx] = saturate_cast<T>(sum[dx] + buf[dx]);
1277 sum[dx] = buf[dx] = 0;
1280 for( dx = 0; dx < dsize.width; dx++ )
1282 D[dx] = saturate_cast<T>(sum[dx] + buf[dx]*beta1);
1283 sum[dx] = buf[dx]*beta;
1290 for( dx = 0; dx <= dsize.width - 2; dx += 2 )
1292 WT t0 = sum[dx] + buf[dx];
1293 WT t1 = sum[dx+1] + buf[dx+1];
1294 sum[dx] = t0; sum[dx+1] = t1;
1295 buf[dx] = buf[dx+1] = 0;
1297 for( ; dx < dsize.width; dx++ )
1307 typedef void (*ResizeFunc)( const Mat& src, Mat& dst,
1308 const int* xofs, const void* alpha,
1309 const int* yofs, const void* beta,
1310 int xmin, int xmax, int ksize );
1312 typedef void (*ResizeAreaFastFunc)( const Mat& src, Mat& dst,
1313 const int* ofs, const int *xofs,
1314 int scale_x, int scale_y );
1316 typedef void (*ResizeAreaFunc)( const Mat& src, Mat& dst,
1317 const DecimateAlpha* xofs, int xofs_count );
1321 //////////////////////////////////////////////////////////////////////////////////////////
1323 void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
1324 double inv_scale_x, double inv_scale_y, int interpolation )
1326 static ResizeFunc linear_tab[] =
1329 HResizeLinear<uchar, int, short,
1330 INTER_RESIZE_COEF_SCALE,
1331 HResizeLinearVec_8u32s>,
1332 VResizeLinear<uchar, int, short,
1333 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
1334 VResizeLinearVec_32s8u> >,
1337 HResizeLinear<ushort, float, float, 1,
1338 HResizeLinearVec_16u32f>,
1339 VResizeLinear<ushort, float, float, Cast<float, ushort>,
1340 VResizeLinearVec_32f16u> >,
1342 HResizeLinear<short, float, float, 1,
1343 HResizeLinearVec_16s32f>,
1344 VResizeLinear<short, float, float, Cast<float, short>,
1345 VResizeLinearVec_32f16s> >,
1348 HResizeLinear<float, float, float, 1,
1349 HResizeLinearVec_32f>,
1350 VResizeLinear<float, float, float, Cast<float, float>,
1351 VResizeLinearVec_32f> >,
1353 HResizeLinear<double, double, float, 1,
1355 VResizeLinear<double, double, float, Cast<double, double>,
1360 static ResizeFunc cubic_tab[] =
1363 HResizeCubic<uchar, int, short>,
1364 VResizeCubic<uchar, int, short,
1365 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
1366 VResizeCubicVec_32s8u> >,
1369 HResizeCubic<ushort, float, float>,
1370 VResizeCubic<ushort, float, float, Cast<float, ushort>,
1371 VResizeCubicVec_32f16u> >,
1373 HResizeCubic<short, float, float>,
1374 VResizeCubic<short, float, float, Cast<float, short>,
1375 VResizeCubicVec_32f16s> >,
1378 HResizeCubic<float, float, float>,
1379 VResizeCubic<float, float, float, Cast<float, float>,
1380 VResizeCubicVec_32f> >,
1382 HResizeCubic<double, double, float>,
1383 VResizeCubic<double, double, float, Cast<double, double>,
1388 static ResizeFunc lanczos4_tab[] =
1390 resizeGeneric_<HResizeLanczos4<uchar, int, short>,
1391 VResizeLanczos4<uchar, int, short,
1392 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
1395 resizeGeneric_<HResizeLanczos4<ushort, float, float>,
1396 VResizeLanczos4<ushort, float, float, Cast<float, ushort>,
1398 resizeGeneric_<HResizeLanczos4<short, float, float>,
1399 VResizeLanczos4<short, float, float, Cast<float, short>,
1402 resizeGeneric_<HResizeLanczos4<float, float, float>,
1403 VResizeLanczos4<float, float, float, Cast<float, float>,
1405 resizeGeneric_<HResizeLanczos4<double, double, float>,
1406 VResizeLanczos4<double, double, float, Cast<double, double>,
1411 static ResizeAreaFastFunc areafast_tab[] =
1413 resizeAreaFast_<uchar, int>, 0,
1414 resizeAreaFast_<ushort, float>,
1415 resizeAreaFast_<short, float>,
1417 resizeAreaFast_<float, float>,
1418 resizeAreaFast_<double, double>,
1422 static ResizeAreaFunc area_tab[] =
1424 resizeArea_<uchar, float>, 0, resizeArea_<ushort, float>, resizeArea_<short, float>,
1425 0, resizeArea_<float, float>, resizeArea_<double, double>, 0
1428 Mat src = _src.getMat();
1429 Size ssize = src.size();
1431 CV_Assert( ssize.area() > 0 );
1432 CV_Assert( !(dsize == Size()) || (inv_scale_x > 0 && inv_scale_y > 0) );
1433 if( dsize == Size() )
1435 dsize = Size(saturate_cast<int>(src.cols*inv_scale_x),
1436 saturate_cast<int>(src.rows*inv_scale_y));
1440 inv_scale_x = (double)dsize.width/src.cols;
1441 inv_scale_y = (double)dsize.height/src.rows;
1443 _dst.create(dsize, src.type());
1444 Mat dst = _dst.getMat();
1447 #ifdef HAVE_TEGRA_OPTIMIZATION
1448 if (tegra::resize(src, dst, inv_scale_x, inv_scale_y, interpolation))
1452 int depth = src.depth(), cn = src.channels();
1453 double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y;
1454 int k, sx, sy, dx, dy;
1456 if( interpolation == INTER_NEAREST )
1458 resizeNN( src, dst, inv_scale_x, inv_scale_y );
1462 // true "area" interpolation is only implemented for the case (scale_x <= 1 && scale_y <= 1).
1463 // In other cases it is emulated using some variant of bilinear interpolation
1464 if( interpolation == INTER_AREA && scale_x >= 1 && scale_y >= 1 )
1466 int iscale_x = saturate_cast<int>(scale_x);
1467 int iscale_y = saturate_cast<int>(scale_y);
1469 if( std::abs(scale_x - iscale_x) < DBL_EPSILON &&
1470 std::abs(scale_y - iscale_y) < DBL_EPSILON )
1472 int area = iscale_x*iscale_y;
1473 size_t srcstep = src.step / src.elemSize1();
1474 AutoBuffer<int> _ofs(area + dsize.width*cn);
1476 int* xofs = ofs + area;
1477 ResizeAreaFastFunc func = areafast_tab[depth];
1478 CV_Assert( func != 0 );
1480 for( sy = 0, k = 0; sy < iscale_y; sy++ )
1481 for( sx = 0; sx < iscale_x; sx++ )
1482 ofs[k++] = (int)(sy*srcstep + sx*cn);
1484 for( dx = 0; dx < dsize.width; dx++ )
1486 sx = dx*iscale_x*cn;
1487 for( k = 0; k < cn; k++ )
1488 xofs[dx*cn + k] = sx + k;
1491 func( src, dst, ofs, xofs, iscale_x, iscale_y );
1495 ResizeAreaFunc func = area_tab[depth];
1496 CV_Assert( func != 0 && cn <= 4 );
1498 AutoBuffer<DecimateAlpha> _xofs(ssize.width*2);
1499 DecimateAlpha* xofs = _xofs;
1500 double scale = 1.f/(scale_x*scale_y);
1502 for( dx = 0, k = 0; dx < dsize.width; dx++ )
1504 double fsx1 = dx*scale_x, fsx2 = fsx1 + scale_x;
1505 int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2);
1506 sx1 = std::min(sx1, ssize.width-1);
1507 sx2 = std::min(sx2, ssize.width-1);
1511 assert( k < ssize.width*2 );
1513 xofs[k].si = (sx1-1)*cn;
1514 xofs[k++].alpha = (float)((sx1 - fsx1)*scale);
1517 for( sx = sx1; sx < sx2; sx++ )
1519 assert( k < ssize.width*2 );
1522 xofs[k++].alpha = (float)scale;
1525 if( fsx2 - sx2 > 1e-3 )
1527 assert( k < ssize.width*2 );
1529 xofs[k].si = sx2*cn;
1530 xofs[k++].alpha = (float)((fsx2 - sx2)*scale);
1534 func( src, dst, xofs, k );
1538 int xmin = 0, xmax = dsize.width, width = dsize.width*cn;
1539 bool area_mode = interpolation == INTER_AREA;
1540 bool fixpt = depth == CV_8U;
1543 int ksize=0, ksize2;
1544 if( interpolation == INTER_CUBIC )
1545 ksize = 4, func = cubic_tab[depth];
1546 else if( interpolation == INTER_LANCZOS4 )
1547 ksize = 8, func = lanczos4_tab[depth];
1548 else if( interpolation == INTER_LINEAR || interpolation == INTER_AREA )
1549 ksize = 2, func = linear_tab[depth];
1551 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
1554 CV_Assert( func != 0 );
1556 AutoBuffer<uchar> _buffer((width + dsize.height)*(sizeof(int) + sizeof(float)*ksize));
1557 int* xofs = (int*)(uchar*)_buffer;
1558 int* yofs = xofs + width;
1559 float* alpha = (float*)(yofs + dsize.height);
1560 short* ialpha = (short*)alpha;
1561 float* beta = alpha + width*ksize;
1562 short* ibeta = ialpha + width*ksize;
1563 float cbuf[MAX_ESIZE];
1565 for( dx = 0; dx < dsize.width; dx++ )
1569 fx = (float)((dx+0.5)*scale_x - 0.5);
1575 sx = cvFloor(dx*scale_x);
1576 fx = (float)((dx+1) - (sx+1)*inv_scale_x);
1577 fx = fx <= 0 ? 0.f : fx - cvFloor(fx);
1587 if( sx + ksize2 >= ssize.width )
1589 xmax = std::min( xmax, dx );
1590 if( sx >= ssize.width-1 )
1591 fx = 0, sx = ssize.width-1;
1594 for( k = 0, sx *= cn; k < cn; k++ )
1595 xofs[dx*cn + k] = sx + k;
1597 if( interpolation == INTER_CUBIC )
1598 interpolateCubic( fx, cbuf );
1599 else if( interpolation == INTER_LANCZOS4 )
1600 interpolateLanczos4( fx, cbuf );
1608 for( k = 0; k < ksize; k++ )
1609 ialpha[dx*cn*ksize + k] = saturate_cast<short>(cbuf[k]*INTER_RESIZE_COEF_SCALE);
1610 for( ; k < cn*ksize; k++ )
1611 ialpha[dx*cn*ksize + k] = ialpha[dx*cn*ksize + k - ksize];
1615 for( k = 0; k < ksize; k++ )
1616 alpha[dx*cn*ksize + k] = cbuf[k];
1617 for( ; k < cn*ksize; k++ )
1618 alpha[dx*cn*ksize + k] = alpha[dx*cn*ksize + k - ksize];
1622 for( dy = 0; dy < dsize.height; dy++ )
1626 fy = (float)((dy+0.5)*scale_y - 0.5);
1632 sy = cvFloor(dy*scale_y);
1633 fy = (float)((dy+1) - (sy+1)*inv_scale_y);
1634 fy = fy <= 0 ? 0.f : fy - cvFloor(fy);
1638 if( interpolation == INTER_CUBIC )
1639 interpolateCubic( fy, cbuf );
1640 else if( interpolation == INTER_LANCZOS4 )
1641 interpolateLanczos4( fy, cbuf );
1650 for( k = 0; k < ksize; k++ )
1651 ibeta[dy*ksize + k] = saturate_cast<short>(cbuf[k]*INTER_RESIZE_COEF_SCALE);
1655 for( k = 0; k < ksize; k++ )
1656 beta[dy*ksize + k] = cbuf[k];
1660 func( src, dst, xofs, fixpt ? (void*)ialpha : (void*)alpha, yofs,
1661 fixpt ? (void*)ibeta : (void*)beta, xmin, xmax, ksize );
1665 /****************************************************************************************\
1666 * General warping (affine, perspective, remap) *
1667 \****************************************************************************************/
1672 template<typename T>
1673 static void remapNearest( const Mat& _src, Mat& _dst, const Mat& _xy,
1674 int borderType, const Scalar& _borderValue )
1676 Size ssize = _src.size(), dsize = _dst.size();
1677 int cn = _src.channels();
1678 const T* S0 = (const T*)_src.data;
1679 size_t sstep = _src.step/sizeof(S0[0]);
1680 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
1681 saturate_cast<T>(_borderValue[1]),
1682 saturate_cast<T>(_borderValue[2]),
1683 saturate_cast<T>(_borderValue[3]));
1686 unsigned width1 = ssize.width, height1 = ssize.height;
1688 if( _dst.isContinuous() && _xy.isContinuous() )
1690 dsize.width *= dsize.height;
1694 for( dy = 0; dy < dsize.height; dy++ )
1696 T* D = (T*)(_dst.data + _dst.step*dy);
1697 const short* XY = (const short*)(_xy.data + _xy.step*dy);
1701 for( dx = 0; dx < dsize.width; dx++ )
1703 int sx = XY[dx*2], sy = XY[dx*2+1];
1704 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
1705 D[dx] = S0[sy*sstep + sx];
1708 if( borderType == BORDER_REPLICATE )
1710 sx = clip(sx, 0, ssize.width);
1711 sy = clip(sy, 0, ssize.height);
1712 D[dx] = S0[sy*sstep + sx];
1714 else if( borderType == BORDER_CONSTANT )
1716 else if( borderType != BORDER_TRANSPARENT )
1718 sx = borderInterpolate(sx, ssize.width, borderType);
1719 sy = borderInterpolate(sy, ssize.height, borderType);
1720 D[dx] = S0[sy*sstep + sx];
1727 for( dx = 0; dx < dsize.width; dx++, D += cn )
1729 int sx = XY[dx*2], sy = XY[dx*2+1], k;
1731 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
1735 S = S0 + sy*sstep + sx*3;
1736 D[0] = S[0], D[1] = S[1], D[2] = S[2];
1740 S = S0 + sy*sstep + sx*4;
1741 D[0] = S[0], D[1] = S[1], D[2] = S[2], D[3] = S[3];
1745 S = S0 + sy*sstep + sx*cn;
1746 for( k = 0; k < cn; k++ )
1750 else if( borderType != BORDER_TRANSPARENT )
1752 if( borderType == BORDER_REPLICATE )
1754 sx = clip(sx, 0, ssize.width);
1755 sy = clip(sy, 0, ssize.height);
1756 S = S0 + sy*sstep + sx*cn;
1758 else if( borderType == BORDER_CONSTANT )
1762 sx = borderInterpolate(sx, ssize.width, borderType);
1763 sy = borderInterpolate(sy, ssize.height, borderType);
1764 S = S0 + sy*sstep + sx*cn;
1766 for( k = 0; k < cn; k++ )
1777 int operator()( const Mat&, void*, const short*, const ushort*,
1778 const void*, int ) const { return 0; }
1785 int operator()( const Mat& _src, void* _dst, const short* XY,
1786 const ushort* FXY, const void* _wtab, int width ) const
1788 int cn = _src.channels();
1790 if( (cn != 1 && cn != 3 && cn != 4) || !checkHardwareSupport(CV_CPU_SSE2) )
1793 const uchar *S0 = _src.data, *S1 = _src.data + _src.step;
1794 const short* wtab = cn == 1 ? (const short*)_wtab : &BilinearTab_iC4[0][0][0];
1795 uchar* D = (uchar*)_dst;
1796 int x = 0, sstep = (int)_src.step;
1797 __m128i delta = _mm_set1_epi32(INTER_REMAP_COEF_SCALE/2);
1798 __m128i xy2ofs = _mm_set1_epi32(cn + (sstep << 16));
1799 __m128i z = _mm_setzero_si128();
1800 int CV_DECL_ALIGNED(16) iofs0[4], iofs1[4];
1804 for( ; x <= width - 8; x += 8 )
1806 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
1807 __m128i xy1 = _mm_loadu_si128( (const __m128i*)(XY + x*2 + 8));
1808 __m128i v0, v1, v2, v3, a0, a1, b0, b1;
1811 xy0 = _mm_madd_epi16( xy0, xy2ofs );
1812 xy1 = _mm_madd_epi16( xy1, xy2ofs );
1813 _mm_store_si128( (__m128i*)iofs0, xy0 );
1814 _mm_store_si128( (__m128i*)iofs1, xy1 );
1816 i0 = *(ushort*)(S0 + iofs0[0]) + (*(ushort*)(S0 + iofs0[1]) << 16);
1817 i1 = *(ushort*)(S0 + iofs0[2]) + (*(ushort*)(S0 + iofs0[3]) << 16);
1818 v0 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
1819 i0 = *(ushort*)(S1 + iofs0[0]) + (*(ushort*)(S1 + iofs0[1]) << 16);
1820 i1 = *(ushort*)(S1 + iofs0[2]) + (*(ushort*)(S1 + iofs0[3]) << 16);
1821 v1 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
1822 v0 = _mm_unpacklo_epi8(v0, z);
1823 v1 = _mm_unpacklo_epi8(v1, z);
1825 a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x]*4)),
1826 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+1]*4)));
1827 a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+2]*4)),
1828 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+3]*4)));
1829 b0 = _mm_unpacklo_epi64(a0, a1);
1830 b1 = _mm_unpackhi_epi64(a0, a1);
1831 v0 = _mm_madd_epi16(v0, b0);
1832 v1 = _mm_madd_epi16(v1, b1);
1833 v0 = _mm_add_epi32(_mm_add_epi32(v0, v1), delta);
1835 i0 = *(ushort*)(S0 + iofs1[0]) + (*(ushort*)(S0 + iofs1[1]) << 16);
1836 i1 = *(ushort*)(S0 + iofs1[2]) + (*(ushort*)(S0 + iofs1[3]) << 16);
1837 v2 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
1838 i0 = *(ushort*)(S1 + iofs1[0]) + (*(ushort*)(S1 + iofs1[1]) << 16);
1839 i1 = *(ushort*)(S1 + iofs1[2]) + (*(ushort*)(S1 + iofs1[3]) << 16);
1840 v3 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
1841 v2 = _mm_unpacklo_epi8(v2, z);
1842 v3 = _mm_unpacklo_epi8(v3, z);
1844 a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+4]*4)),
1845 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+5]*4)));
1846 a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+6]*4)),
1847 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+7]*4)));
1848 b0 = _mm_unpacklo_epi64(a0, a1);
1849 b1 = _mm_unpackhi_epi64(a0, a1);
1850 v2 = _mm_madd_epi16(v2, b0);
1851 v3 = _mm_madd_epi16(v3, b1);
1852 v2 = _mm_add_epi32(_mm_add_epi32(v2, v3), delta);
1854 v0 = _mm_srai_epi32(v0, INTER_REMAP_COEF_BITS);
1855 v2 = _mm_srai_epi32(v2, INTER_REMAP_COEF_BITS);
1856 v0 = _mm_packus_epi16(_mm_packs_epi32(v0, v2), z);
1857 _mm_storel_epi64( (__m128i*)(D + x), v0 );
1862 for( ; x <= width - 5; x += 4, D += 12 )
1864 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
1865 __m128i u0, v0, u1, v1;
1867 xy0 = _mm_madd_epi16( xy0, xy2ofs );
1868 _mm_store_si128( (__m128i*)iofs0, xy0 );
1869 const __m128i *w0, *w1;
1870 w0 = (const __m128i*)(wtab + FXY[x]*16);
1871 w1 = (const __m128i*)(wtab + FXY[x+1]*16);
1873 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[0])),
1874 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[0] + 3)));
1875 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[0])),
1876 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[0] + 3)));
1877 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[1])),
1878 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[1] + 3)));
1879 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[1])),
1880 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[1] + 3)));
1881 u0 = _mm_unpacklo_epi8(u0, z);
1882 v0 = _mm_unpacklo_epi8(v0, z);
1883 u1 = _mm_unpacklo_epi8(u1, z);
1884 v1 = _mm_unpacklo_epi8(v1, z);
1885 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
1886 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
1887 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
1888 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
1889 u0 = _mm_slli_si128(u0, 4);
1890 u0 = _mm_packs_epi32(u0, u1);
1891 u0 = _mm_packus_epi16(u0, u0);
1892 _mm_storel_epi64((__m128i*)D, _mm_srli_si128(u0,1));
1894 w0 = (const __m128i*)(wtab + FXY[x+2]*16);
1895 w1 = (const __m128i*)(wtab + FXY[x+3]*16);
1897 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[2])),
1898 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[2] + 3)));
1899 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[2])),
1900 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[2] + 3)));
1901 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[3])),
1902 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[3] + 3)));
1903 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[3])),
1904 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[3] + 3)));
1905 u0 = _mm_unpacklo_epi8(u0, z);
1906 v0 = _mm_unpacklo_epi8(v0, z);
1907 u1 = _mm_unpacklo_epi8(u1, z);
1908 v1 = _mm_unpacklo_epi8(v1, z);
1909 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
1910 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
1911 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
1912 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
1913 u0 = _mm_slli_si128(u0, 4);
1914 u0 = _mm_packs_epi32(u0, u1);
1915 u0 = _mm_packus_epi16(u0, u0);
1916 _mm_storel_epi64((__m128i*)(D + 6), _mm_srli_si128(u0,1));
1921 for( ; x <= width - 4; x += 4, D += 16 )
1923 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
1924 __m128i u0, v0, u1, v1;
1926 xy0 = _mm_madd_epi16( xy0, xy2ofs );
1927 _mm_store_si128( (__m128i*)iofs0, xy0 );
1928 const __m128i *w0, *w1;
1929 w0 = (const __m128i*)(wtab + FXY[x]*16);
1930 w1 = (const __m128i*)(wtab + FXY[x+1]*16);
1932 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[0])),
1933 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[0] + 4)));
1934 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[0])),
1935 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[0] + 4)));
1936 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[1])),
1937 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[1] + 4)));
1938 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[1])),
1939 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[1] + 4)));
1940 u0 = _mm_unpacklo_epi8(u0, z);
1941 v0 = _mm_unpacklo_epi8(v0, z);
1942 u1 = _mm_unpacklo_epi8(u1, z);
1943 v1 = _mm_unpacklo_epi8(v1, z);
1944 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
1945 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
1946 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
1947 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
1948 u0 = _mm_packs_epi32(u0, u1);
1949 u0 = _mm_packus_epi16(u0, u0);
1950 _mm_storel_epi64((__m128i*)D, u0);
1952 w0 = (const __m128i*)(wtab + FXY[x+2]*16);
1953 w1 = (const __m128i*)(wtab + FXY[x+3]*16);
1955 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[2])),
1956 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[2] + 4)));
1957 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[2])),
1958 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[2] + 4)));
1959 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[3])),
1960 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[3] + 4)));
1961 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[3])),
1962 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[3] + 4)));
1963 u0 = _mm_unpacklo_epi8(u0, z);
1964 v0 = _mm_unpacklo_epi8(v0, z);
1965 u1 = _mm_unpacklo_epi8(u1, z);
1966 v1 = _mm_unpacklo_epi8(v1, z);
1967 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
1968 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
1969 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
1970 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
1971 u0 = _mm_packs_epi32(u0, u1);
1972 u0 = _mm_packus_epi16(u0, u0);
1973 _mm_storel_epi64((__m128i*)(D + 8), u0);
1983 typedef RemapNoVec RemapVec_8u;
1988 template<class CastOp, class VecOp, typename AT>
1989 static void remapBilinear( const Mat& _src, Mat& _dst, const Mat& _xy,
1990 const Mat& _fxy, const void* _wtab,
1991 int borderType, const Scalar& _borderValue )
1993 typedef typename CastOp::rtype T;
1994 typedef typename CastOp::type1 WT;
1995 Size ssize = _src.size(), dsize = _dst.size();
1996 int cn = _src.channels();
1997 const AT* wtab = (const AT*)_wtab;
1998 const T* S0 = (const T*)_src.data;
1999 size_t sstep = _src.step/sizeof(S0[0]);
2000 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
2001 saturate_cast<T>(_borderValue[1]),
2002 saturate_cast<T>(_borderValue[2]),
2003 saturate_cast<T>(_borderValue[3]));
2008 unsigned width1 = std::max(ssize.width-1, 0), height1 = std::max(ssize.height-1, 0);
2009 CV_Assert( cn <= 4 && ssize.area() > 0 );
2011 if( _src.type() == CV_8UC3 )
2012 width1 = std::max(ssize.width-2, 0);
2015 for( dy = 0; dy < dsize.height; dy++ )
2017 T* D = (T*)(_dst.data + _dst.step*dy);
2018 const short* XY = (const short*)(_xy.data + _xy.step*dy);
2019 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
2021 bool prevInlier = false;
2023 for( dx = 0; dx <= dsize.width; dx++ )
2025 bool curInlier = dx < dsize.width ?
2026 (unsigned)XY[dx*2] < width1 &&
2027 (unsigned)XY[dx*2+1] < height1 : !prevInlier;
2028 if( curInlier == prevInlier )
2034 prevInlier = curInlier;
2038 int len = vecOp( _src, D, XY + dx*2, FXY + dx, wtab, X1 - dx );
2044 for( ; dx < X1; dx++, D++ )
2046 int sx = XY[dx*2], sy = XY[dx*2+1];
2047 const AT* w = wtab + FXY[dx]*4;
2048 const T* S = S0 + sy*sstep + sx;
2049 *D = castOp(WT(S[0]*w[0] + S[1]*w[1] + S[sstep]*w[2] + S[sstep+1]*w[3]));
2053 for( ; dx < X1; dx++, D += 2 )
2055 int sx = XY[dx*2], sy = XY[dx*2+1];
2056 const AT* w = wtab + FXY[dx]*4;
2057 const T* S = S0 + sy*sstep + sx*2;
2058 WT t0 = S[0]*w[0] + S[2]*w[1] + S[sstep]*w[2] + S[sstep+2]*w[3];
2059 WT t1 = S[1]*w[0] + S[3]*w[1] + S[sstep+1]*w[2] + S[sstep+3]*w[3];
2060 D[0] = castOp(t0); D[1] = castOp(t1);
2063 for( ; dx < X1; dx++, D += 3 )
2065 int sx = XY[dx*2], sy = XY[dx*2+1];
2066 const AT* w = wtab + FXY[dx]*4;
2067 const T* S = S0 + sy*sstep + sx*3;
2068 WT t0 = S[0]*w[0] + S[3]*w[1] + S[sstep]*w[2] + S[sstep+3]*w[3];
2069 WT t1 = S[1]*w[0] + S[4]*w[1] + S[sstep+1]*w[2] + S[sstep+4]*w[3];
2070 WT t2 = S[2]*w[0] + S[5]*w[1] + S[sstep+2]*w[2] + S[sstep+5]*w[3];
2071 D[0] = castOp(t0); D[1] = castOp(t1); D[2] = castOp(t2);
2074 for( ; dx < X1; dx++, D += 4 )
2076 int sx = XY[dx*2], sy = XY[dx*2+1];
2077 const AT* w = wtab + FXY[dx]*4;
2078 const T* S = S0 + sy*sstep + sx*4;
2079 WT t0 = S[0]*w[0] + S[4]*w[1] + S[sstep]*w[2] + S[sstep+4]*w[3];
2080 WT t1 = S[1]*w[0] + S[5]*w[1] + S[sstep+1]*w[2] + S[sstep+5]*w[3];
2081 D[0] = castOp(t0); D[1] = castOp(t1);
2082 t0 = S[2]*w[0] + S[6]*w[1] + S[sstep+2]*w[2] + S[sstep+6]*w[3];
2083 t1 = S[3]*w[0] + S[7]*w[1] + S[sstep+3]*w[2] + S[sstep+7]*w[3];
2084 D[2] = castOp(t0); D[3] = castOp(t1);
2089 if( borderType == BORDER_TRANSPARENT && cn != 3 )
2097 for( ; dx < X1; dx++, D++ )
2099 int sx = XY[dx*2], sy = XY[dx*2+1];
2100 if( borderType == BORDER_CONSTANT &&
2101 (sx >= ssize.width || sx+1 < 0 ||
2102 sy >= ssize.height || sy+1 < 0) )
2108 int sx0, sx1, sy0, sy1;
2110 const AT* w = wtab + FXY[dx]*4;
2111 if( borderType == BORDER_REPLICATE )
2113 sx0 = clip(sx, 0, ssize.width);
2114 sx1 = clip(sx+1, 0, ssize.width);
2115 sy0 = clip(sy, 0, ssize.height);
2116 sy1 = clip(sy+1, 0, ssize.height);
2117 v0 = S0[sy0*sstep + sx0];
2118 v1 = S0[sy0*sstep + sx1];
2119 v2 = S0[sy1*sstep + sx0];
2120 v3 = S0[sy1*sstep + sx1];
2124 sx0 = borderInterpolate(sx, ssize.width, borderType);
2125 sx1 = borderInterpolate(sx+1, ssize.width, borderType);
2126 sy0 = borderInterpolate(sy, ssize.height, borderType);
2127 sy1 = borderInterpolate(sy+1, ssize.height, borderType);
2128 v0 = sx0 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx0] : cval[0];
2129 v1 = sx1 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx1] : cval[0];
2130 v2 = sx0 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx0] : cval[0];
2131 v3 = sx1 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx1] : cval[0];
2133 D[0] = castOp(WT(v0*w[0] + v1*w[1] + v2*w[2] + v3*w[3]));
2137 for( ; dx < X1; dx++, D += cn )
2139 int sx = XY[dx*2], sy = XY[dx*2+1], k;
2140 if( borderType == BORDER_CONSTANT &&
2141 (sx >= ssize.width || sx+1 < 0 ||
2142 sy >= ssize.height || sy+1 < 0) )
2144 for( k = 0; k < cn; k++ )
2149 int sx0, sx1, sy0, sy1;
2150 const T *v0, *v1, *v2, *v3;
2151 const AT* w = wtab + FXY[dx]*4;
2152 if( borderType == BORDER_REPLICATE )
2154 sx0 = clip(sx, 0, ssize.width);
2155 sx1 = clip(sx+1, 0, ssize.width);
2156 sy0 = clip(sy, 0, ssize.height);
2157 sy1 = clip(sy+1, 0, ssize.height);
2158 v0 = S0 + sy0*sstep + sx0*cn;
2159 v1 = S0 + sy0*sstep + sx1*cn;
2160 v2 = S0 + sy1*sstep + sx0*cn;
2161 v3 = S0 + sy1*sstep + sx1*cn;
2163 else if( borderType == BORDER_TRANSPARENT &&
2164 ((unsigned)sx >= (unsigned)(ssize.width-1) ||
2165 (unsigned)sy >= (unsigned)(ssize.height-1)))
2169 sx0 = borderInterpolate(sx, ssize.width, borderType);
2170 sx1 = borderInterpolate(sx+1, ssize.width, borderType);
2171 sy0 = borderInterpolate(sy, ssize.height, borderType);
2172 sy1 = borderInterpolate(sy+1, ssize.height, borderType);
2173 v0 = sx0 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx0*cn : &cval[0];
2174 v1 = sx1 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx1*cn : &cval[0];
2175 v2 = sx0 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx0*cn : &cval[0];
2176 v3 = sx1 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx1*cn : &cval[0];
2178 for( k = 0; k < cn; k++ )
2179 D[k] = castOp(WT(v0[k]*w[0] + v1[k]*w[1] + v2[k]*w[2] + v3[k]*w[3]));
2188 template<class CastOp, typename AT, int ONE>
2189 static void remapBicubic( const Mat& _src, Mat& _dst, const Mat& _xy,
2190 const Mat& _fxy, const void* _wtab,
2191 int borderType, const Scalar& _borderValue )
2193 typedef typename CastOp::rtype T;
2194 typedef typename CastOp::type1 WT;
2195 Size ssize = _src.size(), dsize = _dst.size();
2196 int cn = _src.channels();
2197 const AT* wtab = (const AT*)_wtab;
2198 const T* S0 = (const T*)_src.data;
2199 size_t sstep = _src.step/sizeof(S0[0]);
2200 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
2201 saturate_cast<T>(_borderValue[1]),
2202 saturate_cast<T>(_borderValue[2]),
2203 saturate_cast<T>(_borderValue[3]));
2206 int borderType1 = borderType != BORDER_TRANSPARENT ? borderType : BORDER_REFLECT_101;
2208 unsigned width1 = std::max(ssize.width-3, 0), height1 = std::max(ssize.height-3, 0);
2210 if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() )
2212 dsize.width *= dsize.height;
2216 for( dy = 0; dy < dsize.height; dy++ )
2218 T* D = (T*)(_dst.data + _dst.step*dy);
2219 const short* XY = (const short*)(_xy.data + _xy.step*dy);
2220 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
2222 for( dx = 0; dx < dsize.width; dx++, D += cn )
2224 int sx = XY[dx*2]-1, sy = XY[dx*2+1]-1;
2225 const AT* w = wtab + FXY[dx]*16;
2227 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
2229 const T* S = S0 + sy*sstep + sx*cn;
2230 for( k = 0; k < cn; k++ )
2232 WT sum = S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3];
2234 sum += S[0]*w[4] + S[cn]*w[5] + S[cn*2]*w[6] + S[cn*3]*w[7];
2236 sum += S[0]*w[8] + S[cn]*w[9] + S[cn*2]*w[10] + S[cn*3]*w[11];
2238 sum += S[0]*w[12] + S[cn]*w[13] + S[cn*2]*w[14] + S[cn*3]*w[15];
2246 if( borderType == BORDER_TRANSPARENT &&
2247 ((unsigned)(sx+1) >= (unsigned)ssize.width ||
2248 (unsigned)(sy+1) >= (unsigned)ssize.height) )
2251 if( borderType1 == BORDER_CONSTANT &&
2252 (sx >= ssize.width || sx+4 <= 0 ||
2253 sy >= ssize.height || sy+4 <= 0))
2255 for( k = 0; k < cn; k++ )
2260 for( i = 0; i < 4; i++ )
2262 x[i] = borderInterpolate(sx + i, ssize.width, borderType1)*cn;
2263 y[i] = borderInterpolate(sy + i, ssize.height, borderType1);
2266 for( k = 0; k < cn; k++, S0++, w -= 16 )
2268 WT cv = cval[k], sum = cv*ONE;
2269 for( i = 0; i < 4; i++, w += 4 )
2272 const T* S = S0 + yi*sstep;
2276 sum += (S[x[0]] - cv)*w[0];
2278 sum += (S[x[1]] - cv)*w[1];
2280 sum += (S[x[2]] - cv)*w[2];
2282 sum += (S[x[3]] - cv)*w[3];
2293 template<class CastOp, typename AT, int ONE>
2294 static void remapLanczos4( const Mat& _src, Mat& _dst, const Mat& _xy,
2295 const Mat& _fxy, const void* _wtab,
2296 int borderType, const Scalar& _borderValue )
2298 typedef typename CastOp::rtype T;
2299 typedef typename CastOp::type1 WT;
2300 Size ssize = _src.size(), dsize = _dst.size();
2301 int cn = _src.channels();
2302 const AT* wtab = (const AT*)_wtab;
2303 const T* S0 = (const T*)_src.data;
2304 size_t sstep = _src.step/sizeof(S0[0]);
2305 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
2306 saturate_cast<T>(_borderValue[1]),
2307 saturate_cast<T>(_borderValue[2]),
2308 saturate_cast<T>(_borderValue[3]));
2311 int borderType1 = borderType != BORDER_TRANSPARENT ? borderType : BORDER_REFLECT_101;
2313 unsigned width1 = std::max(ssize.width-7, 0), height1 = std::max(ssize.height-7, 0);
2315 if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() )
2317 dsize.width *= dsize.height;
2321 for( dy = 0; dy < dsize.height; dy++ )
2323 T* D = (T*)(_dst.data + _dst.step*dy);
2324 const short* XY = (const short*)(_xy.data + _xy.step*dy);
2325 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
2327 for( dx = 0; dx < dsize.width; dx++, D += cn )
2329 int sx = XY[dx*2]-3, sy = XY[dx*2+1]-3;
2330 const AT* w = wtab + FXY[dx]*64;
2331 const T* S = S0 + sy*sstep + sx*cn;
2333 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
2335 for( k = 0; k < cn; k++ )
2338 for( int r = 0; r < 8; r++, S += sstep, w += 8 )
2339 sum += S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3] +
2340 S[cn*4]*w[4] + S[cn*5]*w[5] + S[cn*6]*w[6] + S[cn*7]*w[7];
2349 if( borderType == BORDER_TRANSPARENT &&
2350 ((unsigned)(sx+3) >= (unsigned)ssize.width ||
2351 (unsigned)(sy+3) >= (unsigned)ssize.height) )
2354 if( borderType1 == BORDER_CONSTANT &&
2355 (sx >= ssize.width || sx+8 <= 0 ||
2356 sy >= ssize.height || sy+8 <= 0))
2358 for( k = 0; k < cn; k++ )
2363 for( i = 0; i < 8; i++ )
2365 x[i] = borderInterpolate(sx + i, ssize.width, borderType1)*cn;
2366 y[i] = borderInterpolate(sy + i, ssize.height, borderType1);
2369 for( k = 0; k < cn; k++, S0++, w -= 64 )
2371 WT cv = cval[k], sum = cv*ONE;
2372 for( i = 0; i < 8; i++, w += 8 )
2375 const T* S = S0 + yi*sstep;
2379 sum += (S[x[0]] - cv)*w[0];
2381 sum += (S[x[1]] - cv)*w[1];
2383 sum += (S[x[2]] - cv)*w[2];
2385 sum += (S[x[3]] - cv)*w[3];
2387 sum += (S[x[4]] - cv)*w[4];
2389 sum += (S[x[5]] - cv)*w[5];
2391 sum += (S[x[6]] - cv)*w[6];
2393 sum += (S[x[7]] - cv)*w[7];
2404 typedef void (*RemapNNFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
2405 int borderType, const Scalar& _borderValue );
2407 typedef void (*RemapFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
2408 const Mat& _fxy, const void* _wtab,
2409 int borderType, const Scalar& _borderValue);
2413 void cv::remap( InputArray _src, OutputArray _dst,
2414 InputArray _map1, InputArray _map2,
2415 int interpolation, int borderType, const Scalar& borderValue )
2417 static RemapNNFunc nn_tab[] =
2419 remapNearest<uchar>, remapNearest<uchar>, remapNearest<ushort>, remapNearest<ushort>,
2420 remapNearest<int>, remapNearest<int>, remapNearest<double>, 0
2423 static RemapFunc linear_tab[] =
2425 remapBilinear<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, RemapVec_8u, short>, 0,
2426 remapBilinear<Cast<float, ushort>, RemapNoVec, float>,
2427 remapBilinear<Cast<float, short>, RemapNoVec, float>, 0,
2428 remapBilinear<Cast<float, float>, RemapNoVec, float>,
2429 remapBilinear<Cast<double, double>, RemapNoVec, float>, 0
2432 static RemapFunc cubic_tab[] =
2434 remapBicubic<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE>, 0,
2435 remapBicubic<Cast<float, ushort>, float, 1>,
2436 remapBicubic<Cast<float, short>, float, 1>, 0,
2437 remapBicubic<Cast<float, float>, float, 1>,
2438 remapBicubic<Cast<double, double>, float, 1>, 0
2441 static RemapFunc lanczos4_tab[] =
2443 remapLanczos4<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE>, 0,
2444 remapLanczos4<Cast<float, ushort>, float, 1>,
2445 remapLanczos4<Cast<float, short>, float, 1>, 0,
2446 remapLanczos4<Cast<float, float>, float, 1>,
2447 remapLanczos4<Cast<double, double>, float, 1>, 0
2450 Mat src = _src.getMat(), map1 = _map1.getMat(), map2 = _map2.getMat();
2452 CV_Assert( (!map2.data || map2.size() == map1.size()));
2454 _dst.create( map1.size(), src.type() );
2455 Mat dst = _dst.getMat();
2456 CV_Assert(dst.data != src.data);
2458 int depth = src.depth(), map_depth = map1.depth();
2459 RemapNNFunc nnfunc = 0;
2460 RemapFunc ifunc = 0;
2461 const void* ctab = 0;
2462 bool fixpt = depth == CV_8U;
2463 bool planar_input = false;
2465 if( interpolation == INTER_NEAREST )
2467 nnfunc = nn_tab[depth];
2468 CV_Assert( nnfunc != 0 );
2470 if( map1.type() == CV_16SC2 && !map2.data ) // the data is already in the right format
2472 nnfunc( src, dst, map1, borderType, borderValue );
2478 if( interpolation == INTER_AREA )
2479 interpolation = INTER_LINEAR;
2481 if( interpolation == INTER_LINEAR )
2482 ifunc = linear_tab[depth];
2483 else if( interpolation == INTER_CUBIC )
2484 ifunc = cubic_tab[depth];
2485 else if( interpolation == INTER_LANCZOS4 )
2486 ifunc = lanczos4_tab[depth];
2488 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
2489 CV_Assert( ifunc != 0 );
2490 ctab = initInterTab2D( interpolation, fixpt );
2493 const Mat *m1 = &map1, *m2 = &map2;
2495 if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) ||
2496 (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.type() == CV_16SC1)) )
2498 if( map1.type() != CV_16SC2 )
2502 ifunc( src, dst, *m1, *m2, ctab, borderType, borderValue );
2508 CV_Assert( (map1.type() == CV_32FC2 && !map2.data) ||
2509 (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) );
2510 planar_input = map1.channels() == 1;
2514 const int buf_size = 1 << 14;
2515 int brows0 = std::min(128, dst.rows);
2516 int bcols0 = std::min(buf_size/brows0, dst.cols);
2517 brows0 = std::min(buf_size/bcols0, dst.rows);
2519 bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
2522 Mat _bufxy(brows0, bcols0, CV_16SC2), _bufa;
2524 _bufa.create(brows0, bcols0, CV_16UC1);
2526 for( y = 0; y < dst.rows; y += brows0 )
2528 for( x = 0; x < dst.cols; x += bcols0 )
2530 int brows = std::min(brows0, dst.rows - y);
2531 int bcols = std::min(bcols0, dst.cols - x);
2532 Mat dpart(dst, Rect(x, y, bcols, brows));
2533 Mat bufxy(_bufxy, Rect(0, 0, bcols, brows));
2537 if( map_depth != CV_32F )
2539 for( y1 = 0; y1 < brows; y1++ )
2541 short* XY = (short*)(bufxy.data + bufxy.step*y1);
2542 const short* sXY = (const short*)(m1->data + m1->step*(y+y1)) + x*2;
2543 const ushort* sA = (const ushort*)(m2->data + m2->step*(y+y1)) + x;
2545 for( x1 = 0; x1 < bcols; x1++ )
2547 int a = sA[x1] & (INTER_TAB_SIZE2-1);
2548 XY[x1*2] = sXY[x1*2] + NNDeltaTab_i[a][0];
2549 XY[x1*2+1] = sXY[x1*2+1] + NNDeltaTab_i[a][1];
2553 else if( !planar_input )
2554 map1(Rect(0,0,bcols,brows)).convertTo(bufxy, bufxy.depth());
2557 for( y1 = 0; y1 < brows; y1++ )
2559 short* XY = (short*)(bufxy.data + bufxy.step*y1);
2560 const float* sX = (const float*)(map1.data + map1.step*(y+y1)) + x;
2561 const float* sY = (const float*)(map2.data + map2.step*(y+y1)) + x;
2567 for( ; x1 <= bcols - 8; x1 += 8 )
2569 __m128 fx0 = _mm_loadu_ps(sX + x1);
2570 __m128 fx1 = _mm_loadu_ps(sX + x1 + 4);
2571 __m128 fy0 = _mm_loadu_ps(sY + x1);
2572 __m128 fy1 = _mm_loadu_ps(sY + x1 + 4);
2573 __m128i ix0 = _mm_cvtps_epi32(fx0);
2574 __m128i ix1 = _mm_cvtps_epi32(fx1);
2575 __m128i iy0 = _mm_cvtps_epi32(fy0);
2576 __m128i iy1 = _mm_cvtps_epi32(fy1);
2577 ix0 = _mm_packs_epi32(ix0, ix1);
2578 iy0 = _mm_packs_epi32(iy0, iy1);
2579 ix1 = _mm_unpacklo_epi16(ix0, iy0);
2580 iy1 = _mm_unpackhi_epi16(ix0, iy0);
2581 _mm_storeu_si128((__m128i*)(XY + x1*2), ix1);
2582 _mm_storeu_si128((__m128i*)(XY + x1*2 + 8), iy1);
2587 for( ; x1 < bcols; x1++ )
2589 XY[x1*2] = saturate_cast<short>(sX[x1]);
2590 XY[x1*2+1] = saturate_cast<short>(sY[x1]);
2594 nnfunc( src, dpart, bufxy, borderType, borderValue );
2598 Mat bufa(_bufa, Rect(0,0,bcols, brows));
2599 for( y1 = 0; y1 < brows; y1++ )
2601 short* XY = (short*)(bufxy.data + bufxy.step*y1);
2602 ushort* A = (ushort*)(bufa.data + bufa.step*y1);
2606 const float* sX = (const float*)(map1.data + map1.step*(y+y1)) + x;
2607 const float* sY = (const float*)(map2.data + map2.step*(y+y1)) + x;
2613 __m128 scale = _mm_set1_ps((float)INTER_TAB_SIZE);
2614 __m128i mask = _mm_set1_epi32(INTER_TAB_SIZE-1);
2615 for( ; x1 <= bcols - 8; x1 += 8 )
2617 __m128 fx0 = _mm_loadu_ps(sX + x1);
2618 __m128 fx1 = _mm_loadu_ps(sX + x1 + 4);
2619 __m128 fy0 = _mm_loadu_ps(sY + x1);
2620 __m128 fy1 = _mm_loadu_ps(sY + x1 + 4);
2621 __m128i ix0 = _mm_cvtps_epi32(_mm_mul_ps(fx0, scale));
2622 __m128i ix1 = _mm_cvtps_epi32(_mm_mul_ps(fx1, scale));
2623 __m128i iy0 = _mm_cvtps_epi32(_mm_mul_ps(fy0, scale));
2624 __m128i iy1 = _mm_cvtps_epi32(_mm_mul_ps(fy1, scale));
2625 __m128i mx0 = _mm_and_si128(ix0, mask);
2626 __m128i mx1 = _mm_and_si128(ix1, mask);
2627 __m128i my0 = _mm_and_si128(iy0, mask);
2628 __m128i my1 = _mm_and_si128(iy1, mask);
2629 mx0 = _mm_packs_epi32(mx0, mx1);
2630 my0 = _mm_packs_epi32(my0, my1);
2631 my0 = _mm_slli_epi16(my0, INTER_BITS);
2632 mx0 = _mm_or_si128(mx0, my0);
2633 _mm_storeu_si128((__m128i*)(A + x1), mx0);
2634 ix0 = _mm_srai_epi32(ix0, INTER_BITS);
2635 ix1 = _mm_srai_epi32(ix1, INTER_BITS);
2636 iy0 = _mm_srai_epi32(iy0, INTER_BITS);
2637 iy1 = _mm_srai_epi32(iy1, INTER_BITS);
2638 ix0 = _mm_packs_epi32(ix0, ix1);
2639 iy0 = _mm_packs_epi32(iy0, iy1);
2640 ix1 = _mm_unpacklo_epi16(ix0, iy0);
2641 iy1 = _mm_unpackhi_epi16(ix0, iy0);
2642 _mm_storeu_si128((__m128i*)(XY + x1*2), ix1);
2643 _mm_storeu_si128((__m128i*)(XY + x1*2 + 8), iy1);
2648 for( ; x1 < bcols; x1++ )
2650 int sx = cvRound(sX[x1]*INTER_TAB_SIZE);
2651 int sy = cvRound(sY[x1]*INTER_TAB_SIZE);
2652 int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
2653 XY[x1*2] = (short)(sx >> INTER_BITS);
2654 XY[x1*2+1] = (short)(sy >> INTER_BITS);
2660 const float* sXY = (const float*)(map1.data + map1.step*(y+y1)) + x*2;
2662 for( x1 = 0; x1 < bcols; x1++ )
2664 int sx = cvRound(sXY[x1*2]*INTER_TAB_SIZE);
2665 int sy = cvRound(sXY[x1*2+1]*INTER_TAB_SIZE);
2666 int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
2667 XY[x1*2] = (short)(sx >> INTER_BITS);
2668 XY[x1*2+1] = (short)(sy >> INTER_BITS);
2673 ifunc(src, dpart, bufxy, bufa, ctab, borderType, borderValue);
2679 void cv::convertMaps( InputArray _map1, InputArray _map2,
2680 OutputArray _dstmap1, OutputArray _dstmap2,
2681 int dstm1type, bool nninterpolate )
2683 Mat map1 = _map1.getMat(), map2 = _map2.getMat(), dstmap1, dstmap2;
2684 Size size = map1.size();
2685 const Mat *m1 = &map1, *m2 = &map2;
2686 int m1type = m1->type(), m2type = m2->type();
2688 CV_Assert( (m1type == CV_16SC2 && (nninterpolate || m2type == CV_16UC1 || m2type == CV_16SC1)) ||
2689 (m2type == CV_16SC2 && (nninterpolate || m1type == CV_16UC1 || m1type == CV_16SC1)) ||
2690 (m1type == CV_32FC1 && m2type == CV_32FC1) ||
2691 (m1type == CV_32FC2 && !m2->data) );
2693 if( m2type == CV_16SC2 )
2695 std::swap( m1, m2 );
2696 std::swap( m1type, m2type );
2699 if( dstm1type <= 0 )
2700 dstm1type = m1type == CV_16SC2 ? CV_32FC2 : CV_16SC2;
2701 CV_Assert( dstm1type == CV_16SC2 || dstm1type == CV_32FC1 || dstm1type == CV_32FC2 );
2702 _dstmap1.create( size, dstm1type );
2703 dstmap1 = _dstmap1.getMat();
2705 if( !nninterpolate && dstm1type != CV_32FC2 )
2707 _dstmap2.create( size, dstm1type == CV_16SC2 ? CV_16UC1 : CV_32FC1 );
2708 dstmap2 = _dstmap2.getMat();
2713 if( m1type == dstm1type || (nninterpolate &&
2714 ((m1type == CV_16SC2 && dstm1type == CV_32FC2) ||
2715 (m1type == CV_32FC2 && dstm1type == CV_16SC2))) )
2717 m1->convertTo( dstmap1, dstmap1.type() );
2718 if( dstmap2.data && dstmap2.type() == m2->type() )
2719 m2->copyTo( dstmap2 );
2723 if( m1type == CV_32FC1 && dstm1type == CV_32FC2 )
2725 Mat vdata[] = { *m1, *m2 };
2726 merge( vdata, 2, dstmap1 );
2730 if( m1type == CV_32FC2 && dstm1type == CV_32FC1 )
2732 Mat mv[] = { dstmap1, dstmap2 };
2737 if( m1->isContinuous() && (!m2->data || m2->isContinuous()) &&
2738 dstmap1.isContinuous() && (!dstmap2.data || dstmap2.isContinuous()) )
2740 size.width *= size.height;
2744 const float scale = 1.f/INTER_TAB_SIZE;
2746 for( y = 0; y < size.height; y++ )
2748 const float* src1f = (const float*)(m1->data + m1->step*y);
2749 const float* src2f = (const float*)(m2->data + m2->step*y);
2750 const short* src1 = (const short*)src1f;
2751 const ushort* src2 = (const ushort*)src2f;
2753 float* dst1f = (float*)(dstmap1.data + dstmap1.step*y);
2754 float* dst2f = (float*)(dstmap2.data + dstmap2.step*y);
2755 short* dst1 = (short*)dst1f;
2756 ushort* dst2 = (ushort*)dst2f;
2758 if( m1type == CV_32FC1 && dstm1type == CV_16SC2 )
2761 for( x = 0; x < size.width; x++ )
2763 dst1[x*2] = saturate_cast<short>(src1f[x]);
2764 dst1[x*2+1] = saturate_cast<short>(src2f[x]);
2767 for( x = 0; x < size.width; x++ )
2769 int ix = saturate_cast<int>(src1f[x]*INTER_TAB_SIZE);
2770 int iy = saturate_cast<int>(src2f[x]*INTER_TAB_SIZE);
2771 dst1[x*2] = (short)(ix >> INTER_BITS);
2772 dst1[x*2+1] = (short)(iy >> INTER_BITS);
2773 dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
2776 else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )
2779 for( x = 0; x < size.width; x++ )
2781 dst1[x*2] = saturate_cast<short>(src1f[x*2]);
2782 dst1[x*2+1] = saturate_cast<short>(src1f[x*2+1]);
2785 for( x = 0; x < size.width; x++ )
2787 int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
2788 int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
2789 dst1[x*2] = (short)(ix >> INTER_BITS);
2790 dst1[x*2+1] = (short)(iy >> INTER_BITS);
2791 dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
2794 else if( m1type == CV_16SC2 && dstm1type == CV_32FC1 )
2796 for( x = 0; x < size.width; x++ )
2798 int fxy = src2 ? src2[x] : 0;
2799 dst1f[x] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
2800 dst2f[x] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
2803 else if( m1type == CV_16SC2 && dstm1type == CV_32FC2 )
2805 for( x = 0; x < size.width; x++ )
2807 int fxy = src2 ? src2[x] : 0;
2808 dst1f[x*2] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
2809 dst1f[x*2+1] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
2813 CV_Error( CV_StsNotImplemented, "Unsupported combination of input/output matrices" );
2818 void cv::warpAffine( InputArray _src, OutputArray _dst,
2819 InputArray _M0, Size dsize,
2820 int flags, int borderType, const Scalar& borderValue )
2822 Mat src = _src.getMat(), M0 = _M0.getMat();
2823 _dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
2824 Mat dst = _dst.getMat();
2825 CV_Assert( dst.data != src.data && src.cols > 0 && src.rows > 0 );
2827 const int BLOCK_SZ = 64;
2828 short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
2830 Mat matM(2, 3, CV_64F, M);
2831 int interpolation = flags & INTER_MAX;
2832 if( interpolation == INTER_AREA )
2833 interpolation = INTER_LINEAR;
2835 CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 2 && M0.cols == 3 );
2836 M0.convertTo(matM, matM.type());
2838 if( !(flags & WARP_INVERSE_MAP) )
2840 double D = M[0]*M[4] - M[1]*M[3];
2841 D = D != 0 ? 1./D : 0;
2842 double A11 = M[4]*D, A22=M[0]*D;
2843 M[0] = A11; M[1] *= -D;
2844 M[3] *= -D; M[4] = A22;
2845 double b1 = -M[0]*M[2] - M[1]*M[5];
2846 double b2 = -M[3]*M[2] - M[4]*M[5];
2847 M[2] = b1; M[5] = b2;
2850 #ifdef HAVE_TEGRA_OPTIMIZATION
2851 if( tegra::warpAffine(src, dst, M, interpolation, borderType, borderValue) )
2855 int x, y, x1, y1, width = dst.cols, height = dst.rows;
2856 AutoBuffer<int> _abdelta(width*2);
2857 int* adelta = &_abdelta[0], *bdelta = adelta + width;
2858 const int AB_BITS = MAX(10, (int)INTER_BITS);
2859 const int AB_SCALE = 1 << AB_BITS;
2860 int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2;
2862 bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
2865 for( x = 0; x < width; x++ )
2867 adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
2868 bdelta[x] = saturate_cast<int>(M[3]*x*AB_SCALE);
2871 int bh0 = std::min(BLOCK_SZ/2, height);
2872 int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
2873 bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);
2875 for( y = 0; y < height; y += bh0 )
2877 for( x = 0; x < width; x += bw0 )
2879 int bw = std::min( bw0, width - x);
2880 int bh = std::min( bh0, height - y);
2882 Mat _XY(bh, bw, CV_16SC2, XY), matA;
2883 Mat dpart(dst, Rect(x, y, bw, bh));
2885 for( y1 = 0; y1 < bh; y1++ )
2887 short* xy = XY + y1*bw*2;
2888 int X0 = saturate_cast<int>((M[1]*(y + y1) + M[2])*AB_SCALE) + round_delta;
2889 int Y0 = saturate_cast<int>((M[4]*(y + y1) + M[5])*AB_SCALE) + round_delta;
2891 if( interpolation == INTER_NEAREST )
2892 for( x1 = 0; x1 < bw; x1++ )
2894 int X = (X0 + adelta[x+x1]) >> AB_BITS;
2895 int Y = (Y0 + bdelta[x+x1]) >> AB_BITS;
2896 xy[x1*2] = saturate_cast<short>(X);
2897 xy[x1*2+1] = saturate_cast<short>(Y);
2901 short* alpha = A + y1*bw;
2906 __m128i fxy_mask = _mm_set1_epi32(INTER_TAB_SIZE - 1);
2907 __m128i XX = _mm_set1_epi32(X0), YY = _mm_set1_epi32(Y0);
2908 for( ; x1 <= bw - 8; x1 += 8 )
2910 __m128i tx0, tx1, ty0, ty1;
2911 tx0 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(adelta + x + x1)), XX);
2912 ty0 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(bdelta + x + x1)), YY);
2913 tx1 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(adelta + x + x1 + 4)), XX);
2914 ty1 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(bdelta + x + x1 + 4)), YY);
2916 tx0 = _mm_srai_epi32(tx0, AB_BITS - INTER_BITS);
2917 ty0 = _mm_srai_epi32(ty0, AB_BITS - INTER_BITS);
2918 tx1 = _mm_srai_epi32(tx1, AB_BITS - INTER_BITS);
2919 ty1 = _mm_srai_epi32(ty1, AB_BITS - INTER_BITS);
2921 __m128i fx_ = _mm_packs_epi32(_mm_and_si128(tx0, fxy_mask),
2922 _mm_and_si128(tx1, fxy_mask));
2923 __m128i fy_ = _mm_packs_epi32(_mm_and_si128(ty0, fxy_mask),
2924 _mm_and_si128(ty1, fxy_mask));
2925 tx0 = _mm_packs_epi32(_mm_srai_epi32(tx0, INTER_BITS),
2926 _mm_srai_epi32(tx1, INTER_BITS));
2927 ty0 = _mm_packs_epi32(_mm_srai_epi32(ty0, INTER_BITS),
2928 _mm_srai_epi32(ty1, INTER_BITS));
2929 fx_ = _mm_adds_epi16(fx_, _mm_slli_epi16(fy_, INTER_BITS));
2931 _mm_storeu_si128((__m128i*)(xy + x1*2), _mm_unpacklo_epi16(tx0, ty0));
2932 _mm_storeu_si128((__m128i*)(xy + x1*2 + 8), _mm_unpackhi_epi16(tx0, ty0));
2933 _mm_storeu_si128((__m128i*)(alpha + x1), fx_);
2937 for( ; x1 < bw; x1++ )
2939 int X = (X0 + adelta[x+x1]) >> (AB_BITS - INTER_BITS);
2940 int Y = (Y0 + bdelta[x+x1]) >> (AB_BITS - INTER_BITS);
2941 xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
2942 xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
2943 alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
2944 (X & (INTER_TAB_SIZE-1)));
2949 if( interpolation == INTER_NEAREST )
2950 remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
2953 Mat matA(bh, bw, CV_16U, A);
2954 remap( src, dpart, _XY, matA, interpolation, borderType, borderValue );
2961 void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
2962 Size dsize, int flags, int borderType, const Scalar& borderValue )
2964 Mat src = _src.getMat(), M0 = _M0.getMat();
2965 _dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
2966 Mat dst = _dst.getMat();
2968 CV_Assert( dst.data != src.data && src.cols > 0 && src.rows > 0 );
2970 const int BLOCK_SZ = 32;
2971 short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
2973 Mat matM(3, 3, CV_64F, M);
2974 int interpolation = flags & INTER_MAX;
2975 if( interpolation == INTER_AREA )
2976 interpolation = INTER_LINEAR;
2978 CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3 );
2979 M0.convertTo(matM, matM.type());
2981 if( !(flags & WARP_INVERSE_MAP) )
2984 #ifdef HAVE_TEGRA_OPTIMIZATION
2985 if( tegra::warpPerspective(src, dst, M, interpolation, borderType, borderValue) )
2989 int x, y, x1, y1, width = dst.cols, height = dst.rows;
2991 int bh0 = std::min(BLOCK_SZ/2, height);
2992 int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
2993 bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);
2995 for( y = 0; y < height; y += bh0 )
2997 for( x = 0; x < width; x += bw0 )
2999 int bw = std::min( bw0, width - x);
3000 int bh = std::min( bh0, height - y);
3002 Mat _XY(bh, bw, CV_16SC2, XY), matA;
3003 Mat dpart(dst, Rect(x, y, bw, bh));
3005 for( y1 = 0; y1 < bh; y1++ )
3007 short* xy = XY + y1*bw*2;
3008 double X0 = M[0]*x + M[1]*(y + y1) + M[2];
3009 double Y0 = M[3]*x + M[4]*(y + y1) + M[5];
3010 double W0 = M[6]*x + M[7]*(y + y1) + M[8];
3012 if( interpolation == INTER_NEAREST )
3013 for( x1 = 0; x1 < bw; x1++ )
3015 double W = W0 + M[6]*x1;
3017 double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
3018 double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
3019 int X = saturate_cast<int>(fX);
3020 int Y = saturate_cast<int>(fY);
3022 xy[x1*2] = saturate_cast<short>(X);
3023 xy[x1*2+1] = saturate_cast<short>(Y);
3027 short* alpha = A + y1*bw;
3028 for( x1 = 0; x1 < bw; x1++ )
3030 double W = W0 + M[6]*x1;
3031 W = W ? INTER_TAB_SIZE/W : 0;
3032 double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
3033 double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
3034 int X = saturate_cast<int>(fX);
3035 int Y = saturate_cast<int>(fY);
3037 xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
3038 xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
3039 alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
3040 (X & (INTER_TAB_SIZE-1)));
3045 if( interpolation == INTER_NEAREST )
3046 remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
3049 Mat matA(bh, bw, CV_16U, A);
3050 remap( src, dpart, _XY, matA, interpolation, borderType, borderValue );
3057 cv::Mat cv::getRotationMatrix2D( Point2f center, double angle, double scale )
3060 double alpha = cos(angle)*scale;
3061 double beta = sin(angle)*scale;
3063 Mat M(2, 3, CV_64F);
3064 double* m = (double*)M.data;
3068 m[2] = (1-alpha)*center.x - beta*center.y;
3071 m[5] = beta*center.x + (1-alpha)*center.y;
3076 /* Calculates coefficients of perspective transformation
3077 * which maps (xi,yi) to (ui,vi), (i=1,2,3,4):
3079 * c00*xi + c01*yi + c02
3080 * ui = ---------------------
3081 * c20*xi + c21*yi + c22
3083 * c10*xi + c11*yi + c12
3084 * vi = ---------------------
3085 * c20*xi + c21*yi + c22
3087 * Coefficients are calculated by solving linear system:
3088 * / x0 y0 1 0 0 0 -x0*u0 -y0*u0 \ /c00\ /u0\
3089 * | x1 y1 1 0 0 0 -x1*u1 -y1*u1 | |c01| |u1|
3090 * | x2 y2 1 0 0 0 -x2*u2 -y2*u2 | |c02| |u2|
3091 * | x3 y3 1 0 0 0 -x3*u3 -y3*u3 |.|c10|=|u3|,
3092 * | 0 0 0 x0 y0 1 -x0*v0 -y0*v0 | |c11| |v0|
3093 * | 0 0 0 x1 y1 1 -x1*v1 -y1*v1 | |c12| |v1|
3094 * | 0 0 0 x2 y2 1 -x2*v2 -y2*v2 | |c20| |v2|
3095 * \ 0 0 0 x3 y3 1 -x3*v3 -y3*v3 / \c21/ \v3/
3098 * cij - matrix coefficients, c22 = 1
3100 cv::Mat cv::getPerspectiveTransform( const Point2f src[], const Point2f dst[] )
3102 Mat M(3, 3, CV_64F), X(8, 1, CV_64F, M.data);
3103 double a[8][8], b[8];
3104 Mat A(8, 8, CV_64F, a), B(8, 1, CV_64F, b);
3106 for( int i = 0; i < 4; ++i )
3108 a[i][0] = a[i+4][3] = src[i].x;
3109 a[i][1] = a[i+4][4] = src[i].y;
3110 a[i][2] = a[i+4][5] = 1;
3111 a[i][3] = a[i][4] = a[i][5] =
3112 a[i+4][0] = a[i+4][1] = a[i+4][2] = 0;
3113 a[i][6] = -src[i].x*dst[i].x;
3114 a[i][7] = -src[i].y*dst[i].x;
3115 a[i+4][6] = -src[i].x*dst[i].y;
3116 a[i+4][7] = -src[i].y*dst[i].y;
3121 solve( A, B, X, DECOMP_SVD );
3122 ((double*)M.data)[8] = 1.;
3127 /* Calculates coefficients of affine transformation
3128 * which maps (xi,yi) to (ui,vi), (i=1,2,3):
3130 * ui = c00*xi + c01*yi + c02
3132 * vi = c10*xi + c11*yi + c12
3134 * Coefficients are calculated by solving linear system:
3135 * / x0 y0 1 0 0 0 \ /c00\ /u0\
3136 * | x1 y1 1 0 0 0 | |c01| |u1|
3137 * | x2 y2 1 0 0 0 | |c02| |u2|
3138 * | 0 0 0 x0 y0 1 | |c10| |v0|
3139 * | 0 0 0 x1 y1 1 | |c11| |v1|
3140 * \ 0 0 0 x2 y2 1 / |c12| |v2|
3143 * cij - matrix coefficients
3146 cv::Mat cv::getAffineTransform( const Point2f src[], const Point2f dst[] )
3148 Mat M(2, 3, CV_64F), X(6, 1, CV_64F, M.data);
3149 double a[6*6], b[6];
3150 Mat A(6, 6, CV_64F, a), B(6, 1, CV_64F, b);
3152 for( int i = 0; i < 3; i++ )
3156 a[j] = a[k+3] = src[i].x;
3157 a[j+1] = a[k+4] = src[i].y;
3158 a[j+2] = a[k+5] = 1;
3159 a[j+3] = a[j+4] = a[j+5] = 0;
3160 a[k] = a[k+1] = a[k+2] = 0;
3162 b[i*2+1] = dst[i].y;
3169 void cv::invertAffineTransform(InputArray _matM, OutputArray __iM)
3171 Mat matM = _matM.getMat();
3172 CV_Assert(matM.rows == 2 && matM.cols == 3);
3173 __iM.create(2, 3, matM.type());
3174 Mat _iM = __iM.getMat();
3176 if( matM.type() == CV_32F )
3178 const float* M = (const float*)matM.data;
3179 float* iM = (float*)_iM.data;
3180 int step = (int)(matM.step/sizeof(M[0])), istep = (int)(_iM.step/sizeof(iM[0]));
3182 double D = M[0]*M[step+1] - M[1]*M[step];
3183 D = D != 0 ? 1./D : 0;
3184 double A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
3185 double b1 = -A11*M[2] - A12*M[step+2];
3186 double b2 = -A21*M[2] - A22*M[step+2];
3188 iM[0] = (float)A11; iM[1] = (float)A12; iM[2] = (float)b1;
3189 iM[istep] = (float)A21; iM[istep+1] = (float)A22; iM[istep+2] = (float)b2;
3191 else if( matM.type() == CV_64F )
3193 const double* M = (const double*)matM.data;
3194 double* iM = (double*)_iM.data;
3195 int step = (int)(matM.step/sizeof(M[0])), istep = (int)(_iM.step/sizeof(iM[0]));
3197 double D = M[0]*M[step+1] - M[1]*M[step];
3198 D = D != 0 ? 1./D : 0;
3199 double A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
3200 double b1 = -A11*M[2] - A12*M[step+2];
3201 double b2 = -A21*M[2] - A22*M[step+2];
3203 iM[0] = A11; iM[1] = A12; iM[2] = b1;
3204 iM[istep] = A21; iM[istep+1] = A22; iM[istep+2] = b2;
3207 CV_Error( CV_StsUnsupportedFormat, "" );
3210 cv::Mat cv::getPerspectiveTransform(InputArray _src, InputArray _dst)
3212 Mat src = _src.getMat(), dst = _dst.getMat();
3213 CV_Assert(src.checkVector(2, CV_32F) == 4 && dst.checkVector(2, CV_32F) == 4);
3214 return getPerspectiveTransform((const Point2f*)src.data, (const Point2f*)dst.data);
3217 cv::Mat cv::getAffineTransform(InputArray _src, InputArray _dst)
3219 Mat src = _src.getMat(), dst = _dst.getMat();
3220 CV_Assert(src.checkVector(2, CV_32F) == 3 && dst.checkVector(2, CV_32F) == 3);
3221 return getAffineTransform((const Point2f*)src.data, (const Point2f*)dst.data);
3225 cvResize( const CvArr* srcarr, CvArr* dstarr, int method )
3227 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3228 CV_Assert( src.type() == dst.type() );
3229 cv::resize( src, dst, dst.size(), (double)dst.cols/src.cols,
3230 (double)dst.rows/src.rows, method );
3235 cvWarpAffine( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
3236 int flags, CvScalar fillval )
3238 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3239 cv::Mat matrix = cv::cvarrToMat(marr);
3240 CV_Assert( src.type() == dst.type() );
3241 cv::warpAffine( src, dst, matrix, dst.size(), flags,
3242 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
3247 cvWarpPerspective( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
3248 int flags, CvScalar fillval )
3250 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3251 cv::Mat matrix = cv::cvarrToMat(marr);
3252 CV_Assert( src.type() == dst.type() );
3253 cv::warpPerspective( src, dst, matrix, dst.size(), flags,
3254 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
3259 cvRemap( const CvArr* srcarr, CvArr* dstarr,
3260 const CvArr* _mapx, const CvArr* _mapy,
3261 int flags, CvScalar fillval )
3263 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), dst0 = dst;
3264 cv::Mat mapx = cv::cvarrToMat(_mapx), mapy = cv::cvarrToMat(_mapy);
3265 CV_Assert( src.type() == dst.type() && dst.size() == mapx.size() );
3266 cv::remap( src, dst, mapx, mapy, flags & cv::INTER_MAX,
3267 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
3269 CV_Assert( dst0.data == dst.data );
3274 cv2DRotationMatrix( CvPoint2D32f center, double angle,
3275 double scale, CvMat* matrix )
3277 cv::Mat M0 = cv::cvarrToMat(matrix), M = cv::getRotationMatrix2D(center, angle, scale);
3278 CV_Assert( M.size() == M.size() );
3279 M.convertTo(M0, M0.type());
3285 cvGetPerspectiveTransform( const CvPoint2D32f* src,
3286 const CvPoint2D32f* dst,
3289 cv::Mat M0 = cv::cvarrToMat(matrix),
3290 M = cv::getPerspectiveTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
3291 CV_Assert( M.size() == M.size() );
3292 M.convertTo(M0, M0.type());
3298 cvGetAffineTransform( const CvPoint2D32f* src,
3299 const CvPoint2D32f* dst,
3302 cv::Mat M0 = cv::cvarrToMat(matrix),
3303 M = cv::getAffineTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
3304 CV_Assert( M.size() == M0.size() );
3305 M.convertTo(M0, M0.type());
3311 cvConvertMaps( const CvArr* arr1, const CvArr* arr2, CvArr* dstarr1, CvArr* dstarr2 )
3313 cv::Mat map1 = cv::cvarrToMat(arr1), map2;
3314 cv::Mat dstmap1 = cv::cvarrToMat(dstarr1), dstmap2;
3317 map2 = cv::cvarrToMat(arr2);
3320 dstmap2 = cv::cvarrToMat(dstarr2);
3321 if( dstmap2.type() == CV_16SC1 )
3322 dstmap2 = cv::Mat(dstmap2.size(), CV_16UC1, dstmap2.data, dstmap2.step);
3325 cv::convertMaps( map1, map2, dstmap1, dstmap2, dstmap1.type(), false );
3328 /****************************************************************************************\
3329 * Log-Polar Transform *
3330 \****************************************************************************************/
3332 /* now it is done via Remap; more correct implementation should use
3333 some super-sampling technique outside of the "fovea" circle */
3335 cvLogPolar( const CvArr* srcarr, CvArr* dstarr,
3336 CvPoint2D32f center, double M, int flags )
3338 cv::Ptr<CvMat> mapx, mapy;
3340 CvMat srcstub, *src = cvGetMat(srcarr, &srcstub);
3341 CvMat dststub, *dst = cvGetMat(dstarr, &dststub);
3342 CvSize ssize, dsize;
3344 if( !CV_ARE_TYPES_EQ( src, dst ))
3345 CV_Error( CV_StsUnmatchedFormats, "" );
3348 CV_Error( CV_StsOutOfRange, "M should be >0" );
3350 ssize = cvGetMatSize(src);
3351 dsize = cvGetMatSize(dst);
3353 mapx = cvCreateMat( dsize.height, dsize.width, CV_32F );
3354 mapy = cvCreateMat( dsize.height, dsize.width, CV_32F );
3356 if( !(flags & CV_WARP_INVERSE_MAP) )
3359 cv::AutoBuffer<double> _exp_tab(dsize.width);
3360 double* exp_tab = _exp_tab;
3362 for( rho = 0; rho < dst->width; rho++ )
3363 exp_tab[rho] = std::exp(rho/M);
3365 for( phi = 0; phi < dsize.height; phi++ )
3367 double cp = cos(phi*2*CV_PI/dsize.height);
3368 double sp = sin(phi*2*CV_PI/dsize.height);
3369 float* mx = (float*)(mapx->data.ptr + phi*mapx->step);
3370 float* my = (float*)(mapy->data.ptr + phi*mapy->step);
3372 for( rho = 0; rho < dsize.width; rho++ )
3374 double r = exp_tab[rho];
3375 double x = r*cp + center.x;
3376 double y = r*sp + center.y;
3386 CvMat bufx, bufy, bufp, bufa;
3387 double ascale = ssize.height/(2*CV_PI);
3388 cv::AutoBuffer<float> _buf(4*dsize.width);
3391 bufx = cvMat( 1, dsize.width, CV_32F, buf );
3392 bufy = cvMat( 1, dsize.width, CV_32F, buf + dsize.width );
3393 bufp = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*2 );
3394 bufa = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*3 );
3396 for( x = 0; x < dsize.width; x++ )
3397 bufx.data.fl[x] = (float)x - center.x;
3399 for( y = 0; y < dsize.height; y++ )
3401 float* mx = (float*)(mapx->data.ptr + y*mapx->step);
3402 float* my = (float*)(mapy->data.ptr + y*mapy->step);
3404 for( x = 0; x < dsize.width; x++ )
3405 bufy.data.fl[x] = (float)y - center.y;
3408 cvCartToPolar( &bufx, &bufy, &bufp, &bufa );
3410 for( x = 0; x < dsize.width; x++ )
3411 bufp.data.fl[x] += 1.f;
3413 cvLog( &bufp, &bufp );
3415 for( x = 0; x < dsize.width; x++ )
3417 double rho = bufp.data.fl[x]*M;
3418 double phi = bufa.data.fl[x]*ascale;
3424 for( x = 0; x < dsize.width; x++ )
3426 double xx = bufx.data.fl[x];
3427 double yy = bufy.data.fl[x];
3429 double p = log(sqrt(xx*xx + yy*yy) + 1.)*M;
3430 double a = atan2(yy,xx);
3442 cvRemap( src, dst, mapx, mapy, flags, cvScalarAll(0) );
3446 /****************************************************************************************
3447 Linear-Polar Transform
3448 J.L. Blanco, Apr 2009
3449 ****************************************************************************************/
3451 void cvLinearPolar( const CvArr* srcarr, CvArr* dstarr,
3452 CvPoint2D32f center, double maxRadius, int flags )
3454 cv::Ptr<CvMat> mapx, mapy;
3456 CvMat srcstub, *src = (CvMat*)srcarr;
3457 CvMat dststub, *dst = (CvMat*)dstarr;
3458 CvSize ssize, dsize;
3460 src = cvGetMat( srcarr, &srcstub,0,0 );
3461 dst = cvGetMat( dstarr, &dststub,0,0 );
3463 if( !CV_ARE_TYPES_EQ( src, dst ))
3464 CV_Error( CV_StsUnmatchedFormats, "" );
3466 ssize.width = src->cols;
3467 ssize.height = src->rows;
3468 dsize.width = dst->cols;
3469 dsize.height = dst->rows;
3471 mapx = cvCreateMat( dsize.height, dsize.width, CV_32F );
3472 mapy = cvCreateMat( dsize.height, dsize.width, CV_32F );
3474 if( !(flags & CV_WARP_INVERSE_MAP) )
3478 for( phi = 0; phi < dsize.height; phi++ )
3480 double cp = cos(phi*2*CV_PI/dsize.height);
3481 double sp = sin(phi*2*CV_PI/dsize.height);
3482 float* mx = (float*)(mapx->data.ptr + phi*mapx->step);
3483 float* my = (float*)(mapy->data.ptr + phi*mapy->step);
3485 for( rho = 0; rho < dsize.width; rho++ )
3487 double r = maxRadius*(rho+1)/dsize.width;
3488 double x = r*cp + center.x;
3489 double y = r*sp + center.y;
3499 CvMat bufx, bufy, bufp, bufa;
3500 const double ascale = ssize.height/(2*CV_PI);
3501 const double pscale = ssize.width/maxRadius;
3503 cv::AutoBuffer<float> _buf(4*dsize.width);
3506 bufx = cvMat( 1, dsize.width, CV_32F, buf );
3507 bufy = cvMat( 1, dsize.width, CV_32F, buf + dsize.width );
3508 bufp = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*2 );
3509 bufa = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*3 );
3511 for( x = 0; x < dsize.width; x++ )
3512 bufx.data.fl[x] = (float)x - center.x;
3514 for( y = 0; y < dsize.height; y++ )
3516 float* mx = (float*)(mapx->data.ptr + y*mapx->step);
3517 float* my = (float*)(mapy->data.ptr + y*mapy->step);
3519 for( x = 0; x < dsize.width; x++ )
3520 bufy.data.fl[x] = (float)y - center.y;
3522 cvCartToPolar( &bufx, &bufy, &bufp, &bufa, 0 );
3524 for( x = 0; x < dsize.width; x++ )
3525 bufp.data.fl[x] += 1.f;
3527 for( x = 0; x < dsize.width; x++ )
3529 double rho = bufp.data.fl[x]*pscale;
3530 double phi = bufa.data.fl[x]*ascale;
3537 cvRemap( src, dst, mapx, mapy, flags, cvScalarAll(0) );