1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 /* ////////////////////////////////////////////////////////////////////
45 // Geometrical transforms on images and matrices: rotation, zoom etc.
49 #include "precomp.hpp"
// --- Intel IPP acceleration helpers -------------------------------------
// NOTE(review): this dump is line-sampled; braces, the switch/if skeleton of
// IPPSet and several statements are missing here. Comments below describe the
// visible lines only.
53 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// One-time IPP library initialization via static initializer.
54 static IppStatus sts = ippInit();
60 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// Function-pointer aliases used to dispatch to the type/channel-specific
// IPP entry points at run time.
61 typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize);
62 typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);
63 typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int);
64 typedef IppStatus (CV_STDCALL* ippiResizeSqrPixelFunc)(const void*, IppiSize, int, IppiRect, void*, int, IppiRect, double, double, double, double, int, Ipp8u *);
// Fill a multi-channel image region with a constant color through the given
// ippiSet_* function. Returns true when the IPP status is non-negative
// (success); channel values are truncated to the target element type.
66 template <int channels, typename Type>
67 bool IPPSetSimple(cv::Scalar value, void *dataPointer, int step, IppiSize &size, ippiSetFunc func)
69 Type values[channels];
70 for( int i = 0; i < channels; i++ )
71 values[i] = (Type)value[i];
72 return func(values, dataPointer, step, size) >= 0;
// Dispatcher: picks the ippiSet variant matching (channels, depth).
// The surrounding if/switch structure is elided in this dump; the visible
// returns cover C1/C3/C4 at 8u/16u/32f.
75 bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
82 return ippiSet_8u_C1R((Ipp8u)value[0], (Ipp8u *)dataPointer, step, size) >= 0;
84 return ippiSet_16u_C1R((Ipp16u)value[0], (Ipp16u *)dataPointer, step, size) >= 0;
86 return ippiSet_32f_C1R((Ipp32f)value[0], (Ipp32f *)dataPointer, step, size) >= 0;
96 return IPPSetSimple<3, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C3R);
98 return IPPSetSimple<3, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C3R);
100 return IPPSetSimple<3, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C3R);
103 else if( channels == 4 )
108 return IPPSetSimple<4, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C4R);
110 return IPPSetSimple<4, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C4R);
112 return IPPSetSimple<4, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C4R);
120 /************** interpolation formulas and tables ***************/
// Fixed-point precision used by the resize path: weights are stored scaled
// by 2^11 so two-weight products fit comfortably in 32-bit accumulators.
122 const int INTER_RESIZE_COEF_BITS=11;
123 const int INTER_RESIZE_COEF_SCALE=1 << INTER_RESIZE_COEF_BITS;
// Fixed-point precision used by the remap path (2^15 scaling).
125 const int INTER_REMAP_COEF_BITS=15;
126 const int INTER_REMAP_COEF_SCALE=1 << INTER_REMAP_COEF_BITS;
// Nearest-neighbor rounding deltas, one entry per sub-pixel offset.
128 static uchar NNDeltaTab_i[INTER_TAB_SIZE2][2];
// Precomputed 2x2 bilinear kernels: float and fixed-point (short) versions.
130 static float BilinearTab_f[INTER_TAB_SIZE2][2][2];
131 static short BilinearTab_i[INTER_TAB_SIZE2][2][2];
// 4-channel interleaved copy of the bilinear table, aligned to 16 bytes for
// SIMD loads (hence the +2 slack rows and the alignPtr on the raw buffer).
134 static short BilinearTab_iC4_buf[INTER_TAB_SIZE2+2][2][8];
135 static short (*BilinearTab_iC4)[2][8] = (short (*)[2][8])alignPtr(BilinearTab_iC4_buf, 16);
// Precomputed 4x4 bicubic kernels (float and fixed-point).
138 static float BicubicTab_f[INTER_TAB_SIZE2][4][4];
139 static short BicubicTab_i[INTER_TAB_SIZE2][4][4];
// Precomputed 8x8 Lanczos-4 kernels (float and fixed-point).
141 static float Lanczos4Tab_f[INTER_TAB_SIZE2][8][8];
142 static short Lanczos4Tab_i[INTER_TAB_SIZE2][8][8];
// Two-tap linear interpolation weights for a sample at fractional offset
// x in [0, 1): the left neighbor gets weight (1 - x), the right neighbor x.
// The weights always sum to exactly 1.
// NOTE(review): the body lines were elided in this dump; restored to the
// standard linear kernel used by initInterTab1D (2 coefficients per entry).
static inline void interpolateLinear( float x, float* coeffs )
{
    coeffs[0] = 1.f - x;
    coeffs[1] = x;
}
// Four-tap Keys bicubic interpolation weights (a = -0.75) for a sample at
// fractional offset x in [0, 1). coeffs[1]/coeffs[2] weight the two nearest
// samples; the last weight is derived so the kernel sums to exactly 1.
static inline void interpolateCubic( float x, float* coeffs )
{
    const float A = -0.75f;
    const float x1 = x + 1;   // distance to the leftmost of the 4 samples
    const float mx = 1 - x;   // distance measured from the third sample

    coeffs[0] = ((A*x1 - 5*A)*x1 + 8*A)*x1 - 4*A;
    coeffs[1] = ((A + 2)*x - (A + 3))*x*x + 1;
    coeffs[2] = ((A + 2)*mx - (A + 3))*mx*mx + 1;
    coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2];
}
// Eight-tap Lanczos-4 interpolation weights for fractional offset x in [0,1).
// Uses the angle-addition identity: sin(pi*(x+3-i)/4) is expanded via the
// precomputed cos/sin pairs in cs[] so only one sin/cos evaluation is needed.
// NOTE(review): this dump elides several lines (the early-return body for
// x < FLT_EPSILON and the final normalization loop that rescales coeffs so
// they sum to 1) — confirm against the full file.
160 static inline void interpolateLanczos4( float x, float* coeffs )
162 static const double s45 = 0.70710678118654752440084436210485;
163 static const double cs[][2]=
164 {{1, 0}, {-s45, -s45}, {0, 1}, {s45, -s45}, {-1, 0}, {s45, s45}, {0, -1}, {-s45, s45}};
// Degenerate offset: the kernel collapses to selecting the center sample.
166 if( x < FLT_EPSILON )
168 for( int i = 0; i < 8; i++ )
// General case: evaluate sinc-based weights at the 8 tap positions.
175 double y0=-(x+3)*CV_PI*0.25, s0 = sin(y0), c0=cos(y0);
176 for(int i = 0; i < 8; i++ )
178 double y = -(x+3-i)*CV_PI*0.25;
179 coeffs[i] = (float)((cs[i][0]*s0 + cs[i][1]*c0)/(y*y));
// (normalization pass over the 8 coefficients follows in the full file)
184 for(int i = 0; i < 8; i++ )
// Fill a 1-D interpolation table: for each of tabsz sub-pixel offsets
// (i/tabsz), store the kernel weights contiguously in 'tab' — 2 weights per
// entry for linear, 4 for cubic, 8 for Lanczos-4. Unknown methods raise
// CV_StsBadArg.
188 static void initInterTab1D(int method, float* tab, int tabsz)
190 float scale = 1.f/tabsz;
191 if( method == INTER_LINEAR )
193 for( int i = 0; i < tabsz; i++, tab += 2 )
194 interpolateLinear( i*scale, tab );
196 else if( method == INTER_CUBIC )
198 for( int i = 0; i < tabsz; i++, tab += 4 )
199 interpolateCubic( i*scale, tab );
201 else if( method == INTER_LANCZOS4 )
203 for( int i = 0; i < tabsz; i++, tab += 8 )
204 interpolateLanczos4( i*scale, tab );
207 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
// Build (once per method) the 2-D separable interpolation tables by taking
// the outer product of the 1-D kernel with itself for every
// (i, j) sub-pixel offset pair. Returns the fixed-point table when 'fixpt'
// is true, the float table otherwise.
// NOTE(review): this dump elides braces and some declarations (tab/itab/ksize
// declarations, loop-variable declarations) — comments describe visible lines.
211 static const void* initInterTab2D( int method, bool fixpt )
213 static bool inittab[INTER_MAX+1] = {false};
// Select the destination tables and kernel size for the method.
217 if( method == INTER_LINEAR )
218 tab = BilinearTab_f[0][0], itab = BilinearTab_i[0][0], ksize=2;
219 else if( method == INTER_CUBIC )
220 tab = BicubicTab_f[0][0], itab = BicubicTab_i[0][0], ksize=4;
221 else if( method == INTER_LANCZOS4 )
222 tab = Lanczos4Tab_f[0][0], itab = Lanczos4Tab_i[0][0], ksize=8;
224 CV_Error( CV_StsBadArg, "Unknown/unsupported interpolation type" );
// Lazily initialize: build the table only on the first call per method.
226 if( !inittab[method] )
228 AutoBuffer<float> _tab(8*INTER_TAB_SIZE);
230 initInterTab1D(method, _tab, INTER_TAB_SIZE);
// For every (i, j) sub-pixel offset pair, form the ksize x ksize 2-D kernel.
231 for( i = 0; i < INTER_TAB_SIZE; i++ )
232 for( j = 0; j < INTER_TAB_SIZE; j++, tab += ksize*ksize, itab += ksize*ksize )
// Nearest-neighbor deltas: round toward the closer source pixel.
235 NNDeltaTab_i[i*INTER_TAB_SIZE+j][0] = j < INTER_TAB_SIZE/2;
236 NNDeltaTab_i[i*INTER_TAB_SIZE+j][1] = i < INTER_TAB_SIZE/2;
// Outer product of vertical (k1) and horizontal (k2) 1-D weights; the
// fixed-point copy accumulates its rounded sum in isum.
238 for( k1 = 0; k1 < ksize; k1++ )
240 float vy = _tab[i*ksize + k1];
241 for( k2 = 0; k2 < ksize; k2++ )
243 float v = vy*_tab[j*ksize + k2];
244 tab[k1*ksize + k2] = v;
245 isum += itab[k1*ksize + k2] = saturate_cast<short>(v*INTER_REMAP_COEF_SCALE);
// Rounding made the fixed-point weights not sum to exactly 2^15: push the
// residual into the extreme (min/max) central coefficients so the kernel
// stays normalized and cannot overflow.
249 if( isum != INTER_REMAP_COEF_SCALE )
251 int diff = isum - INTER_REMAP_COEF_SCALE;
252 int ksize2 = ksize/2, Mk1=ksize2, Mk2=ksize2, mk1=ksize2, mk2=ksize2;
253 for( k1 = ksize2; k1 < ksize2+2; k1++ )
254 for( k2 = ksize2; k2 < ksize2+2; k2++ )
256 if( itab[k1*ksize+k2] < itab[mk1*ksize+mk2] )
258 else if( itab[k1*ksize+k2] > itab[Mk1*ksize+Mk2] )
262 itab[Mk1*ksize + Mk2] = (short)(itab[Mk1*ksize + Mk2] - diff);
264 itab[mk1*ksize + mk2] = (short)(itab[mk1*ksize + mk2] - diff);
// Rewind the table pointers to their starts after the fill loop.
267 tab -= INTER_TAB_SIZE2*ksize*ksize;
268 itab -= INTER_TAB_SIZE2*ksize*ksize;
// For bilinear, also build the channel-interleaved (C4 / SIMD) variant.
270 if( method == INTER_LINEAR )
272 for( i = 0; i < INTER_TAB_SIZE2; i++ )
273 for( j = 0; j < 4; j++ )
275 BilinearTab_iC4[i][0][j*2] = BilinearTab_i[i][0][0];
276 BilinearTab_iC4[i][0][j*2+1] = BilinearTab_i[i][0][1];
277 BilinearTab_iC4[i][1][j*2] = BilinearTab_i[i][1][0];
278 BilinearTab_iC4[i][1][j*2+1] = BilinearTab_i[i][1][1];
282 inittab[method] = true;
284 return fixpt ? (const void*)itab : (const void*)tab;
288 static bool initAllInterTab2D()
290 return initInterTab2D( INTER_LINEAR, false ) &&
291 initInterTab2D( INTER_LINEAR, true ) &&
292 initInterTab2D( INTER_CUBIC, false ) &&
293 initInterTab2D( INTER_CUBIC, true ) &&
294 initInterTab2D( INTER_LANCZOS4, false ) &&
295 initInterTab2D( INTER_LANCZOS4, true );
// Trigger table construction once during static initialization; 'volatile'
// keeps the otherwise-unused flag (and thus the call) from being elided.
298 static volatile bool doInitAllInterTab2D = initAllInterTab2D();
// Functor converting a working-type value ST to destination type DT with
// saturation (clamping to DT's representable range).
// NOTE(review): the member typedef lines are elided in this dump.
301 template<typename ST, typename DT> struct Cast
306 DT operator()(ST val) const { return saturate_cast<DT>(val); }
// Fixed-point variant: adds the rounding constant DELTA (half of 2^bits),
// shifts right by 'bits', then saturates to DT.
309 template<typename ST, typename DT, int bits> struct FixedPtCast
313 enum { SHIFT = bits, DELTA = 1 << (bits-1) };
315 DT operator()(ST val) const { return saturate_cast<DT>((val + DELTA)>>SHIFT); }
318 /****************************************************************************************\
320 \****************************************************************************************/
// Parallel body for nearest-neighbor resize: each worker handles a range of
// destination rows, copying whole pixels from precomputed source offsets.
// NOTE(review): this dump elides the switch-on-pixel-size skeleton, some
// case bodies and member declarations; comments describe visible lines.
322 class resizeNNInvoker :
323 public ParallelLoopBody
// x_ofs[x] holds the byte offset of the source pixel for destination
// column x; pix_size4 is the pixel size in ints for the generic path.
326 resizeNNInvoker(const Mat& _src, Mat &_dst, int *_x_ofs, int _pix_size4, double _ify) :
327 ParallelLoopBody(), src(_src), dst(_dst), x_ofs(_x_ofs), pix_size4(_pix_size4),
332 virtual void operator() (const Range& range) const
334 Size ssize = src.size(), dsize = dst.size();
335 int y, x, pix_size = (int)src.elemSize();
337 for( y = range.start; y < range.end; y++ )
339 uchar* D = dst.data + dst.step*y;
// Map destination row to the nearest (floored) source row, clamped.
340 int sy = std::min(cvFloor(y*ify), ssize.height-1);
341 const uchar* S = src.data + src.step*sy;
// 1-byte pixels: unrolled-by-2 copy, then scalar tail.
346 for( x = 0; x <= dsize.width - 2; x += 2 )
348 uchar t0 = S[x_ofs[x]];
349 uchar t1 = S[x_ofs[x+1]];
354 for( ; x < dsize.width; x++ )
// 2-byte pixels copied as ushort.
358 for( x = 0; x < dsize.width; x++ )
359 *(ushort*)(D + x*2) = *(ushort*)(S + x_ofs[x]);
// 3-byte pixels copied channel by channel.
362 for( x = 0; x < dsize.width; x++, D += 3 )
364 const uchar* _tS = S + x_ofs[x];
365 D[0] = _tS[0]; D[1] = _tS[1]; D[2] = _tS[2];
// 4-byte pixels copied as int.
369 for( x = 0; x < dsize.width; x++ )
370 *(int*)(D + x*4) = *(int*)(S + x_ofs[x]);
// 6-byte pixels: three ushort channels.
373 for( x = 0; x < dsize.width; x++, D += 6 )
375 const ushort* _tS = (const ushort*)(S + x_ofs[x]);
376 ushort* _tD = (ushort*)D;
377 _tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2];
// 8-byte pixels: two ints.
381 for( x = 0; x < dsize.width; x++, D += 8 )
383 const int* _tS = (const int*)(S + x_ofs[x]);
385 _tD[0] = _tS[0]; _tD[1] = _tS[1];
// 12-byte pixels: three ints.
389 for( x = 0; x < dsize.width; x++, D += 12 )
391 const int* _tS = (const int*)(S + x_ofs[x]);
393 _tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2];
// Generic fallback: copy pix_size4 ints per pixel.
397 for( x = 0; x < dsize.width; x++, D += pix_size )
399 const int* _tS = (const int*)(S + x_ofs[x]);
401 for( int k = 0; k < pix_size4; k++ )
411 int* x_ofs, pix_size4;
// Non-copyable: parallel bodies share the same const instance.
414 resizeNNInvoker(const resizeNNInvoker&);
415 resizeNNInvoker& operator=(const resizeNNInvoker&);
// Nearest-neighbor resize driver: precomputes the per-column source byte
// offsets once, then runs resizeNNInvoker over all destination rows in
// parallel. fx/fy are the dst/src scale factors.
// NOTE(review): the return-type line and braces are elided in this dump.
419 resizeNN( const Mat& src, Mat& dst, double fx, double fy )
421 Size ssize = src.size(), dsize = dst.size();
422 AutoBuffer<int> _x_ofs(dsize.width);
424 int pix_size = (int)src.elemSize();
425 int pix_size4 = (int)(pix_size / sizeof(int));
// Inverse scales map destination coordinates back into the source.
426 double ifx = 1./fx, ify = 1./fy;
427 for( x = 0; x < dsize.width; x++ )
431 int sx = cvFloor(x*ifx);
// Clamp to the last column and convert to a byte offset.
432 x_ofs[x] = std::min(sx, ssize.width-1)*pix_size;
435 Range range(0, dsize.height);
436 resizeNNInvoker invoker(src, dst, x_ofs, pix_size4, ify);
// Split the work when the image exceeds ~64K pixels per stripe.
437 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
// Scalar fallbacks for the vectorized resize hooks: returning 0 tells the
// caller that no elements were processed, so the scalar loop does all work.
// NOTE(review): the enclosing struct declarations (VResizeNoVec /
// HResizeNoVec) are elided in this dump.
443 int operator()(const uchar**, uchar*, const uchar*, int ) const { return 0; }
448 int operator()(const uchar**, uchar**, int, const int*,
449 const uchar*, int, int, int, int, int) const { return 0; }
// SSE2 vertical pass of bilinear resize: blends two rows of 32-bit
// fixed-point intermediate sums (S0, S1) with 16-bit weights beta[0]/beta[1]
// and packs the result to 8-bit destination pixels. Returns how many
// elements were processed; the caller finishes the tail in scalar code.
454 struct VResizeLinearVec_32s8u
456 int operator()(const uchar** _src, uchar* dst, const uchar* _beta, int width ) const
// Bail out (process nothing) when SSE2 is unavailable.
458 if( !checkHardwareSupport(CV_CPU_SSE2) )
461 const int** src = (const int**)_src;
462 const short* beta = (const short*)_beta;
463 const int *S0 = src[0], *S1 = src[1];
// delta = 2 implements round-to-nearest for the final >>2.
465 __m128i b0 = _mm_set1_epi16(beta[0]), b1 = _mm_set1_epi16(beta[1]);
466 __m128i delta = _mm_set1_epi16(2);
// Aligned-load path when both source rows are 16-byte aligned.
468 if( (((size_t)S0|(size_t)S1)&15) == 0 )
469 for( ; x <= width - 16; x += 16 )
471 __m128i x0, x1, x2, y0, y1, y2;
472 x0 = _mm_load_si128((const __m128i*)(S0 + x));
473 x1 = _mm_load_si128((const __m128i*)(S0 + x + 4));
474 y0 = _mm_load_si128((const __m128i*)(S1 + x));
475 y1 = _mm_load_si128((const __m128i*)(S1 + x + 4));
// Pre-shift by 4 so the 32->16 pack cannot saturate, then blend with
// _mm_mulhi_epi16 (keeps the high 16 bits of the weight product).
476 x0 = _mm_packs_epi32(_mm_srai_epi32(x0, 4), _mm_srai_epi32(x1, 4));
477 y0 = _mm_packs_epi32(_mm_srai_epi32(y0, 4), _mm_srai_epi32(y1, 4));
479 x1 = _mm_load_si128((const __m128i*)(S0 + x + 8));
480 x2 = _mm_load_si128((const __m128i*)(S0 + x + 12));
481 y1 = _mm_load_si128((const __m128i*)(S1 + x + 8));
482 y2 = _mm_load_si128((const __m128i*)(S1 + x + 12));
483 x1 = _mm_packs_epi32(_mm_srai_epi32(x1, 4), _mm_srai_epi32(x2, 4));
484 y1 = _mm_packs_epi32(_mm_srai_epi32(y1, 4), _mm_srai_epi32(y2, 4));
486 x0 = _mm_adds_epi16(_mm_mulhi_epi16( x0, b0 ), _mm_mulhi_epi16( y0, b1 ));
487 x1 = _mm_adds_epi16(_mm_mulhi_epi16( x1, b0 ), _mm_mulhi_epi16( y1, b1 ));
// Round (+2) and shift to final 8-bit scale, then saturating pack.
489 x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
490 x1 = _mm_srai_epi16(_mm_adds_epi16(x1, delta), 2);
491 _mm_storeu_si128( (__m128i*)(dst + x), _mm_packus_epi16(x0, x1));
// Unaligned-load path, same arithmetic as above.
494 for( ; x <= width - 16; x += 16 )
496 __m128i x0, x1, x2, y0, y1, y2;
497 x0 = _mm_loadu_si128((const __m128i*)(S0 + x));
498 x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 4));
499 y0 = _mm_loadu_si128((const __m128i*)(S1 + x));
500 y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 4));
501 x0 = _mm_packs_epi32(_mm_srai_epi32(x0, 4), _mm_srai_epi32(x1, 4));
502 y0 = _mm_packs_epi32(_mm_srai_epi32(y0, 4), _mm_srai_epi32(y1, 4));
504 x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 8));
505 x2 = _mm_loadu_si128((const __m128i*)(S0 + x + 12));
506 y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 8));
507 y2 = _mm_loadu_si128((const __m128i*)(S1 + x + 12));
508 x1 = _mm_packs_epi32(_mm_srai_epi32(x1, 4), _mm_srai_epi32(x2, 4));
509 y1 = _mm_packs_epi32(_mm_srai_epi32(y1, 4), _mm_srai_epi32(y2, 4));
511 x0 = _mm_adds_epi16(_mm_mulhi_epi16( x0, b0 ), _mm_mulhi_epi16( y0, b1 ));
512 x1 = _mm_adds_epi16(_mm_mulhi_epi16( x1, b0 ), _mm_mulhi_epi16( y1, b1 ));
514 x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
515 x1 = _mm_srai_epi16(_mm_adds_epi16(x1, delta), 2);
516 _mm_storeu_si128( (__m128i*)(dst + x), _mm_packus_epi16(x0, x1));
// 4-at-a-time cleanup loop for the remaining elements.
519 for( ; x < width - 4; x += 4 )
522 x0 = _mm_srai_epi32(_mm_loadu_si128((const __m128i*)(S0 + x)), 4);
523 y0 = _mm_srai_epi32(_mm_loadu_si128((const __m128i*)(S1 + x)), 4);
524 x0 = _mm_packs_epi32(x0, x0);
525 y0 = _mm_packs_epi32(y0, y0);
526 x0 = _mm_adds_epi16(_mm_mulhi_epi16(x0, b0), _mm_mulhi_epi16(y0, b1));
527 x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
528 x0 = _mm_packus_epi16(x0, x0);
529 *(int*)(dst + x) = _mm_cvtsi128_si32(x0);
// SSE2 vertical pass of bilinear resize for float intermediates packed to
// 16-bit output. 'shiftval' biases values so a signed 32->16 pack can be
// used for both unsigned (SHRT_MIN bias) and signed (0 bias) destinations:
// preshift is subtracted-by-pack then undone by postshift.
537 template<int shiftval> struct VResizeLinearVec_32f16
539 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
541 if( !checkHardwareSupport(CV_CPU_SSE2) )
544 const float** src = (const float**)_src;
545 const float* beta = (const float*)_beta;
546 const float *S0 = src[0], *S1 = src[1];
547 ushort* dst = (ushort*)_dst;
550 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]);
551 __m128i preshift = _mm_set1_epi32(shiftval);
552 __m128i postshift = _mm_set1_epi16((short)shiftval);
// Aligned-load path: blend rows, convert to int32, bias, pack, unbias.
554 if( (((size_t)S0|(size_t)S1)&15) == 0 )
555 for( ; x <= width - 16; x += 16 )
557 __m128 x0, x1, y0, y1;
559 x0 = _mm_load_ps(S0 + x);
560 x1 = _mm_load_ps(S0 + x + 4);
561 y0 = _mm_load_ps(S1 + x);
562 y1 = _mm_load_ps(S1 + x + 4);
564 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
565 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
566 t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
567 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
568 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t2), postshift);
570 x0 = _mm_load_ps(S0 + x + 8);
571 x1 = _mm_load_ps(S0 + x + 12);
572 y0 = _mm_load_ps(S1 + x + 8);
573 y1 = _mm_load_ps(S1 + x + 12);
575 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
576 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
577 t1 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
578 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
579 t1 = _mm_add_epi16(_mm_packs_epi32(t1, t2), postshift);
581 _mm_storeu_si128( (__m128i*)(dst + x), t0);
582 _mm_storeu_si128( (__m128i*)(dst + x + 8), t1);
// Unaligned-load path, same arithmetic.
585 for( ; x <= width - 16; x += 16 )
587 __m128 x0, x1, y0, y1;
589 x0 = _mm_loadu_ps(S0 + x);
590 x1 = _mm_loadu_ps(S0 + x + 4);
591 y0 = _mm_loadu_ps(S1 + x);
592 y1 = _mm_loadu_ps(S1 + x + 4);
594 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
595 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
596 t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
597 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
598 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t2), postshift);
600 x0 = _mm_loadu_ps(S0 + x + 8);
601 x1 = _mm_loadu_ps(S0 + x + 12);
602 y0 = _mm_loadu_ps(S1 + x + 8);
603 y1 = _mm_loadu_ps(S1 + x + 12);
605 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
606 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
607 t1 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
608 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
609 t1 = _mm_add_epi16(_mm_packs_epi32(t1, t2), postshift);
611 _mm_storeu_si128( (__m128i*)(dst + x), t0);
612 _mm_storeu_si128( (__m128i*)(dst + x + 8), t1);
// 4-at-a-time cleanup loop.
615 for( ; x < width - 4; x += 4 )
619 x0 = _mm_loadu_ps(S0 + x);
620 y0 = _mm_loadu_ps(S1 + x);
622 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
623 t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
624 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t0), postshift);
625 _mm_storel_epi64( (__m128i*)(dst + x), t0);
// Concrete instantiations: ushort (biased) and short (unbiased) outputs.
632 typedef VResizeLinearVec_32f16<SHRT_MIN> VResizeLinearVec_32f16u;
633 typedef VResizeLinearVec_32f16<0> VResizeLinearVec_32f16s;
// SSE vertical pass of bilinear resize for pure float data: dst = S0*b0 +
// S1*b1, 8 floats per iteration. Returns the number of processed elements.
635 struct VResizeLinearVec_32f
637 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
639 if( !checkHardwareSupport(CV_CPU_SSE) )
642 const float** src = (const float**)_src;
643 const float* beta = (const float*)_beta;
644 const float *S0 = src[0], *S1 = src[1];
645 float* dst = (float*)_dst;
648 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]);
// Aligned-load path when both rows are 16-byte aligned.
650 if( (((size_t)S0|(size_t)S1)&15) == 0 )
651 for( ; x <= width - 8; x += 8 )
653 __m128 x0, x1, y0, y1;
654 x0 = _mm_load_ps(S0 + x);
655 x1 = _mm_load_ps(S0 + x + 4);
656 y0 = _mm_load_ps(S1 + x);
657 y1 = _mm_load_ps(S1 + x + 4);
659 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
660 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
662 _mm_storeu_ps( dst + x, x0);
663 _mm_storeu_ps( dst + x + 4, x1);
// Unaligned-load path, identical arithmetic.
666 for( ; x <= width - 8; x += 8 )
668 __m128 x0, x1, y0, y1;
669 x0 = _mm_loadu_ps(S0 + x);
670 x1 = _mm_loadu_ps(S0 + x + 4);
671 y0 = _mm_loadu_ps(S1 + x);
672 y1 = _mm_loadu_ps(S1 + x + 4);
674 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
675 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
677 _mm_storeu_ps( dst + x, x0);
678 _mm_storeu_ps( dst + x + 4, x1);
// SSE2 vertical pass of bicubic resize: combines four rows of 32-bit
// fixed-point sums with float weights (pre-divided by the squared resize
// scale), converts back to int and packs to 8-bit with saturation.
686 struct VResizeCubicVec_32s8u
688 int operator()(const uchar** _src, uchar* dst, const uchar* _beta, int width ) const
690 if( !checkHardwareSupport(CV_CPU_SSE2) )
693 const int** src = (const int**)_src;
694 const short* beta = (const short*)_beta;
695 const int *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
// Fold the fixed-point scaling of both passes into the float weights.
697 float scale = 1.f/(INTER_RESIZE_COEF_SCALE*INTER_RESIZE_COEF_SCALE);
698 __m128 b0 = _mm_set1_ps(beta[0]*scale), b1 = _mm_set1_ps(beta[1]*scale),
699 b2 = _mm_set1_ps(beta[2]*scale), b3 = _mm_set1_ps(beta[3]*scale);
// Aligned-load path when all four rows are 16-byte aligned.
701 if( (((size_t)S0|(size_t)S1|(size_t)S2|(size_t)S3)&15) == 0 )
702 for( ; x <= width - 8; x += 8 )
704 __m128i x0, x1, y0, y1;
705 __m128 s0, s1, f0, f1;
706 x0 = _mm_load_si128((const __m128i*)(S0 + x));
707 x1 = _mm_load_si128((const __m128i*)(S0 + x + 4));
708 y0 = _mm_load_si128((const __m128i*)(S1 + x));
709 y1 = _mm_load_si128((const __m128i*)(S1 + x + 4));
// Accumulate S0*b0 + S1*b1 in float.
711 s0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b0);
712 s1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b0);
713 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b1);
714 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b1);
715 s0 = _mm_add_ps(s0, f0);
716 s1 = _mm_add_ps(s1, f1);
718 x0 = _mm_load_si128((const __m128i*)(S2 + x));
719 x1 = _mm_load_si128((const __m128i*)(S2 + x + 4));
720 y0 = _mm_load_si128((const __m128i*)(S3 + x));
721 y1 = _mm_load_si128((const __m128i*)(S3 + x + 4));
// Add S2*b2 + S3*b3.
723 f0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b2);
724 f1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b2);
725 s0 = _mm_add_ps(s0, f0);
726 s1 = _mm_add_ps(s1, f1);
727 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b3);
728 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b3);
729 s0 = _mm_add_ps(s0, f0);
730 s1 = _mm_add_ps(s1, f1);
// Round to int32, pack to int16 then to uint8 with saturation.
732 x0 = _mm_cvtps_epi32(s0);
733 x1 = _mm_cvtps_epi32(s1);
735 x0 = _mm_packs_epi32(x0, x1);
736 _mm_storel_epi64( (__m128i*)(dst + x), _mm_packus_epi16(x0, x0));
// Unaligned-load path, same arithmetic.
739 for( ; x <= width - 8; x += 8 )
741 __m128i x0, x1, y0, y1;
742 __m128 s0, s1, f0, f1;
743 x0 = _mm_loadu_si128((const __m128i*)(S0 + x));
744 x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 4));
745 y0 = _mm_loadu_si128((const __m128i*)(S1 + x));
746 y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 4));
748 s0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b0);
749 s1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b0);
750 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b1);
751 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b1);
752 s0 = _mm_add_ps(s0, f0);
753 s1 = _mm_add_ps(s1, f1);
755 x0 = _mm_loadu_si128((const __m128i*)(S2 + x));
756 x1 = _mm_loadu_si128((const __m128i*)(S2 + x + 4));
757 y0 = _mm_loadu_si128((const __m128i*)(S3 + x));
758 y1 = _mm_loadu_si128((const __m128i*)(S3 + x + 4));
760 f0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b2);
761 f1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b2);
762 s0 = _mm_add_ps(s0, f0);
763 s1 = _mm_add_ps(s1, f1);
764 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b3);
765 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b3);
766 s0 = _mm_add_ps(s0, f0);
767 s1 = _mm_add_ps(s1, f1);
769 x0 = _mm_cvtps_epi32(s0);
770 x1 = _mm_cvtps_epi32(s1);
772 x0 = _mm_packs_epi32(x0, x1);
773 _mm_storel_epi64( (__m128i*)(dst + x), _mm_packus_epi16(x0, x0));
// SSE2 vertical pass of bicubic resize for float intermediates packed to
// 16-bit output; 'shiftval' biases the signed pack exactly as in
// VResizeLinearVec_32f16 (SHRT_MIN for ushort output, 0 for short).
781 template<int shiftval> struct VResizeCubicVec_32f16
783 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
785 if( !checkHardwareSupport(CV_CPU_SSE2) )
788 const float** src = (const float**)_src;
789 const float* beta = (const float*)_beta;
790 const float *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
791 ushort* dst = (ushort*)_dst;
793 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]),
794 b2 = _mm_set1_ps(beta[2]), b3 = _mm_set1_ps(beta[3]);
795 __m128i preshift = _mm_set1_epi32(shiftval);
796 __m128i postshift = _mm_set1_epi16((short)shiftval);
// Single unaligned-load loop; 8 elements per iteration.
798 for( ; x <= width - 8; x += 8 )
800 __m128 x0, x1, y0, y1, s0, s1;
802 x0 = _mm_loadu_ps(S0 + x);
803 x1 = _mm_loadu_ps(S0 + x + 4);
804 y0 = _mm_loadu_ps(S1 + x);
805 y1 = _mm_loadu_ps(S1 + x + 4);
// s = S0*b0 + S1*b1 ...
807 s0 = _mm_mul_ps(x0, b0);
808 s1 = _mm_mul_ps(x1, b0);
809 y0 = _mm_mul_ps(y0, b1);
810 y1 = _mm_mul_ps(y1, b1);
811 s0 = _mm_add_ps(s0, y0);
812 s1 = _mm_add_ps(s1, y1);
814 x0 = _mm_loadu_ps(S2 + x);
815 x1 = _mm_loadu_ps(S2 + x + 4);
816 y0 = _mm_loadu_ps(S3 + x);
817 y1 = _mm_loadu_ps(S3 + x + 4);
// ... + S2*b2 + S3*b3.
819 x0 = _mm_mul_ps(x0, b2);
820 x1 = _mm_mul_ps(x1, b2);
821 y0 = _mm_mul_ps(y0, b3);
822 y1 = _mm_mul_ps(y1, b3);
823 s0 = _mm_add_ps(s0, x0);
824 s1 = _mm_add_ps(s1, x1);
825 s0 = _mm_add_ps(s0, y0);
826 s1 = _mm_add_ps(s1, y1);
// Bias, pack 32->16, unbias, store.
828 t0 = _mm_add_epi32(_mm_cvtps_epi32(s0), preshift);
829 t1 = _mm_add_epi32(_mm_cvtps_epi32(s1), preshift);
831 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t1), postshift);
832 _mm_storeu_si128( (__m128i*)(dst + x), t0);
// Concrete instantiations: ushort (biased) and short (unbiased) outputs.
839 typedef VResizeCubicVec_32f16<SHRT_MIN> VResizeCubicVec_32f16u;
840 typedef VResizeCubicVec_32f16<0> VResizeCubicVec_32f16s;
// SSE vertical pass of bicubic resize for pure float data:
// dst = S0*b0 + S1*b1 + S2*b2 + S3*b3, 8 floats per iteration.
842 struct VResizeCubicVec_32f
844 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
846 if( !checkHardwareSupport(CV_CPU_SSE) )
849 const float** src = (const float**)_src;
850 const float* beta = (const float*)_beta;
851 const float *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
852 float* dst = (float*)_dst;
854 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]),
855 b2 = _mm_set1_ps(beta[2]), b3 = _mm_set1_ps(beta[3]);
857 for( ; x <= width - 8; x += 8 )
859 __m128 x0, x1, y0, y1, s0, s1;
860 x0 = _mm_loadu_ps(S0 + x);
861 x1 = _mm_loadu_ps(S0 + x + 4);
862 y0 = _mm_loadu_ps(S1 + x);
863 y1 = _mm_loadu_ps(S1 + x + 4);
865 s0 = _mm_mul_ps(x0, b0);
866 s1 = _mm_mul_ps(x1, b0);
867 y0 = _mm_mul_ps(y0, b1);
868 y1 = _mm_mul_ps(y1, b1);
869 s0 = _mm_add_ps(s0, y0);
870 s1 = _mm_add_ps(s1, y1);
872 x0 = _mm_loadu_ps(S2 + x);
873 x1 = _mm_loadu_ps(S2 + x + 4);
874 y0 = _mm_loadu_ps(S3 + x);
875 y1 = _mm_loadu_ps(S3 + x + 4);
877 x0 = _mm_mul_ps(x0, b2);
878 x1 = _mm_mul_ps(x1, b2);
879 y0 = _mm_mul_ps(y0, b3);
880 y1 = _mm_mul_ps(y1, b3);
881 s0 = _mm_add_ps(s0, x0);
882 s1 = _mm_add_ps(s1, x1);
883 s0 = _mm_add_ps(s0, y0);
884 s1 = _mm_add_ps(s1, y1);
886 _mm_storeu_ps( dst + x, s0);
887 _mm_storeu_ps( dst + x + 4, s1);
// Non-SSE builds: alias every vectorized hook to the do-nothing fallback so
// the templated resize code compiles unchanged and runs fully scalar.
896 typedef VResizeNoVec VResizeLinearVec_32s8u;
897 typedef VResizeNoVec VResizeLinearVec_32f16u;
898 typedef VResizeNoVec VResizeLinearVec_32f16s;
899 typedef VResizeNoVec VResizeLinearVec_32f;
901 typedef VResizeNoVec VResizeCubicVec_32s8u;
902 typedef VResizeNoVec VResizeCubicVec_32f16u;
903 typedef VResizeNoVec VResizeCubicVec_32f16s;
904 typedef VResizeNoVec VResizeCubicVec_32f;
// Horizontal-pass vector hooks are not implemented for any ISA yet.
908 typedef HResizeNoVec HResizeLinearVec_8u32s;
909 typedef HResizeNoVec HResizeLinearVec_16u32f;
910 typedef HResizeNoVec HResizeLinearVec_16s32f;
911 typedef HResizeNoVec HResizeLinearVec_32f;
912 typedef HResizeNoVec HResizeLinearVec_64f;
// Horizontal pass of bilinear resize: for each of 'count' source rows,
// produce a row of working-type sums D[dx] = S[sx]*a0 + S[sx+cn]*a1 using
// the precomputed column offsets (xofs) and weight pairs (alpha).
// Columns at/after xmax fall outside the source and replicate the edge
// pixel scaled by ONE. Rows are processed in pairs for locality.
// NOTE(review): the struct-name line and braces are elided in this dump
// (presumably 'struct HResizeLinear' — confirm against the full file).
915 template<typename T, typename WT, typename AT, int ONE, class VecOp>
918 typedef T value_type;
920 typedef AT alpha_type;
922 void operator()(const T** src, WT** dst, int count,
923 const int* xofs, const AT* alpha,
924 int swidth, int dwidth, int cn, int xmin, int xmax ) const
// Let the (optional) vectorized hook process a prefix of each row.
929 int dx0 = vecOp((const uchar**)src, (uchar**)dst, count,
930 xofs, (const uchar*)alpha, swidth, dwidth, cn, xmin, xmax );
// Two rows at a time.
932 for( k = 0; k <= count - 2; k++ )
934 const T *S0 = src[k], *S1 = src[k+1];
935 WT *D0 = dst[k], *D1 = dst[k+1];
936 for( dx = dx0; dx < xmax; dx++ )
939 WT a0 = alpha[dx*2], a1 = alpha[dx*2+1];
940 WT t0 = S0[sx]*a0 + S0[sx + cn]*a1;
941 WT t1 = S1[sx]*a0 + S1[sx + cn]*a1;
942 D0[dx] = t0; D1[dx] = t1;
// Right border: replicate the edge sample at full weight.
945 for( ; dx < dwidth; dx++ )
948 D0[dx] = WT(S0[sx]*ONE); D1[dx] = WT(S1[sx]*ONE);
// Remaining odd row, if any.
952 for( ; k < count; k++ )
956 for( dx = 0; dx < xmax; dx++ )
959 D[dx] = S[sx]*alpha[dx*2] + S[sx+cn]*alpha[dx*2+1];
962 for( ; dx < dwidth; dx++ )
963 D[dx] = WT(S[xofs[dx]]*ONE);
// Vertical pass of bilinear resize: blend two horizontally-resized rows
// with weights beta[0]/beta[1] and convert to the destination type via
// CastOp. A vectorized hook handles a prefix; scalar code (unrolled by 4
// when enabled) finishes the row.
// NOTE(review): the struct-name line is elided in this dump (presumably
// 'struct VResizeLinear' — confirm against the full file).
969 template<typename T, typename WT, typename AT, class CastOp, class VecOp>
972 typedef T value_type;
974 typedef AT alpha_type;
976 void operator()(const WT** src, T* dst, const AT* beta, int width ) const
978 WT b0 = beta[0], b1 = beta[1];
979 const WT *S0 = src[0], *S1 = src[1];
983 int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
984 #if CV_ENABLE_UNROLLED
985 for( ; x <= width - 4; x += 4 )
988 t0 = S0[x]*b0 + S1[x]*b1;
989 t1 = S0[x+1]*b0 + S1[x+1]*b1;
990 dst[x] = castOp(t0); dst[x+1] = castOp(t1);
991 t0 = S0[x+2]*b0 + S1[x+2]*b1;
992 t1 = S0[x+3]*b0 + S1[x+3]*b1;
993 dst[x+2] = castOp(t0); dst[x+3] = castOp(t1);
996 for( ; x < width; x++ )
997 dst[x] = castOp(S0[x]*b0 + S1[x]*b1);
// Specialization of the vertical bilinear pass for 8-bit output with int
// intermediates: mirrors the SSE kernel's fixed-point arithmetic exactly
// (pre-shift >>4, 16-bit weight multiply keeping the high word via >>16,
// round with +2, final >>2) so scalar and SIMD paths agree bit-for-bit.
1002 struct VResizeLinear<uchar, int, short, FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>, VResizeLinearVec_32s8u>
1004 typedef uchar value_type;
1005 typedef int buf_type;
1006 typedef short alpha_type;
1008 void operator()(const buf_type** src, value_type* dst, const alpha_type* beta, int width ) const
1010 alpha_type b0 = beta[0], b1 = beta[1];
1011 const buf_type *S0 = src[0], *S1 = src[1];
1012 VResizeLinearVec_32s8u vecOp;
// SIMD prefix, then scalar code with identical rounding for the tail.
1014 int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
1015 #if CV_ENABLE_UNROLLED
1016 for( ; x <= width - 4; x += 4 )
1018 dst[x+0] = uchar(( ((b0 * (S0[x+0] >> 4)) >> 16) + ((b1 * (S1[x+0] >> 4)) >> 16) + 2)>>2);
1019 dst[x+1] = uchar(( ((b0 * (S0[x+1] >> 4)) >> 16) + ((b1 * (S1[x+1] >> 4)) >> 16) + 2)>>2);
1020 dst[x+2] = uchar(( ((b0 * (S0[x+2] >> 4)) >> 16) + ((b1 * (S1[x+2] >> 4)) >> 16) + 2)>>2);
1021 dst[x+3] = uchar(( ((b0 * (S0[x+3] >> 4)) >> 16) + ((b1 * (S1[x+3] >> 4)) >> 16) + 2)>>2);
1024 for( ; x < width; x++ )
1025 dst[x] = uchar(( ((b0 * (S0[x] >> 4)) >> 16) + ((b1 * (S1[x] >> 4)) >> 16) + 2)>>2);
// Horizontal pass of bicubic resize: each destination column is a weighted
// sum of 4 consecutive source pixels (offsets -cn..+2cn around xofs[dx]),
// with explicit border replication for columns whose 4-tap window leaves
// the source row.
// NOTE(review): struct-name line, braces and some border-handling lines
// are elided in this dump.
1030 template<typename T, typename WT, typename AT>
1033 typedef T value_type;
1034 typedef WT buf_type;
1035 typedef AT alpha_type;
1037 void operator()(const T** src, WT** dst, int count,
1038 const int* xofs, const AT* alpha,
1039 int swidth, int dwidth, int cn, int xmin, int xmax ) const
1041 for( int k = 0; k < count; k++ )
1043 const T *S = src[k];
1045 int dx = 0, limit = xmin;
// Left (then right) border: clamp each of the 4 tap positions into the row.
1048 for( ; dx < limit; dx++, alpha += 4 )
1050 int j, sx = xofs[dx] - cn;
1052 for( j = 0; j < 4; j++ )
1054 int sxj = sx + j*cn;
1055 if( (unsigned)sxj >= (unsigned)swidth )
1059 while( sxj >= swidth )
1062 v += S[sxj]*alpha[j];
1066 if( limit == dwidth )
// Interior: all 4 taps are in range — no clamping needed.
1068 for( ; dx < xmax; dx++, alpha += 4 )
1071 D[dx] = S[sx-cn]*alpha[0] + S[sx]*alpha[1] +
1072 S[sx+cn]*alpha[2] + S[sx+cn*2]*alpha[3];
// Vertical pass of bicubic resize: blend four horizontally-resized rows
// with weights beta[0..3], converting via CastOp. The vectorized hook
// handles a prefix; the scalar loop finishes the row.
// NOTE(review): struct-name line and braces are elided in this dump.
1082 template<typename T, typename WT, typename AT, class CastOp, class VecOp>
1085 typedef T value_type;
1086 typedef WT buf_type;
1087 typedef AT alpha_type;
1089 void operator()(const WT** src, T* dst, const AT* beta, int width ) const
1091 WT b0 = beta[0], b1 = beta[1], b2 = beta[2], b3 = beta[3];
1092 const WT *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
1096 int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
1097 for( ; x < width; x++ )
1098 dst[x] = castOp(S0[x]*b0 + S1[x]*b1 + S2[x]*b2 + S3[x]*b3);
// Horizontal pass of Lanczos-4 resize: each destination column is a
// weighted sum of 8 consecutive source pixels (offsets -3cn..+4cn around
// xofs[dx]), clamping tap positions at the row borders.
// NOTE(review): braces and some border-handling lines are elided.
1103 template<typename T, typename WT, typename AT>
1104 struct HResizeLanczos4
1106 typedef T value_type;
1107 typedef WT buf_type;
1108 typedef AT alpha_type;
1110 void operator()(const T** src, WT** dst, int count,
1111 const int* xofs, const AT* alpha,
1112 int swidth, int dwidth, int cn, int xmin, int xmax ) const
1114 for( int k = 0; k < count; k++ )
1116 const T *S = src[k];
1118 int dx = 0, limit = xmin;
// Border columns: clamp each of the 8 tap positions into the row.
1121 for( ; dx < limit; dx++, alpha += 8 )
1123 int j, sx = xofs[dx] - cn*3;
1125 for( j = 0; j < 8; j++ )
1127 int sxj = sx + j*cn;
1128 if( (unsigned)sxj >= (unsigned)swidth )
1132 while( sxj >= swidth )
1135 v += S[sxj]*alpha[j];
1139 if( limit == dwidth )
// Interior: all 8 taps in range.
1141 for( ; dx < xmax; dx++, alpha += 8 )
1144 D[dx] = S[sx-cn*3]*alpha[0] + S[sx-cn*2]*alpha[1] +
1145 S[sx-cn]*alpha[2] + S[sx]*alpha[3] +
1146 S[sx+cn]*alpha[4] + S[sx+cn*2]*alpha[5] +
1147 S[sx+cn*3]*alpha[6] + S[sx+cn*4]*alpha[7];
// Vertical pass of Lanczos-4 resize: blend eight horizontally-resized rows
// with weights beta[0..7], converting via CastOp. The unrolled path keeps
// four running sums to process 4 output pixels per outer iteration.
1157 template<typename T, typename WT, typename AT, class CastOp, class VecOp>
1158 struct VResizeLanczos4
1160 typedef T value_type;
1161 typedef WT buf_type;
1162 typedef AT alpha_type;
1164 void operator()(const WT** src, T* dst, const AT* beta, int width ) const
1168 int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
1169 #if CV_ENABLE_UNROLLED
1170 for( ; x <= width - 4; x += 4 )
// Seed the four accumulators with row 0, then fold rows 1..7.
1173 const WT* S = src[0];
1174 WT s0 = S[x]*b, s1 = S[x+1]*b, s2 = S[x+2]*b, s3 = S[x+3]*b;
1176 for( k = 1; k < 8; k++ )
1178 b = beta[k]; S = src[k];
1179 s0 += S[x]*b; s1 += S[x+1]*b;
1180 s2 += S[x+2]*b; s3 += S[x+3]*b;
1183 dst[x] = castOp(s0); dst[x+1] = castOp(s1);
1184 dst[x+2] = castOp(s2); dst[x+3] = castOp(s3);
// Scalar tail: full 8-row dot product per pixel.
1187 for( ; x < width; x++ )
1189 dst[x] = castOp(src[0][x]*beta[0] + src[1][x]*beta[1] +
1190 src[2][x]*beta[2] + src[3][x]*beta[3] + src[4][x]*beta[4] +
1191 src[5][x]*beta[5] + src[6][x]*beta[6] + src[7][x]*beta[7]);
// Clamp x into the half-open range [a, b): values below a map to a,
// values at or above b map to b-1. Callers pass a = 0, b = image size,
// so the result is a valid row/column index (assumes a < b).
// NOTE(review): reconstructed from a corrupted listing -- the original
// lines carried stray line-number prefixes and the braces were missing.
static inline int clip(int x, int a, int b)
{
    return x >= a ? (x < b ? x : b-1) : a;
}
// Maximum supported interpolation kernel size (rows buffered per output
// row); Lanczos4 uses 8, so 16 leaves headroom.
1202 static const int MAX_ESIZE=16;
// resizeGeneric_Invoker: ParallelLoopBody that resizes a horizontal band
// of rows [range.start, range.end) using a separable two-pass scheme:
// HResize produces ksize intermediate rows, VResize blends them into one
// destination row. Previously-computed intermediate rows are reused when
// consecutive destination rows map to the same source rows.
// NOTE(review): listing is decimated -- access specifiers, braces, member
// declarations (src, dst, hresize, vresize) and loop bodies are partially
// missing (embedded line numbers jump).
1204 template <typename HResize, typename VResize>
1205 class resizeGeneric_Invoker :
1206 public ParallelLoopBody
1209 typedef typename HResize::value_type T;
1210 typedef typename HResize::buf_type WT;
1211 typedef typename HResize::alpha_type AT;
1213 resizeGeneric_Invoker(const Mat& _src, Mat &_dst, const int *_xofs, const int *_yofs,
1214 const AT* _alpha, const AT* __beta, const Size& _ssize, const Size &_dsize,
1215 int _ksize, int _xmin, int _xmax) :
1216 ParallelLoopBody(), src(_src), dst(_dst), xofs(_xofs), yofs(_yofs),
1217 alpha(_alpha), _beta(__beta), ssize(_ssize), dsize(_dsize),
1218 ksize(_ksize), xmin(_xmin), xmax(_xmax)
1222 virtual void operator() (const Range& range) const
1224 int dy, cn = src.channels();
// One aligned scratch row per kernel tap.
1228 int bufstep = (int)alignSize(dsize.width, 16);
1229 AutoBuffer<WT> _buffer(bufstep*ksize);
1230 const T* srows[MAX_ESIZE]={0};
1231 WT* rows[MAX_ESIZE]={0};
// prev_sy caches which source row each scratch row currently holds.
1232 int prev_sy[MAX_ESIZE];
1234 for(int k = 0; k < ksize; k++ )
1237 rows[k] = (WT*)_buffer + bufstep*k;
// Per-row vertical weights for this band.
1240 const AT* beta = _beta + ksize * range.start;
1242 for( dy = range.start; dy < range.end; dy++, beta += ksize )
1244 int sy0 = yofs[dy], k0=ksize, k1=0, ksize2 = ksize/2;
1246 for(int k = 0; k < ksize; k++ )
1248 int sy = clip(sy0 - ksize2 + 1 + k, 0, ssize.height);
1249 for( k1 = std::max(k1, k); k1 < ksize; k1++ )
1251 if( sy == prev_sy[k1] ) // if the sy-th row has been computed already, reuse it.
1254 memcpy( rows[k], rows[k1], bufstep*sizeof(rows[0][0]) );
1259 k0 = std::min(k0, k); // remember the first row that needs to be computed
1260 srows[k] = (T*)(src.data + src.step*sy);
// Horizontal pass only on the rows not served from cache, then blend.
1265 hresize( (const T**)(srows + k0), (WT**)(rows + k0), ksize - k0, xofs, (const AT*)(alpha),
1266 ssize.width, dsize.width, cn, xmin, xmax );
1267 vresize( (const WT**)rows, (T*)(dst.data + dst.step*dy), beta, dsize.width );
1274 const int* xofs, *yofs;
1275 const AT* alpha, *_beta;
1277 int ksize, xmin, xmax;
// resizeGeneric_: driver that runs resizeGeneric_Invoker over all
// destination rows via parallel_for_. _alpha/_beta are type-erased weight
// tables (short or float depending on the fixed-point mode).
// NOTE(review): listing is decimated -- braces and part of the original
// explanatory comment are missing.
1280 template<class HResize, class VResize>
1281 static void resizeGeneric_( const Mat& src, Mat& dst,
1282 const int* xofs, const void* _alpha,
1283 const int* yofs, const void* _beta,
1284 int xmin, int xmax, int ksize )
1286 typedef typename HResize::alpha_type AT;
1288 const AT* beta = (const AT*)_beta;
1289 Size ssize = src.size(), dsize = dst.size();
1290 int cn = src.channels();
1295 // image resize is a separable operation. In case of not too strong
// (review: the rest of this original comment was lost in the listing;
// it presumably discussed when separable filtering is a good
// approximation -- confirm against upstream source.)
1297 Range range(0, dsize.height);
1298 resizeGeneric_Invoker<HResize, VResize> invoker(src, dst, xofs, yofs, (const AT*)_alpha, beta,
1299 ssize, dsize, ksize, xmin, xmax);
// Cost hint: split into parallel chunks only for images over ~64K pixels.
1300 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
// ResizeAreaFastNoVec: no-op SIMD fallback for the fast INTER_AREA path.
// It processes zero destination elements, so the caller's scalar loop
// handles the whole row. The constructor mirrors the vectorized
// implementations' signature; all parameters are intentionally unused.
// NOTE(review): reconstructed from a corrupted listing -- only the
// struct braces were missing; all visible tokens preserved.
template <typename T, typename WT>
struct ResizeAreaFastNoVec
{
    ResizeAreaFastNoVec(int /*_scale_x*/, int /*_scale_y*/,
        int /*_cn*/, int /*_step*//*, const int**/ /*_ofs*/) { }
    // Returns the number of destination elements already processed (0).
    int operator() (const T* /*S*/, T* /*D*/, int /*w*/) const { return 0; }
};
// ResizeAreaFastVec: scalar "fast path" for 2x2 INTER_AREA decimation.
// Only active when both scale factors are exactly 2 and cn is 1, 3 or 4
// (fast_mode); each output pixel is the rounded average of a 2x2 block
// ((a+b+c+d+2)>>2).
// NOTE(review): listing is decimated -- braces, the dx declaration, the
// fast_mode early-out, the per-cn branch structure, the index updates and
// the member declarations (cn, step, fast_mode) are missing.
1311 template<typename T>
1312 struct ResizeAreaFastVec
1314 ResizeAreaFastVec(int _scale_x, int _scale_y, int _cn, int _step/*, const int* _ofs*/) :
1315 scale_x(_scale_x), scale_y(_scale_y), cn(_cn), step(_step)/*, ofs(_ofs)*/
1317 fast_mode = scale_x == 2 && scale_y == 2 && (cn == 1 || cn == 3 || cn == 4);
// Returns how many destination elements were written (0 if !fast_mode).
1320 int operator() (const T* S, T* D, int w) const
// nextS: the second source row of the 2x2 block.
1325 const T* nextS = (const T*)((const uchar*)S + step);
// cn == 1: average 2x2 single-channel blocks.
1329 for( ; dx < w; ++dx )
1332 D[dx] = (T)((S[index] + S[index+1] + nextS[index] + nextS[index+1] + 2) >> 2);
// cn == 3: average per channel, 3 interleaved channels.
1335 for( ; dx < w; dx += 3 )
1338 D[dx] = (T)((S[index] + S[index+3] + nextS[index] + nextS[index+3] + 2) >> 2);
1339 D[dx+1] = (T)((S[index+1] + S[index+4] + nextS[index+1] + nextS[index+4] + 2) >> 2);
1340 D[dx+2] = (T)((S[index+2] + S[index+5] + nextS[index+2] + nextS[index+5] + 2) >> 2);
// cn == 4: average per channel, 4 interleaved channels.
1345 for( ; dx < w; dx += 4 )
1348 D[dx] = (T)((S[index] + S[index+4] + nextS[index] + nextS[index+4] + 2) >> 2);
1349 D[dx+1] = (T)((S[index+1] + S[index+5] + nextS[index+1] + nextS[index+5] + 2) >> 2);
1350 D[dx+2] = (T)((S[index+2] + S[index+6] + nextS[index+2] + nextS[index+6] + 2) >> 2);
1351 D[dx+3] = (T)((S[index+3] + S[index+7] + nextS[index+3] + nextS[index+7] + 2) >> 2);
1359 int scale_x, scale_y;
// resizeAreaFast_Invoker: INTER_AREA downscaling by integer factors.
// Each destination pixel averages a scale_x x scale_y source block.
// ofs[] holds precomputed intra-block offsets; xofs[] the per-column
// start offsets. A VecOp may process a prefix of each row; partial
// blocks at the right/bottom borders average only the in-bounds samples.
// NOTE(review): listing is decimated -- braces, 'int dx;', the
// accumulator declarations (sum, k), #endif lines, and the border-loop
// accumulation bodies are missing.
1365 template <typename T, typename WT, typename VecOp>
1366 class resizeAreaFast_Invoker :
1367 public ParallelLoopBody
1370 resizeAreaFast_Invoker(const Mat &_src, Mat &_dst,
1371 int _scale_x, int _scale_y, const int* _ofs, const int* _xofs) :
1372 ParallelLoopBody(), src(_src), dst(_dst), scale_x(_scale_x),
1373 scale_y(_scale_y), ofs(_ofs), xofs(_xofs)
1377 virtual void operator() (const Range& range) const
1379 Size ssize = src.size(), dsize = dst.size();
1380 int cn = src.channels();
1381 int area = scale_x*scale_y;
1382 float scale = 1.f/(area);
// dwidth1: widest destination prefix whose source blocks are fully
// inside the image horizontally.
1383 int dwidth1 = (ssize.width/scale_x)*cn;
1388 VecOp vop(scale_x, scale_y, src.channels(), (int)src.step/*, area_ofs*/);
1390 for( dy = range.start; dy < range.end; dy++ )
1392 T* D = (T*)(dst.data + dst.step*dy);
1393 int sy0 = dy*scale_y;
// w == 0 when the block sticks out vertically: whole row is "border".
1394 int w = sy0 + scale_y <= ssize.height ? dwidth1 : 0;
1396 if( sy0 >= ssize.height )
1398 for( dx = 0; dx < dsize.width; dx++ )
// Fast interior: SIMD prefix, then scalar full-block averaging.
1403 dx = vop((const T*)(src.data + src.step * sy0), D, w);
1404 for( ; dx < w; dx++ )
1406 const T* S = (const T*)(src.data + src.step * sy0) + xofs[dx];
1409 #if CV_ENABLE_UNROLLED
1410 for( ; k <= area - 4; k += 4 )
1411 sum += S[ofs[k]] + S[ofs[k+1]] + S[ofs[k+2]] + S[ofs[k+3]];
1413 for( ; k < area; k++ )
1416 D[dx] = saturate_cast<T>(sum * scale);
// Border columns: average only the samples inside the image,
// dividing by the actual count instead of 'area'.
1419 for( ; dx < dsize.width; dx++ )
1422 int count = 0, sx0 = xofs[dx];
1423 if( sx0 >= ssize.width )
1426 for( int sy = 0; sy < scale_y; sy++ )
1428 if( sy0 + sy >= ssize.height )
1430 const T* S = (const T*)(src.data + src.step*(sy0 + sy)) + sx0;
1431 for( int sx = 0; sx < scale_x*cn; sx += cn )
1433 if( sx0 + sx >= ssize.width )
1440 D[dx] = saturate_cast<T>((float)sum/count);
1448 int scale_x, scale_y;
1449 const int *ofs, *xofs;
// resizeAreaFast_: driver for the integer-factor INTER_AREA path; runs
// resizeAreaFast_Invoker over all destination rows in parallel.
// NOTE(review): listing is decimated -- braces missing.
1452 template<typename T, typename WT, typename VecOp>
1453 static void resizeAreaFast_( const Mat& src, Mat& dst, const int* ofs, const int* xofs,
1454 int scale_x, int scale_y )
1456 Range range(0, dst.rows);
1457 resizeAreaFast_Invoker<T, WT, VecOp> invoker(src, dst, scale_x,
1458 scale_y, ofs, xofs);
// Same ~64K-pixel parallelization threshold as the generic path.
1459 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
// DecimateAlpha: one weighted contribution in the true INTER_AREA
// resampling tables -- source index si feeds destination index di with
// weight alpha. NOTE(review): the member declarations (int di; int si;
// float alpha;) are missing from this listing; inferred from usage below.
1462 struct DecimateAlpha
// ResizeArea_Invoker: true (non-integer-scale) INTER_AREA resize.
// ytab runs over weighted source rows grouped by destination row; for
// each source row, xtab collapses it horizontally into buf[], and the
// per-row weight beta accumulates buf into sum[] until the destination
// row changes, at which point sum is flushed via saturate_cast.
// The cn-specialized loops (cn==2/3/4) are manual unrollings of the
// generic per-channel loop.
// NOTE(review): listing is decimated -- braces, member initializations
// in the constructor body, the dx2/switch structure, and the prev_dy
// bookkeeping lines are missing.
1469 template<typename T, typename WT> class ResizeArea_Invoker :
1470 public ParallelLoopBody
1473 ResizeArea_Invoker( const Mat& _src, Mat& _dst,
1474 const DecimateAlpha* _xtab, int _xtab_size,
1475 const DecimateAlpha* _ytab, int _ytab_size,
1476 const int* _tabofs )
1481 xtab_size0 = _xtab_size;
1483 ytab_size = _ytab_size;
1487 virtual void operator() (const Range& range) const
1489 Size dsize = dst->size();
1490 int cn = dst->channels();
// buf = horizontally-collapsed current source row; sum = running
// vertical accumulation for the current destination row.
1492 AutoBuffer<WT> _buffer(dsize.width*2);
1493 const DecimateAlpha* xtab = xtab0;
1494 int xtab_size = xtab_size0;
1495 WT *buf = _buffer, *sum = buf + dsize.width;
// tabofs maps destination rows to ranges of ytab entries.
1496 int j_start = tabofs[range.start], j_end = tabofs[range.end], j, k, dx, prev_dy = ytab[j_start].di;
1498 for( dx = 0; dx < dsize.width; dx++ )
1501 for( j = j_start; j < j_end; j++ )
1503 WT beta = ytab[j].alpha;
1504 int dy = ytab[j].di;
1505 int sy = ytab[j].si;
1508 const T* S = (const T*)(src->data + src->step*sy);
1509 for( dx = 0; dx < dsize.width; dx++ )
// cn == 1: generic single-channel horizontal collapse.
1513 for( k = 0; k < xtab_size; k++ )
1515 int dxn = xtab[k].di;
1516 WT alpha = xtab[k].alpha;
1517 buf[dxn] += S[xtab[k].si]*alpha;
// cn == 2: two interleaved channels per entry.
1520 for( k = 0; k < xtab_size; k++ )
1522 int sxn = xtab[k].si;
1523 int dxn = xtab[k].di;
1524 WT alpha = xtab[k].alpha;
1525 WT t0 = buf[dxn] + S[sxn]*alpha;
1526 WT t1 = buf[dxn+1] + S[sxn+1]*alpha;
1527 buf[dxn] = t0; buf[dxn+1] = t1;
// cn == 3: three interleaved channels per entry.
1530 for( k = 0; k < xtab_size; k++ )
1532 int sxn = xtab[k].si;
1533 int dxn = xtab[k].di;
1534 WT alpha = xtab[k].alpha;
1535 WT t0 = buf[dxn] + S[sxn]*alpha;
1536 WT t1 = buf[dxn+1] + S[sxn+1]*alpha;
1537 WT t2 = buf[dxn+2] + S[sxn+2]*alpha;
1538 buf[dxn] = t0; buf[dxn+1] = t1; buf[dxn+2] = t2;
// cn == 4: four interleaved channels per entry.
1542 for( k = 0; k < xtab_size; k++ )
1544 int sxn = xtab[k].si;
1545 int dxn = xtab[k].di;
1546 WT alpha = xtab[k].alpha;
1547 WT t0 = buf[dxn] + S[sxn]*alpha;
1548 WT t1 = buf[dxn+1] + S[sxn+1]*alpha;
1549 buf[dxn] = t0; buf[dxn+1] = t1;
1550 t0 = buf[dxn+2] + S[sxn+2]*alpha;
1551 t1 = buf[dxn+3] + S[sxn+3]*alpha;
1552 buf[dxn+2] = t0; buf[dxn+3] = t1;
// General cn: per-channel inner loop.
1557 for( k = 0; k < xtab_size; k++ )
1559 int sxn = xtab[k].si;
1560 int dxn = xtab[k].di;
1561 WT alpha = xtab[k].alpha;
1562 for( int c = 0; c < cn; c++ )
1563 buf[dxn + c] += S[sxn + c]*alpha;
// Destination row changed: flush the accumulated row and start the
// next accumulation with the current contribution.
1570 T* D = (T*)(dst->data + dst->step*prev_dy);
1572 for( dx = 0; dx < dsize.width; dx++ )
1574 D[dx] = saturate_cast<T>(sum[dx]);
1575 sum[dx] = beta*buf[dx];
// Same destination row: keep accumulating.
1581 for( dx = 0; dx < dsize.width; dx++ )
1582 sum[dx] += beta*buf[dx];
// Flush the final destination row of the band.
1587 T* D = (T*)(dst->data + dst->step*prev_dy);
1588 for( dx = 0; dx < dsize.width; dx++ )
1589 D[dx] = saturate_cast<T>(sum[dx]);
1596 const DecimateAlpha* xtab0;
1597 const DecimateAlpha* ytab;
1598 int xtab_size0, ytab_size;
// resizeArea_: driver for true INTER_AREA; runs ResizeArea_Invoker over
// destination rows in parallel.
// NOTE(review): listing is decimated -- the final parameter line
// (presumably 'const int* tabofs') and the braces are missing.
1603 template <typename T, typename WT>
1604 static void resizeArea_( const Mat& src, Mat& dst,
1605 const DecimateAlpha* xtab, int xtab_size,
1606 const DecimateAlpha* ytab, int ytab_size,
1609 parallel_for_(Range(0, dst.rows),
1610 ResizeArea_Invoker<T, WT>(src, dst, xtab, xtab_size, ytab, ytab_size, tabofs),
1611 dst.total()/((double)(1 << 16)));
// Function-pointer types used by the per-depth dispatch tables in
// cv::resize below.
// ResizeFunc: generic separable resize (linear/cubic/lanczos4).
1615 typedef void (*ResizeFunc)( const Mat& src, Mat& dst,
1616 const int* xofs, const void* alpha,
1617 const int* yofs, const void* beta,
1618 int xmin, int xmax, int ksize );
// ResizeAreaFastFunc: integer-factor INTER_AREA downscale.
1620 typedef void (*ResizeAreaFastFunc)( const Mat& src, Mat& dst,
1621 const int* ofs, const int *xofs,
1622 int scale_x, int scale_y );
// ResizeAreaFunc: true (fractional-scale) INTER_AREA downscale.
// NOTE(review): the trailing parameter line (presumably 'const int*
// tabofs );') is missing from this listing.
1624 typedef void (*ResizeAreaFunc)( const Mat& src, Mat& dst,
1625 const DecimateAlpha* xtab, int xtab_size,
1626 const DecimateAlpha* ytab, int ytab_size,
// computeResizeAreaTab: build the DecimateAlpha table for one axis of a
// true INTER_AREA resize. For each destination cell dx the source span
// is [fsx1, fsx2); partially-covered edge pixels get fractional weights,
// fully-covered interior pixels get weight 1/cellWidth. Returns the
// number of entries written (at most ssize*2, per the asserts).
// NOTE(review): listing is decimated -- 'int k = 0;' style initialization,
// closing braces, and the 'return k;' line are missing.
1630 static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, DecimateAlpha* tab )
1633 for(int dx = 0; dx < dsize; dx++ )
1635 double fsx1 = dx * scale;
1636 double fsx2 = fsx1 + scale;
// cellWidth shrinks at the right/bottom border where the cell is clipped.
1637 double cellWidth = min(scale, ssize - fsx1);
1639 int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2);
1641 sx2 = std::min(sx2, ssize - 1);
1642 sx1 = std::min(sx1, sx2);
// Left partial pixel (if the cell does not start on a pixel boundary).
1644 if( sx1 - fsx1 > 1e-3 )
1646 assert( k < ssize*2 );
1647 tab[k].di = dx * cn;
1648 tab[k].si = (sx1 - 1) * cn;
1649 tab[k++].alpha = (float)((sx1 - fsx1) / cellWidth);
// Fully-covered interior pixels.
1652 for(int sx = sx1; sx < sx2; sx++ )
1654 assert( k < ssize*2 );
1655 tab[k].di = dx * cn;
1656 tab[k].si = sx * cn;
1657 tab[k++].alpha = float(1.0 / cellWidth);
// Right partial pixel.
1660 if( fsx2 - sx2 > 1e-3 )
1662 assert( k < ssize*2 );
1663 tab[k].di = dx * cn;
1664 tab[k].si = sx2 * cn;
1665 tab[k++].alpha = (float)(min(min(fsx2 - sx2, 1.), cellWidth) / cellWidth);
// IPPresizeInvoker: Intel IPP-accelerated resize (IPP >= 7 only).
// Each parallel chunk maps its source row range to the corresponding
// destination ROI and calls the type-specific ippiResizeSqrPixel_* func;
// *ok is cleared on failure so the caller can fall back.
// NOTE(review): listing is decimated -- access specifiers, braces, the
// 'int bufsize;' declaration, the '*ok = false;' failure branch, and most
// member declarations are missing.
1671 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
1672 class IPPresizeInvoker :
1673 public ParallelLoopBody
1676 IPPresizeInvoker(Mat &_src, Mat &_dst, double &_inv_scale_x, double &_inv_scale_y, int _mode, ippiResizeSqrPixelFunc _func, bool *_ok) :
1677 ParallelLoopBody(), src(_src), dst(_dst), inv_scale_x(_inv_scale_x), inv_scale_y(_inv_scale_y), mode(_mode), func(_func), ok(_ok)
1682 virtual void operator() (const Range& range) const
1684 int cn = src.channels();
// Source ROI = this chunk's row band; destination ROI = its scaled image.
1685 IppiRect srcroi = { 0, range.start, src.cols, range.end - range.start };
1686 int dsty = CV_IMIN(cvRound(range.start * inv_scale_y), dst.rows);
1687 int dstwidth = CV_IMIN(cvRound(src.cols * inv_scale_x), dst.cols);
1688 int dstheight = CV_IMIN(cvRound(range.end * inv_scale_y), dst.rows);
1689 IppiRect dstroi = { 0, dsty, dstwidth, dstheight - dsty };
1691 ippiResizeGetBufSize( srcroi, dstroi, cn, mode, &bufsize );
// Over-allocate so the work buffer can be 32-byte aligned.
1692 AutoBuffer<uchar> buf(bufsize + 64);
1693 uchar* bufptr = alignPtr((uchar*)buf, 32);
1694 if( func( src.data, ippiSize(src.cols, src.rows), (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, inv_scale_x, inv_scale_y, 0, 0, mode, bufptr ) < 0 )
1703 ippiResizeSqrPixelFunc func;
// Non-copyable: assignment deliberately declared but not defined.
1705 const IPPresizeInvoker& operator= (const IPPresizeInvoker&);
1712 //////////////////////////////////////////////////////////////////////////////////////////
// cv::resize: public entry point. Validates sizes, derives dsize or the
// scale factors from each other, tries the Tegra and IPP fast paths,
// then dispatches per interpolation mode: nearest-neighbor, fast
// integer-factor INTER_AREA, true INTER_AREA, or the generic separable
// linear/cubic/lanczos4 pipeline (for which it precomputes per-column
// and per-row offset/weight tables, fixed-point for 8-bit images).
// NOTE(review): this listing is heavily decimated -- braces, several
// declarations (ok, fx/fy, dx2, j, ksize2 assignment, D) and whole
// branches (#else/#endif, returns) are missing throughout.
1714 void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
1715 double inv_scale_x, double inv_scale_y, int interpolation )
// Per-depth dispatch table for INTER_LINEAR (index = src.depth()).
1717 static ResizeFunc linear_tab[] =
1720 HResizeLinear<uchar, int, short,
1721 INTER_RESIZE_COEF_SCALE,
1722 HResizeLinearVec_8u32s>,
1723 VResizeLinear<uchar, int, short,
1724 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
1725 VResizeLinearVec_32s8u> >,
1728 HResizeLinear<ushort, float, float, 1,
1729 HResizeLinearVec_16u32f>,
1730 VResizeLinear<ushort, float, float, Cast<float, ushort>,
1731 VResizeLinearVec_32f16u> >,
1733 HResizeLinear<short, float, float, 1,
1734 HResizeLinearVec_16s32f>,
1735 VResizeLinear<short, float, float, Cast<float, short>,
1736 VResizeLinearVec_16s> >,
1739 HResizeLinear<float, float, float, 1,
1740 HResizeLinearVec_32f>,
1741 VResizeLinear<float, float, float, Cast<float, float>,
1742 VResizeLinearVec_32f> >,
1744 HResizeLinear<double, double, float, 1,
1746 VResizeLinear<double, double, float, Cast<double, double>,
// Per-depth dispatch table for INTER_CUBIC.
1751 static ResizeFunc cubic_tab[] =
1754 HResizeCubic<uchar, int, short>,
1755 VResizeCubic<uchar, int, short,
1756 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
1757 VResizeCubicVec_32s8u> >,
1760 HResizeCubic<ushort, float, float>,
1761 VResizeCubic<ushort, float, float, Cast<float, ushort>,
1762 VResizeCubicVec_32f16u> >,
1764 HResizeCubic<short, float, float>,
1765 VResizeCubic<short, float, float, Cast<float, short>,
1766 VResizeCubicVec_32f16s> >,
1769 HResizeCubic<float, float, float>,
1770 VResizeCubic<float, float, float, Cast<float, float>,
1771 VResizeCubicVec_32f> >,
1773 HResizeCubic<double, double, float>,
1774 VResizeCubic<double, double, float, Cast<double, double>,
// Per-depth dispatch table for INTER_LANCZOS4.
1779 static ResizeFunc lanczos4_tab[] =
1781 resizeGeneric_<HResizeLanczos4<uchar, int, short>,
1782 VResizeLanczos4<uchar, int, short,
1783 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
1786 resizeGeneric_<HResizeLanczos4<ushort, float, float>,
1787 VResizeLanczos4<ushort, float, float, Cast<float, ushort>,
1789 resizeGeneric_<HResizeLanczos4<short, float, float>,
1790 VResizeLanczos4<short, float, float, Cast<float, short>,
1793 resizeGeneric_<HResizeLanczos4<float, float, float>,
1794 VResizeLanczos4<float, float, float, Cast<float, float>,
1796 resizeGeneric_<HResizeLanczos4<double, double, float>,
1797 VResizeLanczos4<double, double, float, Cast<double, double>,
// Fast INTER_AREA (integer scale factors); 0 entries = unsupported depth.
1802 static ResizeAreaFastFunc areafast_tab[] =
1804 resizeAreaFast_<uchar, int, ResizeAreaFastVec<uchar> >,
1806 resizeAreaFast_<ushort, float, ResizeAreaFastVec<ushort> >,
1807 resizeAreaFast_<short, float, ResizeAreaFastVec<short> >,
1809 resizeAreaFast_<float, float, ResizeAreaFastNoVec<float, float> >,
1810 resizeAreaFast_<double, double, ResizeAreaFastNoVec<double, double> >,
// True INTER_AREA table.
1814 static ResizeAreaFunc area_tab[] =
1816 resizeArea_<uchar, float>, 0, resizeArea_<ushort, float>,
1817 resizeArea_<short, float>, 0, resizeArea_<float, float>,
1818 resizeArea_<double, double>, 0
1821 Mat src = _src.getMat();
1822 Size ssize = src.size();
1824 CV_Assert( ssize.area() > 0 );
// Either an explicit dsize or positive scale factors must be given.
1825 CV_Assert( dsize.area() || (inv_scale_x > 0 && inv_scale_y > 0) );
// dsize not given: derive it from the scale factors...
1828 dsize = Size(saturate_cast<int>(src.cols*inv_scale_x),
1829 saturate_cast<int>(src.rows*inv_scale_y));
1830 CV_Assert( dsize.area() );
// ...otherwise derive the effective scale factors from dsize.
1834 inv_scale_x = (double)dsize.width/src.cols;
1835 inv_scale_y = (double)dsize.height/src.rows;
1837 _dst.create(dsize, src.type());
1838 Mat dst = _dst.getMat();
// Optional Tegra-optimized path; returns early on success.
1841 #ifdef HAVE_TEGRA_OPTIMIZATION
1842 if (tegra::resize(src, dst, (float)inv_scale_x, (float)inv_scale_y, interpolation))
1846 int depth = src.depth(), cn = src.channels();
1847 double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y;
1848 int k, sx, sy, dx, dy;
// Optional IPP path: pick the ippiResizeSqrPixel variant for this type.
1850 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
1851 int mode = interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : 0;
1852 int type = src.type();
1853 ippiResizeSqrPixelFunc ippFunc =
1854 type == CV_8UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C1R :
1855 type == CV_8UC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C3R :
1856 type == CV_8UC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_8u_C4R :
1857 type == CV_16UC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C1R :
1858 type == CV_16UC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C3R :
1859 type == CV_16UC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16u_C4R :
1860 type == CV_16SC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C1R :
1861 type == CV_16SC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C3R :
1862 type == CV_16SC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_16s_C4R :
1863 type == CV_32FC1 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C1R :
1864 type == CV_32FC3 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C3R :
1865 type == CV_32FC4 ? (ippiResizeSqrPixelFunc)ippiResizeSqrPixel_32f_C4R :
1867 if( ippFunc && mode != 0 )
1870 Range range(0, src.rows);
1871 IPPresizeInvoker invoker(src, dst, inv_scale_x, inv_scale_y, mode, ippFunc, &ok);
1872 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
// Nearest-neighbor has its own dedicated implementation.
1878 if( interpolation == INTER_NEAREST )
1880 resizeNN( src, dst, inv_scale_x, inv_scale_y );
1885 int iscale_x = saturate_cast<int>(scale_x);
1886 int iscale_y = saturate_cast<int>(scale_y);
1888 bool is_area_fast = std::abs(scale_x - iscale_x) < DBL_EPSILON &&
1889 std::abs(scale_y - iscale_y) < DBL_EPSILON;
1891 // in case of scale_x && scale_y is equal to 2
1892 // INTER_AREA (fast) also is equal to INTER_LINEAR
1893 if( interpolation == INTER_LINEAR && is_area_fast && iscale_x == 2 && iscale_y == 2 )
1895 interpolation = INTER_AREA;
1898 // true "area" interpolation is only implemented for the case (scale_x <= 1 && scale_y <= 1).
1899 // In other cases it is emulated using some variant of bilinear interpolation
1900 if( interpolation == INTER_AREA && scale_x >= 1 && scale_y >= 1 )
// Fast path: integer scale factors; precompute block-offset tables.
1904 int area = iscale_x*iscale_y;
1905 size_t srcstep = src.step / src.elemSize1();
1906 AutoBuffer<int> _ofs(area + dsize.width*cn);
1908 int* xofs = ofs + area;
1909 ResizeAreaFastFunc func = areafast_tab[depth];
1910 CV_Assert( func != 0 );
1912 for( sy = 0, k = 0; sy < iscale_y; sy++ )
1913 for( sx = 0; sx < iscale_x; sx++ )
1914 ofs[k++] = (int)(sy*srcstep + sx*cn);
1916 for( dx = 0; dx < dsize.width; dx++ )
1920 for( k = 0; k < cn; k++ )
1921 xofs[j + k] = sx + k;
1924 func( src, dst, ofs, xofs, iscale_x, iscale_y );
// True INTER_AREA: build fractional-weight tables for both axes.
1928 ResizeAreaFunc func = area_tab[depth];
1929 CV_Assert( func != 0 && cn <= 4 );
1931 AutoBuffer<DecimateAlpha> _xytab((ssize.width + ssize.height)*2);
1932 DecimateAlpha* xtab = _xytab, *ytab = xtab + ssize.width*2;
1934 int xtab_size = computeResizeAreaTab(ssize.width, dsize.width, cn, scale_x, xtab);
1935 int ytab_size = computeResizeAreaTab(ssize.height, dsize.height, 1, scale_y, ytab);
// tabofs[dy] = first ytab entry for destination row dy.
1937 AutoBuffer<int> _tabofs(dsize.height + 1);
1938 int* tabofs = _tabofs;
1939 for( k = 0, dy = 0; k < ytab_size; k++ )
1941 if( k == 0 || ytab[k].di != ytab[k-1].di )
1943 assert( ytab[k].di == dy );
1947 tabofs[dy] = ytab_size;
1949 func( src, dst, xtab, xtab_size, ytab, ytab_size, tabofs );
// Generic separable path (linear / cubic / lanczos4 / emulated area).
1954 int xmin = 0, xmax = dsize.width, width = dsize.width*cn;
1955 bool area_mode = interpolation == INTER_AREA;
// 8-bit images use fixed-point (short) weights; others use float.
1956 bool fixpt = depth == CV_8U;
1959 int ksize=0, ksize2;
1960 if( interpolation == INTER_CUBIC )
1961 ksize = 4, func = cubic_tab[depth];
1962 else if( interpolation == INTER_LANCZOS4 )
1963 ksize = 8, func = lanczos4_tab[depth];
1964 else if( interpolation == INTER_LINEAR || interpolation == INTER_AREA )
1965 ksize = 2, func = linear_tab[depth];
1967 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
1970 CV_Assert( func != 0 );
// One buffer: xofs | yofs | alpha (per-column weights) | beta (per-row).
1972 AutoBuffer<uchar> _buffer((width + dsize.height)*(sizeof(int) + sizeof(float)*ksize));
1973 int* xofs = (int*)(uchar*)_buffer;
1974 int* yofs = xofs + width;
1975 float* alpha = (float*)(yofs + dsize.height);
1976 short* ialpha = (short*)alpha;
1977 float* beta = alpha + width*ksize;
1978 short* ibeta = ialpha + width*ksize;
1979 float cbuf[MAX_ESIZE];
// Build per-column source offsets and horizontal weights.
1981 for( dx = 0; dx < dsize.width; dx++ )
// Non-area mode: center-aligned mapping.
1985 fx = (float)((dx+0.5)*scale_x - 0.5);
// Area-emulation mode: cell-aligned mapping.
1991 sx = cvFloor(dx*scale_x);
1992 fx = (float)((dx+1) - (sx+1)*inv_scale_x);
1993 fx = fx <= 0 ? 0.f : fx - cvFloor(fx);
// Right-border handling: shrink xmax / clamp to the last column.
2003 if( sx + ksize2 >= ssize.width )
2005 xmax = std::min( xmax, dx );
2006 if( sx >= ssize.width-1 )
2007 fx = 0, sx = ssize.width-1;
2010 for( k = 0, sx *= cn; k < cn; k++ )
2011 xofs[dx*cn + k] = sx + k;
2013 if( interpolation == INTER_CUBIC )
2014 interpolateCubic( fx, cbuf );
2015 else if( interpolation == INTER_LANCZOS4 )
2016 interpolateLanczos4( fx, cbuf );
// Store weights (fixed-point for 8-bit), replicated per channel.
2024 for( k = 0; k < ksize; k++ )
2025 ialpha[dx*cn*ksize + k] = saturate_cast<short>(cbuf[k]*INTER_RESIZE_COEF_SCALE);
2026 for( ; k < cn*ksize; k++ )
2027 ialpha[dx*cn*ksize + k] = ialpha[dx*cn*ksize + k - ksize];
2031 for( k = 0; k < ksize; k++ )
2032 alpha[dx*cn*ksize + k] = cbuf[k];
2033 for( ; k < cn*ksize; k++ )
2034 alpha[dx*cn*ksize + k] = alpha[dx*cn*ksize + k - ksize];
// Build per-row source offsets and vertical weights (same scheme).
2038 for( dy = 0; dy < dsize.height; dy++ )
2042 fy = (float)((dy+0.5)*scale_y - 0.5);
2048 sy = cvFloor(dy*scale_y);
2049 fy = (float)((dy+1) - (sy+1)*inv_scale_y);
2050 fy = fy <= 0 ? 0.f : fy - cvFloor(fy);
2054 if( interpolation == INTER_CUBIC )
2055 interpolateCubic( fy, cbuf );
2056 else if( interpolation == INTER_LANCZOS4 )
2057 interpolateLanczos4( fy, cbuf );
2066 for( k = 0; k < ksize; k++ )
2067 ibeta[dy*ksize + k] = saturate_cast<short>(cbuf[k]*INTER_RESIZE_COEF_SCALE);
2071 for( k = 0; k < ksize; k++ )
2072 beta[dy*ksize + k] = cbuf[k];
// Dispatch to the selected separable implementation.
2076 func( src, dst, xofs, fixpt ? (void*)ialpha : (void*)alpha, yofs,
2077 fixpt ? (void*)ibeta : (void*)beta, xmin, xmax, ksize );
2081 /****************************************************************************************\
2082 * General warping (affine, perspective, remap) *
2083 \****************************************************************************************/
// remapNearest: nearest-neighbor remap. _xy holds interleaved (x, y)
// short coordinates per destination pixel; in-range pixels copy the
// source sample directly, out-of-range pixels are handled per
// borderType (replicate / constant / other via borderInterpolate;
// BORDER_TRANSPARENT leaves the destination untouched).
// Split into a cn==1 fast loop and a multi-channel loop with unrolled
// cn==3 and cn==4 cases.
// NOTE(review): listing is decimated -- braces, 'int dx, dy;', the 'S'
// declaration, the constant-border stores, and the cn switch structure
// are missing.
2088 template<typename T>
2089 static void remapNearest( const Mat& _src, Mat& _dst, const Mat& _xy,
2090 int borderType, const Scalar& _borderValue )
2092 Size ssize = _src.size(), dsize = _dst.size();
2093 int cn = _src.channels();
2094 const T* S0 = (const T*)_src.data;
2095 size_t sstep = _src.step/sizeof(S0[0]);
// Border color converted to the element type (up to 4 channels).
2096 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
2097 saturate_cast<T>(_borderValue[1]),
2098 saturate_cast<T>(_borderValue[2]),
2099 saturate_cast<T>(_borderValue[3]));
2102 unsigned width1 = ssize.width, height1 = ssize.height;
// Continuous data: process everything as one long row.
2104 if( _dst.isContinuous() && _xy.isContinuous() )
2106 dsize.width *= dsize.height;
2110 for( dy = 0; dy < dsize.height; dy++ )
2112 T* D = (T*)(_dst.data + _dst.step*dy);
2113 const short* XY = (const short*)(_xy.data + _xy.step*dy);
// Single-channel loop.
2117 for( dx = 0; dx < dsize.width; dx++ )
2119 int sx = XY[dx*2], sy = XY[dx*2+1];
// Unsigned compare covers negative and too-large coordinates at once.
2120 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
2121 D[dx] = S0[sy*sstep + sx];
2124 if( borderType == BORDER_REPLICATE )
2126 sx = clip(sx, 0, ssize.width);
2127 sy = clip(sy, 0, ssize.height);
2128 D[dx] = S0[sy*sstep + sx];
2130 else if( borderType == BORDER_CONSTANT )
2132 else if( borderType != BORDER_TRANSPARENT )
2134 sx = borderInterpolate(sx, ssize.width, borderType);
2135 sy = borderInterpolate(sy, ssize.height, borderType);
2136 D[dx] = S0[sy*sstep + sx];
// Multi-channel loop (D advances cn elements per pixel).
2143 for( dx = 0; dx < dsize.width; dx++, D += cn )
2145 int sx = XY[dx*2], sy = XY[dx*2+1], k;
2147 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
// cn == 3 unrolled copy.
2151 S = S0 + sy*sstep + sx*3;
2152 D[0] = S[0], D[1] = S[1], D[2] = S[2];
// cn == 4 unrolled copy.
2156 S = S0 + sy*sstep + sx*4;
2157 D[0] = S[0], D[1] = S[1], D[2] = S[2], D[3] = S[3];
// General cn copy.
2161 S = S0 + sy*sstep + sx*cn;
2162 for( k = 0; k < cn; k++ )
2166 else if( borderType != BORDER_TRANSPARENT )
2168 if( borderType == BORDER_REPLICATE )
2170 sx = clip(sx, 0, ssize.width);
2171 sy = clip(sy, 0, ssize.height);
2172 S = S0 + sy*sstep + sx*cn;
2174 else if( borderType == BORDER_CONSTANT )
2178 sx = borderInterpolate(sx, ssize.width, borderType);
2179 sy = borderInterpolate(sy, ssize.height, borderType);
2180 S = S0 + sy*sstep + sx*cn;
2182 for( k = 0; k < cn; k++ )
// No-op SIMD fallback for remap: processes 0 pixels so the scalar path
// does all the work. NOTE(review): the enclosing 'struct RemapNoVec'
// declaration line is missing from this listing; inferred from the
// 'typedef RemapNoVec RemapVec_8u;' below.
2193 int operator()( const Mat&, void*, const short*, const ushort*,
2194 const void*, int ) const { return 0; }
// SSE2-accelerated bilinear remap for 8-bit images (appears to be the
// body of struct RemapVec_8u; the struct header is missing from this
// listing). XY holds interleaved (x, y) shorts, FXY the fractional-part
// index into the interpolation weight table _wtab. Returns how many
// destination pixels were written; the scalar caller finishes the rest.
// Layout trick used throughout: _mm_madd_epi16 with xy2ofs converts each
// (x, y) pair into the byte offset x*cn + y*sstep in one instruction.
// NOTE(review): listing is decimated -- braces, the cn dispatch
// (if cn==1 / else if cn==3 / else cn==4), 'int i0, i1;' declarations
// and the trailing 'return x;' are missing. Order-critical SIMD; kept
// byte-identical.
2201 int operator()( const Mat& _src, void* _dst, const short* XY,
2202 const ushort* FXY, const void* _wtab, int width ) const
2204 int cn = _src.channels(), x = 0, sstep = (int)_src.step;
// Bail out (process 0 pixels) for unsupported layouts or without SSE2.
2206 if( (cn != 1 && cn != 3 && cn != 4) || !checkHardwareSupport(CV_CPU_SSE2) ||
2210 const uchar *S0 = _src.data, *S1 = _src.data + _src.step;
// cn==1 uses the caller's weight table; cn==3/4 use BilinearTab_iC4.
2211 const short* wtab = cn == 1 ? (const short*)_wtab : &BilinearTab_iC4[0][0][0];
2212 uchar* D = (uchar*)_dst;
2213 __m128i delta = _mm_set1_epi32(INTER_REMAP_COEF_SCALE/2);
2214 __m128i xy2ofs = _mm_set1_epi32(cn + (sstep << 16));
2215 __m128i z = _mm_setzero_si128();
2216 int CV_DECL_ALIGNED(16) iofs0[4], iofs1[4];
// --- cn == 1: 8 pixels per iteration ---
2220 for( ; x <= width - 8; x += 8 )
2222 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
2223 __m128i xy1 = _mm_loadu_si128( (const __m128i*)(XY + x*2 + 8));
2224 __m128i v0, v1, v2, v3, a0, a1, b0, b1;
// Convert (x, y) pairs to byte offsets and spill to iofs0/iofs1.
2227 xy0 = _mm_madd_epi16( xy0, xy2ofs );
2228 xy1 = _mm_madd_epi16( xy1, xy2ofs );
2229 _mm_store_si128( (__m128i*)iofs0, xy0 );
2230 _mm_store_si128( (__m128i*)iofs1, xy1 );
// Gather 2x2 neighborhoods: top row pairs from S0, bottom from S1.
2232 i0 = *(ushort*)(S0 + iofs0[0]) + (*(ushort*)(S0 + iofs0[1]) << 16);
2233 i1 = *(ushort*)(S0 + iofs0[2]) + (*(ushort*)(S0 + iofs0[3]) << 16);
2234 v0 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
2235 i0 = *(ushort*)(S1 + iofs0[0]) + (*(ushort*)(S1 + iofs0[1]) << 16);
2236 i1 = *(ushort*)(S1 + iofs0[2]) + (*(ushort*)(S1 + iofs0[3]) << 16);
2237 v1 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
2238 v0 = _mm_unpacklo_epi8(v0, z);
2239 v1 = _mm_unpacklo_epi8(v1, z);
// Load the 4-tap weights for the first 4 pixels and blend.
2241 a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x]*4)),
2242 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+1]*4)));
2243 a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+2]*4)),
2244 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+3]*4)));
2245 b0 = _mm_unpacklo_epi64(a0, a1);
2246 b1 = _mm_unpackhi_epi64(a0, a1);
2247 v0 = _mm_madd_epi16(v0, b0);
2248 v1 = _mm_madd_epi16(v1, b1);
2249 v0 = _mm_add_epi32(_mm_add_epi32(v0, v1), delta);
// Second group of 4 pixels, same pattern.
2251 i0 = *(ushort*)(S0 + iofs1[0]) + (*(ushort*)(S0 + iofs1[1]) << 16);
2252 i1 = *(ushort*)(S0 + iofs1[2]) + (*(ushort*)(S0 + iofs1[3]) << 16);
2253 v2 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
2254 i0 = *(ushort*)(S1 + iofs1[0]) + (*(ushort*)(S1 + iofs1[1]) << 16);
2255 i1 = *(ushort*)(S1 + iofs1[2]) + (*(ushort*)(S1 + iofs1[3]) << 16);
2256 v3 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
2257 v2 = _mm_unpacklo_epi8(v2, z);
2258 v3 = _mm_unpacklo_epi8(v3, z);
2260 a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+4]*4)),
2261 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+5]*4)));
2262 a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+6]*4)),
2263 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+7]*4)));
2264 b0 = _mm_unpacklo_epi64(a0, a1);
2265 b1 = _mm_unpackhi_epi64(a0, a1);
2266 v2 = _mm_madd_epi16(v2, b0);
2267 v3 = _mm_madd_epi16(v3, b1);
2268 v2 = _mm_add_epi32(_mm_add_epi32(v2, v3), delta);
// Descale, pack both groups to 8 bytes, and store.
2270 v0 = _mm_srai_epi32(v0, INTER_REMAP_COEF_BITS);
2271 v2 = _mm_srai_epi32(v2, INTER_REMAP_COEF_BITS);
2272 v0 = _mm_packus_epi16(_mm_packs_epi32(v0, v2), z);
2273 _mm_storel_epi64( (__m128i*)(D + x), v0 );
// --- cn == 3: 4 pixels (12 bytes) per iteration ---
// 'width - 5' guard leaves slack because each pixel loads 4 bytes.
2278 for( ; x <= width - 5; x += 4, D += 12 )
2280 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
2281 __m128i u0, v0, u1, v1;
2283 xy0 = _mm_madd_epi16( xy0, xy2ofs );
2284 _mm_store_si128( (__m128i*)iofs0, xy0 );
2285 const __m128i *w0, *w1;
2286 w0 = (const __m128i*)(wtab + FXY[x]*16);
2287 w1 = (const __m128i*)(wtab + FXY[x+1]*16);
// Each pixel: load left/right RGB triplets from both rows and blend.
2289 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[0])),
2290 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[0] + 3)));
2291 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[0])),
2292 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[0] + 3)));
2293 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[1])),
2294 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[1] + 3)));
2295 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[1])),
2296 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[1] + 3)));
2297 u0 = _mm_unpacklo_epi8(u0, z);
2298 v0 = _mm_unpacklo_epi8(v0, z);
2299 u1 = _mm_unpacklo_epi8(u1, z);
2300 v1 = _mm_unpacklo_epi8(v1, z);
2301 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
2302 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
2303 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
2304 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
// Shift/pack so the two 3-byte results land contiguously.
2305 u0 = _mm_slli_si128(u0, 4);
2306 u0 = _mm_packs_epi32(u0, u1);
2307 u0 = _mm_packus_epi16(u0, u0);
2308 _mm_storel_epi64((__m128i*)D, _mm_srli_si128(u0,1));
2310 w0 = (const __m128i*)(wtab + FXY[x+2]*16);
2311 w1 = (const __m128i*)(wtab + FXY[x+3]*16);
2313 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[2])),
2314 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[2] + 3)));
2315 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[2])),
2316 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[2] + 3)));
2317 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[3])),
2318 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[3] + 3)));
2319 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[3])),
2320 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[3] + 3)));
2321 u0 = _mm_unpacklo_epi8(u0, z);
2322 v0 = _mm_unpacklo_epi8(v0, z);
2323 u1 = _mm_unpacklo_epi8(u1, z);
2324 v1 = _mm_unpacklo_epi8(v1, z);
2325 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
2326 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
2327 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
2328 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
2329 u0 = _mm_slli_si128(u0, 4);
2330 u0 = _mm_packs_epi32(u0, u1);
2331 u0 = _mm_packus_epi16(u0, u0);
2332 _mm_storel_epi64((__m128i*)(D + 6), _mm_srli_si128(u0,1));
// --- cn == 4: 4 pixels (16 bytes) per iteration ---
2337 for( ; x <= width - 4; x += 4, D += 16 )
2339 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
2340 __m128i u0, v0, u1, v1;
2342 xy0 = _mm_madd_epi16( xy0, xy2ofs );
2343 _mm_store_si128( (__m128i*)iofs0, xy0 );
2344 const __m128i *w0, *w1;
2345 w0 = (const __m128i*)(wtab + FXY[x]*16);
2346 w1 = (const __m128i*)(wtab + FXY[x+1]*16);
// Same scheme as cn==3 but with 4-byte RGBA pixels (offset +4).
2348 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[0])),
2349 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[0] + 4)));
2350 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[0])),
2351 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[0] + 4)));
2352 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[1])),
2353 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[1] + 4)));
2354 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[1])),
2355 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[1] + 4)));
2356 u0 = _mm_unpacklo_epi8(u0, z);
2357 v0 = _mm_unpacklo_epi8(v0, z);
2358 u1 = _mm_unpacklo_epi8(u1, z);
2359 v1 = _mm_unpacklo_epi8(v1, z);
2360 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
2361 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
2362 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
2363 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
2364 u0 = _mm_packs_epi32(u0, u1);
2365 u0 = _mm_packus_epi16(u0, u0);
2366 _mm_storel_epi64((__m128i*)D, u0);
2368 w0 = (const __m128i*)(wtab + FXY[x+2]*16);
2369 w1 = (const __m128i*)(wtab + FXY[x+3]*16);
2371 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[2])),
2372 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[2] + 4)));
2373 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[2])),
2374 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[2] + 4)));
2375 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[3])),
2376 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[3] + 4)));
2377 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[3])),
2378 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[3] + 4)));
2379 u0 = _mm_unpacklo_epi8(u0, z);
2380 v0 = _mm_unpacklo_epi8(v0, z);
2381 u1 = _mm_unpacklo_epi8(u1, z);
2382 v1 = _mm_unpacklo_epi8(v1, z);
2383 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
2384 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
2385 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
2386 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
2387 u0 = _mm_packs_epi32(u0, u1);
2388 u0 = _mm_packus_epi16(u0, u0);
2389 _mm_storel_epi64((__m128i*)(D + 8), u0);
// Fallback: when the SSE2 vectorized remap path is not compiled in, the
// 8-bit "vector op" is just the scalar no-op functor (returns 0 pixels
// processed, so the scalar loops in remapBilinear do all the work).
2399 typedef RemapNoVec RemapVec_8u;
// Bilinear remap kernel.
//   _xy  : CV_16SC2 map of integer top-left source coordinates per dst pixel.
//   _fxy : CV_16UC1 map of fractional-offset indices into the weight table.
//   _wtab: interpolation weight table (4 weights per fractional index),
//          element type AT (short for fixed-point 8U, float otherwise).
//   CastOp converts the accumulator type WT back to the dst type T;
//   VecOp is an optional SIMD fast path run before the scalar loops.
// Each dst pixel is the weighted sum of the 2x2 source neighbourhood.
2404 template<class CastOp, class VecOp, typename AT>
2405 static void remapBilinear( const Mat& _src, Mat& _dst, const Mat& _xy,
2406 const Mat& _fxy, const void* _wtab,
2407 int borderType, const Scalar& _borderValue )
2409 typedef typename CastOp::rtype T;
2410 typedef typename CastOp::type1 WT;
2411 Size ssize = _src.size(), dsize = _dst.size();
2412 int cn = _src.channels();
2413 const AT* wtab = (const AT*)_wtab;
2414 const T* S0 = (const T*)_src.data;
// step is in bytes; convert to elements of T for pointer arithmetic below.
2415 size_t sstep = _src.step/sizeof(S0[0]);
// Border value saturated per channel into the source element type.
2416 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
2417 saturate_cast<T>(_borderValue[1]),
2418 saturate_cast<T>(_borderValue[2]),
2419 saturate_cast<T>(_borderValue[3]));
// width1/height1 are unsigned so a single "(unsigned)v < width1" comparison
// checks 0 <= v < width1 (negative values wrap to huge unsigned numbers).
// -1 because the 2x2 window needs (sx+1, sy+1) to also be inside.
2424 unsigned width1 = std::max(ssize.width-1, 0), height1 = std::max(ssize.height-1, 0);
2425 CV_Assert( cn <= 4 && ssize.area() > 0 );
// Extra margin for 8UC3: presumably the vectorized 3-channel path reads a
// few bytes past the 2x2 window — TODO(review) confirm against the SSE code.
2427 if( _src.type() == CV_8UC3 )
2428 width1 = std::max(ssize.width-2, 0);
2431 for( dy = 0; dy < dsize.height; dy++ )
2433 T* D = (T*)(_dst.data + _dst.step*dy);
2434 const short* XY = (const short*)(_xy.data + _xy.step*dy);
2435 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
2437 bool prevInlier = false;
// Scan one past the row end (dx == dsize.width) to flush the final run.
// The row is split into maximal runs of "inlier" pixels (whole 2x2 window
// inside the source) and "outlier" pixels needing border handling.
2439 for( dx = 0; dx <= dsize.width; dx++ )
2441 bool curInlier = dx < dsize.width ?
2442 (unsigned)XY[dx*2] < width1 &&
2443 (unsigned)XY[dx*2+1] < height1 : !prevInlier;
2444 if( curInlier == prevInlier )
2450 prevInlier = curInlier;
// --- inlier run [dx, X1): try the SIMD fast path first; it returns how
// many leading pixels it handled, the per-channel scalar loops finish up.
2454 int len = vecOp( _src, D, XY + dx*2, FXY + dx, wtab, X1 - dx );
// cn == 1 scalar tail.
2460 for( ; dx < X1; dx++, D++ )
2462 int sx = XY[dx*2], sy = XY[dx*2+1];
2463 const AT* w = wtab + FXY[dx]*4;
2464 const T* S = S0 + sy*sstep + sx;
2465 *D = castOp(WT(S[0]*w[0] + S[1]*w[1] + S[sstep]*w[2] + S[sstep+1]*w[3]));
// cn == 2 scalar tail (interleaved channels, stride 2 between x-neighbours).
2469 for( ; dx < X1; dx++, D += 2 )
2471 int sx = XY[dx*2], sy = XY[dx*2+1];
2472 const AT* w = wtab + FXY[dx]*4;
2473 const T* S = S0 + sy*sstep + sx*2;
2474 WT t0 = S[0]*w[0] + S[2]*w[1] + S[sstep]*w[2] + S[sstep+2]*w[3];
2475 WT t1 = S[1]*w[0] + S[3]*w[1] + S[sstep+1]*w[2] + S[sstep+3]*w[3];
2476 D[0] = castOp(t0); D[1] = castOp(t1);
// cn == 3 scalar tail.
2479 for( ; dx < X1; dx++, D += 3 )
2481 int sx = XY[dx*2], sy = XY[dx*2+1];
2482 const AT* w = wtab + FXY[dx]*4;
2483 const T* S = S0 + sy*sstep + sx*3;
2484 WT t0 = S[0]*w[0] + S[3]*w[1] + S[sstep]*w[2] + S[sstep+3]*w[3];
2485 WT t1 = S[1]*w[0] + S[4]*w[1] + S[sstep+1]*w[2] + S[sstep+4]*w[3];
2486 WT t2 = S[2]*w[0] + S[5]*w[1] + S[sstep+2]*w[2] + S[sstep+5]*w[3];
2487 D[0] = castOp(t0); D[1] = castOp(t1); D[2] = castOp(t2);
// cn == 4 scalar tail.
2490 for( ; dx < X1; dx++, D += 4 )
2492 int sx = XY[dx*2], sy = XY[dx*2+1];
2493 const AT* w = wtab + FXY[dx]*4;
2494 const T* S = S0 + sy*sstep + sx*4;
2495 WT t0 = S[0]*w[0] + S[4]*w[1] + S[sstep]*w[2] + S[sstep+4]*w[3];
2496 WT t1 = S[1]*w[0] + S[5]*w[1] + S[sstep+1]*w[2] + S[sstep+5]*w[3];
2497 D[0] = castOp(t0); D[1] = castOp(t1);
2498 t0 = S[2]*w[0] + S[6]*w[1] + S[sstep+2]*w[2] + S[sstep+6]*w[3];
2499 t1 = S[3]*w[0] + S[7]*w[1] + S[sstep+3]*w[2] + S[sstep+7]*w[3];
2500 D[2] = castOp(t0); D[3] = castOp(t1);
// --- outlier run: pixels whose 2x2 window touches or crosses the border.
// BORDER_TRANSPARENT (non-3-channel) leaves the dst pixels untouched.
2505 if( borderType == BORDER_TRANSPARENT && cn != 3 )
// Single-channel border path.
2513 for( ; dx < X1; dx++, D++ )
2515 int sx = XY[dx*2], sy = XY[dx*2+1];
// Quick reject: window entirely outside the image -> constant color.
2516 if( borderType == BORDER_CONSTANT &&
2517 (sx >= ssize.width || sx+1 < 0 ||
2518 sy >= ssize.height || sy+1 < 0) )
2524 int sx0, sx1, sy0, sy1;
2526 const AT* w = wtab + FXY[dx]*4;
2527 if( borderType == BORDER_REPLICATE )
// Replicate: clamp all four sample coordinates into the image.
2529 sx0 = clip(sx, 0, ssize.width);
2530 sx1 = clip(sx+1, 0, ssize.width);
2531 sy0 = clip(sy, 0, ssize.height);
2532 sy1 = clip(sy+1, 0, ssize.height);
2533 v0 = S0[sy0*sstep + sx0];
2534 v1 = S0[sy0*sstep + sx1];
2535 v2 = S0[sy1*sstep + sx0];
2536 v3 = S0[sy1*sstep + sx1];
// Other border modes: borderInterpolate maps each coordinate; a result of
// -1 (constant mode, outside) selects the border value instead.
2540 sx0 = borderInterpolate(sx, ssize.width, borderType);
2541 sx1 = borderInterpolate(sx+1, ssize.width, borderType);
2542 sy0 = borderInterpolate(sy, ssize.height, borderType);
2543 sy1 = borderInterpolate(sy+1, ssize.height, borderType);
2544 v0 = sx0 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx0] : cval[0];
2545 v1 = sx1 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx1] : cval[0];
2546 v2 = sx0 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx0] : cval[0];
2547 v3 = sx1 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx1] : cval[0];
2549 D[0] = castOp(WT(v0*w[0] + v1*w[1] + v2*w[2] + v3*w[3]));
// Multi-channel border path: same logic but via per-corner row pointers.
2553 for( ; dx < X1; dx++, D += cn )
2555 int sx = XY[dx*2], sy = XY[dx*2+1], k;
2556 if( borderType == BORDER_CONSTANT &&
2557 (sx >= ssize.width || sx+1 < 0 ||
2558 sy >= ssize.height || sy+1 < 0) )
2560 for( k = 0; k < cn; k++ )
2565 int sx0, sx1, sy0, sy1;
2566 const T *v0, *v1, *v2, *v3;
2567 const AT* w = wtab + FXY[dx]*4;
2568 if( borderType == BORDER_REPLICATE )
2570 sx0 = clip(sx, 0, ssize.width);
2571 sx1 = clip(sx+1, 0, ssize.width);
2572 sy0 = clip(sy, 0, ssize.height);
2573 sy1 = clip(sy+1, 0, ssize.height);
2574 v0 = S0 + sy0*sstep + sx0*cn;
2575 v1 = S0 + sy0*sstep + sx1*cn;
2576 v2 = S0 + sy1*sstep + sx0*cn;
2577 v3 = S0 + sy1*sstep + sx1*cn;
// Transparent border: skip any pixel whose window is not fully inside.
2579 else if( borderType == BORDER_TRANSPARENT &&
2580 ((unsigned)sx >= (unsigned)(ssize.width-1) ||
2581 (unsigned)sy >= (unsigned)(ssize.height-1)))
2585 sx0 = borderInterpolate(sx, ssize.width, borderType);
2586 sx1 = borderInterpolate(sx+1, ssize.width, borderType);
2587 sy0 = borderInterpolate(sy, ssize.height, borderType);
2588 sy1 = borderInterpolate(sy+1, ssize.height, borderType);
// Out-of-range corners point at the constant border color instead.
2589 v0 = sx0 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx0*cn : &cval[0];
2590 v1 = sx1 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx1*cn : &cval[0];
2591 v2 = sx0 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx0*cn : &cval[0];
2592 v3 = sx1 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx1*cn : &cval[0];
2594 for( k = 0; k < cn; k++ )
2595 D[k] = castOp(WT(v0[k]*w[0] + v1[k]*w[1] + v2[k]*w[2] + v3[k]*w[3]));
// Bicubic remap kernel: each dst pixel is a weighted sum of a 4x4 source
// neighbourhood, with 16 weights per fractional-offset entry in _wtab.
// ONE is the fixed-point representation of weight-sum 1 (INTER_REMAP_COEF_SCALE
// for the 8-bit fixed-point instantiation, 1 for float).
2604 template<class CastOp, typename AT, int ONE>
2605 static void remapBicubic( const Mat& _src, Mat& _dst, const Mat& _xy,
2606 const Mat& _fxy, const void* _wtab,
2607 int borderType, const Scalar& _borderValue )
2609 typedef typename CastOp::rtype T;
2610 typedef typename CastOp::type1 WT;
2611 Size ssize = _src.size(), dsize = _dst.size();
2612 int cn = _src.channels();
2613 const AT* wtab = (const AT*)_wtab;
2614 const T* S0 = (const T*)_src.data;
// Source step in elements of T (Mat::step is in bytes).
2615 size_t sstep = _src.step/sizeof(S0[0]);
2616 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
2617 saturate_cast<T>(_borderValue[1]),
2618 saturate_cast<T>(_borderValue[2]),
2619 saturate_cast<T>(_borderValue[3]));
// For the border path, transparent mode borrows REFLECT_101 coordinates;
// truly transparent pixels are filtered out separately below.
2622 int borderType1 = borderType != BORDER_TRANSPARENT ? borderType : BORDER_REFLECT_101;
// -3 margin: the 4x4 window spans [sx, sx+3]; unsigned compare also
// rejects negative coordinates in a single test.
2624 unsigned width1 = std::max(ssize.width-3, 0), height1 = std::max(ssize.height-3, 0);
// All-continuous buffers can be processed as one long row.
2626 if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() )
2628 dsize.width *= dsize.height;
2632 for( dy = 0; dy < dsize.height; dy++ )
2634 T* D = (T*)(_dst.data + _dst.step*dy);
2635 const short* XY = (const short*)(_xy.data + _xy.step*dy);
2636 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
2638 for( dx = 0; dx < dsize.width; dx++, D += cn )
// Map stores the centre cell; -1 moves to the window's top-left corner.
2640 int sx = XY[dx*2]-1, sy = XY[dx*2+1]-1;
2641 const AT* w = wtab + FXY[dx]*16;
// Fast path: whole 4x4 window inside the image.
2643 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
2645 const T* S = S0 + sy*sstep + sx*cn;
2646 for( k = 0; k < cn; k++ )
// Accumulate one window row per statement (S is advanced between
// them in lines elided from this extract).
2648 WT sum = S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3];
2650 sum += S[0]*w[4] + S[cn]*w[5] + S[cn*2]*w[6] + S[cn*3]*w[7];
2652 sum += S[0]*w[8] + S[cn]*w[9] + S[cn*2]*w[10] + S[cn*3]*w[11];
2654 sum += S[0]*w[12] + S[cn]*w[13] + S[cn*2]*w[14] + S[cn*3]*w[15];
// Transparent mode: leave dst untouched if the (unshifted) sample
// point lies outside the source.
2662 if( borderType == BORDER_TRANSPARENT &&
2663 ((unsigned)(sx+1) >= (unsigned)ssize.width ||
2664 (unsigned)(sy+1) >= (unsigned)ssize.height) )
// Constant border, window fully outside -> plain border color.
2667 if( borderType1 == BORDER_CONSTANT &&
2668 (sx >= ssize.width || sx+4 <= 0 ||
2669 sy >= ssize.height || sy+4 <= 0))
2671 for( k = 0; k < cn; k++ )
// Precompute border-interpolated coordinates for the 4 columns/rows;
// x[] is pre-scaled by cn for direct element indexing.
2676 for( i = 0; i < 4; i++ )
2678 x[i] = borderInterpolate(sx + i, ssize.width, borderType1)*cn;
2679 y[i] = borderInterpolate(sy + i, ssize.height, borderType1);
// Start from cv*ONE and add (sample - cv)*w: rows/columns that fall
// outside (coordinate -1, constant mode) simply contribute nothing,
// leaving the border color's share in the sum.
2682 for( k = 0; k < cn; k++, S0++, w -= 16 )
2684 WT cv = cval[k], sum = cv*ONE;
2685 for( i = 0; i < 4; i++, w += 4 )
2688 const T* S = S0 + yi*sstep;
2692 sum += (S[x[0]] - cv)*w[0];
2694 sum += (S[x[1]] - cv)*w[1];
2696 sum += (S[x[2]] - cv)*w[2];
2698 sum += (S[x[3]] - cv)*w[3];
// Lanczos4 remap kernel: like remapBicubic but with an 8x8 source window
// and 64 weights per fractional-offset entry in _wtab.
2709 template<class CastOp, typename AT, int ONE>
2710 static void remapLanczos4( const Mat& _src, Mat& _dst, const Mat& _xy,
2711 const Mat& _fxy, const void* _wtab,
2712 int borderType, const Scalar& _borderValue )
2714 typedef typename CastOp::rtype T;
2715 typedef typename CastOp::type1 WT;
2716 Size ssize = _src.size(), dsize = _dst.size();
2717 int cn = _src.channels();
2718 const AT* wtab = (const AT*)_wtab;
2719 const T* S0 = (const T*)_src.data;
2720 size_t sstep = _src.step/sizeof(S0[0]);
2721 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
2722 saturate_cast<T>(_borderValue[1]),
2723 saturate_cast<T>(_borderValue[2]),
2724 saturate_cast<T>(_borderValue[3]));
// Transparent mode borrows REFLECT_101 coordinates for border samples.
2727 int borderType1 = borderType != BORDER_TRANSPARENT ? borderType : BORDER_REFLECT_101;
// -7 margin: the 8x8 window spans [sx, sx+7].
2729 unsigned width1 = std::max(ssize.width-7, 0), height1 = std::max(ssize.height-7, 0);
2731 if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() )
2733 dsize.width *= dsize.height;
2737 for( dy = 0; dy < dsize.height; dy++ )
2739 T* D = (T*)(_dst.data + _dst.step*dy);
2740 const short* XY = (const short*)(_xy.data + _xy.step*dy);
2741 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
2743 for( dx = 0; dx < dsize.width; dx++, D += cn )
// -3 moves from the mapped centre cell to the window's top-left corner.
2745 int sx = XY[dx*2]-3, sy = XY[dx*2+1]-3;
2746 const AT* w = wtab + FXY[dx]*64;
2747 const T* S = S0 + sy*sstep + sx*cn;
// Fast path: whole 8x8 window inside the image.
2749 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
2751 for( k = 0; k < cn; k++ )
2754 for( int r = 0; r < 8; r++, S += sstep, w += 8 )
2755 sum += S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3] +
2756 S[cn*4]*w[4] + S[cn*5]*w[5] + S[cn*6]*w[6] + S[cn*7]*w[7];
// Transparent: leave dst untouched when the sample point is outside.
2765 if( borderType == BORDER_TRANSPARENT &&
2766 ((unsigned)(sx+3) >= (unsigned)ssize.width ||
2767 (unsigned)(sy+3) >= (unsigned)ssize.height) )
// Constant border with window fully outside -> plain border color.
2770 if( borderType1 == BORDER_CONSTANT &&
2771 (sx >= ssize.width || sx+8 <= 0 ||
2772 sy >= ssize.height || sy+8 <= 0))
2774 for( k = 0; k < cn; k++ )
// Border-interpolated coordinates for the 8 columns/rows
// (x[] pre-scaled by cn).
2779 for( i = 0; i < 8; i++ )
2781 x[i] = borderInterpolate(sx + i, ssize.width, borderType1)*cn;
2782 y[i] = borderInterpolate(sy + i, ssize.height, borderType1);
// Same trick as remapBicubic: start from cv*ONE and accumulate
// (sample - cv)*w so out-of-image samples default to the border color.
2785 for( k = 0; k < cn; k++, S0++, w -= 64 )
2787 WT cv = cval[k], sum = cv*ONE;
2788 for( i = 0; i < 8; i++, w += 8 )
2791 const T* S1 = S0 + yi*sstep;
2795 sum += (S1[x[0]] - cv)*w[0];
2797 sum += (S1[x[1]] - cv)*w[1];
2799 sum += (S1[x[2]] - cv)*w[2];
2801 sum += (S1[x[3]] - cv)*w[3];
2803 sum += (S1[x[4]] - cv)*w[4];
2805 sum += (S1[x[5]] - cv)*w[5];
2807 sum += (S1[x[6]] - cv)*w[6];
2809 sum += (S1[x[7]] - cv)*w[7];
// Dispatch signatures used by cv::remap's per-depth function tables.
// Nearest-neighbour kernels need only the integer coordinate map.
2820 typedef void (*RemapNNFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
2821 int borderType, const Scalar& _borderValue );
// Interpolating kernels additionally take the fractional-offset map and
// the precomputed weight table (see initInterTab2D).
2823 typedef void (*RemapFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
2824 const Mat& _fxy, const void* _wtab,
2825 int borderType, const Scalar& _borderValue);
// Parallel body for cv::remap. Each worker processes a horizontal strip of
// rows; within the strip the destination is tiled into blocks so that the
// converted coordinate maps fit in small scratch buffers (bufxy, bufa),
// then the selected kernel (nnfunc or ifunc) is invoked per tile.
2827 class RemapInvoker :
2828 public ParallelLoopBody
2831 RemapInvoker(const Mat& _src, Mat& _dst, const Mat *_m1,
2832 const Mat *_m2, int _interpolation, int _borderType, const Scalar &_borderValue,
2833 int _planar_input, RemapNNFunc _nnfunc, RemapFunc _ifunc, const void *_ctab) :
2834 ParallelLoopBody(), src(&_src), dst(&_dst), m1(_m1), m2(_m2),
2835 interpolation(_interpolation), borderType(_borderType), borderValue(_borderValue),
2836 planar_input(_planar_input), nnfunc(_nnfunc), ifunc(_ifunc), ctab(_ctab)
2840 virtual void operator() (const Range& range) const
// Tile geometry: cap the scratch area at 2^14 pixels, at most 128 rows.
2843 const int buf_size = 1 << 14;
2844 int brows0 = std::min(128, dst->rows), map_depth = m1->depth();
2845 int bcols0 = std::min(buf_size/brows0, dst->cols);
2846 brows0 = std::min(buf_size/bcols0, dst->rows);
2848 bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
// bufxy: per-tile integer coordinates; bufa: fractional-offset indices.
2851 Mat _bufxy(brows0, bcols0, CV_16SC2), _bufa;
2853 _bufa.create(brows0, bcols0, CV_16UC1);
2855 for( y = range.start; y < range.end; y += brows0 )
2857 for( x = 0; x < dst->cols; x += bcols0 )
2859 int brows = std::min(brows0, range.end - y);
2860 int bcols = std::min(bcols0, dst->cols - x);
2861 Mat dpart(*dst, Rect(x, y, bcols, brows));
2862 Mat bufxy(_bufxy, Rect(0, 0, bcols, brows));
// ---- nearest-neighbour path: produce CV_16SC2 coordinates only.
2866 if( m1->type() == CV_16SC2 && !m2->data ) // the data is already in the right format
2867 bufxy = (*m1)(Rect(x, y, bcols, brows));
2868 else if( map_depth != CV_32F )
// Fixed-point (16SC2 + fractional) maps: round each coordinate to
// the nearest integer cell via the NNDeltaTab_i lookup.
2870 for( y1 = 0; y1 < brows; y1++ )
2872 short* XY = (short*)(bufxy.data + bufxy.step*y1);
2873 const short* sXY = (const short*)(m1->data + m1->step*(y+y1)) + x*2;
2874 const ushort* sA = (const ushort*)(m2->data + m2->step*(y+y1)) + x;
2876 for( x1 = 0; x1 < bcols; x1++ )
2878 int a = sA[x1] & (INTER_TAB_SIZE2-1);
2879 XY[x1*2] = sXY[x1*2] + NNDeltaTab_i[a][0];
2880 XY[x1*2+1] = sXY[x1*2+1] + NNDeltaTab_i[a][1];
// Interleaved CV_32FC2 map: a single convertTo rounds to short.
2884 else if( !planar_input )
2885 (*m1)(Rect(x, y, bcols, brows)).convertTo(bufxy, bufxy.depth());
// Planar CV_32FC1 x/y maps: round and interleave manually.
2888 for( y1 = 0; y1 < brows; y1++ )
2890 short* XY = (short*)(bufxy.data + bufxy.step*y1);
2891 const float* sX = (const float*)(m1->data + m1->step*(y+y1)) + x;
2892 const float* sY = (const float*)(m2->data + m2->step*(y+y1)) + x;
// SSE2: convert 8 x/y pairs at a time; cvtps rounds, packs
// saturates to short, unpack interleaves (x,y) pairs.
2898 for( ; x1 <= bcols - 8; x1 += 8 )
2900 __m128 fx0 = _mm_loadu_ps(sX + x1);
2901 __m128 fx1 = _mm_loadu_ps(sX + x1 + 4);
2902 __m128 fy0 = _mm_loadu_ps(sY + x1);
2903 __m128 fy1 = _mm_loadu_ps(sY + x1 + 4);
2904 __m128i ix0 = _mm_cvtps_epi32(fx0);
2905 __m128i ix1 = _mm_cvtps_epi32(fx1);
2906 __m128i iy0 = _mm_cvtps_epi32(fy0);
2907 __m128i iy1 = _mm_cvtps_epi32(fy1);
2908 ix0 = _mm_packs_epi32(ix0, ix1);
2909 iy0 = _mm_packs_epi32(iy0, iy1);
2910 ix1 = _mm_unpacklo_epi16(ix0, iy0);
2911 iy1 = _mm_unpackhi_epi16(ix0, iy0);
2912 _mm_storeu_si128((__m128i*)(XY + x1*2), ix1);
2913 _mm_storeu_si128((__m128i*)(XY + x1*2 + 8), iy1);
// Scalar tail.
2918 for( ; x1 < bcols; x1++ )
2920 XY[x1*2] = saturate_cast<short>(sX[x1]);
2921 XY[x1*2+1] = saturate_cast<short>(sY[x1]);
2925 nnfunc( *src, dpart, bufxy, borderType, borderValue );
// ---- interpolating path: also build the fractional-index map bufa.
2929 Mat bufa(_bufa, Rect(0, 0, bcols, brows));
2930 for( y1 = 0; y1 < brows; y1++ )
2932 short* XY = (short*)(bufxy.data + bufxy.step*y1);
2933 ushort* A = (ushort*)(bufa.data + bufa.step*y1);
// Already fixed-point: reuse the coordinate map, mask the index.
2935 if( m1->type() == CV_16SC2 && (m2->type() == CV_16UC1 || m2->type() == CV_16SC1) )
2937 bufxy = (*m1)(Rect(x, y, bcols, brows));
2939 const ushort* sA = (const ushort*)(m2->data + m2->step*(y+y1)) + x;
2940 for( x1 = 0; x1 < bcols; x1++ )
2941 A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2-1));
2943 else if( planar_input )
2945 const float* sX = (const float*)(m1->data + m1->step*(y+y1)) + x;
2946 const float* sY = (const float*)(m2->data + m2->step*(y+y1)) + x;
// SSE2: scale by INTER_TAB_SIZE, split each coordinate into an
// integer cell (>> INTER_BITS) and a fractional index
// (low INTER_BITS of x combined with the shifted y bits).
2952 __m128 scale = _mm_set1_ps((float)INTER_TAB_SIZE);
2953 __m128i mask = _mm_set1_epi32(INTER_TAB_SIZE-1);
2954 for( ; x1 <= bcols - 8; x1 += 8 )
2956 __m128 fx0 = _mm_loadu_ps(sX + x1);
2957 __m128 fx1 = _mm_loadu_ps(sX + x1 + 4);
2958 __m128 fy0 = _mm_loadu_ps(sY + x1);
2959 __m128 fy1 = _mm_loadu_ps(sY + x1 + 4);
2960 __m128i ix0 = _mm_cvtps_epi32(_mm_mul_ps(fx0, scale));
2961 __m128i ix1 = _mm_cvtps_epi32(_mm_mul_ps(fx1, scale));
2962 __m128i iy0 = _mm_cvtps_epi32(_mm_mul_ps(fy0, scale));
2963 __m128i iy1 = _mm_cvtps_epi32(_mm_mul_ps(fy1, scale));
2964 __m128i mx0 = _mm_and_si128(ix0, mask);
2965 __m128i mx1 = _mm_and_si128(ix1, mask);
2966 __m128i my0 = _mm_and_si128(iy0, mask);
2967 __m128i my1 = _mm_and_si128(iy1, mask);
2968 mx0 = _mm_packs_epi32(mx0, mx1);
2969 my0 = _mm_packs_epi32(my0, my1);
2970 my0 = _mm_slli_epi16(my0, INTER_BITS);
2971 mx0 = _mm_or_si128(mx0, my0);
2972 _mm_storeu_si128((__m128i*)(A + x1), mx0);
2973 ix0 = _mm_srai_epi32(ix0, INTER_BITS);
2974 ix1 = _mm_srai_epi32(ix1, INTER_BITS);
2975 iy0 = _mm_srai_epi32(iy0, INTER_BITS);
2976 iy1 = _mm_srai_epi32(iy1, INTER_BITS);
2977 ix0 = _mm_packs_epi32(ix0, ix1);
2978 iy0 = _mm_packs_epi32(iy0, iy1);
2979 ix1 = _mm_unpacklo_epi16(ix0, iy0);
2980 iy1 = _mm_unpackhi_epi16(ix0, iy0);
2981 _mm_storeu_si128((__m128i*)(XY + x1*2), ix1);
2982 _mm_storeu_si128((__m128i*)(XY + x1*2 + 8), iy1);
// Scalar tail: same split into cell coordinate + fractional index.
2987 for( ; x1 < bcols; x1++ )
2989 int sx = cvRound(sX[x1]*INTER_TAB_SIZE);
2990 int sy = cvRound(sY[x1]*INTER_TAB_SIZE);
2991 int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
2992 XY[x1*2] = saturate_cast<short>(sx >> INTER_BITS);
2993 XY[x1*2+1] = saturate_cast<short>(sy >> INTER_BITS);
// Interleaved CV_32FC2 map: scalar split only.
2999 const float* sXY = (const float*)(m1->data + m1->step*(y+y1)) + x*2;
3001 for( x1 = 0; x1 < bcols; x1++ )
3003 int sx = cvRound(sXY[x1*2]*INTER_TAB_SIZE);
3004 int sy = cvRound(sXY[x1*2+1]*INTER_TAB_SIZE);
3005 int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
3006 XY[x1*2] = saturate_cast<short>(sx >> INTER_BITS);
3007 XY[x1*2+1] = saturate_cast<short>(sy >> INTER_BITS);
3012 ifunc(*src, dpart, bufxy, bufa, ctab, borderType, borderValue);
3021 int interpolation, borderType;
// Public remap entry point: picks the per-depth kernel from static dispatch
// tables, normalizes the map representation, and runs RemapInvoker in
// parallel over destination rows.
3031 void cv::remap( InputArray _src, OutputArray _dst,
3032 InputArray _map1, InputArray _map2,
3033 int interpolation, int borderType, const Scalar& borderValue )
// Tables are indexed by source depth (CV_8U..CV_64F); 0 entries mark
// unsupported depths and are caught by the asserts below.
3035 static RemapNNFunc nn_tab[] =
3037 remapNearest<uchar>, remapNearest<schar>, remapNearest<ushort>, remapNearest<short>,
3038 remapNearest<int>, remapNearest<float>, remapNearest<double>, 0
3041 static RemapFunc linear_tab[] =
// 8U uses fixed-point weights plus the SSE2 vector op; other depths
// use float weights with the scalar no-op vector path.
3043 remapBilinear<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, RemapVec_8u, short>, 0,
3044 remapBilinear<Cast<float, ushort>, RemapNoVec, float>,
3045 remapBilinear<Cast<float, short>, RemapNoVec, float>, 0,
3046 remapBilinear<Cast<float, float>, RemapNoVec, float>,
3047 remapBilinear<Cast<double, double>, RemapNoVec, float>, 0
3050 static RemapFunc cubic_tab[] =
3052 remapBicubic<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE>, 0,
3053 remapBicubic<Cast<float, ushort>, float, 1>,
3054 remapBicubic<Cast<float, short>, float, 1>, 0,
3055 remapBicubic<Cast<float, float>, float, 1>,
3056 remapBicubic<Cast<double, double>, float, 1>, 0
3059 static RemapFunc lanczos4_tab[] =
3061 remapLanczos4<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE>, 0,
3062 remapLanczos4<Cast<float, ushort>, float, 1>,
3063 remapLanczos4<Cast<float, short>, float, 1>, 0,
3064 remapLanczos4<Cast<float, float>, float, 1>,
3065 remapLanczos4<Cast<double, double>, float, 1>, 0
3068 Mat src = _src.getMat(), map1 = _map1.getMat(), map2 = _map2.getMat();
3070 CV_Assert( map1.size().area() > 0 );
3071 CV_Assert( !map2.data || (map2.size() == map1.size()));
// Destination geometry comes from the map, type from the source.
3073 _dst.create( map1.size(), src.type() );
3074 Mat dst = _dst.getMat();
// In-place operation is not supported (src is read while dst is written).
3075 if( dst.data == src.data )
3078 int depth = src.depth();
3079 RemapNNFunc nnfunc = 0;
3080 RemapFunc ifunc = 0;
3081 const void* ctab = 0;
// 8-bit sources use the fixed-point interpolation tables.
3082 bool fixpt = depth == CV_8U;
3083 bool planar_input = false;
3085 if( interpolation == INTER_NEAREST )
3087 nnfunc = nn_tab[depth];
3088 CV_Assert( nnfunc != 0 );
// INTER_AREA has no remap kernel; fall back to bilinear.
3092 if( interpolation == INTER_AREA )
3093 interpolation = INTER_LINEAR;
3095 if( interpolation == INTER_LINEAR )
3096 ifunc = linear_tab[depth];
3097 else if( interpolation == INTER_CUBIC )
3098 ifunc = cubic_tab[depth];
3099 else if( interpolation == INTER_LANCZOS4 )
3100 ifunc = lanczos4_tab[depth];
3102 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
3103 CV_Assert( ifunc != 0 );
// Weight table shared by all interpolating kernels for this mode.
3104 ctab = initInterTab2D( interpolation, fixpt );
3107 const Mat *m1 = &map1, *m2 = &map2;
// Normalize so that m1 is always the CV_16SC2 coordinate map when the
// caller supplied the fixed-point representation (in either argument order).
3109 if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.type() == CV_16SC1 || !map2.data)) ||
3110 (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.type() == CV_16SC1 || !map1.data)) )
3112 if( map1.type() != CV_16SC2 )
// Otherwise only float maps are accepted: one CV_32FC2 map, or a
// pair of planar CV_32FC1 x/y maps.
3117 CV_Assert( ((map1.type() == CV_32FC2 || map1.type() == CV_16SC2) && !map2.data) ||
3118 (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) );
3119 planar_input = map1.channels() == 1;
3122 RemapInvoker invoker(src, dst, m1, m2, interpolation,
3123 borderType, borderValue, planar_input, nnfunc, ifunc,
3125 parallel_for_(Range(0, dst.rows), invoker, dst.total()/(double)(1<<16));
// Converts remap coordinate maps between the float representations
// (CV_32FC1 pair / CV_32FC2) and the fast fixed-point representation
// (CV_16SC2 integer cells + CV_16UC1 fractional-offset indices).
// nninterpolate drops the fractional part (for INTER_NEAREST use).
3129 void cv::convertMaps( InputArray _map1, InputArray _map2,
3130 OutputArray _dstmap1, OutputArray _dstmap2,
3131 int dstm1type, bool nninterpolate )
3133 Mat map1 = _map1.getMat(), map2 = _map2.getMat(), dstmap1, dstmap2;
3134 Size size = map1.size();
3135 const Mat *m1 = &map1, *m2 = &map2;
3136 int m1type = m1->type(), m2type = m2->type();
// Accepted input combinations (mirrors cv::remap's map formats).
3138 CV_Assert( (m1type == CV_16SC2 && (nninterpolate || m2type == CV_16UC1 || m2type == CV_16SC1)) ||
3139 (m2type == CV_16SC2 && (nninterpolate || m1type == CV_16UC1 || m1type == CV_16SC1)) ||
3140 (m1type == CV_32FC1 && m2type == CV_32FC1) ||
3141 (m1type == CV_32FC2 && !m2->data) );
// Normalize so the CV_16SC2 map (if any) is m1.
3143 if( m2type == CV_16SC2 )
3145 std::swap( m1, m2 );
3146 std::swap( m1type, m2type );
// Default output: convert to the "other" representation.
3149 if( dstm1type <= 0 )
3150 dstm1type = m1type == CV_16SC2 ? CV_32FC2 : CV_16SC2;
3151 CV_Assert( dstm1type == CV_16SC2 || dstm1type == CV_32FC1 || dstm1type == CV_32FC2 );
3152 _dstmap1.create( size, dstm1type );
3153 dstmap1 = _dstmap1.getMat();
// A second output map is only needed for the fixed-point and planar
// float representations, and only when fractional offsets are kept.
3155 if( !nninterpolate && dstm1type != CV_32FC2 )
3157 _dstmap2.create( size, dstm1type == CV_16SC2 ? CV_16UC1 : CV_32FC1 );
3158 dstmap2 = _dstmap2.getMat();
// Same layout (or NN-rounded conversion between 16SC2/32FC2):
// a plain convertTo/copyTo suffices.
3163 if( m1type == dstm1type || (nninterpolate &&
3164 ((m1type == CV_16SC2 && dstm1type == CV_32FC2) ||
3165 (m1type == CV_32FC2 && dstm1type == CV_16SC2))) )
3167 m1->convertTo( dstmap1, dstmap1.type() );
3168 if( dstmap2.data && dstmap2.type() == m2->type() )
3169 m2->copyTo( dstmap2 );
// Planar float -> interleaved float: just merge the two channels.
3173 if( m1type == CV_32FC1 && dstm1type == CV_32FC2 )
3175 Mat vdata[] = { *m1, *m2 };
3176 merge( vdata, 2, dstmap1 );
// Interleaved float -> planar float: split (call elided in this extract).
3180 if( m1type == CV_32FC2 && dstm1type == CV_32FC1 )
3182 Mat mv[] = { dstmap1, dstmap2 };
// If every buffer is continuous, treat the maps as a single long row.
3187 if( m1->isContinuous() && (!m2->data || m2->isContinuous()) &&
3188 dstmap1.isContinuous() && (!dstmap2.data || dstmap2.isContinuous()) )
3190 size.width *= size.height;
// scale converts a fractional cell index back to a float offset.
3194 const float scale = 1.f/INTER_TAB_SIZE;
3196 for( y = 0; y < size.height; y++ )
// Raw pointers aliased under both float and fixed-point views; which
// view is valid depends on the branch taken below.
3198 const float* src1f = (const float*)(m1->data + m1->step*y);
3199 const float* src2f = (const float*)(m2->data + m2->step*y);
3200 const short* src1 = (const short*)src1f;
3201 const ushort* src2 = (const ushort*)src2f;
3203 float* dst1f = (float*)(dstmap1.data + dstmap1.step*y);
3204 float* dst2f = (float*)(dstmap2.data + dstmap2.step*y);
3205 short* dst1 = (short*)dst1f;
3206 ushort* dst2 = (ushort*)dst2f;
// Planar float -> fixed point.
3208 if( m1type == CV_32FC1 && dstm1type == CV_16SC2 )
// NN variant: round coordinates, no fractional map.
3211 for( x = 0; x < size.width; x++ )
3213 dst1[x*2] = saturate_cast<short>(src1f[x]);
3214 dst1[x*2+1] = saturate_cast<short>(src2f[x]);
// Full variant: split each scaled coordinate into integer cell
// (>> INTER_BITS) and packed fractional index (y-part * TAB_SIZE + x-part).
3217 for( x = 0; x < size.width; x++ )
3219 int ix = saturate_cast<int>(src1f[x]*INTER_TAB_SIZE);
3220 int iy = saturate_cast<int>(src2f[x]*INTER_TAB_SIZE);
3221 dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
3222 dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
3223 dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
// Interleaved float -> fixed point (same math, interleaved reads).
3226 else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )
3229 for( x = 0; x < size.width; x++ )
3231 dst1[x*2] = saturate_cast<short>(src1f[x*2]);
3232 dst1[x*2+1] = saturate_cast<short>(src1f[x*2+1]);
3235 for( x = 0; x < size.width; x++ )
3237 int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
3238 int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
3239 dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
3240 dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
3241 dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
// Fixed point -> planar float: re-attach the fractional offsets
// (missing second map is treated as zero offsets).
3244 else if( m1type == CV_16SC2 && dstm1type == CV_32FC1 )
3246 for( x = 0; x < size.width; x++ )
3248 int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1) : 0;
3249 dst1f[x] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
3250 dst2f[x] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
// Fixed point -> interleaved float.
3253 else if( m1type == CV_16SC2 && dstm1type == CV_32FC2 )
3255 for( x = 0; x < size.width; x++ )
3257 int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1): 0;
3258 dst1f[x*2] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
3259 dst1f[x*2+1] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
3263 CV_Error( CV_StsNotImplemented, "Unsupported combination of input/output matrices" );
// Parallel body for cv::warpAffine: for each destination tile it evaluates
// the (inverse) affine transform in fixed point, writes per-pixel source
// coordinates (XY) and fractional indices (A) into stack buffers, then
// delegates the actual sampling to cv::remap on that tile.
3271 class warpAffineInvoker :
3272 public ParallelLoopBody
3275 warpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType,
3276 const Scalar &_borderValue, int *_adelta, int *_bdelta, double *_M) :
3277 ParallelLoopBody(), src(_src), dst(_dst), interpolation(_interpolation),
3278 borderType(_borderType), borderValue(_borderValue), adelta(_adelta), bdelta(_bdelta),
3283 virtual void operator() (const Range& range) const
// Stack tile buffers: coordinates (2 shorts/pixel) + fractional indices.
3285 const int BLOCK_SZ = 64;
3286 short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
// Fixed-point precision of the per-row/per-column coordinate terms.
3287 const int AB_BITS = MAX(10, (int)INTER_BITS);
3288 const int AB_SCALE = 1 << AB_BITS;
// Rounding bias: half a pixel for NN, half an interpolation cell otherwise.
3289 int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, x1, y1;
3291 bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
// Tile shape: wide and short (better row locality in src/dst).
3294 int bh0 = std::min(BLOCK_SZ/2, dst.rows);
3295 int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, dst.cols);
3296 bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, dst.rows);
3298 for( y = range.start; y < range.end; y += bh0 )
3300 for( x = 0; x < dst.cols; x += bw0 )
3302 int bw = std::min( bw0, dst.cols - x);
3303 int bh = std::min( bh0, range.end - y);
// Wrap the stack buffers as Mat headers for the remap() call below.
3305 Mat _XY(bh, bw, CV_16SC2, XY), matA;
3306 Mat dpart(dst, Rect(x, y, bw, bh));
3308 for( y1 = 0; y1 < bh; y1++ )
3310 short* xy = XY + y1*bw*2;
// Row-constant part of the transform; the column-dependent part
// (M[0]*x, M[3]*x) is precomputed in adelta/bdelta by warpAffine.
3311 int X0 = saturate_cast<int>((M[1]*(y + y1) + M[2])*AB_SCALE) + round_delta;
3312 int Y0 = saturate_cast<int>((M[4]*(y + y1) + M[5])*AB_SCALE) + round_delta;
// NN: integer coordinates only, no fractional map.
3314 if( interpolation == INTER_NEAREST )
3315 for( x1 = 0; x1 < bw; x1++ )
3317 int X = (X0 + adelta[x+x1]) >> AB_BITS;
3318 int Y = (Y0 + bdelta[x+x1]) >> AB_BITS;
3319 xy[x1*2] = saturate_cast<short>(X);
3320 xy[x1*2+1] = saturate_cast<short>(Y);
3324 short* alpha = A + y1*bw;
// SSE2 path: 8 pixels per iteration. Keep INTER_BITS fractional
// bits, split into cell coordinate and packed fractional index.
3329 __m128i fxy_mask = _mm_set1_epi32(INTER_TAB_SIZE - 1);
3330 __m128i XX = _mm_set1_epi32(X0), YY = _mm_set1_epi32(Y0);
3331 for( ; x1 <= bw - 8; x1 += 8 )
3333 __m128i tx0, tx1, ty0, ty1;
3334 tx0 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(adelta + x + x1)), XX);
3335 ty0 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(bdelta + x + x1)), YY);
3336 tx1 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(adelta + x + x1 + 4)), XX);
3337 ty1 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(bdelta + x + x1 + 4)), YY);
3339 tx0 = _mm_srai_epi32(tx0, AB_BITS - INTER_BITS);
3340 ty0 = _mm_srai_epi32(ty0, AB_BITS - INTER_BITS);
3341 tx1 = _mm_srai_epi32(tx1, AB_BITS - INTER_BITS);
3342 ty1 = _mm_srai_epi32(ty1, AB_BITS - INTER_BITS);
// Fractional parts of x and y, packed to 16 bit.
3344 __m128i fx_ = _mm_packs_epi32(_mm_and_si128(tx0, fxy_mask),
3345 _mm_and_si128(tx1, fxy_mask));
3346 __m128i fy_ = _mm_packs_epi32(_mm_and_si128(ty0, fxy_mask),
3347 _mm_and_si128(ty1, fxy_mask));
// Integer cell coordinates, packed (saturating) to 16 bit.
3348 tx0 = _mm_packs_epi32(_mm_srai_epi32(tx0, INTER_BITS),
3349 _mm_srai_epi32(tx1, INTER_BITS));
3350 ty0 = _mm_packs_epi32(_mm_srai_epi32(ty0, INTER_BITS),
3351 _mm_srai_epi32(ty1, INTER_BITS));
// Combined index: fy * INTER_TAB_SIZE + fx.
3352 fx_ = _mm_adds_epi16(fx_, _mm_slli_epi16(fy_, INTER_BITS));
3354 _mm_storeu_si128((__m128i*)(xy + x1*2), _mm_unpacklo_epi16(tx0, ty0));
3355 _mm_storeu_si128((__m128i*)(xy + x1*2 + 8), _mm_unpackhi_epi16(tx0, ty0));
3356 _mm_storeu_si128((__m128i*)(alpha + x1), fx_);
// Scalar tail: same cell/fraction split.
3360 for( ; x1 < bw; x1++ )
3362 int X = (X0 + adelta[x+x1]) >> (AB_BITS - INTER_BITS);
3363 int Y = (Y0 + bdelta[x+x1]) >> (AB_BITS - INTER_BITS);
3364 xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
3365 xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
3366 alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
3367 (X & (INTER_TAB_SIZE-1)));
// Sample the tile through the generic remap machinery.
3372 if( interpolation == INTER_NEAREST )
3373 remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
3376 Mat _matA(bh, bw, CV_16U, A);
3377 remap( src, dpart, _XY, _matA, interpolation, borderType, borderValue );
3386 int interpolation, borderType;
3388 int *adelta, *bdelta;
3392 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// Parallel body delegating warpAffine to Intel IPP's backward warp.
// Each worker handles a horizontal strip of destination rows; *ok is the
// shared success flag cleared when any strip fails (writes to it from
// multiple workers are presumably benign since only 'false' is stored —
// NOTE(review): confirm against the elided failure-handling lines).
3393 class IPPwarpAffineInvoker :
3394 public ParallelLoopBody
3397 IPPwarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
3398 ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
3403 virtual void operator() (const Range& range) const
3405 IppiSize srcsize = { src.cols, src.rows };
3406 IppiRect srcroi = { 0, 0, src.cols, src.rows };
// Destination ROI restricted to this worker's row range.
3407 IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start };
3408 int cnn = src.channels();
// IPP's backward warp does not fill a constant border itself, so
// pre-fill this strip with the border color first.
3409 if( borderType == BORDER_CONSTANT )
3411 IppiSize setSize = { dst.cols, range.end - range.start };
3412 void *dataPointer = dst.data + dst.step[0] * range.start;
3413 if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
3419 if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0) ////Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
3425 double (&coeffs)[2][3];
3429 ippiWarpAffineBackFunc func;
// Non-copyable (reference members); assignment intentionally undefined.
3431 const IPPwarpAffineInvoker& operator= (const IPPwarpAffineInvoker&);
// Public warpAffine entry point: validates/converts the 2x3 matrix,
// inverts it when WARP_INVERSE_MAP is not set (the workers need the
// dst->src mapping), precomputes the per-column fixed-point terms
// adelta/bdelta, and runs warpAffineInvoker (or the IPP fast path).
3438 void cv::warpAffine( InputArray _src, OutputArray _dst,
3439 InputArray _M0, Size dsize,
3440 int flags, int borderType, const Scalar& borderValue )
3442 Mat src = _src.getMat(), M0 = _M0.getMat();
// Empty dsize means "same size as source".
3443 _dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
3444 Mat dst = _dst.getMat();
3445 CV_Assert( src.cols > 0 && src.rows > 0 );
// In-place warping is not supported.
3446 if( dst.data == src.data )
// matM wraps the local double[6] array M declared in an elided line.
3450 Mat matM(2, 3, CV_64F, M);
// Low bits of flags select the interpolation mode.
3451 int interpolation = flags & INTER_MAX;
3452 if( interpolation == INTER_AREA )
3453 interpolation = INTER_LINEAR;
3455 CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 2 && M0.cols == 3 );
3456 M0.convertTo(matM, matM.type());
3458 #ifdef HAVE_TEGRA_OPTIMIZATION
3459 if( tegra::warpAffine(src, dst, M, flags, borderType, borderValue) )
// Forward matrix given: invert it in place (2x2 inverse via the
// determinant, then the translation), so M maps dst -> src.
3463 if( !(flags & WARP_INVERSE_MAP) )
3465 double D = M[0]*M[4] - M[1]*M[3];
// Singular matrix degenerates to the zero mapping rather than dividing by 0.
3466 D = D != 0 ? 1./D : 0;
3467 double A11 = M[4]*D, A22=M[0]*D;
3468 M[0] = A11; M[1] *= -D;
3469 M[3] *= -D; M[4] = A22;
3470 double b1 = -M[0]*M[2] - M[1]*M[5];
3471 double b2 = -M[3]*M[2] - M[4]*M[5];
3472 M[2] = b1; M[5] = b2;
// Per-column fixed-point terms M[0]*x and M[3]*x shared by all rows.
3476 AutoBuffer<int> _abdelta(dst.cols*2);
3477 int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
3478 const int AB_BITS = MAX(10, (int)INTER_BITS);
3479 const int AB_SCALE = 1 << AB_BITS;
// ---- optional IPP fast path for common type/border combinations.
3481 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
3482 int depth = src.depth();
3483 int channels = src.channels();
3484 if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
3485 ( channels == 1 || channels == 3 || channels == 4 ) &&
3486 ( borderType == cv::BORDER_TRANSPARENT || ( borderType == cv::BORDER_CONSTANT ) ) )
3488 int type = src.type();
// Select the IPP backward-warp variant matching the pixel type.
3489 ippiWarpAffineBackFunc ippFunc =
3490 type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R :
3491 type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R :
3492 type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R :
3493 type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C1R :
3494 type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C3R :
3495 type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C4R :
3496 type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C1R :
3497 type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R :
3498 type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R :
// Translate the OpenCV interpolation flag to the IPP mode constant.
3501 flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
3502 flags == INTER_NEAREST ? IPPI_INTER_NN :
3503 flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
3505 if( mode && ippFunc )
3507 double coeffs[2][3];
3508 for( int i = 0; i < 2; i++ )
3510 for( int j = 0; j < 3; j++ )
3512 coeffs[i][j] = matM.at<double>(i, j);
3516 Range range(0, dst.rows);
3517 IPPwarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
3518 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
// ---- generic path: fill the column tables, then warp in parallel.
3525 for( x = 0; x < dst.cols; x++ )
3527 adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
3528 bdelta[x] = saturate_cast<int>(M[3]*x*AB_SCALE);
3531 Range range(0, dst.rows);
3532 warpAffineInvoker invoker(src, dst, interpolation, borderType,
3533 borderValue, adelta, bdelta, M);
3534 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
// Parallel body for cv::warpPerspective: for each assigned strip of destination
// rows it computes, block by block, the source coordinates under the 3x3
// homography M and delegates the actual sampling to remap().
// NOTE(review): this listing is an excerpt -- some original lines (braces,
// remaining member declarations) are not shown here.
3541 class warpPerspectiveInvoker :
3542 public ParallelLoopBody
// Stores references and the raw pointer M only; the caller's matrix buffer
// must stay alive for the duration of the parallel run.
3546 warpPerspectiveInvoker(const Mat &_src, Mat &_dst, double *_M, int _interpolation,
3547 int _borderType, const Scalar &_borderValue) :
3548 ParallelLoopBody(), src(_src), dst(_dst), M(_M), interpolation(_interpolation),
3549 borderType(_borderType), borderValue(_borderValue)
// Processes destination rows [range.start, range.end).
3553 virtual void operator() (const Range& range) const
// Work is tiled so the per-block coordinate tables XY (x,y pairs) and A
// (sub-pixel interpolation indices) fit in small stack buffers.
3555 const int BLOCK_SZ = 32;
3556 short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
3557 int x, y, x1, y1, width = dst.cols, height = dst.rows;
// Choose block width/height whose product stays <= BLOCK_SZ*BLOCK_SZ.
3559 int bh0 = std::min(BLOCK_SZ/2, height);
3560 int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
3561 bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);
3563 for( y = range.start; y < range.end; y += bh0 )
3565 for( x = 0; x < width; x += bw0 )
// Clip the block to the image and to this invocation's row range.
3567 int bw = std::min( bw0, width - x);
3568 int bh = std::min( bh0, range.end - y); // height
// _XY wraps the stack buffer as a bh x bw two-channel coordinate map.
3570 Mat _XY(bh, bw, CV_16SC2, XY), matA;
3571 Mat dpart(dst, Rect(x, y, bw, bh));
3573 for( y1 = 0; y1 < bh; y1++ )
3575 short* xy = XY + y1*bw*2;
// Row-start terms of the projective mapping; the per-column deltas
// M[0], M[3], M[6] are added incrementally below.
3576 double X0 = M[0]*x + M[1]*(y + y1) + M[2];
3577 double Y0 = M[3]*x + M[4]*(y + y1) + M[5];
3578 double W0 = M[6]*x + M[7]*(y + y1) + M[8];
// Nearest-neighbor path: integer source coordinates, no alpha table.
3580 if( interpolation == INTER_NEAREST )
3581 for( x1 = 0; x1 < bw; x1++ )
3583 double W = W0 + M[6]*x1;
// Clamp to int range before saturate_cast to avoid overflow issues.
3585 double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
3586 double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
3587 int X = saturate_cast<int>(fX);
3588 int Y = saturate_cast<int>(fY);
3590 xy[x1*2] = saturate_cast<short>(X);
3591 xy[x1*2+1] = saturate_cast<short>(Y);
// Interpolating path: coordinates are produced in INTER_BITS fixed point,
// with the sub-pixel fraction packed into the alpha table for remap().
3595 short* alpha = A + y1*bw;
3596 for( x1 = 0; x1 < bw; x1++ )
3598 double W = W0 + M[6]*x1;
// Guard against division by zero where the homography's W vanishes.
3599 W = W ? INTER_TAB_SIZE/W : 0;
3600 double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
3601 double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
3602 int X = saturate_cast<int>(fX);
3603 int Y = saturate_cast<int>(fY);
// Integer part -> coordinate map; fractional bits -> packed table index.
3605 xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
3606 xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
3607 alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
3608 (X & (INTER_TAB_SIZE-1)));
// Hand the per-block coordinate maps to remap() for the actual resampling.
3613 if( interpolation == INTER_NEAREST )
3614 remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
3617 Mat _matA(bh, bw, CV_16U, A);
3618 remap( src, dpart, _XY, _matA, interpolation, borderType, borderValue );
3628 int interpolation, borderType;
3632 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// Parallel body that forwards strips of destination rows to the Intel IPP
// backward-warp function chosen by the caller (ippiWarpPerspectiveBack_*).
// NOTE(review): excerpt -- braces and some member declarations are not shown.
3633 class IPPwarpPerspectiveInvoker :
3634 public ParallelLoopBody
// References only; coeffs/func/ok are owned by the caller and must outlive
// the parallel_for_ invocation.
3637 IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) :
3638 ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
// Warps destination rows [range.start, range.end); failures are reported
// through *ok so the caller can fall back to the generic path.
3643 virtual void operator() (const Range& range) const
3645 IppiSize srcsize = {src.cols, src.rows};
3646 IppiRect srcroi = {0, 0, src.cols, src.rows};
// Destination ROI restricted to this invocation's row strip.
3647 IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start};
3648 int cnn = src.channels();
// For BORDER_CONSTANT, pre-fill the strip with borderValue first --
// presumably because the IPP back-warp leaves unmapped pixels untouched.
3650 if( borderType == BORDER_CONSTANT )
3652 IppiSize setSize = {dst.cols, range.end - range.start};
3653 void *dataPointer = dst.data + dst.step[0] * range.start;
3654 if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
// Negative IPP status codes indicate failure.
3660 if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0)
3666 double (&coeffs)[3][3];
3669 const Scalar borderValue;
3670 ippiWarpPerspectiveBackFunc func;
// Assignment intentionally declared but not defined: the invoker is shared
// across threads and must not be copied.
3672 const IPPwarpPerspectiveInvoker& operator= (const IPPwarpPerspectiveInvoker&);
// Applies a 3x3 perspective (projective) transform to src, producing a dsize
// destination. Mirrors cv::warpAffine: optional Tegra and IPP fast paths,
// otherwise the block-based parallel warpPerspectiveInvoker.
// NOTE(review): excerpt -- braces, the local `double M[9]` declaration, the
// matrix inversion and #endif lines are not shown here.
3678 void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
3679 Size dsize, int flags, int borderType, const Scalar& borderValue )
3681 Mat src = _src.getMat(), M0 = _M0.getMat();
// dsize == Size() means "same size as the source".
3682 _dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
3683 Mat dst = _dst.getMat();
3685 CV_Assert( src.cols > 0 && src.rows > 0 );
// In-place call detection (the handling branch is not shown in this excerpt).
3686 if( dst.data == src.data )
// matM wraps a caller-local double buffer M as a 3x3 matrix.
3690 Mat matM(3, 3, CV_64F, M);
3691 int interpolation = flags & INTER_MAX;
// INTER_AREA is meaningless for a warp; degrade to bilinear.
3692 if( interpolation == INTER_AREA )
3693 interpolation = INTER_LINEAR;
3695 CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3 );
3696 M0.convertTo(matM, matM.type());
// Hardware-accelerated path on Tegra; returns true when it handled the warp.
3698 #ifdef HAVE_TEGRA_OPTIMIZATION
3699 if( tegra::warpPerspective(src, dst, M, flags, borderType, borderValue) )
// The invokers map destination pixels back into the source, so unless the
// caller already provided the inverse map, M is inverted here (inversion
// line not shown in this excerpt).
3703 if( !(flags & WARP_INVERSE_MAP) )
3706 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
3707 int depth = src.depth();
3708 int channels = src.channels();
// IPP fast path only for the depth/channel/border combinations it supports.
3709 if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
3710 ( channels == 1 || channels == 3 || channels == 4 ) &&
3711 ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) )
3713 int type = src.type();
// Select the IPP kernel matching the element type (null if unsupported).
3714 ippiWarpPerspectiveBackFunc ippFunc =
3715 type == CV_8UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C1R :
3716 type == CV_8UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C3R :
3717 type == CV_8UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C4R :
3718 type == CV_16UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C1R :
3719 type == CV_16UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C3R :
3720 type == CV_16UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C4R :
3721 type == CV_32FC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C1R :
3722 type == CV_32FC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C3R :
3723 type == CV_32FC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C4R :
// Translate OpenCV interpolation flags to IPP's enum (0 => unsupported).
3726 flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
3727 flags == INTER_NEAREST ? IPPI_INTER_NN :
3728 flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
3730 if( mode && ippFunc )
// IPP expects the homography as a plain 3x3 double array.
3732 double coeffs[3][3];
3733 for( int i = 0; i < 3; i++ )
3735 for( int j = 0; j < 3; j++ )
3737 coeffs[i][j] = matM.at<double>(i, j);
3741 Range range(0, dst.rows);
3742 IPPwarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
// Chunking heuristic: roughly 64K destination pixels per parallel task.
3743 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
// Generic fallback: parallel block-based coordinate computation + remap.
3750 Range range(0, dst.rows);
3751 warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
3752 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
3756 cv::Mat cv::getRotationMatrix2D( Point2f center, double angle, double scale )
3759 double alpha = cos(angle)*scale;
3760 double beta = sin(angle)*scale;
3762 Mat M(2, 3, CV_64F);
3763 double* m = (double*)M.data;
3767 m[2] = (1-alpha)*center.x - beta*center.y;
3770 m[5] = beta*center.x + (1-alpha)*center.y;
3775 /* Calculates coefficients of perspective transformation
3776 * which maps (xi,yi) to (ui,vi), (i=1,2,3,4):
3778 * c00*xi + c01*yi + c02
3779 * ui = ---------------------
3780 * c20*xi + c21*yi + c22
3782 * c10*xi + c11*yi + c12
3783 * vi = ---------------------
3784 * c20*xi + c21*yi + c22
3786 * Coefficients are calculated by solving linear system:
3787 * / x0 y0 1 0 0 0 -x0*u0 -y0*u0 \ /c00\ /u0\
3788 * | x1 y1 1 0 0 0 -x1*u1 -y1*u1 | |c01| |u1|
3789 * | x2 y2 1 0 0 0 -x2*u2 -y2*u2 | |c02| |u2|
3790 * | x3 y3 1 0 0 0 -x3*u3 -y3*u3 |.|c10|=|u3|,
3791 * | 0 0 0 x0 y0 1 -x0*v0 -y0*v0 | |c11| |v0|
3792 * | 0 0 0 x1 y1 1 -x1*v1 -y1*v1 | |c12| |v1|
3793 * | 0 0 0 x2 y2 1 -x2*v2 -y2*v2 | |c20| |v2|
3794 * \ 0 0 0 x3 y3 1 -x3*v3 -y3*v3 / \c21/ \v3/
3797 * cij - matrix coefficients, c22 = 1
3799 cv::Mat cv::getPerspectiveTransform( const Point2f src[], const Point2f dst[] )
3801 Mat M(3, 3, CV_64F), X(8, 1, CV_64F, M.data);
3802 double a[8][8], b[8];
3803 Mat A(8, 8, CV_64F, a), B(8, 1, CV_64F, b);
3805 for( int i = 0; i < 4; ++i )
3807 a[i][0] = a[i+4][3] = src[i].x;
3808 a[i][1] = a[i+4][4] = src[i].y;
3809 a[i][2] = a[i+4][5] = 1;
3810 a[i][3] = a[i][4] = a[i][5] =
3811 a[i+4][0] = a[i+4][1] = a[i+4][2] = 0;
3812 a[i][6] = -src[i].x*dst[i].x;
3813 a[i][7] = -src[i].y*dst[i].x;
3814 a[i+4][6] = -src[i].x*dst[i].y;
3815 a[i+4][7] = -src[i].y*dst[i].y;
3820 solve( A, B, X, DECOMP_SVD );
3821 ((double*)M.data)[8] = 1.;
3826 /* Calculates coefficients of affine transformation
3827 * which maps (xi,yi) to (ui,vi), (i=1,2,3):
3829 * ui = c00*xi + c01*yi + c02
3831 * vi = c10*xi + c11*yi + c12
3833 * Coefficients are calculated by solving linear system:
3834 * / x0 y0 1 0 0 0 \ /c00\ /u0\
3835 * | x1 y1 1 0 0 0 | |c01| |u1|
3836 * | x2 y2 1 0 0 0 | |c02| |u2|
3837 * | 0 0 0 x0 y0 1 | |c10| |v0|
3838 * | 0 0 0 x1 y1 1 | |c11| |v1|
3839 * \ 0 0 0 x2 y2 1 / |c12| |v2|
3842 * cij - matrix coefficients
3845 cv::Mat cv::getAffineTransform( const Point2f src[], const Point2f dst[] )
3847 Mat M(2, 3, CV_64F), X(6, 1, CV_64F, M.data);
3848 double a[6*6], b[6];
3849 Mat A(6, 6, CV_64F, a), B(6, 1, CV_64F, b);
3851 for( int i = 0; i < 3; i++ )
3855 a[j] = a[k+3] = src[i].x;
3856 a[j+1] = a[k+4] = src[i].y;
3857 a[j+2] = a[k+5] = 1;
3858 a[j+3] = a[j+4] = a[j+5] = 0;
3859 a[k] = a[k+1] = a[k+2] = 0;
3861 b[i*2+1] = dst[i].y;
// Computes the inverse of a 2x3 affine transform: the 2x2 linear part is
// inverted analytically (adjugate / determinant) and the translation is
// mapped through the inverse. A singular matrix (D == 0) produces an
// all-zero result instead of raising an error.
// NOTE(review): excerpt -- braces and the `else` keyword before the final
// CV_Error are not shown in this listing.
3868 void cv::invertAffineTransform(InputArray _matM, OutputArray __iM)
3870 Mat matM = _matM.getMat();
3871 CV_Assert(matM.rows == 2 && matM.cols == 3);
3872 __iM.create(2, 3, matM.type());
3873 Mat _iM = __iM.getMat();
// Single-precision branch.
3875 if( matM.type() == CV_32F )
3877 const float* M = (const float*)matM.data;
3878 float* iM = (float*)_iM.data;
// Row strides measured in elements, not bytes.
3879 int step = (int)(matM.step/sizeof(M[0])), istep = (int)(_iM.step/sizeof(iM[0]));
3881 double D = M[0]*M[step+1] - M[1]*M[step];
3882 D = D != 0 ? 1./D : 0;
// Inverse of the 2x2 block: [a b; c d]^-1 = (1/D) * [d -b; -c a].
3883 double A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
// Inverse translation: b' = -A^-1 * t.
3884 double b1 = -A11*M[2] - A12*M[step+2];
3885 double b2 = -A21*M[2] - A22*M[step+2];
3887 iM[0] = (float)A11; iM[1] = (float)A12; iM[2] = (float)b1;
3888 iM[istep] = (float)A21; iM[istep+1] = (float)A22; iM[istep+2] = (float)b2;
// Double-precision branch: identical math without the narrowing casts.
3890 else if( matM.type() == CV_64F )
3892 const double* M = (const double*)matM.data;
3893 double* iM = (double*)_iM.data;
3894 int step = (int)(matM.step/sizeof(M[0])), istep = (int)(_iM.step/sizeof(iM[0]));
3896 double D = M[0]*M[step+1] - M[1]*M[step];
3897 D = D != 0 ? 1./D : 0;
3898 double A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
3899 double b1 = -A11*M[2] - A12*M[step+2];
3900 double b2 = -A21*M[2] - A22*M[step+2];
3902 iM[0] = A11; iM[1] = A12; iM[2] = b1;
3903 iM[istep] = A21; iM[istep+1] = A22; iM[istep+2] = b2;
// Any other depth is rejected.
3906 CV_Error( CV_StsUnsupportedFormat, "" );
3909 cv::Mat cv::getPerspectiveTransform(InputArray _src, InputArray _dst)
3911 Mat src = _src.getMat(), dst = _dst.getMat();
3912 CV_Assert(src.checkVector(2, CV_32F) == 4 && dst.checkVector(2, CV_32F) == 4);
3913 return getPerspectiveTransform((const Point2f*)src.data, (const Point2f*)dst.data);
3916 cv::Mat cv::getAffineTransform(InputArray _src, InputArray _dst)
3918 Mat src = _src.getMat(), dst = _dst.getMat();
3919 CV_Assert(src.checkVector(2, CV_32F) == 3 && dst.checkVector(2, CV_32F) == 3);
3920 return getAffineTransform((const Point2f*)src.data, (const Point2f*)dst.data);
3924 cvResize( const CvArr* srcarr, CvArr* dstarr, int method )
3926 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3927 CV_Assert( src.type() == dst.type() );
3928 cv::resize( src, dst, dst.size(), (double)dst.cols/src.cols,
3929 (double)dst.rows/src.rows, method );
3934 cvWarpAffine( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
3935 int flags, CvScalar fillval )
3937 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3938 cv::Mat matrix = cv::cvarrToMat(marr);
3939 CV_Assert( src.type() == dst.type() );
3940 cv::warpAffine( src, dst, matrix, dst.size(), flags,
3941 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
3946 cvWarpPerspective( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
3947 int flags, CvScalar fillval )
3949 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3950 cv::Mat matrix = cv::cvarrToMat(marr);
3951 CV_Assert( src.type() == dst.type() );
3952 cv::warpPerspective( src, dst, matrix, dst.size(), flags,
3953 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
3958 cvRemap( const CvArr* srcarr, CvArr* dstarr,
3959 const CvArr* _mapx, const CvArr* _mapy,
3960 int flags, CvScalar fillval )
3962 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), dst0 = dst;
3963 cv::Mat mapx = cv::cvarrToMat(_mapx), mapy = cv::cvarrToMat(_mapy);
3964 CV_Assert( src.type() == dst.type() && dst.size() == mapx.size() );
3965 cv::remap( src, dst, mapx, mapy, flags & cv::INTER_MAX,
3966 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
3968 CV_Assert( dst0.data == dst.data );
3973 cv2DRotationMatrix( CvPoint2D32f center, double angle,
3974 double scale, CvMat* matrix )
3976 cv::Mat M0 = cv::cvarrToMat(matrix), M = cv::getRotationMatrix2D(center, angle, scale);
3977 CV_Assert( M.size() == M0.size() );
3978 M.convertTo(M0, M0.type());
3984 cvGetPerspectiveTransform( const CvPoint2D32f* src,
3985 const CvPoint2D32f* dst,
3988 cv::Mat M0 = cv::cvarrToMat(matrix),
3989 M = cv::getPerspectiveTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
3990 CV_Assert( M.size() == M0.size() );
3991 M.convertTo(M0, M0.type());
3997 cvGetAffineTransform( const CvPoint2D32f* src,
3998 const CvPoint2D32f* dst,
4001 cv::Mat M0 = cv::cvarrToMat(matrix),
4002 M = cv::getAffineTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
4003 CV_Assert( M.size() == M0.size() );
4004 M.convertTo(M0, M0.type());
4010 cvConvertMaps( const CvArr* arr1, const CvArr* arr2, CvArr* dstarr1, CvArr* dstarr2 )
4012 cv::Mat map1 = cv::cvarrToMat(arr1), map2;
4013 cv::Mat dstmap1 = cv::cvarrToMat(dstarr1), dstmap2;
4016 map2 = cv::cvarrToMat(arr2);
4019 dstmap2 = cv::cvarrToMat(dstarr2);
4020 if( dstmap2.type() == CV_16SC1 )
4021 dstmap2 = cv::Mat(dstmap2.size(), CV_16UC1, dstmap2.data, dstmap2.step);
4024 cv::convertMaps( map1, map2, dstmap1, dstmap2, dstmap1.type(), false );
4027 /****************************************************************************************\
4028 * Log-Polar Transform *
4029 \****************************************************************************************/
4031 /* now it is done via Remap; more correct implementation should use
4032 some super-sampling technique outside of the "fovea" circle */
// C API log-polar transform: builds dense per-pixel lookup maps (mapx, mapy)
// and delegates the actual resampling to cvRemap at the end.
// NOTE(review): excerpt -- braces, several declarations, the map-store lines
// and the CV_IMPL return-type line are not shown in this listing.
4034 cvLogPolar( const CvArr* srcarr, CvArr* dstarr,
4035 CvPoint2D32f center, double M, int flags )
4037 cv::Ptr<CvMat> mapx, mapy;
4039 CvMat srcstub, *src = cvGetMat(srcarr, &srcstub);
4040 CvMat dststub, *dst = cvGetMat(dstarr, &dststub);
4041 CvSize ssize, dsize;
// Source and destination must have identical element type.
4043 if( !CV_ARE_TYPES_EQ( src, dst ))
4044 CV_Error( CV_StsUnmatchedFormats, "" );
// Raised when the magnitude scale M is not positive (guard line not shown).
4047 CV_Error( CV_StsOutOfRange, "M should be >0" );
4049 ssize = cvGetMatSize(src);
4050 dsize = cvGetMatSize(dst);
// One float coordinate map per axis, same geometry as the destination.
4052 mapx = cvCreateMat( dsize.height, dsize.width, CV_32F );
4053 mapy = cvCreateMat( dsize.height, dsize.width, CV_32F );
// Forward mapping: destination (rho, phi) -> Cartesian source pixel.
4055 if( !(flags & CV_WARP_INVERSE_MAP) )
// Precompute the radius per column: r = exp(rho / M).
4058 cv::AutoBuffer<double> _exp_tab(dsize.width);
4059 double* exp_tab = _exp_tab;
4061 for( rho = 0; rho < dst->width; rho++ )
4062 exp_tab[rho] = std::exp(rho/M);
// Each destination row corresponds to one angle phi in [0, 2*pi).
4064 for( phi = 0; phi < dsize.height; phi++ )
4066 double cp = cos(phi*2*CV_PI/dsize.height);
4067 double sp = sin(phi*2*CV_PI/dsize.height);
4068 float* mx = (float*)(mapx->data.ptr + phi*mapx->step);
4069 float* my = (float*)(mapy->data.ptr + phi*mapy->step);
4071 for( rho = 0; rho < dsize.width; rho++ )
4073 double r = exp_tab[rho];
4074 double x = r*cp + center.x;
4075 double y = r*sp + center.y;
// Inverse mapping: destination Cartesian pixel -> (rho, phi), computed one
// row at a time through four scratch vectors carved out of one buffer.
4085 CvMat bufx, bufy, bufp, bufa;
4086 double ascale = ssize.height/(2*CV_PI);
4087 cv::AutoBuffer<float> _buf(4*dsize.width);
// Buffer layout: [x offsets | y offsets | magnitudes | angles].
4090 bufx = cvMat( 1, dsize.width, CV_32F, buf );
4091 bufy = cvMat( 1, dsize.width, CV_32F, buf + dsize.width );
4092 bufp = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*2 );
4093 bufa = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*3 );
// x offsets from the center are identical for every row; fill once.
4095 for( x = 0; x < dsize.width; x++ )
4096 bufx.data.fl[x] = (float)x - center.x;
4098 for( y = 0; y < dsize.height; y++ )
4100 float* mx = (float*)(mapx->data.ptr + y*mapx->step);
4101 float* my = (float*)(mapy->data.ptr + y*mapy->step);
4103 for( x = 0; x < dsize.width; x++ )
4104 bufy.data.fl[x] = (float)y - center.y;
// Vectorized conversion of the whole row to polar coordinates.
4107 cvCartToPolar( &bufx, &bufy, &bufp, &bufa );
// +1 before the log avoids log(0) at the center pixel.
4109 for( x = 0; x < dsize.width; x++ )
4110 bufp.data.fl[x] += 1.f;
4112 cvLog( &bufp, &bufp );
4114 for( x = 0; x < dsize.width; x++ )
4116 double rho = bufp.data.fl[x]*M;
4117 double phi = bufa.data.fl[x]*ascale;
// Scalar path computing the same rho/phi quantities per pixel (the branch
// selecting between the two paths is not shown in this excerpt).
4123 for( x = 0; x < dsize.width; x++ )
4125 double xx = bufx.data.fl[x];
4126 double yy = bufy.data.fl[x];
4128 double p = log(sqrt(xx*xx + yy*yy) + 1.)*M;
4129 double a = atan2(yy,xx);
// Final resampling through the computed maps; outliers filled with 0.
4141 cvRemap( src, dst, mapx, mapy, flags, cvScalarAll(0) );
4145 /****************************************************************************************
4146 Linear-Polar Transform
4147 J.L. Blanco, Apr 2009
4148 ****************************************************************************************/
4150 void cvLinearPolar( const CvArr* srcarr, CvArr* dstarr,
4151 CvPoint2D32f center, double maxRadius, int flags )
4153 cv::Ptr<CvMat> mapx, mapy;
4155 CvMat srcstub, *src = (CvMat*)srcarr;
4156 CvMat dststub, *dst = (CvMat*)dstarr;
4157 CvSize ssize, dsize;
4159 src = cvGetMat( srcarr, &srcstub,0,0 );
4160 dst = cvGetMat( dstarr, &dststub,0,0 );
4162 if( !CV_ARE_TYPES_EQ( src, dst ))
4163 CV_Error( CV_StsUnmatchedFormats, "" );
4165 ssize.width = src->cols;
4166 ssize.height = src->rows;
4167 dsize.width = dst->cols;
4168 dsize.height = dst->rows;
4170 mapx = cvCreateMat( dsize.height, dsize.width, CV_32F );
4171 mapy = cvCreateMat( dsize.height, dsize.width, CV_32F );
4173 if( !(flags & CV_WARP_INVERSE_MAP) )
4177 for( phi = 0; phi < dsize.height; phi++ )
4179 double cp = cos(phi*2*CV_PI/dsize.height);
4180 double sp = sin(phi*2*CV_PI/dsize.height);
4181 float* mx = (float*)(mapx->data.ptr + phi*mapx->step);
4182 float* my = (float*)(mapy->data.ptr + phi*mapy->step);
4184 for( rho = 0; rho < dsize.width; rho++ )
4186 double r = maxRadius*(rho+1)/dsize.width;
4187 double x = r*cp + center.x;
4188 double y = r*sp + center.y;
4198 CvMat bufx, bufy, bufp, bufa;
4199 const double ascale = ssize.height/(2*CV_PI);
4200 const double pscale = ssize.width/maxRadius;
4202 cv::AutoBuffer<float> _buf(4*dsize.width);
4205 bufx = cvMat( 1, dsize.width, CV_32F, buf );
4206 bufy = cvMat( 1, dsize.width, CV_32F, buf + dsize.width );
4207 bufp = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*2 );
4208 bufa = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*3 );
4210 for( x = 0; x < dsize.width; x++ )
4211 bufx.data.fl[x] = (float)x - center.x;
4213 for( y = 0; y < dsize.height; y++ )
4215 float* mx = (float*)(mapx->data.ptr + y*mapx->step);
4216 float* my = (float*)(mapy->data.ptr + y*mapy->step);
4218 for( x = 0; x < dsize.width; x++ )
4219 bufy.data.fl[x] = (float)y - center.y;
4221 cvCartToPolar( &bufx, &bufy, &bufp, &bufa, 0 );
4223 for( x = 0; x < dsize.width; x++ )
4224 bufp.data.fl[x] += 1.f;
4226 for( x = 0; x < dsize.width; x++ )
4228 double rho = bufp.data.fl[x]*pscale;
4229 double phi = bufa.data.fl[x]*ascale;
4236 cvRemap( src, dst, mapx, mapy, flags, cvScalarAll(0) );