/*M///////////////////////////////////////////////////////////////////////////////////////
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// For Open Source Computer Vision Library
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
/* ////////////////////////////////////////////////////////////////////
// Geometrical transforms on images and matrices: rotation, zoom etc.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
static IppStatus sts = ippInit();
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR*100 + IPP_VERSION_MINOR >= 701)
typedef IppStatus (CV_STDCALL* ippiResizeFunc)(const void*, int, const void*, int, IppiPoint, IppiSize, IppiBorderType, void*, void*, Ipp8u*);
typedef IppStatus (CV_STDCALL* ippiResizeGetBufferSize)(void*, IppiSize, Ipp32u, int*);
typedef IppStatus (CV_STDCALL* ippiResizeGetSrcOffset)(void*, IppiPoint, IppiPoint*);
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize);
typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);
typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int);
template <int channels, typename Type>
bool IPPSetSimple(cv::Scalar value, void *dataPointer, int step, IppiSize &size, ippiSetFunc func)
Type values[channels];
for( int i = 0; i < channels; i++ )
values[i] = (Type)value[i];
return func(values, dataPointer, step, size) >= 0;
bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
return ippiSet_8u_C1R((Ipp8u)value[0], (Ipp8u *)dataPointer, step, size) >= 0;
return ippiSet_16u_C1R((Ipp16u)value[0], (Ipp16u *)dataPointer, step, size) >= 0;
return ippiSet_32f_C1R((Ipp32f)value[0], (Ipp32f *)dataPointer, step, size) >= 0;
return IPPSetSimple<3, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C3R);
return IPPSetSimple<3, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C3R);
return IPPSetSimple<3, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C3R);
else if( channels == 4 )
return IPPSetSimple<4, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C4R);
return IPPSetSimple<4, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C4R);
return IPPSetSimple<4, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C4R);
/************** interpolation formulas and tables ***************/
const int INTER_RESIZE_COEF_BITS=11;
const int INTER_RESIZE_COEF_SCALE=1 << INTER_RESIZE_COEF_BITS;
const int INTER_REMAP_COEF_BITS=15;
const int INTER_REMAP_COEF_SCALE=1 << INTER_REMAP_COEF_BITS;
static uchar NNDeltaTab_i[INTER_TAB_SIZE2][2];
static float BilinearTab_f[INTER_TAB_SIZE2][2][2];
static short BilinearTab_i[INTER_TAB_SIZE2][2][2];
static short BilinearTab_iC4_buf[INTER_TAB_SIZE2+2][2][8];
static short (*BilinearTab_iC4)[2][8] = (short (*)[2][8])alignPtr(BilinearTab_iC4_buf, 16);
static float BicubicTab_f[INTER_TAB_SIZE2][4][4];
static short BicubicTab_i[INTER_TAB_SIZE2][4][4];
static float Lanczos4Tab_f[INTER_TAB_SIZE2][8][8];
static short Lanczos4Tab_i[INTER_TAB_SIZE2][8][8];
static inline void interpolateLinear( float x, float* coeffs )
static inline void interpolateCubic( float x, float* coeffs )
const float A = -0.75f;
coeffs[0] = ((A*(x + 1) - 5*A)*(x + 1) + 8*A)*(x + 1) - 4*A;
coeffs[1] = ((A + 2)*x - (A + 3))*x*x + 1;
coeffs[2] = ((A + 2)*(1 - x) - (A + 3))*(1 - x)*(1 - x) + 1;
coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2];
static inline void interpolateLanczos4( float x, float* coeffs )
static const double s45 = 0.70710678118654752440084436210485;
static const double cs[][2]=
{{1, 0}, {-s45, -s45}, {0, 1}, {s45, -s45}, {-1, 0}, {s45, s45}, {0, -1}, {-s45, s45}};
if( x < FLT_EPSILON )
for( int i = 0; i < 8; i++ )
double y0=-(x+3)*CV_PI*0.25, s0 = sin(y0), c0=cos(y0);
for(int i = 0; i < 8; i++ )
double y = -(x+3-i)*CV_PI*0.25;
coeffs[i] = (float)((cs[i][0]*s0 + cs[i][1]*c0)/(y*y));
for(int i = 0; i < 8; i++ )
static void initInterTab1D(int method, float* tab, int tabsz)
float scale = 1.f/tabsz;
if( method == INTER_LINEAR )
for( int i = 0; i < tabsz; i++, tab += 2 )
interpolateLinear( i*scale, tab );
else if( method == INTER_CUBIC )
for( int i = 0; i < tabsz; i++, tab += 4 )
interpolateCubic( i*scale, tab );
else if( method == INTER_LANCZOS4 )
for( int i = 0; i < tabsz; i++, tab += 8 )
interpolateLanczos4( i*scale, tab );
CV_Error( CV_StsBadArg, "Unknown interpolation method" );
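// Interpolation weights are stored in fixed point: resize uses 11 fractional bits and
// remap uses 15, so a complete set of weights sums to the corresponding *_SCALE value.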
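// Cubic convolution kernel with A = -0.75 (the value used for INTER_CUBIC);
// the four weights computed below always sum to 1 by construction.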
static const void* initInterTab2D( int method, bool fixpt )
static bool inittab[INTER_MAX+1] = {false};
if( method == INTER_LINEAR )
tab = BilinearTab_f[0][0], itab = BilinearTab_i[0][0], ksize=2;
else if( method == INTER_CUBIC )
tab = BicubicTab_f[0][0], itab = BicubicTab_i[0][0], ksize=4;
else if( method == INTER_LANCZOS4 )
tab = Lanczos4Tab_f[0][0], itab = Lanczos4Tab_i[0][0], ksize=8;
CV_Error( CV_StsBadArg, "Unknown/unsupported interpolation type" );
if( !inittab[method] )
AutoBuffer<float> _tab(8*INTER_TAB_SIZE);
initInterTab1D(method, _tab, INTER_TAB_SIZE);
for( i = 0; i < INTER_TAB_SIZE; i++ )
for( j = 0; j < INTER_TAB_SIZE; j++, tab += ksize*ksize, itab += ksize*ksize )
NNDeltaTab_i[i*INTER_TAB_SIZE+j][0] = j < INTER_TAB_SIZE/2;
NNDeltaTab_i[i*INTER_TAB_SIZE+j][1] = i < INTER_TAB_SIZE/2;
for( k1 = 0; k1 < ksize; k1++ )
float vy = _tab[i*ksize + k1];
for( k2 = 0; k2 < ksize; k2++ )
float v = vy*_tab[j*ksize + k2];
tab[k1*ksize + k2] = v;
isum += itab[k1*ksize + k2] = saturate_cast<short>(v*INTER_REMAP_COEF_SCALE);
if( isum != INTER_REMAP_COEF_SCALE )
int diff = isum - INTER_REMAP_COEF_SCALE;
int ksize2 = ksize/2, Mk1=ksize2, Mk2=ksize2, mk1=ksize2, mk2=ksize2;
for( k1 = ksize2; k1 < ksize2+2; k1++ )
for( k2 = ksize2; k2 < ksize2+2; k2++ )
if( itab[k1*ksize+k2] < itab[mk1*ksize+mk2] )
else if( itab[k1*ksize+k2] > itab[Mk1*ksize+Mk2] )
itab[Mk1*ksize + Mk2] = (short)(itab[Mk1*ksize + Mk2] - diff);
itab[mk1*ksize + mk2] = (short)(itab[mk1*ksize + mk2] - diff);
tab -= INTER_TAB_SIZE2*ksize*ksize;
itab -= INTER_TAB_SIZE2*ksize*ksize;
if( method == INTER_LINEAR )
for( i = 0; i < INTER_TAB_SIZE2; i++ )
for( j = 0; j < 4; j++ )
BilinearTab_iC4[i][0][j*2] = BilinearTab_i[i][0][0];
BilinearTab_iC4[i][0][j*2+1] = BilinearTab_i[i][0][1];
BilinearTab_iC4[i][1][j*2] = BilinearTab_i[i][1][0];
BilinearTab_iC4[i][1][j*2+1] = BilinearTab_i[i][1][1];
inittab[method] = true;
return fixpt ? (const void*)itab : (const void*)tab;
static bool initAllInterTab2D()
return initInterTab2D( INTER_LINEAR, false ) &&
initInterTab2D( INTER_LINEAR, true ) &&
initInterTab2D( INTER_CUBIC, false ) &&
initInterTab2D( INTER_CUBIC, true ) &&
initInterTab2D( INTER_LANCZOS4, false ) &&
initInterTab2D( INTER_LANCZOS4, true );
static volatile bool doInitAllInterTab2D = initAllInterTab2D();
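// Build all interpolation tables once at library load time, so later calls
// only need a table lookup instead of recomputing the weights.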
template<typename ST, typename DT> struct Cast
DT operator()(ST val) const { return saturate_cast<DT>(val); }
template<typename ST, typename DT, int bits> struct FixedPtCast
enum { SHIFT = bits, DELTA = 1 << (bits-1) };
DT operator()(ST val) const { return saturate_cast<DT>((val + DELTA)>>SHIFT); }
/****************************************************************************************\
\****************************************************************************************/
class resizeNNInvoker :
public ParallelLoopBody
resizeNNInvoker(const Mat& _src, Mat &_dst, int *_x_ofs, int _pix_size4, double _ify) :
ParallelLoopBody(), src(_src), dst(_dst), x_ofs(_x_ofs), pix_size4(_pix_size4),
virtual void operator() (const Range& range) const
Size ssize = src.size(), dsize = dst.size();
int y, x, pix_size = (int)src.elemSize();
for( y = range.start; y < range.end; y++ )
uchar* D = dst.data + dst.step*y;
int sy = std::min(cvFloor(y*ify), ssize.height-1);
const uchar* S = src.data + src.step*sy;
for( x = 0; x <= dsize.width - 2; x += 2 )
uchar t0 = S[x_ofs[x]];
uchar t1 = S[x_ofs[x+1]];
for( ; x < dsize.width; x++ )
for( x = 0; x < dsize.width; x++ )
*(ushort*)(D + x*2) = *(ushort*)(S + x_ofs[x]);
for( x = 0; x < dsize.width; x++, D += 3 )
const uchar* _tS = S + x_ofs[x];
D[0] = _tS[0]; D[1] = _tS[1]; D[2] = _tS[2];
for( x = 0; x < dsize.width; x++ )
*(int*)(D + x*4) = *(int*)(S + x_ofs[x]);
for( x = 0; x < dsize.width; x++, D += 6 )
const ushort* _tS = (const ushort*)(S + x_ofs[x]);
ushort* _tD = (ushort*)D;
_tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2];
for( x = 0; x < dsize.width; x++, D += 8 )
const int* _tS = (const int*)(S + x_ofs[x]);
_tD[0] = _tS[0]; _tD[1] = _tS[1];
for( x = 0; x < dsize.width; x++, D += 12 )
const int* _tS = (const int*)(S + x_ofs[x]);
_tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2];
for( x = 0; x < dsize.width; x++, D += pix_size )
const int* _tS = (const int*)(S + x_ofs[x]);
for( int k = 0; k < pix_size4; k++ )
int* x_ofs, pix_size4;
resizeNNInvoker(const resizeNNInvoker&);
resizeNNInvoker& operator=(const resizeNNInvoker&);
resizeNN( const Mat& src, Mat& dst, double fx, double fy )
Size ssize = src.size(), dsize = dst.size();
AutoBuffer<int> _x_ofs(dsize.width);
int pix_size = (int)src.elemSize();
int pix_size4 = (int)(pix_size / sizeof(int));
double ifx = 1./fx, ify = 1./fy;
for( x = 0; x < dsize.width; x++ )
int sx = cvFloor(x*ifx);
x_ofs[x] = std::min(sx, ssize.width-1)*pix_size;
Range range(0, dsize.height);
resizeNNInvoker invoker(src, dst, x_ofs, pix_size4, ify);
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
int operator()(const uchar**, uchar*, const uchar*, int ) const { return 0; }
int operator()(const uchar**, uchar**, int, const int*,
const uchar*, int, int, int, int, int) const { return 0; }
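// Nearest-neighbour resize: the per-column source offsets (in bytes) are precomputed
// above, and rows are processed in parallel by resizeNNInvoker.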
struct VResizeLinearVec_32s8u
int operator()(const uchar** _src, uchar* dst, const uchar* _beta, int width ) const
if( !checkHardwareSupport(CV_CPU_SSE2) )
const int** src = (const int**)_src;
const short* beta = (const short*)_beta;
const int *S0 = src[0], *S1 = src[1];
__m128i b0 = _mm_set1_epi16(beta[0]), b1 = _mm_set1_epi16(beta[1]);
__m128i delta = _mm_set1_epi16(2);
if( (((size_t)S0|(size_t)S1)&15) == 0 )
for( ; x <= width - 16; x += 16 )
__m128i x0, x1, x2, y0, y1, y2;
x0 = _mm_load_si128((const __m128i*)(S0 + x));
x1 = _mm_load_si128((const __m128i*)(S0 + x + 4));
y0 = _mm_load_si128((const __m128i*)(S1 + x));
y1 = _mm_load_si128((const __m128i*)(S1 + x + 4));
x0 = _mm_packs_epi32(_mm_srai_epi32(x0, 4), _mm_srai_epi32(x1, 4));
y0 = _mm_packs_epi32(_mm_srai_epi32(y0, 4), _mm_srai_epi32(y1, 4));
x1 = _mm_load_si128((const __m128i*)(S0 + x + 8));
x2 = _mm_load_si128((const __m128i*)(S0 + x + 12));
y1 = _mm_load_si128((const __m128i*)(S1 + x + 8));
y2 = _mm_load_si128((const __m128i*)(S1 + x + 12));
x1 = _mm_packs_epi32(_mm_srai_epi32(x1, 4), _mm_srai_epi32(x2, 4));
y1 = _mm_packs_epi32(_mm_srai_epi32(y1, 4), _mm_srai_epi32(y2, 4));
x0 = _mm_adds_epi16(_mm_mulhi_epi16( x0, b0 ), _mm_mulhi_epi16( y0, b1 ));
x1 = _mm_adds_epi16(_mm_mulhi_epi16( x1, b0 ), _mm_mulhi_epi16( y1, b1 ));
x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
x1 = _mm_srai_epi16(_mm_adds_epi16(x1, delta), 2);
_mm_storeu_si128( (__m128i*)(dst + x), _mm_packus_epi16(x0, x1));
for( ; x <= width - 16; x += 16 )
__m128i x0, x1, x2, y0, y1, y2;
x0 = _mm_loadu_si128((const __m128i*)(S0 + x));
x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 4));
y0 = _mm_loadu_si128((const __m128i*)(S1 + x));
y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 4));
x0 = _mm_packs_epi32(_mm_srai_epi32(x0, 4), _mm_srai_epi32(x1, 4));
y0 = _mm_packs_epi32(_mm_srai_epi32(y0, 4), _mm_srai_epi32(y1, 4));
x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 8));
x2 = _mm_loadu_si128((const __m128i*)(S0 + x + 12));
y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 8));
y2 = _mm_loadu_si128((const __m128i*)(S1 + x + 12));
x1 = _mm_packs_epi32(_mm_srai_epi32(x1, 4), _mm_srai_epi32(x2, 4));
y1 = _mm_packs_epi32(_mm_srai_epi32(y1, 4), _mm_srai_epi32(y2, 4));
x0 = _mm_adds_epi16(_mm_mulhi_epi16( x0, b0 ), _mm_mulhi_epi16( y0, b1 ));
x1 = _mm_adds_epi16(_mm_mulhi_epi16( x1, b0 ), _mm_mulhi_epi16( y1, b1 ));
x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
x1 = _mm_srai_epi16(_mm_adds_epi16(x1, delta), 2);
_mm_storeu_si128( (__m128i*)(dst + x), _mm_packus_epi16(x0, x1));
for( ; x < width - 4; x += 4 )
x0 = _mm_srai_epi32(_mm_loadu_si128((const __m128i*)(S0 + x)), 4);
y0 = _mm_srai_epi32(_mm_loadu_si128((const __m128i*)(S1 + x)), 4);
x0 = _mm_packs_epi32(x0, x0);
y0 = _mm_packs_epi32(y0, y0);
x0 = _mm_adds_epi16(_mm_mulhi_epi16(x0, b0), _mm_mulhi_epi16(y0, b1));
x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
x0 = _mm_packus_epi16(x0, x0);
*(int*)(dst + x) = _mm_cvtsi128_si32(x0);
template<int shiftval> struct VResizeLinearVec_32f16
int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
if( !checkHardwareSupport(CV_CPU_SSE2) )
const float** src = (const float**)_src;
const float* beta = (const float*)_beta;
const float *S0 = src[0], *S1 = src[1];
ushort* dst = (ushort*)_dst;
__m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]);
__m128i preshift = _mm_set1_epi32(shiftval);
__m128i postshift = _mm_set1_epi16((short)shiftval);
if( (((size_t)S0|(size_t)S1)&15) == 0 )
for( ; x <= width - 16; x += 16 )
__m128 x0, x1, y0, y1;
x0 = _mm_load_ps(S0 + x);
x1 = _mm_load_ps(S0 + x + 4);
y0 = _mm_load_ps(S1 + x);
y1 = _mm_load_ps(S1 + x + 4);
x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
t0 = _mm_add_epi16(_mm_packs_epi32(t0, t2), postshift);
x0 = _mm_load_ps(S0 + x + 8);
x1 = _mm_load_ps(S0 + x + 12);
y0 = _mm_load_ps(S1 + x + 8);
y1 = _mm_load_ps(S1 + x + 12);
x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
t1 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
t1 = _mm_add_epi16(_mm_packs_epi32(t1, t2), postshift);
_mm_storeu_si128( (__m128i*)(dst + x), t0);
_mm_storeu_si128( (__m128i*)(dst + x + 8), t1);
for( ; x <= width - 16; x += 16 )
__m128 x0, x1, y0, y1;
x0 = _mm_loadu_ps(S0 + x);
x1 = _mm_loadu_ps(S0 + x + 4);
y0 = _mm_loadu_ps(S1 + x);
y1 = _mm_loadu_ps(S1 + x + 4);
x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
t0 = _mm_add_epi16(_mm_packs_epi32(t0, t2), postshift);
x0 = _mm_loadu_ps(S0 + x + 8);
x1 = _mm_loadu_ps(S0 + x + 12);
y0 = _mm_loadu_ps(S1 + x + 8);
y1 = _mm_loadu_ps(S1 + x + 12);
x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
t1 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
t1 = _mm_add_epi16(_mm_packs_epi32(t1, t2), postshift);
_mm_storeu_si128( (__m128i*)(dst + x), t0);
_mm_storeu_si128( (__m128i*)(dst + x + 8), t1);
for( ; x < width - 4; x += 4 )
x0 = _mm_loadu_ps(S0 + x);
y0 = _mm_loadu_ps(S1 + x);
x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
t0 = _mm_add_epi16(_mm_packs_epi32(t0, t0), postshift);
_mm_storel_epi64( (__m128i*)(dst + x), t0);
typedef VResizeLinearVec_32f16<SHRT_MIN> VResizeLinearVec_32f16u;
typedef VResizeLinearVec_32f16<0> VResizeLinearVec_32f16s;
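// shiftval biases the values into the signed 16-bit range before the saturating pack
// (SHRT_MIN for the unsigned 16-bit output, 0 for the signed one), and the same
// constant is added back after packing to restore the original range.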
struct VResizeLinearVec_32f
int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
if( !checkHardwareSupport(CV_CPU_SSE) )
const float** src = (const float**)_src;
const float* beta = (const float*)_beta;
const float *S0 = src[0], *S1 = src[1];
float* dst = (float*)_dst;
__m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]);
if( (((size_t)S0|(size_t)S1)&15) == 0 )
for( ; x <= width - 8; x += 8 )
__m128 x0, x1, y0, y1;
x0 = _mm_load_ps(S0 + x);
x1 = _mm_load_ps(S0 + x + 4);
y0 = _mm_load_ps(S1 + x);
y1 = _mm_load_ps(S1 + x + 4);
x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
_mm_storeu_ps( dst + x, x0);
_mm_storeu_ps( dst + x + 4, x1);
for( ; x <= width - 8; x += 8 )
__m128 x0, x1, y0, y1;
x0 = _mm_loadu_ps(S0 + x);
x1 = _mm_loadu_ps(S0 + x + 4);
y0 = _mm_loadu_ps(S1 + x);
y1 = _mm_loadu_ps(S1 + x + 4);
x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
_mm_storeu_ps( dst + x, x0);
_mm_storeu_ps( dst + x + 4, x1);
struct VResizeCubicVec_32s8u
int operator()(const uchar** _src, uchar* dst, const uchar* _beta, int width ) const
if( !checkHardwareSupport(CV_CPU_SSE2) )
const int** src = (const int**)_src;
const short* beta = (const short*)_beta;
const int *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
float scale = 1.f/(INTER_RESIZE_COEF_SCALE*INTER_RESIZE_COEF_SCALE);
__m128 b0 = _mm_set1_ps(beta[0]*scale), b1 = _mm_set1_ps(beta[1]*scale),
b2 = _mm_set1_ps(beta[2]*scale), b3 = _mm_set1_ps(beta[3]*scale);
if( (((size_t)S0|(size_t)S1|(size_t)S2|(size_t)S3)&15) == 0 )
for( ; x <= width - 8; x += 8 )
__m128i x0, x1, y0, y1;
__m128 s0, s1, f0, f1;
x0 = _mm_load_si128((const __m128i*)(S0 + x));
x1 = _mm_load_si128((const __m128i*)(S0 + x + 4));
y0 = _mm_load_si128((const __m128i*)(S1 + x));
y1 = _mm_load_si128((const __m128i*)(S1 + x + 4));
s0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b0);
s1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b0);
f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b1);
f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b1);
s0 = _mm_add_ps(s0, f0);
s1 = _mm_add_ps(s1, f1);
x0 = _mm_load_si128((const __m128i*)(S2 + x));
x1 = _mm_load_si128((const __m128i*)(S2 + x + 4));
y0 = _mm_load_si128((const __m128i*)(S3 + x));
y1 = _mm_load_si128((const __m128i*)(S3 + x + 4));
f0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b2);
f1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b2);
s0 = _mm_add_ps(s0, f0);
s1 = _mm_add_ps(s1, f1);
f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b3);
f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b3);
s0 = _mm_add_ps(s0, f0);
s1 = _mm_add_ps(s1, f1);
x0 = _mm_cvtps_epi32(s0);
x1 = _mm_cvtps_epi32(s1);
x0 = _mm_packs_epi32(x0, x1);
_mm_storel_epi64( (__m128i*)(dst + x), _mm_packus_epi16(x0, x0));
for( ; x <= width - 8; x += 8 )
__m128i x0, x1, y0, y1;
__m128 s0, s1, f0, f1;
x0 = _mm_loadu_si128((const __m128i*)(S0 + x));
x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 4));
y0 = _mm_loadu_si128((const __m128i*)(S1 + x));
y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 4));
s0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b0);
s1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b0);
f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b1);
f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b1);
s0 = _mm_add_ps(s0, f0);
s1 = _mm_add_ps(s1, f1);
x0 = _mm_loadu_si128((const __m128i*)(S2 + x));
x1 = _mm_loadu_si128((const __m128i*)(S2 + x + 4));
y0 = _mm_loadu_si128((const __m128i*)(S3 + x));
y1 = _mm_loadu_si128((const __m128i*)(S3 + x + 4));
f0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b2);
f1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b2);
s0 = _mm_add_ps(s0, f0);
s1 = _mm_add_ps(s1, f1);
f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b3);
f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b3);
s0 = _mm_add_ps(s0, f0);
s1 = _mm_add_ps(s1, f1);
x0 = _mm_cvtps_epi32(s0);
x1 = _mm_cvtps_epi32(s1);
x0 = _mm_packs_epi32(x0, x1);
_mm_storel_epi64( (__m128i*)(dst + x), _mm_packus_epi16(x0, x0));
template<int shiftval> struct VResizeCubicVec_32f16
int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
if( !checkHardwareSupport(CV_CPU_SSE2) )
const float** src = (const float**)_src;
const float* beta = (const float*)_beta;
const float *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
ushort* dst = (ushort*)_dst;
__m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]),
b2 = _mm_set1_ps(beta[2]), b3 = _mm_set1_ps(beta[3]);
__m128i preshift = _mm_set1_epi32(shiftval);
__m128i postshift = _mm_set1_epi16((short)shiftval);
for( ; x <= width - 8; x += 8 )
__m128 x0, x1, y0, y1, s0, s1;
x0 = _mm_loadu_ps(S0 + x);
x1 = _mm_loadu_ps(S0 + x + 4);
y0 = _mm_loadu_ps(S1 + x);
y1 = _mm_loadu_ps(S1 + x + 4);
s0 = _mm_mul_ps(x0, b0);
s1 = _mm_mul_ps(x1, b0);
y0 = _mm_mul_ps(y0, b1);
y1 = _mm_mul_ps(y1, b1);
s0 = _mm_add_ps(s0, y0);
s1 = _mm_add_ps(s1, y1);
x0 = _mm_loadu_ps(S2 + x);
x1 = _mm_loadu_ps(S2 + x + 4);
y0 = _mm_loadu_ps(S3 + x);
y1 = _mm_loadu_ps(S3 + x + 4);
x0 = _mm_mul_ps(x0, b2);
x1 = _mm_mul_ps(x1, b2);
y0 = _mm_mul_ps(y0, b3);
y1 = _mm_mul_ps(y1, b3);
s0 = _mm_add_ps(s0, x0);
s1 = _mm_add_ps(s1, x1);
s0 = _mm_add_ps(s0, y0);
s1 = _mm_add_ps(s1, y1);
t0 = _mm_add_epi32(_mm_cvtps_epi32(s0), preshift);
t1 = _mm_add_epi32(_mm_cvtps_epi32(s1), preshift);
t0 = _mm_add_epi16(_mm_packs_epi32(t0, t1), postshift);
_mm_storeu_si128( (__m128i*)(dst + x), t0);
typedef VResizeCubicVec_32f16<SHRT_MIN> VResizeCubicVec_32f16u;
typedef VResizeCubicVec_32f16<0> VResizeCubicVec_32f16s;
struct VResizeCubicVec_32f
int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
if( !checkHardwareSupport(CV_CPU_SSE) )
const float** src = (const float**)_src;
const float* beta = (const float*)_beta;
const float *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
float* dst = (float*)_dst;
__m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]),
b2 = _mm_set1_ps(beta[2]), b3 = _mm_set1_ps(beta[3]);
for( ; x <= width - 8; x += 8 )
__m128 x0, x1, y0, y1, s0, s1;
x0 = _mm_loadu_ps(S0 + x);
x1 = _mm_loadu_ps(S0 + x + 4);
y0 = _mm_loadu_ps(S1 + x);
y1 = _mm_loadu_ps(S1 + x + 4);
s0 = _mm_mul_ps(x0, b0);
s1 = _mm_mul_ps(x1, b0);
y0 = _mm_mul_ps(y0, b1);
y1 = _mm_mul_ps(y1, b1);
s0 = _mm_add_ps(s0, y0);
s1 = _mm_add_ps(s1, y1);
x0 = _mm_loadu_ps(S2 + x);
x1 = _mm_loadu_ps(S2 + x + 4);
y0 = _mm_loadu_ps(S3 + x);
y1 = _mm_loadu_ps(S3 + x + 4);
x0 = _mm_mul_ps(x0, b2);
x1 = _mm_mul_ps(x1, b2);
y0 = _mm_mul_ps(y0, b3);
y1 = _mm_mul_ps(y1, b3);
s0 = _mm_add_ps(s0, x0);
s1 = _mm_add_ps(s1, x1);
s0 = _mm_add_ps(s0, y0);
s1 = _mm_add_ps(s1, y1);
_mm_storeu_ps( dst + x, s0);
_mm_storeu_ps( dst + x + 4, s1);
typedef VResizeNoVec VResizeLinearVec_32s8u;
typedef VResizeNoVec VResizeLinearVec_32f16u;
typedef VResizeNoVec VResizeLinearVec_32f16s;
typedef VResizeNoVec VResizeLinearVec_32f;
typedef VResizeNoVec VResizeCubicVec_32s8u;
typedef VResizeNoVec VResizeCubicVec_32f16u;
typedef VResizeNoVec VResizeCubicVec_32f16s;
typedef VResizeNoVec VResizeCubicVec_32f;
typedef HResizeNoVec HResizeLinearVec_8u32s;
typedef HResizeNoVec HResizeLinearVec_16u32f;
typedef HResizeNoVec HResizeLinearVec_16s32f;
typedef HResizeNoVec HResizeLinearVec_32f;
typedef HResizeNoVec HResizeLinearVec_64f;
template<typename T, typename WT, typename AT, int ONE, class VecOp>
typedef T value_type;
typedef AT alpha_type;
void operator()(const T** src, WT** dst, int count,
const int* xofs, const AT* alpha,
int swidth, int dwidth, int cn, int xmin, int xmax ) const
int dx0 = vecOp((const uchar**)src, (uchar**)dst, count,
xofs, (const uchar*)alpha, swidth, dwidth, cn, xmin, xmax );
for( k = 0; k <= count - 2; k++ )
const T *S0 = src[k], *S1 = src[k+1];
WT *D0 = dst[k], *D1 = dst[k+1];
for( dx = dx0; dx < xmax; dx++ )
WT a0 = alpha[dx*2], a1 = alpha[dx*2+1];
WT t0 = S0[sx]*a0 + S0[sx + cn]*a1;
WT t1 = S1[sx]*a0 + S1[sx + cn]*a1;
D0[dx] = t0; D1[dx] = t1;
for( ; dx < dwidth; dx++ )
D0[dx] = WT(S0[sx]*ONE); D1[dx] = WT(S1[sx]*ONE);
for( ; k < count; k++ )
for( dx = 0; dx < xmax; dx++ )
D[dx] = S[sx]*alpha[dx*2] + S[sx+cn]*alpha[dx*2+1];
for( ; dx < dwidth; dx++ )
D[dx] = WT(S[xofs[dx]]*ONE);
template<typename T, typename WT, typename AT, class CastOp, class VecOp>
typedef T value_type;
typedef AT alpha_type;
void operator()(const WT** src, T* dst, const AT* beta, int width ) const
WT b0 = beta[0], b1 = beta[1];
const WT *S0 = src[0], *S1 = src[1];
int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
#if CV_ENABLE_UNROLLED
for( ; x <= width - 4; x += 4 )
t0 = S0[x]*b0 + S1[x]*b1;
t1 = S0[x+1]*b0 + S1[x+1]*b1;
dst[x] = castOp(t0); dst[x+1] = castOp(t1);
t0 = S0[x+2]*b0 + S1[x+2]*b1;
t1 = S0[x+3]*b0 + S1[x+3]*b1;
dst[x+2] = castOp(t0); dst[x+3] = castOp(t1);
for( ; x < width; x++ )
dst[x] = castOp(S0[x]*b0 + S1[x]*b1);
struct VResizeLinear<uchar, int, short, FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>, VResizeLinearVec_32s8u>
typedef uchar value_type;
typedef int buf_type;
typedef short alpha_type;
void operator()(const buf_type** src, value_type* dst, const alpha_type* beta, int width ) const
alpha_type b0 = beta[0], b1 = beta[1];
const buf_type *S0 = src[0], *S1 = src[1];
VResizeLinearVec_32s8u vecOp;
int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
#if CV_ENABLE_UNROLLED
for( ; x <= width - 4; x += 4 )
dst[x+0] = uchar(( ((b0 * (S0[x+0] >> 4)) >> 16) + ((b1 * (S1[x+0] >> 4)) >> 16) + 2)>>2);
dst[x+1] = uchar(( ((b0 * (S0[x+1] >> 4)) >> 16) + ((b1 * (S1[x+1] >> 4)) >> 16) + 2)>>2);
dst[x+2] = uchar(( ((b0 * (S0[x+2] >> 4)) >> 16) + ((b1 * (S1[x+2] >> 4)) >> 16) + 2)>>2);
dst[x+3] = uchar(( ((b0 * (S0[x+3] >> 4)) >> 16) + ((b1 * (S1[x+3] >> 4)) >> 16) + 2)>>2);
for( ; x < width; x++ )
dst[x] = uchar(( ((b0 * (S0[x] >> 4)) >> 16) + ((b1 * (S1[x] >> 4)) >> 16) + 2)>>2);
template<typename T, typename WT, typename AT>
typedef T value_type;
typedef WT buf_type;
typedef AT alpha_type;
void operator()(const T** src, WT** dst, int count,
const int* xofs, const AT* alpha,
int swidth, int dwidth, int cn, int xmin, int xmax ) const
for( int k = 0; k < count; k++ )
const T *S = src[k];
int dx = 0, limit = xmin;
for( ; dx < limit; dx++, alpha += 4 )
int j, sx = xofs[dx] - cn;
for( j = 0; j < 4; j++ )
int sxj = sx + j*cn;
if( (unsigned)sxj >= (unsigned)swidth )
while( sxj >= swidth )
v += S[sxj]*alpha[j];
if( limit == dwidth )
for( ; dx < xmax; dx++, alpha += 4 )
D[dx] = S[sx-cn]*alpha[0] + S[sx]*alpha[1] +
S[sx+cn]*alpha[2] + S[sx+cn*2]*alpha[3];
template<typename T, typename WT, typename AT, class CastOp, class VecOp>
typedef T value_type;
typedef WT buf_type;
typedef AT alpha_type;
void operator()(const WT** src, T* dst, const AT* beta, int width ) const
WT b0 = beta[0], b1 = beta[1], b2 = beta[2], b3 = beta[3];
const WT *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
for( ; x < width; x++ )
dst[x] = castOp(S0[x]*b0 + S1[x]*b1 + S2[x]*b2 + S3[x]*b3);
template<typename T, typename WT, typename AT>
struct HResizeLanczos4
typedef T value_type;
typedef WT buf_type;
typedef AT alpha_type;
void operator()(const T** src, WT** dst, int count,
const int* xofs, const AT* alpha,
int swidth, int dwidth, int cn, int xmin, int xmax ) const
for( int k = 0; k < count; k++ )
const T *S = src[k];
int dx = 0, limit = xmin;
for( ; dx < limit; dx++, alpha += 8 )
int j, sx = xofs[dx] - cn*3;
for( j = 0; j < 8; j++ )
int sxj = sx + j*cn;
if( (unsigned)sxj >= (unsigned)swidth )
while( sxj >= swidth )
v += S[sxj]*alpha[j];
if( limit == dwidth )
for( ; dx < xmax; dx++, alpha += 8 )
D[dx] = S[sx-cn*3]*alpha[0] + S[sx-cn*2]*alpha[1] +
S[sx-cn]*alpha[2] + S[sx]*alpha[3] +
S[sx+cn]*alpha[4] + S[sx+cn*2]*alpha[5] +
S[sx+cn*3]*alpha[6] + S[sx+cn*4]*alpha[7];
template<typename T, typename WT, typename AT, class CastOp, class VecOp>
struct VResizeLanczos4
typedef T value_type;
typedef WT buf_type;
typedef AT alpha_type;
void operator()(const WT** src, T* dst, const AT* beta, int width ) const
int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
#if CV_ENABLE_UNROLLED
for( ; x <= width - 4; x += 4 )
const WT* S = src[0];
WT s0 = S[x]*b, s1 = S[x+1]*b, s2 = S[x+2]*b, s3 = S[x+3]*b;
for( k = 1; k < 8; k++ )
b = beta[k]; S = src[k];
s0 += S[x]*b; s1 += S[x+1]*b;
s2 += S[x+2]*b; s3 += S[x+3]*b;
dst[x] = castOp(s0); dst[x+1] = castOp(s1);
dst[x+2] = castOp(s2); dst[x+3] = castOp(s3);
for( ; x < width; x++ )
dst[x] = castOp(src[0][x]*beta[0] + src[1][x]*beta[1] +
src[2][x]*beta[2] + src[3][x]*beta[3] + src[4][x]*beta[4] +
src[5][x]*beta[5] + src[6][x]*beta[6] + src[7][x]*beta[7]);
static inline int clip(int x, int a, int b)
return x >= a ? (x < b ? x : b-1) : a;
static const int MAX_ESIZE=16;
template <typename HResize, typename VResize>
class resizeGeneric_Invoker :
public ParallelLoopBody
typedef typename HResize::value_type T;
typedef typename HResize::buf_type WT;
typedef typename HResize::alpha_type AT;
resizeGeneric_Invoker(const Mat& _src, Mat &_dst, const int *_xofs, const int *_yofs,
const AT* _alpha, const AT* __beta, const Size& _ssize, const Size &_dsize,
int _ksize, int _xmin, int _xmax) :
ParallelLoopBody(), src(_src), dst(_dst), xofs(_xofs), yofs(_yofs),
alpha(_alpha), _beta(__beta), ssize(_ssize), dsize(_dsize),
ksize(_ksize), xmin(_xmin), xmax(_xmax)
virtual void operator() (const Range& range) const
int dy, cn = src.channels();
int bufstep = (int)alignSize(dsize.width, 16);
AutoBuffer<WT> _buffer(bufstep*ksize);
const T* srows[MAX_ESIZE]={0};
WT* rows[MAX_ESIZE]={0};
int prev_sy[MAX_ESIZE];
for(int k = 0; k < ksize; k++ )
rows[k] = (WT*)_buffer + bufstep*k;
const AT* beta = _beta + ksize * range.start;
for( dy = range.start; dy < range.end; dy++, beta += ksize )
int sy0 = yofs[dy], k0=ksize, k1=0, ksize2 = ksize/2;
for(int k = 0; k < ksize; k++ )
int sy = clip(sy0 - ksize2 + 1 + k, 0, ssize.height);
for( k1 = std::max(k1, k); k1 < ksize; k1++ )
if( sy == prev_sy[k1] ) // if the sy-th row has been computed already, reuse it.
memcpy( rows[k], rows[k1], bufstep*sizeof(rows[0][0]) );
k0 = std::min(k0, k); // remember the first row that needs to be computed
srows[k] = (T*)(src.data + src.step*sy);
hresize( (const T**)(srows + k0), (WT**)(rows + k0), ksize - k0, xofs, (const AT*)(alpha),
ssize.width, dsize.width, cn, xmin, xmax );
vresize( (const WT**)rows, (T*)(dst.data + dst.step*dy), beta, dsize.width );
const int* xofs, *yofs;
const AT* alpha, *_beta;
int ksize, xmin, xmax;
template<class HResize, class VResize>
static void resizeGeneric_( const Mat& src, Mat& dst,
const int* xofs, const void* _alpha,
const int* yofs, const void* _beta,
int xmin, int xmax, int ksize )
typedef typename HResize::alpha_type AT;
const AT* beta = (const AT*)_beta;
Size ssize = src.size(), dsize = dst.size();
int cn = src.channels();
// image resize is a separable operation. In case of not too strong
Range range(0, dsize.height);
resizeGeneric_Invoker<HResize, VResize> invoker(src, dst, xofs, yofs, (const AT*)_alpha, beta,
ssize, dsize, ksize, xmin, xmax);
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
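// Clamp x to the half-open range [a, b): values below a become a,
// values at or above b become b-1.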
template <typename T, typename WT>
struct ResizeAreaFastNoVec
ResizeAreaFastNoVec(int, int) { }
ResizeAreaFastNoVec(int, int, int, int) { }
int operator() (const T*, T*, int) const
class ResizeAreaFastVec_SIMD_8u
ResizeAreaFastVec_SIMD_8u(int _cn, int _step) :
cn(_cn), step(_step)
use_simd = checkHardwareSupport(CV_CPU_SSE2);
int operator() (const uchar* S, uchar* D, int w) const
const uchar* S0 = S;
const uchar* S1 = S0 + step;
__m128i zero = _mm_setzero_si128();
__m128i delta2 = _mm_set1_epi16(2);
__m128i masklow = _mm_set1_epi16(0x00ff);
for ( ; dx <= w - 8; dx += 8, S0 += 16, S1 += 16, D += 8)
__m128i r0 = _mm_loadu_si128((const __m128i*)S0);
__m128i r1 = _mm_loadu_si128((const __m128i*)S1);
__m128i s0 = _mm_add_epi16(_mm_srli_epi16(r0, 8), _mm_and_si128(r0, masklow));
__m128i s1 = _mm_add_epi16(_mm_srli_epi16(r1, 8), _mm_and_si128(r1, masklow));
s0 = _mm_add_epi16(_mm_add_epi16(s0, s1), delta2);
s0 = _mm_packus_epi16(_mm_srli_epi16(s0, 2), zero);
_mm_storel_epi64((__m128i*)D, s0);
for ( ; dx <= w - 11; dx += 6, S0 += 12, S1 += 12, D += 6)
__m128i r0 = _mm_loadu_si128((const __m128i*)S0);
__m128i r1 = _mm_loadu_si128((const __m128i*)S1);
__m128i r0_16l = _mm_unpacklo_epi8(r0, zero);
__m128i r0_16h = _mm_unpacklo_epi8(_mm_srli_si128(r0, 6), zero);
__m128i r1_16l = _mm_unpacklo_epi8(r1, zero);
__m128i r1_16h = _mm_unpacklo_epi8(_mm_srli_si128(r1, 6), zero);
__m128i s0 = _mm_add_epi16(r0_16l, _mm_srli_si128(r0_16l, 6));
__m128i s1 = _mm_add_epi16(r1_16l, _mm_srli_si128(r1_16l, 6));
s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
s0 = _mm_packus_epi16(_mm_srli_epi16(s0, 2), zero);
_mm_storel_epi64((__m128i*)D, s0);
s0 = _mm_add_epi16(r0_16h, _mm_srli_si128(r0_16h, 6));
s1 = _mm_add_epi16(r1_16h, _mm_srli_si128(r1_16h, 6));
s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
s0 = _mm_packus_epi16(_mm_srli_epi16(s0, 2), zero);
_mm_storel_epi64((__m128i*)(D+3), s0);
int v[] = { 0, 0, -1, -1 };
__m128i mask = _mm_loadu_si128((const __m128i*)v);
for ( ; dx <= w - 8; dx += 8, S0 += 16, S1 += 16, D += 8)
__m128i r0 = _mm_loadu_si128((const __m128i*)S0);
__m128i r1 = _mm_loadu_si128((const __m128i*)S1);
__m128i r0_16l = _mm_unpacklo_epi8(r0, zero);
__m128i r0_16h = _mm_unpackhi_epi8(r0, zero);
__m128i r1_16l = _mm_unpacklo_epi8(r1, zero);
__m128i r1_16h = _mm_unpackhi_epi8(r1, zero);
__m128i s0 = _mm_add_epi16(r0_16l, _mm_srli_si128(r0_16l, 8));
__m128i s1 = _mm_add_epi16(r1_16l, _mm_srli_si128(r1_16l, 8));
s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
__m128i res0 = _mm_srli_epi16(s0, 2);
s0 = _mm_add_epi16(r0_16h, _mm_srli_si128(r0_16h, 8));
s1 = _mm_add_epi16(r1_16h, _mm_srli_si128(r1_16h, 8));
s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
__m128i res1 = _mm_srli_epi16(s0, 2);
s0 = _mm_packus_epi16(_mm_or_si128(_mm_andnot_si128(mask, res0),
_mm_and_si128(mask, _mm_slli_si128(res1, 8))), zero);
_mm_storel_epi64((__m128i*)(D), s0);
class ResizeAreaFastVec_SIMD_16u
ResizeAreaFastVec_SIMD_16u(int _cn, int _step) :
cn(_cn), step(_step)
use_simd = checkHardwareSupport(CV_CPU_SSE2);
int operator() (const ushort* S, ushort* D, int w) const
const ushort* S0 = (const ushort*)S;
const ushort* S1 = (const ushort*)((const uchar*)(S) + step);
__m128i masklow = _mm_set1_epi32(0x0000ffff);
__m128i zero = _mm_setzero_si128();
__m128i delta2 = _mm_set1_epi32(2);
#define _mm_packus_epi32(a, zero) _mm_packs_epi32(_mm_srai_epi32(_mm_slli_epi32(a, 16), 16), zero)
for ( ; dx <= w - 4; dx += 4, S0 += 8, S1 += 8, D += 4)
__m128i r0 = _mm_loadu_si128((const __m128i*)S0);
__m128i r1 = _mm_loadu_si128((const __m128i*)S1);
__m128i s0 = _mm_add_epi32(_mm_srli_epi32(r0, 16), _mm_and_si128(r0, masklow));
__m128i s1 = _mm_add_epi32(_mm_srli_epi32(r1, 16), _mm_and_si128(r1, masklow));
s0 = _mm_add_epi32(_mm_add_epi32(s0, s1), delta2);
s0 = _mm_srli_epi32(s0, 2);
s0 = _mm_packus_epi32(s0, zero);
_mm_storel_epi64((__m128i*)D, s0);
for ( ; dx <= w - 4; dx += 3, S0 += 6, S1 += 6, D += 3)
__m128i r0 = _mm_loadu_si128((const __m128i*)S0);
__m128i r1 = _mm_loadu_si128((const __m128i*)S1);
__m128i r0_16l = _mm_unpacklo_epi16(r0, zero);
__m128i r0_16h = _mm_unpacklo_epi16(_mm_srli_si128(r0, 6), zero);
__m128i r1_16l = _mm_unpacklo_epi16(r1, zero);
__m128i r1_16h = _mm_unpacklo_epi16(_mm_srli_si128(r1, 6), zero);
__m128i s0 = _mm_add_epi32(r0_16l, r0_16h);
__m128i s1 = _mm_add_epi32(r1_16l, r1_16h);
s0 = _mm_add_epi32(delta2, _mm_add_epi32(s0, s1));
s0 = _mm_packus_epi32(_mm_srli_epi32(s0, 2), zero);
_mm_storel_epi64((__m128i*)D, s0);
for ( ; dx <= w - 4; dx += 4, S0 += 8, S1 += 8, D += 4)
__m128i r0 = _mm_loadu_si128((const __m128i*)S0);
__m128i r1 = _mm_loadu_si128((const __m128i*)S1);
__m128i r0_32l = _mm_unpacklo_epi16(r0, zero);
__m128i r0_32h = _mm_unpackhi_epi16(r0, zero);
__m128i r1_32l = _mm_unpacklo_epi16(r1, zero);
__m128i r1_32h = _mm_unpackhi_epi16(r1, zero);
__m128i s0 = _mm_add_epi32(r0_32l, r0_32h);
__m128i s1 = _mm_add_epi32(r1_32l, r1_32h);
s0 = _mm_add_epi32(s1, _mm_add_epi32(s0, delta2));
s0 = _mm_packus_epi32(_mm_srli_epi32(s0, 2), zero);
_mm_storel_epi64((__m128i*)D, s0);
#undef _mm_packus_epi32
typedef ResizeAreaFastNoVec<uchar, uchar> ResizeAreaFastVec_SIMD_8u;
typedef ResizeAreaFastNoVec<ushort, ushort> ResizeAreaFastVec_SIMD_16u;
template<typename T, typename SIMDVecOp>
struct ResizeAreaFastVec
ResizeAreaFastVec(int _scale_x, int _scale_y, int _cn, int _step) :
scale_x(_scale_x), scale_y(_scale_y), cn(_cn), step(_step), vecOp(_cn, _step)
fast_mode = scale_x == 2 && scale_y == 2 && (cn == 1 || cn == 3 || cn == 4);
int operator() (const T* S, T* D, int w) const
const T* nextS = (const T*)((const uchar*)S + step);
int dx = vecOp(S, D, w);
for( ; dx < w; ++dx )
D[dx] = (T)((S[index] + S[index+1] + nextS[index] + nextS[index+1] + 2) >> 2);
for( ; dx < w; dx += 3 )
D[dx] = (T)((S[index] + S[index+3] + nextS[index] + nextS[index+3] + 2) >> 2);
D[dx+1] = (T)((S[index+1] + S[index+4] + nextS[index+1] + nextS[index+4] + 2) >> 2);
D[dx+2] = (T)((S[index+2] + S[index+5] + nextS[index+2] + nextS[index+5] + 2) >> 2);
for( ; dx < w; dx += 4 )
D[dx] = (T)((S[index] + S[index+4] + nextS[index] + nextS[index+4] + 2) >> 2);
D[dx+1] = (T)((S[index+1] + S[index+5] + nextS[index+1] + nextS[index+5] + 2) >> 2);
D[dx+2] = (T)((S[index+2] + S[index+6] + nextS[index+2] + nextS[index+6] + 2) >> 2);
D[dx+3] = (T)((S[index+3] + S[index+7] + nextS[index+3] + nextS[index+7] + 2) >> 2);
int scale_x, scale_y;
template <typename T, typename WT, typename VecOp>
class resizeAreaFast_Invoker :
public ParallelLoopBody
resizeAreaFast_Invoker(const Mat &_src, Mat &_dst,
int _scale_x, int _scale_y, const int* _ofs, const int* _xofs) :
ParallelLoopBody(), src(_src), dst(_dst), scale_x(_scale_x),
scale_y(_scale_y), ofs(_ofs), xofs(_xofs)
virtual void operator() (const Range& range) const
Size ssize = src.size(), dsize = dst.size();
int cn = src.channels();
int area = scale_x*scale_y;
float scale = 1.f/(area);
int dwidth1 = (ssize.width/scale_x)*cn;
VecOp vop(scale_x, scale_y, src.channels(), (int)src.step/*, area_ofs*/);
for( dy = range.start; dy < range.end; dy++ )
T* D = (T*)(dst.data + dst.step*dy);
int sy0 = dy*scale_y;
int w = sy0 + scale_y <= ssize.height ? dwidth1 : 0;
if( sy0 >= ssize.height )
for( dx = 0; dx < dsize.width; dx++ )
dx = vop((const T*)(src.data + src.step * sy0), D, w);
for( ; dx < w; dx++ )
const T* S = (const T*)(src.data + src.step * sy0) + xofs[dx];
#if CV_ENABLE_UNROLLED
for( ; k <= area - 4; k += 4 )
sum += S[ofs[k]] + S[ofs[k+1]] + S[ofs[k+2]] + S[ofs[k+3]];
for( ; k < area; k++ )
D[dx] = saturate_cast<T>(sum * scale);
for( ; dx < dsize.width; dx++ )
int count = 0, sx0 = xofs[dx];
if( sx0 >= ssize.width )
for( int sy = 0; sy < scale_y; sy++ )
if( sy0 + sy >= ssize.height )
const T* S = (const T*)(src.data + src.step*(sy0 + sy)) + sx0;
for( int sx = 0; sx < scale_x*cn; sx += cn )
if( sx0 + sx >= ssize.width )
D[dx] = saturate_cast<T>((float)sum/count);
int scale_x, scale_y;
const int *ofs, *xofs;
template<typename T, typename WT, typename VecOp>
static void resizeAreaFast_( const Mat& src, Mat& dst, const int* ofs, const int* xofs,
int scale_x, int scale_y )
Range range(0, dst.rows);
resizeAreaFast_Invoker<T, WT, VecOp> invoker(src, dst, scale_x,
scale_y, ofs, xofs);
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
struct DecimateAlpha
template<typename T, typename WT> class ResizeArea_Invoker :
public ParallelLoopBody
ResizeArea_Invoker( const Mat& _src, Mat& _dst,
const DecimateAlpha* _xtab, int _xtab_size,
const DecimateAlpha* _ytab, int _ytab_size,
const int* _tabofs )
xtab_size0 = _xtab_size;
ytab_size = _ytab_size;
virtual void operator() (const Range& range) const
Size dsize = dst->size();
int cn = dst->channels();
AutoBuffer<WT> _buffer(dsize.width*2);
const DecimateAlpha* xtab = xtab0;
int xtab_size = xtab_size0;
WT *buf = _buffer, *sum = buf + dsize.width;
int j_start = tabofs[range.start], j_end = tabofs[range.end], j, k, dx, prev_dy = ytab[j_start].di;
for( dx = 0; dx < dsize.width; dx++ )
for( j = j_start; j < j_end; j++ )
WT beta = ytab[j].alpha;
int dy = ytab[j].di;
int sy = ytab[j].si;
const T* S = (const T*)(src->data + src->step*sy);
for( dx = 0; dx < dsize.width; dx++ )
for( k = 0; k < xtab_size; k++ )
int dxn = xtab[k].di;
WT alpha = xtab[k].alpha;
buf[dxn] += S[xtab[k].si]*alpha;
for( k = 0; k < xtab_size; k++ )
int sxn = xtab[k].si;
int dxn = xtab[k].di;
WT alpha = xtab[k].alpha;
WT t0 = buf[dxn] + S[sxn]*alpha;
WT t1 = buf[dxn+1] + S[sxn+1]*alpha;
buf[dxn] = t0; buf[dxn+1] = t1;
for( k = 0; k < xtab_size; k++ )
int sxn = xtab[k].si;
int dxn = xtab[k].di;
WT alpha = xtab[k].alpha;
WT t0 = buf[dxn] + S[sxn]*alpha;
WT t1 = buf[dxn+1] + S[sxn+1]*alpha;
WT t2 = buf[dxn+2] + S[sxn+2]*alpha;
buf[dxn] = t0; buf[dxn+1] = t1; buf[dxn+2] = t2;
for( k = 0; k < xtab_size; k++ )
int sxn = xtab[k].si;
int dxn = xtab[k].di;
WT alpha = xtab[k].alpha;
WT t0 = buf[dxn] + S[sxn]*alpha;
WT t1 = buf[dxn+1] + S[sxn+1]*alpha;
buf[dxn] = t0; buf[dxn+1] = t1;
t0 = buf[dxn+2] + S[sxn+2]*alpha;
t1 = buf[dxn+3] + S[sxn+3]*alpha;
buf[dxn+2] = t0; buf[dxn+3] = t1;
for( k = 0; k < xtab_size; k++ )
int sxn = xtab[k].si;
int dxn = xtab[k].di;
WT alpha = xtab[k].alpha;
for( int c = 0; c < cn; c++ )
buf[dxn + c] += S[sxn + c]*alpha;
T* D = (T*)(dst->data + dst->step*prev_dy);
for( dx = 0; dx < dsize.width; dx++ )
D[dx] = saturate_cast<T>(sum[dx]);
sum[dx] = beta*buf[dx];
for( dx = 0; dx < dsize.width; dx++ )
sum[dx] += beta*buf[dx];
T* D = (T*)(dst->data + dst->step*prev_dy);
for( dx = 0; dx < dsize.width; dx++ )
D[dx] = saturate_cast<T>(sum[dx]);
const DecimateAlpha* xtab0;
const DecimateAlpha* ytab;
int xtab_size0, ytab_size;
template <typename T, typename WT>
static void resizeArea_( const Mat& src, Mat& dst,
const DecimateAlpha* xtab, int xtab_size,
const DecimateAlpha* ytab, int ytab_size,
parallel_for_(Range(0, dst.rows),
ResizeArea_Invoker<T, WT>(src, dst, xtab, xtab_size, ytab, ytab_size, tabofs),
dst.total()/((double)(1 << 16)));
typedef void (*ResizeFunc)( const Mat& src, Mat& dst,
const int* xofs, const void* alpha,
const int* yofs, const void* beta,
int xmin, int xmax, int ksize );
typedef void (*ResizeAreaFastFunc)( const Mat& src, Mat& dst,
const int* ofs, const int *xofs,
int scale_x, int scale_y );
typedef void (*ResizeAreaFunc)( const Mat& src, Mat& dst,
const DecimateAlpha* xtab, int xtab_size,
const DecimateAlpha* ytab, int ytab_size,
static int computeResizeAreaTab( int ssize, int dsize, int cn, double scale, DecimateAlpha* tab )
for(int dx = 0; dx < dsize; dx++ )
double fsx1 = dx * scale;
double fsx2 = fsx1 + scale;
double cellWidth = std::min(scale, ssize - fsx1);
int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2);
sx2 = std::min(sx2, ssize - 1);
sx1 = std::min(sx1, sx2);
if( sx1 - fsx1 > 1e-3 )
assert( k < ssize*2 );
tab[k].di = dx * cn;
tab[k].si = (sx1 - 1) * cn;
tab[k++].alpha = (float)((sx1 - fsx1) / cellWidth);
for(int sx = sx1; sx < sx2; sx++ )
assert( k < ssize*2 );
tab[k].di = dx * cn;
tab[k].si = sx * cn;
tab[k++].alpha = float(1.0 / cellWidth);
if( fsx2 - sx2 > 1e-3 )
assert( k < ssize*2 );
tab[k].di = dx * cn;
tab[k].si = sx2 * cn;
tab[k++].alpha = (float)(std::min(std::min(fsx2 - sx2, 1.), cellWidth) / cellWidth);
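// Each DecimateAlpha entry maps source column 'si' to destination column 'di' with
// weight 'alpha'; border cells that are only partially covered get a proportionally
// smaller weight, so the weights of every destination cell sum to 1.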
#define CHECK_IPP_FUNC(FUNC) if( FUNC==0 ) { *ok = false; return;}
#define CHECK_IPP_STATUS(STATUS) if( STATUS!=ippStsNoErr ) { *ok = false; return;}
#define SET_IPP_RESIZE_LINEAR_FUNC_PTR(TYPE, CN) \
func = (ippiResizeFunc)ippiResizeLinear_##TYPE##_##CN##R; CHECK_IPP_FUNC(func);\
CHECK_IPP_STATUS(ippiResizeGetSize_##TYPE(srcSize, dstSize, (IppiInterpolationType)mode, 0, &specSize, &initSize));\
specBuf.allocate(specSize);\
pSpec = (uchar*)specBuf;\
CHECK_IPP_STATUS(ippiResizeLinearInit_##TYPE(srcSize, dstSize, (IppiResizeSpec_32f*)pSpec));
#define SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(TYPE, CN) \
if (mode==(int)ippCubic) { *ok = false; return;}\
func = (ippiResizeFunc)ippiResizeLinear_##TYPE##_##CN##R; CHECK_IPP_FUNC(func);\
CHECK_IPP_STATUS(ippiResizeGetSize_##TYPE(srcSize, dstSize, (IppiInterpolationType)mode, 0, &specSize, &initSize));\
specBuf.allocate(specSize);\
pSpec = (uchar*)specBuf;\
CHECK_IPP_STATUS(ippiResizeLinearInit_##TYPE(srcSize, dstSize, (IppiResizeSpec_64f*)pSpec));\
getBufferSizeFunc = (ippiResizeGetBufferSize)ippiResizeGetBufferSize_##TYPE;\
getSrcOffsetFunc = (ippiResizeGetSrcOffset)ippiResizeGetSrcOffset_##TYPE;
#define SET_IPP_RESIZE_CUBIC_FUNC_PTR(TYPE, CN) \
func = (ippiResizeFunc)ippiResizeCubic_##TYPE##_##CN##R; CHECK_IPP_FUNC(func);\
CHECK_IPP_STATUS(ippiResizeGetSize_##TYPE(srcSize, dstSize, (IppiInterpolationType)mode, 0, &specSize, &initSize));\
specBuf.allocate(specSize);\
pSpec = (uchar*)specBuf;\
AutoBuffer<uchar> buf(initSize);\
uchar* pInit = (uchar*)buf;\
CHECK_IPP_STATUS(ippiResizeCubicInit_##TYPE(srcSize, dstSize, 0.f, 0.75f, (IppiResizeSpec_32f*)pSpec, pInit));
#define SET_IPP_RESIZE_PTR(TYPE, CN) \
if (mode == (int)ippLinear) { SET_IPP_RESIZE_LINEAR_FUNC_PTR(TYPE, CN);}\
else if (mode == (int)ippCubic) { SET_IPP_RESIZE_CUBIC_FUNC_PTR(TYPE, CN);}\
else { *ok = false; return;}\
getBufferSizeFunc = (ippiResizeGetBufferSize)ippiResizeGetBufferSize_##TYPE;\
getSrcOffsetFunc = (ippiResizeGetSrcOffset)ippiResizeGetSrcOffset_##TYPE;
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR*100 + IPP_VERSION_MINOR >= 701)
class IPPresizeInvoker :
public ParallelLoopBody
IPPresizeInvoker(Mat &_src, Mat &_dst, double _inv_scale_x, double _inv_scale_y, int _mode, bool *_ok) :
ParallelLoopBody(), src(_src), dst(_dst), inv_scale_x(_inv_scale_x), inv_scale_y(_inv_scale_y), mode(_mode), ok(_ok)
IppiSize srcSize, dstSize;
int type = src.type();
int specSize = 0, initSize = 0;
srcSize.width = src.cols;
srcSize.height = src.rows;
dstSize.width = dst.cols;
dstSize.height = dst.rows;
case CV_8UC1: SET_IPP_RESIZE_PTR(8u,C1); break;
case CV_8UC3: SET_IPP_RESIZE_PTR(8u,C3); break;
case CV_8UC4: SET_IPP_RESIZE_PTR(8u,C4); break;
case CV_16UC1: SET_IPP_RESIZE_PTR(16u,C1); break;
case CV_16UC3: SET_IPP_RESIZE_PTR(16u,C3); break;
case CV_16UC4: SET_IPP_RESIZE_PTR(16u,C4); break;
case CV_16SC1: SET_IPP_RESIZE_PTR(16s,C1); break;
case CV_16SC3: SET_IPP_RESIZE_PTR(16s,C3); break;
case CV_16SC4: SET_IPP_RESIZE_PTR(16s,C4); break;
case CV_32FC1: SET_IPP_RESIZE_PTR(32f,C1); break;
case CV_32FC3: SET_IPP_RESIZE_PTR(32f,C3); break;
case CV_32FC4: SET_IPP_RESIZE_PTR(32f,C4); break;
case CV_64FC1: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C1); break;
case CV_64FC3: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C3); break;
case CV_64FC4: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C4); break;
default: { *ok = false; return;} break;
virtual void operator() (const Range& range) const
if (*ok == false) return;
int cn = src.channels();
int dsty = min(cvRound(range.start * inv_scale_y), dst.rows);
int dstwidth = min(cvRound(src.cols * inv_scale_x), dst.cols);
int dstheight = min(cvRound(range.end * inv_scale_y), dst.rows);
IppiPoint dstOffset = { 0, dsty }, srcOffset = {0, 0};
IppiSize dstSize = { dstwidth, dstheight - dsty };
int bufsize = 0, itemSize = (int)src.elemSize1();
CHECK_IPP_STATUS(getBufferSizeFunc(pSpec, dstSize, cn, &bufsize));
CHECK_IPP_STATUS(getSrcOffsetFunc(pSpec, dstOffset, &srcOffset));
Ipp8u* pSrc = (Ipp8u*)src.data + (int)src.step[0] * srcOffset.y + srcOffset.x * cn * itemSize;
Ipp8u* pDst = (Ipp8u*)dst.data + (int)dst.step[0] * dstOffset.y + dstOffset.x * cn * itemSize;
AutoBuffer<uchar> buf(bufsize + 64);
uchar* bufptr = alignPtr((uchar*)buf, 32);
if( func( pSrc, (int)src.step[0], pDst, (int)dst.step[0], dstOffset, dstSize, ippBorderRepl, 0, pSpec, bufptr ) < 0 )
AutoBuffer<uchar> specBuf;
ippiResizeFunc func;
ippiResizeGetBufferSize getBufferSizeFunc;
ippiResizeGetSrcOffset getSrcOffsetFunc;
const IPPresizeInvoker& operator= (const IPPresizeInvoker&);
static void ocl_computeResizeAreaTabs(int ssize, int dsize, double scale, int * const map_tab,
float * const alpha_tab, int * const ofs_tab)
for ( ; dx < dsize; dx++)
double fsx1 = dx * scale;
double fsx2 = fsx1 + scale;
double cellWidth = std::min(scale, ssize - fsx1);
int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2);
sx2 = std::min(sx2, ssize - 1);
sx1 = std::min(sx1, sx2);
if (sx1 - fsx1 > 1e-3)
map_tab[k] = sx1 - 1;
alpha_tab[k++] = (float)((sx1 - fsx1) / cellWidth);
for (int sx = sx1; sx < sx2; sx++)
alpha_tab[k++] = float(1.0 / cellWidth);
if (fsx2 - sx2 > 1e-3)
alpha_tab[k++] = (float)(std::min(std::min(fsx2 - sx2, 1.), cellWidth) / cellWidth);
static void ocl_computeResizeAreaFastTabs(int * dmap_tab, int * smap_tab, int scale, int dcols, int scol)
for (int i = 0; i < dcols; ++i)
dmap_tab[i] = scale * i;
for (int i = 0, size = dcols * scale; i < size; ++i)
smap_tab[i] = std::min(scol - 1, i);
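// For the fast integer-scale area path, destination index i maps to source index
// scale*i, and source indices are clamped to the last valid column/row.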
2037 static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
2038 double fx, double fy, int interpolation)
2040 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2042 double inv_fx = 1. / fx, inv_fy = 1. / fy;
2043 float inv_fxf = (float)inv_fx, inv_fyf = (float)inv_fy;
2046 (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR ||
2047 (interpolation == INTER_AREA && inv_fx >= 1 && inv_fy >= 1) )) )
2050 UMat src = _src.getUMat();
2051 _dst.create(dsize, type);
2052 UMat dst = _dst.getUMat();
2055 size_t globalsize[] = { dst.cols, dst.rows };
2057 if (interpolation == INTER_LINEAR)
2059 int wdepth = std::max(depth, CV_32S);
2060 int wtype = CV_MAKETYPE(wdepth, cn);
2062 k.create("resizeLN", ocl::imgproc::resize_oclsrc,
2063 format("-D INTER_LINEAR -D depth=%d -D PIXTYPE=%s -D PIXTYPE1=%s "
2064 "-D WORKTYPE=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d",
2065 depth, ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype),
2066 ocl::convertTypeStr(depth, wdepth, cn, buf[0]),
2067 ocl::convertTypeStr(wdepth, depth, cn, buf[1]),
2070 else if (interpolation == INTER_NEAREST)
2072 k.create("resizeNN", ocl::imgproc::resize_oclsrc,
2073 format("-D INTER_NEAREST -D PIXTYPE=%s -D PIXTYPE1=%s -D cn=%d",
2074 ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth), cn));
2076 else if (interpolation == INTER_AREA)
2078 int iscale_x = saturate_cast<int>(inv_fx);
2079 int iscale_y = saturate_cast<int>(inv_fy);
2080 bool is_area_fast = std::abs(inv_fx - iscale_x) < DBL_EPSILON &&
2081 std::abs(inv_fy - iscale_y) < DBL_EPSILON;
2082 int wdepth = std::max(depth, is_area_fast ? CV_32S : CV_32F);
2083 int wtype = CV_MAKE_TYPE(wdepth, cn);
2086 String buildOption = format("-D INTER_AREA -D PIXTYPE=%s -D PIXTYPE1=%s -D WTV=%s -D convertToWTV=%s -D cn=%d",
2087 ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype),
2088 ocl::convertTypeStr(depth, wdepth, cn, cvt[0]), cn);
2090 UMat alphaOcl, tabofsOcl, mapOcl;
2095 int wdepth2 = std::max(CV_32F, depth), wtype2 = CV_MAKE_TYPE(wdepth2, cn);
2096 buildOption = buildOption + format(" -D convertToPIXTYPE=%s -D WT2V=%s -D convertToWT2V=%s -D INTER_AREA_FAST"
2097 " -D XSCALE=%d -D YSCALE=%d -D SCALE=%ff",
2098 ocl::convertTypeStr(wdepth2, depth, cn, cvt[0]),
2099 ocl::typeToStr(wtype2), ocl::convertTypeStr(wdepth, wdepth2, cn, cvt[1]),
2100 iscale_x, iscale_y, 1.0f / (iscale_x * iscale_y));
2102 k.create("resizeAREA_FAST", ocl::imgproc::resize_oclsrc, buildOption);
2106 int smap_tab_size = dst.cols * iscale_x + dst.rows * iscale_y;
2107 AutoBuffer<int> dmap_tab(dst.cols + dst.rows), smap_tab(smap_tab_size);
2108 int * dxmap_tab = dmap_tab, * dymap_tab = dxmap_tab + dst.cols;
2109 int * sxmap_tab = smap_tab, * symap_tab = smap_tab + dst.cols * iscale_y;
2111 ocl_computeResizeAreaFastTabs(dxmap_tab, sxmap_tab, iscale_x, dst.cols, src.cols);
2112 ocl_computeResizeAreaFastTabs(dymap_tab, symap_tab, iscale_y, dst.rows, src.rows);
2114 Mat(1, dst.cols + dst.rows, CV_32SC1, (void *)dmap_tab).copyTo(dmap);
2115 Mat(1, smap_tab_size, CV_32SC1, (void *)smap_tab).copyTo(smap);
2119 buildOption = buildOption + format(" -D convertToPIXTYPE=%s", ocl::convertTypeStr(wdepth, depth, cn, cvt[0]));
2120 k.create("resizeAREA", ocl::imgproc::resize_oclsrc, buildOption);
2124 Size ssize = src.size();
2125 int xytab_size = (ssize.width + ssize.height) << 1;
2126 int tabofs_size = dsize.height + dsize.width + 2;
2128 AutoBuffer<int> _xymap_tab(xytab_size), _xyofs_tab(tabofs_size);
2129 AutoBuffer<float> _xyalpha_tab(xytab_size);
2130 int * xmap_tab = _xymap_tab, * ymap_tab = _xymap_tab + (ssize.width << 1);
2131 float * xalpha_tab = _xyalpha_tab, * yalpha_tab = _xyalpha_tab + (ssize.width << 1);
2132 int * xofs_tab = _xyofs_tab, * yofs_tab = _xyofs_tab + dsize.width + 1;
2134 ocl_computeResizeAreaTabs(ssize.width, dsize.width, inv_fx, xmap_tab, xalpha_tab, xofs_tab);
2135 ocl_computeResizeAreaTabs(ssize.height, dsize.height, inv_fy, ymap_tab, yalpha_tab, yofs_tab);
2137 // upload the precomputed tables to the GPU
2138 Mat(1, xytab_size, CV_32FC1, (void *)_xyalpha_tab).copyTo(alphaOcl);
2139 Mat(1, xytab_size, CV_32SC1, (void *)_xymap_tab).copyTo(mapOcl);
2140 Mat(1, tabofs_size, CV_32SC1, (void *)_xyofs_tab).copyTo(tabofsOcl);
2143 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src), dstarg = ocl::KernelArg::WriteOnly(dst);
2146 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(dmap), ocl::KernelArg::PtrReadOnly(smap));
2148 k.args(srcarg, dstarg, inv_fxf, inv_fyf, ocl::KernelArg::PtrReadOnly(tabofsOcl),
2149 ocl::KernelArg::PtrReadOnly(mapOcl), ocl::KernelArg::PtrReadOnly(alphaOcl));
2151 return k.run(2, globalsize, NULL, false);
2156 k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
2157 (float)inv_fx, (float)inv_fy);
2159 return k.run(2, globalsize, 0, false);
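/*
   Sketch of how this OpenCL branch is reached through the transparent API
   (illustrative only; "input.png" is a hypothetical file name):

       cv::Mat img = cv::imread("input.png");
       cv::UMat usrc, udst;
       img.copyTo(usrc);
       cv::resize(usrc, udst, cv::Size(), 0.25, 0.25, cv::INTER_AREA);

   With a UMat destination and an available OpenCL device, CV_OCL_RUN in
   cv::resize dispatches to ocl_resize() above; otherwise the CPU path runs.
*/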
2166 //////////////////////////////////////////////////////////////////////////////////////////
2168 void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
2169 double inv_scale_x, double inv_scale_y, int interpolation )
2171 static ResizeFunc linear_tab[] =
2174 HResizeLinear<uchar, int, short,
2175 INTER_RESIZE_COEF_SCALE,
2176 HResizeLinearVec_8u32s>,
2177 VResizeLinear<uchar, int, short,
2178 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
2179 VResizeLinearVec_32s8u> >,
2182 HResizeLinear<ushort, float, float, 1,
2183 HResizeLinearVec_16u32f>,
2184 VResizeLinear<ushort, float, float, Cast<float, ushort>,
2185 VResizeLinearVec_32f16u> >,
2187 HResizeLinear<short, float, float, 1,
2188 HResizeLinearVec_16s32f>,
2189 VResizeLinear<short, float, float, Cast<float, short>,
2190 VResizeLinearVec_32f16s> >,
2193 HResizeLinear<float, float, float, 1,
2194 HResizeLinearVec_32f>,
2195 VResizeLinear<float, float, float, Cast<float, float>,
2196 VResizeLinearVec_32f> >,
2198 HResizeLinear<double, double, float, 1,
2200 VResizeLinear<double, double, float, Cast<double, double>,
2205 static ResizeFunc cubic_tab[] =
2208 HResizeCubic<uchar, int, short>,
2209 VResizeCubic<uchar, int, short,
2210 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
2211 VResizeCubicVec_32s8u> >,
2214 HResizeCubic<ushort, float, float>,
2215 VResizeCubic<ushort, float, float, Cast<float, ushort>,
2216 VResizeCubicVec_32f16u> >,
2218 HResizeCubic<short, float, float>,
2219 VResizeCubic<short, float, float, Cast<float, short>,
2220 VResizeCubicVec_32f16s> >,
2223 HResizeCubic<float, float, float>,
2224 VResizeCubic<float, float, float, Cast<float, float>,
2225 VResizeCubicVec_32f> >,
2227 HResizeCubic<double, double, float>,
2228 VResizeCubic<double, double, float, Cast<double, double>,
2233 static ResizeFunc lanczos4_tab[] =
2235 resizeGeneric_<HResizeLanczos4<uchar, int, short>,
2236 VResizeLanczos4<uchar, int, short,
2237 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
2240 resizeGeneric_<HResizeLanczos4<ushort, float, float>,
2241 VResizeLanczos4<ushort, float, float, Cast<float, ushort>,
2243 resizeGeneric_<HResizeLanczos4<short, float, float>,
2244 VResizeLanczos4<short, float, float, Cast<float, short>,
2247 resizeGeneric_<HResizeLanczos4<float, float, float>,
2248 VResizeLanczos4<float, float, float, Cast<float, float>,
2250 resizeGeneric_<HResizeLanczos4<double, double, float>,
2251 VResizeLanczos4<double, double, float, Cast<double, double>,
2256 static ResizeAreaFastFunc areafast_tab[] =
2258 resizeAreaFast_<uchar, int, ResizeAreaFastVec<uchar, ResizeAreaFastVec_SIMD_8u> >,
2260 resizeAreaFast_<ushort, float, ResizeAreaFastVec<ushort, ResizeAreaFastVec_SIMD_16u> >,
2261 resizeAreaFast_<short, float, ResizeAreaFastVec<short, ResizeAreaFastNoVec<short, float> > >,
2263 resizeAreaFast_<float, float, ResizeAreaFastNoVec<float, float> >,
2264 resizeAreaFast_<double, double, ResizeAreaFastNoVec<double, double> >,
2268 static ResizeAreaFunc area_tab[] =
2270 resizeArea_<uchar, float>, 0, resizeArea_<ushort, float>,
2271 resizeArea_<short, float>, 0, resizeArea_<float, float>,
2272 resizeArea_<double, double>, 0
2275 Size ssize = _src.size();
2277 CV_Assert( ssize.area() > 0 );
2278 CV_Assert( dsize.area() > 0 || (inv_scale_x > 0 && inv_scale_y > 0) );
2279 if( dsize.area() == 0 )
2281 dsize = Size(saturate_cast<int>(ssize.width*inv_scale_x),
2282 saturate_cast<int>(ssize.height*inv_scale_y));
2283 CV_Assert( dsize.area() > 0 );
2287 inv_scale_x = (double)dsize.width/ssize.width;
2288 inv_scale_y = (double)dsize.height/ssize.height;
2291 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
2292 ocl_resize(_src, _dst, dsize, inv_scale_x, inv_scale_y, interpolation))
2294 Mat src = _src.getMat();
2295 _dst.create(dsize, src.type());
2296 Mat dst = _dst.getMat();
2298 #ifdef HAVE_TEGRA_OPTIMIZATION
2299 if (tegra::resize(src, dst, (float)inv_scale_x, (float)inv_scale_y, interpolation))
2303 int depth = src.depth(), cn = src.channels();
2304 double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y;
2305 int k, sx, sy, dx, dy;
2307 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR*100 + IPP_VERSION_MINOR >= 701)
2308 #define IPP_RESIZE_EPS 1.e-10
2310 double ex = fabs((double)dsize.width/src.cols - inv_scale_x)/inv_scale_x;
2311 double ey = fabs((double)dsize.height/src.rows - inv_scale_y)/inv_scale_y;
2313 if ((ex < IPP_RESIZE_EPS && ey < IPP_RESIZE_EPS && depth != CV_64F) ||
2314 (ex == 0 && ey == 0 && depth == CV_64F))
2317 if (interpolation == INTER_LINEAR && src.rows >= 2 && src.cols >= 2)
2321 else if (interpolation == INTER_CUBIC && src.rows >= 4 && src.cols >= 4)
2325 if( mode != 0 && (cn == 1 || cn == 3 || cn == 4) &&
2326 (depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F ||
2327 (depth == CV_64F && mode == ippLinear)))
2330 Range range(0, src.rows);
2331 IPPresizeInvoker invoker(src, dst, inv_scale_x, inv_scale_y, mode, &ok);
2332 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
2337 #undef IPP_RESIZE_EPS
2340 if( interpolation == INTER_NEAREST )
2342 resizeNN( src, dst, inv_scale_x, inv_scale_y );
2347 int iscale_x = saturate_cast<int>(scale_x);
2348 int iscale_y = saturate_cast<int>(scale_y);
2350 bool is_area_fast = std::abs(scale_x - iscale_x) < DBL_EPSILON &&
2351 std::abs(scale_y - iscale_y) < DBL_EPSILON;
2353 // when scale_x and scale_y are both equal to 2,
2354 // INTER_AREA (fast) produces the same result as INTER_LINEAR
2355 if( interpolation == INTER_LINEAR && is_area_fast && iscale_x == 2 && iscale_y == 2 )
2356 interpolation = INTER_AREA;
2358 // true "area" interpolation is only implemented when the image is shrunk (scale_x >= 1 && scale_y >= 1);
2359 // in other cases it is emulated using a variant of bilinear interpolation
2360 if( interpolation == INTER_AREA && scale_x >= 1 && scale_y >= 1 )
2364 int area = iscale_x*iscale_y;
2365 size_t srcstep = src.step / src.elemSize1();
2366 AutoBuffer<int> _ofs(area + dsize.width*cn);
2368 int* xofs = ofs + area;
2369 ResizeAreaFastFunc func = areafast_tab[depth];
2370 CV_Assert( func != 0 );
2372 for( sy = 0, k = 0; sy < iscale_y; sy++ )
2373 for( sx = 0; sx < iscale_x; sx++ )
2374 ofs[k++] = (int)(sy*srcstep + sx*cn);
2376 for( dx = 0; dx < dsize.width; dx++ )
2380 for( k = 0; k < cn; k++ )
2381 xofs[j + k] = sx + k;
2384 func( src, dst, ofs, xofs, iscale_x, iscale_y );
2388 ResizeAreaFunc func = area_tab[depth];
2389 CV_Assert( func != 0 && cn <= 4 );
2391 AutoBuffer<DecimateAlpha> _xytab((ssize.width + ssize.height)*2);
2392 DecimateAlpha* xtab = _xytab, *ytab = xtab + ssize.width*2;
2394 int xtab_size = computeResizeAreaTab(ssize.width, dsize.width, cn, scale_x, xtab);
2395 int ytab_size = computeResizeAreaTab(ssize.height, dsize.height, 1, scale_y, ytab);
2397 AutoBuffer<int> _tabofs(dsize.height + 1);
2398 int* tabofs = _tabofs;
2399 for( k = 0, dy = 0; k < ytab_size; k++ )
2401 if( k == 0 || ytab[k].di != ytab[k-1].di )
2403 assert( ytab[k].di == dy );
2407 tabofs[dy] = ytab_size;
2409 func( src, dst, xtab, xtab_size, ytab, ytab_size, tabofs );
2414 int xmin = 0, xmax = dsize.width, width = dsize.width*cn;
2415 bool area_mode = interpolation == INTER_AREA;
2416 bool fixpt = depth == CV_8U;
2419 int ksize=0, ksize2;
2420 if( interpolation == INTER_CUBIC )
2421 ksize = 4, func = cubic_tab[depth];
2422 else if( interpolation == INTER_LANCZOS4 )
2423 ksize = 8, func = lanczos4_tab[depth];
2424 else if( interpolation == INTER_LINEAR || interpolation == INTER_AREA )
2425 ksize = 2, func = linear_tab[depth];
2427 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
2430 CV_Assert( func != 0 );
2432 AutoBuffer<uchar> _buffer((width + dsize.height)*(sizeof(int) + sizeof(float)*ksize));
2433 int* xofs = (int*)(uchar*)_buffer;
2434 int* yofs = xofs + width;
2435 float* alpha = (float*)(yofs + dsize.height);
2436 short* ialpha = (short*)alpha;
2437 float* beta = alpha + width*ksize;
2438 short* ibeta = ialpha + width*ksize;
2439 float cbuf[MAX_ESIZE];
2441 for( dx = 0; dx < dsize.width; dx++ )
2445 fx = (float)((dx+0.5)*scale_x - 0.5);
2451 sx = cvFloor(dx*scale_x);
2452 fx = (float)((dx+1) - (sx+1)*inv_scale_x);
2453 fx = fx <= 0 ? 0.f : fx - cvFloor(fx);
2459 if( sx < 0 && (interpolation != INTER_CUBIC && interpolation != INTER_LANCZOS4))
2463 if( sx + ksize2 >= ssize.width )
2465 xmax = std::min( xmax, dx );
2466 if( sx >= ssize.width-1 && (interpolation != INTER_CUBIC && interpolation != INTER_LANCZOS4))
2467 fx = 0, sx = ssize.width-1;
2470 for( k = 0, sx *= cn; k < cn; k++ )
2471 xofs[dx*cn + k] = sx + k;
2473 if( interpolation == INTER_CUBIC )
2474 interpolateCubic( fx, cbuf );
2475 else if( interpolation == INTER_LANCZOS4 )
2476 interpolateLanczos4( fx, cbuf );
2484 for( k = 0; k < ksize; k++ )
2485 ialpha[dx*cn*ksize + k] = saturate_cast<short>(cbuf[k]*INTER_RESIZE_COEF_SCALE);
2486 for( ; k < cn*ksize; k++ )
2487 ialpha[dx*cn*ksize + k] = ialpha[dx*cn*ksize + k - ksize];
2491 for( k = 0; k < ksize; k++ )
2492 alpha[dx*cn*ksize + k] = cbuf[k];
2493 for( ; k < cn*ksize; k++ )
2494 alpha[dx*cn*ksize + k] = alpha[dx*cn*ksize + k - ksize];
2498 for( dy = 0; dy < dsize.height; dy++ )
2502 fy = (float)((dy+0.5)*scale_y - 0.5);
2508 sy = cvFloor(dy*scale_y);
2509 fy = (float)((dy+1) - (sy+1)*inv_scale_y);
2510 fy = fy <= 0 ? 0.f : fy - cvFloor(fy);
2514 if( interpolation == INTER_CUBIC )
2515 interpolateCubic( fy, cbuf );
2516 else if( interpolation == INTER_LANCZOS4 )
2517 interpolateLanczos4( fy, cbuf );
2526 for( k = 0; k < ksize; k++ )
2527 ibeta[dy*ksize + k] = saturate_cast<short>(cbuf[k]*INTER_RESIZE_COEF_SCALE);
2531 for( k = 0; k < ksize; k++ )
2532 beta[dy*ksize + k] = cbuf[k];
2536 func( src, dst, xofs, fixpt ? (void*)ialpha : (void*)alpha, yofs,
2537 fixpt ? (void*)ibeta : (void*)beta, xmin, xmax, ksize );
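/*
   Minimal usage sketch for the function above (illustrative only; the file
   name and sizes are hypothetical):

       cv::Mat img = cv::imread("lena.png"), dst1, dst2;
       // shrink by specifying scale factors (dsize is computed from them)
       cv::resize(img, dst1, cv::Size(), 0.5, 0.5, cv::INTER_AREA);
       // enlarge to an explicit size (scale factors are computed from dsize)
       cv::resize(img, dst2, cv::Size(img.cols*2, img.rows*2), 0, 0, cv::INTER_CUBIC);

   INTER_AREA is usually preferable for decimation and INTER_LINEAR/INTER_CUBIC
   for moderate upscaling, which is also reflected in the dispatch logic above.
*/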
2541 /****************************************************************************************\
2542 * General warping (affine, perspective, remap) *
2543 \****************************************************************************************/
2548 template<typename T>
2549 static void remapNearest( const Mat& _src, Mat& _dst, const Mat& _xy,
2550 int borderType, const Scalar& _borderValue )
2552 Size ssize = _src.size(), dsize = _dst.size();
2553 int cn = _src.channels();
2554 const T* S0 = (const T*)_src.data;
2555 size_t sstep = _src.step/sizeof(S0[0]);
2556 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
2557 saturate_cast<T>(_borderValue[1]),
2558 saturate_cast<T>(_borderValue[2]),
2559 saturate_cast<T>(_borderValue[3]));
2562 unsigned width1 = ssize.width, height1 = ssize.height;
2564 if( _dst.isContinuous() && _xy.isContinuous() )
2566 dsize.width *= dsize.height;
2570 for( dy = 0; dy < dsize.height; dy++ )
2572 T* D = (T*)(_dst.data + _dst.step*dy);
2573 const short* XY = (const short*)(_xy.data + _xy.step*dy);
2577 for( dx = 0; dx < dsize.width; dx++ )
2579 int sx = XY[dx*2], sy = XY[dx*2+1];
2580 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
2581 D[dx] = S0[sy*sstep + sx];
2584 if( borderType == BORDER_REPLICATE )
2586 sx = clip(sx, 0, ssize.width);
2587 sy = clip(sy, 0, ssize.height);
2588 D[dx] = S0[sy*sstep + sx];
2590 else if( borderType == BORDER_CONSTANT )
2592 else if( borderType != BORDER_TRANSPARENT )
2594 sx = borderInterpolate(sx, ssize.width, borderType);
2595 sy = borderInterpolate(sy, ssize.height, borderType);
2596 D[dx] = S0[sy*sstep + sx];
2603 for( dx = 0; dx < dsize.width; dx++, D += cn )
2605 int sx = XY[dx*2], sy = XY[dx*2+1], k;
2607 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
2611 S = S0 + sy*sstep + sx*3;
2612 D[0] = S[0], D[1] = S[1], D[2] = S[2];
2616 S = S0 + sy*sstep + sx*4;
2617 D[0] = S[0], D[1] = S[1], D[2] = S[2], D[3] = S[3];
2621 S = S0 + sy*sstep + sx*cn;
2622 for( k = 0; k < cn; k++ )
2626 else if( borderType != BORDER_TRANSPARENT )
2628 if( borderType == BORDER_REPLICATE )
2630 sx = clip(sx, 0, ssize.width);
2631 sy = clip(sy, 0, ssize.height);
2632 S = S0 + sy*sstep + sx*cn;
2634 else if( borderType == BORDER_CONSTANT )
2638 sx = borderInterpolate(sx, ssize.width, borderType);
2639 sy = borderInterpolate(sy, ssize.height, borderType);
2640 S = S0 + sy*sstep + sx*cn;
2642 for( k = 0; k < cn; k++ )
2653 int operator()( const Mat&, void*, const short*, const ushort*,
2654 const void*, int ) const { return 0; }
2661 int operator()( const Mat& _src, void* _dst, const short* XY,
2662 const ushort* FXY, const void* _wtab, int width ) const
2664 int cn = _src.channels(), x = 0, sstep = (int)_src.step;
2666 if( (cn != 1 && cn != 3 && cn != 4) || !checkHardwareSupport(CV_CPU_SSE2) ||
2670 const uchar *S0 = _src.data, *S1 = _src.data + _src.step;
2671 const short* wtab = cn == 1 ? (const short*)_wtab : &BilinearTab_iC4[0][0][0];
2672 uchar* D = (uchar*)_dst;
2673 __m128i delta = _mm_set1_epi32(INTER_REMAP_COEF_SCALE/2);
2674 __m128i xy2ofs = _mm_set1_epi32(cn + (sstep << 16));
2675 __m128i z = _mm_setzero_si128();
2676 int CV_DECL_ALIGNED(16) iofs0[4], iofs1[4];
2680 for( ; x <= width - 8; x += 8 )
2682 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
2683 __m128i xy1 = _mm_loadu_si128( (const __m128i*)(XY + x*2 + 8));
2684 __m128i v0, v1, v2, v3, a0, a1, b0, b1;
2687 xy0 = _mm_madd_epi16( xy0, xy2ofs );
2688 xy1 = _mm_madd_epi16( xy1, xy2ofs );
2689 _mm_store_si128( (__m128i*)iofs0, xy0 );
2690 _mm_store_si128( (__m128i*)iofs1, xy1 );
2692 i0 = *(ushort*)(S0 + iofs0[0]) + (*(ushort*)(S0 + iofs0[1]) << 16);
2693 i1 = *(ushort*)(S0 + iofs0[2]) + (*(ushort*)(S0 + iofs0[3]) << 16);
2694 v0 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
2695 i0 = *(ushort*)(S1 + iofs0[0]) + (*(ushort*)(S1 + iofs0[1]) << 16);
2696 i1 = *(ushort*)(S1 + iofs0[2]) + (*(ushort*)(S1 + iofs0[3]) << 16);
2697 v1 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
2698 v0 = _mm_unpacklo_epi8(v0, z);
2699 v1 = _mm_unpacklo_epi8(v1, z);
2701 a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x]*4)),
2702 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+1]*4)));
2703 a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+2]*4)),
2704 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+3]*4)));
2705 b0 = _mm_unpacklo_epi64(a0, a1);
2706 b1 = _mm_unpackhi_epi64(a0, a1);
2707 v0 = _mm_madd_epi16(v0, b0);
2708 v1 = _mm_madd_epi16(v1, b1);
2709 v0 = _mm_add_epi32(_mm_add_epi32(v0, v1), delta);
2711 i0 = *(ushort*)(S0 + iofs1[0]) + (*(ushort*)(S0 + iofs1[1]) << 16);
2712 i1 = *(ushort*)(S0 + iofs1[2]) + (*(ushort*)(S0 + iofs1[3]) << 16);
2713 v2 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
2714 i0 = *(ushort*)(S1 + iofs1[0]) + (*(ushort*)(S1 + iofs1[1]) << 16);
2715 i1 = *(ushort*)(S1 + iofs1[2]) + (*(ushort*)(S1 + iofs1[3]) << 16);
2716 v3 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
2717 v2 = _mm_unpacklo_epi8(v2, z);
2718 v3 = _mm_unpacklo_epi8(v3, z);
2720 a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+4]*4)),
2721 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+5]*4)));
2722 a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+6]*4)),
2723 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+7]*4)));
2724 b0 = _mm_unpacklo_epi64(a0, a1);
2725 b1 = _mm_unpackhi_epi64(a0, a1);
2726 v2 = _mm_madd_epi16(v2, b0);
2727 v3 = _mm_madd_epi16(v3, b1);
2728 v2 = _mm_add_epi32(_mm_add_epi32(v2, v3), delta);
2730 v0 = _mm_srai_epi32(v0, INTER_REMAP_COEF_BITS);
2731 v2 = _mm_srai_epi32(v2, INTER_REMAP_COEF_BITS);
2732 v0 = _mm_packus_epi16(_mm_packs_epi32(v0, v2), z);
2733 _mm_storel_epi64( (__m128i*)(D + x), v0 );
2738 for( ; x <= width - 5; x += 4, D += 12 )
2740 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
2741 __m128i u0, v0, u1, v1;
2743 xy0 = _mm_madd_epi16( xy0, xy2ofs );
2744 _mm_store_si128( (__m128i*)iofs0, xy0 );
2745 const __m128i *w0, *w1;
2746 w0 = (const __m128i*)(wtab + FXY[x]*16);
2747 w1 = (const __m128i*)(wtab + FXY[x+1]*16);
2749 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[0])),
2750 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[0] + 3)));
2751 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[0])),
2752 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[0] + 3)));
2753 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[1])),
2754 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[1] + 3)));
2755 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[1])),
2756 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[1] + 3)));
2757 u0 = _mm_unpacklo_epi8(u0, z);
2758 v0 = _mm_unpacklo_epi8(v0, z);
2759 u1 = _mm_unpacklo_epi8(u1, z);
2760 v1 = _mm_unpacklo_epi8(v1, z);
2761 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
2762 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
2763 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
2764 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
2765 u0 = _mm_slli_si128(u0, 4);
2766 u0 = _mm_packs_epi32(u0, u1);
2767 u0 = _mm_packus_epi16(u0, u0);
2768 _mm_storel_epi64((__m128i*)D, _mm_srli_si128(u0,1));
2770 w0 = (const __m128i*)(wtab + FXY[x+2]*16);
2771 w1 = (const __m128i*)(wtab + FXY[x+3]*16);
2773 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[2])),
2774 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[2] + 3)));
2775 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[2])),
2776 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[2] + 3)));
2777 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[3])),
2778 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[3] + 3)));
2779 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[3])),
2780 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[3] + 3)));
2781 u0 = _mm_unpacklo_epi8(u0, z);
2782 v0 = _mm_unpacklo_epi8(v0, z);
2783 u1 = _mm_unpacklo_epi8(u1, z);
2784 v1 = _mm_unpacklo_epi8(v1, z);
2785 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
2786 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
2787 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
2788 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
2789 u0 = _mm_slli_si128(u0, 4);
2790 u0 = _mm_packs_epi32(u0, u1);
2791 u0 = _mm_packus_epi16(u0, u0);
2792 _mm_storel_epi64((__m128i*)(D + 6), _mm_srli_si128(u0,1));
2797 for( ; x <= width - 4; x += 4, D += 16 )
2799 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
2800 __m128i u0, v0, u1, v1;
2802 xy0 = _mm_madd_epi16( xy0, xy2ofs );
2803 _mm_store_si128( (__m128i*)iofs0, xy0 );
2804 const __m128i *w0, *w1;
2805 w0 = (const __m128i*)(wtab + FXY[x]*16);
2806 w1 = (const __m128i*)(wtab + FXY[x+1]*16);
2808 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[0])),
2809 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[0] + 4)));
2810 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[0])),
2811 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[0] + 4)));
2812 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[1])),
2813 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[1] + 4)));
2814 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[1])),
2815 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[1] + 4)));
2816 u0 = _mm_unpacklo_epi8(u0, z);
2817 v0 = _mm_unpacklo_epi8(v0, z);
2818 u1 = _mm_unpacklo_epi8(u1, z);
2819 v1 = _mm_unpacklo_epi8(v1, z);
2820 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
2821 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
2822 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
2823 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
2824 u0 = _mm_packs_epi32(u0, u1);
2825 u0 = _mm_packus_epi16(u0, u0);
2826 _mm_storel_epi64((__m128i*)D, u0);
2828 w0 = (const __m128i*)(wtab + FXY[x+2]*16);
2829 w1 = (const __m128i*)(wtab + FXY[x+3]*16);
2831 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[2])),
2832 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[2] + 4)));
2833 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[2])),
2834 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[2] + 4)));
2835 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[3])),
2836 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[3] + 4)));
2837 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[3])),
2838 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[3] + 4)));
2839 u0 = _mm_unpacklo_epi8(u0, z);
2840 v0 = _mm_unpacklo_epi8(v0, z);
2841 u1 = _mm_unpacklo_epi8(u1, z);
2842 v1 = _mm_unpacklo_epi8(v1, z);
2843 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
2844 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
2845 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
2846 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
2847 u0 = _mm_packs_epi32(u0, u1);
2848 u0 = _mm_packus_epi16(u0, u0);
2849 _mm_storel_epi64((__m128i*)(D + 8), u0);
2859 typedef RemapNoVec RemapVec_8u;
2864 template<class CastOp, class VecOp, typename AT>
2865 static void remapBilinear( const Mat& _src, Mat& _dst, const Mat& _xy,
2866 const Mat& _fxy, const void* _wtab,
2867 int borderType, const Scalar& _borderValue )
2869 typedef typename CastOp::rtype T;
2870 typedef typename CastOp::type1 WT;
2871 Size ssize = _src.size(), dsize = _dst.size();
2872 int cn = _src.channels();
2873 const AT* wtab = (const AT*)_wtab;
2874 const T* S0 = (const T*)_src.data;
2875 size_t sstep = _src.step/sizeof(S0[0]);
2876 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
2877 saturate_cast<T>(_borderValue[1]),
2878 saturate_cast<T>(_borderValue[2]),
2879 saturate_cast<T>(_borderValue[3]));
2884 unsigned width1 = std::max(ssize.width-1, 0), height1 = std::max(ssize.height-1, 0);
2885 CV_Assert( cn <= 4 && ssize.area() > 0 );
2887 if( _src.type() == CV_8UC3 )
2888 width1 = std::max(ssize.width-2, 0);
2891 for( dy = 0; dy < dsize.height; dy++ )
2893 T* D = (T*)(_dst.data + _dst.step*dy);
2894 const short* XY = (const short*)(_xy.data + _xy.step*dy);
2895 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
2897 bool prevInlier = false;
2899 for( dx = 0; dx <= dsize.width; dx++ )
2901 bool curInlier = dx < dsize.width ?
2902 (unsigned)XY[dx*2] < width1 &&
2903 (unsigned)XY[dx*2+1] < height1 : !prevInlier;
2904 if( curInlier == prevInlier )
2910 prevInlier = curInlier;
2914 int len = vecOp( _src, D, XY + dx*2, FXY + dx, wtab, X1 - dx );
2920 for( ; dx < X1; dx++, D++ )
2922 int sx = XY[dx*2], sy = XY[dx*2+1];
2923 const AT* w = wtab + FXY[dx]*4;
2924 const T* S = S0 + sy*sstep + sx;
2925 *D = castOp(WT(S[0]*w[0] + S[1]*w[1] + S[sstep]*w[2] + S[sstep+1]*w[3]));
2929 for( ; dx < X1; dx++, D += 2 )
2931 int sx = XY[dx*2], sy = XY[dx*2+1];
2932 const AT* w = wtab + FXY[dx]*4;
2933 const T* S = S0 + sy*sstep + sx*2;
2934 WT t0 = S[0]*w[0] + S[2]*w[1] + S[sstep]*w[2] + S[sstep+2]*w[3];
2935 WT t1 = S[1]*w[0] + S[3]*w[1] + S[sstep+1]*w[2] + S[sstep+3]*w[3];
2936 D[0] = castOp(t0); D[1] = castOp(t1);
2939 for( ; dx < X1; dx++, D += 3 )
2941 int sx = XY[dx*2], sy = XY[dx*2+1];
2942 const AT* w = wtab + FXY[dx]*4;
2943 const T* S = S0 + sy*sstep + sx*3;
2944 WT t0 = S[0]*w[0] + S[3]*w[1] + S[sstep]*w[2] + S[sstep+3]*w[3];
2945 WT t1 = S[1]*w[0] + S[4]*w[1] + S[sstep+1]*w[2] + S[sstep+4]*w[3];
2946 WT t2 = S[2]*w[0] + S[5]*w[1] + S[sstep+2]*w[2] + S[sstep+5]*w[3];
2947 D[0] = castOp(t0); D[1] = castOp(t1); D[2] = castOp(t2);
2950 for( ; dx < X1; dx++, D += 4 )
2952 int sx = XY[dx*2], sy = XY[dx*2+1];
2953 const AT* w = wtab + FXY[dx]*4;
2954 const T* S = S0 + sy*sstep + sx*4;
2955 WT t0 = S[0]*w[0] + S[4]*w[1] + S[sstep]*w[2] + S[sstep+4]*w[3];
2956 WT t1 = S[1]*w[0] + S[5]*w[1] + S[sstep+1]*w[2] + S[sstep+5]*w[3];
2957 D[0] = castOp(t0); D[1] = castOp(t1);
2958 t0 = S[2]*w[0] + S[6]*w[1] + S[sstep+2]*w[2] + S[sstep+6]*w[3];
2959 t1 = S[3]*w[0] + S[7]*w[1] + S[sstep+3]*w[2] + S[sstep+7]*w[3];
2960 D[2] = castOp(t0); D[3] = castOp(t1);
2965 if( borderType == BORDER_TRANSPARENT && cn != 3 )
2973 for( ; dx < X1; dx++, D++ )
2975 int sx = XY[dx*2], sy = XY[dx*2+1];
2976 if( borderType == BORDER_CONSTANT &&
2977 (sx >= ssize.width || sx+1 < 0 ||
2978 sy >= ssize.height || sy+1 < 0) )
2984 int sx0, sx1, sy0, sy1;
2986 const AT* w = wtab + FXY[dx]*4;
2987 if( borderType == BORDER_REPLICATE )
2989 sx0 = clip(sx, 0, ssize.width);
2990 sx1 = clip(sx+1, 0, ssize.width);
2991 sy0 = clip(sy, 0, ssize.height);
2992 sy1 = clip(sy+1, 0, ssize.height);
2993 v0 = S0[sy0*sstep + sx0];
2994 v1 = S0[sy0*sstep + sx1];
2995 v2 = S0[sy1*sstep + sx0];
2996 v3 = S0[sy1*sstep + sx1];
3000 sx0 = borderInterpolate(sx, ssize.width, borderType);
3001 sx1 = borderInterpolate(sx+1, ssize.width, borderType);
3002 sy0 = borderInterpolate(sy, ssize.height, borderType);
3003 sy1 = borderInterpolate(sy+1, ssize.height, borderType);
3004 v0 = sx0 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx0] : cval[0];
3005 v1 = sx1 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx1] : cval[0];
3006 v2 = sx0 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx0] : cval[0];
3007 v3 = sx1 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx1] : cval[0];
3009 D[0] = castOp(WT(v0*w[0] + v1*w[1] + v2*w[2] + v3*w[3]));
3013 for( ; dx < X1; dx++, D += cn )
3015 int sx = XY[dx*2], sy = XY[dx*2+1], k;
3016 if( borderType == BORDER_CONSTANT &&
3017 (sx >= ssize.width || sx+1 < 0 ||
3018 sy >= ssize.height || sy+1 < 0) )
3020 for( k = 0; k < cn; k++ )
3025 int sx0, sx1, sy0, sy1;
3026 const T *v0, *v1, *v2, *v3;
3027 const AT* w = wtab + FXY[dx]*4;
3028 if( borderType == BORDER_REPLICATE )
3030 sx0 = clip(sx, 0, ssize.width);
3031 sx1 = clip(sx+1, 0, ssize.width);
3032 sy0 = clip(sy, 0, ssize.height);
3033 sy1 = clip(sy+1, 0, ssize.height);
3034 v0 = S0 + sy0*sstep + sx0*cn;
3035 v1 = S0 + sy0*sstep + sx1*cn;
3036 v2 = S0 + sy1*sstep + sx0*cn;
3037 v3 = S0 + sy1*sstep + sx1*cn;
3039 else if( borderType == BORDER_TRANSPARENT &&
3040 ((unsigned)sx >= (unsigned)(ssize.width-1) ||
3041 (unsigned)sy >= (unsigned)(ssize.height-1)))
3045 sx0 = borderInterpolate(sx, ssize.width, borderType);
3046 sx1 = borderInterpolate(sx+1, ssize.width, borderType);
3047 sy0 = borderInterpolate(sy, ssize.height, borderType);
3048 sy1 = borderInterpolate(sy+1, ssize.height, borderType);
3049 v0 = sx0 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx0*cn : &cval[0];
3050 v1 = sx1 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx1*cn : &cval[0];
3051 v2 = sx0 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx0*cn : &cval[0];
3052 v3 = sx1 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx1*cn : &cval[0];
3054 for( k = 0; k < cn; k++ )
3055 D[k] = castOp(WT(v0[k]*w[0] + v1[k]*w[1] + v2[k]*w[2] + v3[k]*w[3]));
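/*
   The weights w[0..3] used above come from the interpolation table selected
   by FXY[dx]; for a fractional position (fx, fy) inside the 2x2 source
   neighbourhood they are, conceptually,

       w[0] = (1 - fx)*(1 - fy)    w[1] = fx*(1 - fy)
       w[2] = (1 - fx)*fy          w[3] = fx*fy

   scaled by INTER_REMAP_COEF_SCALE in the fixed-point (8-bit) path, so that
   CastOp shifts the accumulated sum back down by INTER_REMAP_COEF_BITS.
*/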
3064 template<class CastOp, typename AT, int ONE>
3065 static void remapBicubic( const Mat& _src, Mat& _dst, const Mat& _xy,
3066 const Mat& _fxy, const void* _wtab,
3067 int borderType, const Scalar& _borderValue )
3069 typedef typename CastOp::rtype T;
3070 typedef typename CastOp::type1 WT;
3071 Size ssize = _src.size(), dsize = _dst.size();
3072 int cn = _src.channels();
3073 const AT* wtab = (const AT*)_wtab;
3074 const T* S0 = (const T*)_src.data;
3075 size_t sstep = _src.step/sizeof(S0[0]);
3076 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
3077 saturate_cast<T>(_borderValue[1]),
3078 saturate_cast<T>(_borderValue[2]),
3079 saturate_cast<T>(_borderValue[3]));
3082 int borderType1 = borderType != BORDER_TRANSPARENT ? borderType : BORDER_REFLECT_101;
3084 unsigned width1 = std::max(ssize.width-3, 0), height1 = std::max(ssize.height-3, 0);
3086 if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() )
3088 dsize.width *= dsize.height;
3092 for( dy = 0; dy < dsize.height; dy++ )
3094 T* D = (T*)(_dst.data + _dst.step*dy);
3095 const short* XY = (const short*)(_xy.data + _xy.step*dy);
3096 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
3098 for( dx = 0; dx < dsize.width; dx++, D += cn )
3100 int sx = XY[dx*2]-1, sy = XY[dx*2+1]-1;
3101 const AT* w = wtab + FXY[dx]*16;
3103 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
3105 const T* S = S0 + sy*sstep + sx*cn;
3106 for( k = 0; k < cn; k++ )
3108 WT sum = S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3];
3110 sum += S[0]*w[4] + S[cn]*w[5] + S[cn*2]*w[6] + S[cn*3]*w[7];
3112 sum += S[0]*w[8] + S[cn]*w[9] + S[cn*2]*w[10] + S[cn*3]*w[11];
3114 sum += S[0]*w[12] + S[cn]*w[13] + S[cn*2]*w[14] + S[cn*3]*w[15];
3122 if( borderType == BORDER_TRANSPARENT &&
3123 ((unsigned)(sx+1) >= (unsigned)ssize.width ||
3124 (unsigned)(sy+1) >= (unsigned)ssize.height) )
3127 if( borderType1 == BORDER_CONSTANT &&
3128 (sx >= ssize.width || sx+4 <= 0 ||
3129 sy >= ssize.height || sy+4 <= 0))
3131 for( k = 0; k < cn; k++ )
3136 for( i = 0; i < 4; i++ )
3138 x[i] = borderInterpolate(sx + i, ssize.width, borderType1)*cn;
3139 y[i] = borderInterpolate(sy + i, ssize.height, borderType1);
3142 for( k = 0; k < cn; k++, S0++, w -= 16 )
3144 WT cv = cval[k], sum = cv*ONE;
3145 for( i = 0; i < 4; i++, w += 4 )
3148 const T* S = S0 + yi*sstep;
3152 sum += (S[x[0]] - cv)*w[0];
3154 sum += (S[x[1]] - cv)*w[1];
3156 sum += (S[x[2]] - cv)*w[2];
3158 sum += (S[x[3]] - cv)*w[3];
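/*
   Note on the border branch above: the interpolation weights sum to ONE
   (1 for the float path, INTER_REMAP_COEF_SCALE for the fixed-point path),
   so accumulating  sum = cv*ONE + sum_i (S[i] - cv)*w[i]  is equivalent to
   substituting the constant border value cv for every sample that falls
   outside the image (the terms whose x[i]/y[i] came back negative from
   borderInterpolate are simply skipped).
*/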
3169 template<class CastOp, typename AT, int ONE>
3170 static void remapLanczos4( const Mat& _src, Mat& _dst, const Mat& _xy,
3171 const Mat& _fxy, const void* _wtab,
3172 int borderType, const Scalar& _borderValue )
3174 typedef typename CastOp::rtype T;
3175 typedef typename CastOp::type1 WT;
3176 Size ssize = _src.size(), dsize = _dst.size();
3177 int cn = _src.channels();
3178 const AT* wtab = (const AT*)_wtab;
3179 const T* S0 = (const T*)_src.data;
3180 size_t sstep = _src.step/sizeof(S0[0]);
3181 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
3182 saturate_cast<T>(_borderValue[1]),
3183 saturate_cast<T>(_borderValue[2]),
3184 saturate_cast<T>(_borderValue[3]));
3187 int borderType1 = borderType != BORDER_TRANSPARENT ? borderType : BORDER_REFLECT_101;
3189 unsigned width1 = std::max(ssize.width-7, 0), height1 = std::max(ssize.height-7, 0);
3191 if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() )
3193 dsize.width *= dsize.height;
3197 for( dy = 0; dy < dsize.height; dy++ )
3199 T* D = (T*)(_dst.data + _dst.step*dy);
3200 const short* XY = (const short*)(_xy.data + _xy.step*dy);
3201 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
3203 for( dx = 0; dx < dsize.width; dx++, D += cn )
3205 int sx = XY[dx*2]-3, sy = XY[dx*2+1]-3;
3206 const AT* w = wtab + FXY[dx]*64;
3207 const T* S = S0 + sy*sstep + sx*cn;
3209 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
3211 for( k = 0; k < cn; k++ )
3214 for( int r = 0; r < 8; r++, S += sstep, w += 8 )
3215 sum += S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3] +
3216 S[cn*4]*w[4] + S[cn*5]*w[5] + S[cn*6]*w[6] + S[cn*7]*w[7];
3225 if( borderType == BORDER_TRANSPARENT &&
3226 ((unsigned)(sx+3) >= (unsigned)ssize.width ||
3227 (unsigned)(sy+3) >= (unsigned)ssize.height) )
3230 if( borderType1 == BORDER_CONSTANT &&
3231 (sx >= ssize.width || sx+8 <= 0 ||
3232 sy >= ssize.height || sy+8 <= 0))
3234 for( k = 0; k < cn; k++ )
3239 for( i = 0; i < 8; i++ )
3241 x[i] = borderInterpolate(sx + i, ssize.width, borderType1)*cn;
3242 y[i] = borderInterpolate(sy + i, ssize.height, borderType1);
3245 for( k = 0; k < cn; k++, S0++, w -= 64 )
3247 WT cv = cval[k], sum = cv*ONE;
3248 for( i = 0; i < 8; i++, w += 8 )
3251 const T* S1 = S0 + yi*sstep;
3255 sum += (S1[x[0]] - cv)*w[0];
3257 sum += (S1[x[1]] - cv)*w[1];
3259 sum += (S1[x[2]] - cv)*w[2];
3261 sum += (S1[x[3]] - cv)*w[3];
3263 sum += (S1[x[4]] - cv)*w[4];
3265 sum += (S1[x[5]] - cv)*w[5];
3267 sum += (S1[x[6]] - cv)*w[6];
3269 sum += (S1[x[7]] - cv)*w[7];
3280 typedef void (*RemapNNFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
3281 int borderType, const Scalar& _borderValue );
3283 typedef void (*RemapFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
3284 const Mat& _fxy, const void* _wtab,
3285 int borderType, const Scalar& _borderValue);
3287 class RemapInvoker :
3288 public ParallelLoopBody
3291 RemapInvoker(const Mat& _src, Mat& _dst, const Mat *_m1,
3292 const Mat *_m2, int _borderType, const Scalar &_borderValue,
3293 int _planar_input, RemapNNFunc _nnfunc, RemapFunc _ifunc, const void *_ctab) :
3294 ParallelLoopBody(), src(&_src), dst(&_dst), m1(_m1), m2(_m2),
3295 borderType(_borderType), borderValue(_borderValue),
3296 planar_input(_planar_input), nnfunc(_nnfunc), ifunc(_ifunc), ctab(_ctab)
3300 virtual void operator() (const Range& range) const
3303 const int buf_size = 1 << 14;
3304 int brows0 = std::min(128, dst->rows), map_depth = m1->depth();
3305 int bcols0 = std::min(buf_size/brows0, dst->cols);
3306 brows0 = std::min(buf_size/bcols0, dst->rows);
3308 bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
3311 Mat _bufxy(brows0, bcols0, CV_16SC2), _bufa;
3313 _bufa.create(brows0, bcols0, CV_16UC1);
3315 for( y = range.start; y < range.end; y += brows0 )
3317 for( x = 0; x < dst->cols; x += bcols0 )
3319 int brows = std::min(brows0, range.end - y);
3320 int bcols = std::min(bcols0, dst->cols - x);
3321 Mat dpart(*dst, Rect(x, y, bcols, brows));
3322 Mat bufxy(_bufxy, Rect(0, 0, bcols, brows));
3326 if( m1->type() == CV_16SC2 && !m2->data ) // the data is already in the right format
3327 bufxy = (*m1)(Rect(x, y, bcols, brows));
3328 else if( map_depth != CV_32F )
3330 for( y1 = 0; y1 < brows; y1++ )
3332 short* XY = (short*)(bufxy.data + bufxy.step*y1);
3333 const short* sXY = (const short*)(m1->data + m1->step*(y+y1)) + x*2;
3334 const ushort* sA = (const ushort*)(m2->data + m2->step*(y+y1)) + x;
3336 for( x1 = 0; x1 < bcols; x1++ )
3338 int a = sA[x1] & (INTER_TAB_SIZE2-1);
3339 XY[x1*2] = sXY[x1*2] + NNDeltaTab_i[a][0];
3340 XY[x1*2+1] = sXY[x1*2+1] + NNDeltaTab_i[a][1];
3344 else if( !planar_input )
3345 (*m1)(Rect(x, y, bcols, brows)).convertTo(bufxy, bufxy.depth());
3348 for( y1 = 0; y1 < brows; y1++ )
3350 short* XY = (short*)(bufxy.data + bufxy.step*y1);
3351 const float* sX = (const float*)(m1->data + m1->step*(y+y1)) + x;
3352 const float* sY = (const float*)(m2->data + m2->step*(y+y1)) + x;
3358 for( ; x1 <= bcols - 8; x1 += 8 )
3360 __m128 fx0 = _mm_loadu_ps(sX + x1);
3361 __m128 fx1 = _mm_loadu_ps(sX + x1 + 4);
3362 __m128 fy0 = _mm_loadu_ps(sY + x1);
3363 __m128 fy1 = _mm_loadu_ps(sY + x1 + 4);
3364 __m128i ix0 = _mm_cvtps_epi32(fx0);
3365 __m128i ix1 = _mm_cvtps_epi32(fx1);
3366 __m128i iy0 = _mm_cvtps_epi32(fy0);
3367 __m128i iy1 = _mm_cvtps_epi32(fy1);
3368 ix0 = _mm_packs_epi32(ix0, ix1);
3369 iy0 = _mm_packs_epi32(iy0, iy1);
3370 ix1 = _mm_unpacklo_epi16(ix0, iy0);
3371 iy1 = _mm_unpackhi_epi16(ix0, iy0);
3372 _mm_storeu_si128((__m128i*)(XY + x1*2), ix1);
3373 _mm_storeu_si128((__m128i*)(XY + x1*2 + 8), iy1);
3378 for( ; x1 < bcols; x1++ )
3380 XY[x1*2] = saturate_cast<short>(sX[x1]);
3381 XY[x1*2+1] = saturate_cast<short>(sY[x1]);
3385 nnfunc( *src, dpart, bufxy, borderType, borderValue );
3389 Mat bufa(_bufa, Rect(0, 0, bcols, brows));
3390 for( y1 = 0; y1 < brows; y1++ )
3392 short* XY = (short*)(bufxy.data + bufxy.step*y1);
3393 ushort* A = (ushort*)(bufa.data + bufa.step*y1);
3395 if( m1->type() == CV_16SC2 && (m2->type() == CV_16UC1 || m2->type() == CV_16SC1) )
3397 bufxy = (*m1)(Rect(x, y, bcols, brows));
3399 const ushort* sA = (const ushort*)(m2->data + m2->step*(y+y1)) + x;
3400 for( x1 = 0; x1 < bcols; x1++ )
3401 A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2-1));
3403 else if( planar_input )
3405 const float* sX = (const float*)(m1->data + m1->step*(y+y1)) + x;
3406 const float* sY = (const float*)(m2->data + m2->step*(y+y1)) + x;
3412 __m128 scale = _mm_set1_ps((float)INTER_TAB_SIZE);
3413 __m128i mask = _mm_set1_epi32(INTER_TAB_SIZE-1);
3414 for( ; x1 <= bcols - 8; x1 += 8 )
3416 __m128 fx0 = _mm_loadu_ps(sX + x1);
3417 __m128 fx1 = _mm_loadu_ps(sX + x1 + 4);
3418 __m128 fy0 = _mm_loadu_ps(sY + x1);
3419 __m128 fy1 = _mm_loadu_ps(sY + x1 + 4);
3420 __m128i ix0 = _mm_cvtps_epi32(_mm_mul_ps(fx0, scale));
3421 __m128i ix1 = _mm_cvtps_epi32(_mm_mul_ps(fx1, scale));
3422 __m128i iy0 = _mm_cvtps_epi32(_mm_mul_ps(fy0, scale));
3423 __m128i iy1 = _mm_cvtps_epi32(_mm_mul_ps(fy1, scale));
3424 __m128i mx0 = _mm_and_si128(ix0, mask);
3425 __m128i mx1 = _mm_and_si128(ix1, mask);
3426 __m128i my0 = _mm_and_si128(iy0, mask);
3427 __m128i my1 = _mm_and_si128(iy1, mask);
3428 mx0 = _mm_packs_epi32(mx0, mx1);
3429 my0 = _mm_packs_epi32(my0, my1);
3430 my0 = _mm_slli_epi16(my0, INTER_BITS);
3431 mx0 = _mm_or_si128(mx0, my0);
3432 _mm_storeu_si128((__m128i*)(A + x1), mx0);
3433 ix0 = _mm_srai_epi32(ix0, INTER_BITS);
3434 ix1 = _mm_srai_epi32(ix1, INTER_BITS);
3435 iy0 = _mm_srai_epi32(iy0, INTER_BITS);
3436 iy1 = _mm_srai_epi32(iy1, INTER_BITS);
3437 ix0 = _mm_packs_epi32(ix0, ix1);
3438 iy0 = _mm_packs_epi32(iy0, iy1);
3439 ix1 = _mm_unpacklo_epi16(ix0, iy0);
3440 iy1 = _mm_unpackhi_epi16(ix0, iy0);
3441 _mm_storeu_si128((__m128i*)(XY + x1*2), ix1);
3442 _mm_storeu_si128((__m128i*)(XY + x1*2 + 8), iy1);
3447 for( ; x1 < bcols; x1++ )
3449 int sx = cvRound(sX[x1]*INTER_TAB_SIZE);
3450 int sy = cvRound(sY[x1]*INTER_TAB_SIZE);
3451 int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
3452 XY[x1*2] = saturate_cast<short>(sx >> INTER_BITS);
3453 XY[x1*2+1] = saturate_cast<short>(sy >> INTER_BITS);
3459 const float* sXY = (const float*)(m1->data + m1->step*(y+y1)) + x*2;
3461 for( x1 = 0; x1 < bcols; x1++ )
3463 int sx = cvRound(sXY[x1*2]*INTER_TAB_SIZE);
3464 int sy = cvRound(sXY[x1*2+1]*INTER_TAB_SIZE);
3465 int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
3466 XY[x1*2] = saturate_cast<short>(sx >> INTER_BITS);
3467 XY[x1*2+1] = saturate_cast<short>(sy >> INTER_BITS);
3472 ifunc(*src, dpart, bufxy, bufa, ctab, borderType, borderValue);
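/*
   Worked example of the coordinate encoding produced above (illustrative;
   INTER_BITS = 5, i.e. INTER_TAB_SIZE = 32, as defined elsewhere in this
   module): a map value of x = 10.25, y = 3.5 becomes

       sx = cvRound(10.25*32) = 328      sy = cvRound(3.5*32) = 112
       XY  = (328 >> 5, 112 >> 5) = (10, 3)            // integer top-left corner
       A   = (112 & 31)*32 + (328 & 31) = 16*32 + 8    // index into the weight table

   so bufxy holds the integer source coordinates and bufa the quantized
   fractional part used to select the precomputed interpolation weights.
*/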
3491 static bool ocl_remap(InputArray _src, OutputArray _dst, InputArray _map1, InputArray _map2,
3492 int interpolation, int borderType, const Scalar& borderValue)
3494 int cn = _src.channels(), type = _src.type(), depth = _src.depth();
3496 if (borderType == BORDER_TRANSPARENT || cn == 3 || !(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST)
3497 || _map1.type() == CV_16SC1 || _map2.type() == CV_16SC1)
3500 UMat src = _src.getUMat(), map1 = _map1.getUMat(), map2 = _map2.getUMat();
3502 if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.empty())) ||
3503 (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.empty())) )
3505 if (map1.type() != CV_16SC2)
3506 std::swap(map1, map2);
3509 CV_Assert( map1.type() == CV_32FC2 || (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) );
3511 _dst.create(map1.size(), type);
3512 UMat dst = _dst.getUMat();
3514 String kernelName = "remap";
3515 if (map1.type() == CV_32FC2 && map2.empty())
3516 kernelName += "_32FC2";
3517 else if (map1.type() == CV_16SC2)
3519 kernelName += "_16SC2";
3521 kernelName += "_16UC1";
3523 else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
3524 kernelName += "_2_32FC1";
3526 CV_Error(Error::StsBadArg, "Unsupported map types");
3528 static const char * const interMap[] = { "INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_LINEAR", "INTER_LANCZOS" };
3529 static const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
3530 "BORDER_REFLECT_101", "BORDER_TRANSPARENT" };
3531 String buildOptions = format("-D %s -D %s -D T=%s", interMap[interpolation], borderMap[borderType], ocl::typeToStr(type));
3533 if (interpolation != INTER_NEAREST)
3536 int wdepth = std::max(CV_32F, dst.depth());
3537 buildOptions = buildOptions
3538 + format(" -D WT=%s -D convertToT=%s -D convertToWT=%s"
3539 " -D convertToWT2=%s -D WT2=%s",
3540 ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
3541 ocl::convertTypeStr(wdepth, depth, cn, cvt[0]),
3542 ocl::convertTypeStr(depth, wdepth, cn, cvt[1]),
3543 ocl::convertTypeStr(CV_32S, wdepth, 2, cvt[2]),
3544 ocl::typeToStr(CV_MAKE_TYPE(wdepth, 2)));
3547 ocl::Kernel k(kernelName.c_str(), ocl::imgproc::remap_oclsrc, buildOptions);
3549 Mat scalar(1, 1, type, borderValue);
3550 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src), dstarg = ocl::KernelArg::WriteOnly(dst),
3551 map1arg = ocl::KernelArg::ReadOnlyNoSize(map1),
3552 scalararg = ocl::KernelArg::Constant((void*)scalar.data, scalar.elemSize());
3555 k.args(srcarg, dstarg, map1arg, scalararg);
3557 k.args(srcarg, dstarg, map1arg, ocl::KernelArg::ReadOnlyNoSize(map2), scalararg);
3559 size_t globalThreads[2] = { dst.cols, dst.rows };
3560 return k.run(2, globalThreads, NULL, false);
3567 void cv::remap( InputArray _src, OutputArray _dst,
3568 InputArray _map1, InputArray _map2,
3569 int interpolation, int borderType, const Scalar& borderValue )
3571 static RemapNNFunc nn_tab[] =
3573 remapNearest<uchar>, remapNearest<schar>, remapNearest<ushort>, remapNearest<short>,
3574 remapNearest<int>, remapNearest<float>, remapNearest<double>, 0
3577 static RemapFunc linear_tab[] =
3579 remapBilinear<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, RemapVec_8u, short>, 0,
3580 remapBilinear<Cast<float, ushort>, RemapNoVec, float>,
3581 remapBilinear<Cast<float, short>, RemapNoVec, float>, 0,
3582 remapBilinear<Cast<float, float>, RemapNoVec, float>,
3583 remapBilinear<Cast<double, double>, RemapNoVec, float>, 0
3586 static RemapFunc cubic_tab[] =
3588 remapBicubic<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE>, 0,
3589 remapBicubic<Cast<float, ushort>, float, 1>,
3590 remapBicubic<Cast<float, short>, float, 1>, 0,
3591 remapBicubic<Cast<float, float>, float, 1>,
3592 remapBicubic<Cast<double, double>, float, 1>, 0
3595 static RemapFunc lanczos4_tab[] =
3597 remapLanczos4<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE>, 0,
3598 remapLanczos4<Cast<float, ushort>, float, 1>,
3599 remapLanczos4<Cast<float, short>, float, 1>, 0,
3600 remapLanczos4<Cast<float, float>, float, 1>,
3601 remapLanczos4<Cast<double, double>, float, 1>, 0
3604 CV_Assert( _map1.size().area() > 0 );
3605 CV_Assert( _map2.empty() || (_map2.size() == _map1.size()));
3607 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
3608 ocl_remap(_src, _dst, _map1, _map2, interpolation, borderType, borderValue))
3610 Mat src = _src.getMat(), map1 = _map1.getMat(), map2 = _map2.getMat();
3611 _dst.create( map1.size(), src.type() );
3612 Mat dst = _dst.getMat();
3613 if( dst.data == src.data )
3616 int depth = src.depth();
3617 RemapNNFunc nnfunc = 0;
3618 RemapFunc ifunc = 0;
3619 const void* ctab = 0;
3620 bool fixpt = depth == CV_8U;
3621 bool planar_input = false;
3623 if( interpolation == INTER_NEAREST )
3625 nnfunc = nn_tab[depth];
3626 CV_Assert( nnfunc != 0 );
3630 if( interpolation == INTER_AREA )
3631 interpolation = INTER_LINEAR;
3633 if( interpolation == INTER_LINEAR )
3634 ifunc = linear_tab[depth];
3635 else if( interpolation == INTER_CUBIC )
3636 ifunc = cubic_tab[depth];
3637 else if( interpolation == INTER_LANCZOS4 )
3638 ifunc = lanczos4_tab[depth];
3640 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
3641 CV_Assert( ifunc != 0 );
3642 ctab = initInterTab2D( interpolation, fixpt );
3645 const Mat *m1 = &map1, *m2 = &map2;
3647 if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.type() == CV_16SC1 || !map2.data)) ||
3648 (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.type() == CV_16SC1 || !map1.data)) )
3650 if( map1.type() != CV_16SC2 )
3655 CV_Assert( ((map1.type() == CV_32FC2 || map1.type() == CV_16SC2) && !map2.data) ||
3656 (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) );
3657 planar_input = map1.channels() == 1;
3660 RemapInvoker invoker(src, dst, m1, m2,
3661 borderType, borderValue, planar_input, nnfunc, ifunc,
3663 parallel_for_(Range(0, dst.rows), invoker, dst.total()/(double)(1<<16));
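/*
   Minimal usage sketch for cv::remap (illustrative only): build a pair of
   CV_32FC1 maps that mirror the image horizontally and apply them.

       cv::Mat img = cv::imread("input.png");          // hypothetical input
       cv::Mat mapx(img.size(), CV_32FC1), mapy(img.size(), CV_32FC1);
       for (int y = 0; y < img.rows; y++)
           for (int x = 0; x < img.cols; x++)
           {
               mapx.at<float>(y, x) = (float)(img.cols - 1 - x);
               mapy.at<float>(y, x) = (float)y;
           }
       cv::Mat flipped;
       cv::remap(img, flipped, mapx, mapy, cv::INTER_LINEAR,
                 cv::BORDER_CONSTANT, cv::Scalar());
*/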
3667 void cv::convertMaps( InputArray _map1, InputArray _map2,
3668 OutputArray _dstmap1, OutputArray _dstmap2,
3669 int dstm1type, bool nninterpolate )
3671 Mat map1 = _map1.getMat(), map2 = _map2.getMat(), dstmap1, dstmap2;
3672 Size size = map1.size();
3673 const Mat *m1 = &map1, *m2 = &map2;
3674 int m1type = m1->type(), m2type = m2->type();
3676 CV_Assert( (m1type == CV_16SC2 && (nninterpolate || m2type == CV_16UC1 || m2type == CV_16SC1)) ||
3677 (m2type == CV_16SC2 && (nninterpolate || m1type == CV_16UC1 || m1type == CV_16SC1)) ||
3678 (m1type == CV_32FC1 && m2type == CV_32FC1) ||
3679 (m1type == CV_32FC2 && !m2->data) );
3681 if( m2type == CV_16SC2 )
3683 std::swap( m1, m2 );
3684 std::swap( m1type, m2type );
3687 if( dstm1type <= 0 )
3688 dstm1type = m1type == CV_16SC2 ? CV_32FC2 : CV_16SC2;
3689 CV_Assert( dstm1type == CV_16SC2 || dstm1type == CV_32FC1 || dstm1type == CV_32FC2 );
3690 _dstmap1.create( size, dstm1type );
3691 dstmap1 = _dstmap1.getMat();
3693 if( !nninterpolate && dstm1type != CV_32FC2 )
3695 _dstmap2.create( size, dstm1type == CV_16SC2 ? CV_16UC1 : CV_32FC1 );
3696 dstmap2 = _dstmap2.getMat();
3701 if( m1type == dstm1type || (nninterpolate &&
3702 ((m1type == CV_16SC2 && dstm1type == CV_32FC2) ||
3703 (m1type == CV_32FC2 && dstm1type == CV_16SC2))) )
3705 m1->convertTo( dstmap1, dstmap1.type() );
3706 if( dstmap2.data && dstmap2.type() == m2->type() )
3707 m2->copyTo( dstmap2 );
3711 if( m1type == CV_32FC1 && dstm1type == CV_32FC2 )
3713 Mat vdata[] = { *m1, *m2 };
3714 merge( vdata, 2, dstmap1 );
3718 if( m1type == CV_32FC2 && dstm1type == CV_32FC1 )
3720 Mat mv[] = { dstmap1, dstmap2 };
3725 if( m1->isContinuous() && (!m2->data || m2->isContinuous()) &&
3726 dstmap1.isContinuous() && (!dstmap2.data || dstmap2.isContinuous()) )
3728 size.width *= size.height;
3732 const float scale = 1.f/INTER_TAB_SIZE;
3734 for( y = 0; y < size.height; y++ )
3736 const float* src1f = (const float*)(m1->data + m1->step*y);
3737 const float* src2f = (const float*)(m2->data + m2->step*y);
3738 const short* src1 = (const short*)src1f;
3739 const ushort* src2 = (const ushort*)src2f;
3741 float* dst1f = (float*)(dstmap1.data + dstmap1.step*y);
3742 float* dst2f = (float*)(dstmap2.data + dstmap2.step*y);
3743 short* dst1 = (short*)dst1f;
3744 ushort* dst2 = (ushort*)dst2f;
3746 if( m1type == CV_32FC1 && dstm1type == CV_16SC2 )
3749 for( x = 0; x < size.width; x++ )
3751 dst1[x*2] = saturate_cast<short>(src1f[x]);
3752 dst1[x*2+1] = saturate_cast<short>(src2f[x]);
3755 for( x = 0; x < size.width; x++ )
3757 int ix = saturate_cast<int>(src1f[x]*INTER_TAB_SIZE);
3758 int iy = saturate_cast<int>(src2f[x]*INTER_TAB_SIZE);
3759 dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
3760 dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
3761 dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
3764 else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )
3767 for( x = 0; x < size.width; x++ )
3769 dst1[x*2] = saturate_cast<short>(src1f[x*2]);
3770 dst1[x*2+1] = saturate_cast<short>(src1f[x*2+1]);
3773 for( x = 0; x < size.width; x++ )
3775 int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
3776 int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
3777 dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
3778 dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
3779 dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
3782 else if( m1type == CV_16SC2 && dstm1type == CV_32FC1 )
3784 for( x = 0; x < size.width; x++ )
3786 int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1) : 0;
3787 dst1f[x] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
3788 dst2f[x] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
3791 else if( m1type == CV_16SC2 && dstm1type == CV_32FC2 )
3793 for( x = 0; x < size.width; x++ )
3795 int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1): 0;
3796 dst1f[x*2] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
3797 dst1f[x*2+1] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
3801 CV_Error( CV_StsNotImplemented, "Unsupported combination of input/output matrices" );
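/*
   Sketch of the intended use of convertMaps (illustrative): pre-convert the
   floating-point maps from the remap sketch above into the packed
   CV_16SC2 + CV_16UC1 representation, which the remap code path consumes
   without per-pixel float-to-fixed conversion.

       cv::Mat map1, map2;
       cv::convertMaps(mapx, mapy, map1, map2, CV_16SC2, false);
       cv::remap(img, flipped, map1, map2, cv::INTER_LINEAR);

   (mapx, mapy and img are the hypothetical matrices from the previous sketch.)
*/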
3809 class warpAffineInvoker :
3810 public ParallelLoopBody
3813 warpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType,
3814 const Scalar &_borderValue, int *_adelta, int *_bdelta, double *_M) :
3815 ParallelLoopBody(), src(_src), dst(_dst), interpolation(_interpolation),
3816 borderType(_borderType), borderValue(_borderValue), adelta(_adelta), bdelta(_bdelta),
3821 virtual void operator() (const Range& range) const
3823 const int BLOCK_SZ = 64;
3824 short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
3825 const int AB_BITS = MAX(10, (int)INTER_BITS);
3826 const int AB_SCALE = 1 << AB_BITS;
3827 int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, x1, y1;
3829 bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
3832 int bh0 = std::min(BLOCK_SZ/2, dst.rows);
3833 int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, dst.cols);
3834 bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, dst.rows);
3836 for( y = range.start; y < range.end; y += bh0 )
3838 for( x = 0; x < dst.cols; x += bw0 )
3840 int bw = std::min( bw0, dst.cols - x);
3841 int bh = std::min( bh0, range.end - y);
3843 Mat _XY(bh, bw, CV_16SC2, XY), matA;
3844 Mat dpart(dst, Rect(x, y, bw, bh));
3846 for( y1 = 0; y1 < bh; y1++ )
3848 short* xy = XY + y1*bw*2;
3849 int X0 = saturate_cast<int>((M[1]*(y + y1) + M[2])*AB_SCALE) + round_delta;
3850 int Y0 = saturate_cast<int>((M[4]*(y + y1) + M[5])*AB_SCALE) + round_delta;
3852 if( interpolation == INTER_NEAREST )
3853 for( x1 = 0; x1 < bw; x1++ )
3855 int X = (X0 + adelta[x+x1]) >> AB_BITS;
3856 int Y = (Y0 + bdelta[x+x1]) >> AB_BITS;
3857 xy[x1*2] = saturate_cast<short>(X);
3858 xy[x1*2+1] = saturate_cast<short>(Y);
3862 short* alpha = A + y1*bw;
3867 __m128i fxy_mask = _mm_set1_epi32(INTER_TAB_SIZE - 1);
3868 __m128i XX = _mm_set1_epi32(X0), YY = _mm_set1_epi32(Y0);
3869 for( ; x1 <= bw - 8; x1 += 8 )
3871 __m128i tx0, tx1, ty0, ty1;
3872 tx0 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(adelta + x + x1)), XX);
3873 ty0 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(bdelta + x + x1)), YY);
3874 tx1 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(adelta + x + x1 + 4)), XX);
3875 ty1 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(bdelta + x + x1 + 4)), YY);
3877 tx0 = _mm_srai_epi32(tx0, AB_BITS - INTER_BITS);
3878 ty0 = _mm_srai_epi32(ty0, AB_BITS - INTER_BITS);
3879 tx1 = _mm_srai_epi32(tx1, AB_BITS - INTER_BITS);
3880 ty1 = _mm_srai_epi32(ty1, AB_BITS - INTER_BITS);
3882 __m128i fx_ = _mm_packs_epi32(_mm_and_si128(tx0, fxy_mask),
3883 _mm_and_si128(tx1, fxy_mask));
3884 __m128i fy_ = _mm_packs_epi32(_mm_and_si128(ty0, fxy_mask),
3885 _mm_and_si128(ty1, fxy_mask));
3886 tx0 = _mm_packs_epi32(_mm_srai_epi32(tx0, INTER_BITS),
3887 _mm_srai_epi32(tx1, INTER_BITS));
3888 ty0 = _mm_packs_epi32(_mm_srai_epi32(ty0, INTER_BITS),
3889 _mm_srai_epi32(ty1, INTER_BITS));
3890 fx_ = _mm_adds_epi16(fx_, _mm_slli_epi16(fy_, INTER_BITS));
3892 _mm_storeu_si128((__m128i*)(xy + x1*2), _mm_unpacklo_epi16(tx0, ty0));
3893 _mm_storeu_si128((__m128i*)(xy + x1*2 + 8), _mm_unpackhi_epi16(tx0, ty0));
3894 _mm_storeu_si128((__m128i*)(alpha + x1), fx_);
3898 for( ; x1 < bw; x1++ )
3900 int X = (X0 + adelta[x+x1]) >> (AB_BITS - INTER_BITS);
3901 int Y = (Y0 + bdelta[x+x1]) >> (AB_BITS - INTER_BITS);
3902 xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
3903 xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
3904 alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
3905 (X & (INTER_TAB_SIZE-1)));
3910 if( interpolation == INTER_NEAREST )
3911 remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
3914 Mat _matA(bh, bw, CV_16U, A);
3915 remap( src, dpart, _XY, _matA, interpolation, borderType, borderValue );
3924 int interpolation, borderType;
3926 int *adelta, *bdelta;
3930 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
3931 class IPPwarpAffineInvoker :
3932 public ParallelLoopBody
3935 IPPwarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
3936 ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
3941 virtual void operator() (const Range& range) const
3943 IppiSize srcsize = { src.cols, src.rows };
3944 IppiRect srcroi = { 0, 0, src.cols, src.rows };
3945 IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start };
3946 int cnn = src.channels();
3947 if( borderType == BORDER_CONSTANT )
3949 IppiSize setSize = { dst.cols, range.end - range.start };
3950 void *dataPointer = dst.data + dst.step[0] * range.start;
3951 if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
3957 if( func( src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode ) < 0) // Aug 2013: problem in IPP 7.1 and 8.0: the function sometimes returns ippStsCoeffErr
3963 double (&coeffs)[2][3];
3967 ippiWarpAffineBackFunc func;
3969 const IPPwarpAffineInvoker& operator= (const IPPwarpAffineInvoker&);
3975 enum { OCL_OP_PERSPECTIVE = 1, OCL_OP_AFFINE = 0 };
3977 static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0,
3978 Size dsize, int flags, int borderType, const Scalar& borderValue,
3981 CV_Assert(op_type == OCL_OP_AFFINE || op_type == OCL_OP_PERSPECTIVE);
3983 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
3984 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
3986 int interpolation = flags & INTER_MAX;
3987 if( interpolation == INTER_AREA )
3988 interpolation = INTER_LINEAR;
3990 if ( !(borderType == cv::BORDER_CONSTANT &&
3991 (interpolation == cv::INTER_NEAREST || interpolation == cv::INTER_LINEAR || interpolation == cv::INTER_CUBIC)) ||
3992 (!doubleSupport && depth == CV_64F) || cn > 4)
3995 const char * const interpolationMap[3] = { "NEAREST", "LINEAR", "CUBIC" };
3996 ocl::ProgramSource program = op_type == OCL_OP_AFFINE ?
3997 ocl::imgproc::warp_affine_oclsrc : ocl::imgproc::warp_perspective_oclsrc;
3998 const char * const kernelName = op_type == OCL_OP_AFFINE ? "warpAffine" : "warpPerspective";
4000 int scalarcn = cn == 3 ? 4 : cn;
4001 int wdepth = interpolation == INTER_NEAREST ? depth : std::max(CV_32S, depth);
4002 int sctype = CV_MAKETYPE(wdepth, scalarcn);
4006 if (interpolation == INTER_NEAREST)
4008 opts = format("-D INTER_NEAREST -D T=%s%s -D T1=%s -D ST=%s -D cn=%d", ocl::typeToStr(type),
4009 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
4010 ocl::typeToStr(CV_MAT_DEPTH(type)),
4011 ocl::typeToStr(sctype),
4017 opts = format("-D INTER_%s -D T=%s -D T1=%s -D ST=%s -D WT=%s -D depth=%d -D convertToWT=%s -D convertToT=%s%s -D cn=%d",
4018 interpolationMap[interpolation], ocl::typeToStr(type),
4019 ocl::typeToStr(CV_MAT_DEPTH(type)),
4020 ocl::typeToStr(sctype),
4021 ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), depth,
4022 ocl::convertTypeStr(depth, wdepth, cn, cvt[0]),
4023 ocl::convertTypeStr(wdepth, depth, cn, cvt[1]),
4024 doubleSupport ? " -D DOUBLE_SUPPORT" : "", cn);
4027 k.create(kernelName, program, opts);
4028 if (k.empty())
4029 return false;
4031 double borderBuf[] = {0, 0, 0, 0};
4032 scalarToRawData(borderValue, borderBuf, sctype);
4034 UMat src = _src.getUMat(), M0;
4035 _dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
4036 UMat dst = _dst.getUMat();
4038 double M[9];
4039 int matRows = (op_type == OCL_OP_AFFINE ? 2 : 3);
4040 Mat matM(matRows, 3, CV_64F, M), M1 = _M0.getMat();
4041 CV_Assert( (M1.type() == CV_32F || M1.type() == CV_64F) &&
4042 M1.rows == matRows && M1.cols == 3 );
4043 M1.convertTo(matM, matM.type());
4045 if( !(flags & WARP_INVERSE_MAP) )
4047 if (op_type == OCL_OP_PERSPECTIVE)
4051 double D = M[0]*M[4] - M[1]*M[3];
4052 D = D != 0 ? 1./D : 0;
4053 double A11 = M[4]*D, A22=M[0]*D;
4054 M[0] = A11; M[1] *= -D;
4055 M[3] *= -D; M[4] = A22;
4056 double b1 = -M[0]*M[2] - M[1]*M[5];
4057 double b2 = -M[3]*M[2] - M[4]*M[5];
4058 M[2] = b1; M[5] = b2;
4061 matM.convertTo(M0, doubleSupport ? CV_64F : CV_32F);
4063 k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(M0),
4064 ocl::KernelArg(0, 0, 0, borderBuf, CV_ELEM_SIZE(sctype)));
4066 size_t globalThreads[2] = { (size_t)dst.cols, (size_t)dst.rows };
4067 return k.run(2, globalThreads, NULL, false);
4075 void cv::warpAffine( InputArray _src, OutputArray _dst,
4076 InputArray _M0, Size dsize,
4077 int flags, int borderType, const Scalar& borderValue )
4079 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
4080 ocl_warpTransform(_src, _dst, _M0, dsize, flags, borderType,
4081 borderValue, OCL_OP_AFFINE))
4083 Mat src = _src.getMat(), M0 = _M0.getMat();
4084 _dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
4085 Mat dst = _dst.getMat();
4086 CV_Assert( src.cols > 0 && src.rows > 0 );
4087 if( dst.data == src.data )
4088 src = src.clone();
4090 double M[6];
4091 Mat matM(2, 3, CV_64F, M);
4092 int interpolation = flags & INTER_MAX;
4093 if( interpolation == INTER_AREA )
4094 interpolation = INTER_LINEAR;
4096 CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 2 && M0.cols == 3 );
4097 M0.convertTo(matM, matM.type());
4099 #ifdef HAVE_TEGRA_OPTIMIZATION
4100 if( tegra::warpAffine(src, dst, M, flags, borderType, borderValue) )
4101 return;
4102 #endif
4104 if( !(flags & WARP_INVERSE_MAP) )
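// M currently holds the forward (src->dst) map; invert it in place so that the
// per-pixel loop below maps destination pixels back to source pixels:
// for M = [ A | b ], the backward map is [ inv(A) | -inv(A)*b ].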
4106 double D = M[0]*M[4] - M[1]*M[3];
4107 D = D != 0 ? 1./D : 0;
4108 double A11 = M[4]*D, A22=M[0]*D;
4109 M[0] = A11; M[1] *= -D;
4110 M[3] *= -D; M[4] = A22;
4111 double b1 = -M[0]*M[2] - M[1]*M[5];
4112 double b2 = -M[3]*M[2] - M[4]*M[5];
4113 M[2] = b1; M[5] = b2;
4116 int x;
4117 AutoBuffer<int> _abdelta(dst.cols*2);
4118 int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
4119 const int AB_BITS = MAX(10, (int)INTER_BITS);
4120 const int AB_SCALE = 1 << AB_BITS;
4122 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
4123 int depth = src.depth();
4124 int channels = src.channels();
4125 if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
4126 ( channels == 1 || channels == 3 || channels == 4 ) &&
4127 ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) )
4129 int type = src.type();
4130 ippiWarpAffineBackFunc ippFunc =
4131 type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R :
4132 type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R :
4133 type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R :
4134 type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C1R :
4135 type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C3R :
4136 type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C4R :
4137 type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C1R :
4138 type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R :
4139 type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R :
4140 0;
4141 int mode =
4142 flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
4143 flags == INTER_NEAREST ? IPPI_INTER_NN :
4144 flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
4145 0;
4146 if( mode && ippFunc )
4148 double coeffs[2][3];
4149 for( int i = 0; i < 2; i++ )
4151 for( int j = 0; j < 3; j++ )
4153 coeffs[i][j] = matM.at<double>(i, j);
4156 bool ok = true;
4157 Range range(0, dst.rows);
4158 IPPwarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
4159 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
4161 if( ok )
4162 return;
4165 #endif
4166 for( x = 0; x < dst.cols; x++ )
4168 adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
4169 bdelta[x] = saturate_cast<int>(M[3]*x*AB_SCALE);
4172 Range range(0, dst.rows);
4173 warpAffineInvoker invoker(src, dst, interpolation, borderType,
4174 borderValue, adelta, bdelta, M);
4175 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
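/* Illustrative usage sketch (not part of the library): calling cv::warpAffine with
 * an explicit 2x3 forward map. The image size and the scale/shift values below are
 * arbitrary assumptions.
 *
 *   #include <opencv2/core.hpp>
 *   #include <opencv2/imgproc.hpp>
 *
 *   static void warpAffineExample()
 *   {
 *       cv::Mat src(480, 640, CV_8UC3, cv::Scalar::all(0));   // placeholder input
 *       double m[6] = { 0.5, 0.0, 100.0,                       // scale by 0.5,
 *                       0.0, 0.5,  50.0 };                     // shift by (100, 50)
 *       cv::Mat M(2, 3, CV_64F, m);
 *       cv::Mat dst;
 *       cv::warpAffine(src, dst, M, src.size(), cv::INTER_LINEAR,
 *                      cv::BORDER_CONSTANT, cv::Scalar::all(0));
 *   }
 */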
4182 class warpPerspectiveInvoker :
4183 public ParallelLoopBody
4187 warpPerspectiveInvoker(const Mat &_src, Mat &_dst, double *_M, int _interpolation,
4188 int _borderType, const Scalar &_borderValue) :
4189 ParallelLoopBody(), src(_src), dst(_dst), M(_M), interpolation(_interpolation),
4190 borderType(_borderType), borderValue(_borderValue)
4194 virtual void operator() (const Range& range) const
4196 const int BLOCK_SZ = 32;
4197 short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
4198 int x, y, x1, y1, width = dst.cols, height = dst.rows;
4200 int bh0 = std::min(BLOCK_SZ/2, height);
4201 int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
4202 bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);
4204 for( y = range.start; y < range.end; y += bh0 )
4206 for( x = 0; x < width; x += bw0 )
4208 int bw = std::min( bw0, width - x);
4209 int bh = std::min( bh0, range.end - y); // height
4211 Mat _XY(bh, bw, CV_16SC2, XY), matA;
4212 Mat dpart(dst, Rect(x, y, bw, bh));
4214 for( y1 = 0; y1 < bh; y1++ )
4216 short* xy = XY + y1*bw*2;
4217 double X0 = M[0]*x + M[1]*(y + y1) + M[2];
4218 double Y0 = M[3]*x + M[4]*(y + y1) + M[5];
4219 double W0 = M[6]*x + M[7]*(y + y1) + M[8];
4221 if( interpolation == INTER_NEAREST )
4222 for( x1 = 0; x1 < bw; x1++ )
4224 double W = W0 + M[6]*x1;
4226 double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
4227 double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
4228 int X = saturate_cast<int>(fX);
4229 int Y = saturate_cast<int>(fY);
4231 xy[x1*2] = saturate_cast<short>(X);
4232 xy[x1*2+1] = saturate_cast<short>(Y);
4236 short* alpha = A + y1*bw;
4237 for( x1 = 0; x1 < bw; x1++ )
4239 double W = W0 + M[6]*x1;
4240 W = W ? INTER_TAB_SIZE/W : 0;
4241 double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
4242 double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
4243 int X = saturate_cast<int>(fX);
4244 int Y = saturate_cast<int>(fY);
4246 xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
4247 xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
4248 alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
4249 (X & (INTER_TAB_SIZE-1)));
4254 if( interpolation == INTER_NEAREST )
4255 remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
4258 Mat _matA(bh, bw, CV_16U, A);
4259 remap( src, dpart, _XY, _matA, interpolation, borderType, borderValue );
4269 int interpolation, borderType;
4273 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
4274 class IPPwarpPerspectiveInvoker :
4275 public ParallelLoopBody
4278 IPPwarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation, int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveBackFunc _func, bool *_ok) :
4279 ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs), borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
4284 virtual void operator() (const Range& range) const
4286 IppiSize srcsize = {src.cols, src.rows};
4287 IppiRect srcroi = {0, 0, src.cols, src.rows};
4288 IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start};
4289 int cnn = src.channels();
4291 if( borderType == BORDER_CONSTANT )
4293 IppiSize setSize = {dst.cols, range.end - range.start};
4294 void *dataPointer = dst.data + dst.step[0] * range.start;
4295 if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
4297 *ok = false;
4298 return;
4301 if( func(src.data, srcsize, (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, coeffs, mode) < 0)
4302 *ok = false;
4307 double (&coeffs)[3][3];
4310 const Scalar borderValue;
4311 ippiWarpPerspectiveBackFunc func;
4313 const IPPwarpPerspectiveInvoker& operator= (const IPPwarpPerspectiveInvoker&);
4319 void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
4320 Size dsize, int flags, int borderType, const Scalar& borderValue )
4322 CV_Assert( _src.total() > 0 );
4324 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
4325 ocl_warpTransform(_src, _dst, _M0, dsize, flags, borderType, borderValue,
4326 OCL_OP_PERSPECTIVE))
4328 Mat src = _src.getMat(), M0 = _M0.getMat();
4329 _dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
4330 Mat dst = _dst.getMat();
4332 if( dst.data == src.data )
4333 src = src.clone();
4335 double M[9];
4336 Mat matM(3, 3, CV_64F, M);
4337 int interpolation = flags & INTER_MAX;
4338 if( interpolation == INTER_AREA )
4339 interpolation = INTER_LINEAR;
4341 CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3 );
4342 M0.convertTo(matM, matM.type());
4344 #ifdef HAVE_TEGRA_OPTIMIZATION
4345 if( tegra::warpPerspective(src, dst, M, flags, borderType, borderValue) )
4346 return;
4347 #endif
4349 if( !(flags & WARP_INVERSE_MAP) )
4352 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
4353 int depth = src.depth();
4354 int channels = src.channels();
4355 if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
4356 ( channels == 1 || channels == 3 || channels == 4 ) &&
4357 ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) )
4359 int type = src.type();
4360 ippiWarpPerspectiveBackFunc ippFunc =
4361 type == CV_8UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C1R :
4362 type == CV_8UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C3R :
4363 type == CV_8UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_8u_C4R :
4364 type == CV_16UC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C1R :
4365 type == CV_16UC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C3R :
4366 type == CV_16UC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_16u_C4R :
4367 type == CV_32FC1 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C1R :
4368 type == CV_32FC3 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C3R :
4369 type == CV_32FC4 ? (ippiWarpPerspectiveBackFunc)ippiWarpPerspectiveBack_32f_C4R :
4370 0;
4371 int mode =
4372 flags == INTER_LINEAR ? IPPI_INTER_LINEAR :
4373 flags == INTER_NEAREST ? IPPI_INTER_NN :
4374 flags == INTER_CUBIC ? IPPI_INTER_CUBIC :
4375 0;
4376 if( mode && ippFunc )
4378 double coeffs[3][3];
4379 for( int i = 0; i < 3; i++ )
4381 for( int j = 0; j < 3; j++ )
4383 coeffs[i][j] = matM.at<double>(i, j);
4386 bool ok = true;
4387 Range range(0, dst.rows);
4388 IPPwarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
4389 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
4391 if( ok )
4392 return;
4395 #endif
4396 Range range(0, dst.rows);
4397 warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
4398 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
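/* Illustrative usage sketch (not part of the library): calling cv::warpPerspective
 * with an explicit 3x3 matrix. The image size and the mild projective coefficients
 * below are arbitrary assumptions; headers as in the warpAffine sketch above.
 *
 *   static void warpPerspectiveExample()
 *   {
 *       cv::Mat src(480, 640, CV_8UC3, cv::Scalar::all(0));   // placeholder input
 *       double h[9] = { 1.0,   0.05, 0.0,
 *                       0.02,  1.0,  0.0,
 *                       1e-4,  0.0,  1.0 };
 *       cv::Mat H(3, 3, CV_64F, h);
 *       cv::Mat dst;
 *       cv::warpPerspective(src, dst, H, src.size(), cv::INTER_LINEAR,
 *                           cv::BORDER_CONSTANT, cv::Scalar::all(0));
 *   }
 */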
4402 cv::Mat cv::getRotationMatrix2D( Point2f center, double angle, double scale )
4404 angle *= CV_PI/180;
4405 double alpha = cos(angle)*scale;
4406 double beta = sin(angle)*scale;
4408 Mat M(2, 3, CV_64F);
4409 double* m = (double*)M.data;
4411 m[0] = alpha;
4412 m[1] = beta;
4413 m[2] = (1-alpha)*center.x - beta*center.y;
4414 m[3] = -beta;
4415 m[4] = alpha;
4416 m[5] = beta*center.x + (1-alpha)*center.y;
4418 return M;
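/* Illustrative usage sketch (not part of the library): rotating an image 30 degrees
 * counter-clockwise about its centre with no scaling; the size and angle are
 * arbitrary assumptions, and <opencv2/imgproc.hpp> is assumed to be included.
 *
 *   static void rotateExample()
 *   {
 *       cv::Mat src(480, 640, CV_8UC3, cv::Scalar::all(0));     // placeholder input
 *       cv::Point2f center(src.cols*0.5f, src.rows*0.5f);
 *       cv::Mat R = cv::getRotationMatrix2D(center, 30.0, 1.0); // angle is in degrees
 *       cv::Mat dst;
 *       cv::warpAffine(src, dst, R, src.size());
 *   }
 */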
4421 /* Calculates coefficients of perspective transformation
4422 * which maps (xi,yi) to (ui,vi), (i=1,2,3,4):
4424 * c00*xi + c01*yi + c02
4425 * ui = ---------------------
4426 * c20*xi + c21*yi + c22
4428 * c10*xi + c11*yi + c12
4429 * vi = ---------------------
4430 * c20*xi + c21*yi + c22
4432 * Coefficients are calculated by solving the linear system:
4433 * / x0 y0 1 0 0 0 -x0*u0 -y0*u0 \ /c00\ /u0\
4434 * | x1 y1 1 0 0 0 -x1*u1 -y1*u1 | |c01| |u1|
4435 * | x2 y2 1 0 0 0 -x2*u2 -y2*u2 | |c02| |u2|
4436 * | x3 y3 1 0 0 0 -x3*u3 -y3*u3 |.|c10|=|u3|,
4437 * | 0 0 0 x0 y0 1 -x0*v0 -y0*v0 | |c11| |v0|
4438 * | 0 0 0 x1 y1 1 -x1*v1 -y1*v1 | |c12| |v1|
4439 * | 0 0 0 x2 y2 1 -x2*v2 -y2*v2 | |c20| |v2|
4440 * \ 0 0 0 x3 y3 1 -x3*v3 -y3*v3 / \c21/ \v3/
4442 * where:
4443 * cij - matrix coefficients, c22 = 1
4444 */
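/* Illustrative example (not part of the library): the four point pairs below define
 * one instance of the system above; getPerspectiveTransform, implemented just below,
 * solves it with DECOMP_SVD and returns the 3x3 matrix with c22 = 1. The coordinates
 * are arbitrary assumptions.
 *
 *   static cv::Mat perspectiveFromCorners()
 *   {
 *       cv::Point2f srcPts[4] = { cv::Point2f(0, 0),     cv::Point2f(100, 0),
 *                                 cv::Point2f(100, 100), cv::Point2f(0, 100) };
 *       cv::Point2f dstPts[4] = { cv::Point2f(10, 20),   cv::Point2f(90, 5),
 *                                 cv::Point2f(95, 110),  cv::Point2f(5, 95) };
 *       return cv::getPerspectiveTransform(srcPts, dstPts);   // 3x3, CV_64F
 *   }
 */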
4445 cv::Mat cv::getPerspectiveTransform( const Point2f src[], const Point2f dst[] )
4447 Mat M(3, 3, CV_64F), X(8, 1, CV_64F, M.data);
4448 double a[8][8], b[8];
4449 Mat A(8, 8, CV_64F, a), B(8, 1, CV_64F, b);
4451 for( int i = 0; i < 4; ++i )
4453 a[i][0] = a[i+4][3] = src[i].x;
4454 a[i][1] = a[i+4][4] = src[i].y;
4455 a[i][2] = a[i+4][5] = 1;
4456 a[i][3] = a[i][4] = a[i][5] =
4457 a[i+4][0] = a[i+4][1] = a[i+4][2] = 0;
4458 a[i][6] = -src[i].x*dst[i].x;
4459 a[i][7] = -src[i].y*dst[i].x;
4460 a[i+4][6] = -src[i].x*dst[i].y;
4461 a[i+4][7] = -src[i].y*dst[i].y;
4462 b[i] = dst[i].x;
4463 b[i+4] = dst[i].y;
4466 solve( A, B, X, DECOMP_SVD );
4467 ((double*)M.data)[8] = 1.;
4469 return M;
4472 /* Calculates coefficients of affine transformation
4473 * which maps (xi,yi) to (ui,vi), (i=1,2,3):
4475 * ui = c00*xi + c01*yi + c02
4477 * vi = c10*xi + c11*yi + c12
4479 * Coefficients are calculated by solving the linear system:
4480 * / x0 y0 1 0 0 0 \ /c00\ /u0\
4481 * | x1 y1 1 0 0 0 | |c01| |u1|
4482 * | x2 y2 1 0 0 0 | |c02| |u2|
4483 * | 0 0 0 x0 y0 1 | |c10| |v0|
4484 * | 0 0 0 x1 y1 1 | |c11| |v1|
4485 * \ 0 0 0 x2 y2 1 / |c12| |v2|
4487 * where:
4488 * cij - matrix coefficients
4489 */
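/* Illustrative example (not part of the library): three point pairs fully determine
 * the 2x3 affine matrix; getAffineTransform, implemented just below, solves the 6x6
 * system above. The coordinates are arbitrary assumptions.
 *
 *   static cv::Mat affineFromTriangles()
 *   {
 *       cv::Point2f srcTri[3] = { cv::Point2f(0, 0),   cv::Point2f(100, 0),   cv::Point2f(0, 100) };
 *       cv::Point2f dstTri[3] = { cv::Point2f(10, 10), cv::Point2f(110, 20),  cv::Point2f(5, 115) };
 *       return cv::getAffineTransform(srcTri, dstTri);        // 2x3, CV_64F
 *   }
 */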
4491 cv::Mat cv::getAffineTransform( const Point2f src[], const Point2f dst[] )
4493 Mat M(2, 3, CV_64F), X(6, 1, CV_64F, M.data);
4494 double a[6*6], b[6];
4495 Mat A(6, 6, CV_64F, a), B(6, 1, CV_64F, b);
4497 for( int i = 0; i < 3; i++ )
4499 int j = i*12, k = i*12 + 6;
4501 a[j] = a[k+3] = src[i].x;
4502 a[j+1] = a[k+4] = src[i].y;
4503 a[j+2] = a[k+5] = 1;
4504 a[j+3] = a[j+4] = a[j+5] = 0;
4505 a[k] = a[k+1] = a[k+2] = 0;
4506 b[i*2] = dst[i].x;
4507 b[i*2+1] = dst[i].y;
4510 solve( A, B, X );
4511 return M;
4514 void cv::invertAffineTransform(InputArray _matM, OutputArray __iM)
4516 Mat matM = _matM.getMat();
4517 CV_Assert(matM.rows == 2 && matM.cols == 3);
4518 __iM.create(2, 3, matM.type());
4519 Mat _iM = __iM.getMat();
4521 if( matM.type() == CV_32F )
4523 const float* M = (const float*)matM.data;
4524 float* iM = (float*)_iM.data;
4525 int step = (int)(matM.step/sizeof(M[0])), istep = (int)(_iM.step/sizeof(iM[0]));
4527 double D = M[0]*M[step+1] - M[1]*M[step];
4528 D = D != 0 ? 1./D : 0;
4529 double A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
4530 double b1 = -A11*M[2] - A12*M[step+2];
4531 double b2 = -A21*M[2] - A22*M[step+2];
4533 iM[0] = (float)A11; iM[1] = (float)A12; iM[2] = (float)b1;
4534 iM[istep] = (float)A21; iM[istep+1] = (float)A22; iM[istep+2] = (float)b2;
4536 else if( matM.type() == CV_64F )
4538 const double* M = (const double*)matM.data;
4539 double* iM = (double*)_iM.data;
4540 int step = (int)(matM.step/sizeof(M[0])), istep = (int)(_iM.step/sizeof(iM[0]));
4542 double D = M[0]*M[step+1] - M[1]*M[step];
4543 D = D != 0 ? 1./D : 0;
4544 double A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
4545 double b1 = -A11*M[2] - A12*M[step+2];
4546 double b2 = -A21*M[2] - A22*M[step+2];
4548 iM[0] = A11; iM[1] = A12; iM[2] = b1;
4549 iM[istep] = A21; iM[istep+1] = A22; iM[istep+2] = b2;
4552 CV_Error( CV_StsUnsupportedFormat, "" );
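/* Illustrative usage sketch (not part of the library): inverting a forward 2x3 map
 * and passing it together with WARP_INVERSE_MAP, which is equivalent (up to rounding)
 * to passing the forward map without that flag. The rotation parameters are arbitrary
 * assumptions.
 *
 *   static void invertExample(const cv::Mat& src, cv::Mat& dst)
 *   {
 *       cv::Mat M = cv::getRotationMatrix2D(cv::Point2f(100.f, 100.f), 45.0, 1.0);
 *       cv::Mat iM;
 *       cv::invertAffineTransform(M, iM);
 *       cv::warpAffine(src, dst, iM, src.size(),
 *                      cv::INTER_LINEAR | cv::WARP_INVERSE_MAP);
 *   }
 */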
4555 cv::Mat cv::getPerspectiveTransform(InputArray _src, InputArray _dst)
4557 Mat src = _src.getMat(), dst = _dst.getMat();
4558 CV_Assert(src.checkVector(2, CV_32F) == 4 && dst.checkVector(2, CV_32F) == 4);
4559 return getPerspectiveTransform((const Point2f*)src.data, (const Point2f*)dst.data);
4562 cv::Mat cv::getAffineTransform(InputArray _src, InputArray _dst)
4564 Mat src = _src.getMat(), dst = _dst.getMat();
4565 CV_Assert(src.checkVector(2, CV_32F) == 3 && dst.checkVector(2, CV_32F) == 3);
4566 return getAffineTransform((const Point2f*)src.data, (const Point2f*)dst.data);
4570 cvResize( const CvArr* srcarr, CvArr* dstarr, int method )
4572 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
4573 CV_Assert( src.type() == dst.type() );
4574 cv::resize( src, dst, dst.size(), (double)dst.cols/src.cols,
4575 (double)dst.rows/src.rows, method );
4580 cvWarpAffine( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
4581 int flags, CvScalar fillval )
4583 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
4584 cv::Mat matrix = cv::cvarrToMat(marr);
4585 CV_Assert( src.type() == dst.type() );
4586 cv::warpAffine( src, dst, matrix, dst.size(), flags,
4587 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
4588 fillval );
4592 cvWarpPerspective( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
4593 int flags, CvScalar fillval )
4595 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
4596 cv::Mat matrix = cv::cvarrToMat(marr);
4597 CV_Assert( src.type() == dst.type() );
4598 cv::warpPerspective( src, dst, matrix, dst.size(), flags,
4599 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
4600 fillval );
4604 cvRemap( const CvArr* srcarr, CvArr* dstarr,
4605 const CvArr* _mapx, const CvArr* _mapy,
4606 int flags, CvScalar fillval )
4608 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), dst0 = dst;
4609 cv::Mat mapx = cv::cvarrToMat(_mapx), mapy = cv::cvarrToMat(_mapy);
4610 CV_Assert( src.type() == dst.type() && dst.size() == mapx.size() );
4611 cv::remap( src, dst, mapx, mapy, flags & cv::INTER_MAX,
4612 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
4613 fillval );
4614 CV_Assert( dst0.data == dst.data );
4619 cv2DRotationMatrix( CvPoint2D32f center, double angle,
4620 double scale, CvMat* matrix )
4622 cv::Mat M0 = cv::cvarrToMat(matrix), M = cv::getRotationMatrix2D(center, angle, scale);
4623 CV_Assert( M.size() == M0.size() );
4624 M.convertTo(M0, M0.type());
4630 cvGetPerspectiveTransform( const CvPoint2D32f* src,
4631 const CvPoint2D32f* dst,
4632 CvMat* matrix )
4634 cv::Mat M0 = cv::cvarrToMat(matrix),
4635 M = cv::getPerspectiveTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
4636 CV_Assert( M.size() == M0.size() );
4637 M.convertTo(M0, M0.type());
4643 cvGetAffineTransform( const CvPoint2D32f* src,
4644 const CvPoint2D32f* dst,
4645 CvMat* matrix )
4647 cv::Mat M0 = cv::cvarrToMat(matrix),
4648 M = cv::getAffineTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
4649 CV_Assert( M.size() == M0.size() );
4650 M.convertTo(M0, M0.type());
4656 cvConvertMaps( const CvArr* arr1, const CvArr* arr2, CvArr* dstarr1, CvArr* dstarr2 )
4658 cv::Mat map1 = cv::cvarrToMat(arr1), map2;
4659 cv::Mat dstmap1 = cv::cvarrToMat(dstarr1), dstmap2;
4661 if( arr2 )
4662 map2 = cv::cvarrToMat(arr2);
4664 if( dstarr2 )
4665 dstmap2 = cv::cvarrToMat(dstarr2);
4666 if( dstmap2.type() == CV_16SC1 )
4667 dstmap2 = cv::Mat(dstmap2.size(), CV_16UC1, dstmap2.data, dstmap2.step);
4670 cv::convertMaps( map1, map2, dstmap1, dstmap2, dstmap1.type(), false );
4673 /****************************************************************************************\
4674 * Log-Polar Transform *
4675 \****************************************************************************************/
4677 /* This is currently implemented via remap; a more accurate implementation
4678    would use a super-sampling technique outside of the "fovea" circle */
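/* Illustrative usage sketch (not part of the library): a log-polar view centred on
 * the middle of the image; the magnitude scale M = 40 is an arbitrary assumption,
 * and <opencv2/imgproc.hpp> is assumed to be included.
 *
 *   static void logPolarExample(const cv::Mat& src, cv::Mat& dst)
 *   {
 *       cv::logPolar(src, dst, cv::Point2f(src.cols*0.5f, src.rows*0.5f),
 *                    40.0, cv::INTER_LINEAR);
 *   }
 */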
4680 cvLogPolar( const CvArr* srcarr, CvArr* dstarr,
4681 CvPoint2D32f center, double M, int flags )
4683 cv::Ptr<CvMat> mapx, mapy;
4685 CvMat srcstub, *src = cvGetMat(srcarr, &srcstub);
4686 CvMat dststub, *dst = cvGetMat(dstarr, &dststub);
4687 CvSize ssize, dsize;
4689 if( !CV_ARE_TYPES_EQ( src, dst ))
4690 CV_Error( CV_StsUnmatchedFormats, "" );
4692 if( M <= 0 )
4693 CV_Error( CV_StsOutOfRange, "M should be >0" );
4695 ssize = cvGetMatSize(src);
4696 dsize = cvGetMatSize(dst);
4698 mapx.reset(cvCreateMat( dsize.height, dsize.width, CV_32F ));
4699 mapy.reset(cvCreateMat( dsize.height, dsize.width, CV_32F ));
4701 if( !(flags & CV_WARP_INVERSE_MAP) )
4703 int rho, phi;
4704 cv::AutoBuffer<double> _exp_tab(dsize.width);
4705 double* exp_tab = _exp_tab;
4707 for( rho = 0; rho < dst->width; rho++ )
4708 exp_tab[rho] = std::exp(rho/M);
4710 for( phi = 0; phi < dsize.height; phi++ )
4712 double cp = cos(phi*2*CV_PI/dsize.height);
4713 double sp = sin(phi*2*CV_PI/dsize.height);
4714 float* mx = (float*)(mapx->data.ptr + phi*mapx->step);
4715 float* my = (float*)(mapy->data.ptr + phi*mapy->step);
4717 for( rho = 0; rho < dsize.width; rho++ )
4719 double r = exp_tab[rho];
4720 double x = r*cp + center.x;
4721 double y = r*sp + center.y;
4731 CvMat bufx, bufy, bufp, bufa;
4732 double ascale = ssize.height/(2*CV_PI);
4733 cv::AutoBuffer<float> _buf(4*dsize.width);
4734 float* buf = _buf;
4736 bufx = cvMat( 1, dsize.width, CV_32F, buf );
4737 bufy = cvMat( 1, dsize.width, CV_32F, buf + dsize.width );
4738 bufp = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*2 );
4739 bufa = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*3 );
4741 for( x = 0; x < dsize.width; x++ )
4742 bufx.data.fl[x] = (float)x - center.x;
4744 for( y = 0; y < dsize.height; y++ )
4746 float* mx = (float*)(mapx->data.ptr + y*mapx->step);
4747 float* my = (float*)(mapy->data.ptr + y*mapy->step);
4749 for( x = 0; x < dsize.width; x++ )
4750 bufy.data.fl[x] = (float)y - center.y;
4753 cvCartToPolar( &bufx, &bufy, &bufp, &bufa );
4755 for( x = 0; x < dsize.width; x++ )
4756 bufp.data.fl[x] += 1.f;
4758 cvLog( &bufp, &bufp );
4760 for( x = 0; x < dsize.width; x++ )
4762 double rho = bufp.data.fl[x]*M;
4763 double phi = bufa.data.fl[x]*ascale;
4769 for( x = 0; x < dsize.width; x++ )
4771 double xx = bufx.data.fl[x];
4772 double yy = bufy.data.fl[x];
4774 double p = log(std::sqrt(xx*xx + yy*yy) + 1.)*M;
4775 double a = atan2(yy,xx);
4787 cvRemap( src, dst, mapx, mapy, flags, cvScalarAll(0) );
4790 void cv::logPolar( InputArray _src, OutputArray _dst,
4791 Point2f center, double M, int flags )
4793 Mat src = _src.getMat();
4794 _dst.create( src.size(), src.type() );
4795 CvMat c_src = src, c_dst = _dst.getMat();
4796 cvLogPolar( &c_src, &c_dst, center, M, flags );
4799 /****************************************************************************************
4800 Linear-Polar Transform
4801 J.L. Blanco, Apr 2009
4802 ****************************************************************************************/
4804 void cvLinearPolar( const CvArr* srcarr, CvArr* dstarr,
4805 CvPoint2D32f center, double maxRadius, int flags )
4807 cv::Ptr<CvMat> mapx, mapy;
4809 CvMat srcstub, *src = (CvMat*)srcarr;
4810 CvMat dststub, *dst = (CvMat*)dstarr;
4811 CvSize ssize, dsize;
4813 src = cvGetMat( srcarr, &srcstub,0,0 );
4814 dst = cvGetMat( dstarr, &dststub,0,0 );
4816 if( !CV_ARE_TYPES_EQ( src, dst ))
4817 CV_Error( CV_StsUnmatchedFormats, "" );
4819 ssize.width = src->cols;
4820 ssize.height = src->rows;
4821 dsize.width = dst->cols;
4822 dsize.height = dst->rows;
4824 mapx.reset(cvCreateMat( dsize.height, dsize.width, CV_32F ));
4825 mapy.reset(cvCreateMat( dsize.height, dsize.width, CV_32F ));
4827 if( !(flags & CV_WARP_INVERSE_MAP) )
4829 int phi, rho;
4831 for( phi = 0; phi < dsize.height; phi++ )
4833 double cp = cos(phi*2*CV_PI/dsize.height);
4834 double sp = sin(phi*2*CV_PI/dsize.height);
4835 float* mx = (float*)(mapx->data.ptr + phi*mapx->step);
4836 float* my = (float*)(mapy->data.ptr + phi*mapy->step);
4838 for( rho = 0; rho < dsize.width; rho++ )
4840 double r = maxRadius*(rho+1)/dsize.width;
4841 double x = r*cp + center.x;
4842 double y = r*sp + center.y;
4852 CvMat bufx, bufy, bufp, bufa;
4853 const double ascale = ssize.height/(2*CV_PI);
4854 const double pscale = ssize.width/maxRadius;
4856 cv::AutoBuffer<float> _buf(4*dsize.width);
4857 float* buf = _buf;
4859 bufx = cvMat( 1, dsize.width, CV_32F, buf );
4860 bufy = cvMat( 1, dsize.width, CV_32F, buf + dsize.width );
4861 bufp = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*2 );
4862 bufa = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*3 );
4864 for( x = 0; x < dsize.width; x++ )
4865 bufx.data.fl[x] = (float)x - center.x;
4867 for( y = 0; y < dsize.height; y++ )
4869 float* mx = (float*)(mapx->data.ptr + y*mapx->step);
4870 float* my = (float*)(mapy->data.ptr + y*mapy->step);
4872 for( x = 0; x < dsize.width; x++ )
4873 bufy.data.fl[x] = (float)y - center.y;
4875 cvCartToPolar( &bufx, &bufy, &bufp, &bufa, 0 );
4877 for( x = 0; x < dsize.width; x++ )
4878 bufp.data.fl[x] += 1.f;
4880 for( x = 0; x < dsize.width; x++ )
4882 double rho = bufp.data.fl[x]*pscale;
4883 double phi = bufa.data.fl[x]*ascale;
4890 cvRemap( src, dst, mapx, mapy, flags, cvScalarAll(0) );
4893 void cv::linearPolar( InputArray _src, OutputArray _dst,
4894 Point2f center, double maxRadius, int flags )
4896 Mat src = _src.getMat();
4897 _dst.create( src.size(), src.type() );
4898 CvMat c_src = src, c_dst = _dst.getMat();
4899 cvLinearPolar( &c_src, &c_dst, center, maxRadius, flags );
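/* Illustrative usage sketch (not part of the library): an "unwrapped" polar view of
 * the image; choosing maxRadius as half of the smaller image side is an arbitrary
 * assumption, and <opencv2/imgproc.hpp> is assumed to be included.
 *
 *   static void linearPolarExample(const cv::Mat& src, cv::Mat& dst)
 *   {
 *       double maxRadius = 0.5 * (src.cols < src.rows ? src.cols : src.rows);
 *       cv::linearPolar(src, dst, cv::Point2f(src.cols*0.5f, src.rows*0.5f),
 *                       maxRadius, cv::INTER_LINEAR);
 *   }
 */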