typedef tuple<MatType, Size, Size> MatInfo_Size_Size_t;
typedef TestBaseWithParam<MatInfo_Size_Size_t> MatInfo_Size_Size;
+typedef tuple<Size, Size> Size_Size_t;
+typedef tuple<MatType, Size_Size_t> MatInfo_SizePair_t;
+typedef TestBaseWithParam<MatInfo_SizePair_t> MatInfo_SizePair;
+
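+// Matrix types exercised by the NonExact resize perf tests below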
+#define MATTYPE_NE_VALUES CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4, \
+ CV_16UC1, CV_16UC2, CV_16UC3, CV_16UC4, \
+ CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4
+
+// Fill a gradient-like pattern for the non-8U matrix formats (cvtest::fillGradient() covers the 8U case)
+template<typename T>
+static void fillFPGradient(Mat& img)
+{
+ const int ch = img.channels();
+
+ int r, c, i;
+ for(r=0; r<img.rows; r++)
+ {
+ for(c=0; c<img.cols; c++)
+ {
+ T vals[] = {(T)r, (T)c, (T)(r*c), (T)(r*c/(r+c+1))};
+ T *p = (T*)img.ptr(r, c);
+ for(i=0; i<ch; i++) p[i] = (T)vals[i];
+ }
+ }
+}
PERF_TEST_P(MatInfo_Size_Size, resizeUpLinear,
testing::Values(
#endif
}
+PERF_TEST_P(MatInfo_SizePair, resizeUpLinearNonExact,
+ testing::Combine
+ (
+ testing::Values( MATTYPE_NE_VALUES ),
+ testing::Values( Size_Size_t(szVGA, szqHD), Size_Size_t(szVGA, sz720p) )
+ )
+ )
+{
+ int matType = get<0>(GetParam());
+ Size_Size_t sizes = get<1>(GetParam());
+ Size from = get<0>(sizes);
+ Size to = get<1>(sizes);
+
+ cv::Mat src(from, matType), dst(to, matType);
+ switch(src.depth())
+ {
+ case CV_8U: cvtest::fillGradient(src); break;
+ case CV_16U: fillFPGradient<ushort>(src); break;
+ case CV_32F: fillFPGradient<float>(src); break;
+ }
+ declare.in(src).out(dst);
+
+ TEST_CYCLE_MULTIRUN(10) resize(src, dst, to, 0, 0, INTER_LINEAR);
+
+ SANITY_CHECK_NOTHING();
+}
+
PERF_TEST_P(MatInfo_Size_Size, resizeDownLinear,
testing::Values(
MatInfo_Size_Size_t(CV_8UC1, szVGA, szQVGA),
#endif
}
+PERF_TEST_P(MatInfo_SizePair, resizeDownLinearNonExact,
+ testing::Combine
+ (
+ testing::Values( MATTYPE_NE_VALUES ),
+ testing::Values
+ (
+ Size_Size_t(szVGA, szQVGA),
+ Size_Size_t(szqHD, szVGA),
+ Size_Size_t(sz720p, Size(120 * sz720p.width / sz720p.height, 120)),
+ Size_Size_t(sz720p, szVGA),
+ Size_Size_t(sz720p, szQVGA)
+ )
+ )
+ )
+{
+ int matType = get<0>(GetParam());
+ Size_Size_t sizes = get<1>(GetParam());
+ Size from = get<0>(sizes);
+ Size to = get<1>(sizes);
+
+ cv::Mat src(from, matType), dst(to, matType);
+ switch(src.depth())
+ {
+ case CV_8U: cvtest::fillGradient(src); break;
+ case CV_16U: fillFPGradient<ushort>(src); break;
+ case CV_32F: fillFPGradient<float>(src); break;
+ }
+ declare.in(src).out(dst);
+
+ TEST_CYCLE_MULTIRUN(10) resize(src, dst, to, 0, 0, INTER_LINEAR);
+
+ SANITY_CHECK_NOTHING();
+}
+
typedef tuple<MatType, Size, int> MatInfo_Size_Scale_t;
typedef TestBaseWithParam<MatInfo_Size_Scale_t> MatInfo_Size_Scale;
#endif
+#if CV_SIMD128
+
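+// Horizontal pass of the linear resize for 4-lane vector types:
+// every output element is S[sx]*alpha[2*dx] + S[sx+cn]*alpha[2*dx+1], with sx = xofs[dx].
+// Rows are processed in pairs where possible; the return value is the number of
+// leading output elements already computed, so the scalar caller can resume there.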
+template<typename ST, typename DT, typename AT, typename DVT>
+struct HResizeLinearVec_X4
+{
+ int operator()(const uchar** _src, uchar** _dst, int count, const int* xofs,
+ const uchar* _alpha, int, int, int cn, int, int xmax) const
+ {
+ const ST **src = (const ST**)_src;
+ const AT *alpha = (const AT*)_alpha;
+ DT **dst = (DT**)_dst;
+ const int nlanes = 4;
+ const int len0 = xmax & -nlanes;
+ int dx = 0, k = 0;
+
+ for( ; k <= (count - 2); k+=2 )
+ {
+ const ST *S0 = src[k];
+ DT *D0 = dst[k];
+ const ST *S1 = src[k+1];
+ DT *D1 = dst[k+1];
+
+ for( dx = 0; dx < len0; dx += nlanes )
+ {
+ int sx0 = xofs[dx+0];
+ int sx1 = xofs[dx+1];
+ int sx2 = xofs[dx+2];
+ int sx3 = xofs[dx+3];
+ DVT a_even;
+ DVT a_odd;
+
+ v_load_deinterleave(&alpha[dx*2], a_even, a_odd);
+ DVT s0(S0[sx0], S0[sx1], S0[sx2], S0[sx3]);
+ DVT s1(S0[sx0+cn], S0[sx1+cn], S0[sx2+cn], S0[sx3+cn]);
+ DVT s0_u(S1[sx0], S1[sx1], S1[sx2], S1[sx3]);
+ DVT s1_u(S1[sx0+cn], S1[sx1+cn], S1[sx2+cn], S1[sx3+cn]);
+ v_store(&D1[dx], s0_u * a_even + s1_u * a_odd);
+ v_store(&D0[dx], s0 * a_even + s1 * a_odd);
+ }
+ }
+ for( ; k < count; k++ )
+ {
+ const ST *S = src[k];
+ DT *D = dst[k];
+ for( dx = 0; dx < len0; dx += nlanes )
+ {
+ int sx0 = xofs[dx+0];
+ int sx1 = xofs[dx+1];
+ int sx2 = xofs[dx+2];
+ int sx3 = xofs[dx+3];
+ DVT a_even;
+ DVT a_odd;
+
+ v_load_deinterleave(&alpha[dx*2], a_even, a_odd);
+ DVT s0(S[sx0], S[sx1], S[sx2], S[sx3]);
+ DVT s1(S[sx0+cn], S[sx1+cn], S[sx2+cn], S[sx3+cn]);
+ v_store(&D[dx], s0 * a_even + s1 * a_odd);
+ }
+ }
+ return dx;
+ }
+};
+
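+// uchar -> int specialization. Source samples are widened to 16 bits and folded
+// with the fixed-point alphas by v_dotprod, which multiplies adjacent 16-bit lane
+// pairs and accumulates each pair into a 32-bit lane. The per-channel-count
+// branches differ only in how the scattered samples are gathered into that
+// paired (s0, s1) lane layout.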
+struct HResizeLinearVecU8_X4
+{
+ int operator()(const uchar** src, uchar** _dst, int count, const int* xofs,
+ const uchar* _alpha, int, int, int cn, int, int xmax) const
+ {
+ const short *alpha = (const short*)_alpha;
+ int **dst = (int**)_dst;
+ int dx = 0, k = 0;
+
+ if(cn == 1)
+ {
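+ // cn==1: the two samples of each output element are adjacent bytes,
+ // so v_lut_pairs gathers eight (S[sx], S[sx+1]) pairs in one shot.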
+ const int step = 8;
+ const int len0 = xmax & -step;
+ for( ; k <= (count - 2); k+=2 )
+ {
+ const uchar *S0 = src[k];
+ int *D0 = dst[k];
+ const uchar *S1 = src[k+1];
+ int *D1 = dst[k+1];
+
+ for( dx = 0; dx < len0; dx += step )
+ {
+ v_int16x8 al = v_load(alpha+dx*2);
+ v_int16x8 ah = v_load(alpha+dx*2+8);
+ v_uint16x8 sl, sh;
+ v_expand(v_lut_pairs(S0, xofs+dx), sl, sh);
+ v_store(&D0[dx], v_dotprod(v_reinterpret_as_s16(sl), al));
+ v_store(&D0[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah));
+ v_expand(v_lut_pairs(S1, xofs+dx), sl, sh);
+ v_store(&D1[dx], v_dotprod(v_reinterpret_as_s16(sl), al));
+ v_store(&D1[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah));
+ }
+ }
+ for( ; k < count; k++ )
+ {
+ const uchar *S = src[k];
+ int *D = dst[k];
+ for( dx = 0; dx < len0; dx += step )
+ {
+ v_int16x8 al = v_load(alpha+dx*2);
+ v_int16x8 ah = v_load(alpha+dx*2+8);
+ v_uint16x8 sl, sh;
+ v_expand(v_lut_pairs(S, xofs+dx), sl, sh);
+ v_store(&D[dx], v_dotprod(v_reinterpret_as_s16(sl), al));
+ v_store(&D[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah));
+ }
+ }
+ }
+ else if(cn == 2)
+ {
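+ // cn==2: both channels of two neighbouring source pixels occupy 4
+ // consecutive bytes, so v_lut_quads fetches them together and
+ // v_interleave_pairs reorders each quad to (s0, s1, s0', s1') to
+ // match the interleaved alpha layout.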
+ const int step = 8;
+ const int len0 = xmax & -step;
+ for( ; k <= (count - 2); k+=2 )
+ {
+ const uchar *S0 = src[k];
+ int *D0 = dst[k];
+ const uchar *S1 = src[k+1];
+ int *D1 = dst[k+1];
+
+ for( dx = 0; dx < len0; dx += step )
+ {
+ v_int16x8 al = v_load(alpha+dx*2);
+ v_int16x8 ah = v_load(alpha+dx*2+8);
+ v_uint16x8 sl, sh;
+ v_expand(v_interleave_pairs(v_lut_quads(S0, xofs+dx)), sl, sh);
+ v_store(&D0[dx], v_dotprod(v_reinterpret_as_s16(sl), al));
+ v_store(&D0[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah));
+ v_expand(v_interleave_pairs(v_lut_quads(S1, xofs+dx)), sl, sh);
+ v_store(&D1[dx], v_dotprod(v_reinterpret_as_s16(sl), al));
+ v_store(&D1[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah));
+ }
+ }
+ for( ; k < count; k++ )
+ {
+ const uchar *S = src[k];
+ int *D = dst[k];
+ for( dx = 0; dx < len0; dx += step )
+ {
+ v_int16x8 al = v_load(alpha+dx*2);
+ v_int16x8 ah = v_load(alpha+dx*2+8);
+ v_uint16x8 sl, sh;
+ v_expand(v_interleave_pairs(v_lut_quads(S, xofs+dx)), sl, sh);
+ v_store(&D[dx], v_dotprod(v_reinterpret_as_s16(sl), al));
+ v_store(&D[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah));
+ }
+ }
+ }
+ else if(cn == 3)
+ {
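+ // cn==3: a 4-byte load covers only the 3 channels of one pixel, so
+ // just 3 of the 4 computed lanes are valid; dx advances by 3 (== 3*step/4)
+ // and the overlapping store lets the next iteration overwrite the bad
+ // lane. len0 = xmax - step keeps the final overlapped access in bounds.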
+ const int step = 4;
+ const int len0 = xmax - step;
+ for( ; k <= (count - 2); k+=2 )
+ {
+ const uchar *S0 = src[k];
+ int *D0 = dst[k];
+ const uchar *S1 = src[k+1];
+ int *D1 = dst[k+1];
+
+ for( dx = 0; dx < len0; dx += 3*step/4 )
+ {
+ v_int16x8 a = v_load(alpha+dx*2);
+ v_store(&D0[dx], v_dotprod(v_reinterpret_as_s16(v_load_expand_q(S0+xofs[dx]) | (v_load_expand_q(S0+xofs[dx]+cn)<<16)), a));
+ v_store(&D1[dx], v_dotprod(v_reinterpret_as_s16(v_load_expand_q(S1+xofs[dx]) | (v_load_expand_q(S1+xofs[dx]+cn)<<16)), a));
+ }
+ }
+ for( ; k < count; k++ )
+ {
+ const uchar *S = src[k];
+ int *D = dst[k];
+ for( dx = 0; dx < len0; dx += 3*step/4 )
+ {
+ v_int16x8 a = v_load(alpha+dx*2);
+ v_store(&D[dx], v_dotprod(v_reinterpret_as_s16(v_load_expand_q(S+xofs[dx]) | (v_load_expand_q(S+xofs[dx]+cn)<<16)), a));
+ }
+ }
+ }
+ else if(cn == 4)
+ {
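+ // cn==4: the 8 bytes at xofs[dx] hold all 4 channels of both source
+ // pixels; v_interleave_quads rearranges the widened lanes so each
+ // channel's (s0, s1) pair sits in adjacent 16-bit lanes.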
+ const int step = 4;
+ const int len0 = xmax & -step;
+ for( ; k <= (count - 2); k+=2 )
+ {
+ const uchar *S0 = src[k];
+ int *D0 = dst[k];
+ const uchar *S1 = src[k+1];
+ int *D1 = dst[k+1];
+
+ for( dx = 0; dx < len0; dx += step )
+ {
+ v_int16x8 a = v_load(alpha+dx*2);
+ v_store(&D0[dx], v_dotprod(v_reinterpret_as_s16(v_interleave_quads(v_load_expand(S0+xofs[dx]))), a));
+ v_store(&D1[dx], v_dotprod(v_reinterpret_as_s16(v_interleave_quads(v_load_expand(S1+xofs[dx]))), a));
+ }
+ }
+ for( ; k < count; k++ )
+ {
+ const uchar *S = src[k];
+ int *D = dst[k];
+ for( dx = 0; dx < len0; dx += step )
+ {
+ v_int16x8 a = v_load(alpha+dx*2);
+ v_store(&D[dx], v_dotprod(v_reinterpret_as_s16(v_interleave_quads(v_load_expand(S+xofs[dx]))), a));
+ }
+ }
+ }
+ else if(cn < 9)
+ {
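+ // 5..8 channels: samples are contiguous, so two straight 8-byte loads
+ // at sx and sx+cn provide the pairs and v_zip interleaves them; dx
+ // advances by cn, so lanes that spill into the next pixel are
+ // recomputed on the following iteration.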
+ const int step = 8;
+ const int len0 = xmax & -step;
+ for( ; k <= (count - 2); k+=2 )
+ {
+ const uchar *S0 = src[k];
+ int *D0 = dst[k];
+ const uchar *S1 = src[k+1];
+ int *D1 = dst[k+1];
+
+ for( dx = 0; dx < len0; dx += cn )
+ {
+ v_int16x8 a0 = v_load(alpha+dx*2);
+ v_int16x8 a1 = v_load(alpha+dx*2 + 8);
+ v_uint16x8 s0, s1;
+ v_zip(v_load_expand(S0+xofs[dx]), v_load_expand(S0+xofs[dx]+cn), s0, s1);
+ v_store(&D0[dx], v_dotprod(v_reinterpret_as_s16(s0), a0));
+ v_store(&D0[dx+4], v_dotprod(v_reinterpret_as_s16(s1), a1));
+ v_zip(v_load_expand(S1+xofs[dx]), v_load_expand(S1+xofs[dx]+cn), s0, s1);
+ v_store(&D1[dx], v_dotprod(v_reinterpret_as_s16(s0), a0));
+ v_store(&D1[dx+4], v_dotprod(v_reinterpret_as_s16(s1), a1));
+ }
+ }
+ for( ; k < count; k++ )
+ {
+ const uchar *S = src[k];
+ int *D = dst[k];
+ for( dx = 0; dx < len0; dx += cn )
+ {
+ v_int16x8 a0 = v_load(alpha+dx*2);
+ v_int16x8 a1 = v_load(alpha+dx*2 + 8);
+ v_uint16x8 s0, s1;
+ v_zip(v_load_expand(S+xofs[dx]), v_load_expand(S+xofs[dx]+cn), s0, s1);
+ v_store(&D[dx], v_dotprod(v_reinterpret_as_s16(s0), a0));
+ v_store(&D[dx+4], v_dotprod(v_reinterpret_as_s16(s1), a1));
+ }
+ }
+ }
+ else
+ {
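+ // Very wide pixels: gather 16 first samples with v_lut and, cn bytes
+ // further on, the 16 matching second samples; zip them into pairs and
+ // emit 16 results with four dot products per iteration.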
+ const int step = 16;
+ const int len0 = (xmax - cn) & -step;
+ for( ; k <= (count - 2); k+=2 )
+ {
+ const uchar *S0 = src[k];
+ int *D0 = dst[k];
+ const uchar *S1 = src[k+1];
+ int *D1 = dst[k+1];
+
+ for( dx = 0; dx < len0; dx += step )
+ {
+ v_int16x8 a0 = v_load(alpha+dx*2);
+ v_int16x8 a1 = v_load(alpha+dx*2 + 8);
+ v_int16x8 a2 = v_load(alpha+dx*2 + 16);
+ v_int16x8 a3 = v_load(alpha+dx*2 + 24);
+ v_uint8x16 s01, s23;
+ v_zip(v_lut(S0, xofs+dx), v_lut(S0+cn, xofs+dx), s01, s23);
+ v_store(&D0[dx], v_dotprod(v_reinterpret_as_s16(v_expand_low(s01)), a0));
+ v_store(&D0[dx+4], v_dotprod(v_reinterpret_as_s16(v_expand_high(s01)), a1));
+ v_store(&D0[dx+8], v_dotprod(v_reinterpret_as_s16(v_expand_low(s23)), a2));
+ v_store(&D0[dx+12], v_dotprod(v_reinterpret_as_s16(v_expand_high(s23)), a3));
+ v_zip(v_lut(S1, xofs+dx), v_lut(S1+cn, xofs+dx), s01, s23);
+ v_store(&D1[dx], v_dotprod(v_reinterpret_as_s16(v_expand_low(s01)), a0));
+ v_store(&D1[dx+4], v_dotprod(v_reinterpret_as_s16(v_expand_high(s01)), a1));
+ v_store(&D1[dx+8], v_dotprod(v_reinterpret_as_s16(v_expand_low(s23)), a2));
+ v_store(&D1[dx+12], v_dotprod(v_reinterpret_as_s16(v_expand_high(s23)), a3));
+ }
+ }
+ for( ; k < count; k++ )
+ {
+ const uchar *S = src[k];
+ int *D = dst[k];
+ for( dx = 0; dx < len0; dx += step )
+ {
+ v_int16x8 a0 = v_load(alpha+dx*2);
+ v_int16x8 a1 = v_load(alpha+dx*2 + 8);
+ v_int16x8 a2 = v_load(alpha+dx*2 + 16);
+ v_int16x8 a3 = v_load(alpha+dx*2 + 24);
+ v_uint8x16 s01, s23;
+ v_zip(v_lut(S, xofs+dx), v_lut(S+cn, xofs+dx), s01, s23);
+ v_store(&D[dx], v_dotprod(v_reinterpret_as_s16(v_expand_low(s01)), a0));
+ v_store(&D[dx+4], v_dotprod(v_reinterpret_as_s16(v_expand_high(s01)), a1));
+ v_store(&D[dx+8], v_dotprod(v_reinterpret_as_s16(v_expand_low(s23)), a2));
+ v_store(&D[dx+12], v_dotprod(v_reinterpret_as_s16(v_expand_high(s23)), a3));
+ }
+ }
+ }
+ return dx;
+ }
+};
+
+typedef HResizeLinearVec_X4<float,float,float,v_float32x4> HResizeLinearVec_32f;
+typedef HResizeLinearVec_X4<ushort,float,float,v_float32x4> HResizeLinearVec_16u32f;
+typedef HResizeLinearVec_X4<short,float,float,v_float32x4> HResizeLinearVec_16s32f;
+typedef HResizeLinearVecU8_X4 HResizeLinearVec_8u32s;
+
+#else
+
typedef HResizeNoVec HResizeLinearVec_8u32s;
typedef HResizeNoVec HResizeLinearVec_16u32f;
typedef HResizeNoVec HResizeLinearVec_16s32f;
typedef HResizeNoVec HResizeLinearVec_32f;
+
+#endif
+
typedef HResizeNoVec HResizeLinearVec_64f;
int dx0 = vecOp((const uchar**)src, (uchar**)dst, count,
xofs, (const uchar*)alpha, swidth, dwidth, cn, xmin, xmax );
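+ // vecOp() returns the number of leading elements it already resized (dx0);
+ // each iteration of the loop below consumes two rows (S0/S1), hence k += 2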
- for( k = 0; k <= count - 2; k++ )
+ for( k = 0; k <= count - 2; k+=2 )
{
const T *S0 = src[k], *S1 = src[k+1];
WT *D0 = dst[k], *D1 = dst[k+1];
{
const T *S = src[k];
WT *D = dst[k];
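+ // finish the elements the vectorized op did not cover, starting at dx0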
- for( dx = 0; dx < xmax; dx++ )
+ for( dx = dx0; dx < xmax; dx++ )
{
int sx = xofs[dx];
D[dx] = S[sx]*alpha[dx*2] + S[sx+cn]*alpha[dx*2+1];