#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"
+#include <iostream>
+
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
static IppStatus sts = ippInit();
#endif
int cn, step;
};
-typedef ResizeAreaFastNoVec<ushort, ushort> ResizeAreaFastVec_SIMD_16u;
+class ResizeAreaFastVec_SIMD_16u
+{
+public:
+ ResizeAreaFastVec_SIMD_16u(int _cn, int _step) :
+ cn(_cn), step(_step)
+ {
+ }
+
+ int operator() (const ushort * S, ushort * D, int w) const
+ {
+ int dx = 0;
+ const ushort * S0 = S, * S1 = (const ushort *)((const uchar *)(S0) + step);
+
+ uint32x4_t v_2 = vdupq_n_u32(2);
+
+ if (cn == 1)
+ {
+ for ( ; dx <= w - 8; dx += 8, S0 += 16, S1 += 16, D += 8)
+ {
+ uint16x8x2_t v_row0 = vld2q_u16(S0), v_row1 = vld2q_u16(S1);
+
+ uint32x4_t v_dst0 = vaddl_u16(vget_low_u16(v_row0.val[0]), vget_low_u16(v_row0.val[1]));
+ v_dst0 = vaddq_u32(v_dst0, vaddl_u16(vget_low_u16(v_row1.val[0]), vget_low_u16(v_row1.val[1])));
+ v_dst0 = vshrq_n_u32(vaddq_u32(v_dst0, v_2), 2);
+
+ uint32x4_t v_dst1 = vaddl_u16(vget_high_u16(v_row0.val[0]), vget_high_u16(v_row0.val[1]));
+ v_dst1 = vaddq_u32(v_dst1, vaddl_u16(vget_high_u16(v_row1.val[0]), vget_high_u16(v_row1.val[1])));
+ v_dst1 = vshrq_n_u32(vaddq_u32(v_dst1, v_2), 2);
+
+ vst1q_u16(D, vcombine_u16(vmovn_u32(v_dst0), vmovn_u32(v_dst1)));
+ }
+ }
+ else if (cn == 4)
+ {
+ for ( ; dx <= w - 4; dx += 4, S0 += 8, S1 += 8, D += 4)
+ {
+ uint16x8_t v_row0 = vld1q_u16(S0), v_row1 = vld1q_u16(S1);
+ uint32x4_t v_dst = vaddq_u32(vaddl_u16(vget_low_u16(v_row0), vget_high_u16(v_row0)),
+ vaddl_u16(vget_low_u16(v_row1), vget_high_u16(v_row1)));
+ vst1_u16(D, vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst, v_2), 2)));
+ }
+ }
+
+ return dx;
+ }
+
+private:
+ int cn, step;
+};
#elif CV_SSE2
TEST(Resize, Area_half)
{
- int types[] = { CV_8UC1, CV_8UC4 };
+ const int size = 10;
+ int types[] = { CV_8UC1, CV_8UC4, CV_16UC1, CV_16UC4 };
- for (int i = 0, size = sizeof(types) / sizeof(types[0]); i < size; ++i)
+ cv::RNG rng(17);
+
+ for (int i = 0, _size = sizeof(types) / sizeof(types[0]); i < _size; ++i)
{
- int type = types[i];
- cv::Mat src(100, 100, type), dst_actual(50, 50, type), dst_reference(50, 50, type);
+ int type = types[i], depth = CV_MAT_DEPTH(type);
+
+ SCOPED_TRACE(depth);
+
+ cv::Mat src(size, size, type), dst_actual(size >> 1, size >> 1, type),
+ dst_reference(size >> 1, size >> 1, type);
+
+ rng.fill(src, cv::RNG::UNIFORM, 0, 1000, true);
- if (CV_MAT_DEPTH(type) == CV_8U)
+ if (depth == CV_8U)
resizeArea<uchar, ushort>(src, dst_reference);
+ else if (depth == CV_16U)
+ resizeArea<ushort, int>(src, dst_reference);
else
CV_Assert(0);