static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3, Buffer &dst4)
{
- const auto *in = src.InLine<uchar>(0);
- auto *out1 = dst1.OutLine<uchar>();
- auto *out2 = dst2.OutLine<uchar>();
- auto *out3 = dst3.OutLine<uchar>();
- auto *out4 = dst4.OutLine<uchar>();
+ const auto *in = src.InLine<uchar>(0);
+ auto *out1 = dst1.OutLine<uchar>();
+ auto *out2 = dst2.OutLine<uchar>();
+ auto *out3 = dst3.OutLine<uchar>();
+ auto *out4 = dst4.OutLine<uchar>();
GAPI_Assert(4 == src.meta().chan);
int width = src.length();
+ int w = 0;
- int w = 0; // cycle counter
-
- #if CV_SIMD128
- for (; w <= width-16; w+=16)
- {
- v_uint8x16 a, b, c, d;
- v_load_deinterleave(&in[4*w], a, b, c, d);
- v_store(&out1[w], a);
- v_store(&out2[w], b);
- v_store(&out3[w], c);
- v_store(&out4[w], d);
- }
+ #if CV_SIMD
+ w = split4_simd(in, out1, out2, out3, out4, width);
#endif
for (; w < width; w++)
CV_CPU_DISPATCH_MODES_ALL);
}
+int split4_simd(const uchar in[], uchar out1[], uchar out2[],
+ uchar out3[], uchar out4[], const int width)
+{
+ CV_CPU_DISPATCH(split4_simd, (in, out1, out2, out3, out4, width),
+ CV_CPU_DISPATCH_MODES_ALL);
+}
+
} // namespace fluid
} // namespace gapi
} // namespace cv
// Declarations of the dispatched SIMD row helpers; each returns the
// number of pixels processed by the vector path.
int split3_simd(const uchar in[], uchar out1[], uchar out2[],
                uchar out3[], const int width);
int split4_simd(const uchar in[], uchar out1[], uchar out2[],
                uchar out3[], uchar out4[], const int width);
+
} // namespace fluid
} // namespace gapi
} // namespace cv
// Declarations of the dispatched SIMD row helpers; each returns the
// number of pixels processed by the vector path.
int split3_simd(const uchar in[], uchar out1[], uchar out2[],
                uchar out3[], const int width);
int split4_simd(const uchar in[], uchar out1[], uchar out2[],
                uchar out3[], uchar out4[], const int width);
+
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
struct scale_tag {};  // empty tag type — presumably used for tag dispatch in resize helpers elsewhere; not referenced in this chunk (TODO confirm)
uchar out3[], const int width)
{
constexpr int nlanes = v_uint8::nlanes;
+ if (width < nlanes)
+ return 0;
+
+ int x = 0;
+ for (;;)
+ {
+ for (; x <= width - nlanes; x += nlanes)
+ {
+ v_uint8 a, b, c;
+ v_load_deinterleave(&in[3 * x], a, b, c);
+ vx_store(&out1[x], a);
+ vx_store(&out2[x], b);
+ vx_store(&out3[x], c);
+ }
+ if (x < width)
+ {
+ x = width - nlanes;
+ continue;
+ }
+ break;
+ }
+ return x;
+}
+
//-------------------------
//
// Fluid kernels: Split4
//
//-------------------------
+
+int split4_simd(const uchar in[], uchar out1[], uchar out2[],
+ uchar out3[], uchar out4[], const int width)
+{
+ constexpr int nlanes = v_uint8::nlanes;
+ if (width < nlanes)
+ return 0;
+
int x = 0;
- for (; x <= width - nlanes; x += nlanes)
+ for (;;)
{
- v_uint8 a, b, c;
- v_load_deinterleave(&in[3 * x], a, b, c);
- vx_store(&out1[x], a);
- vx_store(&out2[x], b);
- vx_store(&out3[x], c);
+ for (; x <= width - nlanes; x += nlanes)
+ {
+ v_uint8 a, b, c, d;
+ v_load_deinterleave(&in[4 * x], a, b, c, d);
+ vx_store(&out1[x], a);
+ vx_store(&out2[x], b);
+ vx_store(&out3[x], c);
+ vx_store(&out4[x], d);
+ }
+ if (x < width)
+ {
+ x = width - nlanes;
+ continue;
+ }
+ break;
}
return x;
}