1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 // Copyright (C) 2018-2019 Intel Corporation
7 #if !defined(GAPI_STANDALONE)
11 #include "opencv2/gapi/own/assert.hpp"
12 #include "opencv2/core/traits.hpp"
13 #include "opencv2/core/hal/hal.hpp"
14 #include "opencv2/core/hal/intrin.hpp"
16 #include "opencv2/gapi/core.hpp"
18 #include "opencv2/gapi/fluid/gfluidbuffer.hpp"
19 #include "opencv2/gapi/fluid/gfluidkernel.hpp"
20 #include "opencv2/gapi/fluid/core.hpp"
22 #include "gfluidbuffer_priv.hpp"
23 #include "gfluidbackend.hpp"
24 #include "gfluidutils.hpp"
34 //---------------------
36 // Arithmetic functions
38 //---------------------
40 template<typename DST, typename SRC1, typename SRC2>
41 static inline DST absdiff(SRC1 x, SRC2 y)
43 auto result = x > y? x - y: y - x;
44 return saturate<DST>(result, roundf);
47 template<typename DST, typename SRC1, typename SRC2>
48 static inline DST addWeighted(SRC1 src1, SRC2 src2, float alpha, float beta, float gamma)
50 float dst = src1*alpha + src2*beta + gamma;
51 return saturate<DST>(dst, roundf);
54 template<typename DST, typename SRC1, typename SRC2>
55 static inline DST add(SRC1 x, SRC2 y)
57 return saturate<DST>(x + y, roundf);
60 template<typename DST, typename SRC1, typename SRC2>
61 static inline DST sub(SRC1 x, SRC2 y)
63 return saturate<DST>(x - y, roundf);
66 template<typename DST, typename SRC1, typename SRC2>
67 static inline DST subr(SRC1 x, SRC2 y)
69 return saturate<DST>(y - x, roundf); // reverse: y - x
72 template<typename DST, typename SRC1, typename SRC2>
73 static inline DST mul(SRC1 x, SRC2 y, float scale=1)
75 auto result = scale * x * y;
76 return saturate<DST>(result, rintf);
79 template<typename DST, typename SRC1, typename SRC2>
80 static inline DST div(SRC1 x, SRC2 y, float scale=1)
82 // like OpenCV: returns 0, if y=0
83 auto result = y? scale * x / y: 0;
84 return saturate<DST>(result, rintf);
87 template<typename DST, typename SRC1, typename SRC2>
88 static inline DST divr(SRC1 x, SRC2 y, float scale=1)
90 auto result = x? scale * y / x: 0; // reverse: y / x
91 return saturate<DST>(result, rintf);
94 //---------------------------
96 // Fluid kernels: addWeighted
98 //---------------------------
100 template<typename DST, typename SRC1, typename SRC2>
101 static void run_addweighted(Buffer &dst, const View &src1, const View &src2,
102 double alpha, double beta, double gamma)
104 static_assert(std::is_same<SRC1, SRC2>::value, "wrong types");
106 const auto *in1 = src1.InLine<SRC1>(0);
107 const auto *in2 = src2.InLine<SRC2>(0);
108 auto *out = dst.OutLine<DST>();
110 int width = dst.length();
111 int chan = dst.meta().chan;
112 int length = width * chan;
114 // NB: assume in/out types are not 64-bits
115 auto _alpha = static_cast<float>( alpha );
116 auto _beta = static_cast<float>( beta );
117 auto _gamma = static_cast<float>( gamma );
119 for (int l=0; l < length; l++)
120 out[l] = addWeighted<DST>(in1[l], in2[l], _alpha, _beta, _gamma);
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GAddW.
// Each BINARY_ row below names one supported (DST, SRC1, SRC2) element-type
// combination and forwards the arguments to run_addweighted.
// NOTE(review): BINARY_ is defined outside this view; presumably it returns
// once a row matches, so the trailing CV_Error only fires for unlisted
// combinations - confirm against the macro definition.
123 GAPI_FLUID_KERNEL(GFluidAddW, cv::gapi::core::GAddW, false)
// run_addweighted reads only input line 0 of each source.
125 static const int Window = 1;
// The dtype argument is intentionally unnamed: it is not used here.
127 static void run(const View &src1, double alpha, const View &src2,
128 double beta, double gamma, int /*dtype*/,
131 // DST SRC1 SRC2 OP __VA_ARGS__
132 BINARY_(uchar , uchar , uchar , run_addweighted, dst, src1, src2, alpha, beta, gamma);
133 BINARY_(uchar , ushort, ushort, run_addweighted, dst, src1, src2, alpha, beta, gamma);
134 BINARY_(uchar , short, short, run_addweighted, dst, src1, src2, alpha, beta, gamma);
135 BINARY_( short, short, short, run_addweighted, dst, src1, src2, alpha, beta, gamma);
136 BINARY_(ushort, ushort, ushort, run_addweighted, dst, src1, src2, alpha, beta, gamma);
137 BINARY_( float, uchar , uchar , run_addweighted, dst, src1, src2, alpha, beta, gamma);
138 BINARY_( float, ushort, ushort, run_addweighted, dst, src1, src2, alpha, beta, gamma);
139 BINARY_( float, short, short, run_addweighted, dst, src1, src2, alpha, beta, gamma);
// Reports a type combination that is not listed in the table above.
141 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
145 //--------------------------
147 // Fluid kernels: +, -, *, /
149 //--------------------------
// Selects which element-wise arithmetic operation the run_arithm* helpers
// perform.
enum Arithm
{
    ARITHM_ABSDIFF,   // absolute difference
    ARITHM_ADD,       // addition
    ARITHM_SUBTRACT,  // subtraction (direct or reverse, depending on the helper)
    ARITHM_MULTIPLY,  // multiplication, optionally scaled
    ARITHM_DIVIDE     // division (direct or reverse), optionally scaled
};
// Processes one output line of a two-input element-wise arithmetic operation
// selected by `arithm`. Both inputs must share the same element type.
// NOTE(review): the `scale` parameter, the switch statement and some of its
// case labels / break statements are not visible in this view; the loops
// below are presumably the bodies of the ABSDIFF, ADD, SUBTRACT, MULTIPLY and
// DIVIDE cases in that order - confirm against the full source.
153 template<typename DST, typename SRC1, typename SRC2>
154 static void run_arithm(Buffer &dst, const View &src1, const View &src2, Arithm arithm,
157 static_assert(std::is_same<SRC1, SRC2>::value, "wrong types");
// Only input line 0 of each source is read.
159 const auto *in1 = src1.InLine<SRC1>(0);
160 const auto *in2 = src2.InLine<SRC2>(0);
161 auto *out = dst.OutLine<DST>();
// Channels are flattened: the line is handled as width*chan scalar elements.
163 int width = dst.length();
164 int chan = dst.meta().chan;
165 int length = width * chan;
167 // NB: assume in/out types are not 64-bits
168 float _scale = static_cast<float>( scale );
// Absolute difference of the two inputs.
173 for (int l=0; l < length; l++)
174 out[l] = absdiff<DST>(in1[l], in2[l]);
// Sum of the two inputs.
177 for (int l=0; l < length; l++)
178 out[l] = add<DST>(in1[l], in2[l]);
180 case ARITHM_SUBTRACT:
181 for (int l=0; l < length; l++)
182 out[l] = sub<DST>(in1[l], in2[l]);
184 case ARITHM_MULTIPLY:
185 for (int l=0; l < length; l++)
186 out[l] = mul<DST>(in1[l], in2[l], _scale);
// Scaled division; div() yields 0 where the divisor is 0.
189 for (int l=0; l < length; l++)
190 out[l] = div<DST>(in1[l], in2[l], _scale);
192 default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GAdd.
// Each BINARY_ row names one supported (DST, SRC1, SRC2) combination and
// forwards it to run_arithm with ARITHM_ADD.
// NOTE(review): BINARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
196 GAPI_FLUID_KERNEL(GFluidAdd, cv::gapi::core::GAdd, false)
// run_arithm reads only input line 0 of each source.
198 static const int Window = 1;
// The dtype argument is intentionally unnamed: it is not used here.
200 static void run(const View &src1, const View &src2, int /*dtype*/, Buffer &dst)
202 // DST SRC1 SRC2 OP __VA_ARGS__
203 BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_ADD);
204 BINARY_(uchar , short, short, run_arithm, dst, src1, src2, ARITHM_ADD);
205 BINARY_(uchar , float, float, run_arithm, dst, src1, src2, ARITHM_ADD);
206 BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_ADD);
207 BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_ADD);
208 BINARY_( float, short, short, run_arithm, dst, src1, src2, ARITHM_ADD);
209 BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_ADD);
// Reports a type combination that is not listed in the table above.
211 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GSub.
// Each BINARY_ row names one supported (DST, SRC1, SRC2) combination and
// forwards it to run_arithm with ARITHM_SUBTRACT (src1 - src2).
// NOTE(review): BINARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
215 GAPI_FLUID_KERNEL(GFluidSub, cv::gapi::core::GSub, false)
// run_arithm reads only input line 0 of each source.
217 static const int Window = 1;
// The dtype argument is intentionally unnamed: it is not used here.
219 static void run(const View &src1, const View &src2, int /*dtype*/, Buffer &dst)
221 // DST SRC1 SRC2 OP __VA_ARGS__
222 BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_SUBTRACT);
223 BINARY_(uchar , short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);
224 BINARY_(uchar , float, float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);
225 BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);
226 BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_SUBTRACT);
227 BINARY_( float, short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);
228 BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT);
// Reports a type combination that is not listed in the table above.
230 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GMul.
// Each BINARY_ row names one supported (DST, SRC1, SRC2) combination and
// forwards it, together with `scale`, to run_arithm with ARITHM_MULTIPLY.
// NOTE(review): BINARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
234 GAPI_FLUID_KERNEL(GFluidMul, cv::gapi::core::GMul, false)
// run_arithm reads only input line 0 of each source.
236 static const int Window = 1;
// The dtype argument is intentionally unnamed: it is not used here.
238 static void run(const View &src1, const View &src2, double scale, int /*dtype*/, Buffer &dst)
240 // DST SRC1 SRC2 OP __VA_ARGS__
241 BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);
242 BINARY_(uchar , short, short, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);
243 BINARY_(uchar , float, float, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);
244 BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);
245 BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);
246 BINARY_( float, short, short, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);
247 BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_MULTIPLY, scale);
// Reports a type combination that is not listed in the table above.
249 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GDiv.
// Each BINARY_ row names one supported (DST, SRC1, SRC2) combination and
// forwards it, together with `scale`, to run_arithm with ARITHM_DIVIDE.
// The div() helper used there yields 0 where the divisor is 0.
// NOTE(review): BINARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
253 GAPI_FLUID_KERNEL(GFluidDiv, cv::gapi::core::GDiv, false)
// run_arithm reads only input line 0 of each source.
255 static const int Window = 1;
// The dtype argument is intentionally unnamed: it is not used here.
257 static void run(const View &src1, const View &src2, double scale, int /*dtype*/, Buffer &dst)
259 // DST SRC1 SRC2 OP __VA_ARGS__
260 BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);
261 BINARY_(uchar , short, short, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);
262 BINARY_(uchar , float, float, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);
263 BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);
264 BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);
265 BINARY_( float, short, short, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);
266 BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_DIVIDE, scale);
// Reports a type combination that is not listed in the table above.
268 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GAbsDiff.
// Only same-type combinations are listed (DST == SRC1 == SRC2); each row is
// forwarded to run_arithm with ARITHM_ABSDIFF.
// NOTE(review): BINARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
272 GAPI_FLUID_KERNEL(GFluidAbsDiff, cv::gapi::core::GAbsDiff, false)
// run_arithm reads only input line 0 of each source.
274 static const int Window = 1;
276 static void run(const View &src1, const View &src2, Buffer &dst)
278 // DST SRC1 SRC2 OP __VA_ARGS__
279 BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_ABSDIFF);
280 BINARY_(ushort, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_ABSDIFF);
281 BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_ABSDIFF);
282 BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_ABSDIFF);
// Reports a type combination that is not listed in the table above.
284 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
288 //--------------------------------------
290 // Fluid kernels: +, -, *, / with Scalar
292 //--------------------------------------
294 static inline v_uint16x8 v_add_16u(const v_uint16x8 &x, const v_uint16x8 &y) { return x + y; }
295 static inline v_uint16x8 v_sub_16u(const v_uint16x8 &x, const v_uint16x8 &y) { return x - y; }
296 static inline v_uint16x8 v_subr_16u(const v_uint16x8 &x, const v_uint16x8 &y) { return y - x; }
298 static inline v_float32x4 v_add_32f(const v_float32x4 &x, const v_float32x4 &y) { return x + y; }
299 static inline v_float32x4 v_sub_32f(const v_float32x4 &x, const v_float32x4 &y) { return x - y; }
300 static inline v_float32x4 v_subr_32f(const v_float32x4 &x, const v_float32x4 &y) { return y - x; }
302 static inline int s_add_8u(uchar x, uchar y) { return x + y; }
303 static inline int s_sub_8u(uchar x, uchar y) { return x - y; }
304 static inline int s_subr_8u(uchar x, uchar y) { return y - x; }
// Scalar callbacks for 32-bit float data, passed to run_arithm_s1 as its
// `s_op` argument.

// x + y
static inline float s_add_32f(float x, float y)
{
    return x + y;
}

// x - y
static inline float s_sub_32f(float x, float y)
{
    return x - y;
}

// reverse subtraction: y - x
static inline float s_subr_32f(float x, float y)
{
    return y - x;
}
310 // manual SIMD for the important 8UC3 case
// Applies a per-channel operation between a 3-channel 8-bit line and a
// 3-element scalar. `v_op` is the vector form of the operation (on 16-bit
// lanes) and `s_op` the scalar form used for the leftover pixels.
// `width` is the number of pixels; in/out hold 3*width elements.
// NOTE(review): the declaration of `w`, the SIMD guard, the x/y/z and r0/r1
// declarations and the statements that widen x/y/z into r0/r1 and narrow
// them back are not visible in this view - confirm against the full source.
311 static void run_arithm_s3(uchar out[], const uchar in[], int width, const uchar scalar[],
312 v_uint16x8 (*v_op)(const v_uint16x8&, const v_uint16x8&),
313 int (*s_op)(uchar, uchar))
// Vector part: 16 pixels per iteration, channels split into separate registers.
318 for (; w <= width-16; w+=16)
321 v_load_deinterleave(&in[3*w], x, y, z);
// Channel 0: both halves combined with scalar[0] broadcast to every lane.
326 r0 = v_op(r0, v_setall_u16(scalar[0])); // x + scalar[0]
327 r1 = v_op(r1, v_setall_u16(scalar[0]));
// Channel 1.
331 r0 = v_op(r0, v_setall_u16(scalar[1])); // y + scalar[1]
332 r1 = v_op(r1, v_setall_u16(scalar[1]));
// Channel 2.
336 r0 = v_op(r0, v_setall_u16(scalar[2])); // z + scalar[2]
337 r1 = v_op(r1, v_setall_u16(scalar[2]));
// Channels are interleaved again on store.
340 v_store_interleave(&out[3*w], x, y, z);
// Keeps the compiler quiet about v_op, presumably for builds where the
// vector loop above is compiled out - confirm.
343 cv::util::suppress_unused_warning(v_op);
// Scalar tail: remaining pixels, one at a time; s_op returns int and the
// result is saturated to uchar.
344 for (; w < width; w++)
346 out[3*w ] = saturate<uchar>( s_op(in[3*w ], scalar[0]) );
347 out[3*w + 1] = saturate<uchar>( s_op(in[3*w + 1], scalar[1]) );
348 out[3*w + 2] = saturate<uchar>( s_op(in[3*w + 2], scalar[2]) );
352 // manual SIMD when rounding 32F into 8U, single channel
// Applies an operation between a single-channel float line and scalar[0],
// writing 8-bit results. `v_op` is the vector form (4 float lanes) and
// `s_op` the scalar form used for the leftover elements.
// NOTE(review): the declaration of `w`, the SIMD guard, the statements that
// convert r0..r3 into i0..i3 and the us0/us1/uc declarations are not visible
// in this view; the rounding mode of the vector path therefore cannot be
// confirmed from here.
353 static void run_arithm_s1(uchar out[], const float in[], int width, const float scalar[],
354 v_float32x4 (*v_op)(const v_float32x4&, const v_float32x4&),
355 float (*s_op)(float, float))
// Vector part: 16 floats (four registers of 4) per iteration.
360 for (; w <= width-16; w+=16)
362 v_float32x4 r0, r1, r2, r3;
363 r0 = v_load(&in[w ]);
364 r1 = v_load(&in[w + 4]);
365 r2 = v_load(&in[w + 8]);
366 r3 = v_load(&in[w + 12]);
// Every register is combined with scalar[0] broadcast to all lanes.
368 r0 = v_op(r0, v_setall_f32(scalar[0])); // r + scalar[0]
369 r1 = v_op(r1, v_setall_f32(scalar[0]));
370 r2 = v_op(r2, v_setall_f32(scalar[0]));
371 r3 = v_op(r3, v_setall_f32(scalar[0]));
373 v_int32x4 i0, i1, i2, i3;
// Narrowing: 32-bit ints -> 16-bit unsigned -> 8-bit unsigned.
380 us0 = v_pack_u(i0, i1);
381 us1 = v_pack_u(i2, i3);
384 uc = v_pack(us0, us1);
386 v_store(&out[w], uc);
// Keeps the compiler quiet about v_op, presumably for builds where the
// vector loop above is compiled out - confirm.
389 cv::util::suppress_unused_warning(v_op);
// Scalar tail: remaining elements, rounded with std::roundf and saturated.
390 for (; w < width; w++)
392 out[w] = saturate<uchar>(s_op(in[w], scalar[0]), std::roundf);
396 static void run_arithm_s_add3(uchar out[], const uchar in[], int width, const uchar scalar[])
398 run_arithm_s3(out, in, width, scalar, v_add_16u, s_add_8u);
401 static void run_arithm_s_sub3(uchar out[], const uchar in[], int width, const uchar scalar[])
403 run_arithm_s3(out, in, width, scalar, v_sub_16u, s_sub_8u);
406 static void run_arithm_s_subr3(uchar out[], const uchar in[], int width, const uchar scalar[])
408 run_arithm_s3(out, in, width, scalar, v_subr_16u, s_subr_8u); // reverse: subr
411 static void run_arithm_s_add1(uchar out[], const float in[], int width, const float scalar[])
413 run_arithm_s1(out, in, width, scalar, v_add_32f, s_add_32f);
416 static void run_arithm_s_sub1(uchar out[], const float in[], int width, const float scalar[])
418 run_arithm_s1(out, in, width, scalar, v_sub_32f, s_sub_32f);
421 static void run_arithm_s_subr1(uchar out[], const float in[], int width, const float scalar[])
423 run_arithm_s1(out, in, width, scalar, v_subr_32f, s_subr_32f); // reverse: subr
426 // manually unroll the inner cycle by channels
// Applies func(in_element, scalar[channel]) to every element of an
// interleaved line of `width` pixels with `chan` channels, writing to `out`.
// There is one hand-unrolled loop per supported channel count (4, 3, 2, 1).
// NOTE(review): the conditions selecting a loop by `chan` are not visible in
// this view; presumably each loop is guarded by the matching channel count
// and CV_Error is reached only for other values - confirm.
427 template<typename DST, typename SRC, typename SCALAR, typename FUNC>
428 static void run_arithm_s(DST out[], const SRC in[], int width, int chan,
429 const SCALAR scalar[4], FUNC func)
// 4-channel layout.
433 for (int w=0; w < width; w++)
435 out[4*w + 0] = func(in[4*w + 0], scalar[0]);
436 out[4*w + 1] = func(in[4*w + 1], scalar[1]);
437 out[4*w + 2] = func(in[4*w + 2], scalar[2]);
438 out[4*w + 3] = func(in[4*w + 3], scalar[3]);
// 3-channel layout.
444 for (int w=0; w < width; w++)
446 out[3*w + 0] = func(in[3*w + 0], scalar[0]);
447 out[3*w + 1] = func(in[3*w + 1], scalar[1]);
448 out[3*w + 2] = func(in[3*w + 2], scalar[2]);
// 2-channel layout.
454 for (int w=0; w < width; w++)
456 out[2*w + 0] = func(in[2*w + 0], scalar[0]);
457 out[2*w + 1] = func(in[2*w + 1], scalar[1]);
// Single-channel layout: only scalar[0] is used.
463 for (int w=0; w < width; w++)
465 out[w] = func(in[w], scalar[0]);
469 CV_Error(cv::Error::StsBadArg, "unsupported number of channels");
// Processes one output line of "image (op) scalar" for the operation chosen
// by `arithm`; the scalar has one float component per channel.
// NOTE(review): the `scale` parameter, the switch statement, several case
// labels and parts of the if-conditions are not visible in this view. The
// visible code suggests this layout, to be confirmed against the full source:
//   ABSDIFF  - generic per-channel loop with the float scalar;
//   ADD/SUB  - when `usemyscal` holds, a SIMD helper for uchar->uchar
//              (presumably 3 channels) or float->uchar (presumably 1
//              channel), otherwise the unrolled run_arithm_s overload with
//              the SRC-typed scalar; when `usemyscal` does not hold, the
//              unrolled overload with the float scalar;
//   MUL/DIV  - generic per-channel loops with `scale`.
472 template<typename DST, typename SRC>
473 static void run_arithm_s(Buffer &dst, const View &src, const float scalar[4], Arithm arithm,
// Only input line 0 is read.
476 const auto *in = src.InLine<SRC>(0);
477 auto *out = dst.OutLine<DST>();
479 int width = dst.length();
480 int chan = dst.meta().chan;
482 // What if we cast the scalar into the SRC type?
483 const SRC myscal[4] = { static_cast<SRC>(scalar[0]), static_cast<SRC>(scalar[1]),
484 static_cast<SRC>(scalar[2]), static_cast<SRC>(scalar[3]) };
// True only if all four components survive the cast to SRC unchanged, i.e.
// the SRC-typed scalar can replace the float one without changing values.
485 bool usemyscal = (myscal[0] == scalar[0]) && (myscal[1] == scalar[1]) &&
486 (myscal[2] == scalar[2]) && (myscal[3] == scalar[3]);
// Absolute difference between each element and its channel's scalar.
491 for (int w=0; w < width; w++)
492 for (int c=0; c < chan; c++)
493 out[chan*w + c] = absdiff<DST>(in[chan*w + c], scalar[c]);
// Addition. The C-style pointer casts below are needed only so that every
// template instantiation compiles; each call sits behind an is_same check
// for the pointer types it casts to.
498 if (std::is_same<DST,uchar>::value &&
499 std::is_same<SRC,uchar>::value &&
501 run_arithm_s_add3((uchar*)out, (const uchar*)in, width, (const uchar*)myscal);
502 else if (std::is_same<DST,uchar>::value &&
503 std::is_same<SRC,float>::value &&
505 run_arithm_s_add1((uchar*)out, (const float*)in, width, (const float*)myscal);
507 run_arithm_s(out, in, width, chan, myscal, add<DST,SRC,SRC>);
510 run_arithm_s(out, in, width, chan, scalar, add<DST,SRC,float>);
512 case ARITHM_SUBTRACT:
// Subtraction mirrors the addition dispatch above.
515 if (std::is_same<DST,uchar>::value &&
516 std::is_same<SRC,uchar>::value &&
518 run_arithm_s_sub3((uchar*)out, (const uchar*)in, width, (const uchar*)myscal);
519 else if (std::is_same<DST,uchar>::value &&
520 std::is_same<SRC,float>::value &&
522 run_arithm_s_sub1((uchar*)out, (const float*)in, width, (const float*)myscal);
524 run_arithm_s(out, in, width, chan, myscal, sub<DST,SRC,SRC>);
527 run_arithm_s(out, in, width, chan, scalar, sub<DST,SRC,float>);
529 // TODO: optimize multiplication and division
530 case ARITHM_MULTIPLY:
531 for (int w=0; w < width; w++)
532 for (int c=0; c < chan; c++)
533 out[chan*w + c] = mul<DST>(in[chan*w + c], scalar[c], scale);
// Division by the scalar; div() yields 0 where scalar[c] is 0.
536 for (int w=0; w < width; w++)
537 for (int c=0; c < chan; c++)
538 out[chan*w + c] = div<DST>(in[chan*w + c], scalar[c], scale);
540 default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation");
// Processes one output line of the reverse form "scalar (op) image": the
// scalar is the left operand. Only subtraction and division are visible here.
// NOTE(review): the `scale` parameter, the switch statement, the DIVIDE case
// label and parts of the if-conditions are not visible in this view; the
// dispatch presumably mirrors run_arithm_s - confirm against the full source.
544 template<typename DST, typename SRC>
545 static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], Arithm arithm,
// Only input line 0 is read.
548 const auto *in = src.InLine<SRC>(0);
549 auto *out = dst.OutLine<DST>();
551 int width = dst.length();
552 int chan = dst.meta().chan;
554 // What if we cast the scalar into the SRC type?
555 const SRC myscal[4] = { static_cast<SRC>(scalar[0]), static_cast<SRC>(scalar[1]),
556 static_cast<SRC>(scalar[2]), static_cast<SRC>(scalar[3]) };
// True only if all four components survive the cast to SRC unchanged.
557 bool usemyscal = (myscal[0] == scalar[0]) && (myscal[1] == scalar[1]) &&
558 (myscal[2] == scalar[2]) && (myscal[3] == scalar[3]);
562 case ARITHM_SUBTRACT:
// Reverse subtraction (scalar - in). The C-style pointer casts are needed
// only so that every template instantiation compiles; each call sits behind
// an is_same check for the pointer types it casts to.
565 if (std::is_same<DST,uchar>::value &&
566 std::is_same<SRC,uchar>::value &&
568 run_arithm_s_subr3((uchar*)out, (const uchar*)in, width, (const uchar*)myscal);
569 else if (std::is_same<DST,uchar>::value &&
570 std::is_same<SRC,float>::value &&
572 run_arithm_s_subr1((uchar*)out, (const float*)in, width, (const float*)myscal);
574 run_arithm_s(out, in, width, chan, myscal, subr<DST,SRC,SRC>);
577 run_arithm_s(out, in, width, chan, scalar, subr<DST,SRC,float>);
579 // TODO: optimize division
// Reverse division: the scalar is the dividend and the pixel the divisor;
// div() yields 0 where the pixel is 0.
581 for (int w=0; w < width; w++)
582 for (int c=0; c < chan; c++)
583 out[chan*w + c] = div<DST>(scalar[c], in[chan*w + c], scale);
585 default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GAbsDiffC
// (absolute difference between an image and a scalar).
// Each UNARY_ row names one supported (DST, SRC) combination and forwards it
// to run_arithm_s with ARITHM_ABSDIFF.
// NOTE(review): UNARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
589 GAPI_FLUID_KERNEL(GFluidAbsDiffC, cv::gapi::core::GAbsDiffC, false)
// run_arithm_s reads only input line 0.
591 static const int Window = 1;
593 static void run(const View &src, const cv::Scalar &_scalar, Buffer &dst)
// The scalar components are narrowed to float for the worker function.
595 const float scalar[4] = {
596 static_cast<float>(_scalar[0]),
597 static_cast<float>(_scalar[1]),
598 static_cast<float>(_scalar[2]),
599 static_cast<float>(_scalar[3])
602 // DST SRC OP __VA_ARGS__
603 UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_ABSDIFF);
604 UNARY_(ushort, ushort, run_arithm_s, dst, src, scalar, ARITHM_ABSDIFF);
605 UNARY_( short, short, run_arithm_s, dst, src, scalar, ARITHM_ABSDIFF);
// Reports a type combination that is not listed in the table above.
607 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GAddC
// (image plus scalar).
// Each UNARY_ row names one supported (DST, SRC) combination and forwards it
// to run_arithm_s with ARITHM_ADD.
// NOTE(review): UNARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
611 GAPI_FLUID_KERNEL(GFluidAddC, cv::gapi::core::GAddC, false)
// run_arithm_s reads only input line 0.
613 static const int Window = 1;
// The dtype argument is intentionally unnamed: it is not used here.
615 static void run(const View &src, const cv::Scalar &_scalar, int /*dtype*/, Buffer &dst)
// The scalar components are narrowed to float for the worker function.
617 const float scalar[4] = {
618 static_cast<float>(_scalar[0]),
619 static_cast<float>(_scalar[1]),
620 static_cast<float>(_scalar[2]),
621 static_cast<float>(_scalar[3])
624 // DST SRC OP __VA_ARGS__
625 UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_ADD);
626 UNARY_(uchar , short, run_arithm_s, dst, src, scalar, ARITHM_ADD);
627 UNARY_(uchar , float, run_arithm_s, dst, src, scalar, ARITHM_ADD);
628 UNARY_( short, short, run_arithm_s, dst, src, scalar, ARITHM_ADD);
629 UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_ADD);
630 UNARY_( float, short, run_arithm_s, dst, src, scalar, ARITHM_ADD);
631 UNARY_( float, float, run_arithm_s, dst, src, scalar, ARITHM_ADD);
// Reports a type combination that is not listed in the table above.
633 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GSubC
// (image minus scalar).
// Each UNARY_ row names one supported (DST, SRC) combination and forwards it
// to run_arithm_s with ARITHM_SUBTRACT.
// NOTE(review): UNARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
637 GAPI_FLUID_KERNEL(GFluidSubC, cv::gapi::core::GSubC, false)
// run_arithm_s reads only input line 0.
639 static const int Window = 1;
// The dtype argument is intentionally unnamed: it is not used here.
641 static void run(const View &src, const cv::Scalar &_scalar, int /*dtype*/, Buffer &dst)
// The scalar components are narrowed to float for the worker function.
643 const float scalar[4] = {
644 static_cast<float>(_scalar[0]),
645 static_cast<float>(_scalar[1]),
646 static_cast<float>(_scalar[2]),
647 static_cast<float>(_scalar[3])
650 // DST SRC OP __VA_ARGS__
651 UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
652 UNARY_(uchar , short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
653 UNARY_(uchar , float, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
654 UNARY_( short, short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
655 UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
656 UNARY_( float, short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
657 UNARY_( float, float, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
// Reports a type combination that is not listed in the table above.
659 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GSubRC
// (reverse subtraction: scalar minus image). Note the argument order of
// run(): the scalar comes first.
// Each UNARY_ row names one supported (DST, SRC) combination and forwards it
// to run_arithm_rs with ARITHM_SUBTRACT.
// NOTE(review): UNARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
663 GAPI_FLUID_KERNEL(GFluidSubRC, cv::gapi::core::GSubRC, false)
// run_arithm_rs reads only input line 0.
665 static const int Window = 1;
// The dtype argument is intentionally unnamed: it is not used here.
667 static void run(const cv::Scalar &_scalar, const View &src, int /*dtype*/, Buffer &dst)
// The scalar components are narrowed to float for the worker function.
669 const float scalar[4] = {
670 static_cast<float>(_scalar[0]),
671 static_cast<float>(_scalar[1]),
672 static_cast<float>(_scalar[2]),
673 static_cast<float>(_scalar[3])
676 // DST SRC OP __VA_ARGS__
677 UNARY_(uchar , uchar , run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);
678 UNARY_(uchar , short, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);
679 UNARY_(uchar , float, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);
680 UNARY_( short, short, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);
681 UNARY_( float, uchar , run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);
682 UNARY_( float, short, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);
683 UNARY_( float, float, run_arithm_rs, dst, src, scalar, ARITHM_SUBTRACT);
// Reports a type combination that is not listed in the table above.
685 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GMulC
// (image times a per-channel scalar).
// Each UNARY_ row names one supported (DST, SRC) combination and forwards it
// to run_arithm_s with ARITHM_MULTIPLY and a fixed scale of 1.
// NOTE(review): UNARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
689 GAPI_FLUID_KERNEL(GFluidMulC, cv::gapi::core::GMulC, false)
// run_arithm_s reads only input line 0.
691 static const int Window = 1;
// The dtype argument is intentionally unnamed: it is not used here.
693 static void run(const View &src, const cv::Scalar &_scalar, int /*dtype*/, Buffer &dst)
// The scalar components are narrowed to float for the worker function.
695 const float scalar[4] = {
696 static_cast<float>(_scalar[0]),
697 static_cast<float>(_scalar[1]),
698 static_cast<float>(_scalar[2]),
699 static_cast<float>(_scalar[3])
// This kernel has no scale argument, so a neutral scale is passed on.
701 const float scale = 1.f;
703 // DST SRC OP __VA_ARGS__
704 UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
705 UNARY_(uchar , short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
706 UNARY_(uchar , float, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
707 UNARY_( short, short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
708 UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
709 UNARY_( float, short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
710 UNARY_( float, float, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
// Reports a type combination that is not listed in the table above.
712 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GMulCOld
// (image times a single double multiplier).
// The one multiplier is replicated to all four scalar slots, so every channel
// is multiplied by the same value. Each UNARY_ row names one supported
// (DST, SRC) combination and forwards it to run_arithm_s with
// ARITHM_MULTIPLY and a fixed scale of 1.
// NOTE(review): UNARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
716 GAPI_FLUID_KERNEL(GFluidMulCOld, cv::gapi::core::GMulCOld, false)
// run_arithm_s reads only input line 0.
718 static const int Window = 1;
// The dtype argument is intentionally unnamed: it is not used here.
720 static void run(const View &src, double _scalar, int /*dtype*/, Buffer &dst)
// Same multiplier for every channel, narrowed to float.
722 const float scalar[4] = {
723 static_cast<float>(_scalar),
724 static_cast<float>(_scalar),
725 static_cast<float>(_scalar),
726 static_cast<float>(_scalar)
// This kernel has no scale argument, so a neutral scale is passed on.
728 const float scale = 1.f;
730 // DST SRC OP __VA_ARGS__
731 UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
732 UNARY_(uchar , short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
733 UNARY_(uchar , float, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
734 UNARY_( short, short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
735 UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
736 UNARY_( float, short, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
737 UNARY_( float, float, run_arithm_s, dst, src, scalar, ARITHM_MULTIPLY, scale);
// Reports a type combination that is not listed in the table above.
739 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GDivC
// (image divided by a per-channel scalar, times a scale factor).
// Each UNARY_ row names one supported (DST, SRC) combination and forwards it
// to run_arithm_s with ARITHM_DIVIDE; the div() helper used there yields 0
// where the divisor is 0.
// NOTE(review): UNARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition. The end of run()'s parameter list
// is not visible in this view either.
743 GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, false)
// run_arithm_s reads only input line 0.
745 static const int Window = 1;
// The dtype argument is intentionally unnamed: it is not used here.
747 static void run(const View &src, const cv::Scalar &_scalar, double _scale, int /*dtype*/,
// The scalar components are narrowed to float for the worker function.
750 const float scalar[4] = {
751 static_cast<float>(_scalar[0]),
752 static_cast<float>(_scalar[1]),
753 static_cast<float>(_scalar[2]),
754 static_cast<float>(_scalar[3])
756 const float scale = static_cast<float>(_scale);
758 // DST SRC OP __VA_ARGS__
759 UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
760 UNARY_(uchar , short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
761 UNARY_(uchar , float, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
762 UNARY_( short, short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
763 UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
764 UNARY_( float, short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
765 UNARY_( float, float, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale);
// Reports a type combination that is not listed in the table above.
767 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GDivRC
// (reverse division: per-channel scalar divided by the image, times a scale
// factor). Note the argument order of run(): the scalar comes first.
// Each UNARY_ row names one supported (DST, SRC) combination and forwards it
// to run_arithm_rs with ARITHM_DIVIDE; the result is 0 where the pixel
// (the divisor) is 0.
// NOTE(review): UNARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition. The end of run()'s parameter list
// is not visible in this view either.
771 GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, false)
// run_arithm_rs reads only input line 0.
773 static const int Window = 1;
// The dtype argument is intentionally unnamed: it is not used here.
775 static void run(const cv::Scalar &_scalar, const View &src, double _scale, int /*dtype*/,
// The scalar components are narrowed to float for the worker function.
778 const float scalar[4] = {
779 static_cast<float>(_scalar[0]),
780 static_cast<float>(_scalar[1]),
781 static_cast<float>(_scalar[2]),
782 static_cast<float>(_scalar[3])
784 const float scale = static_cast<float>(_scale);
786 // DST SRC OP __VA_ARGS__
787 UNARY_(uchar , uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
788 UNARY_(uchar , short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
789 UNARY_(uchar , float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
790 UNARY_( short, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
791 UNARY_( float, uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
792 UNARY_( float, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
793 UNARY_( float, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
// Reports a type combination that is not listed in the table above.
795 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
799 //----------------------------
801 // Fluid math kernels: bitwise
803 //----------------------------
// Selects which bitwise operation the run_bitwise* helpers perform.
enum Bitwise
{
    BW_AND,  // bitwise AND of two inputs
    BW_OR,   // bitwise OR of two inputs
    BW_XOR,  // bitwise XOR of two inputs
    BW_NOT   // bitwise NOT of a single input
};
// Processes one output line of a two-input bitwise operation selected by
// `bitwise`. Input and output element types must all be identical.
// NOTE(review): the switch statement and its case labels are not visible in
// this view; the three loops are presumably the BW_AND, BW_OR and BW_XOR
// cases in that order - confirm against the full source.
807 template<typename DST, typename SRC1, typename SRC2>
808 static void run_bitwise2(Buffer &dst, const View &src1, const View &src2, Bitwise bitwise)
810 static_assert(std::is_same<DST, SRC1>::value, "wrong types");
811 static_assert(std::is_same<DST, SRC2>::value, "wrong types");
// Only input line 0 of each source is read.
813 const auto *in1 = src1.InLine<SRC1>(0);
814 const auto *in2 = src2.InLine<SRC2>(0);
815 auto *out = dst.OutLine<DST>();
// Channels are flattened: the line is handled as width*chan scalar elements.
817 int width = dst.length();
818 int chan = dst.meta().chan;
819 int length = width * chan;
// AND
824 for (int l=0; l < length; l++)
825 out[l] = in1[l] & in2[l];
// OR
828 for (int l=0; l < length; l++)
829 out[l] = in1[l] | in2[l];
// XOR
832 for (int l=0; l < length; l++)
833 out[l] = in1[l] ^ in2[l];
835 default: CV_Error(cv::Error::StsBadArg, "unsupported bitwise operation");
// Processes one output line of a single-input bitwise operation selected by
// `bitwise`. Input and output element types must be identical.
// NOTE(review): the switch statement, its case label and the body of the
// loop are not visible in this view; given that GFluidNot calls this with
// BW_NOT, the loop presumably writes the bitwise complement of each input
// element - confirm against the full source.
839 template<typename DST, typename SRC>
840 static void run_bitwise1(Buffer &dst, const View &src, Bitwise bitwise)
842 static_assert(std::is_same<DST, SRC>::value, "wrong types");
// Only input line 0 is read.
844 const auto *in = src.InLine<SRC>(0);
845 auto *out = dst.OutLine<DST>();
// Channels are flattened: the line is handled as width*chan scalar elements.
847 int width = dst.length();
848 int chan = dst.meta().chan;
849 int length = width * chan;
854 for (int l=0; l < length; l++)
857 default: CV_Error(cv::Error::StsBadArg, "unsupported bitwise operation");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GAnd.
// Only same-type integer combinations are listed; each row is forwarded to
// run_bitwise2 with BW_AND.
// NOTE(review): BINARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
861 GAPI_FLUID_KERNEL(GFluidAnd, cv::gapi::core::GAnd, false)
// run_bitwise2 reads only input line 0 of each source.
863 static const int Window = 1;
865 static void run(const View &src1, const View &src2, Buffer &dst)
868 // DST SRC1 SRC2 OP __VA_ARGS__
869 BINARY_(uchar , uchar , uchar , run_bitwise2, dst, src1, src2, BW_AND);
870 BINARY_(ushort, ushort, ushort, run_bitwise2, dst, src1, src2, BW_AND);
871 BINARY_( short, short, short, run_bitwise2, dst, src1, src2, BW_AND);
// Reports a type combination that is not listed in the table above.
873 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GOr.
// Only same-type integer combinations are listed; each row is forwarded to
// run_bitwise2 with BW_OR.
// NOTE(review): BINARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
877 GAPI_FLUID_KERNEL(GFluidOr, cv::gapi::core::GOr, false)
// run_bitwise2 reads only input line 0 of each source.
879 static const int Window = 1;
881 static void run(const View &src1, const View &src2, Buffer &dst)
884 // DST SRC1 SRC2 OP __VA_ARGS__
885 BINARY_(uchar , uchar , uchar , run_bitwise2, dst, src1, src2, BW_OR);
886 BINARY_(ushort, ushort, ushort, run_bitwise2, dst, src1, src2, BW_OR);
887 BINARY_( short, short, short, run_bitwise2, dst, src1, src2, BW_OR);
// Reports a type combination that is not listed in the table above.
889 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GXor.
// Only same-type integer combinations are listed; each row is forwarded to
// run_bitwise2 with BW_XOR.
// NOTE(review): BINARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
893 GAPI_FLUID_KERNEL(GFluidXor, cv::gapi::core::GXor, false)
// run_bitwise2 reads only input line 0 of each source.
895 static const int Window = 1;
897 static void run(const View &src1, const View &src2, Buffer &dst)
900 // DST SRC1 SRC2 OP __VA_ARGS__
901 BINARY_(uchar , uchar , uchar , run_bitwise2, dst, src1, src2, BW_XOR);
902 BINARY_(ushort, ushort, ushort, run_bitwise2, dst, src1, src2, BW_XOR);
903 BINARY_( short, short, short, run_bitwise2, dst, src1, src2, BW_XOR);
// Reports a type combination that is not listed in the table above.
905 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel declared via GAPI_FLUID_KERNEL for cv::gapi::core::GNot.
// Only same-type integer combinations are listed; each row is forwarded to
// run_bitwise1 with BW_NOT.
// NOTE(review): UNARY_ is defined outside this view; presumably it returns
// once a row matches, so CV_Error only fires for unlisted combinations -
// confirm against the macro definition.
909 GAPI_FLUID_KERNEL(GFluidNot, cv::gapi::core::GNot, false)
// run_bitwise1 reads only input line 0.
911 static const int Window = 1;
913 static void run(const View &src, Buffer &dst)
915 // DST SRC OP __VA_ARGS__
916 UNARY_(uchar , uchar , run_bitwise1, dst, src, BW_NOT);
917 UNARY_(ushort, ushort, run_bitwise1, dst, src, BW_NOT);
918 UNARY_( short, short, run_bitwise1, dst, src, BW_NOT);
// Reports a type combination that is not listed in the table above.
920 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
924 //-------------------
926 // Fluid kernels: LUT
928 //-------------------
930 GAPI_FLUID_KERNEL(GFluidLUT, cv::gapi::core::GLUT, false)
932 static const int Window = 1;
934 static void run(const View &src, const cv::Mat& lut, Buffer &dst)
936 GAPI_Assert(CV_8U == dst.meta().depth);
937 GAPI_Assert(CV_8U == src.meta().depth);
939 GAPI_DbgAssert(CV_8U == lut.type());
940 GAPI_DbgAssert(256 == lut.cols * lut.rows);
941 GAPI_DbgAssert(dst.length() == src.length());
942 GAPI_DbgAssert(dst.meta().chan == src.meta().chan);
944 const auto *in = src.InLine<uchar>(0);
945 auto *out = dst.OutLine<uchar>();
947 int width = dst.length();
948 int chan = dst.meta().chan;
949 int length = width * chan;
951 for (int l=0; l < length; l++)
952 out[l] = lut.data[ in[l] ];
956 //-------------------------
958 // Fluid kernels: convertTo
960 //-------------------------
// Processes one output line of convertTo: out = saturate<DST>(in*alpha + beta).
// When alpha == 1 and beta == 0 a cheaper path is taken, with three cases:
//   - float -> integral DST: vectorised rounding for uchar / ushort
//     destinations, then a scalar tail using rintf;
//   - integral -> integral DST: plain saturate;
//   - any -> floating-point DST: static_cast.
// NOTE(review): the SIMD guard and the declarations of the us0/us1/uc/us
// vectors (and of i0/i1 in the ushort loop) are not visible in this view -
// confirm against the full source.
962 template<typename DST, typename SRC>
963 static void run_convertto(Buffer &dst, const View &src, double _alpha, double _beta)
// Only input line 0 is read.
965 const auto *in = src.InLine<SRC>(0);
966 auto *out = dst.OutLine<DST>();
// Channels are flattened: the line is handled as width*chan scalar elements.
968 int width = dst.length();
969 int chan = dst.meta().chan;
970 int length = width * chan;
972 // NB: don't do this if SRC or DST is 64-bit
973 auto alpha = static_cast<float>( _alpha );
974 auto beta = static_cast<float>( _beta );
976 // compute faster if no alpha no beta
977 if (alpha == 1 && beta == 0)
979 // manual SIMD if need rounding
980 if (std::is_integral<DST>::value && std::is_floating_point<SRC>::value)
// float is the only floating-point source this path supports.
982 GAPI_Assert(( std::is_same<SRC,float>::value ));
// `l` is shared by the vector loops and the scalar tail below, so the tail
// continues where the vector loop stopped.
984 int l = 0; // cycle index
987 if (std::is_same<DST,uchar>::value)
// 16 floats per iteration: round to int32, then narrow 32 -> 16 -> 8 bits.
// The pointer casts are needed only so that every template instantiation
// compiles; the enclosing checks ensure SRC is float and DST is uchar here.
989 for (; l <= length-16; l+=16)
991 v_int32x4 i0, i1, i2, i3;
992 i0 = v_round( v_load( (float*)& in[l ] ) );
993 i1 = v_round( v_load( (float*)& in[l + 4] ) );
994 i2 = v_round( v_load( (float*)& in[l + 8] ) );
995 i3 = v_round( v_load( (float*)& in[l + 12] ) );
998 us0 = v_pack_u(i0, i1);
999 us1 = v_pack_u(i2, i3);
1002 uc = v_pack(us0, us1);
1003 v_store((uchar*)& out[l], uc);
1006 if (std::is_same<DST,ushort>::value)
// 8 floats per iteration: round to int32, then narrow to 16-bit unsigned.
1008 for (; l <= length-8; l+=8)
1011 i0 = v_round( v_load( (float*)& in[l ] ) );
1012 i1 = v_round( v_load( (float*)& in[l + 4] ) );
1015 us = v_pack_u(i0, i1);
1016 v_store((ushort*)& out[l], us);
1021 // tail of SIMD cycle
1022 for (; l < length; l++)
1024 out[l] = saturate<DST>(in[l], rintf);
1027 else if (std::is_integral<DST>::value) // here SRC is integral
1029 for (int l=0; l < length; l++)
1031 out[l] = saturate<DST>(in[l]);
1034 else // DST is floating-point, SRC is any
1036 for (int l=0; l < length; l++)
1038 out[l] = static_cast<DST>(in[l]);
1042 else // if alpha or beta is non-trivial
1044 // TODO: optimize if alpha and beta and data are integral
1045 for (int l=0; l < length; l++)
1047 out[l] = saturate<DST>(in[l]*alpha + beta, rintf);
// Fluid kernel for cv::gapi::core::GConvertTo, implemented with run_convertto().
// The rtype argument is unused here (its name is commented out).
// Each UNARY_ row lists one supported (DST, SRC) pair among uchar / ushort / float.
// NOTE(review): UNARY_ is defined outside this chunk; presumably it dispatches and returns
// when the buffer depths match, so CV_Error is reached only for unsupported pairs - confirm.
1052 GAPI_FLUID_KERNEL(GFluidConvertTo, cv::gapi::core::GConvertTo, false)
1054 static const int Window = 1;
1056 static void run(const View &src, int /*rtype*/, double alpha, double beta, Buffer &dst)
1058 // DST SRC OP __VA_ARGS__
1059 UNARY_(uchar , uchar , run_convertto, dst, src, alpha, beta);
1060 UNARY_(uchar , ushort, run_convertto, dst, src, alpha, beta);
1061 UNARY_(uchar , float, run_convertto, dst, src, alpha, beta);
1062 UNARY_(ushort, uchar , run_convertto, dst, src, alpha, beta);
1063 UNARY_(ushort, ushort, run_convertto, dst, src, alpha, beta);
1064 UNARY_(ushort, float, run_convertto, dst, src, alpha, beta);
1065 UNARY_( float, uchar , run_convertto, dst, src, alpha, beta);
1066 UNARY_( float, ushort, run_convertto, dst, src, alpha, beta);
1067 UNARY_( float, float, run_convertto, dst, src, alpha, beta);
1069 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
1073 //-----------------------------
1075 // Fluid math kernels: min, max
1077 //-----------------------------
// Operation selector passed to run_minmax() by the GFluidMin / GFluidMax kernels.
1079 enum Minmax { MM_MIN, MM_MAX };
// Element-wise minimum or maximum of two rows; all three element types must be identical
// (enforced by the static_asserts).
//   dst        - output buffer; dst.length() * dst.meta().chan elements are written
//   src1, src2 - input views; row 0 of each is read
//   minmax     - operation selector; an unsupported value raises StsBadArg
// NOTE(review): the switch/case/break lines are missing from this listing. The first loop keeps
// the smaller element and the second the larger one, presumably under MM_MIN and MM_MAX - confirm.
1081 template<typename DST, typename SRC1, typename SRC2>
1082 static void run_minmax(Buffer &dst, const View &src1, const View &src2, Minmax minmax)
1084 static_assert(std::is_same<DST, SRC1>::value, "wrong types");
1085 static_assert(std::is_same<DST, SRC2>::value, "wrong types");
1087 const auto *in1 = src1.InLine<SRC1>(0);
1088 const auto *in2 = src2.InLine<SRC2>(0);
1089 auto *out = dst.OutLine<DST>();
1091 int width = dst.length();
1092 int chan = dst.meta().chan;
1094 int length = width * chan;
// minimum: in2[l] is taken whenever in1[l] < in2[l] is false (including equality)
1099 for (int l=0; l < length; l++)
1100 out[l] = in1[l] < in2[l]? in1[l]: in2[l];
// maximum: in2[l] is taken whenever in1[l] > in2[l] is false (including equality)
1103 for (int l=0; l < length; l++)
1104 out[l] = in1[l] > in2[l]? in1[l]: in2[l];
1106 default: CV_Error(cv::Error::StsBadArg, "unsupported min/max operation");
// Fluid kernel for cv::gapi::core::GMin: calls run_minmax() with MM_MIN.
// Listed type triples: uchar, ushort, short and float, with output and both inputs alike.
// NOTE(review): BINARY_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1110 GAPI_FLUID_KERNEL(GFluidMin, cv::gapi::core::GMin, false)
1112 static const int Window = 1;
1114 static void run(const View &src1, const View &src2, Buffer &dst)
1116 // DST SRC1 SRC2 OP __VA_ARGS__
1117 BINARY_(uchar , uchar , uchar , run_minmax, dst, src1, src2, MM_MIN);
1118 BINARY_(ushort, ushort, ushort, run_minmax, dst, src1, src2, MM_MIN);
1119 BINARY_( short, short, short, run_minmax, dst, src1, src2, MM_MIN);
1120 BINARY_( float, float, float, run_minmax, dst, src1, src2, MM_MIN);
1122 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel for cv::gapi::core::GMax: calls run_minmax() with MM_MAX.
// Listed type triples: uchar, ushort, short and float, with output and both inputs alike.
// NOTE(review): BINARY_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1126 GAPI_FLUID_KERNEL(GFluidMax, cv::gapi::core::GMax, false)
1128 static const int Window = 1;
1130 static void run(const View &src1, const View &src2, Buffer &dst)
1132 // DST SRC1 SRC2 OP __VA_ARGS__
1133 BINARY_(uchar , uchar , uchar , run_minmax, dst, src1, src2, MM_MAX);
1134 BINARY_(ushort, ushort, ushort, run_minmax, dst, src1, src2, MM_MAX);
1135 BINARY_( short, short, short, run_minmax, dst, src1, src2, MM_MAX);
1136 BINARY_( float, float, float, run_minmax, dst, src1, src2, MM_MAX);
1138 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
1142 //-----------------------
1144 // Fluid kernels: compare
1146 //-----------------------
// Comparison selector passed to the run_cmp() overloads by the GFluidCmp* kernels.
1148 enum Compare { CMP_EQ, CMP_NE, CMP_GE, CMP_GT, CMP_LE, CMP_LT };
// Element-wise comparison of two rows of the same element type; the result row is uchar
// (both constraints enforced by the static_asserts). Each output element is 255 where the
// comparison holds and 0 where it does not.
//   dst        - output buffer; dst.length() * dst.meta().chan elements are written
//   src1, src2 - input views; row 0 of each is read
//   compare    - comparison selector; an unsupported value raises StsBadArg
// NOTE(review): the switch/case/break lines are missing from this listing. The loops appear in
// the order ==, !=, >=, <=, >, < ; the case label guarding each loop must be checked in the full file.
1150 template<typename DST, typename SRC1, typename SRC2>
1151 static void run_cmp(Buffer &dst, const View &src1, const View &src2, Compare compare)
1153 static_assert(std::is_same<SRC1, SRC2>::value, "wrong types");
1154 static_assert(std::is_same<DST, uchar>::value, "wrong types");
1156 const auto *in1 = src1.InLine<SRC1>(0);
1157 const auto *in2 = src2.InLine<SRC2>(0);
1158 auto *out = dst.OutLine<DST>();
1160 int width = dst.length();
1161 int chan = dst.meta().chan;
1163 int length = width * chan;
1168 for (int l=0; l < length; l++)
1169 out[l] = in1[l] == in2[l]? 255: 0;
1172 for (int l=0; l < length; l++)
1173 out[l] = in1[l] != in2[l]? 255: 0;
1176 for (int l=0; l < length; l++)
1177 out[l] = in1[l] >= in2[l]? 255: 0;
1180 for (int l=0; l < length; l++)
1181 out[l] = in1[l] <= in2[l]? 255: 0;
1184 for (int l=0; l < length; l++)
1185 out[l] = in1[l] > in2[l]? 255: 0;
1188 for (int l=0; l < length; l++)
1189 out[l] = in1[l] < in2[l]? 255: 0;
1192 CV_Error(cv::Error::StsBadArg, "unsupported compare operation");
// Fluid kernel for cv::gapi::core::GCmpEQ: calls run_cmp() with CMP_EQ.
// Output is uchar; listed input types are uchar, short and float (both inputs alike).
// NOTE(review): BINARY_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1196 GAPI_FLUID_KERNEL(GFluidCmpEQ, cv::gapi::core::GCmpEQ, false)
1198 static const int Window = 1;
1200 static void run(const View &src1, const View &src2, Buffer &dst)
1202 // DST SRC1 SRC2 OP __VA_ARGS__
1203 BINARY_(uchar, uchar , uchar , run_cmp, dst, src1, src2, CMP_EQ);
1204 BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_EQ);
1205 BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_EQ);
1207 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel for cv::gapi::core::GCmpNE: calls run_cmp() with CMP_NE.
// Output is uchar; listed input types are uchar, short and float (both inputs alike).
// NOTE(review): BINARY_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1211 GAPI_FLUID_KERNEL(GFluidCmpNE, cv::gapi::core::GCmpNE, false)
1213 static const int Window = 1;
1215 static void run(const View &src1, const View &src2, Buffer &dst)
1217 // DST SRC1 SRC2 OP __VA_ARGS__
1218 BINARY_(uchar, uchar , uchar , run_cmp, dst, src1, src2, CMP_NE);
1219 BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_NE);
1220 BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_NE);
1222 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel for cv::gapi::core::GCmpGE: calls run_cmp() with CMP_GE.
// Output is uchar; listed input types are uchar, short and float (both inputs alike).
// NOTE(review): BINARY_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1226 GAPI_FLUID_KERNEL(GFluidCmpGE, cv::gapi::core::GCmpGE, false)
1228 static const int Window = 1;
1230 static void run(const View &src1, const View &src2, Buffer &dst)
1232 // DST SRC1 SRC2 OP __VA_ARGS__
1233 BINARY_(uchar, uchar , uchar , run_cmp, dst, src1, src2, CMP_GE);
1234 BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_GE);
1235 BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_GE);
1237 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel for cv::gapi::core::GCmpGT: calls run_cmp() with CMP_GT.
// Output is uchar; listed input types are uchar, short and float (both inputs alike).
// NOTE(review): BINARY_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1241 GAPI_FLUID_KERNEL(GFluidCmpGT, cv::gapi::core::GCmpGT, false)
1243 static const int Window = 1;
1245 static void run(const View &src1, const View &src2, Buffer &dst)
1247 // DST SRC1 SRC2 OP __VA_ARGS__
1248 BINARY_(uchar, uchar , uchar , run_cmp, dst, src1, src2, CMP_GT);
1249 BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_GT);
1250 BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_GT);
1252 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel for cv::gapi::core::GCmpLE: calls run_cmp() with CMP_LE.
// Output is uchar; listed input types are uchar, short and float (both inputs alike).
// NOTE(review): BINARY_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1256 GAPI_FLUID_KERNEL(GFluidCmpLE, cv::gapi::core::GCmpLE, false)
1258 static const int Window = 1;
1260 static void run(const View &src1, const View &src2, Buffer &dst)
1262 // DST SRC1 SRC2 OP __VA_ARGS__
1263 BINARY_(uchar, uchar , uchar , run_cmp, dst, src1, src2, CMP_LE);
1264 BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_LE);
1265 BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_LE);
1267 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel for cv::gapi::core::GCmpLT: calls run_cmp() with CMP_LT.
// Output is uchar; listed input types are uchar, short and float (both inputs alike).
// NOTE(review): BINARY_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1271 GAPI_FLUID_KERNEL(GFluidCmpLT, cv::gapi::core::GCmpLT, false)
1273 static const int Window = 1;
1275 static void run(const View &src1, const View &src2, Buffer &dst)
1277 // DST SRC1 SRC2 OP __VA_ARGS__
1278 BINARY_(uchar, uchar , uchar , run_cmp, dst, src1, src2, CMP_LT);
1279 BINARY_(uchar, short, short, run_cmp, dst, src1, src2, CMP_LT);
1280 BINARY_(uchar, float, float, run_cmp, dst, src1, src2, CMP_LT);
1282 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
1286 //---------------------
1288 // Compare with GScalar
1290 //---------------------
// Compares each of `length` elements of in[] against the single value s and writes 255 to
// out[] where the comparison holds, 0 where it does not.
//   out, in - raw output / input arrays of at least `length` elements
//   compare - comparison selector; an unsupported value raises StsBadArg
//   s       - right-hand operand; its type SCALAR defaults to double
// NOTE(review): the switch/case/break lines are missing from this listing. The loops appear in
// the order ==, !=, >=, <=, >, < ; the case label guarding each loop must be checked in the full file.
1292 template<typename DST, typename SRC, typename SCALAR=double>
1293 static void run_cmp(DST out[], const SRC in[], int length, Compare compare, SCALAR s)
1298 for (int l=0; l < length; l++)
1299 out[l] = in[l] == s? 255: 0;
1302 for (int l=0; l < length; l++)
1303 out[l] = in[l] != s? 255: 0;
1306 for (int l=0; l < length; l++)
1307 out[l] = in[l] >= s? 255: 0;
1310 for (int l=0; l < length; l++)
1311 out[l] = in[l] <= s? 255: 0;
1314 for (int l=0; l < length; l++)
1315 out[l] = in[l] > s? 255: 0;
1318 for (int l=0; l < length; l++)
1319 out[l] = in[l] < s? 255: 0;
1322 CV_Error(cv::Error::StsBadArg, "unsupported compare operation");
// Compares one row of `src` against scalar[0] (only the first scalar component is read)
// and writes a uchar result row via the array overload of run_cmp().
//   dst     - output buffer (uchar, enforced by the static_assert)
//   src     - input view; row 0 is read as SRC
//   compare - comparison selector forwarded unchanged
//   scalar  - right-hand operand
1326 template<typename DST, typename SRC>
1327 static void run_cmp(Buffer &dst, const View &src, Compare compare, const cv::Scalar &scalar)
1329 static_assert(std::is_same<DST, uchar>::value, "wrong types");
1331 const auto *in = src.InLine<SRC>(0);
1332 auto *out = dst.OutLine<DST>();
1334 int width = dst.length();
1335 int chan = dst.meta().chan;
1337 int length = width * chan;
1339 // compute faster if scalar rounds to SRC
// d keeps full double precision; s is the same value converted to the element type
1340 double d = scalar[0] ;
1341 SRC s = static_cast<SRC>( scalar[0] );
// NOTE(review): the condition choosing between the two calls below is not present in this
// listing; presumably the SRC-typed call is used only when s == d - confirm.
1344 run_cmp(out, in, length, compare, s);
1346 run_cmp(out, in, length, compare, d);
// Fluid kernel for cv::gapi::core::GCmpEQScalar: calls the view/scalar run_cmp() with CMP_EQ.
// Output is uchar; listed input types are uchar, short and float.
// NOTE(review): UNARY_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1349 GAPI_FLUID_KERNEL(GFluidCmpEQScalar, cv::gapi::core::GCmpEQScalar, false)
1351 static const int Window = 1;
1353 static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)
1355 // DST SRC OP __VA_ARGS__
1356 UNARY_(uchar, uchar , run_cmp, dst, src, CMP_EQ, scalar);
1357 UNARY_(uchar, short, run_cmp, dst, src, CMP_EQ, scalar);
1358 UNARY_(uchar, float, run_cmp, dst, src, CMP_EQ, scalar);
1360 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel for cv::gapi::core::GCmpNEScalar: calls the view/scalar run_cmp() with CMP_NE.
// Output is uchar; listed input types are uchar, short and float.
// NOTE(review): UNARY_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1364 GAPI_FLUID_KERNEL(GFluidCmpNEScalar, cv::gapi::core::GCmpNEScalar, false)
1366 static const int Window = 1;
1368 static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)
1370 // DST SRC OP __VA_ARGS__
1371 UNARY_(uchar, uchar , run_cmp, dst, src, CMP_NE, scalar);
1372 UNARY_(uchar, short, run_cmp, dst, src, CMP_NE, scalar);
1373 UNARY_(uchar, float, run_cmp, dst, src, CMP_NE, scalar);
1375 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel for cv::gapi::core::GCmpGEScalar: calls the view/scalar run_cmp() with CMP_GE.
// Output is uchar; listed input types are uchar, short and float.
// NOTE(review): UNARY_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1379 GAPI_FLUID_KERNEL(GFluidCmpGEScalar, cv::gapi::core::GCmpGEScalar, false)
1381 static const int Window = 1;
1383 static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)
1385 // DST SRC OP __VA_ARGS__
1386 UNARY_(uchar, uchar , run_cmp, dst, src, CMP_GE, scalar);
1387 UNARY_(uchar, short, run_cmp, dst, src, CMP_GE, scalar);
1388 UNARY_(uchar, float, run_cmp, dst, src, CMP_GE, scalar);
1390 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel for cv::gapi::core::GCmpGTScalar: calls the view/scalar run_cmp() with CMP_GT.
// Output is uchar; listed input types are uchar, short and float.
// NOTE(review): UNARY_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1394 GAPI_FLUID_KERNEL(GFluidCmpGTScalar, cv::gapi::core::GCmpGTScalar, false)
1396 static const int Window = 1;
1398 static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)
1400 // DST SRC OP __VA_ARGS__
1401 UNARY_(uchar, uchar , run_cmp, dst, src, CMP_GT, scalar);
1402 UNARY_(uchar, short, run_cmp, dst, src, CMP_GT, scalar);
1403 UNARY_(uchar, float, run_cmp, dst, src, CMP_GT, scalar);
1405 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel for cv::gapi::core::GCmpLEScalar: calls the view/scalar run_cmp() with CMP_LE.
// Output is uchar; listed input types are uchar, short and float.
// NOTE(review): UNARY_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1409 GAPI_FLUID_KERNEL(GFluidCmpLEScalar, cv::gapi::core::GCmpLEScalar, false)
1411 static const int Window = 1;
1413 static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)
1415 // DST SRC OP __VA_ARGS__
1416 UNARY_(uchar, uchar , run_cmp, dst, src, CMP_LE, scalar);
1417 UNARY_(uchar, short, run_cmp, dst, src, CMP_LE, scalar);
1418 UNARY_(uchar, float, run_cmp, dst, src, CMP_LE, scalar);
1420 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
// Fluid kernel for cv::gapi::core::GCmpLTScalar: calls the view/scalar run_cmp() with CMP_LT.
// Output is uchar; listed input types are uchar, short and float.
// NOTE(review): UNARY_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1424 GAPI_FLUID_KERNEL(GFluidCmpLTScalar, cv::gapi::core::GCmpLTScalar, false)
1426 static const int Window = 1;
1428 static void run(const View &src, const cv::Scalar &scalar, Buffer &dst)
1430 // DST SRC OP __VA_ARGS__
1431 UNARY_(uchar, uchar , run_cmp, dst, src, CMP_LT, scalar);
1432 UNARY_(uchar, short, run_cmp, dst, src, CMP_LT, scalar);
1433 UNARY_(uchar, float, run_cmp, dst, src, CMP_LT, scalar);
1435 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
1439 //-------------------------
1441 // Fluid kernels: threshold
1443 //-------------------------
// Applies a fixed-level threshold to one row; input and output element types must match
// (enforced by the static_assert).
//   dst    - output buffer; dst.length() * dst.meta().chan elements are written
//   src    - input view; row 0 is read
//   thresh - threshold level; only thresh[0] is used
//   maxval - value written by the BINARY / BINARY_INV modes; only maxval[0] is used
// An unrecognised threshold type raises StsBadArg.
// NOTE(review): the line declaring the final parameter and the `switch` line are missing from
// this listing; the cases below are cv::THRESH_* constants and the kernel passes `type` last.
1445 template<typename DST, typename SRC>
1446 static void run_threshold(Buffer &dst, const View &src, const cv::Scalar &thresh,
1447 const cv::Scalar &maxval,
1450 static_assert(std::is_same<DST, SRC>::value, "wrong types");
1452 const auto *in = src.InLine<SRC>(0);
1453 auto *out = dst.OutLine<DST>();
1455 int width = dst.length();
1456 int chan = dst.meta().chan;
1458 int length = width * chan;
// thresh_ (floored) is the value compared against; threshd (rounded) is the value written by
// THRESH_TRUNC. Presumably floor is used because, for integral data, in > floor(t) is
// equivalent to in > t - confirm.
1460 DST thresh_ = saturate<DST>(thresh[0], floord);
1461 DST threshd = saturate<DST>(thresh[0], roundd);
1462 DST maxvald = saturate<DST>(maxval[0], roundd);
// above threshold -> maxval, otherwise 0
1466 case cv::THRESH_BINARY:
1467 for (int l=0; l < length; l++)
1468 out[l] = in[l] > thresh_? maxvald: 0;
// above threshold -> 0, otherwise maxval
1470 case cv::THRESH_BINARY_INV:
1471 for (int l=0; l < length; l++)
1472 out[l] = in[l] > thresh_? 0: maxvald;
// above threshold -> clamped to the rounded threshold, otherwise unchanged
1474 case cv::THRESH_TRUNC:
1475 for (int l=0; l < length; l++)
1476 out[l] = in[l] > thresh_? threshd: in[l];
// above threshold -> unchanged, otherwise 0
1478 case cv::THRESH_TOZERO:
1479 for (int l=0; l < length; l++)
1480 out[l] = in[l] > thresh_? in[l]: 0;
// above threshold -> 0, otherwise unchanged
1482 case cv::THRESH_TOZERO_INV:
1483 for (int l=0; l < length; l++)
1484 out[l] = in[l] > thresh_? 0: in[l];
1486 default: CV_Error(cv::Error::StsBadArg, "unsupported threshold type");
// Fluid kernel for cv::gapi::core::GThreshold, implemented with run_threshold().
// Listed element types: uchar, ushort and short (input and output alike).
// NOTE(review): the remaining parameters of run() (the `type` and `dst` names used below) are
// declared on lines missing from this listing. UNARY_ is defined outside this chunk; presumably
// it returns after a successful dispatch, leaving CV_Error for unsupported types - confirm.
1490 GAPI_FLUID_KERNEL(GFluidThreshold, cv::gapi::core::GThreshold, false)
1492 static const int Window = 1;
1494 static void run(const View &src, const cv::Scalar &thresh,
1495 const cv::Scalar &maxval,
1499 // DST SRC OP __VA_ARGS__
1500 UNARY_(uchar , uchar , run_threshold, dst, src, thresh, maxval, type);
1501 UNARY_(ushort, ushort, run_threshold, dst, src, thresh, maxval, type);
1502 UNARY_( short, short, run_threshold, dst, src, thresh, maxval, type);
1504 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
1508 //------------------------
1510 // Fluid kernels: in-range
1512 //------------------------
// In-range test for a row of 3-channel interleaved uchar pixels.
//   out          - one uchar per pixel: 255 if every channel lies within its bounds, else 0
//   in           - 3*width interleaved input bytes
//   width        - number of pixels
//   lower, upper - per-channel inclusive bounds (elements 0..2 are read)
// NOTE(review): the declaration of `o` and any preprocessor guard around the vector loop are
// not present in this listing.
1514 static void run_inrange3(uchar out[], const uchar in[], int width,
1515 const uchar lower[], const uchar upper[])
1517 int w = 0; // cycle index
// vector loop: 16 pixels per iteration, channels split apart by v_load_deinterleave
1520 for (; w <= width-16; w+=16)
1522 v_uint8x16 i0, i1, i2;
1523 v_load_deinterleave(&in[3*w], i0, i1, i2);
// AND of all six per-channel bound checks
1526 o = (i0 >= v_setall_u8(lower[0])) & (i0 <= v_setall_u8(upper[0])) &
1527 (i1 >= v_setall_u8(lower[1])) & (i1 <= v_setall_u8(upper[1])) &
1528 (i2 >= v_setall_u8(lower[2])) & (i2 <= v_setall_u8(upper[2]));
1530 v_store(&out[w], o);
// scalar loop for the pixels the vector loop did not cover
1534 for (; w < width; w++)
1536 out[w] = in[3*w ] >= lower[0] && in[3*w ] <= upper[0] &&
1537 in[3*w+1] >= lower[1] && in[3*w+1] <= upper[1] &&
1538 in[3*w+2] >= lower[2] && in[3*w+2] <= upper[2] ? 255: 0;
// Per-pixel in-range test: writes 255 to the single-channel uchar output where every channel
// of the input pixel lies within [lowerb[c], upperb[c]] (inclusive), and 0 otherwise.
//   dst    - output buffer; must have exactly one channel (asserted)
//   src    - input view; row 0 is read, width and channel count are taken from src
//   upperb - per-channel upper bounds  (note: upper comes BEFORE lower in this signature)
//   lowerb - per-channel lower bounds
// An unsupported channel count raises StsBadArg.
// NOTE(review): lower[]/upper[] hold 4 elements but are filled for c < chan before the channel
// count is rejected, so chan > 4 would write past the arrays - verify callers bound chan.
// NOTE(review): the `else` and `switch`/`case`/`break` lines are missing from this listing.
1542 template<typename DST, typename SRC>
1543 static void run_inrange(Buffer &dst, const View &src, const cv::Scalar &upperb,
1544 const cv::Scalar &lowerb)
1546 static_assert(std::is_same<DST, uchar>::value, "wrong types");
1548 const auto *in = src.InLine<SRC>(0);
1549 auto *out = dst.OutLine<DST>();
1551 int width = src.length();
1552 int chan = src.meta().chan;
1553 GAPI_Assert(dst.meta().chan == 1);
// bounds converted to the element type once, outside the per-pixel loops
1555 SRC lower[4], upper[4];
1556 for (int c=0; c < chan; c++)
1558 if (std::is_integral<SRC>::value)
1560 // for integral input, in[i] >= lower equals in[i] >= ceil(lower)
1561 // so we can optimize compare operations by rounding lower/upper
1562 lower[c] = saturate<SRC>(lowerb[c], ceild);
1563 upper[c] = saturate<SRC>(upperb[c], floord);
1567 // FIXME: now values used in comparison are floats (while they
1568 // have double precision initially). Comparison float/float
1569 // may differ from float/double (how it should work in this case)
1571 // Example: threshold=1/3 (or 1/10)
1572 lower[c] = static_cast<SRC>(lowerb[c]);
1573 upper[c] = static_cast<SRC>(upperb[c]);
1577 // manually SIMD for important case if RGB/BGR
1578 if (std::is_same<SRC,uchar>::value && chan==3)
1580 run_inrange3((uchar*)out, (const uchar*)in, width,
1581 (const uchar*)lower, (const uchar*)upper);
1585 // TODO: please manually SIMD if multiple channels:
1586 // modern compilers would perfectly vectorize this code if one channel,
1587 // but may need help with de-interleaving channels if RGB/BGR image etc
// one loop per channel count (1, 2, 3, 4 in order of appearance); each ANDs the bound
// checks of all channels of a pixel
1591 for (int w=0; w < width; w++)
1592 out[w] = in[w] >= lower[0] && in[w] <= upper[0]? 255: 0;
1595 for (int w=0; w < width; w++)
1596 out[w] = in[2*w ] >= lower[0] && in[2*w ] <= upper[0] &&
1597 in[2*w+1] >= lower[1] && in[2*w+1] <= upper[1] ? 255: 0;
1600 for (int w=0; w < width; w++)
1601 out[w] = in[3*w ] >= lower[0] && in[3*w ] <= upper[0] &&
1602 in[3*w+1] >= lower[1] && in[3*w+1] <= upper[1] &&
1603 in[3*w+2] >= lower[2] && in[3*w+2] <= upper[2] ? 255: 0;
1606 for (int w=0; w < width; w++)
1607 out[w] = in[4*w ] >= lower[0] && in[4*w ] <= upper[0] &&
1608 in[4*w+1] >= lower[1] && in[4*w+1] <= upper[1] &&
1609 in[4*w+2] >= lower[2] && in[4*w+2] <= upper[2] &&
1610 in[4*w+3] >= lower[3] && in[4*w+3] <= upper[3] ? 255: 0;
1612 default: CV_Error(cv::Error::StsBadArg, "unsupported number of channels");
// Fluid kernel for cv::gapi::core::GInRange, implemented with run_inrange().
// Output is uchar; listed input types are uchar, ushort, short and float.
// run() receives (lowerb, upperb) but forwards them as (upperb, lowerb), which matches the
// parameter order of run_inrange().
// NOTE(review): the line declaring the `dst` parameter is missing from this listing. INRANGE_ is
// defined outside this chunk; presumably it returns after a successful dispatch - confirm.
1616 GAPI_FLUID_KERNEL(GFluidInRange, cv::gapi::core::GInRange, false)
1618 static const int Window = 1;
1620 static void run(const View &src, const cv::Scalar &lowerb, const cv::Scalar& upperb,
1623 // DST SRC OP __VA_ARGS__
1624 INRANGE_(uchar, uchar , run_inrange, dst, src, upperb, lowerb);
1625 INRANGE_(uchar, ushort, run_inrange, dst, src, upperb, lowerb);
1626 INRANGE_(uchar, short, run_inrange, dst, src, upperb, lowerb);
1627 INRANGE_(uchar, float, run_inrange, dst, src, upperb, lowerb);
1629 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
1633 //----------------------
1635 // Fluid kernels: select
1637 //----------------------
1639 // manually vectored function for important case if RGB/BGR image
// Per-pixel select for 3-channel interleaved uchar rows: each output pixel is copied from in1
// where the mask byte in3[w] is non-zero, and from in2 where it is zero.
//   width    - number of pixels
//   out      - 3*width output bytes
//   in1, in2 - 3*width input bytes each
//   in3      - width mask bytes (one per pixel)
// NOTE(review): the declarations of `mask` and of `a`, `b`, `c`, and any preprocessor guard
// around the vector loop, are not present in this listing.
1640 static void run_select_row3(int width, uchar out[], uchar in1[], uchar in2[], uchar in3[])
1642 int w = 0; // cycle index
// vector loop: 16 pixels per iteration
1645 for (; w <= width-16; w+=16)
1647 v_uint8x16 a1, b1, c1;
1648 v_uint8x16 a2, b2, c2;
1652 v_load_deinterleave(&in1[3*w], a1, b1, c1);
1653 v_load_deinterleave(&in2[3*w], a2, b2, c2);
// turn the mask bytes into a per-lane comparison result (non-zero vs zero) for v_select
1655 mask = v_load(&in3[w]);
1656 mask = mask != v_setzero_u8();
1658 a = v_select(mask, a1, a2);
1659 b = v_select(mask, b1, b2);
1660 c = v_select(mask, c1, c2);
1662 v_store_interleave(&out[3*w], a, b, c);
// scalar loop for the pixels the vector loop did not cover
1666 for (; w < width; w++)
1668 out[3*w ] = in3[w]? in1[3*w ]: in2[3*w ];
1669 out[3*w + 1] = in3[w]? in1[3*w + 1]: in2[3*w + 1];
1670 out[3*w + 2] = in3[w]? in1[3*w + 2]: in2[3*w + 2];
// Per-pixel select for one row with `chan` interleaved channels: every channel of output pixel
// w is copied from in1 where the mask in3[w] is non-zero, and from in2 otherwise.
//   width    - number of pixels
//   out      - width*chan output elements
//   in1, in2 - width*chan input elements each
//   in3      - width mask elements (one per pixel)
// NOTE(review): brace and `else`/`return` lines are missing from this listing, so it is not
// visible here how the generic loop is skipped after the uchar/3-channel fast path - confirm.
1674 // parameter chan is compile-time known constant, normally chan=1..4
1675 template<int chan, typename DST, typename SRC1, typename SRC2, typename SRC3>
1676 static void run_select_row(int width, DST out[], SRC1 in1[], SRC2 in2[], SRC3 in3[])
1678 if (std::is_same<DST,uchar>::value && chan==3)
1680 // manually vectored function for important case if RGB/BGR image
1681 run_select_row3(width, (uchar*)out, (uchar*)in1, (uchar*)in2, (uchar*)in3);
1685 // because `chan` is template parameter, its value is known at compilation time,
1686 // so that modern compilers would efficiently vectorize this cycle if chan==1
1687 // (if chan>1, compilers may need help with de-interleaving of the channels)
1688 for (int w=0; w < width; w++)
1690 for (int c=0; c < chan; c++)
1692 out[w*chan + c] = in3[w]? in1[w*chan + c]: in2[w*chan + c];
// Selects, pixel by pixel, between two rows according to a uchar mask row, by dispatching
// to run_select_row<chan>() for the runtime channel count of dst.
//   dst        - output buffer; element type DST must equal SRC1 and SRC2 (static_asserts)
//   src1, src2 - candidate input views; row 0 of each is read
//   src3       - mask view; must be uchar (static_assert)
// Channel counts other than 1..4 raise StsBadArg.
// NOTE(review): the `switch` line is missing from this listing; the cases are presumably
// selected on `chan` - confirm.
1697 template<typename DST, typename SRC1, typename SRC2, typename SRC3>
1698 static void run_select(Buffer &dst, const View &src1, const View &src2, const View &src3)
1700 static_assert(std::is_same<DST , SRC1>::value, "wrong types");
1701 static_assert(std::is_same<DST , SRC2>::value, "wrong types");
1702 static_assert(std::is_same<uchar, SRC3>::value, "wrong types");
1704 auto *out = dst.OutLine<DST>();
1706 const auto *in1 = src1.InLine<SRC1>(0);
1707 const auto *in2 = src2.InLine<SRC2>(0);
1708 const auto *in3 = src3.InLine<SRC3>(0);
1710 int width = dst.length();
1711 int chan = dst.meta().chan;
// the channel count becomes a template argument so the inner loop bound is a compile-time constant
1715 case 1: run_select_row<1>(width, out, in1, in2, in3); break;
1716 case 2: run_select_row<2>(width, out, in1, in2, in3); break;
1717 case 3: run_select_row<3>(width, out, in1, in2, in3); break;
1718 case 4: run_select_row<4>(width, out, in1, in2, in3); break;
1719 default: CV_Error(cv::Error::StsBadArg, "unsupported number of channels");
// Fluid kernel for cv::gapi::core::GSelect, implemented with run_select().
// Listed data types: uchar, ushort and short; the mask (src3) is always uchar.
// NOTE(review): SELECT_ is defined outside this chunk; presumably it returns after a
// successful dispatch, leaving CV_Error for unsupported types only - confirm.
1723 GAPI_FLUID_KERNEL(GFluidSelect, cv::gapi::core::GSelect, false)
1725 static const int Window = 1;
1727 static void run(const View &src1, const View &src2, const View &src3, Buffer &dst)
1729 // DST SRC1 SRC2 SRC3 OP __VA_ARGS__
1730 SELECT_(uchar , uchar , uchar , uchar, run_select, dst, src1, src2, src3);
1731 SELECT_(ushort, ushort, ushort, uchar, run_select, dst, src1, src2, src3);
1732 SELECT_( short, short, short, uchar, run_select, dst, src1, src2, src3);
1734 CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
1738 //----------------------------------------------------
1740 // Fluid kernels: split, merge, polar2cart, cart2polar
1742 //----------------------------------------------------
// Fluid kernel for cv::gapi::core::GSplit3: de-interleaves one row of 3-channel uchar pixels
// into three single-channel rows (dst1, dst2, dst3). The input must have exactly 3 channels
// (asserted); all rows are accessed as uchar.
// NOTE(review): the declaration of `a`, `b`, `c`, the scalar store to out1[w] and any
// preprocessor guard around the vector loop are not present in this listing.
1744 GAPI_FLUID_KERNEL(GFluidSplit3, cv::gapi::core::GSplit3, false)
1746 static const int Window = 1;
1748 static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3)
1750 const auto *in = src.InLine<uchar>(0);
1751 auto *out1 = dst1.OutLine<uchar>();
1752 auto *out2 = dst2.OutLine<uchar>();
1753 auto *out3 = dst3.OutLine<uchar>();
1755 GAPI_Assert(3 == src.meta().chan);
1756 int width = src.length();
1758 int w = 0; // cycle counter
// vector loop: 16 pixels per iteration
1761 for (; w <= width-16; w+=16)
1764 v_load_deinterleave(&in[3*w], a, b, c);
1765 v_store(&out1[w], a);
1766 v_store(&out2[w], b);
1767 v_store(&out3[w], c);
// scalar loop for the pixels the vector loop did not cover
1771 for (; w < width; w++)
1774 out2[w] = in[3*w + 1];
1775 out3[w] = in[3*w + 2];
// Fluid kernel for cv::gapi::core::GSplit4: de-interleaves one row of 4-channel uchar pixels
// into four single-channel rows (dst1..dst4). The input must have exactly 4 channels
// (asserted); all rows are accessed as uchar.
// NOTE(review): the scalar store to out1[w] and any preprocessor guard around the vector loop
// are not present in this listing.
1780 GAPI_FLUID_KERNEL(GFluidSplit4, cv::gapi::core::GSplit4, false)
1782 static const int Window = 1;
1784 static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3, Buffer &dst4)
1786 const auto *in = src.InLine<uchar>(0);
1787 auto *out1 = dst1.OutLine<uchar>();
1788 auto *out2 = dst2.OutLine<uchar>();
1789 auto *out3 = dst3.OutLine<uchar>();
1790 auto *out4 = dst4.OutLine<uchar>();
1792 GAPI_Assert(4 == src.meta().chan);
1793 int width = src.length();
1795 int w = 0; // cycle counter
// vector loop: 16 pixels per iteration
1798 for (; w <= width-16; w+=16)
1800 v_uint8x16 a, b, c, d;
1801 v_load_deinterleave(&in[4*w], a, b, c, d);
1802 v_store(&out1[w], a);
1803 v_store(&out2[w], b);
1804 v_store(&out3[w], c);
1805 v_store(&out4[w], d);
// scalar loop for the pixels the vector loop did not cover
1809 for (; w < width; w++)
1812 out2[w] = in[4*w + 1];
1813 out3[w] = in[4*w + 2];
1814 out4[w] = in[4*w + 3];
// Fluid kernel for cv::gapi::core::GMerge3: interleaves three single-channel uchar rows into
// one 3-channel row. The output must have exactly 3 channels (asserted).
// NOTE(review): the declaration of `a`, `b`, `c`, the scalar store to out[3*w] and any
// preprocessor guard around the vector loop are not present in this listing.
1819 GAPI_FLUID_KERNEL(GFluidMerge3, cv::gapi::core::GMerge3, false)
1821 static const int Window = 1;
1823 static void run(const View &src1, const View &src2, const View &src3, Buffer &dst)
1825 const auto *in1 = src1.InLine<uchar>(0);
1826 const auto *in2 = src2.InLine<uchar>(0);
1827 const auto *in3 = src3.InLine<uchar>(0);
1828 auto *out = dst.OutLine<uchar>();
1830 GAPI_Assert(3 == dst.meta().chan);
1831 int width = dst.length();
1833 int w = 0; // cycle counter
// vector loop: 16 pixels per iteration
1836 for (; w <= width-16; w+=16)
1839 a = v_load(&in1[w]);
1840 b = v_load(&in2[w]);
1841 c = v_load(&in3[w]);
1842 v_store_interleave(&out[3*w], a, b, c);
// scalar loop for the pixels the vector loop did not cover
1846 for (; w < width; w++)
1849 out[3*w + 1] = in2[w];
1850 out[3*w + 2] = in3[w];
// Fluid kernel for cv::gapi::core::GMerge4: interleaves four single-channel uchar rows into
// one 4-channel row. The output must have exactly 4 channels (asserted).
// NOTE(review): the line declaring the `dst` parameter, the scalar store to out[4*w] and any
// preprocessor guard around the vector loop are not present in this listing.
1855 GAPI_FLUID_KERNEL(GFluidMerge4, cv::gapi::core::GMerge4, false)
1857 static const int Window = 1;
1859 static void run(const View &src1, const View &src2, const View &src3, const View &src4,
1862 const auto *in1 = src1.InLine<uchar>(0);
1863 const auto *in2 = src2.InLine<uchar>(0);
1864 const auto *in3 = src3.InLine<uchar>(0);
1865 const auto *in4 = src4.InLine<uchar>(0);
1866 auto *out = dst.OutLine<uchar>();
1868 GAPI_Assert(4 == dst.meta().chan);
1869 int width = dst.length();
1871 int w = 0; // cycle counter
// vector loop: 16 pixels per iteration
1874 for (; w <= width-16; w+=16)
1876 v_uint8x16 a, b, c, d;
1877 a = v_load(&in1[w]);
1878 b = v_load(&in2[w]);
1879 c = v_load(&in3[w]);
1880 d = v_load(&in4[w]);
1881 v_store_interleave(&out[4*w], a, b, c, d);
// scalar loop for the pixels the vector loop did not cover
1885 for (; w < width; w++)
1888 out[4*w + 1] = in2[w];
1889 out[4*w + 2] = in3[w];
1890 out[4*w + 3] = in4[w];
// Fluid kernel for cv::gapi::core::GPolarToCart: converts (magnitude, angle) rows to Cartesian
// coordinates, x = magnitude*cos(angle) and y = magnitude*sin(angle).
//   src1           - magnitudes (float)
//   src2           - angles (float); multiplied by pi/180 when angleInDegrees is true
//   dst1, dst2     - float outputs
// All four buffers must have depth CV_32F (asserted).
// NOTE(review): the radians branch of the angle expression and the stores into out1/out2 are
// missing from this listing; presumably out1 receives x and out2 receives y - confirm.
1895 GAPI_FLUID_KERNEL(GFluidPolarToCart, cv::gapi::core::GPolarToCart, false)
1897 static const int Window = 1;
1899 static void run(const View &src1, const View &src2, bool angleInDegrees,
1900 Buffer &dst1, Buffer &dst2)
1902 GAPI_Assert(src1.meta().depth == CV_32F);
1903 GAPI_Assert(src2.meta().depth == CV_32F);
1904 GAPI_Assert(dst1.meta().depth == CV_32F);
1905 GAPI_Assert(dst2.meta().depth == CV_32F);
1907 const auto * in1 = src1.InLine<float>(0);
1908 const auto * in2 = src2.InLine<float>(0);
1909 auto *out1 = dst1.OutLine<float>();
1910 auto *out2 = dst2.OutLine<float>();
// NOTE(review): width is taken from src1 but the channel count from src2; this assumes both
// inputs share the same geometry - confirm.
1912 int width = src1.length();
1913 int chan = src2.meta().chan;
1914 int length = width * chan;
1916 // SIMD: compiler vectoring!
1917 for (int l=0; l < length; l++)
1919 float angle = angleInDegrees?
1920 in2[l] * static_cast<float>(CV_PI / 180):
1922 float magnitude = in1[l];
1923 float x = magnitude * std::cos(angle);
1924 float y = magnitude * std::sin(angle);
// Fluid kernel for cv::gapi::core::GCartToPolar: converts Cartesian coordinates to polar form,
// magnitude = hypot(y, x) and angle = atan2(y, x), the angle being scaled by 180/pi when
// angleInDegrees is true. The magnitude is written to dst1.
// All four buffers must have depth CV_32F (asserted).
// NOTE(review): the lines that load x and y, the radians branch of the angle expression and
// the store of the angle are missing from this listing; presumably x comes from src1, y from
// src2 and the angle goes to dst2 - confirm.
1931 GAPI_FLUID_KERNEL(GFluidCartToPolar, cv::gapi::core::GCartToPolar, false)
1933 static const int Window = 1;
1935 static void run(const View &src1, const View &src2, bool angleInDegrees,
1936 Buffer &dst1, Buffer &dst2)
1938 GAPI_Assert(src1.meta().depth == CV_32F);
1939 GAPI_Assert(src2.meta().depth == CV_32F);
1940 GAPI_Assert(dst1.meta().depth == CV_32F);
1941 GAPI_Assert(dst2.meta().depth == CV_32F);
1943 const auto * in1 = src1.InLine<float>(0);
1944 const auto * in2 = src2.InLine<float>(0);
1945 auto *out1 = dst1.OutLine<float>();
1946 auto *out2 = dst2.OutLine<float>();
// NOTE(review): width is taken from src1 but the channel count from src2; this assumes both
// inputs share the same geometry - confirm.
1948 int width = src1.length();
1949 int chan = src2.meta().chan;
1950 int length = width * chan;
1952 // SIMD: compiler vectoring!
1953 for (int l=0; l < length; l++)
1957 float magnitude = std::hypot(y, x);
1958 float angle_rad = std::atan2(y, x);
1959 float angle = angleInDegrees?
1960 angle_rad * static_cast<float>(180 / CV_PI):
1962 out1[l] = magnitude;
// Fluid kernel for cv::gapi::core::GPhase: computes a per-element angle from (x, y) rows by
// delegating to hal::fastAtan32f (both inputs CV_32F) or hal::fastAtan64f (both CV_64F).
// Note the argument order passed to the HAL functions: y first, then x, then the output.
// Any other depth combination trips the assertion with "Phase supports 32F/64F input only!".
// NOTE(review): the lines declaring the src_y and dst parameters and the trailing arguments of
// both HAL calls are missing from this listing; presumably those arguments are the element
// count `w` and angleInDegrees - confirm.
1968 GAPI_FLUID_KERNEL(GFluidPhase, cv::gapi::core::GPhase, false)
1970 static const int Window = 1;
1972 static void run(const View &src_x,
1974 bool angleInDegrees,
// number of scalar elements in the output row (pixels times channels)
1977 const auto w = dst.length() * dst.meta().chan;
1978 if (src_x.meta().depth == CV_32F && src_y.meta().depth == CV_32F)
1980 hal::fastAtan32f(src_y.InLine<float>(0),
1981 src_x.InLine<float>(0),
1982 dst.OutLine<float>(),
1986 else if (src_x.meta().depth == CV_64F && src_y.meta().depth == CV_64F)
1988 hal::fastAtan64f(src_y.InLine<double>(0),
1989 src_x.InLine<double>(0),
1990 dst.OutLine<double>(),
1993 } else GAPI_Assert(false && !"Phase supports 32F/64F input only!");
// Fluid kernel for cv::gapi::core::GResize: fixed-point linear interpolation of 3-channel
// 8-bit rows. Unlike the other kernels in this chunk it is declared with `true` as the third
// macro argument and provides initScratch()/resetScratch(); run() receives a scratch buffer
// holding one precomputed horizontal ResizeUnit per output column.
// NOTE(review): the ResizeUnit definition and several lines of map() are missing from this
// listing; from its uses, ResizeUnit carries alpha0, alpha1, s0 and s1.
1997 GAPI_FLUID_KERNEL(GFluidResize, cv::gapi::core::GResize, true)
1999 static const int Window = 1;
2000 static const auto Kind = GFluidKernel::Kind::Resize;
// interpolation weights are stored as integers scaled by 2^11 = 2048
2002 constexpr static const int INTER_RESIZE_COEF_BITS = 11;
2003 constexpr static const int INTER_RESIZE_COEF_SCALE = 1 << INTER_RESIZE_COEF_BITS;
2004 constexpr static const short ONE = INTER_RESIZE_COEF_SCALE;
// Builds the interpolation unit for one output coordinate.
//   ratio    - input size divided by output size along this axis
//   start    - offset subtracted from the source index (0 for columns, in.y() for rows)
//   max      - input size along this axis, used to stop s1 from stepping past the last sample
//   outCoord - output coordinate
// NOTE(review): the lines deriving the integer index `s` and the fractional part from `f`, the
// declaration of `ru` and the return statement are missing from this listing.
2014 static ResizeUnit map(double ratio, int start, int max, int outCoord)
// source position of the output sample centre
2016 float f = static_cast<float>((outCoord + 0.5f) * ratio - 0.5f);
// s0/s1: indices of the two neighbouring source samples, relative to `start`
2022 ru.s0 = std::max(s - start, 0);
2023 ru.s1 = ((f == 0.0) || s + 1 >= max) ? s - start : s - start + 1;
// weights of s0 and s1 in fixed point (scaled by INTER_RESIZE_COEF_SCALE)
2025 ru.alpha0 = saturate_cast<short>((1.0f - f) * INTER_RESIZE_COEF_SCALE);
2026 ru.alpha1 = saturate_cast<short>((f) * INTER_RESIZE_COEF_SCALE);
// Allocates the scratch buffer and fills it with one horizontal ResizeUnit per output column.
// Only CV_8U, 3-channel input is accepted (asserted).
// NOTE(review): the declaration of `desc` is missing from this listing.
2031 static void initScratch(const cv::GMatDesc& in,
2032 cv::Size outSz, double /*fx*/, double /*fy*/, int /*interp*/,
2033 cv::gapi::fluid::Buffer &scratch)
2035 CV_Assert(in.depth == CV_8U && in.chan == 3);
// a single byte row large enough for outSz.width ResizeUnit records
2037 cv::Size scratch_size{static_cast<int>(outSz.width * sizeof(ResizeUnit)), 1};
2041 desc.depth = CV_8UC1;
2042 desc.size = to_own(scratch_size);
2044 cv::gapi::fluid::Buffer buffer(desc);
2045 scratch = std::move(buffer);
2047 ResizeUnit* mapX = scratch.OutLine<ResizeUnit>();
2048 double hRatio = (double)in.size.width / outSz.width;
2050 for (int x = 0, w = outSz.width; x < w; x++)
2052 mapX[x] = map(hRatio, 0, in.size.width, x);
// No statements are visible for this function in the listing.
2056 static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/)
// Produces one output row: the vertical unit is computed for this row, the horizontal units
// are read from scratch, and each channel is blended from four source samples.
2059 static void run(const cv::gapi::fluid::View& in, cv::Size /*sz*/, double /*fx*/, double /*fy*/, int /*interp*/,
2060 cv::gapi::fluid::Buffer& out, cv::gapi::fluid::Buffer &scratch)
2062 double vRatio = (double)in.meta().size.height / out.meta().size.height;
2063 auto mapY = map(vRatio, in.y(), in.meta().size.height, out.y());
// vertical weights of the two source rows
2065 auto beta0 = mapY.alpha0;
2066 auto beta1 = mapY.alpha1;
2068 const auto src0 = in.InLine <unsigned char>(mapY.s0);
2069 const auto src1 = in.InLine <unsigned char>(mapY.s1);
2071 auto dst = out.OutLine<unsigned char>();
2073 ResizeUnit* mapX = scratch.OutLine<ResizeUnit>();
2075 for (int x = 0; x < out.length(); x++)
2077 short alpha0 = mapX[x].alpha0;
2078 short alpha1 = mapX[x].alpha1;
2079 int sx0 = mapX[x].s0;
2080 int sx1 = mapX[x].s1;
// horizontal blend per channel: resNC is source row N (0 or 1), channel C (0..2)
2082 int res00 = src0[3*sx0 ]*alpha0 + src0[3*(sx1) ]*alpha1;
2083 int res10 = src1[3*sx0 ]*alpha0 + src1[3*(sx1) ]*alpha1;
2085 int res01 = src0[3*sx0 + 1]*alpha0 + src0[3*(sx1) + 1]*alpha1;
2086 int res11 = src1[3*sx0 + 1]*alpha0 + src1[3*(sx1) + 1]*alpha1;
2088 int res02 = src0[3*sx0 + 2]*alpha0 + src0[3*(sx1) + 2]*alpha1;
2089 int res12 = src1[3*sx0 + 2]*alpha0 + src1[3*(sx1) + 2]*alpha1;
// vertical blend: the shifts total 4 + 16 + 2 = 22 bits = 2 * INTER_RESIZE_COEF_BITS, removing
// the scale of both weight sets; the "+ 2" rounds before the final ">> 2"
2091 dst[3*x ] = uchar(( ((beta0 * (res00 >> 4)) >> 16) + ((beta1 * (res10 >> 4)) >> 16) + 2)>>2);
2092 dst[3*x + 1] = uchar(( ((beta0 * (res01 >> 4)) >> 16) + ((beta1 * (res11 >> 4)) >> 16) + 2)>>2);
2093 dst[3*x + 2] = uchar(( ((beta0 * (res02 >> 4)) >> 16) + ((beta1 * (res12 >> 4)) >> 16) + 2)>>2);
// Fluid kernel for cv::gapi::core::GSqrt: delegates one row to hal::sqrt32f (CV_32F input)
// or hal::sqrt64f (CV_64F input). Any other input depth trips the assertion with
// "Sqrt supports 32F/64F input only!".
// NOTE(review): the trailing argument of both HAL calls is missing from this listing;
// presumably it is the element count `w` - confirm.
2098 GAPI_FLUID_KERNEL(GFluidSqrt, cv::gapi::core::GSqrt, false)
2100 static const int Window = 1;
2102 static void run(const View &in, Buffer &out)
// number of scalar elements in the output row (pixels times channels)
2104 const auto w = out.length() * out.meta().chan;
2105 if (in.meta().depth == CV_32F)
2107 hal::sqrt32f(in.InLine<float>(0),
2108 out.OutLine<float>(0),
2111 else if (in.meta().depth == CV_64F)
2113 hal::sqrt64f(in.InLine<double>(0),
2114 out.OutLine<double>(0),
2116 } else GAPI_Assert(false && !"Sqrt supports 32F/64F input only!");
2120 } // namespace fluid
// Returns the package of fluid kernels, built with cv::gapi::kernels.
// NOTE(review): the list of kernels actually registered is missing from this listing. The
// ",GFluidXxx -- not fluid" entries below are not valid C++ as written; presumably they sit
// inside a disabled preprocessor region whose directives are not shown - confirm.
2124 cv::gapi::GKernelPackage cv::gapi::core::fluid::kernels()
2126 using namespace cv::gapi::fluid;
2128 return cv::gapi::kernels
2176 ,GFluidMean -- not fluid
2177 ,GFluidSum -- not fluid
2178 ,GFluidNormL1 -- not fluid
2179 ,GFluidNormL2 -- not fluid
2180 ,GFluidNormInf -- not fluid
2181 ,GFluidIntegral -- not fluid
2182 ,GFluidThresholdOT -- not fluid
2183 ,GFluidResize -- not fluid (?)
2184 ,GFluidRemap -- not fluid
2185 ,GFluidFlip -- not fluid
2186 ,GFluidCrop -- not fluid
2188 ,GFluidConcatVert -- not fluid
2193 #endif // !defined(GAPI_STANDALONE)