1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include "ie_preprocess_gapi_kernels.hpp"
6 #include "ie_preprocess_gapi_kernels_impl.hpp"
8 // AFTER "ie_preprocess_gapi_kernels_impl.hpp"
9 // (MANUAL_SIMD is defined there)
11 #include "cpu_detector.hpp"
12 #include "ie_preprocess_gapi_kernels_sse42.hpp"
15 #include <opencv2/gapi/opencv_includes.hpp>
16 #include <opencv2/gapi/fluid/gfluidkernel.hpp>
17 #include <opencv2/gapi/gcompoundkernel.hpp>
20 #include <type_traits>
24 namespace InferenceEngine {
// Interleave `chs` planar channel rows (`ins`) into one packed pixel row
// (`out`) of `length` pixels; T is the element type (uint8_t or float).
// NOTE(review): several closing braces, early exits and the MANUAL_SIMD
// preprocessor guards appear elided in this view of the file — confirm
// against the full source.
template<typename T, int chs> static
void mergeRow(const std::array<const uint8_t*, chs>& ins, uint8_t* out, int length) {
    // Fast paths: hand-written SSE4.2 row kernels, selected by runtime CPU check.
    if (with_cpu_x86_sse42()) {
        if (std::is_same<T, uint8_t>::value && chs == 2) {
            mergeRow_8UC2(ins[0], ins[1], out, length);
        if (std::is_same<T, uint8_t>::value && chs == 3) {
            mergeRow_8UC3(ins[0], ins[1], ins[2], out, length);
        if (std::is_same<T, uint8_t>::value && chs == 4) {
            mergeRow_8UC4(ins[0], ins[1], ins[2], ins[3], out, length);
        if (std::is_same<T, float>::value && chs == 2) {
            mergeRow_32FC2(reinterpret_cast<const float*>(ins[0]),
                           reinterpret_cast<const float*>(ins[1]),
                           reinterpret_cast<float*>(out), length);
        if (std::is_same<T, float>::value && chs == 3) {
            mergeRow_32FC3(reinterpret_cast<const float*>(ins[0]),
                           reinterpret_cast<const float*>(ins[1]),
                           reinterpret_cast<const float*>(ins[2]),
                           reinterpret_cast<float*>(out), length);
        if (std::is_same<T, float>::value && chs == 4) {
            mergeRow_32FC4(reinterpret_cast<const float*>(ins[0]),
                           reinterpret_cast<const float*>(ins[1]),
                           reinterpret_cast<const float*>(ins[2]),
                           reinterpret_cast<const float*>(ins[3]),
                           reinterpret_cast<float*>(out), length);
    // Generic scalar fallback: reinterpret every input plane as T...
    // (NOTE(review): the declaration of `insT` is not visible here)
    for (int c = 0; c < chs; c++) {
        insT[c] = reinterpret_cast<const T*>(ins[c]);
    auto outT = reinterpret_cast<T*>(out);
    // ...then copy channel-by-channel into interleaved layout.
    for (int x = 0; x < length; x++) {
        for (int c = 0; c < chs; c++) {
            outT[chs*x + c] = insT[c][x];
// De-interleave one packed pixel row (`in`) into `chs` planar channel rows
// (`outs`) of `length` pixels; T is the element type (uint8_t or float).
// Mirror of mergeRow above.
// NOTE(review): closing braces / early exits and MANUAL_SIMD guards appear
// elided in this view of the file.
template<typename T, int chs> static
void splitRow(const uint8_t* in, std::array<uint8_t*, chs>& outs, int length) {
    // Fast paths: SSE4.2 row kernels, selected by runtime CPU check.
    if (with_cpu_x86_sse42()) {
        if (std::is_same<T, uint8_t>::value && chs == 2) {
            splitRow_8UC2(in, outs[0], outs[1], length);
        if (std::is_same<T, uint8_t>::value && chs == 3) {
            splitRow_8UC3(in, outs[0], outs[1], outs[2], length);
        if (std::is_same<T, uint8_t>::value && chs == 4) {
            splitRow_8UC4(in, outs[0], outs[1], outs[2], outs[3], length);
        if (std::is_same<T, float>::value && chs == 2) {
            splitRow_32FC2(reinterpret_cast<const float*>(in),
                           reinterpret_cast<float*>(outs[0]),
                           reinterpret_cast<float*>(outs[1]),
        if (std::is_same<T, float>::value && chs == 3) {
            splitRow_32FC3(reinterpret_cast<const float*>(in),
                           reinterpret_cast<float*>(outs[0]),
                           reinterpret_cast<float*>(outs[1]),
                           reinterpret_cast<float*>(outs[2]),
        if (std::is_same<T, float>::value && chs == 4) {
            splitRow_32FC4(reinterpret_cast<const float*>(in),
                           reinterpret_cast<float*>(outs[0]),
                           reinterpret_cast<float*>(outs[1]),
                           reinterpret_cast<float*>(outs[2]),
                           reinterpret_cast<float*>(outs[3]),
    // Generic scalar fallback: plain de-interleaving copy.
    auto inT = reinterpret_cast<const T*>(in);
    // (NOTE(review): the declaration of `outsT` is not visible here)
    for (int c = 0; c < chs; c++) {
        outsT[c] = reinterpret_cast<T*>(outs[c]);
    for (int x = 0; x < length; x++) {
        for (int c = 0; c < chs; c++) {
            outsT[c][x] = inT[chs*x + c];
// Fluid kernel: merge two planes into a 2-channel interleaved buffer.
// Processes up to LPI lines per invocation; element type (8U/32F) is
// dispatched at runtime from the input's depth.
GAPI_FLUID_KERNEL(FMerge2, Merge2, false) {
    static const int LPI = 4;
    static const int Window = 1;
    static void run(const cv::gapi::fluid::View& a,
                    const cv::gapi::fluid::View& b,
                    cv::gapi::fluid::Buffer& out) {
        const auto rowFunc = (a.meta().depth == CV_8U) ? &mergeRow<uint8_t, 2> : &mergeRow<float, 2>;
        for (int l = 0; l < out.lpi(); l++) {
            rowFunc({a.InLineB(l), b.InLineB(l)}, out.OutLineB(l), a.length());
// Fluid kernel: merge three planes into a 3-channel interleaved buffer.
GAPI_FLUID_KERNEL(FMerge3, Merge3, false) {
    static const int LPI = 4;
    static const int Window = 1;
    static void run(const cv::gapi::fluid::View& a,
                    const cv::gapi::fluid::View& b,
                    const cv::gapi::fluid::View& c,
                    cv::gapi::fluid::Buffer& out) {
        // Pick 8U or 32F row implementation from the first input's depth.
        const auto rowFunc = (a.meta().depth == CV_8U) ? &mergeRow<uint8_t, 3> : &mergeRow<float, 3>;
        for (int l = 0; l < out.lpi(); l++) {
            rowFunc({a.InLineB(l), b.InLineB(l), c.InLineB(l)}, out.OutLineB(l), a.length());
// Fluid kernel: merge four planes into a 4-channel interleaved buffer.
GAPI_FLUID_KERNEL(FMerge4, Merge4, false) {
    static const int LPI = 4;
    static const int Window = 1;
    static void run(const cv::gapi::fluid::View& a,
                    const cv::gapi::fluid::View& b,
                    const cv::gapi::fluid::View& c,
                    const cv::gapi::fluid::View& d,
                    cv::gapi::fluid::Buffer& out) {
        // Pick 8U or 32F row implementation from the first input's depth.
        const auto rowFunc = (a.meta().depth == CV_8U) ? &mergeRow<uint8_t, 4> : &mergeRow<float, 4>;
        for (int l = 0; l < out.lpi(); l++) {
            rowFunc({a.InLineB(l), b.InLineB(l), c.InLineB(l), d.InLineB(l)}, out.OutLineB(l), a.length());
// Fluid kernel: split a 2-channel interleaved buffer into two planes.
// Debug asserts pin the contract: 2-channel input, 1-channel outputs,
// matching depths, 8U or 32F only.
GAPI_FLUID_KERNEL(FSplit2, Split2, false) {
    static const int LPI = 4;
    static const int Window = 1;
    static void run(const cv::gapi::fluid::View & in,
                    cv::gapi::fluid::Buffer& out1,
                    cv::gapi::fluid::Buffer& out2) {
        GAPI_DbgAssert(2 == in.meta().chan);
        GAPI_DbgAssert(1 == out1.meta().chan);
        GAPI_DbgAssert(1 == out2.meta().chan);
        GAPI_DbgAssert(in.meta().depth == out1.meta().depth);
        GAPI_DbgAssert(in.meta().depth == out2.meta().depth);
        GAPI_DbgAssert(CV_8U == in.meta().depth || CV_32F == in.meta().depth);
        const auto rowFunc = (in.meta().depth == CV_8U) ?
                             &splitRow<uint8_t, 2> :
                             &splitRow<float , 2>;
        for (int i = 0, lpi = out1.lpi(); i < lpi; i++) {
            std::array<uint8_t*, 2> outs = {out1.OutLineB(i), out2.OutLineB(i)};
            rowFunc(in.InLineB(i), outs, in.length());
// Fluid kernel: split a 3-channel interleaved buffer into three planes.
// NOTE(review): the `outs` initializer below appears truncated in this view
// (the out3.OutLineB(i) continuation line is not visible).
GAPI_FLUID_KERNEL(FSplit3, Split3, false) {
    static const int LPI = 4;
    static const int Window = 1;
    static void run(const cv::gapi::fluid::View & in,
                    cv::gapi::fluid::Buffer& out1,
                    cv::gapi::fluid::Buffer& out2,
                    cv::gapi::fluid::Buffer& out3) {
        GAPI_DbgAssert(3 == in.meta().chan);
        GAPI_DbgAssert(1 == out1.meta().chan);
        GAPI_DbgAssert(1 == out2.meta().chan);
        GAPI_DbgAssert(1 == out3.meta().chan);
        GAPI_DbgAssert(in.meta().depth == out1.meta().depth);
        GAPI_DbgAssert(in.meta().depth == out2.meta().depth);
        GAPI_DbgAssert(in.meta().depth == out3.meta().depth);
        GAPI_DbgAssert(CV_8U == in.meta().depth || CV_32F == in.meta().depth);
        const auto rowFunc = (in.meta().depth == CV_8U) ?
                             &splitRow<uint8_t, 3> :
                             &splitRow<float , 3>;
        for (int i = 0, lpi = out1.lpi(); i < lpi; i++) {
            std::array<uint8_t*, 3> outs = {out1.OutLineB(i), out2.OutLineB(i),
            rowFunc(in.InLineB(i), outs, in.length());
// Fluid kernel: split a 4-channel interleaved buffer into four planes.
GAPI_FLUID_KERNEL(FSplit4, Split4, false) {
    static const int LPI = 4;
    static const int Window = 1;
    static void run(const cv::gapi::fluid::View & in,
                    cv::gapi::fluid::Buffer& out1,
                    cv::gapi::fluid::Buffer& out2,
                    cv::gapi::fluid::Buffer& out3,
                    cv::gapi::fluid::Buffer& out4) {
        // Contract checks: 4-channel input, single-channel outputs of the same depth.
        GAPI_DbgAssert(4 == in.meta().chan);
        GAPI_DbgAssert(1 == out1.meta().chan);
        GAPI_DbgAssert(1 == out2.meta().chan);
        GAPI_DbgAssert(1 == out3.meta().chan);
        GAPI_DbgAssert(1 == out4.meta().chan);
        GAPI_DbgAssert(in.meta().depth == out1.meta().depth);
        GAPI_DbgAssert(in.meta().depth == out2.meta().depth);
        GAPI_DbgAssert(in.meta().depth == out3.meta().depth);
        GAPI_DbgAssert(in.meta().depth == out4.meta().depth);
        GAPI_DbgAssert(CV_8U == in.meta().depth || CV_32F == in.meta().depth);
        const auto rowFunc = (in.meta().depth == CV_8U) ?
                             &splitRow<uint8_t, 4> :
                             &splitRow<float , 4>;
        for (int i = 0, lpi = out1.lpi(); i < lpi; i++) {
            std::array<uint8_t*, 4> outs = {out1.OutLineB(i), out2.OutLineB(i),
                                            out3.OutLineB(i), out4.OutLineB(i)};
            rowFunc(in.InLineB(i), outs, in.length());
268 //----------------------------------------------------------------------
// Extract channel `chan` from an interleaved row of `chs` channels into a
// planar output row of `length` elements; T is uint8_t or float.
// When chs == 1 this degenerates to a plain row copy, which the SSE4.2
// branch exploits.
// NOTE(review): the template header line and MANUAL_SIMD guards appear
// elided in this view of the file.
static void chanToPlaneRow(const uint8_t* in, int chan, int chs, uint8_t* out, int length) {
    // Fast path: single-channel case is a memcpy-like SIMD copy.
    if (with_cpu_x86_sse42()) {
        if (std::is_same<T, uint8_t>::value && chs == 1) {
            copyRow_8U(in, out, length);
        if (std::is_same<T, float>::value && chs == 1) {
            copyRow_32F(reinterpret_cast<const float*>(in),
                        reinterpret_cast<float*>(out),
    // Scalar fallback: strided gather of one channel.
    const auto inT  = reinterpret_cast<const T*>(in);
          auto outT = reinterpret_cast<      T*>(out);
    for (int x = 0; x < length; x++) {
        outT[x] = inT[x*chs + chan];
296 // GAPI_OCV_KERNEL(OCVChanToPlane, ChanToPlane) {
297 // static void run(const cv::Mat &in, int chan, cv::Mat &out) {
298 // out.create(in.rows, in.cols, in.depth());
299 // const auto rowFunc = (in.depth() == CV_8U) ? &chanToPlaneRow<uint8_t> : &chanToPlaneRow<float>;
301 // for (int y = 0; y < out.rows; y++)
303 // rowFunc(in.data + y*in.step, chan, in.channels(), out.data + y*out.step, in.cols);
308 // GAPI_OCV_KERNEL(OCVScalePlane, ScalePlane) {
309 // static void run(const cv::Mat &in, int /*type*/, const Size &sz, int interp, cv::Mat &out) {
310 // cv::resize(in, out, sz, 0, 0, interp);
314 // GAPI_OCV_KERNEL(OCVMerge2, Merge2) {
315 // static void run(const cv::Mat &a, const cv::Mat &b, cv::Mat out) {
316 // out.create(a.rows, a.cols, CV_MAKETYPE(a.depth(), 2));
317 // const auto rowFunc = (a.depth() == CV_8U) ? &mergeRow<uint8_t, 2> : &mergeRow<float, 2>;
319 // for (int y = 0; y < out.rows; y++)
321 // rowFunc({a.data + y*a.step, b.data + y*b.step}, out.data + out.step, a.cols);
// Fluid kernel: copy one channel of an interleaved input into a plane.
// Single-line window; depth (8U/32F) dispatched at runtime.
GAPI_FLUID_KERNEL(FChanToPlane, ChanToPlane, false) {
    static const int Window = 1;
    static void run(const cv::gapi::fluid::View& in, int chan,
                    cv::gapi::fluid::Buffer& out) {
        const auto rowFunc = (in.meta().depth == CV_8U) ? &chanToPlaneRow<uint8_t> : &chanToPlaneRow<float>;
        rowFunc(in.InLineB(0), chan, in.meta().chan, out.OutLineB(), in.length());
335 //----------------------------------------------------------------------
// Typed kernel declaration: bilinear-style resize of a single 8U plane.
// outMeta only changes the size; depth/channels are asserted, not converted.
G_TYPED_KERNEL(ScalePlane8u, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_8u") {
    static cv::GMatDesc outMeta(const cv::GMatDesc &in, const Size &sz, int) {
        GAPI_DbgAssert(in.depth == CV_8U && in.chan == 1);
        return in.withSize(sz);
// Typed kernel declaration: resize of a single 32F plane (size-only meta change).
G_TYPED_KERNEL(ScalePlane32f, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_32f") {
    static cv::GMatDesc outMeta(const cv::GMatDesc &in, const Size &sz, int) {
        GAPI_DbgAssert(in.depth == CV_32F && in.chan == 1);
        return in.withSize(sz);
// Typed kernel declaration: AREA-interpolation upscale of an 8U plane.
// Asserts that at least one dimension actually grows.
G_TYPED_KERNEL(UpscalePlaneArea8u, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.upscale_plane_area_8u") {
    static cv::GMatDesc outMeta(const cv::GMatDesc &in, const Size &sz, int) {
        GAPI_DbgAssert(in.depth == CV_8U && in.chan == 1);
        GAPI_DbgAssert(in.size.width < sz.width || in.size.height < sz.height);
        return in.withSize(sz);
// Typed kernel declaration: AREA-interpolation upscale of a 32F plane.
G_TYPED_KERNEL(UpscalePlaneArea32f, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.upscale_plane_area_32f") {
    static cv::GMatDesc outMeta(const cv::GMatDesc &in, const Size &sz, int) {
        GAPI_DbgAssert(in.depth == CV_32F && in.chan == 1);
        GAPI_DbgAssert(in.size.width < sz.width || in.size.height < sz.height);
        return in.withSize(sz);
// Typed kernel declaration: AREA-interpolation downscale of an 8U plane.
// Asserts that neither dimension grows.
G_TYPED_KERNEL(ScalePlaneArea8u, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_area_8u") {
    static cv::GMatDesc outMeta(const cv::GMatDesc &in, const Size &sz, int) {
        GAPI_DbgAssert(in.depth == CV_8U && in.chan == 1);
        GAPI_DbgAssert(in.size.width >= sz.width && in.size.height >= sz.height);
        return in.withSize(sz);
// Typed kernel declaration: AREA-interpolation downscale of a 32F plane.
G_TYPED_KERNEL(ScalePlaneArea32f, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_area_32f") {
    static cv::GMatDesc outMeta(const cv::GMatDesc &in, const Size &sz, int) {
        GAPI_DbgAssert(in.depth == CV_32F && in.chan == 1);
        GAPI_DbgAssert(in.size.width >= sz.width && in.size.height >= sz.height);
        return in.withSize(sz);
// Compound kernel: route a plane-resize request to the concrete typed kernel
// by interpolation mode (AREA vs LINEAR), depth (8U vs 32F) and, for AREA,
// by up- vs downscale direction. Unsupported combinations hit GAPI_Assert.
// NOTE(review): the `if (upscale)`/`else` selector lines appear elided in
// this view — confirm against the full source.
GAPI_COMPOUND_KERNEL(FScalePlane, ScalePlane) {
    static cv::GMat expand(cv::GMat in, int type, const Size& szIn, const Size& szOut, int interp) {
        GAPI_DbgAssert(CV_8UC1 == type || CV_32FC1 == type);
        GAPI_DbgAssert(cv::INTER_AREA == interp || cv::INTER_LINEAR == interp);
        if (cv::INTER_AREA == interp) {
            // AREA upscale and downscale use different algorithms, so they
            // are distinct kernels.
            bool upscale = szIn.width < szOut.width || szIn.height < szOut.height;
            if (CV_8UC1 == type) {
                return UpscalePlaneArea8u::on(in, szOut, interp);
                return ScalePlaneArea8u::on(in, szOut, interp);
            if (CV_32FC1 == type) {
                return UpscalePlaneArea32f::on(in, szOut, interp);
                return ScalePlaneArea32f::on(in, szOut, interp);
        if (cv::INTER_LINEAR == interp) {
            if (CV_8UC1 == type) {
                return ScalePlane8u::on(in, szOut, interp);
            if (CV_32FC1 == type) {
                return ScalePlane32f::on(in, szOut, interp);
        GAPI_Assert(!"unsupported parameters");
// Inverse scale factor for a resize along one axis: outSz / inSz as a double.
// Values > 1 mean upscale, < 1 mean downscale.
static inline double invRatio(int inSz, int outSz) {
    return outSz / static_cast<double>(inSz);
}
// Forward scale factor (inSz per one output pixel), defined as the exact
// reciprocal of invRatio so the two stay numerically consistent.
static inline double ratio(int inSz, int outSz) {
    return 1 / invRatio(inSz, outSz);
// Layout descriptor for the linear-resize scratch buffer. Carves one flat
// allocation into sub-arrays:
//   alpha  - per-output-column horizontal weights           (outW)
//   clone  - alpha replicated x4 for SIMD lanes             (outW*4)
//   mapsx  - per-output-column source x indices             (outW)
//   beta   - per-output-row vertical weights                (outH)
//   mapsy  - per-output-row source y indices (two maps)     (outH*2)
//   tmp    - row workspace, lpi rows x chanNum channels
// NOTE(review): the member declarations themselves are not visible in this
// view of the file; layout is inferred from the pointer arithmetic below.
template<typename T, typename Mapper, int chanNum>
struct linearScratchDesc {
    using alpha_t = typename Mapper::alpha_type;
    using index_t = typename Mapper::index_type;
    linearScratchDesc(int /*inW*/, int /*inH*/, int outW, int outH, void* data) {
        // Carve sequential regions out of the single scratch allocation.
        alpha = reinterpret_cast<alpha_t*>(data);
        clone = reinterpret_cast<alpha_t*>(alpha + outW);
        mapsx = reinterpret_cast<index_t*>(clone + outW*4);
        beta  = reinterpret_cast<alpha_t*>(mapsx + outW);
        mapsy = reinterpret_cast<index_t*>(beta  + outH);
        tmp   = reinterpret_cast<T*>     (mapsy + outH*2);
    // Total bytes required for the layout above (must match the carving).
    static int bufSize(int inW, int inH, int outW, int outH, int lpi) {
        auto size = outW * sizeof(alpha_t) +
                    outW * sizeof(alpha_t) * 4 +  // alpha clones // previous alpha is redundant?
                    outW * sizeof(index_t) +
                    outH * sizeof(alpha_t) +
                    outH * sizeof(index_t) * 2 +
                     inW * sizeof(T) * lpi * chanNum;
        return static_cast<int>(size);
// Allocate and pre-fill the scratch buffer for linear resize: computes the
// horizontal (alpha/mapsx) and vertical (beta/mapsy) interpolation tables
// once, so calcRowLinear only reads them per row.
// NOTE(review): parts of the signature (outSz, lpi parameters) and several
// statements appear elided in this view of the file.
template<typename T, typename Mapper, int chanNum = 1>
static void initScratchLinear(const cv::GMatDesc& in,
                              cv::gapi::fluid::Buffer& scratch,
    using alpha_type = typename Mapper::alpha_type;
    static const auto unity = Mapper::unity;
    // Size and (re)allocate the scratch as a 1-row CV_8UC1 buffer.
    auto sbufsize = linearScratchDesc<T, Mapper, chanNum>::bufSize(inSz.width, inSz.height, outSz.width, outSz.height, lpi);
    Size scratch_size{sbufsize, 1};
    desc.depth = CV_8UC1;
    desc.size = scratch_size;
    cv::gapi::fluid::Buffer buffer(desc);
    scratch = std::move(buffer);
    double hRatio = ratio(in.size.width,  outSz.width);
    double vRatio = ratio(in.size.height, outSz.height);
    linearScratchDesc<T, Mapper, chanNum> scr(inSz.width, inSz.height, outSz.width, outSz.height, scratch.OutLineB());
    auto *alpha = scr.alpha;
    auto *clone = scr.clone;
    auto *index = scr.mapsx;
    // Horizontal tables: one (alpha0, index0) pair per output column.
    for (int x = 0; x < outSz.width; x++) {
        auto map = Mapper::map(hRatio, 0, in.size.width, x);
        auto alpha0 = map.alpha0;
        auto index0 = map.index0;
        // Algorithm takes pair of input pixels, sx0'th and sx1'th,
        // and computes result as alpha0*src[sx0] + alpha1*src[sx1].
        // By definition: sx1 == sx0 + 1 or sx1 == sx0, and
        // alpha0 + alpha1 == unity (scaled appropriately).
        // Here we modify formulas for alpha0 and sx1: by assuming
        // that sx1 == sx0 + 1 always, and patching alpha0 so that
        // the result remains intact.
        // Note that we need in.size.width >= 2, so that both sx0 and
        // sx0+1 index pixels inside the input's width.
        if (map.index1 != map.index0 + 1) {
            GAPI_DbgAssert(map.index1 == map.index0);
            GAPI_DbgAssert(in.size.width >= 2);
            if (map.index0 < in.size.width-1) {
                // sx1=sx0+1 fits inside row,
                // make sure alpha0=unity and alpha1=0,
                // so that result equals src[sx0]*unity
                alpha0 = saturate_cast<alpha_type>(unity);
                // shift sx0 to left by 1 pixel,
                // and make sure that alpha0=0 and alpha1==1,
                // so that result equals to src[sx0+1]*unity
        // Replicate alpha into 4 SIMD lanes.
        for (int l = 0; l < 4; l++) {
            clone[4*x + l] = alpha0;
    auto *beta    = scr.beta;
    auto *index_y = scr.mapsy;
    // Vertical tables: beta weight plus two row indices per output row.
    for (int y = 0; y < outSz.height; y++) {
        auto mapY = Mapper::map(vRatio, 0, in.size.height, y);
        beta[y] = mapY.alpha0;
        index_y[y] = mapY.index0;
        index_y[outSz.height + y] = mapY.index1;
// Produce up to `lpi` resized output rows with bilinear interpolation, using
// the tables precomputed by initScratchLinear. SSE4.2 paths handle the 8U
// (wide-enough rows) and 32F cases; the scalar loop is the fallback.
// NOTE(review): declarations of outY/inY/lpi/src0/src1/dst/tmp and several
// closing braces appear elided in this view of the file.
template<typename T, class Mapper>
static void calcRowLinear(const cv::gapi::fluid::View & in,
                          cv::gapi::fluid::Buffer& out,
                          cv::gapi::fluid::Buffer& scratch) {
    using alpha_type = typename Mapper::alpha_type;
    auto inSz  = in.meta().size;
    auto outSz = out.meta().size;
    int length = out.length();
    GAPI_DbgAssert(outY + lpi <= outSz.height);
    GAPI_DbgAssert(lpi <= 4);
    // Re-derive the scratch layout to get at the precomputed tables.
    linearScratchDesc<T, Mapper, 1> scr(inSz.width, inSz.height, outSz.width, outSz.height, scratch.OutLineB());
    const auto *alpha = scr.alpha;
    const auto *clone = scr.clone;
    const auto *mapsx = scr.mapsx;
    const auto *beta0 = scr.beta;
    const auto *mapsy = scr.mapsy;
    const auto *beta = beta0 + outY;
    // Gather the two source rows and the destination row for each output line.
    for (int l = 0; l < lpi; l++) {
        auto index0 = mapsy[outY + l] - inY;
        auto index1 = mapsy[outSz.height + outY + l] - inY;
        src0[l] = in.InLine<const T>(index0);
        src1[l] = in.InLine<const T>(index1);
        dst[l] = out.OutLine<T>(l);
    // SIMD fast paths (runtime CPU dispatch).
    if (with_cpu_x86_sse42()) {
        if (std::is_same<T, uint8_t>::value) {
            // 8U kernel requires minimum row widths to fill its vectors.
            if (inSz.width >= 16 && outSz.width >= 8) {
                calcRowLinear_8U(reinterpret_cast<uint8_t**>(dst),
                                 reinterpret_cast<const uint8_t**>(src0),
                                 reinterpret_cast<const uint8_t**>(src1),
                                 reinterpret_cast<const short*>(alpha),
                                 reinterpret_cast<const short*>(clone),
                                 reinterpret_cast<const short*>(mapsx),
                                 reinterpret_cast<const short*>(beta),
                                 reinterpret_cast<uint8_t*>(tmp),
        if (std::is_same<T, float>::value) {
            calcRowLinear_32F(reinterpret_cast<float**>(dst),
                              reinterpret_cast<const float**>(src0),
                              reinterpret_cast<const float**>(src1),
                              reinterpret_cast<const float*>(alpha),
                              reinterpret_cast<const int*>(mapsx),
                              reinterpret_cast<const float*>(beta),
    // Scalar fallback: vertical blend of two rows, then horizontal blend of
    // two columns, per output pixel.
    for (int l = 0; l < lpi; l++) {
        constexpr static const auto unity = Mapper::unity;
        auto beta0 = beta[l];
        auto beta1 = saturate_cast<alpha_type>(unity - beta[l]);
        for (int x = 0; x < length; x++) {
            auto alpha0 = alpha[x];
            auto alpha1 = saturate_cast<alpha_type>(unity - alpha[x]);
            T tmp0 = calc(beta0, src0[l][sx0], beta1, src1[l][sx0]);
            T tmp1 = calc(beta0, src0[l][sx1], beta1, src1[l][sx1]);
            dst[l][x] = calc(alpha0, tmp0, alpha1, tmp1);
// 3-channel variant of calcRowLinear: the interleaved 3-channel input is
// resized and split into three planar outputs in one pass. SSE4.2 path
// covers the 8UC3 case; the scalar loop is the general fallback.
// NOTE(review): declarations of inY/src0/src1/tmp and several closing
// braces appear elided in this view of the file.
template<typename T, class Mapper>
static void calcRowLinearC3(const cv::gapi::fluid::View & in,
                            cv::gapi::fluid::Buffer& out0,
                            cv::gapi::fluid::Buffer& out1,
                            cv::gapi::fluid::Buffer& out2,
                            cv::gapi::fluid::Buffer& scratch) {
    using alpha_type = typename Mapper::alpha_type;
    auto inSz  = in.meta().size;
    auto outSz = out0.meta().size;
    auto outY = out0.y();
    auto lpi  = out0.lpi();
    GAPI_DbgAssert(outY + lpi <= outSz.height);
    GAPI_DbgAssert(lpi <= 4);
    // Tables were built with chanNum == 3 in initScratchLinear.
    linearScratchDesc<T, Mapper, 3> scr(inSz.width, inSz.height, outSz.width, outSz.height, scratch.OutLineB());
    const auto *alpha = scr.alpha;
    const auto *clone = scr.clone;
    const auto *mapsx = scr.mapsx;
    const auto *beta0 = scr.beta;
    const auto *mapsy = scr.mapsy;
    const auto *beta = beta0 + outY;
    // dst[c][l]: destination row l of output plane c.
    std::array<std::array<T*, 4>, 3> dst;
    for (int l = 0; l < lpi; l++) {
        auto index0 = mapsy[outY + l] - inY;
        auto index1 = mapsy[outSz.height + outY + l] - inY;
        src0[l] = in.InLine<const T>(index0);
        src1[l] = in.InLine<const T>(index1);
        dst[0][l] = out0.OutLine<T>(l);
        dst[1][l] = out1.OutLine<T>(l);
        dst[2][l] = out2.OutLine<T>(l);
    // SIMD fast path for 8UC3 when rows are wide enough.
    if (with_cpu_x86_sse42()) {
        if (inSz.width >= 16 && outSz.width >= 8) {
            calcRowLinear_8UC3(dst,
                               reinterpret_cast<const uint8_t**>(src0),
                               reinterpret_cast<const uint8_t**>(src1),
                               reinterpret_cast<const short*>(alpha),
                               reinterpret_cast<const short*>(clone),
                               reinterpret_cast<const short*>(mapsx),
                               reinterpret_cast<const short*>(beta),
                               reinterpret_cast<uint8_t*>(tmp),
    auto length = out0.length();
    // Scalar fallback: bilinear blend per channel of the interleaved source.
    for (int l = 0; l < lpi; l++) {
        constexpr static const auto unity = Mapper::unity;
        auto beta0 = beta[l];
        auto beta1 = saturate_cast<alpha_type>(unity - beta[l]);
        for (int x = 0; x < length; x++) {
            auto alpha0 = alpha[x];
            auto alpha1 = saturate_cast<alpha_type>(unity - alpha[x]);
            for (int c = 0; c < 3; c++) {
                // Interleaved indexing: channel c of source columns sx0/sx1.
                auto idx0 = 3*sx0 + c;
                auto idx1 = 3*sx1 + c;
                T tmp0 = calc(beta0, src0[l][idx0], beta1, src1[l][idx0]);
                T tmp1 = calc(beta0, src0[l][idx1], beta1, src1[l][idx1]);
                dst[c][l][x] = calc(alpha0, tmp0, alpha1, tmp1);
712 //------------------------------------------------------------------------------
    // Fixed-point (Q0.15-style via ONE) coordinate mapper for INTER_LINEAR.
    // map() converts an output coordinate into two source indices
    // (index0/index1, clamped to [0, max)) plus complementary weights
    // alpha0/alpha1 that sum to ONE.
    // NOTE(review): the enclosing namespace/struct opener, the declarations
    // of `s` and `u`, and the `return` appear elided in this view.
    typedef short alpha_type;
    typedef short index_type;
    constexpr static const int unity = ONE;
    typedef MapperUnit<short, short> Unit;
    static inline Unit map(double ratio, int start, int max, int outCoord) {
        // Pixel-center convention: sample at (outCoord + 0.5) in output space.
        float f = ((outCoord + 0.5f) * ratio - 0.5f);
        u.index0 = std::max(s - start, 0);
        u.index1 = ((f == 0.0) || s + 1 >= max) ? s - start : s - start + 1;
        u.alpha0 = saturate_cast<short>(ONE * (1.0f - f));
        u.alpha1 = saturate_cast<short>(ONE * f);
} // namespace linear
// Floating-point coordinate mapper for INTER_LINEAR on 32F data: same
// mapping as linear::Mapper but with float weights and int indices
// (unity == 1.0 instead of the fixed-point ONE).
// NOTE(review): the struct opener, alpha assignments and `return` appear
// elided in this view.
namespace linear32f {
typedef float alpha_type;
typedef int   index_type;
constexpr static const float unity = 1;
typedef MapperUnit<float, int> Unit;
static inline Unit map(double ratio, int start, int max, int outCoord) {
    // Pixel-center convention, matching the fixed-point mapper above.
    float f = ((outCoord + 0.5f) * ratio - 0.5f);
    u.index0 = std::max(s - start, 0);
    u.index1 = ((f == 0.0) || s + 1 >= max) ? s - start : s - start + 1;
} // namespace linear32f
// Fixed-point coordinate mapper for INTER_AREA *upscale*: uses floor-based
// source coordinates (no pixel-center shift), matching OpenCV's AREA
// upscale behaviour. Weights are scaled by ONE.
// NOTE(review): the struct opener, declarations of `u`, and `return`
// appear elided in this view.
namespace areaUpscale {
typedef short alpha_type;
typedef short index_type;
constexpr static const int unity = ONE;
typedef MapperUnit<short, short> Unit;
static inline Unit map(double ratio, int start, int max, int outCoord) {
    int s = cvFloor(outCoord*ratio);
    // Fractional overlap of the output pixel with source cell s.
    float f = static_cast<float>((outCoord+1) - (s+1)/ratio);
    f = f <= 0 ? 0.f : f - cvFloor(f);
    u.index0 = std::max(s - start, 0);
    u.index1 = ((f == 0.0) || s + 1 >= max) ? s - start : s - start + 1;
    u.alpha0 = saturate_cast<short>(ONE * (1.0f - f));
    u.alpha1 = saturate_cast<short>(ONE * f);
} // namespace areaUpscale
// Floating-point variant of the AREA-upscale mapper (float weights,
// int indices, unity == 1.0).
// NOTE(review): alpha assignments and `return` appear elided in this view.
namespace areaUpscale32f {
typedef float alpha_type;
typedef int   index_type;
constexpr static const float unity = 1;
typedef MapperUnit<float, int> Unit;
static inline Unit map(double ratio, int start, int max, int outCoord) {
    int s = cvFloor(outCoord*ratio);
    float f = static_cast<float>((outCoord+1) - (s+1)/ratio);
    f = f <= 0 ? 0.f : f - cvFloor(f);
    u.index0 = std::max(s - start, 0);
    u.index1 = ((f == 0.0) || s + 1 >= max) ? s - start : s - start + 1;
} // namespace areaUpscale32f
818 //------------------------------------------------------------------------------
// Generic mapper for AREA *downscale*: each output pixel covers the source
// span [outCoord*ratio, (outCoord+1)*ratio). map() returns the index range
// of covered source pixels plus edge weights alpha0/alpha1; fully covered
// interior pixels use the constant weight `alpha` (== inv_ratio).
// A = weight type, I = index type, W = working/accumulator type.
template<typename A, typename I, typename W>
struct AreaDownMapper {
    typedef A alpha_type;
    typedef I index_type;
    typedef MapperUnit<alpha_type, index_type> Unit;
    inline Unit map(int outCoord) {
        // Continuous source span covered by this output pixel.
        double inCoord0 =  outCoord      * ratio;
        double inCoord1 = (outCoord + 1) * ratio;
        // Epsilon (0.001) guards against float noise at cell boundaries.
        double index0 = std::floor(inCoord0 + 0.001);
        double index1 = std::ceil (inCoord1 - 0.001);
        // Partial-coverage weights for the first and last source pixels.
        double alpha0 =   (index0 + 1 - inCoord0) * inv_ratio;
        double alpha1 = - (index1 - 1 - inCoord1) * inv_ratio;
        GAPI_Assert(0 <= outCoord && outCoord <= outSz-1);
        GAPI_Assert(0 <= index0 && index0 < index1 && index1 <= inSz);
        unit.index0 = checked_cast<index_type>(index0);
        unit.index1 = checked_cast<index_type>(index1);
        unit.alpha0 = convert_cast<alpha_type>(alpha0);
        unit.alpha1 = convert_cast<alpha_type>(alpha1);
    double ratio, inv_ratio;
    alpha_type alpha;  // == inv_ratio, rounded to the weight type
    // Deferred initialization used by the concrete Mapper subclasses.
    void init(int _inSz, int _outSz) {
        inv_ratio = invRatio(inSz, outSz);
        ratio     = 1.0 / inv_ratio;
        alpha     = convert_cast<alpha_type>(inv_ratio);
// AREA-downscale mapper specialization for 32F: float weights, int indices,
// float accumulator.
namespace areaDownscale32f {
struct Mapper: public AreaDownMapper<float, int, float> {
    Mapper(int _inSz, int _outSz) {
// AREA-downscale mapper specialization for 8U: Q0.16 fixed-point weights,
// short indices, Q8.8 fixed-point accumulator.
namespace areaDownscale8u {
struct Mapper: public AreaDownMapper<Q0_16, short, Q8_8> {
    Mapper(int _inSz, int _outSz) {
// Build the scratch buffer for AREA downscale: per-output-column source
// chunk positions (index) and weight tables (alpha), normalized so every
// chunk has the same maximal width `maxdif` (shorter chunks are zero-padded).
// NOTE(review): declarations of inSz/maxdif, several closing braces and the
// final index-write statement appear elided in this view of the file.
template<typename Mapper>
static void initScratchArea(const cv::GMatDesc& in, const Size& outSz,
                            cv::gapi::fluid::Buffer &scratch) {
    using Unit = typename Mapper::Unit;
    using alpha_type = typename Mapper::alpha_type;
    using index_type = typename Mapper::index_type;
    // compute the chunk of input pixels for each output pixel,
    // along with the coefficients for taking the weighted sum
    Mapper mapper(inSz.width, outSz.width);
    std::vector<Unit> xmaps(outSz.width);
    for (int w = 0; w < outSz.width; w++) {
        Unit map = mapper.map(w);
        int dif = map.index1 - map.index0;
    // This assertion is critical for our trick with chunk sizes:
    // we would expand a chunk if it is smaller than the maximal size
    GAPI_Assert(inSz.width >= maxdif);
    // pack the input chunks positions and coefficients into scratch-buffer,
    // along with the maximal size of chunk (note that chunk size may vary)
    size_t scratch_bytes =               sizeof(int)
                         + outSz.width * sizeof(index_type)
                         + outSz.width * sizeof(alpha_type) * maxdif
                         +  inSz.width * sizeof(alpha_type);
    Size scratch_size{static_cast<int>(scratch_bytes), 1};
    desc.depth = CV_8UC1;
    desc.size = scratch_size;
    cv::gapi::fluid::Buffer buffer(desc);
    scratch = std::move(buffer);
    // Scratch layout: [maxdif:int][index:outW][alpha:outW*maxdif][...]
    auto *maxdf = scratch.OutLine<int>();
    auto *index = reinterpret_cast<index_type*>(maxdf + 1);
    auto *alpha = reinterpret_cast<alpha_type*>(index + outSz.width);
    // auto *vbuf = reinterpret_cast<work_type *>(alpha + outSz.width * maxdif);
    for (int w = 0; w < outSz.width; w++) {
        // adjust input indices so that:
        // - data chunk is exactly maxdif pixels
        // - data chunk fits inside input width
        int index0 = xmaps[w].index0;
        int index1 = xmaps[w].index1;
        int i0 = index0, i1 = index1;
        i1 = (std::min)(i0 + maxdif, in.size.width);
        GAPI_DbgAssert(i0 >= 0);
        // fulfill coefficients for the data chunk,
        // extending with zeros if any extra pixels
        alpha_type *alphaw = &alpha[w * maxdif];
        for (int i = 0; i < maxdif; i++) {
            if (i + i0 == index0) {
                alphaw[i] = xmaps[w].alpha0;
            } else if (i + i0 == index1 - 1) {
                alphaw[i] = xmaps[w].alpha1;
            } else if (i + i0 > index0 && i + i0 < index1 - 1) {
                alphaw[i] = mapper.alpha;
        // start input chunk with adjusted position
// Produce resized output rows with AREA-downscale interpolation: first a
// vertical weighted reduction of the covered source rows into `vbuf`, then
// a horizontal weighted reduction per output pixel using the precomputed
// chunk tables from initScratchArea. SSE4.2 paths exist for 8U and 32F.
// NOTE(review): declarations of y/iny/sum and several closing braces appear
// elided in this view of the file.
template<typename T, typename Mapper>
static void calcAreaRow(const cv::gapi::fluid::View& in, cv::gapi::fluid::Buffer& out,
                        cv::gapi::fluid::Buffer& scratch) {
    using Unit = typename Mapper::Unit;
    using alpha_type = typename Mapper::alpha_type;
    using index_type = typename Mapper::index_type;
    using work_type = typename Mapper::work_type;
    Size inSz  =  in.meta().size;
    Size outSz = out.meta().size;
    // this method is valid only for down-scale
    GAPI_DbgAssert(inSz.width  >= outSz.width);
    GAPI_DbgAssert(inSz.height >= outSz.height);
    // Mapper xmapper(inSz.width, outSz.width);
    Mapper ymapper(inSz.height, outSz.height);
    // Unpack the scratch layout written by initScratchArea.
    auto *xmaxdf = scratch.OutLine<const int>();
    auto  maxdif = xmaxdf[0];
    auto *xindex = reinterpret_cast<const index_type*>(xmaxdf + 1);
    auto *xalpha = reinterpret_cast<const alpha_type*>(xindex + outSz.width);
    auto *vbuf_c = reinterpret_cast<const work_type*>(xalpha + outSz.width * maxdif);
    auto *vbuf = const_cast<work_type*>(vbuf_c);
    int lpi = out.lpi();
    GAPI_DbgAssert(y + lpi <= outSz.height);
    for (int l = 0; l < lpi; l++) {
        Unit ymap = ymapper.map(y + l);
        // Fixed-size src[] caps the vertical chunk at 32 source rows.
        GAPI_Assert(ymap.index1 - ymap.index0 <= 32);
        GAPI_Assert(ymap.index1 - ymap.index0 > 0);
        const T *src[32] = {};
        for (int yin = ymap.index0; yin < ymap.index1; yin++) {
            src[yin - ymap.index0] = in.InLine<const T>(yin - iny);
        auto dst = out.OutLine<T>(l);
        // SIMD fast paths (runtime CPU dispatch).
        if (with_cpu_x86_sse42()) {
            if (std::is_same<T, uchar>::value) {
                calcRowArea_8U(reinterpret_cast<uchar*>(dst),
                               reinterpret_cast<const uchar**>(src),
                               static_cast<Q0_16>(ymapper.alpha),
                               reinterpret_cast<const MapperUnit8U&>(ymap),
                               reinterpret_cast<const short*>(xindex),
                               reinterpret_cast<const Q0_16*>(xalpha),
                               reinterpret_cast<Q8_8*>(vbuf));
                continue;  // next l = 0, ..., lpi-1
            if (std::is_same<T, float>::value) {
                calcRowArea_32F(reinterpret_cast<float*>(dst),
                                reinterpret_cast<const float**>(src),
                                static_cast<float>(ymapper.alpha),
                                reinterpret_cast<const MapperUnit32F&>(ymap),
                                reinterpret_cast<const int*>(xindex),
                                reinterpret_cast<const float*>(xalpha),
                                reinterpret_cast<float*>(vbuf));
        // Scalar vertical pass: edge rows use alpha0/alpha1, interior rows
        // the constant ymapper.alpha.
        int y_1st = ymap.index0;
        int ylast = ymap.index1 - 1;
        if (y_1st < ylast) {
            for (int w = 0; w < inSz.width; w++) {
                vbuf[w] = mulas(ymap.alpha0, src[0][w])        // Q8_8 = Q0_16 * U8
                        + mulas(ymap.alpha1, src[ylast - y_1st][w]);
            for (int i = 1; i < ylast - y_1st; i++) {
                for (int w = 0; w < inSz.width; w++) {
                    vbuf[w] += mulas(ymapper.alpha, src[i][w]);
            // Single covered row: plain type conversion, no blending.
            for (int w = 0; w < inSz.width; w++) {
                vbuf[w] = convert_cast<work_type>(src[0][w]);  // Q8_8 = U8
        // Scalar horizontal pass over the fixed-width chunks.
        for (int x = 0; x < outSz.width; x++) {
            auto index = xindex[x];
            const auto *alpha = &xalpha[x * maxdif];
            for (int i = 0; i < maxdif; i++) {
                sum += mulaw(alpha[i], vbuf[index + i]);       // Q8_8 = Q0_16 * Q8_8
            dst[x] = convert_cast<T>(sum);                     // U8 = Q8_8
1083 //----------------------------------------------------------------------
// taken from: ie_preprocess_data.cpp
// Compute the maximal number of source pixels contributing to any single
// destination pixel for an AREA resize of `ssize` -> `dsize` columns
// (used to size coefficient tables).
// NOTE(review): declarations of max_count/count, the count increments and
// the return appear elided in this view of the file.
static int getResizeAreaTabSize(int dst_go, int ssize, int dsize, float scale) {
    static const float threshold = 1e-3f;
    for (int col = dst_go; col < dst_go + dsize; col++) {
        // Continuous source span covered by this destination column.
        float fsx1 = col * scale;
        float fsx2 = fsx1 + scale;
        int sx1 = ceil(fsx1);
        int sx2 = floor(fsx2);
        sx2 = (std::min)(sx2, ssize - 1);
        sx1 = (std::min)(sx1, sx2);
        // Partial leading pixel.
        if (sx1 - fsx1 > threshold) {
        // Fully covered pixels.
        for (int sx = sx1; sx < sx2; sx++) {
        // Partial trailing pixel.
        if (fsx2 - sx2 > threshold) {
        max_count = (std::max)(max_count, count);
1120 // taken from: ie_preprocess_data.cpp
1121 static void computeResizeAreaTab(int src_go, int dst_go, int ssize, int dsize, float scale,
1122 uint16_t* si, uint16_t* alpha, int max_count) {
1123 static const float threshold = 1e-3f;
1126 for (int col = dst_go; col < dst_go + dsize; col++) {
1129 float fsx1 = col * scale;
1130 float fsx2 = fsx1 + scale;
1131 float cellWidth = (std::min)(scale, ssize - fsx1);
1133 int sx1 = ceil(fsx1);
1134 int sx2 = floor(fsx2);
1136 sx2 = (std::min)(sx2, ssize - 1);
1137 sx1 = (std::min)(sx1, sx2);
1139 si[col - dst_go] = (uint16_t)(sx1 - src_go);
1141 if (sx1 - fsx1 > threshold) {
1142 si[col - dst_go] = (uint16_t)(sx1 - src_go - 1);
1143 alpha[k++] = (uint16_t)((1 << 16) * ((sx1 - fsx1) / cellWidth));
1147 for (int sx = sx1; sx < sx2; sx++) {
1148 alpha[k++] = (uint16_t)((1 << 16) * (1.0f / cellWidth));
1152 if (fsx2 - sx2 > threshold) {
1153 alpha[k++] = (uint16_t)((1 << 16) * ((std::min)((std::min)(fsx2 - sx2, 1.f), cellWidth) / cellWidth));
1157 if (count != max_count) {
// taken from: ie_preprocess_data.cpp
// Rearrange the per-column AREA coefficients into SIMD-friendly layout:
// `alpha` becomes per-tap planes, `sxid` gets precomputed byte-shuffle
// indices (-1 marks lanes outside the current 16-byte chunk) for the
// SSE resize kernels. Only used when x_max_count <= 4.
static void generate_alpha_and_id_arrays(int x_max_count, int dcols, const uint16_t* xalpha, uint16_t* xsi,
                                         uint16_t** alpha, uint16_t** sxid) {
    if (x_max_count <= 4) {
        // Transpose coefficients: alpha[tap][col] = xalpha[col*taps + tap].
        for (int col = 0; col < dcols; col++) {
            for (int x = 0; x < x_max_count; x++) {
                alpha[x][col] = xalpha[col*x_max_count + x];
    if (x_max_count <= 4) {
        // Build shuffle-control bytes for each 8-column group.
        for (int col = 0; col <= dcols - 8; col += 8) {
            for (int chunk_num_h = 0; chunk_num_h < x_max_count; chunk_num_h++) {
                for (int i = 0; i < 128 / 16; i++) {
                    int id_diff = xsi[col + i] - xsi[col];
                    for (int chunk_num_v = 0; chunk_num_v < x_max_count; chunk_num_v++) {
                        uint16_t* sxidp = sxid[chunk_num_v] + col * x_max_count + chunk_num_h * 8;
                        // Byte offsets of the two halves of a 16-bit lane.
                        int id0 = (id_diff + chunk_num_v) * 2 + 0;
                        int id1 = (id_diff + chunk_num_v) * 2 + 1;
                        // -1 => lane falls outside this 16-byte chunk (pshufb zeroes it).
                        (reinterpret_cast<int8_t*>(sxidp + i))[0] = static_cast<int8_t>(id0 >= (chunk_num_h * 16) && id0 < (chunk_num_h + 1) * 16 ? id0 : -1);
                        (reinterpret_cast<int8_t*>(sxidp + i))[1] = static_cast<int8_t>(id1 >= (chunk_num_h * 16) && id1 < (chunk_num_h + 1) * 16 ? id1 : -1);
// taken from: ie_preprocess_data.cpp
// (and simplified for specifically downscale area 8u)
//
// Returns the scratch-buffer size in bytes required by the CVKL 8-bit
// area-downscale path: source-index tables, weight tables, a two-row
// vertical accumulator, and the rearranged per-tap alpha / shuffle-id arrays.
static size_t resize_get_buffer_size(const Size& inSz, const Size& outSz) {
    int dst_full_width = outSz.width;
    int dst_full_height = outSz.height;
    int src_full_width = inSz.width;
    int src_full_height = inSz.height;
    auto resize_area_u8_downscale_sse_buffer_size = [&]() {
        const int dwidth = outSz.width;
        const int dheight = outSz.height;
        const int swidth = inSz.width;
        const int dst_go_x = 0;
        const int dst_go_y = 0;
        // worst-case per-destination tap counts in x and y (+1 slack)
        int x_max_count = getResizeAreaTabSize(dst_go_x, src_full_width, dwidth, static_cast<float>(src_full_width) / dst_full_width) + 1;
        int y_max_count = getResizeAreaTabSize(dst_go_y, src_full_height, dheight, static_cast<float>(src_full_height) / dst_full_height) + 1;
        // xsi + ysi source-index tables
        size_t si_buf_size = sizeof(uint16_t) * dwidth + sizeof(uint16_t) * dheight;
        // x weights (with an 8*16-element guard area) plus y weights
        size_t alpha_buf_size =
            sizeof(uint16_t) * (dwidth * x_max_count + 8 * 16) + sizeof(uint16_t) * dheight * y_max_count;
        // two rows of 16-bit vertical sums
        size_t vert_sum_buf_size = sizeof(uint16_t) * (swidth * 2);
        // four planar per-tap alpha arrays
        size_t alpha_array_buf_size = sizeof(uint16_t) * 4 * dwidth;
        // four 4*dwidth shuffle-id arrays
        size_t sxid_array_buf_size = sizeof(uint16_t) * 4 * 4 * dwidth;
        // NOTE(review): the terms adding alpha_buf_size and vert_sum_buf_size
        // into buffer_size are on lines not visible in this excerpt — confirm
        // they are summed in the full file.
        size_t buffer_size = si_buf_size +
                             alpha_array_buf_size +
                             sxid_array_buf_size;
    return resize_area_u8_downscale_sse_buffer_size();
// buffer-fulfill is taken from: ie_preprocess_data_sse42.cpp
//
// Allocates the scratch Buffer for the CVKL 8-bit area downscale and fills
// it with all precomputed tables: an [x_max_count, y_max_count] header,
// the xsi/ysi source-index tables, the xalpha/yalpha weight tables, and
// the rearranged per-tap alpha / shuffle-id arrays for the SSE4.2 kernel.
// NOTE(review): the `outSz` parameter declaration and the declaration of
// `desc` are on lines not visible in this excerpt.
static void initScratchArea_CVKL_U8(const cv::GMatDesc & in,
                                    cv::gapi::fluid::Buffer & scratch) {
    const Size& inSz = in.size;
    // estimate buffer size
    size_t scratch_bytes = resize_get_buffer_size(inSz, outSz);
    // allocate as a 1-row, CV_8UC1, scratch_bytes-wide fluid buffer
    Size scratch_size{static_cast<int>(scratch_bytes), 1};
    desc.depth = CV_8UC1;
    desc.size = scratch_size;
    cv::gapi::fluid::Buffer buffer(desc);
    scratch = std::move(buffer);
    // this code is taken from: ie_preprocess_data_sse42.cpp
    // (and simplified for 1-channel cv::Mat instead of blob)
    auto dwidth = outSz.width;
    auto dheight = outSz.height;
    auto swidth = inSz.width;
    auto sheight = inSz.height;
    // whole-image processing: no ROI offsets
    const int src_go_x = 0;
    const int src_go_y = 0;
    const int dst_go_x = 0;
    const int dst_go_y = 0;
    auto src_full_width = swidth;
    auto src_full_height = sheight;
    auto dst_full_width = dwidth;
    auto dst_full_height = dheight;
    float scale_x = static_cast<float>(src_full_width) / dst_full_width;
    float scale_y = static_cast<float>(src_full_height) / dst_full_height;
    int x_max_count = getResizeAreaTabSize(dst_go_x, src_full_width, dwidth, scale_x);
    int y_max_count = getResizeAreaTabSize(dst_go_y, src_full_height, dheight, scale_y);
    // carve the scratch buffer into table regions; this layout must stay in
    // sync with calcAreaRow_CVKL_U8 and resize_get_buffer_size
    auto* maxdif = scratch.OutLine<int>();
    auto* xsi = reinterpret_cast<uint16_t*>(maxdif + 2);
    auto* ysi = xsi + dwidth;
    auto* xalpha = ysi + dheight;
    auto* yalpha = xalpha + dwidth*x_max_count + 8*16;
    // auto* vert_sum = yalpha + dheight*y_max_count;
    // stash the tap counts so the run-time kernel can recover the layout
    maxdif[0] = x_max_count;
    maxdif[1] = y_max_count;
    computeResizeAreaTab(src_go_x, dst_go_x, src_full_width, dwidth, scale_x, xsi, xalpha, x_max_count);
    computeResizeAreaTab(src_go_y, dst_go_y, src_full_height, dheight, scale_y, ysi, yalpha, y_max_count);
    int vest_sum_size = 2*swidth;  // (sic) size of the two-row vertical-sum area
    uint16_t* vert_sum = yalpha + dheight*y_max_count;
    uint16_t* alpha0 = vert_sum + vest_sum_size;
    uint16_t* alpha1 = alpha0 + dwidth;
    uint16_t* alpha2 = alpha1 + dwidth;
    uint16_t* alpha3 = alpha2 + dwidth;
    uint16_t* sxid0 = alpha3 + dwidth;
    uint16_t* sxid1 = sxid0 + 4*dwidth;
    uint16_t* sxid2 = sxid1 + 4*dwidth;
    uint16_t* sxid3 = sxid2 + 4*dwidth;
    uint16_t* alpha[] = {alpha0, alpha1, alpha2, alpha3};
    uint16_t* sxid[] = {sxid0, sxid1, sxid2, sxid3};
    generate_alpha_and_id_arrays(x_max_count, dwidth, xalpha, xsi, alpha, sxid);
// Run-time part of the CVKL 8-bit area downscale: recovers the table layout
// from the scratch buffer, gathers the contributing input lines for each
// output line, and delegates the arithmetic to the SSE4.2 row kernel.
// NOTE(review): the declarations of `y` (current output row) and `iny`
// (first available input row in the view) are on lines not visible in
// this excerpt.
static void calcAreaRow_CVKL_U8(const cv::gapi::fluid::View & in,
                                cv::gapi::fluid::Buffer & out,
                                cv::gapi::fluid::Buffer & scratch) {
    Size inSz = in.meta().size;
    Size outSz = out.meta().size;
    // this method is valid only for down-scale
    GAPI_DbgAssert(inSz.width >= outSz.width);
    GAPI_DbgAssert(inSz.height >= outSz.height);
    int dwidth = outSz.width;
    int dheight = outSz.height;
    // header written by initScratchArea_CVKL_U8: per-axis max tap counts
    auto* maxdif = scratch.OutLine<int>();
    int x_max_count = maxdif[0];
    int y_max_count = maxdif[1];
    // the table layout below must mirror initScratchArea_CVKL_U8
    auto* xsi = reinterpret_cast<uint16_t*>(maxdif + 2);
    auto* ysi = xsi + dwidth;
    auto* xalpha = ysi + dheight;
    auto* yalpha = xalpha + dwidth*x_max_count + 8*16;
    auto* vert_sum = yalpha + dheight*y_max_count;
    int lpi = out.lpi();
    GAPI_DbgAssert(y + lpi <= outSz.height);
    for (int l = 0; l < lpi; l++) {
        // [yin0, yin1) is the window of input rows contributing to row y+l
        int yin0 = ysi[y + l];
        int yin1 = yin0 + y_max_count;
        GAPI_Assert(yin1 - yin0 <= 32);
        const uint8_t *src[32] = {};
        for (int yin = yin0; yin < yin1 && yin < inSz.height; yin++) {
            if (yalpha[(y+l)*y_max_count + yin - yin0] == 0) {
                // zero-weight tap: the line index is shifted by one here —
                // NOTE(review): verify the -1 offset against the full file
                src[yin - yin0] = in.InLine<const uint8_t>(yin - iny - 1);
            src[yin - yin0] = in.InLine<const uint8_t>(yin - iny);
        uint8_t *dst = out.OutLine<uint8_t>(l);
        calcRowArea_CVKL_U8_SSE42(src, dst, inSz, outSz, y + l, xsi, ysi,
                                  xalpha, yalpha, x_max_count, y_max_count, vert_sum);
1361 //----------------------------------------------------------------------
// Fluid kernel: single-plane 8-bit resize with linear interpolation.
GAPI_FLUID_KERNEL(FScalePlane8u, ScalePlane8u, true) {
    static const int Window = 1;
    static const int LPI = 4;  // output lines produced per run() call
    static const auto Kind = cv::GFluidKernel::Kind::Resize;
    // Precompute the linear-mapper coefficient tables into scratch.
    static void initScratch(const cv::GMatDesc& in,
                            Size outSz, int /*interp*/,
                            cv::gapi::fluid::Buffer &scratch) {
        initScratchLinear<uchar, linear::Mapper>(in, outSz, scratch, LPI);
    // Scratch holds only precomputed constants, so nothing to reset.
    static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/) {
    static void run(const cv::gapi::fluid::View& in, Size /*sz*/, int /*interp*/,
                    cv::gapi::fluid::Buffer& out, cv::gapi::fluid::Buffer &scratch) {
        calcRowLinear<uint8_t, linear::Mapper>(in, out, scratch);
// Fluid kernel: 8-bit linear resize producing three separate output planes
// from one interleaved 3-channel input.
GAPI_FLUID_KERNEL(FScalePlanes, ScalePlanes, true) {
    static const int Window = 1;
    static const int LPI = 4;  // output lines produced per run() call
    static const auto Kind = cv::GFluidKernel::Kind::Resize;
    // Precompute the 3-channel linear-mapper coefficient tables into scratch.
    static void initScratch(const cv::GMatDesc& in, int, Size,
                            Size outSz, int /*interp*/,
                            cv::gapi::fluid::Buffer &scratch) {
        initScratchLinear<uchar, linear::Mapper, 3>(in, outSz, scratch, LPI);
    // Scratch holds only precomputed constants, so nothing to reset.
    static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/) {
    static void run(const cv::gapi::fluid::View& in, int, Size, Size/*sz*/, int /*interp*/,
                    cv::gapi::fluid::Buffer& out1,
                    cv::gapi::fluid::Buffer& out2,
                    cv::gapi::fluid::Buffer& out3,
                    cv::gapi::fluid::Buffer& scratch) {
        calcRowLinearC3<uint8_t, linear::Mapper>(in, out1, out2, out3, scratch);
// Fluid kernel: single-plane 8-bit "area" resize in the upscale case,
// implemented via the linear machinery with the areaUpscale mapper.
GAPI_FLUID_KERNEL(FUpscalePlaneArea8u, UpscalePlaneArea8u, true) {
    static const int Window = 1;
    static const int LPI = 4;  // output lines produced per run() call
    static const auto Kind = cv::GFluidKernel::Kind::Resize;
    static void initScratch(const cv::GMatDesc& in,
                            Size outSz, int /*interp*/,
                            cv::gapi::fluid::Buffer &scratch) {
        initScratchLinear<uchar, areaUpscale::Mapper>(in, outSz, scratch, LPI);
    // Scratch holds only precomputed constants, so nothing to reset.
    static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/) {
    static void run(const cv::gapi::fluid::View& in, Size /*sz*/, int /*interp*/,
                    cv::gapi::fluid::Buffer& out, cv::gapi::fluid::Buffer &scratch) {
        calcRowLinear<uint8_t, areaUpscale::Mapper>(in, out, scratch);
// Fluid kernel: single-plane 32-bit float "area" resize in the upscale case,
// implemented via the linear machinery with the areaUpscale32f mapper.
GAPI_FLUID_KERNEL(FUpscalePlaneArea32f, UpscalePlaneArea32f, true) {
    static const int Window = 1;
    static const int LPI = 4;
    static const auto Kind = cv::GFluidKernel::Kind::Resize;
    static void initScratch(const cv::GMatDesc& in,
                            Size outSz, int /*interp*/,
                            cv::gapi::fluid::Buffer &scratch) {
        // last argument 0: no LPI-specific table replication for the float path
        initScratchLinear<float, areaUpscale32f::Mapper>(in, outSz, scratch, 0);
    // Scratch holds only precomputed constants, so nothing to reset.
    static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/) {
    static void run(const cv::gapi::fluid::View& in, Size /*sz*/, int /*interp*/,
                    cv::gapi::fluid::Buffer& out, cv::gapi::fluid::Buffer &scratch) {
        calcRowLinear<float, areaUpscale32f::Mapper>(in, out, scratch);
// Fluid kernel: single-plane 32-bit float resize with linear interpolation.
GAPI_FLUID_KERNEL(FScalePlane32f, ScalePlane32f, true) {
    static const int Window = 1;
    static const int LPI = 4;
    static const auto Kind = cv::GFluidKernel::Kind::Resize;
    static void initScratch(const cv::GMatDesc& in,
                            Size outSz, int /*interp*/,
                            cv::gapi::fluid::Buffer &scratch) {
        // this kernel only supports single-channel CV_32F input
        GAPI_DbgAssert(in.depth == CV_32F && in.chan == 1);
        initScratchLinear<float, linear32f::Mapper>(in, outSz, scratch, 0);
    // Scratch holds only precomputed constants, so nothing to reset.
    static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/) {
    static void run(const cv::gapi::fluid::View& in, Size /*sz*/, int /*interp*/,
                    cv::gapi::fluid::Buffer& out, cv::gapi::fluid::Buffer &scratch) {
        calcRowLinear<float, linear32f::Mapper>(in, out, scratch);
1468 //----------------------------------------------------------------------
// Fluid kernel: single-plane 32-bit float area downscale.
GAPI_FLUID_KERNEL(FScalePlaneArea32f, ScalePlaneArea32f, true) {
    static const int Window = 1;
    static const int LPI = 4;
    static const auto Kind = cv::GFluidKernel::Kind::Resize;
    // Precompute the area-downscale coefficient tables into scratch.
    static void initScratch(const cv::GMatDesc& in,
                            Size outSz, int /*interp*/,
                            cv::gapi::fluid::Buffer &scratch) {
        initScratchArea<areaDownscale32f::Mapper>(in, outSz, scratch);
    // Scratch holds only precomputed constants, so nothing to reset.
    static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/) {
    static void run(const cv::gapi::fluid::View& in, Size /*sz*/, int /*interp*/,
                    cv::gapi::fluid::Buffer& out, cv::gapi::fluid::Buffer &scratch) {
        calcAreaRow<float, areaDownscale32f::Mapper>(in, out, scratch);
// Fluid kernel: single-plane 8-bit area downscale.
// Dispatches to the CVKL SSE4.2 implementation when the CPU supports SSE4.2
// and both dimensions strictly shrink; otherwise uses the generic fluid
// path. initScratch and run must take the same branch, since the two paths
// use different scratch layouts.
// NOTE(review): the early-return after each CVKL call is on lines not
// visible in this excerpt — confirm against the full file.
GAPI_FLUID_KERNEL(FScalePlaneArea8u, ScalePlaneArea8u, true) {
    static const int Window = 1;
    static const int LPI = 4;
    static const auto Kind = cv::GFluidKernel::Kind::Resize;
    static void initScratch(const cv::GMatDesc& in,
                            Size outSz, int /*interp*/,
                            cv::gapi::fluid::Buffer &scratch) {
        if (with_cpu_x86_sse42()) {
            const Size& inSz = in.size;
            if (inSz.width > outSz.width && inSz.height > outSz.height) {
                // CVKL code we use supports only downscale
                initScratchArea_CVKL_U8(in, outSz, scratch);
        // generic fluid fallback
        initScratchArea<areaDownscale8u::Mapper>(in, outSz, scratch);
    // Scratch holds only precomputed constants, so nothing to reset.
    static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/) {
    static void run(const cv::gapi::fluid::View& in, Size /*sz*/, int /*interp*/,
                    cv::gapi::fluid::Buffer& out, cv::gapi::fluid::Buffer &scratch) {
        if (with_cpu_x86_sse42()) {
            auto inSz = in.meta().size;
            auto outSz = out.meta().size;
            if (inSz.width > outSz.width && inSz.height > outSz.height) {
                // CVKL's code supports only downscale
                calcAreaRow_CVKL_U8(in, out, scratch);
        // generic fluid fallback
        calcAreaRow<uint8_t, areaDownscale8u::Mapper>(in, out, scratch);
// Fixed-point (Q20, see ITUR_BT_601_SHIFT) coefficients of the ITU-R BT.601
// YUV -> RGB conversion used by the NV12 kernels below (presumably copied
// from OpenCV's color-conversion code — the values match that convention).
static const int ITUR_BT_601_CY = 1220542;    // luma gain
static const int ITUR_BT_601_CUB = 2116026;   // U contribution to B
static const int ITUR_BT_601_CUG = -409993;   // U contribution to G
static const int ITUR_BT_601_CVG = -852492;   // V contribution to G
static const int ITUR_BT_601_CVR = 1673527;   // V contribution to R
static const int ITUR_BT_601_SHIFT = 20;      // fixed-point fraction bits
1540 static inline void uvToRGBuv(const uchar u, const uchar v, int& ruv, int& guv, int& buv) {
1542 uu = static_cast<int>(u) - 128;
1543 vv = static_cast<int>(v) - 128;
1545 ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * vv;
1546 guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * vv + ITUR_BT_601_CUG * uu;
1547 buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * uu;
1550 static inline void yRGBuvToRGB(const uchar vy, const int ruv, const int guv, const int buv,
1551 uchar& r, uchar& g, uchar& b) {
1552 int yy = static_cast<int>(vy);
1553 int y = std::max(0, yy - 16) * ITUR_BT_601_CY;
1554 r = saturate_cast<uchar>((y + ruv) >> ITUR_BT_601_SHIFT);
1555 g = saturate_cast<uchar>((y + guv) >> ITUR_BT_601_SHIFT);
1556 b = saturate_cast<uchar>((y + buv) >> ITUR_BT_601_SHIFT);
// Scalar NV12 -> interleaved RGB conversion for two image rows at a time:
// each (U, V) pair from the half-resolution uv_row drives the 2x2 block of
// luma pixels above it.
// NOTE(review): the remaining parameters (`out_rows`, `buf_width`) and the
// declarations of the locals (`ruv`, `guv`, `buv`, `r`, `g`, `b`) are on
// lines not visible in this excerpt.
static void calculate_nv12_to_rgb_fallback(const uchar **y_rows,
                                           const uchar *uv_row,
    for (int i = 0; i < buf_width; i += 2) {
        // NV12 stores chroma interleaved: U at even, V at odd positions
        uchar u = uv_row[i];
        uchar v = uv_row[i + 1];
        // chroma contributions are shared by the whole 2x2 luma block
        uvToRGBuv(u, v, ruv, guv, buv);
        for (int y = 0; y < 2; y++) {
            for (int x = 0; x < 2; x++) {
                uchar vy = y_rows[y][i + x];
                yRGBuvToRGB(vy, ruv, guv, buv, r, g, b);
                out_rows[y][3*(i + x)] = r;
                out_rows[y][3*(i + x) + 1] = g;
                out_rows[y][3*(i + x) + 2] = b;
// Fluid kernel: NV12 -> RGB color conversion. Consumes one UV line plus two
// Y lines per iteration and emits two interleaved RGB lines.
GAPI_FLUID_KERNEL(FNV12toRGB, NV12toRGB, false) {
    static const int Window = 1;
    static const int LPI = 2;  // two output lines per UV line
    static const auto Kind = cv::GFluidKernel::Kind::NV12toRGB;
    static void run(const cv::gapi::fluid::View &in_y,
                    const cv::gapi::fluid::View &in_uv,
                    cv::gapi::fluid::Buffer &out) {
        const uchar* uv_row = in_uv.InLineB(0);
        const uchar* y_rows[2] = {in_y. InLineB(0), in_y. InLineB(1)};
        uchar* out_rows[2] = {out.OutLineB(0), out.OutLineB(1)};
        int buf_width = out.length();
        // NOTE(review): the guard selecting between the SIMD variant and the
        // scalar fallback below is on lines not visible in this excerpt
        // (presumably a MANUAL_SIMD / with_cpu_x86_sse42() check — confirm).
        calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width);
        calculate_nv12_to_rgb_fallback(y_rows, uv_row, out_rows, buf_width);
1605 } // namespace kernels
1607 //----------------------------------------------------------------------
1609 using namespace kernels;
// Assembles the kernel package containing every fluid preprocessing kernel
// defined above (resize and color-conversion variants).
// NOTE(review): part of the kernel list is on lines not visible in this
// excerpt.
cv::gapi::GKernelPackage preprocKernels() {
    return cv::gapi::kernels
        , FUpscalePlaneArea8u
        , FUpscalePlaneArea32f
        , FScalePlaneArea32f
1633 } // namespace InferenceEngine