/** BT.709 YUV -> RGB coefficients (chroma is expected to be centred on zero).
 *
 *   R = Y + red_coef_bt709 * V
 *   G = Y + green_coef_bt709 * U + green_coef2_bt709 * V
 *   B = Y + blue_coef_bt709 * U
 */
constexpr float red_coef_bt709    = 1.5748F;  // 2 * (1 - K_r)
constexpr float green_coef_bt709  = -0.1873f; // -K_b * blue_coef / K_g
constexpr float green_coef2_bt709 = -0.4681f; // -K_r * red_coef / K_g
constexpr float blue_coef_bt709   = 1.8556f;  // 2 * (1 - K_b)

/** BT.709 RGB -> YUV coefficients.
 *
 *   Y = K_r * R + K_g * G + K_b * B
 *   U = (B - Y) * C_u + 128
 *   V = (R - Y) * C_v + 128
 */
constexpr float rgb2yuv_bt709_kr = 0.2126f; // K_r
constexpr float rgb2yuv_bt709_kb = 0.0722f; // K_b
constexpr float rgb2yuv_bt709_kg = 0.7152f; // K_g = 1 - K_r - K_b
constexpr float rgb2yuv_bt709_cu = 0.5389f; // C_u = 1 / (2 * (1 - K_b))
constexpr float rgb2yuv_bt709_cv = 0.6350f; // C_v = 1 / (2 * (1 - K_r))
47 inline void convert_uint8x16_to_float32x4x4(
const uint8x16_t &in, float32x4x4_t &out)
49 const auto tmp1 = vmovl_u8(vget_low_u8(in));
50 out.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp1)));
51 out.val[1] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp1)));
52 const auto tmp2 = vmovl_u8(vget_high_u8(in));
53 out.val[2] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp2)));
54 out.val[3] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp2)));
57 inline void convert_float32x4x3_to_uint8x8x3(
const float32x4x3_t &in1,
const float32x4x3_t &in2, uint8x8x3_t &out)
59 out.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[0])),
60 vqmovn_u32(vcvtq_u32_f32(in2.val[0]))));
61 out.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[1])),
62 vqmovn_u32(vcvtq_u32_f32(in2.val[1]))));
63 out.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[2])),
64 vqmovn_u32(vcvtq_u32_f32(in2.val[2]))));
67 inline void convert_float32x4x4_to_unit8x16(
const float32x4x4_t &in, uint8x16_t &out)
69 const auto low = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[0])),
70 vqmovn_u32(vcvtq_u32_f32(in.val[1])));
71 const auto high = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[2])),
72 vqmovn_u32(vcvtq_u32_f32(in.val[3])));
73 out = vcombine_u8(vqmovn_u16(low), vqmovn_u16(high));
76 inline void rgb_to_yuv_calculation(
const float32x4_t &rvec,
const float32x4_t &gvec,
const float32x4_t &bvec,
77 float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec)
84 const auto c128 = vdupq_n_f32(128.f);
87 yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr);
88 yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg);
89 yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb);
92 uvec = vsubq_f32(bvec, yvec);
93 uvec = vmlaq_n_f32(c128, uvec, rgb2yuv_bt709_cu);
96 vvec = vsubq_f32(rvec, yvec);
97 vvec = vmlaq_n_f32(c128, vvec, rgb2yuv_bt709_cv);
100 inline void yuyv_to_rgb_calculation(
const float32x4_t &yvec_val, float32x4_t uvec_val,
const float32x4_t &yyvec_val,
101 float32x4_t vvec_val,
unsigned char *output_ptr,
const bool alpha)
103 float32x4x3_t rgb1, rgb2;
106 const auto c128 = vdupq_n_f32(128.f);
107 uvec_val = vsubq_f32(uvec_val, c128);
108 vvec_val = vsubq_f32(vvec_val, c128);
114 const auto red = vmulq_n_f32(vvec_val, red_coef_bt709);
115 const auto blue = vmulq_n_f32(uvec_val, blue_coef_bt709);
116 const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709),
117 vmulq_n_f32(vvec_val, green_coef2_bt709));
123 rgb1.val[0] = vaddq_f32(yvec_val, red);
124 rgb1.val[1] = vaddq_f32(yvec_val, green);
125 rgb1.val[2] = vaddq_f32(yvec_val, blue);
127 rgb2.val[0] = vaddq_f32(yyvec_val, red);
128 rgb2.val[1] = vaddq_f32(yyvec_val, green);
129 rgb2.val[2] = vaddq_f32(yyvec_val, blue);
132 convert_float32x4x3_to_uint8x8x3(rgb1, rgb2, u8_rgb);
136 vst3_lane_u8(&output_ptr[0], u8_rgb, 0);
137 vst3_lane_u8(&output_ptr[3], u8_rgb, 4);
138 vst3_lane_u8(&output_ptr[6], u8_rgb, 1);
139 vst3_lane_u8(&output_ptr[9], u8_rgb, 5);
140 vst3_lane_u8(&output_ptr[12], u8_rgb, 2);
141 vst3_lane_u8(&output_ptr[15], u8_rgb, 6);
142 vst3_lane_u8(&output_ptr[18], u8_rgb, 3);
143 vst3_lane_u8(&output_ptr[21], u8_rgb, 7);
148 u8_rgba.val[0] = u8_rgb.val[0];
149 u8_rgba.val[1] = u8_rgb.val[1];
150 u8_rgba.val[2] = u8_rgb.val[2];
151 u8_rgba.val[3] = vdup_n_u8(255);
152 vst4_lane_u8(&output_ptr[0], u8_rgba, 0);
153 vst4_lane_u8(&output_ptr[4], u8_rgba, 4);
154 vst4_lane_u8(&output_ptr[8], u8_rgba, 1);
155 vst4_lane_u8(&output_ptr[12], u8_rgba, 5);
156 vst4_lane_u8(&output_ptr[16], u8_rgba, 2);
157 vst4_lane_u8(&output_ptr[20], u8_rgba, 6);
158 vst4_lane_u8(&output_ptr[24], u8_rgba, 3);
159 vst4_lane_u8(&output_ptr[28], u8_rgba, 7);
163 inline uint8x16x3_t load_rgb(
const unsigned char *
const ptr,
const bool alpha)
169 const auto tmp = vld4q_u8(ptr);
170 rgb.val[0] = tmp.val[0];
171 rgb.val[1] = tmp.val[1];
172 rgb.val[2] = tmp.val[2];
182 inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom)
185 float32x4x4_t frvec_top, fgvec_top, fbvec_top;
186 convert_uint8x16_to_float32x4x4(vec_top.val[0], frvec_top);
187 convert_uint8x16_to_float32x4x4(vec_top.val[1], fgvec_top);
188 convert_uint8x16_to_float32x4x4(vec_top.val[2], fbvec_top);
190 float32x4x4_t frvec_bottom, fgvec_bottom, fbvec_bottom;
191 convert_uint8x16_to_float32x4x4(vec_bottom.val[0], frvec_bottom);
192 convert_uint8x16_to_float32x4x4(vec_bottom.val[1], fgvec_bottom);
193 convert_uint8x16_to_float32x4x4(vec_bottom.val[2], fbvec_bottom);
195 float32x4x4_t fyvec_top, fuvec_top, fvvec_top;
196 float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom;
198 for(
auto i = 0; i < 4; ++i)
200 rgb_to_yuv_calculation(frvec_top.val[i], fgvec_top.val[i], fbvec_top.val[i],
201 fyvec_top.val[i], fuvec_top.val[i], fvvec_top.val[i]);
202 rgb_to_yuv_calculation(frvec_bottom.val[i], fgvec_bottom.val[i], fbvec_bottom.val[i],
203 fyvec_bottom.val[i], fuvec_bottom.val[i], fvvec_bottom.val[i]);
206 convert_float32x4x4_to_unit8x16(fyvec_top, vec_top.val[0]);
207 convert_float32x4x4_to_unit8x16(fuvec_top, vec_top.val[1]);
208 convert_float32x4x4_to_unit8x16(fvvec_top, vec_top.val[2]);
209 convert_float32x4x4_to_unit8x16(fyvec_bottom, vec_bottom.val[0]);
210 convert_float32x4x4_to_unit8x16(fuvec_bottom, vec_bottom.val[1]);
211 convert_float32x4x4_to_unit8x16(fvvec_bottom, vec_bottom.val[2]);
214 inline void store_rgb_to_nv12(
const uint8x16_t &rvec_top,
const uint8x16_t &gvec_top,
const uint8x16_t &bvec_top,
215 const uint8x16_t &rvec_bottom,
const uint8x16_t &gvec_bottom,
const uint8x16_t &bvec_bottom,
216 unsigned char *
const __restrict out_y_top,
unsigned char *
const __restrict out_y_bottom,
217 unsigned char *
const __restrict out_uv)
219 uint8x16x3_t vec_top, vec_bottom;
220 vec_top.val[0] = rvec_top;
221 vec_top.val[1] = gvec_top;
222 vec_top.val[2] = bvec_top;
223 vec_bottom.val[0] = rvec_bottom;
224 vec_bottom.val[1] = gvec_bottom;
225 vec_bottom.val[2] = bvec_bottom;
227 rgb_to_yuv_conversion(vec_top, vec_bottom);
229 vst1q_u8(out_y_top, vec_top.val[0]);
230 vst1q_u8(out_y_bottom, vec_bottom.val[0]);
232 const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]);
233 const auto vvec = vuzpq_u8(vec_top.val[2], vec_bottom.val[2]);
234 const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]);
235 const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]);
238 uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp));
239 uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp));
241 vst2_u8(out_uv, uvvec);
244 inline void store_rgb_to_iyuv(
const uint8x16_t &rvec_top,
const uint8x16_t &gvec_top,
const uint8x16_t &bvec_top,
245 const uint8x16_t &rvec_bottom,
const uint8x16_t &gvec_bottom,
const uint8x16_t &bvec_bottom,
246 unsigned char *
const __restrict out_y_top,
unsigned char *
const __restrict out_y_bottom,
247 unsigned char *
const __restrict out_u,
248 unsigned char *
const __restrict out_v)
250 uint8x16x3_t vec_top, vec_bottom;
251 vec_top.val[0] = rvec_top;
252 vec_top.val[1] = gvec_top;
253 vec_top.val[2] = bvec_top;
254 vec_bottom.val[0] = rvec_bottom;
255 vec_bottom.val[1] = gvec_bottom;
256 vec_bottom.val[2] = bvec_bottom;
258 rgb_to_yuv_conversion(vec_top, vec_bottom);
260 vst1q_u8(out_y_top, vec_top.val[0]);
261 vst1q_u8(out_y_bottom, vec_bottom.val[0]);
263 const auto uvvec_top = vuzpq_u8(vec_top.val[1], vec_top.val[2]);
264 const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]);
265 const auto uvvec = vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]),
266 vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1]));
268 vst1_u8(out_u, vget_low_u8(uvvec));
269 vst1_u8(out_v, vget_high_u8(uvvec));
272 inline void store_rgb_to_yuv4(
const uint8x16_t &rvec,
const uint8x16_t &gvec,
const uint8x16_t &bvec,
273 unsigned char *
const __restrict out_y,
274 unsigned char *
const __restrict out_u,
275 unsigned char *
const __restrict out_v)
278 float32x4x4_t frvec, fgvec, fbvec;
279 convert_uint8x16_to_float32x4x4(rvec, frvec);
280 convert_uint8x16_to_float32x4x4(gvec, fgvec);
281 convert_uint8x16_to_float32x4x4(bvec, fbvec);
283 float32x4x4_t fyvec, fuvec, fvvec;
284 for(
auto i = 0; i < 4; ++i)
286 rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i],
287 fyvec.val[i], fuvec.val[i], fvvec.val[i]);
290 uint8x16_t yvec, uvec, vvec;
291 convert_float32x4x4_to_unit8x16(fyvec, yvec);
292 convert_float32x4x4_to_unit8x16(fuvec, uvec);
293 convert_float32x4x4_to_unit8x16(fvvec, vvec);
295 vst1q_u8(out_y, yvec);
296 vst1q_u8(out_u, uvec);
297 vst1q_u8(out_v, vvec);
// NOTE(review): extraction fragment - the enclosing signature, the declarations of
// `in`, `out` and `ta2`, and the window loop are not visible here. Presumably the body
// of colorconvert_rgb_to_rgbx; confirm against the complete file.
308 const auto input_ptr =
static_cast<const IImage *__restrict
>(input);
309 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
// De-interleave sixteen three-channel pixels into one 16-lane vector per channel.
316 const auto ta1 = vld3q_u8(in.
ptr());
// Copy the three channels through unchanged and set the fourth to 255.
318 ta2.val[0] = ta1.val[0];
319 ta2.val[1] = ta1.val[1];
320 ta2.val[2] = ta1.val[2];
321 ta2.val[3] = vdupq_n_u8(255);
// Re-interleave as sixteen four-channel pixels.
322 vst4q_u8(out.
ptr(), ta2);
// NOTE(review): extraction fragment - the enclosing signature, the declarations of
// `in`, `out` and `ta2`, and the window loop are not visible here. Presumably the body
// of colorconvert_rgbx_to_rgb; confirm against the complete file.
332 const auto input_ptr =
static_cast<const IImage *__restrict
>(input);
333 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
// De-interleave sixteen four-channel pixels into one 16-lane vector per channel.
340 const auto ta1 = vld4q_u8(in.
ptr());
// Keep the first three channels; the fourth is dropped.
342 ta2.val[0] = ta1.val[0];
343 ta2.val[1] = ta1.val[1];
344 ta2.val[2] = ta1.val[2];
// Re-interleave as sixteen three-channel pixels.
345 vst3q_u8(out.
ptr(), ta2);
// NOTE(review): extraction fragment - the function signature, the declarations of `in`
// and `out`, and the window loop are not visible here. Presumably
// colorconvert_yuyv_to_rgb; confirm against the complete file.
350 template <
bool yuyv,
bool alpha>
356 const auto input_ptr =
static_cast<const IImage *__restrict
>(input);
357 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
// Bytes written by one yuyv_to_rgb_calculation call: 8 pixels * 4 (alpha) or 3 bytes.
359 constexpr
auto element_size = alpha ? 32 : 24;
// shift == 0: luma in channels 0 and 2, chroma in 1 and 3; shift == 1: the opposite.
360 constexpr
auto shift = yuyv ? 0 : 1;
367 float32x4x4_t uvec, yvec, vvec, yyvec;
// De-interleave 64 input bytes into four 16-lane channel vectors.
368 const auto ta = vld4q_u8(in.
ptr());
// Widen each channel to float; yvec/yyvec are the two luma samples sharing one U/V pair.
375 convert_uint8x16_to_float32x4x4(ta.val[0 + shift], yvec);
376 convert_uint8x16_to_float32x4x4(ta.val[1 - shift], uvec);
377 convert_uint8x16_to_float32x4x4(ta.val[2 + shift], yyvec);
378 convert_uint8x16_to_float32x4x4(ta.val[3 - shift], vvec);
// Convert and store eight pixels per call, 32 pixels in total.
380 yuyv_to_rgb_calculation(yvec.val[0], uvec.val[0], yyvec.val[0], vvec.val[0], out.
ptr() + 0 * element_size, alpha);
381 yuyv_to_rgb_calculation(yvec.val[1], uvec.val[1], yyvec.val[1], vvec.val[1], out.
ptr() + 1 * element_size, alpha);
382 yuyv_to_rgb_calculation(yvec.val[2], uvec.val[2], yyvec.val[2], vvec.val[2], out.
ptr() + 2 * element_size, alpha);
383 yuyv_to_rgb_calculation(yvec.val[3], uvec.val[3], yyvec.val[3], vvec.val[3], out.
ptr() + 3 * element_size, alpha);
// NOTE(review): extraction fragment - the function signature, the definitions of `win_uv`,
// `out` and `out_stride`, and the window loop are not visible here. Presumably
// colorconvert_nv12_to_rgb; confirm against the complete file.
388 template <
bool uv,
bool alpha>
395 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(input);
396 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
// Bytes written by one yuyv_to_rgb_calculation call: 8 pixels * 4 (alpha) or 3 bytes.
398 constexpr
auto element_size = alpha ? 32 : 24;
// shift == 0: U is the first interleaved chroma channel; shift == 1: V is first.
400 constexpr
auto shift = uv ? 0 : 1;
408 Iterator in_y(input_ptr->plane(0), win);
409 Iterator in_uv(input_ptr->plane(1), win_uv);
// Two luma rows (split into even/odd columns) and one row of interleaved chroma.
414 const auto ta_y_top = vld2q_u8(in_y.ptr());
415 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
416 const auto ta_uv = vld2q_u8(in_uv.ptr());
423 float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec;
424 convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top);
425 convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top);
426 convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom);
427 convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom);
428 convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift], uvec);
429 convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift], vvec);
// Upper output row: each chroma sample is shared by an even/odd luma pair.
431 yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
432 yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
433 yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
434 yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
// Lower output row: reuses the same chroma samples, written out_stride bytes further on.
436 yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
437 yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
438 yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
439 yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
// NOTE(review): extraction fragment - the function signature, the definitions of `win_uv`,
// `out` and `out_stride`, and the opening of the window loop are not visible here.
// Presumably colorconvert_iyuv_to_rgb; confirm against the complete file.
444 template <
bool alpha>
451 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(input);
452 const auto output_ptr =
static_cast<IImage *__restrict
>(output);
// Bytes written by one yuyv_to_rgb_calculation call: 8 pixels * 4 (alpha) or 3 bytes.
454 constexpr
auto element_size = alpha ? 32 : 24;
463 Iterator in_y(input_ptr->plane(0), win);
464 Iterator in_u(input_ptr->plane(1), win_uv);
465 Iterator in_v(input_ptr->plane(2), win_uv);
// Two luma rows (split into even/odd columns) plus one row each from the U and V planes.
470 const auto ta_y_top = vld2q_u8(in_y.ptr());
471 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
472 const auto ta_u = vld1q_u8(in_u.ptr());
473 const auto ta_v = vld1q_u8(in_v.ptr());
480 float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec;
481 convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top);
482 convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top);
483 convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom);
484 convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom);
485 convert_uint8x16_to_float32x4x4(ta_u, uvec);
486 convert_uint8x16_to_float32x4x4(ta_v, vvec);
// Upper output row: each chroma sample is shared by an even/odd luma pair.
488 yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
489 yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
490 yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
491 yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
// Lower output row: reuses the same chroma samples, written out_stride bytes further on.
493 yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
494 yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
495 yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
496 yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
// Trailing arguments of a call whose start is not visible (presumably execute_window_loop).
498 in_y, in_u, in_v, out);
// NOTE(review): extraction fragment - the signature (including the declaration of `yuyv`),
// the definitions of `in`, `win_uv`, `yvec`, `yyvec` and `uvvec`, and the window loop are
// not visible here. Presumably colorconvert_yuyv_to_nv12; confirm against the complete file.
508 const auto input_ptr =
static_cast<const IImage *__restrict
>(input);
509 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
// shift == 0: luma in channels 0 and 2, chroma in 1 and 3; shift == 1: the opposite.
511 constexpr
auto shift = yuyv ? 0 : 1;
520 Iterator out_y(output_ptr->plane(0), win);
521 Iterator out_uv(output_ptr->plane(1), win_uv);
// Load two consecutive input rows, each de-interleaved into four channel vectors.
525 const auto ta_top = vld4q_u8(in.
ptr());
526 const auto ta_bottom = vld4q_u8(in.
ptr() + input_ptr->info()->strides_in_bytes().y());
// Upper luma row: re-interleave the two luma channels.
533 yvec.val[0] = ta_top.val[0 + shift];
534 yvec.val[1] = ta_top.val[2 + shift];
535 vst2q_u8(out_y.ptr(), yvec);
// Lower luma row, one output stride further on.
538 yyvec.val[0] = ta_bottom.val[0 + shift];
539 yyvec.val[1] = ta_bottom.val[2 + shift];
540 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
// Chroma: halving add of the two rows per channel, stored interleaved.
543 uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
544 uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
545 vst2q_u8(out_uv.ptr(), uvvec);
// NOTE(review): extraction fragment - the signature, the definitions of `win_uv` and
// `ta_uv`, and the opening of the window loop are not visible here. Presumably
// colorconvert_iyuv_to_nv12; confirm against the complete file.
556 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(input);
557 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
565 Iterator in_y(input_ptr->plane(0), win);
566 Iterator in_u(input_ptr->plane(1), win_uv);
567 Iterator in_v(input_ptr->plane(2), win_uv);
568 Iterator out_y(output_ptr->plane(0), win);
569 Iterator out_uv(output_ptr->plane(1), win_uv);
// Two luma rows, plus one row each from the separate U and V planes.
573 const auto ta_y_top = vld2q_u8(in_y.ptr());
574 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
576 ta_uv.val[0] = vld1q_u8(in_u.ptr());
577 ta_uv.val[1] = vld1q_u8(in_v.ptr());
// Luma is copied through unchanged; U and V are written interleaved into one plane.
583 vst2q_u8(out_y.ptr(), ta_y_top);
584 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
585 vst2q_u8(out_uv.ptr(), ta_uv);
// Trailing arguments of a call whose start is not visible (presumably execute_window_loop).
587 in_y, in_u, in_v, out_y, out_uv);
// NOTE(review): extraction fragment - the signature (including the declaration of `uv`),
// the definition of `win_uv`, and the opening of the window loop are not visible here.
// Presumably colorconvert_nv12_to_iyuv; confirm against the complete file.
597 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(input);
598 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
// shift == 0: U is the first interleaved chroma channel; shift == 1: V is first.
600 constexpr
auto shift = uv ? 0 : 1;
608 Iterator in_y(input_ptr->plane(0), win);
609 Iterator in_uv(input_ptr->plane(1), win_uv);
610 Iterator out_y(output_ptr->plane(0), win);
611 Iterator out_u(output_ptr->plane(1), win_uv);
612 Iterator out_v(output_ptr->plane(2), win_uv);
// Two luma rows plus one row of interleaved chroma, split into its two channels.
616 const auto ta_y_top = vld2q_u8(in_y.ptr());
617 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
618 const auto ta_uv = vld2q_u8(in_uv.ptr());
// Luma is copied through unchanged; each chroma channel goes to its own plane.
624 vst2q_u8(out_y.ptr(), ta_y_top);
625 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
626 vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]);
627 vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]);
// Trailing arguments of a call whose start is not visible (presumably execute_window_loop).
629 in_y, in_uv, out_y, out_u, out_v);
// NOTE(review): extraction fragment - the signature (including the declaration of `yuyv`),
// the definitions of `in`, `win_uv`, `yvec`, `yyvec`, `uvec` and `vvec`, and the opening of
// the window loop are not visible here. Presumably colorconvert_yuyv_to_iyuv; confirm
// against the complete file.
639 const auto input_ptr =
static_cast<const IImage *__restrict
>(input);
640 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
// shift == 0: luma in channels 0 and 2, chroma in 1 and 3; shift == 1: the opposite.
642 constexpr
auto shift = yuyv ? 0 : 1;
651 Iterator out_y(output_ptr->plane(0), win);
652 Iterator out_u(output_ptr->plane(1), win_uv);
653 Iterator out_v(output_ptr->plane(2), win_uv);
// Load two consecutive input rows, each de-interleaved into four channel vectors.
657 const auto ta_top = vld4q_u8(in.
ptr());
658 const auto ta_bottom = vld4q_u8(in.
ptr() + input_ptr->info()->strides_in_bytes().y());
// Upper luma row: re-interleave the two luma channels.
665 yvec.val[0] = ta_top.val[0 + shift];
666 yvec.val[1] = ta_top.val[2 + shift];
667 vst2q_u8(out_y.ptr(), yvec);
// Lower luma row, one output stride further on.
670 yyvec.val[0] = ta_bottom.val[0 + shift];
671 yyvec.val[1] = ta_bottom.val[2 + shift];
672 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
// Chroma: halving add of the two rows, each channel stored to its own plane.
675 uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
676 vst1q_u8(out_u.ptr(), uvec);
679 vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
680 vst1q_u8(out_v.ptr(), vvec);
// Trailing arguments of a call whose start is not visible (presumably execute_window_loop).
682 in, out_y, out_u, out_v);
// NOTE(review): extraction fragment - the signature (including the declaration of `uv`),
// the definitions of `win_uv`, `uvec` and `vvec`, and the opening of the window loop are
// not visible here. Presumably colorconvert_nv12_to_yuv4; confirm against the complete file.
692 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(input);
693 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
// shift == 0: U is the first interleaved chroma channel; shift == 1: V is first.
695 constexpr
auto shift = uv ? 0 : 1;
703 Iterator in_y(input_ptr->plane(0), win);
704 Iterator in_uv(input_ptr->plane(1), win_uv);
705 Iterator out_y(output_ptr->plane(0), win);
706 Iterator out_u(output_ptr->plane(1), win);
707 Iterator out_v(output_ptr->plane(2), win);
// Two luma rows plus one row of interleaved chroma, split into its two channels.
711 const auto ta_y_top = vld2q_u8(in_y.ptr());
712 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
713 const auto ta_uv = vld2q_u8(in_uv.ptr());
// Luma is copied through unchanged.
719 vst2q_u8(out_y.ptr(), ta_y_top);
720 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
// U: interleaving a vector with itself doubles every sample horizontally; storing the
// result to two consecutive rows doubles it vertically.
723 uvec.val[0] = ta_uv.val[0 + shift];
724 uvec.val[1] = ta_uv.val[0 + shift];
725 vst2q_u8(out_u.ptr(), uvec);
726 vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
// V: same 2x2 replication.
729 vvec.val[0] = ta_uv.val[1 - shift];
730 vvec.val[1] = ta_uv.val[1 - shift];
731 vst2q_u8(out_v.ptr(), vvec);
732 vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
// Trailing arguments of a call whose start is not visible (presumably execute_window_loop).
734 in_y, in_uv, out_y, out_u, out_v);
// NOTE(review): extraction fragment - the signature, the definition of `win_uv`, the
// declarations and assignments of `uvec` and `vvec`, and the opening of the window loop
// are not visible here. Presumably colorconvert_iyuv_to_yuv4; confirm against the
// complete file.
743 const auto input_ptr =
static_cast<const IMultiImage *__restrict
>(input);
744 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
752 Iterator in_y(input_ptr->plane(0), win);
753 Iterator in_u(input_ptr->plane(1), win_uv);
754 Iterator in_v(input_ptr->plane(2), win_uv);
755 Iterator out_y(output_ptr->plane(0), win);
756 Iterator out_u(output_ptr->plane(1), win);
757 Iterator out_v(output_ptr->plane(2), win);
// Two luma rows, plus one row each from the separate U and V planes.
761 const auto ta_y_top = vld2q_u8(in_y.ptr());
762 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
763 const auto ta_u = vld1q_u8(in_u.ptr());
764 const auto ta_v = vld1q_u8(in_v.ptr());
// Luma is copied through unchanged.
770 vst2q_u8(out_y.ptr(), ta_y_top);
771 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
// Each chroma vector is stored to two consecutive output rows.
// NOTE(review): how uvec/vvec are filled from ta_u/ta_v is not visible - confirm.
776 vst2q_u8(out_u.ptr(), uvec);
777 vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
782 vst2q_u8(out_v.ptr(), vvec);
783 vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
// Trailing arguments of a call whose start is not visible (presumably execute_window_loop).
785 in_y, in_u, in_v, out_y, out_u, out_v);
// NOTE(review): extraction fragment - the function signature, the definitions of `in` and
// `win_uv`, the window loop and the end of the store_rgb_to_nv12 call are not visible
// here. Presumably colorconvert_rgb_to_nv12; confirm against the complete file.
788 template <
bool alpha>
795 const auto input_ptr =
static_cast<const IImage *__restrict
>(input);
796 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
805 Iterator out_y(output_ptr->plane(0), win);
806 Iterator out_uv(output_ptr->plane(1), win_uv);
// Load sixteen pixels from each of two consecutive input rows.
810 const auto ta_rgb_top = load_rgb(in.
ptr(), alpha);
811 const auto ta_rgb_bottom = load_rgb(in.
ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
// Convert the 16x2 block and store both luma rows (the remaining argument is cut off).
816 store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
817 ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
818 out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
// NOTE(review): extraction fragment - the function signature, the definitions of `in` and
// `win_uv`, and the opening of the window loop are not visible here. Presumably
// colorconvert_rgb_to_iyuv; confirm against the complete file.
824 template <
bool alpha>
831 const auto input_ptr =
static_cast<const IImage *__restrict
>(input);
832 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
841 Iterator out_y(output_ptr->plane(0), win);
842 Iterator out_u(output_ptr->plane(1), win_uv);
843 Iterator out_v(output_ptr->plane(2), win_uv);
// Load sixteen pixels from each of two consecutive input rows.
847 const auto ta_rgb_top = load_rgb(in.
ptr(), alpha);
848 const auto ta_rgb_bottom = load_rgb(in.
ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
// Convert the 16x2 block; luma goes to two rows of plane 0, chroma to planes 1 and 2.
853 store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
854 ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
855 out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
856 out_u.ptr(), out_v.ptr());
// Trailing arguments of a call whose start is not visible (presumably execute_window_loop).
858 in, out_y, out_u, out_v);
// NOTE(review): extraction fragment - the function signature, the definition of `in`, and
// the opening of the window loop are not visible here. Presumably
// colorconvert_rgb_to_yuv4; confirm against the complete file.
861 template <
bool alpha>
868 const auto input_ptr =
static_cast<const IImage *__restrict
>(input);
869 const auto output_ptr =
static_cast<IMultiImage *__restrict
>(output);
// All three output planes are iterated with the same window as the input.
872 Iterator out_y(output_ptr->plane(0), win);
873 Iterator out_u(output_ptr->plane(1), win);
874 Iterator out_v(output_ptr->plane(2), win);
// Load sixteen pixels from one input row.
878 const auto ta_rgb = load_rgb(in.
ptr(), alpha);
// Convert and store Y, U and V at full resolution.
883 store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2],
884 out_y.ptr(), out_u.ptr(), out_v.ptr());
// Trailing arguments of a call whose start is not visible (presumably execute_window_loop).
886 in, out_y, out_u, out_v);
void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win)
Describe one of the image's dimensions with a start, end and step.
constexpr int start() const
Return the start of the dimension.
Interface for NEON tensor.
constexpr int end() const
Return the end of the dimension.
void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
virtual TensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Interface for multi-planar images.
void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win)
void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
void validate() const
Will validate all the window's dimensions' values when asserts are enabled.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
T y() const
Alias to access the size of the second dimension.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
constexpr int step() const
Return the step of the dimension.
void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
const Strides & strides_in_bytes() const
The strides in bytes for accessing each dimension of the tensor.
Iterator updated by execute_window_loop for each window element.
Describe a multidimensional execution window.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)