2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
11 #include "libyuv/row.h"
13 #include <string.h> // For memcpy and memset.
15 #include "libyuv/basic_types.h"
22 // llvm x86 is poor at ternary operator, so use branchless min/max.
24 #define USE_BRANCHLESS 1
26 static __inline int32 clamp0(int32 v) {
27 return ((-(v) >> 31) & (v));
30 static __inline int32 clamp255(int32 v) {
31 return (((255 - (v)) >> 31) | (v)) & 255;
34 static __inline uint32 Clamp(int32 val) {
36 return (uint32)(clamp255(v));
39 static __inline uint32 Abs(int32 v) {
43 #else // USE_BRANCHLESS
44 static __inline int32 clamp0(int32 v) {
45 return (v < 0) ? 0 : v;
48 static __inline int32 clamp255(int32 v) {
49 return (v > 255) ? 255 : v;
52 static __inline uint32 Clamp(int32 val) {
54 return (uint32)(clamp255(v));
57 static __inline uint32 Abs(int32 v) {
58 return (v < 0) ? -v : v;
60 #endif // USE_BRANCHLESS
62 #ifdef LIBYUV_LITTLE_ENDIAN
63 #define WRITEWORD(p, v) *(uint32*)(p) = v
65 static inline void WRITEWORD(uint8* p, uint32 v) {
66 p[0] = (uint8)(v & 255);
67 p[1] = (uint8)((v >> 8) & 255);
68 p[2] = (uint8)((v >> 16) & 255);
69 p[3] = (uint8)((v >> 24) & 255);
73 void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
75 for (x = 0; x < width; ++x) {
76 uint8 b = src_rgb24[0];
77 uint8 g = src_rgb24[1];
78 uint8 r = src_rgb24[2];
88 void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
90 for (x = 0; x < width; ++x) {
103 void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
105 for (x = 0; x < width; ++x) {
106 uint8 b = src_rgb565[0] & 0x1f;
107 uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
108 uint8 r = src_rgb565[1] >> 3;
109 dst_argb[0] = (b << 3) | (b >> 2);
110 dst_argb[1] = (g << 2) | (g >> 4);
111 dst_argb[2] = (r << 3) | (r >> 2);
118 void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
121 for (x = 0; x < width; ++x) {
122 uint8 b = src_argb1555[0] & 0x1f;
123 uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
124 uint8 r = (src_argb1555[1] & 0x7c) >> 2;
125 uint8 a = src_argb1555[1] >> 7;
126 dst_argb[0] = (b << 3) | (b >> 2);
127 dst_argb[1] = (g << 3) | (g >> 2);
128 dst_argb[2] = (r << 3) | (r >> 2);
135 void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
138 for (x = 0; x < width; ++x) {
139 uint8 b = src_argb4444[0] & 0x0f;
140 uint8 g = src_argb4444[0] >> 4;
141 uint8 r = src_argb4444[1] & 0x0f;
142 uint8 a = src_argb4444[1] >> 4;
143 dst_argb[0] = (b << 4) | b;
144 dst_argb[1] = (g << 4) | g;
145 dst_argb[2] = (r << 4) | r;
146 dst_argb[3] = (a << 4) | a;
152 void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
154 for (x = 0; x < width; ++x) {
155 uint8 b = src_argb[0];
156 uint8 g = src_argb[1];
157 uint8 r = src_argb[2];
166 void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
168 for (x = 0; x < width; ++x) {
169 uint8 b = src_argb[0];
170 uint8 g = src_argb[1];
171 uint8 r = src_argb[2];
180 void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
182 for (x = 0; x < width - 1; x += 2) {
183 uint8 b0 = src_argb[0] >> 3;
184 uint8 g0 = src_argb[1] >> 2;
185 uint8 r0 = src_argb[2] >> 3;
186 uint8 b1 = src_argb[4] >> 3;
187 uint8 g1 = src_argb[5] >> 2;
188 uint8 r1 = src_argb[6] >> 3;
189 WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
190 (b1 << 16) | (g1 << 21) | (r1 << 27));
195 uint8 b0 = src_argb[0] >> 3;
196 uint8 g0 = src_argb[1] >> 2;
197 uint8 r0 = src_argb[2] >> 3;
198 *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
202 void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
204 for (x = 0; x < width - 1; x += 2) {
205 uint8 b0 = src_argb[0] >> 3;
206 uint8 g0 = src_argb[1] >> 3;
207 uint8 r0 = src_argb[2] >> 3;
208 uint8 a0 = src_argb[3] >> 7;
209 uint8 b1 = src_argb[4] >> 3;
210 uint8 g1 = src_argb[5] >> 3;
211 uint8 r1 = src_argb[6] >> 3;
212 uint8 a1 = src_argb[7] >> 7;
213 *(uint32*)(dst_rgb) =
214 b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
215 (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
220 uint8 b0 = src_argb[0] >> 3;
221 uint8 g0 = src_argb[1] >> 3;
222 uint8 r0 = src_argb[2] >> 3;
223 uint8 a0 = src_argb[3] >> 7;
224 *(uint16*)(dst_rgb) =
225 b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
229 void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
231 for (x = 0; x < width - 1; x += 2) {
232 uint8 b0 = src_argb[0] >> 4;
233 uint8 g0 = src_argb[1] >> 4;
234 uint8 r0 = src_argb[2] >> 4;
235 uint8 a0 = src_argb[3] >> 4;
236 uint8 b1 = src_argb[4] >> 4;
237 uint8 g1 = src_argb[5] >> 4;
238 uint8 r1 = src_argb[6] >> 4;
239 uint8 a1 = src_argb[7] >> 4;
240 *(uint32*)(dst_rgb) =
241 b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
242 (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
247 uint8 b0 = src_argb[0] >> 4;
248 uint8 g0 = src_argb[1] >> 4;
249 uint8 r0 = src_argb[2] >> 4;
250 uint8 a0 = src_argb[3] >> 4;
251 *(uint16*)(dst_rgb) =
252 b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
256 static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
257 return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
260 static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
261 return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
263 static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
264 return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
267 #define MAKEROWY(NAME, R, G, B, BPP) \
268 void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
270 for (x = 0; x < width; ++x) { \
271 dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
276 void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \
277 uint8* dst_u, uint8* dst_v, int width) { \
278 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
280 for (x = 0; x < width - 1; x += 2) { \
281 uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + \
282 src_rgb1[B] + src_rgb1[B + BPP]) >> 2; \
283 uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + \
284 src_rgb1[G] + src_rgb1[G + BPP]) >> 2; \
285 uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + \
286 src_rgb1[R] + src_rgb1[R + BPP]) >> 2; \
287 dst_u[0] = RGBToU(ar, ag, ab); \
288 dst_v[0] = RGBToV(ar, ag, ab); \
289 src_rgb0 += BPP * 2; \
290 src_rgb1 += BPP * 2; \
295 uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \
296 uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \
297 uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \
298 dst_u[0] = RGBToU(ar, ag, ab); \
299 dst_v[0] = RGBToV(ar, ag, ab); \
303 MAKEROWY(ARGB, 2, 1, 0, 4)
304 MAKEROWY(BGRA, 1, 2, 3, 4)
305 MAKEROWY(ABGR, 0, 1, 2, 4)
306 MAKEROWY(RGBA, 3, 2, 1, 4)
307 MAKEROWY(RGB24, 2, 1, 0, 3)
308 MAKEROWY(RAW, 0, 1, 2, 3)
311 // JPeg uses a variation on BT.601-1 full range
312 // y = 0.29900 * r + 0.58700 * g + 0.11400 * b
313 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
314 // v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
315 // BT.601 Mpeg range uses:
316 // b 0.1016 * 255 = 25.908 = 25
317 // g 0.5078 * 255 = 129.489 = 129
318 // r 0.2578 * 255 = 65.739 = 66
319 // JPeg 8 bit Y (not used):
320 // b 0.11400 * 256 = 29.184 = 29
321 // g 0.58700 * 256 = 150.272 = 150
322 // r 0.29900 * 256 = 76.544 = 77
324 // b 0.11400 * 128 = 14.592 = 15
325 // g 0.58700 * 128 = 75.136 = 75
326 // r 0.29900 * 128 = 38.272 = 38
328 // b 0.50000 * 255 = 127.5 = 127
329 // g -0.33126 * 255 = -84.4713 = -84
330 // r -0.16874 * 255 = -43.0287 = -43
332 // b -0.08131 * 255 = -20.73405 = -20
333 // g -0.41869 * 255 = -106.76595 = -107
334 // r 0.50000 * 255 = 127.5 = 127
336 static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
337 return (38 * r + 75 * g + 15 * b + 64) >> 7;
340 static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
341 return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
343 static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
344 return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
347 #define AVGB(a, b) (((a) + (b) + 1) >> 1)
349 #define MAKEROWYJ(NAME, R, G, B, BPP) \
350 void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
352 for (x = 0; x < width; ++x) { \
353 dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
358 void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \
359 uint8* dst_u, uint8* dst_v, int width) { \
360 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
362 for (x = 0; x < width - 1; x += 2) { \
363 uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
364 AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
365 uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
366 AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
367 uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
368 AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
369 dst_u[0] = RGBToUJ(ar, ag, ab); \
370 dst_v[0] = RGBToVJ(ar, ag, ab); \
371 src_rgb0 += BPP * 2; \
372 src_rgb1 += BPP * 2; \
377 uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \
378 uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \
379 uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \
380 dst_u[0] = RGBToUJ(ar, ag, ab); \
381 dst_v[0] = RGBToVJ(ar, ag, ab); \
385 MAKEROWYJ(ARGB, 2, 1, 0, 4)
388 void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
390 for (x = 0; x < width; ++x) {
391 uint8 b = src_rgb565[0] & 0x1f;
392 uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
393 uint8 r = src_rgb565[1] >> 3;
394 b = (b << 3) | (b >> 2);
395 g = (g << 2) | (g >> 4);
396 r = (r << 3) | (r >> 2);
397 dst_y[0] = RGBToY(r, g, b);
403 void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
405 for (x = 0; x < width; ++x) {
406 uint8 b = src_argb1555[0] & 0x1f;
407 uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
408 uint8 r = (src_argb1555[1] & 0x7c) >> 2;
409 b = (b << 3) | (b >> 2);
410 g = (g << 3) | (g >> 2);
411 r = (r << 3) | (r >> 2);
412 dst_y[0] = RGBToY(r, g, b);
418 void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
420 for (x = 0; x < width; ++x) {
421 uint8 b = src_argb4444[0] & 0x0f;
422 uint8 g = src_argb4444[0] >> 4;
423 uint8 r = src_argb4444[1] & 0x0f;
427 dst_y[0] = RGBToY(r, g, b);
433 void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
434 uint8* dst_u, uint8* dst_v, int width) {
435 const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
437 for (x = 0; x < width - 1; x += 2) {
438 uint8 b0 = src_rgb565[0] & 0x1f;
439 uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
440 uint8 r0 = src_rgb565[1] >> 3;
441 uint8 b1 = src_rgb565[2] & 0x1f;
442 uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
443 uint8 r1 = src_rgb565[3] >> 3;
444 uint8 b2 = next_rgb565[0] & 0x1f;
445 uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
446 uint8 r2 = next_rgb565[1] >> 3;
447 uint8 b3 = next_rgb565[2] & 0x1f;
448 uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
449 uint8 r3 = next_rgb565[3] >> 3;
450 uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787.
451 uint8 g = (g0 + g1 + g2 + g3);
452 uint8 r = (r0 + r1 + r2 + r3);
453 b = (b << 1) | (b >> 6); // 787 -> 888.
454 r = (r << 1) | (r >> 6);
455 dst_u[0] = RGBToU(r, g, b);
456 dst_v[0] = RGBToV(r, g, b);
463 uint8 b0 = src_rgb565[0] & 0x1f;
464 uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
465 uint8 r0 = src_rgb565[1] >> 3;
466 uint8 b2 = next_rgb565[0] & 0x1f;
467 uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
468 uint8 r2 = next_rgb565[1] >> 3;
469 uint8 b = (b0 + b2); // 565 * 2 = 676.
472 b = (b << 2) | (b >> 4); // 676 -> 888
473 g = (g << 1) | (g >> 6);
474 r = (r << 2) | (r >> 4);
475 dst_u[0] = RGBToU(r, g, b);
476 dst_v[0] = RGBToV(r, g, b);
480 void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
481 uint8* dst_u, uint8* dst_v, int width) {
482 const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
484 for (x = 0; x < width - 1; x += 2) {
485 uint8 b0 = src_argb1555[0] & 0x1f;
486 uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
487 uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
488 uint8 b1 = src_argb1555[2] & 0x1f;
489 uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
490 uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
491 uint8 b2 = next_argb1555[0] & 0x1f;
492 uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
493 uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
494 uint8 b3 = next_argb1555[2] & 0x1f;
495 uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
496 uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
497 uint8 b = (b0 + b1 + b2 + b3); // 555 * 4 = 777.
498 uint8 g = (g0 + g1 + g2 + g3);
499 uint8 r = (r0 + r1 + r2 + r3);
500 b = (b << 1) | (b >> 6); // 777 -> 888.
501 g = (g << 1) | (g >> 6);
502 r = (r << 1) | (r >> 6);
503 dst_u[0] = RGBToU(r, g, b);
504 dst_v[0] = RGBToV(r, g, b);
511 uint8 b0 = src_argb1555[0] & 0x1f;
512 uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
513 uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
514 uint8 b2 = next_argb1555[0] & 0x1f;
515 uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
516 uint8 r2 = next_argb1555[1] >> 3;
517 uint8 b = (b0 + b2); // 555 * 2 = 666.
520 b = (b << 2) | (b >> 4); // 666 -> 888.
521 g = (g << 2) | (g >> 4);
522 r = (r << 2) | (r >> 4);
523 dst_u[0] = RGBToU(r, g, b);
524 dst_v[0] = RGBToV(r, g, b);
528 void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
529 uint8* dst_u, uint8* dst_v, int width) {
530 const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
532 for (x = 0; x < width - 1; x += 2) {
533 uint8 b0 = src_argb4444[0] & 0x0f;
534 uint8 g0 = src_argb4444[0] >> 4;
535 uint8 r0 = src_argb4444[1] & 0x0f;
536 uint8 b1 = src_argb4444[2] & 0x0f;
537 uint8 g1 = src_argb4444[2] >> 4;
538 uint8 r1 = src_argb4444[3] & 0x0f;
539 uint8 b2 = next_argb4444[0] & 0x0f;
540 uint8 g2 = next_argb4444[0] >> 4;
541 uint8 r2 = next_argb4444[1] & 0x0f;
542 uint8 b3 = next_argb4444[2] & 0x0f;
543 uint8 g3 = next_argb4444[2] >> 4;
544 uint8 r3 = next_argb4444[3] & 0x0f;
545 uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666.
546 uint8 g = (g0 + g1 + g2 + g3);
547 uint8 r = (r0 + r1 + r2 + r3);
548 b = (b << 2) | (b >> 4); // 666 -> 888.
549 g = (g << 2) | (g >> 4);
550 r = (r << 2) | (r >> 4);
551 dst_u[0] = RGBToU(r, g, b);
552 dst_v[0] = RGBToV(r, g, b);
559 uint8 b0 = src_argb4444[0] & 0x0f;
560 uint8 g0 = src_argb4444[0] >> 4;
561 uint8 r0 = src_argb4444[1] & 0x0f;
562 uint8 b2 = next_argb4444[0] & 0x0f;
563 uint8 g2 = next_argb4444[0] >> 4;
564 uint8 r2 = next_argb4444[1] & 0x0f;
565 uint8 b = (b0 + b2); // 444 * 2 = 555.
568 b = (b << 3) | (b >> 2); // 555 -> 888.
569 g = (g << 3) | (g >> 2);
570 r = (r << 3) | (r >> 2);
571 dst_u[0] = RGBToU(r, g, b);
572 dst_v[0] = RGBToV(r, g, b);
576 void ARGBToUV444Row_C(const uint8* src_argb,
577 uint8* dst_u, uint8* dst_v, int width) {
579 for (x = 0; x < width; ++x) {
580 uint8 ab = src_argb[0];
581 uint8 ag = src_argb[1];
582 uint8 ar = src_argb[2];
583 dst_u[0] = RGBToU(ar, ag, ab);
584 dst_v[0] = RGBToV(ar, ag, ab);
591 void ARGBToUV422Row_C(const uint8* src_argb,
592 uint8* dst_u, uint8* dst_v, int width) {
594 for (x = 0; x < width - 1; x += 2) {
595 uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
596 uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
597 uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
598 dst_u[0] = RGBToU(ar, ag, ab);
599 dst_v[0] = RGBToV(ar, ag, ab);
605 uint8 ab = src_argb[0];
606 uint8 ag = src_argb[1];
607 uint8 ar = src_argb[2];
608 dst_u[0] = RGBToU(ar, ag, ab);
609 dst_v[0] = RGBToV(ar, ag, ab);
613 void ARGBToUV411Row_C(const uint8* src_argb,
614 uint8* dst_u, uint8* dst_v, int width) {
616 for (x = 0; x < width - 3; x += 4) {
617 uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;
618 uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;
619 uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;
620 dst_u[0] = RGBToU(ar, ag, ab);
621 dst_v[0] = RGBToV(ar, ag, ab);
626 if ((width & 3) == 3) {
627 uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3;
628 uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3;
629 uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3;
630 dst_u[0] = RGBToU(ar, ag, ab);
631 dst_v[0] = RGBToV(ar, ag, ab);
632 } else if ((width & 3) == 2) {
633 uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
634 uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
635 uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
636 dst_u[0] = RGBToU(ar, ag, ab);
637 dst_v[0] = RGBToV(ar, ag, ab);
638 } else if ((width & 3) == 1) {
639 uint8 ab = src_argb[0];
640 uint8 ag = src_argb[1];
641 uint8 ar = src_argb[2];
642 dst_u[0] = RGBToU(ar, ag, ab);
643 dst_v[0] = RGBToV(ar, ag, ab);
647 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
649 for (x = 0; x < width; ++x) {
650 uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
651 dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
652 dst_argb[3] = src_argb[3];
658 // Convert a row of image to Sepia tone.
659 void ARGBSepiaRow_C(uint8* dst_argb, int width) {
661 for (x = 0; x < width; ++x) {
665 int sb = (b * 17 + g * 68 + r * 35) >> 7;
666 int sg = (b * 22 + g * 88 + r * 45) >> 7;
667 int sr = (b * 24 + g * 98 + r * 50) >> 7;
668 // b does not over flow. a is preserved from original.
670 dst_argb[1] = clamp255(sg);
671 dst_argb[2] = clamp255(sr);
676 // Apply color matrix to a row of image. Matrix is signed.
677 // TODO(fbarchard): Consider adding rounding (+32).
678 void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
679 const int8* matrix_argb, int width) {
681 for (x = 0; x < width; ++x) {
686 int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
687 r * matrix_argb[2] + a * matrix_argb[3]) >> 6;
688 int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
689 r * matrix_argb[6] + a * matrix_argb[7]) >> 6;
690 int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
691 r * matrix_argb[10] + a * matrix_argb[11]) >> 6;
692 int sa = (b * matrix_argb[12] + g * matrix_argb[13] +
693 r * matrix_argb[14] + a * matrix_argb[15]) >> 6;
694 dst_argb[0] = Clamp(sb);
695 dst_argb[1] = Clamp(sg);
696 dst_argb[2] = Clamp(sr);
697 dst_argb[3] = Clamp(sa);
703 // Apply color table to a row of image.
704 void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
706 for (x = 0; x < width; ++x) {
711 dst_argb[0] = table_argb[b * 4 + 0];
712 dst_argb[1] = table_argb[g * 4 + 1];
713 dst_argb[2] = table_argb[r * 4 + 2];
714 dst_argb[3] = table_argb[a * 4 + 3];
719 // Apply color table to a row of image.
720 void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
722 for (x = 0; x < width; ++x) {
726 dst_argb[0] = table_argb[b * 4 + 0];
727 dst_argb[1] = table_argb[g * 4 + 1];
728 dst_argb[2] = table_argb[r * 4 + 2];
733 void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
734 int interval_offset, int width) {
736 for (x = 0; x < width; ++x) {
740 dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
741 dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
742 dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
747 #define REPEAT8(v) (v) | ((v) << 8)
748 #define SHADE(f, v) v * f >> 24
750 void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
752 const uint32 b_scale = REPEAT8(value & 0xff);
753 const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
754 const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
755 const uint32 a_scale = REPEAT8(value >> 24);
758 for (i = 0; i < width; ++i) {
759 const uint32 b = REPEAT8(src_argb[0]);
760 const uint32 g = REPEAT8(src_argb[1]);
761 const uint32 r = REPEAT8(src_argb[2]);
762 const uint32 a = REPEAT8(src_argb[3]);
763 dst_argb[0] = SHADE(b, b_scale);
764 dst_argb[1] = SHADE(g, g_scale);
765 dst_argb[2] = SHADE(r, r_scale);
766 dst_argb[3] = SHADE(a, a_scale);
774 #define REPEAT8(v) (v) | ((v) << 8)
775 #define SHADE(f, v) v * f >> 16
777 void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
778 uint8* dst_argb, int width) {
780 for (i = 0; i < width; ++i) {
781 const uint32 b = REPEAT8(src_argb0[0]);
782 const uint32 g = REPEAT8(src_argb0[1]);
783 const uint32 r = REPEAT8(src_argb0[2]);
784 const uint32 a = REPEAT8(src_argb0[3]);
785 const uint32 b_scale = src_argb1[0];
786 const uint32 g_scale = src_argb1[1];
787 const uint32 r_scale = src_argb1[2];
788 const uint32 a_scale = src_argb1[3];
789 dst_argb[0] = SHADE(b, b_scale);
790 dst_argb[1] = SHADE(g, g_scale);
791 dst_argb[2] = SHADE(r, r_scale);
792 dst_argb[3] = SHADE(a, a_scale);
801 #define SHADE(f, v) clamp255(v + f)
803 void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,
804 uint8* dst_argb, int width) {
806 for (i = 0; i < width; ++i) {
807 const int b = src_argb0[0];
808 const int g = src_argb0[1];
809 const int r = src_argb0[2];
810 const int a = src_argb0[3];
811 const int b_add = src_argb1[0];
812 const int g_add = src_argb1[1];
813 const int r_add = src_argb1[2];
814 const int a_add = src_argb1[3];
815 dst_argb[0] = SHADE(b, b_add);
816 dst_argb[1] = SHADE(g, g_add);
817 dst_argb[2] = SHADE(r, r_add);
818 dst_argb[3] = SHADE(a, a_add);
826 #define SHADE(f, v) clamp0(f - v)
828 void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
829 uint8* dst_argb, int width) {
831 for (i = 0; i < width; ++i) {
832 const int b = src_argb0[0];
833 const int g = src_argb0[1];
834 const int r = src_argb0[2];
835 const int a = src_argb0[3];
836 const int b_sub = src_argb1[0];
837 const int g_sub = src_argb1[1];
838 const int r_sub = src_argb1[2];
839 const int a_sub = src_argb1[3];
840 dst_argb[0] = SHADE(b, b_sub);
841 dst_argb[1] = SHADE(g, g_sub);
842 dst_argb[2] = SHADE(r, r_sub);
843 dst_argb[3] = SHADE(a, a_sub);
851 // Sobel functions which mimic SSSE3.
852 void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
853 uint8* dst_sobelx, int width) {
855 for (i = 0; i < width; ++i) {
859 int a_sub = src_y0[i + 2];
860 int b_sub = src_y1[i + 2];
861 int c_sub = src_y2[i + 2];
862 int a_diff = a - a_sub;
863 int b_diff = b - b_sub;
864 int c_diff = c - c_sub;
865 int sobel = Abs(a_diff + b_diff * 2 + c_diff);
866 dst_sobelx[i] = (uint8)(clamp255(sobel));
870 void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
871 uint8* dst_sobely, int width) {
873 for (i = 0; i < width; ++i) {
874 int a = src_y0[i + 0];
875 int b = src_y0[i + 1];
876 int c = src_y0[i + 2];
877 int a_sub = src_y1[i + 0];
878 int b_sub = src_y1[i + 1];
879 int c_sub = src_y1[i + 2];
880 int a_diff = a - a_sub;
881 int b_diff = b - b_sub;
882 int c_diff = c - c_sub;
883 int sobel = Abs(a_diff + b_diff * 2 + c_diff);
884 dst_sobely[i] = (uint8)(clamp255(sobel));
888 void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
889 uint8* dst_argb, int width) {
891 for (i = 0; i < width; ++i) {
892 int r = src_sobelx[i];
893 int b = src_sobely[i];
894 int s = clamp255(r + b);
895 dst_argb[0] = (uint8)(s);
896 dst_argb[1] = (uint8)(s);
897 dst_argb[2] = (uint8)(s);
898 dst_argb[3] = (uint8)(255u);
903 void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
904 uint8* dst_y, int width) {
906 for (i = 0; i < width; ++i) {
907 int r = src_sobelx[i];
908 int b = src_sobely[i];
909 int s = clamp255(r + b);
910 dst_y[i] = (uint8)(s);
914 void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
915 uint8* dst_argb, int width) {
917 for (i = 0; i < width; ++i) {
918 int r = src_sobelx[i];
919 int b = src_sobely[i];
920 int g = clamp255(r + b);
921 dst_argb[0] = (uint8)(b);
922 dst_argb[1] = (uint8)(g);
923 dst_argb[2] = (uint8)(r);
924 dst_argb[3] = (uint8)(255u);
929 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
932 for (x = 0; x < width; ++x) {
934 dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
941 // C reference code that mimics the YUV assembly.
943 #define YG 74 /* (int8)(1.164 * 64 + 0.5) */
945 #define UB 127 /* min(63,(int8)(2.018 * 64)) */
946 #define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
950 #define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
951 #define VR 102 /* (int8)(1.596 * 64 + 0.5) */
954 #define BB UB * 128 + VB * 128
955 #define BG UG * 128 + VG * 128
956 #define BR UR * 128 + VR * 128
958 static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
959 uint8* b, uint8* g, uint8* r) {
960 int32 y1 = ((int32)(y) - 16) * YG;
961 *b = Clamp((int32)((u * UB + v * VB) - (BB) + y1) >> 6);
962 *g = Clamp((int32)((u * UG + v * VG) - (BG) + y1) >> 6);
963 *r = Clamp((int32)((u * UR + v * VR) - (BR) + y1) >> 6);
966 #if !defined(LIBYUV_DISABLE_NEON) && \
967 (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
969 // TODO(fbarchard): Remove subsampling from Neon.
970 void I444ToARGBRow_C(const uint8* src_y,
976 for (x = 0; x < width - 1; x += 2) {
977 uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
978 uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
979 YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
981 YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
986 rgb_buf += 8; // Advance 2 pixels.
989 YuvPixel(src_y[0], src_u[0], src_v[0],
990 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
994 void I444ToARGBRow_C(const uint8* src_y,
1000 for (x = 0; x < width; ++x) {
1001 YuvPixel(src_y[0], src_u[0], src_v[0],
1002 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1007 rgb_buf += 4; // Advance 1 pixel.
1011 // Also used for 420
1012 void I422ToARGBRow_C(const uint8* src_y,
1018 for (x = 0; x < width - 1; x += 2) {
1019 YuvPixel(src_y[0], src_u[0], src_v[0],
1020 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1022 YuvPixel(src_y[1], src_u[0], src_v[0],
1023 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1028 rgb_buf += 8; // Advance 2 pixels.
1031 YuvPixel(src_y[0], src_u[0], src_v[0],
1032 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1037 void I422ToRGB24Row_C(const uint8* src_y,
1043 for (x = 0; x < width - 1; x += 2) {
1044 YuvPixel(src_y[0], src_u[0], src_v[0],
1045 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1046 YuvPixel(src_y[1], src_u[0], src_v[0],
1047 rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
1051 rgb_buf += 6; // Advance 2 pixels.
1054 YuvPixel(src_y[0], src_u[0], src_v[0],
1055 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1059 void I422ToRAWRow_C(const uint8* src_y,
1065 for (x = 0; x < width - 1; x += 2) {
1066 YuvPixel(src_y[0], src_u[0], src_v[0],
1067 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1068 YuvPixel(src_y[1], src_u[0], src_v[0],
1069 rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
1073 rgb_buf += 6; // Advance 2 pixels.
1076 YuvPixel(src_y[0], src_u[0], src_v[0],
1077 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1081 void I422ToARGB4444Row_C(const uint8* src_y,
1084 uint8* dst_argb4444,
1093 for (x = 0; x < width - 1; x += 2) {
1094 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1095 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1102 *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
1103 (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000;
1107 dst_argb4444 += 4; // Advance 2 pixels.
1110 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1114 *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
1119 void I422ToARGB1555Row_C(const uint8* src_y,
1122 uint8* dst_argb1555,
1131 for (x = 0; x < width - 1; x += 2) {
1132 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1133 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1140 *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
1141 (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000;
1145 dst_argb1555 += 4; // Advance 2 pixels.
1148 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1152 *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
1157 void I422ToRGB565Row_C(const uint8* src_y,
1169 for (x = 0; x < width - 1; x += 2) {
1170 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1171 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1178 *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1179 (b1 << 16) | (g1 << 21) | (r1 << 27);
1183 dst_rgb565 += 4; // Advance 2 pixels.
1186 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1190 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1194 void I411ToARGBRow_C(const uint8* src_y,
1200 for (x = 0; x < width - 3; x += 4) {
1201 YuvPixel(src_y[0], src_u[0], src_v[0],
1202 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1204 YuvPixel(src_y[1], src_u[0], src_v[0],
1205 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1207 YuvPixel(src_y[2], src_u[0], src_v[0],
1208 rgb_buf + 8, rgb_buf + 9, rgb_buf + 10);
1210 YuvPixel(src_y[3], src_u[0], src_v[0],
1211 rgb_buf + 12, rgb_buf + 13, rgb_buf + 14);
1216 rgb_buf += 16; // Advance 4 pixels.
1219 YuvPixel(src_y[0], src_u[0], src_v[0],
1220 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1222 YuvPixel(src_y[1], src_u[0], src_v[0],
1223 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1226 rgb_buf += 8; // Advance 2 pixels.
1229 YuvPixel(src_y[0], src_u[0], src_v[0],
1230 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1235 void NV12ToARGBRow_C(const uint8* src_y,
1236 const uint8* usrc_v,
1240 for (x = 0; x < width - 1; x += 2) {
1241 YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
1242 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1244 YuvPixel(src_y[1], usrc_v[0], usrc_v[1],
1245 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1249 rgb_buf += 8; // Advance 2 pixels.
1252 YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
1253 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1258 void NV21ToARGBRow_C(const uint8* src_y,
1259 const uint8* src_vu,
1263 for (x = 0; x < width - 1; x += 2) {
1264 YuvPixel(src_y[0], src_vu[1], src_vu[0],
1265 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1268 YuvPixel(src_y[1], src_vu[1], src_vu[0],
1269 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1274 rgb_buf += 8; // Advance 2 pixels.
1277 YuvPixel(src_y[0], src_vu[1], src_vu[0],
1278 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1283 void NV12ToRGB565Row_C(const uint8* src_y,
1284 const uint8* usrc_v,
1294 for (x = 0; x < width - 1; x += 2) {
1295 YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
1296 YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1);
1303 *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1304 (b1 << 16) | (g1 << 21) | (r1 << 27);
1307 dst_rgb565 += 4; // Advance 2 pixels.
1310 YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
1314 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1318 void NV21ToRGB565Row_C(const uint8* src_y,
1319 const uint8* vsrc_u,
1329 for (x = 0; x < width - 1; x += 2) {
1330 YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
1331 YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
1338 *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1339 (b1 << 16) | (g1 << 21) | (r1 << 27);
1342 dst_rgb565 += 4; // Advance 2 pixels.
1345 YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
1349 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1353 void YUY2ToARGBRow_C(const uint8* src_yuy2,
1357 for (x = 0; x < width - 1; x += 2) {
1358 YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
1359 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1361 YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],
1362 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1365 rgb_buf += 8; // Advance 2 pixels.
1368 YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
1369 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1374 void UYVYToARGBRow_C(const uint8* src_uyvy,
1378 for (x = 0; x < width - 1; x += 2) {
1379 YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
1380 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1382 YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],
1383 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1386 rgb_buf += 8; // Advance 2 pixels.
1389 YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
1390 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
// Converts a row of planar YUV (src_y, src_u, src_v) to 4-byte pixels.
// Two luma samples share one src_u[0]/src_v[0] pair. YuvPixel's three outputs
// are stored at byte offsets 3, 2, 1 of each destination pixel (7, 6, 5 for
// the second pixel of a pair); byte 0 is not written by the lines visible
// here (presumably alpha -- confirm).
// The YuvPixel call after the loop converts one more pixel (presumably the
// odd-width tail -- confirm the guard around it).
1395 void I422ToBGRARow_C(const uint8* src_y,
1401 for (x = 0; x < width - 1; x += 2) {
1402 YuvPixel(src_y[0], src_u[0], src_v[0],
1403 rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
1405 YuvPixel(src_y[1], src_u[0], src_v[0],
1406 rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);
1411 rgb_buf += 8; // Advance 2 pixels.
1414 YuvPixel(src_y[0], src_u[0], src_v[0],
1415 rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
// Converts a row of planar YUV (src_y, src_u, src_v) to 4-byte pixels.
// Two luma samples share one src_u[0]/src_v[0] pair. YuvPixel's three outputs
// are stored at byte offsets 2, 1, 0 of each destination pixel (6, 5, 4 for
// the second pixel of a pair); byte 3 is not written by the lines visible
// here (presumably alpha -- confirm).
// The YuvPixel call after the loop converts one more pixel (presumably the
// odd-width tail -- confirm the guard around it).
1420 void I422ToABGRRow_C(const uint8* src_y,
1426 for (x = 0; x < width - 1; x += 2) {
1427 YuvPixel(src_y[0], src_u[0], src_v[0],
1428 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1430 YuvPixel(src_y[1], src_u[0], src_v[0],
1431 rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
1436 rgb_buf += 8; // Advance 2 pixels.
1439 YuvPixel(src_y[0], src_u[0], src_v[0],
1440 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
// Converts a row of planar YUV (src_y, src_u, src_v) to 4-byte pixels.
// Two luma samples share one src_u[0]/src_v[0] pair. YuvPixel's three outputs
// are stored at byte offsets 1, 2, 3 of each destination pixel (5, 6, 7 for
// the second pixel of a pair); byte 0 is not written by the lines visible
// here (presumably alpha -- confirm).
// The YuvPixel call after the loop converts one more pixel (presumably the
// odd-width tail -- confirm the guard around it).
1445 void I422ToRGBARow_C(const uint8* src_y,
1451 for (x = 0; x < width - 1; x += 2) {
1452 YuvPixel(src_y[0], src_u[0], src_v[0],
1453 rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
1455 YuvPixel(src_y[1], src_u[0], src_v[0],
1456 rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);
1461 rgb_buf += 8; // Advance 2 pixels.
1464 YuvPixel(src_y[0], src_u[0], src_v[0],
1465 rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
// Converts a row of luma-only samples to 4-byte pixels by calling YuvPixel
// with the constant 128 for both chroma arguments.
//   src_y   - source luma bytes, one per pixel.
//   rgb_buf - destination, 4 bytes per pixel; offsets 0, 1, 2 receive
//             YuvPixel's outputs (byte 3 is not written by the lines visible
//             here -- presumably alpha, confirm).
//   width   - number of pixels; processed two per loop step, with one more
//             YuvPixel call after the loop (presumably for odd widths).
1470 void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
1472 for (x = 0; x < width - 1; x += 2) {
1473 YuvPixel(src_y[0], 128, 128,
1474 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1476 YuvPixel(src_y[1], 128, 128,
1477 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1480 rgb_buf += 8; // Advance 2 pixels.
1483 YuvPixel(src_y[0], 128, 128,
1484 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
// Writes the bytes of src to dst in reversed order, two bytes per loop step.
// The reads of src[0] and src[-1] imply src walks backwards from the end of
// the row (pointer setup and stepping to be confirmed against the full body).
// The store to dst[width - 1] after the loop fills the last output byte
// (presumably only for odd widths -- confirm the guard).
1489 void MirrorRow_C(const uint8* src, uint8* dst, int width) {
1492 for (x = 0; x < width - 1; x += 2) {
1494 dst[x + 1] = src[-1];
1498 dst[width - 1] = src[0];
// Reverses a row of interleaved UV pairs while splitting it into two planes.
//   src_uv - interleaved source: byte 0 of each pair goes to dst_u, byte 1
//            to dst_v.
//   dst_u, dst_v - planar outputs, width bytes each.
//   width  - number of UV pairs.
// src_uv is first moved to the last pair ((width - 1) * 2 bytes in) and then
// read backwards: offsets 0/1 are the current pair, -2/-1 the preceding one.
1502 void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
1504 src_uv += (width - 1) << 1;
1505 for (x = 0; x < width - 1; x += 2) {
1506 dst_u[x] = src_uv[0];
1507 dst_u[x + 1] = src_uv[-2];
1508 dst_v[x] = src_uv[1];
1509 dst_v[x + 1] = src_uv[-2 + 1];
// Last pair (presumably only when width is odd -- confirm the guard).
1513 dst_u[width - 1] = src_uv[0];
1514 dst_v[width - 1] = src_uv[1];
// Reverses a row of 4-byte pixels by copying whole 32-bit words.
// src and dst are reinterpreted as uint32 arrays; src32[0] and src32[-1] are
// read, which implies src32 walks backwards from the end of the row (pointer
// setup and stepping to be confirmed against the full body).
// NOTE(review): the uint32 casts assume both buffers are 4-byte aligned --
// confirm for callers on strict-alignment targets.
1518 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
1520 const uint32* src32 = (const uint32*)(src);
1521 uint32* dst32 = (uint32*)(dst);
1523 for (x = 0; x < width - 1; x += 2) {
1524 dst32[x] = src32[0];
1525 dst32[x + 1] = src32[-1];
// Last pixel (presumably only when width is odd -- confirm the guard).
1529 dst32[width - 1] = src32[0];
// De-interleaves a row of UV pairs into separate U and V planes.
//   src_uv - interleaved source; even bytes go to dst_u, odd bytes to dst_v.
//   dst_u, dst_v - planar outputs, width bytes each.
//   width  - number of UV pairs.
// Two pairs (4 source bytes) are handled per loop step.
1533 void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
1535 for (x = 0; x < width - 1; x += 2) {
1536 dst_u[x] = src_uv[0];
1537 dst_u[x + 1] = src_uv[2];
1538 dst_v[x] = src_uv[1];
1539 dst_v[x + 1] = src_uv[3];
// Last pair (presumably only when width is odd -- confirm the guard).
1543 dst_u[width - 1] = src_uv[0];
1544 dst_v[width - 1] = src_uv[1];
// Interleaves separate U and V planes into a row of UV pairs.
//   src_u, src_v - planar sources.
//   dst_uv       - interleaved output: U in even bytes, V in odd bytes.
// Two pairs (4 output bytes) are written per loop step.
1548 void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
1551 for (x = 0; x < width - 1; x += 2) {
1552 dst_uv[0] = src_u[x];
1553 dst_uv[1] = src_v[x];
1554 dst_uv[2] = src_u[x + 1];
1555 dst_uv[3] = src_v[x + 1];
// Last pair (presumably only when width is odd -- confirm the guard).
1559 dst_uv[0] = src_u[width - 1];
1560 dst_uv[1] = src_v[width - 1];
// Copies count bytes from src to dst with memcpy.
// NOTE(review): memcpy requires non-overlapping buffers, and count is an int
// converted to size_t -- confirm callers never pass a negative count.
1564 void CopyRow_C(const uint8* src, uint8* dst, int count) {
1565 memcpy(dst, src, count);
// Fills count bytes at dst with the value v8.
// NOTE(review): both a byte loop and a memset call appear; presumably they
// are compiler-specific alternatives selected by preprocessor guards --
// confirm against the full body.
1568 void SetRow_C(uint8* dst, uint32 v8, int count) {
1570 // VC will generate rep stosb.
1572 for (x = 0; x < count; ++x) {
1576 memset(dst, v8, count);
// Visits a width x height rectangle starting at dst, addressing each row
// through a uint32 pointer (one 32-bit element per pixel).
// Presumably every element is set to v32 and dst advances by dst_stride bytes
// per row -- confirm against the full body.
// NOTE(review): the uint32 cast assumes dst (and dst_stride) keep rows
// 4-byte aligned -- confirm.
1580 void ARGBSetRows_C(uint8* dst, uint32 v32, int width,
1581 int dst_stride, int height) {
1583 for (y = 0; y < height; ++y) {
1584 uint32* d = (uint32*)(dst);
1586 for (x = 0; x < width; ++x) {
1593 // Filter 2 rows of YUY2 UV's (422) into U and V (420).
//   src_yuy2        - first of the two source rows.
//   src_stride_yuy2 - byte offset from the first row to the second.
//   dst_u, dst_v    - outputs; one U and one V per two source pixels.
//   width           - source width in pixels (loop steps by 2).
1594 void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
1595 uint8* dst_u, uint8* dst_v, int width) {
1596 // Output a row of UV values, filtering 2 rows of YUY2.
1598 for (x = 0; x < width; x += 2) {
// Rounded average of the two rows: U is byte 1, V is byte 3 of each group.
1599 dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
1600 dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
1607 // Copy row of YUY2 UV's (422) into U and V (422).
//   src_yuy2     - packed source row.
//   dst_u, dst_v - outputs; one U and one V per two source pixels.
//   width        - source width in pixels (loop steps by 2).
1608 void YUY2ToUV422Row_C(const uint8* src_yuy2,
1609 uint8* dst_u, uint8* dst_v, int width) {
1610 // Output a row of UV values.
1612 for (x = 0; x < width; x += 2) {
// U is byte 1 and V is byte 3 of each 4-byte group.
1613 dst_u[0] = src_yuy2[1];
1614 dst_v[0] = src_yuy2[3];
1621 // Copy row of YUY2 Y's (422) into Y (420/422).
//   src_yuy2 - packed source row; luma is at bytes 0 and 2 of each group.
//   dst_y    - output luma, width bytes.
//   width    - number of pixels; two are written per loop step.
1622 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
1623 // Output a row of Y values.
1625 for (x = 0; x < width - 1; x += 2) {
1626 dst_y[x] = src_yuy2[0];
1627 dst_y[x + 1] = src_yuy2[2];
// Last pixel (presumably only when width is odd -- confirm the guard).
1631 dst_y[width - 1] = src_yuy2[0];
1635 // Filter 2 rows of UYVY UV's (422) into U and V (420).
//   src_uyvy        - first of the two source rows.
//   src_stride_uyvy - byte offset from the first row to the second.
//   dst_u, dst_v    - outputs; one U and one V per two source pixels.
//   width           - source width in pixels (loop steps by 2).
1636 void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
1637 uint8* dst_u, uint8* dst_v, int width) {
1638 // Output a row of UV values.
1640 for (x = 0; x < width; x += 2) {
// Rounded average of the two rows: U is byte 0, V is byte 2 of each group.
1641 dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
1642 dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
1649 // Copy row of UYVY UV's (422) into U and V (422).
//   src_uyvy     - packed source row.
//   dst_u, dst_v - outputs; one U and one V per two source pixels.
//   width        - source width in pixels (loop steps by 2).
1650 void UYVYToUV422Row_C(const uint8* src_uyvy,
1651 uint8* dst_u, uint8* dst_v, int width) {
1652 // Output a row of UV values.
1654 for (x = 0; x < width; x += 2) {
// U is byte 0 and V is byte 2 of each 4-byte group.
1655 dst_u[0] = src_uyvy[0];
1656 dst_v[0] = src_uyvy[2];
1663 // Copy row of UYVY Y's (422) into Y (420/422).
//   src_uyvy - packed source row; luma is at bytes 1 and 3 of each group.
//   dst_y    - output luma, width bytes.
//   width    - number of pixels; two are written per loop step.
1664 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
1665 // Output a row of Y values.
1667 for (x = 0; x < width - 1; x += 2) {
1668 dst_y[x] = src_uyvy[1];
1669 dst_y[x + 1] = src_uyvy[3];
// Last pixel (presumably only when width is odd -- confirm the guard).
1673 dst_y[width - 1] = src_uyvy[1];
// Blend one channel: background b is scaled by (256 - a) / 256 and added to
// foreground f. Every argument and the whole expansion are parenthesized so
// the macro stays correct with compound arguments and inside larger
// expressions. Each argument is evaluated exactly once.
#define BLEND(f, b, a) ((((256 - (a)) * (b)) >> 8) + (f))
1679 // Blend src_argb0 over src_argb1 and store to dst_argb.
1680 // dst_argb may be src_argb0 or src_argb1.
1681 // This code mimics the SSSE3 version for better testability.
// Per pixel: the three colour bytes and the alpha byte (offset 3) are read
// from src_argb0, the three colour bytes from src_argb1, and each output
// colour byte is BLEND(foreground, background, alpha). All inputs of a pixel
// are loaded into locals before its outputs are stored, which is what allows
// dst_argb to alias a source.
// NOTE(review): the BLEND result is stored to a uint8 with no clamp in the
// lines visible here, so a sum above 255 would be truncated -- presumably
// src_argb0 is expected to be premultiplied; confirm.
1682 void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
1683 uint8* dst_argb, int width) {
1685 for (x = 0; x < width - 1; x += 2) {
// First pixel of the pair.
1686 uint32 fb = src_argb0[0];
1687 uint32 fg = src_argb0[1];
1688 uint32 fr = src_argb0[2];
1689 uint32 a = src_argb0[3];
1690 uint32 bb = src_argb1[0];
1691 uint32 bg = src_argb1[1];
1692 uint32 br = src_argb1[2];
1693 dst_argb[0] = BLEND(fb, bb, a);
1694 dst_argb[1] = BLEND(fg, bg, a);
1695 dst_argb[2] = BLEND(fr, br, a);
// Second pixel of the pair (byte offsets 4..7).
1698 fb = src_argb0[4 + 0];
1699 fg = src_argb0[4 + 1];
1700 fr = src_argb0[4 + 2];
1701 a = src_argb0[4 + 3];
1702 bb = src_argb1[4 + 0];
1703 bg = src_argb1[4 + 1];
1704 br = src_argb1[4 + 2];
1705 dst_argb[4 + 0] = BLEND(fb, bb, a);
1706 dst_argb[4 + 1] = BLEND(fg, bg, a);
1707 dst_argb[4 + 2] = BLEND(fr, br, a);
// Output alpha is forced to fully opaque.
1708 dst_argb[4 + 3] = 255u;
// One more pixel (presumably only when width is odd -- confirm the guard).
1715 uint32 fb = src_argb0[0];
1716 uint32 fg = src_argb0[1];
1717 uint32 fr = src_argb0[2];
1718 uint32 a = src_argb0[3];
1719 uint32 bb = src_argb1[0];
1720 uint32 bg = src_argb1[1];
1721 uint32 br = src_argb1[2];
1722 dst_argb[0] = BLEND(fb, bb, a);
1723 dst_argb[1] = BLEND(fg, bg, a);
1724 dst_argb[2] = BLEND(fr, br, a);
// Multiply channel f by alpha a: both 8-bit values are replicated into 16
// bits (v | v << 8), multiplied, and the top 8 bits of the 32-bit product are
// kept. Arguments and the whole expansion are parenthesized so the macro is
// safe with compound arguments and inside larger expressions.
// Note: each argument is evaluated twice, so avoid arguments with side
// effects; pass unsigned 32-bit values so the product cannot overflow int.
#define ATTENUATE(f, a) ((((a) | ((a) << 8)) * ((f) | ((f) << 8))) >> 24)
1731 // Multiply source RGB by alpha and store to destination.
1732 // This code mimics the SSSE3 version for better testability.
// Per pixel: bytes 0..2 are the colour channels and byte 3 is alpha; each
// output colour byte is ATTENUATE(channel, alpha). Two pixels are processed
// per loop step, with one more pixel handled after the loop (presumably only
// when width is odd -- confirm the guard).
// NOTE(review): the store of the alpha byte is not among the lines visible
// here -- presumably it is copied through unchanged; confirm.
1733 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
1735 for (i = 0; i < width - 1; i += 2) {
1736 uint32 b = src_argb[0];
1737 uint32 g = src_argb[1];
1738 uint32 r = src_argb[2];
1739 uint32 a = src_argb[3];
1740 dst_argb[0] = ATTENUATE(b, a);
1741 dst_argb[1] = ATTENUATE(g, a);
1742 dst_argb[2] = ATTENUATE(r, a);
// Second pixel of the pair (b, g, r, a presumably reloaded from offsets 4..7).
1748 dst_argb[4] = ATTENUATE(b, a);
1749 dst_argb[5] = ATTENUATE(g, a);
1750 dst_argb[6] = ATTENUATE(r, a);
1757 const uint32 b = src_argb[0];
1758 const uint32 g = src_argb[1];
1759 const uint32 r = src_argb[2];
1760 const uint32 a = src_argb[3];
1761 dst_argb[0] = ATTENUATE(b, a);
1762 dst_argb[1] = ATTENUATE(g, a);
1763 dst_argb[2] = ATTENUATE(r, a);
1769 // Divide source RGB by alpha and store to destination.
1770 // b = (b * 255 + (a / 2)) / a;
1771 // g = (g * 255 + (a / 2)) / a;
1772 // r = (r * 255 + (a / 2)) / a;
1773 // Reciprocal method is off by 1 on some values, e.g. 125.
1774 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
// Table entry for alpha a: 0x0100 in the upper 16 bits plus 0x10000 / a in
// the lower 16 bits (a must be non-zero). The argument and the whole
// expansion are parenthesized so the macro is safe with compound arguments
// and inside larger expressions.
#define T(a) (0x01000000 + (0x10000 / (a)))
// Lookup table with one entry per 8-bit alpha value (256 entries).
// Entries 0 and 1 are written out as literals: T(0) would divide by zero and
// T(1) would carry out of the low 16 bits. The final entry is the literal
// 0x01000100 rather than T(0xff). Every other entry is T(alpha).
1776 const uint32 fixed_invtbl8[256] = {
1777 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
1778 T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
1779 T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
1780 T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
1781 T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
1782 T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
1783 T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
1784 T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
1785 T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
1786 T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
1787 T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
1788 T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
1789 T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
1790 T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
1791 T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
1792 T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
1793 T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
1794 T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
1795 T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
1796 T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
1797 T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
1798 T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
1799 T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
1800 T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
1801 T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
1802 T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
1803 T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
1804 T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
1805 T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
1806 T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
1807 T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
1808 T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };
// Un-premultiplies a row of 4-byte pixels: for each pixel the colour bytes
// (offsets 0..2) are scaled using the reciprocal of the alpha byte (offset 3)
// looked up in fixed_invtbl8, then clamped to 255 and stored.
//   src_argb - source pixels, 4 bytes each.
//   dst_argb - destination pixels, 4 bytes each.
//   width    - number of pixels.
// NOTE(review): the multiply/shift that applies ia and the store of the alpha
// byte are not among the lines visible here -- confirm against the full body.
1811 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
1813 for (i = 0; i < width; ++i) {
1814 uint32 b = src_argb[0];
1815 uint32 g = src_argb[1];
1816 uint32 r = src_argb[2];
1817 const uint32 a = src_argb[3];
// Only the low 16 bits of the table entry (the reciprocal) are used here.
1818 const uint32 ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
1822 // Clamping should not be necessary but is free in assembly.
1823 dst_argb[0] = clamp255(b);
1824 dst_argb[1] = clamp255(g);
1825 dst_argb[2] = clamp255(r);
// Builds one row of a 4-channel cumulative-sum table.
//   row             - source pixels, 4 bytes per pixel.
//   cumsum          - output, 4 int32 values per pixel.
//   previous_cumsum - the output row computed for the row above.
//   width           - number of pixels.
// For every pixel x and channel c:
//   cumsum[x][c] = (sum of row[0..x][c]) + previous_cumsum[x][c].
1832 void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
1833 const int32* previous_cumsum, int width) {
// Running per-channel sum along the current row.
1834 int32 row_sum[4] = {0, 0, 0, 0};
1836 for (x = 0; x < width; ++x) {
1837 row_sum[0] += row[x * 4 + 0];
1838 row_sum[1] += row[x * 4 + 1];
1839 row_sum[2] += row[x * 4 + 2];
1840 row_sum[3] += row[x * 4 + 3];
1841 cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0];
1842 cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1];
1843 cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2];
1844 cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3];
// Converts cumulative sums to per-channel averages for count output pixels.
// For each channel c the value (bl[w + c] + tl[c] - bl[c] - tl[w + c]) is
// multiplied by 1 / area and truncated to a byte.
//   tl, bl - two rows of a cumulative-sum table (presumably the top and
//            bottom edges of the averaging box -- confirm against callers).
//   w      - offset, in int32 elements, between the two sampled columns.
//   area   - divisor applied to each difference.
//   dst    - output, 4 bytes per pixel.
// NOTE(review): area == 0 is not guarded; 1.0f / 0 yields infinity and the
// float-to-uint8 conversion would then be undefined -- confirm callers.
1848 void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
1849 int w, int area, uint8* dst, int count) {
// Reciprocal computed once so the loop multiplies instead of dividing.
1850 float ooa = 1.0f / area;
1852 for (i = 0; i < count; ++i) {
1853 dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
1854 dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
1855 dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
1856 dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
1863 // Copy pixels from rotated source to destination row with a slope.
//   src_argb        - source image, 4 bytes per pixel.
//   src_argb_stride - byte distance between source rows.
//   dst_argb        - destination row, one 32-bit pixel written per step.
//   uv_dudv         - float array; elements 2 and 3 are the per-pixel
//                     increments applied to the running source coordinate
//                     (elements 0 and 1 are presumably its start -- confirm).
//   width           - number of destination pixels.
// NOTE(review): the truncated coordinates are used without a range check in
// the lines visible here, and the uint32 loads/stores assume 4-byte
// alignment -- confirm both against callers.
1865 void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
1866 uint8* dst_argb, const float* uv_dudv, int width) {
1868 // Render a row of pixels from source into a buffer.
1872 for (i = 0; i < width; ++i) {
// Truncate the running coordinate to integer source pixel indices.
1873 int x = (int)(uv[0]);
1874 int y = (int)(uv[1]);
1875 *(uint32*)(dst_argb) =
1876 *(const uint32*)(src_argb + y * src_argb_stride +
1879 uv[0] += uv_dudv[2];
1880 uv[1] += uv_dudv[3];
1884 // Blend 2 rows into 1 for conversions such as I422ToI420.
//   src_uv        - first source row.
//   src_uv_stride - byte offset from the first source row to the second.
//   dst_uv        - output row.
//   pix           - number of bytes to produce.
// Each output byte is the rounded average (a + b + 1) >> 1 of the two rows.
1885 void HalfRow_C(const uint8* src_uv, int src_uv_stride,
1886 uint8* dst_uv, int pix) {
1888 for (x = 0; x < pix; ++x) {
1889 dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
1893 // C version 2x2 -> 2x1.
// Blends src_ptr with the row src_stride bytes after it.
//   dst_ptr           - output row, width bytes.
//   src_ptr           - first source row.
//   src_stride        - byte offset to the second source row.
//   width             - number of bytes to produce.
//   source_y_fraction - weight of the second row out of 256; the first row
//                       gets 256 - source_y_fraction.
// Fractions 0 and 128 are special-cased (plain copy and HalfRow_C
// respectively; presumably each returns early -- confirm). Otherwise every
// byte is (row0 * y0_fraction + row1 * y1_fraction) >> 8, two bytes per loop
// step plus one more byte after the loop (presumably for odd widths).
1894 void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
1895 ptrdiff_t src_stride,
1896 int width, int source_y_fraction) {
1897 int y1_fraction = source_y_fraction;
1898 int y0_fraction = 256 - y1_fraction;
1899 const uint8* src_ptr1 = src_ptr + src_stride;
1901 if (source_y_fraction == 0) {
1902 memcpy(dst_ptr, src_ptr, width);
1905 if (source_y_fraction == 128) {
1906 HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
1909 for (x = 0; x < width - 1; x += 2) {
1910 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
1911 dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
1917 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
1921 // Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG
//   src_argb  - source pixels, 4 bytes each.
//   dst_bayer - output, one byte per pixel.
//   selector  - byte 0 is the source byte offset used for the first output
//               of each pair, byte 1 the offset used for the second.
//   pix       - number of output bytes; two are written per loop step.
// NOTE(review): the offsets index relative to the current src_argb position,
// so the second offset presumably includes the +4 for the next pixel --
// confirm against callers.
1922 void ARGBToBayerRow_C(const uint8* src_argb,
1923 uint8* dst_bayer, uint32 selector, int pix) {
1924 int index0 = selector & 0xff;
1925 int index1 = (selector >> 8) & 0xff;
1926 // Copy a row of Bayer.
1928 for (x = 0; x < pix - 1; x += 2) {
1929 dst_bayer[0] = src_argb[index0];
1930 dst_bayer[1] = src_argb[index1];
// One more output (presumably only when pix is odd -- confirm the guard).
1935 dst_bayer[0] = src_argb[index0];
1939 // Select G channel from ARGB. e.g. GGGGGGGG
//   src_argb  - source pixels, 4 bytes each; byte 1 of each pixel is copied.
//   dst_bayer - output, one byte per pixel.
//   selector  - not referenced by the lines visible here (presumably kept for
//               signature compatibility with ARGBToBayerRow_C -- confirm).
//   pix       - number of output bytes; two are written per loop step.
1940 void ARGBToBayerGGRow_C(const uint8* src_argb,
1941 uint8* dst_bayer, uint32 selector, int pix) {
1944 for (x = 0; x < pix - 1; x += 2) {
// Byte 1 of this pixel and byte 1 of the next pixel (offset 4 + 1).
1945 dst_bayer[0] = src_argb[1];
1946 dst_bayer[1] = src_argb[5];
// One more output (presumably only when pix is odd -- confirm the guard).
1951 dst_bayer[0] = src_argb[1];
1955 // Use first 4 shuffler values to reorder ARGB channels.
//   src_argb - source pixels, 4 bytes each.
//   dst_argb - destination pixels, 4 bytes each.
//   shuffler - shuffler[0..3] are the source byte offsets read for the four
//              bytes of each pixel.
//   pix      - number of pixels.
// NOTE(review): the shuffler values are used as offsets without a range
// check; presumably callers pass values in 0..3 -- confirm.
1956 void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
1957 const uint8* shuffler, int pix) {
1958 int index0 = shuffler[0];
1959 int index1 = shuffler[1];
1960 int index2 = shuffler[2];
1961 int index3 = shuffler[3];
1962 // Shuffle a row of ARGB.
1964 for (x = 0; x < pix; ++x) {
1965 // To support in-place conversion.
// All four bytes are read into locals before any destination byte is stored.
1966 uint8 b = src_argb[index0];
1967 uint8 g = src_argb[index1];
1968 uint8 r = src_argb[index2];
1969 uint8 a = src_argb[index3];
// Packs planar Y, U and V samples into 4-byte groups laid out as
// Y0, U, Y1, V. Each group consumes two luma samples and one sample from
// each chroma plane.
// The group written after the loop repeats the single remaining luma sample
// in both Y slots (presumably only when width is odd -- confirm the guard).
1979 void I422ToYUY2Row_C(const uint8* src_y,
1982 uint8* dst_frame, int width) {
1984 for (x = 0; x < width - 1; x += 2) {
1985 dst_frame[0] = src_y[0];
1986 dst_frame[1] = src_u[0];
1987 dst_frame[2] = src_y[1];
1988 dst_frame[3] = src_v[0];
1995 dst_frame[0] = src_y[0];
1996 dst_frame[1] = src_u[0];
1997 dst_frame[2] = src_y[0]; // duplicate last y
1998 dst_frame[3] = src_v[0];
// Packs planar Y, U and V samples into 4-byte groups laid out as
// U, Y0, V, Y1. Each group consumes two luma samples and one sample from
// each chroma plane.
// The group written after the loop repeats the single remaining luma sample
// in both Y slots (presumably only when width is odd -- confirm the guard).
2002 void I422ToUYVYRow_C(const uint8* src_y,
2005 uint8* dst_frame, int width) {
2007 for (x = 0; x < width - 1; x += 2) {
2008 dst_frame[0] = src_u[0];
2009 dst_frame[1] = src_y[0];
2010 dst_frame[2] = src_v[0];
2011 dst_frame[3] = src_y[1];
2018 dst_frame[0] = src_u[0];
2019 dst_frame[1] = src_y[0];
2020 dst_frame[2] = src_v[0];
2021 dst_frame[3] = src_y[0]; // duplicate last y
2025 #if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3)
2026 // row_win.cc has asm version, but GCC uses 2 step wrapper.
2027 #if !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
// Two-step wrapper: converts the planar input to a temporary ARGB row with
// I422ToARGBRow_SSSE3, then packs that row to RGB565 with
// ARGBToRGB565Row_SSE2. The temporary row is width * 4 bytes and is released
// before returning.
// NOTE(review): no check of the buffer allocation result is visible here --
// confirm how align_buffer_64 reports failure.
2028 void I422ToRGB565Row_SSSE3(const uint8* src_y,
2033 // Allocate a row of ARGB.
2034 align_buffer_64(row, width * 4);
2035 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2036 ARGBToRGB565Row_SSE2(row, rgb_buf, width);
2037 free_aligned_buffer_64(row);
2039 #endif // !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
2041 #if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
// Two-step wrapper: converts the planar input to a temporary ARGB row with
// I422ToARGBRow_SSSE3, then packs that row to ARGB1555 with
// ARGBToARGB1555Row_SSE2. The temporary row is width * 4 bytes and is
// released before returning.
// NOTE(review): no check of the buffer allocation result is visible here --
// confirm how align_buffer_64 reports failure.
2042 void I422ToARGB1555Row_SSSE3(const uint8* src_y,
2047 // Allocate a row of ARGB.
2048 align_buffer_64(row, width * 4);
2049 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2050 ARGBToARGB1555Row_SSE2(row, rgb_buf, width);
2051 free_aligned_buffer_64(row);
// Two-step wrapper: converts the planar input to a temporary ARGB row with
// I422ToARGBRow_SSSE3, then packs that row to ARGB4444 with
// ARGBToARGB4444Row_SSE2. The temporary row is width * 4 bytes and is
// released before returning.
// NOTE(review): no check of the buffer allocation result is visible here --
// confirm how align_buffer_64 reports failure.
2054 void I422ToARGB4444Row_SSSE3(const uint8* src_y,
2059 // Allocate a row of ARGB.
2060 align_buffer_64(row, width * 4);
2061 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2062 ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
2063 free_aligned_buffer_64(row);
// Two-step wrapper: converts NV12 (src_y plus interleaved src_uv) to a
// temporary ARGB row with NV12ToARGBRow_SSSE3, then packs that row to RGB565
// with ARGBToRGB565Row_SSE2. The temporary row is width * 4 bytes and is
// released before returning.
// NOTE(review): no check of the buffer allocation result is visible here --
// confirm how align_buffer_64 reports failure.
2066 void NV12ToRGB565Row_SSSE3(const uint8* src_y,
2067 const uint8* src_uv,
2070 // Allocate a row of ARGB.
2071 align_buffer_64(row, width * 4);
2072 NV12ToARGBRow_SSSE3(src_y, src_uv, row, width);
2073 ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
2074 free_aligned_buffer_64(row);
// Two-step wrapper: converts NV21 (src_y plus interleaved src_vu) to a
// temporary ARGB row with NV21ToARGBRow_SSSE3, then packs that row to RGB565
// with ARGBToRGB565Row_SSE2. The temporary row is width * 4 bytes and is
// released before returning.
// NOTE(review): no check of the buffer allocation result is visible here --
// confirm how align_buffer_64 reports failure.
2077 void NV21ToRGB565Row_SSSE3(const uint8* src_y,
2078 const uint8* src_vu,
2081 // Allocate a row of ARGB.
2082 align_buffer_64(row, width * 4);
2083 NV21ToARGBRow_SSSE3(src_y, src_vu, row, width);
2084 ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
2085 free_aligned_buffer_64(row);
// Two-step wrapper: splits the packed YUY2 row into temporary planar Y, U
// and V rows, then converts those to ARGB with I422ToARGBRow_SSSE3.
// One buffer holds all three planes: width is rounded up to a multiple of 64
// (padded); Y occupies the first padded bytes, U the next padded / 2 and V
// the final padded / 2, for padded * 2 bytes in total.
2088 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
2091 // Allocate rows of YUV (Y, U and V planes share one buffer).
2092 align_buffer_64(row_y, ((width + 63) & ~63) * 2);
2093 uint8* row_u = row_y + ((width + 63) & ~63);
2094 uint8* row_v = row_u + ((width + 63) & ~63) / 2;
2095 YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width);
2096 YUY2ToYRow_SSE2(src_yuy2, row_y, width);
2097 I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
2098 free_aligned_buffer_64(row_y);
// Same two-step conversion as the aligned YUY2 wrapper, but calling the
// _Unaligned variants of the split and convert routines.
// One buffer holds all three planes: width is rounded up to a multiple of 64
// (padded); Y occupies the first padded bytes, U the next padded / 2 and V
// the final padded / 2, for padded * 2 bytes in total.
2101 void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
2104 // Allocate rows of YUV (Y, U and V planes share one buffer).
2105 align_buffer_64(row_y, ((width + 63) & ~63) * 2);
2106 uint8* row_u = row_y + ((width + 63) & ~63);
2107 uint8* row_v = row_u + ((width + 63) & ~63) / 2;
2108 YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width);
2109 YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width);
2110 I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
2111 free_aligned_buffer_64(row_y);
// Two-step wrapper: splits the packed UYVY row into temporary planar Y, U
// and V rows, then converts those to ARGB with I422ToARGBRow_SSSE3.
// One buffer holds all three planes: width is rounded up to a multiple of 64
// (padded); Y occupies the first padded bytes, U the next padded / 2 and V
// the final padded / 2, for padded * 2 bytes in total.
2114 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
2117 // Allocate rows of YUV (Y, U and V planes share one buffer).
2118 align_buffer_64(row_y, ((width + 63) & ~63) * 2);
2119 uint8* row_u = row_y + ((width + 63) & ~63);
2120 uint8* row_v = row_u + ((width + 63) & ~63) / 2;
2121 UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width);
2122 UYVYToYRow_SSE2(src_uyvy, row_y, width);
2123 I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
2124 free_aligned_buffer_64(row_y);
// Same two-step conversion as the aligned UYVY wrapper, but calling the
// _Unaligned variants of the split and convert routines.
// One buffer holds all three planes: width is rounded up to a multiple of 64
// (padded); Y occupies the first padded bytes, U the next padded / 2 and V
// the final padded / 2, for padded * 2 bytes in total.
2127 void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
2130 // Allocate rows of YUV (Y, U and V planes share one buffer).
2131 align_buffer_64(row_y, ((width + 63) & ~63) * 2);
2132 uint8* row_u = row_y + ((width + 63) & ~63);
2133 uint8* row_v = row_u + ((width + 63) & ~63) / 2;
2134 UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width);
2135 UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
2136 I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
2137 free_aligned_buffer_64(row_y);
2140 #endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
2141 #endif // !defined(LIBYUV_DISABLE_X86)
// Applies a per-channel polynomial to every byte of a row of 4-byte pixels.
//   src_argb - source pixels; bytes 0..3 are handled as channels b, g, r, a.
//   dst_argb - destination pixels.
//   poly     - 16 floats, four per term and one per channel within a term:
//              poly[0..3] are constants and poly[4..7] multiply the channel
//              value. poly[10..15] multiply r2, a2, b3, g3, r3 and a3
//              (presumably squares and cubes of the channel values, with
//              poly[8..9] applied to b2 and g2 -- confirm against the full
//              body).
// Each result is truncated to int32 and passed through Clamp before storing.
2143 void ARGBPolynomialRow_C(const uint8* src_argb,
2144 uint8* dst_argb, const float* poly,
2147 for (i = 0; i < width; ++i) {
2148 float b = (float)(src_argb[0]);
2149 float g = (float)(src_argb[1]);
2150 float r = (float)(src_argb[2]);
2151 float a = (float)(src_argb[3]);
// Constant and linear terms.
2156 float db = poly[0] + poly[4] * b;
2157 float dg = poly[1] + poly[5] * g;
2158 float dr = poly[2] + poly[6] * r;
2159 float da = poly[3] + poly[7] * a;
// Higher-order terms.
2166 dr += poly[10] * r2;
2167 da += poly[11] * a2;
2168 db += poly[12] * b3;
2169 dg += poly[13] * g3;
2170 dr += poly[14] * r3;
2171 da += poly[15] * a3;
2173 dst_argb[0] = Clamp((int32)(db));
2174 dst_argb[1] = Clamp((int32)(dg));
2175 dst_argb[2] = Clamp((int32)(dr));
2176 dst_argb[3] = Clamp((int32)(da));
// Remaps the colour bytes of each pixel through a table row chosen by the
// pixel's weighted luminance.
//   src_argb  - source pixels, 4 bytes each.
//   dst_argb  - destination pixels, 4 bytes each.
//   width     - number of pixels; two are processed per loop step.
//   luma      - base of the lookup table. The weighted sum is masked with
//               0x7F00, so the selected row starts at a multiple of 256 bytes
//               no greater than 0x7F00 from the base.
//   lumacoeff - weights packed one per byte: bits 0-7 for byte 0 of a pixel,
//               bits 8-15 for byte 1, bits 16-23 for byte 2.
// Bytes 0..2 are replaced by row[byte]; byte 3 is copied unchanged.
2182 void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
2183 const uint8* luma, uint32 lumacoeff) {
2184 uint32 bc = lumacoeff & 0xff;
2185 uint32 gc = (lumacoeff >> 8) & 0xff;
2186 uint32 rc = (lumacoeff >> 16) & 0xff;
2189 for (i = 0; i < width - 1; i += 2) {
2190 // Luminance in rows, color values in columns.
2191 const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
2192 src_argb[2] * rc) & 0x7F00u) + luma;
2194 dst_argb[0] = luma0[src_argb[0]];
2195 dst_argb[1] = luma0[src_argb[1]];
2196 dst_argb[2] = luma0[src_argb[2]];
2197 dst_argb[3] = src_argb[3];
// Second pixel of the pair gets its own table row.
2198 luma1 = ((src_argb[4] * bc + src_argb[5] * gc +
2199 src_argb[6] * rc) & 0x7F00u) + luma;
2200 dst_argb[4] = luma1[src_argb[4]];
2201 dst_argb[5] = luma1[src_argb[5]];
2202 dst_argb[6] = luma1[src_argb[6]];
2203 dst_argb[7] = src_argb[7];
// One more pixel (presumably only when width is odd -- confirm the guard).
2208 // Luminance in rows, color values in columns.
2209 const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
2210 src_argb[2] * rc) & 0x7F00u) + luma;
2211 dst_argb[0] = luma0[src_argb[0]];
2212 dst_argb[1] = luma0[src_argb[1]];
2213 dst_argb[2] = luma0[src_argb[2]];
2214 dst_argb[3] = src_argb[3];
// Walks a row two pixels per loop step (i advances by 2 while i < width - 1).
// NOTE(review): presumably copies the alpha byte of each src pixel into the
// matching dst pixel and leaves the colour bytes of dst untouched -- confirm
// against the full body.
2218 void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
2220 for (i = 0; i < width - 1; i += 2) {
// Walks a row two pixels per loop step (i advances by 2 while i < width - 1).
// NOTE(review): presumably writes each byte of a luma row (src) into the
// alpha byte of the matching 4-byte dst pixel -- confirm against the full
// body.
2231 void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
2233 for (i = 0; i < width - 1; i += 2) {
2246 } // namespace libyuv