2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
11 #include "libyuv/row.h"
13 #include <string.h> // For memcpy and memset.
15 #include "libyuv/basic_types.h"
22 // LLVM for x86 handles the ternary operator poorly, so use branchless min/max.
24 #define USE_BRANCHLESS 1
26 static __inline int32 clamp0(int32 v) {
27 return ((-(v) >> 31) & (v));
30 static __inline int32 clamp255(int32 v) {
31 return (((255 - (v)) >> 31) | (v)) & 255;
34 static __inline uint32 Clamp(int32 val) {
36 return (uint32)(clamp255(v));
39 static __inline uint32 Abs(int32 v) {
43 #else // USE_BRANCHLESS
44 static __inline int32 clamp0(int32 v) {
45 return (v < 0) ? 0 : v;
48 static __inline int32 clamp255(int32 v) {
49 return (v > 255) ? 255 : v;
52 static __inline uint32 Clamp(int32 val) {
54 return (uint32)(clamp255(v));
57 static __inline uint32 Abs(int32 v) {
58 return (v < 0) ? -v : v;
60 #endif // USE_BRANCHLESS
62 #ifdef LIBYUV_LITTLE_ENDIAN
63 #define WRITEWORD(p, v) *(uint32*)(p) = v
65 static inline void WRITEWORD(uint8* p, uint32 v) {
66 p[0] = (uint8)(v & 255);
67 p[1] = (uint8)((v >> 8) & 255);
68 p[2] = (uint8)((v >> 16) & 255);
69 p[3] = (uint8)((v >> 24) & 255);
73 void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
75 for (x = 0; x < width; ++x) {
76 uint8 b = src_rgb24[0];
77 uint8 g = src_rgb24[1];
78 uint8 r = src_rgb24[2];
88 void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
90 for (x = 0; x < width; ++x) {
103 void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
105 for (x = 0; x < width; ++x) {
106 uint8 b = src_rgb565[0] & 0x1f;
107 uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
108 uint8 r = src_rgb565[1] >> 3;
109 dst_argb[0] = (b << 3) | (b >> 2);
110 dst_argb[1] = (g << 2) | (g >> 4);
111 dst_argb[2] = (r << 3) | (r >> 2);
118 void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
121 for (x = 0; x < width; ++x) {
122 uint8 b = src_argb1555[0] & 0x1f;
123 uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
124 uint8 r = (src_argb1555[1] & 0x7c) >> 2;
125 uint8 a = src_argb1555[1] >> 7;
126 dst_argb[0] = (b << 3) | (b >> 2);
127 dst_argb[1] = (g << 3) | (g >> 2);
128 dst_argb[2] = (r << 3) | (r >> 2);
135 void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
138 for (x = 0; x < width; ++x) {
139 uint8 b = src_argb4444[0] & 0x0f;
140 uint8 g = src_argb4444[0] >> 4;
141 uint8 r = src_argb4444[1] & 0x0f;
142 uint8 a = src_argb4444[1] >> 4;
143 dst_argb[0] = (b << 4) | b;
144 dst_argb[1] = (g << 4) | g;
145 dst_argb[2] = (r << 4) | r;
146 dst_argb[3] = (a << 4) | a;
152 void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
154 for (x = 0; x < width; ++x) {
155 uint8 b = src_argb[0];
156 uint8 g = src_argb[1];
157 uint8 r = src_argb[2];
166 void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
168 for (x = 0; x < width; ++x) {
169 uint8 b = src_argb[0];
170 uint8 g = src_argb[1];
171 uint8 r = src_argb[2];
180 void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
182 for (x = 0; x < width - 1; x += 2) {
183 uint8 b0 = src_argb[0] >> 3;
184 uint8 g0 = src_argb[1] >> 2;
185 uint8 r0 = src_argb[2] >> 3;
186 uint8 b1 = src_argb[4] >> 3;
187 uint8 g1 = src_argb[5] >> 2;
188 uint8 r1 = src_argb[6] >> 3;
189 WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
190 (b1 << 16) | (g1 << 21) | (r1 << 27));
195 uint8 b0 = src_argb[0] >> 3;
196 uint8 g0 = src_argb[1] >> 2;
197 uint8 r0 = src_argb[2] >> 3;
198 *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
202 void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
204 for (x = 0; x < width - 1; x += 2) {
205 uint8 b0 = src_argb[0] >> 3;
206 uint8 g0 = src_argb[1] >> 3;
207 uint8 r0 = src_argb[2] >> 3;
208 uint8 a0 = src_argb[3] >> 7;
209 uint8 b1 = src_argb[4] >> 3;
210 uint8 g1 = src_argb[5] >> 3;
211 uint8 r1 = src_argb[6] >> 3;
212 uint8 a1 = src_argb[7] >> 7;
213 *(uint32*)(dst_rgb) =
214 b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
215 (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
220 uint8 b0 = src_argb[0] >> 3;
221 uint8 g0 = src_argb[1] >> 3;
222 uint8 r0 = src_argb[2] >> 3;
223 uint8 a0 = src_argb[3] >> 7;
224 *(uint16*)(dst_rgb) =
225 b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
229 void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
231 for (x = 0; x < width - 1; x += 2) {
232 uint8 b0 = src_argb[0] >> 4;
233 uint8 g0 = src_argb[1] >> 4;
234 uint8 r0 = src_argb[2] >> 4;
235 uint8 a0 = src_argb[3] >> 4;
236 uint8 b1 = src_argb[4] >> 4;
237 uint8 g1 = src_argb[5] >> 4;
238 uint8 r1 = src_argb[6] >> 4;
239 uint8 a1 = src_argb[7] >> 4;
240 *(uint32*)(dst_rgb) =
241 b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
242 (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
247 uint8 b0 = src_argb[0] >> 4;
248 uint8 g0 = src_argb[1] >> 4;
249 uint8 r0 = src_argb[2] >> 4;
250 uint8 a0 = src_argb[3] >> 4;
251 *(uint16*)(dst_rgb) =
252 b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
256 static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
257 return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
260 static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
261 return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
263 static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
264 return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
267 #define MAKEROWY(NAME, R, G, B, BPP) \
268 void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
270 for (x = 0; x < width; ++x) { \
271 dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
276 void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \
277 uint8* dst_u, uint8* dst_v, int width) { \
278 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
280 for (x = 0; x < width - 1; x += 2) { \
281 uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + \
282 src_rgb1[B] + src_rgb1[B + BPP]) >> 2; \
283 uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + \
284 src_rgb1[G] + src_rgb1[G + BPP]) >> 2; \
285 uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + \
286 src_rgb1[R] + src_rgb1[R + BPP]) >> 2; \
287 dst_u[0] = RGBToU(ar, ag, ab); \
288 dst_v[0] = RGBToV(ar, ag, ab); \
289 src_rgb0 += BPP * 2; \
290 src_rgb1 += BPP * 2; \
295 uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \
296 uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \
297 uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \
298 dst_u[0] = RGBToU(ar, ag, ab); \
299 dst_v[0] = RGBToV(ar, ag, ab); \
303 MAKEROWY(ARGB, 2, 1, 0, 4)
304 MAKEROWY(BGRA, 1, 2, 3, 4)
305 MAKEROWY(ABGR, 0, 1, 2, 4)
306 MAKEROWY(RGBA, 3, 2, 1, 4)
307 MAKEROWY(RGB24, 2, 1, 0, 3)
308 MAKEROWY(RAW, 0, 1, 2, 3)
311 // JPEG uses a variation on BT.601-1 full range
312 // y = 0.29900 * r + 0.58700 * g + 0.11400 * b
313 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
314 // v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
315 // BT.601 MPEG range uses:
316 // b 0.1016 * 255 = 25.908 = 25
317 // g 0.5078 * 255 = 129.489 = 129
318 // r 0.2578 * 255 = 65.739 = 66
319 // JPEG 8 bit Y (not used):
320 // b 0.11400 * 256 = 29.184 = 29
321 // g 0.58700 * 256 = 150.272 = 150
322 // r 0.29900 * 256 = 76.544 = 77
324 // b 0.11400 * 128 = 14.592 = 15
325 // g 0.58700 * 128 = 75.136 = 75
326 // r 0.29900 * 128 = 38.272 = 38
328 // b 0.50000 * 255 = 127.5 = 127
329 // g -0.33126 * 255 = -84.4713 = -84
330 // r -0.16874 * 255 = -43.0287 = -43
332 // b -0.08131 * 255 = -20.73405 = -20
333 // g -0.41869 * 255 = -106.76595 = -107
334 // r 0.50000 * 255 = 127.5 = 127
336 static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
337 return (38 * r + 75 * g + 15 * b + 64) >> 7;
340 static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
341 return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
343 static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
344 return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
// Average of two values, rounding halves up.
#define AVGB(a, b) ((1 + (a) + (b)) >> 1)
349 #define MAKEROWYJ(NAME, R, G, B, BPP) \
350 void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
352 for (x = 0; x < width; ++x) { \
353 dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
358 void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \
359 uint8* dst_u, uint8* dst_v, int width) { \
360 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
362 for (x = 0; x < width - 1; x += 2) { \
363 uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
364 AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
365 uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
366 AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
367 uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
368 AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
369 dst_u[0] = RGBToUJ(ar, ag, ab); \
370 dst_v[0] = RGBToVJ(ar, ag, ab); \
371 src_rgb0 += BPP * 2; \
372 src_rgb1 += BPP * 2; \
377 uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \
378 uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \
379 uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \
380 dst_u[0] = RGBToUJ(ar, ag, ab); \
381 dst_v[0] = RGBToVJ(ar, ag, ab); \
385 MAKEROWYJ(ARGB, 2, 1, 0, 4)
388 void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
390 for (x = 0; x < width; ++x) {
391 uint8 b = src_rgb565[0] & 0x1f;
392 uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
393 uint8 r = src_rgb565[1] >> 3;
394 b = (b << 3) | (b >> 2);
395 g = (g << 2) | (g >> 4);
396 r = (r << 3) | (r >> 2);
397 dst_y[0] = RGBToY(r, g, b);
403 void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
405 for (x = 0; x < width; ++x) {
406 uint8 b = src_argb1555[0] & 0x1f;
407 uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
408 uint8 r = (src_argb1555[1] & 0x7c) >> 2;
409 b = (b << 3) | (b >> 2);
410 g = (g << 3) | (g >> 2);
411 r = (r << 3) | (r >> 2);
412 dst_y[0] = RGBToY(r, g, b);
418 void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
420 for (x = 0; x < width; ++x) {
421 uint8 b = src_argb4444[0] & 0x0f;
422 uint8 g = src_argb4444[0] >> 4;
423 uint8 r = src_argb4444[1] & 0x0f;
427 dst_y[0] = RGBToY(r, g, b);
433 void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
434 uint8* dst_u, uint8* dst_v, int width) {
435 const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
437 for (x = 0; x < width - 1; x += 2) {
438 uint8 b0 = src_rgb565[0] & 0x1f;
439 uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
440 uint8 r0 = src_rgb565[1] >> 3;
441 uint8 b1 = src_rgb565[2] & 0x1f;
442 uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
443 uint8 r1 = src_rgb565[3] >> 3;
444 uint8 b2 = next_rgb565[0] & 0x1f;
445 uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
446 uint8 r2 = next_rgb565[1] >> 3;
447 uint8 b3 = next_rgb565[2] & 0x1f;
448 uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
449 uint8 r3 = next_rgb565[3] >> 3;
450 uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787.
451 uint8 g = (g0 + g1 + g2 + g3);
452 uint8 r = (r0 + r1 + r2 + r3);
453 b = (b << 1) | (b >> 6); // 787 -> 888.
454 r = (r << 1) | (r >> 6);
455 dst_u[0] = RGBToU(r, g, b);
456 dst_v[0] = RGBToV(r, g, b);
463 uint8 b0 = src_rgb565[0] & 0x1f;
464 uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
465 uint8 r0 = src_rgb565[1] >> 3;
466 uint8 b2 = next_rgb565[0] & 0x1f;
467 uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
468 uint8 r2 = next_rgb565[1] >> 3;
469 uint8 b = (b0 + b2); // 565 * 2 = 676.
472 b = (b << 2) | (b >> 4); // 676 -> 888
473 g = (g << 1) | (g >> 6);
474 r = (r << 2) | (r >> 4);
475 dst_u[0] = RGBToU(r, g, b);
476 dst_v[0] = RGBToV(r, g, b);
480 void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
481 uint8* dst_u, uint8* dst_v, int width) {
482 const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
484 for (x = 0; x < width - 1; x += 2) {
485 uint8 b0 = src_argb1555[0] & 0x1f;
486 uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
487 uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
488 uint8 b1 = src_argb1555[2] & 0x1f;
489 uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
490 uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
491 uint8 b2 = next_argb1555[0] & 0x1f;
492 uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
493 uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
494 uint8 b3 = next_argb1555[2] & 0x1f;
495 uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
496 uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
497 uint8 b = (b0 + b1 + b2 + b3); // 555 * 4 = 777.
498 uint8 g = (g0 + g1 + g2 + g3);
499 uint8 r = (r0 + r1 + r2 + r3);
500 b = (b << 1) | (b >> 6); // 777 -> 888.
501 g = (g << 1) | (g >> 6);
502 r = (r << 1) | (r >> 6);
503 dst_u[0] = RGBToU(r, g, b);
504 dst_v[0] = RGBToV(r, g, b);
511 uint8 b0 = src_argb1555[0] & 0x1f;
512 uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
513 uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
514 uint8 b2 = next_argb1555[0] & 0x1f;
515 uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
516 uint8 r2 = next_argb1555[1] >> 3;
517 uint8 b = (b0 + b2); // 555 * 2 = 666.
520 b = (b << 2) | (b >> 4); // 666 -> 888.
521 g = (g << 2) | (g >> 4);
522 r = (r << 2) | (r >> 4);
523 dst_u[0] = RGBToU(r, g, b);
524 dst_v[0] = RGBToV(r, g, b);
528 void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
529 uint8* dst_u, uint8* dst_v, int width) {
530 const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
532 for (x = 0; x < width - 1; x += 2) {
533 uint8 b0 = src_argb4444[0] & 0x0f;
534 uint8 g0 = src_argb4444[0] >> 4;
535 uint8 r0 = src_argb4444[1] & 0x0f;
536 uint8 b1 = src_argb4444[2] & 0x0f;
537 uint8 g1 = src_argb4444[2] >> 4;
538 uint8 r1 = src_argb4444[3] & 0x0f;
539 uint8 b2 = next_argb4444[0] & 0x0f;
540 uint8 g2 = next_argb4444[0] >> 4;
541 uint8 r2 = next_argb4444[1] & 0x0f;
542 uint8 b3 = next_argb4444[2] & 0x0f;
543 uint8 g3 = next_argb4444[2] >> 4;
544 uint8 r3 = next_argb4444[3] & 0x0f;
545 uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666.
546 uint8 g = (g0 + g1 + g2 + g3);
547 uint8 r = (r0 + r1 + r2 + r3);
548 b = (b << 2) | (b >> 4); // 666 -> 888.
549 g = (g << 2) | (g >> 4);
550 r = (r << 2) | (r >> 4);
551 dst_u[0] = RGBToU(r, g, b);
552 dst_v[0] = RGBToV(r, g, b);
559 uint8 b0 = src_argb4444[0] & 0x0f;
560 uint8 g0 = src_argb4444[0] >> 4;
561 uint8 r0 = src_argb4444[1] & 0x0f;
562 uint8 b2 = next_argb4444[0] & 0x0f;
563 uint8 g2 = next_argb4444[0] >> 4;
564 uint8 r2 = next_argb4444[1] & 0x0f;
565 uint8 b = (b0 + b2); // 444 * 2 = 555.
568 b = (b << 3) | (b >> 2); // 555 -> 888.
569 g = (g << 3) | (g >> 2);
570 r = (r << 3) | (r >> 2);
571 dst_u[0] = RGBToU(r, g, b);
572 dst_v[0] = RGBToV(r, g, b);
576 void ARGBToUV444Row_C(const uint8* src_argb,
577 uint8* dst_u, uint8* dst_v, int width) {
579 for (x = 0; x < width; ++x) {
580 uint8 ab = src_argb[0];
581 uint8 ag = src_argb[1];
582 uint8 ar = src_argb[2];
583 dst_u[0] = RGBToU(ar, ag, ab);
584 dst_v[0] = RGBToV(ar, ag, ab);
591 void ARGBToUV422Row_C(const uint8* src_argb,
592 uint8* dst_u, uint8* dst_v, int width) {
594 for (x = 0; x < width - 1; x += 2) {
595 uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
596 uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
597 uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
598 dst_u[0] = RGBToU(ar, ag, ab);
599 dst_v[0] = RGBToV(ar, ag, ab);
605 uint8 ab = src_argb[0];
606 uint8 ag = src_argb[1];
607 uint8 ar = src_argb[2];
608 dst_u[0] = RGBToU(ar, ag, ab);
609 dst_v[0] = RGBToV(ar, ag, ab);
613 void ARGBToUV411Row_C(const uint8* src_argb,
614 uint8* dst_u, uint8* dst_v, int width) {
616 for (x = 0; x < width - 3; x += 4) {
617 uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;
618 uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;
619 uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;
620 dst_u[0] = RGBToU(ar, ag, ab);
621 dst_v[0] = RGBToV(ar, ag, ab);
626 if ((width & 3) == 3) {
627 uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3;
628 uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3;
629 uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3;
630 dst_u[0] = RGBToU(ar, ag, ab);
631 dst_v[0] = RGBToV(ar, ag, ab);
632 } else if ((width & 3) == 2) {
633 uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
634 uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
635 uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
636 dst_u[0] = RGBToU(ar, ag, ab);
637 dst_v[0] = RGBToV(ar, ag, ab);
638 } else if ((width & 3) == 1) {
639 uint8 ab = src_argb[0];
640 uint8 ag = src_argb[1];
641 uint8 ar = src_argb[2];
642 dst_u[0] = RGBToU(ar, ag, ab);
643 dst_v[0] = RGBToV(ar, ag, ab);
647 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
649 for (x = 0; x < width; ++x) {
650 uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
651 dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
652 dst_argb[3] = src_argb[3];
658 // Convert a row of image to Sepia tone.
659 void ARGBSepiaRow_C(uint8* dst_argb, int width) {
661 for (x = 0; x < width; ++x) {
665 int sb = (b * 17 + g * 68 + r * 35) >> 7;
666 int sg = (b * 22 + g * 88 + r * 45) >> 7;
667 int sr = (b * 24 + g * 98 + r * 50) >> 7;
668 // b does not overflow. a is preserved from original.
670 dst_argb[1] = clamp255(sg);
671 dst_argb[2] = clamp255(sr);
676 // Apply color matrix to a row of image. Matrix is signed.
677 // TODO(fbarchard): Consider adding rounding (+32).
678 void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
679 const int8* matrix_argb, int width) {
681 for (x = 0; x < width; ++x) {
686 int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
687 r * matrix_argb[2] + a * matrix_argb[3]) >> 6;
688 int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
689 r * matrix_argb[6] + a * matrix_argb[7]) >> 6;
690 int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
691 r * matrix_argb[10] + a * matrix_argb[11]) >> 6;
692 int sa = (b * matrix_argb[12] + g * matrix_argb[13] +
693 r * matrix_argb[14] + a * matrix_argb[15]) >> 6;
694 dst_argb[0] = Clamp(sb);
695 dst_argb[1] = Clamp(sg);
696 dst_argb[2] = Clamp(sr);
697 dst_argb[3] = Clamp(sa);
703 // Apply color table to a row of image.
704 void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
706 for (x = 0; x < width; ++x) {
711 dst_argb[0] = table_argb[b * 4 + 0];
712 dst_argb[1] = table_argb[g * 4 + 1];
713 dst_argb[2] = table_argb[r * 4 + 2];
714 dst_argb[3] = table_argb[a * 4 + 3];
719 // Apply color table to the B, G and R channels of a row of image.
720 void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
722 for (x = 0; x < width; ++x) {
726 dst_argb[0] = table_argb[b * 4 + 0];
727 dst_argb[1] = table_argb[g * 4 + 1];
728 dst_argb[2] = table_argb[r * 4 + 2];
733 void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
734 int interval_offset, int width) {
736 for (x = 0; x < width; ++x) {
740 dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
741 dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
742 dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
// REPEAT8 copies an 8-bit value into both bytes of a 16-bit value
// (0xAB -> 0xABAB). SHADE multiplies two such 16-bit values and keeps the top
// 8 bits of the 32-bit product. Both expansions are fully parenthesized so
// they stay correct inside larger expressions and with compound arguments.
#define REPEAT8(v) ((v) | ((v) << 8))
#define SHADE(f, v) (((v) * (f)) >> 24)
750 void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
752 const uint32 b_scale = REPEAT8(value & 0xff);
753 const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
754 const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
755 const uint32 a_scale = REPEAT8(value >> 24);
758 for (i = 0; i < width; ++i) {
759 const uint32 b = REPEAT8(src_argb[0]);
760 const uint32 g = REPEAT8(src_argb[1]);
761 const uint32 r = REPEAT8(src_argb[2]);
762 const uint32 a = REPEAT8(src_argb[3]);
763 dst_argb[0] = SHADE(b, b_scale);
764 dst_argb[1] = SHADE(g, g_scale);
765 dst_argb[2] = SHADE(r, r_scale);
766 dst_argb[3] = SHADE(a, a_scale);
// REPEAT8 copies an 8-bit value into both bytes of a 16-bit value
// (0xAB -> 0xABAB). SHADE multiplies that 16-bit value by an 8-bit scale and
// keeps bits 16 and up of the product. Both expansions are fully
// parenthesized so they stay correct inside larger expressions and with
// compound arguments.
#define REPEAT8(v) ((v) | ((v) << 8))
#define SHADE(f, v) (((v) * (f)) >> 16)
777 void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
778 uint8* dst_argb, int width) {
780 for (i = 0; i < width; ++i) {
781 const uint32 b = REPEAT8(src_argb0[0]);
782 const uint32 g = REPEAT8(src_argb0[1]);
783 const uint32 r = REPEAT8(src_argb0[2]);
784 const uint32 a = REPEAT8(src_argb0[3]);
785 const uint32 b_scale = src_argb1[0];
786 const uint32 g_scale = src_argb1[1];
787 const uint32 r_scale = src_argb1[2];
788 const uint32 a_scale = src_argb1[3];
789 dst_argb[0] = SHADE(b, b_scale);
790 dst_argb[1] = SHADE(g, g_scale);
791 dst_argb[2] = SHADE(r, r_scale);
792 dst_argb[3] = SHADE(a, a_scale);
// Saturating add: the sum of f and v, limited to 255 by clamp255. Arguments
// are parenthesized so compound expressions are summed as a whole.
#define SHADE(f, v) clamp255((v) + (f))
803 void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,
804 uint8* dst_argb, int width) {
806 for (i = 0; i < width; ++i) {
807 const int b = src_argb0[0];
808 const int g = src_argb0[1];
809 const int r = src_argb0[2];
810 const int a = src_argb0[3];
811 const int b_add = src_argb1[0];
812 const int g_add = src_argb1[1];
813 const int r_add = src_argb1[2];
814 const int a_add = src_argb1[3];
815 dst_argb[0] = SHADE(b, b_add);
816 dst_argb[1] = SHADE(g, g_add);
817 dst_argb[2] = SHADE(r, r_add);
818 dst_argb[3] = SHADE(a, a_add);
// Saturating subtract: f minus v, limited to 0 from below by clamp0.
// Arguments are parenthesized so a compound v is subtracted as a whole.
#define SHADE(f, v) clamp0((f) - (v))
828 void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
829 uint8* dst_argb, int width) {
831 for (i = 0; i < width; ++i) {
832 const int b = src_argb0[0];
833 const int g = src_argb0[1];
834 const int r = src_argb0[2];
835 const int a = src_argb0[3];
836 const int b_sub = src_argb1[0];
837 const int g_sub = src_argb1[1];
838 const int r_sub = src_argb1[2];
839 const int a_sub = src_argb1[3];
840 dst_argb[0] = SHADE(b, b_sub);
841 dst_argb[1] = SHADE(g, g_sub);
842 dst_argb[2] = SHADE(r, r_sub);
843 dst_argb[3] = SHADE(a, a_sub);
851 // Sobel functions which mimic SSSE3.
852 void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
853 uint8* dst_sobelx, int width) {
855 for (i = 0; i < width; ++i) {
859 int a_sub = src_y0[i + 2];
860 int b_sub = src_y1[i + 2];
861 int c_sub = src_y2[i + 2];
862 int a_diff = a - a_sub;
863 int b_diff = b - b_sub;
864 int c_diff = c - c_sub;
865 int sobel = Abs(a_diff + b_diff * 2 + c_diff);
866 dst_sobelx[i] = (uint8)(clamp255(sobel));
870 void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
871 uint8* dst_sobely, int width) {
873 for (i = 0; i < width; ++i) {
874 int a = src_y0[i + 0];
875 int b = src_y0[i + 1];
876 int c = src_y0[i + 2];
877 int a_sub = src_y1[i + 0];
878 int b_sub = src_y1[i + 1];
879 int c_sub = src_y1[i + 2];
880 int a_diff = a - a_sub;
881 int b_diff = b - b_sub;
882 int c_diff = c - c_sub;
883 int sobel = Abs(a_diff + b_diff * 2 + c_diff);
884 dst_sobely[i] = (uint8)(clamp255(sobel));
888 void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
889 uint8* dst_argb, int width) {
891 for (i = 0; i < width; ++i) {
892 int r = src_sobelx[i];
893 int b = src_sobely[i];
894 int s = clamp255(r + b);
895 dst_argb[0] = (uint8)(s);
896 dst_argb[1] = (uint8)(s);
897 dst_argb[2] = (uint8)(s);
898 dst_argb[3] = (uint8)(255u);
903 void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
904 uint8* dst_y, int width) {
906 for (i = 0; i < width; ++i) {
907 int r = src_sobelx[i];
908 int b = src_sobely[i];
909 int s = clamp255(r + b);
910 dst_y[i] = (uint8)(s);
914 void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
915 uint8* dst_argb, int width) {
917 for (i = 0; i < width; ++i) {
918 int r = src_sobelx[i];
919 int b = src_sobely[i];
920 int g = clamp255(r + b);
921 dst_argb[0] = (uint8)(b);
922 dst_argb[1] = (uint8)(g);
923 dst_argb[2] = (uint8)(r);
924 dst_argb[3] = (uint8)(255u);
929 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
932 for (x = 0; x < width; ++x) {
934 dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
941 // C reference code that mimics the YUV assembly.
// YUV to RGB conversion coefficients in fixed point with 6 fractional bits
// (scaled by 64); YuvPixel shifts the weighted sums right by 6.
943 #define YG 74 /* (int8)(1.164 * 64 + 0.5) */
945 #define UB 127 /* 2.018 * 64 = 129.15, saturated to the int8 maximum of 127 */
946 #define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
950 #define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
951 #define VR 102 /* (int8)(1.596 * 64 + 0.5) */
// Bias terms: what each coefficient pair contributes when U and V are both
// 128. YuvPixel subtracts them so chroma is effectively centered on zero.
// The expansions are parenthesized so they behave as single values under
// negation, multiplication or subtraction.
#define BB (UB * 128 + VB * 128)
#define BG (UG * 128 + VG * 128)
#define BR (UR * 128 + VR * 128)
958 static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
959 uint8* b, uint8* g, uint8* r) {
960 int32 y1 = ((int32)(y) - 16) * YG;
961 *b = Clamp((int32)((u * UB + v * VB) - (BB) + y1) >> 6);
962 *g = Clamp((int32)((u * UG + v * VG) - (BG) + y1) >> 6);
963 *r = Clamp((int32)((u * UR + v * VR) - (BR) + y1) >> 6);
966 #if !defined(LIBYUV_DISABLE_NEON) && \
967 (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
969 // TODO(fbarchard): Remove subsampling from Neon.
970 void I444ToARGBRow_C(const uint8* src_y,
976 for (x = 0; x < width - 1; x += 2) {
977 uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
978 uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
979 YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
981 YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
986 rgb_buf += 8; // Advance 2 pixels.
989 YuvPixel(src_y[0], src_u[0], src_v[0],
990 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
994 void I444ToARGBRow_C(const uint8* src_y,
1000 for (x = 0; x < width; ++x) {
1001 YuvPixel(src_y[0], src_u[0], src_v[0],
1002 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1007 rgb_buf += 4; // Advance 1 pixel.
1011 // Also used for 420
1012 void I422ToARGBRow_C(const uint8* src_y,
1018 for (x = 0; x < width - 1; x += 2) {
1019 YuvPixel(src_y[0], src_u[0], src_v[0],
1020 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1022 YuvPixel(src_y[1], src_u[0], src_v[0],
1023 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1028 rgb_buf += 8; // Advance 2 pixels.
1031 YuvPixel(src_y[0], src_u[0], src_v[0],
1032 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1037 void I422ToRGB24Row_C(const uint8* src_y,
1043 for (x = 0; x < width - 1; x += 2) {
1044 YuvPixel(src_y[0], src_u[0], src_v[0],
1045 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1046 YuvPixel(src_y[1], src_u[0], src_v[0],
1047 rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
1051 rgb_buf += 6; // Advance 2 pixels.
1054 YuvPixel(src_y[0], src_u[0], src_v[0],
1055 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1059 void I422ToRAWRow_C(const uint8* src_y,
1065 for (x = 0; x < width - 1; x += 2) {
1066 YuvPixel(src_y[0], src_u[0], src_v[0],
1067 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1068 YuvPixel(src_y[1], src_u[0], src_v[0],
1069 rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
1073 rgb_buf += 6; // Advance 2 pixels.
1076 YuvPixel(src_y[0], src_u[0], src_v[0],
1077 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1081 void I422ToARGB4444Row_C(const uint8* src_y,
1084 uint8* dst_argb4444,
1093 for (x = 0; x < width - 1; x += 2) {
1094 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1095 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1102 *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
1103 (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000;
1107 dst_argb4444 += 4; // Advance 2 pixels.
1110 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1114 *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
1119 void I422ToARGB1555Row_C(const uint8* src_y,
1122 uint8* dst_argb1555,
1131 for (x = 0; x < width - 1; x += 2) {
1132 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1133 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1140 *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
1141 (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000;
1145 dst_argb1555 += 4; // Advance 2 pixels.
1148 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1152 *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
1157 void I422ToRGB565Row_C(const uint8* src_y,
1169 for (x = 0; x < width - 1; x += 2) {
1170 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1171 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1178 *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1179 (b1 << 16) | (g1 << 21) | (r1 << 27);
1183 dst_rgb565 += 4; // Advance 2 pixels.
1186 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1190 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1194 void I411ToARGBRow_C(const uint8* src_y,
1200 for (x = 0; x < width - 3; x += 4) {
1201 YuvPixel(src_y[0], src_u[0], src_v[0],
1202 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1204 YuvPixel(src_y[1], src_u[0], src_v[0],
1205 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1207 YuvPixel(src_y[2], src_u[0], src_v[0],
1208 rgb_buf + 8, rgb_buf + 9, rgb_buf + 10);
1210 YuvPixel(src_y[3], src_u[0], src_v[0],
1211 rgb_buf + 12, rgb_buf + 13, rgb_buf + 14);
1216 rgb_buf += 16; // Advance 4 pixels.
1219 YuvPixel(src_y[0], src_u[0], src_v[0],
1220 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1222 YuvPixel(src_y[1], src_u[0], src_v[0],
1223 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1226 rgb_buf += 8; // Advance 2 pixels.
1229 YuvPixel(src_y[0], src_u[0], src_v[0],
1230 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1235 void NV12ToARGBRow_C(const uint8* src_y,
1236 const uint8* usrc_v,
1240 for (x = 0; x < width - 1; x += 2) {
1241 YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
1242 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1244 YuvPixel(src_y[1], usrc_v[0], usrc_v[1],
1245 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1249 rgb_buf += 8; // Advance 2 pixels.
1252 YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
1253 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1258 void NV21ToARGBRow_C(const uint8* src_y,
1259 const uint8* src_vu,
1263 for (x = 0; x < width - 1; x += 2) {
1264 YuvPixel(src_y[0], src_vu[1], src_vu[0],
1265 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1268 YuvPixel(src_y[1], src_vu[1], src_vu[0],
1269 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1274 rgb_buf += 8; // Advance 2 pixels.
1277 YuvPixel(src_y[0], src_vu[1], src_vu[0],
1278 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1283 void NV12ToRGB565Row_C(const uint8* src_y,
1284 const uint8* usrc_v,
1294 for (x = 0; x < width - 1; x += 2) {
1295 YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
1296 YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1);
1303 *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1304 (b1 << 16) | (g1 << 21) | (r1 << 27);
1307 dst_rgb565 += 4; // Advance 2 pixels.
1310 YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
1314 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1318 void NV21ToRGB565Row_C(const uint8* src_y,
1319 const uint8* vsrc_u,
1329 for (x = 0; x < width - 1; x += 2) {
1330 YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
1331 YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
1338 *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1339 (b1 << 16) | (g1 << 21) | (r1 << 27);
1342 dst_rgb565 += 4; // Advance 2 pixels.
1345 YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
1349 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1353 void YUY2ToARGBRow_C(const uint8* src_yuy2,
1357 for (x = 0; x < width - 1; x += 2) {
1358 YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
1359 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1361 YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],
1362 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1365 rgb_buf += 8; // Advance 2 pixels.
1368 YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
1369 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1374 void UYVYToARGBRow_C(const uint8* src_uyvy,
1378 for (x = 0; x < width - 1; x += 2) {
1379 YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
1380 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1382 YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],
1383 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1386 rgb_buf += 8; // Advance 2 pixels.
1389 YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
1390 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
// Converts planar Y, U, V samples into 4-byte pixels, two pixels per loop
// iteration sharing one U and one V sample. YuvPixel's three outputs are
// stored at byte offsets 3, 2 and 1 of each pixel (descending order).
// NOTE(review): some lines of this function are missing from this listing;
// the write to byte 0 of each pixel and the source advances are not visible.
1395 void I422ToBGRARow_C(const uint8* src_y,
1401 for (x = 0; x < width - 1; x += 2) {
1402 YuvPixel(src_y[0], src_u[0], src_v[0],
1403 rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
1405 YuvPixel(src_y[1], src_u[0], src_v[0],
1406 rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);
1411 rgb_buf += 8; // Advance 2 pixels.
// Single-pixel conversion; presumably the odd-width tail (guard not visible).
1414 YuvPixel(src_y[0], src_u[0], src_v[0],
1415 rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
// Converts planar Y, U, V samples into 4-byte pixels, two pixels per loop
// iteration sharing one U and one V sample. YuvPixel's three outputs are
// stored at byte offsets 2, 1 and 0 of each pixel (descending order).
// NOTE(review): some lines of this function are missing from this listing;
// the write to byte 3 of each pixel and the source advances are not visible.
1420 void I422ToABGRRow_C(const uint8* src_y,
1426 for (x = 0; x < width - 1; x += 2) {
1427 YuvPixel(src_y[0], src_u[0], src_v[0],
1428 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1430 YuvPixel(src_y[1], src_u[0], src_v[0],
1431 rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
1436 rgb_buf += 8; // Advance 2 pixels.
// Single-pixel conversion; presumably the odd-width tail (guard not visible).
1439 YuvPixel(src_y[0], src_u[0], src_v[0],
1440 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
// Converts planar Y, U, V samples into 4-byte pixels, two pixels per loop
// iteration sharing one U and one V sample. YuvPixel's three outputs are
// stored at byte offsets 1, 2 and 3 of each pixel.
// NOTE(review): some lines of this function are missing from this listing;
// the write to byte 0 of each pixel and the source advances are not visible.
1445 void I422ToRGBARow_C(const uint8* src_y,
1451 for (x = 0; x < width - 1; x += 2) {
1452 YuvPixel(src_y[0], src_u[0], src_v[0],
1453 rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
1455 YuvPixel(src_y[1], src_u[0], src_v[0],
1456 rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);
1461 rgb_buf += 8; // Advance 2 pixels.
// Single-pixel conversion; presumably the odd-width tail (guard not visible).
1464 YuvPixel(src_y[0], src_u[0], src_v[0],
1465 rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
// Converts a row of Y samples into 4-byte pixels by calling YuvPixel with
// both chroma arguments fixed at 128; results go to bytes 0, 1 and 2 of each
// output pixel.
// NOTE(review): some lines of this function are missing from this listing;
// the write to byte 3 of each pixel and the src_y advance are not visible.
1470 void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
1472 for (x = 0; x < width - 1; x += 2) {
1473 YuvPixel(src_y[0], 128, 128,
1474 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1476 YuvPixel(src_y[1], 128, 128,
1477 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1480 rgb_buf += 8; // Advance 2 pixels.
// Single-pixel conversion; presumably the odd-width tail (guard not visible).
1483 YuvPixel(src_y[0], 128, 128,
1484 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
// Copies bytes from src to dst in reversed order, two bytes per loop
// iteration: dst is indexed forward by x while src is read at offsets 0 and
// -1 from a pointer that presumably walks backward from the end of the row.
// NOTE(review): the src positioning, the dst[x] store and the src step are
// missing from this listing.
1489 void MirrorRow_C(const uint8* src, uint8* dst, int width) {
1492 for (x = 0; x < width - 1; x += 2) {
1494 dst[x + 1] = src[-1];
// Final byte; presumably the odd-width tail (guard not visible).
1498 dst[width - 1] = src[0];
// Reverses a row of interleaved 2-byte pairs while splitting them: byte 0 of
// each pair goes to dst_u and byte 1 to dst_v.
// NOTE(review): some lines of this function are missing from this listing;
// the backward step of src_uv inside the loop is not visible.
1502 void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
// Start at the last pair of the row (2 bytes per pair).
1504 src_uv += (width - 1) << 1;
1505 for (x = 0; x < width - 1; x += 2) {
1506 dst_u[x] = src_uv[0];
1507 dst_u[x + 1] = src_uv[-2];
1508 dst_v[x] = src_uv[1];
1509 dst_v[x + 1] = src_uv[-2 + 1];
// Final pair; presumably the odd-width tail (guard not visible).
1513 dst_u[width - 1] = src_uv[0];
1514 dst_v[width - 1] = src_uv[1];
// Reverses a row of 32-bit pixels, two per loop iteration. src and dst are
// accessed through uint32 pointers, so each pixel moves as one 4-byte unit.
// NOTE(review): the positioning of src32 at the end of the row and its
// backward step are missing from this listing.
1518 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
1520 const uint32* src32 = (const uint32*)(src);
1521 uint32* dst32 = (uint32*)(dst);
1523 for (x = 0; x < width - 1; x += 2) {
1524 dst32[x] = src32[0];
1525 dst32[x + 1] = src32[-1];
// Final pixel; presumably the odd-width tail (guard not visible).
1529 dst32[width - 1] = src32[0];
// De-interleaves a row of 2-byte pairs: even source bytes go to dst_u and odd
// source bytes to dst_v, two pairs per loop iteration.
// NOTE(review): the src_uv advance inside the loop is missing from this
// listing.
1533 void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
1535 for (x = 0; x < width - 1; x += 2) {
1536 dst_u[x] = src_uv[0];
1537 dst_u[x + 1] = src_uv[2];
1538 dst_v[x] = src_uv[1];
1539 dst_v[x + 1] = src_uv[3];
// Final pair; presumably the odd-width tail (guard not visible).
1543 dst_u[width - 1] = src_uv[0];
1544 dst_v[width - 1] = src_uv[1];
// Interleaves two planar rows into 2-byte pairs: src_u supplies the even
// destination bytes and src_v the odd ones, two pairs per loop iteration.
// NOTE(review): the dst_uv advance inside the loop is missing from this
// listing.
1548 void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
1551 for (x = 0; x < width - 1; x += 2) {
1552 dst_uv[0] = src_u[x];
1553 dst_uv[1] = src_v[x];
1554 dst_uv[2] = src_u[x + 1];
1555 dst_uv[3] = src_v[x + 1];
// Final pair; presumably the odd-width tail (guard not visible).
1559 dst_uv[0] = src_u[width - 1];
1560 dst_uv[1] = src_v[width - 1];
// Copies count bytes from src to dst with memcpy, so the two ranges must not
// overlap.
1564 void CopyRow_C(const uint8* src, uint8* dst, int count) {
1565 memcpy(dst, src, count);
// Copies count 16-bit elements (count * 2 bytes) from src to dst with memcpy,
// so the two ranges must not overlap.
1568 void CopyRow_16_C(const uint16* src, uint16* dst, int count) {
1569 memcpy(dst, src, count * 2);
// Fills count bytes of dst with the value v8.
// Two fill strategies are visible, a byte loop and memset; presumably a
// preprocessor conditional (not visible in this listing) selects one of them.
1572 void SetRow_C(uint8* dst, uint32 v8, int count) {
1574 // VC will generate rep stosb.
1576 for (x = 0; x < count; ++x) {
1580 memset(dst, v8, count);
// Iterates over height rows and, within each row, over width 32-bit elements
// addressed through a uint32 view of dst.
// NOTE(review): the store of v32 and the per-row advance of dst (presumably
// by dst_stride) are missing from this listing.
1584 void ARGBSetRows_C(uint8* dst, uint32 v32, int width,
1585 int dst_stride, int height) {
1587 for (y = 0; y < height; ++y) {
1588 uint32* d = (uint32*)(dst);
1590 for (x = 0; x < width; ++x) {
1597 // Filter 2 rows of YUY2 UV's (422) into U and V (420).
// For each 4-byte group, U (byte 1) and V (byte 3) are averaged with the
// same bytes src_stride_yuy2 bytes further on, rounding up (+1 before >> 1).
// NOTE(review): the pointer advances inside the loop are missing from this
// listing.
1598 void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
1599 uint8* dst_u, uint8* dst_v, int width) {
1600 // Output a row of UV values, filtering 2 rows of YUY2.
1602 for (x = 0; x < width; x += 2) {
1603 dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
1604 dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
1611 // Copy row of YUY2 UV's (422) into U and V (422).
// One U (byte 1) and one V (byte 3) are taken per loop step; x advances by 2.
// NOTE(review): the pointer advances inside the loop are missing from this
// listing.
1612 void YUY2ToUV422Row_C(const uint8* src_yuy2,
1613 uint8* dst_u, uint8* dst_v, int width) {
1614 // Output a row of UV values.
1616 for (x = 0; x < width; x += 2) {
1617 dst_u[0] = src_yuy2[1];
1618 dst_v[0] = src_yuy2[3];
1625 // Copy row of YUY2 Y's (422) into Y (420/422).
// The Y samples sit at bytes 0 and 2 of each 4-byte group.
// NOTE(review): the src_yuy2 advance inside the loop is missing from this
// listing.
1626 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
1627 // Output a row of Y values.
1629 for (x = 0; x < width - 1; x += 2) {
1630 dst_y[x] = src_yuy2[0];
1631 dst_y[x + 1] = src_yuy2[2];
// Final sample; presumably the odd-width tail (guard not visible).
1635 dst_y[width - 1] = src_yuy2[0];
1639 // Filter 2 rows of UYVY UV's (422) into U and V (420).
// For each 4-byte group, U (byte 0) and V (byte 2) are averaged with the
// same bytes src_stride_uyvy bytes further on, rounding up (+1 before >> 1).
// NOTE(review): the pointer advances inside the loop are missing from this
// listing.
1640 void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
1641 uint8* dst_u, uint8* dst_v, int width) {
1642 // Output a row of UV values.
1644 for (x = 0; x < width; x += 2) {
1645 dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
1646 dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
1653 // Copy row of UYVY UV's (422) into U and V (422).
// One U (byte 0) and one V (byte 2) are taken per loop step; x advances by 2.
// NOTE(review): the pointer advances inside the loop are missing from this
// listing.
1654 void UYVYToUV422Row_C(const uint8* src_uyvy,
1655 uint8* dst_u, uint8* dst_v, int width) {
1656 // Output a row of UV values.
1658 for (x = 0; x < width; x += 2) {
1659 dst_u[0] = src_uyvy[0];
1660 dst_v[0] = src_uyvy[2];
1667 // Copy row of UYVY Y's (422) into Y (420/422).
// The Y samples sit at bytes 1 and 3 of each 4-byte group.
// NOTE(review): the src_uyvy advance inside the loop is missing from this
// listing.
1668 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
1669 // Output a row of Y values.
1671 for (x = 0; x < width - 1; x += 2) {
1672 dst_y[x] = src_uyvy[1];
1673 dst_y[x + 1] = src_uyvy[3];
// Final sample; presumably the odd-width tail (guard not visible).
1677 dst_y[width - 1] = src_uyvy[1];
// Blends one channel: the background value b is scaled by (256 - a) / 256 and
// the foreground value f is added unscaled. Every argument and the whole
// expansion are parenthesized so the macro keeps its meaning when an argument
// is an expression or when the result is used inside a larger expression.
#define BLEND(f, b, a) ((((256 - (a)) * (b)) >> 8) + (f))
1683 // Blend src_argb0 over src_argb1 and store to dst_argb.
1684 // dst_argb may be src_argb0 or src_argb1.
1685 // This code mimics the SSSE3 version for better testability.
// For each pixel, bytes 0..2 of src_argb0 are the foreground channels and
// byte 3 is its alpha; bytes 0..2 of src_argb1 are the background channels.
// Each output channel is BLEND(foreground, background, alpha).
// NOTE(review): some lines of this function are missing from this listing
// (pointer advances, the byte-3 store for the first pixel, the tail guard).
1686 void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
1687 uint8* dst_argb, int width) {
1689 for (x = 0; x < width - 1; x += 2) {
// First pixel of the pair.
1690 uint32 fb = src_argb0[0];
1691 uint32 fg = src_argb0[1];
1692 uint32 fr = src_argb0[2];
1693 uint32 a = src_argb0[3];
1694 uint32 bb = src_argb1[0];
1695 uint32 bg = src_argb1[1];
1696 uint32 br = src_argb1[2];
1697 dst_argb[0] = BLEND(fb, bb, a);
1698 dst_argb[1] = BLEND(fg, bg, a);
1699 dst_argb[2] = BLEND(fr, br, a);
// Second pixel of the pair (byte offsets 4..7).
1702 fb = src_argb0[4 + 0];
1703 fg = src_argb0[4 + 1];
1704 fr = src_argb0[4 + 2];
1705 a = src_argb0[4 + 3];
1706 bb = src_argb1[4 + 0];
1707 bg = src_argb1[4 + 1];
1708 br = src_argb1[4 + 2];
1709 dst_argb[4 + 0] = BLEND(fb, bb, a);
1710 dst_argb[4 + 1] = BLEND(fg, bg, a);
1711 dst_argb[4 + 2] = BLEND(fr, br, a);
// Output byte 3 is forced to 255.
1712 dst_argb[4 + 3] = 255u;
// Single-pixel blend; presumably the odd-width tail (guard not visible).
1719 uint32 fb = src_argb0[0];
1720 uint32 fg = src_argb0[1];
1721 uint32 fr = src_argb0[2];
1722 uint32 a = src_argb0[3];
1723 uint32 bb = src_argb1[0];
1724 uint32 bg = src_argb1[1];
1725 uint32 br = src_argb1[2];
1726 dst_argb[0] = BLEND(fb, bb, a);
1727 dst_argb[1] = BLEND(fg, bg, a);
1728 dst_argb[2] = BLEND(fr, br, a);
// Scales channel f by alpha a. Each 8-bit value is widened to 16 bits by
// repeating its byte (v | v << 8), the two are multiplied, and the top 8 bits
// of the 32-bit product are kept. Pass unsigned 32-bit operands so the
// product does not overflow a signed int. Every argument and the whole
// expansion are parenthesized so the macro is safe with expression arguments
// and inside larger expressions.
#define ATTENUATE(f, a) ((((a) | ((a) << 8)) * ((f) | ((f) << 8))) >> 24)
1735 // Multiply source RGB by alpha and store to destination.
1736 // This code mimics the SSSE3 version for better testability.
// For each pixel, bytes 0..2 are scaled by byte 3 through ATTENUATE; two
// pixels are handled per loop iteration.
// NOTE(review): some lines of this function are missing from this listing
// (the byte-3 stores, the reload of b/g/r/a for the second pixel, pointer
// advances and the tail guard).
1737 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
1739 for (i = 0; i < width - 1; i += 2) {
// First pixel of the pair.
1740 uint32 b = src_argb[0];
1741 uint32 g = src_argb[1];
1742 uint32 r = src_argb[2];
1743 uint32 a = src_argb[3];
1744 dst_argb[0] = ATTENUATE(b, a);
1745 dst_argb[1] = ATTENUATE(g, a);
1746 dst_argb[2] = ATTENUATE(r, a);
// Second pixel of the pair (byte offsets 4..6).
1752 dst_argb[4] = ATTENUATE(b, a);
1753 dst_argb[5] = ATTENUATE(g, a);
1754 dst_argb[6] = ATTENUATE(r, a);
// Single-pixel attenuation; presumably the odd-width tail (guard not visible).
1761 const uint32 b = src_argb[0];
1762 const uint32 g = src_argb[1];
1763 const uint32 r = src_argb[2];
1764 const uint32 a = src_argb[3];
1765 dst_argb[0] = ATTENUATE(b, a);
1766 dst_argb[1] = ATTENUATE(g, a);
1767 dst_argb[2] = ATTENUATE(r, a);
// Divide source RGB by alpha and store to destination.
// b = (b * 255 + (a / 2)) / a;
// g = (g * 255 + (a / 2)) / a;
// r = (r * 255 + (a / 2)) / a;
// Reciprocal method is off by 1 on some values, e.g. 125.
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
// T(a) builds one table entry: 0x0100 in the upper 16 bits plus 0x10000 / a
// in the lower 16 bits. a must be non-zero. The argument and the whole
// expansion are parenthesized so the macro is safe in any expression context.
#define T(a) (0x01000000 + (0x10000 / (a)))
// Lookup table with one 32-bit entry per 8-bit index (ARGBUnattenuateRow_C
// indexes it with a pixel's alpha byte). Entries 2..254 are generated by T().
// Three entries are written out literally:
//   [0]   0x01000000 - T(0) would divide by zero.
//   [1]   0x0100ffff - 0x10000 / 1 does not fit in the low 16 bits.
//   [255] 0x01000100 - differs from T(0xff), which would be 0x01000101.
1780 const uint32 fixed_invtbl8[256] = {
1781 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
1782 T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
1783 T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
1784 T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
1785 T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
1786 T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
1787 T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
1788 T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
1789 T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
1790 T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
1791 T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
1792 T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
1793 T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
1794 T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
1795 T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
1796 T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
1797 T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
1798 T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
1799 T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
1800 T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
1801 T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
1802 T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
1803 T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
1804 T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
1805 T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
1806 T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
1807 T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
1808 T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
1809 T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
1810 T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
1811 T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
1812 T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };
// Processes one 4-byte pixel per iteration: bytes 0..2 are loaded as b, g, r
// and byte 3 as alpha, and the clamped results are written to bytes 0..2 of
// the destination pixel.
// NOTE(review): the statements that apply ia to b, g and r, the byte-3 store
// and the pointer advances are missing from this listing.
1815 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
1817 for (i = 0; i < width; ++i) {
1818 uint32 b = src_argb[0];
1819 uint32 g = src_argb[1];
1820 uint32 r = src_argb[2];
1821 const uint32 a = src_argb[3];
// Keep only the low 16 bits of the table entry selected by alpha.
1822 const uint32 ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
1826 // Clamping should not be necessary but is free in assembly.
1827 dst_argb[0] = clamp255(b);
1828 dst_argb[1] = clamp255(g);
1829 dst_argb[2] = clamp255(r);
// Builds one row of a 4-channel cumulative sum. row_sum holds the running
// per-channel total of this row's bytes from pixel 0 through pixel x; each
// output element is that total plus the element at the same index in
// previous_cumsum. Elements are laid out 4 per pixel (index x * 4 + channel).
1836 void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
1837 const int32* previous_cumsum, int width) {
1838 int32 row_sum[4] = {0, 0, 0, 0};
1840 for (x = 0; x < width; ++x) {
1841 row_sum[0] += row[x * 4 + 0];
1842 row_sum[1] += row[x * 4 + 1];
1843 row_sum[2] += row[x * 4 + 2];
1844 row_sum[3] += row[x * 4 + 3];
1845 cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0];
1846 cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1];
1847 cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2];
1848 cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3];
// Produces count 4-byte outputs from two rows of cumulative sums. For each
// channel the value bl[w + c] + tl[c] - bl[c] - tl[w + c] is multiplied by
// 1 / area and truncated to uint8. w is the offset, in int32 elements,
// between the two sample positions in each row.
// NOTE(review): area == 0 makes the reciprocal a division by zero; callers
// presumably guarantee area > 0. The pointer advances inside the loop are
// missing from this listing.
1852 void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
1853 int w, int area, uint8* dst, int count) {
// Reciprocal computed once so the loop multiplies instead of dividing.
1854 float ooa = 1.0f / area;
1856 for (i = 0; i < count; ++i) {
1857 dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
1858 dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
1859 dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
1860 dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
1867 // Copy pixels from rotated source to destination row with a slope.
// Each iteration truncates the current coordinate pair uv to integers, copies
// one 32-bit pixel from the source row selected by y * src_argb_stride, then
// steps uv by (uv_dudv[2], uv_dudv[3]).
// NOTE(review): the declaration and initialisation of uv (presumably from
// uv_dudv[0] and uv_dudv[1]), the x term of the source address and the
// dst_argb advance are missing from this listing. No bounds check on x or y
// is visible.
1869 void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
1870 uint8* dst_argb, const float* uv_dudv, int width) {
1872 // Render a row of pixels from source into a buffer.
1876 for (i = 0; i < width; ++i) {
1877 int x = (int)(uv[0]);
1878 int y = (int)(uv[1]);
1879 *(uint32*)(dst_argb) =
1880 *(const uint32*)(src_argb + y * src_argb_stride +
1883 uv[0] += uv_dudv[2];
1884 uv[1] += uv_dudv[3];
1888 // Blend 2 rows into 1.
// Each output byte is the rounded-up average of src_uv[x] and the byte
// src_uv_stride bytes after it; pix is the number of bytes produced.
1889 static void HalfRow_C(const uint8* src_uv, int src_uv_stride,
1890 uint8* dst_uv, int pix) {
1892 for (x = 0; x < pix; ++x) {
1893 dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
// 16-bit variant of the two-row blend: each output element is the rounded-up
// average of src_uv[x] and the element src_uv_stride elements after it.
// src_uv_stride is applied as a uint16 index, so it counts elements, not
// bytes.
1897 static void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
1898 uint16* dst_uv, int pix) {
1900 for (x = 0; x < pix; ++x) {
1901 dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
1905 // C version 2x2 -> 2x1.
// Blends the row at src_ptr with the row src_stride bytes after it.
// source_y_fraction is the weight of the second row out of 256; the first
// row gets 256 - source_y_fraction.
// NOTE(review): the early returns after the two special cases, the pointer
// advances and the tail guard are missing from this listing.
1906 void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
1907 ptrdiff_t src_stride,
1908 int width, int source_y_fraction) {
1909 int y1_fraction = source_y_fraction;
1910 int y0_fraction = 256 - y1_fraction;
1911 const uint8* src_ptr1 = src_ptr + src_stride;
// Fraction 0: the output is the first row unchanged.
1913 if (source_y_fraction == 0) {
1914 memcpy(dst_ptr, src_ptr, width);
// Fraction 128: equal weights, handled by the rounded two-row average.
1917 if (source_y_fraction == 128) {
1918 HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
// General case: weighted sum of the two rows, two bytes per iteration.
1921 for (x = 0; x < width - 1; x += 2) {
1922 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
1923 dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
// Final byte; presumably the odd-width tail (guard not visible).
1929 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
// 16-bit variant of the two-row interpolation. The second row starts
// src_stride uint16 elements after src_ptr. source_y_fraction is the weight
// of the second row out of 256; the first row gets 256 - source_y_fraction.
// NOTE(review): the early returns after the two special cases, the pointer
// advances and the tail guard are missing from this listing.
1933 void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
1934 ptrdiff_t src_stride,
1935 int width, int source_y_fraction) {
1936 int y1_fraction = source_y_fraction;
1937 int y0_fraction = 256 - y1_fraction;
1938 const uint16* src_ptr1 = src_ptr + src_stride;
// Fraction 0: copy the first row (width elements of 2 bytes each).
1940 if (source_y_fraction == 0) {
1941 memcpy(dst_ptr, src_ptr, width * 2);
// Fraction 128: equal weights, handled by the rounded two-row average.
1944 if (source_y_fraction == 128) {
1945 HalfRow_16_C(src_ptr, (int)(src_stride), dst_ptr, width);
// General case: weighted sum of the two rows, two elements per iteration.
1948 for (x = 0; x < width - 1; x += 2) {
1949 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
1950 dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
// Final element; presumably the odd-width tail (guard not visible).
1956 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
1960 // Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG
// selector packs two byte offsets into src_argb: bits 0-7 give the offset for
// the first output byte of each pair and bits 8-15 the offset for the second.
// NOTE(review): the pointer advances and the tail guard are missing from
// this listing.
1961 void ARGBToBayerRow_C(const uint8* src_argb,
1962 uint8* dst_bayer, uint32 selector, int pix) {
1963 int index0 = selector & 0xff;
1964 int index1 = (selector >> 8) & 0xff;
1965 // Copy a row of Bayer.
1967 for (x = 0; x < pix - 1; x += 2) {
1968 dst_bayer[0] = src_argb[index0];
1969 dst_bayer[1] = src_argb[index1];
// Final byte; presumably the odd-width tail (guard not visible).
1974 dst_bayer[0] = src_argb[index0];
1978 // Select G channel from ARGB. e.g. GGGGGGGG
// Copies byte 1 of two consecutive 4-byte pixels (offsets 1 and 5) per loop
// iteration. selector is not referenced by any statement visible here.
// NOTE(review): the pointer advances and the tail guard are missing from
// this listing.
1979 void ARGBToBayerGGRow_C(const uint8* src_argb,
1980 uint8* dst_bayer, uint32 selector, int pix) {
1983 for (x = 0; x < pix - 1; x += 2) {
1984 dst_bayer[0] = src_argb[1];
1985 dst_bayer[1] = src_argb[5];
// Final byte; presumably the odd-width tail (guard not visible).
1990 dst_bayer[0] = src_argb[1];
1994 // Use first 4 shuffler values to reorder ARGB channels.
// shuffler[0..3] are byte offsets into the current source pixel. All four
// source bytes are read into locals before anything is stored.
// NOTE(review): the stores to dst_argb and the pointer advances are missing
// from this listing.
1995 void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
1996 const uint8* shuffler, int pix) {
1997 int index0 = shuffler[0];
1998 int index1 = shuffler[1];
1999 int index2 = shuffler[2];
2000 int index3 = shuffler[3];
2001 // Shuffle a row of ARGB.
2003 for (x = 0; x < pix; ++x) {
2004 // To support in-place conversion.
2005 uint8 b = src_argb[index0];
2006 uint8 g = src_argb[index1];
2007 uint8 r = src_argb[index2];
2008 uint8 a = src_argb[index3];
// Packs planar samples into 4-byte groups ordered Y0, U, Y1, V: two Y samples
// share one U and one V sample.
// NOTE(review): some parameter lines, the pointer advances and the tail
// guard are missing from this listing.
2018 void I422ToYUY2Row_C(const uint8* src_y,
2021 uint8* dst_frame, int width) {
2023 for (x = 0; x < width - 1; x += 2) {
2024 dst_frame[0] = src_y[0];
2025 dst_frame[1] = src_u[0];
2026 dst_frame[2] = src_y[1];
2027 dst_frame[3] = src_v[0];
// Final group; presumably the odd-width tail (guard not visible). A full
// 4-byte group is still written, repeating the last Y sample.
2034 dst_frame[0] = src_y[0];
2035 dst_frame[1] = src_u[0];
2036 dst_frame[2] = src_y[0]; // duplicate last y
2037 dst_frame[3] = src_v[0];
// Packs planar samples into 4-byte groups ordered U, Y0, V, Y1: two Y samples
// share one U and one V sample.
// NOTE(review): some parameter lines, the pointer advances and the tail
// guard are missing from this listing.
2041 void I422ToUYVYRow_C(const uint8* src_y,
2044 uint8* dst_frame, int width) {
2046 for (x = 0; x < width - 1; x += 2) {
2047 dst_frame[0] = src_u[0];
2048 dst_frame[1] = src_y[0];
2049 dst_frame[2] = src_v[0];
2050 dst_frame[3] = src_y[1];
// Final group; presumably the odd-width tail (guard not visible). A full
// 4-byte group is still written, repeating the last Y sample.
2057 dst_frame[0] = src_u[0];
2058 dst_frame[1] = src_y[0];
2059 dst_frame[2] = src_v[0];
2060 dst_frame[3] = src_y[0]; // duplicate last y
2064 #if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3)
2065 // row_win.cc has asm version, but GCC uses 2 step wrapper.
2066 #if !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
// Two-step wrapper: converts the planar input to a temporary row of 4 bytes
// per pixel with I422ToARGBRow_SSSE3, then repacks that row into rgb_buf
// with ARGBToRGB565Row_SSE2. The temporary row is released before returning.
// NOTE(review): some parameter lines are missing from this listing.
2067 void I422ToRGB565Row_SSSE3(const uint8* src_y,
2072 // Allocate a row of ARGB.
2073 align_buffer_64(row, width * 4);
2074 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2075 ARGBToRGB565Row_SSE2(row, rgb_buf, width);
2076 free_aligned_buffer_64(row);
2078 #endif // !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
2080 #if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
// Two-step wrapper: converts the planar input to a temporary row of 4 bytes
// per pixel with I422ToARGBRow_SSSE3, then repacks that row into rgb_buf
// with ARGBToARGB1555Row_SSE2. The temporary row is released before
// returning.
// NOTE(review): some parameter lines are missing from this listing.
2081 void I422ToARGB1555Row_SSSE3(const uint8* src_y,
2086 // Allocate a row of ARGB.
2087 align_buffer_64(row, width * 4);
2088 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2089 ARGBToARGB1555Row_SSE2(row, rgb_buf, width);
2090 free_aligned_buffer_64(row);
// Two-step wrapper: converts the planar input to a temporary row of 4 bytes
// per pixel with I422ToARGBRow_SSSE3, then repacks that row into rgb_buf
// with ARGBToARGB4444Row_SSE2. The temporary row is released before
// returning.
// NOTE(review): some parameter lines are missing from this listing.
2093 void I422ToARGB4444Row_SSSE3(const uint8* src_y,
2098 // Allocate a row of ARGB.
2099 align_buffer_64(row, width * 4);
2100 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2101 ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
2102 free_aligned_buffer_64(row);
// Two-step wrapper: converts Y plus interleaved UV to a temporary row of
// 4 bytes per pixel with NV12ToARGBRow_SSSE3, then repacks that row into
// dst_rgb565 with ARGBToRGB565Row_SSE2. The temporary row is released before
// returning.
// NOTE(review): some parameter lines are missing from this listing.
2105 void NV12ToRGB565Row_SSSE3(const uint8* src_y,
2106 const uint8* src_uv,
2109 // Allocate a row of ARGB.
2110 align_buffer_64(row, width * 4);
2111 NV12ToARGBRow_SSSE3(src_y, src_uv, row, width);
2112 ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
2113 free_aligned_buffer_64(row);
// Two-step wrapper: converts Y plus interleaved VU to a temporary row of
// 4 bytes per pixel with NV21ToARGBRow_SSSE3, then repacks that row into
// dst_rgb565 with ARGBToRGB565Row_SSE2. The temporary row is released before
// returning.
// NOTE(review): some parameter lines are missing from this listing.
2116 void NV21ToRGB565Row_SSSE3(const uint8* src_y,
2117 const uint8* src_vu,
2120 // Allocate a row of ARGB.
2121 align_buffer_64(row, width * 4);
2122 NV21ToARGBRow_SSSE3(src_y, src_vu, row, width);
2123 ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
2124 free_aligned_buffer_64(row);
// Two-step wrapper: splits the packed input into temporary Y, U and V rows,
// then converts those rows to dst_argb with I422ToARGBRow_SSSE3.
// One buffer holds all three rows. With W = width rounded up to a multiple
// of 64, the buffer is 2 * W bytes: Y at offset 0, U at offset W and V at
// offset W + W / 2.
// NOTE(review): some parameter lines are missing from this listing.
2127 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
2130 // Allocate rows of yuv.
2131 align_buffer_64(row_y, ((width + 63) & ~63) * 2);
2132 uint8* row_u = row_y + ((width + 63) & ~63);
2133 uint8* row_v = row_u + ((width + 63) & ~63) / 2;
2134 YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width);
2135 YUY2ToYRow_SSE2(src_yuy2, row_y, width);
2136 I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
2137 free_aligned_buffer_64(row_y);
// Two-step wrapper: splits the packed input into temporary Y, U and V rows,
// then converts those rows to dst_argb with I422ToARGBRow_SSSE3.
// One buffer holds all three rows. With W = width rounded up to a multiple
// of 64, the buffer is 2 * W bytes: Y at offset 0, U at offset W and V at
// offset W + W / 2.
// NOTE(review): some parameter lines are missing from this listing.
2140 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
2143 // Allocate rows of yuv.
2144 align_buffer_64(row_y, ((width + 63) & ~63) * 2);
2145 uint8* row_u = row_y + ((width + 63) & ~63);
2146 uint8* row_v = row_u + ((width + 63) & ~63) / 2;
2147 UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width);
2148 UYVYToYRow_SSE2(src_uyvy, row_y, width);
2149 I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
2150 free_aligned_buffer_64(row_y);
2153 #endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
2154 #endif // !defined(LIBYUV_DISABLE_X86)
// Evaluates a per-channel polynomial on each 4-byte pixel. poly is laid out
// in groups of four, one coefficient per channel in byte order 0..3:
// poly[0..3] constant terms, poly[4..7] linear terms, and poly[12..15]
// multiply the b3/g3/r3/a3 values. Results are converted to int32 and
// clamped by Clamp().
// NOTE(review): the computation of the squared and cubed values, the
// poly[8] and poly[9] terms, some parameter lines and the pointer advances
// are missing from this listing.
2156 void ARGBPolynomialRow_C(const uint8* src_argb,
2157 uint8* dst_argb, const float* poly,
2160 for (i = 0; i < width; ++i) {
2161 float b = (float)(src_argb[0]);
2162 float g = (float)(src_argb[1]);
2163 float r = (float)(src_argb[2]);
2164 float a = (float)(src_argb[3]);
// Constant plus linear term.
2169 float db = poly[0] + poly[4] * b;
2170 float dg = poly[1] + poly[5] * g;
2171 float dr = poly[2] + poly[6] * r;
2172 float da = poly[3] + poly[7] * a;
// Higher-order terms accumulate onto the same per-channel sums.
2179 dr += poly[10] * r2;
2180 da += poly[11] * a2;
2181 db += poly[12] * b3;
2182 dg += poly[13] * g3;
2183 dr += poly[14] * r3;
2184 da += poly[15] * a3;
2186 dst_argb[0] = Clamp((int32)(db));
2187 dst_argb[1] = Clamp((int32)(dg));
2188 dst_argb[2] = Clamp((int32)(dr));
2189 dst_argb[3] = Clamp((int32)(da));
// Remaps bytes 0..2 of each pixel through a lookup table chosen per pixel;
// byte 3 is copied unchanged. lumacoeff packs three 8-bit weights: bits 0-7
// for byte 0, bits 8-15 for byte 1, bits 16-23 for byte 2. The weighted sum
// of the three bytes is masked with 0x7F00, giving a multiple of 256 that is
// used as the byte offset of the table row inside luma.
// NOTE(review): the declaration of luma1, the pointer advances and the tail
// guard are missing from this listing.
2195 void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
2196 const uint8* luma, uint32 lumacoeff) {
2197 uint32 bc = lumacoeff & 0xff;
2198 uint32 gc = (lumacoeff >> 8) & 0xff;
2199 uint32 rc = (lumacoeff >> 16) & 0xff;
2202 for (i = 0; i < width - 1; i += 2) {
2203 // Luminance in rows, color values in columns.
2204 const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
2205 src_argb[2] * rc) & 0x7F00u) + luma;
2207 dst_argb[0] = luma0[src_argb[0]];
2208 dst_argb[1] = luma0[src_argb[1]];
2209 dst_argb[2] = luma0[src_argb[2]];
2210 dst_argb[3] = src_argb[3];
// Second pixel of the pair selects its own table row.
2211 luma1 = ((src_argb[4] * bc + src_argb[5] * gc +
2212 src_argb[6] * rc) & 0x7F00u) + luma;
2213 dst_argb[4] = luma1[src_argb[4]];
2214 dst_argb[5] = luma1[src_argb[5]];
2215 dst_argb[6] = luma1[src_argb[6]];
2216 dst_argb[7] = src_argb[7];
// Single-pixel remap; presumably the odd-width tail (guard not visible).
2221 // Luminance in rows, color values in columns.
2222 const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
2223 src_argb[2] * rc) & 0x7F00u) + luma;
2224 dst_argb[0] = luma0[src_argb[0]];
2225 dst_argb[1] = luma0[src_argb[1]];
2226 dst_argb[2] = luma0[src_argb[2]];
2227 dst_argb[3] = src_argb[3];
// Walks the row two pixels per loop iteration.
// NOTE(review): the loop body and the tail handling are missing from this
// listing, so the copy itself cannot be described from here; going by the
// name it presumably copies the alpha byte of each src pixel into dst.
// TODO confirm.
2231 void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
2233 for (i = 0; i < width - 1; i += 2) {
// Walks the row two pixels per loop iteration.
// NOTE(review): the loop body and the tail handling are missing from this
// listing, so the copy itself cannot be described from here; going by the
// name it presumably writes each src byte into the alpha byte of the
// matching dst pixel. TODO confirm.
2244 void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
2246 for (i = 0; i < width - 1; i += 2) {
2259 } // namespace libyuv