1 .function video_orc_blend_little
12 .const 4 a_alpha 0x000000ff
19 x4 shruw a_wide, a_wide, 8
23 x4 subw s_wide, s_wide, d_wide
24 x4 mullw s_wide, s_wide, a_wide
25 x4 div255w s_wide, s_wide
26 x4 addw d_wide, d_wide, s_wide
31 .function video_orc_blend_big
43 .const 4 a_alpha 0xff000000
51 x4 shruw a_wide, a_wide, 8
55 x4 subw s_wide, s_wide, d_wide
56 x4 mullw s_wide, s_wide, a_wide
57 x4 div255w s_wide, s_wide
58 x4 addw d_wide, d_wide, s_wide
63 .function video_orc_unpack_I420
81 .function video_orc_pack_I420
93 x2 splitlw uv, ay, ayuv
99 .function video_orc_pack_Y
101 .source 4 ayuv guint8
107 .function video_orc_unpack_YUY2
109 .source 4 yuy2 guint8
116 x2 splitwb uv, yy, yuy2
117 x2 mergebw ayay, c255, yy
119 x2 mergewl ayuv, ayay, uvuv
122 .function video_orc_pack_YUY2
124 .source 8 ayuv guint8
130 x2 splitlw uvuv, ayay, ayuv
132 x2 select1wb yy, ayay
133 x2 mergebw yuy2, yy, uv
136 .function video_orc_pack_UYVY
138 .source 8 ayuv guint8
144 x2 splitlw uvuv, ayay, ayuv
146 x2 select1wb yy, ayay
147 x2 mergebw yuy2, uv, yy
150 .function video_orc_unpack_UYVY
152 .source 4 uyvy guint8
159 x2 splitwb yy, uv, uyvy
160 x2 mergebw ayay, c255, yy
162 x2 mergewl ayuv, ayay, uvuv
165 .function video_orc_pack_VYUY
167 .source 8 ayuv guint8
173 x2 splitlw uvuv, ayay, ayuv
175 x2 select1wb yy, ayay
177 x2 mergebw vyuy, vu, yy
180 .function video_orc_unpack_VYUY
182 .source 4 vyuy guint8
189 x2 splitwb yy, uv, vyuy
191 x2 mergebw ayay, c255, yy
193 x2 mergewl ayuv, ayay, uvuv
196 .function video_orc_unpack_YVYU
198 .source 4 uyvy guint8
205 x2 splitwb uv, yy, uyvy
207 x2 mergebw ayay, c255, yy
209 x2 mergewl ayuv, ayay, uvuv
212 .function video_orc_pack_YVYU
214 .source 8 ayuv guint8
220 x2 splitlw uvuv, ayay, ayuv
222 x2 select1wb yy, ayay
224 x2 mergebw yuy2, yy, uv
227 .function video_orc_unpack_YUV9
243 x2 mergebw ay, c255, y
247 .function video_orc_unpack_Y42B
259 x2 mergebw ayay, c255, yy
261 x2 mergewl ayuv, ayay, uvuv
263 .function video_orc_pack_Y42B
267 .source 8 ayuv guint8
272 x2 splitlw uvuv, ayay, ayuv
278 .function video_orc_unpack_Y444
292 .function video_orc_pack_Y444
296 .source 4 ayuv guint8
304 .function video_orc_unpack_GRAY8
308 .const 2 c0x8080 0x8080
312 mergewl ayuv, ay, c0x8080
315 .function video_orc_pack_GRAY8
317 .source 4 ayuv guint8
324 .function video_orc_unpack_BGRA
326 .source 4 bgra guint8
330 .function video_orc_pack_BGRA
332 .source 4 argb guint8
336 .function video_orc_pack_RGBA_le
338 .source 4 argb guint8
347 .function video_orc_unpack_RGBA_le
349 .source 4 rgba guint8
358 .function video_orc_pack_RGBA_be
360 .source 4 argb guint8
369 .function video_orc_unpack_RGBA_be
371 .source 4 rgba guint8
381 .function video_orc_unpack_ABGR_le
383 .source 4 abgr guint8
392 .function video_orc_pack_ABGR_le
394 .source 4 argb guint8
403 .function video_orc_unpack_ABGR_be
405 .source 4 abgr guint8
414 .function video_orc_pack_ABGR_be
416 .source 4 argb guint8
426 .function video_orc_unpack_NV12
435 x2 mergebw ay, c255, y
436 x2 mergewl d, ay, uvuv
438 .function video_orc_pack_NV12
441 .source 8 ayuv guint8
445 x2 splitlw uvuv, ay, ayuv
449 .function video_orc_unpack_NV21
460 x2 mergebw ay, c255, y
461 x2 mergewl d, ay, uvuv
464 .function video_orc_pack_NV21
467 .source 8 ayuv guint8
472 x2 splitlw uvuv, ay, ayuv
477 .function video_orc_unpack_NV24
487 .function video_orc_pack_NV24
490 .source 4 ayuv guint8
496 .function video_orc_unpack_A420
513 .function video_orc_pack_A420
518 .source 8 ayuv guint8
524 x2 splitlw uv, ay, ayuv
527 x2 splitwb vv, uu, uv
531 .function video_orc_pack_AY
534 .source 4 ayuv guint8
541 .function video_orc_unpack_RGB15_le
543 .source 2 rgb15 guint16
564 .function video_orc_unpack_RGB15_be
566 .source 2 rgb15 guint16
587 .function video_orc_unpack_RGB15_le_trunc
589 .source 2 rgb15 guint16
609 .function video_orc_unpack_RGB15_be_trunc
611 .source 2 rgb15 guint16
631 .function video_orc_pack_RGB15_le
632 .dest 2 rgb15 guint16
633 .source 4 argb guint32
643 andl b, t, 0xf8000000
651 .function video_orc_pack_RGB15_be
652 .dest 2 rgb15 guint16
653 .source 4 argb guint32
671 .function video_orc_unpack_BGR15_le
673 .source 2 bgr15 guint16
694 .function video_orc_unpack_BGR15_be
696 .source 2 bgr15 guint16
717 .function video_orc_unpack_BGR15_le_trunc
719 .source 2 bgr15 guint16
739 .function video_orc_unpack_BGR15_be_trunc
741 .source 2 bgr15 guint16
761 .function video_orc_pack_BGR15_le
762 .dest 2 rgb15 guint16
763 .source 4 argb guint32
773 andl b, t, 0xf8000000
781 .function video_orc_pack_BGR15_be
782 .dest 2 rgb15 guint16
783 .source 4 argb guint32
801 .function video_orc_unpack_RGB16
803 .source 2 rgb16 guint16
824 x4 convsuswb argb, t2
826 .function video_orc_unpack_RGB16_trunc
828 .source 2 rgb16 guint16
847 x4 convsuswb argb, t2
849 .function video_orc_pack_RGB16_le
850 .dest 2 rgb15 guint16
851 .source 4 argb guint32
861 andl b, t, 0xf8000000
868 .function video_orc_pack_RGB16_be
869 .dest 2 rgb16 guint16
870 .source 4 argb guint32
888 .function video_orc_unpack_BGR16
890 .source 2 bgr16 guint16
911 x4 convsuswb argb, t2
913 .function video_orc_unpack_BGR16_trunc
915 .source 2 bgr16 guint16
934 x4 convsuswb argb, t2
936 .function video_orc_pack_BGR16_le
937 .dest 2 rgb15 guint16
938 .source 4 argb guint32
948 andl b, t, 0xf8000000
956 .function video_orc_pack_BGR16_be
957 .dest 2 rgb15 guint16
958 .source 4 argb guint32
976 .function video_orc_resample_bilinear_u32
982 ldreslinl d1, s1, p1, p2
984 .function video_orc_merge_linear_u8
1004 .function video_orc_memset_2d
1011 .function video_orc_memcpy_2d
1018 .function video_orc_convert_u16_to_u8
1024 .function video_orc_convert_u8_to_u16
1030 .function video_orc_splat_u16
1036 .function video_orc_splat_u32
1042 .function video_orc_splat_u64
1048 .function video_orc_splat2_u64
1056 .function video_orc_convert_I420_UYVY
1066 x2 mergebw d1, uv, y1
1067 x2 mergebw d2, uv, y2
1070 .function video_orc_convert_I420_YUY2
1080 x2 mergebw d1, y1, uv
1081 x2 mergebw d2, y2, uv
1085 .function video_orc_convert_I420_AYUV
1101 mergebw ay, alpha, y1
1103 mergebw ay, alpha, y2
1107 .function video_orc_convert_YUY2_I420
1112 .source 4 yuv1 guint8
1113 .source 4 yuv2 guint8
1118 x2 splitwb t1, ty, yuv1
1120 x2 splitwb t2, ty, yuv2
1126 .function video_orc_convert_UYVY_YUY2
1129 .source 4 uyvy guint8
1134 .function video_orc_planar_chroma_420_422
1144 .function video_orc_planar_chroma_420_444
1156 .function video_orc_planar_chroma_422_444
1166 .function video_orc_planar_chroma_444_422
1177 .function video_orc_planar_chroma_444_420
1191 .function video_orc_planar_chroma_422_420
1200 .function video_orc_convert_YUY2_AYUV
1203 .source 4 yuy2 guint8
1210 x2 splitwb uv, yy, yuy2
1211 x2 mergebw ayay, alpha, yy
1212 mergewl uvuv, uv, uv
1213 x2 mergewl ayuv, ayay, uvuv
1216 .function video_orc_convert_UYVY_AYUV
1219 .source 4 uyvy guint8
1226 x2 splitwb yy, uv, uyvy
1227 x2 mergebw ayay, alpha, yy
1228 mergewl uvuv, uv, uv
1229 x2 mergewl ayuv, ayay, uvuv
1232 .function video_orc_convert_YUY2_Y42B
1237 .source 4 yuy2 guint8
1240 x2 splitwb uv, y, yuy2
1244 .function video_orc_convert_UYVY_Y42B
1249 .source 4 uyvy guint8
1252 x2 splitwb y, uv, uyvy
1256 .function video_orc_convert_YUY2_Y444
1261 .source 4 yuy2 guint8
1266 x2 splitwb uv, y, yuy2
1272 .function video_orc_convert_UYVY_Y444
1277 .source 4 uyvy guint8
1282 x2 splitwb y, uv, uyvy
1288 .function video_orc_convert_UYVY_I420
1293 .source 4 yuv1 guint8
1294 .source 4 yuv2 guint8
1299 x2 splitwb ty, t1, yuv1
1301 x2 splitwb ty, t2, yuv2
1308 .function video_orc_convert_AYUV_I420
1314 .source 8 ayuv1 guint8
1315 .source 8 ayuv2 guint8
1325 x2 splitlw uv1, ay, ayuv1
1327 x2 splitlw uv2, ay, ayuv2
1329 x4 avgub uv, uv1, uv2
1330 x2 splitwb vv, uu, uv
1338 .function video_orc_convert_AYUV_YUY2
1341 .source 8 ayuv guint8
1348 x2 splitlw uvuv, ayay, ayuv
1349 splitlw uv1, uv2, uvuv
1350 x2 avgub uv1, uv1, uv2
1351 x2 select1wb yy, ayay
1352 x2 mergebw yuy2, yy, uv1
1355 .function video_orc_convert_AYUV_UYVY
1358 .source 8 ayuv guint8
1365 x2 splitlw uvuv, ayay, ayuv
1366 splitlw uv1, uv2, uvuv
1367 x2 avgub uv1, uv1, uv2
1368 x2 select1wb yy, ayay
1369 x2 mergebw yuy2, uv1, yy
1373 .function video_orc_convert_AYUV_Y42B
1378 .source 8 ayuv guint8
1384 x2 splitlw uvuv, ayay, ayuv
1385 splitlw uv1, uv2, uvuv
1386 x2 avgub uv1, uv1, uv2
1388 x2 select1wb y, ayay
1391 .function video_orc_convert_AYUV_Y444
1396 .source 4 ayuv guint8
1400 splitlw uv, ay, ayuv
1405 .function video_orc_convert_Y42B_YUY2
1414 x2 mergebw yuy2, y, uv
1417 .function video_orc_convert_Y42B_UYVY
1426 x2 mergebw uyvy, uv, y
1429 .function video_orc_convert_Y42B_AYUV
1442 x2 mergebw ayay, alpha, yy
1443 mergewl uvuv, uv, uv
1444 x2 mergewl ayuv, ayay, uvuv
1447 .function video_orc_convert_Y444_YUY2
1458 x2 mergebw uvuv, u, v
1459 splitlw uv1, uv2, uvuv
1460 x2 avgub uv, uv1, uv2
1461 x2 mergebw yuy2, y, uv
1464 .function video_orc_convert_Y444_UYVY
1475 x2 mergebw uvuv, u, v
1476 splitlw uv1, uv2, uvuv
1477 x2 avgub uv, uv1, uv2
1478 x2 mergebw uyvy, uv, y
1481 .function video_orc_convert_Y444_AYUV
1492 mergebw ay, alpha, yy
1493 mergewl ayuv, ay, uv
1497 .function video_orc_convert_AYUV_ARGB
1500 .source 4 ayuv guint8
1522 x4 subb x, ayuv, c128
1551 x4 addb argb, x, c128
1553 .function video_orc_convert_AYUV_BGRA
1556 .source 4 ayuv guint8
1578 x4 subb x, ayuv, c128
1607 x4 addb bgra, x, c128
1610 .function video_orc_convert_AYUV_ABGR
1613 .source 4 ayuv guint8
1635 x4 subb x, ayuv, c128
1664 x4 addb argb, x, c128
1666 .function video_orc_convert_AYUV_RGBA
1669 .source 4 ayuv guint8
1691 x4 subb x, ayuv, c128
1720 x4 addb argb, x, c128
1722 .function video_orc_convert_I420_BGRA
1774 x4 addb argb, x, c4128
1776 .function video_orc_convert_I420_ARGB
1828 x4 addb argb, x, c4128
1830 .function video_orc_matrix8
1831 .backup _custom_video_orc_matrix8
1832 .source 4 argb guint8
1855 x4 subb l1, argb, c128
1864 x4 mergebw aq, l1, l1
1868 x4 mulhsw q1, q1, pr1
1874 x4 mulhsw q1, q1, pr2
1880 x4 mulhsw q1, q1, pr3
1883 x4 convssswb ayuv2, aq
1884 x4 addb ayuv, ayuv2, c128
1886 #.function video_orc_resample_h_near_u32
1887 #.source 4 src guint32
1889 #.dest 4 dest guint32
1892 #loadidxl t, src, idx
1895 .function video_orc_resample_h_near_u32_lq
1897 .source 4 s1 guint32
1901 ldresnearl d1, s1, p1, p2
1903 .function video_orc_resample_h_2tap_1u8_lq
1909 ldreslinb d1, s1, p1, p2
1911 .function video_orc_resample_h_2tap_4u8_lq
1913 .source 4 s1 guint32
1917 ldreslinl d1, s1, p1, p2
1919 .function video_orc_resample_h_2tap_u8_lq
1937 .function video_orc_resample_h_2tap_u16
1938 .source 2 s1 guint16
1939 .source 2 s2 guint16
1959 .function video_orc_resample_v_2tap_u8_lq
1960 .source 1 src1 guint8
1961 .source 1 src2 guint8
1976 .function video_orc_resample_v_2tap_u16
1977 .source 2 src1 guint16
1978 .source 2 src2 guint16
1979 .dest 2 dest guint16
1995 .function video_orc_resample_v_2tap_u8
2016 .function video_orc_resample_v_4tap_u8_lq
2044 .function video_orc_resample_v_4tap_u8
2075 # crashes ORC for now but is potentially faster
2076 #.function video_orc_resample_h_4tap_u8
2077 #.source 1 s1 guint8
2078 #.source 1 s2 guint8
2079 #.source 1 s3 guint8
2080 #.source 1 s4 guint8
2081 #.source 2 t1 gint16
2082 #.source 2 t2 gint16
2083 #.source 2 t3 gint16
2084 #.source 2 t4 gint16
2107 .function video_orc_resample_h_multaps_u8
2116 .function video_orc_resample_h_muladdtaps_u8
2128 .function video_orc_resample_scaletaps_u8
2139 .function video_orc_resample_h_multaps_u8_lq
2148 .function video_orc_resample_h_muladdtaps_u8_lq
2159 .function video_orc_resample_h_multaps3_u8_lq
2179 .function video_orc_resample_h_muladdtaps3_u8_lq
2200 .function video_orc_resample_h_muladdscaletaps3_u8_lq
2207 .source 2 temp gint16
2225 .function video_orc_resample_scaletaps_u8_lq
2234 .function video_orc_resample_h_multaps_u16
2245 .function video_orc_resample_h_muladdtaps_u16
2258 .function video_orc_resample_scaletaps_u16
2267 .function video_orc_resample_v_multaps_u8
2276 .function video_orc_resample_v_muladdtaps_u8
2287 .function video_orc_resample_v_multaps_u16
2296 .function video_orc_resample_v_muladdtaps_u16
2308 .function video_orc_resample_v_multaps_u8_lq
2317 .function video_orc_resample_v_multaps4_u8_lq
2342 .function video_orc_resample_v_muladdtaps_u8_lq
2352 .function video_orc_resample_v_muladdtaps4_u8_lq
2378 .function video_orc_resample_v_muladdscaletaps4_u8_lq
2383 .source 2 temp gint16
2408 .function video_orc_chroma_down_h2_u8
2417 splitql ayuv2, ayuv1, s
2418 splitlw uv1, ay1, ayuv1
2419 select1lw uv2, ayuv2
2420 x2 avgub uv1, uv1, uv2
2421 mergewl ayuv1, ay1, uv1
2422 mergelq d, ayuv1, ayuv2
2424 #.function video_orc_chroma_up_h2_cs_u8
2426 #.source 4 s1 guint8
2435 #splitql ayuv2, ayuv1, s
2436 #ldresnearl ayuv3, s1, 0x20000, 0x20000
2437 #splitlw uv2, ay2, ayuv2
2438 #select1lw uv3, ayuv3
2439 #x2 avgub uv2, uv2, uv3
2440 #mergewl ayuv2, ay2, uv2
2441 #mergelq d, ayuv1, ayuv2
2443 .function video_orc_chroma_down_v2_u8
2451 splitlw uv1, ay1, s1
2453 x2 avgub uv1, uv1, uv2
2456 .function video_orc_chroma_up_v2_u8
2469 splitlw uv1, ay1, s1
2470 splitlw uv2, ay2, s2
2471 x2 convubw uuvv1, uv1
2472 x2 convubw uuvv2, uv2
2474 x2 mullw uuvv3, uuvv1, 3
2475 x2 addw uuvv3, uuvv3, uuvv2
2476 x2 addw uuvv3, uuvv3, 2
2477 x2 shruw uuvv3, uuvv3, 2
2478 x2 convsuswb uv1, uuvv3
2479 mergewl d1, ay1, uv1
2481 x2 mullw uuvv3, uuvv2, 3
2482 x2 addw uuvv3, uuvv3, uuvv1
2483 x2 addw uuvv3, uuvv3, 2
2484 x2 shruw uuvv3, uuvv3, 2
2485 x2 convsuswb uv2, uuvv3
2486 mergewl d2, ay2, uv2
2488 .function video_orc_chroma_up_v2_u16
2489 .source 8 s1 guint16
2490 .source 8 s2 guint16
2501 splitql uv1, ay1, s1
2502 splitql uv2, ay2, s2
2503 x2 convuwl uuvv1, uv1
2504 x2 convuwl uuvv2, uv2
2506 x2 mulll uuvv3, uuvv1, 3
2507 x2 addl uuvv3, uuvv3, uuvv2
2508 x2 addl uuvv3, uuvv3, 2
2509 x2 shrul uuvv3, uuvv3, 2
2510 x2 convsuslw uv1, uuvv3
2511 mergelq d1, ay1, uv1
2513 x2 mulll uuvv3, uuvv2, 3
2514 x2 addl uuvv3, uuvv3, uuvv1
2515 x2 addl uuvv3, uuvv3, 2
2516 x2 shrul uuvv3, uuvv3, 2
2517 x2 convsuslw uv2, uuvv3
2518 mergelq d2, ay2, uv2
2520 .function video_orc_chroma_down_v2_u16
2521 .source 8 s1 guint16
2522 .source 8 s2 guint16
2528 splitql uv1, ay1, s1
2530 x2 avguw uv1, uv1, uv2
2534 .function video_orc_chroma_down_v4_u8
2546 splitlw uv1, ay1, s1
2547 x2 convubw uuvv1, uv1
2549 x2 convubw uuvv2, uv1
2550 x2 addw uuvv3, uuvv1, uuvv2
2552 x2 convubw uuvv1, uv1
2554 x2 convubw uuvv2, uv1
2555 x2 addw uuvv1, uuvv1, uuvv2
2556 x2 shlw uuvv2, uuvv1, 1
2557 x2 addw uuvv1, uuvv1, uuvv2
2558 x2 addw uuvv3, uuvv3, uuvv1
2559 x2 addw uuvv3, uuvv3, 4
2560 x2 shruw uuvv3, uuvv3, 3
2561 x2 convsuswb uv1, uuvv3
2564 .function video_orc_chroma_down_v4_u16
2565 .source 8 s1 guint16
2566 .source 8 s2 guint16
2567 .source 8 s3 guint16
2568 .source 8 s4 guint16
2576 splitql uv1, ay1, s1
2577 x2 convuwl uuvv1, uv1
2579 x2 convuwl uuvv2, uv1
2580 x2 addl uuvv3, uuvv1, uuvv2
2582 x2 convuwl uuvv1, uv1
2584 x2 convuwl uuvv2, uv1
2585 x2 addl uuvv1, uuvv1, uuvv2
2586 x2 shll uuvv2, uuvv1, 1
2587 x2 addl uuvv1, uuvv1, uuvv2
2588 x2 addl uuvv3, uuvv3, uuvv1
2589 x2 addl uuvv3, uuvv3, 4
2590 x2 shrul uuvv3, uuvv3, 3
2591 x2 convsuslw uv1, uuvv3
2594 .function video_orc_dither_none_4u8_mask
2602 .function video_orc_dither_none_4u16_mask
2610 .function video_orc_dither_verterr_4u8_mask
2624 .function video_orc_dither_fs_muladd_u8
2636 # due to error propagation we should disable
2637 # loop_shift for this function and only work on
2638 # 4 pixels at a time.
2639 #.function video_orc_dither_fs_add_4u8_mask
2654 #x4 andnw p, masks, t1
2656 #x4 andw e2, t1, masks
2658 .function video_orc_dither_ordered_u8
2664 .function video_orc_dither_ordered_4u8_mask
2665 .source 8 e1 guint16
2677 .function video_orc_dither_ordered_4u16_mask
2678 .source 8 e1 guint16
2688 .function video_orc_convert_UYVY_GRAY8