2 /* autogenerated from blendorc.orc */
/* Fixed-width integer aliases (orc_int8 ... orc_uint64) plus ORC_UINT64_C,
 * a macro for spelling 64-bit unsigned literals.  Guarded by
 * _ORC_INTEGER_TYPEDEFS_ so the aliases are declared only once.
 * NOTE(review): the #else / #endif directives and any #include lines of this
 * conditional chain are not among the visible lines -- confirm against the
 * generated file. */
9 #ifndef _ORC_INTEGER_TYPEDEFS_
10 #define _ORC_INTEGER_TYPEDEFS_
/* C99 or newer: alias the standard exact-width types (int8_t ... uint64_t). */
11 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
13 typedef int8_t orc_int8;
14 typedef int16_t orc_int16;
15 typedef int32_t orc_int32;
16 typedef int64_t orc_int64;
17 typedef uint8_t orc_uint8;
18 typedef uint16_t orc_uint16;
19 typedef uint32_t orc_uint32;
20 typedef uint64_t orc_uint64;
21 #define ORC_UINT64_C(x) UINT64_C(x)
/* MSVC: use the compiler's sized __intN keywords; `inline` is also mapped to
 * __inline in this branch. */
22 #elif defined(_MSC_VER)
23 typedef signed __int8 orc_int8;
24 typedef signed __int16 orc_int16;
25 typedef signed __int32 orc_int32;
26 typedef signed __int64 orc_int64;
27 typedef unsigned __int8 orc_uint8;
28 typedef unsigned __int16 orc_uint16;
29 typedef unsigned __int32 orc_uint32;
30 typedef unsigned __int64 orc_uint64;
31 #define ORC_UINT64_C(x) (x##Ui64)
32 #define inline __inline
/* Plain C types.  Presumably the fallback (#else) branch for every other
 * compiler -- the directive itself is not visible here. */
35 typedef signed char orc_int8;
36 typedef short orc_int16;
37 typedef int orc_int32;
38 typedef unsigned char orc_uint8;
39 typedef unsigned short orc_uint16;
40 typedef unsigned int orc_uint32;
/* Choose the 64-bit type: long long when int and long share the same maximum
 * (long is then no wider than int). */
41 #if INT_MAX == LONG_MAX
42 typedef long long orc_int64;
43 typedef unsigned long long orc_uint64;
44 #define ORC_UINT64_C(x) (x##ULL)
/* Presumably the #else of the INT_MAX test: long itself serves as the 64-bit
 * type. */
46 typedef long orc_int64;
47 typedef unsigned long orc_uint64;
48 #define ORC_UINT64_C(x) (x##UL)
/* ORC_RESTRICT: spelling of the no-alias pointer qualifier.  C99 and newer
 * use the `restrict` keyword, GCC 4+ (in older language modes) uses
 * `__restrict__`.
 * NOTE(review): the fallback definition and the closing #endif are not
 * visible in this extract. */
73 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
74 #define ORC_RESTRICT restrict
75 #elif defined(__GNUC__) && __GNUC__ >= 4
76 #define ORC_RESTRICT __restrict__
/* Prototypes of the generated kernels.
 *
 * One-dimensional kernels (orc_splat_u32, orc_memcpy_u32) take a destination,
 * a parameter or source, and an element count n.
 *
 * Two-dimensional kernels take a destination and source, each with a stride,
 * a parameter p1, and the counts n and m.  In the implementations further
 * down, rows are addressed as base + stride * j for j < m (a byte offset, see
 * ORC_PTR_OFFSET) and each row is processed for i < n.
 *
 * NOTE(review): the continuation line of the orc_memcpy_u32 prototype is not
 * visible in this extract. */
85 void orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n);
86 void orc_memcpy_u32 (guint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1,
88 void orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
89 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
90 void orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
91 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
92 void orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
93 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
94 void orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
95 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
100 /* begin Orc C target preamble */
/* Generic helpers.  All four are plain macros: each argument may be evaluated
 * more than once, so arguments must be free of side effects. */
101 #define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
102 #define ORC_ABS(a) ((a)<0 ? -(a) : (a))
103 #define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))
104 #define ORC_MAX(a,b) ((a)>(b) ? (a) : (b))
/* Range limits for 8-bit (SB/UB), 16-bit (SW/UW) and 32-bit (SL/UL) values,
 * signed and unsigned.  Signed minima are written as -1-MAX so that the
 * literal itself never exceeds the positive range.
 * NOTE(review): ORC_UB_MIN, ORC_UW_MIN and ORC_UL_MIN are used by the clamp
 * macros below but are not defined on any visible line -- presumably they sit
 * on lines missing from this extract. */
105 #define ORC_SB_MAX 127
106 #define ORC_SB_MIN (-1-ORC_SB_MAX)
107 #define ORC_UB_MAX 255
109 #define ORC_SW_MAX 32767
110 #define ORC_SW_MIN (-1-ORC_SW_MAX)
111 #define ORC_UW_MAX 65535
113 #define ORC_SL_MAX 2147483647
114 #define ORC_SL_MIN (-1-ORC_SL_MAX)
115 #define ORC_UL_MAX 4294967295U
/* Saturate x into the range of the named type. */
117 #define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
118 #define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
119 #define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
120 #define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
121 #define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
122 #define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)
/* Byte-order reversal of a 16-bit (W), 32-bit (L) or 64-bit (Q) value.
 * NOTE(review): in ORC_SWAP_L, ((x)&0xff)<<24 has type int when x is a signed
 * int; with bit 7 of x set the shift reaches the sign bit, which ISO C leaves
 * undefined.  Confirm callers pass unsigned operands. */
123 #define ORC_SWAP_W(x) ((((x)&0xff)<<8) | (((x)&0xff00)>>8))
124 #define ORC_SWAP_L(x) ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24))
125 #define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | (((x)&ORC_UINT64_C(0xff00))<<40) | (((x)&ORC_UINT64_C(0xff0000))<<24) | (((x)&ORC_UINT64_C(0xff000000))<<8) | (((x)&ORC_UINT64_C(0xff00000000))>>8) | (((x)&ORC_UINT64_C(0xff0000000000))>>24) | (((x)&ORC_UINT64_C(0xff000000000000))>>40) | (((x)&ORC_UINT64_C(0xff00000000000000))>>56))
/* Advance ptr by `offset` bytes (arithmetic is done on unsigned char *) and
 * return the result as void *. */
126 #define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))
/* Helpers that operate on the integer bit pattern of a floating-point value.
 * The masks split the pattern into sign / exponent / mantissa fields of
 * 1/8/23 bits (32-bit) and 1/11/52 bits (64-bit).
 *   ORC_DENORMAL*: when the exponent field is zero, clear the mantissa bits
 *                  and keep the rest; otherwise return x unchanged.
 *   ORC_ISNAN*:    true when the exponent field is all ones and the mantissa
 *                  is non-zero. */
127 #define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff))
128 #define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0))
129 #define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff)))
130 #define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0))
/* Same ORC_RESTRICT selection as earlier in the file.
 * NOTE(review): any guard against redefinition, the fallback branch and the
 * #endif are not visible in this extract. */
132 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
133 #define ORC_RESTRICT restrict
134 #elif defined(__GNUC__) && __GNUC__ >= 4
135 #define ORC_RESTRICT __restrict__
140 /* end Orc C target preamble */
/* orc_splat_u32, plain-C variant: views d1 as an array of orc_union32 and
 * iterates over its first n elements.
 * NOTE(review): the return type, local declarations and the loop body are
 * not visible in this extract; presumably every element is set to p1, which
 * is what the "copyl" D1 <- P1 program registered under the same name does. */
147 orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n)
150 orc_union32 *ORC_RESTRICT ptr0;
154 ptr0 = (orc_union32 *) d1;
159 for (i = 0; i < n; i++) {
/* Backup function for the "orc_splat_u32" program (it is the one passed to
 * orc_program_set_backup_function).  All inputs come from the executor: the
 * destination from ex->arrays[0] and the 32-bit value from ex->params[24].
 * NOTE(review): index 24 is presumably ORC_VAR_P1 (the dispatching wrapper
 * stores p1 at ex->params[ORC_VAR_P1]); where n is read from and the loop
 * body are not visible in this extract. */
170 _backup_orc_splat_u32 (OrcExecutor * ORC_RESTRICT ex)
174 orc_union32 *ORC_RESTRICT ptr0;
178 ptr0 = (orc_union32 *) ex->arrays[0];
181 var32.i = ex->params[24];
183 for (i = 0; i < n; i++) {
/* orc_splat_u32, Orc-dispatch variant.
 *
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it builds and
 * compiles an OrcProgram named "orc_splat_u32": one 4-byte destination "d1",
 * one 4-byte parameter "p1", and a single "copyl" instruction whose first two
 * variable arguments are D1 and P1.  The program pointer and the p_inited
 * flag are function-level statics.
 * Afterwards the executor is filled with the caller's d1 and p1.
 *
 * NOTE(review): the test on p_inited, the assignment of n to the executor and
 * the actual invocation of the compiled code are not visible in this
 * extract. */
193 orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n)
195 OrcExecutor _ex, *ex = &_ex;
196 static int p_inited = 0;
197 static OrcProgram *p = 0;
198 void (*func) (OrcExecutor *);
201 orc_once_mutex_lock ();
204 p = orc_program_new ();
205 orc_program_set_name (p, "orc_splat_u32");
206 orc_program_set_backup_function (p, _backup_orc_splat_u32);
207 orc_program_add_destination (p, 4, "d1");
208 orc_program_add_parameter (p, 4, "p1");
210 orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1,
213 orc_program_compile (p);
216 orc_once_mutex_unlock ();
221 ex->arrays[ORC_VAR_D1] = d1;
222 ex->params[ORC_VAR_P1] = p1;
/* orc_memcpy_u32, plain-C variant: views d1 and s1 as arrays of orc_union32
 * and iterates over n elements.
 * NOTE(review): the rest of the parameter list and the loop body are not
 * visible in this extract; presumably element i of s1 is copied to element i
 * of d1, matching the "copyl" D1 <- S1 program of the same name. */
233 orc_memcpy_u32 (guint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1,
237 orc_union32 *ORC_RESTRICT ptr0;
238 const orc_union32 *ORC_RESTRICT ptr4;
242 ptr0 = (orc_union32 *) d1;
243 ptr4 = (orc_union32 *) s1;
246 for (i = 0; i < n; i++) {
/* Backup function for the "orc_memcpy_u32" program.  Destination and source
 * are taken from the executor slots ex->arrays[0] and ex->arrays[4].
 * NOTE(review): slots 0 and 4 are presumably ORC_VAR_D1 and ORC_VAR_S1 (the
 * wrapper fills ex->arrays[ORC_VAR_D1] / [ORC_VAR_S1]); where n is read from
 * and the loop body are not visible in this extract. */
259 _backup_orc_memcpy_u32 (OrcExecutor * ORC_RESTRICT ex)
263 orc_union32 *ORC_RESTRICT ptr0;
264 const orc_union32 *ORC_RESTRICT ptr4;
268 ptr0 = (orc_union32 *) ex->arrays[0];
269 ptr4 = (orc_union32 *) ex->arrays[4];
272 for (i = 0; i < n; i++) {
/* orc_memcpy_u32, Orc-dispatch variant.
 *
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it builds and
 * compiles an OrcProgram named "orc_memcpy_u32": a 4-byte destination "d1",
 * a 4-byte source "s1", and a single "copyl" instruction whose first two
 * variable arguments are D1 and S1.  The executor then receives the caller's
 * d1 and s1 (the const qualifier of s1 is cast away for the void * slot).
 *
 * NOTE(review): the test on p_inited, the assignment of n to the executor and
 * the actual invocation of the compiled code are not visible in this
 * extract. */
284 orc_memcpy_u32 (guint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1,
287 OrcExecutor _ex, *ex = &_ex;
288 static int p_inited = 0;
289 static OrcProgram *p = 0;
290 void (*func) (OrcExecutor *);
293 orc_once_mutex_lock ();
296 p = orc_program_new ();
297 orc_program_set_name (p, "orc_memcpy_u32");
298 orc_program_set_backup_function (p, _backup_orc_memcpy_u32);
299 orc_program_add_destination (p, 4, "d1");
300 orc_program_add_source (p, 4, "s1");
302 orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1,
305 orc_program_compile (p);
308 orc_once_mutex_unlock ();
313 ex->arrays[ORC_VAR_D1] = d1;
314 ex->arrays[ORC_VAR_S1] = (void *) s1;
/* orc_blend_u8, plain-C variant: blends an m-row by n-byte source plane into
 * the destination plane with weight p1.
 *
 * Row j starts d1_stride * j bytes after d1 and s1_stride * j bytes after s1.
 * For each byte, with a and b the two widened inputs (var38, var39) and w the
 * weight (var36), the visible arithmetic is
 *     result = clamp_ub( (uint16)((a << 8) + (((b - a) * w) & 0xffff)) >> 8 )
 * NOTE(review): the loads of var34/var35, the assignment of var36 and the
 * final store are not visible in this extract; judging by the Orc program of
 * the same name, a is the destination byte, b the source byte and w is p1. */
325 orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
326 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
330 orc_int8 *ORC_RESTRICT ptr0;
331 const orc_int8 *ORC_RESTRICT ptr4;
344 for (j = 0; j < m; j++) {
345 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
346 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
351 for (i = 0; i < n; i++) {
/* Widen both inputs from unsigned 8-bit. */
355 var38.i = (orc_uint8) var34;
359 var39.i = (orc_uint8) var35;
/* Difference scaled by the weight, truncated to 16 bits. */
361 var40.i = var39.i - var38.i;
363 var41.i = (var40.i * var36.i) & 0xffff;
/* Add to the first input scaled by 256, then drop the 8 fraction bits. */
365 var42.i = var38.i << 8;
367 var43.i = var42.i + var41.i;
369 var44.i = ((orc_uint16) var43.i) >> 8;
/* Saturate to the unsigned-byte range. */
371 var37 = ORC_CLAMP_UB (var44.i);
/* Backup function for the "orc_blend_u8" program.  Same per-byte arithmetic
 * as the plain-C orc_blend_u8, with every input taken from the executor:
 *   m           = ex->params[ORC_VAR_A1]
 *   row j base  = ex->arrays[0] + ex->params[0] * j   (destination)
 *                 ex->arrays[4] + ex->params[4] * j   (source)
 *   weight      = ex->params[24]
 * NOTE(review): slots 0 / 4 / 24 are presumably ORC_VAR_D1 / ORC_VAR_S1 /
 * ORC_VAR_P1, which are the slots the wrapper fills; where n is read from,
 * the element loads and the final store are not visible in this extract. */
381 _backup_orc_blend_u8 (OrcExecutor * ORC_RESTRICT ex)
386 int m = ex->params[ORC_VAR_A1];
387 orc_int8 *ORC_RESTRICT ptr0;
388 const orc_int8 *ORC_RESTRICT ptr4;
401 for (j = 0; j < m; j++) {
402 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
403 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
406 var36.i = ex->params[24];
408 for (i = 0; i < n; i++) {
/* Widen both inputs from unsigned 8-bit. */
412 var38.i = (orc_uint8) var34;
416 var39.i = (orc_uint8) var35;
/* Difference scaled by the weight, truncated to 16 bits. */
418 var40.i = var39.i - var38.i;
420 var41.i = (var40.i * var36.i) & 0xffff;
/* Add to the first input scaled by 256, then drop the 8 fraction bits. */
422 var42.i = var38.i << 8;
424 var43.i = var42.i + var41.i;
426 var44.i = ((orc_uint16) var43.i) >> 8;
/* Saturate to the unsigned-byte range. */
428 var37 = ORC_CLAMP_UB (var44.i);
/* orc_blend_u8, Orc-dispatch variant.
 *
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it builds and
 * compiles a 2-D OrcProgram named "orc_blend_u8" with 1-byte destination and
 * source, the constant c1 = 8, a 2-byte parameter p1 and two 2-byte
 * temporaries.  The instruction list, in order:
 *   convubw t1 <- d1        widen destination
 *   convubw t2 <- s1        widen source
 *   subw    t2 <- t2, t1    source - destination
 *   mullw   t2 <- t2, p1    scale by the weight
 *   shlw    t1 <- t1, c1    destination << 8
 *   addw    t2 <- t1, t2
 *   shruw   t2 <- t2, c1    >> 8
 *   convsuswb d1 <- t2      saturating narrow to unsigned byte
 * The executor then receives m, both pointers, both strides and p1.
 *
 * NOTE(review): the test on p_inited, the assignment of n to the executor,
 * the trailing arguments of most append calls and the invocation of the
 * compiled code are not visible in this extract. */
437 orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
438 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
440 OrcExecutor _ex, *ex = &_ex;
441 static int p_inited = 0;
442 static OrcProgram *p = 0;
443 void (*func) (OrcExecutor *);
446 orc_once_mutex_lock ();
449 p = orc_program_new ();
450 orc_program_set_2d (p);
451 orc_program_set_name (p, "orc_blend_u8");
452 orc_program_set_backup_function (p, _backup_orc_blend_u8);
453 orc_program_add_destination (p, 1, "d1");
454 orc_program_add_source (p, 1, "s1");
455 orc_program_add_constant (p, 1, 0x00000008, "c1");
456 orc_program_add_parameter (p, 2, "p1");
457 orc_program_add_temporary (p, 2, "t1");
458 orc_program_add_temporary (p, 2, "t2");
460 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
462 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1,
464 orc_program_append_2 (p, "subw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1,
466 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1,
468 orc_program_append_2 (p, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
470 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2,
472 orc_program_append_2 (p, "shruw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
474 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2,
475 ORC_VAR_D1, ORC_VAR_D1);
477 orc_program_compile (p);
480 orc_once_mutex_unlock ();
485 ORC_EXECUTOR_M (ex) = m;
486 ex->arrays[ORC_VAR_D1] = d1;
487 ex->params[ORC_VAR_D1] = d1_stride;
488 ex->arrays[ORC_VAR_S1] = (void *) s1;
489 ex->params[ORC_VAR_S1] = s1_stride;
490 ex->params[ORC_VAR_P1] = p1;
/* orc_blend_argb, plain-C variant: blends m rows of n 32-bit pixels from s1
 * into d1.  Each pixel is handled as four byte lanes (x4[0..3]).
 *
 * Per pixel, as far as the visible lines show:
 *   1. the low byte of a 32-bit value (var43) is replicated into all four
 *      lanes and widened                                   -> var45
 *   2. scaled by var39 and shifted right by 8              -> var47 (weight)
 *   3. two pixels are widened lane by lane                 -> var48, var50
 *   4. (var48 - var50) * weight, divided by 255 with rounding
 *   5. added back to var50, narrowed, and OR-ed with 0x000000ff so the low
 *      byte of the result word is forced to 0xff.
 * NOTE(review): the loads, the assignment of var39, the heads of several
 * multi-line statements and the store are not visible in this extract.
 * Judging by the Orc program of the same name, var43/var48 derive from the
 * source pixel, var50 from the destination pixel and var39 from p1. */
501 orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
502 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
506 orc_union32 *ORC_RESTRICT ptr0;
507 const orc_union32 *ORC_RESTRICT ptr4;
527 for (j = 0; j < m; j++) {
528 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
529 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
537 var40.i = (int) 0x000000ff; /* 255 or 1.25987e-321f */
539 for (i = 0; i < n; i++) {
/* Replicate the low byte into all four byte positions. */
548 ((var43 & 0xff) << 24) | ((var43 & 0xff) << 16) | ((var43 & 0xff) <<
/* Widen the replicated byte to 16 bits per lane. */
551 var45.x4[0] = (orc_uint8) var44.x4[0];
552 var45.x4[1] = (orc_uint8) var44.x4[1];
553 var45.x4[2] = (orc_uint8) var44.x4[2];
554 var45.x4[3] = (orc_uint8) var44.x4[3];
/* Scale by var39 and keep the upper 8 of the 16 result bits. */
556 var46.x4[0] = (var45.x4[0] * var39.x4[0]) & 0xffff;
557 var46.x4[1] = (var45.x4[1] * var39.x4[1]) & 0xffff;
558 var46.x4[2] = (var45.x4[2] * var39.x4[2]) & 0xffff;
559 var46.x4[3] = (var45.x4[3] * var39.x4[3]) & 0xffff;
561 var47.x4[0] = ((orc_uint16) var46.x4[0]) >> 8;
562 var47.x4[1] = ((orc_uint16) var46.x4[1]) >> 8;
563 var47.x4[2] = ((orc_uint16) var46.x4[2]) >> 8;
564 var47.x4[3] = ((orc_uint16) var46.x4[3]) >> 8;
/* Widen the two pixels being blended. */
566 var48.x4[0] = (orc_uint8) var41.x4[0];
567 var48.x4[1] = (orc_uint8) var41.x4[1];
568 var48.x4[2] = (orc_uint8) var41.x4[2];
569 var48.x4[3] = (orc_uint8) var41.x4[3];
573 var50.x4[0] = (orc_uint8) var49.x4[0];
574 var50.x4[1] = (orc_uint8) var49.x4[1];
575 var50.x4[2] = (orc_uint8) var49.x4[2];
576 var50.x4[3] = (orc_uint8) var49.x4[3];
/* Per-lane difference, weighted. */
578 var51.x4[0] = var48.x4[0] - var50.x4[0];
579 var51.x4[1] = var48.x4[1] - var50.x4[1];
580 var51.x4[2] = var48.x4[2] - var50.x4[2];
581 var51.x4[3] = var48.x4[3] - var50.x4[3];
583 var52.x4[0] = (var51.x4[0] * var47.x4[0]) & 0xffff;
584 var52.x4[1] = (var51.x4[1] * var47.x4[1]) & 0xffff;
585 var52.x4[2] = (var51.x4[2] * var47.x4[2]) & 0xffff;
586 var52.x4[3] = (var51.x4[3] * var47.x4[3]) & 0xffff;
/* Division by 255 with rounding, computed as ((t + (t >> 8)) >> 8) with
 * t = x + 128, all in 16-bit unsigned arithmetic. */
589 ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
590 (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
592 ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
593 (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
595 ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
596 (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
598 ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
599 (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
/* Add the weighted difference back onto var50. */
601 var54.x4[0] = var50.x4[0] + var53.x4[0];
602 var54.x4[1] = var50.x4[1] + var53.x4[1];
603 var54.x4[2] = var50.x4[2] + var53.x4[2];
604 var54.x4[3] = var50.x4[3] + var53.x4[3];
/* Lane-wise copy into var55 (presumably the narrowing back to bytes; the
 * variable types are not visible here). */
606 var55.x4[0] = var54.x4[0];
607 var55.x4[1] = var54.x4[1];
608 var55.x4[2] = var54.x4[2];
609 var55.x4[3] = var54.x4[3];
/* Force the low byte of the 32-bit result to 0xff. */
611 var56.i = var55.i | var40.i;
/* Backup function for the "orc_blend_argb" program.  The per-pixel arithmetic
 * is the same as in the plain-C orc_blend_argb; inputs come from the
 * executor:
 *   m           = ex->params[ORC_VAR_A1]
 *   row j base  = ex->arrays[0] + ex->params[0] * j
 *                 ex->arrays[4] + ex->params[4] * j
 *   weight      = ex->params[24], copied into all four lanes of var39
 * NOTE(review): slots 0 / 4 / 24 are presumably ORC_VAR_D1 / ORC_VAR_S1 /
 * ORC_VAR_P1; where n is read from, the pixel loads, the heads of several
 * multi-line statements and the store are not visible in this extract. */
621 _backup_orc_blend_argb (OrcExecutor * ORC_RESTRICT ex)
626 int m = ex->params[ORC_VAR_A1];
627 orc_union32 *ORC_RESTRICT ptr0;
628 const orc_union32 *ORC_RESTRICT ptr4;
648 for (j = 0; j < m; j++) {
649 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
650 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
/* Broadcast the weight parameter into every lane. */
653 var39.x4[0] = ex->params[24];
654 var39.x4[1] = ex->params[24];
655 var39.x4[2] = ex->params[24];
656 var39.x4[3] = ex->params[24];
658 var40.i = (int) 0x000000ff; /* 255 or 1.25987e-321f */
660 for (i = 0; i < n; i++) {
/* Replicate the low byte into all four byte positions. */
669 ((var43 & 0xff) << 24) | ((var43 & 0xff) << 16) | ((var43 & 0xff) <<
/* Widen the replicated byte to 16 bits per lane. */
672 var45.x4[0] = (orc_uint8) var44.x4[0];
673 var45.x4[1] = (orc_uint8) var44.x4[1];
674 var45.x4[2] = (orc_uint8) var44.x4[2];
675 var45.x4[3] = (orc_uint8) var44.x4[3];
/* Scale by the weight and keep the upper 8 of the 16 result bits. */
677 var46.x4[0] = (var45.x4[0] * var39.x4[0]) & 0xffff;
678 var46.x4[1] = (var45.x4[1] * var39.x4[1]) & 0xffff;
679 var46.x4[2] = (var45.x4[2] * var39.x4[2]) & 0xffff;
680 var46.x4[3] = (var45.x4[3] * var39.x4[3]) & 0xffff;
682 var47.x4[0] = ((orc_uint16) var46.x4[0]) >> 8;
683 var47.x4[1] = ((orc_uint16) var46.x4[1]) >> 8;
684 var47.x4[2] = ((orc_uint16) var46.x4[2]) >> 8;
685 var47.x4[3] = ((orc_uint16) var46.x4[3]) >> 8;
/* Widen the two pixels being blended. */
687 var48.x4[0] = (orc_uint8) var41.x4[0];
688 var48.x4[1] = (orc_uint8) var41.x4[1];
689 var48.x4[2] = (orc_uint8) var41.x4[2];
690 var48.x4[3] = (orc_uint8) var41.x4[3];
694 var50.x4[0] = (orc_uint8) var49.x4[0];
695 var50.x4[1] = (orc_uint8) var49.x4[1];
696 var50.x4[2] = (orc_uint8) var49.x4[2];
697 var50.x4[3] = (orc_uint8) var49.x4[3];
/* Per-lane difference, weighted. */
699 var51.x4[0] = var48.x4[0] - var50.x4[0];
700 var51.x4[1] = var48.x4[1] - var50.x4[1];
701 var51.x4[2] = var48.x4[2] - var50.x4[2];
702 var51.x4[3] = var48.x4[3] - var50.x4[3];
704 var52.x4[0] = (var51.x4[0] * var47.x4[0]) & 0xffff;
705 var52.x4[1] = (var51.x4[1] * var47.x4[1]) & 0xffff;
706 var52.x4[2] = (var51.x4[2] * var47.x4[2]) & 0xffff;
707 var52.x4[3] = (var51.x4[3] * var47.x4[3]) & 0xffff;
/* Division by 255 with rounding, computed as ((t + (t >> 8)) >> 8) with
 * t = x + 128, all in 16-bit unsigned arithmetic. */
710 ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
711 (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
713 ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
714 (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
716 ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
717 (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
719 ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
720 (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
/* Add the weighted difference back onto var50. */
722 var54.x4[0] = var50.x4[0] + var53.x4[0];
723 var54.x4[1] = var50.x4[1] + var53.x4[1];
724 var54.x4[2] = var50.x4[2] + var53.x4[2];
725 var54.x4[3] = var50.x4[3] + var53.x4[3];
/* Lane-wise copy into var55 (presumably the narrowing back to bytes; the
 * variable types are not visible here). */
727 var55.x4[0] = var54.x4[0];
728 var55.x4[1] = var54.x4[1];
729 var55.x4[2] = var54.x4[2];
730 var55.x4[3] = var54.x4[3];
/* Force the low byte of the 32-bit result to 0xff. */
732 var56.i = var55.i | var40.i;
/* orc_blend_argb, Orc-dispatch variant.
 *
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it builds and
 * compiles a 2-D OrcProgram named "orc_blend_argb": 4-byte destination and
 * source, constants c1 = 0x000000ff and c2 = 8, a 2-byte parameter p1 and
 * temporaries t1..t7.  The instruction list, in order (the third argument of
 * orc_program_append_2 is 2 for the steps marked x4, 0 otherwise):
 *   loadl   t1 <- s1            source pixel
 *   convlw  t2 <- t1
 *   convwb  t3 <- t2            low byte of the source pixel
 *   splatbl t4 <- t3            replicate it into four bytes
 *   convubw t7 <- t4       x4
 *   mullw   t7 <- t7, p1   x4
 *   shruw   t7 <- t7, c2   x4   weight = (byte * p1) >> 8
 *   convubw t6 <- t1       x4   widened source
 *   loadl   t1 <- d1            destination pixel
 *   convubw t5 <- t1       x4   widened destination
 *   subw    t6 <- t6, t5   x4
 *   mullw   t6 <- t6, t7   x4
 *   div255w t6 <- t6       x4
 *   addw    t5 <- t5, t6   x4
 *   convwb  t1 <- t5       x4
 *   orl     t1 <- t1, c1        force low byte to 0xff
 *   storel  d1 <- t1
 * The executor then receives m, both pointers, both strides and p1.
 *
 * NOTE(review): the test on p_inited, the assignment of n to the executor,
 * the trailing arguments of the append calls and the invocation of the
 * compiled code are not visible in this extract. */
741 orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
742 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
744 OrcExecutor _ex, *ex = &_ex;
745 static int p_inited = 0;
746 static OrcProgram *p = 0;
747 void (*func) (OrcExecutor *);
750 orc_once_mutex_lock ();
753 p = orc_program_new ();
754 orc_program_set_2d (p);
755 orc_program_set_name (p, "orc_blend_argb");
756 orc_program_set_backup_function (p, _backup_orc_blend_argb);
757 orc_program_add_destination (p, 4, "d1");
758 orc_program_add_source (p, 4, "s1");
759 orc_program_add_constant (p, 4, 0x000000ff, "c1");
760 orc_program_add_constant (p, 4, 0x00000008, "c2");
761 orc_program_add_parameter (p, 2, "p1");
762 orc_program_add_temporary (p, 4, "t1");
763 orc_program_add_temporary (p, 2, "t2");
764 orc_program_add_temporary (p, 1, "t3");
765 orc_program_add_temporary (p, 4, "t4");
766 orc_program_add_temporary (p, 8, "t5");
767 orc_program_add_temporary (p, 8, "t6");
768 orc_program_add_temporary (p, 8, "t7");
770 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
772 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
774 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
776 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
778 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T4, ORC_VAR_D1,
780 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_P1,
782 orc_program_append_2 (p, "shruw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
784 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1,
786 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
788 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1,
790 orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
792 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T7,
794 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
796 orc_program_append_2 (p, "addw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T6,
798 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T5, ORC_VAR_D1,
800 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
802 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
805 orc_program_compile (p);
808 orc_once_mutex_unlock ();
813 ORC_EXECUTOR_M (ex) = m;
814 ex->arrays[ORC_VAR_D1] = d1;
815 ex->params[ORC_VAR_D1] = d1_stride;
816 ex->arrays[ORC_VAR_S1] = (void *) s1;
817 ex->params[ORC_VAR_S1] = s1_stride;
818 ex->params[ORC_VAR_P1] = p1;
/* orc_blend_bgra, plain-C variant: blends m rows of n 32-bit pixels from s1
 * into d1.  Each pixel is handled as four byte lanes (x4[0..3]).
 *
 * Per pixel, as far as the visible lines show:
 *   1. var42 is shifted right by 24 (unsigned), i.e. its top byte is
 *      extracted                                           -> var43
 *   2. the low byte of var45 is replicated into all four lanes and widened
 *                                                          -> var47
 *   3. scaled by var40 and shifted right by 8              -> var49 (weight)
 *   4. two pixels are widened lane by lane                 -> var50, var52
 *   5. (var50 - var52) * weight, divided by 255 with rounding
 *   6. added back to var52, narrowed, and OR-ed with 0xff000000 so the top
 *      byte of the result word is forced to 0xff.
 * NOTE(review): the loads, the assignment of var40, the steps between var43
 * and var45, the heads of several multi-line statements and the store are
 * not visible in this extract.  Judging by the Orc program of the same name,
 * var42 is the source pixel, var52 derives from the destination pixel and
 * var40 from p1. */
829 orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
830 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
834 orc_union32 *ORC_RESTRICT ptr0;
835 const orc_union32 *ORC_RESTRICT ptr4;
856 for (j = 0; j < m; j++) {
857 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
858 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
866 var41.i = (int) 0xff000000; /* -16777216 or 2.11371e-314f */
868 for (i = 0; i < n; i++) {
/* Extract the top byte of the 32-bit word. */
872 var43.i = ((orc_uint32) var42.i) >> 24;
/* Replicate the low byte into all four byte positions. */
879 ((var45 & 0xff) << 24) | ((var45 & 0xff) << 16) | ((var45 & 0xff) <<
/* Widen the replicated byte to 16 bits per lane. */
882 var47.x4[0] = (orc_uint8) var46.x4[0];
883 var47.x4[1] = (orc_uint8) var46.x4[1];
884 var47.x4[2] = (orc_uint8) var46.x4[2];
885 var47.x4[3] = (orc_uint8) var46.x4[3];
/* Scale by var40 and keep the upper 8 of the 16 result bits. */
887 var48.x4[0] = (var47.x4[0] * var40.x4[0]) & 0xffff;
888 var48.x4[1] = (var47.x4[1] * var40.x4[1]) & 0xffff;
889 var48.x4[2] = (var47.x4[2] * var40.x4[2]) & 0xffff;
890 var48.x4[3] = (var47.x4[3] * var40.x4[3]) & 0xffff;
892 var49.x4[0] = ((orc_uint16) var48.x4[0]) >> 8;
893 var49.x4[1] = ((orc_uint16) var48.x4[1]) >> 8;
894 var49.x4[2] = ((orc_uint16) var48.x4[2]) >> 8;
895 var49.x4[3] = ((orc_uint16) var48.x4[3]) >> 8;
/* Widen the two pixels being blended. */
897 var50.x4[0] = (orc_uint8) var42.x4[0];
898 var50.x4[1] = (orc_uint8) var42.x4[1];
899 var50.x4[2] = (orc_uint8) var42.x4[2];
900 var50.x4[3] = (orc_uint8) var42.x4[3];
904 var52.x4[0] = (orc_uint8) var51.x4[0];
905 var52.x4[1] = (orc_uint8) var51.x4[1];
906 var52.x4[2] = (orc_uint8) var51.x4[2];
907 var52.x4[3] = (orc_uint8) var51.x4[3];
/* Per-lane difference, weighted. */
909 var53.x4[0] = var50.x4[0] - var52.x4[0];
910 var53.x4[1] = var50.x4[1] - var52.x4[1];
911 var53.x4[2] = var50.x4[2] - var52.x4[2];
912 var53.x4[3] = var50.x4[3] - var52.x4[3];
914 var54.x4[0] = (var53.x4[0] * var49.x4[0]) & 0xffff;
915 var54.x4[1] = (var53.x4[1] * var49.x4[1]) & 0xffff;
916 var54.x4[2] = (var53.x4[2] * var49.x4[2]) & 0xffff;
917 var54.x4[3] = (var53.x4[3] * var49.x4[3]) & 0xffff;
/* Division by 255 with rounding, computed as ((t + (t >> 8)) >> 8) with
 * t = x + 128, all in 16-bit unsigned arithmetic. */
920 ((orc_uint16) (((orc_uint16) (var54.x4[0] + 128)) +
921 (((orc_uint16) (var54.x4[0] + 128)) >> 8))) >> 8;
923 ((orc_uint16) (((orc_uint16) (var54.x4[1] + 128)) +
924 (((orc_uint16) (var54.x4[1] + 128)) >> 8))) >> 8;
926 ((orc_uint16) (((orc_uint16) (var54.x4[2] + 128)) +
927 (((orc_uint16) (var54.x4[2] + 128)) >> 8))) >> 8;
929 ((orc_uint16) (((orc_uint16) (var54.x4[3] + 128)) +
930 (((orc_uint16) (var54.x4[3] + 128)) >> 8))) >> 8;
/* Add the weighted difference back onto var52. */
932 var56.x4[0] = var52.x4[0] + var55.x4[0];
933 var56.x4[1] = var52.x4[1] + var55.x4[1];
934 var56.x4[2] = var52.x4[2] + var55.x4[2];
935 var56.x4[3] = var52.x4[3] + var55.x4[3];
/* Lane-wise copy into var57 (presumably the narrowing back to bytes; the
 * variable types are not visible here). */
937 var57.x4[0] = var56.x4[0];
938 var57.x4[1] = var56.x4[1];
939 var57.x4[2] = var56.x4[2];
940 var57.x4[3] = var56.x4[3];
/* Force the top byte of the 32-bit result to 0xff. */
942 var58.i = var57.i | var41.i;
/* Backup function for the "orc_blend_bgra" program.  The per-pixel arithmetic
 * is the same as in the plain-C orc_blend_bgra; inputs come from the
 * executor:
 *   m           = ex->params[ORC_VAR_A1]
 *   row j base  = ex->arrays[0] + ex->params[0] * j
 *                 ex->arrays[4] + ex->params[4] * j
 *   weight      = ex->params[24], copied into all four lanes of var40
 * NOTE(review): slots 0 / 4 / 24 are presumably ORC_VAR_D1 / ORC_VAR_S1 /
 * ORC_VAR_P1; where n is read from, the pixel loads, the heads of several
 * multi-line statements and the store are not visible in this extract. */
952 _backup_orc_blend_bgra (OrcExecutor * ORC_RESTRICT ex)
957 int m = ex->params[ORC_VAR_A1];
958 orc_union32 *ORC_RESTRICT ptr0;
959 const orc_union32 *ORC_RESTRICT ptr4;
980 for (j = 0; j < m; j++) {
981 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
982 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
/* Broadcast the weight parameter into every lane. */
985 var40.x4[0] = ex->params[24];
986 var40.x4[1] = ex->params[24];
987 var40.x4[2] = ex->params[24];
988 var40.x4[3] = ex->params[24];
990 var41.i = (int) 0xff000000; /* -16777216 or 2.11371e-314f */
992 for (i = 0; i < n; i++) {
/* Extract the top byte of the 32-bit word. */
996 var43.i = ((orc_uint32) var42.i) >> 24;
/* Replicate the low byte into all four byte positions. */
1003 ((var45 & 0xff) << 24) | ((var45 & 0xff) << 16) | ((var45 & 0xff) <<
1004 8) | (var45 & 0xff);
/* Widen the replicated byte to 16 bits per lane. */
1006 var47.x4[0] = (orc_uint8) var46.x4[0];
1007 var47.x4[1] = (orc_uint8) var46.x4[1];
1008 var47.x4[2] = (orc_uint8) var46.x4[2];
1009 var47.x4[3] = (orc_uint8) var46.x4[3];
/* Scale by the weight and keep the upper 8 of the 16 result bits. */
1011 var48.x4[0] = (var47.x4[0] * var40.x4[0]) & 0xffff;
1012 var48.x4[1] = (var47.x4[1] * var40.x4[1]) & 0xffff;
1013 var48.x4[2] = (var47.x4[2] * var40.x4[2]) & 0xffff;
1014 var48.x4[3] = (var47.x4[3] * var40.x4[3]) & 0xffff;
1016 var49.x4[0] = ((orc_uint16) var48.x4[0]) >> 8;
1017 var49.x4[1] = ((orc_uint16) var48.x4[1]) >> 8;
1018 var49.x4[2] = ((orc_uint16) var48.x4[2]) >> 8;
1019 var49.x4[3] = ((orc_uint16) var48.x4[3]) >> 8;
/* Widen the two pixels being blended. */
1021 var50.x4[0] = (orc_uint8) var42.x4[0];
1022 var50.x4[1] = (orc_uint8) var42.x4[1];
1023 var50.x4[2] = (orc_uint8) var42.x4[2];
1024 var50.x4[3] = (orc_uint8) var42.x4[3];
1028 var52.x4[0] = (orc_uint8) var51.x4[0];
1029 var52.x4[1] = (orc_uint8) var51.x4[1];
1030 var52.x4[2] = (orc_uint8) var51.x4[2];
1031 var52.x4[3] = (orc_uint8) var51.x4[3];
/* Per-lane difference, weighted. */
1033 var53.x4[0] = var50.x4[0] - var52.x4[0];
1034 var53.x4[1] = var50.x4[1] - var52.x4[1];
1035 var53.x4[2] = var50.x4[2] - var52.x4[2];
1036 var53.x4[3] = var50.x4[3] - var52.x4[3];
1038 var54.x4[0] = (var53.x4[0] * var49.x4[0]) & 0xffff;
1039 var54.x4[1] = (var53.x4[1] * var49.x4[1]) & 0xffff;
1040 var54.x4[2] = (var53.x4[2] * var49.x4[2]) & 0xffff;
1041 var54.x4[3] = (var53.x4[3] * var49.x4[3]) & 0xffff;
/* Division by 255 with rounding, computed as ((t + (t >> 8)) >> 8) with
 * t = x + 128, all in 16-bit unsigned arithmetic. */
1044 ((orc_uint16) (((orc_uint16) (var54.x4[0] + 128)) +
1045 (((orc_uint16) (var54.x4[0] + 128)) >> 8))) >> 8;
1047 ((orc_uint16) (((orc_uint16) (var54.x4[1] + 128)) +
1048 (((orc_uint16) (var54.x4[1] + 128)) >> 8))) >> 8;
1050 ((orc_uint16) (((orc_uint16) (var54.x4[2] + 128)) +
1051 (((orc_uint16) (var54.x4[2] + 128)) >> 8))) >> 8;
1053 ((orc_uint16) (((orc_uint16) (var54.x4[3] + 128)) +
1054 (((orc_uint16) (var54.x4[3] + 128)) >> 8))) >> 8;
/* Add the weighted difference back onto var52. */
1056 var56.x4[0] = var52.x4[0] + var55.x4[0];
1057 var56.x4[1] = var52.x4[1] + var55.x4[1];
1058 var56.x4[2] = var52.x4[2] + var55.x4[2];
1059 var56.x4[3] = var52.x4[3] + var55.x4[3];
/* Lane-wise copy into var57 (presumably the narrowing back to bytes; the
 * variable types are not visible here). */
1061 var57.x4[0] = var56.x4[0];
1062 var57.x4[1] = var56.x4[1];
1063 var57.x4[2] = var56.x4[2];
1064 var57.x4[3] = var56.x4[3];
/* Force the top byte of the 32-bit result to 0xff. */
1066 var58.i = var57.i | var41.i;
/* orc_blend_bgra, Orc-dispatch variant.
 *
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it builds and
 * compiles a 2-D OrcProgram named "orc_blend_bgra": 4-byte destination and
 * source, constants c1 = 0xff000000, c2 = 0x18 (24) and c3 = 8, a 2-byte
 * parameter p1 and temporaries t1..t8.  The instruction list, in order (the
 * third argument of orc_program_append_2 is 2 for the steps marked x4, 0
 * otherwise):
 *   loadl   t1 <- s1            source pixel
 *   shrul   t2 <- t1, c2        top byte of the source pixel
 *   convlw  t3 <- t2
 *   convwb  t4 <- t3
 *   splatbl t5 <- t4            replicate it into four bytes
 *   convubw t8 <- t5       x4
 *   mullw   t8 <- t8, p1   x4
 *   shruw   t8 <- t8, c3   x4   weight = (byte * p1) >> 8
 *   convubw t7 <- t1       x4   widened source
 *   loadl   t1 <- d1            destination pixel
 *   convubw t6 <- t1       x4   widened destination
 *   subw    t7 <- t7, t6   x4
 *   mullw   t7 <- t7, t8   x4
 *   div255w t7 <- t7       x4
 *   addw    t6 <- t6, t7   x4
 *   convwb  t1 <- t6       x4
 *   orl     t1 <- t1, c1        force top byte to 0xff
 *   storel  d1 <- t1
 * The executor then receives m, both pointers, both strides and p1, and the
 * compiled entry point is read from p->code_exec.
 *
 * NOTE(review): the test on p_inited, the assignment of n to the executor,
 * the trailing arguments of the append calls and the call through func are
 * not visible in this extract. */
1075 orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1076 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1078 OrcExecutor _ex, *ex = &_ex;
1079 static int p_inited = 0;
1080 static OrcProgram *p = 0;
1081 void (*func) (OrcExecutor *);
1084 orc_once_mutex_lock ();
1087 p = orc_program_new ();
1088 orc_program_set_2d (p);
1089 orc_program_set_name (p, "orc_blend_bgra");
1090 orc_program_set_backup_function (p, _backup_orc_blend_bgra);
1091 orc_program_add_destination (p, 4, "d1");
1092 orc_program_add_source (p, 4, "s1");
1093 orc_program_add_constant (p, 4, 0xff000000, "c1");
1094 orc_program_add_constant (p, 4, 0x00000018, "c2");
1095 orc_program_add_constant (p, 4, 0x00000008, "c3");
1096 orc_program_add_parameter (p, 2, "p1");
1097 orc_program_add_temporary (p, 4, "t1");
1098 orc_program_add_temporary (p, 4, "t2");
1099 orc_program_add_temporary (p, 2, "t3");
1100 orc_program_add_temporary (p, 1, "t4");
1101 orc_program_add_temporary (p, 4, "t5");
1102 orc_program_add_temporary (p, 8, "t6");
1103 orc_program_add_temporary (p, 8, "t7");
1104 orc_program_add_temporary (p, 8, "t8");
1106 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1108 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C2,
1110 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
1112 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
1114 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1,
1116 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T5, ORC_VAR_D1,
1118 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_P1,
1120 orc_program_append_2 (p, "shruw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_C3,
1122 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T1, ORC_VAR_D1,
1124 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
1126 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1,
1128 orc_program_append_2 (p, "subw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
1130 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T8,
1132 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
1134 orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T7,
1136 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T6, ORC_VAR_D1,
1138 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
1140 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1143 orc_program_compile (p);
1146 orc_once_mutex_unlock ();
1151 ORC_EXECUTOR_M (ex) = m;
1152 ex->arrays[ORC_VAR_D1] = d1;
1153 ex->params[ORC_VAR_D1] = d1_stride;
1154 ex->arrays[ORC_VAR_S1] = (void *) s1;
1155 ex->params[ORC_VAR_S1] = s1_stride;
1156 ex->params[ORC_VAR_P1] = p1;
1158 func = p->code_exec;
1164 /* orc_overlay_argb */
/* orc_overlay_argb, plain-C variant: combines m rows of n 32-bit pixels from
 * s1 with d1.  Each pixel is handled as four byte lanes (x4[0..3]).
 *
 * Per pixel, as far as the visible lines show:
 *   wa  (var50) = (replicated low byte of var46 * var41) >> 8
 *   pa  (var52) = widened var44 * wa
 *   inv (var55) = 0xff - wa                    (per lane, from 0xffffffff)
 *   wb  (var62) = (replicated low byte of var58 * inv) / 255, rounded
 *   pb  (var64) = widened var56 * wb
 *   sum (var65) = pb + pa
 *   wt  (var66) = wb + wa
 *   q           = 255 when the low byte of wt is 0, otherwise
 *                 clamp_ub(sum / low byte of wt)
 *   result (var72) = (q & 0xffffff00) | (wt & 0x000000ff)
 * so the upper three bytes carry the quotient and the low byte carries wt.
 * NOTE(review): the loads, the assignment of var41, the heads of several
 * multi-line statements and the store are not visible in this extract.
 * Presumably var44/var46 come from the source pixel, var56/var58 from the
 * destination pixel and var41 from p1 (the backup function reads it from
 * ex->params[24]); this would make the computation the usual "source over
 * destination" alpha compositing -- confirm against blendorc.orc. */
1167 orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
1168 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1172 orc_union32 *ORC_RESTRICT ptr0;
1173 const orc_union32 *ORC_RESTRICT ptr4;
1207 for (j = 0; j < m; j++) {
1208 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1209 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
/* Loop-invariant constants: all-ones, "upper three bytes" mask, "low byte"
 * mask. */
1217 var53.i = (int) 0xffffffff; /* -1 or 2.122e-314f */
1219 var42.i = (int) 0xffffff00; /* -256 or 2.122e-314f */
1221 var43.i = (int) 0x000000ff; /* 255 or 1.25987e-321f */
1223 for (i = 0; i < n; i++) {
/* Replicate the low byte of var46 into all four byte positions. */
1232 ((var46 & 0xff) << 24) | ((var46 & 0xff) << 16) | ((var46 & 0xff) <<
1233 8) | (var46 & 0xff);
/* Widen, scale by var41, keep the upper 8 of the 16 result bits -> wa. */
1235 var48.x4[0] = (orc_uint8) var47.x4[0];
1236 var48.x4[1] = (orc_uint8) var47.x4[1];
1237 var48.x4[2] = (orc_uint8) var47.x4[2];
1238 var48.x4[3] = (orc_uint8) var47.x4[3];
1240 var49.x4[0] = (var48.x4[0] * var41.x4[0]) & 0xffff;
1241 var49.x4[1] = (var48.x4[1] * var41.x4[1]) & 0xffff;
1242 var49.x4[2] = (var48.x4[2] * var41.x4[2]) & 0xffff;
1243 var49.x4[3] = (var48.x4[3] * var41.x4[3]) & 0xffff;
1245 var50.x4[0] = ((orc_uint16) var49.x4[0]) >> 8;
1246 var50.x4[1] = ((orc_uint16) var49.x4[1]) >> 8;
1247 var50.x4[2] = ((orc_uint16) var49.x4[2]) >> 8;
1248 var50.x4[3] = ((orc_uint16) var49.x4[3]) >> 8;
/* First pixel, widened and multiplied by wa -> pa. */
1250 var51.x4[0] = (orc_uint8) var44.x4[0];
1251 var51.x4[1] = (orc_uint8) var44.x4[1];
1252 var51.x4[2] = (orc_uint8) var44.x4[2];
1253 var51.x4[3] = (orc_uint8) var44.x4[3];
1255 var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff;
1256 var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff;
1257 var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff;
1258 var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff;
/* inv = 0xff - wa in every lane. */
1260 var54.x4[0] = (orc_uint8) var53.x4[0];
1261 var54.x4[1] = (orc_uint8) var53.x4[1];
1262 var54.x4[2] = (orc_uint8) var53.x4[2];
1263 var54.x4[3] = (orc_uint8) var53.x4[3];
1265 var55.x4[0] = var54.x4[0] - var50.x4[0];
1266 var55.x4[1] = var54.x4[1] - var50.x4[1];
1267 var55.x4[2] = var54.x4[2] - var50.x4[2];
1268 var55.x4[3] = var54.x4[3] - var50.x4[3];
/* Replicate the low byte of var58 into all four byte positions. */
1277 ((var58 & 0xff) << 24) | ((var58 & 0xff) << 16) | ((var58 & 0xff) <<
1278 8) | (var58 & 0xff);
/* Widen and multiply by inv. */
1280 var60.x4[0] = (orc_uint8) var59.x4[0];
1281 var60.x4[1] = (orc_uint8) var59.x4[1];
1282 var60.x4[2] = (orc_uint8) var59.x4[2];
1283 var60.x4[3] = (orc_uint8) var59.x4[3];
1285 var61.x4[0] = (var60.x4[0] * var55.x4[0]) & 0xffff;
1286 var61.x4[1] = (var60.x4[1] * var55.x4[1]) & 0xffff;
1287 var61.x4[2] = (var60.x4[2] * var55.x4[2]) & 0xffff;
1288 var61.x4[3] = (var60.x4[3] * var55.x4[3]) & 0xffff;
/* Division by 255 with rounding, computed as ((t + (t >> 8)) >> 8) with
 * t = x + 128, all in 16-bit unsigned arithmetic -> wb (presumably stored in
 * var62; the assignment heads are not visible). */
1291 ((orc_uint16) (((orc_uint16) (var61.x4[0] + 128)) +
1292 (((orc_uint16) (var61.x4[0] + 128)) >> 8))) >> 8;
1294 ((orc_uint16) (((orc_uint16) (var61.x4[1] + 128)) +
1295 (((orc_uint16) (var61.x4[1] + 128)) >> 8))) >> 8;
1297 ((orc_uint16) (((orc_uint16) (var61.x4[2] + 128)) +
1298 (((orc_uint16) (var61.x4[2] + 128)) >> 8))) >> 8;
1300 ((orc_uint16) (((orc_uint16) (var61.x4[3] + 128)) +
1301 (((orc_uint16) (var61.x4[3] + 128)) >> 8))) >> 8;
/* Second pixel, widened and multiplied by wb -> pb. */
1303 var63.x4[0] = (orc_uint8) var56.x4[0];
1304 var63.x4[1] = (orc_uint8) var56.x4[1];
1305 var63.x4[2] = (orc_uint8) var56.x4[2];
1306 var63.x4[3] = (orc_uint8) var56.x4[3];
1308 var64.x4[0] = (var63.x4[0] * var62.x4[0]) & 0xffff;
1309 var64.x4[1] = (var63.x4[1] * var62.x4[1]) & 0xffff;
1310 var64.x4[2] = (var63.x4[2] * var62.x4[2]) & 0xffff;
1311 var64.x4[3] = (var63.x4[3] * var62.x4[3]) & 0xffff;
/* sum = pb + pa. */
1313 var65.x4[0] = var64.x4[0] + var52.x4[0];
1314 var65.x4[1] = var64.x4[1] + var52.x4[1];
1315 var65.x4[2] = var64.x4[2] + var52.x4[2];
1316 var65.x4[3] = var64.x4[3] + var52.x4[3];
/* wt = wb + wa. */
1318 var66.x4[0] = var62.x4[0] + var50.x4[0];
1319 var66.x4[1] = var62.x4[1] + var50.x4[1];
1320 var66.x4[2] = var62.x4[2] + var50.x4[2];
1321 var66.x4[3] = var62.x4[3] + var50.x4[3];
/* Per lane: sum divided by the low byte of wt, saturated to an unsigned
 * byte; a zero divisor yields 255 instead of dividing. */
1324 ((var66.x4[0] & 0xff) ==
1325 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[0]) /
1326 ((orc_uint16) var66.x4[0] & 0xff));
1328 ((var66.x4[1] & 0xff) ==
1329 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[1]) /
1330 ((orc_uint16) var66.x4[1] & 0xff));
1332 ((var66.x4[2] & 0xff) ==
1333 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[2]) /
1334 ((orc_uint16) var66.x4[2] & 0xff));
1336 ((var66.x4[3] & 0xff) ==
1337 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[3]) /
1338 ((orc_uint16) var66.x4[3] & 0xff));
/* Quotient, lane-wise copied and masked to the upper three bytes. */
1340 var68.x4[0] = var67.x4[0];
1341 var68.x4[1] = var67.x4[1];
1342 var68.x4[2] = var67.x4[2];
1343 var68.x4[3] = var67.x4[3];
1345 var69.i = var68.i & var42.i;
/* wt, lane-wise copied and masked to the low byte. */
1347 var70.x4[0] = var66.x4[0];
1348 var70.x4[1] = var66.x4[1];
1349 var70.x4[2] = var66.x4[2];
1350 var70.x4[3] = var66.x4[3];
1352 var71.i = var70.i & var43.i;
/* Merge the two masked words into the output pixel. */
1354 var72.i = var69.i | var71.i;
/*
 * _backup_orc_overlay_argb:
 * @ex: executor carrying the array pointers, strides and parameters
 *
 * Plain-C version of the arithmetic that orc_overlay_argb() registers as the
 * backup function of the "orc_overlay_argb" program.  It walks m rows; the
 * row pointers are ex->arrays[0] and ex->arrays[4] offset through
 * ORC_PTR_OFFSET by ex->params[0] * j and ex->params[4] * j.  Every 32-bit
 * element is processed as four lanes (the .x4[0..3] members).
 *
 * NOTE(review): indices 0, 4 and 24 are presumably ORC_VAR_D1, ORC_VAR_S1
 * and ORC_VAR_P1, i.e. the slots orc_overlay_argb() fills in -- confirm
 * against the Orc headers.
 */
1364 _backup_orc_overlay_argb (OrcExecutor * ORC_RESTRICT ex)
/* Row count, read from the ORC_VAR_A1 parameter slot. */
1369 int m = ex->params[ORC_VAR_A1];
1370 orc_union32 *ORC_RESTRICT ptr0;
1371 const orc_union32 *ORC_RESTRICT ptr4;
/* Outer loop: one iteration per row. */
1405 for (j = 0; j < m; j++) {
1406 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1407 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
/* Broadcast ex->params[24] into all four lanes of var41. */
1410 var41.x4[0] = ex->params[24];
1411 var41.x4[1] = ex->params[24];
1412 var41.x4[2] = ex->params[24];
1413 var41.x4[3] = ex->params[24];
/* Loop-invariant constants: an all-ones word, then the two complementary
 * masks (0xffffff00 and 0x000000ff) used for the final merge. */
1415 var53.i = (int) 0xffffffff; /* -1 or 2.122e-314f */
1417 var42.i = (int) 0xffffff00; /* -256 or 2.122e-314f */
1419 var43.i = (int) 0x000000ff; /* 255 or 1.25987e-321f */
/* Inner loop: one iteration per 32-bit element of the row. */
1421 for (i = 0; i < n; i++) {
1430 ((var46 & 0xff) << 24) | ((var46 & 0xff) << 16) | ((var46 & 0xff) <<
1431 8) | (var46 & 0xff);
/* The expression above replicates the low byte of var46 into all four
 * bytes of a word (the "splatbl" step of the program).  The lanes of var47
 * are then converted with an orc_uint8 cast, multiplied by the broadcast
 * parameter (product kept modulo 2^16) and shifted right by 8 -> var50. */
1433 var48.x4[0] = (orc_uint8) var47.x4[0];
1434 var48.x4[1] = (orc_uint8) var47.x4[1];
1435 var48.x4[2] = (orc_uint8) var47.x4[2];
1436 var48.x4[3] = (orc_uint8) var47.x4[3];
1438 var49.x4[0] = (var48.x4[0] * var41.x4[0]) & 0xffff;
1439 var49.x4[1] = (var48.x4[1] * var41.x4[1]) & 0xffff;
1440 var49.x4[2] = (var48.x4[2] * var41.x4[2]) & 0xffff;
1441 var49.x4[3] = (var48.x4[3] * var41.x4[3]) & 0xffff;
1443 var50.x4[0] = ((orc_uint16) var49.x4[0]) >> 8;
1444 var50.x4[1] = ((orc_uint16) var49.x4[1]) >> 8;
1445 var50.x4[2] = ((orc_uint16) var49.x4[2]) >> 8;
1446 var50.x4[3] = ((orc_uint16) var49.x4[3]) >> 8;
/* Convert the lanes of var44 and weight each one by var50 -> var52. */
1448 var51.x4[0] = (orc_uint8) var44.x4[0];
1449 var51.x4[1] = (orc_uint8) var44.x4[1];
1450 var51.x4[2] = (orc_uint8) var44.x4[2];
1451 var51.x4[3] = (orc_uint8) var44.x4[3];
1453 var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff;
1454 var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff;
1455 var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff;
1456 var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff;
/* Lanes of the all-ones constant cast to orc_uint8 (0xff per lane if .x4
 * aliases the bytes of .i), minus var50: the complementary weight var55. */
1458 var54.x4[0] = (orc_uint8) var53.x4[0];
1459 var54.x4[1] = (orc_uint8) var53.x4[1];
1460 var54.x4[2] = (orc_uint8) var53.x4[2];
1461 var54.x4[3] = (orc_uint8) var53.x4[3];
1463 var55.x4[0] = var54.x4[0] - var50.x4[0];
1464 var55.x4[1] = var54.x4[1] - var50.x4[1];
1465 var55.x4[2] = var54.x4[2] - var50.x4[2];
1466 var55.x4[3] = var54.x4[3] - var50.x4[3];
1475 ((var58 & 0xff) << 24) | ((var58 & 0xff) << 16) | ((var58 & 0xff) <<
1476 8) | (var58 & 0xff);
/* Second byte replication, this time of the low byte of var58.  The lanes
 * of var59 are converted and multiplied by the complementary weight. */
1478 var60.x4[0] = (orc_uint8) var59.x4[0];
1479 var60.x4[1] = (orc_uint8) var59.x4[1];
1480 var60.x4[2] = (orc_uint8) var59.x4[2];
1481 var60.x4[3] = (orc_uint8) var59.x4[3];
1483 var61.x4[0] = (var60.x4[0] * var55.x4[0]) & 0xffff;
1484 var61.x4[1] = (var60.x4[1] * var55.x4[1]) & 0xffff;
1485 var61.x4[2] = (var60.x4[2] * var55.x4[2]) & 0xffff;
1486 var61.x4[3] = (var60.x4[3] * var55.x4[3]) & 0xffff;
1489 ((orc_uint16) (((orc_uint16) (var61.x4[0] + 128)) +
1490 (((orc_uint16) (var61.x4[0] + 128)) >> 8))) >> 8;
1492 ((orc_uint16) (((orc_uint16) (var61.x4[1] + 128)) +
1493 (((orc_uint16) (var61.x4[1] + 128)) >> 8))) >> 8;
1495 ((orc_uint16) (((orc_uint16) (var61.x4[2] + 128)) +
1496 (((orc_uint16) (var61.x4[2] + 128)) >> 8))) >> 8;
1498 ((orc_uint16) (((orc_uint16) (var61.x4[3] + 128)) +
1499 (((orc_uint16) (var61.x4[3] + 128)) >> 8))) >> 8;
/* The four expressions above evaluate ((x + 128) + ((x + 128) >> 8)) >> 8
 * on each lane of var61, the program's "div255w" step.
 * NOTE(review): the results are presumably the var62 lanes read below.
 * Next: convert the lanes of var56, weight them by var62 and add var52. */
1501 var63.x4[0] = (orc_uint8) var56.x4[0];
1502 var63.x4[1] = (orc_uint8) var56.x4[1];
1503 var63.x4[2] = (orc_uint8) var56.x4[2];
1504 var63.x4[3] = (orc_uint8) var56.x4[3];
1506 var64.x4[0] = (var63.x4[0] * var62.x4[0]) & 0xffff;
1507 var64.x4[1] = (var63.x4[1] * var62.x4[1]) & 0xffff;
1508 var64.x4[2] = (var63.x4[2] * var62.x4[2]) & 0xffff;
1509 var64.x4[3] = (var63.x4[3] * var62.x4[3]) & 0xffff;
1511 var65.x4[0] = var64.x4[0] + var52.x4[0];
1512 var65.x4[1] = var64.x4[1] + var52.x4[1];
1513 var65.x4[2] = var64.x4[2] + var52.x4[2];
1514 var65.x4[3] = var64.x4[3] + var52.x4[3];
/* Sum of the two weights, lane by lane; used as the divisor below. */
1516 var66.x4[0] = var62.x4[0] + var50.x4[0];
1517 var66.x4[1] = var62.x4[1] + var50.x4[1];
1518 var66.x4[2] = var62.x4[2] + var50.x4[2];
1519 var66.x4[3] = var62.x4[3] + var50.x4[3];
1522 ((var66.x4[0] & 0xff) ==
1523 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[0]) /
1524 ((orc_uint16) var66.x4[0] & 0xff));
1526 ((var66.x4[1] & 0xff) ==
1527 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[1]) /
1528 ((orc_uint16) var66.x4[1] & 0xff));
1530 ((var66.x4[2] & 0xff) ==
1531 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[2]) /
1532 ((orc_uint16) var66.x4[2] & 0xff));
1534 ((var66.x4[3] & 0xff) ==
1535 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[3]) /
1536 ((orc_uint16) var66.x4[3] & 0xff));
/* The expressions above divide each lane of var65 by the low byte of the
 * matching var66 lane and pass the quotient through ORC_CLAMP_UB; a zero
 * divisor byte yields 255 instead of dividing ("divluw" in the program).
 * The quotient lanes (var67) are then copied into var68. */
1538 var68.x4[0] = var67.x4[0];
1539 var68.x4[1] = var67.x4[1];
1540 var68.x4[2] = var67.x4[2];
1541 var68.x4[3] = var67.x4[3];
/* Keep everything except the low byte of the quotient word. */
1543 var69.i = var68.i & var42.i;
/* Copy the weight-sum lanes and keep only the low byte of that word. */
1545 var70.x4[0] = var66.x4[0];
1546 var70.x4[1] = var66.x4[1];
1547 var70.x4[2] = var66.x4[2];
1548 var70.x4[3] = var66.x4[3];
1550 var71.i = var70.i & var43.i;
/* Merge the two masked words into the output value. */
1552 var72.i = var69.i | var71.i;
/*
 * orc_overlay_argb:
 * @d1: destination array (registered with element size 4)
 * @d1_stride: stored in the executor's ORC_VAR_D1 parameter slot
 * @s1: source array (registered with element size 4)
 * @s1_stride: stored in the executor's ORC_VAR_S1 parameter slot
 * @p1: stored in the executor's ORC_VAR_P1 slot (2-byte program parameter)
 * @n: NOTE(review): presumably the per-row element count -- confirm
 * @m: stored through ORC_EXECUTOR_M
 *
 * Builds the two-dimensional OrcProgram "orc_overlay_argb", with
 * _backup_orc_overlay_argb as its backup function, compiles it, then fills
 * an OrcExecutor with the arguments and fetches the program's code_exec
 * entry point.
 *
 * NOTE(review): the static p / p_inited pair and the orc_once_mutex_lock /
 * unlock calls suggest the program is meant to be built only once --
 * confirm the guard around the construction code.
 */
1561 orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
1562 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1564 OrcExecutor _ex, *ex = &_ex;
1565 static int p_inited = 0;
1566 static OrcProgram *p = 0;
1567 void (*func) (OrcExecutor *);
/* Program construction happens between the lock and unlock calls. */
1570 orc_once_mutex_lock ();
1573 p = orc_program_new ();
1574 orc_program_set_2d (p);
1575 orc_program_set_name (p, "orc_overlay_argb");
1576 orc_program_set_backup_function (p, _backup_orc_overlay_argb);
/* Operands: one destination, one source, four constants (all-ones, the
 * 0x000000ff / 0xffffff00 mask pair, and the shift count 8), the p1
 * parameter, and nine temporaries of 1, 2, 4 or 8 bytes. */
1577 orc_program_add_destination (p, 4, "d1");
1578 orc_program_add_source (p, 4, "s1");
1579 orc_program_add_constant (p, 4, 0xffffffff, "c1");
1580 orc_program_add_constant (p, 4, 0x000000ff, "c2");
1581 orc_program_add_constant (p, 4, 0xffffff00, "c3");
1582 orc_program_add_constant (p, 4, 0x00000008, "c4");
1583 orc_program_add_parameter (p, 2, "p1");
1584 orc_program_add_temporary (p, 4, "t1");
1585 orc_program_add_temporary (p, 2, "t2");
1586 orc_program_add_temporary (p, 1, "t3");
1587 orc_program_add_temporary (p, 8, "t4");
1588 orc_program_add_temporary (p, 8, "t5");
1589 orc_program_add_temporary (p, 8, "t6");
1590 orc_program_add_temporary (p, 4, "t7");
1591 orc_program_add_temporary (p, 8, "t8");
1592 orc_program_add_temporary (p, 8, "t9");
/*
 * Instruction list, in order:
 *  - load s1 into t1, narrow to its low byte (convlw, convwb), replicate it
 *    (splatbl) and widen (convubw) into t4; t4 = (t4 * p1) >> c4
 *  - t9 = widened t1 * t4
 *  - t5 = widened c1 - t4
 *  - load d1 into t1, same low-byte replication into t6;
 *    t6 = div255w (t6 * t5)
 *  - t8 = widened t1 * t6 + t9;  t6 = t6 + t4;  t8 = divluw (t8, t6)
 *  - narrow t8 and mask with c3, narrow t6 and mask with c2, OR the two
 *    words and store the result to d1
 * NOTE(review): the third argument is 2 on exactly the steps that the C
 * backup performs per .x4 lane; presumably a four-lane flag -- confirm.
 */
1594 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1596 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
1598 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
1600 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, ORC_VAR_D1,
1602 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_T7, ORC_VAR_D1,
1604 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_P1,
1606 orc_program_append_2 (p, "shruw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_C4,
1608 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
1610 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T4,
1612 orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T7, ORC_VAR_C1, ORC_VAR_D1,
1614 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T7, ORC_VAR_D1,
1616 orc_program_append_2 (p, "subw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T4,
1618 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
1620 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
1622 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
1624 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, ORC_VAR_D1,
1626 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T7, ORC_VAR_D1,
1628 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
1630 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
1632 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T1, ORC_VAR_D1,
1634 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T6,
1636 orc_program_append_2 (p, "addw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T9,
1638 orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T4,
1640 orc_program_append_2 (p, "divluw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T6,
1642 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T8, ORC_VAR_D1,
1644 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
1646 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_D1,
1648 orc_program_append_2 (p, "andl", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
1650 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T7,
1652 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1655 orc_program_compile (p);
1658 orc_once_mutex_unlock ();
/* Hand the call arguments to the executor: row count, then pointer and
 * stride for destination and source, then the p1 parameter. */
1663 ORC_EXECUTOR_M (ex) = m;
1664 ex->arrays[ORC_VAR_D1] = d1;
1665 ex->params[ORC_VAR_D1] = d1_stride;
1666 ex->arrays[ORC_VAR_S1] = (void *) s1;
1667 ex->params[ORC_VAR_S1] = s1_stride;
1668 ex->params[ORC_VAR_P1] = p1;
/* Entry point of the compiled program. */
1670 func = p->code_exec;
1676 /* orc_overlay_bgra */
/*
 * orc_overlay_bgra:
 * @d1: destination array, read and advanced per row through ptr0
 * @d1_stride: multiplied by the row index to offset @d1
 * @s1: source array, advanced per row through ptr4
 * @s1_stride: multiplied by the row index to offset @s1
 * @p1: NOTE(review): presumably the value broadcast into var42 -- confirm
 * @n: number of 32-bit elements processed per row
 * @m: number of rows
 *
 * Variant of orc_overlay_bgra() that does the arithmetic directly in C on
 * its arguments, treating each 32-bit element as four lanes (.x4[0..3]).
 * It differs from the argb code in this file by taking the top byte of each
 * word (>> 24) as the weight source and by using the 0x00ffffff /
 * 0xff000000 mask pair for the final merge.
 *
 * NOTE(review): a second definition with the same name follows later in the
 * file, so this one is presumably selected by a preprocessor guard (e.g.
 * when Orc is disabled) -- confirm.
 */
1679 orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1680 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1684 orc_union32 *ORC_RESTRICT ptr0;
1685 const orc_union32 *ORC_RESTRICT ptr4;
/* Outer loop: one iteration per row. */
1721 for (j = 0; j < m; j++) {
1722 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1723 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
/* Loop-invariant constants: an all-ones word, then the two complementary
 * masks (0x00ffffff and 0xff000000) used for the final merge. */
1731 var55.i = (int) 0xffffffff; /* -1 or 2.122e-314f */
1733 var43.i = (int) 0x00ffffff; /* 16777215 or 8.28905e-317f */
1735 var44.i = (int) 0xff000000; /* -16777216 or 2.11371e-314f */
/* Inner loop: one iteration per 32-bit element of the row. */
1737 for (i = 0; i < n; i++) {
/* Move the top byte of var45 down to bits 0-7. */
1741 var46.i = ((orc_uint32) var45.i) >> 24;
1748 ((var48 & 0xff) << 24) | ((var48 & 0xff) << 16) | ((var48 & 0xff) <<
1749 8) | (var48 & 0xff);
/* The expression above replicates the low byte of var48 into all four
 * bytes of a word.  The lanes of var49 are then converted with an
 * orc_uint8 cast, multiplied by var42 (product kept modulo 2^16) and
 * shifted right by 8 -> var52. */
1751 var50.x4[0] = (orc_uint8) var49.x4[0];
1752 var50.x4[1] = (orc_uint8) var49.x4[1];
1753 var50.x4[2] = (orc_uint8) var49.x4[2];
1754 var50.x4[3] = (orc_uint8) var49.x4[3];
1756 var51.x4[0] = (var50.x4[0] * var42.x4[0]) & 0xffff;
1757 var51.x4[1] = (var50.x4[1] * var42.x4[1]) & 0xffff;
1758 var51.x4[2] = (var50.x4[2] * var42.x4[2]) & 0xffff;
1759 var51.x4[3] = (var50.x4[3] * var42.x4[3]) & 0xffff;
1761 var52.x4[0] = ((orc_uint16) var51.x4[0]) >> 8;
1762 var52.x4[1] = ((orc_uint16) var51.x4[1]) >> 8;
1763 var52.x4[2] = ((orc_uint16) var51.x4[2]) >> 8;
1764 var52.x4[3] = ((orc_uint16) var51.x4[3]) >> 8;
/* Convert the lanes of var45 and weight each one by var52 -> var54. */
1766 var53.x4[0] = (orc_uint8) var45.x4[0];
1767 var53.x4[1] = (orc_uint8) var45.x4[1];
1768 var53.x4[2] = (orc_uint8) var45.x4[2];
1769 var53.x4[3] = (orc_uint8) var45.x4[3];
1771 var54.x4[0] = (var53.x4[0] * var52.x4[0]) & 0xffff;
1772 var54.x4[1] = (var53.x4[1] * var52.x4[1]) & 0xffff;
1773 var54.x4[2] = (var53.x4[2] * var52.x4[2]) & 0xffff;
1774 var54.x4[3] = (var53.x4[3] * var52.x4[3]) & 0xffff;
/* Lanes of the all-ones constant cast to orc_uint8 (0xff per lane if .x4
 * aliases the bytes of .i), minus var52: the complementary weight var57. */
1776 var56.x4[0] = (orc_uint8) var55.x4[0];
1777 var56.x4[1] = (orc_uint8) var55.x4[1];
1778 var56.x4[2] = (orc_uint8) var55.x4[2];
1779 var56.x4[3] = (orc_uint8) var55.x4[3];
1781 var57.x4[0] = var56.x4[0] - var52.x4[0];
1782 var57.x4[1] = var56.x4[1] - var52.x4[1];
1783 var57.x4[2] = var56.x4[2] - var52.x4[2];
1784 var57.x4[3] = var56.x4[3] - var52.x4[3];
/* Same top-byte extraction, this time from var58. */
1788 var59.i = ((orc_uint32) var58.i) >> 24;
1795 ((var61 & 0xff) << 24) | ((var61 & 0xff) << 16) | ((var61 & 0xff) <<
1796 8) | (var61 & 0xff);
/* Second byte replication (low byte of var61).  The lanes of var62 are
 * converted and multiplied by the complementary weight var57. */
1798 var63.x4[0] = (orc_uint8) var62.x4[0];
1799 var63.x4[1] = (orc_uint8) var62.x4[1];
1800 var63.x4[2] = (orc_uint8) var62.x4[2];
1801 var63.x4[3] = (orc_uint8) var62.x4[3];
1803 var64.x4[0] = (var63.x4[0] * var57.x4[0]) & 0xffff;
1804 var64.x4[1] = (var63.x4[1] * var57.x4[1]) & 0xffff;
1805 var64.x4[2] = (var63.x4[2] * var57.x4[2]) & 0xffff;
1806 var64.x4[3] = (var63.x4[3] * var57.x4[3]) & 0xffff;
1809 ((orc_uint16) (((orc_uint16) (var64.x4[0] + 128)) +
1810 (((orc_uint16) (var64.x4[0] + 128)) >> 8))) >> 8;
1812 ((orc_uint16) (((orc_uint16) (var64.x4[1] + 128)) +
1813 (((orc_uint16) (var64.x4[1] + 128)) >> 8))) >> 8;
1815 ((orc_uint16) (((orc_uint16) (var64.x4[2] + 128)) +
1816 (((orc_uint16) (var64.x4[2] + 128)) >> 8))) >> 8;
1818 ((orc_uint16) (((orc_uint16) (var64.x4[3] + 128)) +
1819 (((orc_uint16) (var64.x4[3] + 128)) >> 8))) >> 8;
/* The four expressions above evaluate ((x + 128) + ((x + 128) >> 8)) >> 8
 * on each lane of var64.
 * NOTE(review): the results are presumably the var65 lanes read below.
 * Next: convert the lanes of var58, weight them by var65 and add var54. */
1821 var66.x4[0] = (orc_uint8) var58.x4[0];
1822 var66.x4[1] = (orc_uint8) var58.x4[1];
1823 var66.x4[2] = (orc_uint8) var58.x4[2];
1824 var66.x4[3] = (orc_uint8) var58.x4[3];
1826 var67.x4[0] = (var66.x4[0] * var65.x4[0]) & 0xffff;
1827 var67.x4[1] = (var66.x4[1] * var65.x4[1]) & 0xffff;
1828 var67.x4[2] = (var66.x4[2] * var65.x4[2]) & 0xffff;
1829 var67.x4[3] = (var66.x4[3] * var65.x4[3]) & 0xffff;
1831 var68.x4[0] = var67.x4[0] + var54.x4[0];
1832 var68.x4[1] = var67.x4[1] + var54.x4[1];
1833 var68.x4[2] = var67.x4[2] + var54.x4[2];
1834 var68.x4[3] = var67.x4[3] + var54.x4[3];
/* Sum of the two weights, lane by lane; used as the divisor below. */
1836 var69.x4[0] = var65.x4[0] + var52.x4[0];
1837 var69.x4[1] = var65.x4[1] + var52.x4[1];
1838 var69.x4[2] = var65.x4[2] + var52.x4[2];
1839 var69.x4[3] = var65.x4[3] + var52.x4[3];
1842 ((var69.x4[0] & 0xff) ==
1843 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[0]) /
1844 ((orc_uint16) var69.x4[0] & 0xff));
1846 ((var69.x4[1] & 0xff) ==
1847 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[1]) /
1848 ((orc_uint16) var69.x4[1] & 0xff));
1850 ((var69.x4[2] & 0xff) ==
1851 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[2]) /
1852 ((orc_uint16) var69.x4[2] & 0xff));
1854 ((var69.x4[3] & 0xff) ==
1855 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[3]) /
1856 ((orc_uint16) var69.x4[3] & 0xff));
/* The expressions above divide each lane of var68 by the low byte of the
 * matching var69 lane and pass the quotient through ORC_CLAMP_UB; a zero
 * divisor byte yields 255 instead of dividing.  The quotient lanes (var70)
 * are then copied into var71. */
1858 var71.x4[0] = var70.x4[0];
1859 var71.x4[1] = var70.x4[1];
1860 var71.x4[2] = var70.x4[2];
1861 var71.x4[3] = var70.x4[3];
/* Keep the low three bytes of the quotient word. */
1863 var72.i = var71.i & var43.i;
/* Copy the weight-sum lanes and keep only the top byte of that word. */
1865 var73.x4[0] = var69.x4[0];
1866 var73.x4[1] = var69.x4[1];
1867 var73.x4[2] = var69.x4[2];
1868 var73.x4[3] = var69.x4[3];
1870 var74.i = var73.i & var44.i;
/* Merge the two masked words into the output value. */
1872 var75.i = var72.i | var74.i;
/*
 * _backup_orc_overlay_bgra:
 * @ex: executor carrying the array pointers, strides and parameters
 *
 * Plain-C version of the arithmetic that orc_overlay_bgra() registers as the
 * backup function of the "orc_overlay_bgra" program.  It walks m rows; the
 * row pointers are ex->arrays[0] and ex->arrays[4] offset through
 * ORC_PTR_OFFSET by ex->params[0] * j and ex->params[4] * j.  Every 32-bit
 * element is processed as four lanes (the .x4[0..3] members), with the top
 * byte of each word (>> 24) used as the weight source.
 *
 * NOTE(review): indices 0, 4 and 24 are presumably ORC_VAR_D1, ORC_VAR_S1
 * and ORC_VAR_P1, i.e. the slots orc_overlay_bgra() fills in -- confirm
 * against the Orc headers.
 */
1882 _backup_orc_overlay_bgra (OrcExecutor * ORC_RESTRICT ex)
/* Row count, read from the ORC_VAR_A1 parameter slot. */
1887 int m = ex->params[ORC_VAR_A1];
1888 orc_union32 *ORC_RESTRICT ptr0;
1889 const orc_union32 *ORC_RESTRICT ptr4;
/* Outer loop: one iteration per row. */
1925 for (j = 0; j < m; j++) {
1926 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1927 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
/* Broadcast ex->params[24] into all four lanes of var42. */
1930 var42.x4[0] = ex->params[24];
1931 var42.x4[1] = ex->params[24];
1932 var42.x4[2] = ex->params[24];
1933 var42.x4[3] = ex->params[24];
/* Loop-invariant constants: an all-ones word, then the two complementary
 * masks (0x00ffffff and 0xff000000) used for the final merge. */
1935 var55.i = (int) 0xffffffff; /* -1 or 2.122e-314f */
1937 var43.i = (int) 0x00ffffff; /* 16777215 or 8.28905e-317f */
1939 var44.i = (int) 0xff000000; /* -16777216 or 2.11371e-314f */
/* Inner loop: one iteration per 32-bit element of the row. */
1941 for (i = 0; i < n; i++) {
/* Move the top byte of var45 down to bits 0-7. */
1945 var46.i = ((orc_uint32) var45.i) >> 24;
1952 ((var48 & 0xff) << 24) | ((var48 & 0xff) << 16) | ((var48 & 0xff) <<
1953 8) | (var48 & 0xff);
/* The expression above replicates the low byte of var48 into all four
 * bytes of a word (the "splatbl" step of the program).  The lanes of var49
 * are then converted with an orc_uint8 cast, multiplied by the broadcast
 * parameter (product kept modulo 2^16) and shifted right by 8 -> var52. */
1955 var50.x4[0] = (orc_uint8) var49.x4[0];
1956 var50.x4[1] = (orc_uint8) var49.x4[1];
1957 var50.x4[2] = (orc_uint8) var49.x4[2];
1958 var50.x4[3] = (orc_uint8) var49.x4[3];
1960 var51.x4[0] = (var50.x4[0] * var42.x4[0]) & 0xffff;
1961 var51.x4[1] = (var50.x4[1] * var42.x4[1]) & 0xffff;
1962 var51.x4[2] = (var50.x4[2] * var42.x4[2]) & 0xffff;
1963 var51.x4[3] = (var50.x4[3] * var42.x4[3]) & 0xffff;
1965 var52.x4[0] = ((orc_uint16) var51.x4[0]) >> 8;
1966 var52.x4[1] = ((orc_uint16) var51.x4[1]) >> 8;
1967 var52.x4[2] = ((orc_uint16) var51.x4[2]) >> 8;
1968 var52.x4[3] = ((orc_uint16) var51.x4[3]) >> 8;
/* Convert the lanes of var45 and weight each one by var52 -> var54. */
1970 var53.x4[0] = (orc_uint8) var45.x4[0];
1971 var53.x4[1] = (orc_uint8) var45.x4[1];
1972 var53.x4[2] = (orc_uint8) var45.x4[2];
1973 var53.x4[3] = (orc_uint8) var45.x4[3];
1975 var54.x4[0] = (var53.x4[0] * var52.x4[0]) & 0xffff;
1976 var54.x4[1] = (var53.x4[1] * var52.x4[1]) & 0xffff;
1977 var54.x4[2] = (var53.x4[2] * var52.x4[2]) & 0xffff;
1978 var54.x4[3] = (var53.x4[3] * var52.x4[3]) & 0xffff;
/* Lanes of the all-ones constant cast to orc_uint8 (0xff per lane if .x4
 * aliases the bytes of .i), minus var52: the complementary weight var57. */
1980 var56.x4[0] = (orc_uint8) var55.x4[0];
1981 var56.x4[1] = (orc_uint8) var55.x4[1];
1982 var56.x4[2] = (orc_uint8) var55.x4[2];
1983 var56.x4[3] = (orc_uint8) var55.x4[3];
1985 var57.x4[0] = var56.x4[0] - var52.x4[0];
1986 var57.x4[1] = var56.x4[1] - var52.x4[1];
1987 var57.x4[2] = var56.x4[2] - var52.x4[2];
1988 var57.x4[3] = var56.x4[3] - var52.x4[3];
/* Same top-byte extraction, this time from var58. */
1992 var59.i = ((orc_uint32) var58.i) >> 24;
1999 ((var61 & 0xff) << 24) | ((var61 & 0xff) << 16) | ((var61 & 0xff) <<
2000 8) | (var61 & 0xff);
/* Second byte replication (low byte of var61).  The lanes of var62 are
 * converted and multiplied by the complementary weight var57. */
2002 var63.x4[0] = (orc_uint8) var62.x4[0];
2003 var63.x4[1] = (orc_uint8) var62.x4[1];
2004 var63.x4[2] = (orc_uint8) var62.x4[2];
2005 var63.x4[3] = (orc_uint8) var62.x4[3];
2007 var64.x4[0] = (var63.x4[0] * var57.x4[0]) & 0xffff;
2008 var64.x4[1] = (var63.x4[1] * var57.x4[1]) & 0xffff;
2009 var64.x4[2] = (var63.x4[2] * var57.x4[2]) & 0xffff;
2010 var64.x4[3] = (var63.x4[3] * var57.x4[3]) & 0xffff;
2013 ((orc_uint16) (((orc_uint16) (var64.x4[0] + 128)) +
2014 (((orc_uint16) (var64.x4[0] + 128)) >> 8))) >> 8;
2016 ((orc_uint16) (((orc_uint16) (var64.x4[1] + 128)) +
2017 (((orc_uint16) (var64.x4[1] + 128)) >> 8))) >> 8;
2019 ((orc_uint16) (((orc_uint16) (var64.x4[2] + 128)) +
2020 (((orc_uint16) (var64.x4[2] + 128)) >> 8))) >> 8;
2022 ((orc_uint16) (((orc_uint16) (var64.x4[3] + 128)) +
2023 (((orc_uint16) (var64.x4[3] + 128)) >> 8))) >> 8;
/* The four expressions above evaluate ((x + 128) + ((x + 128) >> 8)) >> 8
 * on each lane of var64, the program's "div255w" step.
 * NOTE(review): the results are presumably the var65 lanes read below.
 * Next: convert the lanes of var58, weight them by var65 and add var54. */
2025 var66.x4[0] = (orc_uint8) var58.x4[0];
2026 var66.x4[1] = (orc_uint8) var58.x4[1];
2027 var66.x4[2] = (orc_uint8) var58.x4[2];
2028 var66.x4[3] = (orc_uint8) var58.x4[3];
2030 var67.x4[0] = (var66.x4[0] * var65.x4[0]) & 0xffff;
2031 var67.x4[1] = (var66.x4[1] * var65.x4[1]) & 0xffff;
2032 var67.x4[2] = (var66.x4[2] * var65.x4[2]) & 0xffff;
2033 var67.x4[3] = (var66.x4[3] * var65.x4[3]) & 0xffff;
2035 var68.x4[0] = var67.x4[0] + var54.x4[0];
2036 var68.x4[1] = var67.x4[1] + var54.x4[1];
2037 var68.x4[2] = var67.x4[2] + var54.x4[2];
2038 var68.x4[3] = var67.x4[3] + var54.x4[3];
/* Sum of the two weights, lane by lane; used as the divisor below. */
2040 var69.x4[0] = var65.x4[0] + var52.x4[0];
2041 var69.x4[1] = var65.x4[1] + var52.x4[1];
2042 var69.x4[2] = var65.x4[2] + var52.x4[2];
2043 var69.x4[3] = var65.x4[3] + var52.x4[3];
2046 ((var69.x4[0] & 0xff) ==
2047 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[0]) /
2048 ((orc_uint16) var69.x4[0] & 0xff));
2050 ((var69.x4[1] & 0xff) ==
2051 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[1]) /
2052 ((orc_uint16) var69.x4[1] & 0xff));
2054 ((var69.x4[2] & 0xff) ==
2055 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[2]) /
2056 ((orc_uint16) var69.x4[2] & 0xff));
2058 ((var69.x4[3] & 0xff) ==
2059 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[3]) /
2060 ((orc_uint16) var69.x4[3] & 0xff));
/* The expressions above divide each lane of var68 by the low byte of the
 * matching var69 lane and pass the quotient through ORC_CLAMP_UB; a zero
 * divisor byte yields 255 instead of dividing ("divluw" in the program).
 * The quotient lanes (var70) are then copied into var71. */
2062 var71.x4[0] = var70.x4[0];
2063 var71.x4[1] = var70.x4[1];
2064 var71.x4[2] = var70.x4[2];
2065 var71.x4[3] = var70.x4[3];
/* Keep the low three bytes of the quotient word. */
2067 var72.i = var71.i & var43.i;
/* Copy the weight-sum lanes and keep only the top byte of that word. */
2069 var73.x4[0] = var69.x4[0];
2070 var73.x4[1] = var69.x4[1];
2071 var73.x4[2] = var69.x4[2];
2072 var73.x4[3] = var69.x4[3];
2074 var74.i = var73.i & var44.i;
/* Merge the two masked words into the output value. */
2076 var75.i = var72.i | var74.i;
/*
 * orc_overlay_bgra:
 * @d1: destination array (registered with element size 4)
 * @d1_stride: stored in the executor's ORC_VAR_D1 parameter slot
 * @s1: source array (registered with element size 4)
 * @s1_stride: stored in the executor's ORC_VAR_S1 parameter slot
 * @p1: stored in the executor's ORC_VAR_P1 slot (2-byte program parameter)
 * @n: NOTE(review): presumably the per-row element count -- confirm
 * @m: stored through ORC_EXECUTOR_M
 *
 * Builds the two-dimensional OrcProgram "orc_overlay_bgra", with
 * _backup_orc_overlay_bgra as its backup function, compiles it, then fills
 * an OrcExecutor with the arguments and fetches the program's code_exec
 * entry point.
 *
 * NOTE(review): the static p / p_inited pair and the orc_once_mutex_lock /
 * unlock calls suggest the program is meant to be built only once --
 * confirm the guard around the construction code.
 */
2085 orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
2086 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
2088 OrcExecutor _ex, *ex = &_ex;
2089 static int p_inited = 0;
2090 static OrcProgram *p = 0;
2091 void (*func) (OrcExecutor *);
/* Program construction happens between the lock and unlock calls. */
2094 orc_once_mutex_lock ();
2097 p = orc_program_new ();
2098 orc_program_set_2d (p);
2099 orc_program_set_name (p, "orc_overlay_bgra");
2100 orc_program_set_backup_function (p, _backup_orc_overlay_bgra);
/* Operands: one destination, one source, five constants (all-ones, the
 * 0xff000000 / 0x00ffffff mask pair, and the shift counts 0x18 = 24 and 8),
 * the p1 parameter, and ten temporaries of 1, 2, 4 or 8 bytes. */
2101 orc_program_add_destination (p, 4, "d1");
2102 orc_program_add_source (p, 4, "s1");
2103 orc_program_add_constant (p, 4, 0xffffffff, "c1");
2104 orc_program_add_constant (p, 4, 0xff000000, "c2");
2105 orc_program_add_constant (p, 4, 0x00ffffff, "c3");
2106 orc_program_add_constant (p, 4, 0x00000018, "c4");
2107 orc_program_add_constant (p, 4, 0x00000008, "c5");
2108 orc_program_add_parameter (p, 2, "p1");
2109 orc_program_add_temporary (p, 4, "t1");
2110 orc_program_add_temporary (p, 4, "t2");
2111 orc_program_add_temporary (p, 2, "t3");
2112 orc_program_add_temporary (p, 1, "t4");
2113 orc_program_add_temporary (p, 8, "t5");
2114 orc_program_add_temporary (p, 8, "t6");
2115 orc_program_add_temporary (p, 8, "t7");
2116 orc_program_add_temporary (p, 4, "t8");
2117 orc_program_add_temporary (p, 8, "t9");
2118 orc_program_add_temporary (p, 8, "t10");
/*
 * Instruction list, in order:
 *  - load s1 into t1, shift right by c4 (shrul), narrow to a byte (convlw,
 *    convwb), replicate it (splatbl) and widen (convubw) into t5;
 *    t5 = (t5 * p1) >> c5
 *  - t10 = widened t1 * t5
 *  - t6 = widened c1 - t5
 *  - load d1 into t1, same shift / narrow / replicate sequence into t7;
 *    t7 = div255w (t7 * t6)
 *  - t9 = widened t1 * t7 + t10;  t7 = t7 + t5;  t9 = divluw (t9, t7)
 *  - narrow t9 and mask with c3, narrow t7 and mask with c2, OR the two
 *    words and store the result to d1
 * NOTE(review): the third argument is 2 on exactly the steps that the C
 * backup performs per .x4 lane; presumably a four-lane flag -- confirm.
 */
2120 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
2122 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
2124 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
2126 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
2128 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1,
2130 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T8, ORC_VAR_D1,
2132 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_P1,
2134 orc_program_append_2 (p, "shruw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C5,
2136 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T10, ORC_VAR_T1,
2137 ORC_VAR_D1, ORC_VAR_D1);
2138 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T5,
2140 orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T8, ORC_VAR_C1, ORC_VAR_D1,
2142 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_D1,
2144 orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
2146 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
2148 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
2150 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
2152 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
2154 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1,
2156 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1,
2158 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
2160 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
2162 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
2164 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T7,
2166 orc_program_append_2 (p, "addw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T10,
2168 orc_program_append_2 (p, "addw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T5,
2170 orc_program_append_2 (p, "divluw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T7,
2172 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T9, ORC_VAR_D1,
2174 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
2176 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_D1,
2178 orc_program_append_2 (p, "andl", 0, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_C2,
2180 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T8,
2182 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
2185 orc_program_compile (p);
2188 orc_once_mutex_unlock ();
/* Hand the call arguments to the executor: row count, then pointer and
 * stride for destination and source, then the p1 parameter. */
2193 ORC_EXECUTOR_M (ex) = m;
2194 ex->arrays[ORC_VAR_D1] = d1;
2195 ex->params[ORC_VAR_D1] = d1_stride;
2196 ex->arrays[ORC_VAR_S1] = (void *) s1;
2197 ex->params[ORC_VAR_S1] = s1_stride;
2198 ex->params[ORC_VAR_P1] = p1;
/* Entry point of the compiled program. */
2200 func = p->code_exec;