2 /* autogenerated from test.orc */
/*
 * Fixed-width integer aliases (orc_int8 ... orc_uint64) and the ORC_UINT64_C
 * literal macro, selected per compiler: the C99 <stdint.h> names, the MSVC
 * __intN types, or plain char/short/int with the 64-bit alias chosen by
 * comparing INT_MAX against LONG_MAX.
 * NOTE(review): the #else/#endif lines and any #include directives that
 * belong to this section are not visible in this listing -- confirm against
 * the generated file.
 */
12 #ifndef _ORC_INTEGER_TYPEDEFS_
13 #define _ORC_INTEGER_TYPEDEFS_
/* Branch 1: a C99 (or later) compiler -- alias the standard fixed-width types. */
14 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
16 typedef int8_t orc_int8;
17 typedef int16_t orc_int16;
18 typedef int32_t orc_int32;
19 typedef int64_t orc_int64;
20 typedef uint8_t orc_uint8;
21 typedef uint16_t orc_uint16;
22 typedef uint32_t orc_uint32;
23 typedef uint64_t orc_uint64;
24 #define ORC_UINT64_C(x) UINT64_C(x)
/* Branch 2: Microsoft compilers -- use the sized __intN built-in types. */
25 #elif defined(_MSC_VER)
26 typedef signed __int8 orc_int8;
27 typedef signed __int16 orc_int16;
28 typedef signed __int32 orc_int32;
29 typedef signed __int64 orc_int64;
30 typedef unsigned __int8 orc_uint8;
31 typedef unsigned __int16 orc_uint16;
32 typedef unsigned __int32 orc_uint32;
33 typedef unsigned __int64 orc_uint64;
34 #define ORC_UINT64_C(x) (x##Ui64)
/* Remaining typedefs use the basic C types (presumably the fallback branch). */
37 typedef signed char orc_int8;
38 typedef short orc_int16;
39 typedef int orc_int32;
40 typedef unsigned char orc_uint8;
41 typedef unsigned short orc_uint16;
42 typedef unsigned int orc_uint32;
/* When long is no wider than int, long long is used for the 64-bit aliases. */
43 #if INT_MAX == LONG_MAX
44 typedef long long orc_int64;
45 typedef unsigned long long orc_uint64;
46 #define ORC_UINT64_C(x) (x##ULL)
/* Otherwise plain long is used for the 64-bit aliases. */
48 typedef long orc_int64;
49 typedef unsigned long orc_uint64;
50 #define ORC_UINT64_C(x) (x##UL)
/*
 * Overlay unions for 16-, 32- and 64-bit values. Member i is the whole value
 * as an integer, f (32/64 only) is the float/double member, and the xN / x2f
 * arrays expose the same storage as N narrower lanes.
 */
53 typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16;
54 typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32;
55 typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64;
/*
 * Kernel prototypes, part 1: entry points working mostly on int16_t arrays,
 * plus 8-bit/16-bit conversion, splat and average kernels.
 * Parameter naming visible in the signatures: d* are writable destination
 * arrays, s* are const source arrays, p* are int parameters and n is an int
 * count; every prototype here that takes *_stride arguments also takes a
 * second count m.
 * NOTE(review): whether strides are in bytes or elements is not stated by
 * the prototypes -- confirm against the Orc documentation.
 */
58 void orc_add2_rshift_add_s16_22_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int n);
59 void orc_add2_rshift_add_s16_22 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
60 void orc_add2_rshift_sub_s16_22_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int n);
61 void orc_add2_rshift_sub_s16_22 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
62 void orc_add2_rshift_add_s16_11_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int n);
63 void orc_add2_rshift_add_s16_11 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
64 void orc_add2_rshift_sub_s16_11_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int n);
65 void orc_add2_rshift_sub_s16_11 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
66 void orc_add_const_rshift_s16_11 (int16_t * d1, const int16_t * s1, int n);
67 void orc_add_const_rshift_s16 (int16_t * d1, int p1, int p2, int n);
68 void orc_add_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
69 void orc_add_s16_2d (int16_t * d1, int d1_stride, const int16_t * s1, int s1_stride, int n, int m);
70 void orc_addc_rshift_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int p1, int n);
71 void orc_lshift1_s16 (int16_t * d1, const int16_t * s1, int n);
72 void orc_lshift2_s16 (int16_t * d1, const int16_t * s1, int n);
73 void orc_lshift_s16_ip (int16_t * d1, int p1, int n);
74 void orc_mas2_add_s16_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int p1, int p2, int p3, int n);
75 void orc_mas2_add_s16_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, int p1, int p2, int p3, int n);
76 void orc_mas2_sub_s16_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int p1, int p2, int p3, int n);
77 void orc_mas2_sub_s16_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, int p1, int p2, int p3, int n);
78 void orc_mas4_across_add_s16_1991_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, const int16_t * s4, const int16_t * s5, int p1, int p2, int n);
79 void orc_mas4_across_add_s16_1991_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, const int16_t * s4, int p1, int p2, int n);
80 void orc_mas4_across_sub_s16_1991_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, const int16_t * s4, const int16_t * s5, int p1, int p2, int n);
81 void orc_mas4_across_sub_s16_1991_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, const int16_t * s4, int p1, int p2, int n);
82 void orc_subtract_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
83 void orc_add_s16_u8 (int16_t * d1, const int16_t * s1, const orc_uint8 * s2, int n);
84 void orc_add_s16_u8_2d (int16_t * d1, int d1_stride, const orc_uint8 * s1, int s1_stride, int n, int m);
85 void orc_convert_s16_u8 (orc_uint16 * d1, const orc_uint8 * s1, int n);
86 void orc_convert_u8_s16 (orc_uint8 * d1, const int16_t * s1, int n);
87 void orc_offsetconvert_u8_s16 (orc_uint8 * d1, const int16_t * s1, int n);
88 void orc_offsetconvert_s16_u8 (int16_t * d1, const orc_uint8 * s1, int n);
89 void orc_subtract_s16_u8 (int16_t * d1, const int16_t * s1, const orc_uint8 * s2, int n);
90 void orc_multiply_and_add_s16_u8 (int16_t * d1, const int16_t * s1, const orc_uint8 * s2, int n);
91 void orc_splat_s16_ns (int16_t * d1, int p1, int n);
92 void orc_splat_s16_2d_4xn (int16_t * d1, int d1_stride, int p1, int m);
93 void orc_splat_s16_2d_8xn (int16_t * d1, int d1_stride, int p1, int m);
94 void orc_splat_s16_2d (int16_t * d1, int d1_stride, int p1, int n, int m);
95 void orc_splat_u8_ns (orc_uint8 * d1, int p1, int n);
96 void orc_splat_u8_2d (orc_uint8 * d1, int d1_stride, int p1, int n, int m);
97 void orc_average_u8 (orc_uint8 * d1, const orc_uint8 * s1, const orc_uint8 * s2, int n);
98 void orc_rrshift6_add_s16_2d (uint8_t * d1, int d1_stride, const int16_t * s1, int s1_stride, const int16_t * s2, int s2_stride, int n, int m);
99 void orc_rrshift6_sub_s16_2d (int16_t * d1, int d1_stride, int16_t * d2, int d2_stride, int n, int m);
100 void orc_rrshift6_s16_ip_2d (int16_t * d1, int d1_stride, int n, int m);
101 void orc_rrshift6_s16_ip (int16_t * d1, int n);
/*
 * Kernel prototypes, part 2: unpack/pack, interleave/deinterleave, haar,
 * sum, (de)quantise, downsample and stats entry points (grouping is by name
 * only; the prototypes do not state what each kernel computes).
 * d* are writable destination arrays, s* are const source arrays, p* are int
 * parameters, n is an int count and, where present, m is a second count
 * paired with *_stride arguments.
 * NOTE(review): a1 is a non-const int32_t/int pointer -- presumably an
 * accumulator output; confirm against the kernel definitions.
 */
102 void orc_unpack_yuyv_y (orc_uint8 * d1, const orc_uint16 * s1, int n);
103 void orc_unpack_yuyv_u (orc_uint8 * d1, const orc_uint32 * s1, int n);
104 void orc_unpack_yuyv_v (orc_uint8 * d1, const orc_uint32 * s1, int n);
105 void orc_packyuyv (orc_uint32 * d1, const uint8_t * s1, const orc_uint8 * s2, const orc_uint8 * s3, int n);
106 void orc_unpack_uyvy_y (orc_uint8 * d1, const orc_uint16 * s1, int n);
107 void orc_unpack_uyvy_u (orc_uint8 * d1, const orc_uint32 * s1, int n);
108 void orc_unpack_uyvy_v (orc_uint8 * d1, const orc_uint32 * s1, int n);
109 void orc_interleave2_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
110 void orc_interleave2_rrshift1_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
111 void orc_deinterleave2_s16 (int16_t * d1, int16_t * d2, const int16_t * s1, int n);
112 void orc_deinterleave2_lshift1_s16 (int16_t * d1, int16_t * d2, const int16_t * s1, int n);
113 void orc_haar_deint_lshift1_split_s16 (int16_t * d1, int16_t * d2, const int16_t * s1, int n);
114 void orc_haar_deint_split_s16 (int16_t * d1, int16_t * d2, const int16_t * s1, int n);
115 void orc_haar_split_s16_lo (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
116 void orc_haar_split_s16_hi (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
117 void orc_haar_split_s16_op (int16_t * d1, int16_t * d2, const int16_t * s1, const int16_t * s2, int n);
118 void orc_haar_split_s16 (int16_t * d1, int16_t * d2, int n);
119 void orc_haar_synth_s16_lo (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
120 void orc_haar_synth_s16_hi (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
121 void orc_haar_synth_s16_op (int16_t * d1, int16_t * d2, const int16_t * s1, const int16_t * s2, int n);
122 void orc_haar_synth_s16 (int16_t * d1, int16_t * d2, int n);
123 void orc_haar_synth_rrshift1_int_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
124 void orc_haar_synth_int_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
125 void orc_haar_sub_s16 (int16_t * d1, const int16_t * s1, int n);
126 void orc_haar_add_half_s16 (int16_t * d1, const int16_t * s1, int n);
127 void orc_haar_add_s16 (int16_t * d1, const int16_t * s1, int n);
128 void orc_haar_sub_half_s16 (int16_t * d1, const int16_t * s1, int n);
129 void orc_sum_u8 (int32_t * a1, const orc_uint8 * s1, int n);
130 void orc_sum_s16 (int32_t * a1, const int16_t * s1, int n);
131 void orc_sum_square_diff_u8 (int32_t * a1, const orc_uint8 * s1, const orc_uint8 * s2, int n);
132 void orc_dequantise_s16_2d_4xn (int16_t * d1, int d1_stride, const int16_t * s1, int s1_stride, int p1, int p2, int m);
133 void orc_dequantise_s16_2d_8xn (int16_t * d1, int d1_stride, const int16_t * s1, int s1_stride, int p1, int p2, int m);
134 void orc_dequantise_s16_ip_2d_8xn (int16_t * d1, int d1_stride, int p1, int p2, int m);
135 void orc_dequantise_s16_ip_2d (int16_t * d1, int d1_stride, int p1, int p2, int n, int m);
136 void orc_dequantise_s16_ip (int16_t * d1, int p1, int p2, int n);
137 void orc_dequantise_s16 (int16_t * d1, const int16_t * s1, int p1, int p2, int n);
138 void orc_dequantise_var_s16_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, int n);
139 void orc_quantise1_s16 (int16_t * d1, const int16_t * s1, int p1, int p2, int p3, int n);
140 void orc_quantise2_s16 (int16_t * d1, const int16_t * s1, int p1, int p2, int n);
141 void orc_quantdequant1_s16 (int16_t * d1, int16_t * d2, int p1, int p2, int p3, int p4, int p5, int n);
142 void orc_quantdequant3_s16 (int16_t * d1, int16_t * d2, int p1, int p2, int p3, int p4, int p5, int p6, int n);
143 void orc_quantdequant2_s16 (int16_t * d1, int16_t * d2, int p1, int p2, int p3, int p4, int n);
144 void orc_downsample_vert_u8 (orc_uint8 * d1, const orc_uint8 * s1, const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, int n);
145 void orc_downsample_horiz_u8 (orc_uint8 * d1, const uint8_t * s1, const uint8_t * s2, int n);
146 void orc_stats_moment_s16 (int32_t * a1, const int16_t * s1, int n);
147 void orc_stats_above_s16 (int32_t * a1, const int16_t * s1, int n);
148 void orc_accw (int * a1, const int16_t * s1, int n);
/*
 * Kernel prototypes, part 3: avg2, combine4, combine2 and sad entry points on
 * uint8_t arrays. Every array argument is paired with an int *_stride
 * argument. The _nxm variants take both counts n and m, the _<W>xn variants
 * take only m, and the _8x8 / _12x12 variants take neither. The sad kernels
 * write through a non-const uint32_t pointer a1.
 * NOTE(review): stride units and the meaning of p1..p4 are not stated by the
 * prototypes -- confirm against the kernel definitions.
 */
149 void orc_avg2_8xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m);
150 void orc_avg2_12xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m);
151 void orc_avg2_16xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m);
152 void orc_avg2_32xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m);
153 void orc_avg2_nxm_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int n, int m);
154 void orc_combine4_8xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m);
155 void orc_combine4_12xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m);
156 void orc_combine4_16xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m);
157 void orc_combine4_24xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m);
158 void orc_combine4_32xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m);
159 void orc_combine4_nxm_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int n, int m);
160 void orc_combine2_8xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int p1, int p2, int p3, int p4, int m);
161 void orc_combine2_12xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int p1, int p2, int p3, int p4, int m);
162 void orc_combine2_16xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int p1, int p2, int p3, int p4, int m);
163 void orc_combine2_nxm_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int p1, int p2, int p3, int p4, int n, int m);
164 void orc_sad_nxm_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int n, int m);
165 void orc_sad_8x8_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride);
166 void orc_sad_12x12_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride);
167 void orc_sad_16xn_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m);
168 void orc_sad_32xn_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m);
/*
 * Kernel prototypes, part 4: remaining entry points (colour conversion,
 * splat/copy, compositing/combine, resampling, float-constant and 64-bit
 * parameter tests -- grouping is by name only).
 * d* are writable destination arrays, s* are const source arrays, n is an
 * int count and, where present, m is a second count paired with *_stride
 * arguments. p1/p2 are int parameters, except in param64_2 and
 * pa_volume_s16ne_orc_2ch where p1 is an orc_int64.
 */
169 void convert_rgb_to_gray (orc_uint8 * d1, const orc_uint32 * s1, int n);
170 void canny_calc_delta_x (int32_t * d1, const uint8_t * s1, const uint8_t * s2, int n);
171 void i420_to_ayuv (orc_uint32 * d1, const orc_uint8 * s1, const orc_uint8 * s2, const orc_uint8 * s3, int p1, int n);
172 void test_4x (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n);
173 void test_4x_2 (orc_uint32 * d1, const orc_uint32 * s1, int p1, int n);
174 void orc_splat_u16 (uint16_t * d1, int p1, int n);
175 void orc_splat_u32 (uint32_t * d1, int p1, int n);
176 void orc_splat_u16_2d (uint16_t * d1, int d1_stride, int p1, int n, int m);
177 void orc_splat_u32_2d (uint32_t * d1, int d1_stride, int p1, int n, int m);
178 void orc_copy_u16_2d (orc_uint16 * d1, int d1_stride, const orc_uint16 * s1, int s1_stride, int n, int m);
179 void orc_copy_u32_2d (orc_uint32 * d1, int d1_stride, const orc_uint32 * s1, int s1_stride, int n, int m);
180 void orc_composite_add_8888_8888_2d (orc_uint32 * d1, int d1_stride, const orc_uint32 * s1, int s1_stride, int n, int m);
181 void orc_composite_add_8_8_line (orc_uint8 * d1, const orc_uint8 * s1, int n);
182 void orc_composite_add_n_8_8_line (orc_uint8 * d1, const orc_uint8 * s1, int p1, int n);
183 void orc_code_combine_add_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n);
184 void orc_code_combine_add_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n);
185 void orc_code_combine_over_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n);
186 void orc_code_combine_over_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n);
187 void orc_code_combine_in_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n);
188 void orc_code_combine_in_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n);
189 void orc_code_combine_out_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n);
190 void orc_code_combine_out_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n);
191 void orc_code_combine_atop_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n);
192 void orc_code_combine_atop_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n);
193 void orc_code_combine_xor_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n);
194 void orc_code_combine_xor_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n);
195 void orc_code_combine_add_ca (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n);
196 void orc_code_combine_add_ca_n (orc_uint32 * d1, const orc_uint32 * s1, int n);
197 void orc_code_combine_over_ca (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n);
198 void orc_code_combine_over_ca_n (orc_uint32 * d1, const orc_uint32 * s1, int n);
199 void orc_composite_over_8888_8_8888_line (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint8 * s2, int n);
200 void orc_composite_over_n_8888_8888_ca_2d (orc_uint32 * d1, const orc_uint32 * s1, int p1, int n);
201 void cogorc_resample_horiz_1tap (orc_uint8 * d1, const orc_uint8 * s1, int p1, int p2, int n);
202 void cogorc_resample_horiz_2tap (orc_uint8 * d1, const orc_uint8 * s1, int p1, int p2, int n);
203 void test_float_constant_1 (orc_uint32 * d1, int n);
204 void test_float_constant_2 (orc_uint32 * d1, int n);
205 void convert_fc32_to_int32 (orc_uint32 * d1, const orc_uint64 * s1, int n);
206 void param64 (orc_uint64 * d1, int p1, int n);
207 void const64 (orc_uint64 * d1, int n);
208 void param64_2 (orc_uint64 * d1, orc_int64 p1, int n);
209 void pa_volume_s16ne_orc_2ch (int16_t * d1, orc_int64 p1, int n);
212 /* begin Orc C target preamble */
/*
 * Generic arithmetic helpers. These are function-like macros whose arguments
 * appear more than once in the expansion, so arguments with side effects are
 * evaluated repeatedly.
 */
213 #define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
214 #define ORC_ABS(a) ((a)<0 ? -(a) : (a))
215 #define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))
216 #define ORC_MAX(a,b) ((a)>(b) ? (a) : (b))
/*
 * Range limits: SB/UB are the signed/unsigned 8-bit bounds, SW/UW the 16-bit
 * bounds and SL/UL the 32-bit bounds. Each signed minimum is written as
 * (-1 - MAX).
 * NOTE(review): ORC_UB_MIN, ORC_UW_MIN and ORC_UL_MIN are used by the clamp
 * macros below but their definitions are not visible in this listing --
 * confirm they exist in the generated file.
 */
217 #define ORC_SB_MAX 127
218 #define ORC_SB_MIN (-1-ORC_SB_MAX)
219 #define ORC_UB_MAX 255
221 #define ORC_SW_MAX 32767
222 #define ORC_SW_MIN (-1-ORC_SW_MAX)
223 #define ORC_UW_MAX 65535
225 #define ORC_SL_MAX 2147483647
226 #define ORC_SL_MIN (-1-ORC_SL_MAX)
227 #define ORC_UL_MAX 4294967295U
/* Saturate x to the range of the named type using the limits above. */
229 #define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
230 #define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
231 #define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
232 #define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
233 #define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
234 #define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)
/* Reverse the byte order of a 16-bit (W), 32-bit (L) or 64-bit (Q) value. */
235 #define ORC_SWAP_W(x) ((((x)&0xff)<<8) | (((x)&0xff00)>>8))
236 #define ORC_SWAP_L(x) ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24))
237 #define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | (((x)&ORC_UINT64_C(0xff00))<<40) | (((x)&ORC_UINT64_C(0xff0000))<<24) | (((x)&ORC_UINT64_C(0xff000000))<<8) | (((x)&ORC_UINT64_C(0xff00000000))>>8) | (((x)&ORC_UINT64_C(0xff0000000000))>>24) | (((x)&ORC_UINT64_C(0xff000000000000))>>40) | (((x)&ORC_UINT64_C(0xff00000000000000))>>56))
/* Address ptr advanced by offset bytes, returned as void *. */
238 #define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))
/*
 * Helpers that operate on the integer bit pattern of a floating-point value
 * (the masks match the exponent/mantissa fields of 32-bit and 64-bit floats).
 * ORC_DENORMAL*: when the exponent field is zero, keep only the sign and
 * exponent bits (clearing the mantissa); otherwise leave x unchanged.
 * ORC_ISNAN*: true when the exponent field is all ones and the mantissa is
 * non-zero.
 */
239 #define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff))
240 #define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0))
241 #define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff)))
242 #define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0))
/*
 * ORC_RESTRICT: the C99 restrict keyword, or __restrict__ on GCC >= 4.
 * NOTE(review): the fallback branch and the closing #endif are not visible
 * in this listing -- confirm against the generated file.
 */
243 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
244 #define ORC_RESTRICT restrict
245 #elif defined(__GNUC__) && __GNUC__ >= 4
246 #define ORC_RESTRICT __restrict__
250 /* end Orc C target preamble */
254 /* orc_add2_rshift_add_s16_22_op */
/*
 * Plain-C implementation of orc_add2_rshift_add_s16_22_op.
 * Views d1, s1, s2 and s3 as orc_union16 arrays and, for each of the n
 * elements, evaluates
 *     var37 = var36 + ((var33 + var34 + 2) >> 2)
 * NOTE(review): the local declarations, the loads into var33/var34/var36 and
 * the store of var37 are not visible in this listing. Presumably
 * var33/var34 are read from s2/s3, var36 from s1 and var37 is written to d1,
 * matching the addw/addw/shrsw/addw program of the Orc wrapper of the same
 * name -- confirm. A second definition with this name follows; presumably a
 * preprocessor conditional (not visible here) selects between them.
 */
257 orc_add2_rshift_add_s16_22_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int n){
259 orc_union16 * ORC_RESTRICT ptr0;
260 const orc_union16 * ORC_RESTRICT ptr4;
261 const orc_union16 * ORC_RESTRICT ptr5;
262 const orc_union16 * ORC_RESTRICT ptr6;
/* ptr0 is the destination; ptr4..ptr6 are the three sources, in argument order. */
272 ptr0 = (orc_union16 *)d1;
273 ptr4 = (orc_union16 *)s1;
274 ptr5 = (orc_union16 *)s2;
275 ptr6 = (orc_union16 *)s3;
/* Loop-invariant constant 2, added before the shift below. */
278 var35.i = 0x00000002; /* 2 or 9.88131e-324f */
280 for (i = 0; i < n; i++) {
/* Sum of two inputs. */
286 var38.i = var33.i + var34.i;
/* Add the constant 2. */
288 var39.i = var38.i + var35.i;
/* Shift right by 2. */
290 var40.i = var39.i >> 2;
/* Add the shifted sum to the third input. */
294 var37.i = var36.i + var40.i;
/*
 * Backup function for orc_add2_rshift_add_s16_22_op; the wrapper registers
 * it with orc_program_set_backup_function. It takes its arrays from the
 * executor (ex->arrays[0], [4], [5], [6]) instead of from arguments and
 * evaluates, per element,
 *     var37 = var36 + ((var33 + var34 + 2) >> 2)
 * NOTE(review): the declarations (including where n comes from), the loads
 * into var33/var34/var36 and the store of var37 are not visible in this
 * listing -- confirm against the generated file.
 */
303 _backup_orc_add2_rshift_add_s16_22_op (OrcExecutor * ORC_RESTRICT ex)
307 orc_union16 * ORC_RESTRICT ptr0;
308 const orc_union16 * ORC_RESTRICT ptr4;
309 const orc_union16 * ORC_RESTRICT ptr5;
310 const orc_union16 * ORC_RESTRICT ptr6;
/* Slot 0 is used as the destination; slots 4..6 as the const sources. */
320 ptr0 = (orc_union16 *)ex->arrays[0];
321 ptr4 = (orc_union16 *)ex->arrays[4];
322 ptr5 = (orc_union16 *)ex->arrays[5];
323 ptr6 = (orc_union16 *)ex->arrays[6];
/* Loop-invariant constant 2, added before the shift below. */
326 var35.i = 0x00000002; /* 2 or 9.88131e-324f */
328 for (i = 0; i < n; i++) {
/* Sum of two inputs. */
334 var38.i = var33.i + var34.i;
/* Add the constant 2. */
336 var39.i = var38.i + var35.i;
/* Shift right by 2. */
338 var40.i = var39.i >> 2;
/* Add the shifted sum to the third input. */
342 var37.i = var36.i + var40.i;
/*
 * Orc-backed implementation of orc_add2_rshift_add_s16_22_op.
 * Builds an OrcProgram (kept in the function-static p) between
 * orc_once_mutex_lock/unlock: a 2-byte destination d1, 2-byte sources
 * s1..s3, a constant c1 = 2 and a 2-byte temporary t1, then the opcode
 * sequence addw, addw, shrsw, addw. Afterwards the caller's pointers are
 * stored in the executor's array slots.
 * NOTE(review): presumably the sequence means t1 = s2 + s3; t1 += c1;
 * t1 >>= c1; d1 = s1 + t1 (operand order inferred from the plain-C version).
 * The p_inited test, the use of `result`, the remaining executor setup and
 * the call through func are not visible in this listing -- confirm.
 */
350 orc_add2_rshift_add_s16_22_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int n)
352 OrcExecutor _ex, *ex = &_ex;
/* Function-static state, so the program object persists across calls. */
353 static int p_inited = 0;
354 static OrcProgram *p = 0;
355 void (*func) (OrcExecutor *);
358 orc_once_mutex_lock ();
360 OrcCompileResult result;
362 p = orc_program_new ();
363 orc_program_set_name (p, "orc_add2_rshift_add_s16_22_op");
364 orc_program_set_backup_function (p, _backup_orc_add2_rshift_add_s16_22_op);
/* Variable declarations: the second argument is the size (2 for the arrays and temporary, 4 for the constant). */
365 orc_program_add_destination (p, 2, "d1");
366 orc_program_add_source (p, 2, "s1");
367 orc_program_add_source (p, 2, "s2");
368 orc_program_add_source (p, 2, "s3");
369 orc_program_add_constant (p, 4, 0x00000002, "c1");
370 orc_program_add_temporary (p, 2, "t1");
/* c1 (= 2) is used by both the second addw and the shrsw. */
372 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3, ORC_VAR_D1);
373 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
374 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
375 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);
377 result = orc_program_compile (p);
380 orc_once_mutex_unlock ();
/* Bind the caller's arrays; const is cast away because the slots hold void *. */
385 ex->arrays[ORC_VAR_D1] = d1;
386 ex->arrays[ORC_VAR_S1] = (void *)s1;
387 ex->arrays[ORC_VAR_S2] = (void *)s2;
388 ex->arrays[ORC_VAR_S3] = (void *)s3;
396 /* orc_add2_rshift_add_s16_22 */
/*
 * Plain-C implementation of orc_add2_rshift_add_s16_22 (two-source form).
 * Views d1, s1 and s2 as orc_union16 arrays and, for each of the n elements,
 * evaluates
 *     var37 = var36 + ((var33 + var34 + 2) >> 2)
 * NOTE(review): the local declarations, the loads into var33/var34/var36 and
 * the store of var37 are not visible in this listing. Presumably
 * var33/var34 are read from s1/s2 and var36 is the current d1 element
 * (the Orc wrapper's last opcode uses d1 as both destination and first
 * operand) -- confirm. A second definition with this name follows;
 * presumably a preprocessor conditional (not visible here) selects between
 * them.
 */
399 orc_add2_rshift_add_s16_22 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
401 orc_union16 * ORC_RESTRICT ptr0;
402 const orc_union16 * ORC_RESTRICT ptr4;
403 const orc_union16 * ORC_RESTRICT ptr5;
/* ptr0 is the destination; ptr4/ptr5 are the two sources, in argument order. */
413 ptr0 = (orc_union16 *)d1;
414 ptr4 = (orc_union16 *)s1;
415 ptr5 = (orc_union16 *)s2;
/* Loop-invariant constant 2, added before the shift below. */
418 var35.i = 0x00000002; /* 2 or 9.88131e-324f */
420 for (i = 0; i < n; i++) {
/* Sum of two inputs. */
426 var38.i = var33.i + var34.i;
/* Add the constant 2. */
428 var39.i = var38.i + var35.i;
/* Shift right by 2. */
430 var40.i = var39.i >> 2;
/* Add the shifted sum to var36. */
434 var37.i = var36.i + var40.i;
/*
 * Backup function for orc_add2_rshift_add_s16_22; the wrapper registers it
 * with orc_program_set_backup_function. It takes its arrays from the
 * executor (ex->arrays[0], [4], [5]) and evaluates, per element,
 *     var37 = var36 + ((var33 + var34 + 2) >> 2)
 * NOTE(review): the declarations (including where n comes from), the loads
 * into var33/var34/var36 and the store of var37 are not visible in this
 * listing -- confirm against the generated file.
 */
443 _backup_orc_add2_rshift_add_s16_22 (OrcExecutor * ORC_RESTRICT ex)
447 orc_union16 * ORC_RESTRICT ptr0;
448 const orc_union16 * ORC_RESTRICT ptr4;
449 const orc_union16 * ORC_RESTRICT ptr5;
/* Slot 0 is used as the destination; slots 4 and 5 as the const sources. */
459 ptr0 = (orc_union16 *)ex->arrays[0];
460 ptr4 = (orc_union16 *)ex->arrays[4];
461 ptr5 = (orc_union16 *)ex->arrays[5];
/* Loop-invariant constant 2, added before the shift below. */
464 var35.i = 0x00000002; /* 2 or 9.88131e-324f */
466 for (i = 0; i < n; i++) {
/* Sum of two inputs. */
472 var38.i = var33.i + var34.i;
/* Add the constant 2. */
474 var39.i = var38.i + var35.i;
/* Shift right by 2. */
476 var40.i = var39.i >> 2;
/* Add the shifted sum to var36. */
480 var37.i = var36.i + var40.i;
/*
 * Orc-backed implementation of orc_add2_rshift_add_s16_22.
 * Builds an OrcProgram (kept in the function-static p) between
 * orc_once_mutex_lock/unlock: a 2-byte destination d1, 2-byte sources s1 and
 * s2, a constant c1 = 2 and a 2-byte temporary t1, then the opcode sequence
 * addw, addw, shrsw, addw. Afterwards the caller's pointers are stored in
 * the executor's array slots.
 * NOTE(review): presumably the sequence means t1 = s1 + s2; t1 += c1;
 * t1 >>= c1; d1 = d1 + t1, i.e. d1 is updated in place (operand order
 * inferred from the plain-C version). The p_inited test, the use of
 * `result`, the remaining executor setup and the call through func are not
 * visible in this listing -- confirm.
 */
488 orc_add2_rshift_add_s16_22 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
490 OrcExecutor _ex, *ex = &_ex;
/* Function-static state, so the program object persists across calls. */
491 static int p_inited = 0;
492 static OrcProgram *p = 0;
493 void (*func) (OrcExecutor *);
496 orc_once_mutex_lock ();
498 OrcCompileResult result;
500 p = orc_program_new ();
501 orc_program_set_name (p, "orc_add2_rshift_add_s16_22");
502 orc_program_set_backup_function (p, _backup_orc_add2_rshift_add_s16_22);
/* Variable declarations: the second argument is the size (2 for the arrays and temporary, 4 for the constant). */
503 orc_program_add_destination (p, 2, "d1");
504 orc_program_add_source (p, 2, "s1");
505 orc_program_add_source (p, 2, "s2");
506 orc_program_add_constant (p, 4, 0x00000002, "c1");
507 orc_program_add_temporary (p, 2, "t1");
/* c1 (= 2) is used by both the second addw and the shrsw. */
509 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
510 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
511 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
512 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);
514 result = orc_program_compile (p);
517 orc_once_mutex_unlock ();
/* Bind the caller's arrays; const is cast away because the slots hold void *. */
522 ex->arrays[ORC_VAR_D1] = d1;
523 ex->arrays[ORC_VAR_S1] = (void *)s1;
524 ex->arrays[ORC_VAR_S2] = (void *)s2;
532 /* orc_add2_rshift_sub_s16_22_op */
/*
 * Plain-C implementation of orc_add2_rshift_sub_s16_22_op.
 * Views d1, s1, s2 and s3 as orc_union16 arrays and, for each of the n
 * elements, evaluates
 *     var37 = var36 - ((var33 + var34 + 2) >> 2)
 * NOTE(review): the local declarations, the loads into var33/var34/var36 and
 * the store of var37 are not visible in this listing. Presumably
 * var33/var34 are read from s2/s3, var36 from s1 and var37 is written to d1,
 * matching the addw/addw/shrsw/subw program of the Orc wrapper of the same
 * name -- confirm. A second definition with this name follows; presumably a
 * preprocessor conditional (not visible here) selects between them.
 */
535 orc_add2_rshift_sub_s16_22_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int n){
537 orc_union16 * ORC_RESTRICT ptr0;
538 const orc_union16 * ORC_RESTRICT ptr4;
539 const orc_union16 * ORC_RESTRICT ptr5;
540 const orc_union16 * ORC_RESTRICT ptr6;
/* ptr0 is the destination; ptr4..ptr6 are the three sources, in argument order. */
550 ptr0 = (orc_union16 *)d1;
551 ptr4 = (orc_union16 *)s1;
552 ptr5 = (orc_union16 *)s2;
553 ptr6 = (orc_union16 *)s3;
/* Loop-invariant constant 2, added before the shift below. */
556 var35.i = 0x00000002; /* 2 or 9.88131e-324f */
558 for (i = 0; i < n; i++) {
/* Sum of two inputs. */
564 var38.i = var33.i + var34.i;
/* Add the constant 2. */
566 var39.i = var38.i + var35.i;
/* Shift right by 2. */
568 var40.i = var39.i >> 2;
/* Subtract the shifted sum from the third input. */
572 var37.i = var36.i - var40.i;
/*
 * Backup function for orc_add2_rshift_sub_s16_22_op; the wrapper registers
 * it with orc_program_set_backup_function. It takes its arrays from the
 * executor (ex->arrays[0], [4], [5], [6]) and evaluates, per element,
 *     var37 = var36 - ((var33 + var34 + 2) >> 2)
 * NOTE(review): the declarations (including where n comes from), the loads
 * into var33/var34/var36 and the store of var37 are not visible in this
 * listing -- confirm against the generated file.
 */
581 _backup_orc_add2_rshift_sub_s16_22_op (OrcExecutor * ORC_RESTRICT ex)
585 orc_union16 * ORC_RESTRICT ptr0;
586 const orc_union16 * ORC_RESTRICT ptr4;
587 const orc_union16 * ORC_RESTRICT ptr5;
588 const orc_union16 * ORC_RESTRICT ptr6;
/* Slot 0 is used as the destination; slots 4..6 as the const sources. */
598 ptr0 = (orc_union16 *)ex->arrays[0];
599 ptr4 = (orc_union16 *)ex->arrays[4];
600 ptr5 = (orc_union16 *)ex->arrays[5];
601 ptr6 = (orc_union16 *)ex->arrays[6];
/* Loop-invariant constant 2, added before the shift below. */
604 var35.i = 0x00000002; /* 2 or 9.88131e-324f */
606 for (i = 0; i < n; i++) {
/* Sum of two inputs. */
612 var38.i = var33.i + var34.i;
/* Add the constant 2. */
614 var39.i = var38.i + var35.i;
/* Shift right by 2. */
616 var40.i = var39.i >> 2;
/* Subtract the shifted sum from the third input. */
620 var37.i = var36.i - var40.i;
/*
 * Orc-backed implementation of orc_add2_rshift_sub_s16_22_op.
 * Builds an OrcProgram (kept in the function-static p) between
 * orc_once_mutex_lock/unlock: a 2-byte destination d1, 2-byte sources
 * s1..s3, a constant c1 = 2 and a 2-byte temporary t1, then the opcode
 * sequence addw, addw, shrsw, subw. Afterwards the caller's pointers are
 * stored in the executor's array slots.
 * NOTE(review): presumably the sequence means t1 = s2 + s3; t1 += c1;
 * t1 >>= c1; d1 = s1 - t1 (operand order inferred from the plain-C version).
 * The p_inited test, the use of `result`, the remaining executor setup and
 * the call through func are not visible in this listing -- confirm.
 */
628 orc_add2_rshift_sub_s16_22_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int n)
630 OrcExecutor _ex, *ex = &_ex;
/* Function-static state, so the program object persists across calls. */
631 static int p_inited = 0;
632 static OrcProgram *p = 0;
633 void (*func) (OrcExecutor *);
636 orc_once_mutex_lock ();
638 OrcCompileResult result;
640 p = orc_program_new ();
641 orc_program_set_name (p, "orc_add2_rshift_sub_s16_22_op");
642 orc_program_set_backup_function (p, _backup_orc_add2_rshift_sub_s16_22_op);
/* Variable declarations: the second argument is the size (2 for the arrays and temporary, 4 for the constant). */
643 orc_program_add_destination (p, 2, "d1");
644 orc_program_add_source (p, 2, "s1");
645 orc_program_add_source (p, 2, "s2");
646 orc_program_add_source (p, 2, "s3");
647 orc_program_add_constant (p, 4, 0x00000002, "c1");
648 orc_program_add_temporary (p, 2, "t1");
/* c1 (= 2) is used by both the second addw and the shrsw. */
650 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3, ORC_VAR_D1);
651 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
652 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
653 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);
655 result = orc_program_compile (p);
658 orc_once_mutex_unlock ();
/* Bind the caller's arrays; const is cast away because the slots hold void *. */
663 ex->arrays[ORC_VAR_D1] = d1;
664 ex->arrays[ORC_VAR_S1] = (void *)s1;
665 ex->arrays[ORC_VAR_S2] = (void *)s2;
666 ex->arrays[ORC_VAR_S3] = (void *)s3;
674 /* orc_add2_rshift_sub_s16_22 */
/*
 * Plain-C implementation of orc_add2_rshift_sub_s16_22 (two-source form).
 * Views d1, s1 and s2 as orc_union16 arrays and, for each of the n elements,
 * evaluates
 *     var37 = var36 - ((var33 + var34 + 2) >> 2)
 * NOTE(review): the local declarations, the loads into var33/var34/var36 and
 * the store of var37 are not visible in this listing. Presumably
 * var33/var34 are read from s1/s2 and var36 is the current d1 element
 * (the Orc wrapper's last opcode uses d1 as both destination and first
 * operand) -- confirm. A second definition with this name follows;
 * presumably a preprocessor conditional (not visible here) selects between
 * them.
 */
677 orc_add2_rshift_sub_s16_22 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
679 orc_union16 * ORC_RESTRICT ptr0;
680 const orc_union16 * ORC_RESTRICT ptr4;
681 const orc_union16 * ORC_RESTRICT ptr5;
/* ptr0 is the destination; ptr4/ptr5 are the two sources, in argument order. */
691 ptr0 = (orc_union16 *)d1;
692 ptr4 = (orc_union16 *)s1;
693 ptr5 = (orc_union16 *)s2;
/* Loop-invariant constant 2, added before the shift below. */
696 var35.i = 0x00000002; /* 2 or 9.88131e-324f */
698 for (i = 0; i < n; i++) {
/* Sum of two inputs. */
704 var38.i = var33.i + var34.i;
/* Add the constant 2. */
706 var39.i = var38.i + var35.i;
/* Shift right by 2. */
708 var40.i = var39.i >> 2;
/* Subtract the shifted sum from var36. */
712 var37.i = var36.i - var40.i;
/*
 * Backup function for orc_add2_rshift_sub_s16_22; the wrapper registers it
 * with orc_program_set_backup_function. It takes its arrays from the
 * executor (ex->arrays[0], [4], [5]) and evaluates, per element,
 *     var37 = var36 - ((var33 + var34 + 2) >> 2)
 * NOTE(review): the declarations (including where n comes from), the loads
 * into var33/var34/var36 and the store of var37 are not visible in this
 * listing -- confirm against the generated file.
 */
721 _backup_orc_add2_rshift_sub_s16_22 (OrcExecutor * ORC_RESTRICT ex)
725 orc_union16 * ORC_RESTRICT ptr0;
726 const orc_union16 * ORC_RESTRICT ptr4;
727 const orc_union16 * ORC_RESTRICT ptr5;
/* Slot 0 is used as the destination; slots 4 and 5 as the const sources. */
737 ptr0 = (orc_union16 *)ex->arrays[0];
738 ptr4 = (orc_union16 *)ex->arrays[4];
739 ptr5 = (orc_union16 *)ex->arrays[5];
/* Loop-invariant constant 2, added before the shift below. */
742 var35.i = 0x00000002; /* 2 or 9.88131e-324f */
744 for (i = 0; i < n; i++) {
/* Sum of two inputs. */
750 var38.i = var33.i + var34.i;
/* Add the constant 2. */
752 var39.i = var38.i + var35.i;
/* Shift right by 2. */
754 var40.i = var39.i >> 2;
/* Subtract the shifted sum from var36. */
758 var37.i = var36.i - var40.i;
/*
 * Orc-backed implementation of orc_add2_rshift_sub_s16_22.
 * Builds an OrcProgram (kept in the function-static p) between
 * orc_once_mutex_lock/unlock: a 2-byte destination d1, 2-byte sources s1 and
 * s2, a constant c1 = 2 and a 2-byte temporary t1, then the opcode sequence
 * addw, addw, shrsw, subw. Afterwards the caller's pointers are stored in
 * the executor's array slots.
 * NOTE(review): presumably the sequence means t1 = s1 + s2; t1 += c1;
 * t1 >>= c1; d1 = d1 - t1, i.e. d1 is updated in place (operand order
 * inferred from the plain-C version). The p_inited test, the use of
 * `result`, the remaining executor setup and the call through func are not
 * visible in this listing -- confirm.
 */
766 orc_add2_rshift_sub_s16_22 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
768 OrcExecutor _ex, *ex = &_ex;
/* Function-static state, so the program object persists across calls. */
769 static int p_inited = 0;
770 static OrcProgram *p = 0;
771 void (*func) (OrcExecutor *);
774 orc_once_mutex_lock ();
776 OrcCompileResult result;
778 p = orc_program_new ();
779 orc_program_set_name (p, "orc_add2_rshift_sub_s16_22");
780 orc_program_set_backup_function (p, _backup_orc_add2_rshift_sub_s16_22);
/* Variable declarations: the second argument is the size (2 for the arrays and temporary, 4 for the constant). */
781 orc_program_add_destination (p, 2, "d1");
782 orc_program_add_source (p, 2, "s1");
783 orc_program_add_source (p, 2, "s2");
784 orc_program_add_constant (p, 4, 0x00000002, "c1");
785 orc_program_add_temporary (p, 2, "t1");
/* c1 (= 2) is used by both the second addw and the shrsw. */
787 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
788 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
789 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
790 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);
792 result = orc_program_compile (p);
795 orc_once_mutex_unlock ();
/* Bind the caller's arrays; const is cast away because the slots hold void *. */
800 ex->arrays[ORC_VAR_D1] = d1;
801 ex->arrays[ORC_VAR_S1] = (void *)s1;
802 ex->arrays[ORC_VAR_S2] = (void *)s2;
810 /* orc_add2_rshift_add_s16_11_op */
/*
 * Plain-C implementation of orc_add2_rshift_add_s16_11_op.
 * Views d1, s1, s2 and s3 as orc_union16 arrays and, for each of the n
 * elements, evaluates
 *     var36 = var35 + ((var33 + var34 + 1) >> 1)
 * NOTE(review): the local declarations, the loads into var33/var34/var35 and
 * the store of var36 are not visible in this listing. Presumably
 * var33/var34 are read from s2/s3, var35 from s1 and var36 is written to d1,
 * matching the avgsw/addw program of the Orc wrapper of the same name --
 * confirm. A second definition with this name follows; presumably a
 * preprocessor conditional (not visible here) selects between them.
 */
813 orc_add2_rshift_add_s16_11_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int n){
815 orc_union16 * ORC_RESTRICT ptr0;
816 const orc_union16 * ORC_RESTRICT ptr4;
817 const orc_union16 * ORC_RESTRICT ptr5;
818 const orc_union16 * ORC_RESTRICT ptr6;
/* ptr0 is the destination; ptr4..ptr6 are the three sources, in argument order. */
825 ptr0 = (orc_union16 *)d1;
826 ptr4 = (orc_union16 *)s1;
827 ptr5 = (orc_union16 *)s2;
828 ptr6 = (orc_union16 *)s3;
831 for (i = 0; i < n; i++) {
/* Average of two inputs: the +1 is added before the shift right by 1. */
837 var37.i = (var33.i + var34.i + 1)>>1;
/* Add the average to the third input. */
841 var36.i = var35.i + var37.i;
/*
 * Backup function for orc_add2_rshift_add_s16_11_op; the wrapper registers
 * it with orc_program_set_backup_function. It takes its arrays from the
 * executor (ex->arrays[0], [4], [5], [6]) and evaluates, per element,
 *     var36 = var35 + ((var33 + var34 + 1) >> 1)
 * NOTE(review): the declarations (including where n comes from), the loads
 * into var33/var34/var35 and the store of var36 are not visible in this
 * listing -- confirm against the generated file.
 */
850 _backup_orc_add2_rshift_add_s16_11_op (OrcExecutor * ORC_RESTRICT ex)
854 orc_union16 * ORC_RESTRICT ptr0;
855 const orc_union16 * ORC_RESTRICT ptr4;
856 const orc_union16 * ORC_RESTRICT ptr5;
857 const orc_union16 * ORC_RESTRICT ptr6;
/* Slot 0 is used as the destination; slots 4..6 as the const sources. */
864 ptr0 = (orc_union16 *)ex->arrays[0];
865 ptr4 = (orc_union16 *)ex->arrays[4];
866 ptr5 = (orc_union16 *)ex->arrays[5];
867 ptr6 = (orc_union16 *)ex->arrays[6];
870 for (i = 0; i < n; i++) {
/* Average of two inputs: the +1 is added before the shift right by 1. */
876 var37.i = (var33.i + var34.i + 1)>>1;
/* Add the average to the third input. */
880 var36.i = var35.i + var37.i;
/*
 * Orc-backed implementation of orc_add2_rshift_add_s16_11_op.
 * Builds an OrcProgram (kept in the function-static p) between
 * orc_once_mutex_lock/unlock: a 2-byte destination d1, 2-byte sources
 * s1..s3 and a 2-byte temporary t1, then the opcode sequence avgsw, addw.
 * Afterwards the caller's pointers are stored in the executor's array slots.
 * NOTE(review): presumably the sequence means t1 = avgsw(s2, s3);
 * d1 = s1 + t1 (operand order inferred from the plain-C version). The
 * p_inited test, the use of `result`, the remaining executor setup and the
 * call through func are not visible in this listing -- confirm.
 */
888 orc_add2_rshift_add_s16_11_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int n)
890 OrcExecutor _ex, *ex = &_ex;
/* Function-static state, so the program object persists across calls. */
891 static int p_inited = 0;
892 static OrcProgram *p = 0;
893 void (*func) (OrcExecutor *);
896 orc_once_mutex_lock ();
898 OrcCompileResult result;
900 p = orc_program_new ();
901 orc_program_set_name (p, "orc_add2_rshift_add_s16_11_op");
902 orc_program_set_backup_function (p, _backup_orc_add2_rshift_add_s16_11_op);
/* Variable declarations: the second argument is the size (2 for every variable here). */
903 orc_program_add_destination (p, 2, "d1");
904 orc_program_add_source (p, 2, "s1");
905 orc_program_add_source (p, 2, "s2");
906 orc_program_add_source (p, 2, "s3");
907 orc_program_add_temporary (p, 2, "t1");
909 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3, ORC_VAR_D1);
910 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);
912 result = orc_program_compile (p);
915 orc_once_mutex_unlock ();
/* Bind the caller's arrays; const is cast away because the slots hold void *. */
920 ex->arrays[ORC_VAR_D1] = d1;
921 ex->arrays[ORC_VAR_S1] = (void *)s1;
922 ex->arrays[ORC_VAR_S2] = (void *)s2;
923 ex->arrays[ORC_VAR_S3] = (void *)s3;
931 /* orc_add2_rshift_add_s16_11 */
/*
 * Plain-C implementation of orc_add2_rshift_add_s16_11 (two-source form).
 * Views d1, s1 and s2 as orc_union16 arrays and, for each of the n elements,
 * evaluates
 *     var36 = var35 + ((var33 + var34 + 1) >> 1)
 * NOTE(review): the local declarations, the loads into var33/var34/var35 and
 * the store of var36 are not visible in this listing. Presumably
 * var33/var34 are read from s1/s2 and var35 is the current d1 element
 * (the Orc wrapper's last opcode uses d1 as both destination and first
 * operand) -- confirm. A second definition with this name follows;
 * presumably a preprocessor conditional (not visible here) selects between
 * them.
 */
934 orc_add2_rshift_add_s16_11 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
936 orc_union16 * ORC_RESTRICT ptr0;
937 const orc_union16 * ORC_RESTRICT ptr4;
938 const orc_union16 * ORC_RESTRICT ptr5;
/* ptr0 is the destination; ptr4/ptr5 are the two sources, in argument order. */
945 ptr0 = (orc_union16 *)d1;
946 ptr4 = (orc_union16 *)s1;
947 ptr5 = (orc_union16 *)s2;
950 for (i = 0; i < n; i++) {
/* Average of two inputs: the +1 is added before the shift right by 1. */
956 var37.i = (var33.i + var34.i + 1)>>1;
/* Add the average to var35. */
960 var36.i = var35.i + var37.i;
/*
 * Backup function for orc_add2_rshift_add_s16_11; the wrapper registers it
 * with orc_program_set_backup_function. It takes its arrays from the
 * executor (ex->arrays[0], [4], [5]) and evaluates, per element,
 *     var36 = var35 + ((var33 + var34 + 1) >> 1)
 * NOTE(review): the declarations (including where n comes from), the loads
 * into var33/var34/var35 and the store of var36 are not visible in this
 * listing -- confirm against the generated file.
 */
969 _backup_orc_add2_rshift_add_s16_11 (OrcExecutor * ORC_RESTRICT ex)
973 orc_union16 * ORC_RESTRICT ptr0;
974 const orc_union16 * ORC_RESTRICT ptr4;
975 const orc_union16 * ORC_RESTRICT ptr5;
/* Slot 0 is used as the destination; slots 4 and 5 as the const sources. */
982 ptr0 = (orc_union16 *)ex->arrays[0];
983 ptr4 = (orc_union16 *)ex->arrays[4];
984 ptr5 = (orc_union16 *)ex->arrays[5];
987 for (i = 0; i < n; i++) {
/* Average of two inputs: the +1 is added before the shift right by 1. */
993 var37.i = (var33.i + var34.i + 1)>>1;
/* Add the average to var35. */
997 var36.i = var35.i + var37.i;
/* Excerpt of the orc_add2_rshift_add_s16_11 variant that describes the
 * kernel as an OrcProgram and fills an OrcExecutor with the caller's arrays.
 * Several of its lines are missing from this listing.
 *   d1      destination array (also used as an opcode operand below)
 *   s1, s2  source arrays
 *   n       element count (its use is not visible in this excerpt)
 */
1005 orc_add2_rshift_add_s16_11 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
1007 OrcExecutor _ex, *ex = &_ex;
1008 static int p_inited = 0;
1009 static OrcProgram *p = 0;
1010 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
1013 orc_once_mutex_lock ();
1015 OrcCompileResult result;
/* Create the program, name it, attach the backup function, then declare
 * d1, s1, s2 and the temporary t1, each with size argument 2. */
1017 p = orc_program_new ();
1018 orc_program_set_name (p, "orc_add2_rshift_add_s16_11");
1019 orc_program_set_backup_function (p, _backup_orc_add2_rshift_add_s16_11);
1020 orc_program_add_destination (p, 2, "d1");
1021 orc_program_add_source (p, 2, "s1");
1022 orc_program_add_source (p, 2, "s2");
1023 orc_program_add_temporary (p, 2, "t1");
/* Opcodes appended in order: "avgsw" with T1, S1, S2 and "addw" with
 * D1, D1, T1. NOTE(review): presumably the operand order is destination,
 * source 1, source 2 (t1 = avgsw(s1, s2); d1 = d1 + t1) - confirm against
 * the Orc API. */
1025 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
1026 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);
1028 result = orc_program_compile (p);
1031 orc_once_mutex_unlock ();
/* Hand the caller's pointers to the executor's array slots. */
1036 ex->arrays[ORC_VAR_D1] = d1;
1037 ex->arrays[ORC_VAR_S1] = (void *)s1;
1038 ex->arrays[ORC_VAR_S2] = (void *)s2;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
1040 func = p->code_exec;
/* Excerpt of the orc_add2_rshift_sub_s16_11_op variant that works directly
 * on the caller's arrays; several of its lines are missing from this listing.
 *   d1          destination array
 *   s1, s2, s3  source arrays
 *   n           loop bound
 */
1046 /* orc_add2_rshift_sub_s16_11_op */
1049 orc_add2_rshift_sub_s16_11_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int n){
1051 orc_union16 * ORC_RESTRICT ptr0;
1052 const orc_union16 * ORC_RESTRICT ptr4;
1053 const orc_union16 * ORC_RESTRICT ptr5;
1054 const orc_union16 * ORC_RESTRICT ptr6;
/* View d1, s1, s2 and s3 as orc_union16 elements. */
1061 ptr0 = (orc_union16 *)d1;
1062 ptr4 = (orc_union16 *)s1;
1063 ptr5 = (orc_union16 *)s2;
1064 ptr6 = (orc_union16 *)s3;
/* Per iteration: var37 = (var33 + var34 + 1) >> 1, then var36 = var35 - var37.
 * NOTE(review): the loads into var33..var35 and the store of var36 are not
 * visible here; presumably they go through ptr0/ptr4/ptr5/ptr6 - confirm. */
1067 for (i = 0; i < n; i++) {
1073 var37.i = (var33.i + var34.i + 1)>>1;
1077 var36.i = var35.i - var37.i;
/* Excerpt of _backup_orc_add2_rshift_sub_s16_11_op, which receives an
 * OrcExecutor instead of separate array arguments. Several of its lines are
 * missing from this listing (the definition of n is among them). */
1086 _backup_orc_add2_rshift_sub_s16_11_op (OrcExecutor * ORC_RESTRICT ex)
1090 orc_union16 * ORC_RESTRICT ptr0;
1091 const orc_union16 * ORC_RESTRICT ptr4;
1092 const orc_union16 * ORC_RESTRICT ptr5;
1093 const orc_union16 * ORC_RESTRICT ptr6;
/* Arrays come from executor slots 0, 4, 5 and 6.
 * NOTE(review): presumably these match ORC_VAR_D1/S1/S2/S3 - confirm. */
1100 ptr0 = (orc_union16 *)ex->arrays[0];
1101 ptr4 = (orc_union16 *)ex->arrays[4];
1102 ptr5 = (orc_union16 *)ex->arrays[5];
1103 ptr6 = (orc_union16 *)ex->arrays[6];
/* Per iteration: var37 = (var33 + var34 + 1) >> 1, then var36 = var35 - var37.
 * The loads and the final store are not visible in this excerpt. */
1106 for (i = 0; i < n; i++) {
1112 var37.i = (var33.i + var34.i + 1)>>1;
1116 var36.i = var35.i - var37.i;
/* Excerpt of the orc_add2_rshift_sub_s16_11_op variant that describes the
 * kernel as an OrcProgram and fills an OrcExecutor with the caller's arrays.
 * Several of its lines are missing from this listing.
 *   d1          destination array
 *   s1, s2, s3  source arrays
 *   n           element count (its use is not visible in this excerpt)
 */
1124 orc_add2_rshift_sub_s16_11_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int n)
1126 OrcExecutor _ex, *ex = &_ex;
1127 static int p_inited = 0;
1128 static OrcProgram *p = 0;
1129 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
1132 orc_once_mutex_lock ();
1134 OrcCompileResult result;
/* Create the program, name it, attach the backup function, then declare
 * d1, s1, s2, s3 and the temporary t1, each with size argument 2. */
1136 p = orc_program_new ();
1137 orc_program_set_name (p, "orc_add2_rshift_sub_s16_11_op");
1138 orc_program_set_backup_function (p, _backup_orc_add2_rshift_sub_s16_11_op);
1139 orc_program_add_destination (p, 2, "d1");
1140 orc_program_add_source (p, 2, "s1");
1141 orc_program_add_source (p, 2, "s2");
1142 orc_program_add_source (p, 2, "s3");
1143 orc_program_add_temporary (p, 2, "t1");
/* Opcodes appended in order: "avgsw" with T1, S2, S3 and "subw" with
 * D1, S1, T1. NOTE(review): presumably the operand order is destination,
 * source 1, source 2 (t1 = avgsw(s2, s3); d1 = s1 - t1) - confirm against
 * the Orc API. */
1145 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3, ORC_VAR_D1);
1146 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);
1148 result = orc_program_compile (p);
1151 orc_once_mutex_unlock ();
/* Hand the caller's pointers to the executor's array slots. */
1156 ex->arrays[ORC_VAR_D1] = d1;
1157 ex->arrays[ORC_VAR_S1] = (void *)s1;
1158 ex->arrays[ORC_VAR_S2] = (void *)s2;
1159 ex->arrays[ORC_VAR_S3] = (void *)s3;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
1161 func = p->code_exec;
/* Excerpt of the orc_add2_rshift_sub_s16_11 variant that works directly on
 * the caller's arrays; several of its lines are missing from this listing.
 *   d1      destination array
 *   s1, s2  source arrays
 *   n       loop bound
 */
1167 /* orc_add2_rshift_sub_s16_11 */
1170 orc_add2_rshift_sub_s16_11 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
1172 orc_union16 * ORC_RESTRICT ptr0;
1173 const orc_union16 * ORC_RESTRICT ptr4;
1174 const orc_union16 * ORC_RESTRICT ptr5;
/* View d1, s1 and s2 as orc_union16 elements. */
1181 ptr0 = (orc_union16 *)d1;
1182 ptr4 = (orc_union16 *)s1;
1183 ptr5 = (orc_union16 *)s2;
/* Per iteration: var37 = (var33 + var34 + 1) >> 1, then var36 = var35 - var37.
 * NOTE(review): the loads into var33..var35 and the store of var36 are not
 * visible here; presumably they go through ptr0/ptr4/ptr5 - confirm. */
1186 for (i = 0; i < n; i++) {
1192 var37.i = (var33.i + var34.i + 1)>>1;
1196 var36.i = var35.i - var37.i;
/* Excerpt of _backup_orc_add2_rshift_sub_s16_11, which receives an
 * OrcExecutor instead of separate array arguments. Several of its lines are
 * missing from this listing (the definition of n is among them). */
1205 _backup_orc_add2_rshift_sub_s16_11 (OrcExecutor * ORC_RESTRICT ex)
1209 orc_union16 * ORC_RESTRICT ptr0;
1210 const orc_union16 * ORC_RESTRICT ptr4;
1211 const orc_union16 * ORC_RESTRICT ptr5;
/* Arrays come from executor slots 0, 4 and 5.
 * NOTE(review): presumably these match ORC_VAR_D1/S1/S2 - confirm. */
1218 ptr0 = (orc_union16 *)ex->arrays[0];
1219 ptr4 = (orc_union16 *)ex->arrays[4];
1220 ptr5 = (orc_union16 *)ex->arrays[5];
/* Per iteration: var37 = (var33 + var34 + 1) >> 1, then var36 = var35 - var37.
 * The loads and the final store are not visible in this excerpt. */
1223 for (i = 0; i < n; i++) {
1229 var37.i = (var33.i + var34.i + 1)>>1;
1233 var36.i = var35.i - var37.i;
/* Excerpt of the orc_add2_rshift_sub_s16_11 variant that describes the
 * kernel as an OrcProgram and fills an OrcExecutor with the caller's arrays.
 * Several of its lines are missing from this listing.
 *   d1      destination array (also used as an opcode operand below)
 *   s1, s2  source arrays
 *   n       element count (its use is not visible in this excerpt)
 */
1241 orc_add2_rshift_sub_s16_11 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
1243 OrcExecutor _ex, *ex = &_ex;
1244 static int p_inited = 0;
1245 static OrcProgram *p = 0;
1246 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
1249 orc_once_mutex_lock ();
1251 OrcCompileResult result;
/* Create the program, name it, attach the backup function, then declare
 * d1, s1, s2 and the temporary t1, each with size argument 2. */
1253 p = orc_program_new ();
1254 orc_program_set_name (p, "orc_add2_rshift_sub_s16_11");
1255 orc_program_set_backup_function (p, _backup_orc_add2_rshift_sub_s16_11);
1256 orc_program_add_destination (p, 2, "d1");
1257 orc_program_add_source (p, 2, "s1");
1258 orc_program_add_source (p, 2, "s2");
1259 orc_program_add_temporary (p, 2, "t1");
/* Opcodes appended in order: "avgsw" with T1, S1, S2 and "subw" with
 * D1, D1, T1. NOTE(review): presumably the operand order is destination,
 * source 1, source 2 (t1 = avgsw(s1, s2); d1 = d1 - t1) - confirm against
 * the Orc API. */
1261 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
1262 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);
1264 result = orc_program_compile (p);
1267 orc_once_mutex_unlock ();
/* Hand the caller's pointers to the executor's array slots. */
1272 ex->arrays[ORC_VAR_D1] = d1;
1273 ex->arrays[ORC_VAR_S1] = (void *)s1;
1274 ex->arrays[ORC_VAR_S2] = (void *)s2;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
1276 func = p->code_exec;
/* Excerpt of the orc_add_const_rshift_s16_11 variant that works directly on
 * the caller's arrays; several of its lines are missing from this listing.
 *   d1  destination array
 *   s1  source array
 *   n   loop bound
 */
1282 /* orc_add_const_rshift_s16_11 */
1285 orc_add_const_rshift_s16_11 (int16_t * d1, const int16_t * s1, int n){
1287 orc_union16 * ORC_RESTRICT ptr0;
1288 const orc_union16 * ORC_RESTRICT ptr4;
/* View d1 and s1 as orc_union16 elements. */
1294 ptr0 = (orc_union16 *)d1;
1295 ptr4 = (orc_union16 *)s1;
/* The addend is the constant 1, set once before the loop. */
1298 var34.i = 0x00000001; /* 1 or 4.94066e-324f */
/* Per iteration: var36 = var33 + var34, then var35 = var36 >> 1.
 * NOTE(review): the load into var33 and the store of var35 are not visible
 * here; presumably they go through ptr4 and ptr0 - confirm. */
1300 for (i = 0; i < n; i++) {
1304 var36.i = var33.i + var34.i;
1306 var35.i = var36.i >> 1;
/* Excerpt of _backup_orc_add_const_rshift_s16_11, which receives an
 * OrcExecutor instead of separate array arguments. Several of its lines are
 * missing from this listing (the definition of n is among them). */
1315 _backup_orc_add_const_rshift_s16_11 (OrcExecutor * ORC_RESTRICT ex)
1319 orc_union16 * ORC_RESTRICT ptr0;
1320 const orc_union16 * ORC_RESTRICT ptr4;
/* Arrays come from executor slots 0 and 4.
 * NOTE(review): presumably these match ORC_VAR_D1/S1 - confirm. */
1326 ptr0 = (orc_union16 *)ex->arrays[0];
1327 ptr4 = (orc_union16 *)ex->arrays[4];
/* The addend is the constant 1, set once before the loop. */
1330 var34.i = 0x00000001; /* 1 or 4.94066e-324f */
/* Per iteration: var36 = var33 + var34, then var35 = var36 >> 1.
 * The load and the final store are not visible in this excerpt. */
1332 for (i = 0; i < n; i++) {
1336 var36.i = var33.i + var34.i;
1338 var35.i = var36.i >> 1;
/* Excerpt of the orc_add_const_rshift_s16_11 variant that describes the
 * kernel as an OrcProgram and fills an OrcExecutor with the caller's arrays.
 * Several of its lines are missing from this listing.
 *   d1  destination array
 *   s1  source array
 *   n   element count (its use is not visible in this excerpt)
 */
1346 orc_add_const_rshift_s16_11 (int16_t * d1, const int16_t * s1, int n)
1348 OrcExecutor _ex, *ex = &_ex;
1349 static int p_inited = 0;
1350 static OrcProgram *p = 0;
1351 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
1354 orc_once_mutex_lock ();
1356 OrcCompileResult result;
/* Create the program, name it, attach the backup function, then declare
 * d1, s1 and t1 with size argument 2 and the constant c1 = 1 with size
 * argument 4. */
1358 p = orc_program_new ();
1359 orc_program_set_name (p, "orc_add_const_rshift_s16_11");
1360 orc_program_set_backup_function (p, _backup_orc_add_const_rshift_s16_11);
1361 orc_program_add_destination (p, 2, "d1");
1362 orc_program_add_source (p, 2, "s1");
1363 orc_program_add_constant (p, 4, 0x00000001, "c1");
1364 orc_program_add_temporary (p, 2, "t1");
/* Opcodes appended in order: "addw" with T1, S1, C1 and "shrsw" with
 * D1, T1, C1; c1 is used by both. NOTE(review): presumably the operand
 * order is destination, source 1, source 2 (t1 = s1 + 1; d1 = t1 >> 1) -
 * confirm against the Orc API. */
1366 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);
1367 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
1369 result = orc_program_compile (p);
1372 orc_once_mutex_unlock ();
/* Hand the caller's pointers to the executor's array slots. */
1377 ex->arrays[ORC_VAR_D1] = d1;
1378 ex->arrays[ORC_VAR_S1] = (void *)s1;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
1380 func = p->code_exec;
/* Excerpt of the orc_add_const_rshift_s16 variant that works directly on the
 * caller's array; several of its lines are missing from this listing.
 *   d1  array updated in place (it is the only array argument)
 *   p1  NOTE(review): presumably the addend held in var34; the assignment
 *       is not visible here - confirm
 *   p2  right-shift amount
 *   n   loop bound
 */
1386 /* orc_add_const_rshift_s16 */
1389 orc_add_const_rshift_s16 (int16_t * d1, int p1, int p2, int n){
1391 orc_union16 * ORC_RESTRICT ptr0;
1397 ptr0 = (orc_union16 *)d1;
/* Per iteration: var36 = var33 + var34, then var35 = var36 >> p2.
 * The load into var33 and the store of var35 are not visible here. */
1402 for (i = 0; i < n; i++) {
1406 var36.i = var33.i + var34.i;
1408 var35.i = var36.i >> p2;
/* Excerpt of _backup_orc_add_const_rshift_s16, which receives an OrcExecutor
 * instead of separate arguments. Several of its lines are missing from this
 * listing (the definition of n is among them). */
1417 _backup_orc_add_const_rshift_s16 (OrcExecutor * ORC_RESTRICT ex)
1421 orc_union16 * ORC_RESTRICT ptr0;
/* The array comes from executor slot 0. */
1427 ptr0 = (orc_union16 *)ex->arrays[0];
/* The addend is read once from ex->params[24]; the shift amount is read from
 * ex->params[25] inside the loop.
 * NOTE(review): presumably 24/25 match ORC_VAR_P1/P2 - confirm. */
1430 var34.i = ex->params[24];
/* Per iteration: var36 = var33 + var34, then var35 = var36 >> params[25]. */
1432 for (i = 0; i < n; i++) {
1436 var36.i = var33.i + var34.i;
1438 var35.i = var36.i >> ex->params[25];
/* Excerpt of the orc_add_const_rshift_s16 variant that describes the kernel
 * as an OrcProgram and fills an OrcExecutor with the caller's arguments.
 * Several of its lines are missing from this listing.
 *   d1      destination array (also used as an opcode operand below)
 *   p1, p2  integer parameters stored into the executor
 *   n       element count (its use is not visible in this excerpt)
 */
1446 orc_add_const_rshift_s16 (int16_t * d1, int p1, int p2, int n)
1448 OrcExecutor _ex, *ex = &_ex;
1449 static int p_inited = 0;
1450 static OrcProgram *p = 0;
1451 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
1454 orc_once_mutex_lock ();
1456 OrcCompileResult result;
/* Create the program, name it, attach the backup function, then declare
 * d1, parameters p1 and p2, and temporary t1, each with size argument 2. */
1458 p = orc_program_new ();
1459 orc_program_set_name (p, "orc_add_const_rshift_s16");
1460 orc_program_set_backup_function (p, _backup_orc_add_const_rshift_s16);
1461 orc_program_add_destination (p, 2, "d1");
1462 orc_program_add_parameter (p, 2, "p1");
1463 orc_program_add_parameter (p, 2, "p2");
1464 orc_program_add_temporary (p, 2, "t1");
/* Opcodes appended in order: "addw" with T1, D1, P1 and "shrsw" with
 * D1, T1, P2. NOTE(review): presumably the operand order is destination,
 * source 1, source 2 (t1 = d1 + p1; d1 = t1 >> p2) - confirm against the
 * Orc API. */
1466 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1);
1467 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
1469 result = orc_program_compile (p);
1472 orc_once_mutex_unlock ();
/* Hand the caller's pointer and parameters to the executor. */
1477 ex->arrays[ORC_VAR_D1] = d1;
1478 ex->params[ORC_VAR_P1] = p1;
1479 ex->params[ORC_VAR_P2] = p2;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
1481 func = p->code_exec;
/* Excerpt of the orc_add_s16 variant that works directly on the caller's
 * arrays; several of its lines are missing from this listing.
 *   d1      destination array
 *   s1, s2  source arrays
 *   n       loop bound
 */
1490 orc_add_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
1492 orc_union16 * ORC_RESTRICT ptr0;
1493 const orc_union16 * ORC_RESTRICT ptr4;
1494 const orc_union16 * ORC_RESTRICT ptr5;
/* View d1, s1 and s2 as orc_union16 elements. */
1499 ptr0 = (orc_union16 *)d1;
1500 ptr4 = (orc_union16 *)s1;
1501 ptr5 = (orc_union16 *)s2;
/* Per iteration: var34 = var32 + var33.
 * NOTE(review): the loads into var32/var33 and the store of var34 are not
 * visible here; presumably they go through ptr4/ptr5 and ptr0 - confirm. */
1504 for (i = 0; i < n; i++) {
1510 var34.i = var32.i + var33.i;
/* Excerpt of _backup_orc_add_s16, which receives an OrcExecutor instead of
 * separate array arguments. Several of its lines are missing from this
 * listing (the definition of n is among them). */
1519 _backup_orc_add_s16 (OrcExecutor * ORC_RESTRICT ex)
1523 orc_union16 * ORC_RESTRICT ptr0;
1524 const orc_union16 * ORC_RESTRICT ptr4;
1525 const orc_union16 * ORC_RESTRICT ptr5;
/* Arrays come from executor slots 0, 4 and 5.
 * NOTE(review): presumably these match ORC_VAR_D1/S1/S2 - confirm. */
1530 ptr0 = (orc_union16 *)ex->arrays[0];
1531 ptr4 = (orc_union16 *)ex->arrays[4];
1532 ptr5 = (orc_union16 *)ex->arrays[5];
/* Per iteration: var34 = var32 + var33. The loads and the final store are
 * not visible in this excerpt. */
1535 for (i = 0; i < n; i++) {
1541 var34.i = var32.i + var33.i;
/* Excerpt of the orc_add_s16 variant that describes the kernel as an
 * OrcProgram and fills an OrcExecutor with the caller's arrays. Several of
 * its lines are missing from this listing.
 *   d1      destination array
 *   s1, s2  source arrays
 *   n       element count (its use is not visible in this excerpt)
 */
1549 orc_add_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
1551 OrcExecutor _ex, *ex = &_ex;
1552 static int p_inited = 0;
1553 static OrcProgram *p = 0;
1554 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
1557 orc_once_mutex_lock ();
1559 OrcCompileResult result;
/* Create the program, name it, attach the backup function, then declare
 * d1, s1 and s2, each with size argument 2. */
1561 p = orc_program_new ();
1562 orc_program_set_name (p, "orc_add_s16");
1563 orc_program_set_backup_function (p, _backup_orc_add_s16);
1564 orc_program_add_destination (p, 2, "d1");
1565 orc_program_add_source (p, 2, "s1");
1566 orc_program_add_source (p, 2, "s2");
/* A single "addw" opcode with D1, S1, S2. NOTE(review): presumably the
 * operand order is destination, source 1, source 2 (d1 = s1 + s2) - confirm
 * against the Orc API. */
1568 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
1570 result = orc_program_compile (p);
1573 orc_once_mutex_unlock ();
/* Hand the caller's pointers to the executor's array slots. */
1578 ex->arrays[ORC_VAR_D1] = d1;
1579 ex->arrays[ORC_VAR_S1] = (void *)s1;
1580 ex->arrays[ORC_VAR_S2] = (void *)s2;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
1582 func = p->code_exec;
/* Excerpt of the orc_add_s16_2d variant that works directly on the caller's
 * arrays; several of its lines are missing from this listing.
 *   d1, d1_stride  destination base pointer and the offset step per row
 *   s1, s1_stride  source base pointer and the offset step per row
 *   n              inner loop bound
 *   m              outer loop bound (number of rows)
 */
1588 /* orc_add_s16_2d */
1591 orc_add_s16_2d (int16_t * d1, int d1_stride, const int16_t * s1, int s1_stride, int n, int m){
1594 orc_union16 * ORC_RESTRICT ptr0;
1595 const orc_union16 * ORC_RESTRICT ptr4;
/* For row j the row pointers are the base pointers offset by stride * j via
 * ORC_PTR_OFFSET. NOTE(review): ORC_PTR_OFFSET is defined outside this
 * excerpt; presumably the offset is in bytes - confirm. */
1600 for (j = 0; j < m; j++) {
1601 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
1602 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
/* Per element: var34 = var32 + var33. The loads and the store are not
 * visible in this excerpt. */
1605 for (i = 0; i < n; i++) {
1611 var34.i = var32.i + var33.i;
/* Excerpt of _backup_orc_add_s16_2d, which receives an OrcExecutor instead
 * of separate arguments. Several of its lines are missing from this listing
 * (the definition of n is among them). */
1621 _backup_orc_add_s16_2d (OrcExecutor * ORC_RESTRICT ex)
/* The row count is read from the executor's params at ORC_VAR_A1. */
1626 int m = ex->params[ORC_VAR_A1];
1627 orc_union16 * ORC_RESTRICT ptr0;
1628 const orc_union16 * ORC_RESTRICT ptr4;
/* For row j the row pointers are arrays[0] and arrays[4] offset by
 * params[0] * j and params[4] * j, i.e. the per-row steps share the array
 * slot indices. */
1633 for (j = 0; j < m; j++) {
1634 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
1635 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
/* Per element: var34 = var32 + var33. The loads and the store are not
 * visible in this excerpt. */
1638 for (i = 0; i < n; i++) {
1644 var34.i = var32.i + var33.i;
/* Excerpt of the orc_add_s16_2d variant that describes the kernel as a 2-D
 * OrcProgram and fills an OrcExecutor with the caller's arguments. Several
 * of its lines are missing from this listing.
 *   d1, d1_stride  destination pointer and stride
 *   s1, s1_stride  source pointer and stride
 *   n              element count (its use is not visible in this excerpt)
 *   m              row count, stored through ORC_EXECUTOR_M
 */
1653 orc_add_s16_2d (int16_t * d1, int d1_stride, const int16_t * s1, int s1_stride, int n, int m)
1655 OrcExecutor _ex, *ex = &_ex;
1656 static int p_inited = 0;
1657 static OrcProgram *p = 0;
1658 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
1661 orc_once_mutex_lock ();
1663 OrcCompileResult result;
/* Create the program, mark it 2-D, name it, attach the backup function,
 * then declare d1 and s1 with size argument 2. */
1665 p = orc_program_new ();
1666 orc_program_set_2d (p);
1667 orc_program_set_name (p, "orc_add_s16_2d");
1668 orc_program_set_backup_function (p, _backup_orc_add_s16_2d);
1669 orc_program_add_destination (p, 2, "d1");
1670 orc_program_add_source (p, 2, "s1");
/* A single "addw" opcode with D1, D1, S1. NOTE(review): presumably the
 * operand order is destination, source 1, source 2 (d1 = d1 + s1) - confirm
 * against the Orc API. */
1672 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1);
1674 result = orc_program_compile (p);
1677 orc_once_mutex_unlock ();
/* Store the row count, then each array pointer in ex->arrays and its stride
 * in ex->params under the same variable index. */
1682 ORC_EXECUTOR_M(ex) = m;
1683 ex->arrays[ORC_VAR_D1] = d1;
1684 ex->params[ORC_VAR_D1] = d1_stride;
1685 ex->arrays[ORC_VAR_S1] = (void *)s1;
1686 ex->params[ORC_VAR_S1] = s1_stride;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
1688 func = p->code_exec;
/* Excerpt of the orc_addc_rshift_s16 variant that works directly on the
 * caller's arrays; several of its lines are missing from this listing.
 *   d1      destination array
 *   s1, s2  source arrays
 *   p1      right-shift amount
 *   n       loop bound
 */
1694 /* orc_addc_rshift_s16 */
1697 orc_addc_rshift_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int p1, int n){
1699 orc_union16 * ORC_RESTRICT ptr0;
1700 const orc_union16 * ORC_RESTRICT ptr4;
1701 const orc_union16 * ORC_RESTRICT ptr5;
/* View d1, s1 and s2 as orc_union16 elements. */
1707 ptr0 = (orc_union16 *)d1;
1708 ptr4 = (orc_union16 *)s1;
1709 ptr5 = (orc_union16 *)s2;
/* Per iteration: var36 = var33 + var34, then var35 = var36 >> p1.
 * NOTE(review): the loads into var33/var34 and the store of var35 are not
 * visible here; presumably they go through ptr4/ptr5 and ptr0 - confirm. */
1712 for (i = 0; i < n; i++) {
1718 var36.i = var33.i + var34.i;
1720 var35.i = var36.i >> p1;
/* Excerpt of _backup_orc_addc_rshift_s16, which receives an OrcExecutor
 * instead of separate arguments. Several of its lines are missing from this
 * listing (the definition of n is among them). */
1729 _backup_orc_addc_rshift_s16 (OrcExecutor * ORC_RESTRICT ex)
1733 orc_union16 * ORC_RESTRICT ptr0;
1734 const orc_union16 * ORC_RESTRICT ptr4;
1735 const orc_union16 * ORC_RESTRICT ptr5;
/* Arrays come from executor slots 0, 4 and 5.
 * NOTE(review): presumably these match ORC_VAR_D1/S1/S2 - confirm. */
1741 ptr0 = (orc_union16 *)ex->arrays[0];
1742 ptr4 = (orc_union16 *)ex->arrays[4];
1743 ptr5 = (orc_union16 *)ex->arrays[5];
/* Per iteration: var36 = var33 + var34, then var35 = var36 >> params[24].
 * NOTE(review): presumably index 24 matches ORC_VAR_P1 - confirm. */
1746 for (i = 0; i < n; i++) {
1752 var36.i = var33.i + var34.i;
1754 var35.i = var36.i >> ex->params[24];
/* Excerpt of the orc_addc_rshift_s16 variant that describes the kernel as an
 * OrcProgram and fills an OrcExecutor with the caller's arguments. Several
 * of its lines are missing from this listing.
 *   d1      destination array
 *   s1, s2  source arrays
 *   p1      integer parameter stored into the executor
 *   n       element count (its use is not visible in this excerpt)
 */
1762 orc_addc_rshift_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int p1, int n)
1764 OrcExecutor _ex, *ex = &_ex;
1765 static int p_inited = 0;
1766 static OrcProgram *p = 0;
1767 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
1770 orc_once_mutex_lock ();
1772 OrcCompileResult result;
/* Create the program, name it, attach the backup function, then declare
 * d1, s1, s2, parameter p1 and temporary t1, each with size argument 2. */
1774 p = orc_program_new ();
1775 orc_program_set_name (p, "orc_addc_rshift_s16");
1776 orc_program_set_backup_function (p, _backup_orc_addc_rshift_s16);
1777 orc_program_add_destination (p, 2, "d1");
1778 orc_program_add_source (p, 2, "s1");
1779 orc_program_add_source (p, 2, "s2");
1780 orc_program_add_parameter (p, 2, "p1");
1781 orc_program_add_temporary (p, 2, "t1");
/* Opcodes appended in order: "addw" with T1, S1, S2 and "shrsw" with
 * D1, T1, P1. NOTE(review): presumably the operand order is destination,
 * source 1, source 2 (t1 = s1 + s2; d1 = t1 >> p1) - confirm against the
 * Orc API. */
1783 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
1784 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
1786 result = orc_program_compile (p);
1789 orc_once_mutex_unlock ();
/* Hand the caller's pointers and parameter to the executor. */
1794 ex->arrays[ORC_VAR_D1] = d1;
1795 ex->arrays[ORC_VAR_S1] = (void *)s1;
1796 ex->arrays[ORC_VAR_S2] = (void *)s2;
1797 ex->params[ORC_VAR_P1] = p1;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
1799 func = p->code_exec;
/* Excerpt of the orc_lshift1_s16 variant that works directly on the caller's
 * arrays; several of its lines are missing from this listing.
 *   d1  destination array
 *   s1  source array
 *   n   loop bound
 */
1805 /* orc_lshift1_s16 */
1808 orc_lshift1_s16 (int16_t * d1, const int16_t * s1, int n){
1810 orc_union16 * ORC_RESTRICT ptr0;
1811 const orc_union16 * ORC_RESTRICT ptr4;
/* View d1 and s1 as orc_union16 elements. */
1815 ptr0 = (orc_union16 *)d1;
1816 ptr4 = (orc_union16 *)s1;
/* Per iteration: var33 = var32 << 1. The load into var32 and the store of
 * var33 are not visible in this excerpt. */
1819 for (i = 0; i < n; i++) {
1823 var33.i = var32.i << 1;
/* Excerpt of _backup_orc_lshift1_s16, which receives an OrcExecutor instead
 * of separate array arguments. Several of its lines are missing from this
 * listing (the definition of n is among them). */
1832 _backup_orc_lshift1_s16 (OrcExecutor * ORC_RESTRICT ex)
1836 orc_union16 * ORC_RESTRICT ptr0;
1837 const orc_union16 * ORC_RESTRICT ptr4;
/* Arrays come from executor slots 0 and 4.
 * NOTE(review): presumably these match ORC_VAR_D1/S1 - confirm. */
1841 ptr0 = (orc_union16 *)ex->arrays[0];
1842 ptr4 = (orc_union16 *)ex->arrays[4];
/* Per iteration: var33 = var32 << 1. */
1845 for (i = 0; i < n; i++) {
1849 var33.i = var32.i << 1;
/* Excerpt of the orc_lshift1_s16 variant that describes the kernel as an
 * OrcProgram and fills an OrcExecutor with the caller's arrays. Several of
 * its lines are missing from this listing.
 *   d1  destination array
 *   s1  source array
 *   n   element count (its use is not visible in this excerpt)
 */
1857 orc_lshift1_s16 (int16_t * d1, const int16_t * s1, int n)
1859 OrcExecutor _ex, *ex = &_ex;
1860 static int p_inited = 0;
1861 static OrcProgram *p = 0;
1862 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
1865 orc_once_mutex_lock ();
1867 OrcCompileResult result;
/* Create the program, name it, attach the backup function, then declare
 * d1 and s1 with size argument 2 and the constant c1 = 1 with size
 * argument 4. */
1869 p = orc_program_new ();
1870 orc_program_set_name (p, "orc_lshift1_s16");
1871 orc_program_set_backup_function (p, _backup_orc_lshift1_s16);
1872 orc_program_add_destination (p, 2, "d1");
1873 orc_program_add_source (p, 2, "s1");
1874 orc_program_add_constant (p, 4, 0x00000001, "c1");
/* A single "shlw" opcode with D1, S1, C1. NOTE(review): presumably the
 * operand order is destination, source 1, source 2 (d1 = s1 << 1) - confirm
 * against the Orc API. */
1876 orc_program_append_2 (p, "shlw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);
1878 result = orc_program_compile (p);
1881 orc_once_mutex_unlock ();
/* Hand the caller's pointers to the executor's array slots. */
1886 ex->arrays[ORC_VAR_D1] = d1;
1887 ex->arrays[ORC_VAR_S1] = (void *)s1;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
1889 func = p->code_exec;
/* Excerpt of the orc_lshift2_s16 variant that works directly on the caller's
 * arrays; several of its lines are missing from this listing.
 *   d1  destination array
 *   s1  source array
 *   n   loop bound
 */
1895 /* orc_lshift2_s16 */
1898 orc_lshift2_s16 (int16_t * d1, const int16_t * s1, int n){
1900 orc_union16 * ORC_RESTRICT ptr0;
1901 const orc_union16 * ORC_RESTRICT ptr4;
/* View d1 and s1 as orc_union16 elements. */
1905 ptr0 = (orc_union16 *)d1;
1906 ptr4 = (orc_union16 *)s1;
/* Per iteration: var33 = var32 << 2. The load into var32 and the store of
 * var33 are not visible in this excerpt. */
1909 for (i = 0; i < n; i++) {
1913 var33.i = var32.i << 2;
/* Excerpt of _backup_orc_lshift2_s16, which receives an OrcExecutor instead
 * of separate array arguments. Several of its lines are missing from this
 * listing (the definition of n is among them). */
1922 _backup_orc_lshift2_s16 (OrcExecutor * ORC_RESTRICT ex)
1926 orc_union16 * ORC_RESTRICT ptr0;
1927 const orc_union16 * ORC_RESTRICT ptr4;
/* Arrays come from executor slots 0 and 4.
 * NOTE(review): presumably these match ORC_VAR_D1/S1 - confirm. */
1931 ptr0 = (orc_union16 *)ex->arrays[0];
1932 ptr4 = (orc_union16 *)ex->arrays[4];
/* Per iteration: var33 = var32 << 2. */
1935 for (i = 0; i < n; i++) {
1939 var33.i = var32.i << 2;
/* Excerpt of the orc_lshift2_s16 variant that describes the kernel as an
 * OrcProgram and fills an OrcExecutor with the caller's arrays. Several of
 * its lines are missing from this listing.
 *   d1  destination array
 *   s1  source array
 *   n   element count (its use is not visible in this excerpt)
 */
1947 orc_lshift2_s16 (int16_t * d1, const int16_t * s1, int n)
1949 OrcExecutor _ex, *ex = &_ex;
1950 static int p_inited = 0;
1951 static OrcProgram *p = 0;
1952 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
1955 orc_once_mutex_lock ();
1957 OrcCompileResult result;
/* Create the program, name it, attach the backup function, then declare
 * d1 and s1 with size argument 2 and the constant c1 = 2 with size
 * argument 4. */
1959 p = orc_program_new ();
1960 orc_program_set_name (p, "orc_lshift2_s16");
1961 orc_program_set_backup_function (p, _backup_orc_lshift2_s16);
1962 orc_program_add_destination (p, 2, "d1");
1963 orc_program_add_source (p, 2, "s1");
1964 orc_program_add_constant (p, 4, 0x00000002, "c1");
/* A single "shlw" opcode with D1, S1, C1. NOTE(review): presumably the
 * operand order is destination, source 1, source 2 (d1 = s1 << 2) - confirm
 * against the Orc API. */
1966 orc_program_append_2 (p, "shlw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);
1968 result = orc_program_compile (p);
1971 orc_once_mutex_unlock ();
/* Hand the caller's pointers to the executor's array slots. */
1976 ex->arrays[ORC_VAR_D1] = d1;
1977 ex->arrays[ORC_VAR_S1] = (void *)s1;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
1979 func = p->code_exec;
/* Excerpt of the orc_lshift_s16_ip variant that works directly on the
 * caller's array; several of its lines are missing from this listing.
 *   d1  array updated in place (it is the only array argument)
 *   p1  left-shift amount
 *   n   loop bound
 */
1985 /* orc_lshift_s16_ip */
1988 orc_lshift_s16_ip (int16_t * d1, int p1, int n){
1990 orc_union16 * ORC_RESTRICT ptr0;
1994 ptr0 = (orc_union16 *)d1;
/* Per iteration: var33 = var32 << p1. The load into var32 and the store of
 * var33 are not visible in this excerpt. */
1997 for (i = 0; i < n; i++) {
2001 var33.i = var32.i << p1;
/* Excerpt of _backup_orc_lshift_s16_ip, which receives an OrcExecutor
 * instead of separate arguments. Several of its lines are missing from this
 * listing (the definition of n is among them). */
2010 _backup_orc_lshift_s16_ip (OrcExecutor * ORC_RESTRICT ex)
2014 orc_union16 * ORC_RESTRICT ptr0;
/* The array comes from executor slot 0. */
2018 ptr0 = (orc_union16 *)ex->arrays[0];
/* Per iteration: var33 = var32 << params[24].
 * NOTE(review): presumably index 24 matches ORC_VAR_P1 - confirm. */
2021 for (i = 0; i < n; i++) {
2025 var33.i = var32.i << ex->params[24];
/* Excerpt of the orc_lshift_s16_ip variant that describes the kernel as an
 * OrcProgram and fills an OrcExecutor with the caller's arguments. Several
 * of its lines are missing from this listing.
 *   d1  destination array (also used as an opcode operand below)
 *   p1  integer parameter stored into the executor
 *   n   element count (its use is not visible in this excerpt)
 */
2033 orc_lshift_s16_ip (int16_t * d1, int p1, int n)
2035 OrcExecutor _ex, *ex = &_ex;
2036 static int p_inited = 0;
2037 static OrcProgram *p = 0;
2038 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
2041 orc_once_mutex_lock ();
2043 OrcCompileResult result;
/* Create the program, name it, attach the backup function, then declare
 * d1 and parameter p1 with size argument 2. */
2045 p = orc_program_new ();
2046 orc_program_set_name (p, "orc_lshift_s16_ip");
2047 orc_program_set_backup_function (p, _backup_orc_lshift_s16_ip);
2048 orc_program_add_destination (p, 2, "d1");
2049 orc_program_add_parameter (p, 2, "p1");
/* A single "shlw" opcode with D1, D1, P1. NOTE(review): presumably the
 * operand order is destination, source 1, source 2 (d1 = d1 << p1) - confirm
 * against the Orc API. */
2051 orc_program_append_2 (p, "shlw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1);
2053 result = orc_program_compile (p);
2056 orc_once_mutex_unlock ();
/* Hand the caller's pointer and parameter to the executor. */
2061 ex->arrays[ORC_VAR_D1] = d1;
2062 ex->params[ORC_VAR_P1] = p1;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
2064 func = p->code_exec;
/* Excerpt of the orc_mas2_add_s16_op variant that works directly on the
 * caller's arrays; several of its lines are missing from this listing.
 *   d1          destination array
 *   s1, s2, s3  source arrays
 *   p1, p2      NOTE(review): presumably the multiplier (var36) and the
 *               addend (var37); their assignments are not visible - confirm
 *   p3          right-shift amount
 *   n           loop bound
 */
2070 /* orc_mas2_add_s16_op */
2073 orc_mas2_add_s16_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int p1, int p2, int p3, int n){
2075 orc_union16 * ORC_RESTRICT ptr0;
2076 const orc_union16 * ORC_RESTRICT ptr4;
2077 const orc_union16 * ORC_RESTRICT ptr5;
2078 const orc_union16 * ORC_RESTRICT ptr6;
/* View d1, s1, s2 and s3 as orc_union16 elements. */
2091 ptr0 = (orc_union16 *)d1;
2092 ptr4 = (orc_union16 *)s1;
2093 ptr5 = (orc_union16 *)s2;
2094 ptr6 = (orc_union16 *)s3;
/* Per iteration: var40 = var34 + var35; var41 = var40 * var36;
 * var42 = var41 + var37; var43 = var42 >> p3; var39 = var38 + var44.
 * NOTE(review): the loads, the assignment of var44 (presumably a narrowed
 * var43) and the final store are not visible here - confirm. */
2101 for (i = 0; i < n; i++) {
2107 var40.i = var34.i + var35.i;
2109 var41.i = var40.i * var36.i;
2111 var42.i = var41.i + var37.i;
2113 var43.i = var42.i >> p3;
2119 var39.i = var38.i + var44.i;
/* Excerpt of _backup_orc_mas2_add_s16_op, which receives an OrcExecutor
 * instead of separate arguments. Several of its lines are missing from this
 * listing (the definition of n is among them). */
2128 _backup_orc_mas2_add_s16_op (OrcExecutor * ORC_RESTRICT ex)
2132 orc_union16 * ORC_RESTRICT ptr0;
2133 const orc_union16 * ORC_RESTRICT ptr4;
2134 const orc_union16 * ORC_RESTRICT ptr5;
2135 const orc_union16 * ORC_RESTRICT ptr6;
/* Arrays come from executor slots 0, 4, 5 and 6.
 * NOTE(review): presumably these match ORC_VAR_D1/S1/S2/S3 - confirm. */
2148 ptr0 = (orc_union16 *)ex->arrays[0];
2149 ptr4 = (orc_union16 *)ex->arrays[4];
2150 ptr5 = (orc_union16 *)ex->arrays[5];
2151 ptr6 = (orc_union16 *)ex->arrays[6];
/* The multiplier (var36) and addend (var37) are read once from params[24]
 * and params[25]; the shift amount is read from params[26] inside the loop.
 * NOTE(review): presumably 24/25/26 match ORC_VAR_P1/P2/P3 - confirm. */
2154 var36.i = ex->params[24];
2156 var37.i = ex->params[25];
/* Per iteration: var40 = var34 + var35; var41 = var40 * var36;
 * var42 = var41 + var37; var43 = var42 >> params[26]; var39 = var38 + var44.
 * The assignment of var44 is not visible in this excerpt. */
2158 for (i = 0; i < n; i++) {
2164 var40.i = var34.i + var35.i;
2166 var41.i = var40.i * var36.i;
2168 var42.i = var41.i + var37.i;
2170 var43.i = var42.i >> ex->params[26];
2176 var39.i = var38.i + var44.i;
/* Excerpt of the orc_mas2_add_s16_op variant that describes the kernel as an
 * OrcProgram and fills an OrcExecutor with the caller's arguments. Several
 * of its lines are missing from this listing.
 *   d1          destination array
 *   s1, s2, s3  source arrays
 *   p1, p2, p3  integer parameters stored into the executor
 *   n           element count (its use is not visible in this excerpt)
 */
2184 orc_mas2_add_s16_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int p1, int p2, int p3, int n)
2186 OrcExecutor _ex, *ex = &_ex;
2187 static int p_inited = 0;
2188 static OrcProgram *p = 0;
2189 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
2192 orc_once_mutex_lock ();
2194 OrcCompileResult result;
/* Create the program, name it and attach the backup function. Variables:
 * d1, s1, s2, s3, p1 and t1 use size argument 2; p2, p3 and t2 use 4. */
2196 p = orc_program_new ();
2197 orc_program_set_name (p, "orc_mas2_add_s16_op");
2198 orc_program_set_backup_function (p, _backup_orc_mas2_add_s16_op);
2199 orc_program_add_destination (p, 2, "d1");
2200 orc_program_add_source (p, 2, "s1");
2201 orc_program_add_source (p, 2, "s2");
2202 orc_program_add_source (p, 2, "s3");
2203 orc_program_add_parameter (p, 2, "p1");
2204 orc_program_add_parameter (p, 4, "p2");
2205 orc_program_add_parameter (p, 4, "p3");
2206 orc_program_add_temporary (p, 2, "t1");
2207 orc_program_add_temporary (p, 4, "t2");
/* Six opcodes in order: addw, mulswl, addl, shrsl, convlw, addw.
 * NOTE(review): presumably the operand order is destination, source 1,
 * source 2, giving t1 = s2 + s3; t2 = t1 * p1; t2 += p2; t2 >>= p3;
 * t1 = narrow(t2); d1 = s1 + t1 - confirm against the Orc API. */
2209 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3, ORC_VAR_D1);
2210 orc_program_append_2 (p, "mulswl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
2211 orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
2212 orc_program_append_2 (p, "shrsl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P3, ORC_VAR_D1);
2213 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
2214 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);
2216 result = orc_program_compile (p);
2219 orc_once_mutex_unlock ();
/* Hand the caller's pointers and parameters to the executor. */
2224 ex->arrays[ORC_VAR_D1] = d1;
2225 ex->arrays[ORC_VAR_S1] = (void *)s1;
2226 ex->arrays[ORC_VAR_S2] = (void *)s2;
2227 ex->arrays[ORC_VAR_S3] = (void *)s3;
2228 ex->params[ORC_VAR_P1] = p1;
2229 ex->params[ORC_VAR_P2] = p2;
2230 ex->params[ORC_VAR_P3] = p3;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
2232 func = p->code_exec;
/* Excerpt of the orc_mas2_add_s16_ip variant that works directly on the
 * caller's arrays; several of its lines are missing from this listing.
 *   d1      destination array
 *   s1, s2  source arrays
 *   p1, p2  NOTE(review): presumably the multiplier (var36) and the addend
 *           (var37); their assignments are not visible - confirm
 *   p3      right-shift amount
 *   n       loop bound
 */
2238 /* orc_mas2_add_s16_ip */
2241 orc_mas2_add_s16_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, int p1, int p2, int p3, int n){
2243 orc_union16 * ORC_RESTRICT ptr0;
2244 const orc_union16 * ORC_RESTRICT ptr4;
2245 const orc_union16 * ORC_RESTRICT ptr5;
/* View d1, s1 and s2 as orc_union16 elements. */
2258 ptr0 = (orc_union16 *)d1;
2259 ptr4 = (orc_union16 *)s1;
2260 ptr5 = (orc_union16 *)s2;
/* Per iteration: var40 = var34 + var35; var41 = var40 * var36;
 * var42 = var41 + var37; var43 = var42 >> p3; var39 = var38 + var44.
 * NOTE(review): the loads, the assignment of var44 (presumably a narrowed
 * var43) and the final store are not visible here - confirm. */
2267 for (i = 0; i < n; i++) {
2273 var40.i = var34.i + var35.i;
2275 var41.i = var40.i * var36.i;
2277 var42.i = var41.i + var37.i;
2279 var43.i = var42.i >> p3;
2285 var39.i = var38.i + var44.i;
/* Excerpt of _backup_orc_mas2_add_s16_ip, which receives an OrcExecutor
 * instead of separate arguments. Several of its lines are missing from this
 * listing (the definition of n is among them). */
2294 _backup_orc_mas2_add_s16_ip (OrcExecutor * ORC_RESTRICT ex)
2298 orc_union16 * ORC_RESTRICT ptr0;
2299 const orc_union16 * ORC_RESTRICT ptr4;
2300 const orc_union16 * ORC_RESTRICT ptr5;
/* Arrays come from executor slots 0, 4 and 5.
 * NOTE(review): presumably these match ORC_VAR_D1/S1/S2 - confirm. */
2313 ptr0 = (orc_union16 *)ex->arrays[0];
2314 ptr4 = (orc_union16 *)ex->arrays[4];
2315 ptr5 = (orc_union16 *)ex->arrays[5];
/* The multiplier (var36) and addend (var37) are read once from params[24]
 * and params[25]; the shift amount is read from params[26] inside the loop.
 * NOTE(review): presumably 24/25/26 match ORC_VAR_P1/P2/P3 - confirm. */
2318 var36.i = ex->params[24];
2320 var37.i = ex->params[25];
/* Per iteration: var40 = var34 + var35; var41 = var40 * var36;
 * var42 = var41 + var37; var43 = var42 >> params[26]; var39 = var38 + var44.
 * The assignment of var44 is not visible in this excerpt. */
2322 for (i = 0; i < n; i++) {
2328 var40.i = var34.i + var35.i;
2330 var41.i = var40.i * var36.i;
2332 var42.i = var41.i + var37.i;
2334 var43.i = var42.i >> ex->params[26];
2340 var39.i = var38.i + var44.i;
/* Excerpt of the orc_mas2_add_s16_ip variant that describes the kernel as an
 * OrcProgram and fills an OrcExecutor with the caller's arguments. Several
 * of its lines are missing from this listing.
 *   d1          destination array (also used as an opcode operand below)
 *   s1, s2      source arrays
 *   p1, p2, p3  integer parameters stored into the executor
 *   n           element count (its use is not visible in this excerpt)
 */
2348 orc_mas2_add_s16_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, int p1, int p2, int p3, int n)
2350 OrcExecutor _ex, *ex = &_ex;
2351 static int p_inited = 0;
2352 static OrcProgram *p = 0;
2353 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
2356 orc_once_mutex_lock ();
2358 OrcCompileResult result;
/* Create the program, name it and attach the backup function. Variables:
 * d1, s1, s2, p1 and t1 use size argument 2; p2, p3 and t2 use 4. */
2360 p = orc_program_new ();
2361 orc_program_set_name (p, "orc_mas2_add_s16_ip");
2362 orc_program_set_backup_function (p, _backup_orc_mas2_add_s16_ip);
2363 orc_program_add_destination (p, 2, "d1");
2364 orc_program_add_source (p, 2, "s1");
2365 orc_program_add_source (p, 2, "s2");
2366 orc_program_add_parameter (p, 2, "p1");
2367 orc_program_add_parameter (p, 4, "p2");
2368 orc_program_add_parameter (p, 4, "p3");
2369 orc_program_add_temporary (p, 2, "t1");
2370 orc_program_add_temporary (p, 4, "t2");
/* Six opcodes in order: addw, mulswl, addl, shrsl, convlw, addw.
 * NOTE(review): presumably the operand order is destination, source 1,
 * source 2, giving t1 = s1 + s2; t2 = t1 * p1; t2 += p2; t2 >>= p3;
 * t1 = narrow(t2); d1 = d1 + t1 - confirm against the Orc API. */
2372 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
2373 orc_program_append_2 (p, "mulswl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
2374 orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
2375 orc_program_append_2 (p, "shrsl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P3, ORC_VAR_D1);
2376 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
2377 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);
2379 result = orc_program_compile (p);
2382 orc_once_mutex_unlock ();
/* Hand the caller's pointers and parameters to the executor. */
2387 ex->arrays[ORC_VAR_D1] = d1;
2388 ex->arrays[ORC_VAR_S1] = (void *)s1;
2389 ex->arrays[ORC_VAR_S2] = (void *)s2;
2390 ex->params[ORC_VAR_P1] = p1;
2391 ex->params[ORC_VAR_P2] = p2;
2392 ex->params[ORC_VAR_P3] = p3;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
2394 func = p->code_exec;
/* Excerpt of the orc_mas2_sub_s16_op variant that works directly on the
 * caller's arrays; several of its lines are missing from this listing.
 *   d1          destination array
 *   s1, s2, s3  source arrays
 *   p1, p2      NOTE(review): presumably the multiplier (var36) and the
 *               addend (var37); their assignments are not visible - confirm
 *   p3          right-shift amount
 *   n           loop bound
 */
2400 /* orc_mas2_sub_s16_op */
2403 orc_mas2_sub_s16_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int p1, int p2, int p3, int n){
2405 orc_union16 * ORC_RESTRICT ptr0;
2406 const orc_union16 * ORC_RESTRICT ptr4;
2407 const orc_union16 * ORC_RESTRICT ptr5;
2408 const orc_union16 * ORC_RESTRICT ptr6;
/* View d1, s1, s2 and s3 as orc_union16 elements. */
2421 ptr0 = (orc_union16 *)d1;
2422 ptr4 = (orc_union16 *)s1;
2423 ptr5 = (orc_union16 *)s2;
2424 ptr6 = (orc_union16 *)s3;
/* Per iteration: var40 = var34 + var35; var41 = var40 * var36;
 * var42 = var41 + var37; var43 = var42 >> p3; var39 = var38 - var44.
 * NOTE(review): the loads, the assignment of var44 (presumably a narrowed
 * var43) and the final store are not visible here - confirm. */
2431 for (i = 0; i < n; i++) {
2437 var40.i = var34.i + var35.i;
2439 var41.i = var40.i * var36.i;
2441 var42.i = var41.i + var37.i;
2443 var43.i = var42.i >> p3;
2449 var39.i = var38.i - var44.i;
/* Excerpt of _backup_orc_mas2_sub_s16_op, which receives an OrcExecutor
 * instead of separate arguments. Several of its lines are missing from this
 * listing (the definition of n is among them). */
2458 _backup_orc_mas2_sub_s16_op (OrcExecutor * ORC_RESTRICT ex)
2462 orc_union16 * ORC_RESTRICT ptr0;
2463 const orc_union16 * ORC_RESTRICT ptr4;
2464 const orc_union16 * ORC_RESTRICT ptr5;
2465 const orc_union16 * ORC_RESTRICT ptr6;
/* Arrays come from executor slots 0, 4, 5 and 6.
 * NOTE(review): presumably these match ORC_VAR_D1/S1/S2/S3 - confirm. */
2478 ptr0 = (orc_union16 *)ex->arrays[0];
2479 ptr4 = (orc_union16 *)ex->arrays[4];
2480 ptr5 = (orc_union16 *)ex->arrays[5];
2481 ptr6 = (orc_union16 *)ex->arrays[6];
/* The multiplier (var36) and addend (var37) are read once from params[24]
 * and params[25]; the shift amount is read from params[26] inside the loop.
 * NOTE(review): presumably 24/25/26 match ORC_VAR_P1/P2/P3 - confirm. */
2484 var36.i = ex->params[24];
2486 var37.i = ex->params[25];
/* Per iteration: var40 = var34 + var35; var41 = var40 * var36;
 * var42 = var41 + var37; var43 = var42 >> params[26]; var39 = var38 - var44.
 * The assignment of var44 is not visible in this excerpt. */
2488 for (i = 0; i < n; i++) {
2494 var40.i = var34.i + var35.i;
2496 var41.i = var40.i * var36.i;
2498 var42.i = var41.i + var37.i;
2500 var43.i = var42.i >> ex->params[26];
2506 var39.i = var38.i - var44.i;
/* Excerpt of the orc_mas2_sub_s16_op variant that describes the kernel as an
 * OrcProgram and fills an OrcExecutor with the caller's arguments. Several
 * of its lines are missing from this listing.
 *   d1          destination array
 *   s1, s2, s3  source arrays
 *   p1, p2, p3  integer parameters stored into the executor
 *   n           element count (its use is not visible in this excerpt)
 */
2514 orc_mas2_sub_s16_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, int p1, int p2, int p3, int n)
2516 OrcExecutor _ex, *ex = &_ex;
2517 static int p_inited = 0;
2518 static OrcProgram *p = 0;
2519 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
2522 orc_once_mutex_lock ();
2524 OrcCompileResult result;
/* Create the program, name it and attach the backup function. Variables:
 * d1, s1, s2, s3, p1 and t1 use size argument 2; p2, p3 and t2 use 4. */
2526 p = orc_program_new ();
2527 orc_program_set_name (p, "orc_mas2_sub_s16_op");
2528 orc_program_set_backup_function (p, _backup_orc_mas2_sub_s16_op);
2529 orc_program_add_destination (p, 2, "d1");
2530 orc_program_add_source (p, 2, "s1");
2531 orc_program_add_source (p, 2, "s2");
2532 orc_program_add_source (p, 2, "s3");
2533 orc_program_add_parameter (p, 2, "p1");
2534 orc_program_add_parameter (p, 4, "p2");
2535 orc_program_add_parameter (p, 4, "p3");
2536 orc_program_add_temporary (p, 2, "t1");
2537 orc_program_add_temporary (p, 4, "t2");
/* Six opcodes in order: addw, mulswl, addl, shrsl, convlw, subw.
 * NOTE(review): presumably the operand order is destination, source 1,
 * source 2, giving t1 = s2 + s3; t2 = t1 * p1; t2 += p2; t2 >>= p3;
 * t1 = narrow(t2); d1 = s1 - t1 - confirm against the Orc API. */
2539 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3, ORC_VAR_D1);
2540 orc_program_append_2 (p, "mulswl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
2541 orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
2542 orc_program_append_2 (p, "shrsl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P3, ORC_VAR_D1);
2543 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
2544 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);
2546 result = orc_program_compile (p);
2549 orc_once_mutex_unlock ();
/* Hand the caller's pointers and parameters to the executor. */
2554 ex->arrays[ORC_VAR_D1] = d1;
2555 ex->arrays[ORC_VAR_S1] = (void *)s1;
2556 ex->arrays[ORC_VAR_S2] = (void *)s2;
2557 ex->arrays[ORC_VAR_S3] = (void *)s3;
2558 ex->params[ORC_VAR_P1] = p1;
2559 ex->params[ORC_VAR_P2] = p2;
2560 ex->params[ORC_VAR_P3] = p3;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
2562 func = p->code_exec;
/* Excerpt of the orc_mas2_sub_s16_ip variant that works directly on the
 * caller's arrays; several of its lines are missing from this listing.
 *   d1      destination array
 *   s1, s2  source arrays
 *   p1, p2  NOTE(review): presumably the multiplier (var36) and the addend
 *           (var37); their assignments are not visible - confirm
 *   p3      right-shift amount
 *   n       loop bound
 */
2568 /* orc_mas2_sub_s16_ip */
2571 orc_mas2_sub_s16_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, int p1, int p2, int p3, int n){
2573 orc_union16 * ORC_RESTRICT ptr0;
2574 const orc_union16 * ORC_RESTRICT ptr4;
2575 const orc_union16 * ORC_RESTRICT ptr5;
/* View d1, s1 and s2 as orc_union16 elements. */
2588 ptr0 = (orc_union16 *)d1;
2589 ptr4 = (orc_union16 *)s1;
2590 ptr5 = (orc_union16 *)s2;
/* Per iteration: var40 = var34 + var35; var41 = var40 * var36;
 * var42 = var41 + var37; var43 = var42 >> p3; var39 = var38 - var44.
 * NOTE(review): the loads, the assignment of var44 (presumably a narrowed
 * var43) and the final store are not visible here - confirm. */
2597 for (i = 0; i < n; i++) {
2603 var40.i = var34.i + var35.i;
2605 var41.i = var40.i * var36.i;
2607 var42.i = var41.i + var37.i;
2609 var43.i = var42.i >> p3;
2615 var39.i = var38.i - var44.i;
/* Excerpt of _backup_orc_mas2_sub_s16_ip, which receives an OrcExecutor
 * instead of separate arguments. Several of its lines are missing from this
 * listing (the definition of n is among them). */
2624 _backup_orc_mas2_sub_s16_ip (OrcExecutor * ORC_RESTRICT ex)
2628 orc_union16 * ORC_RESTRICT ptr0;
2629 const orc_union16 * ORC_RESTRICT ptr4;
2630 const orc_union16 * ORC_RESTRICT ptr5;
/* Arrays come from executor slots 0, 4 and 5.
 * NOTE(review): presumably these match ORC_VAR_D1/S1/S2 - confirm. */
2643 ptr0 = (orc_union16 *)ex->arrays[0];
2644 ptr4 = (orc_union16 *)ex->arrays[4];
2645 ptr5 = (orc_union16 *)ex->arrays[5];
/* The multiplier (var36) and addend (var37) are read once from params[24]
 * and params[25]; the shift amount is read from params[26] inside the loop.
 * NOTE(review): presumably 24/25/26 match ORC_VAR_P1/P2/P3 - confirm. */
2648 var36.i = ex->params[24];
2650 var37.i = ex->params[25];
/* Per iteration: var40 = var34 + var35; var41 = var40 * var36;
 * var42 = var41 + var37; var43 = var42 >> params[26]; var39 = var38 - var44.
 * The assignment of var44 is not visible in this excerpt. */
2652 for (i = 0; i < n; i++) {
2658 var40.i = var34.i + var35.i;
2660 var41.i = var40.i * var36.i;
2662 var42.i = var41.i + var37.i;
2664 var43.i = var42.i >> ex->params[26];
2670 var39.i = var38.i - var44.i;
/* Excerpt of the orc_mas2_sub_s16_ip variant that describes the kernel as an
 * OrcProgram and fills an OrcExecutor with the caller's arguments. Several
 * of its lines are missing from this listing.
 *   d1          destination array (also used as an opcode operand below)
 *   s1, s2      source arrays
 *   p1, p2, p3  integer parameters stored into the executor
 *   n           element count (its use is not visible in this excerpt)
 */
2678 orc_mas2_sub_s16_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, int p1, int p2, int p3, int n)
2680 OrcExecutor _ex, *ex = &_ex;
2681 static int p_inited = 0;
2682 static OrcProgram *p = 0;
2683 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
2686 orc_once_mutex_lock ();
2688 OrcCompileResult result;
/* Create the program, name it and attach the backup function. Variables:
 * d1, s1, s2, p1 and t1 use size argument 2; p2, p3 and t2 use 4. */
2690 p = orc_program_new ();
2691 orc_program_set_name (p, "orc_mas2_sub_s16_ip");
2692 orc_program_set_backup_function (p, _backup_orc_mas2_sub_s16_ip);
2693 orc_program_add_destination (p, 2, "d1");
2694 orc_program_add_source (p, 2, "s1");
2695 orc_program_add_source (p, 2, "s2");
2696 orc_program_add_parameter (p, 2, "p1");
2697 orc_program_add_parameter (p, 4, "p2");
2698 orc_program_add_parameter (p, 4, "p3");
2699 orc_program_add_temporary (p, 2, "t1");
2700 orc_program_add_temporary (p, 4, "t2");
/* Six opcodes in order: addw, mulswl, addl, shrsl, convlw, subw.
 * NOTE(review): presumably the operand order is destination, source 1,
 * source 2, giving t1 = s1 + s2; t2 = t1 * p1; t2 += p2; t2 >>= p3;
 * t1 = narrow(t2); d1 = d1 - t1 - confirm against the Orc API. */
2702 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
2703 orc_program_append_2 (p, "mulswl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
2704 orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
2705 orc_program_append_2 (p, "shrsl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P3, ORC_VAR_D1);
2706 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
2707 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);
2709 result = orc_program_compile (p);
2712 orc_once_mutex_unlock ();
/* Hand the caller's pointers and parameters to the executor. */
2717 ex->arrays[ORC_VAR_D1] = d1;
2718 ex->arrays[ORC_VAR_S1] = (void *)s1;
2719 ex->arrays[ORC_VAR_S2] = (void *)s2;
2720 ex->params[ORC_VAR_P1] = p1;
2721 ex->params[ORC_VAR_P2] = p2;
2722 ex->params[ORC_VAR_P3] = p3;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
2724 func = p->code_exec;
/* Excerpt of the orc_mas4_across_add_s16_1991_op variant that works directly
 * on the caller's arrays; several of its lines are missing from this listing.
 *   d1      destination array
 *   s1..s5  source arrays
 *   p1      NOTE(review): presumably the addend held in var41; the
 *           assignment is not visible here - confirm
 *   p2      right-shift amount
 *   n       loop bound
 */
2730 /* orc_mas4_across_add_s16_1991_op */
2733 orc_mas4_across_add_s16_1991_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, const int16_t * s4, const int16_t * s5, int p1, int p2, int n){
2735 orc_union16 * ORC_RESTRICT ptr0;
2736 const orc_union16 * ORC_RESTRICT ptr4;
2737 const orc_union16 * ORC_RESTRICT ptr5;
2738 const orc_union16 * ORC_RESTRICT ptr6;
2739 const orc_union16 * ORC_RESTRICT ptr7;
2740 const orc_union16 * ORC_RESTRICT ptr8;
/* View d1 and s1..s5 as orc_union16 elements. */
2758 ptr0 = (orc_union16 *)d1;
2759 ptr4 = (orc_union16 *)s1;
2760 ptr5 = (orc_union16 *)s2;
2761 ptr6 = (orc_union16 *)s3;
2762 ptr7 = (orc_union16 *)s4;
2763 ptr8 = (orc_union16 *)s5;
/* The multiplier var38 is the constant 9, set once before the loop. */
2766 var38.i = 0x00000009; /* 9 or 4.44659e-323f */
/* Per iteration: var44 = var36 + var37; var45 = var44 * 9;
 * var46 = var39 + var40; var48 = var45 - var47; var49 = var48 + var41;
 * var50 = var49 >> p2; var43 = var42 + var51.
 * NOTE(review): the loads, the assignments of var47 (presumably a widened
 * var46) and var51 (presumably a narrowed var50), and the final store are
 * not visible here - confirm. */
2770 for (i = 0; i < n; i++) {
2776 var44.i = var36.i + var37.i;
2778 var45.i = var44.i * var38.i;
2784 var46.i = var39.i + var40.i;
2788 var48.i = var45.i - var47.i;
2790 var49.i = var48.i + var41.i;
2792 var50.i = var49.i >> p2;
2798 var43.i = var42.i + var51.i;
/* Excerpt of _backup_orc_mas4_across_add_s16_1991_op, which receives an
 * OrcExecutor instead of separate arguments. Several of its lines are
 * missing from this listing (the definition of n is among them). */
2807 _backup_orc_mas4_across_add_s16_1991_op (OrcExecutor * ORC_RESTRICT ex)
2811 orc_union16 * ORC_RESTRICT ptr0;
2812 const orc_union16 * ORC_RESTRICT ptr4;
2813 const orc_union16 * ORC_RESTRICT ptr5;
2814 const orc_union16 * ORC_RESTRICT ptr6;
2815 const orc_union16 * ORC_RESTRICT ptr7;
2816 const orc_union16 * ORC_RESTRICT ptr8;
/* Arrays come from executor slots 0 and 4..8.
 * NOTE(review): presumably these match ORC_VAR_D1 and ORC_VAR_S1..S5 -
 * confirm. */
2834 ptr0 = (orc_union16 *)ex->arrays[0];
2835 ptr4 = (orc_union16 *)ex->arrays[4];
2836 ptr5 = (orc_union16 *)ex->arrays[5];
2837 ptr6 = (orc_union16 *)ex->arrays[6];
2838 ptr7 = (orc_union16 *)ex->arrays[7];
2839 ptr8 = (orc_union16 *)ex->arrays[8];
/* The multiplier var38 is the constant 9; the addend var41 is read once from
 * params[24]; the shift amount is read from params[25] inside the loop.
 * NOTE(review): presumably 24/25 match ORC_VAR_P1/P2 - confirm. */
2842 var38.i = 0x00000009; /* 9 or 4.44659e-323f */
2844 var41.i = ex->params[24];
/* Per iteration: var44 = var36 + var37; var45 = var44 * 9;
 * var46 = var39 + var40; var48 = var45 - var47; var49 = var48 + var41;
 * var50 = var49 >> params[25]; var43 = var42 + var51.
 * The assignments of var47 and var51 are not visible in this excerpt. */
2846 for (i = 0; i < n; i++) {
2852 var44.i = var36.i + var37.i;
2854 var45.i = var44.i * var38.i;
2860 var46.i = var39.i + var40.i;
2864 var48.i = var45.i - var47.i;
2866 var49.i = var48.i + var41.i;
2868 var50.i = var49.i >> ex->params[25];
2874 var43.i = var42.i + var51.i;
/* Excerpt of the orc_mas4_across_add_s16_1991_op variant that describes the
 * kernel as an OrcProgram and fills an OrcExecutor with the caller's
 * arguments. Several of its lines are missing from this listing.
 *   d1      destination array
 *   s1..s5  source arrays
 *   p1, p2  integer parameters stored into the executor
 *   n       element count (its use is not visible in this excerpt)
 */
2882 orc_mas4_across_add_s16_1991_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, const int16_t * s4, const int16_t * s5, int p1, int p2, int n)
2884 OrcExecutor _ex, *ex = &_ex;
2885 static int p_inited = 0;
2886 static OrcProgram *p = 0;
2887 void (*func) (OrcExecutor *);
/* Program setup sits between orc_once_mutex_lock and orc_once_mutex_unlock.
 * NOTE(review): the test of p_inited is not visible in this excerpt. */
2890 orc_once_mutex_lock ();
2892 OrcCompileResult result;
/* Create the program, name it and attach the backup function. Variables:
 * d1, s1..s5, t1 and t2 use size argument 2; constant c1 = 9, parameters
 * p1/p2 and temporaries t3/t4 use size argument 4. */
2894 p = orc_program_new ();
2895 orc_program_set_name (p, "orc_mas4_across_add_s16_1991_op");
2896 orc_program_set_backup_function (p, _backup_orc_mas4_across_add_s16_1991_op);
2897 orc_program_add_destination (p, 2, "d1");
2898 orc_program_add_source (p, 2, "s1");
2899 orc_program_add_source (p, 2, "s2");
2900 orc_program_add_source (p, 2, "s3");
2901 orc_program_add_source (p, 2, "s4");
2902 orc_program_add_source (p, 2, "s5");
2903 orc_program_add_constant (p, 4, 0x00000009, "c1");
2904 orc_program_add_parameter (p, 4, "p1");
2905 orc_program_add_parameter (p, 4, "p2");
2906 orc_program_add_temporary (p, 2, "t1");
2907 orc_program_add_temporary (p, 2, "t2");
2908 orc_program_add_temporary (p, 4, "t3");
2909 orc_program_add_temporary (p, 4, "t4");
/* Nine opcodes in order: addw, mulswl, addw, convswl, subl, addl, shrsl,
 * convlw, addw. NOTE(review): presumably the operand order is destination,
 * source 1, source 2, giving t1 = s3 + s4; t3 = t1 * 9; t2 = s2 + s5;
 * t4 = widen(t2); t3 -= t4; t3 += p1; t3 >>= p2; t1 = narrow(t3);
 * d1 = s1 + t1 - confirm against the Orc API. */
2911 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_S4, ORC_VAR_D1);
2912 orc_program_append_2 (p, "mulswl", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
2913 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_S5, ORC_VAR_D1);
2914 orc_program_append_2 (p, "convswl", 0, ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
2915 orc_program_append_2 (p, "subl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_D1);
2916 orc_program_append_2 (p, "addl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P1, ORC_VAR_D1);
2917 orc_program_append_2 (p, "shrsl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P2, ORC_VAR_D1);
2918 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
2919 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);
2921 result = orc_program_compile (p);
2924 orc_once_mutex_unlock ();
/* Hand the caller's pointers and parameters to the executor. */
2929 ex->arrays[ORC_VAR_D1] = d1;
2930 ex->arrays[ORC_VAR_S1] = (void *)s1;
2931 ex->arrays[ORC_VAR_S2] = (void *)s2;
2932 ex->arrays[ORC_VAR_S3] = (void *)s3;
2933 ex->arrays[ORC_VAR_S4] = (void *)s4;
2934 ex->arrays[ORC_VAR_S5] = (void *)s5;
2935 ex->params[ORC_VAR_P1] = p1;
2936 ex->params[ORC_VAR_P2] = p2;
/* func is taken from the program's code_exec field; the call through it is
 * not visible in this excerpt. */
2938 func = p->code_exec;
2944 /* orc_mas4_across_add_s16_1991_ip */
/*
 * C variant of orc_mas4_across_add_s16_1991_ip that takes its operands as
 * ordinary arguments.  d1 and s1..s4 are viewed as orc_union16 arrays and the
 * loop runs over n elements.
 */
2947 orc_mas4_across_add_s16_1991_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, const int16_t * s4, int p1, int p2, int n){
2949 orc_union16 * ORC_RESTRICT ptr0;
2950 const orc_union16 * ORC_RESTRICT ptr4;
2951 const orc_union16 * ORC_RESTRICT ptr5;
2952 const orc_union16 * ORC_RESTRICT ptr6;
2953 const orc_union16 * ORC_RESTRICT ptr7;
2971 ptr0 = (orc_union16 *)d1;
2972 ptr4 = (orc_union16 *)s1;
2973 ptr5 = (orc_union16 *)s2;
2974 ptr6 = (orc_union16 *)s3;
2975 ptr7 = (orc_union16 *)s4;
2978 var38.i = 0x00000009; /* 9 or 4.44659e-323f */
2982 for (i = 0; i < n; i++) {
/* Per element: (var36 + var37) * 9 - var47 + var41, shifted right by p2;
 * var51 is then added to var42 to form var43. */
2988 var44.i = var36.i + var37.i;
2990 var45.i = var44.i * var38.i;
2996 var46.i = var39.i + var40.i;
3000 var48.i = var45.i - var47.i;
3002 var49.i = var48.i + var41.i;
3004 var50.i = var49.i >> p2;
3010 var43.i = var42.i + var51.i;
/*
 * Plain-C backup for the Orc program "orc_mas4_across_add_s16_1991_ip"; the
 * wrapper of that name installs it with orc_program_set_backup_function().
 * Every operand is taken from the OrcExecutor instead of from C arguments.
 */
3019 _backup_orc_mas4_across_add_s16_1991_ip (OrcExecutor * ORC_RESTRICT ex)
3023 orc_union16 * ORC_RESTRICT ptr0;
3024 const orc_union16 * ORC_RESTRICT ptr4;
3025 const orc_union16 * ORC_RESTRICT ptr5;
3026 const orc_union16 * ORC_RESTRICT ptr6;
3027 const orc_union16 * ORC_RESTRICT ptr7;
/* NOTE(review): array slots 0 and 4..7 presumably are the ones the wrapper
 * fills as ORC_VAR_D1 and ORC_VAR_S1..ORC_VAR_S4 - confirm against the Orc headers. */
3045 ptr0 = (orc_union16 *)ex->arrays[0];
3046 ptr4 = (orc_union16 *)ex->arrays[4];
3047 ptr5 = (orc_union16 *)ex->arrays[5];
3048 ptr6 = (orc_union16 *)ex->arrays[6];
3049 ptr7 = (orc_union16 *)ex->arrays[7];
3052 var38.i = 0x00000009; /* 9 or 4.44659e-323f */
/* NOTE(review): params[24] and params[25] look like the p1/p2 slots
 * (ORC_VAR_P1/ORC_VAR_P2) written by the wrapper - confirm. */
3054 var41.i = ex->params[24];
3056 for (i = 0; i < n; i++) {
/* Per element: (var36 + var37) * 9 - var47 + var41, shifted right by
 * params[25]; var51 is then added to var42 to form var43. */
3062 var44.i = var36.i + var37.i;
3064 var45.i = var44.i * var38.i;
3070 var46.i = var39.i + var40.i;
3074 var48.i = var45.i - var47.i;
3076 var49.i = var48.i + var41.i;
3078 var50.i = var49.i >> ex->params[25];
3084 var43.i = var42.i + var51.i;
/*
 * Orc-backed entry point.  Under orc_once_mutex_lock() it assembles and
 * compiles the OrcProgram "orc_mas4_across_add_s16_1991_ip", kept in the
 * function-local static p, then fills an OrcExecutor with the caller's
 * arguments and fetches p->code_exec.
 * NOTE(review): p_inited presumably makes the build one-time - confirm.
 */
3092 orc_mas4_across_add_s16_1991_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, const int16_t * s4, int p1, int p2, int n)
3094 OrcExecutor _ex, *ex = &_ex;
3095 static int p_inited = 0;
3096 static OrcProgram *p = 0;
3097 void (*func) (OrcExecutor *);
3100 orc_once_mutex_lock ();
3102 OrcCompileResult result;
3104 p = orc_program_new ();
3105 orc_program_set_name (p, "orc_mas4_across_add_s16_1991_ip");
3106 orc_program_set_backup_function (p, _backup_orc_mas4_across_add_s16_1991_ip);
/* Variables: 2-byte d1 and s1..s4, 4-byte constant c1 = 9, 4-byte
 * parameters p1/p2, 2-byte temporaries t1/t2, 4-byte temporaries t3/t4. */
3107 orc_program_add_destination (p, 2, "d1");
3108 orc_program_add_source (p, 2, "s1");
3109 orc_program_add_source (p, 2, "s2");
3110 orc_program_add_source (p, 2, "s3");
3111 orc_program_add_source (p, 2, "s4");
3112 orc_program_add_constant (p, 4, 0x00000009, "c1");
3113 orc_program_add_parameter (p, 4, "p1");
3114 orc_program_add_parameter (p, 4, "p2");
3115 orc_program_add_temporary (p, 2, "t1");
3116 orc_program_add_temporary (p, 2, "t2");
3117 orc_program_add_temporary (p, 4, "t3");
3118 orc_program_add_temporary (p, 4, "t4");
/* Opcode chain (reading the first variable operand as the destination -
 * confirm against orc_program_append_2): t1 = addw(s2, s3);
 * t3 = mulswl(t1, c1); t2 = addw(s1, s4); t4 = convswl(t2);
 * t3 = subl(t3, t4); t3 = addl(t3, p1); t3 = shrsl(t3, p2);
 * t1 = convlw(t3); d1 = addw(d1, t1), i.e. d1 is both input and output. */
3120 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3, ORC_VAR_D1);
3121 orc_program_append_2 (p, "mulswl", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
3122 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_S4, ORC_VAR_D1);
3123 orc_program_append_2 (p, "convswl", 0, ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
3124 orc_program_append_2 (p, "subl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_D1);
3125 orc_program_append_2 (p, "addl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P1, ORC_VAR_D1);
3126 orc_program_append_2 (p, "shrsl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P2, ORC_VAR_D1);
3127 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
3128 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);
3130 result = orc_program_compile (p);
3133 orc_once_mutex_unlock ();
/* Hand the caller's buffers and parameters to the executor; the casts drop
 * the const qualifier of the source pointers. */
3138 ex->arrays[ORC_VAR_D1] = d1;
3139 ex->arrays[ORC_VAR_S1] = (void *)s1;
3140 ex->arrays[ORC_VAR_S2] = (void *)s2;
3141 ex->arrays[ORC_VAR_S3] = (void *)s3;
3142 ex->arrays[ORC_VAR_S4] = (void *)s4;
3143 ex->params[ORC_VAR_P1] = p1;
3144 ex->params[ORC_VAR_P2] = p2;
3146 func = p->code_exec;
3152 /* orc_mas4_across_sub_s16_1991_op */
/*
 * C variant of orc_mas4_across_sub_s16_1991_op that takes its operands as
 * ordinary arguments.  d1 and s1..s5 are viewed as orc_union16 arrays and the
 * loop runs over n elements.
 */
3155 orc_mas4_across_sub_s16_1991_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, const int16_t * s4, const int16_t * s5, int p1, int p2, int n){
3157 orc_union16 * ORC_RESTRICT ptr0;
3158 const orc_union16 * ORC_RESTRICT ptr4;
3159 const orc_union16 * ORC_RESTRICT ptr5;
3160 const orc_union16 * ORC_RESTRICT ptr6;
3161 const orc_union16 * ORC_RESTRICT ptr7;
3162 const orc_union16 * ORC_RESTRICT ptr8;
3180 ptr0 = (orc_union16 *)d1;
3181 ptr4 = (orc_union16 *)s1;
3182 ptr5 = (orc_union16 *)s2;
3183 ptr6 = (orc_union16 *)s3;
3184 ptr7 = (orc_union16 *)s4;
3185 ptr8 = (orc_union16 *)s5;
3188 var38.i = 0x00000009; /* 9 or 4.44659e-323f */
3192 for (i = 0; i < n; i++) {
/* Per element: (var36 + var37) * 9 - var47 + var41, shifted right by p2;
 * var51 is then subtracted from var42 to form var43. */
3198 var44.i = var36.i + var37.i;
3200 var45.i = var44.i * var38.i;
3206 var46.i = var39.i + var40.i;
3210 var48.i = var45.i - var47.i;
3212 var49.i = var48.i + var41.i;
3214 var50.i = var49.i >> p2;
3220 var43.i = var42.i - var51.i;
/*
 * Plain-C backup for the Orc program "orc_mas4_across_sub_s16_1991_op"; the
 * wrapper of that name installs it with orc_program_set_backup_function().
 * Every operand is taken from the OrcExecutor instead of from C arguments.
 */
3229 _backup_orc_mas4_across_sub_s16_1991_op (OrcExecutor * ORC_RESTRICT ex)
3233 orc_union16 * ORC_RESTRICT ptr0;
3234 const orc_union16 * ORC_RESTRICT ptr4;
3235 const orc_union16 * ORC_RESTRICT ptr5;
3236 const orc_union16 * ORC_RESTRICT ptr6;
3237 const orc_union16 * ORC_RESTRICT ptr7;
3238 const orc_union16 * ORC_RESTRICT ptr8;
/* NOTE(review): array slots 0 and 4..8 presumably are the ones the wrapper
 * fills as ORC_VAR_D1 and ORC_VAR_S1..ORC_VAR_S5 - confirm against the Orc headers. */
3256 ptr0 = (orc_union16 *)ex->arrays[0];
3257 ptr4 = (orc_union16 *)ex->arrays[4];
3258 ptr5 = (orc_union16 *)ex->arrays[5];
3259 ptr6 = (orc_union16 *)ex->arrays[6];
3260 ptr7 = (orc_union16 *)ex->arrays[7];
3261 ptr8 = (orc_union16 *)ex->arrays[8];
3264 var38.i = 0x00000009; /* 9 or 4.44659e-323f */
/* NOTE(review): params[24] and params[25] look like the p1/p2 slots
 * (ORC_VAR_P1/ORC_VAR_P2) written by the wrapper - confirm. */
3266 var41.i = ex->params[24];
3268 for (i = 0; i < n; i++) {
/* Per element: (var36 + var37) * 9 - var47 + var41, shifted right by
 * params[25]; var51 is then subtracted from var42 to form var43. */
3274 var44.i = var36.i + var37.i;
3276 var45.i = var44.i * var38.i;
3282 var46.i = var39.i + var40.i;
3286 var48.i = var45.i - var47.i;
3288 var49.i = var48.i + var41.i;
3290 var50.i = var49.i >> ex->params[25];
3296 var43.i = var42.i - var51.i;
/*
 * Orc-backed entry point.  Under orc_once_mutex_lock() it assembles and
 * compiles the OrcProgram "orc_mas4_across_sub_s16_1991_op", kept in the
 * function-local static p, then fills an OrcExecutor with the caller's
 * arguments and fetches p->code_exec.
 * NOTE(review): p_inited presumably makes the build one-time - confirm.
 */
3304 orc_mas4_across_sub_s16_1991_op (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, const int16_t * s4, const int16_t * s5, int p1, int p2, int n)
3306 OrcExecutor _ex, *ex = &_ex;
3307 static int p_inited = 0;
3308 static OrcProgram *p = 0;
3309 void (*func) (OrcExecutor *);
3312 orc_once_mutex_lock ();
3314 OrcCompileResult result;
3316 p = orc_program_new ();
3317 orc_program_set_name (p, "orc_mas4_across_sub_s16_1991_op");
3318 orc_program_set_backup_function (p, _backup_orc_mas4_across_sub_s16_1991_op);
/* Variables: 2-byte d1 and s1..s5, 4-byte constant c1 = 9, 4-byte
 * parameters p1/p2, 2-byte temporaries t1/t2, 4-byte temporaries t3/t4. */
3319 orc_program_add_destination (p, 2, "d1");
3320 orc_program_add_source (p, 2, "s1");
3321 orc_program_add_source (p, 2, "s2");
3322 orc_program_add_source (p, 2, "s3");
3323 orc_program_add_source (p, 2, "s4");
3324 orc_program_add_source (p, 2, "s5");
3325 orc_program_add_constant (p, 4, 0x00000009, "c1");
3326 orc_program_add_parameter (p, 4, "p1");
3327 orc_program_add_parameter (p, 4, "p2");
3328 orc_program_add_temporary (p, 2, "t1");
3329 orc_program_add_temporary (p, 2, "t2");
3330 orc_program_add_temporary (p, 4, "t3");
3331 orc_program_add_temporary (p, 4, "t4");
/* Opcode chain (reading the first variable operand as the destination -
 * confirm against orc_program_append_2): t1 = addw(s3, s4);
 * t3 = mulswl(t1, c1); t2 = addw(s2, s5); t4 = convswl(t2);
 * t3 = subl(t3, t4); t3 = addl(t3, p1); t3 = shrsl(t3, p2);
 * t1 = convlw(t3); d1 = subw(s1, t1). */
3333 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_S4, ORC_VAR_D1);
3334 orc_program_append_2 (p, "mulswl", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
3335 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_S5, ORC_VAR_D1);
3336 orc_program_append_2 (p, "convswl", 0, ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
3337 orc_program_append_2 (p, "subl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_D1);
3338 orc_program_append_2 (p, "addl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P1, ORC_VAR_D1);
3339 orc_program_append_2 (p, "shrsl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P2, ORC_VAR_D1);
3340 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
3341 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);
3343 result = orc_program_compile (p);
3346 orc_once_mutex_unlock ();
/* Hand the caller's buffers and parameters to the executor; the casts drop
 * the const qualifier of the source pointers. */
3351 ex->arrays[ORC_VAR_D1] = d1;
3352 ex->arrays[ORC_VAR_S1] = (void *)s1;
3353 ex->arrays[ORC_VAR_S2] = (void *)s2;
3354 ex->arrays[ORC_VAR_S3] = (void *)s3;
3355 ex->arrays[ORC_VAR_S4] = (void *)s4;
3356 ex->arrays[ORC_VAR_S5] = (void *)s5;
3357 ex->params[ORC_VAR_P1] = p1;
3358 ex->params[ORC_VAR_P2] = p2;
3360 func = p->code_exec;
3366 /* orc_mas4_across_sub_s16_1991_ip */
/*
 * C variant of orc_mas4_across_sub_s16_1991_ip that takes its operands as
 * ordinary arguments.  d1 and s1..s4 are viewed as orc_union16 arrays and the
 * loop runs over n elements.
 */
3369 orc_mas4_across_sub_s16_1991_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, const int16_t * s4, int p1, int p2, int n){
3371 orc_union16 * ORC_RESTRICT ptr0;
3372 const orc_union16 * ORC_RESTRICT ptr4;
3373 const orc_union16 * ORC_RESTRICT ptr5;
3374 const orc_union16 * ORC_RESTRICT ptr6;
3375 const orc_union16 * ORC_RESTRICT ptr7;
3393 ptr0 = (orc_union16 *)d1;
3394 ptr4 = (orc_union16 *)s1;
3395 ptr5 = (orc_union16 *)s2;
3396 ptr6 = (orc_union16 *)s3;
3397 ptr7 = (orc_union16 *)s4;
3400 var38.i = 0x00000009; /* 9 or 4.44659e-323f */
3404 for (i = 0; i < n; i++) {
/* Per element: (var36 + var37) * 9 - var47 + var41, shifted right by p2;
 * var51 is then subtracted from var42 to form var43. */
3410 var44.i = var36.i + var37.i;
3412 var45.i = var44.i * var38.i;
3418 var46.i = var39.i + var40.i;
3422 var48.i = var45.i - var47.i;
3424 var49.i = var48.i + var41.i;
3426 var50.i = var49.i >> p2;
3432 var43.i = var42.i - var51.i;
/*
 * Plain-C backup for the Orc program "orc_mas4_across_sub_s16_1991_ip"; the
 * wrapper of that name installs it with orc_program_set_backup_function().
 * Every operand is taken from the OrcExecutor instead of from C arguments.
 */
3441 _backup_orc_mas4_across_sub_s16_1991_ip (OrcExecutor * ORC_RESTRICT ex)
3445 orc_union16 * ORC_RESTRICT ptr0;
3446 const orc_union16 * ORC_RESTRICT ptr4;
3447 const orc_union16 * ORC_RESTRICT ptr5;
3448 const orc_union16 * ORC_RESTRICT ptr6;
3449 const orc_union16 * ORC_RESTRICT ptr7;
/* NOTE(review): array slots 0 and 4..7 presumably are the ones the wrapper
 * fills as ORC_VAR_D1 and ORC_VAR_S1..ORC_VAR_S4 - confirm against the Orc headers. */
3467 ptr0 = (orc_union16 *)ex->arrays[0];
3468 ptr4 = (orc_union16 *)ex->arrays[4];
3469 ptr5 = (orc_union16 *)ex->arrays[5];
3470 ptr6 = (orc_union16 *)ex->arrays[6];
3471 ptr7 = (orc_union16 *)ex->arrays[7];
3474 var38.i = 0x00000009; /* 9 or 4.44659e-323f */
/* NOTE(review): params[24] and params[25] look like the p1/p2 slots
 * (ORC_VAR_P1/ORC_VAR_P2) written by the wrapper - confirm. */
3476 var41.i = ex->params[24];
3478 for (i = 0; i < n; i++) {
/* Per element: (var36 + var37) * 9 - var47 + var41, shifted right by
 * params[25]; var51 is then subtracted from var42 to form var43. */
3484 var44.i = var36.i + var37.i;
3486 var45.i = var44.i * var38.i;
3492 var46.i = var39.i + var40.i;
3496 var48.i = var45.i - var47.i;
3498 var49.i = var48.i + var41.i;
3500 var50.i = var49.i >> ex->params[25];
3506 var43.i = var42.i - var51.i;
/*
 * Orc-backed entry point.  Under orc_once_mutex_lock() it assembles and
 * compiles the OrcProgram "orc_mas4_across_sub_s16_1991_ip", kept in the
 * function-local static p, then fills an OrcExecutor with the caller's
 * arguments and fetches p->code_exec.
 * NOTE(review): p_inited presumably makes the build one-time - confirm.
 */
3514 orc_mas4_across_sub_s16_1991_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, const int16_t * s3, const int16_t * s4, int p1, int p2, int n)
3516 OrcExecutor _ex, *ex = &_ex;
3517 static int p_inited = 0;
3518 static OrcProgram *p = 0;
3519 void (*func) (OrcExecutor *);
3522 orc_once_mutex_lock ();
3524 OrcCompileResult result;
3526 p = orc_program_new ();
3527 orc_program_set_name (p, "orc_mas4_across_sub_s16_1991_ip");
3528 orc_program_set_backup_function (p, _backup_orc_mas4_across_sub_s16_1991_ip);
/* Variables: 2-byte d1 and s1..s4, 4-byte constant c1 = 9, 4-byte
 * parameters p1/p2, 2-byte temporaries t1/t2, 4-byte temporaries t3/t4. */
3529 orc_program_add_destination (p, 2, "d1");
3530 orc_program_add_source (p, 2, "s1");
3531 orc_program_add_source (p, 2, "s2");
3532 orc_program_add_source (p, 2, "s3");
3533 orc_program_add_source (p, 2, "s4");
3534 orc_program_add_constant (p, 4, 0x00000009, "c1");
3535 orc_program_add_parameter (p, 4, "p1");
3536 orc_program_add_parameter (p, 4, "p2");
3537 orc_program_add_temporary (p, 2, "t1");
3538 orc_program_add_temporary (p, 2, "t2");
3539 orc_program_add_temporary (p, 4, "t3");
3540 orc_program_add_temporary (p, 4, "t4");
/* Opcode chain (reading the first variable operand as the destination -
 * confirm against orc_program_append_2): t1 = addw(s2, s3);
 * t3 = mulswl(t1, c1); t2 = addw(s1, s4); t4 = convswl(t2);
 * t3 = subl(t3, t4); t3 = addl(t3, p1); t3 = shrsl(t3, p2);
 * t1 = convlw(t3); d1 = subw(d1, t1), i.e. d1 is both input and output. */
3542 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3, ORC_VAR_D1);
3543 orc_program_append_2 (p, "mulswl", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
3544 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_S4, ORC_VAR_D1);
3545 orc_program_append_2 (p, "convswl", 0, ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
3546 orc_program_append_2 (p, "subl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_D1);
3547 orc_program_append_2 (p, "addl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P1, ORC_VAR_D1);
3548 orc_program_append_2 (p, "shrsl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P2, ORC_VAR_D1);
3549 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
3550 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);
3552 result = orc_program_compile (p);
3555 orc_once_mutex_unlock ();
/* Hand the caller's buffers and parameters to the executor; the casts drop
 * the const qualifier of the source pointers. */
3560 ex->arrays[ORC_VAR_D1] = d1;
3561 ex->arrays[ORC_VAR_S1] = (void *)s1;
3562 ex->arrays[ORC_VAR_S2] = (void *)s2;
3563 ex->arrays[ORC_VAR_S3] = (void *)s3;
3564 ex->arrays[ORC_VAR_S4] = (void *)s4;
3565 ex->params[ORC_VAR_P1] = p1;
3566 ex->params[ORC_VAR_P2] = p2;
3568 func = p->code_exec;
3574 /* orc_subtract_s16 */
/*
 * C variant of orc_subtract_s16 taking ordinary arguments: d1, s1 and s2 are
 * viewed as orc_union16 arrays and each of the n iterations computes
 * var32 - var33 into var34.
 */
3577 orc_subtract_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
3579 orc_union16 * ORC_RESTRICT ptr0;
3580 const orc_union16 * ORC_RESTRICT ptr4;
3581 const orc_union16 * ORC_RESTRICT ptr5;
3586 ptr0 = (orc_union16 *)d1;
3587 ptr4 = (orc_union16 *)s1;
3588 ptr5 = (orc_union16 *)s2;
3591 for (i = 0; i < n; i++) {
3597 var34.i = var32.i - var33.i;
/*
 * Plain-C backup for the Orc program "orc_subtract_s16"; the wrapper of that
 * name installs it with orc_program_set_backup_function().  Pointers come
 * from the OrcExecutor array slots.
 * NOTE(review): slots 0, 4 and 5 presumably are ORC_VAR_D1, ORC_VAR_S1 and
 * ORC_VAR_S2 - confirm against the Orc headers.
 */
3606 _backup_orc_subtract_s16 (OrcExecutor * ORC_RESTRICT ex)
3610 orc_union16 * ORC_RESTRICT ptr0;
3611 const orc_union16 * ORC_RESTRICT ptr4;
3612 const orc_union16 * ORC_RESTRICT ptr5;
3617 ptr0 = (orc_union16 *)ex->arrays[0];
3618 ptr4 = (orc_union16 *)ex->arrays[4];
3619 ptr5 = (orc_union16 *)ex->arrays[5];
3622 for (i = 0; i < n; i++) {
3628 var34.i = var32.i - var33.i;
/*
 * Orc-backed entry point.  Under orc_once_mutex_lock() it assembles and
 * compiles the OrcProgram "orc_subtract_s16", kept in the function-local
 * static p, then fills an OrcExecutor with the caller's buffers and fetches
 * p->code_exec.
 * NOTE(review): p_inited presumably makes the build one-time - confirm.
 */
3636 orc_subtract_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
3638 OrcExecutor _ex, *ex = &_ex;
3639 static int p_inited = 0;
3640 static OrcProgram *p = 0;
3641 void (*func) (OrcExecutor *);
3644 orc_once_mutex_lock ();
3646 OrcCompileResult result;
3648 p = orc_program_new ();
3649 orc_program_set_name (p, "orc_subtract_s16");
3650 orc_program_set_backup_function (p, _backup_orc_subtract_s16);
/* All three arrays hold 2-byte elements. */
3651 orc_program_add_destination (p, 2, "d1");
3652 orc_program_add_source (p, 2, "s1");
3653 orc_program_add_source (p, 2, "s2");
/* Single opcode (first variable operand read as the destination - confirm):
 * d1 = subw(s1, s2). */
3655 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
3657 result = orc_program_compile (p);
3660 orc_once_mutex_unlock ();
/* The casts drop the const qualifier of the source pointers. */
3665 ex->arrays[ORC_VAR_D1] = d1;
3666 ex->arrays[ORC_VAR_S1] = (void *)s1;
3667 ex->arrays[ORC_VAR_S2] = (void *)s2;
3669 func = p->code_exec;
3675 /* orc_add_s16_u8 */
/*
 * C variant of orc_add_s16_u8 taking ordinary arguments.  d1 and s1 are
 * viewed as orc_union16 arrays, s2 as an orc_int8 array; the loop runs over
 * n elements.
 */
3678 orc_add_s16_u8 (int16_t * d1, const int16_t * s1, const orc_uint8 * s2, int n){
3680 orc_union16 * ORC_RESTRICT ptr0;
3681 const orc_union16 * ORC_RESTRICT ptr4;
3682 const orc_int8 * ORC_RESTRICT ptr5;
3688 ptr0 = (orc_union16 *)d1;
3689 ptr4 = (orc_union16 *)s1;
3690 ptr5 = (orc_int8 *)s2;
3693 for (i = 0; i < n; i++) {
/* var33 is converted to orc_uint8 before being stored in var36.i, which is
 * then added to var34. */
3697 var36.i = (orc_uint8)var33;
3701 var35.i = var36.i + var34.i;
/*
 * Plain-C backup for the Orc program "orc_add_s16_u8"; the wrapper of that
 * name installs it with orc_program_set_backup_function().  Pointers come
 * from the OrcExecutor array slots.
 * NOTE(review): slots 0, 4 and 5 presumably are ORC_VAR_D1, ORC_VAR_S1 and
 * ORC_VAR_S2 - confirm against the Orc headers.
 */
3710 _backup_orc_add_s16_u8 (OrcExecutor * ORC_RESTRICT ex)
3714 orc_union16 * ORC_RESTRICT ptr0;
3715 const orc_union16 * ORC_RESTRICT ptr4;
3716 const orc_int8 * ORC_RESTRICT ptr5;
3722 ptr0 = (orc_union16 *)ex->arrays[0];
3723 ptr4 = (orc_union16 *)ex->arrays[4];
3724 ptr5 = (orc_int8 *)ex->arrays[5];
3727 for (i = 0; i < n; i++) {
/* var33 is converted to orc_uint8 before being stored in var36.i, which is
 * then added to var34. */
3731 var36.i = (orc_uint8)var33;
3735 var35.i = var36.i + var34.i;
/*
 * Orc-backed entry point.  Under orc_once_mutex_lock() it assembles and
 * compiles the OrcProgram "orc_add_s16_u8", kept in the function-local
 * static p, then fills an OrcExecutor with the caller's buffers and fetches
 * p->code_exec.
 * NOTE(review): p_inited presumably makes the build one-time - confirm.
 */
3743 orc_add_s16_u8 (int16_t * d1, const int16_t * s1, const orc_uint8 * s2, int n)
3745 OrcExecutor _ex, *ex = &_ex;
3746 static int p_inited = 0;
3747 static OrcProgram *p = 0;
3748 void (*func) (OrcExecutor *);
3751 orc_once_mutex_lock ();
3753 OrcCompileResult result;
3755 p = orc_program_new ();
3756 orc_program_set_name (p, "orc_add_s16_u8");
3757 orc_program_set_backup_function (p, _backup_orc_add_s16_u8);
/* d1 and s1 hold 2-byte elements, s2 holds 1-byte elements; t1 is a 2-byte
 * temporary. */
3758 orc_program_add_destination (p, 2, "d1");
3759 orc_program_add_source (p, 2, "s1");
3760 orc_program_add_source (p, 1, "s2");
3761 orc_program_add_temporary (p, 2, "t1");
/* Opcode chain (first variable operand read as the destination - confirm):
 * t1 = convubw(s2); d1 = addw(t1, s1). */
3763 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
3764 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1);
3766 result = orc_program_compile (p);
3769 orc_once_mutex_unlock ();
/* The casts drop the const qualifier of the source pointers. */
3774 ex->arrays[ORC_VAR_D1] = d1;
3775 ex->arrays[ORC_VAR_S1] = (void *)s1;
3776 ex->arrays[ORC_VAR_S2] = (void *)s2;
3778 func = p->code_exec;
3784 /* orc_add_s16_u8_2d */
/*
 * C variant of orc_add_s16_u8_2d taking ordinary arguments.  The outer loop
 * walks m rows; each row starts d1_stride * j (destination) and
 * s1_stride * j (source) past the base pointers via ORC_PTR_OFFSET, and the
 * inner loop covers n elements.
 */
3787 orc_add_s16_u8_2d (int16_t * d1, int d1_stride, const orc_uint8 * s1, int s1_stride, int n, int m){
3790 orc_union16 * ORC_RESTRICT ptr0;
3791 const orc_int8 * ORC_RESTRICT ptr4;
3797 for (j = 0; j < m; j++) {
3798 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
3799 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
3802 for (i = 0; i < n; i++) {
/* var33 is converted to orc_uint8 before being stored in var36.i, which is
 * then added to var34. */
3806 var36.i = (orc_uint8)var33;
3810 var35.i = var34.i + var36.i;
/*
 * Plain-C backup for the Orc program "orc_add_s16_u8_2d"; the wrapper of
 * that name installs it with orc_program_set_backup_function().  The row
 * count is read from params[ORC_VAR_A1]; each row pointer is an array slot
 * offset by j times the value in the params slot of the same index.
 * NOTE(review): slots 0 and 4 presumably are ORC_VAR_D1 and ORC_VAR_S1, whose
 * params entries the wrapper fills with the strides - confirm.
 */
3820 _backup_orc_add_s16_u8_2d (OrcExecutor * ORC_RESTRICT ex)
3825 int m = ex->params[ORC_VAR_A1];
3826 orc_union16 * ORC_RESTRICT ptr0;
3827 const orc_int8 * ORC_RESTRICT ptr4;
3833 for (j = 0; j < m; j++) {
3834 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
3835 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
3838 for (i = 0; i < n; i++) {
/* var33 is converted to orc_uint8 before being stored in var36.i, which is
 * then added to var34. */
3842 var36.i = (orc_uint8)var33;
3846 var35.i = var34.i + var36.i;
/*
 * Orc-backed entry point.  Under orc_once_mutex_lock() it assembles and
 * compiles the two-dimensional OrcProgram "orc_add_s16_u8_2d", kept in the
 * function-local static p, then fills an OrcExecutor with the caller's
 * buffers, strides and row count and fetches p->code_exec.
 * NOTE(review): p_inited presumably makes the build one-time - confirm.
 */
3855 orc_add_s16_u8_2d (int16_t * d1, int d1_stride, const orc_uint8 * s1, int s1_stride, int n, int m)
3857 OrcExecutor _ex, *ex = &_ex;
3858 static int p_inited = 0;
3859 static OrcProgram *p = 0;
3860 void (*func) (OrcExecutor *);
3863 orc_once_mutex_lock ();
3865 OrcCompileResult result;
3867 p = orc_program_new ();
3868 orc_program_set_2d (p);
3869 orc_program_set_name (p, "orc_add_s16_u8_2d");
3870 orc_program_set_backup_function (p, _backup_orc_add_s16_u8_2d);
/* d1 holds 2-byte elements, s1 holds 1-byte elements; t1 is a 2-byte
 * temporary. */
3871 orc_program_add_destination (p, 2, "d1");
3872 orc_program_add_source (p, 1, "s1");
3873 orc_program_add_temporary (p, 2, "t1");
/* Opcode chain (first variable operand read as the destination - confirm):
 * t1 = convubw(s1); d1 = addw(d1, t1), i.e. d1 is both input and output. */
3875 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
3876 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);
3878 result = orc_program_compile (p);
3881 orc_once_mutex_unlock ();
/* Row count goes through ORC_EXECUTOR_M; each array's stride is stored in
 * the params slot that shares the array's index. */
3886 ORC_EXECUTOR_M(ex) = m;
3887 ex->arrays[ORC_VAR_D1] = d1;
3888 ex->params[ORC_VAR_D1] = d1_stride;
3889 ex->arrays[ORC_VAR_S1] = (void *)s1;
3890 ex->params[ORC_VAR_S1] = s1_stride;
3892 func = p->code_exec;
3898 /* orc_convert_s16_u8 */
/*
 * C variant of orc_convert_s16_u8 taking ordinary arguments.  d1 is viewed
 * as an orc_union16 array and s1 as an orc_int8 array; each of the n
 * iterations stores var32, converted to orc_uint8, into var33.i.
 * Note that d1 is declared orc_uint16 * although the function name says s16.
 */
3901 orc_convert_s16_u8 (orc_uint16 * d1, const orc_uint8 * s1, int n){
3903 orc_union16 * ORC_RESTRICT ptr0;
3904 const orc_int8 * ORC_RESTRICT ptr4;
3908 ptr0 = (orc_union16 *)d1;
3909 ptr4 = (orc_int8 *)s1;
3912 for (i = 0; i < n; i++) {
3916 var33.i = (orc_uint8)var32;
/*
 * Plain-C backup for the Orc program "orc_convert_s16_u8"; the wrapper of
 * that name installs it with orc_program_set_backup_function().  Each
 * iteration stores var32, converted to orc_uint8, into var33.i.
 * NOTE(review): slots 0 and 4 presumably are ORC_VAR_D1 and ORC_VAR_S1 -
 * confirm against the Orc headers.
 */
3925 _backup_orc_convert_s16_u8 (OrcExecutor * ORC_RESTRICT ex)
3929 orc_union16 * ORC_RESTRICT ptr0;
3930 const orc_int8 * ORC_RESTRICT ptr4;
3934 ptr0 = (orc_union16 *)ex->arrays[0];
3935 ptr4 = (orc_int8 *)ex->arrays[4];
3938 for (i = 0; i < n; i++) {
3942 var33.i = (orc_uint8)var32;
/*
 * Orc-backed entry point.  Under orc_once_mutex_lock() it assembles and
 * compiles the OrcProgram "orc_convert_s16_u8", kept in the function-local
 * static p, then fills an OrcExecutor with the caller's buffers and fetches
 * p->code_exec.
 * NOTE(review): p_inited presumably makes the build one-time - confirm.
 */
3950 orc_convert_s16_u8 (orc_uint16 * d1, const orc_uint8 * s1, int n)
3952 OrcExecutor _ex, *ex = &_ex;
3953 static int p_inited = 0;
3954 static OrcProgram *p = 0;
3955 void (*func) (OrcExecutor *);
3958 orc_once_mutex_lock ();
3960 OrcCompileResult result;
3962 p = orc_program_new ();
3963 orc_program_set_name (p, "orc_convert_s16_u8");
3964 orc_program_set_backup_function (p, _backup_orc_convert_s16_u8);
/* d1 holds 2-byte elements, s1 holds 1-byte elements. */
3965 orc_program_add_destination (p, 2, "d1");
3966 orc_program_add_source (p, 1, "s1");
/* Single opcode (first variable operand read as the destination - confirm):
 * d1 = convubw(s1). */
3968 orc_program_append_2 (p, "convubw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
3970 result = orc_program_compile (p);
3973 orc_once_mutex_unlock ();
/* The cast drops the const qualifier of the source pointer. */
3978 ex->arrays[ORC_VAR_D1] = d1;
3979 ex->arrays[ORC_VAR_S1] = (void *)s1;
3981 func = p->code_exec;
3987 /* orc_convert_u8_s16 */
/*
 * C variant of orc_convert_u8_s16 taking ordinary arguments.  d1 is viewed
 * as an orc_int8 array and s1 as an orc_union16 array; each of the n
 * iterations passes var32.i through ORC_CLAMP_UB.
 * NOTE(review): ORC_CLAMP_UB is defined elsewhere; presumably it clamps to
 * the unsigned-byte range - confirm.
 */
3990 orc_convert_u8_s16 (orc_uint8 * d1, const int16_t * s1, int n){
3992 orc_int8 * ORC_RESTRICT ptr0;
3993 const orc_union16 * ORC_RESTRICT ptr4;
3997 ptr0 = (orc_int8 *)d1;
3998 ptr4 = (orc_union16 *)s1;
4001 for (i = 0; i < n; i++) {
4005 var33 = ORC_CLAMP_UB(var32.i);
/*
 * Plain-C backup for the Orc program "orc_convert_u8_s16"; the wrapper of
 * that name installs it with orc_program_set_backup_function().  Each
 * iteration passes var32.i through ORC_CLAMP_UB.
 * NOTE(review): slots 0 and 4 presumably are ORC_VAR_D1 and ORC_VAR_S1, and
 * ORC_CLAMP_UB presumably clamps to the unsigned-byte range - confirm.
 */
4014 _backup_orc_convert_u8_s16 (OrcExecutor * ORC_RESTRICT ex)
4018 orc_int8 * ORC_RESTRICT ptr0;
4019 const orc_union16 * ORC_RESTRICT ptr4;
4023 ptr0 = (orc_int8 *)ex->arrays[0];
4024 ptr4 = (orc_union16 *)ex->arrays[4];
4027 for (i = 0; i < n; i++) {
4031 var33 = ORC_CLAMP_UB(var32.i);
/*
 * Orc-backed entry point.  Under orc_once_mutex_lock() it assembles and
 * compiles the OrcProgram "orc_convert_u8_s16", kept in the function-local
 * static p, then fills an OrcExecutor with the caller's buffers and fetches
 * p->code_exec.
 * NOTE(review): p_inited presumably makes the build one-time - confirm.
 */
4039 orc_convert_u8_s16 (orc_uint8 * d1, const int16_t * s1, int n)
4041 OrcExecutor _ex, *ex = &_ex;
4042 static int p_inited = 0;
4043 static OrcProgram *p = 0;
4044 void (*func) (OrcExecutor *);
4047 orc_once_mutex_lock ();
4049 OrcCompileResult result;
4051 p = orc_program_new ();
4052 orc_program_set_name (p, "orc_convert_u8_s16");
4053 orc_program_set_backup_function (p, _backup_orc_convert_u8_s16);
/* d1 holds 1-byte elements, s1 holds 2-byte elements. */
4054 orc_program_add_destination (p, 1, "d1");
4055 orc_program_add_source (p, 2, "s1");
/* Single opcode (first variable operand read as the destination - confirm):
 * d1 = convsuswb(s1). */
4057 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
4059 result = orc_program_compile (p);
4062 orc_once_mutex_unlock ();
/* The cast drops the const qualifier of the source pointer. */
4067 ex->arrays[ORC_VAR_D1] = d1;
4068 ex->arrays[ORC_VAR_S1] = (void *)s1;
4070 func = p->code_exec;
4076 /* orc_offsetconvert_u8_s16 */
/*
 * C variant of orc_offsetconvert_u8_s16 taking ordinary arguments.  d1 is
 * viewed as an orc_int8 array and s1 as an orc_union16 array.  Each of the n
 * iterations adds the constant 0x80 (128) to var33 and passes the sum
 * through ORC_CLAMP_UB.
 * NOTE(review): ORC_CLAMP_UB is defined elsewhere; presumably it clamps to
 * the unsigned-byte range - confirm.
 */
4079 orc_offsetconvert_u8_s16 (orc_uint8 * d1, const int16_t * s1, int n){
4081 orc_int8 * ORC_RESTRICT ptr0;
4082 const orc_union16 * ORC_RESTRICT ptr4;
4088 ptr0 = (orc_int8 *)d1;
4089 ptr4 = (orc_union16 *)s1;
4092 var34.i = 0x00000080; /* 128 or 6.32404e-322f */
4094 for (i = 0; i < n; i++) {
4098 var36.i = var33.i + var34.i;
4100 var35 = ORC_CLAMP_UB(var36.i);
/*
 * Plain-C backup for the Orc program "orc_offsetconvert_u8_s16"; the wrapper
 * of that name installs it with orc_program_set_backup_function().  Each
 * iteration adds the constant 0x80 (128) to var33 and passes the sum through
 * ORC_CLAMP_UB.
 * NOTE(review): slots 0 and 4 presumably are ORC_VAR_D1 and ORC_VAR_S1, and
 * ORC_CLAMP_UB presumably clamps to the unsigned-byte range - confirm.
 */
4109 _backup_orc_offsetconvert_u8_s16 (OrcExecutor * ORC_RESTRICT ex)
4113 orc_int8 * ORC_RESTRICT ptr0;
4114 const orc_union16 * ORC_RESTRICT ptr4;
4120 ptr0 = (orc_int8 *)ex->arrays[0];
4121 ptr4 = (orc_union16 *)ex->arrays[4];
4124 var34.i = 0x00000080; /* 128 or 6.32404e-322f */
4126 for (i = 0; i < n; i++) {
4130 var36.i = var33.i + var34.i;
4132 var35 = ORC_CLAMP_UB(var36.i);
/*
 * Orc-backed entry point.  Under orc_once_mutex_lock() it assembles and
 * compiles the OrcProgram "orc_offsetconvert_u8_s16", kept in the
 * function-local static p, then fills an OrcExecutor with the caller's
 * buffers and fetches p->code_exec.
 * NOTE(review): p_inited presumably makes the build one-time - confirm.
 */
4140 orc_offsetconvert_u8_s16 (orc_uint8 * d1, const int16_t * s1, int n)
4142 OrcExecutor _ex, *ex = &_ex;
4143 static int p_inited = 0;
4144 static OrcProgram *p = 0;
4145 void (*func) (OrcExecutor *);
4148 orc_once_mutex_lock ();
4150 OrcCompileResult result;
4152 p = orc_program_new ();
4153 orc_program_set_name (p, "orc_offsetconvert_u8_s16");
4154 orc_program_set_backup_function (p, _backup_orc_offsetconvert_u8_s16);
/* d1 holds 1-byte elements, s1 holds 2-byte elements; c1 is the constant
 * 0x80 declared with size 4, t1 a 2-byte temporary. */
4155 orc_program_add_destination (p, 1, "d1");
4156 orc_program_add_source (p, 2, "s1");
4157 orc_program_add_constant (p, 4, 0x00000080, "c1");
4158 orc_program_add_temporary (p, 2, "t1");
/* Opcode chain (first variable operand read as the destination - confirm):
 * t1 = addw(s1, c1); d1 = convsuswb(t1). */
4160 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);
4161 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
4163 result = orc_program_compile (p);
4166 orc_once_mutex_unlock ();
/* The cast drops the const qualifier of the source pointer. */
4171 ex->arrays[ORC_VAR_D1] = d1;
4172 ex->arrays[ORC_VAR_S1] = (void *)s1;
4174 func = p->code_exec;
4180 /* orc_offsetconvert_s16_u8 */
/*
 * C variant of orc_offsetconvert_s16_u8 taking ordinary arguments.  d1 is
 * viewed as an orc_union16 array and s1 as an orc_int8 array.  Each of the n
 * iterations converts var33 to orc_uint8, stores it in var36.i and subtracts
 * the constant 0x80 (128) to obtain var35.
 */
4183 orc_offsetconvert_s16_u8 (int16_t * d1, const orc_uint8 * s1, int n){
4185 orc_union16 * ORC_RESTRICT ptr0;
4186 const orc_int8 * ORC_RESTRICT ptr4;
4192 ptr0 = (orc_union16 *)d1;
4193 ptr4 = (orc_int8 *)s1;
4196 var34.i = 0x00000080; /* 128 or 6.32404e-322f */
4198 for (i = 0; i < n; i++) {
4202 var36.i = (orc_uint8)var33;
4204 var35.i = var36.i - var34.i;
/*
 * Plain-C backup for the Orc program "orc_offsetconvert_s16_u8"; the wrapper
 * of that name installs it with orc_program_set_backup_function().  Each
 * iteration converts var33 to orc_uint8, stores it in var36.i and subtracts
 * the constant 0x80 (128) to obtain var35.
 * NOTE(review): slots 0 and 4 presumably are ORC_VAR_D1 and ORC_VAR_S1 -
 * confirm against the Orc headers.
 */
4213 _backup_orc_offsetconvert_s16_u8 (OrcExecutor * ORC_RESTRICT ex)
4217 orc_union16 * ORC_RESTRICT ptr0;
4218 const orc_int8 * ORC_RESTRICT ptr4;
4224 ptr0 = (orc_union16 *)ex->arrays[0];
4225 ptr4 = (orc_int8 *)ex->arrays[4];
4228 var34.i = 0x00000080; /* 128 or 6.32404e-322f */
4230 for (i = 0; i < n; i++) {
4234 var36.i = (orc_uint8)var33;
4236 var35.i = var36.i - var34.i;
/*
 * Orc-backed entry point.  Under orc_once_mutex_lock() it assembles and
 * compiles the OrcProgram "orc_offsetconvert_s16_u8", kept in the
 * function-local static p, then fills an OrcExecutor with the caller's
 * buffers and fetches p->code_exec.
 * NOTE(review): p_inited presumably makes the build one-time - confirm.
 */
4244 orc_offsetconvert_s16_u8 (int16_t * d1, const orc_uint8 * s1, int n)
4246 OrcExecutor _ex, *ex = &_ex;
4247 static int p_inited = 0;
4248 static OrcProgram *p = 0;
4249 void (*func) (OrcExecutor *);
4252 orc_once_mutex_lock ();
4254 OrcCompileResult result;
4256 p = orc_program_new ();
4257 orc_program_set_name (p, "orc_offsetconvert_s16_u8");
4258 orc_program_set_backup_function (p, _backup_orc_offsetconvert_s16_u8);
/* d1 holds 2-byte elements, s1 holds 1-byte elements; c1 is the constant
 * 0x80 declared with size 4, t1 a 2-byte temporary. */
4259 orc_program_add_destination (p, 2, "d1");
4260 orc_program_add_source (p, 1, "s1");
4261 orc_program_add_constant (p, 4, 0x00000080, "c1");
4262 orc_program_add_temporary (p, 2, "t1");
/* Opcode chain (first variable operand read as the destination - confirm):
 * t1 = convubw(s1); d1 = subw(t1, c1). */
4264 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
4265 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
4267 result = orc_program_compile (p);
4270 orc_once_mutex_unlock ();
/* The cast drops the const qualifier of the source pointer. */
4275 ex->arrays[ORC_VAR_D1] = d1;
4276 ex->arrays[ORC_VAR_S1] = (void *)s1;
4278 func = p->code_exec;
4284 /* orc_subtract_s16_u8 */
/*
 * C variant of orc_subtract_s16_u8 taking ordinary arguments.  d1 and s1 are
 * viewed as orc_union16 arrays, s2 as an orc_int8 array; the loop runs over
 * n elements.
 */
4287 orc_subtract_s16_u8 (int16_t * d1, const int16_t * s1, const orc_uint8 * s2, int n){
4289 orc_union16 * ORC_RESTRICT ptr0;
4290 const orc_union16 * ORC_RESTRICT ptr4;
4291 const orc_int8 * ORC_RESTRICT ptr5;
4297 ptr0 = (orc_union16 *)d1;
4298 ptr4 = (orc_union16 *)s1;
4299 ptr5 = (orc_int8 *)s2;
4302 for (i = 0; i < n; i++) {
/* var33 is converted to orc_uint8 before being stored in var36.i, which is
 * then subtracted from var34. */
4306 var36.i = (orc_uint8)var33;
4310 var35.i = var34.i - var36.i;
/*
 * Plain-C backup for the Orc program "orc_subtract_s16_u8"; the wrapper of
 * that name installs it with orc_program_set_backup_function().  Pointers
 * come from the OrcExecutor array slots.
 * NOTE(review): slots 0, 4 and 5 presumably are ORC_VAR_D1, ORC_VAR_S1 and
 * ORC_VAR_S2 - confirm against the Orc headers.
 */
4319 _backup_orc_subtract_s16_u8 (OrcExecutor * ORC_RESTRICT ex)
4323 orc_union16 * ORC_RESTRICT ptr0;
4324 const orc_union16 * ORC_RESTRICT ptr4;
4325 const orc_int8 * ORC_RESTRICT ptr5;
4331 ptr0 = (orc_union16 *)ex->arrays[0];
4332 ptr4 = (orc_union16 *)ex->arrays[4];
4333 ptr5 = (orc_int8 *)ex->arrays[5];
4336 for (i = 0; i < n; i++) {
/* var33 is converted to orc_uint8 before being stored in var36.i, which is
 * then subtracted from var34. */
4340 var36.i = (orc_uint8)var33;
4344 var35.i = var34.i - var36.i;
/*
 * Orc-backed entry point.  Under orc_once_mutex_lock() it assembles and
 * compiles the OrcProgram "orc_subtract_s16_u8", kept in the function-local
 * static p, then fills an OrcExecutor with the caller's buffers and fetches
 * p->code_exec.
 * NOTE(review): p_inited presumably makes the build one-time - confirm.
 */
4352 orc_subtract_s16_u8 (int16_t * d1, const int16_t * s1, const orc_uint8 * s2, int n)
4354 OrcExecutor _ex, *ex = &_ex;
4355 static int p_inited = 0;
4356 static OrcProgram *p = 0;
4357 void (*func) (OrcExecutor *);
4360 orc_once_mutex_lock ();
4362 OrcCompileResult result;
4364 p = orc_program_new ();
4365 orc_program_set_name (p, "orc_subtract_s16_u8");
4366 orc_program_set_backup_function (p, _backup_orc_subtract_s16_u8);
/* d1 and s1 hold 2-byte elements, s2 holds 1-byte elements; t1 is a 2-byte
 * temporary. */
4367 orc_program_add_destination (p, 2, "d1");
4368 orc_program_add_source (p, 2, "s1");
4369 orc_program_add_source (p, 1, "s2");
4370 orc_program_add_temporary (p, 2, "t1");
/* Opcode chain (first variable operand read as the destination - confirm):
 * t1 = convubw(s2); d1 = subw(s1, t1). */
4372 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
4373 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);
4375 result = orc_program_compile (p);
4378 orc_once_mutex_unlock ();
/* The casts drop the const qualifier of the source pointers. */
4383 ex->arrays[ORC_VAR_D1] = d1;
4384 ex->arrays[ORC_VAR_S1] = (void *)s1;
4385 ex->arrays[ORC_VAR_S2] = (void *)s2;
4387 func = p->code_exec;
4393 /* orc_multiply_and_add_s16_u8 */
/*
 * C variant of orc_multiply_and_add_s16_u8 taking ordinary arguments.  d1
 * and s1 are viewed as orc_union16 arrays, s2 as an orc_int8 array; the loop
 * runs over n elements.
 */
4396 orc_multiply_and_add_s16_u8 (int16_t * d1, const int16_t * s1, const orc_uint8 * s2, int n){
4398 orc_union16 * ORC_RESTRICT ptr0;
4399 const orc_union16 * ORC_RESTRICT ptr4;
4400 const orc_int8 * ORC_RESTRICT ptr5;
4408 ptr0 = (orc_union16 *)d1;
4409 ptr4 = (orc_union16 *)s1;
4410 ptr5 = (orc_int8 *)s2;
4413 for (i = 0; i < n; i++) {
/* var33 is converted to orc_uint8 into var37.i; the product with var34 is
 * masked to its low 16 bits and then added to var35. */
4417 var37.i = (orc_uint8)var33;
4421 var38.i = (var37.i * var34.i) & 0xffff;
4425 var36.i = var35.i + var38.i;
/*
 * Plain-C backup for the Orc program "orc_multiply_and_add_s16_u8"; the
 * wrapper of that name installs it with orc_program_set_backup_function().
 * Pointers come from the OrcExecutor array slots.
 * NOTE(review): slots 0, 4 and 5 presumably are ORC_VAR_D1, ORC_VAR_S1 and
 * ORC_VAR_S2 - confirm against the Orc headers.
 */
4434 _backup_orc_multiply_and_add_s16_u8 (OrcExecutor * ORC_RESTRICT ex)
4438 orc_union16 * ORC_RESTRICT ptr0;
4439 const orc_union16 * ORC_RESTRICT ptr4;
4440 const orc_int8 * ORC_RESTRICT ptr5;
4448 ptr0 = (orc_union16 *)ex->arrays[0];
4449 ptr4 = (orc_union16 *)ex->arrays[4];
4450 ptr5 = (orc_int8 *)ex->arrays[5];
4453 for (i = 0; i < n; i++) {
/* var33 is converted to orc_uint8 into var37.i; the product with var34 is
 * masked to its low 16 bits and then added to var35. */
4457 var37.i = (orc_uint8)var33;
4461 var38.i = (var37.i * var34.i) & 0xffff;
4465 var36.i = var35.i + var38.i;
/*
 * Orc-backed entry point.  Under orc_once_mutex_lock() it assembles and
 * compiles the OrcProgram "orc_multiply_and_add_s16_u8", kept in the
 * function-local static p, then fills an OrcExecutor with the caller's
 * buffers and fetches p->code_exec.
 * NOTE(review): p_inited presumably makes the build one-time - confirm.
 */
4473 orc_multiply_and_add_s16_u8 (int16_t * d1, const int16_t * s1, const orc_uint8 * s2, int n)
4475 OrcExecutor _ex, *ex = &_ex;
4476 static int p_inited = 0;
4477 static OrcProgram *p = 0;
4478 void (*func) (OrcExecutor *);
4481 orc_once_mutex_lock ();
4483 OrcCompileResult result;
4485 p = orc_program_new ();
4486 orc_program_set_name (p, "orc_multiply_and_add_s16_u8");
4487 orc_program_set_backup_function (p, _backup_orc_multiply_and_add_s16_u8);
/* d1 and s1 hold 2-byte elements, s2 holds 1-byte elements; t1 is a 2-byte
 * temporary. */
4488 orc_program_add_destination (p, 2, "d1");
4489 orc_program_add_source (p, 2, "s1");
4490 orc_program_add_source (p, 1, "s2");
4491 orc_program_add_temporary (p, 2, "t1");
/* Opcode chain (first variable operand read as the destination - confirm):
 * t1 = convubw(s2); t1 = mullw(t1, s1); d1 = addw(d1, t1), i.e. d1 is both
 * input and output. */
4493 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
4494 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1);
4495 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);
4497 result = orc_program_compile (p);
4500 orc_once_mutex_unlock ();
/* The casts drop the const qualifier of the source pointers. */
4505 ex->arrays[ORC_VAR_D1] = d1;
4506 ex->arrays[ORC_VAR_S1] = (void *)s1;
4507 ex->arrays[ORC_VAR_S2] = (void *)s2;
4509 func = p->code_exec;
4515 /* orc_splat_s16_ns */
/*
 * C variant of orc_splat_s16_ns taking ordinary arguments: d1 is viewed as
 * an orc_union16 array and the loop runs over n elements.
 */
4518 orc_splat_s16_ns (int16_t * d1, int p1, int n){
4520 orc_union16 * ORC_RESTRICT ptr0;
4524 ptr0 = (orc_union16 *)d1;
4529 for (i = 0; i < n; i++) {
/*
 * Plain-C backup for the Orc program "orc_splat_s16_ns"; the wrapper of that
 * name installs it with orc_program_set_backup_function().  The destination
 * pointer and the value in var32 are read from the OrcExecutor.
 * NOTE(review): arrays[0] presumably is ORC_VAR_D1 and params[24] the p1
 * slot (ORC_VAR_P1) - confirm against the Orc headers.
 */
4540 _backup_orc_splat_s16_ns (OrcExecutor * ORC_RESTRICT ex)
4544 orc_union16 * ORC_RESTRICT ptr0;
4548 ptr0 = (orc_union16 *)ex->arrays[0];
4551 var32.i = ex->params[24];
4553 for (i = 0; i < n; i++) {
/*
 * Orc-backed entry point.  Under orc_once_mutex_lock() it assembles and
 * compiles the OrcProgram "orc_splat_s16_ns", kept in the function-local
 * static p, then fills an OrcExecutor with the destination and p1 and
 * fetches p->code_exec.
 * NOTE(review): p_inited presumably makes the build one-time - confirm.
 */
4563 orc_splat_s16_ns (int16_t * d1, int p1, int n)
4565 OrcExecutor _ex, *ex = &_ex;
4566 static int p_inited = 0;
4567 static OrcProgram *p = 0;
4568 void (*func) (OrcExecutor *);
4571 orc_once_mutex_lock ();
4573 OrcCompileResult result;
4575 p = orc_program_new ();
4576 orc_program_set_name (p, "orc_splat_s16_ns");
4577 orc_program_set_backup_function (p, _backup_orc_splat_s16_ns);
/* Both the destination elements and the parameter p1 are declared 2 bytes wide. */
4578 orc_program_add_destination (p, 2, "d1");
4579 orc_program_add_parameter (p, 2, "p1");
/* Single opcode (first variable operand read as the destination - confirm):
 * d1 = copyw(p1). */
4581 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);
4583 result = orc_program_compile (p);
4586 orc_once_mutex_unlock ();
4591 ex->arrays[ORC_VAR_D1] = d1;
4592 ex->params[ORC_VAR_P1] = p1;
4594 func = p->code_exec;
4600 /* orc_splat_s16_2d_4xn */
/*
 * C variant of orc_splat_s16_2d_4xn taking ordinary arguments.  The outer
 * loop walks m rows, each starting d1_stride * j past d1 via ORC_PTR_OFFSET.
 * NOTE(review): n is not a parameter of this function; presumably it is a
 * local fixed at 4, matching the "4xn" name - confirm.
 */
4603 orc_splat_s16_2d_4xn (int16_t * d1, int d1_stride, int p1, int m){
4607 orc_union16 * ORC_RESTRICT ptr0;
4611 for (j = 0; j < m; j++) {
4612 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
4617 for (i = 0; i < n; i++) {
/*
 * Plain-C backup for the Orc program "orc_splat_s16_2d_4xn"; the wrapper of
 * that name installs it with orc_program_set_backup_function().  The row
 * count is read from params[ORC_VAR_A1]; each row pointer is arrays[0]
 * offset by j times params[0].
 * NOTE(review): slot 0 presumably is ORC_VAR_D1 (its params entry holding
 * the stride) and params[24] the p1 slot (ORC_VAR_P1) - confirm.
 */
4629 _backup_orc_splat_s16_2d_4xn (OrcExecutor * ORC_RESTRICT ex)
4634 int m = ex->params[ORC_VAR_A1];
4635 orc_union16 * ORC_RESTRICT ptr0;
4639 for (j = 0; j < m; j++) {
4640 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
4643 var32.i = ex->params[24];
4645 for (i = 0; i < n; i++) {
/*
 * Orc-backed entry point.  Under orc_once_mutex_lock() it assembles and
 * compiles the two-dimensional OrcProgram "orc_splat_s16_2d_4xn" with a
 * constant n of 4, kept in the function-local static p, then fills an
 * OrcExecutor with the destination, stride, p1 and row count and fetches
 * p->code_exec.
 * NOTE(review): p_inited presumably makes the build one-time - confirm.
 */
4656 orc_splat_s16_2d_4xn (int16_t * d1, int d1_stride, int p1, int m)
4658 OrcExecutor _ex, *ex = &_ex;
4659 static int p_inited = 0;
4660 static OrcProgram *p = 0;
4661 void (*func) (OrcExecutor *);
4664 orc_once_mutex_lock ();
4666 OrcCompileResult result;
4668 p = orc_program_new ();
4669 orc_program_set_constant_n (p, 4);
4670 orc_program_set_2d (p);
4671 orc_program_set_name (p, "orc_splat_s16_2d_4xn");
4672 orc_program_set_backup_function (p, _backup_orc_splat_s16_2d_4xn);
/* Both the destination elements and the parameter p1 are declared 2 bytes wide. */
4673 orc_program_add_destination (p, 2, "d1");
4674 orc_program_add_parameter (p, 2, "p1");
/* Single opcode (first variable operand read as the destination - confirm):
 * d1 = copyw(p1). */
4676 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);
4678 result = orc_program_compile (p);
4681 orc_once_mutex_unlock ();
/* Row count goes through ORC_EXECUTOR_M; the destination stride is stored in
 * the params slot that shares the array's index. */
4686 ORC_EXECUTOR_M(ex) = m;
4687 ex->arrays[ORC_VAR_D1] = d1;
4688 ex->params[ORC_VAR_D1] = d1_stride;
4689 ex->params[ORC_VAR_P1] = p1;
4691 func = p->code_exec;
4697 /* orc_splat_s16_2d_8xn */
/* Plain-C variant of orc_splat_s16_2d_8xn (excerpt; loop bodies are not shown).
 * d1, d1_stride: destination and per-row offset; row j starts at
 *   ORC_PTR_OFFSET(d1, d1_stride * j).
 * p1: scalar parameter (its use is not visible in this excerpt).
 * m:  number of rows walked by the outer loop.
 * n is not a parameter here; presumably a local fixed to 8 - TODO confirm. */
4700 orc_splat_s16_2d_8xn (int16_t * d1, int d1_stride, int p1, int m){
4704 orc_union16 * ORC_RESTRICT ptr0;
4708 for (j = 0; j < m; j++) {
4709 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
4714 for (i = 0; i < n; i++) {
/* Backup implementation for orc_splat_s16_2d_8xn; all inputs are read from the
 * executor (excerpt; loop bodies are not shown).
 * The row count m comes from ex->params[ORC_VAR_A1]; row j of the destination
 * is ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j). */
4726 _backup_orc_splat_s16_2d_8xn (OrcExecutor * ORC_RESTRICT ex)
4731 int m = ex->params[ORC_VAR_A1];
4732 orc_union16 * ORC_RESTRICT ptr0;
4736 for (j = 0; j < m; j++) {
4737 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
/* presumably slot 24 is ORC_VAR_P1, i.e. the caller's p1 - TODO confirm */
4740 var32.i = ex->params[24];
4742 for (i = 0; i < n; i++) {
/* ORC-backed variant of orc_splat_s16_2d_8xn (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_splat_s16_2d_8xn" with constant n = 8 and 2-D
 * mode enabled, registers _backup_orc_splat_s16_2d_8xn, declares destination
 * "d1" and parameter "p1" (size argument 2 each), appends one "copyw"
 * instruction and compiles the program.
 * d1, d1_stride: stored in ex->arrays[ORC_VAR_D1] / ex->params[ORC_VAR_D1].
 * p1: stored in ex->params[ORC_VAR_P1].
 * m:  row count, stored through ORC_EXECUTOR_M(ex). */
4753 orc_splat_s16_2d_8xn (int16_t * d1, int d1_stride, int p1, int m)
4755 OrcExecutor _ex, *ex = &_ex;
4756 static int p_inited = 0;
4757 static OrcProgram *p = 0;
4758 void (*func) (OrcExecutor *);
4761 orc_once_mutex_lock ();
4763 OrcCompileResult result;
/* program definition */
4765 p = orc_program_new ();
4766 orc_program_set_constant_n (p, 8);
4767 orc_program_set_2d (p);
4768 orc_program_set_name (p, "orc_splat_s16_2d_8xn");
4769 orc_program_set_backup_function (p, _backup_orc_splat_s16_2d_8xn);
4770 orc_program_add_destination (p, 2, "d1");
4771 orc_program_add_parameter (p, 2, "p1");
4773 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
4775 result = orc_program_compile (p);
4778 orc_once_mutex_unlock ();
/* bind the caller's arguments to the executor slots */
4783 ORC_EXECUTOR_M(ex) = m;
4784 ex->arrays[ORC_VAR_D1] = d1;
4785 ex->params[ORC_VAR_D1] = d1_stride;
4786 ex->params[ORC_VAR_P1] = p1;
4788 func = p->code_exec;
4794 /* orc_splat_s16_2d */
/* Plain-C variant of orc_splat_s16_2d (excerpt; loop bodies are not shown).
 * d1, d1_stride: destination and per-row offset; row j starts at
 *   ORC_PTR_OFFSET(d1, d1_stride * j).
 * p1: scalar parameter (its use is not visible in this excerpt).
 * n:  number of elements per row (inner loop bound).
 * m:  number of rows (outer loop bound). */
4797 orc_splat_s16_2d (int16_t * d1, int d1_stride, int p1, int n, int m){
4800 orc_union16 * ORC_RESTRICT ptr0;
4804 for (j = 0; j < m; j++) {
4805 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
4810 for (i = 0; i < n; i++) {
/* Backup implementation for orc_splat_s16_2d; all inputs are read from the
 * executor (excerpt; loop bodies and the definition of n are not shown).
 * The row count m comes from ex->params[ORC_VAR_A1]; row j of the destination
 * is ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j). */
4822 _backup_orc_splat_s16_2d (OrcExecutor * ORC_RESTRICT ex)
4827 int m = ex->params[ORC_VAR_A1];
4828 orc_union16 * ORC_RESTRICT ptr0;
4832 for (j = 0; j < m; j++) {
4833 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
/* presumably slot 24 is ORC_VAR_P1, i.e. the caller's p1 - TODO confirm */
4836 var32.i = ex->params[24];
4838 for (i = 0; i < n; i++) {
/* ORC-backed variant of orc_splat_s16_2d (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_splat_s16_2d" in 2-D mode, registers
 * _backup_orc_splat_s16_2d, declares destination "d1" and parameter "p1"
 * (size argument 2 each), appends one "copyw" instruction and compiles it.
 * d1, d1_stride: stored in ex->arrays[ORC_VAR_D1] / ex->params[ORC_VAR_D1].
 * p1: stored in ex->params[ORC_VAR_P1].
 * n:  per-row element count; its use is not visible in this excerpt.
 * m:  row count, stored through ORC_EXECUTOR_M(ex). */
4849 orc_splat_s16_2d (int16_t * d1, int d1_stride, int p1, int n, int m)
4851 OrcExecutor _ex, *ex = &_ex;
4852 static int p_inited = 0;
4853 static OrcProgram *p = 0;
4854 void (*func) (OrcExecutor *);
4857 orc_once_mutex_lock ();
4859 OrcCompileResult result;
/* program definition */
4861 p = orc_program_new ();
4862 orc_program_set_2d (p);
4863 orc_program_set_name (p, "orc_splat_s16_2d");
4864 orc_program_set_backup_function (p, _backup_orc_splat_s16_2d);
4865 orc_program_add_destination (p, 2, "d1");
4866 orc_program_add_parameter (p, 2, "p1");
4868 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
4870 result = orc_program_compile (p);
4873 orc_once_mutex_unlock ();
/* bind the caller's arguments to the executor slots */
4878 ORC_EXECUTOR_M(ex) = m;
4879 ex->arrays[ORC_VAR_D1] = d1;
4880 ex->params[ORC_VAR_D1] = d1_stride;
4881 ex->params[ORC_VAR_P1] = p1;
4883 func = p->code_exec;
4889 /* orc_splat_u8_ns */
/* Plain-C variant of orc_splat_u8_ns (excerpt; the loop body is not shown).
 * d1: destination bytes, accessed through ptr0 as orc_int8.
 * p1: scalar parameter (its use is not visible in this excerpt).
 * n:  number of elements (loop bound). */
4892 orc_splat_u8_ns (orc_uint8 * d1, int p1, int n){
4894 orc_int8 * ORC_RESTRICT ptr0;
4898 ptr0 = (orc_int8 *)d1;
4903 for (i = 0; i < n; i++) {
/* Backup implementation for orc_splat_u8_ns; all inputs are read from the
 * executor (excerpt; the loop body and the definition of n are not shown).
 * The destination pointer is ex->arrays[0], viewed as orc_int8. */
4914 _backup_orc_splat_u8_ns (OrcExecutor * ORC_RESTRICT ex)
4918 orc_int8 * ORC_RESTRICT ptr0;
4922 ptr0 = (orc_int8 *)ex->arrays[0];
/* presumably slot 24 is ORC_VAR_P1, i.e. the caller's p1 - TODO confirm */
4925 var32 = ex->params[24];
4927 for (i = 0; i < n; i++) {
/* ORC-backed variant of orc_splat_u8_ns (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_splat_u8_ns", registers
 * _backup_orc_splat_u8_ns, declares destination "d1" and parameter "p1"
 * (size argument 1 each), appends one "copyb" instruction and compiles it.
 * d1: stored in ex->arrays[ORC_VAR_D1].
 * p1: stored in ex->params[ORC_VAR_P1].
 * n:  element count; its use is not visible in this excerpt. */
4937 orc_splat_u8_ns (orc_uint8 * d1, int p1, int n)
4939 OrcExecutor _ex, *ex = &_ex;
4940 static int p_inited = 0;
4941 static OrcProgram *p = 0;
4942 void (*func) (OrcExecutor *);
4945 orc_once_mutex_lock ();
4947 OrcCompileResult result;
/* program definition */
4949 p = orc_program_new ();
4950 orc_program_set_name (p, "orc_splat_u8_ns");
4951 orc_program_set_backup_function (p, _backup_orc_splat_u8_ns);
4952 orc_program_add_destination (p, 1, "d1");
4953 orc_program_add_parameter (p, 1, "p1");
4955 orc_program_append_2 (p, "copyb", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
4957 result = orc_program_compile (p);
4960 orc_once_mutex_unlock ();
/* bind the caller's arguments to the executor slots */
4965 ex->arrays[ORC_VAR_D1] = d1;
4966 ex->params[ORC_VAR_P1] = p1;
4968 func = p->code_exec;
4974 /* orc_splat_u8_2d */
/* Plain-C variant of orc_splat_u8_2d (excerpt; loop bodies are not shown).
 * d1, d1_stride: destination and per-row offset; row j starts at
 *   ORC_PTR_OFFSET(d1, d1_stride * j) and is accessed as orc_int8.
 * p1: scalar parameter (its use is not visible in this excerpt).
 * n:  number of elements per row (inner loop bound).
 * m:  number of rows (outer loop bound). */
4977 orc_splat_u8_2d (orc_uint8 * d1, int d1_stride, int p1, int n, int m){
4980 orc_int8 * ORC_RESTRICT ptr0;
4984 for (j = 0; j < m; j++) {
4985 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
4990 for (i = 0; i < n; i++) {
/* Backup implementation for orc_splat_u8_2d; all inputs are read from the
 * executor (excerpt; loop bodies and the definition of n are not shown).
 * The row count m comes from ex->params[ORC_VAR_A1]; row j of the destination
 * is ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j). */
5002 _backup_orc_splat_u8_2d (OrcExecutor * ORC_RESTRICT ex)
5007 int m = ex->params[ORC_VAR_A1];
5008 orc_int8 * ORC_RESTRICT ptr0;
5012 for (j = 0; j < m; j++) {
5013 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
/* presumably slot 24 is ORC_VAR_P1, i.e. the caller's p1 - TODO confirm */
5016 var32 = ex->params[24];
5018 for (i = 0; i < n; i++) {
/* ORC-backed variant of orc_splat_u8_2d (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_splat_u8_2d" in 2-D mode, registers
 * _backup_orc_splat_u8_2d, declares destination "d1" and parameter "p1"
 * (size argument 1 each), appends one "copyb" instruction and compiles it.
 * d1, d1_stride: stored in ex->arrays[ORC_VAR_D1] / ex->params[ORC_VAR_D1].
 * p1: stored in ex->params[ORC_VAR_P1].
 * n:  per-row element count; its use is not visible in this excerpt.
 * m:  row count, stored through ORC_EXECUTOR_M(ex). */
5029 orc_splat_u8_2d (orc_uint8 * d1, int d1_stride, int p1, int n, int m)
5031 OrcExecutor _ex, *ex = &_ex;
5032 static int p_inited = 0;
5033 static OrcProgram *p = 0;
5034 void (*func) (OrcExecutor *);
5037 orc_once_mutex_lock ();
5039 OrcCompileResult result;
/* program definition */
5041 p = orc_program_new ();
5042 orc_program_set_2d (p);
5043 orc_program_set_name (p, "orc_splat_u8_2d");
5044 orc_program_set_backup_function (p, _backup_orc_splat_u8_2d);
5045 orc_program_add_destination (p, 1, "d1");
5046 orc_program_add_parameter (p, 1, "p1");
5048 orc_program_append_2 (p, "copyb", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
5050 result = orc_program_compile (p);
5053 orc_once_mutex_unlock ();
/* bind the caller's arguments to the executor slots */
5058 ORC_EXECUTOR_M(ex) = m;
5059 ex->arrays[ORC_VAR_D1] = d1;
5060 ex->params[ORC_VAR_D1] = d1_stride;
5061 ex->params[ORC_VAR_P1] = p1;
5063 func = p->code_exec;
5069 /* orc_average_u8 */
/* Plain-C variant of orc_average_u8 (excerpt; loads and stores are not shown).
 * d1: destination bytes (ptr0); s1, s2: source bytes (ptr4, ptr5).
 * n:  number of elements (loop bound).
 * Per element the visible computation is (a + b + 1) >> 1 on two values cast
 * to orc_uint8, i.e. their average rounded up. */
5072 orc_average_u8 (orc_uint8 * d1, const orc_uint8 * s1, const orc_uint8 * s2, int n){
5074 orc_int8 * ORC_RESTRICT ptr0;
5075 const orc_int8 * ORC_RESTRICT ptr4;
5076 const orc_int8 * ORC_RESTRICT ptr5;
5081 ptr0 = (orc_int8 *)d1;
5082 ptr4 = (orc_int8 *)s1;
5083 ptr5 = (orc_int8 *)s2;
5086 for (i = 0; i < n; i++) {
/* the casts to orc_uint8 make the sum unsigned-valued before the shift */
5092 var34 = ((orc_uint8)var32 + (orc_uint8)var33 + 1)>>1;
/* Backup implementation for orc_average_u8; pointers are taken from the
 * executor (excerpt; loads, stores and the definition of n are not shown).
 * ptr0 = ex->arrays[0], ptr4 = ex->arrays[4], ptr5 = ex->arrays[5]
 * (presumably the D1, S1 and S2 slots - TODO confirm).
 * Per element the visible computation is (a + b + 1) >> 1 on two values cast
 * to orc_uint8. */
5101 _backup_orc_average_u8 (OrcExecutor * ORC_RESTRICT ex)
5105 orc_int8 * ORC_RESTRICT ptr0;
5106 const orc_int8 * ORC_RESTRICT ptr4;
5107 const orc_int8 * ORC_RESTRICT ptr5;
5112 ptr0 = (orc_int8 *)ex->arrays[0];
5113 ptr4 = (orc_int8 *)ex->arrays[4];
5114 ptr5 = (orc_int8 *)ex->arrays[5];
5117 for (i = 0; i < n; i++) {
5123 var34 = ((orc_uint8)var32 + (orc_uint8)var33 + 1)>>1;
/* ORC-backed variant of orc_average_u8 (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_average_u8", registers
 * _backup_orc_average_u8, declares destination "d1" and sources "s1", "s2"
 * (size argument 1 each), appends one "avgub" instruction and compiles it.
 * d1, s1, s2: stored in ex->arrays[ORC_VAR_D1 / ORC_VAR_S1 / ORC_VAR_S2].
 * n: element count; its use is not visible in this excerpt. */
5131 orc_average_u8 (orc_uint8 * d1, const orc_uint8 * s1, const orc_uint8 * s2, int n)
5133 OrcExecutor _ex, *ex = &_ex;
5134 static int p_inited = 0;
5135 static OrcProgram *p = 0;
5136 void (*func) (OrcExecutor *);
5139 orc_once_mutex_lock ();
5141 OrcCompileResult result;
/* program definition */
5143 p = orc_program_new ();
5144 orc_program_set_name (p, "orc_average_u8");
5145 orc_program_set_backup_function (p, _backup_orc_average_u8);
5146 orc_program_add_destination (p, 1, "d1");
5147 orc_program_add_source (p, 1, "s1");
5148 orc_program_add_source (p, 1, "s2");
5150 orc_program_append_2 (p, "avgub", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
5152 result = orc_program_compile (p);
5155 orc_once_mutex_unlock ();
/* the const source pointers are cast to void * to fit the executor's array slots */
5160 ex->arrays[ORC_VAR_D1] = d1;
5161 ex->arrays[ORC_VAR_S1] = (void *)s1;
5162 ex->arrays[ORC_VAR_S2] = (void *)s2;
5164 func = p->code_exec;
5170 /* orc_rrshift6_add_s16_2d */
/* Plain-C variant of orc_rrshift6_add_s16_2d (excerpt; loads and stores are not shown).
 * d1, d1_stride: byte destination rows (ptr0).
 * s1, s1_stride / s2, s2_stride: 16-bit source rows (ptr4 / ptr5).
 * n, m: elements per row and number of rows.
 * Visible per-element arithmetic: var37 = var33 + 0x20; var38 = var37 >> 6;
 * var39 = var35 + var38; var36 = ORC_CLAMP_UB(var39).
 * Which source feeds var33 and var35 is not visible in this excerpt. */
5173 orc_rrshift6_add_s16_2d (uint8_t * d1, int d1_stride, const int16_t * s1, int s1_stride, const int16_t * s2, int s2_stride, int n, int m){
5176 orc_int8 * ORC_RESTRICT ptr0;
5177 const orc_union16 * ORC_RESTRICT ptr4;
5178 const orc_union16 * ORC_RESTRICT ptr5;
5187 for (j = 0; j < m; j++) {
5188 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
5189 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
5190 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
/* 0x20 is half of 1 << 6, so adding it before the shift by 6 rounds the result */
5193 var34.i = 0x00000020; /* 32 or 1.58101e-322f */
5195 for (i = 0; i < n; i++) {
5199 var37.i = var33.i + var34.i;
5201 var38.i = var37.i >> 6;
5205 var39.i = var35.i + var38.i;
5207 var36 = ORC_CLAMP_UB(var39.i);
/* Backup implementation for orc_rrshift6_add_s16_2d; inputs are read from the
 * executor (excerpt; loads, stores and the definition of n are not shown).
 * m comes from ex->params[ORC_VAR_A1]; row pointers are built from
 * ex->arrays[0/4/5] with the offsets in ex->params[0/4/5] (presumably the D1,
 * S1 and S2 slots - TODO confirm).
 * Visible per-element arithmetic: var37 = var33 + 0x20; var38 = var37 >> 6;
 * var39 = var35 + var38; var36 = ORC_CLAMP_UB(var39). */
5217 _backup_orc_rrshift6_add_s16_2d (OrcExecutor * ORC_RESTRICT ex)
5222 int m = ex->params[ORC_VAR_A1];
5223 orc_int8 * ORC_RESTRICT ptr0;
5224 const orc_union16 * ORC_RESTRICT ptr4;
5225 const orc_union16 * ORC_RESTRICT ptr5;
5234 for (j = 0; j < m; j++) {
5235 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
5236 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
5237 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
5240 var34.i = 0x00000020; /* 32 or 1.58101e-322f */
5242 for (i = 0; i < n; i++) {
5246 var37.i = var33.i + var34.i;
5248 var38.i = var37.i >> 6;
5252 var39.i = var35.i + var38.i;
5254 var36 = ORC_CLAMP_UB(var39.i);
/* ORC-backed variant of orc_rrshift6_add_s16_2d (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static 2-D OrcProgram "orc_rrshift6_add_s16_2d", registers
 * _backup_orc_rrshift6_add_s16_2d and compiles this instruction sequence:
 *   addw      t1, s2, c1   (c1 = 0x20)
 *   shrsw     t1, t1, c2   (c2 = 6)
 *   addw      t1, s1, t1
 *   convsuswb d1, t1
 * d1 is declared with size argument 1; s1, s2 and t1 with size argument 2.
 * d1/s1/s2 and their strides are stored in ex->arrays[] / ex->params[] under
 * ORC_VAR_D1 / ORC_VAR_S1 / ORC_VAR_S2; m is stored through ORC_EXECUTOR_M(ex).
 * n: per-row element count; its use is not visible in this excerpt. */
5263 orc_rrshift6_add_s16_2d (uint8_t * d1, int d1_stride, const int16_t * s1, int s1_stride, const int16_t * s2, int s2_stride, int n, int m)
5265 OrcExecutor _ex, *ex = &_ex;
5266 static int p_inited = 0;
5267 static OrcProgram *p = 0;
5268 void (*func) (OrcExecutor *);
5271 orc_once_mutex_lock ();
5273 OrcCompileResult result;
/* program definition */
5275 p = orc_program_new ();
5276 orc_program_set_2d (p);
5277 orc_program_set_name (p, "orc_rrshift6_add_s16_2d");
5278 orc_program_set_backup_function (p, _backup_orc_rrshift6_add_s16_2d);
5279 orc_program_add_destination (p, 1, "d1");
5280 orc_program_add_source (p, 2, "s1");
5281 orc_program_add_source (p, 2, "s2");
5282 orc_program_add_constant (p, 4, 0x00000020, "c1");
5283 orc_program_add_constant (p, 4, 0x00000006, "c2");
5284 orc_program_add_temporary (p, 2, "t1");
5286 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_C1, ORC_VAR_D1);
5287 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2, ORC_VAR_D1);
5288 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);
5289 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
5291 result = orc_program_compile (p);
5294 orc_once_mutex_unlock ();
/* bind the caller's arguments to the executor slots */
5299 ORC_EXECUTOR_M(ex) = m;
5300 ex->arrays[ORC_VAR_D1] = d1;
5301 ex->params[ORC_VAR_D1] = d1_stride;
5302 ex->arrays[ORC_VAR_S1] = (void *)s1;
5303 ex->params[ORC_VAR_S1] = s1_stride;
5304 ex->arrays[ORC_VAR_S2] = (void *)s2;
5305 ex->params[ORC_VAR_S2] = s2_stride;
5307 func = p->code_exec;
5313 /* orc_rrshift6_sub_s16_2d */
/* Plain-C variant of orc_rrshift6_sub_s16_2d (excerpt; loads and stores are not shown).
 * d1, d1_stride and d2, d2_stride: two writable 16-bit row sets (ptr0, ptr1).
 * n, m: elements per row and number of rows.
 * Visible per-element arithmetic: var38 = var33 - 0x1fe0; var39 = var38 >> 6;
 * var37 = var36 - var39.
 * Which array feeds var33 and var36 is not visible in this excerpt. */
5316 orc_rrshift6_sub_s16_2d (int16_t * d1, int d1_stride, int16_t * d2, int d2_stride, int n, int m){
5319 orc_union16 * ORC_RESTRICT ptr0;
5320 orc_union16 * ORC_RESTRICT ptr1;
5329 for (j = 0; j < m; j++) {
5330 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
5331 ptr1 = ORC_PTR_OFFSET(d2, d2_stride * j);
/* 0x1fe0 = 8160 = 128 * 64 - 32 */
5334 var34.i = 0x00001fe0; /* 8160 or 4.03158e-320f */
5336 for (i = 0; i < n; i++) {
5340 var38.i = var33.i - var34.i;
5342 var39.i = var38.i >> 6;
5350 var37.i = var36.i - var39.i;
/* Backup implementation for orc_rrshift6_sub_s16_2d; inputs are read from the
 * executor (excerpt; loads, stores and the definition of n are not shown).
 * m comes from ex->params[ORC_VAR_A1]; row pointers are built from
 * ex->arrays[0/1] with the offsets in ex->params[0/1] (presumably the D1 and
 * D2 slots - TODO confirm).
 * Visible per-element arithmetic: var38 = var33 - 0x1fe0; var39 = var38 >> 6;
 * var37 = var36 - var39. */
5360 _backup_orc_rrshift6_sub_s16_2d (OrcExecutor * ORC_RESTRICT ex)
5365 int m = ex->params[ORC_VAR_A1];
5366 orc_union16 * ORC_RESTRICT ptr0;
5367 orc_union16 * ORC_RESTRICT ptr1;
5376 for (j = 0; j < m; j++) {
5377 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
5378 ptr1 = ORC_PTR_OFFSET(ex->arrays[1], ex->params[1] * j);
5381 var34.i = 0x00001fe0; /* 8160 or 4.03158e-320f */
5383 for (i = 0; i < n; i++) {
5387 var38.i = var33.i - var34.i;
5389 var39.i = var38.i >> 6;
5397 var37.i = var36.i - var39.i;
/* ORC-backed variant of orc_rrshift6_sub_s16_2d (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static 2-D OrcProgram "orc_rrshift6_sub_s16_2d", registers
 * _backup_orc_rrshift6_sub_s16_2d and compiles this instruction sequence:
 *   subw  t1, d2, c1   (c1 = 0x1fe0)
 *   shrsw t1, t1, c2   (c2 = 6)
 *   copyw d2, t1
 *   subw  d1, d1, t1
 * d1, d2 and t1 are declared with size argument 2.
 * d1/d2 and their strides are stored in ex->arrays[] / ex->params[] under
 * ORC_VAR_D1 / ORC_VAR_D2; m is stored through ORC_EXECUTOR_M(ex).
 * n: per-row element count; its use is not visible in this excerpt. */
5406 orc_rrshift6_sub_s16_2d (int16_t * d1, int d1_stride, int16_t * d2, int d2_stride, int n, int m)
5408 OrcExecutor _ex, *ex = &_ex;
5409 static int p_inited = 0;
5410 static OrcProgram *p = 0;
5411 void (*func) (OrcExecutor *);
5414 orc_once_mutex_lock ();
5416 OrcCompileResult result;
/* program definition */
5418 p = orc_program_new ();
5419 orc_program_set_2d (p);
5420 orc_program_set_name (p, "orc_rrshift6_sub_s16_2d");
5421 orc_program_set_backup_function (p, _backup_orc_rrshift6_sub_s16_2d);
5422 orc_program_add_destination (p, 2, "d1");
5423 orc_program_add_destination (p, 2, "d2");
5424 orc_program_add_constant (p, 4, 0x00001fe0, "c1");
5425 orc_program_add_constant (p, 4, 0x00000006, "c2");
5426 orc_program_add_temporary (p, 2, "t1");
5428 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_D2, ORC_VAR_C1, ORC_VAR_D1);
5429 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2, ORC_VAR_D1);
5430 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
5431 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
5433 result = orc_program_compile (p);
5436 orc_once_mutex_unlock ();
/* bind the caller's arguments to the executor slots */
5441 ORC_EXECUTOR_M(ex) = m;
5442 ex->arrays[ORC_VAR_D1] = d1;
5443 ex->params[ORC_VAR_D1] = d1_stride;
5444 ex->arrays[ORC_VAR_D2] = d2;
5445 ex->params[ORC_VAR_D2] = d2_stride;
5447 func = p->code_exec;
5453 /* orc_rrshift6_s16_ip_2d */
/* Plain-C variant of orc_rrshift6_s16_ip_2d (excerpt; loads and stores are not shown).
 * d1, d1_stride: 16-bit rows; row j starts at ORC_PTR_OFFSET(d1, d1_stride * j).
 * n, m: elements per row and number of rows.
 * Visible per-element arithmetic: var36 = var33 - 0x1fe0; var35 = var36 >> 6. */
5456 orc_rrshift6_s16_ip_2d (int16_t * d1, int d1_stride, int n, int m){
5459 orc_union16 * ORC_RESTRICT ptr0;
5465 for (j = 0; j < m; j++) {
5466 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
/* 0x1fe0 = 8160 = 128 * 64 - 32 */
5469 var34.i = 0x00001fe0; /* 8160 or 4.03158e-320f */
5471 for (i = 0; i < n; i++) {
5475 var36.i = var33.i - var34.i;
5477 var35.i = var36.i >> 6;
/* Backup implementation for orc_rrshift6_s16_ip_2d; inputs are read from the
 * executor (excerpt; loads, stores and the definition of n are not shown).
 * m comes from ex->params[ORC_VAR_A1]; row j is
 * ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j).
 * Visible per-element arithmetic: var36 = var33 - 0x1fe0; var35 = var36 >> 6. */
5487 _backup_orc_rrshift6_s16_ip_2d (OrcExecutor * ORC_RESTRICT ex)
5492 int m = ex->params[ORC_VAR_A1];
5493 orc_union16 * ORC_RESTRICT ptr0;
5499 for (j = 0; j < m; j++) {
5500 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
5503 var34.i = 0x00001fe0; /* 8160 or 4.03158e-320f */
5505 for (i = 0; i < n; i++) {
5509 var36.i = var33.i - var34.i;
5511 var35.i = var36.i >> 6;
/* ORC-backed variant of orc_rrshift6_s16_ip_2d (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static 2-D OrcProgram "orc_rrshift6_s16_ip_2d", registers
 * _backup_orc_rrshift6_s16_ip_2d and compiles this instruction sequence:
 *   subw  t1, d1, c1   (c1 = 0x1fe0)
 *   shrsw d1, t1, c2   (c2 = 6)
 * d1 and t1 are declared with size argument 2.
 * d1 and d1_stride are stored in ex->arrays[ORC_VAR_D1] /
 * ex->params[ORC_VAR_D1]; m is stored through ORC_EXECUTOR_M(ex).
 * n: per-row element count; its use is not visible in this excerpt. */
5520 orc_rrshift6_s16_ip_2d (int16_t * d1, int d1_stride, int n, int m)
5522 OrcExecutor _ex, *ex = &_ex;
5523 static int p_inited = 0;
5524 static OrcProgram *p = 0;
5525 void (*func) (OrcExecutor *);
5528 orc_once_mutex_lock ();
5530 OrcCompileResult result;
/* program definition */
5532 p = orc_program_new ();
5533 orc_program_set_2d (p);
5534 orc_program_set_name (p, "orc_rrshift6_s16_ip_2d");
5535 orc_program_set_backup_function (p, _backup_orc_rrshift6_s16_ip_2d);
5536 orc_program_add_destination (p, 2, "d1");
5537 orc_program_add_constant (p, 4, 0x00001fe0, "c1");
5538 orc_program_add_constant (p, 4, 0x00000006, "c2");
5539 orc_program_add_temporary (p, 2, "t1");
5541 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_C1, ORC_VAR_D1);
5542 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_C2, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
5544 result = orc_program_compile (p);
5547 orc_once_mutex_unlock ();
/* bind the caller's arguments to the executor slots */
5552 ORC_EXECUTOR_M(ex) = m;
5553 ex->arrays[ORC_VAR_D1] = d1;
5554 ex->params[ORC_VAR_D1] = d1_stride;
5556 func = p->code_exec;
5562 /* orc_rrshift6_s16_ip */
/* Plain-C variant of orc_rrshift6_s16_ip (excerpt; loads and stores are not shown).
 * d1: 16-bit array accessed through ptr0 as orc_union16.
 * n:  number of elements (loop bound).
 * Visible per-element arithmetic: var36 = var33 - 0x1fe0; var35 = var36 >> 6. */
5565 orc_rrshift6_s16_ip (int16_t * d1, int n){
5567 orc_union16 * ORC_RESTRICT ptr0;
5573 ptr0 = (orc_union16 *)d1;
/* 0x1fe0 = 8160 = 128 * 64 - 32 */
5576 var34.i = 0x00001fe0; /* 8160 or 4.03158e-320f */
5578 for (i = 0; i < n; i++) {
5582 var36.i = var33.i - var34.i;
5584 var35.i = var36.i >> 6;
/* Backup implementation for orc_rrshift6_s16_ip; the array pointer is read
 * from ex->arrays[0] (excerpt; loads, stores and the definition of n are not shown).
 * Visible per-element arithmetic: var36 = var33 - 0x1fe0; var35 = var36 >> 6. */
5593 _backup_orc_rrshift6_s16_ip (OrcExecutor * ORC_RESTRICT ex)
5597 orc_union16 * ORC_RESTRICT ptr0;
5603 ptr0 = (orc_union16 *)ex->arrays[0];
5606 var34.i = 0x00001fe0; /* 8160 or 4.03158e-320f */
5608 for (i = 0; i < n; i++) {
5612 var36.i = var33.i - var34.i;
5614 var35.i = var36.i >> 6;
/* ORC-backed variant of orc_rrshift6_s16_ip (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_rrshift6_s16_ip", registers
 * _backup_orc_rrshift6_s16_ip and compiles this instruction sequence:
 *   subw  t1, d1, c1   (c1 = 0x1fe0)
 *   shrsw d1, t1, c2   (c2 = 6)
 * d1 and t1 are declared with size argument 2.
 * d1 is stored in ex->arrays[ORC_VAR_D1].
 * n: element count; its use is not visible in this excerpt. */
5622 orc_rrshift6_s16_ip (int16_t * d1, int n)
5624 OrcExecutor _ex, *ex = &_ex;
5625 static int p_inited = 0;
5626 static OrcProgram *p = 0;
5627 void (*func) (OrcExecutor *);
5630 orc_once_mutex_lock ();
5632 OrcCompileResult result;
/* program definition */
5634 p = orc_program_new ();
5635 orc_program_set_name (p, "orc_rrshift6_s16_ip");
5636 orc_program_set_backup_function (p, _backup_orc_rrshift6_s16_ip);
5637 orc_program_add_destination (p, 2, "d1");
5638 orc_program_add_constant (p, 4, 0x00001fe0, "c1");
5639 orc_program_add_constant (p, 4, 0x00000006, "c2");
5640 orc_program_add_temporary (p, 2, "t1");
5642 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_C1, ORC_VAR_D1);
5643 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_C2, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
5645 result = orc_program_compile (p);
5648 orc_once_mutex_unlock ();
5653 ex->arrays[ORC_VAR_D1] = d1;
5655 func = p->code_exec;
5661 /* orc_unpack_yuyv_y */
/* Plain-C variant of orc_unpack_yuyv_y (excerpt; loads and stores are not shown).
 * d1: byte destination (ptr0); s1: 16-bit source read as orc_union16 (ptr4).
 * n:  number of elements (loop bound).
 * Per element the visible computation keeps the low byte of a 16-bit value. */
5664 orc_unpack_yuyv_y (orc_uint8 * d1, const orc_uint16 * s1, int n){
5666 orc_int8 * ORC_RESTRICT ptr0;
5667 const orc_union16 * ORC_RESTRICT ptr4;
5671 ptr0 = (orc_int8 *)d1;
5672 ptr4 = (orc_union16 *)s1;
5675 for (i = 0; i < n; i++) {
5679 var33 = (orc_uint16)var32.i & 0xff;
/* Backup implementation for orc_unpack_yuyv_y; pointers come from
 * ex->arrays[0] (destination bytes) and ex->arrays[4] (16-bit source)
 * (excerpt; loads, stores and the definition of n are not shown).
 * Per element the visible computation keeps the low byte of a 16-bit value. */
5688 _backup_orc_unpack_yuyv_y (OrcExecutor * ORC_RESTRICT ex)
5692 orc_int8 * ORC_RESTRICT ptr0;
5693 const orc_union16 * ORC_RESTRICT ptr4;
5697 ptr0 = (orc_int8 *)ex->arrays[0];
5698 ptr4 = (orc_union16 *)ex->arrays[4];
5701 for (i = 0; i < n; i++) {
5705 var33 = (orc_uint16)var32.i & 0xff;
/* ORC-backed variant of orc_unpack_yuyv_y (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_unpack_yuyv_y", registers
 * _backup_orc_unpack_yuyv_y, declares destination "d1" (size argument 1) and
 * source "s1" (size argument 2), appends one "select0wb" instruction and
 * compiles it.
 * d1, s1: stored in ex->arrays[ORC_VAR_D1] / ex->arrays[ORC_VAR_S1].
 * n: element count; its use is not visible in this excerpt. */
5713 orc_unpack_yuyv_y (orc_uint8 * d1, const orc_uint16 * s1, int n)
5715 OrcExecutor _ex, *ex = &_ex;
5716 static int p_inited = 0;
5717 static OrcProgram *p = 0;
5718 void (*func) (OrcExecutor *);
5721 orc_once_mutex_lock ();
5723 OrcCompileResult result;
/* program definition */
5725 p = orc_program_new ();
5726 orc_program_set_name (p, "orc_unpack_yuyv_y");
5727 orc_program_set_backup_function (p, _backup_orc_unpack_yuyv_y);
5728 orc_program_add_destination (p, 1, "d1");
5729 orc_program_add_source (p, 2, "s1");
5731 orc_program_append_2 (p, "select0wb", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
5733 result = orc_program_compile (p);
5736 orc_once_mutex_unlock ();
/* the const source pointer is cast to void * to fit the executor's array slot */
5741 ex->arrays[ORC_VAR_D1] = d1;
5742 ex->arrays[ORC_VAR_S1] = (void *)s1;
5744 func = p->code_exec;
5750 /* orc_unpack_yuyv_u */
/* Plain-C variant of orc_unpack_yuyv_u (excerpt; loads and stores are not shown).
 * d1: byte destination (ptr0); s1: 32-bit source read as orc_union32 (ptr4).
 * n:  number of elements (loop bound).
 * Per element the visible computation takes the low 16 bits of a 32-bit value
 * and then the high byte of that 16-bit half (bits 8..15 of the input). */
5753 orc_unpack_yuyv_u (orc_uint8 * d1, const orc_uint32 * s1, int n){
5755 orc_int8 * ORC_RESTRICT ptr0;
5756 const orc_union32 * ORC_RESTRICT ptr4;
5761 ptr0 = (orc_int8 *)d1;
5762 ptr4 = (orc_union32 *)s1;
5765 for (i = 0; i < n; i++) {
5769 var35.i = (orc_uint32)var33.i & 0xffff;
5771 var34 = ((orc_uint16)var35.i >> 8)&0xff;
/* Backup implementation for orc_unpack_yuyv_u; pointers come from
 * ex->arrays[0] (destination bytes) and ex->arrays[4] (32-bit source)
 * (excerpt; loads, stores and the definition of n are not shown).
 * Per element the visible computation takes the low 16 bits of a 32-bit value
 * and then the high byte of that 16-bit half. */
5780 _backup_orc_unpack_yuyv_u (OrcExecutor * ORC_RESTRICT ex)
5784 orc_int8 * ORC_RESTRICT ptr0;
5785 const orc_union32 * ORC_RESTRICT ptr4;
5790 ptr0 = (orc_int8 *)ex->arrays[0];
5791 ptr4 = (orc_union32 *)ex->arrays[4];
5794 for (i = 0; i < n; i++) {
5798 var35.i = (orc_uint32)var33.i & 0xffff;
5800 var34 = ((orc_uint16)var35.i >> 8)&0xff;
/* ORC-backed variant of orc_unpack_yuyv_u (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_unpack_yuyv_u", registers
 * _backup_orc_unpack_yuyv_u and compiles this instruction sequence:
 *   select0lw t1, s1
 *   select1wb d1, t1
 * d1 is declared with size argument 1, s1 with 4 and t1 with 2.
 * d1, s1: stored in ex->arrays[ORC_VAR_D1] / ex->arrays[ORC_VAR_S1].
 * n: element count; its use is not visible in this excerpt. */
5808 orc_unpack_yuyv_u (orc_uint8 * d1, const orc_uint32 * s1, int n)
5810 OrcExecutor _ex, *ex = &_ex;
5811 static int p_inited = 0;
5812 static OrcProgram *p = 0;
5813 void (*func) (OrcExecutor *);
5816 orc_once_mutex_lock ();
5818 OrcCompileResult result;
/* program definition */
5820 p = orc_program_new ();
5821 orc_program_set_name (p, "orc_unpack_yuyv_u");
5822 orc_program_set_backup_function (p, _backup_orc_unpack_yuyv_u);
5823 orc_program_add_destination (p, 1, "d1");
5824 orc_program_add_source (p, 4, "s1");
5825 orc_program_add_temporary (p, 2, "t1");
5827 orc_program_append_2 (p, "select0lw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
5828 orc_program_append_2 (p, "select1wb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
5830 result = orc_program_compile (p);
5833 orc_once_mutex_unlock ();
/* the const source pointer is cast to void * to fit the executor's array slot */
5838 ex->arrays[ORC_VAR_D1] = d1;
5839 ex->arrays[ORC_VAR_S1] = (void *)s1;
5841 func = p->code_exec;
5847 /* orc_unpack_yuyv_v */
/* Plain-C variant of orc_unpack_yuyv_v (excerpt; loads and stores are not shown).
 * d1: byte destination (ptr0); s1: 32-bit source read as orc_union32 (ptr4).
 * n:  number of elements (loop bound).
 * Per element the visible computation takes the high 16 bits of a 32-bit value
 * and then the high byte of that 16-bit half (bits 24..31 of the input). */
5850 orc_unpack_yuyv_v (orc_uint8 * d1, const orc_uint32 * s1, int n){
5852 orc_int8 * ORC_RESTRICT ptr0;
5853 const orc_union32 * ORC_RESTRICT ptr4;
5858 ptr0 = (orc_int8 *)d1;
5859 ptr4 = (orc_union32 *)s1;
5862 for (i = 0; i < n; i++) {
5866 var35.i = ((orc_uint32)var33.i >> 16)&0xffff;
5868 var34 = ((orc_uint16)var35.i >> 8)&0xff;
/* Backup implementation for orc_unpack_yuyv_v; pointers come from
 * ex->arrays[0] (destination bytes) and ex->arrays[4] (32-bit source)
 * (excerpt; loads, stores and the definition of n are not shown).
 * Per element the visible computation takes the high 16 bits of a 32-bit value
 * and then the high byte of that 16-bit half. */
5877 _backup_orc_unpack_yuyv_v (OrcExecutor * ORC_RESTRICT ex)
5881 orc_int8 * ORC_RESTRICT ptr0;
5882 const orc_union32 * ORC_RESTRICT ptr4;
5887 ptr0 = (orc_int8 *)ex->arrays[0];
5888 ptr4 = (orc_union32 *)ex->arrays[4];
5891 for (i = 0; i < n; i++) {
5895 var35.i = ((orc_uint32)var33.i >> 16)&0xffff;
5897 var34 = ((orc_uint16)var35.i >> 8)&0xff;
/* ORC-backed variant of orc_unpack_yuyv_v (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_unpack_yuyv_v", registers
 * _backup_orc_unpack_yuyv_v and compiles this instruction sequence:
 *   select1lw t1, s1
 *   select1wb d1, t1
 * d1 is declared with size argument 1, s1 with 4 and t1 with 2.
 * d1, s1: stored in ex->arrays[ORC_VAR_D1] / ex->arrays[ORC_VAR_S1].
 * n: element count; its use is not visible in this excerpt. */
5905 orc_unpack_yuyv_v (orc_uint8 * d1, const orc_uint32 * s1, int n)
5907 OrcExecutor _ex, *ex = &_ex;
5908 static int p_inited = 0;
5909 static OrcProgram *p = 0;
5910 void (*func) (OrcExecutor *);
5913 orc_once_mutex_lock ();
5915 OrcCompileResult result;
/* program definition */
5917 p = orc_program_new ();
5918 orc_program_set_name (p, "orc_unpack_yuyv_v");
5919 orc_program_set_backup_function (p, _backup_orc_unpack_yuyv_v);
5920 orc_program_add_destination (p, 1, "d1");
5921 orc_program_add_source (p, 4, "s1");
5922 orc_program_add_temporary (p, 2, "t1");
5924 orc_program_append_2 (p, "select1lw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
5925 orc_program_append_2 (p, "select1wb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
5927 result = orc_program_compile (p);
5930 orc_once_mutex_unlock ();
/* the const source pointer is cast to void * to fit the executor's array slot */
5935 ex->arrays[ORC_VAR_D1] = d1;
5936 ex->arrays[ORC_VAR_S1] = (void *)s1;
5938 func = p->code_exec;
/* Plain-C variant of orc_packyuyv (excerpt; loads and stores are not shown).
 * d1: 32-bit destination (ptr0, orc_union32).
 * s1: declared const uint8_t * but read through ptr4 as orc_union16, i.e. two
 *     bytes per element - NOTE(review): confirm callers pass suitably sized
 *     and aligned data.
 * s2, s3: byte sources (ptr5, ptr6).
 * n:  number of elements (loop bound).
 * Visible per-element arithmetic: var42 / var43 are the low / high byte of the
 * 16-bit var41; var44 = var42 | (var38 << 8); var45 = var43 | (var39 << 8);
 * var40 = var44 | (var45 << 16). */
5947 orc_packyuyv (orc_uint32 * d1, const uint8_t * s1, const orc_uint8 * s2, const orc_uint8 * s3, int n){
5949 orc_union32 * ORC_RESTRICT ptr0;
5950 const orc_union16 * ORC_RESTRICT ptr4;
5951 const orc_int8 * ORC_RESTRICT ptr5;
5952 const orc_int8 * ORC_RESTRICT ptr6;
5963 ptr0 = (orc_union32 *)d1;
5964 ptr4 = (orc_union16 *)s1;
5965 ptr5 = (orc_int8 *)s2;
5966 ptr6 = (orc_int8 *)s3;
5969 for (i = 0; i < n; i++) {
5975 var42 = (orc_uint16)var41.i & 0xff;
5977 var43 = ((orc_uint16)var41.i >> 8)&0xff;
5981 var44.i = ((orc_uint8)var42 & 0x00ff) | ((orc_uint8)var38 << 8);
5985 var45.i = ((orc_uint8)var43 & 0x00ff) | ((orc_uint8)var39 << 8);
/* NOTE(review): the orc_uint16 operand is promoted to int before << 16; with a
 * 32-bit int, values >= 0x8000 shift into the sign bit - confirm this is acceptable */
5987 var40.i = ((orc_uint16)var44.i & 0x0000ffff) | ((orc_uint16)var45.i << 16);
/* Backup implementation for orc_packyuyv; pointers come from ex->arrays[0]
 * (32-bit destination), ex->arrays[4] (16-bit source) and ex->arrays[5],
 * ex->arrays[6] (byte sources) (excerpt; loads, stores and the definition of
 * n are not shown).
 * Visible per-element arithmetic: var42 / var43 are the low / high byte of the
 * 16-bit var41; var44 = var42 | (var38 << 8); var45 = var43 | (var39 << 8);
 * var40 = var44 | (var45 << 16). */
5996 _backup_orc_packyuyv (OrcExecutor * ORC_RESTRICT ex)
6000 orc_union32 * ORC_RESTRICT ptr0;
6001 const orc_union16 * ORC_RESTRICT ptr4;
6002 const orc_int8 * ORC_RESTRICT ptr5;
6003 const orc_int8 * ORC_RESTRICT ptr6;
6014 ptr0 = (orc_union32 *)ex->arrays[0];
6015 ptr4 = (orc_union16 *)ex->arrays[4];
6016 ptr5 = (orc_int8 *)ex->arrays[5];
6017 ptr6 = (orc_int8 *)ex->arrays[6];
6020 for (i = 0; i < n; i++) {
6026 var42 = (orc_uint16)var41.i & 0xff;
6028 var43 = ((orc_uint16)var41.i >> 8)&0xff;
6032 var44.i = ((orc_uint8)var42 & 0x00ff) | ((orc_uint8)var38 << 8);
6036 var45.i = ((orc_uint8)var43 & 0x00ff) | ((orc_uint8)var39 << 8);
/* NOTE(review): the orc_uint16 operand is promoted to int before << 16; with a
 * 32-bit int, values >= 0x8000 shift into the sign bit - confirm this is acceptable */
6038 var40.i = ((orc_uint16)var44.i & 0x0000ffff) | ((orc_uint16)var45.i << 16);
/* ORC-backed variant of orc_packyuyv (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_packyuyv", registers _backup_orc_packyuyv
 * and compiles this instruction sequence:
 *   copyw     t5, s1
 *   select0wb t1, t5
 *   select1wb t2, t5
 *   mergebw   t3, t1, s2
 *   mergebw   t4, t2, s3
 *   mergewl   d1, t3, t4
 * d1 is declared with size argument 4, s1 with 2, s2 and s3 with 1; t1, t2
 * with 1 and t3, t4, t5 with 2.
 * s1 is typed const uint8_t * in the signature although the program declares
 * it with size 2 - NOTE(review): confirm the intended element type.
 * d1, s1, s2, s3: stored in the matching ex->arrays[] slots.
 * n: element count; its use is not visible in this excerpt. */
6046 orc_packyuyv (orc_uint32 * d1, const uint8_t * s1, const orc_uint8 * s2, const orc_uint8 * s3, int n)
6048 OrcExecutor _ex, *ex = &_ex;
6049 static int p_inited = 0;
6050 static OrcProgram *p = 0;
6051 void (*func) (OrcExecutor *);
6054 orc_once_mutex_lock ();
6056 OrcCompileResult result;
/* program definition */
6058 p = orc_program_new ();
6059 orc_program_set_name (p, "orc_packyuyv");
6060 orc_program_set_backup_function (p, _backup_orc_packyuyv);
6061 orc_program_add_destination (p, 4, "d1");
6062 orc_program_add_source (p, 2, "s1");
6063 orc_program_add_source (p, 1, "s2");
6064 orc_program_add_source (p, 1, "s3");
6065 orc_program_add_temporary (p, 1, "t1");
6066 orc_program_add_temporary (p, 1, "t2");
6067 orc_program_add_temporary (p, 2, "t3");
6068 orc_program_add_temporary (p, 2, "t4");
6069 orc_program_add_temporary (p, 2, "t5");
6071 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T5, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
6072 orc_program_append_2 (p, "select0wb", 0, ORC_VAR_T1, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
6073 orc_program_append_2 (p, "select1wb", 0, ORC_VAR_T2, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
6074 orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1);
6075 orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_S3, ORC_VAR_D1);
6076 orc_program_append_2 (p, "mergewl", 0, ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
6078 result = orc_program_compile (p);
6081 orc_once_mutex_unlock ();
/* the const source pointers are cast to void * to fit the executor's array slots */
6086 ex->arrays[ORC_VAR_D1] = d1;
6087 ex->arrays[ORC_VAR_S1] = (void *)s1;
6088 ex->arrays[ORC_VAR_S2] = (void *)s2;
6089 ex->arrays[ORC_VAR_S3] = (void *)s3;
6091 func = p->code_exec;
6097 /* orc_unpack_uyvy_y */
/* Plain-C variant of orc_unpack_uyvy_y (excerpt; loads and stores are not shown).
 * d1: byte destination (ptr0); s1: 16-bit source read as orc_union16 (ptr4).
 * n:  number of elements (loop bound).
 * Per element the visible computation keeps the high byte of a 16-bit value. */
6100 orc_unpack_uyvy_y (orc_uint8 * d1, const orc_uint16 * s1, int n){
6102 orc_int8 * ORC_RESTRICT ptr0;
6103 const orc_union16 * ORC_RESTRICT ptr4;
6107 ptr0 = (orc_int8 *)d1;
6108 ptr4 = (orc_union16 *)s1;
6111 for (i = 0; i < n; i++) {
6115 var33 = ((orc_uint16)var32.i >> 8)&0xff;
/* Backup implementation for orc_unpack_uyvy_y; pointers come from
 * ex->arrays[0] (destination bytes) and ex->arrays[4] (16-bit source)
 * (excerpt; loads, stores and the definition of n are not shown).
 * Per element the visible computation keeps the high byte of a 16-bit value. */
6124 _backup_orc_unpack_uyvy_y (OrcExecutor * ORC_RESTRICT ex)
6128 orc_int8 * ORC_RESTRICT ptr0;
6129 const orc_union16 * ORC_RESTRICT ptr4;
6133 ptr0 = (orc_int8 *)ex->arrays[0];
6134 ptr4 = (orc_union16 *)ex->arrays[4];
6137 for (i = 0; i < n; i++) {
6141 var33 = ((orc_uint16)var32.i >> 8)&0xff;
/* ORC-backed variant of orc_unpack_uyvy_y (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_unpack_uyvy_y", registers
 * _backup_orc_unpack_uyvy_y, declares destination "d1" (size argument 1) and
 * source "s1" (size argument 2), appends one "select1wb" instruction and
 * compiles it.
 * d1, s1: stored in ex->arrays[ORC_VAR_D1] / ex->arrays[ORC_VAR_S1].
 * n: element count; its use is not visible in this excerpt. */
6149 orc_unpack_uyvy_y (orc_uint8 * d1, const orc_uint16 * s1, int n)
6151 OrcExecutor _ex, *ex = &_ex;
6152 static int p_inited = 0;
6153 static OrcProgram *p = 0;
6154 void (*func) (OrcExecutor *);
6157 orc_once_mutex_lock ();
6159 OrcCompileResult result;
/* program definition */
6161 p = orc_program_new ();
6162 orc_program_set_name (p, "orc_unpack_uyvy_y");
6163 orc_program_set_backup_function (p, _backup_orc_unpack_uyvy_y);
6164 orc_program_add_destination (p, 1, "d1");
6165 orc_program_add_source (p, 2, "s1");
6167 orc_program_append_2 (p, "select1wb", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
6169 result = orc_program_compile (p);
6172 orc_once_mutex_unlock ();
/* the const source pointer is cast to void * to fit the executor's array slot */
6177 ex->arrays[ORC_VAR_D1] = d1;
6178 ex->arrays[ORC_VAR_S1] = (void *)s1;
6180 func = p->code_exec;
6186 /* orc_unpack_uyvy_u */
/* Plain-C variant of orc_unpack_uyvy_u (excerpt; loads and stores are not shown).
 * d1: byte destination (ptr0); s1: 32-bit source read as orc_union32 (ptr4).
 * n:  number of elements (loop bound).
 * Per element the visible computation takes the low 16 bits of a 32-bit value
 * and then the low byte of that half (bits 0..7 of the input). */
6189 orc_unpack_uyvy_u (orc_uint8 * d1, const orc_uint32 * s1, int n){
6191 orc_int8 * ORC_RESTRICT ptr0;
6192 const orc_union32 * ORC_RESTRICT ptr4;
6197 ptr0 = (orc_int8 *)d1;
6198 ptr4 = (orc_union32 *)s1;
6201 for (i = 0; i < n; i++) {
6205 var35.i = (orc_uint32)var33.i & 0xffff;
6207 var34 = (orc_uint16)var35.i & 0xff;
/* Backup implementation for orc_unpack_uyvy_u; pointers come from
 * ex->arrays[0] (destination bytes) and ex->arrays[4] (32-bit source)
 * (excerpt; loads, stores and the definition of n are not shown).
 * Per element the visible computation takes the low 16 bits of a 32-bit value
 * and then the low byte of that half. */
6216 _backup_orc_unpack_uyvy_u (OrcExecutor * ORC_RESTRICT ex)
6220 orc_int8 * ORC_RESTRICT ptr0;
6221 const orc_union32 * ORC_RESTRICT ptr4;
6226 ptr0 = (orc_int8 *)ex->arrays[0];
6227 ptr4 = (orc_union32 *)ex->arrays[4];
6230 for (i = 0; i < n; i++) {
6234 var35.i = (orc_uint32)var33.i & 0xffff;
6236 var34 = (orc_uint16)var35.i & 0xff;
/* ORC-backed variant of orc_unpack_uyvy_u (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_unpack_uyvy_u", registers
 * _backup_orc_unpack_uyvy_u and compiles this instruction sequence:
 *   select0lw t1, s1
 *   select0wb d1, t1
 * d1 is declared with size argument 1, s1 with 4 and t1 with 2.
 * d1, s1: stored in ex->arrays[ORC_VAR_D1] / ex->arrays[ORC_VAR_S1].
 * n: element count; its use is not visible in this excerpt. */
6244 orc_unpack_uyvy_u (orc_uint8 * d1, const orc_uint32 * s1, int n)
6246 OrcExecutor _ex, *ex = &_ex;
6247 static int p_inited = 0;
6248 static OrcProgram *p = 0;
6249 void (*func) (OrcExecutor *);
6252 orc_once_mutex_lock ();
6254 OrcCompileResult result;
/* program definition */
6256 p = orc_program_new ();
6257 orc_program_set_name (p, "orc_unpack_uyvy_u");
6258 orc_program_set_backup_function (p, _backup_orc_unpack_uyvy_u);
6259 orc_program_add_destination (p, 1, "d1");
6260 orc_program_add_source (p, 4, "s1");
6261 orc_program_add_temporary (p, 2, "t1");
6263 orc_program_append_2 (p, "select0lw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
6264 orc_program_append_2 (p, "select0wb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
6266 result = orc_program_compile (p);
6269 orc_once_mutex_unlock ();
/* the const source pointer is cast to void * to fit the executor's array slot */
6274 ex->arrays[ORC_VAR_D1] = d1;
6275 ex->arrays[ORC_VAR_S1] = (void *)s1;
6277 func = p->code_exec;
6283 /* orc_unpack_uyvy_v */
/* Plain-C variant of orc_unpack_uyvy_v (excerpt; loads and stores are not shown).
 * d1: byte destination (ptr0); s1: 32-bit source read as orc_union32 (ptr4).
 * n:  number of elements (loop bound).
 * Per element the visible computation takes the high 16 bits of a 32-bit value
 * and then the low byte of that half (bits 16..23 of the input). */
6286 orc_unpack_uyvy_v (orc_uint8 * d1, const orc_uint32 * s1, int n){
6288 orc_int8 * ORC_RESTRICT ptr0;
6289 const orc_union32 * ORC_RESTRICT ptr4;
6294 ptr0 = (orc_int8 *)d1;
6295 ptr4 = (orc_union32 *)s1;
6298 for (i = 0; i < n; i++) {
6302 var35.i = ((orc_uint32)var33.i >> 16)&0xffff;
6304 var34 = (orc_uint16)var35.i & 0xff;
/* Backup implementation for orc_unpack_uyvy_v; pointers come from
 * ex->arrays[0] (destination bytes) and ex->arrays[4] (32-bit source)
 * (excerpt; loads, stores and the definition of n are not shown).
 * Per element the visible computation takes the high 16 bits of a 32-bit value
 * and then the low byte of that half. */
6313 _backup_orc_unpack_uyvy_v (OrcExecutor * ORC_RESTRICT ex)
6317 orc_int8 * ORC_RESTRICT ptr0;
6318 const orc_union32 * ORC_RESTRICT ptr4;
6323 ptr0 = (orc_int8 *)ex->arrays[0];
6324 ptr4 = (orc_union32 *)ex->arrays[4];
6327 for (i = 0; i < n; i++) {
6331 var35.i = ((orc_uint32)var33.i >> 16)&0xffff;
6333 var34 = (orc_uint16)var35.i & 0xff;
/* ORC-backed variant of orc_unpack_uyvy_v (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_unpack_uyvy_v", registers
 * _backup_orc_unpack_uyvy_v and compiles this instruction sequence:
 *   select1lw t1, s1
 *   select0wb d1, t1
 * d1 is declared with size argument 1, s1 with 4 and t1 with 2.
 * d1, s1: stored in ex->arrays[ORC_VAR_D1] / ex->arrays[ORC_VAR_S1].
 * n: element count; its use is not visible in this excerpt. */
6341 orc_unpack_uyvy_v (orc_uint8 * d1, const orc_uint32 * s1, int n)
6343 OrcExecutor _ex, *ex = &_ex;
6344 static int p_inited = 0;
6345 static OrcProgram *p = 0;
6346 void (*func) (OrcExecutor *);
6349 orc_once_mutex_lock ();
6351 OrcCompileResult result;
/* program definition */
6353 p = orc_program_new ();
6354 orc_program_set_name (p, "orc_unpack_uyvy_v");
6355 orc_program_set_backup_function (p, _backup_orc_unpack_uyvy_v);
6356 orc_program_add_destination (p, 1, "d1");
6357 orc_program_add_source (p, 4, "s1");
6358 orc_program_add_temporary (p, 2, "t1");
6360 orc_program_append_2 (p, "select1lw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
6361 orc_program_append_2 (p, "select0wb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
6363 result = orc_program_compile (p);
6366 orc_once_mutex_unlock ();
/* the const source pointer is cast to void * to fit the executor's array slot */
6371 ex->arrays[ORC_VAR_D1] = d1;
6372 ex->arrays[ORC_VAR_S1] = (void *)s1;
6374 func = p->code_exec;
6380 /* orc_interleave2_s16 */
/* Plain-C variant of orc_interleave2_s16 (excerpt; loads and stores are not shown).
 * d1: declared int16_t * but written through ptr0 as orc_union32, i.e. one
 *     32-bit unit per loop iteration.
 * s1, s2: 16-bit sources (ptr4, ptr5).
 * n:  number of iterations (loop bound).
 * Per element the visible computation packs var32 into the low 16 bits and
 * var33 into the high 16 bits of var34. */
6383 orc_interleave2_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
6385 orc_union32 * ORC_RESTRICT ptr0;
6386 const orc_union16 * ORC_RESTRICT ptr4;
6387 const orc_union16 * ORC_RESTRICT ptr5;
6392 ptr0 = (orc_union32 *)d1;
6393 ptr4 = (orc_union16 *)s1;
6394 ptr5 = (orc_union16 *)s2;
6397 for (i = 0; i < n; i++) {
/* NOTE(review): the orc_uint16 operand is promoted to int before << 16; with a
 * 32-bit int, values >= 0x8000 shift into the sign bit - confirm this is acceptable */
6403 var34.i = ((orc_uint16)var32.i & 0x0000ffff) | ((orc_uint16)var33.i << 16);
/* Backup implementation for orc_interleave2_s16; pointers come from
 * ex->arrays[0] (32-bit destination) and ex->arrays[4], ex->arrays[5]
 * (16-bit sources) (excerpt; loads, stores and the definition of n are not shown).
 * Per element the visible computation packs var32 into the low 16 bits and
 * var33 into the high 16 bits of var34. */
6412 _backup_orc_interleave2_s16 (OrcExecutor * ORC_RESTRICT ex)
6416 orc_union32 * ORC_RESTRICT ptr0;
6417 const orc_union16 * ORC_RESTRICT ptr4;
6418 const orc_union16 * ORC_RESTRICT ptr5;
6423 ptr0 = (orc_union32 *)ex->arrays[0];
6424 ptr4 = (orc_union16 *)ex->arrays[4];
6425 ptr5 = (orc_union16 *)ex->arrays[5];
6428 for (i = 0; i < n; i++) {
/* NOTE(review): the orc_uint16 operand is promoted to int before << 16; with a
 * 32-bit int, values >= 0x8000 shift into the sign bit - confirm this is acceptable */
6434 var34.i = ((orc_uint16)var32.i & 0x0000ffff) | ((orc_uint16)var33.i << 16);
/* ORC-backed variant of orc_interleave2_s16 (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_interleave2_s16", registers
 * _backup_orc_interleave2_s16, declares destination "d1" (size argument 4)
 * and sources "s1", "s2" (size argument 2 each), appends one "mergewl"
 * instruction and compiles it.
 * d1, s1, s2: stored in ex->arrays[ORC_VAR_D1 / ORC_VAR_S1 / ORC_VAR_S2].
 * n: element count; its use is not visible in this excerpt. */
6442 orc_interleave2_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
6444 OrcExecutor _ex, *ex = &_ex;
6445 static int p_inited = 0;
6446 static OrcProgram *p = 0;
6447 void (*func) (OrcExecutor *);
6450 orc_once_mutex_lock ();
6452 OrcCompileResult result;
/* program definition */
6454 p = orc_program_new ();
6455 orc_program_set_name (p, "orc_interleave2_s16");
6456 orc_program_set_backup_function (p, _backup_orc_interleave2_s16);
6457 orc_program_add_destination (p, 4, "d1");
6458 orc_program_add_source (p, 2, "s1");
6459 orc_program_add_source (p, 2, "s2");
6461 orc_program_append_2 (p, "mergewl", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
6463 result = orc_program_compile (p);
6466 orc_once_mutex_unlock ();
/* the const source pointers are cast to void * to fit the executor's array slots */
6471 ex->arrays[ORC_VAR_D1] = d1;
6472 ex->arrays[ORC_VAR_S1] = (void *)s1;
6473 ex->arrays[ORC_VAR_S2] = (void *)s2;
6475 func = p->code_exec;
6481 /* orc_interleave2_rrshift1_s16 */
/* Plain-C variant of orc_interleave2_rrshift1_s16 (excerpt; loads and stores are not shown).
 * d1: declared int16_t * but written through ptr0 as orc_union32, i.e. one
 *     32-bit unit per loop iteration.
 * s1, s2: 16-bit sources (ptr4, ptr5).
 * n:  number of iterations (loop bound).
 * Visible per-element arithmetic: var40 = (var34 + 1) >> 1;
 * var42 = (var36 + 1) >> 1; var38 packs var40 into its low 16 bits and var42
 * into its high 16 bits. */
6484 orc_interleave2_rrshift1_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
6486 orc_union32 * ORC_RESTRICT ptr0;
6487 const orc_union16 * ORC_RESTRICT ptr4;
6488 const orc_union16 * ORC_RESTRICT ptr5;
6499 ptr0 = (orc_union32 *)d1;
6500 ptr4 = (orc_union16 *)s1;
6501 ptr5 = (orc_union16 *)s2;
/* both constants are 1: the rounding offset added before each shift by 1 */
6504 var35.i = 0x00000001; /* 1 or 4.94066e-324f */
6506 var37.i = 0x00000001; /* 1 or 4.94066e-324f */
6508 for (i = 0; i < n; i++) {
6512 var39.i = var34.i + var35.i;
6514 var40.i = var39.i >> 1;
6518 var41.i = var36.i + var37.i;
6520 var42.i = var41.i >> 1;
/* NOTE(review): the orc_uint16 operand is promoted to int before << 16; with a
 * 32-bit int, values >= 0x8000 shift into the sign bit - confirm this is acceptable */
6522 var38.i = ((orc_uint16)var40.i & 0x0000ffff) | ((orc_uint16)var42.i << 16);
/* Backup implementation for orc_interleave2_rrshift1_s16; pointers come from
 * ex->arrays[0] (32-bit destination) and ex->arrays[4], ex->arrays[5]
 * (16-bit sources) (excerpt; loads, stores and the definition of n are not shown).
 * Visible per-element arithmetic: var40 = (var34 + 1) >> 1;
 * var42 = (var36 + 1) >> 1; var38 packs var40 into its low 16 bits and var42
 * into its high 16 bits. */
6531 _backup_orc_interleave2_rrshift1_s16 (OrcExecutor * ORC_RESTRICT ex)
6535 orc_union32 * ORC_RESTRICT ptr0;
6536 const orc_union16 * ORC_RESTRICT ptr4;
6537 const orc_union16 * ORC_RESTRICT ptr5;
6548 ptr0 = (orc_union32 *)ex->arrays[0];
6549 ptr4 = (orc_union16 *)ex->arrays[4];
6550 ptr5 = (orc_union16 *)ex->arrays[5];
6553 var35.i = 0x00000001; /* 1 or 4.94066e-324f */
6555 var37.i = 0x00000001; /* 1 or 4.94066e-324f */
6557 for (i = 0; i < n; i++) {
6561 var39.i = var34.i + var35.i;
6563 var40.i = var39.i >> 1;
6567 var41.i = var36.i + var37.i;
6569 var42.i = var41.i >> 1;
/* NOTE(review): the orc_uint16 operand is promoted to int before << 16; with a
 * 32-bit int, values >= 0x8000 shift into the sign bit - confirm this is acceptable */
6571 var38.i = ((orc_uint16)var40.i & 0x0000ffff) | ((orc_uint16)var42.i << 16);
/* ORC-backed variant of orc_interleave2_rrshift1_s16 (excerpt; not every original line is shown).
 * Between orc_once_mutex_lock() and orc_once_mutex_unlock() it creates the
 * function-static OrcProgram "orc_interleave2_rrshift1_s16", registers
 * _backup_orc_interleave2_rrshift1_s16 and compiles this instruction sequence:
 *   addw    t1, s1, c1   (c1 = 1)
 *   shrsw   t1, t1, c1
 *   addw    t2, s2, c1
 *   shrsw   t2, t2, c1
 *   mergewl d1, t1, t2
 * The single constant c1 = 1 is used both as the addend and as the shift count.
 * d1 is declared with size argument 4; s1, s2, t1 and t2 with 2.
 * d1, s1, s2: stored in ex->arrays[ORC_VAR_D1 / ORC_VAR_S1 / ORC_VAR_S2].
 * n: element count; its use is not visible in this excerpt. */
6579 orc_interleave2_rrshift1_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
6581 OrcExecutor _ex, *ex = &_ex;
6582 static int p_inited = 0;
6583 static OrcProgram *p = 0;
6584 void (*func) (OrcExecutor *);
6587 orc_once_mutex_lock ();
6589 OrcCompileResult result;
/* program definition */
6591 p = orc_program_new ();
6592 orc_program_set_name (p, "orc_interleave2_rrshift1_s16");
6593 orc_program_set_backup_function (p, _backup_orc_interleave2_rrshift1_s16);
6594 orc_program_add_destination (p, 4, "d1");
6595 orc_program_add_source (p, 2, "s1");
6596 orc_program_add_source (p, 2, "s2");
6597 orc_program_add_constant (p, 4, 0x00000001, "c1");
6598 orc_program_add_temporary (p, 2, "t1");
6599 orc_program_add_temporary (p, 2, "t2");
6601 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);
6602 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
6603 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_C1, ORC_VAR_D1);
6604 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
6605 orc_program_append_2 (p, "mergewl", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
/* the compile result is stored; no check of it is visible in this excerpt */
6607 result = orc_program_compile (p);
6610 orc_once_mutex_unlock ();
/* the const source pointers are cast to void * to fit the executor's array slots */
6615 ex->arrays[ORC_VAR_D1] = d1;
6616 ex->arrays[ORC_VAR_S1] = (void *)s1;
6617 ex->arrays[ORC_VAR_S2] = (void *)s2;
6619 func = p->code_exec;
6625 /* orc_deinterleave2_s16 */
/*
 * Plain C variant of orc_deinterleave2_s16 taking raw pointers. A second
 * definition with the same signature appears later in the file, so the two
 * are presumably selected by a preprocessor conditional not visible here.
 * Per iteration it splits one 32-bit value into its low and high 16-bit
 * halves. The load of var36 and the stores of var34/var35 are not visible
 * (presumably from ptr4 and to ptr0/ptr1 -- confirm).
 */
6628 orc_deinterleave2_s16 (int16_t * d1, int16_t * d2, const int16_t * s1, int n){
6630 orc_union16 * ORC_RESTRICT ptr0;
6631 orc_union16 * ORC_RESTRICT ptr1;
6632 const orc_union32 * ORC_RESTRICT ptr4;
/* d1/d2 are walked as 16-bit elements, s1 as 32-bit elements. */
6638 ptr0 = (orc_union16 *)d1;
6639 ptr1 = (orc_union16 *)d2;
6640 ptr4 = (orc_union32 *)s1;
6643 for (i = 0; i < n; i++) {
/* Low half: bits 0-15. */
6649 var34.i = (orc_uint32)var36.i & 0xffff;
/* High half: bits 16-31. */
6653 var35.i = ((orc_uint32)var36.i >> 16)&0xffff;
/*
 * C backup for the "orc_deinterleave2_s16" program, driven through an
 * OrcExecutor. Per iteration it splits one 32-bit value into its low and
 * high 16-bit halves. The load of var36 and the stores of var34/var35 are
 * not visible in this excerpt.
 */
6662 _backup_orc_deinterleave2_s16 (OrcExecutor * ORC_RESTRICT ex)
6666 orc_union16 * ORC_RESTRICT ptr0;
6667 orc_union16 * ORC_RESTRICT ptr1;
6668 const orc_union32 * ORC_RESTRICT ptr4;
/* Executor slots 0 and 1 are viewed as 16-bit elements, slot 4 as 32-bit. */
6674 ptr0 = (orc_union16 *)ex->arrays[0];
6675 ptr1 = (orc_union16 *)ex->arrays[1];
6676 ptr4 = (orc_union32 *)ex->arrays[4];
6679 for (i = 0; i < n; i++) {
/* Low half: bits 0-15. */
6685 var34.i = (orc_uint32)var36.i & 0xffff;
/* High half: bits 16-31. */
6689 var35.i = ((orc_uint32)var36.i >> 16)&0xffff;
/*
 * Orc-backed entry point. The visible code builds an OrcProgram between
 * orc_once_mutex_lock()/orc_once_mutex_unlock(), binds the caller's arrays
 * into a stack OrcExecutor and fetches the entry point from p->code_exec.
 * The test of p_inited and the call through func are presumably on lines not
 * visible in this excerpt.
 * Opcode notes read the operand list as (dest, src1, ...); the trailing
 * ORC_VAR_D1 arguments look like fillers for unused slots -- confirm.
 */
6697 orc_deinterleave2_s16 (int16_t * d1, int16_t * d2, const int16_t * s1, int n)
6699 OrcExecutor _ex, *ex = &_ex;
6700 static int p_inited = 0;
6701 static OrcProgram *p = 0;
6702 void (*func) (OrcExecutor *);
6705 orc_once_mutex_lock ();
6707 OrcCompileResult result;
6709 p = orc_program_new ();
6710 orc_program_set_name (p, "orc_deinterleave2_s16");
6711 orc_program_set_backup_function (p, _backup_orc_deinterleave2_s16);
/* Two 2-byte destinations, one 4-byte source, one 4-byte temporary. */
6712 orc_program_add_destination (p, 2, "d1");
6713 orc_program_add_destination (p, 2, "d2");
6714 orc_program_add_source (p, 4, "s1");
6715 orc_program_add_temporary (p, 4, "t1");
/* t1 = s1; d1 = first 16-bit half of t1; d2 = second 16-bit half of t1. */
6717 orc_program_append_2 (p, "copyl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
6718 orc_program_append_2 (p, "select0lw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
6719 orc_program_append_2 (p, "select1lw", 0, ORC_VAR_D2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
6721 result = orc_program_compile (p);
6724 orc_once_mutex_unlock ();
/* Bind caller buffers; the cast drops const from the source pointer. */
6729 ex->arrays[ORC_VAR_D1] = d1;
6730 ex->arrays[ORC_VAR_D2] = d2;
6731 ex->arrays[ORC_VAR_S1] = (void *)s1;
6733 func = p->code_exec;
6739 /* orc_deinterleave2_lshift1_s16 */
/*
 * Plain C variant of orc_deinterleave2_lshift1_s16 taking raw pointers
 * (a same-signature Orc wrapper follows later; presumably chosen by a
 * preprocessor conditional not visible here).
 * Per iteration it splits a 32-bit value into two 16-bit halves and shifts
 * each half left by one bit. The load of var38 and the stores of
 * var36/var37 are not visible in this excerpt.
 */
6742 orc_deinterleave2_lshift1_s16 (int16_t * d1, int16_t * d2, const int16_t * s1, int n){
6744 orc_union16 * ORC_RESTRICT ptr0;
6745 orc_union16 * ORC_RESTRICT ptr1;
6746 const orc_union32 * ORC_RESTRICT ptr4;
/* d1/d2 are walked as 16-bit elements, s1 as 32-bit elements. */
6754 ptr0 = (orc_union16 *)d1;
6755 ptr1 = (orc_union16 *)d2;
6756 ptr4 = (orc_union32 *)s1;
6759 for (i = 0; i < n; i++) {
/* Low half, then doubled.
 * NOTE(review): << 1 on what appears to be a signed 16-bit value; confirm
 * negative inputs are acceptable here. */
6765 var39.i = (orc_uint32)var38.i & 0xffff;
6767 var36.i = var39.i << 1;
/* High half, then doubled. */
6771 var40.i = ((orc_uint32)var38.i >> 16)&0xffff;
6773 var37.i = var40.i << 1;
/*
 * C backup for the "orc_deinterleave2_lshift1_s16" program, driven through
 * an OrcExecutor. Per iteration it splits a 32-bit value into two 16-bit
 * halves and shifts each half left by one bit. The load of var38 and the
 * stores of var36/var37 are not visible in this excerpt.
 */
6782 _backup_orc_deinterleave2_lshift1_s16 (OrcExecutor * ORC_RESTRICT ex)
6786 orc_union16 * ORC_RESTRICT ptr0;
6787 orc_union16 * ORC_RESTRICT ptr1;
6788 const orc_union32 * ORC_RESTRICT ptr4;
/* Executor slots 0 and 1 are viewed as 16-bit elements, slot 4 as 32-bit. */
6796 ptr0 = (orc_union16 *)ex->arrays[0];
6797 ptr1 = (orc_union16 *)ex->arrays[1];
6798 ptr4 = (orc_union32 *)ex->arrays[4];
6801 for (i = 0; i < n; i++) {
/* Low half, then doubled.
 * NOTE(review): << 1 on what appears to be a signed 16-bit value; confirm
 * negative inputs are acceptable here. */
6807 var39.i = (orc_uint32)var38.i & 0xffff;
6809 var36.i = var39.i << 1;
/* High half, then doubled. */
6813 var40.i = ((orc_uint32)var38.i >> 16)&0xffff;
6815 var37.i = var40.i << 1;
/*
 * Orc-backed entry point. The visible code builds an OrcProgram between
 * orc_once_mutex_lock()/orc_once_mutex_unlock(), binds the caller's arrays
 * into a stack OrcExecutor and fetches the entry point from p->code_exec.
 * The test of p_inited and the call through func are presumably on lines not
 * visible in this excerpt.
 * Opcode notes read the operand list as (dest, src1, src2); the trailing
 * ORC_VAR_D1 arguments look like fillers for unused slots -- confirm.
 */
6823 orc_deinterleave2_lshift1_s16 (int16_t * d1, int16_t * d2, const int16_t * s1, int n)
6825 OrcExecutor _ex, *ex = &_ex;
6826 static int p_inited = 0;
6827 static OrcProgram *p = 0;
6828 void (*func) (OrcExecutor *);
6831 orc_once_mutex_lock ();
6833 OrcCompileResult result;
6835 p = orc_program_new ();
6836 orc_program_set_name (p, "orc_deinterleave2_lshift1_s16");
6837 orc_program_set_backup_function (p, _backup_orc_deinterleave2_lshift1_s16);
/* Two 2-byte destinations, a 4-byte source, constant c1 = 1 (shift count). */
6838 orc_program_add_destination (p, 2, "d1");
6839 orc_program_add_destination (p, 2, "d2");
6840 orc_program_add_source (p, 4, "s1");
6841 orc_program_add_constant (p, 4, 0x00000001, "c1");
6842 orc_program_add_temporary (p, 4, "t1");
6843 orc_program_add_temporary (p, 2, "t2");
6844 orc_program_add_temporary (p, 2, "t3");
/* t1 = s1 */
6846 orc_program_append_2 (p, "copyl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
/* d1 = (first half of t1) << c1 */
6847 orc_program_append_2 (p, "select0lw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
6848 orc_program_append_2 (p, "shlw", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
/* d2 = (second half of t1) << c1 */
6849 orc_program_append_2 (p, "select1lw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
6850 orc_program_append_2 (p, "shlw", 0, ORC_VAR_D2, ORC_VAR_T3, ORC_VAR_C1, ORC_VAR_D1);
6852 result = orc_program_compile (p);
6855 orc_once_mutex_unlock ();
/* Bind caller buffers; the cast drops const from the source pointer. */
6860 ex->arrays[ORC_VAR_D1] = d1;
6861 ex->arrays[ORC_VAR_D2] = d2;
6862 ex->arrays[ORC_VAR_S1] = (void *)s1;
6864 func = p->code_exec;
6870 /* orc_haar_deint_lshift1_split_s16 */
/*
 * Plain C variant of orc_haar_deint_lshift1_split_s16 taking raw pointers
 * (a same-signature Orc wrapper follows later; presumably chosen by a
 * preprocessor conditional not visible here).
 * Per iteration: split a 32-bit value into halves a (low) and b (high),
 * double both, form diff = 2b - 2a, then 2a + ((diff + 1) >> 1).
 * The load of var39 and the stores to d1/d2 are not visible in this excerpt.
 */
6873 orc_haar_deint_lshift1_split_s16 (int16_t * d1, int16_t * d2, const int16_t * s1, int n){
6875 orc_union16 * ORC_RESTRICT ptr0;
6876 orc_union16 * ORC_RESTRICT ptr1;
6877 const orc_union32 * ORC_RESTRICT ptr4;
/* d1/d2 are walked as 16-bit elements, s1 as 32-bit elements. */
6890 ptr0 = (orc_union16 *)d1;
6891 ptr1 = (orc_union16 *)d2;
6892 ptr4 = (orc_union32 *)s1;
/* Second operand of the rounding average below is the constant 0. */
6895 var37.i = 0x00000000; /* 0 or 0f */
6897 for (i = 0; i < n; i++) {
/* Low and high 16-bit halves of the input word. */
6903 var40.i = (orc_uint32)var39.i & 0xffff;
6905 var41.i = ((orc_uint32)var39.i >> 16)&0xffff;
/* Double both halves.
 * NOTE(review): << 1 on what appears to be a signed 16-bit value. */
6907 var42.i = var40.i << 1;
6909 var43.i = var41.i << 1;
/* Difference: doubled high half minus doubled low half. */
6911 var44.i = var43.i - var42.i;
/* Rounded half of the difference: (diff + 0 + 1) >> 1. */
6917 var45.i = (var44.i + var37.i + 1)>>1;
/* Doubled low half plus the rounded half-difference. */
6919 var38.i = var42.i + var45.i;
/*
 * C backup for the "orc_haar_deint_lshift1_split_s16" program, driven
 * through an OrcExecutor. Per iteration: split a 32-bit value into halves
 * a (low) and b (high), double both, form diff = 2b - 2a, then
 * 2a + ((diff + 1) >> 1). The load of var39 and the stores are not visible
 * in this excerpt.
 */
6928 _backup_orc_haar_deint_lshift1_split_s16 (OrcExecutor * ORC_RESTRICT ex)
6932 orc_union16 * ORC_RESTRICT ptr0;
6933 orc_union16 * ORC_RESTRICT ptr1;
6934 const orc_union32 * ORC_RESTRICT ptr4;
/* Executor slots 0 and 1 are viewed as 16-bit elements, slot 4 as 32-bit. */
6947 ptr0 = (orc_union16 *)ex->arrays[0];
6948 ptr1 = (orc_union16 *)ex->arrays[1];
6949 ptr4 = (orc_union32 *)ex->arrays[4];
/* Second operand of the rounding average below is the constant 0. */
6952 var37.i = 0x00000000; /* 0 or 0f */
6954 for (i = 0; i < n; i++) {
/* Low and high 16-bit halves of the input word. */
6960 var40.i = (orc_uint32)var39.i & 0xffff;
6962 var41.i = ((orc_uint32)var39.i >> 16)&0xffff;
/* Double both halves.
 * NOTE(review): << 1 on what appears to be a signed 16-bit value. */
6964 var42.i = var40.i << 1;
6966 var43.i = var41.i << 1;
/* Difference: doubled high half minus doubled low half. */
6968 var44.i = var43.i - var42.i;
/* Rounded half of the difference: (diff + 0 + 1) >> 1. */
6974 var45.i = (var44.i + var37.i + 1)>>1;
/* Doubled low half plus the rounded half-difference. */
6976 var38.i = var42.i + var45.i;
/*
 * Orc-backed entry point. The visible code builds an OrcProgram between
 * orc_once_mutex_lock()/orc_once_mutex_unlock(), binds the caller's arrays
 * into a stack OrcExecutor and fetches the entry point from p->code_exec.
 * The test of p_inited and the call through func are presumably on lines not
 * visible in this excerpt.
 * Opcode notes read the operand list as (dest, src1, src2); the trailing
 * ORC_VAR_D1 arguments look like fillers for unused slots -- confirm.
 */
6984 orc_haar_deint_lshift1_split_s16 (int16_t * d1, int16_t * d2, const int16_t * s1, int n)
6986 OrcExecutor _ex, *ex = &_ex;
6987 static int p_inited = 0;
6988 static OrcProgram *p = 0;
6989 void (*func) (OrcExecutor *);
6992 orc_once_mutex_lock ();
6994 OrcCompileResult result;
6996 p = orc_program_new ();
6997 orc_program_set_name (p, "orc_haar_deint_lshift1_split_s16");
6998 orc_program_set_backup_function (p, _backup_orc_haar_deint_lshift1_split_s16);
/* Two 2-byte destinations, a 4-byte source, constants c1 = 1 and c2 = 0. */
6999 orc_program_add_destination (p, 2, "d1");
7000 orc_program_add_destination (p, 2, "d2");
7001 orc_program_add_source (p, 4, "s1");
7002 orc_program_add_constant (p, 4, 0x00000001, "c1");
7003 orc_program_add_constant (p, 4, 0x00000000, "c2");
7004 orc_program_add_temporary (p, 2, "t1");
7005 orc_program_add_temporary (p, 2, "t2");
7006 orc_program_add_temporary (p, 4, "t3");
/* t3 = s1; t1 = first half of t3; t2 = second half of t3 */
7008 orc_program_append_2 (p, "copyl", 0, ORC_VAR_T3, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
7009 orc_program_append_2 (p, "select0lw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
7010 orc_program_append_2 (p, "select1lw", 0, ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
/* t1 <<= c1; t2 <<= c1 */
7011 orc_program_append_2 (p, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
7012 orc_program_append_2 (p, "shlw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
/* t2 = t2 - t1; d2 = t2 */
7013 orc_program_append_2 (p, "subw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
7014 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
/* t2 = avgsw(t2, c2), i.e. (t2 + 0 + 1) >> 1 in the C backup; d1 = t1 + t2 */
7015 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C2, ORC_VAR_D1);
7016 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
7018 result = orc_program_compile (p);
7021 orc_once_mutex_unlock ();
/* Bind caller buffers; the cast drops const from the source pointer. */
7026 ex->arrays[ORC_VAR_D1] = d1;
7027 ex->arrays[ORC_VAR_D2] = d2;
7028 ex->arrays[ORC_VAR_S1] = (void *)s1;
7030 func = p->code_exec;
7036 /* orc_haar_deint_split_s16 */
/*
 * Plain C variant of orc_haar_deint_split_s16 taking raw pointers (a
 * same-signature Orc wrapper follows later; presumably chosen by a
 * preprocessor conditional not visible here).
 * Per iteration: split a 32-bit value into halves a (low) and b (high),
 * form diff = b - a, then a + ((diff + 1) >> 1).
 * The load of var39 and the stores to d1/d2 are not visible in this excerpt.
 */
7039 orc_haar_deint_split_s16 (int16_t * d1, int16_t * d2, const int16_t * s1, int n){
7041 orc_union16 * ORC_RESTRICT ptr0;
7042 orc_union16 * ORC_RESTRICT ptr1;
7043 const orc_union32 * ORC_RESTRICT ptr4;
/* d1/d2 are walked as 16-bit elements, s1 as 32-bit elements. */
7054 ptr0 = (orc_union16 *)d1;
7055 ptr1 = (orc_union16 *)d2;
7056 ptr4 = (orc_union32 *)s1;
/* Second operand of the rounding average below is the constant 0. */
7059 var37.i = 0x00000000; /* 0 or 0f */
7061 for (i = 0; i < n; i++) {
/* Low and high 16-bit halves of the input word. */
7067 var40.i = (orc_uint32)var39.i & 0xffff;
7069 var41.i = ((orc_uint32)var39.i >> 16)&0xffff;
/* Difference: high half minus low half. */
7071 var42.i = var41.i - var40.i;
/* Rounded half of the difference: (diff + 0 + 1) >> 1. */
7077 var43.i = (var42.i + var37.i + 1)>>1;
/* Low half plus the rounded half-difference. */
7079 var38.i = var40.i + var43.i;
/*
 * C backup for the "orc_haar_deint_split_s16" program, driven through an
 * OrcExecutor. Per iteration: split a 32-bit value into halves a (low) and
 * b (high), form diff = b - a, then a + ((diff + 1) >> 1). The load of
 * var39 and the stores are not visible in this excerpt.
 */
7088 _backup_orc_haar_deint_split_s16 (OrcExecutor * ORC_RESTRICT ex)
7092 orc_union16 * ORC_RESTRICT ptr0;
7093 orc_union16 * ORC_RESTRICT ptr1;
7094 const orc_union32 * ORC_RESTRICT ptr4;
/* Executor slots 0 and 1 are viewed as 16-bit elements, slot 4 as 32-bit. */
7105 ptr0 = (orc_union16 *)ex->arrays[0];
7106 ptr1 = (orc_union16 *)ex->arrays[1];
7107 ptr4 = (orc_union32 *)ex->arrays[4];
/* Second operand of the rounding average below is the constant 0. */
7110 var37.i = 0x00000000; /* 0 or 0f */
7112 for (i = 0; i < n; i++) {
/* Low and high 16-bit halves of the input word. */
7118 var40.i = (orc_uint32)var39.i & 0xffff;
7120 var41.i = ((orc_uint32)var39.i >> 16)&0xffff;
/* Difference: high half minus low half. */
7122 var42.i = var41.i - var40.i;
/* Rounded half of the difference: (diff + 0 + 1) >> 1. */
7128 var43.i = (var42.i + var37.i + 1)>>1;
/* Low half plus the rounded half-difference. */
7130 var38.i = var40.i + var43.i;
/*
 * Orc-backed entry point. The visible code builds an OrcProgram between
 * orc_once_mutex_lock()/orc_once_mutex_unlock(), binds the caller's arrays
 * into a stack OrcExecutor and fetches the entry point from p->code_exec.
 * The test of p_inited and the call through func are presumably on lines not
 * visible in this excerpt.
 * Opcode notes read the operand list as (dest, src1, src2); the trailing
 * ORC_VAR_D1 arguments look like fillers for unused slots -- confirm.
 */
7138 orc_haar_deint_split_s16 (int16_t * d1, int16_t * d2, const int16_t * s1, int n)
7140 OrcExecutor _ex, *ex = &_ex;
7141 static int p_inited = 0;
7142 static OrcProgram *p = 0;
7143 void (*func) (OrcExecutor *);
7146 orc_once_mutex_lock ();
7148 OrcCompileResult result;
7150 p = orc_program_new ();
7151 orc_program_set_name (p, "orc_haar_deint_split_s16");
7152 orc_program_set_backup_function (p, _backup_orc_haar_deint_split_s16);
/* Two 2-byte destinations, a 4-byte source, constant c1 = 0. */
7153 orc_program_add_destination (p, 2, "d1");
7154 orc_program_add_destination (p, 2, "d2");
7155 orc_program_add_source (p, 4, "s1");
7156 orc_program_add_constant (p, 4, 0x00000000, "c1");
7157 orc_program_add_temporary (p, 2, "t1");
7158 orc_program_add_temporary (p, 2, "t2");
7159 orc_program_add_temporary (p, 4, "t3");
/* t3 = s1; t1 = first half of t3; t2 = second half of t3 */
7161 orc_program_append_2 (p, "copyl", 0, ORC_VAR_T3, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
7162 orc_program_append_2 (p, "select0lw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
7163 orc_program_append_2 (p, "select1lw", 0, ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
/* t2 = t2 - t1; d2 = t2 */
7164 orc_program_append_2 (p, "subw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
7165 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
/* t2 = avgsw(t2, c1), i.e. (t2 + 0 + 1) >> 1 in the C backup; d1 = t1 + t2 */
7166 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
7167 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
7169 result = orc_program_compile (p);
7172 orc_once_mutex_unlock ();
/* Bind caller buffers; the cast drops const from the source pointer. */
7177 ex->arrays[ORC_VAR_D1] = d1;
7178 ex->arrays[ORC_VAR_D2] = d2;
7179 ex->arrays[ORC_VAR_S1] = (void *)s1;
7181 func = p->code_exec;
7187 /* orc_haar_split_s16_lo */
/*
 * Plain C variant of orc_haar_split_s16_lo taking raw pointers (a
 * same-signature Orc wrapper follows later; presumably chosen by a
 * preprocessor conditional not visible here).
 * Per iteration: diff = var35 - var38, then var38 + ((diff + 1) >> 1).
 * The loads of var35/var38 and the store of var37 are not visible in this
 * excerpt (the Orc program suggests var38 comes from s1 and var35 from s2
 * -- confirm).
 */
7190 orc_haar_split_s16_lo (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
7192 orc_union16 * ORC_RESTRICT ptr0;
7193 const orc_union16 * ORC_RESTRICT ptr4;
7194 const orc_union16 * ORC_RESTRICT ptr5;
/* All three buffers are walked as 16-bit elements. */
7203 ptr0 = (orc_union16 *)d1;
7204 ptr4 = (orc_union16 *)s1;
7205 ptr5 = (orc_union16 *)s2;
/* Second operand of the rounding average below is the constant 0. */
7208 var36.i = 0x00000000; /* 0 or 0f */
7210 for (i = 0; i < n; i++) {
/* Difference of the two inputs. */
7218 var39.i = var35.i - var38.i;
/* Rounded half of the difference: (diff + 0 + 1) >> 1. */
7220 var40.i = (var39.i + var36.i + 1)>>1;
/* Result: var38 plus the rounded half-difference. */
7222 var37.i = var38.i + var40.i;
/*
 * C backup for the "orc_haar_split_s16_lo" program, driven through an
 * OrcExecutor. Per iteration: diff = var35 - var38, then
 * var38 + ((diff + 1) >> 1). The loads of var35/var38 and the store of
 * var37 are not visible in this excerpt.
 */
7231 _backup_orc_haar_split_s16_lo (OrcExecutor * ORC_RESTRICT ex)
7235 orc_union16 * ORC_RESTRICT ptr0;
7236 const orc_union16 * ORC_RESTRICT ptr4;
7237 const orc_union16 * ORC_RESTRICT ptr5;
/* Executor slots 0, 4 and 5 are all viewed as 16-bit elements. */
7246 ptr0 = (orc_union16 *)ex->arrays[0];
7247 ptr4 = (orc_union16 *)ex->arrays[4];
7248 ptr5 = (orc_union16 *)ex->arrays[5];
/* Second operand of the rounding average below is the constant 0. */
7251 var36.i = 0x00000000; /* 0 or 0f */
7253 for (i = 0; i < n; i++) {
/* Difference of the two inputs. */
7261 var39.i = var35.i - var38.i;
/* Rounded half of the difference: (diff + 0 + 1) >> 1. */
7263 var40.i = (var39.i + var36.i + 1)>>1;
/* Result: var38 plus the rounded half-difference. */
7265 var37.i = var38.i + var40.i;
/*
 * Orc-backed entry point. The visible code builds an OrcProgram between
 * orc_once_mutex_lock()/orc_once_mutex_unlock(), binds the caller's arrays
 * into a stack OrcExecutor and fetches the entry point from p->code_exec.
 * The test of p_inited and the call through func are presumably on lines not
 * visible in this excerpt.
 * Opcode notes read the operand list as (dest, src1, src2); the trailing
 * ORC_VAR_D1 arguments look like fillers for unused slots -- confirm.
 */
7273 orc_haar_split_s16_lo (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
7275 OrcExecutor _ex, *ex = &_ex;
7276 static int p_inited = 0;
7277 static OrcProgram *p = 0;
7278 void (*func) (OrcExecutor *);
7281 orc_once_mutex_lock ();
7283 OrcCompileResult result;
7285 p = orc_program_new ();
7286 orc_program_set_name (p, "orc_haar_split_s16_lo");
7287 orc_program_set_backup_function (p, _backup_orc_haar_split_s16_lo);
/* One 2-byte destination, two 2-byte sources, constant c1 = 0. */
7288 orc_program_add_destination (p, 2, "d1");
7289 orc_program_add_source (p, 2, "s1");
7290 orc_program_add_source (p, 2, "s2");
7291 orc_program_add_constant (p, 4, 0x00000000, "c1");
7292 orc_program_add_temporary (p, 2, "t1");
7293 orc_program_add_temporary (p, 2, "t2");
/* t1 = s1; t2 = s2 - t1 */
7295 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
7296 orc_program_append_2 (p, "subw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_T1, ORC_VAR_D1);
/* t2 = avgsw(t2, c1), i.e. (t2 + 0 + 1) >> 1 in the C backup; d1 = t1 + t2 */
7297 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
7298 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
7300 result = orc_program_compile (p);
7303 orc_once_mutex_unlock ();
/* Bind caller buffers; the casts drop const from the source pointers. */
7308 ex->arrays[ORC_VAR_D1] = d1;
7309 ex->arrays[ORC_VAR_S1] = (void *)s1;
7310 ex->arrays[ORC_VAR_S2] = (void *)s2;
7312 func = p->code_exec;
7318 /* orc_haar_split_s16_hi */
/*
 * Plain C variant of orc_haar_split_s16_hi taking raw pointers (a
 * same-signature Orc wrapper follows later; presumably chosen by a
 * preprocessor conditional not visible here).
 * Per iteration it computes var32 - var33. The loads and the store of var34
 * are not visible in this excerpt (the Orc program is "d1 = s2 - s1", so
 * var32 presumably comes from s2 and var33 from s1 -- confirm).
 */
7321 orc_haar_split_s16_hi (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
7323 orc_union16 * ORC_RESTRICT ptr0;
7324 const orc_union16 * ORC_RESTRICT ptr4;
7325 const orc_union16 * ORC_RESTRICT ptr5;
/* All three buffers are walked as 16-bit elements. */
7330 ptr0 = (orc_union16 *)d1;
7331 ptr4 = (orc_union16 *)s1;
7332 ptr5 = (orc_union16 *)s2;
7335 for (i = 0; i < n; i++) {
/* Element-wise difference. */
7341 var34.i = var32.i - var33.i;
/*
 * C backup for the "orc_haar_split_s16_hi" program, driven through an
 * OrcExecutor. Per iteration it computes var32 - var33; the loads and the
 * store of var34 are not visible in this excerpt.
 */
7350 _backup_orc_haar_split_s16_hi (OrcExecutor * ORC_RESTRICT ex)
7354 orc_union16 * ORC_RESTRICT ptr0;
7355 const orc_union16 * ORC_RESTRICT ptr4;
7356 const orc_union16 * ORC_RESTRICT ptr5;
/* Executor slots 0, 4 and 5 are all viewed as 16-bit elements. */
7361 ptr0 = (orc_union16 *)ex->arrays[0];
7362 ptr4 = (orc_union16 *)ex->arrays[4];
7363 ptr5 = (orc_union16 *)ex->arrays[5];
7366 for (i = 0; i < n; i++) {
/* Element-wise difference. */
7372 var34.i = var32.i - var33.i;
/*
 * Orc-backed entry point. The visible code builds an OrcProgram between
 * orc_once_mutex_lock()/orc_once_mutex_unlock(), binds the caller's arrays
 * into a stack OrcExecutor and fetches the entry point from p->code_exec.
 * The test of p_inited and the call through func are presumably on lines not
 * visible in this excerpt.
 */
7380 orc_haar_split_s16_hi (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
7382 OrcExecutor _ex, *ex = &_ex;
7383 static int p_inited = 0;
7384 static OrcProgram *p = 0;
7385 void (*func) (OrcExecutor *);
7388 orc_once_mutex_lock ();
7390 OrcCompileResult result;
7392 p = orc_program_new ();
7393 orc_program_set_name (p, "orc_haar_split_s16_hi");
7394 orc_program_set_backup_function (p, _backup_orc_haar_split_s16_hi);
/* One 2-byte destination and two 2-byte sources. */
7395 orc_program_add_destination (p, 2, "d1");
7396 orc_program_add_source (p, 2, "s1");
7397 orc_program_add_source (p, 2, "s2");
/* d1 = s2 - s1, reading operands as (dest, src1, src2); the last
 * ORC_VAR_D1 looks like a filler for an unused slot -- confirm. */
7399 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_S2, ORC_VAR_S1, ORC_VAR_D1);
7401 result = orc_program_compile (p);
7404 orc_once_mutex_unlock ();
/* Bind caller buffers; the casts drop const from the source pointers. */
7409 ex->arrays[ORC_VAR_D1] = d1;
7410 ex->arrays[ORC_VAR_S1] = (void *)s1;
7411 ex->arrays[ORC_VAR_S2] = (void *)s2;
7413 func = p->code_exec;
7419 /* orc_haar_split_s16_op */
/*
 * Plain C variant of orc_haar_split_s16_op taking raw pointers (a
 * same-signature Orc wrapper follows later; presumably chosen by a
 * preprocessor conditional not visible here).
 * Per iteration: diff = var35 - var39, then var39 + ((diff + 1) >> 1).
 * The loads and the stores to d1/d2 are not visible in this excerpt.
 */
7422 orc_haar_split_s16_op (int16_t * d1, int16_t * d2, const int16_t * s1, const int16_t * s2, int n){
7424 orc_union16 * ORC_RESTRICT ptr0;
7425 orc_union16 * ORC_RESTRICT ptr1;
7426 const orc_union16 * ORC_RESTRICT ptr4;
7427 const orc_union16 * ORC_RESTRICT ptr5;
/* All four buffers are walked as 16-bit elements. */
7437 ptr0 = (orc_union16 *)d1;
7438 ptr1 = (orc_union16 *)d2;
7439 ptr4 = (orc_union16 *)s1;
7440 ptr5 = (orc_union16 *)s2;
/* Second operand of the rounding average below is the constant 0. */
7443 var37.i = 0x00000000; /* 0 or 0f */
7445 for (i = 0; i < n; i++) {
/* Difference of the two inputs. */
7453 var40.i = var35.i - var39.i;
/* Rounded half of the difference: (diff + 0 + 1) >> 1. */
7459 var41.i = (var40.i + var37.i + 1)>>1;
/* var39 plus the rounded half-difference. */
7461 var38.i = var39.i + var41.i;
/*
 * C backup for the "orc_haar_split_s16_op" program, driven through an
 * OrcExecutor. Per iteration: diff = var35 - var39, then
 * var39 + ((diff + 1) >> 1). The loads and stores are not visible in this
 * excerpt.
 */
7470 _backup_orc_haar_split_s16_op (OrcExecutor * ORC_RESTRICT ex)
7474 orc_union16 * ORC_RESTRICT ptr0;
7475 orc_union16 * ORC_RESTRICT ptr1;
7476 const orc_union16 * ORC_RESTRICT ptr4;
7477 const orc_union16 * ORC_RESTRICT ptr5;
/* Executor slots 0, 1, 4 and 5 are all viewed as 16-bit elements. */
7487 ptr0 = (orc_union16 *)ex->arrays[0];
7488 ptr1 = (orc_union16 *)ex->arrays[1];
7489 ptr4 = (orc_union16 *)ex->arrays[4];
7490 ptr5 = (orc_union16 *)ex->arrays[5];
/* Second operand of the rounding average below is the constant 0. */
7493 var37.i = 0x00000000; /* 0 or 0f */
7495 for (i = 0; i < n; i++) {
/* Difference of the two inputs. */
7503 var40.i = var35.i - var39.i;
/* Rounded half of the difference: (diff + 0 + 1) >> 1. */
7509 var41.i = (var40.i + var37.i + 1)>>1;
/* var39 plus the rounded half-difference. */
7511 var38.i = var39.i + var41.i;
/*
 * Orc-backed entry point. The visible code builds an OrcProgram between
 * orc_once_mutex_lock()/orc_once_mutex_unlock(), binds the caller's arrays
 * into a stack OrcExecutor and fetches the entry point from p->code_exec.
 * The test of p_inited and the call through func are presumably on lines not
 * visible in this excerpt.
 * Opcode notes read the operand list as (dest, src1, src2); the trailing
 * ORC_VAR_D1 arguments look like fillers for unused slots -- confirm.
 */
7519 orc_haar_split_s16_op (int16_t * d1, int16_t * d2, const int16_t * s1, const int16_t * s2, int n)
7521 OrcExecutor _ex, *ex = &_ex;
7522 static int p_inited = 0;
7523 static OrcProgram *p = 0;
7524 void (*func) (OrcExecutor *);
7527 orc_once_mutex_lock ();
7529 OrcCompileResult result;
7531 p = orc_program_new ();
7532 orc_program_set_name (p, "orc_haar_split_s16_op");
7533 orc_program_set_backup_function (p, _backup_orc_haar_split_s16_op);
/* Two 2-byte destinations, two 2-byte sources, constant c1 = 0. */
7534 orc_program_add_destination (p, 2, "d1");
7535 orc_program_add_destination (p, 2, "d2");
7536 orc_program_add_source (p, 2, "s1");
7537 orc_program_add_source (p, 2, "s2");
7538 orc_program_add_constant (p, 4, 0x00000000, "c1");
7539 orc_program_add_temporary (p, 2, "t1");
7540 orc_program_add_temporary (p, 2, "t2");
/* t1 = s1; t2 = s2 - t1; d2 = t2 */
7542 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
7543 orc_program_append_2 (p, "subw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_T1, ORC_VAR_D1);
7544 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
/* t2 = avgsw(t2, c1), i.e. (t2 + 0 + 1) >> 1 in the C backup; d1 = t1 + t2 */
7545 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
7546 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
7548 result = orc_program_compile (p);
7551 orc_once_mutex_unlock ();
/* Bind caller buffers; the casts drop const from the source pointers. */
7556 ex->arrays[ORC_VAR_D1] = d1;
7557 ex->arrays[ORC_VAR_D2] = d2;
7558 ex->arrays[ORC_VAR_S1] = (void *)s1;
7559 ex->arrays[ORC_VAR_S2] = (void *)s2;
7561 func = p->code_exec;
7567 /* orc_haar_split_s16 */
/*
 * Plain C variant of orc_haar_split_s16 taking raw pointers (a
 * same-signature Orc wrapper follows later; presumably chosen by a
 * preprocessor conditional not visible here).
 * There are no source arrays: d1 and d2 are the only buffers, so the
 * routine presumably reads and rewrites them in place (the loads and stores
 * are not visible in this excerpt).
 * Per iteration: diff = var40 - var39, then var39 + ((diff + 1) >> 1).
 */
7570 orc_haar_split_s16 (int16_t * d1, int16_t * d2, int n){
7572 orc_union16 * ORC_RESTRICT ptr0;
7573 orc_union16 * ORC_RESTRICT ptr1;
/* Both buffers are walked as 16-bit elements. */
7584 ptr0 = (orc_union16 *)d1;
7585 ptr1 = (orc_union16 *)d2;
/* Second operand of the rounding average below is the constant 0. */
7588 var37.i = 0x00000000; /* 0 or 0f */
7590 for (i = 0; i < n; i++) {
/* Difference of the two values. */
7600 var41.i = var40.i - var39.i;
/* Rounded half of the difference: (diff + 0 + 1) >> 1. */
7606 var42.i = (var41.i + var37.i + 1)>>1;
/* var39 plus the rounded half-difference. */
7608 var38.i = var39.i + var42.i;
/*
 * C backup for the "orc_haar_split_s16" program, driven through an
 * OrcExecutor. Only executor slots 0 and 1 are used. Per iteration:
 * diff = var40 - var39, then var39 + ((diff + 1) >> 1). The loads and
 * stores are not visible in this excerpt.
 */
7617 _backup_orc_haar_split_s16 (OrcExecutor * ORC_RESTRICT ex)
7621 orc_union16 * ORC_RESTRICT ptr0;
7622 orc_union16 * ORC_RESTRICT ptr1;
/* Executor slots 0 and 1 are viewed as 16-bit elements. */
7633 ptr0 = (orc_union16 *)ex->arrays[0];
7634 ptr1 = (orc_union16 *)ex->arrays[1];
/* Second operand of the rounding average below is the constant 0. */
7637 var37.i = 0x00000000; /* 0 or 0f */
7639 for (i = 0; i < n; i++) {
/* Difference of the two values. */
7649 var41.i = var40.i - var39.i;
/* Rounded half of the difference: (diff + 0 + 1) >> 1. */
7655 var42.i = (var41.i + var37.i + 1)>>1;
/* var39 plus the rounded half-difference. */
7657 var38.i = var39.i + var42.i;
/*
 * Orc-backed entry point. The visible code builds an OrcProgram between
 * orc_once_mutex_lock()/orc_once_mutex_unlock(), binds the caller's arrays
 * into a stack OrcExecutor and fetches the entry point from p->code_exec.
 * The test of p_inited and the call through func are presumably on lines not
 * visible in this excerpt.
 * The program has no sources: d1 and d2 are both read and written.
 * Opcode notes read the operand list as (dest, src1, src2); the trailing
 * ORC_VAR_D1 arguments look like fillers for unused slots -- confirm.
 */
7665 orc_haar_split_s16 (int16_t * d1, int16_t * d2, int n)
7667 OrcExecutor _ex, *ex = &_ex;
7668 static int p_inited = 0;
7669 static OrcProgram *p = 0;
7670 void (*func) (OrcExecutor *);
7673 orc_once_mutex_lock ();
7675 OrcCompileResult result;
7677 p = orc_program_new ();
7678 orc_program_set_name (p, "orc_haar_split_s16");
7679 orc_program_set_backup_function (p, _backup_orc_haar_split_s16);
/* Two 2-byte destinations, constant c1 = 0, two 2-byte temporaries. */
7680 orc_program_add_destination (p, 2, "d1");
7681 orc_program_add_destination (p, 2, "d2");
7682 orc_program_add_constant (p, 4, 0x00000000, "c1");
7683 orc_program_add_temporary (p, 2, "t1");
7684 orc_program_add_temporary (p, 2, "t2");
/* t1 = d1; t2 = d2 */
7686 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
7687 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T2, ORC_VAR_D2, ORC_VAR_D1, ORC_VAR_D1);
/* t2 = t2 - t1; d2 = t2 */
7688 orc_program_append_2 (p, "subw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
7689 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
/* t2 = avgsw(t2, c1), i.e. (t2 + 0 + 1) >> 1 in the C backup; d1 = t1 + t2 */
7690 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
7691 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
7693 result = orc_program_compile (p);
7696 orc_once_mutex_unlock ();
/* Bind the two caller buffers. */
7701 ex->arrays[ORC_VAR_D1] = d1;
7702 ex->arrays[ORC_VAR_D2] = d2;
7704 func = p->code_exec;
7710 /* orc_haar_synth_s16_lo */
/*
 * Plain C variant of orc_haar_synth_s16_lo taking raw pointers (a
 * same-signature Orc wrapper follows later; presumably chosen by a
 * preprocessor conditional not visible here).
 * Per iteration: half = (var33 + 1) >> 1, result = var35 - half.
 * The loads and the store of var36 are not visible in this excerpt (the Orc
 * program suggests var33 comes from s2 and var35 from s1 -- confirm).
 */
7713 orc_haar_synth_s16_lo (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
7715 orc_union16 * ORC_RESTRICT ptr0;
7716 const orc_union16 * ORC_RESTRICT ptr4;
7717 const orc_union16 * ORC_RESTRICT ptr5;
/* All three buffers are walked as 16-bit elements. */
7724 ptr0 = (orc_union16 *)d1;
7725 ptr4 = (orc_union16 *)s1;
7726 ptr5 = (orc_union16 *)s2;
/* Second operand of the rounding average below is the constant 0. */
7729 var34.i = 0x00000000; /* 0 or 0f */
7731 for (i = 0; i < n; i++) {
/* Rounded half: (var33 + 0 + 1) >> 1. */
7735 var37.i = (var33.i + var34.i + 1)>>1;
/* Subtract the rounded half from the other input. */
7739 var36.i = var35.i - var37.i;
/*
 * C backup for the "orc_haar_synth_s16_lo" program, driven through an
 * OrcExecutor. Per iteration: half = (var33 + 1) >> 1, result =
 * var35 - half. The loads and the store of var36 are not visible in this
 * excerpt.
 */
7748 _backup_orc_haar_synth_s16_lo (OrcExecutor * ORC_RESTRICT ex)
7752 orc_union16 * ORC_RESTRICT ptr0;
7753 const orc_union16 * ORC_RESTRICT ptr4;
7754 const orc_union16 * ORC_RESTRICT ptr5;
/* Executor slots 0, 4 and 5 are all viewed as 16-bit elements. */
7761 ptr0 = (orc_union16 *)ex->arrays[0];
7762 ptr4 = (orc_union16 *)ex->arrays[4];
7763 ptr5 = (orc_union16 *)ex->arrays[5];
/* Second operand of the rounding average below is the constant 0. */
7766 var34.i = 0x00000000; /* 0 or 0f */
7768 for (i = 0; i < n; i++) {
/* Rounded half: (var33 + 0 + 1) >> 1. */
7772 var37.i = (var33.i + var34.i + 1)>>1;
/* Subtract the rounded half from the other input. */
7776 var36.i = var35.i - var37.i;
/*
 * Orc-backed entry point. The visible code builds an OrcProgram between
 * orc_once_mutex_lock()/orc_once_mutex_unlock(), binds the caller's arrays
 * into a stack OrcExecutor and fetches the entry point from p->code_exec.
 * The test of p_inited and the call through func are presumably on lines not
 * visible in this excerpt.
 * Opcode notes read the operand list as (dest, src1, src2); the trailing
 * ORC_VAR_D1 arguments look like fillers for unused slots -- confirm.
 */
7784 orc_haar_synth_s16_lo (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
7786 OrcExecutor _ex, *ex = &_ex;
7787 static int p_inited = 0;
7788 static OrcProgram *p = 0;
7789 void (*func) (OrcExecutor *);
7792 orc_once_mutex_lock ();
7794 OrcCompileResult result;
7796 p = orc_program_new ();
7797 orc_program_set_name (p, "orc_haar_synth_s16_lo");
7798 orc_program_set_backup_function (p, _backup_orc_haar_synth_s16_lo);
/* One 2-byte destination, two 2-byte sources, constant c1 = 0. */
7799 orc_program_add_destination (p, 2, "d1");
7800 orc_program_add_source (p, 2, "s1");
7801 orc_program_add_source (p, 2, "s2");
7802 orc_program_add_constant (p, 4, 0x00000000, "c1");
7803 orc_program_add_temporary (p, 2, "t1");
/* t1 = avgsw(s2, c1), i.e. (s2 + 0 + 1) >> 1 in the C backup; d1 = s1 - t1 */
7805 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_C1, ORC_VAR_D1);
7806 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);
7808 result = orc_program_compile (p);
7811 orc_once_mutex_unlock ();
/* Bind caller buffers; the casts drop const from the source pointers. */
7816 ex->arrays[ORC_VAR_D1] = d1;
7817 ex->arrays[ORC_VAR_S1] = (void *)s1;
7818 ex->arrays[ORC_VAR_S2] = (void *)s2;
7820 func = p->code_exec;
7826 /* orc_haar_synth_s16_hi */
/*
 * Plain C variant of orc_haar_synth_s16_hi taking raw pointers (a
 * same-signature Orc wrapper follows later; presumably chosen by a
 * preprocessor conditional not visible here).
 * Per iteration: half = (var39 + 1) >> 1, lo = var37 - half,
 * result = var39 + lo. The loads and the store of var38 are not visible in
 * this excerpt (the Orc program suggests var39 comes from s2 and var37 from
 * s1 -- confirm).
 */
7829 orc_haar_synth_s16_hi (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
7831 orc_union16 * ORC_RESTRICT ptr0;
7832 const orc_union16 * ORC_RESTRICT ptr4;
7833 const orc_union16 * ORC_RESTRICT ptr5;
/* All three buffers are walked as 16-bit elements. */
7842 ptr0 = (orc_union16 *)d1;
7843 ptr4 = (orc_union16 *)s1;
7844 ptr5 = (orc_union16 *)s2;
/* Second operand of the rounding average below is the constant 0. */
7847 var36.i = 0x00000000; /* 0 or 0f */
7849 for (i = 0; i < n; i++) {
/* Rounded half: (var39 + 0 + 1) >> 1. */
7855 var40.i = (var39.i + var36.i + 1)>>1;
/* Other input minus the rounded half. */
7859 var41.i = var37.i - var40.i;
/* Add that difference back onto var39. */
7861 var38.i = var39.i + var41.i;
/*
 * C backup for the "orc_haar_synth_s16_hi" program, driven through an
 * OrcExecutor. Per iteration: half = (var39 + 1) >> 1, lo = var37 - half,
 * result = var39 + lo. The loads and the store of var38 are not visible in
 * this excerpt.
 */
7870 _backup_orc_haar_synth_s16_hi (OrcExecutor * ORC_RESTRICT ex)
7874 orc_union16 * ORC_RESTRICT ptr0;
7875 const orc_union16 * ORC_RESTRICT ptr4;
7876 const orc_union16 * ORC_RESTRICT ptr5;
/* Executor slots 0, 4 and 5 are all viewed as 16-bit elements. */
7885 ptr0 = (orc_union16 *)ex->arrays[0];
7886 ptr4 = (orc_union16 *)ex->arrays[4];
7887 ptr5 = (orc_union16 *)ex->arrays[5];
/* Second operand of the rounding average below is the constant 0. */
7890 var36.i = 0x00000000; /* 0 or 0f */
7892 for (i = 0; i < n; i++) {
/* Rounded half: (var39 + 0 + 1) >> 1. */
7898 var40.i = (var39.i + var36.i + 1)>>1;
/* Other input minus the rounded half. */
7902 var41.i = var37.i - var40.i;
/* Add that difference back onto var39. */
7904 var38.i = var39.i + var41.i;
/*
 * Orc-backed entry point. The visible code builds an OrcProgram between
 * orc_once_mutex_lock()/orc_once_mutex_unlock(), binds the caller's arrays
 * into a stack OrcExecutor and fetches the entry point from p->code_exec.
 * The test of p_inited and the call through func are presumably on lines not
 * visible in this excerpt.
 * Opcode notes read the operand list as (dest, src1, src2); the trailing
 * ORC_VAR_D1 arguments look like fillers for unused slots -- confirm.
 */
7912 orc_haar_synth_s16_hi (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
7914 OrcExecutor _ex, *ex = &_ex;
7915 static int p_inited = 0;
7916 static OrcProgram *p = 0;
7917 void (*func) (OrcExecutor *);
7920 orc_once_mutex_lock ();
7922 OrcCompileResult result;
7924 p = orc_program_new ();
7925 orc_program_set_name (p, "orc_haar_synth_s16_hi");
7926 orc_program_set_backup_function (p, _backup_orc_haar_synth_s16_hi);
/* One 2-byte destination, two 2-byte sources, constant c1 = 0. */
7927 orc_program_add_destination (p, 2, "d1");
7928 orc_program_add_source (p, 2, "s1");
7929 orc_program_add_source (p, 2, "s2");
7930 orc_program_add_constant (p, 4, 0x00000000, "c1");
7931 orc_program_add_temporary (p, 2, "t1");
7932 orc_program_add_temporary (p, 2, "t2");
7933 orc_program_add_temporary (p, 2, "t3");
/* t2 = s2; t3 = avgsw(t2, c1), i.e. (t2 + 0 + 1) >> 1 in the C backup */
7935 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
7936 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
/* t1 = s1 - t3; d1 = t2 + t1 */
7937 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_T3, ORC_VAR_D1);
7938 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
7940 result = orc_program_compile (p);
7943 orc_once_mutex_unlock ();
/* Bind caller buffers; the casts drop const from the source pointers. */
7948 ex->arrays[ORC_VAR_D1] = d1;
7949 ex->arrays[ORC_VAR_S1] = (void *)s1;
7950 ex->arrays[ORC_VAR_S2] = (void *)s2;
7952 func = p->code_exec;
7958 /* orc_haar_synth_s16_op */
/*
 * Plain C variant of orc_haar_synth_s16_op taking raw pointers (a
 * same-signature Orc wrapper follows later; presumably chosen by a
 * preprocessor conditional not visible here).
 * Per iteration: half = (var40 + 1) >> 1, lo = var37 - half,
 * hi = var40 + lo. The loads and the stores to d1/d2 are not visible in
 * this excerpt.
 */
7961 orc_haar_synth_s16_op (int16_t * d1, int16_t * d2, const int16_t * s1, const int16_t * s2, int n){
7963 orc_union16 * ORC_RESTRICT ptr0;
7964 orc_union16 * ORC_RESTRICT ptr1;
7965 const orc_union16 * ORC_RESTRICT ptr4;
7966 const orc_union16 * ORC_RESTRICT ptr5;
/* All four buffers are walked as 16-bit elements. */
7976 ptr0 = (orc_union16 *)d1;
7977 ptr1 = (orc_union16 *)d2;
7978 ptr4 = (orc_union16 *)s1;
7979 ptr5 = (orc_union16 *)s2;
/* Second operand of the rounding average below is the constant 0. */
7982 var36.i = 0x00000000; /* 0 or 0f */
7984 for (i = 0; i < n; i++) {
/* Rounded half: (var40 + 0 + 1) >> 1. */
7990 var41.i = (var40.i + var36.i + 1)>>1;
/* Other input minus the rounded half. */
7994 var42.i = var37.i - var41.i;
/* Add that difference back onto var40. */
8000 var39.i = var40.i + var42.i;
/*
 * C backup for the "orc_haar_synth_s16_op" program, driven through an
 * OrcExecutor. Per iteration: half = (var40 + 1) >> 1, lo = var37 - half,
 * hi = var40 + lo. The loads and stores are not visible in this excerpt.
 */
8009 _backup_orc_haar_synth_s16_op (OrcExecutor * ORC_RESTRICT ex)
8013 orc_union16 * ORC_RESTRICT ptr0;
8014 orc_union16 * ORC_RESTRICT ptr1;
8015 const orc_union16 * ORC_RESTRICT ptr4;
8016 const orc_union16 * ORC_RESTRICT ptr5;
/* Executor slots 0, 1, 4 and 5 are all viewed as 16-bit elements. */
8026 ptr0 = (orc_union16 *)ex->arrays[0];
8027 ptr1 = (orc_union16 *)ex->arrays[1];
8028 ptr4 = (orc_union16 *)ex->arrays[4];
8029 ptr5 = (orc_union16 *)ex->arrays[5];
/* Second operand of the rounding average below is the constant 0. */
8032 var36.i = 0x00000000; /* 0 or 0f */
8034 for (i = 0; i < n; i++) {
/* Rounded half: (var40 + 0 + 1) >> 1. */
8040 var41.i = (var40.i + var36.i + 1)>>1;
/* Other input minus the rounded half. */
8044 var42.i = var37.i - var41.i;
/* Add that difference back onto var40. */
8050 var39.i = var40.i + var42.i;
/*
 * Orc-backed entry point. The visible code builds an OrcProgram between
 * orc_once_mutex_lock()/orc_once_mutex_unlock(), binds the caller's arrays
 * into a stack OrcExecutor and fetches the entry point from p->code_exec.
 * The test of p_inited and the call through func are presumably on lines not
 * visible in this excerpt.
 * Opcode notes read the operand list as (dest, src1, src2); the trailing
 * ORC_VAR_D1 arguments look like fillers for unused slots -- confirm.
 */
8058 orc_haar_synth_s16_op (int16_t * d1, int16_t * d2, const int16_t * s1, const int16_t * s2, int n)
8060 OrcExecutor _ex, *ex = &_ex;
8061 static int p_inited = 0;
8062 static OrcProgram *p = 0;
8063 void (*func) (OrcExecutor *);
8066 orc_once_mutex_lock ();
8068 OrcCompileResult result;
8070 p = orc_program_new ();
8071 orc_program_set_name (p, "orc_haar_synth_s16_op");
8072 orc_program_set_backup_function (p, _backup_orc_haar_synth_s16_op);
/* Two 2-byte destinations, two 2-byte sources, constant c1 = 0. */
8073 orc_program_add_destination (p, 2, "d1");
8074 orc_program_add_destination (p, 2, "d2");
8075 orc_program_add_source (p, 2, "s1");
8076 orc_program_add_source (p, 2, "s2");
8077 orc_program_add_constant (p, 4, 0x00000000, "c1");
8078 orc_program_add_temporary (p, 2, "t1");
8079 orc_program_add_temporary (p, 2, "t2");
8080 orc_program_add_temporary (p, 2, "t3");
/* t2 = s2; t3 = avgsw(t2, c1), i.e. (t2 + 0 + 1) >> 1 in the C backup */
8082 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
8083 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
/* t1 = s1 - t3; d1 = t1; d2 = t2 + t1 */
8084 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_T3, ORC_VAR_D1);
8085 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
8086 orc_program_append_2 (p, "addw", 0, ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
8088 result = orc_program_compile (p);
8091 orc_once_mutex_unlock ();
/* Bind caller buffers; the casts drop const from the source pointers. */
8096 ex->arrays[ORC_VAR_D1] = d1;
8097 ex->arrays[ORC_VAR_D2] = d2;
8098 ex->arrays[ORC_VAR_S1] = (void *)s1;
8099 ex->arrays[ORC_VAR_S2] = (void *)s2;
8101 func = p->code_exec;
8107 /* orc_haar_synth_s16 */
/*
 * Plain C variant of orc_haar_synth_s16 taking raw pointers (a
 * same-signature Orc wrapper follows later; presumably chosen by a
 * preprocessor conditional not visible here).
 * There are no source arrays: d1 and d2 are the only buffers, so the
 * routine presumably reads and rewrites them in place (the loads and stores
 * are not visible in this excerpt).
 * Per iteration: half = (var41 + 1) >> 1, lo = var40 - half,
 * hi = var41 + lo.
 */
8110 orc_haar_synth_s16 (int16_t * d1, int16_t * d2, int n){
8112 orc_union16 * ORC_RESTRICT ptr0;
8113 orc_union16 * ORC_RESTRICT ptr1;
/* Both buffers are walked as 16-bit elements. */
8124 ptr0 = (orc_union16 *)d1;
8125 ptr1 = (orc_union16 *)d2;
/* Second operand of the rounding average below is the constant 0. */
8128 var37.i = 0x00000000; /* 0 or 0f */
8130 for (i = 0; i < n; i++) {
/* Rounded half: (var41 + 0 + 1) >> 1. */
8140 var42.i = (var41.i + var37.i + 1)>>1;
/* Other value minus the rounded half. */
8142 var43.i = var40.i - var42.i;
/* Add that difference back onto var41. */
8148 var39.i = var41.i + var43.i;
/*
 * C backup for the "orc_haar_synth_s16" program, driven through an
 * OrcExecutor. Only executor slots 0 and 1 are used. Per iteration:
 * half = (var41 + 1) >> 1, lo = var40 - half, hi = var41 + lo. The loads
 * and stores are not visible in this excerpt.
 */
8157 _backup_orc_haar_synth_s16 (OrcExecutor * ORC_RESTRICT ex)
8161 orc_union16 * ORC_RESTRICT ptr0;
8162 orc_union16 * ORC_RESTRICT ptr1;
/* Executor slots 0 and 1 are viewed as 16-bit elements. */
8173 ptr0 = (orc_union16 *)ex->arrays[0];
8174 ptr1 = (orc_union16 *)ex->arrays[1];
/* Second operand of the rounding average below is the constant 0. */
8177 var37.i = 0x00000000; /* 0 or 0f */
8179 for (i = 0; i < n; i++) {
/* Rounded half: (var41 + 0 + 1) >> 1. */
8189 var42.i = (var41.i + var37.i + 1)>>1;
/* Other value minus the rounded half. */
8191 var43.i = var40.i - var42.i;
/* Add that difference back onto var41. */
8197 var39.i = var41.i + var43.i;
/*
 * Orc-backed entry point. The visible code builds an OrcProgram between
 * orc_once_mutex_lock()/orc_once_mutex_unlock(), binds the caller's arrays
 * into a stack OrcExecutor and fetches the entry point from p->code_exec.
 * The test of p_inited and the call through func are presumably on lines not
 * visible in this excerpt.
 * The program has no sources: d1 and d2 are both read and written.
 * Opcode notes read the operand list as (dest, src1, src2); the trailing
 * ORC_VAR_D1 arguments look like fillers for unused slots -- confirm.
 */
8205 orc_haar_synth_s16 (int16_t * d1, int16_t * d2, int n)
8207 OrcExecutor _ex, *ex = &_ex;
8208 static int p_inited = 0;
8209 static OrcProgram *p = 0;
8210 void (*func) (OrcExecutor *);
8213 orc_once_mutex_lock ();
8215 OrcCompileResult result;
8217 p = orc_program_new ();
8218 orc_program_set_name (p, "orc_haar_synth_s16");
8219 orc_program_set_backup_function (p, _backup_orc_haar_synth_s16);
/* Two 2-byte destinations, constant c1 = 0, three 2-byte temporaries. */
8220 orc_program_add_destination (p, 2, "d1");
8221 orc_program_add_destination (p, 2, "d2");
8222 orc_program_add_constant (p, 4, 0x00000000, "c1");
8223 orc_program_add_temporary (p, 2, "t1");
8224 orc_program_add_temporary (p, 2, "t2");
8225 orc_program_add_temporary (p, 2, "t3");
/* t1 = d1; t2 = d2 */
8227 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
8228 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T2, ORC_VAR_D2, ORC_VAR_D1, ORC_VAR_D1);
/* t3 = avgsw(t2, c1), i.e. (t2 + 0 + 1) >> 1 in the C backup; t1 = t1 - t3 */
8229 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
8230 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1);
/* d1 = t1; d2 = t2 + t1 */
8231 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
8232 orc_program_append_2 (p, "addw", 0, ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
8234 result = orc_program_compile (p);
8237 orc_once_mutex_unlock ();
/* Bind the two caller buffers. */
8242 ex->arrays[ORC_VAR_D1] = d1;
8243 ex->arrays[ORC_VAR_D2] = d2;
8245 func = p->code_exec;
8251 /* orc_haar_synth_rrshift1_int_s16 */
/*
 * Plain C variant of orc_haar_synth_rrshift1_int_s16 taking raw pointers (a
 * same-signature Orc wrapper follows later; presumably chosen by a
 * preprocessor conditional not visible here).
 * Per iteration: half = (var40 + 1) >> 1, lo = var36 - half,
 * hi = var40 + lo; both lo and hi are then rounded-halved ((x + 1) >> 1)
 * and packed into one 32-bit word (lo in bits 0-15, hi in bits 16-31).
 * d1 is walked as 32-bit elements although it is declared int16_t *, so it
 * presumably needs room for 2*n int16_t values -- confirm with callers.
 * The loads and the store of var39 are not visible in this excerpt.
 */
8254 orc_haar_synth_rrshift1_int_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
8256 orc_union32 * ORC_RESTRICT ptr0;
8257 const orc_union16 * ORC_RESTRICT ptr4;
8258 const orc_union16 * ORC_RESTRICT ptr5;
/* Destination as 32-bit elements, sources as 16-bit elements. */
8272 ptr0 = (orc_union32 *)d1;
8273 ptr4 = (orc_union16 *)s1;
8274 ptr5 = (orc_union16 *)s2;
/* All three constants are 0: second operands of the rounding averages. */
8277 var35.i = 0x00000000; /* 0 or 0f */
8279 var37.i = 0x00000000; /* 0 or 0f */
8281 var38.i = 0x00000000; /* 0 or 0f */
8283 for (i = 0; i < n; i++) {
/* Rounded half: (var40 + 0 + 1) >> 1. */
8289 var41.i = (var40.i + var35.i + 1)>>1;
/* lo = var36 - half; hi = var40 + lo. */
8293 var42.i = var36.i - var41.i;
8295 var43.i = var40.i + var42.i;
/* Rounded halving of lo and hi. */
8297 var44.i = (var42.i + var37.i + 1)>>1;
8299 var45.i = (var43.i + var38.i + 1)>>1;
/* Pack var44 into bits 0-15 and var45 into bits 16-31.
 * NOTE(review): the (orc_uint16) operand is promoted to int before << 16;
 * confirm this is acceptable when bit 15 of var45 is set. */
8301 var39.i = ((orc_uint16)var44.i & 0x0000ffff) | ((orc_uint16)var45.i << 16);
/*
 * C backup for the "orc_haar_synth_rrshift1_int_s16" program, driven
 * through an OrcExecutor. Per iteration: half = (var40 + 1) >> 1,
 * lo = var36 - half, hi = var40 + lo; both lo and hi are then
 * rounded-halved ((x + 1) >> 1) and packed into one 32-bit word.
 * The loads and the store of var39 are not visible in this excerpt.
 */
8310 _backup_orc_haar_synth_rrshift1_int_s16 (OrcExecutor * ORC_RESTRICT ex)
8314 orc_union32 * ORC_RESTRICT ptr0;
8315 const orc_union16 * ORC_RESTRICT ptr4;
8316 const orc_union16 * ORC_RESTRICT ptr5;
/* Executor slot 0 is viewed as 32-bit elements, slots 4 and 5 as 16-bit. */
8330 ptr0 = (orc_union32 *)ex->arrays[0];
8331 ptr4 = (orc_union16 *)ex->arrays[4];
8332 ptr5 = (orc_union16 *)ex->arrays[5];
/* All three constants are 0: second operands of the rounding averages. */
8335 var35.i = 0x00000000; /* 0 or 0f */
8337 var37.i = 0x00000000; /* 0 or 0f */
8339 var38.i = 0x00000000; /* 0 or 0f */
8341 for (i = 0; i < n; i++) {
/* Rounded half: (var40 + 0 + 1) >> 1. */
8347 var41.i = (var40.i + var35.i + 1)>>1;
/* lo = var36 - half; hi = var40 + lo. */
8351 var42.i = var36.i - var41.i;
8353 var43.i = var40.i + var42.i;
/* Rounded halving of lo and hi. */
8355 var44.i = (var42.i + var37.i + 1)>>1;
8357 var45.i = (var43.i + var38.i + 1)>>1;
/* Pack var44 into bits 0-15 and var45 into bits 16-31.
 * NOTE(review): the (orc_uint16) operand is promoted to int before << 16;
 * confirm this is acceptable when bit 15 of var45 is set. */
8359 var39.i = ((orc_uint16)var44.i & 0x0000ffff) | ((orc_uint16)var45.i << 16);
/*
 * Orc-backed entry point. The visible code builds an OrcProgram between
 * orc_once_mutex_lock()/orc_once_mutex_unlock(), binds the caller's arrays
 * into a stack OrcExecutor and fetches the entry point from p->code_exec.
 * The test of p_inited and the call through func are presumably on lines not
 * visible in this excerpt.
 * Opcode notes read the operand list as (dest, src1, src2); the trailing
 * ORC_VAR_D1 arguments look like fillers for unused slots -- confirm.
 */
8367 orc_haar_synth_rrshift1_int_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
8369 OrcExecutor _ex, *ex = &_ex;
8370 static int p_inited = 0;
8371 static OrcProgram *p = 0;
8372 void (*func) (OrcExecutor *);
8375 orc_once_mutex_lock ();
8377 OrcCompileResult result;
8379 p = orc_program_new ();
8380 orc_program_set_name (p, "orc_haar_synth_rrshift1_int_s16");
8381 orc_program_set_backup_function (p, _backup_orc_haar_synth_rrshift1_int_s16);
/* d1 is declared 4 bytes wide although the C parameter is int16_t *;
 * s1/s2 are 2 bytes wide; c1 is the constant 0. */
8382 orc_program_add_destination (p, 4, "d1");
8383 orc_program_add_source (p, 2, "s1");
8384 orc_program_add_source (p, 2, "s2");
8385 orc_program_add_constant (p, 4, 0x00000000, "c1");
8386 orc_program_add_temporary (p, 2, "t1");
8387 orc_program_add_temporary (p, 2, "t2");
/* t2 = s2; t1 = avgsw(t2, c1), i.e. (t2 + 0 + 1) >> 1 in the C backup */
8389 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
8390 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
/* t1 = s1 - t1; t2 = t2 + t1 */
8391 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);
8392 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
/* Rounded halving of both results. */
8393 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
8394 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
/* d1 = t1 and t2 merged into one 32-bit element */
8395 orc_program_append_2 (p, "mergewl", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
8397 result = orc_program_compile (p);
8400 orc_once_mutex_unlock ();
/* Bind caller buffers; the casts drop const from the source pointers. */
8405 ex->arrays[ORC_VAR_D1] = d1;
8406 ex->arrays[ORC_VAR_S1] = (void *)s1;
8407 ex->arrays[ORC_VAR_S2] = (void *)s2;
8409 func = p->code_exec;
8415 /* orc_haar_synth_int_s16 */
/* Plain C variant of orc_haar_synth_int_s16 working directly on d1/s1/s2
 * (presumably selected by a preprocessor conditional not visible here).
 * Per element: var39 = (var38 + 0 + 1) >> 1, var40 = var36 - var39,
 * var41 = var38 + var40, then var40/var41 are packed into one 32-bit value.
 * NOTE(review): the loads into var36/var38 and the store of var37 are not
 * visible in this excerpt. */
8418 orc_haar_synth_int_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
8420 orc_union32 * ORC_RESTRICT ptr0;
8421 const orc_union16 * ORC_RESTRICT ptr4;
8422 const orc_union16 * ORC_RESTRICT ptr5;
/* The destination is viewed as 32-bit elements even though d1 is int16_t *. */
8432 ptr0 = (orc_union32 *)d1;
8433 ptr4 = (orc_union16 *)s1;
8434 ptr5 = (orc_union16 *)s2;
8437 var35.i = 0x00000000; /* 0 or 0f */
8439 for (i = 0; i < n; i++) {
8445 var39.i = (var38.i + var35.i + 1)>>1;
8449 var40.i = var36.i - var39.i;
8451 var41.i = var38.i + var40.i;
/* Low half = var40, high half = var41.
 * NOTE(review): (orc_uint16) promotes to int before << 16, so with 32-bit int
 * a value >= 0x8000 shifts into the sign bit -- confirm this is acceptable. */
8453 var37.i = ((orc_uint16)var40.i & 0x0000ffff) | ((orc_uint16)var41.i << 16);
/* Backup function registered for the "orc_haar_synth_int_s16" program: same
 * arithmetic as the plain C variant, but every buffer comes from the
 * OrcExecutor.
 * NOTE(review): how n is obtained, the element loads and the store of var37
 * are not visible in this excerpt. */
8462 _backup_orc_haar_synth_int_s16 (OrcExecutor * ORC_RESTRICT ex)
8466 orc_union32 * ORC_RESTRICT ptr0;
8467 const orc_union16 * ORC_RESTRICT ptr4;
8468 const orc_union16 * ORC_RESTRICT ptr5;
/* Slots 0, 4 and 5 are the ones the wrapper fills through ORC_VAR_D1,
 * ORC_VAR_S1 and ORC_VAR_S2 (presumably those constants' values). */
8478 ptr0 = (orc_union32 *)ex->arrays[0];
8479 ptr4 = (orc_union16 *)ex->arrays[4];
8480 ptr5 = (orc_union16 *)ex->arrays[5];
8483 var35.i = 0x00000000; /* 0 or 0f */
8485 for (i = 0; i < n; i++) {
8491 var39.i = (var38.i + var35.i + 1)>>1;
8495 var40.i = var36.i - var39.i;
8497 var41.i = var38.i + var40.i;
/* Pack var40 into the low and var41 into the high 16 bits.
 * NOTE(review): the int-promoted << 16 can reach the sign bit when the value
 * is >= 0x8000 (32-bit int). */
8499 var37.i = ((orc_uint16)var40.i & 0x0000ffff) | ((orc_uint16)var41.i << 16);
/* ORC front end for orc_haar_synth_int_s16: builds and compiles the
 * OrcProgram, then binds d1/s1/s2 into an OrcExecutor.
 * NOTE(review): the p_inited check, the use of n and the call through func
 * are not visible in this excerpt. */
8507 orc_haar_synth_int_s16 (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
8509 OrcExecutor _ex, *ex = &_ex;
/* Cached across calls in function-local statics. */
8510 static int p_inited = 0;
8511 static OrcProgram *p = 0;
8512 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
8515 orc_once_mutex_lock ();
8517 OrcCompileResult result;
8519 p = orc_program_new ();
8520 orc_program_set_name (p, "orc_haar_synth_int_s16");
8521 orc_program_set_backup_function (p, _backup_orc_haar_synth_int_s16);
/* 4-byte destination, 2-byte sources: mergewl emits one packed pair per
 * element. */
8522 orc_program_add_destination (p, 4, "d1");
8523 orc_program_add_source (p, 2, "s1");
8524 orc_program_add_source (p, 2, "s2");
8525 orc_program_add_constant (p, 4, 0x00000000, "c1");
8526 orc_program_add_temporary (p, 2, "t1");
8527 orc_program_add_temporary (p, 2, "t2");
/* t2 = s2; t1 = avg(t2, 0); t1 = s1 - t1; t2 = t2 + t1; d1 = merge(t1, t2). */
8529 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
8530 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
8531 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);
8532 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
8533 orc_program_append_2 (p, "mergewl", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
8535 result = orc_program_compile (p);
8538 orc_once_mutex_unlock ();
/* Bind the caller's buffers; const is cast away only to fit the void * slots. */
8543 ex->arrays[ORC_VAR_D1] = d1;
8544 ex->arrays[ORC_VAR_S1] = (void *)s1;
8545 ex->arrays[ORC_VAR_S2] = (void *)s2;
8547 func = p->code_exec;
8553 /* orc_haar_sub_s16 */
/* Plain C variant of orc_haar_sub_s16: per element computes
 * var34 = var32 - var33 on 16-bit values.
 * NOTE(review): the loads of var32/var33 and the store of var34 are not
 * visible; the ORC program below is "subw d1, d1, s1", which suggests
 * d1[i] -= s1[i] -- confirm. */
8556 orc_haar_sub_s16 (int16_t * d1, const int16_t * s1, int n){
8558 orc_union16 * ORC_RESTRICT ptr0;
8559 const orc_union16 * ORC_RESTRICT ptr4;
8564 ptr0 = (orc_union16 *)d1;
8565 ptr4 = (orc_union16 *)s1;
8568 for (i = 0; i < n; i++) {
8574 var34.i = var32.i - var33.i;
/* Backup function registered for the "orc_haar_sub_s16" program: the same
 * 16-bit subtraction, with buffers taken from the executor.
 * NOTE(review): how n is obtained and the element loads/stores are not
 * visible in this excerpt. */
8583 _backup_orc_haar_sub_s16 (OrcExecutor * ORC_RESTRICT ex)
8587 orc_union16 * ORC_RESTRICT ptr0;
8588 const orc_union16 * ORC_RESTRICT ptr4;
/* Slot 0 is the destination, slot 4 the source (presumably ORC_VAR_D1 and
 * ORC_VAR_S1). */
8593 ptr0 = (orc_union16 *)ex->arrays[0];
8594 ptr4 = (orc_union16 *)ex->arrays[4];
8597 for (i = 0; i < n; i++) {
8603 var34.i = var32.i - var33.i;
/* ORC front end for orc_haar_sub_s16: a single-opcode program that subtracts
 * s1 from d1 in place.
 * NOTE(review): the p_inited check, the use of n and the call through func
 * are not visible in this excerpt. */
8611 orc_haar_sub_s16 (int16_t * d1, const int16_t * s1, int n)
8613 OrcExecutor _ex, *ex = &_ex;
8614 static int p_inited = 0;
8615 static OrcProgram *p = 0;
8616 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
8619 orc_once_mutex_lock ();
8621 OrcCompileResult result;
8623 p = orc_program_new ();
8624 orc_program_set_name (p, "orc_haar_sub_s16");
8625 orc_program_set_backup_function (p, _backup_orc_haar_sub_s16);
8626 orc_program_add_destination (p, 2, "d1");
8627 orc_program_add_source (p, 2, "s1");
/* d1 is both an input and the output of the subtraction. */
8629 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1);
8631 result = orc_program_compile (p);
8634 orc_once_mutex_unlock ();
8639 ex->arrays[ORC_VAR_D1] = d1;
8640 ex->arrays[ORC_VAR_S1] = (void *)s1;
8642 func = p->code_exec;
8648 /* orc_haar_add_half_s16 */
/* Plain C variant of orc_haar_add_half_s16: per element forms the rounded
 * half var37 = (var33 + 0 + 1) >> 1 and adds it to var35.
 * NOTE(review): the loads of var33/var35 and the store of var36 are not
 * visible; the ORC program below suggests d1[i] += (s1[i] + 1) >> 1 --
 * confirm. */
8651 orc_haar_add_half_s16 (int16_t * d1, const int16_t * s1, int n){
8653 orc_union16 * ORC_RESTRICT ptr0;
8654 const orc_union16 * ORC_RESTRICT ptr4;
8661 ptr0 = (orc_union16 *)d1;
8662 ptr4 = (orc_union16 *)s1;
8665 var34.i = 0x00000000; /* 0 or 0f */
8667 for (i = 0; i < n; i++) {
/* Average with the zero constant, i.e. a halving that rounds up. */
8671 var37.i = (var33.i + var34.i + 1)>>1;
8675 var36.i = var35.i + var37.i;
/* Backup function registered for the "orc_haar_add_half_s16" program: adds
 * the rounded half of one value to another, with buffers taken from the
 * executor.
 * NOTE(review): how n is obtained and the element loads/stores are not
 * visible in this excerpt. */
8684 _backup_orc_haar_add_half_s16 (OrcExecutor * ORC_RESTRICT ex)
8688 orc_union16 * ORC_RESTRICT ptr0;
8689 const orc_union16 * ORC_RESTRICT ptr4;
/* Slot 0 is the destination, slot 4 the source (presumably ORC_VAR_D1 and
 * ORC_VAR_S1). */
8696 ptr0 = (orc_union16 *)ex->arrays[0];
8697 ptr4 = (orc_union16 *)ex->arrays[4];
8700 var34.i = 0x00000000; /* 0 or 0f */
8702 for (i = 0; i < n; i++) {
/* Average with the zero constant, i.e. a halving that rounds up. */
8706 var37.i = (var33.i + var34.i + 1)>>1;
8710 var36.i = var35.i + var37.i;
/* ORC front end for orc_haar_add_half_s16: t1 = avgsw(s1, 0), then d1 += t1.
 * NOTE(review): the p_inited check, the use of n and the call through func
 * are not visible in this excerpt. */
8718 orc_haar_add_half_s16 (int16_t * d1, const int16_t * s1, int n)
8720 OrcExecutor _ex, *ex = &_ex;
8721 static int p_inited = 0;
8722 static OrcProgram *p = 0;
8723 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
8726 orc_once_mutex_lock ();
8728 OrcCompileResult result;
8730 p = orc_program_new ();
8731 orc_program_set_name (p, "orc_haar_add_half_s16");
8732 orc_program_set_backup_function (p, _backup_orc_haar_add_half_s16);
8733 orc_program_add_destination (p, 2, "d1");
8734 orc_program_add_source (p, 2, "s1");
/* Zero constant used as the second operand of avgsw. */
8735 orc_program_add_constant (p, 4, 0x00000000, "c1");
8736 orc_program_add_temporary (p, 2, "t1");
8738 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);
8739 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);
8741 result = orc_program_compile (p);
8744 orc_once_mutex_unlock ();
8749 ex->arrays[ORC_VAR_D1] = d1;
8750 ex->arrays[ORC_VAR_S1] = (void *)s1;
8752 func = p->code_exec;
8758 /* orc_haar_add_s16 */
/* Plain C variant of orc_haar_add_s16: per element computes
 * var34 = var32 + var33 on 16-bit values.
 * NOTE(review): the loads of var32/var33 and the store of var34 are not
 * visible; the ORC program below is "addw d1, d1, s1", which suggests
 * d1[i] += s1[i] -- confirm. */
8761 orc_haar_add_s16 (int16_t * d1, const int16_t * s1, int n){
8763 orc_union16 * ORC_RESTRICT ptr0;
8764 const orc_union16 * ORC_RESTRICT ptr4;
8769 ptr0 = (orc_union16 *)d1;
8770 ptr4 = (orc_union16 *)s1;
8773 for (i = 0; i < n; i++) {
8779 var34.i = var32.i + var33.i;
/* Backup function registered for the "orc_haar_add_s16" program: the same
 * 16-bit addition, with buffers taken from the executor.
 * NOTE(review): how n is obtained and the element loads/stores are not
 * visible in this excerpt. */
8788 _backup_orc_haar_add_s16 (OrcExecutor * ORC_RESTRICT ex)
8792 orc_union16 * ORC_RESTRICT ptr0;
8793 const orc_union16 * ORC_RESTRICT ptr4;
/* Slot 0 is the destination, slot 4 the source (presumably ORC_VAR_D1 and
 * ORC_VAR_S1). */
8798 ptr0 = (orc_union16 *)ex->arrays[0];
8799 ptr4 = (orc_union16 *)ex->arrays[4];
8802 for (i = 0; i < n; i++) {
8808 var34.i = var32.i + var33.i;
/* ORC front end for orc_haar_add_s16: a single-opcode program that adds s1
 * to d1 in place.
 * NOTE(review): the p_inited check, the use of n and the call through func
 * are not visible in this excerpt. */
8816 orc_haar_add_s16 (int16_t * d1, const int16_t * s1, int n)
8818 OrcExecutor _ex, *ex = &_ex;
8819 static int p_inited = 0;
8820 static OrcProgram *p = 0;
8821 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
8824 orc_once_mutex_lock ();
8826 OrcCompileResult result;
8828 p = orc_program_new ();
8829 orc_program_set_name (p, "orc_haar_add_s16");
8830 orc_program_set_backup_function (p, _backup_orc_haar_add_s16);
8831 orc_program_add_destination (p, 2, "d1");
8832 orc_program_add_source (p, 2, "s1");
/* d1 is both an input and the output of the addition. */
8834 orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1);
8836 result = orc_program_compile (p);
8839 orc_once_mutex_unlock ();
8844 ex->arrays[ORC_VAR_D1] = d1;
8845 ex->arrays[ORC_VAR_S1] = (void *)s1;
8847 func = p->code_exec;
8853 /* orc_haar_sub_half_s16 */
/* Plain C variant of orc_haar_sub_half_s16: per element forms the rounded
 * half var37 = (var33 + 0 + 1) >> 1 and subtracts it from var35.
 * NOTE(review): the loads of var33/var35 and the store of var36 are not
 * visible; the ORC program below suggests d1[i] -= (s1[i] + 1) >> 1 --
 * confirm. */
8856 orc_haar_sub_half_s16 (int16_t * d1, const int16_t * s1, int n){
8858 orc_union16 * ORC_RESTRICT ptr0;
8859 const orc_union16 * ORC_RESTRICT ptr4;
8866 ptr0 = (orc_union16 *)d1;
8867 ptr4 = (orc_union16 *)s1;
8870 var34.i = 0x00000000; /* 0 or 0f */
8872 for (i = 0; i < n; i++) {
/* Average with the zero constant, i.e. a halving that rounds up. */
8876 var37.i = (var33.i + var34.i + 1)>>1;
8880 var36.i = var35.i - var37.i;
/* Backup function registered for the "orc_haar_sub_half_s16" program:
 * subtracts the rounded half of one value from another, with buffers taken
 * from the executor.
 * NOTE(review): how n is obtained and the element loads/stores are not
 * visible in this excerpt. */
8889 _backup_orc_haar_sub_half_s16 (OrcExecutor * ORC_RESTRICT ex)
8893 orc_union16 * ORC_RESTRICT ptr0;
8894 const orc_union16 * ORC_RESTRICT ptr4;
/* Slot 0 is the destination, slot 4 the source (presumably ORC_VAR_D1 and
 * ORC_VAR_S1). */
8901 ptr0 = (orc_union16 *)ex->arrays[0];
8902 ptr4 = (orc_union16 *)ex->arrays[4];
8905 var34.i = 0x00000000; /* 0 or 0f */
8907 for (i = 0; i < n; i++) {
/* Average with the zero constant, i.e. a halving that rounds up. */
8911 var37.i = (var33.i + var34.i + 1)>>1;
8915 var36.i = var35.i - var37.i;
/* ORC front end for orc_haar_sub_half_s16: t1 = avgsw(s1, 0), then d1 -= t1.
 * NOTE(review): the p_inited check, the use of n and the call through func
 * are not visible in this excerpt. */
8923 orc_haar_sub_half_s16 (int16_t * d1, const int16_t * s1, int n)
8925 OrcExecutor _ex, *ex = &_ex;
8926 static int p_inited = 0;
8927 static OrcProgram *p = 0;
8928 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
8931 orc_once_mutex_lock ();
8933 OrcCompileResult result;
8935 p = orc_program_new ();
8936 orc_program_set_name (p, "orc_haar_sub_half_s16");
8937 orc_program_set_backup_function (p, _backup_orc_haar_sub_half_s16);
8938 orc_program_add_destination (p, 2, "d1");
8939 orc_program_add_source (p, 2, "s1");
/* Zero constant used as the second operand of avgsw. */
8940 orc_program_add_constant (p, 4, 0x00000000, "c1");
8941 orc_program_add_temporary (p, 2, "t1");
8943 orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);
8944 orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);
8946 result = orc_program_compile (p);
8949 orc_once_mutex_unlock ();
8954 ex->arrays[ORC_VAR_D1] = d1;
8955 ex->arrays[ORC_VAR_S1] = (void *)s1;
8957 func = p->code_exec;
/* Plain C variant of orc_sum_u8: widens each value through unsigned 8-bit
 * and unsigned 16-bit casts and adds it to the 32-bit accumulator var12.
 * NOTE(review): the load of var34 and the write of the total to *a1 are not
 * visible in this excerpt. */
8966 orc_sum_u8 (int32_t * a1, const orc_uint8 * s1, int n){
8968 const orc_int8 * ORC_RESTRICT ptr4;
/* The accumulator starts at zero. */
8969 orc_union32 var12 = { 0 };
/* s1 is read through a signed-byte pointer; the (orc_uint8) cast below makes
 * the value non-negative again. */
8974 ptr4 = (orc_int8 *)s1;
8977 for (i = 0; i < n; i++) {
8981 var35.i = (orc_uint8)var34;
8983 var36.i = (orc_uint16)var35.i;
8985 var12.i = var12.i + var36.i;
/* Backup function registered for the "orc_sum_u8" program: sums
 * zero-extended bytes and publishes the total in ex->accumulators[0].
 * NOTE(review): how n is obtained and the load of var34 are not visible in
 * this excerpt. */
8993 _backup_orc_sum_u8 (OrcExecutor * ORC_RESTRICT ex)
8997 const orc_int8 * ORC_RESTRICT ptr4;
8998 orc_union32 var12 = { 0 };
/* Slot 4 is the source array (presumably ORC_VAR_S1). */
9003 ptr4 = (orc_int8 *)ex->arrays[4];
9006 for (i = 0; i < n; i++) {
9010 var35.i = (orc_uint8)var34;
9012 var36.i = (orc_uint16)var35.i;
9014 var12.i = var12.i + var36.i;
/* The wrapper reads this total back with orc_executor_get_accumulator. */
9016 ex->accumulators[0] = var12.i;
/* ORC front end for orc_sum_u8: widens each source byte to 32 bits, adds it
 * into accumulator a1, and copies the accumulator into *a1.
 * NOTE(review): the p_inited check, the use of n and the call through func
 * are not visible in this excerpt. */
9021 orc_sum_u8 (int32_t * a1, const orc_uint8 * s1, int n)
9023 OrcExecutor _ex, *ex = &_ex;
9024 static int p_inited = 0;
9025 static OrcProgram *p = 0;
9026 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
9029 orc_once_mutex_lock ();
9031 OrcCompileResult result;
9033 p = orc_program_new ();
9034 orc_program_set_name (p, "orc_sum_u8");
9035 orc_program_set_backup_function (p, _backup_orc_sum_u8);
9036 orc_program_add_source (p, 1, "s1");
9037 orc_program_add_accumulator (p, 4, "a1");
9038 orc_program_add_temporary (p, 2, "t1");
9039 orc_program_add_temporary (p, 4, "t2");
/* 1-byte source -> 2-byte t1 -> 4-byte t2 -> accumulate into a1. */
9041 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
9042 orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
9043 orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
9045 result = orc_program_compile (p);
9048 orc_once_mutex_unlock ();
9053 ex->arrays[ORC_VAR_S1] = (void *)s1;
9055 func = p->code_exec;
/* Hand the accumulated total back to the caller. */
9057 *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
/* Plain C variant of orc_sum_s16: adds one value per element into the 32-bit
 * accumulator var12.
 * NOTE(review): the load/widening that produces var34 and the write of the
 * total to *a1 are not visible; the ORC program below uses "convswl", which
 * suggests a signed 16->32 widening -- confirm. */
9065 orc_sum_s16 (int32_t * a1, const int16_t * s1, int n){
9067 const orc_union16 * ORC_RESTRICT ptr4;
/* The accumulator starts at zero. */
9068 orc_union32 var12 = { 0 };
9072 ptr4 = (orc_union16 *)s1;
9075 for (i = 0; i < n; i++) {
9081 var12.i = var12.i + var34.i;
/* Backup function registered for the "orc_sum_s16" program: accumulates one
 * value per element and publishes the total in ex->accumulators[0].
 * NOTE(review): how n is obtained and the step that produces var34 are not
 * visible in this excerpt. */
9089 _backup_orc_sum_s16 (OrcExecutor * ORC_RESTRICT ex)
9093 const orc_union16 * ORC_RESTRICT ptr4;
9094 orc_union32 var12 = { 0 };
/* Slot 4 is the source array (presumably ORC_VAR_S1). */
9098 ptr4 = (orc_union16 *)ex->arrays[4];
9101 for (i = 0; i < n; i++) {
9107 var12.i = var12.i + var34.i;
/* The wrapper reads this total back with orc_executor_get_accumulator. */
9109 ex->accumulators[0] = var12.i;
/* ORC front end for orc_sum_s16: converts each 2-byte source element to a
 * 4-byte temporary, adds it into accumulator a1, and copies the accumulator
 * into *a1.
 * NOTE(review): the p_inited check, the use of n and the call through func
 * are not visible in this excerpt. */
9114 orc_sum_s16 (int32_t * a1, const int16_t * s1, int n)
9116 OrcExecutor _ex, *ex = &_ex;
9117 static int p_inited = 0;
9118 static OrcProgram *p = 0;
9119 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
9122 orc_once_mutex_lock ();
9124 OrcCompileResult result;
9126 p = orc_program_new ();
9127 orc_program_set_name (p, "orc_sum_s16");
9128 orc_program_set_backup_function (p, _backup_orc_sum_s16);
9129 orc_program_add_source (p, 2, "s1");
9130 orc_program_add_accumulator (p, 4, "a1");
9131 orc_program_add_temporary (p, 4, "t1");
9133 orc_program_append_2 (p, "convswl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
9134 orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
9136 result = orc_program_compile (p);
9139 orc_once_mutex_unlock ();
9144 ex->arrays[ORC_VAR_S1] = (void *)s1;
9146 func = p->code_exec;
/* Hand the accumulated total back to the caller. */
9148 *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
9153 /* orc_sum_square_diff_u8 */
/* Plain C variant of orc_sum_square_diff_u8: per element takes two values
 * cast to unsigned 8-bit, squares their difference and adds the square to
 * the 32-bit accumulator var12.
 * NOTE(review): the loads of var35/var36 and the write of the total to *a1
 * are not visible in this excerpt. */
9156 orc_sum_square_diff_u8 (int32_t * a1, const orc_uint8 * s1, const orc_uint8 * s2, int n){
9158 const orc_int8 * ORC_RESTRICT ptr4;
9159 const orc_int8 * ORC_RESTRICT ptr5;
/* The accumulator starts at zero. */
9160 orc_union32 var12 = { 0 };
9169 ptr4 = (orc_int8 *)s1;
9170 ptr5 = (orc_int8 *)s2;
9173 for (i = 0; i < n; i++) {
9177 var37.i = (orc_uint8)var35;
9181 var38.i = (orc_uint8)var36;
9183 var39.i = var37.i - var38.i;
/* Both operands are in 0..255, so the square is at most 255 * 255 = 65025
 * and the 16-bit mask loses nothing. */
9185 var40.i = (var39.i * var39.i) & 0xffff;
/* Widen as unsigned so squares >= 0x8000 stay positive. */
9187 var41.i = (orc_uint16)var40.i;
9189 var12.i = var12.i + var41.i;
/* Backup function registered for the "orc_sum_square_diff_u8" program: sums
 * squared differences of unsigned bytes and publishes the total in
 * ex->accumulators[0].
 * NOTE(review): how n is obtained and the loads of var35/var36 are not
 * visible in this excerpt. */
9197 _backup_orc_sum_square_diff_u8 (OrcExecutor * ORC_RESTRICT ex)
9201 const orc_int8 * ORC_RESTRICT ptr4;
9202 const orc_int8 * ORC_RESTRICT ptr5;
9203 orc_union32 var12 = { 0 };
/* Slots 4 and 5 are the two source arrays (presumably ORC_VAR_S1 and
 * ORC_VAR_S2). */
9212 ptr4 = (orc_int8 *)ex->arrays[4];
9213 ptr5 = (orc_int8 *)ex->arrays[5];
9216 for (i = 0; i < n; i++) {
9220 var37.i = (orc_uint8)var35;
9224 var38.i = (orc_uint8)var36;
9226 var39.i = var37.i - var38.i;
/* Operands are in 0..255, so the square (<= 65025) fits the 16-bit mask. */
9228 var40.i = (var39.i * var39.i) & 0xffff;
/* Widen as unsigned so squares >= 0x8000 stay positive. */
9230 var41.i = (orc_uint16)var40.i;
9232 var12.i = var12.i + var41.i;
/* The wrapper reads this total back with orc_executor_get_accumulator. */
9234 ex->accumulators[0] = var12.i;
/* ORC front end for orc_sum_square_diff_u8: widens both byte sources to
 * 16 bits, squares their difference, widens to 32 bits, accumulates into a1
 * and copies the accumulator into *a1.
 * NOTE(review): the p_inited check, the use of n and the call through func
 * are not visible in this excerpt. */
9239 orc_sum_square_diff_u8 (int32_t * a1, const orc_uint8 * s1, const orc_uint8 * s2, int n)
9241 OrcExecutor _ex, *ex = &_ex;
9242 static int p_inited = 0;
9243 static OrcProgram *p = 0;
9244 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
9247 orc_once_mutex_lock ();
9249 OrcCompileResult result;
9251 p = orc_program_new ();
9252 orc_program_set_name (p, "orc_sum_square_diff_u8");
9253 orc_program_set_backup_function (p, _backup_orc_sum_square_diff_u8);
9254 orc_program_add_source (p, 1, "s1");
9255 orc_program_add_source (p, 1, "s2");
9256 orc_program_add_accumulator (p, 4, "a1");
9257 orc_program_add_temporary (p, 2, "t1");
9258 orc_program_add_temporary (p, 2, "t2");
9259 orc_program_add_temporary (p, 4, "t3");
/* t1 = s1, t2 = s2 (widened); t1 -= t2; t1 *= t1; t3 = widen(t1); a1 += t3. */
9261 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
9262 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
9263 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
9264 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1);
9265 orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
9266 orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
9268 result = orc_program_compile (p);
9271 orc_once_mutex_unlock ();
9276 ex->arrays[ORC_VAR_S1] = (void *)s1;
9277 ex->arrays[ORC_VAR_S2] = (void *)s2;
9279 func = p->code_exec;
/* Hand the accumulated total back to the caller. */
9281 *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
9286 /* orc_dequantise_s16_2d_4xn */
/* Plain C variant of orc_dequantise_s16_2d_4xn: walks m rows addressed by
 * byte strides and, per element, computes
 *   sign(x) * (((|x| * var35) & 0xffff) + var36 >> 2)   (result masked to 16 bits).
 * NOTE(review): n is not a parameter; presumably a local fixed at 4 to match
 * orc_program_set_constant_n (p, 4) in the ORC variant. var35/var36 are
 * presumably p1/p2. Their setup, the load of var38 and the store of var37
 * are not visible in this excerpt. */
9289 orc_dequantise_s16_2d_4xn (int16_t * d1, int d1_stride, const int16_t * s1, int s1_stride, int p1, int p2, int m){
9293 orc_union16 * ORC_RESTRICT ptr0;
9294 const orc_union16 * ORC_RESTRICT ptr4;
9306 for (j = 0; j < m; j++) {
/* Row j starts stride * j past the base pointer. */
9307 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
9308 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
9315 for (i = 0; i < n; i++) {
/* Clamping to [-1, 1] yields the sign of the input (-1, 0 or 1). */
9321 var39.i = ORC_CLAMP(var38.i,-1,1);
9323 var40.i = ORC_ABS(var38.i);
9325 var41.i = (var40.i * var35.i) & 0xffff;
9327 var42.i = var41.i + var36.i;
9329 var43.i = var42.i >> 2;
/* Re-apply the sign to the scaled magnitude. */
9331 var37.i = (var43.i * var39.i) & 0xffff;
/* Backup function registered for the "orc_dequantise_s16_2d_4xn" program:
 * same per-element arithmetic as the plain C variant, with rows, strides and
 * the two scalar parameters taken from the executor.
 * NOTE(review): how n is obtained, the load of var38 and the store of var37
 * are not visible in this excerpt. */
9341 _backup_orc_dequantise_s16_2d_4xn (OrcExecutor * ORC_RESTRICT ex)
/* Row count, stored by the wrapper through ORC_EXECUTOR_M(ex). */
9346 int m = ex->params[ORC_VAR_A1];
9347 orc_union16 * ORC_RESTRICT ptr0;
9348 const orc_union16 * ORC_RESTRICT ptr4;
9360 for (j = 0; j < m; j++) {
/* params[0] / params[4] hold the strides for arrays[0] / arrays[4]. */
9361 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
9362 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
/* Slots 24 and 25 are presumably ORC_VAR_P1 and ORC_VAR_P2 (p1, p2). */
9365 var35.i = ex->params[24];
9367 var36.i = ex->params[25];
9369 for (i = 0; i < n; i++) {
/* Clamping to [-1, 1] yields the sign of the input (-1, 0 or 1). */
9375 var39.i = ORC_CLAMP(var38.i,-1,1);
9377 var40.i = ORC_ABS(var38.i);
9379 var41.i = (var40.i * var35.i) & 0xffff;
9381 var42.i = var41.i + var36.i;
9383 var43.i = var42.i >> 2;
/* Re-apply the sign to the scaled magnitude. */
9385 var37.i = (var43.i * var39.i) & 0xffff;
/* ORC front end for orc_dequantise_s16_2d_4xn: builds a 2-D program with a
 * constant n of 4, then binds buffers, strides, p1/p2 and the row count m.
 * NOTE(review): the p_inited check and the call through func are not visible
 * in this excerpt. */
9394 orc_dequantise_s16_2d_4xn (int16_t * d1, int d1_stride, const int16_t * s1, int s1_stride, int p1, int p2, int m)
9396 OrcExecutor _ex, *ex = &_ex;
9397 static int p_inited = 0;
9398 static OrcProgram *p = 0;
9399 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
9402 orc_once_mutex_lock ();
9404 OrcCompileResult result;
9406 p = orc_program_new ();
/* The function takes no n argument; the row width is fixed in the program. */
9407 orc_program_set_constant_n (p, 4);
9408 orc_program_set_2d (p);
9409 orc_program_set_name (p, "orc_dequantise_s16_2d_4xn");
9410 orc_program_set_backup_function (p, _backup_orc_dequantise_s16_2d_4xn);
9411 orc_program_add_destination (p, 2, "d1");
9412 orc_program_add_source (p, 2, "s1");
/* c1 = 2 is the shift amount consumed by shrsw. */
9413 orc_program_add_constant (p, 4, 0x00000002, "c1");
/* p1/p2 are declared 2 bytes wide although they are passed as int. */
9414 orc_program_add_parameter (p, 2, "p1");
9415 orc_program_add_parameter (p, 2, "p2");
9416 orc_program_add_temporary (p, 2, "t1");
9417 orc_program_add_temporary (p, 2, "t2");
/* t1 = s1; t2 = sign(t1); t1 = |t1|; t1 = ((t1 * p1) + p2) >> c1; d1 = t1 * t2. */
9419 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
9420 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
9421 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
9422 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
9423 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
9424 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
9425 orc_program_append_2 (p, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
9427 result = orc_program_compile (p);
9430 orc_once_mutex_unlock ();
/* Row count, then each array with its stride in the matching params slot. */
9435 ORC_EXECUTOR_M(ex) = m;
9436 ex->arrays[ORC_VAR_D1] = d1;
9437 ex->params[ORC_VAR_D1] = d1_stride;
9438 ex->arrays[ORC_VAR_S1] = (void *)s1;
9439 ex->params[ORC_VAR_S1] = s1_stride;
9440 ex->params[ORC_VAR_P1] = p1;
9441 ex->params[ORC_VAR_P2] = p2;
9443 func = p->code_exec;
9449 /* orc_dequantise_s16_2d_8xn */
/* Plain C variant of orc_dequantise_s16_2d_8xn: walks m rows addressed by
 * byte strides and, per element, computes
 *   sign(x) * (((|x| * var35) & 0xffff) + var36 >> 2)   (result masked to 16 bits).
 * NOTE(review): n is not a parameter; presumably a local fixed at 8 to match
 * orc_program_set_constant_n (p, 8) in the ORC variant. var35/var36 are
 * presumably p1/p2. Their setup, the load of var38 and the store of var37
 * are not visible in this excerpt. */
9452 orc_dequantise_s16_2d_8xn (int16_t * d1, int d1_stride, const int16_t * s1, int s1_stride, int p1, int p2, int m){
9456 orc_union16 * ORC_RESTRICT ptr0;
9457 const orc_union16 * ORC_RESTRICT ptr4;
9469 for (j = 0; j < m; j++) {
/* Row j starts stride * j past the base pointer. */
9470 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
9471 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
9478 for (i = 0; i < n; i++) {
/* Clamping to [-1, 1] yields the sign of the input (-1, 0 or 1). */
9484 var39.i = ORC_CLAMP(var38.i,-1,1);
9486 var40.i = ORC_ABS(var38.i);
9488 var41.i = (var40.i * var35.i) & 0xffff;
9490 var42.i = var41.i + var36.i;
9492 var43.i = var42.i >> 2;
/* Re-apply the sign to the scaled magnitude. */
9494 var37.i = (var43.i * var39.i) & 0xffff;
/* Backup function registered for the "orc_dequantise_s16_2d_8xn" program:
 * same per-element arithmetic as the plain C variant, with rows, strides and
 * the two scalar parameters taken from the executor.
 * NOTE(review): how n is obtained, the load of var38 and the store of var37
 * are not visible in this excerpt. */
9504 _backup_orc_dequantise_s16_2d_8xn (OrcExecutor * ORC_RESTRICT ex)
/* Row count, stored by the wrapper through ORC_EXECUTOR_M(ex). */
9509 int m = ex->params[ORC_VAR_A1];
9510 orc_union16 * ORC_RESTRICT ptr0;
9511 const orc_union16 * ORC_RESTRICT ptr4;
9523 for (j = 0; j < m; j++) {
/* params[0] / params[4] hold the strides for arrays[0] / arrays[4]. */
9524 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
9525 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
/* Slots 24 and 25 are presumably ORC_VAR_P1 and ORC_VAR_P2 (p1, p2). */
9528 var35.i = ex->params[24];
9530 var36.i = ex->params[25];
9532 for (i = 0; i < n; i++) {
/* Clamping to [-1, 1] yields the sign of the input (-1, 0 or 1). */
9538 var39.i = ORC_CLAMP(var38.i,-1,1);
9540 var40.i = ORC_ABS(var38.i);
9542 var41.i = (var40.i * var35.i) & 0xffff;
9544 var42.i = var41.i + var36.i;
9546 var43.i = var42.i >> 2;
/* Re-apply the sign to the scaled magnitude. */
9548 var37.i = (var43.i * var39.i) & 0xffff;
/* ORC front end for orc_dequantise_s16_2d_8xn: builds a 2-D program with a
 * constant n of 8, then binds buffers, strides, p1/p2 and the row count m.
 * NOTE(review): the p_inited check and the call through func are not visible
 * in this excerpt. */
9557 orc_dequantise_s16_2d_8xn (int16_t * d1, int d1_stride, const int16_t * s1, int s1_stride, int p1, int p2, int m)
9559 OrcExecutor _ex, *ex = &_ex;
9560 static int p_inited = 0;
9561 static OrcProgram *p = 0;
9562 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
9565 orc_once_mutex_lock ();
9567 OrcCompileResult result;
9569 p = orc_program_new ();
/* The function takes no n argument; the row width is fixed in the program. */
9570 orc_program_set_constant_n (p, 8);
9571 orc_program_set_2d (p);
9572 orc_program_set_name (p, "orc_dequantise_s16_2d_8xn");
9573 orc_program_set_backup_function (p, _backup_orc_dequantise_s16_2d_8xn);
9574 orc_program_add_destination (p, 2, "d1");
9575 orc_program_add_source (p, 2, "s1");
/* c1 = 2 is the shift amount consumed by shrsw. */
9576 orc_program_add_constant (p, 4, 0x00000002, "c1");
/* p1/p2 are declared 2 bytes wide although they are passed as int. */
9577 orc_program_add_parameter (p, 2, "p1");
9578 orc_program_add_parameter (p, 2, "p2");
9579 orc_program_add_temporary (p, 2, "t1");
9580 orc_program_add_temporary (p, 2, "t2");
/* t1 = s1; t2 = sign(t1); t1 = |t1|; t1 = ((t1 * p1) + p2) >> c1; d1 = t1 * t2. */
9582 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
9583 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
9584 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
9585 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
9586 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
9587 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
9588 orc_program_append_2 (p, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
9590 result = orc_program_compile (p);
9593 orc_once_mutex_unlock ();
/* Row count, then each array with its stride in the matching params slot. */
9598 ORC_EXECUTOR_M(ex) = m;
9599 ex->arrays[ORC_VAR_D1] = d1;
9600 ex->params[ORC_VAR_D1] = d1_stride;
9601 ex->arrays[ORC_VAR_S1] = (void *)s1;
9602 ex->params[ORC_VAR_S1] = s1_stride;
9603 ex->params[ORC_VAR_P1] = p1;
9604 ex->params[ORC_VAR_P2] = p2;
9606 func = p->code_exec;
9612 /* orc_dequantise_s16_ip_2d_8xn */
/* Plain C variant of orc_dequantise_s16_ip_2d_8xn: walks m rows of d1 and,
 * per element, computes
 *   sign(x) * (((|x| * var35) & 0xffff) + var36 >> 2)   (result masked to 16 bits).
 * There is no source array; the ORC variant copies from d1, so this is
 * presumably in place.
 * NOTE(review): n is not a parameter; presumably a local fixed at 8 to match
 * orc_program_set_constant_n (p, 8). var35/var36 are presumably p1/p2. Their
 * setup, the load of var38 and the store of var37 are not visible. */
9615 orc_dequantise_s16_ip_2d_8xn (int16_t * d1, int d1_stride, int p1, int p2, int m){
9619 orc_union16 * ORC_RESTRICT ptr0;
9631 for (j = 0; j < m; j++) {
/* Row j starts d1_stride * j past d1. */
9632 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
9639 for (i = 0; i < n; i++) {
/* Clamping to [-1, 1] yields the sign of the input (-1, 0 or 1). */
9645 var39.i = ORC_CLAMP(var38.i,-1,1);
9647 var40.i = ORC_ABS(var38.i);
9649 var41.i = (var40.i * var35.i) & 0xffff;
9651 var42.i = var41.i + var36.i;
9653 var43.i = var42.i >> 2;
/* Re-apply the sign to the scaled magnitude. */
9655 var37.i = (var43.i * var39.i) & 0xffff;
/* Backup function registered for the "orc_dequantise_s16_ip_2d_8xn" program:
 * same per-element arithmetic as the plain C variant, with the buffer,
 * stride and scalar parameters taken from the executor.
 * NOTE(review): how n is obtained, the load of var38 and the store of var37
 * are not visible in this excerpt. */
9665 _backup_orc_dequantise_s16_ip_2d_8xn (OrcExecutor * ORC_RESTRICT ex)
/* Row count, stored by the wrapper through ORC_EXECUTOR_M(ex). */
9670 int m = ex->params[ORC_VAR_A1];
9671 orc_union16 * ORC_RESTRICT ptr0;
9683 for (j = 0; j < m; j++) {
/* params[0] holds the stride for arrays[0]. */
9684 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
/* Slots 24 and 25 are presumably ORC_VAR_P1 and ORC_VAR_P2 (p1, p2). */
9687 var35.i = ex->params[24];
9689 var36.i = ex->params[25];
9691 for (i = 0; i < n; i++) {
/* Clamping to [-1, 1] yields the sign of the input (-1, 0 or 1). */
9697 var39.i = ORC_CLAMP(var38.i,-1,1);
9699 var40.i = ORC_ABS(var38.i);
9701 var41.i = (var40.i * var35.i) & 0xffff;
9703 var42.i = var41.i + var36.i;
9705 var43.i = var42.i >> 2;
/* Re-apply the sign to the scaled magnitude. */
9707 var37.i = (var43.i * var39.i) & 0xffff;
/* ORC front end for orc_dequantise_s16_ip_2d_8xn: in-place 2-D dequantise
 * with a constant n of 8; d1 is both read (copyw) and written (final mullw).
 * NOTE(review): the p_inited check and the call through func are not visible
 * in this excerpt. */
9716 orc_dequantise_s16_ip_2d_8xn (int16_t * d1, int d1_stride, int p1, int p2, int m)
9718 OrcExecutor _ex, *ex = &_ex;
9719 static int p_inited = 0;
9720 static OrcProgram *p = 0;
9721 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
9724 orc_once_mutex_lock ();
9726 OrcCompileResult result;
9728 p = orc_program_new ();
/* The function takes no n argument; the row width is fixed in the program. */
9729 orc_program_set_constant_n (p, 8);
9730 orc_program_set_2d (p);
9731 orc_program_set_name (p, "orc_dequantise_s16_ip_2d_8xn");
9732 orc_program_set_backup_function (p, _backup_orc_dequantise_s16_ip_2d_8xn);
9733 orc_program_add_destination (p, 2, "d1");
/* c1 = 2 is the shift amount consumed by shrsw. */
9734 orc_program_add_constant (p, 4, 0x00000002, "c1");
9735 orc_program_add_parameter (p, 2, "p1");
9736 orc_program_add_parameter (p, 2, "p2");
9737 orc_program_add_temporary (p, 2, "t1");
9738 orc_program_add_temporary (p, 2, "t2");
/* t1 = d1; t2 = sign(t1); t1 = |t1|; t1 = ((t1 * p1) + p2) >> c1; d1 = t1 * t2. */
9740 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
9741 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
9742 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
9743 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
9744 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
9745 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
9746 orc_program_append_2 (p, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
9748 result = orc_program_compile (p);
9751 orc_once_mutex_unlock ();
/* Row count, then the array with its stride in the matching params slot. */
9756 ORC_EXECUTOR_M(ex) = m;
9757 ex->arrays[ORC_VAR_D1] = d1;
9758 ex->params[ORC_VAR_D1] = d1_stride;
9759 ex->params[ORC_VAR_P1] = p1;
9760 ex->params[ORC_VAR_P2] = p2;
9762 func = p->code_exec;
9768 /* orc_dequantise_s16_ip_2d */
/* Plain C variant of orc_dequantise_s16_ip_2d: walks m rows of n elements in
 * d1 and, per element, computes
 *   sign(x) * (((|x| * var35) & 0xffff) + var36 >> 2)   (result masked to 16 bits).
 * There is no source array; the ORC variant copies from d1, so this is
 * presumably in place.
 * NOTE(review): var35/var36 are presumably p1/p2. Their setup, the load of
 * var38 and the store of var37 are not visible in this excerpt. */
9771 orc_dequantise_s16_ip_2d (int16_t * d1, int d1_stride, int p1, int p2, int n, int m){
9774 orc_union16 * ORC_RESTRICT ptr0;
9786 for (j = 0; j < m; j++) {
/* Row j starts d1_stride * j past d1. */
9787 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
9794 for (i = 0; i < n; i++) {
/* Clamping to [-1, 1] yields the sign of the input (-1, 0 or 1). */
9800 var39.i = ORC_CLAMP(var38.i,-1,1);
9802 var40.i = ORC_ABS(var38.i);
9804 var41.i = (var40.i * var35.i) & 0xffff;
9806 var42.i = var41.i + var36.i;
9808 var43.i = var42.i >> 2;
/* Re-apply the sign to the scaled magnitude. */
9810 var37.i = (var43.i * var39.i) & 0xffff;
/* Backup function registered for the "orc_dequantise_s16_ip_2d" program:
 * same per-element arithmetic as the plain C variant, with the buffer,
 * stride and scalar parameters taken from the executor.
 * NOTE(review): how n is obtained, the load of var38 and the store of var37
 * are not visible in this excerpt. */
9820 _backup_orc_dequantise_s16_ip_2d (OrcExecutor * ORC_RESTRICT ex)
/* Row count, stored by the wrapper through ORC_EXECUTOR_M(ex). */
9825 int m = ex->params[ORC_VAR_A1];
9826 orc_union16 * ORC_RESTRICT ptr0;
9838 for (j = 0; j < m; j++) {
/* params[0] holds the stride for arrays[0]. */
9839 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
/* Slots 24 and 25 are presumably ORC_VAR_P1 and ORC_VAR_P2 (p1, p2). */
9842 var35.i = ex->params[24];
9844 var36.i = ex->params[25];
9846 for (i = 0; i < n; i++) {
/* Clamping to [-1, 1] yields the sign of the input (-1, 0 or 1). */
9852 var39.i = ORC_CLAMP(var38.i,-1,1);
9854 var40.i = ORC_ABS(var38.i);
9856 var41.i = (var40.i * var35.i) & 0xffff;
9858 var42.i = var41.i + var36.i;
9860 var43.i = var42.i >> 2;
/* Re-apply the sign to the scaled magnitude. */
9862 var37.i = (var43.i * var39.i) & 0xffff;
/* ORC front end for orc_dequantise_s16_ip_2d: in-place 2-D dequantise; d1 is
 * both read (copyw) and written (final mullw).
 * NOTE(review): the p_inited check, the use of n and the call through func
 * are not visible in this excerpt. */
9871 orc_dequantise_s16_ip_2d (int16_t * d1, int d1_stride, int p1, int p2, int n, int m)
9873 OrcExecutor _ex, *ex = &_ex;
9874 static int p_inited = 0;
9875 static OrcProgram *p = 0;
9876 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
9879 orc_once_mutex_lock ();
9881 OrcCompileResult result;
9883 p = orc_program_new ();
9884 orc_program_set_2d (p);
9885 orc_program_set_name (p, "orc_dequantise_s16_ip_2d");
9886 orc_program_set_backup_function (p, _backup_orc_dequantise_s16_ip_2d);
9887 orc_program_add_destination (p, 2, "d1");
/* c1 = 2 is the shift amount consumed by shrsw. */
9888 orc_program_add_constant (p, 4, 0x00000002, "c1");
9889 orc_program_add_parameter (p, 2, "p1");
9890 orc_program_add_parameter (p, 2, "p2");
9891 orc_program_add_temporary (p, 2, "t1");
9892 orc_program_add_temporary (p, 2, "t2");
/* t1 = d1; t2 = sign(t1); t1 = |t1|; t1 = ((t1 * p1) + p2) >> c1; d1 = t1 * t2. */
9894 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
9895 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
9896 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
9897 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
9898 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
9899 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
9900 orc_program_append_2 (p, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
9902 result = orc_program_compile (p);
9905 orc_once_mutex_unlock ();
/* Row count, then the array with its stride in the matching params slot. */
9910 ORC_EXECUTOR_M(ex) = m;
9911 ex->arrays[ORC_VAR_D1] = d1;
9912 ex->params[ORC_VAR_D1] = d1_stride;
9913 ex->params[ORC_VAR_P1] = p1;
9914 ex->params[ORC_VAR_P2] = p2;
9916 func = p->code_exec;
9922 /* orc_dequantise_s16_ip */
/* Plain C variant of orc_dequantise_s16_ip: per element of d1 computes
 *   sign(x) * (((|x| * var35) & 0xffff) + var36 >> 2)   (result masked to 16 bits).
 * There is no source array; the ORC variant copies from d1, so this is
 * presumably in place.
 * NOTE(review): var35/var36 are presumably p1/p2. Their setup, the load of
 * var38 and the store of var37 are not visible in this excerpt. */
9925 orc_dequantise_s16_ip (int16_t * d1, int p1, int p2, int n){
9927 orc_union16 * ORC_RESTRICT ptr0;
9939 ptr0 = (orc_union16 *)d1;
9946 for (i = 0; i < n; i++) {
/* Clamping to [-1, 1] yields the sign of the input (-1, 0 or 1). */
9952 var39.i = ORC_CLAMP(var38.i,-1,1);
9954 var40.i = ORC_ABS(var38.i);
9956 var41.i = (var40.i * var35.i) & 0xffff;
9958 var42.i = var41.i + var36.i;
9960 var43.i = var42.i >> 2;
/* Re-apply the sign to the scaled magnitude. */
9962 var37.i = (var43.i * var39.i) & 0xffff;
/* Backup function registered for the "orc_dequantise_s16_ip" program: same
 * per-element arithmetic as the plain C variant, with the buffer and scalar
 * parameters taken from the executor.
 * NOTE(review): how n is obtained, the load of var38 and the store of var37
 * are not visible in this excerpt. */
9971 _backup_orc_dequantise_s16_ip (OrcExecutor * ORC_RESTRICT ex)
9975 orc_union16 * ORC_RESTRICT ptr0;
/* Slot 0 is the destination (presumably ORC_VAR_D1). */
9987 ptr0 = (orc_union16 *)ex->arrays[0];
/* Slots 24 and 25 are presumably ORC_VAR_P1 and ORC_VAR_P2 (p1, p2). */
9990 var35.i = ex->params[24];
9992 var36.i = ex->params[25];
9994 for (i = 0; i < n; i++) {
/* Clamping to [-1, 1] yields the sign of the input (-1, 0 or 1). */
10000 var39.i = ORC_CLAMP(var38.i,-1,1);
10002 var40.i = ORC_ABS(var38.i);
10004 var41.i = (var40.i * var35.i) & 0xffff;
10006 var42.i = var41.i + var36.i;
10008 var43.i = var42.i >> 2;
/* Re-apply the sign to the scaled magnitude. */
10010 var37.i = (var43.i * var39.i) & 0xffff;
/* ORC front end for orc_dequantise_s16_ip: in-place 1-D dequantise; d1 is
 * both read (copyw) and written (final mullw).
 * NOTE(review): the p_inited check, the use of n and the call through func
 * are not visible in this excerpt. */
10018 orc_dequantise_s16_ip (int16_t * d1, int p1, int p2, int n)
10020 OrcExecutor _ex, *ex = &_ex;
10021 static int p_inited = 0;
10022 static OrcProgram *p = 0;
10023 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
10026 orc_once_mutex_lock ();
10028 OrcCompileResult result;
10030 p = orc_program_new ();
10031 orc_program_set_name (p, "orc_dequantise_s16_ip");
10032 orc_program_set_backup_function (p, _backup_orc_dequantise_s16_ip);
10033 orc_program_add_destination (p, 2, "d1");
/* c1 = 2 is the shift amount consumed by shrsw. */
10034 orc_program_add_constant (p, 4, 0x00000002, "c1");
10035 orc_program_add_parameter (p, 2, "p1");
10036 orc_program_add_parameter (p, 2, "p2");
10037 orc_program_add_temporary (p, 2, "t1");
10038 orc_program_add_temporary (p, 2, "t2");
/* t1 = d1; t2 = sign(t1); t1 = |t1|; t1 = ((t1 * p1) + p2) >> c1; d1 = t1 * t2. */
10040 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
10041 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
10042 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
10043 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
10044 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
10045 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
10046 orc_program_append_2 (p, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
10048 result = orc_program_compile (p);
10051 orc_once_mutex_unlock ();
10056 ex->arrays[ORC_VAR_D1] = d1;
10057 ex->params[ORC_VAR_P1] = p1;
10058 ex->params[ORC_VAR_P2] = p2;
10060 func = p->code_exec;
10066 /* orc_dequantise_s16 */
/* Plain C variant of orc_dequantise_s16: per element computes
 *   sign(x) * (((|x| * var35) & 0xffff) + var36 >> 2)   (result masked to 16 bits).
 * The ORC variant reads s1 and writes d1, so x is presumably s1[i].
 * NOTE(review): var35/var36 are presumably p1/p2. Their setup, the load of
 * var38 and the store of var37 are not visible in this excerpt. */
10069 orc_dequantise_s16 (int16_t * d1, const int16_t * s1, int p1, int p2, int n){
10071 orc_union16 * ORC_RESTRICT ptr0;
10072 const orc_union16 * ORC_RESTRICT ptr4;
10084 ptr0 = (orc_union16 *)d1;
10085 ptr4 = (orc_union16 *)s1;
10092 for (i = 0; i < n; i++) {
/* Clamping to [-1, 1] yields the sign of the input (-1, 0 or 1). */
10098 var39.i = ORC_CLAMP(var38.i,-1,1);
10100 var40.i = ORC_ABS(var38.i);
10102 var41.i = (var40.i * var35.i) & 0xffff;
10104 var42.i = var41.i + var36.i;
10106 var43.i = var42.i >> 2;
/* Re-apply the sign to the scaled magnitude. */
10108 var37.i = (var43.i * var39.i) & 0xffff;
/* Backup function registered for the "orc_dequantise_s16" program: same
 * per-element arithmetic as the plain C variant, with buffers and scalar
 * parameters taken from the executor.
 * NOTE(review): how n is obtained, the load of var38 and the store of var37
 * are not visible in this excerpt. */
10117 _backup_orc_dequantise_s16 (OrcExecutor * ORC_RESTRICT ex)
10121 orc_union16 * ORC_RESTRICT ptr0;
10122 const orc_union16 * ORC_RESTRICT ptr4;
/* Slot 0 is the destination, slot 4 the source (presumably ORC_VAR_D1 and
 * ORC_VAR_S1). */
10134 ptr0 = (orc_union16 *)ex->arrays[0];
10135 ptr4 = (orc_union16 *)ex->arrays[4];
/* Slots 24 and 25 are presumably ORC_VAR_P1 and ORC_VAR_P2 (p1, p2). */
10138 var35.i = ex->params[24];
10140 var36.i = ex->params[25];
10142 for (i = 0; i < n; i++) {
/* Clamping to [-1, 1] yields the sign of the input (-1, 0 or 1). */
10148 var39.i = ORC_CLAMP(var38.i,-1,1);
10150 var40.i = ORC_ABS(var38.i);
10152 var41.i = (var40.i * var35.i) & 0xffff;
10154 var42.i = var41.i + var36.i;
10156 var43.i = var42.i >> 2;
/* Re-apply the sign to the scaled magnitude. */
10158 var37.i = (var43.i * var39.i) & 0xffff;
/* ORC front end for orc_dequantise_s16: 1-D dequantise from s1 into d1 with
 * scalar parameters p1 and p2.
 * NOTE(review): the p_inited check, the use of n and the call through func
 * are not visible in this excerpt. */
10166 orc_dequantise_s16 (int16_t * d1, const int16_t * s1, int p1, int p2, int n)
10168 OrcExecutor _ex, *ex = &_ex;
10169 static int p_inited = 0;
10170 static OrcProgram *p = 0;
10171 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
10174 orc_once_mutex_lock ();
10176 OrcCompileResult result;
10178 p = orc_program_new ();
10179 orc_program_set_name (p, "orc_dequantise_s16");
10180 orc_program_set_backup_function (p, _backup_orc_dequantise_s16);
10181 orc_program_add_destination (p, 2, "d1");
10182 orc_program_add_source (p, 2, "s1");
/* c1 = 2 is the shift amount consumed by shrsw. */
10183 orc_program_add_constant (p, 4, 0x00000002, "c1");
/* p1/p2 are declared 2 bytes wide although they are passed as int. */
10184 orc_program_add_parameter (p, 2, "p1");
10185 orc_program_add_parameter (p, 2, "p2");
10186 orc_program_add_temporary (p, 2, "t1");
10187 orc_program_add_temporary (p, 2, "t2");
/* t1 = s1; t2 = sign(t1); t1 = |t1|; t1 = ((t1 * p1) + p2) >> c1; d1 = t1 * t2. */
10189 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
10190 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
10191 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
10192 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
10193 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
10194 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
10195 orc_program_append_2 (p, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
10197 result = orc_program_compile (p);
10200 orc_once_mutex_unlock ();
/* Bind the caller's buffers and scalar parameters. */
10205 ex->arrays[ORC_VAR_D1] = d1;
10206 ex->arrays[ORC_VAR_S1] = (void *)s1;
10207 ex->params[ORC_VAR_P1] = p1;
10208 ex->params[ORC_VAR_P2] = p2;
10210 func = p->code_exec;
10216 /* orc_dequantise_var_s16_ip */
/* Plain C variant of orc_dequantise_var_s16_ip: per element computes
 *   sign(x) * (((|x| * var35) & 0xffff) + var36 >> 2)   (result masked to 16 bits).
 * NOTE(review): unlike the scalar-parameter variants there are two extra
 * arrays; var35/var36 are presumably loaded per element from s1/s2 and x
 * from d1 (in place). Those loads and the store of var37 are not visible in
 * this excerpt. */
10219 orc_dequantise_var_s16_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, int n){
10221 orc_union16 * ORC_RESTRICT ptr0;
10222 const orc_union16 * ORC_RESTRICT ptr4;
10223 const orc_union16 * ORC_RESTRICT ptr5;
10235 ptr0 = (orc_union16 *)d1;
10236 ptr4 = (orc_union16 *)s1;
10237 ptr5 = (orc_union16 *)s2;
10240 for (i = 0; i < n; i++) {
/* Clamping to [-1, 1] yields the sign of the input (-1, 0 or 1). */
10246 var39.i = ORC_CLAMP(var38.i,-1,1);
10248 var40.i = ORC_ABS(var38.i);
10252 var41.i = (var40.i * var35.i) & 0xffff;
10256 var42.i = var41.i + var36.i;
10258 var43.i = var42.i >> 2;
/* Re-apply the sign to the scaled magnitude. */
10260 var37.i = (var43.i * var39.i) & 0xffff;
/* Executor-driven C implementation; it is the function registered through
 * orc_program_set_backup_function for the "orc_dequantise_var_s16_ip"
 * program. Array pointers come from ex->arrays[0], [4] and [5].
 * The visible arithmetic matches the direct-call version: clamp to [-1, 1],
 * absolute value, 16-bit multiply by var35, add var36, shift right by 2,
 * 16-bit multiply by the clamped value.
 * NOTE(review): slots 0/4/5 presumably are ORC_VAR_D1/S1/S2; how n is read
 * from the executor, and the loads/stores, are not visible here -- confirm. */
10269 _backup_orc_dequantise_var_s16_ip (OrcExecutor * ORC_RESTRICT ex)
10273 orc_union16 * ORC_RESTRICT ptr0;
10274 const orc_union16 * ORC_RESTRICT ptr4;
10275 const orc_union16 * ORC_RESTRICT ptr5;
10287 ptr0 = (orc_union16 *)ex->arrays[0];
10288 ptr4 = (orc_union16 *)ex->arrays[4];
10289 ptr5 = (orc_union16 *)ex->arrays[5];
10292 for (i = 0; i < n; i++) {
10298 var39.i = ORC_CLAMP(var38.i,-1,1);
10300 var40.i = ORC_ABS(var38.i);
10304 var41.i = (var40.i * var35.i) & 0xffff;
10308 var42.i = var41.i + var36.i;
10310 var43.i = var42.i >> 2;
10312 var37.i = (var43.i * var39.i) & 0xffff;
/* Entry point that goes through an OrcProgram. Between orc_once_mutex_lock
 * and orc_once_mutex_unlock it builds the program
 * "orc_dequantise_var_s16_ip" (one 2-byte destination, two 2-byte sources,
 * the constant 2, two 2-byte temporaries, seven opcodes) and compiles it;
 * the program pointer lives in the function-static `p`. It then stores the
 * caller's pointers in an on-stack OrcExecutor and fetches p->code_exec.
 * The first opcode copies from ORC_VAR_D1, so d1 is read as well as written.
 * NOTE(review): the p_inited test around the build, any use of `result`,
 * the setup of n/program on the executor and the call through `func` are not
 * visible in this excerpt. */
10320 orc_dequantise_var_s16_ip (int16_t * d1, const int16_t * s1, const int16_t * s2, int n)
10322 OrcExecutor _ex, *ex = &_ex;
10323 static int p_inited = 0;
10324 static OrcProgram *p = 0;
10325 void (*func) (OrcExecutor *);
10328 orc_once_mutex_lock ();
10330 OrcCompileResult result;
10332 p = orc_program_new ();
10333 orc_program_set_name (p, "orc_dequantise_var_s16_ip");
10334 orc_program_set_backup_function (p, _backup_orc_dequantise_var_s16_ip);
10335 orc_program_add_destination (p, 2, "d1");
10336 orc_program_add_source (p, 2, "s1");
10337 orc_program_add_source (p, 2, "s2");
10338 orc_program_add_constant (p, 4, 0x00000002, "c1");
10339 orc_program_add_temporary (p, 2, "t1");
10340 orc_program_add_temporary (p, 2, "t2");
/* Opcode list: t2 = sign(d1), t1 = ((|d1| * s1 + s2) >> c1), d1 = t1 * t2. */
10342 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
10343 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
10344 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
10345 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1);
10346 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1);
10347 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
10348 orc_program_append_2 (p, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
10350 result = orc_program_compile (p);
10353 orc_once_mutex_unlock ();
/* Hand the caller's buffers to the executor; const is cast away for sources. */
10358 ex->arrays[ORC_VAR_D1] = d1;
10359 ex->arrays[ORC_VAR_S1] = (void *)s1;
10360 ex->arrays[ORC_VAR_S2] = (void *)s2;
10362 func = p->code_exec;
10368 /* orc_quantise1_s16 */
/* Direct-call C version of the kernel: d1 and s1 are viewed as orc_union16
 * arrays; p1, p2, p3 are scalar parameters.
 * Visible per-element steps: var38 clamped to [-1, 1] (var39) and its
 * absolute value; shifted left by 2; minus var35; upper 16 bits of the
 * unsigned 16x16 product with var36; unsigned shift right by p3; 16-bit
 * product with the clamped value, giving var37.
 * NOTE(review): the lines that set var35/var36 from the parameters and the
 * load/store of var38/var37 are not visible in this excerpt -- confirm. */
10371 orc_quantise1_s16 (int16_t * d1, const int16_t * s1, int p1, int p2, int p3, int n){
10373 orc_union16 * ORC_RESTRICT ptr0;
10374 const orc_union16 * ORC_RESTRICT ptr4;
10387 ptr0 = (orc_union16 *)d1;
10388 ptr4 = (orc_union16 *)s1;
10395 for (i = 0; i < n; i++) {
10401 var39.i = ORC_CLAMP(var38.i,-1,1);
10403 var40.i = ORC_ABS(var38.i);
10405 var41.i = var40.i << 2;
10407 var42.i = var41.i - var35.i;
/* Both operands are reinterpreted as unsigned 16-bit before multiplying;
 * the >> 16 keeps the high half of the 32-bit product. */
10409 var43.i = ((orc_uint32)((orc_uint16)var42.i) * (orc_uint32)((orc_uint16)var36.i)) >> 16;
10411 var44.i = ((orc_uint16)var43.i) >> p3;
10413 var37.i = (var44.i * var39.i) & 0xffff;
/* Executor-driven C implementation; it is the function registered through
 * orc_program_set_backup_function for the "orc_quantise1_s16" program.
 * Array pointers come from ex->arrays[0] and [4]; var35 is loaded from
 * ex->params[25], var36 from ex->params[24], and the shift count is read
 * from ex->params[26] on every iteration.
 * The visible arithmetic is: clamp to [-1, 1], absolute value, << 2, minus
 * var35, high 16 bits of the unsigned product with var36, unsigned shift
 * right, 16-bit product with the clamped value.
 * NOTE(review): params[24]/[25]/[26] presumably are ORC_VAR_P1/P2/P3 and
 * arrays[0]/[4] ORC_VAR_D1/S1 -- confirm against the Orc headers. */
10422 _backup_orc_quantise1_s16 (OrcExecutor * ORC_RESTRICT ex)
10426 orc_union16 * ORC_RESTRICT ptr0;
10427 const orc_union16 * ORC_RESTRICT ptr4;
10440 ptr0 = (orc_union16 *)ex->arrays[0];
10441 ptr4 = (orc_union16 *)ex->arrays[4];
10444 var35.i = ex->params[25];
10446 var36.i = ex->params[24];
10448 for (i = 0; i < n; i++) {
10454 var39.i = ORC_CLAMP(var38.i,-1,1);
10456 var40.i = ORC_ABS(var38.i);
10458 var41.i = var40.i << 2;
10460 var42.i = var41.i - var35.i;
10462 var43.i = ((orc_uint32)((orc_uint16)var42.i) * (orc_uint32)((orc_uint16)var36.i)) >> 16;
10464 var44.i = ((orc_uint16)var43.i) >> ex->params[26];
10466 var37.i = (var44.i * var39.i) & 0xffff;
/* Entry point that goes through an OrcProgram. Between orc_once_mutex_lock
 * and orc_once_mutex_unlock it builds the program "orc_quantise1_s16" (one
 * 2-byte destination, one 2-byte source, the constant 2, three 2-byte
 * parameters, two 2-byte temporaries, eight opcodes) and compiles it; the
 * program pointer lives in the function-static `p`. It then stores d1, s1
 * and p1..p3 in an on-stack OrcExecutor and fetches p->code_exec.
 * NOTE(review): the p_inited test around the build, any use of `result`,
 * the setup of n/program on the executor and the call through `func` are not
 * visible in this excerpt. */
10474 orc_quantise1_s16 (int16_t * d1, const int16_t * s1, int p1, int p2, int p3, int n)
10476 OrcExecutor _ex, *ex = &_ex;
10477 static int p_inited = 0;
10478 static OrcProgram *p = 0;
10479 void (*func) (OrcExecutor *);
10482 orc_once_mutex_lock ();
10484 OrcCompileResult result;
10486 p = orc_program_new ();
10487 orc_program_set_name (p, "orc_quantise1_s16");
10488 orc_program_set_backup_function (p, _backup_orc_quantise1_s16);
10489 orc_program_add_destination (p, 2, "d1");
10490 orc_program_add_source (p, 2, "s1");
10491 orc_program_add_constant (p, 4, 0x00000002, "c1");
10492 orc_program_add_parameter (p, 2, "p1");
10493 orc_program_add_parameter (p, 2, "p2");
10494 orc_program_add_parameter (p, 2, "p3");
10495 orc_program_add_temporary (p, 2, "t1");
10496 orc_program_add_temporary (p, 2, "t2");
/* Opcode list: t2 = sign(s1); t1 = |s1| << c1, minus p2, mulhuw by p1,
 * shruw by p3; d1 = t1 * t2. */
10498 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
10499 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
10500 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
10501 orc_program_append_2 (p, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
10502 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
10503 orc_program_append_2 (p, "mulhuw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
10504 orc_program_append_2 (p, "shruw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
10505 orc_program_append_2 (p, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
10507 result = orc_program_compile (p);
10510 orc_once_mutex_unlock ();
/* Hand the caller's buffers and scalar parameters to the executor. */
10515 ex->arrays[ORC_VAR_D1] = d1;
10516 ex->arrays[ORC_VAR_S1] = (void *)s1;
10517 ex->params[ORC_VAR_P1] = p1;
10518 ex->params[ORC_VAR_P2] = p2;
10519 ex->params[ORC_VAR_P3] = p3;
10521 func = p->code_exec;
10527 /* orc_quantise2_s16 */
/* Direct-call C version of the kernel: d1 and s1 are viewed as orc_union16
 * arrays; p1 and p2 are scalar parameters.
 * Visible per-element steps: var37 clamped to [-1, 1] (var38) and its
 * absolute value; shifted left by 2; minus var35; unsigned 16-bit shift
 * right by p1; 16-bit product with the clamped value, giving var36.
 * NOTE(review): the line that sets var35 and the load/store of var37/var36
 * are not visible in this excerpt -- confirm. */
10530 orc_quantise2_s16 (int16_t * d1, const int16_t * s1, int p1, int p2, int n){
10532 orc_union16 * ORC_RESTRICT ptr0;
10533 const orc_union16 * ORC_RESTRICT ptr4;
10544 ptr0 = (orc_union16 *)d1;
10545 ptr4 = (orc_union16 *)s1;
10550 for (i = 0; i < n; i++) {
10556 var38.i = ORC_CLAMP(var37.i,-1,1);
10558 var39.i = ORC_ABS(var37.i);
10560 var40.i = var39.i << 2;
10562 var41.i = var40.i - var35.i;
/* The cast makes the right shift operate on the unsigned 16-bit value. */
10564 var42.i = ((orc_uint16)var41.i) >> p1;
10566 var36.i = (var42.i * var38.i) & 0xffff;
/* Executor-driven C implementation; it is the function registered through
 * orc_program_set_backup_function for the "orc_quantise2_s16" program.
 * Array pointers come from ex->arrays[0] and [4]; var35 is loaded from
 * ex->params[25] and the shift count is read from ex->params[24] on every
 * iteration.
 * The visible arithmetic is: clamp to [-1, 1], absolute value, << 2, minus
 * var35, unsigned shift right, 16-bit product with the clamped value.
 * NOTE(review): params[24]/[25] presumably are ORC_VAR_P1/P2 and
 * arrays[0]/[4] ORC_VAR_D1/S1 -- confirm against the Orc headers. */
10575 _backup_orc_quantise2_s16 (OrcExecutor * ORC_RESTRICT ex)
10579 orc_union16 * ORC_RESTRICT ptr0;
10580 const orc_union16 * ORC_RESTRICT ptr4;
10591 ptr0 = (orc_union16 *)ex->arrays[0];
10592 ptr4 = (orc_union16 *)ex->arrays[4];
10595 var35.i = ex->params[25];
10597 for (i = 0; i < n; i++) {
10603 var38.i = ORC_CLAMP(var37.i,-1,1);
10605 var39.i = ORC_ABS(var37.i);
10607 var40.i = var39.i << 2;
10609 var41.i = var40.i - var35.i;
10611 var42.i = ((orc_uint16)var41.i) >> ex->params[24];
10613 var36.i = (var42.i * var38.i) & 0xffff;
/* Entry point that goes through an OrcProgram. Between orc_once_mutex_lock
 * and orc_once_mutex_unlock it builds the program "orc_quantise2_s16" (one
 * 2-byte destination, one 2-byte source, the constant 2, two 2-byte
 * parameters, two 2-byte temporaries, seven opcodes) and compiles it; the
 * program pointer lives in the function-static `p`. It then stores d1, s1,
 * p1 and p2 in an on-stack OrcExecutor and fetches p->code_exec.
 * NOTE(review): the p_inited test around the build, any use of `result`,
 * the setup of n/program on the executor and the call through `func` are not
 * visible in this excerpt. */
10621 orc_quantise2_s16 (int16_t * d1, const int16_t * s1, int p1, int p2, int n)
10623 OrcExecutor _ex, *ex = &_ex;
10624 static int p_inited = 0;
10625 static OrcProgram *p = 0;
10626 void (*func) (OrcExecutor *);
10629 orc_once_mutex_lock ();
10631 OrcCompileResult result;
10633 p = orc_program_new ();
10634 orc_program_set_name (p, "orc_quantise2_s16");
10635 orc_program_set_backup_function (p, _backup_orc_quantise2_s16);
10636 orc_program_add_destination (p, 2, "d1");
10637 orc_program_add_source (p, 2, "s1");
10638 orc_program_add_constant (p, 4, 0x00000002, "c1");
10639 orc_program_add_parameter (p, 2, "p1");
10640 orc_program_add_parameter (p, 2, "p2");
10641 orc_program_add_temporary (p, 2, "t1");
10642 orc_program_add_temporary (p, 2, "t2");
/* Opcode list: t2 = sign(s1); t1 = |s1| << c1, minus p2, shruw by p1;
 * d1 = t1 * t2. */
10644 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
10645 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
10646 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
10647 orc_program_append_2 (p, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
10648 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
10649 orc_program_append_2 (p, "shruw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
10650 orc_program_append_2 (p, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
10652 result = orc_program_compile (p);
10655 orc_once_mutex_unlock ();
/* Hand the caller's buffers and scalar parameters to the executor. */
10660 ex->arrays[ORC_VAR_D1] = d1;
10661 ex->arrays[ORC_VAR_S1] = (void *)s1;
10662 ex->params[ORC_VAR_P1] = p1;
10663 ex->params[ORC_VAR_P2] = p2;
10665 func = p->code_exec;
10671 /* orc_quantdequant1_s16 */
/* Direct-call C version of the kernel: d1 and d2 are both viewed as
 * writable orc_union16 arrays; p1..p5 are scalar parameters.
 * Visible per-element steps, first half: var41 clamped to [-1, 1] (var42)
 * and its absolute value; << 2; minus var35; high 16 bits of the unsigned
 * product with var36; unsigned shift right by p3 (var47); 16-bit product
 * with the clamped value (var48).
 * Second half: var48 clamped to [-1, 1] (var49); var47 times var38 (16-bit);
 * plus var39; shifted right by 2; 16-bit product with var49, giving var40.
 * NOTE(review): the lines that set var35/var36/var38/var39, the load of
 * var41 and the stores of var48/var40 are not visible in this excerpt. */
10674 orc_quantdequant1_s16 (int16_t * d1, int16_t * d2, int p1, int p2, int p3, int p4, int p5, int n){
10676 orc_union16 * ORC_RESTRICT ptr0;
10677 orc_union16 * ORC_RESTRICT ptr1;
10698 ptr0 = (orc_union16 *)d1;
10699 ptr1 = (orc_union16 *)d2;
10710 for (i = 0; i < n; i++) {
10716 var42.i = ORC_CLAMP(var41.i,-1,1);
10718 var43.i = ORC_ABS(var41.i);
10720 var44.i = var43.i << 2;
10722 var45.i = var44.i - var35.i;
10724 var46.i = ((orc_uint32)((orc_uint16)var45.i) * (orc_uint32)((orc_uint16)var36.i)) >> 16;
10726 var47.i = ((orc_uint16)var46.i) >> p3;
10728 var48.i = (var47.i * var42.i) & 0xffff;
/* From here the unsigned magnitude var47 is reused and scaled back up. */
10734 var49.i = ORC_CLAMP(var48.i,-1,1);
10736 var50.i = (var47.i * var38.i) & 0xffff;
10738 var51.i = var50.i + var39.i;
10740 var52.i = var51.i >> 2;
10742 var40.i = (var52.i * var49.i) & 0xffff;
/* Executor-driven C implementation; it is the function registered through
 * orc_program_set_backup_function for the "orc_quantdequant1_s16" program.
 * Array pointers come from ex->arrays[0] and [1]; var35, var36, var38 and
 * var39 are loaded from ex->params[25], [24], [27] and [28]; the shift count
 * is read from ex->params[26] on every iteration.
 * The visible arithmetic has two halves: (1) clamp to [-1, 1], absolute
 * value, << 2, minus var35, high 16 bits of the unsigned product with var36,
 * unsigned shift right, times the clamped value (var48); (2) clamp var48 to
 * [-1, 1], var47 times var38, plus var39, >> 2, times that clamped value.
 * NOTE(review): params[24..28] presumably are ORC_VAR_P1..P5 and
 * arrays[0]/[1] ORC_VAR_D1/D2 -- confirm against the Orc headers. */
10751 _backup_orc_quantdequant1_s16 (OrcExecutor * ORC_RESTRICT ex)
10755 orc_union16 * ORC_RESTRICT ptr0;
10756 orc_union16 * ORC_RESTRICT ptr1;
10777 ptr0 = (orc_union16 *)ex->arrays[0];
10778 ptr1 = (orc_union16 *)ex->arrays[1];
10781 var35.i = ex->params[25];
10783 var36.i = ex->params[24];
10785 var38.i = ex->params[27];
10787 var39.i = ex->params[28];
10789 for (i = 0; i < n; i++) {
10795 var42.i = ORC_CLAMP(var41.i,-1,1);
10797 var43.i = ORC_ABS(var41.i);
10799 var44.i = var43.i << 2;
10801 var45.i = var44.i - var35.i;
10803 var46.i = ((orc_uint32)((orc_uint16)var45.i) * (orc_uint32)((orc_uint16)var36.i)) >> 16;
10805 var47.i = ((orc_uint16)var46.i) >> ex->params[26];
10807 var48.i = (var47.i * var42.i) & 0xffff;
10813 var49.i = ORC_CLAMP(var48.i,-1,1);
10815 var50.i = (var47.i * var38.i) & 0xffff;
10817 var51.i = var50.i + var39.i;
10819 var52.i = var51.i >> 2;
10821 var40.i = (var52.i * var49.i) & 0xffff;
/* Entry point that goes through an OrcProgram. Between orc_once_mutex_lock
 * and orc_once_mutex_unlock it builds the program "orc_quantdequant1_s16"
 * (two 2-byte destinations, the constant 2, five 2-byte parameters, two
 * 2-byte temporaries, fourteen opcodes) and compiles it; the program pointer
 * lives in the function-static `p`. It then stores d1, d2 and p1..p5 in an
 * on-stack OrcExecutor and fetches p->code_exec.
 * The first opcode copies from ORC_VAR_D2, so d2 is read as well as written.
 * NOTE(review): the p_inited test around the build, any use of `result`,
 * the setup of n/program on the executor and the call through `func` are not
 * visible in this excerpt. */
10829 orc_quantdequant1_s16 (int16_t * d1, int16_t * d2, int p1, int p2, int p3, int p4, int p5, int n)
10831 OrcExecutor _ex, *ex = &_ex;
10832 static int p_inited = 0;
10833 static OrcProgram *p = 0;
10834 void (*func) (OrcExecutor *);
10837 orc_once_mutex_lock ();
10839 OrcCompileResult result;
10841 p = orc_program_new ();
10842 orc_program_set_name (p, "orc_quantdequant1_s16");
10843 orc_program_set_backup_function (p, _backup_orc_quantdequant1_s16);
10844 orc_program_add_destination (p, 2, "d1");
10845 orc_program_add_destination (p, 2, "d2");
10846 orc_program_add_constant (p, 4, 0x00000002, "c1");
10847 orc_program_add_parameter (p, 2, "p1");
10848 orc_program_add_parameter (p, 2, "p2");
10849 orc_program_add_parameter (p, 2, "p3");
10850 orc_program_add_parameter (p, 2, "p4");
10851 orc_program_add_parameter (p, 2, "p5");
10852 orc_program_add_temporary (p, 2, "t1");
10853 orc_program_add_temporary (p, 2, "t2");
/* First eight opcodes compute the signed result in t2 (magnitude stays in
 * t1); it is then copied to d1. */
10855 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_D2, ORC_VAR_D1, ORC_VAR_D1);
10856 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
10857 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
10858 orc_program_append_2 (p, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
10859 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
10860 orc_program_append_2 (p, "mulhuw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
10861 orc_program_append_2 (p, "shruw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
10862 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
10863 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
/* Remaining opcodes: t2 = sign(t2); t1 = (t1 * p4 + p5) >> c1; d2 = t1 * t2. */
10864 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
10865 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
10866 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P5, ORC_VAR_D1);
10867 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
10868 orc_program_append_2 (p, "mullw", 0, ORC_VAR_D2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
10870 result = orc_program_compile (p);
10873 orc_once_mutex_unlock ();
/* Hand the caller's buffers and scalar parameters to the executor. */
10878 ex->arrays[ORC_VAR_D1] = d1;
10879 ex->arrays[ORC_VAR_D2] = d2;
10880 ex->params[ORC_VAR_P1] = p1;
10881 ex->params[ORC_VAR_P2] = p2;
10882 ex->params[ORC_VAR_P3] = p3;
10883 ex->params[ORC_VAR_P4] = p4;
10884 ex->params[ORC_VAR_P5] = p5;
10886 func = p->code_exec;
10892 /* orc_quantdequant3_s16 */
/* Direct-call C version of the kernel: d1 and d2 are both viewed as
 * writable orc_union16 arrays; p1..p6 are scalar parameters.
 * Visible per-element steps, first half: var43 clamped to [-1, 1] (var44)
 * and its absolute value; << 2; minus var36; product of the two values
 * reinterpreted as unsigned 16-bit (var48); plus var38; shift right by p3 of
 * the value cast to orc_uint32 (var50); var51 times var44, masked to 16 bits
 * (var52).
 * Second half: var52 clamped to [-1, 1] (var53); var51 times var40 (16-bit);
 * plus var41; shifted right by 2; 16-bit product with var53, giving var42.
 * NOTE(review): the assignment of var51 (presumably a narrowing of var50),
 * the lines that set var36/var37/var38/var40/var41, and the loads/stores are
 * not visible in this excerpt -- confirm. */
10895 orc_quantdequant3_s16 (int16_t * d1, int16_t * d2, int p1, int p2, int p3, int p4, int p5, int p6, int n){
10897 orc_union16 * ORC_RESTRICT ptr0;
10898 orc_union16 * ORC_RESTRICT ptr1;
10922 ptr0 = (orc_union16 *)d1;
10923 ptr1 = (orc_union16 *)d2;
10936 for (i = 0; i < n; i++) {
10942 var44.i = ORC_CLAMP(var43.i,-1,1);
10944 var45.i = ORC_ABS(var43.i);
10946 var46.i = var45.i << 2;
10948 var47.i = var46.i - var36.i;
/* Unlike the 16-bit multiplies below, this product is not masked. */
10950 var48.i = ((orc_uint16)var47.i) * ((orc_uint16)var37.i);
10952 var49.i = var48.i + var38.i;
10954 var50.i = ((orc_uint32)var49.i) >> p3;
10958 var52.i = (var51.i * var44.i) & 0xffff;
10964 var53.i = ORC_CLAMP(var52.i,-1,1);
10966 var54.i = (var51.i * var40.i) & 0xffff;
10968 var55.i = var54.i + var41.i;
10970 var56.i = var55.i >> 2;
10972 var42.i = (var56.i * var53.i) & 0xffff;
/* Executor-driven C implementation; it is the function registered through
 * orc_program_set_backup_function for the "orc_quantdequant3_s16" program.
 * Array pointers come from ex->arrays[0] and [1]; var36, var37, var38, var40
 * and var41 are loaded from ex->params[25], [24], [29], [27] and [28]; the
 * shift count is read from ex->params[26] on every iteration.
 * The visible arithmetic has two halves: (1) clamp to [-1, 1], absolute
 * value, << 2, minus var36, unsigned 16-bit operands multiplied (unmasked),
 * plus var38, shift right as orc_uint32, var51 times the clamped value;
 * (2) clamp that result to [-1, 1], var51 times var40, plus var41, >> 2,
 * times the second clamped value.
 * NOTE(review): the assignment of var51 is not visible here; params[24..29]
 * presumably are ORC_VAR_P1..P6 -- confirm against the Orc headers. */
10981 _backup_orc_quantdequant3_s16 (OrcExecutor * ORC_RESTRICT ex)
10985 orc_union16 * ORC_RESTRICT ptr0;
10986 orc_union16 * ORC_RESTRICT ptr1;
11010 ptr0 = (orc_union16 *)ex->arrays[0];
11011 ptr1 = (orc_union16 *)ex->arrays[1];
11014 var36.i = ex->params[25];
11016 var37.i = ex->params[24];
11018 var38.i = ex->params[29];
11020 var40.i = ex->params[27];
11022 var41.i = ex->params[28];
11024 for (i = 0; i < n; i++) {
11030 var44.i = ORC_CLAMP(var43.i,-1,1);
11032 var45.i = ORC_ABS(var43.i);
11034 var46.i = var45.i << 2;
11036 var47.i = var46.i - var36.i;
11038 var48.i = ((orc_uint16)var47.i) * ((orc_uint16)var37.i);
11040 var49.i = var48.i + var38.i;
11042 var50.i = ((orc_uint32)var49.i) >> ex->params[26];
11046 var52.i = (var51.i * var44.i) & 0xffff;
11052 var53.i = ORC_CLAMP(var52.i,-1,1);
11054 var54.i = (var51.i * var40.i) & 0xffff;
11056 var55.i = var54.i + var41.i;
11058 var56.i = var55.i >> 2;
11060 var42.i = (var56.i * var53.i) & 0xffff;
/* Entry point that goes through an OrcProgram. Between orc_once_mutex_lock
 * and orc_once_mutex_unlock it builds the program "orc_quantdequant3_s16"
 * (two 2-byte destinations, the constant 2, five 2-byte parameters plus the
 * 4-byte parameter p6, two 2-byte temporaries plus the 4-byte temporary t3,
 * sixteen opcodes) and compiles it; the program pointer lives in the
 * function-static `p`. It then stores d1, d2 and p1..p6 in an on-stack
 * OrcExecutor and fetches p->code_exec.
 * The first opcode copies from ORC_VAR_D2, so d2 is read as well as written.
 * NOTE(review): the p_inited test around the build, any use of `result`,
 * the setup of n/program on the executor and the call through `func` are not
 * visible in this excerpt. */
11068 orc_quantdequant3_s16 (int16_t * d1, int16_t * d2, int p1, int p2, int p3, int p4, int p5, int p6, int n)
11070 OrcExecutor _ex, *ex = &_ex;
11071 static int p_inited = 0;
11072 static OrcProgram *p = 0;
11073 void (*func) (OrcExecutor *);
11076 orc_once_mutex_lock ();
11078 OrcCompileResult result;
11080 p = orc_program_new ();
11081 orc_program_set_name (p, "orc_quantdequant3_s16");
11082 orc_program_set_backup_function (p, _backup_orc_quantdequant3_s16);
11083 orc_program_add_destination (p, 2, "d1");
11084 orc_program_add_destination (p, 2, "d2");
11085 orc_program_add_constant (p, 4, 0x00000002, "c1");
11086 orc_program_add_parameter (p, 2, "p1");
11087 orc_program_add_parameter (p, 2, "p2");
11088 orc_program_add_parameter (p, 2, "p3");
11089 orc_program_add_parameter (p, 2, "p4");
11090 orc_program_add_parameter (p, 2, "p5");
11091 orc_program_add_parameter (p, 4, "p6");
11092 orc_program_add_temporary (p, 2, "t1");
11093 orc_program_add_temporary (p, 2, "t2");
11094 orc_program_add_temporary (p, 4, "t3");
/* The multiply/add/shift in the middle runs in the 4-byte temporary t3
 * (muluwl, addl, shrul) and is narrowed back to t1 with convlw. */
11096 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_D2, ORC_VAR_D1, ORC_VAR_D1);
11097 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
11098 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
11099 orc_program_append_2 (p, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
11100 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
11101 orc_program_append_2 (p, "muluwl", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
11102 orc_program_append_2 (p, "addl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P6, ORC_VAR_D1);
11103 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P3, ORC_VAR_D1);
11104 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
11105 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
11106 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
/* Remaining opcodes: t2 = sign(t2); t1 = (t1 * p4 + p5) >> c1; d2 = t1 * t2. */
11107 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
11108 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
11109 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P5, ORC_VAR_D1);
11110 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
11111 orc_program_append_2 (p, "mullw", 0, ORC_VAR_D2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
11113 result = orc_program_compile (p);
11116 orc_once_mutex_unlock ();
/* Hand the caller's buffers and scalar parameters to the executor. */
11121 ex->arrays[ORC_VAR_D1] = d1;
11122 ex->arrays[ORC_VAR_D2] = d2;
11123 ex->params[ORC_VAR_P1] = p1;
11124 ex->params[ORC_VAR_P2] = p2;
11125 ex->params[ORC_VAR_P3] = p3;
11126 ex->params[ORC_VAR_P4] = p4;
11127 ex->params[ORC_VAR_P5] = p5;
11128 ex->params[ORC_VAR_P6] = p6;
11130 func = p->code_exec;
11136 /* orc_quantdequant2_s16 */
/* Direct-call C version of the kernel: d1 and d2 are both viewed as
 * writable orc_union16 arrays; p1..p4 are scalar parameters.
 * Visible per-element steps, first half: var40 clamped to [-1, 1] (var41)
 * and its absolute value; << 2; minus var35; unsigned 16-bit shift right by
 * p1 (var45); 16-bit product with the clamped value (var46).
 * Second half: var46 clamped to [-1, 1] (var47); var45 times var37 (16-bit);
 * plus var38; shifted right by 2; 16-bit product with var47, giving var39.
 * NOTE(review): the lines that set var35/var37/var38, the load of var40 and
 * the stores of var46/var39 are not visible in this excerpt -- confirm. */
11139 orc_quantdequant2_s16 (int16_t * d1, int16_t * d2, int p1, int p2, int p3, int p4, int n){
11141 orc_union16 * ORC_RESTRICT ptr0;
11142 orc_union16 * ORC_RESTRICT ptr1;
11161 ptr0 = (orc_union16 *)d1;
11162 ptr1 = (orc_union16 *)d2;
11171 for (i = 0; i < n; i++) {
11177 var41.i = ORC_CLAMP(var40.i,-1,1);
11179 var42.i = ORC_ABS(var40.i);
11181 var43.i = var42.i << 2;
11183 var44.i = var43.i - var35.i;
11185 var45.i = ((orc_uint16)var44.i) >> p1;
11187 var46.i = (var45.i * var41.i) & 0xffff;
/* From here the unsigned magnitude var45 is reused and scaled back up. */
11193 var47.i = ORC_CLAMP(var46.i,-1,1);
11195 var48.i = (var45.i * var37.i) & 0xffff;
11197 var49.i = var48.i + var38.i;
11199 var50.i = var49.i >> 2;
11201 var39.i = (var50.i * var47.i) & 0xffff;
/* Executor-driven C implementation; it is the function registered through
 * orc_program_set_backup_function for the "orc_quantdequant2_s16" program.
 * Array pointers come from ex->arrays[0] and [1]; var35, var37 and var38 are
 * loaded from ex->params[25], [26] and [27]; the shift count is read from
 * ex->params[24] on every iteration.
 * The visible arithmetic has two halves: (1) clamp to [-1, 1], absolute
 * value, << 2, minus var35, unsigned shift right, times the clamped value
 * (var46); (2) clamp var46 to [-1, 1], var45 times var37, plus var38, >> 2,
 * times that clamped value.
 * NOTE(review): params[24..27] presumably are ORC_VAR_P1..P4 and
 * arrays[0]/[1] ORC_VAR_D1/D2 -- confirm against the Orc headers. */
11210 _backup_orc_quantdequant2_s16 (OrcExecutor * ORC_RESTRICT ex)
11214 orc_union16 * ORC_RESTRICT ptr0;
11215 orc_union16 * ORC_RESTRICT ptr1;
11234 ptr0 = (orc_union16 *)ex->arrays[0];
11235 ptr1 = (orc_union16 *)ex->arrays[1];
11238 var35.i = ex->params[25];
11240 var37.i = ex->params[26];
11242 var38.i = ex->params[27];
11244 for (i = 0; i < n; i++) {
11250 var41.i = ORC_CLAMP(var40.i,-1,1);
11252 var42.i = ORC_ABS(var40.i);
11254 var43.i = var42.i << 2;
11256 var44.i = var43.i - var35.i;
11258 var45.i = ((orc_uint16)var44.i) >> ex->params[24];
11260 var46.i = (var45.i * var41.i) & 0xffff;
11266 var47.i = ORC_CLAMP(var46.i,-1,1);
11268 var48.i = (var45.i * var37.i) & 0xffff;
11270 var49.i = var48.i + var38.i;
11272 var50.i = var49.i >> 2;
11274 var39.i = (var50.i * var47.i) & 0xffff;
/* Entry point that goes through an OrcProgram. Between orc_once_mutex_lock
 * and orc_once_mutex_unlock it builds the program "orc_quantdequant2_s16"
 * (two 2-byte destinations, the constant 2, four 2-byte parameters, two
 * 2-byte temporaries, thirteen opcodes) and compiles it; the program pointer
 * lives in the function-static `p`. It then stores d1, d2 and p1..p4 in an
 * on-stack OrcExecutor and fetches p->code_exec.
 * The first opcode copies from ORC_VAR_D2, so d2 is read as well as written.
 * NOTE(review): the p_inited test around the build, any use of `result`,
 * the setup of n/program on the executor and the call through `func` are not
 * visible in this excerpt. */
11282 orc_quantdequant2_s16 (int16_t * d1, int16_t * d2, int p1, int p2, int p3, int p4, int n)
11284 OrcExecutor _ex, *ex = &_ex;
11285 static int p_inited = 0;
11286 static OrcProgram *p = 0;
11287 void (*func) (OrcExecutor *);
11290 orc_once_mutex_lock ();
11292 OrcCompileResult result;
11294 p = orc_program_new ();
11295 orc_program_set_name (p, "orc_quantdequant2_s16");
11296 orc_program_set_backup_function (p, _backup_orc_quantdequant2_s16);
11297 orc_program_add_destination (p, 2, "d1");
11298 orc_program_add_destination (p, 2, "d2");
11299 orc_program_add_constant (p, 4, 0x00000002, "c1");
11300 orc_program_add_parameter (p, 2, "p1");
11301 orc_program_add_parameter (p, 2, "p2");
11302 orc_program_add_parameter (p, 2, "p3");
11303 orc_program_add_parameter (p, 2, "p4");
11304 orc_program_add_temporary (p, 2, "t1");
11305 orc_program_add_temporary (p, 2, "t2");
/* First seven opcodes compute the signed result in t2 (magnitude stays in
 * t1); it is then copied to d1. */
11307 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_D2, ORC_VAR_D1, ORC_VAR_D1);
11308 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
11309 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
11310 orc_program_append_2 (p, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
11311 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
11312 orc_program_append_2 (p, "shruw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
11313 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
11314 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
/* Remaining opcodes: t2 = sign(t2); t1 = (t1 * p3 + p4) >> c1; d2 = t1 * t2. */
11315 orc_program_append_2 (p, "signw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
11316 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
11317 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
11318 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
11319 orc_program_append_2 (p, "mullw", 0, ORC_VAR_D2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
11321 result = orc_program_compile (p);
11324 orc_once_mutex_unlock ();
/* Hand the caller's buffers and scalar parameters to the executor. */
11329 ex->arrays[ORC_VAR_D1] = d1;
11330 ex->arrays[ORC_VAR_D2] = d2;
11331 ex->params[ORC_VAR_P1] = p1;
11332 ex->params[ORC_VAR_P2] = p2;
11333 ex->params[ORC_VAR_P3] = p3;
11334 ex->params[ORC_VAR_P4] = p4;
11336 func = p->code_exec;
11342 /* orc_downsample_vert_u8 */
/* Direct-call C version of the kernel: d1 and the four sources s1..s4 are
 * accessed through orc_int8 pointers.
 * Visible per-element steps: var35 and var36 are widened from unsigned
 * bytes, summed and multiplied by 6 (16-bit); var38 and var39 are widened,
 * summed and multiplied by 26 (16-bit); the two products are added, 32 is
 * added, and the sum is shifted right by 6 as an unsigned 16-bit value
 * (var53).
 * NOTE(review): which source pointer feeds each of var35/var36/var38/var39,
 * and the final narrowing/store of var53, are not visible in this excerpt. */
11345 orc_downsample_vert_u8 (orc_uint8 * d1, const orc_uint8 * s1, const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, int n){
11347 orc_int8 * ORC_RESTRICT ptr0;
11348 const orc_int8 * ORC_RESTRICT ptr4;
11349 const orc_int8 * ORC_RESTRICT ptr5;
11350 const orc_int8 * ORC_RESTRICT ptr6;
11351 const orc_int8 * ORC_RESTRICT ptr7;
11372 ptr0 = (orc_int8 *)d1;
11373 ptr4 = (orc_int8 *)s1;
11374 ptr5 = (orc_int8 *)s2;
11375 ptr6 = (orc_int8 *)s3;
11376 ptr7 = (orc_int8 *)s4;
/* Loop-invariant constants: the two weights (6 and 26) and the value 32
 * added before the final shift. */
11379 var37.i = 0x00000006; /* 6 or 2.96439e-323f */
11381 var40.i = 0x0000001a; /* 26 or 1.28457e-322f */
11383 var41.i = 0x00000020; /* 32 or 1.58101e-322f */
11385 for (i = 0; i < n; i++) {
11389 var43.i = (orc_uint8)var35;
11393 var44.i = (orc_uint8)var36;
11395 var45.i = var43.i + var44.i;
11397 var46.i = (var45.i * var37.i) & 0xffff;
11401 var47.i = (orc_uint8)var38;
11405 var48.i = (orc_uint8)var39;
11407 var49.i = var47.i + var48.i;
11409 var50.i = (var49.i * var40.i) & 0xffff;
11411 var51.i = var50.i + var46.i;
11413 var52.i = var51.i + var41.i;
11415 var53.i = ((orc_uint16)var52.i) >> 6;
/* Executor-driven C implementation; it is the function registered through
 * orc_program_set_backup_function for the "orc_downsample_vert_u8" program.
 * Array pointers come from ex->arrays[0], [4], [5], [6] and [7].
 * The visible arithmetic is: two widened bytes summed and multiplied by 6,
 * two more summed and multiplied by 26, both products added, plus 32, then
 * an unsigned 16-bit shift right by 6.
 * NOTE(review): slots 0 and 4..7 presumably are ORC_VAR_D1 and
 * ORC_VAR_S1..S4; the byte loads and the final store are not visible here. */
11426 _backup_orc_downsample_vert_u8 (OrcExecutor * ORC_RESTRICT ex)
11430 orc_int8 * ORC_RESTRICT ptr0;
11431 const orc_int8 * ORC_RESTRICT ptr4;
11432 const orc_int8 * ORC_RESTRICT ptr5;
11433 const orc_int8 * ORC_RESTRICT ptr6;
11434 const orc_int8 * ORC_RESTRICT ptr7;
11455 ptr0 = (orc_int8 *)ex->arrays[0];
11456 ptr4 = (orc_int8 *)ex->arrays[4];
11457 ptr5 = (orc_int8 *)ex->arrays[5];
11458 ptr6 = (orc_int8 *)ex->arrays[6];
11459 ptr7 = (orc_int8 *)ex->arrays[7];
11462 var37.i = 0x00000006; /* 6 or 2.96439e-323f */
11464 var40.i = 0x0000001a; /* 26 or 1.28457e-322f */
11466 var41.i = 0x00000020; /* 32 or 1.58101e-322f */
11468 for (i = 0; i < n; i++) {
11472 var43.i = (orc_uint8)var35;
11476 var44.i = (orc_uint8)var36;
11478 var45.i = var43.i + var44.i;
11480 var46.i = (var45.i * var37.i) & 0xffff;
11484 var47.i = (orc_uint8)var38;
11488 var48.i = (orc_uint8)var39;
11490 var49.i = var47.i + var48.i;
11492 var50.i = (var49.i * var40.i) & 0xffff;
11494 var51.i = var50.i + var46.i;
11496 var52.i = var51.i + var41.i;
11498 var53.i = ((orc_uint16)var52.i) >> 6;
/* Entry point that goes through an OrcProgram. Between orc_once_mutex_lock
 * and orc_once_mutex_unlock it builds the program "orc_downsample_vert_u8"
 * (one 1-byte destination, four 1-byte sources, the constants 6, 26 and 32,
 * three 2-byte temporaries, twelve opcodes) and compiles it; the program
 * pointer lives in the function-static `p`. It then stores d1 and s1..s4 in
 * an on-stack OrcExecutor and fetches p->code_exec.
 * NOTE(review): the p_inited test around the build, any use of `result`,
 * the setup of n/program on the executor and the call through `func` are not
 * visible in this excerpt. */
11508 orc_downsample_vert_u8 (orc_uint8 * d1, const orc_uint8 * s1, const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, int n)
11510 OrcExecutor _ex, *ex = &_ex;
11511 static int p_inited = 0;
11512 static OrcProgram *p = 0;
11513 void (*func) (OrcExecutor *);
11516 orc_once_mutex_lock ();
11518 OrcCompileResult result;
11520 p = orc_program_new ();
11521 orc_program_set_name (p, "orc_downsample_vert_u8");
11522 orc_program_set_backup_function (p, _backup_orc_downsample_vert_u8);
11523 orc_program_add_destination (p, 1, "d1");
11524 orc_program_add_source (p, 1, "s1");
11525 orc_program_add_source (p, 1, "s2");
11526 orc_program_add_source (p, 1, "s3");
11527 orc_program_add_source (p, 1, "s4");
11528 orc_program_add_constant (p, 4, 0x00000006, "c1");
11529 orc_program_add_constant (p, 4, 0x0000001a, "c2");
11530 orc_program_add_constant (p, 4, 0x00000020, "c3");
11531 orc_program_add_temporary (p, 2, "t1");
11532 orc_program_add_temporary (p, 2, "t2");
11533 orc_program_add_temporary (p, 2, "t3");
/* Opcode list: t1 = (s1 + s4) * c1; t2 = (s2 + s3) * c2; t2 += t1 + c3;
 * t2 >>= c1 (the constant 6 serves as both weight and shift count);
 * d1 = convwb(t2). */
11535 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
11536 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S4, ORC_VAR_D1, ORC_VAR_D1);
11537 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
11538 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
11539 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
11540 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T3, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
11541 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1);
11542 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C2, ORC_VAR_D1);
11543 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
11544 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C3, ORC_VAR_D1);
11545 orc_program_append_2 (p, "shruw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
11546 orc_program_append_2 (p, "convwb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
11548 result = orc_program_compile (p);
11551 orc_once_mutex_unlock ();
/* Hand the caller's buffers to the executor; const is cast away for sources. */
11556 ex->arrays[ORC_VAR_D1] = d1;
11557 ex->arrays[ORC_VAR_S1] = (void *)s1;
11558 ex->arrays[ORC_VAR_S2] = (void *)s2;
11559 ex->arrays[ORC_VAR_S3] = (void *)s3;
11560 ex->arrays[ORC_VAR_S4] = (void *)s4;
11562 func = p->code_exec;
11568 /* orc_downsample_horiz_u8 */
/* Direct-call C version of the kernel: d1 is accessed through an orc_int8
 * pointer, while s1 and s2 (declared as byte pointers) are accessed through
 * orc_union16 pointers, i.e. two bytes per element.
 * Visible per-element steps: the low byte of var44 plus the high byte of
 * var45, times 6 (16-bit); the high byte of var44 plus the low byte of
 * var45, times 26 (16-bit); the two products are added, 32 is added, and the
 * sum is shifted right by 6 as an unsigned 16-bit value (var60).
 * NOTE(review): the loads of var44/var45 (presumably from ptr4/ptr5) and
 * the final narrowing/store of var60 are not visible in this excerpt. */
11571 orc_downsample_horiz_u8 (orc_uint8 * d1, const uint8_t * s1, const uint8_t * s2, int n){
11573 orc_int8 * ORC_RESTRICT ptr0;
11574 const orc_union16 * ORC_RESTRICT ptr4;
11575 const orc_union16 * ORC_RESTRICT ptr5;
11600 ptr0 = (orc_int8 *)d1;
11601 ptr4 = (orc_union16 *)s1;
11602 ptr5 = (orc_union16 *)s2;
/* Loop-invariant constants: the two weights (6 and 26) and the value 32
 * added before the final shift. */
11605 var40.i = 0x00000006; /* 6 or 2.96439e-323f */
11607 var41.i = 0x0000001a; /* 26 or 1.28457e-322f */
11609 var42.i = 0x00000020; /* 32 or 1.58101e-322f */
11611 for (i = 0; i < n; i++) {
11621 var46 = (orc_uint16)var44.i & 0xff;
11623 var47.i = (orc_uint8)var46;
11625 var48 = ((orc_uint16)var45.i >> 8)&0xff;
11627 var49.i = (orc_uint8)var48;
11629 var50.i = var47.i + var49.i;
11631 var51.i = (var50.i * var40.i) & 0xffff;
11632 /* 11: select1wb */
11633 var52 = ((orc_uint16)var44.i >> 8)&0xff;
11635 var53.i = (orc_uint8)var52;
11636 /* 13: select0wb */
11637 var54 = (orc_uint16)var45.i & 0xff;
11639 var55.i = (orc_uint8)var54;
11641 var56.i = var53.i + var55.i;
11643 var57.i = (var56.i * var41.i) & 0xffff;
11645 var58.i = var51.i + var57.i;
11647 var59.i = var58.i + var42.i;
11649 var60.i = ((orc_uint16)var59.i) >> 6;
/* Executor-driven C implementation; it is the function registered through
 * orc_program_set_backup_function for the "orc_downsample_horiz_u8" program.
 * Array pointers come from ex->arrays[0] (as bytes) and [4], [5] (as 16-bit
 * words).
 * The visible arithmetic is: low byte of var44 plus high byte of var45,
 * times 6; high byte of var44 plus low byte of var45, times 26; both
 * products added, plus 32, then an unsigned 16-bit shift right by 6.
 * NOTE(review): slots 0/4/5 presumably are ORC_VAR_D1/S1/S2; the word loads
 * and the final store are not visible in this excerpt. */
11660 _backup_orc_downsample_horiz_u8 (OrcExecutor * ORC_RESTRICT ex)
11664 orc_int8 * ORC_RESTRICT ptr0;
11665 const orc_union16 * ORC_RESTRICT ptr4;
11666 const orc_union16 * ORC_RESTRICT ptr5;
11691 ptr0 = (orc_int8 *)ex->arrays[0];
11692 ptr4 = (orc_union16 *)ex->arrays[4];
11693 ptr5 = (orc_union16 *)ex->arrays[5];
11696 var40.i = 0x00000006; /* 6 or 2.96439e-323f */
11698 var41.i = 0x0000001a; /* 26 or 1.28457e-322f */
11700 var42.i = 0x00000020; /* 32 or 1.58101e-322f */
11702 for (i = 0; i < n; i++) {
11712 var46 = (orc_uint16)var44.i & 0xff;
11714 var47.i = (orc_uint8)var46;
11716 var48 = ((orc_uint16)var45.i >> 8)&0xff;
11718 var49.i = (orc_uint8)var48;
11720 var50.i = var47.i + var49.i;
11722 var51.i = (var50.i * var40.i) & 0xffff;
11723 /* 11: select1wb */
11724 var52 = ((orc_uint16)var44.i >> 8)&0xff;
11726 var53.i = (orc_uint8)var52;
11727 /* 13: select0wb */
11728 var54 = (orc_uint16)var45.i & 0xff;
11730 var55.i = (orc_uint8)var54;
11732 var56.i = var53.i + var55.i;
11734 var57.i = (var56.i * var41.i) & 0xffff;
11736 var58.i = var51.i + var57.i;
11738 var59.i = var58.i + var42.i;
11740 var60.i = ((orc_uint16)var59.i) >> 6;
/* Entry point that goes through an OrcProgram. Between orc_once_mutex_lock
 * and orc_once_mutex_unlock it builds the program "orc_downsample_horiz_u8"
 * (one 1-byte destination, two 2-byte sources, the constants 6, 26 and 32,
 * six temporaries of which t3 is 1 byte and the rest 2 bytes, eighteen
 * opcodes) and compiles it; the program pointer lives in the function-static
 * `p`. It then stores d1, s1 and s2 in an on-stack OrcExecutor and fetches
 * p->code_exec.
 * The sources are registered with element size 2 although the C parameters
 * are byte pointers.
 * NOTE(review): the p_inited test around the build, any use of `result`,
 * the setup of n/program on the executor and the call through `func` are not
 * visible in this excerpt. */
11750 orc_downsample_horiz_u8 (orc_uint8 * d1, const uint8_t * s1, const uint8_t * s2, int n)
11752 OrcExecutor _ex, *ex = &_ex;
11753 static int p_inited = 0;
11754 static OrcProgram *p = 0;
11755 void (*func) (OrcExecutor *);
11758 orc_once_mutex_lock ();
11760 OrcCompileResult result;
11762 p = orc_program_new ();
11763 orc_program_set_name (p, "orc_downsample_horiz_u8");
11764 orc_program_set_backup_function (p, _backup_orc_downsample_horiz_u8);
11765 orc_program_add_destination (p, 1, "d1");
11766 orc_program_add_source (p, 2, "s1");
11767 orc_program_add_source (p, 2, "s2");
11768 orc_program_add_constant (p, 4, 0x00000006, "c1");
11769 orc_program_add_constant (p, 4, 0x0000001a, "c2");
11770 orc_program_add_constant (p, 4, 0x00000020, "c3");
11771 orc_program_add_temporary (p, 2, "t1");
11772 orc_program_add_temporary (p, 2, "t2");
11773 orc_program_add_temporary (p, 1, "t3");
11774 orc_program_add_temporary (p, 2, "t4");
11775 orc_program_add_temporary (p, 2, "t5");
11776 orc_program_add_temporary (p, 2, "t6");
/* Opcode list: t4 = (select0wb(s1) + select1wb(s2)) * c1;
 * t5 = (select1wb(s1) + select0wb(s2)) * c2; t4 += t5 + c3; t4 >>= c1 (the
 * constant 6 serves as both weight and shift count); d1 = convwb(t4). */
11778 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
11779 orc_program_append_2 (p, "copyw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
11780 orc_program_append_2 (p, "select0wb", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
11781 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
11782 orc_program_append_2 (p, "select1wb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
11783 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T5, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
11784 orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
11785 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_C1, ORC_VAR_D1);
11786 orc_program_append_2 (p, "select1wb", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
11787 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T5, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
11788 orc_program_append_2 (p, "select0wb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
11789 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T6, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
11790 orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T6, ORC_VAR_D1);
11791 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C2, ORC_VAR_D1);
11792 orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
11793 orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_C3, ORC_VAR_D1);
11794 orc_program_append_2 (p, "shruw", 0, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_C1, ORC_VAR_D1);
11795 orc_program_append_2 (p, "convwb", 0, ORC_VAR_D1, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
11797 result = orc_program_compile (p);
11800 orc_once_mutex_unlock ();
/* Hand the caller's buffers to the executor; const is cast away for sources. */
11805 ex->arrays[ORC_VAR_D1] = d1;
11806 ex->arrays[ORC_VAR_S1] = (void *)s1;
11807 ex->arrays[ORC_VAR_S2] = (void *)s2;
11809 func = p->code_exec;
11815 /* orc_stats_moment_s16 */
/* Direct-call C version of the kernel: s1 is viewed as an orc_union16 array
 * and a 32-bit accumulator var12 starts at zero.
 * Visible per-element steps: absolute value of var34; minus 2; maximum of
 * that and 0; reinterpreted as unsigned 16-bit; added into var12.
 * NOTE(review): the load of var34 and the write of var12 to *a1 are not
 * visible in this excerpt -- confirm. */
11818 orc_stats_moment_s16 (int32_t * a1, const int16_t * s1, int n){
11820 const orc_union16 * ORC_RESTRICT ptr4;
11821 orc_union32 var12 = { 0 };
11830 ptr4 = (orc_union16 *)s1;
/* Loop-invariant constants: the value subtracted (2) and the lower bound (0). */
11833 var35.i = 0x00000002; /* 2 or 9.88131e-324f */
11835 var36.i = 0x00000000; /* 0 or 0f */
11837 for (i = 0; i < n; i++) {
11841 var37.i = ORC_ABS(var34.i);
11843 var38.i = var37.i - var35.i;
11845 var39.i = ORC_MAX(var38.i, var36.i);
11847 var40.i = (orc_uint16)var39.i;
11849 var12.i = var12.i + var40.i;
/* Executor-driven C implementation; it is the function registered through
 * orc_program_set_backup_function for the "orc_stats_moment_s16" program.
 * The source pointer comes from ex->arrays[4]; the running sum var12 starts
 * at zero and is written to ex->accumulators[0] at the end.
 * The visible arithmetic is: absolute value, minus 2, maximum with 0,
 * reinterpreted as unsigned 16-bit, added into var12.
 * NOTE(review): slot 4 presumably is ORC_VAR_S1; the load of var34 and how n
 * is read from the executor are not visible in this excerpt. */
11857 _backup_orc_stats_moment_s16 (OrcExecutor * ORC_RESTRICT ex)
11861 const orc_union16 * ORC_RESTRICT ptr4;
11862 orc_union32 var12 = { 0 };
11871 ptr4 = (orc_union16 *)ex->arrays[4];
11874 var35.i = 0x00000002; /* 2 or 9.88131e-324f */
11876 var36.i = 0x00000000; /* 0 or 0f */
11878 for (i = 0; i < n; i++) {
11882 var37.i = ORC_ABS(var34.i);
11884 var38.i = var37.i - var35.i;
11886 var39.i = ORC_MAX(var38.i, var36.i);
11888 var40.i = (orc_uint16)var39.i;
11890 var12.i = var12.i + var40.i;
11892 ex->accumulators[0] = var12.i;
/*
 * orc_stats_moment_s16 (OrcProgram variant): builds and compiles the program
 * named "orc_stats_moment_s16", points an executor at s1, and copies
 * accumulator A1 into *a1.
 * Program: absw -> subw c1 (2) -> maxsw c2 (0) -> convuwl -> accl.
 * NOTE(review): the p_inited test that presumably guards the one-time build,
 * the assignment of ex->n and the func (ex) call are not visible in this
 * excerpt - confirm against the full file.
 */
11897 orc_stats_moment_s16 (int32_t * a1, const int16_t * s1, int n)
11899 OrcExecutor _ex, *ex = &_ex;
/* Function-static state: the program object persists across calls. */
11900 static int p_inited = 0;
11901 static OrcProgram *p = 0;
11902 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
11905 orc_once_mutex_lock ();
11907 OrcCompileResult result;
11909 p = orc_program_new ();
11910 orc_program_set_name (p, "orc_stats_moment_s16");
11911 orc_program_set_backup_function (p, _backup_orc_stats_moment_s16);
/* Variables: 2-byte source, 4-byte accumulator, constants 2 and 0, and one
 * 2-byte plus one 4-byte temporary. */
11912 orc_program_add_source (p, 2, "s1");
11913 orc_program_add_accumulator (p, 4, "a1");
11914 orc_program_add_constant (p, 4, 0x00000002, "c1");
11915 orc_program_add_constant (p, 4, 0x00000000, "c2");
11916 orc_program_add_temporary (p, 2, "t1");
11917 orc_program_add_temporary (p, 4, "t2");
/* Opcode sequence; it mirrors the arithmetic of the C variants above. */
11919 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
11920 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
11921 orc_program_append_2 (p, "maxsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2, ORC_VAR_D1);
11922 orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
11923 orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
11925 result = orc_program_compile (p);
11928 orc_once_mutex_unlock ();
/* The const qualifier of s1 is cast away to fit the executor's array slot. */
11933 ex->arrays[ORC_VAR_S1] = (void *)s1;
11935 func = p->code_exec;
11937 *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
11942 /* orc_stats_above_s16 */
/*
 * orc_stats_above_s16 (scalar C variant): for each of the n 16-bit samples
 * read through s1, computes ORC_MIN(ORC_MAX(ORC_ABS(x) - 1, 0), 1), casts it
 * to orc_uint16 and adds it to the 32-bit running sum var12. Assuming
 * ORC_MAX/ORC_MIN are the usual max/min macros, each term is 0 or 1.
 * NOTE(review): the load of var34 from ptr4 and the final store through a1
 * are not visible in this excerpt - confirm against the full file.
 * NOTE(review): a second definition with the same name follows below;
 * presumably a preprocessor conditional (not visible here) selects one.
 */
11945 orc_stats_above_s16 (int32_t * a1, const int16_t * s1, int n){
11947 const orc_union16 * ORC_RESTRICT ptr4;
11948 orc_union32 var12 = { 0 };
11959 ptr4 = (orc_union16 *)s1;
/* Loop-invariant constants: offset 1, lower bound 0, upper bound 1. */
11962 var35.i = 0x00000001; /* 1 or 4.94066e-324f */
11964 var36.i = 0x00000000; /* 0 or 0f */
11966 var37.i = 0x00000001; /* 1 or 4.94066e-324f */
11968 for (i = 0; i < n; i++) {
11972 var38.i = ORC_ABS(var34.i);
11974 var39.i = var38.i - var35.i;
/* Bound the difference from below by var36 and from above by var37. */
11976 var40.i = ORC_MAX(var39.i, var36.i);
11978 var41.i = ORC_MIN(var40.i, var37.i);
11980 var42.i = (orc_uint16)var41.i;
11982 var12.i = var12.i + var42.i;
/*
 * _backup_orc_stats_above_s16: executor-driven C version of the same
 * computation; the wrapper below registers it with
 * orc_program_set_backup_function. Reads the source from ex->arrays[4] and
 * leaves the sum in ex->accumulators[0].
 * NOTE(review): slot 4 is presumably ORC_VAR_S1; the definition of n and the
 * load of var34 are not visible in this excerpt - confirm.
 */
11990 _backup_orc_stats_above_s16 (OrcExecutor * ORC_RESTRICT ex)
11994 const orc_union16 * ORC_RESTRICT ptr4;
11995 orc_union32 var12 = { 0 };
12006 ptr4 = (orc_union16 *)ex->arrays[4];
/* Loop-invariant constants: offset 1, lower bound 0, upper bound 1. */
12009 var35.i = 0x00000001; /* 1 or 4.94066e-324f */
12011 var36.i = 0x00000000; /* 0 or 0f */
12013 var37.i = 0x00000001; /* 1 or 4.94066e-324f */
12015 for (i = 0; i < n; i++) {
12019 var38.i = ORC_ABS(var34.i);
12021 var39.i = var38.i - var35.i;
12023 var40.i = ORC_MAX(var39.i, var36.i);
12025 var41.i = ORC_MIN(var40.i, var37.i);
12027 var42.i = (orc_uint16)var41.i;
12029 var12.i = var12.i + var42.i;
/* Publish the sum through the executor instead of an output pointer. */
12031 ex->accumulators[0] = var12.i;
/*
 * orc_stats_above_s16 (OrcProgram variant): builds and compiles the program
 * named "orc_stats_above_s16", points an executor at s1, and copies
 * accumulator A1 into *a1.
 * Program: absw -> subw c1 (1) -> maxsw c2 (0) -> minsw c1 (1) -> convuwl
 * -> accl. The constant c1 serves as both the offset and the upper bound.
 * NOTE(review): the p_inited test that presumably guards the one-time build,
 * the assignment of ex->n and the func (ex) call are not visible in this
 * excerpt - confirm against the full file.
 */
12036 orc_stats_above_s16 (int32_t * a1, const int16_t * s1, int n)
12038 OrcExecutor _ex, *ex = &_ex;
/* Function-static state: the program object persists across calls. */
12039 static int p_inited = 0;
12040 static OrcProgram *p = 0;
12041 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
12044 orc_once_mutex_lock ();
12046 OrcCompileResult result;
12048 p = orc_program_new ();
12049 orc_program_set_name (p, "orc_stats_above_s16");
12050 orc_program_set_backup_function (p, _backup_orc_stats_above_s16);
/* Variables: 2-byte source, 4-byte accumulator, constants 1 and 0, and one
 * 2-byte plus one 4-byte temporary. */
12051 orc_program_add_source (p, 2, "s1");
12052 orc_program_add_accumulator (p, 4, "a1");
12053 orc_program_add_constant (p, 4, 0x00000001, "c1");
12054 orc_program_add_constant (p, 4, 0x00000000, "c2");
12055 orc_program_add_temporary (p, 2, "t1");
12056 orc_program_add_temporary (p, 4, "t2");
/* Opcode sequence; it mirrors the arithmetic of the C variants above. */
12058 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
12059 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
12060 orc_program_append_2 (p, "maxsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2, ORC_VAR_D1);
12061 orc_program_append_2 (p, "minsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
12062 orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
12063 orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
12065 result = orc_program_compile (p);
12068 orc_once_mutex_unlock ();
/* The const qualifier of s1 is cast away to fit the executor's array slot. */
12073 ex->arrays[ORC_VAR_S1] = (void *)s1;
12075 func = p->code_exec;
12077 *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
/*
 * orc_accw (scalar C variant): adds ORC_ABS(x) for each of the n 16-bit
 * samples read through s1 into the 16-bit union var12, then stores the low
 * 16 bits of that sum in *a1.
 * NOTE(review): the load of var33 from ptr4 is not visible in this excerpt -
 * confirm against the full file.
 * NOTE(review): a second definition with the same name follows below;
 * presumably a preprocessor conditional (not visible here) selects one.
 */
12085 orc_accw (int * a1, const int16_t * s1, int n){
12087 const orc_union16 * ORC_RESTRICT ptr4;
/* The accumulator is an orc_union16, unlike the orc_union32 used by the
 * stats kernels above. */
12088 orc_union16 var12 = { 0 };
12092 ptr4 = (orc_union16 *)s1;
12095 for (i = 0; i < n; i++) {
12099 var34.i = ORC_ABS(var33.i);
12101 var12.i = var12.i + var34.i;
/* Only the low 16 bits of the sum are handed back. */
12103 *a1 = (var12.i & 0xffff);
/*
 * _backup_orc_accw: executor-driven C version of orc_accw; the wrapper below
 * registers it with orc_program_set_backup_function. Reads the source from
 * ex->arrays[4] and stores the low 16 bits of the sum in
 * ex->accumulators[0].
 * NOTE(review): slot 4 is presumably ORC_VAR_S1; the definition of n and the
 * load of var33 are not visible in this excerpt - confirm.
 */
12109 _backup_orc_accw (OrcExecutor * ORC_RESTRICT ex)
12113 const orc_union16 * ORC_RESTRICT ptr4;
12114 orc_union16 var12 = { 0 };
12118 ptr4 = (orc_union16 *)ex->arrays[4];
12121 for (i = 0; i < n; i++) {
12125 var34.i = ORC_ABS(var33.i);
12127 var12.i = var12.i + var34.i;
/* Only the low 16 bits of the sum are published. */
12129 ex->accumulators[0] = (var12.i & 0xffff);
/*
 * orc_accw (OrcProgram variant): builds and compiles the program named
 * "orc_accw" (absw -> accw), points an executor at s1, and copies
 * accumulator A1 into *a1.
 * NOTE(review): the p_inited test that presumably guards the one-time build,
 * the assignment of ex->n and the func (ex) call are not visible in this
 * excerpt - confirm against the full file.
 */
12134 orc_accw (int * a1, const int16_t * s1, int n)
12136 OrcExecutor _ex, *ex = &_ex;
/* Function-static state: the program object persists across calls. */
12137 static int p_inited = 0;
12138 static OrcProgram *p = 0;
12139 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
12142 orc_once_mutex_lock ();
12144 OrcCompileResult result;
12146 p = orc_program_new ();
12147 orc_program_set_name (p, "orc_accw");
12148 orc_program_set_backup_function (p, _backup_orc_accw);
/* Variables: 2-byte source, 2-byte accumulator, one 2-byte temporary. */
12149 orc_program_add_source (p, 2, "s1");
12150 orc_program_add_accumulator (p, 2, "a1");
12151 orc_program_add_temporary (p, 2, "t1");
12153 orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
12154 orc_program_append_2 (p, "accw", 0, ORC_VAR_A1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
12156 result = orc_program_compile (p);
12159 orc_once_mutex_unlock ();
/* The const qualifier of s1 is cast away to fit the executor's array slot. */
12164 ex->arrays[ORC_VAR_S1] = (void *)s1;
12166 func = p->code_exec;
12168 *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
12173 /* orc_avg2_8xn_u8 */
/*
 * orc_avg2_8xn_u8 (scalar C variant): for each of the m rows, combines bytes
 * of s1 and s2 as ((a + b + 1) >> 1) with both operands cast to orc_uint8,
 * i.e. an average that rounds halves upward.
 * Each row pointer is formed by ORC_PTR_OFFSET from the base pointer and
 * stride * j (presumably a byte offset - confirm the macro).
 * NOTE(review): the definition of n, the loads of var32/var33 and the store
 * of var34 through ptr0 are not visible in this excerpt - confirm against the
 * full file.
 * NOTE(review): a second definition with the same name follows below;
 * presumably a preprocessor conditional (not visible here) selects one.
 */
12176 orc_avg2_8xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m){
12180 orc_int8 * ORC_RESTRICT ptr0;
12181 const orc_int8 * ORC_RESTRICT ptr4;
12182 const orc_int8 * ORC_RESTRICT ptr5;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
12187 for (j = 0; j < m; j++) {
12188 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
12189 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
12190 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
12193 for (i = 0; i < n; i++) {
/* The +1 before the shift makes the halving round upward. */
12199 var34 = ((orc_uint8)var32 + (orc_uint8)var33 + 1)>>1;
/*
 * _backup_orc_avg2_8xn_u8: executor-driven C version of orc_avg2_8xn_u8; the
 * wrapper below registers it with orc_program_set_backup_function.
 * The row count comes from ex->params[ORC_VAR_A1]; base pointers and strides
 * come from ex->arrays[] / ex->params[] at slots 0, 4 and 5.
 * NOTE(review): slots 0/4/5 are presumably ORC_VAR_D1/S1/S2; the definition
 * of n, the loads of var32/var33 and the store through ptr0 are not visible
 * in this excerpt - confirm.
 */
12209 _backup_orc_avg2_8xn_u8 (OrcExecutor * ORC_RESTRICT ex)
12214 int m = ex->params[ORC_VAR_A1];
12215 orc_int8 * ORC_RESTRICT ptr0;
12216 const orc_int8 * ORC_RESTRICT ptr4;
12217 const orc_int8 * ORC_RESTRICT ptr5;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
12222 for (j = 0; j < m; j++) {
12223 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
12224 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
12225 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
12228 for (i = 0; i < n; i++) {
/* The +1 before the shift makes the halving round upward. */
12234 var34 = ((orc_uint8)var32 + (orc_uint8)var33 + 1)>>1;
/*
 * orc_avg2_8xn_u8 (OrcProgram variant): builds and compiles the 2-D program
 * named "orc_avg2_8xn_u8" (a single avgub from s1 and s2 into d1) with a
 * constant n of 8, then fills an executor with the row count, the three base
 * pointers and their strides.
 * NOTE(review): the p_inited test that presumably guards the one-time build
 * and the func (ex) call are not visible in this excerpt - confirm against
 * the full file.
 */
12243 orc_avg2_8xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m)
12245 OrcExecutor _ex, *ex = &_ex;
/* Function-static state: the program object persists across calls. */
12246 static int p_inited = 0;
12247 static OrcProgram *p = 0;
12248 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
12251 orc_once_mutex_lock ();
12253 OrcCompileResult result;
12255 p = orc_program_new ();
/* The row width is fixed in the program rather than passed per call. */
12256 orc_program_set_constant_n (p, 8);
12257 orc_program_set_2d (p);
12258 orc_program_set_name (p, "orc_avg2_8xn_u8");
12259 orc_program_set_backup_function (p, _backup_orc_avg2_8xn_u8);
12260 orc_program_add_destination (p, 1, "d1");
12261 orc_program_add_source (p, 1, "s1");
12262 orc_program_add_source (p, 1, "s2");
12264 orc_program_append_2 (p, "avgub", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
12266 result = orc_program_compile (p);
12269 orc_once_mutex_unlock ();
/* Each array slot gets a base pointer; the params slot with the same index
 * gets that array's stride. */
12274 ORC_EXECUTOR_M(ex) = m;
12275 ex->arrays[ORC_VAR_D1] = d1;
12276 ex->params[ORC_VAR_D1] = d1_stride;
12277 ex->arrays[ORC_VAR_S1] = (void *)s1;
12278 ex->params[ORC_VAR_S1] = s1_stride;
12279 ex->arrays[ORC_VAR_S2] = (void *)s2;
12280 ex->params[ORC_VAR_S2] = s2_stride;
12282 func = p->code_exec;
12288 /* orc_avg2_12xn_u8 */
/*
 * orc_avg2_12xn_u8 (scalar C variant): for each of the m rows, combines
 * bytes of s1 and s2 as ((a + b + 1) >> 1) with both operands cast to
 * orc_uint8, i.e. an average that rounds halves upward.
 * Each row pointer is formed by ORC_PTR_OFFSET from the base pointer and
 * stride * j (presumably a byte offset - confirm the macro).
 * NOTE(review): the definition of n, the loads of var32/var33 and the store
 * of var34 through ptr0 are not visible in this excerpt - confirm against the
 * full file.
 * NOTE(review): a second definition with the same name follows below;
 * presumably a preprocessor conditional (not visible here) selects one.
 */
12291 orc_avg2_12xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m){
12295 orc_int8 * ORC_RESTRICT ptr0;
12296 const orc_int8 * ORC_RESTRICT ptr4;
12297 const orc_int8 * ORC_RESTRICT ptr5;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
12302 for (j = 0; j < m; j++) {
12303 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
12304 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
12305 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
12308 for (i = 0; i < n; i++) {
/* The +1 before the shift makes the halving round upward. */
12314 var34 = ((orc_uint8)var32 + (orc_uint8)var33 + 1)>>1;
/*
 * _backup_orc_avg2_12xn_u8: executor-driven C version of orc_avg2_12xn_u8;
 * the wrapper below registers it with orc_program_set_backup_function.
 * The row count comes from ex->params[ORC_VAR_A1]; base pointers and strides
 * come from ex->arrays[] / ex->params[] at slots 0, 4 and 5.
 * NOTE(review): slots 0/4/5 are presumably ORC_VAR_D1/S1/S2; the definition
 * of n, the loads of var32/var33 and the store through ptr0 are not visible
 * in this excerpt - confirm.
 */
12324 _backup_orc_avg2_12xn_u8 (OrcExecutor * ORC_RESTRICT ex)
12329 int m = ex->params[ORC_VAR_A1];
12330 orc_int8 * ORC_RESTRICT ptr0;
12331 const orc_int8 * ORC_RESTRICT ptr4;
12332 const orc_int8 * ORC_RESTRICT ptr5;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
12337 for (j = 0; j < m; j++) {
12338 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
12339 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
12340 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
12343 for (i = 0; i < n; i++) {
/* The +1 before the shift makes the halving round upward. */
12349 var34 = ((orc_uint8)var32 + (orc_uint8)var33 + 1)>>1;
/*
 * orc_avg2_12xn_u8 (OrcProgram variant): builds and compiles the 2-D program
 * named "orc_avg2_12xn_u8" (a single avgub from s1 and s2 into d1) with a
 * constant n of 12, then fills an executor with the row count, the three
 * base pointers and their strides.
 * NOTE(review): the p_inited test that presumably guards the one-time build
 * and the func (ex) call are not visible in this excerpt - confirm against
 * the full file.
 */
12358 orc_avg2_12xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m)
12360 OrcExecutor _ex, *ex = &_ex;
/* Function-static state: the program object persists across calls. */
12361 static int p_inited = 0;
12362 static OrcProgram *p = 0;
12363 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
12366 orc_once_mutex_lock ();
12368 OrcCompileResult result;
12370 p = orc_program_new ();
/* The row width is fixed in the program rather than passed per call. */
12371 orc_program_set_constant_n (p, 12);
12372 orc_program_set_2d (p);
12373 orc_program_set_name (p, "orc_avg2_12xn_u8");
12374 orc_program_set_backup_function (p, _backup_orc_avg2_12xn_u8);
12375 orc_program_add_destination (p, 1, "d1");
12376 orc_program_add_source (p, 1, "s1");
12377 orc_program_add_source (p, 1, "s2");
12379 orc_program_append_2 (p, "avgub", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
12381 result = orc_program_compile (p);
12384 orc_once_mutex_unlock ();
/* Each array slot gets a base pointer; the params slot with the same index
 * gets that array's stride. */
12389 ORC_EXECUTOR_M(ex) = m;
12390 ex->arrays[ORC_VAR_D1] = d1;
12391 ex->params[ORC_VAR_D1] = d1_stride;
12392 ex->arrays[ORC_VAR_S1] = (void *)s1;
12393 ex->params[ORC_VAR_S1] = s1_stride;
12394 ex->arrays[ORC_VAR_S2] = (void *)s2;
12395 ex->params[ORC_VAR_S2] = s2_stride;
12397 func = p->code_exec;
12403 /* orc_avg2_16xn_u8 */
/*
 * orc_avg2_16xn_u8 (scalar C variant): for each of the m rows, combines
 * bytes of s1 and s2 as ((a + b + 1) >> 1) with both operands cast to
 * orc_uint8, i.e. an average that rounds halves upward.
 * Each row pointer is formed by ORC_PTR_OFFSET from the base pointer and
 * stride * j (presumably a byte offset - confirm the macro).
 * NOTE(review): the definition of n, the loads of var32/var33 and the store
 * of var34 through ptr0 are not visible in this excerpt - confirm against the
 * full file.
 * NOTE(review): a second definition with the same name follows below;
 * presumably a preprocessor conditional (not visible here) selects one.
 */
12406 orc_avg2_16xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m){
12410 orc_int8 * ORC_RESTRICT ptr0;
12411 const orc_int8 * ORC_RESTRICT ptr4;
12412 const orc_int8 * ORC_RESTRICT ptr5;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
12417 for (j = 0; j < m; j++) {
12418 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
12419 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
12420 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
12423 for (i = 0; i < n; i++) {
/* The +1 before the shift makes the halving round upward. */
12429 var34 = ((orc_uint8)var32 + (orc_uint8)var33 + 1)>>1;
/*
 * _backup_orc_avg2_16xn_u8: executor-driven C version of orc_avg2_16xn_u8;
 * the wrapper below registers it with orc_program_set_backup_function.
 * The row count comes from ex->params[ORC_VAR_A1]; base pointers and strides
 * come from ex->arrays[] / ex->params[] at slots 0, 4 and 5.
 * NOTE(review): slots 0/4/5 are presumably ORC_VAR_D1/S1/S2; the definition
 * of n, the loads of var32/var33 and the store through ptr0 are not visible
 * in this excerpt - confirm.
 */
12439 _backup_orc_avg2_16xn_u8 (OrcExecutor * ORC_RESTRICT ex)
12444 int m = ex->params[ORC_VAR_A1];
12445 orc_int8 * ORC_RESTRICT ptr0;
12446 const orc_int8 * ORC_RESTRICT ptr4;
12447 const orc_int8 * ORC_RESTRICT ptr5;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
12452 for (j = 0; j < m; j++) {
12453 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
12454 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
12455 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
12458 for (i = 0; i < n; i++) {
/* The +1 before the shift makes the halving round upward. */
12464 var34 = ((orc_uint8)var32 + (orc_uint8)var33 + 1)>>1;
/*
 * orc_avg2_16xn_u8 (OrcProgram variant): builds and compiles the 2-D program
 * named "orc_avg2_16xn_u8" (a single avgub from s1 and s2 into d1) with a
 * constant n of 16, then fills an executor with the row count, the three
 * base pointers and their strides.
 * NOTE(review): the p_inited test that presumably guards the one-time build
 * and the func (ex) call are not visible in this excerpt - confirm against
 * the full file.
 */
12473 orc_avg2_16xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m)
12475 OrcExecutor _ex, *ex = &_ex;
/* Function-static state: the program object persists across calls. */
12476 static int p_inited = 0;
12477 static OrcProgram *p = 0;
12478 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
12481 orc_once_mutex_lock ();
12483 OrcCompileResult result;
12485 p = orc_program_new ();
/* The row width is fixed in the program rather than passed per call. */
12486 orc_program_set_constant_n (p, 16);
12487 orc_program_set_2d (p);
12488 orc_program_set_name (p, "orc_avg2_16xn_u8");
12489 orc_program_set_backup_function (p, _backup_orc_avg2_16xn_u8);
12490 orc_program_add_destination (p, 1, "d1");
12491 orc_program_add_source (p, 1, "s1");
12492 orc_program_add_source (p, 1, "s2");
12494 orc_program_append_2 (p, "avgub", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
12496 result = orc_program_compile (p);
12499 orc_once_mutex_unlock ();
/* Each array slot gets a base pointer; the params slot with the same index
 * gets that array's stride. */
12504 ORC_EXECUTOR_M(ex) = m;
12505 ex->arrays[ORC_VAR_D1] = d1;
12506 ex->params[ORC_VAR_D1] = d1_stride;
12507 ex->arrays[ORC_VAR_S1] = (void *)s1;
12508 ex->params[ORC_VAR_S1] = s1_stride;
12509 ex->arrays[ORC_VAR_S2] = (void *)s2;
12510 ex->params[ORC_VAR_S2] = s2_stride;
12512 func = p->code_exec;
12518 /* orc_avg2_32xn_u8 */
/*
 * orc_avg2_32xn_u8 (scalar C variant): for each of the m rows, combines
 * bytes of s1 and s2 as ((a + b + 1) >> 1) with both operands cast to
 * orc_uint8, i.e. an average that rounds halves upward.
 * Each row pointer is formed by ORC_PTR_OFFSET from the base pointer and
 * stride * j (presumably a byte offset - confirm the macro).
 * NOTE(review): the definition of n, the loads of var32/var33 and the store
 * of var34 through ptr0 are not visible in this excerpt - confirm against the
 * full file.
 * NOTE(review): a second definition with the same name follows below;
 * presumably a preprocessor conditional (not visible here) selects one.
 */
12521 orc_avg2_32xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m){
12525 orc_int8 * ORC_RESTRICT ptr0;
12526 const orc_int8 * ORC_RESTRICT ptr4;
12527 const orc_int8 * ORC_RESTRICT ptr5;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
12532 for (j = 0; j < m; j++) {
12533 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
12534 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
12535 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
12538 for (i = 0; i < n; i++) {
/* The +1 before the shift makes the halving round upward. */
12544 var34 = ((orc_uint8)var32 + (orc_uint8)var33 + 1)>>1;
/*
 * _backup_orc_avg2_32xn_u8: executor-driven C version of orc_avg2_32xn_u8;
 * the wrapper below registers it with orc_program_set_backup_function.
 * The row count comes from ex->params[ORC_VAR_A1]; base pointers and strides
 * come from ex->arrays[] / ex->params[] at slots 0, 4 and 5.
 * NOTE(review): slots 0/4/5 are presumably ORC_VAR_D1/S1/S2; the definition
 * of n, the loads of var32/var33 and the store through ptr0 are not visible
 * in this excerpt - confirm.
 */
12554 _backup_orc_avg2_32xn_u8 (OrcExecutor * ORC_RESTRICT ex)
12559 int m = ex->params[ORC_VAR_A1];
12560 orc_int8 * ORC_RESTRICT ptr0;
12561 const orc_int8 * ORC_RESTRICT ptr4;
12562 const orc_int8 * ORC_RESTRICT ptr5;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
12567 for (j = 0; j < m; j++) {
12568 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
12569 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
12570 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
12573 for (i = 0; i < n; i++) {
/* The +1 before the shift makes the halving round upward. */
12579 var34 = ((orc_uint8)var32 + (orc_uint8)var33 + 1)>>1;
/*
 * orc_avg2_32xn_u8 (OrcProgram variant): builds and compiles the 2-D program
 * named "orc_avg2_32xn_u8" (a single avgub from s1 and s2 into d1) with a
 * constant n of 32, then fills an executor with the row count, the three
 * base pointers and their strides.
 * NOTE(review): the p_inited test that presumably guards the one-time build
 * and the func (ex) call are not visible in this excerpt - confirm against
 * the full file.
 */
12588 orc_avg2_32xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m)
12590 OrcExecutor _ex, *ex = &_ex;
/* Function-static state: the program object persists across calls. */
12591 static int p_inited = 0;
12592 static OrcProgram *p = 0;
12593 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
12596 orc_once_mutex_lock ();
12598 OrcCompileResult result;
12600 p = orc_program_new ();
/* The row width is fixed in the program rather than passed per call. */
12601 orc_program_set_constant_n (p, 32);
12602 orc_program_set_2d (p);
12603 orc_program_set_name (p, "orc_avg2_32xn_u8");
12604 orc_program_set_backup_function (p, _backup_orc_avg2_32xn_u8);
12605 orc_program_add_destination (p, 1, "d1");
12606 orc_program_add_source (p, 1, "s1");
12607 orc_program_add_source (p, 1, "s2");
12609 orc_program_append_2 (p, "avgub", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
12611 result = orc_program_compile (p);
12614 orc_once_mutex_unlock ();
/* Each array slot gets a base pointer; the params slot with the same index
 * gets that array's stride. */
12619 ORC_EXECUTOR_M(ex) = m;
12620 ex->arrays[ORC_VAR_D1] = d1;
12621 ex->params[ORC_VAR_D1] = d1_stride;
12622 ex->arrays[ORC_VAR_S1] = (void *)s1;
12623 ex->params[ORC_VAR_S1] = s1_stride;
12624 ex->arrays[ORC_VAR_S2] = (void *)s2;
12625 ex->params[ORC_VAR_S2] = s2_stride;
12627 func = p->code_exec;
12633 /* orc_avg2_nxm_u8 */
/*
 * orc_avg2_nxm_u8 (scalar C variant): for each of the m rows and n columns,
 * combines bytes of s1 and s2 as ((a + b + 1) >> 1) with both operands cast
 * to orc_uint8, i.e. an average that rounds halves upward. Unlike the
 * fixed-width avg2 kernels above, n is a caller-supplied parameter.
 * Each row pointer is formed by ORC_PTR_OFFSET from the base pointer and
 * stride * j (presumably a byte offset - confirm the macro).
 * NOTE(review): the loads of var32/var33 and the store of var34 through ptr0
 * are not visible in this excerpt - confirm against the full file.
 * NOTE(review): a second definition with the same name follows below;
 * presumably a preprocessor conditional (not visible here) selects one.
 */
12636 orc_avg2_nxm_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int n, int m){
12639 orc_int8 * ORC_RESTRICT ptr0;
12640 const orc_int8 * ORC_RESTRICT ptr4;
12641 const orc_int8 * ORC_RESTRICT ptr5;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
12646 for (j = 0; j < m; j++) {
12647 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
12648 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
12649 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
12652 for (i = 0; i < n; i++) {
/* The +1 before the shift makes the halving round upward. */
12658 var34 = ((orc_uint8)var32 + (orc_uint8)var33 + 1)>>1;
/*
 * _backup_orc_avg2_nxm_u8: executor-driven C version of orc_avg2_nxm_u8; the
 * wrapper below registers it with orc_program_set_backup_function.
 * The row count comes from ex->params[ORC_VAR_A1]; base pointers and strides
 * come from ex->arrays[] / ex->params[] at slots 0, 4 and 5.
 * NOTE(review): slots 0/4/5 are presumably ORC_VAR_D1/S1/S2; the definition
 * of n, the loads of var32/var33 and the store through ptr0 are not visible
 * in this excerpt - confirm.
 */
12668 _backup_orc_avg2_nxm_u8 (OrcExecutor * ORC_RESTRICT ex)
12673 int m = ex->params[ORC_VAR_A1];
12674 orc_int8 * ORC_RESTRICT ptr0;
12675 const orc_int8 * ORC_RESTRICT ptr4;
12676 const orc_int8 * ORC_RESTRICT ptr5;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
12681 for (j = 0; j < m; j++) {
12682 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
12683 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
12684 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
12687 for (i = 0; i < n; i++) {
/* The +1 before the shift makes the halving round upward. */
12693 var34 = ((orc_uint8)var32 + (orc_uint8)var33 + 1)>>1;
/*
 * orc_avg2_nxm_u8 (OrcProgram variant): builds and compiles the 2-D program
 * named "orc_avg2_nxm_u8" (a single avgub from s1 and s2 into d1), then
 * fills an executor with the row count, the three base pointers and their
 * strides. No orc_program_set_constant_n call is made here, unlike the
 * fixed-width avg2 kernels above.
 * NOTE(review): the p_inited test that presumably guards the one-time build,
 * the assignment of ex->n and the func (ex) call are not visible in this
 * excerpt - confirm against the full file.
 */
12702 orc_avg2_nxm_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int n, int m)
12704 OrcExecutor _ex, *ex = &_ex;
/* Function-static state: the program object persists across calls. */
12705 static int p_inited = 0;
12706 static OrcProgram *p = 0;
12707 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
12710 orc_once_mutex_lock ();
12712 OrcCompileResult result;
12714 p = orc_program_new ();
12715 orc_program_set_2d (p);
12716 orc_program_set_name (p, "orc_avg2_nxm_u8");
12717 orc_program_set_backup_function (p, _backup_orc_avg2_nxm_u8);
12718 orc_program_add_destination (p, 1, "d1");
12719 orc_program_add_source (p, 1, "s1");
12720 orc_program_add_source (p, 1, "s2");
12722 orc_program_append_2 (p, "avgub", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
12724 result = orc_program_compile (p);
12727 orc_once_mutex_unlock ();
/* Each array slot gets a base pointer; the params slot with the same index
 * gets that array's stride. */
12732 ORC_EXECUTOR_M(ex) = m;
12733 ex->arrays[ORC_VAR_D1] = d1;
12734 ex->params[ORC_VAR_D1] = d1_stride;
12735 ex->arrays[ORC_VAR_S1] = (void *)s1;
12736 ex->params[ORC_VAR_S1] = s1_stride;
12737 ex->arrays[ORC_VAR_S2] = (void *)s2;
12738 ex->params[ORC_VAR_S2] = s2_stride;
12740 func = p->code_exec;
12746 /* orc_combine4_8xn_u8 */
/*
 * orc_combine4_8xn_u8 (scalar C variant): for each of the m rows, forms a
 * weighted sum of one byte from each of s1..s4. Each byte is cast to
 * orc_uint8, multiplied by a weight with the product masked to 16 bits, the
 * four products are added together with the constant 8, and the total is
 * passed through ORC_CLAMP_UB.
 * NOTE(review): the definition of n, the byte loads (var34/36/38/40), the
 * weight assignments (var35/37/39/41, presumably p1..p4) and the store of
 * var43 through ptr0 are not visible in this excerpt - confirm against the
 * full file.
 * NOTE(review): a second definition with the same name follows below;
 * presumably a preprocessor conditional (not visible here) selects one.
 */
12749 orc_combine4_8xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m){
12753 orc_int8 * ORC_RESTRICT ptr0;
12754 const orc_int8 * ORC_RESTRICT ptr4;
12755 const orc_int8 * ORC_RESTRICT ptr5;
12756 const orc_int8 * ORC_RESTRICT ptr6;
12757 const orc_int8 * ORC_RESTRICT ptr7;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
12781 for (j = 0; j < m; j++) {
12782 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
12783 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
12784 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
12785 ptr6 = ORC_PTR_OFFSET(s3, s3_stride * j);
12786 ptr7 = ORC_PTR_OFFSET(s4, s4_stride * j);
/* Constant added to the sum of products before clamping. */
12797 var42.i = 0x00000008; /* 8 or 3.95253e-323f */
12799 for (i = 0; i < n; i++) {
/* First term: byte widened as unsigned, product kept to its low 16 bits. */
12803 var44.i = (orc_uint8)var34;
12805 var45.i = (var44.i * var35.i) & 0xffff;
/* Second term, added to the first. */
12809 var46.i = (orc_uint8)var36;
12811 var47.i = (var46.i * var37.i) & 0xffff;
12813 var48.i = var45.i + var47.i;
/* Third term. */
12817 var49.i = (orc_uint8)var38;
12819 var50.i = (var49.i * var39.i) & 0xffff;
12821 var51.i = var48.i + var50.i;
/* Fourth term. */
12825 var52.i = (orc_uint8)var40;
12827 var53.i = (var52.i * var41.i) & 0xffff;
12829 var54.i = var51.i + var53.i;
12831 var55.i = var54.i + var42.i;
12832 /* 21: convsuswb */
12833 var43 = ORC_CLAMP_UB(var55.i);
/*
 * _backup_orc_combine4_8xn_u8: executor-driven C version of
 * orc_combine4_8xn_u8; the wrapper below registers it with
 * orc_program_set_backup_function.
 * The row count comes from ex->params[ORC_VAR_A1]; base pointers and strides
 * come from slots 0 and 4..7; the four weights come from ex->params[24..27].
 * NOTE(review): slots 0/4..7 are presumably ORC_VAR_D1/S1..S4 and 24..27
 * ORC_VAR_P1..P4; the definition of n, the byte loads (var34/36/38/40) and
 * the store of var43 through ptr0 are not visible in this excerpt - confirm.
 */
12843 _backup_orc_combine4_8xn_u8 (OrcExecutor * ORC_RESTRICT ex)
12848 int m = ex->params[ORC_VAR_A1];
12849 orc_int8 * ORC_RESTRICT ptr0;
12850 const orc_int8 * ORC_RESTRICT ptr4;
12851 const orc_int8 * ORC_RESTRICT ptr5;
12852 const orc_int8 * ORC_RESTRICT ptr6;
12853 const orc_int8 * ORC_RESTRICT ptr7;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
12877 for (j = 0; j < m; j++) {
12878 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
12879 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
12880 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
12881 ptr6 = ORC_PTR_OFFSET(ex->arrays[6], ex->params[6] * j);
12882 ptr7 = ORC_PTR_OFFSET(ex->arrays[7], ex->params[7] * j);
/* Per-source weights, read from the executor's parameter slots. */
12885 var35.i = ex->params[24];
12887 var37.i = ex->params[25];
12889 var39.i = ex->params[26];
12891 var41.i = ex->params[27];
/* Constant added to the sum of products before clamping. */
12893 var42.i = 0x00000008; /* 8 or 3.95253e-323f */
12895 for (i = 0; i < n; i++) {
/* Four terms of the form (unsigned byte * weight) & 0xffff, summed. */
12899 var44.i = (orc_uint8)var34;
12901 var45.i = (var44.i * var35.i) & 0xffff;
12905 var46.i = (orc_uint8)var36;
12907 var47.i = (var46.i * var37.i) & 0xffff;
12909 var48.i = var45.i + var47.i;
12913 var49.i = (orc_uint8)var38;
12915 var50.i = (var49.i * var39.i) & 0xffff;
12917 var51.i = var48.i + var50.i;
12921 var52.i = (orc_uint8)var40;
12923 var53.i = (var52.i * var41.i) & 0xffff;
12925 var54.i = var51.i + var53.i;
12927 var55.i = var54.i + var42.i;
12928 /* 21: convsuswb */
12929 var43 = ORC_CLAMP_UB(var55.i);
/*
 * orc_combine4_8xn_u8 (OrcProgram variant): builds and compiles the 2-D
 * program named "orc_combine4_8xn_u8" with a constant n of 8, then fills an
 * executor with the row count, five base pointers with their strides, and
 * the four weights p1..p4.
 * Program: each source byte is widened (convubw) and multiplied by its
 * parameter (mullw); the products are accumulated in t2 (addw), c1 (8) is
 * added, and convsuswb writes the result to d1.
 * NOTE(review): the p_inited test that presumably guards the one-time build
 * and the func (ex) call are not visible in this excerpt - confirm against
 * the full file.
 */
12938 orc_combine4_8xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m)
12940 OrcExecutor _ex, *ex = &_ex;
/* Function-static state: the program object persists across calls. */
12941 static int p_inited = 0;
12942 static OrcProgram *p = 0;
12943 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
12946 orc_once_mutex_lock ();
12948 OrcCompileResult result;
12950 p = orc_program_new ();
/* The row width is fixed in the program rather than passed per call. */
12951 orc_program_set_constant_n (p, 8);
12952 orc_program_set_2d (p);
12953 orc_program_set_name (p, "orc_combine4_8xn_u8");
12954 orc_program_set_backup_function (p, _backup_orc_combine4_8xn_u8);
/* Variables: 1-byte destination and sources, constant 8, four 2-byte
 * parameters and two 2-byte temporaries. */
12955 orc_program_add_destination (p, 1, "d1");
12956 orc_program_add_source (p, 1, "s1");
12957 orc_program_add_source (p, 1, "s2");
12958 orc_program_add_source (p, 1, "s3");
12959 orc_program_add_source (p, 1, "s4");
12960 orc_program_add_constant (p, 4, 0x00000008, "c1");
12961 orc_program_add_parameter (p, 2, "p1");
12962 orc_program_add_parameter (p, 2, "p2");
12963 orc_program_add_parameter (p, 2, "p3");
12964 orc_program_add_parameter (p, 2, "p4");
12965 orc_program_add_temporary (p, 2, "t1");
12966 orc_program_add_temporary (p, 2, "t2");
/* t2 holds the running sum; t1 is scratch for each later term. */
12968 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
12969 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
12970 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
12971 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
12972 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
12973 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
12974 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
12975 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
12976 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S4, ORC_VAR_D1, ORC_VAR_D1);
12977 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
12978 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
12979 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
12980 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
12982 result = orc_program_compile (p);
12985 orc_once_mutex_unlock ();
/* Each array slot gets a base pointer; the params slot with the same index
 * gets that array's stride. The P1..P4 slots carry the weights. */
12990 ORC_EXECUTOR_M(ex) = m;
12991 ex->arrays[ORC_VAR_D1] = d1;
12992 ex->params[ORC_VAR_D1] = d1_stride;
12993 ex->arrays[ORC_VAR_S1] = (void *)s1;
12994 ex->params[ORC_VAR_S1] = s1_stride;
12995 ex->arrays[ORC_VAR_S2] = (void *)s2;
12996 ex->params[ORC_VAR_S2] = s2_stride;
12997 ex->arrays[ORC_VAR_S3] = (void *)s3;
12998 ex->params[ORC_VAR_S3] = s3_stride;
12999 ex->arrays[ORC_VAR_S4] = (void *)s4;
13000 ex->params[ORC_VAR_S4] = s4_stride;
13001 ex->params[ORC_VAR_P1] = p1;
13002 ex->params[ORC_VAR_P2] = p2;
13003 ex->params[ORC_VAR_P3] = p3;
13004 ex->params[ORC_VAR_P4] = p4;
13006 func = p->code_exec;
13012 /* orc_combine4_12xn_u8 */
/*
 * orc_combine4_12xn_u8 (scalar C variant): for each of the m rows, forms a
 * weighted sum of one byte from each of s1..s4. Each byte is cast to
 * orc_uint8, multiplied by a weight with the product masked to 16 bits, the
 * four products are added together with the constant 8, and the total is
 * passed through ORC_CLAMP_UB.
 * NOTE(review): the definition of n, the byte loads (var34/36/38/40), the
 * weight assignments (var35/37/39/41, presumably p1..p4) and the store of
 * var43 through ptr0 are not visible in this excerpt - confirm against the
 * full file.
 * NOTE(review): a second definition with the same name follows below;
 * presumably a preprocessor conditional (not visible here) selects one.
 */
13015 orc_combine4_12xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m){
13019 orc_int8 * ORC_RESTRICT ptr0;
13020 const orc_int8 * ORC_RESTRICT ptr4;
13021 const orc_int8 * ORC_RESTRICT ptr5;
13022 const orc_int8 * ORC_RESTRICT ptr6;
13023 const orc_int8 * ORC_RESTRICT ptr7;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
13047 for (j = 0; j < m; j++) {
13048 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
13049 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
13050 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
13051 ptr6 = ORC_PTR_OFFSET(s3, s3_stride * j);
13052 ptr7 = ORC_PTR_OFFSET(s4, s4_stride * j);
/* Constant added to the sum of products before clamping. */
13063 var42.i = 0x00000008; /* 8 or 3.95253e-323f */
13065 for (i = 0; i < n; i++) {
/* First term: byte widened as unsigned, product kept to its low 16 bits. */
13069 var44.i = (orc_uint8)var34;
13071 var45.i = (var44.i * var35.i) & 0xffff;
/* Second term, added to the first. */
13075 var46.i = (orc_uint8)var36;
13077 var47.i = (var46.i * var37.i) & 0xffff;
13079 var48.i = var45.i + var47.i;
/* Third term. */
13083 var49.i = (orc_uint8)var38;
13085 var50.i = (var49.i * var39.i) & 0xffff;
13087 var51.i = var48.i + var50.i;
/* Fourth term. */
13091 var52.i = (orc_uint8)var40;
13093 var53.i = (var52.i * var41.i) & 0xffff;
13095 var54.i = var51.i + var53.i;
13097 var55.i = var54.i + var42.i;
13098 /* 21: convsuswb */
13099 var43 = ORC_CLAMP_UB(var55.i);
/*
 * _backup_orc_combine4_12xn_u8: executor-driven C version of
 * orc_combine4_12xn_u8; the wrapper below registers it with
 * orc_program_set_backup_function.
 * The row count comes from ex->params[ORC_VAR_A1]; base pointers and strides
 * come from slots 0 and 4..7; the four weights come from ex->params[24..27].
 * NOTE(review): slots 0/4..7 are presumably ORC_VAR_D1/S1..S4 and 24..27
 * ORC_VAR_P1..P4; the definition of n, the byte loads (var34/36/38/40) and
 * the store of var43 through ptr0 are not visible in this excerpt - confirm.
 */
13109 _backup_orc_combine4_12xn_u8 (OrcExecutor * ORC_RESTRICT ex)
13114 int m = ex->params[ORC_VAR_A1];
13115 orc_int8 * ORC_RESTRICT ptr0;
13116 const orc_int8 * ORC_RESTRICT ptr4;
13117 const orc_int8 * ORC_RESTRICT ptr5;
13118 const orc_int8 * ORC_RESTRICT ptr6;
13119 const orc_int8 * ORC_RESTRICT ptr7;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
13143 for (j = 0; j < m; j++) {
13144 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
13145 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
13146 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
13147 ptr6 = ORC_PTR_OFFSET(ex->arrays[6], ex->params[6] * j);
13148 ptr7 = ORC_PTR_OFFSET(ex->arrays[7], ex->params[7] * j);
/* Per-source weights, read from the executor's parameter slots. */
13151 var35.i = ex->params[24];
13153 var37.i = ex->params[25];
13155 var39.i = ex->params[26];
13157 var41.i = ex->params[27];
/* Constant added to the sum of products before clamping. */
13159 var42.i = 0x00000008; /* 8 or 3.95253e-323f */
13161 for (i = 0; i < n; i++) {
/* Four terms of the form (unsigned byte * weight) & 0xffff, summed. */
13165 var44.i = (orc_uint8)var34;
13167 var45.i = (var44.i * var35.i) & 0xffff;
13171 var46.i = (orc_uint8)var36;
13173 var47.i = (var46.i * var37.i) & 0xffff;
13175 var48.i = var45.i + var47.i;
13179 var49.i = (orc_uint8)var38;
13181 var50.i = (var49.i * var39.i) & 0xffff;
13183 var51.i = var48.i + var50.i;
13187 var52.i = (orc_uint8)var40;
13189 var53.i = (var52.i * var41.i) & 0xffff;
13191 var54.i = var51.i + var53.i;
13193 var55.i = var54.i + var42.i;
13194 /* 21: convsuswb */
13195 var43 = ORC_CLAMP_UB(var55.i);
/*
 * orc_combine4_12xn_u8 (OrcProgram variant): builds and compiles the 2-D
 * program named "orc_combine4_12xn_u8" with a constant n of 12, then fills
 * an executor with the row count, five base pointers with their strides, and
 * the four weights p1..p4.
 * Program: each source byte is widened (convubw) and multiplied by its
 * parameter (mullw); the products are accumulated in t2 (addw), c1 (8) is
 * added, and convsuswb writes the result to d1.
 * NOTE(review): the p_inited test that presumably guards the one-time build
 * and the func (ex) call are not visible in this excerpt - confirm against
 * the full file.
 */
13204 orc_combine4_12xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m)
13206 OrcExecutor _ex, *ex = &_ex;
/* Function-static state: the program object persists across calls. */
13207 static int p_inited = 0;
13208 static OrcProgram *p = 0;
13209 void (*func) (OrcExecutor *);
/* Program construction is bracketed by orc_once_mutex_lock/unlock. */
13212 orc_once_mutex_lock ();
13214 OrcCompileResult result;
13216 p = orc_program_new ();
/* The row width is fixed in the program rather than passed per call. */
13217 orc_program_set_constant_n (p, 12);
13218 orc_program_set_2d (p);
13219 orc_program_set_name (p, "orc_combine4_12xn_u8");
13220 orc_program_set_backup_function (p, _backup_orc_combine4_12xn_u8);
/* Variables: 1-byte destination and sources, constant 8, four 2-byte
 * parameters and two 2-byte temporaries. */
13221 orc_program_add_destination (p, 1, "d1");
13222 orc_program_add_source (p, 1, "s1");
13223 orc_program_add_source (p, 1, "s2");
13224 orc_program_add_source (p, 1, "s3");
13225 orc_program_add_source (p, 1, "s4");
13226 orc_program_add_constant (p, 4, 0x00000008, "c1");
13227 orc_program_add_parameter (p, 2, "p1");
13228 orc_program_add_parameter (p, 2, "p2");
13229 orc_program_add_parameter (p, 2, "p3");
13230 orc_program_add_parameter (p, 2, "p4");
13231 orc_program_add_temporary (p, 2, "t1");
13232 orc_program_add_temporary (p, 2, "t2");
/* t2 holds the running sum; t1 is scratch for each later term. */
13234 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
13235 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
13236 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
13237 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
13238 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
13239 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
13240 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
13241 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
13242 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S4, ORC_VAR_D1, ORC_VAR_D1);
13243 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
13244 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
13245 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
13246 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
13248 result = orc_program_compile (p);
13251 orc_once_mutex_unlock ();
/* Each array slot gets a base pointer; the params slot with the same index
 * gets that array's stride. The P1..P4 slots carry the weights. */
13256 ORC_EXECUTOR_M(ex) = m;
13257 ex->arrays[ORC_VAR_D1] = d1;
13258 ex->params[ORC_VAR_D1] = d1_stride;
13259 ex->arrays[ORC_VAR_S1] = (void *)s1;
13260 ex->params[ORC_VAR_S1] = s1_stride;
13261 ex->arrays[ORC_VAR_S2] = (void *)s2;
13262 ex->params[ORC_VAR_S2] = s2_stride;
13263 ex->arrays[ORC_VAR_S3] = (void *)s3;
13264 ex->params[ORC_VAR_S3] = s3_stride;
13265 ex->arrays[ORC_VAR_S4] = (void *)s4;
13266 ex->params[ORC_VAR_S4] = s4_stride;
13267 ex->params[ORC_VAR_P1] = p1;
13268 ex->params[ORC_VAR_P2] = p2;
13269 ex->params[ORC_VAR_P3] = p3;
13270 ex->params[ORC_VAR_P4] = p4;
13272 func = p->code_exec;
13278 /* orc_combine4_16xn_u8 */
/*
 * orc_combine4_16xn_u8 (scalar C variant): for each of the m rows, forms a
 * weighted sum of one byte from each of s1..s4. Each byte is cast to
 * orc_uint8, multiplied by a weight with the product masked to 16 bits, the
 * four products are added together with the constant 8, and the total is
 * passed through ORC_CLAMP_UB.
 * NOTE(review): the definition of n, the byte loads (var34/36/38/40), the
 * weight assignments (var35/37/39/41, presumably p1..p4) and the store of
 * var43 through ptr0 are not visible in this excerpt - confirm against the
 * full file.
 * NOTE(review): a second definition with the same name follows below;
 * presumably a preprocessor conditional (not visible here) selects one.
 */
13281 orc_combine4_16xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m){
13285 orc_int8 * ORC_RESTRICT ptr0;
13286 const orc_int8 * ORC_RESTRICT ptr4;
13287 const orc_int8 * ORC_RESTRICT ptr5;
13288 const orc_int8 * ORC_RESTRICT ptr6;
13289 const orc_int8 * ORC_RESTRICT ptr7;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
13313 for (j = 0; j < m; j++) {
13314 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
13315 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
13316 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
13317 ptr6 = ORC_PTR_OFFSET(s3, s3_stride * j);
13318 ptr7 = ORC_PTR_OFFSET(s4, s4_stride * j);
/* Constant added to the sum of products before clamping. */
13329 var42.i = 0x00000008; /* 8 or 3.95253e-323f */
13331 for (i = 0; i < n; i++) {
/* First term: byte widened as unsigned, product kept to its low 16 bits. */
13335 var44.i = (orc_uint8)var34;
13337 var45.i = (var44.i * var35.i) & 0xffff;
/* Second term, added to the first. */
13341 var46.i = (orc_uint8)var36;
13343 var47.i = (var46.i * var37.i) & 0xffff;
13345 var48.i = var45.i + var47.i;
/* Third term. */
13349 var49.i = (orc_uint8)var38;
13351 var50.i = (var49.i * var39.i) & 0xffff;
13353 var51.i = var48.i + var50.i;
/* Fourth term. */
13357 var52.i = (orc_uint8)var40;
13359 var53.i = (var52.i * var41.i) & 0xffff;
13361 var54.i = var51.i + var53.i;
13363 var55.i = var54.i + var42.i;
13364 /* 21: convsuswb */
13365 var43 = ORC_CLAMP_UB(var55.i);
/*
 * _backup_orc_combine4_16xn_u8: executor-driven C version of
 * orc_combine4_16xn_u8.
 * The row count comes from ex->params[ORC_VAR_A1]; base pointers and strides
 * come from slots 0 and 4..7; the four weights come from ex->params[24..27].
 * NOTE(review): slots 0/4..7 are presumably ORC_VAR_D1/S1..S4 and 24..27
 * ORC_VAR_P1..P4; the definition of n, the byte loads (var34/36/38/40) and
 * the store of var43 through ptr0 are not visible in this excerpt - confirm.
 */
13375 _backup_orc_combine4_16xn_u8 (OrcExecutor * ORC_RESTRICT ex)
13380 int m = ex->params[ORC_VAR_A1];
13381 orc_int8 * ORC_RESTRICT ptr0;
13382 const orc_int8 * ORC_RESTRICT ptr4;
13383 const orc_int8 * ORC_RESTRICT ptr5;
13384 const orc_int8 * ORC_RESTRICT ptr6;
13385 const orc_int8 * ORC_RESTRICT ptr7;
/* Outer loop: one iteration per row; row pointers are recomputed each time. */
13409 for (j = 0; j < m; j++) {
13410 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
13411 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
13412 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
13413 ptr6 = ORC_PTR_OFFSET(ex->arrays[6], ex->params[6] * j);
13414 ptr7 = ORC_PTR_OFFSET(ex->arrays[7], ex->params[7] * j);
/* Per-source weights, read from the executor's parameter slots. */
13417 var35.i = ex->params[24];
13419 var37.i = ex->params[25];
13421 var39.i = ex->params[26];
13423 var41.i = ex->params[27];
/* Constant added to the sum of products before clamping. */
13425 var42.i = 0x00000008; /* 8 or 3.95253e-323f */
13427 for (i = 0; i < n; i++) {
/* Four terms of the form (unsigned byte * weight) & 0xffff, summed. */
13431 var44.i = (orc_uint8)var34;
13433 var45.i = (var44.i * var35.i) & 0xffff;
13437 var46.i = (orc_uint8)var36;
13439 var47.i = (var46.i * var37.i) & 0xffff;
13441 var48.i = var45.i + var47.i;
13445 var49.i = (orc_uint8)var38;
13447 var50.i = (var49.i * var39.i) & 0xffff;
13449 var51.i = var48.i + var50.i;
13453 var52.i = (orc_uint8)var40;
13455 var53.i = (var52.i * var41.i) & 0xffff;
13457 var54.i = var51.i + var53.i;
13459 var55.i = var54.i + var42.i;
13460 /* 21: convsuswb */
13461 var43 = ORC_CLAMP_UB(var55.i);
/* Entry point that describes orc_combine4_16xn_u8 to ORC as an OrcProgram and packs the caller's arguments into an OrcExecutor. */
13470 orc_combine4_16xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m)
13472 OrcExecutor _ex, *ex = &_ex;
/* Function-static program handle and init flag: both keep their values between calls. */
13473 static int p_inited = 0;
13474 static OrcProgram *p = 0;
13475 void (*func) (OrcExecutor *);
/* Program setup is bracketed by orc_once_mutex_lock()/orc_once_mutex_unlock(). */
13478 orc_once_mutex_lock ();
13480 OrcCompileResult result;
13482 p = orc_program_new ();
/* Constant n of 16, 2-D mode, with _backup_orc_combine4_16xn_u8 registered as the backup function. */
13483 orc_program_set_constant_n (p, 16);
13484 orc_program_set_2d (p);
13485 orc_program_set_name (p, "orc_combine4_16xn_u8");
13486 orc_program_set_backup_function (p, _backup_orc_combine4_16xn_u8);
/* Variables, with the size argument as passed: 1 for d1 and s1..s4, 4 for constant c1 (value 8), 2 for p1..p4 and for temporaries t1/t2. */
13487 orc_program_add_destination (p, 1, "d1");
13488 orc_program_add_source (p, 1, "s1");
13489 orc_program_add_source (p, 1, "s2");
13490 orc_program_add_source (p, 1, "s3");
13491 orc_program_add_source (p, 1, "s4");
13492 orc_program_add_constant (p, 4, 0x00000008, "c1");
13493 orc_program_add_parameter (p, 2, "p1");
13494 orc_program_add_parameter (p, 2, "p2");
13495 orc_program_add_parameter (p, 2, "p3");
13496 orc_program_add_parameter (p, 2, "p4");
13497 orc_program_add_temporary (p, 2, "t1");
13498 orc_program_add_temporary (p, 2, "t2");
/* Opcode list: t2 = convubw(s1) * p1; then for s2, s3, s4 in turn t1 = convubw(sN) * pN and t2 += t1; finally t2 += c1 and d1 = convsuswb(t2). */
13500 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
13501 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
13502 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
13503 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
13504 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
13505 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
13506 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
13507 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
13508 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S4, ORC_VAR_D1, ORC_VAR_D1);
13509 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
13510 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
13511 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
13512 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
13514 result = orc_program_compile (p);
13517 orc_once_mutex_unlock ();
/* Executor setup: m goes in through ORC_EXECUTOR_M; each array pointer is stored in arrays[] and its stride in params[] under the same ORC_VAR_* index. */
13522 ORC_EXECUTOR_M(ex) = m;
13523 ex->arrays[ORC_VAR_D1] = d1;
13524 ex->params[ORC_VAR_D1] = d1_stride;
13525 ex->arrays[ORC_VAR_S1] = (void *)s1;
13526 ex->params[ORC_VAR_S1] = s1_stride;
13527 ex->arrays[ORC_VAR_S2] = (void *)s2;
13528 ex->params[ORC_VAR_S2] = s2_stride;
13529 ex->arrays[ORC_VAR_S3] = (void *)s3;
13530 ex->params[ORC_VAR_S3] = s3_stride;
13531 ex->arrays[ORC_VAR_S4] = (void *)s4;
13532 ex->params[ORC_VAR_S4] = s4_stride;
13533 ex->params[ORC_VAR_P1] = p1;
13534 ex->params[ORC_VAR_P2] = p2;
13535 ex->params[ORC_VAR_P3] = p3;
13536 ex->params[ORC_VAR_P4] = p4;
/* The function pointer is read from the program object's code_exec field. */
13538 func = p->code_exec;
13544 /* orc_combine4_24xn_u8 */
/* Plain-C version of orc_combine4_24xn_u8 that works directly on the caller's pointers and strides. */
13547 orc_combine4_24xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m){
13551 orc_int8 * ORC_RESTRICT ptr0;
13552 const orc_int8 * ORC_RESTRICT ptr4;
13553 const orc_int8 * ORC_RESTRICT ptr5;
13554 const orc_int8 * ORC_RESTRICT ptr6;
13555 const orc_int8 * ORC_RESTRICT ptr7;
/* One pass per row j, m rows in total; each row pointer is its base pointer moved by stride * j through ORC_PTR_OFFSET. */
13579 for (j = 0; j < m; j++) {
13580 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
13581 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
13582 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
13583 ptr6 = ORC_PTR_OFFSET(s3, s3_stride * j);
13584 ptr7 = ORC_PTR_OFFSET(s4, s4_stride * j);
/* Constant 8 that is added to the accumulated sum below. */
13595 var42.i = 0x00000008; /* 8 or 3.95253e-323f */
/* Per element: cast each of var34/var36/var38/var40 to orc_uint8, multiply by var35/var37/var39/var41 keeping the low 16 bits of each product, and add the products together. */
13597 for (i = 0; i < n; i++) {
13601 var44.i = (orc_uint8)var34;
13603 var45.i = (var44.i * var35.i) & 0xffff;
13607 var46.i = (orc_uint8)var36;
13609 var47.i = (var46.i * var37.i) & 0xffff;
13611 var48.i = var45.i + var47.i;
13615 var49.i = (orc_uint8)var38;
13617 var50.i = (var49.i * var39.i) & 0xffff;
13619 var51.i = var48.i + var50.i;
13623 var52.i = (orc_uint8)var40;
13625 var53.i = (var52.i * var41.i) & 0xffff;
13627 var54.i = var51.i + var53.i;
13629 var55.i = var54.i + var42.i;
/* ORC_CLAMP_UB is defined elsewhere; presumably it saturates to 0..255 - confirm. */
13630 /* 21: convsuswb */
13631 var43 = ORC_CLAMP_UB(var55.i);
/* Backup routine for orc_combine4_24xn_u8: same per-element arithmetic, with every input read out of the OrcExecutor. */
13641 _backup_orc_combine4_24xn_u8 (OrcExecutor * ORC_RESTRICT ex)
/* Row count is taken from params[ORC_VAR_A1]. */
13646 int m = ex->params[ORC_VAR_A1];
13647 orc_int8 * ORC_RESTRICT ptr0;
13648 const orc_int8 * ORC_RESTRICT ptr4;
13649 const orc_int8 * ORC_RESTRICT ptr5;
13650 const orc_int8 * ORC_RESTRICT ptr6;
13651 const orc_int8 * ORC_RESTRICT ptr7;
/* Row pointers come from arrays[0] and arrays[4..7], strides from the same slots of params[]. */
/* NOTE(review): the literal indices presumably equal ORC_VAR_D1 and ORC_VAR_S1..S4 - confirm against the ORC headers. */
13675 for (j = 0; j < m; j++) {
13676 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
13677 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
13678 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
13679 ptr6 = ORC_PTR_OFFSET(ex->arrays[6], ex->params[6] * j);
13680 ptr7 = ORC_PTR_OFFSET(ex->arrays[7], ex->params[7] * j);
/* The four multipliers are read from params[24..27] (presumably ORC_VAR_P1..P4 - confirm). */
13683 var35.i = ex->params[24];
13685 var37.i = ex->params[25];
13687 var39.i = ex->params[26];
13689 var41.i = ex->params[27];
13691 var42.i = 0x00000008; /* 8 or 3.95253e-323f */
/* Per element: four orc_uint8 casts, four products masked to 16 bits, summed, plus var42 (8), then ORC_CLAMP_UB. */
13693 for (i = 0; i < n; i++) {
13697 var44.i = (orc_uint8)var34;
13699 var45.i = (var44.i * var35.i) & 0xffff;
13703 var46.i = (orc_uint8)var36;
13705 var47.i = (var46.i * var37.i) & 0xffff;
13707 var48.i = var45.i + var47.i;
13711 var49.i = (orc_uint8)var38;
13713 var50.i = (var49.i * var39.i) & 0xffff;
13715 var51.i = var48.i + var50.i;
13719 var52.i = (orc_uint8)var40;
13721 var53.i = (var52.i * var41.i) & 0xffff;
13723 var54.i = var51.i + var53.i;
13725 var55.i = var54.i + var42.i;
13726 /* 21: convsuswb */
13727 var43 = ORC_CLAMP_UB(var55.i);
/* Entry point that describes orc_combine4_24xn_u8 to ORC as an OrcProgram and packs the caller's arguments into an OrcExecutor. */
13736 orc_combine4_24xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m)
13738 OrcExecutor _ex, *ex = &_ex;
/* Function-static program handle and init flag: both keep their values between calls. */
13739 static int p_inited = 0;
13740 static OrcProgram *p = 0;
13741 void (*func) (OrcExecutor *);
/* Program setup is bracketed by orc_once_mutex_lock()/orc_once_mutex_unlock(). */
13744 orc_once_mutex_lock ();
13746 OrcCompileResult result;
13748 p = orc_program_new ();
/* Constant n of 24, 2-D mode, with _backup_orc_combine4_24xn_u8 registered as the backup function. */
13749 orc_program_set_constant_n (p, 24);
13750 orc_program_set_2d (p);
13751 orc_program_set_name (p, "orc_combine4_24xn_u8");
13752 orc_program_set_backup_function (p, _backup_orc_combine4_24xn_u8);
/* Variables, with the size argument as passed: 1 for d1 and s1..s4, 4 for constant c1 (value 8), 2 for p1..p4 and for temporaries t1/t2. */
13753 orc_program_add_destination (p, 1, "d1");
13754 orc_program_add_source (p, 1, "s1");
13755 orc_program_add_source (p, 1, "s2");
13756 orc_program_add_source (p, 1, "s3");
13757 orc_program_add_source (p, 1, "s4");
13758 orc_program_add_constant (p, 4, 0x00000008, "c1");
13759 orc_program_add_parameter (p, 2, "p1");
13760 orc_program_add_parameter (p, 2, "p2");
13761 orc_program_add_parameter (p, 2, "p3");
13762 orc_program_add_parameter (p, 2, "p4");
13763 orc_program_add_temporary (p, 2, "t1");
13764 orc_program_add_temporary (p, 2, "t2");
/* Opcode list: t2 = convubw(s1) * p1; then for s2, s3, s4 in turn t1 = convubw(sN) * pN and t2 += t1; finally t2 += c1 and d1 = convsuswb(t2). */
13766 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
13767 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
13768 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
13769 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
13770 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
13771 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
13772 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
13773 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
13774 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S4, ORC_VAR_D1, ORC_VAR_D1);
13775 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
13776 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
13777 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
13778 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
13780 result = orc_program_compile (p);
13783 orc_once_mutex_unlock ();
/* Executor setup: m goes in through ORC_EXECUTOR_M; each array pointer is stored in arrays[] and its stride in params[] under the same ORC_VAR_* index. */
13788 ORC_EXECUTOR_M(ex) = m;
13789 ex->arrays[ORC_VAR_D1] = d1;
13790 ex->params[ORC_VAR_D1] = d1_stride;
13791 ex->arrays[ORC_VAR_S1] = (void *)s1;
13792 ex->params[ORC_VAR_S1] = s1_stride;
13793 ex->arrays[ORC_VAR_S2] = (void *)s2;
13794 ex->params[ORC_VAR_S2] = s2_stride;
13795 ex->arrays[ORC_VAR_S3] = (void *)s3;
13796 ex->params[ORC_VAR_S3] = s3_stride;
13797 ex->arrays[ORC_VAR_S4] = (void *)s4;
13798 ex->params[ORC_VAR_S4] = s4_stride;
13799 ex->params[ORC_VAR_P1] = p1;
13800 ex->params[ORC_VAR_P2] = p2;
13801 ex->params[ORC_VAR_P3] = p3;
13802 ex->params[ORC_VAR_P4] = p4;
/* The function pointer is read from the program object's code_exec field. */
13804 func = p->code_exec;
13810 /* orc_combine4_32xn_u8 */
/* Plain-C version of orc_combine4_32xn_u8 that works directly on the caller's pointers and strides. */
13813 orc_combine4_32xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m){
13817 orc_int8 * ORC_RESTRICT ptr0;
13818 const orc_int8 * ORC_RESTRICT ptr4;
13819 const orc_int8 * ORC_RESTRICT ptr5;
13820 const orc_int8 * ORC_RESTRICT ptr6;
13821 const orc_int8 * ORC_RESTRICT ptr7;
/* One pass per row j, m rows in total; each row pointer is its base pointer moved by stride * j through ORC_PTR_OFFSET. */
13845 for (j = 0; j < m; j++) {
13846 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
13847 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
13848 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
13849 ptr6 = ORC_PTR_OFFSET(s3, s3_stride * j);
13850 ptr7 = ORC_PTR_OFFSET(s4, s4_stride * j);
/* Constant 8 that is added to the accumulated sum below. */
13861 var42.i = 0x00000008; /* 8 or 3.95253e-323f */
/* Per element: cast each of var34/var36/var38/var40 to orc_uint8, multiply by var35/var37/var39/var41 keeping the low 16 bits of each product, and add the products together. */
13863 for (i = 0; i < n; i++) {
13867 var44.i = (orc_uint8)var34;
13869 var45.i = (var44.i * var35.i) & 0xffff;
13873 var46.i = (orc_uint8)var36;
13875 var47.i = (var46.i * var37.i) & 0xffff;
13877 var48.i = var45.i + var47.i;
13881 var49.i = (orc_uint8)var38;
13883 var50.i = (var49.i * var39.i) & 0xffff;
13885 var51.i = var48.i + var50.i;
13889 var52.i = (orc_uint8)var40;
13891 var53.i = (var52.i * var41.i) & 0xffff;
13893 var54.i = var51.i + var53.i;
13895 var55.i = var54.i + var42.i;
/* ORC_CLAMP_UB is defined elsewhere; presumably it saturates to 0..255 - confirm. */
13896 /* 21: convsuswb */
13897 var43 = ORC_CLAMP_UB(var55.i);
/* Backup routine for orc_combine4_32xn_u8: same per-element arithmetic, with every input read out of the OrcExecutor. */
13907 _backup_orc_combine4_32xn_u8 (OrcExecutor * ORC_RESTRICT ex)
/* Row count is taken from params[ORC_VAR_A1]. */
13912 int m = ex->params[ORC_VAR_A1];
13913 orc_int8 * ORC_RESTRICT ptr0;
13914 const orc_int8 * ORC_RESTRICT ptr4;
13915 const orc_int8 * ORC_RESTRICT ptr5;
13916 const orc_int8 * ORC_RESTRICT ptr6;
13917 const orc_int8 * ORC_RESTRICT ptr7;
/* Row pointers come from arrays[0] and arrays[4..7], strides from the same slots of params[]. */
/* NOTE(review): the literal indices presumably equal ORC_VAR_D1 and ORC_VAR_S1..S4 - confirm against the ORC headers. */
13941 for (j = 0; j < m; j++) {
13942 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
13943 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
13944 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
13945 ptr6 = ORC_PTR_OFFSET(ex->arrays[6], ex->params[6] * j);
13946 ptr7 = ORC_PTR_OFFSET(ex->arrays[7], ex->params[7] * j);
/* The four multipliers are read from params[24..27] (presumably ORC_VAR_P1..P4 - confirm). */
13949 var35.i = ex->params[24];
13951 var37.i = ex->params[25];
13953 var39.i = ex->params[26];
13955 var41.i = ex->params[27];
13957 var42.i = 0x00000008; /* 8 or 3.95253e-323f */
/* Per element: four orc_uint8 casts, four products masked to 16 bits, summed, plus var42 (8), then ORC_CLAMP_UB. */
13959 for (i = 0; i < n; i++) {
13963 var44.i = (orc_uint8)var34;
13965 var45.i = (var44.i * var35.i) & 0xffff;
13969 var46.i = (orc_uint8)var36;
13971 var47.i = (var46.i * var37.i) & 0xffff;
13973 var48.i = var45.i + var47.i;
13977 var49.i = (orc_uint8)var38;
13979 var50.i = (var49.i * var39.i) & 0xffff;
13981 var51.i = var48.i + var50.i;
13985 var52.i = (orc_uint8)var40;
13987 var53.i = (var52.i * var41.i) & 0xffff;
13989 var54.i = var51.i + var53.i;
13991 var55.i = var54.i + var42.i;
13992 /* 21: convsuswb */
13993 var43 = ORC_CLAMP_UB(var55.i);
/* Entry point that describes orc_combine4_32xn_u8 to ORC as an OrcProgram and packs the caller's arguments into an OrcExecutor. */
14002 orc_combine4_32xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int m)
14004 OrcExecutor _ex, *ex = &_ex;
/* Function-static program handle and init flag: both keep their values between calls. */
14005 static int p_inited = 0;
14006 static OrcProgram *p = 0;
14007 void (*func) (OrcExecutor *);
/* Program setup is bracketed by orc_once_mutex_lock()/orc_once_mutex_unlock(). */
14010 orc_once_mutex_lock ();
14012 OrcCompileResult result;
14014 p = orc_program_new ();
/* Constant n of 32, 2-D mode, with _backup_orc_combine4_32xn_u8 registered as the backup function. */
14015 orc_program_set_constant_n (p, 32);
14016 orc_program_set_2d (p);
14017 orc_program_set_name (p, "orc_combine4_32xn_u8");
14018 orc_program_set_backup_function (p, _backup_orc_combine4_32xn_u8);
/* Variables, with the size argument as passed: 1 for d1 and s1..s4, 4 for constant c1 (value 8), 2 for p1..p4 and for temporaries t1/t2. */
14019 orc_program_add_destination (p, 1, "d1");
14020 orc_program_add_source (p, 1, "s1");
14021 orc_program_add_source (p, 1, "s2");
14022 orc_program_add_source (p, 1, "s3");
14023 orc_program_add_source (p, 1, "s4");
14024 orc_program_add_constant (p, 4, 0x00000008, "c1");
14025 orc_program_add_parameter (p, 2, "p1");
14026 orc_program_add_parameter (p, 2, "p2");
14027 orc_program_add_parameter (p, 2, "p3");
14028 orc_program_add_parameter (p, 2, "p4");
14029 orc_program_add_temporary (p, 2, "t1");
14030 orc_program_add_temporary (p, 2, "t2");
/* Opcode list: t2 = convubw(s1) * p1; then for s2, s3, s4 in turn t1 = convubw(sN) * pN and t2 += t1; finally t2 += c1 and d1 = convsuswb(t2). */
14032 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
14033 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
14034 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
14035 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
14036 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
14037 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
14038 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
14039 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
14040 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S4, ORC_VAR_D1, ORC_VAR_D1);
14041 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
14042 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
14043 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
14044 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
14046 result = orc_program_compile (p);
14049 orc_once_mutex_unlock ();
/* Executor setup: m goes in through ORC_EXECUTOR_M; each array pointer is stored in arrays[] and its stride in params[] under the same ORC_VAR_* index. */
14054 ORC_EXECUTOR_M(ex) = m;
14055 ex->arrays[ORC_VAR_D1] = d1;
14056 ex->params[ORC_VAR_D1] = d1_stride;
14057 ex->arrays[ORC_VAR_S1] = (void *)s1;
14058 ex->params[ORC_VAR_S1] = s1_stride;
14059 ex->arrays[ORC_VAR_S2] = (void *)s2;
14060 ex->params[ORC_VAR_S2] = s2_stride;
14061 ex->arrays[ORC_VAR_S3] = (void *)s3;
14062 ex->params[ORC_VAR_S3] = s3_stride;
14063 ex->arrays[ORC_VAR_S4] = (void *)s4;
14064 ex->params[ORC_VAR_S4] = s4_stride;
14065 ex->params[ORC_VAR_P1] = p1;
14066 ex->params[ORC_VAR_P2] = p2;
14067 ex->params[ORC_VAR_P3] = p3;
14068 ex->params[ORC_VAR_P4] = p4;
/* The function pointer is read from the program object's code_exec field. */
14070 func = p->code_exec;
14076 /* orc_combine4_nxm_u8 */
/* Plain-C version of orc_combine4_nxm_u8; both the row length n and the row count m are caller-supplied. */
14079 orc_combine4_nxm_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int n, int m){
14082 orc_int8 * ORC_RESTRICT ptr0;
14083 const orc_int8 * ORC_RESTRICT ptr4;
14084 const orc_int8 * ORC_RESTRICT ptr5;
14085 const orc_int8 * ORC_RESTRICT ptr6;
14086 const orc_int8 * ORC_RESTRICT ptr7;
/* One pass per row j, m rows in total; each row pointer is its base pointer moved by stride * j through ORC_PTR_OFFSET. */
14111 for (j = 0; j < m; j++) {
14112 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
14113 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
14114 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
14115 ptr6 = ORC_PTR_OFFSET(s3, s3_stride * j);
14116 ptr7 = ORC_PTR_OFFSET(s4, s4_stride * j);
/* Constant 8 that is added to the accumulated sum before the shift by 4. */
14127 var42.i = 0x00000008; /* 8 or 3.95253e-323f */
/* Per element: four orc_uint8 casts, four products masked to 16 bits, summed, plus 8, shifted right by 4, then ORC_CLAMP_UB. */
14129 for (i = 0; i < n; i++) {
14133 var44.i = (orc_uint8)var34;
14135 var45.i = (var44.i * var35.i) & 0xffff;
14139 var46.i = (orc_uint8)var36;
14141 var47.i = (var46.i * var37.i) & 0xffff;
14143 var48.i = var45.i + var47.i;
14147 var49.i = (orc_uint8)var38;
14149 var50.i = (var49.i * var39.i) & 0xffff;
14151 var51.i = var48.i + var50.i;
14155 var52.i = (orc_uint8)var40;
14157 var53.i = (var52.i * var41.i) & 0xffff;
14159 var54.i = var51.i + var53.i;
14161 var55.i = var54.i + var42.i;
14163 var56.i = var55.i >> 4;
/* ORC_CLAMP_UB is defined elsewhere; presumably it saturates to 0..255 - confirm. */
14164 /* 22: convsuswb */
14165 var43 = ORC_CLAMP_UB(var56.i);
/* Backup routine for orc_combine4_nxm_u8: same per-element arithmetic, with every input read out of the OrcExecutor. */
14175 _backup_orc_combine4_nxm_u8 (OrcExecutor * ORC_RESTRICT ex)
/* Row count is taken from params[ORC_VAR_A1]. */
14180 int m = ex->params[ORC_VAR_A1];
14181 orc_int8 * ORC_RESTRICT ptr0;
14182 const orc_int8 * ORC_RESTRICT ptr4;
14183 const orc_int8 * ORC_RESTRICT ptr5;
14184 const orc_int8 * ORC_RESTRICT ptr6;
14185 const orc_int8 * ORC_RESTRICT ptr7;
/* Row pointers come from arrays[0] and arrays[4..7], strides from the same slots of params[]. */
/* NOTE(review): the literal indices presumably equal ORC_VAR_D1 and ORC_VAR_S1..S4 - confirm against the ORC headers. */
14210 for (j = 0; j < m; j++) {
14211 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
14212 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
14213 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
14214 ptr6 = ORC_PTR_OFFSET(ex->arrays[6], ex->params[6] * j);
14215 ptr7 = ORC_PTR_OFFSET(ex->arrays[7], ex->params[7] * j);
/* The four multipliers are read from params[24..27] (presumably ORC_VAR_P1..P4 - confirm). */
14218 var35.i = ex->params[24];
14220 var37.i = ex->params[25];
14222 var39.i = ex->params[26];
14224 var41.i = ex->params[27];
14226 var42.i = 0x00000008; /* 8 or 3.95253e-323f */
/* Per element: four orc_uint8 casts, four products masked to 16 bits, summed, plus 8, shifted right by 4, then ORC_CLAMP_UB. */
14228 for (i = 0; i < n; i++) {
14232 var44.i = (orc_uint8)var34;
14234 var45.i = (var44.i * var35.i) & 0xffff;
14238 var46.i = (orc_uint8)var36;
14240 var47.i = (var46.i * var37.i) & 0xffff;
14242 var48.i = var45.i + var47.i;
14246 var49.i = (orc_uint8)var38;
14248 var50.i = (var49.i * var39.i) & 0xffff;
14250 var51.i = var48.i + var50.i;
14254 var52.i = (orc_uint8)var40;
14256 var53.i = (var52.i * var41.i) & 0xffff;
14258 var54.i = var51.i + var53.i;
14260 var55.i = var54.i + var42.i;
14262 var56.i = var55.i >> 4;
14263 /* 22: convsuswb */
14264 var43 = ORC_CLAMP_UB(var56.i);
/* Entry point that describes orc_combine4_nxm_u8 to ORC as an OrcProgram and packs the caller's arguments into an OrcExecutor. */
14273 orc_combine4_nxm_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, const uint8_t * s4, int s4_stride, int p1, int p2, int p3, int p4, int n, int m)
14275 OrcExecutor _ex, *ex = &_ex;
/* Function-static program handle and init flag: both keep their values between calls. */
14276 static int p_inited = 0;
14277 static OrcProgram *p = 0;
14278 void (*func) (OrcExecutor *);
/* Program setup is bracketed by orc_once_mutex_lock()/orc_once_mutex_unlock(). */
14281 orc_once_mutex_lock ();
14283 OrcCompileResult result;
14285 p = orc_program_new ();
/* 2-D mode with _backup_orc_combine4_nxm_u8 registered as the backup function; no constant n is set on this program. */
14286 orc_program_set_2d (p);
14287 orc_program_set_name (p, "orc_combine4_nxm_u8");
14288 orc_program_set_backup_function (p, _backup_orc_combine4_nxm_u8);
/* Variables, with the size argument as passed: 1 for d1 and s1..s4, 4 for constants c1 (value 8) and c2 (value 4), 2 for p1..p4 and for temporaries t1/t2. */
14289 orc_program_add_destination (p, 1, "d1");
14290 orc_program_add_source (p, 1, "s1");
14291 orc_program_add_source (p, 1, "s2");
14292 orc_program_add_source (p, 1, "s3");
14293 orc_program_add_source (p, 1, "s4");
14294 orc_program_add_constant (p, 4, 0x00000008, "c1");
14295 orc_program_add_constant (p, 4, 0x00000004, "c2");
14296 orc_program_add_parameter (p, 2, "p1");
14297 orc_program_add_parameter (p, 2, "p2");
14298 orc_program_add_parameter (p, 2, "p3");
14299 orc_program_add_parameter (p, 2, "p4");
14300 orc_program_add_temporary (p, 2, "t1");
14301 orc_program_add_temporary (p, 2, "t2");
/* Opcode list: t2 = convubw(s1) * p1; then for s2, s3, s4 in turn t1 = convubw(sN) * pN and t2 += t1; finally t2 += c1, t2 = shrsw(t2, c2) and d1 = convsuswb(t2). */
14303 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
14304 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
14305 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
14306 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
14307 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
14308 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
14309 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
14310 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
14311 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S4, ORC_VAR_D1, ORC_VAR_D1);
14312 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
14313 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
14314 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
14315 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C2, ORC_VAR_D1);
14316 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
14318 result = orc_program_compile (p);
14321 orc_once_mutex_unlock ();
/* Executor setup: m goes in through ORC_EXECUTOR_M; each array pointer is stored in arrays[] and its stride in params[] under the same ORC_VAR_* index. */
14326 ORC_EXECUTOR_M(ex) = m;
14327 ex->arrays[ORC_VAR_D1] = d1;
14328 ex->params[ORC_VAR_D1] = d1_stride;
14329 ex->arrays[ORC_VAR_S1] = (void *)s1;
14330 ex->params[ORC_VAR_S1] = s1_stride;
14331 ex->arrays[ORC_VAR_S2] = (void *)s2;
14332 ex->params[ORC_VAR_S2] = s2_stride;
14333 ex->arrays[ORC_VAR_S3] = (void *)s3;
14334 ex->params[ORC_VAR_S3] = s3_stride;
14335 ex->arrays[ORC_VAR_S4] = (void *)s4;
14336 ex->params[ORC_VAR_S4] = s4_stride;
14337 ex->params[ORC_VAR_P1] = p1;
14338 ex->params[ORC_VAR_P2] = p2;
14339 ex->params[ORC_VAR_P3] = p3;
14340 ex->params[ORC_VAR_P4] = p4;
/* The function pointer is read from the program object's code_exec field. */
14342 func = p->code_exec;
14348 /* orc_combine2_8xn_u8 */
/* Plain-C version of orc_combine2_8xn_u8 that works directly on the caller's pointers and strides. */
14351 orc_combine2_8xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int p1, int p2, int p3, int p4, int m){
14355 orc_int8 * ORC_RESTRICT ptr0;
14356 const orc_int8 * ORC_RESTRICT ptr4;
14357 const orc_int8 * ORC_RESTRICT ptr5;
/* One pass per row j, m rows in total; each row pointer is its base pointer moved by stride * j through ORC_PTR_OFFSET. */
14372 for (j = 0; j < m; j++) {
14373 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
14374 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
14375 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
/* Per element: cast var34 and var35 to orc_uint8, multiply by var36 and var37 keeping the low 16 bits, add the two products and var38, shift right by p4, then ORC_CLAMP_UB. */
14384 for (i = 0; i < n; i++) {
14388 var40.i = (orc_uint8)var34;
14392 var41.i = (orc_uint8)var35;
14394 var42.i = (var40.i * var36.i) & 0xffff;
14396 var43.i = (var41.i * var37.i) & 0xffff;
14398 var44.i = var42.i + var43.i;
14400 var45.i = var44.i + var38.i;
/* NOTE(review): p4 is used directly as the shift count; no range check on it is visible here. */
14402 var46.i = var45.i >> p4;
14403 /* 12: convsuswb */
14404 var39 = ORC_CLAMP_UB(var46.i);
/* Backup routine for orc_combine2_8xn_u8: same per-element arithmetic, with every input read out of the OrcExecutor. */
14414 _backup_orc_combine2_8xn_u8 (OrcExecutor * ORC_RESTRICT ex)
/* Row count is taken from params[ORC_VAR_A1]. */
14419 int m = ex->params[ORC_VAR_A1];
14420 orc_int8 * ORC_RESTRICT ptr0;
14421 const orc_int8 * ORC_RESTRICT ptr4;
14422 const orc_int8 * ORC_RESTRICT ptr5;
/* Row pointers come from arrays[0], arrays[4] and arrays[5], strides from the same slots of params[]. */
/* NOTE(review): the literal indices presumably equal ORC_VAR_D1, ORC_VAR_S1 and ORC_VAR_S2 - confirm against the ORC headers. */
14437 for (j = 0; j < m; j++) {
14438 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
14439 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
14440 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
/* Two multipliers and the additive term are read from params[24..26] (presumably ORC_VAR_P1..P3 - confirm). */
14443 var36.i = ex->params[24];
14445 var37.i = ex->params[25];
14447 var38.i = ex->params[26];
14449 for (i = 0; i < n; i++) {
14453 var40.i = (orc_uint8)var34;
14457 var41.i = (orc_uint8)var35;
14459 var42.i = (var40.i * var36.i) & 0xffff;
14461 var43.i = (var41.i * var37.i) & 0xffff;
14463 var44.i = var42.i + var43.i;
14465 var45.i = var44.i + var38.i;
/* Shift count is params[27], used as-is; no range check on it is visible here. */
14467 var46.i = var45.i >> ex->params[27];
14468 /* 12: convsuswb */
14469 var39 = ORC_CLAMP_UB(var46.i);
/* Entry point that describes orc_combine2_8xn_u8 to ORC as an OrcProgram and packs the caller's arguments into an OrcExecutor. */
14478 orc_combine2_8xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int p1, int p2, int p3, int p4, int m)
14480 OrcExecutor _ex, *ex = &_ex;
/* Function-static program handle and init flag: both keep their values between calls. */
14481 static int p_inited = 0;
14482 static OrcProgram *p = 0;
14483 void (*func) (OrcExecutor *);
/* Program setup is bracketed by orc_once_mutex_lock()/orc_once_mutex_unlock(). */
14486 orc_once_mutex_lock ();
14488 OrcCompileResult result;
14490 p = orc_program_new ();
/* Constant n of 8, 2-D mode, with _backup_orc_combine2_8xn_u8 registered as the backup function. */
14491 orc_program_set_constant_n (p, 8);
14492 orc_program_set_2d (p);
14493 orc_program_set_name (p, "orc_combine2_8xn_u8");
14494 orc_program_set_backup_function (p, _backup_orc_combine2_8xn_u8);
/* Variables, with the size argument as passed: 1 for d1, s1 and s2; 2 for p1..p4 and for temporaries t1/t2. */
14495 orc_program_add_destination (p, 1, "d1");
14496 orc_program_add_source (p, 1, "s1");
14497 orc_program_add_source (p, 1, "s2");
14498 orc_program_add_parameter (p, 2, "p1");
14499 orc_program_add_parameter (p, 2, "p2");
14500 orc_program_add_parameter (p, 2, "p3");
14501 orc_program_add_parameter (p, 2, "p4");
14502 orc_program_add_temporary (p, 2, "t1");
14503 orc_program_add_temporary (p, 2, "t2");
/* Opcode list: t1 = convubw(s1), t2 = convubw(s2), t1 *= p1, t2 *= p2, t1 += t2, t1 += p3, t1 = shrsw(t1, p4), d1 = convsuswb(t1). */
14505 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
14506 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
14507 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
14508 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
14509 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
14510 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
14511 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
14512 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
14514 result = orc_program_compile (p);
14517 orc_once_mutex_unlock ();
/* Executor setup: m goes in through ORC_EXECUTOR_M; each array pointer is stored in arrays[] and its stride in params[] under the same ORC_VAR_* index. */
14522 ORC_EXECUTOR_M(ex) = m;
14523 ex->arrays[ORC_VAR_D1] = d1;
14524 ex->params[ORC_VAR_D1] = d1_stride;
14525 ex->arrays[ORC_VAR_S1] = (void *)s1;
14526 ex->params[ORC_VAR_S1] = s1_stride;
14527 ex->arrays[ORC_VAR_S2] = (void *)s2;
14528 ex->params[ORC_VAR_S2] = s2_stride;
14529 ex->params[ORC_VAR_P1] = p1;
14530 ex->params[ORC_VAR_P2] = p2;
14531 ex->params[ORC_VAR_P3] = p3;
14532 ex->params[ORC_VAR_P4] = p4;
/* The function pointer is read from the program object's code_exec field. */
14534 func = p->code_exec;
14540 /* orc_combine2_12xn_u8 */
/* Plain-C version of orc_combine2_12xn_u8 that works directly on the caller's pointers and strides. */
14543 orc_combine2_12xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int p1, int p2, int p3, int p4, int m){
14547 orc_int8 * ORC_RESTRICT ptr0;
14548 const orc_int8 * ORC_RESTRICT ptr4;
14549 const orc_int8 * ORC_RESTRICT ptr5;
/* One pass per row j, m rows in total; each row pointer is its base pointer moved by stride * j through ORC_PTR_OFFSET. */
14564 for (j = 0; j < m; j++) {
14565 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
14566 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
14567 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
/* Per element: cast var34 and var35 to orc_uint8, multiply by var36 and var37 keeping the low 16 bits, add the two products and var38, shift right by p4, then ORC_CLAMP_UB. */
14576 for (i = 0; i < n; i++) {
14580 var40.i = (orc_uint8)var34;
14584 var41.i = (orc_uint8)var35;
14586 var42.i = (var40.i * var36.i) & 0xffff;
14588 var43.i = (var41.i * var37.i) & 0xffff;
14590 var44.i = var42.i + var43.i;
14592 var45.i = var44.i + var38.i;
/* NOTE(review): p4 is used directly as the shift count; no range check on it is visible here. */
14594 var46.i = var45.i >> p4;
14595 /* 12: convsuswb */
14596 var39 = ORC_CLAMP_UB(var46.i);
/* Backup routine for orc_combine2_12xn_u8: same per-element arithmetic, with every input read out of the OrcExecutor. */
14606 _backup_orc_combine2_12xn_u8 (OrcExecutor * ORC_RESTRICT ex)
/* Row count is taken from params[ORC_VAR_A1]. */
14611 int m = ex->params[ORC_VAR_A1];
14612 orc_int8 * ORC_RESTRICT ptr0;
14613 const orc_int8 * ORC_RESTRICT ptr4;
14614 const orc_int8 * ORC_RESTRICT ptr5;
/* Row pointers come from arrays[0], arrays[4] and arrays[5], strides from the same slots of params[]. */
/* NOTE(review): the literal indices presumably equal ORC_VAR_D1, ORC_VAR_S1 and ORC_VAR_S2 - confirm against the ORC headers. */
14629 for (j = 0; j < m; j++) {
14630 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
14631 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
14632 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
/* Two multipliers and the additive term are read from params[24..26] (presumably ORC_VAR_P1..P3 - confirm). */
14635 var36.i = ex->params[24];
14637 var37.i = ex->params[25];
14639 var38.i = ex->params[26];
14641 for (i = 0; i < n; i++) {
14645 var40.i = (orc_uint8)var34;
14649 var41.i = (orc_uint8)var35;
14651 var42.i = (var40.i * var36.i) & 0xffff;
14653 var43.i = (var41.i * var37.i) & 0xffff;
14655 var44.i = var42.i + var43.i;
14657 var45.i = var44.i + var38.i;
/* Shift count is params[27], used as-is; no range check on it is visible here. */
14659 var46.i = var45.i >> ex->params[27];
14660 /* 12: convsuswb */
14661 var39 = ORC_CLAMP_UB(var46.i);
/* Entry point that describes orc_combine2_12xn_u8 to ORC as an OrcProgram and packs the caller's arguments into an OrcExecutor. */
14670 orc_combine2_12xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int p1, int p2, int p3, int p4, int m)
14672 OrcExecutor _ex, *ex = &_ex;
/* Function-static program handle and init flag: both keep their values between calls. */
14673 static int p_inited = 0;
14674 static OrcProgram *p = 0;
14675 void (*func) (OrcExecutor *);
/* Program setup is bracketed by orc_once_mutex_lock()/orc_once_mutex_unlock(). */
14678 orc_once_mutex_lock ();
14680 OrcCompileResult result;
14682 p = orc_program_new ();
/* Constant n of 12, 2-D mode, with _backup_orc_combine2_12xn_u8 registered as the backup function. */
14683 orc_program_set_constant_n (p, 12);
14684 orc_program_set_2d (p);
14685 orc_program_set_name (p, "orc_combine2_12xn_u8");
14686 orc_program_set_backup_function (p, _backup_orc_combine2_12xn_u8);
/* Variables, with the size argument as passed: 1 for d1, s1 and s2; 2 for p1..p4 and for temporaries t1/t2. */
14687 orc_program_add_destination (p, 1, "d1");
14688 orc_program_add_source (p, 1, "s1");
14689 orc_program_add_source (p, 1, "s2");
14690 orc_program_add_parameter (p, 2, "p1");
14691 orc_program_add_parameter (p, 2, "p2");
14692 orc_program_add_parameter (p, 2, "p3");
14693 orc_program_add_parameter (p, 2, "p4");
14694 orc_program_add_temporary (p, 2, "t1");
14695 orc_program_add_temporary (p, 2, "t2");
/* Opcode list: t1 = convubw(s1), t2 = convubw(s2), t1 *= p1, t2 *= p2, t1 += t2, t1 += p3, t1 = shrsw(t1, p4), d1 = convsuswb(t1). */
14697 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
14698 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
14699 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
14700 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
14701 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
14702 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
14703 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
14704 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
14706 result = orc_program_compile (p);
14709 orc_once_mutex_unlock ();
/* Executor setup: m goes in through ORC_EXECUTOR_M; each array pointer is stored in arrays[] and its stride in params[] under the same ORC_VAR_* index. */
14714 ORC_EXECUTOR_M(ex) = m;
14715 ex->arrays[ORC_VAR_D1] = d1;
14716 ex->params[ORC_VAR_D1] = d1_stride;
14717 ex->arrays[ORC_VAR_S1] = (void *)s1;
14718 ex->params[ORC_VAR_S1] = s1_stride;
14719 ex->arrays[ORC_VAR_S2] = (void *)s2;
14720 ex->params[ORC_VAR_S2] = s2_stride;
14721 ex->params[ORC_VAR_P1] = p1;
14722 ex->params[ORC_VAR_P2] = p2;
14723 ex->params[ORC_VAR_P3] = p3;
14724 ex->params[ORC_VAR_P4] = p4;
/* The function pointer is read from the program object's code_exec field. */
14726 func = p->code_exec;
14732 /* orc_combine2_16xn_u8 */
/* Plain-C version of orc_combine2_16xn_u8 that works directly on the caller's pointers and strides. */
14735 orc_combine2_16xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int p1, int p2, int p3, int p4, int m){
14739 orc_int8 * ORC_RESTRICT ptr0;
14740 const orc_int8 * ORC_RESTRICT ptr4;
14741 const orc_int8 * ORC_RESTRICT ptr5;
/* One pass per row j, m rows in total; each row pointer is its base pointer moved by stride * j through ORC_PTR_OFFSET. */
14756 for (j = 0; j < m; j++) {
14757 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
14758 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
14759 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
/* Per element: cast var34 and var35 to orc_uint8, multiply by var36 and var37 keeping the low 16 bits, add the two products and var38, shift right by p4, then ORC_CLAMP_UB. */
14768 for (i = 0; i < n; i++) {
14772 var40.i = (orc_uint8)var34;
14776 var41.i = (orc_uint8)var35;
14778 var42.i = (var40.i * var36.i) & 0xffff;
14780 var43.i = (var41.i * var37.i) & 0xffff;
14782 var44.i = var42.i + var43.i;
14784 var45.i = var44.i + var38.i;
/* NOTE(review): p4 is used directly as the shift count; no range check on it is visible here. */
14786 var46.i = var45.i >> p4;
14787 /* 12: convsuswb */
14788 var39 = ORC_CLAMP_UB(var46.i);
/* Backup routine for orc_combine2_16xn_u8: same per-element arithmetic, with every input read out of the OrcExecutor. */
14798 _backup_orc_combine2_16xn_u8 (OrcExecutor * ORC_RESTRICT ex)
/* Row count is taken from params[ORC_VAR_A1]. */
14803 int m = ex->params[ORC_VAR_A1];
14804 orc_int8 * ORC_RESTRICT ptr0;
14805 const orc_int8 * ORC_RESTRICT ptr4;
14806 const orc_int8 * ORC_RESTRICT ptr5;
/* Row pointers come from arrays[0], arrays[4] and arrays[5], strides from the same slots of params[]. */
/* NOTE(review): the literal indices presumably equal ORC_VAR_D1, ORC_VAR_S1 and ORC_VAR_S2 - confirm against the ORC headers. */
14821 for (j = 0; j < m; j++) {
14822 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
14823 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
14824 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
/* Two multipliers and the additive term are read from params[24..26] (presumably ORC_VAR_P1..P3 - confirm). */
14827 var36.i = ex->params[24];
14829 var37.i = ex->params[25];
14831 var38.i = ex->params[26];
14833 for (i = 0; i < n; i++) {
14837 var40.i = (orc_uint8)var34;
14841 var41.i = (orc_uint8)var35;
14843 var42.i = (var40.i * var36.i) & 0xffff;
14845 var43.i = (var41.i * var37.i) & 0xffff;
14847 var44.i = var42.i + var43.i;
14849 var45.i = var44.i + var38.i;
/* Shift count is params[27], used as-is; no range check on it is visible here. */
14851 var46.i = var45.i >> ex->params[27];
14852 /* 12: convsuswb */
14853 var39 = ORC_CLAMP_UB(var46.i);
/* Entry point that describes orc_combine2_16xn_u8 to ORC as an OrcProgram and packs the caller's arguments into an OrcExecutor. */
14862 orc_combine2_16xn_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int p1, int p2, int p3, int p4, int m)
14864 OrcExecutor _ex, *ex = &_ex;
/* Function-static program handle and init flag: both keep their values between calls. */
14865 static int p_inited = 0;
14866 static OrcProgram *p = 0;
14867 void (*func) (OrcExecutor *);
/* Program setup is bracketed by orc_once_mutex_lock()/orc_once_mutex_unlock(). */
14870 orc_once_mutex_lock ();
14872 OrcCompileResult result;
14874 p = orc_program_new ();
/* Constant n of 16, 2-D mode, with _backup_orc_combine2_16xn_u8 registered as the backup function. */
14875 orc_program_set_constant_n (p, 16);
14876 orc_program_set_2d (p);
14877 orc_program_set_name (p, "orc_combine2_16xn_u8");
14878 orc_program_set_backup_function (p, _backup_orc_combine2_16xn_u8);
/* Variables, with the size argument as passed: 1 for d1, s1 and s2; 2 for p1..p4 and for temporaries t1/t2. */
14879 orc_program_add_destination (p, 1, "d1");
14880 orc_program_add_source (p, 1, "s1");
14881 orc_program_add_source (p, 1, "s2");
14882 orc_program_add_parameter (p, 2, "p1");
14883 orc_program_add_parameter (p, 2, "p2");
14884 orc_program_add_parameter (p, 2, "p3");
14885 orc_program_add_parameter (p, 2, "p4");
14886 orc_program_add_temporary (p, 2, "t1");
14887 orc_program_add_temporary (p, 2, "t2");
/* Opcode list: t1 = convubw(s1), t2 = convubw(s2), t1 *= p1, t2 *= p2, t1 += t2, t1 += p3, t1 = shrsw(t1, p4), d1 = convsuswb(t1). */
14889 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
14890 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
14891 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
14892 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
14893 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
14894 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
14895 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
14896 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
14898 result = orc_program_compile (p);
14901 orc_once_mutex_unlock ();
/* Executor setup: m goes in through ORC_EXECUTOR_M; each array pointer is stored in arrays[] and its stride in params[] under the same ORC_VAR_* index. */
14906 ORC_EXECUTOR_M(ex) = m;
14907 ex->arrays[ORC_VAR_D1] = d1;
14908 ex->params[ORC_VAR_D1] = d1_stride;
14909 ex->arrays[ORC_VAR_S1] = (void *)s1;
14910 ex->params[ORC_VAR_S1] = s1_stride;
14911 ex->arrays[ORC_VAR_S2] = (void *)s2;
14912 ex->params[ORC_VAR_S2] = s2_stride;
14913 ex->params[ORC_VAR_P1] = p1;
14914 ex->params[ORC_VAR_P2] = p2;
14915 ex->params[ORC_VAR_P3] = p3;
14916 ex->params[ORC_VAR_P4] = p4;
/* The function pointer is read from the program object's code_exec field. */
14918 func = p->code_exec;
14924 /* orc_combine2_nxm_u8 */
/*
 * Plain-C form of orc_combine2_nxm_u8: visits m rows of n elements, locating
 * each row of d1/s1/s2 through its own stride.  The visible per-element steps
 * are: take two source values as unsigned bytes, multiply each by a weight
 * (product masked to 16 bits), add the products, add an offset, shift right
 * by p4 and clamp with ORC_CLAMP_UB.
 * NOTE(review): the element loads, the setup of var36..var38 (presumably
 * p1..p3) and the store of var39 are not visible in this excerpt.
 * NOTE(review): a second definition with the same name follows later in the
 * file; the preprocessor conditional selecting between them is not visible.
 */
14927 orc_combine2_nxm_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int p1, int p2, int p3, int p4, int n, int m){
14930 orc_int8 * ORC_RESTRICT ptr0;
14931 const orc_int8 * ORC_RESTRICT ptr4;
14932 const orc_int8 * ORC_RESTRICT ptr5;
14947 for (j = 0; j < m; j++) {
/* Row j of each array starts at base offset by stride * j. */
14948 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
14949 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
14950 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
14959 for (i = 0; i < n; i++) {
/* The (orc_uint8) casts make the source values non-negative before widening. */
14963 var40.i = (orc_uint8)var34;
14967 var41.i = (orc_uint8)var35;
/* Products are truncated to their low 16 bits. */
14969 var42.i = (var40.i * var36.i) & 0xffff;
14971 var43.i = (var41.i * var37.i) & 0xffff;
14973 var44.i = var42.i + var43.i;
14975 var45.i = var44.i + var38.i;
14977 var46.i = var45.i >> p4;
14978 /* 12: convsuswb */
14979 var39 = ORC_CLAMP_UB(var46.i);
/*
 * Backup (non-JIT) implementation registered for "orc_combine2_nxm_u8".
 * Performs the same visible arithmetic as the plain-C variant, but reads
 * every input from the OrcExecutor: row count from params[ORC_VAR_A1],
 * array bases from arrays[0], arrays[4], arrays[5], strides from params[0],
 * params[4], params[5], and the four scalars from params[24]..params[27].
 * NOTE(review): the numeric indices presumably correspond to ORC_VAR_D1,
 * ORC_VAR_S1, ORC_VAR_S2 and ORC_VAR_P1..P4 -- confirm against the Orc
 * headers.  The declaration of n, the element loads and the store of var39
 * are not visible in this excerpt.
 */
14989 _backup_orc_combine2_nxm_u8 (OrcExecutor * ORC_RESTRICT ex)
14994 int m = ex->params[ORC_VAR_A1];
14995 orc_int8 * ORC_RESTRICT ptr0;
14996 const orc_int8 * ORC_RESTRICT ptr4;
14997 const orc_int8 * ORC_RESTRICT ptr5;
15012 for (j = 0; j < m; j++) {
/* Row j of each array starts at base offset by stride * j. */
15013 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
15014 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
15015 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
/* Scalars used as the two weights and the additive offset below. */
15018 var36.i = ex->params[24];
15020 var37.i = ex->params[25];
15022 var38.i = ex->params[26];
15024 for (i = 0; i < n; i++) {
15028 var40.i = (orc_uint8)var34;
15032 var41.i = (orc_uint8)var35;
/* Products are truncated to their low 16 bits. */
15034 var42.i = (var40.i * var36.i) & 0xffff;
15036 var43.i = (var41.i * var37.i) & 0xffff;
15038 var44.i = var42.i + var43.i;
15040 var45.i = var44.i + var38.i;
/* Shift amount comes straight from the executor on every element. */
15042 var46.i = var45.i >> ex->params[27];
15043 /* 12: convsuswb */
15044 var39 = ORC_CLAMP_UB(var46.i);
/*
 * orc_combine2_nxm_u8: executor-driven entry point.
 *
 * Builds an OrcProgram named "orc_combine2_nxm_u8" (flagged 2-D via
 * orc_program_set_2d, no constant n), compiles it, then loads an on-stack
 * OrcExecutor with the caller's arrays, strides, p1..p4 and the row count m,
 * and fetches p->code_exec.
 * NOTE(review): the p_inited guard, the assignment of n to the executor and
 * the call through func are not visible in this excerpt.
 */
15053 orc_combine2_nxm_u8 (uint8_t * d1, int d1_stride, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int p1, int p2, int p3, int p4, int n, int m)
15055 OrcExecutor _ex, *ex = &_ex;
15056 static int p_inited = 0;
15057 static OrcProgram *p = 0;
15058 void (*func) (OrcExecutor *);
/* Program construction and compilation happen between the once-mutex lock and unlock. */
15061 orc_once_mutex_lock ();
15063 OrcCompileResult result;
15065 p = orc_program_new ();
15066 orc_program_set_2d (p);
15067 orc_program_set_name (p, "orc_combine2_nxm_u8");
15068 orc_program_set_backup_function (p, _backup_orc_combine2_nxm_u8);
/* Variables: d1/s1/s2 declared with size 1; p1..p4 and t1/t2 with size 2. */
15069 orc_program_add_destination (p, 1, "d1");
15070 orc_program_add_source (p, 1, "s1");
15071 orc_program_add_source (p, 1, "s2");
15072 orc_program_add_parameter (p, 2, "p1");
15073 orc_program_add_parameter (p, 2, "p2");
15074 orc_program_add_parameter (p, 2, "p3");
15075 orc_program_add_parameter (p, 2, "p4");
15076 orc_program_add_temporary (p, 2, "t1");
15077 orc_program_add_temporary (p, 2, "t2");
/*
 * Same opcode order as the C fallback: convubw x2, mullw x2, addw x2,
 * shrsw, convsuswb, with T1 as the running value and D1 as the final target.
 */
15079 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
15080 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
15081 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
15082 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
15083 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
15084 orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
15085 orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
15086 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
15088 result = orc_program_compile (p);
15091 orc_once_mutex_unlock ();
/* Executor setup: row count, array bases, per-array strides, then p1..p4. */
15096 ORC_EXECUTOR_M(ex) = m;
15097 ex->arrays[ORC_VAR_D1] = d1;
15098 ex->params[ORC_VAR_D1] = d1_stride;
15099 ex->arrays[ORC_VAR_S1] = (void *)s1;
15100 ex->params[ORC_VAR_S1] = s1_stride;
15101 ex->arrays[ORC_VAR_S2] = (void *)s2;
15102 ex->params[ORC_VAR_S2] = s2_stride;
15103 ex->params[ORC_VAR_P1] = p1;
15104 ex->params[ORC_VAR_P2] = p2;
15105 ex->params[ORC_VAR_P3] = p3;
15106 ex->params[ORC_VAR_P4] = p4;
15108 func = p->code_exec;
15114 /* orc_sad_nxm_u8 */
/*
 * Plain-C form of orc_sad_nxm_u8: over m rows of n elements, adds the
 * absolute difference of two values (each treated as an unsigned byte) into
 * a 32-bit accumulator that starts at zero.
 * NOTE(review): the loads of var32/var33 (presumably from ptr4/ptr5) and the
 * final store through a1 are not visible in this excerpt.
 * NOTE(review): a second definition with the same name follows later in the
 * file; the preprocessor conditional selecting between them is not visible.
 */
15117 orc_sad_nxm_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int n, int m){
15120 const orc_int8 * ORC_RESTRICT ptr4;
15121 const orc_int8 * ORC_RESTRICT ptr5;
15122 orc_union32 var12 = { 0 };
15126 for (j = 0; j < m; j++) {
/* Row j of each source starts at base offset by stride * j. */
15127 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
15128 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
15131 for (i = 0; i < n; i++) {
/* Both operands are widened to orc_int32 before subtracting, so the difference cannot wrap. */
15137 var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
/*
 * Backup (non-JIT) implementation registered for "orc_sad_nxm_u8".
 * Reads the row count from params[ORC_VAR_A1], the source bases from
 * arrays[4]/arrays[5] and their strides from params[4]/params[5], sums the
 * absolute byte differences and leaves the total in ex->accumulators[0].
 * NOTE(review): the declaration of n and the loads of var32/var33 are not
 * visible in this excerpt.
 */
15146 _backup_orc_sad_nxm_u8 (OrcExecutor * ORC_RESTRICT ex)
15151 int m = ex->params[ORC_VAR_A1];
15152 const orc_int8 * ORC_RESTRICT ptr4;
15153 const orc_int8 * ORC_RESTRICT ptr5;
15154 orc_union32 var12 = { 0 };
15158 for (j = 0; j < m; j++) {
15159 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
15160 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
15163 for (i = 0; i < n; i++) {
/* Both operands are widened to orc_int32 before subtracting, so the difference cannot wrap. */
15169 var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
/* Hand the total back through the executor's first accumulator slot. */
15172 ex->accumulators[0] = var12.i;
/*
 * orc_sad_nxm_u8: executor-driven entry point.
 *
 * Builds a 2-D OrcProgram named "orc_sad_nxm_u8" with two size-1 sources and
 * one size-4 accumulator, consisting of the single opcode "accsadubl".  After
 * execution the accumulator is read back with orc_executor_get_accumulator
 * and written to *a1.
 * NOTE(review): the p_inited guard, the assignment of n to the executor and
 * the call through func are not visible in this excerpt.
 */
15177 orc_sad_nxm_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int n, int m)
15179 OrcExecutor _ex, *ex = &_ex;
15180 static int p_inited = 0;
15181 static OrcProgram *p = 0;
15182 void (*func) (OrcExecutor *);
/* Program construction and compilation happen between the once-mutex lock and unlock. */
15185 orc_once_mutex_lock ();
15187 OrcCompileResult result;
15189 p = orc_program_new ();
15190 orc_program_set_2d (p);
15191 orc_program_set_name (p, "orc_sad_nxm_u8");
15192 orc_program_set_backup_function (p, _backup_orc_sad_nxm_u8);
15193 orc_program_add_source (p, 1, "s1");
15194 orc_program_add_source (p, 1, "s2");
15195 orc_program_add_accumulator (p, 4, "a1");
15197 orc_program_append_2 (p, "accsadubl", 0, ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
15199 result = orc_program_compile (p);
15202 orc_once_mutex_unlock ();
/* Executor setup: row count, then base pointer and stride for each source. */
15207 ORC_EXECUTOR_M(ex) = m;
15208 ex->arrays[ORC_VAR_S1] = (void *)s1;
15209 ex->params[ORC_VAR_S1] = s1_stride;
15210 ex->arrays[ORC_VAR_S2] = (void *)s2;
15211 ex->params[ORC_VAR_S2] = s2_stride;
15213 func = p->code_exec;
/* Publish the accumulated sum to the caller. */
15215 *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
15220 /* orc_sad_8x8_u8 */
/*
 * Plain-C form of orc_sad_8x8_u8: sums absolute differences of unsigned-byte
 * values over m rows of n elements into a zero-initialized 32-bit
 * accumulator.
 * NOTE(review): n and m are not parameters of this function; their
 * declarations (presumably both fixed at 8) are not visible in this excerpt,
 * nor are the loads of var32/var33 or the final store through a1.
 * NOTE(review): a second definition with the same name follows later in the
 * file; the preprocessor conditional selecting between them is not visible.
 */
15223 orc_sad_8x8_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride){
15228 const orc_int8 * ORC_RESTRICT ptr4;
15229 const orc_int8 * ORC_RESTRICT ptr5;
15230 orc_union32 var12 = { 0 };
15234 for (j = 0; j < m; j++) {
/* Row j of each source starts at base offset by stride * j. */
15235 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
15236 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
15239 for (i = 0; i < n; i++) {
/* Both operands are widened to orc_int32 before subtracting, so the difference cannot wrap. */
15245 var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
/*
 * Backup (non-JIT) implementation registered for "orc_sad_8x8_u8".
 * Reads the source bases from arrays[4]/arrays[5] and their strides from
 * params[4]/params[5], sums the absolute byte differences and leaves the
 * total in ex->accumulators[0].
 * NOTE(review): the declarations of n and m (presumably constants, since no
 * read of the row count from the executor is shown) and the loads of
 * var32/var33 are not visible in this excerpt.
 */
15254 _backup_orc_sad_8x8_u8 (OrcExecutor * ORC_RESTRICT ex)
15260 const orc_int8 * ORC_RESTRICT ptr4;
15261 const orc_int8 * ORC_RESTRICT ptr5;
15262 orc_union32 var12 = { 0 };
15266 for (j = 0; j < m; j++) {
15267 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
15268 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
15271 for (i = 0; i < n; i++) {
/* Both operands are widened to orc_int32 before subtracting, so the difference cannot wrap. */
15277 var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
/* Hand the total back through the executor's first accumulator slot. */
15280 ex->accumulators[0] = var12.i;
/*
 * orc_sad_8x8_u8: executor-driven entry point.
 *
 * Builds a 2-D OrcProgram named "orc_sad_8x8_u8" with constant n = 8 and
 * constant m = 8, two size-1 sources and one size-4 accumulator, consisting
 * of the single opcode "accsadubl".  The executor's M is set to 8 and the
 * accumulator is copied to *a1 afterwards.
 * NOTE(review): the p_inited guard, the assignment of n to the executor and
 * the call through func are not visible in this excerpt.
 */
15285 orc_sad_8x8_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride)
15287 OrcExecutor _ex, *ex = &_ex;
15288 static int p_inited = 0;
15289 static OrcProgram *p = 0;
15290 void (*func) (OrcExecutor *);
/* Program construction and compilation happen between the once-mutex lock and unlock. */
15293 orc_once_mutex_lock ();
15295 OrcCompileResult result;
15297 p = orc_program_new ();
15298 orc_program_set_constant_n (p, 8);
15299 orc_program_set_2d (p);
15300 orc_program_set_constant_m (p, 8);
15301 orc_program_set_name (p, "orc_sad_8x8_u8");
15302 orc_program_set_backup_function (p, _backup_orc_sad_8x8_u8);
15303 orc_program_add_source (p, 1, "s1");
15304 orc_program_add_source (p, 1, "s2");
15305 orc_program_add_accumulator (p, 4, "a1");
15307 orc_program_append_2 (p, "accsadubl", 0, ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
15309 result = orc_program_compile (p);
15312 orc_once_mutex_unlock ();
/* Row count is the literal 8, matching the constant m given to the program. */
15317 ORC_EXECUTOR_M(ex) = 8;
15318 ex->arrays[ORC_VAR_S1] = (void *)s1;
15319 ex->params[ORC_VAR_S1] = s1_stride;
15320 ex->arrays[ORC_VAR_S2] = (void *)s2;
15321 ex->params[ORC_VAR_S2] = s2_stride;
15323 func = p->code_exec;
/* Publish the accumulated sum to the caller. */
15325 *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
15330 /* orc_sad_12x12_u8 */
/*
 * Plain-C form of orc_sad_12x12_u8: sums absolute differences of
 * unsigned-byte values over m rows of n elements into a zero-initialized
 * 32-bit accumulator.
 * NOTE(review): n and m are not parameters of this function; their
 * declarations (presumably both fixed at 12) are not visible in this
 * excerpt, nor are the loads of var32/var33 or the final store through a1.
 * NOTE(review): a second definition with the same name follows later in the
 * file; the preprocessor conditional selecting between them is not visible.
 */
15333 orc_sad_12x12_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride){
15338 const orc_int8 * ORC_RESTRICT ptr4;
15339 const orc_int8 * ORC_RESTRICT ptr5;
15340 orc_union32 var12 = { 0 };
15344 for (j = 0; j < m; j++) {
/* Row j of each source starts at base offset by stride * j. */
15345 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
15346 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
15349 for (i = 0; i < n; i++) {
/* Both operands are widened to orc_int32 before subtracting, so the difference cannot wrap. */
15355 var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
/*
 * Backup (non-JIT) implementation registered for "orc_sad_12x12_u8".
 * Reads the source bases from arrays[4]/arrays[5] and their strides from
 * params[4]/params[5], sums the absolute byte differences and leaves the
 * total in ex->accumulators[0].
 * NOTE(review): the declarations of n and m (presumably constants, since no
 * read of the row count from the executor is shown) and the loads of
 * var32/var33 are not visible in this excerpt.
 */
15364 _backup_orc_sad_12x12_u8 (OrcExecutor * ORC_RESTRICT ex)
15370 const orc_int8 * ORC_RESTRICT ptr4;
15371 const orc_int8 * ORC_RESTRICT ptr5;
15372 orc_union32 var12 = { 0 };
15376 for (j = 0; j < m; j++) {
15377 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
15378 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
15381 for (i = 0; i < n; i++) {
/* Both operands are widened to orc_int32 before subtracting, so the difference cannot wrap. */
15387 var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
/* Hand the total back through the executor's first accumulator slot. */
15390 ex->accumulators[0] = var12.i;
/*
 * orc_sad_12x12_u8: executor-driven entry point.
 *
 * Builds a 2-D OrcProgram named "orc_sad_12x12_u8" with constant n = 12 and
 * constant m = 12, two size-1 sources and one size-4 accumulator, consisting
 * of the single opcode "accsadubl".  The executor's M is set to 12 and the
 * accumulator is copied to *a1 afterwards.
 * NOTE(review): the p_inited guard, the assignment of n to the executor and
 * the call through func are not visible in this excerpt.
 */
15395 orc_sad_12x12_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride)
15397 OrcExecutor _ex, *ex = &_ex;
15398 static int p_inited = 0;
15399 static OrcProgram *p = 0;
15400 void (*func) (OrcExecutor *);
/* Program construction and compilation happen between the once-mutex lock and unlock. */
15403 orc_once_mutex_lock ();
15405 OrcCompileResult result;
15407 p = orc_program_new ();
15408 orc_program_set_constant_n (p, 12);
15409 orc_program_set_2d (p);
15410 orc_program_set_constant_m (p, 12);
15411 orc_program_set_name (p, "orc_sad_12x12_u8");
15412 orc_program_set_backup_function (p, _backup_orc_sad_12x12_u8);
15413 orc_program_add_source (p, 1, "s1");
15414 orc_program_add_source (p, 1, "s2");
15415 orc_program_add_accumulator (p, 4, "a1");
15417 orc_program_append_2 (p, "accsadubl", 0, ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
15419 result = orc_program_compile (p);
15422 orc_once_mutex_unlock ();
/* Row count is the literal 12, matching the constant m given to the program. */
15427 ORC_EXECUTOR_M(ex) = 12;
15428 ex->arrays[ORC_VAR_S1] = (void *)s1;
15429 ex->params[ORC_VAR_S1] = s1_stride;
15430 ex->arrays[ORC_VAR_S2] = (void *)s2;
15431 ex->params[ORC_VAR_S2] = s2_stride;
15433 func = p->code_exec;
/* Publish the accumulated sum to the caller. */
15435 *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
15440 /* orc_sad_16xn_u8 */
/*
 * Plain-C form of orc_sad_16xn_u8: sums absolute differences of
 * unsigned-byte values over m rows of n elements into a zero-initialized
 * 32-bit accumulator.  The row count m is a parameter.
 * NOTE(review): n is not a parameter; its declaration (presumably fixed at
 * 16) is not visible in this excerpt, nor are the loads of var32/var33 or
 * the final store through a1.
 * NOTE(review): a second definition with the same name follows later in the
 * file; the preprocessor conditional selecting between them is not visible.
 */
15443 orc_sad_16xn_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m){
15447 const orc_int8 * ORC_RESTRICT ptr4;
15448 const orc_int8 * ORC_RESTRICT ptr5;
15449 orc_union32 var12 = { 0 };
15453 for (j = 0; j < m; j++) {
/* Row j of each source starts at base offset by stride * j. */
15454 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
15455 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
15458 for (i = 0; i < n; i++) {
/* Both operands are widened to orc_int32 before subtracting, so the difference cannot wrap. */
15464 var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
/*
 * Backup (non-JIT) implementation registered for "orc_sad_16xn_u8".
 * Reads the row count from params[ORC_VAR_A1], the source bases from
 * arrays[4]/arrays[5] and their strides from params[4]/params[5], sums the
 * absolute byte differences and leaves the total in ex->accumulators[0].
 * NOTE(review): the declaration of n (presumably fixed at 16) and the loads
 * of var32/var33 are not visible in this excerpt.
 */
15473 _backup_orc_sad_16xn_u8 (OrcExecutor * ORC_RESTRICT ex)
15478 int m = ex->params[ORC_VAR_A1];
15479 const orc_int8 * ORC_RESTRICT ptr4;
15480 const orc_int8 * ORC_RESTRICT ptr5;
15481 orc_union32 var12 = { 0 };
15485 for (j = 0; j < m; j++) {
15486 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
15487 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
15490 for (i = 0; i < n; i++) {
/* Both operands are widened to orc_int32 before subtracting, so the difference cannot wrap. */
15496 var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
/* Hand the total back through the executor's first accumulator slot. */
15499 ex->accumulators[0] = var12.i;
/*
 * orc_sad_16xn_u8: executor-driven entry point.
 *
 * Builds a 2-D OrcProgram named "orc_sad_16xn_u8" with constant n = 16, two
 * size-1 sources and one size-4 accumulator, consisting of the single opcode
 * "accsadubl".  The caller's m becomes the executor's M and the accumulator
 * is copied to *a1 afterwards.
 * NOTE(review): the p_inited guard, the assignment of n to the executor and
 * the call through func are not visible in this excerpt.
 */
15504 orc_sad_16xn_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m)
15506 OrcExecutor _ex, *ex = &_ex;
15507 static int p_inited = 0;
15508 static OrcProgram *p = 0;
15509 void (*func) (OrcExecutor *);
/* Program construction and compilation happen between the once-mutex lock and unlock. */
15512 orc_once_mutex_lock ();
15514 OrcCompileResult result;
15516 p = orc_program_new ();
15517 orc_program_set_constant_n (p, 16);
15518 orc_program_set_2d (p);
15519 orc_program_set_name (p, "orc_sad_16xn_u8");
15520 orc_program_set_backup_function (p, _backup_orc_sad_16xn_u8);
15521 orc_program_add_source (p, 1, "s1");
15522 orc_program_add_source (p, 1, "s2");
15523 orc_program_add_accumulator (p, 4, "a1");
15525 orc_program_append_2 (p, "accsadubl", 0, ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
15527 result = orc_program_compile (p);
15530 orc_once_mutex_unlock ();
/* Executor setup: row count, then base pointer and stride for each source. */
15535 ORC_EXECUTOR_M(ex) = m;
15536 ex->arrays[ORC_VAR_S1] = (void *)s1;
15537 ex->params[ORC_VAR_S1] = s1_stride;
15538 ex->arrays[ORC_VAR_S2] = (void *)s2;
15539 ex->params[ORC_VAR_S2] = s2_stride;
15541 func = p->code_exec;
/* Publish the accumulated sum to the caller. */
15543 *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
15548 /* orc_sad_32xn_u8 */
/*
 * Plain-C form of orc_sad_32xn_u8: sums absolute differences of
 * unsigned-byte values over m rows of n elements into a zero-initialized
 * 32-bit accumulator.  The row count m is a parameter.
 * NOTE(review): n is not a parameter; its declaration (presumably fixed at
 * 32) is not visible in this excerpt, nor are the loads of var32/var33 or
 * the final store through a1.
 * NOTE(review): a second definition with the same name follows later in the
 * file; the preprocessor conditional selecting between them is not visible.
 */
15551 orc_sad_32xn_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m){
15555 const orc_int8 * ORC_RESTRICT ptr4;
15556 const orc_int8 * ORC_RESTRICT ptr5;
15557 orc_union32 var12 = { 0 };
15561 for (j = 0; j < m; j++) {
/* Row j of each source starts at base offset by stride * j. */
15562 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
15563 ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
15566 for (i = 0; i < n; i++) {
/* Both operands are widened to orc_int32 before subtracting, so the difference cannot wrap. */
15572 var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
/*
 * Backup (non-JIT) implementation registered for "orc_sad_32xn_u8".
 * Reads the row count from params[ORC_VAR_A1], the source bases from
 * arrays[4]/arrays[5] and their strides from params[4]/params[5], sums the
 * absolute byte differences and leaves the total in ex->accumulators[0].
 * NOTE(review): the declaration of n (presumably fixed at 32) and the loads
 * of var32/var33 are not visible in this excerpt.
 */
15581 _backup_orc_sad_32xn_u8 (OrcExecutor * ORC_RESTRICT ex)
15586 int m = ex->params[ORC_VAR_A1];
15587 const orc_int8 * ORC_RESTRICT ptr4;
15588 const orc_int8 * ORC_RESTRICT ptr5;
15589 orc_union32 var12 = { 0 };
15593 for (j = 0; j < m; j++) {
15594 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
15595 ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
15598 for (i = 0; i < n; i++) {
/* Both operands are widened to orc_int32 before subtracting, so the difference cannot wrap. */
15604 var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
/* Hand the total back through the executor's first accumulator slot. */
15607 ex->accumulators[0] = var12.i;
/*
 * orc_sad_32xn_u8: executor-driven entry point.
 *
 * Builds a 2-D OrcProgram named "orc_sad_32xn_u8" with constant n = 32, two
 * size-1 sources and one size-4 accumulator, consisting of the single opcode
 * "accsadubl".  The caller's m becomes the executor's M and the accumulator
 * is copied to *a1 afterwards.
 * NOTE(review): the p_inited guard, the assignment of n to the executor and
 * the call through func are not visible in this excerpt.
 */
15612 orc_sad_32xn_u8 (uint32_t * a1, const uint8_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, int m)
15614 OrcExecutor _ex, *ex = &_ex;
15615 static int p_inited = 0;
15616 static OrcProgram *p = 0;
15617 void (*func) (OrcExecutor *);
/* Program construction and compilation happen between the once-mutex lock and unlock. */
15620 orc_once_mutex_lock ();
15622 OrcCompileResult result;
15624 p = orc_program_new ();
15625 orc_program_set_constant_n (p, 32);
15626 orc_program_set_2d (p);
15627 orc_program_set_name (p, "orc_sad_32xn_u8");
15628 orc_program_set_backup_function (p, _backup_orc_sad_32xn_u8);
15629 orc_program_add_source (p, 1, "s1");
15630 orc_program_add_source (p, 1, "s2");
15631 orc_program_add_accumulator (p, 4, "a1");
15633 orc_program_append_2 (p, "accsadubl", 0, ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
15635 result = orc_program_compile (p);
15638 orc_once_mutex_unlock ();
/* Executor setup: row count, then base pointer and stride for each source. */
15643 ORC_EXECUTOR_M(ex) = m;
15644 ex->arrays[ORC_VAR_S1] = (void *)s1;
15645 ex->params[ORC_VAR_S1] = s1_stride;
15646 ex->arrays[ORC_VAR_S2] = (void *)s2;
15647 ex->params[ORC_VAR_S2] = s2_stride;
15649 func = p->code_exec;
/* Publish the accumulated sum to the caller. */
15651 *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
15656 /* convert_rgb_to_gray */
/*
 * Plain-C form of convert_rgb_to_gray: for each of n 32-bit source elements,
 * extracts three bytes (bits 0-7, bits 8-15 and bits 16-23), scales each by
 * a 16-bit weight, sums the scaled values with clamping adds, adds 128 and
 * keeps the high byte of the 16-bit total as the output byte.
 * The three weights 19595 + 38470 + 7471 sum to 65536.
 * NOTE(review): the loads of var35/var37/var39 (presumably all ptr4[i]) and
 * the store of var42 are not visible in this excerpt.  ORC_SWAP_W is defined
 * elsewhere; it presumably swaps the two bytes of a 16-bit value, which
 * would move the extracted byte into the high half before the multiply.
 * NOTE(review): a second definition with the same name follows later in the
 * file; the preprocessor conditional selecting between them is not visible.
 */
15659 convert_rgb_to_gray (orc_uint8 * d1, const orc_uint32 * s1, int n){
15661 orc_int8 * ORC_RESTRICT ptr0;
15662 const orc_union32 * ORC_RESTRICT ptr4;
15690 ptr0 = (orc_int8 *)d1;
15691 ptr4 = (orc_union32 *)s1;
/* Per-channel weights and the additive constant. */
15694 var36.i = 0x00004c8b; /* 19595 or 9.68122e-320f */
15696 var38.i = 0x00009646; /* 38470 or 1.90067e-319f */
15698 var40.i = 0x00001d2f; /* 7471 or 3.69116e-320f */
15700 var41.i = 0x00000080; /* 128 or 6.32404e-322f */
15702 for (i = 0; i < n; i++) {
/* First channel: low 16 bits, then the low byte of those. */
15706 var43.i = (orc_uint32)var35.i & 0xffff;
15708 var44 = (orc_uint16)var43.i & 0xff;
15710 var45.i = (orc_uint8)var44;
15712 var46.i = ORC_SWAP_W(var45.i);
/* Unsigned 16x16 multiply keeping only the upper 16 bits of the product. */
15714 var47.i = ((orc_uint32)((orc_uint16)var46.i) * (orc_uint32)((orc_uint16)var36.i)) >> 16;
/* Second channel: low 16 bits, then the high byte of those. */
15718 var48.i = (orc_uint32)var37.i & 0xffff;
15720 var49 = ((orc_uint16)var48.i >> 8)&0xff;
15722 var50.i = (orc_uint8)var49;
15724 var51.i = ORC_SWAP_W(var50.i);
15726 var52.i = ((orc_uint32)((orc_uint16)var51.i) * (orc_uint32)((orc_uint16)var38.i)) >> 16;
15728 var53.i = ORC_CLAMP_UW((orc_uint16)var47.i + (orc_uint16)var52.i);
/* Third channel: high 16 bits, then the low byte of those. */
15731 /* 16: select1lw */
15732 var54.i = ((orc_uint32)var39.i >> 16)&0xffff;
15733 /* 17: select0wb */
15734 var55 = (orc_uint16)var54.i & 0xff;
15736 var56.i = (orc_uint8)var55;
15738 var57.i = ORC_SWAP_W(var56.i);
15740 var58.i = ((orc_uint32)((orc_uint16)var57.i) * (orc_uint32)((orc_uint16)var40.i)) >> 16;
15742 var59.i = ORC_CLAMP_UW((orc_uint16)var53.i + (orc_uint16)var58.i);
/* Add 128, then keep the upper byte of the 16-bit sum. */
15744 var60.i = ORC_CLAMP_UW((orc_uint16)var59.i + (orc_uint16)var41.i);
15745 /* 25: select1wb */
15746 var42 = ((orc_uint16)var60.i >> 8)&0xff;
/*
 * Backup (non-JIT) implementation registered for "convert_rgb_to_gray".
 * Same visible arithmetic as the plain-C variant, with the destination taken
 * from ex->arrays[0] and the 32-bit source from ex->arrays[4]: three bytes
 * of each source element are weighted (19595, 38470, 7471; sum 65536),
 * summed with clamping adds, offset by 128, and the high byte of the 16-bit
 * total is the result.
 * NOTE(review): the declaration of n, the loads of var35/var37/var39 and the
 * store of var42 are not visible in this excerpt.  ORC_SWAP_W is defined
 * elsewhere; presumably a 16-bit byte swap.
 */
15755 _backup_convert_rgb_to_gray (OrcExecutor * ORC_RESTRICT ex)
15759 orc_int8 * ORC_RESTRICT ptr0;
15760 const orc_union32 * ORC_RESTRICT ptr4;
15788 ptr0 = (orc_int8 *)ex->arrays[0];
15789 ptr4 = (orc_union32 *)ex->arrays[4];
/* Per-channel weights and the additive constant. */
15792 var36.i = 0x00004c8b; /* 19595 or 9.68122e-320f */
15794 var38.i = 0x00009646; /* 38470 or 1.90067e-319f */
15796 var40.i = 0x00001d2f; /* 7471 or 3.69116e-320f */
15798 var41.i = 0x00000080; /* 128 or 6.32404e-322f */
15800 for (i = 0; i < n; i++) {
/* First channel: low 16 bits, then the low byte of those. */
15804 var43.i = (orc_uint32)var35.i & 0xffff;
15806 var44 = (orc_uint16)var43.i & 0xff;
15808 var45.i = (orc_uint8)var44;
15810 var46.i = ORC_SWAP_W(var45.i);
/* Unsigned 16x16 multiply keeping only the upper 16 bits of the product. */
15812 var47.i = ((orc_uint32)((orc_uint16)var46.i) * (orc_uint32)((orc_uint16)var36.i)) >> 16;
/* Second channel: low 16 bits, then the high byte of those. */
15816 var48.i = (orc_uint32)var37.i & 0xffff;
15818 var49 = ((orc_uint16)var48.i >> 8)&0xff;
15820 var50.i = (orc_uint8)var49;
15822 var51.i = ORC_SWAP_W(var50.i);
15824 var52.i = ((orc_uint32)((orc_uint16)var51.i) * (orc_uint32)((orc_uint16)var38.i)) >> 16;
15826 var53.i = ORC_CLAMP_UW((orc_uint16)var47.i + (orc_uint16)var52.i);
/* Third channel: high 16 bits, then the low byte of those. */
15829 /* 16: select1lw */
15830 var54.i = ((orc_uint32)var39.i >> 16)&0xffff;
15831 /* 17: select0wb */
15832 var55 = (orc_uint16)var54.i & 0xff;
15834 var56.i = (orc_uint8)var55;
15836 var57.i = ORC_SWAP_W(var56.i);
15838 var58.i = ((orc_uint32)((orc_uint16)var57.i) * (orc_uint32)((orc_uint16)var40.i)) >> 16;
15840 var59.i = ORC_CLAMP_UW((orc_uint16)var53.i + (orc_uint16)var58.i);
/* Add 128, then keep the upper byte of the 16-bit sum. */
15842 var60.i = ORC_CLAMP_UW((orc_uint16)var59.i + (orc_uint16)var41.i);
15843 /* 25: select1wb */
15844 var42 = ((orc_uint16)var60.i >> 8)&0xff;
/*
 * convert_rgb_to_gray: executor-driven entry point.
 *
 * Builds a 1-D OrcProgram named "convert_rgb_to_gray" with a size-1
 * destination, a size-4 source, four size-4 constants (0x4c8b, 0x9646,
 * 0x1d2f, 0x80) and three temporaries, compiles it, and points the
 * executor's D1/S1 array slots at the caller's buffers.
 * NOTE(review): the p_inited guard, the assignment of n to the executor and
 * the call through func are not visible in this excerpt.
 */
15852 convert_rgb_to_gray (orc_uint8 * d1, const orc_uint32 * s1, int n)
15854 OrcExecutor _ex, *ex = &_ex;
15855 static int p_inited = 0;
15856 static OrcProgram *p = 0;
15857 void (*func) (OrcExecutor *);
/* Program construction and compilation happen between the once-mutex lock and unlock. */
15860 orc_once_mutex_lock ();
15862 OrcCompileResult result;
15864 p = orc_program_new ();
15865 orc_program_set_name (p, "convert_rgb_to_gray");
15866 orc_program_set_backup_function (p, _backup_convert_rgb_to_gray);
15867 orc_program_add_destination (p, 1, "d1");
15868 orc_program_add_source (p, 4, "s1");
15869 orc_program_add_constant (p, 4, 0x00004c8b, "c1");
15870 orc_program_add_constant (p, 4, 0x00009646, "c2");
15871 orc_program_add_constant (p, 4, 0x00001d2f, "c3");
15872 orc_program_add_constant (p, 4, 0x00000080, "c4");
15873 orc_program_add_temporary (p, 1, "t1");
15874 orc_program_add_temporary (p, 2, "t2");
15875 orc_program_add_temporary (p, 2, "t3");
/* Channel 1: select0lw + select0wb from S1, then convubw, swapw, mulhuw by C1 into T3. */
15877 orc_program_append_2 (p, "select0lw", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
15878 orc_program_append_2 (p, "select0wb", 0, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
15879 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
15880 orc_program_append_2 (p, "swapw", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
15881 orc_program_append_2 (p, "mulhuw", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_C1, ORC_VAR_D1);
/* Channel 2: select0lw + select1wb, scaled by C2 in T2 and added into T3 with addusw. */
15882 orc_program_append_2 (p, "select0lw", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
15883 orc_program_append_2 (p, "select1wb", 0, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
15884 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
15885 orc_program_append_2 (p, "swapw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
15886 orc_program_append_2 (p, "mulhuw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C2, ORC_VAR_D1);
15887 orc_program_append_2 (p, "addusw", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1);
/* Channel 3: select1lw + select0wb, scaled by C3 in T2 and added into T3 with addusw. */
15888 orc_program_append_2 (p, "select1lw", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
15889 orc_program_append_2 (p, "select0wb", 0, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
15890 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
15891 orc_program_append_2 (p, "swapw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
15892 orc_program_append_2 (p, "mulhuw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C3, ORC_VAR_D1);
15893 orc_program_append_2 (p, "addusw", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1);
/* Add C4 to the running sum, then select1wb writes the result byte to D1. */
15894 orc_program_append_2 (p, "addusw", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_C4, ORC_VAR_D1);
15895 orc_program_append_2 (p, "select1wb", 0, ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
15897 result = orc_program_compile (p);
15900 orc_once_mutex_unlock ();
15905 ex->arrays[ORC_VAR_D1] = d1;
15906 ex->arrays[ORC_VAR_S1] = (void *)s1;
15908 func = p->code_exec;
15914 /* canny_calc_delta_x */
/*
 * Plain-C form of canny_calc_delta_x: treats d1, s1 and s2 as arrays of
 * 32-bit elements.  For each of n elements it extracts three bytes from both
 * sources (bits 8-15, bits 16-23, bits 24-31), subtracts the s2 byte from
 * the s1 byte for each position, squares the three differences and writes
 * their sum to the output element.
 * NOTE(review): the loads of var40..var45 (presumably ptr4[i]/ptr5[i]), the
 * widening of var53/var62/var72 into var54/var63/var73 and the store of
 * var46 are not visible in this excerpt.
 * NOTE(review): s1/s2 are declared as uint8_t pointers but are accessed as
 * orc_union32 -- callers presumably pass suitably sized and aligned buffers.
 * NOTE(review): a second definition with the same name follows later in the
 * file; the preprocessor conditional selecting between them is not visible.
 */
15917 canny_calc_delta_x (int32_t * d1, const uint8_t * s1, const uint8_t * s2, int n){
15919 orc_union32 * ORC_RESTRICT ptr0;
15920 const orc_union32 * ORC_RESTRICT ptr4;
15921 const orc_union32 * ORC_RESTRICT ptr5;
15958 ptr0 = (orc_union32 *)d1;
15959 ptr4 = (orc_union32 *)s1;
15960 ptr5 = (orc_union32 *)s2;
15963 for (i = 0; i < n; i++) {
/* Term 1: bits 8-15 of each source element. */
15967 var47.i = (orc_uint32)var40.i & 0xffff;
15969 var48 = ((orc_uint16)var47.i >> 8)&0xff;
15973 var49.i = (orc_uint32)var41.i & 0xffff;
15975 var50 = ((orc_uint16)var49.i >> 8)&0xff;
15977 var51.i = (orc_uint8)var48;
15979 var52.i = (orc_uint8)var50;
15981 var53.i = var51.i - var52.i;
15985 var55.i = (var54.i * var54.i) & 0xffffffff;
/* Term 2: bits 16-23 of each source element. */
15988 /* 12: select1lw */
15989 var56.i = ((orc_uint32)var42.i >> 16)&0xffff;
15990 /* 13: select0wb */
15991 var57 = (orc_uint16)var56.i & 0xff;
15994 /* 15: select1lw */
15995 var58.i = ((orc_uint32)var43.i >> 16)&0xffff;
15996 /* 16: select0wb */
15997 var59 = (orc_uint16)var58.i & 0xff;
15999 var60.i = (orc_uint8)var57;
16001 var61.i = (orc_uint8)var59;
16003 var62.i = var60.i - var61.i;
16007 var64.i = (var63.i * var63.i) & 0xffffffff;
16009 var65.i = var55.i + var64.i;
/* Term 3: bits 24-31 of each source element. */
16012 /* 24: select1lw */
16013 var66.i = ((orc_uint32)var44.i >> 16)&0xffff;
16014 /* 25: select1wb */
16015 var67 = ((orc_uint16)var66.i >> 8)&0xff;
16018 /* 27: select1lw */
16019 var68.i = ((orc_uint32)var45.i >> 16)&0xffff;
16020 /* 28: select1wb */
16021 var69 = ((orc_uint16)var68.i >> 8)&0xff;
16023 var70.i = (orc_uint8)var67;
16025 var71.i = (orc_uint8)var69;
16027 var72.i = var70.i - var71.i;
16031 var74.i = (var73.i * var73.i) & 0xffffffff;
/* Output value: sum of the three squared differences. */
16033 var46.i = var65.i + var74.i;
/*
 * Backup (non-JIT) implementation registered for "canny_calc_delta_x".
 * Same visible arithmetic as the plain-C variant, with the destination taken
 * from ex->arrays[0] and the two sources from ex->arrays[4]/ex->arrays[5]:
 * for each 32-bit element, the differences of bits 8-15, 16-23 and 24-31
 * between the two sources are squared and summed.
 * NOTE(review): the declaration of n, the loads of var40..var45, the
 * widening of var53/var62/var72 into var54/var63/var73 and the store of
 * var46 are not visible in this excerpt.
 */
16042 _backup_canny_calc_delta_x (OrcExecutor * ORC_RESTRICT ex)
16046 orc_union32 * ORC_RESTRICT ptr0;
16047 const orc_union32 * ORC_RESTRICT ptr4;
16048 const orc_union32 * ORC_RESTRICT ptr5;
16085 ptr0 = (orc_union32 *)ex->arrays[0];
16086 ptr4 = (orc_union32 *)ex->arrays[4];
16087 ptr5 = (orc_union32 *)ex->arrays[5];
16090 for (i = 0; i < n; i++) {
/* Term 1: bits 8-15 of each source element. */
16094 var47.i = (orc_uint32)var40.i & 0xffff;
16096 var48 = ((orc_uint16)var47.i >> 8)&0xff;
16100 var49.i = (orc_uint32)var41.i & 0xffff;
16102 var50 = ((orc_uint16)var49.i >> 8)&0xff;
16104 var51.i = (orc_uint8)var48;
16106 var52.i = (orc_uint8)var50;
16108 var53.i = var51.i - var52.i;
16112 var55.i = (var54.i * var54.i) & 0xffffffff;
/* Term 2: bits 16-23 of each source element. */
16115 /* 12: select1lw */
16116 var56.i = ((orc_uint32)var42.i >> 16)&0xffff;
16117 /* 13: select0wb */
16118 var57 = (orc_uint16)var56.i & 0xff;
16121 /* 15: select1lw */
16122 var58.i = ((orc_uint32)var43.i >> 16)&0xffff;
16123 /* 16: select0wb */
16124 var59 = (orc_uint16)var58.i & 0xff;
16126 var60.i = (orc_uint8)var57;
16128 var61.i = (orc_uint8)var59;
16130 var62.i = var60.i - var61.i;
16134 var64.i = (var63.i * var63.i) & 0xffffffff;
16136 var65.i = var55.i + var64.i;
/* Term 3: bits 24-31 of each source element. */
16139 /* 24: select1lw */
16140 var66.i = ((orc_uint32)var44.i >> 16)&0xffff;
16141 /* 25: select1wb */
16142 var67 = ((orc_uint16)var66.i >> 8)&0xff;
16145 /* 27: select1lw */
16146 var68.i = ((orc_uint32)var45.i >> 16)&0xffff;
16147 /* 28: select1wb */
16148 var69 = ((orc_uint16)var68.i >> 8)&0xff;
16150 var70.i = (orc_uint8)var67;
16152 var71.i = (orc_uint8)var69;
16154 var72.i = var70.i - var71.i;
16158 var74.i = (var73.i * var73.i) & 0xffffffff;
/* Output value: sum of the three squared differences. */
16160 var46.i = var65.i + var74.i;
/*
 * canny_calc_delta_x: executor-driven entry point.
 *
 * Builds a 1-D OrcProgram named "canny_calc_delta_x" with a size-4
 * destination, two size-4 sources and eight temporaries, compiles it, and
 * points the executor's D1/S1/S2 array slots at the caller's buffers.
 * The opcode list is three repetitions of the same pattern (select a byte
 * from S1 and from S2, convubw both, subw, convswl, mulll to square), with
 * the squares accumulated in T8 and the last addl writing D1.
 * NOTE(review): the p_inited guard, the assignment of n to the executor and
 * the call through func are not visible in this excerpt.
 */
16168 canny_calc_delta_x (int32_t * d1, const uint8_t * s1, const uint8_t * s2, int n)
16170 OrcExecutor _ex, *ex = &_ex;
16171 static int p_inited = 0;
16172 static OrcProgram *p = 0;
16173 void (*func) (OrcExecutor *);
/* Program construction and compilation happen between the once-mutex lock and unlock. */
16176 orc_once_mutex_lock ();
16178 OrcCompileResult result;
16180 p = orc_program_new ();
16181 orc_program_set_name (p, "canny_calc_delta_x");
16182 orc_program_set_backup_function (p, _backup_canny_calc_delta_x);
16183 orc_program_add_destination (p, 4, "d1");
16184 orc_program_add_source (p, 4, "s1");
16185 orc_program_add_source (p, 4, "s2");
16186 orc_program_add_temporary (p, 2, "t1");
16187 orc_program_add_temporary (p, 2, "t2");
16188 orc_program_add_temporary (p, 1, "t3");
16189 orc_program_add_temporary (p, 2, "t4");
16190 orc_program_add_temporary (p, 1, "t5");
16191 orc_program_add_temporary (p, 2, "t6");
16192 orc_program_add_temporary (p, 4, "t7");
16193 orc_program_add_temporary (p, 4, "t8");
/* Pass 1 (select0lw + select1wb): squared difference lands in T8. */
16195 orc_program_append_2 (p, "select0lw", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
16196 orc_program_append_2 (p, "select1wb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
16197 orc_program_append_2 (p, "select0lw", 0, ORC_VAR_T4, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
16198 orc_program_append_2 (p, "select1wb", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
16199 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
16200 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T6, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
16201 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T4, ORC_VAR_T6, ORC_VAR_D1);
16202 orc_program_append_2 (p, "convswl", 0, ORC_VAR_T7, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
16203 orc_program_append_2 (p, "mulll", 0, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1);
/* Pass 2 (select1lw + select0wb): squared difference in T7, added into T8. */
16204 orc_program_append_2 (p, "select1lw", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
16205 orc_program_append_2 (p, "select0wb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
16206 orc_program_append_2 (p, "select1lw", 0, ORC_VAR_T4, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
16207 orc_program_append_2 (p, "select0wb", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
16208 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
16209 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T6, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
16210 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T4, ORC_VAR_T6, ORC_VAR_D1);
16211 orc_program_append_2 (p, "convswl", 0, ORC_VAR_T7, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
16212 orc_program_append_2 (p, "mulll", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1);
16213 orc_program_append_2 (p, "addl", 0, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_D1);
/* Pass 3 (select1lw + select1wb): squared difference in T7, T8 + T7 written to D1. */
16214 orc_program_append_2 (p, "select1lw", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
16215 orc_program_append_2 (p, "select1wb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
16216 orc_program_append_2 (p, "select1lw", 0, ORC_VAR_T4, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
16217 orc_program_append_2 (p, "select1wb", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
16218 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
16219 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T6, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
16220 orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T4, ORC_VAR_T6, ORC_VAR_D1);
16221 orc_program_append_2 (p, "convswl", 0, ORC_VAR_T7, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
16222 orc_program_append_2 (p, "mulll", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1);
16223 orc_program_append_2 (p, "addl", 0, ORC_VAR_D1, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_D1);
16225 result = orc_program_compile (p);
16228 orc_once_mutex_unlock ();
16233 ex->arrays[ORC_VAR_D1] = d1;
16234 ex->arrays[ORC_VAR_S1] = (void *)s1;
16235 ex->arrays[ORC_VAR_S2] = (void *)s2;
16237 func = p->code_exec;
/*
 * Plain-C form of i420_to_ayuv: packs four byte values into each of n 32-bit
 * output elements.  From the visible merges, bits 0-7 come from var37, bits
 * 8-15 from var41, bits 16-23 from the s2 sample and bits 24-31 from the s3
 * sample.  s2 and s3 are indexed with i>>1, so each of their samples is used
 * for two consecutive outputs.
 * NOTE(review): the assignments of var37 (presumably p1) and var41
 * (presumably ptr4[i]) and the store of var38 are not visible in this
 * excerpt.
 * NOTE(review): a second definition with the same name follows later in the
 * file; the preprocessor conditional selecting between them is not visible.
 */
16246 i420_to_ayuv (orc_uint32 * d1, const orc_uint8 * s1, const orc_uint8 * s2, const orc_uint8 * s3, int p1, int n){
16248 orc_union32 * ORC_RESTRICT ptr0;
16249 const orc_int8 * ORC_RESTRICT ptr4;
16250 const orc_int8 * ORC_RESTRICT ptr5;
16251 const orc_int8 * ORC_RESTRICT ptr6;
16260 ptr0 = (orc_union32 *)d1;
16261 ptr4 = (orc_int8 *)s1;
16262 ptr5 = (orc_int8 *)s2;
16263 ptr6 = (orc_int8 *)s3;
16268 for (i = 0; i < n; i++) {
/* Half-rate index: outputs 2k and 2k+1 share sample k of s2 and s3. */
16270 var39 = ptr5[i>>1];
16272 var40 = ptr6[i>>1];
/* Byte pairs are merged into 16-bit values, low byte first. */
16276 var42.i = ((orc_uint8)var37 & 0x00ff) | ((orc_uint8)var41 << 8);
16278 var43.i = ((orc_uint8)var39 & 0x00ff) | ((orc_uint8)var40 << 8);
/* The two 16-bit halves are merged into the 32-bit result, var42 in the low half. */
16280 var38.i = ((orc_uint16)var42.i & 0x0000ffff) | ((orc_uint16)var43.i << 16);
/*
 * Backup (non-JIT) implementation registered for "i420_to_ayuv".
 * Takes the destination from ex->arrays[0], the three byte sources from
 * ex->arrays[4..6] and a scalar from ex->params[24], and packs four bytes
 * per 32-bit output: the scalar in bits 0-7, var41 in bits 8-15, the
 * arrays[5] sample in bits 16-23 and the arrays[6] sample in bits 24-31.
 * The arrays[5]/arrays[6] sources are indexed with i>>1.
 * NOTE(review): the declaration of n, the load of var41 (presumably ptr4[i])
 * and the store of var38 are not visible in this excerpt.
 */
16289 _backup_i420_to_ayuv (OrcExecutor * ORC_RESTRICT ex)
16293 orc_union32 * ORC_RESTRICT ptr0;
16294 const orc_int8 * ORC_RESTRICT ptr4;
16295 const orc_int8 * ORC_RESTRICT ptr5;
16296 const orc_int8 * ORC_RESTRICT ptr6;
16305 ptr0 = (orc_union32 *)ex->arrays[0];
16306 ptr4 = (orc_int8 *)ex->arrays[4];
16307 ptr5 = (orc_int8 *)ex->arrays[5];
16308 ptr6 = (orc_int8 *)ex->arrays[6];
/* Scalar that ends up in the lowest byte of every output element. */
16311 var37 = ex->params[24];
16313 for (i = 0; i < n; i++) {
/* Half-rate index: outputs 2k and 2k+1 share sample k of these two sources. */
16315 var39 = ptr5[i>>1];
16317 var40 = ptr6[i>>1];
/* Byte pairs are merged into 16-bit values, low byte first. */
16321 var42.i = ((orc_uint8)var37 & 0x00ff) | ((orc_uint8)var41 << 8);
16323 var43.i = ((orc_uint8)var39 & 0x00ff) | ((orc_uint8)var40 << 8);
/* The two 16-bit halves are merged into the 32-bit result, var42 in the low half. */
16325 var38.i = ((orc_uint16)var42.i & 0x0000ffff) | ((orc_uint16)var43.i << 16);
/*
 * i420_to_ayuv: executor-driven entry point.
 *
 * Builds a 1-D OrcProgram named "i420_to_ayuv" with a size-4 destination,
 * three size-1 sources, one size-1 parameter and five temporaries, compiles
 * it, and loads the executor with the caller's four buffers and p1.
 * NOTE(review): the p_inited guard, the assignment of n to the executor and
 * the call through func are not visible in this excerpt.
 */
16333 i420_to_ayuv (orc_uint32 * d1, const orc_uint8 * s1, const orc_uint8 * s2, const orc_uint8 * s3, int p1, int n)
16335 OrcExecutor _ex, *ex = &_ex;
16336 static int p_inited = 0;
16337 static OrcProgram *p = 0;
16338 void (*func) (OrcExecutor *);
/* Program construction and compilation happen between the once-mutex lock and unlock. */
16341 orc_once_mutex_lock ();
16343 OrcCompileResult result;
16345 p = orc_program_new ();
16346 orc_program_set_name (p, "i420_to_ayuv");
16347 orc_program_set_backup_function (p, _backup_i420_to_ayuv);
16348 orc_program_add_destination (p, 4, "d1");
16349 orc_program_add_source (p, 1, "s1");
16350 orc_program_add_source (p, 1, "s2");
16351 orc_program_add_source (p, 1, "s3");
16352 orc_program_add_parameter (p, 1, "p1");
16353 orc_program_add_temporary (p, 1, "t1");
16354 orc_program_add_temporary (p, 1, "t2");
16355 orc_program_add_temporary (p, 1, "t3");
16356 orc_program_add_temporary (p, 2, "t4");
16357 orc_program_add_temporary (p, 2, "t5");
/*
 * S2 and S3 are read with "loadupdb", S1 with "loadb"; then (P1, T3) and
 * (T1, T2) are merged pairwise with mergebw and the two results are merged
 * into D1 with mergewl.
 */
16359 orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
16360 orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T2, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
16361 orc_program_append_2 (p, "loadb", 0, ORC_VAR_T3, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
16362 orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T4, ORC_VAR_P1, ORC_VAR_T3, ORC_VAR_D1);
16363 orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
16364 orc_program_append_2 (p, "mergewl", 0, ORC_VAR_D1, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
16366 result = orc_program_compile (p);
16369 orc_once_mutex_unlock ();
16374 ex->arrays[ORC_VAR_D1] = d1;
16375 ex->arrays[ORC_VAR_S1] = (void *)s1;
16376 ex->arrays[ORC_VAR_S2] = (void *)s2;
16377 ex->arrays[ORC_VAR_S3] = (void *)s3;
16378 ex->params[ORC_VAR_P1] = p1;
16380 func = p->code_exec;
/*
 * Plain-C form of test_4x: for each of n 32-bit elements, adds the four
 * byte lanes (x4[0]..x4[3]) of var32 and var33 independently, treating each
 * lane as an unsigned byte and clamping every lane sum with ORC_CLAMP_UB.
 * NOTE(review): the loads of var32/var33 (presumably ptr4[i]/ptr5[i]) and
 * the store of var34 are not visible in this excerpt.
 * NOTE(review): a second definition with the same name follows later in the
 * file; the preprocessor conditional selecting between them is not visible.
 */
16389 test_4x (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n){
16391 orc_union32 * ORC_RESTRICT ptr0;
16392 const orc_union32 * ORC_RESTRICT ptr4;
16393 const orc_union32 * ORC_RESTRICT ptr5;
16398 ptr0 = (orc_union32 *)d1;
16399 ptr4 = (orc_union32 *)s1;
16400 ptr5 = (orc_union32 *)s2;
16403 for (i = 0; i < n; i++) {
/* One clamped unsigned add per byte lane; lanes do not carry into each other. */
16409 var34.x4[0] = ORC_CLAMP_UB((orc_uint8)var32.x4[0] + (orc_uint8)var33.x4[0]);
16410 var34.x4[1] = ORC_CLAMP_UB((orc_uint8)var32.x4[1] + (orc_uint8)var33.x4[1]);
16411 var34.x4[2] = ORC_CLAMP_UB((orc_uint8)var32.x4[2] + (orc_uint8)var33.x4[2]);
16412 var34.x4[3] = ORC_CLAMP_UB((orc_uint8)var32.x4[3] + (orc_uint8)var33.x4[3]);
/* _backup_test_4x: C implementation that test_4x registers through
 * orc_program_set_backup_function. Performs the same per-byte clamped unsigned
 * add as the plain-C test_4x, but takes its pointers from the executor.
 * NOTE(review): array slots 0, 4 and 5 presumably correspond to ORC_VAR_D1,
 * ORC_VAR_S1 and ORC_VAR_S2; where n comes from is not visible here. */
16421 _backup_test_4x (OrcExecutor * ORC_RESTRICT ex)
16425 orc_union32 * ORC_RESTRICT ptr0;
16426 const orc_union32 * ORC_RESTRICT ptr4;
16427 const orc_union32 * ORC_RESTRICT ptr5;
16432 ptr0 = (orc_union32 *)ex->arrays[0];
16433 ptr4 = (orc_union32 *)ex->arrays[4];
16434 ptr5 = (orc_union32 *)ex->arrays[5];
16437 for (i = 0; i < n; i++) {
/* one statement per byte lane of the 32-bit element */
16443 var34.x4[0] = ORC_CLAMP_UB((orc_uint8)var32.x4[0] + (orc_uint8)var33.x4[0]);
16444 var34.x4[1] = ORC_CLAMP_UB((orc_uint8)var32.x4[1] + (orc_uint8)var33.x4[1]);
16445 var34.x4[2] = ORC_CLAMP_UB((orc_uint8)var32.x4[2] + (orc_uint8)var33.x4[2]);
16446 var34.x4[3] = ORC_CLAMP_UB((orc_uint8)var32.x4[3] + (orc_uint8)var33.x4[3]);
/* test_4x, OrcProgram-backed variant: describes the kernel to Orc as a single
 * "addusb" instruction over 4-byte d1, s1 and s2, compiles it, then points an
 * on-stack executor at the caller's buffers and picks up p->code_exec.
 * NOTE(review): the p_inited check, the executor's n setup and the call
 * through func are not visible in this listing. */
16454 test_4x (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n)
16456 OrcExecutor _ex, *ex = &_ex;
/* static: the program object and its init flag persist across calls */
16457 static int p_inited = 0;
16458 static OrcProgram *p = 0;
16459 void (*func) (OrcExecutor *);
/* program construction is bracketed by orc_once_mutex_lock/unlock */
16462 orc_once_mutex_lock ();
16464 OrcCompileResult result;
16466 p = orc_program_new ();
16467 orc_program_set_name (p, "test_4x");
16468 orc_program_set_backup_function (p, _backup_test_4x);
16469 orc_program_add_destination (p, 4, "d1");
16470 orc_program_add_source (p, 4, "s1");
16471 orc_program_add_source (p, 4, "s2");
/* flags argument is 2 here; presumably selects the 4-lane form - TODO confirm */
16473 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
16475 result = orc_program_compile (p);
16478 orc_once_mutex_unlock ();
/* per-call binding of the caller's buffers */
16483 ex->arrays[ORC_VAR_D1] = d1;
16484 ex->arrays[ORC_VAR_S1] = (void *)s1;
16485 ex->arrays[ORC_VAR_S2] = (void *)s2;
16487 func = p->code_exec;
/* test_4x_2, plain-C variant: for each of the n elements, adds the four byte
 * lanes of var32 and var33 as unsigned values and passes each sum through
 * ORC_CLAMP_UB.
 * NOTE(review): var33 is presumably p1 replicated into all four lanes (the
 * backup function does exactly that from ex->params[24]); that setup, the load
 * of var32 and the store of var34 are not visible in this listing. */
16496 test_4x_2 (orc_uint32 * d1, const orc_uint32 * s1, int p1, int n){
16498 orc_union32 * ORC_RESTRICT ptr0;
16499 const orc_union32 * ORC_RESTRICT ptr4;
16504 ptr0 = (orc_union32 *)d1;
16505 ptr4 = (orc_union32 *)s1;
16513 for (i = 0; i < n; i++) {
16517 var34.x4[0] = ORC_CLAMP_UB((orc_uint8)var32.x4[0] + (orc_uint8)var33.x4[0]);
16518 var34.x4[1] = ORC_CLAMP_UB((orc_uint8)var32.x4[1] + (orc_uint8)var33.x4[1]);
16519 var34.x4[2] = ORC_CLAMP_UB((orc_uint8)var32.x4[2] + (orc_uint8)var33.x4[2]);
16520 var34.x4[3] = ORC_CLAMP_UB((orc_uint8)var32.x4[3] + (orc_uint8)var33.x4[3]);
/* _backup_test_4x_2: C implementation that test_4x_2 registers through
 * orc_program_set_backup_function. Copies the parameter into all four byte
 * lanes of var33 once, then per element does a clamped unsigned per-byte add
 * of var32 and var33.
 * NOTE(review): params[24] is presumably the ORC_VAR_P1 slot; the load of
 * var32 and the store of var34 are not visible in this listing. */
16529 _backup_test_4x_2 (OrcExecutor * ORC_RESTRICT ex)
16533 orc_union32 * ORC_RESTRICT ptr0;
16534 const orc_union32 * ORC_RESTRICT ptr4;
16539 ptr0 = (orc_union32 *)ex->arrays[0];
16540 ptr4 = (orc_union32 *)ex->arrays[4];
/* loop-invariant: the same parameter value goes into every lane */
16543 var33.x4[0] = ex->params[24];
16544 var33.x4[1] = ex->params[24];
16545 var33.x4[2] = ex->params[24];
16546 var33.x4[3] = ex->params[24];
16548 for (i = 0; i < n; i++) {
16552 var34.x4[0] = ORC_CLAMP_UB((orc_uint8)var32.x4[0] + (orc_uint8)var33.x4[0]);
16553 var34.x4[1] = ORC_CLAMP_UB((orc_uint8)var32.x4[1] + (orc_uint8)var33.x4[1]);
16554 var34.x4[2] = ORC_CLAMP_UB((orc_uint8)var32.x4[2] + (orc_uint8)var33.x4[2]);
16555 var34.x4[3] = ORC_CLAMP_UB((orc_uint8)var32.x4[3] + (orc_uint8)var33.x4[3]);
/* test_4x_2, OrcProgram-backed variant: describes the kernel to Orc as a single
 * "addusb" instruction taking 4-byte source s1 and 4-byte parameter p1 into
 * 4-byte destination d1, compiles it, then binds the caller's buffers and p1
 * to an on-stack executor and picks up p->code_exec.
 * NOTE(review): the p_inited check, the executor's n setup and the call
 * through func are not visible in this listing. */
16563 test_4x_2 (orc_uint32 * d1, const orc_uint32 * s1, int p1, int n)
16565 OrcExecutor _ex, *ex = &_ex;
/* static: the program object and its init flag persist across calls */
16566 static int p_inited = 0;
16567 static OrcProgram *p = 0;
16568 void (*func) (OrcExecutor *);
/* program construction is bracketed by orc_once_mutex_lock/unlock */
16571 orc_once_mutex_lock ();
16573 OrcCompileResult result;
16575 p = orc_program_new ();
16576 orc_program_set_name (p, "test_4x_2");
16577 orc_program_set_backup_function (p, _backup_test_4x_2);
16578 orc_program_add_destination (p, 4, "d1");
16579 orc_program_add_source (p, 4, "s1");
16580 orc_program_add_parameter (p, 4, "p1");
16582 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_P1, ORC_VAR_D1);
16584 result = orc_program_compile (p);
16587 orc_once_mutex_unlock ();
/* per-call binding of buffers and the scalar parameter */
16592 ex->arrays[ORC_VAR_D1] = d1;
16593 ex->arrays[ORC_VAR_S1] = (void *)s1;
16594 ex->params[ORC_VAR_P1] = p1;
16596 func = p->code_exec;
16602 /* orc_splat_u16 */
/* orc_splat_u16, plain-C variant: views d1 as orc_union16 and loops over n
 * elements.
 * NOTE(review): the loop body is not visible in this listing; judging by the
 * "copyw" D1 <- P1 program in the OrcProgram variant it presumably stores p1
 * into every element - confirm. */
16605 orc_splat_u16 (uint16_t * d1, int p1, int n){
16607 orc_union16 * ORC_RESTRICT ptr0;
16611 ptr0 = (orc_union16 *)d1;
16616 for (i = 0; i < n; i++) {
/* _backup_orc_splat_u16: C implementation that orc_splat_u16 registers through
 * orc_program_set_backup_function. Reads the destination from ex->arrays[0]
 * and the value from ex->params[24] once, before looping over n elements.
 * NOTE(review): params[24] is presumably the ORC_VAR_P1 slot; the per-element
 * store of var32 is not visible in this listing. */
16627 _backup_orc_splat_u16 (OrcExecutor * ORC_RESTRICT ex)
16631 orc_union16 * ORC_RESTRICT ptr0;
16635 ptr0 = (orc_union16 *)ex->arrays[0];
16638 var32.i = ex->params[24];
16640 for (i = 0; i < n; i++) {
/* orc_splat_u16, OrcProgram-backed variant: describes the kernel to Orc as a
 * single "copyw" from 2-byte parameter p1 to 2-byte destination d1, compiles
 * it, then binds d1 and p1 to an on-stack executor and picks up p->code_exec.
 * NOTE(review): the p_inited check, the executor's n setup and the call
 * through func are not visible in this listing. */
16650 orc_splat_u16 (uint16_t * d1, int p1, int n)
16652 OrcExecutor _ex, *ex = &_ex;
/* static: the program object and its init flag persist across calls */
16653 static int p_inited = 0;
16654 static OrcProgram *p = 0;
16655 void (*func) (OrcExecutor *);
/* program construction is bracketed by orc_once_mutex_lock/unlock */
16658 orc_once_mutex_lock ();
16660 OrcCompileResult result;
16662 p = orc_program_new ();
16663 orc_program_set_name (p, "orc_splat_u16");
16664 orc_program_set_backup_function (p, _backup_orc_splat_u16);
16665 orc_program_add_destination (p, 2, "d1");
16666 orc_program_add_parameter (p, 2, "p1");
16668 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);
16670 result = orc_program_compile (p);
16673 orc_once_mutex_unlock ();
/* per-call binding of the destination and the scalar parameter */
16678 ex->arrays[ORC_VAR_D1] = d1;
16679 ex->params[ORC_VAR_P1] = p1;
16681 func = p->code_exec;
16687 /* orc_splat_u32 */
/* orc_splat_u32, plain-C variant: views d1 as orc_union32 and loops over n
 * elements.
 * NOTE(review): the loop body is not visible in this listing; judging by the
 * "copyl" D1 <- P1 program in the OrcProgram variant it presumably stores p1
 * into every element - confirm. */
16690 orc_splat_u32 (uint32_t * d1, int p1, int n){
16692 orc_union32 * ORC_RESTRICT ptr0;
16696 ptr0 = (orc_union32 *)d1;
16701 for (i = 0; i < n; i++) {
/* _backup_orc_splat_u32: C implementation that orc_splat_u32 registers through
 * orc_program_set_backup_function. Reads the destination from ex->arrays[0]
 * and the value from ex->params[24] once, before looping over n elements.
 * NOTE(review): params[24] is presumably the ORC_VAR_P1 slot; the per-element
 * store of var32 is not visible in this listing. */
16712 _backup_orc_splat_u32 (OrcExecutor * ORC_RESTRICT ex)
16716 orc_union32 * ORC_RESTRICT ptr0;
16720 ptr0 = (orc_union32 *)ex->arrays[0];
16723 var32.i = ex->params[24];
16725 for (i = 0; i < n; i++) {
/* orc_splat_u32, OrcProgram-backed variant: describes the kernel to Orc as a
 * single "copyl" from 4-byte parameter p1 to 4-byte destination d1, compiles
 * it, then binds d1 and p1 to an on-stack executor and picks up p->code_exec.
 * NOTE(review): the p_inited check, the executor's n setup and the call
 * through func are not visible in this listing. */
16735 orc_splat_u32 (uint32_t * d1, int p1, int n)
16737 OrcExecutor _ex, *ex = &_ex;
/* static: the program object and its init flag persist across calls */
16738 static int p_inited = 0;
16739 static OrcProgram *p = 0;
16740 void (*func) (OrcExecutor *);
/* program construction is bracketed by orc_once_mutex_lock/unlock */
16743 orc_once_mutex_lock ();
16745 OrcCompileResult result;
16747 p = orc_program_new ();
16748 orc_program_set_name (p, "orc_splat_u32");
16749 orc_program_set_backup_function (p, _backup_orc_splat_u32);
16750 orc_program_add_destination (p, 4, "d1");
16751 orc_program_add_parameter (p, 4, "p1");
16753 orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);
16755 result = orc_program_compile (p);
16758 orc_once_mutex_unlock ();
/* per-call binding of the destination and the scalar parameter */
16763 ex->arrays[ORC_VAR_D1] = d1;
16764 ex->params[ORC_VAR_P1] = p1;
16766 func = p->code_exec;
16772 /* orc_splat_u16_2d */
/* orc_splat_u16_2d, plain-C variant: iterates over m rows; row j starts at
 * ORC_PTR_OFFSET(d1, d1_stride * j) and has n elements.
 * NOTE(review): d1_stride is presumably a byte offset between rows; the inner
 * loop body (presumably a store of p1 into each element) is not visible in
 * this listing. */
16775 orc_splat_u16_2d (uint16_t * d1, int d1_stride, int p1, int n, int m){
16778 orc_union16 * ORC_RESTRICT ptr0;
16782 for (j = 0; j < m; j++) {
16783 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
16788 for (i = 0; i < n; i++) {
/* _backup_orc_splat_u16_2d: C implementation that orc_splat_u16_2d registers
 * through orc_program_set_backup_function. Row count comes from
 * ex->params[ORC_VAR_A1]; for each row the destination pointer is
 * ex->arrays[0] offset by ex->params[0] * j and the value is read from
 * ex->params[24].
 * NOTE(review): params[0] presumably holds the d1 stride and params[24] the
 * ORC_VAR_P1 value; the per-element store is not visible in this listing. */
16800 _backup_orc_splat_u16_2d (OrcExecutor * ORC_RESTRICT ex)
16805 int m = ex->params[ORC_VAR_A1];
16806 orc_union16 * ORC_RESTRICT ptr0;
16810 for (j = 0; j < m; j++) {
16811 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
16814 var32.i = ex->params[24];
16816 for (i = 0; i < n; i++) {
/* orc_splat_u16_2d, OrcProgram-backed variant: marks the program as 2D with
 * orc_program_set_2d, describes it as a single "copyw" from 2-byte parameter
 * p1 to 2-byte destination d1, compiles it, then binds m, d1, d1_stride and
 * p1 to an on-stack executor and picks up p->code_exec.
 * NOTE(review): the p_inited check, the executor's n setup and the call
 * through func are not visible in this listing. */
16827 orc_splat_u16_2d (uint16_t * d1, int d1_stride, int p1, int n, int m)
16829 OrcExecutor _ex, *ex = &_ex;
/* static: the program object and its init flag persist across calls */
16830 static int p_inited = 0;
16831 static OrcProgram *p = 0;
16832 void (*func) (OrcExecutor *);
/* program construction is bracketed by orc_once_mutex_lock/unlock */
16835 orc_once_mutex_lock ();
16837 OrcCompileResult result;
16839 p = orc_program_new ();
16840 orc_program_set_2d (p);
16841 orc_program_set_name (p, "orc_splat_u16_2d");
16842 orc_program_set_backup_function (p, _backup_orc_splat_u16_2d);
16843 orc_program_add_destination (p, 2, "d1");
16844 orc_program_add_parameter (p, 2, "p1");
16846 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);
16848 result = orc_program_compile (p);
16851 orc_once_mutex_unlock ();
/* the array pointer and its row stride share the ORC_VAR_D1 index in
 * ex->arrays[] and ex->params[] respectively */
16856 ORC_EXECUTOR_M(ex) = m;
16857 ex->arrays[ORC_VAR_D1] = d1;
16858 ex->params[ORC_VAR_D1] = d1_stride;
16859 ex->params[ORC_VAR_P1] = p1;
16861 func = p->code_exec;
16867 /* orc_splat_u32_2d */
/* orc_splat_u32_2d, plain-C variant: iterates over m rows; row j starts at
 * ORC_PTR_OFFSET(d1, d1_stride * j) and has n elements.
 * NOTE(review): d1_stride is presumably a byte offset between rows; the inner
 * loop body (presumably a store of p1 into each element) is not visible in
 * this listing. */
16870 orc_splat_u32_2d (uint32_t * d1, int d1_stride, int p1, int n, int m){
16873 orc_union32 * ORC_RESTRICT ptr0;
16877 for (j = 0; j < m; j++) {
16878 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
16883 for (i = 0; i < n; i++) {
/* _backup_orc_splat_u32_2d: C implementation that orc_splat_u32_2d registers
 * through orc_program_set_backup_function. Row count comes from
 * ex->params[ORC_VAR_A1]; for each row the destination pointer is
 * ex->arrays[0] offset by ex->params[0] * j and the value is read from
 * ex->params[24].
 * NOTE(review): params[0] presumably holds the d1 stride and params[24] the
 * ORC_VAR_P1 value; the per-element store is not visible in this listing. */
16895 _backup_orc_splat_u32_2d (OrcExecutor * ORC_RESTRICT ex)
16900 int m = ex->params[ORC_VAR_A1];
16901 orc_union32 * ORC_RESTRICT ptr0;
16905 for (j = 0; j < m; j++) {
16906 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
16909 var32.i = ex->params[24];
16911 for (i = 0; i < n; i++) {
/* orc_splat_u32_2d, OrcProgram-backed variant: marks the program as 2D with
 * orc_program_set_2d, describes it as a single "copyl" from 4-byte parameter
 * p1 to 4-byte destination d1, compiles it, then binds m, d1, d1_stride and
 * p1 to an on-stack executor and picks up p->code_exec.
 * NOTE(review): the p_inited check, the executor's n setup and the call
 * through func are not visible in this listing. */
16922 orc_splat_u32_2d (uint32_t * d1, int d1_stride, int p1, int n, int m)
16924 OrcExecutor _ex, *ex = &_ex;
/* static: the program object and its init flag persist across calls */
16925 static int p_inited = 0;
16926 static OrcProgram *p = 0;
16927 void (*func) (OrcExecutor *);
/* program construction is bracketed by orc_once_mutex_lock/unlock */
16930 orc_once_mutex_lock ();
16932 OrcCompileResult result;
16934 p = orc_program_new ();
16935 orc_program_set_2d (p);
16936 orc_program_set_name (p, "orc_splat_u32_2d");
16937 orc_program_set_backup_function (p, _backup_orc_splat_u32_2d);
16938 orc_program_add_destination (p, 4, "d1");
16939 orc_program_add_parameter (p, 4, "p1");
16941 orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);
16943 result = orc_program_compile (p);
16946 orc_once_mutex_unlock ();
/* the array pointer and its row stride share the ORC_VAR_D1 index in
 * ex->arrays[] and ex->params[] respectively */
16951 ORC_EXECUTOR_M(ex) = m;
16952 ex->arrays[ORC_VAR_D1] = d1;
16953 ex->params[ORC_VAR_D1] = d1_stride;
16954 ex->params[ORC_VAR_P1] = p1;
16956 func = p->code_exec;
16962 /* orc_copy_u16_2d */
/* orc_copy_u16_2d, plain-C variant: iterates over m rows; for row j the
 * destination and source row pointers are d1 and s1 offset by their own
 * stride times j, and each row has n elements.
 * NOTE(review): strides are presumably byte offsets; the inner loop body
 * (presumably a 16-bit load from ptr4 and store to ptr0) is not visible in
 * this listing. */
16965 orc_copy_u16_2d (orc_uint16 * d1, int d1_stride, const orc_uint16 * s1, int s1_stride, int n, int m){
16968 orc_union16 * ORC_RESTRICT ptr0;
16969 const orc_union16 * ORC_RESTRICT ptr4;
16973 for (j = 0; j < m; j++) {
16974 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
16975 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
16978 for (i = 0; i < n; i++) {
/* _backup_orc_copy_u16_2d: C implementation that orc_copy_u16_2d registers
 * through orc_program_set_backup_function. Row count comes from
 * ex->params[ORC_VAR_A1]; row pointers are ex->arrays[0] and ex->arrays[4]
 * offset by ex->params[0] * j and ex->params[4] * j.
 * NOTE(review): slots 0 and 4 presumably correspond to ORC_VAR_D1 and
 * ORC_VAR_S1; the per-element copy is not visible in this listing. */
16992 _backup_orc_copy_u16_2d (OrcExecutor * ORC_RESTRICT ex)
16997 int m = ex->params[ORC_VAR_A1];
16998 orc_union16 * ORC_RESTRICT ptr0;
16999 const orc_union16 * ORC_RESTRICT ptr4;
17003 for (j = 0; j < m; j++) {
17004 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
17005 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
17008 for (i = 0; i < n; i++) {
/* orc_copy_u16_2d, OrcProgram-backed variant: marks the program as 2D with
 * orc_program_set_2d, describes it as a single "copyw" from 2-byte source s1
 * to 2-byte destination d1, compiles it, then binds m, both buffers and both
 * strides to an on-stack executor and picks up p->code_exec.
 * NOTE(review): the p_inited check, the executor's n setup and the call
 * through func are not visible in this listing. */
17021 orc_copy_u16_2d (orc_uint16 * d1, int d1_stride, const orc_uint16 * s1, int s1_stride, int n, int m)
17023 OrcExecutor _ex, *ex = &_ex;
/* static: the program object and its init flag persist across calls */
17024 static int p_inited = 0;
17025 static OrcProgram *p = 0;
17026 void (*func) (OrcExecutor *);
/* program construction is bracketed by orc_once_mutex_lock/unlock */
17029 orc_once_mutex_lock ();
17031 OrcCompileResult result;
17033 p = orc_program_new ();
17034 orc_program_set_2d (p);
17035 orc_program_set_name (p, "orc_copy_u16_2d");
17036 orc_program_set_backup_function (p, _backup_orc_copy_u16_2d);
17037 orc_program_add_destination (p, 2, "d1");
17038 orc_program_add_source (p, 2, "s1");
17040 orc_program_append_2 (p, "copyw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
17042 result = orc_program_compile (p);
17045 orc_once_mutex_unlock ();
/* each array pointer and its row stride share one variable index in
 * ex->arrays[] and ex->params[] respectively */
17050 ORC_EXECUTOR_M(ex) = m;
17051 ex->arrays[ORC_VAR_D1] = d1;
17052 ex->params[ORC_VAR_D1] = d1_stride;
17053 ex->arrays[ORC_VAR_S1] = (void *)s1;
17054 ex->params[ORC_VAR_S1] = s1_stride;
17056 func = p->code_exec;
17062 /* orc_copy_u32_2d */
/* orc_copy_u32_2d, plain-C variant: iterates over m rows; for row j the
 * destination and source row pointers are d1 and s1 offset by their own
 * stride times j, and each row has n elements.
 * NOTE(review): strides are presumably byte offsets; the inner loop body
 * (presumably a 32-bit load from ptr4 and store to ptr0) is not visible in
 * this listing. */
17065 orc_copy_u32_2d (orc_uint32 * d1, int d1_stride, const orc_uint32 * s1, int s1_stride, int n, int m){
17068 orc_union32 * ORC_RESTRICT ptr0;
17069 const orc_union32 * ORC_RESTRICT ptr4;
17073 for (j = 0; j < m; j++) {
17074 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
17075 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
17078 for (i = 0; i < n; i++) {
/* _backup_orc_copy_u32_2d: C implementation that orc_copy_u32_2d registers
 * through orc_program_set_backup_function. Row count comes from
 * ex->params[ORC_VAR_A1]; row pointers are ex->arrays[0] and ex->arrays[4]
 * offset by ex->params[0] * j and ex->params[4] * j.
 * NOTE(review): slots 0 and 4 presumably correspond to ORC_VAR_D1 and
 * ORC_VAR_S1; the per-element copy is not visible in this listing. */
17092 _backup_orc_copy_u32_2d (OrcExecutor * ORC_RESTRICT ex)
17097 int m = ex->params[ORC_VAR_A1];
17098 orc_union32 * ORC_RESTRICT ptr0;
17099 const orc_union32 * ORC_RESTRICT ptr4;
17103 for (j = 0; j < m; j++) {
17104 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
17105 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
17108 for (i = 0; i < n; i++) {
/* orc_copy_u32_2d, OrcProgram-backed variant: marks the program as 2D with
 * orc_program_set_2d, describes it as a single "copyl" from 4-byte source s1
 * to 4-byte destination d1, compiles it, then binds m, both buffers and both
 * strides to an on-stack executor and picks up p->code_exec.
 * NOTE(review): the p_inited check, the executor's n setup and the call
 * through func are not visible in this listing. */
17121 orc_copy_u32_2d (orc_uint32 * d1, int d1_stride, const orc_uint32 * s1, int s1_stride, int n, int m)
17123 OrcExecutor _ex, *ex = &_ex;
/* static: the program object and its init flag persist across calls */
17124 static int p_inited = 0;
17125 static OrcProgram *p = 0;
17126 void (*func) (OrcExecutor *);
/* program construction is bracketed by orc_once_mutex_lock/unlock */
17129 orc_once_mutex_lock ();
17131 OrcCompileResult result;
17133 p = orc_program_new ();
17134 orc_program_set_2d (p);
17135 orc_program_set_name (p, "orc_copy_u32_2d");
17136 orc_program_set_backup_function (p, _backup_orc_copy_u32_2d);
17137 orc_program_add_destination (p, 4, "d1");
17138 orc_program_add_source (p, 4, "s1");
17140 orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
17142 result = orc_program_compile (p);
17145 orc_once_mutex_unlock ();
/* each array pointer and its row stride share one variable index in
 * ex->arrays[] and ex->params[] respectively */
17150 ORC_EXECUTOR_M(ex) = m;
17151 ex->arrays[ORC_VAR_D1] = d1;
17152 ex->params[ORC_VAR_D1] = d1_stride;
17153 ex->arrays[ORC_VAR_S1] = (void *)s1;
17154 ex->params[ORC_VAR_S1] = s1_stride;
17156 func = p->code_exec;
17162 /* orc_composite_add_8888_8888_2d */
/* orc_composite_add_8888_8888_2d, plain-C variant: iterates over m rows of n
 * 32-bit elements; for each element adds the four byte lanes of var32 and
 * var33 as unsigned values and passes each sum through ORC_CLAMP_UB.
 * NOTE(review): per the "addusb" D1 <- D1, S1 program in the OrcProgram
 * variant, var32/var33 are presumably the current destination and source
 * values; their loads and the store of var34 are not visible in this listing.
 * Strides are presumably byte offsets. */
17165 orc_composite_add_8888_8888_2d (orc_uint32 * d1, int d1_stride, const orc_uint32 * s1, int s1_stride, int n, int m){
17168 orc_union32 * ORC_RESTRICT ptr0;
17169 const orc_union32 * ORC_RESTRICT ptr4;
17174 for (j = 0; j < m; j++) {
17175 ptr0 = ORC_PTR_OFFSET(d1, d1_stride * j);
17176 ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
17179 for (i = 0; i < n; i++) {
17185 var34.x4[0] = ORC_CLAMP_UB((orc_uint8)var32.x4[0] + (orc_uint8)var33.x4[0]);
17186 var34.x4[1] = ORC_CLAMP_UB((orc_uint8)var32.x4[1] + (orc_uint8)var33.x4[1]);
17187 var34.x4[2] = ORC_CLAMP_UB((orc_uint8)var32.x4[2] + (orc_uint8)var33.x4[2]);
17188 var34.x4[3] = ORC_CLAMP_UB((orc_uint8)var32.x4[3] + (orc_uint8)var33.x4[3]);
/* _backup_orc_composite_add_8888_8888_2d: C implementation that
 * orc_composite_add_8888_8888_2d registers through
 * orc_program_set_backup_function. Row count comes from
 * ex->params[ORC_VAR_A1]; row pointers are ex->arrays[0] and ex->arrays[4]
 * offset by ex->params[0] * j and ex->params[4] * j. Each element gets a
 * per-byte unsigned add passed through ORC_CLAMP_UB.
 * NOTE(review): the loads of var32/var33 and the store of var34 are not
 * visible in this listing. */
17198 _backup_orc_composite_add_8888_8888_2d (OrcExecutor * ORC_RESTRICT ex)
17203 int m = ex->params[ORC_VAR_A1];
17204 orc_union32 * ORC_RESTRICT ptr0;
17205 const orc_union32 * ORC_RESTRICT ptr4;
17210 for (j = 0; j < m; j++) {
17211 ptr0 = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * j);
17212 ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
17215 for (i = 0; i < n; i++) {
17221 var34.x4[0] = ORC_CLAMP_UB((orc_uint8)var32.x4[0] + (orc_uint8)var33.x4[0]);
17222 var34.x4[1] = ORC_CLAMP_UB((orc_uint8)var32.x4[1] + (orc_uint8)var33.x4[1]);
17223 var34.x4[2] = ORC_CLAMP_UB((orc_uint8)var32.x4[2] + (orc_uint8)var33.x4[2]);
17224 var34.x4[3] = ORC_CLAMP_UB((orc_uint8)var32.x4[3] + (orc_uint8)var33.x4[3]);
/* orc_composite_add_8888_8888_2d, OrcProgram-backed variant: marks the program
 * as 2D, describes it as a single "addusb" whose operands are D1 and S1 and
 * whose result goes back to D1 (both 4 bytes wide), compiles it, then binds m,
 * both buffers and both strides to an on-stack executor and picks up
 * p->code_exec.
 * NOTE(review): the p_inited check, the executor's n setup and the call
 * through func are not visible in this listing. */
17233 orc_composite_add_8888_8888_2d (orc_uint32 * d1, int d1_stride, const orc_uint32 * s1, int s1_stride, int n, int m)
17235 OrcExecutor _ex, *ex = &_ex;
/* static: the program object and its init flag persist across calls */
17236 static int p_inited = 0;
17237 static OrcProgram *p = 0;
17238 void (*func) (OrcExecutor *);
/* program construction is bracketed by orc_once_mutex_lock/unlock */
17241 orc_once_mutex_lock ();
17243 OrcCompileResult result;
17245 p = orc_program_new ();
17246 orc_program_set_2d (p);
17247 orc_program_set_name (p, "orc_composite_add_8888_8888_2d");
17248 orc_program_set_backup_function (p, _backup_orc_composite_add_8888_8888_2d);
17249 orc_program_add_destination (p, 4, "d1");
17250 orc_program_add_source (p, 4, "s1");
/* destination doubles as the first operand: d1 is read and then overwritten */
17252 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1);
17254 result = orc_program_compile (p);
17257 orc_once_mutex_unlock ();
17262 ORC_EXECUTOR_M(ex) = m;
17263 ex->arrays[ORC_VAR_D1] = d1;
17264 ex->params[ORC_VAR_D1] = d1_stride;
17265 ex->arrays[ORC_VAR_S1] = (void *)s1;
17266 ex->params[ORC_VAR_S1] = s1_stride;
17268 func = p->code_exec;
17274 /* orc_composite_add_8_8_line */
/* orc_composite_add_8_8_line, plain-C variant: walks n bytes; for each one
 * adds var32 and var33 as unsigned bytes and passes the sum through
 * ORC_CLAMP_UB.
 * NOTE(review): per the "addusb" D1 <- D1, S1 program in the OrcProgram
 * variant, var32/var33 are presumably the current destination and source
 * bytes; their loads and the store of var34 are not visible in this listing. */
17277 orc_composite_add_8_8_line (orc_uint8 * d1, const orc_uint8 * s1, int n){
17279 orc_int8 * ORC_RESTRICT ptr0;
17280 const orc_int8 * ORC_RESTRICT ptr4;
/* buffers are accessed through signed-byte pointers; the add below casts the
 * values back to orc_uint8 */
17285 ptr0 = (orc_int8 *)d1;
17286 ptr4 = (orc_int8 *)s1;
17289 for (i = 0; i < n; i++) {
17295 var34 = ORC_CLAMP_UB((orc_uint8)var32 + (orc_uint8)var33);
/* _backup_orc_composite_add_8_8_line: C implementation that
 * orc_composite_add_8_8_line registers through
 * orc_program_set_backup_function. Takes its byte pointers from
 * ex->arrays[0] and ex->arrays[4] and, per element, adds two unsigned bytes
 * and passes the sum through ORC_CLAMP_UB.
 * NOTE(review): the loads of var32/var33 and the store of var34 are not
 * visible in this listing. */
17304 _backup_orc_composite_add_8_8_line (OrcExecutor * ORC_RESTRICT ex)
17308 orc_int8 * ORC_RESTRICT ptr0;
17309 const orc_int8 * ORC_RESTRICT ptr4;
17314 ptr0 = (orc_int8 *)ex->arrays[0];
17315 ptr4 = (orc_int8 *)ex->arrays[4];
17318 for (i = 0; i < n; i++) {
17324 var34 = ORC_CLAMP_UB((orc_uint8)var32 + (orc_uint8)var33);
/* orc_composite_add_8_8_line, OrcProgram-backed variant: describes the kernel
 * to Orc as a single "addusb" whose operands are D1 and S1 and whose result
 * goes back to D1 (both 1 byte wide), compiles it, then binds the caller's
 * buffers to an on-stack executor and picks up p->code_exec.
 * NOTE(review): the p_inited check, the executor's n setup and the call
 * through func are not visible in this listing. */
17332 orc_composite_add_8_8_line (orc_uint8 * d1, const orc_uint8 * s1, int n)
17334 OrcExecutor _ex, *ex = &_ex;
/* static: the program object and its init flag persist across calls */
17335 static int p_inited = 0;
17336 static OrcProgram *p = 0;
17337 void (*func) (OrcExecutor *);
/* program construction is bracketed by orc_once_mutex_lock/unlock */
17340 orc_once_mutex_lock ();
17342 OrcCompileResult result;
17344 p = orc_program_new ();
17345 orc_program_set_name (p, "orc_composite_add_8_8_line");
17346 orc_program_set_backup_function (p, _backup_orc_composite_add_8_8_line);
17347 orc_program_add_destination (p, 1, "d1");
17348 orc_program_add_source (p, 1, "s1");
17350 orc_program_append_2 (p, "addusb", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1);
17352 result = orc_program_compile (p);
17355 orc_once_mutex_unlock ();
17360 ex->arrays[ORC_VAR_D1] = d1;
17361 ex->arrays[ORC_VAR_S1] = (void *)s1;
17363 func = p->code_exec;
17369 /* orc_composite_add_n_8_8_line */
/* orc_composite_add_n_8_8_line, plain-C variant: per element, widens a byte,
 * multiplies it by a 16-bit factor, applies the (x+128 + ((x+128)>>8)) >> 8
 * step, and adds the result to another byte through ORC_CLAMP_UB.
 * NOTE(review): per the OrcProgram variant's instruction list (convubw,
 * mullw by P1, div255w, convwb, addusb with D1), var34 is presumably the s1
 * byte, var35 the p1 factor, var36 the current d1 byte and var41 the narrowed
 * var40; those loads/stores are not visible in this listing. */
17372 orc_composite_add_n_8_8_line (orc_uint8 * d1, const orc_uint8 * s1, int p1, int n){
17374 orc_int8 * ORC_RESTRICT ptr0;
17375 const orc_int8 * ORC_RESTRICT ptr4;
17385 ptr0 = (orc_int8 *)d1;
17386 ptr4 = (orc_int8 *)s1;
17391 for (i = 0; i < n; i++) {
/* zero-extend the byte into a wider lane */
17395 var38.i = (orc_uint8)var34;
/* product truncated to its low 16 bits */
17397 var39.i = (var38.i * var35.i) & 0xffff;
/* presumably a rounded divide-by-255 approximation (matches the "div255w"
 * opcode name). NOTE(review): mixes uint16_t with orc_uint16 casts. */
17399 var40.i = ((uint16_t)(((orc_uint16)(var39.i+128)) + (((orc_uint16)(var39.i+128))>>8)))>>8;
17405 var37 = ORC_CLAMP_UB((orc_uint8)var36 + (orc_uint8)var41);
/* _backup_orc_composite_add_n_8_8_line: C implementation that
 * orc_composite_add_n_8_8_line registers through
 * orc_program_set_backup_function. Reads the multiplier from ex->params[24]
 * once, then per element widens a byte, multiplies it, applies the
 * (x+128 + ((x+128)>>8)) >> 8 step and adds the result to another byte
 * through ORC_CLAMP_UB.
 * NOTE(review): params[24] is presumably the ORC_VAR_P1 slot; the loads of
 * var34/var36, the narrowing into var41 and the store of var37 are not
 * visible in this listing. */
17414 _backup_orc_composite_add_n_8_8_line (OrcExecutor * ORC_RESTRICT ex)
17418 orc_int8 * ORC_RESTRICT ptr0;
17419 const orc_int8 * ORC_RESTRICT ptr4;
17429 ptr0 = (orc_int8 *)ex->arrays[0];
17430 ptr4 = (orc_int8 *)ex->arrays[4];
17433 var35.i = ex->params[24];
17435 for (i = 0; i < n; i++) {
/* zero-extend the byte into a wider lane */
17439 var38.i = (orc_uint8)var34;
/* product truncated to its low 16 bits */
17441 var39.i = (var38.i * var35.i) & 0xffff;
/* presumably a rounded divide-by-255 approximation (matches the "div255w"
 * opcode name). NOTE(review): mixes uint16_t with orc_uint16 casts. */
17443 var40.i = ((uint16_t)(((orc_uint16)(var39.i+128)) + (((orc_uint16)(var39.i+128))>>8)))>>8;
17449 var37 = ORC_CLAMP_UB((orc_uint8)var36 + (orc_uint8)var41);
/* orc_composite_add_n_8_8_line, OrcProgram-backed variant: describes a
 * five-instruction kernel to Orc (1-byte d1 and s1, 2-byte parameter p1,
 * temporaries t1 of 2 bytes and t2 of 1 byte), compiles it, then binds the
 * caller's buffers and p1 to an on-stack executor and picks up p->code_exec.
 * NOTE(review): the p_inited check, the executor's n setup and the call
 * through func are not visible in this listing. */
17457 orc_composite_add_n_8_8_line (orc_uint8 * d1, const orc_uint8 * s1, int p1, int n)
17459 OrcExecutor _ex, *ex = &_ex;
/* static: the program object and its init flag persist across calls */
17460 static int p_inited = 0;
17461 static OrcProgram *p = 0;
17462 void (*func) (OrcExecutor *);
/* program construction is bracketed by orc_once_mutex_lock/unlock */
17465 orc_once_mutex_lock ();
17467 OrcCompileResult result;
17469 p = orc_program_new ();
17470 orc_program_set_name (p, "orc_composite_add_n_8_8_line");
17471 orc_program_set_backup_function (p, _backup_orc_composite_add_n_8_8_line);
17472 orc_program_add_destination (p, 1, "d1");
17473 orc_program_add_source (p, 1, "s1");
17474 orc_program_add_parameter (p, 2, "p1");
17475 orc_program_add_temporary (p, 2, "t1");
17476 orc_program_add_temporary (p, 1, "t2");
/* t1 <- convubw(s1); t1 <- mullw(t1, p1); t1 <- div255w(t1);
 * t2 <- convwb(t1); d1 <- addusb(d1, t2) */
17478 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
17479 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
17480 orc_program_append_2 (p, "div255w", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
17481 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
17482 orc_program_append_2 (p, "addusb", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1);
17484 result = orc_program_compile (p);
17487 orc_once_mutex_unlock ();
/* per-call binding of buffers and the scalar parameter */
17492 ex->arrays[ORC_VAR_D1] = d1;
17493 ex->arrays[ORC_VAR_S1] = (void *)s1;
17494 ex->params[ORC_VAR_P1] = p1;
17496 func = p->code_exec;
17502 /* orc_code_combine_add_u */
/* orc_code_combine_add_u, plain-C variant: per 32-bit element, widens the four
 * bytes of two inputs to 16-bit lanes, replicates the top 16-bit lane of the
 * second across all lanes, multiplies lane-wise, applies the
 * (x+128 + ((x+128)>>8)) >> 8 step, narrows back to bytes and adds the result
 * to a third value through ORC_CLAMP_UB.
 * NOTE(review): per the OrcProgram variant's instruction list, var35/var36
 * are presumably the s1/s2 elements and var37 the current d1 element; their
 * loads and the store of var38 are not visible in this listing. */
17505 orc_code_combine_add_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n){
17507 orc_union32 * ORC_RESTRICT ptr0;
17508 const orc_union32 * ORC_RESTRICT ptr4;
17509 const orc_union32 * ORC_RESTRICT ptr5;
17521 ptr0 = (orc_union32 *)d1;
17522 ptr4 = (orc_union32 *)s1;
17523 ptr5 = (orc_union32 *)s2;
17526 for (i = 0; i < n; i++) {
/* zero-extend each byte lane of var35 */
17530 var39.x4[0] = (orc_uint8)var35.x4[0];
17531 var39.x4[1] = (orc_uint8)var35.x4[1];
17532 var39.x4[2] = (orc_uint8)var35.x4[2];
17533 var39.x4[3] = (orc_uint8)var35.x4[3];
/* zero-extend each byte lane of var36 */
17537 var40.x4[0] = (orc_uint8)var36.x4[0];
17538 var40.x4[1] = (orc_uint8)var36.x4[1];
17539 var40.x4[2] = (orc_uint8)var36.x4[2];
17540 var40.x4[3] = (orc_uint8)var36.x4[3];
/* copy bits 48..63 of var40 into all four 16-bit positions */
17542 var41.i = ((((orc_uint64)var40.i)>>48) << 48) | ((((orc_uint64)var40.i)>>48)<<32) | ((((orc_uint64)var40.i)>>48) << 16) | ((((orc_uint64)var40.i)>>48));
/* lane-wise product truncated to 16 bits */
17544 var42.x4[0] = (var39.x4[0] * var41.x4[0]) & 0xffff;
17545 var42.x4[1] = (var39.x4[1] * var41.x4[1]) & 0xffff;
17546 var42.x4[2] = (var39.x4[2] * var41.x4[2]) & 0xffff;
17547 var42.x4[3] = (var39.x4[3] * var41.x4[3]) & 0xffff;
/* presumably a rounded divide-by-255 approximation ("div255w").
 * NOTE(review): mixes uint16_t with orc_uint16 casts. */
17549 var43.x4[0] = ((uint16_t)(((orc_uint16)(var42.x4[0]+128)) + (((orc_uint16)(var42.x4[0]+128))>>8)))>>8;
17550 var43.x4[1] = ((uint16_t)(((orc_uint16)(var42.x4[1]+128)) + (((orc_uint16)(var42.x4[1]+128))>>8)))>>8;
17551 var43.x4[2] = ((uint16_t)(((orc_uint16)(var42.x4[2]+128)) + (((orc_uint16)(var42.x4[2]+128))>>8)))>>8;
17552 var43.x4[3] = ((uint16_t)(((orc_uint16)(var42.x4[3]+128)) + (((orc_uint16)(var42.x4[3]+128))>>8)))>>8;
/* lane-wise copy into var44 (presumably narrowing 16-bit lanes to bytes) */
17554 var44.x4[0] = var43.x4[0];
17555 var44.x4[1] = var43.x4[1];
17556 var44.x4[2] = var43.x4[2];
17557 var44.x4[3] = var43.x4[3];
/* clamped unsigned per-byte add */
17561 var38.x4[0] = ORC_CLAMP_UB((orc_uint8)var37.x4[0] + (orc_uint8)var44.x4[0]);
17562 var38.x4[1] = ORC_CLAMP_UB((orc_uint8)var37.x4[1] + (orc_uint8)var44.x4[1]);
17563 var38.x4[2] = ORC_CLAMP_UB((orc_uint8)var37.x4[2] + (orc_uint8)var44.x4[2]);
17564 var38.x4[3] = ORC_CLAMP_UB((orc_uint8)var37.x4[3] + (orc_uint8)var44.x4[3]);
/* _backup_orc_code_combine_add_u: C implementation that orc_code_combine_add_u
 * registers through orc_program_set_backup_function. Takes its pointers from
 * ex->arrays[0], [4] and [5]. Per 32-bit element it widens two inputs to
 * 16-bit lanes, replicates the top 16-bit lane of the second, multiplies
 * lane-wise, applies the (x+128 + ((x+128)>>8)) >> 8 step, narrows, and adds
 * the result to a third value through ORC_CLAMP_UB.
 * NOTE(review): the loads of var35/var36/var37 and the store of var38 are not
 * visible in this listing. */
17573 _backup_orc_code_combine_add_u (OrcExecutor * ORC_RESTRICT ex)
17577 orc_union32 * ORC_RESTRICT ptr0;
17578 const orc_union32 * ORC_RESTRICT ptr4;
17579 const orc_union32 * ORC_RESTRICT ptr5;
17591 ptr0 = (orc_union32 *)ex->arrays[0];
17592 ptr4 = (orc_union32 *)ex->arrays[4];
17593 ptr5 = (orc_union32 *)ex->arrays[5];
17596 for (i = 0; i < n; i++) {
/* zero-extend each byte lane of var35 */
17600 var39.x4[0] = (orc_uint8)var35.x4[0];
17601 var39.x4[1] = (orc_uint8)var35.x4[1];
17602 var39.x4[2] = (orc_uint8)var35.x4[2];
17603 var39.x4[3] = (orc_uint8)var35.x4[3];
/* zero-extend each byte lane of var36 */
17607 var40.x4[0] = (orc_uint8)var36.x4[0];
17608 var40.x4[1] = (orc_uint8)var36.x4[1];
17609 var40.x4[2] = (orc_uint8)var36.x4[2];
17610 var40.x4[3] = (orc_uint8)var36.x4[3];
/* copy bits 48..63 of var40 into all four 16-bit positions */
17612 var41.i = ((((orc_uint64)var40.i)>>48) << 48) | ((((orc_uint64)var40.i)>>48)<<32) | ((((orc_uint64)var40.i)>>48) << 16) | ((((orc_uint64)var40.i)>>48));
/* lane-wise product truncated to 16 bits */
17614 var42.x4[0] = (var39.x4[0] * var41.x4[0]) & 0xffff;
17615 var42.x4[1] = (var39.x4[1] * var41.x4[1]) & 0xffff;
17616 var42.x4[2] = (var39.x4[2] * var41.x4[2]) & 0xffff;
17617 var42.x4[3] = (var39.x4[3] * var41.x4[3]) & 0xffff;
/* presumably a rounded divide-by-255 approximation ("div255w").
 * NOTE(review): mixes uint16_t with orc_uint16 casts. */
17619 var43.x4[0] = ((uint16_t)(((orc_uint16)(var42.x4[0]+128)) + (((orc_uint16)(var42.x4[0]+128))>>8)))>>8;
17620 var43.x4[1] = ((uint16_t)(((orc_uint16)(var42.x4[1]+128)) + (((orc_uint16)(var42.x4[1]+128))>>8)))>>8;
17621 var43.x4[2] = ((uint16_t)(((orc_uint16)(var42.x4[2]+128)) + (((orc_uint16)(var42.x4[2]+128))>>8)))>>8;
17622 var43.x4[3] = ((uint16_t)(((orc_uint16)(var42.x4[3]+128)) + (((orc_uint16)(var42.x4[3]+128))>>8)))>>8;
/* lane-wise copy into var44 (presumably narrowing 16-bit lanes to bytes) */
17624 var44.x4[0] = var43.x4[0];
17625 var44.x4[1] = var43.x4[1];
17626 var44.x4[2] = var43.x4[2];
17627 var44.x4[3] = var43.x4[3];
/* clamped unsigned per-byte add */
17631 var38.x4[0] = ORC_CLAMP_UB((orc_uint8)var37.x4[0] + (orc_uint8)var44.x4[0]);
17632 var38.x4[1] = ORC_CLAMP_UB((orc_uint8)var37.x4[1] + (orc_uint8)var44.x4[1]);
17633 var38.x4[2] = ORC_CLAMP_UB((orc_uint8)var37.x4[2] + (orc_uint8)var44.x4[2]);
17634 var38.x4[3] = ORC_CLAMP_UB((orc_uint8)var37.x4[3] + (orc_uint8)var44.x4[3]);
/* orc_code_combine_add_u, OrcProgram-backed variant: describes a
 * seven-instruction kernel to Orc (4-byte d1, s1 and s2; temporaries t1 and
 * t2 of 8 bytes and t3 of 4 bytes), compiles it, then binds the caller's
 * buffers to an on-stack executor and picks up p->code_exec.
 * NOTE(review): the p_inited check, the executor's n setup and the call
 * through func are not visible in this listing. */
17642 orc_code_combine_add_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n)
17644 OrcExecutor _ex, *ex = &_ex;
/* static: the program object and its init flag persist across calls */
17645 static int p_inited = 0;
17646 static OrcProgram *p = 0;
17647 void (*func) (OrcExecutor *);
/* program construction is bracketed by orc_once_mutex_lock/unlock */
17650 orc_once_mutex_lock ();
17652 OrcCompileResult result;
17654 p = orc_program_new ();
17655 orc_program_set_name (p, "orc_code_combine_add_u");
17656 orc_program_set_backup_function (p, _backup_orc_code_combine_add_u);
17657 orc_program_add_destination (p, 4, "d1");
17658 orc_program_add_source (p, 4, "s1");
17659 orc_program_add_source (p, 4, "s2");
17660 orc_program_add_temporary (p, 8, "t1");
17661 orc_program_add_temporary (p, 8, "t2");
17662 orc_program_add_temporary (p, 4, "t3");
/* t1 <- convubw(s1); t2 <- convubw(s2); t2 <- splatw3q(t2);
 * t1 <- mullw(t1, t2); t1 <- div255w(t1); t3 <- convwb(t1);
 * d1 <- addusb(d1, t3). All but splatw3q pass flags value 2. */
17664 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
17665 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
17666 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
17667 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
17668 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
17669 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
17670 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_D1);
17672 result = orc_program_compile (p);
17675 orc_once_mutex_unlock ();
/* per-call binding of the caller's buffers */
17680 ex->arrays[ORC_VAR_D1] = d1;
17681 ex->arrays[ORC_VAR_S1] = (void *)s1;
17682 ex->arrays[ORC_VAR_S2] = (void *)s2;
17684 func = p->code_exec;
17690 /* orc_code_combine_add_u_n */
/* orc_code_combine_add_u_n, plain-C variant: views d1/s1 as orc_union32 and,
 * for each of the n elements, adds the four byte lanes of var32 and var33 as
 * unsigned values and passes each sum through ORC_CLAMP_UB.
 * NOTE(review): per the "addusb" D1 <- D1, S1 program in the OrcProgram
 * variant, var32/var33 are presumably the current destination and source
 * values; their loads and the store of var34 are not visible in this listing. */
17693 orc_code_combine_add_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n){
17695 orc_union32 * ORC_RESTRICT ptr0;
17696 const orc_union32 * ORC_RESTRICT ptr4;
17701 ptr0 = (orc_union32 *)d1;
17702 ptr4 = (orc_union32 *)s1;
17705 for (i = 0; i < n; i++) {
17711 var34.x4[0] = ORC_CLAMP_UB((orc_uint8)var32.x4[0] + (orc_uint8)var33.x4[0]);
17712 var34.x4[1] = ORC_CLAMP_UB((orc_uint8)var32.x4[1] + (orc_uint8)var33.x4[1]);
17713 var34.x4[2] = ORC_CLAMP_UB((orc_uint8)var32.x4[2] + (orc_uint8)var33.x4[2]);
17714 var34.x4[3] = ORC_CLAMP_UB((orc_uint8)var32.x4[3] + (orc_uint8)var33.x4[3]);
/* _backup_orc_code_combine_add_u_n: C implementation that
 * orc_code_combine_add_u_n registers through
 * orc_program_set_backup_function. Takes its pointers from ex->arrays[0] and
 * ex->arrays[4] and, per element, does a per-byte unsigned add passed through
 * ORC_CLAMP_UB.
 * NOTE(review): the loads of var32/var33 and the store of var34 are not
 * visible in this listing. */
17723 _backup_orc_code_combine_add_u_n (OrcExecutor * ORC_RESTRICT ex)
17727 orc_union32 * ORC_RESTRICT ptr0;
17728 const orc_union32 * ORC_RESTRICT ptr4;
17733 ptr0 = (orc_union32 *)ex->arrays[0];
17734 ptr4 = (orc_union32 *)ex->arrays[4];
17737 for (i = 0; i < n; i++) {
17743 var34.x4[0] = ORC_CLAMP_UB((orc_uint8)var32.x4[0] + (orc_uint8)var33.x4[0]);
17744 var34.x4[1] = ORC_CLAMP_UB((orc_uint8)var32.x4[1] + (orc_uint8)var33.x4[1]);
17745 var34.x4[2] = ORC_CLAMP_UB((orc_uint8)var32.x4[2] + (orc_uint8)var33.x4[2]);
17746 var34.x4[3] = ORC_CLAMP_UB((orc_uint8)var32.x4[3] + (orc_uint8)var33.x4[3]);
/* orc_code_combine_add_u_n, OrcProgram-backed variant: describes the kernel to
 * Orc as a single "addusb" whose operands are D1 and S1 and whose result goes
 * back to D1 (both 4 bytes wide), compiles it, then binds the caller's
 * buffers to an on-stack executor and picks up p->code_exec.
 * NOTE(review): the p_inited check, the executor's n setup and the call
 * through func are not visible in this listing. */
17754 orc_code_combine_add_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n)
17756 OrcExecutor _ex, *ex = &_ex;
/* static: the program object and its init flag persist across calls */
17757 static int p_inited = 0;
17758 static OrcProgram *p = 0;
17759 void (*func) (OrcExecutor *);
/* program construction is bracketed by orc_once_mutex_lock/unlock */
17762 orc_once_mutex_lock ();
17764 OrcCompileResult result;
17766 p = orc_program_new ();
17767 orc_program_set_name (p, "orc_code_combine_add_u_n");
17768 orc_program_set_backup_function (p, _backup_orc_code_combine_add_u_n);
17769 orc_program_add_destination (p, 4, "d1");
17770 orc_program_add_source (p, 4, "s1");
17772 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1);
17774 result = orc_program_compile (p);
17777 orc_once_mutex_unlock ();
17782 ex->arrays[ORC_VAR_D1] = d1;
17783 ex->arrays[ORC_VAR_S1] = (void *)s1;
17785 func = p->code_exec;
17791 /* orc_code_combine_over_u */
/* orc_code_combine_over_u, plain-C variant. Per 32-bit element:
 *   1. widen var37 and var38 to 16-bit lanes; replicate the top lane of the
 *      widened var38; multiply lane-wise and apply the
 *      (x+128 + ((x+128)>>8)) >> 8 step -> var45, narrowed copy in var46;
 *   2. widen var47; XOR var45 with 0x00ff per lane, replicate the top lane of
 *      that; multiply with the widened var47 and apply the same step -> var52,
 *      narrowed copy in var53;
 *   3. result var40 = per-byte ORC_CLAMP_UB(var53 + var46).
 * NOTE(review): var37/var38 are presumably the s1/s2 elements and var47 the
 * current d1 element; their loads and the store of var40 are not visible in
 * this listing. */
17794 orc_code_combine_over_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n){
17796 orc_union32 * ORC_RESTRICT ptr0;
17797 const orc_union32 * ORC_RESTRICT ptr4;
17798 const orc_union32 * ORC_RESTRICT ptr5;
17817 ptr0 = (orc_union32 *)d1;
17818 ptr4 = (orc_union32 *)s1;
17819 ptr5 = (orc_union32 *)s2;
/* loop-invariant constant: 0xff in every lane, used for the XOR below */
17822 var39.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
17823 var39.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
17824 var39.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
17825 var39.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
17827 for (i = 0; i < n; i++) {
/* zero-extend each byte lane of var37 */
17831 var41.x4[0] = (orc_uint8)var37.x4[0];
17832 var41.x4[1] = (orc_uint8)var37.x4[1];
17833 var41.x4[2] = (orc_uint8)var37.x4[2];
17834 var41.x4[3] = (orc_uint8)var37.x4[3];
/* zero-extend each byte lane of var38 */
17838 var42.x4[0] = (orc_uint8)var38.x4[0];
17839 var42.x4[1] = (orc_uint8)var38.x4[1];
17840 var42.x4[2] = (orc_uint8)var38.x4[2];
17841 var42.x4[3] = (orc_uint8)var38.x4[3];
/* copy bits 48..63 of var42 into all four 16-bit positions */
17843 var43.i = ((((orc_uint64)var42.i)>>48) << 48) | ((((orc_uint64)var42.i)>>48)<<32) | ((((orc_uint64)var42.i)>>48) << 16) | ((((orc_uint64)var42.i)>>48));
/* lane-wise product truncated to 16 bits */
17845 var44.x4[0] = (var41.x4[0] * var43.x4[0]) & 0xffff;
17846 var44.x4[1] = (var41.x4[1] * var43.x4[1]) & 0xffff;
17847 var44.x4[2] = (var41.x4[2] * var43.x4[2]) & 0xffff;
17848 var44.x4[3] = (var41.x4[3] * var43.x4[3]) & 0xffff;
/* presumably a rounded divide-by-255 approximation.
 * NOTE(review): mixes uint16_t with orc_uint16 casts. */
17850 var45.x4[0] = ((uint16_t)(((orc_uint16)(var44.x4[0]+128)) + (((orc_uint16)(var44.x4[0]+128))>>8)))>>8;
17851 var45.x4[1] = ((uint16_t)(((orc_uint16)(var44.x4[1]+128)) + (((orc_uint16)(var44.x4[1]+128))>>8)))>>8;
17852 var45.x4[2] = ((uint16_t)(((orc_uint16)(var44.x4[2]+128)) + (((orc_uint16)(var44.x4[2]+128))>>8)))>>8;
17853 var45.x4[3] = ((uint16_t)(((orc_uint16)(var44.x4[3]+128)) + (((orc_uint16)(var44.x4[3]+128))>>8)))>>8;
/* lane-wise copy into var46 (presumably narrowing 16-bit lanes to bytes) */
17855 var46.x4[0] = var45.x4[0];
17856 var46.x4[1] = var45.x4[1];
17857 var46.x4[2] = var45.x4[2];
17858 var46.x4[3] = var45.x4[3];
/* zero-extend each byte lane of var47 */
17862 var48.x4[0] = (orc_uint8)var47.x4[0];
17863 var48.x4[1] = (orc_uint8)var47.x4[1];
17864 var48.x4[2] = (orc_uint8)var47.x4[2];
17865 var48.x4[3] = (orc_uint8)var47.x4[3];
/* XOR with 0xff: for lane values in 0..255 this equals 255 - value */
17867 var49.x4[0] = var45.x4[0] ^ var39.x4[0];
17868 var49.x4[1] = var45.x4[1] ^ var39.x4[1];
17869 var49.x4[2] = var45.x4[2] ^ var39.x4[2];
17870 var49.x4[3] = var45.x4[3] ^ var39.x4[3];
/* copy bits 48..63 of var49 into all four 16-bit positions */
17872 var50.i = ((((orc_uint64)var49.i)>>48) << 48) | ((((orc_uint64)var49.i)>>48)<<32) | ((((orc_uint64)var49.i)>>48) << 16) | ((((orc_uint64)var49.i)>>48));
17874 var51.x4[0] = (var48.x4[0] * var50.x4[0]) & 0xffff;
17875 var51.x4[1] = (var48.x4[1] * var50.x4[1]) & 0xffff;
17876 var51.x4[2] = (var48.x4[2] * var50.x4[2]) & 0xffff;
17877 var51.x4[3] = (var48.x4[3] * var50.x4[3]) & 0xffff;
17879 var52.x4[0] = ((uint16_t)(((orc_uint16)(var51.x4[0]+128)) + (((orc_uint16)(var51.x4[0]+128))>>8)))>>8;
17880 var52.x4[1] = ((uint16_t)(((orc_uint16)(var51.x4[1]+128)) + (((orc_uint16)(var51.x4[1]+128))>>8)))>>8;
17881 var52.x4[2] = ((uint16_t)(((orc_uint16)(var51.x4[2]+128)) + (((orc_uint16)(var51.x4[2]+128))>>8)))>>8;
17882 var52.x4[3] = ((uint16_t)(((orc_uint16)(var51.x4[3]+128)) + (((orc_uint16)(var51.x4[3]+128))>>8)))>>8;
17884 var53.x4[0] = var52.x4[0];
17885 var53.x4[1] = var52.x4[1];
17886 var53.x4[2] = var52.x4[2];
17887 var53.x4[3] = var52.x4[3];
/* final clamped unsigned per-byte add of the two weighted terms */
17889 var40.x4[0] = ORC_CLAMP_UB((orc_uint8)var53.x4[0] + (orc_uint8)var46.x4[0]);
17890 var40.x4[1] = ORC_CLAMP_UB((orc_uint8)var53.x4[1] + (orc_uint8)var46.x4[1]);
17891 var40.x4[2] = ORC_CLAMP_UB((orc_uint8)var53.x4[2] + (orc_uint8)var46.x4[2]);
17892 var40.x4[3] = ORC_CLAMP_UB((orc_uint8)var53.x4[3] + (orc_uint8)var46.x4[3]);
/* _backup_orc_code_combine_over_u: executor-based C implementation; takes its
 * pointers from ex->arrays[0], [4] and [5]. Per 32-bit element:
 *   1. widen var37 and var38 to 16-bit lanes; replicate the top lane of the
 *      widened var38; multiply lane-wise and apply the
 *      (x+128 + ((x+128)>>8)) >> 8 step -> var45, narrowed copy in var46;
 *   2. widen var47; XOR var45 with 0x00ff per lane, replicate the top lane of
 *      that; multiply with the widened var47 and apply the same step -> var52,
 *      narrowed copy in var53;
 *   3. result var40 = per-byte ORC_CLAMP_UB(var53 + var46).
 * NOTE(review): array slots 0/4/5 presumably correspond to d1/s1/s2; the loads
 * of var37/var38/var47 and the store of var40 are not visible in this
 * listing. */
17901 _backup_orc_code_combine_over_u (OrcExecutor * ORC_RESTRICT ex)
17905 orc_union32 * ORC_RESTRICT ptr0;
17906 const orc_union32 * ORC_RESTRICT ptr4;
17907 const orc_union32 * ORC_RESTRICT ptr5;
17926 ptr0 = (orc_union32 *)ex->arrays[0];
17927 ptr4 = (orc_union32 *)ex->arrays[4];
17928 ptr5 = (orc_union32 *)ex->arrays[5];
/* loop-invariant constant: 0xff in every lane, used for the XOR below */
17931 var39.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
17932 var39.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
17933 var39.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
17934 var39.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
17936 for (i = 0; i < n; i++) {
/* zero-extend each byte lane of var37 */
17940 var41.x4[0] = (orc_uint8)var37.x4[0];
17941 var41.x4[1] = (orc_uint8)var37.x4[1];
17942 var41.x4[2] = (orc_uint8)var37.x4[2];
17943 var41.x4[3] = (orc_uint8)var37.x4[3];
/* zero-extend each byte lane of var38 */
17947 var42.x4[0] = (orc_uint8)var38.x4[0];
17948 var42.x4[1] = (orc_uint8)var38.x4[1];
17949 var42.x4[2] = (orc_uint8)var38.x4[2];
17950 var42.x4[3] = (orc_uint8)var38.x4[3];
/* copy bits 48..63 of var42 into all four 16-bit positions */
17952 var43.i = ((((orc_uint64)var42.i)>>48) << 48) | ((((orc_uint64)var42.i)>>48)<<32) | ((((orc_uint64)var42.i)>>48) << 16) | ((((orc_uint64)var42.i)>>48));
/* lane-wise product truncated to 16 bits */
17954 var44.x4[0] = (var41.x4[0] * var43.x4[0]) & 0xffff;
17955 var44.x4[1] = (var41.x4[1] * var43.x4[1]) & 0xffff;
17956 var44.x4[2] = (var41.x4[2] * var43.x4[2]) & 0xffff;
17957 var44.x4[3] = (var41.x4[3] * var43.x4[3]) & 0xffff;
/* presumably a rounded divide-by-255 approximation.
 * NOTE(review): mixes uint16_t with orc_uint16 casts. */
17959 var45.x4[0] = ((uint16_t)(((orc_uint16)(var44.x4[0]+128)) + (((orc_uint16)(var44.x4[0]+128))>>8)))>>8;
17960 var45.x4[1] = ((uint16_t)(((orc_uint16)(var44.x4[1]+128)) + (((orc_uint16)(var44.x4[1]+128))>>8)))>>8;
17961 var45.x4[2] = ((uint16_t)(((orc_uint16)(var44.x4[2]+128)) + (((orc_uint16)(var44.x4[2]+128))>>8)))>>8;
17962 var45.x4[3] = ((uint16_t)(((orc_uint16)(var44.x4[3]+128)) + (((orc_uint16)(var44.x4[3]+128))>>8)))>>8;
/* lane-wise copy into var46 (presumably narrowing 16-bit lanes to bytes) */
17964 var46.x4[0] = var45.x4[0];
17965 var46.x4[1] = var45.x4[1];
17966 var46.x4[2] = var45.x4[2];
17967 var46.x4[3] = var45.x4[3];
/* zero-extend each byte lane of var47 */
17971 var48.x4[0] = (orc_uint8)var47.x4[0];
17972 var48.x4[1] = (orc_uint8)var47.x4[1];
17973 var48.x4[2] = (orc_uint8)var47.x4[2];
17974 var48.x4[3] = (orc_uint8)var47.x4[3];
/* XOR with 0xff: for lane values in 0..255 this equals 255 - value */
17976 var49.x4[0] = var45.x4[0] ^ var39.x4[0];
17977 var49.x4[1] = var45.x4[1] ^ var39.x4[1];
17978 var49.x4[2] = var45.x4[2] ^ var39.x4[2];
17979 var49.x4[3] = var45.x4[3] ^ var39.x4[3];
/* copy bits 48..63 of var49 into all four 16-bit positions */
17981 var50.i = ((((orc_uint64)var49.i)>>48) << 48) | ((((orc_uint64)var49.i)>>48)<<32) | ((((orc_uint64)var49.i)>>48) << 16) | ((((orc_uint64)var49.i)>>48));
17983 var51.x4[0] = (var48.x4[0] * var50.x4[0]) & 0xffff;
17984 var51.x4[1] = (var48.x4[1] * var50.x4[1]) & 0xffff;
17985 var51.x4[2] = (var48.x4[2] * var50.x4[2]) & 0xffff;
17986 var51.x4[3] = (var48.x4[3] * var50.x4[3]) & 0xffff;
17988 var52.x4[0] = ((uint16_t)(((orc_uint16)(var51.x4[0]+128)) + (((orc_uint16)(var51.x4[0]+128))>>8)))>>8;
17989 var52.x4[1] = ((uint16_t)(((orc_uint16)(var51.x4[1]+128)) + (((orc_uint16)(var51.x4[1]+128))>>8)))>>8;
17990 var52.x4[2] = ((uint16_t)(((orc_uint16)(var51.x4[2]+128)) + (((orc_uint16)(var51.x4[2]+128))>>8)))>>8;
17991 var52.x4[3] = ((uint16_t)(((orc_uint16)(var51.x4[3]+128)) + (((orc_uint16)(var51.x4[3]+128))>>8)))>>8;
17993 var53.x4[0] = var52.x4[0];
17994 var53.x4[1] = var52.x4[1];
17995 var53.x4[2] = var52.x4[2];
17996 var53.x4[3] = var52.x4[3];
/* final clamped unsigned per-byte add of the two weighted terms */
17998 var40.x4[0] = ORC_CLAMP_UB((orc_uint8)var53.x4[0] + (orc_uint8)var46.x4[0]);
17999 var40.x4[1] = ORC_CLAMP_UB((orc_uint8)var53.x4[1] + (orc_uint8)var46.x4[1]);
18000 var40.x4[2] = ORC_CLAMP_UB((orc_uint8)var53.x4[2] + (orc_uint8)var46.x4[2]);
18001 var40.x4[3] = ORC_CLAMP_UB((orc_uint8)var53.x4[3] + (orc_uint8)var46.x4[3]);
/* Orc-backed entry point for orc_code_combine_over_u.
 * d1 is the destination array, s1 and s2 are the source arrays, n is the
 * element count supplied by the caller.
 * The OrcProgram pointer and its init flag are function-local statics, so the
 * program object persists across calls; construction happens between
 * orc_once_mutex_lock() and orc_once_mutex_unlock(). */
18009 orc_code_combine_over_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n)
18011 OrcExecutor _ex, *ex = &_ex;
18012 static int p_inited = 0;
18013 static OrcProgram *p = 0;
18014 void (*func) (OrcExecutor *);
18017 orc_once_mutex_lock ();
18019 OrcCompileResult result;
18021 p = orc_program_new ();
18022 orc_program_set_name (p, "orc_code_combine_over_u");
/* Register the plain C implementation as this program's backup function. */
18023 orc_program_set_backup_function (p, _backup_orc_code_combine_over_u);
/* Variables: 4-byte destination and sources, constant 255, and temporaries of 8 or 4 bytes. */
18024 orc_program_add_destination (p, 4, "d1");
18025 orc_program_add_source (p, 4, "s1");
18026 orc_program_add_source (p, 4, "s2");
18027 orc_program_add_constant (p, 4, 0x000000ff, "c1");
18028 orc_program_add_temporary (p, 8, "t1");
18029 orc_program_add_temporary (p, 8, "t2");
18030 orc_program_add_temporary (p, 4, "t3");
18031 orc_program_add_temporary (p, 4, "t4");
18032 orc_program_add_temporary (p, 8, "t5");
/* Instruction stream, in order:
 *   t1 = convubw (s1); t2 = convubw (s2); t2 = splatw3q (t2);
 *   t1 = div255w (mullw (t1, t2)); t3 = convwb (t1);
 *   t4 = loadl (d1); t5 = convubw (t4);
 *   t1 = xorw (t1, c1); t2 = splatw3q (t1);
 *   t1 = div255w (mullw (t5, t2)); t4 = convwb (t1);
 *   d1 = addusb (t4, t3).
 * Operand slots an opcode does not use appear to be padded with ORC_VAR_D1. */
18034 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
18035 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
18036 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
18037 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
18038 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
18039 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
18040 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
18041 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
18042 orc_program_append_2 (p, "xorw", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
18043 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
18044 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T1, ORC_VAR_T5, ORC_VAR_T2, ORC_VAR_D1);
18045 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
18046 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T4, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
18047 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1);
18049 result = orc_program_compile (p);
18052 orc_once_mutex_unlock ();
/* Bind the caller's arrays to the executor slots; the const qualifier on the sources is cast away for storage. */
18057 ex->arrays[ORC_VAR_D1] = d1;
18058 ex->arrays[ORC_VAR_S1] = (void *)s1;
18059 ex->arrays[ORC_VAR_S2] = (void *)s2;
/* Entry point of the program's executable code. */
18061 func = p->code_exec;
18067 /* orc_code_combine_over_u_n */
/* Plain C variant of orc_code_combine_over_u_n working directly on the
 * caller's arrays: d1 is read and written through ptr0, s1 is read through
 * ptr4, n is the element count.
 * Per element the visible arithmetic is:
 *   f      = splat3 (widen (var39) ^ 255)
 *   t      = div255 (widen (var41) * f)
 *   result = ORC_CLAMP_UB (t + var39)      (per byte lane)
 * NOTE(review): lane 3 is presumably the alpha channel - confirm. */
18070 orc_code_combine_over_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n){
18072 orc_union32 * ORC_RESTRICT ptr0;
18073 const orc_union32 * ORC_RESTRICT ptr4;
18086 ptr0 = (orc_union32 *)d1;
18087 ptr4 = (orc_union32 *)s1;
/* Loop-invariant constant 255 in every lane; x ^ 255 equals 255 - x for x in 0..255. */
18090 var37.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
18091 var37.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
18092 var37.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
18093 var37.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
18095 for (i = 0; i < n; i++) {
/* Widen: reinterpret each lane of var39 as an unsigned byte. */
18099 var40.x4[0] = (orc_uint8)var39.x4[0];
18100 var40.x4[1] = (orc_uint8)var39.x4[1];
18101 var40.x4[2] = (orc_uint8)var39.x4[2];
18102 var40.x4[3] = (orc_uint8)var39.x4[3];
/* Same widening for var41. */
18106 var42.x4[0] = (orc_uint8)var41.x4[0];
18107 var42.x4[1] = (orc_uint8)var41.x4[1];
18108 var42.x4[2] = (orc_uint8)var41.x4[2];
18109 var42.x4[3] = (orc_uint8)var41.x4[3];
/* Complement the widened var39 against 255. */
18111 var43.x4[0] = var40.x4[0] ^ var37.x4[0];
18112 var43.x4[1] = var40.x4[1] ^ var37.x4[1];
18113 var43.x4[2] = var40.x4[2] ^ var37.x4[2];
18114 var43.x4[3] = var40.x4[3] ^ var37.x4[3];
/* Broadcast bits 48..63 of the complemented value into all four 16-bit positions. */
18116 var44.i = ((((orc_uint64)var43.i)>>48) << 48) | ((((orc_uint64)var43.i)>>48)<<32) | ((((orc_uint64)var43.i)>>48) << 16) | ((((orc_uint64)var43.i)>>48));
/* Lane-wise multiply, keeping the low 16 bits of each product. */
18118 var45.x4[0] = (var42.x4[0] * var44.x4[0]) & 0xffff;
18119 var45.x4[1] = (var42.x4[1] * var44.x4[1]) & 0xffff;
18120 var45.x4[2] = (var42.x4[2] * var44.x4[2]) & 0xffff;
18121 var45.x4[3] = (var42.x4[3] * var44.x4[3]) & 0xffff;
/* div255w step: t = x + 128; result = (t + (t >> 8)) >> 8, in 16-bit arithmetic. */
18123 var46.x4[0] = ((uint16_t)(((orc_uint16)(var45.x4[0]+128)) + (((orc_uint16)(var45.x4[0]+128))>>8)))>>8;
18124 var46.x4[1] = ((uint16_t)(((orc_uint16)(var45.x4[1]+128)) + (((orc_uint16)(var45.x4[1]+128))>>8)))>>8;
18125 var46.x4[2] = ((uint16_t)(((orc_uint16)(var45.x4[2]+128)) + (((orc_uint16)(var45.x4[2]+128))>>8)))>>8;
18126 var46.x4[3] = ((uint16_t)(((orc_uint16)(var45.x4[3]+128)) + (((orc_uint16)(var45.x4[3]+128))>>8)))>>8;
/* Copy the scaled value lane by lane (presumably narrowing to bytes - confirm var47's type). */
18128 var47.x4[0] = var46.x4[0];
18129 var47.x4[1] = var46.x4[1];
18130 var47.x4[2] = var46.x4[2];
18131 var47.x4[3] = var46.x4[3];
/* Add the scaled value and the unscaled var39 as unsigned bytes, clamped with ORC_CLAMP_UB. */
18133 var38.x4[0] = ORC_CLAMP_UB((orc_uint8)var47.x4[0] + (orc_uint8)var39.x4[0]);
18134 var38.x4[1] = ORC_CLAMP_UB((orc_uint8)var47.x4[1] + (orc_uint8)var39.x4[1]);
18135 var38.x4[2] = ORC_CLAMP_UB((orc_uint8)var47.x4[2] + (orc_uint8)var39.x4[2]);
18136 var38.x4[3] = ORC_CLAMP_UB((orc_uint8)var47.x4[3] + (orc_uint8)var39.x4[3]);
/* Backup (plain C) implementation that orc_code_combine_over_u_n registers
 * with orc_program_set_backup_function().  Array pointers come from the
 * executor: arrays[0] through a writable pointer, arrays[4] through a const
 * pointer (presumably D1 and S1 - confirm).
 * Per element the visible arithmetic is:
 *   f      = splat3 (widen (var39) ^ 255)
 *   t      = div255 (widen (var41) * f)
 *   result = ORC_CLAMP_UB (t + var39)      (per byte lane) */
18145 _backup_orc_code_combine_over_u_n (OrcExecutor * ORC_RESTRICT ex)
18149 orc_union32 * ORC_RESTRICT ptr0;
18150 const orc_union32 * ORC_RESTRICT ptr4;
18163 ptr0 = (orc_union32 *)ex->arrays[0];
18164 ptr4 = (orc_union32 *)ex->arrays[4];
/* Loop-invariant constant 255 in every lane; x ^ 255 equals 255 - x for x in 0..255. */
18167 var37.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
18168 var37.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
18169 var37.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
18170 var37.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
18172 for (i = 0; i < n; i++) {
/* Widen: reinterpret each lane of var39 as an unsigned byte. */
18176 var40.x4[0] = (orc_uint8)var39.x4[0];
18177 var40.x4[1] = (orc_uint8)var39.x4[1];
18178 var40.x4[2] = (orc_uint8)var39.x4[2];
18179 var40.x4[3] = (orc_uint8)var39.x4[3];
/* Same widening for var41. */
18183 var42.x4[0] = (orc_uint8)var41.x4[0];
18184 var42.x4[1] = (orc_uint8)var41.x4[1];
18185 var42.x4[2] = (orc_uint8)var41.x4[2];
18186 var42.x4[3] = (orc_uint8)var41.x4[3];
/* Complement the widened var39 against 255. */
18188 var43.x4[0] = var40.x4[0] ^ var37.x4[0];
18189 var43.x4[1] = var40.x4[1] ^ var37.x4[1];
18190 var43.x4[2] = var40.x4[2] ^ var37.x4[2];
18191 var43.x4[3] = var40.x4[3] ^ var37.x4[3];
/* Broadcast bits 48..63 of the complemented value into all four 16-bit positions. */
18193 var44.i = ((((orc_uint64)var43.i)>>48) << 48) | ((((orc_uint64)var43.i)>>48)<<32) | ((((orc_uint64)var43.i)>>48) << 16) | ((((orc_uint64)var43.i)>>48));
/* Lane-wise multiply, keeping the low 16 bits of each product. */
18195 var45.x4[0] = (var42.x4[0] * var44.x4[0]) & 0xffff;
18196 var45.x4[1] = (var42.x4[1] * var44.x4[1]) & 0xffff;
18197 var45.x4[2] = (var42.x4[2] * var44.x4[2]) & 0xffff;
18198 var45.x4[3] = (var42.x4[3] * var44.x4[3]) & 0xffff;
/* div255w step: t = x + 128; result = (t + (t >> 8)) >> 8, in 16-bit arithmetic. */
18200 var46.x4[0] = ((uint16_t)(((orc_uint16)(var45.x4[0]+128)) + (((orc_uint16)(var45.x4[0]+128))>>8)))>>8;
18201 var46.x4[1] = ((uint16_t)(((orc_uint16)(var45.x4[1]+128)) + (((orc_uint16)(var45.x4[1]+128))>>8)))>>8;
18202 var46.x4[2] = ((uint16_t)(((orc_uint16)(var45.x4[2]+128)) + (((orc_uint16)(var45.x4[2]+128))>>8)))>>8;
18203 var46.x4[3] = ((uint16_t)(((orc_uint16)(var45.x4[3]+128)) + (((orc_uint16)(var45.x4[3]+128))>>8)))>>8;
/* Copy the scaled value lane by lane (presumably narrowing to bytes - confirm var47's type). */
18205 var47.x4[0] = var46.x4[0];
18206 var47.x4[1] = var46.x4[1];
18207 var47.x4[2] = var46.x4[2];
18208 var47.x4[3] = var46.x4[3];
/* Add the scaled value and the unscaled var39 as unsigned bytes, clamped with ORC_CLAMP_UB. */
18210 var38.x4[0] = ORC_CLAMP_UB((orc_uint8)var47.x4[0] + (orc_uint8)var39.x4[0]);
18211 var38.x4[1] = ORC_CLAMP_UB((orc_uint8)var47.x4[1] + (orc_uint8)var39.x4[1]);
18212 var38.x4[2] = ORC_CLAMP_UB((orc_uint8)var47.x4[2] + (orc_uint8)var39.x4[2]);
18213 var38.x4[3] = ORC_CLAMP_UB((orc_uint8)var47.x4[3] + (orc_uint8)var39.x4[3]);
/* Orc-backed entry point for orc_code_combine_over_u_n.
 * d1 is the destination array (also loaded by the program), s1 is the source
 * array, n is the element count supplied by the caller.
 * The OrcProgram pointer and its init flag are function-local statics, so the
 * program object persists across calls; construction happens between
 * orc_once_mutex_lock() and orc_once_mutex_unlock(). */
18221 orc_code_combine_over_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n)
18223 OrcExecutor _ex, *ex = &_ex;
18224 static int p_inited = 0;
18225 static OrcProgram *p = 0;
18226 void (*func) (OrcExecutor *);
18229 orc_once_mutex_lock ();
18231 OrcCompileResult result;
18233 p = orc_program_new ();
18234 orc_program_set_name (p, "orc_code_combine_over_u_n");
/* Register the plain C implementation as this program's backup function. */
18235 orc_program_set_backup_function (p, _backup_orc_code_combine_over_u_n);
/* Variables: 4-byte destination and source, constant 255, and temporaries of 8 or 4 bytes. */
18236 orc_program_add_destination (p, 4, "d1");
18237 orc_program_add_source (p, 4, "s1");
18238 orc_program_add_constant (p, 4, 0x000000ff, "c1");
18239 orc_program_add_temporary (p, 8, "t1");
18240 orc_program_add_temporary (p, 8, "t2");
18241 orc_program_add_temporary (p, 4, "t3");
18242 orc_program_add_temporary (p, 4, "t4");
18243 orc_program_add_temporary (p, 8, "t5");
/* Instruction stream, in order:
 *   t4 = loadl (s1); t1 = convubw (t4);
 *   t3 = loadl (d1); t5 = convubw (t3);
 *   t1 = xorw (t1, c1); t2 = splatw3q (t1);
 *   t1 = div255w (mullw (t5, t2)); t3 = convwb (t1);
 *   d1 = addusb (t3, t4).
 * Operand slots an opcode does not use appear to be padded with ORC_VAR_D1. */
18245 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T4, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
18246 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
18247 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
18248 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
18249 orc_program_append_2 (p, "xorw", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
18250 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
18251 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T1, ORC_VAR_T5, ORC_VAR_T2, ORC_VAR_D1);
18252 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
18253 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
18254 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_D1);
18256 result = orc_program_compile (p);
18259 orc_once_mutex_unlock ();
/* Bind the caller's arrays to the executor slots; the const qualifier on the source is cast away for storage. */
18264 ex->arrays[ORC_VAR_D1] = d1;
18265 ex->arrays[ORC_VAR_S1] = (void *)s1;
/* Entry point of the program's executable code. */
18267 func = p->code_exec;
18273 /* orc_code_combine_in_u */
/* Plain C variant of orc_code_combine_in_u working directly on the caller's
 * arrays: d1 through ptr0, s1 through ptr4, s2 through ptr5; n is the element
 * count.
 * Per element the visible arithmetic is:
 *   a      = div255 (widen (var37) * splat3 (widen (var38)))
 *   result = div255 (a * splat3 (widen (var39)))
 * NOTE(review): lane 3 is presumably the alpha channel - confirm. */
18276 orc_code_combine_in_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n){
18278 orc_union32 * ORC_RESTRICT ptr0;
18279 const orc_union32 * ORC_RESTRICT ptr4;
18280 const orc_union32 * ORC_RESTRICT ptr5;
18295 ptr0 = (orc_union32 *)d1;
18296 ptr4 = (orc_union32 *)s1;
18297 ptr5 = (orc_union32 *)s2;
18300 for (i = 0; i < n; i++) {
/* Widen: reinterpret each lane of var37 as an unsigned byte. */
18304 var41.x4[0] = (orc_uint8)var37.x4[0];
18305 var41.x4[1] = (orc_uint8)var37.x4[1];
18306 var41.x4[2] = (orc_uint8)var37.x4[2];
18307 var41.x4[3] = (orc_uint8)var37.x4[3];
/* Same widening for var38. */
18311 var42.x4[0] = (orc_uint8)var38.x4[0];
18312 var42.x4[1] = (orc_uint8)var38.x4[1];
18313 var42.x4[2] = (orc_uint8)var38.x4[2];
18314 var42.x4[3] = (orc_uint8)var38.x4[3];
/* Broadcast bits 48..63 of var42 into all four 16-bit positions. */
18316 var43.i = ((((orc_uint64)var42.i)>>48) << 48) | ((((orc_uint64)var42.i)>>48)<<32) | ((((orc_uint64)var42.i)>>48) << 16) | ((((orc_uint64)var42.i)>>48));
/* Lane-wise multiply, keeping the low 16 bits of each product. */
18318 var44.x4[0] = (var41.x4[0] * var43.x4[0]) & 0xffff;
18319 var44.x4[1] = (var41.x4[1] * var43.x4[1]) & 0xffff;
18320 var44.x4[2] = (var41.x4[2] * var43.x4[2]) & 0xffff;
18321 var44.x4[3] = (var41.x4[3] * var43.x4[3]) & 0xffff;
/* div255w step: t = x + 128; result = (t + (t >> 8)) >> 8, in 16-bit arithmetic. */
18323 var45.x4[0] = ((uint16_t)(((orc_uint16)(var44.x4[0]+128)) + (((orc_uint16)(var44.x4[0]+128))>>8)))>>8;
18324 var45.x4[1] = ((uint16_t)(((orc_uint16)(var44.x4[1]+128)) + (((orc_uint16)(var44.x4[1]+128))>>8)))>>8;
18325 var45.x4[2] = ((uint16_t)(((orc_uint16)(var44.x4[2]+128)) + (((orc_uint16)(var44.x4[2]+128))>>8)))>>8;
18326 var45.x4[3] = ((uint16_t)(((orc_uint16)(var44.x4[3]+128)) + (((orc_uint16)(var44.x4[3]+128))>>8)))>>8;
/* Widen var39 the same way as the sources. */
18330 var46.x4[0] = (orc_uint8)var39.x4[0];
18331 var46.x4[1] = (orc_uint8)var39.x4[1];
18332 var46.x4[2] = (orc_uint8)var39.x4[2];
18333 var46.x4[3] = (orc_uint8)var39.x4[3];
/* Broadcast bits 48..63 of var46 into all four 16-bit positions. */
18335 var47.i = ((((orc_uint64)var46.i)>>48) << 48) | ((((orc_uint64)var46.i)>>48)<<32) | ((((orc_uint64)var46.i)>>48) << 16) | ((((orc_uint64)var46.i)>>48));
/* Scale the first result by the broadcast factor, low 16 bits kept. */
18337 var48.x4[0] = (var45.x4[0] * var47.x4[0]) & 0xffff;
18338 var48.x4[1] = (var45.x4[1] * var47.x4[1]) & 0xffff;
18339 var48.x4[2] = (var45.x4[2] * var47.x4[2]) & 0xffff;
18340 var48.x4[3] = (var45.x4[3] * var47.x4[3]) & 0xffff;
/* Second div255w step, same formula as above. */
18342 var49.x4[0] = ((uint16_t)(((orc_uint16)(var48.x4[0]+128)) + (((orc_uint16)(var48.x4[0]+128))>>8)))>>8;
18343 var49.x4[1] = ((uint16_t)(((orc_uint16)(var48.x4[1]+128)) + (((orc_uint16)(var48.x4[1]+128))>>8)))>>8;
18344 var49.x4[2] = ((uint16_t)(((orc_uint16)(var48.x4[2]+128)) + (((orc_uint16)(var48.x4[2]+128))>>8)))>>8;
18345 var49.x4[3] = ((uint16_t)(((orc_uint16)(var48.x4[3]+128)) + (((orc_uint16)(var48.x4[3]+128))>>8)))>>8;
/* Copy the final value lane by lane into var40 (presumably narrowing to bytes - confirm var40's type). */
18347 var40.x4[0] = var49.x4[0];
18348 var40.x4[1] = var49.x4[1];
18349 var40.x4[2] = var49.x4[2];
18350 var40.x4[3] = var49.x4[3];
/* Backup (plain C) implementation that orc_code_combine_in_u registers with
 * orc_program_set_backup_function().  Array pointers come from the executor:
 * arrays[0] through a writable pointer, arrays[4] and arrays[5] through const
 * pointers (presumably D1, S1 and S2 - confirm).
 * Per element the visible arithmetic is:
 *   a      = div255 (widen (var37) * splat3 (widen (var38)))
 *   result = div255 (a * splat3 (widen (var39))) */
18359 _backup_orc_code_combine_in_u (OrcExecutor * ORC_RESTRICT ex)
18363 orc_union32 * ORC_RESTRICT ptr0;
18364 const orc_union32 * ORC_RESTRICT ptr4;
18365 const orc_union32 * ORC_RESTRICT ptr5;
18380 ptr0 = (orc_union32 *)ex->arrays[0];
18381 ptr4 = (orc_union32 *)ex->arrays[4];
18382 ptr5 = (orc_union32 *)ex->arrays[5];
18385 for (i = 0; i < n; i++) {
/* Widen: reinterpret each lane of var37 as an unsigned byte. */
18389 var41.x4[0] = (orc_uint8)var37.x4[0];
18390 var41.x4[1] = (orc_uint8)var37.x4[1];
18391 var41.x4[2] = (orc_uint8)var37.x4[2];
18392 var41.x4[3] = (orc_uint8)var37.x4[3];
/* Same widening for var38. */
18396 var42.x4[0] = (orc_uint8)var38.x4[0];
18397 var42.x4[1] = (orc_uint8)var38.x4[1];
18398 var42.x4[2] = (orc_uint8)var38.x4[2];
18399 var42.x4[3] = (orc_uint8)var38.x4[3];
/* Broadcast bits 48..63 of var42 into all four 16-bit positions. */
18401 var43.i = ((((orc_uint64)var42.i)>>48) << 48) | ((((orc_uint64)var42.i)>>48)<<32) | ((((orc_uint64)var42.i)>>48) << 16) | ((((orc_uint64)var42.i)>>48));
/* Lane-wise multiply, keeping the low 16 bits of each product. */
18403 var44.x4[0] = (var41.x4[0] * var43.x4[0]) & 0xffff;
18404 var44.x4[1] = (var41.x4[1] * var43.x4[1]) & 0xffff;
18405 var44.x4[2] = (var41.x4[2] * var43.x4[2]) & 0xffff;
18406 var44.x4[3] = (var41.x4[3] * var43.x4[3]) & 0xffff;
/* div255w step: t = x + 128; result = (t + (t >> 8)) >> 8, in 16-bit arithmetic. */
18408 var45.x4[0] = ((uint16_t)(((orc_uint16)(var44.x4[0]+128)) + (((orc_uint16)(var44.x4[0]+128))>>8)))>>8;
18409 var45.x4[1] = ((uint16_t)(((orc_uint16)(var44.x4[1]+128)) + (((orc_uint16)(var44.x4[1]+128))>>8)))>>8;
18410 var45.x4[2] = ((uint16_t)(((orc_uint16)(var44.x4[2]+128)) + (((orc_uint16)(var44.x4[2]+128))>>8)))>>8;
18411 var45.x4[3] = ((uint16_t)(((orc_uint16)(var44.x4[3]+128)) + (((orc_uint16)(var44.x4[3]+128))>>8)))>>8;
/* Widen var39 the same way as the sources. */
18415 var46.x4[0] = (orc_uint8)var39.x4[0];
18416 var46.x4[1] = (orc_uint8)var39.x4[1];
18417 var46.x4[2] = (orc_uint8)var39.x4[2];
18418 var46.x4[3] = (orc_uint8)var39.x4[3];
/* Broadcast bits 48..63 of var46 into all four 16-bit positions. */
18420 var47.i = ((((orc_uint64)var46.i)>>48) << 48) | ((((orc_uint64)var46.i)>>48)<<32) | ((((orc_uint64)var46.i)>>48) << 16) | ((((orc_uint64)var46.i)>>48));
/* Scale the first result by the broadcast factor, low 16 bits kept. */
18422 var48.x4[0] = (var45.x4[0] * var47.x4[0]) & 0xffff;
18423 var48.x4[1] = (var45.x4[1] * var47.x4[1]) & 0xffff;
18424 var48.x4[2] = (var45.x4[2] * var47.x4[2]) & 0xffff;
18425 var48.x4[3] = (var45.x4[3] * var47.x4[3]) & 0xffff;
/* Second div255w step, same formula as above. */
18427 var49.x4[0] = ((uint16_t)(((orc_uint16)(var48.x4[0]+128)) + (((orc_uint16)(var48.x4[0]+128))>>8)))>>8;
18428 var49.x4[1] = ((uint16_t)(((orc_uint16)(var48.x4[1]+128)) + (((orc_uint16)(var48.x4[1]+128))>>8)))>>8;
18429 var49.x4[2] = ((uint16_t)(((orc_uint16)(var48.x4[2]+128)) + (((orc_uint16)(var48.x4[2]+128))>>8)))>>8;
18430 var49.x4[3] = ((uint16_t)(((orc_uint16)(var48.x4[3]+128)) + (((orc_uint16)(var48.x4[3]+128))>>8)))>>8;
/* Copy the final value lane by lane into var40 (presumably narrowing to bytes - confirm var40's type). */
18432 var40.x4[0] = var49.x4[0];
18433 var40.x4[1] = var49.x4[1];
18434 var40.x4[2] = var49.x4[2];
18435 var40.x4[3] = var49.x4[3];
/* Orc-backed entry point for orc_code_combine_in_u.
 * d1 is the destination array (also read by the program), s1 and s2 are the
 * source arrays, n is the element count supplied by the caller.
 * The OrcProgram pointer and its init flag are function-local statics, so the
 * program object persists across calls; construction happens between
 * orc_once_mutex_lock() and orc_once_mutex_unlock(). */
18443 orc_code_combine_in_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n)
18445 OrcExecutor _ex, *ex = &_ex;
18446 static int p_inited = 0;
18447 static OrcProgram *p = 0;
18448 void (*func) (OrcExecutor *);
18451 orc_once_mutex_lock ();
18453 OrcCompileResult result;
18455 p = orc_program_new ();
18456 orc_program_set_name (p, "orc_code_combine_in_u");
/* Register the plain C implementation as this program's backup function. */
18457 orc_program_set_backup_function (p, _backup_orc_code_combine_in_u);
/* Variables: 4-byte destination and sources plus five 8-byte temporaries
 * (the instruction stream below only names t1, t4 and t5). */
18458 orc_program_add_destination (p, 4, "d1");
18459 orc_program_add_source (p, 4, "s1");
18460 orc_program_add_source (p, 4, "s2");
18461 orc_program_add_temporary (p, 8, "t1");
18462 orc_program_add_temporary (p, 8, "t2");
18463 orc_program_add_temporary (p, 8, "t3");
18464 orc_program_add_temporary (p, 8, "t4");
18465 orc_program_add_temporary (p, 8, "t5");
/* Instruction stream, in order:
 *   t4 = convubw (s1); t5 = convubw (s2); t5 = splatw3q (t5);
 *   t4 = div255w (mullw (t4, t5));
 *   t1 = convubw (d1); t5 = splatw3q (t1);
 *   t4 = div255w (mullw (t4, t5));
 *   d1 = convwb (t4).
 * Operand slots an opcode does not use appear to be padded with ORC_VAR_D1. */
18467 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
18468 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
18469 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
18470 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
18471 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
18472 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
18473 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
18474 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
18475 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
18476 orc_program_append_2 (p, "convwb", 2, ORC_VAR_D1, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
18478 result = orc_program_compile (p);
18481 orc_once_mutex_unlock ();
/* Bind the caller's arrays to the executor slots; the const qualifier on the sources is cast away for storage. */
18486 ex->arrays[ORC_VAR_D1] = d1;
18487 ex->arrays[ORC_VAR_S1] = (void *)s1;
18488 ex->arrays[ORC_VAR_S2] = (void *)s2;
/* Entry point of the program's executable code. */
18490 func = p->code_exec;
18496 /* orc_code_combine_in_u_n */
/* Plain C variant of orc_code_combine_in_u_n working directly on the caller's
 * arrays: d1 through ptr0, s1 through ptr4; n is the element count.
 * Per element the visible arithmetic is:
 *   result = div255 (widen (var37) * splat3 (widen (var38)))
 * NOTE(review): lane 3 is presumably the alpha channel - confirm. */
18499 orc_code_combine_in_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n){
18501 orc_union32 * ORC_RESTRICT ptr0;
18502 const orc_union32 * ORC_RESTRICT ptr4;
18512 ptr0 = (orc_union32 *)d1;
18513 ptr4 = (orc_union32 *)s1;
18516 for (i = 0; i < n; i++) {
/* Widen: reinterpret each lane of var37 as an unsigned byte. */
18520 var40.x4[0] = (orc_uint8)var37.x4[0];
18521 var40.x4[1] = (orc_uint8)var37.x4[1];
18522 var40.x4[2] = (orc_uint8)var37.x4[2];
18523 var40.x4[3] = (orc_uint8)var37.x4[3];
/* Same widening for var38. */
18527 var41.x4[0] = (orc_uint8)var38.x4[0];
18528 var41.x4[1] = (orc_uint8)var38.x4[1];
18529 var41.x4[2] = (orc_uint8)var38.x4[2];
18530 var41.x4[3] = (orc_uint8)var38.x4[3];
/* Broadcast bits 48..63 of var41 into all four 16-bit positions. */
18532 var42.i = ((((orc_uint64)var41.i)>>48) << 48) | ((((orc_uint64)var41.i)>>48)<<32) | ((((orc_uint64)var41.i)>>48) << 16) | ((((orc_uint64)var41.i)>>48));
/* Lane-wise multiply, keeping the low 16 bits of each product. */
18534 var43.x4[0] = (var40.x4[0] * var42.x4[0]) & 0xffff;
18535 var43.x4[1] = (var40.x4[1] * var42.x4[1]) & 0xffff;
18536 var43.x4[2] = (var40.x4[2] * var42.x4[2]) & 0xffff;
18537 var43.x4[3] = (var40.x4[3] * var42.x4[3]) & 0xffff;
/* div255w step: t = x + 128; result = (t + (t >> 8)) >> 8, in 16-bit arithmetic. */
18539 var44.x4[0] = ((uint16_t)(((orc_uint16)(var43.x4[0]+128)) + (((orc_uint16)(var43.x4[0]+128))>>8)))>>8;
18540 var44.x4[1] = ((uint16_t)(((orc_uint16)(var43.x4[1]+128)) + (((orc_uint16)(var43.x4[1]+128))>>8)))>>8;
18541 var44.x4[2] = ((uint16_t)(((orc_uint16)(var43.x4[2]+128)) + (((orc_uint16)(var43.x4[2]+128))>>8)))>>8;
18542 var44.x4[3] = ((uint16_t)(((orc_uint16)(var43.x4[3]+128)) + (((orc_uint16)(var43.x4[3]+128))>>8)))>>8;
/* Copy the final value lane by lane into var39 (presumably narrowing to bytes - confirm var39's type). */
18544 var39.x4[0] = var44.x4[0];
18545 var39.x4[1] = var44.x4[1];
18546 var39.x4[2] = var44.x4[2];
18547 var39.x4[3] = var44.x4[3];
/* Backup (plain C) implementation that orc_code_combine_in_u_n registers with
 * orc_program_set_backup_function().  Array pointers come from the executor:
 * arrays[0] through a writable pointer, arrays[4] through a const pointer
 * (presumably D1 and S1 - confirm).
 * Per element the visible arithmetic is:
 *   result = div255 (widen (var37) * splat3 (widen (var38))) */
18556 _backup_orc_code_combine_in_u_n (OrcExecutor * ORC_RESTRICT ex)
18560 orc_union32 * ORC_RESTRICT ptr0;
18561 const orc_union32 * ORC_RESTRICT ptr4;
18571 ptr0 = (orc_union32 *)ex->arrays[0];
18572 ptr4 = (orc_union32 *)ex->arrays[4];
18575 for (i = 0; i < n; i++) {
/* Widen: reinterpret each lane of var37 as an unsigned byte. */
18579 var40.x4[0] = (orc_uint8)var37.x4[0];
18580 var40.x4[1] = (orc_uint8)var37.x4[1];
18581 var40.x4[2] = (orc_uint8)var37.x4[2];
18582 var40.x4[3] = (orc_uint8)var37.x4[3];
/* Same widening for var38. */
18586 var41.x4[0] = (orc_uint8)var38.x4[0];
18587 var41.x4[1] = (orc_uint8)var38.x4[1];
18588 var41.x4[2] = (orc_uint8)var38.x4[2];
18589 var41.x4[3] = (orc_uint8)var38.x4[3];
/* Broadcast bits 48..63 of var41 into all four 16-bit positions. */
18591 var42.i = ((((orc_uint64)var41.i)>>48) << 48) | ((((orc_uint64)var41.i)>>48)<<32) | ((((orc_uint64)var41.i)>>48) << 16) | ((((orc_uint64)var41.i)>>48));
/* Lane-wise multiply, keeping the low 16 bits of each product. */
18593 var43.x4[0] = (var40.x4[0] * var42.x4[0]) & 0xffff;
18594 var43.x4[1] = (var40.x4[1] * var42.x4[1]) & 0xffff;
18595 var43.x4[2] = (var40.x4[2] * var42.x4[2]) & 0xffff;
18596 var43.x4[3] = (var40.x4[3] * var42.x4[3]) & 0xffff;
/* div255w step: t = x + 128; result = (t + (t >> 8)) >> 8, in 16-bit arithmetic. */
18598 var44.x4[0] = ((uint16_t)(((orc_uint16)(var43.x4[0]+128)) + (((orc_uint16)(var43.x4[0]+128))>>8)))>>8;
18599 var44.x4[1] = ((uint16_t)(((orc_uint16)(var43.x4[1]+128)) + (((orc_uint16)(var43.x4[1]+128))>>8)))>>8;
18600 var44.x4[2] = ((uint16_t)(((orc_uint16)(var43.x4[2]+128)) + (((orc_uint16)(var43.x4[2]+128))>>8)))>>8;
18601 var44.x4[3] = ((uint16_t)(((orc_uint16)(var43.x4[3]+128)) + (((orc_uint16)(var43.x4[3]+128))>>8)))>>8;
/* Copy the final value lane by lane into var39 (presumably narrowing to bytes - confirm var39's type). */
18603 var39.x4[0] = var44.x4[0];
18604 var39.x4[1] = var44.x4[1];
18605 var39.x4[2] = var44.x4[2];
18606 var39.x4[3] = var44.x4[3];
/* Orc-backed entry point for orc_code_combine_in_u_n.
 * d1 is the destination array (also read by the program), s1 is the source
 * array, n is the element count supplied by the caller.
 * The OrcProgram pointer and its init flag are function-local statics, so the
 * program object persists across calls; construction happens between
 * orc_once_mutex_lock() and orc_once_mutex_unlock(). */
18614 orc_code_combine_in_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n)
18616 OrcExecutor _ex, *ex = &_ex;
18617 static int p_inited = 0;
18618 static OrcProgram *p = 0;
18619 void (*func) (OrcExecutor *);
18622 orc_once_mutex_lock ();
18624 OrcCompileResult result;
18626 p = orc_program_new ();
18627 orc_program_set_name (p, "orc_code_combine_in_u_n");
/* Register the plain C implementation as this program's backup function. */
18628 orc_program_set_backup_function (p, _backup_orc_code_combine_in_u_n);
/* Variables: 4-byte destination and source plus five 8-byte temporaries
 * (the instruction stream below only names t1, t4 and t5). */
18629 orc_program_add_destination (p, 4, "d1");
18630 orc_program_add_source (p, 4, "s1");
18631 orc_program_add_temporary (p, 8, "t1");
18632 orc_program_add_temporary (p, 8, "t2");
18633 orc_program_add_temporary (p, 8, "t3");
18634 orc_program_add_temporary (p, 8, "t4");
18635 orc_program_add_temporary (p, 8, "t5");
/* Instruction stream, in order:
 *   t4 = convubw (s1); t1 = convubw (d1); t5 = splatw3q (t1);
 *   t4 = div255w (mullw (t4, t5));
 *   d1 = convwb (t4).
 * Operand slots an opcode does not use appear to be padded with ORC_VAR_D1. */
18637 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
18638 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
18639 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
18640 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
18641 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
18642 orc_program_append_2 (p, "convwb", 2, ORC_VAR_D1, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
18644 result = orc_program_compile (p);
18647 orc_once_mutex_unlock ();
/* Bind the caller's arrays to the executor slots; the const qualifier on the source is cast away for storage. */
18652 ex->arrays[ORC_VAR_D1] = d1;
18653 ex->arrays[ORC_VAR_S1] = (void *)s1;
/* Entry point of the program's executable code. */
18655 func = p->code_exec;
18661 /* orc_code_combine_out_u */
/* Plain C variant of orc_code_combine_out_u working directly on the caller's
 * arrays: d1 through ptr0, s1 through ptr4, s2 through ptr5; n is the element
 * count.
 * Per element the visible arithmetic is:
 *   a      = div255 (widen (var37) * splat3 (widen (var38)))
 *   result = div255 (a * (splat3 (widen (var39)) ^ 255))
 * NOTE(review): lane 3 is presumably the alpha channel - confirm. */
18664 orc_code_combine_out_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n){
18666 orc_union32 * ORC_RESTRICT ptr0;
18667 const orc_union32 * ORC_RESTRICT ptr4;
18668 const orc_union32 * ORC_RESTRICT ptr5;
18685 ptr0 = (orc_union32 *)d1;
18686 ptr4 = (orc_union32 *)s1;
18687 ptr5 = (orc_union32 *)s2;
/* Loop-invariant constant 255 in every lane; x ^ 255 equals 255 - x for x in 0..255. */
18690 var40.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
18691 var40.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
18692 var40.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
18693 var40.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
18695 for (i = 0; i < n; i++) {
/* Widen: reinterpret each lane of var37 as an unsigned byte. */
18699 var42.x4[0] = (orc_uint8)var37.x4[0];
18700 var42.x4[1] = (orc_uint8)var37.x4[1];
18701 var42.x4[2] = (orc_uint8)var37.x4[2];
18702 var42.x4[3] = (orc_uint8)var37.x4[3];
/* Same widening for var38. */
18706 var43.x4[0] = (orc_uint8)var38.x4[0];
18707 var43.x4[1] = (orc_uint8)var38.x4[1];
18708 var43.x4[2] = (orc_uint8)var38.x4[2];
18709 var43.x4[3] = (orc_uint8)var38.x4[3];
/* Broadcast bits 48..63 of var43 into all four 16-bit positions. */
18711 var44.i = ((((orc_uint64)var43.i)>>48) << 48) | ((((orc_uint64)var43.i)>>48)<<32) | ((((orc_uint64)var43.i)>>48) << 16) | ((((orc_uint64)var43.i)>>48));
/* Lane-wise multiply, keeping the low 16 bits of each product. */
18713 var45.x4[0] = (var42.x4[0] * var44.x4[0]) & 0xffff;
18714 var45.x4[1] = (var42.x4[1] * var44.x4[1]) & 0xffff;
18715 var45.x4[2] = (var42.x4[2] * var44.x4[2]) & 0xffff;
18716 var45.x4[3] = (var42.x4[3] * var44.x4[3]) & 0xffff;
/* div255w step: t = x + 128; result = (t + (t >> 8)) >> 8, in 16-bit arithmetic. */
18718 var46.x4[0] = ((uint16_t)(((orc_uint16)(var45.x4[0]+128)) + (((orc_uint16)(var45.x4[0]+128))>>8)))>>8;
18719 var46.x4[1] = ((uint16_t)(((orc_uint16)(var45.x4[1]+128)) + (((orc_uint16)(var45.x4[1]+128))>>8)))>>8;
18720 var46.x4[2] = ((uint16_t)(((orc_uint16)(var45.x4[2]+128)) + (((orc_uint16)(var45.x4[2]+128))>>8)))>>8;
18721 var46.x4[3] = ((uint16_t)(((orc_uint16)(var45.x4[3]+128)) + (((orc_uint16)(var45.x4[3]+128))>>8)))>>8;
/* Widen var39 the same way as the sources. */
18725 var47.x4[0] = (orc_uint8)var39.x4[0];
18726 var47.x4[1] = (orc_uint8)var39.x4[1];
18727 var47.x4[2] = (orc_uint8)var39.x4[2];
18728 var47.x4[3] = (orc_uint8)var39.x4[3];
/* Broadcast bits 48..63 of var47 into all four 16-bit positions. */
18730 var48.i = ((((orc_uint64)var47.i)>>48) << 48) | ((((orc_uint64)var47.i)>>48)<<32) | ((((orc_uint64)var47.i)>>48) << 16) | ((((orc_uint64)var47.i)>>48));
/* Complement the broadcast factor against 255. */
18732 var49.x4[0] = var48.x4[0] ^ var40.x4[0];
18733 var49.x4[1] = var48.x4[1] ^ var40.x4[1];
18734 var49.x4[2] = var48.x4[2] ^ var40.x4[2];
18735 var49.x4[3] = var48.x4[3] ^ var40.x4[3];
/* Scale the first result by the complemented factor, low 16 bits kept. */
18737 var50.x4[0] = (var46.x4[0] * var49.x4[0]) & 0xffff;
18738 var50.x4[1] = (var46.x4[1] * var49.x4[1]) & 0xffff;
18739 var50.x4[2] = (var46.x4[2] * var49.x4[2]) & 0xffff;
18740 var50.x4[3] = (var46.x4[3] * var49.x4[3]) & 0xffff;
/* Second div255w step, same formula as above. */
18742 var51.x4[0] = ((uint16_t)(((orc_uint16)(var50.x4[0]+128)) + (((orc_uint16)(var50.x4[0]+128))>>8)))>>8;
18743 var51.x4[1] = ((uint16_t)(((orc_uint16)(var50.x4[1]+128)) + (((orc_uint16)(var50.x4[1]+128))>>8)))>>8;
18744 var51.x4[2] = ((uint16_t)(((orc_uint16)(var50.x4[2]+128)) + (((orc_uint16)(var50.x4[2]+128))>>8)))>>8;
18745 var51.x4[3] = ((uint16_t)(((orc_uint16)(var50.x4[3]+128)) + (((orc_uint16)(var50.x4[3]+128))>>8)))>>8;
/* Copy the final value lane by lane into var41 (presumably narrowing to bytes - confirm var41's type). */
18747 var41.x4[0] = var51.x4[0];
18748 var41.x4[1] = var51.x4[1];
18749 var41.x4[2] = var51.x4[2];
18750 var41.x4[3] = var51.x4[3];
/* Backup (plain C) implementation that orc_code_combine_out_u registers with
 * orc_program_set_backup_function().  Array pointers come from the executor:
 * arrays[0] through a writable pointer, arrays[4] and arrays[5] through const
 * pointers (presumably D1, S1 and S2 - confirm).
 * Per element the visible arithmetic is:
 *   a      = div255 (widen (var37) * splat3 (widen (var38)))
 *   result = div255 (a * (splat3 (widen (var39)) ^ 255)) */
18759 _backup_orc_code_combine_out_u (OrcExecutor * ORC_RESTRICT ex)
18763 orc_union32 * ORC_RESTRICT ptr0;
18764 const orc_union32 * ORC_RESTRICT ptr4;
18765 const orc_union32 * ORC_RESTRICT ptr5;
18782 ptr0 = (orc_union32 *)ex->arrays[0];
18783 ptr4 = (orc_union32 *)ex->arrays[4];
18784 ptr5 = (orc_union32 *)ex->arrays[5];
/* Loop-invariant constant 255 in every lane; x ^ 255 equals 255 - x for x in 0..255. */
18787 var40.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
18788 var40.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
18789 var40.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
18790 var40.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
18792 for (i = 0; i < n; i++) {
/* Widen: reinterpret each lane of var37 as an unsigned byte. */
18796 var42.x4[0] = (orc_uint8)var37.x4[0];
18797 var42.x4[1] = (orc_uint8)var37.x4[1];
18798 var42.x4[2] = (orc_uint8)var37.x4[2];
18799 var42.x4[3] = (orc_uint8)var37.x4[3];
/* Same widening for var38. */
18803 var43.x4[0] = (orc_uint8)var38.x4[0];
18804 var43.x4[1] = (orc_uint8)var38.x4[1];
18805 var43.x4[2] = (orc_uint8)var38.x4[2];
18806 var43.x4[3] = (orc_uint8)var38.x4[3];
/* Broadcast bits 48..63 of var43 into all four 16-bit positions. */
18808 var44.i = ((((orc_uint64)var43.i)>>48) << 48) | ((((orc_uint64)var43.i)>>48)<<32) | ((((orc_uint64)var43.i)>>48) << 16) | ((((orc_uint64)var43.i)>>48));
/* Lane-wise multiply, keeping the low 16 bits of each product. */
18810 var45.x4[0] = (var42.x4[0] * var44.x4[0]) & 0xffff;
18811 var45.x4[1] = (var42.x4[1] * var44.x4[1]) & 0xffff;
18812 var45.x4[2] = (var42.x4[2] * var44.x4[2]) & 0xffff;
18813 var45.x4[3] = (var42.x4[3] * var44.x4[3]) & 0xffff;
/* div255w step: t = x + 128; result = (t + (t >> 8)) >> 8, in 16-bit arithmetic. */
18815 var46.x4[0] = ((uint16_t)(((orc_uint16)(var45.x4[0]+128)) + (((orc_uint16)(var45.x4[0]+128))>>8)))>>8;
18816 var46.x4[1] = ((uint16_t)(((orc_uint16)(var45.x4[1]+128)) + (((orc_uint16)(var45.x4[1]+128))>>8)))>>8;
18817 var46.x4[2] = ((uint16_t)(((orc_uint16)(var45.x4[2]+128)) + (((orc_uint16)(var45.x4[2]+128))>>8)))>>8;
18818 var46.x4[3] = ((uint16_t)(((orc_uint16)(var45.x4[3]+128)) + (((orc_uint16)(var45.x4[3]+128))>>8)))>>8;
/* Widen var39 the same way as the sources. */
18822 var47.x4[0] = (orc_uint8)var39.x4[0];
18823 var47.x4[1] = (orc_uint8)var39.x4[1];
18824 var47.x4[2] = (orc_uint8)var39.x4[2];
18825 var47.x4[3] = (orc_uint8)var39.x4[3];
/* Broadcast bits 48..63 of var47 into all four 16-bit positions. */
18827 var48.i = ((((orc_uint64)var47.i)>>48) << 48) | ((((orc_uint64)var47.i)>>48)<<32) | ((((orc_uint64)var47.i)>>48) << 16) | ((((orc_uint64)var47.i)>>48));
/* Complement the broadcast factor against 255. */
18829 var49.x4[0] = var48.x4[0] ^ var40.x4[0];
18830 var49.x4[1] = var48.x4[1] ^ var40.x4[1];
18831 var49.x4[2] = var48.x4[2] ^ var40.x4[2];
18832 var49.x4[3] = var48.x4[3] ^ var40.x4[3];
/* Scale the first result by the complemented factor, low 16 bits kept. */
18834 var50.x4[0] = (var46.x4[0] * var49.x4[0]) & 0xffff;
18835 var50.x4[1] = (var46.x4[1] * var49.x4[1]) & 0xffff;
18836 var50.x4[2] = (var46.x4[2] * var49.x4[2]) & 0xffff;
18837 var50.x4[3] = (var46.x4[3] * var49.x4[3]) & 0xffff;
/* Second div255w step, same formula as above. */
18839 var51.x4[0] = ((uint16_t)(((orc_uint16)(var50.x4[0]+128)) + (((orc_uint16)(var50.x4[0]+128))>>8)))>>8;
18840 var51.x4[1] = ((uint16_t)(((orc_uint16)(var50.x4[1]+128)) + (((orc_uint16)(var50.x4[1]+128))>>8)))>>8;
18841 var51.x4[2] = ((uint16_t)(((orc_uint16)(var50.x4[2]+128)) + (((orc_uint16)(var50.x4[2]+128))>>8)))>>8;
18842 var51.x4[3] = ((uint16_t)(((orc_uint16)(var50.x4[3]+128)) + (((orc_uint16)(var50.x4[3]+128))>>8)))>>8;
/* Copy the final value lane by lane into var41 (presumably narrowing to bytes - confirm var41's type). */
18844 var41.x4[0] = var51.x4[0];
18845 var41.x4[1] = var51.x4[1];
18846 var41.x4[2] = var51.x4[2];
18847 var41.x4[3] = var51.x4[3];
/* Orc-backed entry point for orc_code_combine_out_u.
 * d1 is the destination array (also read by the program), s1 and s2 are the
 * source arrays, n is the element count supplied by the caller.
 * The OrcProgram pointer and its init flag are function-local statics, so the
 * program object persists across calls; construction happens between
 * orc_once_mutex_lock() and orc_once_mutex_unlock(). */
18855 orc_code_combine_out_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n)
18857 OrcExecutor _ex, *ex = &_ex;
18858 static int p_inited = 0;
18859 static OrcProgram *p = 0;
18860 void (*func) (OrcExecutor *);
18863 orc_once_mutex_lock ();
18865 OrcCompileResult result;
18867 p = orc_program_new ();
18868 orc_program_set_name (p, "orc_code_combine_out_u");
/* Register the plain C implementation as this program's backup function. */
18869 orc_program_set_backup_function (p, _backup_orc_code_combine_out_u);
/* Variables: 4-byte destination and sources, constant 255, and five 8-byte
 * temporaries (the instruction stream below only names t1, t4 and t5). */
18870 orc_program_add_destination (p, 4, "d1");
18871 orc_program_add_source (p, 4, "s1");
18872 orc_program_add_source (p, 4, "s2");
18873 orc_program_add_constant (p, 4, 0x000000ff, "c1");
18874 orc_program_add_temporary (p, 8, "t1");
18875 orc_program_add_temporary (p, 8, "t2");
18876 orc_program_add_temporary (p, 8, "t3");
18877 orc_program_add_temporary (p, 8, "t4");
18878 orc_program_add_temporary (p, 8, "t5");
/* Instruction stream, in order:
 *   t4 = convubw (s1); t5 = convubw (s2); t5 = splatw3q (t5);
 *   t4 = div255w (mullw (t4, t5));
 *   t1 = convubw (d1); t5 = splatw3q (t1); t5 = xorw (t5, c1);
 *   t4 = div255w (mullw (t4, t5));
 *   d1 = convwb (t4).
 * Operand slots an opcode does not use appear to be padded with ORC_VAR_D1. */
18880 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
18881 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
18882 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
18883 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
18884 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
18885 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
18886 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
18887 orc_program_append_2 (p, "xorw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C1, ORC_VAR_D1);
18888 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
18889 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
18890 orc_program_append_2 (p, "convwb", 2, ORC_VAR_D1, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
18892 result = orc_program_compile (p);
18895 orc_once_mutex_unlock ();
/* Bind the caller's arrays to the executor slots; the const qualifier on the sources is cast away for storage. */
18900 ex->arrays[ORC_VAR_D1] = d1;
18901 ex->arrays[ORC_VAR_S1] = (void *)s1;
18902 ex->arrays[ORC_VAR_S2] = (void *)s2;
/* Entry point of the program's executable code. */
18904 func = p->code_exec;
18910 /* orc_code_combine_out_u_n */
/* Plain C variant of orc_code_combine_out_u_n working directly on the
 * caller's arrays: d1 through ptr0, s1 through ptr4; n is the element count.
 * Per element the visible arithmetic is:
 *   result = div255 (widen (var37) * (splat3 (widen (var38)) ^ 255))
 * NOTE(review): lane 3 is presumably the alpha channel - confirm. */
18913 orc_code_combine_out_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n){
18915 orc_union32 * ORC_RESTRICT ptr0;
18916 const orc_union32 * ORC_RESTRICT ptr4;
18928 ptr0 = (orc_union32 *)d1;
18929 ptr4 = (orc_union32 *)s1;
/* Loop-invariant constant 255 in every lane; x ^ 255 equals 255 - x for x in 0..255. */
18932 var39.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
18933 var39.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
18934 var39.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
18935 var39.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
18937 for (i = 0; i < n; i++) {
/* Widen: reinterpret each lane of var37 as an unsigned byte. */
18941 var41.x4[0] = (orc_uint8)var37.x4[0];
18942 var41.x4[1] = (orc_uint8)var37.x4[1];
18943 var41.x4[2] = (orc_uint8)var37.x4[2];
18944 var41.x4[3] = (orc_uint8)var37.x4[3];
/* Same widening for var38. */
18948 var42.x4[0] = (orc_uint8)var38.x4[0];
18949 var42.x4[1] = (orc_uint8)var38.x4[1];
18950 var42.x4[2] = (orc_uint8)var38.x4[2];
18951 var42.x4[3] = (orc_uint8)var38.x4[3];
/* Broadcast bits 48..63 of var42 into all four 16-bit positions. */
18953 var43.i = ((((orc_uint64)var42.i)>>48) << 48) | ((((orc_uint64)var42.i)>>48)<<32) | ((((orc_uint64)var42.i)>>48) << 16) | ((((orc_uint64)var42.i)>>48));
/* Complement the broadcast factor against 255. */
18955 var44.x4[0] = var43.x4[0] ^ var39.x4[0];
18956 var44.x4[1] = var43.x4[1] ^ var39.x4[1];
18957 var44.x4[2] = var43.x4[2] ^ var39.x4[2];
18958 var44.x4[3] = var43.x4[3] ^ var39.x4[3];
/* Lane-wise multiply, keeping the low 16 bits of each product. */
18960 var45.x4[0] = (var41.x4[0] * var44.x4[0]) & 0xffff;
18961 var45.x4[1] = (var41.x4[1] * var44.x4[1]) & 0xffff;
18962 var45.x4[2] = (var41.x4[2] * var44.x4[2]) & 0xffff;
18963 var45.x4[3] = (var41.x4[3] * var44.x4[3]) & 0xffff;
/* div255w step: t = x + 128; result = (t + (t >> 8)) >> 8, in 16-bit arithmetic. */
18965 var46.x4[0] = ((uint16_t)(((orc_uint16)(var45.x4[0]+128)) + (((orc_uint16)(var45.x4[0]+128))>>8)))>>8;
18966 var46.x4[1] = ((uint16_t)(((orc_uint16)(var45.x4[1]+128)) + (((orc_uint16)(var45.x4[1]+128))>>8)))>>8;
18967 var46.x4[2] = ((uint16_t)(((orc_uint16)(var45.x4[2]+128)) + (((orc_uint16)(var45.x4[2]+128))>>8)))>>8;
18968 var46.x4[3] = ((uint16_t)(((orc_uint16)(var45.x4[3]+128)) + (((orc_uint16)(var45.x4[3]+128))>>8)))>>8;
/* Copy the final value lane by lane into var40 (presumably narrowing to bytes - confirm var40's type). */
18970 var40.x4[0] = var46.x4[0];
18971 var40.x4[1] = var46.x4[1];
18972 var40.x4[2] = var46.x4[2];
18973 var40.x4[3] = var46.x4[3];
/*
 * _backup_orc_code_combine_out_u_n -- C implementation driven by an
 * OrcExecutor; it is the function registered with
 * orc_program_set_backup_function() for "orc_code_combine_out_u_n".
 *
 *   ex: executor; ex->arrays[0] is used as the destination (ptr0) and
 *       ex->arrays[4] as the source (ptr4).
 *
 * Per 16-bit lane the visible arithmetic is
 *     var40 = div255 (var41 * (splat (var42) ^ 0xff))
 * NOTE(review): the declarations, the assignment of n, the loads that
 * fill var37/var38 and the store of var40 are not visible in this
 * excerpt -- confirm against the full file.
 */
18982 _backup_orc_code_combine_out_u_n (OrcExecutor * ORC_RESTRICT ex)
18986 orc_union32 * ORC_RESTRICT ptr0;
18987 const orc_union32 * ORC_RESTRICT ptr4;
18999 ptr0 = (orc_union32 *)ex->arrays[0];
19000 ptr4 = (orc_union32 *)ex->arrays[4];
/* Loop-invariant constant: 0xff in each of the four lanes (XOR operand below). */
19003 var39.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
19004 var39.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
19005 var39.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
19006 var39.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
19008 for (i = 0; i < n; i++) {
/* Each lane of var37 is cast to orc_uint8 and stored in the matching lane of var41. */
19012 var41.x4[0] = (orc_uint8)var37.x4[0];
19013 var41.x4[1] = (orc_uint8)var37.x4[1];
19014 var41.x4[2] = (orc_uint8)var37.x4[2];
19015 var41.x4[3] = (orc_uint8)var37.x4[3];
/* Same unsigned-byte conversion for var38 into var42. */
19019 var42.x4[0] = (orc_uint8)var38.x4[0];
19020 var42.x4[1] = (orc_uint8)var38.x4[1];
19021 var42.x4[2] = (orc_uint8)var38.x4[2];
19022 var42.x4[3] = (orc_uint8)var38.x4[3];
/* Replicate bits 48..63 of var42.i into all four 16-bit fields of var43.i. */
19024 var43.i = ((((orc_uint64)var42.i)>>48) << 48) | ((((orc_uint64)var42.i)>>48)<<32) | ((((orc_uint64)var42.i)>>48) << 16) | ((((orc_uint64)var42.i)>>48));
/* XOR with 0xff; for a lane value a in 0..255 this yields 255 - a. */
19026 var44.x4[0] = var43.x4[0] ^ var39.x4[0];
19027 var44.x4[1] = var43.x4[1] ^ var39.x4[1];
19028 var44.x4[2] = var43.x4[2] ^ var39.x4[2];
19029 var44.x4[3] = var43.x4[3] ^ var39.x4[3];
/* Lane-wise product, keeping only the low 16 bits. */
19031 var45.x4[0] = (var41.x4[0] * var44.x4[0]) & 0xffff;
19032 var45.x4[1] = (var41.x4[1] * var44.x4[1]) & 0xffff;
19033 var45.x4[2] = (var41.x4[2] * var44.x4[2]) & 0xffff;
19034 var45.x4[3] = (var41.x4[3] * var44.x4[3]) & 0xffff;
/* Shift-and-add divide by 255: with t = x + 128, result = (t + (t >> 8)) >> 8, in 16-bit unsigned arithmetic. */
19036 var46.x4[0] = ((uint16_t)(((orc_uint16)(var45.x4[0]+128)) + (((orc_uint16)(var45.x4[0]+128))>>8)))>>8;
19037 var46.x4[1] = ((uint16_t)(((orc_uint16)(var45.x4[1]+128)) + (((orc_uint16)(var45.x4[1]+128))>>8)))>>8;
19038 var46.x4[2] = ((uint16_t)(((orc_uint16)(var45.x4[2]+128)) + (((orc_uint16)(var45.x4[2]+128))>>8)))>>8;
19039 var46.x4[3] = ((uint16_t)(((orc_uint16)(var45.x4[3]+128)) + (((orc_uint16)(var45.x4[3]+128))>>8)))>>8;
/* Copy the four quotient lanes into var40 (NOTE(review): presumably the value stored to ptr0[i]; store not shown). */
19041 var40.x4[0] = var46.x4[0];
19042 var40.x4[1] = var46.x4[1];
19043 var40.x4[2] = var46.x4[2];
19044 var40.x4[3] = var46.x4[3];
/*
 * orc_code_combine_out_u_n -- OrcProgram-based entry point.
 *
 * Builds an OrcProgram named "orc_code_combine_out_u_n" between
 * orc_once_mutex_lock() and orc_once_mutex_unlock(), compiles it, then
 * fills a stack OrcExecutor with d1/s1 and picks up p->code_exec.
 *
 *   d1: destination array (stored in ex->arrays[ORC_VAR_D1]).
 *   s1: source array (stored in ex->arrays[ORC_VAR_S1]).
 *   n:  element count.
 *
 * NOTE(review): the guard that tests p_inited, the braces, the handling
 * of `result`, the assignment of n to the executor and the call through
 * `func` are not visible in this excerpt -- confirm against the full file.
 */
19052 orc_code_combine_out_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n)
19054 OrcExecutor _ex, *ex = &_ex;
/* Function-local statics: the program object persists across calls. */
19055 static int p_inited = 0;
19056 static OrcProgram *p = 0;
19057 void (*func) (OrcExecutor *);
19060 orc_once_mutex_lock ();
19062 OrcCompileResult result;
19064 p = orc_program_new ();
19065 orc_program_set_name (p, "orc_code_combine_out_u_n");
19066 orc_program_set_backup_function (p, _backup_orc_code_combine_out_u_n);
/* Variables: one 4-byte destination, one 4-byte source, the constant 0xff and five 8-byte temporaries. */
/* t2 and t3 are declared but not referenced by the opcodes appended below. */
19067 orc_program_add_destination (p, 4, "d1");
19068 orc_program_add_source (p, 4, "s1");
19069 orc_program_add_constant (p, 4, 0x000000ff, "c1");
19070 orc_program_add_temporary (p, 8, "t1");
19071 orc_program_add_temporary (p, 8, "t2");
19072 orc_program_add_temporary (p, 8, "t3");
19073 orc_program_add_temporary (p, 8, "t4");
19074 orc_program_add_temporary (p, 8, "t5");
/*
 * Opcode sequence. NOTE(review): the variable arguments are read here as
 * (dest, src1, src2, ...), with unused slots filled by ORC_VAR_D1 --
 * confirm against the orc_program_append_2 documentation.
 *   t4 = convubw (s1)
 *   t1 = convubw (d1)
 *   t5 = splatw3q (t1)
 *   t5 = t5 ^ c1
 *   t4 = t4 * t5          (mullw)
 *   t4 = div255w (t4)
 *   d1 = convwb (t4)
 */
19076 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
19077 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
19078 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
19079 orc_program_append_2 (p, "xorw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C1, ORC_VAR_D1);
19080 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
19081 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
19082 orc_program_append_2 (p, "convwb", 2, ORC_VAR_D1, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
19084 result = orc_program_compile (p);
19087 orc_once_mutex_unlock ();
/* Bind the caller's arrays to the executor slots; the const qualifier of s1 is cast away for storage. */
19092 ex->arrays[ORC_VAR_D1] = d1;
19093 ex->arrays[ORC_VAR_S1] = (void *)s1;
/* Entry point taken from the program's code_exec field. */
19095 func = p->code_exec;
19101 /* orc_code_combine_atop_u */
/*
 * orc_code_combine_atop_u -- plain C loop form of this kernel.
 *
 *   d1: destination array, addressed through ptr0.
 *   s1: first source array, addressed through ptr4.
 *   s2: second source array, addressed through ptr5.
 *   n:  number of loop iterations.
 *
 * Per lane the visible arithmetic is
 *     m     = div255 (var46 * splat (var47))
 *     var45 = ORC_CLAMP_UB (div255 (m * splat (var51))
 *                           + div255 (var56 * (splat (m) ^ 0xff)))
 * NOTE(review): the declarations, the loads that fill var40..var43 and
 * the store of var45 are not visible in this excerpt. ORC_CLAMP_UB is
 * defined elsewhere; presumably it clamps to the unsigned-byte range.
 */
19104 orc_code_combine_atop_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n){
19106 orc_union32 * ORC_RESTRICT ptr0;
19107 const orc_union32 * ORC_RESTRICT ptr4;
19108 const orc_union32 * ORC_RESTRICT ptr5;
19132 ptr0 = (orc_union32 *)d1;
19133 ptr4 = (orc_union32 *)s1;
19134 ptr5 = (orc_union32 *)s2;
/* Loop-invariant constant: 0xff in each of the four lanes (XOR operand below). */
19137 var44.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
19138 var44.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
19139 var44.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
19140 var44.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
19142 for (i = 0; i < n; i++) {
/* Unsigned-byte conversion of var40 into var46. */
19146 var46.x4[0] = (orc_uint8)var40.x4[0];
19147 var46.x4[1] = (orc_uint8)var40.x4[1];
19148 var46.x4[2] = (orc_uint8)var40.x4[2];
19149 var46.x4[3] = (orc_uint8)var40.x4[3];
/* Unsigned-byte conversion of var41 into var47. */
19153 var47.x4[0] = (orc_uint8)var41.x4[0];
19154 var47.x4[1] = (orc_uint8)var41.x4[1];
19155 var47.x4[2] = (orc_uint8)var41.x4[2];
19156 var47.x4[3] = (orc_uint8)var41.x4[3];
/* Replicate bits 48..63 of var47.i into all four 16-bit fields. */
19158 var48.i = ((((orc_uint64)var47.i)>>48) << 48) | ((((orc_uint64)var47.i)>>48)<<32) | ((((orc_uint64)var47.i)>>48) << 16) | ((((orc_uint64)var47.i)>>48));
/* var46 scaled by the replicated value (low 16 bits of each product). */
19160 var49.x4[0] = (var46.x4[0] * var48.x4[0]) & 0xffff;
19161 var49.x4[1] = (var46.x4[1] * var48.x4[1]) & 0xffff;
19162 var49.x4[2] = (var46.x4[2] * var48.x4[2]) & 0xffff;
19163 var49.x4[3] = (var46.x4[3] * var48.x4[3]) & 0xffff;
/* Shift-and-add divide by 255: with t = x + 128, result = (t + (t >> 8)) >> 8. var50 is reused twice below. */
19165 var50.x4[0] = ((uint16_t)(((orc_uint16)(var49.x4[0]+128)) + (((orc_uint16)(var49.x4[0]+128))>>8)))>>8;
19166 var50.x4[1] = ((uint16_t)(((orc_uint16)(var49.x4[1]+128)) + (((orc_uint16)(var49.x4[1]+128))>>8)))>>8;
19167 var50.x4[2] = ((uint16_t)(((orc_uint16)(var49.x4[2]+128)) + (((orc_uint16)(var49.x4[2]+128))>>8)))>>8;
19168 var50.x4[3] = ((uint16_t)(((orc_uint16)(var49.x4[3]+128)) + (((orc_uint16)(var49.x4[3]+128))>>8)))>>8;
/* Unsigned-byte conversion of var42 into var51. */
19172 var51.x4[0] = (orc_uint8)var42.x4[0];
19173 var51.x4[1] = (orc_uint8)var42.x4[1];
19174 var51.x4[2] = (orc_uint8)var42.x4[2];
19175 var51.x4[3] = (orc_uint8)var42.x4[3];
/* Replicate bits 48..63 of var51.i into all four 16-bit fields. */
19177 var52.i = ((((orc_uint64)var51.i)>>48) << 48) | ((((orc_uint64)var51.i)>>48)<<32) | ((((orc_uint64)var51.i)>>48) << 16) | ((((orc_uint64)var51.i)>>48));
/* First addend: var50 scaled by the replicated value, then divided by 255. */
19179 var53.x4[0] = (var50.x4[0] * var52.x4[0]) & 0xffff;
19180 var53.x4[1] = (var50.x4[1] * var52.x4[1]) & 0xffff;
19181 var53.x4[2] = (var50.x4[2] * var52.x4[2]) & 0xffff;
19182 var53.x4[3] = (var50.x4[3] * var52.x4[3]) & 0xffff;
19184 var54.x4[0] = ((uint16_t)(((orc_uint16)(var53.x4[0]+128)) + (((orc_uint16)(var53.x4[0]+128))>>8)))>>8;
19185 var54.x4[1] = ((uint16_t)(((orc_uint16)(var53.x4[1]+128)) + (((orc_uint16)(var53.x4[1]+128))>>8)))>>8;
19186 var54.x4[2] = ((uint16_t)(((orc_uint16)(var53.x4[2]+128)) + (((orc_uint16)(var53.x4[2]+128))>>8)))>>8;
19187 var54.x4[3] = ((uint16_t)(((orc_uint16)(var53.x4[3]+128)) + (((orc_uint16)(var53.x4[3]+128))>>8)))>>8;
/* Lane copy of the first addend into var55. */
19189 var55.x4[0] = var54.x4[0];
19190 var55.x4[1] = var54.x4[1];
19191 var55.x4[2] = var54.x4[2];
19192 var55.x4[3] = var54.x4[3];
/* Unsigned-byte conversion of var43 into var56. */
19196 var56.x4[0] = (orc_uint8)var43.x4[0];
19197 var56.x4[1] = (orc_uint8)var43.x4[1];
19198 var56.x4[2] = (orc_uint8)var43.x4[2];
19199 var56.x4[3] = (orc_uint8)var43.x4[3];
/* Replicate bits 48..63 of var50.i (the first quotient) into all four 16-bit fields. */
19201 var57.i = ((((orc_uint64)var50.i)>>48) << 48) | ((((orc_uint64)var50.i)>>48)<<32) | ((((orc_uint64)var50.i)>>48) << 16) | ((((orc_uint64)var50.i)>>48));
/* XOR with 0xff; for a lane value a in 0..255 this yields 255 - a. */
19203 var58.x4[0] = var57.x4[0] ^ var44.x4[0];
19204 var58.x4[1] = var57.x4[1] ^ var44.x4[1];
19205 var58.x4[2] = var57.x4[2] ^ var44.x4[2];
19206 var58.x4[3] = var57.x4[3] ^ var44.x4[3];
/* Second addend: var56 scaled by the complemented value, then divided by 255. */
19208 var59.x4[0] = (var56.x4[0] * var58.x4[0]) & 0xffff;
19209 var59.x4[1] = (var56.x4[1] * var58.x4[1]) & 0xffff;
19210 var59.x4[2] = (var56.x4[2] * var58.x4[2]) & 0xffff;
19211 var59.x4[3] = (var56.x4[3] * var58.x4[3]) & 0xffff;
19213 var60.x4[0] = ((uint16_t)(((orc_uint16)(var59.x4[0]+128)) + (((orc_uint16)(var59.x4[0]+128))>>8)))>>8;
19214 var60.x4[1] = ((uint16_t)(((orc_uint16)(var59.x4[1]+128)) + (((orc_uint16)(var59.x4[1]+128))>>8)))>>8;
19215 var60.x4[2] = ((uint16_t)(((orc_uint16)(var59.x4[2]+128)) + (((orc_uint16)(var59.x4[2]+128))>>8)))>>8;
19216 var60.x4[3] = ((uint16_t)(((orc_uint16)(var59.x4[3]+128)) + (((orc_uint16)(var59.x4[3]+128))>>8)))>>8;
/* Lane copy of the second addend into var61. */
19218 var61.x4[0] = var60.x4[0];
19219 var61.x4[1] = var60.x4[1];
19220 var61.x4[2] = var60.x4[2];
19221 var61.x4[3] = var60.x4[3];
/* Sum of the two addends (each cast to orc_uint8) passed through ORC_CLAMP_UB. */
19223 var45.x4[0] = ORC_CLAMP_UB((orc_uint8)var55.x4[0] + (orc_uint8)var61.x4[0]);
19224 var45.x4[1] = ORC_CLAMP_UB((orc_uint8)var55.x4[1] + (orc_uint8)var61.x4[1]);
19225 var45.x4[2] = ORC_CLAMP_UB((orc_uint8)var55.x4[2] + (orc_uint8)var61.x4[2]);
19226 var45.x4[3] = ORC_CLAMP_UB((orc_uint8)var55.x4[3] + (orc_uint8)var61.x4[3]);
/*
 * _backup_orc_code_combine_atop_u -- C implementation driven by an
 * OrcExecutor; it is the function registered with
 * orc_program_set_backup_function() for "orc_code_combine_atop_u".
 *
 *   ex: executor; ex->arrays[0] is used as the destination (ptr0),
 *       ex->arrays[4] and ex->arrays[5] as the two sources (ptr4, ptr5).
 *
 * Per lane the visible arithmetic is
 *     m     = div255 (var46 * splat (var47))
 *     var45 = ORC_CLAMP_UB (div255 (m * splat (var51))
 *                           + div255 (var56 * (splat (m) ^ 0xff)))
 * NOTE(review): the declarations, the assignment of n, the loads that
 * fill var40..var43 and the store of var45 are not visible in this
 * excerpt. ORC_CLAMP_UB is defined elsewhere; presumably it clamps to
 * the unsigned-byte range.
 */
19235 _backup_orc_code_combine_atop_u (OrcExecutor * ORC_RESTRICT ex)
19239 orc_union32 * ORC_RESTRICT ptr0;
19240 const orc_union32 * ORC_RESTRICT ptr4;
19241 const orc_union32 * ORC_RESTRICT ptr5;
19265 ptr0 = (orc_union32 *)ex->arrays[0];
19266 ptr4 = (orc_union32 *)ex->arrays[4];
19267 ptr5 = (orc_union32 *)ex->arrays[5];
/* Loop-invariant constant: 0xff in each of the four lanes (XOR operand below). */
19270 var44.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
19271 var44.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
19272 var44.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
19273 var44.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
19275 for (i = 0; i < n; i++) {
/* Unsigned-byte conversion of var40 into var46. */
19279 var46.x4[0] = (orc_uint8)var40.x4[0];
19280 var46.x4[1] = (orc_uint8)var40.x4[1];
19281 var46.x4[2] = (orc_uint8)var40.x4[2];
19282 var46.x4[3] = (orc_uint8)var40.x4[3];
/* Unsigned-byte conversion of var41 into var47. */
19286 var47.x4[0] = (orc_uint8)var41.x4[0];
19287 var47.x4[1] = (orc_uint8)var41.x4[1];
19288 var47.x4[2] = (orc_uint8)var41.x4[2];
19289 var47.x4[3] = (orc_uint8)var41.x4[3];
/* Replicate bits 48..63 of var47.i into all four 16-bit fields. */
19291 var48.i = ((((orc_uint64)var47.i)>>48) << 48) | ((((orc_uint64)var47.i)>>48)<<32) | ((((orc_uint64)var47.i)>>48) << 16) | ((((orc_uint64)var47.i)>>48));
/* var46 scaled by the replicated value (low 16 bits of each product). */
19293 var49.x4[0] = (var46.x4[0] * var48.x4[0]) & 0xffff;
19294 var49.x4[1] = (var46.x4[1] * var48.x4[1]) & 0xffff;
19295 var49.x4[2] = (var46.x4[2] * var48.x4[2]) & 0xffff;
19296 var49.x4[3] = (var46.x4[3] * var48.x4[3]) & 0xffff;
/* Shift-and-add divide by 255: with t = x + 128, result = (t + (t >> 8)) >> 8. var50 is reused twice below. */
19298 var50.x4[0] = ((uint16_t)(((orc_uint16)(var49.x4[0]+128)) + (((orc_uint16)(var49.x4[0]+128))>>8)))>>8;
19299 var50.x4[1] = ((uint16_t)(((orc_uint16)(var49.x4[1]+128)) + (((orc_uint16)(var49.x4[1]+128))>>8)))>>8;
19300 var50.x4[2] = ((uint16_t)(((orc_uint16)(var49.x4[2]+128)) + (((orc_uint16)(var49.x4[2]+128))>>8)))>>8;
19301 var50.x4[3] = ((uint16_t)(((orc_uint16)(var49.x4[3]+128)) + (((orc_uint16)(var49.x4[3]+128))>>8)))>>8;
/* Unsigned-byte conversion of var42 into var51. */
19305 var51.x4[0] = (orc_uint8)var42.x4[0];
19306 var51.x4[1] = (orc_uint8)var42.x4[1];
19307 var51.x4[2] = (orc_uint8)var42.x4[2];
19308 var51.x4[3] = (orc_uint8)var42.x4[3];
/* Replicate bits 48..63 of var51.i into all four 16-bit fields. */
19310 var52.i = ((((orc_uint64)var51.i)>>48) << 48) | ((((orc_uint64)var51.i)>>48)<<32) | ((((orc_uint64)var51.i)>>48) << 16) | ((((orc_uint64)var51.i)>>48));
/* First addend: var50 scaled by the replicated value, then divided by 255. */
19312 var53.x4[0] = (var50.x4[0] * var52.x4[0]) & 0xffff;
19313 var53.x4[1] = (var50.x4[1] * var52.x4[1]) & 0xffff;
19314 var53.x4[2] = (var50.x4[2] * var52.x4[2]) & 0xffff;
19315 var53.x4[3] = (var50.x4[3] * var52.x4[3]) & 0xffff;
19317 var54.x4[0] = ((uint16_t)(((orc_uint16)(var53.x4[0]+128)) + (((orc_uint16)(var53.x4[0]+128))>>8)))>>8;
19318 var54.x4[1] = ((uint16_t)(((orc_uint16)(var53.x4[1]+128)) + (((orc_uint16)(var53.x4[1]+128))>>8)))>>8;
19319 var54.x4[2] = ((uint16_t)(((orc_uint16)(var53.x4[2]+128)) + (((orc_uint16)(var53.x4[2]+128))>>8)))>>8;
19320 var54.x4[3] = ((uint16_t)(((orc_uint16)(var53.x4[3]+128)) + (((orc_uint16)(var53.x4[3]+128))>>8)))>>8;
/* Lane copy of the first addend into var55. */
19322 var55.x4[0] = var54.x4[0];
19323 var55.x4[1] = var54.x4[1];
19324 var55.x4[2] = var54.x4[2];
19325 var55.x4[3] = var54.x4[3];
/* Unsigned-byte conversion of var43 into var56. */
19329 var56.x4[0] = (orc_uint8)var43.x4[0];
19330 var56.x4[1] = (orc_uint8)var43.x4[1];
19331 var56.x4[2] = (orc_uint8)var43.x4[2];
19332 var56.x4[3] = (orc_uint8)var43.x4[3];
/* Replicate bits 48..63 of var50.i (the first quotient) into all four 16-bit fields. */
19334 var57.i = ((((orc_uint64)var50.i)>>48) << 48) | ((((orc_uint64)var50.i)>>48)<<32) | ((((orc_uint64)var50.i)>>48) << 16) | ((((orc_uint64)var50.i)>>48));
/* XOR with 0xff; for a lane value a in 0..255 this yields 255 - a. */
19336 var58.x4[0] = var57.x4[0] ^ var44.x4[0];
19337 var58.x4[1] = var57.x4[1] ^ var44.x4[1];
19338 var58.x4[2] = var57.x4[2] ^ var44.x4[2];
19339 var58.x4[3] = var57.x4[3] ^ var44.x4[3];
/* Second addend: var56 scaled by the complemented value, then divided by 255. */
19341 var59.x4[0] = (var56.x4[0] * var58.x4[0]) & 0xffff;
19342 var59.x4[1] = (var56.x4[1] * var58.x4[1]) & 0xffff;
19343 var59.x4[2] = (var56.x4[2] * var58.x4[2]) & 0xffff;
19344 var59.x4[3] = (var56.x4[3] * var58.x4[3]) & 0xffff;
19346 var60.x4[0] = ((uint16_t)(((orc_uint16)(var59.x4[0]+128)) + (((orc_uint16)(var59.x4[0]+128))>>8)))>>8;
19347 var60.x4[1] = ((uint16_t)(((orc_uint16)(var59.x4[1]+128)) + (((orc_uint16)(var59.x4[1]+128))>>8)))>>8;
19348 var60.x4[2] = ((uint16_t)(((orc_uint16)(var59.x4[2]+128)) + (((orc_uint16)(var59.x4[2]+128))>>8)))>>8;
19349 var60.x4[3] = ((uint16_t)(((orc_uint16)(var59.x4[3]+128)) + (((orc_uint16)(var59.x4[3]+128))>>8)))>>8;
/* Lane copy of the second addend into var61. */
19351 var61.x4[0] = var60.x4[0];
19352 var61.x4[1] = var60.x4[1];
19353 var61.x4[2] = var60.x4[2];
19354 var61.x4[3] = var60.x4[3];
/* Sum of the two addends (each cast to orc_uint8) passed through ORC_CLAMP_UB. */
19356 var45.x4[0] = ORC_CLAMP_UB((orc_uint8)var55.x4[0] + (orc_uint8)var61.x4[0]);
19357 var45.x4[1] = ORC_CLAMP_UB((orc_uint8)var55.x4[1] + (orc_uint8)var61.x4[1]);
19358 var45.x4[2] = ORC_CLAMP_UB((orc_uint8)var55.x4[2] + (orc_uint8)var61.x4[2]);
19359 var45.x4[3] = ORC_CLAMP_UB((orc_uint8)var55.x4[3] + (orc_uint8)var61.x4[3]);
/*
 * orc_code_combine_atop_u -- OrcProgram-based entry point.
 *
 * Builds an OrcProgram named "orc_code_combine_atop_u" between
 * orc_once_mutex_lock() and orc_once_mutex_unlock(), compiles it, then
 * fills a stack OrcExecutor with d1/s1/s2 and picks up p->code_exec.
 *
 *   d1: destination array (stored in ex->arrays[ORC_VAR_D1]).
 *   s1: first source array (stored in ex->arrays[ORC_VAR_S1]).
 *   s2: second source array (stored in ex->arrays[ORC_VAR_S2]).
 *   n:  element count.
 *
 * NOTE(review): the guard that tests p_inited, the braces, the handling
 * of `result`, the assignment of n to the executor and the call through
 * `func` are not visible in this excerpt -- confirm against the full file.
 */
19367 orc_code_combine_atop_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n)
19369 OrcExecutor _ex, *ex = &_ex;
/* Function-local statics: the program object persists across calls. */
19370 static int p_inited = 0;
19371 static OrcProgram *p = 0;
19372 void (*func) (OrcExecutor *);
19375 orc_once_mutex_lock ();
19377 OrcCompileResult result;
19379 p = orc_program_new ();
19380 orc_program_set_name (p, "orc_code_combine_atop_u");
19381 orc_program_set_backup_function (p, _backup_orc_code_combine_atop_u);
/* Variables: 4-byte destination, two 4-byte sources, the constant 0xff, six 8-byte and two 4-byte temporaries. */
/* t2 and t3 are declared but not referenced by the opcodes appended below. */
19382 orc_program_add_destination (p, 4, "d1");
19383 orc_program_add_source (p, 4, "s1");
19384 orc_program_add_source (p, 4, "s2");
19385 orc_program_add_constant (p, 4, 0x000000ff, "c1");
19386 orc_program_add_temporary (p, 8, "t1");
19387 orc_program_add_temporary (p, 8, "t2");
19388 orc_program_add_temporary (p, 8, "t3");
19389 orc_program_add_temporary (p, 8, "t4");
19390 orc_program_add_temporary (p, 8, "t5");
19391 orc_program_add_temporary (p, 8, "t6");
19392 orc_program_add_temporary (p, 4, "t7");
19393 orc_program_add_temporary (p, 4, "t8");
/*
 * Opcode sequence. NOTE(review): the variable arguments are read here as
 * (dest, src1, src2, ...), with unused slots filled by ORC_VAR_D1 --
 * confirm against the orc_program_append_2 documentation.
 *   t4 = div255w (convubw (s1) * splatw3q (convubw (s2)))
 *   t7 = convwb (div255w (t4 * splatw3q (convubw (d1))))
 *   t8 = convwb (div255w (convubw (d1) * (splatw3q (t4) ^ c1)))
 *   d1 = addusb (t7, t8)
 */
19395 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
19396 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
19397 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
19398 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
19399 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
19400 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
19401 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
19402 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
19403 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
19404 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
19405 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
19406 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
19407 orc_program_append_2 (p, "xorw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C1, ORC_VAR_D1);
19408 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T1, ORC_VAR_T5, ORC_VAR_D1);
19409 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
19410 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
19411 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1);
19413 result = orc_program_compile (p);
19416 orc_once_mutex_unlock ();
/* Bind the caller's arrays to the executor slots; the const qualifiers of s1/s2 are cast away for storage. */
19421 ex->arrays[ORC_VAR_D1] = d1;
19422 ex->arrays[ORC_VAR_S1] = (void *)s1;
19423 ex->arrays[ORC_VAR_S2] = (void *)s2;
/* Entry point taken from the program's code_exec field. */
19425 func = p->code_exec;
19431 /* orc_code_combine_atop_u_n */
/*
 * orc_code_combine_atop_u_n -- plain C loop form of this kernel.
 *
 *   d1: destination array, addressed through ptr0.
 *   s1: source array, addressed through ptr4.
 *   n:  number of loop iterations.
 *
 * Per lane the visible arithmetic is
 *     var44 = ORC_CLAMP_UB (div255 (var45 * splat (var46))
 *                           + div255 (var51 * (splat (var45) ^ 0xff)))
 * NOTE(review): the declarations, the loads that fill var40..var42 and
 * the store of var44 are not visible in this excerpt. ORC_CLAMP_UB is
 * defined elsewhere; presumably it clamps to the unsigned-byte range.
 */
19434 orc_code_combine_atop_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n){
19436 orc_union32 * ORC_RESTRICT ptr0;
19437 const orc_union32 * ORC_RESTRICT ptr4;
19456 ptr0 = (orc_union32 *)d1;
19457 ptr4 = (orc_union32 *)s1;
/* Loop-invariant constant: 0xff in each of the four lanes (XOR operand below). */
19460 var43.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
19461 var43.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
19462 var43.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
19463 var43.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
19465 for (i = 0; i < n; i++) {
/* Unsigned-byte conversion of var40 into var45. */
19469 var45.x4[0] = (orc_uint8)var40.x4[0];
19470 var45.x4[1] = (orc_uint8)var40.x4[1];
19471 var45.x4[2] = (orc_uint8)var40.x4[2];
19472 var45.x4[3] = (orc_uint8)var40.x4[3];
/* Unsigned-byte conversion of var41 into var46. */
19476 var46.x4[0] = (orc_uint8)var41.x4[0];
19477 var46.x4[1] = (orc_uint8)var41.x4[1];
19478 var46.x4[2] = (orc_uint8)var41.x4[2];
19479 var46.x4[3] = (orc_uint8)var41.x4[3];
/* Replicate bits 48..63 of var46.i into all four 16-bit fields. */
19481 var47.i = ((((orc_uint64)var46.i)>>48) << 48) | ((((orc_uint64)var46.i)>>48)<<32) | ((((orc_uint64)var46.i)>>48) << 16) | ((((orc_uint64)var46.i)>>48));
/* First addend: var45 scaled by the replicated value (low 16 bits), then divided by 255. */
19483 var48.x4[0] = (var45.x4[0] * var47.x4[0]) & 0xffff;
19484 var48.x4[1] = (var45.x4[1] * var47.x4[1]) & 0xffff;
19485 var48.x4[2] = (var45.x4[2] * var47.x4[2]) & 0xffff;
19486 var48.x4[3] = (var45.x4[3] * var47.x4[3]) & 0xffff;
/* Shift-and-add divide by 255: with t = x + 128, result = (t + (t >> 8)) >> 8, in 16-bit unsigned arithmetic. */
19488 var49.x4[0] = ((uint16_t)(((orc_uint16)(var48.x4[0]+128)) + (((orc_uint16)(var48.x4[0]+128))>>8)))>>8;
19489 var49.x4[1] = ((uint16_t)(((orc_uint16)(var48.x4[1]+128)) + (((orc_uint16)(var48.x4[1]+128))>>8)))>>8;
19490 var49.x4[2] = ((uint16_t)(((orc_uint16)(var48.x4[2]+128)) + (((orc_uint16)(var48.x4[2]+128))>>8)))>>8;
19491 var49.x4[3] = ((uint16_t)(((orc_uint16)(var48.x4[3]+128)) + (((orc_uint16)(var48.x4[3]+128))>>8)))>>8;
/* Lane copy of the first addend into var50. */
19493 var50.x4[0] = var49.x4[0];
19494 var50.x4[1] = var49.x4[1];
19495 var50.x4[2] = var49.x4[2];
19496 var50.x4[3] = var49.x4[3];
/* Unsigned-byte conversion of var42 into var51. */
19500 var51.x4[0] = (orc_uint8)var42.x4[0];
19501 var51.x4[1] = (orc_uint8)var42.x4[1];
19502 var51.x4[2] = (orc_uint8)var42.x4[2];
19503 var51.x4[3] = (orc_uint8)var42.x4[3];
/* Replicate bits 48..63 of var45.i into all four 16-bit fields. */
19505 var52.i = ((((orc_uint64)var45.i)>>48) << 48) | ((((orc_uint64)var45.i)>>48)<<32) | ((((orc_uint64)var45.i)>>48) << 16) | ((((orc_uint64)var45.i)>>48));
/* XOR with 0xff; for a lane value a in 0..255 this yields 255 - a. */
19507 var53.x4[0] = var52.x4[0] ^ var43.x4[0];
19508 var53.x4[1] = var52.x4[1] ^ var43.x4[1];
19509 var53.x4[2] = var52.x4[2] ^ var43.x4[2];
19510 var53.x4[3] = var52.x4[3] ^ var43.x4[3];
/* Second addend: var51 scaled by the complemented value, then divided by 255. */
19512 var54.x4[0] = (var51.x4[0] * var53.x4[0]) & 0xffff;
19513 var54.x4[1] = (var51.x4[1] * var53.x4[1]) & 0xffff;
19514 var54.x4[2] = (var51.x4[2] * var53.x4[2]) & 0xffff;
19515 var54.x4[3] = (var51.x4[3] * var53.x4[3]) & 0xffff;
19517 var55.x4[0] = ((uint16_t)(((orc_uint16)(var54.x4[0]+128)) + (((orc_uint16)(var54.x4[0]+128))>>8)))>>8;
19518 var55.x4[1] = ((uint16_t)(((orc_uint16)(var54.x4[1]+128)) + (((orc_uint16)(var54.x4[1]+128))>>8)))>>8;
19519 var55.x4[2] = ((uint16_t)(((orc_uint16)(var54.x4[2]+128)) + (((orc_uint16)(var54.x4[2]+128))>>8)))>>8;
19520 var55.x4[3] = ((uint16_t)(((orc_uint16)(var54.x4[3]+128)) + (((orc_uint16)(var54.x4[3]+128))>>8)))>>8;
/* Lane copy of the second addend into var56. */
19522 var56.x4[0] = var55.x4[0];
19523 var56.x4[1] = var55.x4[1];
19524 var56.x4[2] = var55.x4[2];
19525 var56.x4[3] = var55.x4[3];
/* Sum of the two addends (each cast to orc_uint8) passed through ORC_CLAMP_UB. */
19527 var44.x4[0] = ORC_CLAMP_UB((orc_uint8)var50.x4[0] + (orc_uint8)var56.x4[0]);
19528 var44.x4[1] = ORC_CLAMP_UB((orc_uint8)var50.x4[1] + (orc_uint8)var56.x4[1]);
19529 var44.x4[2] = ORC_CLAMP_UB((orc_uint8)var50.x4[2] + (orc_uint8)var56.x4[2]);
19530 var44.x4[3] = ORC_CLAMP_UB((orc_uint8)var50.x4[3] + (orc_uint8)var56.x4[3]);
/*
 * _backup_orc_code_combine_atop_u_n -- C implementation driven by an
 * OrcExecutor; it is the function registered with
 * orc_program_set_backup_function() for "orc_code_combine_atop_u_n".
 *
 *   ex: executor; ex->arrays[0] is used as the destination (ptr0) and
 *       ex->arrays[4] as the source (ptr4).
 *
 * Per lane the visible arithmetic is
 *     var44 = ORC_CLAMP_UB (div255 (var45 * splat (var46))
 *                           + div255 (var51 * (splat (var45) ^ 0xff)))
 * NOTE(review): the declarations, the assignment of n, the loads that
 * fill var40..var42 and the store of var44 are not visible in this
 * excerpt. ORC_CLAMP_UB is defined elsewhere; presumably it clamps to
 * the unsigned-byte range.
 */
19539 _backup_orc_code_combine_atop_u_n (OrcExecutor * ORC_RESTRICT ex)
19543 orc_union32 * ORC_RESTRICT ptr0;
19544 const orc_union32 * ORC_RESTRICT ptr4;
19563 ptr0 = (orc_union32 *)ex->arrays[0];
19564 ptr4 = (orc_union32 *)ex->arrays[4];
/* Loop-invariant constant: 0xff in each of the four lanes (XOR operand below). */
19567 var43.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
19568 var43.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
19569 var43.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
19570 var43.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
19572 for (i = 0; i < n; i++) {
/* Unsigned-byte conversion of var40 into var45. */
19576 var45.x4[0] = (orc_uint8)var40.x4[0];
19577 var45.x4[1] = (orc_uint8)var40.x4[1];
19578 var45.x4[2] = (orc_uint8)var40.x4[2];
19579 var45.x4[3] = (orc_uint8)var40.x4[3];
/* Unsigned-byte conversion of var41 into var46. */
19583 var46.x4[0] = (orc_uint8)var41.x4[0];
19584 var46.x4[1] = (orc_uint8)var41.x4[1];
19585 var46.x4[2] = (orc_uint8)var41.x4[2];
19586 var46.x4[3] = (orc_uint8)var41.x4[3];
/* Replicate bits 48..63 of var46.i into all four 16-bit fields. */
19588 var47.i = ((((orc_uint64)var46.i)>>48) << 48) | ((((orc_uint64)var46.i)>>48)<<32) | ((((orc_uint64)var46.i)>>48) << 16) | ((((orc_uint64)var46.i)>>48));
/* First addend: var45 scaled by the replicated value (low 16 bits), then divided by 255. */
19590 var48.x4[0] = (var45.x4[0] * var47.x4[0]) & 0xffff;
19591 var48.x4[1] = (var45.x4[1] * var47.x4[1]) & 0xffff;
19592 var48.x4[2] = (var45.x4[2] * var47.x4[2]) & 0xffff;
19593 var48.x4[3] = (var45.x4[3] * var47.x4[3]) & 0xffff;
/* Shift-and-add divide by 255: with t = x + 128, result = (t + (t >> 8)) >> 8, in 16-bit unsigned arithmetic. */
19595 var49.x4[0] = ((uint16_t)(((orc_uint16)(var48.x4[0]+128)) + (((orc_uint16)(var48.x4[0]+128))>>8)))>>8;
19596 var49.x4[1] = ((uint16_t)(((orc_uint16)(var48.x4[1]+128)) + (((orc_uint16)(var48.x4[1]+128))>>8)))>>8;
19597 var49.x4[2] = ((uint16_t)(((orc_uint16)(var48.x4[2]+128)) + (((orc_uint16)(var48.x4[2]+128))>>8)))>>8;
19598 var49.x4[3] = ((uint16_t)(((orc_uint16)(var48.x4[3]+128)) + (((orc_uint16)(var48.x4[3]+128))>>8)))>>8;
/* Lane copy of the first addend into var50. */
19600 var50.x4[0] = var49.x4[0];
19601 var50.x4[1] = var49.x4[1];
19602 var50.x4[2] = var49.x4[2];
19603 var50.x4[3] = var49.x4[3];
/* Unsigned-byte conversion of var42 into var51. */
19607 var51.x4[0] = (orc_uint8)var42.x4[0];
19608 var51.x4[1] = (orc_uint8)var42.x4[1];
19609 var51.x4[2] = (orc_uint8)var42.x4[2];
19610 var51.x4[3] = (orc_uint8)var42.x4[3];
/* Replicate bits 48..63 of var45.i into all four 16-bit fields. */
19612 var52.i = ((((orc_uint64)var45.i)>>48) << 48) | ((((orc_uint64)var45.i)>>48)<<32) | ((((orc_uint64)var45.i)>>48) << 16) | ((((orc_uint64)var45.i)>>48));
/* XOR with 0xff; for a lane value a in 0..255 this yields 255 - a. */
19614 var53.x4[0] = var52.x4[0] ^ var43.x4[0];
19615 var53.x4[1] = var52.x4[1] ^ var43.x4[1];
19616 var53.x4[2] = var52.x4[2] ^ var43.x4[2];
19617 var53.x4[3] = var52.x4[3] ^ var43.x4[3];
/* Second addend: var51 scaled by the complemented value, then divided by 255. */
19619 var54.x4[0] = (var51.x4[0] * var53.x4[0]) & 0xffff;
19620 var54.x4[1] = (var51.x4[1] * var53.x4[1]) & 0xffff;
19621 var54.x4[2] = (var51.x4[2] * var53.x4[2]) & 0xffff;
19622 var54.x4[3] = (var51.x4[3] * var53.x4[3]) & 0xffff;
19624 var55.x4[0] = ((uint16_t)(((orc_uint16)(var54.x4[0]+128)) + (((orc_uint16)(var54.x4[0]+128))>>8)))>>8;
19625 var55.x4[1] = ((uint16_t)(((orc_uint16)(var54.x4[1]+128)) + (((orc_uint16)(var54.x4[1]+128))>>8)))>>8;
19626 var55.x4[2] = ((uint16_t)(((orc_uint16)(var54.x4[2]+128)) + (((orc_uint16)(var54.x4[2]+128))>>8)))>>8;
19627 var55.x4[3] = ((uint16_t)(((orc_uint16)(var54.x4[3]+128)) + (((orc_uint16)(var54.x4[3]+128))>>8)))>>8;
/* Lane copy of the second addend into var56. */
19629 var56.x4[0] = var55.x4[0];
19630 var56.x4[1] = var55.x4[1];
19631 var56.x4[2] = var55.x4[2];
19632 var56.x4[3] = var55.x4[3];
/* Sum of the two addends (each cast to orc_uint8) passed through ORC_CLAMP_UB. */
19634 var44.x4[0] = ORC_CLAMP_UB((orc_uint8)var50.x4[0] + (orc_uint8)var56.x4[0]);
19635 var44.x4[1] = ORC_CLAMP_UB((orc_uint8)var50.x4[1] + (orc_uint8)var56.x4[1]);
19636 var44.x4[2] = ORC_CLAMP_UB((orc_uint8)var50.x4[2] + (orc_uint8)var56.x4[2]);
19637 var44.x4[3] = ORC_CLAMP_UB((orc_uint8)var50.x4[3] + (orc_uint8)var56.x4[3]);
/*
 * orc_code_combine_atop_u_n -- OrcProgram-based entry point.
 *
 * Builds an OrcProgram named "orc_code_combine_atop_u_n" between
 * orc_once_mutex_lock() and orc_once_mutex_unlock(), compiles it, then
 * fills a stack OrcExecutor with d1/s1 and picks up p->code_exec.
 *
 *   d1: destination array (stored in ex->arrays[ORC_VAR_D1]).
 *   s1: source array (stored in ex->arrays[ORC_VAR_S1]).
 *   n:  element count.
 *
 * NOTE(review): the guard that tests p_inited, the braces, the handling
 * of `result`, the assignment of n to the executor and the call through
 * `func` are not visible in this excerpt -- confirm against the full file.
 */
19645 orc_code_combine_atop_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n)
19647 OrcExecutor _ex, *ex = &_ex;
/* Function-local statics: the program object persists across calls. */
19648 static int p_inited = 0;
19649 static OrcProgram *p = 0;
19650 void (*func) (OrcExecutor *);
19653 orc_once_mutex_lock ();
19655 OrcCompileResult result;
19657 p = orc_program_new ();
19658 orc_program_set_name (p, "orc_code_combine_atop_u_n");
19659 orc_program_set_backup_function (p, _backup_orc_code_combine_atop_u_n);
/* Variables: 4-byte destination, one 4-byte source, the constant 0xff, six 8-byte and two 4-byte temporaries. */
/* t2 and t3 are declared but not referenced by the opcodes appended below. */
19660 orc_program_add_destination (p, 4, "d1");
19661 orc_program_add_source (p, 4, "s1");
19662 orc_program_add_constant (p, 4, 0x000000ff, "c1");
19663 orc_program_add_temporary (p, 8, "t1");
19664 orc_program_add_temporary (p, 8, "t2");
19665 orc_program_add_temporary (p, 8, "t3");
19666 orc_program_add_temporary (p, 8, "t4");
19667 orc_program_add_temporary (p, 8, "t5");
19668 orc_program_add_temporary (p, 8, "t6");
19669 orc_program_add_temporary (p, 4, "t7");
19670 orc_program_add_temporary (p, 4, "t8");
/*
 * Opcode sequence. NOTE(review): the variable arguments are read here as
 * (dest, src1, src2, ...), with unused slots filled by ORC_VAR_D1 --
 * confirm against the orc_program_append_2 documentation.
 *   t4 = convubw (s1)
 *   t7 = convwb (div255w (t4 * splatw3q (convubw (d1))))
 *   t8 = convwb (div255w (convubw (d1) * (splatw3q (t4) ^ c1)))
 *   d1 = addusb (t7, t8)
 */
19672 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
19673 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
19674 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
19675 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
19676 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
19677 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
19678 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
19679 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
19680 orc_program_append_2 (p, "xorw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C1, ORC_VAR_D1);
19681 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T1, ORC_VAR_T5, ORC_VAR_D1);
19682 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
19683 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
19684 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1);
19686 result = orc_program_compile (p);
19689 orc_once_mutex_unlock ();
/* Bind the caller's arrays to the executor slots; the const qualifier of s1 is cast away for storage. */
19694 ex->arrays[ORC_VAR_D1] = d1;
19695 ex->arrays[ORC_VAR_S1] = (void *)s1;
/* Entry point taken from the program's code_exec field. */
19697 func = p->code_exec;
19703 /* orc_code_combine_xor_u */
/*
 * orc_code_combine_xor_u -- plain C loop form of this kernel.
 *
 *   d1: destination array, addressed through ptr0.
 *   s1: first source array, addressed through ptr4.
 *   s2: second source array, addressed through ptr5.
 *   n:  number of loop iterations.
 *
 * Per lane the visible arithmetic is
 *     m     = div255 (var47 * splat (var48))
 *     var46 = ORC_CLAMP_UB (div255 (m * (splat (var52) ^ 0xff))
 *                           + div255 (var58 * (splat (m) ^ 0xff)))
 * NOTE(review): the declarations, the loads that fill var40, var41,
 * var42 and var44, and the store of var46 are not visible in this
 * excerpt. ORC_CLAMP_UB is defined elsewhere; presumably it clamps to
 * the unsigned-byte range.
 */
19706 orc_code_combine_xor_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n){
19708 orc_union32 * ORC_RESTRICT ptr0;
19709 const orc_union32 * ORC_RESTRICT ptr4;
19710 const orc_union32 * ORC_RESTRICT ptr5;
19736 ptr0 = (orc_union32 *)d1;
19737 ptr4 = (orc_union32 *)s1;
19738 ptr5 = (orc_union32 *)s2;
/* Two loop-invariant constants, both 0xff in every lane; each feeds one of the XOR steps below. */
19741 var43.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
19742 var43.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
19743 var43.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
19744 var43.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
19746 var45.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
19747 var45.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
19748 var45.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
19749 var45.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
19751 for (i = 0; i < n; i++) {
/* Unsigned-byte conversion of var40 into var47. */
19755 var47.x4[0] = (orc_uint8)var40.x4[0];
19756 var47.x4[1] = (orc_uint8)var40.x4[1];
19757 var47.x4[2] = (orc_uint8)var40.x4[2];
19758 var47.x4[3] = (orc_uint8)var40.x4[3];
/* Unsigned-byte conversion of var41 into var48. */
19762 var48.x4[0] = (orc_uint8)var41.x4[0];
19763 var48.x4[1] = (orc_uint8)var41.x4[1];
19764 var48.x4[2] = (orc_uint8)var41.x4[2];
19765 var48.x4[3] = (orc_uint8)var41.x4[3];
/* Replicate bits 48..63 of var48.i into all four 16-bit fields. */
19767 var49.i = ((((orc_uint64)var48.i)>>48) << 48) | ((((orc_uint64)var48.i)>>48)<<32) | ((((orc_uint64)var48.i)>>48) << 16) | ((((orc_uint64)var48.i)>>48));
/* var47 scaled by the replicated value (low 16 bits of each product). */
19769 var50.x4[0] = (var47.x4[0] * var49.x4[0]) & 0xffff;
19770 var50.x4[1] = (var47.x4[1] * var49.x4[1]) & 0xffff;
19771 var50.x4[2] = (var47.x4[2] * var49.x4[2]) & 0xffff;
19772 var50.x4[3] = (var47.x4[3] * var49.x4[3]) & 0xffff;
/* Shift-and-add divide by 255: with t = x + 128, result = (t + (t >> 8)) >> 8. var51 is reused twice below. */
19774 var51.x4[0] = ((uint16_t)(((orc_uint16)(var50.x4[0]+128)) + (((orc_uint16)(var50.x4[0]+128))>>8)))>>8;
19775 var51.x4[1] = ((uint16_t)(((orc_uint16)(var50.x4[1]+128)) + (((orc_uint16)(var50.x4[1]+128))>>8)))>>8;
19776 var51.x4[2] = ((uint16_t)(((orc_uint16)(var50.x4[2]+128)) + (((orc_uint16)(var50.x4[2]+128))>>8)))>>8;
19777 var51.x4[3] = ((uint16_t)(((orc_uint16)(var50.x4[3]+128)) + (((orc_uint16)(var50.x4[3]+128))>>8)))>>8;
/* Unsigned-byte conversion of var42 into var52. */
19781 var52.x4[0] = (orc_uint8)var42.x4[0];
19782 var52.x4[1] = (orc_uint8)var42.x4[1];
19783 var52.x4[2] = (orc_uint8)var42.x4[2];
19784 var52.x4[3] = (orc_uint8)var42.x4[3];
/* Replicate bits 48..63 of var52.i into all four 16-bit fields. */
19786 var53.i = ((((orc_uint64)var52.i)>>48) << 48) | ((((orc_uint64)var52.i)>>48)<<32) | ((((orc_uint64)var52.i)>>48) << 16) | ((((orc_uint64)var52.i)>>48));
/* XOR with 0xff; for a lane value a in 0..255 this yields 255 - a. */
19788 var54.x4[0] = var53.x4[0] ^ var43.x4[0];
19789 var54.x4[1] = var53.x4[1] ^ var43.x4[1];
19790 var54.x4[2] = var53.x4[2] ^ var43.x4[2];
19791 var54.x4[3] = var53.x4[3] ^ var43.x4[3];
/* First addend: var51 scaled by the complemented value, then divided by 255. */
19793 var55.x4[0] = (var51.x4[0] * var54.x4[0]) & 0xffff;
19794 var55.x4[1] = (var51.x4[1] * var54.x4[1]) & 0xffff;
19795 var55.x4[2] = (var51.x4[2] * var54.x4[2]) & 0xffff;
19796 var55.x4[3] = (var51.x4[3] * var54.x4[3]) & 0xffff;
19798 var56.x4[0] = ((uint16_t)(((orc_uint16)(var55.x4[0]+128)) + (((orc_uint16)(var55.x4[0]+128))>>8)))>>8;
19799 var56.x4[1] = ((uint16_t)(((orc_uint16)(var55.x4[1]+128)) + (((orc_uint16)(var55.x4[1]+128))>>8)))>>8;
19800 var56.x4[2] = ((uint16_t)(((orc_uint16)(var55.x4[2]+128)) + (((orc_uint16)(var55.x4[2]+128))>>8)))>>8;
19801 var56.x4[3] = ((uint16_t)(((orc_uint16)(var55.x4[3]+128)) + (((orc_uint16)(var55.x4[3]+128))>>8)))>>8;
/* Lane copy of the first addend into var57. */
19803 var57.x4[0] = var56.x4[0];
19804 var57.x4[1] = var56.x4[1];
19805 var57.x4[2] = var56.x4[2];
19806 var57.x4[3] = var56.x4[3];
/* Unsigned-byte conversion of var44 into var58. */
19810 var58.x4[0] = (orc_uint8)var44.x4[0];
19811 var58.x4[1] = (orc_uint8)var44.x4[1];
19812 var58.x4[2] = (orc_uint8)var44.x4[2];
19813 var58.x4[3] = (orc_uint8)var44.x4[3];
/* Replicate bits 48..63 of var51.i (the first quotient) into all four 16-bit fields. */
19815 var59.i = ((((orc_uint64)var51.i)>>48) << 48) | ((((orc_uint64)var51.i)>>48)<<32) | ((((orc_uint64)var51.i)>>48) << 16) | ((((orc_uint64)var51.i)>>48));
/* XOR with the second 0xff constant. */
19817 var60.x4[0] = var59.x4[0] ^ var45.x4[0];
19818 var60.x4[1] = var59.x4[1] ^ var45.x4[1];
19819 var60.x4[2] = var59.x4[2] ^ var45.x4[2];
19820 var60.x4[3] = var59.x4[3] ^ var45.x4[3];
/* Second addend: var58 scaled by the complemented value, then divided by 255. */
19822 var61.x4[0] = (var58.x4[0] * var60.x4[0]) & 0xffff;
19823 var61.x4[1] = (var58.x4[1] * var60.x4[1]) & 0xffff;
19824 var61.x4[2] = (var58.x4[2] * var60.x4[2]) & 0xffff;
19825 var61.x4[3] = (var58.x4[3] * var60.x4[3]) & 0xffff;
19827 var62.x4[0] = ((uint16_t)(((orc_uint16)(var61.x4[0]+128)) + (((orc_uint16)(var61.x4[0]+128))>>8)))>>8;
19828 var62.x4[1] = ((uint16_t)(((orc_uint16)(var61.x4[1]+128)) + (((orc_uint16)(var61.x4[1]+128))>>8)))>>8;
19829 var62.x4[2] = ((uint16_t)(((orc_uint16)(var61.x4[2]+128)) + (((orc_uint16)(var61.x4[2]+128))>>8)))>>8;
19830 var62.x4[3] = ((uint16_t)(((orc_uint16)(var61.x4[3]+128)) + (((orc_uint16)(var61.x4[3]+128))>>8)))>>8;
/* Lane copy of the second addend into var63. */
19832 var63.x4[0] = var62.x4[0];
19833 var63.x4[1] = var62.x4[1];
19834 var63.x4[2] = var62.x4[2];
19835 var63.x4[3] = var62.x4[3];
/* Sum of the two addends (each cast to orc_uint8) passed through ORC_CLAMP_UB. */
19837 var46.x4[0] = ORC_CLAMP_UB((orc_uint8)var57.x4[0] + (orc_uint8)var63.x4[0]);
19838 var46.x4[1] = ORC_CLAMP_UB((orc_uint8)var57.x4[1] + (orc_uint8)var63.x4[1]);
19839 var46.x4[2] = ORC_CLAMP_UB((orc_uint8)var57.x4[2] + (orc_uint8)var63.x4[2]);
19840 var46.x4[3] = ORC_CLAMP_UB((orc_uint8)var57.x4[3] + (orc_uint8)var63.x4[3]);
/*
 * Plain C backup for the "orc_code_combine_xor_u" program, driven by an
 * OrcExecutor. ex->arrays[0] is the only non-const pointer (the output);
 * ex->arrays[4] and ex->arrays[5] are read-only inputs. All three are viewed
 * as arrays of orc_union32.
 * NOTE(review): var40/var41/var42/var44 are presumably the per-element loads
 * from these pointers and var46 the value stored back -- confirm against the
 * load/store statements.
 */
19849 _backup_orc_code_combine_xor_u (OrcExecutor * ORC_RESTRICT ex)
19853 orc_union32 * ORC_RESTRICT ptr0;
19854 const orc_union32 * ORC_RESTRICT ptr4;
19855 const orc_union32 * ORC_RESTRICT ptr5;
19881 ptr0 = (orc_union32 *)ex->arrays[0];
19882 ptr4 = (orc_union32 *)ex->arrays[4];
19883 ptr5 = (orc_union32 *)ex->arrays[5];
/* Loop-invariant constants: every lane is 0xff. For a lane value v in
 * 0..255, v ^ 0xff equals 255 - v. */
19886 var43.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
19887 var43.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
19888 var43.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
19889 var43.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
19891 var45.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
19892 var45.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
19893 var45.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
19894 var45.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
19896 for (i = 0; i < n; i++) {
/* Reinterpret each of the four bytes as unsigned before placing it in a
 * lane of var47 / var48 (presumably 16-bit lanes -- TODO confirm). */
19900 var47.x4[0] = (orc_uint8)var40.x4[0];
19901 var47.x4[1] = (orc_uint8)var40.x4[1];
19902 var47.x4[2] = (orc_uint8)var40.x4[2];
19903 var47.x4[3] = (orc_uint8)var40.x4[3];
19907 var48.x4[0] = (orc_uint8)var41.x4[0];
19908 var48.x4[1] = (orc_uint8)var41.x4[1];
19909 var48.x4[2] = (orc_uint8)var41.x4[2];
19910 var48.x4[3] = (orc_uint8)var41.x4[3];
/* Replicate the top 16 bits of the 64-bit value into all four 16-bit slots. */
19912 var49.i = ((((orc_uint64)var48.i)>>48) << 48) | ((((orc_uint64)var48.i)>>48)<<32) | ((((orc_uint64)var48.i)>>48) << 16) | ((((orc_uint64)var48.i)>>48));
/* Lane-wise product, truncated to 16 bits. */
19914 var50.x4[0] = (var47.x4[0] * var49.x4[0]) & 0xffff;
19915 var50.x4[1] = (var47.x4[1] * var49.x4[1]) & 0xffff;
19916 var50.x4[2] = (var47.x4[2] * var49.x4[2]) & 0xffff;
19917 var50.x4[3] = (var47.x4[3] * var49.x4[3]) & 0xffff;
/* Rounded divide-by-255 of each lane: ((x + 128) + ((x + 128) >> 8)) >> 8. */
19919 var51.x4[0] = ((uint16_t)(((orc_uint16)(var50.x4[0]+128)) + (((orc_uint16)(var50.x4[0]+128))>>8)))>>8;
19920 var51.x4[1] = ((uint16_t)(((orc_uint16)(var50.x4[1]+128)) + (((orc_uint16)(var50.x4[1]+128))>>8)))>>8;
19921 var51.x4[2] = ((uint16_t)(((orc_uint16)(var50.x4[2]+128)) + (((orc_uint16)(var50.x4[2]+128))>>8)))>>8;
19922 var51.x4[3] = ((uint16_t)(((orc_uint16)(var50.x4[3]+128)) + (((orc_uint16)(var50.x4[3]+128))>>8)))>>8;
19926 var52.x4[0] = (orc_uint8)var42.x4[0];
19927 var52.x4[1] = (orc_uint8)var42.x4[1];
19928 var52.x4[2] = (orc_uint8)var42.x4[2];
19929 var52.x4[3] = (orc_uint8)var42.x4[3];
/* Broadcast the top 16 bits of var52, then invert against 0xff. */
19931 var53.i = ((((orc_uint64)var52.i)>>48) << 48) | ((((orc_uint64)var52.i)>>48)<<32) | ((((orc_uint64)var52.i)>>48) << 16) | ((((orc_uint64)var52.i)>>48));
19933 var54.x4[0] = var53.x4[0] ^ var43.x4[0];
19934 var54.x4[1] = var53.x4[1] ^ var43.x4[1];
19935 var54.x4[2] = var53.x4[2] ^ var43.x4[2];
19936 var54.x4[3] = var53.x4[3] ^ var43.x4[3];
/* First partial result: var51 scaled by the inverted broadcast, then / 255. */
19938 var55.x4[0] = (var51.x4[0] * var54.x4[0]) & 0xffff;
19939 var55.x4[1] = (var51.x4[1] * var54.x4[1]) & 0xffff;
19940 var55.x4[2] = (var51.x4[2] * var54.x4[2]) & 0xffff;
19941 var55.x4[3] = (var51.x4[3] * var54.x4[3]) & 0xffff;
19943 var56.x4[0] = ((uint16_t)(((orc_uint16)(var55.x4[0]+128)) + (((orc_uint16)(var55.x4[0]+128))>>8)))>>8;
19944 var56.x4[1] = ((uint16_t)(((orc_uint16)(var55.x4[1]+128)) + (((orc_uint16)(var55.x4[1]+128))>>8)))>>8;
19945 var56.x4[2] = ((uint16_t)(((orc_uint16)(var55.x4[2]+128)) + (((orc_uint16)(var55.x4[2]+128))>>8)))>>8;
19946 var56.x4[3] = ((uint16_t)(((orc_uint16)(var55.x4[3]+128)) + (((orc_uint16)(var55.x4[3]+128))>>8)))>>8;
/* Lane-by-lane copy; any narrowing depends on var57's declared lane type. */
19948 var57.x4[0] = var56.x4[0];
19949 var57.x4[1] = var56.x4[1];
19950 var57.x4[2] = var56.x4[2];
19951 var57.x4[3] = var56.x4[3];
19955 var58.x4[0] = (orc_uint8)var44.x4[0];
19956 var58.x4[1] = (orc_uint8)var44.x4[1];
19957 var58.x4[2] = (orc_uint8)var44.x4[2];
19958 var58.x4[3] = (orc_uint8)var44.x4[3];
/* Second partial result: var58 scaled by the inverted broadcast of var51. */
19960 var59.i = ((((orc_uint64)var51.i)>>48) << 48) | ((((orc_uint64)var51.i)>>48)<<32) | ((((orc_uint64)var51.i)>>48) << 16) | ((((orc_uint64)var51.i)>>48));
19962 var60.x4[0] = var59.x4[0] ^ var45.x4[0];
19963 var60.x4[1] = var59.x4[1] ^ var45.x4[1];
19964 var60.x4[2] = var59.x4[2] ^ var45.x4[2];
19965 var60.x4[3] = var59.x4[3] ^ var45.x4[3];
19967 var61.x4[0] = (var58.x4[0] * var60.x4[0]) & 0xffff;
19968 var61.x4[1] = (var58.x4[1] * var60.x4[1]) & 0xffff;
19969 var61.x4[2] = (var58.x4[2] * var60.x4[2]) & 0xffff;
19970 var61.x4[3] = (var58.x4[3] * var60.x4[3]) & 0xffff;
19972 var62.x4[0] = ((uint16_t)(((orc_uint16)(var61.x4[0]+128)) + (((orc_uint16)(var61.x4[0]+128))>>8)))>>8;
19973 var62.x4[1] = ((uint16_t)(((orc_uint16)(var61.x4[1]+128)) + (((orc_uint16)(var61.x4[1]+128))>>8)))>>8;
19974 var62.x4[2] = ((uint16_t)(((orc_uint16)(var61.x4[2]+128)) + (((orc_uint16)(var61.x4[2]+128))>>8)))>>8;
19975 var62.x4[3] = ((uint16_t)(((orc_uint16)(var61.x4[3]+128)) + (((orc_uint16)(var61.x4[3]+128))>>8)))>>8;
19977 var63.x4[0] = var62.x4[0];
19978 var63.x4[1] = var62.x4[1];
19979 var63.x4[2] = var62.x4[2];
19980 var63.x4[3] = var62.x4[3];
/* Sum the two partial results per byte through ORC_CLAMP_UB (macro defined
 * elsewhere; presumably saturates to 0..255 -- confirm). */
19982 var46.x4[0] = ORC_CLAMP_UB((orc_uint8)var57.x4[0] + (orc_uint8)var63.x4[0]);
19983 var46.x4[1] = ORC_CLAMP_UB((orc_uint8)var57.x4[1] + (orc_uint8)var63.x4[1]);
19984 var46.x4[2] = ORC_CLAMP_UB((orc_uint8)var57.x4[2] + (orc_uint8)var63.x4[2]);
19985 var46.x4[3] = ORC_CLAMP_UB((orc_uint8)var57.x4[3] + (orc_uint8)var63.x4[3]);
/*
 * Orc-backed entry point for orc_code_combine_xor_u.
 *
 * Builds the OrcProgram "orc_code_combine_xor_u" (stored in the static `p`)
 * while holding the Orc once-mutex, then points a stack OrcExecutor at d1, s1
 * and s2 and picks up the compiled entry point from p->code_exec.
 *
 * d1: destination array, also read as an input by the program (ORC_VAR_D1
 *     appears as a convubw source).
 * s1, s2: read-only source arrays.
 * n: element count. NOTE(review): presumably passed to the executor; the
 *    statement that does so is not among the lines below -- confirm.
 */
19993 orc_code_combine_xor_u (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n)
19995 OrcExecutor _ex, *ex = &_ex;
19996 static int p_inited = 0;
19997 static OrcProgram *p = 0;
19998 void (*func) (OrcExecutor *);
/* Program construction happens between the once-mutex lock and unlock. */
20001 orc_once_mutex_lock ();
20003 OrcCompileResult result;
20005 p = orc_program_new ();
20006 orc_program_set_name (p, "orc_code_combine_xor_u");
20007 orc_program_set_backup_function (p, _backup_orc_code_combine_xor_u);
/* Variables: 4-byte destination and sources, the 0xff constant, six 8-byte
 * and two 4-byte temporaries. */
20008 orc_program_add_destination (p, 4, "d1");
20009 orc_program_add_source (p, 4, "s1");
20010 orc_program_add_source (p, 4, "s2");
20011 orc_program_add_constant (p, 4, 0x000000ff, "c1");
20012 orc_program_add_temporary (p, 8, "t1");
20013 orc_program_add_temporary (p, 8, "t2");
20014 orc_program_add_temporary (p, 8, "t3");
20015 orc_program_add_temporary (p, 8, "t4");
20016 orc_program_add_temporary (p, 8, "t5");
20017 orc_program_add_temporary (p, 8, "t6");
20018 orc_program_add_temporary (p, 4, "t7");
20019 orc_program_add_temporary (p, 4, "t8");
/* t4 = div255 (widen (s1) * splat (widen (s2))) */
20021 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
20022 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
20023 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
20024 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
20025 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
/* t7 = narrow (div255 (t4 * (splat (widen (d1)) ^ 0xff))) */
20026 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
20027 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
20028 orc_program_append_2 (p, "xorw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C1, ORC_VAR_D1);
20029 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
20030 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
20031 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
/* t8 = narrow (div255 (widen (d1) * (splat (t4) ^ 0xff))) */
20032 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
20033 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
20034 orc_program_append_2 (p, "xorw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C1, ORC_VAR_D1);
20035 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T1, ORC_VAR_T5, ORC_VAR_D1);
20036 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
20037 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
/* d1 = t7 + t8 via "addusb". */
20038 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1);
20040 result = orc_program_compile (p);
20043 orc_once_mutex_unlock ();
/* Bind the caller's arrays; the const qualifier on the sources is cast away
 * because the executor's array slots take plain void pointers. */
20048 ex->arrays[ORC_VAR_D1] = d1;
20049 ex->arrays[ORC_VAR_S1] = (void *)s1;
20050 ex->arrays[ORC_VAR_S2] = (void *)s2;
20052 func = p->code_exec;
20058 /* orc_code_combine_xor_u_n */
/*
 * Plain C variant of orc_code_combine_xor_u_n: works directly on d1 (output,
 * non-const) and s1 (input), both viewed as orc_union32 arrays, for i in
 * [0, n).
 * NOTE(review): var40/var41/var43 are presumably per-element loads from
 * ptr4/ptr0 and var45 the value stored to ptr0 -- confirm against the
 * load/store statements.
 */
20061 orc_code_combine_xor_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n){
20063 orc_union32 * ORC_RESTRICT ptr0;
20064 const orc_union32 * ORC_RESTRICT ptr4;
20085 ptr0 = (orc_union32 *)d1;
20086 ptr4 = (orc_union32 *)s1;
/* Loop-invariant 0xff lanes; v ^ 0xff == 255 - v for v in 0..255. */
20089 var42.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
20090 var42.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
20091 var42.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
20092 var42.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
20094 var44.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
20095 var44.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
20096 var44.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
20097 var44.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
20099 for (i = 0; i < n; i++) {
/* Bytes reinterpreted as unsigned and placed one per lane. */
20103 var46.x4[0] = (orc_uint8)var40.x4[0];
20104 var46.x4[1] = (orc_uint8)var40.x4[1];
20105 var46.x4[2] = (orc_uint8)var40.x4[2];
20106 var46.x4[3] = (orc_uint8)var40.x4[3];
20110 var47.x4[0] = (orc_uint8)var41.x4[0];
20111 var47.x4[1] = (orc_uint8)var41.x4[1];
20112 var47.x4[2] = (orc_uint8)var41.x4[2];
20113 var47.x4[3] = (orc_uint8)var41.x4[3];
/* Broadcast the top 16 bits of var47 to all four 16-bit slots, then invert. */
20115 var48.i = ((((orc_uint64)var47.i)>>48) << 48) | ((((orc_uint64)var47.i)>>48)<<32) | ((((orc_uint64)var47.i)>>48) << 16) | ((((orc_uint64)var47.i)>>48));
20117 var49.x4[0] = var48.x4[0] ^ var42.x4[0];
20118 var49.x4[1] = var48.x4[1] ^ var42.x4[1];
20119 var49.x4[2] = var48.x4[2] ^ var42.x4[2];
20120 var49.x4[3] = var48.x4[3] ^ var42.x4[3];
/* First term: var46 * inverted broadcast, truncated to 16 bits, then a
 * rounded divide-by-255: ((x + 128) + ((x + 128) >> 8)) >> 8. */
20122 var50.x4[0] = (var46.x4[0] * var49.x4[0]) & 0xffff;
20123 var50.x4[1] = (var46.x4[1] * var49.x4[1]) & 0xffff;
20124 var50.x4[2] = (var46.x4[2] * var49.x4[2]) & 0xffff;
20125 var50.x4[3] = (var46.x4[3] * var49.x4[3]) & 0xffff;
20127 var51.x4[0] = ((uint16_t)(((orc_uint16)(var50.x4[0]+128)) + (((orc_uint16)(var50.x4[0]+128))>>8)))>>8;
20128 var51.x4[1] = ((uint16_t)(((orc_uint16)(var50.x4[1]+128)) + (((orc_uint16)(var50.x4[1]+128))>>8)))>>8;
20129 var51.x4[2] = ((uint16_t)(((orc_uint16)(var50.x4[2]+128)) + (((orc_uint16)(var50.x4[2]+128))>>8)))>>8;
20130 var51.x4[3] = ((uint16_t)(((orc_uint16)(var50.x4[3]+128)) + (((orc_uint16)(var50.x4[3]+128))>>8)))>>8;
/* Lane-by-lane copy; any narrowing depends on var52's declared lane type. */
20132 var52.x4[0] = var51.x4[0];
20133 var52.x4[1] = var51.x4[1];
20134 var52.x4[2] = var51.x4[2];
20135 var52.x4[3] = var51.x4[3];
20139 var53.x4[0] = (orc_uint8)var43.x4[0];
20140 var53.x4[1] = (orc_uint8)var43.x4[1];
20141 var53.x4[2] = (orc_uint8)var43.x4[2];
20142 var53.x4[3] = (orc_uint8)var43.x4[3];
/* Second term: var53 * inverted broadcast of var46's top 16 bits, / 255. */
20144 var54.i = ((((orc_uint64)var46.i)>>48) << 48) | ((((orc_uint64)var46.i)>>48)<<32) | ((((orc_uint64)var46.i)>>48) << 16) | ((((orc_uint64)var46.i)>>48));
20146 var55.x4[0] = var54.x4[0] ^ var44.x4[0];
20147 var55.x4[1] = var54.x4[1] ^ var44.x4[1];
20148 var55.x4[2] = var54.x4[2] ^ var44.x4[2];
20149 var55.x4[3] = var54.x4[3] ^ var44.x4[3];
20151 var56.x4[0] = (var53.x4[0] * var55.x4[0]) & 0xffff;
20152 var56.x4[1] = (var53.x4[1] * var55.x4[1]) & 0xffff;
20153 var56.x4[2] = (var53.x4[2] * var55.x4[2]) & 0xffff;
20154 var56.x4[3] = (var53.x4[3] * var55.x4[3]) & 0xffff;
20156 var57.x4[0] = ((uint16_t)(((orc_uint16)(var56.x4[0]+128)) + (((orc_uint16)(var56.x4[0]+128))>>8)))>>8;
20157 var57.x4[1] = ((uint16_t)(((orc_uint16)(var56.x4[1]+128)) + (((orc_uint16)(var56.x4[1]+128))>>8)))>>8;
20158 var57.x4[2] = ((uint16_t)(((orc_uint16)(var56.x4[2]+128)) + (((orc_uint16)(var56.x4[2]+128))>>8)))>>8;
20159 var57.x4[3] = ((uint16_t)(((orc_uint16)(var56.x4[3]+128)) + (((orc_uint16)(var56.x4[3]+128))>>8)))>>8;
20161 var58.x4[0] = var57.x4[0];
20162 var58.x4[1] = var57.x4[1];
20163 var58.x4[2] = var57.x4[2];
20164 var58.x4[3] = var57.x4[3];
/* Per-byte sum of both terms through ORC_CLAMP_UB (macro defined elsewhere;
 * presumably saturates to 0..255 -- confirm). */
20166 var45.x4[0] = ORC_CLAMP_UB((orc_uint8)var52.x4[0] + (orc_uint8)var58.x4[0]);
20167 var45.x4[1] = ORC_CLAMP_UB((orc_uint8)var52.x4[1] + (orc_uint8)var58.x4[1]);
20168 var45.x4[2] = ORC_CLAMP_UB((orc_uint8)var52.x4[2] + (orc_uint8)var58.x4[2]);
20169 var45.x4[3] = ORC_CLAMP_UB((orc_uint8)var52.x4[3] + (orc_uint8)var58.x4[3]);
/*
 * Plain C backup for the "orc_code_combine_xor_u_n" program, driven by an
 * OrcExecutor: ex->arrays[0] is the non-const output and ex->arrays[4] the
 * read-only input, both viewed as orc_union32 arrays. The arithmetic below is
 * statement-for-statement the same as the direct-pointer variant of
 * orc_code_combine_xor_u_n.
 * NOTE(review): the loads feeding var40/var41/var43 and the store of var45
 * should be checked against the full function body.
 */
20178 _backup_orc_code_combine_xor_u_n (OrcExecutor * ORC_RESTRICT ex)
20182 orc_union32 * ORC_RESTRICT ptr0;
20183 const orc_union32 * ORC_RESTRICT ptr4;
20204 ptr0 = (orc_union32 *)ex->arrays[0];
20205 ptr4 = (orc_union32 *)ex->arrays[4];
/* Loop-invariant 0xff lanes; v ^ 0xff == 255 - v for v in 0..255. */
20208 var42.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
20209 var42.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
20210 var42.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
20211 var42.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
20213 var44.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
20214 var44.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
20215 var44.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
20216 var44.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
20218 for (i = 0; i < n; i++) {
/* Bytes reinterpreted as unsigned and placed one per lane. */
20222 var46.x4[0] = (orc_uint8)var40.x4[0];
20223 var46.x4[1] = (orc_uint8)var40.x4[1];
20224 var46.x4[2] = (orc_uint8)var40.x4[2];
20225 var46.x4[3] = (orc_uint8)var40.x4[3];
20229 var47.x4[0] = (orc_uint8)var41.x4[0];
20230 var47.x4[1] = (orc_uint8)var41.x4[1];
20231 var47.x4[2] = (orc_uint8)var41.x4[2];
20232 var47.x4[3] = (orc_uint8)var41.x4[3];
/* Broadcast the top 16 bits of var47 to all four 16-bit slots, then invert. */
20234 var48.i = ((((orc_uint64)var47.i)>>48) << 48) | ((((orc_uint64)var47.i)>>48)<<32) | ((((orc_uint64)var47.i)>>48) << 16) | ((((orc_uint64)var47.i)>>48));
20236 var49.x4[0] = var48.x4[0] ^ var42.x4[0];
20237 var49.x4[1] = var48.x4[1] ^ var42.x4[1];
20238 var49.x4[2] = var48.x4[2] ^ var42.x4[2];
20239 var49.x4[3] = var48.x4[3] ^ var42.x4[3];
/* First term: 16-bit truncated product followed by a rounded divide-by-255,
 * ((x + 128) + ((x + 128) >> 8)) >> 8. */
20241 var50.x4[0] = (var46.x4[0] * var49.x4[0]) & 0xffff;
20242 var50.x4[1] = (var46.x4[1] * var49.x4[1]) & 0xffff;
20243 var50.x4[2] = (var46.x4[2] * var49.x4[2]) & 0xffff;
20244 var50.x4[3] = (var46.x4[3] * var49.x4[3]) & 0xffff;
20246 var51.x4[0] = ((uint16_t)(((orc_uint16)(var50.x4[0]+128)) + (((orc_uint16)(var50.x4[0]+128))>>8)))>>8;
20247 var51.x4[1] = ((uint16_t)(((orc_uint16)(var50.x4[1]+128)) + (((orc_uint16)(var50.x4[1]+128))>>8)))>>8;
20248 var51.x4[2] = ((uint16_t)(((orc_uint16)(var50.x4[2]+128)) + (((orc_uint16)(var50.x4[2]+128))>>8)))>>8;
20249 var51.x4[3] = ((uint16_t)(((orc_uint16)(var50.x4[3]+128)) + (((orc_uint16)(var50.x4[3]+128))>>8)))>>8;
/* Lane-by-lane copy; any narrowing depends on var52's declared lane type. */
20251 var52.x4[0] = var51.x4[0];
20252 var52.x4[1] = var51.x4[1];
20253 var52.x4[2] = var51.x4[2];
20254 var52.x4[3] = var51.x4[3];
20258 var53.x4[0] = (orc_uint8)var43.x4[0];
20259 var53.x4[1] = (orc_uint8)var43.x4[1];
20260 var53.x4[2] = (orc_uint8)var43.x4[2];
20261 var53.x4[3] = (orc_uint8)var43.x4[3];
/* Second term: var53 * inverted broadcast of var46's top 16 bits, / 255. */
20263 var54.i = ((((orc_uint64)var46.i)>>48) << 48) | ((((orc_uint64)var46.i)>>48)<<32) | ((((orc_uint64)var46.i)>>48) << 16) | ((((orc_uint64)var46.i)>>48));
20265 var55.x4[0] = var54.x4[0] ^ var44.x4[0];
20266 var55.x4[1] = var54.x4[1] ^ var44.x4[1];
20267 var55.x4[2] = var54.x4[2] ^ var44.x4[2];
20268 var55.x4[3] = var54.x4[3] ^ var44.x4[3];
20270 var56.x4[0] = (var53.x4[0] * var55.x4[0]) & 0xffff;
20271 var56.x4[1] = (var53.x4[1] * var55.x4[1]) & 0xffff;
20272 var56.x4[2] = (var53.x4[2] * var55.x4[2]) & 0xffff;
20273 var56.x4[3] = (var53.x4[3] * var55.x4[3]) & 0xffff;
20275 var57.x4[0] = ((uint16_t)(((orc_uint16)(var56.x4[0]+128)) + (((orc_uint16)(var56.x4[0]+128))>>8)))>>8;
20276 var57.x4[1] = ((uint16_t)(((orc_uint16)(var56.x4[1]+128)) + (((orc_uint16)(var56.x4[1]+128))>>8)))>>8;
20277 var57.x4[2] = ((uint16_t)(((orc_uint16)(var56.x4[2]+128)) + (((orc_uint16)(var56.x4[2]+128))>>8)))>>8;
20278 var57.x4[3] = ((uint16_t)(((orc_uint16)(var56.x4[3]+128)) + (((orc_uint16)(var56.x4[3]+128))>>8)))>>8;
20280 var58.x4[0] = var57.x4[0];
20281 var58.x4[1] = var57.x4[1];
20282 var58.x4[2] = var57.x4[2];
20283 var58.x4[3] = var57.x4[3];
/* Per-byte sum of both terms through ORC_CLAMP_UB (macro defined elsewhere;
 * presumably saturates to 0..255 -- confirm). */
20285 var45.x4[0] = ORC_CLAMP_UB((orc_uint8)var52.x4[0] + (orc_uint8)var58.x4[0]);
20286 var45.x4[1] = ORC_CLAMP_UB((orc_uint8)var52.x4[1] + (orc_uint8)var58.x4[1]);
20287 var45.x4[2] = ORC_CLAMP_UB((orc_uint8)var52.x4[2] + (orc_uint8)var58.x4[2]);
20288 var45.x4[3] = ORC_CLAMP_UB((orc_uint8)var52.x4[3] + (orc_uint8)var58.x4[3]);
/*
 * Orc-backed entry point for orc_code_combine_xor_u_n.
 *
 * Builds the OrcProgram "orc_code_combine_xor_u_n" (stored in the static `p`)
 * while holding the Orc once-mutex, binds d1 and s1 to a stack OrcExecutor and
 * picks up the compiled entry point from p->code_exec.
 *
 * d1: destination array, also read as an input by the program.
 * s1: read-only source array.
 * n: element count. NOTE(review): presumably handed to the executor; that
 *    statement is not among the lines below -- confirm.
 */
20296 orc_code_combine_xor_u_n (orc_uint32 * d1, const orc_uint32 * s1, int n)
20298 OrcExecutor _ex, *ex = &_ex;
20299 static int p_inited = 0;
20300 static OrcProgram *p = 0;
20301 void (*func) (OrcExecutor *);
/* Program construction happens between the once-mutex lock and unlock. */
20304 orc_once_mutex_lock ();
20306 OrcCompileResult result;
20308 p = orc_program_new ();
20309 orc_program_set_name (p, "orc_code_combine_xor_u_n");
20310 orc_program_set_backup_function (p, _backup_orc_code_combine_xor_u_n);
20311 orc_program_add_destination (p, 4, "d1");
20312 orc_program_add_source (p, 4, "s1");
20313 orc_program_add_constant (p, 4, 0x000000ff, "c1");
20314 orc_program_add_temporary (p, 8, "t1");
20315 orc_program_add_temporary (p, 8, "t2");
20316 orc_program_add_temporary (p, 8, "t3");
20317 orc_program_add_temporary (p, 8, "t4");
20318 orc_program_add_temporary (p, 8, "t5");
20319 orc_program_add_temporary (p, 8, "t6");
20320 orc_program_add_temporary (p, 4, "t7");
20321 orc_program_add_temporary (p, 4, "t8");
/* t4 = widen (s1) */
20323 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
/* t7 = narrow (div255 (t4 * (splat (widen (d1)) ^ 0xff))) */
20324 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
20325 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
20326 orc_program_append_2 (p, "xorw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C1, ORC_VAR_D1);
20327 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
20328 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
20329 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
/* t8 = narrow (div255 (widen (d1) * (splat (t4) ^ 0xff))) */
20330 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
20331 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
20332 orc_program_append_2 (p, "xorw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C1, ORC_VAR_D1);
20333 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T1, ORC_VAR_T5, ORC_VAR_D1);
20334 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
20335 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
/* d1 = t7 + t8 via "addusb". */
20336 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1);
20338 result = orc_program_compile (p);
20341 orc_once_mutex_unlock ();
/* Bind the caller's arrays; const is cast away for the executor's slot. */
20346 ex->arrays[ORC_VAR_D1] = d1;
20347 ex->arrays[ORC_VAR_S1] = (void *)s1;
20349 func = p->code_exec;
20355 /* orc_code_combine_add_ca */
/*
 * Plain C variant of orc_code_combine_add_ca: works directly on d1 (output,
 * non-const) and on s1 and s2 (inputs), all viewed as orc_union32 arrays, for
 * i in [0, n).
 * NOTE(review): var35/var36/var37 are presumably per-element loads and var38
 * the stored result -- confirm against the load/store statements.
 */
20358 orc_code_combine_add_ca (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n){
20360 orc_union32 * ORC_RESTRICT ptr0;
20361 const orc_union32 * ORC_RESTRICT ptr4;
20362 const orc_union32 * ORC_RESTRICT ptr5;
20373 ptr0 = (orc_union32 *)d1;
20374 ptr4 = (orc_union32 *)s1;
20375 ptr5 = (orc_union32 *)s2;
20378 for (i = 0; i < n; i++) {
/* Bytes reinterpreted as unsigned and placed one per lane. */
20382 var39.x4[0] = (orc_uint8)var35.x4[0];
20383 var39.x4[1] = (orc_uint8)var35.x4[1];
20384 var39.x4[2] = (orc_uint8)var35.x4[2];
20385 var39.x4[3] = (orc_uint8)var35.x4[3];
20389 var40.x4[0] = (orc_uint8)var36.x4[0];
20390 var40.x4[1] = (orc_uint8)var36.x4[1];
20391 var40.x4[2] = (orc_uint8)var36.x4[2];
20392 var40.x4[3] = (orc_uint8)var36.x4[3];
/* Lane-wise product, truncated to 16 bits. */
20394 var41.x4[0] = (var39.x4[0] * var40.x4[0]) & 0xffff;
20395 var41.x4[1] = (var39.x4[1] * var40.x4[1]) & 0xffff;
20396 var41.x4[2] = (var39.x4[2] * var40.x4[2]) & 0xffff;
20397 var41.x4[3] = (var39.x4[3] * var40.x4[3]) & 0xffff;
/* Rounded divide-by-255: ((x + 128) + ((x + 128) >> 8)) >> 8. */
20399 var42.x4[0] = ((uint16_t)(((orc_uint16)(var41.x4[0]+128)) + (((orc_uint16)(var41.x4[0]+128))>>8)))>>8;
20400 var42.x4[1] = ((uint16_t)(((orc_uint16)(var41.x4[1]+128)) + (((orc_uint16)(var41.x4[1]+128))>>8)))>>8;
20401 var42.x4[2] = ((uint16_t)(((orc_uint16)(var41.x4[2]+128)) + (((orc_uint16)(var41.x4[2]+128))>>8)))>>8;
20402 var42.x4[3] = ((uint16_t)(((orc_uint16)(var41.x4[3]+128)) + (((orc_uint16)(var41.x4[3]+128))>>8)))>>8;
/* Lane-by-lane copy; any narrowing depends on var43's declared lane type. */
20404 var43.x4[0] = var42.x4[0];
20405 var43.x4[1] = var42.x4[1];
20406 var43.x4[2] = var42.x4[2];
20407 var43.x4[3] = var42.x4[3];
/* Add the scaled product to var37 per byte through ORC_CLAMP_UB (macro
 * defined elsewhere; presumably saturates to 0..255 -- confirm). */
20411 var38.x4[0] = ORC_CLAMP_UB((orc_uint8)var37.x4[0] + (orc_uint8)var43.x4[0]);
20412 var38.x4[1] = ORC_CLAMP_UB((orc_uint8)var37.x4[1] + (orc_uint8)var43.x4[1]);
20413 var38.x4[2] = ORC_CLAMP_UB((orc_uint8)var37.x4[2] + (orc_uint8)var43.x4[2]);
20414 var38.x4[3] = ORC_CLAMP_UB((orc_uint8)var37.x4[3] + (orc_uint8)var43.x4[3]);
/*
 * Plain C backup for the "orc_code_combine_add_ca" program, driven by an
 * OrcExecutor: ex->arrays[0] is the non-const output, ex->arrays[4] and
 * ex->arrays[5] are read-only inputs, all viewed as orc_union32 arrays.
 * NOTE(review): var35/var36/var37 are presumably per-element loads and var38
 * the stored result -- confirm against the load/store statements.
 */
20423 _backup_orc_code_combine_add_ca (OrcExecutor * ORC_RESTRICT ex)
20427 orc_union32 * ORC_RESTRICT ptr0;
20428 const orc_union32 * ORC_RESTRICT ptr4;
20429 const orc_union32 * ORC_RESTRICT ptr5;
20440 ptr0 = (orc_union32 *)ex->arrays[0];
20441 ptr4 = (orc_union32 *)ex->arrays[4];
20442 ptr5 = (orc_union32 *)ex->arrays[5];
20445 for (i = 0; i < n; i++) {
/* Bytes reinterpreted as unsigned and placed one per lane. */
20449 var39.x4[0] = (orc_uint8)var35.x4[0];
20450 var39.x4[1] = (orc_uint8)var35.x4[1];
20451 var39.x4[2] = (orc_uint8)var35.x4[2];
20452 var39.x4[3] = (orc_uint8)var35.x4[3];
20456 var40.x4[0] = (orc_uint8)var36.x4[0];
20457 var40.x4[1] = (orc_uint8)var36.x4[1];
20458 var40.x4[2] = (orc_uint8)var36.x4[2];
20459 var40.x4[3] = (orc_uint8)var36.x4[3];
/* Lane-wise product, truncated to 16 bits. */
20461 var41.x4[0] = (var39.x4[0] * var40.x4[0]) & 0xffff;
20462 var41.x4[1] = (var39.x4[1] * var40.x4[1]) & 0xffff;
20463 var41.x4[2] = (var39.x4[2] * var40.x4[2]) & 0xffff;
20464 var41.x4[3] = (var39.x4[3] * var40.x4[3]) & 0xffff;
/* Rounded divide-by-255: ((x + 128) + ((x + 128) >> 8)) >> 8. */
20466 var42.x4[0] = ((uint16_t)(((orc_uint16)(var41.x4[0]+128)) + (((orc_uint16)(var41.x4[0]+128))>>8)))>>8;
20467 var42.x4[1] = ((uint16_t)(((orc_uint16)(var41.x4[1]+128)) + (((orc_uint16)(var41.x4[1]+128))>>8)))>>8;
20468 var42.x4[2] = ((uint16_t)(((orc_uint16)(var41.x4[2]+128)) + (((orc_uint16)(var41.x4[2]+128))>>8)))>>8;
20469 var42.x4[3] = ((uint16_t)(((orc_uint16)(var41.x4[3]+128)) + (((orc_uint16)(var41.x4[3]+128))>>8)))>>8;
/* Lane-by-lane copy; any narrowing depends on var43's declared lane type. */
20471 var43.x4[0] = var42.x4[0];
20472 var43.x4[1] = var42.x4[1];
20473 var43.x4[2] = var42.x4[2];
20474 var43.x4[3] = var42.x4[3];
/* Add the scaled product to var37 per byte through ORC_CLAMP_UB (macro
 * defined elsewhere; presumably saturates to 0..255 -- confirm). */
20478 var38.x4[0] = ORC_CLAMP_UB((orc_uint8)var37.x4[0] + (orc_uint8)var43.x4[0]);
20479 var38.x4[1] = ORC_CLAMP_UB((orc_uint8)var37.x4[1] + (orc_uint8)var43.x4[1]);
20480 var38.x4[2] = ORC_CLAMP_UB((orc_uint8)var37.x4[2] + (orc_uint8)var43.x4[2]);
20481 var38.x4[3] = ORC_CLAMP_UB((orc_uint8)var37.x4[3] + (orc_uint8)var43.x4[3]);
/*
 * Orc-backed entry point for orc_code_combine_add_ca.
 *
 * Builds the OrcProgram "orc_code_combine_add_ca" (stored in the static `p`)
 * while holding the Orc once-mutex, binds d1, s1 and s2 to a stack
 * OrcExecutor and picks up the compiled entry point from p->code_exec.
 *
 * d1: destination array, also used as the first "addusb" operand.
 * s1, s2: read-only source arrays.
 * n: element count. NOTE(review): presumably handed to the executor; that
 *    statement is not among the lines below -- confirm.
 */
20489 orc_code_combine_add_ca (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n)
20491 OrcExecutor _ex, *ex = &_ex;
20492 static int p_inited = 0;
20493 static OrcProgram *p = 0;
20494 void (*func) (OrcExecutor *);
/* Program construction happens between the once-mutex lock and unlock. */
20497 orc_once_mutex_lock ();
20499 OrcCompileResult result;
20501 p = orc_program_new ();
20502 orc_program_set_name (p, "orc_code_combine_add_ca");
20503 orc_program_set_backup_function (p, _backup_orc_code_combine_add_ca);
20504 orc_program_add_destination (p, 4, "d1");
20505 orc_program_add_source (p, 4, "s1");
20506 orc_program_add_source (p, 4, "s2");
20507 orc_program_add_temporary (p, 8, "t1");
20508 orc_program_add_temporary (p, 8, "t2");
20509 orc_program_add_temporary (p, 4, "t3");
/* t3 = narrow (div255 (widen (s1) * widen (s2))); d1 = d1 + t3 via "addusb". */
20511 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
20512 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
20513 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
20514 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
20515 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
20516 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_D1);
20518 result = orc_program_compile (p);
20521 orc_once_mutex_unlock ();
/* Bind the caller's arrays; const is cast away for the executor's slots. */
20526 ex->arrays[ORC_VAR_D1] = d1;
20527 ex->arrays[ORC_VAR_S1] = (void *)s1;
20528 ex->arrays[ORC_VAR_S2] = (void *)s2;
20530 func = p->code_exec;
20536 /* orc_code_combine_add_ca_n */
/*
 * Plain C variant of orc_code_combine_add_ca_n: for i in [0, n), adds two
 * 4-byte values byte by byte through ORC_CLAMP_UB. d1 is the non-const
 * output and s1 the read-only input, both viewed as orc_union32 arrays.
 * NOTE(review): var32/var33 are presumably the loads from ptr0[i]/ptr4[i] and
 * var34 the value stored to ptr0[i] -- confirm against the load/store
 * statements.
 */
20539 orc_code_combine_add_ca_n (orc_uint32 * d1, const orc_uint32 * s1, int n){
20541 orc_union32 * ORC_RESTRICT ptr0;
20542 const orc_union32 * ORC_RESTRICT ptr4;
20547 ptr0 = (orc_union32 *)d1;
20548 ptr4 = (orc_union32 *)s1;
20551 for (i = 0; i < n; i++) {
/* ORC_CLAMP_UB is defined elsewhere; presumably saturates to 0..255. */
20557 var34.x4[0] = ORC_CLAMP_UB((orc_uint8)var32.x4[0] + (orc_uint8)var33.x4[0]);
20558 var34.x4[1] = ORC_CLAMP_UB((orc_uint8)var32.x4[1] + (orc_uint8)var33.x4[1]);
20559 var34.x4[2] = ORC_CLAMP_UB((orc_uint8)var32.x4[2] + (orc_uint8)var33.x4[2]);
20560 var34.x4[3] = ORC_CLAMP_UB((orc_uint8)var32.x4[3] + (orc_uint8)var33.x4[3]);
/*
 * Plain C backup for the "orc_code_combine_add_ca_n" program, driven by an
 * OrcExecutor: ex->arrays[0] is the non-const output and ex->arrays[4] the
 * read-only input, both viewed as orc_union32 arrays. Each iteration adds two
 * 4-byte values byte by byte through ORC_CLAMP_UB.
 * NOTE(review): var32/var33 are presumably the loads from ptr0[i]/ptr4[i] and
 * var34 the value stored to ptr0[i] -- confirm against the load/store
 * statements.
 */
20569 _backup_orc_code_combine_add_ca_n (OrcExecutor * ORC_RESTRICT ex)
20573 orc_union32 * ORC_RESTRICT ptr0;
20574 const orc_union32 * ORC_RESTRICT ptr4;
20579 ptr0 = (orc_union32 *)ex->arrays[0];
20580 ptr4 = (orc_union32 *)ex->arrays[4];
20583 for (i = 0; i < n; i++) {
/* ORC_CLAMP_UB is defined elsewhere; presumably saturates to 0..255. */
20589 var34.x4[0] = ORC_CLAMP_UB((orc_uint8)var32.x4[0] + (orc_uint8)var33.x4[0]);
20590 var34.x4[1] = ORC_CLAMP_UB((orc_uint8)var32.x4[1] + (orc_uint8)var33.x4[1]);
20591 var34.x4[2] = ORC_CLAMP_UB((orc_uint8)var32.x4[2] + (orc_uint8)var33.x4[2]);
20592 var34.x4[3] = ORC_CLAMP_UB((orc_uint8)var32.x4[3] + (orc_uint8)var33.x4[3]);
/*
 * Orc-backed entry point for orc_code_combine_add_ca_n.
 *
 * Builds the single-instruction OrcProgram "orc_code_combine_add_ca_n"
 * (d1 = d1 + s1 via "addusb") while holding the Orc once-mutex, binds d1 and
 * s1 to a stack OrcExecutor and picks up the compiled entry point from
 * p->code_exec.
 *
 * d1: destination array, also the first "addusb" operand.
 * s1: read-only source array.
 * n: element count. NOTE(review): presumably handed to the executor; that
 *    statement is not among the lines below -- confirm.
 */
20600 orc_code_combine_add_ca_n (orc_uint32 * d1, const orc_uint32 * s1, int n)
20602 OrcExecutor _ex, *ex = &_ex;
20603 static int p_inited = 0;
20604 static OrcProgram *p = 0;
20605 void (*func) (OrcExecutor *);
/* Program construction happens between the once-mutex lock and unlock. */
20608 orc_once_mutex_lock ();
20610 OrcCompileResult result;
20612 p = orc_program_new ();
20613 orc_program_set_name (p, "orc_code_combine_add_ca_n");
20614 orc_program_set_backup_function (p, _backup_orc_code_combine_add_ca_n);
20615 orc_program_add_destination (p, 4, "d1");
20616 orc_program_add_source (p, 4, "s1");
20618 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1);
20620 result = orc_program_compile (p);
20623 orc_once_mutex_unlock ();
/* Bind the caller's arrays; const is cast away for the executor's slot. */
20628 ex->arrays[ORC_VAR_D1] = d1;
20629 ex->arrays[ORC_VAR_S1] = (void *)s1;
20631 func = p->code_exec;
20637 /* orc_code_combine_over_ca */
/*
 * Plain C variant of orc_code_combine_over_ca: works directly on d1 (output,
 * non-const) and on s1 and s2 (inputs), all viewed as orc_union32 arrays, for
 * i in [0, n).
 * NOTE(review): var40/var41/var52 are presumably per-element loads and var43
 * the stored result -- confirm against the load/store statements.
 */
20640 orc_code_combine_over_ca (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n){
20642 orc_union32 * ORC_RESTRICT ptr0;
20643 const orc_union32 * ORC_RESTRICT ptr4;
20644 const orc_union32 * ORC_RESTRICT ptr5;
20664 ptr0 = (orc_union32 *)d1;
20665 ptr4 = (orc_union32 *)s1;
20666 ptr5 = (orc_union32 *)s2;
/* Loop-invariant 0xff lanes; v ^ 0xff == 255 - v for v in 0..255. */
20669 var42.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
20670 var42.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
20671 var42.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
20672 var42.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
20674 for (i = 0; i < n; i++) {
/* Bytes reinterpreted as unsigned and placed one per lane. */
20678 var44.x4[0] = (orc_uint8)var40.x4[0];
20679 var44.x4[1] = (orc_uint8)var40.x4[1];
20680 var44.x4[2] = (orc_uint8)var40.x4[2];
20681 var44.x4[3] = (orc_uint8)var40.x4[3];
20685 var45.x4[0] = (orc_uint8)var41.x4[0];
20686 var45.x4[1] = (orc_uint8)var41.x4[1];
20687 var45.x4[2] = (orc_uint8)var41.x4[2];
20688 var45.x4[3] = (orc_uint8)var41.x4[3];
/* Broadcast the top 16 bits of var44 to all four 16-bit slots. */
20690 var46.i = ((((orc_uint64)var44.i)>>48) << 48) | ((((orc_uint64)var44.i)>>48)<<32) | ((((orc_uint64)var44.i)>>48) << 16) | ((((orc_uint64)var44.i)>>48));
/* var49 = rounded (var44 * var45) / 255, lane by lane. */
20692 var47.x4[0] = (var44.x4[0] * var45.x4[0]) & 0xffff;
20693 var47.x4[1] = (var44.x4[1] * var45.x4[1]) & 0xffff;
20694 var47.x4[2] = (var44.x4[2] * var45.x4[2]) & 0xffff;
20695 var47.x4[3] = (var44.x4[3] * var45.x4[3]) & 0xffff;
20697 var48.x4[0] = ((uint16_t)(((orc_uint16)(var47.x4[0]+128)) + (((orc_uint16)(var47.x4[0]+128))>>8)))>>8;
20698 var48.x4[1] = ((uint16_t)(((orc_uint16)(var47.x4[1]+128)) + (((orc_uint16)(var47.x4[1]+128))>>8)))>>8;
20699 var48.x4[2] = ((uint16_t)(((orc_uint16)(var47.x4[2]+128)) + (((orc_uint16)(var47.x4[2]+128))>>8)))>>8;
20700 var48.x4[3] = ((uint16_t)(((orc_uint16)(var47.x4[3]+128)) + (((orc_uint16)(var47.x4[3]+128))>>8)))>>8;
20702 var49.x4[0] = var48.x4[0];
20703 var49.x4[1] = var48.x4[1];
20704 var49.x4[2] = var48.x4[2];
20705 var49.x4[3] = var48.x4[3];
/* var51 = rounded (var45 * broadcast) / 255, lane by lane. */
20707 var50.x4[0] = (var45.x4[0] * var46.x4[0]) & 0xffff;
20708 var50.x4[1] = (var45.x4[1] * var46.x4[1]) & 0xffff;
20709 var50.x4[2] = (var45.x4[2] * var46.x4[2]) & 0xffff;
20710 var50.x4[3] = (var45.x4[3] * var46.x4[3]) & 0xffff;
20712 var51.x4[0] = ((uint16_t)(((orc_uint16)(var50.x4[0]+128)) + (((orc_uint16)(var50.x4[0]+128))>>8)))>>8;
20713 var51.x4[1] = ((uint16_t)(((orc_uint16)(var50.x4[1]+128)) + (((orc_uint16)(var50.x4[1]+128))>>8)))>>8;
20714 var51.x4[2] = ((uint16_t)(((orc_uint16)(var50.x4[2]+128)) + (((orc_uint16)(var50.x4[2]+128))>>8)))>>8;
20715 var51.x4[3] = ((uint16_t)(((orc_uint16)(var50.x4[3]+128)) + (((orc_uint16)(var50.x4[3]+128))>>8)))>>8;
20719 var53.x4[0] = (orc_uint8)var52.x4[0];
20720 var53.x4[1] = (orc_uint8)var52.x4[1];
20721 var53.x4[2] = (orc_uint8)var52.x4[2];
20722 var53.x4[3] = (orc_uint8)var52.x4[3];
/* var57 = rounded (var53 * (var51 ^ 0xff)) / 255, lane by lane. */
20724 var54.x4[0] = var51.x4[0] ^ var42.x4[0];
20725 var54.x4[1] = var51.x4[1] ^ var42.x4[1];
20726 var54.x4[2] = var51.x4[2] ^ var42.x4[2];
20727 var54.x4[3] = var51.x4[3] ^ var42.x4[3];
20729 var55.x4[0] = (var53.x4[0] * var54.x4[0]) & 0xffff;
20730 var55.x4[1] = (var53.x4[1] * var54.x4[1]) & 0xffff;
20731 var55.x4[2] = (var53.x4[2] * var54.x4[2]) & 0xffff;
20732 var55.x4[3] = (var53.x4[3] * var54.x4[3]) & 0xffff;
20734 var56.x4[0] = ((uint16_t)(((orc_uint16)(var55.x4[0]+128)) + (((orc_uint16)(var55.x4[0]+128))>>8)))>>8;
20735 var56.x4[1] = ((uint16_t)(((orc_uint16)(var55.x4[1]+128)) + (((orc_uint16)(var55.x4[1]+128))>>8)))>>8;
20736 var56.x4[2] = ((uint16_t)(((orc_uint16)(var55.x4[2]+128)) + (((orc_uint16)(var55.x4[2]+128))>>8)))>>8;
20737 var56.x4[3] = ((uint16_t)(((orc_uint16)(var55.x4[3]+128)) + (((orc_uint16)(var55.x4[3]+128))>>8)))>>8;
20739 var57.x4[0] = var56.x4[0];
20740 var57.x4[1] = var56.x4[1];
20741 var57.x4[2] = var56.x4[2];
20742 var57.x4[3] = var56.x4[3];
/* Result: var57 + var49 per byte through ORC_CLAMP_UB (macro defined
 * elsewhere; presumably saturates to 0..255 -- confirm). */
20744 var43.x4[0] = ORC_CLAMP_UB((orc_uint8)var57.x4[0] + (orc_uint8)var49.x4[0]);
20745 var43.x4[1] = ORC_CLAMP_UB((orc_uint8)var57.x4[1] + (orc_uint8)var49.x4[1]);
20746 var43.x4[2] = ORC_CLAMP_UB((orc_uint8)var57.x4[2] + (orc_uint8)var49.x4[2]);
20747 var43.x4[3] = ORC_CLAMP_UB((orc_uint8)var57.x4[3] + (orc_uint8)var49.x4[3]);
/*
 * Plain C backup for the "orc_code_combine_over_ca" program, driven by an
 * OrcExecutor: ex->arrays[0] is the non-const output, ex->arrays[4] and
 * ex->arrays[5] are read-only inputs, all viewed as orc_union32 arrays.
 * NOTE(review): var40/var41/var52 are presumably per-element loads and var43
 * the stored result -- confirm against the load/store statements.
 */
20756 _backup_orc_code_combine_over_ca (OrcExecutor * ORC_RESTRICT ex)
20760 orc_union32 * ORC_RESTRICT ptr0;
20761 const orc_union32 * ORC_RESTRICT ptr4;
20762 const orc_union32 * ORC_RESTRICT ptr5;
20782 ptr0 = (orc_union32 *)ex->arrays[0];
20783 ptr4 = (orc_union32 *)ex->arrays[4];
20784 ptr5 = (orc_union32 *)ex->arrays[5];
/* Loop-invariant 0xff lanes; v ^ 0xff == 255 - v for v in 0..255. */
20787 var42.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
20788 var42.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
20789 var42.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
20790 var42.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
20792 for (i = 0; i < n; i++) {
/* Bytes reinterpreted as unsigned and placed one per lane. */
20796 var44.x4[0] = (orc_uint8)var40.x4[0];
20797 var44.x4[1] = (orc_uint8)var40.x4[1];
20798 var44.x4[2] = (orc_uint8)var40.x4[2];
20799 var44.x4[3] = (orc_uint8)var40.x4[3];
20803 var45.x4[0] = (orc_uint8)var41.x4[0];
20804 var45.x4[1] = (orc_uint8)var41.x4[1];
20805 var45.x4[2] = (orc_uint8)var41.x4[2];
20806 var45.x4[3] = (orc_uint8)var41.x4[3];
/* Broadcast the top 16 bits of var44 to all four 16-bit slots. */
20808 var46.i = ((((orc_uint64)var44.i)>>48) << 48) | ((((orc_uint64)var44.i)>>48)<<32) | ((((orc_uint64)var44.i)>>48) << 16) | ((((orc_uint64)var44.i)>>48));
/* var49 = rounded (var44 * var45) / 255, lane by lane. */
20810 var47.x4[0] = (var44.x4[0] * var45.x4[0]) & 0xffff;
20811 var47.x4[1] = (var44.x4[1] * var45.x4[1]) & 0xffff;
20812 var47.x4[2] = (var44.x4[2] * var45.x4[2]) & 0xffff;
20813 var47.x4[3] = (var44.x4[3] * var45.x4[3]) & 0xffff;
20815 var48.x4[0] = ((uint16_t)(((orc_uint16)(var47.x4[0]+128)) + (((orc_uint16)(var47.x4[0]+128))>>8)))>>8;
20816 var48.x4[1] = ((uint16_t)(((orc_uint16)(var47.x4[1]+128)) + (((orc_uint16)(var47.x4[1]+128))>>8)))>>8;
20817 var48.x4[2] = ((uint16_t)(((orc_uint16)(var47.x4[2]+128)) + (((orc_uint16)(var47.x4[2]+128))>>8)))>>8;
20818 var48.x4[3] = ((uint16_t)(((orc_uint16)(var47.x4[3]+128)) + (((orc_uint16)(var47.x4[3]+128))>>8)))>>8;
20820 var49.x4[0] = var48.x4[0];
20821 var49.x4[1] = var48.x4[1];
20822 var49.x4[2] = var48.x4[2];
20823 var49.x4[3] = var48.x4[3];
/* var51 = rounded (var45 * broadcast) / 255, lane by lane. */
20825 var50.x4[0] = (var45.x4[0] * var46.x4[0]) & 0xffff;
20826 var50.x4[1] = (var45.x4[1] * var46.x4[1]) & 0xffff;
20827 var50.x4[2] = (var45.x4[2] * var46.x4[2]) & 0xffff;
20828 var50.x4[3] = (var45.x4[3] * var46.x4[3]) & 0xffff;
20830 var51.x4[0] = ((uint16_t)(((orc_uint16)(var50.x4[0]+128)) + (((orc_uint16)(var50.x4[0]+128))>>8)))>>8;
20831 var51.x4[1] = ((uint16_t)(((orc_uint16)(var50.x4[1]+128)) + (((orc_uint16)(var50.x4[1]+128))>>8)))>>8;
20832 var51.x4[2] = ((uint16_t)(((orc_uint16)(var50.x4[2]+128)) + (((orc_uint16)(var50.x4[2]+128))>>8)))>>8;
20833 var51.x4[3] = ((uint16_t)(((orc_uint16)(var50.x4[3]+128)) + (((orc_uint16)(var50.x4[3]+128))>>8)))>>8;
20837 var53.x4[0] = (orc_uint8)var52.x4[0];
20838 var53.x4[1] = (orc_uint8)var52.x4[1];
20839 var53.x4[2] = (orc_uint8)var52.x4[2];
20840 var53.x4[3] = (orc_uint8)var52.x4[3];
/* var57 = rounded (var53 * (var51 ^ 0xff)) / 255, lane by lane. */
20842 var54.x4[0] = var51.x4[0] ^ var42.x4[0];
20843 var54.x4[1] = var51.x4[1] ^ var42.x4[1];
20844 var54.x4[2] = var51.x4[2] ^ var42.x4[2];
20845 var54.x4[3] = var51.x4[3] ^ var42.x4[3];
20847 var55.x4[0] = (var53.x4[0] * var54.x4[0]) & 0xffff;
20848 var55.x4[1] = (var53.x4[1] * var54.x4[1]) & 0xffff;
20849 var55.x4[2] = (var53.x4[2] * var54.x4[2]) & 0xffff;
20850 var55.x4[3] = (var53.x4[3] * var54.x4[3]) & 0xffff;
20852 var56.x4[0] = ((uint16_t)(((orc_uint16)(var55.x4[0]+128)) + (((orc_uint16)(var55.x4[0]+128))>>8)))>>8;
20853 var56.x4[1] = ((uint16_t)(((orc_uint16)(var55.x4[1]+128)) + (((orc_uint16)(var55.x4[1]+128))>>8)))>>8;
20854 var56.x4[2] = ((uint16_t)(((orc_uint16)(var55.x4[2]+128)) + (((orc_uint16)(var55.x4[2]+128))>>8)))>>8;
20855 var56.x4[3] = ((uint16_t)(((orc_uint16)(var55.x4[3]+128)) + (((orc_uint16)(var55.x4[3]+128))>>8)))>>8;
20857 var57.x4[0] = var56.x4[0];
20858 var57.x4[1] = var56.x4[1];
20859 var57.x4[2] = var56.x4[2];
20860 var57.x4[3] = var56.x4[3];
/* Result: var57 + var49 per byte through ORC_CLAMP_UB (macro defined
 * elsewhere; presumably saturates to 0..255 -- confirm). */
20862 var43.x4[0] = ORC_CLAMP_UB((orc_uint8)var57.x4[0] + (orc_uint8)var49.x4[0]);
20863 var43.x4[1] = ORC_CLAMP_UB((orc_uint8)var57.x4[1] + (orc_uint8)var49.x4[1]);
20864 var43.x4[2] = ORC_CLAMP_UB((orc_uint8)var57.x4[2] + (orc_uint8)var49.x4[2]);
20865 var43.x4[3] = ORC_CLAMP_UB((orc_uint8)var57.x4[3] + (orc_uint8)var49.x4[3]);
/* Orc-program-backed entry point for orc_code_combine_over_ca.
 *
 * d1 is the destination array; s1 and s2 are source arrays.  All three are
 * registered with the program as 4-byte elements.  The visible code builds
 * and compiles the Orc program while holding the once-mutex, then fills an
 * executor with the caller's arrays and fetches the program's code_exec
 * pointer.
 * NOTE(review): the guard around the one-time setup, the use of n and the
 * final call through func are not visible in this excerpt -- confirm against
 * the full file. */
20873 orc_code_combine_over_ca (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint32 * s2, int n)
20875 OrcExecutor _ex, *ex = &_ex;
/* Program object and init flag persist across calls (function-local statics). */
20876 static int p_inited = 0;
20877 static OrcProgram *p = 0;
20878 void (*func) (OrcExecutor *);
20881 orc_once_mutex_lock ();
20883 OrcCompileResult result;
20885 p = orc_program_new ();
20886 orc_program_set_name (p, "orc_code_combine_over_ca");
20887 orc_program_set_backup_function (p, _backup_orc_code_combine_over_ca);
/* Variables: one 4-byte destination, two 4-byte sources, the constant 0xff,
 * and eight temporaries of 8 or 4 bytes. */
20888 orc_program_add_destination (p, 4, "d1");
20889 orc_program_add_source (p, 4, "s1");
20890 orc_program_add_source (p, 4, "s2");
20891 orc_program_add_constant (p, 4, 0x000000ff, "c1");
20892 orc_program_add_temporary (p, 8, "t1");
20893 orc_program_add_temporary (p, 8, "t2");
20894 orc_program_add_temporary (p, 4, "t3");
20895 orc_program_add_temporary (p, 8, "t4");
20896 orc_program_add_temporary (p, 8, "t5");
20897 orc_program_add_temporary (p, 8, "t6");
20898 orc_program_add_temporary (p, 8, "t7");
20899 orc_program_add_temporary (p, 4, "t8");
/* Instruction list.  Operand order appears to be (dest, src1, src2, src3),
 * with ORC_VAR_D1 filling operand slots an opcode does not use; the integer
 * argument is presumably an instruction flag (2 on the per-lane ops, 0
 * otherwise) -- TODO confirm against the Orc API.
 * Data flow by operand: t6 <- s1, t5 <- s2, t7 <- splat of t6,
 * t8 <- div255(t6 * t5), t5 <- div255(t5 * t7) ^ c1,
 * t3 <- div255(d1 * t5), d1 <- addusb(t3, t8). */
20901 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
20902 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
20903 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
20904 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5, ORC_VAR_D1);
20905 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
20906 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
20907 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T7, ORC_VAR_D1);
20908 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
20909 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
20910 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
20911 orc_program_append_2 (p, "xorw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C1, ORC_VAR_D1);
20912 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T1, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
20913 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
20914 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
20915 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_T8, ORC_VAR_D1);
/* NOTE(review): the compile result is stored but no check of it is visible
 * in this excerpt. */
20917 result = orc_program_compile (p);
20920 orc_once_mutex_unlock ();
/* Bind the caller's buffers to the executor; the const qualifier on the
 * sources is cast away because the arrays slots are assigned as void *. */
20925 ex->arrays[ORC_VAR_D1] = d1;
20926 ex->arrays[ORC_VAR_S1] = (void *)s1;
20927 ex->arrays[ORC_VAR_S2] = (void *)s2;
20929 func = p->code_exec;
20935 /* orc_code_combine_over_ca_n */
/* Plain-C implementation of orc_code_combine_over_ca_n.
 *
 * d1: destination array, s1: source array, n: number of loop iterations.
 * Judging by the arithmetic below, each iteration computes, per lane,
 *   result = clamp (widened_A + div255 (widened_B * (splat (A) ^ 0xff)))
 * where A is var40 and B is var47.
 * NOTE(review): the declarations of the varNN temporaries, the loads into
 * var40/var47 and the store of var42 are not visible in this excerpt;
 * presumably var40 comes from ptr4 (s1) and var47 from ptr0 (d1) -- confirm
 * against the full file. */
20938 orc_code_combine_over_ca_n (orc_uint32 * d1, const orc_uint32 * s1, int n){
20940 orc_union32 * ORC_RESTRICT ptr0;
20941 const orc_union32 * ORC_RESTRICT ptr4;
20956 ptr0 = (orc_union32 *)d1;
20957 ptr4 = (orc_union32 *)s1;
/* Loop-invariant constant: 0xff in each of the four lanes. */
20960 var41.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
20961 var41.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
20962 var41.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
20963 var41.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
20965 for (i = 0; i < n; i++) {
/* Reduce each lane of var40 to an unsigned 8-bit value. */
20969 var43.x4[0] = (orc_uint8)var40.x4[0];
20970 var43.x4[1] = (orc_uint8)var40.x4[1];
20971 var43.x4[2] = (orc_uint8)var40.x4[2];
20972 var43.x4[3] = (orc_uint8)var40.x4[3];
/* Replicate bits 48..63 of var43 into all four 16-bit positions. */
20974 var44.i = ((((orc_uint64)var43.i)>>48) << 48) | ((((orc_uint64)var43.i)>>48)<<32) | ((((orc_uint64)var43.i)>>48) << 16) | ((((orc_uint64)var43.i)>>48));
/* Lane-wise copies of var43 and var44. */
20976 var45.x4[0] = var43.x4[0];
20977 var45.x4[1] = var43.x4[1];
20978 var45.x4[2] = var43.x4[2];
20979 var45.x4[3] = var43.x4[3];
20981 var46.x4[0] = var44.x4[0];
20982 var46.x4[1] = var44.x4[1];
20983 var46.x4[2] = var44.x4[2];
20984 var46.x4[3] = var44.x4[3];
/* Reduce each lane of var47 to an unsigned 8-bit value. */
20988 var48.x4[0] = (orc_uint8)var47.x4[0];
20989 var48.x4[1] = (orc_uint8)var47.x4[1];
20990 var48.x4[2] = (orc_uint8)var47.x4[2];
20991 var48.x4[3] = (orc_uint8)var47.x4[3];
/* XOR with 0xff: for a value v in 0..255 this yields 255 - v. */
20993 var49.x4[0] = var46.x4[0] ^ var41.x4[0];
20994 var49.x4[1] = var46.x4[1] ^ var41.x4[1];
20995 var49.x4[2] = var46.x4[2] ^ var41.x4[2];
20996 var49.x4[3] = var46.x4[3] ^ var41.x4[3];
/* Lane-wise multiply, keeping the low 16 bits of each product. */
20998 var50.x4[0] = (var48.x4[0] * var49.x4[0]) & 0xffff;
20999 var50.x4[1] = (var48.x4[1] * var49.x4[1]) & 0xffff;
21000 var50.x4[2] = (var48.x4[2] * var49.x4[2]) & 0xffff;
21001 var50.x4[3] = (var48.x4[3] * var49.x4[3]) & 0xffff;
/* Shift-and-add form ((x+128) + ((x+128)>>8)) >> 8, a division-free
 * stand-in for a rounded x / 255. */
21003 var51.x4[0] = ((uint16_t)(((orc_uint16)(var50.x4[0]+128)) + (((orc_uint16)(var50.x4[0]+128))>>8)))>>8;
21004 var51.x4[1] = ((uint16_t)(((orc_uint16)(var50.x4[1]+128)) + (((orc_uint16)(var50.x4[1]+128))>>8)))>>8;
21005 var51.x4[2] = ((uint16_t)(((orc_uint16)(var50.x4[2]+128)) + (((orc_uint16)(var50.x4[2]+128))>>8)))>>8;
21006 var51.x4[3] = ((uint16_t)(((orc_uint16)(var50.x4[3]+128)) + (((orc_uint16)(var50.x4[3]+128))>>8)))>>8;
21008 var52.x4[0] = var51.x4[0];
21009 var52.x4[1] = var51.x4[1];
21010 var52.x4[2] = var51.x4[2];
21011 var52.x4[3] = var51.x4[3];
/* Add the two byte values and pass the sum through ORC_CLAMP_UB (defined
 * elsewhere; presumably clamps to the unsigned-byte range). */
21013 var42.x4[0] = ORC_CLAMP_UB((orc_uint8)var52.x4[0] + (orc_uint8)var45.x4[0]);
21014 var42.x4[1] = ORC_CLAMP_UB((orc_uint8)var52.x4[1] + (orc_uint8)var45.x4[1]);
21015 var42.x4[2] = ORC_CLAMP_UB((orc_uint8)var52.x4[2] + (orc_uint8)var45.x4[2]);
21016 var42.x4[3] = ORC_CLAMP_UB((orc_uint8)var52.x4[3] + (orc_uint8)var45.x4[3]);
/* Backup (plain-C) function registered for the Orc program
 * "orc_code_combine_over_ca_n".  It takes everything from the executor:
 * the destination pointer from ex->arrays[0] and the source pointer from
 * ex->arrays[4].  The per-lane arithmetic is
 *   result = clamp (widened_A + div255 (widened_B * (splat (A) ^ 0xff)))
 * with A = var40 and B = var47.
 * NOTE(review): the origin of n, the loads into var40/var47 and the store of
 * var42 are not visible in this excerpt -- confirm against the full file. */
21025 _backup_orc_code_combine_over_ca_n (OrcExecutor * ORC_RESTRICT ex)
21029 orc_union32 * ORC_RESTRICT ptr0;
21030 const orc_union32 * ORC_RESTRICT ptr4;
21045 ptr0 = (orc_union32 *)ex->arrays[0];
21046 ptr4 = (orc_union32 *)ex->arrays[4];
/* Loop-invariant constant: 0xff in each of the four lanes. */
21049 var41.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
21050 var41.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
21051 var41.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
21052 var41.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
21054 for (i = 0; i < n; i++) {
/* Reduce each lane of var40 to an unsigned 8-bit value. */
21058 var43.x4[0] = (orc_uint8)var40.x4[0];
21059 var43.x4[1] = (orc_uint8)var40.x4[1];
21060 var43.x4[2] = (orc_uint8)var40.x4[2];
21061 var43.x4[3] = (orc_uint8)var40.x4[3];
/* Replicate bits 48..63 of var43 into all four 16-bit positions. */
21063 var44.i = ((((orc_uint64)var43.i)>>48) << 48) | ((((orc_uint64)var43.i)>>48)<<32) | ((((orc_uint64)var43.i)>>48) << 16) | ((((orc_uint64)var43.i)>>48));
/* Lane-wise copies of var43 and var44. */
21065 var45.x4[0] = var43.x4[0];
21066 var45.x4[1] = var43.x4[1];
21067 var45.x4[2] = var43.x4[2];
21068 var45.x4[3] = var43.x4[3];
21070 var46.x4[0] = var44.x4[0];
21071 var46.x4[1] = var44.x4[1];
21072 var46.x4[2] = var44.x4[2];
21073 var46.x4[3] = var44.x4[3];
/* Reduce each lane of var47 to an unsigned 8-bit value. */
21077 var48.x4[0] = (orc_uint8)var47.x4[0];
21078 var48.x4[1] = (orc_uint8)var47.x4[1];
21079 var48.x4[2] = (orc_uint8)var47.x4[2];
21080 var48.x4[3] = (orc_uint8)var47.x4[3];
/* XOR with 0xff: for a value v in 0..255 this yields 255 - v. */
21082 var49.x4[0] = var46.x4[0] ^ var41.x4[0];
21083 var49.x4[1] = var46.x4[1] ^ var41.x4[1];
21084 var49.x4[2] = var46.x4[2] ^ var41.x4[2];
21085 var49.x4[3] = var46.x4[3] ^ var41.x4[3];
/* Lane-wise multiply, keeping the low 16 bits of each product. */
21087 var50.x4[0] = (var48.x4[0] * var49.x4[0]) & 0xffff;
21088 var50.x4[1] = (var48.x4[1] * var49.x4[1]) & 0xffff;
21089 var50.x4[2] = (var48.x4[2] * var49.x4[2]) & 0xffff;
21090 var50.x4[3] = (var48.x4[3] * var49.x4[3]) & 0xffff;
/* Shift-and-add form ((x+128) + ((x+128)>>8)) >> 8, a division-free
 * stand-in for a rounded x / 255. */
21092 var51.x4[0] = ((uint16_t)(((orc_uint16)(var50.x4[0]+128)) + (((orc_uint16)(var50.x4[0]+128))>>8)))>>8;
21093 var51.x4[1] = ((uint16_t)(((orc_uint16)(var50.x4[1]+128)) + (((orc_uint16)(var50.x4[1]+128))>>8)))>>8;
21094 var51.x4[2] = ((uint16_t)(((orc_uint16)(var50.x4[2]+128)) + (((orc_uint16)(var50.x4[2]+128))>>8)))>>8;
21095 var51.x4[3] = ((uint16_t)(((orc_uint16)(var50.x4[3]+128)) + (((orc_uint16)(var50.x4[3]+128))>>8)))>>8;
21097 var52.x4[0] = var51.x4[0];
21098 var52.x4[1] = var51.x4[1];
21099 var52.x4[2] = var51.x4[2];
21100 var52.x4[3] = var51.x4[3];
/* Add the two byte values and pass the sum through ORC_CLAMP_UB (defined
 * elsewhere; presumably clamps to the unsigned-byte range). */
21102 var42.x4[0] = ORC_CLAMP_UB((orc_uint8)var52.x4[0] + (orc_uint8)var45.x4[0]);
21103 var42.x4[1] = ORC_CLAMP_UB((orc_uint8)var52.x4[1] + (orc_uint8)var45.x4[1]);
21104 var42.x4[2] = ORC_CLAMP_UB((orc_uint8)var52.x4[2] + (orc_uint8)var45.x4[2]);
21105 var42.x4[3] = ORC_CLAMP_UB((orc_uint8)var52.x4[3] + (orc_uint8)var45.x4[3]);
/* Orc-program-backed entry point for orc_code_combine_over_ca_n.
 *
 * d1 is the destination array and s1 the source array, both registered with
 * the program as 4-byte elements.  The visible code builds and compiles the
 * Orc program while holding the once-mutex, then binds the caller's arrays
 * to an executor and fetches the program's code_exec pointer.
 * NOTE(review): the guard around the one-time setup, the use of n and the
 * final call through func are not visible in this excerpt -- confirm against
 * the full file. */
21113 orc_code_combine_over_ca_n (orc_uint32 * d1, const orc_uint32 * s1, int n)
21115 OrcExecutor _ex, *ex = &_ex;
/* Program object and init flag persist across calls (function-local statics). */
21116 static int p_inited = 0;
21117 static OrcProgram *p = 0;
21118 void (*func) (OrcExecutor *);
21121 orc_once_mutex_lock ();
21123 OrcCompileResult result;
21125 p = orc_program_new ();
21126 orc_program_set_name (p, "orc_code_combine_over_ca_n");
21127 orc_program_set_backup_function (p, _backup_orc_code_combine_over_ca_n);
/* Variables: 4-byte destination and source, the constant 0xff, and eight
 * temporaries.  t2 is declared but not referenced by any instruction below. */
21128 orc_program_add_destination (p, 4, "d1");
21129 orc_program_add_source (p, 4, "s1");
21130 orc_program_add_constant (p, 4, 0x000000ff, "c1");
21131 orc_program_add_temporary (p, 8, "t1");
21132 orc_program_add_temporary (p, 8, "t2");
21133 orc_program_add_temporary (p, 4, "t3");
21134 orc_program_add_temporary (p, 8, "t4");
21135 orc_program_add_temporary (p, 8, "t5");
21136 orc_program_add_temporary (p, 8, "t6");
21137 orc_program_add_temporary (p, 8, "t7");
21138 orc_program_add_temporary (p, 4, "t8");
/* Instruction list.  Operand order appears to be (dest, src1, src2, src3),
 * with ORC_VAR_D1 filling unused operand slots; the integer argument is
 * presumably an instruction flag -- TODO confirm against the Orc API.
 * Data flow by operand: t6 <- s1, t7 <- splat of t6, t8 <- t6,
 * t5 <- t7 ^ c1, t3 <- div255(d1 * t5), d1 <- addusb(t3, t8). */
21140 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
21141 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
21142 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
21143 orc_program_append_2 (p, "copyw", 2, ORC_VAR_T5, ORC_VAR_T7, ORC_VAR_D1, ORC_VAR_D1);
21144 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
21145 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
21146 orc_program_append_2 (p, "xorw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C1, ORC_VAR_D1);
21147 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T1, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
21148 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
21149 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
21150 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_T8, ORC_VAR_D1);
/* NOTE(review): the compile result is stored but no check of it is visible
 * in this excerpt. */
21152 result = orc_program_compile (p);
21155 orc_once_mutex_unlock ();
/* Bind the caller's buffers; const is cast away on the source because the
 * arrays slots are assigned as void *. */
21160 ex->arrays[ORC_VAR_D1] = d1;
21161 ex->arrays[ORC_VAR_S1] = (void *)s1;
21163 func = p->code_exec;
21169 /* orc_composite_over_8888_8_8888_line */
/* Plain-C implementation of orc_composite_over_8888_8_8888_line.
 *
 * d1: destination array, s1: 32-bit source array, s2: 8-bit source array,
 * n: number of loop iterations.
 * Per lane the arithmetic below is
 *   m      = div255 (widened_A * splat_byte (M))
 *   result = clamp (m + div255 (widened_B * splat_top_word (m ^ 0xff)))
 * with A = var38, M = var39 and B = var48.
 * NOTE(review): the varNN declarations, the loads into var38/var39/var48 and
 * the store of var41 are not visible in this excerpt; presumably var38 comes
 * from ptr4 (s1), var39 from ptr5 (s2) and var48 from ptr0 (d1) -- confirm
 * against the full file. */
21172 orc_composite_over_8888_8_8888_line (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint8 * s2, int n){
21174 orc_union32 * ORC_RESTRICT ptr0;
21175 const orc_union32 * ORC_RESTRICT ptr4;
21176 const orc_int8 * ORC_RESTRICT ptr5;
21195 ptr0 = (orc_union32 *)d1;
21196 ptr4 = (orc_union32 *)s1;
21197 ptr5 = (orc_int8 *)s2;
/* Loop-invariant constant: 0xff in each of the four lanes. */
21200 var40.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
21201 var40.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
21202 var40.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
21203 var40.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
21205 for (i = 0; i < n; i++) {
/* Reduce each lane of var38 to an unsigned 8-bit value. */
21209 var42.x4[0] = (orc_uint8)var38.x4[0];
21210 var42.x4[1] = (orc_uint8)var38.x4[1];
21211 var42.x4[2] = (orc_uint8)var38.x4[2];
21212 var42.x4[3] = (orc_uint8)var38.x4[3];
/* Replicate the low byte of var39 into all four bytes of a 32-bit value. */
21216 var43.i = ((var39&0xff) << 24) | ((var39&0xff)<<16) | ((var39&0xff) << 8) | (var39&0xff);
21218 var44.x4[0] = (orc_uint8)var43.x4[0];
21219 var44.x4[1] = (orc_uint8)var43.x4[1];
21220 var44.x4[2] = (orc_uint8)var43.x4[2];
21221 var44.x4[3] = (orc_uint8)var43.x4[3];
/* Lane-wise multiply, keeping the low 16 bits of each product. */
21223 var45.x4[0] = (var42.x4[0] * var44.x4[0]) & 0xffff;
21224 var45.x4[1] = (var42.x4[1] * var44.x4[1]) & 0xffff;
21225 var45.x4[2] = (var42.x4[2] * var44.x4[2]) & 0xffff;
21226 var45.x4[3] = (var42.x4[3] * var44.x4[3]) & 0xffff;
/* Shift-and-add form ((x+128) + ((x+128)>>8)) >> 8, a division-free
 * stand-in for a rounded x / 255. */
21228 var46.x4[0] = ((uint16_t)(((orc_uint16)(var45.x4[0]+128)) + (((orc_uint16)(var45.x4[0]+128))>>8)))>>8;
21229 var46.x4[1] = ((uint16_t)(((orc_uint16)(var45.x4[1]+128)) + (((orc_uint16)(var45.x4[1]+128))>>8)))>>8;
21230 var46.x4[2] = ((uint16_t)(((orc_uint16)(var45.x4[2]+128)) + (((orc_uint16)(var45.x4[2]+128))>>8)))>>8;
21231 var46.x4[3] = ((uint16_t)(((orc_uint16)(var45.x4[3]+128)) + (((orc_uint16)(var45.x4[3]+128))>>8)))>>8;
/* Keep a lane-wise copy of the scaled value for the final add. */
21233 var47.x4[0] = var46.x4[0];
21234 var47.x4[1] = var46.x4[1];
21235 var47.x4[2] = var46.x4[2];
21236 var47.x4[3] = var46.x4[3];
/* Reduce each lane of var48 to an unsigned 8-bit value. */
21240 var49.x4[0] = (orc_uint8)var48.x4[0];
21241 var49.x4[1] = (orc_uint8)var48.x4[1];
21242 var49.x4[2] = (orc_uint8)var48.x4[2];
21243 var49.x4[3] = (orc_uint8)var48.x4[3];
/* XOR with 0xff: for a value v in 0..255 this yields 255 - v. */
21245 var50.x4[0] = var46.x4[0] ^ var40.x4[0];
21246 var50.x4[1] = var46.x4[1] ^ var40.x4[1];
21247 var50.x4[2] = var46.x4[2] ^ var40.x4[2];
21248 var50.x4[3] = var46.x4[3] ^ var40.x4[3];
/* Replicate bits 48..63 of var50 into all four 16-bit positions. */
21250 var51.i = ((((orc_uint64)var50.i)>>48) << 48) | ((((orc_uint64)var50.i)>>48)<<32) | ((((orc_uint64)var50.i)>>48) << 16) | ((((orc_uint64)var50.i)>>48));
21252 var52.x4[0] = (var49.x4[0] * var51.x4[0]) & 0xffff;
21253 var52.x4[1] = (var49.x4[1] * var51.x4[1]) & 0xffff;
21254 var52.x4[2] = (var49.x4[2] * var51.x4[2]) & 0xffff;
21255 var52.x4[3] = (var49.x4[3] * var51.x4[3]) & 0xffff;
21257 var53.x4[0] = ((uint16_t)(((orc_uint16)(var52.x4[0]+128)) + (((orc_uint16)(var52.x4[0]+128))>>8)))>>8;
21258 var53.x4[1] = ((uint16_t)(((orc_uint16)(var52.x4[1]+128)) + (((orc_uint16)(var52.x4[1]+128))>>8)))>>8;
21259 var53.x4[2] = ((uint16_t)(((orc_uint16)(var52.x4[2]+128)) + (((orc_uint16)(var52.x4[2]+128))>>8)))>>8;
21260 var53.x4[3] = ((uint16_t)(((orc_uint16)(var52.x4[3]+128)) + (((orc_uint16)(var52.x4[3]+128))>>8)))>>8;
21262 var54.x4[0] = var53.x4[0];
21263 var54.x4[1] = var53.x4[1];
21264 var54.x4[2] = var53.x4[2];
21265 var54.x4[3] = var53.x4[3];
/* Add the two byte values and pass the sum through ORC_CLAMP_UB (defined
 * elsewhere; presumably clamps to the unsigned-byte range). */
21267 var41.x4[0] = ORC_CLAMP_UB((orc_uint8)var54.x4[0] + (orc_uint8)var47.x4[0]);
21268 var41.x4[1] = ORC_CLAMP_UB((orc_uint8)var54.x4[1] + (orc_uint8)var47.x4[1]);
21269 var41.x4[2] = ORC_CLAMP_UB((orc_uint8)var54.x4[2] + (orc_uint8)var47.x4[2]);
21270 var41.x4[3] = ORC_CLAMP_UB((orc_uint8)var54.x4[3] + (orc_uint8)var47.x4[3]);
/* Backup (plain-C) function registered for the Orc program
 * "orc_composite_over_8888_8_8888_line".  Pointers come from the executor:
 * destination from ex->arrays[0], the 32-bit source from ex->arrays[4] and
 * the 8-bit source from ex->arrays[5].  Per lane the arithmetic is
 *   m      = div255 (widened_A * splat_byte (M))
 *   result = clamp (m + div255 (widened_B * splat_top_word (m ^ 0xff)))
 * with A = var38, M = var39 and B = var48.
 * NOTE(review): the origin of n, the loads into var38/var39/var48 and the
 * store of var41 are not visible in this excerpt -- confirm against the full
 * file. */
21279 _backup_orc_composite_over_8888_8_8888_line (OrcExecutor * ORC_RESTRICT ex)
21283 orc_union32 * ORC_RESTRICT ptr0;
21284 const orc_union32 * ORC_RESTRICT ptr4;
21285 const orc_int8 * ORC_RESTRICT ptr5;
21304 ptr0 = (orc_union32 *)ex->arrays[0];
21305 ptr4 = (orc_union32 *)ex->arrays[4];
21306 ptr5 = (orc_int8 *)ex->arrays[5];
/* Loop-invariant constant: 0xff in each of the four lanes. */
21309 var40.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
21310 var40.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
21311 var40.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
21312 var40.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
21314 for (i = 0; i < n; i++) {
/* Reduce each lane of var38 to an unsigned 8-bit value. */
21318 var42.x4[0] = (orc_uint8)var38.x4[0];
21319 var42.x4[1] = (orc_uint8)var38.x4[1];
21320 var42.x4[2] = (orc_uint8)var38.x4[2];
21321 var42.x4[3] = (orc_uint8)var38.x4[3];
/* Replicate the low byte of var39 into all four bytes of a 32-bit value. */
21325 var43.i = ((var39&0xff) << 24) | ((var39&0xff)<<16) | ((var39&0xff) << 8) | (var39&0xff);
21327 var44.x4[0] = (orc_uint8)var43.x4[0];
21328 var44.x4[1] = (orc_uint8)var43.x4[1];
21329 var44.x4[2] = (orc_uint8)var43.x4[2];
21330 var44.x4[3] = (orc_uint8)var43.x4[3];
/* Lane-wise multiply, keeping the low 16 bits of each product. */
21332 var45.x4[0] = (var42.x4[0] * var44.x4[0]) & 0xffff;
21333 var45.x4[1] = (var42.x4[1] * var44.x4[1]) & 0xffff;
21334 var45.x4[2] = (var42.x4[2] * var44.x4[2]) & 0xffff;
21335 var45.x4[3] = (var42.x4[3] * var44.x4[3]) & 0xffff;
/* Shift-and-add form ((x+128) + ((x+128)>>8)) >> 8, a division-free
 * stand-in for a rounded x / 255. */
21337 var46.x4[0] = ((uint16_t)(((orc_uint16)(var45.x4[0]+128)) + (((orc_uint16)(var45.x4[0]+128))>>8)))>>8;
21338 var46.x4[1] = ((uint16_t)(((orc_uint16)(var45.x4[1]+128)) + (((orc_uint16)(var45.x4[1]+128))>>8)))>>8;
21339 var46.x4[2] = ((uint16_t)(((orc_uint16)(var45.x4[2]+128)) + (((orc_uint16)(var45.x4[2]+128))>>8)))>>8;
21340 var46.x4[3] = ((uint16_t)(((orc_uint16)(var45.x4[3]+128)) + (((orc_uint16)(var45.x4[3]+128))>>8)))>>8;
/* Keep a lane-wise copy of the scaled value for the final add. */
21342 var47.x4[0] = var46.x4[0];
21343 var47.x4[1] = var46.x4[1];
21344 var47.x4[2] = var46.x4[2];
21345 var47.x4[3] = var46.x4[3];
/* Reduce each lane of var48 to an unsigned 8-bit value. */
21349 var49.x4[0] = (orc_uint8)var48.x4[0];
21350 var49.x4[1] = (orc_uint8)var48.x4[1];
21351 var49.x4[2] = (orc_uint8)var48.x4[2];
21352 var49.x4[3] = (orc_uint8)var48.x4[3];
/* XOR with 0xff: for a value v in 0..255 this yields 255 - v. */
21354 var50.x4[0] = var46.x4[0] ^ var40.x4[0];
21355 var50.x4[1] = var46.x4[1] ^ var40.x4[1];
21356 var50.x4[2] = var46.x4[2] ^ var40.x4[2];
21357 var50.x4[3] = var46.x4[3] ^ var40.x4[3];
/* Replicate bits 48..63 of var50 into all four 16-bit positions. */
21359 var51.i = ((((orc_uint64)var50.i)>>48) << 48) | ((((orc_uint64)var50.i)>>48)<<32) | ((((orc_uint64)var50.i)>>48) << 16) | ((((orc_uint64)var50.i)>>48));
21361 var52.x4[0] = (var49.x4[0] * var51.x4[0]) & 0xffff;
21362 var52.x4[1] = (var49.x4[1] * var51.x4[1]) & 0xffff;
21363 var52.x4[2] = (var49.x4[2] * var51.x4[2]) & 0xffff;
21364 var52.x4[3] = (var49.x4[3] * var51.x4[3]) & 0xffff;
21366 var53.x4[0] = ((uint16_t)(((orc_uint16)(var52.x4[0]+128)) + (((orc_uint16)(var52.x4[0]+128))>>8)))>>8;
21367 var53.x4[1] = ((uint16_t)(((orc_uint16)(var52.x4[1]+128)) + (((orc_uint16)(var52.x4[1]+128))>>8)))>>8;
21368 var53.x4[2] = ((uint16_t)(((orc_uint16)(var52.x4[2]+128)) + (((orc_uint16)(var52.x4[2]+128))>>8)))>>8;
21369 var53.x4[3] = ((uint16_t)(((orc_uint16)(var52.x4[3]+128)) + (((orc_uint16)(var52.x4[3]+128))>>8)))>>8;
21371 var54.x4[0] = var53.x4[0];
21372 var54.x4[1] = var53.x4[1];
21373 var54.x4[2] = var53.x4[2];
21374 var54.x4[3] = var53.x4[3];
/* Add the two byte values and pass the sum through ORC_CLAMP_UB (defined
 * elsewhere; presumably clamps to the unsigned-byte range). */
21376 var41.x4[0] = ORC_CLAMP_UB((orc_uint8)var54.x4[0] + (orc_uint8)var47.x4[0]);
21377 var41.x4[1] = ORC_CLAMP_UB((orc_uint8)var54.x4[1] + (orc_uint8)var47.x4[1]);
21378 var41.x4[2] = ORC_CLAMP_UB((orc_uint8)var54.x4[2] + (orc_uint8)var47.x4[2]);
21379 var41.x4[3] = ORC_CLAMP_UB((orc_uint8)var54.x4[3] + (orc_uint8)var47.x4[3]);
/* Orc-program-backed entry point for orc_composite_over_8888_8_8888_line.
 *
 * d1 is the destination array (4-byte elements), s1 a 4-byte source array
 * and s2 a 1-byte source array.  The visible code builds and compiles the
 * Orc program while holding the once-mutex, then binds the caller's arrays
 * to an executor and fetches the program's code_exec pointer.
 * NOTE(review): the guard around the one-time setup, the use of n and the
 * final call through func are not visible in this excerpt -- confirm against
 * the full file. */
21387 orc_composite_over_8888_8_8888_line (orc_uint32 * d1, const orc_uint32 * s1, const orc_uint8 * s2, int n)
21389 OrcExecutor _ex, *ex = &_ex;
/* Program object and init flag persist across calls (function-local statics). */
21390 static int p_inited = 0;
21391 static OrcProgram *p = 0;
21392 void (*func) (OrcExecutor *);
21395 orc_once_mutex_lock ();
21397 OrcCompileResult result;
21399 p = orc_program_new ();
21400 orc_program_set_name (p, "orc_composite_over_8888_8_8888_line");
21401 orc_program_set_backup_function (p, _backup_orc_composite_over_8888_8_8888_line);
/* Variables: 4-byte destination, 4-byte and 1-byte sources, the constant
 * 0xff, and six temporaries of 8 or 4 bytes. */
21402 orc_program_add_destination (p, 4, "d1");
21403 orc_program_add_source (p, 4, "s1");
21404 orc_program_add_source (p, 1, "s2");
21405 orc_program_add_constant (p, 4, 0x000000ff, "c1");
21406 orc_program_add_temporary (p, 8, "t1");
21407 orc_program_add_temporary (p, 8, "t2");
21408 orc_program_add_temporary (p, 4, "t3");
21409 orc_program_add_temporary (p, 4, "t4");
21410 orc_program_add_temporary (p, 4, "t5");
21411 orc_program_add_temporary (p, 8, "t6");
/* Instruction list.  Operand order appears to be (dest, src1, src2, src3),
 * with ORC_VAR_D1 filling unused operand slots; the integer argument is
 * presumably an instruction flag -- TODO confirm against the Orc API.
 * Data flow by operand: t1 <- s1, t2 <- splat of s2 (via t5),
 * t1 <- div255(t1 * t2), t3 <- t1, t6 <- d1 (via t4),
 * t2 <- splat of (t1 ^ c1), t4 <- div255(t6 * t2), d1 <- addusb(t4, t3). */
21413 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
21414 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T5, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
21415 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T2, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
21416 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
21417 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
21418 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
21419 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
21420 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
21421 orc_program_append_2 (p, "xorw", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
21422 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
21423 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T1, ORC_VAR_T6, ORC_VAR_T2, ORC_VAR_D1);
21424 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
21425 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T4, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
21426 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1);
/* NOTE(review): the compile result is stored but no check of it is visible
 * in this excerpt. */
21428 result = orc_program_compile (p);
21431 orc_once_mutex_unlock ();
/* Bind the caller's buffers; const is cast away on the sources because the
 * arrays slots are assigned as void *. */
21436 ex->arrays[ORC_VAR_D1] = d1;
21437 ex->arrays[ORC_VAR_S1] = (void *)s1;
21438 ex->arrays[ORC_VAR_S2] = (void *)s2;
21440 func = p->code_exec;
21446 /* orc_composite_over_n_8888_8888_ca_2d */
/* Plain-C implementation of orc_composite_over_n_8888_8888_ca_2d.
 *
 * d1: destination array, s1: source array, p1: scalar parameter,
 * n: number of loop iterations.
 * Per lane the arithmetic below is
 *   a      = div255 (widened_P * widened_S)
 *   b      = div255 (widened_S * splat_top_word (widened_P))
 *   result = clamp (a + div255 (widened_D * (b ^ 0xff)))
 * with P = var40, S = var41 and D = var52.
 * NOTE(review): the varNN declarations, the initialisation of var40, the
 * loads into var41/var52 and the store of var43 are not visible in this
 * excerpt; presumably var40 is derived from p1, var41 from ptr4 (s1) and
 * var52 from ptr0 (d1) -- confirm against the full file. */
21449 orc_composite_over_n_8888_8888_ca_2d (orc_uint32 * d1, const orc_uint32 * s1, int p1, int n){
21451 orc_union32 * ORC_RESTRICT ptr0;
21452 const orc_union32 * ORC_RESTRICT ptr4;
21472 ptr0 = (orc_union32 *)d1;
21473 ptr4 = (orc_union32 *)s1;
/* Loop-invariant constant: 0xff in each of the four lanes. */
21481 var42.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
21482 var42.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
21483 var42.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
21484 var42.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
21486 for (i = 0; i < n; i++) {
/* Reduce each lane of var40 to an unsigned 8-bit value. */
21488 var44.x4[0] = (orc_uint8)var40.x4[0];
21489 var44.x4[1] = (orc_uint8)var40.x4[1];
21490 var44.x4[2] = (orc_uint8)var40.x4[2];
21491 var44.x4[3] = (orc_uint8)var40.x4[3];
/* Reduce each lane of var41 to an unsigned 8-bit value. */
21495 var45.x4[0] = (orc_uint8)var41.x4[0];
21496 var45.x4[1] = (orc_uint8)var41.x4[1];
21497 var45.x4[2] = (orc_uint8)var41.x4[2];
21498 var45.x4[3] = (orc_uint8)var41.x4[3];
/* Replicate bits 48..63 of var44 into all four 16-bit positions. */
21500 var46.i = ((((orc_uint64)var44.i)>>48) << 48) | ((((orc_uint64)var44.i)>>48)<<32) | ((((orc_uint64)var44.i)>>48) << 16) | ((((orc_uint64)var44.i)>>48));
/* Lane-wise multiply, keeping the low 16 bits of each product. */
21502 var47.x4[0] = (var44.x4[0] * var45.x4[0]) & 0xffff;
21503 var47.x4[1] = (var44.x4[1] * var45.x4[1]) & 0xffff;
21504 var47.x4[2] = (var44.x4[2] * var45.x4[2]) & 0xffff;
21505 var47.x4[3] = (var44.x4[3] * var45.x4[3]) & 0xffff;
/* Shift-and-add form ((x+128) + ((x+128)>>8)) >> 8, a division-free
 * stand-in for a rounded x / 255. */
21507 var48.x4[0] = ((uint16_t)(((orc_uint16)(var47.x4[0]+128)) + (((orc_uint16)(var47.x4[0]+128))>>8)))>>8;
21508 var48.x4[1] = ((uint16_t)(((orc_uint16)(var47.x4[1]+128)) + (((orc_uint16)(var47.x4[1]+128))>>8)))>>8;
21509 var48.x4[2] = ((uint16_t)(((orc_uint16)(var47.x4[2]+128)) + (((orc_uint16)(var47.x4[2]+128))>>8)))>>8;
21510 var48.x4[3] = ((uint16_t)(((orc_uint16)(var47.x4[3]+128)) + (((orc_uint16)(var47.x4[3]+128))>>8)))>>8;
/* Keep a lane-wise copy of the first scaled product for the final add. */
21512 var49.x4[0] = var48.x4[0];
21513 var49.x4[1] = var48.x4[1];
21514 var49.x4[2] = var48.x4[2];
21515 var49.x4[3] = var48.x4[3];
/* Second product: var45 times the replicated word of var44. */
21517 var50.x4[0] = (var45.x4[0] * var46.x4[0]) & 0xffff;
21518 var50.x4[1] = (var45.x4[1] * var46.x4[1]) & 0xffff;
21519 var50.x4[2] = (var45.x4[2] * var46.x4[2]) & 0xffff;
21520 var50.x4[3] = (var45.x4[3] * var46.x4[3]) & 0xffff;
21522 var51.x4[0] = ((uint16_t)(((orc_uint16)(var50.x4[0]+128)) + (((orc_uint16)(var50.x4[0]+128))>>8)))>>8;
21523 var51.x4[1] = ((uint16_t)(((orc_uint16)(var50.x4[1]+128)) + (((orc_uint16)(var50.x4[1]+128))>>8)))>>8;
21524 var51.x4[2] = ((uint16_t)(((orc_uint16)(var50.x4[2]+128)) + (((orc_uint16)(var50.x4[2]+128))>>8)))>>8;
21525 var51.x4[3] = ((uint16_t)(((orc_uint16)(var50.x4[3]+128)) + (((orc_uint16)(var50.x4[3]+128))>>8)))>>8;
/* Reduce each lane of var52 to an unsigned 8-bit value. */
21529 var53.x4[0] = (orc_uint8)var52.x4[0];
21530 var53.x4[1] = (orc_uint8)var52.x4[1];
21531 var53.x4[2] = (orc_uint8)var52.x4[2];
21532 var53.x4[3] = (orc_uint8)var52.x4[3];
/* XOR with 0xff: for a value v in 0..255 this yields 255 - v. */
21534 var54.x4[0] = var51.x4[0] ^ var42.x4[0];
21535 var54.x4[1] = var51.x4[1] ^ var42.x4[1];
21536 var54.x4[2] = var51.x4[2] ^ var42.x4[2];
21537 var54.x4[3] = var51.x4[3] ^ var42.x4[3];
21539 var55.x4[0] = (var53.x4[0] * var54.x4[0]) & 0xffff;
21540 var55.x4[1] = (var53.x4[1] * var54.x4[1]) & 0xffff;
21541 var55.x4[2] = (var53.x4[2] * var54.x4[2]) & 0xffff;
21542 var55.x4[3] = (var53.x4[3] * var54.x4[3]) & 0xffff;
21544 var56.x4[0] = ((uint16_t)(((orc_uint16)(var55.x4[0]+128)) + (((orc_uint16)(var55.x4[0]+128))>>8)))>>8;
21545 var56.x4[1] = ((uint16_t)(((orc_uint16)(var55.x4[1]+128)) + (((orc_uint16)(var55.x4[1]+128))>>8)))>>8;
21546 var56.x4[2] = ((uint16_t)(((orc_uint16)(var55.x4[2]+128)) + (((orc_uint16)(var55.x4[2]+128))>>8)))>>8;
21547 var56.x4[3] = ((uint16_t)(((orc_uint16)(var55.x4[3]+128)) + (((orc_uint16)(var55.x4[3]+128))>>8)))>>8;
21549 var57.x4[0] = var56.x4[0];
21550 var57.x4[1] = var56.x4[1];
21551 var57.x4[2] = var56.x4[2];
21552 var57.x4[3] = var56.x4[3];
/* Add the two byte values and pass the sum through ORC_CLAMP_UB (defined
 * elsewhere; presumably clamps to the unsigned-byte range). */
21554 var43.x4[0] = ORC_CLAMP_UB((orc_uint8)var57.x4[0] + (orc_uint8)var49.x4[0]);
21555 var43.x4[1] = ORC_CLAMP_UB((orc_uint8)var57.x4[1] + (orc_uint8)var49.x4[1]);
21556 var43.x4[2] = ORC_CLAMP_UB((orc_uint8)var57.x4[2] + (orc_uint8)var49.x4[2]);
21557 var43.x4[3] = ORC_CLAMP_UB((orc_uint8)var57.x4[3] + (orc_uint8)var49.x4[3]);
/* Backup (plain-C) function registered for the Orc program
 * "orc_composite_over_n_8888_8888_ca_2d".  Pointers and the scalar parameter
 * come from the executor: destination from ex->arrays[0], source from
 * ex->arrays[4], parameter from ex->params[24].  Per lane the arithmetic is
 *   a      = div255 (widened_P * widened_S)
 *   b      = div255 (widened_S * splat_top_word (widened_P))
 *   result = clamp (a + div255 (widened_D * (b ^ 0xff)))
 * with P = var40, S = var41 and D = var52.
 * NOTE(review): the origin of n, the loads into var41/var52 and the store of
 * var43 are not visible in this excerpt -- confirm against the full file. */
21566 _backup_orc_composite_over_n_8888_8888_ca_2d (OrcExecutor * ORC_RESTRICT ex)
21570 orc_union32 * ORC_RESTRICT ptr0;
21571 const orc_union32 * ORC_RESTRICT ptr4;
21591 ptr0 = (orc_union32 *)ex->arrays[0];
21592 ptr4 = (orc_union32 *)ex->arrays[4];
/* Every lane of var40 is assigned the same ex->params[24] value.
 * NOTE(review): if the x4 lanes of var40 are narrower than the parameter,
 * each lane receives only the truncated low part rather than a distinct
 * byte of it -- confirm the declared type of var40. */
21595 var40.x4[0] = ex->params[24];
21596 var40.x4[1] = ex->params[24];
21597 var40.x4[2] = ex->params[24];
21598 var40.x4[3] = ex->params[24];
/* Loop-invariant constant: 0xff in each of the four lanes. */
21600 var42.x4[0] = 0x000000ff; /* 255 or 1.25987e-321f */
21601 var42.x4[1] = 0x000000ff; /* 255 or 1.25987e-321f */
21602 var42.x4[2] = 0x000000ff; /* 255 or 1.25987e-321f */
21603 var42.x4[3] = 0x000000ff; /* 255 or 1.25987e-321f */
21605 for (i = 0; i < n; i++) {
/* Reduce each lane of var40 to an unsigned 8-bit value. */
21607 var44.x4[0] = (orc_uint8)var40.x4[0];
21608 var44.x4[1] = (orc_uint8)var40.x4[1];
21609 var44.x4[2] = (orc_uint8)var40.x4[2];
21610 var44.x4[3] = (orc_uint8)var40.x4[3];
/* Reduce each lane of var41 to an unsigned 8-bit value. */
21614 var45.x4[0] = (orc_uint8)var41.x4[0];
21615 var45.x4[1] = (orc_uint8)var41.x4[1];
21616 var45.x4[2] = (orc_uint8)var41.x4[2];
21617 var45.x4[3] = (orc_uint8)var41.x4[3];
/* Replicate bits 48..63 of var44 into all four 16-bit positions. */
21619 var46.i = ((((orc_uint64)var44.i)>>48) << 48) | ((((orc_uint64)var44.i)>>48)<<32) | ((((orc_uint64)var44.i)>>48) << 16) | ((((orc_uint64)var44.i)>>48));
/* Lane-wise multiply, keeping the low 16 bits of each product. */
21621 var47.x4[0] = (var44.x4[0] * var45.x4[0]) & 0xffff;
21622 var47.x4[1] = (var44.x4[1] * var45.x4[1]) & 0xffff;
21623 var47.x4[2] = (var44.x4[2] * var45.x4[2]) & 0xffff;
21624 var47.x4[3] = (var44.x4[3] * var45.x4[3]) & 0xffff;
/* Shift-and-add form ((x+128) + ((x+128)>>8)) >> 8, a division-free
 * stand-in for a rounded x / 255. */
21626 var48.x4[0] = ((uint16_t)(((orc_uint16)(var47.x4[0]+128)) + (((orc_uint16)(var47.x4[0]+128))>>8)))>>8;
21627 var48.x4[1] = ((uint16_t)(((orc_uint16)(var47.x4[1]+128)) + (((orc_uint16)(var47.x4[1]+128))>>8)))>>8;
21628 var48.x4[2] = ((uint16_t)(((orc_uint16)(var47.x4[2]+128)) + (((orc_uint16)(var47.x4[2]+128))>>8)))>>8;
21629 var48.x4[3] = ((uint16_t)(((orc_uint16)(var47.x4[3]+128)) + (((orc_uint16)(var47.x4[3]+128))>>8)))>>8;
/* Keep a lane-wise copy of the first scaled product for the final add. */
21631 var49.x4[0] = var48.x4[0];
21632 var49.x4[1] = var48.x4[1];
21633 var49.x4[2] = var48.x4[2];
21634 var49.x4[3] = var48.x4[3];
/* Second product: var45 times the replicated word of var44. */
21636 var50.x4[0] = (var45.x4[0] * var46.x4[0]) & 0xffff;
21637 var50.x4[1] = (var45.x4[1] * var46.x4[1]) & 0xffff;
21638 var50.x4[2] = (var45.x4[2] * var46.x4[2]) & 0xffff;
21639 var50.x4[3] = (var45.x4[3] * var46.x4[3]) & 0xffff;
21641 var51.x4[0] = ((uint16_t)(((orc_uint16)(var50.x4[0]+128)) + (((orc_uint16)(var50.x4[0]+128))>>8)))>>8;
21642 var51.x4[1] = ((uint16_t)(((orc_uint16)(var50.x4[1]+128)) + (((orc_uint16)(var50.x4[1]+128))>>8)))>>8;
21643 var51.x4[2] = ((uint16_t)(((orc_uint16)(var50.x4[2]+128)) + (((orc_uint16)(var50.x4[2]+128))>>8)))>>8;
21644 var51.x4[3] = ((uint16_t)(((orc_uint16)(var50.x4[3]+128)) + (((orc_uint16)(var50.x4[3]+128))>>8)))>>8;
/* Reduce each lane of var52 to an unsigned 8-bit value. */
21648 var53.x4[0] = (orc_uint8)var52.x4[0];
21649 var53.x4[1] = (orc_uint8)var52.x4[1];
21650 var53.x4[2] = (orc_uint8)var52.x4[2];
21651 var53.x4[3] = (orc_uint8)var52.x4[3];
/* XOR with 0xff: for a value v in 0..255 this yields 255 - v. */
21653 var54.x4[0] = var51.x4[0] ^ var42.x4[0];
21654 var54.x4[1] = var51.x4[1] ^ var42.x4[1];
21655 var54.x4[2] = var51.x4[2] ^ var42.x4[2];
21656 var54.x4[3] = var51.x4[3] ^ var42.x4[3];
21658 var55.x4[0] = (var53.x4[0] * var54.x4[0]) & 0xffff;
21659 var55.x4[1] = (var53.x4[1] * var54.x4[1]) & 0xffff;
21660 var55.x4[2] = (var53.x4[2] * var54.x4[2]) & 0xffff;
21661 var55.x4[3] = (var53.x4[3] * var54.x4[3]) & 0xffff;
21663 var56.x4[0] = ((uint16_t)(((orc_uint16)(var55.x4[0]+128)) + (((orc_uint16)(var55.x4[0]+128))>>8)))>>8;
21664 var56.x4[1] = ((uint16_t)(((orc_uint16)(var55.x4[1]+128)) + (((orc_uint16)(var55.x4[1]+128))>>8)))>>8;
21665 var56.x4[2] = ((uint16_t)(((orc_uint16)(var55.x4[2]+128)) + (((orc_uint16)(var55.x4[2]+128))>>8)))>>8;
21666 var56.x4[3] = ((uint16_t)(((orc_uint16)(var55.x4[3]+128)) + (((orc_uint16)(var55.x4[3]+128))>>8)))>>8;
21668 var57.x4[0] = var56.x4[0];
21669 var57.x4[1] = var56.x4[1];
21670 var57.x4[2] = var56.x4[2];
21671 var57.x4[3] = var56.x4[3];
/* Add the two byte values and pass the sum through ORC_CLAMP_UB (defined
 * elsewhere; presumably clamps to the unsigned-byte range). */
21673 var43.x4[0] = ORC_CLAMP_UB((orc_uint8)var57.x4[0] + (orc_uint8)var49.x4[0]);
21674 var43.x4[1] = ORC_CLAMP_UB((orc_uint8)var57.x4[1] + (orc_uint8)var49.x4[1]);
21675 var43.x4[2] = ORC_CLAMP_UB((orc_uint8)var57.x4[2] + (orc_uint8)var49.x4[2]);
21676 var43.x4[3] = ORC_CLAMP_UB((orc_uint8)var57.x4[3] + (orc_uint8)var49.x4[3]);
/* Orc-program-backed entry point for orc_composite_over_n_8888_8888_ca_2d.
 *
 * d1 is the destination array and s1 the source array (both 4-byte
 * elements); p1 is a 4-byte scalar parameter.  The visible code builds and
 * compiles the Orc program while holding the once-mutex, then binds the
 * caller's arrays and parameter to an executor and fetches the program's
 * code_exec pointer.
 * NOTE(review): the guard around the one-time setup, the use of n and the
 * final call through func are not visible in this excerpt -- confirm against
 * the full file. */
21684 orc_composite_over_n_8888_8888_ca_2d (orc_uint32 * d1, const orc_uint32 * s1, int p1, int n)
21686 OrcExecutor _ex, *ex = &_ex;
/* Program object and init flag persist across calls (function-local statics). */
21687 static int p_inited = 0;
21688 static OrcProgram *p = 0;
21689 void (*func) (OrcExecutor *);
21692 orc_once_mutex_lock ();
21694 OrcCompileResult result;
21696 p = orc_program_new ();
21697 orc_program_set_name (p, "orc_composite_over_n_8888_8888_ca_2d");
21698 orc_program_set_backup_function (p, _backup_orc_composite_over_n_8888_8888_ca_2d);
/* Variables: 4-byte destination and source, the constant 0xff, one 4-byte
 * parameter, and eight temporaries.  t2 is declared but not referenced by
 * any instruction below. */
21699 orc_program_add_destination (p, 4, "d1");
21700 orc_program_add_source (p, 4, "s1");
21701 orc_program_add_constant (p, 4, 0x000000ff, "c1");
21702 orc_program_add_parameter (p, 4, "p1");
21703 orc_program_add_temporary (p, 8, "t1");
21704 orc_program_add_temporary (p, 8, "t2");
21705 orc_program_add_temporary (p, 4, "t3");
21706 orc_program_add_temporary (p, 8, "t4");
21707 orc_program_add_temporary (p, 8, "t5");
21708 orc_program_add_temporary (p, 8, "t6");
21709 orc_program_add_temporary (p, 8, "t7");
21710 orc_program_add_temporary (p, 4, "t8");
/* Instruction list.  Operand order appears to be (dest, src1, src2, src3),
 * with ORC_VAR_D1 filling unused operand slots; the integer argument is
 * presumably an instruction flag -- TODO confirm against the Orc API.
 * Data flow by operand: t6 <- p1, t5 <- s1, t7 <- splat of t6,
 * t8 <- div255(t6 * t5), t5 <- div255(t5 * t7) ^ c1,
 * t3 <- div255(d1 * t5), d1 <- addusb(t3, t8). */
21712 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);
21713 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
21714 orc_program_append_2 (p, "splatw3q", 0, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
21715 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5, ORC_VAR_D1);
21716 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
21717 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);
21718 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T7, ORC_VAR_D1);
21719 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
21720 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
21721 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
21722 orc_program_append_2 (p, "xorw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C1, ORC_VAR_D1);
21723 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T1, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
21724 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
21725 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
21726 orc_program_append_2 (p, "addusb", 2, ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_T8, ORC_VAR_D1);
/* NOTE(review): the compile result is stored but no check of it is visible
 * in this excerpt. */
21728 result = orc_program_compile (p);
21731 orc_once_mutex_unlock ();
/* Bind the caller's buffers and scalar parameter to the executor. */
21736 ex->arrays[ORC_VAR_D1] = d1;
21737 ex->arrays[ORC_VAR_S1] = (void *)s1;
21738 ex->params[ORC_VAR_P1] = p1;
21740 func = p->code_exec;
21746 /* cogorc_resample_horiz_1tap */
/* Plain-C implementation of cogorc_resample_horiz_1tap: for each of the n
 * iterations, picks one source byte at index (p1 + i*p2) >> 16.
 *
 * d1: destination bytes, s1: source bytes.  The >>16 indicates that p1
 * (start position) and p2 (per-iteration increment) carry 16 fractional
 * bits -- presumably 16.16 fixed point; verify against callers.
 * NOTE(review): the declaration of var32 and the store to ptr0 are not
 * visible in this excerpt.  The index is not range-checked here, so the
 * caller must keep it inside the source buffer. */
21749 cogorc_resample_horiz_1tap (orc_uint8 * d1, const orc_uint8 * s1, int p1, int p2, int n){
21751 orc_int8 * ORC_RESTRICT ptr0;
21752 const orc_int8 * ORC_RESTRICT ptr4;
21755 ptr0 = (orc_int8 *)d1;
21756 ptr4 = (orc_int8 *)s1;
21759 for (i = 0; i < n; i++) {
21760 /* 0: ldresnearb */
/* Nearest-sample lookup: the fractional bits are simply discarded. */
21761 var32 = ptr4[(p1 + i*p2)>>16];
/* Backup (plain-C) function registered for the Orc program
 * "cogorc_resample_horiz_1tap".  It reads the destination pointer from
 * ex->arrays[0], the source pointer from ex->arrays[4], and the start
 * position / increment from ex->params[24] and ex->params[25].
 * NOTE(review): the origin of n, the declaration of var32 and the store to
 * ptr0 are not visible in this excerpt. */
21770 _backup_cogorc_resample_horiz_1tap (OrcExecutor * ORC_RESTRICT ex)
21774 orc_int8 * ORC_RESTRICT ptr0;
21775 const orc_int8 * ORC_RESTRICT ptr4;
21778 ptr0 = (orc_int8 *)ex->arrays[0];
21779 ptr4 = (orc_int8 *)ex->arrays[4];
21782 for (i = 0; i < n; i++) {
21783 /* 0: ldresnearb */
/* Source index is (start + i * increment) >> 16; the low 16 bits are
 * discarded.  No bounds check is performed here. */
21784 var32 = ptr4[(ex->params[24] + i*ex->params[25])>>16];
/* Orc-program-backed entry point for cogorc_resample_horiz_1tap.
 *
 * d1 is the destination byte array, s1 the source byte array; p1 and p2 are
 * 4-byte scalar parameters handed to the single "ldresnearb" instruction.
 * The visible code builds and compiles the Orc program while holding the
 * once-mutex, then binds the caller's arrays and parameters to an executor
 * and fetches the program's code_exec pointer.
 * NOTE(review): the guard around the one-time setup, the use of n and the
 * final call through func are not visible in this excerpt -- confirm against
 * the full file. */
21792 cogorc_resample_horiz_1tap (orc_uint8 * d1, const orc_uint8 * s1, int p1, int p2, int n)
21794 OrcExecutor _ex, *ex = &_ex;
/* Program object and init flag persist across calls (function-local statics). */
21795 static int p_inited = 0;
21796 static OrcProgram *p = 0;
21797 void (*func) (OrcExecutor *);
21800 orc_once_mutex_lock ();
21802 OrcCompileResult result;
21804 p = orc_program_new ();
21805 orc_program_set_name (p, "cogorc_resample_horiz_1tap");
21806 orc_program_set_backup_function (p, _backup_cogorc_resample_horiz_1tap);
/* Variables: 1-byte destination and source, two 4-byte parameters. */
21807 orc_program_add_destination (p, 1, "d1");
21808 orc_program_add_source (p, 1, "s1");
21809 orc_program_add_parameter (p, 4, "p1");
21810 orc_program_add_parameter (p, 4, "p2");
/* The whole program is one instruction using all four operand slots. */
21812 orc_program_append_2 (p, "ldresnearb", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_P1, ORC_VAR_P2);
/* NOTE(review): the compile result is stored but no check of it is visible
 * in this excerpt. */
21814 result = orc_program_compile (p);
21817 orc_once_mutex_unlock ();
/* Bind the caller's buffers and scalar parameters to the executor. */
21822 ex->arrays[ORC_VAR_D1] = d1;
21823 ex->arrays[ORC_VAR_S1] = (void *)s1;
21824 ex->params[ORC_VAR_P1] = p1;
21825 ex->params[ORC_VAR_P2] = p2;
21827 func = p->code_exec;
21833 /* cogorc_resample_horiz_2tap */
/*
 * Plain-C implementation of cogorc_resample_horiz_2tap.
 *
 * For every output index i the position tmp = p1 + i*p2 is split into a
 * source index (tmp >> 16) and an 8-bit fraction ((tmp >> 8) & 0xff).
 * The result is a two-tap linear blend of the source byte at that index
 * and the following one.
 *
 * Because ptr4[(tmp>>16)+1] is always read, the source must have one
 * readable byte beyond the last sampled index.
 *
 * NOTE(review): declarations and the store through ptr0 are not visible
 * in this excerpt.
 */
21836 cogorc_resample_horiz_2tap (orc_uint8 * d1, const orc_uint8 * s1, int p1, int p2, int n){
21838 orc_int8 * ORC_RESTRICT ptr0;
21839 const orc_int8 * ORC_RESTRICT ptr4;
21842 ptr0 = (orc_int8 *)d1;
21843 ptr4 = (orc_int8 *)s1;
21846 for (i = 0; i < n; i++) {
21849 int tmp = p1 + i * p2;
/* left * (256 - frac) + right * frac, scaled back down by 256; samples are read as unsigned bytes. */
21850 var32 = ((orc_uint8)ptr4[tmp>>16] * (256-((tmp>>8)&0xff)) + (orc_uint8)ptr4[(tmp>>16)+1] * ((tmp>>8)&0xff))>>8;
/*
 * Backup (C) function registered on the OrcProgram for
 * cogorc_resample_horiz_2tap.
 *
 * Performs the same two-tap linear blend as the direct C version, taking
 * the destination from ex->arrays[0], the source from ex->arrays[4], the
 * start position from ex->params[24] and the step from ex->params[25].
 *
 * NOTE(review): declarations and the store through ptr0 are not visible
 * in this excerpt.
 */
21860 _backup_cogorc_resample_horiz_2tap (OrcExecutor * ORC_RESTRICT ex)
21864 orc_int8 * ORC_RESTRICT ptr0;
21865 const orc_int8 * ORC_RESTRICT ptr4;
21868 ptr0 = (orc_int8 *)ex->arrays[0];
21869 ptr4 = (orc_int8 *)ex->arrays[4];
21872 for (i = 0; i < n; i++) {
/* Position of output i: bits above 16 give the source index, bits 8..15 the blend weight. */
21875 int tmp = ex->params[24] + i * ex->params[25];
21876 var32 = ((orc_uint8)ptr4[tmp>>16] * (256-((tmp>>8)&0xff)) + (orc_uint8)ptr4[(tmp>>16)+1] * ((tmp>>8)&0xff))>>8;
/*
 * Orc-backed entry point for cogorc_resample_horiz_2tap.
 *
 * Builds an OrcProgram named "cogorc_resample_horiz_2tap" with a 1-byte
 * destination d1, a 1-byte source s1 and two 4-byte parameters p1/p2,
 * consisting of the single instruction "ldreslinb d1, s1, p1, p2".
 * _backup_cogorc_resample_horiz_2tap is registered as backup.  Building
 * and compiling happen between orc_once_mutex_lock() and
 * orc_once_mutex_unlock(); afterwards the executor receives the caller's
 * arguments and the compiled entry is read from p->code_exec.
 *
 * NOTE(review): several lines of this function are not visible in the
 * excerpt (whatever tests p_inited and whatever invokes func).
 */
21885 cogorc_resample_horiz_2tap (orc_uint8 * d1, const orc_uint8 * s1, int p1, int p2, int n)
21887 OrcExecutor _ex, *ex = &_ex;
/* p and p_inited are static, so they persist across calls. */
21888 static int p_inited = 0;
21889 static OrcProgram *p = 0;
21890 void (*func) (OrcExecutor *);
21893 orc_once_mutex_lock ();
21895 OrcCompileResult result;
21897 p = orc_program_new ();
21898 orc_program_set_name (p, "cogorc_resample_horiz_2tap");
21899 orc_program_set_backup_function (p, _backup_cogorc_resample_horiz_2tap);
21900 orc_program_add_destination (p, 1, "d1");
21901 orc_program_add_source (p, 1, "s1");
21902 orc_program_add_parameter (p, 4, "p1");
21903 orc_program_add_parameter (p, 4, "p2");
21905 orc_program_append_2 (p, "ldreslinb", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_P1, ORC_VAR_P2);
21907 result = orc_program_compile (p);
21910 orc_once_mutex_unlock ();
/* Hand the caller's buffers and parameters to the executor. */
21915 ex->arrays[ORC_VAR_D1] = d1;
21916 ex->arrays[ORC_VAR_S1] = (void *)s1;
21917 ex->params[ORC_VAR_P1] = p1;
21918 ex->params[ORC_VAR_P2] = p2;
21920 func = p->code_exec;
21926 /* test_float_constant_1 */
/*
 * Plain-C implementation of test_float_constant_1.
 *
 * Prepares the 32-bit constant 0x40000000 once, outside the loop, and
 * then iterates over the n elements of d1 (viewed as orc_union32).
 *
 * NOTE(review): the loop body is not visible in this excerpt.
 */
21929 test_float_constant_1 (orc_uint32 * d1, int n){
21931 orc_union32 * ORC_RESTRICT ptr0;
21935 ptr0 = (orc_union32 *)d1;
21938 var32.i = 0x40000000; /* 1073741824 as an integer; the bit pattern of 2.0f as an IEEE-754 single */
21940 for (i = 0; i < n; i++) {
/*
 * Backup (C) function registered on the OrcProgram for
 * test_float_constant_1.  Takes the destination from ex->arrays[0] and
 * uses the same loop-invariant constant 0x40000000 as the direct C
 * version.
 *
 * NOTE(review): the loop body is not visible in this excerpt.
 */
21951 _backup_test_float_constant_1 (OrcExecutor * ORC_RESTRICT ex)
21955 orc_union32 * ORC_RESTRICT ptr0;
21959 ptr0 = (orc_union32 *)ex->arrays[0];
21962 var32.i = 0x40000000; /* 1073741824 as an integer; the bit pattern of 2.0f as an IEEE-754 single */
21964 for (i = 0; i < n; i++) {
/*
 * Orc-backed entry point for test_float_constant_1.
 *
 * Builds an OrcProgram named "test_float_constant_1" with a 4-byte
 * destination d1 and a 4-byte constant c1 = 0x40000000 (the bit pattern
 * of 2.0f), consisting of the single instruction "copyl d1, c1".
 * _backup_test_float_constant_1 is registered as backup.  Building and
 * compiling happen between orc_once_mutex_lock() and
 * orc_once_mutex_unlock(); afterwards the destination is stored in the
 * executor and the compiled entry is read from p->code_exec.
 *
 * NOTE(review): several lines of this function are not visible in the
 * excerpt (whatever tests p_inited and whatever invokes func).
 */
21974 test_float_constant_1 (orc_uint32 * d1, int n)
21976 OrcExecutor _ex, *ex = &_ex;
/* p and p_inited are static, so they persist across calls. */
21977 static int p_inited = 0;
21978 static OrcProgram *p = 0;
21979 void (*func) (OrcExecutor *);
21982 orc_once_mutex_lock ();
21984 OrcCompileResult result;
21986 p = orc_program_new ();
21987 orc_program_set_name (p, "test_float_constant_1");
21988 orc_program_set_backup_function (p, _backup_test_float_constant_1);
21989 orc_program_add_destination (p, 4, "d1");
21990 orc_program_add_constant (p, 4, 0x40000000, "c1");
/* Unused operand slots of append_2 are filled with ORC_VAR_D1. */
21992 orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_C1, ORC_VAR_D1, ORC_VAR_D1);
21994 result = orc_program_compile (p);
21997 orc_once_mutex_unlock ();
22002 ex->arrays[ORC_VAR_D1] = d1;
22004 func = p->code_exec;
22010 /* test_float_constant_2 */
/*
 * Plain-C implementation of test_float_constant_2.
 *
 * Prepares the 32-bit constant 0x40000000 once, outside the loop, and
 * then iterates over the n elements of d1 (viewed as orc_union32).  The
 * visible lines are the same as those of test_float_constant_1.
 *
 * NOTE(review): the loop body is not visible in this excerpt.
 */
22013 test_float_constant_2 (orc_uint32 * d1, int n){
22015 orc_union32 * ORC_RESTRICT ptr0;
22019 ptr0 = (orc_union32 *)d1;
22022 var32.i = 0x40000000; /* 1073741824 as an integer; the bit pattern of 2.0f as an IEEE-754 single */
22024 for (i = 0; i < n; i++) {
/*
 * Backup (C) function registered on the OrcProgram for
 * test_float_constant_2.  Takes the destination from ex->arrays[0] and
 * uses the same loop-invariant constant 0x40000000 as the direct C
 * version.
 *
 * NOTE(review): the loop body is not visible in this excerpt.
 */
22035 _backup_test_float_constant_2 (OrcExecutor * ORC_RESTRICT ex)
22039 orc_union32 * ORC_RESTRICT ptr0;
22043 ptr0 = (orc_union32 *)ex->arrays[0];
22046 var32.i = 0x40000000; /* 1073741824 as an integer; the bit pattern of 2.0f as an IEEE-754 single */
22048 for (i = 0; i < n; i++) {
/*
 * Orc-backed entry point for test_float_constant_2.
 *
 * Builds an OrcProgram named "test_float_constant_2" with a 4-byte
 * destination d1 and a 4-byte constant c1 = 0x40000000 (the bit pattern
 * of 2.0f), consisting of the single instruction "copyl d1, c1".
 * _backup_test_float_constant_2 is registered as backup.  Building and
 * compiling happen between orc_once_mutex_lock() and
 * orc_once_mutex_unlock(); afterwards the destination is stored in the
 * executor and the compiled entry is read from p->code_exec.
 *
 * NOTE(review): several lines of this function are not visible in the
 * excerpt (whatever tests p_inited and whatever invokes func).
 */
22058 test_float_constant_2 (orc_uint32 * d1, int n)
22060 OrcExecutor _ex, *ex = &_ex;
/* p and p_inited are static, so they persist across calls. */
22061 static int p_inited = 0;
22062 static OrcProgram *p = 0;
22063 void (*func) (OrcExecutor *);
22066 orc_once_mutex_lock ();
22068 OrcCompileResult result;
22070 p = orc_program_new ();
22071 orc_program_set_name (p, "test_float_constant_2");
22072 orc_program_set_backup_function (p, _backup_test_float_constant_2);
22073 orc_program_add_destination (p, 4, "d1");
22074 orc_program_add_constant (p, 4, 0x40000000, "c1");
/* Unused operand slots of append_2 are filled with ORC_VAR_D1. */
22076 orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_C1, ORC_VAR_D1, ORC_VAR_D1);
22078 result = orc_program_compile (p);
22081 orc_once_mutex_unlock ();
22086 ex->arrays[ORC_VAR_D1] = d1;
22088 func = p->code_exec;
22094 /* convert_fc32_to_int32 */
/*
 * Plain-C implementation of convert_fc32_to_int32.
 *
 * Each source element is 64 bits wide and is handled as two 32-bit
 * lanes (x2[0] and x2[1]).  Per lane the visible steps are:
 *   1. multiply the float by the constant 0x46fffe00 (32767.0f), with
 *      ORC_DENORMAL applied to both operands and to the product;
 *   2. convert the product to int, replacing the result by 0x7fffffff
 *      when the cast yields 0x80000000 although the float's sign bit is
 *      clear;
 *   3. copy the converted lane into the value destined for d1.
 *
 * NOTE(review): declarations, the source load, the assignment of tmp to
 * the intermediate variable and the final store are not visible in this
 * excerpt.
 */
22097 convert_fc32_to_int32 (orc_uint32 * d1, const orc_uint64 * s1, int n){
22099 orc_union32 * ORC_RESTRICT ptr0;
22100 const orc_union64 * ORC_RESTRICT ptr4;
22107 ptr0 = (orc_union32 *)d1;
22108 ptr4 = (orc_union64 *)s1;
/* Loop-invariant multiplier, one copy per lane. */
22111 var35.x2[0] = 0x46fffe00; /* 1191181824 as an integer; the bit pattern of 32767.0f as an IEEE-754 single */
22112 var35.x2[1] = 0x46fffe00; /* 1191181824 as an integer; the bit pattern of 32767.0f as an IEEE-754 single */
22114 for (i = 0; i < n; i++) {
/* Lane 0: float multiply through union type punning. */
22121 orc_union32 _dest1;
22122 _src1.i = ORC_DENORMAL(var34.x2[0]);
22123 _src2.i = ORC_DENORMAL(var35.x2[0]);
22124 _dest1.f = _src1.f * _src2.f;
22125 var37.x2[0] = ORC_DENORMAL(_dest1.i);
/* Lane 1: same multiply. */
22130 orc_union32 _dest1;
22131 _src1.i = ORC_DENORMAL(var34.x2[1]);
22132 _src2.i = ORC_DENORMAL(var35.x2[1]);
22133 _dest1.f = _src1.f * _src2.f;
22134 var37.x2[1] = ORC_DENORMAL(_dest1.i);
/* Float-to-int conversion; a 0x80000000 result for a non-negative input is replaced by 0x7fffffff. */
22139 tmp = (int)var37.x2f[0];
22140 if (tmp == 0x80000000 && !(var37.x2[0]&0x80000000)) tmp = 0x7fffffff;
22145 tmp = (int)var37.x2f[1];
22146 if (tmp == 0x80000000 && !(var37.x2[1]&0x80000000)) tmp = 0x7fffffff;
/* Lane-wise copy into the output value (presumably the narrowing of "convlw"; the variable types are not visible here). */
22150 var36.x2[0] = var38.x2[0];
22151 var36.x2[1] = var38.x2[1];
/*
 * Backup (C) function registered on the OrcProgram for
 * convert_fc32_to_int32.
 *
 * Same per-lane sequence as the direct C version (multiply by
 * 32767.0f, convert to int with the 0x80000000 -> 0x7fffffff fix-up for
 * non-negative inputs, lane-wise copy to the output value), with the
 * destination taken from ex->arrays[0] and the source from
 * ex->arrays[4].
 *
 * NOTE(review): declarations, the source load, the assignment of tmp to
 * the intermediate variable and the final store are not visible in this
 * excerpt.
 */
22160 _backup_convert_fc32_to_int32 (OrcExecutor * ORC_RESTRICT ex)
22164 orc_union32 * ORC_RESTRICT ptr0;
22165 const orc_union64 * ORC_RESTRICT ptr4;
22172 ptr0 = (orc_union32 *)ex->arrays[0];
22173 ptr4 = (orc_union64 *)ex->arrays[4];
/* Loop-invariant multiplier, one copy per lane. */
22176 var35.x2[0] = 0x46fffe00; /* 1191181824 as an integer; the bit pattern of 32767.0f as an IEEE-754 single */
22177 var35.x2[1] = 0x46fffe00; /* 1191181824 as an integer; the bit pattern of 32767.0f as an IEEE-754 single */
22179 for (i = 0; i < n; i++) {
/* Lane 0: float multiply through union type punning. */
22186 orc_union32 _dest1;
22187 _src1.i = ORC_DENORMAL(var34.x2[0]);
22188 _src2.i = ORC_DENORMAL(var35.x2[0]);
22189 _dest1.f = _src1.f * _src2.f;
22190 var37.x2[0] = ORC_DENORMAL(_dest1.i);
/* Lane 1: same multiply. */
22195 orc_union32 _dest1;
22196 _src1.i = ORC_DENORMAL(var34.x2[1]);
22197 _src2.i = ORC_DENORMAL(var35.x2[1]);
22198 _dest1.f = _src1.f * _src2.f;
22199 var37.x2[1] = ORC_DENORMAL(_dest1.i);
/* Float-to-int conversion; a 0x80000000 result for a non-negative input is replaced by 0x7fffffff. */
22204 tmp = (int)var37.x2f[0];
22205 if (tmp == 0x80000000 && !(var37.x2[0]&0x80000000)) tmp = 0x7fffffff;
22210 tmp = (int)var37.x2f[1];
22211 if (tmp == 0x80000000 && !(var37.x2[1]&0x80000000)) tmp = 0x7fffffff;
/* Lane-wise copy into the output value (presumably the narrowing of "convlw"; the variable types are not visible here). */
22215 var36.x2[0] = var38.x2[0];
22216 var36.x2[1] = var38.x2[1];
/*
 * Orc-backed entry point for convert_fc32_to_int32.
 *
 * Builds an OrcProgram named "convert_fc32_to_int32" with a 4-byte
 * destination d1, an 8-byte source s1, a 4-byte constant
 * c1 = 0x46fffe00 (32767.0f) and two 8-byte temporaries.  The program
 * is three instructions, each appended with flag value 1:
 *   mulf   t1, s1, c1
 *   convfl t2, t1
 *   convlw d1, t2
 * _backup_convert_fc32_to_int32 is registered as backup.  Building and
 * compiling happen between orc_once_mutex_lock() and
 * orc_once_mutex_unlock(); afterwards the executor receives the caller's
 * buffers and the compiled entry is read from p->code_exec.
 *
 * NOTE(review): flag value 1 presumably selects the two-lane (x2) form
 * that the C versions implement with x2[0]/x2[1] -- confirm against the
 * Orc headers.  Several lines of this function are not visible in the
 * excerpt (whatever tests p_inited and whatever invokes func).
 */
22224 convert_fc32_to_int32 (orc_uint32 * d1, const orc_uint64 * s1, int n)
22226 OrcExecutor _ex, *ex = &_ex;
/* p and p_inited are static, so they persist across calls. */
22227 static int p_inited = 0;
22228 static OrcProgram *p = 0;
22229 void (*func) (OrcExecutor *);
22232 orc_once_mutex_lock ();
22234 OrcCompileResult result;
22236 p = orc_program_new ();
22237 orc_program_set_name (p, "convert_fc32_to_int32");
22238 orc_program_set_backup_function (p, _backup_convert_fc32_to_int32);
22239 orc_program_add_destination (p, 4, "d1");
22240 orc_program_add_source (p, 8, "s1");
22241 orc_program_add_constant (p, 4, 0x46fffe00, "c1");
22242 orc_program_add_temporary (p, 8, "t1");
22243 orc_program_add_temporary (p, 8, "t2");
22245 orc_program_append_2 (p, "mulf", 1, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);
22246 orc_program_append_2 (p, "convfl", 1, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
22247 orc_program_append_2 (p, "convlw", 1, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
22249 result = orc_program_compile (p);
22252 orc_once_mutex_unlock ();
22257 ex->arrays[ORC_VAR_D1] = d1;
22258 ex->arrays[ORC_VAR_S1] = (void *)s1;
22260 func = p->code_exec;
/*
 * Plain-C implementation of param64: iterates over the n 64-bit
 * elements of d1 (viewed as orc_union64).
 *
 * NOTE(review): only the pointer setup and the loop header are visible
 * in this excerpt; how p1 is used is not shown here.
 */
22269 param64 (orc_uint64 * d1, int p1, int n){
22271 orc_union64 * ORC_RESTRICT ptr0;
22275 ptr0 = (orc_union64 *)d1;
22280 for (i = 0; i < n; i++) {
/*
 * Backup (C) function registered on the OrcProgram for param64.
 *
 * Takes the destination from ex->arrays[0] and, before the loop,
 * assembles a 64-bit value from two executor parameter slots.
 *
 * NOTE(review): the loop body is not visible in this excerpt.
 */
22291 _backup_param64 (OrcExecutor * ORC_RESTRICT ex)
22295 orc_union64 * ORC_RESTRICT ptr0;
22299 ptr0 = (orc_union64 *)ex->arrays[0];
/*
 * Low 32 bits come from params[24]; the high 32 bits come from the slot
 * (ORC_VAR_T1 - ORC_VAR_P1) entries further on.
 */
22302 var32.i = (ex->params[24] & 0xffffffff) | ((orc_uint64)(ex->params[24 + (ORC_VAR_T1 - ORC_VAR_P1)]) << 32);
22304 for (i = 0; i < n; i++) {
/*
 * Orc-backed entry point for param64.
 *
 * Builds an OrcProgram named "param64" with an 8-byte destination d1 and
 * an 8-byte parameter p1, consisting of the single instruction
 * "copyq d1, p1".  _backup_param64 is registered as backup.  Building
 * and compiling happen between orc_once_mutex_lock() and
 * orc_once_mutex_unlock(); afterwards the executor receives d1 and p1
 * and the compiled entry is read from p->code_exec.
 *
 * NOTE(review): several lines of this function are not visible in the
 * excerpt (whatever tests p_inited and whatever invokes func).
 */
22314 param64 (orc_uint64 * d1, int p1, int n)
22316 OrcExecutor _ex, *ex = &_ex;
/* p and p_inited are static, so they persist across calls. */
22317 static int p_inited = 0;
22318 static OrcProgram *p = 0;
22319 void (*func) (OrcExecutor *);
22322 orc_once_mutex_lock ();
22324 OrcCompileResult result;
22326 p = orc_program_new ();
22327 orc_program_set_name (p, "param64");
22328 orc_program_set_backup_function (p, _backup_param64);
22329 orc_program_add_destination (p, 8, "d1");
22330 orc_program_add_parameter (p, 8, "p1");
22332 orc_program_append_2 (p, "copyq", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);
22334 result = orc_program_compile (p);
22337 orc_once_mutex_unlock ();
22342 ex->arrays[ORC_VAR_D1] = d1;
/*
 * NOTE(review): only the ORC_VAR_P1 slot is written in the visible
 * lines, and p1 is a plain int.  _backup_param64 also reads the slot at
 * offset (ORC_VAR_T1 - ORC_VAR_P1) as the high 32 bits, and _ex is an
 * uninitialised local, so that high word looks unset here (param64_2
 * writes ex->params[ORC_VAR_T1] explicitly).  Confirm whether this is
 * intended.
 */
22343 ex->params[ORC_VAR_P1] = p1;
22345 func = p->code_exec;
/*
 * Plain-C implementation of const64.
 *
 * Prepares the 64-bit constant 0x0123456789abcdef once, outside the
 * loop, and then iterates over the n elements of d1 (viewed as
 * orc_union64).
 *
 * NOTE(review): the loop body is not visible in this excerpt.
 */
22354 const64 (orc_uint64 * d1, int n){
22356 orc_union64 * ORC_RESTRICT ptr0;
22360 ptr0 = (orc_union64 *)d1;
/* ORC_UINT64_C attaches the platform's 64-bit unsigned literal suffix. */
22363 var32.i = ORC_UINT64_C(0x0123456789abcdef); /* 3.5127e-303f */
22365 for (i = 0; i < n; i++) {
/*
 * Backup (C) function registered on the OrcProgram for const64.  Takes
 * the destination from ex->arrays[0] and uses the same loop-invariant
 * 64-bit constant as the direct C version.
 *
 * NOTE(review): the loop body is not visible in this excerpt.
 */
22376 _backup_const64 (OrcExecutor * ORC_RESTRICT ex)
22380 orc_union64 * ORC_RESTRICT ptr0;
22384 ptr0 = (orc_union64 *)ex->arrays[0];
/* ORC_UINT64_C attaches the platform's 64-bit unsigned literal suffix. */
22387 var32.i = ORC_UINT64_C(0x0123456789abcdef); /* 3.5127e-303f */
22389 for (i = 0; i < n; i++) {
/*
 * Orc-backed entry point for const64.
 *
 * Builds an OrcProgram named "const64" with an 8-byte destination d1 and
 * an 8-byte constant c1 = 0x0123456789abcdef (added through
 * orc_program_add_constant_int64), consisting of the single instruction
 * "copyq d1, c1".  _backup_const64 is registered as backup.  Building
 * and compiling happen between orc_once_mutex_lock() and
 * orc_once_mutex_unlock(); afterwards the destination is stored in the
 * executor and the compiled entry is read from p->code_exec.
 *
 * NOTE(review): several lines of this function are not visible in the
 * excerpt (whatever tests p_inited and whatever invokes func).
 */
22399 const64 (orc_uint64 * d1, int n)
22401 OrcExecutor _ex, *ex = &_ex;
/* p and p_inited are static, so they persist across calls. */
22402 static int p_inited = 0;
22403 static OrcProgram *p = 0;
22404 void (*func) (OrcExecutor *);
22407 orc_once_mutex_lock ();
22409 OrcCompileResult result;
22411 p = orc_program_new ();
22412 orc_program_set_name (p, "const64");
22413 orc_program_set_backup_function (p, _backup_const64);
22414 orc_program_add_destination (p, 8, "d1");
22415 orc_program_add_constant_int64 (p, 8, 0x0123456789abcdefULL, "c1");
22417 orc_program_append_2 (p, "copyq", 0, ORC_VAR_D1, ORC_VAR_C1, ORC_VAR_D1, ORC_VAR_D1);
22419 result = orc_program_compile (p);
22422 orc_once_mutex_unlock ();
22427 ex->arrays[ORC_VAR_D1] = d1;
22429 func = p->code_exec;
/*
 * Plain-C implementation of param64_2: iterates over the n 64-bit
 * elements of d1 (viewed as orc_union64).  Unlike param64, p1 is passed
 * as a full orc_int64.
 *
 * NOTE(review): only the pointer setup and the loop header are visible
 * in this excerpt; how p1 is used is not shown here.
 */
22438 param64_2 (orc_uint64 * d1, orc_int64 p1, int n){
22440 orc_union64 * ORC_RESTRICT ptr0;
22444 ptr0 = (orc_union64 *)d1;
22449 for (i = 0; i < n; i++) {
/*
 * Backup (C) function registered on the OrcProgram for param64_2.
 *
 * Takes the destination from ex->arrays[0] and, before the loop,
 * reassembles the 64-bit parameter from two executor parameter slots.
 *
 * NOTE(review): the loop body is not visible in this excerpt.
 */
22460 _backup_param64_2 (OrcExecutor * ORC_RESTRICT ex)
22464 orc_union64 * ORC_RESTRICT ptr0;
22468 ptr0 = (orc_union64 *)ex->arrays[0];
/*
 * Low 32 bits come from params[24]; the high 32 bits come from the slot
 * (ORC_VAR_T1 - ORC_VAR_P1) entries further on.
 */
22471 var34.i = (ex->params[24] & 0xffffffff) | ((orc_uint64)(ex->params[24 + (ORC_VAR_T1 - ORC_VAR_P1)]) << 32);
22473 for (i = 0; i < n; i++) {
/*
 * Orc-backed entry point for param64_2.
 *
 * Builds an OrcProgram named "param64_2" with an 8-byte destination d1,
 * an 8-byte parameter p1 (added through orc_program_add_parameter_int64)
 * and one 8-byte temporary.  The program is two instructions:
 *   loadpq t1, p1
 *   copyq  d1, t1
 * _backup_param64_2 is registered as backup.  Building and compiling
 * happen between orc_once_mutex_lock() and orc_once_mutex_unlock();
 * afterwards the executor is filled and the compiled entry is read from
 * p->code_exec.
 *
 * NOTE(review): several lines of this function are not visible in the
 * excerpt (whatever tests p_inited, the declaration and assignment of
 * tmp, and whatever invokes func).
 */
22483 param64_2 (orc_uint64 * d1, orc_int64 p1, int n)
22485 OrcExecutor _ex, *ex = &_ex;
/* p and p_inited are static, so they persist across calls. */
22486 static int p_inited = 0;
22487 static OrcProgram *p = 0;
22488 void (*func) (OrcExecutor *);
22491 orc_once_mutex_lock ();
22493 OrcCompileResult result;
22495 p = orc_program_new ();
22496 orc_program_set_name (p, "param64_2");
22497 orc_program_set_backup_function (p, _backup_param64_2);
22498 orc_program_add_destination (p, 8, "d1");
22499 orc_program_add_parameter_int64 (p, 8, "p1");
22500 orc_program_add_temporary (p, 8, "t1");
22502 orc_program_append_2 (p, "loadpq", 0, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);
22503 orc_program_append_2 (p, "copyq", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
22505 result = orc_program_compile (p);
22508 orc_once_mutex_unlock ();
22513 ex->arrays[ORC_VAR_D1] = d1;
/*
 * The 64-bit parameter travels as two 32-bit halves: x2[0] in the
 * ORC_VAR_P1 slot and x2[1] in the ORC_VAR_T1 slot (tmp presumably
 * holds p1 -- its setup is not visible here).
 */
22517 ex->params[ORC_VAR_P1] = tmp.x2[0];
22518 ex->params[ORC_VAR_T1] = tmp.x2[1];
22521 func = p->code_exec;
22527 /* pa_volume_s16ne_orc_2ch */
/*
 * Plain-C implementation of pa_volume_s16ne_orc_2ch.
 *
 * d1 is viewed as an array of 32-bit elements, each processed as two
 * lanes; p1 is a 64-bit parameter.  Per element the visible arithmetic
 * is:
 *   - widen the two input lanes (once through an orc_uint16 cast, once
 *     by plain assignment);
 *   - build an all-ones/zero mask per 16-bit lane where the constant 0
 *     is greater than the lane, and AND it with the parameter;
 *   - take the high 16 bits of the unsigned 16x16 products of the
 *     widened sample and the parameter, and subtract the masked value;
 *   - multiply the other widened sample by the parameter shifted right
 *     by 16 and add the result;
 *   - clamp each 32-bit lane with ORC_CLAMP_SW.
 *
 * NOTE(review): declarations, the loads of the sample and of the
 * parameter, and the final store are not visible in this excerpt.
 */
22530 pa_volume_s16ne_orc_2ch (int16_t * d1, orc_int64 p1, int n){
22532 orc_union32 * ORC_RESTRICT ptr0;
22548 ptr0 = (orc_union32 *)d1;
/* Loop-invariant zero constant, one copy per 16-bit lane. */
22553 var39.x4[0] = 0x00000000; /* 0 or 0f */
22554 var39.x4[1] = 0x00000000; /* 0 or 0f */
22555 var39.x4[2] = 0x00000000; /* 0 or 0f */
22556 var39.x4[3] = 0x00000000; /* 0 or 0f */
22558 for (i = 0; i < n; i++) {
/* Widening of the input lanes through an orc_uint16 cast. */
22562 var42.x2[0] = (orc_uint16)var37.x2[0];
22563 var42.x2[1] = (orc_uint16)var37.x2[1];
/* Widening of the input lanes by plain assignment. */
22567 var43.x2[0] = var38.x2[0];
22568 var43.x2[1] = var38.x2[1];
/* Mask: all ones in every 16-bit lane where 0 > lane value. */
22570 var44.x4[0] = (var39.x4[0] > var42.x4[0]) ? (~0) : 0;
22571 var44.x4[1] = (var39.x4[1] > var42.x4[1]) ? (~0) : 0;
22572 var44.x4[2] = (var39.x4[2] > var42.x4[2]) ? (~0) : 0;
22573 var44.x4[3] = (var39.x4[3] > var42.x4[3]) ? (~0) : 0;
/* Keep the parameter's 16-bit lanes only where the mask is set. */
22575 var45.x4[0] = var44.x4[0] & var41.x4[0];
22576 var45.x4[1] = var44.x4[1] & var41.x4[1];
22577 var45.x4[2] = var44.x4[2] & var41.x4[2];
22578 var45.x4[3] = var44.x4[3] & var41.x4[3];
/* Unsigned 16x16 multiply, keeping the high 16 bits of each product. */
22580 var46.x4[0] = ((orc_uint32)((orc_uint16)var42.x4[0]) * (orc_uint32)((orc_uint16)var41.x4[0])) >> 16;
22581 var46.x4[1] = ((orc_uint32)((orc_uint16)var42.x4[1]) * (orc_uint32)((orc_uint16)var41.x4[1])) >> 16;
22582 var46.x4[2] = ((orc_uint32)((orc_uint16)var42.x4[2]) * (orc_uint32)((orc_uint16)var41.x4[2])) >> 16;
22583 var46.x4[3] = ((orc_uint32)((orc_uint16)var42.x4[3]) * (orc_uint32)((orc_uint16)var41.x4[3])) >> 16;
/* Subtract the masked parameter from the high-half products (32-bit lanes). */
22585 var47.x2[0] = var46.x2[0] - var45.x2[0];
22586 var47.x2[1] = var46.x2[1] - var45.x2[1];
/* Upper 16 bits of each 32-bit parameter lane. */
22588 var48.x2[0] = ((orc_uint32)var41.x2[0]) >> 16;
22589 var48.x2[1] = ((orc_uint32)var41.x2[1]) >> 16;
/* 32-bit multiply of the assigned-widened sample by those upper bits. */
22591 var49.x2[0] = (var43.x2[0] * var48.x2[0]) & 0xffffffff;
22592 var49.x2[1] = (var43.x2[1] * var48.x2[1]) & 0xffffffff;
/* Sum of the two partial results. */
22594 var50.x2[0] = var47.x2[0] + var49.x2[0];
22595 var50.x2[1] = var47.x2[1] + var49.x2[1];
22596 /* 13: convssslw */
22597 var40.x2[0] = ORC_CLAMP_SW(var50.x2[0]);
22598 var40.x2[1] = ORC_CLAMP_SW(var50.x2[1]);
/*
 * Backup (C) function registered on the OrcProgram for
 * pa_volume_s16ne_orc_2ch.
 *
 * Same per-element arithmetic as the direct C version, with the
 * destination taken from ex->arrays[0] and the 64-bit parameter
 * reassembled from two executor parameter slots before the loop.
 *
 * NOTE(review): declarations, the load of the sample and the final
 * store are not visible in this excerpt.
 */
22607 _backup_pa_volume_s16ne_orc_2ch (OrcExecutor * ORC_RESTRICT ex)
22611 orc_union32 * ORC_RESTRICT ptr0;
22627 ptr0 = (orc_union32 *)ex->arrays[0];
/*
 * Low 32 bits come from params[24]; the high 32 bits come from the slot
 * (ORC_VAR_T1 - ORC_VAR_P1) entries further on.
 */
22630 var41.i = (ex->params[24] & 0xffffffff) | ((orc_uint64)(ex->params[24 + (ORC_VAR_T1 - ORC_VAR_P1)]) << 32);
/* Loop-invariant zero constant, one copy per 16-bit lane. */
22632 var39.x4[0] = 0x00000000; /* 0 or 0f */
22633 var39.x4[1] = 0x00000000; /* 0 or 0f */
22634 var39.x4[2] = 0x00000000; /* 0 or 0f */
22635 var39.x4[3] = 0x00000000; /* 0 or 0f */
22637 for (i = 0; i < n; i++) {
/* Widening of the input lanes through an orc_uint16 cast. */
22641 var42.x2[0] = (orc_uint16)var37.x2[0];
22642 var42.x2[1] = (orc_uint16)var37.x2[1];
/* Widening of the input lanes by plain assignment. */
22646 var43.x2[0] = var38.x2[0];
22647 var43.x2[1] = var38.x2[1];
/* Mask: all ones in every 16-bit lane where 0 > lane value. */
22649 var44.x4[0] = (var39.x4[0] > var42.x4[0]) ? (~0) : 0;
22650 var44.x4[1] = (var39.x4[1] > var42.x4[1]) ? (~0) : 0;
22651 var44.x4[2] = (var39.x4[2] > var42.x4[2]) ? (~0) : 0;
22652 var44.x4[3] = (var39.x4[3] > var42.x4[3]) ? (~0) : 0;
/* Keep the parameter's 16-bit lanes only where the mask is set. */
22654 var45.x4[0] = var44.x4[0] & var41.x4[0];
22655 var45.x4[1] = var44.x4[1] & var41.x4[1];
22656 var45.x4[2] = var44.x4[2] & var41.x4[2];
22657 var45.x4[3] = var44.x4[3] & var41.x4[3];
/* Unsigned 16x16 multiply, keeping the high 16 bits of each product. */
22659 var46.x4[0] = ((orc_uint32)((orc_uint16)var42.x4[0]) * (orc_uint32)((orc_uint16)var41.x4[0])) >> 16;
22660 var46.x4[1] = ((orc_uint32)((orc_uint16)var42.x4[1]) * (orc_uint32)((orc_uint16)var41.x4[1])) >> 16;
22661 var46.x4[2] = ((orc_uint32)((orc_uint16)var42.x4[2]) * (orc_uint32)((orc_uint16)var41.x4[2])) >> 16;
22662 var46.x4[3] = ((orc_uint32)((orc_uint16)var42.x4[3]) * (orc_uint32)((orc_uint16)var41.x4[3])) >> 16;
/* Subtract the masked parameter from the high-half products (32-bit lanes). */
22664 var47.x2[0] = var46.x2[0] - var45.x2[0];
22665 var47.x2[1] = var46.x2[1] - var45.x2[1];
/* Upper 16 bits of each 32-bit parameter lane. */
22667 var48.x2[0] = ((orc_uint32)var41.x2[0]) >> 16;
22668 var48.x2[1] = ((orc_uint32)var41.x2[1]) >> 16;
/* 32-bit multiply of the assigned-widened sample by those upper bits. */
22670 var49.x2[0] = (var43.x2[0] * var48.x2[0]) & 0xffffffff;
22671 var49.x2[1] = (var43.x2[1] * var48.x2[1]) & 0xffffffff;
/* Sum of the two partial results. */
22673 var50.x2[0] = var47.x2[0] + var49.x2[0];
22674 var50.x2[1] = var47.x2[1] + var49.x2[1];
22675 /* 13: convssslw */
22676 var40.x2[0] = ORC_CLAMP_SW(var50.x2[0]);
22677 var40.x2[1] = ORC_CLAMP_SW(var50.x2[1]);
/*
 * Orc-backed entry point for pa_volume_s16ne_orc_2ch.
 *
 * Builds an OrcProgram named "pa_volume_s16ne_orc_2ch" with a 4-byte
 * destination d1, two 4-byte constants (c1 = 0, c2 = 0x10 = 16), an
 * 8-byte 64-bit parameter p1 and five 8-byte temporaries, followed by
 * the eleven instructions appended below.
 * _backup_pa_volume_s16ne_orc_2ch is registered as backup.  Building and
 * compiling happen between orc_once_mutex_lock() and
 * orc_once_mutex_unlock(); afterwards the executor is filled and the
 * compiled entry is read from p->code_exec.
 *
 * NOTE(review): the third argument of orc_program_append_2 is 0, 1 or 2
 * here; in the C versions the instructions appended with 1 operate on
 * x2[] lanes and those appended with 2 on x4[] lanes -- presumably lane
 * multiplicity flags, confirm against the Orc headers.  Several lines of
 * this function are not visible in the excerpt (whatever tests p_inited,
 * the declaration and assignment of tmp, and whatever invokes func).
 */
22685 pa_volume_s16ne_orc_2ch (int16_t * d1, orc_int64 p1, int n)
22687 OrcExecutor _ex, *ex = &_ex;
/* p and p_inited are static, so they persist across calls. */
22688 static int p_inited = 0;
22689 static OrcProgram *p = 0;
22690 void (*func) (OrcExecutor *);
22693 orc_once_mutex_lock ();
22695 OrcCompileResult result;
22697 p = orc_program_new ();
22698 orc_program_set_name (p, "pa_volume_s16ne_orc_2ch");
22699 orc_program_set_backup_function (p, _backup_pa_volume_s16ne_orc_2ch);
22700 orc_program_add_destination (p, 4, "d1");
22701 orc_program_add_constant (p, 4, 0x00000000, "c1");
22702 orc_program_add_constant (p, 4, 0x00000010, "c2");
22703 orc_program_add_parameter_int64 (p, 8, "p1");
22704 orc_program_add_temporary (p, 8, "t1");
22705 orc_program_add_temporary (p, 8, "t2");
22706 orc_program_add_temporary (p, 8, "t3");
22707 orc_program_add_temporary (p, 8, "t4");
22708 orc_program_add_temporary (p, 8, "t5");
/* t1 <- p1; t2/t3 <- widened copies of d1 (d1 is read as well as written). */
22710 orc_program_append_2 (p, "loadpq", 0, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);
22711 orc_program_append_2 (p, "convuwl", 1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
22712 orc_program_append_2 (p, "convswl", 1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
/* t5 <- (c1 > t2) mask, then masked with t1. */
22713 orc_program_append_2 (p, "cmpgtsw", 2, ORC_VAR_T5, ORC_VAR_C1, ORC_VAR_T2, ORC_VAR_D1);
22714 orc_program_append_2 (p, "andw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1);
/* t4 <- mulhuw(t2, t1) - t5. */
22715 orc_program_append_2 (p, "mulhuw", 2, ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
22716 orc_program_append_2 (p, "subl", 1, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_D1);
/* t1 <- t1 >> c2 (16); t3 <- t3 * t1; t4 <- t4 + t3. */
22717 orc_program_append_2 (p, "shrul", 1, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2, ORC_VAR_D1);
22718 orc_program_append_2 (p, "mulll", 1, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1);
22719 orc_program_append_2 (p, "addl", 1, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1);
/* d1 <- t4 narrowed by convssslw. */
22720 orc_program_append_2 (p, "convssslw", 1, ORC_VAR_D1, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
22722 result = orc_program_compile (p);
22725 orc_once_mutex_unlock ();
22730 ex->arrays[ORC_VAR_D1] = d1;
/*
 * The 64-bit parameter travels as two 32-bit halves: x2[0] in the
 * ORC_VAR_P1 slot and x2[1] in the ORC_VAR_T1 slot (tmp presumably
 * holds p1 -- its setup is not visible here).
 */
22734 ex->params[ORC_VAR_P1] = tmp.x2[0];
22735 ex->params[ORC_VAR_T1] = tmp.x2[1];
22738 func = p->code_exec;