2 * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy
4 * Permission to use, copy, modify, distribute, and sell this software and its
5 * documentation for any purpose is hereby granted without fee, provided that
6 * the above copyright notice appear in all copies and that both that
7 * copyright notice and this permission notice appear in supporting
8 * documentation, and that the name of ARM Ltd not be used in
9 * advertising or publicity pertaining to distribution of the software without
10 * specific, written prior permission. ARM Ltd makes no
11 * representations about the suitability of this software for any purpose. It
12 * is provided "as is" without express or implied warranty.
14 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
15 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
16 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
17 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
19 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
20 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
23 * Author: Ian Rickards (ian.rickards@arm.com)
24 * Author: Jonathan Morton (jonathan.morton@movial.com)
25 * Author: Markku Vire (markku.vire@movial.com)
34 #include "pixman-private.h"
35 #include "pixman-arm-common.h"
37 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
38 uint32_t, 1, uint32_t, 1)
39 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
40 uint32_t, 1, uint32_t, 1)
41 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
42 uint16_t, 1, uint16_t, 1)
43 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
44 uint8_t, 3, uint8_t, 3)
45 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
46 uint32_t, 1, uint16_t, 1)
47 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
48 uint16_t, 1, uint32_t, 1)
49 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
50 uint8_t, 3, uint32_t, 1)
51 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
52 uint8_t, 3, uint16_t, 1)
53 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
54 uint32_t, 1, uint32_t, 1)
55 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8,
56 uint8_t, 1, uint8_t, 1)
57 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
58 uint32_t, 1, uint32_t, 1)
59 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
60 uint32_t, 1, uint16_t, 1)
61 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
62 uint32_t, 1, uint32_t, 1)
63 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
64 uint8_t, 1, uint16_t, 1)
66 PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565,
68 PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_8888,
70 PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_reverse_n_8888,
73 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_0565,
74 uint8_t, 1, uint16_t, 1)
75 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8888,
76 uint8_t, 1, uint32_t, 1)
77 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca,
78 uint32_t, 1, uint32_t, 1)
79 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8,
80 uint8_t, 1, uint8_t, 1)
81 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
82 uint8_t, 1, uint8_t, 1)
84 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
85 uint32_t, 1, uint32_t, 1)
86 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565,
87 uint32_t, 1, uint16_t, 1)
88 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_0565_n_0565,
89 uint16_t, 1, uint16_t, 1)
91 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
92 uint8_t, 1, uint8_t, 1, uint8_t, 1)
93 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
94 uint16_t, 1, uint8_t, 1, uint16_t, 1)
95 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888,
96 uint32_t, 1, uint8_t, 1, uint32_t, 1)
97 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
98 uint32_t, 1, uint32_t, 1, uint32_t, 1)
99 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
100 uint32_t, 1, uint8_t, 1, uint32_t, 1)
101 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
102 uint32_t, 1, uint32_t, 1, uint32_t, 1)
103 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
104 uint32_t, 1, uint8_t, 1, uint16_t, 1)
105 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
106 uint16_t, 1, uint8_t, 1, uint16_t, 1)
108 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER,
110 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER,
112 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC,
114 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
118 pixman_composite_src_n_8_asm_neon (int32_t w,
125 pixman_composite_src_n_0565_asm_neon (int32_t w,
132 pixman_composite_src_n_8888_asm_neon (int32_t w,
139 pixman_fill_neon (uint32_t *bits,
148 /* stride is always multiple of 32bit units in pixman */
149 uint32_t byte_stride = stride * sizeof(uint32_t);
154 pixman_composite_src_n_8_asm_neon (
157 (uint8_t *)(((char *) bits) + y * byte_stride + x),
162 pixman_composite_src_n_0565_asm_neon (
165 (uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
170 pixman_composite_src_n_8888_asm_neon (
173 (uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
183 pixman_blt_neon (uint32_t *src_bits,
196 if (src_bpp != dst_bpp)
202 pixman_composite_src_0565_0565_asm_neon (
204 (uint16_t *)(((char *) dst_bits) +
205 dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
206 (uint16_t *)(((char *) src_bits) +
207 src_y * src_stride * 4 + src_x * 2), src_stride * 2);
210 pixman_composite_src_8888_8888_asm_neon (
212 (uint32_t *)(((char *) dst_bits) +
213 dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
214 (uint32_t *)(((char *) src_bits) +
215 src_y * src_stride * 4 + src_x * 4), src_stride);
222 static const pixman_fast_path_t arm_neon_fast_paths[] =
224 PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565),
225 PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565),
226 PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
227 PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
228 PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
229 PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
230 PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888),
231 PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888),
232 PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888),
233 PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888),
234 PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888),
235 PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888),
236 PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888),
237 PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888),
238 PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888),
239 PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888),
240 PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
241 PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
242 PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888),
243 PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev),
244 PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev),
245 PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888),
246 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8),
247 PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565),
248 PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565),
249 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888),
250 PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888),
251 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888),
252 PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888),
253 PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565),
254 PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888),
255 PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888),
256 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
257 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
258 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
259 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
260 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888),
261 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888),
262 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565),
263 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565),
264 PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565),
265 PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565),
266 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888),
267 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888),
268 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888),
269 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888),
270 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565),
271 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565),
272 PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565),
273 PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565),
274 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
275 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565),
276 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565),
277 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888),
278 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888),
279 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888),
280 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888),
281 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
282 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
283 PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8),
284 PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8),
285 PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565),
286 PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565),
287 PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888),
288 PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888),
289 PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
290 PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8),
291 PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888),
292 PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888),
293 PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
294 PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
295 PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565),
296 PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565),
298 PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
299 PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
300 PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
301 PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
303 PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
304 PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
306 PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
307 PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
308 PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
309 PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
311 PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
312 PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
313 /* Note: NONE repeat is not supported yet */
314 SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
315 SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
316 SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
317 SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
323 arm_neon_blt (pixman_implementation_t *imp,
337 if (!pixman_blt_neon (
338 src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
339 src_x, src_y, dst_x, dst_y, width, height))
342 return _pixman_implementation_blt (
344 src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
345 src_x, src_y, dst_x, dst_y, width, height);
352 arm_neon_fill (pixman_implementation_t *imp,
362 if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor))
365 return _pixman_implementation_fill (
366 imp->delegate, bits, stride, bpp, x, y, width, height, xor);
369 #define BIND_COMBINE_U(name) \
371 pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \
372 const uint32_t *dst, \
373 const uint32_t *src, \
374 const uint32_t *mask); \
377 pixman_composite_scanline_##name##_asm_neon (int32_t w, \
378 const uint32_t *dst, \
379 const uint32_t *src); \
382 neon_combine_##name##_u (pixman_implementation_t *imp, \
385 const uint32_t * src, \
386 const uint32_t * mask, \
390 pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \
393 pixman_composite_scanline_##name##_asm_neon (width, dest, src); \
396 BIND_COMBINE_U (over)
398 BIND_COMBINE_U (out_reverse)
400 pixman_implementation_t *
401 _pixman_implementation_create_arm_neon (void)
404 pixman_implementation_t *fallback = _pixman_implementation_create_arm_simd ();
406 pixman_implementation_t *fallback = _pixman_implementation_create_fast_path ();
408 pixman_implementation_t *imp =
409 _pixman_implementation_create (fallback, arm_neon_fast_paths);
411 imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
412 imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
413 imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u;
415 imp->blt = arm_neon_blt;
416 imp->fill = arm_neon_fill;