2 * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy
4 * Permission to use, copy, modify, distribute, and sell this software and its
5 * documentation for any purpose is hereby granted without fee, provided that
6 * the above copyright notice appear in all copies and that both that
7 * copyright notice and this permission notice appear in supporting
8 * documentation, and that the name of ARM Ltd not be used in
9 * advertising or publicity pertaining to distribution of the software without
10 * specific, written prior permission. ARM Ltd makes no
11 * representations about the suitability of this software for any purpose. It
12 * is provided "as is" without express or implied warranty.
14 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
15 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
16 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
17 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
19 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
20 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
23 * Author: Ian Rickards (ian.rickards@arm.com)
24 * Author: Jonathan Morton (jonathan.morton@movial.com)
25 * Author: Markku Vire (markku.vire@movial.com)
34 #include "pixman-private.h"
/*
 * BIND_SRC_NULL_DST(name, src_type, src_cnt, dst_type, dst_cnt)
 *
 * Declares the assembly routine pixman_composite_<name>_asm_neon and
 * defines a C wrapper neon_composite_<name> around it for operations that
 * read an image source and write a destination.
 *
 * The wrapper resolves the first-line pointer and stride of the source
 * (at src_x, src_y) and of the destination (at dest_x, dest_y) with
 * PIXMAN_IMAGE_GET_LINE, then calls the assembly routine with
 * width, height, dst_line, dst_stride, src_line, src_stride.
 *
 * src_type / dst_type are the C element types of the line pointers;
 * src_cnt / dst_cnt are forwarded to PIXMAN_IMAGE_GET_LINE (the
 * instantiations in this file pass 3 together with uint8_t for the 0888
 * formats, so it looks like "elements per pixel" -- TODO confirm against
 * the PIXMAN_IMAGE_GET_LINE definition).
 *
 * mask_image is part of the wrapper signature but is not referenced in
 * the lines of the macro visible here.
 */
36 #define BIND_SRC_NULL_DST(name, src_type, src_cnt, dst_type, dst_cnt) \
38 pixman_composite_##name##_asm_neon (int32_t w, \
43 int32_t src_stride); \
46 neon_composite_##name (pixman_implementation_t *imp, \
48 pixman_image_t * src_image, \
49 pixman_image_t * mask_image, \
50 pixman_image_t * dst_image, \
62 int32_t dst_stride, src_stride; \
64 PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
65 src_stride, src_line, src_cnt); \
66 PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
67 dst_stride, dst_line, dst_cnt); \
69 pixman_composite_##name##_asm_neon (width, height, \
70 dst_line, dst_stride, \
71 src_line, src_stride); \
/*
 * BIND_N_NULL_DST(name, dst_type, dst_cnt)
 *
 * Declares pixman_composite_<name>_asm_neon and defines the wrapper
 * neon_composite_<name> for operations whose source is a solid value.
 *
 * The wrapper obtains the solid value from src_image with
 * _pixman_image_get_solid, using the destination image's bits.format,
 * resolves the destination line pointer and stride at (dest_x, dest_y)
 * with PIXMAN_IMAGE_GET_LINE, and calls the assembly routine starting
 * with width, height, dst_line, dst_stride.
 *
 * NOTE(review): the remaining call arguments are not visible in this
 * excerpt; presumably the solid value `src` is passed last -- confirm.
 */
74 #define BIND_N_NULL_DST(name, dst_type, dst_cnt) \
76 pixman_composite_##name##_asm_neon (int32_t w, \
83 neon_composite_##name (pixman_implementation_t *imp, \
85 pixman_image_t * src_image, \
86 pixman_image_t * mask_image, \
87 pixman_image_t * dst_image, \
101 src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
106 PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
107 dst_stride, dst_line, dst_cnt); \
109 pixman_composite_##name##_asm_neon (width, height, \
110 dst_line, dst_stride, \
/*
 * BIND_N_MASK_DST(name, mask_type, mask_cnt, dst_type, dst_cnt)
 *
 * Declares pixman_composite_<name>_asm_neon and defines the wrapper
 * neon_composite_<name> for operations with a solid source, an image
 * mask and an image destination.
 *
 * The wrapper obtains the solid value from src_image with
 * _pixman_image_get_solid (in the destination's bits.format), resolves
 * the destination line at (dest_x, dest_y) and the mask line at
 * (mask_x, mask_y) with PIXMAN_IMAGE_GET_LINE, and calls the assembly
 * routine with the rectangle size plus the destination and mask line
 * pointers and strides.
 *
 * mask_type / dst_type are the C element types of the line pointers;
 * mask_cnt / dst_cnt are forwarded unchanged to PIXMAN_IMAGE_GET_LINE.
 */
114 #define BIND_N_MASK_DST(name, mask_type, mask_cnt, dst_type, dst_cnt) \
116 pixman_composite_##name##_asm_neon (int32_t w, \
119 int32_t dst_stride, \
123 int32_t mask_stride); \
126 neon_composite_##name (pixman_implementation_t *imp, \
128 pixman_image_t * src_image, \
129 pixman_image_t * mask_image, \
130 pixman_image_t * dst_image, \
140 dst_type *dst_line; \
141 mask_type *mask_line; \
142 int32_t dst_stride, mask_stride; \
145 src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
150 PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
151 dst_stride, dst_line, dst_cnt); \
152 PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
153 mask_stride, mask_line, mask_cnt); \
155 pixman_composite_##name##_asm_neon (width, height, \
156 dst_line, dst_stride, \
158 mask_line, mask_stride); \
/*
 * BIND_SRC_N_DST(name, src_type, src_cnt, dst_type, dst_cnt)
 *
 * Declares pixman_composite_<name>_asm_neon and defines the wrapper
 * neon_composite_<name> for operations with an image source, a solid
 * mask and an image destination.
 *
 * The wrapper obtains the solid mask value from mask_image with
 * _pixman_image_get_solid (in the destination's bits.format), resolves
 * the destination line at (dest_x, dest_y) and the source line at
 * (src_x, src_y) with PIXMAN_IMAGE_GET_LINE, and calls the assembly
 * routine starting with width, height, dst_line, dst_stride, src_line,
 * src_stride.
 *
 * NOTE(review): the final call argument is not visible in this excerpt;
 * presumably it is the solid `mask` value -- confirm.
 */
161 #define BIND_SRC_N_DST(name, src_type, src_cnt, dst_type, dst_cnt) \
163 pixman_composite_##name##_asm_neon (int32_t w, \
166 int32_t dst_stride, \
168 int32_t src_stride, \
172 neon_composite_##name (pixman_implementation_t *imp, \
174 pixman_image_t * src_image, \
175 pixman_image_t * mask_image, \
176 pixman_image_t * dst_image, \
186 dst_type *dst_line; \
187 src_type *src_line; \
188 int32_t dst_stride, src_stride; \
191 mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\
196 PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
197 dst_stride, dst_line, dst_cnt); \
198 PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
199 src_stride, src_line, src_cnt); \
201 pixman_composite_##name##_asm_neon (width, height, \
202 dst_line, dst_stride, \
203 src_line, src_stride, \
/*
 * BIND_SRC_MASK_DST(name, src_type, src_cnt, mask_type, mask_cnt, ...)
 *
 * Declares pixman_composite_<name>_asm_neon and defines the wrapper
 * neon_composite_<name> for operations where source, mask and
 * destination are all images.
 *
 * The wrapper resolves the line pointer and stride of the destination
 * (dest_x, dest_y), the source (src_x, src_y) and the mask
 * (mask_x, mask_y) with PIXMAN_IMAGE_GET_LINE, then calls the assembly
 * routine with width, height and the three pointer/stride pairs in the
 * order destination, source, mask.
 *
 * The body also uses dst_type and dst_cnt; the continuation of the
 * parameter list that declares them is not visible in this excerpt.
 */
207 #define BIND_SRC_MASK_DST(name, src_type, src_cnt, mask_type, mask_cnt, \
210 pixman_composite_##name##_asm_neon (int32_t w, \
213 int32_t dst_stride, \
215 int32_t src_stride, \
217 int32_t mask_stride); \
220 neon_composite_##name (pixman_implementation_t *imp, \
222 pixman_image_t * src_image, \
223 pixman_image_t * mask_image, \
224 pixman_image_t * dst_image, \
234 dst_type *dst_line; \
235 src_type *src_line; \
236 mask_type *mask_line; \
237 int32_t dst_stride, src_stride, mask_stride; \
239 PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
240 dst_stride, dst_line, dst_cnt); \
241 PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
242 src_stride, src_line, src_cnt); \
243 PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
244 mask_stride, mask_line, mask_cnt); \
246 pixman_composite_##name##_asm_neon (width, height, \
247 dst_line, dst_stride, \
248 src_line, src_stride, \
249 mask_line, mask_stride); \
/*
 * Instantiate the wrappers. Each line produces a prototype for
 * pixman_composite_<name>_asm_neon and a neon_composite_<name> function.
 */

/* Image source, no mask: SRC and ADD style routines.  The 0888 formats
 * are bound as uint8_t lines with a count of 3. */
253 BIND_SRC_NULL_DST(src_8888_8888, uint32_t, 1, uint32_t, 1)
254 BIND_SRC_NULL_DST(src_x888_8888, uint32_t, 1, uint32_t, 1)
255 BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1)
256 BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
257 BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
258 BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
259 BIND_SRC_NULL_DST(src_0888_8888_rev, uint8_t, 3, uint32_t, 1)
260 BIND_SRC_NULL_DST(src_0888_0565_rev, uint8_t, 3, uint16_t, 1)
261 BIND_SRC_NULL_DST(src_pixbuf_8888, uint32_t, 1, uint32_t, 1)
262 BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
263 BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
/* Solid source, no mask. */
265 BIND_N_NULL_DST(over_n_0565, uint16_t, 1)
266 BIND_N_NULL_DST(over_n_8888, uint32_t, 1)
267 BIND_N_NULL_DST(over_reverse_n_8888, uint32_t, 1)
/* Image source, no mask: OVER routines. */
269 BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1)
270 BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
/* Solid source with an image mask (mask type first, then destination). */
272 BIND_N_MASK_DST(over_n_8_0565, uint8_t, 1, uint16_t, 1)
273 BIND_N_MASK_DST(over_n_8_8888, uint8_t, 1, uint32_t, 1)
274 BIND_N_MASK_DST(over_n_8888_8888_ca, uint32_t, 1, uint32_t, 1)
275 BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1)
/* Image source with a solid mask. */
277 BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1)
/* Image source with an image mask (source, mask, destination types). */
279 BIND_SRC_MASK_DST(add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
280 BIND_SRC_MASK_DST(add_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
281 BIND_SRC_MASK_DST(over_8888_8_8888, uint32_t, 1, uint8_t, 1, uint32_t, 1)
282 BIND_SRC_MASK_DST(over_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
/*
 * Prototypes of the NEON assembly solid-fill routines used by
 * pixman_fill_neon, which calls them with uint8_t, uint16_t and uint32_t
 * destination pointers respectively.  Only the first parameter (int32_t w)
 * of each prototype is visible in this excerpt.
 */
285 pixman_composite_src_n_8_asm_neon (int32_t w,
292 pixman_composite_src_n_0565_asm_neon (int32_t w,
299 pixman_composite_src_n_8888_asm_neon (int32_t w,
/*
 * Fills a rectangle of the buffer `bits` using one of the
 * pixman_composite_src_n_*_asm_neon routines.
 *
 * The start address passed to each routine is computed in bytes as
 * bits + y * byte_stride + x * (bytes per pixel), with 1, 2 or 4 bytes per
 * pixel for the uint8_t, uint16_t and uint32_t variants.
 *
 * NOTE(review): the selection logic between the three calls, the remaining
 * call arguments and the return statements are not visible in this excerpt.
 */
306 pixman_fill_neon (uint32_t *bits,
315 /* pixman expresses stride in 32-bit units; convert it to bytes for the address arithmetic below */
316 uint32_t byte_stride = stride * sizeof(uint32_t);
/* uint8_t destination: x advances one byte per pixel */
321 pixman_composite_src_n_8_asm_neon (
324 (uint8_t *)(((char *) bits) + y * byte_stride + x),
/* uint16_t destination: x advances two bytes per pixel */
329 pixman_composite_src_n_0565_asm_neon (
332 (uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
/* uint32_t destination: x advances four bytes per pixel */
337 pixman_composite_src_n_8888_asm_neon (
340 (uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
/*
 * Copies a rectangle from src_bits to dst_bits with the NEON assembly
 * copy routines src_0565_0565 (uint16_t pixels) and src_8888_8888
 * (uint32_t pixels).
 *
 * Row offsets are computed in bytes as y * stride * 4, i.e. the incoming
 * strides are treated as counts of 4-byte units.  The strides handed to the
 * assembly routines are rescaled to the pixel type: stride * 2 for the
 * uint16_t routine, unchanged for the uint32_t routine.
 */
350 pixman_blt_neon (uint32_t *src_bits,
/* Source and destination depths are compared first.
 * NOTE(review): the statement guarded by this test is not visible in this
 * excerpt; presumably mixed-depth copies are rejected -- confirm. */
363 if (src_bpp != dst_bpp)
/* 2 bytes per pixel: x offset is x * 2 bytes, stride passed in uint16_t units */
369 pixman_composite_src_0565_0565_asm_neon (
371 (uint16_t *)(((char *) dst_bits) +
372 dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
373 (uint16_t *)(((char *) src_bits) +
374 src_y * src_stride * 4 + src_x * 2), src_stride * 2);
/* 4 bytes per pixel: x offset is x * 4 bytes, stride passed unchanged */
377 pixman_composite_src_8888_8888_asm_neon (
379 (uint32_t *)(((char *) dst_bits) +
380 dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
381 (uint32_t *)(((char *) src_bits) +
382 src_y * src_stride * 4 + src_x * 4), src_stride);
/*
 * Table of compositing fast paths offered by the NEON implementation; it is
 * passed to _pixman_implementation_create when the implementation is built.
 *
 * Each entry appears to read (operator, source, mask, destination, handler),
 * where `null` means "no mask" and `solid` a solid-colour image -- this
 * matches the handler names (e.g. over_n_8_0565 = solid source, a8 mask,
 * 0565 destination), but confirm against the PIXMAN_STD_FAST_PATH
 * definition.  The same handler is reused for formats that differ only in
 * channel order (r5g6b5/b5g6r5, a8r8g8b8/a8b8g8r8).
 */
389 static const pixman_fast_path_t arm_neon_fast_paths[] =
391 PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565),
392 PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565),
393 PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
394 PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
395 PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
396 PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
397 PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888),
398 PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888),
399 PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888),
400 PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888),
401 PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888),
402 PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888),
403 PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888),
404 PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888),
405 PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888),
406 PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888),
407 PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
408 PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
409 PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888),
410 PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev),
411 PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev),
412 PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888),
413 PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565),
414 PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565),
415 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888),
416 PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888),
417 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888),
418 PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888),
419 PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565),
420 PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888),
421 PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888),
422 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
423 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
424 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
425 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
426 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888),
427 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888),
428 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888),
429 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888),
430 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888),
431 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888),
432 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
433 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565),
434 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565),
435 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888),
436 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888),
437 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888),
438 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888),
/* OVER from an x888 source reuses the SRC x888 -> 8888 handler; presumably
 * valid because such a source carries no alpha -- TODO confirm. */
439 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
440 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
441 PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8),
442 PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8),
443 PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
444 PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8000_8000),
445 PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888),
446 PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888),
447 PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
448 PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
/*
 * blt entry point of the NEON implementation.
 *
 * Calls pixman_blt_neon with the caller's arguments; when that call reports
 * failure (zero), the request is forwarded to _pixman_implementation_blt
 * with the same buffer, stride, depth and rectangle arguments.
 *
 * NOTE(review): the first argument of the fallback call and the return on
 * the success path are not visible in this excerpt.
 */
454 arm_neon_blt (pixman_implementation_t *imp,
468 if (!pixman_blt_neon (
469 src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
470 src_x, src_y, dst_x, dst_y, width, height))
/* NEON path declined the request: hand it to the fallback blt. */
473 return _pixman_implementation_blt (
475 src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
476 src_x, src_y, dst_x, dst_y, width, height);
/*
 * fill entry point of the NEON implementation.
 *
 * Tries pixman_fill_neon first and otherwise forwards the same arguments
 * to imp->delegate through _pixman_implementation_fill.
 *
 * NOTE(review): the lines between the `if` and the delegating `return` are
 * missing from this excerpt; confirm that a successful NEON fill returns
 * before the fallback is reached.
 */
483 arm_neon_fill (pixman_implementation_t *imp,
493 if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor))
496 return _pixman_implementation_fill (
497 imp->delegate, bits, stride, bpp, x, y, width, height, xor);
/*
 * BIND_COMBINE_U(name)
 *
 * Declares two assembly scanline routines,
 *   pixman_composite_scanline_<name>_mask_asm_neon (w, dst, src, mask) and
 *   pixman_composite_scanline_<name>_asm_neon (w, dst, src),
 * and defines the combiner neon_combine_<name>_u, which calls one of them
 * with (width, dest, src[, mask]).
 *
 * NOTE(review): the condition choosing between the masked and unmasked call
 * is not visible in this excerpt; presumably it tests whether `mask` is
 * non-NULL -- confirm.
 */
500 #define BIND_COMBINE_U(name) \
502 pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \
503 const uint32_t *dst, \
504 const uint32_t *src, \
505 const uint32_t *mask); \
508 pixman_composite_scanline_##name##_asm_neon (int32_t w, \
509 const uint32_t *dst, \
510 const uint32_t *src); \
513 neon_combine_##name##_u (pixman_implementation_t *imp, \
516 const uint32_t * src, \
517 const uint32_t * mask, \
521 pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \
524 pixman_composite_scanline_##name##_asm_neon (width, dest, src); \
/*
 * Defines neon_combine_over_u.
 * NOTE(review): neon_combine_add_u is installed for PIXMAN_OP_ADD further
 * down, but no BIND_COMBINE_U (add) is visible in this excerpt -- confirm
 * that it is instantiated.
 */
527 BIND_COMBINE_U (over)
530 pixman_implementation_t *
531 _pixman_implementation_create_arm_neon (void)
533 pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
534 pixman_implementation_t *imp =
535 _pixman_implementation_create (general, arm_neon_fast_paths);
537 imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
538 imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
540 imp->blt = arm_neon_blt;
541 imp->fill = arm_neon_fill;