ARM: NEON optimization for nearest scaled over_8888_8_0565
[profile/ivi/pixman.git] / pixman / pixman-arm-neon.c
1 /*
2  * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy
3  *
4  * Permission to use, copy, modify, distribute, and sell this software and its
5  * documentation for any purpose is hereby granted without fee, provided that
6  * the above copyright notice appear in all copies and that both that
7  * copyright notice and this permission notice appear in supporting
8  * documentation, and that the name of ARM Ltd not be used in
9  * advertising or publicity pertaining to distribution of the software without
10  * specific, written prior permission.  ARM Ltd makes no
11  * representations about the suitability of this software for any purpose.  It
12  * is provided "as is" without express or implied warranty.
13  *
14  * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
15  * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
16  * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
17  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
19  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
20  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
21  * SOFTWARE.
22  *
23  * Author:  Ian Rickards (ian.rickards@arm.com)
24  * Author:  Jonathan Morton (jonathan.morton@movial.com)
25  * Author:  Markku Vire (markku.vire@movial.com)
26  *
27  */
28
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>
31 #endif
32
33 #include <string.h>
34 #include "pixman-private.h"
35 #include "pixman-arm-common.h"
36
37 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
38                                    uint32_t, 1, uint32_t, 1)
39 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
40                                    uint32_t, 1, uint32_t, 1)
41 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
42                                    uint16_t, 1, uint16_t, 1)
43 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
44                                    uint8_t, 3, uint8_t, 3)
45 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
46                                    uint32_t, 1, uint16_t, 1)
47 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
48                                    uint16_t, 1, uint32_t, 1)
49 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
50                                    uint8_t, 3, uint32_t, 1)
51 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
52                                    uint8_t, 3, uint16_t, 1)
53 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
54                                    uint32_t, 1, uint32_t, 1)
55 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888,
56                                    uint32_t, 1, uint32_t, 1)
57 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8,
58                                    uint8_t, 1, uint8_t, 1)
59 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
60                                    uint32_t, 1, uint32_t, 1)
61 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
62                                    uint32_t, 1, uint16_t, 1)
63 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
64                                    uint32_t, 1, uint32_t, 1)
65 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
66                                    uint8_t, 1, uint16_t, 1)
67
68 PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565,
69                                  uint16_t, 1)
70 PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888,
71                                  uint32_t, 1)
72 PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888,
73                                  uint32_t, 1)
74 PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8,
75                                  uint8_t, 1)
76
77 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565,
78                                       uint8_t, 1, uint16_t, 1)
79 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888,
80                                       uint8_t, 1, uint32_t, 1)
81 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca,
82                                       uint32_t, 1, uint32_t, 1)
83 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8,
84                                       uint8_t, 1, uint8_t, 1)
85 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8,
86                                       uint8_t, 1, uint8_t, 1)
87 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888,
88                                       uint8_t, 1, uint32_t, 1)
89
90 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888,
91                                      uint32_t, 1, uint32_t, 1)
92 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565,
93                                      uint32_t, 1, uint16_t, 1)
94 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565,
95                                      uint16_t, 1, uint16_t, 1)
96 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888,
97                                      uint32_t, 1, uint32_t, 1)
98
99 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
100                                         uint8_t, 1, uint8_t, 1, uint8_t, 1)
101 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
102                                         uint16_t, 1, uint8_t, 1, uint16_t, 1)
103 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888,
104                                         uint32_t, 1, uint8_t, 1, uint32_t, 1)
105 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
106                                         uint32_t, 1, uint32_t, 1, uint32_t, 1)
107 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
108                                         uint32_t, 1, uint8_t, 1, uint32_t, 1)
109 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
110                                         uint32_t, 1, uint32_t, 1, uint32_t, 1)
111 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
112                                         uint32_t, 1, uint8_t, 1, uint16_t, 1)
113 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
114                                         uint16_t, 1, uint8_t, 1, uint16_t, 1)
115
116 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER,
117                                         uint32_t, uint32_t)
118 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER,
119                                         uint32_t, uint16_t)
120 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC,
121                                         uint32_t, uint16_t)
122 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
123                                         uint16_t, uint32_t)
124
125 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
126                                            OVER, uint32_t, uint16_t)
127
/*
 * Solid fill routines for 8, 16 and 32 bit pixels.  They are only declared
 * here; going by the _asm_neon suffix the definitions presumably live in the
 * NEON assembly sources (confirm).
 *
 * As called from pixman_fill_neon: w and h give the size of the rectangle,
 * dst points at its first pixel, dst_stride is the distance between rows
 * counted in pixels of the routine's own size, and src is the fill value.
 */
void pixman_composite_src_n_8_asm_neon (int32_t  w,
                                        int32_t  h,
                                        uint8_t *dst,
                                        int32_t  dst_stride,
                                        uint8_t  src);

void pixman_composite_src_n_0565_asm_neon (int32_t   w,
                                           int32_t   h,
                                           uint16_t *dst,
                                           int32_t   dst_stride,
                                           uint16_t  src);

void pixman_composite_src_n_8888_asm_neon (int32_t   w,
                                           int32_t   h,
                                           uint32_t *dst,
                                           int32_t   dst_stride,
                                           uint32_t  src);
148
149 static pixman_bool_t
150 pixman_fill_neon (uint32_t *bits,
151                   int       stride,
152                   int       bpp,
153                   int       x,
154                   int       y,
155                   int       width,
156                   int       height,
157                   uint32_t  _xor)
158 {
159     /* stride is always multiple of 32bit units in pixman */
160     uint32_t byte_stride = stride * sizeof(uint32_t);
161
162     switch (bpp)
163     {
164     case 8:
165         pixman_composite_src_n_8_asm_neon (
166                 width,
167                 height,
168                 (uint8_t *)(((char *) bits) + y * byte_stride + x),
169                 byte_stride,
170                 _xor & 0xff);
171         return TRUE;
172     case 16:
173         pixman_composite_src_n_0565_asm_neon (
174                 width,
175                 height,
176                 (uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
177                 byte_stride / 2,
178                 _xor & 0xffff);
179         return TRUE;
180     case 32:
181         pixman_composite_src_n_8888_asm_neon (
182                 width,
183                 height,
184                 (uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
185                 byte_stride / 4,
186                 _xor);
187         return TRUE;
188     default:
189         return FALSE;
190     }
191 }
192
193 static pixman_bool_t
194 pixman_blt_neon (uint32_t *src_bits,
195                  uint32_t *dst_bits,
196                  int       src_stride,
197                  int       dst_stride,
198                  int       src_bpp,
199                  int       dst_bpp,
200                  int       src_x,
201                  int       src_y,
202                  int       dst_x,
203                  int       dst_y,
204                  int       width,
205                  int       height)
206 {
207     if (src_bpp != dst_bpp)
208         return FALSE;
209
210     switch (src_bpp)
211     {
212     case 16:
213         pixman_composite_src_0565_0565_asm_neon (
214                 width, height,
215                 (uint16_t *)(((char *) dst_bits) +
216                 dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
217                 (uint16_t *)(((char *) src_bits) +
218                 src_y * src_stride * 4 + src_x * 2), src_stride * 2);
219         return TRUE;
220     case 32:
221         pixman_composite_src_8888_8888_asm_neon (
222                 width, height,
223                 (uint32_t *)(((char *) dst_bits) +
224                 dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
225                 (uint32_t *)(((char *) src_bits) +
226                 src_y * src_stride * 4 + src_x * 4), src_stride);
227         return TRUE;
228     default:
229         return FALSE;
230     }
231 }
232
233 static const pixman_fast_path_t arm_neon_fast_paths[] =
234 {
235     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     r5g6b5,   neon_composite_src_0565_0565),
236     PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     b5g6r5,   neon_composite_src_0565_0565),
237     PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
238     PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
239     PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     b5g6r5,   neon_composite_src_8888_0565),
240     PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     b5g6r5,   neon_composite_src_8888_0565),
241     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     a8r8g8b8, neon_composite_src_0565_8888),
242     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     x8r8g8b8, neon_composite_src_0565_8888),
243     PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     a8b8g8r8, neon_composite_src_0565_8888),
244     PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     x8b8g8r8, neon_composite_src_0565_8888),
245     PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     x8r8g8b8, neon_composite_src_8888_8888),
246     PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     x8r8g8b8, neon_composite_src_8888_8888),
247     PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     x8b8g8r8, neon_composite_src_8888_8888),
248     PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     x8b8g8r8, neon_composite_src_8888_8888),
249     PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     a8r8g8b8, neon_composite_src_8888_8888),
250     PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     a8b8g8r8, neon_composite_src_8888_8888),
251     PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
252     PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
253     PIXMAN_STD_FAST_PATH (SRC,  r8g8b8,   null,     r8g8b8,   neon_composite_src_0888_0888),
254     PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     x8r8g8b8, neon_composite_src_0888_8888_rev),
255     PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     r5g6b5,   neon_composite_src_0888_0565_rev),
256     PIXMAN_STD_FAST_PATH (SRC,  pixbuf,   pixbuf,   a8r8g8b8, neon_composite_src_pixbuf_8888),
257     PIXMAN_STD_FAST_PATH (SRC,  pixbuf,   pixbuf,   a8b8g8r8, neon_composite_src_rpixbuf_8888),
258     PIXMAN_STD_FAST_PATH (SRC,  rpixbuf,  rpixbuf,  a8r8g8b8, neon_composite_src_rpixbuf_8888),
259     PIXMAN_STD_FAST_PATH (SRC,  rpixbuf,  rpixbuf,  a8b8g8r8, neon_composite_src_pixbuf_8888),
260     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8,       neon_composite_over_n_8_8),
261     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   neon_composite_over_n_8_0565),
262     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   neon_composite_over_n_8_0565),
263     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8r8g8b8, neon_composite_over_n_8_8888),
264     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8r8g8b8, neon_composite_over_n_8_8888),
265     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8b8g8r8, neon_composite_over_n_8_8888),
266     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8b8g8r8, neon_composite_over_n_8_8888),
267     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     r5g6b5,   neon_composite_over_n_0565),
268     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     a8r8g8b8, neon_composite_over_n_8888),
269     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     x8r8g8b8, neon_composite_over_n_8888),
270     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
271     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
272     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
273     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
274     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    a8r8g8b8, neon_composite_over_8888_n_8888),
275     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, neon_composite_over_8888_n_8888),
276     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    r5g6b5,   neon_composite_over_8888_n_0565),
277     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid,    b5g6r5,   neon_composite_over_8888_n_0565),
278     PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   solid,    r5g6b5,   neon_composite_over_0565_n_0565),
279     PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   solid,    b5g6r5,   neon_composite_over_0565_n_0565),
280     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       a8r8g8b8, neon_composite_over_8888_8_8888),
281     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       x8r8g8b8, neon_composite_over_8888_8_8888),
282     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       a8b8g8r8, neon_composite_over_8888_8_8888),
283     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       x8b8g8r8, neon_composite_over_8888_8_8888),
284     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       r5g6b5,   neon_composite_over_8888_8_0565),
285     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       b5g6r5,   neon_composite_over_8888_8_0565),
286     PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   a8,       r5g6b5,   neon_composite_over_0565_8_0565),
287     PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   a8,       b5g6r5,   neon_composite_over_0565_8_0565),
288     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
289     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     r5g6b5,   neon_composite_over_8888_0565),
290     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     b5g6r5,   neon_composite_over_8888_0565),
291     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     a8r8g8b8, neon_composite_over_8888_8888),
292     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     x8r8g8b8, neon_composite_over_8888_8888),
293     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     a8b8g8r8, neon_composite_over_8888_8888),
294     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     x8b8g8r8, neon_composite_over_8888_8888),
295     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
296     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
297     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8,       neon_composite_add_n_8_8),
298     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8r8g8b8, neon_composite_add_n_8_8888),
299     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8b8g8r8, neon_composite_add_n_8_8888),
300     PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       neon_composite_add_8_8_8),
301     PIXMAN_STD_FAST_PATH (ADD,  r5g6b5,   a8,       r5g6b5,   neon_composite_add_0565_8_0565),
302     PIXMAN_STD_FAST_PATH (ADD,  b5g6r5,   a8,       b5g6r5,   neon_composite_add_0565_8_0565),
303     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8,       a8r8g8b8, neon_composite_add_8888_8_8888),
304     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, a8,       a8b8g8r8, neon_composite_add_8888_8_8888),
305     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
306     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, solid,    a8r8g8b8, neon_composite_add_8888_n_8888),
307     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, solid,    a8b8g8r8, neon_composite_add_8888_n_8888),
308     PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       neon_composite_add_8_8),
309     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, neon_composite_add_8888_8888),
310     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, neon_composite_add_8888_8888),
311     PIXMAN_STD_FAST_PATH (IN,   solid,    null,     a8,       neon_composite_in_n_8),
312     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
313     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
314     PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, r5g6b5,   neon_composite_out_reverse_8_0565),
315     PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, b5g6r5,   neon_composite_out_reverse_8_0565),
316
317     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
318     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
319     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
320     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
321
322     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
323     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
324
325     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
326     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
327     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
328     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
329
330     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
331     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
332     /* Note: NONE repeat is not supported yet */
333     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
334     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
335     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
336     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
337
338     PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565),
339     PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565),
340
341     { PIXMAN_OP_NONE },
342 };
343
344 static pixman_bool_t
345 arm_neon_blt (pixman_implementation_t *imp,
346               uint32_t *               src_bits,
347               uint32_t *               dst_bits,
348               int                      src_stride,
349               int                      dst_stride,
350               int                      src_bpp,
351               int                      dst_bpp,
352               int                      src_x,
353               int                      src_y,
354               int                      dst_x,
355               int                      dst_y,
356               int                      width,
357               int                      height)
358 {
359     if (!pixman_blt_neon (
360             src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
361             src_x, src_y, dst_x, dst_y, width, height))
362
363     {
364         return _pixman_implementation_blt (
365             imp->delegate,
366             src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
367             src_x, src_y, dst_x, dst_y, width, height);
368     }
369
370     return TRUE;
371 }
372
373 static pixman_bool_t
374 arm_neon_fill (pixman_implementation_t *imp,
375                uint32_t *               bits,
376                int                      stride,
377                int                      bpp,
378                int                      x,
379                int                      y,
380                int                      width,
381                int                      height,
382                uint32_t xor)
383 {
384     if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor))
385         return TRUE;
386
387     return _pixman_implementation_fill (
388         imp->delegate, bits, stride, bpp, x, y, width, height, xor);
389 }
390
391 #define BIND_COMBINE_U(name)                                             \
392 void                                                                     \
393 pixman_composite_scanline_##name##_mask_asm_neon (int32_t         w,     \
394                                                   const uint32_t *dst,   \
395                                                   const uint32_t *src,   \
396                                                   const uint32_t *mask); \
397                                                                          \
398 void                                                                     \
399 pixman_composite_scanline_##name##_asm_neon (int32_t         w,          \
400                                              const uint32_t *dst,        \
401                                              const uint32_t *src);       \
402                                                                          \
403 static void                                                              \
404 neon_combine_##name##_u (pixman_implementation_t *imp,                   \
405                          pixman_op_t              op,                    \
406                          uint32_t *               dest,                  \
407                          const uint32_t *         src,                   \
408                          const uint32_t *         mask,                  \
409                          int                      width)                 \
410 {                                                                        \
411     if (mask)                                                            \
412         pixman_composite_scanline_##name##_mask_asm_neon (width, dest,   \
413                                                           src, mask);    \
414     else                                                                 \
415         pixman_composite_scanline_##name##_asm_neon (width, dest, src);  \
416 }
417
418 BIND_COMBINE_U (over)
419 BIND_COMBINE_U (add)
420 BIND_COMBINE_U (out_reverse)
421
422 pixman_implementation_t *
423 _pixman_implementation_create_arm_neon (pixman_implementation_t *fallback)
424 {
425     pixman_implementation_t *imp =
426         _pixman_implementation_create (fallback, arm_neon_fast_paths);
427
428     imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
429     imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
430     imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u;
431
432     imp->blt = arm_neon_blt;
433     imp->fill = arm_neon_fill;
434
435     return imp;
436 }