ARM: Helper ARM NEON assembly binding macros moved into a separate header
[profile/ivi/pixman.git] / pixman / pixman-arm-neon.c
1 /*
2  * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy
3  *
4  * Permission to use, copy, modify, distribute, and sell this software and its
5  * documentation for any purpose is hereby granted without fee, provided that
6  * the above copyright notice appear in all copies and that both that
7  * copyright notice and this permission notice appear in supporting
8  * documentation, and that the name of ARM Ltd not be used in
9  * advertising or publicity pertaining to distribution of the software without
10  * specific, written prior permission.  ARM Ltd makes no
11  * representations about the suitability of this software for any purpose.  It
12  * is provided "as is" without express or implied warranty.
13  *
14  * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
15  * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
16  * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
17  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
19  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
20  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
21  * SOFTWARE.
22  *
23  * Author:  Ian Rickards (ian.rickards@arm.com)
24  * Author:  Jonathan Morton (jonathan.morton@movial.com)
25  * Author:  Markku Vire (markku.vire@movial.com)
26  *
27  */
28
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>
31 #endif
32
33 #include <string.h>
34 #include "pixman-private.h"
35 #include "pixman-arm-common.h"
36
37 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
38                                    uint32_t, 1, uint32_t, 1)
39 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
40                                    uint32_t, 1, uint32_t, 1)
41 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
42                                    uint16_t, 1, uint16_t, 1)
43 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
44                                    uint8_t, 3, uint8_t, 3)
45 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
46                                    uint32_t, 1, uint16_t, 1)
47 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
48                                    uint16_t, 1, uint32_t, 1)
49 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
50                                    uint8_t, 3, uint32_t, 1)
51 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
52                                    uint8_t, 3, uint16_t, 1)
53 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
54                                    uint32_t, 1, uint32_t, 1)
55 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8000_8000,
56                                    uint8_t, 1, uint8_t, 1)
57 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
58                                    uint32_t, 1, uint32_t, 1)
59 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
60                                    uint32_t, 1, uint16_t, 1)
61 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
62                                    uint32_t, 1, uint32_t, 1)
63
64 PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565,
65                                  uint16_t, 1)
66 PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_8888,
67                                  uint32_t, 1)
68 PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_reverse_n_8888,
69                                  uint32_t, 1)
70
71 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_0565,
72                                       uint8_t, 1, uint16_t, 1)
73 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8888,
74                                       uint8_t, 1, uint32_t, 1)
75 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca,
76                                       uint32_t, 1, uint32_t, 1)
77 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
78                                       uint8_t, 1, uint8_t, 1)
79
80 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
81                                      uint32_t, 1, uint32_t, 1)
82
83 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
84                                         uint8_t, 1, uint8_t, 1, uint8_t, 1)
85 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
86                                         uint32_t, 1, uint32_t, 1, uint32_t, 1)
87 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
88                                         uint32_t, 1, uint8_t, 1, uint32_t, 1)
89 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
90                                         uint32_t, 1, uint32_t, 1, uint32_t, 1)
91
/*
 * Prototypes of the NEON solid-fill routines used by pixman_fill_neon ().
 * The routines themselves are not defined in this file.  One variant exists
 * per destination element size (uint8_t, uint16_t, uint32_t); each takes the
 * width and height of the area, the address of its first element, the
 * destination stride and the fill value.
 */
void pixman_composite_src_n_8_asm_neon (
    int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);

void pixman_composite_src_n_0565_asm_neon (
    int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);

void pixman_composite_src_n_8888_asm_neon (
    int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
112
113 static pixman_bool_t
114 pixman_fill_neon (uint32_t *bits,
115                   int       stride,
116                   int       bpp,
117                   int       x,
118                   int       y,
119                   int       width,
120                   int       height,
121                   uint32_t  _xor)
122 {
123     /* stride is always multiple of 32bit units in pixman */
124     uint32_t byte_stride = stride * sizeof(uint32_t);
125
126     switch (bpp)
127     {
128     case 8:
129         pixman_composite_src_n_8_asm_neon (
130                 width,
131                 height,
132                 (uint8_t *)(((char *) bits) + y * byte_stride + x),
133                 byte_stride,
134                 _xor & 0xff);
135         return TRUE;
136     case 16:
137         pixman_composite_src_n_0565_asm_neon (
138                 width,
139                 height,
140                 (uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
141                 byte_stride / 2,
142                 _xor & 0xffff);
143         return TRUE;
144     case 32:
145         pixman_composite_src_n_8888_asm_neon (
146                 width,
147                 height,
148                 (uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
149                 byte_stride / 4,
150                 _xor);
151         return TRUE;
152     default:
153         return FALSE;
154     }
155 }
156
157 static pixman_bool_t
158 pixman_blt_neon (uint32_t *src_bits,
159                  uint32_t *dst_bits,
160                  int       src_stride,
161                  int       dst_stride,
162                  int       src_bpp,
163                  int       dst_bpp,
164                  int       src_x,
165                  int       src_y,
166                  int       dst_x,
167                  int       dst_y,
168                  int       width,
169                  int       height)
170 {
171     if (src_bpp != dst_bpp)
172         return FALSE;
173
174     switch (src_bpp)
175     {
176     case 16:
177         pixman_composite_src_0565_0565_asm_neon (
178                 width, height,
179                 (uint16_t *)(((char *) dst_bits) +
180                 dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
181                 (uint16_t *)(((char *) src_bits) +
182                 src_y * src_stride * 4 + src_x * 2), src_stride * 2);
183         return TRUE;
184     case 32:
185         pixman_composite_src_8888_8888_asm_neon (
186                 width, height,
187                 (uint32_t *)(((char *) dst_bits) +
188                 dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
189                 (uint32_t *)(((char *) src_bits) +
190                 src_y * src_stride * 4 + src_x * 4), src_stride);
191         return TRUE;
192     default:
193         return FALSE;
194     }
195 }
196
197 static const pixman_fast_path_t arm_neon_fast_paths[] =
198 {
199     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     r5g6b5,   neon_composite_src_0565_0565),
200     PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     b5g6r5,   neon_composite_src_0565_0565),
201     PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
202     PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
203     PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     b5g6r5,   neon_composite_src_8888_0565),
204     PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     b5g6r5,   neon_composite_src_8888_0565),
205     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     a8r8g8b8, neon_composite_src_0565_8888),
206     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     x8r8g8b8, neon_composite_src_0565_8888),
207     PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     a8b8g8r8, neon_composite_src_0565_8888),
208     PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     x8b8g8r8, neon_composite_src_0565_8888),
209     PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     x8r8g8b8, neon_composite_src_8888_8888),
210     PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     x8r8g8b8, neon_composite_src_8888_8888),
211     PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     x8b8g8r8, neon_composite_src_8888_8888),
212     PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     x8b8g8r8, neon_composite_src_8888_8888),
213     PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     a8r8g8b8, neon_composite_src_8888_8888),
214     PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     a8b8g8r8, neon_composite_src_8888_8888),
215     PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
216     PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
217     PIXMAN_STD_FAST_PATH (SRC,  r8g8b8,   null,     r8g8b8,   neon_composite_src_0888_0888),
218     PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     x8r8g8b8, neon_composite_src_0888_8888_rev),
219     PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     r5g6b5,   neon_composite_src_0888_0565_rev),
220     PIXMAN_STD_FAST_PATH (SRC,  pixbuf,   pixbuf,   a8r8g8b8, neon_composite_src_pixbuf_8888),
221     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   neon_composite_over_n_8_0565),
222     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   neon_composite_over_n_8_0565),
223     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8r8g8b8, neon_composite_over_n_8_8888),
224     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8r8g8b8, neon_composite_over_n_8_8888),
225     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8b8g8r8, neon_composite_over_n_8_8888),
226     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8b8g8r8, neon_composite_over_n_8_8888),
227     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     r5g6b5,   neon_composite_over_n_0565),
228     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     a8r8g8b8, neon_composite_over_n_8888),
229     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     x8r8g8b8, neon_composite_over_n_8888),
230     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
231     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
232     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
233     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
234     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    a8r8g8b8, neon_composite_over_8888_n_8888),
235     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, neon_composite_over_8888_n_8888),
236     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       a8r8g8b8, neon_composite_over_8888_8_8888),
237     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       x8r8g8b8, neon_composite_over_8888_8_8888),
238     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       a8b8g8r8, neon_composite_over_8888_8_8888),
239     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       x8b8g8r8, neon_composite_over_8888_8_8888),
240     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
241     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     r5g6b5,   neon_composite_over_8888_0565),
242     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     b5g6r5,   neon_composite_over_8888_0565),
243     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     a8r8g8b8, neon_composite_over_8888_8888),
244     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     x8r8g8b8, neon_composite_over_8888_8888),
245     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     a8b8g8r8, neon_composite_over_8888_8888),
246     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     x8b8g8r8, neon_composite_over_8888_8888),
247     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
248     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
249     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8,       neon_composite_add_n_8_8),
250     PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       neon_composite_add_8_8_8),
251     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
252     PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       neon_composite_add_8000_8000),
253     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, neon_composite_add_8888_8888),
254     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, neon_composite_add_8888_8888),
255     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
256     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
257
258     { PIXMAN_OP_NONE },
259 };
260
261 static pixman_bool_t
262 arm_neon_blt (pixman_implementation_t *imp,
263               uint32_t *               src_bits,
264               uint32_t *               dst_bits,
265               int                      src_stride,
266               int                      dst_stride,
267               int                      src_bpp,
268               int                      dst_bpp,
269               int                      src_x,
270               int                      src_y,
271               int                      dst_x,
272               int                      dst_y,
273               int                      width,
274               int                      height)
275 {
276     if (!pixman_blt_neon (
277             src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
278             src_x, src_y, dst_x, dst_y, width, height))
279
280     {
281         return _pixman_implementation_blt (
282             imp->delegate,
283             src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
284             src_x, src_y, dst_x, dst_y, width, height);
285     }
286
287     return TRUE;
288 }
289
290 static pixman_bool_t
291 arm_neon_fill (pixman_implementation_t *imp,
292                uint32_t *               bits,
293                int                      stride,
294                int                      bpp,
295                int                      x,
296                int                      y,
297                int                      width,
298                int                      height,
299                uint32_t xor)
300 {
301     if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor))
302         return TRUE;
303
304     return _pixman_implementation_fill (
305         imp->delegate, bits, stride, bpp, x, y, width, height, xor);
306 }
307
308 #define BIND_COMBINE_U(name)                                             \
309 void                                                                     \
310 pixman_composite_scanline_##name##_mask_asm_neon (int32_t         w,     \
311                                                   const uint32_t *dst,   \
312                                                   const uint32_t *src,   \
313                                                   const uint32_t *mask); \
314                                                                          \
315 void                                                                     \
316 pixman_composite_scanline_##name##_asm_neon (int32_t         w,          \
317                                              const uint32_t *dst,        \
318                                              const uint32_t *src);       \
319                                                                          \
320 static void                                                              \
321 neon_combine_##name##_u (pixman_implementation_t *imp,                   \
322                          pixman_op_t              op,                    \
323                          uint32_t *               dest,                  \
324                          const uint32_t *         src,                   \
325                          const uint32_t *         mask,                  \
326                          int                      width)                 \
327 {                                                                        \
328     if (mask)                                                            \
329         pixman_composite_scanline_##name##_mask_asm_neon (width, dest,   \
330                                                           src, mask);    \
331     else                                                                 \
332         pixman_composite_scanline_##name##_asm_neon (width, dest, src);  \
333 }
334
335 BIND_COMBINE_U (over)
336 BIND_COMBINE_U (add)
337
338 pixman_implementation_t *
339 _pixman_implementation_create_arm_neon (void)
340 {
341     pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
342     pixman_implementation_t *imp =
343         _pixman_implementation_create (general, arm_neon_fast_paths);
344
345     imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
346     imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
347
348     imp->blt = arm_neon_blt;
349     imp->fill = arm_neon_fill;
350
351     return imp;
352 }