ARM: enabled 'neon_composite_add_8_8_8' fast path
[profile/ivi/pixman.git] / pixman / pixman-arm-neon.c
1 /*
2  * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy
3  *
4  * Permission to use, copy, modify, distribute, and sell this software and its
5  * documentation for any purpose is hereby granted without fee, provided that
6  * the above copyright notice appear in all copies and that both that
7  * copyright notice and this permission notice appear in supporting
8  * documentation, and that the name of ARM Ltd not be used in
9  * advertising or publicity pertaining to distribution of the software without
10  * specific, written prior permission.  ARM Ltd makes no
11  * representations about the suitability of this software for any purpose.  It
12  * is provided "as is" without express or implied warranty.
13  *
14  * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
15  * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
16  * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
17  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
19  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
20  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
21  * SOFTWARE.
22  *
23  * Author:  Ian Rickards (ian.rickards@arm.com)
24  * Author:  Jonathan Morton (jonathan.morton@movial.com)
25  * Author:  Markku Vire (markku.vire@movial.com)
26  *
27  */
28
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>
31 #endif
32
33 #include <string.h>
34 #include "pixman-private.h"
35
/*
 * BIND_SRC_NULL_DST (name, src_type, src_cnt, dst_type, dst_cnt)
 *
 * Declares the external routine pixman_<name>_asm_neon and defines a
 * static wrapper neon_<name> that takes the composite-function argument
 * list.  The wrapper obtains the first source and destination lines and
 * their strides through PIXMAN_IMAGE_GET_LINE and passes them, together
 * with the width and height, to the external routine.  The imp, op,
 * mask_image, mask_x and mask_y arguments are not used.
 *
 * src_type / dst_type are the element types of the line pointers;
 * src_cnt / dst_cnt are handed to PIXMAN_IMAGE_GET_LINE as its last
 * argument (presumably elements per pixel -- confirm against the macro
 * definition in pixman-private.h).
 */
#define BIND_SRC_NULL_DST(name, src_type, src_cnt, dst_type, dst_cnt) \
void \
pixman_##name##_asm_neon (int32_t   width, \
                          int32_t   height, \
                          dst_type *dst_row, \
                          int32_t   dst_pitch, \
                          src_type *src_row, \
                          int32_t   src_pitch); \
\
static void \
neon_##name (pixman_implementation_t *imp, \
             pixman_op_t              op, \
             pixman_image_t *         src_image, \
             pixman_image_t *         mask_image, \
             pixman_image_t *         dst_image, \
             int32_t                  src_x, \
             int32_t                  src_y, \
             int32_t                  mask_x, \
             int32_t                  mask_y, \
             int32_t                  dest_x, \
             int32_t                  dest_y, \
             int32_t                  width, \
             int32_t                  height) \
{ \
    src_type *src_row; \
    dst_type *dst_row; \
    int32_t   src_pitch; \
    int32_t   dst_pitch; \
\
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
                           src_pitch, src_row, src_cnt); \
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
                           dst_pitch, dst_row, dst_cnt); \
\
    pixman_##name##_asm_neon (width, height, \
                              dst_row, dst_pitch, \
                              src_row, src_pitch); \
}
73
/*
 * BIND_N_MASK_DST (name, mask_type, mask_cnt, dst_type, dst_cnt)
 *
 * Declares the external routine pixman_<name>_asm_neon and defines a
 * static wrapper neon_<name> for operations with a solid source, a mask
 * image and a destination image.
 *
 * The wrapper reads the solid source value with _pixman_image_get_solid
 * (using the destination format) and does nothing when that value is 0.
 * Otherwise it obtains the destination and mask lines and strides via
 * PIXMAN_IMAGE_GET_LINE and calls the external routine; the argument
 * slot named "unused" in the prototype is always passed as 0.
 */
#define BIND_N_MASK_DST(name, mask_type, mask_cnt, dst_type, dst_cnt) \
void \
pixman_##name##_asm_neon (int32_t    width, \
                          int32_t    height, \
                          dst_type  *dst_row, \
                          int32_t    dst_pitch, \
                          uint32_t   solid, \
                          int32_t    unused, \
                          mask_type *mask_row, \
                          int32_t    mask_pitch); \
\
static void \
neon_##name (pixman_implementation_t *imp, \
             pixman_op_t              op, \
             pixman_image_t *         src_image, \
             pixman_image_t *         mask_image, \
             pixman_image_t *         dst_image, \
             int32_t                  src_x, \
             int32_t                  src_y, \
             int32_t                  mask_x, \
             int32_t                  mask_y, \
             int32_t                  dest_x, \
             int32_t                  dest_y, \
             int32_t                  width, \
             int32_t                  height) \
{ \
    dst_type  *dst_row; \
    mask_type *mask_row; \
    int32_t    dst_pitch; \
    int32_t    mask_pitch; \
    uint32_t   solid; \
\
    solid = _pixman_image_get_solid (src_image, dst_image->bits.format); \
\
    if (solid != 0) \
    { \
        PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
                               dst_pitch, dst_row, dst_cnt); \
        PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
                               mask_pitch, mask_row, mask_cnt); \
\
        pixman_##name##_asm_neon (width, height, \
                                  dst_row, dst_pitch, \
                                  solid, 0, \
                                  mask_row, mask_pitch); \
    } \
}
120
/*
 * BIND_SRC_N_DST (name, src_type, src_cnt, dst_type, dst_cnt)
 *
 * Declares the external routine pixman_<name>_asm_neon and defines a
 * static wrapper neon_<name> for operations with a source image, a
 * solid mask and a destination image.
 *
 * The wrapper reads the solid mask value with _pixman_image_get_solid
 * (using the destination format) and does nothing when that value is 0.
 * Otherwise it obtains the destination and source lines and strides via
 * PIXMAN_IMAGE_GET_LINE and calls the external routine with the mask
 * value as the final argument.
 */
#define BIND_SRC_N_DST(name, src_type, src_cnt, dst_type, dst_cnt) \
void \
pixman_##name##_asm_neon (int32_t    width, \
                          int32_t    height, \
                          dst_type  *dst_row, \
                          int32_t    dst_pitch, \
                          src_type  *src_row, \
                          int32_t    src_pitch, \
                          uint32_t   solid_mask); \
\
static void \
neon_##name (pixman_implementation_t *imp, \
             pixman_op_t              op, \
             pixman_image_t *         src_image, \
             pixman_image_t *         mask_image, \
             pixman_image_t *         dst_image, \
             int32_t                  src_x, \
             int32_t                  src_y, \
             int32_t                  mask_x, \
             int32_t                  mask_y, \
             int32_t                  dest_x, \
             int32_t                  dest_y, \
             int32_t                  width, \
             int32_t                  height) \
{ \
    dst_type  *dst_row; \
    src_type  *src_row; \
    int32_t    dst_pitch; \
    int32_t    src_pitch; \
    uint32_t   solid_mask; \
\
    solid_mask = \
        _pixman_image_get_solid (mask_image, dst_image->bits.format); \
\
    if (solid_mask != 0) \
    { \
        PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
                               dst_pitch, dst_row, dst_cnt); \
        PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
                               src_pitch, src_row, src_cnt); \
\
        pixman_##name##_asm_neon (width, height, \
                                  dst_row, dst_pitch, \
                                  src_row, src_pitch, \
                                  solid_mask); \
    } \
}
166
/*
 * BIND_SRC_MASK_DST (name, src_type, src_cnt, mask_type, mask_cnt,
 *                    dst_type, dst_cnt)
 *
 * Declares the external routine pixman_<name>_asm_neon and defines a
 * static wrapper neon_<name> for operations where source, mask and
 * destination are all images.  The wrapper obtains the first line and
 * the stride of each of the three images via PIXMAN_IMAGE_GET_LINE and
 * passes them, with the width and height, to the external routine.
 * The imp and op arguments are not used.
 */
#define BIND_SRC_MASK_DST(name, src_type, src_cnt, mask_type, mask_cnt, \
                          dst_type, dst_cnt) \
void \
pixman_##name##_asm_neon (int32_t    width, \
                          int32_t    height, \
                          dst_type  *dst_row, \
                          int32_t    dst_pitch, \
                          src_type  *src_row, \
                          int32_t    src_pitch, \
                          mask_type *mask_row, \
                          int32_t    mask_pitch); \
\
static void \
neon_##name (pixman_implementation_t *imp, \
             pixman_op_t              op, \
             pixman_image_t *         src_image, \
             pixman_image_t *         mask_image, \
             pixman_image_t *         dst_image, \
             int32_t                  src_x, \
             int32_t                  src_y, \
             int32_t                  mask_x, \
             int32_t                  mask_y, \
             int32_t                  dest_x, \
             int32_t                  dest_y, \
             int32_t                  width, \
             int32_t                  height) \
{ \
    dst_type  *dst_row; \
    src_type  *src_row; \
    mask_type *mask_row; \
    int32_t    dst_pitch; \
    int32_t    src_pitch; \
    int32_t    mask_pitch; \
\
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
                           dst_pitch, dst_row, dst_cnt); \
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
                           src_pitch, src_row, src_cnt); \
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
                           mask_pitch, mask_row, mask_cnt); \
\
    pixman_##name##_asm_neon (width, height, \
                              dst_row, dst_pitch, \
                              src_row, src_pitch, \
                              mask_row, mask_pitch); \
}
211
212
213 BIND_SRC_NULL_DST(composite_src_8888_8888, uint32_t, 1, uint32_t, 1)
214 BIND_SRC_NULL_DST(composite_src_0565_0565, uint16_t, 1, uint16_t, 1)
215 BIND_SRC_NULL_DST(composite_src_0888_0888, uint8_t, 3, uint8_t, 3)
216 BIND_SRC_NULL_DST(composite_src_8888_0565, uint32_t, 1, uint16_t, 1)
217 BIND_SRC_NULL_DST(composite_add_8000_8000, uint8_t, 1, uint8_t, 1)
218
219 BIND_SRC_NULL_DST(composite_over_8888_0565, uint32_t, 1, uint16_t, 1)
220 BIND_SRC_NULL_DST(composite_over_8888_8888, uint32_t, 1, uint32_t, 1)
221
222 BIND_N_MASK_DST(composite_over_n_8_0565, uint8_t, 1, uint16_t, 1)
223 BIND_N_MASK_DST(composite_over_n_8_8888, uint8_t, 1, uint32_t, 1)
224 BIND_N_MASK_DST(composite_add_n_8_8, uint8_t, 1, uint8_t, 1)
225
226 BIND_SRC_N_DST(composite_over_8888_n_8888, uint32_t, 1, uint32_t, 1)
227
228 BIND_SRC_MASK_DST(composite_add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
229
/*
 * External solid-fill routines, one per pixel size (8, 16 and 32 bits).
 * They are defined outside this file (presumably in NEON assembly, going
 * by the _asm_neon suffix).  pixman_fill_neon () passes the rectangle
 * width and height, a pointer to the first pixel, the stride expressed
 * in destination elements, and the fill value.
 */
void pixman_composite_src_n_8_asm_neon (int32_t  w,
                                        int32_t  h,
                                        uint8_t *dst,
                                        int32_t  dst_stride,
                                        uint8_t  src);

void pixman_composite_src_n_0565_asm_neon (int32_t   w,
                                           int32_t   h,
                                           uint16_t *dst,
                                           int32_t   dst_stride,
                                           uint16_t  src);

void pixman_composite_src_n_8888_asm_neon (int32_t   w,
                                           int32_t   h,
                                           uint32_t *dst,
                                           int32_t   dst_stride,
                                           uint32_t  src);
250
251 static pixman_bool_t
252 pixman_fill_neon (uint32_t *bits,
253                   int       stride,
254                   int       bpp,
255                   int       x,
256                   int       y,
257                   int       width,
258                   int       height,
259                   uint32_t  _xor)
260 {
261     /* stride is always multiple of 32bit units in pixman */
262     uint32_t byte_stride = stride * sizeof(uint32_t);
263
264     switch (bpp)
265     {
266     case 8:
267         pixman_composite_src_n_8_asm_neon (
268                 width,
269                 height,
270                 (uint8_t *)(((char *) bits) + y * byte_stride + x),
271                 byte_stride,
272                 _xor & 0xff);
273         return TRUE;
274     case 16:
275         pixman_composite_src_n_0565_asm_neon (
276                 width,
277                 height,
278                 (uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
279                 byte_stride / 2,
280                 _xor & 0xffff);
281         return TRUE;
282     case 32:
283         pixman_composite_src_n_8888_asm_neon (
284                 width,
285                 height,
286                 (uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
287                 byte_stride / 4,
288                 _xor);
289         return TRUE;
290     default:
291         return FALSE;
292     }
293 }
294
295 static const pixman_fast_path_t arm_neon_fast_path_array[] =
296 {
297     { PIXMAN_OP_SRC,  PIXMAN_r5g6b5,   PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_0565_0565,    0 },
298     { PIXMAN_OP_SRC,  PIXMAN_b5g6r5,   PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_0565_0565,    0 },
299     { PIXMAN_OP_SRC,  PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_8888_0565,    0 },
300     { PIXMAN_OP_SRC,  PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_8888_0565,    0 },
301     { PIXMAN_OP_SRC,  PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_8888_0565,    0 },
302     { PIXMAN_OP_SRC,  PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_8888_0565,    0 },
303     { PIXMAN_OP_SRC,  PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_src_8888_8888,    0 },
304     { PIXMAN_OP_SRC,  PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_src_8888_8888,    0 },
305     { PIXMAN_OP_SRC,  PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, neon_composite_src_8888_8888,    0 },
306     { PIXMAN_OP_SRC,  PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, neon_composite_src_8888_8888,    0 },
307     { PIXMAN_OP_SRC,  PIXMAN_r8g8b8,   PIXMAN_null,     PIXMAN_r8g8b8,   neon_composite_src_0888_0888,    0 },
308     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   neon_composite_over_n_8_0565,    0 },
309     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b5g6r5,   neon_composite_over_n_8_0565,    0 },
310     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, neon_composite_over_n_8_8888,    0 },
311     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, neon_composite_over_n_8_8888,    0 },
312     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8b8g8r8, neon_composite_over_n_8_8888,    0 },
313     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888,    0 },
314     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888, NEED_SOLID_MASK },
315     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888, NEED_SOLID_MASK },
316     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_over_8888_0565,   0 },
317     { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_over_8888_0565,   0 },
318     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, neon_composite_over_8888_8888,   0 },
319     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_over_8888_8888,   0 },
320     { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, neon_composite_over_8888_8888,   0 },
321     { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, neon_composite_over_8888_8888,   0 },
322     { PIXMAN_OP_ADD,  PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8,       neon_composite_add_n_8_8,        0 },
323     { PIXMAN_OP_ADD,  PIXMAN_a8,       PIXMAN_a8,       PIXMAN_a8,       neon_composite_add_8_8_8,        0 },
324     { PIXMAN_OP_NONE },
325 };
326
327 const pixman_fast_path_t *const arm_neon_fast_paths = arm_neon_fast_path_array;
328
329 static void
330 arm_neon_composite (pixman_implementation_t *imp,
331                     pixman_op_t              op,
332                     pixman_image_t *         src,
333                     pixman_image_t *         mask,
334                     pixman_image_t *         dest,
335                     int32_t                  src_x,
336                     int32_t                  src_y,
337                     int32_t                  mask_x,
338                     int32_t                  mask_y,
339                     int32_t                  dest_x,
340                     int32_t                  dest_y,
341                     int32_t                  width,
342                     int32_t                  height)
343 {
344     if (_pixman_run_fast_path (arm_neon_fast_paths, imp,
345                                op, src, mask, dest,
346                                src_x, src_y,
347                                mask_x, mask_y,
348                                dest_x, dest_y,
349                                width, height))
350     {
351         return;
352     }
353
354     _pixman_implementation_composite (imp->delegate, op,
355                                       src, mask, dest,
356                                       src_x, src_y,
357                                       mask_x, mask_y,
358                                       dest_x, dest_y,
359                                       width, height);
360 }
361
362 static pixman_bool_t
363 arm_neon_fill (pixman_implementation_t *imp,
364                uint32_t *               bits,
365                int                      stride,
366                int                      bpp,
367                int                      x,
368                int                      y,
369                int                      width,
370                int                      height,
371                uint32_t xor)
372 {
373     if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor))
374         return TRUE;
375
376     return _pixman_implementation_fill (
377         imp->delegate, bits, stride, bpp, x, y, width, height, xor);
378 }
379
380 pixman_implementation_t *
381 _pixman_implementation_create_arm_neon (void)
382 {
383     pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
384     pixman_implementation_t *imp = _pixman_implementation_create (general);
385
386     imp->composite = arm_neon_composite;
387     imp->fill = arm_neon_fill;
388
389     return imp;
390 }