ARM: added 'neon_composite_add_8888_8888_8888' fast path
[profile/ivi/pixman.git] / pixman / pixman-arm-neon.c
1 /*
2  * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy
3  *
4  * Permission to use, copy, modify, distribute, and sell this software and its
5  * documentation for any purpose is hereby granted without fee, provided that
6  * the above copyright notice appear in all copies and that both that
7  * copyright notice and this permission notice appear in supporting
8  * documentation, and that the name of ARM Ltd not be used in
9  * advertising or publicity pertaining to distribution of the software without
10  * specific, written prior permission.  ARM Ltd makes no
11  * representations about the suitability of this software for any purpose.  It
12  * is provided "as is" without express or implied warranty.
13  *
14  * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
15  * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
16  * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
17  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
19  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
20  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
21  * SOFTWARE.
22  *
23  * Author:  Ian Rickards (ian.rickards@arm.com)
24  * Author:  Jonathan Morton (jonathan.morton@movial.com)
25  * Author:  Markku Vire (markku.vire@movial.com)
26  *
27  */
28
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>
31 #endif
32
33 #include <string.h>
34 #include "pixman-private.h"
35
/*
 * BIND_SRC_NULL_DST (name, src_type, src_cnt, dst_type, dst_cnt)
 *
 * Declares the external routine pixman_composite_<name>_asm_neon and
 * defines the static wrapper neon_composite_<name> for operations that
 * read a source image and use no mask.  The wrapper looks up the first
 * scanline and the stride of the source and destination through
 * PIXMAN_IMAGE_GET_LINE and hands them, together with the rectangle
 * size, to the external routine.  imp, op, mask_image, mask_x and
 * mask_y are accepted but not used.
 */
#define BIND_SRC_NULL_DST(name, src_type, src_cnt, dst_type, dst_cnt)   \
void                                                                    \
pixman_composite_##name##_asm_neon (int32_t   w,                        \
                                    int32_t   h,                        \
                                    dst_type *dst,                      \
                                    int32_t   dst_stride,               \
                                    src_type *src,                      \
                                    int32_t   src_stride);              \
                                                                        \
static void                                                             \
neon_composite_##name (pixman_implementation_t *imp,                    \
                       pixman_op_t              op,                     \
                       pixman_image_t *         src_image,              \
                       pixman_image_t *         mask_image,             \
                       pixman_image_t *         dst_image,              \
                       int32_t                  src_x,                  \
                       int32_t                  src_y,                  \
                       int32_t                  mask_x,                 \
                       int32_t                  mask_y,                 \
                       int32_t                  dest_x,                 \
                       int32_t                  dest_y,                 \
                       int32_t                  width,                  \
                       int32_t                  height)                 \
{                                                                       \
    int32_t   src_stride;                                               \
    int32_t   dst_stride;                                               \
    src_type *src_first;                                                \
    dst_type *dst_first;                                                \
                                                                        \
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
                           dst_stride, dst_first, dst_cnt);             \
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
                           src_stride, src_first, src_cnt);             \
                                                                        \
    pixman_composite_##name##_asm_neon (width, height,                  \
                                        dst_first, dst_stride,          \
                                        src_first, src_stride);         \
}
73
/*
 * BIND_N_MASK_DST (name, mask_type, mask_cnt, dst_type, dst_cnt)
 *
 * Declares the external routine pixman_composite_<name>_asm_neon and
 * defines the static wrapper neon_composite_<name> for operations with
 * a solid source and a mask image.  The solid colour is obtained with
 * _pixman_image_get_solid in the destination's format; when it is 0
 * the wrapper returns without touching the destination.  Otherwise the
 * first scanline and stride of the destination and the mask are looked
 * up and passed on, with a literal 0 in the routine's "unused" slot.
 */
#define BIND_N_MASK_DST(name, mask_type, mask_cnt, dst_type, dst_cnt)   \
void                                                                    \
pixman_composite_##name##_asm_neon (int32_t    w,                       \
                                    int32_t    h,                       \
                                    dst_type  *dst,                     \
                                    int32_t    dst_stride,              \
                                    uint32_t   src,                     \
                                    int32_t    unused,                  \
                                    mask_type *mask,                    \
                                    int32_t    mask_stride);            \
                                                                        \
static void                                                             \
neon_composite_##name (pixman_implementation_t *imp,                    \
                       pixman_op_t              op,                     \
                       pixman_image_t *         src_image,              \
                       pixman_image_t *         mask_image,             \
                       pixman_image_t *         dst_image,              \
                       int32_t                  src_x,                  \
                       int32_t                  src_y,                  \
                       int32_t                  mask_x,                 \
                       int32_t                  mask_y,                 \
                       int32_t                  dest_x,                 \
                       int32_t                  dest_y,                 \
                       int32_t                  width,                  \
                       int32_t                  height)                 \
{                                                                       \
    uint32_t   solid;                                                   \
    int32_t    mask_stride;                                             \
    int32_t    dst_stride;                                              \
    mask_type *mask_first;                                              \
    dst_type  *dst_first;                                               \
                                                                        \
    solid = _pixman_image_get_solid (src_image,                         \
                                     dst_image->bits.format);           \
                                                                        \
    if (!solid)                                                         \
        return;                                                         \
                                                                        \
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
                           mask_stride, mask_first, mask_cnt);          \
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
                           dst_stride, dst_first, dst_cnt);             \
                                                                        \
    pixman_composite_##name##_asm_neon (width, height,                  \
                                        dst_first, dst_stride,          \
                                        solid, 0,                       \
                                        mask_first, mask_stride);       \
}
120
/*
 * BIND_SRC_N_DST (name, src_type, src_cnt, dst_type, dst_cnt)
 *
 * Declares the external routine pixman_composite_<name>_asm_neon and
 * defines the static wrapper neon_composite_<name> for operations with
 * a source image and a solid mask.  The solid mask value is obtained
 * with _pixman_image_get_solid in the destination's format; when it is
 * 0 the wrapper returns without touching the destination.  Otherwise
 * the first scanline and stride of the source and destination are
 * looked up and passed on together with the mask value.
 */
#define BIND_SRC_N_DST(name, src_type, src_cnt, dst_type, dst_cnt)      \
void                                                                    \
pixman_composite_##name##_asm_neon (int32_t    w,                       \
                                    int32_t    h,                       \
                                    dst_type  *dst,                     \
                                    int32_t    dst_stride,              \
                                    src_type  *src,                     \
                                    int32_t    src_stride,              \
                                    uint32_t   mask);                   \
                                                                        \
static void                                                             \
neon_composite_##name (pixman_implementation_t *imp,                    \
                       pixman_op_t              op,                     \
                       pixman_image_t *         src_image,              \
                       pixman_image_t *         mask_image,             \
                       pixman_image_t *         dst_image,              \
                       int32_t                  src_x,                  \
                       int32_t                  src_y,                  \
                       int32_t                  mask_x,                 \
                       int32_t                  mask_y,                 \
                       int32_t                  dest_x,                 \
                       int32_t                  dest_y,                 \
                       int32_t                  width,                  \
                       int32_t                  height)                 \
{                                                                       \
    uint32_t   solid_mask;                                              \
    int32_t    src_stride;                                              \
    int32_t    dst_stride;                                              \
    src_type  *src_first;                                               \
    dst_type  *dst_first;                                               \
                                                                        \
    solid_mask = _pixman_image_get_solid (mask_image,                   \
                                          dst_image->bits.format);      \
                                                                        \
    if (!solid_mask)                                                    \
        return;                                                         \
                                                                        \
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
                           src_stride, src_first, src_cnt);             \
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
                           dst_stride, dst_first, dst_cnt);             \
                                                                        \
    pixman_composite_##name##_asm_neon (width, height,                  \
                                        dst_first, dst_stride,          \
                                        src_first, src_stride,          \
                                        solid_mask);                    \
}
166
/*
 * BIND_SRC_MASK_DST (name, src_type, src_cnt, mask_type, mask_cnt,
 *                    dst_type, dst_cnt)
 *
 * Declares the external routine pixman_composite_<name>_asm_neon and
 * defines the static wrapper neon_composite_<name> for operations that
 * read both a source image and a mask image.  The wrapper looks up the
 * first scanline and stride of all three images through
 * PIXMAN_IMAGE_GET_LINE and passes them on with the rectangle size.
 * imp and op are accepted but not used.
 */
#define BIND_SRC_MASK_DST(name, src_type, src_cnt, mask_type, mask_cnt, \
                          dst_type, dst_cnt)                            \
void                                                                    \
pixman_composite_##name##_asm_neon (int32_t    w,                       \
                                    int32_t    h,                       \
                                    dst_type  *dst,                     \
                                    int32_t    dst_stride,              \
                                    src_type  *src,                     \
                                    int32_t    src_stride,              \
                                    mask_type *mask,                    \
                                    int32_t    mask_stride);            \
                                                                        \
static void                                                             \
neon_composite_##name (pixman_implementation_t *imp,                    \
                       pixman_op_t              op,                     \
                       pixman_image_t *         src_image,              \
                       pixman_image_t *         mask_image,             \
                       pixman_image_t *         dst_image,              \
                       int32_t                  src_x,                  \
                       int32_t                  src_y,                  \
                       int32_t                  mask_x,                 \
                       int32_t                  mask_y,                 \
                       int32_t                  dest_x,                 \
                       int32_t                  dest_y,                 \
                       int32_t                  width,                  \
                       int32_t                  height)                 \
{                                                                       \
    int32_t    src_stride;                                              \
    int32_t    mask_stride;                                             \
    int32_t    dst_stride;                                              \
    src_type  *src_first;                                               \
    mask_type *mask_first;                                              \
    dst_type  *dst_first;                                               \
                                                                        \
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
                           src_stride, src_first, src_cnt);             \
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
                           mask_stride, mask_first, mask_cnt);          \
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
                           dst_stride, dst_first, dst_cnt);             \
                                                                        \
    pixman_composite_##name##_asm_neon (width, height,                  \
                                        dst_first, dst_stride,          \
                                        src_first, src_stride,          \
                                        mask_first, mask_stride);       \
}
211
212
213 BIND_SRC_NULL_DST(src_8888_8888, uint32_t, 1, uint32_t, 1)
214 BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1)
215 BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
216 BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
217 BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
218 BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
219
220 BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1)
221 BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
222
223 BIND_N_MASK_DST(over_n_8_0565, uint8_t, 1, uint16_t, 1)
224 BIND_N_MASK_DST(over_n_8_8888, uint8_t, 1, uint32_t, 1)
225 BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1)
226
227 BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1)
228
229 BIND_SRC_MASK_DST(add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
230 BIND_SRC_MASK_DST(add_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
231 BIND_SRC_MASK_DST(over_8888_8_8888, uint32_t, 1, uint8_t, 1, uint32_t, 1)
232 BIND_SRC_MASK_DST(over_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
233
/*
 * Externally defined solid-fill routines, one per destination pixel
 * size (8, 16 and 32 bits).  Each takes the rectangle size, a pointer
 * to the first destination pixel, the destination stride and the fill
 * value.
 */
void pixman_composite_src_n_8_asm_neon    (int32_t w, int32_t h,
                                           uint8_t *dst, int32_t dst_stride,
                                           uint8_t src);

void pixman_composite_src_n_0565_asm_neon (int32_t w, int32_t h,
                                           uint16_t *dst, int32_t dst_stride,
                                           uint16_t src);

void pixman_composite_src_n_8888_asm_neon (int32_t w, int32_t h,
                                           uint32_t *dst, int32_t dst_stride,
                                           uint32_t src);
254
255 static pixman_bool_t
256 pixman_fill_neon (uint32_t *bits,
257                   int       stride,
258                   int       bpp,
259                   int       x,
260                   int       y,
261                   int       width,
262                   int       height,
263                   uint32_t  _xor)
264 {
265     /* stride is always multiple of 32bit units in pixman */
266     uint32_t byte_stride = stride * sizeof(uint32_t);
267
268     switch (bpp)
269     {
270     case 8:
271         pixman_composite_src_n_8_asm_neon (
272                 width,
273                 height,
274                 (uint8_t *)(((char *) bits) + y * byte_stride + x),
275                 byte_stride,
276                 _xor & 0xff);
277         return TRUE;
278     case 16:
279         pixman_composite_src_n_0565_asm_neon (
280                 width,
281                 height,
282                 (uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
283                 byte_stride / 2,
284                 _xor & 0xffff);
285         return TRUE;
286     case 32:
287         pixman_composite_src_n_8888_asm_neon (
288                 width,
289                 height,
290                 (uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
291                 byte_stride / 4,
292                 _xor);
293         return TRUE;
294     default:
295         return FALSE;
296     }
297 }
298
299 static pixman_bool_t
300 pixman_blt_neon (uint32_t *src_bits,
301                  uint32_t *dst_bits,
302                  int       src_stride,
303                  int       dst_stride,
304                  int       src_bpp,
305                  int       dst_bpp,
306                  int       src_x,
307                  int       src_y,
308                  int       dst_x,
309                  int       dst_y,
310                  int       width,
311                  int       height)
312 {
313     if (src_bpp != dst_bpp)
314         return FALSE;
315
316     switch (src_bpp)
317     {
318     case 16:
319         pixman_composite_src_0565_0565_asm_neon (
320                 width, height,
321                 (uint16_t *)(((char *) dst_bits) +
322                 dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
323                 (uint16_t *)(((char *) src_bits) +
324                 src_y * src_stride * 4 + src_x * 2), src_stride * 2);
325         return TRUE;
326     case 32:
327         pixman_composite_src_8888_8888_asm_neon (
328                 width, height,
329                 (uint32_t *)(((char *) dst_bits) +
330                 dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
331                 (uint32_t *)(((char *) src_bits) +
332                 src_y * src_stride * 4 + src_x * 4), src_stride);
333         return TRUE;
334     default:
335         return FALSE;
336     }
337 }
338
339 static const pixman_fast_path_t arm_neon_fast_path_array[] =
340 {
341     { PIXMAN_OP_SRC,  PIXMAN_r5g6b5,   PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_0565_0565    },
342     { PIXMAN_OP_SRC,  PIXMAN_b5g6r5,   PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_0565_0565    },
343     { PIXMAN_OP_SRC,  PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_8888_0565    },
344     { PIXMAN_OP_SRC,  PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_8888_0565    },
345     { PIXMAN_OP_SRC,  PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_8888_0565    },
346     { PIXMAN_OP_SRC,  PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_8888_0565    },
347     { PIXMAN_OP_SRC,  PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_src_8888_8888    },
348     { PIXMAN_OP_SRC,  PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_src_8888_8888    },
349     { PIXMAN_OP_SRC,  PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, neon_composite_src_8888_8888    },
350     { PIXMAN_OP_SRC,  PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, neon_composite_src_8888_8888    },
351     { PIXMAN_OP_SRC,  PIXMAN_r8g8b8,   PIXMAN_null,     PIXMAN_r8g8b8,   neon_composite_src_0888_0888    },
352     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   neon_composite_over_n_8_0565    },
353     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b5g6r5,   neon_composite_over_n_8_0565    },
354     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, neon_composite_over_n_8_8888    },
355     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, neon_composite_over_n_8_8888    },
356     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8b8g8r8, neon_composite_over_n_8_8888    },
357     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888    },
358     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid,    PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888 },
359     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid,    PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888 },
360     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, neon_composite_over_8888_8_8888 },
361     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, neon_composite_over_8888_8_8888 },
362     { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8,       PIXMAN_a8b8g8r8, neon_composite_over_8888_8_8888 },
363     { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8,       PIXMAN_x8b8g8r8, neon_composite_over_8888_8_8888 },
364     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, neon_composite_over_8888_8888_8888 },
365     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_over_8888_0565   },
366     { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_over_8888_0565   },
367     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, neon_composite_over_8888_8888   },
368     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_over_8888_8888   },
369     { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, neon_composite_over_8888_8888   },
370     { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, neon_composite_over_8888_8888   },
371     { PIXMAN_OP_ADD,  PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8,       neon_composite_add_n_8_8        },
372     { PIXMAN_OP_ADD,  PIXMAN_a8,       PIXMAN_a8,       PIXMAN_a8,       neon_composite_add_8_8_8        },
373     { PIXMAN_OP_ADD,  PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, neon_composite_add_8888_8888_8888 },
374     { PIXMAN_OP_ADD,  PIXMAN_a8,       PIXMAN_null,     PIXMAN_a8,       neon_composite_add_8000_8000    },
375     { PIXMAN_OP_ADD,  PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, neon_composite_add_8888_8888    },
376     { PIXMAN_OP_ADD,  PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, neon_composite_add_8888_8888    },
377     { PIXMAN_OP_NONE },
378 };
379
380 const pixman_fast_path_t *const arm_neon_fast_paths = arm_neon_fast_path_array;
381
382 static void
383 arm_neon_composite (pixman_implementation_t *imp,
384                     pixman_op_t              op,
385                     pixman_image_t *         src,
386                     pixman_image_t *         mask,
387                     pixman_image_t *         dest,
388                     int32_t                  src_x,
389                     int32_t                  src_y,
390                     int32_t                  mask_x,
391                     int32_t                  mask_y,
392                     int32_t                  dest_x,
393                     int32_t                  dest_y,
394                     int32_t                  width,
395                     int32_t                  height)
396 {
397     if (_pixman_run_fast_path (arm_neon_fast_paths, imp,
398                                op, src, mask, dest,
399                                src_x, src_y,
400                                mask_x, mask_y,
401                                dest_x, dest_y,
402                                width, height))
403     {
404         return;
405     }
406
407     _pixman_implementation_composite (imp->delegate, op,
408                                       src, mask, dest,
409                                       src_x, src_y,
410                                       mask_x, mask_y,
411                                       dest_x, dest_y,
412                                       width, height);
413 }
414
415 static pixman_bool_t
416 arm_neon_blt (pixman_implementation_t *imp,
417               uint32_t *               src_bits,
418               uint32_t *               dst_bits,
419               int                      src_stride,
420               int                      dst_stride,
421               int                      src_bpp,
422               int                      dst_bpp,
423               int                      src_x,
424               int                      src_y,
425               int                      dst_x,
426               int                      dst_y,
427               int                      width,
428               int                      height)
429 {
430     if (!pixman_blt_neon (
431             src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
432             src_x, src_y, dst_x, dst_y, width, height))
433
434     {
435         return _pixman_implementation_blt (
436             imp->delegate,
437             src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
438             src_x, src_y, dst_x, dst_y, width, height);
439     }
440
441     return TRUE;
442 }
443
444 static pixman_bool_t
445 arm_neon_fill (pixman_implementation_t *imp,
446                uint32_t *               bits,
447                int                      stride,
448                int                      bpp,
449                int                      x,
450                int                      y,
451                int                      width,
452                int                      height,
453                uint32_t xor)
454 {
455     if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor))
456         return TRUE;
457
458     return _pixman_implementation_fill (
459         imp->delegate, bits, stride, bpp, x, y, width, height, xor);
460 }
461
462 pixman_implementation_t *
463 _pixman_implementation_create_arm_neon (void)
464 {
465     pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
466     pixman_implementation_t *imp = _pixman_implementation_create (general);
467
468     imp->composite = arm_neon_composite;
469     imp->blt = arm_neon_blt;
470     imp->fill = arm_neon_fill;
471
472     return imp;
473 }