mmx: add add_0565_0565
[profile/ivi/pixman.git] / pixman / pixman-arm-neon.c
index 7ff8fe1..ca139de 100644 (file)
 
 #include <string.h>
 #include "pixman-private.h"
+#include "pixman-arm-common.h"
 
-#define BIND_SRC_NULL_DST(name, src_type, src_cnt, dst_type, dst_cnt)   \
-void                                                                    \
-pixman_##name##_asm_neon (int32_t   w,                                  \
-                          int32_t   h,                                  \
-                          dst_type *dst,                                \
-                          int32_t   dst_stride,                         \
-                          src_type *src,                                \
-                          int32_t   src_stride);                        \
-                                                                        \
-static void                                                             \
-neon_##name (pixman_implementation_t *imp,                              \
-             pixman_op_t              op,                               \
-             pixman_image_t *         src_image,                        \
-             pixman_image_t *         mask_image,                       \
-             pixman_image_t *         dst_image,                        \
-             int32_t                  src_x,                            \
-             int32_t                  src_y,                            \
-             int32_t                  mask_x,                           \
-             int32_t                  mask_y,                           \
-             int32_t                  dest_x,                           \
-             int32_t                  dest_y,                           \
-             int32_t                  width,                            \
-             int32_t                  height)                           \
-{                                                                       \
-    dst_type *dst_line;                                                 \
-    src_type *src_line;                                                 \
-    int32_t dst_stride, src_stride;                                     \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
-                           src_stride, src_line, src_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);              \
-                                                                        \
-    pixman_##name##_asm_neon (width, height,                            \
-                              dst_line, dst_stride,                     \
-                              src_line, src_stride);                    \
-}
-
-#define BIND_N_MASK_DST(name, mask_type, mask_cnt, dst_type, dst_cnt)   \
-void                                                                    \
-pixman_##name##_asm_neon (int32_t    w,                                 \
-                          int32_t    h,                                 \
-                          dst_type  *dst,                               \
-                          int32_t    dst_stride,                        \
-                          uint32_t   src,                               \
-                          int32_t    unused,                            \
-                          mask_type *mask,                              \
-                          int32_t    mask_stride);                      \
-                                                                        \
-static void                                                             \
-neon_##name (pixman_implementation_t *imp,                              \
-             pixman_op_t              op,                               \
-             pixman_image_t *         src_image,                        \
-             pixman_image_t *         mask_image,                       \
-             pixman_image_t *         dst_image,                        \
-             int32_t                  src_x,                            \
-             int32_t                  src_y,                            \
-             int32_t                  mask_x,                           \
-             int32_t                  mask_y,                           \
-             int32_t                  dest_x,                           \
-             int32_t                  dest_y,                           \
-             int32_t                  width,                            \
-             int32_t                  height)                           \
-{                                                                       \
-    dst_type  *dst_line;                                                \
-    mask_type *mask_line;                                               \
-    int32_t    dst_stride, mask_stride;                                 \
-    uint32_t   src;                                                     \
-                                                                        \
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);  \
-                                                                        \
-    if (src == 0)                                                       \
-       return;                                                         \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
-                           mask_stride, mask_line, mask_cnt);           \
-                                                                        \
-    pixman_##name##_asm_neon (width, height,                            \
-                              dst_line, dst_stride,                     \
-                              src, 0,                                   \
-                              mask_line, mask_stride);                  \
-}
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
+                                   uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
+                                   uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
+                                   uint16_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
+                                   uint8_t, 3, uint8_t, 3)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
+                                   uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
+                                   uint16_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
+                                   uint8_t, 3, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
+                                   uint8_t, 3, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
+                                   uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888,
+                                   uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8,
+                                   uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
+                                   uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
+                                   uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
+                                   uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
+                                   uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_8888,
+                                   uint8_t, 1, uint32_t, 1)
 
-#define BIND_SRC_N_DST(name, src_type, src_cnt, dst_type, dst_cnt)      \
-void                                                                    \
-pixman_##name##_asm_neon (int32_t    w,                                 \
-                          int32_t    h,                                 \
-                          dst_type  *dst,                               \
-                          int32_t    dst_stride,                        \
-                          src_type  *src,                               \
-                          int32_t    src_stride,                        \
-                          uint32_t   mask);                             \
-                                                                        \
-static void                                                             \
-neon_##name (pixman_implementation_t *imp,                              \
-             pixman_op_t              op,                               \
-             pixman_image_t *         src_image,                        \
-             pixman_image_t *         mask_image,                       \
-             pixman_image_t *         dst_image,                        \
-             int32_t                  src_x,                            \
-             int32_t                  src_y,                            \
-             int32_t                  mask_x,                           \
-             int32_t                  mask_y,                           \
-             int32_t                  dest_x,                           \
-             int32_t                  dest_y,                           \
-             int32_t                  width,                            \
-             int32_t                  height)                           \
-{                                                                       \
-    dst_type  *dst_line;                                                \
-    src_type  *src_line;                                                \
-    int32_t    dst_stride, src_stride;                                  \
-    uint32_t   mask;                                                    \
-                                                                        \
-    mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\
-                                                                        \
-    if (mask == 0)                                                      \
-       return;                                                         \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
-                           src_stride, src_line, src_cnt);              \
-                                                                        \
-    pixman_##name##_asm_neon (width, height,                            \
-                              dst_line, dst_stride,                     \
-                              src_line, src_stride,                     \
-                              mask);                                    \
-}
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565,
+                                 uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888,
+                                 uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888,
+                                 uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8,
+                                 uint8_t, 1)
 
-#define BIND_SRC_MASK_DST(name, src_type, src_cnt, mask_type, mask_cnt, \
-                          dst_type, dst_cnt)                            \
-void                                                                    \
-pixman_##name##_asm_neon (int32_t    w,                                 \
-                          int32_t    h,                                 \
-                          dst_type  *dst,                               \
-                          int32_t    dst_stride,                        \
-                          src_type  *src,                               \
-                          int32_t    src_stride,                        \
-                          mask_type *mask,                              \
-                          int32_t    mask_stride);                      \
-                                                                        \
-static void                                                             \
-neon_##name (pixman_implementation_t *imp,                              \
-             pixman_op_t              op,                               \
-             pixman_image_t *         src_image,                        \
-             pixman_image_t *         mask_image,                       \
-             pixman_image_t *         dst_image,                        \
-             int32_t                  src_x,                            \
-             int32_t                  src_y,                            \
-             int32_t                  mask_x,                           \
-             int32_t                  mask_y,                           \
-             int32_t                  dest_x,                           \
-             int32_t                  dest_y,                           \
-             int32_t                  width,                            \
-             int32_t                  height)                           \
-{                                                                       \
-    dst_type  *dst_line;                                                \
-    src_type  *src_line;                                                \
-    mask_type *mask_line;                                               \
-    int32_t    dst_stride, src_stride, mask_stride;                     \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
-                           src_stride, src_line, src_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
-                           mask_stride, mask_line, mask_cnt);           \
-                                                                        \
-    pixman_##name##_asm_neon (width, height,                            \
-                              dst_line, dst_stride,                     \
-                              src_line, src_stride,                     \
-                              mask_line, mask_stride);                  \
-}
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565,
+                                      uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888,
+                                      uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca,
+                                      uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_0565_ca,
+                                     uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8,
+                                      uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8,
+                                      uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888,
+                                      uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8888,
+                                      uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8,
+                                      uint8_t, 1, uint8_t, 1)
 
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888,
+                                     uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565,
+                                     uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565,
+                                     uint16_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888,
+                                     uint32_t, 1, uint32_t, 1)
 
-BIND_SRC_NULL_DST(composite_src_8888_8888, uint32_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(composite_src_0565_0565, uint16_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(composite_src_0888_0888, uint8_t, 3, uint8_t, 3)
-BIND_SRC_NULL_DST(composite_src_8888_0565, uint32_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(composite_add_8000_8000, uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
+                                        uint8_t, 1, uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
+                                        uint16_t, 1, uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888,
+                                        uint32_t, 1, uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
+                                        uint32_t, 1, uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
+                                        uint32_t, 1, uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
+                                        uint32_t, 1, uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
+                                        uint32_t, 1, uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
+                                        uint16_t, 1, uint8_t, 1, uint16_t, 1)
 
-BIND_SRC_NULL_DST(composite_over_8888_0565, uint32_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(composite_over_8888_8888, uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER,
+                                        uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER,
+                                        uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC,
+                                        uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
+                                        uint16_t, uint32_t)
 
-BIND_N_MASK_DST(composite_over_n_8_0565, uint8_t, 1, uint16_t, 1)
-BIND_N_MASK_DST(composite_over_n_8_8888, uint8_t, 1, uint32_t, 1)
-BIND_N_MASK_DST(composite_add_n_8_8, uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
+                                           OVER, uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
+                                           OVER, uint16_t, uint16_t)
 
-BIND_SRC_N_DST(composite_over_8888_n_8888, uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
+                                         uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
+                                         uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
+                                         uint16_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
+                                         uint16_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER,
+                                         uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD,
+                                         uint32_t, uint32_t)
 
-BIND_SRC_MASK_DST(composite_add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_8888, SRC,
+                                            uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_0565, SRC,
+                                            uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_x888, SRC,
+                                            uint16_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_0565, SRC,
+                                            uint16_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, OVER,
+                                            uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, ADD,
+                                            uint32_t, uint32_t)
 
 void
 pixman_composite_src_n_8_asm_neon (int32_t   w,
@@ -292,46 +226,229 @@ pixman_fill_neon (uint32_t *bits,
     }
 }
 
-static const pixman_fast_path_t arm_neon_fast_path_array[] =
+static pixman_bool_t
+pixman_blt_neon (uint32_t *src_bits,
+                 uint32_t *dst_bits,
+                 int       src_stride,
+                 int       dst_stride,
+                 int       src_bpp,
+                 int       dst_bpp,
+                 int       src_x,
+                 int       src_y,
+                 int       dest_x,
+                 int       dest_y,
+                 int       width,
+                 int       height)
+{
+    if (src_bpp != dst_bpp)
+       return FALSE;
+
+    switch (src_bpp)
+    {
+    case 16:
+       pixman_composite_src_0565_0565_asm_neon (
+               width, height,
+               (uint16_t *)(((char *) dst_bits) +
+               dest_y * dst_stride * 4 + dest_x * 2), dst_stride * 2,
+               (uint16_t *)(((char *) src_bits) +
+               src_y * src_stride * 4 + src_x * 2), src_stride * 2);
+       return TRUE;
+    case 32:
+       pixman_composite_src_8888_8888_asm_neon (
+               width, height,
+               (uint32_t *)(((char *) dst_bits) +
+               dest_y * dst_stride * 4 + dest_x * 4), dst_stride,
+               (uint32_t *)(((char *) src_bits) +
+               src_y * src_stride * 4 + src_x * 4), src_stride);
+       return TRUE;
+    default:
+       return FALSE;
+    }
+}
+
+static const pixman_fast_path_t arm_neon_fast_paths[] =
 {
-    { PIXMAN_OP_SRC,  PIXMAN_r5g6b5,   PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_0565_0565,    0 },
-    { PIXMAN_OP_SRC,  PIXMAN_b5g6r5,   PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_0565_0565,    0 },
+    PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     r5g6b5,   neon_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     b5g6r5,   neon_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
+    PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
+    PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     b5g6r5,   neon_composite_src_8888_0565),
+    PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     b5g6r5,   neon_composite_src_8888_0565),
+    PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     a8r8g8b8, neon_composite_src_0565_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     x8r8g8b8, neon_composite_src_0565_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     a8b8g8r8, neon_composite_src_0565_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     x8b8g8r8, neon_composite_src_0565_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     x8r8g8b8, neon_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     x8r8g8b8, neon_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     x8b8g8r8, neon_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     x8b8g8r8, neon_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     a8r8g8b8, neon_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     a8b8g8r8, neon_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  r8g8b8,   null,     r8g8b8,   neon_composite_src_0888_0888),
+    PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     x8r8g8b8, neon_composite_src_0888_8888_rev),
+    PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     r5g6b5,   neon_composite_src_0888_0565_rev),
+    PIXMAN_STD_FAST_PATH (SRC,  pixbuf,   pixbuf,   a8r8g8b8, neon_composite_src_pixbuf_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  pixbuf,   pixbuf,   a8b8g8r8, neon_composite_src_rpixbuf_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  rpixbuf,  rpixbuf,  a8r8g8b8, neon_composite_src_rpixbuf_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  rpixbuf,  rpixbuf,  a8b8g8r8, neon_composite_src_pixbuf_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       a8r8g8b8, neon_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       x8r8g8b8, neon_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       a8b8g8r8, neon_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       x8b8g8r8, neon_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       a8,       neon_composite_src_n_8_8),
+
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8,       neon_composite_over_n_8_8),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   neon_composite_over_n_8_0565),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   neon_composite_over_n_8_0565),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8r8g8b8, neon_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8r8g8b8, neon_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8b8g8r8, neon_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8b8g8r8, neon_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     r5g6b5,   neon_composite_over_n_0565),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     a8r8g8b8, neon_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     x8r8g8b8, neon_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5,   neon_composite_over_n_8888_0565_ca),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5,   neon_composite_over_n_8888_0565_ca),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    a8r8g8b8, neon_composite_over_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, neon_composite_over_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    r5g6b5,   neon_composite_over_8888_n_0565),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid,    b5g6r5,   neon_composite_over_8888_n_0565),
+    PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   solid,    r5g6b5,   neon_composite_over_0565_n_0565),
+    PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   solid,    b5g6r5,   neon_composite_over_0565_n_0565),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       a8r8g8b8, neon_composite_over_8888_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       x8r8g8b8, neon_composite_over_8888_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       a8b8g8r8, neon_composite_over_8888_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       x8b8g8r8, neon_composite_over_8888_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       r5g6b5,   neon_composite_over_8888_8_0565),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       b5g6r5,   neon_composite_over_8888_8_0565),
+    PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   a8,       r5g6b5,   neon_composite_over_0565_8_0565),
+    PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   a8,       b5g6r5,   neon_composite_over_0565_8_0565),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     r5g6b5,   neon_composite_over_8888_0565),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     b5g6r5,   neon_composite_over_8888_0565),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     a8r8g8b8, neon_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     x8r8g8b8, neon_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     a8b8g8r8, neon_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     x8b8g8r8, neon_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8,       neon_composite_add_n_8_8),
+    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8r8g8b8, neon_composite_add_n_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8b8g8r8, neon_composite_add_n_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       neon_composite_add_8_8_8),
+    PIXMAN_STD_FAST_PATH (ADD,  r5g6b5,   a8,       r5g6b5,   neon_composite_add_0565_8_0565),
+    PIXMAN_STD_FAST_PATH (ADD,  b5g6r5,   a8,       b5g6r5,   neon_composite_add_0565_8_0565),
+    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8,       a8r8g8b8, neon_composite_add_8888_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, a8,       a8b8g8r8, neon_composite_add_8888_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, solid,    a8r8g8b8, neon_composite_add_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, solid,    a8b8g8r8, neon_composite_add_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       neon_composite_add_8_8),
+    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, neon_composite_add_8888_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, neon_composite_add_8888_8888),
+    PIXMAN_STD_FAST_PATH (IN,   solid,    null,     a8,       neon_composite_in_n_8),
+    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
+    PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, r5g6b5,   neon_composite_out_reverse_8_0565),
+    PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, b5g6r5,   neon_composite_out_reverse_8_0565),
+    PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, a8r8g8b8, neon_composite_out_reverse_8_8888),
+    PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, a8b8g8r8, neon_composite_out_reverse_8_8888),
+
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
+
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
+
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
+
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
+    /* Note: NONE repeat is not supported yet */
+    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
+    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
+    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
+    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
+
+    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565),
+
+    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
+
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
+
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
+
+    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565),
+
+    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+
+    SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8_8888),
+
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_8_0565),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_8_0565),
+
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8_x888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_8_0565),
+
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
+
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
+
     { PIXMAN_OP_NONE },
 };
 
-const pixman_fast_path_t *const arm_neon_fast_paths = arm_neon_fast_path_array;
-
-static void
-arm_neon_composite (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    pixman_image_t *         src,
-                    pixman_image_t *         mask,
-                    pixman_image_t *         dest,
-                    int32_t                  src_x,
-                    int32_t                  src_y,
-                    int32_t                  mask_x,
-                    int32_t                  mask_y,
-                    int32_t                  dest_x,
-                    int32_t                  dest_y,
-                    int32_t                  width,
-                    int32_t                  height)
+static pixman_bool_t
+arm_neon_blt (pixman_implementation_t *imp,
+              uint32_t *               src_bits,
+              uint32_t *               dst_bits,
+              int                      src_stride,
+              int                      dst_stride,
+              int                      src_bpp,
+              int                      dst_bpp,
+              int                      src_x,
+              int                      src_y,
+              int                      dest_x,
+              int                      dest_y,
+              int                      width,
+              int                      height)
 {
-    if (_pixman_run_fast_path (arm_neon_fast_paths, imp,
-                               op, src, mask, dest,
-                               src_x, src_y,
-                               mask_x, mask_y,
-                               dest_x, dest_y,
-                               width, height))
+    if (!pixman_blt_neon (
+            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+            src_x, src_y, dest_x, dest_y, width, height))
+
     {
-       return;
+       return _pixman_implementation_blt (
+           imp->delegate,
+           src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+           src_x, src_y, dest_x, dest_y, width, height);
     }
 
-    _pixman_implementation_composite (imp->delegate, op,
-                                      src, mask, dest,
-                                      src_x, src_y,
-                                      mask_x, mask_y,
-                                      dest_x, dest_y,
-                                      width, height);
+    return TRUE;
 }
 
 static pixman_bool_t
@@ -352,13 +469,48 @@ arm_neon_fill (pixman_implementation_t *imp,
        imp->delegate, bits, stride, bpp, x, y, width, height, xor);
 }
 
+#define BIND_COMBINE_U(name)                                             \
+void                                                                     \
+pixman_composite_scanline_##name##_mask_asm_neon (int32_t         w,     \
+                                                  const uint32_t *dst,   \
+                                                  const uint32_t *src,   \
+                                                  const uint32_t *mask); \
+                                                                         \
+void                                                                     \
+pixman_composite_scanline_##name##_asm_neon (int32_t         w,          \
+                                             const uint32_t *dst,        \
+                                             const uint32_t *src);       \
+                                                                         \
+static void                                                              \
+neon_combine_##name##_u (pixman_implementation_t *imp,                   \
+                         pixman_op_t              op,                    \
+                         uint32_t *               dest,                  \
+                         const uint32_t *         src,                   \
+                         const uint32_t *         mask,                  \
+                         int                      width)                 \
+{                                                                        \
+    if (mask)                                                            \
+       pixman_composite_scanline_##name##_mask_asm_neon (width, dest,   \
+                                                         src, mask);    \
+    else                                                                 \
+       pixman_composite_scanline_##name##_asm_neon (width, dest, src);  \
+}
+
+BIND_COMBINE_U (over)
+BIND_COMBINE_U (add)
+BIND_COMBINE_U (out_reverse)
+
 pixman_implementation_t *
-_pixman_implementation_create_arm_neon (void)
+_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback)
 {
-    pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
-    pixman_implementation_t *imp = _pixman_implementation_create (general);
+    pixman_implementation_t *imp =
+       _pixman_implementation_create (fallback, arm_neon_fast_paths);
+
+    imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
+    imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
+    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u;
 
-    imp->composite = arm_neon_composite;
+    imp->blt = arm_neon_blt;
     imp->fill = arm_neon_fill;
 
     return imp;