Add missing pixman-arm-simd.[ch] files.
author Søren Sandmann Pedersen <sandmann@redhat.com>
Mon, 3 Nov 2008 18:09:02 +0000 (13:09 -0500)
committer Søren Sandmann Pedersen <sandmann@redhat.com>
Mon, 3 Nov 2008 18:09:02 +0000 (13:09 -0500)
Pointed out by Chris Ball and Adrian Bunk.

pixman/pixman-arm-simd.c [new file with mode: 0644]
pixman/pixman-arm-simd.h [new file with mode: 0644]

diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
new file mode 100644 (file)
index 0000000..c7851cb
--- /dev/null
@@ -0,0 +1,407 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Jeff Muizelaar (jeff@infidigm.net)
+ *
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-arm-simd.h"
+
+/*
+ * Saturating byte-wise ADD of an 8-bit source rectangle onto an 8-bit
+ * destination rectangle: every destination byte is replaced by the
+ * unsigned saturating sum of itself and the matching source byte,
+ * computed with the ARMv6 uqadd8 instruction.
+ *
+ * pSrc/xSrc/ySrc and pDst/xDst/yDst locate the first byte of each
+ * rectangle (through fbComposeGetStart); width and height give its size
+ * in pixels.  op, pMask, xMask and yMask are not read by this routine.
+ */
+void
+fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
+                               pixman_image_t * pSrc,
+                               pixman_image_t * pMask,
+                               pixman_image_t * pDst,
+                               int16_t      xSrc,
+                               int16_t      ySrc,
+                               int16_t      xMask,
+                               int16_t      yMask,
+                               int16_t      xDst,
+                               int16_t      yDst,
+                               uint16_t     width,
+                               uint16_t     height)
+{
+    uint8_t    *dstLine, *dst;
+    uint8_t    *srcLine, *src;
+    int        dstStride, srcStride;
+    uint16_t   w;
+    uint8_t    s, d;
+
+    fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
+    fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
+
+    while (height--)
+    {
+        dst = dstLine;
+        dstLine += dstStride;
+        src = srcLine;
+        srcLine += srcStride;
+        w = width;
+
+        /* Leading pixels, one byte at a time, until dst is 4-byte aligned. */
+        while (w && (unsigned long)dst & 3)
+        {
+            s = *src;
+            d = *dst;
+            /* The template names only the declared operands: %0 is d
+             * (read and written), %1 is s. */
+            asm ("uqadd8 %0, %1, %0" : "+r" (d) : "r" (s));
+            *dst = d;
+
+            dst++;
+            src++;
+            w--;
+        }
+
+        /* Four pixels per iteration.  Only dst was aligned above; src is
+         * read through a uint32_t pointer whatever its alignment.
+         * NOTE(review): confirm unaligned word loads are acceptable on
+         * every target this file is built for. */
+        while (w >= 4)
+        {
+            asm ("uqadd8 %0, %1, %2"
+                 : "=r" (*(uint32_t *)dst)
+                 : "r" (*(uint32_t *)src), "r" (*(uint32_t *)dst));
+            dst += 4;
+            src += 4;
+            w -= 4;
+        }
+
+        /* Trailing pixels (fewer than four), one byte at a time. */
+        while (w)
+        {
+            s = *src;
+            d = *dst;
+            asm ("uqadd8 %0, %1, %0" : "+r" (d) : "r" (s));
+            *dst = d;
+
+            dst++;
+            src++;
+            w--;
+        }
+    }
+}
+
+void
+fbCompositeSrc_8888x8888arm (pixman_op_t op,
+                        pixman_image_t * pSrc,
+                        pixman_image_t * pMask,
+                        pixman_image_t * pDst,
+                        int16_t      xSrc,
+                        int16_t      ySrc,
+                        int16_t      xMask,
+                        int16_t      yMask,
+                        int16_t      xDst,
+                        int16_t      yDst,
+                        uint16_t     width,
+                        uint16_t     height)
+{
+    uint32_t   *dstLine, *dst;
+    uint32_t   *srcLine, *src;
+    int        dstStride, srcStride;
+    uint16_t   w;
+    uint32_t component_half = 0x800080;
+    uint32_t upper_component_mask = 0xff00ff00;
+    uint32_t alpha_mask = 0xff;
+
+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
+    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
+
+    while (height--)
+    {
+       dst = dstLine;
+       dstLine += dstStride;
+       src = srcLine;
+       srcLine += srcStride;
+       w = width;
+
+//#define inner_branch
+       asm volatile (
+                       "cmp %[w], #0\n\t"
+                       "beq 2f\n\t"
+                       "1:\n\t"
+                       /* load src */
+                       "ldr r5, [%[src]], #4\n\t"
+#ifdef inner_branch
+                       /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
+                        * The 0x0 case also allows us to avoid doing an unecessary data
+                        * write which is more valuable so we only check for that */
+                       "cmp r5, #0\n\t"
+                       "beq 3f\n\t"
+
+                       /* = 255 - alpha */
+                       "sub r8, %[alpha_mask], r5, lsr #24\n\t"
+
+                       "ldr r4, [%[dest]] \n\t"
+
+#else
+                       "ldr r4, [%[dest]] \n\t"
+
+                       /* = 255 - alpha */
+                       "sub r8, %[alpha_mask], r5, lsr #24\n\t"
+#endif
+                       "uxtb16 r6, r4\n\t"
+                       "uxtb16 r7, r4, ror #8\n\t"
+
+                       /* multiply by 257 and divide by 65536 */
+                       "mla r6, r6, r8, %[component_half]\n\t"
+                       "mla r7, r7, r8, %[component_half]\n\t"
+
+                       "uxtab16 r6, r6, r6, ror #8\n\t"
+                       "uxtab16 r7, r7, r7, ror #8\n\t"
+
+                       /* recombine the 0xff00ff00 bytes of r6 and r7 */
+                       "and r7, %[upper_component_mask]\n\t"
+                       "uxtab16 r6, r7, r6, ror #8\n\t"
+
+                       "uqadd8 r5, r6, r5\n\t"
+
+#ifdef inner_branch
+                       "3:\n\t"
+
+#endif
+                       "str r5, [%[dest]], #4\n\t"
+                       /* increment counter and jmp to top */
+                       "subs   %[w], %[w], #1\n\t"
+                       "bne    1b\n\t"
+                       "2:\n\t"
+                       : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
+                       : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask),
+                         [alpha_mask] "r" (alpha_mask)
+                       : "r4", "r5", "r6", "r7", "r8", "cc", "memory"
+                       );
+    }
+}
+
+void
+fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
+                              pixman_image_t * pSrc,
+                              pixman_image_t * pMask,
+                              pixman_image_t * pDst,
+                              int16_t  xSrc,
+                              int16_t  ySrc,
+                              int16_t      xMask,
+                              int16_t      yMask,
+                              int16_t      xDst,
+                              int16_t      yDst,
+                              uint16_t     width,
+                              uint16_t     height)
+{
+    uint32_t   *dstLine, *dst;
+    uint32_t   *srcLine, *src;
+    uint32_t   mask;
+    int        dstStride, srcStride;
+    uint16_t   w;
+    uint32_t component_half = 0x800080;
+    uint32_t alpha_mask = 0xff;
+
+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
+    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
+
+    fbComposeGetSolid (pMask, mask, pDst->bits.format);
+    mask = (mask) >> 24;
+
+    while (height--)
+    {
+       dst = dstLine;
+       dstLine += dstStride;
+       src = srcLine;
+       srcLine += srcStride;
+       w = width;
+
+//#define inner_branch
+       asm volatile (
+                       "cmp %[w], #0\n\t"
+                       "beq 2f\n\t"
+                       "1:\n\t"
+                       /* load src */
+                       "ldr r5, [%[src]], #4\n\t"
+#ifdef inner_branch
+                       /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
+                        * The 0x0 case also allows us to avoid doing an unecessary data
+                        * write which is more valuable so we only check for that */
+                       "cmp r5, #0\n\t"
+                       "beq 3f\n\t"
+
+#endif
+                       "ldr r4, [%[dest]] \n\t"
+
+                       "uxtb16 r6, r5\n\t"
+                       "uxtb16 r7, r5, ror #8\n\t"
+
+                       /* multiply by alpha (r8) then by 257 and divide by 65536 */
+                       "mla r6, r6, %[mask_alpha], %[component_half]\n\t"
+                       "mla r7, r7, %[mask_alpha], %[component_half]\n\t"
+
+                       "uxtab16 r6, r6, r6, ror #8\n\t"
+                       "uxtab16 r7, r7, r7, ror #8\n\t"
+
+                       "uxtb16 r6, r6, ror #8\n\t"
+                       "uxtb16 r7, r7, ror #8\n\t"
+
+                       /* recombine */
+                       "orr r5, r6, r7, lsl #8\n\t"
+
+                       "uxtb16 r6, r4\n\t"
+                       "uxtb16 r7, r4, ror #8\n\t"
+
+                       /* 255 - alpha */
+                       "sub r8, %[alpha_mask], r5, lsr #24\n\t"
+
+                       /* multiply by alpha (r8) then by 257 and divide by 65536 */
+                       "mla r6, r6, r8, %[component_half]\n\t"
+                       "mla r7, r7, r8, %[component_half]\n\t"
+
+                       "uxtab16 r6, r6, r6, ror #8\n\t"
+                       "uxtab16 r7, r7, r7, ror #8\n\t"
+
+                       "uxtb16 r6, r6, ror #8\n\t"
+                       "uxtb16 r7, r7, ror #8\n\t"
+
+                       /* recombine */
+                       "orr r6, r6, r7, lsl #8\n\t"
+
+                       "uqadd8 r5, r6, r5\n\t"
+
+#ifdef inner_branch
+                       "3:\n\t"
+
+#endif
+                       "str r5, [%[dest]], #4\n\t"
+                       /* increment counter and jmp to top */
+                       "subs   %[w], %[w], #1\n\t"
+                       "bne    1b\n\t"
+                       "2:\n\t"
+                       : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
+                       : [component_half] "r" (component_half), [mask_alpha] "r" (mask),
+                         [alpha_mask] "r" (alpha_mask)
+                       : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory"
+                       );
+    }
+}
+
+void
+fbCompositeSolidMask_nx8x8888arm (pixman_op_t      op,
+                              pixman_image_t * pSrc,
+                              pixman_image_t * pMask,
+                              pixman_image_t * pDst,
+                              int16_t      xSrc,
+                              int16_t      ySrc,
+                              int16_t      xMask,
+                              int16_t      yMask,
+                              int16_t      xDst,
+                              int16_t      yDst,
+                              uint16_t     width,
+                              uint16_t     height)
+{
+    uint32_t    src, srca;
+    uint32_t   *dstLine, *dst;
+    uint8_t    *maskLine, *mask;
+    int                 dstStride, maskStride;
+    uint16_t    w;
+
+    fbComposeGetSolid(pSrc, src, pDst->bits.format);
+
+    srca = src >> 24;
+    if (src == 0)
+       return;
+
+    uint32_t component_mask = 0xff00ff;
+    uint32_t component_half = 0x800080;
+
+    uint32_t src_hi = (src >> 8) & component_mask;
+    uint32_t src_lo = src & component_mask;
+
+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
+    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
+
+    while (height--)
+    {
+       dst = dstLine;
+       dstLine += dstStride;
+       mask = maskLine;
+       maskLine += maskStride;
+       w = width;
+
+//#define inner_branch
+       asm volatile (
+                       "cmp %[w], #0\n\t"
+                       "beq 2f\n\t"
+                       "1:\n\t"
+                       /* load mask */
+                       "ldrb r5, [%[mask]], #1\n\t"
+#ifdef inner_branch
+                       /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
+                        * The 0x0 case also allows us to avoid doing an unecessary data
+                        * write which is more valuable so we only check for that */
+                       "cmp r5, #0\n\t"
+                       "beq 3f\n\t"
+
+#endif
+                       "ldr r4, [%[dest]] \n\t"
+
+                       /* multiply by alpha (r8) then by 257 and divide by 65536 */
+                       "mla r6, %[src_lo], r5, %[component_half]\n\t"
+                       "mla r7, %[src_hi], r5, %[component_half]\n\t"
+
+                       "uxtab16 r6, r6, r6, ror #8\n\t"
+                       "uxtab16 r7, r7, r7, ror #8\n\t"
+
+                       "uxtb16 r6, r6, ror #8\n\t"
+                       "uxtb16 r7, r7, ror #8\n\t"
+
+                       /* recombine */
+                       "orr r5, r6, r7, lsl #8\n\t"
+
+                       "uxtb16 r6, r4\n\t"
+                       "uxtb16 r7, r4, ror #8\n\t"
+
+                       /* we could simplify this to use 'sub' if we were
+                        * willing to give up a register for alpha_mask */
+                       "mvn r8, r5\n\t"
+                       "mov r8, r8, lsr #24\n\t"
+
+                       /* multiply by alpha (r8) then by 257 and divide by 65536 */
+                       "mla r6, r6, r8, %[component_half]\n\t"
+                       "mla r7, r7, r8, %[component_half]\n\t"
+
+                       "uxtab16 r6, r6, r6, ror #8\n\t"
+                       "uxtab16 r7, r7, r7, ror #8\n\t"
+
+                       "uxtb16 r6, r6, ror #8\n\t"
+                       "uxtb16 r7, r7, ror #8\n\t"
+
+                       /* recombine */
+                       "orr r6, r6, r7, lsl #8\n\t"
+
+                       "uqadd8 r5, r6, r5\n\t"
+
+#ifdef inner_branch
+                       "3:\n\t"
+
+#endif
+                       "str r5, [%[dest]], #4\n\t"
+                       /* increment counter and jmp to top */
+                       "subs   %[w], %[w], #1\n\t"
+                       "bne    1b\n\t"
+                       "2:\n\t"
+                       : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask)
+                       : [component_half] "r" (component_half),
+                         [src_hi] "r" (src_hi), [src_lo] "r" (src_lo)
+                       : "r4", "r5", "r6", "r7", "r8", "cc", "memory"
+                       );
+    }
+}
diff --git a/pixman/pixman-arm-simd.h b/pixman/pixman-arm-simd.h
new file mode 100644 (file)
index 0000000..ecaace5
--- /dev/null
@@ -0,0 +1,94 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Jeff Muizelaar (jeff@infidigm.net)
+ *
+ */
+
+#ifndef PIXMAN_ARM_SIMD_H
+#define PIXMAN_ARM_SIMD_H
+
+#include "pixman-private.h"
+
+/*
+ * pixman_have_arm_simd() evaluates to TRUE when this header is compiled
+ * with USE_ARM_SIMD defined and to FALSE otherwise.
+ */
+#ifdef USE_ARM_SIMD
+
+static inline pixman_bool_t pixman_have_arm_simd(void) { return TRUE; }
+
+#else
+#define pixman_have_arm_simd() FALSE
+#endif
+
+#ifdef USE_ARM_SIMD
+
+/*
+ * Compositing routines implemented in pixman-arm-simd.c.  They all share
+ * one signature: the operator, the source, mask and destination images,
+ * the x/y origin inside each image, and the width and height of the
+ * rectangle to composite.
+ */
+
+/* Saturating add of an 8-bit source onto an 8-bit destination. */
+void
+fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
+                               pixman_image_t * pSrc,
+                               pixman_image_t * pMask,
+                               pixman_image_t * pDst,
+                               int16_t      xSrc,
+                               int16_t      ySrc,
+                               int16_t      xMask,
+                               int16_t      yMask,
+                               int16_t      xDst,
+                               int16_t      yDst,
+                               uint16_t     width,
+                               uint16_t     height);
+
+/* 32-bit source composited onto a 32-bit destination. */
+void
+fbCompositeSrc_8888x8888arm (pixman_op_t op,
+                        pixman_image_t * pSrc,
+                        pixman_image_t * pMask,
+                        pixman_image_t * pDst,
+                        int16_t      xSrc,
+                        int16_t      ySrc,
+                        int16_t      xMask,
+                        int16_t      yMask,
+                        int16_t      xDst,
+                        int16_t      yDst,
+                        uint16_t     width,
+                        uint16_t     height);
+
+/* 32-bit source, scaled by a solid mask, composited onto a 32-bit
+ * destination. */
+void
+fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
+                        pixman_image_t * pSrc,
+                        pixman_image_t * pMask,
+                        pixman_image_t * pDst,
+                        int16_t      xSrc,
+                        int16_t      ySrc,
+                        int16_t      xMask,
+                        int16_t      yMask,
+                        int16_t      xDst,
+                        int16_t      yDst,
+                        uint16_t     width,
+                        uint16_t     height);
+
+/* Solid colour, scaled by an 8-bit mask, composited onto a 32-bit
+ * destination. */
+void
+fbCompositeSolidMask_nx8x8888arm (pixman_op_t op,
+                        pixman_image_t * pSrc,
+                        pixman_image_t * pMask,
+                        pixman_image_t * pDst,
+                        int16_t      xSrc,
+                        int16_t      ySrc,
+                        int16_t      xMask,
+                        int16_t      yMask,
+                        int16_t      xDst,
+                        int16_t      yDst,
+                        uint16_t     width,
+                        uint16_t     height);
+
+
+#endif /* USE_ARM_SIMD */
+
+#endif /* PIXMAN_ARM_SIMD_H */