Some formatting changes to pixman-arm-neon.c
author Søren Sandmann Pedersen <sandmann@redhat.com>
Mon, 20 Jul 2009 23:56:46 +0000 (19:56 -0400)
committer Søren Sandmann Pedersen <sandmann@redhat.com>
Mon, 20 Jul 2009 23:56:46 +0000 (19:56 -0400)
pixman/pixman-arm-neon.c

index cea6f75..2f7b8a0 100644
@@ -169,8 +169,8 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
            w = width;
 
 #ifndef USE_GCC_INLINE_ASM
-           sval = vld1_u8 ((void*)src);
-           dval = vld1_u8 ((void*)dst);
+           sval = vld1_u8 ((void *)src);
+           dval = vld1_u8 ((void *)dst);
            keep_dst = dst;
 
            temp = vqadd_u8 (dval, sval);
@@ -181,10 +181,10 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
 
            while (w)
            {
-               sval = vld1_u8 ((void*)src);
-               dval = vld1_u8 ((void*)dst);
+               sval = vld1_u8 ((void *)src);
+               dval = vld1_u8 ((void *)dst);
 
-               vst1_u8 ((void*)keep_dst, temp);
+               vst1_u8 ((void *)keep_dst, temp);
                keep_dst = dst;
 
                temp = vqadd_u8 (dval, sval);
@@ -194,7 +194,7 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
                w -= 8;
            }
 
-           vst1_u8 ((void*)keep_dst, temp);
+           vst1_u8 ((void *)keep_dst, temp);
 #else
            asm volatile (
 /* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
@@ -249,9 +249,9 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
            if (w & 4)
            {
                sval = vreinterpret_u8_u32 (
-                   vld1_lane_u32 ((void*)src, vreinterpret_u32_u8 (sval), 1));
+                   vld1_lane_u32 ((void *)src, vreinterpret_u32_u8 (sval), 1));
                dval = vreinterpret_u8_u32 (
-                   vld1_lane_u32 ((void*)dst, vreinterpret_u32_u8 (dval), 1));
+                   vld1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (dval), 1));
 
                dst4 = dst;
                src += 4;
@@ -261,9 +261,9 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
            if (w & 2)
            {
                sval = vreinterpret_u8_u16 (
-                   vld1_lane_u16 ((void*)src, vreinterpret_u16_u8 (sval), 1));
+                   vld1_lane_u16 ((void *)src, vreinterpret_u16_u8 (sval), 1));
                dval = vreinterpret_u8_u16 (
-                   vld1_lane_u16 ((void*)dst, vreinterpret_u16_u8 (dval), 1));
+                   vld1_lane_u16 ((void *)dst, vreinterpret_u16_u8 (dval), 1));
 
                dst2 = dst;
                src += 2;
@@ -282,10 +282,10 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
                vst1_lane_u8 (dst, dval, 1);
 
            if (w & 2)
-               vst1_lane_u16 ((void*)dst2, vreinterpret_u16_u8 (dval), 1);
+               vst1_lane_u16 ((void *)dst2, vreinterpret_u16_u8 (dval), 1);
 
            if (w & 4)
-               vst1_lane_u32 ((void*)dst4, vreinterpret_u32_u8 (dval), 1);
+               vst1_lane_u32 ((void *)dst4, vreinterpret_u32_u8 (dval), 1);
        }
     }
 }
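The intrinsics path above is software-pipelined: loads for the next 8-pixel block are issued before the previous block's result is stored, with keep_dst remembering where the in-flight result belongs. A minimal standalone sketch of that structure, assuming a width that is a positive multiple of 8 (the real code also peels leading/trailing fragments); the function name is illustrative, not pixman's:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Illustrative model only, not pixman's entry point. */
    static void
    add_8000_8000_model (uint8_t *dst, const uint8_t *src, int w)
    {
        uint8x8_t sval = vld1_u8 ((void *)src);
        uint8x8_t dval = vld1_u8 ((void *)dst);
        uint8_t  *keep_dst = dst;
        uint8x8_t temp = vqadd_u8 (dval, sval);   /* saturating 8-bit add */

        src += 8; dst += 8; w -= 8;

        while (w)
        {
            /* Start the next block's loads before storing the last result. */
            sval = vld1_u8 ((void *)src);
            dval = vld1_u8 ((void *)dst);

            vst1_u8 ((void *)keep_dst, temp);
            keep_dst = dst;

            temp = vqadd_u8 (dval, sval);
            src += 8; dst += 8; w -= 8;
        }

        vst1_u8 ((void *)keep_dst, temp);
    }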
@@ -328,8 +328,8 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
            w = width;
 
 #ifndef USE_GCC_INLINE_ASM
-           sval = vld4_u8 ((void*)src);
-           dval = vld4_u8 ((void*)dst);
+           sval = vld4_u8 ((void *)src);
+           dval = vld4_u8 ((void *)dst);
            keep_dst = dst;
 
            temp = neon8mul (dval, vmvn_u8 (sval.val[3]));
@@ -341,10 +341,10 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
 
            while (w)
            {
-               sval = vld4_u8 ((void*)src);
-               dval = vld4_u8 ((void*)dst);
+               sval = vld4_u8 ((void *)src);
+               dval = vld4_u8 ((void *)dst);
 
-               vst4_u8 ((void*)keep_dst, temp);
+               vst4_u8 ((void *)keep_dst, temp);
                keep_dst = dst;
 
                temp = neon8mul (dval, vmvn_u8 (sval.val[3]));
@@ -355,7 +355,7 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
                w -= 8;
            }
 
-           vst4_u8 ((void*)keep_dst, temp);
+           vst4_u8 ((void *)keep_dst, temp);
 #else
            asm volatile (
 /* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
@@ -427,10 +427,10 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
                uint8x8_t sval, dval;
 
                /* two 32-bit pixels packed into D-reg; ad-hoc vectorization */
-               sval = vreinterpret_u8_u32 (vld1_u32 ((void*)src));
-               dval = vreinterpret_u8_u32 (vld1_u32 ((void*)dst));
+               sval = vreinterpret_u8_u32 (vld1_u32 ((void *)src));
+               dval = vreinterpret_u8_u32 (vld1_u32 ((void *)dst));
                dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
-               vst1_u8 ((void*)dst, vqadd_u8 (sval, dval));
+               vst1_u8 ((void *)dst, vqadd_u8 (sval, dval));
 
                src += 2;
                dst += 2;
@@ -442,10 +442,10 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
                uint8x8_t sval, dval;
 
                /* single 32-bit pixel in lane 0 */
-               sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)src));  /* only interested in lane 0 */
-               dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)dst));  /* only interested in lane 0 */
+               sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)src));  /* only interested in lane 0 */
+               dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst));  /* only interested in lane 0 */
                dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
-               vst1_lane_u32 ((void*)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
+               vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
            }
        }
     }
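The vmvn/neon2mul/vqadd sequence in these hunks is the premultiplied OVER operator: dest = src + dest * (255 - src_alpha) / 255 per channel. A scalar model of that math, assuming premultiplied ARGB and the usual rounded divide-by-255; both helper names below are illustrative:

    #include <stdint.h>

    /* round (a * b / 255), exact for 8-bit operands. */
    static uint8_t
    mul_div_255 (uint8_t a, uint8_t b)
    {
        uint16_t t = (uint16_t)a * b + 0x80;
        return (uint8_t)((t + (t >> 8)) >> 8);
    }

    static uint32_t
    over_pixel (uint32_t src, uint32_t dest)
    {
        uint8_t  ialpha = 255 - (uint8_t)(src >> 24);
        uint32_t result = 0;
        int      shift;

        for (shift = 0; shift < 32; shift += 8)
        {
            /* Saturate the sum, as vqadd_u8 does in the vector code. */
            uint16_t c = (uint8_t)(src >> shift)
                       + mul_div_255 ((uint8_t)(dest >> shift), ialpha);
            result |= (uint32_t)(c > 255 ? 255 : c) << shift;
        }
        return result;
    }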
@@ -495,8 +495,8 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
 #ifndef USE_GCC_INLINE_ASM
            uint8x8x4_t sval, dval, temp;
 
-           sval = vld4_u8 ((void*)src);
-           dval = vld4_u8 ((void*)dst);
+           sval = vld4_u8 ((void *)src);
+           dval = vld4_u8 ((void *)dst);
            keep_dst = dst;
 
            sval = neon8mul (sval, mask_alpha);
@@ -509,10 +509,10 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
 
            while (w)
            {
-               sval = vld4_u8 ((void*)src);
-               dval = vld4_u8 ((void*)dst);
+               sval = vld4_u8 ((void *)src);
+               dval = vld4_u8 ((void *)dst);
 
-               vst4_u8 ((void*)keep_dst, temp);
+               vst4_u8 ((void *)keep_dst, temp);
                keep_dst = dst;
 
                sval = neon8mul (sval, mask_alpha);
@@ -523,7 +523,7 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
                dst += 8;
                w -= 8;
            }
-           vst4_u8 ((void*)keep_dst, temp);
+           vst4_u8 ((void *)keep_dst, temp);
 #else
            asm volatile (
 /* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
@@ -612,8 +612,8 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
            {
                uint8x8_t sval, dval;
 
-               sval = vreinterpret_u8_u32 (vld1_u32 ((void*)src));
-               dval = vreinterpret_u8_u32 (vld1_u32 ((void*)dst));
+               sval = vreinterpret_u8_u32 (vld1_u32 ((void *)src));
+               dval = vreinterpret_u8_u32 (vld1_u32 ((void *)dst));
 
                /* sval * const alpha_mul */
                sval = neon2mul (sval, mask_alpha);
@@ -621,7 +621,7 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
                /* dval * 255-(src alpha) */
                dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
 
-               vst1_u8 ((void*)dst, vqadd_u8 (sval, dval));
+               vst1_u8 ((void *)dst, vqadd_u8 (sval, dval));
 
                src += 2;
                dst += 2;
@@ -632,8 +632,8 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
            {
                uint8x8_t sval, dval;
 
-               sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)src));
-               dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)dst));
+               sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)src));
+               dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst));
 
                /* sval * const alpha_mul */
                sval = neon2mul (sval, mask_alpha);
@@ -641,7 +641,7 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
                /* dval * 255-(src alpha) */
                dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
 
-               vst1_lane_u32 ((void*)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
+               vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
            }
        }
     }
@@ -703,12 +703,12 @@ neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
            uint16x8_t dval, temp;
            uint8x8x4_t sval8temp;
 
-           alpha = vld1_u8 ((void*)mask);
-           dval = vld1q_u16 ((void*)dst);
+           alpha = vld1_u8 ((void *)mask);
+           dval = vld1q_u16 ((void *)dst);
            keep_dst = dst;
 
-           sval8temp = neon8mul (sval8,alpha);
-           temp = pack0565 (neon8qadd (sval8temp,neon8mul (unpack0565 (dval),vmvn_u8 (sval8temp.val[3]))));
+           sval8temp = neon8mul (sval8, alpha);
+           temp = pack0565 (neon8qadd (sval8temp, neon8mul (unpack0565 (dval), vmvn_u8 (sval8temp.val[3]))));
 
            mask += (w & 7);
            dst += (w & 7);
@@ -716,20 +716,20 @@ neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
 
            while (w)
            {
-               dval = vld1q_u16 ((void*)dst);
-               alpha = vld1_u8 ((void*)mask);
+               dval = vld1q_u16 ((void *)dst);
+               alpha = vld1_u8 ((void *)mask);
 
-               vst1q_u16 ((void*)keep_dst,temp);
+               vst1q_u16 ((void *)keep_dst, temp);
                keep_dst = dst;
 
-               sval8temp = neon8mul (sval8,alpha);
-               temp = pack0565 (neon8qadd (sval8temp,neon8mul (unpack0565 (dval),vmvn_u8 (sval8temp.val[3]))));
+               sval8temp = neon8mul (sval8, alpha);
+               temp = pack0565 (neon8qadd (sval8temp, neon8mul (unpack0565 (dval), vmvn_u8 (sval8temp.val[3]))));
 
                mask+=8;
                dst+=8;
                w-=8;
            }
-           vst1q_u16 ((void*)keep_dst,temp);
+           vst1q_u16 ((void *)keep_dst, temp);
 #else
            asm volatile (
                "vdup.32      d0, %[src]\n\t"
@@ -842,35 +842,35 @@ neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
 
            if (w&4)
            {
-               alpha = vreinterpret_u8_u32 (vld1_lane_u32 ((void*)mask,vreinterpret_u32_u8 (alpha),1));
-               dval = vreinterpretq_u16_u64 (vld1q_lane_u64 ((void*)dst,vreinterpretq_u64_u16 (dval),1));
+               alpha = vreinterpret_u8_u32 (vld1_lane_u32 ((void *)mask, vreinterpret_u32_u8 (alpha),1));
+               dval = vreinterpretq_u16_u64 (vld1q_lane_u64 ((void *)dst, vreinterpretq_u64_u16 (dval),1));
                dst4=dst;
                mask+=4;
                dst+=4;
            }
            if (w&2)
            {
-               alpha = vreinterpret_u8_u16 (vld1_lane_u16 ((void*)mask,vreinterpret_u16_u8 (alpha),1));
-               dval = vreinterpretq_u16_u32 (vld1q_lane_u32 ((void*)dst,vreinterpretq_u32_u16 (dval),1));
+               alpha = vreinterpret_u8_u16 (vld1_lane_u16 ((void *)mask, vreinterpret_u16_u8 (alpha),1));
+               dval = vreinterpretq_u16_u32 (vld1q_lane_u32 ((void *)dst, vreinterpretq_u32_u16 (dval),1));
                dst2=dst;
                mask+=2;
                dst+=2;
            }
            if (w&1)
            {
-               alpha = vld1_lane_u8 ((void*)mask,alpha,1);
-               dval = vld1q_lane_u16 ((void*)dst,dval,1);
+               alpha = vld1_lane_u8 ((void *)mask, alpha,1);
+               dval = vld1q_lane_u16 ((void *)dst, dval,1);
            }
 
-           sval8temp = neon8mul (sval8,alpha);
-           temp = pack0565 (neon8qadd (sval8temp,neon8mul (unpack0565 (dval),vmvn_u8 (sval8temp.val[3]))));
+           sval8temp = neon8mul (sval8, alpha);
+           temp = pack0565 (neon8qadd (sval8temp, neon8mul (unpack0565 (dval), vmvn_u8 (sval8temp.val[3]))));
 
            if (w&1)
-               vst1q_lane_u16 ((void*)dst,temp,1);
+               vst1q_lane_u16 ((void *)dst, temp,1);
            if (w&2)
-               vst1q_lane_u32 ((void*)dst2,vreinterpretq_u32_u16 (temp),1);
+               vst1q_lane_u32 ((void *)dst2, vreinterpretq_u32_u16 (temp),1);
            if (w&4)
-               vst1q_lane_u64 ((void*)dst4,vreinterpretq_u64_u16 (temp),1);
+               vst1q_lane_u64 ((void *)dst4, vreinterpretq_u64_u16 (temp),1);
 #else
            asm volatile (
                "vdup.32      d0, %[src]\n\t"
@@ -1040,8 +1040,8 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
            uint8x8_t alpha;
            uint8x8x4_t dval, temp;
 
-           alpha = vld1_u8 ((void*)mask);
-           dval = vld4_u8 ((void*)dst);
+           alpha = vld1_u8 ((void *)mask);
+           dval = vld4_u8 ((void *)dst);
            keep_dst = dst;
 
            temp = neon8mul (sval8, alpha);
@@ -1054,10 +1054,10 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
 
            while (w)
            {
-               alpha = vld1_u8 ((void*)mask);
-               dval = vld4_u8 ((void*)dst);
+               alpha = vld1_u8 ((void *)mask);
+               dval = vld4_u8 ((void *)dst);
 
-               vst4_u8 ((void*)keep_dst, temp);
+               vst4_u8 ((void *)keep_dst, temp);
                keep_dst = dst;
 
                temp = neon8mul (sval8, alpha);
@@ -1068,7 +1068,7 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
                dst += 8;
                w -= 8;
            }
-           vst4_u8 ((void*)keep_dst, temp);
+           vst4_u8 ((void *)keep_dst, temp);
 #else
            asm volatile (
                "vdup.32      d0, %[src]\n\t"
@@ -1160,14 +1160,14 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
                uint8x8_t dval, temp, res;
 
                alpha = vtbl1_u8 (
-                   vreinterpret_u8_u16 (vld1_dup_u16 ((void*)mask)), mask_selector);
-               dval = vld1_u8 ((void*)dst);
+                   vreinterpret_u8_u16 (vld1_dup_u16 ((void *)mask)), mask_selector);
+               dval = vld1_u8 ((void *)dst);
 
                temp = neon2mul (sval2, alpha);
                res = vqadd_u8 (
                    temp, neon2mul (dval, vtbl1_u8 (vmvn_u8 (temp), alpha_selector)));
 
-               vst1_u8 ((void*)dst, res);
+               vst1_u8 ((void *)dst, res);
 
                mask += 2;
                dst += 2;
@@ -1178,14 +1178,14 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
            {
                uint8x8_t dval, temp, res;
 
-               alpha = vtbl1_u8 (vld1_dup_u8 ((void*)mask), mask_selector);
-               dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)dst));
+               alpha = vtbl1_u8 (vld1_dup_u8 ((void *)mask), mask_selector);
+               dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst));
 
                temp = neon2mul (sval2, alpha);
                res = vqadd_u8 (
                    temp, neon2mul (dval, vtbl1_u8 (vmvn_u8 (temp), alpha_selector)));
 
-               vst1_lane_u32 ((void*)dst, vreinterpret_u32_u8 (res), 0);
+               vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (res), 0);
            }
        }
     }
@@ -1377,17 +1377,17 @@ neon_composite_src_16_16 (pixman_implementation_t * impl,
            /* preload from next scanline */
            "   pld       [%[src], %[src_stride], LSL #1]       \n"
            "   sub       %[count], %[count], #64               \n"
-           "   vld1.16   {d16,d17,d18,d19}, [%[src]]!          \n"
-           "   vld1.16   {d20,d21,d22,d23}, [%[src]]!          \n"
+           "   vld1.16   {d16, d17, d18, d19}, [%[src]]!               \n"
+           "   vld1.16   {d20, d21, d22, d23}, [%[src]]!               \n"
            /* preload from next scanline */
            "   pld       [%[src], %[src_stride], LSL #1]       \n"
-           "   vld1.16   {d24,d25,d26,d27}, [%[src]]!          \n"
-           "   vld1.16   {d28,d29,d30,d31}, [%[src]]!          \n"
+           "   vld1.16   {d24, d25, d26, d27}, [%[src]]!               \n"
+           "   vld1.16   {d28, d29, d30, d31}, [%[src]]!               \n"
            "   cmp       %[count], #64                         \n"
-           "   vst1.16   {d16,d17,d18,d19}, [%[dst]]!          \n"
-           "   vst1.16   {d20,d21,d22,d23}, [%[dst]]!          \n"
-           "   vst1.16   {d24,d25,d26,d27}, [%[dst]]!          \n"
-           "   vst1.16   {d28,d29,d30,d31}, [%[dst]]!          \n"
+           "   vst1.16   {d16, d17, d18, d19}, [%[dst]]!               \n"
+           "   vst1.16   {d20, d21, d22, d23}, [%[dst]]!               \n"
+           "   vst1.16   {d24, d25, d26, d27}, [%[dst]]!               \n"
+           "   vst1.16   {d28, d29, d30, d31}, [%[dst]]!               \n"
            "   bge 0b                                          \n"
            "   cmp       %[count], #0                          \n"
            "   beq 7f    @ aligned fastpath                    \n"
@@ -1396,22 +1396,22 @@ neon_composite_src_16_16 (pixman_implementation_t * impl,
            "   beq 2f    @ skip oversized fragment             \n"
            /* preload from next scanline */
            "   pld       [%[src], %[src_stride], LSL #1]       \n"
-           "   vld1.16   {d16,d17,d18,d19}, [%[src]]!          \n"
-           "   vld1.16   {d20,d21,d22,d23}, [%[src]]!          \n"
-           "   vst1.16   {d16,d17,d18,d19}, [%[dst]]!          \n"
-           "   vst1.16   {d20,d21,d22,d23}, [%[dst]]!          \n"
+           "   vld1.16   {d16, d17, d18, d19}, [%[src]]!               \n"
+           "   vld1.16   {d20, d21, d22, d23}, [%[src]]!               \n"
+           "   vst1.16   {d16, d17, d18, d19}, [%[dst]]!               \n"
+           "   vst1.16   {d20, d21, d22, d23}, [%[dst]]!               \n"
            "2: @ two quadwords                                 \n"
            "   tst       %[count], #16                         \n"
            "   beq 3f    @ skip oversized fragment             \n"
            /* preload from next scanline */
            "   pld       [%[src], %[src_stride], LSL #1]       \n"
-           "   vld1.16   {d16,d17,d18,d19}, [%[src]]!          \n"
-           "   vst1.16   {d16,d17,d18,d19}, [%[dst]]!          \n"
+           "   vld1.16   {d16, d17, d18, d19}, [%[src]]!               \n"
+           "   vst1.16   {d16, d17, d18, d19}, [%[dst]]!               \n"
            "3: @ one quadword                                  \n"
            "   tst       %[count], #8                          \n"
            "   beq 4f    @ skip oversized fragment             \n"
-           "   vld1.16   {d16,d17}, [%[src]]!                  \n"
-           "   vst1.16   {d16,d17}, [%[dst]]!                  \n"
+           "   vld1.16   {d16, d17}, [%[src]]!                 \n"
+           "   vst1.16   {d16, d17}, [%[dst]]!                 \n"
            "4: @ one doubleword                                \n"
            "   tst       %[count], #4                          \n"
            "   beq 5f    @ skip oversized fragment             \n"
@@ -1533,8 +1533,8 @@ neon_composite_src_24_16 (pixman_implementation_t * impl,
            "0: @ start with sixteen pixels at a time           \n"
            "   sub       %[count], %[count], #16               \n"
            "   pld      [%[src], %[src_stride], lsl #2]        @ preload from next scanline                    \n"
-           "   vld4.8    {d0,d1,d2,d3}, [%[src]]!              @ d3 is alpha and ignored, d2-0 are rgb.        \n"
-           "   vld4.8    {d4,d5,d6,d7}, [%[src]]!              @ d7 is alpha and ignored, d6-4 are rgb.        \n"
+           "   vld4.8    {d0, d1, d2, d3}, [%[src]]!           @ d3 is alpha and ignored, d2-0 are rgb.        \n"
+           "   vld4.8    {d4, d5, d6, d7}, [%[src]]!           @ d7 is alpha and ignored, d6-4 are rgb.        \n"
            "   vshll.u8  q8, d2, #8                            @ expand first red for repacking                \n"
            "   vshll.u8  q10, d1, #8                           @ expand first green for repacking              \n"
            "   vshll.u8  q11, d0, #8                           @ expand first blue for repacking               \n"
@@ -1546,20 +1546,20 @@ neon_composite_src_24_16 (pixman_implementation_t * impl,
            "   vsri.u16  q9, q10, #5                           @ insert second green after red                 \n"
            "   vsri.u16  q9, q11, #11                          @ insert second blue after green                \n"
            "   cmp       %[count], #16                         \n"
-           "   vst1.16   {d16,d17,d18,d19}, [%[dst]]!          @ store 16 pixels                               \n"
+           "   vst1.16   {d16, d17, d18, d19}, [%[dst]]!          @ store 16 pixels                            \n"
            "   bge 0b                                          \n"
            "1: @ end of main loop                              \n"
            "   cmp       %[count], #8                          @ can we still do an 8-pixel block?             \n"
            "   blt 2f                                          \n"
            "   sub       %[count], %[count], #8                \n"
            "   pld      [%[src], %[src_stride], lsl #2]        @ preload from next scanline                    \n"
-           "   vld4.8    {d0,d1,d2,d3}, [%[src]]!              @ d3 is alpha and ignored, d2-0 are rgb.        \n"
+           "   vld4.8    {d0, d1, d2, d3}, [%[src]]!           @ d3 is alpha and ignored, d2-0 are rgb.        \n"
            "   vshll.u8  q8, d2, #8                            @ expand first red for repacking                \n"
            "   vshll.u8  q10, d1, #8                           @ expand first green for repacking              \n"
            "   vshll.u8  q11, d0, #8                           @ expand first blue for repacking               \n"
            "   vsri.u16  q8, q10, #5                           @ insert first green after red                  \n"
            "   vsri.u16  q8, q11, #11                          @ insert first blue after green                 \n"
-           "   vst1.16   {d16,d17}, [%[dst]]!          @ store 8 pixels                                \n"
+           "   vst1.16   {d16, d17}, [%[dst]]!          @ store 8 pixels                               \n"
            "2: @ end                                           \n"
 
            /* Clobbered input and working registers marked as input/outputs */
@@ -1848,7 +1848,7 @@ pixman_fill_neon (uint32_t *bits,
            /* The main block: Do 128-bit aligned writes */
            "3:\n"
            "subs               r5, r5, #1\n"
-           "vst1.64    {d0,d1}, [r4, :128]!\n"
+           "vst1.64    {d0, d1}, [r4, :128]!\n"
            "bne                3b\n"
 
            /* Handle the tailing bytes: Do 64, 32, 16 and 8-bit aligned writes as needed.
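The main block in this routine is a splat-and-store loop over 128-bit-aligned quadwords. An intrinsics rendering of the same idea, assuming the destination is already aligned and sized in whole quadwords (the surrounding asm handles the ragged head and tail); the function name is illustrative:

    #include <arm_neon.h>
    #include <stdint.h>

    static void
    fill_quadwords_model (uint32_t *dst, uint32_t value, int quadwords)
    {
        uint32x4_t v = vdupq_n_u32 (value);   /* splat the fill colour */

        while (quadwords--)
        {
            vst1q_u32 (dst, v);               /* one 128-bit store */
            dst += 4;
        }
    }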
@@ -1898,8 +1898,8 @@ pixman_fill_neon (uint32_t *bits,
 #define NEON_SCANLINE_BUFFER_PIXELS (1024)
 
 static inline void
-neon_quadword_copy (void*    dst,
-                   void*    src,
+neon_quadword_copy (void *    dst,
+                   void *    src,
                    uint32_t count,         /* of quadwords */
                    uint32_t trailer_count  /* of bytes */)
 {
@@ -1919,33 +1919,33 @@ neon_quadword_copy (void*    dst,
         "      blt 1f    @ skip oversized fragments            \n"
         "0: @ start with eight quadwords at a time             \n"
         "      sub       %[count], %[count], #8                \n"
-        "      vld1.8    {d16,d17,d18,d19}, [%[src]]!          \n"
-        "      vld1.8    {d20,d21,d22,d23}, [%[src]]!          \n"
-        "      vld1.8    {d24,d25,d26,d27}, [%[src]]!          \n"
-        "      vld1.8    {d28,d29,d30,d31}, [%[src]]!          \n"
+        "      vld1.8    {d16, d17, d18, d19}, [%[src]]!               \n"
+        "      vld1.8    {d20, d21, d22, d23}, [%[src]]!               \n"
+        "      vld1.8    {d24, d25, d26, d27}, [%[src]]!               \n"
+        "      vld1.8    {d28, d29, d30, d31}, [%[src]]!               \n"
         "      cmp       %[count], #8                          \n"
-        "      vst1.8    {d16,d17,d18,d19}, [%[dst]]!          \n"
-        "      vst1.8    {d20,d21,d22,d23}, [%[dst]]!          \n"
-        "      vst1.8    {d24,d25,d26,d27}, [%[dst]]!          \n"
-        "      vst1.8    {d28,d29,d30,d31}, [%[dst]]!          \n"
+        "      vst1.8    {d16, d17, d18, d19}, [%[dst]]!               \n"
+        "      vst1.8    {d20, d21, d22, d23}, [%[dst]]!               \n"
+        "      vst1.8    {d24, d25, d26, d27}, [%[dst]]!               \n"
+        "      vst1.8    {d28, d29, d30, d31}, [%[dst]]!               \n"
         "      bge 0b                                          \n"
         "1: @ four quadwords                                   \n"
         "      tst       %[count], #4                          \n"
         "      beq 2f    @ skip oversized fragment             \n"
-        "      vld1.8    {d16,d17,d18,d19}, [%[src]]!          \n"
-        "      vld1.8    {d20,d21,d22,d23}, [%[src]]!          \n"
-        "      vst1.8    {d16,d17,d18,d19}, [%[dst]]!          \n"
-        "      vst1.8    {d20,d21,d22,d23}, [%[dst]]!          \n"
+        "      vld1.8    {d16, d17, d18, d19}, [%[src]]!               \n"
+        "      vld1.8    {d20, d21, d22, d23}, [%[src]]!               \n"
+        "      vst1.8    {d16, d17, d18, d19}, [%[dst]]!               \n"
+        "      vst1.8    {d20, d21, d22, d23}, [%[dst]]!               \n"
         "2: @ two quadwords                                    \n"
         "      tst       %[count], #2                          \n"
         "      beq 3f    @ skip oversized fragment             \n"
-        "      vld1.8    {d16,d17,d18,d19}, [%[src]]!          \n"
-        "      vst1.8    {d16,d17,d18,d19}, [%[dst]]!          \n"
+        "      vld1.8    {d16, d17, d18, d19}, [%[src]]!               \n"
+        "      vst1.8    {d16, d17, d18, d19}, [%[dst]]!               \n"
         "3: @ one quadword                                     \n"
         "      tst       %[count], #1                          \n"
         "      beq 4f    @ skip oversized fragment             \n"
-        "      vld1.8    {d16,d17}, [%[src]]!                  \n"
-        "      vst1.8    {d16,d17}, [%[dst]]!                  \n"
+        "      vld1.8    {d16, d17}, [%[src]]!                 \n"
+        "      vst1.8    {d16, d17}, [%[dst]]!                 \n"
         "4: @ end                                              \n"
 
         /* Clobbered input registers marked as input/outputs */
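The ladder in this hunk peels count (in 16-byte quadwords) into power-of-two chunks: a main loop of eight, then optional 4/2/1 fragments selected with tst. A scalar model of that control flow, with memcpy standing in for the vld1/vst1 pairs and the trailer_count byte handling omitted; the function name is illustrative:

    #include <stdint.h>
    #include <string.h>

    static void
    quadword_copy_model (uint8_t *dst, const uint8_t *src, uint32_t count)
    {
        while (count >= 8)
        {
            memcpy (dst, src, 128);   /* eight quadwords per iteration */
            dst += 128; src += 128; count -= 8;
        }
        if (count & 4) { memcpy (dst, src, 64); dst += 64; src += 64; }
        if (count & 2) { memcpy (dst, src, 32); dst += 32; src += 32; }
        if (count & 1) { memcpy (dst, src, 16); }
    }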
@@ -2048,9 +2048,9 @@ solid_over_565_8_pix_neon (uint32_t  glyph_colour,
 #ifdef USE_GCC_INLINE_ASM
 
     asm volatile (
-        "      vld4.8 {d20[],d21[],d22[],d23[]}, [%[glyph_colour]]  @ splat solid colour components    \n"
+        "      vld4.8 {d20[], d21[], d22[], d23[]}, [%[glyph_colour]]  @ splat solid colour components \n"
         "0:    @ loop                                                                                                                                                          \n"
-        "      vld1.16   {d0,d1}, [%[dest]]         @ load first pixels from framebuffer                       \n"
+        "      vld1.16   {d0, d1}, [%[dest]]         @ load first pixels from framebuffer                      \n"
         "      vld1.8    {d17}, [%[in_mask]]         @ load alpha mask of glyph                                                \n"
         "      vmull.u8  q9, d17, d23               @ apply glyph colour alpha to mask                         \n"
         "      vshrn.u16 d17, q9, #8                @ reformat it to match original mask                       \n"
@@ -2071,7 +2071,7 @@ solid_over_565_8_pix_neon (uint32_t  glyph_colour,
         "      add %[in_mask], %[in_mask], %[mask_stride] @ advance mask pointer, while we wait                \n"
         "      vsri.16   q1, q2, #5                 @ pack green behind red                                            \n"
         "      vsri.16   q1, q3, #11                @ pack blue into pixels                                            \n"
-        "      vst1.16   {d2,d3}, [%[dest]]         @ store composited pixels                                          \n"
+        "      vst1.16   {d2, d3}, [%[dest]]         @ store composited pixels                                         \n"
         "      add %[dest], %[dest], %[dest_stride]  @ advance framebuffer pointer                                     \n"
         "      bne 0b                               @ next please                                                                      \n"
 
@@ -2260,13 +2260,13 @@ plain_over_565_8_pix_neon (uint32_t  colour,
      * (solid colour without alpha mask)
      */
     asm volatile (
-        "      vld4.8   {d20[],d21[],d22[],d23[]}, [%[colour]]  @ solid colour load/splat \n"
+        "      vld4.8   {d20[], d21[], d22[], d23[]}, [%[colour]]  @ solid colour load/splat \n"
         "      vmull.u8  q12, d23, d22              @ premultiply alpha red   \n"
         "      vmull.u8  q13, d23, d21              @ premultiply alpha green \n"
         "      vmull.u8  q14, d23, d20              @ premultiply alpha blue  \n"
         "      vmvn      d18, d23                   @ inverse alpha for background \n"
         "0:    @ loop\n"
-        "      vld1.16   {d0,d1}, [%[dest]]         @ load first pixels from framebuffer       \n"
+        "      vld1.16   {d0, d1}, [%[dest]]         @ load first pixels from framebuffer      \n"
         "      vshrn.u16 d2, q0, #8                 @ unpack red from framebuffer pixels       \n"
         "      vshrn.u16 d4, q0, #3                 @ unpack green                             \n"
         "      vsli.u16  q3, q0, #5                 @ duplicate framebuffer blue bits          \n"
@@ -2282,7 +2282,7 @@ plain_over_565_8_pix_neon (uint32_t  colour,
         "      subs      %[count], %[count], #1     @ decrement/test loop counter              \n"
         "      vsri.16   q0, q1, #5                 @ pack green behind red                    \n"
         "      vsri.16   q0, q2, #11                @ pack blue into pixels                    \n"
-        "      vst1.16   {d0,d1}, [%[dest]]         @ store composited pixels                  \n"
+        "      vst1.16   {d0, d1}, [%[dest]]         @ store composited pixels                 \n"
         "      add %[dest], %[dest], %[dest_stride]  @ advance framebuffer pointer             \n"
         "      bne 0b                               @ next please                              \n"
 
@@ -2426,8 +2426,8 @@ ARGB8_over_565_8_pix_neon (uint32_t *src,
     asm volatile (
         "0:    @ loop\n"
         "      pld   [%[src], %[src_stride]]         @ preload from next scanline      \n"
-        "      vld1.16   {d0,d1}, [%[dest]]         @ load pixels from framebuffer     \n"
-        "      vld4.8   {d20,d21,d22,d23},[%[src]]! @ load source image pixels         \n"
+        "      vld1.16   {d0, d1}, [%[dest]]         @ load pixels from framebuffer    \n"
+        "      vld4.8   {d20, d21, d22, d23},[%[src]]! @ load source image pixels              \n"
         "      vsli.u16  q3, q0, #5                 @ duplicate framebuffer blue bits          \n"
         "      vshrn.u16 d2, q0, #8                 @ unpack red from framebuffer pixels       \n"
         "      vshrn.u16 d4, q0, #3                 @ unpack green                             \n"
@@ -2444,7 +2444,7 @@ ARGB8_over_565_8_pix_neon (uint32_t *src,
         "      vmlal.u8  q3, d23, d20               @ ...blue                                  \n"
         "      vsri.16   q1, q2, #5                 @ pack green behind red                    \n"
         "      vsri.16   q1, q3, #11                @ pack blue into pixels                    \n"
-        "      vst1.16   {d2,d3}, [%[dest]]!        @ store composited pixels                  \n"
+        "      vst1.16   {d2, d3}, [%[dest]]!        @ store composited pixels                 \n"
         "      bne 0b                               @ next please                              \n"
 
         /* Clobbered registers marked as input/outputs */