From 184cd80aa46dd9d8bd023d3b70a345330b72d7e7 Mon Sep 17 00:00:00 2001
From: =?utf8?q?S=C3=B8ren=20Sandmann=20Pedersen?=
Date: Mon, 20 Jul 2009 19:56:46 -0400
Subject: [PATCH] Some formatting changes to pixman-arm-neon.c

---
 pixman/pixman-arm-neon.c | 236 +++++++++++++++++++++++------------------------
 1 file changed, 118 insertions(+), 118 deletions(-)

diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index cea6f75..2f7b8a0 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -169,8 +169,8 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
     w = width;
 
 #ifndef USE_GCC_INLINE_ASM
-    sval = vld1_u8 ((void*)src);
-    dval = vld1_u8 ((void*)dst);
+    sval = vld1_u8 ((void *)src);
+    dval = vld1_u8 ((void *)dst);
     keep_dst = dst;
 
     temp = vqadd_u8 (dval, sval);
@@ -181,10 +181,10 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
     while (w)
     {
-        sval = vld1_u8 ((void*)src);
-        dval = vld1_u8 ((void*)dst);
+        sval = vld1_u8 ((void *)src);
+        dval = vld1_u8 ((void *)dst);
 
-        vst1_u8 ((void*)keep_dst, temp);
+        vst1_u8 ((void *)keep_dst, temp);
         keep_dst = dst;
 
         temp = vqadd_u8 (dval, sval);
@@ -194,7 +194,7 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
         w -= 8;
     }
 
-    vst1_u8 ((void*)keep_dst, temp);
+    vst1_u8 ((void *)keep_dst, temp);
 #else
     asm volatile (
 /* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
@@ -249,9 +249,9 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
         if (w & 4)
         {
             sval = vreinterpret_u8_u32 (
-                vld1_lane_u32 ((void*)src, vreinterpret_u32_u8 (sval), 1));
+                vld1_lane_u32 ((void *)src, vreinterpret_u32_u8 (sval), 1));
             dval = vreinterpret_u8_u32 (
-                vld1_lane_u32 ((void*)dst, vreinterpret_u32_u8 (dval), 1));
+                vld1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (dval), 1));
             dst4 = dst;
             src += 4;
@@ -261,9 +261,9 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
         if (w & 2)
        {
             sval = vreinterpret_u8_u16 (
-                vld1_lane_u16 ((void*)src, vreinterpret_u16_u8 (sval), 1));
+                vld1_lane_u16 ((void *)src, vreinterpret_u16_u8 (sval), 1));
             dval = vreinterpret_u8_u16 (
-                vld1_lane_u16 ((void*)dst, vreinterpret_u16_u8 (dval), 1));
+                vld1_lane_u16 ((void *)dst, vreinterpret_u16_u8 (dval), 1));
             dst2 = dst;
             src += 2;
@@ -282,10 +282,10 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
                 vst1_lane_u8 (dst, dval, 1);
 
             if (w & 2)
-                vst1_lane_u16 ((void*)dst2, vreinterpret_u16_u8 (dval), 1);
+                vst1_lane_u16 ((void *)dst2, vreinterpret_u16_u8 (dval), 1);
 
             if (w & 4)
-                vst1_lane_u32 ((void*)dst4, vreinterpret_u32_u8 (dval), 1);
+                vst1_lane_u32 ((void *)dst4, vreinterpret_u32_u8 (dval), 1);
         }
     }
 }
@@ -328,8 +328,8 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
     w = width;
 
 #ifndef USE_GCC_INLINE_ASM
-    sval = vld4_u8 ((void*)src);
-    dval = vld4_u8 ((void*)dst);
+    sval = vld4_u8 ((void *)src);
+    dval = vld4_u8 ((void *)dst);
     keep_dst = dst;
 
     temp = neon8mul (dval, vmvn_u8 (sval.val[3]));
@@ -341,10 +341,10 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
     while (w)
     {
-        sval = vld4_u8 ((void*)src);
-        dval = vld4_u8 ((void*)dst);
+        sval = vld4_u8 ((void *)src);
+        dval = vld4_u8 ((void *)dst);
 
-        vst4_u8 ((void*)keep_dst, temp);
+        vst4_u8 ((void *)keep_dst, temp);
         keep_dst = dst;
 
         temp = neon8mul (dval, vmvn_u8 (sval.val[3]));
@@ -355,7 +355,7 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
         w -= 8;
     }
 
-    vst4_u8 ((void*)keep_dst, temp);
+    vst4_u8 ((void *)keep_dst, temp);
 #else
     asm volatile (
 /* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
@@ -427,10 +427,10 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
             uint8x8_t sval, dval;
 
             /* two 32-bit pixels packed into D-reg; ad-hoc vectorization */
-            sval = vreinterpret_u8_u32 (vld1_u32 ((void*)src));
-            dval = vreinterpret_u8_u32 (vld1_u32 ((void*)dst));
+            sval = vreinterpret_u8_u32 (vld1_u32 ((void *)src));
+            dval = vreinterpret_u8_u32 (vld1_u32 ((void *)dst));
             dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
-            vst1_u8 ((void*)dst, vqadd_u8 (sval, dval));
+            vst1_u8 ((void *)dst, vqadd_u8 (sval, dval));
 
             src += 2;
             dst += 2;
@@ -442,10 +442,10 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
             uint8x8_t sval, dval;
 
             /* single 32-bit pixel in lane 0 */
-            sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)src)); /* only interested in lane 0 */
-            dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)dst)); /* only interested in lane 0 */
+            sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)src)); /* only interested in lane 0 */
+            dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst)); /* only interested in lane 0 */
             dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
-            vst1_lane_u32 ((void*)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
+            vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
         }
     }
 }
@@ -495,8 +495,8 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
 #ifndef USE_GCC_INLINE_ASM
     uint8x8x4_t sval, dval, temp;
 
-    sval = vld4_u8 ((void*)src);
-    dval = vld4_u8 ((void*)dst);
+    sval = vld4_u8 ((void *)src);
+    dval = vld4_u8 ((void *)dst);
     keep_dst = dst;
 
     sval = neon8mul (sval, mask_alpha);
@@ -509,10 +509,10 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
     while (w)
     {
-        sval = vld4_u8 ((void*)src);
-        dval = vld4_u8 ((void*)dst);
+        sval = vld4_u8 ((void *)src);
+        dval = vld4_u8 ((void *)dst);
 
-        vst4_u8 ((void*)keep_dst, temp);
+        vst4_u8 ((void *)keep_dst, temp);
         keep_dst = dst;
 
         sval = neon8mul (sval, mask_alpha);
@@ -523,7 +523,7 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
         dst += 8;
         w -= 8;
     }
-    vst4_u8 ((void*)keep_dst, temp);
+    vst4_u8 ((void *)keep_dst, temp);
 #else
     asm volatile (
 /* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
@@ -612,8 +612,8 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
         {
             uint8x8_t sval, dval;
 
-            sval = vreinterpret_u8_u32 (vld1_u32 ((void*)src));
-            dval = vreinterpret_u8_u32 (vld1_u32 ((void*)dst));
+            sval = vreinterpret_u8_u32 (vld1_u32 ((void *)src));
+            dval = vreinterpret_u8_u32 (vld1_u32 ((void *)dst));
 
             /* sval * const alpha_mul */
             sval = neon2mul (sval, mask_alpha);
@@ -621,7 +621,7 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
             /* dval * 255-(src alpha) */
             dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
 
-            vst1_u8 ((void*)dst, vqadd_u8 (sval, dval));
+            vst1_u8 ((void *)dst, vqadd_u8 (sval, dval));
 
             src += 2;
             dst += 2;
@@ -632,8 +632,8 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
         {
             uint8x8_t sval, dval;
 
-            sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)src));
-            dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)dst));
+            sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)src));
+            dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst));
 
             /* sval * const alpha_mul */
             sval = neon2mul (sval, mask_alpha);
@@ -641,7 +641,7 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
             /* dval * 255-(src alpha) */
             dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
 
-            vst1_lane_u32 ((void*)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
+            vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
         }
     }
 }
@@ -703,12 +703,12 @@ neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
     uint16x8_t dval, temp;
     uint8x8x4_t sval8temp;
 
-    alpha = vld1_u8 ((void*)mask);
-    dval = vld1q_u16 ((void*)dst);
+    alpha = vld1_u8 ((void *)mask);
+    dval = vld1q_u16 ((void *)dst);
     keep_dst = dst;
 
-    sval8temp = neon8mul (sval8,alpha);
-    temp = pack0565 (neon8qadd (sval8temp,neon8mul (unpack0565 (dval),vmvn_u8 (sval8temp.val[3]))));
+    sval8temp = neon8mul (sval8, alpha);
+    temp = pack0565 (neon8qadd (sval8temp, neon8mul (unpack0565 (dval), vmvn_u8 (sval8temp.val[3]))));
 
     mask += (w & 7);
     dst += (w & 7);
@@ -716,20 +716,20 @@ neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
     while (w)
     {
-        dval = vld1q_u16 ((void*)dst);
-        alpha = vld1_u8 ((void*)mask);
+        dval = vld1q_u16 ((void *)dst);
+        alpha = vld1_u8 ((void *)mask);
 
-        vst1q_u16 ((void*)keep_dst,temp);
+        vst1q_u16 ((void *)keep_dst, temp);
         keep_dst = dst;
 
-        sval8temp = neon8mul (sval8,alpha);
-        temp = pack0565 (neon8qadd (sval8temp,neon8mul (unpack0565 (dval),vmvn_u8 (sval8temp.val[3]))));
+        sval8temp = neon8mul (sval8, alpha);
+        temp = pack0565 (neon8qadd (sval8temp, neon8mul (unpack0565 (dval), vmvn_u8 (sval8temp.val[3]))));
 
         mask+=8;
         dst+=8;
        w-=8;
     }
-    vst1q_u16 ((void*)keep_dst,temp);
+    vst1q_u16 ((void *)keep_dst, temp);
 #else
     asm volatile (
         "vdup.32 d0, %[src]\n\t"
@@ -842,35 +842,35 @@ neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
     if (w&4)
     {
-        alpha = vreinterpret_u8_u32 (vld1_lane_u32 ((void*)mask,vreinterpret_u32_u8 (alpha),1));
-        dval = vreinterpretq_u16_u64 (vld1q_lane_u64 ((void*)dst,vreinterpretq_u64_u16 (dval),1));
+        alpha = vreinterpret_u8_u32 (vld1_lane_u32 ((void *)mask, vreinterpret_u32_u8 (alpha),1));
+        dval = vreinterpretq_u16_u64 (vld1q_lane_u64 ((void *)dst, vreinterpretq_u64_u16 (dval),1));
         dst4=dst;
         mask+=4;
         dst+=4;
     }
 
     if (w&2)
     {
-        alpha = vreinterpret_u8_u16 (vld1_lane_u16 ((void*)mask,vreinterpret_u16_u8 (alpha),1));
-        dval = vreinterpretq_u16_u32 (vld1q_lane_u32 ((void*)dst,vreinterpretq_u32_u16 (dval),1));
+        alpha = vreinterpret_u8_u16 (vld1_lane_u16 ((void *)mask, vreinterpret_u16_u8 (alpha),1));
+        dval = vreinterpretq_u16_u32 (vld1q_lane_u32 ((void *)dst, vreinterpretq_u32_u16 (dval),1));
         dst2=dst;
         mask+=2;
         dst+=2;
     }
 
     if (w&1)
     {
-        alpha = vld1_lane_u8 ((void*)mask,alpha,1);
-        dval = vld1q_lane_u16 ((void*)dst,dval,1);
+        alpha = vld1_lane_u8 ((void *)mask, alpha,1);
+        dval = vld1q_lane_u16 ((void *)dst, dval,1);
     }
 
-    sval8temp = neon8mul (sval8,alpha);
-    temp = pack0565 (neon8qadd (sval8temp,neon8mul (unpack0565 (dval),vmvn_u8 (sval8temp.val[3]))));
+    sval8temp = neon8mul (sval8, alpha);
+    temp = pack0565 (neon8qadd (sval8temp, neon8mul (unpack0565 (dval), vmvn_u8 (sval8temp.val[3]))));
 
     if (w&1)
-        vst1q_lane_u16 ((void*)dst,temp,1);
+        vst1q_lane_u16 ((void *)dst, temp,1);
 
     if (w&2)
-        vst1q_lane_u32 ((void*)dst2,vreinterpretq_u32_u16 (temp),1);
+        vst1q_lane_u32 ((void *)dst2, vreinterpretq_u32_u16 (temp),1);
 
     if (w&4)
-        vst1q_lane_u64 ((void*)dst4,vreinterpretq_u64_u16 (temp),1);
+        vst1q_lane_u64 ((void *)dst4, vreinterpretq_u64_u16 (temp),1);
 #else
     asm volatile (
         "vdup.32 d0, %[src]\n\t"
@@ -1040,8 +1040,8 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
     uint8x8_t alpha;
     uint8x8x4_t dval, temp;
 
-    alpha = vld1_u8 ((void*)mask);
-    dval = vld4_u8 ((void*)dst);
+    alpha = vld1_u8 ((void *)mask);
+    dval = vld4_u8 ((void *)dst);
     keep_dst = dst;
 
     temp = neon8mul (sval8, alpha);
@@ -1054,10 +1054,10 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
     while (w)
     {
-        alpha = vld1_u8 ((void*)mask);
-        dval = vld4_u8 ((void*)dst);
+        alpha = vld1_u8 ((void *)mask);
+        dval = vld4_u8 ((void *)dst);
 
-        vst4_u8 ((void*)keep_dst, temp);
+        vst4_u8 ((void *)keep_dst, temp);
         keep_dst = dst;
 
         temp = neon8mul (sval8, alpha);
@@ -1068,7 +1068,7 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
         dst += 8;
         w -= 8;
     }
-    vst4_u8 ((void*)keep_dst, temp);
+    vst4_u8 ((void *)keep_dst, temp);
 #else
     asm volatile (
         "vdup.32 d0, %[src]\n\t"
@@ -1160,14 +1160,14 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
             uint8x8_t dval, temp, res;
 
             alpha = vtbl1_u8 (
-                vreinterpret_u8_u16 (vld1_dup_u16 ((void*)mask)), mask_selector);
-            dval = vld1_u8 ((void*)dst);
+                vreinterpret_u8_u16 (vld1_dup_u16 ((void *)mask)), mask_selector);
+            dval = vld1_u8 ((void *)dst);
 
             temp = neon2mul (sval2, alpha);
             res = vqadd_u8 (
                 temp, neon2mul (dval, vtbl1_u8 (vmvn_u8 (temp), alpha_selector)));
 
-            vst1_u8 ((void*)dst, res);
+            vst1_u8 ((void *)dst, res);
 
             mask += 2;
             dst += 2;
@@ -1178,14 +1178,14 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
        {
             uint8x8_t dval, temp, res;
 
-            alpha = vtbl1_u8 (vld1_dup_u8 ((void*)mask), mask_selector);
-            dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)dst));
+            alpha = vtbl1_u8 (vld1_dup_u8 ((void *)mask), mask_selector);
+            dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst));
 
             temp = neon2mul (sval2, alpha);
             res = vqadd_u8 (
                 temp, neon2mul (dval, vtbl1_u8 (vmvn_u8 (temp), alpha_selector)));
 
-            vst1_lane_u32 ((void*)dst, vreinterpret_u32_u8 (res), 0);
+            vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (res), 0);
         }
     }
 }
@@ -1377,17 +1377,17 @@ neon_composite_src_16_16 (pixman_implementation_t * impl,
         /* preload from next scanline */
         " pld [%[src], %[src_stride], LSL #1] \n"
         " sub %[count], %[count], #64 \n"
-        " vld1.16 {d16,d17,d18,d19}, [%[src]]! \n"
-        " vld1.16 {d20,d21,d22,d23}, [%[src]]! \n"
+        " vld1.16 {d16, d17, d18, d19}, [%[src]]! \n"
+        " vld1.16 {d20, d21, d22, d23}, [%[src]]! \n"
         /* preload from next scanline */
         " pld [%[src], %[src_stride], LSL #1] \n"
-        " vld1.16 {d24,d25,d26,d27}, [%[src]]! \n"
-        " vld1.16 {d28,d29,d30,d31}, [%[src]]! \n"
+        " vld1.16 {d24, d25, d26, d27}, [%[src]]! \n"
+        " vld1.16 {d28, d29, d30, d31}, [%[src]]! \n"
         " cmp %[count], #64 \n"
-        " vst1.16 {d16,d17,d18,d19}, [%[dst]]! \n"
-        " vst1.16 {d20,d21,d22,d23}, [%[dst]]! \n"
-        " vst1.16 {d24,d25,d26,d27}, [%[dst]]! \n"
-        " vst1.16 {d28,d29,d30,d31}, [%[dst]]! \n"
+        " vst1.16 {d16, d17, d18, d19}, [%[dst]]! \n"
+        " vst1.16 {d20, d21, d22, d23}, [%[dst]]! \n"
+        " vst1.16 {d24, d25, d26, d27}, [%[dst]]! \n"
+        " vst1.16 {d28, d29, d30, d31}, [%[dst]]! \n"
         " bge 0b \n"
         " cmp %[count], #0 \n"
         " beq 7f @ aligned fastpath \n"
@@ -1396,22 +1396,22 @@ neon_composite_src_16_16 (pixman_implementation_t * impl,
         " beq 2f @ skip oversized fragment \n"
         /* preload from next scanline */
         " pld [%[src], %[src_stride], LSL #1] \n"
-        " vld1.16 {d16,d17,d18,d19}, [%[src]]! \n"
-        " vld1.16 {d20,d21,d22,d23}, [%[src]]! \n"
-        " vst1.16 {d16,d17,d18,d19}, [%[dst]]! \n"
-        " vst1.16 {d20,d21,d22,d23}, [%[dst]]! \n"
+        " vld1.16 {d16, d17, d18, d19}, [%[src]]! \n"
+        " vld1.16 {d20, d21, d22, d23}, [%[src]]! \n"
+        " vst1.16 {d16, d17, d18, d19}, [%[dst]]! \n"
+        " vst1.16 {d20, d21, d22, d23}, [%[dst]]! \n"
         "2: @ two quadwords \n"
         " tst %[count], #16 \n"
         " beq 3f @ skip oversized fragment \n"
         /* preload from next scanline */
         " pld [%[src], %[src_stride], LSL #1] \n"
-        " vld1.16 {d16,d17,d18,d19}, [%[src]]! \n"
-        " vst1.16 {d16,d17,d18,d19}, [%[dst]]! \n"
+        " vld1.16 {d16, d17, d18, d19}, [%[src]]! \n"
+        " vst1.16 {d16, d17, d18, d19}, [%[dst]]! \n"
        "3: @ one quadword \n"
         " tst %[count], #8 \n"
         " beq 4f @ skip oversized fragment \n"
-        " vld1.16 {d16,d17}, [%[src]]! \n"
-        " vst1.16 {d16,d17}, [%[dst]]! \n"
+        " vld1.16 {d16, d17}, [%[src]]! \n"
+        " vst1.16 {d16, d17}, [%[dst]]! \n"
         "4: @ one doubleword \n"
         " tst %[count], #4 \n"
         " beq 5f @ skip oversized fragment \n"
@@ -1533,8 +1533,8 @@ neon_composite_src_24_16 (pixman_implementation_t * impl,
         "0: @ start with sixteen pixels at a time \n"
         " sub %[count], %[count], #16 \n"
         " pld [%[src], %[src_stride], lsl #2] @ preload from next scanline \n"
-        " vld4.8 {d0,d1,d2,d3}, [%[src]]! @ d3 is alpha and ignored, d2-0 are rgb. \n"
-        " vld4.8 {d4,d5,d6,d7}, [%[src]]! @ d7 is alpha and ignored, d6-4 are rgb. \n"
+        " vld4.8 {d0, d1, d2, d3}, [%[src]]! @ d3 is alpha and ignored, d2-0 are rgb. \n"
+        " vld4.8 {d4, d5, d6, d7}, [%[src]]! @ d7 is alpha and ignored, d6-4 are rgb. \n"
         " vshll.u8 q8, d2, #8 @ expand first red for repacking \n"
         " vshll.u8 q10, d1, #8 @ expand first green for repacking \n"
         " vshll.u8 q11, d0, #8 @ expand first blue for repacking \n"
@@ -1546,20 +1546,20 @@ neon_composite_src_24_16 (pixman_implementation_t * impl,
         " vsri.u16 q9, q10, #5 @ insert second green after red \n"
         " vsri.u16 q9, q11, #11 @ insert second blue after green \n"
         " cmp %[count], #16 \n"
-        " vst1.16 {d16,d17,d18,d19}, [%[dst]]! @ store 16 pixels \n"
+        " vst1.16 {d16, d17, d18, d19}, [%[dst]]! @ store 16 pixels \n"
         " bge 0b \n"
         "1: @ end of main loop \n"
         " cmp %[count], #8 @ can we still do an 8-pixel block? \n"
         " blt 2f \n"
         " sub %[count], %[count], #8 \n"
         " pld [%[src], %[src_stride], lsl #2] @ preload from next scanline \n"
-        " vld4.8 {d0,d1,d2,d3}, [%[src]]! @ d3 is alpha and ignored, d2-0 are rgb. \n"
+        " vld4.8 {d0, d1, d2, d3}, [%[src]]! @ d3 is alpha and ignored, d2-0 are rgb. \n"
         " vshll.u8 q8, d2, #8 @ expand first red for repacking \n"
         " vshll.u8 q10, d1, #8 @ expand first green for repacking \n"
         " vshll.u8 q11, d0, #8 @ expand first blue for repacking \n"
         " vsri.u16 q8, q10, #5 @ insert first green after red \n"
         " vsri.u16 q8, q11, #11 @ insert first blue after green \n"
-        " vst1.16 {d16,d17}, [%[dst]]! @ store 8 pixels \n"
+        " vst1.16 {d16, d17}, [%[dst]]! @ store 8 pixels \n"
         "2: @ end \n"
 
         /* Clobbered input and working registers marked as input/outputs */
@@ -1848,7 +1848,7 @@ pixman_fill_neon (uint32_t *bits,
         /* The main block: Do 128-bit aligned writes */
         "3:\n"
         "subs r5, r5, #1\n"
-        "vst1.64 {d0,d1}, [r4, :128]!\n"
+        "vst1.64 {d0, d1}, [r4, :128]!\n"
         "bne 3b\n"
 
         /* Handle the tailing bytes: Do 64, 32, 16 and 8-bit aligned writes as needed.
@@ -1898,8 +1898,8 @@ pixman_fill_neon (uint32_t *bits,
 #define NEON_SCANLINE_BUFFER_PIXELS (1024)
 
 static inline void
-neon_quadword_copy (void* dst,
-                    void* src,
+neon_quadword_copy (void *dst,
+                    void *src,
                     uint32_t count,        /* of quadwords */
                     uint32_t trailer_count /* of bytes */)
 {
@@ -1919,33 +1919,33 @@ neon_quadword_copy (void* dst,
         " blt 1f @ skip oversized fragments \n"
         "0: @ start with eight quadwords at a time \n"
         " sub %[count], %[count], #8 \n"
-        " vld1.8 {d16,d17,d18,d19}, [%[src]]! \n"
-        " vld1.8 {d20,d21,d22,d23}, [%[src]]! \n"
-        " vld1.8 {d24,d25,d26,d27}, [%[src]]! \n"
-        " vld1.8 {d28,d29,d30,d31}, [%[src]]! \n"
+        " vld1.8 {d16, d17, d18, d19}, [%[src]]! \n"
+        " vld1.8 {d20, d21, d22, d23}, [%[src]]! \n"
+        " vld1.8 {d24, d25, d26, d27}, [%[src]]! \n"
+        " vld1.8 {d28, d29, d30, d31}, [%[src]]! \n"
         " cmp %[count], #8 \n"
-        " vst1.8 {d16,d17,d18,d19}, [%[dst]]! \n"
-        " vst1.8 {d20,d21,d22,d23}, [%[dst]]! \n"
-        " vst1.8 {d24,d25,d26,d27}, [%[dst]]! \n"
-        " vst1.8 {d28,d29,d30,d31}, [%[dst]]! \n"
+        " vst1.8 {d16, d17, d18, d19}, [%[dst]]! \n"
+        " vst1.8 {d20, d21, d22, d23}, [%[dst]]! \n"
+        " vst1.8 {d24, d25, d26, d27}, [%[dst]]! \n"
+        " vst1.8 {d28, d29, d30, d31}, [%[dst]]! \n"
         " bge 0b \n"
         "1: @ four quadwords \n"
         " tst %[count], #4 \n"
         " beq 2f @ skip oversized fragment \n"
-        " vld1.8 {d16,d17,d18,d19}, [%[src]]! \n"
-        " vld1.8 {d20,d21,d22,d23}, [%[src]]! \n"
-        " vst1.8 {d16,d17,d18,d19}, [%[dst]]! \n"
-        " vst1.8 {d20,d21,d22,d23}, [%[dst]]! \n"
+        " vld1.8 {d16, d17, d18, d19}, [%[src]]! \n"
+        " vld1.8 {d20, d21, d22, d23}, [%[src]]! \n"
+        " vst1.8 {d16, d17, d18, d19}, [%[dst]]! \n"
+        " vst1.8 {d20, d21, d22, d23}, [%[dst]]! \n"
         "2: @ two quadwords \n"
         " tst %[count], #2 \n"
         " beq 3f @ skip oversized fragment \n"
-        " vld1.8 {d16,d17,d18,d19}, [%[src]]! \n"
-        " vst1.8 {d16,d17,d18,d19}, [%[dst]]! \n"
+        " vld1.8 {d16, d17, d18, d19}, [%[src]]! \n"
+        " vst1.8 {d16, d17, d18, d19}, [%[dst]]! \n"
         "3: @ one quadword \n"
         " tst %[count], #1 \n"
         " beq 4f @ skip oversized fragment \n"
-        " vld1.8 {d16,d17}, [%[src]]! \n"
-        " vst1.8 {d16,d17}, [%[dst]]! \n"
+        " vld1.8 {d16, d17}, [%[src]]! \n"
+        " vst1.8 {d16, d17}, [%[dst]]! \n"
         "4: @ end \n"
 
         /* Clobbered input registers marked as input/outputs */
@@ -2048,9 +2048,9 @@ solid_over_565_8_pix_neon (uint32_t glyph_colour,
 #ifdef USE_GCC_INLINE_ASM
 
     asm volatile (
-        " vld4.8 {d20[],d21[],d22[],d23[]}, [%[glyph_colour]] @ splat solid colour components \n"
+        " vld4.8 {d20[], d21[], d22[], d23[]}, [%[glyph_colour]] @ splat solid colour components \n"
         "0: @ loop \n"
-        " vld1.16 {d0,d1}, [%[dest]] @ load first pixels from framebuffer \n"
+        " vld1.16 {d0, d1}, [%[dest]] @ load first pixels from framebuffer \n"
         " vld1.8 {d17}, [%[in_mask]] @ load alpha mask of glyph \n"
         " vmull.u8 q9, d17, d23 @ apply glyph colour alpha to mask \n"
         " vshrn.u16 d17, q9, #8 @ reformat it to match original mask \n"
@@ -2071,7 +2071,7 @@ solid_over_565_8_pix_neon (uint32_t glyph_colour,
         " add %[in_mask], %[in_mask], %[mask_stride] @ advance mask pointer, while we wait \n"
         " vsri.16 q1, q2, #5 @ pack green behind red \n"
         " vsri.16 q1, q3, #11 @ pack blue into pixels \n"
-        " vst1.16 {d2,d3}, [%[dest]] @ store composited pixels \n"
+        " vst1.16 {d2, d3}, [%[dest]] @ store composited pixels \n"
         " add %[dest], %[dest], %[dest_stride] @ advance framebuffer pointer \n"
         " bne 0b @ next please \n"
@@ -2260,13 +2260,13 @@ plain_over_565_8_pix_neon (uint32_t colour,
      * (solid colour without alpha mask)
      */
     asm volatile (
-        " vld4.8 {d20[],d21[],d22[],d23[]}, [%[colour]] @ solid colour load/splat \n"
+        " vld4.8 {d20[], d21[], d22[], d23[]}, [%[colour]] @ solid colour load/splat \n"
         " vmull.u8 q12, d23, d22 @ premultiply alpha red \n"
         " vmull.u8 q13, d23, d21 @ premultiply alpha green \n"
         " vmull.u8 q14, d23, d20 @ premultiply alpha blue \n"
         " vmvn d18, d23 @ inverse alpha for background \n"
         "0: @ loop\n"
-        " vld1.16 {d0,d1}, [%[dest]] @ load first pixels from framebuffer \n"
+        " vld1.16 {d0, d1}, [%[dest]] @ load first pixels from framebuffer \n"
         " vshrn.u16 d2, q0, #8 @ unpack red from framebuffer pixels \n"
         " vshrn.u16 d4, q0, #3 @ unpack green \n"
         " vsli.u16 q3, q0, #5 @ duplicate framebuffer blue bits \n"
@@ -2282,7 +2282,7 @@ plain_over_565_8_pix_neon (uint32_t colour,
         " subs %[count], %[count], #1 @ decrement/test loop counter \n"
         " vsri.16 q0, q1, #5 @ pack green behind red \n"
         " vsri.16 q0, q2, #11 @ pack blue into pixels \n"
-        " vst1.16 {d0,d1}, [%[dest]] @ store composited pixels \n"
+        " vst1.16 {d0, d1}, [%[dest]] @ store composited pixels \n"
         " add %[dest], %[dest], %[dest_stride] @ advance framebuffer pointer \n"
         " bne 0b @ next please \n"
@@ -2426,8 +2426,8 @@ ARGB8_over_565_8_pix_neon (uint32_t *src,
     asm volatile (
         "0: @ loop\n"
         " pld [%[src], %[src_stride]] @ preload from next scanline \n"
-        " vld1.16 {d0,d1}, [%[dest]] @ load pixels from framebuffer \n"
-        " vld4.8 {d20,d21,d22,d23},[%[src]]! @ load source image pixels \n"
+        " vld1.16 {d0, d1}, [%[dest]] @ load pixels from framebuffer \n"
+        " vld4.8 {d20, d21, d22, d23},[%[src]]! @ load source image pixels \n"
         " vsli.u16 q3, q0, #5 @ duplicate framebuffer blue bits \n"
         " vshrn.u16 d2, q0, #8 @ unpack red from framebuffer pixels \n"
         " vshrn.u16 d4, q0, #3 @ unpack green \n"
@@ -2444,7 +2444,7 @@ ARGB8_over_565_8_pix_neon (uint32_t *src,
         " vmlal.u8 q3, d23, d20 @ ...blue \n"
         " vsri.16 q1, q2, #5 @ pack green behind red \n"
         " vsri.16 q1, q3, #11 @ pack blue into pixels \n"
-        " vst1.16 {d2,d3}, [%[dest]]! @ store composited pixels \n"
+        " vst1.16 {d2, d3}, [%[dest]]! @ store composited pixels \n"
         " bne 0b @ next please \n"
 
         /* Clobbered registers marked as input/outputs */
-- 
2.7.4