From: Siarhei Siamashka
Date: Wed, 16 Mar 2011 14:33:41 +0000 (+0200)
Subject: ARM: support for software pipelining in bilinear macros
X-Git-Tag: 1.0_branch~262
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0f7be9f72ef6bfe2555b7f2cc29297c4f4762740;p=profile%2Fivi%2Fpixman.git

ARM: support for software pipelining in bilinear macros

Now it's possible to override the main loop of bilinear scaling code
with an optimized pipelined implementation.
---

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index a331f4d..e235511 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -2617,12 +2617,32 @@ fname:
     bilinear_store_&dst_fmt 4, q2, q3
 .endm
 
+.macro bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
+.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
+    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_head
+.else
+    bilinear_interpolate_four_pixels src_fmt, dst_fmt
+.endif
+.endm
+
+.macro bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
+.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
+    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail
+.endif
+.endm
+
+.macro bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
+.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
+    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail_head
+.else
+    bilinear_interpolate_four_pixels src_fmt, dst_fmt
+.endif
+.endm
+
 /*
  * Main template macro for generating NEON optimized bilinear scanline
  * functions.
  *
- * TODO: use software pipelining in order to improve performance
- *
  * Bilinear scanline scaler macro template uses the following arguments:
  *  fname         - name of the function to generate
  *  src_fmt       - source color format (8888 or 0565)
@@ -2692,10 +2712,15 @@ pixman_asm_function fname
     subs      WIDTH, WIDTH, #4
     blt       1f
     mov       PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
+    bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
+    subs      WIDTH, WIDTH, #4
+    blt       5f
 0:
-    bilinear_interpolate_four_pixels src_fmt, dst_fmt
+    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
     subs      WIDTH, WIDTH, #4
     bge       0b
+5:
+    bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
 1:
     /* handle the remaining trailing pixels */