ARM: NEON optimization for bilinear scaled 'src_0565_0565'
authorSiarhei Siamashka <siarhei.siamashka@nokia.com>
Wed, 9 Mar 2011 11:27:41 +0000 (13:27 +0200)
committerSiarhei Siamashka <siarhei.siamashka@nokia.com>
Sat, 12 Mar 2011 19:30:18 +0000 (21:30 +0200)
Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
 Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
  before: op=1, src=10020565, dst=10020565, speed=3.30 MPix/s
  after:  op=1, src=10020565, dst=10020565, speed=32.29 MPix/s

pixman/pixman-arm-neon-asm.S
pixman/pixman-arm-neon.c

index 9245db9..2b6875b 100644 (file)
@@ -2631,3 +2631,6 @@ generate_bilinear_scanline_func \
 
 generate_bilinear_scanline_func \
     pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28
+
+generate_bilinear_scanline_func \
+    pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 28
index 18e26eb..0a10ca1 100644 (file)
@@ -133,6 +133,8 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
                                          uint32_t, uint16_t)
 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
                                          uint16_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
+                                         uint16_t, uint16_t)
 
 void
 pixman_composite_src_n_8_asm_neon (int32_t   w,
@@ -358,6 +360,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
     SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
 
     SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565),
 
     { PIXMAN_OP_NONE },
 };