ARM: optimization for scaled src_0565_0565 with nearest filter

author Siarhei Siamashka <siarhei.siamashka@nokia.com>

Sun, 3 Oct 2010 22:56:59 +0000 (01:56 +0300)

committer Siarhei Siamashka <siarhei.siamashka@nokia.com>

Wed, 10 Nov 2010 15:26:49 +0000 (17:26 +0200)
author Siarhei Siamashka <siarhei.siamashka@nokia.com>
Sun, 3 Oct 2010 22:56:59 +0000 (01:56 +0300)
committer Siarhei Siamashka <siarhei.siamashka@nokia.com>
Wed, 10 Nov 2010 15:26:49 +0000 (17:26 +0200)
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S

index a3d2d40..7567700 100644 (file)
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -1,5 +1,6 @@
  /*
   * Copyright © 2008 Mozilla Corporation
+ * Copyright © 2010 Nokia Corporation
   *
   * Permission to use, copy, modify, distribute, and sell this software and its
   * documentation for any purpose is hereby granted without fee, provided that
@@ -328,3 +329,72 @@ pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
         pop     {r4, r5, r6, r7, r8, r9, r10, r11}
         bx      lr
  .endfunc
+
+/*
+ * Note: This function is only using armv4t instructions (not even armv6),
+ *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
+ *       be split into a few variants, tuned for each microarchitecture.
+ *
+ * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
+ * have efficient write combining), it needs to be changed to use 16-byte
+ * aligned writes using STM instruction.
+ */
+pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+       W       .req    r0 /* count of 16-bit pixels still to be stored */
+       DST     .req    r1 /* output pointer, post-incremented by 2 on every store */
+       SRC     .req    r2 /* base address for the halfword loads */
+       VX      .req    r3 /* source position; VX >> 16 is the source pixel index */
+       UNIT_X  .req    ip /* step added to VX for every output pixel */
+       TMP1    .req    r4 /* byte offset into SRC, then the pixel loaded from it */
+       TMP2    .req    r5 /* same role as TMP1, for the other pixel of a pair */
+       VXMASK  .req    r6 /* mask that clears bit 0 of a computed byte offset */
+
+       ldr     UNIT_X, [sp] /* first stack word at entry (presumably the 5th C argument); read before push moves sp */
+       push    {r4, r5, r6, r7} /* r4-r6 are used below; r7 is saved but unused (presumably for stack alignment - confirm) */
+       mvn     VXMASK, #1 /* VXMASK = 0xfffffffe */
+
+       /* define helper macro: stores 2 pixels; needs TMP1 = offset of the first, leaves TMP1 = offset of the one after the pair */
+       .macro  scale_2_pixels
+               ldrh    TMP1, [SRC, TMP1] /* fetch the pixel whose offset was computed one step earlier */
+               and     TMP2, VXMASK, VX, lsr #15 /* TMP2 = (VX >> 16) * 2 = byte offset of the second pixel */
+               add     VX, VX, UNIT_X
+               strh    TMP1, [DST], #2
+
+               ldrh    TMP2, [SRC, TMP2]
+               and     TMP1, VXMASK, VX, lsr #15 /* offset only, no load: look-ahead for the next pixel */
+               add     VX, VX, UNIT_X
+               strh    TMP2, [DST], #2
+       .endm
+
+       /* now do the scaling; first prime TMP1 with the byte offset of pixel 0 */
+       and     TMP1, VXMASK, VX, lsr #15
+       add     VX, VX, UNIT_X /* VX now runs one pixel ahead of the next load */
+       subs    W, #4 /* bias W by -4: negative means fewer than 4 pixels in total */
+       blt     2f /* skip the main loop */
+1: /* main loop, process 4 pixels per iteration */
+       scale_2_pixels
+       scale_2_pixels
+       subs    W, W, #4
+       bge     1b /* loop while at least 4 more pixels remain */
+2: /* only multiples of 4 were subtracted, so bits 1:0 of W still hold the leftover count */
+       tst     W, #2
+       beq     2f /* no leftover pair */
+       scale_2_pixels
+2:
+       tst     W, #1
+       ldrneh  TMP1, [SRC, TMP1] /* last odd pixel; executed only when bit 0 of W is set */
+       strneh  TMP1, [DST], #2
+       /* cleanup helper macro and register aliases (assembler directives only) */
+       .purgem scale_2_pixels
+       .unreq  DST
+       .unreq  SRC
+       .unreq  W
+       .unreq  VX
+       .unreq  UNIT_X
+       .unreq  TMP1
+       .unreq  TMP2
+       .unreq  VXMASK
+       /* return, restoring the four registers pushed on entry */
+       pop     {r4, r5, r6, r7}
+       bx      lr
+.endfunc
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c

index d466a31..3b05007 100644 (file)
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -29,6 +29,7 @@
  
  #include "pixman-private.h"
  #include "pixman-arm-common.h"
+#include "pixman-fast-path.h"
  
  #if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */
  
@@ -386,6 +387,9 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (armv6, over_8888_n_8888,
  PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (armv6, over_n_8_8888,
                                        uint8_t, 1, uint32_t, 1)
  
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
+                                        uint16_t, uint16_t)
+
  static const pixman_fast_path_t arm_simd_fast_paths[] =
  {
      PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888),
@@ -404,6 +408,9 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
      PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888),
      PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888),
  
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
+
      { PIXMAN_OP_NONE },
  };
author	Siarhei Siamashka <siarhei.siamashka@nokia.com>
	Sun, 3 Oct 2010 22:56:59 +0000 (01:56 +0300)
committer	Siarhei Siamashka <siarhei.siamashka@nokia.com>
	Wed, 10 Nov 2010 15:26:49 +0000 (17:26 +0200)
pixman/pixman-arm-simd-asm.S		patch | blob | history
pixman/pixman-arm-simd.c		patch | blob | history