Properly save neon registers.
authorChristian Duvivier <cduvivier@google.com>
Thu, 26 Sep 2013 23:01:37 +0000 (16:01 -0700)
committerChristian Duvivier <cduvivier@google.com>
Fri, 27 Sep 2013 21:25:33 +0000 (14:25 -0700)
Replace current code which corrupts the stack by
duplicate of vp8 code to save and restore neon
registers.

Change-Id: Ibb0220b9aa985d10533befa0a455ebce57a2891a

vp9/common/arm/neon/vp9_idct16x16_neon.c
vp9/common/arm/neon/vp9_save_reg_neon.asm [new file with mode: 0644]
vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
vp9/vp9_common.mk

index fddf902..fb7b5cd 100644 (file)
@@ -29,17 +29,19 @@ extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
                                                int16_t skip_adding,
                                                uint8_t *dest,
                                                int dest_stride);
-extern void save_neon_registers();
-extern void restore_neon_registers();
 
+/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
+extern void vp9_push_neon(int64_t *store);
+extern void vp9_pop_neon(int64_t *store);
 
 void vp9_short_idct16x16_add_neon(int16_t *input,
                                   uint8_t *dest, int dest_stride) {
+  int64_t store_reg[8];
   int16_t pass1_output[16*16] = {0};
   int16_t row_idct_output[16*16] = {0};
 
   // save d8-d15 register values.
-  save_neon_registers();
+  vp9_push_neon(store_reg);
 
   /* Parallel idct on the upper 8 rows */
   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
@@ -102,18 +104,19 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
                                      dest_stride);
 
   // restore d8-d15 register values.
-  restore_neon_registers();
+  vp9_pop_neon(store_reg);
 
   return;
 }
 
 void vp9_short_idct16x16_10_add_neon(int16_t *input,
                                   uint8_t *dest, int dest_stride) {
+  int64_t store_reg[8];
   int16_t pass1_output[16*16] = {0};
   int16_t row_idct_output[16*16] = {0};
 
   // save d8-d15 register values.
-  save_neon_registers();
+  vp9_push_neon(store_reg);
 
   /* Parallel idct on the upper 8 rows */
   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
@@ -163,7 +166,7 @@ void vp9_short_idct16x16_10_add_neon(int16_t *input,
                                      dest_stride);
 
   // restore d8-d15 register values.
-  restore_neon_registers();
+  vp9_pop_neon(store_reg);
 
   return;
 }
diff --git a/vp9/common/arm/neon/vp9_save_reg_neon.asm b/vp9/common/arm/neon/vp9_save_reg_neon.asm
new file mode 100644 (file)
index 0000000..71c3e70
--- /dev/null
@@ -0,0 +1,36 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp9_push_neon|
+    EXPORT  |vp9_pop_neon|
+
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+
+|vp9_push_neon| PROC
+    vst1.i64            {d8, d9, d10, d11}, [r0]!
+    vst1.i64            {d12, d13, d14, d15}, [r0]!
+    bx              lr
+
+    ENDP
+
+|vp9_pop_neon| PROC
+    vld1.i64            {d8, d9, d10, d11}, [r0]!
+    vld1.i64            {d12, d13, d14, d15}, [r0]!
+    bx              lr
+
+    ENDP
+
+    END
+
index 856022b..df2a052 100644 (file)
@@ -12,8 +12,6 @@
     EXPORT  |vp9_short_idct16x16_add_neon_pass2|
     EXPORT  |vp9_short_idct16x16_10_add_neon_pass1|
     EXPORT  |vp9_short_idct16x16_10_add_neon_pass2|
-    EXPORT  |save_neon_registers|
-    EXPORT  |restore_neon_registers|
     ARM
     REQUIRE8
     PRESERVE8
@@ -1178,14 +1176,4 @@ end_idct10_16x16_pass2
     pop             {r3-r9}
     bx              lr
     ENDP  ; |vp9_short_idct16x16_10_add_neon_pass2|
-;void |save_neon_registers|()
-|save_neon_registers| PROC
-    vpush           {d8-d15}
-    bx              lr
-    ENDP  ; |save_registers|
-;void |restore_neon_registers|()
-|restore_neon_registers| PROC
-    vpop           {d8-d15}
-    bx             lr
-    ENDP  ; |restore_registers|
     END
index 6778474..68403c3 100644 (file)
@@ -108,5 +108,6 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht8x8_add_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_copy_neon$(ASM)
 VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM)
 
 $(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh))