ARM: add some PLD in NEON IDCT
author Måns Rullgård <mans@mansr.com>
Thu, 28 May 2009 17:19:28 +0000 (17:19 +0000)
committer Måns Rullgård <mans@mansr.com>
Thu, 28 May 2009 17:19:28 +0000 (17:19 +0000)
Originally committed as revision 18972 to svn://svn.ffmpeg.org/ffmpeg/trunk

libavcodec/arm/simple_idct_neon.S

index e7099a2..0882481 100644 (file)
         .text
         .align 6
 
+function idct_row4_pld_neon                     @ issue PLD for r0 + k*r1, k = 0..7; clobbers r3
+        pld             [r0]                    @ k = 0 (presumably r0 = output pointer, r1 = line stride -- confirm against callers)
+        add             r3,  r0,  r1,  lsl #2   @ r3 = r0 + 4*r1
+        pld             [r0, r1]                @ k = 1
+        pld             [r0, r1, lsl #1]        @ k = 2
+        pld             [r3, -r1]               @ k = 3, formed as (r0 + 4*r1) - r1
+        pld             [r3]                    @ k = 4
+        pld             [r3, r1]                @ k = 5
+        add             r3,  r3,  r1,  lsl #1   @ r3 = r0 + 6*r1
+        pld             [r3]                    @ k = 6
+        pld             [r3, r1]                @ k = 7
+        .endfunc                                @ no return instruction above: presumably falls through into idct_row4_neon, defined next -- TODO confirm
+
 function idct_row4_neon
         vmov.i32        q15, #(1<<(ROW_SHIFT-1))
         vld1.64         {d2-d5},  [r2,:128]!
@@ -252,7 +265,7 @@ idct_coeff_neon:
 function ff_simple_idct_put_neon, export=1
         idct_start      r2
 
-        bl              idct_row4_neon
+        bl              idct_row4_pld_neon
         bl              idct_row4_neon
         add             r2,  r2,  #-128
         bl              idct_col4_neon
@@ -307,7 +320,7 @@ function idct_col4_add8_neon
 function ff_simple_idct_add_neon, export=1
         idct_start      r2
 
-        bl              idct_row4_neon
+        bl              idct_row4_pld_neon
         bl              idct_row4_neon
         add             r2,  r2,  #-128
         bl              idct_col4_neon