ARMv6 optimised get_pixels
author Måns Rullgård <mans@mansr.com>
Tue, 9 Feb 2010 16:13:38 +0000 (16:13 +0000)
committer Måns Rullgård <mans@mansr.com>
Tue, 9 Feb 2010 16:13:38 +0000 (16:13 +0000)
Originally committed as revision 21701 to svn://svn.ffmpeg.org/ffmpeg/trunk

libavcodec/arm/dsputil_armv6.S
libavcodec/arm/dsputil_init_armv6.c

index 2efa3d5..4c7f440 100644 (file)
@@ -287,6 +287,28 @@ function ff_add_pixels_clamped_armv6, export=1
         pop             {r4-r8,pc}
 .endfunc
 
+function ff_get_pixels_armv6, export=1      @ r0 = block, r1 = pixels, r2 = stride: widen 8 rows of 8 bytes into 64 zero-extended halfwords at block
+        pld             [r1, r2]                @ preload hint for the row at pixels + stride
+        push            {r4-r8, lr}             @ save the callee-saved registers used below; lr is saved because it is reused as the row counter
+        mov             lr,  #8                 @ 8 rows to convert
+1:
+        ldrd            r4,  r5,  [r1],  r2     @ load the 8 bytes of this row into r4 and r5, then pixels += stride (byte 0 = bits 7:0; that is the first pixel in memory only on little-endian, presumably the intended configuration)
+        subs            lr,  lr,  #1            @ count the row early; nothing else in the loop writes the flags, so bgt at the bottom still tests this result
+        uxtb16          r6,  r4                 @ r6  = bytes 2 and 0 of the first word, each zero-extended to a halfword
+        uxtb16          r4,  r4,  ror #8        @ r4  = bytes 3 and 1 of the first word
+        uxtb16          r12, r5                 @ r12 = bytes 2 and 0 of the second word
+        uxtb16          r8,  r5,  ror #8        @ r8  = bytes 3 and 1 of the second word
+        pld             [r1, r2]                @ r1 already points at the next row, so this hints the row after it
+        pkhbt           r5,  r6,  r4,  lsl #16  @ r5  = byte 1 (high halfword) : byte 0 (low halfword) of the first word
+        pkhtb           r6,  r4,  r6,  asr #16  @ r6  = byte 3 : byte 2 of the first word
+        pkhbt           r7,  r12, r8,  lsl #16  @ r7  = byte 1 : byte 0 of the second word
+        pkhtb           r12, r8,  r12, asr #16  @ r12 = byte 3 : byte 2 of the second word
+        stm             r0!, {r5,r6,r7,r12}     @ store the 8 halfwords (16 bytes) in ascending register order and advance block by 16
+        bgt             1b                      @ repeat while the row counter is still above zero
+
+        pop             {r4-r8, pc}             @ restore r4-r8 and return by popping the saved lr straight into pc
+.endfunc
+
 function ff_pix_abs16_armv6, export=1
         ldr             r0,  [sp]
         push            {r4-r9, lr}
index 972e743..1e59943 100644 (file)
@@ -50,6 +50,8 @@ void ff_add_pixels_clamped_armv6(const DCTELEM *block,
                                  uint8_t *restrict pixels,
                                  int line_size);
 
+void ff_get_pixels_armv6(DCTELEM *block, const uint8_t *pixels, int stride);
+
 int ff_pix_abs16_armv6(void *s, uint8_t *blk1, uint8_t *blk2,
                        int line_size, int h);
 int ff_pix_abs16_x2_armv6(void *s, uint8_t *blk1, uint8_t *blk2,
@@ -92,6 +94,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
     c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6;
 
     c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
+    c->get_pixels = ff_get_pixels_armv6;
 
     c->pix_abs[0][0] = ff_pix_abs16_armv6;
     c->pix_abs[0][1] = ff_pix_abs16_x2_armv6;