ARM: NEON optimised vorbis_inverse_coupling

author Måns Rullgård <mans@mansr.com>

Fri, 14 Aug 2009 01:02:06 +0000 (01:02 +0000)

committer Måns Rullgård <mans@mansr.com>

Fri, 14 Aug 2009 01:02:06 +0000 (01:02 +0000)
author Måns Rullgård <mans@mansr.com>
Fri, 14 Aug 2009 01:02:06 +0000 (01:02 +0000)
committer Måns Rullgård <mans@mansr.com>
Fri, 14 Aug 2009 01:02:06 +0000 (01:02 +0000)
diff --git a/libavcodec/arm/dsputil_neon.c b/libavcodec/arm/dsputil_neon.c

index d7ee435..6731529 100644 (file)
--- a/libavcodec/arm/dsputil_neon.c
+++ b/libavcodec/arm/dsputil_neon.c
@@ -161,6 +161,8 @@ void ff_vector_fmul_window_neon(float *dst, const float *src0,
  void ff_float_to_int16_neon(int16_t *, const float *, long);
  void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
  
+void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
+
  void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
  {
      c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
@@ -272,4 +274,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
          c->float_to_int16 = ff_float_to_int16_neon;
          c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
      }
+
+    if (CONFIG_VORBIS_DECODER)
+        c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
  }
diff --git a/libavcodec/arm/dsputil_neon_s.S b/libavcodec/arm/dsputil_neon_s.S

index c816f08..71d09c6 100644 (file)
--- a/libavcodec/arm/dsputil_neon_s.S
+++ b/libavcodec/arm/dsputil_neon_s.S
@@ -19,6 +19,7 @@
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
+#include "config.h"
  #include "asm.S"
  
          preserve8
@@ -795,3 +796,66 @@ NOVFP   ldr             lr,  [sp, #16]
          vst1.64         {d22,d23},[ip,:128], r5
          pop             {r4,r5,pc}
          .endfunc
+
+#if CONFIG_VORBIS_DECODER
+function ff_vorbis_inverse_coupling_neon, export=1
+        @ In-place inverse coupling, 4 floats per step; per the C prototype r0 = mag, r1 = ang, r2 = blocksize (assumed a positive multiple of 4 - confirm with callers)
+        vmov.i32        q10, #1<<31             @ q10 = sign-bit mask in every 32-bit lane
+        subs            r2,  r2,  #4            @ r2 = blocksize - 4; Z is set only when exactly one vector exists
+        mov             r3,  r0                 @ r3  = store pointer for mag (r0 runs ahead as load pointer)
+        mov             r12, r1                 @ r12 = store pointer for ang (r1 runs ahead as load pointer)
+        beq             3f                      @ branch on Z from subs only, so sizes 12, 20, ... still run the loop before the tail
+
+        vld1.32         {d24-d25},[r1,:128]!    @ q12 = 4 ang values (address must be 16-byte aligned)
+        vld1.32         {d22-d23},[r0,:128]!    @ q11 = 4 mag values
+        vcle.s32        q8,  q12, #0            @ q8  = all-ones in lanes where the ang bits are <= 0 as signed integers
+        vand            q9,  q11, q10           @ q9  = sign bit of mag
+        veor            q12, q12, q9            @ flip the sign of ang in lanes where mag is negative
+        vand            q2,  q12, q8            @ q2  = adjusted ang in masked lanes, 0 elsewhere
+        vbic            q3,  q12, q8            @ q3  = adjusted ang in unmasked lanes, 0 elsewhere
+        vadd.f32        q12, q11, q2            @ q12 = new mag
+        vsub.f32        q11, q11, q3            @ q11 = new ang
+1:      vld1.32         {d2-d3},  [r1,:128]!    @ q1  = next 4 ang values
+        vld1.32         {d0-d1},  [r0,:128]!    @ q0  = next 4 mag values
+        vcle.s32        q8,  q1,  #0            @ same mask computation as above, on the new vector
+        vand            q9,  q0,  q10
+        veor            q1,  q1,  q9
+        vst1.32         {d24-d25},[r3, :128]!   @ store new mag of the previous vector
+        vst1.32         {d22-d23},[r12,:128]!   @ store new ang of the previous vector
+        vand            q2,  q1,  q8
+        vbic            q3,  q1,  q8
+        vadd.f32        q1,  q0,  q2            @ q1  = new mag
+        vsub.f32        q0,  q0,  q3            @ q0  = new ang
+        subs            r2,  r2,  #8            @ two vectors are consumed per loop iteration
+        ble             2f                      @ at most one vector left: drain and finish
+        vld1.32         {d24-d25},[r1,:128]!    @ second half: same steps with the q12/q11 register set
+        vld1.32         {d22-d23},[r0,:128]!
+        vcle.s32        q8,  q12, #0
+        vand            q9,  q11, q10
+        veor            q12, q12, q9
+        vst1.32         {d2-d3},  [r3, :128]!   @ store new mag computed in the first half
+        vst1.32         {d0-d1},  [r12,:128]!   @ store new ang computed in the first half
+        vand            q2,  q12, q8
+        vbic            q3,  q12, q8
+        vadd.f32        q12, q11, q2
+        vsub.f32        q11, q11, q3
+        b               1b
+
+2:      vst1.32         {d2-d3},  [r3, :128]!   @ drain: store the last results computed in the loop
+        vst1.32         {d0-d1},  [r12,:128]!
+        bxlt            lr                      @ r2 < 0: nothing left; r2 == 0 falls through for one more vector
+
+3:      vld1.32         {d2-d3},  [r1,:128]     @ tail: one vector, loaded without pointer writeback
+        vld1.32         {d0-d1},  [r0,:128]
+        vcle.s32        q8,  q1,  #0
+        vand            q9,  q0,  q10
+        veor            q1,  q1,  q9
+        vand            q2,  q1,  q8
+        vbic            q3,  q1,  q8
+        vadd.f32        q1,  q0,  q2            @ q1  = new mag
+        vsub.f32        q0,  q0,  q3            @ q0  = new ang
+        vst1.32         {d2-d3},  [r0,:128]!    @ new mag written back through the load pointer
+        vst1.32         {d0-d1},  [r1,:128]!    @ new ang written back through the load pointer
+        bx              lr
+        .endfunc
+#endif
author	Måns Rullgård <mans@mansr.com>
	Fri, 14 Aug 2009 01:02:06 +0000 (01:02 +0000)
committer	Måns Rullgård <mans@mansr.com>
	Fri, 14 Aug 2009 01:02:06 +0000 (01:02 +0000)
libavcodec/arm/dsputil_neon.c		patch \| blob \| history
libavcodec/arm/dsputil_neon_s.S		patch \| blob \| history