ARM: libavresample: NEON optimised stereo fltp to s16 conversion
author Mans Rullgard <mans@mansr.com>
Mon, 10 Sep 2012 14:47:00 +0000 (15:47 +0100)
committer Mans Rullgard <mans@mansr.com>
Thu, 13 Sep 2012 18:15:43 +0000 (19:15 +0100)
Signed-off-by: Mans Rullgard <mans@mansr.com>
libavresample/arm/audio_convert_init.c
libavresample/arm/audio_convert_neon.S

index 7c46a80c5cd049d42a688ee50a3b08a1e4a21254..647111df3ae751ad2db9f50e3078b18fac387dde 100644 (file)
@@ -26,6 +26,8 @@
 #include "libavresample/audio_convert.h"
 
 void ff_conv_flt_to_s16_neon(int16_t *dst, const float *src, int len);
+void ff_conv_fltp_to_s16_2ch_neon(int16_t *dst, float *const *src,
+                                  int len, int channels);
 
 av_cold void ff_audio_convert_init_arm(AudioConvert *ac)
 {
@@ -35,5 +37,8 @@ av_cold void ff_audio_convert_init_arm(AudioConvert *ac)
         ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT,
                                   0, 16, 8, "NEON",
                                   ff_conv_flt_to_s16_neon);
+        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
+                                  2, 16, 8, "NEON",
+                                  ff_conv_fltp_to_s16_2ch_neon);
     }
 }
index e82f8c633ff9cc814eed230e1fdba1c5a37e54f4..97e0434f0933bce121eb9351a9d1df7eac5f0201 100644 (file)
@@ -64,3 +64,67 @@ function ff_conv_flt_to_s16_neon, export=1
         vst1.16         {q2},     [r0,:128]!
         bx              lr
 endfunc
+
+@ Convert two planar float channels into interleaved signed 16-bit samples.
+@
+@ Registers as used below (assuming the standard AAPCS argument order for
+@ the C prototype: dst, src, len, channels -- confirm against the caller):
+@   r0  dst write pointer, post-incremented by every store
+@   r1  on entry points at the array of channel pointers; the ldm replaces
+@       it with the channel 0 read pointer
+@   r2  len, then len - 8, then (len - 8) & 15
+@   r3  overwritten with the channel 1 read pointer; its incoming value is
+@       never read
+@   r12 number of samples per channel left for the 16-sample main loop
+@
+@ Each float is converted with vcvt ..., #31 to a 32-bit fixed-point value
+@ with 31 fractional bits. "vsri.32 qB, qA, #16" then keeps the upper 16 bits
+@ of every qB lane and fills the lower 16 bits with the upper 16 bits of the
+@ matching qA lane, so one 32-bit lane holds a channel 0 sample (low half)
+@ and a channel 1 sample (high half). vst1.16 writes the halves in that
+@ order: channel 0 first, channel 1 second.
+@
+@ The code is software-pipelined: 8 samples per channel are always loaded
+@ and converted one step ahead of the store that consumes them.
+@
+@ Requirements implied by the code:
+@   - len >= 8 and a multiple of 8 (the prologue unconditionally reads 8
+@     samples per channel and every exit path stores whole groups of 8)
+@   - dst and both channel buffers are 16-byte aligned (all accesses use
+@     the :128 alignment qualifier)
+function ff_conv_fltp_to_s16_2ch_neon, export=1
+        ldm             r1,  {r1, r3}           @ r1 = src[0], r3 = src[1]
+        subs            r2,  r2,  #8            @ r2 = len - 8, Z set if len == 8
+        @ Prologue: load and convert the first 8 samples of each channel.
+        @ q8/q9 = channel 0, q10/q11 = channel 1 (the "pending" group).
+        vld1.32         {q0},     [r1,:128]!
+        vcvt.s32.f32    q8,  q0,  #31
+        vld1.32         {q1},     [r1,:128]!
+        vcvt.s32.f32    q9,  q1,  #31
+        vld1.32         {q10},    [r3,:128]!
+        vcvt.s32.f32    q10, q10, #31
+        vld1.32         {q11},    [r3,:128]!
+        vcvt.s32.f32    q11, q11, #31
+        beq             3f                      @ flags from the subs: len == 8
+        bics            r12, r2,  #15           @ r12 = (len - 8) rounded down to 16
+        beq             2f                      @ fewer than 16 left: tail only
+        @ Main loop: per iteration, store the pending 8 frames, convert and
+        @ store 8 more frames (q0/q1 + q12/q13), then load and convert the
+        @ next pending group into q8-q11. 16 samples per channel are read
+        @ and 16 frames written each time round.
+1:      subs            r12, r12, #16           @ flags consumed by the bne below
+        vld1.32         {q0},     [r1,:128]!
+        vcvt.s32.f32    q0,  q0,  #31
+        vsri.32         q10, q8,  #16           @ interleave pending frames 0-3
+        vld1.32         {q1},     [r1,:128]!
+        vcvt.s32.f32    q1,  q1,  #31
+        vld1.32         {q12},    [r3,:128]!
+        vcvt.s32.f32    q12, q12, #31
+        vld1.32         {q13},    [r3,:128]!
+        vsri.32         q11, q9,  #16           @ interleave pending frames 4-7
+        vst1.16         {q10},    [r0,:128]!
+        vcvt.s32.f32    q13, q13, #31
+        vst1.16         {q11},    [r0,:128]!
+        vsri.32         q12, q0,  #16
+        vld1.32         {q8},     [r1,:128]!    @ start loading the next pending group
+        vsri.32         q13, q1,  #16
+        vst1.16         {q12},    [r0,:128]!
+        vcvt.s32.f32    q8,  q8,  #31
+        vld1.32         {q9},     [r1,:128]!
+        vcvt.s32.f32    q9,  q9,  #31
+        vld1.32         {q10},    [r3,:128]!    @ q10 was already stored above
+        vcvt.s32.f32    q10, q10, #31
+        vld1.32         {q11},    [r3,:128]!    @ q11 was already stored above
+        vcvt.s32.f32    q11, q11, #31
+        vst1.16         {q13},    [r0,:128]!
+        bne             1b                      @ NEON ops leave the subs flags intact
+        ands            r2,  r2,  #15           @ samples left beyond the pending group
+        beq             3f                      @ none: just flush the pending group
+        @ Tail for 16 frames: flush the pending group and convert/store one
+        @ further group of 8 samples per channel.
+2:      vsri.32         q10, q8,  #16
+        vld1.32         {q0},     [r1,:128]!
+        vcvt.s32.f32    q0,  q0,  #31
+        vld1.32         {q1},     [r1,:128]!
+        vcvt.s32.f32    q1,  q1,  #31
+        vld1.32         {q12},    [r3,:128]!
+        vcvt.s32.f32    q12, q12, #31
+        vsri.32         q11, q9,  #16
+        vld1.32         {q13},    [r3,:128]!
+        vcvt.s32.f32    q13, q13, #31
+        vst1.16         {q10},    [r0,:128]!
+        vsri.32         q12, q0,  #16
+        vst1.16         {q11},    [r0,:128]!
+        vsri.32         q13, q1,  #16
+        vst1.16         {q12-q13},[r0,:128]!
+        bx              lr
+        @ Tail for 8 frames: only the pending group remains to be stored.
+3:      vsri.32         q10, q8,  #16
+        vsri.32         q11, q9,  #16
+        vst1.16         {q10-q11},[r0,:128]!
+        bx              lr
+endfunc