Fixed-point MDCT with 32-bit unscaled output

author Mans Rullgard <mans@mansr.com>

Mon, 21 Mar 2011 17:52:34 +0000 (17:52 +0000)

committer Mans Rullgard <mans@mansr.com>

Sat, 2 Apr 2011 20:06:07 +0000 (21:06 +0100)
author Mans Rullgard <mans@mansr.com>
Mon, 21 Mar 2011 17:52:34 +0000 (17:52 +0000)
committer Mans Rullgard <mans@mansr.com>
Sat, 2 Apr 2011 20:06:07 +0000 (21:06 +0100)
diff --git a/libavcodec/fft-internal.h b/libavcodec/fft-internal.h

index 1f240db..d30571b 100644 (file)
--- a/libavcodec/fft-internal.h
+++ b/libavcodec/fft-internal.h
@@ -39,6 +39,8 @@
  #include "libavutil/intmath.h"
  #include "mathops.h"
  
+void ff_mdct_calcw_c(FFTContext *s, FFTDouble *output, const FFTSample *input);
+
  #define SCALE_FLOAT(a, bits) lrint((a) * (double)(1 << (bits)))
  #define FIX15(a) av_clip(SCALE_FLOAT(a, 15), -32767, 32767)
  
@@ -49,11 +51,17 @@
          y = (a + b) >> 1;                       \
      } while (0)
  
-#define CMUL(dre, dim, are, aim, bre, bim) do {                 \
-        (dre) = (MUL16(are, bre) - MUL16(aim, bim)) >> 15;      \
-        (dim) = (MUL16(are, bim) + MUL16(aim, bre)) >> 15;      \
+#define CMULS(dre, dim, are, aim, bre, bim, sh) do {            \
+        (dre) = (MUL16(are, bre) - MUL16(aim, bim)) >> sh;      \
+        (dim) = (MUL16(are, bim) + MUL16(aim, bre)) >> sh;      \
      } while (0)
  
+#define CMUL(dre, dim, are, aim, bre, bim)      \
+    CMULS(dre, dim, are, aim, bre, bim, 15)
+
+#define CMULL(dre, dim, are, aim, bre, bim)     \
+    CMULS(dre, dim, are, aim, bre, bim, 0)
+
  #endif /* CONFIG_FFT_FLOAT */
  
  #define ff_imdct_calc_c FFT_NAME(ff_imdct_calc_c)
diff --git a/libavcodec/fft.c b/libavcodec/fft.c

index ff5d34b..19da483 100644 (file)
--- a/libavcodec/fft.c
+++ b/libavcodec/fft.c
@@ -123,6 +123,9 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
      if (ARCH_ARM)     ff_fft_init_arm(s);
      if (HAVE_ALTIVEC) ff_fft_init_altivec(s);
      if (HAVE_MMX)     ff_fft_init_mmx(s);
+    if (CONFIG_MDCT)  s->mdct_calcw = s->mdct_calc;
+#else
+    if (CONFIG_MDCT)  s->mdct_calcw = ff_mdct_calcw_c;
  #endif
  
      for(j=4; j<=nbits; j++) {
diff --git a/libavcodec/fft.h b/libavcodec/fft.h

index 2f13e5f..0f324cf 100644 (file)
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -53,6 +53,10 @@ typedef struct FFTContext FFTContext;
  
  #endif /* CONFIG_FFT_FLOAT */
  
+typedef struct FFTDComplex {
+    FFTDouble re, im;
+} FFTDComplex;
+
  /* FFT computation */
  
  struct FFTContext {
@@ -77,6 +81,7 @@ struct FFTContext {
      void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
      void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
      void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+    void (*mdct_calcw)(struct FFTContext *s, FFTDouble *output, const FFTSample *input);
      int fft_permutation;
  #define FF_FFT_PERM_DEFAULT   0
  #define FF_FFT_PERM_SWAP_LSBS 1
diff --git a/libavcodec/mdct_fixed.c b/libavcodec/mdct_fixed.c

index 19c8039..94527f9 100644 (file)
--- a/libavcodec/mdct_fixed.c
+++ b/libavcodec/mdct_fixed.c
@@ -18,3 +18,47 @@
  
  #define CONFIG_FFT_FLOAT 0
  #include "mdct.c"
+
+/* same as ff_mdct_calcw_c with double-width unscaled output */
+void ff_mdct_calcw_c(FFTContext *s, FFTDouble *out, const FFTSample *input)
+{
+    int i, j, n, n8, n4, n2, n3;
+    FFTDouble re, im;
+    const uint16_t *revtab = s->revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    FFTComplex *x = s->tmp_buf;
+    FFTDComplex *o = (FFTDComplex *)out;
+
+    n = 1 << s->mdct_bits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+    n3 = 3 * n4;
+
+    /* pre rotation */
+    for(i=0;i<n8;i++) {
+        re = RSCALE(-input[2*i+n3] - input[n3-1-2*i]);
+        im = RSCALE(-input[n4+2*i] + input[n4-1-2*i]);
+        j = revtab[i];
+        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);
+
+        re = RSCALE( input[2*i]    - input[n2-1-2*i]);
+        im = RSCALE(-input[n2+2*i] - input[ n-1-2*i]);
+        j = revtab[n8 + i];
+        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
+    }
+
+    s->fft_calc(s, x);
+
+    /* post rotation */
+    for(i=0;i<n8;i++) {
+        FFTDouble r0, i0, r1, i1;
+        CMULL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]);
+        CMULL(i0, r1, x[n8+i  ].re, x[n8+i  ].im, -tsin[n8+i  ], -tcos[n8+i  ]);
+        o[n8-i-1].re = r0;
+        o[n8-i-1].im = i0;
+        o[n8+i  ].re = r1;
+        o[n8+i  ].im = i1;
+    }
+}
author	Mans Rullgard <mans@mansr.com>
	Mon, 21 Mar 2011 17:52:34 +0000 (17:52 +0000)
committer	Mans Rullgard <mans@mansr.com>
	Sat, 2 Apr 2011 20:06:07 +0000 (21:06 +0100)
libavcodec/fft-internal.h		patch \| blob \| history
libavcodec/fft.c		patch \| blob \| history
libavcodec/fft.h		patch \| blob \| history
libavcodec/mdct_fixed.c		patch \| blob \| history