Incremental updates around.
Monty
svn path=/trunk/vorbis/; revision=219
********************************************************************
function: libvorbis codec headers
- last mod: $Id: codec.h,v 1.2 1999/12/30 07:26:29 xiphmont Exp $
+ last mod: $Id: codec.h,v 1.3 1999/12/31 12:35:10 xiphmont Exp $
********************************************************************/
} envelope_lookup;
typedef struct lpclook{
- /* encode lookups */
- int *uscale;
- double *escale;
+ /* en/decode lookups */
+ int *linearmap;
+ double *barknorm;
drft_lookup fft;
- /* en/decode lookups */
- long *dscale;
- int *iscale;
- double *ifrac;
- double *norm;
int n;
int ln;
int m;
int blocksize[2];
int floororder[2];
- int flooroctaves[2];
+ int floormap[2];
int floorch;
--- /dev/null
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
+ * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
+ * PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000 *
+ * by Monty <monty@xiph.org> and The XIPHOPHORUS Company *
+ * http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function: predefined encoding modes
+ last mod: $Id: modes.h,v 1.1 1999/12/31 12:35:11 xiphmont Exp $
+
+ ********************************************************************/
+
+#ifndef _V_MODES_H_
+#define _V_MODES_H_
+
+#include <stdio.h>
+#include "codec.h"
+
+double threshhold_points[THRESH_POINTS]=
+/* 0Hz 24kHz
+ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 */
+{0.,.01,.02,.03,.04,.06,.08,.1,.15,.2,.25,.3,.34,.4,.45,.5,.6,.7,.8,1.};
+
+vorbis_info predef_modes[]={
+ /* CD quality stereo, no channel coupling */
+
+ /* channels, sample rate, upperkbps, nominalkbps, lowerkbps */
+ { 2, 44100, 0,0,0,
+ /* dummy, dummy, dummy, dummy */
+ 0, NULL, 0, NULL,
+ /* smallblock, largeblock, LPC order (small, large) */
+ {256, 2048}, {8,22},
+ /* {bark mapping size}, spectral channels */
+ {32,128}, 2,
+ /* thresh sample period, preecho clamp trigger threshhold, range, dummy */
+ 64, 4, 2, NULL,
+ /* noise masking curve dB attenuation levels [20] */
+ /*{-12,-12,-18,-18,-18,-18,-18,-18,-18,-12,
+ -8,-4,0,0,0,0,0,0,0,0},*/
+ {-100,-100,-100,-100,-100,-100,-100,-24,-24,-24,
+ -24,-24,-24,-24,-24,-24,-24,-24,-24,-24},
+ /* noise masking scale biases */
+ .95,1.01,.01,
+ /* tone masking curve dB attenuation levels [20] */
+ {-20,-20,-20,-20,-20,-20,-20,-20,-20,-20,
+ -20,-20,-20,-20,-20,-20,-20,-20,-20,-20},
+ /* tone masking rolloff settings (dB per octave), octave bias */
+ 90,60,.001,
+ NULL,NULL,NULL},
+
+};
+
+#define predef_mode_max 0
+
+#endif
# vorbis makefile configured for use with gcc on any platform
-# $Id: Makefile.in,v 1.17 1999/12/30 07:26:31 xiphmont Exp $
+# $Id: Makefile.in,v 1.18 1999/12/31 12:35:11 xiphmont Exp $
###############################################################################
# #
HFILES = ../include/codec.h ../include/vorbisfile.h \
bitwise.h envelope.h lpc.h lsp.h \
- psy.h smallft.h window.h xlogmap.h os.h mdct.h
+ psy.h smallft.h window.h barkmel.h os.h mdct.h
LFILES = framing.o mdct.o smallft.o block.o envelope.o window.o\
lsp.o lpc.o analysis.o synthesis.o psy.o info.o bitwise.o\
spectrum.o vorbisfile.o
* *
********************************************************************
- function: linear x scale -> log x scale mappings (with bias)
- last mod: $Id: xlogmap.h,v 1.2 1999/12/30 07:26:57 xiphmont Exp $
+ function: linear scale -> bark and mel scales
+ last mod: $Id: barkmel.h,v 1.1 1999/12/31 12:35:12 xiphmont Exp $
********************************************************************/
-#ifndef _V_XLOGMAP_H_
-#define _V_XLOGMAP_H_
+#ifndef _V_BARKMEL_H_
+#define _V_BARKMEL_H_
#include <math.h>
-/*
-Bias = log_2( n / ( (2^octaves) - 1))
-log_x = log_2( linear_x + 2^Bias ) - Bias
-linear_x = 2^(Bias+log_x)-2^Bias;
-*/
+/* The bark scale equations are approximations, since the original
+ table was somewhat hand rolled. They're chosen to have the best
+ possible fit to the rolled tables, thus their somewhat odd
+ appearance (these are more accurate and over a longer range than
+ the oft-quoted bark equations found in the texts I have). The
+ approximations are valid from 0 - 30kHz (nyquist) or so.
-#define LOG_BIAS(n,o) (log((n)/(pow(2.,(o))-1))/log(2.))
-#define LOG_X(x,b) (log((x)+pow(2.,(b)))/log(2.)-(b))
-#define LINEAR_X(x,b) (pow(2.,((b)+(x)))-pow(2.,(b)))
+ all f in Hz, z in Bark */
+
+#define fBARK(f) (13.1*atan(.00074*(f))+2.24*atan((f)*(f)*1.85e-8)+1e-4*(f))
+#define iBARK(z) (102.*(z)-2.*pow(z,2.)+.4*pow(z,3)+pow(1.46,z)-1.)
+#define fMEL(f) (1000.*(log(1.+(f)*.001)*1.442695))
#endif
********************************************************************
function: PCM data vector blocking, windowing and dis/reassembly
- last mod: $Id: block.c,v 1.18 1999/12/30 07:26:35 xiphmont Exp $
+ last mod: $Id: block.c,v 1.19 1999/12/31 12:35:12 xiphmont Exp $
Handle windowing, overlap-add, etc of the PCM vectors. This is made
more amusing by Vorbis' current two allowed block sizes.
/* Yes, wasteful to have four lookups. This will get collapsed once
things crystallize */
- lpc_init(&v->vl[0],vi->blocksize[0]/2,vi->blocksize[0]/2,
- vi->floororder[0],vi->flooroctaves[0],1);
- lpc_init(&v->vl[1],vi->blocksize[1]/2,vi->blocksize[1]/2,
- vi->floororder[0],vi->flooroctaves[0],1);
+ lpc_init(&v->vl[0],vi->blocksize[0]/2,vi->floormap[0],vi->rate,
+ vi->floororder[0]);
+ lpc_init(&v->vl[1],vi->blocksize[1]/2,vi->floormap[1],vi->rate,
+ vi->floororder[1]);
/*lpc_init(&v->vbal[0],vi->blocksize[0]/2,256,
vi->balanceorder,vi->balanceoctaves,1);
/* Yes, wasteful to have four lookups. This will get collapsed once
things crystallize */
- lpc_init(&v->vl[0],vi->blocksize[0]/2,vi->blocksize[0]/2,
- vi->floororder[0],vi->flooroctaves[0],0);
- lpc_init(&v->vl[1],vi->blocksize[1]/2,vi->blocksize[1]/4,
- vi->floororder[1],vi->flooroctaves[1],0);
+ lpc_init(&v->vl[0],vi->blocksize[0]/2,vi->floormap[0],vi->rate,
+ vi->floororder[0]);
+ lpc_init(&v->vl[1],vi->blocksize[1]/2,vi->floormap[1],vi->rate,
+ vi->floororder[1]);
/*lpc_init(&v->vbal[0],vi->blocksize[0]/2,256,
vi->balanceorder,vi->balanceoctaves,0);
lpc_init(&v->vbal[1],vi->blocksize[1]/2,256,
********************************************************************
function: maintain the info structure, info <-> header packets
- last mod: $Id: info.c,v 1.11 1999/12/30 07:26:39 xiphmont Exp $
+ last mod: $Id: info.c,v 1.12 1999/12/31 12:35:13 xiphmont Exp $
********************************************************************/
memcpy(vi,&(predef_modes[mode]),sizeof(vorbis_info));
vi->threshhold_points=threshhold_points;
vi->user_comments=calloc(1,sizeof(char *));
- vi->vendor=strdup("Xiphophorus libVorbis I 19991104");
+ vi->vendor=strdup("Xiphophorus libVorbis I 19991230");
return(0);
}
vi->floororder[0]=_oggpack_read(&opb,8);
vi->floororder[1]=_oggpack_read(&opb,8);
- vi->flooroctaves[0]=_oggpack_read(&opb,8);
- vi->flooroctaves[1]=_oggpack_read(&opb,8);
+ vi->floormap[0]=_oggpack_read(&opb,16);
+ vi->floormap[1]=_oggpack_read(&opb,16);
vi->floorch=_oggpack_read(&opb,8);
if(vi->rate<1)return(-1);
_oggpack_write(&opb,ilog2(vi->blocksize[1]),4);
_oggpack_write(&opb,vi->floororder[0],8);
_oggpack_write(&opb,vi->floororder[1],8);
- _oggpack_write(&opb,vi->flooroctaves[0],8);
- _oggpack_write(&opb,vi->flooroctaves[1],8);
+ _oggpack_write(&opb,vi->floormap[0],16);
+ _oggpack_write(&opb,vi->floormap[1],16);
_oggpack_write(&opb,vi->floorch,8);
/* build the packet */
********************************************************************
function: LPC low level routines
- last mod: $Id: lpc.c,v 1.10 1999/12/30 07:26:40 xiphmont Exp $
+ last mod: $Id: lpc.c,v 1.11 1999/12/31 12:35:14 xiphmont Exp $
********************************************************************/
#include "os.h"
#include "smallft.h"
#include "lpc.h"
-#include "xlogmap.h"
+#include "barkmel.h"
-/* This is pared down for Vorbis. Autocorrelation LPC coeff generation
- algorithm invented by N. Levinson in 1947, modified by J. Durbin in
- 1959. */
+/* Autocorrelation LPC coeff generation algorithm invented by
+ N. Levinson in 1947, modified by J. Durbin in 1959. */
/* Input : n elements of time doamin data
Output: m lpc coefficients, excitation energy */
-
double vorbis_lpc_from_data(double *data,double *lpc,int n,int m){
double *aut=alloca(sizeof(double)*(m+1));
double error;
return(vorbis_lpc_from_data(work,lpc,n,m));
}
-/* On top of this basic LPC infrastructure we introduce two modifications:
-
- 1) Filter generation is limited in the resolution of features it
- can represent (this is more obvious when the filter is looked at as
- a set of LSP coefficients). Human perception of the audio spectrum
- is logarithmic not only in amplitude, but also frequency. Because
- the high frequency features we'll need to encode will be broader
- than the low frequency features, filter generation will be
- dominated by higher frequencies (when most of the energy is in the
- lowest frequencies, and greatest perceived resolution is in the
- midrange). To avoid this effect, Vorbis encodes the frequency
- spectrum with a biased log frequency scale. The intent is to
- roughly equalize the sizes of the octaves (see xlogmap.h)
-
- 2) When we change the frequency scale, we also change the
- (apparent) relative energies of the bands; that is, on a log scale
- covering 5 octaves, the highest octave goes from being represented
- in half the bins, to only 1/32 of the bins. If the amplitudes
- remain the same, we have divided the energy represented by the
- highest octave by 16 (as far as Levinson-Durbin is concerned).
- This will seriously skew filter generation, which bases calculation
- on the mean square error with respect to energy. Thus, Vorbis
- normalizes the amplitudes of the log spectrum frequencies to keep
- the relative octave energies correct. */
-
-/* n == size of vector to be used for filter, m == order of filter,
- oct == octaves in normalized scale, encode_p == encode (1) or
- decode (0) */
-
-void lpc_init(lpc_lookup *l,int n, int mapped, int m, int oct, int encode_p){
- double bias=LOG_BIAS(n,oct);
- double scale=(float)mapped/(float)oct; /* where n==mapped */
- int i;
+/* initialize Bark scale and normalization lookups. We could do this
+ with static tables, but Vorbis allows a number of possible
+ combinations, so it's best to do it computationally.
+ The below is authoritative in terms of defining scale mapping.
+ Note that the scale depends on the sampling rate as well as the
+ linear block and mapping sizes (note that for a given sample rate
+ and block size, there's generally a fairly obviously optimal
+ mapping size) */
+
+void lpc_init(lpc_lookup *l,int n, long mapped, long rate, int m){
+ int i;
+ double scale;
memset(l,0,sizeof(lpc_lookup));
l->n=n;
l->ln=mapped;
l->m=m;
- l->iscale=malloc(n*sizeof(int));
- l->ifrac=malloc(n*sizeof(double));
- l->norm=malloc(n*sizeof(double));
- for(i=0;i<n;i++){
- /* how much 'real estate' in the log domain does the bin in the
- linear domain represent? */
- double logA=LOG_X(i,bias);
- double logB=LOG_X(i+1.,bias);
- l->norm[i]=logB-logA; /* this much */
- }
+ l->linearmap=malloc(n*sizeof(int));
+ l->barknorm=malloc(mapped*sizeof(double));
- /* the scale is encode/decode specific for algebraic simplicity */
+ /* we choose a scaling constant so that:
+ floor(bark(rate-1)*C)=mapped-1
+ floor(bark(rate)*C)=mapped */
- if(encode_p){
- /* encode */
- l->escale=malloc(mapped*sizeof(double));
- l->uscale=malloc(n*sizeof(int));
-
- /* undersample guard */
+ scale=mapped/fBARK(rate);
+
+ /* the mapping from a linear scale to a smaller bark scale is
+ straightforward with a single catch; make sure not to skip any
+ bark-scale bins. In order to do this, we assign map_N = min
+ (map_N-1 + 1, bark(N)) */
+ {
+ int last=-1;
for(i=0;i<n;i++){
- l->uscale[i]=rint(LOG_X(i,bias)/oct*mapped);
- }
+ int val=floor( fBARK(((double)rate)/n*i) *scale); /* bark numbers
+ represent
+ band edges */
+ if(val>=mapped)val=mapped; /* guard against the approximation */
+ if(val>last+1)val=last+1;
+ l->linearmap[i]=val;
+ last=val;
+ }
+ }
- for(i=0;i<mapped;i++){
- l->escale[i]=LINEAR_X(i/scale,bias);
- l->uscale[(int)(floor(l->escale[i]))]=-1;
- l->uscale[(int)(ceil(l->escale[i]))]=-1;
- }
+ /* 'Normalization' is just making sure that power isn't lost in the
+ log scale by virtue of compressing the scale in higher
+ frequencies. We figure the weight of bands in proportion to
+ their linear/bark width ratio below, again, authoritatively. We
+ use computed width (not the number of actual bins above) for
+ smoothness in the scale; they should agree closely unless the
+ encoder chose parameters poorly (and got a bark scale that would
+ have had lots of skipped bins) */
+ for(i=0;i<mapped;i++)
+ l->barknorm[i]=iBARK((i+1)/scale)-iBARK(i/scale);
- }
- /* decode; encode may use this too */
+ /* we cheat decoding the LPC spectrum via FFTs */
drft_init(&l->fft,mapped*2);
- for(i=0;i<n;i++){
- double is=LOG_X(i,bias)/oct*mapped;
- if(is<0.)is=0.;
- l->iscale[i]=floor(is);
- if(l->iscale[i]>=l->ln-1)l->iscale[i]=l->ln-2;
-
- l->ifrac[i]=is-floor(is);
- if(l->ifrac[i]>1.)l->ifrac[i]=1.;
-
- }
}
void lpc_clear(lpc_lookup *l){
if(l){
- if(l->escale)free(l->escale);
+ if(l->barknorm)free(l->barknorm);
+ if(l->linearmap)free(l->linearmap);
drft_clear(&l->fft);
- free(l->iscale);
- free(l->ifrac);
- free(l->norm);
}
}
not bottlenecked here anyway */
double vorbis_curve_to_lpc(double *curve,double *lpc,lpc_lookup *l){
- /* map the input curve to a log curve for encoding */
-
- /* for clarity, mapped and n are both represented although setting
- 'em equal is a decent rule of thumb. The below must be reworked
- slightly if mapped != n */
+ /* map the input curve to a bark-scale curve for encoding */
int mapped=l->ln;
double *work=alloca(sizeof(double)*mapped);
int i;
- /* fairly correct for low frequencies, naieve for high frequencies
- (suffers from undersampling) */
- for(i=0;i<mapped;i++){
- double lin=l->escale[i];
- int a=floor(lin);
- int b=ceil(lin);
- double del=lin-floor(lin);
+ memset(work,0,sizeof(double)*mapped);
- work[i]=(curve[a]/l->norm[a]*(1.-del)+
- curve[b]/l->norm[b]*del);
+ /* Only the decode side is behavior-specced; for now in the encoder,
+ we select the maximum value of each band as representative (this
+ helps make sure peaks don't go out of range). In error terms,
+ selecting min would make more sense, but the codebook is trained
+ numerically, so we don't lose in encoding. We'd still want to
+ use the original curve for error and noise estimation */
+ for(i=0;i<l->n;i++){
+ int bark=l->linearmap[i];
+ if(work[bark]<curve[i])work[bark]=curve[i];
}
-
- /* for(i=0;i<l->n;i++)
- if(l->uscale[i]>0)
- if(work[l->uscale[i]]<curve[i])work[l->uscale[i]]=curve[i];*/
+ for(i=0;i<mapped;i++)work[i]*=l->barknorm[i];
#ifdef ANALYSIS
{
char buffer[80];
static int frameno=0;
+ sprintf(buffer,"prelpc%d.m",frameno);
+ out=fopen(buffer,"w+");
+ for(j=0;j<l->n;j++)
+ fprintf(out,"%g\n",curve[j]);
+ fclose(out);
sprintf(buffer,"preloglpc%d.m",frameno++);
out=fopen(buffer,"w+");
for(j=0;j<l->ln;j++)
}
-/* generate the whole freq response curve on an LPC IIR filter */
+/* generate the whole freq response curve of an LPC IIR filter */
void vorbis_lpc_to_curve(double *curve,double *lpc,double amp,lpc_lookup *l){
double *lcurve=alloca(sizeof(double)*(l->ln*2));
int i;
+ static int frameno=0;
_vlpc_de_helper(lcurve,lpc,amp,l);
int j;
FILE *out;
char buffer[80];
- static int frameno=0;
sprintf(buffer,"loglpc%d.m",frameno++);
out=fopen(buffer,"w+");
if(amp==0)return;
- for(i=0;i<l->n;i++){
- int ii=l->iscale[i];
- curve[i]=((1.-l->ifrac[i])*lcurve[ii]+
- l->ifrac[i]*lcurve[ii+1])*l->norm[i];
- }
+ for(i=0;i<l->ln;i++)lcurve[i]/=l->barknorm[i];
+ for(i=0;i<l->n;i++)curve[i]=lcurve[l->linearmap[i]];
+#ifdef ANALYSIS
+ {
+ int j;
+ FILE *out;
+ char buffer[80];
+
+ sprintf(buffer,"lpc%d.m",frameno-1);
+ out=fopen(buffer,"w+");
+ for(j=0;j<l->n;j++)
+ fprintf(out,"%g\n",curve[j]);
+ fclose(out);
+ }
+#endif
}
void vorbis_lpc_apply(double *residue,double *lpc,double amp,lpc_lookup *l){
double *lcurve=alloca(sizeof(double)*((l->ln+l->n)*2));
int i;
+ static int frameno=0;
if(amp==0){
memset(residue,0,l->n*sizeof(double));
_vlpc_de_helper(lcurve,lpc,amp,l);
- for(i=0;i<l->n;i++){
- if(residue[i]!=0){
- int ii=l->iscale[i];
- residue[i]*=((1.-l->ifrac[i])*lcurve[ii]+
- l->ifrac[i]*lcurve[ii+1])*l->norm[i];
- }
- }
+#ifdef ANALYSIS
+ {
+ int j;
+ FILE *out;
+ char buffer[80];
+
+ sprintf(buffer,"loglpc%d.m",frameno++);
+ out=fopen(buffer,"w+");
+ for(j=0;j<l->ln;j++)
+ fprintf(out,"%g\n",lcurve[j]);
+ fclose(out);
+ }
+#endif
+
+ for(i=0;i<l->ln;i++)lcurve[i]/=l->barknorm[i];
+ for(i=0;i<l->n;i++)
+ if(residue[i]!=0)
+ residue[i]*=lcurve[l->linearmap[i]];
}
}
********************************************************************
function: LPC low level routines
- last mod: $Id: lpc.h,v 1.5 1999/12/30 07:26:41 xiphmont Exp $
+ last mod: $Id: lpc.h,v 1.6 1999/12/31 12:35:15 xiphmont Exp $
********************************************************************/
#include "codec.h"
-extern void lpc_init(lpc_lookup *l,int n, int mapped,
- int m, int oct, int encode_p);
+extern void lpc_init(lpc_lookup *l,int n, long mapped, long rate, int m);
extern void lpc_clear(lpc_lookup *l);
/* simple linear scale LPC code */
********************************************************************
function: random psychoacoustics (not including preecho)
- last mod: $Id: psy.c,v 1.7 1999/12/30 07:26:47 xiphmont Exp $
+ last mod: $Id: psy.c,v 1.8 1999/12/31 12:35:16 xiphmont Exp $
********************************************************************/
#include "psy.h"
#include "lpc.h"
#include "smallft.h"
-#include "xlogmap.h"
/* Set up decibel threshhold 'curves'. Actually, just set a level at
log frequency intervals, interpolate, and call it a curve. */
********************************************************************
function: spectrum envelope and residue code/decode
- last mod: $Id: spectrum.c,v 1.7 1999/12/30 07:26:51 xiphmont Exp $
+ last mod: $Id: spectrum.c,v 1.8 1999/12/31 12:35:17 xiphmont Exp $
********************************************************************/
int scale=vb->W;
int m=vb->vd->vi->floororder[scale];
- int n=vb->pcmend*4;
+ int n=vb->pcmend*64;
int last=0;
double dlast=0.;
double min=M_PI/n/2.;
for(i=0;i<m;i++){
int val=rint(lsp[i]/M_PI*n-last);
_oggpack_write(&vb->opb,val,bits);
+
lsp[i]=(last+=val)*M_PI/n;
/* Underpowered but sufficient for now. In the real spec (coming
int _vs_spectrum_decode(vorbis_block *vb,double *amp,double *lsp){
int scale=vb->W;
int m=vb->vd->vi->floororder[scale];
- int n=vb->pcmend*4;
+ int n=vb->pcmend*64;
int last=0;
double dlast=0.;
int bits=rint(log(n)/log(2));