}
if(linebuffer[0]=='#'){
- if(pass)fprintf(out,"%s",linebuffer);
+ if(pass)fprintf(out,"%s\n",linebuffer);
sofar=0;
}else{
return(linebuffer);
int main(int argc,char *argv[]){
vqgen v;
vqbook b;
- quant_return q;
+ quant_meta q;
int *quantlist=NULL;
- int entries=-1,dim=-1,quant=-1;
+ int entries=-1,dim=-1;
FILE *out=NULL;
FILE *in=NULL;
char *line,*name;
/* quant */
line=rline(in,out,1);
- if(sscanf(line,"%lf %lf %d %d",&q.minval,&q.delt,
- &q.addtoquant,&quant)!=4){
+ if(sscanf(line,"%ld %ld %d %d",&q.min,&q.delta,
+ &q.quant,&q.sequencep)!=4){
fprintf(stderr,"Syntax error reading book file\n");
exit(1);
}
/* quantized entries */
- /* save quant data; we can't regen it because the quant details
- are specific to the data type */
+ /* save quant data; we don't want to requantize later as our method
+ is currently imperfect wrt repeated application */
i=0;
quantlist=malloc(sizeof(int)*v.elements*v.entries);
for(j=0;j<entries;j++){
for(k=0;k<dim;k++){
line=rline(in,out,0);
sscanf(line,"%lf",&a);
+ v.entrylist[i]=a;
quantlist[i++]=rint(a);
}
}
- /* unquantized entries */
- i=0;
- for(j=0;j<entries;j++){
- double a;
- for(k=0;k<dim;k++){
- line=rline(in,out,0);
- sscanf(line,"%lf",&a);
- v.entrylist[i++]=a;
- }
- }
-
/* ignore bias */
for(j=0;j<entries;j++)line=rline(in,out,0);
free(v.bias);
}
fclose(in);
+ vqgen_unquantize(&v,&q);
/* build the book */
vqsp_book(&v,&b);
function: metrics and quantization code for LSP VQ codebooks
author: Monty <xiphmont@mit.edu>
modifications by: Monty
- last modification date: Dec 15 1999
+ last modification date: Dec 24 1999
********************************************************************/
#include "vqgen.h"
#include "vqext.h"
-char *vqext_booktype="LSPdata";
+char *vqext_booktype="LSPdata";
+quant_meta q={0,0,0,1}; /* set sequence data */
/* LSP training metric. We weight error proportional to distance
*between* LSP vector values. The idea of this metric is not to set
features. */
double global_maxdel=M_PI;
-#define FUDGE ((global_maxdel*1.0)-testdist)
+#define FUDGE ((global_maxdel*2.0)-testdist)
/* candidate,actual */
double vqext_metric(vqgen *v,double *b, double *a){
int i;
int el=v->elements;
double acc=0.;
- double lasta=0.;
+ /*double lasta=0.;*/
double lastb=0.;
for(i=0;i<el;i++){
return acc;
}
-/* LSP quantizes all absolute values, but the book encodes distance
- between values (which has a smaller range). Thus the desired
- quantibits apply to the encoded (delta) values, not abs
- positions. This requires minor additional trickery. */
-
-quant_return vqext_quantize(vqgen *v,int quantbits){
- quant_return q;
- double maxval=0.;
- double maxdel;
- double mindel;
- double delta;
- double fullrangevals;
- double maxquant=((1<<quantbits)-1);
- int j,k;
-
- mindel=maxdel=_now(v,0)[0];
-
- for(j=0;j<v->entries;j++){
- double last=0.;
- for(k=0;k<v->elements;k++){
- if(mindel>_now(v,j)[k]-last)mindel=_now(v,j)[k]-last;
- if(maxdel<_now(v,j)[k]-last)maxdel=_now(v,j)[k]-last;
- if(maxval<_now(v,j)[k])maxval=_now(v,j)[k];
- last=_now(v,j)[k];
- }
- }
-
- q.minval=0.;
- delta=(maxdel-mindel)/((1<<quantbits)-2);
- fullrangevals=floor(maxval/delta);
- q.delt=delta=maxval/fullrangevals;
- q.addtoquant=floor(mindel/delta);
-
- for(j=0;j<v->entries;j++){
- double last=0.;
- for(k=0;k<v->elements;k++){
- double val=_now(v,j)[k];
- double now=rint(val/delta);
- double test=_now(v,j)[k]=now-last-q.addtoquant;
-
- if(test<0){
- fprintf(stderr,"fault; quantized value<0\n");
- exit(1);
- }
-
- if(test>maxquant){
- fprintf(stderr,"fault; quantized value>max\n");
- exit(1);
- }
- last=now;
- }
- }
- return(q);
-}
-
-/* much easier :-) */
-void vqext_unquantize(vqgen *v,quant_return *q){
- long j,k;
- if(global_maxdel==M_PI)global_maxdel=0.;
- for(j=0;j<v->entries;j++){
- double last=0.;
- for(k=0;k<v->elements;k++){
- double del=(_now(v,j)[k]+q->addtoquant)*q->delt+q->minval;
- last+=del;
- _now(v,j)[k]=last;
- if(del>global_maxdel)global_maxdel=del;
- }
- }
-}
-
/* Data files are line-vectors, starting with zero. If we want to
train on a subvector starting in the middle, we need to adjust the
data as if it was starting at zero */
for(i=start;i<start+dim;i++)b[i]-=base;
}
}
+
+/* we just need to calc the global_maxdel from the training set */
+void vqext_preprocess(vqgen *v){
+ long j,k;
+
+ global_maxdel=0.;
+ for(j=0;j<v->entries;j++){
+ double last=0.;
+ for(k=0;k<v->elements;k++){
+ double now=_now(v,j)[k];
+ if(now-last>global_maxdel)global_maxdel=now-last;
+ last=now;
+ }
+ }
+}
function: utility main for training codebooks
author: Monty <xiphmont@mit.edu>
modifications by: Monty
- last modification date: Dec 15 1999
+ last modification date: Dec 23 1999
********************************************************************/
}
if(linebuffer[0]=='#'){
- if(pass)fprintf(out,"%s",linebuffer);
+ if(pass)fprintf(out,"%s\n",linebuffer);
sofar=0;
}else{
return(linebuffer);
int main(int argc,char *argv[]){
vqgen v;
- quant_return q;
- int entries=-1,dim=-1,quant=-1;
+ int entries=-1,dim=-1;
int start=0,num=-1;
double desired=.05;
int iter=1000;
char *line;
long i,j,k;
int init=0;
+ q.quant=-1;
argv++;
if(!*argv){
/* quant setup */
line=rline(in,out,1);
- if(sscanf(line,"%lf %lf %d %d",&q.minval,&q.delt,
- &q.addtoquant,&quant)!=4){
+ if(sscanf(line,"%ld %ld %d %d",&q.min,&q.delta,
+ &q.quant,&q.sequencep)!=4){
fprintf(stderr,"Syntax error reading book file\n");
exit(1);
}
/* quantized entries */
- for(j=0;j<entries;j++)
- for(k=0;k<dim;k++)
- line=rline(in,out,0);
-
- /* unquantized entries */
i=0;
for(j=0;j<entries;j++){
for(k=0;k<dim;k++){
sscanf(line,"%lf",&a);
v.entrylist[i++]=a;
}
- }
-
- /* bias, points */
+ }
+ vqgen_unquantize(&v,&q);
+
+ /* bias, points */
i=0;
for(j=0;j<entries;j++){
line=rline(in,out,0);
}
{
- double b[80];
+ double *b=alloca(dim*sizeof(double));
i=0;
v.entries=0; /* hack to avoid reseeding */
while(1){
- for(k=0;k<dim && k<80;k++){
+ for(k=0;k<dim;k++){
line=rline(in,out,0);
if(!line)break;
sscanf(line,"%lf",b+k);
}
switch(argv[0][1]){
case 'p':
- if(sscanf(argv[1],"%d,%d,%d",&entries,&dim,&quant)!=3)
+ if(sscanf(argv[1],"%d,%d,%d",&entries,&dim,&q.quant)!=3)
goto syner;
break;
case 's':
int cols=-1;
if(!init){
- if(dim==-1 || entries==-1 || quant==-1){
+ if(dim==-1 || entries==-1 || q.quant==-1){
fprintf(stderr,"-p required when training a new set\n");
exit(1);
}
exit(1);
}
+ vqext_preprocess(&v);
+
/* train the book */
signal(SIGTERM,setexit);
signal(SIGINT,setexit);
for(i=0;i<iter && !exiting;i++){
double result;
- if(i!=0)vqext_unquantize(&v,&q);
+ if(i!=0)vqgen_unquantize(&v,&q);
result=vqgen_iterate(&v);
- q=vqext_quantize(&v,quant);
+ vqgen_quantize(&v,&q);
if(result<desired)break;
}
fprintf(out,"# OggVorbis VQ codebook trainer, intermediate file\n");
fprintf(out,"%s\n",vqext_booktype);
fprintf(out,"%d %d\n",entries,dim);
- fprintf(out,"%g %g %d %d\n",q.minval,q.delt,q.addtoquant,quant);
+ fprintf(out,"%ld %ld %d %d\n",q.min,q.delta,q.quant,q.sequencep);
/* quantized entries */
fprintf(out,"# quantized entries---\n");
for(j=0;j<entries;j++)
for(k=0;k<dim;k++)
fprintf(out,"%d\n",(int)(rint(v.entrylist[i++])));
-
- /* dequantize */
- vqext_unquantize(&v,&q);
-
- fprintf(out,"# unquantized entries---\n");
- i=0;
- for(j=0;j<entries;j++)
- for(k=0;k<dim;k++)
- fprintf(out,"%f\n",v.entrylist[i++]);
-
- /* unquantized entries */
fprintf(out,"# biases---\n");
i=0;
#include "vqgen.h"
extern char *vqext_booktype;
+extern quant_meta q;
extern double vqext_metric(vqgen *v,double *b, double *a);
-extern quant_return vqext_quantize(vqgen *v,int quantbits);
-extern void vqext_unquantize(vqgen *v,quant_return *q);
extern void vqext_adjdata(double *b,int start,int dim);
+extern void vqext_preprocess(vqgen *v);
#endif
/* External calls *******************************************************/
+/* We have two forms of quantization; in the first, each vector
+ element in the codebook entry is orthogonal. Residues would use this
+ quantization for example.
+
+ In the second, we have a sequence of monotonically increasing
+ values that we wish to quantize as deltas (to save space). We
+ still need to quantize so that absolute values are accurate. For
+ example, LSP quantizes all absolute values, but the book encodes
+ distance between values because each successive value is larger
+ than the preceeding value. Thus the desired quantibits apply to
+ the encoded (delta) values, not abs positions. This requires minor
+ additional encode-side trickery. */
+
+/* 24 bit float (not IEEE; nonnormalized mantissa +
+ biased exponent ): neeeeemm mmmmmmmm mmmmmmmm */
+
+#define VQ_FEXP_BIAS 20 /* bias toward values smaller than 1. */
+long float24_pack(double val){
+ int sign=0;
+ long exp;
+ long mant;
+ if(val<0){
+ sign=0x800000;
+ val= -val;
+ }
+ exp= floor(log(val)/log(2));
+ mant=rint(ldexp(val,17-exp));
+ exp=(exp+VQ_FEXP_BIAS)<<18;
+
+ return(sign|exp|mant);
+}
+
+double float24_unpack(long val){
+ double mant=val&0x3ffff;
+ double sign=val&0x800000;
+ double exp =(val&0x7c0000)>>18;
+ if(sign)mant= -mant;
+ return(ldexp(mant,exp-17-VQ_FEXP_BIAS));
+}
+
+void vqgen_quantize(vqgen *v,quant_meta *q){
+
+ double maxdel;
+ double mindel;
+
+ double delta;
+ double maxquant=((1<<q->quant)-1);
+
+ int j,k;
+
+ mindel=maxdel=_now(v,0)[0];
+
+ for(j=0;j<v->entries;j++){
+ double last=0.;
+ for(k=0;k<v->elements;k++){
+ if(mindel>_now(v,j)[k]-last)mindel=_now(v,j)[k]-last;
+ if(maxdel<_now(v,j)[k]-last)maxdel=_now(v,j)[k]-last;
+ if(q->sequencep)last=_now(v,j)[k];
+ }
+ }
+
+
+ /* first find the basic delta amount from the maximum span to be
+ encoded. Loosen the delta slightly to allow for additional error
+ during sequence quantization */
+
+ delta=(maxdel-mindel)/((1<<q->quant)-1.5);
+
+ q->min=float24_pack(mindel);
+ q->delta=float24_pack(delta);
+
+ for(j=0;j<v->entries;j++){
+ double last=0;
+ for(k=0;k<v->elements;k++){
+ double val=_now(v,j)[k];
+ double now=rint((val-last-mindel)/delta);
+
+ _now(v,j)[k]=now;
+ if(now<0){
+ /* be paranoid; this should be impossible */
+ fprintf(stderr,"fault; quantized value<0\n");
+ exit(1);
+ }
+
+ if(now>maxquant){
+ /* be paranoid; this should be impossible */
+ fprintf(stderr,"fault; quantized value>max\n");
+ exit(1);
+ }
+ if(q->sequencep)last=(now*delta)+mindel+last;
+ }
+ }
+}
+
+/* much easier :-) */
+void vqgen_unquantize(vqgen *v,quant_meta *q){
+ long j,k;
+ double mindel=float24_unpack(q->min);
+ double delta=float24_unpack(q->delta);
+
+ for(j=0;j<v->entries;j++){
+ double last=0.;
+ for(k=0;k<v->elements;k++){
+ double now=_now(v,j)[k]*delta+last+mindel;
+ _now(v,j)[k]=now;
+ if(q->sequencep)last=now;
+
+ }
+ }
+}
+
void vqgen_init(vqgen *v,int elements,int entries,
double (*metric)(vqgen *,double *, double *)){
memset(v,0,sizeof(vqgen));
typedef struct vqgen{
int it;
-
- int elements;
+ int elements;
/* point cache */
double *pointlist;
} vqgen;
typedef struct vqbook{
- long elements;
- long entries;
+ long dim; /* codebook dimensions (elements per vector) */
+ long entries; /* codebook entries */
+
+ long min; /* packed 24 bit float; quant value 0 maps to minval */
+ long delta; /* packed 24 bit float; val 1 - val 0 == delta */
+ int quant; /* 0 < quant <= 16 */
+ int sequencep; /* bitflag */
- double *valuelist;
- long *codelist;
- long *lengthlist;
+ double *valuelist; /* list of dim*entries quant/actual entry values */
+ long *codelist; /* list of bitstream codewords for each entry */
+ long *lengthlist; /* codeword lengths in bits */
/* auxiliary encoding/decoding information */
+ /* encode: provided pre-calculated partitioning tree */
+ /* decode: hufftree */
long *ptr0;
long *ptr1;
- /* auxiliary encoding information */
- double *n;
+ /* auxiliary encoding information. Not used in decode */
+ double *n; /* decision hyperplanes: sum(x_i*n_i)[0<=i<dim]=c */
double *c;
long aux;
long alloc;
} vqbook;
typedef struct {
- double minval;
- double delt;
- int addtoquant;
-} quant_return;
+ long min; /* packed 24 bit float */
+ long delta; /* packed 24 bit float */
+ int quant; /* 0 < quant <= 16 */
+ int sequencep; /* bitflag */
+} quant_meta;
static inline double *_point(vqgen *v,long ptr){
return v->pointlist+(v->elements*ptr);
extern double *vqgen_midpoint(vqgen *v);
extern double vqgen_iterate(vqgen *v);
+extern void vqgen_unquantize(vqgen *v,quant_meta *q);
+extern void vqgen_quantize(vqgen *v,quant_meta *q);
+
extern void vqsp_book(vqgen *v,vqbook *b);
extern int vqenc_entry(vqbook *b,double *val);
double *sortvals;
int els;
-int iascsort(const void *a,const void *b){
+int dascsort(const void *a,const void *b){
double av=sortvals[*((long *)a) * els];
double bv=sortvals[*((long *)b) * els];
if(av<bv)return(-1);
return(1);
}
+long *isortvals;
+int iascsort(const void *a,const void *b){
+ long av=isortvals[*((long *)a)];
+ long bv=isortvals[*((long *)b)];
+ return(av-bv);
+}
+
extern double _dist_sq(vqgen *v,double *a, double *b);
/* goes through the split, but just counts it and returns a metric*/
/* just sort the index array */
sortvals=v->entrylist+k;
els=v->elements;
- qsort(entryindex,entries,sizeof(long),iascsort);
+ qsort(entryindex,entries,sizeof(long),dascsort);
if(entries&0x1){
p[k]=v->entrylist[(entryindex[entries/2])*v->elements+k];
}else{
b->alloc*=2;
b->ptr0=realloc(b->ptr0,sizeof(long)*b->alloc);
b->ptr1=realloc(b->ptr1,sizeof(long)*b->alloc);
- b->n=realloc(b->n,sizeof(double)*b->elements*b->alloc);
+ b->n=realloc(b->n,sizeof(double)*b->dim*b->alloc);
b->c=realloc(b->c,sizeof(double)*b->alloc);
}
- memcpy(b->n+b->elements*thisaux,n,sizeof(double)*v->elements);
+ memcpy(b->n+b->dim*thisaux,n,sizeof(double)*v->elements);
b->c[thisaux]=c;
if(entriesA==1){
void vqsp_book(vqgen *v, vqbook *b){
long *entryindex=malloc(sizeof(double)*v->entries);
long *pointindex=malloc(sizeof(double)*v->points);
- long i;
+ long i,j;
memset(b,0,sizeof(vqbook));
for(i=0;i<v->entries;i++)entryindex[i]=i;
for(i=0;i<v->points;i++)pointindex[i]=i;
- b->elements=v->elements;
+ b->dim=v->elements;
b->entries=v->entries;
b->alloc=4096;
+
b->ptr0=malloc(sizeof(long)*b->alloc);
b->ptr1=malloc(sizeof(long)*b->alloc);
- b->n=malloc(sizeof(double)*b->elements*b->alloc);
+ b->n=malloc(sizeof(double)*b->dim*b->alloc);
b->c=malloc(sizeof(double)*b->alloc);
-
- b->valuelist=v->entrylist;
- b->codelist=malloc(sizeof(long)*b->entries);
b->lengthlist=calloc(b->entries,sizeof(long));
-
+
/* first, generate the encoding decision heirarchy */
fprintf(stderr,"Total leaves: %d\n",
lp_split(v,b,entryindex,v->entries, pointindex,v->points,0,0));
+ free(entryindex);
+ free(pointindex);
+
/* run all training points through the decision tree to get a final
probability count */
{
long *probability=calloc(b->entries,sizeof(long));
long *membership=malloc(b->entries*sizeof(long));
- long j;
for(i=0;i<v->points;i++){
int ret=vqenc_entry(b,v->pointlist+i*v->elements);
/* print the results */
for(i=0;i<v->entries;i++)
- fprintf(stderr,"%d: count=%ld codelength=%d\n",i,probability[i],b->lengthlist[i]);
+ fprintf(stderr,"%ld: count=%ld codelength=%ld\n",i,probability[i],b->lengthlist[i]);
free(probability);
free(membership);
}
- /* rearrange, sort by codelength */
+ /* Sort the entries by codeword length, short to long (eases
+ assignment and packing to do it now) */
+ {
+ long *wordlen=b->lengthlist;
+ long *index=malloc(b->entries*sizeof(long));
+ long *revindex=malloc(b->entries*sizeof(long));
+ int k;
+ for(i=0;i<b->entries;i++)index[i]=i;
+ isortvals=b->lengthlist;
+ qsort(index,b->entries,sizeof(long),iascsort);
+
+ /* rearrange storage; ptr0/1 first as it needs a reverse index */
+ /* n and c stay unchanged */
+ for(i=0;i<b->entries;i++)revindex[index[i]]=i;
+ for(i=0;i<b->aux;i++){
+ if(b->ptr0[i]>=0)b->ptr0[i]=revindex[i];
+ if(b->ptr1[i]>=0)b->ptr1[i]=revindex[i];
+ }
+ free(revindex);
+
+ /* map lengthlist and vallist with index */
+ b->lengthlist=calloc(b->entries,sizeof(long));
+ b->valuelist=malloc(sizeof(double)*b->entries*b->dim);
+ for(i=0;i<b->entries;i++){
+ long e=index[i];
+ for(k=0;k<b->dim;k++)
+ b->valuelist[i*b->dim+k]=v->entrylist[e*b->dim+k];
+ b->lengthlist[i]=wordlen[e];
+ }
+ free(wordlen);
+ }
/* generate the codewords (short to long) */
-
-
- free(entryindex);
- free(pointindex);
+ {
+ long current=0;
+ long length=0;
+ b->codelist=malloc(sizeof(long)*b->entries);
+ for(i=0;i<b->entries;i++){
+ if(length != b->lengthlist[i]){
+ current<<=(b->lengthlist[i]-length);
+ length=b->lengthlist[i];
+ }
+ b->codelist[i]=current;
+ fprintf(stderr,"codeword %ld: %ld (length %d)\n",
+ i, current, b->lengthlist[i]);
+ current++;
+ }
+ }
+
+ /* sanity check the codewords */
+ for(i=0;i<b->entries;i++){
+ for(j=i+1;j<b->entries;j++){
+ if(b->codelist[i]==b->codelist[j]){
+ fprintf(stderr,"Error; codewords for %ld and %ld both equal %lx\n",
+ i, j, b->codelist[i]);
+ exit(1);
+ }
+ }
+ }
+
}
int vqenc_entry(vqbook *b,double *val){
int ptr=0,k;
while(1){
double c= -b->c[ptr];
- double *nptr=b->n+b->elements*ptr;
- for(k=0;k<b->elements;k++)
+ double *nptr=b->n+b->dim*ptr;
+ for(k=0;k<b->dim;k++)
c+=nptr[k]*val[k];
if(c>0.) /* in A */
ptr= -b->ptr0[ptr];