-# $Id: Makefile.in,v 1.1 1999/12/17 07:21:26 xiphmont Exp $
+# $Id: Makefile.in,v 1.2 1999/12/17 13:19:30 xiphmont Exp $
###############################################################################
# #
HFILES = vqgen.h vqext.h
-OFILES = vqgen.o train.o
-ALLOFILES = $(OFILES) lspdata.o
+OFILES = vqgen.o vqsplit.o
+ALLOFILES = $(OFILES) lspdata.o train.o build.o
all:
$(MAKE) target CFLAGS="$(OPT)"
profile:
$(MAKE) target CFLAGS="$(PROFILE)"
-target: lspvqtrain
+target: lspvqtrain vqbuild
-lspvqtrain: $(OFILES) lspdata.o
+lspvqtrain: $(OFILES) lspdata.o train.o
+ $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ $(LIBS)
+
+vqbuild: $(OFILES) build.o
$(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ $(LIBS)
$(ALLOFILES): $(HFILES)
$(CC) $(CFLAGS) -c $<
clean:
- -rm -f *.o *.a test* *~ *.out *.m config.*\
+ -rm -f *.o *.a test* *~ *.out *.m config.* \
lspvqtrain
function: utility main for building codebooks from training sets
author: Monty <xiphmont@mit.edu>
modifications by: Monty
- last modification date: Dec 14 1999
+ last modification date: Dec 15 1999
********************************************************************/
#include <math.h>
#include <string.h>
#include <errno.h>
-#include <signal.h>
#include "vqgen.h"
-static int rline(FILE *in,FILE *out,char *line,int max,int pass){
- while(fgets(line,160,in)){
- if(line[0]=='#'){
- if(pass)fprintf(out,"%s",line);
+static char *linebuffer=NULL;
+static int lbufsize=0;
+static char *rline(FILE *in,FILE *out,int pass){
+ long sofar=0;
+ if(feof(in))return NULL;
+
+ while(1){
+ int gotline=0;
+
+ while(!gotline){
+ if(sofar>=lbufsize){
+ if(!lbufsize){
+ lbufsize=1024;
+ linebuffer=malloc(lbufsize);
+ }else{
+ lbufsize*=2;
+ linebuffer=realloc(linebuffer,lbufsize);
+ }
+ }
+ {
+ long c=fgetc(in);
+ switch(c){
+ case '\n':
+ case EOF:
+ gotline=1;
+ break;
+ default:
+ linebuffer[sofar++]=c;
+ linebuffer[sofar]='\0';
+ break;
+ }
+ }
+ }
+
+ if(linebuffer[0]=='#'){
+ if(pass)fprintf(out,"%s",linebuffer);
+ sofar=0;
}else{
- return(1);
+ return(linebuffer);
}
}
- return(0);
}
/* command line:
- buildvq vq=file out=file quant=n
+ buildvq file
*/
-int exiting=0;
-void setexit(int dummy){
- fprintf(stderr,"\nexiting... please wait to finish this iteration\n");
- exiting=1;
-}
-
int main(int argc,char *argv[]){
vqgen v;
- int entries=-1,dim=-1;
- char line[1024];
+ vqbook b;
+ quant_return q;
+ int *quantlist=NULL;
+
+ int entries=-1,dim=-1,quant=-1;
+ FILE *out=NULL;
+ FILE *in=NULL;
+ char *line,*name;
long i,j,k;
- int init=0;
- while(*argv){
-
- /* load the trained data */
- if(!strncmp(*argv,"vq=",3)){
- FILE *in=NULL;
- char filename[80],*ptr;
- if(sscanf(*argv,"vq=%70s",filename)!=1){
- fprintf(stderr,"Syntax error in argument '%s'\n",*argv);
- exit(1);
- }
-
- in=fopen(filename,"r");
- ptr=strrchr(filename,'-');
- if(ptr){
- int num;
- ptr++;
- num=atoi(ptr);
- sprintf(ptr,"%d.vqi",num+1);
- }else
- strcat(filename,"-0.vqi");
-
- out=fopen(filename,"w");
- if(out==NULL){
- fprintf(stderr,"Unable to open %s for writing\n",filename);
- exit(1);
- }
- fprintf(out,"# OggVorbis VQ codebook trainer, intermediate file\n");
-
- if(in){
- /* we wish to suck in a preexisting book and continue to train it */
- double a;
-
- rline(in,out,line,160,1);
- if(sscanf(line,"%d %d %d",&entries,&dim,&met)!=3){
- fprintf(stderr,"Syntax error reading book file\n");
- exit(1);
- }
-
- metric=set_metric(met);
- vqgen_init(&v,dim,entries,metric,0.);
- init=1;
-
- /* entries, bias, points */
- i=0;
- for(j=0;j<entries;j++){
- for(k=0;k<dim;k++){
- rline(in,out,line,160,0);
- sscanf(line,"%lf",&a);
- v.entrylist[i++]=a;
- }
- }
-
- i=0;
- for(j=0;j<entries;j++){
- rline(in,out,line,160,0);
- sscanf(line,"%lf",&a);
- v.bias[i++]=a;
- }
-
- {
- double b[80];
- i=0;
- v.entries=0; /* hack to avoid reseeding */
- while(1){
- for(k=0;k<dim && k<80;k++){
- rline(in,out,line,160,0);
- sscanf(line,"%lf",b+k);
- }
- if(feof(in))break;
- vqgen_addpoint(&v,b);
- }
- v.entries=entries;
- }
+ if(argv[1]==NULL){
+ fprintf(stderr,"Need a trained data set on the command line.\n");
+ exit(1);
+ }
- fclose(in);
- }
- }
+ {
+ char *ptr;
+ char *filename=strdup(argv[1]);
- /* set parameters if we're not loading a pre book */
- if(!strncmp(*argv,"entries=",8)){
- sscanf(*argv,"entries=%d",&entries);
+ in=fopen(filename,"r");
+ if(!in){
+ fprintf(stderr,"Could not open input file %s\n",filename);
+ exit(1);
}
- if(!strncmp(*argv,"desired=",8)){
- sscanf(*argv,"desired=%lf",&desired);
- }
- if(!strncmp(*argv,"dim=",4)){
- sscanf(*argv,"dim=%d",&dim);
+
+ ptr=strrchr(filename,'.');
+ if(ptr){
+ *ptr='\0';
+ name=strdup(ptr);
+ sprintf(ptr,".h");
+ }else{
+ name=strdup(filename);
+ strcat(filename,".h");
}
- /* which error metric (0==euclidian distance default) */
- if(!strncmp(*argv,"met=",4)){
- sscanf(*argv,"met=%d",&met);
- metric=set_metric(met);
+ out=fopen(filename,"w");
+ if(out==NULL){
+ fprintf(stderr,"Unable to open %s for writing\n",filename);
+ exit(1);
}
+ }
- if(!strncmp(*argv,"in=",3)){
- int start;
- char file[80];
- FILE *in;
-
- if(sscanf(*argv,"in=%79[^,],%d",file,&start)!=2)goto syner;
- if(!out){
- fprintf(stderr,"vq= must preceed in= arguments\n");
- exit(1);
- }
- if(!init){
- if(dim==-1 || entries==-1){
- fprintf(stderr,"Must specify dimensionality and entries before"
- " first input file\n");
- exit(1);
- }
- vqgen_init(&v,dim,entries,metric,0.);
- init=1;
- }
+ /* suck in the trained book */
- in=fopen(file,"r");
- if(in==NULL){
- fprintf(stderr,"Could not open input file %s\n",file);
- exit(1);
- }
- fprintf(out,"# training file entry: %s\n",file);
-
- while(rline(in,out,line,1024,1)){
- double b[16];
- int n=sscanf(line,"%lf %lf %lf %lf %lf %lf %lf %lf "
- "%lf %lf %lf %lf %lf %lf %lf %lf",
- b,b+1,b+2,b+3,b+4,b+5,b+6,b+7,b+8,b+9,b+10,b+11,b+12,b+13,
- b+14,b+15);
- if(start+dim>n){
- fprintf(stderr,"ran out of columns reading %s\n",file);
- exit(1);
- }
- vqgen_addpoint(&v,b+start);
+ /* read book type, but it doesn't matter */
+ line=rline(in,out,1);
+
+ line=rline(in,out,1);
+ if(sscanf(line,"%d %d",&entries,&dim)!=2){
+ fprintf(stderr,"Syntax error reading book file\n");
+ exit(1);
+ }
+
+ /* just use it to allocate mem */
+ vqgen_init(&v,dim,entries,NULL);
+
+ /* quant */
+ line=rline(in,out,1);
+ if(sscanf(line,"%lf %lf %d %d",&q.minval,&q.delt,
+ &q.addtoquant,&quant)!=4){
+ fprintf(stderr,"Syntax error reading book file\n");
+ exit(1);
+ }
+
+ /* quantized entries */
+ /* save quant data; we can't regen it because the quant details
+ are specific to the data type */
+ i=0;
+ quantlist=malloc(sizeof(int)*v.elements*v.entries);
+ for(j=0;j<entries;j++){
+ double a;
+ for(k=0;k<dim;k++){
+ line=rline(in,out,0);
+ sscanf(line,"%lf",&a);
+ quantlist[i++]=rint(a);
+ }
+ }
+
+ /* unquantized entries */
+ i=0;
+ for(j=0;j<entries;j++){
+ double a;
+ for(k=0;k<dim;k++){
+ line=rline(in,out,0);
+ sscanf(line,"%lf",&a);
+ v.entrylist[i++]=a;
+ }
+ }
+
+ /* ignore bias */
+ for(j=0;j<entries;j++)line=rline(in,out,0);
+ free(v.bias);
+ v.bias=NULL;
+
+ /* training points */
+ {
+ double b[80];
+ i=0;
+ v.entries=0; /* hack to avoid reseeding */
+ while(1){
+ for(k=0;k<dim && k<80;k++){
+ line=rline(in,out,0);
+ if(!line)break;
+ sscanf(line,"%lf",b+k);
}
-
- fclose(in);
+ if(feof(in))break;
+ vqgen_addpoint(&v,b);
}
- argv++;
+ v.entries=entries;
}
+
+ fclose(in);
- /* train the book */
- signal(SIGTERM,setexit);
- signal(SIGINT,setexit);
-
- for(i=0;i<iter && !exiting;i++){
- if(vqgen_iterate(&v)<desired)break;
- }
+ /* build the book */
+ vqsp_book(&v,&b);
- /* save the book */
+ /* save the book in C header form */
- fprintf(out,"%d %d %d\n",entries,dim,met);
- i=0;
- for(j=0;j<entries;j++)
- for(k=0;k<dim;k++)
- fprintf(out,"%f\n",v.entrylist[i++]);
-
- fprintf(out,"# biases---\n");
- i=0;
- for(j=0;j<entries;j++)
- fprintf(out,"%f\n",v.bias[i++]);
- fprintf(out,"# points---\n");
- i=0;
- for(j=0;j<v.points;j++)
- for(k=0;k<dim && k<80;k++)
- fprintf(out,"%f\n",v.pointlist[i++]);
- fclose(out);
exit(0);
-
- syner:
- fprintf(stderr,"Syntax error in argument '%s'\n",*argv);
- exit(1);
}
if(linebuffer[0]=='#'){
if(pass)fprintf(out,"%s",linebuffer);
+ sofar=0;
}else{
return(linebuffer);
}
double a;
line=rline(in,out,1);
- if(strlen(line)>0)line[strlen(line)-1]='\0';
if(strcmp(line,vqext_booktype)){
fprintf(stderr,"wrong book type; %s!=%s\n",line,vqext_booktype);
exit(1);
exit(1);
}
- /* entries */
+ /* quantized entries */
+ for(j=0;j<entries;j++)
+ for(k=0;k<dim;k++)
+ line=rline(in,out,0);
+
+ /* unquantized entries */
i=0;
for(j=0;j<entries;j++){
for(k=0;k<dim;k++){
}
}
- /* dequantize */
- vqext_unquantize(&v,&q);
-
/* bias, points */
i=0;
for(j=0;j<entries;j++){
while(1){
for(k=0;k<dim && k<80;k++){
line=rline(in,out,0);
+ if(!line)break;
sscanf(line,"%lf",b+k);
}
if(feof(in))break;
argv++;
}
+ if(!init){
+ fprintf(stderr,"No input files!\n");
+ exit(1);
+ }
+
+
/* train the book */
signal(SIGTERM,setexit);
signal(SIGINT,setexit);
fprintf(out,"%d %d\n",entries,dim);
fprintf(out,"%g %g %d %d\n",q.minval,q.delt,q.addtoquant,quant);
+ /* quantized entries */
+ fprintf(out,"# quantized entries---\n");
+ i=0;
+ for(j=0;j<entries;j++)
+ for(k=0;k<dim;k++)
+ fprintf(out,"%d\n",(int)(rint(v.entrylist[i++])));
+
+ /* dequantize */
+ vqext_unquantize(&v,&q);
+
+ fprintf(out,"# unquantized entries---\n");
i=0;
for(j=0;j<entries;j++)
for(k=0;k<dim;k++)
fprintf(out,"%f\n",v.entrylist[i++]);
+
+ /* unquantized entries */
fprintf(out,"# biases---\n");
i=0;
#include "vqgen.h"
extern char *vqext_booktype;
-typedef struct {
- double minval;
- double delt;
- int addtoquant;
-} quant_return;
extern double vqext_metric(vqgen *v,double *b, double *a);
extern quant_return vqext_quantize(vqgen *v,int quantbits);
/* internal helpers *****************************************************/
#define vN(data,i) (data+v->elements*i)
-double *_point(vqgen *v,long ptr){
- return v->pointlist+(v->elements*ptr);
-}
-
-double *_now(vqgen *v,long ptr){
- return v->entrylist+(v->elements*ptr);
-}
-
/* default metric; squared 'distance' from desired value. */
double _dist_sq(vqgen *v,double *a, double *b){
int i;
return(asserror);
}
-
-/* Building a codebook from trained set **********************************
-
- The codebook in raw form is technically finished once it's trained.
- However, we want to finalize the representative codebook values for
- each entry and generate auxiliary information to optimize encoding.
- We generate the auxiliary coding tree using collected data,
- probably the same data as in the original training */
-
-/* At each recursion, the data set is split in half. Cells with data
- points on side A go into set A, same with set B. The sets may
- overlap. If the cell overlaps the deviding line only very slightly
- (provided parameter), we may choose to ignore the overlap in order
- to pare the tree down */
-
-double *sortvals;
-int els;
-int iascsort(const void *a,const void *b){
- double av=sortvals[*((long *)a) * els];
- double bv=sortvals[*((long *)b) * els];
- if(av<bv)return(-1);
- return(1);
-}
-
-/* goes through the split, but just counts it and returns a metric*/
-void lp_count(vqgen *v,long *entryindex,long entries,
- long *pointindex,long points,
- long *entryA,long *entryB,
- double *n, double c, double slack,
- long *entriesA,long *entriesB,long *entriesC){
- long i,j,k;
- long A=0,B=0,C=0;
-
- memset(entryA,0,sizeof(long)*entries);
- memset(entryB,0,sizeof(long)*entries);
-
- for(i=0;i<points;i++){
- double *ppt=_point(v,pointindex[i]);
- long firstentry=0;
- double firstmetric=_dist_sq(v,_now(v,entryindex[0]),ppt);
- double position=-c;
-
- for(j=1;j<entries;j++){
- double thismetric=_dist_sq(v,_now(v,entryindex[j]),ppt);
- if(thismetric<firstmetric){
- firstmetric=thismetric;
- firstentry=j;
- }
- }
-
- /* count point split */
- for(k=0;k<v->elements;k++)
- position+=ppt[k]*n[k];
- if(position>0.){
- entryA[firstentry]++;
- }else{
- entryB[firstentry]++;
- }
- }
-
- /* look to see if entries are in the slack zone */
- /* The entry splitting isn't total, so that storage has to be
- allocated for recursion. Reuse the entryA/entryB vectors */
- for(j=0;j<entries;j++){
- long total=entryA[j]+entryB[j];
- if((double)entryA[j]/total<slack){
- entryA[j]=0;
- }else if((double)entryB[j]/total<slack){
- entryB[j]=0;
- }
- if(entryA[j] && entryB[j])C++;
- if(entryA[j])entryA[A++]=entryindex[j];
- if(entryB[j])entryB[B++]=entryindex[j];
- }
- *entriesA=A;
- *entriesB=B;
- *entriesC=C;
-}
-
-void pq_in_out(vqgen *v,double *n,double *c,double *p,double *q){
- int k;
- *c=0.;
- for(k=0;k<v->elements;k++){
- double center=(p[k]+q[k])/2.;
- n[k]=(center-q[k])*2.;
- *c+=center*n[k];
- }
-}
-
-void pq_center_out(vqgen *v,double *n,double *c,double *center,double *q){
- int k;
- *c=0.;
- for(k=0;k<v->elements;k++){
- n[k]=(center[k]-q[k])*2.;
- *c+=center[k]*n[k];
- }
-}
-
-int lp_split(vqgen *v,vqbook *b,
- long *entryindex,long entries,
- long *pointindex,long points,
- long depth,double slack){
-
- /* The encoder, regardless of book, will be using a straight
- euclidian distance-to-point metric to determine closest point.
- Thus we split the cells using the same (we've already trained the
- codebook set spacing and distribution using special metrics and
- even a midpoint division won't disturb the basic properties) */
-
- long ret;
- double *p;
- double *q;
- double *n;
- double c;
- long *entryA=calloc(entries,sizeof(long));
- long *entryB=calloc(entries,sizeof(long));
- long entriesA=0;
- long entriesB=0;
- long entriesC=0;
- long pointsA=0;
- long i,j,k;
-
- p=alloca(sizeof(double)*v->elements);
- q=alloca(sizeof(double)*v->elements);
- n=alloca(sizeof(double)*v->elements);
- memset(p,0,sizeof(double)*v->elements);
-
- /* We need to find the dividing hyperplane. find the median of each
- axis as the centerpoint and the normal facing farthest point */
-
- /* more than one way to do this part. For small sets, we can brute
- force it. */
-
- if(entries<32){
- /* try every pair possibility */
- double best=0;
- long besti=0;
- long bestj=0;
- double this;
- for(i=0;i<entries-1;i++){
- for(j=i+1;j<entries;j++){
- pq_in_out(v,n,&c,_now(v,entryindex[i]),_now(v,entryindex[j]));
- lp_count(v,entryindex,entries,
- pointindex,points,
- entryA,entryB,
- n, c, slack,
- &entriesA,&entriesB,&entriesC);
- this=(entriesA-entriesC)*(entriesB-entriesC);
-
- if(this>best){
- best=this;
- besti=i;
- bestj=j;
- }
- }
- }
- pq_in_out(v,n,&c,_now(v,entryindex[besti]),_now(v,entryindex[bestj]));
- }else{
- double best=0.;
- long bestj=0;
-
- /* try COG/normal and furthest pairs */
- /* medianpoint */
- for(k=0;k<v->elements;k++){
- /* just sort the index array */
- sortvals=v->pointlist+k;
- els=v->elements;
- qsort(pointindex,points,sizeof(long),iascsort);
- if(points&0x1){
- p[k]=v->pointlist[(pointindex[points/2])*v->elements+k];
- }else{
- p[k]=(v->pointlist[(pointindex[points/2])*v->elements+k]+
- v->pointlist[(pointindex[points/2-1])*v->elements+k])/2.;
- }
- }
-
- /* try every normal, but just for distance */
- for(j=0;j<entries;j++){
- double *ppj=_now(v,entryindex[j]);
- double this=_dist_sq(v,p,ppj);
- if(this>best){
- best=this;
- bestj=j;
- }
- }
-
- pq_center_out(v,n,&c,p,_point(v,pointindex[bestj]));
-
-
- }
-
- /* find cells enclosing points */
- /* count A/B points */
-
- lp_count(v,entryindex,entries,
- pointindex,points,
- entryA,entryB,
- n, c, slack,
- &entriesA,&entriesB,&entriesC);
-
- /* the point index is split evenly, so we do an Order n
- rearrangement into A first/B last and just pass it on */
- {
- long Aptr=0;
- long Bptr=points-1;
- while(Aptr<=Bptr){
- while(Aptr<=Bptr){
- double position=-c;
- for(k=0;k<v->elements;k++)
- position+=_point(v,pointindex[Aptr])[k]*n[k];
- if(position<=0.)break; /* not in A */
- Aptr++;
- }
- while(Aptr<=Bptr){
- double position=-c;
- for(k=0;k<v->elements;k++)
- position+=_point(v,pointindex[Bptr])[k]*n[k];
- if(position>0.)break; /* not in B */
- Bptr--;
- }
- if(Aptr<Bptr){
- long temp=pointindex[Aptr];
- pointindex[Aptr]=pointindex[Bptr];
- pointindex[Bptr]=temp;
- }
- pointsA=Aptr;
- }
- }
-
- fprintf(stderr,"split: total=%ld depth=%ld set A=%ld:%ld:%ld=B\n",
- entries,depth,entriesA-entriesC,entriesC,entriesB-entriesC);
- {
- long thisaux=b->aux++;
- if(b->aux>=b->alloc){
- b->alloc*=2;
- b->ptr0=realloc(b->ptr0,sizeof(long)*b->alloc);
- b->ptr1=realloc(b->ptr1,sizeof(long)*b->alloc);
- b->n=realloc(b->n,sizeof(double)*b->elements*b->alloc);
- b->c=realloc(b->c,sizeof(double)*b->alloc);
- }
-
- memcpy(b->n+b->elements*thisaux,n,sizeof(double)*v->elements);
- b->c[thisaux]=c;
-
- if(entriesA==1){
- ret=1;
- b->ptr0[thisaux]=entryA[0];
- }else{
- b->ptr0[thisaux]= -b->aux;
- ret=lp_split(v,b,entryA,entriesA,pointindex,pointsA,depth+1,slack);
- }
- if(entriesB==1){
- ret++;
- b->ptr1[thisaux]=entryB[0];
- }else{
- b->ptr1[thisaux]= -b->aux;
- ret+=lp_split(v,b,entryB,entriesB,pointindex+pointsA,points-pointsA,
- depth+1,slack);
- }
- }
- free(entryA);
- free(entryB);
- return(ret);
-}
-
-int vqenc_entry(vqbook *b,double *val){
- int ptr=0,k;
- while(1){
- double c= -b->c[ptr];
- double *nptr=b->n+b->elements*ptr;
- for(k=0;k<b->elements;k++)
- c+=nptr[k]*val[k];
- if(c>0.) /* in A */
- ptr= -b->ptr0[ptr];
- else /* in B */
- ptr= -b->ptr1[ptr];
- if(ptr<=0)break;
- }
- return(-ptr);
-}
-
-void vqgen_book(vqgen *v,vqbook *b){
- long i;
- long *entryindex=malloc(sizeof(double)*v->entries);
- long *pointindex=malloc(sizeof(double)*v->points);
-
- memset(b,0,sizeof(vqbook));
- for(i=0;i<v->entries;i++)entryindex[i]=i;
- for(i=0;i<v->points;i++)pointindex[i]=i;
- b->elements=v->elements;
- b->entries=v->entries;
- b->alloc=4096;
- b->ptr0=malloc(sizeof(long)*b->alloc);
- b->ptr1=malloc(sizeof(long)*b->alloc);
- b->n=malloc(sizeof(double)*b->elements*b->alloc);
- b->c=malloc(sizeof(double)*b->alloc);
-
- b->valuelist=malloc(sizeof(double)*b->elements*b->entries);
- b->codelist=malloc(sizeof(long)*b->entries);
- b->lengthlist=malloc(b->entries*sizeof(long));
-
- /* first, generate the encoding decision heirarchy */
- fprintf(stderr,"Total leaves: %d\n",
- lp_split(v,b,entryindex,v->entries, pointindex,v->points,0,0));
-
- /* run all training points through the decision tree to get a final
- probability count */
- {
- long *probability=calloc(b->entries*2,sizeof(long));
- for(i=0;i<v->points;i++){
- int ret=vqenc_entry(b,v->pointlist+i*v->elements);
- probability[ret]++;
- }
-
- for(i=0;i<b->entries;i++){
- fprintf(stderr,"point %ld: %ld\n",i,probability[i]);
- }
-
- }
-
- /* generate the codewords (short to long) */
-
- free(entryindex);
- free(pointindex);
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
typedef struct vqbook{
long elements;
long entries;
+
double *valuelist;
long *codelist;
long *lengthlist;
} vqbook;
+typedef struct {
+ double minval;
+ double delt;
+ int addtoquant;
+} quant_return;
+
+static inline double *_point(vqgen *v,long ptr){
+ return v->pointlist+(v->elements*ptr);
+}
+
+static inline double *_now(vqgen *v,long ptr){
+ return v->entrylist+(v->elements*ptr);
+}
+
extern void vqgen_init(vqgen *v,int elements,int entries,
double (*metric)(vqgen *,double *, double *));
extern void vqgen_addpoint(vqgen *v, double *p);
extern double *vqgen_midpoint(vqgen *v);
extern double vqgen_iterate(vqgen *v);
+
+extern void vqsp_book(vqgen *v,vqbook *b);
extern int vqenc_entry(vqbook *b,double *val);
-extern void vqgen_book(vqgen *v,vqbook *b);
-extern double *_now(vqgen *v,long ptr);
#endif
--- /dev/null
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
+ * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
+ * PLEASE READ THESE TERMS DISTRIBUTING. *
+ * *
+ * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999 *
+ * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company *
+ * http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function: build a VQ codebook
+ author: Monty <xiphmont@mit.edu>
+ modifications by: Monty
+ last modification date: Dec 10 1999
+
+ ********************************************************************/
+
+/* This code is *not* part of libvorbis. It is used to generate
+ trained codebooks offline and then spit the results into a
+ pregenerated codebook that is compiled into libvorbis. It is an
+ expensive (but good) algorithm. Run it on big iron. */
+
+/* There are so many optimizations to explore in *both* stages that
+ considering the undertaking is almost withering. For now, we brute
+ force it all */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include "vqgen.h"
+
+/* Codebook generation happens in two steps:
+
+ 1) Train the codebook with data collected from the encoder: We use
+ one of a few error metrics (which represent the distance between a
+ given data point and a candidate point in the training set) to
+ divide the training set up into cells representing roughly equal
+ probability of occurring.
+
+ 2) Generate the codebook and auxiliary data from the trained data set
+*/
+
+/* Building a codebook from trained set **********************************
+
+ The codebook in raw form is technically finished once it's trained.
+ However, we want to finalize the representative codebook values for
+ each entry and generate auxiliary information to optimize encoding.
+ We generate the auxiliary coding tree using collected data,
+ probably the same data as in the original training */
+
+/* At each recursion, the data set is split in half. Cells with data
+ points on side A go into set A, same with set B. The sets may
+ overlap. If the cell overlaps the deviding line only very slightly
+ (provided parameter), we may choose to ignore the overlap in order
+ to pare the tree down */
+
+double *sortvals;
+int els;
+int iascsort(const void *a,const void *b){
+ double av=sortvals[*((long *)a) * els];
+ double bv=sortvals[*((long *)b) * els];
+ if(av<bv)return(-1);
+ return(1);
+}
+
+extern double _dist_sq(vqgen *v,double *a, double *b);
+
+/* goes through the split, but just counts it and returns a metric*/
+void vqsp_count(vqgen *v,long *entryindex,long entries,
+ long *pointindex,long points,
+ long *entryA,long *entryB,
+ double *n, double c, double slack,
+ long *entriesA,long *entriesB,long *entriesC){
+ long i,j,k;
+ long A=0,B=0,C=0;
+
+ memset(entryA,0,sizeof(long)*entries);
+ memset(entryB,0,sizeof(long)*entries);
+
+ for(i=0;i<points;i++){
+ double *ppt=_point(v,pointindex[i]);
+ long firstentry=0;
+ double firstmetric=_dist_sq(v,_now(v,entryindex[0]),ppt);
+ double position=-c;
+
+ for(j=1;j<entries;j++){
+ double thismetric=_dist_sq(v,_now(v,entryindex[j]),ppt);
+ if(thismetric<firstmetric){
+ firstmetric=thismetric;
+ firstentry=j;
+ }
+ }
+
+ /* count point split */
+ for(k=0;k<v->elements;k++)
+ position+=ppt[k]*n[k];
+ if(position>0.){
+ entryA[firstentry]++;
+ }else{
+ entryB[firstentry]++;
+ }
+ }
+
+ /* look to see if entries are in the slack zone */
+ /* The entry splitting isn't total, so that storage has to be
+ allocated for recursion. Reuse the entryA/entryB vectors */
+ for(j=0;j<entries;j++){
+ long total=entryA[j]+entryB[j];
+ if((double)entryA[j]/total<slack){
+ entryA[j]=0;
+ }else if((double)entryB[j]/total<slack){
+ entryB[j]=0;
+ }
+ if(entryA[j] && entryB[j])C++;
+ if(entryA[j])entryA[A++]=entryindex[j];
+ if(entryB[j])entryB[B++]=entryindex[j];
+ }
+ *entriesA=A;
+ *entriesB=B;
+ *entriesC=C;
+}
+
+void pq_in_out(vqgen *v,double *n,double *c,double *p,double *q){
+ int k;
+ *c=0.;
+ for(k=0;k<v->elements;k++){
+ double center=(p[k]+q[k])/2.;
+ n[k]=(center-q[k])*2.;
+ *c+=center*n[k];
+ }
+}
+
+void pq_center_out(vqgen *v,double *n,double *c,double *center,double *q){
+ int k;
+ *c=0.;
+ for(k=0;k<v->elements;k++){
+ n[k]=(center[k]-q[k])*2.;
+ *c+=center[k]*n[k];
+ }
+}
+
+int lp_split(vqgen *v,vqbook *b,
+ long *entryindex,long entries,
+ long *pointindex,long points,
+ long depth,double slack){
+
+ /* The encoder, regardless of book, will be using a straight
+ euclidian distance-to-point metric to determine closest point.
+ Thus we split the cells using the same (we've already trained the
+ codebook set spacing and distribution using special metrics and
+ even a midpoint division won't disturb the basic properties) */
+
+ long ret;
+ double *p;
+ double *q;
+ double *n;
+ double c;
+ long *entryA=calloc(entries,sizeof(long));
+ long *entryB=calloc(entries,sizeof(long));
+ long entriesA=0;
+ long entriesB=0;
+ long entriesC=0;
+ long pointsA=0;
+ long i,j,k;
+
+ p=alloca(sizeof(double)*v->elements);
+ q=alloca(sizeof(double)*v->elements);
+ n=alloca(sizeof(double)*v->elements);
+ memset(p,0,sizeof(double)*v->elements);
+
+ /* We need to find the dividing hyperplane. find the median of each
+ axis as the centerpoint and the normal facing farthest point */
+
+ /* more than one way to do this part. For small sets, we can brute
+ force it. */
+
+ if(entries<32){
+ /* try every pair possibility */
+ double best=0;
+ long besti=0;
+ long bestj=0;
+ double this;
+ for(i=0;i<entries-1;i++){
+ for(j=i+1;j<entries;j++){
+ pq_in_out(v,n,&c,_now(v,entryindex[i]),_now(v,entryindex[j]));
+ vqsp_count(v,entryindex,entries,
+ pointindex,points,
+ entryA,entryB,
+ n, c, slack,
+ &entriesA,&entriesB,&entriesC);
+ this=(entriesA-entriesC)*(entriesB-entriesC);
+
+ if(this>best){
+ best=this;
+ besti=i;
+ bestj=j;
+ }
+ }
+ }
+ pq_in_out(v,n,&c,_now(v,entryindex[besti]),_now(v,entryindex[bestj]));
+ }else{
+ double best=0.;
+ long bestj=0;
+
+ /* try COG/normal and furthest pairs */
+ /* medianpoint */
+ for(k=0;k<v->elements;k++){
+ /* just sort the index array */
+ sortvals=v->pointlist+k;
+ els=v->elements;
+ qsort(pointindex,points,sizeof(long),iascsort);
+ if(points&0x1){
+ p[k]=v->pointlist[(pointindex[points/2])*v->elements+k];
+ }else{
+ p[k]=(v->pointlist[(pointindex[points/2])*v->elements+k]+
+ v->pointlist[(pointindex[points/2-1])*v->elements+k])/2.;
+ }
+ }
+
+ /* try every normal, but just for distance */
+ for(j=0;j<entries;j++){
+ double *ppj=_now(v,entryindex[j]);
+ double this=_dist_sq(v,p,ppj);
+ if(this>best){
+ best=this;
+ bestj=j;
+ }
+ }
+
+ pq_center_out(v,n,&c,p,_point(v,pointindex[bestj]));
+
+
+ }
+
+ /* find cells enclosing points */
+ /* count A/B points */
+
+ vqsp_count(v,entryindex,entries,
+ pointindex,points,
+ entryA,entryB,
+ n, c, slack,
+ &entriesA,&entriesB,&entriesC);
+
+ /* the point index is split evenly, so we do an Order n
+ rearrangement into A first/B last and just pass it on */
+ {
+ long Aptr=0;
+ long Bptr=points-1;
+ while(Aptr<=Bptr){
+ while(Aptr<=Bptr){
+ double position=-c;
+ for(k=0;k<v->elements;k++)
+ position+=_point(v,pointindex[Aptr])[k]*n[k];
+ if(position<=0.)break; /* not in A */
+ Aptr++;
+ }
+ while(Aptr<=Bptr){
+ double position=-c;
+ for(k=0;k<v->elements;k++)
+ position+=_point(v,pointindex[Bptr])[k]*n[k];
+ if(position>0.)break; /* not in B */
+ Bptr--;
+ }
+ if(Aptr<Bptr){
+ long temp=pointindex[Aptr];
+ pointindex[Aptr]=pointindex[Bptr];
+ pointindex[Bptr]=temp;
+ }
+ pointsA=Aptr;
+ }
+ }
+
+ fprintf(stderr,"split: total=%ld depth=%ld set A=%ld:%ld:%ld=B\n",
+ entries,depth,entriesA-entriesC,entriesC,entriesB-entriesC);
+ {
+ long thisaux=b->aux++;
+ if(b->aux>=b->alloc){
+ b->alloc*=2;
+ b->ptr0=realloc(b->ptr0,sizeof(long)*b->alloc);
+ b->ptr1=realloc(b->ptr1,sizeof(long)*b->alloc);
+ b->n=realloc(b->n,sizeof(double)*b->elements*b->alloc);
+ b->c=realloc(b->c,sizeof(double)*b->alloc);
+ }
+
+ memcpy(b->n+b->elements*thisaux,n,sizeof(double)*v->elements);
+ b->c[thisaux]=c;
+
+ if(entriesA==1){
+ ret=1;
+ b->ptr0[thisaux]=entryA[0];
+ }else{
+ b->ptr0[thisaux]= -b->aux;
+ ret=lp_split(v,b,entryA,entriesA,pointindex,pointsA,depth+1,slack);
+ }
+ if(entriesB==1){
+ ret++;
+ b->ptr1[thisaux]=entryB[0];
+ }else{
+ b->ptr1[thisaux]= -b->aux;
+ ret+=lp_split(v,b,entryB,entriesB,pointindex+pointsA,points-pointsA,
+ depth+1,slack);
+ }
+ }
+ free(entryA);
+ free(entryB);
+ return(ret);
+}
+
+void vqsp_book(vqgen *v, vqbook *b){
+ long *entryindex=malloc(sizeof(double)*v->entries);
+ long *pointindex=malloc(sizeof(double)*v->points);
+ long i;
+
+ memset(b,0,sizeof(vqbook));
+ for(i=0;i<v->entries;i++)entryindex[i]=i;
+ for(i=0;i<v->points;i++)pointindex[i]=i;
+
+ b->elements=v->elements;
+ b->entries=v->entries;
+ b->alloc=4096;
+ b->ptr0=malloc(sizeof(long)*b->alloc);
+ b->ptr1=malloc(sizeof(long)*b->alloc);
+ b->n=malloc(sizeof(double)*b->elements*b->alloc);
+ b->c=malloc(sizeof(double)*b->alloc);
+
+ b->valuelist=v->entrylist;
+ b->codelist=malloc(sizeof(long)*b->entries);
+ b->lengthlist=calloc(b->entries,sizeof(long));
+
+ /* first, generate the encoding decision heirarchy */
+ fprintf(stderr,"Total leaves: %d\n",
+ lp_split(v,b,entryindex,v->entries, pointindex,v->points,0,0));
+
+ /* run all training points through the decision tree to get a final
+ probability count */
+ {
+ long *probability=calloc(b->entries,sizeof(long));
+ long *membership=malloc(b->entries*sizeof(long));
+ long j;
+
+ for(i=0;i<v->points;i++){
+ int ret=vqenc_entry(b,v->pointlist+i*v->elements);
+ probability[ret]++;
+ }
+ for(i=0;i<v->entries;i++)membership[i]=i;
+
+ /* find codeword lengths */
+ /* much more elegant means exist. Brute force n^2, minimum thought */
+ for(i=v->entries;i>1;i--){
+ int first=-1,second=-1;
+ long least=v->points+1;
+
+ /* find the two nodes to join */
+ for(j=0;j<v->entries;j++)
+ if(probability[j]<least){
+ least=probability[j];
+ first=membership[j];
+ }
+ least=v->points+1;
+ for(j=0;j<v->entries;j++)
+ if(probability[j]<least && membership[j]!=first){
+ least=probability[j];
+ second=membership[j];
+ }
+ if(first==-1 || second==-1){
+ fprintf(stderr,"huffman fault; no free branch\n");
+ exit(1);
+ }
+
+ /* join them */
+ least=probability[first]+probability[second];
+ for(j=0;j<v->entries;j++)
+ if(membership[j]==first || membership[j]==second){
+ membership[j]=first;
+ probability[j]=least;
+ b->lengthlist[j]++;
+ }
+ }
+ for(i=0;i<v->entries-1;i++)
+ if(membership[i]!=membership[i+1]){
+ fprintf(stderr,"huffman fault; failed to build single tree\n");
+ exit(1);
+ }
+
+ /* unneccessary metric */
+ memset(probability,0,sizeof(long)*v->entries);
+ for(i=0;i<v->points;i++){
+ int ret=vqenc_entry(b,v->pointlist+i*v->elements);
+ probability[ret]++;
+ }
+
+ /* print the results */
+ for(i=0;i<v->entries;i++)
+ fprintf(stderr,"%d: count=%ld codelength=%d\n",i,probability[i],b->lengthlist[i]);
+
+ free(probability);
+ free(membership);
+ }
+
+ /* rearrange, sort by codelength */
+
+
+ /* generate the codewords (short to long) */
+
+
+ free(entryindex);
+ free(pointindex);
+}
+
+int vqenc_entry(vqbook *b,double *val){
+ int ptr=0,k;
+ while(1){
+ double c= -b->c[ptr];
+ double *nptr=b->n+b->elements*ptr;
+ for(k=0;k<b->elements;k++)
+ c+=nptr[k]*val[k];
+ if(c>0.) /* in A */
+ ptr= -b->ptr0[ptr];
+ else /* in B */
+ ptr= -b->ptr1[ptr];
+ if(ptr<=0)break;
+ }
+ return(-ptr);
+}
+
+
+
+