1 /********************************************************************
3 * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
5 * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
6 * PLEASE READ THESE TERMS DISTRIBUTING. *
8 * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999 *
9 * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company *
10 * http://www.xiph.org/ *
12 ********************************************************************
14 function: build a VQ codebook
15 author: Monty <xiphmont@mit.edu>
16 modifications by: Monty
17 last modification date: Dec 10 1999
19 ********************************************************************/
21 /* This code is *not* part of libvorbis. It is used to generate
22 trained codebooks offline and then spit the results into a
23 pregenerated codebook that is compiled into libvorbis. It is an
24 expensive (but good) algorithm. Run it on big iron. */
26 /* There are so many optimizations to explore in *both* stages that
27 considering the undertaking is almost withering. For now, we brute
36 /* Codebook generation happens in two steps:
38 1) Train the codebook with data collected from the encoder: We use
39 one of a few error metrics (which represent the distance between a
40 given data point and a candidate point in the training set) to
41 divide the training set up into cells representing roughly equal
42 probability of occurring.
44 2) Generate the codebook and auxiliary data from the trained data set
47 /* Codebook training ****************************************************
49 * The basic idea here is that a VQ codebook is like an m-dimensional
50 * foam with n bubbles. The bubbles compete for space/volume and are
51 * 'pressurized' [biased] according to some metric. The basic alg
52 * iterates through allowing the bubbles to compete for space until
53 * they converge (if the damping is dome properly) on a steady-state
54 * solution. Individual input points, collected from libvorbis, are
55 * used to train the algorithm monte-carlo style. */
57 /* internal helpers *****************************************************/
58 #define vN(data,i) (data+v->elements*i)
60 double *_point(vqgen *v,long ptr){
61 return v->pointlist+(v->elements*ptr);
64 double *_now(vqgen *v,long ptr){
65 return v->entrylist+(v->elements*ptr);
68 /* default metric; squared 'distance' from desired value. */
69 double _dist_sq(vqgen *v,double *a, double *b){
74 double val=(a[i]-b[i]);
80 /* *must* be beefed up. */
81 void _vqgen_seed(vqgen *v){
82 memcpy(v->entrylist,v->pointlist,sizeof(double)*v->entries*v->elements);
85 /* External calls *******************************************************/
87 void vqgen_init(vqgen *v,int elements,int entries,
88 double (*metric)(vqgen *,double *, double *)){
89 memset(v,0,sizeof(vqgen));
93 v->pointlist=malloc(v->allocated*v->elements*sizeof(double));
96 v->entrylist=malloc(v->entries*v->elements*sizeof(double));
97 v->assigned=malloc(v->entries*sizeof(long));
98 v->bias=calloc(v->entries,sizeof(double));
100 v->metric_func=metric;
102 v->metric_func=_dist_sq;
105 void vqgen_addpoint(vqgen *v, double *p){
106 if(v->points>=v->allocated){
108 v->pointlist=realloc(v->pointlist,v->allocated*v->elements*sizeof(double));
111 memcpy(_point(v,v->points),p,sizeof(double)*v->elements);
113 if(v->points==v->entries)_vqgen_seed(v);
116 double vqgen_iterate(vqgen *v){
118 double fdesired=(double)v->points/v->entries;
119 long desired=fdesired;
122 double *new=malloc(sizeof(double)*v->entries*v->elements);
123 long *nearcount=malloc(v->entries*sizeof(long));
124 double *nearbias=malloc(v->entries*desired*sizeof(double));
131 sprintf(buff,"cells%d.m",v->it);
132 cells=fopen(buff,"w");
133 sprintf(buff,"assig%d.m",v->it);
134 assig=fopen(buff,"w");
135 sprintf(buff,"bias%d.m",v->it);
136 bias=fopen(buff,"w");
139 fprintf(stderr,"Pass #%d... ",v->it);
142 fprintf(stderr,"generation requires at least two entries\n");
146 /* fill in nearest points for entries */
147 /*memset(v->bias,0,sizeof(double)*v->entries);*/
148 memset(nearcount,0,sizeof(long)*v->entries);
149 memset(v->assigned,0,sizeof(long)*v->entries);
150 for(i=0;i<v->points;i++){
151 double *ppt=_point(v,i);
152 double firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
153 double secondmetric=v->metric_func(v,_now(v,1),ppt)+v->bias[1];
156 if(firstmetric>secondmetric){
157 double temp=firstmetric;
158 firstmetric=secondmetric;
164 for(j=2;j<v->entries;j++){
165 double thismetric=v->metric_func(v,_now(v,j),_point(v,i))+v->bias[j];
166 if(thismetric<secondmetric){
167 if(thismetric<firstmetric){
168 secondmetric=firstmetric;
169 secondentry=firstentry;
170 firstmetric=thismetric;
173 secondmetric=thismetric;
180 meterror+=firstmetric-v->bias[firstentry];
181 /* set up midpoints for next iter */
183 for(k=0;k<v->elements;k++)
184 vN(new,j)[k]+=_point(v,i)[k];
186 for(k=0;k<v->elements;k++)
187 vN(new,j)[k]=_point(v,i)[k];
191 fprintf(cells,"%g %g\n%g %g\n\n",
192 _now(v,j)[0],_now(v,j)[1],
193 _point(v,i)[0],_point(v,i)[1]);
196 for(j=0;j<v->entries;j++){
199 double *nearbiasptr=nearbias+desired*j;
200 long k=nearcount[j]-1;
202 /* 'thismetric' is to be the bias value necessary in the current
203 arrangement for entry j to capture point i */
205 /* use the secondary entry as the threshhold */
206 thismetric=secondmetric-v->metric_func(v,_now(v,j),_point(v,i));
208 /* use the primary entry as the threshhold */
209 thismetric=firstmetric-v->metric_func(v,_now(v,j),_point(v,i));
212 if(k>=0 && thismetric>nearbiasptr[k]){
214 /* start at the end and search backward for where this entry
217 for(;k>0;k--) if(nearbiasptr[k-1]>=thismetric)break;
219 /* insert at k. Shift and inject. */
220 memmove(nearbiasptr+k+1,nearbiasptr+k,(desired-k-1)*sizeof(double));
221 nearbiasptr[k]=thismetric;
223 if(nearcount[j]<desired)nearcount[j]++;
226 if(nearcount[j]<desired){
227 /* we checked the thresh earlier. We know this is the
229 nearbiasptr[nearcount[j]++]=thismetric;
235 /* inflate/deflate */
236 for(i=0;i<v->entries;i++)
237 v->bias[i]=nearbias[(i+1)*desired-1];
239 /* assign midpoints */
241 for(j=0;j<v->entries;j++){
242 asserror+=fabs(v->assigned[j]-fdesired);
244 for(k=0;k<v->elements;k++)
245 _now(v,j)[k]=vN(new,j)[k]/v->assigned[j];
247 fprintf(assig,"%ld\n",v->assigned[j]);
248 fprintf(bias,"%g\n",v->bias[j]);
252 asserror/=(v->entries*fdesired);
253 fprintf(stderr,": dist %g(%g) metric error=%g \n",
254 asserror,fdesired,meterror/v->points);
269 /* Building a codebook from trained set **********************************
271 The codebook in raw form is technically finished once it's trained.
272 However, we want to finalize the representative codebook values for
273 each entry and generate auxiliary information to optimize encoding.
274 We generate the auxiliary coding tree using collected data,
275 probably the same data as in the original training */
277 /* At each recursion, the data set is split in half. Cells with data
278 points on side A go into set A, same with set B. The sets may
279 overlap. If the cell overlaps the deviding line only very slightly
280 (provided parameter), we may choose to ignore the overlap in order
281 to pare the tree down */
285 int iascsort(const void *a,const void *b){
286 double av=sortvals[*((long *)a) * els];
287 double bv=sortvals[*((long *)b) * els];
292 /* goes through the split, but just counts it and returns a metric*/
293 void lp_count(vqgen *v,long *entryindex,long entries,
294 long *pointindex,long points,
295 long *entryA,long *entryB,
296 double *n, double c, double slack,
297 long *entriesA,long *entriesB,long *entriesC){
301 memset(entryA,0,sizeof(long)*entries);
302 memset(entryB,0,sizeof(long)*entries);
304 for(i=0;i<points;i++){
305 double *ppt=_point(v,pointindex[i]);
307 double firstmetric=_dist_sq(v,_now(v,entryindex[0]),ppt);
310 for(j=1;j<entries;j++){
311 double thismetric=_dist_sq(v,_now(v,entryindex[j]),ppt);
312 if(thismetric<firstmetric){
313 firstmetric=thismetric;
318 /* count point split */
319 for(k=0;k<v->elements;k++)
320 position+=ppt[k]*n[k];
322 entryA[firstentry]++;
324 entryB[firstentry]++;
328 /* look to see if entries are in the slack zone */
329 /* The entry splitting isn't total, so that storage has to be
330 allocated for recursion. Reuse the entryA/entryB vectors */
331 for(j=0;j<entries;j++){
332 long total=entryA[j]+entryB[j];
333 if((double)entryA[j]/total<slack){
335 }else if((double)entryB[j]/total<slack){
338 if(entryA[j] && entryB[j])C++;
339 if(entryA[j])entryA[A++]=entryindex[j];
340 if(entryB[j])entryB[B++]=entryindex[j];
347 void pq_in_out(vqgen *v,double *n,double *c,double *p,double *q){
350 for(k=0;k<v->elements;k++){
351 double center=(p[k]+q[k])/2.;
352 n[k]=(center-q[k])*2.;
357 void pq_center_out(vqgen *v,double *n,double *c,double *center,double *q){
360 for(k=0;k<v->elements;k++){
361 n[k]=(center[k]-q[k])*2.;
366 int lp_split(vqgen *v,vqbook *b,
367 long *entryindex,long entries,
368 long *pointindex,long points,
369 long depth,double slack){
371 /* The encoder, regardless of book, will be using a straight
372 euclidian distance-to-point metric to determine closest point.
373 Thus we split the cells using the same (we've already trained the
374 codebook set spacing and distribution using special metrics and
375 even a midpoint division won't disturb the basic properties) */
382 long *entryA=calloc(entries,sizeof(long));
383 long *entryB=calloc(entries,sizeof(long));
390 p=alloca(sizeof(double)*v->elements);
391 q=alloca(sizeof(double)*v->elements);
392 n=alloca(sizeof(double)*v->elements);
393 memset(p,0,sizeof(double)*v->elements);
395 /* We need to find the dividing hyperplane. find the median of each
396 axis as the centerpoint and the normal facing farthest point */
398 /* more than one way to do this part. For small sets, we can brute
402 /* try every pair possibility */
407 for(i=0;i<entries-1;i++){
408 for(j=i+1;j<entries;j++){
409 pq_in_out(v,n,&c,_now(v,entryindex[i]),_now(v,entryindex[j]));
410 lp_count(v,entryindex,entries,
414 &entriesA,&entriesB,&entriesC);
415 this=(entriesA-entriesC)*(entriesB-entriesC);
424 pq_in_out(v,n,&c,_now(v,entryindex[besti]),_now(v,entryindex[bestj]));
429 /* try COG/normal and furthest pairs */
431 for(k=0;k<v->elements;k++){
432 /* just sort the index array */
433 sortvals=v->pointlist+k;
435 qsort(pointindex,points,sizeof(long),iascsort);
437 p[k]=v->pointlist[(pointindex[points/2])*v->elements+k];
439 p[k]=(v->pointlist[(pointindex[points/2])*v->elements+k]+
440 v->pointlist[(pointindex[points/2-1])*v->elements+k])/2.;
444 /* try every normal, but just for distance */
445 for(j=0;j<entries;j++){
446 double *ppj=_now(v,entryindex[j]);
447 double this=_dist_sq(v,p,ppj);
454 pq_center_out(v,n,&c,p,_point(v,pointindex[bestj]));
459 /* find cells enclosing points */
460 /* count A/B points */
462 lp_count(v,entryindex,entries,
466 &entriesA,&entriesB,&entriesC);
468 /* the point index is split evenly, so we do an Order n
469 rearrangement into A first/B last and just pass it on */
476 for(k=0;k<v->elements;k++)
477 position+=_point(v,pointindex[Aptr])[k]*n[k];
478 if(position<=0.)break; /* not in A */
483 for(k=0;k<v->elements;k++)
484 position+=_point(v,pointindex[Bptr])[k]*n[k];
485 if(position>0.)break; /* not in B */
489 long temp=pointindex[Aptr];
490 pointindex[Aptr]=pointindex[Bptr];
491 pointindex[Bptr]=temp;
497 fprintf(stderr,"split: total=%ld depth=%ld set A=%ld:%ld:%ld=B\n",
498 entries,depth,entriesA-entriesC,entriesC,entriesB-entriesC);
500 long thisaux=b->aux++;
501 if(b->aux>=b->alloc){
503 b->ptr0=realloc(b->ptr0,sizeof(long)*b->alloc);
504 b->ptr1=realloc(b->ptr1,sizeof(long)*b->alloc);
505 b->n=realloc(b->n,sizeof(double)*b->elements*b->alloc);
506 b->c=realloc(b->c,sizeof(double)*b->alloc);
509 memcpy(b->n+b->elements*thisaux,n,sizeof(double)*v->elements);
514 b->ptr0[thisaux]=entryA[0];
516 b->ptr0[thisaux]= -b->aux;
517 ret=lp_split(v,b,entryA,entriesA,pointindex,pointsA,depth+1,slack);
521 b->ptr1[thisaux]=entryB[0];
523 b->ptr1[thisaux]= -b->aux;
524 ret+=lp_split(v,b,entryB,entriesB,pointindex+pointsA,points-pointsA,
533 int vqenc_entry(vqbook *b,double *val){
536 double c= -b->c[ptr];
537 double *nptr=b->n+b->elements*ptr;
538 for(k=0;k<b->elements;k++)
549 void vqgen_book(vqgen *v,vqbook *b){
551 long *entryindex=malloc(sizeof(double)*v->entries);
552 long *pointindex=malloc(sizeof(double)*v->points);
554 memset(b,0,sizeof(vqbook));
555 for(i=0;i<v->entries;i++)entryindex[i]=i;
556 for(i=0;i<v->points;i++)pointindex[i]=i;
557 b->elements=v->elements;
558 b->entries=v->entries;
560 b->ptr0=malloc(sizeof(long)*b->alloc);
561 b->ptr1=malloc(sizeof(long)*b->alloc);
562 b->n=malloc(sizeof(double)*b->elements*b->alloc);
563 b->c=malloc(sizeof(double)*b->alloc);
565 b->valuelist=malloc(sizeof(double)*b->elements*b->entries);
566 b->codelist=malloc(sizeof(long)*b->entries);
567 b->lengthlist=malloc(b->entries*sizeof(long));
569 /* first, generate the encoding decision heirarchy */
570 fprintf(stderr,"Total leaves: %d\n",
571 lp_split(v,b,entryindex,v->entries, pointindex,v->points,0,0));
573 /* run all training points through the decision tree to get a final
576 long *probability=calloc(b->entries*2,sizeof(long));
577 for(i=0;i<v->points;i++){
578 int ret=vqenc_entry(b,v->pointlist+i*v->elements);
582 for(i=0;i<b->entries;i++){
583 fprintf(stderr,"point %ld: %ld\n",i,probability[i]);
588 /* generate the codewords (short to long) */