1 /********************************************************************
3 * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
5 * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
6 * PLEASE READ THESE TERMS DISTRIBUTING. *
8 * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999 *
9 * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company *
10 * http://www.xiph.org/ *
12 ********************************************************************
14 function: build a VQ codebook
15 author: Monty <xiphmont@mit.edu>
16 modifications by: Monty
17 last modification date: Dec 10 1999
19 ********************************************************************/
21 /* This code is *not* part of libvorbis. It is used to generate
22 trained codebooks offline and then spit the results into a
23 pregenerated codebook that is compiled into libvorbis. It is an
24 expensive (but good) algorithm. Run it on big iron. */
26 /* There are so many optimizations to explore in *both* stages that
27 considering the undertaking is almost withering. For now, we brute
36 /* Codebook generation happens in two steps:
38 1) Train the codebook with data collected from the encoder: We use
39 one of a few error metrics (which represent the distance between a
40 given data point and a candidate point in the training set) to
41 divide the training set up into cells representing roughly equal
42 probability of occurring.
44 2) Generate the codebook and auxiliary data from the trained data set
47 /* Codebook training ****************************************************
49 * The basic idea here is that a VQ codebook is like an m-dimensional
50 * foam with n bubbles. The bubbles compete for space/volume and are
51 * 'pressurized' [biased] according to some metric. The basic alg
52 * iterates through allowing the bubbles to compete for space until
53 * they converge (if the damping is dome properly) on a steady-state
54 * solution. Individual input points, collected from libvorbis, are
55 * used to train the algorithm monte-carlo style. */
57 /* internal helpers *****************************************************/
58 #define vN(data,i) (data+v->elements*i)
60 double *_point(vqgen *v,long ptr){
61 return v->pointlist+(v->elements*ptr);
64 double *_now(vqgen *v,long ptr){
65 return v->entrylist+(v->elements*ptr);
68 /* default metric; squared 'distance' from desired value. */
69 double _dist_sq(vqgen *v,double *a, double *b){
74 double val=(a[i]-b[i]);
80 /* *must* be beefed up. */
81 void _vqgen_seed(vqgen *v){
82 memcpy(v->entrylist,v->pointlist,sizeof(double)*v->entries*v->elements);
85 /* External calls *******************************************************/
87 void vqgen_init(vqgen *v,int elements,int entries,
88 double (*metric)(vqgen *,double *, double *),
90 memset(v,0,sizeof(vqgen));
95 v->pointlist=malloc(v->allocated*v->elements*sizeof(double));
98 v->entrylist=malloc(v->entries*v->elements*sizeof(double));
99 v->assigned=malloc(v->entries*sizeof(long));
100 v->bias=calloc(v->entries,sizeof(double));
102 v->metric_func=metric;
104 v->metric_func=_dist_sq;
107 void vqgen_addpoint(vqgen *v, double *p){
108 if(v->points>=v->allocated){
110 v->pointlist=realloc(v->pointlist,v->allocated*v->elements*sizeof(double));
113 memcpy(_point(v,v->points),p,sizeof(double)*v->elements);
115 if(v->points==v->entries)_vqgen_seed(v);
118 /* take the trained entries, look at the points that comprise the cell
119 and find midpoints (as the actual encoding process uses euclidian
120 distance rather than any more complex metric to find the closest
123 double *vqgen_midpoint(vqgen *v){
125 double *lo=malloc(v->entries*v->elements*sizeof(double));
126 double *hi=malloc(v->entries*v->elements*sizeof(double));
128 memset(v->assigned,0,sizeof(long)*v->entries);
129 for(i=0;i<v->points;i++){
130 double *ppt=_point(v,i);
131 double firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
133 for(j=1;j<v->entries;j++){
134 double thismetric=v->metric_func(v,_now(v,j),_point(v,i))+v->bias[j];
135 if(thismetric<firstmetric){
136 firstmetric=thismetric;
142 if(v->assigned[j]++){
143 for(k=0;k<v->elements;k++){
144 if(ppt[k]<vN(lo,j)[k])vN(lo,j)[k]=ppt[k];
145 if(ppt[k]>vN(hi,j)[k])vN(hi,j)[k]=ppt[k];
148 for(k=0;k<v->elements;k++){
155 for(j=0;j<v->entries;j++)
157 for(k=0;k<v->elements;k++)
158 vN(lo,j)[k]=(vN(lo,j)[k]+vN(hi,j)[k])/2.;
160 for(k=0;k<v->elements;k++)
161 vN(lo,j)[k]=_now(v,j)[k];
166 double vqgen_iterate(vqgen *v){
168 double fdesired=(double)v->points/v->entries;
169 long desired=fdesired;
172 double *new=malloc(sizeof(double)*v->entries*v->elements);
173 long *nearcount=malloc(v->entries*sizeof(long));
174 double *nearbias=malloc(v->entries*desired*sizeof(double));
181 sprintf(buff,"cells%d.m",v->it);
182 cells=fopen(buff,"w");
183 sprintf(buff,"assig%d.m",v->it);
184 assig=fopen(buff,"w");
185 sprintf(buff,"bias%d.m",v->it);
186 bias=fopen(buff,"w");
189 fprintf(stderr,"Pass #%d... ",v->it);
192 fprintf(stderr,"generation requires at least two entries\n");
196 /* fill in nearest points for entries */
197 /*memset(v->bias,0,sizeof(double)*v->entries);*/
198 memset(nearcount,0,sizeof(long)*v->entries);
199 memset(v->assigned,0,sizeof(long)*v->entries);
200 for(i=0;i<v->points;i++){
201 double *ppt=_point(v,i);
202 double firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
203 double secondmetric=v->metric_func(v,_now(v,1),ppt)+v->bias[1];
206 if(firstmetric>secondmetric){
207 double temp=firstmetric;
208 firstmetric=secondmetric;
214 for(j=2;j<v->entries;j++){
215 double thismetric=v->metric_func(v,_now(v,j),_point(v,i))+v->bias[j];
216 if(thismetric<secondmetric){
217 if(thismetric<firstmetric){
218 secondmetric=firstmetric;
219 secondentry=firstentry;
220 firstmetric=thismetric;
223 secondmetric=thismetric;
230 meterror+=firstmetric-v->bias[firstentry];
231 /* set up midpoints for next iter */
233 for(k=0;k<v->elements;k++)
234 vN(new,j)[k]+=_point(v,i)[k];
236 for(k=0;k<v->elements;k++)
237 vN(new,j)[k]=_point(v,i)[k];
241 fprintf(cells,"%g %g\n%g %g\n\n",
242 _now(v,j)[0],_now(v,j)[1],
243 _point(v,i)[0],_point(v,i)[1]);
246 for(j=0;j<v->entries;j++){
249 double *nearbiasptr=nearbias+desired*j;
250 long k=nearcount[j]-1;
252 /* 'thismetric' is to be the bias value necessary in the current
253 arrangement for entry j to capture point i */
255 /* use the secondary entry as the threshhold */
256 thismetric=secondmetric-v->metric_func(v,_now(v,j),_point(v,i));
258 /* use the primary entry as the threshhold */
259 thismetric=firstmetric-v->metric_func(v,_now(v,j),_point(v,i));
262 if(k>=0 && thismetric>nearbiasptr[k]){
264 /* start at the end and search backward for where this entry
267 for(;k>0;k--) if(nearbiasptr[k-1]>=thismetric)break;
269 /* insert at k. Shift and inject. */
270 memmove(nearbiasptr+k+1,nearbiasptr+k,(desired-k-1)*sizeof(double));
271 nearbiasptr[k]=thismetric;
273 if(nearcount[j]<desired)nearcount[j]++;
276 if(nearcount[j]<desired){
277 /* we checked the thresh earlier. We know this is the
279 nearbiasptr[nearcount[j]++]=thismetric;
285 /* inflate/deflate */
286 for(i=0;i<v->entries;i++)
287 v->bias[i]=nearbias[(i+1)*desired-1];
289 /* last, assign midpoints */
290 for(j=0;j<v->entries;j++){
291 asserror+=fabs(v->assigned[j]-fdesired);
293 for(k=0;k<v->elements;k++)
294 _now(v,j)[k]=vN(new,j)[k]/v->assigned[j];
296 fprintf(assig,"%ld\n",v->assigned[j]);
297 fprintf(bias,"%g\n",v->bias[j]);
301 asserror/=(v->entries*fdesired);
302 fprintf(stderr,": dist %g(%g) metric error=%g \n",
303 asserror,fdesired,meterror/v->points);
318 /* Building a codebook from trained set **********************************
320 The codebook in raw form is technically finished once it's trained.
321 However, we want to finalize the representative codebook values for
322 each entry and generate auxiliary information to optimize encoding.
323 We generate the auxiliary coding tree using collected data,
324 probably the same data as in the original training */
326 /* At each recursion, the data set is split in half. Cells with data
327 points on side A go into set A, same with set B. The sets may
328 overlap. If the cell overlaps the deviding line only very slightly
329 (provided parameter), we may choose to ignore the overlap in order
330 to pare the tree down */
334 int iascsort(const void *a,const void *b){
335 double av=sortvals[*((long *)a) * els];
336 double bv=sortvals[*((long *)b) * els];
341 /* goes through the split, but just counts it and returns a metric*/
342 void lp_count(vqgen *v,long *entryindex,long entries,
343 long *pointindex,long points,
344 long *entryA,long *entryB,
345 double *n, double c, double slack,
346 long *entriesA,long *entriesB,long *entriesC){
350 memset(entryA,0,sizeof(long)*entries);
351 memset(entryB,0,sizeof(long)*entries);
353 for(i=0;i<points;i++){
354 double *ppt=_point(v,pointindex[i]);
356 double firstmetric=_dist_sq(v,_now(v,entryindex[0]),ppt);
359 for(j=1;j<entries;j++){
360 double thismetric=_dist_sq(v,_now(v,entryindex[j]),ppt);
361 if(thismetric<firstmetric){
362 firstmetric=thismetric;
367 /* count point split */
368 for(k=0;k<v->elements;k++)
369 position+=ppt[k]*n[k];
371 entryA[firstentry]++;
373 entryB[firstentry]++;
377 /* look to see if entries are in the slack zone */
378 /* The entry splitting isn't total, so that storage has to be
379 allocated for recursion. Reuse the entryA/entryB vectors */
380 for(j=0;j<entries;j++){
381 long total=entryA[j]+entryB[j];
382 if((double)entryA[j]/total<slack){
384 }else if((double)entryB[j]/total<slack){
387 if(entryA[j] && entryB[j])C++;
388 if(entryA[j])entryA[A++]=entryindex[j];
389 if(entryB[j])entryB[B++]=entryindex[j];
396 void pq_in_out(vqgen *v,double *n,double *c,double *p,double *q){
399 for(k=0;k<v->elements;k++){
400 double center=(p[k]+q[k])/2.;
401 n[k]=(center-q[k])*2.;
406 void pq_center_out(vqgen *v,double *n,double *c,double *center,double *q){
409 for(k=0;k<v->elements;k++){
410 n[k]=(center[k]-q[k])*2.;
415 int lp_split(vqgen *v,vqbook *b,
416 long *entryindex,long entries,
417 long *pointindex,long points,
418 long depth,double slack){
420 /* The encoder, regardless of book, will be using a straight
421 euclidian distance-to-point metric to determine closest point.
422 Thus we split the cells using the same (we've already trained the
423 codebook set spacing and distribution using special metrics and
424 even a midpoint division won't disturb the basic properties) */
431 long *entryA=calloc(entries,sizeof(long));
432 long *entryB=calloc(entries,sizeof(long));
439 p=alloca(sizeof(double)*v->elements);
440 q=alloca(sizeof(double)*v->elements);
441 n=alloca(sizeof(double)*v->elements);
442 memset(p,0,sizeof(double)*v->elements);
444 /* We need to find the dividing hyperplane. find the median of each
445 axis as the centerpoint and the normal facing farthest point */
447 /* more than one way to do this part. For small sets, we can brute
451 /* try every pair possibility */
456 for(i=0;i<entries-1;i++){
457 for(j=i+1;j<entries;j++){
458 pq_in_out(v,n,&c,_now(v,entryindex[i]),_now(v,entryindex[j]));
459 lp_count(v,entryindex,entries,
463 &entriesA,&entriesB,&entriesC);
464 this=(entriesA-entriesC)*(entriesB-entriesC);
473 pq_in_out(v,n,&c,_now(v,entryindex[besti]),_now(v,entryindex[bestj]));
478 /* try COG/normal and furthest pairs */
480 for(k=0;k<v->elements;k++){
481 /* just sort the index array */
482 sortvals=v->pointlist+k;
484 qsort(pointindex,points,sizeof(long),iascsort);
486 p[k]=v->pointlist[(pointindex[points/2])*v->elements+k];
488 p[k]=(v->pointlist[(pointindex[points/2])*v->elements+k]+
489 v->pointlist[(pointindex[points/2-1])*v->elements+k])/2.;
493 /* try every normal, but just for distance */
494 for(j=0;j<entries;j++){
495 double *ppj=_now(v,entryindex[j]);
496 double this=_dist_sq(v,p,ppj);
503 pq_center_out(v,n,&c,p,_point(v,pointindex[bestj]));
508 /* find cells enclosing points */
509 /* count A/B points */
511 lp_count(v,entryindex,entries,
515 &entriesA,&entriesB,&entriesC);
517 /* the point index is split evenly, so we do an Order n
518 rearrangement into A first/B last and just pass it on */
525 for(k=0;k<v->elements;k++)
526 position+=_point(v,pointindex[Aptr])[k]*n[k];
527 if(position<=0.)break; /* not in A */
532 for(k=0;k<v->elements;k++)
533 position+=_point(v,pointindex[Bptr])[k]*n[k];
534 if(position>0.)break; /* not in B */
538 long temp=pointindex[Aptr];
539 pointindex[Aptr]=pointindex[Bptr];
540 pointindex[Bptr]=temp;
546 fprintf(stderr,"split: total=%ld depth=%ld set A=%ld:%ld:%ld=B\n",
547 entries,depth,entriesA-entriesC,entriesC,entriesB-entriesC);
549 long thisaux=b->aux++;
550 if(b->aux>=b->alloc){
552 b->ptr0=realloc(b->ptr0,sizeof(long)*b->alloc);
553 b->ptr1=realloc(b->ptr1,sizeof(long)*b->alloc);
554 b->n=realloc(b->n,sizeof(double)*b->elements*b->alloc);
555 b->c=realloc(b->c,sizeof(double)*b->alloc);
558 memcpy(b->n+b->elements*thisaux,n,sizeof(double)*v->elements);
563 b->ptr0[thisaux]=entryA[0];
565 b->ptr0[thisaux]= -b->aux;
566 ret=lp_split(v,b,entryA,entriesA,pointindex,pointsA,depth+1,slack);
570 b->ptr1[thisaux]=entryB[0];
572 b->ptr1[thisaux]= -b->aux;
573 ret+=lp_split(v,b,entryB,entriesB,pointindex+pointsA,points-pointsA,
582 int vqenc_entry(vqbook *b,double *val){
585 double c= -b->c[ptr];
586 double *nptr=b->n+b->elements*ptr;
587 for(k=0;k<b->elements;k++)
598 void vqgen_book(vqgen *v,vqbook *b){
600 long *entryindex=malloc(sizeof(double)*v->entries);
601 long *pointindex=malloc(sizeof(double)*v->points);
603 memset(b,0,sizeof(vqbook));
604 for(i=0;i<v->entries;i++)entryindex[i]=i;
605 for(i=0;i<v->points;i++)pointindex[i]=i;
606 b->elements=v->elements;
607 b->entries=v->entries;
609 b->ptr0=malloc(sizeof(long)*b->alloc);
610 b->ptr1=malloc(sizeof(long)*b->alloc);
611 b->n=malloc(sizeof(double)*b->elements*b->alloc);
612 b->c=malloc(sizeof(double)*b->alloc);
614 b->valuelist=malloc(sizeof(double)*b->elements*b->entries);
615 b->codelist=malloc(sizeof(long)*b->entries);
616 b->lengthlist=malloc(b->entries*sizeof(long));
618 /* first, generate the encoding decision heirarchy */
619 fprintf(stderr,"Total leaves: %ld\n",
620 lp_split(v,b,entryindex,v->entries, pointindex,v->points,0,0));
622 /* run all training points through the decision tree to get a final
625 long *probability=calloc(b->entries*2,sizeof(long));
626 for(i=0;i<v->points;i++){
627 int ret=vqenc_entry(b,v->pointlist+i*v->elements);
631 for(i=0;i<b->entries;i++){
632 fprintf(stderr,"point %ld: %ld\n",i,probability[i]);
637 /* generate the codewords (short to long) */