1 /********************************************************************
3 * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
5 * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
6 * PLEASE READ THESE TERMS DISTRIBUTING. *
8 * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999 *
9 * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company *
10 * http://www.xiph.org/ *
12 ********************************************************************
14 function: build a VQ codebook
15 author: Monty <xiphmont@mit.edu>
16 modifications by: Monty
17 last modification date: Dec 10 1999
19 ********************************************************************/
21 /* This code is *not* part of libvorbis. It is used to generate
22 trained codebooks offline and then spit the results into a
23 pregenerated codebook that is compiled into libvorbis. It is an
24 expensive (but good) algorithm. Run it on big iron. */
26 /* There are so many optimizations to explore in *both* stages that
27 considering the undertaking is almost withering. For now, we brute
36 /* Codebook generation happens in two steps:
38 1) Train the codebook with data collected from the encoder: We use
39 one of a few error metrics (which represent the distance between a
40 given data point and a candidate point in the training set) to
41 divide the training set up into cells representing roughly equal
42 probability of occurring.
44 2) Generate the codebook and auxiliary data from the trained data set
47 /* Codebook training ****************************************************
49 * The basic idea here is that a VQ codebook is like an m-dimensional
50 * foam with n bubbles. The bubbles compete for space/volume and are
51 * 'pressurized' [biased] according to some metric. The basic alg
52 * iterates through allowing the bubbles to compete for space until
53 * they converge (if the damping is dome properly) on a steady-state
54 * solution. Individual input points, collected from libvorbis, are
55 * used to train the algorithm monte-carlo style. */
57 /* internal helpers *****************************************************/
58 #define vN(data,i) (data+v->elements*i)
60 double *_point(vqgen *v,long ptr){
61 return v->pointlist+(v->elements*ptr);
64 double *_now(vqgen *v,long ptr){
65 return v->entrylist+(v->elements*ptr);
68 /* default metric; squared 'distance' from desired value. */
69 double _dist_sq(vqgen *v,double *a, double *b){
74 double val=(a[i]-b[i]);
80 /* *must* be beefed up. */
81 void _vqgen_seed(vqgen *v){
82 memcpy(v->entrylist,v->pointlist,sizeof(double)*v->entries*v->elements);
85 /* External calls *******************************************************/
87 void vqgen_init(vqgen *v,int elements,int entries,
88 double (*metric)(vqgen *,double *, double *),
90 memset(v,0,sizeof(vqgen));
95 v->pointlist=malloc(v->allocated*v->elements*sizeof(double));
98 v->entrylist=malloc(v->entries*v->elements*sizeof(double));
99 v->assigned=malloc(v->entries*sizeof(long));
100 v->bias=calloc(v->entries,sizeof(double));
102 v->metric_func=metric;
104 v->metric_func=_dist_sq;
107 void vqgen_addpoint(vqgen *v, double *p){
108 if(v->points>=v->allocated){
110 v->pointlist=realloc(v->pointlist,v->allocated*v->elements*sizeof(double));
113 memcpy(_point(v,v->points),p,sizeof(double)*v->elements);
115 if(v->points==v->entries)_vqgen_seed(v);
118 double vqgen_iterate(vqgen *v){
120 double fdesired=(double)v->points/v->entries;
121 long desired=fdesired;
124 double *new=malloc(sizeof(double)*v->entries*v->elements);
125 long *nearcount=malloc(v->entries*sizeof(long));
126 double *nearbias=malloc(v->entries*desired*sizeof(double));
133 sprintf(buff,"cells%d.m",v->it);
134 cells=fopen(buff,"w");
135 sprintf(buff,"assig%d.m",v->it);
136 assig=fopen(buff,"w");
137 sprintf(buff,"bias%d.m",v->it);
138 bias=fopen(buff,"w");
141 fprintf(stderr,"Pass #%d... ",v->it);
144 fprintf(stderr,"generation requires at least two entries\n");
148 /* fill in nearest points for entries */
149 /*memset(v->bias,0,sizeof(double)*v->entries);*/
150 memset(nearcount,0,sizeof(long)*v->entries);
151 memset(v->assigned,0,sizeof(long)*v->entries);
152 for(i=0;i<v->points;i++){
153 double *ppt=_point(v,i);
154 double firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
155 double secondmetric=v->metric_func(v,_now(v,1),ppt)+v->bias[1];
158 if(firstmetric>secondmetric){
159 double temp=firstmetric;
160 firstmetric=secondmetric;
166 for(j=2;j<v->entries;j++){
167 double thismetric=v->metric_func(v,_now(v,j),_point(v,i))+v->bias[j];
168 if(thismetric<secondmetric){
169 if(thismetric<firstmetric){
170 secondmetric=firstmetric;
171 secondentry=firstentry;
172 firstmetric=thismetric;
175 secondmetric=thismetric;
182 meterror+=firstmetric-v->bias[firstentry];
183 /* set up midpoints for next iter */
185 for(k=0;k<v->elements;k++)
186 vN(new,j)[k]+=_point(v,i)[k];
188 for(k=0;k<v->elements;k++)
189 vN(new,j)[k]=_point(v,i)[k];
193 fprintf(cells,"%g %g\n%g %g\n\n",
194 _now(v,j)[0],_now(v,j)[1],
195 _point(v,i)[0],_point(v,i)[1]);
198 for(j=0;j<v->entries;j++){
201 double *nearbiasptr=nearbias+desired*j;
202 long k=nearcount[j]-1;
204 /* 'thismetric' is to be the bias value necessary in the current
205 arrangement for entry j to capture point i */
207 /* use the secondary entry as the threshhold */
208 thismetric=secondmetric-v->metric_func(v,_now(v,j),_point(v,i));
210 /* use the primary entry as the threshhold */
211 thismetric=firstmetric-v->metric_func(v,_now(v,j),_point(v,i));
214 if(k>=0 && thismetric>nearbiasptr[k]){
216 /* start at the end and search backward for where this entry
219 for(;k>0;k--) if(nearbiasptr[k-1]>=thismetric)break;
221 /* insert at k. Shift and inject. */
222 memmove(nearbiasptr+k+1,nearbiasptr+k,(desired-k-1)*sizeof(double));
223 nearbiasptr[k]=thismetric;
225 if(nearcount[j]<desired)nearcount[j]++;
228 if(nearcount[j]<desired){
229 /* we checked the thresh earlier. We know this is the
231 nearbiasptr[nearcount[j]++]=thismetric;
237 /* inflate/deflate */
238 for(i=0;i<v->entries;i++)
239 v->bias[i]=nearbias[(i+1)*desired-1];
241 /* assign midpoints */
243 for(j=0;j<v->entries;j++){
244 asserror+=fabs(v->assigned[j]-fdesired);
246 for(k=0;k<v->elements;k++)
247 _now(v,j)[k]=vN(new,j)[k]/v->assigned[j];
249 fprintf(assig,"%ld\n",v->assigned[j]);
250 fprintf(bias,"%g\n",v->bias[j]);
255 /* midpoints must be quantized. but we need to know the range in
257 double *min=alloca(sizeof(double)*v->elements);
258 double *max=alloca(sizeof(double)*v->elements);
260 for(k=0;k<v->elements;k++)
261 min[k]=max[k]=_now(v,0)[k];
262 for(j=1;j<v->entries;j++){
263 for(k=0;k<v->elements;k++){
264 double val=_now(v,0)[k];
265 if(val<min[k])min[k]=val;
266 if(val>max[k])max[k]=val;
269 for(k=0;k<v->elements;k++){
271 double delta=(max[k]-min[k])/((1<<v->quantbits)-1);
272 for(j=0;j<v->entries;j++){
273 double val=_now(v,j)[k];
274 _now(v,j)[k]=base+delta*rint((val-base)/delta);
279 asserror/=(v->entries*fdesired);
280 fprintf(stderr,": dist %g(%g) metric error=%g \n",
281 asserror,fdesired,meterror/v->points);
296 /* Building a codebook from trained set **********************************
298 The codebook in raw form is technically finished once it's trained.
299 However, we want to finalize the representative codebook values for
300 each entry and generate auxiliary information to optimize encoding.
301 We generate the auxiliary coding tree using collected data,
302 probably the same data as in the original training */
304 /* At each recursion, the data set is split in half. Cells with data
305 points on side A go into set A, same with set B. The sets may
306 overlap. If the cell overlaps the deviding line only very slightly
307 (provided parameter), we may choose to ignore the overlap in order
308 to pare the tree down */
312 int iascsort(const void *a,const void *b){
313 double av=sortvals[*((long *)a) * els];
314 double bv=sortvals[*((long *)b) * els];
319 /* goes through the split, but just counts it and returns a metric*/
320 void lp_count(vqgen *v,long *entryindex,long entries,
321 long *pointindex,long points,
322 long *entryA,long *entryB,
323 double *n, double c, double slack,
324 long *entriesA,long *entriesB,long *entriesC){
328 memset(entryA,0,sizeof(long)*entries);
329 memset(entryB,0,sizeof(long)*entries);
331 for(i=0;i<points;i++){
332 double *ppt=_point(v,pointindex[i]);
334 double firstmetric=_dist_sq(v,_now(v,entryindex[0]),ppt);
337 for(j=1;j<entries;j++){
338 double thismetric=_dist_sq(v,_now(v,entryindex[j]),ppt);
339 if(thismetric<firstmetric){
340 firstmetric=thismetric;
345 /* count point split */
346 for(k=0;k<v->elements;k++)
347 position+=ppt[k]*n[k];
349 entryA[firstentry]++;
351 entryB[firstentry]++;
355 /* look to see if entries are in the slack zone */
356 /* The entry splitting isn't total, so that storage has to be
357 allocated for recursion. Reuse the entryA/entryB vectors */
358 for(j=0;j<entries;j++){
359 long total=entryA[j]+entryB[j];
360 if((double)entryA[j]/total<slack){
362 }else if((double)entryB[j]/total<slack){
365 if(entryA[j] && entryB[j])C++;
366 if(entryA[j])entryA[A++]=entryindex[j];
367 if(entryB[j])entryB[B++]=entryindex[j];
374 void pq_in_out(vqgen *v,double *n,double *c,double *p,double *q){
377 for(k=0;k<v->elements;k++){
378 double center=(p[k]+q[k])/2.;
379 n[k]=(center-q[k])*2.;
384 void pq_center_out(vqgen *v,double *n,double *c,double *center,double *q){
387 for(k=0;k<v->elements;k++){
388 n[k]=(center[k]-q[k])*2.;
393 int lp_split(vqgen *v,vqbook *b,
394 long *entryindex,long entries,
395 long *pointindex,long points,
396 long depth,double slack){
398 /* The encoder, regardless of book, will be using a straight
399 euclidian distance-to-point metric to determine closest point.
400 Thus we split the cells using the same (we've already trained the
401 codebook set spacing and distribution using special metrics and
402 even a midpoint division won't disturb the basic properties) */
409 long *entryA=calloc(entries,sizeof(long));
410 long *entryB=calloc(entries,sizeof(long));
417 p=alloca(sizeof(double)*v->elements);
418 q=alloca(sizeof(double)*v->elements);
419 n=alloca(sizeof(double)*v->elements);
420 memset(p,0,sizeof(double)*v->elements);
422 /* We need to find the dividing hyperplane. find the median of each
423 axis as the centerpoint and the normal facing farthest point */
425 /* more than one way to do this part. For small sets, we can brute
429 /* try every pair possibility */
434 for(i=0;i<entries-1;i++){
435 for(j=i+1;j<entries;j++){
436 pq_in_out(v,n,&c,_now(v,entryindex[i]),_now(v,entryindex[j]));
437 lp_count(v,entryindex,entries,
441 &entriesA,&entriesB,&entriesC);
442 this=(entriesA-entriesC)*(entriesB-entriesC);
451 pq_in_out(v,n,&c,_now(v,entryindex[besti]),_now(v,entryindex[bestj]));
456 /* try COG/normal and furthest pairs */
458 for(k=0;k<v->elements;k++){
459 /* just sort the index array */
460 sortvals=v->pointlist+k;
462 qsort(pointindex,points,sizeof(long),iascsort);
464 p[k]=v->pointlist[(pointindex[points/2])*v->elements+k];
466 p[k]=(v->pointlist[(pointindex[points/2])*v->elements+k]+
467 v->pointlist[(pointindex[points/2-1])*v->elements+k])/2.;
471 /* try every normal, but just for distance */
472 for(j=0;j<entries;j++){
473 double *ppj=_now(v,entryindex[j]);
474 double this=_dist_sq(v,p,ppj);
481 pq_center_out(v,n,&c,p,_point(v,pointindex[bestj]));
486 /* find cells enclosing points */
487 /* count A/B points */
489 lp_count(v,entryindex,entries,
493 &entriesA,&entriesB,&entriesC);
495 /* the point index is split evenly, so we do an Order n
496 rearrangement into A first/B last and just pass it on */
503 for(k=0;k<v->elements;k++)
504 position+=_point(v,pointindex[Aptr])[k]*n[k];
505 if(position<=0.)break; /* not in A */
510 for(k=0;k<v->elements;k++)
511 position+=_point(v,pointindex[Bptr])[k]*n[k];
512 if(position>0.)break; /* not in B */
516 long temp=pointindex[Aptr];
517 pointindex[Aptr]=pointindex[Bptr];
518 pointindex[Bptr]=temp;
524 fprintf(stderr,"split: total=%ld depth=%ld set A=%ld:%ld:%ld=B\n",
525 entries,depth,entriesA-entriesC,entriesC,entriesB-entriesC);
527 long thisaux=b->aux++;
528 if(b->aux>=b->alloc){
530 b->ptr0=realloc(b->ptr0,sizeof(long)*b->alloc);
531 b->ptr1=realloc(b->ptr1,sizeof(long)*b->alloc);
532 b->n=realloc(b->n,sizeof(double)*b->elements*b->alloc);
533 b->c=realloc(b->c,sizeof(double)*b->alloc);
536 memcpy(b->n+b->elements*thisaux,n,sizeof(double)*v->elements);
541 b->ptr0[thisaux]=entryA[0];
543 b->ptr0[thisaux]= -b->aux;
544 ret=lp_split(v,b,entryA,entriesA,pointindex,pointsA,depth+1,slack);
548 b->ptr1[thisaux]=entryB[0];
550 b->ptr1[thisaux]= -b->aux;
551 ret+=lp_split(v,b,entryB,entriesB,pointindex+pointsA,points-pointsA,
560 int vqenc_entry(vqbook *b,double *val){
563 double c= -b->c[ptr];
564 double *nptr=b->n+b->elements*ptr;
565 for(k=0;k<b->elements;k++)
576 void vqgen_book(vqgen *v,vqbook *b){
578 long *entryindex=malloc(sizeof(double)*v->entries);
579 long *pointindex=malloc(sizeof(double)*v->points);
581 memset(b,0,sizeof(vqbook));
582 for(i=0;i<v->entries;i++)entryindex[i]=i;
583 for(i=0;i<v->points;i++)pointindex[i]=i;
584 b->elements=v->elements;
585 b->entries=v->entries;
587 b->ptr0=malloc(sizeof(long)*b->alloc);
588 b->ptr1=malloc(sizeof(long)*b->alloc);
589 b->n=malloc(sizeof(double)*b->elements*b->alloc);
590 b->c=malloc(sizeof(double)*b->alloc);
592 b->valuelist=malloc(sizeof(double)*b->elements*b->entries);
593 b->codelist=malloc(sizeof(long)*b->entries);
594 b->lengthlist=malloc(b->entries*sizeof(long));
596 /* first, generate the encoding decision heirarchy */
597 fprintf(stderr,"Total leaves: %ld\n",
598 lp_split(v,b,entryindex,v->entries, pointindex,v->points,0,0));
600 /* run all training points through the decision tree to get a final
603 long *probability=calloc(b->entries*2,sizeof(long));
604 for(i=0;i<v->points;i++){
605 int ret=vqenc_entry(b,v->pointlist+i*v->elements);
609 for(i=0;i<b->entries;i++){
610 fprintf(stderr,"point %ld: %ld\n",i,probability[i]);
615 /* generate the codewords (short to long) */