vq/vqgen.c

   1 /********************************************************************
   2  *                                                                  *
   3  * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
   4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
   5  * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
   6  * PLEASE READ THESE TERMS DISTRIBUTING.                            *
   7  *                                                                  *
   8  * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999             *
   9  * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company       *
  10  * http://www.xiph.org/                                             *
  11  *                                                                  *
  12  ********************************************************************
  13
  14  function: build a VQ codebook
  15  author: Monty <xiphmont@mit.edu>
  16  modifications by: Monty
  17  last modification date: Dec 10 1999
  18
  19  ********************************************************************/
  20
  21 /* This code is *not* part of libvorbis.  It is used to generate
  22    trained codebooks offline and then spit the results into a
  23    pregenerated codebook that is compiled into libvorbis.  It is an
  24    expensive (but good) algorithm.  Run it on big iron. */
  25
  26 /* There are so many optimizations to explore in *both* stages that
  27    considering the undertaking is almost withering.  For now, we brute
  28    force it all */
  29
  30 #include <stdlib.h>
  31 #include <stdio.h>
  32 #include <math.h>
  33 #include <string.h>
  34 #include "vqgen.h"
  35
  36 /* Codebook generation happens in two steps:
  37
  38    1) Train the codebook with data collected from the encoder: We use
  39    one of a few error metrics (which represent the distance between a
  40    given data point and a candidate point in the training set) to
  41    divide the training set up into cells representing roughly equal
  42    probability of occurring.
  43
  44    2) Generate the codebook and auxiliary data from the trained data set
  45 */
  46
  47 /* Codebook training ****************************************************
  48  *
  49  * The basic idea here is that a VQ codebook is like an m-dimensional
  50  * foam with n bubbles.  The bubbles compete for space/volume and are
  51  * 'pressurized' [biased] according to some metric.  The basic alg
  52  * iterates through allowing the bubbles to compete for space until
  53  * they converge (if the damping is dome properly) on a steady-state
  54  * solution. Individual input points, collected from libvorbis, are
  55  * used to train the algorithm monte-carlo style.  */
  56
  57 /* internal helpers *****************************************************/
  58 #define vN(data,i) (data+v->elements*i)
  59
  60 double *_point(vqgen *v,long ptr){
  61   return v->pointlist+(v->elements*ptr);
  62 }
  63
  64 double *_now(vqgen *v,long ptr){
  65   return v->entrylist+(v->elements*ptr);
  66 }
  67
  68 /* default metric; squared 'distance' from desired value. */
  69 double _dist_sq(vqgen *v,double *a, double *b){
  70   int i;
  71   int el=v->elements;
  72   double acc=0.;
  73   for(i=0;i<el;i++){
  74     double val=(a[i]-b[i]);
  75     acc+=val*val;
  76   }
  77   return acc;
  78 }
  79
  80 /* *must* be beefed up. */
  81 void _vqgen_seed(vqgen *v){
  82   memcpy(v->entrylist,v->pointlist,sizeof(double)*v->entries*v->elements);
  83 }
  84
  85 /* External calls *******************************************************/
  86
  87 void vqgen_init(vqgen *v,int elements,int entries,
  88                 double (*metric)(vqgen *,double *, double *),
  89                 int quant){
  90   memset(v,0,sizeof(vqgen));
  91
  92   v->elements=elements;
  93   v->quantbits=quant;
  94   v->allocated=32768;
  95   v->pointlist=malloc(v->allocated*v->elements*sizeof(double));
  96
  97   v->entries=entries;
  98   v->entrylist=malloc(v->entries*v->elements*sizeof(double));
  99   v->assigned=malloc(v->entries*sizeof(long));
 100   v->bias=calloc(v->entries,sizeof(double));
 101   if(metric)
 102     v->metric_func=metric;
 103   else
 104     v->metric_func=_dist_sq;
 105 }
 106
 107 void vqgen_addpoint(vqgen *v, double *p){
 108   if(v->points>=v->allocated){
 109     v->allocated*=2;
 110     v->pointlist=realloc(v->pointlist,v->allocated*v->elements*sizeof(double));
 111   }
 112
 113   memcpy(_point(v,v->points),p,sizeof(double)*v->elements);
 114   v->points++;
 115   if(v->points==v->entries)_vqgen_seed(v);
 116 }
 117
 118 double vqgen_iterate(vqgen *v){
 119   long   i,j,k;
 120   double fdesired=(double)v->points/v->entries;
 121   long  desired=fdesired;
 122   double asserror=0.;
 123   double meterror=0.;
 124   double *new=malloc(sizeof(double)*v->entries*v->elements);
 125   long   *nearcount=malloc(v->entries*sizeof(long));
 126   double *nearbias=malloc(v->entries*desired*sizeof(double));
 127
 128 #ifdef NOISY
 129   char buff[80];
 130   FILE *assig;
 131   FILE *bias;
 132   FILE *cells;
 133   sprintf(buff,"cells%d.m",v->it);
 134   cells=fopen(buff,"w");
 135   sprintf(buff,"assig%d.m",v->it);
 136   assig=fopen(buff,"w");
 137   sprintf(buff,"bias%d.m",v->it);
 138   bias=fopen(buff,"w");
 139 #endif
 140
 141   fprintf(stderr,"Pass #%d... ",v->it);
 142
 143   if(v->entries<2){
 144     fprintf(stderr,"generation requires at least two entries\n");
 145     exit(1);
 146   }
 147
 148   /* fill in nearest points for entries */
 149   /*memset(v->bias,0,sizeof(double)*v->entries);*/
 150   memset(nearcount,0,sizeof(long)*v->entries);
 151   memset(v->assigned,0,sizeof(long)*v->entries);
 152   for(i=0;i<v->points;i++){
 153     double *ppt=_point(v,i);
 154     double firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
 155     double secondmetric=v->metric_func(v,_now(v,1),ppt)+v->bias[1];
 156     long   firstentry=0;
 157     long   secondentry=1;
 158     if(firstmetric>secondmetric){
 159       double temp=firstmetric;
 160       firstmetric=secondmetric;
 161       secondmetric=temp;
 162       firstentry=1;
 163       secondentry=0;
 164     }
 165
 166     for(j=2;j<v->entries;j++){
 167       double thismetric=v->metric_func(v,_now(v,j),_point(v,i))+v->bias[j];
 168       if(thismetric<secondmetric){
 169         if(thismetric<firstmetric){
 170           secondmetric=firstmetric;
 171           secondentry=firstentry;
 172           firstmetric=thismetric;
 173           firstentry=j;
 174         }else{
 175           secondmetric=thismetric;
 176           secondentry=j;
 177         }
 178       }
 179     }
 180
 181     j=firstentry;
 182     meterror+=firstmetric-v->bias[firstentry];
 183     /* set up midpoints for next iter */
 184     if(v->assigned[j]++)
 185       for(k=0;k<v->elements;k++)
 186         vN(new,j)[k]+=_point(v,i)[k];
 187     else
 188       for(k=0;k<v->elements;k++)
 189         vN(new,j)[k]=_point(v,i)[k];
 190
 191
 192 #ifdef NOISY
 193     fprintf(cells,"%g %g\n%g %g\n\n",
 194             _now(v,j)[0],_now(v,j)[1],
 195             _point(v,i)[0],_point(v,i)[1]);
 196 #endif
 197
 198     for(j=0;j<v->entries;j++){
 199
 200       double thismetric;
 201       double *nearbiasptr=nearbias+desired*j;
 202       long k=nearcount[j]-1;
 203
 204       /* 'thismetric' is to be the bias value necessary in the current
 205          arrangement for entry j to capture point i */
 206       if(firstentry==j){
 207         /* use the secondary entry as the threshhold */
 208         thismetric=secondmetric-v->metric_func(v,_now(v,j),_point(v,i));
 209       }else{
 210         /* use the primary entry as the threshhold */
 211         thismetric=firstmetric-v->metric_func(v,_now(v,j),_point(v,i));
 212       }
 213
 214       if(k>=0 && thismetric>nearbiasptr[k]){
 215
 216         /* start at the end and search backward for where this entry
 217            belongs */
 218
 219         for(;k>0;k--) if(nearbiasptr[k-1]>=thismetric)break;
 220
 221         /* insert at k.  Shift and inject. */
 222         memmove(nearbiasptr+k+1,nearbiasptr+k,(desired-k-1)*sizeof(double));
 223         nearbiasptr[k]=thismetric;
 224
 225         if(nearcount[j]<desired)nearcount[j]++;
 226
 227       }else{
 228         if(nearcount[j]<desired){
 229           /* we checked the thresh earlier.  We know this is the
 230              last entry */
 231           nearbiasptr[nearcount[j]++]=thismetric;
 232         }
 233       }
 234     }
 235   }
 236
 237   /* inflate/deflate */
 238   for(i=0;i<v->entries;i++)
 239     v->bias[i]=nearbias[(i+1)*desired-1];
 240
 241   /* assign midpoints */
 242
 243   for(j=0;j<v->entries;j++){
 244     asserror+=fabs(v->assigned[j]-fdesired);
 245     if(v->assigned[j])
 246       for(k=0;k<v->elements;k++)
 247         _now(v,j)[k]=vN(new,j)[k]/v->assigned[j];
 248 #ifdef NOISY
 249     fprintf(assig,"%ld\n",v->assigned[j]);
 250     fprintf(bias,"%g\n",v->bias[j]);
 251 #endif
 252   }
 253
 254   {
 255     /* midpoints must be quantized.  but we need to know the range in
 256        order to do so */
 257     double min,max;
 258
 259     for(k=0;k<v->elements;k++){
 260       double delta;
 261       min=max=_now(v,0)[k];
 262
 263       for(j=1;j<v->entries;j++){
 264         double val=_now(v,0)[k];
 265         if(val<min)min=val;
 266         if(val>max)max=val;
 267       }
 268
 269       delta=(max-min)/((1<<v->quantbits)-1);
 270       for(j=0;j<v->entries;j++){
 271         double val=_now(v,j)[k];
 272         _now(v,j)[k]=min+delta*rint((val-min)/delta);
 273       }
 274     }
 275   }
 276
 277   asserror/=(v->entries*fdesired);
 278   fprintf(stderr,": dist %g(%g) metric error=%g \n",
 279           asserror,fdesired,meterror/v->points);
 280   v->it++;
 281
 282   free(new);
 283   free(nearcount);
 284   free(nearbias);
 285 #ifdef NOISY
 286   fclose(assig);
 287   fclose(bias);
 288   fclose(cells);
 289 #endif
 290   return(asserror);
 291 }
 292
 293
 294 /* Building a codebook from trained set **********************************
 295
 296    The codebook in raw form is technically finished once it's trained.
 297    However, we want to finalize the representative codebook values for
 298    each entry and generate auxiliary information to optimize encoding.
 299    We generate the auxiliary coding tree using collected data,
 300    probably the same data as in the original training */
 301
 302 /* At each recursion, the data set is split in half.  Cells with data
 303    points on side A go into set A, same with set B.  The sets may
 304    overlap.  If the cell overlaps the deviding line only very slightly
 305    (provided parameter), we may choose to ignore the overlap in order
 306    to pare the tree down */
 307
 308 double *sortvals;
 309 int els;
 310 int iascsort(const void *a,const void *b){
 311   double av=sortvals[*((long *)a) * els];
 312   double bv=sortvals[*((long *)b) * els];
 313   if(av<bv)return(-1);
 314   return(1);
 315 }
 316
 317 /* goes through the split, but just counts it and returns a metric*/
 318 void lp_count(vqgen *v,long *entryindex,long entries,
 319                 long *pointindex,long points,
 320                 long *entryA,long *entryB,
 321                 double *n, double c, double slack,
 322                 long *entriesA,long *entriesB,long *entriesC){
 323   long i,j,k;
 324   long A=0,B=0,C=0;
 325
 326   memset(entryA,0,sizeof(long)*entries);
 327   memset(entryB,0,sizeof(long)*entries);
 328
 329   for(i=0;i<points;i++){
 330     double *ppt=_point(v,pointindex[i]);
 331     long   firstentry=0;
 332     double firstmetric=_dist_sq(v,_now(v,entryindex[0]),ppt);
 333     double position=-c;
 334
 335     for(j=1;j<entries;j++){
 336       double thismetric=_dist_sq(v,_now(v,entryindex[j]),ppt);
 337       if(thismetric<firstmetric){
 338         firstmetric=thismetric;
 339         firstentry=j;
 340       }
 341     }
 342
 343     /* count point split */
 344     for(k=0;k<v->elements;k++)
 345       position+=ppt[k]*n[k];
 346     if(position>0.){
 347       entryA[firstentry]++;
 348     }else{
 349       entryB[firstentry]++;
 350     }
 351   }
 352
 353   /* look to see if entries are in the slack zone */
 354   /* The entry splitting isn't total, so that storage has to be
 355      allocated for recursion.  Reuse the entryA/entryB vectors */
 356   for(j=0;j<entries;j++){
 357     long total=entryA[j]+entryB[j];
 358     if((double)entryA[j]/total<slack){
 359       entryA[j]=0;
 360     }else if((double)entryB[j]/total<slack){
 361       entryB[j]=0;
 362     }
 363     if(entryA[j] && entryB[j])C++;
 364     if(entryA[j])entryA[A++]=entryindex[j];
 365     if(entryB[j])entryB[B++]=entryindex[j];
 366   }
 367   *entriesA=A;
 368   *entriesB=B;
 369   *entriesC=C;
 370 }
 371
 372 void pq_in_out(vqgen *v,double *n,double *c,double *p,double *q){
 373   int k;
 374   *c=0.;
 375   for(k=0;k<v->elements;k++){
 376     double center=(p[k]+q[k])/2.;
 377     n[k]=(center-q[k])*2.;
 378     *c+=center*n[k];
 379   }
 380 }
 381
 382 void pq_center_out(vqgen *v,double *n,double *c,double *center,double *q){
 383   int k;
 384   *c=0.;
 385   for(k=0;k<v->elements;k++){
 386     n[k]=(center[k]-q[k])*2.;
 387     *c+=center[k]*n[k];
 388   }
 389 }
 390
 391 int lp_split(vqgen *v,vqbook *b,
 392              long *entryindex,long entries,
 393              long *pointindex,long points,
 394              long depth,double slack){
 395
 396   /* The encoder, regardless of book, will be using a straight
 397      euclidian distance-to-point metric to determine closest point.
 398      Thus we split the cells using the same (we've already trained the
 399      codebook set spacing and distribution using special metrics and
 400      even a midpoint division won't disturb the basic properties) */
 401
 402   long ret;
 403   double *p;
 404   double *q;
 405   double *n;
 406   double c;
 407   long *entryA=calloc(entries,sizeof(long));
 408   long *entryB=calloc(entries,sizeof(long));
 409   long entriesA=0;
 410   long entriesB=0;
 411   long entriesC=0;
 412   long pointsA=0;
 413   long i,j,k;
 414
 415   p=alloca(sizeof(double)*v->elements);
 416   q=alloca(sizeof(double)*v->elements);
 417   n=alloca(sizeof(double)*v->elements);
 418   memset(p,0,sizeof(double)*v->elements);
 419
 420   /* We need to find the dividing hyperplane. find the median of each
 421      axis as the centerpoint and the normal facing farthest point */
 422
 423   /* more than one way to do this part.  For small sets, we can brute
 424      force it. */
 425
 426   if(entries<32){
 427     /* try every pair possibility */
 428     double best=0;
 429     long   besti=0;
 430     long   bestj=0;
 431     double this;
 432     for(i=0;i<entries-1;i++){
 433       for(j=i+1;j<entries;j++){
 434         pq_in_out(v,n,&c,_now(v,entryindex[i]),_now(v,entryindex[j]));
 435         lp_count(v,entryindex,entries,
 436                  pointindex,points,
 437                  entryA,entryB,
 438                  n, c, slack,
 439                  &entriesA,&entriesB,&entriesC);
 440         this=(entriesA-entriesC)*(entriesB-entriesC);
 441
 442         if(this>best){
 443           best=this;
 444           besti=i;
 445           bestj=j;
 446         }
 447       }
 448     }
 449     pq_in_out(v,n,&c,_now(v,entryindex[besti]),_now(v,entryindex[bestj]));
 450   }else{
 451     double best=0.;
 452     long   bestj=0;
 453
 454     /* try COG/normal and furthest pairs */
 455     /* medianpoint */
 456     for(k=0;k<v->elements;k++){
 457       /* just sort the index array */
 458       sortvals=v->pointlist+k;
 459       els=v->elements;
 460       qsort(pointindex,points,sizeof(long),iascsort);
 461       if(points&0x1){
 462         p[k]=v->pointlist[(pointindex[points/2])*v->elements+k];
 463       }else{
 464         p[k]=(v->pointlist[(pointindex[points/2])*v->elements+k]+
 465               v->pointlist[(pointindex[points/2-1])*v->elements+k])/2.;
 466       }
 467     }
 468
 469     /* try every normal, but just for distance */
 470     for(j=0;j<entries;j++){
 471       double *ppj=_now(v,entryindex[j]);
 472       double this=_dist_sq(v,p,ppj);
 473       if(this>best){
 474         best=this;
 475         bestj=j;
 476       }
 477     }
 478
 479     pq_center_out(v,n,&c,p,_point(v,pointindex[bestj]));
 480
 481
 482   }
 483
 484   /* find cells enclosing points */
 485   /* count A/B points */
 486
 487   lp_count(v,entryindex,entries,
 488            pointindex,points,
 489            entryA,entryB,
 490            n, c, slack,
 491            &entriesA,&entriesB,&entriesC);
 492
 493   /* the point index is split evenly, so we do an Order n
 494      rearrangement into A first/B last and just pass it on */
 495   {
 496     long Aptr=0;
 497     long Bptr=points-1;
 498     while(Aptr<=Bptr){
 499       while(Aptr<=Bptr){
 500         double position=-c;
 501         for(k=0;k<v->elements;k++)
 502           position+=_point(v,pointindex[Aptr])[k]*n[k];
 503         if(position<=0.)break; /* not in A */
 504         Aptr++;
 505       }
 506       while(Aptr<=Bptr){
 507         double position=-c;
 508         for(k=0;k<v->elements;k++)
 509           position+=_point(v,pointindex[Bptr])[k]*n[k];
 510         if(position>0.)break; /* not in B */
 511         Bptr--;
 512       }
 513       if(Aptr<Bptr){
 514         long temp=pointindex[Aptr];
 515         pointindex[Aptr]=pointindex[Bptr];
 516         pointindex[Bptr]=temp;
 517       }
 518       pointsA=Aptr;
 519     }
 520   }
 521
 522   fprintf(stderr,"split: total=%ld depth=%ld set A=%ld:%ld:%ld=B\n",
 523           entries,depth,entriesA-entriesC,entriesC,entriesB-entriesC);
 524   {
 525     long thisaux=b->aux++;
 526     if(b->aux>=b->alloc){
 527       b->alloc*=2;
 528       b->ptr0=realloc(b->ptr0,sizeof(long)*b->alloc);
 529       b->ptr1=realloc(b->ptr1,sizeof(long)*b->alloc);
 530       b->n=realloc(b->n,sizeof(double)*b->elements*b->alloc);
 531       b->c=realloc(b->c,sizeof(double)*b->alloc);
 532     }
 533
 534     memcpy(b->n+b->elements*thisaux,n,sizeof(double)*v->elements);
 535     b->c[thisaux]=c;
 536
 537     if(entriesA==1){
 538       ret=1;
 539       b->ptr0[thisaux]=entryA[0];
 540     }else{
 541       b->ptr0[thisaux]= -b->aux;
 542       ret=lp_split(v,b,entryA,entriesA,pointindex,pointsA,depth+1,slack);
 543     }
 544     if(entriesB==1){
 545       ret++;
 546       b->ptr1[thisaux]=entryB[0];
 547     }else{
 548       b->ptr1[thisaux]= -b->aux;
 549       ret+=lp_split(v,b,entryB,entriesB,pointindex+pointsA,points-pointsA,
 550                    depth+1,slack);
 551     }
 552   }
 553   free(entryA);
 554   free(entryB);
 555   return(ret);
 556 }
 557
 558 int vqenc_entry(vqbook *b,double *val){
 559   int ptr=0,k;
 560   while(1){
 561     double c= -b->c[ptr];
 562     double *nptr=b->n+b->elements*ptr;
 563     for(k=0;k<b->elements;k++)
 564       c+=nptr[k]*val[k];
 565     if(c>0.) /* in A */
 566       ptr= -b->ptr0[ptr];
 567     else     /* in B */
 568       ptr= -b->ptr1[ptr];
 569     if(ptr<=0)break;
 570   }
 571   return(-ptr);
 572 }
 573
 574 void vqgen_book(vqgen *v,vqbook *b){
 575   long i;
 576   long *entryindex=malloc(sizeof(double)*v->entries);
 577   long *pointindex=malloc(sizeof(double)*v->points);
 578
 579   memset(b,0,sizeof(vqbook));
 580   for(i=0;i<v->entries;i++)entryindex[i]=i;
 581   for(i=0;i<v->points;i++)pointindex[i]=i;
 582   b->elements=v->elements;
 583   b->entries=v->entries;
 584   b->alloc=4096;
 585   b->ptr0=malloc(sizeof(long)*b->alloc);
 586   b->ptr1=malloc(sizeof(long)*b->alloc);
 587   b->n=malloc(sizeof(double)*b->elements*b->alloc);
 588   b->c=malloc(sizeof(double)*b->alloc);
 589
 590   b->valuelist=malloc(sizeof(double)*b->elements*b->entries);
 591   b->codelist=malloc(sizeof(long)*b->entries);
 592   b->lengthlist=malloc(b->entries*sizeof(long));
 593
 594   /* first, generate the encoding decision heirarchy */
 595   fprintf(stderr,"Total leaves: %ld\n",
 596           lp_split(v,b,entryindex,v->entries, pointindex,v->points,0,0));
 597
 598   /* run all training points through the decision tree to get a final
 599      probability count */
 600   {
 601     long *probability=calloc(b->entries*2,sizeof(long));
 602     for(i=0;i<v->points;i++){
 603       int ret=vqenc_entry(b,v->pointlist+i*v->elements);
 604       probability[ret]++;
 605     }
 606
 607     for(i=0;i<b->entries;i++){
 608       fprintf(stderr,"point %ld: %ld\n",i,probability[i]);
 609     }
 610
 611   }
 612
 613   /* generate the codewords (short to long) */
 614
 615   free(entryindex);
 616   free(pointindex);
 617 }
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631