vq/vqgen.c

   1 /********************************************************************
   2  *                                                                  *
   3  * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
   4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
   5  * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
   6  * PLEASE READ THESE TERMS DISTRIBUTING.                            *
   7  *                                                                  *
   8  * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999             *
   9  * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company       *
  10  * http://www.xiph.org/                                             *
  11  *                                                                  *
  12  ********************************************************************
  13
  14  function: build a VQ codebook
  15  author: Monty <xiphmont@mit.edu>
  16  modifications by: Monty
  17  last modification date: Dec 10 1999
  18
  19  ********************************************************************/
  20
  21 /* This code is *not* part of libvorbis.  It is used to generate
  22    trained codebooks offline and then spit the results into a
  23    pregenerated codebook that is compiled into libvorbis.  It is an
  24    expensive (but good) algorithm.  Run it on big iron. */
  25
  26 /* There are so many optimizations to explore in *both* stages that
  27    considering the undertaking is almost withering.  For now, we brute
  28    force it all */
  29
  30 #include <stdlib.h>
  31 #include <stdio.h>
  32 #include <math.h>
  33 #include <string.h>
  34 #include "vqgen.h"
  35
  36 /* Codebook generation happens in two steps:
  37
  38    1) Train the codebook with data collected from the encoder: We use
  39    one of a few error metrics (which represent the distance between a
  40    given data point and a candidate point in the training set) to
  41    divide the training set up into cells representing roughly equal
  42    probability of occurring.
  43
  44    2) Generate the codebook and auxiliary data from the trained data set
  45 */
  46
  47 /* Codebook training ****************************************************
  48  *
  49  * The basic idea here is that a VQ codebook is like an m-dimensional
  50  * foam with n bubbles.  The bubbles compete for space/volume and are
  51  * 'pressurized' [biased] according to some metric.  The basic alg
  52  * iterates through allowing the bubbles to compete for space until
  53  * they converge (if the damping is dome properly) on a steady-state
  54  * solution. Individual input points, collected from libvorbis, are
  55  * used to train the algorithm monte-carlo style.  */
  56
  57 /* internal helpers *****************************************************/
  58 #define vN(data,i) (data+v->elements*i)
  59
  60 double *_point(vqgen *v,long ptr){
  61   return v->pointlist+(v->elements*ptr);
  62 }
  63
  64 double *_now(vqgen *v,long ptr){
  65   return v->entrylist+(v->elements*ptr);
  66 }
  67
  68 /* default metric; squared 'distance' from desired value. */
  69 double _dist_sq(vqgen *v,double *a, double *b){
  70   int i;
  71   int el=v->elements;
  72   double acc=0.;
  73   for(i=0;i<el;i++){
  74     double val=(a[i]-b[i]);
  75     acc+=val*val;
  76   }
  77   return acc;
  78 }
  79
  80 /* *must* be beefed up. */
  81 void _vqgen_seed(vqgen *v){
  82   memcpy(v->entrylist,v->pointlist,sizeof(double)*v->entries*v->elements);
  83 }
  84
  85 /* External calls *******************************************************/
  86
  87 void vqgen_init(vqgen *v,int elements,int entries,
  88                 double (*metric)(vqgen *,double *, double *),
  89                 double spread){
  90   memset(v,0,sizeof(vqgen));
  91
  92   v->elements=elements;
  93   v->errspread=spread;
  94   v->allocated=32768;
  95   v->pointlist=malloc(v->allocated*v->elements*sizeof(double));
  96
  97   v->entries=entries;
  98   v->entrylist=malloc(v->entries*v->elements*sizeof(double));
  99   v->assigned=malloc(v->entries*sizeof(long));
 100   v->bias=calloc(v->entries,sizeof(double));
 101   if(metric)
 102     v->metric_func=metric;
 103   else
 104     v->metric_func=_dist_sq;
 105 }
 106
 107 void vqgen_addpoint(vqgen *v, double *p){
 108   if(v->points>=v->allocated){
 109     v->allocated*=2;
 110     v->pointlist=realloc(v->pointlist,v->allocated*v->elements*sizeof(double));
 111   }
 112
 113   memcpy(_point(v,v->points),p,sizeof(double)*v->elements);
 114   v->points++;
 115   if(v->points==v->entries)_vqgen_seed(v);
 116 }
 117
 118 /* take the trained entries, look at the points that comprise the cell
 119    and find midpoints (as the actual encoding process uses euclidian
 120    distance rather than any more complex metric to find the closest
 121    match */
 122
 123 double *vqgen_midpoint(vqgen *v){
 124   long i,j,k;
 125   double *lo=malloc(v->entries*v->elements*sizeof(double));
 126   double *hi=malloc(v->entries*v->elements*sizeof(double));
 127
 128   memset(v->assigned,0,sizeof(long)*v->entries);
 129   for(i=0;i<v->points;i++){
 130     double *ppt=_point(v,i);
 131     double firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
 132     long   firstentry=0;
 133     for(j=1;j<v->entries;j++){
 134       double thismetric=v->metric_func(v,_now(v,j),_point(v,i))+v->bias[j];
 135       if(thismetric<firstmetric){
 136         firstmetric=thismetric;
 137         firstentry=j;
 138       }
 139     }
 140
 141     j=firstentry;
 142     if(v->assigned[j]++){
 143       for(k=0;k<v->elements;k++){
 144         if(ppt[k]<vN(lo,j)[k])vN(lo,j)[k]=ppt[k];
 145         if(ppt[k]>vN(hi,j)[k])vN(hi,j)[k]=ppt[k];
 146       }
 147     }else{
 148       for(k=0;k<v->elements;k++){
 149         vN(lo,j)[k]=ppt[k];
 150         vN(hi,j)[k]=ppt[k];
 151       }
 152     }
 153   }
 154
 155   for(j=0;j<v->entries;j++)
 156     if(v->assigned[j])
 157       for(k=0;k<v->elements;k++)
 158         vN(lo,j)[k]=(vN(lo,j)[k]+vN(hi,j)[k])/2.;
 159     else
 160       for(k=0;k<v->elements;k++)
 161         vN(lo,j)[k]=_now(v,j)[k];
 162   free(hi);
 163   return(lo);
 164 }
 165
 166 double vqgen_iterate(vqgen *v){
 167   long   i,j,k;
 168   double fdesired=(double)v->points/v->entries;
 169   long  desired=fdesired;
 170   double asserror=0.;
 171   double meterror=0.;
 172   double *new=malloc(sizeof(double)*v->entries*v->elements);
 173   long   *nearcount=malloc(v->entries*sizeof(long));
 174   double *nearbias=malloc(v->entries*desired*sizeof(double));
 175
 176 #ifdef NOISY
 177   char buff[80];
 178   FILE *assig;
 179   FILE *bias;
 180   FILE *cells;
 181   sprintf(buff,"cells%d.m",v->it);
 182   cells=fopen(buff,"w");
 183   sprintf(buff,"assig%d.m",v->it);
 184   assig=fopen(buff,"w");
 185   sprintf(buff,"bias%d.m",v->it);
 186   bias=fopen(buff,"w");
 187 #endif
 188
 189   fprintf(stderr,"Pass #%d... ",v->it);
 190
 191   if(v->entries<2){
 192     fprintf(stderr,"generation requires at least two entries\n");
 193     exit(1);
 194   }
 195
 196   /* fill in nearest points for entries */
 197   /*memset(v->bias,0,sizeof(double)*v->entries);*/
 198   memset(nearcount,0,sizeof(long)*v->entries);
 199   memset(v->assigned,0,sizeof(long)*v->entries);
 200   for(i=0;i<v->points;i++){
 201     double *ppt=_point(v,i);
 202     double firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
 203     double secondmetric=v->metric_func(v,_now(v,1),ppt)+v->bias[1];
 204     long   firstentry=0;
 205     long   secondentry=1;
 206     if(firstmetric>secondmetric){
 207       double temp=firstmetric;
 208       firstmetric=secondmetric;
 209       secondmetric=temp;
 210       firstentry=1;
 211       secondentry=0;
 212     }
 213
 214     for(j=2;j<v->entries;j++){
 215       double thismetric=v->metric_func(v,_now(v,j),_point(v,i))+v->bias[j];
 216       if(thismetric<secondmetric){
 217         if(thismetric<firstmetric){
 218           secondmetric=firstmetric;
 219           secondentry=firstentry;
 220           firstmetric=thismetric;
 221           firstentry=j;
 222         }else{
 223           secondmetric=thismetric;
 224           secondentry=j;
 225         }
 226       }
 227     }
 228
 229     j=firstentry;
 230     meterror+=firstmetric-v->bias[firstentry];
 231     /* set up midpoints for next iter */
 232     if(v->assigned[j]++)
 233       for(k=0;k<v->elements;k++)
 234         vN(new,j)[k]+=_point(v,i)[k];
 235     else
 236       for(k=0;k<v->elements;k++)
 237         vN(new,j)[k]=_point(v,i)[k];
 238
 239
 240 #ifdef NOISY
 241     fprintf(cells,"%g %g\n%g %g\n\n",
 242             _now(v,j)[0],_now(v,j)[1],
 243             _point(v,i)[0],_point(v,i)[1]);
 244 #endif
 245
 246     for(j=0;j<v->entries;j++){
 247
 248       double thismetric;
 249       double *nearbiasptr=nearbias+desired*j;
 250       long k=nearcount[j]-1;
 251
 252       /* 'thismetric' is to be the bias value necessary in the current
 253          arrangement for entry j to capture point i */
 254       if(firstentry==j){
 255         /* use the secondary entry as the threshhold */
 256         thismetric=secondmetric-v->metric_func(v,_now(v,j),_point(v,i));
 257       }else{
 258         /* use the primary entry as the threshhold */
 259         thismetric=firstmetric-v->metric_func(v,_now(v,j),_point(v,i));
 260       }
 261
 262       if(k>=0 && thismetric>nearbiasptr[k]){
 263
 264         /* start at the end and search backward for where this entry
 265            belongs */
 266
 267         for(;k>0;k--) if(nearbiasptr[k-1]>=thismetric)break;
 268
 269         /* insert at k.  Shift and inject. */
 270         memmove(nearbiasptr+k+1,nearbiasptr+k,(desired-k-1)*sizeof(double));
 271         nearbiasptr[k]=thismetric;
 272
 273         if(nearcount[j]<desired)nearcount[j]++;
 274
 275       }else{
 276         if(nearcount[j]<desired){
 277           /* we checked the thresh earlier.  We know this is the
 278              last entry */
 279           nearbiasptr[nearcount[j]++]=thismetric;
 280         }
 281       }
 282     }
 283   }
 284
 285   /* inflate/deflate */
 286   for(i=0;i<v->entries;i++)
 287     v->bias[i]=nearbias[(i+1)*desired-1];
 288
 289   /* last, assign midpoints */
 290   for(j=0;j<v->entries;j++){
 291     asserror+=fabs(v->assigned[j]-fdesired);
 292     if(v->assigned[j])
 293       for(k=0;k<v->elements;k++)
 294         _now(v,j)[k]=vN(new,j)[k]/v->assigned[j];
 295 #ifdef NOISY
 296     fprintf(assig,"%ld\n",v->assigned[j]);
 297     fprintf(bias,"%g\n",v->bias[j]);
 298 #endif
 299   }
 300
 301   asserror/=(v->entries*fdesired);
 302   fprintf(stderr,": dist %g(%g) metric error=%g \n",
 303           asserror,fdesired,meterror/v->points);
 304   v->it++;
 305
 306   free(new);
 307   free(nearcount);
 308   free(nearbias);
 309 #ifdef NOISY
 310   fclose(assig);
 311   fclose(bias);
 312   fclose(cells);
 313 #endif
 314   return(asserror);
 315 }
 316
 317
 318 /* Building a codebook from trained set **********************************
 319
 320    The codebook in raw form is technically finished once it's trained.
 321    However, we want to finalize the representative codebook values for
 322    each entry and generate auxiliary information to optimize encoding.
 323    We generate the auxiliary coding tree using collected data,
 324    probably the same data as in the original training */
 325
 326 /* At each recursion, the data set is split in half.  Cells with data
 327    points on side A go into set A, same with set B.  The sets may
 328    overlap.  If the cell overlaps the deviding line only very slightly
 329    (provided parameter), we may choose to ignore the overlap in order
 330    to pare the tree down */
 331
 332 double *sortvals;
 333 int els;
 334 int iascsort(const void *a,const void *b){
 335   double av=sortvals[*((long *)a) * els];
 336   double bv=sortvals[*((long *)b) * els];
 337   if(av<bv)return(-1);
 338   return(1);
 339 }
 340
 341 /* goes through the split, but just counts it and returns a metric*/
 342 void lp_count(vqgen *v,long *entryindex,long entries,
 343                 long *pointindex,long points,
 344                 long *entryA,long *entryB,
 345                 double *n, double c, double slack,
 346                 long *entriesA,long *entriesB,long *entriesC){
 347   long i,j,k;
 348   long A=0,B=0,C=0;
 349
 350   memset(entryA,0,sizeof(long)*entries);
 351   memset(entryB,0,sizeof(long)*entries);
 352
 353   for(i=0;i<points;i++){
 354     double *ppt=_point(v,pointindex[i]);
 355     long   firstentry=0;
 356     double firstmetric=_dist_sq(v,_now(v,entryindex[0]),ppt);
 357     double position=-c;
 358
 359     for(j=1;j<entries;j++){
 360       double thismetric=_dist_sq(v,_now(v,entryindex[j]),ppt);
 361       if(thismetric<firstmetric){
 362         firstmetric=thismetric;
 363         firstentry=j;
 364       }
 365     }
 366
 367     /* count point split */
 368     for(k=0;k<v->elements;k++)
 369       position+=ppt[k]*n[k];
 370     if(position>0.){
 371       entryA[firstentry]++;
 372     }else{
 373       entryB[firstentry]++;
 374     }
 375   }
 376
 377   /* look to see if entries are in the slack zone */
 378   /* The entry splitting isn't total, so that storage has to be
 379      allocated for recursion.  Reuse the entryA/entryB vectors */
 380   for(j=0;j<entries;j++){
 381     long total=entryA[j]+entryB[j];
 382     if((double)entryA[j]/total<slack){
 383       entryA[j]=0;
 384     }else if((double)entryB[j]/total<slack){
 385       entryB[j]=0;
 386     }
 387     if(entryA[j] && entryB[j])C++;
 388     if(entryA[j])entryA[A++]=entryindex[j];
 389     if(entryB[j])entryB[B++]=entryindex[j];
 390   }
 391   *entriesA=A;
 392   *entriesB=B;
 393   *entriesC=C;
 394 }
 395
 396 void pq_in_out(vqgen *v,double *n,double *c,double *p,double *q){
 397   int k;
 398   *c=0.;
 399   for(k=0;k<v->elements;k++){
 400     double center=(p[k]+q[k])/2.;
 401     n[k]=(center-q[k])*2.;
 402     *c+=center*n[k];
 403   }
 404 }
 405
 406 void pq_center_out(vqgen *v,double *n,double *c,double *center,double *q){
 407   int k;
 408   *c=0.;
 409   for(k=0;k<v->elements;k++){
 410     n[k]=(center[k]-q[k])*2.;
 411     *c+=center[k]*n[k];
 412   }
 413 }
 414
 415 int lp_split(vqgen *v,vqbook *b,
 416              long *entryindex,long entries,
 417              long *pointindex,long points,
 418              long depth,double slack){
 419
 420   /* The encoder, regardless of book, will be using a straight
 421      euclidian distance-to-point metric to determine closest point.
 422      Thus we split the cells using the same (we've already trained the
 423      codebook set spacing and distribution using special metrics and
 424      even a midpoint division won't disturb the basic properties) */
 425
 426   long ret;
 427   double *p;
 428   double *q;
 429   double *n;
 430   double c;
 431   long *entryA=calloc(entries,sizeof(long));
 432   long *entryB=calloc(entries,sizeof(long));
 433   long entriesA=0;
 434   long entriesB=0;
 435   long entriesC=0;
 436   long pointsA=0;
 437   long i,j,k;
 438
 439   p=alloca(sizeof(double)*v->elements);
 440   q=alloca(sizeof(double)*v->elements);
 441   n=alloca(sizeof(double)*v->elements);
 442   memset(p,0,sizeof(double)*v->elements);
 443
 444   /* We need to find the dividing hyperplane. find the median of each
 445      axis as the centerpoint and the normal facing farthest point */
 446
 447   /* more than one way to do this part.  For small sets, we can brute
 448      force it. */
 449
 450   if(entries<32){
 451     /* try every pair possibility */
 452     double best=0;
 453     long   besti=0;
 454     long   bestj=0;
 455     double this;
 456     for(i=0;i<entries-1;i++){
 457       for(j=i+1;j<entries;j++){
 458         pq_in_out(v,n,&c,_now(v,entryindex[i]),_now(v,entryindex[j]));
 459         lp_count(v,entryindex,entries,
 460                  pointindex,points,
 461                  entryA,entryB,
 462                  n, c, slack,
 463                  &entriesA,&entriesB,&entriesC);
 464         this=(entriesA-entriesC)*(entriesB-entriesC);
 465
 466         if(this>best){
 467           best=this;
 468           besti=i;
 469           bestj=j;
 470         }
 471       }
 472     }
 473     pq_in_out(v,n,&c,_now(v,entryindex[besti]),_now(v,entryindex[bestj]));
 474   }else{
 475     double best=0.;
 476     long   bestj=0;
 477
 478     /* try COG/normal and furthest pairs */
 479     /* medianpoint */
 480     for(k=0;k<v->elements;k++){
 481       /* just sort the index array */
 482       sortvals=v->pointlist+k;
 483       els=v->elements;
 484       qsort(pointindex,points,sizeof(long),iascsort);
 485       if(points&0x1){
 486         p[k]=v->pointlist[(pointindex[points/2])*v->elements+k];
 487       }else{
 488         p[k]=(v->pointlist[(pointindex[points/2])*v->elements+k]+
 489               v->pointlist[(pointindex[points/2-1])*v->elements+k])/2.;
 490       }
 491     }
 492
 493     /* try every normal, but just for distance */
 494     for(j=0;j<entries;j++){
 495       double *ppj=_now(v,entryindex[j]);
 496       double this=_dist_sq(v,p,ppj);
 497       if(this>best){
 498         best=this;
 499         bestj=j;
 500       }
 501     }
 502
 503     pq_center_out(v,n,&c,p,_point(v,pointindex[bestj]));
 504
 505
 506   }
 507
 508   /* find cells enclosing points */
 509   /* count A/B points */
 510
 511   lp_count(v,entryindex,entries,
 512            pointindex,points,
 513            entryA,entryB,
 514            n, c, slack,
 515            &entriesA,&entriesB,&entriesC);
 516
 517   /* the point index is split evenly, so we do an Order n
 518      rearrangement into A first/B last and just pass it on */
 519   {
 520     long Aptr=0;
 521     long Bptr=points-1;
 522     while(Aptr<=Bptr){
 523       while(Aptr<=Bptr){
 524         double position=-c;
 525         for(k=0;k<v->elements;k++)
 526           position+=_point(v,pointindex[Aptr])[k]*n[k];
 527         if(position<=0.)break; /* not in A */
 528         Aptr++;
 529       }
 530       while(Aptr<=Bptr){
 531         double position=-c;
 532         for(k=0;k<v->elements;k++)
 533           position+=_point(v,pointindex[Bptr])[k]*n[k];
 534         if(position>0.)break; /* not in B */
 535         Bptr--;
 536       }
 537       if(Aptr<Bptr){
 538         long temp=pointindex[Aptr];
 539         pointindex[Aptr]=pointindex[Bptr];
 540         pointindex[Bptr]=temp;
 541       }
 542       pointsA=Aptr;
 543     }
 544   }
 545
 546   fprintf(stderr,"split: total=%ld depth=%ld set A=%ld:%ld:%ld=B\n",
 547           entries,depth,entriesA-entriesC,entriesC,entriesB-entriesC);
 548   {
 549     long thisaux=b->aux++;
 550     if(b->aux>=b->alloc){
 551       b->alloc*=2;
 552       b->ptr0=realloc(b->ptr0,sizeof(long)*b->alloc);
 553       b->ptr1=realloc(b->ptr1,sizeof(long)*b->alloc);
 554       b->n=realloc(b->n,sizeof(double)*b->elements*b->alloc);
 555       b->c=realloc(b->c,sizeof(double)*b->alloc);
 556     }
 557
 558     memcpy(b->n+b->elements*thisaux,n,sizeof(double)*v->elements);
 559     b->c[thisaux]=c;
 560
 561     if(entriesA==1){
 562       ret=1;
 563       b->ptr0[thisaux]=entryA[0];
 564     }else{
 565       b->ptr0[thisaux]= -b->aux;
 566       ret=lp_split(v,b,entryA,entriesA,pointindex,pointsA,depth+1,slack);
 567     }
 568     if(entriesB==1){
 569       ret++;
 570       b->ptr1[thisaux]=entryB[0];
 571     }else{
 572       b->ptr1[thisaux]= -b->aux;
 573       ret+=lp_split(v,b,entryB,entriesB,pointindex+pointsA,points-pointsA,
 574                    depth+1,slack);
 575     }
 576   }
 577   free(entryA);
 578   free(entryB);
 579   return(ret);
 580 }
 581
 582 int vqenc_entry(vqbook *b,double *val){
 583   int ptr=0,k;
 584   while(1){
 585     double c= -b->c[ptr];
 586     double *nptr=b->n+b->elements*ptr;
 587     for(k=0;k<b->elements;k++)
 588       c+=nptr[k]*val[k];
 589     if(c>0.) /* in A */
 590       ptr= -b->ptr0[ptr];
 591     else     /* in B */
 592       ptr= -b->ptr1[ptr];
 593     if(ptr<=0)break;
 594   }
 595   return(-ptr);
 596 }
 597
 598 void vqgen_book(vqgen *v,vqbook *b){
 599   long i;
 600   long *entryindex=malloc(sizeof(double)*v->entries);
 601   long *pointindex=malloc(sizeof(double)*v->points);
 602
 603   memset(b,0,sizeof(vqbook));
 604   for(i=0;i<v->entries;i++)entryindex[i]=i;
 605   for(i=0;i<v->points;i++)pointindex[i]=i;
 606   b->elements=v->elements;
 607   b->entries=v->entries;
 608   b->alloc=4096;
 609   b->ptr0=malloc(sizeof(long)*b->alloc);
 610   b->ptr1=malloc(sizeof(long)*b->alloc);
 611   b->n=malloc(sizeof(double)*b->elements*b->alloc);
 612   b->c=malloc(sizeof(double)*b->alloc);
 613
 614   b->valuelist=malloc(sizeof(double)*b->elements*b->entries);
 615   b->codelist=malloc(sizeof(long)*b->entries);
 616   b->lengthlist=malloc(b->entries*sizeof(long));
 617
 618   /* first, generate the encoding decision heirarchy */
 619   fprintf(stderr,"Total leaves: %ld\n",
 620           lp_split(v,b,entryindex,v->entries, pointindex,v->points,0,0));
 621
 622   /* run all training points through the decision tree to get a final
 623      probability count */
 624   {
 625     long *probability=calloc(b->entries*2,sizeof(long));
 626     for(i=0;i<v->points;i++){
 627       int ret=vqenc_entry(b,v->pointlist+i*v->elements);
 628       probability[ret]++;
 629     }
 630
 631     for(i=0;i<b->entries;i++){
 632       fprintf(stderr,"point %ld: %ld\n",i,probability[i]);
 633     }
 634
 635   }
 636
 637   /* generate the codewords (short to long) */
 638
 639   free(entryindex);
 640   free(pointindex);
 641 }
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655