vq/vqgen.c

   1 /********************************************************************
   2  *                                                                  *
   3  * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
   4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
   5  * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
   6  * PLEASE READ THESE TERMS DISTRIBUTING.                            *
   7  *                                                                  *
   8  * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999             *
   9  * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company       *
  10  * http://www.xiph.org/                                             *
  11  *                                                                  *
  12  ********************************************************************
  13
  14  function: build a VQ codebook
  15  author: Monty <xiphmont@mit.edu>
  16  modifications by: Monty
  17  last modification date: Dec 10 1999
  18
  19  ********************************************************************/
  20
  21 /* This code is *not* part of libvorbis.  It is used to generate
  22    trained codebooks offline and then spit the results into a
  23    pregenerated codebook that is compiled into libvorbis.  It is an
  24    expensive (but good) algorithm.  Run it on big iron. */
  25
  26 /* There are so many optimizations to explore in *both* stages that
  27    considering the undertaking is almost withering.  For now, we brute
  28    force it all */
  29
  30 #include <stdlib.h>
  31 #include <stdio.h>
  32 #include <math.h>
  33 #include <string.h>
  34 #include "vqgen.h"
  35
  36 /* Codebook generation happens in two steps:
  37
  38    1) Train the codebook with data collected from the encoder: We use
  39    one of a few error metrics (which represent the distance between a
  40    given data point and a candidate point in the training set) to
  41    divide the training set up into cells representing roughly equal
  42    probability of occurring.
  43
  44    2) Generate the codebook and auxiliary data from the trained data set
  45 */
  46
  47 /* Codebook training ****************************************************
  48  *
  49  * The basic idea here is that a VQ codebook is like an m-dimensional
  50  * foam with n bubbles.  The bubbles compete for space/volume and are
  51  * 'pressurized' [biased] according to some metric.  The basic alg
  52  * iterates through allowing the bubbles to compete for space until
  53  * they converge (if the damping is dome properly) on a steady-state
  54  * solution. Individual input points, collected from libvorbis, are
  55  * used to train the algorithm monte-carlo style.  */
  56
  57 /* internal helpers *****************************************************/
  58 #define vN(data,i) (data+v->elements*i)
  59
  60 double *_point(vqgen *v,long ptr){
  61   return v->pointlist+(v->elements*ptr);
  62 }
  63
  64 double *_now(vqgen *v,long ptr){
  65   return v->entrylist+(v->elements*ptr);
  66 }
  67
  68 /* default metric; squared 'distance' from desired value. */
  69 double _dist_sq(vqgen *v,double *a, double *b){
  70   int i;
  71   int el=v->elements;
  72   double acc=0.;
  73   for(i=0;i<el;i++){
  74     double val=(a[i]-b[i]);
  75     acc+=val*val;
  76   }
  77   return acc;
  78 }
  79
  80 /* *must* be beefed up. */
  81 void _vqgen_seed(vqgen *v){
  82   memcpy(v->entrylist,v->pointlist,sizeof(double)*v->entries*v->elements);
  83 }
  84
  85 /* External calls *******************************************************/
  86
  87 void vqgen_init(vqgen *v,int elements,int entries,
  88                 double (*metric)(vqgen *,double *, double *),
  89                 int quant){
  90   memset(v,0,sizeof(vqgen));
  91
  92   v->elements=elements;
  93   v->quantbits=quant;
  94   v->allocated=32768;
  95   v->pointlist=malloc(v->allocated*v->elements*sizeof(double));
  96
  97   v->entries=entries;
  98   v->entrylist=malloc(v->entries*v->elements*sizeof(double));
  99   v->assigned=malloc(v->entries*sizeof(long));
 100   v->bias=calloc(v->entries,sizeof(double));
 101   if(metric)
 102     v->metric_func=metric;
 103   else
 104     v->metric_func=_dist_sq;
 105 }
 106
 107 void vqgen_addpoint(vqgen *v, double *p){
 108   if(v->points>=v->allocated){
 109     v->allocated*=2;
 110     v->pointlist=realloc(v->pointlist,v->allocated*v->elements*sizeof(double));
 111   }
 112
 113   memcpy(_point(v,v->points),p,sizeof(double)*v->elements);
 114   v->points++;
 115   if(v->points==v->entries)_vqgen_seed(v);
 116 }
 117
 118 double vqgen_iterate(vqgen *v){
 119   long   i,j,k;
 120   double fdesired=(double)v->points/v->entries;
 121   long  desired=fdesired;
 122   double asserror=0.;
 123   double meterror=0.;
 124   double *new=malloc(sizeof(double)*v->entries*v->elements);
 125   long   *nearcount=malloc(v->entries*sizeof(long));
 126   double *nearbias=malloc(v->entries*desired*sizeof(double));
 127
 128 #ifdef NOISY
 129   char buff[80];
 130   FILE *assig;
 131   FILE *bias;
 132   FILE *cells;
 133   sprintf(buff,"cells%d.m",v->it);
 134   cells=fopen(buff,"w");
 135   sprintf(buff,"assig%d.m",v->it);
 136   assig=fopen(buff,"w");
 137   sprintf(buff,"bias%d.m",v->it);
 138   bias=fopen(buff,"w");
 139 #endif
 140
 141   fprintf(stderr,"Pass #%d... ",v->it);
 142
 143   if(v->entries<2){
 144     fprintf(stderr,"generation requires at least two entries\n");
 145     exit(1);
 146   }
 147
 148   /* fill in nearest points for entries */
 149   /*memset(v->bias,0,sizeof(double)*v->entries);*/
 150   memset(nearcount,0,sizeof(long)*v->entries);
 151   memset(v->assigned,0,sizeof(long)*v->entries);
 152   for(i=0;i<v->points;i++){
 153     double *ppt=_point(v,i);
 154     double firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
 155     double secondmetric=v->metric_func(v,_now(v,1),ppt)+v->bias[1];
 156     long   firstentry=0;
 157     long   secondentry=1;
 158     if(firstmetric>secondmetric){
 159       double temp=firstmetric;
 160       firstmetric=secondmetric;
 161       secondmetric=temp;
 162       firstentry=1;
 163       secondentry=0;
 164     }
 165
 166     for(j=2;j<v->entries;j++){
 167       double thismetric=v->metric_func(v,_now(v,j),_point(v,i))+v->bias[j];
 168       if(thismetric<secondmetric){
 169         if(thismetric<firstmetric){
 170           secondmetric=firstmetric;
 171           secondentry=firstentry;
 172           firstmetric=thismetric;
 173           firstentry=j;
 174         }else{
 175           secondmetric=thismetric;
 176           secondentry=j;
 177         }
 178       }
 179     }
 180
 181     j=firstentry;
 182     meterror+=firstmetric-v->bias[firstentry];
 183     /* set up midpoints for next iter */
 184     if(v->assigned[j]++)
 185       for(k=0;k<v->elements;k++)
 186         vN(new,j)[k]+=_point(v,i)[k];
 187     else
 188       for(k=0;k<v->elements;k++)
 189         vN(new,j)[k]=_point(v,i)[k];
 190
 191
 192 #ifdef NOISY
 193     fprintf(cells,"%g %g\n%g %g\n\n",
 194             _now(v,j)[0],_now(v,j)[1],
 195             _point(v,i)[0],_point(v,i)[1]);
 196 #endif
 197
 198     for(j=0;j<v->entries;j++){
 199
 200       double thismetric;
 201       double *nearbiasptr=nearbias+desired*j;
 202       long k=nearcount[j]-1;
 203
 204       /* 'thismetric' is to be the bias value necessary in the current
 205          arrangement for entry j to capture point i */
 206       if(firstentry==j){
 207         /* use the secondary entry as the threshhold */
 208         thismetric=secondmetric-v->metric_func(v,_now(v,j),_point(v,i));
 209       }else{
 210         /* use the primary entry as the threshhold */
 211         thismetric=firstmetric-v->metric_func(v,_now(v,j),_point(v,i));
 212       }
 213
 214       if(k>=0 && thismetric>nearbiasptr[k]){
 215
 216         /* start at the end and search backward for where this entry
 217            belongs */
 218
 219         for(;k>0;k--) if(nearbiasptr[k-1]>=thismetric)break;
 220
 221         /* insert at k.  Shift and inject. */
 222         memmove(nearbiasptr+k+1,nearbiasptr+k,(desired-k-1)*sizeof(double));
 223         nearbiasptr[k]=thismetric;
 224
 225         if(nearcount[j]<desired)nearcount[j]++;
 226
 227       }else{
 228         if(nearcount[j]<desired){
 229           /* we checked the thresh earlier.  We know this is the
 230              last entry */
 231           nearbiasptr[nearcount[j]++]=thismetric;
 232         }
 233       }
 234     }
 235   }
 236
 237   /* inflate/deflate */
 238   for(i=0;i<v->entries;i++)
 239     v->bias[i]=nearbias[(i+1)*desired-1];
 240
 241   /* assign midpoints */
 242
 243   for(j=0;j<v->entries;j++){
 244     asserror+=fabs(v->assigned[j]-fdesired);
 245     if(v->assigned[j])
 246       for(k=0;k<v->elements;k++)
 247         _now(v,j)[k]=vN(new,j)[k]/v->assigned[j];
 248 #ifdef NOISY
 249     fprintf(assig,"%ld\n",v->assigned[j]);
 250     fprintf(bias,"%g\n",v->bias[j]);
 251 #endif
 252   }
 253
 254   {
 255     /* midpoints must be quantized.  but we need to know the range in
 256        order to do so */
 257     double *min=alloca(sizeof(double)*v->elements);
 258     double *max=alloca(sizeof(double)*v->elements);
 259
 260     for(k=0;k<v->elements;k++)
 261       min[k]=max[k]=_now(v,0)[k];
 262     for(j=1;j<v->entries;j++){
 263       for(k=0;k<v->elements;k++){
 264         double val=_now(v,0)[k];
 265         if(val<min[k])min[k]=val;
 266         if(val>max[k])max[k]=val;
 267       }
 268     }
 269     for(k=0;k<v->elements;k++){
 270       double base=min[k];
 271       double delta=(max[k]-min[k])/((1<<v->quantbits)-1);
 272       for(j=0;j<v->entries;j++){
 273         double val=_now(v,j)[k];
 274         _now(v,j)[k]=base+delta*rint((val-base)/delta);
 275       }
 276     }
 277   }
 278
 279   asserror/=(v->entries*fdesired);
 280   fprintf(stderr,": dist %g(%g) metric error=%g \n",
 281           asserror,fdesired,meterror/v->points);
 282   v->it++;
 283
 284   free(new);
 285   free(nearcount);
 286   free(nearbias);
 287 #ifdef NOISY
 288   fclose(assig);
 289   fclose(bias);
 290   fclose(cells);
 291 #endif
 292   return(asserror);
 293 }
 294
 295
 296 /* Building a codebook from trained set **********************************
 297
 298    The codebook in raw form is technically finished once it's trained.
 299    However, we want to finalize the representative codebook values for
 300    each entry and generate auxiliary information to optimize encoding.
 301    We generate the auxiliary coding tree using collected data,
 302    probably the same data as in the original training */
 303
 304 /* At each recursion, the data set is split in half.  Cells with data
 305    points on side A go into set A, same with set B.  The sets may
 306    overlap.  If the cell overlaps the deviding line only very slightly
 307    (provided parameter), we may choose to ignore the overlap in order
 308    to pare the tree down */
 309
 310 double *sortvals;
 311 int els;
 312 int iascsort(const void *a,const void *b){
 313   double av=sortvals[*((long *)a) * els];
 314   double bv=sortvals[*((long *)b) * els];
 315   if(av<bv)return(-1);
 316   return(1);
 317 }
 318
 319 /* goes through the split, but just counts it and returns a metric*/
 320 void lp_count(vqgen *v,long *entryindex,long entries,
 321                 long *pointindex,long points,
 322                 long *entryA,long *entryB,
 323                 double *n, double c, double slack,
 324                 long *entriesA,long *entriesB,long *entriesC){
 325   long i,j,k;
 326   long A=0,B=0,C=0;
 327
 328   memset(entryA,0,sizeof(long)*entries);
 329   memset(entryB,0,sizeof(long)*entries);
 330
 331   for(i=0;i<points;i++){
 332     double *ppt=_point(v,pointindex[i]);
 333     long   firstentry=0;
 334     double firstmetric=_dist_sq(v,_now(v,entryindex[0]),ppt);
 335     double position=-c;
 336
 337     for(j=1;j<entries;j++){
 338       double thismetric=_dist_sq(v,_now(v,entryindex[j]),ppt);
 339       if(thismetric<firstmetric){
 340         firstmetric=thismetric;
 341         firstentry=j;
 342       }
 343     }
 344
 345     /* count point split */
 346     for(k=0;k<v->elements;k++)
 347       position+=ppt[k]*n[k];
 348     if(position>0.){
 349       entryA[firstentry]++;
 350     }else{
 351       entryB[firstentry]++;
 352     }
 353   }
 354
 355   /* look to see if entries are in the slack zone */
 356   /* The entry splitting isn't total, so that storage has to be
 357      allocated for recursion.  Reuse the entryA/entryB vectors */
 358   for(j=0;j<entries;j++){
 359     long total=entryA[j]+entryB[j];
 360     if((double)entryA[j]/total<slack){
 361       entryA[j]=0;
 362     }else if((double)entryB[j]/total<slack){
 363       entryB[j]=0;
 364     }
 365     if(entryA[j] && entryB[j])C++;
 366     if(entryA[j])entryA[A++]=entryindex[j];
 367     if(entryB[j])entryB[B++]=entryindex[j];
 368   }
 369   *entriesA=A;
 370   *entriesB=B;
 371   *entriesC=C;
 372 }
 373
 374 void pq_in_out(vqgen *v,double *n,double *c,double *p,double *q){
 375   int k;
 376   *c=0.;
 377   for(k=0;k<v->elements;k++){
 378     double center=(p[k]+q[k])/2.;
 379     n[k]=(center-q[k])*2.;
 380     *c+=center*n[k];
 381   }
 382 }
 383
 384 void pq_center_out(vqgen *v,double *n,double *c,double *center,double *q){
 385   int k;
 386   *c=0.;
 387   for(k=0;k<v->elements;k++){
 388     n[k]=(center[k]-q[k])*2.;
 389     *c+=center[k]*n[k];
 390   }
 391 }
 392
 393 int lp_split(vqgen *v,vqbook *b,
 394              long *entryindex,long entries,
 395              long *pointindex,long points,
 396              long depth,double slack){
 397
 398   /* The encoder, regardless of book, will be using a straight
 399      euclidian distance-to-point metric to determine closest point.
 400      Thus we split the cells using the same (we've already trained the
 401      codebook set spacing and distribution using special metrics and
 402      even a midpoint division won't disturb the basic properties) */
 403
 404   long ret;
 405   double *p;
 406   double *q;
 407   double *n;
 408   double c;
 409   long *entryA=calloc(entries,sizeof(long));
 410   long *entryB=calloc(entries,sizeof(long));
 411   long entriesA=0;
 412   long entriesB=0;
 413   long entriesC=0;
 414   long pointsA=0;
 415   long i,j,k;
 416
 417   p=alloca(sizeof(double)*v->elements);
 418   q=alloca(sizeof(double)*v->elements);
 419   n=alloca(sizeof(double)*v->elements);
 420   memset(p,0,sizeof(double)*v->elements);
 421
 422   /* We need to find the dividing hyperplane. find the median of each
 423      axis as the centerpoint and the normal facing farthest point */
 424
 425   /* more than one way to do this part.  For small sets, we can brute
 426      force it. */
 427
 428   if(entries<32){
 429     /* try every pair possibility */
 430     double best=0;
 431     long   besti=0;
 432     long   bestj=0;
 433     double this;
 434     for(i=0;i<entries-1;i++){
 435       for(j=i+1;j<entries;j++){
 436         pq_in_out(v,n,&c,_now(v,entryindex[i]),_now(v,entryindex[j]));
 437         lp_count(v,entryindex,entries,
 438                  pointindex,points,
 439                  entryA,entryB,
 440                  n, c, slack,
 441                  &entriesA,&entriesB,&entriesC);
 442         this=(entriesA-entriesC)*(entriesB-entriesC);
 443
 444         if(this>best){
 445           best=this;
 446           besti=i;
 447           bestj=j;
 448         }
 449       }
 450     }
 451     pq_in_out(v,n,&c,_now(v,entryindex[besti]),_now(v,entryindex[bestj]));
 452   }else{
 453     double best=0.;
 454     long   bestj=0;
 455
 456     /* try COG/normal and furthest pairs */
 457     /* medianpoint */
 458     for(k=0;k<v->elements;k++){
 459       /* just sort the index array */
 460       sortvals=v->pointlist+k;
 461       els=v->elements;
 462       qsort(pointindex,points,sizeof(long),iascsort);
 463       if(points&0x1){
 464         p[k]=v->pointlist[(pointindex[points/2])*v->elements+k];
 465       }else{
 466         p[k]=(v->pointlist[(pointindex[points/2])*v->elements+k]+
 467               v->pointlist[(pointindex[points/2-1])*v->elements+k])/2.;
 468       }
 469     }
 470
 471     /* try every normal, but just for distance */
 472     for(j=0;j<entries;j++){
 473       double *ppj=_now(v,entryindex[j]);
 474       double this=_dist_sq(v,p,ppj);
 475       if(this>best){
 476         best=this;
 477         bestj=j;
 478       }
 479     }
 480
 481     pq_center_out(v,n,&c,p,_point(v,pointindex[bestj]));
 482
 483
 484   }
 485
 486   /* find cells enclosing points */
 487   /* count A/B points */
 488
 489   lp_count(v,entryindex,entries,
 490            pointindex,points,
 491            entryA,entryB,
 492            n, c, slack,
 493            &entriesA,&entriesB,&entriesC);
 494
 495   /* the point index is split evenly, so we do an Order n
 496      rearrangement into A first/B last and just pass it on */
 497   {
 498     long Aptr=0;
 499     long Bptr=points-1;
 500     while(Aptr<=Bptr){
 501       while(Aptr<=Bptr){
 502         double position=-c;
 503         for(k=0;k<v->elements;k++)
 504           position+=_point(v,pointindex[Aptr])[k]*n[k];
 505         if(position<=0.)break; /* not in A */
 506         Aptr++;
 507       }
 508       while(Aptr<=Bptr){
 509         double position=-c;
 510         for(k=0;k<v->elements;k++)
 511           position+=_point(v,pointindex[Bptr])[k]*n[k];
 512         if(position>0.)break; /* not in B */
 513         Bptr--;
 514       }
 515       if(Aptr<Bptr){
 516         long temp=pointindex[Aptr];
 517         pointindex[Aptr]=pointindex[Bptr];
 518         pointindex[Bptr]=temp;
 519       }
 520       pointsA=Aptr;
 521     }
 522   }
 523
 524   fprintf(stderr,"split: total=%ld depth=%ld set A=%ld:%ld:%ld=B\n",
 525           entries,depth,entriesA-entriesC,entriesC,entriesB-entriesC);
 526   {
 527     long thisaux=b->aux++;
 528     if(b->aux>=b->alloc){
 529       b->alloc*=2;
 530       b->ptr0=realloc(b->ptr0,sizeof(long)*b->alloc);
 531       b->ptr1=realloc(b->ptr1,sizeof(long)*b->alloc);
 532       b->n=realloc(b->n,sizeof(double)*b->elements*b->alloc);
 533       b->c=realloc(b->c,sizeof(double)*b->alloc);
 534     }
 535
 536     memcpy(b->n+b->elements*thisaux,n,sizeof(double)*v->elements);
 537     b->c[thisaux]=c;
 538
 539     if(entriesA==1){
 540       ret=1;
 541       b->ptr0[thisaux]=entryA[0];
 542     }else{
 543       b->ptr0[thisaux]= -b->aux;
 544       ret=lp_split(v,b,entryA,entriesA,pointindex,pointsA,depth+1,slack);
 545     }
 546     if(entriesB==1){
 547       ret++;
 548       b->ptr1[thisaux]=entryB[0];
 549     }else{
 550       b->ptr1[thisaux]= -b->aux;
 551       ret+=lp_split(v,b,entryB,entriesB,pointindex+pointsA,points-pointsA,
 552                    depth+1,slack);
 553     }
 554   }
 555   free(entryA);
 556   free(entryB);
 557   return(ret);
 558 }
 559
 560 int vqenc_entry(vqbook *b,double *val){
 561   int ptr=0,k;
 562   while(1){
 563     double c= -b->c[ptr];
 564     double *nptr=b->n+b->elements*ptr;
 565     for(k=0;k<b->elements;k++)
 566       c+=nptr[k]*val[k];
 567     if(c>0.) /* in A */
 568       ptr= -b->ptr0[ptr];
 569     else     /* in B */
 570       ptr= -b->ptr1[ptr];
 571     if(ptr<=0)break;
 572   }
 573   return(-ptr);
 574 }
 575
 576 void vqgen_book(vqgen *v,vqbook *b){
 577   long i;
 578   long *entryindex=malloc(sizeof(double)*v->entries);
 579   long *pointindex=malloc(sizeof(double)*v->points);
 580
 581   memset(b,0,sizeof(vqbook));
 582   for(i=0;i<v->entries;i++)entryindex[i]=i;
 583   for(i=0;i<v->points;i++)pointindex[i]=i;
 584   b->elements=v->elements;
 585   b->entries=v->entries;
 586   b->alloc=4096;
 587   b->ptr0=malloc(sizeof(long)*b->alloc);
 588   b->ptr1=malloc(sizeof(long)*b->alloc);
 589   b->n=malloc(sizeof(double)*b->elements*b->alloc);
 590   b->c=malloc(sizeof(double)*b->alloc);
 591
 592   b->valuelist=malloc(sizeof(double)*b->elements*b->entries);
 593   b->codelist=malloc(sizeof(long)*b->entries);
 594   b->lengthlist=malloc(b->entries*sizeof(long));
 595
 596   /* first, generate the encoding decision heirarchy */
 597   fprintf(stderr,"Total leaves: %ld\n",
 598           lp_split(v,b,entryindex,v->entries, pointindex,v->points,0,0));
 599
 600   /* run all training points through the decision tree to get a final
 601      probability count */
 602   {
 603     long *probability=calloc(b->entries*2,sizeof(long));
 604     for(i=0;i<v->points;i++){
 605       int ret=vqenc_entry(b,v->pointlist+i*v->elements);
 606       probability[ret]++;
 607     }
 608
 609     for(i=0;i<b->entries;i++){
 610       fprintf(stderr,"point %ld: %ld\n",i,probability[i]);
 611     }
 612
 613   }
 614
 615   /* generate the codewords (short to long) */
 616
 617   free(entryindex);
 618   free(pointindex);
 619 }
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633