1 /********************************************************************
3 * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
5 * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
6 * PLEASE READ THESE TERMS DISTRIBUTING. *
8 * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999 *
9 * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company *
10 * http://www.xiph.org/ *
12 ********************************************************************
14 function: build a VQ codebook
15 author: Monty <xiphmont@mit.edu>
16 modifications by: Monty
17 last modification date: Dec 10 1999
19 ********************************************************************/
21 /* This code is *not* part of libvorbis. It is used to generate
22 trained codebooks offline and then spit the results into a
23 pregenerated codebook that is compiled into libvorbis. It is an
24 expensive (but good) algorithm. Run it on big iron. */
26 /* There are so many optimizations to explore in *both* stages that
27 considering the undertaking is almost withering. For now, we brute
36 /* Codebook generation happens in two steps:
38 1) Train the codebook with data collected from the encoder: We use
39 one of a few error metrics (which represent the distance between a
40 given data point and a candidate point in the training set) to
41 divide the training set up into cells representing roughly equal
42 probability of occurring.
44 2) Generate the codebook and auxiliary data from the trained data set
47 /* Codebook training ****************************************************
49 * The basic idea here is that a VQ codebook is like an m-dimensional
50 * foam with n bubbles. The bubbles compete for space/volume and are
51 * 'pressurized' [biased] according to some metric. The basic alg
52 * iterates through allowing the bubbles to compete for space until
53 * they converge (if the damping is dome properly) on a steady-state
54 * solution. Individual input points, collected from libvorbis, are
55 * used to train the algorithm monte-carlo style. */
57 /* internal helpers *****************************************************/
58 #define vN(data,i) (data+v->elements*i)
60 /* default metric; squared 'distance' from desired value. */
61 double _dist_sq(vqgen *v,double *a, double *b){
66 double val=(a[i]-b[i]);
72 /* *must* be beefed up. */
73 void _vqgen_seed(vqgen *v){
74 memcpy(v->entrylist,v->pointlist,sizeof(double)*v->entries*v->elements);
77 /* External calls *******************************************************/
79 void vqgen_init(vqgen *v,int elements,int entries,
80 double (*metric)(vqgen *,double *, double *)){
81 memset(v,0,sizeof(vqgen));
85 v->pointlist=malloc(v->allocated*v->elements*sizeof(double));
88 v->entrylist=malloc(v->entries*v->elements*sizeof(double));
89 v->assigned=malloc(v->entries*sizeof(long));
90 v->bias=calloc(v->entries,sizeof(double));
92 v->metric_func=metric;
94 v->metric_func=_dist_sq;
97 void vqgen_addpoint(vqgen *v, double *p){
98 if(v->points>=v->allocated){
100 v->pointlist=realloc(v->pointlist,v->allocated*v->elements*sizeof(double));
103 memcpy(_point(v,v->points),p,sizeof(double)*v->elements);
105 if(v->points==v->entries)_vqgen_seed(v);
108 double vqgen_iterate(vqgen *v){
110 double fdesired=(double)v->points/v->entries;
111 long desired=fdesired;
114 double *new=malloc(sizeof(double)*v->entries*v->elements);
115 long *nearcount=malloc(v->entries*sizeof(long));
116 double *nearbias=malloc(v->entries*desired*sizeof(double));
123 sprintf(buff,"cells%d.m",v->it);
124 cells=fopen(buff,"w");
125 sprintf(buff,"assig%d.m",v->it);
126 assig=fopen(buff,"w");
127 sprintf(buff,"bias%d.m",v->it);
128 bias=fopen(buff,"w");
131 fprintf(stderr,"Pass #%d... ",v->it);
134 fprintf(stderr,"generation requires at least two entries\n");
138 /* fill in nearest points for entries */
139 /*memset(v->bias,0,sizeof(double)*v->entries);*/
140 memset(nearcount,0,sizeof(long)*v->entries);
141 memset(v->assigned,0,sizeof(long)*v->entries);
142 for(i=0;i<v->points;i++){
143 double *ppt=_point(v,i);
144 double firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
145 double secondmetric=v->metric_func(v,_now(v,1),ppt)+v->bias[1];
148 if(firstmetric>secondmetric){
149 double temp=firstmetric;
150 firstmetric=secondmetric;
156 for(j=2;j<v->entries;j++){
157 double thismetric=v->metric_func(v,_now(v,j),_point(v,i))+v->bias[j];
158 if(thismetric<secondmetric){
159 if(thismetric<firstmetric){
160 secondmetric=firstmetric;
161 secondentry=firstentry;
162 firstmetric=thismetric;
165 secondmetric=thismetric;
172 meterror+=sqrt(_dist_sq(v,_now(v,j),_point(v,i)));
173 /* set up midpoints for next iter */
175 for(k=0;k<v->elements;k++)
176 vN(new,j)[k]+=_point(v,i)[k];
178 for(k=0;k<v->elements;k++)
179 vN(new,j)[k]=_point(v,i)[k];
183 fprintf(cells,"%g %g\n%g %g\n\n",
184 _now(v,j)[0],_now(v,j)[1],
185 _point(v,i)[0],_point(v,i)[1]);
188 for(j=0;j<v->entries;j++){
191 double *nearbiasptr=nearbias+desired*j;
192 long k=nearcount[j]-1;
194 /* 'thismetric' is to be the bias value necessary in the current
195 arrangement for entry j to capture point i */
197 /* use the secondary entry as the threshhold */
198 thismetric=secondmetric-v->metric_func(v,_now(v,j),_point(v,i));
200 /* use the primary entry as the threshhold */
201 thismetric=firstmetric-v->metric_func(v,_now(v,j),_point(v,i));
204 if(k>=0 && thismetric>nearbiasptr[k]){
206 /* start at the end and search backward for where this entry
209 for(;k>0;k--) if(nearbiasptr[k-1]>=thismetric)break;
211 /* insert at k. Shift and inject. */
212 memmove(nearbiasptr+k+1,nearbiasptr+k,(desired-k-1)*sizeof(double));
213 nearbiasptr[k]=thismetric;
215 if(nearcount[j]<desired)nearcount[j]++;
218 if(nearcount[j]<desired){
219 /* we checked the thresh earlier. We know this is the
221 nearbiasptr[nearcount[j]++]=thismetric;
227 /* inflate/deflate */
228 for(i=0;i<v->entries;i++)
229 v->bias[i]=nearbias[(i+1)*desired-1];
231 /* assign midpoints */
233 for(j=0;j<v->entries;j++){
234 asserror+=fabs(v->assigned[j]-fdesired);
236 for(k=0;k<v->elements;k++)
237 _now(v,j)[k]=vN(new,j)[k]/v->assigned[j];
239 fprintf(assig,"%ld\n",v->assigned[j]);
240 fprintf(bias,"%g\n",v->bias[j]);
244 asserror/=(v->entries*fdesired);
245 fprintf(stderr,": dist %g(%g) metric error=%g \n",
246 asserror,fdesired,meterror/v->points/v->elements);