/********************************************************************
* *
- * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
- * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
- * PLEASE READ THESE TERMS DISTRIBUTING. *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999 *
- * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company *
- * http://www.xiph.org/ *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 *
+ * by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
- function: build a VQ codebook
- author: Monty <xiphmont@mit.edu>
- modifications by: Monty
- last modification date: Dec 10 1999
+ function: train a VQ codebook
+ last mod: $Id$
********************************************************************/
#include <stdio.h>
#include <math.h>
#include <string.h>
+
#include "vqgen.h"
+#include "bookutil.h"
/* Codebook generation happens in two steps:
/* internal helpers *****************************************************/
#define vN(data,i) (data+v->elements*i)
-double *_point(vqgen *v,long ptr){
- return v->pointlist+(v->elements*ptr);
-}
-
-double *_now(vqgen *v,long ptr){
- return v->entrylist+(v->elements*ptr);
-}
-
/* default metric; squared 'distance' from desired value. */
-double _dist_sq(vqgen *v,double *a, double *b){
+float _dist(vqgen *v,float *a, float *b){
int i;
int el=v->elements;
- double acc=0.;
+ float acc=0.f;
for(i=0;i<el;i++){
- double val=(a[i]-b[i]);
+ float val=(a[i]-b[i]);
acc+=val*val;
}
- return acc;
+ return sqrt(acc);
+}
+
+float *_weight_null(vqgen *v,float *a){
+ return a;
}
/* *must* be beefed up. */
void _vqgen_seed(vqgen *v){
- memcpy(v->entrylist,v->pointlist,sizeof(double)*v->entries*v->elements);
+ long i;
+ for(i=0;i<v->entries;i++)
+ memcpy(_now(v,i),_point(v,i),sizeof(float)*v->elements);
+ v->seeded=1;
+}
+
+int directdsort(const void *a, const void *b){
+ float av=*((float *)a);
+ float bv=*((float *)b);
+ return (av<bv)-(av>bv);
+}
+
+void vqgen_cellmetric(vqgen *v){
+ int j,k;
+ float min=-1.f,max=-1.f,mean=0.f,acc=0.f;
+ long dup=0,unused=0;
+ #ifdef NOISY
+ int i;
+ char buff[80];
+ float spacings[v->entries];
+ int count=0;
+ FILE *cells;
+ sprintf(buff,"cellspace%d.m",v->it);
+ cells=fopen(buff,"w");
+#endif
+
+ /* minimum, maximum, cell spacing */
+ for(j=0;j<v->entries;j++){
+ float localmin=-1.;
+
+ for(k=0;k<v->entries;k++){
+ if(j!=k){
+ float this=_dist(v,_now(v,j),_now(v,k));
+ if(this>0){
+ if(v->assigned[k] && (localmin==-1 || this<localmin))
+ localmin=this;
+ }else{
+ if(k<j){
+ dup++;
+ break;
+ }
+ }
+ }
+ }
+ if(k<v->entries)continue;
+
+ if(v->assigned[j]==0){
+ unused++;
+ continue;
+ }
+
+ localmin=v->max[j]+localmin/2; /* this gives us rough diameter */
+ if(min==-1 || localmin<min)min=localmin;
+ if(max==-1 || localmin>max)max=localmin;
+ mean+=localmin;
+ acc++;
+#ifdef NOISY
+ spacings[count++]=localmin;
+#endif
+ }
+
+ fprintf(stderr,"cell diameter: %.03g::%.03g::%.03g (%ld unused/%ld dup)\n",
+ min,mean/acc,max,unused,dup);
+
+#ifdef NOISY
+ qsort(spacings,count,sizeof(float),directdsort);
+ for(i=0;i<count;i++)
+ fprintf(cells,"%g\n",spacings[i]);
+ fclose(cells);
+#endif
+
}
/* External calls *******************************************************/
-void vqgen_init(vqgen *v,int elements,int entries,
- double (*metric)(vqgen *,double *, double *),
- double spread){
+/* We have two forms of quantization; in the first, each vector
+ element in the codebook entry is orthogonal. Residues would use this
+ quantization for example.
+
+ In the second, we have a sequence of monotonically increasing
+ values that we wish to quantize as deltas (to save space). We
+ still need to quantize so that absolute values are accurate. For
+ example, LSP quantizes all absolute values, but the book encodes
+ distance between values because each successive value is larger
+ than the preceeding value. Thus the desired quantibits apply to
+ the encoded (delta) values, not abs positions. This requires minor
+ additional encode-side trickery. */
+
+void vqgen_quantize(vqgen *v,quant_meta *q){
+
+ float maxdel;
+ float mindel;
+
+ float delta;
+ float maxquant=((1<<q->quant)-1);
+
+ int j,k;
+
+ mindel=maxdel=_now(v,0)[0];
+
+ for(j=0;j<v->entries;j++){
+ float last=0.f;
+ for(k=0;k<v->elements;k++){
+ if(mindel>_now(v,j)[k]-last)mindel=_now(v,j)[k]-last;
+ if(maxdel<_now(v,j)[k]-last)maxdel=_now(v,j)[k]-last;
+ if(q->sequencep)last=_now(v,j)[k];
+ }
+ }
+
+
+ /* first find the basic delta amount from the maximum span to be
+ encoded. Loosen the delta slightly to allow for additional error
+ during sequence quantization */
+
+ delta=(maxdel-mindel)/((1<<q->quant)-1.5f);
+
+ q->min=_float32_pack(mindel);
+ q->delta=_float32_pack(delta);
+
+ mindel=_float32_unpack(q->min);
+ delta=_float32_unpack(q->delta);
+
+ for(j=0;j<v->entries;j++){
+ float last=0;
+ for(k=0;k<v->elements;k++){
+ float val=_now(v,j)[k];
+ float now=rint((val-last-mindel)/delta);
+
+ _now(v,j)[k]=now;
+ if(now<0){
+ /* be paranoid; this should be impossible */
+ fprintf(stderr,"fault; quantized value<0\n");
+ exit(1);
+ }
+
+ if(now>maxquant){
+ /* be paranoid; this should be impossible */
+ fprintf(stderr,"fault; quantized value>max\n");
+ exit(1);
+ }
+ if(q->sequencep)last=(now*delta)+mindel+last;
+ }
+ }
+}
+
+/* much easier :-). Unlike in the codebook, we don't un-log log
+ scales; we just make sure they're properly offset. */
+void vqgen_unquantize(vqgen *v,quant_meta *q){
+ long j,k;
+ float mindel=_float32_unpack(q->min);
+ float delta=_float32_unpack(q->delta);
+
+ for(j=0;j<v->entries;j++){
+ float last=0.f;
+ for(k=0;k<v->elements;k++){
+ float now=_now(v,j)[k];
+ now=fabs(now)*delta+last+mindel;
+ if(q->sequencep)last=now;
+ _now(v,j)[k]=now;
+ }
+ }
+}
+
+void vqgen_init(vqgen *v,int elements,int aux,int entries,float mindist,
+ float (*metric)(vqgen *,float *, float *),
+ float *(*weight)(vqgen *,float *),int centroid){
memset(v,0,sizeof(vqgen));
+ v->centroid=centroid;
v->elements=elements;
- v->errspread=spread;
+ v->aux=aux;
+ v->mindist=mindist;
v->allocated=32768;
- v->pointlist=malloc(v->allocated*v->elements*sizeof(double));
+ v->pointlist=_ogg_malloc(v->allocated*(v->elements+v->aux)*sizeof(float));
v->entries=entries;
- v->entrylist=malloc(v->entries*v->elements*sizeof(double));
- v->assigned=malloc(v->entries*sizeof(long));
- v->bias=calloc(v->entries,sizeof(double));
+ v->entrylist=_ogg_malloc(v->entries*v->elements*sizeof(float));
+ v->assigned=_ogg_malloc(v->entries*sizeof(long));
+ v->bias=_ogg_calloc(v->entries,sizeof(float));
+ v->max=_ogg_calloc(v->entries,sizeof(float));
if(metric)
v->metric_func=metric;
else
- v->metric_func=_dist_sq;
+ v->metric_func=_dist;
+ if(weight)
+ v->weight_func=weight;
+ else
+ v->weight_func=_weight_null;
+
+ v->asciipoints=tmpfile();
+
}
-void vqgen_addpoint(vqgen *v, double *p){
+void vqgen_addpoint(vqgen *v, float *p,float *a){
+ int k;
+ for(k=0;k<v->elements;k++)
+ fprintf(v->asciipoints,"%.12g\n",p[k]);
+ for(k=0;k<v->aux;k++)
+ fprintf(v->asciipoints,"%.12g\n",a[k]);
+
if(v->points>=v->allocated){
v->allocated*=2;
- v->pointlist=realloc(v->pointlist,v->allocated*v->elements*sizeof(double));
+ v->pointlist=_ogg_realloc(v->pointlist,v->allocated*(v->elements+v->aux)*
+ sizeof(float));
+ }
+
+ memcpy(_point(v,v->points),p,sizeof(float)*v->elements);
+ if(v->aux)memcpy(_point(v,v->points)+v->elements,a,sizeof(float)*v->aux);
+
+ /* quantize to the density mesh if it's selected */
+ if(v->mindist>0.f){
+ /* quantize to the mesh */
+ for(k=0;k<v->elements+v->aux;k++)
+ _point(v,v->points)[k]=
+ rint(_point(v,v->points)[k]/v->mindist)*v->mindist;
}
-
- memcpy(_point(v,v->points),p,sizeof(double)*v->elements);
v->points++;
- if(v->points==v->entries)_vqgen_seed(v);
+ if(!(v->points&0xff))spinnit("loading... ",v->points);
}
-/* take the trained entries, look at the points that comprise the cell
- and find midpoints (as the actual encoding process uses euclidian
- distance rather than any more complex metric to find the closest
- match */
-
-double *vqgen_midpoint(vqgen *v){
- long i,j,k;
- double *lo=malloc(v->entries*v->elements*sizeof(double));
- double *hi=malloc(v->entries*v->elements*sizeof(double));
+/* yes, not threadsafe. These utils aren't */
+static int sortit=0;
+static int sortsize=0;
+static int meshcomp(const void *a,const void *b){
+ if(((sortit++)&0xfff)==0)spinnit("sorting mesh...",sortit);
+ return(memcmp(a,b,sortsize));
+}
- memset(v->assigned,0,sizeof(long)*v->entries);
- for(i=0;i<v->points;i++){
- double *ppt=_point(v,i);
- double firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
- long firstentry=0;
- for(j=1;j<v->entries;j++){
- double thismetric=v->metric_func(v,_now(v,j),_point(v,i))+v->bias[j];
- if(thismetric<firstmetric){
- firstmetric=thismetric;
- firstentry=j;
+void vqgen_sortmesh(vqgen *v){
+ sortit=0;
+ if(v->mindist>0.f){
+ long i,march=1;
+
+ /* sort to make uniqueness detection trivial */
+ sortsize=(v->elements+v->aux)*sizeof(float);
+ qsort(v->pointlist,v->points,sortsize,meshcomp);
+
+ /* now march through and eliminate dupes */
+ for(i=1;i<v->points;i++){
+ if(memcmp(_point(v,i),_point(v,i-1),sortsize)){
+ /* a new, unique entry. march it down */
+ if(i>march)memcpy(_point(v,march),_point(v,i),sortsize);
+ march++;
}
+ spinnit("eliminating density... ",v->points-i);
}
-
- j=firstentry;
- if(v->assigned[j]++){
- for(k=0;k<v->elements;k++){
- if(ppt[k]<vN(lo,j)[k])vN(lo,j)[k]=ppt[k];
- if(ppt[k]>vN(hi,j)[k])vN(hi,j)[k]=ppt[k];
- }
- }else{
- for(k=0;k<v->elements;k++){
- vN(lo,j)[k]=ppt[k];
- vN(hi,j)[k]=ppt[k];
- }
- }
- }
- for(j=0;j<v->entries;j++)
- if(v->assigned[j])
- for(k=0;k<v->elements;k++)
- vN(lo,j)[k]=(vN(lo,j)[k]+vN(hi,j)[k])/2.;
- else
- for(k=0;k<v->elements;k++)
- vN(lo,j)[k]=_now(v,j)[k];
- free(hi);
- return(lo);
+ /* we're done */
+ fprintf(stderr,"\r%ld training points remining out of %ld"
+ " after density mesh (%ld%%)\n",march,v->points,march*100/v->points);
+ v->points=march;
+
+ }
+ v->sorted=1;
}
-double vqgen_iterate(vqgen *v){
+float vqgen_iterate(vqgen *v,int biasp){
long i,j,k;
- double fdesired=(double)v->points/v->entries;
- long desired=fdesired;
- double asserror=0.;
- double meterror=0.;
- double *new=malloc(sizeof(double)*v->entries*v->elements);
- long *nearcount=malloc(v->entries*sizeof(long));
- double *nearbias=malloc(v->entries*desired*sizeof(double));
-#ifdef NOISY
- char buff[80];
- FILE *assig;
- FILE *bias;
- FILE *cells;
- sprintf(buff,"cells%d.m",v->it);
- cells=fopen(buff,"w");
- sprintf(buff,"assig%d.m",v->it);
- assig=fopen(buff,"w");
- sprintf(buff,"bias%d.m",v->it);
- bias=fopen(buff,"w");
-#endif
-
- fprintf(stderr,"Pass #%d... ",v->it);
+ float fdesired;
+ long desired;
+ long desired2;
+
+ float asserror=0.f;
+ float meterror=0.f;
+ float *new;
+ float *new2;
+ long *nearcount;
+ float *nearbias;
+ #ifdef NOISY
+ char buff[80];
+ FILE *assig;
+ FILE *bias;
+ FILE *cells;
+ sprintf(buff,"cells%d.m",v->it);
+ cells=fopen(buff,"w");
+ sprintf(buff,"assig%d.m",v->it);
+ assig=fopen(buff,"w");
+ sprintf(buff,"bias%d.m",v->it);
+ bias=fopen(buff,"w");
+ #endif
+
if(v->entries<2){
fprintf(stderr,"generation requires at least two entries\n");
exit(1);
}
- /* fill in nearest points for entries */
- /*memset(v->bias,0,sizeof(double)*v->entries);*/
+ if(!v->sorted)vqgen_sortmesh(v);
+ if(!v->seeded)_vqgen_seed(v);
+
+ fdesired=(float)v->points/v->entries;
+ desired=fdesired;
+ desired2=desired*2;
+ new=_ogg_malloc(sizeof(float)*v->entries*v->elements);
+ new2=_ogg_malloc(sizeof(float)*v->entries*v->elements);
+ nearcount=_ogg_malloc(v->entries*sizeof(long));
+ nearbias=_ogg_malloc(v->entries*desired2*sizeof(float));
+
+ /* fill in nearest points for entry biasing */
+ /*memset(v->bias,0,sizeof(float)*v->entries);*/
memset(nearcount,0,sizeof(long)*v->entries);
memset(v->assigned,0,sizeof(long)*v->entries);
- for(i=0;i<v->points;i++){
- double *ppt=_point(v,i);
- double firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
- double secondmetric=v->metric_func(v,_now(v,1),ppt)+v->bias[1];
- long firstentry=0;
- long secondentry=1;
- if(firstmetric>secondmetric){
- double temp=firstmetric;
- firstmetric=secondmetric;
- secondmetric=temp;
- firstentry=1;
- secondentry=0;
+ if(biasp){
+ for(i=0;i<v->points;i++){
+ float *ppt=v->weight_func(v,_point(v,i));
+ float firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
+ float secondmetric=v->metric_func(v,_now(v,1),ppt)+v->bias[1];
+ long firstentry=0;
+ long secondentry=1;
+
+ if(!(i&0xff))spinnit("biasing... ",v->points+v->points+v->entries-i);
+
+ if(firstmetric>secondmetric){
+ float temp=firstmetric;
+ firstmetric=secondmetric;
+ secondmetric=temp;
+ firstentry=1;
+ secondentry=0;
+ }
+
+ for(j=2;j<v->entries;j++){
+ float thismetric=v->metric_func(v,_now(v,j),ppt)+v->bias[j];
+ if(thismetric<secondmetric){
+ if(thismetric<firstmetric){
+ secondmetric=firstmetric;
+ secondentry=firstentry;
+ firstmetric=thismetric;
+ firstentry=j;
+ }else{
+ secondmetric=thismetric;
+ secondentry=j;
+ }
+ }
+ }
+
+ j=firstentry;
+ for(j=0;j<v->entries;j++){
+
+ float thismetric,localmetric;
+ float *nearbiasptr=nearbias+desired2*j;
+ long k=nearcount[j];
+
+ localmetric=v->metric_func(v,_now(v,j),ppt);
+ /* 'thismetric' is to be the bias value necessary in the current
+ arrangement for entry j to capture point i */
+ if(firstentry==j){
+ /* use the secondary entry as the threshhold */
+ thismetric=secondmetric-localmetric;
+ }else{
+ /* use the primary entry as the threshhold */
+ thismetric=firstmetric-localmetric;
+ }
+
+ /* support the idea of 'minimum distance'... if we want the
+ cells in a codebook to be roughly some minimum size (as with
+ the low resolution residue books) */
+
+ /* a cute two-stage delayed sorting hack */
+ if(k<desired){
+ nearbiasptr[k]=thismetric;
+ k++;
+ if(k==desired){
+ spinnit("biasing... ",v->points+v->points+v->entries-i);
+ qsort(nearbiasptr,desired,sizeof(float),directdsort);
+ }
+
+ }else if(thismetric>nearbiasptr[desired-1]){
+ nearbiasptr[k]=thismetric;
+ k++;
+ if(k==desired2){
+ spinnit("biasing... ",v->points+v->points+v->entries-i);
+ qsort(nearbiasptr,desired2,sizeof(float),directdsort);
+ k=desired;
+ }
+ }
+ nearcount[j]=k;
+ }
}
- for(j=2;j<v->entries;j++){
- double thismetric=v->metric_func(v,_now(v,j),_point(v,i))+v->bias[j];
- if(thismetric<secondmetric){
- if(thismetric<firstmetric){
- secondmetric=firstmetric;
- secondentry=firstentry;
- firstmetric=thismetric;
- firstentry=j;
- }else{
- secondmetric=thismetric;
- secondentry=j;
- }
+ /* inflate/deflate */
+
+ for(i=0;i<v->entries;i++){
+ float *nearbiasptr=nearbias+desired2*i;
+
+ spinnit("biasing... ",v->points+v->entries-i);
+
+ /* due to the delayed sorting, we likely need to finish it off....*/
+ if(nearcount[i]>desired)
+ qsort(nearbiasptr,nearcount[i],sizeof(float),directdsort);
+
+ v->bias[i]=nearbiasptr[desired-1];
+
+ }
+ }else{
+ memset(v->bias,0,v->entries*sizeof(float));
+ }
+
+ /* Now assign with new bias and find new midpoints */
+ for(i=0;i<v->points;i++){
+ float *ppt=v->weight_func(v,_point(v,i));
+ float firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
+ long firstentry=0;
+
+ if(!(i&0xff))spinnit("centering... ",v->points-i);
+
+ for(j=0;j<v->entries;j++){
+ float thismetric=v->metric_func(v,_now(v,j),ppt)+v->bias[j];
+ if(thismetric<firstmetric){
+ firstmetric=thismetric;
+ firstentry=j;
}
}
-
+
j=firstentry;
- meterror+=firstmetric-v->bias[firstentry];
- /* set up midpoints for next iter */
- if(v->assigned[j]++)
- for(k=0;k<v->elements;k++)
- vN(new,j)[k]+=_point(v,i)[k];
- else
- for(k=0;k<v->elements;k++)
- vN(new,j)[k]=_point(v,i)[k];
-
-
+
#ifdef NOISY
fprintf(cells,"%g %g\n%g %g\n\n",
- _now(v,j)[0],_now(v,j)[1],
- _point(v,i)[0],_point(v,i)[1]);
+ _now(v,j)[0],_now(v,j)[1],
+ ppt[0],ppt[1]);
#endif
- for(j=0;j<v->entries;j++){
-
- double thismetric;
- double *nearbiasptr=nearbias+desired*j;
- long k=nearcount[j]-1;
-
- /* 'thismetric' is to be the bias value necessary in the current
- arrangement for entry j to capture point i */
- if(firstentry==j){
- /* use the secondary entry as the threshhold */
- thismetric=secondmetric-v->metric_func(v,_now(v,j),_point(v,i));
+ firstmetric-=v->bias[j];
+ meterror+=firstmetric;
+
+ if(v->centroid==0){
+ /* set up midpoints for next iter */
+ if(v->assigned[j]++){
+ for(k=0;k<v->elements;k++)
+ vN(new,j)[k]+=ppt[k];
+ if(firstmetric>v->max[j])v->max[j]=firstmetric;
}else{
- /* use the primary entry as the threshhold */
- thismetric=firstmetric-v->metric_func(v,_now(v,j),_point(v,i));
+ for(k=0;k<v->elements;k++)
+ vN(new,j)[k]=ppt[k];
+ v->max[j]=firstmetric;
}
-
- if(k>=0 && thismetric>nearbiasptr[k]){
-
- /* start at the end and search backward for where this entry
- belongs */
-
- for(;k>0;k--) if(nearbiasptr[k-1]>=thismetric)break;
-
- /* insert at k. Shift and inject. */
- memmove(nearbiasptr+k+1,nearbiasptr+k,(desired-k-1)*sizeof(double));
- nearbiasptr[k]=thismetric;
-
- if(nearcount[j]<desired)nearcount[j]++;
-
+ }else{
+ /* centroid */
+ if(v->assigned[j]++){
+ for(k=0;k<v->elements;k++){
+ if(vN(new,j)[k]>ppt[k])vN(new,j)[k]=ppt[k];
+ if(vN(new2,j)[k]<ppt[k])vN(new2,j)[k]=ppt[k];
+ }
+ if(firstmetric>v->max[firstentry])v->max[j]=firstmetric;
}else{
- if(nearcount[j]<desired){
- /* we checked the thresh earlier. We know this is the
- last entry */
- nearbiasptr[nearcount[j]++]=thismetric;
- }
+ for(k=0;k<v->elements;k++){
+ vN(new,j)[k]=ppt[k];
+ vN(new2,j)[k]=ppt[k];
+ }
+ v->max[firstentry]=firstmetric;
}
}
}
-
- /* inflate/deflate */
- for(i=0;i<v->entries;i++)
- v->bias[i]=nearbias[(i+1)*desired-1];
- /* last, assign midpoints */
+ /* assign midpoints */
+
for(j=0;j<v->entries;j++){
- asserror+=fabs(v->assigned[j]-fdesired);
- if(v->assigned[j])
- for(k=0;k<v->elements;k++)
- _now(v,j)[k]=vN(new,j)[k]/v->assigned[j];
#ifdef NOISY
fprintf(assig,"%ld\n",v->assigned[j]);
fprintf(bias,"%g\n",v->bias[j]);
#endif
+ asserror+=fabs(v->assigned[j]-fdesired);
+ if(v->assigned[j]){
+ if(v->centroid==0){
+ for(k=0;k<v->elements;k++)
+ _now(v,j)[k]=vN(new,j)[k]/v->assigned[j];
+ }else{
+ for(k=0;k<v->elements;k++)
+ _now(v,j)[k]=(vN(new,j)[k]+vN(new2,j)[k])/2.f;
+ }
+ }
}
asserror/=(v->entries*fdesired);
+
+ fprintf(stderr,"Pass #%d... ",v->it);
fprintf(stderr,": dist %g(%g) metric error=%g \n",
- asserror,fdesired,meterror/v->points);
+ asserror,fdesired,meterror/v->points);
v->it++;
free(new);
return(asserror);
}
-
-/* Building a codebook from trained set **********************************
-
- The codebook in raw form is technically finished once it's trained.
- However, we want to finalize the representative codebook values for
- each entry and generate auxiliary information to optimize encoding.
- We generate the auxiliary coding tree using collected data,
- probably the same data as in the original training */
-
-/* At each recursion, the data set is split in half. Cells with data
- points on side A go into set A, same with set B. The sets may
- overlap. If the cell overlaps the deviding line only very slightly
- (provided parameter), we may choose to ignore the overlap in order
- to pare the tree down */
-
-double *sortvals;
-int els;
-int iascsort(const void *a,const void *b){
- double av=sortvals[*((long *)a) * els];
- double bv=sortvals[*((long *)b) * els];
- if(av<bv)return(-1);
- return(1);
-}
-
-/* goes through the split, but just counts it and returns a metric*/
-void lp_count(vqgen *v,long *entryindex,long entries,
- long *pointindex,long points,
- long *entryA,long *entryB,
- double *n, double c, double slack,
- long *entriesA,long *entriesB,long *entriesC){
- long i,j,k;
- long A=0,B=0,C=0;
-
- memset(entryA,0,sizeof(long)*entries);
- memset(entryB,0,sizeof(long)*entries);
-
- for(i=0;i<points;i++){
- double *ppt=_point(v,pointindex[i]);
- long firstentry=0;
- double firstmetric=_dist_sq(v,_now(v,entryindex[0]),ppt);
- double position=-c;
-
- for(j=1;j<entries;j++){
- double thismetric=_dist_sq(v,_now(v,entryindex[j]),ppt);
- if(thismetric<firstmetric){
- firstmetric=thismetric;
- firstentry=j;
- }
- }
-
- /* count point split */
- for(k=0;k<v->elements;k++)
- position+=ppt[k]*n[k];
- if(position>0.){
- entryA[firstentry]++;
- }else{
- entryB[firstentry]++;
- }
- }
-
- /* look to see if entries are in the slack zone */
- /* The entry splitting isn't total, so that storage has to be
- allocated for recursion. Reuse the entryA/entryB vectors */
- for(j=0;j<entries;j++){
- long total=entryA[j]+entryB[j];
- if((double)entryA[j]/total<slack){
- entryA[j]=0;
- }else if((double)entryB[j]/total<slack){
- entryB[j]=0;
- }
- if(entryA[j] && entryB[j])C++;
- if(entryA[j])entryA[A++]=entryindex[j];
- if(entryB[j])entryB[B++]=entryindex[j];
- }
- *entriesA=A;
- *entriesB=B;
- *entriesC=C;
-}
-
-void pq_in_out(vqgen *v,double *n,double *c,double *p,double *q){
- int k;
- *c=0.;
- for(k=0;k<v->elements;k++){
- double center=(p[k]+q[k])/2.;
- n[k]=(center-q[k])*2.;
- *c+=center*n[k];
- }
-}
-
-void pq_center_out(vqgen *v,double *n,double *c,double *center,double *q){
- int k;
- *c=0.;
- for(k=0;k<v->elements;k++){
- n[k]=(center[k]-q[k])*2.;
- *c+=center[k]*n[k];
- }
-}
-
-int lp_split(vqgen *v,vqbook *b,
- long *entryindex,long entries,
- long *pointindex,long points,
- long depth,double slack){
-
- /* The encoder, regardless of book, will be using a straight
- euclidian distance-to-point metric to determine closest point.
- Thus we split the cells using the same (we've already trained the
- codebook set spacing and distribution using special metrics and
- even a midpoint division won't disturb the basic properties) */
-
- long ret;
- double *p;
- double *q;
- double *n;
- double c;
- long *entryA=calloc(entries,sizeof(long));
- long *entryB=calloc(entries,sizeof(long));
- long entriesA=0;
- long entriesB=0;
- long entriesC=0;
- long pointsA=0;
- long i,j,k;
-
- p=alloca(sizeof(double)*v->elements);
- q=alloca(sizeof(double)*v->elements);
- n=alloca(sizeof(double)*v->elements);
- memset(p,0,sizeof(double)*v->elements);
-
- /* We need to find the dividing hyperplane. find the median of each
- axis as the centerpoint and the normal facing farthest point */
-
- /* more than one way to do this part. For small sets, we can brute
- force it. */
-
- if(entries<32){
- /* try every pair possibility */
- double best=0;
- long besti=0;
- long bestj=0;
- double this;
- for(i=0;i<entries-1;i++){
- for(j=i+1;j<entries;j++){
- pq_in_out(v,n,&c,_now(v,entryindex[i]),_now(v,entryindex[j]));
- lp_count(v,entryindex,entries,
- pointindex,points,
- entryA,entryB,
- n, c, slack,
- &entriesA,&entriesB,&entriesC);
- this=(entriesA-entriesC)*(entriesB-entriesC);
-
- if(this>best){
- best=this;
- besti=i;
- bestj=j;
- }
- }
- }
- pq_in_out(v,n,&c,_now(v,entryindex[besti]),_now(v,entryindex[bestj]));
- }else{
- double best=0.;
- long bestj=0;
-
- /* try COG/normal and furthest pairs */
- /* medianpoint */
- for(k=0;k<v->elements;k++){
- /* just sort the index array */
- sortvals=v->pointlist+k;
- els=v->elements;
- qsort(pointindex,points,sizeof(long),iascsort);
- if(points&0x1){
- p[k]=v->pointlist[(pointindex[points/2])*v->elements+k];
- }else{
- p[k]=(v->pointlist[(pointindex[points/2])*v->elements+k]+
- v->pointlist[(pointindex[points/2-1])*v->elements+k])/2.;
- }
- }
-
- /* try every normal, but just for distance */
- for(j=0;j<entries;j++){
- double *ppj=_now(v,entryindex[j]);
- double this=_dist_sq(v,p,ppj);
- if(this>best){
- best=this;
- bestj=j;
- }
- }
-
- pq_center_out(v,n,&c,p,_point(v,pointindex[bestj]));
-
-
- }
-
- /* find cells enclosing points */
- /* count A/B points */
-
- lp_count(v,entryindex,entries,
- pointindex,points,
- entryA,entryB,
- n, c, slack,
- &entriesA,&entriesB,&entriesC);
-
- /* the point index is split evenly, so we do an Order n
- rearrangement into A first/B last and just pass it on */
- {
- long Aptr=0;
- long Bptr=points-1;
- while(Aptr<=Bptr){
- while(Aptr<=Bptr){
- double position=-c;
- for(k=0;k<v->elements;k++)
- position+=_point(v,pointindex[Aptr])[k]*n[k];
- if(position<=0.)break; /* not in A */
- Aptr++;
- }
- while(Aptr<=Bptr){
- double position=-c;
- for(k=0;k<v->elements;k++)
- position+=_point(v,pointindex[Bptr])[k]*n[k];
- if(position>0.)break; /* not in B */
- Bptr--;
- }
- if(Aptr<Bptr){
- long temp=pointindex[Aptr];
- pointindex[Aptr]=pointindex[Bptr];
- pointindex[Bptr]=temp;
- }
- pointsA=Aptr;
- }
- }
-
- fprintf(stderr,"split: total=%ld depth=%ld set A=%ld:%ld:%ld=B\n",
- entries,depth,entriesA-entriesC,entriesC,entriesB-entriesC);
- {
- long thisaux=b->aux++;
- if(b->aux>=b->alloc){
- b->alloc*=2;
- b->ptr0=realloc(b->ptr0,sizeof(long)*b->alloc);
- b->ptr1=realloc(b->ptr1,sizeof(long)*b->alloc);
- b->n=realloc(b->n,sizeof(double)*b->elements*b->alloc);
- b->c=realloc(b->c,sizeof(double)*b->alloc);
- }
-
- memcpy(b->n+b->elements*thisaux,n,sizeof(double)*v->elements);
- b->c[thisaux]=c;
-
- if(entriesA==1){
- ret=1;
- b->ptr0[thisaux]=entryA[0];
- }else{
- b->ptr0[thisaux]= -b->aux;
- ret=lp_split(v,b,entryA,entriesA,pointindex,pointsA,depth+1,slack);
- }
- if(entriesB==1){
- ret++;
- b->ptr1[thisaux]=entryB[0];
- }else{
- b->ptr1[thisaux]= -b->aux;
- ret+=lp_split(v,b,entryB,entriesB,pointindex+pointsA,points-pointsA,
- depth+1,slack);
- }
- }
- free(entryA);
- free(entryB);
- return(ret);
-}
-
-int vqenc_entry(vqbook *b,double *val){
- int ptr=0,k;
- while(1){
- double c= -b->c[ptr];
- double *nptr=b->n+b->elements*ptr;
- for(k=0;k<b->elements;k++)
- c+=nptr[k]*val[k];
- if(c>0.) /* in A */
- ptr= -b->ptr0[ptr];
- else /* in B */
- ptr= -b->ptr1[ptr];
- if(ptr<=0)break;
- }
- return(-ptr);
-}
-
-void vqgen_book(vqgen *v,vqbook *b){
- long i;
- long *entryindex=malloc(sizeof(double)*v->entries);
- long *pointindex=malloc(sizeof(double)*v->points);
-
- memset(b,0,sizeof(vqbook));
- for(i=0;i<v->entries;i++)entryindex[i]=i;
- for(i=0;i<v->points;i++)pointindex[i]=i;
- b->elements=v->elements;
- b->entries=v->entries;
- b->alloc=4096;
- b->ptr0=malloc(sizeof(long)*b->alloc);
- b->ptr1=malloc(sizeof(long)*b->alloc);
- b->n=malloc(sizeof(double)*b->elements*b->alloc);
- b->c=malloc(sizeof(double)*b->alloc);
-
- b->valuelist=malloc(sizeof(double)*b->elements*b->entries);
- b->codelist=malloc(sizeof(long)*b->entries);
- b->lengthlist=malloc(b->entries*sizeof(long));
-
- /* first, generate the encoding decision heirarchy */
- fprintf(stderr,"Total leaves: %ld\n",
- lp_split(v,b,entryindex,v->entries, pointindex,v->points,0,0));
-
- /* run all training points through the decision tree to get a final
- probability count */
- {
- long *probability=calloc(b->entries*2,sizeof(long));
- for(i=0;i<v->points;i++){
- int ret=vqenc_entry(b,v->pointlist+i*v->elements);
- probability[ret]++;
- }
-
- for(i=0;i<b->entries;i++){
- fprintf(stderr,"point %ld: %ld\n",i,probability[i]);
- }
-
- }
-
- /* generate the codewords (short to long) */
-
- free(entryindex);
- free(pointindex);
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-