Add cmake as optional build system to AppVeyor configuration

[platform/upstream/libvorbis.git] / vq / vqgen.c
diff --git a/vq/vqgen.c b/vq/vqgen.c

index e9db712..2e46dd1 100644 (file)
--- a/vq/vqgen.c
+++ b/vq/vqgen.c
@@ -1,20 +1,17 @@
  /********************************************************************
   *                                                                  *
  /********************************************************************
   *                                                                  *
- * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
- * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
- * PLEASE READ THESE TERMS DISTRIBUTING.                            *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
   *                                                                  *
   *                                                                  *
- * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-1999             *
- * by 1999 Monty <monty@xiph.org> and The XIPHOPHORUS Company       *
- * http://www.xiph.org/                                             *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001             *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
   *                                                                  *
   ********************************************************************
  
   *                                                                  *
   ********************************************************************
  
- function: build a VQ codebook 
- author: Monty <xiphmont@mit.edu>
- modifications by: Monty
- last modification date: Dec 10 1999
+ function: train a VQ codebook 
+ last mod: $Id$
  
   ********************************************************************/
  
  
   ********************************************************************/
  
@@ -31,7 +28,9 @@
  #include <stdio.h>
  #include <math.h>
  #include <string.h>
  #include <stdio.h>
  #include <math.h>
  #include <string.h>
+
  #include "vqgen.h"
  #include "vqgen.h"
+#include "bookutil.h"
  
  /* Codebook generation happens in two steps: 
  
  
  /* Codebook generation happens in two steps: 
  
@@ -57,250 +56,502 @@
  /* internal helpers *****************************************************/
  #define vN(data,i) (data+v->elements*i)
  
  /* internal helpers *****************************************************/
  #define vN(data,i) (data+v->elements*i)
  
-double *_point(vqgen *v,long ptr){
-  return v->pointlist+(v->elements*ptr);
-}
-
-double *_now(vqgen *v,long ptr){
-  return v->entrylist+(v->elements*ptr);
-}
-
  /* default metric; squared 'distance' from desired value. */
  /* default metric; squared 'distance' from desired value. */
-double _dist_sq(vqgen *v,double *a, double *b){
+float _dist(vqgen *v,float *a, float *b){
    int i;
    int el=v->elements;
    int i;
    int el=v->elements;
-  double acc=0.;
+  float acc=0.f;
    for(i=0;i<el;i++){
    for(i=0;i<el;i++){
-    double val=(a[i]-b[i]);
+    float val=(a[i]-b[i]);
      acc+=val*val;
    }
      acc+=val*val;
    }
-  return acc;
+  return sqrt(acc);
+}
+
+float *_weight_null(vqgen *v,float *a){
+  return a;
  }
  
  /* *must* be beefed up. */
  void _vqgen_seed(vqgen *v){
  }
  
  /* *must* be beefed up. */
  void _vqgen_seed(vqgen *v){
-  memcpy(v->entrylist,v->pointlist,sizeof(double)*v->entries*v->elements);
+  long i;
+  for(i=0;i<v->entries;i++)
+    memcpy(_now(v,i),_point(v,i),sizeof(float)*v->elements);
+  v->seeded=1;
+}
+
+int directdsort(const void *a, const void *b){
+  float av=*((float *)a);
+  float bv=*((float *)b);
+  return (av<bv)-(av>bv);
+}
+
+void vqgen_cellmetric(vqgen *v){
+  int j,k;
+  float min=-1.f,max=-1.f,mean=0.f,acc=0.f;
+  long dup=0,unused=0;
+ #ifdef NOISY
+  int i;
+   char buff[80];
+   float spacings[v->entries];
+   int count=0;
+   FILE *cells;
+   sprintf(buff,"cellspace%d.m",v->it);
+   cells=fopen(buff,"w");
+#endif
+
+  /* minimum, maximum, cell spacing */
+  for(j=0;j<v->entries;j++){
+    float localmin=-1.;
+
+    for(k=0;k<v->entries;k++){
+      if(j!=k){
+        float this=_dist(v,_now(v,j),_now(v,k));
+        if(this>0){
+          if(v->assigned[k] && (localmin==-1 || this<localmin))
+            localmin=this;
+        }else{        
+          if(k<j){
+            dup++;
+            break;
+          }
+        }
+      }
+    }
+    if(k<v->entries)continue;
+
+    if(v->assigned[j]==0){
+      unused++;
+      continue;
+    }
+    
+    localmin=v->max[j]+localmin/2; /* this gives us rough diameter */
+    if(min==-1 || localmin<min)min=localmin;
+    if(max==-1 || localmin>max)max=localmin;
+    mean+=localmin;
+    acc++;
+#ifdef NOISY
+    spacings[count++]=localmin;
+#endif
+  }
+
+  fprintf(stderr,"cell diameter: %.03g::%.03g::%.03g (%ld unused/%ld dup)\n",
+          min,mean/acc,max,unused,dup);
+
+#ifdef NOISY
+  qsort(spacings,count,sizeof(float),directdsort);
+  for(i=0;i<count;i++)
+    fprintf(cells,"%g\n",spacings[i]);
+  fclose(cells);
+#endif            
+
  }
  
  /* External calls *******************************************************/
  
  }
  
  /* External calls *******************************************************/
  
-void vqgen_init(vqgen *v,int elements,int entries,
-               double (*metric)(vqgen *,double *, double *),
-               double spread){
+/* We have two forms of quantization; in the first, each vector
+   element in the codebook entry is orthogonal.  Residues would use this
+   quantization for example.
+
+   In the second, we have a sequence of monotonically increasing
+   values that we wish to quantize as deltas (to save space).  We
+   still need to quantize so that absolute values are accurate. For
+   example, LSP quantizes all absolute values, but the book encodes
+   distance between values because each successive value is larger
+   than the preceeding value.  Thus the desired quantibits apply to
+   the encoded (delta) values, not abs positions. This requires minor
+   additional encode-side trickery. */
+
+void vqgen_quantize(vqgen *v,quant_meta *q){
+
+  float maxdel;
+  float mindel;
+
+  float delta;
+  float maxquant=((1<<q->quant)-1);
+
+  int j,k;
+
+  mindel=maxdel=_now(v,0)[0];
+  
+  for(j=0;j<v->entries;j++){
+    float last=0.f;
+    for(k=0;k<v->elements;k++){
+      if(mindel>_now(v,j)[k]-last)mindel=_now(v,j)[k]-last;
+      if(maxdel<_now(v,j)[k]-last)maxdel=_now(v,j)[k]-last;
+      if(q->sequencep)last=_now(v,j)[k];
+    }
+  }
+
+
+  /* first find the basic delta amount from the maximum span to be
+     encoded.  Loosen the delta slightly to allow for additional error
+     during sequence quantization */
+
+  delta=(maxdel-mindel)/((1<<q->quant)-1.5f);
+
+  q->min=_float32_pack(mindel);
+  q->delta=_float32_pack(delta);
+
+  mindel=_float32_unpack(q->min);
+  delta=_float32_unpack(q->delta);
+
+  for(j=0;j<v->entries;j++){
+    float last=0;
+    for(k=0;k<v->elements;k++){
+      float val=_now(v,j)[k];
+      float now=rint((val-last-mindel)/delta);
+      
+      _now(v,j)[k]=now;
+      if(now<0){
+        /* be paranoid; this should be impossible */
+        fprintf(stderr,"fault; quantized value<0\n");
+        exit(1);
+      }
+
+      if(now>maxquant){
+        /* be paranoid; this should be impossible */
+        fprintf(stderr,"fault; quantized value>max\n");
+        exit(1);
+      }
+      if(q->sequencep)last=(now*delta)+mindel+last;
+    }
+  }
+}
+
+/* much easier :-).  Unlike in the codebook, we don't un-log log
+   scales; we just make sure they're properly offset. */
+void vqgen_unquantize(vqgen *v,quant_meta *q){
+  long j,k;
+  float mindel=_float32_unpack(q->min);
+  float delta=_float32_unpack(q->delta);
+
+  for(j=0;j<v->entries;j++){
+    float last=0.f;
+    for(k=0;k<v->elements;k++){
+      float now=_now(v,j)[k];
+      now=fabs(now)*delta+last+mindel;
+      if(q->sequencep)last=now;
+      _now(v,j)[k]=now;
+    }
+  }
+}
+
+void vqgen_init(vqgen *v,int elements,int aux,int entries,float mindist,
+                float  (*metric)(vqgen *,float *, float *),
+                float *(*weight)(vqgen *,float *),int centroid){
    memset(v,0,sizeof(vqgen));
  
    memset(v,0,sizeof(vqgen));
  
+  v->centroid=centroid;
    v->elements=elements;
    v->elements=elements;
-  v->errspread=spread;
+  v->aux=aux;
+  v->mindist=mindist;
    v->allocated=32768;
    v->allocated=32768;
-  v->pointlist=malloc(v->allocated*v->elements*sizeof(double));
+  v->pointlist=_ogg_malloc(v->allocated*(v->elements+v->aux)*sizeof(float));
  
    v->entries=entries;
  
    v->entries=entries;
-  v->entrylist=malloc(v->entries*v->elements*sizeof(double));
-  v->assigned=malloc(v->entries*sizeof(long));
-  v->bias=calloc(v->entries,sizeof(double));
+  v->entrylist=_ogg_malloc(v->entries*v->elements*sizeof(float));
+  v->assigned=_ogg_malloc(v->entries*sizeof(long));
+  v->bias=_ogg_calloc(v->entries,sizeof(float));
+  v->max=_ogg_calloc(v->entries,sizeof(float));
    if(metric)
      v->metric_func=metric;
    else
    if(metric)
      v->metric_func=metric;
    else
-    v->metric_func=_dist_sq;
+    v->metric_func=_dist;
+  if(weight)
+    v->weight_func=weight;
+  else
+    v->weight_func=_weight_null;
+
+  v->asciipoints=tmpfile();
+
  }
  
  }
  
-void vqgen_addpoint(vqgen *v, double *p){
+void vqgen_addpoint(vqgen *v, float *p,float *a){
+  int k;
+  for(k=0;k<v->elements;k++)
+    fprintf(v->asciipoints,"%.12g\n",p[k]);
+  for(k=0;k<v->aux;k++)
+    fprintf(v->asciipoints,"%.12g\n",a[k]);
+
    if(v->points>=v->allocated){
      v->allocated*=2;
    if(v->points>=v->allocated){
      v->allocated*=2;
-    v->pointlist=realloc(v->pointlist,v->allocated*v->elements*sizeof(double));
+    v->pointlist=_ogg_realloc(v->pointlist,v->allocated*(v->elements+v->aux)*
+                         sizeof(float));
+  }
+
+  memcpy(_point(v,v->points),p,sizeof(float)*v->elements);
+  if(v->aux)memcpy(_point(v,v->points)+v->elements,a,sizeof(float)*v->aux);
+ 
+  /* quantize to the density mesh if it's selected */
+  if(v->mindist>0.f){
+    /* quantize to the mesh */
+    for(k=0;k<v->elements+v->aux;k++)
+      _point(v,v->points)[k]=
+        rint(_point(v,v->points)[k]/v->mindist)*v->mindist;
    }
    }
-  
-  memcpy(_point(v,v->points),p,sizeof(double)*v->elements);
    v->points++;
    v->points++;
-  if(v->points==v->entries)_vqgen_seed(v);
+  if(!(v->points&0xff))spinnit("loading... ",v->points);
  }
  
  }
  
-/* take the trained entries, look at the points that comprise the cell
-   and find midpoints (as the actual encoding process uses euclidian
-   distance rather than any more complex metric to find the closest
-   match */
-
-double *vqgen_midpoint(vqgen *v){
-  long i,j,k;
-  double *lo=malloc(v->entries*v->elements*sizeof(double));
-  double *hi=malloc(v->entries*v->elements*sizeof(double));
+/* yes, not threadsafe.  These utils aren't */
+static int sortit=0;
+static int sortsize=0;
+static int meshcomp(const void *a,const void *b){
+  if(((sortit++)&0xfff)==0)spinnit("sorting mesh...",sortit);
+  return(memcmp(a,b,sortsize));
+}
  
  
-  memset(v->assigned,0,sizeof(long)*v->entries);
-  for(i=0;i<v->points;i++){
-    double *ppt=_point(v,i);
-    double firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
-    long   firstentry=0;
-    for(j=1;j<v->entries;j++){
-      double thismetric=v->metric_func(v,_now(v,j),_point(v,i))+v->bias[j];
-      if(thismetric<firstmetric){
-       firstmetric=thismetric;
-       firstentry=j;
+void vqgen_sortmesh(vqgen *v){
+  sortit=0;
+  if(v->mindist>0.f){
+    long i,march=1;
+
+    /* sort to make uniqueness detection trivial */
+    sortsize=(v->elements+v->aux)*sizeof(float);
+    qsort(v->pointlist,v->points,sortsize,meshcomp);
+
+    /* now march through and eliminate dupes */
+    for(i=1;i<v->points;i++){
+      if(memcmp(_point(v,i),_point(v,i-1),sortsize)){
+        /* a new, unique entry.  march it down */
+        if(i>march)memcpy(_point(v,march),_point(v,i),sortsize);
+        march++;
        }
        }
+      spinnit("eliminating density... ",v->points-i);
      }
      }
-    
-    j=firstentry;
-    if(v->assigned[j]++){
-      for(k=0;k<v->elements;k++){
-       if(ppt[k]<vN(lo,j)[k])vN(lo,j)[k]=ppt[k];
-       if(ppt[k]>vN(hi,j)[k])vN(hi,j)[k]=ppt[k];
-      }
-    }else{
-      for(k=0;k<v->elements;k++){
-       vN(lo,j)[k]=ppt[k];
-       vN(hi,j)[k]=ppt[k];
-      }
-    }
-  }
  
  
-  for(j=0;j<v->entries;j++)
-    if(v->assigned[j])
-      for(k=0;k<v->elements;k++)
-       vN(lo,j)[k]=(vN(lo,j)[k]+vN(hi,j)[k])/2.;
-    else
-      for(k=0;k<v->elements;k++)
-       vN(lo,j)[k]=_now(v,j)[k];
-  free(hi);
-  return(lo);
+    /* we're done */
+    fprintf(stderr,"\r%ld training points remining out of %ld"
+            " after density mesh (%ld%%)\n",march,v->points,march*100/v->points);
+    v->points=march;
+
+  }
+  v->sorted=1;
  }
  
  }
  
-double vqgen_iterate(vqgen *v){
+float vqgen_iterate(vqgen *v,int biasp){
    long   i,j,k;
    long   i,j,k;
-  double fdesired=(double)v->points/v->entries;
-  long  desired=fdesired;
-  double asserror=0.;
-  double meterror=0.;
-  double *new=malloc(sizeof(double)*v->entries*v->elements);
-  long   *nearcount=malloc(v->entries*sizeof(long));
-  double *nearbias=malloc(v->entries*desired*sizeof(double));
  
  
-#ifdef NOISY
-  char buff[80];
-  FILE *assig;
-  FILE *bias;
-  FILE *cells;
-  sprintf(buff,"cells%d.m",v->it);
-  cells=fopen(buff,"w");
-  sprintf(buff,"assig%d.m",v->it);
-  assig=fopen(buff,"w");
-  sprintf(buff,"bias%d.m",v->it);
-  bias=fopen(buff,"w");
-#endif
-
-  fprintf(stderr,"Pass #%d... ",v->it);
+  float fdesired;
+  long  desired;
+  long  desired2;
+
+  float asserror=0.f;
+  float meterror=0.f;
+  float *new;
+  float *new2;
+  long   *nearcount;
+  float *nearbias;
+ #ifdef NOISY
+   char buff[80];
+   FILE *assig;
+   FILE *bias;
+   FILE *cells;
+   sprintf(buff,"cells%d.m",v->it);
+   cells=fopen(buff,"w");
+   sprintf(buff,"assig%d.m",v->it);
+   assig=fopen(buff,"w");
+   sprintf(buff,"bias%d.m",v->it);
+   bias=fopen(buff,"w");
+ #endif
+ 
  
    if(v->entries<2){
      fprintf(stderr,"generation requires at least two entries\n");
      exit(1);
    }
  
  
    if(v->entries<2){
      fprintf(stderr,"generation requires at least two entries\n");
      exit(1);
    }
  
-  /* fill in nearest points for entries */
-  /*memset(v->bias,0,sizeof(double)*v->entries);*/
+  if(!v->sorted)vqgen_sortmesh(v);
+  if(!v->seeded)_vqgen_seed(v);
+
+  fdesired=(float)v->points/v->entries;
+  desired=fdesired;
+  desired2=desired*2;
+  new=_ogg_malloc(sizeof(float)*v->entries*v->elements);
+  new2=_ogg_malloc(sizeof(float)*v->entries*v->elements);
+  nearcount=_ogg_malloc(v->entries*sizeof(long));
+  nearbias=_ogg_malloc(v->entries*desired2*sizeof(float));
+
+  /* fill in nearest points for entry biasing */
+  /*memset(v->bias,0,sizeof(float)*v->entries);*/
    memset(nearcount,0,sizeof(long)*v->entries);
    memset(v->assigned,0,sizeof(long)*v->entries);
    memset(nearcount,0,sizeof(long)*v->entries);
    memset(v->assigned,0,sizeof(long)*v->entries);
-  for(i=0;i<v->points;i++){
-    double *ppt=_point(v,i);
-    double firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
-    double secondmetric=v->metric_func(v,_now(v,1),ppt)+v->bias[1];
-    long   firstentry=0;
-    long   secondentry=1;
-    if(firstmetric>secondmetric){
-      double temp=firstmetric;
-      firstmetric=secondmetric;
-      secondmetric=temp;
-      firstentry=1;
-      secondentry=0;
+  if(biasp){
+    for(i=0;i<v->points;i++){
+      float *ppt=v->weight_func(v,_point(v,i));
+      float firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
+      float secondmetric=v->metric_func(v,_now(v,1),ppt)+v->bias[1];
+      long   firstentry=0;
+      long   secondentry=1;
+      
+      if(!(i&0xff))spinnit("biasing... ",v->points+v->points+v->entries-i);
+      
+      if(firstmetric>secondmetric){
+        float temp=firstmetric;
+        firstmetric=secondmetric;
+        secondmetric=temp;
+        firstentry=1;
+        secondentry=0;
+      }
+      
+      for(j=2;j<v->entries;j++){
+        float thismetric=v->metric_func(v,_now(v,j),ppt)+v->bias[j];
+        if(thismetric<secondmetric){
+          if(thismetric<firstmetric){
+            secondmetric=firstmetric;
+            secondentry=firstentry;
+            firstmetric=thismetric;
+            firstentry=j;
+          }else{
+            secondmetric=thismetric;
+            secondentry=j;
+          }
+        }
+      }
+      
+      j=firstentry;
+      for(j=0;j<v->entries;j++){
+        
+        float thismetric,localmetric;
+        float *nearbiasptr=nearbias+desired2*j;
+        long k=nearcount[j];
+        
+        localmetric=v->metric_func(v,_now(v,j),ppt);
+        /* 'thismetric' is to be the bias value necessary in the current
+           arrangement for entry j to capture point i */
+        if(firstentry==j){
+          /* use the secondary entry as the threshhold */
+          thismetric=secondmetric-localmetric;
+        }else{
+          /* use the primary entry as the threshhold */
+          thismetric=firstmetric-localmetric;
+        }
+        
+        /* support the idea of 'minimum distance'... if we want the
+           cells in a codebook to be roughly some minimum size (as with
+           the low resolution residue books) */
+        
+        /* a cute two-stage delayed sorting hack */
+        if(k<desired){
+          nearbiasptr[k]=thismetric;
+          k++;
+          if(k==desired){
+            spinnit("biasing... ",v->points+v->points+v->entries-i);
+            qsort(nearbiasptr,desired,sizeof(float),directdsort);
+          }
+          
+        }else if(thismetric>nearbiasptr[desired-1]){
+          nearbiasptr[k]=thismetric;
+          k++;
+          if(k==desired2){
+            spinnit("biasing... ",v->points+v->points+v->entries-i);
+            qsort(nearbiasptr,desired2,sizeof(float),directdsort);
+            k=desired;
+          }
+        }
+        nearcount[j]=k;
+      }
      }
      
      }
      
-    for(j=2;j<v->entries;j++){
-      double thismetric=v->metric_func(v,_now(v,j),_point(v,i))+v->bias[j];
-      if(thismetric<secondmetric){
-       if(thismetric<firstmetric){
-         secondmetric=firstmetric;
-         secondentry=firstentry;
-         firstmetric=thismetric;
-         firstentry=j;
-       }else{
-         secondmetric=thismetric;
-         secondentry=j;
-       }
+    /* inflate/deflate */
+    
+    for(i=0;i<v->entries;i++){
+      float *nearbiasptr=nearbias+desired2*i;
+      
+      spinnit("biasing... ",v->points+v->entries-i);
+      
+      /* due to the delayed sorting, we likely need to finish it off....*/
+      if(nearcount[i]>desired)
+        qsort(nearbiasptr,nearcount[i],sizeof(float),directdsort);
+
+      v->bias[i]=nearbiasptr[desired-1];
+
+    }
+  }else{ 
+    memset(v->bias,0,v->entries*sizeof(float));
+  }
+
+  /* Now assign with new bias and find new midpoints */
+  for(i=0;i<v->points;i++){
+    float *ppt=v->weight_func(v,_point(v,i));
+    float firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
+    long   firstentry=0;
+
+    if(!(i&0xff))spinnit("centering... ",v->points-i);
+
+    for(j=0;j<v->entries;j++){
+      float thismetric=v->metric_func(v,_now(v,j),ppt)+v->bias[j];
+      if(thismetric<firstmetric){
+        firstmetric=thismetric;
+        firstentry=j;
        }
      }
        }
      }
-      
+
      j=firstentry;
      j=firstentry;
-    meterror+=firstmetric-v->bias[firstentry];
-    /* set up midpoints for next iter */
-    if(v->assigned[j]++)
-      for(k=0;k<v->elements;k++)
-       vN(new,j)[k]+=_point(v,i)[k];
-    else
-      for(k=0;k<v->elements;k++)
-       vN(new,j)[k]=_point(v,i)[k];
-
-   
+      
  #ifdef NOISY
      fprintf(cells,"%g %g\n%g %g\n\n",
  #ifdef NOISY
      fprintf(cells,"%g %g\n%g %g\n\n",
-           _now(v,j)[0],_now(v,j)[1],
-           _point(v,i)[0],_point(v,i)[1]);
+          _now(v,j)[0],_now(v,j)[1],
+          ppt[0],ppt[1]);
  #endif
  
  #endif
  
-    for(j=0;j<v->entries;j++){
-      
-      double thismetric;
-      double *nearbiasptr=nearbias+desired*j;
-      long k=nearcount[j]-1;
-      
-      /* 'thismetric' is to be the bias value necessary in the current
-        arrangement for entry j to capture point i */
-      if(firstentry==j){
-       /* use the secondary entry as the threshhold */
-       thismetric=secondmetric-v->metric_func(v,_now(v,j),_point(v,i));
+    firstmetric-=v->bias[j];
+    meterror+=firstmetric;
+
+    if(v->centroid==0){
+      /* set up midpoints for next iter */
+      if(v->assigned[j]++){
+        for(k=0;k<v->elements;k++)
+          vN(new,j)[k]+=ppt[k];
+        if(firstmetric>v->max[j])v->max[j]=firstmetric;
        }else{
        }else{
-       /* use the primary entry as the threshhold */
-       thismetric=firstmetric-v->metric_func(v,_now(v,j),_point(v,i));
+        for(k=0;k<v->elements;k++)
+          vN(new,j)[k]=ppt[k];
+        v->max[j]=firstmetric;
        }
        }
-      
-      if(k>=0 && thismetric>nearbiasptr[k]){
-       
-       /* start at the end and search backward for where this entry
-          belongs */
-       
-       for(;k>0;k--) if(nearbiasptr[k-1]>=thismetric)break;
-       
-       /* insert at k.  Shift and inject. */
-       memmove(nearbiasptr+k+1,nearbiasptr+k,(desired-k-1)*sizeof(double));
-       nearbiasptr[k]=thismetric;
-       
-       if(nearcount[j]<desired)nearcount[j]++;
-       
+    }else{
+      /* centroid */
+      if(v->assigned[j]++){
+        for(k=0;k<v->elements;k++){
+          if(vN(new,j)[k]>ppt[k])vN(new,j)[k]=ppt[k];
+          if(vN(new2,j)[k]<ppt[k])vN(new2,j)[k]=ppt[k];
+        }
+        if(firstmetric>v->max[firstentry])v->max[j]=firstmetric;
        }else{
        }else{
-       if(nearcount[j]<desired){
-         /* we checked the thresh earlier.  We know this is the
-            last entry */
-         nearbiasptr[nearcount[j]++]=thismetric;
-       }
+        for(k=0;k<v->elements;k++){
+          vN(new,j)[k]=ppt[k];
+          vN(new2,j)[k]=ppt[k];
+        }
+        v->max[firstentry]=firstmetric;
        }
      }
    }
        }
      }
    }
-  
-  /* inflate/deflate */
-  for(i=0;i<v->entries;i++)
-    v->bias[i]=nearbias[(i+1)*desired-1];
  
  
-  /* last, assign midpoints */
+  /* assign midpoints */
+
    for(j=0;j<v->entries;j++){
    for(j=0;j<v->entries;j++){
-    asserror+=fabs(v->assigned[j]-fdesired);
-    if(v->assigned[j])
-      for(k=0;k<v->elements;k++)
-       _now(v,j)[k]=vN(new,j)[k]/v->assigned[j];
  #ifdef NOISY
      fprintf(assig,"%ld\n",v->assigned[j]);
      fprintf(bias,"%g\n",v->bias[j]);
  #endif
  #ifdef NOISY
      fprintf(assig,"%ld\n",v->assigned[j]);
      fprintf(bias,"%g\n",v->bias[j]);
  #endif
+    asserror+=fabs(v->assigned[j]-fdesired);
+    if(v->assigned[j]){
+      if(v->centroid==0){
+        for(k=0;k<v->elements;k++)
+          _now(v,j)[k]=vN(new,j)[k]/v->assigned[j];
+      }else{
+        for(k=0;k<v->elements;k++)
+          _now(v,j)[k]=(vN(new,j)[k]+vN(new2,j)[k])/2.f;
+      }
+    }
    }
  
    asserror/=(v->entries*fdesired);
    }
  
    asserror/=(v->entries*fdesired);
+
+  fprintf(stderr,"Pass #%d... ",v->it);
    fprintf(stderr,": dist %g(%g) metric error=%g \n",
    fprintf(stderr,": dist %g(%g) metric error=%g \n",
-         asserror,fdesired,meterror/v->points);
+          asserror,fdesired,meterror/v->points);
    v->it++;
    
    free(new);
    v->it++;
    
    free(new);
@@ -314,342 +565,3 @@ double vqgen_iterate(vqgen *v){
    return(asserror);
  }
  
    return(asserror);
  }
  
-
-/* Building a codebook from trained set **********************************
-
-   The codebook in raw form is technically finished once it's trained.
-   However, we want to finalize the representative codebook values for
-   each entry and generate auxiliary information to optimize encoding.
-   We generate the auxiliary coding tree using collected data,
-   probably the same data as in the original training */
-
-/* At each recursion, the data set is split in half.  Cells with data
-   points on side A go into set A, same with set B.  The sets may
-   overlap.  If the cell overlaps the deviding line only very slightly
-   (provided parameter), we may choose to ignore the overlap in order
-   to pare the tree down */
-
-double *sortvals;
-int els;
-int iascsort(const void *a,const void *b){
-  double av=sortvals[*((long *)a) * els];
-  double bv=sortvals[*((long *)b) * els];
-  if(av<bv)return(-1);
-  return(1);
-}
-
-/* goes through the split, but just counts it and returns a metric*/
-void lp_count(vqgen *v,long *entryindex,long entries, 
-               long *pointindex,long points,
-               long *entryA,long *entryB,
-               double *n, double c, double slack,
-               long *entriesA,long *entriesB,long *entriesC){
-  long i,j,k;
-  long A=0,B=0,C=0;
-
-  memset(entryA,0,sizeof(long)*entries);
-  memset(entryB,0,sizeof(long)*entries);
-
-  for(i=0;i<points;i++){
-    double *ppt=_point(v,pointindex[i]);
-    long   firstentry=0;
-    double firstmetric=_dist_sq(v,_now(v,entryindex[0]),ppt);
-    double position=-c;
-    
-    for(j=1;j<entries;j++){
-      double thismetric=_dist_sq(v,_now(v,entryindex[j]),ppt);
-      if(thismetric<firstmetric){
-       firstmetric=thismetric;
-       firstentry=j;
-      }
-    }
-
-    /* count point split */
-    for(k=0;k<v->elements;k++)
-      position+=ppt[k]*n[k];
-    if(position>0.){
-      entryA[firstentry]++;
-    }else{
-      entryB[firstentry]++;
-    }
-  }
-
-  /* look to see if entries are in the slack zone */
-  /* The entry splitting isn't total, so that storage has to be
-     allocated for recursion.  Reuse the entryA/entryB vectors */
-  for(j=0;j<entries;j++){
-    long total=entryA[j]+entryB[j];
-    if((double)entryA[j]/total<slack){
-      entryA[j]=0;
-    }else if((double)entryB[j]/total<slack){
-      entryB[j]=0;
-    }
-    if(entryA[j] && entryB[j])C++;
-    if(entryA[j])entryA[A++]=entryindex[j];
-    if(entryB[j])entryB[B++]=entryindex[j];
-  }
-  *entriesA=A;
-  *entriesB=B;
-  *entriesC=C;
-}
-
-void pq_in_out(vqgen *v,double *n,double *c,double *p,double *q){
-  int k;
-  *c=0.;
-  for(k=0;k<v->elements;k++){
-    double center=(p[k]+q[k])/2.;
-    n[k]=(center-q[k])*2.;
-    *c+=center*n[k];
-  }
-}
-
-void pq_center_out(vqgen *v,double *n,double *c,double *center,double *q){
-  int k;
-  *c=0.;
-  for(k=0;k<v->elements;k++){
-    n[k]=(center[k]-q[k])*2.;
-    *c+=center[k]*n[k];
-  }
-}
-
-int lp_split(vqgen *v,vqbook *b,
-            long *entryindex,long entries, 
-            long *pointindex,long points,
-            long depth,double slack){
-
-  /* The encoder, regardless of book, will be using a straight
-     euclidian distance-to-point metric to determine closest point.
-     Thus we split the cells using the same (we've already trained the
-     codebook set spacing and distribution using special metrics and
-     even a midpoint division won't disturb the basic properties) */
-
-  long ret;
-  double *p;
-  double *q;
-  double *n;
-  double c;
-  long *entryA=calloc(entries,sizeof(long));
-  long *entryB=calloc(entries,sizeof(long));
-  long entriesA=0;
-  long entriesB=0;
-  long entriesC=0;
-  long pointsA=0;
-  long i,j,k;
-
-  p=alloca(sizeof(double)*v->elements);
-  q=alloca(sizeof(double)*v->elements);
-  n=alloca(sizeof(double)*v->elements);
-  memset(p,0,sizeof(double)*v->elements);
-
-  /* We need to find the dividing hyperplane. find the median of each
-     axis as the centerpoint and the normal facing farthest point */
-
-  /* more than one way to do this part.  For small sets, we can brute
-     force it. */
-
-  if(entries<32){
-    /* try every pair possibility */
-    double best=0;
-    long   besti=0;
-    long   bestj=0;
-    double this;
-    for(i=0;i<entries-1;i++){
-      for(j=i+1;j<entries;j++){
-       pq_in_out(v,n,&c,_now(v,entryindex[i]),_now(v,entryindex[j]));
-       lp_count(v,entryindex,entries, 
-                pointindex,points,
-                entryA,entryB,
-                n, c, slack,
-                &entriesA,&entriesB,&entriesC);
-       this=(entriesA-entriesC)*(entriesB-entriesC);
-
-       if(this>best){
-         best=this;
-         besti=i;
-         bestj=j;
-       }
-      }
-    }
-    pq_in_out(v,n,&c,_now(v,entryindex[besti]),_now(v,entryindex[bestj]));
-  }else{
-    double best=0.;
-    long   bestj=0;
-
-    /* try COG/normal and furthest pairs */
-    /* medianpoint */
-    for(k=0;k<v->elements;k++){
-      /* just sort the index array */
-      sortvals=v->pointlist+k;
-      els=v->elements;
-      qsort(pointindex,points,sizeof(long),iascsort);
-      if(points&0x1){
-       p[k]=v->pointlist[(pointindex[points/2])*v->elements+k];
-      }else{
-       p[k]=(v->pointlist[(pointindex[points/2])*v->elements+k]+
-             v->pointlist[(pointindex[points/2-1])*v->elements+k])/2.;
-      }
-    }
-    
-    /* try every normal, but just for distance */
-    for(j=0;j<entries;j++){
-      double *ppj=_now(v,entryindex[j]);
-      double this=_dist_sq(v,p,ppj);
-      if(this>best){
-       best=this;
-       bestj=j;
-      }
-    }
-
-    pq_center_out(v,n,&c,p,_point(v,pointindex[bestj]));
-
-
-  }
-
-  /* find cells enclosing points */
-  /* count A/B points */
-
-  lp_count(v,entryindex,entries, 
-          pointindex,points,
-          entryA,entryB,
-          n, c, slack,
-          &entriesA,&entriesB,&entriesC);
-
-  /* the point index is split evenly, so we do an Order n
-     rearrangement into A first/B last and just pass it on */
-  {
-    long Aptr=0;
-    long Bptr=points-1;
-    while(Aptr<=Bptr){
-      while(Aptr<=Bptr){
-       double position=-c;
-       for(k=0;k<v->elements;k++)
-         position+=_point(v,pointindex[Aptr])[k]*n[k];
-       if(position<=0.)break; /* not in A */
-       Aptr++;
-      }
-      while(Aptr<=Bptr){
-       double position=-c;
-       for(k=0;k<v->elements;k++)
-         position+=_point(v,pointindex[Bptr])[k]*n[k];
-       if(position>0.)break; /* not in B */
-       Bptr--;
-      }
-      if(Aptr<Bptr){
-       long temp=pointindex[Aptr];
-       pointindex[Aptr]=pointindex[Bptr];
-       pointindex[Bptr]=temp;
-      }
-      pointsA=Aptr;
-    }
-  }
-
-  fprintf(stderr,"split: total=%ld depth=%ld set A=%ld:%ld:%ld=B\n",
-         entries,depth,entriesA-entriesC,entriesC,entriesB-entriesC);
-  {
-    long thisaux=b->aux++;
-    if(b->aux>=b->alloc){
-      b->alloc*=2;
-      b->ptr0=realloc(b->ptr0,sizeof(long)*b->alloc);
-      b->ptr1=realloc(b->ptr1,sizeof(long)*b->alloc);
-      b->n=realloc(b->n,sizeof(double)*b->elements*b->alloc);
-      b->c=realloc(b->c,sizeof(double)*b->alloc);
-    }
-    
-    memcpy(b->n+b->elements*thisaux,n,sizeof(double)*v->elements);
-    b->c[thisaux]=c;
-
-    if(entriesA==1){
-      ret=1;
-      b->ptr0[thisaux]=entryA[0];
-    }else{
-      b->ptr0[thisaux]= -b->aux;
-      ret=lp_split(v,b,entryA,entriesA,pointindex,pointsA,depth+1,slack); 
-    }
-    if(entriesB==1){
-      ret++;
-      b->ptr1[thisaux]=entryB[0];
-    }else{
-      b->ptr1[thisaux]= -b->aux;
-      ret+=lp_split(v,b,entryB,entriesB,pointindex+pointsA,points-pointsA,
-                  depth+1,slack); 
-    }
-  }
-  free(entryA);
-  free(entryB);
-  return(ret);
-}
-
-int vqenc_entry(vqbook *b,double *val){
-  int ptr=0,k;
-  while(1){
-    double c= -b->c[ptr];
-    double *nptr=b->n+b->elements*ptr;
-    for(k=0;k<b->elements;k++)
-      c+=nptr[k]*val[k];
-    if(c>0.) /* in A */
-      ptr= -b->ptr0[ptr];
-    else     /* in B */
-      ptr= -b->ptr1[ptr];
-    if(ptr<=0)break;
-  }
-  return(-ptr);
-}
-
-void vqgen_book(vqgen *v,vqbook *b){
-  long i;
-  long *entryindex=malloc(sizeof(double)*v->entries);
-  long *pointindex=malloc(sizeof(double)*v->points);
-
-  memset(b,0,sizeof(vqbook));
-  for(i=0;i<v->entries;i++)entryindex[i]=i;
-  for(i=0;i<v->points;i++)pointindex[i]=i;
-  b->elements=v->elements;
-  b->entries=v->entries;
-  b->alloc=4096;
-  b->ptr0=malloc(sizeof(long)*b->alloc);
-  b->ptr1=malloc(sizeof(long)*b->alloc);
-  b->n=malloc(sizeof(double)*b->elements*b->alloc);
-  b->c=malloc(sizeof(double)*b->alloc);
-
-  b->valuelist=malloc(sizeof(double)*b->elements*b->entries);
-  b->codelist=malloc(sizeof(long)*b->entries);
-  b->lengthlist=malloc(b->entries*sizeof(long));
-  
-  /* first, generate the encoding decision heirarchy */
-  fprintf(stderr,"Total leaves: %ld\n",
-         lp_split(v,b,entryindex,v->entries, pointindex,v->points,0,0));
-  
-  /* run all training points through the decision tree to get a final
-     probability count */
-  {
-    long *probability=calloc(b->entries*2,sizeof(long));
-    for(i=0;i<v->points;i++){
-      int ret=vqenc_entry(b,v->pointlist+i*v->elements);
-      probability[ret]++;
-    }
-
-    for(i=0;i<b->entries;i++){
-      fprintf(stderr,"point %ld: %ld\n",i,probability[i]);
-    }
-
-  }
-
-  /* generate the codewords (short to long) */
-
-  free(entryindex);
-  free(pointindex);
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-