vq/vqsplit.c

   1 /********************************************************************
   2  *                                                                  *
   3  * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
   4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
   5  * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
   6  * PLEASE READ THESE TERMS DISTRIBUTING.                            *
   7  *                                                                  *
   8  * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000             *
   9  * by Monty <monty@xiph.org> and The XIPHOPHORUS Company            *
  10  * http://www.xiph.org/                                             *
  11  *                                                                  *
  12  ********************************************************************
  13
  14  function: build a VQ codebook and the encoding decision 'tree'
  15  last mod: $Id: vqsplit.c,v 1.17 2000/02/21 01:13:02 xiphmont Exp $
  16
  17  ********************************************************************/
  18
  19 /* This code is *not* part of libvorbis.  It is used to generate
  20    trained codebooks offline and then spit the results into a
  21    pregenerated codebook that is compiled into libvorbis.  It is an
  22    expensive (but good) algorithm.  Run it on big iron. */
  23
  24 /* There are so many optimizations to explore in *both* stages that
  25    considering the undertaking is almost withering.  For now, we brute
  26    force it all */
  27
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <math.h>
  31 #include <string.h>
  32 #include <sys/time.h>
  33
  34 #include "vqgen.h"
  35 #include "vqsplit.h"
  36 #include "bookutil.h"
  37
  38 /* Codebook generation happens in two steps:
  39
  40    1) Train the codebook with data collected from the encoder: We use
  41    one of a few error metrics (which represent the distance between a
  42    given data point and a candidate point in the training set) to
  43    divide the training set up into cells representing roughly equal
  44    probability of occurring.
  45
  46    2) Generate the codebook and auxiliary data from the trained data set
  47 */
  48
  49 /* Building a codebook from trained set **********************************
  50
  51    The codebook in raw form is technically finished once it's trained.
  52    However, we want to finalize the representative codebook values for
  53    each entry and generate auxiliary information to optimize encoding.
  54    We generate the auxiliary coding tree using collected data,
  55    probably the same data as in the original training */
  56
  57 /* At each recursion, the data set is split in half.  Cells with data
  58    points on side A go into set A, same with set B.  The sets may
  59    overlap.  If the cell overlaps the deviding line only very slightly
  60    (provided parameter), we may choose to ignore the overlap in order
  61    to pare the tree down */
  62
  63 long *isortvals;
  64 int iascsort(const void *a,const void *b){
  65   long av=isortvals[*((long *)a)];
  66   long bv=isortvals[*((long *)b)];
  67   return(av-bv);
  68 }
  69
  70 /* grab it from vqgen.c */
  71 extern double _dist(vqgen *v,double *a, double *b);
  72
  73 /* goes through the split, but just counts it and returns a metric*/
  74 void vqsp_count(vqgen *v,long *membership,
  75                 long *entryindex,long entries,
  76                 long *pointindex,long points,
  77                 long *entryA,long *entryB,
  78                 double *n, double c,
  79                 long *entriesA,long *entriesB,long *entriesC){
  80   long i,j,k;
  81   long A=0,B=0,C=0;
  82
  83   memset(entryA,0,sizeof(long)*entries);
  84   memset(entryB,0,sizeof(long)*entries);
  85
  86   /* Do the points belonging to this cell occur on sideA, sideB or
  87      both? */
  88
  89   for(i=0;i<points;i++){
  90     double *ppt=_point(v,pointindex[i]);
  91     long   firstentry=membership[i];
  92     double position=-c;
  93
  94     if(!entryA[firstentry] || !entryB[firstentry]){
  95       for(k=0;k<v->elements;k++)
  96       position+=ppt[k]*n[k];
  97       if(position>0.)
  98         entryA[firstentry]=1;
  99       else
 100         entryB[firstentry]=1;
 101     }
 102   }
 103
 104   /* The entry splitting isn't total, so that storage has to be
 105      allocated for recursion.  Reuse the entryA/entryB vectors */
 106   /* keep the entries in ascending order (relative to the original
 107      list); we rely on that stability when ordering p/q choice */
 108   for(j=0;j<entries;j++){
 109     if(entryA[j] && entryB[j])C++;
 110     if(entryA[j])entryA[A++]=entryindex[j];
 111     if(entryB[j])entryB[B++]=entryindex[j];
 112   }
 113   *entriesA=A;
 114   *entriesB=B;
 115   *entriesC=C;
 116 }
 117
 118 void pq_in_out(vqgen *v,double *n,double *c,double *p,double *q){
 119   int k;
 120   *c=0.;
 121   for(k=0;k<v->elements;k++){
 122     double center=(p[k]+q[k])/2.;
 123     n[k]=(center-q[k])*2.;
 124     *c+=center*n[k];
 125   }
 126 }
 127
 128 int lp_split(vqgen *v,codebook *b,
 129              long *entryindex,long entries,
 130              long *pointindex,long points,
 131              long depth, long *pointsofar){
 132
 133   encode_aux *t=b->c->encode_tree;
 134
 135   /* The encoder, regardless of book, will be using a straight
 136      euclidian distance-to-point metric to determine closest point.
 137      Thus we split the cells using the same (we've already trained the
 138      codebook set spacing and distribution using special metrics and
 139      even a midpoint division won't disturb the basic properties) */
 140
 141   long ret;
 142   double *n=alloca(sizeof(double)*v->elements);
 143   double c;
 144   long *entryA=calloc(entries,sizeof(long));
 145   long *entryB=calloc(entries,sizeof(long));
 146   long entriesA=0;
 147   long entriesB=0;
 148   long entriesC=0;
 149   long pointsA=0;
 150   long i,j,k;
 151
 152   long *membership=malloc(sizeof(long)*points);
 153
 154   long   besti=-1;
 155   long   bestj=-1;
 156
 157   char spinbuf[80];
 158   sprintf(spinbuf,"splitting [%ld left]... ",v->points-*pointsofar);
 159
 160   if(depth==22 && points==9 && entries==2 && *pointsofar==252935){
 161     fprintf(stderr,"HERE\n");
 162
 163   }
 164
 165
 166   /* which cells do points belong to?  Do this before n^2 best pair chooser. */
 167
 168   for(i=0;i<points;i++){
 169     double *ppt=_point(v,pointindex[i]);
 170     long   firstentry=0;
 171     double firstmetric=_dist(v,_now(v,entryindex[0]),ppt);
 172
 173     if(points*entries>64*1024)spinnit(spinbuf,entries);
 174
 175     for(j=1;j<entries;j++){
 176       double thismetric=_dist(v,_now(v,entryindex[j]),ppt);
 177       if(thismetric<=firstmetric){ /* Not <; on the line goes to higher number */
 178         firstmetric=thismetric;
 179         firstentry=j;
 180       }
 181     }
 182
 183     membership[i]=firstentry;
 184   }
 185
 186   /* We need to find the dividing hyperplane. find the median of each
 187      axis as the centerpoint and the normal facing farthest point */
 188
 189   /* more than one way to do this part.  For small sets, we can brute
 190      force it. */
 191
 192   if(entries<8 || (double)points*entries*entries<16.*1024*1024){
 193     /* try every pair possibility */
 194     double best=0;
 195     double this;
 196     for(i=0;i<entries-1;i++){
 197       for(j=i+1;j<entries;j++){
 198         spinnit(spinbuf,entries-i);
 199         pq_in_out(v,n,&c,_now(v,entryindex[i]),_now(v,entryindex[j]));
 200         vqsp_count(v,membership,
 201                    entryindex,entries,
 202                    pointindex,points,
 203                    entryA,entryB,
 204                    n, c,
 205                    &entriesA,&entriesB,&entriesC);
 206         this=(entriesA-entriesC)*(entriesB-entriesC);
 207
 208         /* when choosing best, we also want some form of stability to
 209            make sure more branches are pared later; secondary
 210            weighting isn;t needed as the entry lists are in ascending
 211            order, and we always try p/q in the same sequence */
 212
 213         if( (besti==-1) ||
 214             (this>best) ){
 215
 216           best=this;
 217           besti=entryindex[i];
 218           bestj=entryindex[j];
 219
 220         }
 221       }
 222     }
 223   }else{
 224     double *p=alloca(v->elements*sizeof(double));
 225     double *q=alloca(v->elements*sizeof(double));
 226     double best=0.;
 227
 228     /* try COG/normal and furthest pairs */
 229     /* meanpoint */
 230     /* eventually, we want to select the closest entry and figure n/c
 231        from p/q (because storing n/c is too large */
 232     for(k=0;k<v->elements;k++){
 233       spinnit(spinbuf,entries);
 234
 235       p[k]=0.;
 236       for(j=0;j<entries;j++)
 237         p[k]+=v->entrylist[entryindex[j]*v->elements+k];
 238       p[k]/=entries;
 239
 240     }
 241
 242     /* we go through the entries one by one, looking for the entry on
 243        the other side closest to the point of reflection through the
 244        center */
 245
 246     for(i=0;i<entries;i++){
 247       double *ppi=_now(v,entryindex[i]);
 248       double ref_best=0.;
 249       double ref_j=-1;
 250       double this;
 251       spinnit(spinbuf,entries-i);
 252
 253       for(k=0;k<v->elements;k++)
 254         q[k]=2*p[k]-ppi[k];
 255
 256       for(j=0;j<entries;j++){
 257         if(j!=i){
 258           double this=_dist(v,q,_now(v,entryindex[j]));
 259           if(ref_j==-1 || this<=ref_best){ /* <=, not <; very important */
 260             ref_best=this;
 261             ref_j=entryindex[j];
 262           }
 263         }
 264       }
 265
 266       pq_in_out(v,n,&c,ppi,_now(v,ref_j));
 267       vqsp_count(v,membership,
 268                  entryindex,entries,
 269                  pointindex,points,
 270                  entryA,entryB,
 271                  n, c,
 272                  &entriesA,&entriesB,&entriesC);
 273       this=(entriesA-entriesC)*(entriesB-entriesC);
 274
 275         /* when choosing best, we also want some form of stability to
 276            make sure more branches are pared later; secondary
 277            weighting isn;t needed as the entry lists are in ascending
 278            order, and we always try p/q in the same sequence */
 279
 280       if( (besti==-1) ||
 281           (this>best) ){
 282
 283         best=this;
 284         besti=entryindex[i];
 285         bestj=ref_j;
 286
 287       }
 288     }
 289     if(besti>bestj){
 290       long temp=besti;
 291       besti=bestj;
 292       bestj=temp;
 293     }
 294
 295   }
 296
 297   /* find cells enclosing points */
 298   /* count A/B points */
 299
 300   pq_in_out(v,n,&c,_now(v,besti),_now(v,bestj));
 301   vqsp_count(v,membership,
 302              entryindex,entries,
 303              pointindex,points,
 304              entryA,entryB,
 305              n, c,
 306              &entriesA,&entriesB,&entriesC);
 307
 308   free(membership);
 309
 310   /* the point index is split, so we do an Order n rearrangement into
 311      A first/B last and just pass it on */
 312   {
 313     long Aptr=0;
 314     long Bptr=points-1;
 315     while(Aptr<=Bptr){
 316       while(Aptr<=Bptr){
 317         double position=-c;
 318         for(k=0;k<v->elements;k++)
 319           position+=_point(v,pointindex[Aptr])[k]*n[k];
 320         if(position<=0.)break; /* not in A */
 321         Aptr++;
 322       }
 323       while(Aptr<=Bptr){
 324         double position=-c;
 325         for(k=0;k<v->elements;k++)
 326           position+=_point(v,pointindex[Bptr])[k]*n[k];
 327         if(position>0.)break; /* not in B */
 328         Bptr--;
 329       }
 330       if(Aptr<Bptr){
 331         long temp=pointindex[Aptr];
 332         pointindex[Aptr]=pointindex[Bptr];
 333         pointindex[Bptr]=temp;
 334       }
 335       pointsA=Aptr;
 336     }
 337   }
 338
 339   /*  fprintf(stderr,"split: total=%ld depth=%ld set A=%ld:%ld:%ld=B\n",
 340       entries,depth,entriesA-entriesC,entriesC,entriesB-entriesC);*/
 341   {
 342     long thisaux=t->aux++;
 343     if(t->aux>=t->alloc){
 344       t->alloc*=2;
 345       t->ptr0=realloc(t->ptr0,sizeof(long)*t->alloc);
 346       t->ptr1=realloc(t->ptr1,sizeof(long)*t->alloc);
 347       t->p=realloc(t->p,sizeof(long)*t->alloc);
 348       t->q=realloc(t->q,sizeof(long)*t->alloc);
 349     }
 350
 351     t->p[thisaux]=besti;
 352     t->q[thisaux]=bestj;
 353
 354     if(entriesA==1){
 355       ret=1;
 356       t->ptr0[thisaux]=entryA[0];
 357       *pointsofar+=pointsA;
 358     }else{
 359       t->ptr0[thisaux]= -t->aux;
 360       ret=lp_split(v,b,entryA,entriesA,pointindex,pointsA,depth+1,pointsofar);
 361     }
 362     if(entriesB==1){
 363       ret++;
 364       t->ptr1[thisaux]=entryB[0];
 365       *pointsofar+=points-pointsA;
 366     }else{
 367       t->ptr1[thisaux]= -t->aux;
 368       ret+=lp_split(v,b,entryB,entriesB,pointindex+pointsA,points-pointsA,
 369                     depth+1,pointsofar);
 370     }
 371   }
 372   free(entryA);
 373   free(entryB);
 374   return(ret);
 375 }
 376
 377 static int _node_eq(encode_aux *v, long a, long b){
 378   long    Aptr0=v->ptr0[a];
 379   long    Aptr1=v->ptr1[a];
 380   long    Bptr0=v->ptr0[b];
 381   long    Bptr1=v->ptr1[b];
 382
 383   /* the possibility of choosing the same p and q, but switched, can;t
 384      happen because we always look for the best p/q in the same search
 385      order and the search is stable */
 386
 387   if(Aptr0==Bptr0 && Aptr1==Bptr1)
 388     return(1);
 389
 390   return(0);
 391 }
 392
 393 void vqsp_book(vqgen *v, codebook *b, long *quantlist){
 394   long i,j;
 395   static_codebook *c=(static_codebook *)b->c;
 396   encode_aux *t;
 397
 398   memset(b,0,sizeof(codebook));
 399   memset(c,0,sizeof(static_codebook));
 400   b->c=c;
 401   t=c->encode_tree=calloc(1,sizeof(encode_aux));
 402
 403   /* make sure there are no duplicate entries and that every
 404      entry has points */
 405
 406   for(i=0;i<v->entries;){
 407     /* duplicate? if so, eliminate */
 408     for(j=0;j<i;j++){
 409       if(_dist(v,_now(v,i),_now(v,j))==0.){
 410         fprintf(stderr,"found a duplicate entry!  removing...\n");
 411         v->entries--;
 412         memcpy(_now(v,i),_now(v,v->entries),sizeof(double)*v->elements);
 413         memcpy(quantlist+i*v->elements,quantlist+v->entries*v->elements,
 414                sizeof(long)*v->elements);
 415         break;
 416       }
 417     }
 418     if(j==i)i++;
 419   }
 420
 421   {
 422     v->assigned=calloc(v->entries,sizeof(long));
 423     for(i=0;i<v->points;i++){
 424       double *ppt=_point(v,i);
 425       double firstmetric=_dist(v,_now(v,0),ppt);
 426       long   firstentry=0;
 427
 428       if(!(i&0xff))spinnit("checking... ",v->points-i);
 429
 430       for(j=0;j<v->entries;j++){
 431         double thismetric=_dist(v,_now(v,j),ppt);
 432         if(thismetric<firstmetric){
 433           firstmetric=thismetric;
 434           firstentry=j;
 435         }
 436       }
 437
 438       v->assigned[firstentry]++;
 439     }
 440
 441     for(j=0;j<v->entries;){
 442       if(v->assigned[j]==0){
 443         fprintf(stderr,"found an unused entry!  removing...\n");
 444         v->entries--;
 445         memcpy(_now(v,j),_now(v,v->entries),sizeof(double)*v->elements);
 446         v->assigned[j]=v->assigned[v->elements];
 447         memcpy(quantlist+j*v->elements,quantlist+v->entries*v->elements,
 448                sizeof(long)*v->elements);
 449         continue;
 450       }
 451       j++;
 452     }
 453   }
 454
 455   fprintf(stderr,"Building a book with %ld unique entries...\n",v->entries);
 456
 457   /* We don't always generate a single tree; it tends to be large.  We
 458      trade off by splitting the cell and point sets into n totally
 459      non-overlapping subsets (where the decision tree tends to have
 460      substantial overlap in hyperplane splits) and generating a tree
 461      for each.  Because the tree depth is O(n) o(lg(n)), the smaller
 462      data set will get us much better storage usage for worst cases
 463      (and we will run toward worst case) */
 464
 465   {
 466     /* split data set... */
 467     int trees=v->entries/(32+log(v->entries)/log(2)*2)+1; /* a guess */
 468     long *entrymap=malloc(v->entries*sizeof(long *));
 469     long **entryindex=malloc(trees*sizeof(long *));
 470     long *entries=calloc(trees,sizeof(long));
 471     long *pointmap=malloc(v->points*sizeof(long));
 472     long desired=(v->entries/trees)+1;
 473     int booknum;
 474     long pointssofar=0;
 475
 476     for(i=0;i<trees;i++)entryindex[i]=calloc(desired,sizeof(long));
 477
 478     fprintf(stderr,"Prepartitioning into %d trees...\n",trees);
 479
 480     /* map points to cells */
 481     for(i=0;i<v->points;i++){
 482       double *ppt=_point(v,i);
 483       double firstmetric=_dist(v,_now(v,0),ppt);
 484       long   firstentry=0;
 485
 486       if(!(i&0xff))spinnit("parititoning... ",v->points-i);
 487
 488       for(j=0;j<v->entries;j++){
 489         double thismetric=_dist(v,_now(v,j),ppt);
 490         if(thismetric<firstmetric){
 491           firstmetric=thismetric;
 492           firstentry=j;
 493         }
 494       }
 495       pointmap[i]=firstentry;
 496     }
 497
 498     /* this could be more effective; long, narrow slices are probably
 499        better than these porous blobs */
 500
 501     for(j=0;j<v->entries;j++){
 502       int choice=-1;
 503       for(i=0;i<trees;i++){
 504         if(entries[i]<desired){ /* still has open space */
 505           choice=i;
 506           break;
 507         }
 508       }
 509
 510       entryindex[choice][entries[choice]++]=j;
 511       entrymap[j]=choice;
 512     }
 513
 514
 515     /* we overload the static codebook ptrs just a bit...  the first
 516        <tree> ptr0s just point to the starting point in the struct for
 517        that tree.  The first ptr0 points to the first tree at offset
 518        <tree>, and thus can be used as the tree count.  Clever but
 519        totally opaque :-( . */
 520
 521     t->alloc=4096;
 522     t->ptr0=malloc(sizeof(long)*t->alloc);
 523     t->ptr1=malloc(sizeof(long)*t->alloc);
 524     t->p=malloc(sizeof(long)*t->alloc);
 525     t->q=malloc(sizeof(long)*t->alloc);
 526     t->aux=trees;
 527     c->dim=v->elements;
 528     c->entries=v->entries;
 529     c->lengthlist=calloc(c->entries,sizeof(long));
 530
 531     for(booknum=0;booknum<trees;booknum++){
 532       long points=0;
 533       long *pointindex;
 534
 535       t->ptr0[booknum]=t->aux;
 536       for(i=0;i<v->points;i++)
 537         if(entrymap[pointmap[i]]==booknum)points++;
 538       pointindex=malloc(points*sizeof(long));
 539       points=0;
 540       for(i=0;i<v->points;i++)
 541         if(entrymap[pointmap[i]]==booknum)pointindex[points++]=i;
 542
 543       /* generate the encoding decision heirarchy */
 544       fprintf(stderr,"Leaves added: %d              \n",
 545               lp_split(v,b,entryindex[booknum],entries[booknum],
 546                        pointindex,points,0,&pointssofar));
 547
 548       free(pointindex);
 549       fprintf(stderr,"Paring/rerouting redundant branches... ");
 550
 551       /* The tree is likely big and redundant.  Pare and reroute branches */
 552       {
 553         int changedflag=1;
 554
 555         while(changedflag){
 556           changedflag=0;
 557
 558           /* span the tree node by node; list unique decision nodes and
 559              short circuit redundant branches */
 560
 561           for(i=t->ptr0[booknum];i<t->aux;){
 562             int k;
 563
 564             /* check list of unique decisions */
 565             for(j=t->ptr0[booknum];j<i;j++)
 566               if(_node_eq(t,i,j))break;
 567
 568             if(j<i){
 569               /* a redundant entry; find all higher nodes referencing it and
 570                  short circuit them to the previously noted unique entry */
 571               changedflag=1;
 572               for(k=0;k<t->aux;k++){
 573                 if(t->ptr0[k]==-i)t->ptr0[k]=-j;
 574                 if(t->ptr1[k]==-i)t->ptr1[k]=-j;
 575               }
 576
 577               /* Now, we need to fill in the hole from this redundant
 578                  entry in the listing.  Insert the last entry in the list.
 579                  Fix the forward pointers to that last entry */
 580               t->aux--;
 581               t->ptr0[i]=t->ptr0[t->aux];
 582               t->ptr1[i]=t->ptr1[t->aux];
 583               t->p[i]=t->p[t->aux];
 584               t->q[i]=t->q[t->aux];
 585               for(k=0;k<t->aux;k++){
 586                 if(t->ptr0[k]==-t->aux)t->ptr0[k]=-i;
 587                 if(t->ptr1[k]==-t->aux)t->ptr1[k]=-i;
 588               }
 589               /* hole plugged */
 590
 591             }else
 592               i++;
 593           }
 594
 595           fprintf(stderr,"\rParing/rerouting redundant branches... "
 596                   "%ld remaining   ",t->aux);
 597         }
 598         fprintf(stderr,"\n");
 599       }
 600     }
 601   }
 602
 603   /* run all training points through the decision tree to get a final
 604      probability count */
 605   {
 606     long *probability=malloc(c->entries*sizeof(long));
 607     for(i=0;i<c->entries;i++)probability[i]=1; /* trivial guard */
 608     b->valuelist=v->entrylist; /* temporary for vqenc_entry; replaced later */
 609
 610     for(i=0;i<v->points;i++){
 611       int ret=codebook_entry(b,v->pointlist+i*v->elements);
 612       probability[ret]++;
 613       if(!(i&0xff))spinnit("counting hits... ",v->points-i);
 614     }
 615
 616     build_tree_from_lengths(c->entries,probability,c->lengthlist);
 617
 618     free(probability);
 619   }
 620
 621   /* Sort the entries by codeword length, short to long (eases
 622      assignment and packing to do it now) */
 623   {
 624     long *wordlen=c->lengthlist;
 625     long *index=malloc(c->entries*sizeof(long));
 626     long *revindex=malloc(c->entries*sizeof(long));
 627     int k;
 628     for(i=0;i<c->entries;i++)index[i]=i;
 629     isortvals=c->lengthlist;
 630     qsort(index,c->entries,sizeof(long),iascsort);
 631
 632     /* rearrange storage; ptr0/1 first as it needs a reverse index */
 633     /* n and c stay unchanged */
 634     for(i=0;i<c->entries;i++)revindex[index[i]]=i;
 635     for(i=t->ptr0[0];i<t->aux;i++){
 636       if(!(i&0x3f))spinnit("sorting... ",t->aux-i);
 637
 638       if(t->ptr0[i]>=0)t->ptr0[i]=revindex[t->ptr0[i]];
 639       if(t->ptr1[i]>=0)t->ptr1[i]=revindex[t->ptr1[i]];
 640       t->p[i]=revindex[t->p[i]];
 641       t->q[i]=revindex[t->q[i]];
 642     }
 643     free(revindex);
 644
 645     /* map lengthlist and vallist with index */
 646     c->lengthlist=calloc(c->entries,sizeof(long));
 647     b->valuelist=malloc(sizeof(double)*c->entries*c->dim);
 648     c->quantlist=malloc(sizeof(long)*c->entries*c->dim);
 649     for(i=0;i<c->entries;i++){
 650       long e=index[i];
 651       for(k=0;k<c->dim;k++){
 652         b->valuelist[i*c->dim+k]=v->entrylist[e*c->dim+k];
 653         c->quantlist[i*c->dim+k]=quantlist[e*c->dim+k];
 654       }
 655       c->lengthlist[i]=wordlen[e];
 656     }
 657
 658     free(wordlen);
 659   }
 660
 661   fprintf(stderr,"Done.                            \n\n");
 662 }
 663