04098a4efcc9e750c1926895df4a1667d9b6edec
[platform/upstream/libvorbis.git] / vq / train.c
1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
5  * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
6  * PLEASE READ THESE TERMS DISTRIBUTING.                            *
7  *                                                                  *
8  * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000             *
9  * by Monty <monty@xiph.org> and The XIPHOPHORUS Company            *
10  * http://www.xiph.org/                                             *
11  *                                                                  *
12  ********************************************************************
13
14  function: utility main for training codebooks
15  last mod: $Id: train.c,v 1.18 2000/06/14 01:38:32 xiphmont Exp $
16
17  ********************************************************************/
18
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <math.h>
22 #include <string.h>
23 #include <errno.h>
24 #include <signal.h>
25 #include "vqgen.h"
26 #include "vqext.h"
27 #include "bookutil.h"
28
/* Return the next line from get_line(in) that does not begin with '#'.
   Lines that do begin with '#' are consumed here; when `pass` is nonzero
   each one is also copied to `out`, followed by a newline.  A NULL result
   from get_line() is handed straight back to the caller. */
static char *rline(FILE *in,FILE *out,int pass){
  char *text;

  for(;;){
    text=get_line(in);
    if(text==NULL || text[0]!='#')
      break;
    /* comment line: optionally pass it through, then keep reading */
    if(pass)
      fprintf(out,"%s\n",text);
  }
  return(text);
}
39
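/* command line:
   <foo>vqtrain vqfile [options] [trainfile] [trainfile]

   options: -params     entries,dim,quant
            -subvector  start[,num]
            -error      desired_error
            -iterations iterations
            -distance   mesh_spacing
            -b          dummy    (disable cell size biasing)
            -c          dummy    (centroid, not median, midpoints)

   Only the first character after the '-' is examined, and every option,
   including -b and -c, consumes the argument that follows it.
*/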
48
49 static void usage(void){
50   fprintf(stderr, "\nOggVorbis %s VQ codebook trainer\n\n"
51           "<foo>vqtrain vqfile [options] [datasetfile] [datasetfile]\n"
52           "options: -p[arams]     <entries,dim,quant>\n"
53           "         -s[ubvector]  <start[,num]>\n"
54           "         -e[rror]      <desired_error>\n"
55           "         -i[terations] <maxiterations>\n"
56           "         -d[istance]   quantization mesh spacing for density limitation\n"
57           "         -b <dummy>    eliminate cell size biasing; use normal LBG\n\n"
58           "         -c <dummy>    Use centroid (not median) midpoints\n"
59
60           "examples:\n"
61           "   train a new codebook to 1%% tolerance on datafile 'foo':\n"
62           "      xxxvqtrain book -p 256,6,8 -e .01 foo\n"
63           "      (produces a trained set in book-0.vqi)\n\n"
64           "   continue training 'book-0.vqi' (produces book-1.vqi):\n"
65           "      xxxvqtrain book-0.vqi\n\n"
66           "   add subvector from element 1 to <dimension> from files\n"
67           "      data*.m to the training in progress, prodicing book-1.vqi:\n"
68           "      xxxvqtrain book-0.vqi -s 1,1 data*.m\n\n",vqext_booktype);
69 }
70
/* Set to 1 by setexit() when a termination signal arrives; the training
   loop in main() tests it before starting each iteration.  An object that
   a signal handler writes and normal code reads must be
   `volatile sig_atomic_t` for the access to be well defined. */
volatile sig_atomic_t exiting=0;

/* Signal handler (installed for SIGTERM and SIGINT by main): request a
   clean stop after the iteration currently in progress.

   dummy: the signal number; unused.

   NOTE(review): fprintf() is not async-signal-safe.  The message is kept
   so the user-visible behaviour is unchanged, but only the store to
   `exiting` is strictly safe here. */
void setexit(int dummy){
  (void)dummy;
  fprintf(stderr,"\nexiting... please wait to finish this iteration\n");
  exiting=1;
}
76
77 int main(int argc,char *argv[]){
78   vqgen v;
79
80   int entries=-1,dim=-1;
81   int start=0,num=-1;
82   double desired=.05,mindist=0.;
83   int iter=1000;
84   int biasp=1;
85   int centroid=0;
86
87   FILE *out=NULL;
88   char *line;
89   long i,j,k;
90   int init=0;
91   q.quant=-1;
92
93   argv++;
94   if(!*argv){
95     usage();
96     exit(0);
97   }
98
99   /* get the book name, a preexisting book to continue training */
100   {
101     FILE *in=NULL;
102     char *filename=alloca(strlen(*argv)+30),*ptr;
103
104     strcpy(filename,*argv);
105     in=fopen(filename,"r");
106     ptr=strrchr(filename,'-');
107     if(ptr){
108       int num;
109       ptr++;
110       num=atoi(ptr);
111       sprintf(ptr,"%d.vqi",num+1);
112     }else
113       strcat(filename,"-0.vqi");
114     
115     out=fopen(filename,"w");
116     if(out==NULL){
117       fprintf(stderr,"Unable to open %s for writing\n",filename);
118       exit(1);
119     }
120     
121     if(in){
122       /* we wish to suck in a preexisting book and continue to train it */
123       double a;
124       
125       line=rline(in,out,1);
126       if(strcmp(line,vqext_booktype)){
127         fprintf(stderr,"wrong book type; %s!=%s\n",line,vqext_booktype);
128         exit(1);
129       } 
130       
131       line=rline(in,out,1);
132       if(sscanf(line,"%d %d %d",&entries,&dim,&vqext_aux)!=3){
133         fprintf(stderr,"Syntax error reading book file\n");
134         exit(1);
135       }
136       
137       vqgen_init(&v,dim,vqext_aux,entries,mindist,
138                  vqext_metric,vqext_weight,centroid);
139       init=1;
140       
141       /* quant setup */
142       line=rline(in,out,1);
143       if(sscanf(line,"%ld %ld %d %d",&q.min,&q.delta,
144                 &q.quant,&q.sequencep)!=4){
145         fprintf(stderr,"Syntax error reading book file\n");
146         exit(1);
147       }
148       
149       /* quantized entries */
150       i=0;
151       for(j=0;j<entries;j++){
152         for(k=0;k<dim;k++){
153           line=rline(in,out,0);
154           sscanf(line,"%lf",&a);
155           v.entrylist[i++]=a;
156         }
157       }      
158       vqgen_unquantize(&v,&q);
159
160       /* bias */
161       i=0;
162       for(j=0;j<entries;j++){
163         line=rline(in,out,0);
164         sscanf(line,"%lf",&a);
165         v.bias[i++]=a;
166       }
167       
168       v.seeded=1;
169       {
170         double *b=alloca((dim+vqext_aux)*sizeof(double));
171         i=0;
172         while(1){
173           for(k=0;k<dim+vqext_aux;k++){
174             line=rline(in,out,0);
175             if(!line)break;
176             sscanf(line,"%lf",b+k);
177           }
178           if(feof(in))break;
179           vqgen_addpoint(&v,b,b+dim);
180         }
181       }
182       
183       fclose(in);
184     }
185   }
186   
187   /* get the rest... */
188   argv=argv++;
189   while(*argv){
190     if(argv[0][0]=='-'){
191       /* it's an option */
192       if(!argv[1]){
193         fprintf(stderr,"Option %s missing argument.\n",argv[0]);
194         exit(1);
195       }
196       switch(argv[0][1]){
197       case 'p':
198         if(sscanf(argv[1],"%d,%d,%d",&entries,&dim,&q.quant)!=3)
199           goto syner;
200         break;
201       case 's':
202         if(sscanf(argv[1],"%d,%d",&start,&num)!=2){
203           num= -1;
204           if(sscanf(argv[1],"%d",&start)!=1)
205             goto syner;
206         }
207         break;
208       case 'e':
209         if(sscanf(argv[1],"%lf",&desired)!=1)
210           goto syner;
211         break;
212       case 'd':
213         if(sscanf(argv[1],"%lf",&mindist)!=1)
214           goto syner;
215         if(init)v.mindist=mindist;
216         break;
217       case 'i':
218         if(sscanf(argv[1],"%d",&iter)!=1)
219           goto syner;
220         break;
221       case 'b':
222         biasp=0;
223         break;
224       case 'c':
225         centroid=1;
226         break;
227       default:
228         fprintf(stderr,"Unknown option %s\n",argv[0]);
229         exit(1);
230       }
231       argv+=2;
232     }else{
233       /* it's an input file */
234       char *file=strdup(*argv++);
235       FILE *in;
236       int cols=-1;
237
238       if(!init){
239         if(dim==-1 || entries==-1 || q.quant==-1){
240           fprintf(stderr,"-p required when training a new set\n");
241           exit(1);
242         }
243         vqgen_init(&v,dim,vqext_aux,entries,mindist,
244                    vqext_metric,vqext_weight,centroid);
245         init=1;
246       }
247
248       in=fopen(file,"r");
249       if(in==NULL){
250         fprintf(stderr,"Could not open input file %s\n",file);
251         exit(1);
252       }
253       fprintf(out,"# training file entry: %s\n",file);
254
255       while((line=rline(in,out,0))){
256         if(cols==-1){
257           char *temp=line;
258           while(*temp==' ')temp++;
259           for(cols=0;*temp;cols++){
260             while(*temp>32)temp++;
261             while(*temp==' ')temp++;
262           }
263         }
264         {
265           int i;
266           double b[cols];
267           if(start+num*dim>cols){
268             fprintf(stderr,"ran out of columns reading %s\n",file);
269             exit(1);
270           }
271           while(*line==' ')line++;
272           for(i=0;i<cols;i++){
273
274             /* static length buffer bug workaround */
275             char *temp=line;
276             char old;
277             while(*temp>32)temp++;
278
279             old=temp[0];
280             temp[0]='\0';
281             b[i]=atof(line);
282             temp[0]=old;
283             
284             while(*line>32)line++;
285             while(*line==' ')line++;
286           }
287           if(num<=0)num=(cols-start)/dim;
288           for(i=0;i<num;i++)
289             vqext_addpoint_adj(&v,b,start+i*dim,dim,cols,num);
290
291         }
292       }
293       fclose(in);
294     }
295   }
296
297   if(!init){
298     fprintf(stderr,"No input files!\n");
299     exit(1);
300   }
301
302   vqext_preprocess(&v);
303
304   /* train the book */
305   signal(SIGTERM,setexit);
306   signal(SIGINT,setexit);
307
308   for(i=0;i<iter && !exiting;i++){
309     double result;
310     if(i!=0){
311       vqgen_unquantize(&v,&q);
312       vqgen_cellmetric(&v);
313     }
314     result=vqgen_iterate(&v,biasp);
315     vqext_quantize(&v,&q);
316     if(result<desired)break;
317   }
318
319   /* save the book */
320
321   fprintf(out,"# OggVorbis VQ codebook trainer, intermediate file\n");
322   fprintf(out,"%s\n",vqext_booktype);
323   fprintf(out,"%d %d %d\n",entries,dim,vqext_aux);
324   fprintf(out,"%ld %ld %d %d\n",
325           q.min,q.delta,q.quant,q.sequencep);
326
327   /* quantized entries */
328   fprintf(out,"# quantized entries---\n");
329   i=0;
330   for(j=0;j<entries;j++)
331     for(k=0;k<dim;k++)
332       fprintf(out,"%d\n",(int)(rint(v.entrylist[i++])));
333   
334   fprintf(out,"# biases---\n");
335   i=0;
336   for(j=0;j<entries;j++)
337     fprintf(out,"%f\n",v.bias[i++]);
338
339   /* we may have done the density limiting mesh trick; refetch the
340      training points from the temp file */
341
342   rewind(v.asciipoints);
343   fprintf(out,"# points---\n");
344   {
345     /* sloppy, no error handling */
346     long bytes;
347     char buff[4096];
348     while((bytes=fread(buff,1,4096,v.asciipoints)))
349       while(bytes)bytes-=fwrite(buff,1,bytes,out);
350   }
351
352   fclose(out);
353   fclose(v.asciipoints);
354
355   vqgen_unquantize(&v,&q);
356   vqgen_cellmetric(&v);
357   exit(0);
358
359   syner:
360     fprintf(stderr,"Syntax error in argument '%s'\n",*argv);
361     exit(1);
362 }