Enhancements (added cell spacing metrics and minimum cell distances to trainer)
[platform/upstream/libvorbis.git] / vq / train.c
1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
5  * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
6  * PLEASE READ THESE TERMS BEFORE DISTRIBUTING.                     *
7  *                                                                  *
8  * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000             *
9  * by Monty <monty@xiph.org> and The XIPHOPHORUS Company            *
10  * http://www.xiph.org/                                             *
11  *                                                                  *
12  ********************************************************************
13
14  function: utility main for training codebooks
15  last mod: $Id: train.c,v 1.15 2000/02/16 16:18:38 xiphmont Exp $
16
17  ********************************************************************/
18
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <unistd.h>
#include "vqgen.h"
#include "vqext.h"
#include "bookutil.h"
28
/* Fetch the next line from 'in' that does not begin with '#'.

   Lines whose first character is '#' are consumed here; when 'pass' is
   nonzero each such line is also echoed to 'out' followed by a newline.
   Whatever get_line() hands back for the first non-'#' line is returned
   unchanged, including a NULL result. */
static char *rline(FILE *in,FILE *out,int pass){
  char *text;

  for(;;){
    text=get_line(in);

    /* anything that is not a '#' line (NULL included) ends the scan */
    if(text==NULL || text[0]!='#')
      break;

    if(pass)
      fprintf(out,"%s\n",text);
  }

  return text;
}
39
40 /* command line:
41    trainvq  vqfile [options] trainfile [trainfile]
42
43    options: -params     entries,dim,quant
44             -subvector  start[,num]
45             -error      desired_error
46             -iterations iterations
47 */
48
49 static void usage(void){
50   fprintf(stderr, "\nOggVorbis %s VQ codebook trainer\n\n"
51           "<foo>vqtrain vqfile [options] [datasetfile] [datasetfile]\n"
52           "options: -p[arams]     <entries,dim,quant>\n"
53           "         -s[ubvector]  <start[,num]>\n"
54           "         -e[rror]      <desired_error>\n"
55           "         -i[terations] <maxiterations>\n\n"
56           "examples:\n"
57           "   train a new codebook to 1%% tolerance on datafile 'foo':\n"
58           "      xxxvqtrain book -p 256,6,8 -e .01 foo\n"
59           "      (produces a trained set in book-0.vqi)\n\n"
60           "   continue training 'book-0.vqi' (produces book-1.vqi):\n"
61           "      xxxvqtrain book-0.vqi\n\n"
62           "   add subvector from element 1 to <dimension> from files\n"
63           "      data*.m to the training in progress, prodicing book-1.vqi:\n"
64           "      xxxvqtrain book-0.vqi -s 1,1 data*.m\n\n",vqext_booktype);
65 }
66
/* Set to 1 by setexit(); the training loop in main() tests it before
   starting each iteration.  It is written from a signal handler, so it
   is a volatile sig_atomic_t rather than a plain int. */
volatile sig_atomic_t exiting=0;

/* Signal handler (installed for SIGTERM and SIGINT in main): announce
   the pending shutdown on stderr and raise the 'exiting' flag.

   The notice is emitted with write() instead of fprintf() because stdio
   is not async-signal-safe; the text is unchanged.  'dummy' receives
   the signal number and is not used. */
void setexit(int dummy){
  static const char notice[]=
    "\nexiting... please wait to finish this iteration\n";
  ssize_t written;

  (void)dummy;

  /* best effort only: nothing useful can be done here if this fails */
  written=write(STDERR_FILENO,notice,sizeof(notice)-1);
  (void)written;

  exiting=1;
}
72
73 int main(int argc,char *argv[]){
74   vqgen v;
75
76   int entries=-1,dim=-1;
77   int start=0,num=-1;
78   double desired=.05;
79   int iter=1000;
80
81   FILE *out=NULL;
82   char *line;
83   long i,j,k;
84   int init=0;
85   q.quant=-1;
86
87   argv++;
88   if(!*argv){
89     usage();
90     exit(0);
91   }
92
93   /* get the book name, a preexisting book to continue training */
94   {
95     FILE *in=NULL;
96     char *filename=alloca(strlen(*argv)+30),*ptr;
97
98     strcpy(filename,*argv);
99     in=fopen(filename,"r");
100     ptr=strrchr(filename,'-');
101     if(ptr){
102       int num;
103       ptr++;
104       num=atoi(ptr);
105       sprintf(ptr,"%d.vqi",num+1);
106     }else
107       strcat(filename,"-0.vqi");
108     
109     out=fopen(filename,"w");
110     if(out==NULL){
111       fprintf(stderr,"Unable to open %s for writing\n",filename);
112       exit(1);
113     }
114     
115     if(in){
116       /* we wish to suck in a preexisting book and continue to train it */
117       double a;
118       
119       line=rline(in,out,1);
120       if(strcmp(line,vqext_booktype)){
121         fprintf(stderr,"wrong book type; %s!=%s\n",line,vqext_booktype);
122         exit(1);
123       } 
124       
125       line=rline(in,out,1);
126       if(sscanf(line,"%d %d %d",&entries,&dim,&vqext_aux)!=3){
127         fprintf(stderr,"Syntax error reading book file\n");
128         exit(1);
129       }
130       
131       vqgen_init(&v,dim,vqext_aux,entries,vqext_mindist,
132                  vqext_metric,vqext_weight);
133       init=1;
134       
135       /* quant setup */
136       line=rline(in,out,1);
137       if(sscanf(line,"%ld %ld %d %d",&q.min,&q.delta,
138                 &q.quant,&q.sequencep)!=4){
139         fprintf(stderr,"Syntax error reading book file\n");
140         exit(1);
141       }
142       
143       /* quantized entries */
144       i=0;
145       for(j=0;j<entries;j++){
146         for(k=0;k<dim;k++){
147           line=rline(in,out,0);
148           sscanf(line,"%lf",&a);
149           v.entrylist[i++]=a;
150         }
151       }      
152       vqgen_unquantize(&v,&q);
153
154       /* bias, points */
155       i=0;
156       for(j=0;j<entries;j++){
157         line=rline(in,out,0);
158         sscanf(line,"%lf",&a);
159         v.bias[i++]=a;
160       }
161       
162       {
163         double *b=alloca((dim+vqext_aux)*sizeof(double));
164         i=0;
165         v.entries=0; /* hack to avoid reseeding */
166         while(1){
167           for(k=0;k<dim+vqext_aux;k++){
168             line=rline(in,out,0);
169             if(!line)break;
170             sscanf(line,"%lf",b+k);
171           }
172           if(feof(in))break;
173           vqgen_addpoint(&v,b,b+dim);
174         }
175         v.entries=entries;
176       }
177       
178       fclose(in);
179     }
180   }
181   
182   /* get the rest... */
183   argv=argv++;
184   while(*argv){
185     if(argv[0][0]=='-'){
186       /* it's an option */
187       if(!argv[1]){
188         fprintf(stderr,"Option %s missing argument.\n",argv[0]);
189         exit(1);
190       }
191       switch(argv[0][1]){
192       case 'p':
193         if(sscanf(argv[1],"%d,%d,%d",&entries,&dim,&q.quant)!=3)
194           goto syner;
195         break;
196       case 's':
197         if(sscanf(argv[1],"%d,%d",&start,&num)!=2){
198           num= -1;
199           if(sscanf(argv[1],"%d",&start)!=1)
200             goto syner;
201         }
202         break;
203       case 'e':
204         if(sscanf(argv[1],"%lf",&desired)!=1)
205           goto syner;
206         break;
207       case 'i':
208         if(sscanf(argv[1],"%d",&iter)!=1)
209           goto syner;
210         break;
211       default:
212         fprintf(stderr,"Unknown option %s\n",argv[0]);
213         exit(1);
214       }
215       argv+=2;
216     }else{
217       /* it's an input file */
218       char *file=strdup(*argv++);
219       FILE *in;
220       int cols=-1;
221
222       if(!init){
223         if(dim==-1 || entries==-1 || q.quant==-1){
224           fprintf(stderr,"-p required when training a new set\n");
225           exit(1);
226         }
227         vqgen_init(&v,dim,vqext_aux,entries,vqext_mindist,
228                    vqext_metric,vqext_weight);
229         init=1;
230       }
231
232       in=fopen(file,"r");
233       if(in==NULL){
234         fprintf(stderr,"Could not open input file %s\n",file);
235         exit(1);
236       }
237       fprintf(out,"# training file entry: %s\n",file);
238
239       while((line=rline(in,out,0))){
240         if(cols==-1){
241           char *temp=line;
242           while(*temp==' ')temp++;
243           for(cols=0;*temp;cols++){
244             while(*temp>32)temp++;
245             while(*temp==' ')temp++;
246           }
247         }
248         {
249           int i;
250           double b[cols];
251           if(start+num*dim>cols){
252             fprintf(stderr,"ran out of columns reading %s\n",file);
253             exit(1);
254           }
255           while(*line==' ')line++;
256           for(i=0;i<cols;i++){
257
258             /* static length buffer bug workaround */
259             char *temp=line;
260             char old;
261             while(*temp>32)temp++;
262
263             old=temp[0];
264             temp[0]='\0';
265             b[i]=atof(line);
266             temp[0]=old;
267             
268             while(*line>32)line++;
269             while(*line==' ')line++;
270           }
271           if(num<=0)num=(cols-start)/dim;
272           for(i=0;i<num;i++)
273             vqext_addpoint_adj(&v,b,start+i*dim,dim,cols);
274
275         }
276       }
277       fclose(in);
278     }
279   }
280
281   if(!init){
282     fprintf(stderr,"No input files!\n");
283     exit(1);
284   }
285
286   vqext_preprocess(&v);
287
288   /* train the book */
289   signal(SIGTERM,setexit);
290   signal(SIGINT,setexit);
291
292   for(i=0;i<iter && !exiting;i++){
293     double result;
294     if(i!=0){
295       vqgen_unquantize(&v,&q);
296       vqgen_cellmetric(&v);
297     }
298     result=vqgen_iterate(&v);
299     vqext_quantize(&v,&q);
300     if(result<desired)break;
301   }
302
303   /* save the book */
304
305   fprintf(out,"# OggVorbis VQ codebook trainer, intermediate file\n");
306   fprintf(out,"%s\n",vqext_booktype);
307   fprintf(out,"%d %d %d\n",entries,dim,vqext_aux);
308   fprintf(out,"%ld %ld %d %d\n",q.min,q.delta,q.quant,q.sequencep);
309
310   /* quantized entries */
311   fprintf(out,"# quantized entries---\n");
312   i=0;
313   for(j=0;j<entries;j++)
314     for(k=0;k<dim;k++)
315       fprintf(out,"%d\n",(int)(rint(v.entrylist[i++])));
316   
317   fprintf(out,"# biases---\n");
318   i=0;
319   for(j=0;j<entries;j++)
320     fprintf(out,"%f\n",v.bias[i++]);
321
322   fprintf(out,"# points---\n");
323   i=0;
324   for(j=0;j<v.points;j++)
325     for(k=0;k<dim+vqext_aux;k++)
326       fprintf(out,"%f\n",v.pointlist[i++]);
327
328   fclose(out);
329
330   vqgen_unquantize(&v,&q);
331   vqgen_cellmetric(&v);
332   exit(0);
333
334   syner:
335     fprintf(stderr,"Syntax error in argument '%s'\n",*argv);
336     exit(1);
337 }