Allow cascade and metric utilities to take more than one codebook
[platform/upstream/libvorbis.git] / vq / train.c
1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
5  * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
6  * PLEASE READ THESE TERMS DISTRIBUTING.                            *
7  *                                                                  *
8  * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000             *
9  * by Monty <monty@xiph.org> and The XIPHOPHORUS Company            *
10  * http://www.xiph.org/                                             *
11  *                                                                  *
12  ********************************************************************
13
14  function: utility main for training codebooks
15  last mod: $Id: train.c,v 1.13 2000/01/05 15:05:00 xiphmont Exp $
16
17  ********************************************************************/
18
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <math.h>
22 #include <string.h>
23 #include <errno.h>
24 #include <signal.h>
25 #include "vqgen.h"
26 #include "vqext.h"
27 #include "bookutil.h"
28
/* Return the next line obtained from get_line(in) that does not begin
   with '#'.  Lines that do begin with '#' are written to `out`, followed
   by a newline, when `pass` is nonzero; otherwise they are discarded.
   A NULL from get_line() is handed straight back to the caller. */
static char *rline(FILE *in,FILE *out,int pass){
  char *text;

  for(;;){
    text=get_line(in);

    /* anything that is not a comment line (including NULL) ends the scan */
    if(!text || text[0]!='#')
      return(text);

    if(pass)
      fprintf(out,"%s\n",text);
  }
}
39
40 /* command line:
41    trainvq  vqfile [options] trainfile [trainfile]
42
43    options: -params     entries,dim,quant
44             -subvector  start[,num]
45             -error      desired_error
46             -iterations iterations
47 */
48
49 static void usage(void){
50   fprintf(stderr, "\nOggVorbis %s VQ codebook trainer\n\n"
51           "<foo>vqtrain vqfile [options] [datasetfile] [datasetfile]\n"
52           "options: -p[arams]     <entries,dim,quant>\n"
53           "         -s[ubvector]  <start[,num]>\n"
54           "         -e[rror]      <desired_error>\n"
55           "         -i[terations] <maxiterations>\n\n"
56           "examples:\n"
57           "   train a new codebook to 1%% tolerance on datafile 'foo':\n"
58           "      xxxvqtrain book -p 256,6,8 -e .01 foo\n"
59           "      (produces a trained set in book-0.vqi)\n\n"
60           "   continue training 'book-0.vqi' (produces book-1.vqi):\n"
61           "      xxxvqtrain book-0.vqi\n\n"
62           "   add subvector from element 1 to <dimension> from files\n"
63           "      data*.m to the training in progress, prodicing book-1.vqi:\n"
64           "      xxxvqtrain book-0.vqi -s 1,1 data*.m\n\n",vqext_booktype);
65 }
66
/* Nonzero once setexit() has run; main()'s training loop tests it before
   starting each iteration. */
int exiting=0;

/* Handler that main() installs for SIGTERM and SIGINT: tell the user the
   current iteration will be allowed to finish, then raise the flag.
   The signal number is not used. */
void setexit(int dummy){
  (void)dummy;
  fputs("\nexiting... please wait to finish this iteration\n",stderr);
  exiting=1;
}
72
73 int main(int argc,char *argv[]){
74   vqgen v;
75
76   int entries=-1,dim=-1;
77   int start=0,num=-1;
78   double desired=.05;
79   int iter=1000;
80
81   FILE *out=NULL;
82   char *line;
83   long i,j,k;
84   int init=0;
85   q.quant=-1;
86
87   argv++;
88   if(!*argv){
89     usage();
90     exit(0);
91   }
92
93   /* get the book name, a preexisting book to continue training */
94   {
95     FILE *in=NULL;
96     char *filename=alloca(strlen(*argv)+30),*ptr;
97
98     strcpy(filename,*argv);
99     in=fopen(filename,"r");
100     ptr=strrchr(filename,'-');
101     if(ptr){
102       int num;
103       ptr++;
104       num=atoi(ptr);
105       sprintf(ptr,"%d.vqi",num+1);
106     }else
107       strcat(filename,"-0.vqi");
108     
109     out=fopen(filename,"w");
110     if(out==NULL){
111       fprintf(stderr,"Unable to open %s for writing\n",filename);
112       exit(1);
113     }
114     
115     if(in){
116       /* we wish to suck in a preexisting book and continue to train it */
117       double a;
118       
119       line=rline(in,out,1);
120       if(strcmp(line,vqext_booktype)){
121         fprintf(stderr,"wrong book type; %s!=%s\n",line,vqext_booktype);
122         exit(1);
123       } 
124       
125       line=rline(in,out,1);
126       if(sscanf(line,"%d %d %d",&entries,&dim,&vqext_aux)!=3){
127         fprintf(stderr,"Syntax error reading book file\n");
128         exit(1);
129       }
130       
131       vqgen_init(&v,dim,vqext_aux,entries,vqext_metric,vqext_weight);
132       init=1;
133       
134       /* quant setup */
135       line=rline(in,out,1);
136       if(sscanf(line,"%ld %ld %d %d",&q.min,&q.delta,
137                 &q.quant,&q.sequencep)!=4){
138         fprintf(stderr,"Syntax error reading book file\n");
139         exit(1);
140       }
141       
142       /* quantized entries */
143       i=0;
144       for(j=0;j<entries;j++){
145         for(k=0;k<dim;k++){
146           line=rline(in,out,0);
147           sscanf(line,"%lf",&a);
148           v.entrylist[i++]=a;
149         }
150       }      
151       vqgen_unquantize(&v,&q);
152
153       /* bias, points */
154       i=0;
155       for(j=0;j<entries;j++){
156         line=rline(in,out,0);
157         sscanf(line,"%lf",&a);
158         v.bias[i++]=a;
159       }
160       
161       {
162         double *b=alloca((dim+vqext_aux)*sizeof(double));
163         i=0;
164         v.entries=0; /* hack to avoid reseeding */
165         while(1){
166           for(k=0;k<dim+vqext_aux;k++){
167             line=rline(in,out,0);
168             if(!line)break;
169             sscanf(line,"%lf",b+k);
170           }
171           if(feof(in))break;
172           vqgen_addpoint(&v,b,b+dim);
173         }
174         v.entries=entries;
175       }
176       
177       fclose(in);
178     }
179   }
180   
181   /* get the rest... */
182   argv=argv++;
183   while(*argv){
184     if(argv[0][0]=='-'){
185       /* it's an option */
186       if(!argv[1]){
187         fprintf(stderr,"Option %s missing argument.\n",argv[0]);
188         exit(1);
189       }
190       switch(argv[0][1]){
191       case 'p':
192         if(sscanf(argv[1],"%d,%d,%d",&entries,&dim,&q.quant)!=3)
193           goto syner;
194         break;
195       case 's':
196         if(sscanf(argv[1],"%d,%d",&start,&num)!=2){
197           num= -1;
198           if(sscanf(argv[1],"%d",&start)!=1)
199             goto syner;
200         }
201         break;
202       case 'e':
203         if(sscanf(argv[1],"%lf",&desired)!=1)
204           goto syner;
205         break;
206       case 'i':
207         if(sscanf(argv[1],"%d",&iter)!=1)
208           goto syner;
209         break;
210       default:
211         fprintf(stderr,"Unknown option %s\n",argv[0]);
212         exit(1);
213       }
214       argv+=2;
215     }else{
216       /* it's an input file */
217       char *file=strdup(*argv++);
218       FILE *in;
219       int cols=-1;
220
221       if(!init){
222         if(dim==-1 || entries==-1 || q.quant==-1){
223           fprintf(stderr,"-p required when training a new set\n");
224           exit(1);
225         }
226         vqgen_init(&v,dim,vqext_aux,entries,vqext_metric,vqext_weight);
227         init=1;
228       }
229
230       in=fopen(file,"r");
231       if(in==NULL){
232         fprintf(stderr,"Could not open input file %s\n",file);
233         exit(1);
234       }
235       fprintf(out,"# training file entry: %s\n",file);
236
237       while((line=rline(in,out,0))){
238         if(cols==-1){
239           char *temp=line;
240           while(*temp==' ')temp++;
241           for(cols=0;*temp;cols++){
242             while(*temp>32)temp++;
243             while(*temp==' ')temp++;
244           }
245         }
246         {
247           int i;
248           double b[cols];
249           if(start*num+dim>cols){
250             fprintf(stderr,"ran out of columns reading %s\n",file);
251             exit(1);
252           }
253           while(*line==' ')line++;
254           for(i=0;i<cols;i++){
255
256             /* static length buffer bug workaround */
257             char *temp=line;
258             char old;
259             while(*temp>32)temp++;
260
261             old=temp[0];
262             temp[0]='\0';
263             b[i]=atof(line);
264             temp[0]=old;
265             
266             while(*line>32)line++;
267             while(*line==' ')line++;
268           }
269           if(num<=0)num=(cols-start)/dim;
270           for(i=0;i<num;i++)
271             vqext_addpoint_adj(&v,b,start+i*dim,dim,cols);
272
273         }
274       }
275       fclose(in);
276     }
277   }
278
279   if(!init){
280     fprintf(stderr,"No input files!\n");
281     exit(1);
282   }
283
284   vqext_preprocess(&v);
285
286   /* train the book */
287   signal(SIGTERM,setexit);
288   signal(SIGINT,setexit);
289
290   for(i=0;i<iter && !exiting;i++){
291     double result;
292     if(i!=0)vqgen_unquantize(&v,&q);
293     result=vqgen_iterate(&v);
294     vqgen_quantize(&v,&q);
295     if(result<desired)break;
296   }
297
298   /* save the book */
299
300   fprintf(out,"# OggVorbis VQ codebook trainer, intermediate file\n");
301   fprintf(out,"%s\n",vqext_booktype);
302   fprintf(out,"%d %d %d\n",entries,dim,vqext_aux);
303   fprintf(out,"%ld %ld %d %d\n",q.min,q.delta,q.quant,q.sequencep);
304
305   /* quantized entries */
306   fprintf(out,"# quantized entries---\n");
307   i=0;
308   for(j=0;j<entries;j++)
309     for(k=0;k<dim;k++)
310       fprintf(out,"%d\n",(int)(rint(v.entrylist[i++])));
311   
312   fprintf(out,"# biases---\n");
313   i=0;
314   for(j=0;j<entries;j++)
315     fprintf(out,"%f\n",v.bias[i++]);
316
317   fprintf(out,"# points---\n");
318   i=0;
319   for(j=0;j<v.points;j++)
320     for(k=0;k<dim+vqext_aux;k++)
321       fprintf(out,"%f\n",v.pointlist[i++]);
322
323   fclose(out);
324   exit(0);
325
326   syner:
327     fprintf(stderr,"Syntax error in argument '%s'\n",*argv);
328     exit(1);
329 }