1 /********************************************************************
3 * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
5 * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
6 * PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000 *
9 * by Monty <monty@xiph.org> and The XIPHOPHORUS Company *
10 * http://www.xiph.org/ *
12 ********************************************************************
14 function: utility main for training codebooks
15 last mod: $Id: train.c,v 1.18 2000/06/14 01:38:32 xiphmont Exp $
17 ********************************************************************/
/* rline: fetch the next line from `in` using get_line().
 *
 *   in   - stream the line is read from
 *   out  - stream that '#' comment lines are echoed to
 *   pass - when nonzero, a line beginning with '#' is copied to `out`
 *          followed by a newline; when zero it is not copied
 *
 * A NULL result from get_line() is never dereferenced here (the `line &&`
 * guard).  Presumably '#' lines are skipped and the first non-comment
 * line (or NULL) is returned to the caller -- TODO confirm against the
 * rest of the body. */
29 static char *rline(FILE *in,FILE *out,int pass){
31 char *line=get_line(in);
32 if(line && line[0]=='#'){
33 if(pass)fprintf(out,"%s\n",line);
41 trainvq vqfile [options] trainfile [trainfile]
43 options: -params entries,dim,quant
44 -subvector start[,num]
46 -iterations iterations
/* usage: print the command-line help for the codebook trainer to stderr.
 * The single %s in the banner is filled in with vqext_booktype; "%%" in
 * the examples prints a literal percent sign.  Takes no arguments and
 * writes only to stderr. */
49 static void usage(void){
50 fprintf(stderr, "\nOggVorbis %s VQ codebook trainer\n\n"
51 "<foo>vqtrain vqfile [options] [datasetfile] [datasetfile]\n"
52 "options: -p[arams] <entries,dim,quant>\n"
53 " -s[ubvector] <start[,num]>\n"
54 " -e[rror] <desired_error>\n"
55 " -i[terations] <maxiterations>\n"
56 " -d[istance] quantization mesh spacing for density limitation\n"
57 " -b <dummy> eliminate cell size biasing; use normal LBG\n\n"
58 " -c <dummy> Use centroid (not median) midpoints\n"
61 " train a new codebook to 1%% tolerance on datafile 'foo':\n"
62 " xxxvqtrain book -p 256,6,8 -e .01 foo\n"
63 " (produces a trained set in book-0.vqi)\n\n"
64 " continue training 'book-0.vqi' (produces book-1.vqi):\n"
65 " xxxvqtrain book-0.vqi\n\n"
66 " add subvector from element 1 to <dimension> from files\n"
67 " data*.m to the training in progress, producing book-1.vqi:\n"
68 " xxxvqtrain book-0.vqi -s 1,1 data*.m\n\n",vqext_booktype);
/* setexit: handler that main() registers with signal() for SIGTERM and
 * SIGINT.  Prints a notice to stderr telling the user that the program
 * is exiting once the current iteration finishes.
 *
 *   dummy - the handler's int argument; not used by the visible line
 *
 * NOTE(review): fprintf() is not on the list of async-signal-safe
 * functions, so calling it from a handler is technically unsafe.
 * Presumably this handler also sets the `exiting` flag that main()'s
 * training loop tests -- confirm against the rest of the body. */
72 void setexit(int dummy){
73 fprintf(stderr,"\nexiting... please wait to finish this iteration\n");
/* main: command-line driver for the VQ codebook trainer.
 *
 * In source order, the visible code:
 *   - builds the output file name from the first argument and opens it;
 *   - reloads a previously written book (type line, sizes, quantizer
 *     settings, entries, biases, points) from `in`;
 *   - parses the remaining options and reads training files, adding
 *     points to the generator `v`;
 *   - installs setexit() for SIGTERM/SIGINT and runs training iterations;
 *   - writes the intermediate book file to `out`.
 *
 *   argc, argv - the usual command-line arguments; the option syntax is
 *                described by usage(). */
77 int main(int argc,char *argv[]){
/* -1 marks "not yet specified"; tested before starting a new set below */
80 int entries=-1,dim=-1;
82 double desired=.05,mindist=0.;
99 /* get the book name, a preexisting book to continue training */
/* 30 bytes of slack leave room for the "-0.vqi" / "<n>.vqi" suffix
   appended below */
102 char *filename=alloca(strlen(*argv)+30),*ptr;
104 strcpy(filename,*argv);
105 in=fopen(filename,"r");
/* locate the last '-' in the name */
106 ptr=strrchr(filename,'-');
/* rewrite the name from ptr onward using the next sequence number */
111 sprintf(ptr,"%d.vqi",num+1);
/* presumably the branch taken when the name carries no sequence
   suffix yet -- the selecting condition is not visible; confirm */
113 strcat(filename,"-0.vqi");
115 out=fopen(filename,"w");
117 fprintf(stderr,"Unable to open %s for writing\n",filename);
122 /* we wish to suck in a preexisting book and continue to train it */
/* first non-comment line must match vqext_booktype exactly.
   NOTE(review): `line` is handed to strcmp() with no NULL check, yet
   the training-file loop below tests rline()'s result for NULL --
   a truncated book file would presumably crash here; confirm */
125 line=rline(in,out,1);
126 if(strcmp(line,vqext_booktype)){
127 fprintf(stderr,"wrong book type; %s!=%s\n",line,vqext_booktype);
/* second line: entries, dim and vqext_aux as three integers */
131 line=rline(in,out,1);
132 if(sscanf(line,"%d %d %d",&entries,&dim,&vqext_aux)!=3){
133 fprintf(stderr,"Syntax error reading book file\n");
137 vqgen_init(&v,dim,vqext_aux,entries,mindist,
138 vqext_metric,vqext_weight,centroid);
/* third line: quantizer settings (min, delta, quant, sequencep) */
142 line=rline(in,out,1);
143 if(sscanf(line,"%ld %ld %d %d",&q.min,&q.delta,
144 &q.quant,&q.sequencep)!=4){
145 fprintf(stderr,"Syntax error reading book file\n");
149 /* quantized entries */
151 for(j=0;j<entries;j++){
/* one floating-point value per line; comment lines are not echoed
   (pass=0).  NOTE(review): the sscanf() result is ignored, so a
   malformed line leaves `a` unchanged */
153 line=rline(in,out,0);
154 sscanf(line,"%lf",&a);
158 vqgen_unquantize(&v,&q);
162 for(j=0;j<entries;j++){
163 line=rline(in,out,0);
164 sscanf(line,"%lf",&a);
/* stack buffer for one point: dim values followed by vqext_aux values */
170 double *b=alloca((dim+vqext_aux)*sizeof(double));
173 for(k=0;k<dim+vqext_aux;k++){
174 line=rline(in,out,0);
176 sscanf(line,"%lf",b+k);
/* b is the start of the point; b+dim is the start of its trailing
   vqext_aux values */
179 vqgen_addpoint(&v,b,b+dim);
187 /* get the rest... */
193 fprintf(stderr,"Option %s missing argument.\n",argv[0]);
/* "entries,dim,quant" triple -- presumably the -p option listed in
   usage(); the option dispatch itself is not visible */
198 if(sscanf(argv[1],"%d,%d,%d",&entries,&dim,&q.quant)!=3)
/* try "start,num" first, then a bare "start" */
202 if(sscanf(argv[1],"%d,%d",&start,&num)!=2){
204 if(sscanf(argv[1],"%d",&start)!=1)
209 if(sscanf(argv[1],"%lf",&desired)!=1)
213 if(sscanf(argv[1],"%lf",&mindist)!=1)
/* when `init` is set, store the new spacing into v directly */
215 if(init)v.mindist=mindist;
218 if(sscanf(argv[1],"%d",&iter)!=1)
228 fprintf(stderr,"Unknown option %s\n",argv[0]);
233 /* it's an input file */
/* private copy of the file name; argv advances to the next argument */
234 char *file=strdup(*argv++);
/* starting a new set needs entries, dim and quant to have been given */
239 if(dim==-1 || entries==-1 || q.quant==-1){
240 fprintf(stderr,"-p required when training a new set\n");
243 vqgen_init(&v,dim,vqext_aux,entries,mindist,
244 vqext_metric,vqext_weight,centroid);
250 fprintf(stderr,"Could not open input file %s\n",file);
/* record the source file as a '#' comment in the output book */
253 fprintf(out,"# training file entry: %s\n",file);
/* consume lines until rline() yields NULL */
255 while((line=rline(in,out,0))){
/* count columns: skip leading spaces, then for each column skip its
   characters (codes above 32) and the spaces that follow */
258 while(*temp==' ')temp++;
259 for(cols=0;*temp;cols++){
260 while(*temp>32)temp++;
261 while(*temp==' ')temp++;
/* the requested subvectors must fit within the columns found */
267 if(start+num*dim>cols){
268 fprintf(stderr,"ran out of columns reading %s\n",file);
271 while(*line==' ')line++;
274 /* static length buffer bug workaround */
277 while(*temp>32)temp++;
/* advance `line` past the current column and the spaces after it */
284 while(*line>32)line++;
285 while(*line==' ')line++;
/* a non-positive num means: as many whole dim-wide groups as remain
   after `start` */
287 if(num<=0)num=(cols-start)/dim;
289 vqext_addpoint_adj(&v,b,start+i*dim,dim,cols,num);
298 fprintf(stderr,"No input files!\n");
302 vqext_preprocess(&v);
/* route termination and interrupt signals to setexit() */
305 signal(SIGTERM,setexit);
306 signal(SIGINT,setexit);
/* run at most `iter` iterations, stopping early once `exiting` is
   nonzero (presumably set by setexit -- confirm) */
308 for(i=0;i<iter && !exiting;i++){
311 vqgen_unquantize(&v,&q);
312 vqgen_cellmetric(&v);
314 result=vqgen_iterate(&v,biasp);
315 vqext_quantize(&v,&q);
/* stop once the iteration result falls below the desired value */
316 if(result<desired)break;
/* header of the intermediate file: the same book type, sizes and
   quantizer lines that the reload code above parses */
321 fprintf(out,"# OggVorbis VQ codebook trainer, intermediate file\n");
322 fprintf(out,"%s\n",vqext_booktype);
323 fprintf(out,"%d %d %d\n",entries,dim,vqext_aux);
324 fprintf(out,"%ld %ld %d %d\n",
325 q.min,q.delta,q.quant,q.sequencep);
327 /* quantized entries */
328 fprintf(out,"# quantized entries---\n");
330 for(j=0;j<entries;j++)
/* each entry value is rounded with rint() and written as an integer */
332 fprintf(out,"%d\n",(int)(rint(v.entrylist[i++])));
334 fprintf(out,"# biases---\n");
336 for(j=0;j<entries;j++)
337 fprintf(out,"%f\n",v.bias[i++]);
339 /* we may have done the density limiting mesh trick; refetch the
340 training points from the temp file */
342 rewind(v.asciipoints);
343 fprintf(out,"# points---\n");
345 /* sloppy, no error handling */
/* copy v.asciipoints to `out` in chunks of up to 4096 bytes.
   NOTE(review): after a short write the retry starts again at the
   beginning of buff rather than at the unwritten remainder, and the
   inner loop never ends if fwrite() keeps returning 0 */
348 while((bytes=fread(buff,1,4096,v.asciipoints)))
349 while(bytes)bytes-=fwrite(buff,1,bytes,out);
353 fclose(v.asciipoints);
355 vqgen_unquantize(&v,&q);
356 vqgen_cellmetric(&v);
360 fprintf(stderr,"Syntax error in argument '%s'\n",*argv);