1 /********************************************************************
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
11 ********************************************************************
13 function: example encoder application; makes an Ogg Theora/Vorbis
14 file from YUV4MPEG2 and WAV input
15 last mod: $Id: encoder_example.c 16517 2009-08-25 19:48:57Z giles $
17 ********************************************************************/
19 #if !defined(_REENTRANT)
22 #if !defined(_GNU_SOURCE)
25 #if !defined(_LARGEFILE_SOURCE)
26 #define _LARGEFILE_SOURCE
28 #if !defined(_LARGEFILE64_SOURCE)
29 #define _LARGEFILE64_SOURCE
31 #if !defined(_FILE_OFFSET_BITS)
32 #define _FILE_OFFSET_BITS 64
46 #include "theora/theoraenc.h"
47 #include "vorbis/codec.h"
48 #include "vorbis/vorbisenc.h"
51 /*supply missing headers and functions to Win32. going to hell, I know*/
55 static double rint(double x)
58 return (double)(int)(x - 0.5);
60 return (double)(int)(x + 0.5);
64 const char *optstring = "b:e:o:a:A:v:V:s:S:f:F:ck:d:z:\1\2\3\4";
65 struct option options [] = {
66 {"begin-time",required_argument,NULL,'b'},
67 {"end-time",required_argument,NULL,'e'},
68 {"output",required_argument,NULL,'o'},
69 {"audio-rate-target",required_argument,NULL,'A'},
70 {"video-rate-target",required_argument,NULL,'V'},
71 {"audio-quality",required_argument,NULL,'a'},
72 {"video-quality",required_argument,NULL,'v'},
73 {"aspect-numerator",required_argument,NULL,'s'},
74 {"aspect-denominator",required_argument,NULL,'S'},
75 {"framerate-numerator",required_argument,NULL,'f'},
76 {"framerate-denominator",required_argument,NULL,'F'},
77 {"vp3-compatible",no_argument,NULL,'c'},
78 {"speed",required_argument,NULL,'z'},
79 {"soft-target",no_argument,NULL,'\1'},
80 {"keyframe-freq",required_argument,NULL,'k'},
81 {"buf-delay",required_argument,NULL,'d'},
82 {"two-pass",no_argument,NULL,'\2'},
83 {"first-pass",required_argument,NULL,'\3'},
84 {"second-pass",required_argument,NULL,'\4'},
88 /* You'll go to Hell for using globals. */
115 char chroma_type[16];
117 /*The size of each converted frame buffer.*/
118 size_t y4m_dst_buf_sz;
119 /*The amount to read directly into the converted frame buffer.*/
120 size_t y4m_dst_buf_read_sz;
121 /*The size of the auxiliary buffer.*/
122 size_t y4m_aux_buf_sz;
123 /*The amount to read into the auxiliary buffer.*/
124 size_t y4m_aux_buf_read_sz;
126 /*The function used to perform chroma conversion.*/
127 typedef void (*y4m_convert_func)(unsigned char *_dst,unsigned char *_aux);
129 y4m_convert_func y4m_convert=NULL;
133 ogg_uint32_t keyframe_frequency=0;
141 static void usage(void){
143 "Usage: encoder_example [options] [audio_file] video_file\n\n"
145 " -o --output <filename.ogv> file name for encoded output;\n"
146 " If this option is not given, the\n"
147 " compressed data is sent to stdout.\n\n"
148 " -A --audio-rate-target <n> bitrate target for Vorbis audio;\n"
149 " use -a and not -A if at all possible,\n"
150 " as -a gives higher quality for a given\n"
152 " -V --video-rate-target <n> bitrate target for Theora video\n\n"
153 " --soft-target Use a large reservoir and treat the rate\n"
154 " as a soft target; rate control is less\n"
155 " strict but resulting quality is usually\n"
156 " higher/smoother overall. Soft target also\n"
157 " allows an optional -v setting to specify\n"
158 " a minimum allowed quality.\n\n"
159 " --two-pass Compress input using two-pass rate control\n"
160 " This option requires that the input to the\n"
161 " to the encoder is seekable and performs\n"
162 " both passes automatically.\n\n"
163 " --first-pass <filename> Perform first-pass of a two-pass rate\n"
164 " controlled encoding, saving pass data to\n"
165 " <filename> for a later second pass\n\n"
166 " --second-pass <filename> Perform second-pass of a two-pass rate\n"
167 " controlled encoding, reading first-pass\n"
168 " data from <filename>. The first pass\n"
169 " data must come from a first encoding pass\n"
170 " using identical input video to work\n"
172 " -a --audio-quality <n> Vorbis quality selector from -1 to 10\n"
173 " (-1 yields smallest files but lowest\n"
174 " fidelity; 10 yields highest fidelity\n"
175 " but large files. '2' is a reasonable\n"
177 " -v --video-quality <n> Theora quality selector from 0 to 10\n"
178 " (0 yields smallest files but lowest\n"
179 " video quality. 10 yields highest\n"
180 " fidelity but large files).\n\n"
181 " -s --aspect-numerator <n> Aspect ratio numerator, default is 0\n"
182 " or extracted from YUV input file\n"
183 " -S --aspect-denominator <n> Aspect ratio denominator, default is 0\n"
184 " or extracted from YUV input file\n"
185 " -f --framerate-numerator <n> Frame rate numerator, can be extracted\n"
186 " from YUV input file. ex: 30000000\n"
187 " -F --framerate-denominator <n> Frame rate denominator, can be extracted\n"
188 " from YUV input file. ex: 1000000\n"
189 " The frame rate nominator divided by this\n"
190 " determinates the frame rate in units per tick\n"
191 " -k --keyframe-freq <n> Keyframe frequency\n"
192 " -z --speed <n> Sets the encoder speed level. Higher speed\n"
193 " levels favor quicker encoding over better\n"
194 " quality per bit. Depending on the encoding\n"
195 " mode, and the internal algorithms used,\n"
196 " quality may actually improve with higher\n"
197 " speeds, but in this case bitrate will also\n"
198 " likely increase. The maximum value, and the\n"
199 " meaning of each value, are implementation-\n"
200 " specific and may change depending on the\n"
201 " current encoding mode (rate constrained,\n"
202 " two-pass, etc.).\n"
203 " -d --buf-delay <n> Buffer delay (in frames). Longer delays\n"
204 " allow smoother rate adaptation and provide\n"
205 " better overall quality, but require more\n"
206 " client side buffering and add latency. The\n"
207 " default value is the keyframe interval for\n"
208 " one-pass encoding (or somewhat larger if\n"
209 " --soft-target is used) and infinite for\n"
210 " two-pass encoding.\n"
211 " -b --begin-time <h:m:s.d> Begin encoding at offset into input\n"
212 " -e --end-time <h:m:s.d> End encoding at offset into input\n"
213 "encoder_example accepts only uncompressed RIFF WAV format audio and\n"
214 "YUV4MPEG2 uncompressed video.\n\n");
218 static int y4m_parse_tags(char *_tags){
231 got_w=got_h=got_fps=got_interlace=got_par=got_chroma=0;
233 /*Skip any leading spaces.*/
235 /*If that's all we have, stop.*/
237 /*Find the end of this tag.*/
238 for(q=p+1;*q!='\0'&&*q!=' ';q++);
242 if(sscanf(p+1,"%d",&pic_w)!=1)return -1;
246 if(sscanf(p+1,"%d",&pic_h)!=1)return -1;
250 if(sscanf(p+1,"%d:%d",&tmp_video_fps_n,&tmp_video_fps_d)!=2)return -1;
258 if(sscanf(p+1,"%d:%d",&tmp_video_par_n,&tmp_video_par_d)!=2)return -1;
263 memcpy(chroma_type,p+1,q-p-1);
264 chroma_type[q-p-1]='\0';
267 /*Ignore unknown tags.*/
270 if(!got_w||!got_h||!got_fps||!got_interlace||!got_par)return -1;
271 /*Chroma-type is not specified in older files, e.g., those generated by
273 if(!got_chroma)strcpy(chroma_type,"420");
274 /*Update fps and aspect ratio globals if not specified in the command line.*/
275 if(video_fps_n==-1)video_fps_n=tmp_video_fps_n;
276 if(video_fps_d==-1)video_fps_d=tmp_video_fps_d;
277 if(video_par_n==-1)video_par_n=tmp_video_par_n;
278 if(video_par_d==-1)video_par_d=tmp_video_par_d;
282 /*All anti-aliasing filters in the following conversion functions are based on
283 one of two window functions:
284 The 6-tap Lanczos window (for down-sampling and shifts):
285 sinc(\pi*t)*sinc(\pi*t/3), |t|<3 (sinc(t)==sin(t)/t)
287 The 4-tap Mitchell window (for up-sampling):
288 7|t|^3-12|t|^2+16/3, |t|<1
289 -(7/3)|t|^3+12|t|^2-20|t|+32/3, |t|<2
291 The number of taps is intentionally kept small to reduce computational
292 overhead and limit ringing.
294 The taps from these filters are scaled so that their sum is 1, and the result
295 is scaled by 128 and rounded to integers to create a filter whose
296 intermediate values fit inside 16 bits.
297 Coefficients are rounded in such a way as to ensure their sum is still 128,
298 which is usually equivalent to normal rounding.*/
300 #define OC_MINI(_a,_b) ((_a)>(_b)?(_b):(_a))
301 #define OC_MAXI(_a,_b) ((_a)<(_b)?(_b):(_a))
302 #define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
304 /*420jpeg chroma samples are sited like:
305 Y-------Y-------Y-------Y-------
309 Y-------Y-------Y-------Y-------
313 Y-------Y-------Y-------Y-------
317 Y-------Y-------Y-------Y-------
322 420mpeg2 chroma samples are sited like:
323 Y-------Y-------Y-------Y-------
327 Y-------Y-------Y-------Y-------
331 Y-------Y-------Y-------Y-------
335 Y-------Y-------Y-------Y-------
340 We use a resampling filter to shift the site locations one quarter pixel (at
341 the chroma plane's resolution) to the right.
342 The 4:2:2 modes look exactly the same, except there are twice as many chroma
343 lines, and they are vertically co-sited with the luma samples in both the
344 mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
345 static void y4m_convert_42xmpeg2_42xjpeg(unsigned char *_dst,
346 unsigned char *_aux){
352 /*Skip past the luma data.*/
354 /*Compute the size of each chroma plane.*/
355 c_w=(pic_w+dst_c_dec_h-1)/dst_c_dec_h;
356 c_h=(pic_h+dst_c_dec_v-1)/dst_c_dec_v;
357 for(pli=1;pli<3;pli++){
359 /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
361 for(x=0;x<OC_MINI(c_w,2);x++){
362 _dst[x]=(unsigned char)OC_CLAMPI(0,4*_aux[0]-17*_aux[OC_MAXI(x-1,0)]+
363 114*_aux[x]+35*_aux[OC_MINI(x+1,c_w-1)]-9*_aux[OC_MINI(x+2,c_w-1)]+
364 _aux[OC_MINI(x+3,c_w-1)]+64>>7,255);
367 _dst[x]=(unsigned char)OC_CLAMPI(0,4*_aux[x-2]-17*_aux[x-1]+
368 114*_aux[x]+35*_aux[x+1]-9*_aux[x+2]+_aux[x+3]+64>>7,255);
371 _dst[x]=(unsigned char)OC_CLAMPI(0,4*_aux[x-2]-17*_aux[x-1]+
372 114*_aux[x]+35*_aux[OC_MINI(x+1,c_w-1)]-9*_aux[OC_MINI(x+2,c_w-1)]+
373 _aux[c_w-1]+64>>7,255);
381 /*This format is only used for interlaced content, but is included for
384 420jpeg chroma samples are sited like:
385 Y-------Y-------Y-------Y-------
389 Y-------Y-------Y-------Y-------
393 Y-------Y-------Y-------Y-------
397 Y-------Y-------Y-------Y-------
402 420paldv chroma samples are sited like:
403 YR------Y-------YR------Y-------
407 YB------Y-------YB------Y-------
411 YR------Y-------YR------Y-------
415 YB------Y-------YB------Y-------
420 We use a resampling filter to shift the site locations one quarter pixel (at
421 the chroma plane's resolution) to the right.
422 Then we use another filter to move the C_r location down one quarter pixel,
423 and the C_b location up one quarter pixel.*/
424 static void y4m_convert_42xpaldv_42xjpeg(unsigned char *_dst,
425 unsigned char *_aux){
433 /*Skip past the luma data.*/
435 /*Compute the size of each chroma plane.*/
437 c_h=(pic_h+dst_c_dec_h-1)/dst_c_dec_h;
439 /*First do the horizontal re-sampling.
440 This is the same as the mpeg2 case, except that after the horizontal case,
441 we need to apply a second vertical filter.*/
443 for(pli=1;pli<3;pli++){
445 /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
447 for(x=0;x<OC_MINI(c_w,2);x++){
448 tmp[x]=(unsigned char)OC_CLAMPI(0,4*_aux[0]-17*_aux[OC_MAXI(x-1,0)]+
449 114*_aux[x]+35*_aux[OC_MINI(x+1,c_w-1)]-9*_aux[OC_MINI(x+2,c_w-1)]+
450 _aux[OC_MINI(x+3,c_w-1)]+64>>7,255);
453 tmp[x]=(unsigned char)OC_CLAMPI(0,4*_aux[x-2]-17*_aux[x-1]+
454 114*_aux[x]+35*_aux[x+1]-9*_aux[x+2]+_aux[x+3]+64>>7,255);
457 tmp[x]=(unsigned char)OC_CLAMPI(0,4*_aux[x-2]-17*_aux[x-1]+
458 114*_aux[x]+35*_aux[OC_MINI(x+1,c_w-1)]-9*_aux[OC_MINI(x+2,c_w-1)]+
459 _aux[c_w-1]+64>>7,255);
467 /*Slide C_b up a quarter-pel.
468 This is the same filter used above, but in the other order.*/
470 for(y=0;y<OC_MINI(c_h,3);y++){
471 _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,tmp[0]-
472 9*tmp[OC_MAXI(y-2,0)*c_w]+35*tmp[OC_MAXI(y-1,0)*c_w]+
473 114*tmp[y*c_w]-17*tmp[OC_MINI(y+1,c_h-1)*c_w]+
474 4*tmp[OC_MINI(y+2,c_h-1)*c_w]+64>>7,255);
477 _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,tmp[(y-3)*c_w]-
478 9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]-
479 17*tmp[(y+1)*c_w]+4*tmp[(y+2)*c_w]+64>>7,255);
482 _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,tmp[(y-3)*c_w]-
483 9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]-
484 17*tmp[OC_MINI(y+1,c_h-1)*c_w]+4*tmp[(c_h-1)*c_w]+64>>7,255);
494 /*Slide C_r down a quarter-pel.
495 This is the same as the horizontal filter.*/
497 for(y=0;y<OC_MINI(c_h,2);y++){
498 _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,4*tmp[0]-
499 17*tmp[OC_MAXI(y-1,0)*c_w]+114*tmp[y*c_w]+
500 35*tmp[OC_MINI(y+1,c_h-1)*c_w]-9*tmp[OC_MINI(y+2,c_h-1)*c_w]+
501 tmp[OC_MINI(y+3,c_h-1)*c_w]+64>>7,255);
504 _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,4*tmp[(y-2)*c_w]-
505 17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[(y+1)*c_w]-
506 9*tmp[(y+2)*c_w]+tmp[(y+3)*c_w]+64>>7,255);
509 _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,4*tmp[(y-2)*c_w]-
510 17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[OC_MINI(y+1,c_h-1)*c_w]-
511 9*tmp[OC_MINI(y+2,c_h-1)*c_w]+tmp[(c_h-1)*c_w]+64>>7,255);
518 /*For actual interlaced material, this would have to be done separately on
519 each field, and the shift amounts would be different.
520 C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
521 C_b up 1/8 in the bottom field.
522 The corresponding filters would be:
523 Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
524 Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
528 /*422jpeg chroma samples are sited like:
529 Y---BR--Y-------Y---BR--Y-------
533 Y---BR--Y-------Y---BR--Y-------
537 Y---BR--Y-------Y---BR--Y-------
541 Y---BR--Y-------Y---BR--Y-------
546 411 chroma samples are sited like:
547 YBR-----Y-------Y-------Y-------
551 YBR-----Y-------Y-------Y-------
555 YBR-----Y-------Y-------Y-------
559 YBR-----Y-------Y-------Y-------
564 We use a filter to resample at site locations one eighth pixel (at the source
565 chroma plane's horizontal resolution) and five eighths of a pixel to the
567 static void y4m_convert_411_422jpeg(unsigned char *_dst,
568 unsigned char *_aux){
575 /*Skip past the luma data.*/
577 /*Compute the size of each chroma plane.*/
578 c_w=(pic_w+src_c_dec_h-1)/src_c_dec_h;
579 dst_c_w=(pic_w+dst_c_dec_h-1)/dst_c_dec_h;
580 c_h=(pic_h+dst_c_dec_v-1)/dst_c_dec_v;
581 for(pli=1;pli<3;pli++){
583 /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
584 4-tap Mitchell window.*/
585 for(x=0;x<OC_MINI(c_w,1);x++){
586 _dst[x<<1]=(unsigned char)OC_CLAMPI(0,111*_aux[0]+
587 18*_aux[OC_MINI(1,c_w-1)]-_aux[OC_MINI(2,c_w-1)]+64>>7,255);
588 _dst[x<<1|1]=(unsigned char)OC_CLAMPI(0,47*_aux[0]+
589 86*_aux[OC_MINI(1,c_w-1)]-5*_aux[OC_MINI(2,c_w-1)]+64>>7,255);
592 _dst[x<<1]=(unsigned char)OC_CLAMPI(0,_aux[x-1]+110*_aux[x]+
593 18*_aux[x+1]-_aux[x+2]+64>>7,255);
594 _dst[x<<1|1]=(unsigned char)OC_CLAMPI(0,-3*_aux[x-1]+50*_aux[x]+
595 86*_aux[x+1]-5*_aux[x+2]+64>>7,255);
598 _dst[x<<1]=(unsigned char)OC_CLAMPI(0,_aux[x-1]+110*_aux[x]+
599 18*_aux[OC_MINI(x+1,c_w-1)]-_aux[c_w-1]+64>>7,255);
600 if((x<<1|1)<dst_c_w){
601 _dst[x<<1|1]=(unsigned char)OC_CLAMPI(0,-3*_aux[x-1]+50*_aux[x]+
602 86*_aux[OC_MINI(x+1,c_w-1)]-5*_aux[c_w-1]+64>>7,255);
611 /*The image is padded with empty chroma components at 4:2:0.
612 This costs about 17 bits a frame to code.*/
613 static void y4m_convert_mono_420jpeg(unsigned char *_dst,
614 unsigned char *_aux){
617 c_sz=((pic_w+dst_c_dec_h-1)/dst_c_dec_h)*((pic_h+dst_c_dec_v-1)/dst_c_dec_v);
618 memset(_dst,128,c_sz*2);
622 /*Right now just 444 to 420.
623 Not too hard to generalize.*/
624 static void y4m_convert_4xxjpeg_42xjpeg(unsigned char *_dst,
625 unsigned char *_aux){
635 /*Compute the size of each chroma plane.*/
636 c_w=(pic_w+dst_c_dec_h-1)/dst_c_dec_h;
637 c_h=(pic_h+dst_c_dec_v-1)/dst_c_dec_v;
642 for(pli=1;pli<3;pli++){
644 /*In reality, the horizontal and vertical steps could be pipelined, for
645 less memory consumption and better cache performance, but we do them
646 separately for simplicity.*/
647 /*First do horizontal filtering (convert to 4:2:2)*/
648 /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
649 for(y=0;y<pic_h;y++){
650 for(x=0;x<OC_MINI(pic_w,2);x+=2){
651 tmp[x>>1]=OC_CLAMPI(0,64*_aux[0]+78*_aux[OC_MINI(1,pic_w-1)]-
652 17*_aux[OC_MINI(2,pic_w-1)]+3*_aux[OC_MINI(3,pic_w-1)]+64>>7,255);
654 for(;x<pic_w-3;x+=2){
655 tmp[x>>1]=OC_CLAMPI(0,3*(_aux[x-2]+_aux[x+3])-17*(_aux[x-1]+_aux[x+2])+
656 78*(_aux[x]+_aux[x+1])+64>>7,255);
659 tmp[x>>1]=OC_CLAMPI(0,3*(_aux[x-2]+_aux[pic_w-1])-
660 17*(_aux[x-1]+_aux[OC_MINI(x+2,pic_w-1)])+
661 78*(_aux[x]+_aux[OC_MINI(x+1,pic_w-1)])+64>>7,255);
668 /*Now do the vertical filtering.*/
670 for(y=0;y<OC_MINI(pic_h,2);y+=2){
671 _dst[(y>>1)*c_w]=OC_CLAMPI(0,64*tmp[0]+78*tmp[OC_MINI(1,pic_h-1)*c_w]-
672 17*tmp[OC_MINI(2,pic_h-1)*c_w]+3*tmp[OC_MINI(3,pic_h-1)*c_w]+
675 for(;y<pic_h-3;y+=2){
676 _dst[(y>>1)*c_w]=OC_CLAMPI(0,3*(tmp[(y-2)*c_w]+tmp[(y+3)*c_w])-
677 17*(tmp[(y-1)*c_w]+tmp[(y+2)*c_w])+78*(tmp[y*c_w]+tmp[(y+1)*c_w])+
681 _dst[(y>>1)*c_w]=OC_CLAMPI(0,3*(tmp[(y-2)*c_w]+tmp[(pic_h-1)*c_w])-
682 17*(tmp[(y-1)*c_w]+tmp[OC_MINI(y+2,pic_h-1)*c_w])+
683 78*(tmp[y*c_w]+tmp[OC_MINI(y+1,pic_h-1)*c_w])+64>>7,255);
694 /*No conversion function needed.*/
695 static void y4m_convert_null(unsigned char *_dst,
696 unsigned char *_aux){
699 static void id_file(char *f){
701 unsigned char buffer[80];
704 /* open it, look for magic */
712 fprintf(stderr,"Unable to open file %s.\n",f);
717 ret=fread(buffer,1,4,test);
719 fprintf(stderr,"EOF determining file type of file %s.\n",f);
723 if(!memcmp(buffer,"RIFF",4)){
724 /* possible WAV file */
727 /* umm, we already have one */
728 fprintf(stderr,"Multiple RIFF WAVE files specified on command line.\n");
732 /* Parse the rest of the header */
734 ret=fread(buffer,1,8,test);
735 if(ret<8)goto riff_err;
736 if(!memcmp(buffer+4,"WAVE",4)){
739 ret=fread(buffer,1,4,test);
740 if(ret<4)goto riff_err;
741 if(!memcmp("fmt",buffer,3)){
743 /* OK, this is our audio specs chunk. Slurp it up. */
745 ret=fread(buffer,1,20,test);
746 if(ret<20)goto riff_err;
748 if(memcmp(buffer+4,"\001\000",2)){
749 fprintf(stderr,"The WAV file %s is in a compressed format; "
750 "can't read it.\n",f);
755 audio_ch=buffer[6]+(buffer[7]<<8);
756 audio_hz=buffer[8]+(buffer[9]<<8)+
757 (buffer[10]<<16)+(buffer[11]<<24);
759 if(buffer[18]+(buffer[19]<<8)!=16){
760 fprintf(stderr,"Can only read 16 bit WAV files for now.\n");
764 /* Now, align things to the beginning of the data */
765 /* Look for 'dataxxxx' */
767 ret=fread(buffer,1,4,test);
768 if(ret<4)goto riff_err;
769 if(!memcmp("data",buffer,4)){
770 /* We're there. Ignore the declared size for now. */
771 ret=fread(buffer,1,4,test);
772 if(ret<4)goto riff_err;
774 fprintf(stderr,"File %s is 16 bit %d channel %d Hz RIFF WAV audio.\n",
775 f,audio_ch,audio_hz);
784 fprintf(stderr,"Couldn't find WAVE data in RIFF file %s.\n",f);
788 if(!memcmp(buffer,"YUV4",4)){
789 /* possible YUV2MPEG2 format file */
790 /* read until newline, or 80 cols, whichever happens first */
793 ret=fread(buffer+i,1,1,test);
794 if(ret<1)goto yuv_err;
795 if(buffer[i]=='\n')break;
798 fprintf(stderr,"Error parsing %s header; not a YUV2MPEG2 file?\n",f);
802 if(!memcmp(buffer,"MPEG",4)){
805 /* umm, we already have one */
806 fprintf(stderr,"Multiple video files specified on command line.\n");
811 fprintf(stderr,"Incorrect YUV input file version; YUV4MPEG2 required.\n");
814 ret=y4m_parse_tags((char *)buffer+5);
816 fprintf(stderr,"Error parsing YUV4MPEG2 header in file %s.\n",f);
821 fprintf(stderr,"Input video is interlaced; Theora handles only progressive scan\n");
825 if(strcmp(chroma_type,"420")==0||strcmp(chroma_type,"420jpeg")==0){
826 src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=2;
827 y4m_dst_buf_read_sz=pic_w*pic_h+2*((pic_w+1)/2)*((pic_h+1)/2);
828 y4m_aux_buf_sz=y4m_aux_buf_read_sz=0;
829 y4m_convert=y4m_convert_null;
831 else if(strcmp(chroma_type,"420mpeg2")==0){
832 src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=2;
833 y4m_dst_buf_read_sz=pic_w*pic_h;
834 /*Chroma filter required: read into the aux buf first.*/
835 y4m_aux_buf_sz=y4m_aux_buf_read_sz=2*((pic_w+1)/2)*((pic_h+1)/2);
836 y4m_convert=y4m_convert_42xmpeg2_42xjpeg;
838 else if(strcmp(chroma_type,"420paldv")==0){
839 src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=2;
840 y4m_dst_buf_read_sz=pic_w*pic_h;
841 /*Chroma filter required: read into the aux buf first.
842 We need to make two filter passes, so we need some extra space in the
844 y4m_aux_buf_sz=3*((pic_w+1)/2)*((pic_h+1)/2);
845 y4m_aux_buf_read_sz=2*((pic_w+1)/2)*((pic_h+1)/2);
846 y4m_convert=y4m_convert_42xpaldv_42xjpeg;
848 else if(strcmp(chroma_type,"422")==0){
849 src_c_dec_h=dst_c_dec_h=2;
850 src_c_dec_v=dst_c_dec_v=1;
851 y4m_dst_buf_read_sz=pic_w*pic_h;
852 /*Chroma filter required: read into the aux buf first.*/
853 y4m_aux_buf_sz=y4m_aux_buf_read_sz=2*((pic_w+1)/2)*pic_h;
854 y4m_convert=y4m_convert_42xmpeg2_42xjpeg;
856 else if(strcmp(chroma_type,"411")==0){
858 /*We don't want to introduce any additional sub-sampling, so we
859 promote 4:1:1 material to 4:2:2, as the closest format Theora can
862 src_c_dec_v=dst_c_dec_v=1;
863 y4m_dst_buf_read_sz=pic_w*pic_h;
864 /*Chroma filter required: read into the aux buf first.*/
865 y4m_aux_buf_sz=y4m_aux_buf_read_sz=2*((pic_w+3)/4)*pic_h;
866 y4m_convert=y4m_convert_411_422jpeg;
868 else if(strcmp(chroma_type,"444")==0){
869 src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=1;
870 y4m_dst_buf_read_sz=pic_w*pic_h*3;
871 y4m_aux_buf_sz=y4m_aux_buf_read_sz=0;
872 y4m_convert=y4m_convert_null;
874 else if(strcmp(chroma_type,"444alpha")==0){
875 src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=1;
876 y4m_dst_buf_read_sz=pic_w*pic_h*3;
877 /*Read the extra alpha plane into the aux buf.
878 It will be discarded.*/
879 y4m_aux_buf_sz=y4m_aux_buf_read_sz=pic_w*pic_h;
880 y4m_convert=y4m_convert_null;
882 else if(strcmp(chroma_type,"mono")==0){
883 src_c_dec_h=src_c_dec_v=0;
884 dst_c_dec_h=dst_c_dec_v=2;
885 y4m_dst_buf_read_sz=pic_w*pic_h;
886 y4m_aux_buf_sz=y4m_aux_buf_read_sz=0;
887 y4m_convert=y4m_convert_mono_420jpeg;
890 fprintf(stderr,"Unknown chroma sampling type: %s\n",chroma_type);
893 /*The size of the final frame buffers is always computed from the
894 destination chroma decimation type.*/
895 y4m_dst_buf_sz=pic_w*pic_h+2*((pic_w+dst_c_dec_h-1)/dst_c_dec_h)*
896 ((pic_h+dst_c_dec_v-1)/dst_c_dec_v);
900 fprintf(stderr,"File %s is %dx%d %.02f fps %s video.\n",
901 f,pic_w,pic_h,(double)video_fps_n/video_fps_d,chroma_type);
906 fprintf(stderr,"Input file %s is neither a WAV nor YUV4MPEG2 file.\n",f);
910 fprintf(stderr,"EOF parsing RIFF file %s.\n",f);
913 fprintf(stderr,"EOF parsing YUV4MPEG2 file %s.\n",f);
919 char *spinascii="|/-\\";
922 if(spinner==4)spinner=0;
923 fprintf(stderr,"\r%c",spinascii[spinner]);
926 int fetch_and_process_audio(FILE *audio,ogg_page *audiopage,
927 ogg_stream_state *vo,
928 vorbis_dsp_state *vd,
931 static ogg_int64_t samples_sofar=0;
934 ogg_int64_t beginsample = audio_hz*begin_sec + audio_hz*begin_usec*.000001;
935 ogg_int64_t endsample = audio_hz*end_sec + audio_hz*end_usec*.000001;
937 while(audio && !audioflag){
938 /* process any audio already buffered */
940 if(ogg_stream_pageout(vo,audiopage)>0) return 1;
941 if(ogg_stream_eos(vo))return 0;
944 /* read and process more audio */
945 signed char readbuffer[4096];
946 signed char *readptr=readbuffer;
947 int toread=4096/2/audio_ch;
948 int bytesread=fread(readbuffer,1,toread*2*audio_ch,audio);
949 int sampread=bytesread/2/audio_ch;
950 float **vorbis_buffer;
954 (samples_sofar>=endsample && endsample>0)){
955 /* end of file. this can be done implicitly, but it's
956 easier to see here in non-clever fashion. Tell the
957 library we're at end of stream so that it can handle the
958 last frame and mark end of stream in the output properly */
959 vorbis_analysis_wrote(vd,0);
961 if(samples_sofar < beginsample){
962 if(samples_sofar+sampread > beginsample){
963 readptr += (beginsample-samples_sofar)*2*audio_ch;
964 sampread += samples_sofar-beginsample;
965 samples_sofar = sampread+beginsample;
967 samples_sofar += sampread;
971 samples_sofar += sampread;
974 if(samples_sofar > endsample && endsample > 0)
975 sampread-= (samples_sofar - endsample);
979 vorbis_buffer=vorbis_analysis_buffer(vd,sampread);
980 /* uninterleave samples */
981 for(i=0;i<sampread;i++){
982 for(j=0;j<audio_ch;j++){
983 vorbis_buffer[j][i]=((readptr[count+1]<<8)|
984 (0x00ff&(int)readptr[count]))/32768.f;
989 vorbis_analysis_wrote(vd,sampread);
993 while(vorbis_analysis_blockout(vd,vb)==1){
995 /* analysis, assume we want to use bitrate management */
996 vorbis_analysis(vb,NULL);
997 vorbis_bitrate_addblock(vb);
999 /* weld packets into the bitstream */
1000 while(vorbis_bitrate_flushpacket(vd,&op))
1001 ogg_stream_packetin(vo,&op);
1010 static int frame_state=-1;
1011 static ogg_int64_t frames=0;
1012 static unsigned char *yuvframe[3];
1013 static th_ycbcr_buffer ycbcr;
1015 int fetch_and_process_video_packet(FILE *video,FILE *twopass_file,int passno,
1016 th_enc_ctx *td,ogg_packet *op){
1024 ogg_int64_t beginframe;
1025 ogg_int64_t endframe;
1027 beginframe=(video_fps_n*begin_sec+video_fps_n*begin_usec*.000001)/video_fps_d;
1028 endframe=(video_fps_n*end_sec+video_fps_n*end_usec*.000001)/video_fps_d;
1029 if(frame_state==-1){
1030 /* initialize the double frame buffer */
1031 yuvframe[0]=(unsigned char *)malloc(y4m_dst_buf_sz);
1032 yuvframe[1]=(unsigned char *)malloc(y4m_dst_buf_sz);
1033 yuvframe[2]=(unsigned char *)malloc(y4m_aux_buf_sz);
1037 frame_c_w=frame_w/dst_c_dec_h;
1038 frame_c_h=frame_h/dst_c_dec_v;
1039 c_w=(pic_w+dst_c_dec_h-1)/dst_c_dec_h;
1040 c_h=(pic_h+dst_c_dec_v-1)/dst_c_dec_v;
1042 /* read and process more video */
1043 /* video strategy reads one frame ahead so we know when we're
1044 at end of stream and can mark last video frame as such
1045 (vorbis audio has to flush one frame past last video frame
1046 due to overlap and thus doesn't need this extra work */
1048 /* have two frame buffers full (if possible) before
1049 proceeding. after first pass and until eos, one will
1050 always be full when we get here */
1051 for(;frame_state<2 && (frames<endframe || endframe<0);){
1053 int ret=fread(frame,1,6,video);
1054 /* match and skip the frame header */
1056 if(memcmp(frame,"FRAME",5)){
1057 fprintf(stderr,"Loss of framing in YUV input data\n");
1063 if(fread(&c,1,1,video)&&c=='\n')break;
1065 fprintf(stderr,"Error parsing YUV frame header\n");
1069 /*Read the frame data that needs no conversion.*/
1070 if(fread(yuvframe[frame_state],1,y4m_dst_buf_read_sz,video)!=
1071 y4m_dst_buf_read_sz){
1072 fprintf(stderr,"Error reading YUV frame data.\n");
1075 /*Read the frame data that does need conversion.*/
1076 if(fread(yuvframe[2],1,y4m_aux_buf_read_sz,video)!=y4m_aux_buf_read_sz){
1077 fprintf(stderr,"Error reading YUV frame data.\n");
1080 /*Now convert the just read frame.*/
1081 (*y4m_convert)(yuvframe[frame_state],yuvframe[2]);
1083 if(frames>=beginframe)
1086 /* check to see if there are dupes to flush */
1087 if(th_encode_packetout(td,frame_state<1,op)>0)return 1;
1089 /* can't get here unless YUV4MPEG stream has no video */
1090 fprintf(stderr,"Video input contains no frames.\n");
1093 /* Theora is a one-frame-in,one-frame-out system; submit a frame
1094 for compression and pull out the packet */
1095 /* in two-pass mode's second pass, we need to submit first-pass data */
1098 static unsigned char buffer[80];
1101 /*Ask the encoder how many bytes it would like.*/
1102 bytes=th_encode_ctl(td,TH_ENCCTL_2PASS_IN,NULL,0);
1104 fprintf(stderr,"Error submitting pass data in second pass.\n");
1107 /*If it's got enough, stop.*/
1109 /*Read in some more bytes, if necessary.*/
1110 if(bytes>80-buf_pos)bytes=80-buf_pos;
1111 if(bytes>0&&fread(buffer+buf_pos,1,bytes,twopass_file)<bytes){
1112 fprintf(stderr,"Could not read frame data from two-pass data file!\n");
1115 /*And pass them off.*/
1116 ret=th_encode_ctl(td,TH_ENCCTL_2PASS_IN,buffer,bytes);
1118 fprintf(stderr,"Error submitting pass data in second pass.\n");
1121 /*If the encoder consumed the whole buffer, reset it.*/
1122 if(ret>=bytes)buf_pos=0;
1123 /*Otherwise remember how much it used.*/
1127 /*We submit the buffer to the library as if it were padded, but we do not
1128 actually allocate space for the padding.
1129 This is okay, because with the 1.0 API the library will never read data from the padded
1131 ycbcr[0].width=frame_w;
1132 ycbcr[0].height=frame_h;
1133 ycbcr[0].stride=pic_w;
1134 ycbcr[0].data=yuvframe[0]-pic_x-pic_y*pic_w;
1135 ycbcr[1].width=frame_c_w;
1136 ycbcr[1].height=frame_c_h;
1137 ycbcr[1].stride=c_w;
1138 ycbcr[1].data=yuvframe[0]+pic_sz-(pic_x/dst_c_dec_h)-(pic_y/dst_c_dec_v)*c_w;
1139 ycbcr[2].width=frame_c_w;
1140 ycbcr[2].height=frame_c_h;
1141 ycbcr[2].stride=c_w;
1142 ycbcr[2].data=ycbcr[1].data+c_sz;
1143 th_encode_ycbcr_in(td,ycbcr);
1145 unsigned char *temp=yuvframe[0];
1146 yuvframe[0]=yuvframe[1];
1150 /* in two-pass mode's first pass we need to extract and save the pass data */
1152 unsigned char *buffer;
1153 int bytes = th_encode_ctl(td, TH_ENCCTL_2PASS_OUT, &buffer, sizeof(buffer));
1155 fprintf(stderr,"Could not read two-pass data from encoder.\n");
1158 if(fwrite(buffer,1,bytes,twopass_file)<bytes){
1159 fprintf(stderr,"Unable to write to two-pass data file.\n");
1162 fflush(twopass_file);
1164 /* if there was only one frame, it's the last in the stream */
1165 ret = th_encode_packetout(td,frame_state<1,op);
1166 if(passno==1 && frame_state<1){
1167 /* need to read the final (summary) packet */
1168 unsigned char *buffer;
1169 int bytes = th_encode_ctl(td, TH_ENCCTL_2PASS_OUT, &buffer, sizeof(buffer));
1171 fprintf(stderr,"Could not read two-pass summary data from encoder.\n");
1174 if(fseek(twopass_file,0,SEEK_SET)<0){
1175 fprintf(stderr,"Unable to seek in two-pass data file.\n");
1178 if(fwrite(buffer,1,bytes,twopass_file)<bytes){
1179 fprintf(stderr,"Unable to write to two-pass data file.\n");
1182 fflush(twopass_file);
1188 int fetch_and_process_video(FILE *video,ogg_page *videopage,
1189 ogg_stream_state *to,th_enc_ctx *td,FILE *twopass_file,int passno,
1193 /* is there a video page flushed? If not, work until there is. */
1195 if(ogg_stream_pageout(to,videopage)>0) return 1;
1196 if(ogg_stream_eos(to)) return 0;
1197 ret=fetch_and_process_video_packet(video,twopass_file,passno,td,&op);
1199 ogg_stream_packetin(to,&op);
1204 static int ilog(unsigned _v){
1206 for(ret=0;_v;ret++)_v>>=1;
1210 int main(int argc,char *argv[]){
1211 int c,long_option_index,ret;
1213 ogg_stream_state to; /* take physical pages, weld into a logical
1214 stream of packets */
1215 ogg_stream_state vo; /* take physical pages, weld into a logical
1216 stream of packets */
1217 ogg_page og; /* one Ogg bitstream page. Vorbis packets are inside */
1218 ogg_packet op; /* one raw packet of data for decode */
1224 vorbis_info vi; /* struct that stores all the static vorbis bitstream
1226 vorbis_comment vc; /* struct that stores all the user comments */
1228 vorbis_dsp_state vd; /* central working state for the packet->PCM decoder */
1229 vorbis_block vb; /* local working space for packet->PCM decode */
1238 ogg_int64_t audio_bytesout=0;
1239 ogg_int64_t video_bytesout=0;
1242 FILE *outfile = stdout;
1244 FILE *twopass_file = NULL;
1245 fpos_t video_rewind_pos;
1249 #ifdef _WIN32 /* We need to set stdin/stdout to binary mode. Damn windows. */
1250 /* if we were reading/writing a file, it would also need to in
1251 binary mode, eg, fopen("file.wav","wb"); */
1252 /* Beware the evil ifdef. We avoid these where we can, but this one we
1253 cannot. Don't add any more, you'll probably go to hell if you do. */
1254 _setmode( _fileno( stdin ), _O_BINARY );
1255 _setmode( _fileno( stdout ), _O_BINARY );
1258 while((c=getopt_long(argc,argv,optstring,options,&long_option_index))!=EOF){
1261 outfile=fopen(optarg,"wb");
1263 fprintf(stderr,"Unable to open output file '%s'\n", optarg);
1269 audio_q=(float)(atof(optarg)*.099);
1270 if(audio_q<-.1 || audio_q>1){
1271 fprintf(stderr,"Illegal audio quality (choose -1 through 10)\n");
1278 video_q=(int)rint(6.3*atof(optarg));
1279 if(video_q<0 || video_q>63){
1280 fprintf(stderr,"Illegal video quality (choose 0 through 10)\n");
1286 audio_r=(int)(atof(optarg)*1000);
1288 fprintf(stderr,"Illegal audio quality (choose > 0 please)\n");
1295 video_r=(int)rint(atof(optarg)*1000);
1297 fprintf(stderr,"Illegal video bitrate (choose > 0 please)\n");
1307 video_par_n=(int)rint(atof(optarg));
1311 video_par_d=(int)rint(atof(optarg));
1315 video_fps_n=(int)rint(atof(optarg));
1319 video_fps_d=(int)rint(atof(optarg));
1327 keyframe_frequency=rint(atof(optarg));
1328 if(keyframe_frequency<1 || keyframe_frequency>2147483647){
1329 fprintf(stderr,"Illegal keyframe frequency\n");
1335 buf_delay=atoi(optarg);
1337 fprintf(stderr,"Illegal buffer delay\n");
1345 fprintf(stderr,"Illegal speed level\n");
1352 char *pos=strchr(optarg,':');
1353 begin_sec=atol(optarg);
1355 char *pos2=strchr(++pos,':');
1357 begin_sec+=atol(pos);
1361 begin_sec+=atol(pos2);
1366 pos=strchr(pos,'.');
1368 int digits = strlen(++pos);
1369 begin_usec=atol(pos);
1377 char *pos=strchr(optarg,':');
1378 end_sec=atol(optarg);
1380 char *pos2=strchr(++pos,':');
1386 end_sec+=atol(pos2);
1391 pos=strchr(pos,'.');
1393 int digits = strlen(++pos);
1401 twopass=3; /* perform both passes */
1402 twopass_file=tmpfile();
1404 fprintf(stderr,"Unable to open temporary file for twopass data\n");
1409 twopass=1; /* perform first pass */
1410 twopass_file=fopen(optarg,"wb");
1412 fprintf(stderr,"Unable to open \'%s\' for twopass data\n",optarg);
1417 twopass=2; /* perform second pass */
1418 twopass_file=fopen(optarg,"rb");
1420 fprintf(stderr,"Unable to open twopass data file \'%s\'",optarg);
1432 fprintf(stderr,"Soft rate target (--soft-target) requested without a bitrate (-V).\n");
1446 if(keyframe_frequency<=0){
1447 /*Use a default keyframe frequency of 64 for 1-pass (streaming) mode, and
1448 256 for two-pass mode.*/
1449 keyframe_frequency=twopass?256:64;
1453 /* assume that anything following the options must be a filename */
1454 id_file(argv[optind]);
1459 /* verify that the input is seekable! */
1461 if(fseek(video,0,SEEK_CUR)){
1462 fprintf(stderr,"--two-pass (automatic two-pass) requires the video input\n"
1463 "to be seekable. For non-seekable input, encoder_example\n"
1464 "must be run twice, first with the --first-pass option, then\n"
1465 "with the --second-pass option.\n\n");
1468 if(fgetpos(video,&video_rewind_pos)<0){
1469 fprintf(stderr,"Unable to determine start position of video data.\n");
1475 /* Set up Ogg output stream */
1477 ogg_stream_init(&to,rand()); /* oops, add one ot the above */
1479 /* initialize Vorbis assuming we have audio to compress. */
1480 if(audio && twopass!=1){
1481 ogg_stream_init(&vo,rand());
1482 vorbis_info_init(&vi);
1484 ret = vorbis_encode_init_vbr(&vi,audio_ch,audio_hz,audio_q);
1486 ret = vorbis_encode_init(&vi,audio_ch,audio_hz,-1,
1487 (int)(64870*(ogg_int64_t)audio_r>>16),-1);
1489 fprintf(stderr,"The Vorbis encoder could not set up a mode according to\n"
1490 "the requested quality or bitrate.\n\n");
1494 vorbis_comment_init(&vc);
1495 vorbis_analysis_init(&vd,&vi);
1496 vorbis_block_init(&vd,&vb);
1499 for(passno=(twopass==3?1:twopass);passno<=(twopass==3?2:twopass);passno++){
1500 /* Set up Theora encoder */
1502 fprintf(stderr,"No video files submitted for compression?\n");
1505 /* Theora has a divisible-by-sixteen restriction for the encoded frame size */
1506 /* scale the picture size up to the nearest /16 and calculate offsets */
1507 frame_w=pic_w+15&~0xF;
1508 frame_h=pic_h+15&~0xF;
1509 /*Force the offsets to be even so that chroma samples line up like we
1511 pic_x=frame_w-pic_w>>1&~1;
1512 pic_y=frame_h-pic_h>>1&~1;
1514 ti.frame_width=frame_w;
1515 ti.frame_height=frame_h;
1517 ti.pic_height=pic_h;
1520 ti.fps_numerator=video_fps_n;
1521 ti.fps_denominator=video_fps_d;
1522 ti.aspect_numerator=video_par_n;
1523 ti.aspect_denominator=video_par_d;
1524 ti.colorspace=TH_CS_UNSPECIFIED;
1525 /*Account for the Ogg page overhead.
1526 This is 1 byte per 255 for lacing values, plus 26 bytes per 4096 bytes for
1527 the page header, plus approximately 1/2 byte per packet (not accounted for
1529 ti.target_bitrate=(int)(64870*(ogg_int64_t)video_r>>16);
1531 ti.keyframe_granule_shift=ilog(keyframe_frequency-1);
1533 if(dst_c_dec_v==2)ti.pixel_fmt=TH_PF_420;
1534 else ti.pixel_fmt=TH_PF_422;
1536 else ti.pixel_fmt=TH_PF_444;
1537 td=th_encode_alloc(&ti);
1539 /* setting just the granule shift only allows power-of-two keyframe
1540 spacing. Set the actual requested spacing. */
1541 ret=th_encode_ctl(td,TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE,
1542 &keyframe_frequency,sizeof(keyframe_frequency-1));
1544 fprintf(stderr,"Could not set keyframe interval to %d.\n",(int)keyframe_frequency);
1547 ret=th_encode_ctl(td,TH_ENCCTL_SET_VP3_COMPATIBLE,&vp3_compatible,
1548 sizeof(vp3_compatible));
1549 if(ret<0||!vp3_compatible){
1550 fprintf(stderr,"Could not enable strict VP3 compatibility.\n");
1552 fprintf(stderr,"Ensure your source format is supported by VP3.\n");
1554 "(4:2:0 pixel format, width and height multiples of 16).\n");
1559 /* reverse the rate control flags to favor a 'long time' strategy */
1560 int arg = TH_RATECTL_CAP_UNDERFLOW;
1561 ret=th_encode_ctl(td,TH_ENCCTL_SET_RATE_FLAGS,&arg,sizeof(arg));
1563 fprintf(stderr,"Could not set encoder flags for --soft-target\n");
1564 /* Default buffer control is overridden on two-pass */
1565 if(!twopass&&buf_delay<0){
1566 if((keyframe_frequency*7>>1) > 5*video_fps_n/video_fps_d)
1567 arg=keyframe_frequency*7>>1;
1569 arg=5*video_fps_n/video_fps_d;
1570 ret=th_encode_ctl(td,TH_ENCCTL_SET_RATE_BUFFER,&arg,sizeof(arg));
1572 fprintf(stderr,"Could not set rate control buffer for --soft-target\n");
1575 /* set up two-pass if needed */
1577 unsigned char *buffer;
1579 bytes=th_encode_ctl(td,TH_ENCCTL_2PASS_OUT,&buffer,sizeof(buffer));
1581 fprintf(stderr,"Could not set up the first pass of two-pass mode.\n");
1582 fprintf(stderr,"Did you remember to specify an estimated bitrate?\n");
1585 /*Perform a seek test to ensure we can overwrite this placeholder data at
1586 the end; this is better than letting the user sit through a whole
1587 encode only to find out their pass 1 file is useless at the end.*/
1588 if(fseek(twopass_file,0,SEEK_SET)<0){
1589 fprintf(stderr,"Unable to seek in two-pass data file.\n");
1592 if(fwrite(buffer,1,bytes,twopass_file)<bytes){
1593 fprintf(stderr,"Unable to write to two-pass data file.\n");
1596 fflush(twopass_file);
1599 /*Enable the second pass here.
1600 We make this call just to set the encoder into 2-pass mode, because
1601 by default enabling two-pass sets the buffer delay to the whole file
1602 (because there's no way to explicitly request that behavior).
1603 If we waited until we were actually encoding, it would overwite our
1605 if(th_encode_ctl(td,TH_ENCCTL_2PASS_IN,NULL,0)<0){
1606 fprintf(stderr,"Could not set up the second pass of two-pass mode.\n");
1610 /* 'automatic' second pass */
1611 if(fsetpos(video,&video_rewind_pos)<0){
1612 fprintf(stderr,"Could not rewind video input file for second pass!\n");
1615 if(fseek(twopass_file,0,SEEK_SET)<0){
1616 fprintf(stderr,"Unable to seek in two-pass data file.\n");
1623 /*Now we can set the buffer delay if the user requested a non-default one
1624 (this has to be done after two-pass is enabled).*/
1625 if(passno!=1&&buf_delay>=0){
1626 ret=th_encode_ctl(td,TH_ENCCTL_SET_RATE_BUFFER,
1627 &buf_delay,sizeof(buf_delay));
1629 fprintf(stderr,"Warning: could not set desired buffer delay.\n");
1632 /*Speed should also be set after the current encoder mode is established,
1633 since the available speed levels may change depending.*/
1637 ret=th_encode_ctl(td,TH_ENCCTL_GET_SPLEVEL_MAX,
1638 &speed_max,sizeof(speed_max));
1640 fprintf(stderr,"Warning: could not determine maximum speed level.\n");
1643 ret=th_encode_ctl(td,TH_ENCCTL_SET_SPLEVEL,&speed,sizeof(speed));
1645 fprintf(stderr,"Warning: could not set speed level to %i of %i\n",
1647 if(speed>speed_max){
1648 fprintf(stderr,"Setting it to %i instead\n",speed_max);
1650 ret=th_encode_ctl(td,TH_ENCCTL_SET_SPLEVEL,
1651 &speed_max,sizeof(speed_max));
1653 fprintf(stderr,"Warning: could not set speed level to %i of %i\n",
1654 speed_max,speed_max);
1658 /* write the bitstream header packets with proper page interleave */
1659 th_comment_init(&tc);
1660 /* first packet will get its own page automatically */
1661 if(th_encode_flushheader(td,&tc,&op)<=0){
1662 fprintf(stderr,"Internal Theora library error.\n");
1666 ogg_stream_packetin(&to,&op);
1667 if(ogg_stream_pageout(&to,&og)!=1){
1668 fprintf(stderr,"Internal Ogg library error.\n");
1671 fwrite(og.header,1,og.header_len,outfile);
1672 fwrite(og.body,1,og.body_len,outfile);
1674 /* create the remaining theora headers */
1676 ret=th_encode_flushheader(td,&tc,&op);
1678 fprintf(stderr,"Internal Theora library error.\n");
1682 if(passno!=1)ogg_stream_packetin(&to,&op);
1684 if(audio && passno!=1){
1686 ogg_packet header_comm;
1687 ogg_packet header_code;
1688 vorbis_analysis_headerout(&vd,&vc,&header,&header_comm,&header_code);
1689 ogg_stream_packetin(&vo,&header); /* automatically placed in its own
1691 if(ogg_stream_pageout(&vo,&og)!=1){
1692 fprintf(stderr,"Internal Ogg library error.\n");
1695 fwrite(og.header,1,og.header_len,outfile);
1696 fwrite(og.body,1,og.body_len,outfile);
1697 /* remaining vorbis header packets */
1698 ogg_stream_packetin(&vo,&header_comm);
1699 ogg_stream_packetin(&vo,&header_code);
1701 /* Flush the rest of our headers. This ensures
1702 the actual data in each stream will start
1703 on a new page, as per spec. */
1706 int result = ogg_stream_flush(&to,&og);
1708 /* can't get here */
1709 fprintf(stderr,"Internal Ogg library error.\n");
1713 fwrite(og.header,1,og.header_len,outfile);
1714 fwrite(og.body,1,og.body_len,outfile);
1717 if(audio && passno!=1){
1719 int result=ogg_stream_flush(&vo,&og);
1721 /* can't get here */
1722 fprintf(stderr,"Internal Ogg library error.\n");
1726 fwrite(og.header,1,og.header_len,outfile);
1727 fwrite(og.body,1,og.body_len,outfile);
1730 /* setup complete. Raw processing loop */
1733 fprintf(stderr,"\rCompressing.... \n");
1736 fprintf(stderr,"\rScanning first pass.... \n");
1740 int audio_or_video=-1;
1743 int ret=fetch_and_process_video_packet(video,twopass_file,passno,td,&op);
1745 if(op.e_o_s)break; /* end of stream */
1746 timebase=th_granule_time(td,op.granulepos);
1753 /* is there an audio page flushed? If not, fetch one if possible */
1754 audioflag=fetch_and_process_audio(audio,&audiopage,&vo,&vd,&vb,audioflag);
1755 /* is there a video page flushed? If not, fetch one if possible */
1756 videoflag=fetch_and_process_video(video,&videopage,&to,td,twopass_file,passno,videoflag);
1757 /* no pages of either? Must be end of stream. */
1758 if(!audioflag && !videoflag)break;
1759 /* which is earlier; the end of the audio page or the end of the
1760 video page? Flush the earlier to stream */
1762 audioflag?vorbis_granule_time(&vd,ogg_page_granulepos(&audiopage)):-1;
1764 videoflag?th_granule_time(td,ogg_page_granulepos(&videopage)):-1;
1767 } else if(!videoflag) {
1770 if(audiotime<videotime)
1775 if(audio_or_video==1){
1776 /* flush a video page */
1777 video_bytesout+=fwrite(videopage.header,1,videopage.header_len,outfile);
1778 video_bytesout+=fwrite(videopage.body,1,videopage.body_len,outfile);
1782 /* flush an audio page */
1783 audio_bytesout+=fwrite(audiopage.header,1,audiopage.header_len,outfile);
1784 audio_bytesout+=fwrite(audiopage.body,1,audiopage.body_len,outfile);
1790 int hundredths=(int)(timebase*100-(long)timebase*100);
1791 int seconds=(long)timebase%60;
1792 int minutes=((long)timebase/60)%60;
1793 int hours=(long)timebase/3600;
1794 if(audio_or_video)vkbps=(int)rint(video_bytesout*8./timebase*.001);
1795 else akbps=(int)rint(audio_bytesout*8./timebase*.001);
1797 "\r %d:%02d:%02d.%02d audio: %dkbps video: %dkbps ",
1798 hours,minutes,seconds,hundredths,akbps,vkbps);
1801 if(video)th_encode_free(td);
1804 /* clear out state */
1805 if(audio && twopass!=1){
1806 ogg_stream_clear(&vo);
1807 vorbis_block_clear(&vb);
1808 vorbis_dsp_clear(&vd);
1809 vorbis_comment_clear(&vc);
1810 vorbis_info_clear(&vi);
1811 if(audio!=stdin)fclose(audio);
1814 ogg_stream_clear(&to);
1815 th_comment_clear(&tc);
1816 if(video!=stdin)fclose(video);
1819 if(outfile && outfile!=stdout)fclose(outfile);
1820 if(twopass_file)fclose(twopass_file);
1822 fprintf(stderr,"\r \ndone.\n\n");