1 /********************************************************************
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
11 ********************************************************************
14 last mod: $Id: state.c 16503 2009-08-22 18:14:02Z giles $
16 ********************************************************************/
21 #if defined(OC_X86_ASM)
23 # include "x86_vc/x86int.h"
25 # include "x86/x86int.h"
28 #if defined(OC_DUMP_IMAGES)
33 /*Returns the fragment index of the top-left block in a macro block.
34 This can be used to test whether or not the whole macro block is valid.
35 _sb_map: The super block map.
36 _quadi: The quadrant number.
37 Return: The index of the fragment of the upper left block in the macro
38 block, or -1 if the block lies outside the coded frame.*/
39 static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){
40 /*It so happens that under the Hilbert curve ordering described below, the
41 upper-left block in each macro block is at index 0, except in macro block
42 3, where it is at index 2.*/
43 return _sb_map[_quadi][_quadi&_quadi<<1];
46 /*Fills in the mapping from block positions to fragment numbers for a single
48 This function also fills in the "valid" flag of each quadrant in the super
50 _sb_maps: The array of super block maps for the color plane.
51 _sb_flags: The array of super block flags for the color plane.
52 _frag0: The index of the first fragment in the plane.
53 _hfrags: The number of horizontal fragments in a coded frame.
54 _vfrags: The number of vertical fragments in a coded frame.*/
55 static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],
56 oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){
57 /*Contains the (macro_block,block) indices for a 4x4 grid of
59 The pattern is a 4x4 Hilbert space-filling curve.
60 A Hilbert curve has the nice property that as the curve grows larger, its
61 fractal dimension approaches 2.
62 The intuition is that nearby blocks in the curve are also close spatially,
63 with the previous element always an immediate neighbor, so that runs of
64 blocks should be well correlated.*/
65 static const int SB_MAP[4][4][2]={
66 {{0,0},{0,1},{3,2},{3,3}},
67 {{0,3},{0,2},{3,1},{3,0}},
68 {{1,0},{1,3},{2,0},{2,3}},
69 {{1,1},{1,2},{2,1},{2,2}}
79 /*Figure out how many columns of blocks in this super block lie within the
83 else if(imax<=0)break;
89 /*Figure out how many rows of blocks in this super block lie within the
93 else if(jmax<=0)break;
94 /*By default, set all fragment indices to -1.*/
95 memset(_sb_maps[sbi][0],0xFF,sizeof(_sb_maps[sbi]));
96 /*Fill in the fragment map for this super block.*/
101 _sb_maps[sbi][SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j;
105 /*Mark which quadrants of this super block lie within the image.*/
106 for(quadi=0;quadi<4;quadi++){
107 _sb_flags[sbi].quad_valid|=
108 (oc_sb_quad_top_left_frag(_sb_maps[sbi],quadi)>=0)<<quadi;
115 /*Fills in the Y plane fragment map for a macro block given the fragment
116 coordinates of its upper-left hand corner.
117 _mb_map: The macro block map to fill.
118 _fplane: The description of the Y plane.
119 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
120 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
121 static void oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],
122 const oc_fragment_plane *_fplane,int _xfrag0,int _yfrag0){
125 for(i=0;i<2;i++)for(j=0;j<2;j++){
126 _mb_map[0][i<<1|j]=(_yfrag0+i)*(ptrdiff_t)_fplane->nhfrags+_xfrag0+j;
130 /*Fills in the chroma plane fragment maps for a macro block.
131 This version is for use with chroma decimated in the X and Y directions
133 _mb_map: The macro block map to fill.
134 _fplanes: The descriptions of the fragment planes.
135 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
136 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
137 static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],
138 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
142 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
143 _mb_map[1][0]=fragi+_fplanes[1].froffset;
144 _mb_map[2][0]=fragi+_fplanes[2].froffset;
147 /*Fills in the chroma plane fragment maps for a macro block.
148 This version is for use with chroma decimated in the Y direction.
149 _mb_map: The macro block map to fill.
150 _fplanes: The descriptions of the fragment planes.
151 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
152 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
153 static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],
154 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
158 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
160 _mb_map[1][j]=fragi+_fplanes[1].froffset;
161 _mb_map[2][j]=fragi+_fplanes[2].froffset;
166 /*Fills in the chroma plane fragment maps for a macro block.
167 This version is for use with chroma decimated in the X direction (4:2:2).
168 _mb_map: The macro block map to fill.
169 _fplanes: The descriptions of the fragment planes.
170 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
171 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
172 static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],
173 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
177 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
179 _mb_map[1][i<<1]=fragi+_fplanes[1].froffset;
180 _mb_map[2][i<<1]=fragi+_fplanes[2].froffset;
181 fragi+=_fplanes[1].nhfrags;
185 /*Fills in the chroma plane fragment maps for a macro block.
186 This version is for use with no chroma decimation (4:4:4).
187 This uses the already filled-in luma plane values.
188 _mb_map: The macro block map to fill.
189 _fplanes: The descriptions of the fragment planes.*/
190 static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],
191 const oc_fragment_plane _fplanes[3]){
194 _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset;
195 _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset;
/*The function type used to fill in the chroma plane fragment maps for a
   macro block.
  _mb_map:  The macro block map to fill.
  _fplanes: The descriptions of the fragment planes.
  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3],
 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0);
208 /*A table of functions used to fill in the chroma plane fragment maps for a
209 macro block for each type of chrominance decimation.*/
210 static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={
211 oc_mb_fill_cmapping00,
212 oc_mb_fill_cmapping01,
213 oc_mb_fill_cmapping10,
214 (oc_mb_fill_cmapping_func)oc_mb_fill_cmapping11
217 /*Fills in the mapping from macro blocks to their corresponding fragment
218 numbers in each plane.
219 _mb_maps: The list of macro block maps.
220 _mb_modes: The list of macro block modes; macro blocks completely outside
221 the coded region are marked invalid.
222 _fplanes: The descriptions of the fragment planes.
223 _pixel_fmt: The chroma decimation type.*/
224 static void oc_mb_create_mapping(oc_mb_map _mb_maps[],
225 signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){
226 oc_mb_fill_cmapping_func mb_fill_cmapping;
229 mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt];
230 /*Loop through the luma plane super blocks.*/
231 for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){
233 for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){
235 /*Loop through the macro blocks in each super block in display order.*/
236 for(ymb=0;ymb<2;ymb++){
238 for(xmb=0;xmb<2;xmb++){
242 mbi=sbi<<2|OC_MB_MAP[ymb][xmb];
245 /*Initialize fragment indices to -1.*/
246 memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi]));
247 /*Make sure this macro block is within the encoded region.*/
248 if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){
249 _mb_modes[mbi]=OC_MODE_INVALID;
252 /*Fill in the fragment indices for the luma plane.*/
253 oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby);
254 /*Fill in the fragment indices for the chroma planes.*/
255 (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby);
262 /*Marks the fragments which fall all or partially outside the displayable
264 _state: The Theora state containing the fragments to be marked.*/
265 static void oc_state_border_init(oc_theora_state *_state){
267 oc_fragment *yfrag_end;
268 oc_fragment *xfrag_end;
269 oc_fragment_plane *fplane;
277 /*The method we use here is slow, but the code is dead simple and handles
278 all the special cases easily.
279 We only ever need to do it once.*/
280 /*Loop through the fragments, marking those completely outside the
281 displayable region and constructing a border mask for those that straddle
284 yfrag_end=frag=_state->frags;
285 for(pli=0;pli<3;pli++){
286 fplane=_state->fplanes+pli;
287 /*Set up the cropping rectangle for this plane.*/
288 crop_x0=_state->info.pic_x;
289 crop_xf=_state->info.pic_x+_state->info.pic_width;
290 crop_y0=_state->info.pic_y;
291 crop_yf=_state->info.pic_y+_state->info.pic_height;
293 if(!(_state->info.pixel_fmt&1)){
295 crop_xf=crop_xf+1>>1;
297 if(!(_state->info.pixel_fmt&2)){
299 crop_yf=crop_yf+1>>1;
303 for(yfrag_end+=fplane->nfrags;frag<yfrag_end;y+=8){
305 for(xfrag_end=frag+fplane->nhfrags;frag<xfrag_end;frag++,x+=8){
306 /*First check to see if this fragment is completely outside the
307 displayable region.*/
308 /*Note the special checks for an empty cropping rectangle.
309 This guarantees that if we count a fragment as straddling the
310 border below, at least one pixel in the fragment will be inside
311 the displayable region.*/
312 if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y||
313 crop_x0>=crop_xf||crop_y0>=crop_yf){
316 /*Otherwise, check to see if it straddles the border.*/
317 else if(x<crop_x0&&crop_x0<x+8||x<crop_xf&&crop_xf<x+8||
318 y<crop_y0&&crop_y0<y+8||y<crop_yf&&crop_yf<y+8){
326 if(x+j>=crop_x0&&x+j<crop_xf&&y+i>=crop_y0&&y+i<crop_yf){
327 mask|=(ogg_int64_t)1<<(i<<3|j);
332 /*Search the fragment array for border info with the same pattern.
333 In general, there will be at most 8 different patterns (per
336 if(i>=_state->nborders){
338 _state->borders[i].mask=mask;
339 _state->borders[i].npixels=npixels;
341 else if(_state->borders[i].mask!=mask)continue;
346 else frag->borderi=-1;
352 static int oc_state_frarray_init(oc_theora_state *_state){
371 /*Figure out the number of fragments in each plane.*/
372 /*These parameters have already been validated to be multiples of 16.*/
373 yhfrags=_state->info.frame_width>>3;
374 yvfrags=_state->info.frame_height>>3;
375 hdec=!(_state->info.pixel_fmt&1);
376 vdec=!(_state->info.pixel_fmt&2);
377 chfrags=yhfrags+hdec>>hdec;
378 cvfrags=yvfrags+vdec>>vdec;
379 yfrags=yhfrags*(ptrdiff_t)yvfrags;
380 cfrags=chfrags*(ptrdiff_t)cvfrags;
381 nfrags=yfrags+2*cfrags;
382 /*Figure out the number of super blocks in each plane.*/
390 nmbs=(size_t)ysbs<<2;
391 /*Check for overflow.
392 We support the ridiculous upper limits of the specification (1048560 by
393 1048560, or 3 TB frames) if the target architecture has 64-bit pointers,
394 but for those with 32-bit pointers (or smaller!) we have to check.
395 If the caller wants to prevent denial-of-service by imposing a more
396 reasonable upper limit on the size of attempted allocations, they must do
397 so themselves; we have no platform independent way to determine how much
398 system memory there is nor an application-independent way to decide what a
399 "reasonable" allocation is.*/
400 if(yfrags/yhfrags!=yvfrags||2*cfrags<cfrags||nfrags<yfrags||
401 ysbs/yhsbs!=yvsbs||2*csbs<csbs||nsbs<ysbs||nmbs>>2!=ysbs){
404 /*Initialize the fragment array.*/
405 _state->fplanes[0].nhfrags=yhfrags;
406 _state->fplanes[0].nvfrags=yvfrags;
407 _state->fplanes[0].froffset=0;
408 _state->fplanes[0].nfrags=yfrags;
409 _state->fplanes[0].nhsbs=yhsbs;
410 _state->fplanes[0].nvsbs=yvsbs;
411 _state->fplanes[0].sboffset=0;
412 _state->fplanes[0].nsbs=ysbs;
413 _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags;
414 _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags;
415 _state->fplanes[1].froffset=yfrags;
416 _state->fplanes[2].froffset=yfrags+cfrags;
417 _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags;
418 _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs;
419 _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs;
420 _state->fplanes[1].sboffset=ysbs;
421 _state->fplanes[2].sboffset=ysbs+csbs;
422 _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs;
423 _state->nfrags=nfrags;
424 _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags));
425 _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs));
427 _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps));
428 _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags));
429 _state->nhmbs=yhsbs<<1;
430 _state->nvmbs=yvsbs<<1;
432 _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps));
433 _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes));
434 _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis));
435 if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL||
436 _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL||
437 _state->coded_fragis==NULL){
440 /*Create the mapping from super blocks to fragments.*/
441 for(pli=0;pli<3;pli++){
442 oc_fragment_plane *fplane;
443 fplane=_state->fplanes+pli;
444 oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset,
445 _state->sb_flags+fplane->sboffset,fplane->froffset,
446 fplane->nhfrags,fplane->nvfrags);
448 /*Create the mapping from macro blocks to fragments.*/
449 oc_mb_create_mapping(_state->mb_maps,_state->mb_modes,
450 _state->fplanes,_state->info.pixel_fmt);
451 /*Initialize the invalid and borderi fields of each fragment.*/
452 oc_state_border_init(_state);
456 static void oc_state_frarray_clear(oc_theora_state *_state){
457 _ogg_free(_state->coded_fragis);
458 _ogg_free(_state->mb_modes);
459 _ogg_free(_state->mb_maps);
460 _ogg_free(_state->sb_flags);
461 _ogg_free(_state->sb_maps);
462 _ogg_free(_state->frag_mvs);
463 _ogg_free(_state->frags);
467 /*Initializes the buffers used for reconstructed frames.
468 These buffers are padded with 16 extra pixels on each side, to allow
469 unrestricted motion vectors without special casing the boundary.
470 If chroma is decimated in either direction, the padding is reduced by a
471 factor of 2 on the appropriate sides.
472 _nrefs: The number of reference buffers to init; must be 3 or 4.*/
473 static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
475 unsigned char *ref_frame_data;
476 size_t ref_frame_data_sz;
486 ptrdiff_t *frag_buf_offs;
492 if(_nrefs<3||_nrefs>4)return TH_EINVAL;
494 /*Compute the image buffer parameters for each plane.*/
495 hdec=!(info->pixel_fmt&1);
496 vdec=!(info->pixel_fmt&2);
497 yhstride=info->frame_width+2*OC_UMV_PADDING;
498 yheight=info->frame_height+2*OC_UMV_PADDING;
499 chstride=yhstride>>hdec;
500 cheight=yheight>>vdec;
501 yplane_sz=yhstride*(size_t)yheight;
502 cplane_sz=chstride*(size_t)cheight;
503 yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride;
504 coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride;
505 ref_frame_sz=yplane_sz+2*cplane_sz;
506 ref_frame_data_sz=_nrefs*ref_frame_sz;
507 /*Check for overflow.
508 The same caveats apply as for oc_state_frarray_init().*/
509 if(yplane_sz/yhstride!=yheight||2*cplane_sz<cplane_sz||
510 ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){
513 ref_frame_data=_ogg_malloc(ref_frame_data_sz);
514 frag_buf_offs=_state->frag_buf_offs=
515 _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs));
516 if(ref_frame_data==NULL||frag_buf_offs==NULL){
517 _ogg_free(frag_buf_offs);
518 _ogg_free(ref_frame_data);
521 /*Set up the width, height and stride for the image buffers.*/
522 _state->ref_frame_bufs[0][0].width=info->frame_width;
523 _state->ref_frame_bufs[0][0].height=info->frame_height;
524 _state->ref_frame_bufs[0][0].stride=yhstride;
525 _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width=
526 info->frame_width>>hdec;
527 _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height=
528 info->frame_height>>vdec;
529 _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride=
531 for(rfi=1;rfi<_nrefs;rfi++){
532 memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0],
533 sizeof(_state->ref_frame_bufs[0]));
535 /*Set up the data pointers for the image buffers.*/
536 for(rfi=0;rfi<_nrefs;rfi++){
537 _state->ref_frame_data[rfi]=ref_frame_data;
538 _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset;
539 ref_frame_data+=yplane_sz;
540 _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset;
541 ref_frame_data+=cplane_sz;
542 _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset;
543 ref_frame_data+=cplane_sz;
544 /*Flip the buffer upside down.
545 This allows us to decode Theora's bottom-up frames in their natural
546 order, yet return a top-down buffer with a positive stride to the user.*/
547 oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi],
548 _state->ref_frame_bufs[rfi]);
550 _state->ref_ystride[0]=-yhstride;
551 _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride;
552 /*Initialize the fragment buffer offsets.*/
553 ref_frame_data=_state->ref_frame_data[0];
555 for(pli=0;pli<3;pli++){
556 th_img_plane *iplane;
557 oc_fragment_plane *fplane;
560 ptrdiff_t vfragi_end;
562 iplane=_state->ref_frame_bufs[0]+pli;
563 fplane=_state->fplanes+pli;
565 vfragi_end=fplane->froffset+fplane->nfrags;
566 nhfrags=fplane->nhfrags;
567 stride=iplane->stride;
568 while(fragi<vfragi_end){
569 ptrdiff_t hfragi_end;
572 for(hfragi_end=fragi+nhfrags;fragi<hfragi_end;fragi++){
573 frag_buf_offs[fragi]=hpix-ref_frame_data;
579 /*Initialize the reference frame indices.*/
580 _state->ref_frame_idx[OC_FRAME_GOLD]=
581 _state->ref_frame_idx[OC_FRAME_PREV]=
582 _state->ref_frame_idx[OC_FRAME_SELF]=-1;
583 _state->ref_frame_idx[OC_FRAME_IO]=_nrefs>3?3:-1;
587 static void oc_state_ref_bufs_clear(oc_theora_state *_state){
588 _ogg_free(_state->frag_buf_offs);
589 _ogg_free(_state->ref_frame_data[0]);
593 void oc_state_vtable_init_c(oc_theora_state *_state){
594 _state->opt_vtable.frag_copy=oc_frag_copy_c;
595 _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
596 _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
597 _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
598 _state->opt_vtable.idct8x8=oc_idct8x8_c;
599 _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c;
600 _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_c;
601 _state->opt_vtable.state_loop_filter_frag_rows=
602 oc_state_loop_filter_frag_rows_c;
603 _state->opt_vtable.restore_fpu=oc_restore_fpu_c;
604 _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
607 /*Initialize the accelerated function pointers.*/
608 void oc_state_vtable_init(oc_theora_state *_state){
609 #if defined(OC_X86_ASM)
610 oc_state_vtable_init_x86(_state);
612 oc_state_vtable_init_c(_state);
617 int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){
619 /*First validate the parameters.*/
620 if(_info==NULL)return TH_EFAULT;
621 /*The width and height of the encoded frame must be multiples of 16.
622 They must also, when divided by 16, fit into a 16-bit unsigned integer.
623 The displayable frame offset coordinates must fit into an 8-bit unsigned
625 Note that the offset Y in the API is specified on the opposite side from
626 how it is specified in the bitstream, because the Y axis is flipped in
628 The displayable frame must fit inside the encoded frame.
629 The color space must be one known by the encoder.*/
630 if((_info->frame_width&0xF)||(_info->frame_height&0xF)||
631 _info->frame_width<=0||_info->frame_width>=0x100000||
632 _info->frame_height<=0||_info->frame_height>=0x100000||
633 _info->pic_x+_info->pic_width>_info->frame_width||
634 _info->pic_y+_info->pic_height>_info->frame_height||
635 _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255||
636 /*Note: the following <0 comparisons may generate spurious warnings on
637 platforms where enums are unsigned.
638 We could cast them to unsigned and just use the following >= comparison,
639 but there are a number of compilers which will mis-optimize this.
640 It's better to live with the spurious warnings.*/
641 _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES||
642 _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){
645 memset(_state,0,sizeof(*_state));
646 memcpy(&_state->info,_info,sizeof(*_info));
647 /*Invert the sense of pic_y to match Theora's right-handed coordinate
649 _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
650 _state->frame_type=OC_UNKWN_FRAME;
651 oc_state_vtable_init(_state);
652 ret=oc_state_frarray_init(_state);
653 if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs);
655 oc_state_frarray_clear(_state);
658 /*If the keyframe_granule_shift is out of range, use the maximum allowable
660 if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){
661 _state->info.keyframe_granule_shift=31;
663 _state->keyframe_num=0;
664 _state->curframe_num=-1;
665 /*3.2.0 streams mark the frame index instead of the frame count.
666 This was changed with stream version 3.2.1 to conform to other Ogg
668 We add an extra bias when computing granule positions for new streams.*/
669 _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1);
673 void oc_state_clear(oc_theora_state *_state){
674 oc_state_ref_bufs_clear(_state);
675 oc_state_frarray_clear(_state);
679 /*Duplicates the pixels on the border of the image plane out into the
680 surrounding padding for use by unrestricted motion vectors.
681 This function only adds the left and right borders, and only for the fragment
683 _refi: The index of the reference buffer to pad.
684 _pli: The color plane.
685 _y0: The Y coordinate of the first row to pad.
686 _yend: The Y coordinate of the row to stop padding at.*/
687 void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
689 th_img_plane *iplane;
695 hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
696 iplane=_state->ref_frame_bufs[_refi]+_pli;
697 stride=iplane->stride;
698 apix=iplane->data+_y0*(ptrdiff_t)stride;
699 bpix=apix+iplane->width-1;
700 epix=iplane->data+_yend*(ptrdiff_t)stride;
701 /*Note the use of != instead of <, which allows the stride to be negative.*/
703 memset(apix-hpadding,apix[0],hpadding);
704 memset(bpix+1,bpix[0],hpadding);
710 /*Duplicates the pixels on the border of the image plane out into the
711 surrounding padding for use by unrestricted motion vectors.
712 This function only adds the top and bottom borders, and must be called after
713 the left and right borders are added.
714 _refi: The index of the reference buffer to pad.
715 _pli: The color plane.*/
716 void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){
717 th_img_plane *iplane;
725 hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
726 vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2));
727 iplane=_state->ref_frame_bufs[_refi]+_pli;
728 stride=iplane->stride;
729 fullw=iplane->width+(hpadding<<1);
730 apix=iplane->data-hpadding;
731 bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding;
732 epix=apix-stride*(ptrdiff_t)vpadding;
734 memcpy(apix-stride,apix,fullw);
735 memcpy(bpix+stride,bpix,fullw);
741 /*Duplicates the pixels on the border of the given reference image out into
742 the surrounding padding for use by unrestricted motion vectors.
743 _state: The context containing the reference buffers.
744 _refi: The index of the reference buffer to pad.*/
745 void oc_state_borders_fill(oc_theora_state *_state,int _refi){
747 for(pli=0;pli<3;pli++){
748 oc_state_borders_fill_rows(_state,_refi,pli,0,
749 _state->ref_frame_bufs[_refi][pli].height);
750 oc_state_borders_fill_caps(_state,_refi,pli);
754 /*Determines the offsets in an image buffer to use for motion compensation.
755 _state: The Theora state the offsets are to be computed with.
756 _offsets: Returns the offset for the buffer(s).
757 _offsets[0] is always set.
758 _offsets[1] is set if the motion vector has non-zero fractional
760 _pli: The color plane index.
761 _dx: The X component of the motion vector.
762 _dy: The Y component of the motion vector.
763 Return: The number of offsets returned: 1 or 2.*/
764 int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
765 int _pli,int _dx,int _dy){
766 /*Here is a brief description of how Theora handles motion vectors:
767 Motion vector components are specified to half-pixel accuracy in
768 undecimated directions of each plane, and quarter-pixel accuracy in
769 decimated directions.
770 Integer parts are extracted by dividing (not shifting) by the
771 appropriate amount, with truncation towards zero.
772 These integer values are used to calculate the first offset.
774 If either of the fractional parts are non-zero, then a second offset is
776 No third or fourth offsets are computed, even if both components have
777 non-zero fractional parts.
778 The second offset is computed by dividing (not shifting) by the
779 appropriate amount, always truncating _away_ from zero.*/
781 /*This version of the code doesn't use any tables, but is slower.*/
788 ystride=_state->ref_ystride[_pli];
789 /*These two variables decide whether we are in half- or quarter-pixel
790 precision in each component.*/
791 xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1));
792 yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2));
793 /*These two variables are either 0 if all the fractional bits are zero or -1
794 if any of them are non-zero.*/
795 xfrac=OC_SIGNMASK(-(_dx&(xprec|1)));
796 yfrac=OC_SIGNMASK(-(_dy&(yprec|1)));
797 offs=(_dx>>xprec)+(_dy>>yprec)*ystride;
801 xmask=OC_SIGNMASK(_dx);
802 ymask=OC_SIGNMASK(_dy);
804 _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask);
805 _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask);
813 /*Using tables simplifies the code, and there's enough arithmetic to hide the
814 latencies of the memory references.*/
815 static const signed char OC_MVMAP[2][64]={
817 -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8,
818 -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0,
819 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
820 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15
823 -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4,
824 -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1, 0, 0, 0,
825 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
826 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7
829 static const signed char OC_MVMAP2[2][64]={
831 -1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1,
832 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1,
833 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
834 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
837 -1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1,
838 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1,
839 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
840 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1
851 ystride=_state->ref_ystride[_pli];
852 qpy=_pli!=0&&!(_state->info.pixel_fmt&2);
853 my=OC_MVMAP[qpy][_dy+31];
854 my2=OC_MVMAP2[qpy][_dy+31];
855 qpx=_pli!=0&&!(_state->info.pixel_fmt&1);
856 mx=OC_MVMAP[qpx][_dx+31];
857 mx2=OC_MVMAP2[qpx][_dx+31];
860 _offsets[1]=offs+my2*ystride+mx2;
869 void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
870 int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
871 _state->opt_vtable.state_frag_recon(_state,_fragi,_pli,_dct_coeffs,
872 _last_zzi,_dc_quant);
875 void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
876 int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
878 ptrdiff_t frag_buf_off;
881 /*Apply the inverse transform.*/
882 /*Special case only having a DC component.*/
886 /*We round this dequant product (and not any of the others) because there's
888 p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
890 for(ci=0;ci<64;ci++)_dct_coeffs[ci]=p;
893 /*First, dequantize the DC coefficient.*/
894 _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
895 oc_idct8x8(_state,_dct_coeffs,_last_zzi);
897 /*Fill in the target buffer.*/
898 frag_buf_off=_state->frag_buf_offs[_fragi];
899 mb_mode=_state->frags[_fragi].mb_mode;
900 ystride=_state->ref_ystride[_pli];
901 dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
902 if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs);
904 const unsigned char *ref;
907 _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
909 if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
910 _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){
911 oc_frag_recon_inter2(_state,
912 dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs);
914 else oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs);
918 /*Copies the fragments specified by the lists of fragment indices from one
920 _fragis: A pointer to a list of fragment indices.
921 _nfragis: The number of fragment indices to copy.
922 _dst_frame: The reference frame to copy to.
923 _src_frame: The reference frame to copy from.
924 _pli: The color plane the fragments lie in.*/
925 void oc_state_frag_copy_list(const oc_theora_state *_state,
926 const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
927 int _dst_frame,int _src_frame,int _pli){
928 _state->opt_vtable.state_frag_copy_list(_state,_fragis,_nfragis,_dst_frame,
932 void oc_state_frag_copy_list_c(const oc_theora_state *_state,
933 const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
934 int _dst_frame,int _src_frame,int _pli){
935 const ptrdiff_t *frag_buf_offs;
936 const unsigned char *src_frame_data;
937 unsigned char *dst_frame_data;
940 dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]];
941 src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]];
942 ystride=_state->ref_ystride[_pli];
943 frag_buf_offs=_state->frag_buf_offs;
944 for(fragii=0;fragii<_nfragis;fragii++){
945 ptrdiff_t frag_buf_off;
946 frag_buf_off=frag_buf_offs[_fragis[fragii]];
947 oc_frag_copy(_state,dst_frame_data+frag_buf_off,
948 src_frame_data+frag_buf_off,ystride);
952 static void loop_filter_h(unsigned char *_pix,int _ystride,int *_bv){
957 f=_pix[0]-_pix[3]+3*(_pix[2]-_pix[1]);
958 /*The _bv array is used to compute the function
959 f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
960 where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
962 _pix[1]=OC_CLAMP255(_pix[1]+f);
963 _pix[2]=OC_CLAMP255(_pix[2]-f);
968 static void loop_filter_v(unsigned char *_pix,int _ystride,int *_bv){
973 f=_pix[x]-_pix[_ystride*3+x]+3*(_pix[_ystride*2+x]-_pix[_ystride+x]);
974 /*The _bv array is used to compute the function
975 f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
976 where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
978 _pix[_ystride+x]=OC_CLAMP255(_pix[_ystride+x]+f);
979 _pix[_ystride*2+x]=OC_CLAMP255(_pix[_ystride*2+x]-f);
983 /*Initialize the bounding values array used by the loop filter.
984 _bv: Storage for the array.
985 Return: 0 on success, or a non-zero value if no filtering need be applied.*/
/*_state: Supplies loop_filter_limits[] and qis[0], which together select the
           filter limit used to build the table.
  NOTE(review): the declarations of flimit and i, the statements between the
   two assignments shown inside the loop (the embedded numbering skips there),
   and the final return are not visible in this excerpt.*/
986 int oc_state_loop_filter_init(oc_theora_state *_state,int _bv[256]){
989 flimit=_state->loop_filter_limits[_state->qis[0]];
/*A zero limit is the "no filtering" case; _bv is left untouched.*/
990 if(flimit==0)return 1;
/*Every entry that is not explicitly written below stays zero.*/
991 memset(_bv,0,sizeof(_bv[0])*256);
992 for(i=0;i<flimit;i++){
/*Lower tail: indices 127-flimit down to 127-2*flimit+1 receive the values
   -flimit up to -1.
  The test skips indices that would fall below 0.*/
993 if(127-i-flimit>=0)_bv[127-i-flimit]=i-flimit;
/*Upper tail: indices 127+flimit up to 127+2*flimit-1 receive the values
   flimit down to 1.
  The test skips indices that would fall past the 256-entry table.*/
996 if(127+i+flimit<256)_bv[127+i+flimit]=flimit-i;
1001 /*Apply the loop filter to a given set of fragment rows in the given plane.
1002 The filter may be run on the bottom edge, affecting pixels in the next row of
1003 fragments, so this row also needs to be available.
1004 _bv: The bounding values array.
1005 _refi: The index of the frame buffer to filter.
1006 _pli: The color plane to filter.
1007 _fragy0: The Y coordinate of the first fragment row to filter.
1008 _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
/*_state: The codec state; also holds the function pointer that is called.
  This function does no filtering itself: it forwards every argument,
   unchanged, to _state->opt_vtable.state_loop_filter_frag_rows.
  NOTE(review): the closing brace is not visible in this excerpt.*/
1009 void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,int _bv[256],
1010 int _refi,int _pli,int _fragy0,int _fragy_end){
1011 _state->opt_vtable.state_loop_filter_frag_rows(_state,_bv,_refi,_pli,
1012 _fragy0,_fragy_end);
/*Walks the fragments of the requested rows and calls loop_filter_h() and
   loop_filter_v() on the edges of the coded ones.
  _state:     Supplies the fragment planes, the fragment flags, the buffer
               offsets, the strides and the reference frame data.
  _bv:        The bounding values array, passed through to the edge filters.
  _refi:      Index into _state->ref_frame_data of the frame to filter.
  _pli:       Index of the fragment plane (and of the stride) to use.
  _fragy0:    The first fragment row to process.
  _fragy_end: The fragment row at which processing stops (exclusive).
  NOTE(review): several declarations (fragi0, nhfrags, ystride, fragi, ref),
   the loop increments and the closing braces are not visible in this
   excerpt.*/
1015 void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,int *_bv,
1016 int _refi,int _pli,int _fragy0,int _fragy_end){
1017 const oc_fragment_plane *fplane;
1018 const oc_fragment *frags;
1019 const ptrdiff_t *frag_buf_offs;
1020 unsigned char *ref_frame_data;
1021 ptrdiff_t fragi_top;
1022 ptrdiff_t fragi_bot;
1024 ptrdiff_t fragi0_end;
1028 fplane=_state->fplanes+_pli;
1029 nhfrags=fplane->nhfrags;
/*[fragi_top,fragi_bot) is the range of fragment indices owned by this
   plane.*/
1030 fragi_top=fplane->froffset;
1031 fragi_bot=fragi_top+fplane->nfrags;
/*[fragi0,fragi0_end) covers the requested rows.
  The casts make the row multiplications happen in ptrdiff_t rather than
   int.*/
1032 fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
1033 fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags;
1034 ystride=_state->ref_ystride[_pli];
1035 frags=_state->frags;
1036 frag_buf_offs=_state->frag_buf_offs;
1037 ref_frame_data=_state->ref_frame_data[_refi];
1038 /*The following loops are constructed somewhat non-intuitively on purpose.
1039 The main idea is: if a block boundary has at least one coded fragment on
1040 it, the filter is applied to it.
1041 However, the order that the filters are applied in matters, and VP3 chose
1042 the somewhat strange ordering used below.*/
1043 while(fragi0<fragi0_end){
1045 ptrdiff_t fragi_end;
1047 fragi_end=fragi+nhfrags;
1048 while(fragi<fragi_end){
/*Only coded fragments start any filtering; the edges of uncoded fragments are
   reached solely through the neighbour tests below.*/
1049 if(frags[fragi].coded){
1051 ref=ref_frame_data+frag_buf_offs[fragi];
/*Left edge: skipped for the first fragment of the row.*/
1052 if(fragi>fragi0)loop_filter_h(ref,ystride,_bv);
/*Top edge: skipped for the first row of the plane.*/
1053 if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv);
/*Right edge (8 samples to the right of ref): only when a right neighbour
   exists in this row and is not coded.
  A coded neighbour filters this boundary as its own left edge.*/
1054 if(fragi+1<fragi_end&&!frags[fragi+1].coded){
1055 loop_filter_h(ref+8,ystride,_bv);
/*Bottom edge (8 rows below ref): only when the fragment directly below lies
   inside this plane and is not coded.
  A coded fragment below filters this boundary as its own top edge.*/
1057 if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
1058 loop_filter_v(ref+(ystride<<3),ystride,_bv);
1067 #if defined(OC_DUMP_IMAGES)
/*Writes one reference frame to a PNG file as 16-bit RGB.
  The file name is built from the frame number derived from _state->granpos
   followed by _suf and ".png".
  _state: Supplies the frame buffers, the granule position and the stream info
           (dimensions, pixel format, colorspace, aspect ratio).
  _frame: Index into _state->ref_frame_idx selecting the frame to dump.
  Return: TH_EFAULT if the output file cannot be opened; the other return
           values are not visible in this excerpt.
  NOTE(review): the rest of the parameter list (including the declaration of
   _suf), most local declarations, the error handling after each libpng and
   allocation call, the inner pixel loop header and the closing braces are not
   visible in this excerpt.*/
1068 int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
1070 /*Dump a PNG of the reconstructed image.*/
1076 unsigned char *y_row;
1077 unsigned char *u_row;
1078 unsigned char *v_row;
1092 width=_state->info.frame_width;
1093 height=_state->info.frame_height;
/*Split the granule position at keyframe_granule_shift; the sum of the two
   parts is used as the number in the file name.*/
1094 iframe=_state->granpos>>_state->info.keyframe_granule_shift;
1095 pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift);
/*NOTE(review): sprintf() does not bound the write into fname; the size of
   fname and the possible lengths of _suf are not visible here - confirm the
   buffer is large enough or switch to a bounded formatter.*/
1096 sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf);
1097 fp=fopen(fname,"wb");
1098 if(fp==NULL)return TH_EFAULT;
/*Each row holds 6 bytes per pixel: three channels of two bytes each (see the
   six stores per pixel below).*/
1099 image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image));
1104 png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
1110 info=png_create_info_struct(png);
1112 png_destroy_write_struct(&png,NULL);
/*libpng reports errors by jumping back to this setjmp() point.
  NOTE(review): whether fp and image are released on this path is not visible
   in this excerpt.*/
1117 if(setjmp(png_jmpbuf(png))){
1118 png_destroy_write_struct(&png,&info);
1123 framei=_state->ref_frame_idx[_frame];
1124 y_row=_state->ref_frame_bufs[framei][0].data;
1125 u_row=_state->ref_frame_bufs[framei][1].data;
1126 v_row=_state->ref_frame_bufs[framei][2].data;
1127 y_stride=_state->ref_frame_bufs[framei][0].stride;
1128 u_stride=_state->ref_frame_bufs[framei][1].stride;
1129 v_stride=_state->ref_frame_bufs[framei][2].stride;
1130 /*Chroma up-sampling is just done with a box filter.
1131 This is very likely what will actually be used in practice on a real
1132 display, and also removes one more layer to search in for the source of
1134 As an added bonus, it's dead simple.*/
/*Output rows are filled starting from index height-1 and ending at index 0.*/
1135 for(imgi=height;imgi-->0;){
1140 for(imgj=0;imgj<6*width;){
1147 /*This is intentionally slow and very accurate.*/
/*Remove the offsets (16 for luma, 128 for chroma) and scale by the 219 and
   224 ranges before converting to RGB in floating point.*/
1148 yval=(*y-16)*(1.0F/219);
1149 uval=(*u-128)*(2*(1-0.114F)/224);
1150 vval=(*v-128)*(2*(1-0.299F)/224);
/*Each channel is scaled to 0..65535, rounded and clamped.*/
1151 rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535);
1152 gval=OC_CLAMPI(0,(int)(65535*(
1153 yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535);
1154 bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535);
/*Each 16-bit value is stored high byte first, in R, G, B order.*/
1155 image[imgi][imgj++]=(unsigned char)(rval>>8);
1156 image[imgi][imgj++]=(unsigned char)(rval&0xFF);
1157 image[imgi][imgj++]=(unsigned char)(gval>>8);
1158 image[imgi][imgj++]=(unsigned char)(gval&0xFF);
1159 image[imgi][imgj++]=(unsigned char)(bval>>8);
1160 image[imgi][imgj++]=(unsigned char)(bval&0xFF);
/*NOTE(review): dc is computed from the luma position and the pixel format
   bits; presumably it controls when the chroma pointers advance, but the
   statements that use it are not visible here - confirm.*/
1161 dc=(y-y_row&1)|(_state->info.pixel_fmt&1);
1166 dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1);
1171 png_init_io(png,fp);
1172 png_set_compression_level(png,Z_BEST_COMPRESSION);
/*Bit depth 16 and RGB color type match the 6-bytes-per-pixel rows built
   above.*/
1173 png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB,
1174 PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT);
/*Gamma and chromaticity chunks are only written for the colorspaces listed
   here.*/
1175 switch(_state->info.colorspace){
1176 case TH_CS_ITU_REC_470M:{
1177 png_set_gAMA(png,info,2.2);
1178 png_set_cHRM_fixed(png,info,31006,31616,
1179 67000,32000,21000,71000,14000,8000);
1181 case TH_CS_ITU_REC_470BG:{
1182 png_set_gAMA(png,info,2.67);
1183 png_set_cHRM_fixed(png,info,31271,32902,
1184 64000,33000,29000,60000,15000,6000);
/*The stream's aspect ratio is written to the pHYs chunk with unit type 0.*/
1188 png_set_pHYs(png,info,_state->info.aspect_numerator,
1189 _state->info.aspect_denominator,0);
1190 png_set_rows(png,info,image);
1191 png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL);
1192 png_write_end(png,info);
1193 png_destroy_write_struct(&png,&info);
/*Converts a granule position into a frame index.
  _encdec:  Cast to oc_theora_state *; its info member supplies the keyframe
             granule shift and the stream version.
  _granpos: The granule position to convert.
  Return: iframe+pframe, reduced by the value of
           TH_VERSION_CHECK(&state->info,3,2,1) (see the comment before the
           return statement).
  NOTE(review): the declarations of iframe and pframe, any validity checks on
   the arguments, and the closing brace are not visible in this excerpt.*/
1202 ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos){
1203 oc_theora_state *state;
1204 state=(oc_theora_state *)_encdec;
/*The bits above keyframe_granule_shift form iframe; what is left after
   removing them is pframe.*/
1208 iframe=_granpos>>state->info.keyframe_granule_shift;
1209 pframe=_granpos-(iframe<<state->info.keyframe_granule_shift);
1210 /*3.2.0 streams store the frame index in the granule position.
1211 3.2.1 and later store the frame count.
1212 We return the index, so adjust the value if we have a 3.2.1 or later
1214 return iframe+pframe-TH_VERSION_CHECK(&state->info,3,2,1);
/*Converts a granule position into a time value.
  The visible return statement multiplies (th_granule_frame()+1) by
   fps_denominator/fps_numerator, with the division done in double precision.
  _encdec:  Cast to oc_theora_state *; its info member supplies the frame
             rate fraction.
  _granpos: The granule position to convert.
  Return: The product described above; presumably a time in seconds, given
           the frames-per-second field names - TODO confirm.
  NOTE(review): fps_numerator is used as a divisor and no check of it, or of
   state, is visible here; the statements around the return and the closing
   brace are not visible in this excerpt.*/
1219 double th_granule_time(void *_encdec,ogg_int64_t _granpos){
1220 oc_theora_state *state;
1221 state=(oc_theora_state *)_encdec;
/*The +1 makes the result refer to the point one frame past the start of the
   frame whose index th_granule_frame() returns.*/
1223 return (th_granule_frame(_encdec, _granpos)+1)*(
1224 (double)state->info.fps_denominator/state->info.fps_numerator);