2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG-4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
29 #define UNCHECKED_BITSTREAM_READER 1
32 #include "h264_mvpred.h"
35 #include "mpegutils.h"
36 #include "libavutil/avassert.h"
/* ue(v) code -> coded_block_pattern for inter MBs when chroma is absent
 * ("gray"/4:0:0 path, see the !decode_chroma branch in ff_h264_decode_mb_cavlc).
 * NOTE(review): the closing "};" is elided in this listing. */
39 static const uint8_t golomb_to_inter_cbp_gray[16]={
40 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
/* ue(v) code -> coded_block_pattern for intra4x4 MBs on the gray-chroma path.
 * NOTE(review): the closing "};" is elided in this listing. */
43 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
44 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
/* Code length / bit pattern tables for the chroma DC coeff_token VLCs
 * (4:2:0 uses 4*5 entries, 4:2:2 uses 4*9). They are fed to vlc_init()
 * in ff_h264_decode_init_vlc().
 * NOTE(review): the table bodies are elided in this listing — only the
 * declarations are visible; do not assume the arrays are empty. */
47 static const uint8_t chroma_dc_coeff_token_len[4*5]={
55 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
63 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
75 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
/* coeff_token code lengths, one sub-table per nC context class
 * (indexed via coeff_token_table_index[] in decode_residual; each row group
 * is 4*17 entries: 4 trailing-ones values x 17 total_coeff values).
 * NOTE(review): separator lines / inner braces between the four sub-tables
 * are elided in this listing. */
87 static const uint8_t coeff_token_len[4][4*17]={
90 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
91 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
92 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
93 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
97 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
98 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
99 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
100 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
104 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
105 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
106 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
107 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
111 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
112 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
113 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
114 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
/* coeff_token bit patterns matching coeff_token_len[] above; the last
 * sub-table is the fixed 6-bit FLC used for the nC >= 8 context.
 * NOTE(review): inner braces between sub-tables are elided in this listing. */
118 static const uint8_t coeff_token_bits[4][4*17]={
121 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
122 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
123 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
124 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
128 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
129 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
130 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
131 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
135 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
136 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
137 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
138 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
142 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
143 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
144 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
145 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
/* total_zeros code lengths, one row per total_coeff value (1..15).
 * NOTE(review): rows 8 through 15 and the closing brace are elided in
 * this listing. */
149 static const uint8_t total_zeros_len[16][16]= {
150 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
151 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
152 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
153 {5,3,4,4,3,3,3,4,3,4,5,5,5},
154 {4,4,4,3,3,3,3,3,4,5,4,5},
155 {6,5,3,3,3,3,3,3,4,3,6},
156 {6,5,3,3,3,2,3,4,3,6},
/* total_zeros bit patterns matching total_zeros_len[] row for row.
 * NOTE(review): rows 8 through 15 and the closing brace are elided in
 * this listing. */
167 static const uint8_t total_zeros_bits[16][16]= {
168 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
169 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
170 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
171 {3,7,5,4,6,5,4,3,3,2,2,1,0},
172 {5,4,3,7,6,5,4,3,2,1,1,0},
173 {1,1,7,6,5,4,3,2,1,1,0},
174 {1,1,5,4,3,3,2,1,1,0},
/* total_zeros tables for the small chroma DC blocks (4:2:0 and 4:2:2),
 * plus the run_before len/bits tables (row index = min(zeros_left,7)-1;
 * the last row feeds the separate run7 VLC).
 * NOTE(review): most rows of these tables are elided in this listing;
 * only a few representative rows are visible. */
185 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
191 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
197 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
198 { 1, 3, 3, 4, 4, 4, 5, 5 },
199 { 3, 2, 3, 3, 3, 3, 3 },
200 { 3, 3, 2, 2, 3, 3 },
207 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
208 { 1, 2, 3, 2, 3, 1, 1, 0 },
209 { 0, 1, 1, 4, 5, 6, 7 },
210 { 0, 1, 1, 2, 6, 7 },
217 static const uint8_t run_len[7][16]={
224 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
227 static const uint8_t run_bits[7][16]={
234 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
/* Statically-allocated VLC tables, filled once by ff_h264_decode_init_vlc().
 * Each VLC gets its backing VLCElem storage and a *_size constant that is
 * cross-checked (table_allocated) by vlc_init() with VLC_INIT_USE_STATIC.
 * The "+1" array sizes exist because the decoders index these by
 * total_coeff / zeros_left starting at 1 (see decode_residual). */
237 static VLC coeff_token_vlc[4];
238 static VLCElem coeff_token_vlc_tables[520+332+280+256];
239 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
241 static VLC chroma_dc_coeff_token_vlc;
242 static VLCElem chroma_dc_coeff_token_vlc_table[256];
243 static const int chroma_dc_coeff_token_vlc_table_size = 256;
245 static VLC chroma422_dc_coeff_token_vlc;
246 static VLCElem chroma422_dc_coeff_token_vlc_table[8192];
247 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
249 static VLC total_zeros_vlc[15+1];
250 static VLCElem total_zeros_vlc_tables[15][512];
251 static const int total_zeros_vlc_tables_size = 512;
253 static VLC chroma_dc_total_zeros_vlc[3+1];
254 static VLCElem chroma_dc_total_zeros_vlc_tables[3][8];
255 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
257 static VLC chroma422_dc_total_zeros_vlc[7+1];
258 static VLCElem chroma422_dc_total_zeros_vlc_tables[7][32];
259 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
261 static VLC run_vlc[6+1];
262 static VLCElem run_vlc_tables[6][8];
263 static const int run_vlc_tables_size = 8;
/* NOTE(review): the declaration of run7_vlc itself is elided in this
 * listing; only its backing table is visible here. */
266 static VLCElem run7_vlc_table[96];
267 static const int run7_vlc_table_size = 96;
/* Level-decoding lookup: maps the next LEVEL_TAB_BITS bits, per
 * suffix_length, to {decoded value or prefix+100, bits consumed}.
 * Built by init_cavlc_level_tab(). */
269 #define LEVEL_TAB_BITS 8
270 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/* Number of bits read per step by get_vlc2() for each VLC; must match the
 * depths the tables were built with in ff_h264_decode_init_vlc(). */
272 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
273 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
274 #define COEFF_TOKEN_VLC_BITS 8
275 #define TOTAL_ZEROS_VLC_BITS 9
276 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
277 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
278 #define RUN_VLC_BITS 3
279 #define RUN7_VLC_BITS 6
282 * Get the predicted number of non-zero coefficients.
283 * @param n block index
/* Prediction reads the cached nnz of the left and top neighbour blocks
 * via the scan8[] cache layout (index8-1 = left, index8-8 = top).
 * NOTE(review): the lines combining left/top into i (and the averaging
 * around the visible "(i+1)>>1") are elided in this listing — confirm
 * against the full source before relying on the exact formula. */
285 static inline int pred_non_zero_count(const H264Context *h, const H264SliceContext *sl, int n)
287 const int index8= scan8[n];
288 const int left = sl->non_zero_count_cache[index8 - 1];
289 const int top = sl->non_zero_count_cache[index8 - 8];
292 if(i<64) i= (i+1)>>1;
294 ff_tlog(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Build cavlc_level_tab: for every suffix_length (0..6) and every possible
 * LEVEL_TAB_BITS-bit lookahead value i, precompute
 *   [0] = the fully decoded signed level (fast path), or prefix+100 when the
 *         codeword does not fit in the window (caller falls back to
 *         get_level_prefix() when the stored prefix == LEVEL_TAB_BITS),
 *   [1] = the number of bits to skip.
 * Consumers: the level-decoding loops in decode_residual().
 * NOTE(review): several closing braces / the "else" keyword of the final
 * branch are elided in this listing. */
299 static av_cold void init_cavlc_level_tab(void){
303 for(suffix_length=0; suffix_length<7; suffix_length++){
304 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
305 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
306 /* codeword (prefix + sign/suffix) fits entirely in the window: decode it now */
307 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
308 int level_code = (prefix << suffix_length) +
309 (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
310 int mask = -(level_code&1);
/* map unsigned code to signed level: even -> positive, odd -> negative */
311 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
312 cavlc_level_tab[suffix_length][i][0]= level_code;
313 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
/* only the prefix fits: store prefix+100 as an escape marker */
314 }else if(prefix + 1 <= LEVEL_TAB_BITS){
315 cavlc_level_tab[suffix_length][i][0]= prefix+100;
316 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
/* not even the prefix fits the window: caller must call get_level_prefix() */
318 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
319 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* One-time initialization of all CAVLC VLC tables from the static len/bits
 * arrays above. All VLCs use preallocated storage (VLC_INIT_USE_STATIC), so
 * .table / .table_allocated are set before each vlc_init() call.
 * NOTE(review): this listing elides closing braces of the for-loops and a
 * few lines (e.g. the declaration of `offset`, the RUN_VLC_BITS argument of
 * the run_vlc init) — the structure below is partial. */
325 av_cold void ff_h264_decode_init_vlc(void)
/* chroma DC coeff_token, 4:2:0 (4*5 symbols) */
329 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
330 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
331 vlc_init(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
332 &chroma_dc_coeff_token_len [0], 1, 1,
333 &chroma_dc_coeff_token_bits[0], 1, 1,
334 VLC_INIT_USE_STATIC);
/* chroma DC coeff_token, 4:2:2 (4*9 symbols) */
336 chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
337 chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
338 vlc_init(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
339 &chroma422_dc_coeff_token_len [0], 1, 1,
340 &chroma422_dc_coeff_token_bits[0], 1, 1,
341 VLC_INIT_USE_STATIC);
/* the four luma coeff_token context tables share one packed VLCElem array;
 * `offset` walks through coeff_token_vlc_tables[] */
344 for (int i = 0; i < 4; i++) {
345 coeff_token_vlc[i].table = coeff_token_vlc_tables + offset;
346 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
347 vlc_init(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
348 &coeff_token_len [i][0], 1, 1,
349 &coeff_token_bits[i][0], 1, 1,
350 VLC_INIT_USE_STATIC);
351 offset += coeff_token_vlc_tables_size[i];
354 * This is a one time safety check to make sure that
355 * the packed static coeff_token_vlc table sizes
356 * were initialized correctly.
358 av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
/* total_zeros VLCs are indexed by total_coeff starting at 1, hence [i + 1] */
360 for (int i = 0; i < 3; i++) {
361 chroma_dc_total_zeros_vlc[i + 1].table = chroma_dc_total_zeros_vlc_tables[i];
362 chroma_dc_total_zeros_vlc[i + 1].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
363 vlc_init(&chroma_dc_total_zeros_vlc[i + 1],
364 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
365 &chroma_dc_total_zeros_len [i][0], 1, 1,
366 &chroma_dc_total_zeros_bits[i][0], 1, 1,
367 VLC_INIT_USE_STATIC);
370 for (int i = 0; i < 7; i++) {
371 chroma422_dc_total_zeros_vlc[i + 1].table = chroma422_dc_total_zeros_vlc_tables[i];
372 chroma422_dc_total_zeros_vlc[i + 1].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
373 vlc_init(&chroma422_dc_total_zeros_vlc[i + 1],
374 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
375 &chroma422_dc_total_zeros_len [i][0], 1, 1,
376 &chroma422_dc_total_zeros_bits[i][0], 1, 1,
377 VLC_INIT_USE_STATIC);
380 for (int i = 0; i < 15; i++) {
381 total_zeros_vlc[i + 1].table = total_zeros_vlc_tables[i];
382 total_zeros_vlc[i + 1].table_allocated = total_zeros_vlc_tables_size;
383 vlc_init(&total_zeros_vlc[i + 1],
384 TOTAL_ZEROS_VLC_BITS, 16,
385 &total_zeros_len [i][0], 1, 1,
386 &total_zeros_bits[i][0], 1, 1,
387 VLC_INIT_USE_STATIC);
/* run_before VLCs, indexed by zeros_left starting at 1 */
390 for (int i = 0; i < 6; i++) {
391 run_vlc[i + 1].table = run_vlc_tables[i];
392 run_vlc[i + 1].table_allocated = run_vlc_tables_size;
393 vlc_init(&run_vlc[i + 1],
395 &run_len [i][0], 1, 1,
396 &run_bits[i][0], 1, 1,
397 VLC_INIT_USE_STATIC);
/* dedicated VLC for zeros_left > 6 (last row of run_len/run_bits) */
399 run7_vlc.table = run7_vlc_table;
400 run7_vlc.table_allocated = run7_vlc_table_size;
401 vlc_init(&run7_vlc, RUN7_VLC_BITS, 16,
402 &run_len [6][0], 1, 1,
403 &run_bits[6][0], 1, 1,
404 VLC_INIT_USE_STATIC);
406 init_cavlc_level_tab();
/* Count the leading zero bits before the next 1 bit (the level_prefix of a
 * CAVLC level codeword) and consume them plus the terminating 1.
 * Uses the raw-cache bitreader macros; works on the 32-bit cache word.
 * NOTE(review): OPEN_READER and the return statement are elided in this
 * listing; presumably it returns log-derived prefix length — confirm
 * against the full source. */
409 static inline int get_level_prefix(GetBitContext *gb){
414 UPDATE_CACHE(re, gb);
415 buf=GET_CACHE(re, gb);
/* position of the highest set bit gives the zero-run length */
417 log= 32 - av_log2(buf);
419 LAST_SKIP_BITS(re, gb, log);
420 CLOSE_READER(re, gb);
/* NOTE(review): many interior lines of this function (braces, else branches,
 * the level[] declaration, the final STORE_BLOCK invocations and return)
 * are elided in this listing; comments below describe only visible code. */
426 * Decode a residual block.
427 * @param n block index
428 * @param scantable scantable
429 * @param max_coeff number of coefficients in the block
430 * @return <0 if an error occurred
432 static int decode_residual(const H264Context *h, H264SliceContext *sl,
433 GetBitContext *gb, int16_t *block, int n,
434 const uint8_t *scantable, const uint32_t *qmul,
/* maps total_coeff prediction (nC) to one of the 4 coeff_token VLC contexts */
437 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
439 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
441 //FIXME put trailing_onex into the context
/* --- coeff_token: chroma DC blocks use dedicated VLCs, everything else
 * picks a luma VLC context from the predicted nnz of the neighbours --- */
445 coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
447 coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
448 total_coeff= coeff_token>>2;
450 if(n >= LUMA_DC_BLOCK_INDEX){
451 total_coeff= pred_non_zero_count(h, sl, (n - LUMA_DC_BLOCK_INDEX)*16);
452 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
453 total_coeff= coeff_token>>2;
455 total_coeff= pred_non_zero_count(h, sl, n);
456 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
457 total_coeff= coeff_token>>2;
/* record decoded nnz for this block; later used by neighbour prediction
 * and deblocking */
460 sl->non_zero_count_cache[scan8[n]] = total_coeff;
462 //FIXME set last_non_zero?
466 if(total_coeff > (unsigned)max_coeff) {
467 av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff);
/* --- trailing ones: up to 3 coefficients of magnitude 1 whose signs are
 * coded as single bits (peeked together, then only trailing_ones consumed) --- */
471 trailing_ones= coeff_token&3;
472 ff_tlog(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
473 av_assert2(total_coeff<=16);
475 i = show_bits(gb, 3);
476 skip_bits(gb, trailing_ones);
477 level[0] = 1-((i&4)>>1);
478 level[1] = 1-((i&2) );
479 level[2] = 1-((i&1)<<1);
/* --- first non-trailing level: fast path via cavlc_level_tab lookup,
 * escape (>= 100) falls back to explicit prefix/suffix parsing --- */
481 if(trailing_ones<total_coeff) {
483 int suffix_length = total_coeff > 10 & trailing_ones < 3;
484 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
485 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
487 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
488 if(level_code >= 100){
489 prefix= level_code - 100;
490 if(prefix == LEVEL_TAB_BITS)
491 prefix += get_level_prefix(gb);
493 //first coefficient has suffix_length equal to 0 or 1
494 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
496 level_code= (prefix<<1) + get_bits1(gb); //part
498 level_code= prefix; //part
499 }else if(prefix==14){
501 level_code= (prefix<<1) + get_bits1(gb); //part
503 level_code= prefix + get_bits(gb, 4); //part
508 av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
511 level_code += (1<<(prefix-3))-4096;
513 level_code += get_bits(gb, prefix-3); //part
516 if(trailing_ones < 3) level_code += 2;
/* unsigned code -> signed level (even positive, odd negative) */
519 mask= -(level_code&1);
520 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
522 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
524 suffix_length = 1 + (level_code + 3U > 6U);
525 level[trailing_ones]= level_code;
528 //remaining coefficients have suffix_length > 0
529 for(i=trailing_ones+1;i<total_coeff;i++) {
530 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
531 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
532 level_code= cavlc_level_tab[suffix_length][bitsi][0];
534 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
535 if(level_code >= 100){
536 prefix= level_code - 100;
537 if(prefix == LEVEL_TAB_BITS){
538 prefix += get_level_prefix(gb);
541 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
543 level_code = 15<<suffix_length;
546 av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
547 return AVERROR_INVALIDDATA;
549 level_code += (1<<(prefix-3))-4096;
551 level_code += get_bits(gb, prefix-3);
553 mask= -(level_code&1);
554 level_code= (((2+level_code)>>1) ^ mask) - mask;
556 level[i]= level_code;
/* adapt suffix_length when the magnitude exceeds the threshold table */
557 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
/* --- total_zeros: skipped when the block is full; VLC family depends on
 * block size (chroma DC variants vs. regular) --- */
561 if(total_coeff == max_coeff)
564 if (max_coeff <= 8) {
566 zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff].table,
567 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
569 zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff].table,
570 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
572 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* STORE_BLOCK: writes levels back in scan order (walking the scantable
 * backwards by run_before), with an optional dequant (qmul) applied for
 * non-DC blocks; expanded for int16_t and (via pixel_shift) int32_t below. */
576 #define STORE_BLOCK(type) \
577 scantable += zeros_left + total_coeff - 1; \
578 if(n >= LUMA_DC_BLOCK_INDEX){ \
579 ((type*)block)[*scantable] = level[0]; \
580 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
582 run_before= get_vlc2(gb, run_vlc[zeros_left].table, RUN_VLC_BITS, 1); \
584 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
585 zeros_left -= run_before; \
586 scantable -= 1 + run_before; \
587 ((type*)block)[*scantable]= level[i]; \
589 for(;i<total_coeff;i++) { \
591 ((type*)block)[*scantable]= level[i]; \
594 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
595 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
597 run_before= get_vlc2(gb, run_vlc[zeros_left].table, RUN_VLC_BITS, 1); \
599 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
600 zeros_left -= run_before; \
601 scantable -= 1 + run_before; \
602 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
604 for(;i<total_coeff;i++) { \
606 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
610 if (h->pixel_shift) {
617 av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y);
/* Decode the luma residual of one macroblock for plane p (p > 0 only in
 * CAVLC 4:4:4, where chroma planes are coded like luma).
 * Intra16x16 MBs: decode the 4x4 DC block first (no dequant), then, if
 * cbp requests it, the 15 AC coefficients of each 4x4 block.
 * Other MBs: per 8x8 block, either one 8x8-transform block (interleaved
 * from four 4x4 CAVLC scans) or four 4x4 blocks; accumulates a per-8x8
 * luma CBP for deblocking.
 * NOTE(review): loop-closing braces, error returns and the return value
 * of this function are elided in this listing. */
624 static av_always_inline
625 int decode_luma_residual(const H264Context *h, H264SliceContext *sl,
626 GetBitContext *gb, const uint8_t *scan,
627 const uint8_t *scan8x8, int pixel_shift,
628 int mb_type, int cbp, int p)
/* plane 0 uses the luma qscale; planes 1/2 reuse the chroma QPs */
631 int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
632 if(IS_INTRA16x16(mb_type)){
633 AV_ZERO128(sl->mb_luma_dc[p]+0);
634 AV_ZERO128(sl->mb_luma_dc[p]+8);
635 AV_ZERO128(sl->mb_luma_dc[p]+16);
636 AV_ZERO128(sl->mb_luma_dc[p]+24);
637 if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) {
638 return -1; //FIXME continue if partitioned and other return -1 too
641 av_assert2((cbp&15) == 0 || (cbp&15) == 15);
/* AC pass: scan + 1 skips the DC position, 15 coeffs per 4x4 block */
644 for(i8x8=0; i8x8<4; i8x8++){
645 for(i4x4=0; i4x4<4; i4x4++){
646 const int index= i4x4 + 4*i8x8 + p*16;
647 if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift),
648 index, scan + 1, h->ps.pps->dequant4_coeff[p][qscale], 15) < 0 ){
655 fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
/* dequant matrix set: intra uses 0..2, inter 3..5 (offset by plane) */
659 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
660 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
662 for(i8x8=0; i8x8<4; i8x8++){
664 if(IS_8x8DCT(mb_type)){
665 int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift];
667 for(i4x4=0; i4x4<4; i4x4++){
668 const int index= i4x4 + 4*i8x8 + p*16;
669 if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4,
670 h->ps.pps->dequant8_coeff[cqm][qscale], 16) < 0 )
/* fold the four 4x4 nnz into one value for the 8x8 block */
673 nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
674 nnz[0] += nnz[1] + nnz[8] + nnz[9];
675 new_cbp |= !!nnz[0] << i8x8;
677 for(i4x4=0; i4x4<4; i4x4++){
678 const int index= i4x4 + 4*i8x8 + p*16;
679 if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index,
680 scan, h->ps.pps->dequant4_coeff[cqm][qscale], 16) < 0 ){
683 new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8;
/* 8x8 block not coded per cbp: clear its nnz cache entries */
687 uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
688 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Decode one macroblock of a CAVLC-coded slice: skip-run handling, mb_type,
 * intra prediction modes or inter ref indices + motion vectors, cbp, qp
 * delta, and finally the residual blocks via decode_(luma_)residual().
 * NOTE(review): this listing elides a large number of interior lines
 * (braces, else branches, declarations such as ret/dquant/partition_count,
 * several error returns and the final return). Comments below are limited
 * to what the visible lines establish. */
695 int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl)
699 unsigned int mb_type, cbp;
700 int dct8x8_allowed = h->ps.pps->transform_8x8_mode;
701 const int decode_chroma = h->ps.sps->chroma_format_idc == 1 || h->ps.sps->chroma_format_idc == 2;
702 const int pixel_shift = h->pixel_shift;
704 mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
706 ff_tlog(h->avctx, "pic:%d mb:%d/%d\n", h->poc.frame_num, sl->mb_x, sl->mb_y);
707 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- mb_skip_run (P/B slices): read once, then counted down per MB --- */
709 if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
710 if (sl->mb_skip_run == -1) {
711 unsigned mb_skip_run = get_ue_golomb_long(&sl->gb);
712 if (mb_skip_run > h->mb_num) {
713 av_log(h->avctx, AV_LOG_ERROR, "mb_skip_run %d is invalid\n", mb_skip_run);
714 return AVERROR_INVALIDDATA;
716 sl->mb_skip_run = mb_skip_run;
719 if (sl->mb_skip_run--) {
/* MBAFF: field flag is read on the top MB of a pair when the run ends */
720 if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) {
721 if (sl->mb_skip_run == 0)
722 sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
724 decode_mb_skip(h, sl);
728 if (FRAME_MBAFF(h)) {
729 if ((sl->mb_y & 1) == 0)
730 sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
733 sl->prev_mb_skipped = 0;
/* --- mb_type: slice-type dependent remap through the info tables --- */
735 mb_type= get_ue_golomb(&sl->gb);
736 if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
738 partition_count = ff_h264_b_mb_type_info[mb_type].partition_count;
739 mb_type = ff_h264_b_mb_type_info[mb_type].type;
742 goto decode_intra_mb;
744 } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) {
746 partition_count = ff_h264_p_mb_type_info[mb_type].partition_count;
747 mb_type = ff_h264_p_mb_type_info[mb_type].type;
750 goto decode_intra_mb;
753 av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_I);
754 if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
758 av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y);
762 cbp = ff_h264_i_mb_type_info[mb_type].cbp;
763 sl->intra16x16_pred_mode = ff_h264_i_mb_type_info[mb_type].pred_mode;
764 mb_type = ff_h264_i_mb_type_info[mb_type].type;
768 mb_type |= MB_TYPE_INTERLACED;
770 h->slice_table[mb_xy] = sl->slice_num;
/* --- IPCM: raw samples follow in the bitstream, no residual decoding --- */
772 if(IS_INTRA_PCM(mb_type)){
773 const int mb_size = ff_h264_mb_sizes[h->ps.sps->chroma_format_idc] *
774 h->ps.sps->bit_depth_luma;
776 // We assume these blocks are very rare so we do not optimize it.
777 sl->intra_pcm_ptr = align_get_bits(&sl->gb);
778 if (get_bits_left(&sl->gb) < mb_size) {
779 av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
780 return AVERROR_INVALIDDATA;
782 skip_bits_long(&sl->gb, mb_size);
784 // In deblocking, the quantizer is 0
785 h->cur_pic.qscale_table[mb_xy] = 0;
786 // All coeffs are present
787 memset(h->non_zero_count[mb_xy], 16, 48);
789 h->cur_pic.mb_type[mb_xy] = mb_type;
793 fill_decode_neighbors(h, sl, mb_type);
794 fill_decode_caches(h, sl, mb_type);
/* --- intra MB: read prediction modes --- */
797 if(IS_INTRA(mb_type)){
799 // init_top_left_availability(h);
800 if(IS_INTRA4x4(mb_type)){
803 if(dct8x8_allowed && get_bits1(&sl->gb)){
804 mb_type |= MB_TYPE_8x8DCT;
808 // fill_intra4x4_pred_table(h);
809 for(i=0; i<16; i+=di){
810 int mode = pred_intra_mode(h, sl, i);
/* prev_intra4x4_pred_mode_flag == 0: 3-bit rem_mode replaces prediction */
812 if(!get_bits1(&sl->gb)){
813 const int rem_mode= get_bits(&sl->gb, 3);
814 mode = rem_mode + (rem_mode >= mode);
818 fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
820 sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
822 write_back_intra_pred_mode(h, sl);
823 if (ff_h264_check_intra4x4_pred_mode(sl->intra4x4_pred_mode_cache, h->avctx,
824 sl->top_samples_available, sl->left_samples_available) < 0)
827 sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
828 sl->left_samples_available, sl->intra16x16_pred_mode, 0);
829 if (sl->intra16x16_pred_mode < 0)
833 pred_mode= ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
834 sl->left_samples_available, get_ue_golomb_31(&sl->gb), 1);
837 sl->chroma_pred_mode = pred_mode;
839 sl->chroma_pred_mode = DC_128_PRED8x8;
/* --- 8x8 partitions: sub_mb_types, ref indices, then per-sub-block MVs --- */
841 }else if(partition_count==4){
842 int i, j, sub_partition_count[4], list, ref[2][4];
844 if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
846 sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
847 if(sl->sub_mb_type[i] >=13){
848 av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
851 sub_partition_count[i] = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
852 sl->sub_mb_type[i] = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].type;
854 if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) {
855 ff_h264_pred_direct_motion(h, sl, &mb_type);
856 sl->ref_cache[0][scan8[4]] =
857 sl->ref_cache[1][scan8[4]] =
858 sl->ref_cache[0][scan8[12]] =
859 sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
862 av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
864 sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
865 if(sl->sub_mb_type[i] >=4){
866 av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
869 sub_partition_count[i] = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
870 sl->sub_mb_type[i] = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].type;
874 for (list = 0; list < sl->list_count; list++) {
875 int ref_count = IS_REF0(mb_type) ? 1 : sl->ref_count[list] << MB_MBAFF(sl);
877 if(IS_DIRECT(sl->sub_mb_type[i])) continue;
878 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
/* ref_count==2: ref_idx is a single flag, inverted */
882 }else if(ref_count == 2){
883 tmp= get_bits1(&sl->gb)^1;
885 tmp= get_ue_golomb_31(&sl->gb);
887 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
900 dct8x8_allowed = get_dct8x8_allowed(h, sl);
902 for (list = 0; list < sl->list_count; list++) {
904 if(IS_DIRECT(sl->sub_mb_type[i])) {
905 sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
908 sl->ref_cache[list][ scan8[4*i] ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
909 sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
911 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
912 const int sub_mb_type= sl->sub_mb_type[i];
913 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
914 for(j=0; j<sub_partition_count[i]; j++){
916 const int index= 4*i + block_width*j;
917 int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
918 pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
/* (unsigned) additions: defined wraparound on corrupt mvd input */
919 mx += (unsigned)get_se_golomb(&sl->gb);
920 my += (unsigned)get_se_golomb(&sl->gb);
921 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
923 if(IS_SUB_8X8(sub_mb_type)){
925 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
927 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
928 }else if(IS_SUB_8X4(sub_mb_type)){
929 mv_cache[ 1 ][0]= mx;
930 mv_cache[ 1 ][1]= my;
931 }else if(IS_SUB_4X8(sub_mb_type)){
932 mv_cache[ 8 ][0]= mx;
933 mv_cache[ 8 ][1]= my;
935 mv_cache[ 0 ][0]= mx;
936 mv_cache[ 0 ][1]= my;
939 uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
945 }else if(IS_DIRECT(mb_type)){
946 ff_h264_pred_direct_motion(h, sl, &mb_type);
947 dct8x8_allowed &= h->ps.sps->direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 partitions: refs then MVs per partition --- */
950 //FIXME we should set ref_idx_l? to 0 if we use that later ...
951 if(IS_16X16(mb_type)){
952 for (list = 0; list < sl->list_count; list++) {
954 if(IS_DIR(mb_type, 0, list)){
955 unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
958 } else if (rc == 2) {
959 val= get_bits1(&sl->gb)^1;
961 val= get_ue_golomb_31(&sl->gb);
963 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
967 fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
970 for (list = 0; list < sl->list_count; list++) {
971 if(IS_DIR(mb_type, 0, list)){
972 pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
973 mx += (unsigned)get_se_golomb(&sl->gb);
974 my += (unsigned)get_se_golomb(&sl->gb);
975 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
977 fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
981 else if(IS_16X8(mb_type)){
982 for (list = 0; list < sl->list_count; list++) {
985 if(IS_DIR(mb_type, i, list)){
986 unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
989 } else if (rc == 2) {
990 val= get_bits1(&sl->gb)^1;
992 val= get_ue_golomb_31(&sl->gb);
994 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
999 val= LIST_NOT_USED&0xFF;
1000 fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
1003 for (list = 0; list < sl->list_count; list++) {
1006 if(IS_DIR(mb_type, i, list)){
1007 pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1008 mx += (unsigned)get_se_golomb(&sl->gb);
1009 my += (unsigned)get_se_golomb(&sl->gb);
1010 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1012 val= pack16to32(mx,my);
1015 fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1019 av_assert2(IS_8X16(mb_type));
1020 for (list = 0; list < sl->list_count; list++) {
1023 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1024 unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
1027 } else if (rc == 2) {
1028 val= get_bits1(&sl->gb)^1;
1030 val= get_ue_golomb_31(&sl->gb);
1032 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1037 val= LIST_NOT_USED&0xFF;
1038 fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1041 for (list = 0; list < sl->list_count; list++) {
1044 if(IS_DIR(mb_type, i, list)){
1045 pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1046 mx += (unsigned)get_se_golomb(&sl->gb);
1047 my += (unsigned)get_se_golomb(&sl->gb);
1048 ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1050 val= pack16to32(mx,my);
1053 fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1059 if(IS_INTER(mb_type))
1060 write_back_motion(h, sl, mb_type);
/* --- coded_block_pattern (absent for intra16x16, implied by mb_type) --- */
1062 if(!IS_INTRA16x16(mb_type)){
1063 cbp= get_ue_golomb(&sl->gb);
1067 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1070 if (IS_INTRA4x4(mb_type))
1071 cbp = ff_h264_golomb_to_intra4x4_cbp[cbp];
1073 cbp = ff_h264_golomb_to_inter_cbp[cbp];
1076 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
/* monochrome streams use the local *_gray remap tables */
1079 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1080 else cbp= golomb_to_inter_cbp_gray[cbp];
1083 if (!decode_chroma && cbp>15) {
1084 av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
1085 return AVERROR_INVALIDDATA;
/* transform_size_8x8_flag, only when luma residual exists and MB is inter */
1089 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1090 mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb);
1093 h->cbp_table[mb_xy]= cbp;
1094 h->cur_pic.mb_type[mb_xy] = mb_type;
/* --- residuals: qp delta, scan selection, then luma + chroma blocks --- */
1096 if(cbp || IS_INTRA16x16(mb_type)){
1097 int i4x4, i8x8, chroma_idx;
1100 GetBitContext *gb = &sl->gb;
1101 const uint8_t *scan, *scan8x8;
1102 const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8);
1104 dquant= get_se_golomb(&sl->gb);
1106 sl->qscale += (unsigned)dquant;
/* qscale wraps modulo (max_qp+1); a second out-of-range hit is an error */
1108 if (((unsigned)sl->qscale) > max_qp){
1109 if (sl->qscale < 0) sl->qscale += max_qp + 1;
1110 else sl->qscale -= max_qp+1;
1111 if (((unsigned)sl->qscale) > max_qp){
1112 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y);
1113 sl->qscale = max_qp;
1118 sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale);
1119 sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale);
1121 if(IS_INTERLACED(mb_type)){
1122 scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1123 scan = sl->qscale ? h->field_scan : h->field_scan_q0;
1125 scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1126 scan = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1129 if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
/* store the 8x8 luma CBP returned for plane 0 (CAVLC 4:4:4 deblocking) */
1132 h->cbp_table[mb_xy] |= ret << 12;
1134 if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
1137 if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
1141 const int num_c8x8 = h->ps.sps->chroma_format_idc;
1144 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1145 if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1146 CHROMA_DC_BLOCK_INDEX + chroma_idx,
1147 CHROMA422(h) ? ff_h264_chroma422_dc_scan : ff_h264_chroma_dc_scan,
1148 NULL, 4 * num_c8x8) < 0) {
1154 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1155 const uint32_t *qmul = h->ps.pps->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1156 int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1157 for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
1158 for (i4x4 = 0; i4x4 < 4; i4x4++) {
1159 const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1160 if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
1162 mb += 16 << pixel_shift;
1167 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1168 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
/* no residual at all: clear the whole nnz cache */
1172 fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1173 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1174 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1176 h->cur_pic.qscale_table[mb_xy] = sl->qscale;
1177 write_back_non_zero_count(h, sl);