2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of Libav.
12 * Libav is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * Libav is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with Libav; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
32 #include "rectangle.h"
/**
 * Free all heap state owned by the context: per-thread sync primitives and
 * filter-strength buffers, the thread-data array, macroblock storage, and
 * the top-row prediction/nnz/border buffers.
 * NOTE(review): braces and the loop-variable declaration are missing from
 * this extract; only comments were added.
 */
static void free_buffers(VP8Context *s)
    for (i = 0; i < MAX_THREADS; i++) {
        pthread_cond_destroy(&s->thread_data[i].cond);
        pthread_mutex_destroy(&s->thread_data[i].lock);
        av_freep(&s->thread_data[i].filter_strength);
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);
    /* s->macroblocks aliases into macroblocks_base — clear the alias too */
    s->macroblocks = NULL;
/**
 * Allocate the underlying picture buffer for f and a zeroed per-macroblock
 * segmentation map; on seg_map failure the picture buffer is released again.
 * @param ref nonzero to request a reference-counted (AV_GET_BUFFER_FLAG_REF) buffer
 * @return negative AVERROR on failure (success path not visible in this extract)
 */
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
    /* one segment id per macroblock */
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
/** Release a frame: drop its segmentation map and its picture buffer. */
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
#if CONFIG_VP8_DECODER
/**
 * Make dst a new reference to src (picture buffer and seg_map).
 * dst is released first; on seg_map failure everything acquired so far is
 * released again before returning.
 */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
    vp8_release_frame(s, dst);  /* drop any previous contents of dst */
    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
#endif /* CONFIG_VP8_DECODER */
/**
 * Release every frame slot and clear the reference-frame pointer table.
 * @param free_mem also free long-lived allocations (handled in lines not
 *        visible in this extract — presumably via free_buffers(); confirm)
 */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
    VP8Context *s = avctx->priv_data;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));
/** Codec flush callback: release frames but keep allocated memory. */
static void vp8_decode_flush(AVCodecContext *avctx)
    vp8_decode_flush_impl(avctx, 0);
/**
 * Return a frame slot that is not currently referenced as CURRENT, PREVIOUS,
 * GOLDEN or GOLDEN2 (altref); any stale picture in the slot is released.
 * NOTE(review): the loop-exit/fatal-error control flow lines are missing
 * from this extract.
 */
static VP8Frame *vp8_find_free_buffer(VP8Context *s)
    VP8Frame *frame = NULL;
    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
    av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
    /* make sure the chosen slot holds no stale picture */
    if (frame->tf.f->data[0])
        vp8_release_frame(s, frame);
/**
 * (Re)allocate all per-dimension decoding state. On an actual size change
 * everything is flushed/freed first, then macroblock arrays, top-row
 * buffers and per-thread data are reallocated for the new mb_width/height.
 * @param is_vp7 selects the VP7 vs VP8 macroblock-layout rules
 * @return AVERROR(ENOMEM) on allocation failure (success return and error
 *         cleanup lines are not visible in this extract)
 */
static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
    AVCodecContext *avctx = s->avctx;
    if (width != s->avctx->width ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);
        ret = ff_set_dimensions(s->avctx, width, height);
    s->mb_width = (s->avctx->coded_width + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;
    /* VP7, or sliced threading with more than one usable coefficient
     * partition, uses the alternate macroblock layout */
    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                         sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
    /* intra4x4_pred_mode_top is only needed in the non-sliced layout */
    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        return AVERROR(ENOMEM);
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            return AVERROR(ENOMEM);
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
    /* skip the guard entry at index -1 */
    s->macroblocks = s->macroblocks_base + 1;
/** VP7 entry point for update_dimensions(). */
static int vp7_update_dimensions(VP8Context *s, int width, int height)
    return update_dimensions(s, width, height, IS_VP7);
/** VP8 entry point for update_dimensions(). */
static int vp8_update_dimensions(VP8Context *s, int width, int height)
    return update_dimensions(s, width, height, IS_VP8);
/**
 * Parse the segmentation header: optional per-segment quant and filter-level
 * feature data, plus segment-id tree probabilities when the map is updated.
 */
static void parse_segment_info(VP8Context *s)
    VP56RangeCoder *c = &s->c;
    s->segmentation.update_map = vp8_rac_get(c);
    if (vp8_rac_get(c)) { // update segment feature data
        /* absolute values vs deltas against the frame base quantizer */
        s->segmentation.absolute_vals = vp8_rac_get(c);
        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/**
 * Read loop-filter delta updates for the four reference frames and the four
 * macroblock modes; values are sign-magnitude coded (magnitude, then sign).
 * NOTE(review): the sign-flag reads guarding the negations are missing from
 * this extract.
 */
static void update_lf_deltas(VP8Context *s)
    VP56RangeCoder *c = &s->c;
    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
            s->lf_delta.ref[i] = -s->lf_delta.ref[i];
    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
            s->lf_delta.mode[i] = -s->lf_delta.mode[i];
/**
 * Initialize one range decoder per DCT coefficient partition. The sizes of
 * all but the last partition are stored as 24-bit LE values ahead of the
 * partition data; the last partition takes whatever remains.
 * @return nonzero on an invalid partition layout (exact error paths are not
 *         visible in this extract)
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
    const uint8_t *sizes = buf;
    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
    /* skip over the size table */
    buf += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
    /* final partition: all remaining data */
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/**
 * VP7 quantizer setup: one base AC index plus optional per-plane overrides,
 * mapped through the VP7 lookup tables into qmat[0] (VP7 has no segments).
 */
static void vp7_get_quants(VP8Context *s)
    VP56RangeCoder *c = &s->c;
    int yac_qi = vp8_rac_get_uint(c, 7);
    /* each override is present only if its flag bit is set */
    int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    /* chroma DC is capped at 132 */
    s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
/**
 * VP8 quantizer setup: a base AC index plus signed per-plane deltas,
 * resolved for each of the four segments into s->qmat[i].
 * NOTE(review): the non-segmented base_qi assignment and delta-mode lines
 * are missing from this extract.
 */
static void get_quants(VP8Context *s)
    VP56RangeCoder *c = &s->c;
    int yac_qi = vp8_rac_get_uint(c, 7);
    int ydc_delta = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);
    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
        /* indices are clamped to the 0..127 table range */
        s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
        s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
        /* spec-mandated floor/cap */
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 * 1: VP56_FRAME_PREVIOUS
 * 2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
    VP56RangeCoder *c = &s->c;
    /* explicit update flag: refresh with the frame being decoded */
        return VP56_FRAME_CURRENT;
    /* NOTE(review): the case labels of this switch are missing from this extract */
    switch (vp8_rac_get_uint(c, 2)) {
        return VP56_FRAME_PREVIOUS;
        /* cross-copy between golden and altref */
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    return VP56_FRAME_NONE;
/** Reset the DCT token probabilities to the per-band defaults. */
static void vp78_reset_probability_tables(VP8Context *s)
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
/**
 * 13.3 Token Probability Updates: each token probability is conditionally
 * replaced; a new value is fanned out to every coefficient index that maps
 * to the updated band (via vp8_coeff_band_indexes).
 */
static void vp78_update_probability_tables(VP8Context *s)
    VP56RangeCoder *c = &s->c;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
/* per-component MV probability counts differ between VP7 and VP8 */
#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

/**
 * Read updated 16x16-intra and chroma prediction probabilities, then the
 * 17.2 MV probability updates.
 * NOTE(review): the mvc_size parameter line of the signature is missing
 * from this extract (it is used in the loop below).
 */
static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
    VP56RangeCoder *c = &s->c;
    for (i = 0; i < 4; i++)
        s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    for (i = 0; i < 3; i++)
        s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/** Read golden/altref refresh flags and resolve them via ref_to_update(). */
static void update_refs(VP8Context *s)
    VP56RangeCoder *c = &s->c;
    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);
    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/**
 * Copy the two chroma planes from src to dst at half the given luma
 * width/height.
 * NOTE(review): despite its name this copies planes 1 and 2 (chroma), not
 * luma — the luma plane is handled separately by fade() at the call site;
 * consider renaming together with the caller.
 */
static void copy_luma(AVFrame *dst, AVFrame *src, int width, int height)
    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
/**
 * Per-pixel fade: dst = clip_uint8(y + (y * beta >> 8) + alpha).
 * NOTE(review): the parameter list is truncated in this extract — alpha and
 * beta are used below but their declarations are not visible.
 */
static void fade(uint8_t *dst, uint8_t *src,
                 int width, int height, ptrdiff_t linesize,
    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * linesize + i];
            dst[j * linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
/**
 * VP7 fading: fade the previous frame's luma plane into a (possibly new)
 * previous-frame buffer and copy its chroma planes. No-op on keyframes or
 * when both fade coefficients are zero.
 * @return negative AVERROR when no previous frame exists or allocation fails
 */
static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
    int alpha = (int8_t) vp8_rac_get_uint(c, 8);
    int beta = (int8_t) vp8_rac_get_uint(c, 8);
    if (!s->keyframe && (alpha || beta)) {
        int width = s->mb_width * 16;
        int height = s->mb_height * 16;
        if (!s->framep[VP56_FRAME_PREVIOUS])
            return AVERROR_INVALIDDATA;
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
            /* copy_luma() actually copies the two chroma planes */
            copy_luma(dst, src, width, height);
        fade(dst->data[0], src->data[0],
             width, height, dst->linesize[0], alpha, beta);
/**
 * Parse a complete VP7 frame header (sections A-J) and set up the range
 * decoders for header and coefficient data.
 * NOTE(review): many guard lines (keyframe/interframe checks, braces, error
 * returns) are missing from this extract; only comments were added.
 * @return negative AVERROR on invalid data (success return not visible here)
 */
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width = s->avctx->width;
    int height = s->avctx->height;
        return AVERROR_INVALIDDATA;
    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    s->keyframe = !(buf[0] & 1);
    part1_size = AV_RL24(buf) >> 4;
    /* fixed header length depends on the profile */
    buf += 4 - s->profile;
    buf_size -= 4 - s->profile;
    if (buf_size < part1_size) {
        return AVERROR_INVALIDDATA;
    /* VP7 always uses the 6-tap epel MC functions */
    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    ff_vp56_init_range_decoder(c, buf, part1_size);
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    width = vp8_rac_get_uint(c, 12);
    height = vp8_rac_get_uint(c, 12);
    hscale = vp8_rac_get_uint(c, 2);
    vscale = vp8_rac_get_uint(c, 2);
    if (hscale || vscale)
        avpriv_request_sample(s->avctx, "Upscaling");

    /* keyframes refresh all references and reset probabilities to defaults */
    s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
    vp78_reset_probability_tables(s);
    memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
           sizeof(s->prob->pred16x16));
    memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
           sizeof(s->prob->pred8x8c));
    for (i = 0; i < 2; i++)
        memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
               sizeof(vp7_mv_default_prob[i]));
    memset(&s->segmentation, 0, sizeof(s->segmentation));
    memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;

    /* VP7 has no segmentation or loop-filter deltas */
    s->segmentation.enabled = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled = 0;

    /* VP7 uses a single coefficient partition */
    s->num_coeff_partitions = 1;
    ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)

    /* C. Dequantization indices */

    /* D. Golden frame update flag (a Flag) for interframes only */
    s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
    s->sign_bias[VP56_FRAME_GOLDEN] = 0;

    s->update_probabilities = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0]; /* keep a copy for the next frame */
        s->fade_present = vp8_rac_get(c);

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s ,c)) < 0)

    /* F. Loop filter type */
    s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    for (i = 1; i < 16; i++)
        s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    s->filter.simple = vp8_rac_get(c);
    s->filter.level = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    s->prob->intra = vp8_rac_get_uint(c, 8);
    s->prob->last = vp8_rac_get_uint(c, 8);
    vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
/**
 * Parse a complete VP8 frame header and set up the coefficient partitions.
 * NOTE(review): several guard lines (keyframe checks, braces, error
 * returns) are missing from this extract; only comments were added.
 * @return negative AVERROR on invalid data (success return not visible here)
 */
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width = s->avctx->width;
    int height = s->avctx->height;

    /* uncompressed data chunk */
    s->keyframe = !(buf[0] & 1);
    s->profile = (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size = AV_RL24(buf) >> 5;
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
    /* profile 0 uses the 6-tap epel filters */
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;

    /* keyframe start code, dimensions and scaling factors */
    if (AV_RL24(buf) != 0x2a019d) {
        av_log(s->avctx, AV_LOG_ERROR,
               "Invalid start code 0x%x\n", AV_RL24(buf));
        return AVERROR_INVALIDDATA;
    width = AV_RL16(buf + 3) & 0x3fff;
    height = AV_RL16(buf + 5) & 0x3fff;
    hscale = buf[4] >> 6;
    vscale = buf[6] >> 6;
    if (hscale || vscale)
        avpriv_request_sample(s->avctx, "Upscaling");

    /* keyframes refresh all references and reset probabilities to defaults */
    s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
    vp78_reset_probability_tables(s);
    memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
           sizeof(s->prob->pred16x16));
    memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
           sizeof(s->prob->pred8x8c));
    memcpy(s->prob->mvc, vp8_mv_default_prob,
           sizeof(s->prob->mvc));
    memset(&s->segmentation, 0, sizeof(s->segmentation));
    memset(&s->lf_delta, 0, sizeof(s->lf_delta));

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf_size -= header_size;

    s->colorspace = vp8_rac_get(c);
        av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
    s->fullrange = vp8_rac_get(c);

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple = vp8_rac_get(c);
    s->filter.level = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)

    /* sign biases for golden and altref (interframes only) */
    s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
    s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    /* interframe-only reference probabilities */
    s->prob->intra = vp8_rac_get_uint(c, 8);
    s->prob->last = vp8_rac_get_uint(c, 8);
    s->prob->golden = vp8_rac_get_uint(c, 8);
    vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
/** Clamp a motion vector into the current legal range (s->mv_min..s->mv_max). */
static av_always_inline
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
/**
 * Motion vector coding, 17.1.
 * Decode one MV component: either a "large" value assembled bit-by-bit
 * (low 3 bits first, then the high bits — VP7 has fewer of them) or a
 * small tree-coded value; the sign is read last.
 * NOTE(review): several control-flow lines of this function are missing
 * from this extract.
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
    if (vp56_rac_get_prob_branchy(c, p[0])) {
        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        /* bit 3 handled separately depending on the high bits */
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
        /* small value: walk the short-MV probability list */
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        bit = vp56_rac_get_prob(c, *ps);
        x += vp56_rac_get_prob(c, *ps);
    /* negative iff nonzero and the sign bit is set */
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/**
 * Select the sub-MV probability table from the left/top neighbouring sub-MV
 * values; VP7 uses a single fixed table.
 * NOTE(review): the guard conditions between the returns are missing from
 * this extract.
 */
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
        return vp7_submv_prob;
        return vp8_submv_prob[4 - !!left];
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 * NOTE(review): several else-branches and loop-index lines are missing from
 * this extract; only comments were added.
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;

    /* decode the partitioning type from its small binary tree */
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
            part_idx = VP8_SPLITMVMODE_8x8;
        part_idx = VP8_SPLITMVMODE_4x4;

    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        uint32_t left, above;
        const uint8_t *submv_prob;

        /* neighbour MVs: from the left/top MB on a block edge, otherwise
         * from already-decoded sub-blocks of this MB */
        left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    /* explicit MV: delta against the macroblock MV */
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                    AV_ZERO32(&mb->bmv[n]);
                AV_WN32A(&mb->bmv[n], above);
            AV_WN32A(&mb->bmv[n], left);
/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    /* virtual width includes the padding column */
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;

    /* reject offsets before the boundary or landing in the padding column;
     * these early returns are the fix: without them the guard swallowed the
     * edge_y store and the function fell off the end without a value */
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;

    *edge_y = new / vwidth;
    *edge_x = new % vwidth;

    return 1;
}
898 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
900 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
/**
 * VP7 motion-vector prediction: walk the fixed predictor list (vp7_mv_pred)
 * to collect nearest/near candidates and mode-context scores, then decode
 * the MV mode and the vector itself.
 * NOTE(review): several lines (idx selection, some else-branches and braces)
 * are missing from this extract; only comments were added.
 */
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred * pred = &vp7_mv_pred[i];
        /* translate the predictor's offset into a legal MB address */
        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                  ? s->macroblocks_base + 1 + edge_x +
                                    (s->mb_width + 1) * (edge_y + 1)
                                  : s->macroblocks + edge_x +
                                    (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            /* classify mv into nearest / near / zero candidate buckets */
            if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                    if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                    AV_WN32A(&near_mv[CNT_NEAR], mv);
                AV_WN32A(&near_mv[CNT_NEAREST], mv);
            cnt[idx] += vp7_mv_pred[i].score;

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;
        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* pick between (0,0) and the better-scoring candidate */
                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    /* use the MV of the last decoded sub-block */
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP7);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP7);
            mb->mv = near_mv[CNT_NEAR];
        mb->mv = near_mv[CNT_NEAREST];
        mb->mode = VP8_MVMODE_ZERO;
/**
 * VP8 motion-vector prediction: gather MVs from the top, left and top-left
 * neighbours (with sign-bias correction), then decode the MV mode and the
 * vector.
 * NOTE(review): some initializer, macro and else-branch lines are missing
 * from this extract; only comments were added.
 */
static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
    VP8Macroblock *mb_edge[3] = { 0 /* top */,
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    /* top / top-left pointers depend on the macroblock layout */
    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                              \
        VP8Macroblock *edge = mb_edge[n];                             \
        int edge_ref = edge->ref_frame;                               \
        if (edge_ref != VP56_FRAME_CURRENT) {                         \
            uint32_t mv = AV_RN32A(&edge->mv);                        \
            if (cur_sign_bias != sign_bias[edge_ref]) {               \
                /* SWAR negate of the values in mv. */                \
                mv = ((mv & 0x7fff7fff) +                             \
                      0x00010001) ^ (mv & 0x80008000);                \
            if (!n || mv != AV_RN32A(&near_mv[idx]))                  \
                AV_WN32A(&near_mv[++idx], mv);                        \
            cnt[idx] += 1 + (n != 2);                                 \
            cnt[CNT_ZERO] += 1 + (n != 2);                            \

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                /* context for the split decision: how many neighbours are split */
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
                                   (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP8);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP8);
                    mb->bmv[0] = mb->mv;
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        mb->mode = VP8_MVMODE_ZERO;
        mb->bmv[0] = mb->mv;
/**
 * Decode the 16 intra 4x4 prediction modes of a macroblock. On keyframes
 * they are context-coded from the top/left neighbouring modes; on inter
 * frames a fixed probability set is used.
 * NOTE(review): the layout/keyframe guard lines are missing from this
 * extract; only comments were added.
 */
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
    /* alternate layout: top modes come from the macroblock above */
    VP8Macroblock *mb_top = mb - s->mb_width - 1;
    memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    uint8_t *const left = s->intra4x4_pred_mode_left;
    top = mb->intra4x4_pred_mode_top;
    top = s->intra4x4_pred_mode_top + 4 * mb_x;
    /* keyframe path: each 4x4 mode is conditioned on its top/left modes */
    for (y = 0; y < 4; y++) {
        for (x = 0; x < 4; x++) {
            ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
            *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
            left[y] = top[x] = *intra4x4;
    /* inter path: fixed probabilities, no neighbour context */
    for (i = 0; i < 16; i++)
        intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                       vp8_pred4x4_prob_inter);
/**
 * Decode per-macroblock mode information: VP7 feature flags, segment id,
 * skip flag, intra/inter prediction mode, reference frame and motion
 * vectors.
 * NOTE(review): several guard lines (keyframe/is_vp7 branches, braces) are
 * missing from this extract; only comments were added.
 */
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
    VP56RangeCoder *c = &s->c;
    static const char *vp7_feature_name[] = { "q-index",
                                              "partial-golden-update",
    /* VP7 features are parsed and reported, but not otherwise applied here */
    for (i = 0; i < 4; i++) {
        if (s->feature_enabled[i]) {
            if (vp56_rac_get_prob(c, s->feature_present_prob[i])) {
                int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                             s->feature_index_prob[i]);
                av_log(s->avctx, AV_LOG_WARNING,
                       "Feature %s present in macroblock (value 0x%x)\n",
                       vp7_feature_name[i], s->feature_value[i][index]);
    } else if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment; /* reuse the previous frame's id */
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    /* keyframe intra 16x16 mode */
    mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                vp8_pred16x16_prob_intra);

    if (mb->mode == MODE_I4x4) {
        decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        /* fill all 16 sub-modes with the one implied by the 16x16 mode */
        const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                       : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
        if (s->mb_layout == 1)
            AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
        AV_WN32A(s->intra4x4_pred_mode_left, modes);

    mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                            vp8_pred8x8c_prob_intra);
    mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        /* inter MB: choose the reference frame (VP7 has no altref) */
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                               : VP56_FRAME_GOLDEN;
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
        /* intra MB in an interframe */
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
        mb->ref_frame = VP56_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 *
 * NOTE(review): the token loop structure and the final write-back of the
 * local range coder are missing from this extract; only comments added.
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
    /* decode on a local copy of the range coder */
    VP56RangeCoder c = *r;
    if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
    if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
        break; // invalid input; blocks should end with EOB
        token_prob = probs[i][0];
    if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
        token_prob = probs[i + 1][1];
        if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
            coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
            coeff += vp56_rac_get_prob(&c, token_prob[5]);
        if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
            } else { // DCT_CAT2
                coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
        } else { // DCT_CAT3 and up
            int a = vp56_rac_get_prob(&c, token_prob[8]);
            int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
            int cat = (a << 1) + b;
            coeff = 3 + (8 << cat); /* category base value */
            coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
        token_prob = probs[i + 1][2];
    /* sign bit, then dequantize: DC uses qmul[0], AC qmul[1] */
    block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
/*
 * VP7-only DC prediction for inter macroblocks: compares the block's DC
 * against the running predictor pred[] for the reference frame.
 * NOTE(review): excerpt is non-contiguous (lines 1283-1289, 1292-1295 and the
 * return paths are missing), so the branch conditions around the two
 * identical-looking assignments cannot be fully read here.
 */
1279 static av_always_inline
1280 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1282 int16_t dc = block[0];
/* Bitwise | is intentional (branchless test): fires when pred[0] is 0,
 * dc is 0, or pred[0] and dc differ in sign (the XOR's sign bit). */
1290 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1291 block[0] = pred[0] = dc;
1296 block[0] = pred[0] = dc;
/* Thin VP7 wrapper: instantiates decode_block_coeffs_internal() with the
 * caller-supplied scan order and the IS_VP7 compile-time flag. */
1302 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1304 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1305 int i, uint8_t *token_prob,
1307 const uint8_t scan[16])
1309 return decode_block_coeffs_internal(r, block, probs, i,
1310 token_prob, qmul, scan, IS_VP7);
/* Thin VP8 wrapper: fixed zigzag scan, IS_VP8 flag. Guarded by #ifndef so an
 * arch-specific (asm) implementation can replace it via a macro of the same
 * name. */
1313 #ifndef vp8_decode_block_coeffs_internal
1314 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1316 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1317 int i, uint8_t *token_prob,
1320 return decode_block_coeffs_internal(r, block, probs, i,
1321 token_prob, qmul, ff_zigzag_scan, IS_VP8);
1326  * @param c arithmetic bitstream reader context
1327  * @param block destination for block coefficients
1328  * @param probs probabilities to use when reading trees from the bitstream
1329  * @param i initial coeff index, 0 unless a separate DC block is coded
1330  * @param zero_nhood the initial prediction context for number of surrounding
1331  * all-zero blocks (only left/top, so 0-2)
1332  * @param qmul array holding the dc/ac dequant factor at position 0/1
1334  * @return 0 if no coeffs were decoded
1335  * otherwise, the index of the last coeff decoded plus one
/*
 * Entry point for per-block coefficient decoding: handles the cheap
 * leading-EOB case inline, then dispatches to the VP7 or VP8 specialized
 * decoder.
 */
1337 static av_always_inline
1338 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1339 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1340 int i, int zero_nhood, int16_t qmul[2],
1341 const uint8_t scan[16], int vp7)
1343 uint8_t *token_prob = probs[i][zero_nhood];
/* Fast path: block starts with EOB, i.e. no coefficients at all. */
1344 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1346 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1347 token_prob, qmul, scan)
1348 : vp8_decode_block_coeffs_internal(c, block, probs, i,
/*
 * Decode all coefficient blocks of one macroblock: optional separate luma DC
 * block (+ WHT), 16 luma 4x4 blocks, then 2x4 chroma blocks. t_nnz/l_nnz are
 * the top/left non-zero contexts (9 entries: 4 luma, 4 chroma, 1 luma DC).
 * NOTE(review): excerpt is non-contiguous; several closing braces and the
 * skip-flag update hinted at by the trailing comment are not visible.
 */
1352 static av_always_inline
1353 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1354 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1357 int i, x, y, luma_start = 0, luma_ctx = 3;
1358 int nnz_pred, nnz, nnz_total = 0;
1359 int segment = mb->segment;
/* Separate DC block exists unless the mb is I4x4 or (VP8 only) SPLITMV. */
1362 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1363 nnz_pred = t_nnz[8] + l_nnz[8];
1365 // decode DC values and do hadamard
1366 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1367 nnz_pred, s->qmat[segment].luma_dc_qmul,
1368 ff_zigzag_scan, is_vp7);
1369 l_nnz[8] = t_nnz[8] = !!nnz;
/* VP7 additionally predicts the DC of inter macroblocks from the
 * previous DC of the same reference frame. */
1371 if (is_vp7 && mb->mode > MODE_I4x4) {
1372 nnz |= inter_predict_dc(td->block_dc,
1373 s->inter_dc_pred[mb->ref_frame - 1]);
/* Inverse Walsh-Hadamard: DC-only fast path vs. full transform. */
1380 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1382 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
// luma blocks
1389 for (y = 0; y < 4; y++)
1390 for (x = 0; x < 4; x++) {
1391 nnz_pred = l_nnz[y] + t_nnz[x];
1392 nnz = decode_block_coeffs(c, td->block[y][x],
1393 s->prob->token[luma_ctx],
1394 luma_start, nnz_pred,
1395 s->qmat[segment].luma_qmul,
1396 s->prob[0].scan, is_vp7);
1397 /* nnz+block_dc may be one more than the actual last index,
1398 * but we don't care */
1399 td->non_zero_count_cache[y][x] = nnz + block_dc;
1400 t_nnz[x] = l_nnz[y] = !!nnz;
// chroma blocks
1405 // TODO: what to do about dimensions? 2nd dim for luma is x,
1406 // but for chroma it's (y<<1)|x
1407 for (i = 4; i < 6; i++)
1408 for (y = 0; y < 2; y++)
1409 for (x = 0; x < 2; x++) {
1410 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1411 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1412 s->prob->token[2], 0, nnz_pred,
1413 s->qmat[segment].chroma_qmul,
1414 s->prob[0].scan, is_vp7);
1415 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1416 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1420 // if there were no coded coeffs despite the macroblock not being marked skip,
1421 // we MUST not do the inner loop filter and should not do IDCT
1422 // Since skip isn't used for bitstream prediction, just manually set it.
/*
 * Save the bottom edge of a macroblock (last luma row, and last chroma rows
 * unless `simple` filtering skips chroma) into the top_border scratch buffer,
 * to serve as the top edge for the row below.
 */
1427 static av_always_inline
1428 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1429 uint8_t *src_cb, uint8_t *src_cr,
1430 ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1432 AV_COPY128(top_border, src_y + 15 * linesize);
1434 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1435 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
/*
 * Swap (xchg=1) or copy the macroblock's top/top-left border pixels with the
 * top_border buffer around intra prediction, so prediction sees the
 * unfiltered top row. Layout in top_border: 0..15 luma, 16..23 cb, 24..31 cr;
 * top_border_m1 is the neighbour to the left (for top-left prediction).
 */
1439 static av_always_inline
1440 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1441 uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1442 int mb_y, int mb_width, int simple, int xchg)
1444 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1446 src_cb -= uvlinesize;
1447 src_cr -= uvlinesize;
1449 #define XCHG(a, b, xchg) \
1457 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1458 XCHG(top_border, src_y, xchg);
1459 XCHG(top_border + 8, src_y + 8, 1);
1460 if (mb_x < mb_width - 1)
1461 XCHG(top_border + 32, src_y + 16, 1);
1463 // only copy chroma for normal loop filter
1464 // or to initialize the top row to 127
1465 if (!simple || !mb_y) {
1466 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1467 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1468 XCHG(top_border + 16, src_cb, 1);
1469 XCHG(top_border + 24, src_cr, 1);
/* Remap 8x8/16x16 DC prediction at frame edges: no top row -> DC_128, no
 * left column -> TOP_DC, no top -> LEFT_DC. */
1473 static av_always_inline
1474 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1477 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1479 return mb_y ? mode : LEFT_DC_PRED8x8;
/* Remap 8x8/16x16 TrueMotion prediction at frame edges; VP7 falls back to
 * DC_128, VP8 to DC_129 per its spec. */
1482 static av_always_inline
1483 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1486 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1488 return mb_y ? mode : HOR_PRED8x8;
/* Dispatch edge-emulation remapping for 8x8/16x16 intra modes: DC and TM get
 * their own helpers; VERT/HOR degrade to constant-DC at the top/left frame
 * border. */
1491 static av_always_inline
1492 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1496 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1498 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1500 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1501 case PLANE_PRED8x8: /* TM */
1502 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
/* 4x4 TrueMotion edge remap, analogous to check_tm_pred8x8_mode. */
1507 static av_always_inline
1508 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1511 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1513 return mb_y ? mode : HOR_VP8_PRED;
/*
 * Edge-emulation remapping for 4x4 intra modes. Modes that read unavailable
 * neighbours either get a substitute mode or set *copy_buf so the caller
 * predicts into a padded scratch buffer instead of the frame.
 * NOTE(review): excerpt is non-contiguous; the switch head, several case
 * labels and the *copy_buf assignments fall in the missing lines.
 */
1517 static av_always_inline
1518 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1519 int *copy_buf, int vp7)
1523 if (!mb_x && mb_y) {
1528 case DIAG_DOWN_LEFT_PRED:
1529 case VERT_LEFT_PRED:
1530 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1538 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1540 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1541 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1542 * as 16x16/8x8 DC */
1543 case DIAG_DOWN_RIGHT_PRED:
1544 case VERT_RIGHT_PRED:
/*
 * Intra-predict one macroblock (16x16 whole-mb modes or 16 separate 4x4
 * modes) plus both chroma planes, then add the IDCT residual for 4x4 blocks.
 * Borders are swapped in/out via xchg_mb_border so prediction reads
 * unfiltered neighbours.
 */
1553 static av_always_inline
1554 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1555 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1557 int x, y, mode, nnz;
1560 /* for the first row, we need to run xchg_mb_border to init the top edge
1561 * to 127 otherwise, skip it if we aren't going to deblock */
1562 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1563 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1564 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1565 s->filter.simple, 1);
1567 if (mb->mode < MODE_I4x4) {
/* Whole-macroblock 16x16 prediction. */
1568 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1569 s->hpc.pred16x16[mode](dst[0], s->linesize);
1571 uint8_t *ptr = dst[0];
1572 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* Edge fill values differ between codecs: VP7 uses 128, VP8 127/129. */
1573 const uint8_t lo = is_vp7 ? 128 : 127;
1574 const uint8_t hi = is_vp7 ? 128 : 129;
1575 uint8_t tr_top[4] = { lo, lo, lo, lo };
1577 // all blocks on the right edge of the macroblock use bottom edge
1578 // the top macroblock for their topright edge
1579 uint8_t *tr_right = ptr - s->linesize + 16;
1581 // if we're on the right edge of the frame, said edge is extended
1582 // from the top macroblock
1583 if (mb_y && mb_x == s->mb_width - 1) {
1584 tr = tr_right[-1] * 0x01010101u;
1585 tr_right = (uint8_t *) &tr;
1589 AV_ZERO128(td->non_zero_count_cache);
1591 for (y = 0; y < 4; y++) {
1592 uint8_t *topright = ptr + 4 - s->linesize;
1593 for (x = 0; x < 4; x++) {
1595 ptrdiff_t linesize = s->linesize;
1596 uint8_t *dst = ptr + 4 * x;
1597 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];
1599 if ((y == 0 || x == 3) && mb_y == 0) {
1602 topright = tr_right;
/* BUG(review): "©" below is mojibake — the original argument is
 * "&copy" (address of the int copy flag declared in a line missing
 * from this excerpt). Restore before compiling. */
1604 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1605 mb_y + y, ©, is_vp7);
/* Edge case: predict into the padded scratch buffer; 12 = start of
 * the 4x4 payload inside the 5*8 copy_dst layout. */
1607 dst = copy_dst + 12;
1611 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1613 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1617 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1626 copy_dst[11] = ptr[4 * x - 1];
1627 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1628 copy_dst[28] = ptr[4 * x + s->linesize * 2 - 1];
1629 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1632 s->hpc.pred4x4[mode](dst, topright, linesize);
/* Copy the 4 predicted rows back out of the scratch buffer. */
1634 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1635 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1636 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1637 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1640 nnz = td->non_zero_count_cache[y][x];
/* nnz == 1: only DC coded -> cheap dc_add; nnz > 1: full idct_add. */
1643 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1644 td->block[y][x], s->linesize);
1646 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1647 td->block[y][x], s->linesize);
1652 ptr += 4 * s->linesize;
/* Chroma: one 8x8 prediction mode shared by both planes. */
1657 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1658 mb_x, mb_y, is_vp7);
1659 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1660 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1662 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1663 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1664 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1665 s->filter.simple, 0);
/* Per-subpel-phase (mv & 7) lookup: extra source pixels needed on each side
 * by the interpolation filters, and the MC function-pointer index. */
1668 static const uint8_t subpel_idx[3][8] = {
1669 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1670 // also function pointer index
1671 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1672 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1678  * @param s VP8 decoding context
1679  * @param dst target buffer for block data at block position
1680  * @param ref reference picture buffer at origin (0, 0)
1681  * @param mv motion vector (relative to block position) to get pixel data from
1682  * @param x_off horizontal position of block from origin (0, 0)
1683  * @param y_off vertical position of block from origin (0, 0)
1684  * @param block_w width of block (16, 8 or 4)
1685  * @param block_h height of block (always same as block_w)
1686  * @param width width of src/dst plane data
1687  * @param height height of src/dst plane data
1688  * @param linesize size of a single line of plane data, including padding
1689  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/*
 * Luma motion compensation for one block; quarter-pel MVs, with
 * emulated_edge_mc when the filter footprint leaves the picture and
 * ff_thread_await_progress to wait for the reference rows in frame-threading.
 */
1691 static av_always_inline
1692 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1693 ThreadFrame *ref, const VP56mv *mv,
1694 int x_off, int y_off, int block_w, int block_h,
1695 int width, int height, ptrdiff_t linesize,
1696 vp8_mc_func mc_func[3][3])
1698 uint8_t *src = ref->f->data[0];
1701 ptrdiff_t src_linesize = linesize;
/* Quarter-pel MV -> eighth-pel phase (mx/my in 0..7) and filter index. */
1703 int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
1704 int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
1706 x_off += mv->x >> 2;
1707 y_off += mv->y >> 2;
1710 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1711 src += y_off * linesize + x_off;
1712 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1713 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
/* Filter taps would read outside the plane: build a padded copy. */
1714 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1715 src - my_idx * linesize - mx_idx,
1716 EDGE_EMU_LINESIZE, linesize,
1717 block_w + subpel_idx[1][mx],
1718 block_h + subpel_idx[1][my],
1719 x_off - mx_idx, y_off - my_idx,
1721 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1722 src_linesize = EDGE_EMU_LINESIZE;
1724 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
/* Full-pel MV: plain copy via the [0][0] MC function. */
1726 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1727 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1728 linesize, block_h, 0, 0);
1733  * chroma MC function
1735  * @param s VP8 decoding context
1736  * @param dst1 target buffer for block data at block position (U plane)
1737  * @param dst2 target buffer for block data at block position (V plane)
1738  * @param ref reference picture buffer at origin (0, 0)
1739  * @param mv motion vector (relative to block position) to get pixel data from
1740  * @param x_off horizontal position of block from origin (0, 0)
1741  * @param y_off vertical position of block from origin (0, 0)
1742  * @param block_w width of block (16, 8 or 4)
1743  * @param block_h height of block (always same as block_w)
1744  * @param width width of src/dst plane data
1745  * @param height height of src/dst plane data
1746  * @param linesize size of a single line of plane data, including padding
1747  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Same structure as vp8_mc_luma but eighth-pel MVs and both chroma planes
 * at once (they share mx/my, so also the edge-emu decision). */
1749 static av_always_inline
1750 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1751 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1752 int x_off, int y_off, int block_w, int block_h,
1753 int width, int height, ptrdiff_t linesize,
1754 vp8_mc_func mc_func[3][3])
1756 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1759 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1760 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1762 x_off += mv->x >> 3;
1763 y_off += mv->y >> 3;
1766 src1 += y_off * linesize + x_off;
1767 src2 += y_off * linesize + x_off;
1768 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1769 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1770 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
/* Edge case: pad U, filter U, then reuse the same scratch for V. */
1771 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1772 src1 - my_idx * linesize - mx_idx,
1773 EDGE_EMU_LINESIZE, linesize,
1774 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1775 x_off - mx_idx, y_off - my_idx, width, height);
1776 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1777 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1779 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1780 src2 - my_idx * linesize - mx_idx,
1781 EDGE_EMU_LINESIZE, linesize,
1782 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1783 x_off - mx_idx, y_off - my_idx, width, height);
1784 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1785 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1787 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1788 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* Full-pel path. */
1791 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1792 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1793 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/*
 * MC for one partition of a macroblock: luma at (bx_off, by_off), then the
 * derived chroma MV (averaging/rounding in the missing lines 1815-1825;
 * profile 3 clears the fractional chroma MV bits — full-pel chroma).
 */
1797 static av_always_inline
1798 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1799 ThreadFrame *ref_frame, int x_off, int y_off,
1800 int bx_off, int by_off, int block_w, int block_h,
1801 int width, int height, VP56mv *mv)
1806 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1807 ref_frame, mv, x_off + bx_off, y_off + by_off,
1808 block_w, block_h, width, height, s->linesize,
1809 s->put_pixels_tab[block_w == 8]);
1812 if (s->profile == 3) {
1813 /* this block only applies VP8; it is safe to check
1814 * only the profile, as VP7 profile <= 1 */
1826 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1827 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1828 &uvmv, x_off + bx_off, y_off + by_off,
1829 block_w, block_h, width, height, s->uvlinesize,
1830 s->put_pixels_tab[1 + (block_w == 4)]);
1833 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1834 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1835 static av_always_inline
1836 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1839 /* Don't prefetch refs that haven't been used very often this frame. */
1840 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1841 int x_off = mb_x << 4, y_off = mb_y << 4;
1842 int mx = (mb->mv.x >> 2) + x_off + 8;
1843 int my = (mb->mv.y >> 2) + y_off;
1844 uint8_t **src = s->framep[ref]->tf.f->data;
1845 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1846 /* For threading, a ff_thread_await_progress here might be useful, but
1847 * it actually slows down the decoder. Since a bad prefetch doesn't
1848 * generate bad decoder output, we don't run it here. */
1849 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
/* Chroma planes are interleaved in memory distance src[2]-src[1];
 * one prefetch call covers both. */
1850 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1851 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1856  * Apply motion vectors to prediction buffer, chapter 18.
/* Dispatches on the SPLITMV partitioning: whole-MB, 4x4 (16 luma MVs plus
 * per-2x2 averaged chroma MVs), 16x8, 8x16 or 8x8 partitions. */
1858 static av_always_inline
1859 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1860 VP8Macroblock *mb, int mb_x, int mb_y)
1862 int x_off = mb_x << 4, y_off = mb_y << 4;
1863 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1864 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1865 VP56mv *bmv = mb->bmv;
1867 switch (mb->partitioning) {
1868 case VP8_SPLITMVMODE_NONE:
1869 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1870 0, 0, 16, 16, width, height, &mb->mv);
1872 case VP8_SPLITMVMODE_4x4: {
/* 16 independent 4x4 luma blocks. */
1877 for (y = 0; y < 4; y++) {
1878 for (x = 0; x < 4; x++) {
1879 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1880 ref, &bmv[4 * y + x],
1881 4 * x + x_off, 4 * y + y_off, 4, 4,
1882 width, height, s->linesize,
1883 s->put_pixels_tab[2]);
/* Chroma MV = rounded average of the four covering luma MVs. */
1892 for (y = 0; y < 2; y++) {
1893 for (x = 0; x < 2; x++) {
1894 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1895 mb->bmv[2 * y * 4 + 2 * x + 1].x +
1896 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1897 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1898 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1899 mb->bmv[2 * y * 4 + 2 * x + 1].y +
1900 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1901 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
/* Round-to-nearest with sign correction (FF_SIGNBIT). */
1902 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
1903 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
1904 if (s->profile == 3) {
1908 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1909 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1910 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1911 width, height, s->uvlinesize,
1912 s->put_pixels_tab[2]);
1917 case VP8_SPLITMVMODE_16x8:
1918 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1919 0, 0, 16, 8, width, height, &bmv[0]);
1920 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1921 0, 8, 16, 8, width, height, &bmv[1]);
1923 case VP8_SPLITMVMODE_8x16:
1924 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1925 0, 0, 8, 16, width, height, &bmv[0]);
1926 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1927 8, 0, 8, 16, width, height, &bmv[1]);
1929 case VP8_SPLITMVMODE_8x8:
1930 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1931 0, 0, 8, 8, width, height, &bmv[0]);
1932 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1933 8, 0, 8, 8, width, height, &bmv[1]);
1934 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1935 0, 8, 8, 8, width, height, &bmv[2]);
1936 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1937 8, 8, 8, 8, width, height, &bmv[3]);
/*
 * Add the inverse-transformed residual of a non-I4x4 macroblock to the
 * prediction. Uses the packed nnz cache rows: if all four bytes are <= 1
 * (nnz4 & ~0x01010101 == 0) the DC-only 4-block helpers are used.
 */
1942 static av_always_inline
1943 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1947 if (mb->mode != MODE_I4x4) {
1948 uint8_t *y_dst = dst[0];
1949 for (y = 0; y < 4; y++) {
1950 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1952 if (nnz4 & ~0x01010101) {
/* Mixed row: per-block DC-only vs full IDCT, keyed on the low
 * byte of nnz4 which is shifted down in the missing lines. */
1953 for (x = 0; x < 4; x++) {
1954 if ((uint8_t) nnz4 == 1)
1955 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1958 else if ((uint8_t) nnz4 > 1)
1959 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
1967 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1970 y_dst += 4 * s->linesize;
1974 for (ch = 0; ch < 2; ch++) {
1975 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
1977 uint8_t *ch_dst = dst[1 + ch];
1978 if (nnz4 & ~0x01010101) {
1979 for (y = 0; y < 2; y++) {
1980 for (x = 0; x < 2; x++) {
1981 if ((uint8_t) nnz4 == 1)
1982 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
1983 td->block[4 + ch][(y << 1) + x],
1985 else if ((uint8_t) nnz4 > 1)
1986 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
1987 td->block[4 + ch][(y << 1) + x],
1991 goto chroma_idct_end;
1993 ch_dst += 4 * s->uvlinesize;
1996 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
/*
 * Compute per-macroblock loop-filter strength (spec section on loop filter):
 * base level from segmentation or frame header, adjusted by per-ref/per-mode
 * deltas, clamped to 0..63; interior limit derated by sharpness.
 */
2004 static av_always_inline
2005 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2006 VP8FilterStrength *f, int is_vp7)
2008 int interior_limit, filter_level;
2010 if (s->segmentation.enabled) {
2011 filter_level = s->segmentation.filter_level[mb->segment];
2012 if (!s->segmentation.absolute_vals)
2013 filter_level += s->filter.level;
2015 filter_level = s->filter.level;
2017 if (s->lf_delta.enabled) {
2018 filter_level += s->lf_delta.ref[mb->ref_frame];
2019 filter_level += s->lf_delta.mode[mb->mode];
2022 filter_level = av_clip_uintp2(filter_level, 6);
2024 interior_limit = filter_level;
2025 if (s->filter.sharpness) {
2026 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2027 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2029 interior_limit = FFMAX(interior_limit, 1);
2031 f->filter_level = filter_level;
2032 f->inner_limit = interior_limit;
/* Inner (block-edge) filtering is skipped for VP8 skipped whole-MB
 * predictions; VP7 always inner-filters. */
2033 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2034 mb->mode == VP8_MVMODE_SPLIT;
/*
 * Normal (non-simple) loop filter for one macroblock: macroblock edges with
 * hev threshold, then inner 4-pixel edges if inner_filter is set. The H/V
 * edge order differs between VP7 and VP8, hence the parameterized
 * H_LOOP_FILTER_16Y_INNER invocations.
 */
2037 static av_always_inline
2038 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2039 int mb_x, int mb_y, int is_vp7)
2041 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2042 int filter_level = f->filter_level;
2043 int inner_limit = f->inner_limit;
2044 int inner_filter = f->inner_filter;
2045 ptrdiff_t linesize = s->linesize;
2046 ptrdiff_t uvlinesize = s->uvlinesize;
/* High-edge-variance threshold, indexed [keyframe][filter_level]. */
2047 static const uint8_t hev_thresh_lut[2][64] = {
2048 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2049 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2050 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2052 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2053 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2054 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* VP7 vs VP8 edge-limit derivations (the branch itself is in the
 * missing lines 2056-2061). */
2062 bedge_lim_y = filter_level;
2063 bedge_lim_uv = filter_level * 2;
2064 mbedge_lim = filter_level + 2;
2067 bedge_lim_uv = filter_level * 2 + inner_limit;
2068 mbedge_lim = bedge_lim_y + 4;
2071 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2074 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2075 mbedge_lim, inner_limit, hev_thresh);
2076 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2077 mbedge_lim, inner_limit, hev_thresh);
2080 #define H_LOOP_FILTER_16Y_INNER(cond) \
2081 if (cond && inner_filter) { \
2082 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2083 bedge_lim_y, inner_limit, \
2085 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2086 bedge_lim_y, inner_limit, \
2088 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2089 bedge_lim_y, inner_limit, \
2091 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2092 uvlinesize, bedge_lim_uv, \
2093 inner_limit, hev_thresh); \
2096 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2099 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2100 mbedge_lim, inner_limit, hev_thresh);
2101 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2102 mbedge_lim, inner_limit, hev_thresh);
2106 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2107 linesize, bedge_lim_y,
2108 inner_limit, hev_thresh);
2109 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2110 linesize, bedge_lim_y,
2111 inner_limit, hev_thresh);
2112 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2113 linesize, bedge_lim_y,
2114 inner_limit, hev_thresh);
2115 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2116 dst[2] + 4 * uvlinesize,
2117 uvlinesize, bedge_lim_uv,
2118 inner_limit, hev_thresh);
2121 H_LOOP_FILTER_16Y_INNER(is_vp7)
/*
 * Simple loop filter: luma only, macroblock edge then three inner edges in
 * each direction (inner edges gated by inner_filter; the guards sit in the
 * missing lines 2142/2150).
 */
2124 static av_always_inline
2125 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2128 int mbedge_lim, bedge_lim;
2129 int filter_level = f->filter_level;
2130 int inner_limit = f->inner_limit;
2131 int inner_filter = f->inner_filter;
2132 ptrdiff_t linesize = s->linesize;
2137 bedge_lim = 2 * filter_level + inner_limit;
2138 mbedge_lim = bedge_lim + 4;
2141 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2143 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2144 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2145 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2149 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2151 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2152 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2153 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
/* MV clamp margin: 16 pixels in 1/4-pel units. */
2157 #define MARGIN (16 << 2)
/*
 * Single-pass decode of all macroblock modes/MVs for the frame (used when
 * mb_layout separates mode decoding from reconstruction). Seeds the top
 * intra4x4 contexts to DC_PRED and walks the mb grid row by row.
 */
2158 static av_always_inline
2159 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2160 VP8Frame *prev_frame, int is_vp7)
2162 VP8Context *s = avctx->priv_data;
2165 s->mv_min.y = -MARGIN;
2166 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2167 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2168 VP8Macroblock *mb = s->macroblocks_base +
2169 ((s->mb_width + 1) * (mb_y + 1) + 1);
2170 int mb_xy = mb_y * s->mb_width;
2172 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2174 s->mv_min.x = -MARGIN;
2175 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2176 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2178 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2179 DC_PRED * 0x01010101);
/* Previous frame's seg_map feeds segment-id prediction when the
 * current frame doesn't update the map. */
2180 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2181 prev_frame && prev_frame->seg_map ?
2182 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
/* VP7 specialization of vp78_decode_mv_mb_modes(). */
2191 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2192 VP8Frame *prev_frame)
2194 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
/* VP8 specialization of vp78_decode_mv_mb_modes(). */
2197 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2198 VP8Frame *prev_frame)
2200 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
/*
 * Sliced-threading synchronization (HAVE_THREADS builds; the no-op fallback
 * definitions at the end cover the threadless build — the #else lies in the
 * missing lines). check_thread_pos() blocks until the other thread `otd` has
 * reached the packed (mb_y << 16 | mb_x) position; update_pos() publishes
 * this thread's position and wakes any waiter. No comments are inserted
 * between the backslash-continued macro lines below.
 */
2204 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2206 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2207 if (otd->thread_mb_pos < tmp) { \
2208 pthread_mutex_lock(&otd->lock); \
2209 td->wait_mb_pos = tmp; \
2211 if (otd->thread_mb_pos >= tmp) \
2213 pthread_cond_wait(&otd->cond, &otd->lock); \
2215 td->wait_mb_pos = INT_MAX; \
2216 pthread_mutex_unlock(&otd->lock); \
2220 #define update_pos(td, mb_y, mb_x) \
2222 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2223 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2225 int is_null = !next_td || !prev_td; \
2226 int pos_check = (is_null) ? 1 \
2227 : (next_td != td && \
2228 pos >= next_td->wait_mb_pos) || \
2230 pos >= prev_td->wait_mb_pos); \
2231 td->thread_mb_pos = pos; \
2232 if (sliced_threading && pos_check) { \
2233 pthread_mutex_lock(&td->lock); \
2234 pthread_cond_broadcast(&td->cond); \
2235 pthread_mutex_unlock(&td->lock); \
2239 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2240 #define update_pos(td, mb_y, mb_x)
/*
 * Decode one macroblock row (mode/coeff decode + intra/inter prediction +
 * IDCT), without loop filtering — filtering runs in vp8_filter_mb_row().
 * In sliced threading each job owns a row; check_thread_pos/update_pos keep
 * neighbouring rows in lockstep.
 */
2243 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2244 int jobnr, int threadnr, int is_vp7)
2246 VP8Context *s = avctx->priv_data;
2247 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2248 int mb_y = td->thread_mb_pos >> 16;
2249 int mb_x, mb_xy = mb_y * s->mb_width;
2250 int num_jobs = s->num_jobs;
2251 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
/* Coefficient partitions are striped across rows. */
2252 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2255 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2256 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2257 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2262 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2263 if (mb_y == s->mb_height - 1)
2266 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2267 if (s->mb_layout == 1)
2268 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2270 // Make sure the previous frame has read its segmentation map,
2271 // if we re-use the same map.
2272 if (prev_frame && s->segmentation.enabled &&
2273 !s->segmentation.update_map)
2274 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2275 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2276 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2277 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
/* VP7 carries left_nnz across rows; VP8 resets every row. */
2280 if (!is_vp7 || mb_y == 0)
2281 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2283 s->mv_min.x = -MARGIN;
2284 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2286 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2287 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2288 if (prev_td != td) {
2289 if (threadnr != 0) {
2290 check_thread_pos(td, prev_td,
2291 mb_x + (is_vp7 ? 2 : 1),
2292 mb_y - (is_vp7 ? 2 : 1));
2294 check_thread_pos(td, prev_td,
2295 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2296 mb_y - (is_vp7 ? 2 : 1));
2300 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2302 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2303 dst[2] - dst[1], 2);
2306 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2307 prev_frame && prev_frame->seg_map ?
2308 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2310 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2313 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2315 if (mb->mode <= MODE_I4x4)
2316 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2318 inter_predict(s, td, dst, mb, mb_x, mb_y);
2320 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2323 idct_mb(s, td, dst, mb);
2325 AV_ZERO64(td->left_nnz);
2326 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2328 /* Reset DC block predictors if they would exist
2329 * if the mb had coefficients */
2330 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2331 td->left_nnz[8] = 0;
2332 s->top_nnz[mb_x][8] = 0;
2336 if (s->deblock_filter)
2337 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2339 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2340 if (s->filter.simple)
2341 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2342 NULL, NULL, s->linesize, 0, 1);
2344 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2345 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2348 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
/* End-of-row: publish an extended position so waiters past the row
 * edge are released. */
2356 if (mb_x == s->mb_width + 1) {
2357 update_pos(td, mb_y, s->mb_width + 3);
2359 update_pos(td, mb_y, mb_x);
2364 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2365 int jobnr, int threadnr, int is_vp7)
2367 VP8Context *s = avctx->priv_data;
2368 VP8ThreadData *td = &s->thread_data[threadnr];
2369 int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2370 AVFrame *curframe = s->curframe->tf.f;
2372 VP8ThreadData *prev_td, *next_td;
2374 curframe->data[0] + 16 * mb_y * s->linesize,
2375 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2376 curframe->data[2] + 8 * mb_y * s->uvlinesize
2379 if (s->mb_layout == 1)
2380 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2382 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2387 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2388 if (mb_y == s->mb_height - 1)
2391 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2393 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2394 VP8FilterStrength *f = &td->filter_strength[mb_x];
2396 check_thread_pos(td, prev_td,
2397 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2399 if (next_td != &s->thread_data[0])
2400 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2402 if (num_jobs == 1) {
2403 if (s->filter.simple)
2404 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2405 NULL, NULL, s->linesize, 0, 1);
2407 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2408 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2411 if (s->filter.simple)
2412 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2414 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2419 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2423 static av_always_inline
2424 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2425 int threadnr, int is_vp7)
2427 VP8Context *s = avctx->priv_data;
2428 VP8ThreadData *td = &s->thread_data[jobnr];
2429 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2430 VP8Frame *curframe = s->curframe;
2431 int mb_y, num_jobs = s->num_jobs;
2433 td->thread_nr = threadnr;
2434 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2435 if (mb_y >= s->mb_height)
2437 td->thread_mb_pos = mb_y << 16;
2438 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, is_vp7);
2439 if (s->deblock_filter)
2440 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr, is_vp7);
2441 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2446 if (avctx->active_thread_type == FF_THREAD_FRAME)
2447 ff_thread_report_progress(&curframe->tf, mb_y, 0);
2453 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2454 int jobnr, int threadnr)
2456 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2459 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2460 int jobnr, int threadnr)
2462 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
/**
 * Shared VP7/VP8 top-level frame decoder.
 *
 * Parses the frame header, rotates the reference-frame set, decodes all
 * macroblock rows (sliced across threads via avctx->execute2()) and, for
 * visible frames, returns the picture through *data / *got_frame.
 *
 * NOTE(review): this excerpt is elided — several connective lines
 * (if/else selectors, braces, goto err / goto skip_decode plumbing and
 * the corresponding labels) are missing from the visible text. Comments
 * below flag those gaps rather than guessing the missing code.
 */
static av_always_inline
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                      AVPacket *avpkt, int is_vp7)
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    /* Header parsing; the is_vp7 selection between these two calls is
     * elided in this excerpt. */
    ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
    ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    /* A frame is "referenced" when any later frame may predict from it. */
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

    /* Discard threshold; the final ": AVDISCARD_ALL" arm is elided. */
    skip_thresh = !referenced ? AVDISCARD_NONREF
                : !s->keyframe ? AVDISCARD_NONKEY

    if (avctx->skip_frame >= skip_thresh) {
        /* Frame skipped: reference set stays unchanged. */
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);

    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);

    /* Colour properties; the guarding conditions (and the else between the
     * two color_range assignments) are elided here. */
    avctx->colorspace = AVCOL_SPC_BT470BG;
    avctx->color_range = AVCOL_RANGE_JPEG;
    avctx->color_range = AVCOL_RANGE_MPEG;

    /* Given that arithmetic probabilities are updated every frame, it's quite
     * likely that the values we have on a random interframe are complete
     * junk if we didn't start decode on a keyframe. So just don't display
     * anything rather than junk. */
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING,
               "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                            : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    /* else branch (keep current altref) — the `else` line is elided: */
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];

    if (s->update_golden != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    /* else branch (keep current golden) — the `else` line is elided: */
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];

    /* `if (s->update_last)` guard and its `else` are elided here: */
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];

    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    /* Reference-frame state is now fixed: allow the next frame thread to
     * start its own setup. */
    ff_thread_finish_setup(avctx);

    s->linesize = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];

    memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction
     * from outside the frame. */
        memset(s->macroblocks + s->mb_height * 2 - 1, 0,
               (s->mb_width + 1) * sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);

    memset(s->ref_count, 0, sizeof(s->ref_count));

    if (s->mb_layout == 1) {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, 1, 0);
        /* is_vp7 selector between these two calls is elided: */
        vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);

    /* Frame threading forces a single job; the `num_jobs = 1; else`
     * lines are elided here. */
    if (avctx->active_thread_type == FF_THREAD_FRAME)
    num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs = num_jobs;
    s->curframe = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos = INT_MAX;

    /* is_vp7 selector between the two execute2() dispatches is elided: */
        avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
        avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)

    /* presumably under the err: label — restore the reference set so a
     * failed frame does not corrupt state; confirm against full file. */
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2625 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2628 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2631 #if CONFIG_VP7_DECODER
2632 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2635 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2637 #endif /* CONFIG_VP7_DECODER */
2639 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2641 VP8Context *s = avctx->priv_data;
2644 vp8_decode_flush_impl(avctx, 1);
2645 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2646 av_frame_free(&s->frames[i].tf.f);
2651 static av_cold int vp8_init_frames(VP8Context *s)
2654 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2655 s->frames[i].tf.f = av_frame_alloc();
2656 if (!s->frames[i].tf.f)
2657 return AVERROR(ENOMEM);
2662 static av_always_inline
2663 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2665 VP8Context *s = avctx->priv_data;
2669 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2670 avctx->internal->allocate_progress = 1;
2672 ff_videodsp_init(&s->vdsp, 8);
2674 ff_vp78dsp_init(&s->vp8dsp);
2675 if (CONFIG_VP7_DECODER && is_vp7) {
2676 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2677 ff_vp7dsp_init(&s->vp8dsp);
2678 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2679 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2680 ff_vp8dsp_init(&s->vp8dsp);
2683 /* does not change for VP8 */
2684 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
2686 if ((ret = vp8_init_frames(s)) < 0) {
2687 ff_vp8_decode_free(avctx);
2694 #if CONFIG_VP7_DECODER
2695 static int vp7_decode_init(AVCodecContext *avctx)
2697 return vp78_decode_init(avctx, IS_VP7);
2699 #endif /* CONFIG_VP7_DECODER */
2701 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2703 return vp78_decode_init(avctx, IS_VP8);
2706 #if CONFIG_VP8_DECODER
2707 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2709 VP8Context *s = avctx->priv_data;
2714 if ((ret = vp8_init_frames(s)) < 0) {
2715 ff_vp8_decode_free(avctx);
/* Translate a VP8Frame pointer from the source thread context's frames[]
 * array into the corresponding slot of the destination context's frames[];
 * NULL maps to NULL. Relies on the `s` / `s_src` locals of the enclosing
 * function. Parameter and expansion are fully parenthesized so a compound
 * argument or surrounding expression cannot change the grouping
 * (CERT PRE01-C / PRE02-C). */
#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
2724 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2725 const AVCodecContext *src)
2727 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2730 if (s->macroblocks_base &&
2731 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2733 s->mb_width = s_src->mb_width;
2734 s->mb_height = s_src->mb_height;
2737 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2738 s->segmentation = s_src->segmentation;
2739 s->lf_delta = s_src->lf_delta;
2740 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2742 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2743 if (s_src->frames[i].tf.f->data[0]) {
2744 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2750 s->framep[0] = REBASE(s_src->next_framep[0]);
2751 s->framep[1] = REBASE(s_src->next_framep[1]);
2752 s->framep[2] = REBASE(s_src->next_framep[2]);
2753 s->framep[3] = REBASE(s_src->next_framep[3]);
2757 #endif /* CONFIG_VP8_DECODER */
2759 #if CONFIG_VP7_DECODER
/* Registration table for the VP7 decoder. Note it advertises only
 * AV_CODEC_CAP_DR1 — no frame/slice threading caps, unlike VP8 below.
 * NOTE(review): the `.name` field line and the closing `};` appear to be
 * elided from this excerpt. */
AVCodec ff_vp7_decoder = {
    .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type = AVMEDIA_TYPE_VIDEO,
    .id = AV_CODEC_ID_VP7,
    .priv_data_size = sizeof(VP8Context),
    .init = vp7_decode_init,
    .close = ff_vp8_decode_free,
    .decode = vp7_decode_frame,
    .capabilities = AV_CODEC_CAP_DR1,
    .flush = vp8_decode_flush,
2772 #endif /* CONFIG_VP7_DECODER */
#if CONFIG_VP8_DECODER
/* Registration table for the VP8 decoder. Supports both frame and slice
 * threading, so it also wires up the thread-copy/update callbacks
 * (compiled out when threading is disabled via ONLY_IF_THREADS_ENABLED).
 * NOTE(review): the `.name` field line and the closing `};` appear to be
 * elided from this excerpt. */
AVCodec ff_vp8_decoder = {
    .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type = AVMEDIA_TYPE_VIDEO,
    .id = AV_CODEC_ID_VP8,
    .priv_data_size = sizeof(VP8Context),
    .init = ff_vp8_decode_init,
    .close = ff_vp8_decode_free,
    .decode = ff_vp8_decode_frame,
    .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
                    AV_CODEC_CAP_SLICE_THREADS,
    .flush = vp8_decode_flush,
    .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
#endif /* CONFIG_VP8_DECODER */