2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "config_components.h"
29 #include "libavutil/imgutils.h"
30 #include "libavutil/mem_internal.h"
33 #include "codec_internal.h"
37 #include "rectangle.h"
39 #include "threadframe.h"
47 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
48 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
49 #elif CONFIG_VP7_DECODER
50 #define VPX(vp7, f) vp7_ ## f
51 #else // CONFIG_VP8_DECODER
52 #define VPX(vp7, f) vp8_ ## f
/* Free every heap allocation owned by the VP8Context: each worker's
 * condition variable, mutex and filter-strength array, then the
 * thread-data array itself, the macroblock array, the intra-4x4
 * top-prediction row and the top nnz/border scratch rows.
 * s->macroblocks points into macroblocks_base (see update_dimensions),
 * so it is only reset to NULL here, never freed directly.
 * NOTE(review): this extract omits some original lines (braces and a
 * probable NULL guard around the thread_data loop) — not compilable as-is. */
55 static void free_buffers(VP8Context *s)
59 for (i = 0; i < MAX_THREADS; i++) {
61 pthread_cond_destroy(&s->thread_data[i].cond);
62 pthread_mutex_destroy(&s->thread_data[i].lock);
64 av_freep(&s->thread_data[i].filter_strength);
66 av_freep(&s->thread_data);
67 av_freep(&s->macroblocks_base);
68 av_freep(&s->intra4x4_pred_mode_top);
69 av_freep(&s->top_nnz);
70 av_freep(&s->top_border);
72 s->macroblocks = NULL;
/* Allocate the buffers backing one VP8Frame:
 *  - the threaded AVFrame picture buffer (with the REF flag when the
 *    frame will be kept as a reference),
 *  - a zeroed mb_width*mb_height segment map,
 *  - the hwaccel per-frame private buffer when a hwaccel is active.
 * On allocation failure the partially set-up frame is torn down again
 * and AVERROR(ENOMEM) is returned; 0 presumably on success (the
 * success-path return line is not visible in this extract). */
75 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
78 if ((ret = ff_thread_get_ext_buffer(s->avctx, &f->tf,
79 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
81 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
83 if (s->avctx->hwaccel) {
84 const AVHWAccel *hwaccel = s->avctx->hwaccel;
85 if (hwaccel->frame_priv_data_size) {
86 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
87 if (!f->hwaccel_priv_buf)
89 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
/* shared ENOMEM cleanup path: drop whatever was already allocated */
95 av_buffer_unref(&f->seg_map);
96 ff_thread_release_ext_buffer(s->avctx, &f->tf);
97 return AVERROR(ENOMEM);
/* Release everything vp8_alloc_frame acquired: the segment map, the
 * hwaccel private buffer (clearing the borrowed data pointer so no
 * dangling reference survives) and the threaded picture buffer. */
100 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
102 av_buffer_unref(&f->seg_map);
103 av_buffer_unref(&f->hwaccel_priv_buf);
104 f->hwaccel_picture_private = NULL;
105 ff_thread_release_ext_buffer(s->avctx, &f->tf);
108 #if CONFIG_VP8_DECODER
/* Turn dst into a new reference to src: picture buffer, segment map
 * and (if present) the hwaccel private buffer.  dst is released first
 * so no previous contents leak.  Returns 0 on success (success return
 * not visible in this extract) or a negative error code. */
109 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
113 vp8_release_frame(s, dst);
115 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
118 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
119 vp8_release_frame(s, dst);
120 return AVERROR(ENOMEM);
122 if (src->hwaccel_picture_private) {
123 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
124 if (!dst->hwaccel_priv_buf)
125 return AVERROR(ENOMEM);
126 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
131 #endif /* CONFIG_VP8_DECODER */
/* Drop all frames and clear the reference-frame pointer table.
 * NOTE(review): free_mem presumably also triggers free_buffers() —
 * the line doing so is not visible in this extract; confirm against
 * the full file. */
133 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
135 VP8Context *s = avctx->priv_data;
138 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
139 vp8_release_frame(s, &s->frames[i]);
140 memset(s->framep, 0, sizeof(s->framep));
/* Codec flush callback: release frames but keep the context buffers
 * (free_mem = 0). */
146 static void vp8_decode_flush(AVCodecContext *avctx)
148 vp8_decode_flush_impl(avctx, 0);
/* Pick one of the 5 frame slots that is not currently serving as the
 * current, previous, golden or altref (GOLDEN2) reference; if the
 * chosen slot still holds picture data, release it before reuse.
 * NOTE(review): the handling after the FATAL "Ran out of free frames"
 * log (and the return statement) is not visible in this extract;
 * as shown, frame could still be NULL at the buf[0] check. */
151 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
153 VP8Frame *frame = NULL;
156 // find a free buffer
157 for (i = 0; i < 5; i++)
158 if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
159 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
160 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
161 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
162 frame = &s->frames[i];
166 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
169 if (frame->tf.f->buf[0])
170 vp8_release_frame(s, frame);
/* Negotiate the output pixel format: build a candidate list (hwaccel
 * formats first, gated by the build-time HWACCEL config; the software
 * fallback entries are not visible in this extract) and let
 * ff_get_format() pick. */
175 static enum AVPixelFormat get_pixel_format(VP8Context *s)
177 enum AVPixelFormat pix_fmts[] = {
178 #if CONFIG_VP8_VAAPI_HWACCEL
181 #if CONFIG_VP8_NVDEC_HWACCEL
188 return ff_get_format(s->avctx, pix_fmts);
/* (Re)configure the decoder for a new frame size.
 * If the coded dimensions or macroblock grid changed while buffers
 * already exist, flush everything and set the new dimensions, then
 * (re)negotiate the pixel format (skipped for WebP-embedded streams
 * and for VP7, which has a fixed format) and reallocate all
 * per-dimension buffers.  Returns 0 on success or a negative error. */
191 static av_always_inline
192 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
194 AVCodecContext *avctx = s->avctx;
195 int i, ret, dim_reset = 0;
/* note operator precedence: the mb-grid mismatch only forces a reset
 * when macroblocks_base is already allocated */
197 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
198 height != s->avctx->height) {
199 vp8_decode_flush_impl(s->avctx, 1);
201 ret = ff_set_dimensions(s->avctx, width, height);
205 dim_reset = (s->macroblocks_base != NULL);
208 if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) &&
209 !s->actually_webp && !is_vp7) {
210 s->pix_fmt = get_pixel_format(s);
212 return AVERROR(EINVAL);
213 avctx->pix_fmt = s->pix_fmt;
216 s->mb_width = (s->avctx->coded_width + 15) / 16;
217 s->mb_height = (s->avctx->coded_height + 15) / 16;
/* VP7 and slice-threaded VP8 keep all macroblocks resident (layout 1);
 * frame threading / single thread uses a two-row layout (layout 0) */
219 s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
220 avctx->thread_count > 1;
221 if (!s->mb_layout) { // Frame threading and one thread
222 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
223 sizeof(*s->macroblocks));
224 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
225 } else // Sliced threading
226 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
227 sizeof(*s->macroblocks));
228 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
229 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
230 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
232 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
233 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
235 return AVERROR(ENOMEM);
238 for (i = 0; i < MAX_THREADS; i++) {
239 s->thread_data[i].filter_strength =
240 av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
241 if (!s->thread_data[i].filter_strength) {
243 return AVERROR(ENOMEM);
246 pthread_mutex_init(&s->thread_data[i].lock, NULL);
247 pthread_cond_init(&s->thread_data[i].cond, NULL);
/* skip the guard column so macroblocks[-1] is valid for left neighbours */
251 s->macroblocks = s->macroblocks_base + 1;
/* VP7 wrapper for update_dimensions(). */
256 static int vp7_update_dimensions(VP8Context *s, int width, int height)
258 return update_dimensions(s, width, height, IS_VP7);
/* VP8 wrapper for update_dimensions(). */
261 static int vp8_update_dimensions(VP8Context *s, int width, int height)
263 return update_dimensions(s, width, height, IS_VP8);
/* Parse segmentation data from the frame header: the update flags,
 * optionally the per-segment quantizer and loop-filter deltas (signed,
 * absolute vs. delta mode), and optionally new segment-id tree
 * probabilities (255 = default when the update bit is unset). */
267 static void parse_segment_info(VP8Context *s)
269 VP56RangeCoder *c = &s->c;
272 s->segmentation.update_map = vp8_rac_get(c);
273 s->segmentation.update_feature_data = vp8_rac_get(c);
275 if (s->segmentation.update_feature_data) {
276 s->segmentation.absolute_vals = vp8_rac_get(c);
278 for (i = 0; i < 4; i++)
279 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
281 for (i = 0; i < 4; i++)
282 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
284 if (s->segmentation.update_map)
285 for (i = 0; i < 3; i++)
286 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* Read the loop-filter delta updates: 4 reference-frame deltas and the
 * mode deltas (MODE_I4x4 .. VP8_MVMODE_SPLIT), each encoded as a
 * 6-bit magnitude followed by a sign bit (negation visible below). */
289 static void update_lf_deltas(VP8Context *s)
291 VP56RangeCoder *c = &s->c;
294 for (i = 0; i < 4; i++) {
295 if (vp8_rac_get(c)) {
296 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
299 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
303 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
304 if (vp8_rac_get(c)) {
305 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
308 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
/* Set up the DCT coefficient partitions: read the partition count
 * (1, 2, 4 or 8), then for each partition but the last read its
 * 24-bit little-endian size from the header area and initialize a
 * range decoder over it; the final partition consumes the remaining
 * buffer.
 * NOTE(review): the return value of the last
 * ff_vp56_init_range_decoder() call is discarded here, unlike the
 * checked call in the loop — confirm against the full file. */
313 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
315 const uint8_t *sizes = buf;
319 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
321 buf += 3 * (s->num_coeff_partitions - 1);
322 buf_size -= 3 * (s->num_coeff_partitions - 1);
326 for (i = 0; i < s->num_coeff_partitions - 1; i++) {
327 int size = AV_RL24(sizes + 3 * i);
328 if (buf_size - size < 0)
330 s->coeff_partition_size[i] = size;
332 ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
339 s->coeff_partition_size[i] = buf_size;
340 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/* VP7 quantizer parsing: a 7-bit base AC index, then five optional
 * per-plane indices that default to the base when their flag is unset.
 * The indices are mapped through the VP7 lookup tables; the chroma DC
 * multiplier is clamped to 132. */
345 static void vp7_get_quants(VP8Context *s)
347 VP56RangeCoder *c = &s->c;
349 int yac_qi = vp8_rac_get_uint(c, 7);
350 int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
351 int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
352 int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
353 int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
354 int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
356 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
357 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
358 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
359 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
360 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
361 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
/* VP8 quantizer parsing: a 7-bit base AC index plus five 4-bit signed
 * deltas, then per-segment quantizer tables.  With segmentation each
 * segment supplies its own base (absolute or relative to yac_qi).
 * Indices are clipped to [0,127] before the lookup; Y2 DC is doubled,
 * Y2 AC scaled by 155/100 with a floor of 8, chroma DC capped at 132. */
364 static void vp8_get_quants(VP8Context *s)
366 VP56RangeCoder *c = &s->c;
369 s->quant.yac_qi = vp8_rac_get_uint(c, 7);
370 s->quant.ydc_delta = vp8_rac_get_sint(c, 4);
371 s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
372 s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
373 s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
374 s->quant.uvac_delta = vp8_rac_get_sint(c, 4);
376 for (i = 0; i < 4; i++) {
377 if (s->segmentation.enabled) {
378 base_qi = s->segmentation.base_quant[i];
379 if (!s->segmentation.absolute_vals)
380 base_qi += s->quant.yac_qi;
382 base_qi = s->quant.yac_qi;
384 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta, 7)];
385 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
386 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
387 /* 101581>>16 is equivalent to 155/100 */
388 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
389 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
390 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];
392 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
393 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
398 * Determine which buffers golden and altref should be updated with after this frame.
399 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
401 * Intra frames update all 3 references
402 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
403 * If the update (golden|altref) flag is set, it's updated with the current frame
404 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
405 * If the flag is not set, the number read means:
407 * 1: VP56_FRAME_PREVIOUS
408 * 2: update golden with altref, or update altref with golden
410 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
412 VP56RangeCoder *c = &s->c;
/* keyframe / explicit-update path: copy the current frame */
415 return VP56_FRAME_CURRENT;
/* otherwise a 2-bit code selects the source per the table above */
417 switch (vp8_rac_get_uint(c, 2)) {
419 return VP56_FRAME_PREVIOUS;
421 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
423 return VP56_FRAME_NONE;
/* Reset all coefficient token probabilities to the spec defaults,
 * expanding the band-indexed defaults to the full 16-position table. */
426 static void vp78_reset_probability_tables(VP8Context *s)
429 for (i = 0; i < 4; i++)
430 for (j = 0; j < 16; j++)
431 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
432 sizeof(s->prob->token[i][j]));
/* Conditional per-token probability updates (shared by VP7 and VP8):
 * for each (plane, band, context, token) an update flag is decoded
 * against vp8_token_update_probs; when set, the new 8-bit probability
 * is written to every scan position mapped to that band. */
435 static void vp78_update_probability_tables(VP8Context *s)
437 VP56RangeCoder *c = &s->c;
440 for (i = 0; i < 4; i++)
441 for (j = 0; j < 8; j++)
442 for (k = 0; k < 3; k++)
443 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
444 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
445 int prob = vp8_rac_get_uint(c, 8);
446 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
447 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
/* Number of MV probabilities per component: 17 for VP7, 19 for VP8. */
451 #define VP7_MVC_SIZE 17
452 #define VP8_MVC_SIZE 19
/* Read updated intra 16x16 / chroma prediction probabilities and the
 * conditional motion-vector probability updates (spec section 17.2).
 * mvc_size selects the VP7 vs VP8 table length. */
454 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
457 VP56RangeCoder *c = &s->c;
461 for (i = 0; i < 4; i++)
462 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
464 for (i = 0; i < 3; i++)
465 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
467 // 17.2 MV probability update
468 for (i = 0; i < 2; i++)
469 for (j = 0; j < mvc_size; j++)
470 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
471 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/* Read the golden/altref update flags and resolve, via
 * ref_to_update(), which frame each reference will be refreshed from. */
474 static void update_refs(VP8Context *s)
476 VP56RangeCoder *c = &s->c;
478 int update_golden = vp8_rac_get(c);
479 int update_altref = vp8_rac_get(c);
481 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
482 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/* Copy both chroma planes (half width/height of the given luma
 * dimensions) from src to dst, honoring each frame's linesize. */
485 static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
489 for (j = 1; j < 3; j++) {
490 for (i = 0; i < height / 2; i++)
491 memcpy(dst->data[j] + i * dst->linesize[j],
492 src->data[j] + i * src->linesize[j], width / 2);
/* Apply the VP7 fade to a plane: each output pixel is
 * clip(y + y*beta/256 + alpha).  NOTE(review): the extract omits the
 * line loading y (presumably y = src2[i]) and the alpha/beta
 * parameter declarations — confirm against the full file. */
496 static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
497 const uint8_t *src, ptrdiff_t src_linesize,
498 int width, int height,
502 for (j = 0; j < height; j++) {
503 const uint8_t *src2 = src + j * src_linesize;
504 uint8_t *dst2 = dst + j * dst_linesize;
505 for (i = 0; i < width; i++) {
507 dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
/* VP7 frame fading: on non-keyframes with a nonzero alpha/beta, fade
 * the previous frame's luma into a (possibly new) previous-frame
 * buffer.  When golden and previous share a buffer, a fresh buffer is
 * allocated for previous and chroma is copied over first so the golden
 * frame is preserved unmodified.  Errors out if no prior references
 * exist (stream started on an interframe). */
512 static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
516 if (!s->keyframe && (alpha || beta)) {
517 int width = s->mb_width * 16;
518 int height = s->mb_height * 16;
521 if (!s->framep[VP56_FRAME_PREVIOUS] ||
522 !s->framep[VP56_FRAME_GOLDEN]) {
523 av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
524 return AVERROR_INVALIDDATA;
528 src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
530 /* preserve the golden frame, write a new previous frame */
531 if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
532 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
533 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
536 dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
538 copy_chroma(dst, src, width, height);
541 fade(dst->data[0], dst->linesize[0],
542 src->data[0], src->linesize[0],
543 width, height, alpha, beta);
/* Parse a complete VP7 frame header.
 * Layout (per the section letters in the comments below): the raw
 * 3/4-byte prefix carries profile, keyframe flag and part1 size; the
 * first range-coded partition then holds dimensions (keyframes only),
 * macroblock features, quantizers, reference updates, fading, loop
 * filter, scan order, token probabilities and inter-frame mode
 * probabilities.  Returns 0 on success (success return not visible in
 * this extract) or a negative AVERROR. */
549 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
551 VP56RangeCoder *c = &s->c;
552 int part1_size, hscale, vscale, i, j, ret;
553 int width = s->avctx->width;
554 int height = s->avctx->height;
559 return AVERROR_INVALIDDATA;
562 s->profile = (buf[0] >> 1) & 7;
563 if (s->profile > 1) {
564 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
565 return AVERROR_INVALIDDATA;
568 s->keyframe = !(buf[0] & 1);
570 part1_size = AV_RL24(buf) >> 4;
572 if (buf_size < 4 - s->profile + part1_size) {
573 av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
574 return AVERROR_INVALIDDATA;
/* profile 0 uses a 4-byte header, profile 1 a 3-byte one */
577 buf += 4 - s->profile;
578 buf_size -= 4 - s->profile;
580 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
582 ret = ff_vp56_init_range_decoder(c, buf, part1_size);
586 buf_size -= part1_size;
588 /* A. Dimension information (keyframes only) */
590 width = vp8_rac_get_uint(c, 12);
591 height = vp8_rac_get_uint(c, 12);
592 hscale = vp8_rac_get_uint(c, 2);
593 vscale = vp8_rac_get_uint(c, 2);
594 if (hscale || vscale)
595 avpriv_request_sample(s->avctx, "Upscaling");
/* keyframe: reset all probabilities and per-frame state to defaults */
597 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
598 vp78_reset_probability_tables(s);
599 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
600 sizeof(s->prob->pred16x16));
601 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
602 sizeof(s->prob->pred8x8c));
603 for (i = 0; i < 2; i++)
604 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
605 sizeof(vp7_mv_default_prob[i]));
606 memset(&s->segmentation, 0, sizeof(s->segmentation));
607 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
608 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
611 if (s->keyframe || s->profile > 0)
612 memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
614 /* B. Decoding information for all four macroblock-level features */
615 for (i = 0; i < 4; i++) {
616 s->feature_enabled[i] = vp8_rac_get(c);
617 if (s->feature_enabled[i]) {
618 s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
620 for (j = 0; j < 3; j++)
621 s->feature_index_prob[i][j] =
622 vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
624 if (vp7_feature_value_size[s->profile][i])
625 for (j = 0; j < 4; j++)
626 s->feature_value[i][j] =
627 vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
/* VP7 has no segmentation / loop-filter deltas; force them off */
631 s->segmentation.enabled = 0;
632 s->segmentation.update_map = 0;
633 s->lf_delta.enabled = 0;
/* VP7 uses a single coefficient partition over the rest of the buffer */
635 s->num_coeff_partitions = 1;
636 ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
640 if (!s->macroblocks_base || /* first frame */
641 width != s->avctx->width || height != s->avctx->height ||
642 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
643 if ((ret = vp7_update_dimensions(s, width, height)) < 0)
647 /* C. Dequantization indices */
650 /* D. Golden frame update flag (a Flag) for interframes only */
652 s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
653 s->sign_bias[VP56_FRAME_GOLDEN] = 0;
657 s->update_probabilities = 1;
660 if (s->profile > 0) {
661 s->update_probabilities = vp8_rac_get(c);
/* when probabilities are not persisted, work on a scratch copy */
662 if (!s->update_probabilities)
663 s->prob[1] = s->prob[0];
666 s->fade_present = vp8_rac_get(c);
669 if (vpX_rac_is_end(c))
670 return AVERROR_INVALIDDATA;
671 /* E. Fading information for previous frame */
672 if (s->fade_present && vp8_rac_get(c)) {
673 alpha = (int8_t) vp8_rac_get_uint(c, 8);
674 beta = (int8_t) vp8_rac_get_uint(c, 8);
677 /* F. Loop filter type */
679 s->filter.simple = vp8_rac_get(c);
681 /* G. DCT coefficient ordering specification */
683 for (i = 1; i < 16; i++)
684 s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];
686 /* H. Loop filter levels */
688 s->filter.simple = vp8_rac_get(c);
689 s->filter.level = vp8_rac_get_uint(c, 6);
690 s->filter.sharpness = vp8_rac_get_uint(c, 3);
692 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
693 vp78_update_probability_tables(s);
695 s->mbskip_enabled = 0;
697 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
699 s->prob->intra = vp8_rac_get_uint(c, 8);
700 s->prob->last = vp8_rac_get_uint(c, 8);
701 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
704 if (vpX_rac_is_end(c))
705 return AVERROR_INVALIDDATA;
707 if ((ret = vp7_fade_frame(s, alpha, beta)) < 0)
/* Parse a complete VP8 frame header (RFC 6386 layout): the 3-byte
 * uncompressed prefix (keyframe flag, profile, show-frame bit, first
 * partition size), the keyframe start code and dimensions, then the
 * range-coded first partition with segmentation, loop filter, token
 * partitions, quantizers, reference/probability updates and mode
 * probabilities.  Records the entropy-coder state at the end of the
 * header for hwaccels.  Returns 0 on success (success return not
 * visible in this extract) or a negative AVERROR. */
713 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
715 VP56RangeCoder *c = &s->c;
716 int header_size, hscale, vscale, ret;
717 int width = s->avctx->width;
718 int height = s->avctx->height;
721 av_log(s->avctx, AV_LOG_ERROR, "Insufficent data (%d) for header\n", buf_size);
722 return AVERROR_INVALIDDATA;
725 s->keyframe = !(buf[0] & 1);
726 s->profile = (buf[0]>>1) & 7;
727 s->invisible = !(buf[0] & 0x10);
728 header_size = AV_RL24(buf) >> 5;
732 s->header_partition_size = header_size;
735 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
/* profile 0 uses sixtap (epel) MC; profiles 1+ use bilinear */
738 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
739 sizeof(s->put_pixels_tab));
740 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
741 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
742 sizeof(s->put_pixels_tab));
744 if (header_size > buf_size - 7 * s->keyframe) {
745 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
746 return AVERROR_INVALIDDATA;
/* keyframes carry a fixed start code plus 14-bit dimensions */
750 if (AV_RL24(buf) != 0x2a019d) {
751 av_log(s->avctx, AV_LOG_ERROR,
752 "Invalid start code 0x%x\n", AV_RL24(buf));
753 return AVERROR_INVALIDDATA;
755 width = AV_RL16(buf + 3) & 0x3fff;
756 height = AV_RL16(buf + 5) & 0x3fff;
757 hscale = buf[4] >> 6;
758 vscale = buf[6] >> 6;
762 if (hscale || vscale)
763 avpriv_request_sample(s->avctx, "Upscaling");
/* keyframe: reset all probabilities and per-frame state to defaults */
765 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
766 vp78_reset_probability_tables(s);
767 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
768 sizeof(s->prob->pred16x16));
769 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
770 sizeof(s->prob->pred8x8c));
771 memcpy(s->prob->mvc, vp8_mv_default_prob,
772 sizeof(s->prob->mvc));
773 memset(&s->segmentation, 0, sizeof(s->segmentation));
774 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
777 ret = ff_vp56_init_range_decoder(c, buf, header_size);
781 buf_size -= header_size;
784 s->colorspace = vp8_rac_get(c);
786 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
787 s->fullrange = vp8_rac_get(c);
790 if ((s->segmentation.enabled = vp8_rac_get(c)))
791 parse_segment_info(s);
793 s->segmentation.update_map = 0; // FIXME: move this to some init function?
795 s->filter.simple = vp8_rac_get(c);
796 s->filter.level = vp8_rac_get_uint(c, 6);
797 s->filter.sharpness = vp8_rac_get_uint(c, 3);
799 if ((s->lf_delta.enabled = vp8_rac_get(c))) {
800 s->lf_delta.update = vp8_rac_get(c);
801 if (s->lf_delta.update)
805 if (setup_partitions(s, buf, buf_size)) {
806 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
807 return AVERROR_INVALIDDATA;
810 if (!s->macroblocks_base || /* first frame */
811 width != s->avctx->width || height != s->avctx->height ||
812 (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
813 if ((ret = vp8_update_dimensions(s, width, height)) < 0)
820 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
821 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
824 // if we aren't saving this frame's probabilities for future frames,
825 // make a copy of the current probabilities
826 if (!(s->update_probabilities = vp8_rac_get(c)))
827 s->prob[1] = s->prob[0];
829 s->update_last = s->keyframe || vp8_rac_get(c);
831 vp78_update_probability_tables(s);
833 if ((s->mbskip_enabled = vp8_rac_get(c)))
834 s->prob->mbskip = vp8_rac_get_uint(c, 8);
837 s->prob->intra = vp8_rac_get_uint(c, 8);
838 s->prob->last = vp8_rac_get_uint(c, 8);
839 s->prob->golden = vp8_rac_get_uint(c, 8);
840 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
843 // Record the entropy coder state here so that hwaccels can use it.
844 s->c.code_word = vp56_rac_renorm(&s->c);
845 s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8);
846 s->coder_state_at_header_end.range = s->c.high;
847 s->coder_state_at_header_end.value = s->c.code_word >> 16;
848 s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
/* Clamp src into the macroblock's MV bounds and store in dst; the
 * bounds themselves are first clipped into int16 range since dst's
 * components are 16-bit. */
853 static av_always_inline
854 void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
856 dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
857 av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
858 dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
859 av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
863 * Motion vector coding, 17.1.
/* Decode one signed MV component.  Large magnitudes are coded bitwise
 * (low 3 bits, then high bits down to bit 4 — 7 bits for VP7, 9 for
 * VP8, with a conditional bit 3); small magnitudes walk a probability
 * tree starting at p[2].  The final p[1]-coded bit gives the sign. */
865 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
869 if (vp56_rac_get_prob_branchy(c, p[0])) {
872 for (i = 0; i < 3; i++)
873 x += vp56_rac_get_prob(c, p[9 + i]) << i;
874 for (i = (vp7 ? 7 : 9); i > 3; i--)
875 x += vp56_rac_get_prob(c, p[9 + i]) << i;
/* bit 3 is only explicit when no higher/lower bits constrain it */
876 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
880 const uint8_t *ps = p + 2;
881 bit = vp56_rac_get_prob(c, *ps);
884 bit = vp56_rac_get_prob(c, *ps);
887 x += vp56_rac_get_prob(c, *ps);
890 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/* VP7 wrapper for read_mv_component(). */
893 static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
895 return read_mv_component(c, p, 1);
/* VP8 wrapper for read_mv_component(). */
898 static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
900 return read_mv_component(c, p, 0);
/* Choose the sub-MV probability set from the left/above neighbour MVs:
 * VP7 has a single table; VP8 selects by whether the neighbours are
 * zero and/or equal to each other. */
903 static av_always_inline
904 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
907 return vp7_submv_prob;
910 return vp8_submv_prob[4 - !!left];
912 return vp8_submv_prob[2];
913 return vp8_submv_prob[1 - !!left];
917 * Split motion vector prediction, 16.4.
918 * @returns the number of motion vectors parsed (2, 4 or 16)
920 static av_always_inline
921 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
922 int layout, int is_vp7)
926 VP8Macroblock *top_mb;
927 VP8Macroblock *left_mb = &mb[-1];
928 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
929 const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
931 VP56mv *left_mv = left_mb->bmv;
932 VP56mv *cur_mv = mb->bmv;
/* locate the top neighbour according to the macroblock layout */
934 if (!layout) // layout is inlined, s->mb_layout is not
937 top_mb = &mb[-s->mb_width - 1];
938 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
939 top_mv = top_mb->bmv;
/* decode the partitioning mode (16x8/8x16, 8x8 or 4x4) */
941 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
942 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
943 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
945 part_idx = VP8_SPLITMVMODE_8x8;
947 part_idx = VP8_SPLITMVMODE_4x4;
950 num = vp8_mbsplit_count[part_idx];
951 mbsplits_cur = vp8_mbsplits[part_idx],
952 firstidx = vp8_mbfirstidx[part_idx];
953 mb->partitioning = part_idx;
/* for each partition: gather left/above sub-MVs (from neighbours on
 * the block edge, from this MB otherwise), pick the probability set,
 * then decode a new MV, zero, or copy from above/left */
955 for (n = 0; n < num; n++) {
957 uint32_t left, above;
958 const uint8_t *submv_prob;
961 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
963 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
965 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
967 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
969 submv_prob = get_submv_prob(left, above, is_vp7);
971 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
972 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
973 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
974 mb->bmv[n].y = mb->mv.y +
975 read_mv_component(c, s->prob->mvc[0], is_vp7);
976 mb->bmv[n].x = mb->mv.x +
977 read_mv_component(c, s->prob->mvc[1], is_vp7);
979 AV_ZERO32(&mb->bmv[n]);
982 AV_WN32A(&mb->bmv[n], above);
985 AV_WN32A(&mb->bmv[n], left);
993 * The vp7 reference decoder uses a padding macroblock column (added to right
994 * edge of the frame) to guard against illegal macroblock offsets. The
995 * algorithm has bugs that permit offsets to straddle the padding column.
996 * This function replicates those bugs.
998 * @param[out] edge_x macroblock x address
999 * @param[out] edge_y macroblock y address
1001 * @return macroblock offset legal (boolean)
1003 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
1004 int xoffset, int yoffset, int boundary,
1005 int *edge_x, int *edge_y)
/* vwidth includes the one-MB padding column on the right edge */
1007 int vwidth = mb_width + 1;
1008 int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
/* reject offsets before the boundary or landing on the padding column */
1009 if (new < boundary || new % vwidth == vwidth - 1)
1011 *edge_y = new / vwidth;
1012 *edge_x = new % vwidth;
/* Return the sub-block MV for split-MV macroblocks, otherwise the
 * macroblock's single MV (bmv[0]). */
1016 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
1018 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
/* VP7 motion-vector decoding: build nearest/near candidates by scanning
 * the VP7 predictor positions (via the buggy-by-design offset
 * calculation), score them, then decode the mode — new MV, split,
 * near, nearest or zero — against the vp7 mode contexts. */
1021 static av_always_inline
1022 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1023 int mb_x, int mb_y, int layout)
1025 VP8Macroblock *mb_edge[12];
1026 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
1027 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1030 uint8_t cnt[3] = { 0 };
1031 VP56RangeCoder *c = &s->c;
1034 AV_ZERO32(&near_mv[0]);
1035 AV_ZERO32(&near_mv[1]);
1036 AV_ZERO32(&near_mv[2]);
/* scan predictor positions, accumulating distinct nonzero MVs into
 * the nearest/near slots and summing each position's score */
1038 for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
1039 const VP7MVPred * pred = &vp7_mv_pred[i];
1042 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
1043 pred->yoffset, !s->profile, &edge_x, &edge_y)) {
1044 VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
1045 ? s->macroblocks_base + 1 + edge_x +
1046 (s->mb_width + 1) * (edge_y + 1)
1047 : s->macroblocks + edge_x +
1048 (s->mb_height - edge_y - 1) * 2;
1049 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
1051 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
1052 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
1054 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
1055 if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
1059 AV_WN32A(&near_mv[CNT_NEAR], mv);
1063 AV_WN32A(&near_mv[CNT_NEAREST], mv);
1072 cnt[idx] += vp7_mv_pred[i].score;
1075 mb->partitioning = VP8_SPLITMVMODE_NONE;
/* mode decision tree driven by the candidate scores */
1077 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
1078 mb->mode = VP8_MVMODE_MV;
1080 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
1082 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
1084 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
1085 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
1087 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
1089 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
1090 mb->mode = VP8_MVMODE_SPLIT;
1091 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1093 mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1094 mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1095 mb->bmv[0] = mb->mv;
1098 mb->mv = near_mv[CNT_NEAR];
1099 mb->bmv[0] = mb->mv;
1102 mb->mv = near_mv[CNT_NEAREST];
1103 mb->bmv[0] = mb->mv;
1106 mb->mode = VP8_MVMODE_ZERO;
1108 mb->bmv[0] = mb->mv;
/* VP8 motion-vector decoding: examine the top, left and top-left
 * neighbour MVs (sign-flipped when the reference frames' sign biases
 * differ), tally them into zero/nearest/near/splitmv counts, then
 * decode the mode — new MV, split, near, nearest or zero — against
 * the vp8 mode contexts, clamping chosen MVs to mv_bounds. */
1112 static av_always_inline
1113 void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
1114 int mb_x, int mb_y, int layout)
1116 VP8Macroblock *mb_edge[3] = { 0 /* top */,
1119 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1120 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1122 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1123 int8_t *sign_bias = s->sign_bias;
1125 uint8_t cnt[4] = { 0 };
1126 VP56RangeCoder *c = &s->c;
/* top/top-left neighbour location depends on the macroblock layout */
1128 if (!layout) { // layout is inlined (s->mb_layout is not)
1129 mb_edge[0] = mb + 2;
1130 mb_edge[2] = mb + 1;
1132 mb_edge[0] = mb - s->mb_width - 1;
1133 mb_edge[2] = mb - s->mb_width - 2;
1136 AV_ZERO32(&near_mv[0]);
1137 AV_ZERO32(&near_mv[1]);
1138 AV_ZERO32(&near_mv[2]);
1140 /* Process MB on top, left and top-left */
1141 #define MV_EDGE_CHECK(n) \
1143 VP8Macroblock *edge = mb_edge[n]; \
1144 int edge_ref = edge->ref_frame; \
1145 if (edge_ref != VP56_FRAME_CURRENT) { \
1146 uint32_t mv = AV_RN32A(&edge->mv); \
1148 if (cur_sign_bias != sign_bias[edge_ref]) { \
1149 /* SWAR negate of the values in mv. */ \
1151 mv = ((mv & 0x7fff7fff) + \
1152 0x00010001) ^ (mv & 0x80008000); \
1154 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1155 AV_WN32A(&near_mv[++idx], mv); \
1156 cnt[idx] += 1 + (n != 2); \
1158 cnt[CNT_ZERO] += 1 + (n != 2); \
1166 mb->partitioning = VP8_SPLITMVMODE_NONE;
1167 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1168 mb->mode = VP8_MVMODE_MV;
1170 /* If we have three distinct MVs, merge first and last if they're the same */
1171 if (cnt[CNT_SPLITMV] &&
1172 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1173 cnt[CNT_NEAREST] += 1;
1175 /* Swap near and nearest if necessary */
1176 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1177 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1178 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1181 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1182 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1183 /* Choose the best mv out of 0,0 and the nearest mv */
1184 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
/* reuse CNT_SPLITMV as the split-mode context from the neighbours */
1185 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1186 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1187 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1189 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1190 mb->mode = VP8_MVMODE_SPLIT;
1191 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1193 mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1194 mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1195 mb->bmv[0] = mb->mv;
1198 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
1199 mb->bmv[0] = mb->mv;
1202 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
1203 mb->bmv[0] = mb->mv;
1206 mb->mode = VP8_MVMODE_ZERO;
1208 mb->bmv[0] = mb->mv;
/* Read the 16 intra 4x4 prediction modes of an I4x4 macroblock.
 * On keyframes each sub-block mode is coded with a context derived from
 * the modes above and to the left; on inter frames a single fixed
 * probability table is used for all 16 sub-blocks.
 * NOTE(review): several original source lines are elided in this view. */
1212 static av_always_inline
1213 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1214 int mb_x, int keyframe, int layout)
1216 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* alternate layout: fetch the "top" mode row from the macroblock above */
1219 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1220 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1225 uint8_t *const left = s->intra4x4_pred_mode_left;
1227 top = mb->intra4x4_pred_mode_top;
1229 top = s->intra4x4_pred_mode_top + 4 * mb_x;
/* keyframe path: context-coded modes, updating the left/top predictors */
1230 for (y = 0; y < 4; y++) {
1231 for (x = 0; x < 4; x++) {
1233 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1234 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1235 left[y] = top[x] = *intra4x4;
/* inter-frame path: one fixed probability table, no context */
1241 for (i = 0; i < 16; i++)
1242 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1243 vp8_pred4x4_prob_inter);
/* Decode per-macroblock mode information: VP7 features, segment id, skip
 * flag, intra/inter decision, reference frame, prediction modes and (for
 * inter blocks) motion vectors.
 * NOTE(review): several original source lines are elided in this view. */
1247 static av_always_inline
1248 void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
1249 VP8Macroblock *mb, int mb_x, int mb_y,
1250 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1252 VP56RangeCoder *c = &s->c;
1253 static const char * const vp7_feature_name[] = { "q-index",
1255 "partial-golden-update",
/* VP7 "features" are parsed but only logged, not applied */
1260 for (i = 0; i < 4; i++) {
1261 if (s->feature_enabled[i]) {
1262 if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1263 int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1264 s->feature_index_prob[i]);
1265 av_log(s->avctx, AV_LOG_WARNING,
1266 "Feature %s present in macroblock (value 0x%x)\n",
1267 vp7_feature_name[i], s->feature_value[i][index]);
/* segment id: read from the bitstream when the map is updated, otherwise
 * inherited from the previous frame's segment map (ref) when available */
1271 } else if (s->segmentation.update_map) {
1272 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1273 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1274 } else if (s->segmentation.enabled)
1275 *segment = ref ? *ref : *segment;
1276 mb->segment = *segment;
1278 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
/* intra macroblock (keyframe path, presumably — branch condition elided) */
1281 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1282 vp8_pred16x16_prob_intra);
1284 if (mb->mode == MODE_I4x4) {
1285 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
/* non-I4x4: replicate the single 16x16 mode into the 4x4 predictors */
1287 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1288 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1290 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1292 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1293 AV_WN32A(s->intra4x4_pred_mode_left, modes);
1296 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1297 vp8_pred8x8c_prob_intra);
1298 mb->ref_frame = VP56_FRAME_CURRENT;
/* inter macroblock: choose reference frame, then decode MVs */
1299 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1301 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1303 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1304 : VP56_FRAME_GOLDEN;
1306 mb->ref_frame = VP56_FRAME_PREVIOUS;
/* ref_count feeds the prefetch heuristic in prefetch_motion() */
1307 s->ref_count[mb->ref_frame - 1]++;
1309 // motion vectors, 16.3
1311 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1313 vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
/* intra macroblock in an inter frame */
1316 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1318 if (mb->mode == MODE_I4x4)
1319 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1321 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1323 mb->ref_frame = VP56_FRAME_CURRENT;
1324 mb->partitioning = VP8_SPLITMVMODE_NONE;
1325 AV_ZERO32(&mb->bmv[0]);
1330 * @param r arithmetic bitstream reader context
1331 * @param block destination for block coefficients
1332 * @param probs probabilities to use when reading trees from the bitstream
1333 * @param i initial coeff index, 0 unless a separate DC block is coded
1334 * @param qmul array holding the dc/ac dequant factor at position 0/1
1336 * @return 0 if no coeffs were decoded
1337 * otherwise, the index of the last coeff decoded plus one
/* Token/coefficient decode loop shared by VP7 and VP8 (RFC 6386 ch. 13).
 * Works on a local copy of the range coder for speed; the caller's coder
 * is presumably synced back on exit (elided in this view). */
1339 static av_always_inline
1340 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1341 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1342 int i, uint8_t *token_prob, int16_t qmul[2],
1343 const uint8_t scan[16], int vp7)
1345 VP56RangeCoder c = *r;
1350 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1354 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1356 break; // invalid input; blocks should end with EOB
1357 token_prob = probs[i][0];
1363 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1365 token_prob = probs[i + 1][1];
1367 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1368 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1370 coeff += vp56_rac_get_prob(&c, token_prob[5]);
/* larger magnitudes are coded in "categories" with extra literal bits */
1374 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1375 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1376 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1377 } else { // DCT_CAT2
1379 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1380 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1382 } else { // DCT_CAT3 and up
1383 int a = vp56_rac_get_prob(&c, token_prob[8]);
1384 int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1385 int cat = (a << 1) + b;
1386 coeff = 3 + (8 << cat);
1387 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1390 token_prob = probs[i + 1][2];
/* sign bit, then dequantize: qmul[0] for DC (i==0), qmul[1] for AC */
1392 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
/* VP7 inter DC prediction: replace/track the DC coefficient of an inter
 * block using the running predictor `pred`. The full threshold logic is
 * elided in this view; the visible condition checks for a zero predictor,
 * zero DC, or a sign change ((pred[0]^dc) >> 31 isolates the sign bit).
 * NOTE(review): return value and pred[1] handling not visible here. */
1399 static av_always_inline
1400 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1402 int16_t dc = block[0];
1410 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1411 block[0] = pred[0] = dc;
1416 block[0] = pred[0] = dc;
/* VP7 specialization: thin wrapper so the shared always-inline decoder is
 * instantiated once per codec with the vp7 flag folded in as a constant. */
1422 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1424 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1425 int i, uint8_t *token_prob,
1427 const uint8_t scan[16])
1429 return decode_block_coeffs_internal(r, block, probs, i,
1430 token_prob, qmul, scan, IS_VP7);
/* VP8 specialization; guarded by #ifndef so an arch-specific (asm)
 * implementation can override it. VP8 always uses the zigzag scan. */
1433 #ifndef vp8_decode_block_coeffs_internal
1434 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1436 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1437 int i, uint8_t *token_prob,
1440 return decode_block_coeffs_internal(r, block, probs, i,
1441 token_prob, qmul, ff_zigzag_scan, IS_VP8);
1446 * @param c arithmetic bitstream reader context
1447 * @param block destination for block coefficients
1448 * @param probs probabilities to use when reading trees from the bitstream
1449 * @param i initial coeff index, 0 unless a separate DC block is coded
1450 * @param zero_nhood the initial prediction context for number of surrounding
1451 * all-zero blocks (only left/top, so 0-2)
1452 * @param qmul array holding the dc/ac dequant factor at position 0/1
1453 * @param scan scan pattern (VP7 only)
1455 * @return 0 if no coeffs were decoded
1456 * otherwise, the index of the last coeff decoded plus one
1458 static av_always_inline
1459 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1460 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1461 int i, int zero_nhood, int16_t qmul[2],
1462 const uint8_t scan[16], int vp7)
1464 uint8_t *token_prob = probs[i][zero_nhood];
/* fast path: immediate EOB means an all-zero block, skip the full decoder */
1465 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1467 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1468 token_prob, qmul, scan)
1469 : vp8_decode_block_coeffs_internal(c, block, probs, i,
/* Decode all residual coefficients of one macroblock: optional separate
 * luma DC (WHT) block, 16 luma 4x4 blocks, and 8 chroma 4x4 blocks.
 * Non-zero counts are tracked in t_nnz/l_nnz for context prediction and
 * in td->non_zero_count_cache for the IDCT stage.
 * NOTE(review): several original source lines are elided in this view. */
1473 static av_always_inline
1474 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1475 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1478 int i, x, y, luma_start = 0, luma_ctx = 3;
1479 int nnz_pred, nnz, nnz_total = 0;
1480 int segment = mb->segment;
/* non-I4x4 / non-split modes carry a separate DC block (token plane 1) */
1483 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1484 nnz_pred = t_nnz[8] + l_nnz[8];
1486 // decode DC values and do hadamard
1487 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1488 nnz_pred, s->qmat[segment].luma_dc_qmul,
1489 ff_zigzag_scan, is_vp7);
1490 l_nnz[8] = t_nnz[8] = !!nnz;
1492 if (is_vp7 && mb->mode > MODE_I4x4) {
1493 nnz |= inter_predict_dc(td->block_dc,
1494 s->inter_dc_pred[mb->ref_frame - 1]);
/* inverse Walsh-Hadamard: DC-only variant when only DC is non-zero */
1501 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1503 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
/* luma AC blocks (luma_start/luma_ctx select the token plane) */
1510 for (y = 0; y < 4; y++)
1511 for (x = 0; x < 4; x++) {
1512 nnz_pred = l_nnz[y] + t_nnz[x];
1513 nnz = decode_block_coeffs(c, td->block[y][x],
1514 s->prob->token[luma_ctx],
1515 luma_start, nnz_pred,
1516 s->qmat[segment].luma_qmul,
1517 s->prob[0].scan, is_vp7);
1518 /* nnz+block_dc may be one more than the actual last index,
1519 * but we don't care */
1520 td->non_zero_count_cache[y][x] = nnz + block_dc;
1521 t_nnz[x] = l_nnz[y] = !!nnz;
1526 // TODO: what to do about dimensions? 2nd dim for luma is x,
1527 // but for chroma it's (y<<1)|x
1528 for (i = 4; i < 6; i++)
1529 for (y = 0; y < 2; y++)
1530 for (x = 0; x < 2; x++) {
1531 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1532 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1533 s->prob->token[2], 0, nnz_pred,
1534 s->qmat[segment].chroma_qmul,
1535 s->prob[0].scan, is_vp7);
1536 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1537 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1541 // if there were no coded coeffs despite the macroblock not being marked skip,
1542 // we MUST not do the inner loop filter and should not do IDCT
1543 // Since skip isn't used for bitstream prediction, just manually set it.
/* Save the bottom row of this macroblock (16 luma + 8+8 chroma pixels)
 * into the top_border buffer, to serve as the "above" edge for intra
 * prediction of the macroblock row below. Chroma copy is presumably
 * skipped for the simple loop filter (condition elided in this view). */
1548 static av_always_inline
1549 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1550 uint8_t *src_cb, uint8_t *src_cr,
1551 ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1553 AV_COPY128(top_border, src_y + 15 * linesize);
1555 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1556 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
/* Exchange (xchg=1) or copy border pixels between the frame and the
 * top_border scratch rows, so intra prediction can read a contiguous
 * "above" edge including the top-left neighbour.
 * NOTE(review): the XCHG macro body is elided in this view. */
1560 static av_always_inline
1561 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1562 uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1563 int mb_y, int mb_width, int simple, int xchg)
1565 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1567 src_cb -= uvlinesize;
1568 src_cr -= uvlinesize;
1570 #define XCHG(a, b, xchg) \
1578 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1579 XCHG(top_border, src_y, xchg);
1580 XCHG(top_border + 8, src_y + 8, 1);
/* top-right edge comes from the next macroblock, if there is one */
1581 if (mb_x < mb_width - 1)
1582 XCHG(top_border + 32, src_y + 16, 1);
1584 // only copy chroma for normal loop filter
1585 // or to initialize the top row to 127
1586 if (!simple || !mb_y) {
1587 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1588 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1589 XCHG(top_border + 16, src_cb, 1);
1590 XCHG(top_border + 24, src_cr, 1);
/* Map DC_PRED8x8 to an edge-aware variant when the left and/or top
 * neighbours are outside the frame (conditions elided in this view). */
1594 static av_always_inline
1595 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1598 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1600 return mb_y ? mode : LEFT_DC_PRED8x8;
/* Replace TM (PLANE) 8x8 prediction with a safe fallback at the left/top
 * frame edges; VP7 and VP8 use different DC fill values. */
1603 static av_always_inline
1604 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1607 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1609 return mb_y ? mode : HOR_PRED8x8;
/* Fix up a 16x16/8x8 intra mode so it never reads pixels outside the
 * frame (edge emulation). Dispatches per mode; switch head elided. */
1612 static av_always_inline
1613 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1617 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1619 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1621 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1622 case PLANE_PRED8x8: /* TM */
1623 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
/* 4x4 analogue of check_tm_pred8x8_mode: TM fallback at frame edges. */
1628 static av_always_inline
1629 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1632 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1634 return mb_y ? mode : HOR_VP8_PRED;
/* Fix up a 4x4 intra mode at frame edges. Modes that need neighbour
 * pixels from outside the frame either get a substitute mode or set
 * *copy_buf so the caller predicts into a padded scratch buffer.
 * NOTE(review): several original source lines are elided in this view. */
1638 static av_always_inline
1639 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1640 int *copy_buf, int vp7)
1644 if (!mb_x && mb_y) {
1649 case DIAG_DOWN_LEFT_PRED:
1650 case VERT_LEFT_PRED:
1651 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1659 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1661 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1662 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1663 * as 16x16/8x8 DC */
1664 case DIAG_DOWN_RIGHT_PRED:
1665 case VERT_RIGHT_PRED:
/* Perform intra prediction (and the per-4x4 IDCT for I4x4 blocks) for a
 * whole macroblock: luma 16x16 or 16 separate 4x4 sub-blocks, then both
 * chroma 8x8 planes. Borders are exchanged around the prediction so the
 * "above" row is available even with in-loop deblocking.
 * NOTE(review): several original source lines are elided in this view. */
1674 static av_always_inline
1675 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1676 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1678 int x, y, mode, nnz;
1681 /* for the first row, we need to run xchg_mb_border to init the top edge
1682 * to 127 otherwise, skip it if we aren't going to deblock */
1683 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1684 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1685 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1686 s->filter.simple, 1);
1688 if (mb->mode < MODE_I4x4) {
1689 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1690 s->hpc.pred16x16[mode](dst[0], s->linesize);
/* I4x4: predict and reconstruct each 4x4 sub-block in raster order */
1692 uint8_t *ptr = dst[0];
1693 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1694 const uint8_t lo = is_vp7 ? 128 : 127;
1695 const uint8_t hi = is_vp7 ? 128 : 129;
1696 uint8_t tr_top[4] = { lo, lo, lo, lo };
1698 // all blocks on the right edge of the macroblock use bottom edge
1699 // the top macroblock for their topright edge
1700 uint8_t *tr_right = ptr - s->linesize + 16;
1702 // if we're on the right edge of the frame, said edge is extended
1703 // from the top macroblock
1704 if (mb_y && mb_x == s->mb_width - 1) {
1705 tr = tr_right[-1] * 0x01010101u;
1706 tr_right = (uint8_t *) &tr;
1710 AV_ZERO128(td->non_zero_count_cache);
1712 for (y = 0; y < 4; y++) {
1713 uint8_t *topright = ptr + 4 - s->linesize;
1714 for (x = 0; x < 4; x++) {
1716 ptrdiff_t linesize = s->linesize;
1717 uint8_t *dst = ptr + 4 * x;
1718 LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
1720 if ((y == 0 || x == 3) && mb_y == 0) {
1723 topright = tr_right;
1725 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1726 mb_y + y, &copy, is_vp7);
/* edge case: predict into a padded 8-wide scratch buffer whose left
 * column / top row are filled from the frame or the `lo` constant */
1728 dst = copy_dst + 12;
1732 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1734 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1738 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1747 copy_dst[11] = ptr[4 * x - 1];
1748 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1749 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1750 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1753 s->hpc.pred4x4[mode](dst, topright, linesize);
/* copy the 4x4 prediction back out of the scratch buffer */
1755 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1756 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1757 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1758 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
/* add the residual: DC-only fast path when only the DC coeff is set */
1761 nnz = td->non_zero_count_cache[y][x];
1764 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1765 td->block[y][x], s->linesize);
1767 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1768 td->block[y][x], s->linesize);
1773 ptr += 4 * s->linesize;
/* chroma: one 8x8 prediction per plane, shared mode */
1778 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1779 mb_x, mb_y, is_vp7);
1780 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1781 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1783 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1784 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1785 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1786 s->filter.simple, 0);
/* Per-subpel-position MC filter metadata, indexed by the 3-bit fractional
 * MV component (0..7). Row 0 doubles as the mc_func pointer index. */
1789 static const uint8_t subpel_idx[3][8] = {
1790 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1791 // also function pointer index
1792 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1793 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1799 * @param s VP8 decoding context
1800 * @param dst target buffer for block data at block position
1801 * @param ref reference picture buffer at origin (0, 0)
1802 * @param mv motion vector (relative to block position) to get pixel data from
1803 * @param x_off horizontal position of block from origin (0, 0)
1804 * @param y_off vertical position of block from origin (0, 0)
1805 * @param block_w width of block (16, 8 or 4)
1806 * @param block_h height of block (always same as block_w)
1807 * @param width width of src/dst plane data
1808 * @param height height of src/dst plane data
1809 * @param linesize size of a single line of plane data, including padding
1810 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Luma motion compensation for one block; falls back to
 * emulated_edge_mc when the filter footprint crosses the frame edge. */
1812 static av_always_inline
1813 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1814 ThreadFrame *ref, const VP56mv *mv,
1815 int x_off, int y_off, int block_w, int block_h,
1816 int width, int height, ptrdiff_t linesize,
1817 vp8_mc_func mc_func[3][3])
1819 uint8_t *src = ref->f->data[0];
1822 ptrdiff_t src_linesize = linesize;
/* luma MVs are in quarter-pel; *2 yields the eighth-pel subpel index */
1824 int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1825 int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1827 x_off += mv->x >> 2;
1828 y_off += mv->y >> 2;
/* frame-threading: wait until the reference rows we read are decoded */
1831 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1832 src += y_off * linesize + x_off;
1833 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1834 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1835 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1836 src - my_idx * linesize - mx_idx,
1837 EDGE_EMU_LINESIZE, linesize,
1838 block_w + subpel_idx[1][mx],
1839 block_h + subpel_idx[1][my],
1840 x_off - mx_idx, y_off - my_idx,
1842 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1843 src_linesize = EDGE_EMU_LINESIZE;
1845 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
/* full-pel MV: plain copy, no interpolation filter needed */
1847 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1848 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1849 linesize, block_h, 0, 0);
1854 * chroma MC function
1856 * @param s VP8 decoding context
1857 * @param dst1 target buffer for block data at block position (U plane)
1858 * @param dst2 target buffer for block data at block position (V plane)
1859 * @param ref reference picture buffer at origin (0, 0)
1860 * @param mv motion vector (relative to block position) to get pixel data from
1861 * @param x_off horizontal position of block from origin (0, 0)
1862 * @param y_off vertical position of block from origin (0, 0)
1863 * @param block_w width of block (16, 8 or 4)
1864 * @param block_h height of block (always same as block_w)
1865 * @param width width of src/dst plane data
1866 * @param height height of src/dst plane data
1867 * @param linesize size of a single line of plane data, including padding
1868 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1870 static av_always_inline
1871 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1872 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1873 int x_off, int y_off, int block_w, int block_h,
1874 int width, int height, ptrdiff_t linesize,
1875 vp8_mc_func mc_func[3][3])
1877 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
/* chroma MVs are in eighth-pel: low 3 bits = subpel phase */
1880 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1881 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1883 x_off += mv->x >> 3;
1884 y_off += mv->y >> 3;
1887 src1 += y_off * linesize + x_off;
1888 src2 += y_off * linesize + x_off;
/* frame-threading: wait for the needed reference rows (chroma: >>3) */
1889 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1890 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1891 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
/* edge case: both U and V go through the edge-emulation buffer */
1892 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1893 src1 - my_idx * linesize - mx_idx,
1894 EDGE_EMU_LINESIZE, linesize,
1895 block_w + subpel_idx[1][mx],
1896 block_h + subpel_idx[1][my],
1897 x_off - mx_idx, y_off - my_idx, width, height);
1898 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1899 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1901 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1902 src2 - my_idx * linesize - mx_idx,
1903 EDGE_EMU_LINESIZE, linesize,
1904 block_w + subpel_idx[1][mx],
1905 block_h + subpel_idx[1][my],
1906 x_off - mx_idx, y_off - my_idx, width, height);
1907 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1908 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1910 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1911 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* full-pel MV: plain copy for both chroma planes */
1914 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1915 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1916 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/* Motion-compensate one partition of a macroblock: luma at (bx_off,by_off)
 * with the given size, then the corresponding chroma block using a derived
 * (averaged/rounded) chroma MV. uvmv derivation is elided in this view. */
1920 static av_always_inline
1921 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1922 ThreadFrame *ref_frame, int x_off, int y_off,
1923 int bx_off, int by_off, int block_w, int block_h,
1924 int width, int height, VP56mv *mv)
1929 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1930 ref_frame, mv, x_off + bx_off, y_off + by_off,
1931 block_w, block_h, width, height, s->linesize,
1932 s->put_pixels_tab[block_w == 8]);
1935 if (s->profile == 3) {
1936 /* this block only applies VP8; it is safe to check
1937 * only the profile, as VP7 profile <= 1 */
/* chroma dims/offsets are half the luma ones; uvlinesize differs */
1949 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1950 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1951 &uvmv, x_off + bx_off, y_off + by_off,
1952 block_w, block_h, width, height, s->uvlinesize,
1953 s->put_pixels_tab[1 + (block_w == 4)]);
1956 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1957 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1958 static av_always_inline
1959 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1962 /* Don't prefetch refs that haven't been used very often this frame. */
1963 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1964 int x_off = mb_x << 4, y_off = mb_y << 4;
/* rough luma address of the block ~4 MBs ahead (+8 centers the fetch) */
1965 int mx = (mb->mv.x >> 2) + x_off + 8;
1966 int my = (mb->mv.y >> 2) + y_off;
1967 uint8_t **src = s->framep[ref]->tf.f->data;
1968 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1969 /* For threading, a ff_thread_await_progress here might be useful, but
1970 * it actually slows down the decoder. Since a bad prefetch doesn't
1971 * generate bad decoder output, we don't run it here. */
1972 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
/* chroma: U and V are assumed contiguous (src[2]-src[1] stride trick) */
1973 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1974 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1979 * Apply motion vectors to prediction buffer, chapter 18.
1981 static av_always_inline
1982 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1983 VP8Macroblock *mb, int mb_x, int mb_y)
1985 int x_off = mb_x << 4, y_off = mb_y << 4;
1986 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1987 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1988 VP56mv *bmv = mb->bmv;
/* dispatch on the split mode; each case covers the whole 16x16 MB */
1990 switch (mb->partitioning) {
1991 case VP8_SPLITMVMODE_NONE:
1992 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1993 0, 0, 16, 16, width, height, &mb->mv);
1995 case VP8_SPLITMVMODE_4x4: {
/* 16 independent 4x4 luma blocks, one MV each */
2000 for (y = 0; y < 4; y++) {
2001 for (x = 0; x < 4; x++) {
2002 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
2003 ref, &bmv[4 * y + x],
2004 4 * x + x_off, 4 * y + y_off, 4, 4,
2005 width, height, s->linesize,
2006 s->put_pixels_tab[2]);
/* chroma: average the 4 covering luma MVs with round-to-nearest
 * (FF_SIGNBIT corrects the rounding for negative sums) */
2015 for (y = 0; y < 2; y++) {
2016 for (x = 0; x < 2; x++) {
2017 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
2018 mb->bmv[2 * y * 4 + 2 * x + 1].x +
2019 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
2020 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
2021 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
2022 mb->bmv[2 * y * 4 + 2 * x + 1].y +
2023 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
2024 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
2025 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
2026 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
2027 if (s->profile == 3) {
2031 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
2032 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
2033 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
2034 width, height, s->uvlinesize,
2035 s->put_pixels_tab[2]);
2040 case VP8_SPLITMVMODE_16x8:
2041 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2042 0, 0, 16, 8, width, height, &bmv[0]);
2043 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2044 0, 8, 16, 8, width, height, &bmv[1]);
2046 case VP8_SPLITMVMODE_8x16:
2047 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2048 0, 0, 8, 16, width, height, &bmv[0]);
2049 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2050 8, 0, 8, 16, width, height, &bmv[1]);
2052 case VP8_SPLITMVMODE_8x8:
2053 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2054 0, 0, 8, 8, width, height, &bmv[0]);
2055 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2056 8, 0, 8, 8, width, height, &bmv[1]);
2057 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2058 0, 8, 8, 8, width, height, &bmv[2]);
2059 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2060 8, 8, 8, 8, width, height, &bmv[3]);
/* Add the decoded residual to the (already predicted) macroblock pixels.
 * Uses non_zero_count_cache to pick per-4x4-row fast paths: full IDCT,
 * DC-only IDCT, or batched DC-only add for a whole row of 4 blocks.
 * NOTE(review): several original source lines are elided in this view. */
2065 static av_always_inline
2066 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
2070 if (mb->mode != MODE_I4x4) {
2071 uint8_t *y_dst = dst[0];
2072 for (y = 0; y < 4; y++) {
/* nnz4 packs the 4 per-block counts of this row into one 32-bit word */
2073 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
2075 if (nnz4 & ~0x01010101) {
/* at least one block has AC coeffs: per-block dispatch, shifting
 * nnz4 right per iteration (shift elided in this view) */
2076 for (x = 0; x < 4; x++) {
2077 if ((uint8_t) nnz4 == 1)
2078 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
2081 else if ((uint8_t) nnz4 > 1)
2082 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
/* all four blocks DC-only: batched add */
2090 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2093 y_dst += 4 * s->linesize;
/* chroma planes, same scheme on 2x2 blocks per plane */
2097 for (ch = 0; ch < 2; ch++) {
2098 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2100 uint8_t *ch_dst = dst[1 + ch];
2101 if (nnz4 & ~0x01010101) {
2102 for (y = 0; y < 2; y++) {
2103 for (x = 0; x < 2; x++) {
2104 if ((uint8_t) nnz4 == 1)
2105 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2106 td->block[4 + ch][(y << 1) + x],
2108 else if ((uint8_t) nnz4 > 1)
2109 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2110 td->block[4 + ch][(y << 1) + x],
2114 goto chroma_idct_end;
2116 ch_dst += 4 * s->uvlinesize;
2119 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
/* Compute per-macroblock loop-filter parameters (spec ch. 15): base level
 * from segmentation or the frame level, adjusted by ref/mode deltas,
 * clamped to [0,63]; interior limit derived from level and sharpness. */
2127 static av_always_inline
2128 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2129 VP8FilterStrength *f, int is_vp7)
2131 int interior_limit, filter_level;
2133 if (s->segmentation.enabled) {
2134 filter_level = s->segmentation.filter_level[mb->segment];
2135 if (!s->segmentation.absolute_vals)
2136 filter_level += s->filter.level;
2138 filter_level = s->filter.level;
2140 if (s->lf_delta.enabled) {
2141 filter_level += s->lf_delta.ref[mb->ref_frame];
2142 filter_level += s->lf_delta.mode[mb->mode];
2145 filter_level = av_clip_uintp2(filter_level, 6);
2147 interior_limit = filter_level;
2148 if (s->filter.sharpness) {
2149 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2150 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2152 interior_limit = FFMAX(interior_limit, 1);
2154 f->filter_level = filter_level;
2155 f->inner_limit = interior_limit;
/* inner (within-MB) edges are filtered unless the MB is a skipped,
 * non-split, non-I4x4 inter block; VP7 always filters inner edges */
2156 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2157 mb->mode == VP8_MVMODE_SPLIT;
/* Normal (full) loop filter for one macroblock: filter the left and top
 * MB edges, then the three inner vertical and horizontal edges of luma
 * plus the inner chroma edges, per the strengths from filter_level_for_mb.
 * NOTE(review): several original source lines are elided in this view. */
2160 static av_always_inline
2161 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2162 int mb_x, int mb_y, int is_vp7)
2164 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2165 int filter_level = f->filter_level;
2166 int inner_limit = f->inner_limit;
2167 int inner_filter = f->inner_filter;
2168 ptrdiff_t linesize = s->linesize;
2169 ptrdiff_t uvlinesize = s->uvlinesize;
/* high-edge-variance threshold tables: [0] inter frames, [1] keyframes */
2170 static const uint8_t hev_thresh_lut[2][64] = {
2171 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2172 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2175 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2176 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2177 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* edge limits: VP7 and VP8 derive them differently (branch elided) */
2185 bedge_lim_y = filter_level;
2186 bedge_lim_uv = filter_level * 2;
2187 mbedge_lim = filter_level + 2;
2190 bedge_lim_uv = filter_level * 2 + inner_limit;
2191 mbedge_lim = bedge_lim_y + 4;
2194 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* left macroblock edge (horizontal filtering of vertical edge) */
2197 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2198 mbedge_lim, inner_limit, hev_thresh);
2199 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2200 mbedge_lim, inner_limit, hev_thresh);
2203 #define H_LOOP_FILTER_16Y_INNER(cond) \
2204 if (cond && inner_filter) { \
2205 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2206 bedge_lim_y, inner_limit, \
2208 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2209 bedge_lim_y, inner_limit, \
2211 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2212 bedge_lim_y, inner_limit, \
2214 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2215 uvlinesize, bedge_lim_uv, \
2216 inner_limit, hev_thresh); \
/* VP8 filters inner vertical edges here; VP7 does it last (see below) */
2219 H_LOOP_FILTER_16Y_INNER(!is_vp7)
/* top macroblock edge (vertical filtering of horizontal edge) */
2222 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2223 mbedge_lim, inner_limit, hev_thresh);
2224 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2225 mbedge_lim, inner_limit, hev_thresh);
2229 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2230 linesize, bedge_lim_y,
2231 inner_limit, hev_thresh);
2232 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2233 linesize, bedge_lim_y,
2234 inner_limit, hev_thresh);
2235 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2236 linesize, bedge_lim_y,
2237 inner_limit, hev_thresh);
2238 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2239 dst[2] + 4 * uvlinesize,
2240 uvlinesize, bedge_lim_uv,
2241 inner_limit, hev_thresh);
2244 H_LOOP_FILTER_16Y_INNER(is_vp7)
/* Simple loop filter: luma only, shorter taps, no chroma and no HEV
 * threshold. Filters the MB edges and the three inner edges each way.
 * NOTE(review): edge-presence conditions are elided in this view. */
2247 static av_always_inline
2248 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2251 int mbedge_lim, bedge_lim;
2252 int filter_level = f->filter_level;
2253 int inner_limit = f->inner_limit;
2254 int inner_filter = f->inner_filter;
2255 ptrdiff_t linesize = s->linesize;
2260 bedge_lim = 2 * filter_level + inner_limit;
2261 mbedge_lim = bedge_lim + 4;
/* vertical edges (left MB edge + 3 inner) */
2264 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2266 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2267 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2268 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
/* horizontal edges (top MB edge + 3 inner) */
2272 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2274 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2275 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2276 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
/* MV clamp margin: 16 pixels in quarter-pel units */
2280 #define MARGIN (16 << 2)
/* Pre-pass over the whole frame decoding macroblock modes and motion
 * vectors (used when decoding with frame threading, so the MV/mode data
 * is available before slice decoding starts). */
2281 static av_always_inline
2282 int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2283 VP8Frame *prev_frame, int is_vp7)
2285 VP8Context *s = avctx->priv_data;
2288 s->mv_bounds.mv_min.y = -MARGIN;
2289 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2290 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
/* macroblocks_base has a 1-MB guard band on the top and left */
2291 VP8Macroblock *mb = s->macroblocks_base +
2292 ((s->mb_width + 1) * (mb_y + 1) + 1);
2293 int mb_xy = mb_y * s->mb_width;
2295 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2297 s->mv_bounds.mv_min.x = -MARGIN;
2298 s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2300 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
/* bail out on truncated input rather than reading past the coder */
2301 if (vpX_rac_is_end(&s->c)) {
2302 return AVERROR_INVALIDDATA;
2305 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2306 DC_PRED * 0x01010101);
2307 decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2308 prev_frame && prev_frame->seg_map ?
2309 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
/* slide the clamp window by one MB (64 = 16 px in quarter-pel) */
2310 s->mv_bounds.mv_min.x -= 64;
2311 s->mv_bounds.mv_max.x -= 64;
2313 s->mv_bounds.mv_min.y -= 64;
2314 s->mv_bounds.mv_max.y -= 64;
/* VP7 entry point for the shared MV/mode pre-pass. */
2319 static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2320 VP8Frame *prev_frame)
2322 return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
/* VP8 entry point for the shared MV/mode pre-pass. */
2325 static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2326 VP8Frame *prev_frame)
2328 return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
/*
 * Sliced-threading synchronization: block the calling thread (td) until the
 * other thread (otd) has progressed to at least macroblock
 * (mb_x_check, mb_y_check).  Positions are packed as (mb_y << 16) | mb_x so
 * a single atomic integer comparison orders them row-major.  The re-check
 * under otd->lock guards against the race between the unlocked load and
 * taking the mutex; wait_mb_pos is restored to INT_MAX afterwards so
 * update_pos() stops signalling this thread.
 */
2332 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2334 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2335 if (atomic_load(&otd->thread_mb_pos) < tmp) { \
2336 pthread_mutex_lock(&otd->lock); \
2337 atomic_store(&td->wait_mb_pos, tmp); \
2339 if (atomic_load(&otd->thread_mb_pos) >= tmp) \
2341 pthread_cond_wait(&otd->cond, &otd->lock); \
2343 atomic_store(&td->wait_mb_pos, INT_MAX); \
2344 pthread_mutex_unlock(&otd->lock); \
/*
 * Publish this thread's decode progress as (mb_y << 16) | mb_x and, when
 * sliced threading is active, wake any neighbouring thread whose recorded
 * wait_mb_pos is now satisfied.  The broadcast is taken under td->lock so
 * it pairs with the cond_wait in check_thread_pos() above.
 */
2348 #define update_pos(td, mb_y, mb_x) \
2350 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2351 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2353 int is_null = !next_td || !prev_td; \
2354 int pos_check = (is_null) ? 1 : \
2355 (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) || \
2356 (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \
2357 atomic_store(&td->thread_mb_pos, pos); \
2358 if (sliced_threading && pos_check) { \
2359 pthread_mutex_lock(&td->lock); \
2360 pthread_cond_broadcast(&td->cond); \
2361 pthread_mutex_unlock(&td->lock); \
/* Non-threaded build: the position bookkeeping becomes a no-op statement
 * (`while(0)` absorbs the trailing semicolon at the call sites). */
2365 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
2366 #define update_pos(td, mb_y, mb_x) while(0)
/**
 * Decode one macroblock row (modes, coefficients, intra/inter prediction,
 * IDCT) without applying the loop filter; filter strengths are only
 * computed and stashed in td->filter_strength for the later filter pass.
 * The row index is recovered from td->thread_mb_pos (set by the sliced
 * dispatcher).  Returns 0 or AVERROR_INVALIDDATA if the coefficient
 * partition's range coder ran dry.
 *
 * NOTE(review): braces/else lines are missing from this extraction;
 * tokens below are kept verbatim.
 */
2369 static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2370                                                     int jobnr, int threadnr, int is_vp7)
2372     VP8Context *s = avctx->priv_data;
2373     VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2374     int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
2375     int mb_x, mb_xy = mb_y * s->mb_width;
2376     int num_jobs = s->num_jobs;
2377     VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
/* Coefficient partitions are assigned to rows round-robin (power-of-two
 * partition count, hence the mask). */
2378     VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
/* Destination pointers for this row's luma and two chroma planes. */
2381         curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2382         curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2383         curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2386     if (vpX_rac_is_end(c))
2387         return AVERROR_INVALIDDATA;
/* Neighbouring job slots, used by check_thread_pos/update_pos. */
2392         prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2393     if (mb_y == s->mb_height - 1)
2396         next_td = &s->thread_data[(jobnr + 1) % num_jobs];
/* mb_layout==1: modes/MVs were pre-decoded into the frame-wide array. */
2397     if (s->mb_layout == 1)
2398         mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2400         // Make sure the previous frame has read its segmentation map,
2401         // if we re-use the same map.
2402         if (prev_frame && s->segmentation.enabled &&
2403             !s->segmentation.update_map)
2404             ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2405         mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2406         memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2407         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
/* VP8 resets left nnz every row; VP7 only at the top of the frame. */
2410     if (!is_vp7 || mb_y == 0)
2411         memset(td->left_nnz, 0, sizeof(td->left_nnz));
2413     td->mv_bounds.mv_min.x = -MARGIN;
2414     td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2416     for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2417         if (vpX_rac_is_end(c))
2418             return AVERROR_INVALIDDATA;
2419         // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2420         if (prev_td != td) {
2421             if (threadnr != 0) {
2422                 check_thread_pos(td, prev_td,
2423                                  mb_x + (is_vp7 ? 2 : 1),
2424                                  mb_y - (is_vp7 ? 2 : 1));
/* Thread 0 also waits past the filter pass, hence the mb_width+3 offset. */
2426                 check_thread_pos(td, prev_td,
2427                                  mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2428                                  mb_y - (is_vp7 ? 2 : 1));
2432         s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2434         s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2435                          dst[2] - dst[1], 2);
2438         decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2439                        prev_frame && prev_frame->seg_map ?
2440                        prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2442         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2445         decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2447         if (mb->mode <= MODE_I4x4)
2448             intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2450             inter_predict(s, td, dst, mb, mb_x, mb_y);
2452         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2455             idct_mb(s, td, dst, mb);
/* Skipped-MB path: clear nnz context instead of running the IDCT. */
2457             AV_ZERO64(td->left_nnz);
2458             AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2460             /* Reset DC block predictors if they would exist
2461              * if the mb had coefficients */
2462             if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2463                 td->left_nnz[8]      = 0;
2464                 s->top_nnz[mb_x][8] = 0;
2468         if (s->deblock_filter)
2469             filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
/* With multiple jobs, the last job backs up the row border that the next
 * row's filter pass will need (single-job builds do it in filter_mb_row). */
2471         if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2472             if (s->filter.simple)
2473                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2474                                  NULL, NULL, s->linesize, 0, 1);
2476                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2477                                  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2480         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2485         td->mv_bounds.mv_min.x -= 64;
2486         td->mv_bounds.mv_max.x -= 64;
2488         if (mb_x == s->mb_width + 1) {
2489             update_pos(td, mb_y, s->mb_width + 3);
2491             update_pos(td, mb_y, mb_x);
2497 static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2498 int jobnr, int threadnr)
2500 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2503 static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2504 int jobnr, int threadnr)
2506 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
/**
 * Apply the in-loop deblocking filter to one macroblock row, using the
 * per-MB strengths stashed in td->filter_strength by the decode pass.
 * Synchronizes with neighbouring sliced-threading jobs via
 * check_thread_pos/update_pos.
 *
 * NOTE(review): some original lines are missing from this extraction;
 * tokens below are kept verbatim.
 */
2509 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2510                                            int jobnr, int threadnr, int is_vp7)
2512     VP8Context *s = avctx->priv_data;
2513     VP8ThreadData *td = &s->thread_data[threadnr];
2514     int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
2515     AVFrame *curframe = s->curframe->tf.f;
2517     VP8ThreadData *prev_td, *next_td;
/* Plane base pointers for this row. */
2519         curframe->data[0] + 16 * mb_y * s->linesize,
2520         curframe->data[1] + 8 * mb_y * s->uvlinesize,
2521         curframe->data[2] + 8 * mb_y * s->uvlinesize
2524     if (s->mb_layout == 1)
2525         mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2527         mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2532         prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2533     if (mb_y == s->mb_height - 1)
2536         next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2538     for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2539         VP8FilterStrength *f = &td->filter_strength[mb_x];
/* Wait until the previous job has decoded (not just filtered) far enough;
 * positions past mb_width+3 encode the filter phase of a row. */
2541             check_thread_pos(td, prev_td,
2542                              (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2544         if (next_td != &s->thread_data[0])
2545             check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
/* Single-job case: border backup happens here instead of in the decode
 * pass (see decode_mb_row_no_filter). */
2547         if (num_jobs == 1) {
2548             if (s->filter.simple)
2549                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2550                                  NULL, NULL, s->linesize, 0, 1);
2552                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2553                                  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2556         if (s->filter.simple)
2557             filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2559             filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2564         update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2568 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2569 int jobnr, int threadnr)
2571 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2574 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2575 int jobnr, int threadnr)
2577 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
/**
 * Per-job driver for sliced threading: job #jobnr processes rows
 * jobnr, jobnr + num_jobs, ... — for each row it runs the decode pass and
 * then (if enabled) the loop-filter pass, publishing progress through
 * td->thread_mb_pos so sibling jobs can interleave safely.  With frame
 * threading it also reports per-row progress to waiting consumer frames.
 *
 * NOTE(review): some original lines (braces, `ret` declaration, error
 * checks) are missing from this extraction; tokens below are verbatim.
 */
2580 static av_always_inline
2581 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2582                               int threadnr, int is_vp7)
2584     VP8Context *s = avctx->priv_data;
2585     VP8ThreadData *td = &s->thread_data[jobnr];
2586     VP8ThreadData *next_td = NULL, *prev_td = NULL;
2587     VP8Frame *curframe = s->curframe;
2588     int mb_y, num_jobs = s->num_jobs;
2591     td->thread_nr = threadnr;
/* Pre-bias the vertical MV clamp for this job's first row; the loop below
 * advances it by 64 * num_jobs per processed row. */
2592     td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
2593     td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
2594     for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2595         atomic_store(&td->thread_mb_pos, mb_y << 16);
2596         ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
/* On error, mark the row fully done so no sibling job waits forever. */
2598             update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
2601         if (s->deblock_filter)
2602             s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2603         update_pos(td, mb_y, INT_MAX & 0xFFFF);
2605         td->mv_bounds.mv_min.y -= 64 * num_jobs;
2606         td->mv_bounds.mv_max.y -= 64 * num_jobs;
2608         if (avctx->active_thread_type == FF_THREAD_FRAME)
2609             ff_thread_report_progress(&curframe->tf, mb_y, 0);
2615 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2616 int jobnr, int threadnr)
2618 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2621 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2622 int jobnr, int threadnr)
2624 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
/**
 * Decode one VP7/VP8 frame: parse the frame header, manage the four-slot
 * reference-frame set (CURRENT/PREVIOUS/GOLDEN/GOLDEN2), run the MB decode
 * either via hwaccel or via the sliced software path, then output the frame
 * unless it is invisible.
 *
 * NOTE(review): many original lines (braces, else branches, gotos, labels)
 * are missing from this extraction; tokens below are kept verbatim.
 */
2627 static av_always_inline
2628 int vp78_decode_frame(AVCodecContext *avctx, AVFrame *rframe, int *got_frame,
2629                       const AVPacket *avpkt, int is_vp7)
2631     VP8Context *s = avctx->priv_data;
2632     int ret, i, referenced, num_jobs;
2633     enum AVDiscard skip_thresh;
2634     VP8Frame *av_uninit(curframe), *prev_frame;
2637         ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2639         ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
/* WebP wraps VP8 data; in that case the caller chose the pixel format. */
2644     if (s->actually_webp) {
2645         // avctx->pix_fmt already set in caller.
2646     } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
2647         s->pix_fmt = get_pixel_format(s);
2648         if (s->pix_fmt < 0) {
2649             ret = AVERROR(EINVAL);
2652         avctx->pix_fmt = s->pix_fmt;
2655     prev_frame = s->framep[VP56_FRAME_CURRENT];
/* A frame is "referenced" if any of the reference slots will point at it. */
2657     referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2658                  s->update_altref == VP56_FRAME_CURRENT;
2660     skip_thresh = !referenced ? AVDISCARD_NONREF
2661                 : !s->keyframe ? AVDISCARD_NONKEY
2664     if (avctx->skip_frame >= skip_thresh) {
2666         memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2669     s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2671     // release no longer referenced frames
2672     for (i = 0; i < 5; i++)
2673         if (s->frames[i].tf.f->buf[0] &&
2674             &s->frames[i] != prev_frame &&
2675             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2676             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2677             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2678             vp8_release_frame(s, &s->frames[i]);
2680     curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2683     avctx->colorspace = AVCOL_SPC_BT470BG;
2685         avctx->color_range = AVCOL_RANGE_JPEG;
2687         avctx->color_range = AVCOL_RANGE_MPEG;
2689     /* Given that arithmetic probabilities are updated every frame, it's quite
2690      * likely that the values we have on a random interframe are complete
2691      * junk if we didn't start decode on a keyframe. So just don't display
2692      * anything rather than junk. */
2693     if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2694                          !s->framep[VP56_FRAME_GOLDEN] ||
2695                          !s->framep[VP56_FRAME_GOLDEN2])) {
2696         av_log(avctx, AV_LOG_WARNING,
2697                "Discarding interframe without a prior keyframe!\n");
2698         ret = AVERROR_INVALIDDATA;
2702     curframe->tf.f->key_frame = s->keyframe;
2703     curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2704                                             : AV_PICTURE_TYPE_P;
2705     if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2708     // check if golden and altref are swapped
2709     if (s->update_altref != VP56_FRAME_NONE)
2710         s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2712         s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2714     if (s->update_golden != VP56_FRAME_NONE)
2715         s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2717         s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2720         s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2722         s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2724     s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* With frame threading, everything the next thread needs is ready now. */
2726     if (ffcodec(avctx->codec)->update_thread_context)
2727         ff_thread_finish_setup(avctx);
2729     if (avctx->hwaccel) {
2730         ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
2734         ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2738         ret = avctx->hwaccel->end_frame(avctx);
/* Software path below. */
2743         s->linesize   = curframe->tf.f->linesize[0];
2744         s->uvlinesize = curframe->tf.f->linesize[1];
2746         memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2747         /* Zero macroblock structures for top/top-left prediction
2748          * from outside the frame. */
2750             memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2751                    (s->mb_width + 1) * sizeof(*s->macroblocks));
2752         if (!s->mb_layout && s->keyframe)
2753             memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2755         memset(s->ref_count, 0, sizeof(s->ref_count));
/* mb_layout==1: pre-decode all modes/MVs before the row jobs start. */
2757         if (s->mb_layout == 1) {
2758             // Make sure the previous frame has read its segmentation map,
2759             // if we re-use the same map.
2760             if (prev_frame && s->segmentation.enabled &&
2761                 !s->segmentation.update_map)
2762                 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2764                 ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2766                 ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2771         if (avctx->active_thread_type == FF_THREAD_FRAME)
/* One job per coefficient partition, capped by the thread count. */
2774             num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2775         s->num_jobs   = num_jobs;
2776         s->curframe   = curframe;
2777         s->prev_frame = prev_frame;
2778         s->mv_bounds.mv_min.y = -MARGIN;
2779         s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2780         for (i = 0; i < MAX_THREADS; i++) {
2781             VP8ThreadData *td = &s->thread_data[i];
2782             atomic_init(&td->thread_mb_pos, 0);
2783             atomic_init(&td->wait_mb_pos, INT_MAX);
2786             avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2789             avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2793     ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2794     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2797     // if future frames don't use the updated probabilities,
2798     // reset them to the values we saved
2799     if (!s->update_probabilities)
2800         s->prob[0] = s->prob[1];
/* Invisible frames update references but are never output. */
2802     if (!s->invisible) {
2803         if ((ret = av_frame_ref(rframe, curframe->tf.f)) < 0)
2810     memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2814 int ff_vp8_decode_frame(AVCodecContext *avctx, AVFrame *frame,
2815 int *got_frame, AVPacket *avpkt)
2817 return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP8);
#if CONFIG_VP7_DECODER
/** VP7 decode entry point registered in ff_vp7_decoder. */
static int vp7_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                            int *got_frame, AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2828 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2830 VP8Context *s = avctx->priv_data;
2833 vp8_decode_flush_impl(avctx, 1);
2834 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2835 av_frame_free(&s->frames[i].tf.f);
2840 static av_cold int vp8_init_frames(VP8Context *s)
2843 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2844 s->frames[i].tf.f = av_frame_alloc();
2845 if (!s->frames[i].tf.f)
2846 return AVERROR(ENOMEM);
/**
 * Common VP7/VP8 decoder init: picks the per-codec DSP/prediction tables
 * and the row-decode/filter function pointers, seeds the static scan
 * order, and allocates the reference-frame pool.
 *
 * NOTE(review): some original lines (braces, declarations, possible extra
 * assignments) are missing from this extraction; tokens are verbatim.
 */
2851 static av_always_inline
2852 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2854     VP8Context *s = avctx->priv_data;
2858     s->vp7       = avctx->codec->id == AV_CODEC_ID_VP7;
/* VP8 may switch to an hwaccel format later; start undetermined. */
2859     s->pix_fmt   = AV_PIX_FMT_NONE;
2860     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2862     ff_videodsp_init(&s->vdsp, 8);
2864     ff_vp78dsp_init(&s->vp8dsp);
/* The CONFIG_* guards let dead branches compile away in single-codec builds. */
2865     if (CONFIG_VP7_DECODER && is_vp7) {
2866         ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2867         ff_vp7dsp_init(&s->vp8dsp);
2868         s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2869         s->filter_mb_row           = vp7_filter_mb_row;
2870     } else if (CONFIG_VP8_DECODER && !is_vp7) {
2871         ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2872         ff_vp8dsp_init(&s->vp8dsp);
2873         s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2874         s->filter_mb_row           = vp8_filter_mb_row;
2877     /* does not change for VP8 */
2878     memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
/* On allocation failure, tear down whatever was already allocated. */
2880     if ((ret = vp8_init_frames(s)) < 0) {
2881         ff_vp8_decode_free(avctx);
#if CONFIG_VP7_DECODER
/** VP7 init callback registered in ff_vp7_decoder. */
static int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2895 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2897 return vp78_decode_init(avctx, IS_VP8);
2900 #if CONFIG_VP8_DECODER
/* Translate a frame pointer from the source context's pool into the
 * corresponding slot of the destination context's pool. */
2902 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
/**
 * Frame-threading context copy: duplicate from the source decoder all
 * state a future frame depends on — dimensions, pixel format, entropy
 * probabilities, segmentation/loop-filter deltas, sign biases, and
 * references to every live frame in the pool (rebased into dst's pool).
 *
 * NOTE(review): some original lines (braces, declarations, error paths)
 * are missing from this extraction; tokens below are kept verbatim.
 */
2904 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2905                                             const AVCodecContext *src)
2907     VP8Context *s = dst->priv_data, *s_src = src->priv_data;
/* Dimension change: existing per-MB buffers are stale. */
2910     if (s->macroblocks_base &&
2911         (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2913         s->mb_width  = s_src->mb_width;
2914         s->mb_height = s_src->mb_height;
2917     s->pix_fmt      = s_src->pix_fmt;
/* Carry over whichever probability set the source frame left active. */
2918     s->prob[0]      = s_src->prob[!s_src->update_probabilities];
2919     s->segmentation = s_src->segmentation;
2920     s->lf_delta     = s_src->lf_delta;
2921     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2923     for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2924         if (s_src->frames[i].tf.f->buf[0]) {
2925             int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
/* Reference-slot pointers must point into *this* context's pool. */
2931     s->framep[0] = REBASE(s_src->next_framep[0]);
2932     s->framep[1] = REBASE(s_src->next_framep[1]);
2933     s->framep[2] = REBASE(s_src->next_framep[2]);
2934     s->framep[3] = REBASE(s_src->next_framep[3]);
2938 #endif /* HAVE_THREADS */
2939 #endif /* CONFIG_VP8_DECODER */
2941 #if CONFIG_VP7_DECODER
/* Codec descriptor for VP7: software-only, no threading capabilities. */
2942 const FFCodec ff_vp7_decoder = {
2944     .p.long_name    = NULL_IF_CONFIG_SMALL("On2 VP7"),
2945     .p.type         = AVMEDIA_TYPE_VIDEO,
2946     .p.id           = AV_CODEC_ID_VP7,
2947     .priv_data_size = sizeof(VP8Context),
2948     .init           = vp7_decode_init,
2949     .close          = ff_vp8_decode_free,
2950     FF_CODEC_DECODE_CB(vp7_decode_frame),
2951     .p.capabilities = AV_CODEC_CAP_DR1,
2952     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
2953     .flush          = vp8_decode_flush,
2955 #endif /* CONFIG_VP7_DECODER */
2957 #if CONFIG_VP8_DECODER
/* Codec descriptor for VP8: frame + slice threading, optional hwaccels. */
2958 const FFCodec ff_vp8_decoder = {
2960     .p.long_name           = NULL_IF_CONFIG_SMALL("On2 VP8"),
2961     .p.type                = AVMEDIA_TYPE_VIDEO,
2962     .p.id                  = AV_CODEC_ID_VP8,
2963     .priv_data_size        = sizeof(VP8Context),
2964     .init                  = ff_vp8_decode_init,
2965     .close                 = ff_vp8_decode_free,
2966     FF_CODEC_DECODE_CB(ff_vp8_decode_frame),
2967     .p.capabilities        = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2968                              AV_CODEC_CAP_SLICE_THREADS,
2969     .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE |
2970                              FF_CODEC_CAP_ALLOCATE_PROGRESS,
2971     .flush                 = vp8_decode_flush,
2972     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2973     .hw_configs            = (const AVCodecHWConfigInternal *const []) {
2974 #if CONFIG_VP8_VAAPI_HWACCEL
2977 #if CONFIG_VP8_NVDEC_HWACCEL
2983 #endif /* CONFIG_VP8_DECODER */