2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "config_components.h"
29 #include "libavutil/mem_internal.h"
32 #include "codec_internal.h"
34 #include "hwaccel_internal.h"
37 #include "refstruct.h"
39 #include "threadframe.h"
49 // fixme: add 1 bit to all the calls to this?
50 static int vp8_rac_get_sint(VPXRangeCoder *c, int bits)
57 v = vp89_rac_get_uint(c, bits);
65 static int vp8_rac_get_nn(VPXRangeCoder *c)
67 int v = vp89_rac_get_uint(c, 7) << 1;
72 static int vp8_rac_get_coeff(VPXRangeCoder *c, const uint8_t *prob)
77 v = (v<<1) + vpx_rac_get_prob(c, *prob++);
83 static void free_buffers(VP8Context *s)
87 for (i = 0; i < MAX_THREADS; i++) {
89 pthread_cond_destroy(&s->thread_data[i].cond);
90 pthread_mutex_destroy(&s->thread_data[i].lock);
92 av_freep(&s->thread_data[i].filter_strength);
94 av_freep(&s->thread_data);
95 av_freep(&s->macroblocks_base);
96 av_freep(&s->intra4x4_pred_mode_top);
97 av_freep(&s->top_nnz);
98 av_freep(&s->top_border);
100 s->macroblocks = NULL;
103 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
106 if ((ret = ff_thread_get_ext_buffer(s->avctx, &f->tf,
107 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
109 if (!(f->seg_map = ff_refstruct_allocz(s->mb_width * s->mb_height)))
111 ret = ff_hwaccel_frame_priv_alloc(s->avctx, &f->hwaccel_picture_private);
118 ff_refstruct_unref(&f->seg_map);
119 ff_thread_release_ext_buffer(&f->tf);
123 static void vp8_release_frame(VP8Frame *f)
125 ff_refstruct_unref(&f->seg_map);
126 ff_refstruct_unref(&f->hwaccel_picture_private);
127 ff_thread_release_ext_buffer(&f->tf);
130 #if CONFIG_VP8_DECODER
131 static int vp8_ref_frame(VP8Frame *dst, const VP8Frame *src)
135 vp8_release_frame(dst);
137 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
139 ff_refstruct_replace(&dst->seg_map, src->seg_map);
140 ff_refstruct_replace(&dst->hwaccel_picture_private,
141 src->hwaccel_picture_private);
145 #endif /* CONFIG_VP8_DECODER */
147 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
149 VP8Context *s = avctx->priv_data;
152 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
153 vp8_release_frame(&s->frames[i]);
154 memset(s->framep, 0, sizeof(s->framep));
159 if (FF_HW_HAS_CB(avctx, flush))
160 FF_HW_SIMPLE_CALL(avctx, flush);
163 static void vp8_decode_flush(AVCodecContext *avctx)
165 vp8_decode_flush_impl(avctx, 0);
168 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
170 VP8Frame *frame = NULL;
173 // find a free buffer
174 for (i = 0; i < 5; i++)
175 if (&s->frames[i] != s->framep[VP8_FRAME_CURRENT] &&
176 &s->frames[i] != s->framep[VP8_FRAME_PREVIOUS] &&
177 &s->frames[i] != s->framep[VP8_FRAME_GOLDEN] &&
178 &s->frames[i] != s->framep[VP8_FRAME_ALTREF]) {
179 frame = &s->frames[i];
183 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
186 if (frame->tf.f->buf[0])
187 vp8_release_frame(frame);
192 static enum AVPixelFormat get_pixel_format(VP8Context *s)
194 enum AVPixelFormat pix_fmts[] = {
195 #if CONFIG_VP8_VAAPI_HWACCEL
198 #if CONFIG_VP8_NVDEC_HWACCEL
205 return ff_get_format(s->avctx, pix_fmts);
208 static av_always_inline
209 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
211 AVCodecContext *avctx = s->avctx;
212 int i, ret, dim_reset = 0;
214 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
215 height != s->avctx->height) {
216 vp8_decode_flush_impl(s->avctx, 1);
218 ret = ff_set_dimensions(s->avctx, width, height);
222 dim_reset = (s->macroblocks_base != NULL);
225 if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) &&
226 !s->actually_webp && !is_vp7) {
227 s->pix_fmt = get_pixel_format(s);
229 return AVERROR(EINVAL);
230 avctx->pix_fmt = s->pix_fmt;
233 s->mb_width = (s->avctx->coded_width + 15) / 16;
234 s->mb_height = (s->avctx->coded_height + 15) / 16;
236 s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
237 avctx->thread_count > 1;
238 if (!s->mb_layout) { // Frame threading and one thread
239 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
240 sizeof(*s->macroblocks));
241 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
242 } else // Sliced threading
243 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
244 sizeof(*s->macroblocks));
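/* Editorial summary of the two layouts above: layout 0 (frame threading or a
 * single thread) allocates mb_width + 2 * mb_height + 1 macroblocks plus a
 * separate row of top intra4x4 prediction modes, while layout 1 (sliced
 * threading or VP7) allocates a full (mb_width + 2) x (mb_height + 2) grid and
 * keeps the top prediction modes inside the VP8Macroblock entries themselves
 * (see decode_intra4x4_modes()). */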
245 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
246 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
247 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
249 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
250 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
252 return AVERROR(ENOMEM);
255 for (i = 0; i < MAX_THREADS; i++) {
256 s->thread_data[i].filter_strength =
257 av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
258 if (!s->thread_data[i].filter_strength) {
260 return AVERROR(ENOMEM);
263 pthread_mutex_init(&s->thread_data[i].lock, NULL);
264 pthread_cond_init(&s->thread_data[i].cond, NULL);
268 s->macroblocks = s->macroblocks_base + 1;
273 static int vp7_update_dimensions(VP8Context *s, int width, int height)
275 return update_dimensions(s, width, height, IS_VP7);
278 static int vp8_update_dimensions(VP8Context *s, int width, int height)
280 return update_dimensions(s, width, height, IS_VP8);
284 static void parse_segment_info(VP8Context *s)
286 VPXRangeCoder *c = &s->c;
289 s->segmentation.update_map = vp89_rac_get(c);
290 s->segmentation.update_feature_data = vp89_rac_get(c);
292 if (s->segmentation.update_feature_data) {
293 s->segmentation.absolute_vals = vp89_rac_get(c);
295 for (i = 0; i < 4; i++)
296 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
298 for (i = 0; i < 4; i++)
299 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
301 if (s->segmentation.update_map)
302 for (i = 0; i < 3; i++)
303 s->prob->segmentid[i] = vp89_rac_get(c) ? vp89_rac_get_uint(c, 8) : 255;
306 static void update_lf_deltas(VP8Context *s)
308 VPXRangeCoder *c = &s->c;
311 for (i = 0; i < 4; i++) {
312 if (vp89_rac_get(c)) {
313 s->lf_delta.ref[i] = vp89_rac_get_uint(c, 6);
316 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
320 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
321 if (vp89_rac_get(c)) {
322 s->lf_delta.mode[i] = vp89_rac_get_uint(c, 6);
325 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
330 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
332 const uint8_t *sizes = buf;
336 s->num_coeff_partitions = 1 << vp89_rac_get_uint(&s->c, 2);
338 buf += 3 * (s->num_coeff_partitions - 1);
339 buf_size -= 3 * (s->num_coeff_partitions - 1);
343 for (i = 0; i < s->num_coeff_partitions - 1; i++) {
344 int size = AV_RL24(sizes + 3 * i);
345 if (buf_size - size < 0)
347 s->coeff_partition_size[i] = size;
349 ret = ff_vpx_init_range_decoder(&s->coeff_partition[i], buf, size);
356 s->coeff_partition_size[i] = buf_size;
357 ff_vpx_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
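/* Illustrative layout (editorial note): with num_coeff_partitions == 4, the
 * nine bytes at `sizes` hold three little-endian 24-bit partition sizes; the
 * final partition has no explicit size and, as above, simply spans whatever
 * remains of the buffer. */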
362 static void vp7_get_quants(VP8Context *s)
364 VPXRangeCoder *c = &s->c;
366 int yac_qi = vp89_rac_get_uint(c, 7);
367 int ydc_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
368 int y2dc_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
369 int y2ac_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
370 int uvdc_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
371 int uvac_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
373 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
374 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
375 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
376 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
377 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
378 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
381 static void vp8_get_quants(VP8Context *s)
383 VPXRangeCoder *c = &s->c;
386 s->quant.yac_qi = vp89_rac_get_uint(c, 7);
387 s->quant.ydc_delta = vp8_rac_get_sint(c, 4);
388 s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
389 s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
390 s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
391 s->quant.uvac_delta = vp8_rac_get_sint(c, 4);
393 for (i = 0; i < 4; i++) {
394 if (s->segmentation.enabled) {
395 base_qi = s->segmentation.base_quant[i];
396 if (!s->segmentation.absolute_vals)
397 base_qi += s->quant.yac_qi;
399 base_qi = s->quant.yac_qi;
401 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta, 7)];
402 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
403 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
404 /* 101581>>16 is equivalent to 155/100 */
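/* Editorial check of that factor: 101581 / 65536 ~= 1.55008, so
 * (x * 101581) >> 16 approximates x * 155 / 100 without a division;
 * e.g. x = 100 gives (100 * 101581) >> 16 = 155. */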
405 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
406 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
407 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];
409 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
410 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
415 * Determine which buffers golden and altref should be updated with after this frame.
416 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
418 * Intra frames update all 3 references
419 * Inter frames update VP8_FRAME_PREVIOUS if the update_last flag is set
420 * If the update (golden|altref) flag is set, it's updated with the current frame
421 * if update_last is set, and VP8_FRAME_PREVIOUS otherwise.
422 * If the flag is not set, the number read means:
423 * 0: no update
424 * 1: VP8_FRAME_PREVIOUS
425 * 2: update golden with altref, or update altref with golden
427 static VP8FrameType ref_to_update(VP8Context *s, int update, VP8FrameType ref)
429 VPXRangeCoder *c = &s->c;
432 return VP8_FRAME_CURRENT;
434 switch (vp89_rac_get_uint(c, 2)) {
436 return VP8_FRAME_PREVIOUS;
438 return (ref == VP8_FRAME_GOLDEN) ? VP8_FRAME_ALTREF : VP8_FRAME_GOLDEN;
440 return VP8_FRAME_NONE;
443 static void vp78_reset_probability_tables(VP8Context *s)
446 for (i = 0; i < 4; i++)
447 for (j = 0; j < 16; j++)
448 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
449 sizeof(s->prob->token[i][j]));
452 static void vp78_update_probability_tables(VP8Context *s)
454 VPXRangeCoder *c = &s->c;
457 for (i = 0; i < 4; i++)
458 for (j = 0; j < 8; j++)
459 for (k = 0; k < 3; k++)
460 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
461 if (vpx_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
462 int prob = vp89_rac_get_uint(c, 8);
463 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
464 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
468 #define VP7_MVC_SIZE 17
469 #define VP8_MVC_SIZE 19
471 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
474 VPXRangeCoder *c = &s->c;
478 for (i = 0; i < 4; i++)
479 s->prob->pred16x16[i] = vp89_rac_get_uint(c, 8);
481 for (i = 0; i < 3; i++)
482 s->prob->pred8x8c[i] = vp89_rac_get_uint(c, 8);
484 // 17.2 MV probability update
485 for (i = 0; i < 2; i++)
486 for (j = 0; j < mvc_size; j++)
487 if (vpx_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
488 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
491 static void update_refs(VP8Context *s)
493 VPXRangeCoder *c = &s->c;
495 int update_golden = vp89_rac_get(c);
496 int update_altref = vp89_rac_get(c);
498 s->update_golden = ref_to_update(s, update_golden, VP8_FRAME_GOLDEN);
499 s->update_altref = ref_to_update(s, update_altref, VP8_FRAME_ALTREF);
502 static void copy_chroma(AVFrame *dst, const AVFrame *src, int width, int height)
506 for (j = 1; j < 3; j++) {
507 for (i = 0; i < height / 2; i++)
508 memcpy(dst->data[j] + i * dst->linesize[j],
509 src->data[j] + i * src->linesize[j], width / 2);
513 static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
514 const uint8_t *src, ptrdiff_t src_linesize,
515 int width, int height,
519 for (j = 0; j < height; j++) {
520 const uint8_t *src2 = src + j * src_linesize;
521 uint8_t *dst2 = dst + j * dst_linesize;
522 for (i = 0; i < width; i++) {
524 dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
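/* Worked example of the fade above (editorial note): with alpha = 16 and
 * beta = -32, a source pixel y = 128 becomes
 * av_clip_uint8(128 + ((128 * -32) >> 8) + 16) = av_clip_uint8(128 - 16 + 16) = 128;
 * beta scales each pixel by roughly (256 + beta) / 256 and alpha adds a
 * constant offset. */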
529 static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
533 if (!s->keyframe && (alpha || beta)) {
534 int width = s->mb_width * 16;
535 int height = s->mb_height * 16;
539 if (!s->framep[VP8_FRAME_PREVIOUS] ||
540 !s->framep[VP8_FRAME_GOLDEN]) {
541 av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
542 return AVERROR_INVALIDDATA;
546 dst = s->framep[VP8_FRAME_PREVIOUS]->tf.f;
548 /* preserve the golden frame, write a new previous frame */
549 if (s->framep[VP8_FRAME_GOLDEN] == s->framep[VP8_FRAME_PREVIOUS]) {
550 s->framep[VP8_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
551 if ((ret = vp8_alloc_frame(s, s->framep[VP8_FRAME_PREVIOUS], 1)) < 0)
554 dst = s->framep[VP8_FRAME_PREVIOUS]->tf.f;
556 copy_chroma(dst, src, width, height);
559 fade(dst->data[0], dst->linesize[0],
560 src->data[0], src->linesize[0],
561 width, height, alpha, beta);
567 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
569 VPXRangeCoder *c = &s->c;
570 int part1_size, hscale, vscale, i, j, ret;
571 int width = s->avctx->width;
572 int height = s->avctx->height;
575 int fade_present = 1;
578 return AVERROR_INVALIDDATA;
581 s->profile = (buf[0] >> 1) & 7;
582 if (s->profile > 1) {
583 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
584 return AVERROR_INVALIDDATA;
587 s->keyframe = !(buf[0] & 1);
589 part1_size = AV_RL24(buf) >> 4;
591 if (buf_size < 4 - s->profile + part1_size) {
592 av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed: %d\n", buf_size, 4 - s->profile + part1_size);
593 return AVERROR_INVALIDDATA;
596 buf += 4 - s->profile;
597 buf_size -= 4 - s->profile;
599 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
601 ret = ff_vpx_init_range_decoder(c, buf, part1_size);
605 buf_size -= part1_size;
607 /* A. Dimension information (keyframes only) */
609 width = vp89_rac_get_uint(c, 12);
610 height = vp89_rac_get_uint(c, 12);
611 hscale = vp89_rac_get_uint(c, 2);
612 vscale = vp89_rac_get_uint(c, 2);
613 if (hscale || vscale)
614 avpriv_request_sample(s->avctx, "Upscaling");
616 s->update_golden = s->update_altref = VP8_FRAME_CURRENT;
617 vp78_reset_probability_tables(s);
618 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
619 sizeof(s->prob->pred16x16));
620 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
621 sizeof(s->prob->pred8x8c));
622 for (i = 0; i < 2; i++)
623 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
624 sizeof(vp7_mv_default_prob[i]));
625 memset(&s->segmentation, 0, sizeof(s->segmentation));
626 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
627 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
630 if (s->keyframe || s->profile > 0)
631 memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
633 /* B. Decoding information for all four macroblock-level features */
634 for (i = 0; i < 4; i++) {
635 s->feature_enabled[i] = vp89_rac_get(c);
636 if (s->feature_enabled[i]) {
637 s->feature_present_prob[i] = vp89_rac_get_uint(c, 8);
639 for (j = 0; j < 3; j++)
640 s->feature_index_prob[i][j] =
641 vp89_rac_get(c) ? vp89_rac_get_uint(c, 8) : 255;
643 if (vp7_feature_value_size[s->profile][i])
644 for (j = 0; j < 4; j++)
645 s->feature_value[i][j] =
646 vp89_rac_get(c) ? vp89_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
650 s->segmentation.enabled = 0;
651 s->segmentation.update_map = 0;
652 s->lf_delta.enabled = 0;
654 s->num_coeff_partitions = 1;
655 ret = ff_vpx_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
659 if (!s->macroblocks_base || /* first frame */
660 width != s->avctx->width || height != s->avctx->height ||
661 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
662 if ((ret = vp7_update_dimensions(s, width, height)) < 0)
666 /* C. Dequantization indices */
669 /* D. Golden frame update flag (a Flag) for interframes only */
671 s->update_golden = vp89_rac_get(c) ? VP8_FRAME_CURRENT : VP8_FRAME_NONE;
672 s->sign_bias[VP8_FRAME_GOLDEN] = 0;
676 s->update_probabilities = 1;
678 if (s->profile > 0) {
679 s->update_probabilities = vp89_rac_get(c);
680 if (!s->update_probabilities)
681 s->prob[1] = s->prob[0];
684 fade_present = vp89_rac_get(c);
687 if (vpx_rac_is_end(c))
688 return AVERROR_INVALIDDATA;
689 /* E. Fading information for previous frame */
690 if (fade_present && vp89_rac_get(c)) {
691 alpha = (int8_t) vp89_rac_get_uint(c, 8);
692 beta = (int8_t) vp89_rac_get_uint(c, 8);
695 /* F. Loop filter type */
697 s->filter.simple = vp89_rac_get(c);
699 /* G. DCT coefficient ordering specification */
701 for (i = 1; i < 16; i++)
702 s->prob[0].scan[i] = ff_zigzag_scan[vp89_rac_get_uint(c, 4)];
704 /* H. Loop filter levels */
706 s->filter.simple = vp89_rac_get(c);
707 s->filter.level = vp89_rac_get_uint(c, 6);
708 s->filter.sharpness = vp89_rac_get_uint(c, 3);
710 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
711 vp78_update_probability_tables(s);
713 s->mbskip_enabled = 0;
715 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
717 s->prob->intra = vp89_rac_get_uint(c, 8);
718 s->prob->last = vp89_rac_get_uint(c, 8);
719 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
722 if (vpx_rac_is_end(c))
723 return AVERROR_INVALIDDATA;
725 if ((ret = vp7_fade_frame(s, alpha, beta)) < 0)
731 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
733 VPXRangeCoder *c = &s->c;
734 int header_size, hscale, vscale, ret;
735 int width = s->avctx->width;
736 int height = s->avctx->height;
739 av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
740 return AVERROR_INVALIDDATA;
743 s->keyframe = !(buf[0] & 1);
744 s->profile = (buf[0]>>1) & 7;
745 s->invisible = !(buf[0] & 0x10);
746 header_size = AV_RL24(buf) >> 5;
750 s->header_partition_size = header_size;
753 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
756 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
757 sizeof(s->put_pixels_tab));
758 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
759 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
760 sizeof(s->put_pixels_tab));
762 if (header_size > buf_size - 7 * s->keyframe) {
763 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
764 return AVERROR_INVALIDDATA;
768 if (AV_RL24(buf) != 0x2a019d) {
769 av_log(s->avctx, AV_LOG_ERROR,
770 "Invalid start code 0x%x\n", AV_RL24(buf));
771 return AVERROR_INVALIDDATA;
773 width = AV_RL16(buf + 3) & 0x3fff;
774 height = AV_RL16(buf + 5) & 0x3fff;
775 hscale = buf[4] >> 6;
776 vscale = buf[6] >> 6;
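/* Editorial note on the keyframe header parsed above: the 3-byte start code is
 * the byte sequence 0x9d 0x01 0x2a (read little-endian as 0x2a019d), and each
 * 16-bit dimension field packs a 14-bit size in its low bits with a 2-bit
 * upscaling code in its top two bits. */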
780 if (hscale || vscale)
781 avpriv_request_sample(s->avctx, "Upscaling");
783 s->update_golden = s->update_altref = VP8_FRAME_CURRENT;
784 vp78_reset_probability_tables(s);
785 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
786 sizeof(s->prob->pred16x16));
787 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
788 sizeof(s->prob->pred8x8c));
789 memcpy(s->prob->mvc, vp8_mv_default_prob,
790 sizeof(s->prob->mvc));
791 memset(&s->segmentation, 0, sizeof(s->segmentation));
792 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
795 ret = ff_vpx_init_range_decoder(c, buf, header_size);
799 buf_size -= header_size;
802 s->colorspace = vp89_rac_get(c);
804 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
805 s->fullrange = vp89_rac_get(c);
808 if ((s->segmentation.enabled = vp89_rac_get(c)))
809 parse_segment_info(s);
811 s->segmentation.update_map = 0; // FIXME: move this to some init function?
813 s->filter.simple = vp89_rac_get(c);
814 s->filter.level = vp89_rac_get_uint(c, 6);
815 s->filter.sharpness = vp89_rac_get_uint(c, 3);
817 if ((s->lf_delta.enabled = vp89_rac_get(c))) {
818 s->lf_delta.update = vp89_rac_get(c);
819 if (s->lf_delta.update)
823 if (setup_partitions(s, buf, buf_size)) {
824 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
825 return AVERROR_INVALIDDATA;
828 if (!s->macroblocks_base || /* first frame */
829 width != s->avctx->width || height != s->avctx->height ||
830 (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
831 if ((ret = vp8_update_dimensions(s, width, height)) < 0)
838 s->sign_bias[VP8_FRAME_GOLDEN] = vp89_rac_get(c);
839 s->sign_bias[VP8_FRAME_ALTREF] = vp89_rac_get(c);
842 // if we aren't saving this frame's probabilities for future frames,
843 // make a copy of the current probabilities
844 if (!(s->update_probabilities = vp89_rac_get(c)))
845 s->prob[1] = s->prob[0];
847 s->update_last = s->keyframe || vp89_rac_get(c);
849 vp78_update_probability_tables(s);
851 if ((s->mbskip_enabled = vp89_rac_get(c)))
852 s->prob->mbskip = vp89_rac_get_uint(c, 8);
855 s->prob->intra = vp89_rac_get_uint(c, 8);
856 s->prob->last = vp89_rac_get_uint(c, 8);
857 s->prob->golden = vp89_rac_get_uint(c, 8);
858 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
861 // Record the entropy coder state here so that hwaccels can use it.
862 s->c.code_word = vpx_rac_renorm(&s->c);
863 s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8);
864 s->coder_state_at_header_end.range = s->c.high;
865 s->coder_state_at_header_end.value = s->c.code_word >> 16;
866 s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
871 static av_always_inline
872 void clamp_mv(const VP8mvbounds *s, VP8mv *dst, const VP8mv *src)
874 dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
875 av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
876 dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
877 av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
881 * Motion vector coding, 17.1.
883 static av_always_inline int read_mv_component(VPXRangeCoder *c, const uint8_t *p, int vp7)
887 if (vpx_rac_get_prob_branchy(c, p[0])) {
890 for (i = 0; i < 3; i++)
891 x += vpx_rac_get_prob(c, p[9 + i]) << i;
892 for (i = (vp7 ? 7 : 9); i > 3; i--)
893 x += vpx_rac_get_prob(c, p[9 + i]) << i;
894 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vpx_rac_get_prob(c, p[12]))
898 const uint8_t *ps = p + 2;
899 bit = vpx_rac_get_prob(c, *ps);
902 bit = vpx_rac_get_prob(c, *ps);
905 x += vpx_rac_get_prob(c, *ps);
908 return (x && vpx_rac_get_prob(c, p[1])) ? -x : x;
911 static int vp7_read_mv_component(VPXRangeCoder *c, const uint8_t *p)
913 return read_mv_component(c, p, 1);
916 static int vp8_read_mv_component(VPXRangeCoder *c, const uint8_t *p)
918 return read_mv_component(c, p, 0);
921 static av_always_inline
922 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
925 return vp7_submv_prob;
928 return vp8_submv_prob[4 - !!left];
930 return vp8_submv_prob[2];
931 return vp8_submv_prob[1 - !!left];
935 * Split motion vector prediction, 16.4.
936 * @returns the number of motion vectors parsed (2, 4 or 16)
938 static av_always_inline
939 int decode_splitmvs(const VP8Context *s, VPXRangeCoder *c, VP8Macroblock *mb,
940 int layout, int is_vp7)
944 const VP8Macroblock *top_mb;
945 const VP8Macroblock *left_mb = &mb[-1];
946 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
947 const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
949 const VP8mv *left_mv = left_mb->bmv;
950 const VP8mv *cur_mv = mb->bmv;
952 if (!layout) // layout is inlined, s->mb_layout is not
955 top_mb = &mb[-s->mb_width - 1];
956 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
957 top_mv = top_mb->bmv;
959 if (vpx_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
960 if (vpx_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
961 part_idx = VP8_SPLITMVMODE_16x8 + vpx_rac_get_prob(c, vp8_mbsplit_prob[2]);
963 part_idx = VP8_SPLITMVMODE_8x8;
965 part_idx = VP8_SPLITMVMODE_4x4;
968 num = vp8_mbsplit_count[part_idx];
969 mbsplits_cur = vp8_mbsplits[part_idx],
970 firstidx = vp8_mbfirstidx[part_idx];
971 mb->partitioning = part_idx;
973 for (n = 0; n < num; n++) {
975 uint32_t left, above;
976 const uint8_t *submv_prob;
979 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
981 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
983 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
985 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
987 submv_prob = get_submv_prob(left, above, is_vp7);
989 if (vpx_rac_get_prob_branchy(c, submv_prob[0])) {
990 if (vpx_rac_get_prob_branchy(c, submv_prob[1])) {
991 if (vpx_rac_get_prob_branchy(c, submv_prob[2])) {
992 mb->bmv[n].y = mb->mv.y +
993 read_mv_component(c, s->prob->mvc[0], is_vp7);
994 mb->bmv[n].x = mb->mv.x +
995 read_mv_component(c, s->prob->mvc[1], is_vp7);
997 AV_ZERO32(&mb->bmv[n]);
1000 AV_WN32A(&mb->bmv[n], above);
1003 AV_WN32A(&mb->bmv[n], left);
1011 * The vp7 reference decoder uses a padding macroblock column (added to the right
1012 * edge of the frame) to guard against illegal macroblock offsets. The
1013 * algorithm has bugs that permit offsets to straddle the padding column.
1014 * This function replicates those bugs.
1016 * @param[out] edge_x macroblock x address
1017 * @param[out] edge_y macroblock y address
1019 * @return macroblock offset legal (boolean)
1021 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
1022 int xoffset, int yoffset, int boundary,
1023 int *edge_x, int *edge_y)
1025 int vwidth = mb_width + 1;
1026 int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
1027 if (new < boundary || new % vwidth == vwidth - 1)
1029 *edge_y = new / vwidth;
1030 *edge_x = new % vwidth;
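/* Worked example (editorial note): with mb_width = 4 the padded row width is
 * vwidth = 5; a predictor at mb_x = 3, mb_y = 0 with xoffset = 1, yoffset = 0
 * gives new = 4, and 4 % 5 == vwidth - 1, so the candidate falls into the
 * padding column and is rejected. */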
1034 static const VP8mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
1036 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
1039 static av_always_inline
1040 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1041 int mb_x, int mb_y, int layout)
1043 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
1044 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1047 uint8_t cnt[3] = { 0 };
1048 VPXRangeCoder *c = &s->c;
1051 AV_ZERO32(&near_mv[0]);
1052 AV_ZERO32(&near_mv[1]);
1053 AV_ZERO32(&near_mv[2]);
1055 for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
1056 const VP7MVPred * pred = &vp7_mv_pred[i];
1059 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
1060 pred->yoffset, !s->profile, &edge_x, &edge_y)) {
1061 const VP8Macroblock *edge = (s->mb_layout == 1)
1062 ? s->macroblocks_base + 1 + edge_x +
1063 (s->mb_width + 1) * (edge_y + 1)
1064 : s->macroblocks + edge_x +
1065 (s->mb_height - edge_y - 1) * 2;
1066 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
1068 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
1069 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
1071 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
1072 if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
1076 AV_WN32A(&near_mv[CNT_NEAR], mv);
1080 AV_WN32A(&near_mv[CNT_NEAREST], mv);
1089 cnt[idx] += vp7_mv_pred[i].score;
1092 mb->partitioning = VP8_SPLITMVMODE_NONE;
1094 if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
1095 mb->mode = VP8_MVMODE_MV;
1097 if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
1099 if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
1101 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
1102 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
1104 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
1106 if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
1107 mb->mode = VP8_MVMODE_SPLIT;
1108 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1110 mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1111 mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1112 mb->bmv[0] = mb->mv;
1115 mb->mv = near_mv[CNT_NEAR];
1116 mb->bmv[0] = mb->mv;
1119 mb->mv = near_mv[CNT_NEAREST];
1120 mb->bmv[0] = mb->mv;
1123 mb->mode = VP8_MVMODE_ZERO;
1125 mb->bmv[0] = mb->mv;
1129 static av_always_inline
1130 void vp8_decode_mvs(VP8Context *s, const VP8mvbounds *mv_bounds, VP8Macroblock *mb,
1131 int mb_x, int mb_y, int layout)
1133 VP8Macroblock *mb_edge[3] = { 0 /* top */,
1136 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1137 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1139 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1140 const int8_t *sign_bias = s->sign_bias;
1142 uint8_t cnt[4] = { 0 };
1143 VPXRangeCoder *c = &s->c;
1145 if (!layout) { // layout is inlined (s->mb_layout is not)
1146 mb_edge[0] = mb + 2;
1147 mb_edge[2] = mb + 1;
1149 mb_edge[0] = mb - s->mb_width - 1;
1150 mb_edge[2] = mb - s->mb_width - 2;
1153 AV_ZERO32(&near_mv[0]);
1154 AV_ZERO32(&near_mv[1]);
1155 AV_ZERO32(&near_mv[2]);
1157 /* Process MB on top, left and top-left */
1158 #define MV_EDGE_CHECK(n) \
1160 const VP8Macroblock *edge = mb_edge[n]; \
1161 int edge_ref = edge->ref_frame; \
1162 if (edge_ref != VP8_FRAME_CURRENT) { \
1163 uint32_t mv = AV_RN32A(&edge->mv); \
1165 if (cur_sign_bias != sign_bias[edge_ref]) { \
1166 /* SWAR negate of the values in mv. */ \
1168 mv = ((mv & 0x7fff7fff) + \
1169 0x00010001) ^ (mv & 0x80008000); \
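/* Editorial note: the masked add above puts +1 into each 16-bit lane      \
 * without letting a carry cross lane boundaries (the sign bits are held   \
 * out and XORed back in), so combined with a per-lane complement it       \
 * negates both packed MV components in a single 32-bit operation. */      \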
1171 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1172 AV_WN32A(&near_mv[++idx], mv); \
1173 cnt[idx] += 1 + (n != 2); \
1175 cnt[CNT_ZERO] += 1 + (n != 2); \
1183 mb->partitioning = VP8_SPLITMVMODE_NONE;
1184 if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1185 mb->mode = VP8_MVMODE_MV;
1187 /* If we have three distinct MVs, merge first and last if they're the same */
1188 if (cnt[CNT_SPLITMV] &&
1189 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1190 cnt[CNT_NEAREST] += 1;
1192 /* Swap near and nearest if necessary */
1193 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1194 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1195 FFSWAP(VP8mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1198 if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1199 if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1200 /* Choose the best mv out of 0,0 and the nearest mv */
1201 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1202 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1203 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1204 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1206 if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1207 mb->mode = VP8_MVMODE_SPLIT;
1208 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1210 mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1211 mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1212 mb->bmv[0] = mb->mv;
1215 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
1216 mb->bmv[0] = mb->mv;
1219 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
1220 mb->bmv[0] = mb->mv;
1223 mb->mode = VP8_MVMODE_ZERO;
1225 mb->bmv[0] = mb->mv;
1229 static av_always_inline
1230 void decode_intra4x4_modes(VP8Context *s, VPXRangeCoder *c, VP8Macroblock *mb,
1231 int mb_x, int keyframe, int layout)
1233 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1236 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1237 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1242 uint8_t *const left = s->intra4x4_pred_mode_left;
1244 top = mb->intra4x4_pred_mode_top;
1246 top = s->intra4x4_pred_mode_top + 4 * mb_x;
1247 for (y = 0; y < 4; y++) {
1248 for (x = 0; x < 4; x++) {
1250 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1251 *intra4x4 = vp89_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1252 left[y] = top[x] = *intra4x4;
1258 for (i = 0; i < 16; i++)
1259 intra4x4[i] = vp89_rac_get_tree(c, vp8_pred4x4_tree,
1260 vp8_pred4x4_prob_inter);
1264 static av_always_inline
1265 void decode_mb_mode(VP8Context *s, const VP8mvbounds *mv_bounds,
1266 VP8Macroblock *mb, int mb_x, int mb_y,
1267 uint8_t *segment, const uint8_t *ref, int layout, int is_vp7)
1269 VPXRangeCoder *c = &s->c;
1270 static const char * const vp7_feature_name[] = { "q-index",
1272 "partial-golden-update",
1277 for (i = 0; i < 4; i++) {
1278 if (s->feature_enabled[i]) {
1279 if (vpx_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1280 int index = vp89_rac_get_tree(c, vp7_feature_index_tree,
1281 s->feature_index_prob[i]);
1282 av_log(s->avctx, AV_LOG_WARNING,
1283 "Feature %s present in macroblock (value 0x%x)\n",
1284 vp7_feature_name[i], s->feature_value[i][index]);
1288 } else if (s->segmentation.update_map) {
1289 int bit = vpx_rac_get_prob(c, s->prob->segmentid[0]);
1290 *segment = vpx_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1291 } else if (s->segmentation.enabled)
1292 *segment = ref ? *ref : *segment;
1293 mb->segment = *segment;
1295 mb->skip = s->mbskip_enabled ? vpx_rac_get_prob(c, s->prob->mbskip) : 0;
1298 mb->mode = vp89_rac_get_tree(c, vp8_pred16x16_tree_intra,
1299 vp8_pred16x16_prob_intra);
1301 if (mb->mode == MODE_I4x4) {
1302 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1304 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1305 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1307 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1309 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1310 AV_WN32A(s->intra4x4_pred_mode_left, modes);
1313 mb->chroma_pred_mode = vp89_rac_get_tree(c, vp8_pred8x8c_tree,
1314 vp8_pred8x8c_prob_intra);
1315 mb->ref_frame = VP8_FRAME_CURRENT;
1316 } else if (vpx_rac_get_prob_branchy(c, s->prob->intra)) {
1318 if (vpx_rac_get_prob_branchy(c, s->prob->last))
1320 (!is_vp7 && vpx_rac_get_prob(c, s->prob->golden)) ? VP8_FRAME_ALTREF
1323 mb->ref_frame = VP8_FRAME_PREVIOUS;
1324 s->ref_count[mb->ref_frame - 1]++;
1326 // motion vectors, 16.3
1328 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1330 vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
1333 mb->mode = vp89_rac_get_tree(c, vp8_pred16x16_tree_inter,
1334 s->prob->pred16x16);
1336 if (mb->mode == MODE_I4x4)
1337 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1339 mb->chroma_pred_mode = vp89_rac_get_tree(c, vp8_pred8x8c_tree,
1341 mb->ref_frame = VP8_FRAME_CURRENT;
1342 mb->partitioning = VP8_SPLITMVMODE_NONE;
1343 AV_ZERO32(&mb->bmv[0]);
1348 * @param r arithmetic bitstream reader context
1349 * @param block destination for block coefficients
1350 * @param probs probabilities to use when reading trees from the bitstream
1351 * @param i initial coeff index, 0 unless a separate DC block is coded
1352 * @param qmul array holding the dc/ac dequant factor at position 0/1
1354 * @return 0 if no coeffs were decoded
1355 * otherwise, the index of the last coeff decoded plus one
1357 static av_always_inline
1358 int decode_block_coeffs_internal(VPXRangeCoder *r, int16_t block[16],
1359 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1360 int i, const uint8_t *token_prob, const int16_t qmul[2],
1361 const uint8_t scan[16], int vp7)
1363 VPXRangeCoder c = *r;
1368 if (!vpx_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1372 if (!vpx_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1374 break; // invalid input; blocks should end with EOB
1375 token_prob = probs[i][0];
1381 if (!vpx_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1383 token_prob = probs[i + 1][1];
1385 if (!vpx_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1386 coeff = vpx_rac_get_prob_branchy(&c, token_prob[4]);
1388 coeff += vpx_rac_get_prob(&c, token_prob[5]);
1392 if (!vpx_rac_get_prob_branchy(&c, token_prob[6])) {
1393 if (!vpx_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1394 coeff = 5 + vpx_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1395 } else { // DCT_CAT2
1397 coeff += vpx_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1398 coeff += vpx_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1400 } else { // DCT_CAT3 and up
1401 int a = vpx_rac_get_prob(&c, token_prob[8]);
1402 int b = vpx_rac_get_prob(&c, token_prob[9 + a]);
1403 int cat = (a << 1) + b;
1404 coeff = 3 + (8 << cat);
1405 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
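/* Editorial note: cat runs 0..3 for DCT_CAT3..DCT_CAT6, so 3 + (8 << cat)
 * above yields the base magnitudes 11, 19, 35 and 67, to which
 * vp8_rac_get_coeff() adds the category's extra magnitude bits. */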
1408 token_prob = probs[i + 1][2];
1410 block[scan[i]] = (vp89_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1417 static av_always_inline
1418 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1420 int16_t dc = block[0];
1428 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1429 block[0] = pred[0] = dc;
1434 block[0] = pred[0] = dc;
1440 static int vp7_decode_block_coeffs_internal(VPXRangeCoder *r,
1442 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1443 int i, const uint8_t *token_prob,
1444 const int16_t qmul[2],
1445 const uint8_t scan[16])
1447 return decode_block_coeffs_internal(r, block, probs, i,
1448 token_prob, qmul, scan, IS_VP7);
1451 #ifndef vp8_decode_block_coeffs_internal
1452 static int vp8_decode_block_coeffs_internal(VPXRangeCoder *r,
1454 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1455 int i, const uint8_t *token_prob,
1456 const int16_t qmul[2])
1458 return decode_block_coeffs_internal(r, block, probs, i,
1459 token_prob, qmul, ff_zigzag_scan, IS_VP8);
1464 * @param c arithmetic bitstream reader context
1465 * @param block destination for block coefficients
1466 * @param probs probabilities to use when reading trees from the bitstream
1467 * @param i initial coeff index, 0 unless a separate DC block is coded
1468 * @param zero_nhood the initial prediction context for number of surrounding
1469 * all-zero blocks (only left/top, so 0-2)
1470 * @param qmul array holding the dc/ac dequant factor at position 0/1
1471 * @param scan scan pattern (VP7 only)
1473 * @return 0 if no coeffs were decoded
1474 * otherwise, the index of the last coeff decoded plus one
1476 static av_always_inline
1477 int decode_block_coeffs(VPXRangeCoder *c, int16_t block[16],
1478 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1479 int i, int zero_nhood, const int16_t qmul[2],
1480 const uint8_t scan[16], int vp7)
1482 const uint8_t *token_prob = probs[i][zero_nhood];
1483 if (!vpx_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1485 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1486 token_prob, qmul, scan)
1487 : vp8_decode_block_coeffs_internal(c, block, probs, i,
1491 static av_always_inline
1492 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VPXRangeCoder *c,
1493 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1496 int i, x, y, luma_start = 0, luma_ctx = 3;
1497 int nnz_pred, nnz, nnz_total = 0;
1498 int segment = mb->segment;
1501 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1502 nnz_pred = t_nnz[8] + l_nnz[8];
1504 // decode DC values and do hadamard
1505 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1506 nnz_pred, s->qmat[segment].luma_dc_qmul,
1507 ff_zigzag_scan, is_vp7);
1508 l_nnz[8] = t_nnz[8] = !!nnz;
1510 if (is_vp7 && mb->mode > MODE_I4x4) {
1511 nnz |= inter_predict_dc(td->block_dc,
1512 s->inter_dc_pred[mb->ref_frame - 1]);
1519 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1521 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1528 for (y = 0; y < 4; y++)
1529 for (x = 0; x < 4; x++) {
1530 nnz_pred = l_nnz[y] + t_nnz[x];
1531 nnz = decode_block_coeffs(c, td->block[y][x],
1532 s->prob->token[luma_ctx],
1533 luma_start, nnz_pred,
1534 s->qmat[segment].luma_qmul,
1535 s->prob[0].scan, is_vp7);
1536 /* nnz+block_dc may be one more than the actual last index,
1537 * but we don't care */
1538 td->non_zero_count_cache[y][x] = nnz + block_dc;
1539 t_nnz[x] = l_nnz[y] = !!nnz;
1544 // TODO: what to do about dimensions? 2nd dim for luma is x,
1545 // but for chroma it's (y<<1)|x
1546 for (i = 4; i < 6; i++)
1547 for (y = 0; y < 2; y++)
1548 for (x = 0; x < 2; x++) {
1549 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1550 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1551 s->prob->token[2], 0, nnz_pred,
1552 s->qmat[segment].chroma_qmul,
1553 s->prob[0].scan, is_vp7);
1554 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1555 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1559 // if there were no coded coeffs despite the macroblock not being marked skip,
1560 // we MUST not do the inner loop filter and should not do IDCT
1561 // Since skip isn't used for bitstream prediction, just manually set it.
1566 static av_always_inline
1567 void backup_mb_border(uint8_t *top_border, const uint8_t *src_y,
1568 const uint8_t *src_cb, const uint8_t *src_cr,
1569 ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1571 AV_COPY128(top_border, src_y + 15 * linesize);
1573 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1574 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1578 static av_always_inline
1579 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1580 uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1581 int mb_y, int mb_width, int simple, int xchg)
1583 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1585 src_cb -= uvlinesize;
1586 src_cr -= uvlinesize;
1588 #define XCHG(a, b, xchg) \
1596 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1597 XCHG(top_border, src_y, xchg);
1598 XCHG(top_border + 8, src_y + 8, 1);
1599 if (mb_x < mb_width - 1)
1600 XCHG(top_border + 32, src_y + 16, 1);
1602 // only copy chroma for normal loop filter
1603 // or to initialize the top row to 127
1604 if (!simple || !mb_y) {
1605 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1606 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1607 XCHG(top_border + 16, src_cb, 1);
1608 XCHG(top_border + 24, src_cr, 1);
1612 static av_always_inline
1613 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1616 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1618 return mb_y ? mode : LEFT_DC_PRED8x8;
1621 static av_always_inline
1622 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1625 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1627 return mb_y ? mode : HOR_PRED8x8;
1630 static av_always_inline
1631 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1635 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1637 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1639 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1640 case PLANE_PRED8x8: /* TM */
1641 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1646 static av_always_inline
1647 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1650 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1652 return mb_y ? mode : HOR_VP8_PRED;
1656 static av_always_inline
1657 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1658 int *copy_buf, int vp7)
1662 if (!mb_x && mb_y) {
1667 case DIAG_DOWN_LEFT_PRED:
1668 case VERT_LEFT_PRED:
1669 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1677 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1679 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1680 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1681 * as 16x16/8x8 DC */
1682 case DIAG_DOWN_RIGHT_PRED:
1683 case VERT_RIGHT_PRED:
1692 static av_always_inline
1693 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
1694 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1696 int x, y, mode, nnz;
1699 /* for the first row, we need to run xchg_mb_border to init the top edge
1700 * to 127; otherwise, skip it if we aren't going to deblock */
1701 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1702 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1703 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1704 s->filter.simple, 1);
1706 if (mb->mode < MODE_I4x4) {
1707 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1708 s->hpc.pred16x16[mode](dst[0], s->linesize);
1710 uint8_t *ptr = dst[0];
1711 const uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1712 const uint8_t lo = is_vp7 ? 128 : 127;
1713 const uint8_t hi = is_vp7 ? 128 : 129;
1714 const uint8_t tr_top[4] = { lo, lo, lo, lo };
1716 // all blocks on the right edge of the macroblock use the bottom edge of
1717 // the top macroblock for their topright edge
1718 const uint8_t *tr_right = ptr - s->linesize + 16;
1720 // if we're on the right edge of the frame, said edge is extended
1721 // from the top macroblock
1722 if (mb_y && mb_x == s->mb_width - 1) {
1723 tr = tr_right[-1] * 0x01010101u;
1724 tr_right = (uint8_t *) &tr;
1728 AV_ZERO128(td->non_zero_count_cache);
1730 for (y = 0; y < 4; y++) {
1731 const uint8_t *topright = ptr + 4 - s->linesize;
1732 for (x = 0; x < 4; x++) {
1734 ptrdiff_t linesize = s->linesize;
1735 uint8_t *dst = ptr + 4 * x;
1736 LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
1738 if ((y == 0 || x == 3) && mb_y == 0) {
1741 topright = tr_right;
1743 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1744 mb_y + y, &copy, is_vp7);
1746 dst = copy_dst + 12;
1750 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1752 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1756 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1765 copy_dst[11] = ptr[4 * x - 1];
1766 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1767 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1768 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1771 s->hpc.pred4x4[mode](dst, topright, linesize);
1773 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1774 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1775 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1776 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1779 nnz = td->non_zero_count_cache[y][x];
1782 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1783 td->block[y][x], s->linesize);
1785 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1786 td->block[y][x], s->linesize);
1791 ptr += 4 * s->linesize;
1796 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1797 mb_x, mb_y, is_vp7);
1798 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1799 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1801 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1802 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1803 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1804 s->filter.simple, 0);
1807 static const uint8_t subpel_idx[3][8] = {
1808 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1809 // also function pointer index
1810 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1811 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
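/* Editorial example of how the table above is used: a fractional position with
 * the index values {2, 5, 3} needs 2 extra source pixels on the left and 3 on
 * the right (5 in total), so a 16-pixel-wide luma block reads 21 source pixels
 * per row; when that overruns the frame, emulated_edge_mc pads the source
 * first. */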
1817 * @param s VP8 decoding context
1818 * @param dst target buffer for block data at block position
1819 * @param ref reference picture buffer at origin (0, 0)
1820 * @param mv motion vector (relative to block position) to get pixel data from
1821 * @param x_off horizontal position of block from origin (0, 0)
1822 * @param y_off vertical position of block from origin (0, 0)
1823 * @param block_w width of block (16, 8 or 4)
1824 * @param block_h height of block (always same as block_w)
1825 * @param width width of src/dst plane data
1826 * @param height height of src/dst plane data
1827 * @param linesize size of a single line of plane data, including padding
1828 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1830 static av_always_inline
1831 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1832 const ThreadFrame *ref, const VP8mv *mv,
1833 int x_off, int y_off, int block_w, int block_h,
1834 int width, int height, ptrdiff_t linesize,
1835 vp8_mc_func mc_func[3][3])
1837 const uint8_t *src = ref->f->data[0];
1840 ptrdiff_t src_linesize = linesize;
1842 int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1843 int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1845 x_off += mv->x >> 2;
1846 y_off += mv->y >> 2;
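/* Editorial note: luma motion vectors are in quarter-pel units, so mv->x >> 2
 * above is the integer pixel offset while (mv->x * 2) & 7 selects the
 * eighth-pel phase used to index subpel_idx (e.g. mv->x = 5 gives an integer
 * offset of 1 and phase 2). */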
1849 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1850 src += y_off * linesize + x_off;
1851 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1852 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1853 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1854 src - my_idx * linesize - mx_idx,
1855 EDGE_EMU_LINESIZE, linesize,
1856 block_w + subpel_idx[1][mx],
1857 block_h + subpel_idx[1][my],
1858 x_off - mx_idx, y_off - my_idx,
1860 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1861 src_linesize = EDGE_EMU_LINESIZE;
1863 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1865 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1866 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1867 linesize, block_h, 0, 0);
1872 * chroma MC function
1874 * @param s VP8 decoding context
1875 * @param dst1 target buffer for block data at block position (U plane)
1876 * @param dst2 target buffer for block data at block position (V plane)
1877 * @param ref reference picture buffer at origin (0, 0)
1878 * @param mv motion vector (relative to block position) to get pixel data from
1879 * @param x_off horizontal position of block from origin (0, 0)
1880 * @param y_off vertical position of block from origin (0, 0)
1881 * @param block_w width of block (16, 8 or 4)
1882 * @param block_h height of block (always same as block_w)
1883 * @param width width of src/dst plane data
1884 * @param height height of src/dst plane data
1885 * @param linesize size of a single line of plane data, including padding
1886 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1888 static av_always_inline
1889 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1890 uint8_t *dst2, const ThreadFrame *ref, const VP8mv *mv,
1891 int x_off, int y_off, int block_w, int block_h,
1892 int width, int height, ptrdiff_t linesize,
1893 vp8_mc_func mc_func[3][3])
1895 const uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1898 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1899 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1901 x_off += mv->x >> 3;
1902 y_off += mv->y >> 3;
1905 src1 += y_off * linesize + x_off;
1906 src2 += y_off * linesize + x_off;
1907 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1908 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1909 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1910 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1911 src1 - my_idx * linesize - mx_idx,
1912 EDGE_EMU_LINESIZE, linesize,
1913 block_w + subpel_idx[1][mx],
1914 block_h + subpel_idx[1][my],
1915 x_off - mx_idx, y_off - my_idx, width, height);
1916 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1917 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1919 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1920 src2 - my_idx * linesize - mx_idx,
1921 EDGE_EMU_LINESIZE, linesize,
1922 block_w + subpel_idx[1][mx],
1923 block_h + subpel_idx[1][my],
1924 x_off - mx_idx, y_off - my_idx, width, height);
1925 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1926 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1928 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1929 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1932 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1933 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1934 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1938 static av_always_inline
1939 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
1940 const ThreadFrame *ref_frame, int x_off, int y_off,
1941 int bx_off, int by_off, int block_w, int block_h,
1942 int width, int height, const VP8mv *mv)
1947 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1948 ref_frame, mv, x_off + bx_off, y_off + by_off,
1949 block_w, block_h, width, height, s->linesize,
1950 s->put_pixels_tab[block_w == 8]);
1953 if (s->profile == 3) {
1954 /* this block applies only to VP8; it is safe to check
1955 * only the profile, as VP7 profile <= 1 */
1967 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1968 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1969 &uvmv, x_off + bx_off, y_off + by_off,
1970 block_w, block_h, width, height, s->uvlinesize,
1971 s->put_pixels_tab[1 + (block_w == 4)]);
1974 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1975 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1976 static av_always_inline
1977 void prefetch_motion(const VP8Context *s, const VP8Macroblock *mb,
1978 int mb_x, int mb_y, int mb_xy, int ref)
1980 /* Don't prefetch refs that haven't been used very often this frame. */
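/* (Editorial note: given the check below, prefetching only happens when this
 * reference has been used in more than one of every 32 macroblocks decoded
 * so far in the frame.) */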
1981 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1982 int x_off = mb_x << 4, y_off = mb_y << 4;
1983 int mx = (mb->mv.x >> 2) + x_off + 8;
1984 int my = (mb->mv.y >> 2) + y_off;
1985 uint8_t **src = s->framep[ref]->tf.f->data;
1986 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1987 /* For threading, a ff_thread_await_progress here might be useful, but
1988 * it actually slows down the decoder. Since a bad prefetch doesn't
1989 * generate bad decoder output, we don't run it here. */
1990 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1991 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1992 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1997 * Apply motion vectors to prediction buffer, chapter 18.
1999 static av_always_inline
2000 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
2001 VP8Macroblock *mb, int mb_x, int mb_y)
2003 int x_off = mb_x << 4, y_off = mb_y << 4;
2004 int width = 16 * s->mb_width, height = 16 * s->mb_height;
2005 const ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
2006 const VP8mv *bmv = mb->bmv;
2008 switch (mb->partitioning) {
2009 case VP8_SPLITMVMODE_NONE:
2010 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2011 0, 0, 16, 16, width, height, &mb->mv);
2013 case VP8_SPLITMVMODE_4x4: {
2018 for (y = 0; y < 4; y++) {
2019 for (x = 0; x < 4; x++) {
2020 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
2021 ref, &bmv[4 * y + x],
2022 4 * x + x_off, 4 * y + y_off, 4, 4,
2023 width, height, s->linesize,
2024 s->put_pixels_tab[2]);
2033 for (y = 0; y < 2; y++) {
2034 for (x = 0; x < 2; x++) {
2035 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
2036 mb->bmv[2 * y * 4 + 2 * x + 1].x +
2037 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
2038 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
2039 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
2040 mb->bmv[2 * y * 4 + 2 * x + 1].y +
2041 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
2042 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
2043 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
2044 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
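/* Editorial note on the sums above: each chroma MV is the mean of the four
 * luma sub-MVs covering the corresponding 8x8 luma area, rounded to nearest
 * with halves pushed away from zero (FF_SIGNBIT contributes -1 for negative
 * sums before the >> 2). */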
2045 if (s->profile == 3) {
2049 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
2050 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
2051 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
2052 width, height, s->uvlinesize,
2053 s->put_pixels_tab[2]);
2058 case VP8_SPLITMVMODE_16x8:
2059 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2060 0, 0, 16, 8, width, height, &bmv[0]);
2061 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2062 0, 8, 16, 8, width, height, &bmv[1]);
2064 case VP8_SPLITMVMODE_8x16:
2065 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2066 0, 0, 8, 16, width, height, &bmv[0]);
2067 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2068 8, 0, 8, 16, width, height, &bmv[1]);
2070 case VP8_SPLITMVMODE_8x8:
2071 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2072 0, 0, 8, 8, width, height, &bmv[0]);
2073 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2074 8, 0, 8, 8, width, height, &bmv[1]);
2075 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2076 0, 8, 8, 8, width, height, &bmv[2]);
2077 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2078 8, 8, 8, 8, width, height, &bmv[3]);
2083 static av_always_inline
2084 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
2085 const VP8Macroblock *mb)
2089 if (mb->mode != MODE_I4x4) {
2090 uint8_t *y_dst = dst[0];
2091 for (y = 0; y < 4; y++) {
2092 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
2094 if (nnz4 & ~0x01010101) {
2095 for (x = 0; x < 4; x++) {
2096 if ((uint8_t) nnz4 == 1)
2097 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
2100 else if ((uint8_t) nnz4 > 1)
2101 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
2109 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2112 y_dst += 4 * s->linesize;
2116 for (ch = 0; ch < 2; ch++) {
2117 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2119 uint8_t *ch_dst = dst[1 + ch];
2120 if (nnz4 & ~0x01010101) {
2121 for (y = 0; y < 2; y++) {
2122 for (x = 0; x < 2; x++) {
2123 if ((uint8_t) nnz4 == 1)
2124 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2125 td->block[4 + ch][(y << 1) + x],
2127 else if ((uint8_t) nnz4 > 1)
2128 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2129 td->block[4 + ch][(y << 1) + x],
2133 goto chroma_idct_end;
2135 ch_dst += 4 * s->uvlinesize;
2138 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
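/* Illustrative sketch (not part of the decoder): the per-block non-zero
 * coefficient counts of four 4x4 blocks are packed one per byte into nnz4.
 * If every byte is 0 or 1 (nnz4 & ~0x01010101 is zero), at most the DC
 * coefficient is present in each block and the combined *_dc_add4* call above
 * handles all four at once; otherwise each block is dispatched to
 * vp8_idct_dc_add() or vp8_idct_add() individually.  Hypothetical helper. */
#if 0
static int example_dc_only(uint32_t nnz4)
{
    return !(nnz4 & ~0x01010101u);  /* 1 if every per-block count is <= 1 */
}
#endif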
2146 static av_always_inline
2147 void filter_level_for_mb(const VP8Context *s, const VP8Macroblock *mb,
2148 VP8FilterStrength *f, int is_vp7)
2150 int interior_limit, filter_level;
2152 if (s->segmentation.enabled) {
2153 filter_level = s->segmentation.filter_level[mb->segment];
2154 if (!s->segmentation.absolute_vals)
2155 filter_level += s->filter.level;
2157 filter_level = s->filter.level;
2159 if (s->lf_delta.enabled) {
2160 filter_level += s->lf_delta.ref[mb->ref_frame];
2161 filter_level += s->lf_delta.mode[mb->mode];
2164 filter_level = av_clip_uintp2(filter_level, 6);
2166 interior_limit = filter_level;
2167 if (s->filter.sharpness) {
2168 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2169 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2171 interior_limit = FFMAX(interior_limit, 1);
2173 f->filter_level = filter_level;
2174 f->inner_limit = interior_limit;
2175 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2176 mb->mode == VP8_MVMODE_SPLIT;
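/* Illustrative sketch (not part of the decoder): with the clipping above, a
 * per-segment level of 50 plus a frame level of 20 saturates at 63
 * (av_clip_uintp2(x, 6) clamps to 0..63).  The interior limit then shrinks
 * with sharpness: for filter_level 40 and sharpness 5 it becomes
 * 40 >> ((5 + 3) >> 2) = 10, capped at 9 - 5 = 4, and is never allowed below
 * 1.  Hypothetical stand-alone helper below. */
#if 0
static int example_interior_limit(int filter_level, int sharpness)
{
    int limit = filter_level;
    if (sharpness) {
        limit >>= (sharpness + 3) >> 2;  /* shift 1 for sharpness 1..4, 2 for 5..7 */
        if (limit > 9 - sharpness)
            limit = 9 - sharpness;
    }
    return limit < 1 ? 1 : limit;
}
#endif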
2179 static av_always_inline
2180 void filter_mb(const VP8Context *s, uint8_t *const dst[3], const VP8FilterStrength *f,
2181 int mb_x, int mb_y, int is_vp7)
2183 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2184 int filter_level = f->filter_level;
2185 int inner_limit = f->inner_limit;
2186 int inner_filter = f->inner_filter;
2187 ptrdiff_t linesize = s->linesize;
2188 ptrdiff_t uvlinesize = s->uvlinesize;
2189 static const uint8_t hev_thresh_lut[2][64] = {
2190 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2191 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2192 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2194 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2195 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2196 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2204 bedge_lim_y = filter_level;
2205 bedge_lim_uv = filter_level * 2;
2206 mbedge_lim = filter_level + 2;
2209 bedge_lim_uv = filter_level * 2 + inner_limit;
2210 mbedge_lim = bedge_lim_y + 4;
2213 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2216 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2217 mbedge_lim, inner_limit, hev_thresh);
2218 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2219 mbedge_lim, inner_limit, hev_thresh);
2222 #define H_LOOP_FILTER_16Y_INNER(cond) \
2223 if (cond && inner_filter) { \
2224 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2225 bedge_lim_y, inner_limit, \
2227 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2228 bedge_lim_y, inner_limit, \
2230 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2231 bedge_lim_y, inner_limit, \
2233 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2234 uvlinesize, bedge_lim_uv, \
2235 inner_limit, hev_thresh); \
2238 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2241 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2242 mbedge_lim, inner_limit, hev_thresh);
2243 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2244 mbedge_lim, inner_limit, hev_thresh);
2248 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2249 linesize, bedge_lim_y,
2250 inner_limit, hev_thresh);
2251 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2252 linesize, bedge_lim_y,
2253 inner_limit, hev_thresh);
2254 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2255 linesize, bedge_lim_y,
2256 inner_limit, hev_thresh);
2257 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2258 dst[2] + 4 * uvlinesize,
2259 uvlinesize, bedge_lim_uv,
2260 inner_limit, hev_thresh);
2263 H_LOOP_FILTER_16Y_INNER(is_vp7)
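/* Illustrative sketch (not part of the decoder): for the VP8 (non-simple)
 * path the limits above follow the VP8 specification (RFC 6386): the
 * sub-block edge limit is 2 * filter_level + interior_limit and the
 * macroblock edge limit is that value plus 4 (equivalently
 * ((filter_level + 2) * 2) + interior_limit), while hev_thresh comes from a
 * small table indexed by key/inter frame and filter level.  Hypothetical
 * helper below. */
#if 0
static void example_vp8_edge_limits(int filter_level, int interior_limit,
                                    int *bedge_lim, int *mbedge_lim)
{
    *bedge_lim  = 2 * filter_level + interior_limit;  /* inner block edges */
    *mbedge_lim = *bedge_lim + 4;                     /* macroblock edges  */
}
#endif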
2266 static av_always_inline
2267 void filter_mb_simple(const VP8Context *s, uint8_t *dst, const VP8FilterStrength *f,
2270 int mbedge_lim, bedge_lim;
2271 int filter_level = f->filter_level;
2272 int inner_limit = f->inner_limit;
2273 int inner_filter = f->inner_filter;
2274 ptrdiff_t linesize = s->linesize;
2279 bedge_lim = 2 * filter_level + inner_limit;
2280 mbedge_lim = bedge_lim + 4;
2283 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2285 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2286 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2287 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2291 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2293 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2294 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2295 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2299 #define MARGIN (16 << 2)
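/* MARGIN is 16 << 2 = 64: sixteen pixels in the quarter-pel units the MVs
 * use (the prefetch code above converts them to full pel with mv >> 2).
 * The initial bounds span the whole frame plus a 16-pixel margin on each
 * side, e.g. mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; the x bounds are
 * then lowered by 64 per macroblock and the y bounds by 64 per row, so the
 * permitted MV range is always expressed relative to the current macroblock.
 * For a 10-row frame the initial vertical range is [-64, 9 * 64 + 64]. */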
2300 static av_always_inline
2301 int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2302 const VP8Frame *prev_frame, int is_vp7)
2304 VP8Context *s = avctx->priv_data;
2307 s->mv_bounds.mv_min.y = -MARGIN;
2308 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2309 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2310 VP8Macroblock *mb = s->macroblocks_base +
2311 ((s->mb_width + 1) * (mb_y + 1) + 1);
2312 int mb_xy = mb_y * s->mb_width;
2314 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2316 s->mv_bounds.mv_min.x = -MARGIN;
2317 s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2319 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2320 if (vpx_rac_is_end(&s->c)) {
2321 return AVERROR_INVALIDDATA;
2324 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2325 DC_PRED * 0x01010101);
2326 decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map + mb_xy,
2327 prev_frame && prev_frame->seg_map ?
2328 prev_frame->seg_map + mb_xy : NULL, 1, is_vp7);
2329 s->mv_bounds.mv_min.x -= 64;
2330 s->mv_bounds.mv_max.x -= 64;
2332 s->mv_bounds.mv_min.y -= 64;
2333 s->mv_bounds.mv_max.y -= 64;
2338 static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2339 const VP8Frame *prev_frame)
2341 return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2344 static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2345 const VP8Frame *prev_frame)
2347 return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
2351 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2353 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2354 if (atomic_load(&otd->thread_mb_pos) < tmp) { \
2355 pthread_mutex_lock(&otd->lock); \
2356 atomic_store(&td->wait_mb_pos, tmp); \
2358 if (atomic_load(&otd->thread_mb_pos) >= tmp) \
2360 pthread_cond_wait(&otd->cond, &otd->lock); \
2362 atomic_store(&td->wait_mb_pos, INT_MAX); \
2363 pthread_mutex_unlock(&otd->lock); \
2367 #define update_pos(td, mb_y, mb_x) \
2369 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2370 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2372 int is_null = !next_td || !prev_td; \
2373 int pos_check = (is_null) ? 1 : \
2374 (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) || \
2375 (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \
2376 atomic_store(&td->thread_mb_pos, pos); \
2377 if (sliced_threading && pos_check) { \
2378 pthread_mutex_lock(&td->lock); \
2379 pthread_cond_broadcast(&td->cond); \
2380 pthread_mutex_unlock(&td->lock); \
2384 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
2385 #define update_pos(td, mb_y, mb_x) while(0)
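/* Illustrative sketch (not part of the decoder): the sliced-threading
 * synchronisation packs a macroblock position into a single int as
 * (mb_y << 16) | mb_x, so one atomic comparison orders positions row-major:
 * anything in a later row, or further right in the same row, compares
 * greater.  check_thread_pos() blocks on the other thread's condition
 * variable until its published position reaches the required packed value,
 * and update_pos() publishes progress and wakes any waiter.  Hypothetical
 * helpers below. */
#if 0
static int example_pack_pos(int mb_y, int mb_x)
{
    return (mb_y << 16) | (mb_x & 0xFFFF);
}

static int example_has_reached(int published_pos, int mb_y, int mb_x)
{
    return published_pos >= example_pack_pos(mb_y, mb_x);
}
#endif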
2388 static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2389 int jobnr, int threadnr, int is_vp7)
2391 VP8Context *s = avctx->priv_data;
2392 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2393 int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
2394 int mb_x, mb_xy = mb_y * s->mb_width;
2395 int num_jobs = s->num_jobs;
2396 const VP8Frame *prev_frame = s->prev_frame;
2397 VP8Frame *curframe = s->curframe;
2398 VPXRangeCoder *coeff_c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2402 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2403 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2404 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2407 if (vpx_rac_is_end(&s->c))
2408 return AVERROR_INVALIDDATA;
2413 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2414 if (mb_y == s->mb_height - 1)
2417 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2418 if (s->mb_layout == 1)
2419 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2421 // Make sure the previous frame has read its segmentation map,
2422 // if we re-use the same map.
2423 if (prev_frame && s->segmentation.enabled &&
2424 !s->segmentation.update_map)
2425 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2426 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2427 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2428 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2431 if (!is_vp7 || mb_y == 0)
2432 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2434 td->mv_bounds.mv_min.x = -MARGIN;
2435 td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2437 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2438 if (vpx_rac_is_end(&s->c))
2439 return AVERROR_INVALIDDATA;
2440 // Wait for the previous thread to reach mb_x+1, mb_y-1 (VP8) or mb_x+2, mb_y-2 (VP7).
2441 if (prev_td != td) {
2442 if (threadnr != 0) {
2443 check_thread_pos(td, prev_td,
2444 mb_x + (is_vp7 ? 2 : 1),
2445 mb_y - (is_vp7 ? 2 : 1));
2447 check_thread_pos(td, prev_td,
2448 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2449 mb_y - (is_vp7 ? 2 : 1));
2453 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2455 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2456 dst[2] - dst[1], 2);
2459 decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map + mb_xy,
2460 prev_frame && prev_frame->seg_map ?
2461 prev_frame->seg_map + mb_xy : NULL, 0, is_vp7);
2463 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_PREVIOUS);
2466 if (vpx_rac_is_end(coeff_c))
2467 return AVERROR_INVALIDDATA;
2468 decode_mb_coeffs(s, td, coeff_c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2471 if (mb->mode <= MODE_I4x4)
2472 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2474 inter_predict(s, td, dst, mb, mb_x, mb_y);
2476 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_GOLDEN);
2479 idct_mb(s, td, dst, mb);
2481 AV_ZERO64(td->left_nnz);
2482 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2484 /* Reset the DC block predictors that would exist
2485 * if the mb had coefficients */
2486 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2487 td->left_nnz[8] = 0;
2488 s->top_nnz[mb_x][8] = 0;
2492 if (s->deblock_filter)
2493 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2495 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2496 if (s->filter.simple)
2497 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2498 NULL, NULL, s->linesize, 0, 1);
2500 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2501 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2504 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_ALTREF);
2509 td->mv_bounds.mv_min.x -= 64;
2510 td->mv_bounds.mv_max.x -= 64;
2512 if (mb_x == s->mb_width + 1) {
2513 update_pos(td, mb_y, s->mb_width + 3);
2515 update_pos(td, mb_y, mb_x);
2521 static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2522 int jobnr, int threadnr)
2524 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2527 static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2528 int jobnr, int threadnr)
2530 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
2533 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2534 int jobnr, int threadnr, int is_vp7)
2536 VP8Context *s = avctx->priv_data;
2537 VP8ThreadData *td = &s->thread_data[threadnr];
2538 int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
2539 AVFrame *curframe = s->curframe->tf.f;
2541 VP8ThreadData *prev_td, *next_td;
2543 curframe->data[0] + 16 * mb_y * s->linesize,
2544 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2545 curframe->data[2] + 8 * mb_y * s->uvlinesize
2548 if (s->mb_layout == 1)
2549 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2551 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2556 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2557 if (mb_y == s->mb_height - 1)
2560 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2562 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2563 const VP8FilterStrength *f = &td->filter_strength[mb_x];
2565 check_thread_pos(td, prev_td,
2566 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2568 if (next_td != &s->thread_data[0])
2569 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2571 if (num_jobs == 1) {
2572 if (s->filter.simple)
2573 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2574 NULL, NULL, s->linesize, 0, 1);
2576 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2577 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2580 if (s->filter.simple)
2581 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2583 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2588 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2592 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2593 int jobnr, int threadnr)
2595 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2598 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2599 int jobnr, int threadnr)
2601 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
2604 static av_always_inline
2605 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2606 int threadnr, int is_vp7)
2608 const VP8Context *s = avctx->priv_data;
2609 VP8ThreadData *td = &s->thread_data[jobnr];
2610 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2611 VP8Frame *curframe = s->curframe;
2612 int mb_y, num_jobs = s->num_jobs;
2615 td->thread_nr = threadnr;
2616 td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
2617 td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
2618 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2619 atomic_store(&td->thread_mb_pos, mb_y << 16);
2620 ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2622 update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
2625 if (s->deblock_filter)
2626 s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2627 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2629 td->mv_bounds.mv_min.y -= 64 * num_jobs;
2630 td->mv_bounds.mv_max.y -= 64 * num_jobs;
2632 if (avctx->active_thread_type == FF_THREAD_FRAME)
2633 ff_thread_report_progress(&curframe->tf, mb_y, 0);
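/* Illustrative sketch (not part of the decoder): with slice threading each of
 * the num_jobs workers takes every num_jobs-th macroblock row starting at its
 * own jobnr (with 4 jobs, job 1 handles rows 1, 5, 9, ...); the
 * check_thread_pos()/update_pos() handshake keeps each row from running ahead
 * of the rows it depends on.  Hypothetical helper below. */
#if 0
static int example_row_owner(int mb_y, int num_jobs)
{
    return mb_y % num_jobs;   /* index of the job that decodes this row */
}
#endif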
2639 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2640 int jobnr, int threadnr)
2642 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2645 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2646 int jobnr, int threadnr)
2648 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
2651 static av_always_inline
2652 int vp78_decode_frame(AVCodecContext *avctx, AVFrame *rframe, int *got_frame,
2653 const AVPacket *avpkt, int is_vp7)
2655 VP8Context *s = avctx->priv_data;
2656 int ret, i, referenced, num_jobs;
2657 enum AVDiscard skip_thresh;
2658 VP8Frame *av_uninit(curframe), *prev_frame;
2661 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2663 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2668 if (s->actually_webp) {
2669 // avctx->pix_fmt already set in caller.
2670 } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
2671 s->pix_fmt = get_pixel_format(s);
2672 if (s->pix_fmt < 0) {
2673 ret = AVERROR(EINVAL);
2676 avctx->pix_fmt = s->pix_fmt;
2679 prev_frame = s->framep[VP8_FRAME_CURRENT];
2681 referenced = s->update_last || s->update_golden == VP8_FRAME_CURRENT ||
2682 s->update_altref == VP8_FRAME_CURRENT;
2684 skip_thresh = !referenced ? AVDISCARD_NONREF
2685 : !s->keyframe ? AVDISCARD_NONKEY
2688 if (avctx->skip_frame >= skip_thresh) {
2690 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2693 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2695 // release no longer referenced frames
2696 for (i = 0; i < 5; i++)
2697 if (s->frames[i].tf.f->buf[0] &&
2698 &s->frames[i] != prev_frame &&
2699 &s->frames[i] != s->framep[VP8_FRAME_PREVIOUS] &&
2700 &s->frames[i] != s->framep[VP8_FRAME_GOLDEN] &&
2701 &s->frames[i] != s->framep[VP8_FRAME_ALTREF])
2702 vp8_release_frame(&s->frames[i]);
2704 curframe = s->framep[VP8_FRAME_CURRENT] = vp8_find_free_buffer(s);
2707 avctx->colorspace = AVCOL_SPC_BT470BG;
2709 avctx->color_range = AVCOL_RANGE_JPEG;
2711 avctx->color_range = AVCOL_RANGE_MPEG;
2713 /* Given that arithmetic probabilities are updated every frame, it's quite
2714 * likely that the values we have on a random interframe are complete
2715 * junk if we didn't start decode on a keyframe. So just don't display
2716 * anything rather than junk. */
2717 if (!s->keyframe && (!s->framep[VP8_FRAME_PREVIOUS] ||
2718 !s->framep[VP8_FRAME_GOLDEN] ||
2719 !s->framep[VP8_FRAME_ALTREF])) {
2720 av_log(avctx, AV_LOG_WARNING,
2721 "Discarding interframe without a prior keyframe!\n");
2722 ret = AVERROR_INVALIDDATA;
2727 curframe->tf.f->flags |= AV_FRAME_FLAG_KEY;
2729 curframe->tf.f->flags &= ~AV_FRAME_FLAG_KEY;
2730 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2731 : AV_PICTURE_TYPE_P;
2732 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2735 // check if golden and altref are swapped or replaced by another reference
2736 if (s->update_altref != VP8_FRAME_NONE)
2737 s->next_framep[VP8_FRAME_ALTREF] = s->framep[s->update_altref];
2739 s->next_framep[VP8_FRAME_ALTREF] = s->framep[VP8_FRAME_ALTREF];
2741 if (s->update_golden != VP8_FRAME_NONE)
2742 s->next_framep[VP8_FRAME_GOLDEN] = s->framep[s->update_golden];
2744 s->next_framep[VP8_FRAME_GOLDEN] = s->framep[VP8_FRAME_GOLDEN];
2747 s->next_framep[VP8_FRAME_PREVIOUS] = curframe;
2749 s->next_framep[VP8_FRAME_PREVIOUS] = s->framep[VP8_FRAME_PREVIOUS];
2751 s->next_framep[VP8_FRAME_CURRENT] = curframe;
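/* To summarise the slot updates above: golden and altref keep their previous
 * contents unless update_golden / update_altref name another slot to copy
 * from (possibly the new frame or each other), the "previous" slot takes the
 * new frame only when update_last is set, and the current slot always takes
 * the new frame.  next_framep[] is copied over framep[] once the frame has
 * decoded successfully; the skip and error paths instead copy framep[] into
 * next_framep[] so the reference set is left unchanged. */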
2753 if (ffcodec(avctx->codec)->update_thread_context)
2754 ff_thread_finish_setup(avctx);
2756 if (avctx->hwaccel) {
2757 const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel);
2758 ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
2762 ret = hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2766 ret = hwaccel->end_frame(avctx);
2771 s->linesize = curframe->tf.f->linesize[0];
2772 s->uvlinesize = curframe->tf.f->linesize[1];
2774 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2775 /* Zero macroblock structures for top/top-left prediction
2776 * from outside the frame. */
2778 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2779 (s->mb_width + 1) * sizeof(*s->macroblocks));
2780 if (!s->mb_layout && s->keyframe)
2781 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2783 memset(s->ref_count, 0, sizeof(s->ref_count));
2785 if (s->mb_layout == 1) {
2786 // Make sure the previous frame has read its segmentation map,
2787 // if we re-use the same map.
2788 if (prev_frame && s->segmentation.enabled &&
2789 !s->segmentation.update_map)
2790 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2792 ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2794 ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2799 if (avctx->active_thread_type == FF_THREAD_FRAME)
2802 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2803 s->num_jobs = num_jobs;
2804 s->curframe = curframe;
2805 s->prev_frame = prev_frame;
2806 s->mv_bounds.mv_min.y = -MARGIN;
2807 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2808 for (i = 0; i < MAX_THREADS; i++) {
2809 VP8ThreadData *td = &s->thread_data[i];
2810 atomic_init(&td->thread_mb_pos, 0);
2811 atomic_init(&td->wait_mb_pos, INT_MAX);
2814 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2817 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2821 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2822 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2825 // if future frames don't use the updated probabilities,
2826 // reset them to the values we saved
2827 if (!s->update_probabilities)
2828 s->prob[0] = s->prob[1];
2830 if (!s->invisible) {
2831 if ((ret = av_frame_ref(rframe, curframe->tf.f)) < 0)
2838 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2842 int ff_vp8_decode_frame(AVCodecContext *avctx, AVFrame *frame,
2843 int *got_frame, AVPacket *avpkt)
2845 return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP8);
2848 #if CONFIG_VP7_DECODER
2849 static int vp7_decode_frame(AVCodecContext *avctx, AVFrame *frame,
2850 int *got_frame, AVPacket *avpkt)
2852 return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP7);
2854 #endif /* CONFIG_VP7_DECODER */
2856 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2858 VP8Context *s = avctx->priv_data;
2861 vp8_decode_flush_impl(avctx, 1);
2862 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2863 av_frame_free(&s->frames[i].tf.f);
2868 static av_cold int vp8_init_frames(VP8Context *s)
2871 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2872 s->frames[i].tf.f = av_frame_alloc();
2873 if (!s->frames[i].tf.f)
2874 return AVERROR(ENOMEM);
2879 static av_always_inline
2880 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2882 VP8Context *s = avctx->priv_data;
2886 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2887 s->pix_fmt = AV_PIX_FMT_NONE;
2888 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2890 ff_videodsp_init(&s->vdsp, 8);
2892 ff_vp78dsp_init(&s->vp8dsp);
2893 if (CONFIG_VP7_DECODER && is_vp7) {
2894 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2895 ff_vp7dsp_init(&s->vp8dsp);
2896 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2897 s->filter_mb_row = vp7_filter_mb_row;
2898 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2899 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2900 ff_vp8dsp_init(&s->vp8dsp);
2901 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2902 s->filter_mb_row = vp8_filter_mb_row;
2905 /* does not change for VP8 */
2906 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
2908 if ((ret = vp8_init_frames(s)) < 0) {
2909 ff_vp8_decode_free(avctx);
2916 #if CONFIG_VP7_DECODER
2917 static int vp7_decode_init(AVCodecContext *avctx)
2919 return vp78_decode_init(avctx, IS_VP7);
2921 #endif /* CONFIG_VP7_DECODER */
2923 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2925 return vp78_decode_init(avctx, IS_VP8);
2928 #if CONFIG_VP8_DECODER
2930 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
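/* Illustrative sketch (not part of the decoder): REBASE() translates a frame
 * pointer that points into the source thread context's frames[] array into a
 * pointer to the corresponding slot of the destination context's array by
 * preserving the element index (pic - &s_src->frames[0]).  Hypothetical
 * stand-alone equivalent below. */
#if 0
static VP8Frame *example_rebase(VP8Frame *pic,
                                VP8Frame *src_base, VP8Frame *dst_base)
{
    return pic ? dst_base + (pic - src_base) : NULL;
}
#endif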
2932 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2933 const AVCodecContext *src)
2935 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2938 if (s->macroblocks_base &&
2939 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2941 s->mb_width = s_src->mb_width;
2942 s->mb_height = s_src->mb_height;
2945 s->pix_fmt = s_src->pix_fmt;
2946 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2947 s->segmentation = s_src->segmentation;
2948 s->lf_delta = s_src->lf_delta;
2949 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2951 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2952 if (s_src->frames[i].tf.f->buf[0]) {
2953 int ret = vp8_ref_frame(&s->frames[i], &s_src->frames[i]);
2959 s->framep[0] = REBASE(s_src->next_framep[0]);
2960 s->framep[1] = REBASE(s_src->next_framep[1]);
2961 s->framep[2] = REBASE(s_src->next_framep[2]);
2962 s->framep[3] = REBASE(s_src->next_framep[3]);
2966 #endif /* HAVE_THREADS */
2967 #endif /* CONFIG_VP8_DECODER */
2969 #if CONFIG_VP7_DECODER
2970 const FFCodec ff_vp7_decoder = {
2972 CODEC_LONG_NAME("On2 VP7"),
2973 .p.type = AVMEDIA_TYPE_VIDEO,
2974 .p.id = AV_CODEC_ID_VP7,
2975 .priv_data_size = sizeof(VP8Context),
2976 .init = vp7_decode_init,
2977 .close = ff_vp8_decode_free,
2978 FF_CODEC_DECODE_CB(vp7_decode_frame),
2979 .p.capabilities = AV_CODEC_CAP_DR1,
2980 .flush = vp8_decode_flush,
2982 #endif /* CONFIG_VP7_DECODER */
2984 #if CONFIG_VP8_DECODER
2985 const FFCodec ff_vp8_decoder = {
2987 CODEC_LONG_NAME("On2 VP8"),
2988 .p.type = AVMEDIA_TYPE_VIDEO,
2989 .p.id = AV_CODEC_ID_VP8,
2990 .priv_data_size = sizeof(VP8Context),
2991 .init = ff_vp8_decode_init,
2992 .close = ff_vp8_decode_free,
2993 FF_CODEC_DECODE_CB(ff_vp8_decode_frame),
2994 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2995 AV_CODEC_CAP_SLICE_THREADS,
2996 .caps_internal = FF_CODEC_CAP_ALLOCATE_PROGRESS,
2997 .flush = vp8_decode_flush,
2998 UPDATE_THREAD_CONTEXT(vp8_decode_update_thread_context),
2999 .hw_configs = (const AVCodecHWConfigInternal *const []) {
3000 #if CONFIG_VP8_VAAPI_HWACCEL
3003 #if CONFIG_VP8_NVDEC_HWACCEL
3009 #endif /* CONFIG_VP8_DECODER */